libdap++ Updated for version 3.8.2
|
00001 00002 // -*- mode: c++; c-basic-offset:4 -*- 00003 00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data 00005 // Access Protocol. 00006 00007 // Copyright (c) 2002,2003 OPeNDAP, Inc. 00008 // Author: James Gallagher <jgallagher@opendap.org> 00009 // 00010 // This library is free software; you can redistribute it and/or 00011 // modify it under the terms of the GNU Lesser General Public 00012 // License as published by the Free Software Foundation; either 00013 // version 2.1 of the License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, 00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00023 // 00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112. 00025 00026 #include "config.h" 00027 00028 // #define DODS_DEBUG 00029 // #define DODS_DEBUG2 00030 #undef USE_GETENV 00031 00032 #include <pthread.h> 00033 #include <limits.h> 00034 #include <unistd.h> // for stat 00035 #include <sys/types.h> // for stat and mkdir 00036 #include <sys/stat.h> 00037 00038 #include <cstring> 00039 #include <iostream> 00040 #include <sstream> 00041 #include <algorithm> 00042 #include <iterator> 00043 #include <set> 00044 00045 #include "Error.h" 00046 #include "InternalErr.h" 00047 #include "ResponseTooBigErr.h" 00048 #ifndef WIN32 00049 #include "SignalHandler.h" 00050 #endif 00051 #include "HTTPCacheInterruptHandler.h" 00052 #include "HTTPCacheTable.h" 00053 #include "HTTPCache.h" 00054 00055 #include "util_mit.h" 00056 #include "debug.h" 00057 00058 using namespace std; 00059 00060 namespace libdap { 00061 00062 HTTPCache *HTTPCache::_instance = 0; 00063 00064 // instance_mutex is used to ensure that only one instance is created. 00065 // That is, it protects the body of the HTTPCache::instance() method. This 00066 // mutex is initialized from within the static function once_init_routine() 00067 // and the call to that takes place using pthread_once_init() where the mutex 00068 // once_block is used to protect that call. All of this ensures that no matter 00069 // how many threads call the instance() method, only one instance is ever 00070 // made. 00071 static pthread_mutex_t instance_mutex; 00072 static pthread_once_t once_block = PTHREAD_ONCE_INIT; 00073 00074 #ifdef WIN32 00075 #include <direct.h> 00076 #include <time.h> 00077 #include <fcntl.h> 00078 #define MKDIR(a,b) _mkdir((a)) 00079 #define UMASK(a) _umask((a)) 00080 #define REMOVE(a) remove((a)) 00081 #define MKSTEMP(a) _open(_mktemp((a)),_O_CREAT,_S_IREAD|_S_IWRITE) 00082 #define DIR_SEPARATOR_CHAR '\\' 00083 #define DIR_SEPARATOR_STR "\\" 00084 #else 00085 #define MKDIR(a,b) mkdir((a), (b)) 00086 #define UMASK(a) umask((a)) 00087 #define REMOVE(a) remove((a)) 00088 #define MKSTEMP(a) mkstemp((a)) 00089 #define DIR_SEPARATOR_CHAR '/' 00090 #define DIR_SEPARATOR_STR "/" 00091 #endif 00092 00093 #ifdef WIN32 00094 #define CACHE_LOCATION "\\tmp\\" 00095 #define CACHE_ROOT "dods-cache\\" 00096 #else 00097 #define CACHE_LOCATION "/tmp/" 00098 #define CACHE_ROOT "dods-cache/" 00099 #endif 00100 #define CACHE_INDEX ".index" 00101 #define CACHE_LOCK ".lock" 00102 #define CACHE_META ".meta" 00103 //#define CACHE_EMPTY_ETAG "@cache@" 00104 00105 #define NO_LM_EXPIRATION 24*3600 // 24 hours 00106 00107 #define DUMP_FREQUENCY 10 // Dump index every x loads 00108 00109 #define MEGA 0x100000L 00110 #define CACHE_TOTAL_SIZE 20 // Default cache size is 20M 00111 #define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc. 00112 #define CACHE_GC_PCT 10 // 10% of cache size free after GC 00113 #define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size 00114 #define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry 00115 00116 static void 00117 once_init_routine() 00118 { 00119 int status; 00120 status = INIT(&instance_mutex); 00121 00122 if (status != 0) 00123 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting."); 00124 } 00125 00154 HTTPCache * 00155 HTTPCache::instance(const string &cache_root, bool force) 00156 { 00157 int status = pthread_once(&once_block, once_init_routine); 00158 if (status != 0) 00159 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting."); 00160 00161 LOCK(&instance_mutex); 00162 00163 DBG(cerr << "Entering instance(); (" << hex << _instance << dec << ")" 00164 << "... "); 00165 00166 try { 00167 if (!_instance) { 00168 _instance = new HTTPCache(cache_root, force); 00169 00170 DBG(cerr << "New instance: " << _instance << ", cache root: " 00171 << _instance->d_cache_root << endl); 00172 00173 atexit(delete_instance); 00174 00175 #ifndef WIN32 00176 // Register the interrupt handler. If we've already registered 00177 // one, barf. If this becomes a problem, hack SignalHandler so 00178 // that we can chain these handlers... 02/10/04 jhrg 00179 // 00180 // Technically we're leaking memory here. However, since this 00181 // class is a singleton, we know that only three objects will 00182 // ever be created and they will all exist until the process 00183 // exits. We can let this slide... 02/12/04 jhrg 00184 EventHandler *old_eh = SignalHandler::instance()->register_handler 00185 (SIGINT, new HTTPCacheInterruptHandler); 00186 if (old_eh) { 00187 SignalHandler::instance()->register_handler(SIGINT, old_eh); 00188 throw SignalHandlerRegisteredErr( 00189 "Could not register event handler for SIGINT without superseding an existing one."); 00190 } 00191 00192 old_eh = SignalHandler::instance()->register_handler 00193 (SIGPIPE, new HTTPCacheInterruptHandler); 00194 if (old_eh) { 00195 SignalHandler::instance()->register_handler(SIGPIPE, old_eh); 00196 throw SignalHandlerRegisteredErr( 00197 "Could not register event handler for SIGPIPE without superseding an existing one."); 00198 } 00199 00200 old_eh = SignalHandler::instance()->register_handler 00201 (SIGTERM, new HTTPCacheInterruptHandler); 00202 if (old_eh) { 00203 SignalHandler::instance()->register_handler(SIGTERM, old_eh); 00204 throw SignalHandlerRegisteredErr( 00205 "Could not register event handler for SIGTERM without superseding an existing one."); 00206 } 00207 #endif 00208 } 00209 } 00210 catch (...) { 00211 DBG2(cerr << "The constructor threw an Error!" << endl); 00212 UNLOCK(&instance_mutex); 00213 throw; 00214 } 00215 00216 UNLOCK(&instance_mutex); 00217 DBGN(cerr << "returning " << hex << _instance << dec << endl); 00218 00219 return _instance; 00220 } 00221 00225 void 00226 HTTPCache::delete_instance() 00227 { 00228 DBG(cerr << "Entering delete_instance()..." << endl); 00229 if (HTTPCache::_instance) { 00230 DBG(cerr << "Deleting the cache: " << HTTPCache::_instance << endl); 00231 delete HTTPCache::_instance; 00232 HTTPCache::_instance = 0; 00233 } 00234 00235 DBG(cerr << "Exiting delete_instance()" << endl); 00236 } 00237 00252 HTTPCache::HTTPCache(string cache_root, bool force) : 00253 d_locked_open_file(0), 00254 d_cache_enabled(false), 00255 d_cache_protected(false), 00256 d_expire_ignored(false), 00257 d_always_validate(false), 00258 d_total_size(CACHE_TOTAL_SIZE * MEGA), 00259 d_folder_size(CACHE_TOTAL_SIZE / CACHE_FOLDER_PCT), 00260 d_gc_buffer(CACHE_TOTAL_SIZE / CACHE_GC_PCT), 00261 d_max_entry_size(MAX_CACHE_ENTRY_SIZE * MEGA), 00262 d_default_expiration(NO_LM_EXPIRATION), 00263 d_max_age(-1), 00264 d_max_stale(-1), 00265 d_min_fresh(-1), 00266 d_http_cache_table(0) 00267 { 00268 DBG(cerr << "Entering the constructor for " << this << "... "); 00269 #if 0 00270 int status = pthread_once(&once_block, once_init_routine); 00271 if (status != 0) 00272 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting."); 00273 #endif 00274 INIT(&d_cache_mutex); 00275 00276 // This used to throw an Error object if we could not get the 00277 // single user lock. However, that results in an invalid object. It's 00278 // better to have an instance that has default values. If we cannot get 00279 // the lock, make sure to set the cache as *disabled*. 03/12/03 jhrg 00280 // 00281 // I fixed this block so that the cache root is set before we try to get 00282 // the single user lock. That was the fix for bug #661. To make that 00283 // work, I had to move the call to create_cache_root out of 00284 // set_cache_root(). 09/08/03 jhrg 00285 00286 set_cache_root(cache_root); 00287 int block_size; 00288 00289 if (!get_single_user_lock(force)) 00290 throw Error("Could not get single user lock for the cache"); 00291 00292 #ifdef WIN32 00293 // Windows is unable to provide us this information. 4096 appears 00294 // a best guess. It is likely to be in the range [2048, 8192] on 00295 // windows, but will the level of truth of that statement vary over 00296 // time ? 00297 block_size = 4096; 00298 #else 00299 struct stat s; 00300 if (stat(cache_root.c_str(), &s) == 0) 00301 block_size = s.st_blksize; 00302 else 00303 throw Error("Could not set file system block size."); 00304 #endif 00305 d_http_cache_table = new HTTPCacheTable(d_cache_root, block_size); 00306 d_cache_enabled = true; 00307 00308 DBGN(cerr << "exiting" << endl); 00309 } 00310 00323 HTTPCache::~HTTPCache() 00324 { 00325 DBG(cerr << "Entering the destructor for " << this << "... "); 00326 00327 try { 00328 if (startGC()) 00329 perform_garbage_collection(); 00330 00331 d_http_cache_table->cache_index_write(); 00332 } 00333 catch (Error &e) { 00334 // If the cache index cannot be written, we've got problems. However, 00335 // unless we're debugging, still free up the cache table in memory. 00336 // How should we let users know they cache index is not being 00337 // written?? 10/03/02 jhrg 00338 DBG(cerr << e.get_error_message() << endl); 00339 } 00340 00341 delete d_http_cache_table; 00342 00343 release_single_user_lock(); 00344 00345 DBGN(cerr << "exiting destructor." << endl); 00346 DESTROY(&d_cache_mutex); 00347 } 00348 00349 00353 00357 bool 00358 HTTPCache::stopGC() const 00359 { 00360 return (d_http_cache_table->get_current_size() + d_folder_size < d_total_size - d_gc_buffer); 00361 } 00362 00369 bool 00370 HTTPCache::startGC() const 00371 { 00372 DBG(cerr << "startGC, current_size: " << d_http_cache_table->get_current_size() << endl); 00373 return (d_http_cache_table->get_current_size() + d_folder_size > d_total_size); 00374 } 00375 00390 void 00391 HTTPCache::perform_garbage_collection() 00392 { 00393 DBG(cerr << "Performing garbage collection" << endl); 00394 00395 // Remove all the expired responses. 00396 expired_gc(); 00397 00398 // Remove entries larger than max_entry_size. 00399 too_big_gc(); 00400 00401 // Remove entries starting with zero hits, 1, ..., until stopGC() 00402 // returns true. 00403 hits_gc(); 00404 } 00405 00411 void 00412 HTTPCache::expired_gc() 00413 { 00414 if (!d_expire_ignored) { 00415 d_http_cache_table->delete_expired_entries(); 00416 } 00417 } 00418 00435 void 00436 HTTPCache::hits_gc() 00437 { 00438 int hits = 0; 00439 00440 if (startGC()) { 00441 while (!stopGC()) { 00442 d_http_cache_table->delete_by_hits(hits); 00443 hits++; 00444 } 00445 } 00446 } 00447 00452 void HTTPCache::too_big_gc() { 00453 if (startGC()) 00454 d_http_cache_table->delete_by_size(d_max_entry_size); 00455 } 00456 00458 00469 bool HTTPCache::get_single_user_lock(bool force) 00470 { 00471 if (!d_locked_open_file) { 00472 FILE * fp = NULL; 00473 00474 try { 00475 // It's OK to call create_cache_root if the directory already 00476 // exists. 00477 create_cache_root(d_cache_root); 00478 } 00479 catch (Error &e) { 00480 // We need to catch and return false because this method is 00481 // called from a ctor and throwing at this point will result in a 00482 // partially constructed object. 01/22/04 jhrg 00483 DBG(cerr << "Failure to create the cache root" << endl); 00484 return false; 00485 } 00486 00487 // Try to read the lock file. If we can open for reading, it exists. 00488 string lock = d_cache_root + CACHE_LOCK; 00489 if ((fp = fopen(lock.c_str(), "r")) != NULL) { 00490 int res = fclose(fp); 00491 if (res) { 00492 DBG(cerr << "Failed to close " << (void *)fp << endl); 00493 } 00494 if (force) 00495 REMOVE(lock.c_str()); 00496 else 00497 return false; 00498 } 00499 00500 if ((fp = fopen(lock.c_str(), "w")) == NULL) { 00501 DBG(cerr << "Could not open for write access" << endl); 00502 return false; 00503 } 00504 00505 d_locked_open_file = fp; 00506 return true; 00507 } 00508 00509 cerr << "locked_open_file is true" << endl; 00510 return false; 00511 } 00512 00515 void 00516 HTTPCache::release_single_user_lock() 00517 { 00518 if (d_locked_open_file) { 00519 int res = fclose(d_locked_open_file); 00520 if (res) { 00521 DBG(cerr << "Failed to close " << (void *)d_locked_open_file << endl) ; 00522 } 00523 d_locked_open_file = 0; 00524 } 00525 00526 string lock = d_cache_root + CACHE_LOCK; 00527 REMOVE(lock.c_str()); 00528 } 00529 00532 00536 string 00537 HTTPCache::get_cache_root() const 00538 { 00539 return d_cache_root; 00540 } 00541 00542 00551 void 00552 HTTPCache::create_cache_root(const string &cache_root) 00553 { 00554 struct stat stat_info; 00555 string::size_type cur = 0; 00556 00557 #ifdef WIN32 00558 cur = cache_root[1] == ':' ? 3 : 1; 00559 typedef int mode_t; 00560 #else 00561 cur = 1; 00562 #endif 00563 while ((cur = cache_root.find(DIR_SEPARATOR_CHAR, cur)) != string::npos) { 00564 string dir = cache_root.substr(0, cur); 00565 if (stat(dir.c_str(), &stat_info) == -1) { 00566 DBG2(cerr << "Cache....... Creating " << dir << endl); 00567 mode_t mask = UMASK(0); 00568 if (MKDIR(dir.c_str(), 0777) < 0) { 00569 DBG2(cerr << "Error: can't create." << endl); 00570 UMASK(mask); 00571 throw Error(string("Could not create the directory for the cache. Failed when building path at ") + dir + string(".")); 00572 } 00573 UMASK(mask); 00574 } 00575 else { 00576 DBG2(cerr << "Cache....... Found " << dir << endl); 00577 } 00578 cur++; 00579 } 00580 } 00581 00596 void 00597 HTTPCache::set_cache_root(const string &root) 00598 { 00599 if (root != "") { 00600 d_cache_root = root; 00601 // cache root should end in /. 00602 if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR) 00603 d_cache_root += DIR_SEPARATOR_CHAR; 00604 } 00605 else { 00606 // If no cache root has been indicated then look for a suitable 00607 // location. 00608 #ifdef USE_GETENV 00609 char * cr = (char *) getenv("DODS_CACHE"); 00610 if (!cr) cr = (char *) getenv("TMP"); 00611 if (!cr) cr = (char *) getenv("TEMP"); 00612 if (!cr) cr = (char*)CACHE_LOCATION; 00613 d_cache_root = cr; 00614 #else 00615 d_cache_root = CACHE_LOCATION; 00616 #endif 00617 00618 if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR) 00619 d_cache_root += DIR_SEPARATOR_CHAR; 00620 00621 d_cache_root += CACHE_ROOT; 00622 } 00623 00624 // Test d_hhtp_cache_table because this method can be called before that 00625 // instance is created and also can be called later to cahnge the cache 00626 // root. jhrg 05.14.08 00627 if (d_http_cache_table) 00628 d_http_cache_table->set_cache_root(d_cache_root); 00629 } 00630 00642 void 00643 HTTPCache::set_cache_enabled(bool mode) 00644 { 00645 lock_cache_interface(); 00646 00647 d_cache_enabled = mode; 00648 00649 unlock_cache_interface(); 00650 } 00651 00654 bool 00655 HTTPCache::is_cache_enabled() const 00656 { 00657 DBG2(cerr << "In HTTPCache::is_cache_enabled: (" << d_cache_enabled << ")" 00658 << endl); 00659 return d_cache_enabled; 00660 } 00661 00672 void 00673 HTTPCache::set_cache_disconnected(CacheDisconnectedMode mode) 00674 { 00675 lock_cache_interface(); 00676 00677 d_cache_disconnected = mode; 00678 00679 unlock_cache_interface(); 00680 } 00681 00684 CacheDisconnectedMode 00685 HTTPCache::get_cache_disconnected() const 00686 { 00687 return d_cache_disconnected; 00688 } 00689 00698 void 00699 HTTPCache::set_expire_ignored(bool mode) 00700 { 00701 lock_cache_interface(); 00702 00703 d_expire_ignored = mode; 00704 00705 unlock_cache_interface(); 00706 } 00707 00708 /* Is the cache ignoring Expires headers returned with responses that have 00709 been cached? */ 00710 00711 bool 00712 HTTPCache::is_expire_ignored() const 00713 { 00714 return d_expire_ignored; 00715 } 00716 00732 void 00733 HTTPCache::set_max_size(unsigned long size) 00734 { 00735 lock_cache_interface(); 00736 00737 try { 00738 unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ? 00739 MIN_CACHE_TOTAL_SIZE * MEGA : 00740 (size > ULONG_MAX ? ULONG_MAX : size * MEGA); 00741 unsigned long old_size = d_total_size; 00742 d_total_size = new_size; 00743 d_folder_size = d_total_size / CACHE_FOLDER_PCT; 00744 d_gc_buffer = d_total_size / CACHE_GC_PCT; 00745 00746 if (new_size < old_size && startGC()) { 00747 perform_garbage_collection(); 00748 d_http_cache_table->cache_index_write(); 00749 } 00750 } 00751 catch (...) { 00752 unlock_cache_interface(); 00753 DBGN(cerr << "Unlocking interface." << endl); 00754 throw; 00755 } 00756 00757 DBG2(cerr << "Cache....... Total cache size: " << d_total_size 00758 << " with " << d_folder_size 00759 << " bytes for meta information and folders and at least " 00760 << d_gc_buffer << " bytes free after every gc" << endl); 00761 00762 unlock_cache_interface(); 00763 } 00764 00767 unsigned long 00768 HTTPCache::get_max_size() const 00769 { 00770 return d_total_size / MEGA; 00771 } 00772 00781 void 00782 HTTPCache::set_max_entry_size(unsigned long size) 00783 { 00784 lock_cache_interface(); 00785 00786 try { 00787 unsigned long new_size = size * MEGA; 00788 if (new_size > 0 && new_size < d_total_size - d_folder_size) { 00789 unsigned long old_size = d_max_entry_size; 00790 d_max_entry_size = new_size; 00791 if (new_size < old_size && startGC()) { 00792 perform_garbage_collection(); 00793 d_http_cache_table->cache_index_write(); 00794 } 00795 } 00796 } 00797 catch (...) { 00798 unlock_cache_interface(); 00799 throw; 00800 } 00801 00802 DBG2(cerr << "Cache...... Max entry cache size is " 00803 << d_max_entry_size << endl); 00804 00805 unlock_cache_interface(); 00806 } 00807 00812 unsigned long 00813 HTTPCache::get_max_entry_size() const 00814 { 00815 return d_max_entry_size / MEGA; 00816 } 00817 00828 void 00829 HTTPCache::set_default_expiration(const int exp_time) 00830 { 00831 lock_cache_interface(); 00832 00833 d_default_expiration = exp_time; 00834 00835 unlock_cache_interface(); 00836 } 00837 00840 int 00841 HTTPCache::get_default_expiration() const 00842 { 00843 return d_default_expiration; 00844 } 00845 00850 void 00851 HTTPCache::set_always_validate(bool validate) 00852 { 00853 d_always_validate = validate; 00854 } 00855 00859 bool 00860 HTTPCache::get_always_validate() const 00861 { 00862 return d_always_validate; 00863 } 00864 00881 void 00882 HTTPCache::set_cache_control(const vector<string> &cc) 00883 { 00884 lock_cache_interface(); 00885 00886 try { 00887 d_cache_control = cc; 00888 00889 vector<string>::const_iterator i; 00890 for (i = cc.begin(); i != cc.end(); ++i) { 00891 string header = (*i).substr(0, (*i).find(':')); 00892 string value = (*i).substr((*i).find(": ") + 2); 00893 if (header != "Cache-Control") { 00894 throw InternalErr(__FILE__, __LINE__, "Expected cache control header not found."); 00895 } 00896 else { 00897 if (value == "no-cache" || value == "no-store") 00898 d_cache_enabled = false; 00899 else if (value.find("max-age") != string::npos) { 00900 string max_age = value.substr(value.find("=" + 1)); 00901 d_max_age = parse_time(max_age.c_str()); 00902 } 00903 else if (value == "max-stale") 00904 d_max_stale = 0; // indicates will take anything; 00905 else if (value.find("max-stale") != string::npos) { 00906 string max_stale = value.substr(value.find("=" + 1)); 00907 d_max_stale = parse_time(max_stale.c_str()); 00908 } 00909 else if (value.find("min-fresh") != string::npos) { 00910 string min_fresh = value.substr(value.find("=" + 1)); 00911 d_min_fresh = parse_time(min_fresh.c_str()); 00912 } 00913 } 00914 } 00915 } 00916 catch (...) { 00917 unlock_cache_interface(); 00918 throw; 00919 } 00920 00921 unlock_cache_interface(); 00922 } 00923 00924 00929 vector<string> 00930 HTTPCache::get_cache_control() 00931 { 00932 return d_cache_control; 00933 } 00934 00936 00945 bool 00946 HTTPCache::is_url_in_cache(const string &url) 00947 { 00948 DBG(cerr << "Is this url in the cache? (" << url << ")" << endl); 00949 00950 HTTPCacheTable::CacheEntry *entry = d_http_cache_table->get_locked_entry_from_cache_table(url); 00951 bool status = entry != 0; 00952 if (entry) { 00953 entry->unlock_read_response(); 00954 } 00955 return status; 00956 } 00957 00963 bool 00964 is_hop_by_hop_header(const string &header) 00965 { 00966 return header.find("Connection") != string::npos 00967 || header.find("Keep-Alive") != string::npos 00968 || header.find("Proxy-Authenticate") != string::npos 00969 || header.find("Proxy-Authorization") != string::npos 00970 || header.find("Transfer-Encoding") != string::npos 00971 || header.find("Upgrade") != string::npos; 00972 } 00973 00985 void 00986 HTTPCache::write_metadata(const string &cachename, const vector<string> &headers) 00987 { 00988 string fname = cachename + CACHE_META; 00989 d_open_files.push_back(fname); 00990 00991 FILE *dest = fopen(fname.c_str(), "w"); 00992 if (!dest) { 00993 throw InternalErr(__FILE__, __LINE__, 00994 "Could not open named cache entry file."); 00995 } 00996 00997 vector<string>::const_iterator i; 00998 for (i = headers.begin(); i != headers.end(); ++i) { 00999 if (!is_hop_by_hop_header(*i)) { 01000 int s = fwrite((*i).c_str(), (*i).size(), 1, dest); 01001 if (s != 1) 01002 throw InternalErr(__FILE__, __LINE__, "could not write header: '" + (*i) + "' " + long_to_string(s)); 01003 s = fwrite("\n", 1, 1, dest); 01004 if (s != 1) 01005 throw InternalErr(__FILE__, __LINE__, "could not write header: " + long_to_string(s)); 01006 } 01007 } 01008 01009 int res = fclose(dest); 01010 if (res) { 01011 DBG(cerr << "HTTPCache::write_metadata - Failed to close " 01012 << dest << endl); 01013 } 01014 01015 d_open_files.pop_back(); 01016 } 01017 01028 void 01029 HTTPCache::read_metadata(const string &cachename, vector<string> &headers) 01030 { 01031 FILE *md = fopen(string(cachename + CACHE_META).c_str(), "r"); 01032 if (!md) { 01033 throw InternalErr(__FILE__, __LINE__, 01034 "Could not open named cache entry meta data file."); 01035 } 01036 01037 char line[1024]; 01038 while (!feof(md) && fgets(line, 1024, md)) { 01039 line[min(1024, static_cast<int>(strlen(line)))-1] = '\0'; // erase newline 01040 headers.push_back(string(line)); 01041 } 01042 01043 int res = fclose(md); 01044 if (res) { 01045 DBG(cerr << "HTTPCache::read_metadata - Failed to close " 01046 << md << endl); 01047 } 01048 } 01049 01071 int 01072 HTTPCache::write_body(const string &cachename, const FILE *src) 01073 { 01074 d_open_files.push_back(cachename); 01075 01076 FILE *dest = fopen(cachename.c_str(), "wb"); 01077 if (!dest) { 01078 throw InternalErr(__FILE__, __LINE__, 01079 "Could not open named cache entry file."); 01080 } 01081 01082 // Read and write in 1k blocks; an attempt at doing this efficiently. 01083 // 09/30/02 jhrg 01084 char line[1024]; 01085 size_t n; 01086 int total = 0; 01087 while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) { 01088 total += fwrite(line, 1, n, dest); 01089 DBG2(sleep(3)); 01090 } 01091 01092 if (ferror(const_cast<FILE *>(src)) || ferror(dest)) { 01093 int res = fclose(dest); 01094 res = res & unlink(cachename.c_str()); 01095 if (res) { 01096 DBG(cerr << "HTTPCache::write_body - Failed to close/unlink " 01097 << dest << endl); 01098 } 01099 throw InternalErr(__FILE__, __LINE__, 01100 "I/O error transferring data to the cache."); 01101 } 01102 01103 rewind(const_cast<FILE *>(src)); 01104 01105 int res = fclose(dest); 01106 if (res) { 01107 DBG(cerr << "HTTPCache::write_body - Failed to close " 01108 << dest << endl); 01109 } 01110 01111 d_open_files.pop_back(); 01112 01113 return total; 01114 } 01115 01124 FILE * 01125 HTTPCache::open_body(const string &cachename) 01126 { 01127 DBG(cerr << "cachename: " << cachename << endl); 01128 01129 FILE *src = fopen(cachename.c_str(), "rb"); // Read only 01130 if (!src) 01131 throw InternalErr(__FILE__, __LINE__, "Could not open cache file."); 01132 01133 return src; 01134 } 01135 01161 bool 01162 HTTPCache::cache_response(const string &url, time_t request_time, 01163 const vector<string> &headers, const FILE *body) 01164 { 01165 lock_cache_interface(); 01166 01167 DBG(cerr << "Caching url: " << url << "." << endl); 01168 01169 try { 01170 // If this is not an http or https URL, don't cache. 01171 if (url.find("http:") == string::npos && 01172 url.find("https:") == string::npos) { 01173 unlock_cache_interface(); 01174 return false; 01175 } 01176 01177 // This does nothing if url is not already in the cache. It's 01178 // more efficient to do this than to first check and see if the entry 01179 // exists. 10/10/02 jhrg 01180 d_http_cache_table->remove_entry_from_cache_table(url); 01181 01182 HTTPCacheTable::CacheEntry *entry = new HTTPCacheTable::CacheEntry(url); 01183 entry->lock_write_response(); 01184 01185 try { 01186 d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); // etag, lm, date, age, expires, max_age. 01187 if (entry->is_no_cache()) { 01188 DBG(cerr << "Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry 01189 << "(" << url << ")" << endl); 01190 entry->unlock_write_response(); 01191 delete entry; entry = 0; 01192 unlock_cache_interface(); 01193 return false; 01194 } 01195 01196 // corrected_initial_age, freshness_lifetime, response_time. 01197 d_http_cache_table->calculate_time(entry, d_default_expiration, request_time); 01198 01199 d_http_cache_table->create_location(entry); // cachename, cache_body_fd 01200 // move these write function to cache table 01201 entry->set_size(write_body(entry->get_cachename(), body)); 01202 write_metadata(entry->get_cachename(), headers); 01203 d_http_cache_table->add_entry_to_cache_table(entry); 01204 entry->unlock_write_response(); 01205 } 01206 catch (ResponseTooBigErr &e) { 01207 // Oops. Bummer. Clean up and exit. 01208 DBG(cerr << e.get_error_message() << endl); 01209 REMOVE(entry->get_cachename().c_str()); 01210 REMOVE(string(entry->get_cachename() + CACHE_META).c_str()); 01211 DBG(cerr << "Too big; deleting HTTPCacheTable::CacheEntry: " << entry << "(" << url 01212 << ")" << endl); 01213 entry->unlock_write_response(); 01214 delete entry; entry = 0; 01215 unlock_cache_interface(); 01216 return false; 01217 } 01218 01219 if (d_http_cache_table->get_new_entries() > DUMP_FREQUENCY) { 01220 if (startGC()) 01221 perform_garbage_collection(); 01222 01223 d_http_cache_table->cache_index_write(); // resets new_entries 01224 } 01225 } 01226 catch (...) { 01227 unlock_cache_interface(); 01228 throw; 01229 } 01230 01231 unlock_cache_interface(); 01232 01233 return true; 01234 } 01235 01254 vector<string> 01255 HTTPCache::get_conditional_request_headers(const string &url) 01256 { 01257 lock_cache_interface(); 01258 01259 HTTPCacheTable::CacheEntry *entry = 0; 01260 vector<string> headers; 01261 01262 DBG(cerr << "Getting conditional request headers for " << url << endl); 01263 01264 try { 01265 entry = d_http_cache_table->get_locked_entry_from_cache_table(url); 01266 if (!entry) 01267 throw Error("There is no cache entry for the URL: " + url); 01268 01269 if (entry->get_etag() != "") 01270 headers.push_back(string("If-None-Match: ") + entry->get_etag()); 01271 01272 if (entry->get_lm() > 0) { 01273 time_t lm = entry->get_lm(); 01274 headers.push_back(string("If-Modified-Since: ") 01275 + date_time_str(&lm)); 01276 } 01277 else if (entry->get_max_age() > 0) { 01278 time_t max_age = entry->get_max_age(); 01279 headers.push_back(string("If-Modified-Since: ") 01280 + date_time_str(&max_age)); 01281 } 01282 else if (entry->get_expires() > 0) { 01283 time_t expires = entry->get_expires(); 01284 headers.push_back(string("If-Modified-Since: ") 01285 + date_time_str(&expires)); 01286 } 01287 entry->unlock_read_response(); 01288 unlock_cache_interface(); 01289 } 01290 catch (...) { 01291 unlock_cache_interface(); 01292 if (entry) { 01293 entry->unlock_read_response(); 01294 } 01295 throw; 01296 } 01297 01298 return headers; 01299 } 01300 01304 struct HeaderLess: binary_function<const string&, const string&, bool> 01305 { 01306 bool operator()(const string &s1, const string &s2) const { 01307 return s1.substr(0, s1.find(':')) < s2.substr(0, s2.find(':')); 01308 } 01309 }; 01310 01324 void 01325 HTTPCache::update_response(const string &url, time_t request_time, 01326 const vector<string> &headers) 01327 { 01328 lock_cache_interface(); 01329 01330 HTTPCacheTable::CacheEntry *entry = 0; 01331 DBG(cerr << "Updating the response headers for: " << url << endl); 01332 01333 try { 01334 entry = d_http_cache_table->get_write_locked_entry_from_cache_table(url); 01335 if (!entry) 01336 throw Error("There is no cache entry for the URL: " + url); 01337 01338 // Merge the new headers with the exiting HTTPCacheTable::CacheEntry object. 01339 d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); 01340 01341 // Update corrected_initial_age, freshness_lifetime, response_time. 01342 d_http_cache_table->calculate_time(entry, d_default_expiration, request_time); 01343 01344 // Merge the new headers with those in the persistent store. How: 01345 // Load the new headers into a set, then merge the old headers. Since 01346 // set<> ignores duplicates, old headers with the same name as a new 01347 // header will got into the bit bucket. Define a special compare 01348 // functor to make sure that headers are compared using only their 01349 // name and not their value too. 01350 set<string, HeaderLess> merged_headers; 01351 01352 // Load in the new headers 01353 copy(headers.begin(), headers.end(), 01354 inserter(merged_headers, merged_headers.begin())); 01355 01356 // Get the old headers and load them in. 01357 vector<string> old_headers; 01358 read_metadata(entry->get_cachename(), old_headers); 01359 copy(old_headers.begin(), old_headers.end(), 01360 inserter(merged_headers, merged_headers.begin())); 01361 01362 // Read the values back out. Use reverse iterators with back_inserter 01363 // to preserve header order. NB: vector<> does not support push_front 01364 // so we can't use front_inserter(). 01/09/03 jhrg 01365 vector<string> result; 01366 copy(merged_headers.rbegin(), merged_headers.rend(), 01367 back_inserter(result)); 01368 01369 write_metadata(entry->get_cachename(), result); 01370 entry->unlock_write_response(); 01371 unlock_cache_interface(); 01372 } 01373 catch (...) { 01374 if (entry) { 01375 entry->unlock_read_response(); 01376 } 01377 unlock_cache_interface(); 01378 throw; 01379 } 01380 } 01381 01393 bool 01394 HTTPCache::is_url_valid(const string &url) 01395 { 01396 lock_cache_interface(); 01397 01398 bool freshness; 01399 HTTPCacheTable::CacheEntry *entry = 0; 01400 01401 DBG(cerr << "Is this URL valid? (" << url << ")" << endl); 01402 01403 try { 01404 if (d_always_validate) { 01405 unlock_cache_interface(); 01406 return false; // force re-validation. 01407 } 01408 01409 entry = d_http_cache_table->get_locked_entry_from_cache_table(url); 01410 if (!entry) 01411 throw Error("There is no cache entry for the URL: " + url); 01412 01413 // If we supported range requests, we'd need code here to check if 01414 // there was only a partial response in the cache. 10/02/02 jhrg 01415 01416 // In case this entry is of type "must-revalidate" then we consider it 01417 // invalid. 01418 if (entry->get_must_revalidate()) { 01419 entry->unlock_read_response(); 01420 unlock_cache_interface(); 01421 return false; 01422 } 01423 01424 time_t resident_time = time(NULL) - entry->get_response_time(); 01425 time_t current_age = entry->get_corrected_initial_age() + resident_time; 01426 01427 // Check that the max-age, max-stale, and min-fresh directives 01428 // given in the request cache control header is followed. 01429 if (d_max_age >= 0 && current_age > d_max_age) { 01430 DBG(cerr << "Cache....... Max-age validation" << endl); 01431 entry->unlock_read_response(); 01432 unlock_cache_interface(); 01433 return false; 01434 } 01435 if (d_min_fresh >= 0 01436 && entry->get_freshness_lifetime() < current_age + d_min_fresh) { 01437 DBG(cerr << "Cache....... Min-fresh validation" << endl); 01438 entry->unlock_read_response(); 01439 unlock_cache_interface(); 01440 return false; 01441 } 01442 01443 freshness = (entry->get_freshness_lifetime() 01444 + (d_max_stale >= 0 ? d_max_stale : 0) > current_age); 01445 entry->unlock_read_response(); 01446 unlock_cache_interface(); 01447 } 01448 catch (...) { 01449 if (entry) { 01450 entry->unlock_read_response(); 01451 } 01452 unlock_cache_interface(); 01453 throw; 01454 } 01455 01456 return freshness; 01457 } 01458 01486 FILE * HTTPCache::get_cached_response(const string &url, 01487 vector<string> &headers, string &cacheName) { 01488 lock_cache_interface(); 01489 01490 FILE *body; 01491 HTTPCacheTable::CacheEntry *entry = 0; 01492 01493 DBG(cerr << "Getting the cached response for " << url << endl); 01494 01495 try { 01496 entry = d_http_cache_table->get_locked_entry_from_cache_table(url); 01497 if (!entry) { 01498 unlock_cache_interface(); 01499 return 0; 01500 } 01501 01502 cacheName = entry->get_cachename(); 01503 read_metadata(entry->get_cachename(), headers); 01504 01505 DBG(cerr << "Headers just read from cache: " << endl); 01506 DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr, "\n"))); 01507 01508 body = open_body(entry->get_cachename()); 01509 01510 DBG(cerr << "Returning: " << url << " from the cache." << endl); 01511 01512 d_http_cache_table->bind_entry_to_data(entry, body); 01513 } 01514 catch (...) { 01515 // Why make this unlock operation conditional on entry? 01516 if (entry) 01517 unlock_cache_interface(); 01518 fclose(body); 01519 throw; 01520 } 01521 01522 unlock_cache_interface(); 01523 01524 return body; 01525 } 01526 01538 FILE * 01539 HTTPCache::get_cached_response(const string &url, vector<string> &headers) 01540 { 01541 string discard_name; 01542 return get_cached_response(url, headers, discard_name); 01543 } 01544 01555 FILE * 01556 HTTPCache::get_cached_response(const string &url) 01557 { 01558 string discard_name; 01559 vector<string> discard_headers; 01560 return get_cached_response(url, discard_headers, discard_name); 01561 } 01562 01575 void 01576 HTTPCache::release_cached_response(FILE *body) 01577 { 01578 lock_cache_interface(); 01579 01580 try { 01581 d_http_cache_table->uncouple_entry_from_data(body); 01582 } 01583 catch (...) { 01584 unlock_cache_interface(); 01585 throw; 01586 } 01587 01588 unlock_cache_interface(); 01589 } 01590 01603 void 01604 HTTPCache::purge_cache() 01605 { 01606 lock_cache_interface(); 01607 01608 try { 01609 if (d_http_cache_table->is_locked_read_responses()) 01610 throw Error("Attempt to purge the cache with entries in use."); 01611 01612 d_http_cache_table->delete_all_entries(); 01613 } 01614 catch (...) { 01615 unlock_cache_interface(); 01616 throw; 01617 } 01618 01619 unlock_cache_interface(); 01620 } 01621 01622 } // namespace libdap