libdap++ Updated for version 3.8.2
HTTPCache.cc
Go to the documentation of this file.
00001 
00002 // -*- mode: c++; c-basic-offset:4 -*-
00003 
00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
00005 // Access Protocol.
00006 
00007 // Copyright (c) 2002,2003 OPeNDAP, Inc.
00008 // Author: James Gallagher <jgallagher@opendap.org>
00009 //
00010 // This library is free software; you can redistribute it and/or
00011 // modify it under the terms of the GNU Lesser General Public
00012 // License as published by the Free Software Foundation; either
00013 // version 2.1 of the License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00023 //
00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 
00026 #include "config.h"
00027 
00028 // #define DODS_DEBUG
00029 // #define DODS_DEBUG2
00030 #undef USE_GETENV
00031 
00032 #include <pthread.h>
00033 #include <limits.h>
00034 #include <unistd.h>   // for stat
00035 #include <sys/types.h>  // for stat and mkdir
00036 #include <sys/stat.h>
00037 
00038 #include <cstring>
00039 #include <iostream>
00040 #include <sstream>
00041 #include <algorithm>
00042 #include <iterator>
00043 #include <set>
00044 
00045 #include "Error.h"
00046 #include "InternalErr.h"
00047 #include "ResponseTooBigErr.h"
00048 #ifndef WIN32
00049 #include "SignalHandler.h"
00050 #endif
00051 #include "HTTPCacheInterruptHandler.h"
00052 #include "HTTPCacheTable.h"
00053 #include "HTTPCache.h"
00054 
00055 #include "util_mit.h"
00056 #include "debug.h"
00057 
00058 using namespace std;
00059 
00060 namespace libdap {
00061 
00062 HTTPCache *HTTPCache::_instance = 0;
00063 
// instance_mutex is used to ensure that only one instance is created.
// That is, it protects the body of the HTTPCache::instance() method. This
// mutex is initialized from within the static function once_init_routine(),
// and the call to that routine is made through pthread_once() using the
// pthread_once_t control variable once_block, so the initialization runs
// only once. All of this ensures that no matter how many threads call the
// instance() method, only one instance is ever made.
00071 static pthread_mutex_t instance_mutex;
00072 static pthread_once_t once_block = PTHREAD_ONCE_INIT;
00073 
// Platform abstraction and on-disk layout of the cache. The WIN32 and POSIX
// variants define the same set of macros so the rest of this file can stay
// platform neutral.
#ifdef WIN32
#include <direct.h>
#include <time.h>
#include <fcntl.h>

// Filesystem primitives (the mode argument of MKDIR is ignored on Windows).
#define MKDIR(a,b) _mkdir((a))
#define UMASK(a) _umask((a))
#define REMOVE(a) remove((a))
#define MKSTEMP(a) _open(_mktemp((a)),_O_CREAT,_S_IREAD|_S_IWRITE)

// Path separator, as a character and as a string.
#define DIR_SEPARATOR_CHAR '\\'
#define DIR_SEPARATOR_STR "\\"

// Default parent directory of the cache and the cache directory itself.
#define CACHE_LOCATION "\\tmp\\"
#define CACHE_ROOT "dods-cache\\"
#else
// Filesystem primitives.
#define MKDIR(a,b) mkdir((a), (b))
#define UMASK(a) umask((a))
#define REMOVE(a) remove((a))
#define MKSTEMP(a) mkstemp((a))

// Path separator, as a character and as a string.
#define DIR_SEPARATOR_CHAR '/'
#define DIR_SEPARATOR_STR "/"

// Default parent directory of the cache and the cache directory itself.
#define CACHE_LOCATION "/tmp/"
#define CACHE_ROOT "dods-cache/"
#endif

// Names/suffixes of the files kept inside the cache directory.
#define CACHE_INDEX ".index"
#define CACHE_LOCK ".lock"
#define CACHE_META ".meta"
//#define CACHE_EMPTY_ETAG "@cache@"
00104 
// Default expiration, in seconds, handed to the cache table when computing
// entry lifetimes. Parenthesized so the macro behaves as a single value
// inside larger expressions (e.g. x % NO_LM_EXPIRATION).
#define NO_LM_EXPIRATION (24*3600) // 24 hours

#define DUMP_FREQUENCY 10 // Dump index every x loads

// Sizes below are expressed in megabytes and scaled by MEGA where bytes are
// needed.
#define MEGA 0x100000L
#define CACHE_TOTAL_SIZE 20 // Default cache size is 20M
#define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc.
#define CACHE_GC_PCT 10  // 10% of cache size free after GC
#define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size
#define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry
00115 
00116 static void
00117 once_init_routine()
00118 {
00119     int status;
00120     status = INIT(&instance_mutex);
00121 
00122     if (status != 0)
00123         throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
00124 }
00125 
00154 HTTPCache *
00155 HTTPCache::instance(const string &cache_root, bool force)
00156 {
00157     int status = pthread_once(&once_block, once_init_routine);
00158     if (status != 0)
00159         throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
00160 
00161     LOCK(&instance_mutex);
00162 
00163     DBG(cerr << "Entering instance(); (" << hex << _instance << dec << ")"
00164             << "... ");
00165 
00166     try {
00167         if (!_instance) {
00168             _instance = new HTTPCache(cache_root, force);
00169 
00170             DBG(cerr << "New instance: " << _instance << ", cache root: "
00171                 << _instance->d_cache_root << endl);
00172 
00173             atexit(delete_instance);
00174 
00175 #ifndef WIN32
00176             // Register the interrupt handler. If we've already registered
00177             // one, barf. If this becomes a problem, hack SignalHandler so
00178             // that we can chain these handlers... 02/10/04 jhrg
00179             //
00180             // Technically we're leaking memory here. However, since this
00181             // class is a singleton, we know that only three objects will
00182             // ever be created and they will all exist until the process
00183             // exits. We can let this slide... 02/12/04 jhrg
00184             EventHandler *old_eh = SignalHandler::instance()->register_handler
00185                                    (SIGINT, new HTTPCacheInterruptHandler);
00186             if (old_eh) {
00187                 SignalHandler::instance()->register_handler(SIGINT, old_eh);
00188                 throw SignalHandlerRegisteredErr(
00189                     "Could not register event handler for SIGINT without superseding an existing one.");
00190             }
00191 
00192             old_eh = SignalHandler::instance()->register_handler
00193                      (SIGPIPE, new HTTPCacheInterruptHandler);
00194             if (old_eh) {
00195                 SignalHandler::instance()->register_handler(SIGPIPE, old_eh);
00196                 throw SignalHandlerRegisteredErr(
00197                     "Could not register event handler for SIGPIPE without superseding an existing one.");
00198             }
00199 
00200             old_eh = SignalHandler::instance()->register_handler
00201                      (SIGTERM, new HTTPCacheInterruptHandler);
00202             if (old_eh) {
00203                 SignalHandler::instance()->register_handler(SIGTERM, old_eh);
00204                 throw SignalHandlerRegisteredErr(
00205                     "Could not register event handler for SIGTERM without superseding an existing one.");
00206             }
00207 #endif
00208         }
00209     }
00210     catch (...) {
00211         DBG2(cerr << "The constructor threw an Error!" << endl);
00212         UNLOCK(&instance_mutex);
00213         throw;
00214     }
00215 
00216     UNLOCK(&instance_mutex);
00217     DBGN(cerr << "returning " << hex << _instance << dec << endl);
00218 
00219     return _instance;
00220 }
00221 
00225 void
00226 HTTPCache::delete_instance()
00227 {
00228     DBG(cerr << "Entering delete_instance()..." << endl);
00229     if (HTTPCache::_instance) {
00230         DBG(cerr << "Deleting the cache: " << HTTPCache::_instance << endl);
00231         delete HTTPCache::_instance;
00232         HTTPCache::_instance = 0;
00233     }
00234 
00235     DBG(cerr << "Exiting delete_instance()" << endl);
00236 }
00237 
00252 HTTPCache::HTTPCache(string cache_root, bool force) :
00253         d_locked_open_file(0),
00254         d_cache_enabled(false),
00255         d_cache_protected(false),
00256         d_expire_ignored(false),
00257         d_always_validate(false),
00258         d_total_size(CACHE_TOTAL_SIZE * MEGA),
00259         d_folder_size(CACHE_TOTAL_SIZE / CACHE_FOLDER_PCT),
00260         d_gc_buffer(CACHE_TOTAL_SIZE / CACHE_GC_PCT),
00261         d_max_entry_size(MAX_CACHE_ENTRY_SIZE * MEGA),
00262         d_default_expiration(NO_LM_EXPIRATION),
00263         d_max_age(-1),
00264         d_max_stale(-1),
00265         d_min_fresh(-1),
00266         d_http_cache_table(0)
00267 {
00268     DBG(cerr << "Entering the constructor for " << this << "... ");
00269 #if 0
00270         int status = pthread_once(&once_block, once_init_routine);
00271         if (status != 0)
00272                 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
00273 #endif
00274         INIT(&d_cache_mutex);
00275 
00276         // This used to throw an Error object if we could not get the
00277         // single user lock. However, that results in an invalid object. It's
00278         // better to have an instance that has default values. If we cannot get
00279         // the lock, make sure to set the cache as *disabled*. 03/12/03 jhrg
00280         //
00281         // I fixed this block so that the cache root is set before we try to get
00282         // the single user lock. That was the fix for bug #661. To make that
00283         // work, I had to move the call to create_cache_root out of
00284         // set_cache_root(). 09/08/03 jhrg
00285 
00286         set_cache_root(cache_root);
00287         int block_size;
00288 
00289         if (!get_single_user_lock(force))
00290             throw Error("Could not get single user lock for the cache");
00291 
00292 #ifdef WIN32
00293         //  Windows is unable to provide us this information.  4096 appears
00294         //  a best guess.  It is likely to be in the range [2048, 8192] on
00295         //  windows, but will the level of truth of that statement vary over
00296         //  time ?
00297         block_size = 4096;
00298 #else
00299         struct stat s;
00300         if (stat(cache_root.c_str(), &s) == 0)
00301                 block_size = s.st_blksize;
00302         else
00303                 throw Error("Could not set file system block size.");
00304 #endif
00305         d_http_cache_table = new HTTPCacheTable(d_cache_root, block_size);
00306         d_cache_enabled = true;
00307 
00308         DBGN(cerr << "exiting" << endl);
00309 }
00310 
00323 HTTPCache::~HTTPCache()
00324 {
00325     DBG(cerr << "Entering the destructor for " << this << "... ");
00326 
00327     try {
00328         if (startGC())
00329             perform_garbage_collection();
00330 
00331         d_http_cache_table->cache_index_write();
00332     }
00333     catch (Error &e) {
00334         // If the cache index cannot be written, we've got problems. However,
00335         // unless we're debugging, still free up the cache table in memory.
00336         // How should we let users know they cache index is not being
00337         // written?? 10/03/02 jhrg
00338         DBG(cerr << e.get_error_message() << endl);
00339     }
00340 
00341     delete d_http_cache_table;
00342 
00343     release_single_user_lock();
00344 
00345     DBGN(cerr << "exiting destructor." << endl);
00346     DESTROY(&d_cache_mutex);
00347 }
00348 
00349 
00353 
00357 bool
00358 HTTPCache::stopGC() const
00359 {
00360     return (d_http_cache_table->get_current_size() + d_folder_size < d_total_size - d_gc_buffer);
00361 }
00362 
00369 bool
00370 HTTPCache::startGC() const
00371 {
00372     DBG(cerr << "startGC, current_size: " << d_http_cache_table->get_current_size() << endl);
00373     return (d_http_cache_table->get_current_size() + d_folder_size > d_total_size);
00374 }
00375 
00390 void
00391 HTTPCache::perform_garbage_collection()
00392 {
00393     DBG(cerr << "Performing garbage collection" << endl);
00394 
00395     // Remove all the expired responses.
00396     expired_gc();
00397 
00398     // Remove entries larger than max_entry_size.
00399     too_big_gc();
00400 
00401     // Remove entries starting with zero hits, 1, ..., until stopGC()
00402     // returns true.
00403     hits_gc();
00404 }
00405 
00411 void
00412 HTTPCache::expired_gc()
00413 {
00414     if (!d_expire_ignored) {
00415         d_http_cache_table->delete_expired_entries();
00416     }
00417 }
00418 
00435 void
00436 HTTPCache::hits_gc()
00437 {
00438     int hits = 0;
00439 
00440     if (startGC()) {
00441                 while (!stopGC()) {
00442                         d_http_cache_table->delete_by_hits(hits);
00443                         hits++;
00444                 }
00445         }
00446 }
00447 
00452 void HTTPCache::too_big_gc() {
00453         if (startGC())
00454                 d_http_cache_table->delete_by_size(d_max_entry_size);
00455 }
00456 
00458 
00469 bool HTTPCache::get_single_user_lock(bool force) 
00470 {
00471     if (!d_locked_open_file) {
00472         FILE * fp = NULL;
00473 
00474         try {
00475             // It's OK to call create_cache_root if the directory already
00476             // exists.
00477             create_cache_root(d_cache_root);
00478         }
00479         catch (Error &e) {
00480             // We need to catch and return false because this method is
00481             // called from a ctor and throwing at this point will result in a
00482             // partially constructed object. 01/22/04 jhrg
00483             DBG(cerr << "Failure to create the cache root" << endl);
00484             return false;
00485         }
00486 
00487         // Try to read the lock file. If we can open for reading, it exists.
00488         string lock = d_cache_root + CACHE_LOCK;
00489         if ((fp = fopen(lock.c_str(), "r")) != NULL) {
00490             int res = fclose(fp);
00491             if (res) {
00492                 DBG(cerr << "Failed to close " << (void *)fp << endl);
00493             }
00494             if (force)
00495                 REMOVE(lock.c_str());
00496             else
00497                 return false;
00498         }
00499 
00500         if ((fp = fopen(lock.c_str(), "w")) == NULL) {
00501             DBG(cerr << "Could not open for write access" << endl);
00502             return false;
00503         }
00504 
00505         d_locked_open_file = fp;
00506         return true;
00507     }
00508 
00509     cerr << "locked_open_file is true" << endl;
00510     return false;
00511 }
00512 
00515 void
00516 HTTPCache::release_single_user_lock()
00517 {
00518     if (d_locked_open_file) {
00519         int res = fclose(d_locked_open_file);
00520         if (res) {
00521             DBG(cerr << "Failed to close " << (void *)d_locked_open_file << endl) ;
00522         }
00523         d_locked_open_file = 0;
00524     }
00525 
00526     string lock = d_cache_root + CACHE_LOCK;
00527     REMOVE(lock.c_str());
00528 }
00529 
00532 
00536 string
00537 HTTPCache::get_cache_root() const
00538 {
00539     return d_cache_root;
00540 }
00541 
00542 
00551 void
00552 HTTPCache::create_cache_root(const string &cache_root)
00553 {
00554     struct stat stat_info;
00555     string::size_type cur = 0;
00556 
00557 #ifdef WIN32
00558     cur = cache_root[1] == ':' ? 3 : 1;
00559     typedef int mode_t;
00560 #else
00561     cur = 1;
00562 #endif
00563     while ((cur = cache_root.find(DIR_SEPARATOR_CHAR, cur)) != string::npos) {
00564         string dir = cache_root.substr(0, cur);
00565         if (stat(dir.c_str(), &stat_info) == -1) {
00566             DBG2(cerr << "Cache....... Creating " << dir << endl);
00567             mode_t mask = UMASK(0);
00568             if (MKDIR(dir.c_str(), 0777) < 0) {
00569                 DBG2(cerr << "Error: can't create." << endl);
00570                 UMASK(mask);
00571                 throw Error(string("Could not create the directory for the cache. Failed when building path at ") + dir + string("."));
00572             }
00573             UMASK(mask);
00574         }
00575         else {
00576             DBG2(cerr << "Cache....... Found " << dir << endl);
00577         }
00578         cur++;
00579     }
00580 }
00581 
00596 void
00597 HTTPCache::set_cache_root(const string &root)
00598 {
00599     if (root != "") {
00600         d_cache_root = root;
00601         // cache root should end in /.
00602         if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
00603             d_cache_root += DIR_SEPARATOR_CHAR;
00604     }
00605     else {
00606         // If no cache root has been indicated then look for a suitable
00607         // location.
00608 #ifdef USE_GETENV
00609         char * cr = (char *) getenv("DODS_CACHE");
00610         if (!cr) cr = (char *) getenv("TMP");
00611         if (!cr) cr = (char *) getenv("TEMP");
00612         if (!cr) cr = (char*)CACHE_LOCATION;
00613         d_cache_root = cr;
00614 #else
00615         d_cache_root = CACHE_LOCATION;
00616 #endif
00617 
00618         if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
00619             d_cache_root += DIR_SEPARATOR_CHAR;
00620 
00621         d_cache_root += CACHE_ROOT;
00622     }
00623 
00624     // Test d_hhtp_cache_table because this method can be called before that
00625     // instance is created and also can be called later to cahnge the cache
00626     // root. jhrg 05.14.08
00627     if (d_http_cache_table)
00628         d_http_cache_table->set_cache_root(d_cache_root);
00629 }
00630 
00642 void
00643 HTTPCache::set_cache_enabled(bool mode)
00644 {
00645     lock_cache_interface();
00646 
00647     d_cache_enabled = mode;
00648 
00649     unlock_cache_interface();
00650 }
00651 
00654 bool
00655 HTTPCache::is_cache_enabled() const
00656 {
00657     DBG2(cerr << "In HTTPCache::is_cache_enabled: (" << d_cache_enabled << ")"
00658          << endl);
00659     return d_cache_enabled;
00660 }
00661 
00672 void
00673 HTTPCache::set_cache_disconnected(CacheDisconnectedMode mode)
00674 {
00675     lock_cache_interface();
00676 
00677     d_cache_disconnected = mode;
00678 
00679     unlock_cache_interface();
00680 }
00681 
00684 CacheDisconnectedMode
00685 HTTPCache::get_cache_disconnected() const
00686 {
00687     return d_cache_disconnected;
00688 }
00689 
00698 void
00699 HTTPCache::set_expire_ignored(bool mode)
00700 {
00701     lock_cache_interface();
00702 
00703     d_expire_ignored = mode;
00704 
00705     unlock_cache_interface();
00706 }
00707 
00708 /* Is the cache ignoring Expires headers returned with responses that have
00709    been cached? */
00710 
00711 bool
00712 HTTPCache::is_expire_ignored() const
00713 {
00714     return d_expire_ignored;
00715 }
00716 
00732 void
00733 HTTPCache::set_max_size(unsigned long size)
00734 {
00735     lock_cache_interface();
00736 
00737     try {
00738         unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ?
00739                                  MIN_CACHE_TOTAL_SIZE * MEGA :
00740                                  (size > ULONG_MAX ? ULONG_MAX : size * MEGA);
00741         unsigned long old_size = d_total_size;
00742         d_total_size = new_size;
00743         d_folder_size = d_total_size / CACHE_FOLDER_PCT;
00744         d_gc_buffer = d_total_size / CACHE_GC_PCT;
00745 
00746         if (new_size < old_size && startGC()) {
00747             perform_garbage_collection();
00748             d_http_cache_table->cache_index_write();
00749         }
00750     }
00751     catch (...) {
00752         unlock_cache_interface();
00753         DBGN(cerr << "Unlocking interface." << endl);
00754         throw;
00755     }
00756 
00757     DBG2(cerr << "Cache....... Total cache size: " << d_total_size
00758          << " with " << d_folder_size
00759          << " bytes for meta information and folders and at least "
00760          << d_gc_buffer << " bytes free after every gc" << endl);
00761 
00762     unlock_cache_interface();
00763 }
00764 
00767 unsigned long
00768 HTTPCache::get_max_size() const
00769 {
00770     return d_total_size / MEGA;
00771 }
00772 
00781 void
00782 HTTPCache::set_max_entry_size(unsigned long size)
00783 {
00784     lock_cache_interface();
00785 
00786     try {
00787         unsigned long new_size = size * MEGA;
00788         if (new_size > 0 && new_size < d_total_size - d_folder_size) {
00789             unsigned long old_size = d_max_entry_size;
00790             d_max_entry_size = new_size;
00791             if (new_size < old_size && startGC()) {
00792                 perform_garbage_collection();
00793                 d_http_cache_table->cache_index_write();
00794             }
00795         }
00796     }
00797     catch (...) {
00798         unlock_cache_interface();
00799         throw;
00800     }
00801 
00802     DBG2(cerr << "Cache...... Max entry cache size is "
00803          << d_max_entry_size << endl);
00804 
00805     unlock_cache_interface();
00806 }
00807 
00812 unsigned long
00813 HTTPCache::get_max_entry_size() const
00814 {
00815     return d_max_entry_size / MEGA;
00816 }
00817 
00828 void
00829 HTTPCache::set_default_expiration(const int exp_time)
00830 {
00831     lock_cache_interface();
00832 
00833     d_default_expiration = exp_time;
00834 
00835     unlock_cache_interface();
00836 }
00837 
00840 int
00841 HTTPCache::get_default_expiration() const
00842 {
00843     return d_default_expiration;
00844 }
00845 
00850 void
00851 HTTPCache::set_always_validate(bool validate)
00852 {
00853     d_always_validate = validate;
00854 }
00855 
00859 bool
00860 HTTPCache::get_always_validate() const
00861 {
00862     return d_always_validate;
00863 }
00864 
00881 void
00882 HTTPCache::set_cache_control(const vector<string> &cc)
00883 {
00884     lock_cache_interface();
00885 
00886     try {
00887         d_cache_control = cc;
00888 
00889         vector<string>::const_iterator i;
00890         for (i = cc.begin(); i != cc.end(); ++i) {
00891             string header = (*i).substr(0, (*i).find(':'));
00892             string value = (*i).substr((*i).find(": ") + 2);
00893             if (header != "Cache-Control") {
00894                 throw InternalErr(__FILE__, __LINE__, "Expected cache control header not found.");
00895             }
00896             else {
00897                 if (value == "no-cache" || value == "no-store")
00898                     d_cache_enabled = false;
00899                 else if (value.find("max-age") != string::npos) {
00900                     string max_age = value.substr(value.find("=" + 1));
00901                     d_max_age = parse_time(max_age.c_str());
00902                 }
00903                 else if (value == "max-stale")
00904                     d_max_stale = 0; // indicates will take anything;
00905                 else if (value.find("max-stale") != string::npos) {
00906                     string max_stale = value.substr(value.find("=" + 1));
00907                     d_max_stale = parse_time(max_stale.c_str());
00908                 }
00909                 else if (value.find("min-fresh") != string::npos) {
00910                     string min_fresh = value.substr(value.find("=" + 1));
00911                     d_min_fresh = parse_time(min_fresh.c_str());
00912                 }
00913             }
00914         }
00915     }
00916     catch (...) {
00917         unlock_cache_interface();
00918         throw;
00919     }
00920 
00921     unlock_cache_interface();
00922 }
00923 
00924 
00929 vector<string>
00930 HTTPCache::get_cache_control()
00931 {
00932     return d_cache_control;
00933 }
00934 
00936 
00945 bool
00946 HTTPCache::is_url_in_cache(const string &url)
00947 {
00948     DBG(cerr << "Is this url in the cache? (" << url << ")" << endl);
00949 
00950     HTTPCacheTable::CacheEntry *entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
00951     bool status = entry != 0;
00952     if (entry) {
00953         entry->unlock_read_response();
00954     }
00955     return  status;
00956 }
00957 
/** Tell whether a header line mentions one of the hop-by-hop header names
    (Connection, Keep-Alive, Proxy-Authenticate, Proxy-Authorization,
    Transfer-Encoding, Upgrade). The match is a case-sensitive substring
    search over the whole line.

    @param header The header line to test.
    @return True if any of the names occurs anywhere in @c header. */
bool
is_hop_by_hop_header(const string &header)
{
    static const char *const hop_by_hop_names[] = {
        "Connection",
        "Keep-Alive",
        "Proxy-Authenticate",
        "Proxy-Authorization",
        "Transfer-Encoding",
        "Upgrade"
    };
    const size_t name_count = sizeof(hop_by_hop_names) / sizeof(hop_by_hop_names[0]);

    for (size_t k = 0; k < name_count; ++k) {
        if (header.find(hop_by_hop_names[k]) != string::npos)
            return true;
    }

    return false;
}
00973 
00985 void
00986 HTTPCache::write_metadata(const string &cachename, const vector<string> &headers)
00987 {
00988     string fname = cachename + CACHE_META;
00989     d_open_files.push_back(fname);
00990 
00991     FILE *dest = fopen(fname.c_str(), "w");
00992     if (!dest) {
00993         throw InternalErr(__FILE__, __LINE__,
00994                           "Could not open named cache entry file.");
00995     }
00996 
00997     vector<string>::const_iterator i;
00998     for (i = headers.begin(); i != headers.end(); ++i) {
00999         if (!is_hop_by_hop_header(*i)) {
01000             int s = fwrite((*i).c_str(), (*i).size(), 1, dest);
01001             if (s != 1)
01002                 throw InternalErr(__FILE__, __LINE__, "could not write header: '" + (*i) + "' " + long_to_string(s));
01003             s = fwrite("\n", 1, 1, dest);
01004             if (s != 1)
01005                 throw InternalErr(__FILE__, __LINE__, "could not write header: " + long_to_string(s));
01006         }
01007     }
01008 
01009     int res = fclose(dest);
01010     if (res) {
01011         DBG(cerr << "HTTPCache::write_metadata - Failed to close "
01012             << dest << endl);
01013     }
01014 
01015     d_open_files.pop_back();
01016 }
01017 
01028 void
01029 HTTPCache::read_metadata(const string &cachename, vector<string> &headers)
01030 {
01031     FILE *md = fopen(string(cachename + CACHE_META).c_str(), "r");
01032     if (!md) {
01033         throw InternalErr(__FILE__, __LINE__,
01034                           "Could not open named cache entry meta data file.");
01035     }
01036 
01037     char line[1024];
01038     while (!feof(md) && fgets(line, 1024, md)) {
01039         line[min(1024, static_cast<int>(strlen(line)))-1] = '\0'; // erase newline
01040         headers.push_back(string(line));
01041     }
01042 
01043     int res = fclose(md);
01044     if (res) {
01045         DBG(cerr << "HTTPCache::read_metadata - Failed to close "
01046             << md << endl);
01047     }
01048 }
01049 
01071 int
01072 HTTPCache::write_body(const string &cachename, const FILE *src)
01073 {
01074     d_open_files.push_back(cachename);
01075 
01076     FILE *dest = fopen(cachename.c_str(), "wb");
01077     if (!dest) {
01078         throw InternalErr(__FILE__, __LINE__,
01079                           "Could not open named cache entry file.");
01080     }
01081 
01082     // Read and write in 1k blocks; an attempt at doing this efficiently.
01083     // 09/30/02 jhrg
01084     char line[1024];
01085     size_t n;
01086     int total = 0;
01087     while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) {
01088         total += fwrite(line, 1, n, dest);
01089         DBG2(sleep(3));
01090     }
01091 
01092     if (ferror(const_cast<FILE *>(src)) || ferror(dest)) {
01093         int res = fclose(dest);
01094         res = res & unlink(cachename.c_str());
01095         if (res) {
01096             DBG(cerr << "HTTPCache::write_body - Failed to close/unlink "
01097                 << dest << endl);
01098         }
01099         throw InternalErr(__FILE__, __LINE__,
01100                           "I/O error transferring data to the cache.");
01101     }
01102 
01103     rewind(const_cast<FILE *>(src));
01104 
01105     int res = fclose(dest);
01106     if (res) {
01107         DBG(cerr << "HTTPCache::write_body - Failed to close "
01108             << dest << endl);
01109     }
01110 
01111     d_open_files.pop_back();
01112 
01113     return total;
01114 }
01115 
01124 FILE *
01125 HTTPCache::open_body(const string &cachename)
01126 {
01127     DBG(cerr << "cachename: " << cachename << endl);
01128 
01129     FILE *src = fopen(cachename.c_str(), "rb"); // Read only
01130     if (!src)
01131         throw InternalErr(__FILE__, __LINE__, "Could not open cache file.");
01132 
01133     return src;
01134 }
01135 
01161 bool
01162 HTTPCache::cache_response(const string &url, time_t request_time,
01163                           const vector<string> &headers, const FILE *body)
01164 {
01165     lock_cache_interface();
01166 
01167     DBG(cerr << "Caching url: " << url << "." << endl);
01168 
01169     try {
01170         // If this is not an http or https URL, don't cache.
01171         if (url.find("http:") == string::npos &&
01172             url.find("https:") == string::npos) {
01173             unlock_cache_interface();
01174             return false;
01175         }
01176 
01177         // This does nothing if url is not already in the cache. It's
01178         // more efficient to do this than to first check and see if the entry
01179         // exists. 10/10/02 jhrg
01180         d_http_cache_table->remove_entry_from_cache_table(url);
01181 
01182         HTTPCacheTable::CacheEntry *entry = new HTTPCacheTable::CacheEntry(url);
01183         entry->lock_write_response();
01184 
01185         try {
01186             d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); // etag, lm, date, age, expires, max_age.
01187             if (entry->is_no_cache()) {
01188                 DBG(cerr << "Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry
01189                     << "(" << url << ")" << endl);
01190                 entry->unlock_write_response();
01191                 delete entry; entry = 0;
01192                 unlock_cache_interface();
01193                 return false;
01194             }
01195 
01196             // corrected_initial_age, freshness_lifetime, response_time.
01197             d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
01198 
01199             d_http_cache_table->create_location(entry); // cachename, cache_body_fd
01200             // move these write function to cache table
01201             entry->set_size(write_body(entry->get_cachename(), body));
01202             write_metadata(entry->get_cachename(), headers);
01203             d_http_cache_table->add_entry_to_cache_table(entry);
01204             entry->unlock_write_response();
01205         }
01206         catch (ResponseTooBigErr &e) {
01207             // Oops. Bummer. Clean up and exit.
01208             DBG(cerr << e.get_error_message() << endl);
01209             REMOVE(entry->get_cachename().c_str());
01210             REMOVE(string(entry->get_cachename() + CACHE_META).c_str());
01211             DBG(cerr << "Too big; deleting HTTPCacheTable::CacheEntry: " << entry << "(" << url
01212                 << ")" << endl);
01213             entry->unlock_write_response();
01214             delete entry; entry = 0;
01215             unlock_cache_interface();
01216             return false;
01217         }
01218 
01219         if (d_http_cache_table->get_new_entries() > DUMP_FREQUENCY) {
01220             if (startGC())
01221                 perform_garbage_collection();
01222 
01223             d_http_cache_table->cache_index_write(); // resets new_entries
01224         }
01225     }
01226     catch (...) {
01227         unlock_cache_interface();
01228         throw;
01229     }
01230 
01231     unlock_cache_interface();
01232 
01233     return true;
01234 }
01235 
01254 vector<string>
01255 HTTPCache::get_conditional_request_headers(const string &url)
01256 {
01257     lock_cache_interface();
01258 
01259     HTTPCacheTable::CacheEntry *entry = 0;
01260     vector<string> headers;
01261 
01262     DBG(cerr << "Getting conditional request headers for " << url << endl);
01263 
01264     try {
01265         entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
01266         if (!entry)
01267             throw Error("There is no cache entry for the URL: " + url);
01268 
01269         if (entry->get_etag() != "")
01270             headers.push_back(string("If-None-Match: ") + entry->get_etag());
01271 
01272         if (entry->get_lm() > 0) {
01273                 time_t lm = entry->get_lm();
01274             headers.push_back(string("If-Modified-Since: ")
01275                               + date_time_str(&lm));
01276         }
01277         else if (entry->get_max_age() > 0) {
01278                 time_t max_age = entry->get_max_age();
01279             headers.push_back(string("If-Modified-Since: ")
01280                               + date_time_str(&max_age));
01281         }
01282         else if (entry->get_expires() > 0) {
01283                 time_t expires = entry->get_expires();
01284             headers.push_back(string("If-Modified-Since: ")
01285                               + date_time_str(&expires));
01286         }
01287         entry->unlock_read_response();
01288         unlock_cache_interface();
01289     }
01290     catch (...) {
01291         unlock_cache_interface();
01292         if (entry) {
01293             entry->unlock_read_response();
01294         }
01295         throw;
01296     }
01297 
01298     return headers;
01299 }
01300 
/** Ordering functor for HTTP header lines that compares only the header
    name, i.e. the text before the first ':' (or the whole string when there
    is no ':'). Two lines with the same name but different values are
    therefore equivalent, which lets a set<string, HeaderLess> keep just one
    line per header name.

    This no longer derives from std::binary_function, which was deprecated in
    C++11 and removed in C++17. The nested typedefs that base used to supply
    are declared here so existing uses keep working. */
struct HeaderLess
{
    typedef const std::string &first_argument_type;
    typedef const std::string &second_argument_type;
    typedef bool result_type;

    /** @return true if the name part of \c s1 sorts before that of \c s2. */
    bool operator()(const std::string &s1, const std::string &s2) const {
        // compare() clamps a length of npos to the end of the string, so a
        // line without ':' is compared as a whole. Comparing in place avoids
        // building two temporary substrings per call.
        return s1.compare(0, s1.find(':'), s2, 0, s2.find(':')) < 0;
    }
};
01310 
01324 void
01325 HTTPCache::update_response(const string &url, time_t request_time,
01326                            const vector<string> &headers)
01327 {
01328     lock_cache_interface();
01329 
01330     HTTPCacheTable::CacheEntry *entry = 0;
01331     DBG(cerr << "Updating the response headers for: " << url << endl);
01332 
01333     try {
01334         entry = d_http_cache_table->get_write_locked_entry_from_cache_table(url);
01335         if (!entry)
01336             throw Error("There is no cache entry for the URL: " + url);
01337 
01338         // Merge the new headers with the exiting HTTPCacheTable::CacheEntry object.
01339         d_http_cache_table->parse_headers(entry, d_max_entry_size, headers);
01340 
01341         // Update corrected_initial_age, freshness_lifetime, response_time.
01342         d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
01343 
01344         // Merge the new headers with those in the persistent store. How:
01345         // Load the new headers into a set, then merge the old headers. Since
01346         // set<> ignores duplicates, old headers with the same name as a new
01347         // header will got into the bit bucket. Define a special compare
01348         // functor to make sure that headers are compared using only their
01349         // name and not their value too.
01350         set<string, HeaderLess> merged_headers;
01351 
01352         // Load in the new headers
01353         copy(headers.begin(), headers.end(),
01354              inserter(merged_headers, merged_headers.begin()));
01355 
01356         // Get the old headers and load them in.
01357         vector<string> old_headers;
01358         read_metadata(entry->get_cachename(), old_headers);
01359         copy(old_headers.begin(), old_headers.end(),
01360              inserter(merged_headers, merged_headers.begin()));
01361 
01362         // Read the values back out. Use reverse iterators with back_inserter
01363         // to preserve header order. NB: vector<> does not support push_front
01364         // so we can't use front_inserter(). 01/09/03 jhrg
01365         vector<string> result;
01366         copy(merged_headers.rbegin(), merged_headers.rend(),
01367              back_inserter(result));
01368 
01369         write_metadata(entry->get_cachename(), result);
01370         entry->unlock_write_response();
01371         unlock_cache_interface();
01372     }
01373     catch (...) {
01374         if (entry) {
01375             entry->unlock_read_response();
01376         }
01377         unlock_cache_interface();
01378         throw;
01379     }
01380 }
01381 
01393 bool
01394 HTTPCache::is_url_valid(const string &url)
01395 {
01396     lock_cache_interface();
01397 
01398     bool freshness;
01399     HTTPCacheTable::CacheEntry *entry = 0;
01400 
01401     DBG(cerr << "Is this URL valid? (" << url << ")" << endl);
01402 
01403     try {
01404         if (d_always_validate) {
01405             unlock_cache_interface();
01406             return false;  // force re-validation.
01407         }
01408 
01409         entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
01410         if (!entry)
01411             throw Error("There is no cache entry for the URL: " + url);
01412 
01413         // If we supported range requests, we'd need code here to check if
01414         // there was only a partial response in the cache. 10/02/02 jhrg
01415 
01416         // In case this entry is of type "must-revalidate" then we consider it
01417         // invalid.
01418         if (entry->get_must_revalidate()) {
01419             entry->unlock_read_response();
01420             unlock_cache_interface();
01421             return false;
01422         }
01423 
01424         time_t resident_time = time(NULL) - entry->get_response_time();
01425         time_t current_age = entry->get_corrected_initial_age() + resident_time;
01426 
01427         // Check that the max-age, max-stale, and min-fresh directives
01428         // given in the request cache control header is followed.
01429         if (d_max_age >= 0 && current_age > d_max_age) {
01430             DBG(cerr << "Cache....... Max-age validation" << endl);
01431             entry->unlock_read_response();
01432             unlock_cache_interface();
01433             return false;
01434         }
01435         if (d_min_fresh >= 0
01436             && entry->get_freshness_lifetime() < current_age + d_min_fresh) {
01437             DBG(cerr << "Cache....... Min-fresh validation" << endl);
01438             entry->unlock_read_response();
01439             unlock_cache_interface();
01440             return false;
01441         }
01442 
01443         freshness = (entry->get_freshness_lifetime()
01444                      + (d_max_stale >= 0 ? d_max_stale : 0) > current_age);
01445         entry->unlock_read_response();
01446         unlock_cache_interface();
01447     }
01448     catch (...) {
01449         if (entry) {
01450             entry->unlock_read_response();
01451         }
01452         unlock_cache_interface();
01453         throw;
01454     }
01455 
01456     return freshness;
01457 }
01458 
01486 FILE * HTTPCache::get_cached_response(const string &url,
01487                 vector<string> &headers, string &cacheName) {
01488     lock_cache_interface();
01489 
01490     FILE *body;
01491     HTTPCacheTable::CacheEntry *entry = 0;
01492 
01493     DBG(cerr << "Getting the cached response for " << url << endl);
01494 
01495     try {
01496         entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
01497         if (!entry) {
01498                 unlock_cache_interface();
01499                 return 0;
01500         }
01501 
01502         cacheName = entry->get_cachename();
01503         read_metadata(entry->get_cachename(), headers);
01504 
01505         DBG(cerr << "Headers just read from cache: " << endl);
01506         DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr, "\n")));
01507 
01508         body = open_body(entry->get_cachename());
01509 
01510         DBG(cerr << "Returning: " << url << " from the cache." << endl);
01511 
01512         d_http_cache_table->bind_entry_to_data(entry, body);
01513     }
01514     catch (...) {
01515         // Why make this unlock operation conditional on entry?
01516         if (entry)
01517                 unlock_cache_interface();
01518         fclose(body);
01519         throw;
01520     }
01521 
01522     unlock_cache_interface();
01523 
01524     return body;
01525 }
01526 
01538 FILE *
01539 HTTPCache::get_cached_response(const string &url, vector<string> &headers)
01540 {
01541         string discard_name;
01542         return get_cached_response(url, headers, discard_name);
01543 }
01544 
01555 FILE *
01556 HTTPCache::get_cached_response(const string &url)
01557 {
01558         string discard_name;
01559         vector<string> discard_headers;
01560         return get_cached_response(url, discard_headers, discard_name);
01561 }
01562 
01575 void
01576 HTTPCache::release_cached_response(FILE *body)
01577 {
01578     lock_cache_interface();
01579 
01580     try {
01581         d_http_cache_table->uncouple_entry_from_data(body);
01582     }
01583     catch (...) {
01584         unlock_cache_interface();
01585         throw;
01586     }
01587 
01588     unlock_cache_interface();
01589 }
01590 
01603 void
01604 HTTPCache::purge_cache()
01605 {
01606     lock_cache_interface();
01607 
01608     try {
01609         if (d_http_cache_table->is_locked_read_responses())
01610             throw Error("Attempt to purge the cache with entries in use.");
01611 
01612         d_http_cache_table->delete_all_entries();
01613     }
01614     catch (...) {
01615         unlock_cache_interface();
01616         throw;
01617     }
01618 
01619     unlock_cache_interface();
01620 }
01621 
01622 } // namespace libdap