libdap++ Updated for version 3.8.2
HTTPConnect.cc
Go to the documentation of this file.
00001 
00002 // -*- mode: c++; c-basic-offset:4 -*-
00003 
00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
00005 // Access Protocol.
00006 
00007 // Copyright (c) 2002,2003 OPeNDAP, Inc.
00008 // Author: James Gallagher <jgallagher@opendap.org>
00009 //
00010 // This library is free software; you can redistribute it and/or
00011 // modify it under the terms of the GNU Lesser General Public
00012 // License as published by the Free Software Foundation; either
00013 // version 2.1 of the License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00023 //
00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 
00026 
00027 #include "config.h"
00028 
00029 static char rcsid[] not_used =
00030     { "$Id: HTTPConnect.cc 24380 2011-03-28 21:47:15Z jimg $"
00031     };
00032 
00033 #ifdef HAVE_UNISTD_H
00034 #include <unistd.h>
00035 #endif
00036 
00037 #include <sys/stat.h>
00038 
00039 #ifdef WIN32
00040 #include <io.h>
00041 #endif
00042 
00043 #include <string>
00044 #include <vector>
00045 #include <functional>
00046 #include <algorithm>
00047 #include <sstream>
00048 #include <iterator>
00049 #include <cstdlib>
00050 #include <cstring>
00051 
00052 // #define DODS_DEBUG
00053 //#define DODS_DEBUG2
00054 //#define HTTP_TRACE
00055 //#define DODS_DEBUG
00056 
00057 #undef USE_GETENV
00058 
00059 
00060 #include "debug.h"
00061 #include "mime_util.h"
00062 #include "GNURegex.h"
00063 #include "HTTPCache.h"
00064 #include "HTTPConnect.h"
00065 #include "RCReader.h"
00066 #include "HTTPResponse.h"
00067 #include "HTTPCacheResponse.h"
00068 
00069 using namespace std;
00070 
00071 namespace libdap {
00072 
// Debugging switches. These are deliberately plain (non-static) globals so
// that they can be changed from a debugger such as gdb or ddd; many debuggers
// cannot reach static variables. As a consequence they are not MT-Safe.
// 08/07/02 jhrg

// Non-zero turns on libcurl's verbose mode when the connection is set up.
int www_trace = 0;

// Non-zero asks that temporary files be kept; useful for debugging.
int dods_keep_temps = 0;
00083 
// Inclusive range of 4xx status codes that have a canned message below.
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417
// Messages for client errors, indexed by (status - CLIENT_ERR_MIN).
static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN +1] =
    {
        "Bad Request:",                                     // 400
        "Unauthorized: Contact the server administrator.",  // 401
        "Payment Required.",                                // 402
        "Forbidden: Contact the server administrator.",     // 403
        // 404 -- the continuation lines of this message carry eight leading
        // blanks; they are part of the text.
        "Not Found: The data source or server could not be found.\n"
        "        Often this means that the OPeNDAP server is missing or needs attention;\n"
        "        Please contact the server administrator.",
        "Method Not Allowed.",                              // 405
        "Not Acceptable.",                                  // 406
        "Proxy Authentication Required.",                   // 407
        "Request Time-out.",                                // 408
        "Conflict.",                                        // 409
        "Gone:.",                                           // 410
        "Length Required.",                                 // 411
        "Precondition Failed.",                             // 412
        "Request Entity Too Large.",                        // 413
        "Request URI Too Large.",                           // 414
        "Unsupported Media Type.",                          // 415
        "Requested Range Not Satisfiable.",                 // 416
        "Expectation Failed."                               // 417
    };

// Inclusive range of 5xx status codes that have a canned message below.
#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505
// Messages for server errors, indexed by (status - SERVER_ERR_MIN).
static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] =
    {
        "Internal Server Error.",       // 500
        "Not Implemented.",             // 501
        "Bad Gateway.",                 // 502
        "Service Unavailable.",         // 503
        "Gateway Time-out.",            // 504
        "HTTP Version Not Supported."   // 505
    };

/** Translate an HTTP status code into a human readable message.

    @param status The HTTP status code.
    @return The canned message for codes 400-417 and 500-505; a generic
    "Unknown Error" message for every other value. */
static string
http_status_to_string(int status)
{
    if (CLIENT_ERR_MIN <= status && status <= CLIENT_ERR_MAX) {
        const char *client_msg = http_client_errors[status - CLIENT_ERR_MIN];
        return string(client_msg);
    }

    if (SERVER_ERR_MIN <= status && status <= SERVER_ERR_MAX) {
        const char *server_msg = http_server_errors[status - SERVER_ERR_MIN];
        return string(server_msg);
    }

    return string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
}
00134 
00139 class ParseHeader : public unary_function<const string &, void>
00140 {
00141     ObjectType type;  // What type of object is in the stream?
00142     string server;  // Server's version string.
00143     string protocol;            // Server's protocol version.
00144     string location;            // Url returned by server
00145 
00146 public:
00147     ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0")
00148     { }
00149 
00150     void operator()(const string &line)
00151     {
00152         string name, value;
00153         parse_mime_header(line, name, value);
00154         if (name == "content-description") {
00155             DBG2(cerr << name << ": " << value << endl);
00156             type = get_description_type(value);
00157         }
00158         // The second test (== "dods/0.0") tests if xopendap-server has already
00159         // been seen. If so, use that header in preference to the old
00160         // XDODS-Server header. jhrg 2/7/06
00161         else if (name == "xdods-server" && server == "dods/0.0") {
00162             DBG2(cerr << name << ": " << value << endl);
00163             server = value;
00164         }
00165         else if (name == "xopendap-server") {
00166             DBG2(cerr << name << ": " << value << endl);
00167             server = value;
00168         }
00169         else if (name == "xdap") {
00170             DBG2(cerr << name << ": " << value << endl);
00171             protocol = value;
00172         }
00173         else if (server == "dods/0.0" && name == "server") {
00174             DBG2(cerr << name << ": " << value << endl);
00175             server = value;
00176         }
00177         else if (name == "location") {
00178             DBG2(cerr << name << ": " << value << endl);
00179             location = value;
00180         }
00181         else if (type == unknown_type && name == "content-type"
00182                  && line.find("text/html") != string::npos) {
00183             DBG2(cerr << name << ": text/html..." << endl);
00184             type = web_error;
00185         }
00186     }
00187 
00188     ObjectType get_object_type()
00189     {
00190         return type;
00191     }
00192 
00193     string get_server()
00194     {
00195         return server;
00196     }
00197 
00198     string get_protocol()
00199     {
00200         return protocol;
00201     }
00202 
00203     string get_location() {
00204            return location;
00205     }
00206 };
00207 
/** libcurl header callback. libcurl calls this once per response header
    line; the line, minus its terminator, is appended to the vector<string>
    passed in through resp_hdrs.

    Empty lines and HTTP status lines (those that start with "HTTP/") are not
    stored. A header whose value merely contains the text "HTTP" is kept.

    @param ptr Start of the header data; it is not null terminated.
    @param size Size of one data item.
    @param nmemb Number of data items; the header is size * nmemb bytes long.
    @param resp_hdrs A vector<string> * that receives the header. If null,
    the header is discarded.
    @return The number of bytes processed, always size * nmemb. */
static size_t
save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
{
    DBG2(cerr << "Inside the header parser." << endl);
    vector<string> *hdrs = static_cast<vector<string> * >(resp_hdrs);

    const char *data = static_cast<const char *>(ptr);
    const size_t total = size * nmemb;

    // Grab the header, minus the trailing newline. Or \r\n pair. Only real
    // terminators are removed, so zero-length input cannot underflow the
    // length and a line without a newline keeps its last character.
    size_t length = total;
    if (length > 0 && data[length - 1] == '\n')
        --length;
    if (length > 0 && data[length - 1] == '\r')
        --length;

    string complete_line;
    if (length > 0)
        complete_line.assign(data, length);

    // Store all non-empty headers that are not HTTP status lines.
    if (hdrs && !complete_line.empty()
        && complete_line.compare(0, 5, "HTTP/") != 0) {
        DBG(cerr << "Header line: " << complete_line << endl);
        hdrs->push_back(complete_line);
    }

    return total;
}
00245 
00247 static int
00248 curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void  *)
00249 {
00250     string message(msg, size);
00251 
00252     switch (info) {
00253     case CURLINFO_TEXT:
00254         cerr << "Text: " << message; break;
00255     case CURLINFO_HEADER_IN:
00256         cerr << "Header in: " << message; break;
00257     case CURLINFO_HEADER_OUT:
00258         cerr << "Header out: " << message; break;
00259     case CURLINFO_DATA_IN:
00260         cerr << "Data in: " << message; break;
00261     case CURLINFO_DATA_OUT:
00262         cerr << "Data out: " << message; break;
00263     case CURLINFO_END:
00264         cerr << "End: " << message; break;
00265 #ifdef CURLINFO_SSL_DATA_IN
00266     case CURLINFO_SSL_DATA_IN:
00267         cerr << "SSL Data in: " << message; break;
00268 #endif
00269 #ifdef CURLINFO_SSL_DATA_OUT
00270     case CURLINFO_SSL_DATA_OUT:
00271         cerr << "SSL Data out: " << message; break;
00272 #endif
00273     default:
00274         cerr << "Curl info: " << message; break;
00275     }
00276     return 0;
00277 }
00278 
00282 void
00283 HTTPConnect::www_lib_init()
00284 {
00285     d_curl = curl_easy_init();
00286     if (!d_curl)
00287         throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
00288 
00289     // Now set options that will remain constant for the duration of this
00290     // CURL object.
00291 
00292     // Set the proxy host.
00293     if (!d_rcr->get_proxy_server_host().empty()) {
00294         DBG(cerr << "Setting up a proxy server." << endl);
00295         DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host()
00296             << endl);
00297         DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port()
00298             << endl);
00299         DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw()
00300             << endl);
00301         curl_easy_setopt(d_curl, CURLOPT_PROXY,
00302                          d_rcr->get_proxy_server_host().c_str());
00303         curl_easy_setopt(d_curl, CURLOPT_PROXYPORT,
00304                          d_rcr->get_proxy_server_port());
00305 
00306         // As of 4/21/08 only NTLM, Digest and Basic work.
00307 #ifdef CURLOPT_PROXYAUTH
00308         curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY);
00309 #endif
00310 
00311         // Password might not be required. 06/21/04 jhrg
00312         if (!d_rcr->get_proxy_server_userpw().empty())
00313             curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD,
00314                              d_rcr->get_proxy_server_userpw().c_str());
00315     }
00316 
00317     curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
00318     // We have to set FailOnError to false for any of the non-Basic
00319     // authentication schemes to work. 07/28/03 jhrg
00320     curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
00321 
00322     // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
00323     // choosing the the 'safest' one supported by the server.
00324     // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
00325     curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
00326 
00327     curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
00328     curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
00329     curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
00330     // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
00331     // param of save_raw_http_headers to a vector<string> object.
00332 
00333     // Follow 302 (redirect) responses
00334     curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1);
00335     curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5);
00336 
00337     // If the user turns off SSL validation...
00338     if (!d_rcr->get_validate_ssl() == 0) {
00339         curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
00340         curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
00341     }
00342 
00343     // Look to see if cookies are turned on in the .dodsrc file. If so,
00344     // activate here. We honor 'session cookies' (cookies without an
00345     // expiration date) here so that session-base SSO systems will work as
00346     // expected.
00347     if (!d_cookie_jar.empty()) {
00348         DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl);
00349         curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
00350         curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1);
00351     }
00352 
00353     if (www_trace) {
00354         cerr << "Curl version: " << curl_version() << endl;
00355         curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
00356         curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
00357     }
00358 }
00359 
00363 class BuildHeaders : public unary_function<const string &, void>
00364 {
00365     struct curl_slist *d_cl;
00366 
00367 public:
00368     BuildHeaders() : d_cl(0)
00369     {}
00370 
00371     void operator()(const string &header)
00372     {
00373         DBG(cerr << "Adding '" << header.c_str() << "' to the header list."
00374             << endl);
00375         d_cl = curl_slist_append(d_cl, header.c_str());
00376     }
00377 
00378     struct curl_slist *get_headers()
00379     {
00380         return d_cl;
00381     }
00382 };
00383 
00398 long
00399 HTTPConnect::read_url(const string &url, FILE *stream,
00400                       vector<string> *resp_hdrs,
00401                       const vector<string> *headers)
00402 {
00403     curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
00404 
00405 #ifdef WIN32
00406     //  See the curl documentation for CURLOPT_FILE (aka CURLOPT_WRITEDATA)
00407     //  and the CURLOPT_WRITEFUNCTION option.  Quote: "If you are using libcurl as
00408     //  a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the
00409     //  CURLOPT_WRITEDATA option or you will experience crashes".  At the root of
00410     //  this issue is that one should not pass a FILE * to a windows DLL.  Close
00411     //  inspection of libcurl yields that their default write function when using
00412     //  the CURLOPT_WRITEDATA is just "fwrite".
00413     curl_easy_setopt(d_curl, CURLOPT_FILE, stream);
00414     curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
00415 #else
00416     curl_easy_setopt(d_curl, CURLOPT_FILE, stream);
00417 #endif
00418 
00419     DBG(copy(d_request_headers.begin(), d_request_headers.end(),
00420              ostream_iterator<string>(cerr, "\n")));
00421 
00422     BuildHeaders req_hdrs;
00423     req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
00424                         req_hdrs);
00425     if (headers)
00426         req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
00427     curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
00428 
00429     // Turn off the proxy for this URL?
00430     bool temporary_proxy = false;
00431     if ((temporary_proxy = url_uses_no_proxy_for(url))) {
00432         DBG(cerr << "Suppress proxy for url: " << url << endl);
00433         curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
00434     }
00435 
00436     string::size_type at_sign = url.find('@');
00437     // Assume username:password present *and* assume it's an HTTP URL; it *is*
00438     // HTTPConnect, after all. 7 is position after "http://"; the second arg
00439     // to substr() is the sub string length.
00440     if (at_sign != url.npos)
00441         d_upstring = url.substr(7, at_sign - 7);
00442 
00443     if (!d_upstring.empty())
00444         curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
00445 
00446     // Pass save_raw_http_headers() a pointer to the vector<string> where the
00447     // response headers may be stored. Callers can use the resp_hdrs
00448     // value/result parameter to get the raw response header information .
00449     curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
00450 
00451     CURLcode res = curl_easy_perform(d_curl);
00452 
00453     // Free the header list and null the value in d_curl.
00454     curl_slist_free_all(req_hdrs.get_headers());
00455     curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
00456 
00457     // Reset the proxy?
00458     if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
00459         curl_easy_setopt(d_curl, CURLOPT_PROXY,
00460                          d_rcr->get_proxy_server_host().c_str());
00461 
00462     if (res != 0)
00463         throw Error(d_error_buffer);
00464 
00465     long status;
00466     res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
00467     if (res != 0)
00468         throw Error(d_error_buffer);
00469 
00470     return status;
00471 }
00472 
00476 bool
00477 HTTPConnect::url_uses_proxy_for(const string &url) throw()
00478 {
00479     if (d_rcr->is_proxy_for_used()) {
00480         Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
00481         int index = 0, matchlen;
00482         return host_regex.search(url.c_str(), url.size(), matchlen, index) != -1;
00483     }
00484 
00485     return false;
00486 }
00487 
00491 bool
00492 HTTPConnect::url_uses_no_proxy_for(const string &url) throw()
00493 {
00494     return d_rcr->is_no_proxy_for_used()
00495            && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
00496 }
00497 
00498 // Public methods. Mostly...
00499 
00506 HTTPConnect::HTTPConnect(RCReader *rcr) : d_username(""), d_password(""),
00507                                           d_cookie_jar(""),
00508                                           d_dap_client_protocol_major(2),
00509                                           d_dap_client_protocol_minor(0)
00510 
00511 {
00512     d_accept_deflate = rcr->get_deflate();
00513     d_rcr = rcr;
00514 
00515     // Load in the default headers to send with a request. The empty Pragma
00516     // headers overrides libcurl's default Pragma: no-cache header (which
00517     // will disable caching by Squid, et c.). The User-Agent header helps
00518     // make server logs more readable. 05/05/03 jhrg
00519     d_request_headers.push_back(string("Pragma:"));
00520     string user_agent = string("User-Agent: ") + string(CNAME)
00521                         + string("/") + string(CVER);
00522     d_request_headers.push_back(user_agent);
00523     if (d_accept_deflate)
00524         d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
00525 
00526     // HTTPCache::instance returns a valid ptr or 0.
00527     if (d_rcr->get_use_cache())
00528         d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(),
00529                                            true);
00530     else
00531         d_http_cache = 0;
00532 
00533     DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec
00534          << ")" << endl);
00535 
00536     if (d_http_cache) {
00537         d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
00538         d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
00539         d_http_cache->set_max_size(d_rcr->get_max_cache_size());
00540         d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
00541         d_http_cache->set_default_expiration(d_rcr->get_default_expires());
00542         d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
00543     }
00544 
00545     d_cookie_jar = rcr->get_cookie_jar();
00546 
00547     www_lib_init();  // This may throw either Error or InternalErr
00548 }
00549 
00550 HTTPConnect::~HTTPConnect()
00551 {
00552     DBG2(cerr << "Entering the HTTPConnect dtor" << endl);
00553 
00554     curl_easy_cleanup(d_curl);
00555 
00556     DBG2(cerr << "Leaving the HTTPConnect dtor" << endl);
00557 }
00558 
00571 HTTPResponse *
00572 HTTPConnect::fetch_url(const string &url)
00573 {
00574 #ifdef HTTP_TRACE
00575     cout << "GET " << url << " HTTP/1.0" << endl;
00576 #endif
00577 
00578     HTTPResponse *stream;
00579 
00580     if (d_http_cache && d_http_cache->is_cache_enabled()) {
00581         stream = caching_fetch_url(url);
00582     }
00583     else {
00584         stream = plain_fetch_url(url);
00585     }
00586 
00587 #ifdef HTTP_TRACE
00588     stringstream ss;
00589     ss << "HTTP/1.0 " << stream->get_status() << " -" << endl;
00590     for (size_t i = 0; i < stream->get_headers()->size(); i++) {
00591         ss << stream->get_headers()->at(i) << endl;
00592     }
00593     cout << ss.str();
00594 #endif
00595 
00596     ParseHeader parser;
00597 
00598     parser = for_each(stream->get_headers()->begin(),
00599                       stream->get_headers()->end(), ParseHeader());
00600 
00601 #ifdef HTTP_TRACE
00602     cout << endl << endl;
00603 #endif
00604 
00605     // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu)
00606     if (parser.get_location() != "" &&
00607             url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) {
00608         delete stream;
00609         return fetch_url(parser.get_location());
00610     }
00611 
00612     stream->set_type(parser.get_object_type());
00613     stream->set_version(parser.get_server());
00614     stream->set_protocol(parser.get_protocol());
00615 
00616     return stream;
00617 }
00618 
00619 // Look around for a reasonable place to put a temporary file. Check first
00620 // the value of the TMPDIR env var. If that does not yeild a path that's
00621 // writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as
00622 // defined in stdio.h. If both come up empty, then use `./'.
00623 
00624 // Change this to a version that either returns a string or an open file
00625 // descriptor. Use information from https://buildsecurityin.us-cert.gov/
00626 // (see open()) to make it more secure. Ideal solution: get deserialize()
00627 // methods to read from a stream returned by libcurl, not from a temporary
00628 // file. 9/21/07 jhrg Updated to use strings, so other misc changes. 3/22/11
00629 static string
00630 get_tempfile_template(const string &file_template)
00631 {
00632     string c;
00633 
00634     // Windows has one idea of the standard name(s) for a temporary files dir
00635 #ifdef WIN32
00636     // white list for a WIN32 directory
00637     Regex directory("[-a-zA-Z0-9_:\\]*");
00638 
00639     // If we're OK to use getenv(), try it.
00640 #ifdef USE_GETENV
00641     c = getenv("TEMP");
00642     if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
00643         goto valid_temp_directory;
00644 
00645     c= getenv("TMP");
00646     if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
00647         goto valid_temp_directory;
00648 #endif // USE_GETENV
00649 
00650     // The windows default
00651     c = "c:\tmp";
00652     if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
00653         goto valid_temp_directory;
00654 
00655 #else   // Unix/Linux/OSX has another...
00656     // white list for a directory
00657     Regex directory("[-a-zA-Z0-9_/]*");
00658 #ifdef USE_GETENV
00659     c = getenv("TMPDIR");
00660     if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
00661         goto valid_temp_directory;
00662 #endif // USE_GETENV
00663 
00664     // Unix defines this sometimes - if present, use it.
00665 #ifdef P_tmpdir
00666     if (access(P_tmpdir, W_OK | R_OK) == 0) {
00667         c = P_tmpdir;
00668         goto valid_temp_directory;
00669     }
00670 #endif
00671 
00672     // The Unix default
00673     c = "/tmp";
00674     if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
00675         goto valid_temp_directory;
00676 
00677 #endif  // WIN32
00678 
00679     // If we found nothing useful, use the current directory
00680     c = ".";
00681 
00682 valid_temp_directory:
00683 
00684 #ifdef WIN32
00685     c += "\\" + file_template;
00686 #else
00687     c += "/" + file_template;
00688 #endif
00689 
00690     return c;
00691 }
00692 
00711 string
00712 get_temp_file(FILE *&stream) throw(InternalErr)
00713 {
00714     string dods_temp = get_tempfile_template((string)"dodsXXXXXX");
00715 
00716     vector<char> pathname(dods_temp.length() + 1);
00717 
00718     strncpy(&pathname[0], dods_temp.c_str(), dods_temp.length());
00719 
00720     DBG(cerr << "pathanme: " << &pathname[0] << " (" << dods_temp.length() + 1 << ")" << endl);
00721 
00722     // Open truncated for update. NB: mkstemp() returns a file descriptor.
00723 #if defined(WIN32) || defined(TEST_WIN32_TEMPS)
00724     stream = fopen(_mktemp(&pathname[0]), "w+b");
00725 #else
00726     // Make sure that temp files are accessible only by the owner.
00727     umask(077);
00728     stream = fdopen(mkstemp(&pathname[0]), "w+");
00729 #endif
00730 
00731     if (!stream) {
00732         throw InternalErr(__FILE__, __LINE__,
00733                 "Failed to open a temporary file for the data values ("
00734                 + dods_temp + ")");
00735     }
00736 
00737     dods_temp = &pathname[0];
00738     return dods_temp;
00739 }
00740 
00742 void
00743 close_temp(FILE *s, const string &name)
00744 {
00745     int res = fclose(s);
00746     if (res)
00747         throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
00748 
00749     res = unlink(name.c_str());
00750     if (res != 0)
00751         throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
00752 }
00753 
00775 HTTPResponse *
00776 HTTPConnect::caching_fetch_url(const string &url)
00777 {
00778     DBG(cerr << "Is this URL (" << url << ") in the cache?... ");
00779 
00780     vector<string> *headers = new vector<string>;
00781     string file_name;
00782     FILE *s = d_http_cache->get_cached_response(url, *headers, file_name);
00783     if (!s) {
00784         // url not in cache; get it and cache it
00785         DBGN(cerr << "no; getting response and caching." << endl);
00786         delete headers; headers = 0;
00787         time_t now = time(0);
00788         HTTPResponse *rs = plain_fetch_url(url);
00789         d_http_cache->cache_response(url, now, *(rs->get_headers()), rs->get_stream());
00790 
00791         return rs;
00792     }
00793     else { // url in cache
00794         DBGN(cerr << "yes... ");
00795 
00796         if (d_http_cache->is_url_valid(url)) { // url in cache and valid
00797             DBGN(cerr << "and it's valid; using cached response." << endl);
00798             HTTPCacheResponse *crs = new HTTPCacheResponse(s, 200, headers, file_name, d_http_cache);
00799             return crs;
00800         }
00801         else { // url in cache but not valid; validate
00802             DBGN(cerr << "but it's not valid; validating... ");
00803 
00804             d_http_cache->release_cached_response(s); // This closes 's'
00805             headers->clear();
00806             vector<string> cond_hdrs = d_http_cache->get_conditional_request_headers(url);
00807             FILE *body = 0;
00808             string dods_temp = get_temp_file(body);
00809             time_t now = time(0); // When was the request made (now).
00810             long http_status;
00811 
00812             try {
00813                 http_status = read_url(url, body, /*resp_hdrs*/headers, &cond_hdrs);
00814                 rewind(body);
00815             }
00816             catch (Error &e) {
00817                 close_temp(body, dods_temp);
00818                 delete headers;
00819                 throw ;
00820             }
00821 
00822             switch (http_status) {
00823                 case 200: { // New headers and new body
00824                     DBGN(cerr << "read a new response; caching." << endl);
00825 
00826                     d_http_cache->cache_response(url, now, /* *resp_hdrs*/*headers, body);
00827                     HTTPResponse *rs = new HTTPResponse(body, http_status, /*resp_hdrs*/headers, dods_temp);
00828 
00829                     return rs;
00830                 }
00831 
00832                 case 304: { // Just new headers, use cached body
00833                     DBGN(cerr << "cached response valid; updating." << endl);
00834 
00835                     close_temp(body, dods_temp);
00836                     d_http_cache->update_response(url, now, /* *resp_hdrs*/ *headers);
00837                     string file_name;
00838                     FILE *hs = d_http_cache->get_cached_response(url, *headers, file_name);
00839                     HTTPCacheResponse *crs = new HTTPCacheResponse(hs, 304, headers, file_name, d_http_cache);
00840                     return crs;
00841                 }
00842 
00843                 default: { // Oops.
00844                     close_temp(body, dods_temp);
00845                     if (http_status >= 400) {
00846                         delete headers; headers = 0;
00847                         string msg = "Error while reading the URL: ";
00848                         msg += url;
00849                         msg
00850                         += ".\nThe OPeNDAP server returned the following message:\n";
00851                         msg += http_status_to_string(http_status);
00852                         throw Error(msg);
00853                     }
00854                     else {
00855                         delete headers; headers = 0;
00856                         throw InternalErr(__FILE__, __LINE__,
00857                                 "Bad response from the HTTP server: " + long_to_string(http_status));
00858                     }
00859                 }
00860             }
00861         }
00862     }
00863 
00864     throw InternalErr(__FILE__, __LINE__, "Should never get here");
00865 }
00866 
00878 HTTPResponse *
00879 HTTPConnect::plain_fetch_url(const string &url)
00880 {
00881     DBG(cerr << "Getting URL: " << url << endl);
00882     FILE *stream = 0;
00883     string dods_temp = get_temp_file(stream);
00884     vector<string> *resp_hdrs = new vector<string>;
00885 
00886     int status = -1;
00887     try {
00888         status = read_url(url, stream, resp_hdrs); // Throws Error.
00889         if (status >= 400) {
00890                 delete resp_hdrs;
00891             string msg = "Error while reading the URL: ";
00892             msg += url;
00893             msg += ".\nThe OPeNDAP server returned the following message:\n";
00894             msg += http_status_to_string(status);
00895             throw Error(msg);
00896         }
00897     }
00898 
00899     catch (Error &e) {
00900         delete resp_hdrs;
00901         close_temp(stream, dods_temp);
00902         throw;
00903     }
00904 
00905     rewind(stream);
00906 
00907     return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
00908 }
00909 
00921 void
00922 HTTPConnect::set_accept_deflate(bool deflate)
00923 {
00924     d_accept_deflate = deflate;
00925 
00926     if (d_accept_deflate) {
00927         if (find(d_request_headers.begin(), d_request_headers.end(),
00928                  "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end())
00929             d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
00930         DBG(copy(d_request_headers.begin(), d_request_headers.end(),
00931                  ostream_iterator<string>(cerr, "\n")));
00932     }
00933     else {
00934         vector<string>::iterator i;
00935         i = remove_if(d_request_headers.begin(), d_request_headers.end(),
00936                       bind2nd(equal_to<string>(),
00937                               string("Accept-Encoding: deflate, gzip, compress")));
00938         d_request_headers.erase(i, d_request_headers.end());
00939     }
00940 }
00941 
/** Predicate that is true for strings that start with a given header name;
    for use with find_if() and similar algorithms.

    The name is stored by value. Holding a reference to it would dangle
    whenever the object outlives a temporary string used to build it, e.g.
    HeaderMatch m("XDAP-Accept:").

    The argument_type and result_type typedefs are the ones the former base
    class std::unary_function supplied; that class template is deprecated in
    C++11 and removed in C++17. */
class HeaderMatch {
    string d_header;    // Own copy of the text to look for.
    public:
        typedef const string &argument_type;
        typedef bool result_type;

        HeaderMatch(const string &header) : d_header(header) {}

        /** @return True if arg begins with the header text. */
        bool operator()(const string &arg) const
        {
            // Compare only the prefix; there is no need to search all of arg.
            return arg.compare(0, d_header.size(), d_header) == 0;
        }
};
00949 
00958 void
00959 HTTPConnect::set_xdap_protocol(int major, int minor)
00960 {
00961     // Look for, and remove if one exists, an XDAP-Accept header
00962     vector<string>::iterator i;
00963     i = find_if(d_request_headers.begin(), d_request_headers.end(),
00964                 HeaderMatch("XDAP-Accept:"));
00965     if (i != d_request_headers.end())
00966         d_request_headers.erase(i);
00967 
00968     // Record and add the new header value
00969     d_dap_client_protocol_major = major;
00970     d_dap_client_protocol_minor = minor;
00971     ostringstream xdap_accept;
00972     xdap_accept << "XDAP-Accept: " << major << "." << minor;
00973 
00974     d_request_headers.push_back(xdap_accept.str());
00975 
00976     DBG(copy(d_request_headers.begin(), d_request_headers.end(),
00977              ostream_iterator<string>(cerr, "\n")));
00978 }
00979 
00995 void
00996 HTTPConnect::set_credentials(const string &u, const string &p)
00997 {
00998     if (u.empty())
00999         return;
01000 
01001     // Store the credentials locally.
01002     d_username = u;
01003     d_password = p;
01004 
01005     d_upstring = u + ":" + p;
01006 }
01007 
01008 } // namespace libdap