Osmium  0.1
include/osmium/osmfile.hpp
Go to the documentation of this file.
00001 #ifndef OSMIUM_OSMFILE_HPP
00002 #define OSMIUM_OSMFILE_HPP
00003 
00004 /*
00005 
00006 Copyright 2011 Jochen Topf <jochen@topf.org> and others (see README).
00007 
00008 This file is part of Osmium (https://github.com/joto/osmium).
00009 
00010 Osmium is free software: you can redistribute it and/or modify it under the
00011 terms of the GNU Lesser General Public License or (at your option) the GNU
00012 General Public License as published by the Free Software Foundation, either
00013 version 3 of the Licenses, or (at your option) any later version.
00014 
00015 Osmium is distributed in the hope that it will be useful, but WITHOUT ANY
00016 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
00017 PARTICULAR PURPOSE. See the GNU Lesser General Public License and the GNU
00018 General Public License for more details.
00019 
00020 You should have received a copy of the Licenses along with Osmium. If not, see
00021 <http://www.gnu.org/licenses/>.
00022 
00023 */
00024 
00025 #include <unistd.h>
00026 #include <sys/types.h>
00027 #include <sys/wait.h>
00028 #include <fcntl.h>
00029 #include <errno.h>
00030 #include <stdexcept>
00031 #include <boost/utility.hpp>
00032 
00033 namespace Osmium {
00034 
00035     // forward declaration
00036     namespace Output {
00037         class Base;
00038     }
00039 
00047     class OSMFile {
00048 
00049     public:
00050 
00056         class SystemError : public std::runtime_error {
00057 
00058             int m_errno;
00059 
00060         public:
00061 
00062             SystemError(const std::string& whatarg,
00063                         int e)
00064                 : std::runtime_error(whatarg),
00065                   m_errno(e) {
00066             }
00067 
00072             int system_errno() const throw() {
00073                 return m_errno;
00074             }
00075 
00076         };
00077 
00082         class IOError : public std::runtime_error {
00083 
00084             std::string m_filename;
00085             int m_errno;
00086 
00087         public:
00088 
00089             IOError(const std::string& whatarg,
00090                     const std::string& filename,
00091                     int e)
00092                 : std::runtime_error(whatarg),
00093                   m_filename(filename),
00094                   m_errno(e) {
00095             }
00096 
00097             ~IOError() throw() {
00098             }
00099 
00103             const std::string& filename() const throw() {
00104                 return m_filename;
00105             }
00106 
00111             int system_errno() const throw() {
00112                 return m_errno;
00113             }
00114 
00115         };
00116 
00117         class ArgumentError : public std::runtime_error {
00118 
00119             std::string m_value;
00120 
00121         public:
00122 
00123             ArgumentError(const std::string& whatarg,
00124                           const std::string& value="")
00125                 : std::runtime_error(whatarg),
00126                   m_value(value) {
00127             }
00128 
00129             ~ArgumentError() throw() {
00130             }
00131 
00132             const std::string& value() const throw() {
00133                 return m_value;
00134             }
00135 
00136         };
00137 
00142         struct FileTypeError {
00143         };
00144 
00149         struct FileTypeOSMExpected : public FileTypeError {
00150         };
00151 
00156         struct FileTypeHistoryExpected : public FileTypeError {
00157         };
00158 
00165         class FileType : boost::noncopyable {
00166 
00167             std::string m_suffix;
00168             bool m_has_multiple_object_versions;
00169 
00170             FileType(std::string suffix, bool has_multiple_object_versions) : m_suffix(suffix), m_has_multiple_object_versions(has_multiple_object_versions) {
00171             }
00172 
00173         public:
00174 
00175             std::string suffix() const {
00176                 return m_suffix;
00177             }
00178 
00179             bool has_multiple_object_versions() const {
00180                 return m_has_multiple_object_versions;
00181             }
00182 
00186             static FileType* OSM() {
00187                 static FileType instance(".osm", false);
00188                 return &instance;
00189             }
00190 
00194             static FileType* History() {
00195                 static FileType instance(".osh", true);
00196                 return &instance;
00197             }
00198 
00202             static FileType* Change() {
00203                 static FileType instance(".osc", true);
00204                 return &instance;
00205             }
00206 
00207         };
00208 
00216         class FileEncoding : boost::noncopyable {
00217 
00218             std::string m_suffix;
00219             std::string m_compress;
00220             std::string m_decompress;
00221             bool m_pbf;
00222 
00223             FileEncoding(std::string suffix, std::string compress, std::string decompress, bool pbf) : m_suffix(suffix), m_compress(compress), m_decompress(decompress), m_pbf(pbf) {
00224             }
00225 
00226         public:
00227 
00228             std::string suffix() const {
00229                 return m_suffix;
00230             }
00231 
00232             std::string compress() const {
00233                 return m_compress;
00234             }
00235 
00236             std::string decompress() const {
00237                 return m_decompress;
00238             }
00239 
00240             bool is_pbf() const {
00241                 return m_pbf;
00242             }
00243 
00247             static FileEncoding* PBF() {
00248                 static FileEncoding instance(".pbf", "", "", true);
00249                 return &instance;
00250             }
00251 
00255             static FileEncoding* XML() {
00256                 static FileEncoding instance("", "", "", false);
00257                 return &instance;
00258             }
00259 
00263             static FileEncoding* XMLgz() {
00264                 static FileEncoding instance(".gz", "gzip", "gzcat", false);
00265                 return &instance;
00266             }
00267 
00271             static FileEncoding* XMLbz2() {
00272                 static FileEncoding instance(".bz2", "bzip2", "bzcat", false);
00273                 return &instance;
00274             }
00275 
00276         };
00277 
00278     private:
00279 
00281         FileType* m_type;
00282 
00284         FileEncoding* m_encoding;
00285 
00287         std::string m_filename;
00288 
00290         int m_fd;
00291 
00297         pid_t m_childpid;
00298 
00310         int execute(std::string command, int input) {
00311             int pipefd[2];
00312             if (pipe(pipefd) < 0) {
00313                 throw SystemError("Can't create pipe", errno);
00314             }
00315             pid_t pid = fork();
00316             if (pid < 0) {
00317                 throw SystemError("Can't fork", errno);
00318             }
00319             if (pid == 0) { // child
00320                 // close all file descriptors except one end of the pipe
00321                 for (int i=0; i < 32; ++i) {
00322                     if (i != pipefd[1-input]) {
00323                         ::close(i);
00324                     }
00325                 }
00326                 if (dup2(pipefd[1-input], 1-input) < 0) { // put end of pipe as stdout/stdin
00327                     exit(1);
00328                 }
00329 
00330                 if (input == 0) {
00331                     open("/dev/null", O_RDONLY); // stdin
00332                     open("/dev/null", O_WRONLY); // stderr
00333                     if (execlp(command.c_str(), command.c_str(), m_filename.c_str(), NULL) < 0) {
00334                         exit(1);
00335                     }
00336                 } else {
00337                     if (open(m_filename.c_str(), O_WRONLY | O_TRUNC | O_CREAT, 0666) != 1) {
00338                         exit(1);
00339                     }
00340                     open("/dev/null", O_WRONLY); // stderr
00341                     if (execlp(command.c_str(), command.c_str(), 0, NULL) < 0) {
00342                         exit(1);
00343                     }
00344                 }
00345             }
00346             // parent
00347             m_childpid = pid;
00348             ::close(pipefd[1-input]);
00349             return pipefd[input];
00350         }
00351 
00358         int open_input_file() const {
00359             if (m_filename == "") {
00360                 return 0; // stdin
00361             } else {
00362                 int fd = open(m_filename.c_str(), O_RDONLY);
00363                 if (fd < 0) {
00364                     throw IOError("Open failed", m_filename, errno);
00365                 }
00366                 return fd;
00367             }
00368         }
00369 
00377         int open_output_file() const {
00378             if (m_filename == "") {
00379                 return 1; // stdout
00380             } else {
00381                 int fd = open(m_filename.c_str(), O_WRONLY | O_TRUNC | O_CREAT, 0666);
00382                 if (fd < 0) {
00383                     throw IOError("Open failed", m_filename, errno);
00384                 }
00385                 return fd;
00386             }
00387         }
00388 
00398         int open_input_file_or_url() {
00399             std::string protocol = m_filename.substr(0, m_filename.find_first_of(':'));
00400             if (protocol == "http" || protocol == "https") {
00401                 return execute("curl", 0);
00402             } else {
00403                 return open_input_file();
00404             }
00405         }
00406 
00407     public:
00408 
00417         OSMFile(const std::string& filename = "")
00418             : m_type(FileType::OSM()),
00419               m_encoding(FileEncoding::PBF()),
00420               m_filename(filename),
00421               m_fd(-1),
00422               m_childpid(0) {
00423 
00424             // stdin/stdout
00425             if (filename == "" || filename == "-") {
00426                 m_filename = "";
00427                 default_settings_for_stdinout();
00428                 return;
00429             }
00430 
00431             // filename is actually a URL
00432             std::string protocol = m_filename.substr(0, m_filename.find_first_of(':'));
00433             if (protocol == "http" || protocol == "https") {
00434                 default_settings_for_url();
00435                 return;
00436             }
00437 
00438             // isolate filename suffix
00439             size_t n = filename.find_last_of('/');
00440             if (n == std::string::npos) {
00441                 n = 0;
00442             } else {
00443                 ++n;
00444             }
00445             std::string suffix(filename.substr(filename.find_first_of('.', n)+1));
00446 
00447             set_type_and_encoding(suffix);
00448         }
00449 
00450         void set_type_and_encoding(const std::string& suffix) {
00451             if (suffix == "pbf" || suffix == "osm.pbf") {
00452                 m_type     = FileType::OSM();
00453                 m_encoding = FileEncoding::PBF();
00454             } else if (suffix == "osm") {
00455                 m_type     = FileType::OSM();
00456                 m_encoding = FileEncoding::XML();
00457             } else if (suffix == "osm.bz2") {
00458                 m_type     = FileType::OSM();
00459                 m_encoding = FileEncoding::XMLbz2();
00460             } else if (suffix == "osm.gz") {
00461                 m_type     = FileType::OSM();
00462                 m_encoding = FileEncoding::XMLgz();
00463             } else if (suffix == "osh.pbf") {
00464                 m_type     = FileType::History();
00465                 m_encoding = FileEncoding::PBF();
00466             } else if (suffix == "osh") {
00467                 m_type     = FileType::History();
00468                 m_encoding = FileEncoding::XML();
00469             } else if (suffix == "osh.bz2") {
00470                 m_type     = FileType::History();
00471                 m_encoding = FileEncoding::XMLbz2();
00472             } else if (suffix == "osh.gz") {
00473                 m_type     = FileType::History();
00474                 m_encoding = FileEncoding::XMLgz();
00475             } else if (suffix == "osc") {
00476                 m_type     = FileType::Change();
00477                 m_encoding = FileEncoding::XML();
00478             } else if (suffix == "osc.bz2") {
00479                 m_type     = FileType::Change();
00480                 m_encoding = FileEncoding::XMLbz2();
00481             } else if (suffix == "osc.gz") {
00482                 m_type     = FileType::Change();
00483                 m_encoding = FileEncoding::XMLgz();
00484             } else {
00485                 default_settings_for_file();
00486             }
00487         }
00488 
00494         OSMFile(const OSMFile& orig) {
00495             m_fd       = -1;
00496             m_childpid = 0;
00497             m_type     = orig.get_type();
00498             m_encoding = orig.get_encoding();
00499             m_filename = orig.get_filename();
00500         }
00501 
00507         OSMFile& operator=(const OSMFile& orig) {
00508             m_fd       = -1;
00509             m_childpid = 0;
00510             m_type     = orig.get_type();
00511             m_encoding = orig.get_encoding();
00512             m_filename = orig.get_filename();
00513             return *this;
00514         }
00515 
00516         ~OSMFile() {
00517             try {
00518                 close();
00519             } catch (...) {
00520                 // ignore exceptions
00521             }
00522         }
00523 
00524         void close() {
00525             if (m_fd > 0) {
00526                 ::close(m_fd);
00527                 m_fd = -1;
00528             }
00529 
00530             if (m_childpid) {
00531                 int status;
00532                 pid_t pid = waitpid(m_childpid, &status, 0);
00533                 if (pid < 0 || !WIFEXITED(status) || WEXITSTATUS(status) != 0) {
00534                     throw IOError("Subprocess returned error", "", errno);
00535                 }
00536                 m_childpid = 0;
00537             }
00538         }
00539 
00545         void default_settings_for_stdinout() {
00546             m_type     = FileType::OSM();
00547             m_encoding = FileEncoding::PBF();
00548         }
00549 
00555         void default_settings_for_file() {
00556             m_type     = FileType::OSM();
00557             m_encoding = FileEncoding::PBF();
00558         }
00559 
00565         void default_settings_for_url() {
00566             m_type     = FileType::OSM();
00567             m_encoding = FileEncoding::XML();
00568         }
00569 
00570         int get_fd() const {
00571             return m_fd;
00572         }
00573 
00574         FileType* get_type() const {
00575             return m_type;
00576         }
00577 
00578         OSMFile& set_type(FileType* type) {
00579             m_type = type;
00580             return *this;
00581         }
00582 
00583         OSMFile& set_type(std::string& type) {
00584             if (type == "osm") {
00585                 m_type = FileType::OSM();
00586             } else if (type == "history" || type == "osh") {
00587                 m_type = FileType::History();
00588             } else if (type == "change" || type == "osc") {
00589                 m_type = FileType::Change();
00590             } else {
00591                 throw ArgumentError("Unknown OSM file type", type);
00592             }
00593             return *this;
00594         }
00595 
00596         bool has_multiple_object_versions() const {
00597             return m_type->has_multiple_object_versions();
00598         }
00599 
00600         FileEncoding* get_encoding() const {
00601             return m_encoding;
00602         }
00603 
00604         OSMFile& set_encoding(FileEncoding* encoding) {
00605             m_encoding = encoding;
00606             return *this;
00607         }
00608 
00609         OSMFile& set_encoding(std::string& encoding) {
00610             if (encoding == "pbf") {
00611                 m_encoding = FileEncoding::PBF();
00612             } else if (encoding == "xml") {
00613                 m_encoding = FileEncoding::XML();
00614             } else if (encoding == "xmlgz" || encoding == "gz") {
00615                 m_encoding = FileEncoding::XMLgz();
00616             } else if (encoding == "xmlbz2" || encoding == "bz2") {
00617                 m_encoding = FileEncoding::XMLbz2();
00618             } else {
00619                 throw ArgumentError("Unknown OSM file encoding", encoding);
00620             }
00621             return *this;
00622         }
00623 
00624         OSMFile& set_filename(std::string& filename) {
00625             if (filename == "-") {
00626                 m_filename = "";
00627             } else {
00628                 m_filename = filename;
00629             }
00630             return *this;
00631         }
00632 
00633         std::string get_filename() const {
00634             return m_filename;
00635         }
00636 
00637         std::string get_filename_without_suffix() const {
00638             return m_filename.substr(m_filename.find_first_of('.')+1);
00639         }
00640 
00641         std::string get_filename_with_default_suffix() const {
00642             std::string filename = get_filename_without_suffix();
00643             filename += m_type->suffix() + m_encoding->suffix();
00644             return filename;
00645         }
00646 
00647         void open_for_input() {
00648             m_fd = m_encoding->decompress() == "" ? open_input_file_or_url() : execute(m_encoding->decompress(), 0);
00649         }
00650 
00651         void open_for_output() {
00652             m_fd = m_encoding->compress() == "" ? open_output_file() : execute(m_encoding->compress(), 1);
00653         }
00654 
00658         template <class T> void read(T& handler);
00659 
00663         Osmium::Output::Base* create_output_file();
00664 
00665     };
00666 
00667 } // namespace Osmium
00668 
00669 #endif // OSMIUM_OSMFILE_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines