Osmium  0.1
include/osmium/utils/unicode.hpp
Go to the documentation of this file.
00001 #ifndef OSMIUM_UTILS_UNICODE_HPP
00002 #define OSMIUM_UTILS_UNICODE_HPP
00003 
00004 /*
00005 
00006 Copyright 2011 Jochen Topf <jochen@topf.org> and others (see README).
00007 
00008 This file is part of Osmium (https://github.com/joto/osmium).
00009 
00010 Osmium is free software: you can redistribute it and/or modify it under the
00011 terms of the GNU Lesser General Public License or (at your option) the GNU
00012 General Public License as published by the Free Software Foundation, either
00013 version 3 of the Licenses, or (at your option) any later version.
00014 
00015 Osmium is distributed in the hope that it will be useful, but WITHOUT ANY
00016 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
00017 PARTICULAR PURPOSE. See the GNU Lesser General Public License and the GNU
00018 General Public License for more details.
00019 
00020 You should have received a copy of the Licenses along with Osmium. If not, see
00021 <http://www.gnu.org/licenses/>.
00022 
00023 */
00024 
#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <vector>

#include <v8.h>
#include <unicode/ustring.h>
00030 
00031 namespace Osmium {
00032 
00033     /* These unicode conversion functions are used to convert UTF-8 to UTF-16 and then into
00034     a v8::String and back, because the functions that v8 has for this do not handle
00035     characters outside the Basic Multilingual Plane (>16bit) properly. */
00036 
00038     class Unicode_Conversion_Error {
00039 
00040     public:
00041 
00042         UErrorCode error_code;
00043         Unicode_Conversion_Error(UErrorCode ec) : error_code(ec) { }
00044 
00046         bool buffer_overflow() const {
00047             return error_code == U_BUFFER_OVERFLOW_ERROR;
00048         }
00049 
00050     };
00051 
00053     class UTF8_to_UTF16_Conversion_Error : public Unicode_Conversion_Error {
00054 
00055     public:
00056 
00057         UTF8_to_UTF16_Conversion_Error(UErrorCode ec) : Unicode_Conversion_Error(ec) { }
00058 
00059     };
00060 
00062     class UTF16_to_UTF8_Conversion_Error : public Unicode_Conversion_Error {
00063 
00064     public:
00065 
00066         UTF16_to_UTF8_Conversion_Error(UErrorCode ec) : Unicode_Conversion_Error(ec) { }
00067 
00068     };
00069 
00078     template<int characters> v8::Local<v8::String> utf8_to_v8_String(const char *cstring) {
00079         UErrorCode error_code = U_ZERO_ERROR;
00080         UChar dest[characters*2];
00081         int32_t dest_length;
00082         u_strFromUTF8(dest, characters*2, &dest_length, cstring, -1, &error_code);
00083         if (error_code != U_ZERO_ERROR) {
00084             throw UTF8_to_UTF16_Conversion_Error(error_code);
00085         }
00086         return v8::String::New(dest, dest_length);
00087     }
00088 
00097     template<int characters> const char *v8_String_to_utf8(v8::Local<v8::String> string) {
00098         UErrorCode error_code = U_ZERO_ERROR;
00099         uint16_t src[characters*2];
00100         static char buffer[characters*4];
00101         int32_t buffer_length;
00102         string->Write(src, 0, characters*2);
00103         u_strToUTF8(buffer, characters*4, &buffer_length, src, std::min(characters*2, string->Length()), &error_code);
00104         if (error_code != U_ZERO_ERROR) {
00105             throw UTF16_to_UTF8_Conversion_Error(error_code);
00106         }
00107         return buffer;
00108     }
00109 
00110     // this function does not work without the inline. strange.
00118     inline void v8_String_to_ostream(v8::Local<v8::String> string, std::ostream &os) {
00119         UErrorCode error_code = U_ZERO_ERROR;
00120         int length = 4 * (string->Length() + 1);
00121         uint16_t *src = (uint16_t *) malloc(length);
00122         if (!src) {
00123             throw std::bad_alloc();
00124         }
00125         char *buffer = (char *) malloc(length);
00126         if (!buffer) {
00127             throw std::bad_alloc();
00128         }
00129         int32_t buffer_length;
00130         string->Write(src);
00131         u_strToUTF8(buffer, length, &buffer_length, src, string->Length(), &error_code);
00132         if (error_code != U_ZERO_ERROR) {
00133             throw UTF16_to_UTF8_Conversion_Error(error_code);
00134         }
00135         os << buffer;
00136         free(buffer);
00137         free(src);
00138     }
00139 
00140 } // namespace Osmium
00141 
00142 #endif // OSMIUM_UTILS_UNICODE_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines