Eclipse SUMO - Simulation of Urban MObility
StringUtils.cpp
Go to the documentation of this file.
1 /****************************************************************************/
2 // Eclipse SUMO, Simulation of Urban MObility; see https://eclipse.org/sumo
3 // Copyright (C) 2001-2019 German Aerospace Center (DLR) and others.
4 // This program and the accompanying materials
5 // are made available under the terms of the Eclipse Public License v2.0
6 // which accompanies this distribution, and is available at
7 // http://www.eclipse.org/legal/epl-v20.html
8 // SPDX-License-Identifier: EPL-2.0
9 /****************************************************************************/
17 // Some static methods for string processing
18 /****************************************************************************/
19 
20 
21 // ===========================================================================
22 // included modules
23 // ===========================================================================
24 #include <config.h>
25 
26 #include <string>
27 #include <iostream>
28 #include <cstdio>
29 #include <cstring>
30 #include <regex>
31 #include <xercesc/util/TransService.hpp>
32 #include <xercesc/util/TranscodingException.hpp>
34 #include <utils/common/ToString.h>
35 #include "StringUtils.h"
36 
37 
38 // ===========================================================================
39 // static member definitions
40 // ===========================================================================
41 std::string StringUtils::emptyString;
42 
43 
44 // ===========================================================================
45 // method definitions
46 // ===========================================================================
47 std::string
48 StringUtils::prune(const std::string& str) {
49  const std::string::size_type endpos = str.find_last_not_of(" \t\n\r");
50  if (std::string::npos != endpos) {
51  const int startpos = (int)str.find_first_not_of(" \t\n\r");
52  return str.substr(startpos, endpos - startpos + 1);
53  }
54  return "";
55 }
56 
57 
58 std::string
59 StringUtils::to_lower_case(std::string str) {
60  for (int i = 0; i < (int)str.length(); i++) {
61  if (str[i] >= 'A' && str[i] <= 'Z') {
62  str[i] = str[i] + 'a' - 'A';
63  }
64  }
65  return str;
66 }
67 
68 
69 std::string
70 StringUtils::latin1_to_utf8(std::string str) {
71  // inspired by http://stackoverflow.com/questions/4059775/convert-iso-8859-1-strings-to-utf-8-in-c-c
72  std::string result;
73  for (int i = 0; i < (int)str.length(); i++) {
74  const unsigned char c = str[i];
75  if (c < 128) {
76  result += c;
77  } else {
78  result += (char)(0xc2 + (c > 0xbf));
79  result += (char)((c & 0x3f) + 0x80);
80  }
81  }
82  return result;
83 }
84 
85 
86 std::string
87 StringUtils::convertUmlaute(std::string str) {
88  str = replace(str, "\xE4", "ae");
89  str = replace(str, "\xC4", "Ae");
90  str = replace(str, "\xF6", "oe");
91  str = replace(str, "\xD6", "Oe");
92  str = replace(str, "\xFC", "ue");
93  str = replace(str, "\xDC", "Ue");
94  str = replace(str, "\xDF", "ss");
95  str = replace(str, "\xC9", "E");
96  str = replace(str, "\xE9", "e");
97  str = replace(str, "\xC8", "E");
98  str = replace(str, "\xE8", "e");
99  return str;
100 }
101 
102 
103 
104 std::string
105 StringUtils::replace(std::string str, const char* what,
106  const char* by) {
107  const std::string what_tmp(what);
108  const std::string by_tmp(by);
109  std::string::size_type idx = str.find(what);
110  const int what_len = (int)what_tmp.length();
111  if (what_len > 0) {
112  const int by_len = (int)by_tmp.length();
113  while (idx != std::string::npos) {
114  str = str.replace(idx, what_len, by);
115  idx = str.find(what, idx + by_len);
116  }
117  }
118  return str;
119 }
120 
121 
122 std::string StringUtils::substituteEnvironment(std::string str) {
123  // Expression for an environment variables, e.g. ${NAME}
124  // Note: - R"(...)" is a raw string literal syntax to simplify a regex declaration
125  // - .+? looks for the shortest match (non-greedy)
126  // - (.+?) defines a "subgroup" which is already stripped of the $ and {, }
127  std::regex envVarExpr(R"(\$\{(.+?)\})");
128 
129  // Are there any variables in this string?
130  std::smatch match;
131  std::string strIter = str;
132 
133  // Loop over the entire value string and look for variable names
134  while (std::regex_search(strIter, match, envVarExpr)) {
135  std::string varName = match[1];
136 
137  // Find the variable in the environment and its value
138  std::string varValue;
139  if (std::getenv(varName.c_str()) != nullptr) {
140  varValue = std::getenv(varName.c_str());
141  }
142 
143  // Replace the variable placeholder with its value in the original string
144  str = std::regex_replace(str, std::regex("\\$\\{" + varName + "\\}"), varValue);
145 
146  // Continue the loop with the remainder of the string
147  strIter = match.suffix();
148  }
149 
150  return str;
151 }
152 
153 std::string
155  std::ostringstream oss;
156  if (time < 0) {
157  oss << "-";
158  time = -time;
159  }
160  char buffer[10];
161  sprintf(buffer, "%02i:", (time / 3600));
162  oss << buffer;
163  time = time % 3600;
164  sprintf(buffer, "%02i:", (time / 60));
165  oss << buffer;
166  time = time % 60;
167  sprintf(buffer, "%02i", time);
168  oss << buffer;
169  return oss.str();
170 }
171 
172 
173 bool
174 StringUtils::startsWith(const std::string& str, const std::string prefix) {
175  return str.compare(0, prefix.length(), prefix) == 0;
176 }
177 
178 
179 bool
180 StringUtils::endsWith(const std::string& str, const std::string suffix) {
181  if (str.length() >= suffix.length()) {
182  return str.compare(str.length() - suffix.length(), suffix.length(), suffix) == 0;
183  } else {
184  return false;
185  }
186 }
187 
188 
189 std::string
190 StringUtils::escapeXML(const std::string& orig, const bool maskDoubleHyphen) {
191  std::string result = replace(orig, "&", "&amp;");
192  result = replace(result, ">", "&gt;");
193  result = replace(result, "<", "&lt;");
194  result = replace(result, "\"", "&quot;");
195  if (maskDoubleHyphen) {
196  result = replace(result, "--", "&#45;&#45;");
197  }
198  for (char invalid = '\1'; invalid < ' '; invalid++) {
199  result = replace(result, std::string(1, invalid).c_str(), "");
200  }
201  return replace(result, "'", "&apos;");
202 }
203 
204 
205 std::string
206 StringUtils::urlEncode(const std::string& toEncode, const std::string encodeWhich) {
207  std::ostringstream out;
208 
209  for (int i = 0; i < (int)toEncode.length(); ++i) {
210  const char t = toEncode.at(i);
211 
212  if ((encodeWhich != "" && encodeWhich.find(t) == std::string::npos) ||
213  (encodeWhich == "" &&
214  ((t >= 45 && t <= 57) || // hyphen, period, slash, 0-9
215  (t >= 65 && t <= 90) || // A-Z
216  t == 95 || // underscore
217  (t >= 97 && t <= 122) || // a-z
218  t == 126)) // tilde
219  ) {
220  out << toEncode.at(i);
221  } else {
222  out << charToHex(toEncode.at(i));
223  }
224  }
225 
226  return out.str();
227 }
228 
229 
230 std::string
231 StringUtils::urlDecode(const std::string& toDecode) {
232  std::ostringstream out;
233 
234  for (int i = 0; i < (int)toDecode.length(); ++i) {
235  if (toDecode.at(i) == '%') {
236  std::string str(toDecode.substr(i + 1, 2));
237  out << hexToChar(str);
238  i += 2;
239  } else {
240  out << toDecode.at(i);
241  }
242  }
243 
244  return out.str();
245 }
246 
247 std::string
248 StringUtils::charToHex(unsigned char c) {
249  short i = c;
250 
251  std::stringstream s;
252 
253  s << "%" << std::setw(2) << std::setfill('0') << std::hex << i;
254 
255  return s.str();
256 }
257 
258 
259 unsigned char
260 StringUtils::hexToChar(const std::string& str) {
261  short c = 0;
262 
263  if (!str.empty()) {
264  std::istringstream in(str);
265 
266  in >> std::hex >> c;
267 
268  if (in.fail()) {
269  throw std::runtime_error("stream decode failure");
270  }
271  }
272 
273  return static_cast<unsigned char>(c);
274 }
275 
276 
277 int
278 StringUtils::toInt(const std::string& sData) {
279  long long int result = toLong(sData);
280  if (result > std::numeric_limits<int>::max() || result < std::numeric_limits<int>::min()) {
281  throw NumberFormatException(toString(result) + " int overflow");
282  }
283  return (int)result;
284 }
285 
286 
287 int
288 StringUtils::toIntSecure(const std::string& sData, int def) {
289  if (sData.length() == 0) {
290  return def;
291  }
292  return toInt(sData);
293 }
294 
295 
296 long long int
297 StringUtils::toLong(const std::string& sData) {
298  const char* const data = sData.c_str();
299  if (data == 0 || data[0] == 0) {
300  throw EmptyData();
301  }
302  char* end;
303  errno = 0;
304 #ifdef _MSC_VER
305  long long int ret = _strtoi64(data, &end, 10);
306 #else
307  long long int ret = strtoll(data, &end, 10);
308 #endif
309  if (errno == ERANGE) {
310  errno = 0;
311  throw NumberFormatException("(long long integer range) " + sData);
312  }
313  if ((int)(end - data) != (int)strlen(data)) {
314  throw NumberFormatException("(long long integer format) " + sData);
315  }
316  return ret;
317 }
318 
319 
320 int
321 StringUtils::hexToInt(const std::string& sData) {
322  if (sData.length() == 0) {
323  throw EmptyData();
324  }
325  size_t idx = 0;
326  int result;
327  try {
328  if (sData[0] == '#') { // for html color codes
329  result = std::stoi(sData.substr(1), &idx, 16);
330  idx++;
331  } else {
332  result = std::stoi(sData, &idx, 16);
333  }
334  } catch (...) {
335  throw NumberFormatException("(hex integer format) " + sData);
336  }
337  if (idx != sData.length()) {
338  throw NumberFormatException("(hex integer format) " + sData);
339  }
340  return result;
341 }
342 
343 
344 double
345 StringUtils::toDouble(const std::string& sData) {
346  if (sData.size() == 0) {
347  throw EmptyData();
348  }
349  try {
350  size_t idx = 0;
351  const double result = std::stod(sData, &idx);
352  if (idx != sData.size()) {
353  throw NumberFormatException("(double format) " + sData);
354  } else {
355  return result;
356  }
357  } catch (...) {
358  // invalid_argument or out_of_range
359  throw NumberFormatException("(double) " + sData);
360  }
361 }
362 
363 
364 double
365 StringUtils::toDoubleSecure(const std::string& sData, const double def) {
366  if (sData.length() == 0) {
367  return def;
368  }
369  return toDouble(sData);
370 }
371 
372 
373 bool
374 StringUtils::toBool(const std::string& sData) {
375  if (sData.length() == 0) {
376  throw EmptyData();
377  }
378  std::string s = sData;
379  // Don't use std::transform(..., ::tolower) due a C4244 Warning in MSVC17
380  for (int i = 0; i < (int)s.length(); i++) {
381  s[i] = (char)::tolower((char)s[i]);
382  }
383  if (s == "1" || s == "yes" || s == "true" || s == "on" || s == "x" || s == "t") {
384  return true;
385  } else if (s == "0" || s == "no" || s == "false" || s == "off" || s == "-" || s == "f") {
386  return false;
387  } else {
388  throw BoolFormatException(s);
389  }
390 }
391 
392 
393 std::string
394 StringUtils::transcode(const XMLCh* const data, int length) {
395  if (data == 0) {
396  throw EmptyData();
397  }
398  if (length == 0) {
399  return "";
400  }
401 #if _XERCES_VERSION < 30100
403  std::string result(t);
404  XERCES_CPP_NAMESPACE::XMLString::release(&t);
405  return result;
406 #else
407  try {
408  XERCES_CPP_NAMESPACE::TranscodeToStr utf8(data, "UTF-8");
409  return reinterpret_cast<const char*>(utf8.str());
410  } catch (XERCES_CPP_NAMESPACE::TranscodingException&) {
411  return "?";
412  }
413 #endif
414 }
415 
416 
417 /****************************************************************************/
StringUtils::replace
static std::string replace(std::string str, const char *what, const char *by)
Definition: StringUtils.cpp:105
StringUtils::charToHex
static std::string charToHex(unsigned char c)
Definition: StringUtils.cpp:248
ToString.h
StringUtils::urlDecode
static std::string urlDecode(const std::string &encoded)
Definition: StringUtils.cpp:231
StringUtils::toBool
static bool toBool(const std::string &sData)
converts a string into the bool value described by it by calling the char-type converter
Definition: StringUtils.cpp:374
StringUtils::toDouble
static double toDouble(const std::string &sData)
converts a string into the double value described by it by calling the char-type converter
Definition: StringUtils.cpp:345
StringUtils::toDoubleSecure
static double toDoubleSecure(const std::string &sData, const double def)
converts a string into the double value described by it, or returns def if the string is empty
Definition: StringUtils.cpp:365
StringUtils::to_lower_case
static std::string to_lower_case(std::string str)
Transfers the content to lower case.
Definition: StringUtils.cpp:59
EmptyData
Definition: UtilExceptions.h:68
StringUtils::convertUmlaute
static std::string convertUmlaute(std::string str)
Converts German "Umlaute" to their Latin version.
Definition: StringUtils.cpp:87
NumberFormatException
Definition: UtilExceptions.h:95
StringUtils::prune
static std::string prune(const std::string &str)
Removes trailing and leading whitespace characters.
Definition: StringUtils.cpp:48
StringUtils::endsWith
static bool endsWith(const std::string &str, const std::string suffix)
Checks whether a given string ends with the suffix.
Definition: StringUtils.cpp:180
StringUtils::hexToChar
static unsigned char hexToChar(const std::string &str)
Definition: StringUtils.cpp:260
StringUtils::toTimeString
static std::string toTimeString(int time)
Builds a time string (hh:mm:ss) from the given seconds.
Definition: StringUtils.cpp:154
StringUtils::escapeXML
static std::string escapeXML(const std::string &orig, const bool maskDoubleHyphen=false)
Replaces the standard escapes by their XML entities.
Definition: StringUtils.cpp:190
UtilExceptions.h
StringUtils::startsWith
static bool startsWith(const std::string &str, const std::string prefix)
Checks whether a given string starts with the prefix.
Definition: StringUtils.cpp:174
StringUtils::hexToInt
static int hexToInt(const std::string &sData)
converts a string with a hex value into the integer value described by it by calling the char-type co...
Definition: StringUtils.cpp:321
BoolFormatException
Definition: UtilExceptions.h:121
StringUtils::toIntSecure
static int toIntSecure(const std::string &sData, int def)
converts a string into the integer value described by it
Definition: StringUtils.cpp:288
StringUtils::latin1_to_utf8
static std::string latin1_to_utf8(std::string str)
Transfers from Latin 1 (ISO-8859-1) to UTF-8.
Definition: StringUtils.cpp:70
toString
std::string toString(const T &t, std::streamsize accuracy=gPrecision)
Definition: ToString.h:47
StringUtils::substituteEnvironment
static std::string substituteEnvironment(std::string str)
Definition: StringUtils.cpp:122
StringUtils.h
StringUtils::urlEncode
static std::string urlEncode(const std::string &url, const std::string encodeWhich="")
Definition: StringUtils.cpp:206
StringUtils::transcode
static std::string transcode(const XMLCh *const data)
converts a 0-terminated XMLCh* array (usually UTF-16, stemming from Xerces) into std::string in UTF-8
Definition: StringUtils.h:136
StringUtils::toInt
static int toInt(const std::string &sData)
converts a string into the integer value described by it by calling the char-type converter,...
Definition: StringUtils.cpp:278
transcode
std::string transcode(const XMLCh *const qname)
Definition: VehicleEngineHandler.cpp:37
StringUtils::emptyString
static std::string emptyString
An empty string.
Definition: StringUtils.h:83
config.h
StringUtils::toLong
static long long int toLong(const std::string &sData)
converts a string into the long value described by it by calling the char-type converter,...
Definition: StringUtils.cpp:297