Static Public Member Functions | List of all members
utf_decoder< Traits, opt_swap > Struct Template Reference

Static Public Member Functions

static Traits::value_type decode_utf8_block (const uint8_t *data, size_t size, typename Traits::value_type result)
 
static Traits::value_type decode_utf16_block (const uint16_t *data, size_t size, typename Traits::value_type result)
 
static Traits::value_type decode_utf32_block (const uint32_t *data, size_t size, typename Traits::value_type result)
 
static Traits::value_type decode_latin1_block (const uint8_t *data, size_t size, typename Traits::value_type result)
 
static Traits::value_type decode_wchar_block_impl (const uint16_t *data, size_t size, typename Traits::value_type result)
 
static Traits::value_type decode_wchar_block_impl (const uint32_t *data, size_t size, typename Traits::value_type result)
 
static Traits::value_type decode_wchar_block (const wchar_t *data, size_t size, typename Traits::value_type result)
 

Detailed Description

template<typename Traits, typename opt_swap = opt_false>
struct utf_decoder< Traits, opt_swap >

Definition at line 952 of file pugixml.cpp.

Member Function Documentation

◆ decode_latin1_block()

template<typename Traits , typename opt_swap = opt_false>
static Traits::value_type utf_decoder< Traits, opt_swap >::decode_latin1_block ( const uint8_t *  data,
size_t  size,
typename Traits::value_type  result 
)
inline static

Definition at line 1085 of file pugixml.cpp.

1086  {
1087  for (size_t i = 0; i < size; ++i)
1088  {
1089  result = Traits::low(result, data[i]);
1090  }
1091 
1092  return result;
1093  }

Referenced by convert_buffer_latin1().

◆ decode_utf16_block()

template<typename Traits , typename opt_swap = opt_false>
static Traits::value_type utf_decoder< Traits, opt_swap >::decode_utf16_block ( const uint16_t *  data,
size_t  size,
typename Traits::value_type  result 
)
inline static

Definition at line 1016 of file pugixml.cpp.

1017  {
1018  const uint16_t* end = data + size;
1019 
1020  while (data < end)
1021  {
1022  unsigned int lead = opt_swap::value ? endian_swap(*data) : *data;
1023 
1024  // U+0000..U+D7FF
1025  if (lead < 0xD800)
1026  {
1027  result = Traits::low(result, lead);
1028  data += 1;
1029  }
1030  // U+E000..U+FFFF
1031  else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
1032  {
1033  result = Traits::low(result, lead);
1034  data += 1;
1035  }
1036  // surrogate pair lead
1037  else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && data + 1 < end)
1038  {
1039  uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
1040 
1041  if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
1042  {
1043  result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
1044  data += 2;
1045  }
1046  else
1047  {
1048  data += 1;
1049  }
1050  }
1051  else
1052  {
1053  data += 1;
1054  }
1055  }
1056 
1057  return result;
1058  }

References endian_swap().

Referenced by convert_buffer_utf16(), and utf_decoder< Traits, opt_swap >::decode_wchar_block_impl().

◆ decode_utf32_block()

template<typename Traits , typename opt_swap = opt_false>
static Traits::value_type utf_decoder< Traits, opt_swap >::decode_utf32_block ( const uint32_t *  data,
size_t  size,
typename Traits::value_type  result 
)
inline static

Definition at line 1060 of file pugixml.cpp.

1061  {
1062  const uint32_t* end = data + size;
1063 
1064  while (data < end)
1065  {
1066  uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
1067 
1068  // U+0000..U+FFFF
1069  if (lead < 0x10000)
1070  {
1071  result = Traits::low(result, lead);
1072  data += 1;
1073  }
1074  // U+10000..U+10FFFF
1075  else
1076  {
1077  result = Traits::high(result, lead);
1078  data += 1;
1079  }
1080  }
1081 
1082  return result;
1083  }

References endian_swap().

Referenced by convert_buffer_utf32(), and utf_decoder< Traits, opt_swap >::decode_wchar_block_impl().

◆ decode_utf8_block()

template<typename Traits , typename opt_swap = opt_false>
static Traits::value_type utf_decoder< Traits, opt_swap >::decode_utf8_block ( const uint8_t *  data,
size_t  size,
typename Traits::value_type  result 
)
inline static

Definition at line 954 of file pugixml.cpp.

955  {
956  const uint8_t utf8_byte_mask = 0x3f;
957 
958  while (size)
959  {
960  uint8_t lead = *data;
961 
962  // 0xxxxxxx -> U+0000..U+007F
963  if (lead < 0x80)
964  {
965  result = Traits::low(result, lead);
966  data += 1;
967  size -= 1;
968 
969  // process aligned single-byte (ascii) blocks
970  if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
971  {
972  // round-trip through void* to silence 'cast increases required alignment of target type' warnings
973  while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
974  {
975  result = Traits::low(result, data[0]);
976  result = Traits::low(result, data[1]);
977  result = Traits::low(result, data[2]);
978  result = Traits::low(result, data[3]);
979  data += 4;
980  size -= 4;
981  }
982  }
983  }
984  // 110xxxxx -> U+0080..U+07FF
985  else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
986  {
987  result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
988  data += 2;
989  size -= 2;
990  }
991  // 1110xxxx -> U+0800-U+FFFF
992  else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
993  {
994  result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
995  data += 3;
996  size -= 3;
997  }
998  // 11110xxx -> U+10000..U+10FFFF
999  else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
1000  {
1001  result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
1002  data += 4;
1003  size -= 4;
1004  }
1005  // 10xxxxxx or 11111xxx -> invalid
1006  else
1007  {
1008  data += 1;
1009  size -= 1;
1010  }
1011  }
1012 
1013  return result;
1014  }

Referenced by as_wide_impl(), and convert_buffer_output().

◆ decode_wchar_block()

template<typename Traits , typename opt_swap = opt_false>
static Traits::value_type utf_decoder< Traits, opt_swap >::decode_wchar_block ( const wchar_t *  data,
size_t  size,
typename Traits::value_type  result 
)
inline static

Definition at line 1105 of file pugixml.cpp.

1106  {
1107  return decode_wchar_block_impl(reinterpret_cast<const wchar_selector<sizeof(wchar_t)>::type*>(data), size, result);
1108  }

References utf_decoder< Traits, opt_swap >::decode_wchar_block_impl().

Referenced by as_utf8_begin(), and as_utf8_end().

◆ decode_wchar_block_impl() [1/2]

template<typename Traits , typename opt_swap = opt_false>
static Traits::value_type utf_decoder< Traits, opt_swap >::decode_wchar_block_impl ( const uint16_t *  data,
size_t  size,
typename Traits::value_type  result 
)
inline static

Definition at line 1095 of file pugixml.cpp.

1096  {
1097  return decode_utf16_block(data, size, result);
1098  }

References utf_decoder< Traits, opt_swap >::decode_utf16_block().

Referenced by utf_decoder< Traits, opt_swap >::decode_wchar_block().

◆ decode_wchar_block_impl() [2/2]

template<typename Traits , typename opt_swap = opt_false>
static Traits::value_type utf_decoder< Traits, opt_swap >::decode_wchar_block_impl ( const uint32_t *  data,
size_t  size,
typename Traits::value_type  result 
)
inline static

Definition at line 1100 of file pugixml.cpp.

1101  {
1102  return decode_utf32_block(data, size, result);
1103  }

References utf_decoder< Traits, opt_swap >::decode_utf32_block().


The documentation for this struct was generated from the following file:
pugixml.cpp
uint16_t
unsigned short uint16_t
Definition: stdint_msvc.h:80
utf_decoder::decode_utf16_block
static Traits::value_type decode_utf16_block(const uint16_t *data, size_t size, typename Traits::value_type result)
Definition: pugixml.cpp:1016
wchar_selector
Definition: pugixml.cpp:933
uintptr_t
_W64 unsigned int uintptr_t
Definition: stdint_msvc.h:120
uint8_t
unsigned char uint8_t
Definition: stdint_msvc.h:79
utf_decoder::decode_utf32_block
static Traits::value_type decode_utf32_block(const uint32_t *data, size_t size, typename Traits::value_type result)
Definition: pugixml.cpp:1060
utf_decoder::decode_wchar_block_impl
static Traits::value_type decode_wchar_block_impl(const uint16_t *data, size_t size, typename Traits::value_type result)
Definition: pugixml.cpp:1095
uint32_t
unsigned int uint32_t
Definition: stdint_msvc.h:81
endian_swap
PUGI__NS_END PUGI__NS_BEGIN uint16_t endian_swap(uint16_t value)
Definition: pugixml.cpp:752

Generated on Wed Apr 29 2020 19:41:30 for QuickFIX by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2001