Commit e44c395c51518bafbf8f8466ea5a0f4b1f2b2efe

Authored by Jay Berkenbilt
1 parent 44674a3e

QUtil::toUTF16

include/qpdf/QUtil.hh
... ... @@ -140,6 +140,12 @@ namespace QUtil
140 140 QPDF_DLL
141 141 std::string toUTF8(unsigned long uval);
142 142  
  143 + // Return a string containing the bytes of the big-endian UTF-16
  144 + // encoding of the Unicode code point passed in. Surrogates and
  145 + // values above 0x10FFFF are converted to U+FFFD.
  146 + QPDF_DLL
  147 + std::string toUTF16(unsigned long uval);
  148 +
143 149 // If secure random number generation is supported on your
144 150 // platform and qpdf was not compiled with insecure random number
145 151 // generation, this returns a cryptographically secure random
... ...
libqpdf/QUtil.cc
... ... @@ -506,6 +506,41 @@ QUtil::toUTF8(unsigned long uval)
506 506 return result;
507 507 }
508 508  
// Encode a single Unicode code point as big-endian UTF-16 and return
// the raw bytes: two bytes for values up to 0xffff, or four bytes (a
// surrogate pair) for values from 0x10000 through 0x10ffff. Surrogate
// code points (0xd800-0xdfff) and values above 0x10ffff cannot be
// encoded; for those the two bytes of U+FFFD are returned instead.
std::string
QUtil::toUTF16(unsigned long uval)
{
    bool const is_surrogate = (uval >= 0xd800) && (uval <= 0xdfff);
    if (is_surrogate || (uval > 0x10ffff))
    {
        // U+FFFD (replacement character), most significant byte first
        return std::string("\xff\xfd");
    }

    std::string encoded;
    if (uval <= 0xffff)
    {
        // Fits in one 16-bit code unit: high byte, then low byte.
        encoded += static_cast<char>((uval >> 8) & 0xff);
        encoded += static_cast<char>(uval & 0xff);
        return encoded;
    }

    // Supplementary code point: subtract 0x10000 to get a 20-bit
    // offset, then put the top ten bits in the lead surrogate and the
    // bottom ten bits in the trail surrogate.
    unsigned long const offset = uval - 0x10000;
    unsigned long const lead = 0xd800 + ((offset >> 10) & 0x3ff);
    unsigned long const trail = 0xdc00 + (offset & 0x3ff);
    encoded += static_cast<char>((lead >> 8) & 0xff);
    encoded += static_cast<char>(lead & 0xff);
    encoded += static_cast<char>((trail >> 8) & 0xff);
    encoded += static_cast<char>(trail & 0xff);
    return encoded;
}
  543 +
509 544 // Random data support
510 545  
511 546 long
... ...
libtests/qtest/qutil/qutil.out
... ... @@ -39,6 +39,14 @@ HAGOOGAMAGOOGLE: 0
39 39 0x16059 -> f0 96 81 99
40 40 0x7fffffff -> fd bf bf bf bf bf
41 41 0x80000000: bounds error in QUtil::toUTF8
  42 +---- utf16
  43 +0x41 -> 00 41
  44 +0xf7 -> 00 f7
  45 +0x3c0 -> 03 c0
  46 +0x16059 -> d8 18 dc 59
  47 +0xdead -> ff fd
  48 +0x7fffffff -> ff fd
  49 +0x80000000 -> ff fd
42 50 ---- whoami
43 51 quack1
44 52 quack2
... ...
libtests/qutil.cc
... ... @@ -193,6 +193,30 @@ void to_utf8_test()
193 193 }
194 194 }
195 195  
  196 +static void print_utf16(unsigned long val)
  197 +{
  198 + std::string result = QUtil::toUTF16(val);
  199 + std::cout << "0x" << QUtil::int_to_string_base(val, 16) << " ->";
  200 + for (std::string::iterator iter = result.begin();
  201 + iter != result.end(); ++iter)
  202 + {
  203 + std::cout << " " << QUtil::int_to_string_base(
  204 + static_cast<int>(static_cast<unsigned char>(*iter)), 16, 2);
  205 + }
  206 + std::cout << std::endl;
  207 +}
  208 +
  209 +void to_utf16_test()
  210 +{
  211 + print_utf16(0x41UL);
  212 + print_utf16(0xF7UL);
  213 + print_utf16(0x3c0UL);
  214 + print_utf16(0x16059UL);
  215 + print_utf16(0xdeadUL);
  216 + print_utf16(0x7fffffffUL);
  217 + print_utf16(0x80000000UL);
  218 +}
  219 +
196 220 void print_whoami(char const* str)
197 221 {
198 222 PointerHolder<char> dup(true, QUtil::copy_string(str));
... ... @@ -299,6 +323,8 @@ int main(int argc, char* argv[])
299 323 getenv_test();
300 324 std::cout << "---- utf8" << std::endl;
301 325 to_utf8_test();
  326 + std::cout << "---- utf16" << std::endl;
  327 + to_utf16_test();
302 328 std::cout << "---- whoami" << std::endl;
303 329 get_whoami_test();
304 330 std::cout << "---- file" << std::endl;
... ...