Commit e44c395c51518bafbf8f8466ea5a0f4b1f2b2efe
1 parent
44674a3e
QUtil::toUTF16
Showing
4 changed files
with
75 additions
and
0 deletions
include/qpdf/QUtil.hh
| @@ -140,6 +140,12 @@ namespace QUtil | @@ -140,6 +140,12 @@ namespace QUtil | ||
| 140 | QPDF_DLL | 140 | QPDF_DLL |
| 141 | std::string toUTF8(unsigned long uval); | 141 | std::string toUTF8(unsigned long uval); |
| 142 | 142 | ||
| 143 | + // Return a string containing the bytes of the big-endian UTF-16 | ||
| 144 | + // encoding (no byte order mark) of the Unicode code point passed in. | ||
| 145 | + // Surrogates (U+D800-U+DFFF) and values above U+10FFFF become U+FFFD. | ||
| 146 | + QPDF_DLL | ||
| 147 | + std::string toUTF16(unsigned long uval); | ||
| 148 | + | ||
| 143 | // If secure random number generation is supported on your | 149 | // If secure random number generation is supported on your |
| 144 | // platform and qpdf was not compiled with insecure random number | 150 | // platform and qpdf was not compiled with insecure random number |
| 145 | // generation, this returns a cryptographically secure random | 151 | // generation, this returns a cryptographically secure random |
libqpdf/QUtil.cc
| @@ -506,6 +506,41 @@ QUtil::toUTF8(unsigned long uval) | @@ -506,6 +506,41 @@ QUtil::toUTF8(unsigned long uval) | ||
| 506 | return result; | 506 | return result; |
| 507 | } | 507 | } |
| 508 | 508 | ||
| 509 | +std::string | ||
| 510 | +QUtil::toUTF16(unsigned long uval) | ||
| 511 | +{ | ||
| 512 | + std::string result; | ||
| 513 | + if ((uval >= 0xd800) && (uval <= 0xdfff)) | ||
| 514 | + { | ||
| 515 | + result = "\xff\xfd"; | ||
| 516 | + } | ||
| 517 | + else if (uval <= 0xffff) | ||
| 518 | + { | ||
| 519 | + char out[2]; | ||
| 520 | + out[0] = (uval & 0xff00) >> 8; | ||
| 521 | + out[1] = (uval & 0xff); | ||
| 522 | + result = std::string(out, 2); | ||
| 523 | + } | ||
| 524 | + else if (uval <= 0x10ffff) | ||
| 525 | + { | ||
| 526 | + char out[4]; | ||
| 527 | + uval -= 0x10000; | ||
| 528 | + unsigned short high = ((uval & 0xffc00) >> 10) + 0xd800; | ||
| 529 | + unsigned short low = (uval & 0x3ff) + 0xdc00; | ||
| 530 | + out[0] = (high & 0xff00) >> 8; | ||
| 531 | + out[1] = (high & 0xff); | ||
| 532 | + out[2] = (low & 0xff00) >> 8; | ||
| 533 | + out[3] = (low & 0xff); | ||
| 534 | + result = std::string(out, 4); | ||
| 535 | + } | ||
| 536 | + else | ||
| 537 | + { | ||
| 538 | + result = "\xff\xfd"; | ||
| 539 | + } | ||
| 540 | + | ||
| 541 | + return result; | ||
| 542 | +} | ||
| 543 | + | ||
| 509 | // Random data support | 544 | // Random data support |
| 510 | 545 | ||
| 511 | long | 546 | long |
libtests/qtest/qutil/qutil.out
| @@ -39,6 +39,14 @@ HAGOOGAMAGOOGLE: 0 | @@ -39,6 +39,14 @@ HAGOOGAMAGOOGLE: 0 | ||
| 39 | 0x16059 -> f0 96 81 99 | 39 | 0x16059 -> f0 96 81 99 |
| 40 | 0x7fffffff -> fd bf bf bf bf bf | 40 | 0x7fffffff -> fd bf bf bf bf bf |
| 41 | 0x80000000: bounds error in QUtil::toUTF8 | 41 | 0x80000000: bounds error in QUtil::toUTF8 |
| 42 | +---- utf16 | ||
| 43 | +0x41 -> 00 41 | ||
| 44 | +0xf7 -> 00 f7 | ||
| 45 | +0x3c0 -> 03 c0 | ||
| 46 | +0x16059 -> d8 18 dc 59 | ||
| 47 | +0xdead -> ff fd | ||
| 48 | +0x7fffffff -> ff fd | ||
| 49 | +0x80000000 -> ff fd | ||
| 42 | ---- whoami | 50 | ---- whoami |
| 43 | quack1 | 51 | quack1 |
| 44 | quack2 | 52 | quack2 |
libtests/qutil.cc
| @@ -193,6 +193,30 @@ void to_utf8_test() | @@ -193,6 +193,30 @@ void to_utf8_test() | ||
| 193 | } | 193 | } |
| 194 | } | 194 | } |
| 195 | 195 | ||
| 196 | +static void print_utf16(unsigned long val) | ||
| 197 | +{ | ||
| 198 | + std::string result = QUtil::toUTF16(val); | ||
| 199 | + std::cout << "0x" << QUtil::int_to_string_base(val, 16) << " ->"; | ||
| 200 | + for (std::string::iterator iter = result.begin(); | ||
| 201 | + iter != result.end(); ++iter) | ||
| 202 | + { | ||
| 203 | + std::cout << " " << QUtil::int_to_string_base( | ||
| 204 | + static_cast<int>(static_cast<unsigned char>(*iter)), 16, 2); | ||
| 205 | + } | ||
| 206 | + std::cout << std::endl; | ||
| 207 | +} | ||
| 208 | + | ||
| 209 | +void to_utf16_test() | ||
| 210 | +{ | ||
| 211 | + print_utf16(0x41UL); | ||
| 212 | + print_utf16(0xF7UL); | ||
| 213 | + print_utf16(0x3c0UL); | ||
| 214 | + print_utf16(0x16059UL); | ||
| 215 | + print_utf16(0xdeadUL); | ||
| 216 | + print_utf16(0x7fffffffUL); | ||
| 217 | + print_utf16(0x80000000UL); | ||
| 218 | +} | ||
| 219 | + | ||
| 196 | void print_whoami(char const* str) | 220 | void print_whoami(char const* str) |
| 197 | { | 221 | { |
| 198 | PointerHolder<char> dup(true, QUtil::copy_string(str)); | 222 | PointerHolder<char> dup(true, QUtil::copy_string(str)); |
| @@ -299,6 +323,8 @@ int main(int argc, char* argv[]) | @@ -299,6 +323,8 @@ int main(int argc, char* argv[]) | ||
| 299 | getenv_test(); | 323 | getenv_test(); |
| 300 | std::cout << "---- utf8" << std::endl; | 324 | std::cout << "---- utf8" << std::endl; |
| 301 | to_utf8_test(); | 325 | to_utf8_test(); |
| 326 | + std::cout << "---- utf16" << std::endl; | ||
| 327 | + to_utf16_test(); | ||
| 302 | std::cout << "---- whoami" << std::endl; | 328 | std::cout << "---- whoami" << std::endl; |
| 303 | get_whoami_test(); | 329 | get_whoami_test(); |
| 304 | std::cout << "---- file" << std::endl; | 330 | std::cout << "---- file" << std::endl; |