Commit e44c395c51518bafbf8f8466ea5a0f4b1f2b2efe
1 parent
44674a3e
QUtil::toUTF16
Showing
4 changed files
with
75 additions
and
0 deletions
include/qpdf/QUtil.hh
| ... | ... | @@ -140,6 +140,12 @@ namespace QUtil |
| 140 | 140 | QPDF_DLL |
| 141 | 141 | std::string toUTF8(unsigned long uval); |
| 142 | 142 | |
| 143 | + // Return a string containing the byte representation of the | |
| 144 | + // UTF-16 BE encoding for the Unicode value passed in. | |
| 145 | + // Unrepresentable code points are converted to U+FFFD. | |
| 146 | + QPDF_DLL | |
| 147 | + std::string toUTF16(unsigned long uval); | |
| 148 | + | |
| 143 | 149 | // If secure random number generation is supported on your |
| 144 | 150 | // platform and qpdf was not compiled with insecure random number |
| 145 | 151 | // generation, this returns a cryptographically secure random | ... | ... |
libqpdf/QUtil.cc
| ... | ... | @@ -506,6 +506,41 @@ QUtil::toUTF8(unsigned long uval) |
| 506 | 506 | return result; |
| 507 | 507 | } |
| 508 | 508 | |
| 509 | +std::string | |
| 510 | +QUtil::toUTF16(unsigned long uval) | |
| 511 | +{ | |
| 512 | + std::string result; | |
| 513 | + if ((uval >= 0xd800) && (uval <= 0xdfff)) | |
| 514 | + { | |
| 515 | + result = "\xff\xfd"; | |
| 516 | + } | |
| 517 | + else if (uval <= 0xffff) | |
| 518 | + { | |
| 519 | + char out[2]; | |
| 520 | + out[0] = (uval & 0xff00) >> 8; | |
| 521 | + out[1] = (uval & 0xff); | |
| 522 | + result = std::string(out, 2); | |
| 523 | + } | |
| 524 | + else if (uval <= 0x10ffff) | |
| 525 | + { | |
| 526 | + char out[4]; | |
| 527 | + uval -= 0x10000; | |
| 528 | + unsigned short high = ((uval & 0xffc00) >> 10) + 0xd800; | |
| 529 | + unsigned short low = (uval & 0x3ff) + 0xdc00; | |
| 530 | + out[0] = (high & 0xff00) >> 8; | |
| 531 | + out[1] = (high & 0xff); | |
| 532 | + out[2] = (low & 0xff00) >> 8; | |
| 533 | + out[3] = (low & 0xff); | |
| 534 | + result = std::string(out, 4); | |
| 535 | + } | |
| 536 | + else | |
| 537 | + { | |
| 538 | + result = "\xff\xfd"; | |
| 539 | + } | |
| 540 | + | |
| 541 | + return result; | |
| 542 | +} | |
| 543 | + | |
| 509 | 544 | // Random data support |
| 510 | 545 | |
| 511 | 546 | long | ... | ... |
libtests/qtest/qutil/qutil.out
| ... | ... | @@ -39,6 +39,14 @@ HAGOOGAMAGOOGLE: 0 |
| 39 | 39 | 0x16059 -> f0 96 81 99 |
| 40 | 40 | 0x7fffffff -> fd bf bf bf bf bf |
| 41 | 41 | 0x80000000: bounds error in QUtil::toUTF8 |
| 42 | +---- utf16 | |
| 43 | +0x41 -> 00 41 | |
| 44 | +0xf7 -> 00 f7 | |
| 45 | +0x3c0 -> 03 c0 | |
| 46 | +0x16059 -> d8 18 dc 59 | |
| 47 | +0xdead -> ff fd | |
| 48 | +0x7fffffff -> ff fd | |
| 49 | +0x80000000 -> ff fd | |
| 42 | 50 | ---- whoami |
| 43 | 51 | quack1 |
| 44 | 52 | quack2 | ... | ... |
libtests/qutil.cc
| ... | ... | @@ -193,6 +193,30 @@ void to_utf8_test() |
| 193 | 193 | } |
| 194 | 194 | } |
| 195 | 195 | |
| 196 | +static void print_utf16(unsigned long val) | |
| 197 | +{ | |
| 198 | + std::string result = QUtil::toUTF16(val); | |
| 199 | + std::cout << "0x" << QUtil::int_to_string_base(val, 16) << " ->"; | |
| 200 | + for (std::string::iterator iter = result.begin(); | |
| 201 | + iter != result.end(); ++iter) | |
| 202 | + { | |
| 203 | + std::cout << " " << QUtil::int_to_string_base( | |
| 204 | + static_cast<int>(static_cast<unsigned char>(*iter)), 16, 2); | |
| 205 | + } | |
| 206 | + std::cout << std::endl; | |
| 207 | +} | |
| 208 | + | |
| 209 | +void to_utf16_test() | |
| 210 | +{ | |
| 211 | + print_utf16(0x41UL); | |
| 212 | + print_utf16(0xF7UL); | |
| 213 | + print_utf16(0x3c0UL); | |
| 214 | + print_utf16(0x16059UL); | |
| 215 | + print_utf16(0xdeadUL); | |
| 216 | + print_utf16(0x7fffffffUL); | |
| 217 | + print_utf16(0x80000000UL); | |
| 218 | +} | |
| 219 | + | |
| 196 | 220 | void print_whoami(char const* str) |
| 197 | 221 | { |
| 198 | 222 | PointerHolder<char> dup(true, QUtil::copy_string(str)); |
| ... | ... | @@ -299,6 +323,8 @@ int main(int argc, char* argv[]) |
| 299 | 323 | getenv_test(); |
| 300 | 324 | std::cout << "---- utf8" << std::endl; |
| 301 | 325 | to_utf8_test(); |
| 326 | + std::cout << "---- utf16" << std::endl; | |
| 327 | + to_utf16_test(); | |
| 302 | 328 | std::cout << "---- whoami" << std::endl; |
| 303 | 329 | get_whoami_test(); |
| 304 | 330 | std::cout << "---- file" << std::endl; | ... | ... |