Commit e810fe678a5615e3a4bfa16543bfdbdad78cd273
1 parent e35abe2f
Fix asymmetry between newUnicodeString and getUTF8Value
Showing 3 changed files with 30 additions and 4 deletions
ChangeLog
| 1 | 2022-02-15 Jay Berkenbilt <ejb@ql.org> | 1 | 2022-02-15 Jay Berkenbilt <ejb@ql.org> |
| 2 | 2 | ||
| 3 | + * Fix asymmetrical logic between | ||
| 4 | + QPDFObjectHandle::newUnicodeString() and | ||
| 5 | + QPDFObjectHandle::getUTF8Value(). The asymmetrical logic didn't | ||
| 6 | + matter before fixing the PDF Doc transcoding bugs. | ||
| 7 | + | ||
| 3 | * When analyzing PDF strings, recognize UTF-16LE as UTF-16. The | 8 | * When analyzing PDF strings, recognize UTF-16LE as UTF-16. The |
| 4 | PDF spec only allows UTF-16BE, but most readers seem to allow | 9 | PDF spec only allows UTF-16BE, but most readers seem to allow |
| 5 | both. Fixes #649. | 10 | both. Fixes #649. |
libqpdf/QPDF_String.cc
| @@ -32,8 +32,7 @@ QPDF_String* | @@ -32,8 +32,7 @@ QPDF_String* | ||
| 32 | QPDF_String::new_utf16(std::string const& utf8_val) | 32 | QPDF_String::new_utf16(std::string const& utf8_val) |
| 33 | { | 33 | { |
| 34 | std::string result; | 34 | std::string result; |
| 35 | - if (! (QUtil::utf8_to_ascii(utf8_val, result, '?') || | ||
| 36 | - QUtil::utf8_to_pdf_doc(utf8_val, result, '?'))) | 35 | + if (! QUtil::utf8_to_pdf_doc(utf8_val, result, '?')) |
| 37 | { | 36 | { |
| 38 | result = QUtil::utf8_to_utf16(utf8_val); | 37 | result = QUtil::utf8_to_utf16(utf8_val); |
| 39 | } | 38 | } |
qpdf/test_driver.cc
| @@ -3325,12 +3325,34 @@ static void test_85(QPDF& pdf, char const* arg2) | @@ -3325,12 +3325,34 @@ static void test_85(QPDF& pdf, char const* arg2) | ||
| 3325 | assert(s == "/Test"); | 3325 | assert(s == "/Test"); |
| 3326 | } | 3326 | } |
| 3327 | 3327 | ||
| 3328 | +static void test_86(QPDF& pdf, char const* arg2) | ||
| 3329 | +{ | ||
| 3330 | + // Test symmetry between newUnicodeString and getUTF8Value for | ||
| 3331 | + // strings that can't be encoded as PDFDoc but don't contain any | ||
| 3332 | + // high code points. | ||
| 3333 | + | ||
| 3334 | + std::string utf8_val("\x1f"); | ||
| 3335 | + std::string utf16_val("\xfe\xff\x00\x1f", 4); | ||
| 3336 | + std::string result; | ||
| 3337 | + assert(QUtil::utf8_to_ascii(utf8_val, result, '?')); | ||
| 3338 | + assert(result == "\x1f"); | ||
| 3339 | + assert(! QUtil::utf8_to_pdf_doc(utf8_val, result, '?')); | ||
| 3340 | + assert(result == "?"); | ||
| 3341 | + assert(QUtil::utf8_to_utf16(utf8_val) == utf16_val); | ||
| 3342 | + assert(QUtil::utf16_to_utf8(utf16_val) == utf8_val); | ||
| 3343 | + auto h = QPDFObjectHandle::newUnicodeString("\x1f"); | ||
| 3344 | + assert(h.getStringValue() == std::string("\xfe\xff\x00\x1f", 4)); | ||
| 3345 | + assert(h.getUTF8Value() == "\x1f"); | ||
| 3346 | +} | ||
| 3347 | + | ||
| 3328 | void runtest(int n, char const* filename1, char const* arg2) | 3348 | void runtest(int n, char const* filename1, char const* arg2) |
| 3329 | { | 3349 | { |
| 3330 | // Most tests here are crafted to work on specific files. Look at | 3350 | // Most tests here are crafted to work on specific files. Look at |
| 3331 | // the test suite to see how the test is invoked to find the file | 3351 | // the test suite to see how the test is invoked to find the file |
| 3332 | // that the test is supposed to operate on. | 3352 | // that the test is supposed to operate on. |
| 3333 | 3353 | ||
| 3354 | + std::set<int> ignore_filename = {61, 81, 83, 84, 85, 86}; | ||
| 3355 | + | ||
| 3334 | if (n == 0) | 3356 | if (n == 0) |
| 3335 | { | 3357 | { |
| 3336 | // Throw in some random test cases that don't fit anywhere | 3358 | // Throw in some random test cases that don't fit anywhere |
| @@ -3391,7 +3413,7 @@ void runtest(int n, char const* filename1, char const* arg2) | @@ -3391,7 +3413,7 @@ void runtest(int n, char const* filename1, char const* arg2) | ||
| 3391 | pdf.processMemoryFile((std::string(filename1) + ".pdf").c_str(), | 3413 | pdf.processMemoryFile((std::string(filename1) + ".pdf").c_str(), |
| 3392 | p, size); | 3414 | p, size); |
| 3393 | } | 3415 | } |
| 3394 | - else if ((n == 61) || (n == 81) || (n == 83) || (n == 84) || (n == 85)) | 3416 | + else if (ignore_filename.count(n)) |
| 3395 | { | 3417 | { |
| 3396 | // Ignore filename argument entirely | 3418 | // Ignore filename argument entirely |
| 3397 | } | 3419 | } |
| @@ -3439,7 +3461,7 @@ void runtest(int n, char const* filename1, char const* arg2) | @@ -3439,7 +3461,7 @@ void runtest(int n, char const* filename1, char const* arg2) | ||
| 3439 | {72, test_72}, {73, test_73}, {74, test_74}, {75, test_75}, | 3461 | {72, test_72}, {73, test_73}, {74, test_74}, {75, test_75}, |
| 3440 | {76, test_76}, {77, test_77}, {78, test_78}, {79, test_79}, | 3462 | {76, test_76}, {77, test_77}, {78, test_78}, {79, test_79}, |
| 3441 | {80, test_80}, {81, test_81}, {82, test_82}, {83, test_83}, | 3463 | {80, test_80}, {81, test_81}, {82, test_82}, {83, test_83}, |
| 3442 | - {84, test_84}, {85, test_85}, | 3464 | + {84, test_84}, {85, test_85}, {86, test_86}, |
| 3443 | }; | 3465 | }; |
| 3444 | 3466 | ||
| 3445 | auto fn = test_functions.find(n); | 3467 | auto fn = test_functions.find(n); |