Commit e810fe678a5615e3a4bfa16543bfdbdad78cd273

Authored by Jay Berkenbilt
1 parent e35abe2f

Fix asymmetry between newUnicodeString and getUTF8Value

ChangeLog
1 2022-02-15 Jay Berkenbilt <ejb@ql.org> 1 2022-02-15 Jay Berkenbilt <ejb@ql.org>
2 2
  3 + * Fix asymmetrical logic between
  4 + QPDFObjectHandle::newUnicodeString() and
  5 + QPDFObjectHandle::getUTF8Value(). The asymmetrical logic didn't
  6 + matter before fixing the PDF Doc transcoding bugs.
  7 +
3 * When analyzing PDF strings, recognize UTF-16LE as UTF-16. The 8 * When analyzing PDF strings, recognize UTF-16LE as UTF-16. The
4 PDF spec only allows UTF-16BE, but most readers seem to allow 9 PDF spec only allows UTF-16BE, but most readers seem to allow
5 both. Fixes #649. 10 both. Fixes #649.
libqpdf/QPDF_String.cc
@@ -32,8 +32,7 @@ QPDF_String* @@ -32,8 +32,7 @@ QPDF_String*
32 QPDF_String::new_utf16(std::string const& utf8_val) 32 QPDF_String::new_utf16(std::string const& utf8_val)
33 { 33 {
34 std::string result; 34 std::string result;
35 - if (! (QUtil::utf8_to_ascii(utf8_val, result, '?') ||  
36 - QUtil::utf8_to_pdf_doc(utf8_val, result, '?'))) 35 + if (! QUtil::utf8_to_pdf_doc(utf8_val, result, '?'))
37 { 36 {
38 result = QUtil::utf8_to_utf16(utf8_val); 37 result = QUtil::utf8_to_utf16(utf8_val);
39 } 38 }
qpdf/test_driver.cc
@@ -3325,12 +3325,34 @@ static void test_85(QPDF& pdf, char const* arg2) @@ -3325,12 +3325,34 @@ static void test_85(QPDF& pdf, char const* arg2)
3325 assert(s == "/Test"); 3325 assert(s == "/Test");
3326 } 3326 }
3327 3327
  3328 +static void test_86(QPDF& pdf, char const* arg2)
  3329 +{
  3330 + // Test symmetry between newUnicodeString and getUTF8Value for
  3331 + // strings that can't be encoded as PDFDoc but don't contain any
  3332 + // high code points.
  3333 +
  3334 + std::string utf8_val("\x1f");
  3335 + std::string utf16_val("\xfe\xff\x00\x1f", 4);
  3336 + std::string result;
  3337 + assert(QUtil::utf8_to_ascii(utf8_val, result, '?'));
  3338 + assert(result == "\x1f");
  3339 + assert(! QUtil::utf8_to_pdf_doc(utf8_val, result, '?'));
  3340 + assert(result == "?");
  3341 + assert(QUtil::utf8_to_utf16(utf8_val) == utf16_val);
  3342 + assert(QUtil::utf16_to_utf8(utf16_val) == utf8_val);
  3343 + auto h = QPDFObjectHandle::newUnicodeString("\x1f");
  3344 + assert(h.getStringValue() == std::string("\xfe\xff\x00\x1f", 4));
  3345 + assert(h.getUTF8Value() == "\x1f");
  3346 +}
  3347 +
3328 void runtest(int n, char const* filename1, char const* arg2) 3348 void runtest(int n, char const* filename1, char const* arg2)
3329 { 3349 {
3330 // Most tests here are crafted to work on specific files. Look at 3350 // Most tests here are crafted to work on specific files. Look at
3331 // the test suite to see how the test is invoked to find the file 3351 // the test suite to see how the test is invoked to find the file
3332 // that the test is supposed to operate on. 3352 // that the test is supposed to operate on.
3333 3353
  3354 + std::set<int> ignore_filename = {61, 81, 83, 84, 85, 86};
  3355 +
3334 if (n == 0) 3356 if (n == 0)
3335 { 3357 {
3336 // Throw in some random test cases that don't fit anywhere 3358 // Throw in some random test cases that don't fit anywhere
@@ -3391,7 +3413,7 @@ void runtest(int n, char const* filename1, char const* arg2) @@ -3391,7 +3413,7 @@ void runtest(int n, char const* filename1, char const* arg2)
3391 pdf.processMemoryFile((std::string(filename1) + ".pdf").c_str(), 3413 pdf.processMemoryFile((std::string(filename1) + ".pdf").c_str(),
3392 p, size); 3414 p, size);
3393 } 3415 }
3394 - else if ((n == 61) || (n == 81) || (n == 83) || (n == 84) || (n == 85)) 3416 + else if (ignore_filename.count(n))
3395 { 3417 {
3396 // Ignore filename argument entirely 3418 // Ignore filename argument entirely
3397 } 3419 }
@@ -3439,7 +3461,7 @@ void runtest(int n, char const* filename1, char const* arg2) @@ -3439,7 +3461,7 @@ void runtest(int n, char const* filename1, char const* arg2)
3439 {72, test_72}, {73, test_73}, {74, test_74}, {75, test_75}, 3461 {72, test_72}, {73, test_73}, {74, test_74}, {75, test_75},
3440 {76, test_76}, {77, test_77}, {78, test_78}, {79, test_79}, 3462 {76, test_76}, {77, test_77}, {78, test_78}, {79, test_79},
3441 {80, test_80}, {81, test_81}, {82, test_82}, {83, test_83}, 3463 {80, test_80}, {81, test_81}, {82, test_82}, {83, test_83},
3442 - {84, test_84}, {85, test_85}, 3464 + {84, test_84}, {85, test_85}, {86, test_86},
3443 }; 3465 };
3444 3466
3445 auto fn = test_functions.find(n); 3467 auto fn = test_functions.find(n);