Commit 8dc8e8c35c64c7be57ebd63eea3f2be42e9c6240

Authored by m-holger
1 parent 82497ea8

Refactor `QPDF_String::writeJSON`: inline `getUTF8Val`

libqpdf/QPDF_String.cc
... ... @@ -26,24 +26,40 @@ QPDF_String::create_utf16(std::string const& utf8_val)
26 26 void
27 27 QPDF_String::writeJSON(int json_version, JSON::Writer& p)
28 28 {
29   - auto candidate = getUTF8Val();
30 29 if (json_version == 1) {
31   - p << "\"" << JSON::Writer::encode_string(candidate) << "\"";
32   - } else {
33   - // See if we can unambiguously represent as Unicode.
34   - if (util::is_utf16(val) || util::is_explicit_utf8(val)) {
  30 + if (util::is_utf16(val)) {
  31 + p << "\"" << JSON::Writer::encode_string(QUtil::utf16_to_utf8(val)) << "\"";
  32 + return;
  33 + }
  34 + if (util::is_explicit_utf8(val)) {
  35 + // PDF 2.0 allows UTF-8 strings when explicitly prefixed with the three-byte
  36 + // representation of U+FEFF.
  37 + p << "\"" << JSON::Writer::encode_string(val.substr(3)) << "\"";
  38 + return;
  39 + }
  40 + p << "\"" << JSON::Writer::encode_string(QUtil::pdf_doc_to_utf8(val)) << "\"";
  41 + return;
  42 + }
  43 + // See if we can unambiguously represent as Unicode.
  44 + if (util::is_utf16(val)) {
  45 + p << "\"u:" << JSON::Writer::encode_string(QUtil::utf16_to_utf8(val)) << "\"";
  46 + return;
  47 + }
  48 + // A string explicitly prefixed with the UTF-8 encoding of U+FEFF is also unambiguously Unicode.
  49 + if (util::is_explicit_utf8(val)) {
  50 + p << "\"u:" << JSON::Writer::encode_string(val.substr(3)) << "\"";
  51 + return;
  52 + }
  53 + if (!useHexString()) {
  54 + auto candidate = QUtil::pdf_doc_to_utf8(val);
  55 + std::string test;
  56 + if (QUtil::utf8_to_pdf_doc(candidate, test, '?') && test == val) {
  57 + // This is a PDF-doc string that can be losslessly encoded as Unicode.
35 58 p << "\"u:" << JSON::Writer::encode_string(candidate) << "\"";
36 59 return;
37   - } else if (!useHexString()) {
38   - std::string test;
39   - if (QUtil::utf8_to_pdf_doc(candidate, test, '?') && (test == val)) {
40   - // This is a PDF-doc string that can be losslessly encoded as Unicode.
41   - p << "\"u:" << JSON::Writer::encode_string(candidate) << "\"";
42   - return;
43   - }
44 60 }
45   - p << "\"b:" << QUtil::hex_encode(val) << "\"";
46 61 }
  62 + p << "\"b:" << QUtil::hex_encode(val) << "\"";
47 63 }
48 64  
49 65 bool
... ...
libqpdf/qpdf/QPDFObject_private.hh
... ... @@ -281,6 +281,7 @@ class QPDF_String final
281 281 {
282 282 }
283 283 bool useHexString() const;
  284 +
284 285 std::string val;
285 286 };
286 287  
... ...
qpdf/qtest/json.test
... ... @@ -38,7 +38,7 @@ my @json_files = (
38 38 ['page-labels-and-outlines',
39 39 ['--json-key=objects', '--json-object=trailer', '--json-object=2 0 R']],
40 40 ['field-types', ['--json-key=acroform']],
41   - ['need-appearances', ['--json-key=acroform']],
  41 + ['need-appearances-utf8', ['--json-key=acroform']],
42 42 ['V4-aes', ['--json-key=encrypt']],
43 43 ['V4-aes', ['--json-key=encrypt', '--show-encryption-key']],
44 44 );
... ...
qpdf/qtest/qpdf/json-need-appearances-acroform-v1.out renamed to qpdf/qtest/qpdf/json-need-appearances-utf8-acroform-v1.out
... ... @@ -27,7 +27,7 @@
27 27 "parent": null,
28 28 "partialname": "text",
29 29 "quadding": 0,
30   - "value": "abc"
  30 + "value": "abcde"
31 31 },
32 32 {
33 33 "alternativename": "r1",
... ...
qpdf/qtest/qpdf/json-need-appearances-acroform-v2.out renamed to qpdf/qtest/qpdf/json-need-appearances-utf8-acroform-v2.out
... ... @@ -27,7 +27,7 @@
27 27 "parent": null,
28 28 "partialname": "text",
29 29 "quadding": 0,
30   - "value": "u:abc"
  30 + "value": "u:abcde"
31 31 },
32 32 {
33 33 "alternativename": "r1",
... ...
qpdf/qtest/qpdf/need-appearances-utf8.pdf 0 → 100644
No preview for this file type