Commit f7ac5915909c7197acf84265f8d8ad41b95a36a8
1 parent
07a2bb33
Recognize explicit UTF-8 strings (fixes #654)
Showing 6 changed files with 38 additions and 11 deletions
ChangeLog
TODO
| ... | ... | @@ -10,6 +10,14 @@ Priorities for 11: |
| 10 | 10 | * PointerHolder -> shared_ptr |
| 11 | 11 | * ABI |
| 12 | 12 | |
| 13 | +Misc | |
| 14 | +* Get rid of "ugly switch statements" in QUtil.cc -- replace with | |
| 15 | + static map initializers. (Search for "ugly switch statements" below | |
| 16 | + as well.) | |
| 17 | +* Consider exposing get_next_utf8_codepoint in QUtil | |
| 18 | +* Add QUtil::is_explicit_utf8 that does what QPDF_String::getUTF8Val | |
| 19 | + does to detect UTF-8 encoded strings per PDF 2.0 spec. | |
| 20 | + | |
| 13 | 21 | Soon: Break ground on "Document-level work" |
| 14 | 22 | |
| 15 | 23 | Code Formatting | ... | ... |
libqpdf/QPDF_String.cc
| ... | ... | @@ -183,6 +183,15 @@ QPDF_String::getUTF8Val() const |
| 183 | 183 | { |
| 184 | 184 | return QUtil::utf16_to_utf8(this->val); |
| 185 | 185 | } |
| 186 | + else if ((val.length() >= 3) && | |
| 187 | + (val[0] == '\xEF') && | |
| 188 | + (val[1] == '\xBB') && | |
| 189 | + (val[2] == '\xBF')) | |
| 190 | + { | |
| 191 | + // PDF 2.0 allows UTF-8 strings when explicitly prefixed with | |
| 192 | + // the above bytes, which are just the UTF-8 encoding of U+FEFF. | |
| 193 | + return this->val.substr(3); | |
| 194 | + } | |
| 186 | 195 | else |
| 187 | 196 | { |
| 188 | 197 | return QUtil::pdf_doc_to_utf8(this->val); | ... | ... |
manual/release-notes.rst
| ... | ... | @@ -9,6 +9,9 @@ For a detailed list of changes, please see the file |
| 9 | 9 | 10.6.3: XXX |
| 10 | 10 | - Bug fixes: |
| 11 | 11 | |
| 12 | + - Recognize strings explicitly encoded as UTF-8 as allowed by the | |
| 13 | + PDF 2.0 spec. | |
| 14 | + | |
| 12 | 15 | - Fix edge cases with appearance stream generation for form fields |
| 13 | 16 | whose ``/DA`` field lacks proper font size specification or that |
| 14 | 17 | specifies auto sizing. At this time, qpdf does not support auto | ... | ... |
qpdf/qtest/qpdf/numeric-and-string-3.out
qpdf/qtest/qpdf/numeric-and-string-3.pdf
| ... | ... | @@ -12,8 +12,9 @@ |
| 12 | 12 | /QStrings [ |
| 13 | 13 | (No Special Characters) |
| 14 | 14 | (These: ¿÷¢þ and no more) |
| 15 | + (\357\273\277Explicit utf-8 with \317\200) | |
| 15 | 16 | <feff03c003c903c403b103c403c9> |
| 16 | - <feff0074007200650062006c006500200063006c00650066003a0020d834dd20003b0020007300690078007400650065006e007400680020006e006f00740065003a0020d834dd60> | |
| 17 | + <feff0074007200650062006c006500200063006c00650066003a0020d834dd20003b0020007300690078007400650065006e007400680020006e006f00740065003a0020d834dd61> | |
| 17 | 18 | ] |
| 18 | 19 | /Type /Catalog |
| 19 | 20 | >> |
| ... | ... | @@ -110,19 +111,19 @@ xref |
| 110 | 111 | 0 10 |
| 111 | 112 | 0000000000 65535 f |
| 112 | 113 | 0000000025 00000 n |
| 113 | -0000000377 00000 n | |
| 114 | -0000000459 00000 n | |
| 115 | -0000000694 00000 n | |
| 116 | -0000000793 00000 n | |
| 117 | -0000000835 00000 n | |
| 118 | -0000000933 00000 n | |
| 119 | -0000000952 00000 n | |
| 120 | -0000001070 00000 n | |
| 114 | +0000000424 00000 n | |
| 115 | +0000000506 00000 n | |
| 116 | +0000000741 00000 n | |
| 117 | +0000000840 00000 n | |
| 118 | +0000000882 00000 n | |
| 119 | +0000000980 00000 n | |
| 120 | +0000000999 00000 n | |
| 121 | +0000001117 00000 n | |
| 121 | 122 | trailer << |
| 122 | 123 | /Root 1 0 R |
| 123 | 124 | /Size 10 |
| 124 | 125 | /ID [<e017d8dc1fe53a81e40aa79bcb43fdec><76269ee0b6579446b731e060af8ef436>] |
| 125 | 126 | >> |
| 126 | 127 | startxref |
| 127 | -1105 | |
| 128 | +1152 | |
| 128 | 129 | %%EOF | ... | ... |