Commit 431987475b392daf4094570565881e1ebfc9528a
1 parent
e2737ab6
Add new method QPDF_Name::analyzeJSONEncoding
Provide a custom method to check whether a name is valid UTF-8, and integrate into it the check for characters that need to be escaped in JSON.
Showing 3 changed files with 78 additions and 31 deletions
libqpdf/QPDF_Dictionary.cc
| ... | ... | @@ -77,15 +77,11 @@ QPDF_Dictionary::getJSON(int json_version) |
| 77 | 77 | if (json_version == 1) { |
| 78 | 78 | j.addDictionaryMember( |
| 79 | 79 | QPDF_Name::normalizeName(iter.first), iter.second.getJSON(json_version)); |
| 80 | + } else if (auto res = QPDF_Name::analyzeJSONEncoding(iter.first); res.first) { | |
| 81 | + j.addDictionaryMember(iter.first, iter.second.getJSON(json_version)); | |
| 80 | 82 | } else { |
| 81 | - bool has_8bit_chars; | |
| 82 | - bool is_valid_utf8; | |
| 83 | - bool is_utf16; | |
| 84 | - QUtil::analyze_encoding(iter.first, has_8bit_chars, is_valid_utf8, is_utf16); | |
| 85 | - std::string key = !has_8bit_chars || is_valid_utf8 | |
| 86 | - ? iter.first | |
| 87 | - : "n:" + QPDF_Name::normalizeName(iter.first); | |
| 88 | - j.addDictionaryMember(key, iter.second.getJSON(json_version)); | |
| 83 | + j.addDictionaryMember( | |
| 84 | + "n:" + QPDF_Name::normalizeName(iter.first), iter.second.getJSON(json_version)); | |
| 89 | 85 | } |
| 90 | 86 | } |
| 91 | 87 | } |
| ... | ... | @@ -100,18 +96,17 @@ QPDF_Dictionary::writeJSON(int json_version, JSON::Writer& p) |
| 100 | 96 | if (!iter.second.isNull()) { |
| 101 | 97 | p.writeNext(); |
| 102 | 98 | if (json_version == 1) { |
| 103 | - p << "\"" << JSON::Writer::encode_string(QPDF_Name::normalizeName(iter.first)) << "\": "; | |
| 104 | - } else { | |
| 105 | - bool has_8bit_chars; | |
| 106 | - bool is_valid_utf8; | |
| 107 | - bool is_utf16; | |
| 108 | - QUtil::analyze_encoding(iter.first, has_8bit_chars, is_valid_utf8, is_utf16); | |
| 109 | - if (!has_8bit_chars || is_valid_utf8) { | |
| 110 | - p << "\"" << JSON::Writer::encode_string(iter.first) << "\": "; | |
| 99 | + p << "\"" << JSON::Writer::encode_string(QPDF_Name::normalizeName(iter.first)) | |
| 100 | + << "\": "; | |
| 101 | + } else if (auto res = QPDF_Name::analyzeJSONEncoding(iter.first); res.first) { | |
| 102 | + if (res.second) { | |
| 103 | + p << "\"" << iter.first << "\": "; | |
| 111 | 104 | } else { |
| 112 | - p << "\"n:" << JSON::Writer::encode_string(QPDF_Name::normalizeName(iter.first)) | |
| 113 | - << "\": "; | |
| 105 | + p << "\"" << JSON::Writer::encode_string(iter.first) << "\": "; | |
| 114 | 106 | } |
| 107 | + } else { | |
| 108 | + p << "\"n:" << JSON::Writer::encode_string(QPDF_Name::normalizeName(iter.first)) | |
| 109 | + << "\": "; | |
| 115 | 110 | } |
| 116 | 111 | iter.second.writeJSON(json_version, p); |
| 117 | 112 | } | ... | ... |
libqpdf/QPDF_Name.cc
| ... | ... | @@ -3,6 +3,8 @@ |
| 3 | 3 | #include <qpdf/JSON_writer.hh> |
| 4 | 4 | #include <qpdf/QUtil.hh> |
| 5 | 5 | |
| 6 | +#include <string_view> | |
| 7 | + | |
| 6 | 8 | QPDF_Name::QPDF_Name(std::string const& name) : |
| 7 | 9 | QPDFValue(::ot_name, "name"), |
| 8 | 10 | name(name) |
| ... | ... | @@ -52,20 +54,65 @@ QPDF_Name::unparse() |
| 52 | 54 | return normalizeName(this->name); |
| 53 | 55 | } |
| 54 | 56 | |
| 57 | +std::pair<bool, bool> | |
| 58 | +QPDF_Name::analyzeJSONEncoding(const std::string& name) | |
| 59 | +{ | |
| 60 | + std::basic_string_view<unsigned char> view{ | |
| 61 | + reinterpret_cast<const unsigned char*>(name.data()), name.size()}; | |
| 62 | + | |
| 63 | + int tail = 0; // Number of continuation characters expected. | |
| 64 | + bool tail2 = false; // Potential overlong 3 octet utf-8. | |
| 65 | + bool tail3 = false; // potential overlong 4 octet | |
| 66 | + bool needs_escaping = false; | |
| 67 | + for (auto const& c: view) { | |
| 68 | + if (tail) { | |
| 69 | + if ((c & 0xc0) != 0x80) { | |
| 70 | + return {false, false}; | |
| 71 | + } | |
| 72 | + if (tail2) { | |
| 73 | + if ((c & 0xe0) == 0x80) { | |
| 74 | + return {false, false}; | |
| 75 | + } | |
| 76 | + tail2 = false; | |
| 77 | + } else if (tail3) { | |
| 78 | + if ((c & 0xf0) == 0x80) { | |
| 79 | + return {false, false}; | |
| 80 | + } | |
| 81 | + tail3 = false; | |
| 82 | + } | |
| 83 | + tail--; | |
| 84 | + } else if (c < 0x80) { | |
| 85 | + if (!needs_escaping) { | |
| 86 | + needs_escaping = !((c > 34 && c != '\\') || c == ' ' || c == 33); | |
| 87 | + } | |
| 88 | + } else if ((c & 0xe0) == 0xc0) { | |
| 89 | + if ((c & 0xfe) == 0xc0) { | |
| 90 | + return {false, false}; | |
| 91 | + } | |
| 92 | + tail = 1; | |
| 93 | + } else if ((c & 0xf0) == 0xe0) { | |
| 94 | + tail2 = (c == 0xe0); | |
| 95 | + tail = 2; | |
| 96 | + } else if ((c & 0xf8) == 0xf0) { | |
| 97 | + tail3 = (c == 0xf0); | |
| 98 | + tail = 3; | |
| 99 | + } else { | |
| 100 | + return {false, false}; | |
| 101 | + } | |
| 102 | + } | |
| 103 | + return {tail == 0, !needs_escaping}; | |
| 104 | +} | |
| 105 | + | |
| 55 | 106 | JSON |
| 56 | 107 | QPDF_Name::getJSON(int json_version) |
| 57 | 108 | { |
| 58 | 109 | if (json_version == 1) { |
| 59 | 110 | return JSON::makeString(normalizeName(this->name)); |
| 60 | 111 | } else { |
| 61 | - bool has_8bit_chars; | |
| 62 | - bool is_valid_utf8; | |
| 63 | - bool is_utf16; | |
| 64 | - QUtil::analyze_encoding(this->name, has_8bit_chars, is_valid_utf8, is_utf16); | |
| 65 | - if (!has_8bit_chars || is_valid_utf8) { | |
| 66 | - return JSON::makeString(this->name); | |
| 112 | + if (auto res = analyzeJSONEncoding(name); res.first) { | |
| 113 | + return JSON::makeString(name); | |
| 67 | 114 | } else { |
| 68 | - return JSON::makeString("n:" + normalizeName(this->name)); | |
| 115 | + return JSON::makeString("n:" + normalizeName(name)); | |
| 69 | 116 | } |
| 70 | 117 | } |
| 71 | 118 | } |
| ... | ... | @@ -76,12 +123,12 @@ QPDF_Name::writeJSON(int json_version, JSON::Writer& p) |
| 76 | 123 | if (json_version == 1) { |
| 77 | 124 | p << "\"" << JSON::Writer::encode_string(normalizeName(name)) << "\""; |
| 78 | 125 | } else { |
| 79 | - bool has_8bit_chars; | |
| 80 | - bool is_valid_utf8; | |
| 81 | - bool is_utf16; | |
| 82 | - QUtil::analyze_encoding(this->name, has_8bit_chars, is_valid_utf8, is_utf16); | |
| 83 | - if (!has_8bit_chars || is_valid_utf8) { | |
| 84 | - p << "\"" << JSON::Writer::encode_string(name) << "\""; | |
| 126 | + if (auto res = analyzeJSONEncoding(name); res.first) { | |
| 127 | + if (res.second) { | |
| 128 | + p << "\"" << name << "\""; | |
| 129 | + } else { | |
| 130 | + p << "\"" << JSON::Writer::encode_string(name) << "\""; | |
| 131 | + } | |
| 85 | 132 | } else { |
| 86 | 133 | p << "\"n:" << JSON::Writer::encode_string(normalizeName(name)) << "\""; |
| 87 | 134 | } | ... | ... |
libqpdf/qpdf/QPDF_Name.hh
| ... | ... | @@ -15,6 +15,11 @@ class QPDF_Name: public QPDFValue |
| 15 | 15 | |
| 16 | 16 | // Put # into strings with characters unsuitable for name token |
| 17 | 17 | static std::string normalizeName(std::string const& name); |
| 18 | + | |
| 19 | + // Check whether name is valid utf-8 and whether it contains characters that require escaping. | |
| 20 | + // Return {false, false} if the name is not valid utf-8, otherwise return {true, true} if no | |
| 21 | + // characters require escaping or {true, false} if escaping is required. | |
| 22 | + static std::pair<bool, bool> analyzeJSONEncoding(std::string const& name); | |
| 18 | 23 | std::string |
| 19 | 24 | getStringValue() const override |
| 20 | 25 | { | ... | ... |