Commit 431987475b392daf4094570565881e1ebfc9528a
1 parent
e2737ab6
Add new method QPDF_Name::analyzeJSONEncoding
Provide a custom method to check whether a name is valid UTF-8, and integrate into the same pass the check for characters that need to be escaped in JSON.
Showing
3 changed files
with
78 additions
and
31 deletions
libqpdf/QPDF_Dictionary.cc
| @@ -77,15 +77,11 @@ QPDF_Dictionary::getJSON(int json_version) | @@ -77,15 +77,11 @@ QPDF_Dictionary::getJSON(int json_version) | ||
| 77 | if (json_version == 1) { | 77 | if (json_version == 1) { |
| 78 | j.addDictionaryMember( | 78 | j.addDictionaryMember( |
| 79 | QPDF_Name::normalizeName(iter.first), iter.second.getJSON(json_version)); | 79 | QPDF_Name::normalizeName(iter.first), iter.second.getJSON(json_version)); |
| 80 | + } else if (auto res = QPDF_Name::analyzeJSONEncoding(iter.first); res.first) { | ||
| 81 | + j.addDictionaryMember(iter.first, iter.second.getJSON(json_version)); | ||
| 80 | } else { | 82 | } else { |
| 81 | - bool has_8bit_chars; | ||
| 82 | - bool is_valid_utf8; | ||
| 83 | - bool is_utf16; | ||
| 84 | - QUtil::analyze_encoding(iter.first, has_8bit_chars, is_valid_utf8, is_utf16); | ||
| 85 | - std::string key = !has_8bit_chars || is_valid_utf8 | ||
| 86 | - ? iter.first | ||
| 87 | - : "n:" + QPDF_Name::normalizeName(iter.first); | ||
| 88 | - j.addDictionaryMember(key, iter.second.getJSON(json_version)); | 83 | + j.addDictionaryMember( |
| 84 | + "n:" + QPDF_Name::normalizeName(iter.first), iter.second.getJSON(json_version)); | ||
| 89 | } | 85 | } |
| 90 | } | 86 | } |
| 91 | } | 87 | } |
| @@ -100,18 +96,17 @@ QPDF_Dictionary::writeJSON(int json_version, JSON::Writer& p) | @@ -100,18 +96,17 @@ QPDF_Dictionary::writeJSON(int json_version, JSON::Writer& p) | ||
| 100 | if (!iter.second.isNull()) { | 96 | if (!iter.second.isNull()) { |
| 101 | p.writeNext(); | 97 | p.writeNext(); |
| 102 | if (json_version == 1) { | 98 | if (json_version == 1) { |
| 103 | - p << "\"" << JSON::Writer::encode_string(QPDF_Name::normalizeName(iter.first)) << "\": "; | ||
| 104 | - } else { | ||
| 105 | - bool has_8bit_chars; | ||
| 106 | - bool is_valid_utf8; | ||
| 107 | - bool is_utf16; | ||
| 108 | - QUtil::analyze_encoding(iter.first, has_8bit_chars, is_valid_utf8, is_utf16); | ||
| 109 | - if (!has_8bit_chars || is_valid_utf8) { | ||
| 110 | - p << "\"" << JSON::Writer::encode_string(iter.first) << "\": "; | 99 | + p << "\"" << JSON::Writer::encode_string(QPDF_Name::normalizeName(iter.first)) |
| 100 | + << "\": "; | ||
| 101 | + } else if (auto res = QPDF_Name::analyzeJSONEncoding(iter.first); res.first) { | ||
| 102 | + if (res.second) { | ||
| 103 | + p << "\"" << iter.first << "\": "; | ||
| 111 | } else { | 104 | } else { |
| 112 | - p << "\"n:" << JSON::Writer::encode_string(QPDF_Name::normalizeName(iter.first)) | ||
| 113 | - << "\": "; | 105 | + p << "\"" << JSON::Writer::encode_string(iter.first) << "\": "; |
| 114 | } | 106 | } |
| 107 | + } else { | ||
| 108 | + p << "\"n:" << JSON::Writer::encode_string(QPDF_Name::normalizeName(iter.first)) | ||
| 109 | + << "\": "; | ||
| 115 | } | 110 | } |
| 116 | iter.second.writeJSON(json_version, p); | 111 | iter.second.writeJSON(json_version, p); |
| 117 | } | 112 | } |
libqpdf/QPDF_Name.cc
| @@ -3,6 +3,8 @@ | @@ -3,6 +3,8 @@ | ||
| 3 | #include <qpdf/JSON_writer.hh> | 3 | #include <qpdf/JSON_writer.hh> |
| 4 | #include <qpdf/QUtil.hh> | 4 | #include <qpdf/QUtil.hh> |
| 5 | 5 | ||
| 6 | +#include <string_view> | ||
| 7 | + | ||
| 6 | QPDF_Name::QPDF_Name(std::string const& name) : | 8 | QPDF_Name::QPDF_Name(std::string const& name) : |
| 7 | QPDFValue(::ot_name, "name"), | 9 | QPDFValue(::ot_name, "name"), |
| 8 | name(name) | 10 | name(name) |
| @@ -52,20 +54,65 @@ QPDF_Name::unparse() | @@ -52,20 +54,65 @@ QPDF_Name::unparse() | ||
| 52 | return normalizeName(this->name); | 54 | return normalizeName(this->name); |
| 53 | } | 55 | } |
| 54 | 56 | ||
| 57 | +std::pair<bool, bool> | ||
| 58 | +QPDF_Name::analyzeJSONEncoding(const std::string& name) | ||
| 59 | +{ | ||
| 60 | + std::basic_string_view<unsigned char> view{ | ||
| 61 | + reinterpret_cast<const unsigned char*>(name.data()), name.size()}; | ||
| 62 | + | ||
| 63 | + int tail = 0; // Number of continuation characters expected. | ||
| 64 | + bool tail2 = false; // Potential overlong 3 octet utf-8. | ||
| 65 | + bool tail3 = false; // potential overlong 4 octet | ||
| 66 | + bool needs_escaping = false; | ||
| 67 | + for (auto const& c: view) { | ||
| 68 | + if (tail) { | ||
| 69 | + if ((c & 0xc0) != 0x80) { | ||
| 70 | + return {false, false}; | ||
| 71 | + } | ||
| 72 | + if (tail2) { | ||
| 73 | + if ((c & 0xe0) == 0x80) { | ||
| 74 | + return {false, false}; | ||
| 75 | + } | ||
| 76 | + tail2 = false; | ||
| 77 | + } else if (tail3) { | ||
| 78 | + if ((c & 0xf0) == 0x80) { | ||
| 79 | + return {false, false}; | ||
| 80 | + } | ||
| 81 | + tail3 = false; | ||
| 82 | + } | ||
| 83 | + tail--; | ||
| 84 | + } else if (c < 0x80) { | ||
| 85 | + if (!needs_escaping) { | ||
| 86 | + needs_escaping = !((c > 34 && c != '\\') || c == ' ' || c == 33); | ||
| 87 | + } | ||
| 88 | + } else if ((c & 0xe0) == 0xc0) { | ||
| 89 | + if ((c & 0xfe) == 0xc0) { | ||
| 90 | + return {false, false}; | ||
| 91 | + } | ||
| 92 | + tail = 1; | ||
| 93 | + } else if ((c & 0xf0) == 0xe0) { | ||
| 94 | + tail2 = (c == 0xe0); | ||
| 95 | + tail = 2; | ||
| 96 | + } else if ((c & 0xf8) == 0xf0) { | ||
| 97 | + tail3 = (c == 0xf0); | ||
| 98 | + tail = 3; | ||
| 99 | + } else { | ||
| 100 | + return {false, false}; | ||
| 101 | + } | ||
| 102 | + } | ||
| 103 | + return {tail == 0, !needs_escaping}; | ||
| 104 | +} | ||
| 105 | + | ||
| 55 | JSON | 106 | JSON |
| 56 | QPDF_Name::getJSON(int json_version) | 107 | QPDF_Name::getJSON(int json_version) |
| 57 | { | 108 | { |
| 58 | if (json_version == 1) { | 109 | if (json_version == 1) { |
| 59 | return JSON::makeString(normalizeName(this->name)); | 110 | return JSON::makeString(normalizeName(this->name)); |
| 60 | } else { | 111 | } else { |
| 61 | - bool has_8bit_chars; | ||
| 62 | - bool is_valid_utf8; | ||
| 63 | - bool is_utf16; | ||
| 64 | - QUtil::analyze_encoding(this->name, has_8bit_chars, is_valid_utf8, is_utf16); | ||
| 65 | - if (!has_8bit_chars || is_valid_utf8) { | ||
| 66 | - return JSON::makeString(this->name); | 112 | + if (auto res = analyzeJSONEncoding(name); res.first) { |
| 113 | + return JSON::makeString(name); | ||
| 67 | } else { | 114 | } else { |
| 68 | - return JSON::makeString("n:" + normalizeName(this->name)); | 115 | + return JSON::makeString("n:" + normalizeName(name)); |
| 69 | } | 116 | } |
| 70 | } | 117 | } |
| 71 | } | 118 | } |
| @@ -76,12 +123,12 @@ QPDF_Name::writeJSON(int json_version, JSON::Writer& p) | @@ -76,12 +123,12 @@ QPDF_Name::writeJSON(int json_version, JSON::Writer& p) | ||
| 76 | if (json_version == 1) { | 123 | if (json_version == 1) { |
| 77 | p << "\"" << JSON::Writer::encode_string(normalizeName(name)) << "\""; | 124 | p << "\"" << JSON::Writer::encode_string(normalizeName(name)) << "\""; |
| 78 | } else { | 125 | } else { |
| 79 | - bool has_8bit_chars; | ||
| 80 | - bool is_valid_utf8; | ||
| 81 | - bool is_utf16; | ||
| 82 | - QUtil::analyze_encoding(this->name, has_8bit_chars, is_valid_utf8, is_utf16); | ||
| 83 | - if (!has_8bit_chars || is_valid_utf8) { | ||
| 84 | - p << "\"" << JSON::Writer::encode_string(name) << "\""; | 126 | + if (auto res = analyzeJSONEncoding(name); res.first) { |
| 127 | + if (res.second) { | ||
| 128 | + p << "\"" << name << "\""; | ||
| 129 | + } else { | ||
| 130 | + p << "\"" << JSON::Writer::encode_string(name) << "\""; | ||
| 131 | + } | ||
| 85 | } else { | 132 | } else { |
| 86 | p << "\"n:" << JSON::Writer::encode_string(normalizeName(name)) << "\""; | 133 | p << "\"n:" << JSON::Writer::encode_string(normalizeName(name)) << "\""; |
| 87 | } | 134 | } |
libqpdf/qpdf/QPDF_Name.hh
| @@ -15,6 +15,11 @@ class QPDF_Name: public QPDFValue | @@ -15,6 +15,11 @@ class QPDF_Name: public QPDFValue | ||
| 15 | 15 | ||
| 16 | // Put # into strings with characters unsuitable for name token | 16 | // Put # into strings with characters unsuitable for name token |
| 17 | static std::string normalizeName(std::string const& name); | 17 | static std::string normalizeName(std::string const& name); |
| 18 | + | ||
| 19 | + // Check whether name is valid utf-8 and whether it contains characters that require escaping. | ||
| 20 | + // Return {false, false} if the name is not valid utf-8, otherwise return {true, true} if no | ||
| 21 | + // characters require escaping or {true, false} if escaping is required. | ||
| 22 | + static std::pair<bool, bool> analyzeJSONEncoding(std::string const& name); | ||
| 18 | std::string | 23 | std::string |
| 19 | getStringValue() const override | 24 | getStringValue() const override |
| 20 | { | 25 | { |