Commit 431987475b392daf4094570565881e1ebfc9528a

Authored by m-holger
1 parent e2737ab6

Add new method QPDF_Name::analyzeJSONEncoding

Provide a custom method to check whether a name is valid utf8. Integrate
checking for characters that need to be escaped in JSON.
libqpdf/QPDF_Dictionary.cc
... ... @@ -77,15 +77,11 @@ QPDF_Dictionary::getJSON(int json_version)
77 77 if (json_version == 1) {
78 78 j.addDictionaryMember(
79 79 QPDF_Name::normalizeName(iter.first), iter.second.getJSON(json_version));
  80 + } else if (auto res = QPDF_Name::analyzeJSONEncoding(iter.first); res.first) {
  81 + j.addDictionaryMember(iter.first, iter.second.getJSON(json_version));
80 82 } else {
81   - bool has_8bit_chars;
82   - bool is_valid_utf8;
83   - bool is_utf16;
84   - QUtil::analyze_encoding(iter.first, has_8bit_chars, is_valid_utf8, is_utf16);
85   - std::string key = !has_8bit_chars || is_valid_utf8
86   - ? iter.first
87   - : "n:" + QPDF_Name::normalizeName(iter.first);
88   - j.addDictionaryMember(key, iter.second.getJSON(json_version));
  83 + j.addDictionaryMember(
  84 + "n:" + QPDF_Name::normalizeName(iter.first), iter.second.getJSON(json_version));
89 85 }
90 86 }
91 87 }
... ... @@ -100,18 +96,17 @@ QPDF_Dictionary::writeJSON(int json_version, JSON::Writer& p)
100 96 if (!iter.second.isNull()) {
101 97 p.writeNext();
102 98 if (json_version == 1) {
103   - p << "\"" << JSON::Writer::encode_string(QPDF_Name::normalizeName(iter.first)) << "\": ";
104   - } else {
105   - bool has_8bit_chars;
106   - bool is_valid_utf8;
107   - bool is_utf16;
108   - QUtil::analyze_encoding(iter.first, has_8bit_chars, is_valid_utf8, is_utf16);
109   - if (!has_8bit_chars || is_valid_utf8) {
110   - p << "\"" << JSON::Writer::encode_string(iter.first) << "\": ";
  99 + p << "\"" << JSON::Writer::encode_string(QPDF_Name::normalizeName(iter.first))
  100 + << "\": ";
  101 + } else if (auto res = QPDF_Name::analyzeJSONEncoding(iter.first); res.first) {
  102 + if (res.second) {
  103 + p << "\"" << iter.first << "\": ";
111 104 } else {
112   - p << "\"n:" << JSON::Writer::encode_string(QPDF_Name::normalizeName(iter.first))
113   - << "\": ";
  105 + p << "\"" << JSON::Writer::encode_string(iter.first) << "\": ";
114 106 }
  107 + } else {
  108 + p << "\"n:" << JSON::Writer::encode_string(QPDF_Name::normalizeName(iter.first))
  109 + << "\": ";
115 110 }
116 111 iter.second.writeJSON(json_version, p);
117 112 }
... ...
libqpdf/QPDF_Name.cc
... ... @@ -3,6 +3,8 @@
3 3 #include <qpdf/JSON_writer.hh>
4 4 #include <qpdf/QUtil.hh>
5 5  
  6 +#include <string_view>
  7 +
6 8 QPDF_Name::QPDF_Name(std::string const& name) :
7 9 QPDFValue(::ot_name, "name"),
8 10 name(name)
... ... @@ -52,20 +54,65 @@ QPDF_Name::unparse()
52 54 return normalizeName(this->name);
53 55 }
54 56  
  57 +std::pair<bool, bool>
  58 +QPDF_Name::analyzeJSONEncoding(const std::string& name)
  59 +{
  60 + std::basic_string_view<unsigned char> view{
  61 + reinterpret_cast<const unsigned char*>(name.data()), name.size()};
  62 +
  63 + int tail = 0; // Number of continuation characters expected.
  64 + bool tail2 = false; // Potential overlong 3 octet utf-8.
  65 + bool tail3 = false; // potential overlong 4 octet
  66 + bool needs_escaping = false;
  67 + for (auto const& c: view) {
  68 + if (tail) {
  69 + if ((c & 0xc0) != 0x80) {
  70 + return {false, false};
  71 + }
  72 + if (tail2) {
  73 + if ((c & 0xe0) == 0x80) {
  74 + return {false, false};
  75 + }
  76 + tail2 = false;
  77 + } else if (tail3) {
  78 + if ((c & 0xf0) == 0x80) {
  79 + return {false, false};
  80 + }
  81 + tail3 = false;
  82 + }
  83 + tail--;
  84 + } else if (c < 0x80) {
  85 + if (!needs_escaping) {
  86 + needs_escaping = !((c > 34 && c != '\\') || c == ' ' || c == 33);
  87 + }
  88 + } else if ((c & 0xe0) == 0xc0) {
  89 + if ((c & 0xfe) == 0xc0) {
  90 + return {false, false};
  91 + }
  92 + tail = 1;
  93 + } else if ((c & 0xf0) == 0xe0) {
  94 + tail2 = (c == 0xe0);
  95 + tail = 2;
  96 + } else if ((c & 0xf8) == 0xf0) {
  97 + tail3 = (c == 0xf0);
  98 + tail = 3;
  99 + } else {
  100 + return {false, false};
  101 + }
  102 + }
  103 + return {tail == 0, !needs_escaping};
  104 +}
  105 +
55 106 JSON
56 107 QPDF_Name::getJSON(int json_version)
57 108 {
58 109 if (json_version == 1) {
59 110 return JSON::makeString(normalizeName(this->name));
60 111 } else {
61   - bool has_8bit_chars;
62   - bool is_valid_utf8;
63   - bool is_utf16;
64   - QUtil::analyze_encoding(this->name, has_8bit_chars, is_valid_utf8, is_utf16);
65   - if (!has_8bit_chars || is_valid_utf8) {
66   - return JSON::makeString(this->name);
  112 + if (auto res = analyzeJSONEncoding(name); res.first) {
  113 + return JSON::makeString(name);
67 114 } else {
68   - return JSON::makeString("n:" + normalizeName(this->name));
  115 + return JSON::makeString("n:" + normalizeName(name));
69 116 }
70 117 }
71 118 }
... ... @@ -76,12 +123,12 @@ QPDF_Name::writeJSON(int json_version, JSON::Writer&amp; p)
76 123 if (json_version == 1) {
77 124 p << "\"" << JSON::Writer::encode_string(normalizeName(name)) << "\"";
78 125 } else {
79   - bool has_8bit_chars;
80   - bool is_valid_utf8;
81   - bool is_utf16;
82   - QUtil::analyze_encoding(this->name, has_8bit_chars, is_valid_utf8, is_utf16);
83   - if (!has_8bit_chars || is_valid_utf8) {
84   - p << "\"" << JSON::Writer::encode_string(name) << "\"";
  126 + if (auto res = analyzeJSONEncoding(name); res.first) {
  127 + if (res.second) {
  128 + p << "\"" << name << "\"";
  129 + } else {
  130 + p << "\"" << JSON::Writer::encode_string(name) << "\"";
  131 + }
85 132 } else {
86 133 p << "\"n:" << JSON::Writer::encode_string(normalizeName(name)) << "\"";
87 134 }
... ...
libqpdf/qpdf/QPDF_Name.hh
... ... @@ -15,6 +15,11 @@ class QPDF_Name: public QPDFValue
15 15  
16 16 // Put # into strings with characters unsuitable for name token
17 17 static std::string normalizeName(std::string const& name);
  18 +
  19 + // Check whether name is valid utf-8 and whether it contains characters that require escaping.
  20 + // Return {false, false} if the name is not valid utf-8, otherwise return {true, true} if no
  21 + // characters require escaping or {true, false} if escaping is required.
  22 + static std::pair<bool, bool> analyzeJSONEncoding(std::string const& name);
18 23 std::string
19 24 getStringValue() const override
20 25 {
... ...