Commit 431987475b392daf4094570565881e1ebfc9528a

Authored by m-holger
1 parent e2737ab6

Add new method QPDF_Name::analyzeJSONEncoding

Provide a custom method to check whether a name is valid utf8. Integrate
checking for characters that need to be escaped in JSON.
libqpdf/QPDF_Dictionary.cc
@@ -77,15 +77,11 @@ QPDF_Dictionary::getJSON(int json_version) @@ -77,15 +77,11 @@ QPDF_Dictionary::getJSON(int json_version)
77 if (json_version == 1) { 77 if (json_version == 1) {
78 j.addDictionaryMember( 78 j.addDictionaryMember(
79 QPDF_Name::normalizeName(iter.first), iter.second.getJSON(json_version)); 79 QPDF_Name::normalizeName(iter.first), iter.second.getJSON(json_version));
  80 + } else if (auto res = QPDF_Name::analyzeJSONEncoding(iter.first); res.first) {
  81 + j.addDictionaryMember(iter.first, iter.second.getJSON(json_version));
80 } else { 82 } else {
81 - bool has_8bit_chars;  
82 - bool is_valid_utf8;  
83 - bool is_utf16;  
84 - QUtil::analyze_encoding(iter.first, has_8bit_chars, is_valid_utf8, is_utf16);  
85 - std::string key = !has_8bit_chars || is_valid_utf8  
86 - ? iter.first  
87 - : "n:" + QPDF_Name::normalizeName(iter.first);  
88 - j.addDictionaryMember(key, iter.second.getJSON(json_version)); 83 + j.addDictionaryMember(
  84 + "n:" + QPDF_Name::normalizeName(iter.first), iter.second.getJSON(json_version));
89 } 85 }
90 } 86 }
91 } 87 }
@@ -100,18 +96,17 @@ QPDF_Dictionary::writeJSON(int json_version, JSON::Writer& p) @@ -100,18 +96,17 @@ QPDF_Dictionary::writeJSON(int json_version, JSON::Writer& p)
100 if (!iter.second.isNull()) { 96 if (!iter.second.isNull()) {
101 p.writeNext(); 97 p.writeNext();
102 if (json_version == 1) { 98 if (json_version == 1) {
103 - p << "\"" << JSON::Writer::encode_string(QPDF_Name::normalizeName(iter.first)) << "\": ";  
104 - } else {  
105 - bool has_8bit_chars;  
106 - bool is_valid_utf8;  
107 - bool is_utf16;  
108 - QUtil::analyze_encoding(iter.first, has_8bit_chars, is_valid_utf8, is_utf16);  
109 - if (!has_8bit_chars || is_valid_utf8) {  
110 - p << "\"" << JSON::Writer::encode_string(iter.first) << "\": "; 99 + p << "\"" << JSON::Writer::encode_string(QPDF_Name::normalizeName(iter.first))
  100 + << "\": ";
  101 + } else if (auto res = QPDF_Name::analyzeJSONEncoding(iter.first); res.first) {
  102 + if (res.second) {
  103 + p << "\"" << iter.first << "\": ";
111 } else { 104 } else {
112 - p << "\"n:" << JSON::Writer::encode_string(QPDF_Name::normalizeName(iter.first))  
113 - << "\": "; 105 + p << "\"" << JSON::Writer::encode_string(iter.first) << "\": ";
114 } 106 }
  107 + } else {
  108 + p << "\"n:" << JSON::Writer::encode_string(QPDF_Name::normalizeName(iter.first))
  109 + << "\": ";
115 } 110 }
116 iter.second.writeJSON(json_version, p); 111 iter.second.writeJSON(json_version, p);
117 } 112 }
libqpdf/QPDF_Name.cc
@@ -3,6 +3,8 @@ @@ -3,6 +3,8 @@
3 #include <qpdf/JSON_writer.hh> 3 #include <qpdf/JSON_writer.hh>
4 #include <qpdf/QUtil.hh> 4 #include <qpdf/QUtil.hh>
5 5
  6 +#include <string_view>
  7 +
6 QPDF_Name::QPDF_Name(std::string const& name) : 8 QPDF_Name::QPDF_Name(std::string const& name) :
7 QPDFValue(::ot_name, "name"), 9 QPDFValue(::ot_name, "name"),
8 name(name) 10 name(name)
@@ -52,20 +54,65 @@ QPDF_Name::unparse() @@ -52,20 +54,65 @@ QPDF_Name::unparse()
52 return normalizeName(this->name); 54 return normalizeName(this->name);
53 } 55 }
54 56
  57 +std::pair<bool, bool>
  58 +QPDF_Name::analyzeJSONEncoding(const std::string& name)
  59 +{
  60 + std::basic_string_view<unsigned char> view{
  61 + reinterpret_cast<const unsigned char*>(name.data()), name.size()};
  62 +
  63 + int tail = 0; // Number of continuation characters expected.
  64 + bool tail2 = false; // Potential overlong 3 octet utf-8.
  65 + bool tail3 = false; // potential overlong 4 octet
  66 + bool needs_escaping = false;
  67 + for (auto const& c: view) {
  68 + if (tail) {
  69 + if ((c & 0xc0) != 0x80) {
  70 + return {false, false};
  71 + }
  72 + if (tail2) {
  73 + if ((c & 0xe0) == 0x80) {
  74 + return {false, false};
  75 + }
  76 + tail2 = false;
  77 + } else if (tail3) {
  78 + if ((c & 0xf0) == 0x80) {
  79 + return {false, false};
  80 + }
  81 + tail3 = false;
  82 + }
  83 + tail--;
  84 + } else if (c < 0x80) {
  85 + if (!needs_escaping) {
  86 + needs_escaping = !((c > 34 && c != '\\') || c == ' ' || c == 33);
  87 + }
  88 + } else if ((c & 0xe0) == 0xc0) {
  89 + if ((c & 0xfe) == 0xc0) {
  90 + return {false, false};
  91 + }
  92 + tail = 1;
  93 + } else if ((c & 0xf0) == 0xe0) {
  94 + tail2 = (c == 0xe0);
  95 + tail = 2;
  96 + } else if ((c & 0xf8) == 0xf0) {
  97 + tail3 = (c == 0xf0);
  98 + tail = 3;
  99 + } else {
  100 + return {false, false};
  101 + }
  102 + }
  103 + return {tail == 0, !needs_escaping};
  104 +}
  105 +
55 JSON 106 JSON
56 QPDF_Name::getJSON(int json_version) 107 QPDF_Name::getJSON(int json_version)
57 { 108 {
58 if (json_version == 1) { 109 if (json_version == 1) {
59 return JSON::makeString(normalizeName(this->name)); 110 return JSON::makeString(normalizeName(this->name));
60 } else { 111 } else {
61 - bool has_8bit_chars;  
62 - bool is_valid_utf8;  
63 - bool is_utf16;  
64 - QUtil::analyze_encoding(this->name, has_8bit_chars, is_valid_utf8, is_utf16);  
65 - if (!has_8bit_chars || is_valid_utf8) {  
66 - return JSON::makeString(this->name); 112 + if (auto res = analyzeJSONEncoding(name); res.first) {
  113 + return JSON::makeString(name);
67 } else { 114 } else {
68 - return JSON::makeString("n:" + normalizeName(this->name)); 115 + return JSON::makeString("n:" + normalizeName(name));
69 } 116 }
70 } 117 }
71 } 118 }
@@ -76,12 +123,12 @@ QPDF_Name::writeJSON(int json_version, JSON::Writer&amp; p) @@ -76,12 +123,12 @@ QPDF_Name::writeJSON(int json_version, JSON::Writer&amp; p)
76 if (json_version == 1) { 123 if (json_version == 1) {
77 p << "\"" << JSON::Writer::encode_string(normalizeName(name)) << "\""; 124 p << "\"" << JSON::Writer::encode_string(normalizeName(name)) << "\"";
78 } else { 125 } else {
79 - bool has_8bit_chars;  
80 - bool is_valid_utf8;  
81 - bool is_utf16;  
82 - QUtil::analyze_encoding(this->name, has_8bit_chars, is_valid_utf8, is_utf16);  
83 - if (!has_8bit_chars || is_valid_utf8) {  
84 - p << "\"" << JSON::Writer::encode_string(name) << "\""; 126 + if (auto res = analyzeJSONEncoding(name); res.first) {
  127 + if (res.second) {
  128 + p << "\"" << name << "\"";
  129 + } else {
  130 + p << "\"" << JSON::Writer::encode_string(name) << "\"";
  131 + }
85 } else { 132 } else {
86 p << "\"n:" << JSON::Writer::encode_string(normalizeName(name)) << "\""; 133 p << "\"n:" << JSON::Writer::encode_string(normalizeName(name)) << "\"";
87 } 134 }
libqpdf/qpdf/QPDF_Name.hh
@@ -15,6 +15,11 @@ class QPDF_Name: public QPDFValue @@ -15,6 +15,11 @@ class QPDF_Name: public QPDFValue
15 15
16 // Put # into strings with characters unsuitable for name token 16 // Put # into strings with characters unsuitable for name token
17 static std::string normalizeName(std::string const& name); 17 static std::string normalizeName(std::string const& name);
  18 +
  19 + // Check whether name is valid utf-8 and whether it contains characters that require escaping.
  20 + // Return {false, false} if the name is not valid utf-8, otherwise return {true, true} if no
  21 + // characters require escaping or {true, false} if escaping is required.
  22 + static std::pair<bool, bool> analyzeJSONEncoding(std::string const& name);
18 std::string 23 std::string
19 getStringValue() const override 24 getStringValue() const override
20 { 25 {