Commit e259635986a799f0b72b6040aba8c1ed870e552a
1 parent
8b25de24
JSON: add write methods and implement unparse() in terms of those
Showing
6 changed files
with
179 additions
and
69 deletions
ChangeLog
| 1 | 2022-05-04 Jay Berkenbilt <ejb@ql.org> | 1 | 2022-05-04 Jay Berkenbilt <ejb@ql.org> |
| 2 | 2 | ||
| 3 | + * Enhance JSON by adding a write method that takes a Pipeline* and | ||
| 4 | + depth, and add several helper methods to make it easier to write | ||
| 5 | + large amounts of JSON incrementally without having to have the | ||
| 6 | + whole thing in memory. | ||
| 7 | + | ||
| 3 | * json v1 output: make "pages" and "objects" consistent. | 8 | * json v1 output: make "pages" and "objects" consistent. |
| 4 | Previously, "objects" always reflected the objects exactly as they | 9 | Previously, "objects" always reflected the objects exactly as they |
| 5 | appeared in the original file, while "pages" reflected objects | 10 | appeared in the original file, while "pages" reflected objects |
TODO
| @@ -51,11 +51,6 @@ library, when context is available, to have a pipeline rather than a | @@ -51,11 +51,6 @@ library, when context is available, to have a pipeline rather than a | ||
| 51 | FILE* or std::ostream. This makes it possible for people to capture | 51 | FILE* or std::ostream. This makes it possible for people to capture |
| 52 | output more flexibly. | 52 | output more flexibly. |
| 53 | 53 | ||
| 54 | -JSON: rather than unparse() -> string, there should be write method | ||
| 55 | -that takes a pipeline and a depth. Then rewrite all the unparse | ||
| 56 | -methods to use it. This makes incremental write possible as well as | ||
| 57 | -writing arbitrarily large amounts of output. | ||
| 58 | - | ||
| 59 | JSON::parse should work from an InputSource. BufferInputSource can | 54 | JSON::parse should work from an InputSource. BufferInputSource can |
| 60 | already start with a std::string. | 55 | already start with a std::string. |
| 61 | 56 | ||
| @@ -64,12 +59,6 @@ writes data to the pipeline. Its writer should create a Pl_Base64 -> | @@ -64,12 +59,6 @@ writes data to the pipeline. Its writer should create a Pl_Base64 -> | ||
| 64 | Pl_Concatenate in front of the pipeline passed to write and call the | 59 | Pl_Concatenate in front of the pipeline passed to write and call the |
| 65 | function with that. | 60 | function with that. |
| 66 | 61 | ||
| 67 | -Add methods needed to do incremental writes. Basically we need to | ||
| 68 | -expose functionality the array and dictionary unparse methods. Maybe | ||
| 69 | -we can have a DictionaryWriter and an ArrayWriter that deal with the | ||
| 70 | -first/depth logic and have writeElement or writeEntry(key, value) | ||
| 71 | -methods. | ||
| 72 | - | ||
| 73 | For json output, do not unparse to string. Use the writers instead. | 62 | For json output, do not unparse to string. Use the writers instead. |
| 74 | Write incrementally. This changes ordering only, but we should be able | 63 | Write incrementally. This changes ordering only, but we should be able |
| 75 | to manually update the test output for those cases. Objects should be | 64 | to manually update the test output for those cases. Objects should be |
cSpell.json
include/qpdf/JSON.hh
| @@ -45,12 +45,56 @@ | @@ -45,12 +45,56 @@ | ||
| 45 | #include <string> | 45 | #include <string> |
| 46 | #include <vector> | 46 | #include <vector> |
| 47 | 47 | ||
| 48 | +class Pipeline; | ||
| 49 | + | ||
| 48 | class JSON | 50 | class JSON |
| 49 | { | 51 | { |
| 50 | public: | 52 | public: |
| 51 | QPDF_DLL | 53 | QPDF_DLL |
| 52 | std::string unparse() const; | 54 | std::string unparse() const; |
| 53 | 55 | ||
| 56 | + // Write the JSON object through a pipeline. The `depth` parameter | ||
| 57 | + // specifies how deeply nested this is in another JSON structure, | ||
| 58 | + // which makes it possible to write clean-looking JSON | ||
| 59 | + // incrementally. | ||
| 60 | + QPDF_DLL | ||
| 61 | + void write(Pipeline*, size_t depth = 0) const; | ||
| 62 | + | ||
| 63 | + // Helper methods for writing JSON incrementally. Several methods | ||
| 64 | + // take a `bool& first` parameter. The open methods always set it | ||
| 65 | + // to true, and the methods to output items always set it to | ||
| 66 | + // false. This way, the item and close methods can always know | ||
| 67 | + // whether or not a first item is being written. The intended mode | ||
| 68 | + // of operation is to start with `bool first = true` (though it | ||
| 69 | + // doesn't matter how it's initialized) and just pass the same | ||
| 70 | + // `first` through to all the methods, letting the JSON object use | ||
| 71 | + // it to keep track of when it's writing a first object and when | ||
| 72 | + // it's not. | ||
| 73 | + | ||
| 74 | + // Open methods ignore the value of first and set it to true | ||
| 75 | + QPDF_DLL | ||
| 76 | + static void writeDictionaryOpen(Pipeline*, bool& first, size_t depth = 0); | ||
| 77 | + QPDF_DLL | ||
| 78 | + static void writeArrayOpen(Pipeline*, bool& first, size_t depth = 0); | ||
| 79 | + // Close methods don't modify first. A true value indicates that | ||
| 80 | + // we are closing an empty object. | ||
| 81 | + QPDF_DLL | ||
| 82 | + static void writeDictionaryClose(Pipeline*, bool first, size_t depth = 0); | ||
| 83 | + QPDF_DLL | ||
| 84 | + static void writeArrayClose(Pipeline*, bool first, size_t depth = 0); | ||
| 85 | + // The item methods use the value of first to determine if this is | ||
| 86 | + // the first item and always set it to false. | ||
| 87 | + QPDF_DLL | ||
| 88 | + static void writeDictionaryItem( | ||
| 89 | + Pipeline*, | ||
| 90 | + bool& first, | ||
| 91 | + std::string const& key, | ||
| 92 | + JSON const& value, | ||
| 93 | + size_t depth = 0); | ||
| 94 | + QPDF_DLL | ||
| 95 | + static void writeArrayItem( | ||
| 96 | + Pipeline*, bool& first, JSON const& element, size_t depth = 0); | ||
| 97 | + | ||
| 54 | // The JSON spec calls dictionaries "objects", but that creates | 98 | // The JSON spec calls dictionaries "objects", but that creates |
| 55 | // too much confusion when referring to instances of the JSON | 99 | // too much confusion when referring to instances of the JSON |
| 56 | // class. | 100 | // class. |
| @@ -224,29 +268,33 @@ class JSON | @@ -224,29 +268,33 @@ class JSON | ||
| 224 | 268 | ||
| 225 | private: | 269 | private: |
| 226 | static std::string encode_string(std::string const& utf8); | 270 | static std::string encode_string(std::string const& utf8); |
| 271 | + static void | ||
| 272 | + writeClose(Pipeline* p, bool first, size_t depth, char const* delimeter); | ||
| 273 | + static void writeIndent(Pipeline* p, size_t depth); | ||
| 274 | + static void writeNext(Pipeline* p, bool& first, size_t depth); | ||
| 227 | 275 | ||
| 228 | struct JSON_value | 276 | struct JSON_value |
| 229 | { | 277 | { |
| 230 | virtual ~JSON_value() = default; | 278 | virtual ~JSON_value() = default; |
| 231 | - virtual std::string unparse(size_t depth) const = 0; | 279 | + virtual void write(Pipeline*, size_t depth) const = 0; |
| 232 | }; | 280 | }; |
| 233 | struct JSON_dictionary: public JSON_value | 281 | struct JSON_dictionary: public JSON_value |
| 234 | { | 282 | { |
| 235 | virtual ~JSON_dictionary() = default; | 283 | virtual ~JSON_dictionary() = default; |
| 236 | - virtual std::string unparse(size_t depth) const; | 284 | + virtual void write(Pipeline*, size_t depth) const; |
| 237 | std::map<std::string, std::shared_ptr<JSON_value>> members; | 285 | std::map<std::string, std::shared_ptr<JSON_value>> members; |
| 238 | }; | 286 | }; |
| 239 | struct JSON_array: public JSON_value | 287 | struct JSON_array: public JSON_value |
| 240 | { | 288 | { |
| 241 | virtual ~JSON_array() = default; | 289 | virtual ~JSON_array() = default; |
| 242 | - virtual std::string unparse(size_t depth) const; | 290 | + virtual void write(Pipeline*, size_t depth) const; |
| 243 | std::vector<std::shared_ptr<JSON_value>> elements; | 291 | std::vector<std::shared_ptr<JSON_value>> elements; |
| 244 | }; | 292 | }; |
| 245 | struct JSON_string: public JSON_value | 293 | struct JSON_string: public JSON_value |
| 246 | { | 294 | { |
| 247 | JSON_string(std::string const& utf8); | 295 | JSON_string(std::string const& utf8); |
| 248 | virtual ~JSON_string() = default; | 296 | virtual ~JSON_string() = default; |
| 249 | - virtual std::string unparse(size_t depth) const; | 297 | + virtual void write(Pipeline*, size_t depth) const; |
| 250 | std::string utf8; | 298 | std::string utf8; |
| 251 | std::string encoded; | 299 | std::string encoded; |
| 252 | }; | 300 | }; |
| @@ -256,20 +304,20 @@ class JSON | @@ -256,20 +304,20 @@ class JSON | ||
| 256 | JSON_number(double val); | 304 | JSON_number(double val); |
| 257 | JSON_number(std::string const& val); | 305 | JSON_number(std::string const& val); |
| 258 | virtual ~JSON_number() = default; | 306 | virtual ~JSON_number() = default; |
| 259 | - virtual std::string unparse(size_t depth) const; | 307 | + virtual void write(Pipeline*, size_t depth) const; |
| 260 | std::string encoded; | 308 | std::string encoded; |
| 261 | }; | 309 | }; |
| 262 | struct JSON_bool: public JSON_value | 310 | struct JSON_bool: public JSON_value |
| 263 | { | 311 | { |
| 264 | JSON_bool(bool val); | 312 | JSON_bool(bool val); |
| 265 | virtual ~JSON_bool() = default; | 313 | virtual ~JSON_bool() = default; |
| 266 | - virtual std::string unparse(size_t depth) const; | 314 | + virtual void write(Pipeline*, size_t depth) const; |
| 267 | bool value; | 315 | bool value; |
| 268 | }; | 316 | }; |
| 269 | struct JSON_null: public JSON_value | 317 | struct JSON_null: public JSON_value |
| 270 | { | 318 | { |
| 271 | virtual ~JSON_null() = default; | 319 | virtual ~JSON_null() = default; |
| 272 | - virtual std::string unparse(size_t depth) const; | 320 | + virtual void write(Pipeline*, size_t depth) const; |
| 273 | }; | 321 | }; |
| 274 | 322 | ||
| 275 | JSON(std::shared_ptr<JSON_value>); | 323 | JSON(std::shared_ptr<JSON_value>); |
libqpdf/JSON.cc
| 1 | #include <qpdf/JSON.hh> | 1 | #include <qpdf/JSON.hh> |
| 2 | 2 | ||
| 3 | +#include <qpdf/Pipeline.hh> | ||
| 4 | +#include <qpdf/Pl_String.hh> | ||
| 3 | #include <qpdf/QIntC.hh> | 5 | #include <qpdf/QIntC.hh> |
| 4 | #include <qpdf/QTC.hh> | 6 | #include <qpdf/QTC.hh> |
| 5 | #include <qpdf/QUtil.hh> | 7 | #include <qpdf/QUtil.hh> |
| @@ -18,51 +20,103 @@ JSON::JSON(std::shared_ptr<JSON_value> value) : | @@ -18,51 +20,103 @@ JSON::JSON(std::shared_ptr<JSON_value> value) : | ||
| 18 | { | 20 | { |
| 19 | } | 21 | } |
| 20 | 22 | ||
| 21 | -std::string | ||
| 22 | -JSON::JSON_dictionary::unparse(size_t depth) const | 23 | +void |
| 24 | +JSON::writeClose(Pipeline* p, bool first, size_t depth, char const* delimiter) | ||
| 25 | +{ | ||
| 26 | + if (!first) { | ||
| 27 | + *p << "\n"; | ||
| 28 | + writeIndent(p, depth); | ||
| 29 | + } | ||
| 30 | + *p << delimiter; | ||
| 31 | +} | ||
| 32 | + | ||
| 33 | +void | ||
| 34 | +JSON::writeIndent(Pipeline* p, size_t depth) | ||
| 35 | +{ | ||
| 36 | + for (size_t i = 0; i < depth; ++i) { | ||
| 37 | + *p << " "; | ||
| 38 | + } | ||
| 39 | +} | ||
| 40 | + | ||
| 41 | +void | ||
| 42 | +JSON::writeNext(Pipeline* p, bool& first, size_t depth) | ||
| 43 | +{ | ||
| 44 | + if (first) { | ||
| 45 | + first = false; | ||
| 46 | + } else { | ||
| 47 | + *p << ","; | ||
| 48 | + } | ||
| 49 | + *p << "\n"; | ||
| 50 | + writeIndent(p, 1 + depth); | ||
| 51 | +} | ||
| 52 | + | ||
| 53 | +void | ||
| 54 | +JSON::writeDictionaryOpen(Pipeline* p, bool& first, size_t depth) | ||
| 55 | +{ | ||
| 56 | + *p << "{"; | ||
| 57 | + first = true; | ||
| 58 | +} | ||
| 59 | + | ||
| 60 | +void | ||
| 61 | +JSON::writeArrayOpen(Pipeline* p, bool& first, size_t depth) | ||
| 62 | +{ | ||
| 63 | + *p << "["; | ||
| 64 | + first = true; | ||
| 65 | +} | ||
| 66 | + | ||
| 67 | +void | ||
| 68 | +JSON::writeDictionaryClose(Pipeline* p, bool first, size_t depth) | ||
| 69 | +{ | ||
| 70 | + writeClose(p, first, depth, "}"); | ||
| 71 | +} | ||
| 72 | + | ||
| 73 | +void | ||
| 74 | +JSON::writeArrayClose(Pipeline* p, bool first, size_t depth) | ||
| 75 | +{ | ||
| 76 | + writeClose(p, first, depth, "]"); | ||
| 77 | +} | ||
| 78 | + | ||
| 79 | +void | ||
| 80 | +JSON::writeDictionaryItem( | ||
| 81 | + Pipeline* p, | ||
| 82 | + bool& first, | ||
| 83 | + std::string const& key, | ||
| 84 | + JSON const& value, | ||
| 85 | + size_t depth) | ||
| 86 | +{ | ||
| 87 | + writeNext(p, first, depth); | ||
| 88 | + *p << "\"" << key << "\": "; | ||
| 89 | + value.write(p, 1 + depth); | ||
| 90 | +} | ||
| 91 | + | ||
| 92 | +void | ||
| 93 | +JSON::writeArrayItem( | ||
| 94 | + Pipeline* p, bool& first, JSON const& element, size_t depth) | ||
| 95 | +{ | ||
| 96 | + writeNext(p, first, depth); | ||
| 97 | + element.write(p, 1 + depth); | ||
| 98 | +} | ||
| 99 | + | ||
| 100 | +void | ||
| 101 | +JSON::JSON_dictionary::write(Pipeline* p, size_t depth) const | ||
| 23 | { | 102 | { |
| 24 | - std::string result = "{"; | ||
| 25 | bool first = true; | 103 | bool first = true; |
| 104 | + writeDictionaryOpen(p, first, depth); | ||
| 26 | for (auto const& iter: members) { | 105 | for (auto const& iter: members) { |
| 27 | - if (first) { | ||
| 28 | - first = false; | ||
| 29 | - } else { | ||
| 30 | - result.append(1, ','); | ||
| 31 | - } | ||
| 32 | - result.append(1, '\n'); | ||
| 33 | - result.append(2 * (1 + depth), ' '); | ||
| 34 | - result += | ||
| 35 | - ("\"" + iter.first + "\": " + iter.second->unparse(1 + depth)); | 106 | + writeDictionaryItem(p, first, iter.first, iter.second, depth); |
| 36 | } | 107 | } |
| 37 | - if (!first) { | ||
| 38 | - result.append(1, '\n'); | ||
| 39 | - result.append(2 * depth, ' '); | ||
| 40 | - } | ||
| 41 | - result.append(1, '}'); | ||
| 42 | - return result; | 108 | + writeDictionaryClose(p, first, depth); |
| 43 | } | 109 | } |
| 44 | 110 | ||
| 45 | -std::string | ||
| 46 | -JSON::JSON_array::unparse(size_t depth) const | 111 | +void |
| 112 | +JSON::JSON_array::write(Pipeline* p, size_t depth) const | ||
| 47 | { | 113 | { |
| 48 | - std::string result = "["; | ||
| 49 | bool first = true; | 114 | bool first = true; |
| 115 | + writeArrayOpen(p, first, depth); | ||
| 50 | for (auto const& element: elements) { | 116 | for (auto const& element: elements) { |
| 51 | - if (first) { | ||
| 52 | - first = false; | ||
| 53 | - } else { | ||
| 54 | - result.append(1, ','); | ||
| 55 | - } | ||
| 56 | - result.append(1, '\n'); | ||
| 57 | - result.append(2 * (1 + depth), ' '); | ||
| 58 | - result += element->unparse(1 + depth); | ||
| 59 | - } | ||
| 60 | - if (!first) { | ||
| 61 | - result.append(1, '\n'); | ||
| 62 | - result.append(2 * depth, ' '); | 117 | + writeArrayItem(p, first, element, depth); |
| 63 | } | 118 | } |
| 64 | - result.append(1, ']'); | ||
| 65 | - return result; | 119 | + writeArrayClose(p, first, depth); |
| 66 | } | 120 | } |
| 67 | 121 | ||
| 68 | JSON::JSON_string::JSON_string(std::string const& utf8) : | 122 | JSON::JSON_string::JSON_string(std::string const& utf8) : |
| @@ -71,10 +125,10 @@ JSON::JSON_string::JSON_string(std::string const& utf8) : | @@ -71,10 +125,10 @@ JSON::JSON_string::JSON_string(std::string const& utf8) : | ||
| 71 | { | 125 | { |
| 72 | } | 126 | } |
| 73 | 127 | ||
| 74 | -std::string | ||
| 75 | -JSON::JSON_string::unparse(size_t) const | 128 | +void |
| 129 | +JSON::JSON_string::write(Pipeline* p, size_t) const | ||
| 76 | { | 130 | { |
| 77 | - return "\"" + encoded + "\""; | 131 | + *p << "\"" << encoded << "\""; |
| 78 | } | 132 | } |
| 79 | 133 | ||
| 80 | JSON::JSON_number::JSON_number(long long value) : | 134 | JSON::JSON_number::JSON_number(long long value) : |
| @@ -92,10 +146,10 @@ JSON::JSON_number::JSON_number(std::string const& value) : | @@ -92,10 +146,10 @@ JSON::JSON_number::JSON_number(std::string const& value) : | ||
| 92 | { | 146 | { |
| 93 | } | 147 | } |
| 94 | 148 | ||
| 95 | -std::string | ||
| 96 | -JSON::JSON_number::unparse(size_t) const | 149 | +void |
| 150 | +JSON::JSON_number::write(Pipeline* p, size_t) const | ||
| 97 | { | 151 | { |
| 98 | - return encoded; | 152 | + *p << encoded; |
| 99 | } | 153 | } |
| 100 | 154 | ||
| 101 | JSON::JSON_bool::JSON_bool(bool val) : | 155 | JSON::JSON_bool::JSON_bool(bool val) : |
| @@ -103,29 +157,38 @@ JSON::JSON_bool::JSON_bool(bool val) : | @@ -103,29 +157,38 @@ JSON::JSON_bool::JSON_bool(bool val) : | ||
| 103 | { | 157 | { |
| 104 | } | 158 | } |
| 105 | 159 | ||
| 106 | -std::string | ||
| 107 | -JSON::JSON_bool::unparse(size_t) const | 160 | +void |
| 161 | +JSON::JSON_bool::write(Pipeline* p, size_t) const | ||
| 108 | { | 162 | { |
| 109 | - return value ? "true" : "false"; | 163 | + *p << (value ? "true" : "false"); |
| 110 | } | 164 | } |
| 111 | 165 | ||
| 112 | -std::string | ||
| 113 | -JSON::JSON_null::unparse(size_t) const | 166 | +void |
| 167 | +JSON::JSON_null::write(Pipeline* p, size_t) const | ||
| 114 | { | 168 | { |
| 115 | - return "null"; | 169 | + *p << "null"; |
| 116 | } | 170 | } |
| 117 | 171 | ||
| 118 | -std::string | ||
| 119 | -JSON::unparse() const | 172 | +void |
| 173 | +JSON::write(Pipeline* p, size_t depth) const | ||
| 120 | { | 174 | { |
| 121 | if (0 == this->m->value.get()) { | 175 | if (0 == this->m->value.get()) { |
| 122 | - return "null"; | 176 | + *p << "null"; |
| 123 | } else { | 177 | } else { |
| 124 | - return this->m->value->unparse(0); | 178 | + this->m->value->write(p, depth); |
| 125 | } | 179 | } |
| 126 | } | 180 | } |
| 127 | 181 | ||
| 128 | std::string | 182 | std::string |
| 183 | +JSON::unparse() const | ||
| 184 | +{ | ||
| 185 | + std::string s; | ||
| 186 | + Pl_String p("unparse", s); | ||
| 187 | + write(&p, 0); | ||
| 188 | + return s; | ||
| 189 | +} | ||
| 190 | + | ||
| 191 | +std::string | ||
| 129 | JSON::encode_string(std::string const& str) | 192 | JSON::encode_string(std::string const& str) |
| 130 | { | 193 | { |
| 131 | std::string result; | 194 | std::string result; |
manual/release-notes.rst
| @@ -123,6 +123,10 @@ For a detailed list of changes, please see the file | @@ -123,6 +123,10 @@ For a detailed list of changes, please see the file | ||
| 123 | - Add new ``Pipeline`` type ``Pl_String`` to append to a | 123 | - Add new ``Pipeline`` type ``Pl_String`` to append to a |
| 124 | ``std::string``. | 124 | ``std::string``. |
| 125 | 125 | ||
| 126 | + - Enhance JSON class to better support incrementally reading and | ||
| 127 | + writing large amounts of data without having to keep everything | ||
| 128 | + in memory. | ||
| 129 | + | ||
| 126 | - Other changes | 130 | - Other changes |
| 127 | 131 | ||
| 128 | - In JSON v1 mode, the ``"objects"`` key now reflects the repaired | 132 | - In JSON v1 mode, the ``"objects"`` key now reflects the repaired |