Commit e259635986a799f0b72b6040aba8c1ed870e552a
1 parent
8b25de24
JSON: add write methods and implement unparse() in terms of those
Showing
6 changed files
with
179 additions
and
69 deletions
ChangeLog
| 1 | 1 | 2022-05-04 Jay Berkenbilt <ejb@ql.org> |
| 2 | 2 | |
| 3 | + * Enhance JSON by adding a write method that takes a Pipeline* and | |
| 4 | + depth, and add several helper methods to make it easier to write | |
| 5 | + large amounts of JSON incrementally without having to have the | |
| 6 | + whole thing in memory. | |
| 7 | + | |
| 3 | 8 | * json v1 output: make "pages" and "objects" consistent. |
| 4 | 9 | Previously, "objects" always reflected the objects exactly as they |
| 5 | 10 | appeared in the original file, while "pages" reflected objects | ... | ... |
TODO
| ... | ... | @@ -51,11 +51,6 @@ library, when context is available, to have a pipeline rather than a |
| 51 | 51 | FILE* or std::ostream. This makes it possible for people to capture |
| 52 | 52 | output more flexibly. |
| 53 | 53 | |
| 54 | -JSON: rather than unparse() -> string, there should be write method | |
| 55 | -that takes a pipeline and a depth. Then rewrite all the unparse | |
| 56 | -methods to use it. This makes incremental write possible as well as | |
| 57 | -writing arbitrarily large amounts of output. | |
| 58 | - | |
| 59 | 54 | JSON::parse should work from an InputSource. BufferInputSource can |
| 60 | 55 | already start with a std::string. |
| 61 | 56 | |
| ... | ... | @@ -64,12 +59,6 @@ writes data to the pipeline. Its writer should create a Pl_Base64 -> |
| 64 | 59 | Pl_Concatenate in front of the pipeline passed to write and call the |
| 65 | 60 | function with that. |
| 66 | 61 | |
| 67 | -Add methods needed to do incremental writes. Basically we need to | |
| 68 | -expose functionality the array and dictionary unparse methods. Maybe | |
| 69 | -we can have a DictionaryWriter and an ArrayWriter that deal with the | |
| 70 | -first/depth logic and have writeElement or writeEntry(key, value) | |
| 71 | -methods. | |
| 72 | - | |
| 73 | 62 | For json output, do not unparse to string. Use the writers instead. |
| 74 | 63 | Write incrementally. This changes ordering only, but we should be able |
| 75 | 64 | to manually update the test output for those cases. Objects should be | ... | ... |
cSpell.json
include/qpdf/JSON.hh
| ... | ... | @@ -45,12 +45,56 @@ |
| 45 | 45 | #include <string> |
| 46 | 46 | #include <vector> |
| 47 | 47 | |
| 48 | +class Pipeline; | |
| 49 | + | |
| 48 | 50 | class JSON |
| 49 | 51 | { |
| 50 | 52 | public: |
| 51 | 53 | QPDF_DLL |
| 52 | 54 | std::string unparse() const; |
| 53 | 55 | |
| 56 | + // Write the JSON object through a pipeline. The `depth` parameter | |
| 57 | + // specifies how deeply nested this is in another JSON structure, | |
| 58 | + // which makes it possible to write clean-looking JSON | |
| 59 | + // incrementally. | |
| 60 | + QPDF_DLL | |
| 61 | + void write(Pipeline*, size_t depth = 0) const; | |
| 62 | + | |
| 63 | + // Helper methods for writing JSON incrementally. Several methods | |
| 64 | + // take a `bool& first` parameter. The open methods always set it | |
| 65 | + // to true, and the methods to output items always set it to | |
| 66 | + // false. This way, the item and close methods can always know | |
| 67 | + // whether or not a first item is being written. The intended mode | |
| 68 | + // of operation is to start with `bool first = true` (though it | |
| 69 | + // doesn't matter how it's initialized) and just pass the same | |
| 70 | + // `first` through to all the methods, letting the JSON object use | |
| 71 | + // it to keep track of when it's writing a first object and when | |
| 72 | + // it's not. | |
| 73 | + | |
| 74 | + // Open methods ignore the value of first and set it to true | |
| 75 | + QPDF_DLL | |
| 76 | + static void writeDictionaryOpen(Pipeline*, bool& first, size_t depth = 0); | |
| 77 | + QPDF_DLL | |
| 78 | + static void writeArrayOpen(Pipeline*, bool& first, size_t depth = 0); | |
| 79 | + // Close methods don't modify first. A true value indicates that | |
| 80 | + // we are closing an empty object. | |
| 81 | + QPDF_DLL | |
| 82 | + static void writeDictionaryClose(Pipeline*, bool first, size_t depth = 0); | |
| 83 | + QPDF_DLL | |
| 84 | + static void writeArrayClose(Pipeline*, bool first, size_t depth = 0); | |
| 85 | + // The item methods use the value of first to determine if this is | |
| 86 | + // the first item and always set it to false. | |
| 87 | + QPDF_DLL | |
| 88 | + static void writeDictionaryItem( | |
| 89 | + Pipeline*, | |
| 90 | + bool& first, | |
| 91 | + std::string const& key, | |
| 92 | + JSON const& value, | |
| 93 | + size_t depth = 0); | |
| 94 | + QPDF_DLL | |
| 95 | + static void writeArrayItem( | |
| 96 | + Pipeline*, bool& first, JSON const& element, size_t depth = 0); | |
| 97 | + | |
| 54 | 98 | // The JSON spec calls dictionaries "objects", but that creates |
| 55 | 99 | // too much confusion when referring to instances of the JSON |
| 56 | 100 | // class. |
| ... | ... | @@ -224,29 +268,33 @@ class JSON |
| 224 | 268 | |
| 225 | 269 | private: |
| 226 | 270 | static std::string encode_string(std::string const& utf8); |
| 271 | + static void | |
| 272 | + writeClose(Pipeline* p, bool first, size_t depth, char const* delimeter); | |
| 273 | + static void writeIndent(Pipeline* p, size_t depth); | |
| 274 | + static void writeNext(Pipeline* p, bool& first, size_t depth); | |
| 227 | 275 | |
| 228 | 276 | struct JSON_value |
| 229 | 277 | { |
| 230 | 278 | virtual ~JSON_value() = default; |
| 231 | - virtual std::string unparse(size_t depth) const = 0; | |
| 279 | + virtual void write(Pipeline*, size_t depth) const = 0; | |
| 232 | 280 | }; |
| 233 | 281 | struct JSON_dictionary: public JSON_value |
| 234 | 282 | { |
| 235 | 283 | virtual ~JSON_dictionary() = default; |
| 236 | - virtual std::string unparse(size_t depth) const; | |
| 284 | + virtual void write(Pipeline*, size_t depth) const; | |
| 237 | 285 | std::map<std::string, std::shared_ptr<JSON_value>> members; |
| 238 | 286 | }; |
| 239 | 287 | struct JSON_array: public JSON_value |
| 240 | 288 | { |
| 241 | 289 | virtual ~JSON_array() = default; |
| 242 | - virtual std::string unparse(size_t depth) const; | |
| 290 | + virtual void write(Pipeline*, size_t depth) const; | |
| 243 | 291 | std::vector<std::shared_ptr<JSON_value>> elements; |
| 244 | 292 | }; |
| 245 | 293 | struct JSON_string: public JSON_value |
| 246 | 294 | { |
| 247 | 295 | JSON_string(std::string const& utf8); |
| 248 | 296 | virtual ~JSON_string() = default; |
| 249 | - virtual std::string unparse(size_t depth) const; | |
| 297 | + virtual void write(Pipeline*, size_t depth) const; | |
| 250 | 298 | std::string utf8; |
| 251 | 299 | std::string encoded; |
| 252 | 300 | }; |
| ... | ... | @@ -256,20 +304,20 @@ class JSON |
| 256 | 304 | JSON_number(double val); |
| 257 | 305 | JSON_number(std::string const& val); |
| 258 | 306 | virtual ~JSON_number() = default; |
| 259 | - virtual std::string unparse(size_t depth) const; | |
| 307 | + virtual void write(Pipeline*, size_t depth) const; | |
| 260 | 308 | std::string encoded; |
| 261 | 309 | }; |
| 262 | 310 | struct JSON_bool: public JSON_value |
| 263 | 311 | { |
| 264 | 312 | JSON_bool(bool val); |
| 265 | 313 | virtual ~JSON_bool() = default; |
| 266 | - virtual std::string unparse(size_t depth) const; | |
| 314 | + virtual void write(Pipeline*, size_t depth) const; | |
| 267 | 315 | bool value; |
| 268 | 316 | }; |
| 269 | 317 | struct JSON_null: public JSON_value |
| 270 | 318 | { |
| 271 | 319 | virtual ~JSON_null() = default; |
| 272 | - virtual std::string unparse(size_t depth) const; | |
| 320 | + virtual void write(Pipeline*, size_t depth) const; | |
| 273 | 321 | }; |
| 274 | 322 | |
| 275 | 323 | JSON(std::shared_ptr<JSON_value>); | ... | ... |
libqpdf/JSON.cc
| 1 | 1 | #include <qpdf/JSON.hh> |
| 2 | 2 | |
| 3 | +#include <qpdf/Pipeline.hh> | |
| 4 | +#include <qpdf/Pl_String.hh> | |
| 3 | 5 | #include <qpdf/QIntC.hh> |
| 4 | 6 | #include <qpdf/QTC.hh> |
| 5 | 7 | #include <qpdf/QUtil.hh> |
| ... | ... | @@ -18,51 +20,103 @@ JSON::JSON(std::shared_ptr<JSON_value> value) : |
| 18 | 20 | { |
| 19 | 21 | } |
| 20 | 22 | |
| 21 | -std::string | |
| 22 | -JSON::JSON_dictionary::unparse(size_t depth) const | |
| 23 | +void | |
| 24 | +JSON::writeClose(Pipeline* p, bool first, size_t depth, char const* delimiter) | |
| 25 | +{ | |
| 26 | + if (!first) { | |
| 27 | + *p << "\n"; | |
| 28 | + writeIndent(p, depth); | |
| 29 | + } | |
| 30 | + *p << delimiter; | |
| 31 | +} | |
| 32 | + | |
| 33 | +void | |
| 34 | +JSON::writeIndent(Pipeline* p, size_t depth) | |
| 35 | +{ | |
| 36 | + for (size_t i = 0; i < depth; ++i) { | |
| 37 | + *p << " "; | |
| 38 | + } | |
| 39 | +} | |
| 40 | + | |
| 41 | +void | |
| 42 | +JSON::writeNext(Pipeline* p, bool& first, size_t depth) | |
| 43 | +{ | |
| 44 | + if (first) { | |
| 45 | + first = false; | |
| 46 | + } else { | |
| 47 | + *p << ","; | |
| 48 | + } | |
| 49 | + *p << "\n"; | |
| 50 | + writeIndent(p, 1 + depth); | |
| 51 | +} | |
| 52 | + | |
| 53 | +void | |
| 54 | +JSON::writeDictionaryOpen(Pipeline* p, bool& first, size_t depth) | |
| 55 | +{ | |
| 56 | + *p << "{"; | |
| 57 | + first = true; | |
| 58 | +} | |
| 59 | + | |
| 60 | +void | |
| 61 | +JSON::writeArrayOpen(Pipeline* p, bool& first, size_t depth) | |
| 62 | +{ | |
| 63 | + *p << "["; | |
| 64 | + first = true; | |
| 65 | +} | |
| 66 | + | |
| 67 | +void | |
| 68 | +JSON::writeDictionaryClose(Pipeline* p, bool first, size_t depth) | |
| 69 | +{ | |
| 70 | + writeClose(p, first, depth, "}"); | |
| 71 | +} | |
| 72 | + | |
| 73 | +void | |
| 74 | +JSON::writeArrayClose(Pipeline* p, bool first, size_t depth) | |
| 75 | +{ | |
| 76 | + writeClose(p, first, depth, "]"); | |
| 77 | +} | |
| 78 | + | |
| 79 | +void | |
| 80 | +JSON::writeDictionaryItem( | |
| 81 | + Pipeline* p, | |
| 82 | + bool& first, | |
| 83 | + std::string const& key, | |
| 84 | + JSON const& value, | |
| 85 | + size_t depth) | |
| 86 | +{ | |
| 87 | + writeNext(p, first, depth); | |
| 88 | + *p << "\"" << key << "\": "; | |
| 89 | + value.write(p, 1 + depth); | |
| 90 | +} | |
| 91 | + | |
| 92 | +void | |
| 93 | +JSON::writeArrayItem( | |
| 94 | + Pipeline* p, bool& first, JSON const& element, size_t depth) | |
| 95 | +{ | |
| 96 | + writeNext(p, first, depth); | |
| 97 | + element.write(p, 1 + depth); | |
| 98 | +} | |
| 99 | + | |
| 100 | +void | |
| 101 | +JSON::JSON_dictionary::write(Pipeline* p, size_t depth) const | |
| 23 | 102 | { |
| 24 | - std::string result = "{"; | |
| 25 | 103 | bool first = true; |
| 104 | + writeDictionaryOpen(p, first, depth); | |
| 26 | 105 | for (auto const& iter: members) { |
| 27 | - if (first) { | |
| 28 | - first = false; | |
| 29 | - } else { | |
| 30 | - result.append(1, ','); | |
| 31 | - } | |
| 32 | - result.append(1, '\n'); | |
| 33 | - result.append(2 * (1 + depth), ' '); | |
| 34 | - result += | |
| 35 | - ("\"" + iter.first + "\": " + iter.second->unparse(1 + depth)); | |
| 106 | + writeDictionaryItem(p, first, iter.first, iter.second, depth); | |
| 36 | 107 | } |
| 37 | - if (!first) { | |
| 38 | - result.append(1, '\n'); | |
| 39 | - result.append(2 * depth, ' '); | |
| 40 | - } | |
| 41 | - result.append(1, '}'); | |
| 42 | - return result; | |
| 108 | + writeDictionaryClose(p, first, depth); | |
| 43 | 109 | } |
| 44 | 110 | |
| 45 | -std::string | |
| 46 | -JSON::JSON_array::unparse(size_t depth) const | |
| 111 | +void | |
| 112 | +JSON::JSON_array::write(Pipeline* p, size_t depth) const | |
| 47 | 113 | { |
| 48 | - std::string result = "["; | |
| 49 | 114 | bool first = true; |
| 115 | + writeArrayOpen(p, first, depth); | |
| 50 | 116 | for (auto const& element: elements) { |
| 51 | - if (first) { | |
| 52 | - first = false; | |
| 53 | - } else { | |
| 54 | - result.append(1, ','); | |
| 55 | - } | |
| 56 | - result.append(1, '\n'); | |
| 57 | - result.append(2 * (1 + depth), ' '); | |
| 58 | - result += element->unparse(1 + depth); | |
| 59 | - } | |
| 60 | - if (!first) { | |
| 61 | - result.append(1, '\n'); | |
| 62 | - result.append(2 * depth, ' '); | |
| 117 | + writeArrayItem(p, first, element, depth); | |
| 63 | 118 | } |
| 64 | - result.append(1, ']'); | |
| 65 | - return result; | |
| 119 | + writeArrayClose(p, first, depth); | |
| 66 | 120 | } |
| 67 | 121 | |
| 68 | 122 | JSON::JSON_string::JSON_string(std::string const& utf8) : |
| ... | ... | @@ -71,10 +125,10 @@ JSON::JSON_string::JSON_string(std::string const& utf8) : |
| 71 | 125 | { |
| 72 | 126 | } |
| 73 | 127 | |
| 74 | -std::string | |
| 75 | -JSON::JSON_string::unparse(size_t) const | |
| 128 | +void | |
| 129 | +JSON::JSON_string::write(Pipeline* p, size_t) const | |
| 76 | 130 | { |
| 77 | - return "\"" + encoded + "\""; | |
| 131 | + *p << "\"" << encoded << "\""; | |
| 78 | 132 | } |
| 79 | 133 | |
| 80 | 134 | JSON::JSON_number::JSON_number(long long value) : |
| ... | ... | @@ -92,10 +146,10 @@ JSON::JSON_number::JSON_number(std::string const& value) : |
| 92 | 146 | { |
| 93 | 147 | } |
| 94 | 148 | |
| 95 | -std::string | |
| 96 | -JSON::JSON_number::unparse(size_t) const | |
| 149 | +void | |
| 150 | +JSON::JSON_number::write(Pipeline* p, size_t) const | |
| 97 | 151 | { |
| 98 | - return encoded; | |
| 152 | + *p << encoded; | |
| 99 | 153 | } |
| 100 | 154 | |
| 101 | 155 | JSON::JSON_bool::JSON_bool(bool val) : |
| ... | ... | @@ -103,29 +157,38 @@ JSON::JSON_bool::JSON_bool(bool val) : |
| 103 | 157 | { |
| 104 | 158 | } |
| 105 | 159 | |
| 106 | -std::string | |
| 107 | -JSON::JSON_bool::unparse(size_t) const | |
| 160 | +void | |
| 161 | +JSON::JSON_bool::write(Pipeline* p, size_t) const | |
| 108 | 162 | { |
| 109 | - return value ? "true" : "false"; | |
| 163 | + *p << (value ? "true" : "false"); | |
| 110 | 164 | } |
| 111 | 165 | |
| 112 | -std::string | |
| 113 | -JSON::JSON_null::unparse(size_t) const | |
| 166 | +void | |
| 167 | +JSON::JSON_null::write(Pipeline* p, size_t) const | |
| 114 | 168 | { |
| 115 | - return "null"; | |
| 169 | + *p << "null"; | |
| 116 | 170 | } |
| 117 | 171 | |
| 118 | -std::string | |
| 119 | -JSON::unparse() const | |
| 172 | +void | |
| 173 | +JSON::write(Pipeline* p, size_t depth) const | |
| 120 | 174 | { |
| 121 | 175 | if (0 == this->m->value.get()) { |
| 122 | - return "null"; | |
| 176 | + *p << "null"; | |
| 123 | 177 | } else { |
| 124 | - return this->m->value->unparse(0); | |
| 178 | + this->m->value->write(p, depth); | |
| 125 | 179 | } |
| 126 | 180 | } |
| 127 | 181 | |
| 128 | 182 | std::string |
| 183 | +JSON::unparse() const | |
| 184 | +{ | |
| 185 | + std::string s; | |
| 186 | + Pl_String p("unparse", s); | |
| 187 | + write(&p, 0); | |
| 188 | + return s; | |
| 189 | +} | |
| 190 | + | |
| 191 | +std::string | |
| 129 | 192 | JSON::encode_string(std::string const& str) |
| 130 | 193 | { |
| 131 | 194 | std::string result; | ... | ... |
manual/release-notes.rst
| ... | ... | @@ -123,6 +123,10 @@ For a detailed list of changes, please see the file |
| 123 | 123 | - Add new ``Pipeline`` type ``Pl_String`` to append to a |
| 124 | 124 | ``std::string``. |
| 125 | 125 | |
| 126 | + - Enhance JSON class to better support incrementally reading and | |
| 127 | + writing large amounts of data without having to keep everything | |
| 128 | + in memory. | |
| 129 | + | |
| 126 | 130 | - Other changes |
| 127 | 131 | |
| 128 | 132 | - In JSON v1 mode, the ``"objects"`` key now reflects the repaired | ... | ... |