Commit 1bc8abfdd3eb9b5a6af5d274c85cd1708bdb9e0c
1 parent 3246923c
Implement JSON v2 for Stream
Not fully exercised in this commit
Showing 10 changed files with 199 additions and 13 deletions
TODO
| ... | ... | @@ -63,6 +63,8 @@ General things to remember: |
| 63 | 63 | |
| 64 | 64 | * Remember typo: search for "Typo" In QPDFJob::doJSONEncrypt. |
| 65 | 65 | |
| 66 | +* Test stream with invalid data | |
| 67 | + | |
| 66 | 68 | * Consider using camelCase in multi-word key names to be consistent |
| 67 | 69 | with job JSON and with how JSON is often represented in languages |
| 68 | 70 | that use it more natively. | ... | ... |
include/qpdf/Constants.h
| ... | ... | @@ -99,6 +99,12 @@ enum qpdf_stream_decode_level_e { |
| 99 | 99 | qpdf_dl_specialized, /* also decode other non-lossy filters */ |
| 100 | 100 | qpdf_dl_all /* also decode lossy filters */ |
| 101 | 101 | }; |
| 102 | +/* For JSON encoding */ | |
| 103 | +enum qpdf_stream_data_json_e { | |
| 104 | + qpdf_sj_none = 0, | |
| 105 | + qpdf_sj_inline, | |
| 106 | + qpdf_sj_file, | |
| 107 | +}; | |
| 102 | 108 | |
| 103 | 109 | /* R3 Encryption Parameters */ |
| 104 | 110 | ... | ... |
include/qpdf/QPDFObjectHandle.hh
| ... | ... | @@ -1339,8 +1339,8 @@ class QPDFObjectHandle |
| 1339 | 1339 | // unambiguous. The getStreamJSON() call can be used to add |
| 1340 | 1340 | // encoding of the stream's data. |
| 1341 | 1341 | // * Object types that are only valid in content streams (inline |
| 1342 | - // image, operator) as well as "reserved" objects are not | |
| 1343 | - // representable and will be serialized as "null". | |
| 1342 | + // image, operator) are serialized as "null". Attempting to | |
| 1343 | + // serialize a "reserved" object is an error. | |
| 1344 | 1344 | // If dereference_indirect is true and this is an indirect object, |
| 1345 | 1345 | // show the actual contents of the object. The effect of |
| 1346 | 1346 | // dereference_indirect applies only to this object. It is not |
| ... | ... | @@ -1350,9 +1350,42 @@ class QPDFObjectHandle |
| 1350 | 1350 | |
| 1351 | 1351 | // Deprecated version uses v1 for backward compatibility. |
| 1352 | 1352 | // ABI: remove for qpdf 12 |
| 1353 | - [[deprecated("Use getJSON(int version)")]] | |
| 1354 | - QPDF_DLL | |
| 1355 | - JSON getJSON(bool dereference_indirect = false); | |
| 1353 | + [[deprecated("Use getJSON(int version)")]] QPDF_DLL JSON | |
| 1354 | + getJSON(bool dereference_indirect = false); | |
| 1355 | + | |
| 1356 | + // This method can be called on a stream to get a more extended | |
| 1357 | + // JSON representation of the stream that includes the stream's | |
| 1358 | + // data. The JSON object returned is always a dictionary whose | |
| 1359 | + // "dict" key is an encoding of the stream's dictionary. The | |
| 1360 | + // representation of the data is determined by the json_data | |
| 1361 | + // field. | |
| 1362 | + // | |
| 1363 | + // The json_data field may have the value qpdf_sj_none, | |
| 1364 | + // qpdf_sj_inline, or qpdf_sj_file. | |
| 1365 | + // | |
| 1366 | + // If json_data is qpdf_sj_none, stream data is not represented. | |
| 1367 | + // | |
| 1368 | + // If json_data is qpdf_sj_inline or qpdf_sj_file, then stream | |
| 1369 | + // data is filtered or not based on the value of decode_level, | |
| 1370 | + // which has the same meaning as with pipeStreamData. | |
| 1371 | + // | |
| 1372 | + // If json_data is qpdf_sj_inline, the base64-encoded stream data | |
| 1373 | + // is included in the "data" field of the dictionary that is | |
| 1374 | + // returned. | |
| 1375 | + // | |
| 1376 | + // If json_data is qpdf_sj_file, then the Pipeline ("p") and | |
| 1377 | + // data_filename arguments must be supplied. The value of | |
| 1378 | + // data_filename is stored in the resulting JSON in the "datafile" | |
| 1379 | + // key but is not otherwise used. The stream data itself (raw or | |
| 1380 | + // filtered, depending on decode_level) is written to the | |
| 1381 | + // pipeline via pipeStreamData(). | |
| 1382 | + QPDF_DLL | |
| 1383 | + JSON getStreamJSON( | |
| 1384 | + int json_version, | |
| 1385 | + qpdf_stream_data_json_e json_data, | |
| 1386 | + qpdf_stream_decode_level_e decode_level, | |
| 1387 | + Pipeline* p, | |
| 1388 | + std::string const& data_filename); | |
| 1356 | 1389 | |
| 1357 | 1390 | // Legacy helper methods for commonly performed operations on |
| 1358 | 1391 | // pages. Newer code should use QPDFPageObjectHelper instead. The | ... | ... |
libqpdf/QPDFObjectHandle.cc
| ... | ... | @@ -1797,6 +1797,19 @@ QPDFObjectHandle::getJSON(int json_version, bool dereference_indirect) |
| 1797 | 1797 | } |
| 1798 | 1798 | } |
| 1799 | 1799 | |
| 1800 | +JSON | |
| 1801 | +QPDFObjectHandle::getStreamJSON( | |
| 1802 | + int json_version, | |
| 1803 | + qpdf_stream_data_json_e json_data, | |
| 1804 | + qpdf_stream_decode_level_e decode_level, | |
| 1805 | + Pipeline* p, | |
| 1806 | + std::string const& data_filename) | |
| 1807 | +{ | |
| 1808 | + assertStream(); | |
| 1809 | + return dynamic_cast<QPDF_Stream*>(obj.get())->getStreamJSON( | |
| 1810 | + json_version, json_data, decode_level, p, data_filename); | |
| 1811 | +} | |
| 1812 | + | |
| 1800 | 1813 | QPDFObjectHandle |
| 1801 | 1814 | QPDFObjectHandle::wrapInArray() |
| 1802 | 1815 | { | ... | ... |
libqpdf/QPDF_Stream.cc
| ... | ... | @@ -2,8 +2,10 @@ |
| 2 | 2 | |
| 3 | 3 | #include <qpdf/ContentNormalizer.hh> |
| 4 | 4 | #include <qpdf/Pipeline.hh> |
| 5 | +#include <qpdf/Pl_Base64.hh> | |
| 5 | 6 | #include <qpdf/Pl_Buffer.hh> |
| 6 | 7 | #include <qpdf/Pl_Count.hh> |
| 8 | +#include <qpdf/Pl_Discard.hh> | |
| 7 | 9 | #include <qpdf/Pl_Flate.hh> |
| 8 | 10 | #include <qpdf/Pl_QPDFTokenizer.hh> |
| 9 | 11 | #include <qpdf/QIntC.hh> |
| ... | ... | @@ -54,6 +56,18 @@ namespace |
| 54 | 56 | return nullptr; |
| 55 | 57 | } |
| 56 | 58 | }; |
| 59 | + | |
| 60 | + class StreamBlobProvider | |
| 61 | + { | |
| 62 | + public: | |
| 63 | + StreamBlobProvider( | |
| 64 | + QPDF_Stream* stream, qpdf_stream_decode_level_e decode_level); | |
| 65 | + void operator()(Pipeline*); | |
| 66 | + | |
| 67 | + private: | |
| 68 | + QPDF_Stream* stream; | |
| 69 | + qpdf_stream_decode_level_e decode_level; | |
| 70 | + }; | |
| 57 | 71 | } // namespace |
| 58 | 72 | |
| 59 | 73 | std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = { |
| ... | ... | @@ -81,6 +95,19 @@ std::map<std::string, std::function<std::shared_ptr<QPDFStreamFilter>()>> |
| 81 | 95 | {"/ASCIIHexDecode", SF_ASCIIHexDecode::factory}, |
| 82 | 96 | }; |
| 83 | 97 | |
| 98 | +StreamBlobProvider::StreamBlobProvider( | |
| 99 | + QPDF_Stream* stream, qpdf_stream_decode_level_e decode_level) : | |
| 100 | + stream(stream), | |
| 101 | + decode_level(decode_level) | |
| 102 | +{ | |
| 103 | +} | |
| 104 | + | |
| 105 | +void | |
| 106 | +StreamBlobProvider::operator()(Pipeline* p) | |
| 107 | +{ | |
| 108 | + this->stream->pipeStreamData(p, nullptr, 0, decode_level, false, false); | |
| 109 | +} | |
| 110 | + | |
| 84 | 111 | QPDF_Stream::QPDF_Stream( |
| 85 | 112 | QPDF* qpdf, |
| 86 | 113 | int objid, |
| ... | ... | @@ -153,8 +180,95 @@ QPDF_Stream::unparse() |
| 153 | 180 | JSON |
| 154 | 181 | QPDF_Stream::getJSON(int json_version) |
| 155 | 182 | { |
| 156 | - // QXXXQ | |
| 157 | - return this->stream_dict.getJSON(json_version); | |
| 183 | + if (json_version == 1) { | |
| 184 | + return this->stream_dict.getJSON(json_version); | |
| 185 | + } | |
| 186 | + return getStreamJSON(json_version, qpdf_sj_none, qpdf_dl_none, nullptr, ""); | |
| 187 | +} | |
| 188 | + | |
| 189 | +JSON | |
| 190 | +QPDF_Stream::getStreamJSON( | |
| 191 | + int json_version, | |
| 192 | + qpdf_stream_data_json_e json_data, | |
| 193 | + qpdf_stream_decode_level_e decode_level, | |
| 194 | + Pipeline* p, | |
| 195 | + std::string const& data_filename) | |
| 196 | +{ | |
| 197 | + switch (json_data) { | |
| 198 | + case qpdf_sj_none: | |
| 199 | + case qpdf_sj_inline: | |
| 200 | + if (p != nullptr) { | |
| 201 | + throw std::logic_error("QPDF_Stream::getStreamJSON: pipline should " | |
| 202 | + "only be suppiled json_data is file"); | |
| 203 | + } | |
| 204 | + break; | |
| 205 | + case qpdf_sj_file: | |
| 206 | + if (p == nullptr) { | |
| 207 | + throw std::logic_error("QPDF_Stream::getStreamJSON: pipline must " | |
| 208 | + "be be suppiled json_data is file"); | |
| 209 | + } | |
| 210 | + if (data_filename.empty()) { | |
| 211 | + throw std::logic_error("QPDF_Stream::getStreamJSON: data_filename " | |
| 212 | + "must be supplied when json_data is file"); | |
| 213 | + } | |
| 214 | + break; | |
| 215 | + } | |
| 216 | + | |
| 217 | + auto dict = this->stream_dict; | |
| 218 | + JSON result = JSON::makeDictionary(); | |
| 219 | + if (json_data != qpdf_sj_none) { | |
| 220 | + std::shared_ptr<Buffer> buf; | |
| 221 | + bool filtered = false; | |
| 222 | + bool filter = (decode_level != qpdf_dl_none); | |
| 223 | + for (int attempt = 1; attempt <= 2; ++attempt) { | |
| 224 | + Pl_Discard discard; | |
| 225 | + std::shared_ptr<Pl_Buffer> buf_pl; | |
| 226 | + Pipeline* data_pipeline = nullptr; | |
| 227 | + if (json_data == qpdf_sj_file) { | |
| 228 | + // We need to capture the data to write | |
| 229 | + buf_pl = std::make_shared<Pl_Buffer>("stream data"); | |
| 230 | + data_pipeline = buf_pl.get(); | |
| 231 | + } else { | |
| 232 | + data_pipeline = &discard; | |
| 233 | + } | |
| 234 | + filtered = pipeStreamData( | |
| 235 | + data_pipeline, nullptr, 0, decode_level, false, (attempt == 1)); | |
| 236 | + if (filter && (!filtered)) { | |
| 237 | + // Try again | |
| 238 | + filter = false; | |
| 239 | + } else { | |
| 240 | + if (buf_pl.get()) { | |
| 241 | + buf = buf_pl->getBufferSharedPointer(); | |
| 242 | + } | |
| 243 | + break; | |
| 244 | + } | |
| 245 | + } | |
| 246 | + // We can use unsafeShallowCopy because we are only | |
| 247 | + // touching top-level keys. | |
| 248 | + dict = this->stream_dict.unsafeShallowCopy(); | |
| 249 | + dict.removeKey("/Length"); | |
| 250 | + if (filtered) { | |
| 251 | + dict.removeKey("/Filter"); | |
| 252 | + dict.removeKey("/DecodeParms"); | |
| 253 | + } | |
| 254 | + if (json_data == qpdf_sj_file) { | |
| 255 | + result.addDictionaryMember( | |
| 256 | + "datafile", JSON::makeString(data_filename)); | |
| 257 | + if (!buf.get()) { | |
| 258 | + throw std::logic_error( | |
| 259 | + "QPDF_Stream: failed to get stream data in json file mode"); | |
| 260 | + } | |
| 261 | + p->write(buf->getBuffer(), buf->getSize()); | |
| 262 | + } else if (json_data == qpdf_sj_inline) { | |
| 263 | + result.addDictionaryMember( | |
| 264 | + "data", JSON::makeBlob(StreamBlobProvider(this, decode_level))); | |
| 265 | + } else { | |
| 266 | + throw std::logic_error( | |
| 267 | + "QPDF_Stream: unexpected value of json_data"); | |
| 268 | + } | |
| 269 | + } | |
| 270 | + result.addDictionaryMember("dict", dict.getJSON(json_version)); | |
| 271 | + return result; | |
| 158 | 272 | } |
| 159 | 273 | |
| 160 | 274 | QPDFObject::object_type_e | ... | ... |
libqpdf/qpdf/QPDF_Stream.hh
| ... | ... | @@ -61,6 +61,12 @@ class QPDF_Stream: public QPDFObject |
| 61 | 61 | QPDFObjectHandle const& decode_parms); |
| 62 | 62 | void |
| 63 | 63 | addTokenFilter(std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter); |
| 64 | + JSON getStreamJSON( | |
| 65 | + int json_version, | |
| 66 | + qpdf_stream_data_json_e json_data, | |
| 67 | + qpdf_stream_decode_level_e decode_level, | |
| 68 | + Pipeline* p, | |
| 69 | + std::string const& data_filename); | |
| 64 | 70 | |
| 65 | 71 | void replaceDict(QPDFObjectHandle const& new_dict); |
| 66 | 72 | ... | ... |
qpdf/qtest/qpdf/direct-pages-json-objects.out
qpdf/qtest/qpdf/direct-pages-json-pages.out
qpdf/qtest/qpdf/page_api_2-json-objects.out
| ... | ... | @@ -62,7 +62,9 @@ |
| 62 | 62 | "/Type": "/Page" |
| 63 | 63 | }, |
| 64 | 64 | "6 0 R": { |
| 65 | - "/Length": "7 0 R" | |
| 65 | + "dict": { | |
| 66 | + "/Length": "7 0 R" | |
| 67 | + } | |
| 66 | 68 | }, |
| 67 | 69 | "7 0 R": 47, |
| 68 | 70 | "8 0 R": { |
| ... | ... | @@ -72,7 +74,9 @@ |
| 72 | 74 | "/Type": "/Font" |
| 73 | 75 | }, |
| 74 | 76 | "9 0 R": { |
| 75 | - "/Length": "10 0 R" | |
| 77 | + "dict": { | |
| 78 | + "/Length": "10 0 R" | |
| 79 | + } | |
| 76 | 80 | }, |
| 77 | 81 | "10 0 R": 47, |
| 78 | 82 | "trailer": { | ... | ... |
qpdf/qtest/qpdf/page_api_2-json-pages.out
| ... | ... | @@ -94,7 +94,9 @@ |
| 94 | 94 | "/Type": "/Page" |
| 95 | 95 | }, |
| 96 | 96 | "6 0 R": { |
| 97 | - "/Length": "7 0 R" | |
| 97 | + "dict": { | |
| 98 | + "/Length": "7 0 R" | |
| 99 | + } | |
| 98 | 100 | }, |
| 99 | 101 | "7 0 R": 47, |
| 100 | 102 | "8 0 R": { |
| ... | ... | @@ -104,7 +106,9 @@ |
| 104 | 106 | "/Type": "/Font" |
| 105 | 107 | }, |
| 106 | 108 | "9 0 R": { |
| 107 | - "/Length": "10 0 R" | |
| 109 | + "dict": { | |
| 110 | + "/Length": "10 0 R" | |
| 111 | + } | |
| 108 | 112 | }, |
| 109 | 113 | "10 0 R": 47, |
| 110 | 114 | "11 0 R": { | ... | ... |