Commit 1bc8abfdd3eb9b5a6af5d274c85cd1708bdb9e0c
1 parent
3246923c
Implement JSON v2 for Stream
Not fully exercised in this commit
Showing
10 changed files
with
199 additions
and
13 deletions
TODO
| @@ -63,6 +63,8 @@ General things to remember: | @@ -63,6 +63,8 @@ General things to remember: | ||
| 63 | 63 | ||
| 64 | * Remember typo: search for "Typo" In QPDFJob::doJSONEncrypt. | 64 | * Remember typo: search for "Typo" In QPDFJob::doJSONEncrypt. |
| 65 | 65 | ||
| 66 | +* Test stream with invalid data | ||
| 67 | + | ||
| 66 | * Consider using camelCase in multi-word key names to be consistent | 68 | * Consider using camelCase in multi-word key names to be consistent |
| 67 | with job JSON and with how JSON is often represented in languages | 69 | with job JSON and with how JSON is often represented in languages |
| 68 | that use it more natively. | 70 | that use it more natively. |
include/qpdf/Constants.h
| @@ -99,6 +99,12 @@ enum qpdf_stream_decode_level_e { | @@ -99,6 +99,12 @@ enum qpdf_stream_decode_level_e { | ||
| 99 | qpdf_dl_specialized, /* also decode other non-lossy filters */ | 99 | qpdf_dl_specialized, /* also decode other non-lossy filters */ |
| 100 | qpdf_dl_all /* also decode lossy filters */ | 100 | qpdf_dl_all /* also decode lossy filters */ |
| 101 | }; | 101 | }; |
| 102 | +/* For JSON encoding */ | ||
| 103 | +enum qpdf_stream_data_json_e { | ||
| 104 | + qpdf_sj_none = 0, | ||
| 105 | + qpdf_sj_inline, | ||
| 106 | + qpdf_sj_file, | ||
| 107 | +}; | ||
| 102 | 108 | ||
| 103 | /* R3 Encryption Parameters */ | 109 | /* R3 Encryption Parameters */ |
| 104 | 110 |
include/qpdf/QPDFObjectHandle.hh
| @@ -1339,8 +1339,8 @@ class QPDFObjectHandle | @@ -1339,8 +1339,8 @@ class QPDFObjectHandle | ||
| 1339 | // unambiguous. The getStreamJSON() call can be used to add | 1339 | // unambiguous. The getStreamJSON() call can be used to add |
| 1340 | // encoding of the stream's data. | 1340 | // encoding of the stream's data. |
| 1341 | // * Object types that are only valid in content streams (inline | 1341 | // * Object types that are only valid in content streams (inline |
| 1342 | - // image, operator) as well as "reserved" objects are not | ||
| 1343 | - // representable and will be serialized as "null". | 1342 | + // image, operator) are serialized as "null". Attempting to |
| 1343 | + // serialize a "reserved" object is an error. | ||
| 1344 | // If dereference_indirect is true and this is an indirect object, | 1344 | // If dereference_indirect is true and this is an indirect object, |
| 1345 | // show the actual contents of the object. The effect of | 1345 | // show the actual contents of the object. The effect of |
| 1346 | // dereference_indirect applies only to this object. It is not | 1346 | // dereference_indirect applies only to this object. It is not |
| @@ -1350,9 +1350,42 @@ class QPDFObjectHandle | @@ -1350,9 +1350,42 @@ class QPDFObjectHandle | ||
| 1350 | 1350 | ||
| 1351 | // Deprecated version uses v1 for backward compatibility. | 1351 | // Deprecated version uses v1 for backward compatibility. |
| 1352 | // ABI: remove for qpdf 12 | 1352 | // ABI: remove for qpdf 12 |
| 1353 | - [[deprecated("Use getJSON(int version)")]] | ||
| 1354 | - QPDF_DLL | ||
| 1355 | - JSON getJSON(bool dereference_indirect = false); | 1353 | + [[deprecated("Use getJSON(int version)")]] QPDF_DLL JSON |
| 1354 | + getJSON(bool dereference_indirect = false); | ||
| 1355 | + | ||
| 1356 | + // This method can be called on a stream to get a more extended | ||
| 1357 | + // JSON representation of the stream that includes the stream's | ||
| 1358 | + // data. The JSON object returned is always a dictionary whose | ||
| 1359 | + // "dict" key is an encoding of the stream's dictionary. The | ||
| 1360 | + // representation of the data is determined by the json_data | ||
| 1361 | + // field. | ||
| 1362 | + // | ||
| 1363 | + // The json_data field may have the value qpdf_sj_none, | ||
| 1364 | + // qpdf_sj_inline, or qpdf_sj_file. | ||
| 1365 | + // | ||
| 1366 | + // If json_data is qpdf_sj_none, stream data is not represented. | ||
| 1367 | + // | ||
| 1368 | + // If json_data is qpdf_sj_inline or qpdf_sj_file, then stream | ||
| 1369 | + // data is filtered or not based on the value of decode_level, | ||
| 1370 | + // which has the same meaning as with pipeStreamData. | ||
| 1371 | + // | ||
| 1372 | + // If json_data is qpdf_sj_inline, the base64-encoded stream data | ||
| 1373 | + // is included in the "data" field of the dictionary that is | ||
| 1374 | + // returned. | ||
| 1375 | + // | ||
| 1376 | + // If json_data is qpdf_sj_file, then the Pipeline ("p") and | ||
| 1377 | + // data_filename arguments must be supplied. The value of | ||
| 1378 | + // data_filename is stored in the resulting json in the "datafile" | ||
| 1379 | + // key but is not otherwise used. The stream data itself (raw or | ||
| 1380 | + // filtered depending on decode level) is written to the | ||
| 1381 | + // pipeline via pipeStreamData(). | ||
| 1382 | + QPDF_DLL | ||
| 1383 | + JSON getStreamJSON( | ||
| 1384 | + int json_version, | ||
| 1385 | + qpdf_stream_data_json_e json_data, | ||
| 1386 | + qpdf_stream_decode_level_e decode_level, | ||
| 1387 | + Pipeline* p, | ||
| 1388 | + std::string const& data_filename); | ||
| 1356 | 1389 | ||
| 1357 | // Legacy helper methods for commonly performed operations on | 1390 | // Legacy helper methods for commonly performed operations on |
| 1358 | // pages. Newer code should use QPDFPageObjectHelper instead. The | 1391 | // pages. Newer code should use QPDFPageObjectHelper instead. The |
libqpdf/QPDFObjectHandle.cc
| @@ -1797,6 +1797,19 @@ QPDFObjectHandle::getJSON(int json_version, bool dereference_indirect) | @@ -1797,6 +1797,19 @@ QPDFObjectHandle::getJSON(int json_version, bool dereference_indirect) | ||
| 1797 | } | 1797 | } |
| 1798 | } | 1798 | } |
| 1799 | 1799 | ||
| 1800 | +JSON | ||
| 1801 | +QPDFObjectHandle::getStreamJSON( | ||
| 1802 | + int json_version, | ||
| 1803 | + qpdf_stream_data_json_e json_data, | ||
| 1804 | + qpdf_stream_decode_level_e decode_level, | ||
| 1805 | + Pipeline* p, | ||
| 1806 | + std::string const& data_filename) | ||
| 1807 | +{ | ||
| 1808 | + assertStream(); | ||
| 1809 | + return dynamic_cast<QPDF_Stream*>(obj.get())->getStreamJSON( | ||
| 1810 | + json_version, json_data, decode_level, p, data_filename); | ||
| 1811 | +} | ||
| 1812 | + | ||
| 1800 | QPDFObjectHandle | 1813 | QPDFObjectHandle |
| 1801 | QPDFObjectHandle::wrapInArray() | 1814 | QPDFObjectHandle::wrapInArray() |
| 1802 | { | 1815 | { |
libqpdf/QPDF_Stream.cc
| @@ -2,8 +2,10 @@ | @@ -2,8 +2,10 @@ | ||
| 2 | 2 | ||
| 3 | #include <qpdf/ContentNormalizer.hh> | 3 | #include <qpdf/ContentNormalizer.hh> |
| 4 | #include <qpdf/Pipeline.hh> | 4 | #include <qpdf/Pipeline.hh> |
| 5 | +#include <qpdf/Pl_Base64.hh> | ||
| 5 | #include <qpdf/Pl_Buffer.hh> | 6 | #include <qpdf/Pl_Buffer.hh> |
| 6 | #include <qpdf/Pl_Count.hh> | 7 | #include <qpdf/Pl_Count.hh> |
| 8 | +#include <qpdf/Pl_Discard.hh> | ||
| 7 | #include <qpdf/Pl_Flate.hh> | 9 | #include <qpdf/Pl_Flate.hh> |
| 8 | #include <qpdf/Pl_QPDFTokenizer.hh> | 10 | #include <qpdf/Pl_QPDFTokenizer.hh> |
| 9 | #include <qpdf/QIntC.hh> | 11 | #include <qpdf/QIntC.hh> |
| @@ -54,6 +56,18 @@ namespace | @@ -54,6 +56,18 @@ namespace | ||
| 54 | return nullptr; | 56 | return nullptr; |
| 55 | } | 57 | } |
| 56 | }; | 58 | }; |
| 59 | + | ||
| 60 | + class StreamBlobProvider | ||
| 61 | + { | ||
| 62 | + public: | ||
| 63 | + StreamBlobProvider( | ||
| 64 | + QPDF_Stream* stream, qpdf_stream_decode_level_e decode_level); | ||
| 65 | + void operator()(Pipeline*); | ||
| 66 | + | ||
| 67 | + private: | ||
| 68 | + QPDF_Stream* stream; | ||
| 69 | + qpdf_stream_decode_level_e decode_level; | ||
| 70 | + }; | ||
| 57 | } // namespace | 71 | } // namespace |
| 58 | 72 | ||
| 59 | std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = { | 73 | std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = { |
| @@ -81,6 +95,19 @@ std::map<std::string, std::function<std::shared_ptr<QPDFStreamFilter>()>> | @@ -81,6 +95,19 @@ std::map<std::string, std::function<std::shared_ptr<QPDFStreamFilter>()>> | ||
| 81 | {"/ASCIIHexDecode", SF_ASCIIHexDecode::factory}, | 95 | {"/ASCIIHexDecode", SF_ASCIIHexDecode::factory}, |
| 82 | }; | 96 | }; |
| 83 | 97 | ||
| 98 | +StreamBlobProvider::StreamBlobProvider( | ||
| 99 | + QPDF_Stream* stream, qpdf_stream_decode_level_e decode_level) : | ||
| 100 | + stream(stream), | ||
| 101 | + decode_level(decode_level) | ||
| 102 | +{ | ||
| 103 | +} | ||
| 104 | + | ||
| 105 | +void | ||
| 106 | +StreamBlobProvider::operator()(Pipeline* p) | ||
| 107 | +{ | ||
| 108 | + this->stream->pipeStreamData(p, nullptr, 0, decode_level, false, false); | ||
| 109 | +} | ||
| 110 | + | ||
| 84 | QPDF_Stream::QPDF_Stream( | 111 | QPDF_Stream::QPDF_Stream( |
| 85 | QPDF* qpdf, | 112 | QPDF* qpdf, |
| 86 | int objid, | 113 | int objid, |
| @@ -153,8 +180,95 @@ QPDF_Stream::unparse() | @@ -153,8 +180,95 @@ QPDF_Stream::unparse() | ||
| 153 | JSON | 180 | JSON |
| 154 | QPDF_Stream::getJSON(int json_version) | 181 | QPDF_Stream::getJSON(int json_version) |
| 155 | { | 182 | { |
| 156 | - // QXXXQ | ||
| 157 | - return this->stream_dict.getJSON(json_version); | 183 | + if (json_version == 1) { |
| 184 | + return this->stream_dict.getJSON(json_version); | ||
| 185 | + } | ||
| 186 | + return getStreamJSON(json_version, qpdf_sj_none, qpdf_dl_none, nullptr, ""); | ||
| 187 | +} | ||
| 188 | + | ||
| 189 | +JSON | ||
| 190 | +QPDF_Stream::getStreamJSON( | ||
| 191 | + int json_version, | ||
| 192 | + qpdf_stream_data_json_e json_data, | ||
| 193 | + qpdf_stream_decode_level_e decode_level, | ||
| 194 | + Pipeline* p, | ||
| 195 | + std::string const& data_filename) | ||
| 196 | +{ | ||
| 197 | + switch (json_data) { | ||
| 198 | + case qpdf_sj_none: | ||
| 199 | + case qpdf_sj_inline: | ||
| 200 | + if (p != nullptr) { | ||
| 201 | + throw std::logic_error("QPDF_Stream::getStreamJSON: pipline should " | ||
| 202 | + "only be suppiled json_data is file"); | ||
| 203 | + } | ||
| 204 | + break; | ||
| 205 | + case qpdf_sj_file: | ||
| 206 | + if (p == nullptr) { | ||
| 207 | + throw std::logic_error("QPDF_Stream::getStreamJSON: pipline must " | ||
| 208 | + "be be suppiled json_data is file"); | ||
| 209 | + } | ||
| 210 | + if (data_filename.empty()) { | ||
| 211 | + throw std::logic_error("QPDF_Stream::getStreamJSON: data_filename " | ||
| 212 | + "must be supplied when json_data is file"); | ||
| 213 | + } | ||
| 214 | + break; | ||
| 215 | + } | ||
| 216 | + | ||
| 217 | + auto dict = this->stream_dict; | ||
| 218 | + JSON result = JSON::makeDictionary(); | ||
| 219 | + if (json_data != qpdf_sj_none) { | ||
| 220 | + std::shared_ptr<Buffer> buf; | ||
| 221 | + bool filtered = false; | ||
| 222 | + bool filter = (decode_level != qpdf_dl_none); | ||
| 223 | + for (int attempt = 1; attempt <= 2; ++attempt) { | ||
| 224 | + Pl_Discard discard; | ||
| 225 | + std::shared_ptr<Pl_Buffer> buf_pl; | ||
| 226 | + Pipeline* data_pipeline = nullptr; | ||
| 227 | + if (json_data == qpdf_sj_file) { | ||
| 228 | + // We need to capture the data to write | ||
| 229 | + buf_pl = std::make_shared<Pl_Buffer>("stream data"); | ||
| 230 | + data_pipeline = buf_pl.get(); | ||
| 231 | + } else { | ||
| 232 | + data_pipeline = &discard; | ||
| 233 | + } | ||
| 234 | + filtered = pipeStreamData( | ||
| 235 | + data_pipeline, nullptr, 0, decode_level, false, (attempt == 1)); | ||
| 236 | + if (filter && (!filtered)) { | ||
| 237 | + // Try again | ||
| 238 | + filter = false; | ||
| 239 | + } else { | ||
| 240 | + if (buf_pl.get()) { | ||
| 241 | + buf = buf_pl->getBufferSharedPointer(); | ||
| 242 | + } | ||
| 243 | + break; | ||
| 244 | + } | ||
| 245 | + } | ||
| 246 | + // We can use unsafeShallowCopy because we are only | ||
| 247 | + // touching top-level keys. | ||
| 248 | + dict = this->stream_dict.unsafeShallowCopy(); | ||
| 249 | + dict.removeKey("/Length"); | ||
| 250 | + if (filtered) { | ||
| 251 | + dict.removeKey("/Filter"); | ||
| 252 | + dict.removeKey("/DecodeParms"); | ||
| 253 | + } | ||
| 254 | + if (json_data == qpdf_sj_file) { | ||
| 255 | + result.addDictionaryMember( | ||
| 256 | + "datafile", JSON::makeString(data_filename)); | ||
| 257 | + if (!buf.get()) { | ||
| 258 | + throw std::logic_error( | ||
| 259 | + "QPDF_Stream: failed to get stream data in json file mode"); | ||
| 260 | + } | ||
| 261 | + p->write(buf->getBuffer(), buf->getSize()); | ||
| 262 | + } else if (json_data == qpdf_sj_inline) { | ||
| 263 | + result.addDictionaryMember( | ||
| 264 | + "data", JSON::makeBlob(StreamBlobProvider(this, decode_level))); | ||
| 265 | + } else { | ||
| 266 | + throw std::logic_error( | ||
| 267 | + "QPDF_Stream: unexpected value of json_data"); | ||
| 268 | + } | ||
| 269 | + } | ||
| 270 | + result.addDictionaryMember("dict", dict.getJSON(json_version)); | ||
| 271 | + return result; | ||
| 158 | } | 272 | } |
| 159 | 273 | ||
| 160 | QPDFObject::object_type_e | 274 | QPDFObject::object_type_e |
libqpdf/qpdf/QPDF_Stream.hh
| @@ -61,6 +61,12 @@ class QPDF_Stream: public QPDFObject | @@ -61,6 +61,12 @@ class QPDF_Stream: public QPDFObject | ||
| 61 | QPDFObjectHandle const& decode_parms); | 61 | QPDFObjectHandle const& decode_parms); |
| 62 | void | 62 | void |
| 63 | addTokenFilter(std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter); | 63 | addTokenFilter(std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter); |
| 64 | + JSON getStreamJSON( | ||
| 65 | + int json_version, | ||
| 66 | + qpdf_stream_data_json_e json_data, | ||
| 67 | + qpdf_stream_decode_level_e decode_level, | ||
| 68 | + Pipeline* p, | ||
| 69 | + std::string const& data_filename); | ||
| 64 | 70 | ||
| 65 | void replaceDict(QPDFObjectHandle const& new_dict); | 71 | void replaceDict(QPDFObjectHandle const& new_dict); |
| 66 | 72 |
qpdf/qtest/qpdf/direct-pages-json-objects.out
qpdf/qtest/qpdf/direct-pages-json-pages.out
qpdf/qtest/qpdf/page_api_2-json-objects.out
| @@ -62,7 +62,9 @@ | @@ -62,7 +62,9 @@ | ||
| 62 | "/Type": "/Page" | 62 | "/Type": "/Page" |
| 63 | }, | 63 | }, |
| 64 | "6 0 R": { | 64 | "6 0 R": { |
| 65 | - "/Length": "7 0 R" | 65 | + "dict": { |
| 66 | + "/Length": "7 0 R" | ||
| 67 | + } | ||
| 66 | }, | 68 | }, |
| 67 | "7 0 R": 47, | 69 | "7 0 R": 47, |
| 68 | "8 0 R": { | 70 | "8 0 R": { |
| @@ -72,7 +74,9 @@ | @@ -72,7 +74,9 @@ | ||
| 72 | "/Type": "/Font" | 74 | "/Type": "/Font" |
| 73 | }, | 75 | }, |
| 74 | "9 0 R": { | 76 | "9 0 R": { |
| 75 | - "/Length": "10 0 R" | 77 | + "dict": { |
| 78 | + "/Length": "10 0 R" | ||
| 79 | + } | ||
| 76 | }, | 80 | }, |
| 77 | "10 0 R": 47, | 81 | "10 0 R": 47, |
| 78 | "trailer": { | 82 | "trailer": { |
qpdf/qtest/qpdf/page_api_2-json-pages.out
| @@ -94,7 +94,9 @@ | @@ -94,7 +94,9 @@ | ||
| 94 | "/Type": "/Page" | 94 | "/Type": "/Page" |
| 95 | }, | 95 | }, |
| 96 | "6 0 R": { | 96 | "6 0 R": { |
| 97 | - "/Length": "7 0 R" | 97 | + "dict": { |
| 98 | + "/Length": "7 0 R" | ||
| 99 | + } | ||
| 98 | }, | 100 | }, |
| 99 | "7 0 R": 47, | 101 | "7 0 R": 47, |
| 100 | "8 0 R": { | 102 | "8 0 R": { |
| @@ -104,7 +106,9 @@ | @@ -104,7 +106,9 @@ | ||
| 104 | "/Type": "/Font" | 106 | "/Type": "/Font" |
| 105 | }, | 107 | }, |
| 106 | "9 0 R": { | 108 | "9 0 R": { |
| 107 | - "/Length": "10 0 R" | 109 | + "dict": { |
| 110 | + "/Length": "10 0 R" | ||
| 111 | + } | ||
| 108 | }, | 112 | }, |
| 109 | "10 0 R": 47, | 113 | "10 0 R": 47, |
| 110 | "11 0 R": { | 114 | "11 0 R": { |