Commit 1bc8abfdd3eb9b5a6af5d274c85cd1708bdb9e0c

Authored by Jay Berkenbilt
1 parent 3246923c

Implement JSON v2 for Stream

Not fully exercised in this commit
@@ -63,6 +63,8 @@ General things to remember: @@ -63,6 +63,8 @@ General things to remember:
63 63
64 * Remember typo: search for "Typo" In QPDFJob::doJSONEncrypt. 64 * Remember typo: search for "Typo" In QPDFJob::doJSONEncrypt.
65 65
  66 +* Test stream with invalid data
  67 +
66 * Consider using camelCase in multi-word key names to be consistent 68 * Consider using camelCase in multi-word key names to be consistent
67 with job JSON and with how JSON is often represented in languages 69 with job JSON and with how JSON is often represented in languages
68 that use it more natively. 70 that use it more natively.
include/qpdf/Constants.h
@@ -99,6 +99,12 @@ enum qpdf_stream_decode_level_e { @@ -99,6 +99,12 @@ enum qpdf_stream_decode_level_e {
99 qpdf_dl_specialized, /* also decode other non-lossy filters */ 99 qpdf_dl_specialized, /* also decode other non-lossy filters */
100 qpdf_dl_all /* also decode lossy filters */ 100 qpdf_dl_all /* also decode lossy filters */
101 }; 101 };
  102 +/* For JSON encoding: selects how getStreamJSON() represents stream data */
  103 +enum qpdf_stream_data_json_e {
  104 + qpdf_sj_none = 0, /* stream data is not represented */
  105 + qpdf_sj_inline, /* base64-encoded data is placed in the "data" key */
  106 + qpdf_sj_file, /* data is written to a pipeline; file name goes in "datafile" */
  107 +};
102 108
103 /* R3 Encryption Parameters */ 109 /* R3 Encryption Parameters */
104 110
include/qpdf/QPDFObjectHandle.hh
@@ -1339,8 +1339,8 @@ class QPDFObjectHandle @@ -1339,8 +1339,8 @@ class QPDFObjectHandle
1339 // unambiguous. The getStreamJSON() call can be used to add 1339 // unambiguous. The getStreamJSON() call can be used to add
1340 // encoding of the stream's data. 1340 // encoding of the stream's data.
1341 // * Object types that are only valid in content streams (inline 1341 // * Object types that are only valid in content streams (inline
1342 - // image, operator) as well as "reserved" objects are not  
1343 - // representable and will be serialized as "null". 1342 + // image, operator) are serialized as "null". Attempting to
  1343 + // serialize a "reserved" object is an error.
1344 // If dereference_indirect is true and this is an indirect object, 1344 // If dereference_indirect is true and this is an indirect object,
1345 // show the actual contents of the object. The effect of 1345 // show the actual contents of the object. The effect of
1346 // dereference_indirect applies only to this object. It is not 1346 // dereference_indirect applies only to this object. It is not
@@ -1350,9 +1350,42 @@ class QPDFObjectHandle @@ -1350,9 +1350,42 @@ class QPDFObjectHandle
1350 1350
1351 // Deprecated version uses v1 for backward compatibility. 1351 // Deprecated version uses v1 for backward compatibility.
1352 // ABI: remove for qpdf 12 1352 // ABI: remove for qpdf 12
1353 - [[deprecated("Use getJSON(int version)")]]  
1354 - QPDF_DLL  
1355 - JSON getJSON(bool dereference_indirect = false); 1353 + [[deprecated("Use getJSON(int version)")]] QPDF_DLL JSON
  1354 + getJSON(bool dereference_indirect = false);
  1355 +
  1356 + // This method can be called on a stream to get a more extended
  1357 + // JSON representation of the stream that includes the stream's
  1358 + // data. The JSON object returned is always a dictionary whose
  1359 + // "dict" key is an encoding of the stream's dictionary. The
  1360 + // representation of the data is determined by the json_data
  1361 + // argument.
  1362 + //
  1363 + // The json_data argument may have the value qpdf_sj_none,
  1364 + // qpdf_sj_inline, or qpdf_sj_file.
  1365 + //
  1366 + // If json_data is qpdf_sj_none, stream data is not represented.
  1367 + //
  1368 + // If json_data is qpdf_sj_inline or qpdf_sj_file, then stream
  1369 + // data is filtered or not based on the value of decode_level,
  1370 + // which has the same meaning as with pipeStreamData.
  1371 + //
  1372 + // If json_data is qpdf_sj_inline, the base64-encoded stream data
  1373 + // is included in the "data" key of the dictionary that is
  1374 + // returned.
  1375 + //
  1376 + // If json_data is qpdf_sj_file, then the Pipeline ("p") and
  1377 + // data_filename arguments must be supplied. The value of
  1378 + // data_filename is stored in the resulting json in the "datafile"
  1379 + // key but is not otherwise used. The stream data itself (raw or
  1380 + // filtered depending on decode level) is written to the
  1381 + // pipeline via pipeStreamData().
  1382 + QPDF_DLL
  1383 + JSON getStreamJSON(
  1384 + int json_version,
  1385 + qpdf_stream_data_json_e json_data,
  1386 + qpdf_stream_decode_level_e decode_level,
  1387 + Pipeline* p,
  1388 + std::string const& data_filename);
1356 1389
1357 // Legacy helper methods for commonly performed operations on 1390 // Legacy helper methods for commonly performed operations on
1358 // pages. Newer code should use QPDFPageObjectHelper instead. The 1391 // pages. Newer code should use QPDFPageObjectHelper instead. The
libqpdf/QPDFObjectHandle.cc
@@ -1797,6 +1797,19 @@ QPDFObjectHandle::getJSON(int json_version, bool dereference_indirect) @@ -1797,6 +1797,19 @@ QPDFObjectHandle::getJSON(int json_version, bool dereference_indirect)
1797 } 1797 }
1798 } 1798 }
1799 1799
  1800 +JSON
  1801 +QPDFObjectHandle::getStreamJSON( // forwards all arguments unchanged to QPDF_Stream::getStreamJSON
  1802 + int json_version,
  1803 + qpdf_stream_data_json_e json_data,
  1804 + qpdf_stream_decode_level_e decode_level,
  1805 + Pipeline* p,
  1806 + std::string const& data_filename)
  1807 +{
  1808 + assertStream(); // presumably rejects handles that are not streams -- confirm
  1809 + return dynamic_cast<QPDF_Stream*>(obj.get())->getStreamJSON( // cast result is dereferenced without a null check; relies on assertStream() above
  1810 + json_version, json_data, decode_level, p, data_filename);
  1811 +}
  1812 +
1800 QPDFObjectHandle 1813 QPDFObjectHandle
1801 QPDFObjectHandle::wrapInArray() 1814 QPDFObjectHandle::wrapInArray()
1802 { 1815 {
libqpdf/QPDF_Stream.cc
@@ -2,8 +2,10 @@ @@ -2,8 +2,10 @@
2 2
3 #include <qpdf/ContentNormalizer.hh> 3 #include <qpdf/ContentNormalizer.hh>
4 #include <qpdf/Pipeline.hh> 4 #include <qpdf/Pipeline.hh>
  5 +#include <qpdf/Pl_Base64.hh>
5 #include <qpdf/Pl_Buffer.hh> 6 #include <qpdf/Pl_Buffer.hh>
6 #include <qpdf/Pl_Count.hh> 7 #include <qpdf/Pl_Count.hh>
  8 +#include <qpdf/Pl_Discard.hh>
7 #include <qpdf/Pl_Flate.hh> 9 #include <qpdf/Pl_Flate.hh>
8 #include <qpdf/Pl_QPDFTokenizer.hh> 10 #include <qpdf/Pl_QPDFTokenizer.hh>
9 #include <qpdf/QIntC.hh> 11 #include <qpdf/QIntC.hh>
@@ -54,6 +56,18 @@ namespace @@ -54,6 +56,18 @@ namespace
54 return nullptr; 56 return nullptr;
55 } 57 }
56 }; 58 };
  59 +
  60 + class StreamBlobProvider // callable handed to JSON::makeBlob; pipes a stream's data on demand
  61 + {
  62 + public:
  63 + StreamBlobProvider(
  64 + QPDF_Stream* stream, qpdf_stream_decode_level_e decode_level);
  65 + void operator()(Pipeline*); // pipes the stream's data into the given pipeline
  66 +
  67 + private:
  68 + QPDF_Stream* stream; // raw pointer stored as given; NOTE(review): presumably must outlive the blob -- confirm
  69 + qpdf_stream_decode_level_e decode_level; // passed through to pipeStreamData()
  70 + };
57 } // namespace 71 } // namespace
58 72
59 std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = { 73 std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = {
@@ -81,6 +95,19 @@ std::map&lt;std::string, std::function&lt;std::shared_ptr&lt;QPDFStreamFilter&gt;()&gt;&gt; @@ -81,6 +95,19 @@ std::map&lt;std::string, std::function&lt;std::shared_ptr&lt;QPDFStreamFilter&gt;()&gt;&gt;
81 {"/ASCIIHexDecode", SF_ASCIIHexDecode::factory}, 95 {"/ASCIIHexDecode", SF_ASCIIHexDecode::factory},
82 }; 96 };
83 97
  98 +StreamBlobProvider::StreamBlobProvider( // stores both arguments; does no other work
  99 + QPDF_Stream* stream, qpdf_stream_decode_level_e decode_level) :
  100 + stream(stream),
  101 + decode_level(decode_level)
  102 +{
  103 +}
  104 +
  105 +void
  106 +StreamBlobProvider::operator()(Pipeline* p) // p receives the stream data at the stored decode level
  107 +{
  108 + this->stream->pipeStreamData(p, nullptr, 0, decode_level, false, false); // return value is ignored
  109 +}
  110 +
84 QPDF_Stream::QPDF_Stream( 111 QPDF_Stream::QPDF_Stream(
85 QPDF* qpdf, 112 QPDF* qpdf,
86 int objid, 113 int objid,
@@ -153,8 +180,95 @@ QPDF_Stream::unparse() @@ -153,8 +180,95 @@ QPDF_Stream::unparse()
153 JSON 180 JSON
154 QPDF_Stream::getJSON(int json_version) 181 QPDF_Stream::getJSON(int json_version)
155 { 182 {
156 - // QXXXQ  
157 - return this->stream_dict.getJSON(json_version); 183 + if (json_version == 1) { // v1 keeps the previous output: the stream dictionary alone
  184 + return this->stream_dict.getJSON(json_version);
  185 + }
  186 + return getStreamJSON(json_version, qpdf_sj_none, qpdf_dl_none, nullptr, ""); // other versions: {"dict": ...} wrapper with no stream data
  187 +}
  188 +
  189 +JSON
  190 +QPDF_Stream::getStreamJSON(
  191 + int json_version,
  192 + qpdf_stream_data_json_e json_data,
  193 + qpdf_stream_decode_level_e decode_level,
  194 + Pipeline* p,
  195 + std::string const& data_filename)
  196 +{
  197 + switch (json_data) {
  198 + case qpdf_sj_none:
  199 + case qpdf_sj_inline:
  200 + if (p != nullptr) {
  201 + throw std::logic_error("QPDF_Stream::getStreamJSON: pipline should "
  202 + "only be suppiled json_data is file");
  203 + }
  204 + break;
  205 + case qpdf_sj_file:
  206 + if (p == nullptr) {
  207 + throw std::logic_error("QPDF_Stream::getStreamJSON: pipline must "
  208 + "be be suppiled json_data is file");
  209 + }
  210 + if (data_filename.empty()) {
  211 + throw std::logic_error("QPDF_Stream::getStreamJSON: data_filename "
  212 + "must be supplied when json_data is file");
  213 + }
  214 + break;
  215 + }
  216 +
  217 + auto dict = this->stream_dict;
  218 + JSON result = JSON::makeDictionary();
  219 + if (json_data != qpdf_sj_none) {
  220 + std::shared_ptr<Buffer> buf;
  221 + bool filtered = false;
  222 + bool filter = (decode_level != qpdf_dl_none);
  223 + for (int attempt = 1; attempt <= 2; ++attempt) {
  224 + Pl_Discard discard;
  225 + std::shared_ptr<Pl_Buffer> buf_pl;
  226 + Pipeline* data_pipeline = nullptr;
  227 + if (json_data == qpdf_sj_file) {
  228 + // We need to capture the data to write
  229 + buf_pl = std::make_shared<Pl_Buffer>("stream data");
  230 + data_pipeline = buf_pl.get();
  231 + } else {
  232 + data_pipeline = &discard;
  233 + }
  234 + filtered = pipeStreamData(
  235 + data_pipeline, nullptr, 0, decode_level, false, (attempt == 1));
  236 + if (filter && (!filtered)) {
  237 + // Try again
  238 + filter = false;
  239 + } else {
  240 + if (buf_pl.get()) {
  241 + buf = buf_pl->getBufferSharedPointer();
  242 + }
  243 + break;
  244 + }
  245 + }
  246 + // We can use unsafeShallowCopy because we are only
  247 + // touching top-level keys.
  248 + dict = this->stream_dict.unsafeShallowCopy();
  249 + dict.removeKey("/Length");
  250 + if (filtered) {
  251 + dict.removeKey("/Filter");
  252 + dict.removeKey("/DecodeParms");
  253 + }
  254 + if (json_data == qpdf_sj_file) {
  255 + result.addDictionaryMember(
  256 + "datafile", JSON::makeString(data_filename));
  257 + if (!buf.get()) {
  258 + throw std::logic_error(
  259 + "QPDF_Stream: failed to get stream data in json file mode");
  260 + }
  261 + p->write(buf->getBuffer(), buf->getSize());
  262 + } else if (json_data == qpdf_sj_inline) {
  263 + result.addDictionaryMember(
  264 + "data", JSON::makeBlob(StreamBlobProvider(this, decode_level)));
  265 + } else {
  266 + throw std::logic_error(
  267 + "QPDF_Stream: unexpected value of json_data");
  268 + }
  269 + }
  270 + result.addDictionaryMember("dict", dict.getJSON(json_version));
  271 + return result;
158 } 272 }
159 273
160 QPDFObject::object_type_e 274 QPDFObject::object_type_e
libqpdf/qpdf/QPDF_Stream.hh
@@ -61,6 +61,12 @@ class QPDF_Stream: public QPDFObject @@ -61,6 +61,12 @@ class QPDF_Stream: public QPDFObject
61 QPDFObjectHandle const& decode_parms); 61 QPDFObjectHandle const& decode_parms);
62 void 62 void
63 addTokenFilter(std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter); 63 addTokenFilter(std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter);
  64 + JSON getStreamJSON(
  65 + int json_version,
  66 + qpdf_stream_data_json_e json_data,
  67 + qpdf_stream_decode_level_e decode_level,
  68 + Pipeline* p,
  69 + std::string const& data_filename);
64 70
65 void replaceDict(QPDFObjectHandle const& new_dict); 71 void replaceDict(QPDFObjectHandle const& new_dict);
66 72
qpdf/qtest/qpdf/direct-pages-json-objects.out
@@ -49,7 +49,9 @@ @@ -49,7 +49,9 @@
49 "/Type": "/Pages" 49 "/Type": "/Pages"
50 }, 50 },
51 "3 0 R": { 51 "3 0 R": {
52 - "/Length": "4 0 R" 52 + "dict": {
  53 + "/Length": "4 0 R"
  54 + }
53 }, 55 },
54 "4 0 R": 44, 56 "4 0 R": 44,
55 "5 0 R": { 57 "5 0 R": {
qpdf/qtest/qpdf/direct-pages-json-pages.out
@@ -39,7 +39,9 @@ @@ -39,7 +39,9 @@
39 "/Type": "/Pages" 39 "/Type": "/Pages"
40 }, 40 },
41 "3 0 R": { 41 "3 0 R": {
42 - "/Length": "4 0 R" 42 + "dict": {
  43 + "/Length": "4 0 R"
  44 + }
43 }, 45 },
44 "4 0 R": 44, 46 "4 0 R": 44,
45 "5 0 R": { 47 "5 0 R": {
qpdf/qtest/qpdf/page_api_2-json-objects.out
@@ -62,7 +62,9 @@ @@ -62,7 +62,9 @@
62 "/Type": "/Page" 62 "/Type": "/Page"
63 }, 63 },
64 "6 0 R": { 64 "6 0 R": {
65 - "/Length": "7 0 R" 65 + "dict": {
  66 + "/Length": "7 0 R"
  67 + }
66 }, 68 },
67 "7 0 R": 47, 69 "7 0 R": 47,
68 "8 0 R": { 70 "8 0 R": {
@@ -72,7 +74,9 @@ @@ -72,7 +74,9 @@
72 "/Type": "/Font" 74 "/Type": "/Font"
73 }, 75 },
74 "9 0 R": { 76 "9 0 R": {
75 - "/Length": "10 0 R" 77 + "dict": {
  78 + "/Length": "10 0 R"
  79 + }
76 }, 80 },
77 "10 0 R": 47, 81 "10 0 R": 47,
78 "trailer": { 82 "trailer": {
qpdf/qtest/qpdf/page_api_2-json-pages.out
@@ -94,7 +94,9 @@ @@ -94,7 +94,9 @@
94 "/Type": "/Page" 94 "/Type": "/Page"
95 }, 95 },
96 "6 0 R": { 96 "6 0 R": {
97 - "/Length": "7 0 R" 97 + "dict": {
  98 + "/Length": "7 0 R"
  99 + }
98 }, 100 },
99 "7 0 R": 47, 101 "7 0 R": 47,
100 "8 0 R": { 102 "8 0 R": {
@@ -104,7 +106,9 @@ @@ -104,7 +106,9 @@
104 "/Type": "/Font" 106 "/Type": "/Font"
105 }, 107 },
106 "9 0 R": { 108 "9 0 R": {
107 - "/Length": "10 0 R" 109 + "dict": {
  110 + "/Length": "10 0 R"
  111 + }
108 }, 112 },
109 "10 0 R": 47, 113 "10 0 R": 47,
110 "11 0 R": { 114 "11 0 R": {