Commit 1bc8abfdd3eb9b5a6af5d274c85cd1708bdb9e0c

Authored by Jay Berkenbilt
1 parent 3246923c

Implement JSON v2 for Stream

Not fully exercised in this commit
... ... @@ -63,6 +63,8 @@ General things to remember:
63 63  
64 64 * Remember typo: search for "Typo" In QPDFJob::doJSONEncrypt.
65 65  
  66 +* Test stream with invalid data
  67 +
66 68 * Consider using camelCase in multi-word key names to be consistent
67 69 with job JSON and with how JSON is often represented in languages
68 70 that use it more natively.
... ...
include/qpdf/Constants.h
... ... @@ -99,6 +99,12 @@ enum qpdf_stream_decode_level_e {
99 99 qpdf_dl_specialized, /* also decode other non-lossy filters */
100 100 qpdf_dl_all /* also decode lossy filters */
101 101 };
  102 +/* For JSON encoding */
  103 +enum qpdf_stream_data_json_e {
  104 + qpdf_sj_none = 0,
  105 + qpdf_sj_inline,
  106 + qpdf_sj_file,
  107 +};
102 108  
103 109 /* R3 Encryption Parameters */
104 110  
... ...
include/qpdf/QPDFObjectHandle.hh
... ... @@ -1339,8 +1339,8 @@ class QPDFObjectHandle
1339 1339 // unambiguous. The getStreamJSON() call can be used to add
1340 1340 // encoding of the stream's data.
1341 1341 // * Object types that are only valid in content streams (inline
1342   - // image, operator) as well as "reserved" objects are not
1343   - // representable and will be serialized as "null".
  1342 + // image, operator) are serialized as "null". Attempting to
  1343 + // serialize a "reserved" object is an error.
1344 1344 // If dereference_indirect is true and this is an indirect object,
1345 1345 // show the actual contents of the object. The effect of
1346 1346 // dereference_indirect applies only to this object. It is not
... ... @@ -1350,9 +1350,42 @@ class QPDFObjectHandle
1350 1350  
1351 1351 // Deprecated version uses v1 for backward compatibility.
1352 1352 // ABI: remove for qpdf 12
1353   - [[deprecated("Use getJSON(int version)")]]
1354   - QPDF_DLL
1355   - JSON getJSON(bool dereference_indirect = false);
  1353 + [[deprecated("Use getJSON(int version)")]] QPDF_DLL JSON
  1354 + getJSON(bool dereference_indirect = false);
  1355 +
  1356 + // This method can be called on a stream to get a more extended
  1357 + // JSON representation of the stream that includes the stream's
  1358 + // data. The JSON object returned is always a dictionary whose
  1359 + // "dict" key is an encoding of the stream's dictionary. The
  1360 + // representation of the data is determined by the json_data
  1361 + // parameter.
  1362 + //
  1363 + // The json_data parameter may have the value qpdf_sj_none,
  1364 + // qpdf_sj_inline, or qpdf_sj_file.
  1365 + //
  1366 + // If json_data is qpdf_sj_none, stream data is not represented.
  1367 + //
  1368 + // If json_data is qpdf_sj_inline or qpdf_sj_file, then stream
  1369 + // data is filtered or not based on the value of decode_level,
  1370 + // which has the same meaning as with pipeStreamData.
  1371 + //
  1372 + // If json_data is qpdf_sj_inline, the base64-encoded stream data
  1373 + // is included in the "data" field of the dictionary that is
  1374 + // returned.
  1375 + //
  1376 + // If json_data is qpdf_sj_file, then the Pipeline ("p") and
  1377 + // data_filename arguments must be supplied. The value of
  1378 + // data_filename is stored in the resulting JSON in the "datafile"
  1379 + // key but is not otherwise used. The stream data itself (raw or
  1380 + // filtered depending on decode level) is written to the
  1381 + // pipeline via pipeStreamData().
  1382 + QPDF_DLL
  1383 + JSON getStreamJSON(
  1384 + int json_version,
  1385 + qpdf_stream_data_json_e json_data,
  1386 + qpdf_stream_decode_level_e decode_level,
  1387 + Pipeline* p,
  1388 + std::string const& data_filename);
1356 1389  
1357 1390 // Legacy helper methods for commonly performed operations on
1358 1391 // pages. Newer code should use QPDFPageObjectHelper instead. The
... ...
libqpdf/QPDFObjectHandle.cc
... ... @@ -1797,6 +1797,19 @@ QPDFObjectHandle::getJSON(int json_version, bool dereference_indirect)
1797 1797 }
1798 1798 }
1799 1799  
  1800 +JSON
  1801 +QPDFObjectHandle::getStreamJSON(
  1802 + int json_version,
  1803 + qpdf_stream_data_json_e json_data,
  1804 + qpdf_stream_decode_level_e decode_level,
  1805 + Pipeline* p,
  1806 + std::string const& data_filename)
  1807 +{
  1808 + assertStream();
  1809 + return dynamic_cast<QPDF_Stream*>(obj.get())->getStreamJSON(
  1810 + json_version, json_data, decode_level, p, data_filename);
  1811 +}
  1812 +
1800 1813 QPDFObjectHandle
1801 1814 QPDFObjectHandle::wrapInArray()
1802 1815 {
... ...
libqpdf/QPDF_Stream.cc
... ... @@ -2,8 +2,10 @@
2 2  
3 3 #include <qpdf/ContentNormalizer.hh>
4 4 #include <qpdf/Pipeline.hh>
  5 +#include <qpdf/Pl_Base64.hh>
5 6 #include <qpdf/Pl_Buffer.hh>
6 7 #include <qpdf/Pl_Count.hh>
  8 +#include <qpdf/Pl_Discard.hh>
7 9 #include <qpdf/Pl_Flate.hh>
8 10 #include <qpdf/Pl_QPDFTokenizer.hh>
9 11 #include <qpdf/QIntC.hh>
... ... @@ -54,6 +56,18 @@ namespace
54 56 return nullptr;
55 57 }
56 58 };
  59 +
  60 + class StreamBlobProvider
  61 + {
  62 + public:
  63 + StreamBlobProvider(
  64 + QPDF_Stream* stream, qpdf_stream_decode_level_e decode_level);
  65 + void operator()(Pipeline*);
  66 +
  67 + private:
  68 + QPDF_Stream* stream;
  69 + qpdf_stream_decode_level_e decode_level;
  70 + };
57 71 } // namespace
58 72  
59 73 std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = {
... ... @@ -81,6 +95,19 @@ std::map<std::string, std::function<std::shared_ptr<QPDFStreamFilter>()>>
81 95 {"/ASCIIHexDecode", SF_ASCIIHexDecode::factory},
82 96 };
83 97  
  98 +StreamBlobProvider::StreamBlobProvider(
  99 + QPDF_Stream* stream, qpdf_stream_decode_level_e decode_level) :
  100 + stream(stream),
  101 + decode_level(decode_level)
  102 +{
  103 +}
  104 +
  105 +void
  106 +StreamBlobProvider::operator()(Pipeline* p)
  107 +{
  108 + this->stream->pipeStreamData(p, nullptr, 0, decode_level, false, false);
  109 +}
  110 +
84 111 QPDF_Stream::QPDF_Stream(
85 112 QPDF* qpdf,
86 113 int objid,
... ... @@ -153,8 +180,95 @@ QPDF_Stream::unparse()
153 180 JSON
154 181 QPDF_Stream::getJSON(int json_version)
155 182 {
156   - // QXXXQ
157   - return this->stream_dict.getJSON(json_version);
  183 + if (json_version == 1) {
  184 + return this->stream_dict.getJSON(json_version);
  185 + }
  186 + return getStreamJSON(json_version, qpdf_sj_none, qpdf_dl_none, nullptr, "");
  187 +}
  188 +
  189 +JSON
  190 +QPDF_Stream::getStreamJSON(
  191 + int json_version,
  192 + qpdf_stream_data_json_e json_data,
  193 + qpdf_stream_decode_level_e decode_level,
  194 + Pipeline* p,
  195 + std::string const& data_filename)
  196 +{
  197 + switch (json_data) {
  198 + case qpdf_sj_none:
  199 + case qpdf_sj_inline:
  200 + if (p != nullptr) {
  201 + throw std::logic_error("QPDF_Stream::getStreamJSON: pipline should "
  202 + "only be suppiled json_data is file");
  203 + }
  204 + break;
  205 + case qpdf_sj_file:
  206 + if (p == nullptr) {
  207 + throw std::logic_error("QPDF_Stream::getStreamJSON: pipline must "
  208 + "be be suppiled json_data is file");
  209 + }
  210 + if (data_filename.empty()) {
  211 + throw std::logic_error("QPDF_Stream::getStreamJSON: data_filename "
  212 + "must be supplied when json_data is file");
  213 + }
  214 + break;
  215 + }
  216 +
  217 + auto dict = this->stream_dict;
  218 + JSON result = JSON::makeDictionary();
  219 + if (json_data != qpdf_sj_none) {
  220 + std::shared_ptr<Buffer> buf;
  221 + bool filtered = false;
  222 + bool filter = (decode_level != qpdf_dl_none);
  223 + for (int attempt = 1; attempt <= 2; ++attempt) {
  224 + Pl_Discard discard;
  225 + std::shared_ptr<Pl_Buffer> buf_pl;
  226 + Pipeline* data_pipeline = nullptr;
  227 + if (json_data == qpdf_sj_file) {
  228 + // We need to capture the data to write
  229 + buf_pl = std::make_shared<Pl_Buffer>("stream data");
  230 + data_pipeline = buf_pl.get();
  231 + } else {
  232 + data_pipeline = &discard;
  233 + }
  234 + filtered = pipeStreamData(
  235 + data_pipeline, nullptr, 0, decode_level, false, (attempt == 1));
  236 + if (filter && (!filtered)) {
  237 + // Try again
  238 + filter = false;
  239 + } else {
  240 + if (buf_pl.get()) {
  241 + buf = buf_pl->getBufferSharedPointer();
  242 + }
  243 + break;
  244 + }
  245 + }
  246 + // We can use unsafeShallowCopy because we are only
  247 + // touching top-level keys.
  248 + dict = this->stream_dict.unsafeShallowCopy();
  249 + dict.removeKey("/Length");
  250 + if (filtered) {
  251 + dict.removeKey("/Filter");
  252 + dict.removeKey("/DecodeParms");
  253 + }
  254 + if (json_data == qpdf_sj_file) {
  255 + result.addDictionaryMember(
  256 + "datafile", JSON::makeString(data_filename));
  257 + if (!buf.get()) {
  258 + throw std::logic_error(
  259 + "QPDF_Stream: failed to get stream data in json file mode");
  260 + }
  261 + p->write(buf->getBuffer(), buf->getSize());
  262 + } else if (json_data == qpdf_sj_inline) {
  263 + result.addDictionaryMember(
  264 + "data", JSON::makeBlob(StreamBlobProvider(this, decode_level)));
  265 + } else {
  266 + throw std::logic_error(
  267 + "QPDF_Stream: unexpected value of json_data");
  268 + }
  269 + }
  270 + result.addDictionaryMember("dict", dict.getJSON(json_version));
  271 + return result;
158 272 }
159 273  
160 274 QPDFObject::object_type_e
... ...
libqpdf/qpdf/QPDF_Stream.hh
... ... @@ -61,6 +61,12 @@ class QPDF_Stream: public QPDFObject
61 61 QPDFObjectHandle const& decode_parms);
62 62 void
63 63 addTokenFilter(std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter);
  64 + JSON getStreamJSON(
  65 + int json_version,
  66 + qpdf_stream_data_json_e json_data,
  67 + qpdf_stream_decode_level_e decode_level,
  68 + Pipeline* p,
  69 + std::string const& data_filename);
64 70  
65 71 void replaceDict(QPDFObjectHandle const& new_dict);
66 72  
... ...
qpdf/qtest/qpdf/direct-pages-json-objects.out
... ... @@ -49,7 +49,9 @@
49 49 "/Type": "/Pages"
50 50 },
51 51 "3 0 R": {
52   - "/Length": "4 0 R"
  52 + "dict": {
  53 + "/Length": "4 0 R"
  54 + }
53 55 },
54 56 "4 0 R": 44,
55 57 "5 0 R": {
... ...
qpdf/qtest/qpdf/direct-pages-json-pages.out
... ... @@ -39,7 +39,9 @@
39 39 "/Type": "/Pages"
40 40 },
41 41 "3 0 R": {
42   - "/Length": "4 0 R"
  42 + "dict": {
  43 + "/Length": "4 0 R"
  44 + }
43 45 },
44 46 "4 0 R": 44,
45 47 "5 0 R": {
... ...
qpdf/qtest/qpdf/page_api_2-json-objects.out
... ... @@ -62,7 +62,9 @@
62 62 "/Type": "/Page"
63 63 },
64 64 "6 0 R": {
65   - "/Length": "7 0 R"
  65 + "dict": {
  66 + "/Length": "7 0 R"
  67 + }
66 68 },
67 69 "7 0 R": 47,
68 70 "8 0 R": {
... ... @@ -72,7 +74,9 @@
72 74 "/Type": "/Font"
73 75 },
74 76 "9 0 R": {
75   - "/Length": "10 0 R"
  77 + "dict": {
  78 + "/Length": "10 0 R"
  79 + }
76 80 },
77 81 "10 0 R": 47,
78 82 "trailer": {
... ...
qpdf/qtest/qpdf/page_api_2-json-pages.out
... ... @@ -94,7 +94,9 @@
94 94 "/Type": "/Page"
95 95 },
96 96 "6 0 R": {
97   - "/Length": "7 0 R"
  97 + "dict": {
  98 + "/Length": "7 0 R"
  99 + }
98 100 },
99 101 "7 0 R": 47,
100 102 "8 0 R": {
... ... @@ -104,7 +106,9 @@
104 106 "/Type": "/Font"
105 107 },
106 108 "9 0 R": {
107   - "/Length": "10 0 R"
  109 + "dict": {
  110 + "/Length": "10 0 R"
  111 + }
108 112 },
109 113 "10 0 R": 47,
110 114 "11 0 R": {
... ...