Commit 0fe8d4476205c97e402e555aac41a88e70e3e9b2

Authored by Jay Berkenbilt
1 parent 63c7eefe

Support stream data -- not tested

There are no automated tests yet, but committing work so far in
preparation for some refactoring.
@@ -54,14 +54,14 @@ Soon: Break ground on "Document-level work" @@ -54,14 +54,14 @@ Soon: Break ground on "Document-level work"
54 Output JSON v2 54 Output JSON v2
55 ============== 55 ==============
56 56
57 -XXX  
58 -  
59 * Reread from perspective of update 57 * Reread from perspective of update
60 * Test all ignore cases with QTC 58 * Test all ignore cases with QTC
61 * Test case of correct file with dict before data/datafile 59 * Test case of correct file with dict before data/datafile
62 * Have a test case if possible that exercises the object description 60 * Have a test case if possible that exercises the object description
63 which means we need some kind of semantic error that gets caught 61 which means we need some kind of semantic error that gets caught
64 after creation. 62 after creation.
  63 +* Test invalid data, invalid data file
  64 +* Tests: round-trip through json, round-trip through qpdf --qdf
65 65
66 Try to never flatten pages tree. Make sure we do something reasonable 66 Try to never flatten pages tree. Make sure we do something reasonable
67 with pages tree repair. The problem is that if pages tree repair is 67 with pages tree repair. The problem is that if pages tree repair is
@@ -236,6 +236,11 @@ Other documentation fodder: @@ -236,6 +236,11 @@ Other documentation fodder:
236 236
237 You can't create a PDF from v1 json because 237 You can't create a PDF from v1 json because
238 238
  239 +* Change: names are written in canonical form with a leading slash
  240 + just as they are treated in the code. In v1, they were written in
  241 + PDF syntax in the json file. Example: /text#2fplain in pdf will be
  242 + written as /text/plain in json v2 and as /text#2fplain in json v1.
  243 +
239 * The PDF version header is not recorded 244 * The PDF version header is not recorded
240 245
241 * Strings cannot be unambiguously encoded/decoded 246 * Strings cannot be unambiguously encoded/decoded
include/qpdf/QPDF.hh
@@ -998,7 +998,8 @@ class QPDF @@ -998,7 +998,8 @@ class QPDF
998 class JSONReactor: public JSON::Reactor 998 class JSONReactor: public JSON::Reactor
999 { 999 {
1000 public: 1000 public:
1001 - JSONReactor(QPDF&, std::string const& filename, bool must_be_complete); 1001 + JSONReactor(
  1002 + QPDF&, std::shared_ptr<InputSource> is, bool must_be_complete);
1002 virtual ~JSONReactor() = default; 1003 virtual ~JSONReactor() = default;
1003 virtual void dictionaryStart() override; 1004 virtual void dictionaryStart() override;
1004 virtual void arrayStart() override; 1005 virtual void arrayStart() override;
@@ -1033,7 +1034,7 @@ class QPDF @@ -1033,7 +1034,7 @@ class QPDF
1033 QPDFObjectHandle to_replace, QPDFObjectHandle replacement); 1034 QPDFObjectHandle to_replace, QPDFObjectHandle replacement);
1034 1035
1035 QPDF& pdf; 1036 QPDF& pdf;
1036 - std::string filename; 1037 + std::shared_ptr<InputSource> is;
1037 bool must_be_complete; 1038 bool must_be_complete;
1038 bool errors; 1039 bool errors;
1039 bool parse_error; 1040 bool parse_error;
libqpdf/QPDF_Dictionary.cc
@@ -37,9 +37,10 @@ QPDF_Dictionary::getJSON(int json_version) @@ -37,9 +37,10 @@ QPDF_Dictionary::getJSON(int json_version)
37 JSON j = JSON::makeDictionary(); 37 JSON j = JSON::makeDictionary();
38 for (auto& iter: this->items) { 38 for (auto& iter: this->items) {
39 if (!iter.second.isNull()) { 39 if (!iter.second.isNull()) {
40 - j.addDictionaryMember(  
41 - QPDF_Name::normalizeName(iter.first),  
42 - iter.second.getJSON(json_version)); 40 + std::string key =
  41 + (json_version == 1 ? QPDF_Name::normalizeName(iter.first)
  42 + : iter.first);
  43 + j.addDictionaryMember(key, iter.second.getJSON(json_version));
43 } 44 }
44 } 45 }
45 return j; 46 return j;
libqpdf/QPDF_Name.cc
@@ -42,7 +42,11 @@ QPDF_Name::unparse() @@ -42,7 +42,11 @@ QPDF_Name::unparse()
42 JSON 42 JSON
43 QPDF_Name::getJSON(int json_version) 43 QPDF_Name::getJSON(int json_version)
44 { 44 {
45 - return JSON::makeString(normalizeName(this->name)); 45 + if (json_version == 1) {
  46 + return JSON::makeString(normalizeName(this->name));
  47 + } else {
  48 + return JSON::makeString(this->name);
  49 + }
46 } 50 }
47 51
48 QPDFObject::object_type_e 52 QPDFObject::object_type_e
libqpdf/QPDF_json.cc
1 #include <qpdf/QPDF.hh> 1 #include <qpdf/QPDF.hh>
2 2
3 #include <qpdf/FileInputSource.hh> 3 #include <qpdf/FileInputSource.hh>
  4 +#include <qpdf/Pl_Base64.hh>
4 #include <qpdf/QIntC.hh> 5 #include <qpdf/QIntC.hh>
5 #include <qpdf/QTC.hh> 6 #include <qpdf/QTC.hh>
6 #include <qpdf/QUtil.hh> 7 #include <qpdf/QUtil.hh>
  8 +#include <algorithm>
7 #include <regex> 9 #include <regex>
8 10
9 // This chart shows an example of the state transitions that would 11 // This chart shows an example of the state transitions that would
@@ -52,17 +54,40 @@ static char const* JSON_PDF = ( @@ -52,17 +54,40 @@ static char const* JSON_PDF = (
52 "9\n" 54 "9\n"
53 "%%EOF\n"); 55 "%%EOF\n");
54 56
  57 +// Note use of [\\s\\S] rather than . to match any character since .
  58 +// doesn't match newlines.
55 static std::regex PDF_VERSION_RE("^\\d+\\.\\d+$"); 59 static std::regex PDF_VERSION_RE("^\\d+\\.\\d+$");
56 static std::regex OBJ_KEY_RE("^obj:(\\d+) (\\d+) R$"); 60 static std::regex OBJ_KEY_RE("^obj:(\\d+) (\\d+) R$");
57 static std::regex INDIRECT_OBJ_RE("^(\\d+) (\\d+) R$"); 61 static std::regex INDIRECT_OBJ_RE("^(\\d+) (\\d+) R$");
58 -static std::regex UNICODE_RE("^u:(.*)$"); 62 +static std::regex UNICODE_RE("^u:([\\s\\S]*)$");
59 static std::regex BINARY_RE("^b:((?:[0-9a-fA-F]{2})*)$"); 63 static std::regex BINARY_RE("^b:((?:[0-9a-fA-F]{2})*)$");
60 -static std::regex NAME_RE("^/.*$"); 64 +static std::regex NAME_RE("^/[\\s\\S]*$");
  65 +
  66 +static std::function<void(Pipeline*)>
  67 +provide_data(std::shared_ptr<InputSource> is, size_t start, size_t end)
  68 +{
  69 + return [is, start, end](Pipeline* p) {
  70 + Pl_Base64 decode("base64-decode", p, Pl_Base64::a_decode);
  71 + p = &decode;
  72 + size_t bytes = end - start;
  73 + char buf[8192];
  74 + is->seek(QIntC::to_offset(start), SEEK_SET);
  75 + size_t len = 0;
  76 + while ((len = is->read(buf, std::min(bytes, sizeof(buf)))) > 0) {
  77 + p->write(buf, len);
  78 + bytes -= len;
  79 + if (bytes == 0) {
  80 + break;
  81 + }
  82 + }
  83 + decode.finish();
  84 + };
  85 +}
61 86
62 QPDF::JSONReactor::JSONReactor( 87 QPDF::JSONReactor::JSONReactor(
63 - QPDF& pdf, std::string const& filename, bool must_be_complete) : 88 + QPDF& pdf, std::shared_ptr<InputSource> is, bool must_be_complete) :
64 pdf(pdf), 89 pdf(pdf),
65 - filename(filename), 90 + is(is),
66 must_be_complete(must_be_complete), 91 must_be_complete(must_be_complete),
67 errors(false), 92 errors(false),
68 parse_error(false), 93 parse_error(false),
@@ -334,8 +359,6 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) @@ -334,8 +359,6 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
334 replacement = 359 replacement =
335 pdf.reserveStream(tos.getObjectID(), tos.getGeneration()); 360 pdf.reserveStream(tos.getObjectID(), tos.getGeneration());
336 replaceObject(tos, replacement); 361 replaceObject(tos, replacement);
337 - replacement.replaceStreamData(  
338 - "", "<<>>"_qpdf, "<<>>"_qpdf); // QXXXQ  
339 } 362 }
340 } else { 363 } else {
341 // Ignore unknown keys for forward compatibility 364 // Ignore unknown keys for forward compatibility
@@ -369,6 +392,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) @@ -369,6 +392,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
369 throw std::logic_error("no object on stack in st_stream"); 392 throw std::logic_error("no object on stack in st_stream");
370 } 393 }
371 auto tos = object_stack.back(); 394 auto tos = object_stack.back();
  395 + auto uninitialized = QPDFObjectHandle();
372 if (!tos.isStream()) { 396 if (!tos.isStream()) {
373 // QXXXQ QTC in update mode 397 // QXXXQ QTC in update mode
374 error(value.getStart(), "this object is not a stream"); 398 error(value.getStart(), "this object is not a stream");
@@ -388,10 +412,33 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) @@ -388,10 +412,33 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
388 } 412 }
389 } else if (key == "data") { 413 } else if (key == "data") {
390 this->saw_data = true; 414 this->saw_data = true;
391 - // QXXXQ 415 + std::string v;
  416 + if (!value.getString(v)) {
  417 + error(value.getStart(), "\"stream.data\" must be a string");
  418 + } else {
  419 + // The range includes the quotes.
  420 + auto start = value.getStart() + 1;
  421 + auto end = value.getEnd() - 1;
  422 + if (end < start) {
  423 + throw std::logic_error("QPDF_json: JSON string length < 0");
  424 + }
  425 + tos.replaceStreamData(
  426 + provide_data(is, start, end), uninitialized, uninitialized);
  427 + }
392 } else if (key == "datafile") { 428 } else if (key == "datafile") {
393 this->saw_datafile = true; 429 this->saw_datafile = true;
394 - // QXXXQ 430 + std::string filename;
  431 + if (value.getString(filename)) {
  432 + tos.replaceStreamData(
  433 + QUtil::file_provider(filename),
  434 + uninitialized,
  435 + uninitialized);
  436 + } else {
  437 + error(
  438 + value.getStart(),
  439 + "\"stream.datafile\" must be a string containing a file "
  440 + "name");
  441 + }
395 } else { 442 } else {
396 // Ignore unknown keys for forward compatibility. 443 // Ignore unknown keys for forward compatibility.
397 // QXXXQ QTC 444 // QXXXQ QTC
@@ -471,7 +518,8 @@ QPDF::JSONReactor::makeObject(JSON const& value) @@ -471,7 +518,8 @@ QPDF::JSONReactor::makeObject(JSON const& value)
471 // QXXXQ include object number in description 518 // QXXXQ include object number in description
472 result.setObjectDescription( 519 result.setObjectDescription(
473 &this->pdf, 520 &this->pdf,
474 - this->filename + " offset " + QUtil::uint_to_string(value.getStart())); 521 + this->is->getName() + " offset " +
  522 + QUtil::uint_to_string(value.getStart()));
475 return result; 523 return result;
476 } 524 }
477 525
@@ -503,7 +551,7 @@ QPDF::updateFromJSON(std::shared_ptr<InputSource> is) @@ -503,7 +551,7 @@ QPDF::updateFromJSON(std::shared_ptr<InputSource> is)
503 void 551 void
504 QPDF::importJSON(std::shared_ptr<InputSource> is, bool must_be_complete) 552 QPDF::importJSON(std::shared_ptr<InputSource> is, bool must_be_complete)
505 { 553 {
506 - JSONReactor reactor(*this, is->getName(), must_be_complete); 554 + JSONReactor reactor(*this, is, must_be_complete);
507 try { 555 try {
508 JSON::parse(*is, &reactor); 556 JSON::parse(*is, &reactor);
509 } catch (std::runtime_error& e) { 557 } catch (std::runtime_error& e) {