Commit 0fe8d4476205c97e402e555aac41a88e70e3e9b2
1 parent
63c7eefe
Support stream data -- not tested
There are no automated tests yet; this commits the work so far in preparation for some refactoring.
Showing
5 changed files
with
77 additions
and
18 deletions
TODO
| ... | ... | @@ -54,14 +54,14 @@ Soon: Break ground on "Document-level work" |
| 54 | 54 | Output JSON v2 |
| 55 | 55 | ============== |
| 56 | 56 | |
| 57 | -XXX | |
| 58 | - | |
| 59 | 57 | * Reread from perspective of update |
| 60 | 58 | * Test all ignore cases with QTC |
| 61 | 59 | * Test case of correct file with dict before data/datafile |
| 62 | 60 | * Have a test case if possible that exercises the object description |
| 63 | 61 | which means we need some kind of semantic error that gets caught |
| 64 | 62 | after creation. |
| 63 | +* Test invalid data, invalid data file | |
| 64 | +* Tests: round-trip through json, round-trip through qpdf --qdf | |
| 65 | 65 | |
| 66 | 66 | Try to never flatten pages tree. Make sure we do something reasonable |
| 67 | 67 | with pages tree repair. The problem is that if pages tree repair is |
| ... | ... | @@ -236,6 +236,11 @@ Other documentation fodder: |
| 236 | 236 | |
| 237 | 237 | You can't create a PDF from v1 json because |
| 238 | 238 | |
| 239 | +* Change: names are written in canonical form with a leading slash, | |
| 240 | + exactly as the code stores them. In v1, they were written in PDF | |
| 241 | + syntax in the json file. Example: /text#2fplain in a PDF file is | |
| 242 | + written as /text/plain in json v2 and as /text#2fplain in json v1. | |
| 243 | + | |
| 239 | 244 | * The PDF version header is not recorded |
| 240 | 245 | |
| 241 | 246 | * Strings cannot be unambiguously encoded/decoded | ... | ... |
include/qpdf/QPDF.hh
| ... | ... | @@ -998,7 +998,8 @@ class QPDF |
| 998 | 998 | class JSONReactor: public JSON::Reactor |
| 999 | 999 | { |
| 1000 | 1000 | public: |
| 1001 | - JSONReactor(QPDF&, std::string const& filename, bool must_be_complete); | |
| 1001 | + JSONReactor( | |
| 1002 | + QPDF&, std::shared_ptr<InputSource> is, bool must_be_complete); | |
| 1002 | 1003 | virtual ~JSONReactor() = default; |
| 1003 | 1004 | virtual void dictionaryStart() override; |
| 1004 | 1005 | virtual void arrayStart() override; |
| ... | ... | @@ -1033,7 +1034,7 @@ class QPDF |
| 1033 | 1034 | QPDFObjectHandle to_replace, QPDFObjectHandle replacement); |
| 1034 | 1035 | |
| 1035 | 1036 | QPDF& pdf; |
| 1036 | - std::string filename; | |
| 1037 | + std::shared_ptr<InputSource> is; | |
| 1037 | 1038 | bool must_be_complete; |
| 1038 | 1039 | bool errors; |
| 1039 | 1040 | bool parse_error; | ... | ... |
libqpdf/QPDF_Dictionary.cc
| ... | ... | @@ -37,9 +37,10 @@ QPDF_Dictionary::getJSON(int json_version) |
| 37 | 37 | JSON j = JSON::makeDictionary(); |
| 38 | 38 | for (auto& iter: this->items) { |
| 39 | 39 | if (!iter.second.isNull()) { |
| 40 | - j.addDictionaryMember( | |
| 41 | - QPDF_Name::normalizeName(iter.first), | |
| 42 | - iter.second.getJSON(json_version)); | |
| 40 | + std::string key = | |
| 41 | + (json_version == 1 ? QPDF_Name::normalizeName(iter.first) | |
| 42 | + : iter.first); | |
| 43 | + j.addDictionaryMember(key, iter.second.getJSON(json_version)); | |
| 43 | 44 | } |
| 44 | 45 | } |
| 45 | 46 | return j; | ... | ... |
libqpdf/QPDF_Name.cc
| ... | ... | @@ -42,7 +42,11 @@ QPDF_Name::unparse() |
| 42 | 42 | JSON |
| 43 | 43 | QPDF_Name::getJSON(int json_version) |
| 44 | 44 | { |
| 45 | - return JSON::makeString(normalizeName(this->name)); | |
| 45 | + if (json_version == 1) { | |
| 46 | + return JSON::makeString(normalizeName(this->name)); | |
| 47 | + } else { | |
| 48 | + return JSON::makeString(this->name); | |
| 49 | + } | |
| 46 | 50 | } |
| 47 | 51 | |
| 48 | 52 | QPDFObject::object_type_e | ... | ... |
libqpdf/QPDF_json.cc
| 1 | 1 | #include <qpdf/QPDF.hh> |
| 2 | 2 | |
| 3 | 3 | #include <qpdf/FileInputSource.hh> |
| 4 | +#include <qpdf/Pl_Base64.hh> | |
| 4 | 5 | #include <qpdf/QIntC.hh> |
| 5 | 6 | #include <qpdf/QTC.hh> |
| 6 | 7 | #include <qpdf/QUtil.hh> |
| 8 | +#include <algorithm> | |
| 7 | 9 | #include <regex> |
| 8 | 10 | |
| 9 | 11 | // This chart shows an example of the state transitions that would |
| ... | ... | @@ -52,17 +54,40 @@ static char const* JSON_PDF = ( |
| 52 | 54 | "9\n" |
| 53 | 55 | "%%EOF\n"); |
| 54 | 56 | |
| 57 | +// Note the use of [\\s\\S] rather than . to match any character, | |
| 58 | +// since . doesn't match newlines. | |
| 55 | 59 | static std::regex PDF_VERSION_RE("^\\d+\\.\\d+$"); |
| 56 | 60 | static std::regex OBJ_KEY_RE("^obj:(\\d+) (\\d+) R$"); |
| 57 | 61 | static std::regex INDIRECT_OBJ_RE("^(\\d+) (\\d+) R$"); |
| 58 | -static std::regex UNICODE_RE("^u:(.*)$"); | |
| 62 | +static std::regex UNICODE_RE("^u:([\\s\\S]*)$"); | |
| 59 | 63 | static std::regex BINARY_RE("^b:((?:[0-9a-fA-F]{2})*)$"); |
| 60 | -static std::regex NAME_RE("^/.*$"); | |
| 64 | +static std::regex NAME_RE("^/[\\s\\S]*$"); | |
| 65 | + | |
| 66 | +static std::function<void(Pipeline*)> | |
| 67 | +provide_data(std::shared_ptr<InputSource> is, size_t start, size_t end) | |
| 68 | +{ | |
| 69 | + return [is, start, end](Pipeline* p) { | |
| 70 | + Pl_Base64 decode("base64-decode", p, Pl_Base64::a_decode); | |
| 71 | + p = &decode; | |
| 72 | + size_t bytes = end - start; | |
| 73 | + char buf[8192]; | |
| 74 | + is->seek(QIntC::to_offset(start), SEEK_SET); | |
| 75 | + size_t len = 0; | |
| 76 | + while ((len = is->read(buf, std::min(bytes, sizeof(buf)))) > 0) { | |
| 77 | + p->write(buf, len); | |
| 78 | + bytes -= len; | |
| 79 | + if (bytes == 0) { | |
| 80 | + break; | |
| 81 | + } | |
| 82 | + } | |
| 83 | + decode.finish(); | |
| 84 | + }; | |
| 85 | +} | |
| 61 | 86 | |
| 62 | 87 | QPDF::JSONReactor::JSONReactor( |
| 63 | - QPDF& pdf, std::string const& filename, bool must_be_complete) : | |
| 88 | + QPDF& pdf, std::shared_ptr<InputSource> is, bool must_be_complete) : | |
| 64 | 89 | pdf(pdf), |
| 65 | - filename(filename), | |
| 90 | + is(is), | |
| 66 | 91 | must_be_complete(must_be_complete), |
| 67 | 92 | errors(false), |
| 68 | 93 | parse_error(false), |
| ... | ... | @@ -334,8 +359,6 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) |
| 334 | 359 | replacement = |
| 335 | 360 | pdf.reserveStream(tos.getObjectID(), tos.getGeneration()); |
| 336 | 361 | replaceObject(tos, replacement); |
| 337 | - replacement.replaceStreamData( | |
| 338 | - "", "<<>>"_qpdf, "<<>>"_qpdf); // QXXXQ | |
| 339 | 362 | } |
| 340 | 363 | } else { |
| 341 | 364 | // Ignore unknown keys for forward compatibility |
| ... | ... | @@ -369,6 +392,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) |
| 369 | 392 | throw std::logic_error("no object on stack in st_stream"); |
| 370 | 393 | } |
| 371 | 394 | auto tos = object_stack.back(); |
| 395 | + auto uninitialized = QPDFObjectHandle(); | |
| 372 | 396 | if (!tos.isStream()) { |
| 373 | 397 | // QXXXQ QTC in update mode |
| 374 | 398 | error(value.getStart(), "this object is not a stream"); |
| ... | ... | @@ -388,10 +412,33 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) |
| 388 | 412 | } |
| 389 | 413 | } else if (key == "data") { |
| 390 | 414 | this->saw_data = true; |
| 391 | - // QXXXQ | |
| 415 | + std::string v; | |
| 416 | + if (!value.getString(v)) { | |
| 417 | + error(value.getStart(), "\"stream.data\" must be a string"); | |
| 418 | + } else { | |
| 419 | + // The range includes the quotes. | |
| 420 | + auto start = value.getStart() + 1; | |
| 421 | + auto end = value.getEnd() - 1; | |
| 422 | + if (end < start) { | |
| 423 | + throw std::logic_error("QPDF_json: JSON string length < 0"); | |
| 424 | + } | |
| 425 | + tos.replaceStreamData( | |
| 426 | + provide_data(is, start, end), uninitialized, uninitialized); | |
| 427 | + } | |
| 392 | 428 | } else if (key == "datafile") { |
| 393 | 429 | this->saw_datafile = true; |
| 394 | - // QXXXQ | |
| 430 | + std::string filename; | |
| 431 | + if (value.getString(filename)) { | |
| 432 | + tos.replaceStreamData( | |
| 433 | + QUtil::file_provider(filename), | |
| 434 | + uninitialized, | |
| 435 | + uninitialized); | |
| 436 | + } else { | |
| 437 | + error( | |
| 438 | + value.getStart(), | |
| 439 | + "\"stream.datafile\" must be a string containing a file " | |
| 440 | + "name"); | |
| 441 | + } | |
| 395 | 442 | } else { |
| 396 | 443 | // Ignore unknown keys for forward compatibility. |
| 397 | 444 | // QXXXQ QTC |
| ... | ... | @@ -471,7 +518,8 @@ QPDF::JSONReactor::makeObject(JSON const& value) |
| 471 | 518 | // QXXXQ include object number in description |
| 472 | 519 | result.setObjectDescription( |
| 473 | 520 | &this->pdf, |
| 474 | - this->filename + " offset " + QUtil::uint_to_string(value.getStart())); | |
| 521 | + this->is->getName() + " offset " + | |
| 522 | + QUtil::uint_to_string(value.getStart())); | |
| 475 | 523 | return result; |
| 476 | 524 | } |
| 477 | 525 | |
| ... | ... | @@ -503,7 +551,7 @@ QPDF::updateFromJSON(std::shared_ptr<InputSource> is) |
| 503 | 551 | void |
| 504 | 552 | QPDF::importJSON(std::shared_ptr<InputSource> is, bool must_be_complete) |
| 505 | 553 | { |
| 506 | - JSONReactor reactor(*this, is->getName(), must_be_complete); | |
| 554 | + JSONReactor reactor(*this, is, must_be_complete); | |
| 507 | 555 | try { |
| 508 | 556 | JSON::parse(*is, &reactor); |
| 509 | 557 | } catch (std::runtime_error& e) { | ... | ... |