Commit 0fe8d4476205c97e402e555aac41a88e70e3e9b2
1 parent
63c7eefe
Support stream data -- not tested
There are no automated tests yet, but committing work so far in preparation for some refactoring.
Showing
5 changed files
with
77 additions
and
18 deletions
TODO
| @@ -54,14 +54,14 @@ Soon: Break ground on "Document-level work" | @@ -54,14 +54,14 @@ Soon: Break ground on "Document-level work" | ||
| 54 | Output JSON v2 | 54 | Output JSON v2 |
| 55 | ============== | 55 | ============== |
| 56 | 56 | ||
| 57 | -XXX | ||
| 58 | - | ||
| 59 | * Reread from perspective of update | 57 | * Reread from perspective of update |
| 60 | * Test all ignore cases with QTC | 58 | * Test all ignore cases with QTC |
| 61 | * Test case of correct file with dict before data/datafile | 59 | * Test case of correct file with dict before data/datafile |
| 62 | * Have a test case if possible that exercises the object description | 60 | * Have a test case if possible that exercises the object description |
| 63 | which means we need some kind of semantic error that gets caught | 61 | which means we need some kind of semantic error that gets caught |
| 64 | after creation. | 62 | after creation. |
| 63 | +* Test invalid data, invalid data file | ||
| 64 | +* Tests: round-trip through json, round-trip through qpdf --qdf | ||
| 65 | 65 | ||
| 66 | Try to never flatten pages tree. Make sure we do something reasonable | 66 | Try to never flatten pages tree. Make sure we do something reasonable |
| 67 | with pages tree repair. The problem is that if pages tree repair is | 67 | with pages tree repair. The problem is that if pages tree repair is |
| @@ -236,6 +236,11 @@ Other documentation fodder: | @@ -236,6 +236,11 @@ Other documentation fodder: | ||
| 236 | 236 | ||
| 237 | You can't create a PDF from v1 json because | 237 | You can't create a PDF from v1 json because |
| 238 | 238 | ||
| 239 | +* Change: names are written in canonical form with a leading slash, | ||
| 240 | + just as they are treated in the code. In v1, they were written in | ||
| 241 | + PDF syntax in the JSON file. Example: /text#2fplain in PDF will be | ||
| 242 | + written as /text/plain in JSON v2 and as /text#2fplain in JSON v1. | ||
| 243 | + | ||
| 239 | * The PDF version header is not recorded | 244 | * The PDF version header is not recorded |
| 240 | 245 | ||
| 241 | * Strings cannot be unambiguously encoded/decoded | 246 | * Strings cannot be unambiguously encoded/decoded |
include/qpdf/QPDF.hh
| @@ -998,7 +998,8 @@ class QPDF | @@ -998,7 +998,8 @@ class QPDF | ||
| 998 | class JSONReactor: public JSON::Reactor | 998 | class JSONReactor: public JSON::Reactor |
| 999 | { | 999 | { |
| 1000 | public: | 1000 | public: |
| 1001 | - JSONReactor(QPDF&, std::string const& filename, bool must_be_complete); | 1001 | + JSONReactor( |
| 1002 | + QPDF&, std::shared_ptr<InputSource> is, bool must_be_complete); | ||
| 1002 | virtual ~JSONReactor() = default; | 1003 | virtual ~JSONReactor() = default; |
| 1003 | virtual void dictionaryStart() override; | 1004 | virtual void dictionaryStart() override; |
| 1004 | virtual void arrayStart() override; | 1005 | virtual void arrayStart() override; |
| @@ -1033,7 +1034,7 @@ class QPDF | @@ -1033,7 +1034,7 @@ class QPDF | ||
| 1033 | QPDFObjectHandle to_replace, QPDFObjectHandle replacement); | 1034 | QPDFObjectHandle to_replace, QPDFObjectHandle replacement); |
| 1034 | 1035 | ||
| 1035 | QPDF& pdf; | 1036 | QPDF& pdf; |
| 1036 | - std::string filename; | 1037 | + std::shared_ptr<InputSource> is; |
| 1037 | bool must_be_complete; | 1038 | bool must_be_complete; |
| 1038 | bool errors; | 1039 | bool errors; |
| 1039 | bool parse_error; | 1040 | bool parse_error; |
libqpdf/QPDF_Dictionary.cc
| @@ -37,9 +37,10 @@ QPDF_Dictionary::getJSON(int json_version) | @@ -37,9 +37,10 @@ QPDF_Dictionary::getJSON(int json_version) | ||
| 37 | JSON j = JSON::makeDictionary(); | 37 | JSON j = JSON::makeDictionary(); |
| 38 | for (auto& iter: this->items) { | 38 | for (auto& iter: this->items) { |
| 39 | if (!iter.second.isNull()) { | 39 | if (!iter.second.isNull()) { |
| 40 | - j.addDictionaryMember( | ||
| 41 | - QPDF_Name::normalizeName(iter.first), | ||
| 42 | - iter.second.getJSON(json_version)); | 40 | + std::string key = |
| 41 | + (json_version == 1 ? QPDF_Name::normalizeName(iter.first) | ||
| 42 | + : iter.first); | ||
| 43 | + j.addDictionaryMember(key, iter.second.getJSON(json_version)); | ||
| 43 | } | 44 | } |
| 44 | } | 45 | } |
| 45 | return j; | 46 | return j; |
libqpdf/QPDF_Name.cc
| @@ -42,7 +42,11 @@ QPDF_Name::unparse() | @@ -42,7 +42,11 @@ QPDF_Name::unparse() | ||
| 42 | JSON | 42 | JSON |
| 43 | QPDF_Name::getJSON(int json_version) | 43 | QPDF_Name::getJSON(int json_version) |
| 44 | { | 44 | { |
| 45 | - return JSON::makeString(normalizeName(this->name)); | 45 | + if (json_version == 1) { |
| 46 | + return JSON::makeString(normalizeName(this->name)); | ||
| 47 | + } else { | ||
| 48 | + return JSON::makeString(this->name); | ||
| 49 | + } | ||
| 46 | } | 50 | } |
| 47 | 51 | ||
| 48 | QPDFObject::object_type_e | 52 | QPDFObject::object_type_e |
libqpdf/QPDF_json.cc
| 1 | #include <qpdf/QPDF.hh> | 1 | #include <qpdf/QPDF.hh> |
| 2 | 2 | ||
| 3 | #include <qpdf/FileInputSource.hh> | 3 | #include <qpdf/FileInputSource.hh> |
| 4 | +#include <qpdf/Pl_Base64.hh> | ||
| 4 | #include <qpdf/QIntC.hh> | 5 | #include <qpdf/QIntC.hh> |
| 5 | #include <qpdf/QTC.hh> | 6 | #include <qpdf/QTC.hh> |
| 6 | #include <qpdf/QUtil.hh> | 7 | #include <qpdf/QUtil.hh> |
| 8 | +#include <algorithm> | ||
| 7 | #include <regex> | 9 | #include <regex> |
| 8 | 10 | ||
| 9 | // This chart shows an example of the state transitions that would | 11 | // This chart shows an example of the state transitions that would |
| @@ -52,17 +54,40 @@ static char const* JSON_PDF = ( | @@ -52,17 +54,40 @@ static char const* JSON_PDF = ( | ||
| 52 | "9\n" | 54 | "9\n" |
| 53 | "%%EOF\n"); | 55 | "%%EOF\n"); |
| 54 | 56 | ||
| 57 | +// Note use of [\\s\\S] rather than . in UNICODE_RE and NAME_RE to | ||
| 58 | +// match any character, since . doesn't match newlines. | ||
| 55 | static std::regex PDF_VERSION_RE("^\\d+\\.\\d+$"); | 59 | static std::regex PDF_VERSION_RE("^\\d+\\.\\d+$"); |
| 56 | static std::regex OBJ_KEY_RE("^obj:(\\d+) (\\d+) R$"); | 60 | static std::regex OBJ_KEY_RE("^obj:(\\d+) (\\d+) R$"); |
| 57 | static std::regex INDIRECT_OBJ_RE("^(\\d+) (\\d+) R$"); | 61 | static std::regex INDIRECT_OBJ_RE("^(\\d+) (\\d+) R$"); |
| 58 | -static std::regex UNICODE_RE("^u:(.*)$"); | 62 | +static std::regex UNICODE_RE("^u:([\\s\\S]*)$"); |
| 59 | static std::regex BINARY_RE("^b:((?:[0-9a-fA-F]{2})*)$"); | 63 | static std::regex BINARY_RE("^b:((?:[0-9a-fA-F]{2})*)$"); |
| 60 | -static std::regex NAME_RE("^/.*$"); | 64 | +static std::regex NAME_RE("^/[\\s\\S]*$"); |
| 65 | + | ||
| 66 | +static std::function<void(Pipeline*)> | ||
| 67 | +provide_data(std::shared_ptr<InputSource> is, size_t start, size_t end) | ||
| 68 | +{ | ||
| 69 | + return [is, start, end](Pipeline* p) { | ||
| 70 | + Pl_Base64 decode("base64-decode", p, Pl_Base64::a_decode); | ||
| 71 | + p = &decode; | ||
| 72 | + size_t bytes = end - start; | ||
| 73 | + char buf[8192]; | ||
| 74 | + is->seek(QIntC::to_offset(start), SEEK_SET); | ||
| 75 | + size_t len = 0; | ||
| 76 | + while ((len = is->read(buf, std::min(bytes, sizeof(buf)))) > 0) { | ||
| 77 | + p->write(buf, len); | ||
| 78 | + bytes -= len; | ||
| 79 | + if (bytes == 0) { | ||
| 80 | + break; | ||
| 81 | + } | ||
| 82 | + } | ||
| 83 | + decode.finish(); | ||
| 84 | + }; | ||
| 85 | +} | ||
| 61 | 86 | ||
| 62 | QPDF::JSONReactor::JSONReactor( | 87 | QPDF::JSONReactor::JSONReactor( |
| 63 | - QPDF& pdf, std::string const& filename, bool must_be_complete) : | 88 | + QPDF& pdf, std::shared_ptr<InputSource> is, bool must_be_complete) : |
| 64 | pdf(pdf), | 89 | pdf(pdf), |
| 65 | - filename(filename), | 90 | + is(is), |
| 66 | must_be_complete(must_be_complete), | 91 | must_be_complete(must_be_complete), |
| 67 | errors(false), | 92 | errors(false), |
| 68 | parse_error(false), | 93 | parse_error(false), |
| @@ -334,8 +359,6 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) | @@ -334,8 +359,6 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) | ||
| 334 | replacement = | 359 | replacement = |
| 335 | pdf.reserveStream(tos.getObjectID(), tos.getGeneration()); | 360 | pdf.reserveStream(tos.getObjectID(), tos.getGeneration()); |
| 336 | replaceObject(tos, replacement); | 361 | replaceObject(tos, replacement); |
| 337 | - replacement.replaceStreamData( | ||
| 338 | - "", "<<>>"_qpdf, "<<>>"_qpdf); // QXXXQ | ||
| 339 | } | 362 | } |
| 340 | } else { | 363 | } else { |
| 341 | // Ignore unknown keys for forward compatibility | 364 | // Ignore unknown keys for forward compatibility |
| @@ -369,6 +392,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) | @@ -369,6 +392,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) | ||
| 369 | throw std::logic_error("no object on stack in st_stream"); | 392 | throw std::logic_error("no object on stack in st_stream"); |
| 370 | } | 393 | } |
| 371 | auto tos = object_stack.back(); | 394 | auto tos = object_stack.back(); |
| 395 | + auto uninitialized = QPDFObjectHandle(); | ||
| 372 | if (!tos.isStream()) { | 396 | if (!tos.isStream()) { |
| 373 | // QXXXQ QTC in update mode | 397 | // QXXXQ QTC in update mode |
| 374 | error(value.getStart(), "this object is not a stream"); | 398 | error(value.getStart(), "this object is not a stream"); |
| @@ -388,10 +412,33 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) | @@ -388,10 +412,33 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) | ||
| 388 | } | 412 | } |
| 389 | } else if (key == "data") { | 413 | } else if (key == "data") { |
| 390 | this->saw_data = true; | 414 | this->saw_data = true; |
| 391 | - // QXXXQ | 415 | + std::string v; |
| 416 | + if (!value.getString(v)) { | ||
| 417 | + error(value.getStart(), "\"stream.data\" must be a string"); | ||
| 418 | + } else { | ||
| 419 | + // The value's start/end range includes the quotes; skip them. | ||
| 420 | + auto start = value.getStart() + 1; | ||
| 421 | + auto end = value.getEnd() - 1; | ||
| 422 | + if (end < start) { | ||
| 423 | + throw std::logic_error("QPDF_json: JSON string length < 0"); | ||
| 424 | + } | ||
| 425 | + tos.replaceStreamData( | ||
| 426 | + provide_data(is, start, end), uninitialized, uninitialized); | ||
| 427 | + } | ||
| 392 | } else if (key == "datafile") { | 428 | } else if (key == "datafile") { |
| 393 | this->saw_datafile = true; | 429 | this->saw_datafile = true; |
| 394 | - // QXXXQ | 430 | + std::string filename; |
| 431 | + if (value.getString(filename)) { | ||
| 432 | + tos.replaceStreamData( | ||
| 433 | + QUtil::file_provider(filename), | ||
| 434 | + uninitialized, | ||
| 435 | + uninitialized); | ||
| 436 | + } else { | ||
| 437 | + error( | ||
| 438 | + value.getStart(), | ||
| 439 | + "\"stream.datafile\" must be a string containing a file " | ||
| 440 | + "name"); | ||
| 441 | + } | ||
| 395 | } else { | 442 | } else { |
| 396 | // Ignore unknown keys for forward compatibility. | 443 | // Ignore unknown keys for forward compatibility. |
| 397 | // QXXXQ QTC | 444 | // QXXXQ QTC |
| @@ -471,7 +518,8 @@ QPDF::JSONReactor::makeObject(JSON const& value) | @@ -471,7 +518,8 @@ QPDF::JSONReactor::makeObject(JSON const& value) | ||
| 471 | // QXXXQ include object number in description | 518 | // QXXXQ include object number in description |
| 472 | result.setObjectDescription( | 519 | result.setObjectDescription( |
| 473 | &this->pdf, | 520 | &this->pdf, |
| 474 | - this->filename + " offset " + QUtil::uint_to_string(value.getStart())); | 521 | + this->is->getName() + " offset " + |
| 522 | + QUtil::uint_to_string(value.getStart())); | ||
| 475 | return result; | 523 | return result; |
| 476 | } | 524 | } |
| 477 | 525 | ||
| @@ -503,7 +551,7 @@ QPDF::updateFromJSON(std::shared_ptr<InputSource> is) | @@ -503,7 +551,7 @@ QPDF::updateFromJSON(std::shared_ptr<InputSource> is) | ||
| 503 | void | 551 | void |
| 504 | QPDF::importJSON(std::shared_ptr<InputSource> is, bool must_be_complete) | 552 | QPDF::importJSON(std::shared_ptr<InputSource> is, bool must_be_complete) |
| 505 | { | 553 | { |
| 506 | - JSONReactor reactor(*this, is->getName(), must_be_complete); | 554 | + JSONReactor reactor(*this, is, must_be_complete); |
| 507 | try { | 555 | try { |
| 508 | JSON::parse(*is, &reactor); | 556 | JSON::parse(*is, &reactor); |
| 509 | } catch (std::runtime_error& e) { | 557 | } catch (std::runtime_error& e) { |