Commit 7f65a5c21f83df31f3732532f0d836f32c982d67

Authored by Jay Berkenbilt
1 parent a3c99803

Test json against schema only on demand

Testing json against schema requires an in-memory copy, so do it only
when requested by the test suite.
@@ -50,18 +50,6 @@ Output JSON v2 @@ -50,18 +50,6 @@ Output JSON v2
50 50
51 Before starting on v2 format: 51 Before starting on v2 format:
52 52
53 -* Some if not all of the json output functionality should move from  
54 - QPDFJob to QPDF. There can be top-level QPDF methods that take a  
55 - pipeline and write the JSON serialization to it. For things that  
56 - generate smaller amounts of output (constant-size stuff, lists of  
57 - attachments), we can also have a version that returns a string. For  
58 - the benefit of users of other languages, we can have something that  
59 - takes a FILE* or writes to stdout as well. This would be a good time  
60 - to make sure all the information from --check and other  
61 - informational options (--show-linearization, --show-encryption,  
62 - --show-xref, --list-attachments, --show-npages) is available in the  
63 - json output.  
64 -  
65 * Writing objects should write in numerical order with the trailer at 53 * Writing objects should write in numerical order with the trailer at
66 the end. 54 the end.
67 55
@@ -70,15 +58,18 @@ Before starting on v2 format: @@ -70,15 +58,18 @@ Before starting on v2 format:
70 the input), not by overwriting, in case this has any unwanted side 58 the input), not by overwriting, in case this has any unwanted side
71 effects. 59 effects.
72 60
73 -* Figure out how/whether to do schema checks with incremental write.  
74 - Consider changing the contract to allow fields to be absent even  
75 - when present in the schema. It's reasonable for people to check for  
76 - presence of a key. Most languages make this easy to do.  
77 -  
78 General things to remember: 61 General things to remember:
79 62
80 * deprecate getJSON without a version 63 * deprecate getJSON without a version
81 64
  65 +* Make sure all the information from --check and other informational
  66 + options (--show-linearization, --show-encryption, --show-xref,
  67 + --list-attachments, --show-npages) is available in the json output.
  68 +
  69 +* Consider changing the contract to allow fields to be absent even
  70 + when present in the schema. It's reasonable for people to check for
  71 + presence of a key. Most languages make this easy to do.
  72 +
82 * The choices for json_key (job.yml) will be different for v1 and v2. 73 * The choices for json_key (job.yml) will be different for v1 and v2.
83 That information is already duplicated in multiple places. 74 That information is already duplicated in multiple places.
84 75
job.sums
@@ -14,4 +14,4 @@ libqpdf/qpdf/auto_job_json_decl.hh 06caa46eaf71db8a50c046f91866baa8087745a947431 @@ -14,4 +14,4 @@ libqpdf/qpdf/auto_job_json_decl.hh 06caa46eaf71db8a50c046f91866baa8087745a947431
14 libqpdf/qpdf/auto_job_json_init.hh e7047a7c83737adfaae49abc295a579bb9b9e0a4644e911d1656a604cb202208 14 libqpdf/qpdf/auto_job_json_init.hh e7047a7c83737adfaae49abc295a579bb9b9e0a4644e911d1656a604cb202208
15 libqpdf/qpdf/auto_job_schema.hh cbbcae166cfecbdbdeb40c5a30870e03604a019a8b4f7a217d554a82431d2e5f 15 libqpdf/qpdf/auto_job_schema.hh cbbcae166cfecbdbdeb40c5a30870e03604a019a8b4f7a217d554a82431d2e5f
16 manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580 16 manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580
17 -manual/cli.rst 8a5a12351df6a42f91d6d271b2d065a843d8daa5125d8723d474e4180d7abbf1 17 +manual/cli.rst 8684ca1f601f2832cded52d1b2f74730f97b7b85b57e31a399231731fbe80d26
libqpdf/QPDFJob.cc
@@ -16,6 +16,7 @@ @@ -16,6 +16,7 @@
16 #include <qpdf/Pl_Flate.hh> 16 #include <qpdf/Pl_Flate.hh>
17 #include <qpdf/Pl_OStream.hh> 17 #include <qpdf/Pl_OStream.hh>
18 #include <qpdf/Pl_StdioFile.hh> 18 #include <qpdf/Pl_StdioFile.hh>
  19 +#include <qpdf/Pl_String.hh>
19 #include <qpdf/QTC.hh> 20 #include <qpdf/QTC.hh>
20 #include <qpdf/QUtil.hh> 21 #include <qpdf/QUtil.hh>
21 22
@@ -1592,6 +1593,13 @@ QPDFJob::json_out_schema_v1() @@ -1592,6 +1593,13 @@ QPDFJob::json_out_schema_v1()
1592 void 1593 void
1593 QPDFJob::doJSON(QPDF& pdf, Pipeline* p) 1594 QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
1594 { 1595 {
  1596 + std::string captured_json;
  1597 + std::shared_ptr<Pl_String> pl_str;
  1598 + if (this->m->test_json_schema) {
  1599 + pl_str = std::make_shared<Pl_String>("capture json", p, captured_json);
  1600 + p = pl_str.get();
  1601 + }
  1602 +
1595 JSON j = JSON::makeDictionary(); 1603 JSON j = JSON::makeDictionary();
1596 // This version is updated every time a non-backward-compatible 1604 // This version is updated every time a non-backward-compatible
1597 // change is made to the JSON format. Clients of the JSON are to 1605 // change is made to the JSON format. Clients of the JSON are to
@@ -1651,23 +1659,22 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) @@ -1651,23 +1659,22 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
1651 doJSONObjectinfo(pdf, j); 1659 doJSONObjectinfo(pdf, j);
1652 } 1660 }
1653 1661
1654 - // Check against schema 1662 + *p << j.unparse() << "\n";
1655 1663
1656 - JSON schema = json_schema(&m->json_keys);  
1657 - std::list<std::string> errors;  
1658 - if (!j.checkSchema(schema, errors)) {  
1659 - *(this->m->cerr)  
1660 - << "QPDFJob didn't create JSON that complies with its own rules.\n\  
1661 -Please report this as a bug at\n\  
1662 - https://github.com/qpdf/qpdf/issues/new\n\  
1663 -ideally with the file that caused the error and the output below. Thanks!\n\  
1664 -\n";  
1665 - for (auto const& error: errors) {  
1666 - *(this->m->cerr) << error << std::endl; 1664 + if (this->m->test_json_schema) {
  1665 + // Check against schema
  1666 + JSON schema = json_schema(&m->json_keys);
  1667 + std::list<std::string> errors;
  1668 + JSON captured = JSON::parse(captured_json);
  1669 + if (!captured.checkSchema(schema, errors)) {
  1670 + *(this->m->cerr) << "QPDFJob didn't create JSON that complies with"
  1671 + " its own rules."
  1672 + << std::endl;
  1673 + for (auto const& error: errors) {
  1674 + *(this->m->cerr) << error << std::endl;
  1675 + }
1667 } 1676 }
1668 } 1677 }
1669 -  
1670 - *p << j.unparse() << "\n";  
1671 } 1678 }
1672 1679
1673 void 1680 void
manual/cli.rst
@@ -3336,6 +3336,9 @@ Related Options @@ -3336,6 +3336,9 @@ Related Options
3336 3336
3337 This is used by qpdf's test suite to check consistency between the 3337 This is used by qpdf's test suite to check consistency between the
3338 output of ``qpdf --json`` and the output of ``qpdf --json-help``. 3338 output of ``qpdf --json`` and the output of ``qpdf --json-help``.
  3339 + This option causes an extra copy of the generated json to appear in
  3340 + memory and is therefore unsuitable for use with large files. This
  3341 + is why it's also not on by default.
3339 3342
3340 .. _unicode-passwords: 3343 .. _unicode-passwords:
3341 3344
qpdf/qtest/qpdf.test
@@ -1124,7 +1124,8 @@ foreach my $d (@json_files) @@ -1124,7 +1124,8 @@ foreach my $d (@json_files)
1124 } 1124 }
1125 my $in = "$file.pdf"; 1125 my $in = "$file.pdf";
1126 $td->runtest("json $out", 1126 $td->runtest("json $out",
1127 - {$td->COMMAND => ['qpdf', '--json', @$xargs, $in]}, 1127 + {$td->COMMAND =>
  1128 + ['qpdf', '--json=1', '--test-json-schema', @$xargs, $in]},
1128 {$td->FILE => "$out.out", $td->EXIT_STATUS => 0}, 1129 {$td->FILE => "$out.out", $td->EXIT_STATUS => 0},
1129 $td->NORMALIZE_NEWLINES); 1130 $td->NORMALIZE_NEWLINES);
1130 } 1131 }