Commit 5f4224f31a500452a4f97f36ed57351b41ca0114

Authored by Jay Berkenbilt
1 parent 80acfc38

Simplify --json-output

Now --json-output just changes defaults. Allow output file with --json.
... ... @@ -69,46 +69,11 @@ Soon: Break ground on "Document-level work"
69 69 JSON v2 fixes
70 70 =============
71 71  
72   -* Unify code between QPDFJob::doJSONObjects and QPDF::writeJSON. Make
73   - sure that the "qpdf" key is always present when json-output is
74   - specified.
75   -
76   -* Change the name of the "qpdf-v2" key to "qpdf". Use that in place of
77   - "objects" and change its content to a two-element array whose first
78   - element is metadata required (or useful) for parsing and whose
79   - second element contains the actual data. Use of an array is the only
80   - way to ensure that the metadata is guaranteed to be parsed before we
81   - start parsing the objects. Example:
82   -
83   - {
84   - "qpdf": [
85   - {
86   - "jsonversion": 2,
87   - "pdfversion": "1.3",
88   - "pushedinheritedpageresources": false,
89   - "calledgetallpages": false,
90   - "maxobjectid": 10
91   - },
92   - {
93   - ... objects ...
94   - }
95   - ]
96   - }
97   -
98   - This implies a few things:
99   -
100   - * Still need to test pushedinheritedpageresources and
101   - calledgetallpages and check/use their values when reading
102   -
103   - * Fix --json-help
104   -
105   - * When reading back in, we'll have to call
106   - pushInheritedAttributesToPage or getAllPages based on the values
107   - of the metadata.
108   -
109   - * Test --json with --json-stream-data and --json-output with
110   - --json-stream-data=none. Recheck writeJSON's handling of the
111   - pipeline argument.
  72 +* Rethink QPDF::writeJSON. Maybe provide a simpler overload?
  73 +
  74 +* When reading back in, we'll have to call
  75 + pushInheritedAttributesToPage or getAllPages based on the values
  76 + of the metadata.
112 77  
113 78 * Support json v2 in the C API. At a minimum, write_json,
114 79 create_from_json, and update_from_json need to be there and should
... ...
include/qpdf/QPDF.hh
... ... @@ -133,7 +133,7 @@ class QPDF
133 133 QPDF_DLL
134 134 void updateFromJSON(std::shared_ptr<InputSource>);
135 135  
136   - // Write qpdf json format to the pipeline "p". The only supported
  136 + // Write qpdf JSON format to the pipeline "p". The only supported
137 137 // version is 2.
138 138 //
139 139 // If the value of "complete" is true, a complete JSON object
... ...
include/qpdf/QPDFJob.hh
... ... @@ -554,7 +554,7 @@ class QPDFJob
554 554 void setEncryptionOptions(QPDF&, QPDFWriter&);
555 555 void maybeFixWritePassword(int R, std::string& password);
556 556 void writeOutfile(QPDF& pdf);
557   - void writeJSON(Pipeline* p, QPDF& pdf, bool complete, bool& first_key);
  557 + void writeJSON(QPDF& pdf);
558 558  
559 559 // JSON
560 560 void doJSON(QPDF& pdf, Pipeline*);
... ...
job.sums
... ... @@ -8,10 +8,10 @@ include/qpdf/auto_job_c_pages.hh b3cc0f21029f6d89efa043dcdbfa183cb59325b6506001c
8 8 include/qpdf/auto_job_c_uo.hh ae21b69a1efa9333050f4833d465f6daff87e5b38e5106e49bbef5d4132e4ed1
9 9 job.yml f9564f18b08a45d17328af43652645771d3498471820c858b8c9013a193e1412
10 10 libqpdf/qpdf/auto_job_decl.hh 7844eba58edffb9494b19e8eca6fd59a24d6e152ca606c3b07da569f753df2da
11   -libqpdf/qpdf/auto_job_help.hh db2e4350c700e064b204e3e20d4fee4eddfe312b28092afcf608b4b6863d30e5
  11 +libqpdf/qpdf/auto_job_help.hh 700d7600b34588169c80f3e325e39e592e2f5c1af1cdac16614150ff38424b40
12 12 libqpdf/qpdf/auto_job_init.hh fd1635a5ad6ba16b7ae008467145560a59a5ecfd10d29c5ef7cd0d8347747cd2
13 13 libqpdf/qpdf/auto_job_json_decl.hh 06caa46eaf71db8a50c046f91866baa8087745a9474319fb7c86d92634cc8297
14 14 libqpdf/qpdf/auto_job_json_init.hh 59545578a2e47c660ff98516ed53f06638be75eb4658e2a09d32cc08e0cb7268
15   -libqpdf/qpdf/auto_job_schema.hh 9d543cd4a43eafffc2c4b8a6fee29e399c271c52cb6f7d417ae5497b3c1127dc
  15 +libqpdf/qpdf/auto_job_schema.hh 5352ef1be1ad7cc6f4f36dab88f2937d278e6bd3a0e2d46259794dc226c8ba6b
16 16 manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580
17   -manual/cli.rst 8e1f443c6fa000e023e516c318df4d04d58233d4d8648907c4a71f0ea5722bca
  17 +manual/cli.rst bbce4cfb662a96c8df0c8563f8065844b77aca7b4ec6385955546b9a455d9953
... ...
libqpdf/QPDFJob.cc
... ... @@ -680,8 +680,15 @@ QPDFJob::checkConfiguration()
680 680 " an output file is specified");
681 681 } else if (m->split_pages) {
682 682 usage("--split-pages may not be used with --replace-input");
  683 + } else if (m->json_version) {
  684 + usage("--json may not be used with --replace-input");
683 685 }
684 686 }
  687 + if (m->json_version && (m->outfilename == nullptr)) {
  688 + // The output file is optional with --json for backward
  689 + // compatibility and defaults to standard output.
  690 + m->outfilename = QUtil::make_shared_cstr("-");
  691 + }
685 692 if (m->infilename == nullptr) {
686 693 usage("an input file name is required");
687 694 } else if (
... ... @@ -1116,25 +1123,47 @@ QPDFJob::doJSONObject(
1116 1123 void
1117 1124 QPDFJob::doJSONObjects(Pipeline* p, bool& first, QPDF& pdf)
1118 1125 {
1119   - JSON::writeDictionaryKey(p, first, "objects", 1);
1120   - bool first_object = true;
1121   - JSON::writeDictionaryOpen(p, first_object, 1);
1122   - bool all_objects = m->json_objects.empty();
1123   - std::set<QPDFObjGen> wanted_og = getWantedJSONObjects();
1124   - for (auto& obj: pdf.getAllObjects()) {
1125   - std::string key = obj.unparse();
1126   - if (this->m->json_version > 1) {
1127   - key = "obj:" + key;
  1126 + if (m->json_version == 1) {
  1127 + JSON::writeDictionaryKey(p, first, "objects", 1);
  1128 + bool first_object = true;
  1129 + JSON::writeDictionaryOpen(p, first_object, 1);
  1130 + bool all_objects = m->json_objects.empty();
  1131 + std::set<QPDFObjGen> wanted_og = getWantedJSONObjects();
  1132 + for (auto& obj: pdf.getAllObjects()) {
  1133 + std::string key = obj.unparse();
  1134 + if (this->m->json_version > 1) {
  1135 + key = "obj:" + key;
  1136 + }
  1137 + if (all_objects || wanted_og.count(obj.getObjGen())) {
  1138 + doJSONObject(p, first_object, key, obj);
  1139 + }
1128 1140 }
1129   - if (all_objects || wanted_og.count(obj.getObjGen())) {
1130   - doJSONObject(p, first_object, key, obj);
  1141 + if (all_objects || m->json_objects.count("trailer")) {
  1142 + auto trailer = pdf.getTrailer();
  1143 + doJSONObject(p, first_object, "trailer", trailer);
1131 1144 }
  1145 + JSON::writeDictionaryClose(p, first_object, 1);
  1146 + } else {
  1147 + std::set<std::string> json_objects;
  1148 + if (this->m->json_objects.count("trailer")) {
  1149 + json_objects.insert("trailer");
  1150 + }
  1151 + auto wanted = getWantedJSONObjects();
  1152 + for (auto const& og: wanted) {
  1153 + std::ostringstream s;
  1154 + s << "obj:" << og.unparse(' ') << " R";
  1155 + json_objects.insert(s.str());
  1156 + }
  1157 + pdf.writeJSON(
  1158 + this->m->json_version,
  1159 + p,
  1160 + false,
  1161 + first,
  1162 + this->m->decode_level,
  1163 + this->m->json_stream_data,
  1164 + this->m->json_stream_prefix,
  1165 + json_objects);
1132 1166 }
1133   - if (all_objects || m->json_objects.count("trailer")) {
1134   - auto trailer = pdf.getTrailer();
1135   - doJSONObject(p, first_object, "trailer", trailer);
1136   - }
1137   - JSON::writeDictionaryClose(p, first_object, 1);
1138 1167 }
1139 1168  
1140 1169 void
... ... @@ -1777,7 +1806,7 @@ void
1777 1806 QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
1778 1807 {
1779 1808 // qpdf guarantees that no new top-level keys whose names start
1780   - // with "xdata" will be added. These are reserved for users.
  1809 + // with "x-" will be added. These are reserved for users.
1781 1810  
1782 1811 std::string captured_json;
1783 1812 std::shared_ptr<Pl_String> pl_str;
... ... @@ -1788,32 +1817,38 @@ QPDFJob::doJSON(QPDF&amp; pdf, Pipeline* p)
1788 1817  
1789 1818 bool first = true;
1790 1819 JSON::writeDictionaryOpen(p, first, 0);
1791   - // This version is updated every time a non-backward-compatible
1792   - // change is made to the JSON format. Clients of the JSON are to
1793   - // ignore unrecognized keys, so we only update the version of a
1794   - // key disappears or if its value changes meaning.
1795   - JSON::writeDictionaryItem(
1796   - p, first, "version", JSON::makeInt(this->m->json_version), 1);
1797   - JSON j_params = JSON::makeDictionary();
1798   - std::string decode_level_str;
1799   - switch (m->decode_level) {
1800   - case qpdf_dl_none:
1801   - decode_level_str = "none";
1802   - break;
1803   - case qpdf_dl_generalized:
1804   - decode_level_str = "generalized";
1805   - break;
1806   - case qpdf_dl_specialized:
1807   - decode_level_str = "specialized";
1808   - break;
1809   - case qpdf_dl_all:
1810   - decode_level_str = "all";
1811   - break;
1812   - }
1813   - j_params.addDictionaryMember(
1814   - "decodelevel", JSON::makeString(decode_level_str));
1815   - JSON::writeDictionaryItem(p, first, "parameters", j_params, 1);
1816 1820  
  1821 + if (m->json_output) {
  1822 + // Exclude version and parameters to keep the output file
  1823 + // minimal. The JSON version is inside the "qpdf" key for
  1824 + // version 2.
  1825 + } else {
  1826 + // This version is updated every time a non-backward-compatible
  1827 + // change is made to the JSON format. Clients of the JSON are to
  1828 + // ignore unrecognized keys, so we only update the version if a
  1829 + // key disappears or if its value changes meaning.
  1830 + JSON::writeDictionaryItem(
  1831 + p, first, "version", JSON::makeInt(this->m->json_version), 1);
  1832 + JSON j_params = JSON::makeDictionary();
  1833 + std::string decode_level_str;
  1834 + switch (m->decode_level) {
  1835 + case qpdf_dl_none:
  1836 + decode_level_str = "none";
  1837 + break;
  1838 + case qpdf_dl_generalized:
  1839 + decode_level_str = "generalized";
  1840 + break;
  1841 + case qpdf_dl_specialized:
  1842 + decode_level_str = "specialized";
  1843 + break;
  1844 + case qpdf_dl_all:
  1845 + decode_level_str = "all";
  1846 + break;
  1847 + }
  1848 + j_params.addDictionaryMember(
  1849 + "decodelevel", JSON::makeString(decode_level_str));
  1850 + JSON::writeDictionaryItem(p, first, "parameters", j_params, 1);
  1851 + }
1817 1852 bool all_keys = m->json_keys.empty();
1818 1853 // The list of selectable top-level keys is duplicated in the
1819 1854 // following places: job.yml, QPDFJob::json_schema, and
... ... @@ -1850,11 +1885,7 @@ QPDFJob::doJSON(QPDF&amp; pdf, Pipeline* p)
1850 1885 // qpdf/objects/objectinfo without other keys.
1851 1886 if (all_keys || m->json_keys.count("objects") ||
1852 1887 m->json_keys.count("qpdf")) {
1853   - if (this->m->json_version == 1) {
1854   - doJSONObjects(p, first, pdf);
1855   - } else {
1856   - writeJSON(p, pdf, false, first);
1857   - }
  1888 + doJSONObjects(p, first, pdf);
1858 1889 }
1859 1890 if (this->m->json_version == 1) {
1860 1891 // "objectinfo" is not needed for version >1 since you can
... ... @@ -1889,9 +1920,6 @@ QPDFJob::doInspection(QPDF&amp; pdf)
1889 1920 if (m->check) {
1890 1921 doCheck(pdf);
1891 1922 }
1892   - if (m->json_version) {
1893   - doJSON(pdf, &cout);
1894   - }
1895 1923 if (m->show_npages) {
1896 1924 QTC::TC("qpdf", "QPDFJob npages");
1897 1925 cout << pdf.getRoot().getKey("/Pages").getKey("/Count").getIntValue()
... ... @@ -3337,9 +3365,8 @@ QPDFJob::writeOutfile(QPDF&amp; pdf)
3337 3365 } else if (strcmp(m->outfilename.get(), "-") == 0) {
3338 3366 m->outfilename = nullptr;
3339 3367 }
3340   - if (this->m->json_output) {
3341   - bool unused = true;
3342   - writeJSON(nullptr, pdf, true, unused);
  3368 + if (this->m->json_version) {
  3369 + writeJSON(pdf);
3343 3370 } else {
3344 3371 // QPDFWriter must have block scope so the output file will be
3345 3372 // closed after write() finishes.
... ... @@ -3393,52 +3420,30 @@ QPDFJob::writeOutfile(QPDF&amp; pdf)
3393 3420 }
3394 3421  
3395 3422 void
3396   -QPDFJob::writeJSON(Pipeline* p, QPDF& pdf, bool complete, bool& first_key)
  3423 +QPDFJob::writeJSON(QPDF& pdf)
3397 3424 {
3398 3425 // File pipeline must have block scope so it will be closed
3399 3426 // after write.
3400 3427 std::shared_ptr<QUtil::FileCloser> fc;
3401 3428 std::shared_ptr<Pipeline> fp;
3402   - std::string file_prefix = this->m->json_stream_prefix;
3403 3429 if (m->outfilename.get()) {
3404 3430 QTC::TC("qpdf", "QPDFJob write json to file");
3405   - if (file_prefix.empty()) {
3406   - file_prefix = this->m->outfilename.get();
  3431 + if (this->m->json_stream_prefix.empty()) {
  3432 + this->m->json_stream_prefix = this->m->outfilename.get();
3407 3433 }
3408 3434 fc = std::make_shared<QUtil::FileCloser>(
3409 3435 QUtil::safe_fopen(this->m->outfilename.get(), "w"));
3410 3436 fp = std::make_shared<Pl_StdioFile>("json output", fc->f);
3411 3437 } else if (
3412   - (this->m->json_stream_data == qpdf_sj_file) && file_prefix.empty()) {
  3438 + (this->m->json_stream_data == qpdf_sj_file) &&
  3439 + this->m->json_stream_prefix.empty()) {
3413 3440 QTC::TC("qpdf", "QPDFJob need json-stream-prefix for stdout");
3414 3441 usage("please specify --json-stream-prefix since the input file "
3415 3442 "name is unknown");
3416 3443 } else {
3417 3444 QTC::TC("qpdf", "QPDFJob write json to stdout");
3418   - if (p == nullptr) {
3419   - fp = this->m->log->getInfo();
3420   - }
3421   - }
3422   - if (p == nullptr) {
3423   - p = fp.get();
3424   - }
3425   - std::set<std::string> json_objects;
3426   - if (this->m->json_objects.count("trailer")) {
3427   - json_objects.insert("trailer");
3428   - }
3429   - auto wanted = getWantedJSONObjects();
3430   - for (auto const& og: wanted) {
3431   - std::ostringstream s;
3432   - s << "obj:" << og.unparse(' ') << " R";
3433   - json_objects.insert(s.str());
3434   - }
3435   - pdf.writeJSON(
3436   - this->m->json_version,
3437   - p,
3438   - complete,
3439   - first_key,
3440   - this->m->decode_level,
3441   - this->m->json_stream_data,
3442   - file_prefix,
3443   - json_objects);
  3445 + this->m->log->saveToStandardOutput(true);
  3446 + fp = this->m->log->getSave();
  3447 + }
  3448 + doJSON(pdf, fp.get());
3444 3449 }
... ...
libqpdf/QPDFJob_config.cc
... ... @@ -244,7 +244,6 @@ QPDFJob::Config::json(std::string const&amp; parameter)
244 244 if ((o.m->json_version < 1) || (o.m->json_version > JSON::LATEST)) {
245 245 usage(std::string("unsupported json version ") + parameter);
246 246 }
247   - o.m->require_outfile = false;
248 247 return this;
249 248 }
250 249  
... ... @@ -297,14 +296,7 @@ QPDFJob::Config*
297 296 QPDFJob::Config::jsonOutput(std::string const& parameter)
298 297 {
299 298 o.m->json_output = true;
300   - if (parameter.empty() || (parameter == "latest")) {
301   - o.m->json_version = JSON::LATEST;
302   - } else {
303   - o.m->json_version = QUtil::string_to_int(parameter.c_str());
304   - }
305   - if ((o.m->json_version < 2) || (o.m->json_version > JSON::LATEST)) {
306   - usage(std::string("unsupported json output version ") + parameter);
307   - }
  299 + json(parameter);
308 300 if (!o.m->json_stream_data_set) {
309 301 // No need to set json_stream_data_set -- that indicates
310 302 // explicit use of --json-stream-data.
... ... @@ -313,9 +305,7 @@ QPDFJob::Config::jsonOutput(std::string const&amp; parameter)
313 305 if (!o.m->decode_level_set) {
314 306 o.m->decode_level = qpdf_dl_none;
315 307 }
316   - if (o.m->json_keys.empty()) {
317   - o.m->json_keys.insert("qpdf");
318   - }
  308 + o.m->json_keys.insert("qpdf");
319 309 return this;
320 310 }
321 311  
... ...
libqpdf/qpdf/auto_job_help.hh
... ... @@ -803,7 +803,9 @@ depth in the JSON section of the manual. &quot;version&quot; may be a
803 803 specific version or "latest" (the default). Run qpdf --json-help
804 804 for a description of the generated JSON object.
805 805 )");
806   -ap.addOptionHelp("--json-help", "json", "show format of JSON output", R"(Describe the format of the JSON output by writing to standard
  806 +ap.addOptionHelp("--json-help", "json", "show format of JSON output", R"(--json-help[=version]
  807 +
  808 +Describe the format of the JSON output by writing to standard
807 809 output a JSON object with the same keys and with values
808 810 containing descriptive text.
809 811 )");
... ... @@ -838,17 +840,17 @@ which is to use the output file name. Whatever is given here
838 840 will be appended with -nnn to create the name of the file that
839 841 will contain the data for the stream stream in object nnn.
840 842 )");
841   -ap.addOptionHelp("--json-output", "json", "serialize to JSON", R"(--json-output[=version]
  843 +ap.addOptionHelp("--json-output", "json", "apply defaults for JSON serialization", R"(--json-output[=version]
842 844  
843   -The output file will be qpdf JSON format at the given version.
844   -"version" may be a specific version or "latest" (the default).
845   -The only supported version is 2. See also --json-stream-data,
846   ---json-stream-prefix, and --decode-level.
  845 +Implies --json=version. Changes default values for certain
  846 +options so that the JSON output written is the most faithful
  847 +representation of the original PDF and contains no additional
  848 +JSON keys. See also --json-stream-data, --json-stream-prefix,
  849 +and --decode-level.
847 850 )");
848   -ap.addOptionHelp("--json-input", "json", "input file is qpdf JSON", R"(Treat the input file as a JSON file in qpdf JSON format as
849   -written by qpdf --json-output. See the "qpdf JSON Format"
850   -section of the manual for information about how to use this
851   -option.
  851 +ap.addOptionHelp("--json-input", "json", "input file is qpdf JSON", R"(Treat the input file as a JSON file in qpdf JSON format. See the
  852 +"qpdf JSON Format" section of the manual for information about
  853 +how to use this option.
852 854 )");
853 855 ap.addOptionHelp("--update-from-json", "json", "update a PDF from qpdf JSON", R"(--update-from-json=qpdf-json-file
854 856  
... ...
libqpdf/qpdf/auto_job_schema.hh
... ... @@ -28,7 +28,7 @@ static constexpr char const* JOB_SCHEMA_DATA = R&quot;({
28 28 "forceVersion": "set output PDF version",
29 29 "progress": "show progress when writing",
30 30 "splitPages": "write pages to separate files",
31   - "jsonOutput": "serialize to JSON",
  31 + "jsonOutput": "apply defaults for JSON serialization",
32 32 "encrypt": {
33 33 "userPassword": "user password",
34 34 "ownerPassword": "owner password",
... ...
manual/cli.rst
... ... @@ -3194,7 +3194,16 @@ Related Options
3194 3194 :qpdf:ref:`--json-help` option to get a description of the JSON
3195 3195 object.
3196 3196  
3197   -.. qpdf:option:: --json-help
  3197 + Starting with qpdf 11, when this option is specified, an output
  3198 + file is optional (for backward compatibility) and defaults to
  3199 + standard output. You may specify an output file to write the JSON
  3200 + to a file rather than standard output.
  3201 +
  3202 + Stream data is only included if :qpdf:ref:`--json-output` is
  3203 + specified or if a value other than ``none`` is passed to
  3204 + :qpdf:ref:`--json-stream-data`.
  3205 +
  3206 +.. qpdf:option:: --json-help[=version]
3198 3207  
3199 3208 .. help: show format of JSON output
3200 3209  
... ... @@ -3202,12 +3211,13 @@ Related Options
3202 3211 output a JSON object with the same keys and with values
3203 3212 containing descriptive text.
3204 3213  
3205   - Describe the format of the JSON output by writing to standard
3206   - output a JSON object with the same structure as the JSON generated
3207   - by qpdf. In the output written by ``--json-help``, each key's value
3208   - is a description of the key. The specific contract guaranteed by
3209   - qpdf in its JSON representation is explained in more detail in the
3210   - :ref:`json`.
  3214 + Describe the format of the corresponding version of JSON output by
  3215 + writing to standard output a JSON object with the same structure as
  3216 + the JSON generated by qpdf. In the output written by
  3217 + ``--json-help``, each key's value is a description of the key. The
  3218 + specific contract guaranteed by qpdf in its JSON representation is
  3219 + explained in more detail in the :ref:`json`. The default version of
  3220 + help is version ``2``, as with the :qpdf:ref:`--json` flag.
3211 3221  
3212 3222 .. qpdf:option:: --json-key=key
3213 3223  
... ... @@ -3233,11 +3243,9 @@ Related Options
3233 3243 objects will be shown.
3234 3244  
3235 3245 This option is repeatable. If given, only specified objects will be
3236   - shown in the ``"objects"`` key of the JSON output. Otherwise, all
3237   - objects will be shown. For qpdf JSON version 1, this also affects
3238   - the ``"objectinfo"`` key, which is not present in version 2. This
3239   - option may be used with :qpdf:ref:`--json` and also with
3240   - :qpdf:ref:`--json-output`.
  3246 + shown in the objects dictionary in the JSON output. Otherwise, all
  3247 + objects will be shown. See :ref:`json` for details about the qpdf
  3248 + JSON format.
3241 3249  
3242 3250 .. qpdf:option:: --json-stream-data={none|inline|file}
3243 3251  
... ... @@ -3281,28 +3289,30 @@ Related Options
3281 3289  
3282 3290 .. qpdf:option:: --json-output[=version]
3283 3291  
3284   - .. help: serialize to JSON
  3292 + .. help: apply defaults for JSON serialization
3285 3293  
3286   - The output file will be qpdf JSON format at the given version.
3287   - "version" may be a specific version or "latest" (the default).
3288   - The only supported version is 2. See also --json-stream-data,
3289   - --json-stream-prefix, and --decode-level.
  3294 + Implies --json=version. Changes default values for certain
  3295 + options so that the JSON output written is the most faithful
  3296 + representation of the original PDF and contains no additional
  3297 + JSON keys. See also --json-stream-data, --json-stream-prefix,
  3298 + and --decode-level.
3290 3299  
3291   - The output file, instead of being a PDF file, will be a JSON file
3292   - in qpdf JSON format at the given version. ``version`` may be a
3293   - specific version or ``latest`` (the default). The only supported
3294   - version is 2. See also :qpdf:ref:`--json-stream-data` and
3295   - :qpdf:ref:`--json-stream-prefix`. This option also changes the
3296   - following defaults:
  3300 + Implies :qpdf:ref:`--json` at the specified version. This option
  3301 + changes several default values, all of which can be overridden by
  3302 + specifying the stated option:
3297 3303  
3298 3304 - The default value for :qpdf:ref:`--json-stream-data` changes from
3299 3305 ``none`` to ``inline``.
3300 3306  
3301   - - The default decode level for stream data becomes ``none``, but you can
3302   - override it with :qpdf:ref:`--decode-level`.
  3307 + - The default value for :qpdf:ref:`--decode-level` changes from
  3308 + ``generalized`` to ``none``.
  3309 +
  3310 + - By default, only the ``"qpdf"`` key is included in the JSON
  3311 + output, but you can add additional keys with
  3312 + :qpdf:ref:`--json-key`.
3303 3313  
3304   - - Only the ``"qpdf"`` key is included in the JSON output, but you
3305   - can add additional keys with :qpdf:ref:`--json-key`.
  3314 + - Excludes the ``"version"`` and ``"parameters"`` keys from the
  3315 + JSON output.
3306 3316  
3307 3317 If you want to look at the contents of streams easily as you would
3308 3318 in QDF mode (see :ref:`qdf`), you can use
... ... @@ -3313,15 +3323,15 @@ Related Options
3313 3323  
3314 3324 .. help: input file is qpdf JSON
3315 3325  
3316   - Treat the input file as a JSON file in qpdf JSON format as
3317   - written by qpdf --json-output. See the "qpdf JSON Format"
3318   - section of the manual for information about how to use this
3319   - option.
  3326 + Treat the input file as a JSON file in qpdf JSON format. See the
  3327 + "qpdf JSON Format" section of the manual for information about
  3328 + how to use this option.
3320 3329  
3321   - Treat the input file as a JSON file in qpdf JSON format as written
3322   - by ``qpdf --json-output``. The input file must be complete and
3323   - include all stream data. For information about converting between
3324   - PDF and JSON, please see :ref:`json`.
  3330 + Treat the input file as a JSON file in qpdf JSON format. The input
  3331 + file must be complete and include all stream data. The JSON version
  3332 + must be at least 2. All top-level keys are ignored except for
  3333 + ``"qpdf"``. For information about converting between PDF and JSON,
  3334 + please see :ref:`json`.
3325 3335  
3326 3336 .. qpdf:option:: --update-from-json=qpdf-json-file
3327 3337  
... ...
manual/json.rst
... ... @@ -24,27 +24,28 @@ represents the contents of a PDF file. This is distinct from the
24 24 interacting with qpdf the way the command-line tool does. For
25 25 information about that, see :ref:`qpdf-job`.
26 26  
27   -The qpdf JSON format is specific to qpdf. There are two ways to use
28   -qpdf JSON:
29   -
30   -- The :qpdf:ref:`--json` command-line flag causes creation of a JSON
31   - representation of all the objects in a PDF file, excluding stream
32   - data. This includes an unambiguous representation of the PDF object
33   - structure and also provides JSON-formatted summaries of other
34   - information about the file. This functionality is built into
35   - ``QPDFJob`` and can be accessed from the ``qpdf`` command-line tool
36   - or from the ``QPDFJob`` C or C++ API.
37   -
38   -- qpdf can create a JSON file that completely represents a PDF file.
39   - You can think of this as using JSON as an *alternative syntax* for
40   - representing a PDF file. Using qpdf JSON, it is possible to
41   - convert a PDF file to JSON, manipulate the structure or contents of
42   - the objects at a low level, and convert the results back to a PDF
43   - file. This functionality can be accessed from the command-line with
44   - the :qpdf:ref:`--json-output`, :qpdf:ref:`--json-input`, and
45   - :qpdf:ref:`--update-from-json` flags, or from the API using the
46   - ``QPDF::writeJSON``, ``QPDF::createFromJSON``, and
47   - ``QPDF::updateFromJSON`` methods.
  27 +The qpdf JSON format is specific to qpdf. With JSON version 2, the
  28 +:qpdf:ref:`--json` command-line flag causes creation of a JSON
  29 +representation of all the objects in a PDF file. This includes an
  30 +unambiguous representation of the PDF object structure and also
  31 +provides JSON-formatted summaries of other information about the file.
  32 +This functionality is built into ``QPDFJob`` and can be accessed from
  33 +the ``qpdf`` command-line tool or from the ``QPDFJob`` C or C++ API.
  34 +
  35 +By default, stream data is omitted, but it can be included by
  36 +specifying the :qpdf:ref:`--json-stream-data` option. With stream data
  37 +included, the generated JSON file completely represents a PDF file.
  38 +You can think of this as using JSON as an *alternative syntax* for
  39 +representing a PDF file. Using qpdf JSON, it is possible to convert a
  40 +PDF file to JSON, manipulate the structure or contents of the objects
  41 +at a low level, and convert the results back to a PDF file. This
  42 +functionality can be accessed from the command-line with the
  43 +:qpdf:ref:`--json-input` and :qpdf:ref:`--update-from-json` flags, or
  44 +from the API using the ``QPDF::writeJSON``, ``QPDF::createFromJSON``,
  45 +and ``QPDF::updateFromJSON`` methods. The :qpdf:ref:`--json-output`
  46 +flag changes a handful of defaults so that the resulting JSON is as
  47 +close as possible to the original input and is ready for being
  48 +converted back to PDF.
48 49  
49 50 .. _json-terminology:
50 51  
... ... @@ -120,18 +121,53 @@ qpdf JSON Object Representation
120 121 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
121 122  
122 123 This section describes the representation of PDF objects in qpdf JSON
123   -version 2. PDF objects are represented within the ``"objects"``
124   -dictionary of a qpdf JSON file. This is true both for PDF serialized
125   -to JSON (:qpdf:ref:`--json-output`, ``QPDF::writeJSON``) or objects as
126   -they appear in the output of ``qpdf`` with the :qpdf:ref:`--json`
127   -option.
128   -
129   -Each key in the ``"objects"`` dictionary is either ``"trailer"`` or a
130   -string of the form ``"obj:O G R"`` where ``O`` and ``G`` are the
131   -object and generation numbers and ``R`` is the literal string ``R``.
132   -This is the PDF syntax for the indirect object reference prepended by
133   -``obj:``. The value, representing the object itself, is a JSON object
134   -whose structure is described below.
  124 +version 2. PDF objects are represented within the ``"qpdf"`` entry of
  125 +a qpdf JSON file. The ``"qpdf"`` entry is a two-element array. The
  126 +first element is a dictionary containing header-like information about
  127 +the file such as the PDF version. The second element is a dictionary
  128 +containing all the objects in the PDF file. We refer to this as the
  129 +*objects dictionary*.
  130 +
  131 +The first element contains the following keys:
  132 +
  133 +- ``"jsonversion"`` -- a number indicating the JSON version used for
  134 + writing. This will always be ``2``.
  135 +
  136 +- ``"pdfversion"`` -- a string containing the PDF version as indicated in
  137 + the PDF header (e.g. ``"1.7"``, ``"2.0"``).
  138 +
  139 +- ``"pushedinheritedpageresources"`` -- a boolean indicating whether
  140 + the library pushed inherited resources down to the page level.
  141 + Certain library calls cause this to happen, and qpdf needs to know
  142 + when reading a JSON file back in whether it should do this as it may
  143 + cause certain objects to be renumbered.
  144 +
  145 +- ``"calledgetallpages"`` -- a boolean indicating whether
  146 + ``getAllPages`` was called prior to writing the JSON output. This
  147 + method causes page tree repair to occur, which may renumber some
  148 + objects (in very rare cases of corrupted page trees), so qpdf needs
  149 + to know this information when reading a JSON file back in.
  150 +
  151 +- ``"maxobjectid"`` -- a number indicating the object ID of the
  152 + highest numbered object in the file. This is provided to make it
  153 + easier for software that wants to add new objects to the file as you
  154 + can safely start with one above that number when creating new
  155 + objects. Note that the value of ``"maxobjectid"`` may be higher than
  156 + the actual maximum object that appears in the input PDF since it
  157 + takes into consideration any dangling indirect object references
  158 + from the original file. This prevents you from unwittingly creating
  159 + an object that doesn't exist but that is referenced, which may have
  160 + unintended side effects. (The PDF specification explicitly allows
  161 + dangling references and says to treat them as nulls. This can happen
  162 + if objects are removed from a PDF file.)
  163 +
  164 +The second element is the objects dictionary. Each key in the objects
  165 +dictionary is either ``"trailer"`` or a string of the form ``"obj:O G
  166 +R"`` where ``O`` and ``G`` are the object and generation numbers and
  167 +``R`` is the literal string ``R``. This is the PDF syntax for the
  168 +indirect object reference prepended by ``obj:``. The value,
  169 +representing the object itself, is a JSON object whose structure is
  170 +described below.
135 171  
136 172 Top-level Stream Objects
137 173 Stream objects are represented as a JSON object with the single key
... ... @@ -143,6 +179,7 @@ Top-level Stream Objects
143 179  
144 180 - ``none``: stream data is not represented; no other keys are
145 181 present
  182 + specified.
146 183  
147 184 - ``inline``: the stream data appears as a base64-encoded string as
148 185 the value of the ``"data"`` key
... ... @@ -249,57 +286,6 @@ Object Values
249 286 the string representations of names and whose values are
250 287 representations of PDF objects.
251 288  
252   -.. _json.output:
253   -
254   -qpdf JSON Output
255   -~~~~~~~~~~~~~~~~
256   -
257   -The format of the JSON written by qpdf's :qpdf:ref:`--json-output`
258   -flag or the ``QPDF::writeJSON`` API call is a JSON object consisting
259   -of a single key: ``"qpdf"``. This may be the only key, or it may be
260   -embedded in the output of ``qpdf --json``. Unknown keys are ignored
261   -for future compatibility. It is guaranteed that qpdf will never add
262   -any keys whose names start with ``xdata``, so users are free to add
263   -their own metadata using keys whose names start with ``xdata`` without
264   -fear of clashing with a future version of qpdf.
265   -
266   -The ``"qpdf"`` key points to a two-element JSON array. The first element is
267   -a JSON object with the following keys:
268   -
269   -- ``"jsonversion"`` -- a number indicating the JSON version used for
270   - writing. This will always be ``2``.
271   -
272   -- ``"pdfversion"`` -- a string containing PDF version as indicated in
273   - the PDF header (e.g. ``"1.7"``, ``"2.0"``)
274   -
275   -- ``pushedinheritedpageresources`` -- a boolean indicating whether
276   - the library pushed inherited resources down to the page level.
277   - Certain library calls cause this to happen, and qpdf needs to know
278   - when reading a JSON file back in whether it should do this as it may
279   - cause certain objects to be renumbered.
280   -
281   -- ``calledgetallpages`` -- a boolean indicating whether
282   - ``getAllPages`` was called prior to writing the JSON output. This
283   - method causes page tree repair to occur, which may renumber some
284   - objects (in very rare cases of corrupted page trees), so qpdf needs
285   - to know this information when reading a JSON file back in.
286   -
287   -- ``"maxobjectid"`` -- a number indicating the object ID of the
288   - highest numbered object in the file. This is provided to make it
289   - easier for software that wants to add new objects to the file as you
290   - can safely start with one above that number when creating new
291   - objects. Note that the value of ``"maxobjectid"`` may be higher than
292   - the actual maximum object that appears in the input PDF since it
293   - takes into consideration any dangling indirect object references
294   - from the original file. This prevents you from unwittingly creating
295   - an object that doesn't exist but that is referenced, which may have
296   - unintended side effects. (The PDF specification explicitly allows
297   - dangling references and says to treat them as nulls. This can happen
298   - if objects are removed from a PDF file.)
299   -
300   -The second element is a JSON object containing the actual PDF objects
301   -as described in :ref:`json.objects`.
302   -
303 289 Note that writing JSON output is done by ``QPDF``, not ``QPDFWriter``.
304 290 As such, none of the things ``QPDFWriter`` does apply. This includes
305 291 recompression of streams, renumbering of objects, anything to do with
... ... @@ -325,7 +311,7 @@ qpdf JSON format.
325 311 "pdfversion": "1.3",
326 312 "pushedinheritedpageresources": false,
327 313 "calledgetallpages": false,
328   - "maxobjectid": 5,
  314 + "maxobjectid": 5
329 315 },
330 316 {
331 317 "obj:1 0 R": {
... ... @@ -389,8 +375,7 @@ qpdf JSON format.
389 375 qpdf JSON Input
390 376 ~~~~~~~~~~~~~~~
391 377  
392   -Output in the JSON output format described in :ref:`json.output` can
393   -be used in two different ways:
  378 +The qpdf JSON output can be used in two different ways:
394 379  
395 380 - By using the :qpdf:ref:`--json-input` flag or calling
396 381 ``QPDF::createFromJSON`` in place of ``QPDF::processFile``, a qpdf
... ... @@ -408,8 +393,11 @@ Here are some important things to know about qpdf JSON input.
408 393 - When a qpdf JSON file is used as the primary input file, it must be
409 394 complete. This means
410 395  
  396 + - A JSON version number must be specified with the ``"jsonversion"``
  397 + key in the first array element
  398 +
411 399 - A PDF version number must be specified with the ``"pdfversion"``
412   - key
  400 + key in the first array element
413 401  
414 402 - Stream data must be present for all streams
415 403  
... ... @@ -422,6 +410,9 @@ Here are some important things to know about qpdf JSON input.
422 410 - ``"maxobjectid"`` is ignored, so it is not necessary to update it
423 411 when adding new objects.
424 412  
  413 + - ``"calledgetallpages"`` and ``"pushedinheritedpageresources"`` are
  414 + treated as false if omitted.
  415 +
425 416 - ``"/Length"`` is ignored in all stream dictionaries. qpdf doesn't
426 417 put it there when it creates JSON output, and it is not necessary
427 418 to add it.
... ... @@ -432,14 +423,13 @@ Here are some important things to know about qpdf JSON input.
432 423 - Unknown keys at the top level of the file, within ``objects``,
433 424 at the top level of each individual object (inside the object that
434 425 has the ``"value"`` or ``"stream"`` key) and directly within
435   - ``"stream"`` are ignored for future compatibility. You should
436   - avoid putting your own values in those places if you wish to avoid
437   - risking that your JSON files will not work in future versions of
438   - qpdf. The exception to this advice is at the top level of the
439   - overall file where it is explicitly supported for you to add your
440   - own keys. For example, you could add your own metadata at the top
441   - level, and qpdf will ignore it. Note that extra top-level keys are
442   - not preserved when qpdf reads your JSON file.
  426 + ``"stream"`` are ignored for future compatibility. This includes
  427 + other top-level keys generated by ``qpdf`` itself (such as
  428 + ``"pages"``). As such, those keys don't have to be consistent with
  429 + the ``"qpdf"`` key if modifying a JSON file for conversion back to
  430 + PDF. If you wish to store application-specific metadata, you can
  431 + do so by adding a key whose name starts with ``x-``. qpdf is
  432 + guaranteed not to add any of its own keys that start with ``x-``.
443 433  
444 434 - When qpdf reads a PDF file, the internal object numbers are always
445 435 preserved. However, when qpdf writes a file using ``QPDFWriter``,
... ... @@ -458,9 +448,9 @@ Here are some important things to know about qpdf JSON input.
458 448 # edit pdf.json
459 449 qpdf in.pdf out.pdf --update-from-json=pdf.json
460 450  
461   - The following will not produce predictable results because
462   - ``out.pdf`` won't have the same object numbers as ``pdf.json`` and
463   - ``in.pdf``.
  451 + The following will produce unpredictable and probably incorrect
  452 + results because ``out.pdf`` won't have the same object numbers as
  453 + ``pdf.json`` and ``in.pdf``.
464 454  
465 455 ::
466 456  
... ... @@ -658,15 +648,16 @@ be aware of:
658 648 - If a PDF file has certain types of errors in its pages tree (such as
659 649 page objects that are direct or multiple pages sharing the same
660 650 object ID), qpdf will automatically repair the pages tree. If you
661   - specify ``"objects"`` (and, with qpdf JSON version 1, also
  651 + specify ``"qpdf"`` (or, with qpdf JSON version 1, ``"objects"`` or
662 652 ``"objectinfo"``) without any other keys, you will see the original
663 653 pages tree without any corrections. If you specify any of the keys that
664 654 require page tree traversal (for example, ``"pages"``,
665   - ``"outlines"``, or ``"pagelabel"``), then ``"objects"`` (and
666   - ``"objectinfo"``) will show the repaired page tree so that object
667   - references will be consistent throughout the file. This is not an
668   - issue with :qpdf:ref:`--json-output`, which doesn't repair the pages
669   - tree.
  655 + ``"outlines"``, or ``"pagelabel"``), then ``"qpdf"`` (and
  656 + ``"objects"`` and ``"objectinfo"``) will show the repaired page
  657 + tree so that object references will be consistent throughout the
  658 + file. You can tell if this has happened by looking at the
  659 + ``"calledgetallpages"`` and ``"pushedinheritedpageresources"``
  660 + fields in the first element of the ``"qpdf"`` array.
670 661  
671 662 - While qpdf guarantees that keys present in the help will be present
672 663 in the output, those fields may be null or empty if the information
... ... @@ -743,16 +734,17 @@ version 2.
743 734 dictionary containing either a ``"value"`` key or a ``"stream"``
744 735 key, making it possible to distinguish streams from other objects.
745 736  
746   -- The ``"objectinfo"`` key has been removed in favor of a
747   - representation in ``"objects"`` that differentiates between a stream
748   - and other kinds of objects. In v1, it was not possible to tell a
749   - stream from a dictionary within ``"objects"``.
750   -
751   -- Within the ``"objects"`` dictionary, keys are now ``"obj:O G R"``
752   - where ``O`` and ``G`` are the object and generation number.
753   - ``"trailer"`` remains the key for the trailer dictionary. In v1, the
754   - ``obj:`` prefix was not present. The rationale for this change is as
755   - follows:
  737 +- The ``"objectinfo"`` and ``"objects"`` keys have been removed in
  738 + favor of a representation in ``"qpdf"`` that includes header
  739 + information and differentiates between a stream and other kinds of
  740 + objects. In v1, it was not possible to tell a stream from a
  741 + dictionary within ``"objects"``, and the PDF version was not
  742 + captured at all.
  743 +
  744 +- Within the objects dictionary, keys are now ``"obj:O G R"`` where
  745 + ``O`` and ``G`` are the object and generation numbers. ``"trailer"``
  746 + remains the key for the trailer dictionary. In v1, the ``obj:``
  747 + prefix was not present. The rationale for this change is as follows:
756 748  
757 749 - Having a unique prefix (``obj:``) makes it much easier to search
758 750 in the JSON file for the definition of an object
... ...