Commit 5f4224f31a500452a4f97f36ed57351b41ca0114
1 parent
80acfc38
Simplify --json-output
Now --json-output just changes defaults. Allow output file with --json.
Showing
10 changed files
with
266 additions
and
302 deletions
TODO
| ... | ... | @@ -69,46 +69,11 @@ Soon: Break ground on "Document-level work" |
| 69 | 69 | JSON v2 fixes |
| 70 | 70 | ============= |
| 71 | 71 | |
| 72 | -* Unify code between QPDFJob::doJSONObjects and QPDF::writeJSON. Make | |
| 73 | - sure that the "qpdf" key is always present when json-output is | |
| 74 | - specified. | |
| 75 | - | |
| 76 | -* Change the name of the "qpdf-v2" key to "qpdf". Use that in place of | |
| 77 | - "objects" and change its content to a two-element array whose first | |
| 78 | - element is metadata required (or useful) for parsing and whose | |
| 79 | - second element contains the actual data. Use of an array is the only | |
| 80 | - way to ensure that the metadata is guaranteed to be parsed before we | |
| 81 | - start parsing the objects. Example: | |
| 82 | - | |
| 83 | - { | |
| 84 | - "qpdf": [ | |
| 85 | - { | |
| 86 | - "jsonversion": 2, | |
| 87 | - "pdfversion": "1.3", | |
| 88 | - "pushedinheritedpageresources": false, | |
| 89 | - "calledgetallpages": false, | |
| 90 | - "maxobjectid": 10 | |
| 91 | - }, | |
| 92 | - { | |
| 93 | - ... objects ... | |
| 94 | - } | |
| 95 | - ] | |
| 96 | - } | |
| 97 | - | |
| 98 | - This implies a few things: | |
| 99 | - | |
| 100 | - * Still need to test pushedinheritedpageresources and | |
| 101 | - calledgetallpages and check/use their values when reading | |
| 102 | - | |
| 103 | - * Fix --json-help | |
| 104 | - | |
| 105 | - * When reading back in, we'll have to call | |
| 106 | - pushInheritedAttributesToPage or getAllPages based on the values | |
| 107 | - of the metadata. | |
| 108 | - | |
| 109 | - * Test --json with --json-stream-data and --json-output with | |
| 110 | - --json-stream-data=none. Recheck writeJSON's handling of the | |
| 111 | - pipeline argument. | |
| 72 | +* Rethink QPDF::writeJSON. Maybe provide a simpler overload? | |
| 73 | + | |
| 74 | +* When reading back in, we'll have to call | |
| 75 | + pushInheritedAttributesToPage or getAllPages based on the values | |
| 76 | + of the metadata. | |
| 112 | 77 | |
| 113 | 78 | * Support json v2 in the C API. At a minimum, write_json, |
| 114 | 79 | create_from_json, and update_from_json need to be there and should | ... | ... |
include/qpdf/QPDF.hh
| ... | ... | @@ -133,7 +133,7 @@ class QPDF |
| 133 | 133 | QPDF_DLL |
| 134 | 134 | void updateFromJSON(std::shared_ptr<InputSource>); |
| 135 | 135 | |
| 136 | - // Write qpdf json format to the pipeline "p". The only supported | |
| 136 | + // Write qpdf JSON format to the pipeline "p". The only supported | |
| 137 | 137 | // version is 2. |
| 138 | 138 | // |
| 139 | 139 | // If the value of "complete" is true, a complete JSON object | ... | ... |
include/qpdf/QPDFJob.hh
| ... | ... | @@ -554,7 +554,7 @@ class QPDFJob |
| 554 | 554 | void setEncryptionOptions(QPDF&, QPDFWriter&); |
| 555 | 555 | void maybeFixWritePassword(int R, std::string& password); |
| 556 | 556 | void writeOutfile(QPDF& pdf); |
| 557 | - void writeJSON(Pipeline* p, QPDF& pdf, bool complete, bool& first_key); | |
| 557 | + void writeJSON(QPDF& pdf); | |
| 558 | 558 | |
| 559 | 559 | // JSON |
| 560 | 560 | void doJSON(QPDF& pdf, Pipeline*); | ... | ... |
job.sums
| ... | ... | @@ -8,10 +8,10 @@ include/qpdf/auto_job_c_pages.hh b3cc0f21029f6d89efa043dcdbfa183cb59325b6506001c |
| 8 | 8 | include/qpdf/auto_job_c_uo.hh ae21b69a1efa9333050f4833d465f6daff87e5b38e5106e49bbef5d4132e4ed1 |
| 9 | 9 | job.yml f9564f18b08a45d17328af43652645771d3498471820c858b8c9013a193e1412 |
| 10 | 10 | libqpdf/qpdf/auto_job_decl.hh 7844eba58edffb9494b19e8eca6fd59a24d6e152ca606c3b07da569f753df2da |
| 11 | -libqpdf/qpdf/auto_job_help.hh db2e4350c700e064b204e3e20d4fee4eddfe312b28092afcf608b4b6863d30e5 | |
| 11 | +libqpdf/qpdf/auto_job_help.hh 700d7600b34588169c80f3e325e39e592e2f5c1af1cdac16614150ff38424b40 | |
| 12 | 12 | libqpdf/qpdf/auto_job_init.hh fd1635a5ad6ba16b7ae008467145560a59a5ecfd10d29c5ef7cd0d8347747cd2 |
| 13 | 13 | libqpdf/qpdf/auto_job_json_decl.hh 06caa46eaf71db8a50c046f91866baa8087745a9474319fb7c86d92634cc8297 |
| 14 | 14 | libqpdf/qpdf/auto_job_json_init.hh 59545578a2e47c660ff98516ed53f06638be75eb4658e2a09d32cc08e0cb7268 |
| 15 | -libqpdf/qpdf/auto_job_schema.hh 9d543cd4a43eafffc2c4b8a6fee29e399c271c52cb6f7d417ae5497b3c1127dc | |
| 15 | +libqpdf/qpdf/auto_job_schema.hh 5352ef1be1ad7cc6f4f36dab88f2937d278e6bd3a0e2d46259794dc226c8ba6b | |
| 16 | 16 | manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580 |
| 17 | -manual/cli.rst 8e1f443c6fa000e023e516c318df4d04d58233d4d8648907c4a71f0ea5722bca | |
| 17 | +manual/cli.rst bbce4cfb662a96c8df0c8563f8065844b77aca7b4ec6385955546b9a455d9953 | ... | ... |
libqpdf/QPDFJob.cc
| ... | ... | @@ -680,8 +680,15 @@ QPDFJob::checkConfiguration() |
| 680 | 680 | " an output file is specified"); |
| 681 | 681 | } else if (m->split_pages) { |
| 682 | 682 | usage("--split-pages may not be used with --replace-input"); |
| 683 | + } else if (m->json_version) { | |
| 684 | + usage("--json may not be used with --replace-input"); | |
| 683 | 685 | } |
| 684 | 686 | } |
| 687 | + if (m->json_version && (m->outfilename == nullptr)) { | |
| 688 | + // The output file is optional with --json for backward | |
| 689 | + // compatibility and defaults to standard output. | |
| 690 | + m->outfilename = QUtil::make_shared_cstr("-"); | |
| 691 | + } | |
| 685 | 692 | if (m->infilename == nullptr) { |
| 686 | 693 | usage("an input file name is required"); |
| 687 | 694 | } else if ( |
| ... | ... | @@ -1116,25 +1123,47 @@ QPDFJob::doJSONObject( |
| 1116 | 1123 | void |
| 1117 | 1124 | QPDFJob::doJSONObjects(Pipeline* p, bool& first, QPDF& pdf) |
| 1118 | 1125 | { |
| 1119 | - JSON::writeDictionaryKey(p, first, "objects", 1); | |
| 1120 | - bool first_object = true; | |
| 1121 | - JSON::writeDictionaryOpen(p, first_object, 1); | |
| 1122 | - bool all_objects = m->json_objects.empty(); | |
| 1123 | - std::set<QPDFObjGen> wanted_og = getWantedJSONObjects(); | |
| 1124 | - for (auto& obj: pdf.getAllObjects()) { | |
| 1125 | - std::string key = obj.unparse(); | |
| 1126 | - if (this->m->json_version > 1) { | |
| 1127 | - key = "obj:" + key; | |
| 1126 | + if (m->json_version == 1) { | |
| 1127 | + JSON::writeDictionaryKey(p, first, "objects", 1); | |
| 1128 | + bool first_object = true; | |
| 1129 | + JSON::writeDictionaryOpen(p, first_object, 1); | |
| 1130 | + bool all_objects = m->json_objects.empty(); | |
| 1131 | + std::set<QPDFObjGen> wanted_og = getWantedJSONObjects(); | |
| 1132 | + for (auto& obj: pdf.getAllObjects()) { | |
| 1133 | + std::string key = obj.unparse(); | |
| 1134 | + if (this->m->json_version > 1) { | |
| 1135 | + key = "obj:" + key; | |
| 1136 | + } | |
| 1137 | + if (all_objects || wanted_og.count(obj.getObjGen())) { | |
| 1138 | + doJSONObject(p, first_object, key, obj); | |
| 1139 | + } | |
| 1128 | 1140 | } |
| 1129 | - if (all_objects || wanted_og.count(obj.getObjGen())) { | |
| 1130 | - doJSONObject(p, first_object, key, obj); | |
| 1141 | + if (all_objects || m->json_objects.count("trailer")) { | |
| 1142 | + auto trailer = pdf.getTrailer(); | |
| 1143 | + doJSONObject(p, first_object, "trailer", trailer); | |
| 1131 | 1144 | } |
| 1145 | + JSON::writeDictionaryClose(p, first_object, 1); | |
| 1146 | + } else { | |
| 1147 | + std::set<std::string> json_objects; | |
| 1148 | + if (this->m->json_objects.count("trailer")) { | |
| 1149 | + json_objects.insert("trailer"); | |
| 1150 | + } | |
| 1151 | + auto wanted = getWantedJSONObjects(); | |
| 1152 | + for (auto const& og: wanted) { | |
| 1153 | + std::ostringstream s; | |
| 1154 | + s << "obj:" << og.unparse(' ') << " R"; | |
| 1155 | + json_objects.insert(s.str()); | |
| 1156 | + } | |
| 1157 | + pdf.writeJSON( | |
| 1158 | + this->m->json_version, | |
| 1159 | + p, | |
| 1160 | + false, | |
| 1161 | + first, | |
| 1162 | + this->m->decode_level, | |
| 1163 | + this->m->json_stream_data, | |
| 1164 | + this->m->json_stream_prefix, | |
| 1165 | + json_objects); | |
| 1132 | 1166 | } |
| 1133 | - if (all_objects || m->json_objects.count("trailer")) { | |
| 1134 | - auto trailer = pdf.getTrailer(); | |
| 1135 | - doJSONObject(p, first_object, "trailer", trailer); | |
| 1136 | - } | |
| 1137 | - JSON::writeDictionaryClose(p, first_object, 1); | |
| 1138 | 1167 | } |
| 1139 | 1168 | |
| 1140 | 1169 | void |
| ... | ... | @@ -1777,7 +1806,7 @@ void |
| 1777 | 1806 | QPDFJob::doJSON(QPDF& pdf, Pipeline* p) |
| 1778 | 1807 | { |
| 1779 | 1808 | // qpdf guarantees that no new top-level keys whose names start |
| 1780 | - // with "xdata" will be added. These are reserved for users. | |
| 1809 | + // with "x-" will be added. These are reserved for users. | |
| 1781 | 1810 | |
| 1782 | 1811 | std::string captured_json; |
| 1783 | 1812 | std::shared_ptr<Pl_String> pl_str; |
| ... | ... | @@ -1788,32 +1817,38 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) |
| 1788 | 1817 | |
| 1789 | 1818 | bool first = true; |
| 1790 | 1819 | JSON::writeDictionaryOpen(p, first, 0); |
| 1791 | - // This version is updated every time a non-backward-compatible | |
| 1792 | - // change is made to the JSON format. Clients of the JSON are to | |
| 1793 | - // ignore unrecognized keys, so we only update the version of a | |
| 1794 | - // key disappears or if its value changes meaning. | |
| 1795 | - JSON::writeDictionaryItem( | |
| 1796 | - p, first, "version", JSON::makeInt(this->m->json_version), 1); | |
| 1797 | - JSON j_params = JSON::makeDictionary(); | |
| 1798 | - std::string decode_level_str; | |
| 1799 | - switch (m->decode_level) { | |
| 1800 | - case qpdf_dl_none: | |
| 1801 | - decode_level_str = "none"; | |
| 1802 | - break; | |
| 1803 | - case qpdf_dl_generalized: | |
| 1804 | - decode_level_str = "generalized"; | |
| 1805 | - break; | |
| 1806 | - case qpdf_dl_specialized: | |
| 1807 | - decode_level_str = "specialized"; | |
| 1808 | - break; | |
| 1809 | - case qpdf_dl_all: | |
| 1810 | - decode_level_str = "all"; | |
| 1811 | - break; | |
| 1812 | - } | |
| 1813 | - j_params.addDictionaryMember( | |
| 1814 | - "decodelevel", JSON::makeString(decode_level_str)); | |
| 1815 | - JSON::writeDictionaryItem(p, first, "parameters", j_params, 1); | |
| 1816 | 1820 | |
| 1821 | + if (m->json_output) { | |
| 1822 | + // Exclude version and parameters to keep the output file | |
| 1823 | + // minimal. The JSON version is inside the "qpdf" key for | |
| 1824 | + // version 2. | |
| 1825 | + } else { | |
| 1826 | + // This version is updated every time a non-backward-compatible | |
| 1827 | + // change is made to the JSON format. Clients of the JSON are to | |
| 1828 | + // ignore unrecognized keys, so we only update the version of a | |
| 1829 | + // key disappears or if its value changes meaning. | |
| 1830 | + JSON::writeDictionaryItem( | |
| 1831 | + p, first, "version", JSON::makeInt(this->m->json_version), 1); | |
| 1832 | + JSON j_params = JSON::makeDictionary(); | |
| 1833 | + std::string decode_level_str; | |
| 1834 | + switch (m->decode_level) { | |
| 1835 | + case qpdf_dl_none: | |
| 1836 | + decode_level_str = "none"; | |
| 1837 | + break; | |
| 1838 | + case qpdf_dl_generalized: | |
| 1839 | + decode_level_str = "generalized"; | |
| 1840 | + break; | |
| 1841 | + case qpdf_dl_specialized: | |
| 1842 | + decode_level_str = "specialized"; | |
| 1843 | + break; | |
| 1844 | + case qpdf_dl_all: | |
| 1845 | + decode_level_str = "all"; | |
| 1846 | + break; | |
| 1847 | + } | |
| 1848 | + j_params.addDictionaryMember( | |
| 1849 | + "decodelevel", JSON::makeString(decode_level_str)); | |
| 1850 | + JSON::writeDictionaryItem(p, first, "parameters", j_params, 1); | |
| 1851 | + } | |
| 1817 | 1852 | bool all_keys = m->json_keys.empty(); |
| 1818 | 1853 | // The list of selectable top-level keys is duplicated in the |
| 1819 | 1854 | // following places: job.yml, QPDFJob::json_schema, and |
| ... | ... | @@ -1850,11 +1885,7 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) |
| 1850 | 1885 | // qpdf/objects/objectinfo without other keys. |
| 1851 | 1886 | if (all_keys || m->json_keys.count("objects") || |
| 1852 | 1887 | m->json_keys.count("qpdf")) { |
| 1853 | - if (this->m->json_version == 1) { | |
| 1854 | - doJSONObjects(p, first, pdf); | |
| 1855 | - } else { | |
| 1856 | - writeJSON(p, pdf, false, first); | |
| 1857 | - } | |
| 1888 | + doJSONObjects(p, first, pdf); | |
| 1858 | 1889 | } |
| 1859 | 1890 | if (this->m->json_version == 1) { |
| 1860 | 1891 | // "objectinfo" is not needed for version >1 since you can |
| ... | ... | @@ -1889,9 +1920,6 @@ QPDFJob::doInspection(QPDF& pdf) |
| 1889 | 1920 | if (m->check) { |
| 1890 | 1921 | doCheck(pdf); |
| 1891 | 1922 | } |
| 1892 | - if (m->json_version) { | |
| 1893 | - doJSON(pdf, &cout); | |
| 1894 | - } | |
| 1895 | 1923 | if (m->show_npages) { |
| 1896 | 1924 | QTC::TC("qpdf", "QPDFJob npages"); |
| 1897 | 1925 | cout << pdf.getRoot().getKey("/Pages").getKey("/Count").getIntValue() |
| ... | ... | @@ -3337,9 +3365,8 @@ QPDFJob::writeOutfile(QPDF& pdf) |
| 3337 | 3365 | } else if (strcmp(m->outfilename.get(), "-") == 0) { |
| 3338 | 3366 | m->outfilename = nullptr; |
| 3339 | 3367 | } |
| 3340 | - if (this->m->json_output) { | |
| 3341 | - bool unused = true; | |
| 3342 | - writeJSON(nullptr, pdf, true, unused); | |
| 3368 | + if (this->m->json_version) { | |
| 3369 | + writeJSON(pdf); | |
| 3343 | 3370 | } else { |
| 3344 | 3371 | // QPDFWriter must have block scope so the output file will be |
| 3345 | 3372 | // closed after write() finishes. |
| ... | ... | @@ -3393,52 +3420,30 @@ QPDFJob::writeOutfile(QPDF& pdf) |
| 3393 | 3420 | } |
| 3394 | 3421 | |
| 3395 | 3422 | void |
| 3396 | -QPDFJob::writeJSON(Pipeline* p, QPDF& pdf, bool complete, bool& first_key) | |
| 3423 | +QPDFJob::writeJSON(QPDF& pdf) | |
| 3397 | 3424 | { |
| 3398 | 3425 | // File pipeline must have block scope so it will be closed |
| 3399 | 3426 | // after write. |
| 3400 | 3427 | std::shared_ptr<QUtil::FileCloser> fc; |
| 3401 | 3428 | std::shared_ptr<Pipeline> fp; |
| 3402 | - std::string file_prefix = this->m->json_stream_prefix; | |
| 3403 | 3429 | if (m->outfilename.get()) { |
| 3404 | 3430 | QTC::TC("qpdf", "QPDFJob write json to file"); |
| 3405 | - if (file_prefix.empty()) { | |
| 3406 | - file_prefix = this->m->outfilename.get(); | |
| 3431 | + if (this->m->json_stream_prefix.empty()) { | |
| 3432 | + this->m->json_stream_prefix = this->m->outfilename.get(); | |
| 3407 | 3433 | } |
| 3408 | 3434 | fc = std::make_shared<QUtil::FileCloser>( |
| 3409 | 3435 | QUtil::safe_fopen(this->m->outfilename.get(), "w")); |
| 3410 | 3436 | fp = std::make_shared<Pl_StdioFile>("json output", fc->f); |
| 3411 | 3437 | } else if ( |
| 3412 | - (this->m->json_stream_data == qpdf_sj_file) && file_prefix.empty()) { | |
| 3438 | + (this->m->json_stream_data == qpdf_sj_file) && | |
| 3439 | + this->m->json_stream_prefix.empty()) { | |
| 3413 | 3440 | QTC::TC("qpdf", "QPDFJob need json-stream-prefix for stdout"); |
| 3414 | 3441 | usage("please specify --json-stream-prefix since the input file " |
| 3415 | 3442 | "name is unknown"); |
| 3416 | 3443 | } else { |
| 3417 | 3444 | QTC::TC("qpdf", "QPDFJob write json to stdout"); |
| 3418 | - if (p == nullptr) { | |
| 3419 | - fp = this->m->log->getInfo(); | |
| 3420 | - } | |
| 3421 | - } | |
| 3422 | - if (p == nullptr) { | |
| 3423 | - p = fp.get(); | |
| 3424 | - } | |
| 3425 | - std::set<std::string> json_objects; | |
| 3426 | - if (this->m->json_objects.count("trailer")) { | |
| 3427 | - json_objects.insert("trailer"); | |
| 3428 | - } | |
| 3429 | - auto wanted = getWantedJSONObjects(); | |
| 3430 | - for (auto const& og: wanted) { | |
| 3431 | - std::ostringstream s; | |
| 3432 | - s << "obj:" << og.unparse(' ') << " R"; | |
| 3433 | - json_objects.insert(s.str()); | |
| 3434 | - } | |
| 3435 | - pdf.writeJSON( | |
| 3436 | - this->m->json_version, | |
| 3437 | - p, | |
| 3438 | - complete, | |
| 3439 | - first_key, | |
| 3440 | - this->m->decode_level, | |
| 3441 | - this->m->json_stream_data, | |
| 3442 | - file_prefix, | |
| 3443 | - json_objects); | |
| 3445 | + this->m->log->saveToStandardOutput(true); | |
| 3446 | + fp = this->m->log->getSave(); | |
| 3447 | + } | |
| 3448 | + doJSON(pdf, fp.get()); | |
| 3444 | 3449 | } | ... | ... |
libqpdf/QPDFJob_config.cc
| ... | ... | @@ -244,7 +244,6 @@ QPDFJob::Config::json(std::string const& parameter) |
| 244 | 244 | if ((o.m->json_version < 1) || (o.m->json_version > JSON::LATEST)) { |
| 245 | 245 | usage(std::string("unsupported json version ") + parameter); |
| 246 | 246 | } |
| 247 | - o.m->require_outfile = false; | |
| 248 | 247 | return this; |
| 249 | 248 | } |
| 250 | 249 | |
| ... | ... | @@ -297,14 +296,7 @@ QPDFJob::Config* |
| 297 | 296 | QPDFJob::Config::jsonOutput(std::string const& parameter) |
| 298 | 297 | { |
| 299 | 298 | o.m->json_output = true; |
| 300 | - if (parameter.empty() || (parameter == "latest")) { | |
| 301 | - o.m->json_version = JSON::LATEST; | |
| 302 | - } else { | |
| 303 | - o.m->json_version = QUtil::string_to_int(parameter.c_str()); | |
| 304 | - } | |
| 305 | - if ((o.m->json_version < 2) || (o.m->json_version > JSON::LATEST)) { | |
| 306 | - usage(std::string("unsupported json output version ") + parameter); | |
| 307 | - } | |
| 299 | + json(parameter); | |
| 308 | 300 | if (!o.m->json_stream_data_set) { |
| 309 | 301 | // No need to set json_stream_data_set -- that indicates |
| 310 | 302 | // explicit use of --json-stream-data. |
| ... | ... | @@ -313,9 +305,7 @@ QPDFJob::Config::jsonOutput(std::string const& parameter) |
| 313 | 305 | if (!o.m->decode_level_set) { |
| 314 | 306 | o.m->decode_level = qpdf_dl_none; |
| 315 | 307 | } |
| 316 | - if (o.m->json_keys.empty()) { | |
| 317 | - o.m->json_keys.insert("qpdf"); | |
| 318 | - } | |
| 308 | + o.m->json_keys.insert("qpdf"); | |
| 319 | 309 | return this; |
| 320 | 310 | } |
| 321 | 311 | ... | ... |
libqpdf/qpdf/auto_job_help.hh
| ... | ... | @@ -803,7 +803,9 @@ depth in the JSON section of the manual. "version" may be a |
| 803 | 803 | specific version or "latest" (the default). Run qpdf --json-help |
| 804 | 804 | for a description of the generated JSON object. |
| 805 | 805 | )"); |
| 806 | -ap.addOptionHelp("--json-help", "json", "show format of JSON output", R"(Describe the format of the JSON output by writing to standard | |
| 806 | +ap.addOptionHelp("--json-help", "json", "show format of JSON output", R"(--json-help[=version] | |
| 807 | + | |
| 808 | +Describe the format of the JSON output by writing to standard | |
| 807 | 809 | output a JSON object with the same keys and with values |
| 808 | 810 | containing descriptive text. |
| 809 | 811 | )"); |
| ... | ... | @@ -838,17 +840,17 @@ which is to use the output file name. Whatever is given here |
| 838 | 840 | will be appended with -nnn to create the name of the file that |
| 839 | 841 | will contain the data for the stream in object nnn. |
| 840 | 842 | )"); |
| 841 | -ap.addOptionHelp("--json-output", "json", "serialize to JSON", R"(--json-output[=version] | |
| 843 | +ap.addOptionHelp("--json-output", "json", "apply defaults for JSON serialization", R"(--json-output[=version] | |
| 842 | 844 | |
| 843 | -The output file will be qpdf JSON format at the given version. | |
| 844 | -"version" may be a specific version or "latest" (the default). | |
| 845 | -The only supported version is 2. See also --json-stream-data, | |
| 846 | ---json-stream-prefix, and --decode-level. | |
| 845 | +Implies --json=version. Changes default values for certain | |
| 846 | +options so that the JSON output written is the most faithful | |
| 847 | +representation of the original PDF and contains no additional | |
| 848 | +JSON keys. See also --json-stream-data, --json-stream-prefix, | |
| 849 | +and --decode-level. | |
| 847 | 850 | )"); |
| 848 | -ap.addOptionHelp("--json-input", "json", "input file is qpdf JSON", R"(Treat the input file as a JSON file in qpdf JSON format as | |
| 849 | -written by qpdf --json-output. See the "qpdf JSON Format" | |
| 850 | -section of the manual for information about how to use this | |
| 851 | -option. | |
| 851 | +ap.addOptionHelp("--json-input", "json", "input file is qpdf JSON", R"(Treat the input file as a JSON file in qpdf JSON format. See the | |
| 852 | +"qpdf JSON Format" section of the manual for information about | |
| 853 | +how to use this option. | |
| 852 | 854 | )"); |
| 853 | 855 | ap.addOptionHelp("--update-from-json", "json", "update a PDF from qpdf JSON", R"(--update-from-json=qpdf-json-file |
| 854 | 856 | ... | ... |
libqpdf/qpdf/auto_job_schema.hh
| ... | ... | @@ -28,7 +28,7 @@ static constexpr char const* JOB_SCHEMA_DATA = R"({ |
| 28 | 28 | "forceVersion": "set output PDF version", |
| 29 | 29 | "progress": "show progress when writing", |
| 30 | 30 | "splitPages": "write pages to separate files", |
| 31 | - "jsonOutput": "serialize to JSON", | |
| 31 | + "jsonOutput": "apply defaults for JSON serialization", | |
| 32 | 32 | "encrypt": { |
| 33 | 33 | "userPassword": "user password", |
| 34 | 34 | "ownerPassword": "owner password", | ... | ... |
manual/cli.rst
| ... | ... | @@ -3194,7 +3194,16 @@ Related Options |
| 3194 | 3194 | :qpdf:ref:`--json-help` option to get a description of the JSON |
| 3195 | 3195 | object. |
| 3196 | 3196 | |
| 3197 | -.. qpdf:option:: --json-help | |
| 3197 | + Starting with qpdf 11, when this option is specified, an output | |
| 3198 | + file is optional (for backward compatibility) and defaults to | |
| 3199 | + standard output. You may specify an output file to write the JSON | |
| 3200 | + to a file rather than standard output. | |
| 3201 | + | |
| 3202 | + Stream data is only included if :qpdf:ref:`--json-output` is | |
| 3203 | + specified or if a value other than ``none`` is passed to | |
| 3204 | + :qpdf:ref:`--json-stream-data`. | |
| 3205 | + | |
| 3206 | +.. qpdf:option:: --json-help[=version] | |
| 3198 | 3207 | |
| 3199 | 3208 | .. help: show format of JSON output |
| 3200 | 3209 | |
| ... | ... | @@ -3202,12 +3211,13 @@ Related Options |
| 3202 | 3211 | output a JSON object with the same keys and with values |
| 3203 | 3212 | containing descriptive text. |
| 3204 | 3213 | |
| 3205 | - Describe the format of the JSON output by writing to standard | |
| 3206 | - output a JSON object with the same structure as the JSON generated | |
| 3207 | - by qpdf. In the output written by ``--json-help``, each key's value | |
| 3208 | - is a description of the key. The specific contract guaranteed by | |
| 3209 | - qpdf in its JSON representation is explained in more detail in the | |
| 3210 | - :ref:`json`. | |
| 3214 | + Describe the format of the corresponding version of JSON output by | |
| 3215 | + writing to standard output a JSON object with the same structure as | |
| 3216 | + the JSON generated by qpdf. In the output written by | |
| 3217 | + ``--json-help``, each key's value is a description of the key. The | |
| 3218 | + specific contract guaranteed by qpdf in its JSON representation is | |
| 3219 | + explained in more detail in the :ref:`json`. The default version of | |
| 3220 | + help is version ``2``, as with the :qpdf:ref:`--json` flag. | |
| 3211 | 3221 | |
| 3212 | 3222 | .. qpdf:option:: --json-key=key |
| 3213 | 3223 | |
| ... | ... | @@ -3233,11 +3243,9 @@ Related Options |
| 3233 | 3243 | objects will be shown. |
| 3234 | 3244 | |
| 3235 | 3245 | This option is repeatable. If given, only specified objects will be |
| 3236 | - shown in the ``"objects"`` key of the JSON output. Otherwise, all | |
| 3237 | - objects will be shown. For qpdf JSON version 1, this also affects | |
| 3238 | - the ``"objectinfo"`` key, which is not present in version 2. This | |
| 3239 | - option may be used with :qpdf:ref:`--json` and also with | |
| 3240 | - :qpdf:ref:`--json-output`. | |
| 3246 | + shown in the objects dictionary in the JSON output. Otherwise, all | |
| 3247 | + objects will be shown. See :ref:`json` for details about the qpdf | |
| 3248 | + JSON format. | |
| 3241 | 3249 | |
| 3242 | 3250 | .. qpdf:option:: --json-stream-data={none|inline|file} |
| 3243 | 3251 | |
| ... | ... | @@ -3281,28 +3289,30 @@ Related Options |
| 3281 | 3289 | |
| 3282 | 3290 | .. qpdf:option:: --json-output[=version] |
| 3283 | 3291 | |
| 3284 | - .. help: serialize to JSON | |
| 3292 | + .. help: apply defaults for JSON serialization | |
| 3285 | 3293 | |
| 3286 | - The output file will be qpdf JSON format at the given version. | |
| 3287 | - "version" may be a specific version or "latest" (the default). | |
| 3288 | - The only supported version is 2. See also --json-stream-data, | |
| 3289 | - --json-stream-prefix, and --decode-level. | |
| 3294 | + Implies --json=version. Changes default values for certain | |
| 3295 | + options so that the JSON output written is the most faithful | |
| 3296 | + representation of the original PDF and contains no additional | |
| 3297 | + JSON keys. See also --json-stream-data, --json-stream-prefix, | |
| 3298 | + and --decode-level. | |
| 3290 | 3299 | |
| 3291 | - The output file, instead of being a PDF file, will be a JSON file | |
| 3292 | - in qpdf JSON format at the given version. ``version`` may be a | |
| 3293 | - specific version or ``latest`` (the default). The only supported | |
| 3294 | - version is 2. See also :qpdf:ref:`--json-stream-data` and | |
| 3295 | - :qpdf:ref:`--json-stream-prefix`. This option also changes the | |
| 3296 | - following defaults: | |
| 3300 | + Implies :qpdf:ref:`--json` at the specified version. This option | |
| 3301 | + changes several default values, all of which can be overridden by | |
| 3302 | + specifying the stated option: | |
| 3297 | 3303 | |
| 3298 | 3304 | - The default value for :qpdf:ref:`--json-stream-data` changes from |
| 3299 | 3305 | ``none`` to ``inline``. |
| 3300 | 3306 | |
| 3301 | - - The default decode level for stream data becomes ``none``, but you can | |
| 3302 | - override it with :qpdf:ref:`--decode-level`. | |
| 3307 | + - The default value for :qpdf:ref:`--decode-level` changes from | |
| 3308 | + ``generalized`` to ``none``. | |
| 3309 | + | |
| 3310 | + - By default, only the ``"qpdf"`` key is included in the JSON | |
| 3311 | + output, but you can add additional keys with | |
| 3312 | + :qpdf:ref:`--json-key`. | |
| 3303 | 3313 | |
| 3304 | - - Only the ``"qpdf"`` key is included in the JSON output, but you | |
| 3305 | - can add additional keys with :qpdf:ref:`--json-key`. | |
| 3314 | + - Excludes the ``"version"`` and ``"parameters"`` keys from the | |
| 3315 | + JSON output. | |
| 3306 | 3316 | |
| 3307 | 3317 | If you want to look at the contents of streams easily as you would |
| 3308 | 3318 | in QDF mode (see :ref:`qdf`), you can use |
| ... | ... | @@ -3313,15 +3323,15 @@ Related Options |
| 3313 | 3323 | |
| 3314 | 3324 | .. help: input file is qpdf JSON |
| 3315 | 3325 | |
| 3316 | - Treat the input file as a JSON file in qpdf JSON format as | |
| 3317 | - written by qpdf --json-output. See the "qpdf JSON Format" | |
| 3318 | - section of the manual for information about how to use this | |
| 3319 | - option. | |
| 3326 | + Treat the input file as a JSON file in qpdf JSON format. See the | |
| 3327 | + "qpdf JSON Format" section of the manual for information about | |
| 3328 | + how to use this option. | |
| 3320 | 3329 | |
| 3321 | - Treat the input file as a JSON file in qpdf JSON format as written | |
| 3322 | - by ``qpdf --json-output``. The input file must be complete and | |
| 3323 | - include all stream data. For information about converting between | |
| 3324 | - PDF and JSON, please see :ref:`json`. | |
| 3330 | + Treat the input file as a JSON file in qpdf JSON format. The input | |
| 3331 | + file must be complete and include all stream data. The JSON version | |
| 3332 | + must be at least 2. All top-level keys are ignored except for | |
| 3333 | + ``"qpdf"``. For information about converting between PDF and JSON, | |
| 3334 | + please see :ref:`json`. | |
| 3325 | 3335 | |
| 3326 | 3336 | .. qpdf:option:: --update-from-json=qpdf-json-file |
| 3327 | 3337 | ... | ... |
manual/json.rst
| ... | ... | @@ -24,27 +24,28 @@ represents the contents of a PDF file. This is distinct from the |
| 24 | 24 | interacting with qpdf the way the command-line tool does. For |
| 25 | 25 | information about that, see :ref:`qpdf-job`. |
| 26 | 26 | |
| 27 | -The qpdf JSON format is specific to qpdf. There are two ways to use | |
| 28 | -qpdf JSON: | |
| 29 | - | |
| 30 | -- The :qpdf:ref:`--json` command-line flag causes creation of a JSON | |
| 31 | - representation of all the objects in a PDF file, excluding stream | |
| 32 | - data. This includes an unambiguous representation of the PDF object | |
| 33 | - structure and also provides JSON-formatted summaries of other | |
| 34 | - information about the file. This functionality is built into | |
| 35 | - ``QPDFJob`` and can be accessed from the ``qpdf`` command-line tool | |
| 36 | - or from the ``QPDFJob`` C or C++ API. | |
| 37 | - | |
| 38 | -- qpdf can create a JSON file that completely represents a PDF file. | |
| 39 | - You can think of this as using JSON as an *alternative syntax* for | |
| 40 | - representing a PDF file. Using qpdf JSON, it is possible to | |
| 41 | - convert a PDF file to JSON, manipulate the structure or contents of | |
| 42 | - the objects at a low level, and convert the results back to a PDF | |
| 43 | - file. This functionality can be accessed from the command-line with | |
| 44 | - the :qpdf:ref:`--json-output`, :qpdf:ref:`--json-input`, and | |
| 45 | - :qpdf:ref:`--update-from-json` flags, or from the API using the | |
| 46 | - ``QPDF::writeJSON``, ``QPDF::createFromJSON``, and | |
| 47 | - ``QPDF::updateFromJSON`` methods. | |
| 27 | +The qpdf JSON format is specific to qpdf. With JSON version 2, the | |
| 28 | +:qpdf:ref:`--json` command-line flag causes creation of a JSON | |
| 29 | +representation of all the objects in a PDF file. This includes an | |
| 30 | +unambiguous representation of the PDF object structure and also | |
| 31 | +provides JSON-formatted summaries of other information about the file. | |
| 32 | +This functionality is built into ``QPDFJob`` and can be accessed from | |
| 33 | +the ``qpdf`` command-line tool or from the ``QPDFJob`` C or C++ API. | |
| 34 | + | |
| 35 | +By default, stream data is omitted, but it can be included by | |
| 36 | +specifying the :qpdf:ref:`--json-stream-data` option. With stream data | |
| 37 | +included, the generated JSON file completely represents a PDF file. | |
| 38 | +You can think of this as using JSON as an *alternative syntax* for | |
| 39 | +representing a PDF file. Using qpdf JSON, it is possible to convert a | |
| 40 | +PDF file to JSON, manipulate the structure or contents of the objects | |
| 41 | +at a low level, and convert the results back to a PDF file. This | |
| 42 | +functionality can be accessed from the command-line with the | |
| 43 | +:qpdf:ref:`--json-input` and :qpdf:ref:`--update-from-json` flags, or | |
| 44 | +from the API using the ``QPDF::writeJSON``, ``QPDF::createFromJSON``, | |
| 45 | +and ``QPDF::updateFromJSON`` methods. The :qpdf:ref:`--json-output` | |
| 46 | +flag changes a handful of defaults so that the resulting JSON is as | |
| 47 | +close as possible to the original input and is ready for being | |
| 48 | +converted back to PDF. | |
| 48 | 49 | |
| 49 | 50 | .. _json-terminology: |
| 50 | 51 | |
| ... | ... | @@ -120,18 +121,53 @@ qpdf JSON Object Representation |
| 120 | 121 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| 121 | 122 | |
| 122 | 123 | This section describes the representation of PDF objects in qpdf JSON |
| 123 | -version 2. PDF objects are represented within the ``"objects"`` | |
| 124 | -dictionary of a qpdf JSON file. This is true both for PDF serialized | |
| 125 | -to JSON (:qpdf:ref:`--json-output`, ``QPDF::writeJSON``) or objects as | |
| 126 | -they appear in the output of ``qpdf`` with the :qpdf:ref:`--json` | |
| 127 | -option. | |
| 128 | - | |
| 129 | -Each key in the ``"objects"`` dictionary is either ``"trailer"`` or a | |
| 130 | -string of the form ``"obj:O G R"`` where ``O`` and ``G`` are the | |
| 131 | -object and generation numbers and ``R`` is the literal string ``R``. | |
| 132 | -This is the PDF syntax for the indirect object reference prepended by | |
| 133 | -``obj:``. The value, representing the object itself, is a JSON object | |
| 134 | -whose structure is described below. | |
| 124 | +version 2. PDF objects are represented within the ``"qpdf"`` entry of | |
| 125 | +a qpdf JSON file. The ``"qpdf"`` entry is a two-element array. The | |
| 126 | +first element is a dictionary containing header-like information about | |
| 127 | +the file such as the PDF version. The second element is a dictionary | |
| 128 | +containing all the objects in the PDF file. We refer to this as the | |
| 129 | +*objects dictionary*. | |
| 130 | + | |
| 131 | +The first element contains the following keys: | |
| 132 | + | |
| 133 | +- ``"jsonversion"`` -- a number indicating the JSON version used for | |
| 134 | + writing. This will always be ``2``. | |
| 135 | + | |
| 136 | +- ``"pdfversion"`` -- a string containing the PDF version as indicated in | |
| 137 | + the PDF header (e.g. ``"1.7"``, ``"2.0"``) | |
| 138 | + | |
| 139 | +- ``"pushedinheritedpageresources"`` -- a boolean indicating whether | |
| 140 | + the library pushed inherited resources down to the page level. | |
| 141 | + Certain library calls cause this to happen, and qpdf needs to know | |
| 142 | + when reading a JSON file back in whether it should do this as it may | |
| 143 | + cause certain objects to be renumbered. | |
| 144 | + | |
| 145 | +- ``"calledgetallpages"`` -- a boolean indicating whether | |
| 146 | + ``getAllPages`` was called prior to writing the JSON output. This | |
| 147 | + method causes page tree repair to occur, which may renumber some | |
| 148 | + objects (in very rare cases of corrupted page trees), so qpdf needs | |
| 149 | + to know this information when reading a JSON file back in. | |
| 150 | + | |
| 151 | +- ``"maxobjectid"`` -- a number indicating the object ID of the | |
| 152 | + highest numbered object in the file. This is provided to make it | |
| 153 | + easier for software that wants to add new objects to the file as you | |
| 154 | + can safely start with one above that number when creating new | |
| 155 | + objects. Note that the value of ``"maxobjectid"`` may be higher than | |
| 156 | + the actual maximum object that appears in the input PDF since it | |
| 157 | + takes into consideration any dangling indirect object references | |
| 158 | + from the original file. This prevents you from unwittingly creating | |
| 159 | + an object that doesn't exist but that is referenced, which may have | |
| 160 | + unintended side effects. (The PDF specification explicitly allows | |
| 161 | + dangling references and says to treat them as nulls. This can happen | |
| 162 | + if objects are removed from a PDF file.) | |
| 163 | + | |
| 164 | +The second element is the objects dictionary. Each key in the objects | |
| 165 | +dictionary is either ``"trailer"`` or a string of the form ``"obj:O G | |
| 166 | +R"`` where ``O`` and ``G`` are the object and generation numbers and | |
| 167 | +``R`` is the literal string ``R``. This is the PDF syntax for the | |
| 168 | +indirect object reference prepended by ``obj:``. The value, | |
| 169 | +representing the object itself, is a JSON object whose structure is | |
| 170 | +described below. | |
| 135 | 171 | |
| 136 | 172 | Top-level Stream Objects |
| 137 | 173 | Stream objects are represented as a JSON object with the single key |
| ... | ... | @@ -143,6 +179,7 @@ Top-level Stream Objects |
| 143 | 179 | |
| 144 | 180 | - ``none``: stream data is not represented; no other keys are |
| 145 | 181 | present |
| | 182 | + or specified. | |
| 146 | 183 | |
| 147 | 184 | - ``inline``: the stream data appears as a base64-encoded string as |
| 148 | 185 | the value of the ``"data"`` key |
| ... | ... | @@ -249,57 +286,6 @@ Object Values |
| 249 | 286 | the string representations of names and whose values are |
| 250 | 287 | representations of PDF objects. |
| 251 | 288 | |
| 252 | -.. _json.output: | |
| 253 | - | |
| 254 | -qpdf JSON Output | |
| 255 | -~~~~~~~~~~~~~~~~ | |
| 256 | - | |
| 257 | -The format of the JSON written by qpdf's :qpdf:ref:`--json-output` | |
| 258 | -flag or the ``QPDF::writeJSON`` API call is a JSON object consisting | |
| 259 | -of a single key: ``"qpdf"``. This may be the only key, or it may be | |
| 260 | -embedded in the output of ``qpdf --json``. Unknown keys are ignored | |
| 261 | -for future compatibility. It is guaranteed that qpdf will never add | |
| 262 | -any keys whose names start with ``xdata``, so users are free to add | |
| 263 | -their own metadata using keys whose names start with ``xdata`` without | |
| 264 | -fear of clashing with a future version of qpdf. | |
| 265 | - | |
| 266 | -The ``"qpdf"`` key points to a two-element JSON array. The first element is | |
| 267 | -a JSON object with the following keys: | |
| 268 | - | |
| 269 | -- ``"jsonversion"`` -- a number indicating the JSON version used for | |
| 270 | - writing. This will always be ``2``. | |
| 271 | - | |
| 272 | -- ``"pdfversion"`` -- a string containing PDF version as indicated in | |
| 273 | - the PDF header (e.g. ``"1.7"``, ``"2.0"``) | |
| 274 | - | |
| 275 | -- ``pushedinheritedpageresources`` -- a boolean indicating whether | |
| 276 | - the library pushed inherited resources down to the page level. | |
| 277 | - Certain library calls cause this to happen, and qpdf needs to know | |
| 278 | - when reading a JSON file back in whether it should do this as it may | |
| 279 | - cause certain objects to be renumbered. | |
| 280 | - | |
| 281 | -- ``calledgetallpages`` -- a boolean indicating whether | |
| 282 | - ``getAllPages`` was called prior to writing the JSON output. This | |
| 283 | - method causes page tree repair to occur, which may renumber some | |
| 284 | - objects (in very rare cases of corrupted page trees), so qpdf needs | |
| 285 | - to know this information when reading a JSON file back in. | |
| 286 | - | |
| 287 | -- ``"maxobjectid"`` -- a number indicating the object ID of the | |
| 288 | - highest numbered object in the file. This is provided to make it | |
| 289 | - easier for software that wants to add new objects to the file as you | |
| 290 | - can safely start with one above that number when creating new | |
| 291 | - objects. Note that the value of ``"maxobjectid"`` may be higher than | |
| 292 | - the actual maximum object that appears in the input PDF since it | |
| 293 | - takes into consideration any dangling indirect object references | |
| 294 | - from the original file. This prevents you from unwittingly creating | |
| 295 | - an object that doesn't exist but that is referenced, which may have | |
| 296 | - unintended side effects. (The PDF specification explicitly allows | |
| 297 | - dangling references and says to treat them as nulls. This can happen | |
| 298 | - if objects are removed from a PDF file.) | |
| 299 | - | |
| 300 | -The second element is a JSON object containing the actual PDF objects | |
| 301 | -as described in :ref:`json.objects`. | |
| 302 | - | |
| 303 | 289 | Note that writing JSON output is done by ``QPDF``, not ``QPDFWriter``. |
| 304 | 290 | As such, none of the things ``QPDFWriter`` does apply. This includes |
| 305 | 291 | recompression of streams, renumbering of objects, anything to do with |
| ... | ... | @@ -325,7 +311,7 @@ qpdf JSON format. |
| 325 | 311 | "pdfversion": "1.3", |
| 326 | 312 | "pushedinheritedpageresources": false, |
| 327 | 313 | "calledgetallpages": false, |
| 328 | - "maxobjectid": 5, | |
| 314 | + "maxobjectid": 5 | |
| 329 | 315 | }, |
| 330 | 316 | { |
| 331 | 317 | "obj:1 0 R": { |
| ... | ... | @@ -389,8 +375,7 @@ qpdf JSON format. |
| 389 | 375 | qpdf JSON Input |
| 390 | 376 | ~~~~~~~~~~~~~~~ |
| 391 | 377 | |
| 392 | -Output in the JSON output format described in :ref:`json.output` can | |
| 393 | -be used in two different ways: | |
| 378 | +The qpdf JSON output can be used in two different ways: | |
| 394 | 379 | |
| 395 | 380 | - By using the :qpdf:ref:`--json-input` flag or calling |
| 396 | 381 | ``QPDF::createFromJSON`` in place of ``QPDF::processFile``, a qpdf |
| ... | ... | @@ -408,8 +393,11 @@ Here are some important things to know about qpdf JSON input. |
| 408 | 393 | - When a qpdf JSON file is used as the primary input file, it must be |
| 409 | 394 | complete. This means: |
| 410 | 395 | |
| 396 | + - A JSON version number must be specified with the ``"jsonversion"`` | |
| 397 | + key in the first array element | |
| 398 | + | |
| 411 | 399 | - A PDF version number must be specified with the ``"pdfversion"`` |
| 412 | - key | |
| 400 | + key in the first array element | |
| 413 | 401 | |
| 414 | 402 | - Stream data must be present for all streams |
| 415 | 403 | |
| ... | ... | @@ -422,6 +410,9 @@ Here are some important things to know about qpdf JSON input. |
| 422 | 410 | - ``"maxobjectid"`` is ignored, so it is not necessary to update it |
| 423 | 411 | when adding new objects. |
| 424 | 412 | |
| 413 | + - ``"calledgetallpages"`` and ``"pushedinheritedpageresources"`` are | |
| 414 | + treated as false if omitted. | |
| 415 | + | |
| 425 | 416 | - ``"/Length"`` is ignored in all stream dictionaries. qpdf doesn't |
| 426 | 417 | put it there when it creates JSON output, and it is not necessary |
| 427 | 418 | to add it. |
| ... | ... | @@ -432,14 +423,13 @@ Here are some important things to know about qpdf JSON input. |
| 432 | 423 | - Unknown keys at the top level of the file, within ``objects``, |
| 433 | 424 | at the top level of each individual object (inside the object that |
| 434 | 425 | has the ``"value"`` or ``"stream"`` key) and directly within |
| 435 | - ``"stream"`` are ignored for future compatibility. You should | |
| 436 | - avoid putting your own values in those places if you wish to avoid | |
| 437 | - risking that your JSON files will not work in future versions of | |
| 438 | - qpdf. The exception to this advice is at the top level of the | |
| 439 | - overall file where it is explicitly supported for you to add your | |
| 440 | - own keys. For example, you could add your own metadata at the top | |
| 441 | - level, and qpdf will ignore it. Note that extra top-level keys are | |
| 442 | - not preserved when qpdf reads your JSON file. | |
| 426 | + ``"stream"`` are ignored for future compatibility. This includes | |
| 427 | + other top-level keys generated by ``qpdf`` itself (such as | |
| 428 | + ``"pages"``). As such, those keys don't have to be consistent with | |
| 429 | + the ``"qpdf"`` key if modifying a JSON file for conversion back to | |
| 430 | + PDF. If you wish to store application-specific metadata, you can | |
| 431 | + do so by adding a key whose name starts with ``x-``. qpdf is | |
| 432 | + guaranteed not to add any of its own keys that start with ``x-``. | |
| 443 | 433 | |
| 444 | 434 | - When qpdf reads a PDF file, the internal object numbers are always |
| 445 | 435 | preserved. However, when qpdf writes a file using ``QPDFWriter``, |
| ... | ... | @@ -458,9 +448,9 @@ Here are some important things to know about qpdf JSON input. |
| 458 | 448 | # edit pdf.json |
| 459 | 449 | qpdf in.pdf out.pdf --update-from-json=pdf.json |
| 460 | 450 | |
| 461 | - The following will not produce predictable results because | |
| 462 | - ``out.pdf`` won't have the same object numbers as ``pdf.json`` and | |
| 463 | - ``in.pdf``. | |
| 451 | + The following will produce unpredictable and probably incorrect | |
| 452 | + results because ``out.pdf`` won't have the same object numbers as | |
| 453 | + ``pdf.json`` and ``in.pdf``. | |
| 464 | 454 | |
| 465 | 455 | :: |
| 466 | 456 | |
| ... | ... | @@ -658,15 +648,16 @@ be aware of: |
| 658 | 648 | - If a PDF file has certain types of errors in its pages tree (such as |
| 659 | 649 | page objects that are direct or multiple pages sharing the same |
| 660 | 650 | object ID), qpdf will automatically repair the pages tree. If you |
| 661 | - specify ``"objects"`` (and, with qpdf JSON version 1, also | |
| 651 | + specify ``"qpdf"`` (or, with qpdf JSON version 1, ``"objects"`` or | |
| 662 | 652 | ``"objectinfo"``) without any other keys, you will see the original |
| 663 | 653 | pages tree without any corrections. If you specify any of the keys that |
| 664 | 654 | require page tree traversal (for example, ``"pages"``, |
| 665 | - ``"outlines"``, or ``"pagelabel"``), then ``"objects"`` (and | |
| 666 | - ``"objectinfo"``) will show the repaired page tree so that object | |
| 667 | - references will be consistent throughout the file. This is not an | |
| 668 | - issue with :qpdf:ref:`--json-output`, which doesn't repair the pages | |
| 669 | - tree. | |
| 655 | + ``"outlines"``, or ``"pagelabel"``), then ``"qpdf"`` (and | |
| 656 | + ``"objects"`` and ``"objectinfo"``) will show the repaired page | |
| 657 | + tree so that object references will be consistent throughout the | |
| 658 | + file. You can tell if this has happened by looking at the | |
| 659 | + ``"calledgetallpages"`` and ``"pushedinheritedpageresources"`` | |
| 660 | + fields in the first element of the ``"qpdf"`` array. | |
| 670 | 661 | |
| 671 | 662 | - While qpdf guarantees that keys present in the help will be present |
| 672 | 663 | in the output, those fields may be null or empty if the information |
| ... | ... | @@ -743,16 +734,17 @@ version 2. |
| 743 | 734 | dictionary containing either a ``"value"`` key or a ``"stream"`` |
| 744 | 735 | key, making it possible to distinguish streams from other objects. |
| 745 | 736 | |
| 746 | -- The ``"objectinfo"`` key has been removed in favor of a | |
| 747 | - representation in ``"objects"`` that differentiates between a stream | |
| 748 | - and other kinds of objects. In v1, it was not possible to tell a | |
| 749 | - stream from a dictionary within ``"objects"``. | |
| 750 | - | |
| 751 | -- Within the ``"objects"`` dictionary, keys are now ``"obj:O G R"`` | |
| 752 | - where ``O`` and ``G`` are the object and generation number. | |
| 753 | - ``"trailer"`` remains the key for the trailer dictionary. In v1, the | |
| 754 | - ``obj:`` prefix was not present. The rationale for this change is as | |
| 755 | - follows: | |
| 737 | +- The ``"objectinfo"`` and ``"objects"`` keys have been removed in | |
| 738 | + favor of a representation in ``"qpdf"`` that includes header | |
| 739 | + information and differentiates between a stream and other kinds of | |
| 740 | + objects. In v1, it was not possible to tell a stream from a | |
| 741 | + dictionary within ``"objects"``, and the PDF version was not | |
| 742 | + captured at all. | |
| 743 | + | |
| 744 | +- Within the objects dictionary, keys are now ``"obj:O G R"`` where | |
| 745 | + ``O`` and ``G`` are the object and generation numbers. ``"trailer"`` | |
| 746 | + remains the key for the trailer dictionary. In v1, the ``obj:`` | |
| 747 | + prefix was not present. The rationale for this change is as follows: | |
| 756 | 748 | |
| 757 | 749 | - Having a unique prefix (``obj:``) makes it much easier to search |
| 758 | 750 | in the JSON file for the definition of an object | ... | ... |