Commit 5f4224f31a500452a4f97f36ed57351b41ca0114
1 parent
80acfc38
Simplify --json-output
Now --json-output just changes defaults. Allow output file with --json.
Showing
10 changed files
with
266 additions
and
302 deletions
TODO
| @@ -69,46 +69,11 @@ Soon: Break ground on "Document-level work" | @@ -69,46 +69,11 @@ Soon: Break ground on "Document-level work" | ||
| 69 | JSON v2 fixes | 69 | JSON v2 fixes |
| 70 | ============= | 70 | ============= |
| 71 | 71 | ||
| 72 | -* Unify code between QPDFJob::doJSONObjects and QPDF::writeJSON. Make | ||
| 73 | - sure that the "qpdf" key is always present when json-output is | ||
| 74 | - specified. | ||
| 75 | - | ||
| 76 | -* Change the name of the "qpdf-v2" key to "qpdf". Use that in place of | ||
| 77 | - "objects" and change its content to a two-element array whose first | ||
| 78 | - element is metadata required (or useful) for parsing and whose | ||
| 79 | - second element contains the actual data. Use of an array is the only | ||
| 80 | - way to ensure that the metadata is guaranteed to be parsed before we | ||
| 81 | - start parsing the objects. Example: | ||
| 82 | - | ||
| 83 | - { | ||
| 84 | - "qpdf": [ | ||
| 85 | - { | ||
| 86 | - "jsonversion": 2, | ||
| 87 | - "pdfversion": "1.3", | ||
| 88 | - "pushedinheritedpageresources": false, | ||
| 89 | - "calledgetallpages": false, | ||
| 90 | - "maxobjectid": 10 | ||
| 91 | - }, | ||
| 92 | - { | ||
| 93 | - ... objects ... | ||
| 94 | - } | ||
| 95 | - ] | ||
| 96 | - } | ||
| 97 | - | ||
| 98 | - This implies a few things: | ||
| 99 | - | ||
| 100 | - * Still need to test pushedinheritedpageresources and | ||
| 101 | - calledgetallpages and check/use their values when reading | ||
| 102 | - | ||
| 103 | - * Fix --json-help | ||
| 104 | - | ||
| 105 | - * When reading back in, we'll have to call | ||
| 106 | - pushInheritedAttributesToPage or getAllPages based on the values | ||
| 107 | - of the metadata. | ||
| 108 | - | ||
| 109 | - * Test --json with --json-stream-data and --json-output with | ||
| 110 | - --json-stream-data=none. Recheck writeJSON's handling of the | ||
| 111 | - pipeline argument. | 72 | +* Rethink QPDF::writeJSON. Maybe provide a simpler overload? |
| 73 | + | ||
| 74 | +* When reading back in, we'll have to call | ||
| 75 | + pushInheritedAttributesToPage or getAllPages based on the values | ||
| 76 | + of the metadata. | ||
| 112 | 77 | ||
| 113 | * Support json v2 in the C API. At a minimum, write_json, | 78 | * Support json v2 in the C API. At a minimum, write_json, |
| 114 | create_from_json, and update_from_json need to be there and should | 79 | create_from_json, and update_from_json need to be there and should |
include/qpdf/QPDF.hh
| @@ -133,7 +133,7 @@ class QPDF | @@ -133,7 +133,7 @@ class QPDF | ||
| 133 | QPDF_DLL | 133 | QPDF_DLL |
| 134 | void updateFromJSON(std::shared_ptr<InputSource>); | 134 | void updateFromJSON(std::shared_ptr<InputSource>); |
| 135 | 135 | ||
| 136 | - // Write qpdf json format to the pipeline "p". The only supported | 136 | + // Write qpdf JSON format to the pipeline "p". The only supported |
| 137 | // version is 2. | 137 | // version is 2. |
| 138 | // | 138 | // |
| 139 | // If the value of "complete" is true, a complete JSON object | 139 | // If the value of "complete" is true, a complete JSON object |
include/qpdf/QPDFJob.hh
| @@ -554,7 +554,7 @@ class QPDFJob | @@ -554,7 +554,7 @@ class QPDFJob | ||
| 554 | void setEncryptionOptions(QPDF&, QPDFWriter&); | 554 | void setEncryptionOptions(QPDF&, QPDFWriter&); |
| 555 | void maybeFixWritePassword(int R, std::string& password); | 555 | void maybeFixWritePassword(int R, std::string& password); |
| 556 | void writeOutfile(QPDF& pdf); | 556 | void writeOutfile(QPDF& pdf); |
| 557 | - void writeJSON(Pipeline* p, QPDF& pdf, bool complete, bool& first_key); | 557 | + void writeJSON(QPDF& pdf); |
| 558 | 558 | ||
| 559 | // JSON | 559 | // JSON |
| 560 | void doJSON(QPDF& pdf, Pipeline*); | 560 | void doJSON(QPDF& pdf, Pipeline*); |
job.sums
| @@ -8,10 +8,10 @@ include/qpdf/auto_job_c_pages.hh b3cc0f21029f6d89efa043dcdbfa183cb59325b6506001c | @@ -8,10 +8,10 @@ include/qpdf/auto_job_c_pages.hh b3cc0f21029f6d89efa043dcdbfa183cb59325b6506001c | ||
| 8 | include/qpdf/auto_job_c_uo.hh ae21b69a1efa9333050f4833d465f6daff87e5b38e5106e49bbef5d4132e4ed1 | 8 | include/qpdf/auto_job_c_uo.hh ae21b69a1efa9333050f4833d465f6daff87e5b38e5106e49bbef5d4132e4ed1 |
| 9 | job.yml f9564f18b08a45d17328af43652645771d3498471820c858b8c9013a193e1412 | 9 | job.yml f9564f18b08a45d17328af43652645771d3498471820c858b8c9013a193e1412 |
| 10 | libqpdf/qpdf/auto_job_decl.hh 7844eba58edffb9494b19e8eca6fd59a24d6e152ca606c3b07da569f753df2da | 10 | libqpdf/qpdf/auto_job_decl.hh 7844eba58edffb9494b19e8eca6fd59a24d6e152ca606c3b07da569f753df2da |
| 11 | -libqpdf/qpdf/auto_job_help.hh db2e4350c700e064b204e3e20d4fee4eddfe312b28092afcf608b4b6863d30e5 | 11 | +libqpdf/qpdf/auto_job_help.hh 700d7600b34588169c80f3e325e39e592e2f5c1af1cdac16614150ff38424b40 |
| 12 | libqpdf/qpdf/auto_job_init.hh fd1635a5ad6ba16b7ae008467145560a59a5ecfd10d29c5ef7cd0d8347747cd2 | 12 | libqpdf/qpdf/auto_job_init.hh fd1635a5ad6ba16b7ae008467145560a59a5ecfd10d29c5ef7cd0d8347747cd2 |
| 13 | libqpdf/qpdf/auto_job_json_decl.hh 06caa46eaf71db8a50c046f91866baa8087745a9474319fb7c86d92634cc8297 | 13 | libqpdf/qpdf/auto_job_json_decl.hh 06caa46eaf71db8a50c046f91866baa8087745a9474319fb7c86d92634cc8297 |
| 14 | libqpdf/qpdf/auto_job_json_init.hh 59545578a2e47c660ff98516ed53f06638be75eb4658e2a09d32cc08e0cb7268 | 14 | libqpdf/qpdf/auto_job_json_init.hh 59545578a2e47c660ff98516ed53f06638be75eb4658e2a09d32cc08e0cb7268 |
| 15 | -libqpdf/qpdf/auto_job_schema.hh 9d543cd4a43eafffc2c4b8a6fee29e399c271c52cb6f7d417ae5497b3c1127dc | 15 | +libqpdf/qpdf/auto_job_schema.hh 5352ef1be1ad7cc6f4f36dab88f2937d278e6bd3a0e2d46259794dc226c8ba6b |
| 16 | manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580 | 16 | manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580 |
| 17 | -manual/cli.rst 8e1f443c6fa000e023e516c318df4d04d58233d4d8648907c4a71f0ea5722bca | 17 | +manual/cli.rst bbce4cfb662a96c8df0c8563f8065844b77aca7b4ec6385955546b9a455d9953 |
libqpdf/QPDFJob.cc
| @@ -680,8 +680,15 @@ QPDFJob::checkConfiguration() | @@ -680,8 +680,15 @@ QPDFJob::checkConfiguration() | ||
| 680 | " an output file is specified"); | 680 | " an output file is specified"); |
| 681 | } else if (m->split_pages) { | 681 | } else if (m->split_pages) { |
| 682 | usage("--split-pages may not be used with --replace-input"); | 682 | usage("--split-pages may not be used with --replace-input"); |
| 683 | + } else if (m->json_version) { | ||
| 684 | + usage("--json may not be used with --replace-input"); | ||
| 683 | } | 685 | } |
| 684 | } | 686 | } |
| 687 | + if (m->json_version && (m->outfilename == nullptr)) { | ||
| 688 | + // The output file is optional with --json for backward | ||
| 689 | + // compatibility and defaults to standard output. | ||
| 690 | + m->outfilename = QUtil::make_shared_cstr("-"); | ||
| 691 | + } | ||
| 685 | if (m->infilename == nullptr) { | 692 | if (m->infilename == nullptr) { |
| 686 | usage("an input file name is required"); | 693 | usage("an input file name is required"); |
| 687 | } else if ( | 694 | } else if ( |
| @@ -1116,25 +1123,47 @@ QPDFJob::doJSONObject( | @@ -1116,25 +1123,47 @@ QPDFJob::doJSONObject( | ||
| 1116 | void | 1123 | void |
| 1117 | QPDFJob::doJSONObjects(Pipeline* p, bool& first, QPDF& pdf) | 1124 | QPDFJob::doJSONObjects(Pipeline* p, bool& first, QPDF& pdf) |
| 1118 | { | 1125 | { |
| 1119 | - JSON::writeDictionaryKey(p, first, "objects", 1); | ||
| 1120 | - bool first_object = true; | ||
| 1121 | - JSON::writeDictionaryOpen(p, first_object, 1); | ||
| 1122 | - bool all_objects = m->json_objects.empty(); | ||
| 1123 | - std::set<QPDFObjGen> wanted_og = getWantedJSONObjects(); | ||
| 1124 | - for (auto& obj: pdf.getAllObjects()) { | ||
| 1125 | - std::string key = obj.unparse(); | ||
| 1126 | - if (this->m->json_version > 1) { | ||
| 1127 | - key = "obj:" + key; | 1126 | + if (m->json_version == 1) { |
| 1127 | + JSON::writeDictionaryKey(p, first, "objects", 1); | ||
| 1128 | + bool first_object = true; | ||
| 1129 | + JSON::writeDictionaryOpen(p, first_object, 1); | ||
| 1130 | + bool all_objects = m->json_objects.empty(); | ||
| 1131 | + std::set<QPDFObjGen> wanted_og = getWantedJSONObjects(); | ||
| 1132 | + for (auto& obj: pdf.getAllObjects()) { | ||
| 1133 | + std::string key = obj.unparse(); | ||
| 1134 | + if (this->m->json_version > 1) { | ||
| 1135 | + key = "obj:" + key; | ||
| 1136 | + } | ||
| 1137 | + if (all_objects || wanted_og.count(obj.getObjGen())) { | ||
| 1138 | + doJSONObject(p, first_object, key, obj); | ||
| 1139 | + } | ||
| 1128 | } | 1140 | } |
| 1129 | - if (all_objects || wanted_og.count(obj.getObjGen())) { | ||
| 1130 | - doJSONObject(p, first_object, key, obj); | 1141 | + if (all_objects || m->json_objects.count("trailer")) { |
| 1142 | + auto trailer = pdf.getTrailer(); | ||
| 1143 | + doJSONObject(p, first_object, "trailer", trailer); | ||
| 1131 | } | 1144 | } |
| 1145 | + JSON::writeDictionaryClose(p, first_object, 1); | ||
| 1146 | + } else { | ||
| 1147 | + std::set<std::string> json_objects; | ||
| 1148 | + if (this->m->json_objects.count("trailer")) { | ||
| 1149 | + json_objects.insert("trailer"); | ||
| 1150 | + } | ||
| 1151 | + auto wanted = getWantedJSONObjects(); | ||
| 1152 | + for (auto const& og: wanted) { | ||
| 1153 | + std::ostringstream s; | ||
| 1154 | + s << "obj:" << og.unparse(' ') << " R"; | ||
| 1155 | + json_objects.insert(s.str()); | ||
| 1156 | + } | ||
| 1157 | + pdf.writeJSON( | ||
| 1158 | + this->m->json_version, | ||
| 1159 | + p, | ||
| 1160 | + false, | ||
| 1161 | + first, | ||
| 1162 | + this->m->decode_level, | ||
| 1163 | + this->m->json_stream_data, | ||
| 1164 | + this->m->json_stream_prefix, | ||
| 1165 | + json_objects); | ||
| 1132 | } | 1166 | } |
| 1133 | - if (all_objects || m->json_objects.count("trailer")) { | ||
| 1134 | - auto trailer = pdf.getTrailer(); | ||
| 1135 | - doJSONObject(p, first_object, "trailer", trailer); | ||
| 1136 | - } | ||
| 1137 | - JSON::writeDictionaryClose(p, first_object, 1); | ||
| 1138 | } | 1167 | } |
| 1139 | 1168 | ||
| 1140 | void | 1169 | void |
| @@ -1777,7 +1806,7 @@ void | @@ -1777,7 +1806,7 @@ void | ||
| 1777 | QPDFJob::doJSON(QPDF& pdf, Pipeline* p) | 1806 | QPDFJob::doJSON(QPDF& pdf, Pipeline* p) |
| 1778 | { | 1807 | { |
| 1779 | // qpdf guarantees that no new top-level keys whose names start | 1808 | // qpdf guarantees that no new top-level keys whose names start |
| 1780 | - // with "xdata" will be added. These are reserved for users. | 1809 | + // with "x-" will be added. These are reserved for users. |
| 1781 | 1810 | ||
| 1782 | std::string captured_json; | 1811 | std::string captured_json; |
| 1783 | std::shared_ptr<Pl_String> pl_str; | 1812 | std::shared_ptr<Pl_String> pl_str; |
| @@ -1788,32 +1817,38 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) | @@ -1788,32 +1817,38 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) | ||
| 1788 | 1817 | ||
| 1789 | bool first = true; | 1818 | bool first = true; |
| 1790 | JSON::writeDictionaryOpen(p, first, 0); | 1819 | JSON::writeDictionaryOpen(p, first, 0); |
| 1791 | - // This version is updated every time a non-backward-compatible | ||
| 1792 | - // change is made to the JSON format. Clients of the JSON are to | ||
| 1793 | - // ignore unrecognized keys, so we only update the version of a | ||
| 1794 | - // key disappears or if its value changes meaning. | ||
| 1795 | - JSON::writeDictionaryItem( | ||
| 1796 | - p, first, "version", JSON::makeInt(this->m->json_version), 1); | ||
| 1797 | - JSON j_params = JSON::makeDictionary(); | ||
| 1798 | - std::string decode_level_str; | ||
| 1799 | - switch (m->decode_level) { | ||
| 1800 | - case qpdf_dl_none: | ||
| 1801 | - decode_level_str = "none"; | ||
| 1802 | - break; | ||
| 1803 | - case qpdf_dl_generalized: | ||
| 1804 | - decode_level_str = "generalized"; | ||
| 1805 | - break; | ||
| 1806 | - case qpdf_dl_specialized: | ||
| 1807 | - decode_level_str = "specialized"; | ||
| 1808 | - break; | ||
| 1809 | - case qpdf_dl_all: | ||
| 1810 | - decode_level_str = "all"; | ||
| 1811 | - break; | ||
| 1812 | - } | ||
| 1813 | - j_params.addDictionaryMember( | ||
| 1814 | - "decodelevel", JSON::makeString(decode_level_str)); | ||
| 1815 | - JSON::writeDictionaryItem(p, first, "parameters", j_params, 1); | ||
| 1816 | 1820 | ||
| 1821 | + if (m->json_output) { | ||
| 1822 | + // Exclude version and parameters to keep the output file | ||
| 1823 | + // minimal. The JSON version is inside the "qpdf" key for | ||
| 1824 | + // version 2. | ||
| 1825 | + } else { | ||
| 1826 | + // This version is updated every time a non-backward-compatible | ||
| 1827 | + // change is made to the JSON format. Clients of the JSON are to | ||
| 1828 | + // ignore unrecognized keys, so we only update the version if a | ||
| 1829 | + // key disappears or if its value changes meaning. | ||
| 1830 | + JSON::writeDictionaryItem( | ||
| 1831 | + p, first, "version", JSON::makeInt(this->m->json_version), 1); | ||
| 1832 | + JSON j_params = JSON::makeDictionary(); | ||
| 1833 | + std::string decode_level_str; | ||
| 1834 | + switch (m->decode_level) { | ||
| 1835 | + case qpdf_dl_none: | ||
| 1836 | + decode_level_str = "none"; | ||
| 1837 | + break; | ||
| 1838 | + case qpdf_dl_generalized: | ||
| 1839 | + decode_level_str = "generalized"; | ||
| 1840 | + break; | ||
| 1841 | + case qpdf_dl_specialized: | ||
| 1842 | + decode_level_str = "specialized"; | ||
| 1843 | + break; | ||
| 1844 | + case qpdf_dl_all: | ||
| 1845 | + decode_level_str = "all"; | ||
| 1846 | + break; | ||
| 1847 | + } | ||
| 1848 | + j_params.addDictionaryMember( | ||
| 1849 | + "decodelevel", JSON::makeString(decode_level_str)); | ||
| 1850 | + JSON::writeDictionaryItem(p, first, "parameters", j_params, 1); | ||
| 1851 | + } | ||
| 1817 | bool all_keys = m->json_keys.empty(); | 1852 | bool all_keys = m->json_keys.empty(); |
| 1818 | // The list of selectable top-level keys is duplicated in the | 1853 | // The list of selectable top-level keys is duplicated in the |
| 1819 | // following places: job.yml, QPDFJob::json_schema, and | 1854 | // following places: job.yml, QPDFJob::json_schema, and |
| @@ -1850,11 +1885,7 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) | @@ -1850,11 +1885,7 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) | ||
| 1850 | // qpdf/objects/objectinfo without other keys. | 1885 | // qpdf/objects/objectinfo without other keys. |
| 1851 | if (all_keys || m->json_keys.count("objects") || | 1886 | if (all_keys || m->json_keys.count("objects") || |
| 1852 | m->json_keys.count("qpdf")) { | 1887 | m->json_keys.count("qpdf")) { |
| 1853 | - if (this->m->json_version == 1) { | ||
| 1854 | - doJSONObjects(p, first, pdf); | ||
| 1855 | - } else { | ||
| 1856 | - writeJSON(p, pdf, false, first); | ||
| 1857 | - } | 1888 | + doJSONObjects(p, first, pdf); |
| 1858 | } | 1889 | } |
| 1859 | if (this->m->json_version == 1) { | 1890 | if (this->m->json_version == 1) { |
| 1860 | // "objectinfo" is not needed for version >1 since you can | 1891 | // "objectinfo" is not needed for version >1 since you can |
| @@ -1889,9 +1920,6 @@ QPDFJob::doInspection(QPDF& pdf) | @@ -1889,9 +1920,6 @@ QPDFJob::doInspection(QPDF& pdf) | ||
| 1889 | if (m->check) { | 1920 | if (m->check) { |
| 1890 | doCheck(pdf); | 1921 | doCheck(pdf); |
| 1891 | } | 1922 | } |
| 1892 | - if (m->json_version) { | ||
| 1893 | - doJSON(pdf, &cout); | ||
| 1894 | - } | ||
| 1895 | if (m->show_npages) { | 1923 | if (m->show_npages) { |
| 1896 | QTC::TC("qpdf", "QPDFJob npages"); | 1924 | QTC::TC("qpdf", "QPDFJob npages"); |
| 1897 | cout << pdf.getRoot().getKey("/Pages").getKey("/Count").getIntValue() | 1925 | cout << pdf.getRoot().getKey("/Pages").getKey("/Count").getIntValue() |
| @@ -3337,9 +3365,8 @@ QPDFJob::writeOutfile(QPDF& pdf) | @@ -3337,9 +3365,8 @@ QPDFJob::writeOutfile(QPDF& pdf) | ||
| 3337 | } else if (strcmp(m->outfilename.get(), "-") == 0) { | 3365 | } else if (strcmp(m->outfilename.get(), "-") == 0) { |
| 3338 | m->outfilename = nullptr; | 3366 | m->outfilename = nullptr; |
| 3339 | } | 3367 | } |
| 3340 | - if (this->m->json_output) { | ||
| 3341 | - bool unused = true; | ||
| 3342 | - writeJSON(nullptr, pdf, true, unused); | 3368 | + if (this->m->json_version) { |
| 3369 | + writeJSON(pdf); | ||
| 3343 | } else { | 3370 | } else { |
| 3344 | // QPDFWriter must have block scope so the output file will be | 3371 | // QPDFWriter must have block scope so the output file will be |
| 3345 | // closed after write() finishes. | 3372 | // closed after write() finishes. |
| @@ -3393,52 +3420,30 @@ QPDFJob::writeOutfile(QPDF& pdf) | @@ -3393,52 +3420,30 @@ QPDFJob::writeOutfile(QPDF& pdf) | ||
| 3393 | } | 3420 | } |
| 3394 | 3421 | ||
| 3395 | void | 3422 | void |
| 3396 | -QPDFJob::writeJSON(Pipeline* p, QPDF& pdf, bool complete, bool& first_key) | 3423 | +QPDFJob::writeJSON(QPDF& pdf) |
| 3397 | { | 3424 | { |
| 3398 | // File pipeline must have block scope so it will be closed | 3425 | // File pipeline must have block scope so it will be closed |
| 3399 | // after write. | 3426 | // after write. |
| 3400 | std::shared_ptr<QUtil::FileCloser> fc; | 3427 | std::shared_ptr<QUtil::FileCloser> fc; |
| 3401 | std::shared_ptr<Pipeline> fp; | 3428 | std::shared_ptr<Pipeline> fp; |
| 3402 | - std::string file_prefix = this->m->json_stream_prefix; | ||
| 3403 | if (m->outfilename.get()) { | 3429 | if (m->outfilename.get()) { |
| 3404 | QTC::TC("qpdf", "QPDFJob write json to file"); | 3430 | QTC::TC("qpdf", "QPDFJob write json to file"); |
| 3405 | - if (file_prefix.empty()) { | ||
| 3406 | - file_prefix = this->m->outfilename.get(); | 3431 | + if (this->m->json_stream_prefix.empty()) { |
| 3432 | + this->m->json_stream_prefix = this->m->outfilename.get(); | ||
| 3407 | } | 3433 | } |
| 3408 | fc = std::make_shared<QUtil::FileCloser>( | 3434 | fc = std::make_shared<QUtil::FileCloser>( |
| 3409 | QUtil::safe_fopen(this->m->outfilename.get(), "w")); | 3435 | QUtil::safe_fopen(this->m->outfilename.get(), "w")); |
| 3410 | fp = std::make_shared<Pl_StdioFile>("json output", fc->f); | 3436 | fp = std::make_shared<Pl_StdioFile>("json output", fc->f); |
| 3411 | } else if ( | 3437 | } else if ( |
| 3412 | - (this->m->json_stream_data == qpdf_sj_file) && file_prefix.empty()) { | 3438 | + (this->m->json_stream_data == qpdf_sj_file) && |
| 3439 | + this->m->json_stream_prefix.empty()) { | ||
| 3413 | QTC::TC("qpdf", "QPDFJob need json-stream-prefix for stdout"); | 3440 | QTC::TC("qpdf", "QPDFJob need json-stream-prefix for stdout"); |
| 3414 | usage("please specify --json-stream-prefix since the input file " | 3441 | usage("please specify --json-stream-prefix since the input file " |
| 3415 | "name is unknown"); | 3442 | "name is unknown"); |
| 3416 | } else { | 3443 | } else { |
| 3417 | QTC::TC("qpdf", "QPDFJob write json to stdout"); | 3444 | QTC::TC("qpdf", "QPDFJob write json to stdout"); |
| 3418 | - if (p == nullptr) { | ||
| 3419 | - fp = this->m->log->getInfo(); | ||
| 3420 | - } | ||
| 3421 | - } | ||
| 3422 | - if (p == nullptr) { | ||
| 3423 | - p = fp.get(); | ||
| 3424 | - } | ||
| 3425 | - std::set<std::string> json_objects; | ||
| 3426 | - if (this->m->json_objects.count("trailer")) { | ||
| 3427 | - json_objects.insert("trailer"); | ||
| 3428 | - } | ||
| 3429 | - auto wanted = getWantedJSONObjects(); | ||
| 3430 | - for (auto const& og: wanted) { | ||
| 3431 | - std::ostringstream s; | ||
| 3432 | - s << "obj:" << og.unparse(' ') << " R"; | ||
| 3433 | - json_objects.insert(s.str()); | ||
| 3434 | - } | ||
| 3435 | - pdf.writeJSON( | ||
| 3436 | - this->m->json_version, | ||
| 3437 | - p, | ||
| 3438 | - complete, | ||
| 3439 | - first_key, | ||
| 3440 | - this->m->decode_level, | ||
| 3441 | - this->m->json_stream_data, | ||
| 3442 | - file_prefix, | ||
| 3443 | - json_objects); | 3445 | + this->m->log->saveToStandardOutput(true); |
| 3446 | + fp = this->m->log->getSave(); | ||
| 3447 | + } | ||
| 3448 | + doJSON(pdf, fp.get()); | ||
| 3444 | } | 3449 | } |
libqpdf/QPDFJob_config.cc
| @@ -244,7 +244,6 @@ QPDFJob::Config::json(std::string const& parameter) | @@ -244,7 +244,6 @@ QPDFJob::Config::json(std::string const& parameter) | ||
| 244 | if ((o.m->json_version < 1) || (o.m->json_version > JSON::LATEST)) { | 244 | if ((o.m->json_version < 1) || (o.m->json_version > JSON::LATEST)) { |
| 245 | usage(std::string("unsupported json version ") + parameter); | 245 | usage(std::string("unsupported json version ") + parameter); |
| 246 | } | 246 | } |
| 247 | - o.m->require_outfile = false; | ||
| 248 | return this; | 247 | return this; |
| 249 | } | 248 | } |
| 250 | 249 | ||
| @@ -297,14 +296,7 @@ QPDFJob::Config* | @@ -297,14 +296,7 @@ QPDFJob::Config* | ||
| 297 | QPDFJob::Config::jsonOutput(std::string const& parameter) | 296 | QPDFJob::Config::jsonOutput(std::string const& parameter) |
| 298 | { | 297 | { |
| 299 | o.m->json_output = true; | 298 | o.m->json_output = true; |
| 300 | - if (parameter.empty() || (parameter == "latest")) { | ||
| 301 | - o.m->json_version = JSON::LATEST; | ||
| 302 | - } else { | ||
| 303 | - o.m->json_version = QUtil::string_to_int(parameter.c_str()); | ||
| 304 | - } | ||
| 305 | - if ((o.m->json_version < 2) || (o.m->json_version > JSON::LATEST)) { | ||
| 306 | - usage(std::string("unsupported json output version ") + parameter); | ||
| 307 | - } | 299 | + json(parameter); |
| 308 | if (!o.m->json_stream_data_set) { | 300 | if (!o.m->json_stream_data_set) { |
| 309 | // No need to set json_stream_data_set -- that indicates | 301 | // No need to set json_stream_data_set -- that indicates |
| 310 | // explicit use of --json-stream-data. | 302 | // explicit use of --json-stream-data. |
| @@ -313,9 +305,7 @@ QPDFJob::Config::jsonOutput(std::string const& parameter) | @@ -313,9 +305,7 @@ QPDFJob::Config::jsonOutput(std::string const& parameter) | ||
| 313 | if (!o.m->decode_level_set) { | 305 | if (!o.m->decode_level_set) { |
| 314 | o.m->decode_level = qpdf_dl_none; | 306 | o.m->decode_level = qpdf_dl_none; |
| 315 | } | 307 | } |
| 316 | - if (o.m->json_keys.empty()) { | ||
| 317 | - o.m->json_keys.insert("qpdf"); | ||
| 318 | - } | 308 | + o.m->json_keys.insert("qpdf"); |
| 319 | return this; | 309 | return this; |
| 320 | } | 310 | } |
| 321 | 311 |
libqpdf/qpdf/auto_job_help.hh
| @@ -803,7 +803,9 @@ depth in the JSON section of the manual. "version" may be a | @@ -803,7 +803,9 @@ depth in the JSON section of the manual. "version" may be a | ||
| 803 | specific version or "latest" (the default). Run qpdf --json-help | 803 | specific version or "latest" (the default). Run qpdf --json-help |
| 804 | for a description of the generated JSON object. | 804 | for a description of the generated JSON object. |
| 805 | )"); | 805 | )"); |
| 806 | -ap.addOptionHelp("--json-help", "json", "show format of JSON output", R"(Describe the format of the JSON output by writing to standard | 806 | +ap.addOptionHelp("--json-help", "json", "show format of JSON output", R"(--json-help[=version] |
| 807 | + | ||
| 808 | +Describe the format of the JSON output by writing to standard | ||
| 807 | output a JSON object with the same keys and with values | 809 | output a JSON object with the same keys and with values |
| 808 | containing descriptive text. | 810 | containing descriptive text. |
| 809 | )"); | 811 | )"); |
| @@ -838,17 +840,17 @@ which is to use the output file name. Whatever is given here | @@ -838,17 +840,17 @@ which is to use the output file name. Whatever is given here | ||
| 838 | will be appended with -nnn to create the name of the file that | 840 | will be appended with -nnn to create the name of the file that |
| 839 | will contain the data for the stream in object nnn. | 841 | will contain the data for the stream in object nnn. |
| 840 | )"); | 842 | )"); |
| 841 | -ap.addOptionHelp("--json-output", "json", "serialize to JSON", R"(--json-output[=version] | 843 | +ap.addOptionHelp("--json-output", "json", "apply defaults for JSON serialization", R"(--json-output[=version] |
| 842 | 844 | ||
| 843 | -The output file will be qpdf JSON format at the given version. | ||
| 844 | -"version" may be a specific version or "latest" (the default). | ||
| 845 | -The only supported version is 2. See also --json-stream-data, | ||
| 846 | ---json-stream-prefix, and --decode-level. | 845 | +Implies --json=version. Changes default values for certain |
| 846 | +options so that the JSON output written is the most faithful | ||
| 847 | +representation of the original PDF and contains no additional | ||
| 848 | +JSON keys. See also --json-stream-data, --json-stream-prefix, | ||
| 849 | +and --decode-level. | ||
| 847 | )"); | 850 | )"); |
| 848 | -ap.addOptionHelp("--json-input", "json", "input file is qpdf JSON", R"(Treat the input file as a JSON file in qpdf JSON format as | ||
| 849 | -written by qpdf --json-output. See the "qpdf JSON Format" | ||
| 850 | -section of the manual for information about how to use this | ||
| 851 | -option. | 851 | +ap.addOptionHelp("--json-input", "json", "input file is qpdf JSON", R"(Treat the input file as a JSON file in qpdf JSON format. See the |
| 852 | +"qpdf JSON Format" section of the manual for information about | ||
| 853 | +how to use this option. | ||
| 852 | )"); | 854 | )"); |
| 853 | ap.addOptionHelp("--update-from-json", "json", "update a PDF from qpdf JSON", R"(--update-from-json=qpdf-json-file | 855 | ap.addOptionHelp("--update-from-json", "json", "update a PDF from qpdf JSON", R"(--update-from-json=qpdf-json-file |
| 854 | 856 |
libqpdf/qpdf/auto_job_schema.hh
| @@ -28,7 +28,7 @@ static constexpr char const* JOB_SCHEMA_DATA = R"({ | @@ -28,7 +28,7 @@ static constexpr char const* JOB_SCHEMA_DATA = R"({ | ||
| 28 | "forceVersion": "set output PDF version", | 28 | "forceVersion": "set output PDF version", |
| 29 | "progress": "show progress when writing", | 29 | "progress": "show progress when writing", |
| 30 | "splitPages": "write pages to separate files", | 30 | "splitPages": "write pages to separate files", |
| 31 | - "jsonOutput": "serialize to JSON", | 31 | + "jsonOutput": "apply defaults for JSON serialization", |
| 32 | "encrypt": { | 32 | "encrypt": { |
| 33 | "userPassword": "user password", | 33 | "userPassword": "user password", |
| 34 | "ownerPassword": "owner password", | 34 | "ownerPassword": "owner password", |
manual/cli.rst
| @@ -3194,7 +3194,16 @@ Related Options | @@ -3194,7 +3194,16 @@ Related Options | ||
| 3194 | :qpdf:ref:`--json-help` option to get a description of the JSON | 3194 | :qpdf:ref:`--json-help` option to get a description of the JSON |
| 3195 | object. | 3195 | object. |
| 3196 | 3196 | ||
| 3197 | -.. qpdf:option:: --json-help | 3197 | + Starting with qpdf 11, when this option is specified, an output |
| 3198 | + file is optional (for backward compatibility) and defaults to | ||
| 3199 | + standard output. You may specify an output file to write the JSON | ||
| 3200 | + to a file rather than standard output. | ||
| 3201 | + | ||
| 3202 | + Stream data is only included if :qpdf:ref:`--json-output` is | ||
| 3203 | + specified or if a value other than ``none`` is passed to | ||
| 3204 | + :qpdf:ref:`--json-stream-data`. | ||
| 3205 | + | ||
| 3206 | +.. qpdf:option:: --json-help[=version] | ||
| 3198 | 3207 | ||
| 3199 | .. help: show format of JSON output | 3208 | .. help: show format of JSON output |
| 3200 | 3209 | ||
| @@ -3202,12 +3211,13 @@ Related Options | @@ -3202,12 +3211,13 @@ Related Options | ||
| 3202 | output a JSON object with the same keys and with values | 3211 | output a JSON object with the same keys and with values |
| 3203 | containing descriptive text. | 3212 | containing descriptive text. |
| 3204 | 3213 | ||
| 3205 | - Describe the format of the JSON output by writing to standard | ||
| 3206 | - output a JSON object with the same structure as the JSON generated | ||
| 3207 | - by qpdf. In the output written by ``--json-help``, each key's value | ||
| 3208 | - is a description of the key. The specific contract guaranteed by | ||
| 3209 | - qpdf in its JSON representation is explained in more detail in the | ||
| 3210 | - :ref:`json`. | 3214 | + Describe the format of the corresponding version of JSON output by |
| 3215 | + writing to standard output a JSON object with the same structure as | ||
| 3216 | + the JSON generated by qpdf. In the output written by | ||
| 3217 | + ``--json-help``, each key's value is a description of the key. The | ||
| 3218 | + specific contract guaranteed by qpdf in its JSON representation is | ||
| 3219 | + explained in more detail in the :ref:`json`. The default version of | ||
| 3220 | + help is version ``2``, as with the :qpdf:ref:`--json` flag. | ||
| 3211 | 3221 | ||
| 3212 | .. qpdf:option:: --json-key=key | 3222 | .. qpdf:option:: --json-key=key |
| 3213 | 3223 | ||
| @@ -3233,11 +3243,9 @@ Related Options | @@ -3233,11 +3243,9 @@ Related Options | ||
| 3233 | objects will be shown. | 3243 | objects will be shown. |
| 3234 | 3244 | ||
| 3235 | This option is repeatable. If given, only specified objects will be | 3245 | This option is repeatable. If given, only specified objects will be |
| 3236 | - shown in the ``"objects"`` key of the JSON output. Otherwise, all | ||
| 3237 | - objects will be shown. For qpdf JSON version 1, this also affects | ||
| 3238 | - the ``"objectinfo"`` key, which is not present in version 2. This | ||
| 3239 | - option may be used with :qpdf:ref:`--json` and also with | ||
| 3240 | - :qpdf:ref:`--json-output`. | 3246 | + shown in the objects dictionary in the JSON output. Otherwise, all |
| 3247 | + objects will be shown. See :ref:`json` for details about the qpdf | ||
| 3248 | + JSON format. | ||
| 3241 | 3249 | ||
| 3242 | .. qpdf:option:: --json-stream-data={none|inline|file} | 3250 | .. qpdf:option:: --json-stream-data={none|inline|file} |
| 3243 | 3251 | ||
| @@ -3281,28 +3289,30 @@ Related Options | @@ -3281,28 +3289,30 @@ Related Options | ||
| 3281 | 3289 | ||
| 3282 | .. qpdf:option:: --json-output[=version] | 3290 | .. qpdf:option:: --json-output[=version] |
| 3283 | 3291 | ||
| 3284 | - .. help: serialize to JSON | 3292 | + .. help: apply defaults for JSON serialization |
| 3285 | 3293 | ||
| 3286 | - The output file will be qpdf JSON format at the given version. | ||
| 3287 | - "version" may be a specific version or "latest" (the default). | ||
| 3288 | - The only supported version is 2. See also --json-stream-data, | ||
| 3289 | - --json-stream-prefix, and --decode-level. | 3294 | + Implies --json=version. Changes default values for certain |
| 3295 | + options so that the JSON output written is the most faithful | ||
| 3296 | + representation of the original PDF and contains no additional | ||
| 3297 | + JSON keys. See also --json-stream-data, --json-stream-prefix, | ||
| 3298 | + and --decode-level. | ||
| 3290 | 3299 | ||
| 3291 | - The output file, instead of being a PDF file, will be a JSON file | ||
| 3292 | - in qpdf JSON format at the given version. ``version`` may be a | ||
| 3293 | - specific version or ``latest`` (the default). The only supported | ||
| 3294 | - version is 2. See also :qpdf:ref:`--json-stream-data` and | ||
| 3295 | - :qpdf:ref:`--json-stream-prefix`. This option also changes the | ||
| 3296 | - following defaults: | 3300 | + Implies :qpdf:ref:`--json` at the specified version. This option |
| 3301 | + changes several default values, all of which can be overridden by | ||
| 3302 | + specifying the stated option: | ||
| 3297 | 3303 | ||
| 3298 | - The default value for :qpdf:ref:`--json-stream-data` changes from | 3304 | - The default value for :qpdf:ref:`--json-stream-data` changes from |
| 3299 | ``none`` to ``inline``. | 3305 | ``none`` to ``inline``. |
| 3300 | 3306 | ||
| 3301 | - - The default decode level for stream data becomes ``none``, but you can | ||
| 3302 | - override it with :qpdf:ref:`--decode-level`. | 3307 | + - The default value for :qpdf:ref:`--decode-level` changes from |
| 3308 | + ``generalized`` to ``none``. | ||
| 3309 | + | ||
| 3310 | + - By default, only the ``"qpdf"`` key is included in the JSON | ||
| 3311 | + output, but you can add additional keys with | ||
| 3312 | + :qpdf:ref:`--json-key`. | ||
| 3303 | 3313 | ||
| 3304 | - - Only the ``"qpdf"`` key is included in the JSON output, but you | ||
| 3305 | - can add additional keys with :qpdf:ref:`--json-key`. | 3314 | + - Excludes the ``"version"`` and ``"parameters"`` keys from the |
| 3315 | + JSON output. | ||
| 3306 | 3316 | ||
| 3307 | If you want to look at the contents of streams easily as you would | 3317 | If you want to look at the contents of streams easily as you would |
| 3308 | in QDF mode (see :ref:`qdf`), you can use | 3318 | in QDF mode (see :ref:`qdf`), you can use |
| @@ -3313,15 +3323,15 @@ Related Options | @@ -3313,15 +3323,15 @@ Related Options | ||
| 3313 | 3323 | ||
| 3314 | .. help: input file is qpdf JSON | 3324 | .. help: input file is qpdf JSON |
| 3315 | 3325 | ||
| 3316 | - Treat the input file as a JSON file in qpdf JSON format as | ||
| 3317 | - written by qpdf --json-output. See the "qpdf JSON Format" | ||
| 3318 | - section of the manual for information about how to use this | ||
| 3319 | - option. | 3326 | + Treat the input file as a JSON file in qpdf JSON format. See the |
| 3327 | + "qpdf JSON Format" section of the manual for information about | ||
| 3328 | + how to use this option. | ||
| 3320 | 3329 | ||
| 3321 | - Treat the input file as a JSON file in qpdf JSON format as written | ||
| 3322 | - by ``qpdf --json-output``. The input file must be complete and | ||
| 3323 | - include all stream data. For information about converting between | ||
| 3324 | - PDF and JSON, please see :ref:`json`. | 3330 | + Treat the input file as a JSON file in qpdf JSON format. The input |
| 3331 | + file must be complete and include all stream data. The JSON version | ||
| 3332 | + must be at least 2. All top-level keys are ignored except for | ||
| 3333 | + ``"qpdf"``. For information about converting between PDF and JSON, | ||
| 3334 | + please see :ref:`json`. | ||
| 3325 | 3335 | ||
| 3326 | .. qpdf:option:: --update-from-json=qpdf-json-file | 3336 | .. qpdf:option:: --update-from-json=qpdf-json-file |
| 3327 | 3337 |
manual/json.rst
| @@ -24,27 +24,28 @@ represents the contents of a PDF file. This is distinct from the | @@ -24,27 +24,28 @@ represents the contents of a PDF file. This is distinct from the | ||
| 24 | interacting with qpdf the way the command-line tool does. For | 24 | interacting with qpdf the way the command-line tool does. For |
| 25 | information about that, see :ref:`qpdf-job`. | 25 | information about that, see :ref:`qpdf-job`. |
| 26 | 26 | ||
| 27 | -The qpdf JSON format is specific to qpdf. There are two ways to use | ||
| 28 | -qpdf JSON: | ||
| 29 | - | ||
| 30 | -- The :qpdf:ref:`--json` command-line flag causes creation of a JSON | ||
| 31 | - representation of all the objects in a PDF file, excluding stream | ||
| 32 | - data. This includes an unambiguous representation of the PDF object | ||
| 33 | - structure and also provides JSON-formatted summaries of other | ||
| 34 | - information about the file. This functionality is built into | ||
| 35 | - ``QPDFJob`` and can be accessed from the ``qpdf`` command-line tool | ||
| 36 | - or from the ``QPDFJob`` C or C++ API. | ||
| 37 | - | ||
| 38 | -- qpdf can create a JSON file that completely represents a PDF file. | ||
| 39 | - You can think of this as using JSON as an *alternative syntax* for | ||
| 40 | - representing a PDF file. Using qpdf JSON, it is possible to | ||
| 41 | - convert a PDF file to JSON, manipulate the structure or contents of | ||
| 42 | - the objects at a low level, and convert the results back to a PDF | ||
| 43 | - file. This functionality can be accessed from the command-line with | ||
| 44 | - the :qpdf:ref:`--json-output`, :qpdf:ref:`--json-input`, and | ||
| 45 | - :qpdf:ref:`--update-from-json` flags, or from the API using the | ||
| 46 | - ``QPDF::writeJSON``, ``QPDF::createFromJSON``, and | ||
| 47 | - ``QPDF::updateFromJSON`` methods. | 27 | +The qpdf JSON format is specific to qpdf. With JSON version 2, the |
| 28 | +:qpdf:ref:`--json` command-line flag causes creation of a JSON | ||
| 29 | +representation of all the objects in a PDF file. This includes an | ||
| 30 | +unambiguous representation of the PDF object structure and also | ||
| 31 | +provides JSON-formatted summaries of other information about the file. | ||
| 32 | +This functionality is built into ``QPDFJob`` and can be accessed from | ||
| 33 | +the ``qpdf`` command-line tool or from the ``QPDFJob`` C or C++ API. | ||
| 34 | + | ||
| 35 | +By default, stream data is omitted, but it can be included by | ||
| 36 | +specifying the :qpdf:ref:`--json-stream-data` option. With stream data | ||
| 37 | +included, the generated JSON file completely represents a PDF file. | ||
| 38 | +You can think of this as using JSON as an *alternative syntax* for | ||
| 39 | +representing a PDF file. Using qpdf JSON, it is possible to convert a | ||
| 40 | +PDF file to JSON, manipulate the structure or contents of the objects | ||
| 41 | +at a low level, and convert the results back to a PDF file. This | ||
| 42 | +functionality can be accessed from the command-line with the | ||
| 43 | + :qpdf:ref:`--json-input` and :qpdf:ref:`--update-from-json` flags, or | ||
| 44 | +from the API using the ``QPDF::writeJSON``, ``QPDF::createFromJSON``, | ||
| 45 | +and ``QPDF::updateFromJSON`` methods. The :qpdf:ref:`--json-output` | ||
| 46 | +flag changes a handful of defaults so that the resulting JSON is as | ||
| 47 | +close as possible to the original input and is ready for being | ||
| 48 | +converted back to PDF. | ||
| 48 | 49 | ||
| 49 | .. _json-terminology: | 50 | .. _json-terminology: |
| 50 | 51 | ||
| @@ -120,18 +121,53 @@ qpdf JSON Object Representation | @@ -120,18 +121,53 @@ qpdf JSON Object Representation | ||
| 120 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 121 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| 121 | 122 | ||
| 122 | This section describes the representation of PDF objects in qpdf JSON | 123 | This section describes the representation of PDF objects in qpdf JSON |
| 123 | -version 2. PDF objects are represented within the ``"objects"`` | ||
| 124 | -dictionary of a qpdf JSON file. This is true both for PDF serialized | ||
| 125 | -to JSON (:qpdf:ref:`--json-output`, ``QPDF::writeJSON``) or objects as | ||
| 126 | -they appear in the output of ``qpdf`` with the :qpdf:ref:`--json` | ||
| 127 | -option. | ||
| 128 | - | ||
| 129 | -Each key in the ``"objects"`` dictionary is either ``"trailer"`` or a | ||
| 130 | -string of the form ``"obj:O G R"`` where ``O`` and ``G`` are the | ||
| 131 | -object and generation numbers and ``R`` is the literal string ``R``. | ||
| 132 | -This is the PDF syntax for the indirect object reference prepended by | ||
| 133 | -``obj:``. The value, representing the object itself, is a JSON object | ||
| 134 | -whose structure is described below. | 124 | +version 2. PDF objects are represented within the ``"qpdf"`` entry of |
| 125 | +a qpdf JSON file. The ``"qpdf"`` entry is a two-element array. The | ||
| 126 | +first element is a dictionary containing header-like information about | ||
| 127 | +the file such as the PDF version. The second element is a dictionary | ||
| 128 | +containing all the objects in the PDF file. We refer to this as the | ||
| 129 | +*objects dictionary*. | ||
| 130 | + | ||
| 131 | +The first element contains the following keys: | ||
| 132 | + | ||
| 133 | +- ``"jsonversion"`` -- a number indicating the JSON version used for | ||
| 134 | + writing. This will always be ``2``. | ||
| 135 | + | ||
| 136 | + - ``"pdfversion"`` -- a string containing the PDF version as indicated in | ||
| 137 | + the PDF header (e.g. ``"1.7"``, ``"2.0"``) | ||
| 138 | + | ||
| 139 | + - ``"pushedinheritedpageresources"`` -- a boolean indicating whether | ||
| 140 | + the library pushed inherited resources down to the page level. | ||
| 141 | + Certain library calls cause this to happen, and qpdf needs to know | ||
| 142 | + when reading a JSON file back in whether it should do this as it may | ||
| 143 | + cause certain objects to be renumbered. | ||
| 144 | + | ||
| 145 | + - ``"calledgetallpages"`` -- a boolean indicating whether | ||
| 146 | + ``getAllPages`` was called prior to writing the JSON output. This | ||
| 147 | + method causes page tree repair to occur, which may renumber some | ||
| 148 | + objects (in very rare cases of corrupted page trees), so qpdf needs | ||
| 149 | + to know this information when reading a JSON file back in. | ||
| 150 | + | ||
| 151 | +- ``"maxobjectid"`` -- a number indicating the object ID of the | ||
| 152 | + highest numbered object in the file. This is provided to make it | ||
| 153 | + easier for software that wants to add new objects to the file as you | ||
| 154 | + can safely start with one above that number when creating new | ||
| 155 | + objects. Note that the value of ``"maxobjectid"`` may be higher than | ||
| 156 | + the actual maximum object that appears in the input PDF since it | ||
| 157 | + takes into consideration any dangling indirect object references | ||
| 158 | + from the original file. This prevents you from unwittingly creating | ||
| 159 | + an object that doesn't exist but that is referenced, which may have | ||
| 160 | + unintended side effects. (The PDF specification explicitly allows | ||
| 161 | + dangling references and says to treat them as nulls. This can happen | ||
| 162 | + if objects are removed from a PDF file.) | ||
| 163 | + | ||
| 164 | +The second element is the objects dictionary. Each key in the objects | ||
| 165 | +dictionary is either ``"trailer"`` or a string of the form ``"obj:O G | ||
| 166 | +R"`` where ``O`` and ``G`` are the object and generation numbers and | ||
| 167 | +``R`` is the literal string ``R``. This is the PDF syntax for the | ||
| 168 | +indirect object reference prepended by ``obj:``. The value, | ||
| 169 | +representing the object itself, is a JSON object whose structure is | ||
| 170 | +described below. | ||
| 135 | 171 | ||
| 136 | Top-level Stream Objects | 172 | Top-level Stream Objects |
| 137 | Stream objects are represented as a JSON object with the single key | 173 | Stream objects are represented as a JSON object with the single key |
| @@ -143,6 +179,7 @@ Top-level Stream Objects | @@ -143,6 +179,7 @@ Top-level Stream Objects | ||
| 143 | 179 | ||
| 144 | - ``none``: stream data is not represented; no other keys are | 180 | - ``none``: stream data is not represented; no other keys are |
| 145 | present | 181 | present |
| 146 | 183 | ||
| 147 | - ``inline``: the stream data appears as a base64-encoded string as | 184 | - ``inline``: the stream data appears as a base64-encoded string as |
| 148 | the value of the ``"data"`` key | 185 | the value of the ``"data"`` key |
| @@ -249,57 +286,6 @@ Object Values | @@ -249,57 +286,6 @@ Object Values | ||
| 249 | the string representations of names and whose values are | 286 | the string representations of names and whose values are |
| 250 | representations of PDF objects. | 287 | representations of PDF objects. |
| 251 | 288 | ||
| 252 | -.. _json.output: | ||
| 253 | - | ||
| 254 | -qpdf JSON Output | ||
| 255 | -~~~~~~~~~~~~~~~~ | ||
| 256 | - | ||
| 257 | -The format of the JSON written by qpdf's :qpdf:ref:`--json-output` | ||
| 258 | -flag or the ``QPDF::writeJSON`` API call is a JSON object consisting | ||
| 259 | -of a single key: ``"qpdf"``. This may be the only key, or it may be | ||
| 260 | -embedded in the output of ``qpdf --json``. Unknown keys are ignored | ||
| 261 | -for future compatibility. It is guaranteed that qpdf will never add | ||
| 262 | -any keys whose names start with ``xdata``, so users are free to add | ||
| 263 | -their own metadata using keys whose names start with ``xdata`` without | ||
| 264 | -fear of clashing with a future version of qpdf. | ||
| 265 | - | ||
| 266 | -The ``"qpdf"`` key points to a two-element JSON array. The first element is | ||
| 267 | -a JSON object with the following keys: | ||
| 268 | - | ||
| 269 | -- ``"jsonversion"`` -- a number indicating the JSON version used for | ||
| 270 | - writing. This will always be ``2``. | ||
| 271 | - | ||
| 272 | -- ``"pdfversion"`` -- a string containing PDF version as indicated in | ||
| 273 | - the PDF header (e.g. ``"1.7"``, ``"2.0"``) | ||
| 274 | - | ||
| 275 | -- ``pushedinheritedpageresources`` -- a boolean indicating whether | ||
| 276 | - the library pushed inherited resources down to the page level. | ||
| 277 | - Certain library calls cause this to happen, and qpdf needs to know | ||
| 278 | - when reading a JSON file back in whether it should do this as it may | ||
| 279 | - cause certain objects to be renumbered. | ||
| 280 | - | ||
| 281 | -- ``calledgetallpages`` -- a boolean indicating whether | ||
| 282 | - ``getAllPages`` was called prior to writing the JSON output. This | ||
| 283 | - method causes page tree repair to occur, which may renumber some | ||
| 284 | - objects (in very rare cases of corrupted page trees), so qpdf needs | ||
| 285 | - to know this information when reading a JSON file back in. | ||
| 286 | - | ||
| 287 | -- ``"maxobjectid"`` -- a number indicating the object ID of the | ||
| 288 | - highest numbered object in the file. This is provided to make it | ||
| 289 | - easier for software that wants to add new objects to the file as you | ||
| 290 | - can safely start with one above that number when creating new | ||
| 291 | - objects. Note that the value of ``"maxobjectid"`` may be higher than | ||
| 292 | - the actual maximum object that appears in the input PDF since it | ||
| 293 | - takes into consideration any dangling indirect object references | ||
| 294 | - from the original file. This prevents you from unwittingly creating | ||
| 295 | - an object that doesn't exist but that is referenced, which may have | ||
| 296 | - unintended side effects. (The PDF specification explicitly allows | ||
| 297 | - dangling references and says to treat them as nulls. This can happen | ||
| 298 | - if objects are removed from a PDF file.) | ||
| 299 | - | ||
| 300 | -The second element is a JSON object containing the actual PDF objects | ||
| 301 | -as described in :ref:`json.objects`. | ||
| 302 | - | ||
| 303 | Note that writing JSON output is done by ``QPDF``, not ``QPDFWriter``. | 289 | Note that writing JSON output is done by ``QPDF``, not ``QPDFWriter``. |
| 304 | As such, none of the things ``QPDFWriter`` does apply. This includes | 290 | As such, none of the things ``QPDFWriter`` does apply. This includes |
| 305 | recompression of streams, renumbering of objects, anything to do with | 291 | recompression of streams, renumbering of objects, anything to do with |
| @@ -325,7 +311,7 @@ qpdf JSON format. | @@ -325,7 +311,7 @@ qpdf JSON format. | ||
| 325 | "pdfversion": "1.3", | 311 | "pdfversion": "1.3", |
| 326 | "pushedinheritedpageresources": false, | 312 | "pushedinheritedpageresources": false, |
| 327 | "calledgetallpages": false, | 313 | "calledgetallpages": false, |
| 328 | - "maxobjectid": 5, | 314 | + "maxobjectid": 5 |
| 329 | }, | 315 | }, |
| 330 | { | 316 | { |
| 331 | "obj:1 0 R": { | 317 | "obj:1 0 R": { |
| @@ -389,8 +375,7 @@ qpdf JSON format. | @@ -389,8 +375,7 @@ qpdf JSON format. | ||
| 389 | qpdf JSON Input | 375 | qpdf JSON Input |
| 390 | ~~~~~~~~~~~~~~~ | 376 | ~~~~~~~~~~~~~~~ |
| 391 | 377 | ||
| 392 | -Output in the JSON output format described in :ref:`json.output` can | ||
| 393 | -be used in two different ways: | 378 | +The qpdf JSON output can be used in two different ways: |
| 394 | 379 | ||
| 395 | - By using the :qpdf:ref:`--json-input` flag or calling | 380 | - By using the :qpdf:ref:`--json-input` flag or calling |
| 396 | ``QPDF::createFromJSON`` in place of ``QPDF::processFile``, a qpdf | 381 | ``QPDF::createFromJSON`` in place of ``QPDF::processFile``, a qpdf |
| @@ -408,8 +393,11 @@ Here are some important things to know about qpdf JSON input. | @@ -408,8 +393,11 @@ Here are some important things to know about qpdf JSON input. | ||
| 408 | - When a qpdf JSON file is used as the primary input file, it must be | 393 | - When a qpdf JSON file is used as the primary input file, it must be |
| 409 | complete. This means | 394 | complete. This means |
| 410 | 395 | ||
| 396 | + - A JSON version number must be specified with the ``"jsonversion"`` | ||
| 397 | + key in the first array element | ||
| 398 | + | ||
| 411 | - A PDF version number must be specified with the ``"pdfversion"`` | 399 | - A PDF version number must be specified with the ``"pdfversion"`` |
| 412 | - key | 400 | + key in the first array element |
| 413 | 401 | ||
| 414 | - Stream data must be present for all streams | 402 | - Stream data must be present for all streams |
| 415 | 403 | ||
| @@ -422,6 +410,9 @@ Here are some important things to know about qpdf JSON input. | @@ -422,6 +410,9 @@ Here are some important things to know about qpdf JSON input. | ||
| 422 | - ``"maxobjectid"`` is ignored, so it is not necessary to update it | 410 | - ``"maxobjectid"`` is ignored, so it is not necessary to update it |
| 423 | when adding new objects. | 411 | when adding new objects. |
| 424 | 412 | ||
| 413 | + - ``"calledgetallpages"`` and ``"pushedinheritedpageresources"`` are | ||
| 414 | + treated as false if omitted. | ||
| 415 | + | ||
| 425 | - ``"/Length"`` is ignored in all stream dictionaries. qpdf doesn't | 416 | - ``"/Length"`` is ignored in all stream dictionaries. qpdf doesn't |
| 426 | put it there when it creates JSON output, and it is not necessary | 417 | put it there when it creates JSON output, and it is not necessary |
| 427 | to add it. | 418 | to add it. |
| @@ -432,14 +423,13 @@ Here are some important things to know about qpdf JSON input. | @@ -432,14 +423,13 @@ Here are some important things to know about qpdf JSON input. | ||
| 432 | - Unknown keys at the top level of the file, within ``objects``, | 423 | - Unknown keys at the top level of the file, within ``objects``, |
| 433 | at the top level of each individual object (inside the object that | 424 | at the top level of each individual object (inside the object that |
| 434 | has the ``"value"`` or ``"stream"`` key) and directly within | 425 | has the ``"value"`` or ``"stream"`` key) and directly within |
| 435 | - ``"stream"`` are ignored for future compatibility. You should | ||
| 436 | - avoid putting your own values in those places if you wish to avoid | ||
| 437 | - risking that your JSON files will not work in future versions of | ||
| 438 | - qpdf. The exception to this advice is at the top level of the | ||
| 439 | - overall file where it is explicitly supported for you to add your | ||
| 440 | - own keys. For example, you could add your own metadata at the top | ||
| 441 | - level, and qpdf will ignore it. Note that extra top-level keys are | ||
| 442 | - not preserved when qpdf reads your JSON file. | 426 | + ``"stream"`` are ignored for future compatibility. This includes |
| 427 | + other top-level keys generated by ``qpdf`` itself (such as | ||
| 428 | + ``"pages"``). As such, those keys don't have to be consistent with | ||
| 429 | + the ``"qpdf"`` key if modifying a JSON file for conversion back to | ||
| 430 | + PDF. If you wish to store application-specific metadata, you can | ||
| 431 | + do so by adding a key whose name starts with ``x-``. qpdf is | ||
| 432 | + guaranteed not to add any of its own keys that start with ``x-``. | ||
| 443 | 433 | ||
| 444 | - When qpdf reads a PDF file, the internal object numbers are always | 434 | - When qpdf reads a PDF file, the internal object numbers are always |
| 445 | preserved. However, when qpdf writes a file using ``QPDFWriter``, | 435 | preserved. However, when qpdf writes a file using ``QPDFWriter``, |
| @@ -458,9 +448,9 @@ Here are some important things to know about qpdf JSON input. | @@ -458,9 +448,9 @@ Here are some important things to know about qpdf JSON input. | ||
| 458 | # edit pdf.json | 448 | # edit pdf.json |
| 459 | qpdf in.pdf out.pdf --update-from-json=pdf.json | 449 | qpdf in.pdf out.pdf --update-from-json=pdf.json |
| 460 | 450 | ||
| 461 | - The following will not produce predictable results because | ||
| 462 | - ``out.pdf`` won't have the same object numbers as ``pdf.json`` and | ||
| 463 | - ``in.pdf``. | 451 | + The following will produce unpredictable and probably incorrect |
| 452 | + results because ``out.pdf`` won't have the same object numbers as | ||
| 453 | + ``pdf.json`` and ``in.pdf``. | ||
| 464 | 454 | ||
| 465 | :: | 455 | :: |
| 466 | 456 | ||
| @@ -658,15 +648,16 @@ be aware of: | @@ -658,15 +648,16 @@ be aware of: | ||
| 658 | - If a PDF file has certain types of errors in its pages tree (such as | 648 | - If a PDF file has certain types of errors in its pages tree (such as |
| 659 | page objects that are direct or multiple pages sharing the same | 649 | page objects that are direct or multiple pages sharing the same |
| 660 | object ID), qpdf will automatically repair the pages tree. If you | 650 | object ID), qpdf will automatically repair the pages tree. If you |
| 661 | - specify ``"objects"`` (and, with qpdf JSON version 1, also | 651 | + specify ``"qpdf"`` (or, with qpdf JSON version 1, ``"objects"`` or |
| 662 | ``"objectinfo"``) without any other keys, you will see the original | 652 | ``"objectinfo"``) without any other keys, you will see the original |
| 663 | pages tree without any corrections. If you specify any of the keys that | 653 | pages tree without any corrections. If you specify any of the keys that |
| 664 | require page tree traversal (for example, ``"pages"``, | 654 | require page tree traversal (for example, ``"pages"``, |
| 665 | - ``"outlines"``, or ``"pagelabel"``), then ``"objects"`` (and | ||
| 666 | - ``"objectinfo"``) will show the repaired page tree so that object | ||
| 667 | - references will be consistent throughout the file. This is not an | ||
| 668 | - issue with :qpdf:ref:`--json-output`, which doesn't repair the pages | ||
| 669 | - tree. | 655 | + ``"outlines"``, or ``"pagelabel"``), then ``"qpdf"`` (and |
| 656 | + ``"objects"`` and ``"objectinfo"``) will show the repaired page | ||
| 657 | + tree so that object references will be consistent throughout the | ||
| 658 | + file. You can tell if this has happened by looking at the | ||
| 659 | + ``"calledgetallpages"`` and ``"pushedinheritedpageresources"`` | ||
| 660 | + fields in the first element of the ``"qpdf"`` array. | ||
| 670 | 661 | ||
| 671 | - While qpdf guarantees that keys present in the help will be present | 662 | - While qpdf guarantees that keys present in the help will be present |
| 672 | in the output, those fields may be null or empty if the information | 663 | in the output, those fields may be null or empty if the information |
| @@ -743,16 +734,17 @@ version 2. | @@ -743,16 +734,17 @@ version 2. | ||
| 743 | dictionary containing either a ``"value"`` key or a ``"stream"`` | 734 | dictionary containing either a ``"value"`` key or a ``"stream"`` |
| 744 | key, making it possible to distinguish streams from other objects. | 735 | key, making it possible to distinguish streams from other objects. |
| 745 | 736 | ||
| 746 | -- The ``"objectinfo"`` key has been removed in favor of a | ||
| 747 | - representation in ``"objects"`` that differentiates between a stream | ||
| 748 | - and other kinds of objects. In v1, it was not possible to tell a | ||
| 749 | - stream from a dictionary within ``"objects"``. | ||
| 750 | - | ||
| 751 | -- Within the ``"objects"`` dictionary, keys are now ``"obj:O G R"`` | ||
| 752 | - where ``O`` and ``G`` are the object and generation number. | ||
| 753 | - ``"trailer"`` remains the key for the trailer dictionary. In v1, the | ||
| 754 | - ``obj:`` prefix was not present. The rationale for this change is as | ||
| 755 | - follows: | 737 | +- The ``"objectinfo"`` and ``"objects"`` keys have been removed in |
| 738 | + favor of a representation in ``"qpdf"`` that includes header | ||
| 739 | + information and differentiates between a stream and other kinds of | ||
| 740 | + objects. In v1, it was not possible to tell a stream from a | ||
| 741 | + dictionary within ``"objects"``, and the PDF version was not | ||
| 742 | + captured at all. | ||
| 743 | + | ||
| 744 | +- Within the objects dictionary, keys are now ``"obj:O G R"`` where | ||
| 745 | + ``O`` and ``G`` are the object and generation numbers. ``"trailer"`` | ||
| 746 | + remains the key for the trailer dictionary. In v1, the ``obj:`` | ||
| 747 | + prefix was not present. The rationale for this change is as follows: | ||
| 756 | 748 | ||
| 757 | - Having a unique prefix (``obj:``) makes it much easier to search | 749 | - Having a unique prefix (``obj:``) makes it much easier to search |
| 758 | in the JSON file for the definition of an object | 750 | in the JSON file for the definition of an object |