Commit 6580ffe983aa9c2885555b6b4d7cf68fd3f16301
1 parent
fa366435
Preliminary implementation of json mode
The json mode implemented in this commit is not the final version, nor are the command line arguments used to invoke it.
Showing
1 changed file
with
239 additions
and
0 deletions
qpdf/qpdf.cc
| @@ -17,6 +17,7 @@ | @@ -17,6 +17,7 @@ | ||
| 17 | #include <qpdf/QPDFPageDocumentHelper.hh> | 17 | #include <qpdf/QPDFPageDocumentHelper.hh> |
| 18 | #include <qpdf/QPDFPageObjectHelper.hh> | 18 | #include <qpdf/QPDFPageObjectHelper.hh> |
| 19 | #include <qpdf/QPDFPageLabelDocumentHelper.hh> | 19 | #include <qpdf/QPDFPageLabelDocumentHelper.hh> |
| 20 | +#include <qpdf/QPDFOutlineDocumentHelper.hh> | ||
| 20 | #include <qpdf/QPDFExc.hh> | 21 | #include <qpdf/QPDFExc.hh> |
| 21 | 22 | ||
| 22 | #include <qpdf/QPDFWriter.hh> | 23 | #include <qpdf/QPDFWriter.hh> |
| @@ -117,6 +118,7 @@ struct Options | @@ -117,6 +118,7 @@ struct Options | ||
| 117 | show_filtered_stream_data(false), | 118 | show_filtered_stream_data(false), |
| 118 | show_pages(false), | 119 | show_pages(false), |
| 119 | show_page_images(false), | 120 | show_page_images(false), |
| 121 | + show_json(false), | ||
| 120 | check(false), | 122 | check(false), |
| 121 | require_outfile(true), | 123 | require_outfile(true), |
| 122 | infilename(0), | 124 | infilename(0), |
| @@ -189,6 +191,7 @@ struct Options | @@ -189,6 +191,7 @@ struct Options | ||
| 189 | bool show_filtered_stream_data; | 191 | bool show_filtered_stream_data; |
| 190 | bool show_pages; | 192 | bool show_pages; |
| 191 | bool show_page_images; | 193 | bool show_page_images; |
| 194 | + bool show_json; | ||
| 192 | bool check; | 195 | bool check; |
| 193 | std::vector<PageSpec> page_specs; | 196 | std::vector<PageSpec> page_specs; |
| 194 | std::map<std::string, RotationSpec> rotations; | 197 | std::map<std::string, RotationSpec> rotations; |
| @@ -549,6 +552,75 @@ void usage(std::string const& msg) | @@ -549,6 +552,75 @@ void usage(std::string const& msg) | ||
| 549 | exit(EXIT_ERROR); | 552 | exit(EXIT_ERROR); |
| 550 | } | 553 | } |
| 551 | 554 | ||
| 555 | +static JSON json_schema() | ||
| 556 | +{ | ||
| 557 | + // This JSON object doubles as a schema and as documentation for | ||
| 558 | + // our JSON output. Any schema mismatch is a bug in qpdf. This | ||
| 559 | + // helps to enforce our policy of consistently providing a known | ||
| 560 | + // structure where every documented key will always be present, | ||
| 561 | + // which makes it easier to consume our JSON. This is discussed in | ||
| 562 | + // more depth in the manual. | ||
| 563 | + JSON schema = JSON::makeDictionary(); | ||
| 564 | + schema.addDictionaryMember( | ||
| 565 | + "version", JSON::makeString( | ||
| 566 | + "JSON format serial number; increased for non-compatible changes")); | ||
| 567 | + schema.addDictionaryMember( | ||
| 568 | + "objects", JSON::makeString( | ||
| 569 | + "Original objects; keys are 'trailer' or 'n n R'")); | ||
| 570 | + JSON page = schema.addDictionaryMember("pages", JSON::makeArray()). | ||
| 571 | + addArrayElement(JSON::makeDictionary()); | ||
| 572 | + page.addDictionaryMember( | ||
| 573 | + "object", | ||
| 574 | + JSON::makeString("reference to original page object")); | ||
| 575 | + JSON image = page.addDictionaryMember("images", JSON::makeArray()). | ||
| 576 | + addArrayElement(JSON::makeDictionary()); | ||
| 577 | + image.addDictionaryMember( | ||
| 578 | + "object", | ||
| 579 | + JSON::makeString("reference to image stream")); | ||
| 580 | + image.addDictionaryMember( | ||
| 581 | + "width", | ||
| 582 | + JSON::makeString("image width")); | ||
| 583 | + image.addDictionaryMember( | ||
| 584 | + "height", | ||
| 585 | + JSON::makeString("image height")); | ||
| 586 | + image.addDictionaryMember("filter", JSON::makeArray()). | ||
| 587 | + addArrayElement( | ||
| 588 | + JSON::makeString("filters applied to image data")); | ||
| 589 | + image.addDictionaryMember("decodeparms", JSON::makeArray()). | ||
| 590 | + addArrayElement( | ||
| 591 | + JSON::makeString("decode parameters for image data")); | ||
| 592 | + image.addDictionaryMember( | ||
| 593 | + "filterable", | ||
| 594 | + JSON::makeString("whether image data can be decoded" | ||
| 595 | + " using the decode level qpdf was invoked with")); | ||
| 596 | + page.addDictionaryMember("contents", JSON::makeArray()). | ||
| 597 | + addArrayElement( | ||
| 598 | + JSON::makeString("reference to each content stream")); | ||
| 599 | + page.addDictionaryMember( | ||
| 600 | + "label", | ||
| 601 | + JSON::makeString("page label dictionary, or null if none")); | ||
| 602 | + JSON labels = schema.addDictionaryMember("pagelabels", JSON::makeArray()). | ||
| 603 | + addArrayElement(JSON::makeDictionary()); | ||
| 604 | + labels.addDictionaryMember( | ||
| 605 | + "index", | ||
| 606 | + JSON::makeString("starting page position starting from zero")); | ||
| 607 | + labels.addDictionaryMember( | ||
| 608 | + "label", | ||
| 609 | + JSON::makeString("page label dictionary")); | ||
| 610 | + JSON outline = page.addDictionaryMember("outlines", JSON::makeArray()). | ||
| 611 | + addArrayElement(JSON::makeDictionary()); | ||
| 612 | + outline.addDictionaryMember( | ||
| 613 | + "object", | ||
| 614 | + JSON::makeString("reference to outline that targets this page")); | ||
| 615 | + outline.addDictionaryMember( | ||
| 616 | + "title", | ||
| 617 | + JSON::makeString("outline title")); | ||
| 618 | + outline.addDictionaryMember( | ||
| 619 | + "destination", | ||
| 620 | + JSON::makeString("outline destination dictionary")); | ||
| 621 | + return schema; | ||
| 622 | +} | ||
| 623 | + | ||
| 552 | static std::string show_bool(bool v) | 624 | static std::string show_bool(bool v) |
| 553 | { | 625 | { |
| 554 | return v ? "allowed" : "not allowed"; | 626 | return v ? "allowed" : "not allowed"; |
| @@ -1613,6 +1685,11 @@ static void parse_options(int argc, char* argv[], Options& o) | @@ -1613,6 +1685,11 @@ static void parse_options(int argc, char* argv[], Options& o) | ||
| 1613 | { | 1685 | { |
| 1614 | o.show_page_images = true; | 1686 | o.show_page_images = true; |
| 1615 | } | 1687 | } |
| 1688 | + else if (strcmp(arg, "show-json") == 0) | ||
| 1689 | + { | ||
| 1690 | + o.show_json = true; | ||
| 1691 | + o.require_outfile = false; | ||
| 1692 | + } | ||
| 1616 | else if (strcmp(arg, "check") == 0) | 1693 | else if (strcmp(arg, "check") == 0) |
| 1617 | { | 1694 | { |
| 1618 | o.check = true; | 1695 | o.check = true; |
| @@ -1884,6 +1961,164 @@ static void do_show_pages(QPDF& pdf, Options& o) | @@ -1884,6 +1961,164 @@ static void do_show_pages(QPDF& pdf, Options& o) | ||
| 1884 | } | 1961 | } |
| 1885 | } | 1962 | } |
| 1886 | 1963 | ||
| 1964 | +static void do_show_json(QPDF& pdf, Options& o) | ||
| 1965 | +{ | ||
| 1966 | + JSON j = JSON::makeDictionary(); | ||
| 1967 | + // This version is updated every time a non-backward-compatible | ||
| 1968 | + // change is made to the JSON format. Clients of the JSON are to | ||
| 1969 | + // ignore unrecognized keys, so we only update the version if a | ||
| 1970 | + // key disappears or if its value changes meaning. | ||
| 1971 | + j.addDictionaryMember("version", JSON::makeInt(1)); | ||
| 1972 | + | ||
| 1973 | + // Objects | ||
| 1974 | + | ||
| 1975 | + // Add all objects. Do this first before other code below modifies | ||
| 1976 | + // things by doing stuff like calling | ||
| 1977 | + // pushInheritedAttributesToPage. | ||
| 1978 | + JSON j_objects = j.addDictionaryMember("objects", JSON::makeDictionary()); | ||
| 1979 | + j_objects.addDictionaryMember("trailer", pdf.getTrailer().getJSON(true)); | ||
| 1980 | + std::vector<QPDFObjectHandle> objects = pdf.getAllObjects(); | ||
| 1981 | + for (std::vector<QPDFObjectHandle>::iterator iter = objects.begin(); | ||
| 1982 | + iter != objects.end(); ++iter) | ||
| 1983 | + { | ||
| 1984 | + j_objects.addDictionaryMember( | ||
| 1985 | + (*iter).unparse(), (*iter).getJSON(true)); | ||
| 1986 | + } | ||
| 1987 | + | ||
| 1988 | + // Pages | ||
| 1989 | + | ||
| 1990 | + JSON j_pages = j.addDictionaryMember("pages", JSON::makeArray()); | ||
| 1991 | + QPDFPageDocumentHelper dh(pdf); | ||
| 1992 | + QPDFPageLabelDocumentHelper pldh(pdf); | ||
| 1993 | + QPDFOutlineDocumentHelper odh(pdf); | ||
| 1994 | + dh.pushInheritedAttributesToPage(); | ||
| 1995 | + std::vector<QPDFPageObjectHelper> pages = dh.getAllPages(); | ||
| 1996 | + size_t pageno = 0; | ||
| 1997 | + for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin(); | ||
| 1998 | + iter != pages.end(); ++iter, ++pageno) | ||
| 1999 | + { | ||
| 2000 | + JSON j_page = j_pages.addArrayElement(JSON::makeDictionary()); | ||
| 2001 | + QPDFPageObjectHelper& ph(*iter); | ||
| 2002 | + QPDFObjectHandle page = ph.getObjectHandle(); | ||
| 2003 | + j_page.addDictionaryMember("object", page.getJSON()); | ||
| 2004 | + JSON j_images = j_page.addDictionaryMember( | ||
| 2005 | + "images", JSON::makeArray()); | ||
| 2006 | + std::map<std::string, QPDFObjectHandle> images = | ||
| 2007 | + ph.getPageImages(); | ||
| 2008 | + for (std::map<std::string, QPDFObjectHandle>::iterator iter = | ||
| 2009 | + images.begin(); | ||
| 2010 | + iter != images.end(); ++iter) | ||
| 2011 | + { | ||
| 2012 | + JSON j_image = j_images.addArrayElement(JSON::makeDictionary()); | ||
| 2013 | + j_image.addDictionaryMember( | ||
| 2014 | + "name", JSON::makeString((*iter).first)); | ||
| 2015 | + QPDFObjectHandle image = (*iter).second; | ||
| 2016 | + QPDFObjectHandle dict = image.getDict(); | ||
| 2017 | + j_image.addDictionaryMember("object", image.getJSON()); | ||
| 2018 | + j_image.addDictionaryMember( | ||
| 2019 | + "width", dict.getKey("/Width").getJSON()); | ||
| 2020 | + j_image.addDictionaryMember( | ||
| 2021 | + "height", dict.getKey("/Height").getJSON()); | ||
| 2022 | + QPDFObjectHandle filters = dict.getKey("/Filter").wrapInArray(); | ||
| 2023 | + j_image.addDictionaryMember( | ||
| 2024 | + "filter", filters.getJSON()); | ||
| 2025 | + QPDFObjectHandle decode_parms = dict.getKey("/DecodeParms"); | ||
| 2026 | + QPDFObjectHandle dp_array; | ||
| 2027 | + if (decode_parms.isArray()) | ||
| 2028 | + { | ||
| 2029 | + dp_array = decode_parms; | ||
| 2030 | + } | ||
| 2031 | + else | ||
| 2032 | + { | ||
| 2033 | + dp_array = QPDFObjectHandle::newArray(); | ||
| 2034 | + for (int i = 0; i < filters.getArrayNItems(); ++i) | ||
| 2035 | + { | ||
| 2036 | + dp_array.appendItem(decode_parms); | ||
| 2037 | + } | ||
| 2038 | + } | ||
| 2039 | + j_image.addDictionaryMember("decodeparms", dp_array.getJSON()); | ||
| 2040 | + j_image.addDictionaryMember( | ||
| 2041 | + "filterable", | ||
| 2042 | + JSON::makeBool( | ||
| 2043 | + image.pipeStreamData(0, 0, o.decode_level, true))); | ||
| 2044 | + } | ||
| 2045 | + j_page.addDictionaryMember("images", j_images); | ||
| 2046 | + JSON j_contents = j_page.addDictionaryMember( | ||
| 2047 | + "contents", JSON::makeArray()); | ||
| 2048 | + std::vector<QPDFObjectHandle> content = ph.getPageContents(); | ||
| 2049 | + for (std::vector<QPDFObjectHandle>::iterator iter = content.begin(); | ||
| 2050 | + iter != content.end(); ++iter) | ||
| 2051 | + { | ||
| 2052 | + j_contents.addArrayElement((*iter).getJSON()); | ||
| 2053 | + } | ||
| 2054 | + j_page.addDictionaryMember( | ||
| 2055 | + "label", pldh.getLabelForPage(pageno).getJSON()); | ||
| 2056 | + JSON j_outlines = j_page.addDictionaryMember( | ||
| 2057 | + "outlines", JSON::makeArray()); | ||
| 2058 | + std::list<QPDFOutlineObjectHelper> outlines = | ||
| 2059 | + odh.getOutlinesForPage(page.getObjGen()); | ||
| 2060 | + for (std::list<QPDFOutlineObjectHelper>::iterator oiter = | ||
| 2061 | + outlines.begin(); | ||
| 2062 | + oiter != outlines.end(); ++oiter) | ||
| 2063 | + { | ||
| 2064 | + JSON j_outline = j_outlines.addArrayElement(JSON::makeDictionary()); | ||
| 2065 | + j_outline.addDictionaryMember( | ||
| 2066 | + "object", (*oiter).getObjectHandle().getJSON()); | ||
| 2067 | + j_outline.addDictionaryMember( | ||
| 2068 | + "title", JSON::makeString((*oiter).getTitle())); | ||
| 2069 | + j_outline.addDictionaryMember( | ||
| 2070 | + "destination", (*oiter).getDest().getJSON(true)); | ||
| 2071 | + } | ||
| 2072 | + } | ||
| 2073 | + | ||
| 2074 | + // Page labels | ||
| 2075 | + | ||
| 2076 | + JSON j_labels = j.addDictionaryMember("pagelabels", JSON::makeArray()); | ||
| 2077 | + if (pldh.hasPageLabels()) | ||
| 2078 | + { | ||
| 2079 | + std::vector<QPDFObjectHandle> labels; | ||
| 2080 | + pldh.getLabelsForPageRange(0, pages.size() - 1, 0, labels); | ||
| 2081 | + for (std::vector<QPDFObjectHandle>::iterator iter = labels.begin(); | ||
| 2082 | + iter != labels.end(); ++iter) | ||
| 2083 | + { | ||
| 2084 | + std::vector<QPDFObjectHandle>::iterator next = iter; | ||
| 2085 | + ++next; | ||
| 2086 | + if (next == labels.end()) | ||
| 2087 | + { | ||
| 2088 | + // This can't happen, so ignore it. This could only | ||
| 2089 | + // happen if getLabelsForPageRange somehow returned an | ||
| 2090 | + // odd number of items. | ||
| 2091 | + break; | ||
| 2092 | + } | ||
| 2093 | + JSON j_label = j_labels.addArrayElement(JSON::makeDictionary()); | ||
| 2094 | + j_label.addDictionaryMember("index", (*iter).getJSON()); | ||
| 2095 | + ++iter; | ||
| 2096 | + j_label.addDictionaryMember("label", (*iter).getJSON()); | ||
| 2097 | + } | ||
| 2098 | + } | ||
| 2099 | + | ||
| 2100 | + // Check against schema | ||
| 2101 | + | ||
| 2102 | + JSON schema = json_schema(); | ||
| 2103 | + std::list<std::string> errors; | ||
| 2104 | + if (! j.checkSchema(schema, errors)) | ||
| 2105 | + { | ||
| 2106 | + std::cerr | ||
| 2107 | + << whoami << " didn't create JSON that complies with its own\n\ | ||
| 2108 | +rules. Please report this as a bug at\n\ | ||
| 2109 | + https://github.com/qpdf/qpdf/issues/new\n\ | ||
| 2110 | +ideally with the file that caused the error and the output below. Thanks!\n\ | ||
| 2111 | +\n"; | ||
| 2112 | + for (std::list<std::string>::iterator iter = errors.begin(); | ||
| 2113 | + iter != errors.end(); ++iter) | ||
| 2114 | + { | ||
| 2115 | + std::cerr << (*iter) << std::endl; | ||
| 2116 | + } | ||
| 2117 | + } | ||
| 2118 | + | ||
| 2119 | + std::cout << j.serialize() << std::endl; | ||
| 2120 | +} | ||
| 2121 | + | ||
| 1887 | static void do_inspection(QPDF& pdf, Options& o) | 2122 | static void do_inspection(QPDF& pdf, Options& o) |
| 1888 | { | 2123 | { |
| 1889 | int exit_code = 0; | 2124 | int exit_code = 0; |
| @@ -1891,6 +2126,10 @@ static void do_inspection(QPDF& pdf, Options& o) | @@ -1891,6 +2126,10 @@ static void do_inspection(QPDF& pdf, Options& o) | ||
| 1891 | { | 2126 | { |
| 1892 | do_check(pdf, o, exit_code); | 2127 | do_check(pdf, o, exit_code); |
| 1893 | } | 2128 | } |
| 2129 | + if (o.show_json) | ||
| 2130 | + { | ||
| 2131 | + do_show_json(pdf, o); | ||
| 2132 | + } | ||
| 1894 | if (o.show_npages) | 2133 | if (o.show_npages) |
| 1895 | { | 2134 | { |
| 1896 | QTC::TC("qpdf", "qpdf npages"); | 2135 | QTC::TC("qpdf", "qpdf npages"); |