Commit 6580ffe983aa9c2885555b6b4d7cf68fd3f16301

Authored by Jay Berkenbilt
1 parent fa366435

Preliminary implementation of json mode

The JSON mode implemented in this commit is not the final version, nor
are the command-line arguments used to invoke it final.
Showing 1 changed file with 239 additions and 0 deletions
qpdf/qpdf.cc
@@ -17,6 +17,7 @@ @@ -17,6 +17,7 @@
17 #include <qpdf/QPDFPageDocumentHelper.hh> 17 #include <qpdf/QPDFPageDocumentHelper.hh>
18 #include <qpdf/QPDFPageObjectHelper.hh> 18 #include <qpdf/QPDFPageObjectHelper.hh>
19 #include <qpdf/QPDFPageLabelDocumentHelper.hh> 19 #include <qpdf/QPDFPageLabelDocumentHelper.hh>
  20 +#include <qpdf/QPDFOutlineDocumentHelper.hh>
20 #include <qpdf/QPDFExc.hh> 21 #include <qpdf/QPDFExc.hh>
21 22
22 #include <qpdf/QPDFWriter.hh> 23 #include <qpdf/QPDFWriter.hh>
@@ -117,6 +118,7 @@ struct Options @@ -117,6 +118,7 @@ struct Options
117 show_filtered_stream_data(false), 118 show_filtered_stream_data(false),
118 show_pages(false), 119 show_pages(false),
119 show_page_images(false), 120 show_page_images(false),
  121 + show_json(false),
120 check(false), 122 check(false),
121 require_outfile(true), 123 require_outfile(true),
122 infilename(0), 124 infilename(0),
@@ -189,6 +191,7 @@ struct Options @@ -189,6 +191,7 @@ struct Options
189 bool show_filtered_stream_data; 191 bool show_filtered_stream_data;
190 bool show_pages; 192 bool show_pages;
191 bool show_page_images; 193 bool show_page_images;
  194 + bool show_json;
192 bool check; 195 bool check;
193 std::vector<PageSpec> page_specs; 196 std::vector<PageSpec> page_specs;
194 std::map<std::string, RotationSpec> rotations; 197 std::map<std::string, RotationSpec> rotations;
@@ -549,6 +552,75 @@ void usage(std::string const& msg) @@ -549,6 +552,75 @@ void usage(std::string const& msg)
549 exit(EXIT_ERROR); 552 exit(EXIT_ERROR);
550 } 553 }
551 554
  555 +static JSON json_schema()
  556 +{
  557 + // This JSON object doubles as a schema and as documentation for
  558 + // our JSON output. Any schema mismatch is a bug in qpdf. This
  559 + // helps to enforce our policy of consistently providing a known
  560 + // structure where every documented key will always be present,
  561 + // which makes it easier to consume our JSON. This is discussed in
  562 + // more depth in the manual.
  563 + JSON schema = JSON::makeDictionary();
  564 + schema.addDictionaryMember(
  565 + "version", JSON::makeString(
  566 + "JSON format serial number; increased for non-compatible changes"));
  567 + schema.addDictionaryMember(
  568 + "objects", JSON::makeString(
  569 + "Original objects; keys are 'trailer' or 'n n R'"));
  570 + JSON page = schema.addDictionaryMember("pages", JSON::makeArray()).
  571 + addArrayElement(JSON::makeDictionary());
  572 + page.addDictionaryMember(
  573 + "object",
  574 + JSON::makeString("reference to original page object"));
  575 + JSON image = page.addDictionaryMember("images", JSON::makeArray()).
  576 + addArrayElement(JSON::makeDictionary());
  577 + image.addDictionaryMember(
  578 + "object",
  579 + JSON::makeString("reference to image stream"));
  580 + image.addDictionaryMember(
  581 + "width",
  582 + JSON::makeString("image width"));
  583 + image.addDictionaryMember(
  584 + "height",
  585 + JSON::makeString("image height"));
  586 + image.addDictionaryMember("filter", JSON::makeArray()).
  587 + addArrayElement(
  588 + JSON::makeString("filters applied to image data"));
  589 + image.addDictionaryMember("decodeparms", JSON::makeArray()).
  590 + addArrayElement(
  591 + JSON::makeString("decode parameters for image data"));
  592 + image.addDictionaryMember(
  593 + "filterable",
  594 + JSON::makeString("whether image data can be decoded"
  595 + " using the decode level qpdf was invoked with"));
  596 + page.addDictionaryMember("contents", JSON::makeArray()).
  597 + addArrayElement(
  598 + JSON::makeString("reference to each content stream"));
  599 + page.addDictionaryMember(
  600 + "label",
  601 + JSON::makeString("page label dictionary, or null if none"));
  602 + JSON labels = schema.addDictionaryMember("pagelabels", JSON::makeArray()).
  603 + addArrayElement(JSON::makeDictionary());
  604 + labels.addDictionaryMember(
  605 + "index",
  606 + JSON::makeString("starting page position starting from zero"));
  607 + labels.addDictionaryMember(
  608 + "label",
  609 + JSON::makeString("page label dictionary"));
  610 + JSON outline = page.addDictionaryMember("outlines", JSON::makeArray()).
  611 + addArrayElement(JSON::makeDictionary());
  612 + outline.addDictionaryMember(
  613 + "object",
  614 + JSON::makeString("reference to outline that targets this page"));
  615 + outline.addDictionaryMember(
  616 + "title",
  617 + JSON::makeString("outline title"));
  618 + outline.addDictionaryMember(
  619 + "destination",
  620 + JSON::makeString("outline destination dictionary"));
  621 + return schema;
  622 +}
  623 +
552 static std::string show_bool(bool v) 624 static std::string show_bool(bool v)
553 { 625 {
554 return v ? "allowed" : "not allowed"; 626 return v ? "allowed" : "not allowed";
@@ -1613,6 +1685,11 @@ static void parse_options(int argc, char* argv[], Options& o) @@ -1613,6 +1685,11 @@ static void parse_options(int argc, char* argv[], Options& o)
1613 { 1685 {
1614 o.show_page_images = true; 1686 o.show_page_images = true;
1615 } 1687 }
  1688 + else if (strcmp(arg, "show-json") == 0)
  1689 + {
  1690 + o.show_json = true;
  1691 + o.require_outfile = false;
  1692 + }
1616 else if (strcmp(arg, "check") == 0) 1693 else if (strcmp(arg, "check") == 0)
1617 { 1694 {
1618 o.check = true; 1695 o.check = true;
@@ -1884,6 +1961,164 @@ static void do_show_pages(QPDF& pdf, Options& o) @@ -1884,6 +1961,164 @@ static void do_show_pages(QPDF& pdf, Options& o)
1884 } 1961 }
1885 } 1962 }
1886 1963
  1964 +static void do_show_json(QPDF& pdf, Options& o)
  1965 +{
  1966 + JSON j = JSON::makeDictionary();
  1967 + // This version is updated every time a non-backward-compatible
  1968 + // change is made to the JSON format. Clients of the JSON are to
  1969 + // ignore unrecognized keys, so we only update the version of a
  1970 + // key disappears or if its value changes meaning.
  1971 + j.addDictionaryMember("version", JSON::makeInt(1));
  1972 +
  1973 + // Objects
  1974 +
  1975 + // Add all objects. Do this first before other code below modifies
  1976 + // things by doing stuff like calling
  1977 + // pushInheritedAttributesToPage.
  1978 + JSON j_objects = j.addDictionaryMember("objects", JSON::makeDictionary());
  1979 + j_objects.addDictionaryMember("trailer", pdf.getTrailer().getJSON(true));
  1980 + std::vector<QPDFObjectHandle> objects = pdf.getAllObjects();
  1981 + for (std::vector<QPDFObjectHandle>::iterator iter = objects.begin();
  1982 + iter != objects.end(); ++iter)
  1983 + {
  1984 + j_objects.addDictionaryMember(
  1985 + (*iter).unparse(), (*iter).getJSON(true));
  1986 + }
  1987 +
  1988 + // Pages
  1989 +
  1990 + JSON j_pages = j.addDictionaryMember("pages", JSON::makeArray());
  1991 + QPDFPageDocumentHelper dh(pdf);
  1992 + QPDFPageLabelDocumentHelper pldh(pdf);
  1993 + QPDFOutlineDocumentHelper odh(pdf);
  1994 + dh.pushInheritedAttributesToPage();
  1995 + std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
  1996 + size_t pageno = 0;
  1997 + for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
  1998 + iter != pages.end(); ++iter, ++pageno)
  1999 + {
  2000 + JSON j_page = j_pages.addArrayElement(JSON::makeDictionary());
  2001 + QPDFPageObjectHelper& ph(*iter);
  2002 + QPDFObjectHandle page = ph.getObjectHandle();
  2003 + j_page.addDictionaryMember("object", page.getJSON());
  2004 + JSON j_images = j_page.addDictionaryMember(
  2005 + "images", JSON::makeArray());
  2006 + std::map<std::string, QPDFObjectHandle> images =
  2007 + ph.getPageImages();
  2008 + for (std::map<std::string, QPDFObjectHandle>::iterator iter =
  2009 + images.begin();
  2010 + iter != images.end(); ++iter)
  2011 + {
  2012 + JSON j_image = j_images.addArrayElement(JSON::makeDictionary());
  2013 + j_image.addDictionaryMember(
  2014 + "name", JSON::makeString((*iter).first));
  2015 + QPDFObjectHandle image = (*iter).second;
  2016 + QPDFObjectHandle dict = image.getDict();
  2017 + j_image.addDictionaryMember("object", image.getJSON());
  2018 + j_image.addDictionaryMember(
  2019 + "width", dict.getKey("/Width").getJSON());
  2020 + j_image.addDictionaryMember(
  2021 + "height", dict.getKey("/Height").getJSON());
  2022 + QPDFObjectHandle filters = dict.getKey("/Filter").wrapInArray();
  2023 + j_image.addDictionaryMember(
  2024 + "filter", filters.getJSON());
  2025 + QPDFObjectHandle decode_parms = dict.getKey("/DecodeParms");
  2026 + QPDFObjectHandle dp_array;
  2027 + if (decode_parms.isArray())
  2028 + {
  2029 + dp_array = decode_parms;
  2030 + }
  2031 + else
  2032 + {
  2033 + dp_array = QPDFObjectHandle::newArray();
  2034 + for (int i = 0; i < filters.getArrayNItems(); ++i)
  2035 + {
  2036 + dp_array.appendItem(decode_parms);
  2037 + }
  2038 + }
  2039 + j_image.addDictionaryMember("decodeparms", dp_array.getJSON());
  2040 + j_image.addDictionaryMember(
  2041 + "filterable",
  2042 + JSON::makeBool(
  2043 + image.pipeStreamData(0, 0, o.decode_level, true)));
  2044 + }
  2045 + j_page.addDictionaryMember("images", j_images);
  2046 + JSON j_contents = j_page.addDictionaryMember(
  2047 + "contents", JSON::makeArray());
  2048 + std::vector<QPDFObjectHandle> content = ph.getPageContents();
  2049 + for (std::vector<QPDFObjectHandle>::iterator iter = content.begin();
  2050 + iter != content.end(); ++iter)
  2051 + {
  2052 + j_contents.addArrayElement((*iter).getJSON());
  2053 + }
  2054 + j_page.addDictionaryMember(
  2055 + "label", pldh.getLabelForPage(pageno).getJSON());
  2056 + JSON j_outlines = j_page.addDictionaryMember(
  2057 + "outlines", JSON::makeArray());
  2058 + std::list<QPDFOutlineObjectHelper> outlines =
  2059 + odh.getOutlinesForPage(page.getObjGen());
  2060 + for (std::list<QPDFOutlineObjectHelper>::iterator oiter =
  2061 + outlines.begin();
  2062 + oiter != outlines.end(); ++oiter)
  2063 + {
  2064 + JSON j_outline = j_outlines.addArrayElement(JSON::makeDictionary());
  2065 + j_outline.addDictionaryMember(
  2066 + "object", (*oiter).getObjectHandle().getJSON());
  2067 + j_outline.addDictionaryMember(
  2068 + "title", JSON::makeString((*oiter).getTitle()));
  2069 + j_outline.addDictionaryMember(
  2070 + "destination", (*oiter).getDest().getJSON(true));
  2071 + }
  2072 + }
  2073 +
  2074 + // Page labels
  2075 +
  2076 + JSON j_labels = j.addDictionaryMember("pagelabels", JSON::makeArray());
  2077 + if (pldh.hasPageLabels())
  2078 + {
  2079 + std::vector<QPDFObjectHandle> labels;
  2080 + pldh.getLabelsForPageRange(0, pages.size() - 1, 0, labels);
  2081 + for (std::vector<QPDFObjectHandle>::iterator iter = labels.begin();
  2082 + iter != labels.end(); ++iter)
  2083 + {
  2084 + std::vector<QPDFObjectHandle>::iterator next = iter;
  2085 + ++next;
  2086 + if (next == labels.end())
  2087 + {
  2088 + // This can't happen, so ignore it. This could only
  2089 + // happen if getLabelsForPageRange somehow returned an
  2090 + // odd number of items.
  2091 + break;
  2092 + }
  2093 + JSON j_label = j_labels.addArrayElement(JSON::makeDictionary());
  2094 + j_label.addDictionaryMember("index", (*iter).getJSON());
  2095 + ++iter;
  2096 + j_label.addDictionaryMember("label", (*iter).getJSON());
  2097 + }
  2098 + }
  2099 +
  2100 + // Check against schema
  2101 +
  2102 + JSON schema = json_schema();
  2103 + std::list<std::string> errors;
  2104 + if (! j.checkSchema(schema, errors))
  2105 + {
  2106 + std::cerr
  2107 + << whoami << " didn't create JSON that complies with its own\n\
  2108 +rules. Please report this as a bug at\n\
  2109 + https://github.com/qpdf/qpdf/issues/new\n\
  2110 +ideally with the file that caused the error and the output below. Thanks!\n\
  2111 +\n";
  2112 + for (std::list<std::string>::iterator iter = errors.begin();
  2113 + iter != errors.end(); ++iter)
  2114 + {
  2115 + std::cerr << (*iter) << std::endl;
  2116 + }
  2117 + }
  2118 +
  2119 + std::cout << j.serialize() << std::endl;
  2120 +}
  2121 +
1887 static void do_inspection(QPDF& pdf, Options& o) 2122 static void do_inspection(QPDF& pdf, Options& o)
1888 { 2123 {
1889 int exit_code = 0; 2124 int exit_code = 0;
@@ -1891,6 +2126,10 @@ static void do_inspection(QPDF& pdf, Options& o) @@ -1891,6 +2126,10 @@ static void do_inspection(QPDF& pdf, Options& o)
1891 { 2126 {
1892 do_check(pdf, o, exit_code); 2127 do_check(pdf, o, exit_code);
1893 } 2128 }
  2129 + if (o.show_json)
  2130 + {
  2131 + do_show_json(pdf, o);
  2132 + }
1894 if (o.show_npages) 2133 if (o.show_npages)
1895 { 2134 {
1896 QTC::TC("qpdf", "qpdf npages"); 2135 QTC::TC("qpdf", "qpdf npages");