Commit 76bf863aaa7bd57c2672718804dd334a6c561cfb
1 parent
52a0b767
Add page position information to json
Showing
2 changed files
with
249 additions
and
161 deletions
manual/qpdf-manual.xml
| @@ -1940,178 +1940,235 @@ outfile.pdf</option> | @@ -1940,178 +1940,235 @@ outfile.pdf</option> | ||
| 1940 | </chapter> | 1940 | </chapter> |
| 1941 | <chapter id="ref.json"> | 1941 | <chapter id="ref.json"> |
| 1942 | <title>QPDF JSON</title> | 1942 | <title>QPDF JSON</title> |
| 1943 | - <para> | ||
| 1944 | - Beginning with qpdf version 8.3.0, the <command>qpdf</command> | ||
| 1945 | - command-line program can produce a json representation of the | ||
| 1946 | - non-content data in a PDF file. It includes a dump in json format | ||
| 1947 | - of all objects in the PDF file excluding the content of streams. | ||
| 1948 | - This json representation makes it very easy to look in detail at | ||
| 1949 | - the structure of a given PDF file, and it also provides a great way | ||
| 1950 | - to work with PDF files programmatically from the command-line in | ||
| 1951 | - languages that can't call or link with the qpdf library directly. | ||
| 1952 | - Note that stream data can be extracted from PDF files using other | ||
| 1953 | - qpdf command-line options. | ||
| 1954 | - </para> | ||
| 1955 | - <para> | ||
| 1956 | - The qpdf json representation includes a json serialization of the | ||
| 1957 | - raw objects in the PDF file as well as some computed information in | ||
| 1958 | - a more easily extracted format. QPDF provides some guarantees about | ||
| 1959 | - its json format. These guarantees are designed to simplify the | ||
| 1960 | - experience of a developer working with the JSON format. | ||
| 1961 | - <variablelist> | ||
| 1962 | - <varlistentry> | ||
| 1963 | - <term>Compatibility</term> | 1943 | + <sect1 id="ref.json-overview"> |
| 1944 | + <title>Overview</title> | ||
| 1945 | + <para> | ||
| 1946 | + Beginning with qpdf version 8.3.0, the <command>qpdf</command> | ||
| 1947 | + command-line program can produce a json representation of the | ||
| 1948 | + non-content data in a PDF file. It includes a dump in json format | ||
| 1949 | + of all objects in the PDF file excluding the content of streams. | ||
| 1950 | + This json representation makes it very easy to look in detail at | ||
| 1951 | + the structure of a given PDF file, and it also provides a great way | ||
| 1952 | + to work with PDF files programmatically from the command-line in | ||
| 1953 | + languages that can't call or link with the qpdf library directly. | ||
| 1954 | + Note that stream data can be extracted from PDF files using other | ||
| 1955 | + qpdf command-line options. | ||
| 1956 | + </para> | ||
| 1957 | + </sect1> | ||
| 1958 | + <sect1 id="ref.json-guarantees"> | ||
| 1959 | + <title>JSON Guarantees</title> | ||
| 1960 | + <para> | ||
| 1961 | + The qpdf json representation includes a json serialization of the | ||
| 1962 | + raw objects in the PDF file as well as some computed information in | ||
| 1963 | + a more easily extracted format. QPDF provides some guarantees about | ||
| 1964 | + its json format. These guarantees are designed to simplify the | ||
| 1965 | + experience of a developer working with the JSON format. | ||
| 1966 | + <variablelist> | ||
| 1967 | + <varlistentry> | ||
| 1968 | + <term>Compatibility</term> | ||
| 1969 | + <listitem> | ||
| 1970 | + <para> | ||
| 1971 | + The top-level json object output is a dictionary. The json | ||
| 1972 | + output contains various nested dictionaries and arrays. With | ||
| 1973 | + the exception of dictionaries that are populated by the fields | ||
| 1974 | + of objects from the file, all instances of a dictionary are | ||
| 1975 | + guaranteed to have exactly the same keys. Future versions of | ||
| 1976 | + qpdf are free to add additional keys but not to remove keys or | ||
| 1977 | + change the type of object that a key points to. The qpdf | ||
| 1978 | + program validates this guarantee, and in the unlikely event | ||
| 1979 | + that a bug in qpdf should cause it to generate data that | ||
| 1980 | + doesn't conform to this rule, it will ask you to file a bug | ||
| 1981 | + report. | ||
| 1982 | + </para> | ||
| 1983 | + <para> | ||
| 1984 | + The top-level json structure contains a | ||
| 1985 | + “<literal>version</literal>” key whose value is | ||
| 1986 | + a simple integer. The value of the <literal>version</literal> key | ||
| 1987 | + will be incremented if a non-compatible change is made. A | ||
| 1988 | + non-compatible change would be any change that involves removal | ||
| 1989 | + of a key, a change to the format of data pointed to by a key, | ||
| 1990 | + or a semantic change that requires a different interpretation | ||
| 1991 | + of a previously existing key. A strong effort will be made to | ||
| 1992 | + avoid breaking compatibility. | ||
| 1993 | + </para> | ||
| 1994 | + </listitem> | ||
| 1995 | + </varlistentry> | ||
| 1996 | + <varlistentry> | ||
| 1997 | + <term>Documentation</term> | ||
| 1998 | + <listitem> | ||
| 1999 | + <para> | ||
| 2000 | + The <command>qpdf</command> command can be invoked with the | ||
| 2001 | + <option>--json-help</option> option. This will output a json | ||
| 2002 | + structure that has the same structure as the json output that | ||
| 2003 | + qpdf generates, except that each field in the help output is a | ||
| 2004 | + description of the corresponding field in the json output. The | ||
| 2005 | + specific guarantees are as follows: | ||
| 2006 | + <itemizedlist> | ||
| 2007 | + <listitem> | ||
| 2008 | + <para> | ||
| 2009 | + A dictionary in the help output means that the corresponding | ||
| 2010 | + location in the actual json output is also a dictionary with | ||
| 2011 | + exactly the same keys; that is, no keys present in help are | ||
| 2012 | + absent in the real output, and no keys will be present in | ||
| 2013 | + the real output that are not in help. | ||
| 2014 | + </para> | ||
| 2015 | + </listitem> | ||
| 2016 | + <listitem> | ||
| 2017 | + <para> | ||
| 2018 | + A string in the help output is a description of the item | ||
| 2019 | + that appears in the corresponding location of the actual | ||
| 2020 | + output. The corresponding output can have any format. | ||
| 2021 | + </para> | ||
| 2022 | + </listitem> | ||
| 2023 | + <listitem> | ||
| 2024 | + <para> | ||
| 2025 | + An array in the help output always contains a single | ||
| 2026 | + element. It indicates that the corresponding location in the | ||
| 2027 | + actual output is also an array, and that each element of the | ||
| 2028 | + array has whatever format is implied by the single element | ||
| 2029 | + of the help output's array. | ||
| 2030 | + </para> | ||
| 2031 | + </listitem> | ||
| 2032 | + </itemizedlist> | ||
| 2033 | + For example, the help output includes a | ||
| 2034 | + “<literal>pagelabels</literal>” key whose value is | ||
| 2035 | + an array of one element. That element is a dictionary with keys | ||
| 2036 | + “<literal>index</literal>” and | ||
| 2037 | + “<literal>label</literal>”. In addition to | ||
| 2038 | + describing the meaning of those keys, this tells you that the | ||
| 2039 | + actual json output will contain a <literal>pagelabels</literal> | ||
| 2040 | + array, each of whose elements is a dictionary that contains an | ||
| 2041 | + <literal>index</literal> key, a <literal>label</literal> key, | ||
| 2042 | + and no other keys. | ||
| 2043 | + </para> | ||
| 2044 | + </listitem> | ||
| 2045 | + </varlistentry> | ||
| 2046 | + <varlistentry> | ||
| 2047 | + <term>Directness and Simplicity</term> | ||
| 2048 | + <listitem> | ||
| 2049 | + <para> | ||
| 2050 | + The json output contains the value of every object in the file, | ||
| 2051 | + but it also contains some processed data. This is analogous to | ||
| 2052 | + how qpdf's library interface works. The processed data is | ||
| 2053 | + similar to the helper functions in that it allows you to look | ||
| 2054 | + at certain aspects of the PDF file without having to understand | ||
| 2055 | + all the nuances of the PDF specification, while the raw objects | ||
| 2056 | + allow you to mine the PDF for anything that the higher-level | ||
| 2057 | + interfaces are lacking. | ||
| 2058 | + </para> | ||
| 2059 | + </listitem> | ||
| 2060 | + </varlistentry> | ||
| 2061 | + </variablelist> | ||
| 2062 | + </para> | ||
| 2063 | + </sect1> | ||
| 2064 | + <sect1 id="json.limitations"> | ||
| 2065 | + <title>Limitations of JSON Representation</title> | ||
| 2066 | + <para> | ||
| 2067 | + There are a few limitations to be aware of with the json structure: | ||
| 2068 | + <itemizedlist> | ||
| 1964 | <listitem> | 2069 | <listitem> |
| 1965 | <para> | 2070 | <para> |
| 1966 | - The top-level json object output is a dictionary. The json | ||
| 1967 | - output contains various nested dictionaries and arrays. With | ||
| 1968 | - the exception of dictionaries that are populated by the fields | ||
| 1969 | - of objects from the file, all instances of a dictionary are | ||
| 1970 | - guaranteed to have exactly the same keys. Future versions of | ||
| 1971 | - qpdf are free to add additional keys but not to remove keys or | ||
| 1972 | - change the type of object that a key points to. The qpdf | ||
| 1973 | - program validates this guarantee, and in the unlikely event | ||
| 1974 | - that a bug in qpdf should cause it to generate data that | ||
| 1975 | - doesn't conform to this rule, it will ask you to file a bug | ||
| 1976 | - report. | 2071 | + Strings, names, and indirect object references in the original |
| 2072 | + PDF file are all converted to strings in the json | ||
| 2073 | + representation. In the case of a “normal” PDF file, | ||
| 2074 | + you can tell the difference because a name starts with a slash | ||
| 2075 | + (<literal>/</literal>), and an indirect object reference looks | ||
| 2076 | + like <literal>n n R</literal>, but if there were to be a string | ||
| 2077 | + that looked like a name or indirect object reference, there | ||
| 2078 | + would be no way to tell this from the json output. Note that | ||
| 2079 | + there are certain cases where you know for sure what something | ||
| 2080 | + is, such as knowing that dictionary keys in objects are always | ||
| 2081 | + names and that certain things in the higher-level computed data | ||
| 2082 | + are known to contain indirect object references. | ||
| 1977 | </para> | 2083 | </para> |
| 2084 | + </listitem> | ||
| 2085 | + <listitem> | ||
| 1978 | <para> | 2086 | <para> |
| 1979 | - The top-level json structure contains a | ||
| 1980 | - “<literal>version</literal>” key whose value is | ||
| 1981 | - simple integer. The value of the <literal>version</literal> key | ||
| 1982 | - will be incremented if a non-compatible change is made. A | ||
| 1983 | - non-compatible change would be any change that involves removal | ||
| 1984 | - of a key, a change to the format of data pointed to by a key, | ||
| 1985 | - or a semantic change that requires a different interpretation | ||
| 1986 | - of a previously existing key. A strong effort will be made to | ||
| 1987 | - avoid breaking compatibility. | 2087 | + The json format doesn't support binary data very well. Mostly |
| 2088 | + the details are not important, but they are presented here for | ||
| 2089 | + information. When qpdf outputs a string in the json | ||
| 2090 | + representation, it converts the string to UTF-8, assuming usual | ||
| 2091 | + PDF string semantics. Specifically, if the original string is | ||
| 2092 | + UTF-16, it is converted to UTF-8. Otherwise, it is assumed to | ||
| 2093 | + have PDF doc encoding, and is converted to UTF-8 with that | ||
| 2094 | + assumption. This causes strange things to happen to binary | ||
| 2095 | + strings. For example, if you had the binary string | ||
| 2096 | + <literal><038051></literal>, this would be output to the | ||
| 2097 | + json as <literal>\u0003•Q</literal> because | ||
| 2098 | + <literal>03</literal> is not a printable character and | ||
| 2099 | + <literal>80</literal> is the bullet character in PDF doc | ||
| 2100 | + encoding and is mapped to the Unicode value | ||
| 2101 | + <literal>2022</literal>. Since <literal>51</literal> is | ||
| 2102 | + <literal>Q</literal>, it is output as is. If you wanted to | ||
| 2103 | + convert back from here to a binary string, you would have to | ||
| 2104 | + recognize Unicode values whose code points are higher than | ||
| 2105 | + <literal>0xFF</literal> and map those back to their | ||
| 2106 | + corresponding PDF doc encoding characters. There is no way to | ||
| 2107 | + tell the difference between a Unicode string that was originally | ||
| 2108 | + encoded as UTF-16 and one that was converted from PDF doc | ||
| 2109 | + encoding. In other words, it's best if you don't try to use the | ||
| 2110 | + json format to extract binary strings from the PDF file, but if | ||
| 2111 | + you really had to, it could be done. Note that qpdf's | ||
| 2112 | + <option>--show-object</option> option does not have this | ||
| 2113 | + limitation and will reveal the string as encoded in the original | ||
| 2114 | + file. | ||
| 1988 | </para> | 2115 | </para> |
| 1989 | </listitem> | 2116 | </listitem> |
| 1990 | - </varlistentry> | ||
| 1991 | - <varlistentry> | ||
| 1992 | - <term>Documentation</term> | 2117 | + </itemizedlist> |
| 2118 | + </para> | ||
| 2119 | + </sect1> | ||
| 2120 | + <sect1 id="json.considerations"> | ||
| 2121 | + <title>JSON: Special Considerations</title> | ||
| 2122 | + <para> | ||
| 2123 | + For the most part, the built-in JSON help tells you everything you | ||
| 2124 | + need to know about the JSON format, but there are a few | ||
| 2125 | + non-obvious things to be aware of: | ||
| 2126 | + <itemizedlist> | ||
| 1993 | <listitem> | 2127 | <listitem> |
| 1994 | <para> | 2128 | <para> |
| 1995 | - The <command>qpdf</command> command can be invoked with the | ||
| 1996 | - <option>--json-help</option> option. This will output a json | ||
| 1997 | - structure that has the same structure as the json output that | ||
| 1998 | - qpdf generates, except that each field in the help output is a | ||
| 1999 | - description of the corresponding field in the json output. The | ||
| 2000 | - specific guarantees are as follows: | ||
| 2001 | - <itemizedlist> | ||
| 2002 | - <listitem> | ||
| 2003 | - <para> | ||
| 2004 | - A dictionary in the help output means that the corresponding | ||
| 2005 | - location in the actual json output is also a dictionary with | ||
| 2006 | - exactly the same keys; that is, no keys present in help are | ||
| 2007 | - absent in the real output, and no keys will be present in | ||
| 2008 | - the real output that are not in help. | ||
| 2009 | - </para> | ||
| 2010 | - </listitem> | ||
| 2011 | - <listitem> | ||
| 2012 | - <para> | ||
| 2013 | - A string in the help output is a description of the item | ||
| 2014 | - that appears in the corresponding location of the actual | ||
| 2015 | - output. The corresponding output can have any format. | ||
| 2016 | - </para> | ||
| 2017 | - </listitem> | ||
| 2018 | - <listitem> | ||
| 2019 | - <para> | ||
| 2020 | - An array in the help output always contains a single | ||
| 2021 | - element. It indicates that the corresponding location in the | ||
| 2022 | - actual output is also an array, and that each element of the | ||
| 2023 | - array has whatever format is implied by the single element | ||
| 2024 | - of the help output's array. | ||
| 2025 | - </para> | ||
| 2026 | - </listitem> | ||
| 2027 | - </itemizedlist> | ||
| 2028 | - For example, the help output indicates includes a | ||
| 2029 | - “<literal>pagelabels</literal>” key whose value is | ||
| 2030 | - an array of one element. That element is a dictionary with keys | ||
| 2031 | - “<literal>index</literal>” and | ||
| 2032 | - “<literal>label</literal>”. In addition to | ||
| 2033 | - describing the meaning of those keys, this tells you that the | ||
| 2034 | - actual json output will contain a <literal>pagelabels</literal> | ||
| 2035 | - array, each of whose elements is a dictionary that contains an | ||
| 2036 | - <literal>index</literal> key, a <literal>label</literal> key, | ||
| 2037 | - and no other keys. | 2129 | + While qpdf guarantees that keys present in the help will be |
| 2130 | + present in the output, those fields may be null or empty if the | ||
| 2131 | + information is not known or absent in the file. Also, if you | ||
| 2132 | + specify <option>--json-keys</option>, the keys that are not | ||
| 2133 | + listed will be excluded entirely except for those that | ||
| 2134 | + <option>--json-help</option> says are always present. | ||
| 2038 | </para> | 2135 | </para> |
| 2039 | </listitem> | 2136 | </listitem> |
| 2040 | - </varlistentry> | ||
| 2041 | - <varlistentry> | ||
| 2042 | - <term>Directness and Simplicity</term> | ||
| 2043 | <listitem> | 2137 | <listitem> |
| 2044 | <para> | 2138 | <para> |
| 2045 | - The json output contains the value of every object in the file, | ||
| 2046 | - but it also contains some processed data. This is analogous to | ||
| 2047 | - how qpdf's library interface works. The processed data is | ||
| 2048 | - similar to the helper functions in that it allows you to look | ||
| 2049 | - at certain aspects of the PDF file without having to understand | ||
| 2050 | - all the nuances of the PDF specification, while the raw objects | ||
| 2051 | - allow you to mine the PDF for anything that the higher-level | ||
| 2052 | - interfaces are lacking. | 2139 | + In a few places, there are keys with names containing |
| 2140 | + <literal>pageposfrom1</literal>. The values of these keys are | ||
| 2141 | + null or an integer. If an integer, they point to a page index | ||
| 2142 | + within the file numbering from 1. Note that json indexes from | ||
| 2143 | + 0, and you would also use 0-based indexing using the API. | ||
| 2144 | + However, 1-based indexing is easier in this case because the | ||
| 2145 | + command-line syntax for specifying page ranges is 1-based. If | ||
| 2146 | + you were going to write a program that looked through the json | ||
| 2147 | + for information about specific pages and then use the | ||
| 2148 | + command-line to extract those pages, 1-based indexing is | ||
| 2149 | + easier. Besides, it's more convenient to subtract 1 in a | ||
| 2150 | + program written in a real programming language than it is to add 1 in | ||
| 2151 | + shell code. | ||
| 2053 | </para> | 2152 | </para> |
| 2054 | </listitem> | 2153 | </listitem> |
| 2055 | - </varlistentry> | ||
| 2056 | - </variablelist> | ||
| 2057 | - </para> | ||
| 2058 | - <para> | ||
| 2059 | - There are a few limitations to be aware of with the json structure: | ||
| 2060 | - <itemizedlist> | ||
| 2061 | - <listitem> | ||
| 2062 | - <para> | ||
| 2063 | - Strings, names, and indirect object references in the original | ||
| 2064 | - PDF file are all converted to strings in the json | ||
| 2065 | - representation. In the case of a “normal” PDF file, | ||
| 2066 | - you can tell the difference because a name starts with a slash | ||
| 2067 | - (<literal>/</literal>), and an indirect object reference looks | ||
| 2068 | - like <literal>n n R</literal>, but if there were to be a string | ||
| 2069 | - that looked like a name or indirect object reference, there | ||
| 2070 | - would be no way to tell this from the json output. Note that | ||
| 2071 | - there are certain cases where you know for sure what something | ||
| 2072 | - is, such as knowing that dictionary keys in objects are always | ||
| 2073 | - names and that certain things in the higher-level computed data | ||
| 2074 | - are known to contain indirect object references. | ||
| 2075 | - </para> | ||
| 2076 | - </listitem> | ||
| 2077 | - <listitem> | ||
| 2078 | - <para> | ||
| 2079 | - The json format doesn't support binary data very well. Mostly | ||
| 2080 | - the details are not important, but they are presented here for | ||
| 2081 | - information. When qpdf outputs a string in the json | ||
| 2082 | - representation, it converts the string to UTF-8, assuming usual | ||
| 2083 | - PDF string semantics. Specifically, if the original string is | ||
| 2084 | - UTF-16, it is converted to UTF-8. Otherwise, it is assumed to | ||
| 2085 | - have PDF doc encoding, and is converted to UTF-8 with that | ||
| 2086 | - assumption. This causes strange things to happen to binary | ||
| 2087 | - strings. For example, if you had the binary string | ||
| 2088 | - <literal><038051></literal>, this would be output to the | ||
| 2089 | - json as <literal>\u0003•Q</literal> because | ||
| 2090 | - <literal>03</literal> is not a printable character and | ||
| 2091 | - <literal>80</literal> is the bullet character in PDF doc | ||
| 2092 | - encoding and is mapped to the Unicode value | ||
| 2093 | - <literal>2022</literal>. Since <literal>51</literal> is | ||
| 2094 | - <literal>Q</literal>, it is output as is. If you wanted to | ||
| 2095 | - convert back from here to a binary string, would have to | ||
| 2096 | - recognize Unicode values whose code points are higher than | ||
| 2097 | - <literal>0xFF</literal> and map those back to their | ||
| 2098 | - corresponding PDF doc encoding characters. There is no way to | ||
| 2099 | - tell the difference between a Unicode string that was originally | ||
| 2100 | - encoded as UTF-16 or one that was converted from PDF doc | ||
| 2101 | - encoding. In other words, it's best if you don't try to use the | ||
| 2102 | - json format to extract binary strings from the PDF file, but if | ||
| 2103 | - you really had to, it could be done. Note that qpdf's | ||
| 2104 | - <option>--show-object</option> option does not have this | ||
| 2105 | - limitation and will reveal the string as encoded in the original | ||
| 2106 | - file. | ||
| 2107 | - </para> | ||
| 2108 | - </listitem> | ||
| 2109 | - </itemizedlist> | ||
| 2110 | - </para> | ||
| 2111 | - <para> | ||
| 2112 | - For specific details on the information provided in the json | ||
| 2113 | - output, please run <command>qpdf --json-help</command>. | ||
| 2114 | - </para> | 2154 | + <listitem> |
| 2155 | + <para> | ||
| 2156 | + The image information included in the <literal>page</literal> | ||
| 2157 | + section of the json output includes the key | ||
| 2158 | + “<literal>filterable</literal>”. Note that the | ||
| 2159 | + value of this field may depend on the | ||
| 2160 | + <option>--decode-level</option> that you invoke qpdf with. The | ||
| 2161 | + json output includes a top-level key | ||
| 2162 | + “<literal>parameters</literal>” that indicates the | ||
| 2163 | + decode level used for computing whether a stream was | ||
| 2164 | + filterable. For example, jpeg images will be shown as not | ||
| 2165 | + filterable by default, but they will be shown as filterable if | ||
| 2166 | + you run <command>qpdf --json --decode-level=all</command>. | ||
| 2167 | + </para> | ||
| 2168 | + </listitem> | ||
| 2169 | + </itemizedlist> | ||
| 2170 | + </para> | ||
| 2171 | + </sect1> | ||
| 2115 | </chapter> | 2172 | </chapter> |
| 2116 | <chapter id="ref.design"> | 2173 | <chapter id="ref.design"> |
| 2117 | <title>Design and Library Notes</title> | 2174 | <title>Design and Library Notes</title> |
qpdf/qpdf.cc
| @@ -338,6 +338,9 @@ static JSON json_schema(std::set<std::string>* keys = 0) | @@ -338,6 +338,9 @@ static JSON json_schema(std::set<std::string>* keys = 0) | ||
| 338 | outline.addDictionaryMember( | 338 | outline.addDictionaryMember( |
| 339 | "dest", | 339 | "dest", |
| 340 | JSON::makeString("outline destination dictionary")); | 340 | JSON::makeString("outline destination dictionary")); |
| 341 | + page.addDictionaryMember( | ||
| 342 | + "pageposfrom1", | ||
| 343 | + JSON::makeString("position of page in document numbering from 1")); | ||
| 341 | } | 344 | } |
| 342 | if (all_keys || keys->count("pagelabels")) | 345 | if (all_keys || keys->count("pagelabels")) |
| 343 | { | 346 | { |
| @@ -371,6 +374,10 @@ static JSON json_schema(std::set<std::string>* keys = 0) | @@ -371,6 +374,10 @@ static JSON json_schema(std::set<std::string>* keys = 0) | ||
| 371 | outlines.addDictionaryMember( | 374 | outlines.addDictionaryMember( |
| 372 | "open", | 375 | "open", |
| 373 | JSON::makeString("whether the outline is displayed expanded")); | 376 | JSON::makeString("whether the outline is displayed expanded")); |
| 377 | + outlines.addDictionaryMember( | ||
| 378 | + "destpageposfrom1", | ||
| 379 | + JSON::makeString("position of destination page in document" | ||
| 380 | + " numbered from 1; null if not known")); | ||
| 374 | } | 381 | } |
| 375 | return schema; | 382 | return schema; |
| 376 | } | 383 | } |
| @@ -2813,6 +2820,7 @@ static void do_json_pages(QPDF& pdf, Options& o, JSON& j) | @@ -2813,6 +2820,7 @@ static void do_json_pages(QPDF& pdf, Options& o, JSON& j) | ||
| 2813 | j_outline.addDictionaryMember( | 2820 | j_outline.addDictionaryMember( |
| 2814 | "dest", (*oiter).getDest().getJSON(true)); | 2821 | "dest", (*oiter).getDest().getJSON(true)); |
| 2815 | } | 2822 | } |
| 2823 | + j_page.addDictionaryMember("pageposfrom1", JSON::makeInt(1 + pageno)); | ||
| 2816 | } | 2824 | } |
| 2817 | } | 2825 | } |
| 2818 | 2826 | ||
| @@ -2847,7 +2855,8 @@ static void do_json_page_labels(QPDF& pdf, Options& o, JSON& j) | @@ -2847,7 +2855,8 @@ static void do_json_page_labels(QPDF& pdf, Options& o, JSON& j) | ||
| 2847 | } | 2855 | } |
| 2848 | 2856 | ||
| 2849 | static void add_outlines_to_json( | 2857 | static void add_outlines_to_json( |
| 2850 | - std::list<QPDFOutlineObjectHelper> outlines, JSON& j) | 2858 | + std::list<QPDFOutlineObjectHelper> outlines, JSON& j, |
| 2859 | + std::map<QPDFObjGen, int>& page_numbers) | ||
| 2851 | { | 2860 | { |
| 2852 | for (std::list<QPDFOutlineObjectHelper>::iterator iter = outlines.begin(); | 2861 | for (std::list<QPDFOutlineObjectHelper>::iterator iter = outlines.begin(); |
| 2853 | iter != outlines.end(); ++iter) | 2862 | iter != outlines.end(); ++iter) |
| @@ -2858,17 +2867,39 @@ static void add_outlines_to_json( | @@ -2858,17 +2867,39 @@ static void add_outlines_to_json( | ||
| 2858 | jo.addDictionaryMember("title", JSON::makeString(ol.getTitle())); | 2867 | jo.addDictionaryMember("title", JSON::makeString(ol.getTitle())); |
| 2859 | jo.addDictionaryMember("dest", ol.getDest().getJSON(true)); | 2868 | jo.addDictionaryMember("dest", ol.getDest().getJSON(true)); |
| 2860 | jo.addDictionaryMember("open", JSON::makeBool(ol.getCount() >= 0)); | 2869 | jo.addDictionaryMember("open", JSON::makeBool(ol.getCount() >= 0)); |
| 2870 | + QPDFObjectHandle page = ol.getDestPage(); | ||
| 2871 | + JSON j_destpage = JSON::makeNull(); | ||
| 2872 | + if (page.isIndirect()) | ||
| 2873 | + { | ||
| 2874 | + QPDFObjGen og = page.getObjGen(); | ||
| 2875 | + if (page_numbers.count(og)) | ||
| 2876 | + { | ||
| 2877 | + j_destpage = JSON::makeInt(page_numbers[og]); | ||
| 2878 | + } | ||
| 2879 | + } | ||
| 2880 | + jo.addDictionaryMember("destpageposfrom1", j_destpage); | ||
| 2861 | JSON j_kids = jo.addDictionaryMember("kids", JSON::makeArray()); | 2881 | JSON j_kids = jo.addDictionaryMember("kids", JSON::makeArray()); |
| 2862 | - add_outlines_to_json(ol.getKids(), j_kids); | 2882 | + add_outlines_to_json(ol.getKids(), j_kids, page_numbers); |
| 2863 | } | 2883 | } |
| 2864 | } | 2884 | } |
| 2865 | 2885 | ||
| 2866 | static void do_json_outlines(QPDF& pdf, Options& o, JSON& j) | 2886 | static void do_json_outlines(QPDF& pdf, Options& o, JSON& j) |
| 2867 | { | 2887 | { |
| 2888 | + std::map<QPDFObjGen, int> page_numbers; | ||
| 2889 | + QPDFPageDocumentHelper dh(pdf); | ||
| 2890 | + std::vector<QPDFPageObjectHelper> pages = dh.getAllPages(); | ||
| 2891 | + int n = 0; | ||
| 2892 | + for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin(); | ||
| 2893 | + iter != pages.end(); ++iter) | ||
| 2894 | + { | ||
| 2895 | + QPDFObjectHandle oh = (*iter).getObjectHandle(); | ||
| 2896 | + page_numbers[oh.getObjGen()] = ++n; | ||
| 2897 | + } | ||
| 2898 | + | ||
| 2868 | JSON j_outlines = j.addDictionaryMember( | 2899 | JSON j_outlines = j.addDictionaryMember( |
| 2869 | "outlines", JSON::makeArray()); | 2900 | "outlines", JSON::makeArray()); |
| 2870 | QPDFOutlineDocumentHelper odh(pdf); | 2901 | QPDFOutlineDocumentHelper odh(pdf); |
| 2871 | - add_outlines_to_json(odh.getTopLevelOutlines(), j_outlines); | 2902 | + add_outlines_to_json(odh.getTopLevelOutlines(), j_outlines, page_numbers); |
| 2872 | } | 2903 | } |
| 2873 | 2904 | ||
| 2874 | static void do_json(QPDF& pdf, Options& o) | 2905 | static void do_json(QPDF& pdf, Options& o) |