Commit ed6fe01238701ec1e7846058f553eb390335a68f

Authored by Jay Berkenbilt
1 parent 39779dd3

Remove docbook manual and associated files

.gitignore
@@ -6,11 +6,7 @@ autom4te.cache/ @@ -6,11 +6,7 @@ autom4te.cache/
6 config.log 6 config.log
7 config.status 7 config.status
8 distfiles.zip 8 distfiles.zip
9 -doc/fix-qdf.1  
10 -doc/qpdf-manual.html  
11 -doc/qpdf-manual.pdf  
12 -doc/qpdf.1  
13 -doc/zlib-flate.1 9 +doc
14 examples/build/ 10 examples/build/
15 external-libs 11 external-libs
16 fuzz/build/ 12 fuzz/build/
@@ -21,8 +17,6 @@ libqpdf/qpdf/qpdf-config.h @@ -21,8 +17,6 @@ libqpdf/qpdf/qpdf-config.h
21 libtests/build/ 17 libtests/build/
22 libtool 18 libtool
23 manual/build/ 19 manual/build/
24 -manual/html.xsl  
25 -manual/print.xsl  
26 qpdf/build/ 20 qpdf/build/
27 zlib-flate/build/ 21 zlib-flate/build/
28 distribution/ 22 distribution/
doc/stylesheet.css deleted
1 -/**************************************************************/  
2 -/* Custom style-sheet for the QPDF manual in HTML form. */  
3 -/**************************************************************/  
4 -  
5 -/*  
6 - * This file is the CSS for the QPDF manual. It is based heavily on  
7 - * the CSS for the Subversion book. That file contains the following  
8 - * copyright and attribution:  
9 - *  
10 - * Copyright (c) 2003-2007  
11 - * Ben Collins-Sussman, Brian W. Fitzpatrick, C. Michael Pilato.  
12 - *  
13 - * This work is licensed under the Creative Commons Attribution License.  
14 - * To view a copy of this license, visit  
15 - * http://creativecommons.org/licenses/by/2.0/ or send a letter to  
16 - * Creative Commons, 559 Nathan Abbott Way, Stanford, California 94305,  
17 - * USA.  
18 - */  
19 -  
20 -body  
21 -{  
22 - background: white;  
23 - margin: 0.5in;  
24 -}  
25 -  
26 -p, li, ul, ol, dd, dt  
27 -{  
28 - font-style: normal;  
29 - font-weight: normal;  
30 - color: black;  
31 -}  
32 -  
33 -tt, pre  
34 -{  
35 - font-family: courier new,courier,fixed;  
36 -}  
37 -  
38 -a  
39 -{  
40 - color: blue;  
41 - text-decoration: underline;  
42 -}  
43 -  
44 -a:hover  
45 -{  
46 - background: rgb(75%,75%,100%);  
47 - color: blue;  
48 - text-decoration: underline;  
49 -}  
50 -  
51 -a:visited  
52 -{  
53 - color: purple;  
54 - text-decoration: underline;  
55 -}  
56 -  
57 -img  
58 -{  
59 - border: none;  
60 -}  
61 -  
62 -h1.title  
63 -{  
64 - font-size: 250%;  
65 - font-style: normal;  
66 - font-weight: bold;  
67 - color: black;  
68 -}  
69 -  
70 -h2.subtitle  
71 -{  
72 - font-size: 150%;  
73 - font-style: italic;  
74 - color: black;  
75 -}  
76 -  
77 -h2.title  
78 -{  
79 - font-size: 150%;  
80 - font-style: normal;  
81 - font-weight: bold;  
82 - color: black;  
83 -}  
84 -  
85 -h3.title  
86 -{  
87 - font-size: 125%;  
88 - font-style: normal;  
89 - font-weight: bold;  
90 - color: black;  
91 -}  
92 -  
93 -h4.title  
94 -{  
95 - font-size: 100%;  
96 - font-style: normal;  
97 - font-weight: bold;  
98 - color: black;  
99 -}  
100 -  
101 -.toc b  
102 -{  
103 - font-size: 125%;  
104 - font-style: normal;  
105 - font-weight: bold;  
106 - color: black;  
107 -}  
108 -  
109 -.screen, .programlisting, .literal  
110 -{  
111 - font-family: courier new,courier,fixed;  
112 - font-style: normal;  
113 - font-weight: normal;  
114 -}  
115 -  
116 -.command, .option, .type  
117 -{  
118 - font-family: courier new,courier,fixed;  
119 - font-style: normal;  
120 - font-weight: normal;  
121 -}  
122 -  
123 -.filename  
124 -{  
125 - font-family: arial,helvetica,sans-serif;  
126 - font-style: italic;  
127 -}  
128 -  
129 -.property  
130 -{  
131 - font-family: arial,helvetica,sans-serif;  
132 - font-weight: bold;  
133 -}  
134 -  
135 -.classname  
136 -{  
137 - font-family: arial,helvetica,sans-serif;  
138 - font-weight: bold;  
139 - font-style: italic;  
140 -}  
141 -  
142 -.varname, .function, .envar  
143 -{  
144 - font-family: arial,helvetica,sans-serif;  
145 - font-style: italic;  
146 -}  
147 -  
148 -.replaceable  
149 -{  
150 - font-style: italic;  
151 - font-size: 100%;  
152 -}  
153 -  
154 -.figure, .example, .table  
155 -{  
156 - margin: 0.125in 0.25in;  
157 -}  
158 -  
159 -.table table  
160 -{  
161 - border-width: 1px;  
162 - border-style: solid;  
163 - border-color: black;  
164 - border-spacing: 0;  
165 - background: rgb(240,240,240);  
166 -}  
167 -  
168 -.table td  
169 -{  
170 - border: none;  
171 - border-right: 1px black solid;  
172 - border-bottom: 1px black solid;  
173 - padding: 2px;  
174 -}  
175 -  
176 -.table th  
177 -{  
178 - background: rgb(180,180,180);  
179 - border: none;  
180 - border-right: 1px black solid;  
181 - border-bottom: 1px black solid;  
182 - padding: 2px;  
183 -}  
184 -  
185 -.table p.title, .figure p.title, .example p.title  
186 -{  
187 - text-align: left !important;  
188 - font-size: 100% !important;  
189 -}  
190 -  
191 -.author, .pubdate  
192 -{  
193 - margin: 0;  
194 - font-size: 100%;  
195 - font-style: italic;  
196 - font-weight: normal;  
197 - color: black;  
198 -}  
199 -  
200 -.preface div.author, .preface .pubdate  
201 -{  
202 - font-size: 80%;  
203 -}  
204 -  
205 -.sidebar  
206 -{  
207 - border-top: dotted 1px black;  
208 - border-left: dotted 1px black;  
209 - border-right: solid 2px black;  
210 - border-bottom: solid 2px black;  
211 - background: rgb(240,220,170);  
212 - padding: 0 0.12in;  
213 - margin: 0.25in;  
214 -}  
215 -  
216 -.note .programlisting, .note .screen,  
217 -.tip .programlisting, .tip .screen,  
218 -.warning .programlisting, .warning .screen,  
219 -.sidebar .programlisting, .sidebar .screen  
220 -{  
221 - border: none;  
222 - background: none;  
223 -}  
224 -  
225 -.sidebar p.title  
226 -{  
227 - text-align: center;  
228 - font-size: 125%;  
229 -}  
230 -  
231 -.note  
232 -{  
233 - border: black solid 1px;  
234 - background: url(./images/note.png) no-repeat rgb(252,246,220);  
235 - margin: 0.125in 0;  
236 - padding: 0 55px;  
237 -}  
238 -  
239 -.tip  
240 -{  
241 - border: black solid 1px;  
242 - background: url(./images/tip.png) no-repeat rgb(224,244,255);  
243 - margin: 0.125in 0;  
244 - padding: 0 55px;  
245 -}  
246 -  
247 -.warning  
248 -{  
249 - border: black solid 1px;  
250 - background: url(./images/warning.png) no-repeat rgb(255,210,210);  
251 - margin: 0.125in 0;  
252 - padding: 0 55px;  
253 -}  
254 -  
255 -/*  
256 -.note .title, .tip .title, .warning .title  
257 -{  
258 - display: none;  
259 -}  
260 -*/  
261 -  
262 -.programlisting, .screen  
263 -{  
264 - font-size: 90%;  
265 - color: black;  
266 - margin: 1em 0.25in;  
267 - padding: 0.5em;  
268 - background: rgb(240,240,240);  
269 - border-top: black dotted 1px;  
270 - border-left: black dotted 1px;  
271 - border-right: black solid 2px;  
272 - border-bottom: black solid 2px;  
273 -}  
274 -  
275 -.navheader, .navfooter  
276 -{  
277 - border: black solid 1px;  
278 - background: rgb(180,180,200);  
279 -}  
280 -  
281 -.navheader hr, .navfooter hr  
282 -{  
283 - display: none;  
284 -}  
manual/common.xsl deleted
1 -<?xml version='1.0'?>  
2 -<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"  
3 - version="1.0">  
4 - <xsl:param name="variablelist.as.blocks" select="1"/>  
5 - <xsl:param name="body.start.indent">0pt</xsl:param>  
6 - <xsl:param name="xref.with.number.and.title" select="'yes'"/>  
7 - <xsl:param name="section.autolabel" select="1"/>  
8 - <xsl:param name="section.label.includes.component.label" select="1"/>  
9 -</xsl:stylesheet>  
manual/html.xsl.in deleted
1 -<?xml version='1.0'?>  
2 -<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"  
3 - xmlns:fo="http://www.w3.org/1999/XSL/Format"  
4 - version="1.0">  
5 - <xsl:import href="@DOCBOOK_XHTML@"/>  
6 - <xsl:import href="common.xsl"/>  
7 - <xsl:param name="html.stylesheet">stylesheet.css</xsl:param>  
8 -</xsl:stylesheet>  
manual/print.xsl.in deleted
1 -<?xml version='1.0'?>  
2 -<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"  
3 - xmlns:fo="http://www.w3.org/1999/XSL/Format"  
4 - version="1.0">  
5 - <xsl:import href="@DOCBOOK_FO@"/>  
6 - <xsl:import href="common.xsl"/>  
7 - <xsl:param name="local.l10n.xml" select="document('')"/>  
8 - <l:i18n xmlns:l="http://docbook.sourceforge.net/xmlns/l10n/1.0">  
9 - <l:l10n language="en">  
10 - <l:context name="xref">  
11 - <l:template name="page.citation" text=", page %p"/>  
12 - </l:context>  
13 - </l:l10n>  
14 - </l:i18n>  
15 - <!-- This should give us bookmarks, but it's broken for fop -->  
16 - <!-- 0.94 and stylesheets 1.73.2. -->  
17 -<!--  
18 - <xsl:param name="fop.extensions" select="1"/>  
19 --->  
20 - <xsl:param name="paper.type" select="'USLetter'"/>  
21 - <xsl:param name="insert.xref.page.number" select="'yes'"/>  
22 -<!--  
23 - <xsl:param name="admon.graphics.path">  
24 - /tmp/z/docbook-xsl-1.73.2/images/  
25 - </xsl:param>  
26 - <xsl:param name="admon.graphics" select="1"/>  
27 --->  
28 - <xsl:param name="shade.verbatim" select="1"/>  
29 - <xsl:attribute-set name="shade.verbatim.style">  
30 - <xsl:attribute name="background-color">#F0F0F0</xsl:attribute>  
31 - <xsl:attribute name="border-width">0.5pt</xsl:attribute>  
32 - <xsl:attribute name="border-style">solid</xsl:attribute>  
33 - <xsl:attribute name="border-color">#575757</xsl:attribute>  
34 - <xsl:attribute name="padding">3pt</xsl:attribute>  
35 - </xsl:attribute-set>  
36 - <xsl:attribute-set name="xref.properties">  
37 - <xsl:attribute name="color">#00c</xsl:attribute>  
38 - </xsl:attribute-set>  
39 - <fo:page-sequence language="en"/>  
40 - <fo:block hyphenate="true"/>  
41 -  
42 - <xsl:template match="property">  
43 - <xsl:call-template name="inline.boldseq"/>  
44 - </xsl:template>  
45 - <xsl:template match="classname">  
46 - <fo:inline font-family="sans-serif" font-weight="bold">  
47 - <xsl:call-template name="inline.italicseq"/>  
48 - </fo:inline>  
49 - </xsl:template>  
50 - <xsl:template match="filename">  
51 - <xsl:call-template name="inline.italicseq"/>  
52 - </xsl:template>  
53 - <xsl:template match="varname">  
54 - <xsl:call-template name="inline.italicseq"/>  
55 - </xsl:template>  
56 - <xsl:template match="function">  
57 - <xsl:call-template name="inline.italicseq"/>  
58 - </xsl:template>  
59 - <xsl:template match="envar">  
60 - <xsl:call-template name="inline.italicseq"/>  
61 - </xsl:template>  
62 - <xsl:template match="type">  
63 - <xsl:call-template name="inline.monoseq"/>  
64 - </xsl:template>  
65 - <xsl:template match="option">  
66 - <xsl:call-template name="inline.boldseq"/>  
67 - </xsl:template>  
68 -  
69 -</xsl:stylesheet>  
manual/qpdf-manual.xml deleted
1 -<?xml version="1.0" encoding="utf-8"?>  
2 -<!DOCTYPE book [  
3 -<!ENTITY swversion "10.4.0">  
4 -<!ENTITY lastreleased "November 16, 2021">  
5 -]>  
6 -<book>  
7 - <bookinfo>  
8 - <title>QPDF Manual</title>  
9 - <subtitle>For QPDF Version &swversion;, &lastreleased;</subtitle>  
10 - <author>  
11 - <firstname>Jay</firstname><surname>Berkenbilt</surname>  
12 - </author>  
13 - <copyright>  
14 - <year>2005&ndash;2020</year>  
15 - <holder>Jay Berkenbilt</holder>  
16 - </copyright>  
17 - </bookinfo>  
18 - <preface id="acknowledgments">  
19 - <title>General Information</title>  
20 - <para>  
21 - QPDF is a program that does structural, content-preserving  
22 - transformations on PDF files. QPDF's website is located at <ulink  
23 - url="https://qpdf.sourceforge.io/">https://qpdf.sourceforge.io/</ulink>.  
24 - QPDF's source code is hosted on github at <ulink  
25 - url="https://github.com/qpdf/qpdf">https://github.com/qpdf/qpdf</ulink>.  
26 - </para>  
27 - <para>  
28 - QPDF is licensed under <ulink  
29 - url="http://www.apache.org/licenses/LICENSE-2.0">the Apache  
30 - License, Version 2.0</ulink> (the "License"). Unless required by  
31 - applicable law or agreed to in writing, software distributed under  
32 - the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES  
33 - OR CONDITIONS OF ANY KIND, either express or implied. See the  
34 - License for the specific language governing permissions and  
35 - limitations under the License.  
36 - </para>  
37 - <para>  
38 - Versions of qpdf prior to version 7 were released under the terms  
39 - of <ulink url="https://opensource.org/licenses/Artistic-2.0">the  
40 - Artistic License, version 2.0</ulink>. At your option, you may  
41 - continue to consider qpdf to be licensed under those terms. The  
42 - Apache License 2.0 permits everything that the Artistic License 2.0  
43 - permits but is slightly less restrictive. Allowing the Artistic  
44 - License to continue being used is primarily to help people who may  
45 - have to get specific approval to use qpdf in their products.  
46 - </para>  
47 - <para>  
48 - QPDF is intentionally released with a permissive license. However,  
49 - if there is some reason that the licensing terms don't work for  
50 - your requirements, please feel free to contact the copyright holder  
51 - to make other arrangements.  
52 - </para>  
53 - <para>  
54 - QPDF was originally created in 2001 and modified periodically  
55 - between 2001 and 2005 during my employment at <ulink  
56 - url="http://www.apexcovantage.com">Apex CoVantage</ulink>. Upon my  
57 - departure from Apex, the company graciously allowed me to take  
58 - ownership of the software and continue maintaining it as an open  
59 - source project, a decision for which I am very grateful. I have  
60 - made considerable enhancements to it since that time. I feel  
61 - fortunate to have worked for people who would make such a decision.  
62 - This work would not have been possible without their support.  
63 - </para>  
64 - </preface>  
65 - <chapter id="ref.overview">  
66 - <title>What is QPDF?</title>  
67 - <para>  
68 - QPDF is a program that does structural, content-preserving  
69 - transformations on PDF files. It could have been called something  
70 - like <emphasis>pdf-to-pdf</emphasis>. It also provides many useful  
71 - capabilities to developers of PDF-producing software or for people  
72 - who just want to look at the innards of a PDF file to learn more  
73 - about how they work.  
74 - </para>  
75 - <para>  
76 - With QPDF, it is possible to copy objects from one PDF file into  
77 - another and to manipulate the list of pages in a PDF file. This  
78 - makes it possible to merge and split PDF files. The QPDF library  
79 - also makes it possible for you to create PDF files from scratch.  
80 - In this mode, you are responsible for supplying all the contents of  
81 - the file, while the QPDF library takes care of all the syntactical  
82 - representation of the objects, creation of cross-reference tables  
83 - and, if you use them, object streams, encryption, linearization,  
84 - and other syntactic details. You are still responsible for  
85 - generating PDF content on your own.  
86 - </para>  
87 - <para>  
88 - QPDF has been designed with very few external dependencies, and it  
89 - is intentionally very lightweight. QPDF is  
90 - <emphasis>not</emphasis> a PDF content creation library, a PDF  
91 - viewer, or a program capable of converting PDF into other formats.  
92 - In particular, QPDF knows nothing about the semantics of PDF  
93 - content streams. If you are looking for something that can do  
94 - that, you should look elsewhere. However, once you have a valid  
95 - PDF file, QPDF can be used to transform that file in ways perhaps  
96 - your original PDF creation can't handle. For example, many  
97 - programs generate simple PDF files but can't password-protect them,  
98 - web-optimize them, or perform other transformations of that type.  
99 - </para>  
100 - </chapter>  
101 - <chapter id="ref.installing">  
102 - <title>Building and Installing QPDF</title>  
103 - <para>  
104 - This chapter describes how to build and install qpdf. Please see  
105 - also the <filename>README.md</filename> and  
106 - <filename>INSTALL</filename> files in the source distribution.  
107 - </para>  
108 - <sect1 id="ref.prerequisites">  
109 - <title>System Requirements</title>  
110 - <para>  
111 - The qpdf package has few external dependencies. In order to build  
112 - qpdf, the following packages are required:  
113 - <itemizedlist>  
114 - <listitem>  
115 - <para>  
116 - A C++ compiler that supports C++-14.  
117 - </para>  
118 - </listitem>  
119 - <listitem>  
120 - <para>  
121 - zlib: <ulink url="http://www.zlib.net/">http://www.zlib.net/</ulink>  
122 - </para>  
123 - </listitem>  
124 - <listitem>  
125 - <para>  
126 - jpeg: <ulink  
127 - url="http://www.ijg.org/files/">http://www.ijg.org/files/</ulink>  
128 - or <ulink  
129 - url="https://libjpeg-turbo.org/">https://libjpeg-turbo.org/</ulink>  
130 - </para>  
131 - </listitem>  
132 - <listitem>  
133 - <para>  
134 - <emphasis>Recommended but not required:</emphasis> gnutls:  
135 - <ulink url="https://www.gnutls.org/">https://www.gnutls.org/</ulink>  
136 - to be able to use the gnutls crypto provider, and/or openssl:  
137 - <ulink url="https://openssl.org/">https://openssl.org/</ulink>  
138 - to be able to use the openssl crypto provider.  
139 - </para>  
140 - </listitem>  
141 - <listitem>  
142 - <para>  
143 - gnu make 3.81 or newer: <ulink url="http://www.gnu.org/software/make">http://www.gnu.org/software/make</ulink>  
144 - </para>  
145 - </listitem>  
146 - <listitem>  
147 - <para>  
148 - perl version 5.8 or newer:  
149 - <ulink url="http://www.perl.org/">http://www.perl.org/</ulink>;  
150 - required for running the test suite. Starting with qpdf version  
151 - 9.1.1, perl is no longer required at runtime.  
152 - </para>  
153 - </listitem>  
154 - <listitem>  
155 - <para>  
156 - GNU diffutils (any version): <ulink  
157 - url="http://www.gnu.org/software/diffutils/">http://www.gnu.org/software/diffutils/</ulink>  
158 - is required to run the test suite. Note that this is the  
159 - version of diff present on virtually all GNU/Linux systems.  
160 - This is required because the test suite uses <command>diff  
161 - -u</command>.  
162 - </para>  
163 - </listitem>  
164 - </itemizedlist>  
165 - </para>  
166 - <para>  
167 - Part of qpdf's test suite does comparisons of the contents of PDF  
168 - files by converting them to images and comparing the images. The  
169 - image comparison tests are disabled by default. Those tests are  
170 - not required for determining correctness of a qpdf build if you  
171 - have not modified the code since the test suite also contains  
172 - expected output files that are compared literally. The image  
173 - comparison tests provide an extra check to make sure that any  
174 - content transformations don't break the rendering of pages.  
175 - Transformations that affect the content streams themselves are off  
176 - by default and are only provided to help developers look into the  
177 - contents of PDF files. If you are making deep changes to the  
178 - library that cause changes in the contents of the files that qpdf  
179 - generates, then you should enable the image comparison tests.  
180 - Enable them by running <command>configure</command> with the  
181 - <option>--enable-test-compare-images</option> flag. If you enable  
182 - this, the following additional packages are required by the  
183 - test suite. Note that in no case are these items required to use  
184 - qpdf.  
185 - <itemizedlist>  
186 - <listitem>  
187 - <para>  
188 - libtiff: <ulink url="http://www.remotesensing.org/libtiff/">http://www.remotesensing.org/libtiff/</ulink>  
189 - </para>  
190 - </listitem>  
191 - <listitem>  
192 - <para>  
193 - GhostScript version 8.60 or newer: <ulink  
194 - url="http://www.ghostscript.com">http://www.ghostscript.com</ulink>  
195 - </para>  
196 - </listitem>  
197 - </itemizedlist>  
198 - If you do not enable this, then you do not need to have tiff and  
199 - ghostscript.  
200 - </para>  
201 - <para>  
202 - Pre-built documentation is distributed with qpdf, so you should  
203 - generally not need to rebuild the documentation. In order to  
204 - build the documentation from its docbook sources, you need the  
205 - docbook XML style sheets (<ulink  
206 - url="http://downloads.sourceforge.net/docbook/">http://downloads.sourceforge.net/docbook/</ulink>).  
207 - To build the PDF version of the documentation, you need Apache fop  
208 - (<ulink  
209 - url="http://xml.apache.org/fop/">http://xml.apache.org/fop/</ulink>)  
210 - version 0.94 or higher.  
211 - </para>  
212 - </sect1>  
213 - <sect1 id="ref.building">  
214 - <title>Build Instructions</title>  
215 - <para>  
216 - Building qpdf on UNIX is generally just a matter of running  
217 -  
218 - <programlisting>./configure  
219 -make  
220 -</programlisting>  
221 - You can also run <command>make check</command> to run the test  
222 - suite and <command>make install</command> to install. Please run  
223 - <command>./configure --help</command> for options on what can be  
224 - configured. You can also set the value of  
225 - <varname>DESTDIR</varname> during installation to install to a  
226 - temporary location, as is common with many open source packages.  
227 - Please see also the <filename>README.md</filename> and  
228 - <filename>INSTALL</filename> files in the source distribution.  
229 - </para>  
230 - <para>  
231 - Building on Windows is a little bit more complicated. For  
232 - details, please see <filename>README-windows.md</filename> in the  
233 - source distribution. You can also download a binary distribution  
234 - for Windows. There is a port of qpdf to Visual C++ version 6 in  
235 - the <filename>contrib</filename> area generously contributed by  
236 - Jian Ma. This is also discussed in more detail in  
237 - <filename>README-windows.md</filename>.  
238 - </para>  
239 - <para>  
240 - While <type>wchar_t</type> is part of the C++ standard, qpdf uses  
241 - it in only one place in the public API, and it's just in a helper  
242 - function. It is possible to build qpdf on a system that doesn't  
243 - have <type>wchar_t</type>, and it's also possible to compile a  
244 - program that uses qpdf on a system without <type>wchar_t</type> as  
245 - long as you don't call that one method. This is a very unusual  
246 - situation. For a detailed discussion, please see the top-level  
247 - README.md file in qpdf's source distribution.  
248 - </para>  
249 - <para>  
250 - There are some other things you can do with the build. Although  
251 - qpdf uses <application>autoconf</application>, it does not use  
252 - <application>automake</application> but instead uses a  
253 - hand-crafted non-recursive Makefile that requires gnu make. If  
254 - you're really interested, please read the comments in the  
255 - top-level <filename>Makefile</filename>.  
256 - </para>  
257 - </sect1>  
258 - <sect1 id="ref.crypto">  
259 - <title>Crypto Providers</title>  
260 - <para>  
261 - Starting with qpdf 9.1.0, the qpdf library can be built with  
262 - multiple implementations of providers of cryptographic functions,  
263 - which we refer to as "crypto providers." At the time  
264 - of writing, a crypto implementation must provide MD5 and SHA2  
265 - (256, 384, and 512-bit) hashes and RC4 and AES256 with and without  
266 - CBC encryption. In the future, if digital signature is added to  
267 - qpdf, there may be additional requirements beyond this.  
268 - </para>  
269 - <para>  
270 - Starting with qpdf version 9.1.0, the available implementations  
271 - are <literal>native</literal> and <literal>gnutls</literal>. In  
272 - qpdf 10.0.0, <literal>openssl</literal> was added. Additional  
273 - implementations may be added if needed. It is also possible for a  
274 - developer to provide their own implementation without modifying  
275 - the qpdf library.  
276 - </para>  
277 - <sect2 id="ref.crypto.build">  
278 - <title>Build Support For Crypto Providers</title>  
279 - <para>  
280 - When building with qpdf's build system, crypto providers can be  
281 - enabled at build time using various  
282 - <command>./configure</command> options. The default behavior is  
283 - for <command>./configure</command> to discover which crypto  
284 - providers can be supported based on available external libraries,  
285 - to build all available crypto providers, and to use an external  
286 - provider as the default over the native one. This behavior can be  
287 - changed with the following flags to  
288 - <command>./configure</command>:  
289 - <itemizedlist>  
290 - <listitem>  
291 - <para>  
292 - <option>--enable-crypto-<replaceable>x</replaceable></option>  
293 - (where <replaceable>x</replaceable> is a supported crypto  
294 - provider): enable the <replaceable>x</replaceable> crypto  
295 - provider, requiring any external dependencies it needs  
296 - </para>  
297 - </listitem>  
298 - <listitem>  
299 - <para>  
300 - @1@option@1@--disable-crypto-@1@replaceable@1@x@2@replaceable@2@@2@option@2@:  
301 - disable the @1@replaceable@1@x@2@replaceable@2@ provider, and do not  
302 - link against its dependencies even if they are available  
303 - </para>  
304 - </listitem>  
305 - <listitem>  
306 - <para>  
307 - @1@option@1@--with-default-crypto=@1@replaceable@1@x@2@replaceable@2@@2@option@2@:  
308 - make @1@replaceable@1@x@2@replaceable@2@ the default provider even if  
309 - a higher priority one is available  
310 - </para>  
311 - </listitem>  
312 - <listitem>  
313 - <para>  
314 - @1@option@1@--disable-implicit-crypto@2@option@2@: only build crypto  
315 - providers that are explicitly requested with an  
316 - @1@option@1@--enable-crypto-@1@replaceable@1@x@2@replaceable@2@@2@option@2@  
317 - option  
318 - </para>  
319 - </listitem>  
320 - </itemizedlist>  
321 - </para>  
322 - <para>  
323 - For example, if you want to guarantee that the gnutls crypto  
324 - provider is used and that the native provider is not built, you  
325 - could run @1@command@1@./configure --enable-crypto-gnutls  
326 - --disable-implicit-crypto@2@command@2@.  
327 - </para>  
328 - <para>  
329 - If you build qpdf using your own build system, in order for qpdf  
330 - to work at all, you need to enable at least one crypto provider.  
331 - The file @1@filename@1@libqpdf/qpdf/qpdf-config.h.in@2@filename@2@  
332 - provides macros <literal>DEFAULT_CRYPTO</literal>, whose value  
333 - must be a string naming the default crypto provider, and various  
334 - symbols starting with <literal>USE_CRYPTO_</literal>, at least  
335 - one of which has to be enabled. Additionally, you must compile  
336 - the source files that implement a crypto provider. To get a list  
337 - of those files, look at @1@filename@1@libqpdf/build.mk@2@filename@2@. If  
338 - you want to omit a particular crypto provider, as long as its  
339 - <literal>USE_CRYPTO_</literal> symbol is undefined, you can  
340 - completely ignore the source files that belong to a particular  
341 - crypto provider. Additionally, crypto providers may have their  
342 - own external dependencies that can be omitted if the crypto  
343 - provider is not used. For example, if you are building qpdf  
344 - yourself and are using an environment that does not support  
345 - gnutls or openssl, you can ensure that  
346 - <literal>USE_CRYPTO_NATIVE</literal> is defined,  
347 - <literal>USE_CRYPTO_GNUTLS</literal> is not defined, and  
348 - <literal>DEFAULT_CRYPTO</literal> is defined to  
349 - <literal>"native"</literal>. Then you must include the source  
350 - files used in the native implementation, some of which were added  
351 - or renamed from earlier versions, to your build, and you can  
352 - ignore @1@filename@1@QPDFCrypto_gnutls.cc@2@filename@2@. Always consult  
353 - @1@filename@1@libqpdf/build.mk@2@filename@2@ to get the list of source  
354 - files you need to build.  
355 - </para>  
356 - </sect2>  
357 - <sect2 id="ref.crypto.runtime">  
358 - <title>Runtime Crypto Provider Selection</title>  
359 - <para>  
360 - You can use the <option>--show-crypto</option> option to  
361 - <command>qpdf</command> to get a list of available crypto  
362 - providers. The default provider is always listed first, and the  
363 - rest are listed in lexical order. Each crypto provider is listed  
364 - on a line by itself with no other text, enabling the output of  
365 - this command to be used easily in scripts.  
366 - </para>  
367 - <para>  
368 - You can override which crypto provider is used by setting the  
369 - <literal>QPDF_CRYPTO_PROVIDER</literal> environment variable.  
370 - There are few reasons to ever do this, but you might want to do  
371 - it if you were explicitly trying to compare behavior of two  
372 - different crypto providers while testing performance or  
373 - reproducing a bug. It could also be useful for people who are  
374 - implementing their own crypto providers.  
375 - </para>  
376 - </sect2>  
377 - <sect2 id="ref.crypto.develop">  
378 - <title>Crypto Provider Information for Developers</title>  
379 - <para>  
380 - If you are writing code that uses libqpdf and you want to force a  
381 - certain crypto provider to be used, you can call the method  
382 - <function>QPDFCryptoProvider::setDefaultProvider</function>. The  
383 - argument is the name of a built-in or developer-supplied  
384 - provider. To add your own crypto provider, you have to create a  
385 - class derived from <classname>QPDFCryptoImpl</classname> and  
386 - register it with <classname>QPDFCryptoProvider</classname>. For  
387 - additional information, see comments in  
388 - <filename>include/qpdf/QPDFCryptoImpl.hh</filename>.  
389 - </para>  
390 - </sect2>  
391 - <sect2 id="ref.crypto.design">  
392 - <title>Crypto Provider Design Notes</title>  
393 - <para>  
394 - This section describes a few bits of rationale for why the crypto  
395 - provider interface was set up the way it was. You don't need to  
396 - know any of this information, but it's provided for the record  
397 - and in case it's interesting.  
398 - </para>  
399 - <para>  
400 - As a general rule, I want to avoid as much as possible including  
401 - large blocks of code that are conditionally compiled such that,  
402 - in most builds, some code is never built. This is dangerous  
403 - because it makes it very easy for invalid code to creep in  
404 - unnoticed. As such, I want it to be possible to build qpdf with  
405 - all available crypto providers, and this is the way I build qpdf  
406 - for local development. At the same time, if a particular packager  
407 - feels that it is a security liability for qpdf to use crypto  
408 - functionality from anything other than a library that gets considerable  
409 - scrutiny for this specific purpose (such as gnutls, openssl, or  
410 - nettle), then I want to give that packager the ability to  
411 - completely disable qpdf's native implementation. Or if someone  
412 - wants to avoid adding a dependency on one of the external crypto  
413 - providers, I don't want the availability of the provider to  
414 - impose additional external dependencies within that environment.  
415 - Both of these are situations that I know to be true for some  
416 - users of qpdf.  
417 - </para>  
418 - <para>  
419 - I want registration and selection of crypto providers to be  
420 - thread-safe, and I want it to work deterministically for a  
421 - developer to provide their own crypto provider and be able to set  
422 - it up as the default. This was the primary motivation behind  
423 - requiring C++-11 as doing so enabled me to exploit the guaranteed  
424 - thread safety of local block static initialization. The  
425 - <classname>QPDFCryptoProvider</classname> class uses a singleton  
426 - pattern with thread-safe initialization to create the singleton  
427 - instance of <classname>QPDFCryptoProvider</classname> and exposes  
428 - only static methods in its public interface. In this way, if a  
429 - developer wants to call any  
430 - <classname>QPDFCryptoProvider</classname> methods, the library  
431 - guarantees the <classname>QPDFCryptoProvider</classname> is fully  
432 - initialized and all built-in crypto providers are registered.  
433 - Making <classname>QPDFCryptoProvider</classname> actually know  
434 - about all the built-in providers may seem a bit sad at first, but  
435 - this choice makes it extremely clear exactly what the  
436 - initialization behavior is. There's no question about provider  
437 - implementations automatically registering themselves in a  
438 - nondeterministic order. It also means that implementations do not  
439 - need to know anything about the provider interface, which makes  
440 - them easier to test in isolation. Another advantage of this  
441 - approach is that a developer who wants to develop their own  
442 - crypto provider can do so in complete isolation from the qpdf  
443 - library and, with just two calls, can make qpdf use their  
444 - provider in their application. If they decided to contribute  
445 - their code, plugging it into the qpdf library would require a  
446 - very small change to qpdf's source code.  
447 - </para>  
448 - <para>  
449 - The decision to make the crypto provider selectable at runtime  
450 - was one I struggled with a little, but I decided to do it for  
451 - various reasons. Allowing an end user to switch crypto providers  
452 - easily could be very useful for reproducing a potential bug. If a  
453 - user reports a bug that some cryptographic thing is broken, I can  
454 - easily ask that person to try with the  
455 - <literal>QPDF_CRYPTO_PROVIDER</literal> variable set to different  
456 - values. The same could apply in the event of a performance  
457 - problem. This also makes it easier for qpdf's own test suite to  
458 - exercise code with different providers without having to make  
459 - every program that links with qpdf aware of the possibility of  
460 - multiple providers. In qpdf's continuous integration environment,  
461 - the entire test suite is run for each supported crypto provider.  
462 - This is made simple by being able to select the provider using an  
463 - environment variable.  
464 - </para>  
465 - <para>  
466 - Finally, making crypto providers selectable in this way establishes  
467 - a pattern that I may follow again in the future for stream filter  
468 - providers. One could imagine a future enhancement where someone  
469 - could provide their own implementations for basic filters like  
470 - <literal>/FlateDecode</literal> or for other filters that qpdf  
471 - doesn't support. Implementing the registration functions and  
472 - internal storage of registered providers was also easier using  
473 - C++-11's functional interfaces, which was another reason to  
474 - require C++-11 at this time.  
475 - </para>  
476 - </sect2>  
477 - </sect1>  
478 - <sect1 id="ref.packaging">  
479 - <title>Notes for Packagers</title>  
480 - <para>  
481 - If you are packaging qpdf for an operating system distribution,  
482 - here are some things you may want to keep in mind:  
483 - <itemizedlist>  
484 - <listitem>  
485 - <para>  
486 - Starting in qpdf version 9.1.1, qpdf no longer has a runtime  
487 - dependency on perl. This is because fix-qdf was rewritten in  
488 - C++. However, qpdf still has a build-time dependency on perl.  
489 - </para>  
490 - </listitem>  
491 - <listitem>  
492 - <para>  
493 - Make sure you are getting the intended behavior with regard to  
494 - crypto providers. Read <xref linkend="ref.crypto.build"/> for  
495 - details.  
496 - </para>  
497 - </listitem>  
498 - <listitem>  
499 - <para>  
500 - Passing @1@option@1@--enable-show-failed-test-output@2@option@2@ to  
501 - @1@command@1@./configure@2@command@2@ will cause any failed test  
502 - output to be written to the console. This can be very useful  
503 - for seeing test failures generated by autobuilders where you  
504 - can't access qtest.log after the fact.  
505 - </para>  
506 - </listitem>  
507 - <listitem>  
508 - <para>  
509 - If qpdf's build environment detects the presence of autoconf  
510 - and related tools, it will check to ensure that automatically  
511 - generated files are up-to-date with recorded checksums and fail  
512 - if it detects a discrepancy. This feature is intended to  
513 - prevent you from accidentally forgetting to regenerate  
514 - automatic files after modifying their sources. If your  
515 - packaging environment automatically refreshes automatic files,  
516 - it can cause this check to fail. Suppress qpdf's checks by  
517 - passing @1@option@1@--disable-check-autofiles@2@option@2@ to  
518 - @1@command@1@./configure@2@command@2@. This is safe since qpdf's  
519 - @1@command@1@autogen.sh@2@command@2@ just runs autotools in the normal  
520 - way.  
521 - </para>  
522 - </listitem>  
523 - <listitem>  
524 - <para>  
525 - QPDF's @1@command@1@make install@2@command@2@ does not install  
526 - completion files by default, but as a packager, it's good if  
527 - you install them wherever your distribution expects such files  
528 - to go. You can find completion files to install in the  
529 - @1@filename@1@completions@2@filename@2@ directory.  
530 - </para>  
531 - </listitem>  
532 - <listitem>  
533 - <para>  
534 - Packagers are encouraged to install the source files from the  
535 - @1@filename@1@examples@2@filename@2@ directory along with qpdf  
536 - development packages.  
537 - </para>  
538 - </listitem>  
539 - </itemizedlist>  
540 - </para>  
541 - </sect1>  
542 - </chapter>  
543 - <chapter id="ref.using">  
544 - <title>Running QPDF</title>  
545 - <para>  
546 - This chapter describes how to run the qpdf program from the command  
547 - line.  
548 - </para>  
549 - <sect1 id="ref.invocation">  
550 - <title>Basic Invocation</title>  
551 - <para>  
552 - When running qpdf, the basic invocation is as follows:  
553 -  
554 - <programlisting>@1@command@1@qpdf@2@command@2@@1@option@1@ [ @1@replaceable@1@options@2@replaceable@2@ ] { @1@replaceable@1@infilename@2@replaceable@2@ | @1@option@1@--empty@2@option@2@ } [ @1@replaceable@1@page_selection_options@2@replaceable@2@ ] @1@replaceable@1@outfilename@2@replaceable@2@@2@option@2@  
555 -</programlisting>  
556 - This converts PDF file @1@option@1@infilename@2@option@2@ to PDF file  
557 - @1@option@1@outfilename@2@option@2@. The output file is functionally  
558 - identical to the input file but may have been structurally  
559 - reorganized. Also, orphaned objects will be removed from the  
560 - file. Many transformations are available as controlled by the  
561 - options below. In place of @1@option@1@infilename@2@option@2@, the  
562 - parameter @1@option@1@--empty@2@option@2@ may be specified. This causes  
563 - qpdf to use a dummy input file that contains zero pages. The only  
564 - normal use case for using @1@option@1@--empty@2@option@2@ would be if you  
565 - were going to add pages from another source, as discussed in <xref  
566 - linkend="ref.page-selection"/>.  
567 - </para>  
568 - <para>  
569 - If @1@option@1@@filename@2@option@2@ appears as a word anywhere in the  
570 - command-line, it will be read line by line, and each line will be  
571 - treated as a command-line argument. Leading and trailing  
572 - whitespace is intentionally not removed from lines, which makes it  
573 - possible to handle arguments that start or end with spaces. The  
574 - @1@option@1@@-@2@option@2@ option allows arguments to be read from  
575 - standard input. This allows qpdf to be invoked with an arbitrary  
576 - number of arbitrarily long arguments. It is also very useful for  
577 - avoiding having to pass passwords on the command line. Note that  
578 - the @1@option@1@@filename@2@option@2@ can't appear in the middle of an  
579 - argument, so constructs such as @1@option@1@--arg=@option@2@option@2@  
580 - will not work. You would have to include the argument and its  
581 - options together in the arguments file.  
582 - </para>  
583 - <para>  
584 - @1@option@1@outfilename@2@option@2@ does not have to be seekable, even  
585 - when generating linearized files. Specifying  
586 - "@1@option@1@-@2@option@2@" as @1@option@1@outfilename@2@option@2@  
587 - means to write to standard output. If you want to overwrite the  
588 - input file with the output, use the option  
589 - @1@option@1@--replace-input@2@option@2@ and omit the output file name.  
590 - You can't specify the same file as both the input and the output.  
591 - If you do this, qpdf will tell you about the  
592 - @1@option@1@--replace-input@2@option@2@ option.  
593 - </para>  
594 - <para>  
595 - Most options require an output file, but some testing or  
596 - inspection commands do not. These are specifically noted.  
597 - </para>  
598 - <sect2 id="ref.exit-status">  
599 - <title>Exit Status</title>  
600 - <para>  
601 - The exit status of @1@command@1@qpdf@2@command@2@ may be interpreted as  
602 - follows:  
603 - <itemizedlist>  
604 - <listitem>  
605 - <para>  
606 - <literal>0</literal>: no errors or warnings were found. The  
607 - file may still have problems qpdf can't detect. If  
608 - @1@option@1@--warning-exit-0@2@option@2@ was specified, exit status 0  
609 - is used even if there are warnings.  
610 - </para>  
611 - </listitem>  
612 - <listitem>  
613 - <para>  
614 - <literal>2</literal>: errors were found. qpdf was not able to  
615 - fully process the file.  
616 - </para>  
617 - </listitem>  
618 - <listitem>  
619 - <para>  
620 - <literal>3</literal>: qpdf encountered problems that it was  
621 - able to recover from. In some cases, the resulting file may  
622 - still be damaged. Note that qpdf still exits with status  
623 - <literal>3</literal> if it finds warnings even when  
624 - @1@option@1@--no-warn@2@option@2@ is specified. With  
625 - @1@option@1@--warning-exit-0@2@option@2@, warnings without errors  
626 - exit with status 0 instead of 3.  
627 - </para>  
628 - </listitem>  
629 - </itemizedlist>  
630 - Note that @1@command@1@qpdf@2@command@2@ never exits with status  
631 - <literal>1</literal>. If you get an exit status of  
632 - <literal>1</literal>, it was something else, like the shell not  
633 - being able to find or execute @1@command@1@qpdf@2@command@2@.  
634 - </para>  
635 - </sect2>  
636 - </sect1>  
637 - <sect1 id="ref.shell-completion">  
638 - <title>Shell Completion</title>  
639 - <para>  
640 - Starting in qpdf version 8.3.0, qpdf provides its own completion  
641 - support for zsh and bash. You can enable bash completion with  
642 - @1@command@1@eval $(qpdf --completion-bash)@2@command@2@ and zsh  
643 - completion with @1@command@1@eval $(qpdf --completion-zsh)@2@command@2@.  
644 - If @1@command@1@qpdf@2@command@2@ is not in your path, you should invoke  
645 - it above with an absolute path. If you invoke it with a relative  
646 - path, it will warn you, and the completion won't work if you're in  
647 - a different directory.  
648 - </para>  
649 - <para>  
650 - qpdf will use <literal>argv[0]</literal> to figure out where its  
651 - executable is. This may produce unwanted results in some cases,  
652 - especially if you are trying to use completion with a copy of qpdf  
653 - that is built from source. You can specify a full path to the qpdf  
654 - you want to use for completion in the  
655 - <literal>QPDF_EXECUTABLE</literal> environment variable.  
656 - </para>  
657 - </sect1>  
658 - <sect1 id="ref.basic-options">  
659 - <title>Basic Options</title>  
660 - <para>  
661 - The following options are the most common ones and perform  
662 - commonly needed transformations.  
663 - <variablelist>  
664 - <varlistentry>  
665 - <term>@1@option@1@--help@2@option@2@</term>  
666 - <listitem>  
667 - <para>  
668 - Display command-line invocation help.  
669 - </para>  
670 - </listitem>  
671 - </varlistentry>  
672 - <varlistentry>  
673 - <term>@1@option@1@--version@2@option@2@</term>  
674 - <listitem>  
675 - <para>  
676 - Display the current version of qpdf.  
677 - </para>  
678 - </listitem>  
679 - </varlistentry>  
680 - <varlistentry>  
681 - <term>@1@option@1@--copyright@2@option@2@</term>  
682 - <listitem>  
683 - <para>  
684 - Show detailed copyright information.  
685 - </para>  
686 - </listitem>  
687 - </varlistentry>  
688 - <varlistentry>  
689 - <term>@1@option@1@--show-crypto@2@option@2@</term>  
690 - <listitem>  
691 - <para>  
692 - Show a list of available crypto providers, each on a line by  
693 - itself. The default provider is always listed first. See <xref  
694 - linkend="ref.crypto"/> for more information about crypto  
695 - providers.  
696 - </para>  
697 - </listitem>  
698 - </varlistentry>  
699 - <varlistentry>  
700 - <term>@1@option@1@--completion-bash@2@option@2@</term>  
701 - <listitem>  
702 - <para>  
703 - Output a completion command you can eval to enable shell  
704 - completion from bash.  
705 - </para>  
706 - </listitem>  
707 - </varlistentry>  
708 - <varlistentry>  
709 - <term>@1@option@1@--completion-zsh@2@option@2@</term>  
710 - <listitem>  
711 - <para>  
712 - Output a completion command you can eval to enable shell  
713 - completion from zsh.  
714 - </para>  
715 - </listitem>  
716 - </varlistentry>  
717 - <varlistentry>  
718 - <term>@1@option@1@--password=@1@replaceable@1@password@2@replaceable@2@@2@option@2@</term>  
719 - <listitem>  
720 - <para>  
721 - Specifies a password for accessing encrypted files. To read  
722 - the password from a file or standard input, you can use  
723 - @1@option@1@--password-file@2@option@2@, added in qpdf 10.2. Note  
724 - that you can also use @1@option@1@@filename@2@option@2@ or  
725 - @1@option@1@@-@2@option@2@ as described above to put the password in  
726 - a file or pass it via standard input, but you would do so by  
727 - specifying the entire  
728 - @1@option@1@--password=@1@replaceable@1@password@2@replaceable@2@@2@option@2@  
729 - option in the file. Syntax such as  
730 - @1@option@1@--password=@filename@2@option@2@ won't work since  
731 - @1@option@1@@filename@2@option@2@ is not recognized in the middle of  
732 - an argument.  
733 - </para>  
734 - </listitem>  
735 - </varlistentry>  
736 - <varlistentry>  
737 - <term>@1@option@1@--password-file=@1@replaceable@1@filename@2@replaceable@2@@2@option@2@</term>  
738 - <listitem>  
739 - <para>  
740 - Reads the first line from the specified file and uses it as  
741 - the password for accessing encrypted files.  
742 - @1@option@1@@1@replaceable@1@filename@2@replaceable@2@@2@option@2@ may be  
743 - <literal>-</literal> to read the password from standard input.  
744 - Note that, in this case, the password is echoed and there is  
745 - no prompt, so use with caution.  
746 - </para>  
747 - </listitem>  
748 - </varlistentry>  
749 - <varlistentry>  
750 - <term>@1@option@1@--is-encrypted@2@option@2@</term>  
751 - <listitem>  
752 - <para>  
753 - Silently exit with status 0 if the file is encrypted or status  
754 - 2 if the file is not encrypted. This is useful for shell  
755 - scripts. Other options are ignored if this is given. This  
756 - option is mutually exclusive with  
757 - @1@option@1@--requires-password@2@option@2@. Both this option and  
758 - @1@option@1@--requires-password@2@option@2@ exit with status 2 for  
759 - non-encrypted files.  
760 - </para>  
761 - </listitem>  
762 - </varlistentry>  
763 - <varlistentry>  
764 - <term>@1@option@1@--requires-password@2@option@2@</term>  
765 - <listitem>  
766 - <para>  
767 - Silently exit with status 0 if a password (other than as  
768 - supplied) is required. Exit with status 2 if the file is not  
769 - encrypted. Exit with status 3 if the file is encrypted but  
770 - requires no password or the correct password has been  
771 - supplied. This is useful for shell scripts. Note that any  
772 - supplied password is used when opening the file. When used  
773 - with a @1@option@1@--password@2@option@2@ option, this option can be  
774 - used to check the correctness of the password. In that case,  
775 - an exit status of 3 means the file works with the supplied  
776 - password. This option is mutually exclusive with  
777 - @1@option@1@--is-encrypted@2@option@2@. Both this option and  
778 - @1@option@1@--is-encrypted@2@option@2@ exit with status 2 for  
779 - non-encrypted files.  
780 - </para>  
781 - </listitem>  
782 - </varlistentry>  
783 - <varlistentry>  
784 - <term>@1@option@1@--verbose@2@option@2@</term>  
785 - <listitem>  
786 - <para>  
787 - Increase verbosity of output. For now, this just prints some  
788 - indication of any file that it creates.  
789 - </para>  
790 - </listitem>  
791 - </varlistentry>  
792 - <varlistentry>  
793 - <term>@1@option@1@--progress@2@option@2@</term>  
794 - <listitem>  
795 - <para>  
796 - Indicate progress while writing files.  
797 - </para>  
798 - </listitem>  
799 - </varlistentry>  
800 - <varlistentry>  
801 - <term>@1@option@1@--no-warn@2@option@2@</term>  
802 - <listitem>  
803 - <para>  
804 - Suppress writing of warnings to stderr. If warnings were  
805 - detected and suppressed, @1@command@1@qpdf@2@command@2@ will still  
806 - exit with exit code 3. See also  
807 - @1@option@1@--warning-exit-0@2@option@2@.  
808 - </para>  
809 - </listitem>  
810 - </varlistentry>  
811 - <varlistentry>  
812 - <term>@1@option@1@--warning-exit-0@2@option@2@</term>  
813 - <listitem>  
814 - <para>  
815 - If warnings are found but no errors, exit with exit code 0  
816 - instead of 3. When combined with @1@option@1@--no-warn@2@option@2@, the  
817 - effect is for @1@command@1@qpdf@2@command@2@ to completely ignore  
818 - warnings.  
819 - </para>  
820 - </listitem>  
821 - </varlistentry>  
822 - <varlistentry>  
823 - <term>@1@option@1@--linearize@2@option@2@</term>  
824 - <listitem>  
825 - <para>  
826 - Causes generation of a linearized (web-optimized) output file.  
827 - </para>  
828 - </listitem>  
829 - </varlistentry>  
830 - <varlistentry>  
831 - <term>@1@option@1@--replace-input@2@option@2@</term>  
832 - <listitem>  
833 - <para>  
834 - If specified, the output file name should be omitted. This  
835 - option tells qpdf to replace the input file with the output.  
836 - It does this by writing to  
837 - @1@filename@1@@1@replaceable@1@infilename@2@replaceable@2@.~qpdf-temp#@2@filename@2@  
838 - and, when done, overwriting the input file with the temporary  
839 - file. If there were any warnings, the original input is saved  
840 - as  
841 - @1@filename@1@@1@replaceable@1@infilename@2@replaceable@2@.~qpdf-orig@2@filename@2@.  
842 - </para>  
843 - </listitem>  
844 - </varlistentry>  
845 - <varlistentry>  
846 - <term>@1@option@1@--copy-encryption=file@2@option@2@</term>  
847 - <listitem>  
848 - <para>  
849 - Encrypt the file using the same encryption parameters,  
850 - including user and owner password, as the specified file. Use  
851 - @1@option@1@--encryption-file-password@2@option@2@ to specify a password  
852 - if one is needed to open this file. Note that copying the  
853 - encryption parameters from a file also copies the first half  
854 - of <literal>/ID</literal> from the file since this is part of  
855 - the encryption parameters.  
856 - </para>  
857 - </listitem>  
858 - </varlistentry>  
859 - <varlistentry>  
860 - <term>@1@option@1@--encryption-file-password=password@2@option@2@</term>  
861 - <listitem>  
862 - <para>  
863 - If the file specified with @1@option@1@--copy-encryption@2@option@2@  
864 - requires a password, specify the password using this option.  
865 - Note that only one of the user or owner password is required.  
866 - Both passwords will be preserved since QPDF does not  
867 - distinguish between the two passwords. It is possible to  
868 - preserve encryption parameters, including the owner password,  
869 - from a file even if you don't know the file's owner password.  
870 - </para>  
871 - </listitem>  
872 - </varlistentry>  
873 - <varlistentry>  
874 - <term>@1@option@1@--allow-weak-crypto@2@option@2@</term>  
875 - <listitem>  
876 - <para>  
877 - Starting with version 10.4, qpdf issues warnings when  
878 - requested to create files using RC4 encryption. This option  
879 - suppresses those warnings. In future versions of qpdf, qpdf  
880 - will refuse to create files with weak cryptography when this  
881 - flag is not given. See <xref linkend="ref.weak-crypto"/> for  
882 - additional details.  
883 - </para>  
884 - </listitem>  
885 - </varlistentry>  
886 - <varlistentry>  
887 - <term>@1@option@1@--encrypt options --@2@option@2@</term>  
888 - <listitem>  
889 - <para>  
890 - Causes generation of an encrypted output file. Please see <xref  
891 - linkend="ref.encryption-options"/> for details on how to  
892 - specify encryption parameters.  
893 - </para>  
894 - </listitem>  
895 - </varlistentry>  
896 - <varlistentry>  
897 - <term>@1@option@1@--decrypt@2@option@2@</term>  
898 - <listitem>  
899 - <para>  
900 - Removes any encryption on the file. A password must be  
901 - supplied if the file is password protected.  
902 - </para>  
903 - </listitem>  
904 - </varlistentry>  
905 - <varlistentry>  
906 - <term>@1@option@1@--password-is-hex-key@2@option@2@</term>  
907 - <listitem>  
908 - <para>  
909 - Overrides the usual computation/retrieval of the PDF file's  
910 - encryption key from user/owner password with an explicit  
911 - specification of the encryption key. When this option is  
912 - specified, the argument to the @1@option@1@--password@2@option@2@  
913 - option is interpreted as a hexadecimal-encoded key value. This  
914 - only applies to the password used to open the main input file.  
915 - It does not apply to other files opened by  
916 - @1@option@1@--pages@2@option@2@ or other options or to files being  
917 - written.  
918 - </para>  
919 - <para>  
920 - Most users will never have a need for this option, and no  
921 - standard viewers support this mode of operation, but it can be  
922 - useful for forensic or investigatory purposes. For example, if  
923 - a PDF file is encrypted with an unknown password, a  
924 - brute-force attack using the key directly is sometimes more  
925 - efficient than one using the password. Also, if a file is  
926 - heavily damaged, it may be possible to derive the encryption  
927 - key and recover parts of the file using it directly. To expose  
928 - the encryption key used by an encrypted file that you can open  
929 - normally, use the @1@option@1@--show-encryption-key@2@option@2@  
930 - option.  
931 - </para>  
932 - </listitem>  
933 - </varlistentry>  
934 - <varlistentry>  
935 - <term>@1@option@1@--suppress-password-recovery@2@option@2@</term>  
936 - <listitem>  
937 - <para>  
938 - Ordinarily, qpdf attempts to automatically compensate for  
939 - passwords specified in the wrong character encoding. This  
940 - option suppresses that behavior. Under normal conditions,  
941 - there are no reasons to use this option. See <xref  
942 - linkend="ref.unicode-passwords"/> for a discussion.  
943 - </para>  
944 - </listitem>  
945 - </varlistentry>  
946 - <varlistentry>  
947 - <term>@1@option@1@--password-mode=@1@replaceable@1@mode@2@replaceable@2@@2@option@2@</term>  
948 - <listitem>  
949 - <para>  
950 - This option can be used to fine-tune how qpdf interprets  
951 - Unicode (non-ASCII) password strings passed on the command  
952 - line. With the exception of the @1@option@1@hex-bytes@2@option@2@  
953 - mode, these only apply to passwords provided when encrypting  
954 - files. The @1@option@1@hex-bytes@2@option@2@ mode also applies to  
955 - passwords specified for reading files. For additional  
956 - discussion of the supported password modes and when you might  
957 - want to use them, see <xref linkend="ref.unicode-passwords"/>.  
958 - The following modes are supported:  
959 - <itemizedlist>  
960 - <listitem>  
961 - <para>  
962 - @1@option@1@auto@2@option@2@: Automatically determine whether the  
963 - specified password is a properly encoded Unicode (UTF-8)  
964 - string, and transcode it as required by the PDF spec based  
965 - on the type of encryption being applied. On Windows starting  
966 - with version 8.4.0, and on almost all other modern  
967 - platforms, incoming passwords will be properly encoded in  
968 - UTF-8, so this is almost always what you want.  
969 - </para>  
970 - </listitem>  
971 - <listitem>  
972 - <para>  
973 - @1@option@1@unicode@2@option@2@: Tells qpdf that the incoming  
974 - password is UTF-8, overriding whatever its automatic  
975 - detection determines. The only difference between this mode  
976 - and @1@option@1@auto@2@option@2@ is that qpdf will fail with an  
977 - error message if the password is not valid UTF-8 instead of  
978 - falling back to @1@option@1@bytes@2@option@2@ mode with a warning.  
979 - </para>  
980 - </listitem>  
981 - <listitem>  
982 - <para>  
983 - @1@option@1@bytes@2@option@2@: Interpret the password as a literal  
984 - byte string. For non-Windows platforms, this is what  
985 - versions of qpdf prior to 8.4.0 did. For Windows platforms,  
986 - there is no way to specify strings of binary data on the  
987 - command line directly, but you can use the  
988 - @1@option@1@@filename@2@option@2@ option to do it, in which case  
989 - this option forces qpdf to respect the string of bytes as  
990 - provided. This option will allow you to encrypt PDF files  
991 - with passwords that will not be usable by other readers.  
992 - </para>  
993 - </listitem>  
994 - <listitem>  
995 - <para>  
996 - @1@option@1@hex-bytes@2@option@2@: Interpret the password as a  
997 - hex-encoded string. This provides a way to pass binary data  
998 - as a password on all platforms including Windows. As with  
999 - @1@option@1@bytes@2@option@2@, this option may allow creation of  
1000 - files that can't be opened by other readers. This mode  
1001 - affects qpdf's interpretation of passwords specified for  
1002 - decrypting files as well as for encrypting them. It makes  
1003 - it possible to specify strings that are encoded in some  
1004 - manner other than the system's default encoding.  
1005 - </para>  
1006 - </listitem>  
1007 - </itemizedlist>  
1008 - </para>  
1009 - </listitem>  
1010 - </varlistentry>  
1011 - <varlistentry>  
1012 - <term>@1@option@1@--rotate=[+|-]angle[:page-range]@2@option@2@</term>  
1013 - <listitem>  
1014 - <para>  
1015 - Apply rotation to specified pages. The  
1016 - @1@option@1@page-range@2@option@2@ portion of the option value has  
1017 - the same format as page ranges in <xref  
1018 - linkend="ref.page-selection"/>. If the page range is omitted,  
1019 - the rotation is applied to all pages. The  
1020 - @1@option@1@angle@2@option@2@ portion of the parameter may be either  
1021 - 0, 90, 180, or 270. If preceded by @1@option@1@+@2@option@2@ or  
1022 - @1@option@1@-@2@option@2@, the angle is added to or subtracted from  
1023 - the specified pages' original rotations. This is almost always  
1024 - what you want. Otherwise the pages' rotations are set to the  
1025 - exact value, which may cause the appearances of the pages to  
1026 - be inconsistent, especially for scans. For example, the  
1027 - command @1@command@1@qpdf in.pdf out.pdf --rotate=+90:2,4,6  
1028 - --rotate=180:7-8@2@command@2@ would rotate pages 2, 4, and 6 90  
1029 - degrees clockwise from their original rotation and force the  
1030 - rotation of pages 7 through 8 to 180 degrees regardless of  
1031 - their original rotation, and the command @1@command@1@qpdf in.pdf  
1032 - out.pdf --rotate=+180@2@command@2@ would rotate all pages by 180  
1033 - degrees.  
1034 - </para>  
1035 - </listitem>  
1036 - </varlistentry>  
1037 - <varlistentry>  
1038 - <term>@1@option@1@--keep-files-open=@1@replaceable@1@[yn]@2@replaceable@2@@2@option@2@</term>  
1039 - <listitem>  
1040 - <para>  
1041 - This option controls whether qpdf keeps individual files open  
1042 - while merging. Prior to version 8.1.0, qpdf always kept all  
1043 - files open, but this meant that the number of files that could  
1044 - be merged was limited by the operating system's open file  
1045 - limit. Version 8.1.0 opened files as they were referenced and  
1046 - closed them after each read, but this caused a major  
1047 - performance impact. Version 8.2.0 optimized the performance  
1048 - but did so in a way that, for local file systems, there was a  
1049 - small but unavoidable performance hit, but for networked file  
1050 - systems, the performance impact could be very high. Starting  
1051 - with version 8.2.1, the default behavior is that files are  
1052 - kept open if no more than 200 files are specified, but that  
1053 - the behavior can be explicitly overridden with the  
1054 - @1@option@1@--keep-files-open@2@option@2@ flag. If you are merging  
1055 - more than 200 files but fewer than the operating system's max  
1056 - open files limit, you may want to use  
1057 - @1@option@1@--keep-files-open=y@2@option@2@, especially if working  
1058 - over a networked file system. If you are using a local file  
1059 - system where the overhead is low and you might sometimes merge  
1060 - more than the OS limit's number of files from a script and are  
1061 - not worried about a few seconds additional processing time,  
1062 - you may want to specify @1@option@1@--keep-files-open=n@2@option@2@.  
1063 - The threshold for switching may be changed from the default  
1064 - 200 with the @1@option@1@--keep-files-open-threshold@2@option@2@  
1065 - option.  
1066 - </para>  
1067 - </listitem>  
1068 - </varlistentry>  
1069 - <varlistentry>  
1070 - <term>@1@option@1@--keep-files-open-threshold=@1@replaceable@1@count@2@replaceable@2@@2@option@2@</term>  
1071 - <listitem>  
1072 - <para>  
1073 - If specified, overrides the default value of 200 used as the  
1074 - threshold for qpdf deciding whether or not to keep files open.  
1075 - See @1@option@1@--keep-files-open@2@option@2@ for details.  
1076 - </para>  
1077 - </listitem>  
1078 - </varlistentry>  
1079 - <varlistentry>  
1080 - <term>@1@option@1@--pages options --@2@option@2@</term>  
1081 - <listitem>  
1082 - <para>  
1083 - Select specific pages from one or more input files. See <xref  
1084 - linkend="ref.page-selection"/> for details on how to do page  
1085 - selection (splitting and merging).  
1086 - </para>  
1087 - </listitem>  
1088 - </varlistentry>  
1089 - <varlistentry>  
1090 - <term>@1@option@1@--collate=@1@replaceable@1@n@2@replaceable@2@@2@option@2@</term>  
1091 - <listitem>  
1092 - <para>  
1093 - When specified, collate rather than concatenate pages from  
1094 - files specified with @1@option@1@--pages@2@option@2@. With a numeric  
1095 - argument, collate in groups of @1@replaceable@1@n@2@replaceable@2@.  
1096 - The default is 1. See <xref linkend="ref.page-selection"/> for  
1097 - additional details.  
1098 - </para>  
1099 - </listitem>  
1100 - </varlistentry>  
1101 - <varlistentry>  
1102 - <term>@1@option@1@--flatten-rotation@2@option@2@</term>  
1103 - <listitem>  
1104 - <para>  
1105 - For each page that is rotated using the  
1106 - <literal>/Rotate</literal> key in the page's dictionary,  
1107 - remove the <literal>/Rotate</literal> key and implement the  
1108 - identical rotation semantics by modifying the page's contents.  
1109 - This option can be useful to prepare files for buggy PDF  
1110 - applications that don't properly handle rotated pages.  
1111 - </para>  
1112 - </listitem>  
1113 - </varlistentry>  
1114 - <varlistentry>  
1115 - <term>@1@option@1@--split-pages=[n]@2@option@2@</term>  
1116 - <listitem>  
1117 - <para>  
1118 - Write each group of @1@option@1@n@2@option@2@ pages to a separate  
1119 - output file. If @1@option@1@n@2@option@2@ is not specified, create  
1120 - single pages. Output file names are generated as follows:  
1121 - <itemizedlist>  
1122 - <listitem>  
1123 - <para>  
1124 - If the string <literal>%d</literal> appears in the output  
1125 - file name, it is replaced with a range of zero-padded page  
1126 - numbers starting from 1.  
1127 - </para>  
1128 - </listitem>  
1129 - <listitem>  
1130 - <para>  
1131 - Otherwise, if the output file name ends in  
1132 - @1@filename@1@.pdf@2@filename@2@ (case insensitive), a zero-padded  
1133 - page range, preceded by a dash, is inserted before the file  
1134 - extension.  
1135 - </para>  
1136 - </listitem>  
1137 - <listitem>  
1138 - <para>  
1139 - Otherwise, the file name is appended with a zero-padded  
1140 - page range preceded by a dash.  
1141 - </para>  
1142 - </listitem>  
1143 - </itemizedlist>  
1144 - </para>  
1145 - <para>  
1146 - Page ranges are a single number in the case of single-page  
1147 - groups or two numbers separated by a dash otherwise.  
1148 - For example, if @1@filename@1@infile.pdf@2@filename@2@ has 12 pages  
1149 - <itemizedlist>  
1150 - <listitem>  
1151 - <para>  
1152 - @1@command@1@qpdf --split-pages infile.pdf %d-out@2@command@2@  
1153 - would generate files @1@filename@1@01-out@2@filename@2@ through  
1154 - @1@filename@1@12-out@2@filename@2@  
1155 - </para>  
1156 - </listitem>  
1157 - <listitem>  
1158 - <para>  
1159 - @1@command@1@qpdf --split-pages=2 infile.pdf  
1160 - outfile.pdf@2@command@2@ would generate files  
1161 - @1@filename@1@outfile-01-02.pdf@2@filename@2@ through  
1162 - @1@filename@1@outfile-11-12.pdf@2@filename@2@  
1163 - </para>  
1164 - </listitem>  
1165 - <listitem>  
1166 - <para>  
1167 - @1@command@1@qpdf --split-pages infile.pdf  
1168 - something.else@2@command@2@ would generate files  
1169 - @1@filename@1@something.else-01@2@filename@2@ through  
1170 - @1@filename@1@something.else-12@2@filename@2@  
1171 - </para>  
1172 - </listitem>  
1173 - </itemizedlist>  
1174 - </para>  
1175 - <para>  
1176 - Note that outlines, threads, and other global features of the  
1177 - original PDF file are not preserved. For each page of output,  
1178 - this option creates an empty PDF and copies a single page from  
1179 - the input into it. If you require the global data, you will  
1180 - have to run @1@command@1@qpdf@2@command@2@ with the  
1181 - @1@option@1@--pages@2@option@2@ option once for each file. Using  
1182 - @1@option@1@--split-pages@2@option@2@ is much faster if you don't  
1183 - require the global data.  
1184 - </para>  
1185 - </listitem>  
1186 - </varlistentry>  
1187 - <varlistentry>  
1188 - <term>@1@option@1@--overlay options --@2@option@2@</term>  
1189 - <listitem>  
1190 - <para>  
1191 - Overlay pages from another file onto the output pages. See  
1192 - <xref linkend="ref.overlay-underlay"/> for details on  
1193 - overlay/underlay.  
1194 - </para>  
1195 - </listitem>  
1196 - </varlistentry>  
1197 - <varlistentry>  
1198 - <term>@1@option@1@--underlay options --@2@option@2@</term>  
1199 - <listitem>  
1200 - <para>  
1201 - Underlay pages from another file beneath the output pages. See  
1202 - <xref linkend="ref.overlay-underlay"/> for details on  
1203 - overlay/underlay.  
1204 - </para>  
1205 - </listitem>  
1206 - </varlistentry>  
1207 - </variablelist>  
1208 - </para>  
1209 - <para>  
1210 - Password-protected files may be opened by specifying a password.  
1211 - By default, qpdf will preserve any encryption data associated with  
1212 - a file. If @1@option@1@--decrypt@2@option@2@ is specified, qpdf will  
1213 - attempt to remove any encryption information. If  
1214 - @1@option@1@--encrypt@2@option@2@ is specified, qpdf will replace the  
1215 - document's encryption parameters with whatever is specified.  
1216 - </para>  
1217 - <para>  
1218 - Note that qpdf does not obey encryption restrictions already  
1219 - imposed on the file. Doing so would be meaningless since qpdf can  
1220 - be used to remove encryption from the file entirely. This  
1221 - functionality is not intended to be used for bypassing copyright  
1222 - restrictions or other restrictions placed on files by their  
1223 - producers.  
1224 - </para>  
1225 - <para>  
1226 - Prior to 8.4.0, in the case of passwords that contain characters  
1227 - that fall outside of 7-bit US-ASCII, qpdf left the burden of  
1228 - supplying properly encoded encryption and decryption passwords to  
1229 - the user. Starting in qpdf 8.4.0, qpdf does this automatically in  
1230 - most cases. For an in-depth discussion, please see <xref  
1231 - linkend="ref.unicode-passwords"/>. Previous versions of this  
1232 - manual described workarounds using the @1@command@1@iconv@2@command@2@  
1233 - command. Such workarounds are no longer required or recommended  
1234 - with qpdf 8.4.0. However, for backward compatibility, qpdf  
1235 - attempts to detect those workarounds and do the right thing in  
1236 - most cases.  
1237 - </para>  
1238 - </sect1>  
1239 - <sect1 id="ref.encryption-options">  
1240 - <title>Encryption Options</title>  
1241 - <para>  
1242 - To change the encryption parameters of a file, use the --encrypt  
1243 - flag. The syntax is  
1244 -  
1245 - <programlisting>@1@option@1@--encrypt @1@replaceable@1@user-password@2@replaceable@2@ @1@replaceable@1@owner-password@2@replaceable@2@ @1@replaceable@1@key-length@2@replaceable@2@ [ @1@replaceable@1@restrictions@2@replaceable@2@ ] --@2@option@2@  
1246 -</programlisting>  
1247 - Note that "@1@option@1@--@2@option@2@" terminates parsing of  
1248 - encryption flags and must be present even if no restrictions are  
1249 - present.  
1250 - </para>  
1251 - <para>  
1252 - Either or both of the user password and the owner password may be  
1253 - empty strings. Starting in qpdf 10.2, qpdf defaults to not  
1254 - allowing creation of PDF files with a non-empty user password, an  
1255 - empty owner password, and a 256-bit key since such files can be  
1256 - opened with no password. If you want to create such files, specify  
1257 - the encryption option @1@option@1@--allow-insecure@2@option@2@, as  
1258 - described below.  
1259 - </para>  
1260 - <para>  
1261 - The value for  
1262 - @1@option@1@@1@replaceable@1@key-length@2@replaceable@2@@2@option@2@ may be 40,  
1263 - 128, or 256. The restriction flags are dependent upon key length.  
1264 - When no additional restrictions are given, the default is to be  
1265 - fully permissive.  
1266 - </para>  
1267 - <para>  
1268 - If @1@option@1@@1@replaceable@1@key-length@2@replaceable@2@@2@option@2@ is 40,  
1269 - the following restriction options are available:  
1270 - <variablelist>  
1271 - <varlistentry>  
1272 - <term>@1@option@1@--print=[yn]@2@option@2@</term>  
1273 - <listitem>  
1274 - <para>  
1275 - Determines whether or not to allow printing.  
1276 - </para>  
1277 - </listitem>  
1278 - </varlistentry>  
1279 - <varlistentry>  
1280 - <term>@1@option@1@--modify=[yn]@2@option@2@</term>  
1281 - <listitem>  
1282 - <para>  
1283 - Determines whether or not to allow document modification.  
1284 - </para>  
1285 - </listitem>  
1286 - </varlistentry>  
1287 - <varlistentry>  
1288 - <term>@1@option@1@--extract=[yn]@2@option@2@</term>  
1289 - <listitem>  
1290 - <para>  
1291 - Determines whether or not to allow text/image extraction.  
1292 - </para>  
1293 - </listitem>  
1294 - </varlistentry>  
1295 - <varlistentry>  
1296 - <term>@1@option@1@--annotate=[yn]@2@option@2@</term>  
1297 - <listitem>  
1298 - <para>  
1299 - Determines whether or not to allow comments and form fill-in  
1300 - and signing.  
1301 - </para>  
1302 - </listitem>  
1303 - </varlistentry>  
1304 - </variablelist>  
1305 - If @1@option@1@@1@replaceable@1@key-length@2@replaceable@2@@2@option@2@ is 128,  
1306 - the following restriction options are available:  
1307 - <variablelist>  
1308 - <varlistentry>  
1309 - <term>@1@option@1@--accessibility=[yn]@2@option@2@</term>  
1310 - <listitem>  
1311 - <para>  
1312 - Determines whether or not to allow accessibility to visually  
1313 - impaired. The qpdf library disregards this field when AES is  
1314 - used or when 256-bit encryption is used. You should really  
1315 - never disable accessibility, but qpdf lets you do it in case  
1316 - you need to configure a file this way for testing purposes.  
1317 - The PDF spec says that conforming readers should disregard  
1318 - this permission and always allow accessibility.  
1319 - </para>  
1320 - </listitem>  
1321 - </varlistentry>  
1322 - <varlistentry>  
1323 - <term>@1@option@1@--extract=[yn]@2@option@2@</term>  
1324 - <listitem>  
1325 - <para>  
1326 - Determines whether or not to allow text/graphic extraction.  
1327 - </para>  
1328 - </listitem>  
1329 - </varlistentry>  
1330 - <varlistentry>  
1331 - <term>@1@option@1@--assemble=[yn]@2@option@2@</term>  
1332 - <listitem>  
1333 - <para>  
1334 - Determines whether document assembly (rotation and reordering  
1335 - of pages) is allowed.  
1336 - </para>  
1337 - </listitem>  
1338 - </varlistentry>  
1339 - <varlistentry>  
1340 - <term>@1@option@1@--annotate=[yn]@2@option@2@</term>  
1341 - <listitem>  
1342 - <para>  
1343 - Determines whether modifying annotations is allowed. This  
1344 - includes adding comments and filling in form fields. Also  
1345 - allows editing of form fields if  
1346 - @1@option@1@--modify-other=y@2@option@2@ is given.  
1347 - </para>  
1348 - </listitem>  
1349 - </varlistentry>  
1350 - <varlistentry>  
1351 - <term>@1@option@1@--form=[yn]@2@option@2@</term>  
1352 - <listitem>  
1353 - <para>  
1354 - Determines whether filling form fields is allowed.  
1355 - </para>  
1356 - </listitem>  
1357 - </varlistentry>  
1358 - <varlistentry>  
1359 - <term>@1@option@1@--modify-other=[yn]@2@option@2@</term>  
1360 - <listitem>  
1361 - <para>  
1362 - Allow all document editing except those controlled separately  
1363 - by the @1@option@1@--assemble@2@option@2@,  
1364 - @1@option@1@--annotate@2@option@2@, and @1@option@1@--form@2@option@2@  
1365 - options.  
1366 - </para>  
1367 - </listitem>  
1368 - </varlistentry>  
1369 - <varlistentry>  
1370 - <term>@1@option@1@--print=@1@replaceable@1@print-opt@2@replaceable@2@@2@option@2@</term>  
1371 - <listitem>  
1372 - <para>  
1373 - Controls printing access.  
1374 - @1@option@1@@1@replaceable@1@print-opt@2@replaceable@2@@2@option@2@ may be  
1375 - one of the following:  
1376 - <itemizedlist>  
1377 - <listitem>  
1378 - <para>  
1379 - @1@option@1@full@2@option@2@: allow full printing  
1380 - </para>  
1381 - </listitem>  
1382 - <listitem>  
1383 - <para>  
1384 - @1@option@1@low@2@option@2@: allow low-resolution printing only  
1385 - </para>  
1386 - </listitem>  
1387 - <listitem>  
1388 - <para>  
1389 - @1@option@1@none@2@option@2@: disallow printing  
1390 - </para>  
1391 - </listitem>  
1392 - </itemizedlist>  
1393 - </para>  
1394 - </listitem>  
1395 - </varlistentry>  
1396 - <varlistentry>  
1397 - <term>@1@option@1@--modify=@1@replaceable@1@modify-opt@2@replaceable@2@@2@option@2@</term>  
1398 - <listitem>  
1399 - <para>  
1400 - Controls modify access. This way of controlling modify access  
1401 - has less granularity than new options added in qpdf 8.4.  
1402 - @1@option@1@@1@replaceable@1@modify-opt@2@replaceable@2@@2@option@2@ may be  
1403 - one of the following:  
1404 - <itemizedlist>  
1405 - <listitem>  
1406 - <para>  
1407 - @1@option@1@all@2@option@2@: allow full document modification  
1408 - </para>  
1409 - </listitem>  
1410 - <listitem>  
1411 - <para>  
1412 - @1@option@1@annotate@2@option@2@: allow comment authoring, form  
1413 - operations, and document assembly  
1414 - </para>  
1415 - </listitem>  
1416 - <listitem>  
1417 - <para>  
1418 - @1@option@1@form@2@option@2@: allow form field fill-in and signing  
1419 - and document assembly  
1420 - </para>  
1421 - </listitem>  
1422 - <listitem>  
1423 - <para>  
1424 - @1@option@1@assembly@2@option@2@: allow document assembly only  
1425 - </para>  
1426 - </listitem>  
1427 - <listitem>  
1428 - <para>  
1429 - @1@option@1@none@2@option@2@: allow no modifications  
1430 - </para>  
1431 - </listitem>  
1432 - </itemizedlist>  
1433 - Using the @1@option@1@--modify@2@option@2@ option does not allow you  
1434 - to create certain combinations of permissions such as allowing  
1435 - form filling but not allowing document assembly. Starting with  
1436 - qpdf 8.4, you can either just use the other options to control  
1437 - fields individually, or you can use something like  
1438 - @1@option@1@--modify=form --assemble=n@2@option@2@ to fine tune.  
1439 - </para>  
1440 - </listitem>  
1441 - </varlistentry>  
1442 - <varlistentry>  
1443 - <term>@1@option@1@--cleartext-metadata@2@option@2@</term>  
1444 - <listitem>  
1445 - <para>  
1446 - If specified, any metadata stream in the document will be left  
1447 - unencrypted even if the rest of the document is encrypted.  
1448 - This also forces the PDF version to be at least 1.5.  
1449 - </para>  
1450 - </listitem>  
1451 - </varlistentry>  
1452 - <varlistentry>  
1453 - <term>@1@option@1@--use-aes=[yn]@2@option@2@</term>  
1454 - <listitem>  
1455 - <para>  
1456 - If @1@option@1@--use-aes=y@2@option@2@ is specified, AES encryption  
1457 - will be used instead of RC4 encryption. This forces the PDF  
1458 - version to be at least 1.6.  
1459 - </para>  
1460 - </listitem>  
1461 - </varlistentry>  
1462 - <varlistentry>  
1463 - <term>@1@option@1@--allow-insecure@2@option@2@</term>  
1464 - <listitem>  
1465 - <para>  
1466 - From qpdf 10.2, qpdf defaults to not allowing creation of PDF  
1467 - files where the user password is non-empty, the owner password  
1468 - is empty, and a 256-bit key is in use. Files created in this  
1469 - way are insecure since they can be opened without a password.  
1470 - Users would ordinarily never want to create such files. If you  
1471 - are using qpdf to intentionally create strange files for  
1472 - testing (a definite valid use of qpdf!), this option allows  
1473 - you to create such insecure files.  
1474 - </para>  
1475 - </listitem>  
1476 - </varlistentry>  
1477 - <varlistentry>  
1478 - <term>@1@option@1@--force-V4@2@option@2@</term>  
1479 - <listitem>  
1480 - <para>  
1481 - Use of this option forces the <literal>/V</literal> and  
1482 - <literal>/R</literal> parameters in the document's encryption  
1483 - dictionary to be set to the value <literal>4</literal>. As  
1484 - qpdf will automatically do this when required, there is no  
1485 - reason to ever use this option. It exists primarily for use  
1486 - in testing qpdf itself. This option also forces the PDF  
1487 - version to be at least 1.5.  
1488 - </para>  
1489 - </listitem>  
1490 - </varlistentry>  
1491 - </variablelist>  
1492 - If @1@option@1@@1@replaceable@1@key-length@2@replaceable@2@@2@option@2@ is 256,  
1493 - the minimum PDF version is 1.7 with extension level 8, and the  
1494 - AES-based encryption format used is the PDF 2.0 encryption method  
1495 - supported by Acrobat X. The same options are available as with  
1496 - 128 bits with the following exceptions:  
1497 - <variablelist>  
1498 - <varlistentry>  
1499 - <term>@1@option@1@--use-aes@2@option@2@</term>  
1500 - <listitem>  
1501 - <para>  
1502 - This option is not available with 256-bit keys. AES is always  
1503 - used with 256-bit encryption keys.  
1504 - </para>  
1505 - </listitem>  
1506 - </varlistentry>  
1507 - <varlistentry>  
1508 - <term>@1@option@1@--force-V4@2@option@2@</term>  
1509 - <listitem>  
1510 - <para>  
1511 - This option is not available with 256-bit keys.  
1512 - </para>  
1513 - </listitem>  
1514 - </varlistentry>  
1515 - <varlistentry>  
1516 - <term>@1@option@1@--force-R5@2@option@2@</term>  
1517 - <listitem>  
1518 - <para>  
1519 - If specified, qpdf sets the minimum version to 1.7 at  
1520 - extension level 3 and writes the deprecated encryption format  
1521 - used by Acrobat version IX. This option should not be used in  
1522 - practice to generate PDF files that will be in general use,  
1523 - but it can be useful to generate files if you are trying to  
1524 - test proper support in another application for PDF files  
1525 - encrypted in this way.  
1526 - </para>  
1527 - </listitem>  
1528 - </varlistentry>  
1529 - </variablelist>  
1530 - The default for each permission option is to be fully permissive.  
1531 - </para>  
1532 - </sect1>  
1533 - <sect1 id="ref.page-selection">  
1534 - <title>Page Selection Options</title>  
1535 - <para>  
1536 - Starting with qpdf 3.0, it is possible to split and merge PDF  
1537 - files by selecting pages from one or more input files. Whatever  
1538 - file is given as the primary input file is used as the starting  
1539 - point, but its pages are replaced with pages as specified.  
1540 -  
1541 - <programlisting>@1@option@1@--pages @1@replaceable@1@input-file@2@replaceable@2@ [ @1@replaceable@1@--password=password@2@replaceable@2@ ] [ @1@replaceable@1@page-range@2@replaceable@2@ ] [ ... ] --@2@option@2@  
1542 -</programlisting>  
1543 - Multiple input files may be specified. Each one is given as the  
1544 - name of the input file, an optional password (if required to open  
1545 - the file), and the range of pages. Note that  
1546 - "@1@option@1@--@2@option@2@" terminates parsing of page  
1547 - selection flags.  
1548 - </para>  
1549 - <para>  
1550 - Starting with qpdf 8.4, the special input file name  
1551 - "@1@filename@1@.@2@filename@2@" can be used as a shortcut for the  
1552 - primary input filename.  
1553 - </para>  
1554 - <para>  
1555 - For each file that pages should be taken from, specify the file, a  
1556 - password needed to open the file (if any), and a page range. The  
1557 - password needs to be given only once per file. If any of the  
1558 - input files are the same as the primary input file or the file  
1559 - used to copy encryption parameters (if specified), you do not need  
1560 - to repeat the password here. The same file can be repeated  
1561 - multiple times. If a file that is repeated has a password, the  
1562 - password only has to be given the first time. All non-page data  
1563 - (info, outlines, page numbers, etc.) are taken from the primary  
1564 - input file. To discard these, use @1@option@1@--empty@2@option@2@ as the  
1565 - primary input.  
1566 - </para>  
1567 - <para>  
1568 - Starting with qpdf 5.0.0, it is possible to omit the page range.  
1569 - If qpdf sees a value in the place where it expects a page range  
1570 - and that value is not a valid range but is a valid file name, qpdf  
1571 - will implicitly use the range <literal>1-z</literal>, meaning that  
1572 - it will include all pages in the file. This makes it possible to  
1573 - easily combine all pages in a set of files with a command like  
1574 - @1@command@1@qpdf --empty out.pdf --pages *.pdf --@2@command@2@.  
1575 - </para>  
1576 - <para>  
1577 - The page range is a set of numbers separated by commas, ranges of  
1578 - numbers separated by dashes, or combinations of those. The character  
1579 - "z" represents the last page. A number preceded by an  
1580 - "r" indicates to count from the end, so  
1581 - <literal>r3-r1</literal> would be the last three pages of the  
1582 - document. Pages can appear in any order. Ranges can appear with a  
1583 - high number followed by a low number, which causes the pages to  
1584 - appear in reverse. Numbers may be repeated in a page range. A page  
1585 - range may be optionally appended with <literal>:even</literal> or  
1586 - <literal>:odd</literal> to indicate only the even or odd pages in  
1587 - the given range. Note that even and odd refer to the positions  
1588 - within the specified range, not whether the original number is  
1589 - even or odd.  
1590 - </para>  
1591 - <para>  
1592 - Example page ranges:  
1593 - <itemizedlist>  
1594 - <listitem>  
1595 - <para>  
1596 - <literal>1,3,5-9,15-12</literal>: pages 1, 3, 5, 6, 7, 8,  
1597 - 9, 15, 14, 13, and 12 in that order.  
1598 - </para>  
1599 - </listitem>  
1600 - <listitem>  
1601 - <para>  
1602 - <literal>z-1</literal>: all pages in the document in reverse  
1603 - </para>  
1604 - </listitem>  
1605 - <listitem>  
1606 - <para>  
1607 - <literal>r3-r1</literal>: the last three pages of the document  
1608 - </para>  
1609 - </listitem>  
1610 - <listitem>  
1611 - <para>  
1612 - <literal>r1-r3</literal>: the last three pages of the document  
1613 - in reverse order  
1614 - </para>  
1615 - </listitem>  
1616 - <listitem>  
1617 - <para>  
1618 - <literal>1-20:even</literal>: even pages from 2 to 20  
1619 - </para>  
1620 - </listitem>  
1621 - <listitem>  
1622 - <para>  
1623 - <literal>5,7-9,12:odd</literal>: pages 5, 8, and 12, which are  
1624 - the pages in odd positions from among the original range, which  
1625 - represents pages 5, 7, 8, 9, and 12.  
1626 - </para>  
1627 - </listitem>  
1628 - </itemizedlist>  
1629 - </para>  
1630 - <para>  
1631 - Starting in qpdf version 8.3, you can specify the  
1632 - @1@option@1@--collate@2@option@2@ option. Note that this option is  
1633 - specified outside of @1@option@1@--pages ... --@2@option@2@.  
1634 - When @1@option@1@--collate@2@option@2@ is specified, it changes the  
1635 - meaning of @1@option@1@--pages@2@option@2@ so that the specified files,  
1636 - as modified by page ranges, are collated rather than concatenated.  
1637 - For example, if you add the files @1@filename@1@odd.pdf@2@filename@2@ and  
1638 - @1@filename@1@even.pdf@2@filename@2@ containing odd and even pages of a  
1639 - document respectively, you could run @1@command@1@qpdf --collate  
1640 - odd.pdf --pages odd.pdf even.pdf -- all.pdf@2@command@2@ to collate  
1641 - the pages. This would pick page 1 from odd, page 1 from even, page  
1642 - 2 from odd, page 2 from even, etc. until all pages have been  
1643 - included. Any number of files and page ranges can be specified. If  
1644 - any file has fewer pages, that file is just skipped when its pages  
1645 - have all been included. For example, if you ran @1@command@1@qpdf  
1646 - --collate --empty --pages a.pdf 1-5 b.pdf 6-4 c.pdf r1 --  
1647 - out.pdf@2@command@2@, you would get the following pages in this  
1648 - order:  
1649 - <itemizedlist>  
1650 - <listitem><para>a.pdf page 1</para></listitem>  
1651 - <listitem><para>b.pdf page 6</para></listitem>  
1652 - <listitem><para>c.pdf last page</para></listitem>  
1653 - <listitem><para>a.pdf page 2</para></listitem>  
1654 - <listitem><para>b.pdf page 5</para></listitem>  
1655 - <listitem><para>a.pdf page 3</para></listitem>  
1656 - <listitem><para>b.pdf page 4</para></listitem>  
1657 - <listitem><para>a.pdf page 4</para></listitem>  
1658 - <listitem><para>a.pdf page 5</para></listitem>  
1659 - </itemizedlist>  
1660 - </para>  
1661 - <para>  
1662 - Starting in qpdf version 10.2, you may specify a numeric argument  
1663 - to @1@option@1@--collate@2@option@2@. With  
1664 - @1@option@1@--collate=@1@replaceable@1@n@2@replaceable@2@@2@option@2@, pull  
1665 - groups of @1@replaceable@1@n@2@replaceable@2@ pages from each file,  
1666 - again, stopping when there are no more pages. For example, if you  
1667 - ran @1@command@1@qpdf --collate=2 --empty --pages a.pdf 1-5 b.pdf 6-4  
1668 - c.pdf r1 -- out.pdf@2@command@2@, you would get the following pages  
1669 - in this order:  
1670 - <itemizedlist>  
1671 - <listitem><para>a.pdf page 1</para></listitem>  
1672 - <listitem><para>a.pdf page 2</para></listitem>  
1673 - <listitem><para>b.pdf page 6</para></listitem>  
1674 - <listitem><para>b.pdf page 5</para></listitem>  
1675 - <listitem><para>c.pdf last page</para></listitem>  
1676 - <listitem><para>a.pdf page 3</para></listitem>  
1677 - <listitem><para>a.pdf page 4</para></listitem>  
1678 - <listitem><para>b.pdf page 4</para></listitem>  
1679 - <listitem><para>a.pdf page 5</para></listitem>  
1680 - </itemizedlist>  
1681 - </para>  
1682 - <para>  
1683 - Starting in qpdf version 8.3, when you split and merge files, any  
1684 - page labels (page numbers) are preserved in the final file. It is  
1685 - expected that more document features will be preserved by  
1686 - splitting and merging. In the mean time, semantics of splitting  
1687 - and merging vary across features. For example, the document's  
1688 - outlines (bookmarks) point to actual page objects, so if you  
1689 - select some pages and not others, bookmarks that point to pages  
1690 - that are in the output file will work, and remaining bookmarks  
1691 - will not work. A future version of @1@command@1@qpdf@2@command@2@ may do  
1692 - a better job at handling these issues. (Note that the qpdf library  
1693 - already contains all of the APIs required in order to implement  
1694 - this in your own application if you need it.) In the mean time,  
1695 - you can always use @1@option@1@--empty@2@option@2@ as the primary input  
1696 - file to avoid copying all of that from the first file. For  
1697 - example, to take pages 1 through 5 from  
1698 - @1@filename@1@infile.pdf@2@filename@2@ while preserving all metadata  
1699 - associated with that file, you could use  
1700 -  
1701 - <programlisting>@1@command@1@qpdf@2@command@2@ @1@option@1@infile.pdf --pages . 1-5 -- outfile.pdf@2@option@2@  
1702 -</programlisting>  
1703 - If you wanted pages 1 through 5 from  
1704 - @1@filename@1@infile.pdf@2@filename@2@ but you wanted the rest of the  
1705 - metadata to be dropped, you could instead run  
1706 -  
1707 - <programlisting>@1@command@1@qpdf@2@command@2@ @1@option@1@--empty --pages infile.pdf 1-5 -- outfile.pdf@2@option@2@  
1708 -</programlisting>  
1709 - If you wanted to take pages 1 through 5 from  
1710 - @1@filename@1@file1.pdf@2@filename@2@ and pages 11 through 15 from  
1711 - @1@filename@1@file2.pdf@2@filename@2@ in reverse, taking document-level  
1712 - metadata from @1@filename@1@file2.pdf@2@filename@2@, you would run  
1713 -  
1714 - <programlisting>@1@command@1@qpdf@2@command@2@ @1@option@1@file2.pdf --pages file1.pdf 1-5 . 15-11 -- outfile.pdf@2@option@2@  
1715 -</programlisting>  
1716 - If, for some reason, you wanted to take the first page of an  
1717 - encrypted file called @1@filename@1@encrypted.pdf@2@filename@2@ with  
1718 - password <literal>pass</literal> and repeat it twice in an output  
1719 - file, and if you wanted to drop document-level metadata but  
1720 - preserve encryption, you would use  
1721 -  
1722 - <programlisting>@1@command@1@qpdf@2@command@2@ @1@option@1@--empty --copy-encryption=encrypted.pdf --encryption-file-password=pass  
1723 ---pages encrypted.pdf --password=pass 1 ./encrypted.pdf --password=pass 1 --  
1724 -outfile.pdf@2@option@2@  
1725 -</programlisting>  
1726 - Note that we had to specify the password all three times because  
1727 - giving a password as @1@option@1@--encryption-file-password@2@option@2@  
1728 - doesn't count for page selection, and as far as qpdf is concerned,  
1729 - @1@filename@1@encrypted.pdf@2@filename@2@ and  
1730 - @1@filename@1@./encrypted.pdf@2@filename@2@ are separate files. These  
1731 - are all corner cases that most users should hopefully never have  
1732 - to be bothered with.  
1733 - </para>  
1734 - <para>  
1735 - Prior to version 8.4, it was not possible to specify the same page  
1736 - from the same file directly more than once, and the workaround of  
1737 - specifying the same file in more than one way was required.  
1738 - Version 8.4 removes this limitation, but there is still a valid  
1739 - use case. When you specify the same page from the same file more  
1740 - than once, qpdf will share objects between the pages. If you are  
1741 - going to do further manipulation on the file and need the two  
1742 - instances of the same original page to be deep copies, then you  
1743 - can specify the file in two different ways. For example  
1744 - @1@command@1@qpdf in.pdf --pages . 1 ./in.pdf 1 -- out.pdf@2@command@2@  
1745 - would create a file with two copies of the first page of the  
1746 - input, and the two copies would not share any objects in common. This  
1747 - includes fonts, images, and anything else the page references.  
1748 - </para>  
1749 - </sect1>  
1750 - <sect1 id="ref.overlay-underlay">  
1751 - <title>Overlay and Underlay Options</title>  
1752 - <para>  
1753 - Starting with qpdf 8.4, it is possible to overlay or underlay  
1754 - pages from other files onto the output generated by qpdf. Specify  
1755 - overlay or underlay as follows:  
1756 -  
1757 - <programlisting>{ @1@option@1@--overlay@2@option@2@ | @1@option@1@--underlay@2@option@2@ } @1@replaceable@1@file@2@replaceable@2@ [ @1@option@1@options@2@option@2@ ] @1@option@1@--@2@option@2@  
1758 -</programlisting>  
1759 - Overlay and underlay options are processed late, so they can be  
1760 - combined with other options like merging and will apply to the final  
1761 - output. The @1@option@1@--overlay@2@option@2@ and  
1762 - @1@option@1@--underlay@2@option@2@ options work the same way, except  
1763 - underlay pages are drawn underneath the page to which they are  
1764 - applied, possibly obscured by the original page, and overlay files  
1765 - are drawn on top of the page to which they are applied, possibly  
1766 - obscuring the page. You can combine overlay and underlay.  
1767 - </para>  
1768 - <para>  
1769 - The default behavior of overlay and underlay is that pages are  
1770 - taken from the overlay/underlay file in sequence and applied to  
1771 - corresponding pages in the output until there are no more output  
1772 - pages. If the overlay or underlay file runs out of pages,  
1773 - remaining output pages are left alone. This behavior can be  
1774 - modified by options, which are provided between the  
1775 - @1@option@1@--overlay@2@option@2@ or @1@option@1@--underlay@2@option@2@ flag and  
1776 - the @1@option@1@--@2@option@2@ option. The following options are  
1777 - supported:  
1778 - <itemizedlist>  
1779 - <listitem>  
1780 - <para>  
1781 - @1@option@1@--password=password@2@option@2@: supply a password if the  
1782 - overlay/underlay file is encrypted.  
1783 - </para>  
1784 - </listitem>  
1785 - <listitem>  
1786 - <para>  
1787 - @1@option@1@--to=page-range@2@option@2@: a range of pages in the same  
1788 - format described in <xref linkend="ref.page-selection"/>  
1789 - indicates which pages in the output should have the  
1790 - overlay/underlay applied. If not specified, overlay/underlay  
1791 - are applied to all pages.  
1792 - </para>  
1793 - </listitem>  
1794 - <listitem>  
1795 - <para>  
1796 - @1@option@1@--from=[page-range]@2@option@2@: a range of pages that  
1797 - specifies which pages in the overlay/underlay file will be used  
1798 - for overlay or underlay. If not specified, all pages will be  
1799 - used. This can be explicitly specified to be empty if  
1800 - @1@option@1@--repeat@2@option@2@ is used.  
1801 - </para>  
1802 - </listitem>  
1803 - <listitem>  
1804 - <para>  
1805 - @1@option@1@--repeat=page-range@2@option@2@: an optional range of  
1806 - pages that specifies which pages in the overlay/underlay file  
1807 - will be repeated after the "from" pages are used  
1808 - up. If you want to repeat a range of pages starting at the  
1809 - beginning, you can explicitly use @1@option@1@--from=@2@option@2@.  
1810 - </para>  
1811 - </listitem>  
1812 - </itemizedlist>  
1813 - </para>  
1814 - <para>  
1815 - Here are some examples.  
1816 - <itemizedlist>  
1817 - <listitem>  
1818 - <para>  
1819 - @1@command@1@--overlay o.pdf --to=1-5 --from=1-3  
1820 - --repeat=4 --@2@command@2@: overlay the first three pages from file  
1821 - @1@filename@1@o.pdf@2@filename@2@ onto the first three pages of the  
1822 - output, then overlay page 4 from @1@filename@1@o.pdf@2@filename@2@  
1823 - onto pages 4 and 5 of the output. Leave remaining output pages  
1824 - untouched.  
1825 - </para>  
1826 - </listitem>  
1827 - <listitem>  
1828 - <para>  
1829 - @1@command@1@--underlay footer.pdf --from= --repeat=1,2 --@2@command@2@:  
1830 - Underlay page 1 of @1@filename@1@footer.pdf@2@filename@2@ on all odd  
1831 - output pages, and underlay page 2 of  
1832 - @1@filename@1@footer.pdf@2@filename@2@ on all even output pages.  
1833 - </para>  
1834 - </listitem>  
1835 - </itemizedlist>  
1836 - </para>  
1837 - </sect1>  
1838 - <sect1 id="ref.attachments">  
1839 - <title>Embedded Files/Attachments Options</title>  
1840 - <para>  
1841 - Starting with qpdf 10.2, you can work with file attachments in PDF  
1842 - files from the command line. The following options are available:  
1843 - <variablelist>  
1844 - <varlistentry>  
1845 - <term>@1@option@1@--list-attachments@2@option@2@</term>  
1846 - <listitem>  
1847 - <para>  
1848 - Show the "key" and stream number for embedded  
1849 - files. With @1@option@1@--verbose@2@option@2@, additional  
1850 - information, including preferred file name, description,  
1851 - dates, and more, is also displayed. The key is usually but not  
1852 - always equal to the file name, and is needed by some of the  
1853 - other options.  
1854 - </para>  
1855 - </listitem>  
1856 - </varlistentry>  
1857 - <varlistentry>  
1858 - <term>@1@option@1@--show-attachment=@1@replaceable@1@key@2@replaceable@2@@2@option@2@</term>  
1859 - <listitem>  
1860 - <para>  
1861 - Write the contents of the specified attachment to standard  
1862 - output as binary data. The key should match one of the keys  
1863 - shown by @1@option@1@--list-attachments@2@option@2@. If specified  
1864 - multiple times, only the last attachment will be shown.  
1865 - </para>  
1866 - </listitem>  
1867 - </varlistentry>  
1868 - <varlistentry>  
1869 - <term>@1@option@1@--add-attachment @1@replaceable@1@file@2@replaceable@2@ @1@replaceable@1@options@2@replaceable@2@ --@2@option@2@</term>  
1870 - <listitem>  
1871 - <para>  
1872 - Add or replace an attachment with the contents of  
1873 - @1@replaceable@1@file@2@replaceable@2@. This may be specified more  
1874 - than once. The following additional options may appear before  
1875 - the <literal>--</literal> that ends this option:  
1876 - <variablelist>  
1877 - <varlistentry>  
1878 - <term>@1@option@1@--key=@1@replaceable@1@key@2@replaceable@2@@2@option@2@</term>  
1879 - <listitem>  
1880 - <para>  
1881 - The key to use to register the attachment in the embedded  
1882 - files table. Defaults to the last path element of  
1883 - @1@replaceable@1@file@2@replaceable@2@.  
1884 - </para>  
1885 - </listitem>  
1886 - </varlistentry>  
1887 - <varlistentry>  
1888 - <term>@1@option@1@--filename=@1@replaceable@1@name@2@replaceable@2@@2@option@2@</term>  
1889 - <listitem>  
1890 - <para>  
1891 - The file name to be used for the attachment. This is what is usually  
1892 - displayed to the user and is the name most graphical PDF  
1893 - viewers will use when saving a file. It defaults to the  
1894 - last path element of @1@replaceable@1@file@2@replaceable@2@.  
1895 - </para>  
1896 - </listitem>  
1897 - </varlistentry>  
1898 - <varlistentry>  
1899 - <term>@1@option@1@--creationdate=@1@replaceable@1@date@2@replaceable@2@@2@option@2@</term>  
1900 - <listitem>  
1901 - <para>  
1902 - The attachment's creation date in PDF format; defaults to  
1903 - the current time. The date format is explained below.  
1904 - </para>  
1905 - </listitem>  
1906 - </varlistentry>  
1907 - <varlistentry>  
1908 - <term>@1@option@1@--moddate=@1@replaceable@1@date@2@replaceable@2@@2@option@2@</term>  
1909 - <listitem>  
1910 - <para>  
1911 - The attachment's modification date in PDF format; defaults  
1912 - to the current time. The date format is explained below.  
1913 - </para>  
1914 - </listitem>  
1915 - </varlistentry>  
1916 - <varlistentry>  
1917 - <term>@1@option@1@--mimetype=@1@replaceable@1@type/subtype@2@replaceable@2@@2@option@2@</term>  
1918 - <listitem>  
1919 - <para>  
1920 - The mime type for the attachment, e.g.  
1921 - <literal>text/plain</literal> or  
1922 - <literal>application/pdf</literal>. Note that the mimetype  
1923 - appears in a field called <literal>/Subtype</literal> in  
1924 - the PDF but actually includes the full type and subtype of  
1925 - the mime type.  
1926 - </para>  
1927 - </listitem>  
1928 - </varlistentry>  
1929 - <varlistentry>  
1930 - <term>@1@option@1@--description=@1@replaceable@1@&quot;text&quot;@2@replaceable@2@@2@option@2@</term>  
1931 - <listitem>  
1932 - <para>  
1933 - Descriptive text for the attachment, displayed by some PDF  
1934 - viewers.  
1935 - </para>  
1936 - </listitem>  
1937 - </varlistentry>  
1938 - <varlistentry>  
1939 - <term>@1@option@1@--replace@2@option@2@</term>  
1940 - <listitem>  
1941 - <para>  
1942 - Indicates that any existing attachment with the same key  
1943 - should be replaced by the new attachment. Otherwise,  
1944 - @1@command@1@qpdf@2@command@2@ gives an error if an attachment  
1945 - with that key is already present.  
1946 - </para>  
1947 - </listitem>  
1948 - </varlistentry>  
1949 - </variablelist>  
1950 - </para>  
1951 - </listitem>  
1952 - </varlistentry>  
1953 - <varlistentry>  
1954 - <term>@1@option@1@--remove-attachment=@1@replaceable@1@key@2@replaceable@2@@2@option@2@</term>  
1955 - <listitem>  
1956 - <para>  
1957 - Remove the specified attachment. This doesn't only remove the  
1958 - attachment from the embedded files table but also clears out  
1959 - the file specification. That means that any potential internal  
1960 - links to the attachment will be broken. This option may be  
1961 - specified multiple times. Run with @1@option@1@--verbose@2@option@2@  
1962 - to see status of the removal.  
1963 - </para>  
1964 - </listitem>  
1965 - </varlistentry>  
1966 - <varlistentry>  
1967 - <term>@1@option@1@--copy-attachments-from @1@replaceable@1@file@2@replaceable@2@ @1@replaceable@1@options@2@replaceable@2@ --@2@option@2@</term>  
1968 - <listitem>  
1969 - <para>  
1970 - Copy attachments from another file. This may be specified more  
1971 - than once. The following additional options may appear before  
1972 - the <literal>--</literal> that ends this option:  
1973 - <variablelist>  
1974 - <varlistentry>  
1975 - <term>@1@option@1@--password=@1@replaceable@1@password@2@replaceable@2@@2@option@2@</term>  
1976 - <listitem>  
1977 - <para>  
1978 - If required, the password needed to open  
1979 - @1@replaceable@1@file@2@replaceable@2@.  
1980 - </para>  
1981 - </listitem>  
1982 - </varlistentry>  
1983 - <varlistentry>  
1984 - <term>@1@option@1@--prefix=@1@replaceable@1@prefix@2@replaceable@2@@2@option@2@</term>  
1985 - <listitem>  
1986 - <para>  
1987 - Only required if the file from which attachments are being  
1988 - copied has attachments with keys that conflict with  
1989 - attachments already in the file. In this case, the  
1990 - specified prefix will be prepended to each key. This  
1991 - affects only the key in the embedded files table, not the  
1992 - file name. The PDF specification doesn't preclude multiple  
1993 - attachments having the same file name.  
1994 - </para>  
1995 - </listitem>  
1996 - </varlistentry>  
1997 - </variablelist>  
1998 - </para>  
1999 - </listitem>  
2000 - </varlistentry>  
2001 - </variablelist>  
2002 - When a date is required, the date should conform to the PDF date  
2003 - format specification, which is  
2004 - <literal>D:</literal>@1@replaceable@1@yyyymmddhhmmss&lt;z&gt;@2@replaceable@2@,  
2005 - where @1@replaceable@1@&lt;z&gt;@2@replaceable@2@ is either  
2006 - <literal>Z</literal> for UTC or a timezone offset in the form  
2007 - @1@replaceable@1@-hh'mm'@2@replaceable@2@ or  
2008 - @1@replaceable@1@+hh'mm'@2@replaceable@2@. Examples:  
2009 - <literal>D:20210207161528-05'00'</literal>,  
2010 - <literal>D:20210207211528Z</literal>.  
2011 - </para>  
2012 - </sect1>  
2013 - <sect1 id="ref.advanced-parsing">  
2014 - <title>Advanced Parsing Options</title>  
2015 - <para>  
2016 - These options control aspects of how qpdf reads PDF files. Mostly  
2017 - these are of use to people who are working with damaged files.  
2018 - There is little reason to use these options unless you are trying  
2019 - to solve specific problems. The following options are available:  
2020 - <variablelist>  
2021 - <varlistentry>  
2022 - <term>@1@option@1@--suppress-recovery@2@option@2@</term>  
2023 - <listitem>  
2024 - <para>  
2025 - Prevents qpdf from attempting to recover damaged files.  
2026 - </para>  
2027 - </listitem>  
2028 - </varlistentry>  
2029 - <varlistentry>  
2030 - <term>@1@option@1@--ignore-xref-streams@2@option@2@</term>  
2031 - <listitem>  
2032 - <para>  
2033 - Tells qpdf to ignore any cross-reference streams.  
2034 - </para>  
2035 - </listitem>  
2036 - </varlistentry>  
2037 - </variablelist>  
2038 - </para>  
2039 - <para>  
2040 - Ordinarily, qpdf will attempt to recover from certain types of  
2041 - errors in PDF files. These include errors in the cross-reference  
2042 - table, certain types of object numbering errors, and certain types  
2043 - of stream length errors. Sometimes, qpdf may think it has  
2044 - recovered but may not have actually recovered, so care should be  
2045 - taken when using this option as some data loss is possible. The  
2046 - @1@option@1@--suppress-recovery@2@option@2@ option will prevent qpdf from  
2047 - attempting recovery. In this case, it will fail on the first  
2048 - error that it encounters.  
2049 - </para>  
2050 - <para>  
2051 - Ordinarily, qpdf reads cross-reference streams when they are  
2052 - present in a PDF file. If @1@option@1@--ignore-xref-streams@2@option@2@  
2053 - is specified, qpdf will ignore any cross-reference streams for  
2054 - hybrid PDF files. The purpose of hybrid files is to make some  
2055 - content available to viewers that are not aware of cross-reference  
2056 - streams. It is almost never desirable to ignore them. The only  
2057 - time when you might want to use this feature is if you are testing  
2058 - creation of hybrid PDF files and wish to see how a PDF consumer  
2059 - that doesn't understand object and cross-reference streams would  
2060 - interpret such a file.  
2061 - </para>  
2062 - </sect1>  
2063 - <sect1 id="ref.advanced-transformation">  
2064 - <title>Advanced Transformation Options</title>  
2065 - <para>  
2066 - These transformation options control fine points of how qpdf  
2067 - creates the output file. Mostly these are of use only to people  
2068 - who are very familiar with the PDF file format or who are PDF  
2069 - developers. The following options are available:  
2070 - <variablelist>  
2071 - <varlistentry>  
2072 - <term>@1@option@1@--compress-streams=@1@replaceable@1@[yn]@2@replaceable@2@@2@option@2@</term>  
2073 - <listitem>  
2074 - <para>  
2075 - By default, or with @1@option@1@--compress-streams=y@2@option@2@,  
2076 - qpdf will compress any stream with no other filters applied to  
2077 - it with the <literal>/FlateDecode</literal> filter when it  
2078 - writes it. To suppress this behavior and preserve uncompressed  
2079 - streams as uncompressed, use  
2080 - @1@option@1@--compress-streams=n@2@option@2@.  
2081 - </para>  
2082 - </listitem>  
2083 - </varlistentry>  
2084 - <varlistentry>  
2085 - <term>@1@option@1@--decode-level=@1@replaceable@1@option@2@replaceable@2@@2@option@2@</term>  
2086 - <listitem>  
2087 - <para>  
2088 - Controls which streams qpdf tries to decode. The default is  
2089 - @1@option@1@generalized@2@option@2@. The following options are  
2090 - available:  
2091 - <itemizedlist>  
2092 - <listitem>  
2093 - <para>  
2094 - @1@option@1@none@2@option@2@: do not attempt to decode any streams  
2095 - </para>  
2096 - </listitem>  
2097 - <listitem>  
2098 - <para>  
2099 - @1@option@1@generalized@2@option@2@: decode streams filtered with  
2100 - supported generalized filters:  
2101 - <literal>/LZWDecode</literal>,  
2102 - <literal>/FlateDecode</literal>,  
2103 - <literal>/ASCII85Decode</literal>, and  
2104 - <literal>/ASCIIHexDecode</literal>. We define generalized  
2105 - filters as those to be used for general-purpose compression  
2106 - or encoding, as opposed to filters specifically designed  
2107 - for image data. Note that, by default, streams already  
2108 - compressed with <literal>/FlateDecode</literal> are not  
2109 - uncompressed and recompressed unless you also specify  
2110 - @1@option@1@--recompress-flate@2@option@2@.  
2111 - </para>  
2112 - </listitem>  
2113 - <listitem>  
2114 - <para>  
2115 - @1@option@1@specialized@2@option@2@: in addition to generalized,  
2116 - decode streams with supported non-lossy specialized  
2117 - filters; currently this is just  
2118 - <literal>/RunLengthDecode</literal>  
2119 - </para>  
2120 - </listitem>  
2121 - <listitem>  
2122 - <para>  
2123 - @1@option@1@all@2@option@2@: in addition to generalized and  
2124 - specialized, decode streams with supported lossy filters;  
2125 - currently this is just <literal>/DCTDecode</literal> (JPEG)  
2126 - </para>  
2127 - </listitem>  
2128 - </itemizedlist>  
2129 - </para>  
2130 - </listitem>  
2131 - </varlistentry>  
2132 - <varlistentry>  
2133 - <term>@1@option@1@--stream-data=@1@replaceable@1@option@2@replaceable@2@@2@option@2@</term>  
2134 - <listitem>  
2135 - <para>  
2136 - Controls transformation of stream data. This option predates  
2137 - the @1@option@1@--compress-streams@2@option@2@ and  
2138 - @1@option@1@--decode-level@2@option@2@ options. Those options can be  
2139 - used to achieve the same effect with more control. The value  
2140 - of @1@option@1@@1@replaceable@1@option@2@replaceable@2@@2@option@2@ may be  
2141 - one of the following:  
2142 - <itemizedlist>  
2143 - <listitem>  
2144 - <para>  
2145 - @1@option@1@compress@2@option@2@: recompress stream data when  
2146 - possible (default); equivalent to  
2147 - @1@option@1@--compress-streams=y@2@option@2@  
2148 - @1@option@1@--decode-level=generalized@2@option@2@. Does not  
2149 - recompress streams already compressed with  
2150 - <literal>/FlateDecode</literal> unless  
2151 - @1@option@1@--recompress-flate@2@option@2@ is also specified.  
2152 - </para>  
2153 - </listitem>  
2154 - <listitem>  
2155 - <para>  
2156 - @1@option@1@preserve@2@option@2@: leave all stream data as is;  
2157 - equivalent to @1@option@1@--compress-streams=n@2@option@2@  
2158 - @1@option@1@--decode-level=none@2@option@2@  
2159 - </para>  
2160 - </listitem>  
2161 - <listitem>  
2162 - <para>  
2163 - @1@option@1@uncompress@2@option@2@: uncompress stream data  
2164 - compressed with generalized filters when possible;  
2165 - equivalent to @1@option@1@--compress-streams=n@2@option@2@  
2166 - @1@option@1@--decode-level=generalized@2@option@2@  
2167 - </para>  
2168 - </listitem>  
2169 - </itemizedlist>  
2170 - </para>  
2171 - </listitem>  
2172 - </varlistentry>  
2173 - <varlistentry>  
2174 - <term>@1@option@1@--recompress-flate@2@option@2@</term>  
2175 - <listitem>  
2176 - <para>  
2177 - By default, streams already compressed with  
2178 - <literal>/FlateDecode</literal> are left alone rather than  
2179 - being uncompressed and recompressed. This option causes qpdf  
2180 - to uncompress and recompress the streams. There is a  
2181 - significant performance cost to using this option, but you  
2182 - probably want to use it if you specify  
2183 - @1@option@1@--compression-level@2@option@2@.  
2184 - </para>  
2185 - </listitem>  
2186 - </varlistentry>  
2187 - <varlistentry>  
2188 - <term>@1@option@1@--compression-level=@1@replaceable@1@level@2@replaceable@2@@2@option@2@</term>  
2189 - <listitem>  
2190 - <para>  
2191 - When writing new streams that are compressed with  
2192 - <literal>/FlateDecode</literal>, use the specified compression  
2193 - level. The value of @1@option@1@level@2@option@2@ should be a number  
2194 - from 1 to 9 and is passed directly to zlib, which implements  
2195 - deflate compression. Note that qpdf doesn't uncompress and  
2196 - recompress streams by default. To have this option apply to  
2197 - already compressed streams, you should also specify  
2198 - @1@option@1@--recompress-flate@2@option@2@. If your goal is to shrink  
2199 - the size of PDF files, you should also use  
2200 - @1@option@1@--object-streams=generate@2@option@2@.  
2201 - </para>  
2202 - </listitem>  
2203 - </varlistentry>  
2204 - <varlistentry>  
2205 - <term>@1@option@1@--normalize-content=[yn]@2@option@2@</term>  
2206 - <listitem>  
2207 - <para>  
2208 - Enables or disables normalization of content streams. Content  
2209 - normalization is enabled by default in QDF mode. Please see  
2210 - <xref linkend="ref.qdf"/> for additional discussion of QDF  
2211 - mode.  
2212 - </para>  
2213 - </listitem>  
2214 - </varlistentry>  
2215 - <varlistentry>  
2216 - <term>@1@option@1@--object-streams=@1@replaceable@1@mode@2@replaceable@2@@2@option@2@</term>  
2217 - <listitem>  
2218 - <para>  
2219 - Controls handling of object streams. The value of  
2220 - @1@option@1@@1@replaceable@1@mode@2@replaceable@2@@2@option@2@ may be one of  
2221 - the following:  
2222 - <itemizedlist>  
2223 - <listitem>  
2224 - <para>  
2225 - @1@option@1@preserve@2@option@2@: preserve original object streams  
2226 - (default)  
2227 - </para>  
2228 - </listitem>  
2229 - <listitem>  
2230 - <para>  
2231 - @1@option@1@disable@2@option@2@: don't write any object streams  
2232 - </para>  
2233 - </listitem>  
2234 - <listitem>  
2235 - <para>  
2236 - @1@option@1@generate@2@option@2@: use object streams wherever  
2237 - possible  
2238 - </para>  
2239 - </listitem>  
2240 - </itemizedlist>  
2241 - </para>  
2242 - </listitem>  
2243 - </varlistentry>  
2244 - <varlistentry>  
2245 - <term>@1@option@1@--preserve-unreferenced@2@option@2@</term>  
2246 - <listitem>  
2247 - <para>  
2248 - Tells qpdf to preserve objects that are not referenced when  
2249 - writing the file. Ordinarily any object that is not referenced  
2250 - in a traversal of the document from the trailer dictionary  
2251 - will be discarded. This may be useful in working with some  
2252 - damaged files or inspecting files with known unreferenced  
2253 - objects.  
2254 - </para>  
2255 - <para>  
2256 - This flag is ignored for linearized files and has the effect  
2257 - of causing objects in the new file to be written in order by  
2258 - object ID from the original file. This does not mean that  
2259 - object numbers will be the same since qpdf may create stream  
2260 - lengths as direct or indirect differently from the original  
2261 - file, and the original file may have gaps in its numbering.  
2262 - </para>  
2263 - <para>  
2264 - See also @1@option@1@--preserve-unreferenced-resources@2@option@2@,  
2265 - which does something completely different.  
2266 - </para>  
2267 - </listitem>  
2268 - </varlistentry>  
2269 - <varlistentry>  
2270 - <term>@1@option@1@--remove-unreferenced-resources=@1@replaceable@1@option@2@replaceable@2@@2@option@2@</term>  
2271 - <listitem>  
2272 - <para>  
2273 - The @1@replaceable@1@option@2@replaceable@2@ may be  
2274 - <literal>auto</literal>, <literal>yes</literal>, or  
2275 - <literal>no</literal>. The default is <literal>auto</literal>.  
2276 - </para>  
2277 - <para>  
2278 - Starting with qpdf 8.1, when splitting pages, qpdf is able to  
2279 - attempt to remove images and fonts that are not used by a page  
2280 - even if they are referenced in the page's resources  
2281 - dictionary. When shared resources are in use, this behavior  
2282 - can greatly reduce the file sizes of split pages, but the  
2283 - analysis is very slow. In versions from 8.1 through 9.1.1,  
2284 - qpdf did this analysis by default. Starting in qpdf 10.0.0, if  
2285 - <literal>auto</literal> is used, qpdf does a quick analysis of  
2286 - the file to determine whether the file is likely to have  
2287 - unreferenced objects on pages, a pattern that frequently  
2288 - occurs when resource dictionaries are shared across multiple  
2289 - pages and rarely occurs otherwise. If it discovers this  
2290 - pattern, then it will attempt to remove unreferenced  
2291 - resources. Usually this means you get the slower splitting  
2292 - speed only when it's actually going to create smaller files.  
2293 - You can suppress removal of unreferenced resources altogether  
2294 - by specifying <literal>no</literal> or force it to do the full  
2295 - algorithm by specifying <literal>yes</literal>.  
2296 - </para>  
2297 - <para>  
2298 - Other than cases in which you don't care about file size and  
2299 - care a lot about runtime, there are few reasons to use this  
2300 - option, especially now that <literal>auto</literal> mode is  
2301 - supported. One reason to use this is if you suspect that qpdf  
2302 - is removing resources it shouldn't be removing. If you  
2303 - encounter that case, please report it as a bug at <ulink  
2304 - url="https://github.com/qpdf/qpdf/issues/">https://github.com/qpdf/qpdf/issues/</ulink>.  
2305 - </para>  
2306 - </listitem>  
2307 - </varlistentry>  
2308 - <varlistentry>  
2309 - <term>@1@option@1@--preserve-unreferenced-resources@2@option@2@</term>  
2310 - <listitem>  
2311 - <para>  
2312 - This is a synonym for  
2313 - @1@option@1@--remove-unreferenced-resources=no@2@option@2@.  
2314 - </para>  
2315 - <para>  
2316 - See also @1@option@1@--preserve-unreferenced@2@option@2@, which does  
2317 - something completely different.  
2318 - </para>  
2319 - </listitem>  
2320 - </varlistentry>  
2321 - <varlistentry>  
2322 - <term>@1@option@1@--newline-before-endstream@2@option@2@</term>  
2323 - <listitem>  
2324 - <para>  
2325 - Tells qpdf to insert a newline before the  
2326 - <literal>endstream</literal> keyword, not counted in the  
2327 - length, after any stream content even if the last character of  
2328 - the stream was a newline. This may result in two newlines in  
2329 - some cases. This is a requirement of PDF/A. While qpdf doesn't  
2330 - specifically know how to generate PDF/A-compliant PDFs, this  
2331 - at least prevents it from removing compliance on already  
2332 - compliant files.  
2333 - </para>  
2334 - </listitem>  
2335 - </varlistentry>  
2336 - <varlistentry>  
2337 - <term>@1@option@1@--linearize-pass1=@1@replaceable@1@file@2@replaceable@2@@2@option@2@</term>  
2338 - <listitem>  
2339 - <para>  
2340 - Write the first pass of linearization to the named file. The  
2341 - resulting file is not a valid PDF file. This option is useful  
2342 - only for debugging <classname>QPDFWriter</classname>'s  
2343 - linearization code. When qpdf linearizes files, it writes the  
2344 - file in two passes, using the first pass to calculate sizes  
2345 - and offsets that are required for hint tables and the  
2346 - linearization dictionary. Ordinarily, the first pass is  
2347 - discarded. This option enables it to be captured.  
2348 - </para>  
2349 - </listitem>  
2350 - </varlistentry>  
2351 - <varlistentry>  
2352 - <term>@1@option@1@--coalesce-contents@2@option@2@</term>  
2353 - <listitem>  
2354 - <para>  
2355 - When a page's contents are split across multiple streams, this  
2356 - option causes qpdf to combine them into a single stream. Use  
2357 - of this option is never necessary for ordinary usage, but it  
2358 - can help when working with some files in some cases. For  
2359 - example, this can also be combined with QDF mode or content  
2360 - normalization to make it easier to look at all of a page's  
2361 - contents at once.  
2362 - </para>  
2363 - </listitem>  
2364 - </varlistentry>  
2365 - <varlistentry>  
2366 - <term>@1@option@1@--flatten-annotations=@1@replaceable@1@option@2@replaceable@2@@2@option@2@</term>  
2367 - <listitem>  
2368 - <para>  
2369 - This option collapses annotations into the pages' contents  
2370 - with special handling for form fields. Ordinarily, an  
2371 - annotation is rendered separately and on top of the page.  
2372 - Combining annotations into the page's contents effectively  
2373 - freezes the placement of the annotations, making them look  
2374 - right after various page transformations. The library  
2375 - functionality backing this option was added for the benefit of  
2376 - programs that want to create <emphasis>n-up</emphasis> page  
2377 - layouts and other similar things that don't work well with  
2378 - annotations. The @1@replaceable@1@option@2@replaceable@2@ parameter  
2379 - may be any of the following:  
2380 - <itemizedlist>  
2381 - <listitem>  
2382 - <para>  
2383 - @1@option@1@all@2@option@2@: include all annotations that are not  
2384 - marked invisible or hidden  
2385 - </para>  
2386 - </listitem>  
2387 - <listitem>  
2388 - <para>  
2389 - @1@option@1@print@2@option@2@: only include annotations that  
2390 - indicate that they should appear when the page is printed  
2391 - </para>  
2392 - </listitem>  
2393 - <listitem>  
2394 - <para>  
2395 - @1@option@1@screen@2@option@2@: omit annotations that indicate  
2396 - they should not appear on the screen  
2397 - </para>  
2398 - </listitem>  
2399 - </itemizedlist>  
2400 - </para>  
2401 - <para>  
2402 - Note that form fields are special because the annotations that  
2403 - are used to render filled-in form fields may become out of  
2404 - date from the fields' values if the form is filled in by a  
2405 - program that doesn't know how to update the appearances. If  
2406 - qpdf detects this case, its default behavior is not to flatten  
2407 - those annotations because doing so would cause the value of  
2408 - the form field to be lost. This gives you a chance to go back  
2409 - and resave the form with a program that knows how to generate  
2410 - appearances. QPDF itself can generate appearances with some  
2411 - limitations. See the @1@option@1@--generate-appearances@2@option@2@  
2412 - option below.  
2413 - </para>  
2414 - </listitem>  
2415 - </varlistentry>  
2416 - <varlistentry>  
2417 - <term>@1@option@1@--generate-appearances@2@option@2@</term>  
2418 - <listitem>  
2419 - <para>  
2420 - If a file contains interactive form fields and indicates that  
2421 - the appearances are out of date with the values of the form,  
2422 - this flag will regenerate appearances, subject to a few  
2423 - limitations. Note that there is not usually a reason to do  
2424 - this, but it can be necessary before using the  
2425 - @1@option@1@--flatten-annotations@2@option@2@ option. The  
2426 - limitations, most of which are not a problem with well-behaved  
2427 - PDF files, are as follows:  
2428 - <itemizedlist>  
2429 - <listitem>  
2430 - <para>  
2431 - Radio button and checkbox appearances use the pre-set  
2432 - values in the PDF file. QPDF just makes sure that the  
2433 - correct appearance is displayed based on the value of the  
2434 - field. This is fine for PDF files that create their forms  
2435 - properly. Some PDF writers save appearances for fields when  
2436 - they change, which could cause some controls to have  
2437 - inconsistent appearances.  
2438 - </para>  
2439 - </listitem>  
2440 - </itemizedlist>  
2441 - <itemizedlist>  
2442 - <listitem>  
2443 - <para>  
2444 - For text fields and list boxes, any characters that fall  
2445 - outside of US-ASCII or, if detected, "Windows  
2446 - ANSI" or "Mac Roman" encoding, will be  
2447 - replaced by the <literal>?</literal> character.  
2448 - </para>  
2449 - </listitem>  
2450 - </itemizedlist>  
2451 - <itemizedlist>  
2452 - <listitem>  
2453 - <para>  
2454 - Quadding is ignored. Quadding is used to specify whether  
2455 - the contents of a field should be left, center, or right  
2456 - aligned with the field.  
2457 - </para>  
2458 - </listitem>  
2459 - </itemizedlist>  
2460 - <itemizedlist>  
2461 - <listitem>  
2462 - <para>  
2463 - Rich text, multi-line, and other more elaborate formatting  
2464 - directives are ignored.  
2465 - </para>  
2466 - </listitem>  
2467 - </itemizedlist>  
2468 - <itemizedlist>  
2469 - <listitem>  
2470 - <para>  
2471 - There is no support for multi-select fields or signature  
2472 - fields.  
2473 - </para>  
2474 - </listitem>  
2475 - </itemizedlist>  
2476 - If qpdf doesn't do a good enough job with your form, use an  
2477 - external application to save your filled-in form before  
2478 - processing it with qpdf.  
2479 - </para>  
2480 - </listitem>  
2481 - </varlistentry>  
2482 - <varlistentry>  
2483 - <term>@1@option@1@--optimize-images@2@option@2@</term>  
2484 - <listitem>  
2485 - <para>  
2486 - This flag causes qpdf to recompress all images that are not  
2487 - compressed with DCT (JPEG) using DCT compression as long as  
2488 - doing so decreases the size in bytes of the image data and the  
2489 - image does not fall below minimum specified dimensions. Useful  
2490 - information is provided when used in combination with  
2491 - @1@option@1@--verbose@2@option@2@. See also the  
2492 - @1@option@1@--oi-min-width@2@option@2@,  
2493 - @1@option@1@--oi-min-height@2@option@2@, and  
2494 - @1@option@1@--oi-min-area@2@option@2@ options. By default, starting  
2495 - in qpdf 8.4, inline images are converted to regular images  
2496 - and optimized as well. Use  
2497 - @1@option@1@--keep-inline-images@2@option@2@ to prevent inline images  
2498 - from being included.  
2499 - </para>  
2500 - </listitem>  
2501 - </varlistentry>  
2502 - <varlistentry>  
2503 - <term>@1@option@1@--oi-min-width=@1@replaceable@1@width@2@replaceable@2@@2@option@2@</term>  
2504 - <listitem>  
2505 - <para>  
2506 - Avoid optimizing images whose width is below the specified  
2507 - amount. If omitted, the default is 128 pixels. Use 0 for no  
2508 - minimum.  
2509 - </para>  
2510 - </listitem>  
2511 - </varlistentry>  
2512 - <varlistentry>  
2513 - <term>@1@option@1@--oi-min-height=@1@replaceable@1@height@2@replaceable@2@@2@option@2@</term>  
2514 - <listitem>  
2515 - <para>  
2516 - Avoid optimizing images whose height is below the specified  
2517 - amount. If omitted, the default is 128 pixels. Use 0 for no  
2518 - minimum.  
2519 - </para>  
2520 - </listitem>  
2521 - </varlistentry>  
2522 - <varlistentry>  
2523 - <term>@1@option@1@--oi-min-area=@1@replaceable@1@area-in-pixels@2@replaceable@2@@2@option@2@</term>  
2524 - <listitem>  
2525 - <para>  
2526 - Avoid optimizing images whose pixel count  
2527 - (width × height) is below the specified amount. If  
2528 - omitted, the default is 16,384 pixels. Use 0 for no minimum.  
2529 - </para>  
2530 - </listitem>  
2531 - </varlistentry>  
2532 - <varlistentry>  
2533 - <term>@1@option@1@--externalize-inline-images@2@option@2@</term>  
2534 - <listitem>  
2535 - <para>  
2536 - Convert inline images to regular images. By default, images  
2537 - whose data is at least 1,024 bytes are converted when this  
2538 - option is selected. Use @1@option@1@--ii-min-bytes@2@option@2@ to  
2539 - change the size threshold. This option is implicitly selected  
2540 - when @1@option@1@--optimize-images@2@option@2@ is selected. Use  
2541 - @1@option@1@--keep-inline-images@2@option@2@ to exclude inline images  
2542 - from image optimization.  
2543 - </para>  
2544 - </listitem>  
2545 - </varlistentry>  
2546 - <varlistentry>  
2547 - <term>@1@option@1@--ii-min-bytes=@1@replaceable@1@bytes@2@replaceable@2@@2@option@2@</term>  
2548 - <listitem>  
2549 - <para>  
2550 - Avoid converting inline images whose size is below the  
2551 - specified minimum size to regular images. If omitted, the  
2552 - default is 1,024 bytes. Use 0 for no minimum.  
2553 - </para>  
2554 - </listitem>  
2555 - </varlistentry>  
2556 - <varlistentry>  
2557 - <term>@1@option@1@--keep-inline-images@2@option@2@</term>  
2558 - <listitem>  
2559 - <para>  
2560 - Prevent inline images from being included in image  
2561 - optimization. This option has no effect when  
2562 - @1@option@1@--optimize-images@2@option@2@ is not specified.  
2563 - </para>  
2564 - </listitem>  
2565 - </varlistentry>  
2566 - <varlistentry>  
2567 - <term>@1@option@1@--remove-page-labels@2@option@2@</term>  
2568 - <listitem>  
2569 - <para>  
2570 - Remove page labels from the output file.  
2571 - </para>  
2572 - </listitem>  
2573 - </varlistentry>  
2574 - <varlistentry>  
2575 - <term>@1@option@1@--qdf@2@option@2@</term>  
2576 - <listitem>  
2577 - <para>  
2578 - Turns on QDF mode. For additional information on QDF, please  
2579 - see <xref linkend="ref.qdf"/>. Note that  
2580 - @1@option@1@--linearize@2@option@2@ disables QDF mode.  
2581 - </para>  
2582 - </listitem>  
2583 - </varlistentry>  
2584 - <varlistentry>  
2585 - <term>@1@option@1@--min-version=@1@replaceable@1@version@2@replaceable@2@@2@option@2@</term>  
2586 - <listitem>  
2587 - <para>  
2588 - Forces the PDF version of the output file to be at least  
2589 - @1@replaceable@1@version@2@replaceable@2@. In other words, if the  
2590 - input file has a lower version than the specified version, the  
2591 - specified version will be used. If the input file has a  
2592 - higher version, the input file's original version will be  
2593 - used. It is seldom necessary to use this option since qpdf  
2594 - will automatically increase the version as needed when adding  
2595 - features that require newer PDF readers.  
2596 - </para>  
2597 - <para>  
2598 - The version number may be expressed in the form  
2599 - @1@replaceable@1@major.minor.extension-level@2@replaceable@2@, in  
2600 - which case the version is interpreted as  
2601 - @1@replaceable@1@major.minor@2@replaceable@2@ at extension level  
2602 - @1@replaceable@1@extension-level@2@replaceable@2@. For example,  
2603 - version <literal>1.7.8</literal> represents version 1.7 at  
2604 - extension level 8. Note that minimal syntax checking is done  
2605 - on the command line.  
2606 - </para>  
2607 - </listitem>  
2608 - </varlistentry>  
2609 - <varlistentry>  
2610 - <term>@1@option@1@--force-version=@1@replaceable@1@version@2@replaceable@2@@2@option@2@</term>  
2611 - <listitem>  
2612 - <para>  
2613 - This option forces the PDF version to be the exact version  
2614 - specified <emphasis>even when the file may have content that  
2615 - is not supported in that version</emphasis>. The version  
2616 - number is interpreted in the same way as with  
2617 - @1@option@1@--min-version@2@option@2@ so that extension levels can be  
2618 - set. In some cases, forcing the output file's PDF version to  
2619 - be lower than that of the input file will cause qpdf to  
2620 - disable certain features of the document. Specifically,  
2621 - 256-bit keys are disabled if the version is less than 1.7 with  
2622 - extension level 8 (except R5 is disabled if less than 1.7 with  
2623 - extension level 3), AES encryption is disabled if the version  
2624 - is less than 1.6, cleartext metadata and object streams are  
2625 - disabled if less than 1.5, 128-bit encryption keys are  
2626 - disabled if less than 1.4, and all encryption is disabled if  
2627 - less than 1.3. Even with these precautions, qpdf won't be  
2628 - able to do things like eliminate use of newer image  
2629 - compression schemes, transparency groups, or other features  
2630 - that may have been added in more recent versions of PDF.  
2631 - </para>  
2632 - <para>  
2633 - As a general rule, with the exception of big structural things  
2634 - like the use of object streams or AES encryption, PDF viewers  
2635 - are supposed to ignore features in files that they don't  
2636 - support from newer versions. This means that forcing the  
2637 - version to a lower version may make it possible to open your  
2638 - PDF file with an older version, though bear in mind that some  
2639 - of the original document's functionality may be lost.  
2640 - </para>  
2641 - </listitem>  
2642 - </varlistentry>  
2643 - </variablelist>  
2644 - </para>  
2645 - <para>  
2646 - By default, when a stream is encoded using non-lossy filters that  
2647 - qpdf understands and is not already compressed using a good  
2648 - compression scheme, qpdf will uncompress and recompress streams.  
2649 - Assuming proper filter implementations, this is safe and generally  
2650 - results in smaller files. This behavior may also be explicitly  
2651 - requested with @1@option@1@--stream-data=compress@2@option@2@.  
2652 - </para>  
2653 - <para>  
2654 - When @1@option@1@--normalize-content=y@2@option@2@ is specified, qpdf  
2655 - will attempt to normalize whitespace and newlines in page content  
2656 - streams. This is generally safe but could, in some cases, cause  
2657 - damage to the content streams. This option is intended for people  
2658 - who wish to study PDF content streams or to debug PDF content.  
2659 - You should not use this for "production" PDF files.  
2660 - </para>  
2661 - <para>  
2662 - When normalizing content, if qpdf runs into any lexical errors, it  
2663 - will print a warning indicating that content may be damaged. The  
2664 - only situation in which qpdf is known to cause damage during  
2665 - content normalization is when a page's contents are split across  
2666 - multiple streams and streams are split in the middle of a lexical  
2667 - token such as a string, name, or inline image. Note that files  
2668 - that do this are invalid since the PDF specification states that  
2669 - content streams are not to be split in the middle of a token. If  
2670 - you want to inspect the original content streams in an  
2671 - uncompressed format, you can always run with @1@option@1@--qdf  
2672 - --normalize-content=n@2@option@2@ for a QDF file without content  
2673 - normalization, or alternatively  
2674 - @1@option@1@--stream-data=uncompress@2@option@2@ for a regular non-QDF  
2675 - mode file with uncompressed streams. These will both uncompress  
2676 - all the streams but will not attempt to normalize content. Please  
2677 - note that if you are using content normalization or QDF mode for  
2678 - the purpose of manually inspecting files, you don't have to care  
2679 - about this.  
2680 - </para>  
2681 - <para>  
2682 - Object streams, also known as compressed objects, were introduced  
2683 - into the PDF specification at version 1.5, corresponding to  
2684 - Acrobat 6. Some older PDF viewers may not support files with  
2685 - object streams. qpdf can be used to transform files with object  
2686 - streams to files without object streams or vice versa. As  
2687 - mentioned above, there are three object stream modes:  
2688 - @1@option@1@preserve@2@option@2@, @1@option@1@disable@2@option@2@, and  
2689 - @1@option@1@generate@2@option@2@.  
2690 - </para>  
2691 - <para>  
2692 - In @1@option@1@preserve@2@option@2@ mode, the relationship between objects and  
2693 - the streams that contain them is preserved from the original file.  
2694 - In @1@option@1@disable@2@option@2@ mode, all objects are written as  
2695 - regular, uncompressed objects. The resulting file should be  
2696 - readable by older PDF viewers. (Of course, the content of the  
2697 - files may include features not supported by older viewers, but at  
2698 - least the structure will be supported.) In  
2699 - @1@option@1@generate@2@option@2@ mode, qpdf will create its own object  
2700 - streams. This will usually result in more compact PDF files,  
2701 - though they may not be readable by older viewers. In this mode,  
2702 - qpdf will also make sure the PDF version number in the header is  
2703 - at least 1.5.  
2704 - </para>  
2705 - <para>  
2706 - The @1@option@1@--qdf@2@option@2@ flag turns on QDF mode, which changes  
2707 - some of the defaults described above. Specifically, in QDF mode,  
2708 - by default, stream data is uncompressed, content streams are  
2709 - normalized, and encryption is removed. These defaults can still  
2710 - be overridden by specifying the appropriate options as described  
2711 - above. Additionally, in QDF mode, stream lengths are stored as  
2712 - indirect objects, objects are laid out in a less efficient but  
2713 - more readable fashion, and the documents are interspersed with  
2714 - comments that make it easier for the user to find things and also  
2715 - make it possible for @1@command@1@fix-qdf@2@command@2@ to work properly.  
2716 - QDF mode is intended for people, mostly developers, who wish to  
2717 - inspect or modify PDF files in a text editor. For details, please  
2718 - see <xref linkend="ref.qdf"/>.  
2719 - </para>  
2720 - </sect1>  
2721 - <sect1 id="ref.testing-options">  
2722 - <title>Testing, Inspection, and Debugging Options</title>  
2723 - <para>  
2724 - These options can be useful for digging into PDF files or for use  
2725 - in automated test suites for software that uses the qpdf library.  
2726 - When any of the options in this section are specified, no output  
2727 - file should be given. The following options are available:  
2728 - <variablelist>  
2729 - <varlistentry>  
2730 - <term>@1@option@1@--deterministic-id@2@option@2@</term>  
2731 - <listitem>  
2732 - <para>  
2733 - Causes generation of a deterministic value for /ID. This  
2734 - prevents use of timestamp and output file name information in  
2735 - the /ID generation. Instead, at some slight additional runtime  
2736 - cost, the /ID field is generated to include a digest of the  
2737 - significant parts of the content of the output PDF file. This  
2738 - means that a given qpdf operation should generate the same /ID  
2739 - each time it is run, which can be useful when caching results  
2740 - or for generation of some test data. Use of this flag is not  
2741 - compatible with creation of encrypted files.  
2742 - </para>  
2743 - </listitem>  
2744 - </varlistentry>  
2745 - <varlistentry>  
2746 - <term>@1@option@1@--static-id@2@option@2@</term>  
2747 - <listitem>  
2748 - <para>  
2749 - Causes generation of a fixed value for /ID. This is intended  
2750 - for testing only. Never use it for production files. If you  
2751 - are trying to get the same /ID each time for a given file and  
2752 - you are not generating encrypted files, consider using the  
2753 - @1@option@1@--deterministic-id@2@option@2@ option.  
2754 - </para>  
2755 - </listitem>  
2756 - </varlistentry>  
2757 - <varlistentry>  
2758 - <term>@1@option@1@--static-aes-iv@2@option@2@</term>  
2759 - <listitem>  
2760 - <para>  
2761 - Causes use of a static initialization vector for AES-CBC.  
2762 - This is intended for testing only so that output files can be  
2763 - reproducible. Never use it for production files. This option  
2764 - in particular is not secure since it significantly weakens the  
2765 - encryption.  
2766 - </para>  
2767 - </listitem>  
2768 - </varlistentry>  
2769 - <varlistentry>  
2770 - <term>@1@option@1@--no-original-object-ids@2@option@2@</term>  
2771 - <listitem>  
2772 - <para>  
2773 - Suppresses inclusion of original object ID comments in QDF  
2774 - files. This can be useful when generating QDF files for test  
2775 - purposes, particularly when comparing them to determine  
2776 - whether two PDF files have identical content.  
2777 - </para>  
2778 - </listitem>  
2779 - </varlistentry>  
2780 - <varlistentry>  
2781 - <term>@1@option@1@--show-encryption@2@option@2@</term>  
2782 - <listitem>  
2783 - <para>  
2784 - Shows document encryption parameters. Also shows the  
2785 - document's user password if the owner password is given.  
2786 - </para>  
2787 - </listitem>  
2788 - </varlistentry>  
2789 - <varlistentry>  
2790 - <term>@1@option@1@--show-encryption-key@2@option@2@</term>  
2791 - <listitem>  
2792 - <para>  
2793 - When encryption information is being displayed, as when  
2794 - @1@option@1@--check@2@option@2@ or @1@option@1@--show-encryption@2@option@2@  
2795 - is given, display the computed or retrieved encryption key as  
2796 - a hexadecimal string. This value is not ordinarily useful to  
2797 - users, but it can be used as the argument to  
2798 - @1@option@1@--password@2@option@2@ if the  
2799 - @1@option@1@--password-is-hex-key@2@option@2@ option is specified. Note  
2800 - that, when PDF files are encrypted, passwords and other  
2801 - metadata are used only to compute an encryption key, and the  
2802 - encryption key is what is actually used for encryption. This  
2803 - enables retrieval of that key.  
2804 - </para>  
2805 - </listitem>  
2806 - </varlistentry>  
2807 - <varlistentry>  
2808 - <term>@1@option@1@--check-linearization@2@option@2@</term>  
2809 - <listitem>  
2810 - <para>  
2811 - Checks file integrity and linearization status.  
2812 - </para>  
2813 - </listitem>  
2814 - </varlistentry>  
2815 - <varlistentry>  
2816 - <term>@1@option@1@--show-linearization@2@option@2@</term>  
2817 - <listitem>  
2818 - <para>  
2819 - Checks and displays all data in the linearization hint tables.  
2820 - </para>  
2821 - </listitem>  
2822 - </varlistentry>  
2823 - <varlistentry>  
2824 - <term>@1@option@1@--show-xref@2@option@2@</term>  
2825 - <listitem>  
2826 - <para>  
2827 - Shows the contents of the cross-reference table in a  
2828 - human-readable form. This is especially useful for files with  
2829 - cross-reference streams which are stored in a binary format.  
2830 - </para>  
2831 - </listitem>  
2832 - </varlistentry>  
2833 - <varlistentry>  
2834 - <term>@1@option@1@--show-object=trailer|obj[,gen]@2@option@2@</term>  
2835 - <listitem>  
2836 - <para>  
2837 - Show the contents of the given object. This is especially  
2838 - useful for inspecting objects that are inside of object  
2839 - streams (also known as "compressed objects").  
2840 - </para>  
2841 - </listitem>  
2842 - </varlistentry>  
2843 - <varlistentry>  
2844 - <term>@1@option@1@--raw-stream-data@2@option@2@</term>  
2845 - <listitem>  
2846 - <para>  
2847 - When used along with the @1@option@1@--show-object@2@option@2@  
2848 - option, if the object is a stream, shows the raw stream data  
2849 - instead of the object's contents.  
2850 - </para>  
2851 - </listitem>  
2852 - </varlistentry>  
2853 - <varlistentry>  
2854 - <term>@1@option@1@--filtered-stream-data@2@option@2@</term>  
2855 - <listitem>  
2856 - <para>  
2857 - When used along with the @1@option@1@--show-object@2@option@2@  
2858 - option, if the object is a stream, shows the filtered stream  
2859 - data instead of the object's contents. If the stream is filtered  
2860 - using filters that qpdf does not support, an error will be  
2861 - issued.  
2862 - </para>  
2863 - </listitem>  
2864 - </varlistentry>  
2865 - <varlistentry>  
2866 - <term>@1@option@1@--show-npages@2@option@2@</term>  
2867 - <listitem>  
2868 - <para>  
2869 - Prints the number of pages in the input file on a line by  
2870 - itself. Since the number of pages appears by itself on a  
2871 - line, this option can be useful for scripting if you need to  
2872 - know the number of pages in a file.  
2873 - </para>  
2874 - </listitem>  
2875 - </varlistentry>  
2876 - <varlistentry>  
2877 - <term>@1@option@1@--show-pages@2@option@2@</term>  
2878 - <listitem>  
2879 - <para>  
2880 - Shows the object and generation number for each page  
2881 - dictionary object and for each content stream associated with  
2882 - the page. Having this information makes it more convenient to  
2883 - inspect objects from a particular page.  
2884 - </para>  
2885 - </listitem>  
2886 - </varlistentry>  
2887 - <varlistentry>  
2888 - <term>@1@option@1@--with-images@2@option@2@</term>  
2889 - <listitem>  
2890 - <para>  
2891 - When used along with @1@option@1@--show-pages@2@option@2@, also shows  
2892 - the object and generation numbers for the image objects on  
2893 - each page. (At present, information about images in shared  
2894 - resource dictionaries is not output by this command. This is  
2895 - discussed in a comment in the source code.)  
2896 - </para>  
2897 - </listitem>  
2898 - </varlistentry>  
2899 - <varlistentry>  
2900 - <term>@1@option@1@--json@2@option@2@</term>  
2901 - <listitem>  
2902 - <para>  
2903 - Generate a JSON representation of the file. This is described  
2904 - in depth in <xref linkend="ref.json"/>.  
2905 - </para>  
2906 - </listitem>  
2907 - </varlistentry>  
2908 - <varlistentry>  
2909 - <term>@1@option@1@--json-help@2@option@2@</term>  
2910 - <listitem>  
2911 - <para>  
2912 - Describe the format of the JSON output.  
2913 - </para>  
2914 - </listitem>  
2915 - </varlistentry>  
2916 - <varlistentry>  
2917 - <term>@1@option@1@--json-key=key@2@option@2@</term>  
2918 - <listitem>  
2919 - <para>  
2920 - This option is repeatable. If specified, only top-level keys  
2921 - specified will be included in the JSON output. If not  
2922 - specified, all keys will be shown.  
2923 - </para>  
2924 - </listitem>  
2925 - </varlistentry>  
2926 - <varlistentry>  
2927 - <term>@1@option@1@--json-object=trailer|obj[,gen]@2@option@2@</term>  
2928 - <listitem>  
2929 - <para>  
2930 - This option is repeatable. If specified, only specified  
2931 - objects will be shown in the  
2932 - "<literal>objects</literal>" key of the JSON  
2933 - output. If absent, all objects will be shown.  
2934 - </para>  
2935 - </listitem>  
2936 - </varlistentry>  
2937 - <varlistentry>  
2938 - <term>@1@option@1@--check@2@option@2@</term>  
2939 - <listitem>  
2940 - <para>  
2941 - Checks file structure as well as encryption, linearization,  
2942 - and encoding of stream data. A file for which  
2943 - @1@option@1@--check@2@option@2@ reports no errors may still have  
2944 - errors in stream data content but should otherwise be  
2945 - structurally sound. If @1@option@1@--check@2@option@2@ reports any errors,  
2946 - qpdf will exit with a status of 2. There are some recoverable  
2947 - conditions that @1@option@1@--check@2@option@2@ detects. These are  
2948 - issued as warnings instead of errors. If qpdf finds no errors  
2949 - but finds warnings, it will exit with a status of 3 (as of  
2950 - version 2.0.4). When @1@option@1@--check@2@option@2@ is combined  
2951 - with other options, checks are always performed before any  
2952 - other options are processed. For erroneous files,  
2953 - @1@option@1@--check@2@option@2@ will cause qpdf to attempt to  
2954 - recover, after which other options are effectively operating  
2955 - on the recovered file. Combining @1@option@1@--check@2@option@2@ with  
2956 - other options in this way can be useful for manually  
2957 - recovering severely damaged files. Note that  
2958 - @1@option@1@--check@2@option@2@ produces no output to standard output  
2959 - when everything is valid, so if you are using this to  
2960 - programmatically validate files in bulk, it is safe to run  
2961 - without output redirected to @1@filename@1@/dev/null@2@filename@2@  
2962 - and just check for a 0 exit code.  
2963 - </para>  
2964 - </listitem>  
2965 - </varlistentry>  
2966 - </variablelist>  
2967 - </para>  
2968 - <para>  
2969 - The @1@option@1@--raw-stream-data@2@option@2@ and  
2970 - @1@option@1@--filtered-stream-data@2@option@2@ options are ignored unless  
2971 - @1@option@1@--show-object@2@option@2@ is given. Either of these options  
2972 - will cause the stream data to be written to standard output. In  
2973 - order to avoid commingling of stream data with other output, it is  
2974 - recommended that these options not be combined with other  
2975 - test/inspection options.  
2976 - </para>  
2977 - <para>  
2978 - If @1@option@1@--filtered-stream-data@2@option@2@ is given and  
2979 - @1@option@1@--normalize-content=y@2@option@2@ is also given, qpdf will  
2980 - attempt to normalize the stream data as if it is a page content  
2981 - stream. This attempt will be made even if it is not a page  
2982 - content stream, in which case it will produce unusable results.  
2983 - </para>  
2984 - </sect1>  
2985 - <sect1 id="ref.unicode-passwords">  
2986 - <title>Unicode Passwords</title>  
2987 - <para>  
2988 - At the library API level, all methods that perform encryption and  
2989 - decryption interpret passwords as strings of bytes. It is up to  
2990 - the caller to ensure that they are appropriately encoded. Starting  
2991 - with qpdf version 8.4.0, qpdf will attempt to make this easier for  
2992 - you when you interact with qpdf via its command line interface. The  
2993 - PDF specification requires passwords used to encrypt files with  
2994 - 40-bit or 128-bit encryption to be encoded with PDF Doc encoding.  
2995 - This encoding is a single-byte encoding that supports ISO-Latin-1  
2996 - and a handful of other commonly used characters. It has a large  
2997 - overlap with Windows ANSI but is not exactly the same. There is  
2998 - generally not a way to provide PDF Doc encoded strings on the  
2999 - command line. As such, qpdf versions prior to 8.4.0 would often  
3000 - create PDF files that couldn't be opened with other software when  
3001 - given a password with non-ASCII characters to encrypt a file with  
3002 - 40-bit or 128-bit encryption. Starting with qpdf 8.4.0, qpdf  
3003 - recognizes the encoding of the parameter and transcodes it as  
3004 - needed. The rest of this section provides the details about  
3005 - exactly how qpdf behaves. Most users will not need to know this  
3006 - information, but it might be useful if you have been working  
3007 - around qpdf's old behavior or if you are using qpdf to generate  
3008 - encrypted files for testing other PDF software.  
3009 - </para>  
3010 - <para>  
3011 - A note about Windows: when qpdf builds, it attempts to determine  
3012 - what it has to do to use <function>wmain</function> instead of  
3013 - <function>main</function> on Windows. The  
3014 - <function>wmain</function> function is an alternative entry point  
3015 - that receives all arguments as UTF-16-encoded strings. When qpdf  
3016 - starts up this way, it converts all the strings to UTF-8 encoding  
3017 - and then invokes the regular main. This means that, as far as qpdf  
3018 - is concerned, it receives its command-line arguments with UTF-8  
3019 - encoding, just as it would in any modern Linux or UNIX  
3020 - environment.  
3021 - </para>  
3022 - <para>  
3023 - If a file is being encrypted with 40-bit or 128-bit encryption and  
3024 - the supplied password is not a valid UTF-8 string, qpdf will fall  
3025 - back to the behavior of interpreting the password as a string of  
3026 - bytes. If you have old scripts that encrypt files by passing the  
3027 - output of @1@command@1@iconv@2@command@2@ to qpdf, you no longer need to  
3028 - do that, but if you do, qpdf should still work. The only exception  
3029 - would be for the extremely unlikely case of a password that is  
3030 - encoded with a single-byte encoding but also happens to be valid  
3031 - UTF-8. Such a password would contain strings of even numbers of  
3032 - characters that alternate between accented letters and symbols. In  
3033 - the extremely unlikely event that you are intentionally using such  
3034 - passwords and qpdf is thwarting you by interpreting them as UTF-8,  
3035 - you can use @1@option@1@--password-mode=bytes@2@option@2@ to suppress  
3036 - qpdf's automatic behavior.  
3037 - </para>  
3038 - <para>  
3039 - The @1@option@1@--password-mode@2@option@2@ option, as described earlier  
3040 - in this chapter, can be used to change qpdf's interpretation of  
3041 - supplied passwords. There are very few reasons to use this option.  
3042 - One would be the unlikely case described in the previous paragraph  
3043 - in which the supplied password happens to be valid UTF-8 but isn't  
3044 - supposed to be UTF-8. Your best bet would be just to provide the  
3045 - password as a valid UTF-8 string, but you could also use  
3046 - @1@option@1@--password-mode=bytes@2@option@2@. Another reason to use  
3047 - @1@option@1@--password-mode=bytes@2@option@2@ would be to intentionally  
3048 - generate PDF files encrypted with passwords that are not properly  
3049 - encoded. The qpdf test suite does this to generate invalid files  
3050 - for the purpose of testing its password recovery capability. If  
3051 - you were trying to create intentionally incorrect files for a  
3052 - similar purpose, the @1@option@1@bytes@2@option@2@ password mode can  
3053 - enable you to do this.  
3054 - </para>  
3055 - <para>  
3056 - When qpdf attempts to decrypt a file with a password that contains  
3057 - non-ASCII characters, it will generate a list of alternative  
3058 - passwords by attempting to interpret the password as each of a  
3059 - handful of different coding systems and then transcode them to the  
3060 - required format. This helps to compensate for the supplied  
3061 - password being given in the wrong coding system, such as would  
3062 - happen if you used the @1@command@1@iconv@2@command@2@ workaround that  
3063 - was previously needed. It also generates passwords by doing the  
3064 - reverse operation: translating from correct to incorrect encoding  
3065 - of the password. This would enable qpdf to decrypt files using  
3066 - passwords that were improperly encoded by whatever software  
3067 - encrypted the files, including older versions of qpdf invoked  
3068 - without properly encoded passwords. The combination of these two  
3069 - recovery methods should make qpdf transparently open most  
3070 - encrypted files with the password supplied correctly but in the  
3071 - wrong coding system. There are no real downsides to this behavior,  
3072 - but if you don't want qpdf to do this, you can use the  
3073 - @1@option@1@--suppress-password-recovery@2@option@2@ option. One reason  
3074 - to do that is to ensure that you know the exact password that was  
3075 - used to encrypt the file.  
3076 - </para>  
3077 - <para>  
3078 - With these changes, qpdf now generates compliant passwords in most  
3079 - cases. There are still some exceptions. In particular, the PDF  
3080 - specification directs compliant writers to normalize Unicode  
3081 - passwords and to perform certain transformations on passwords with  
3082 - bidirectional text. Implementing this functionality requires using  
3083 - a real Unicode library like ICU. If a client application that uses  
3084 - qpdf wants to do this, the qpdf library will accept the resulting  
3085 - passwords, but qpdf will not perform these transformations itself.  
3086 - It is possible that this will be addressed in a future version of  
3087 - qpdf. The <classname>QPDFWriter</classname> methods that enable  
3088 - encryption on the output file accept passwords as strings of  
3089 - bytes.  
3090 - </para>  
3091 - <para>  
3092 - Please note that the @1@option@1@--password-is-hex-key@2@option@2@ option  
3093 - is unrelated to all this. This flag bypasses the normal process of  
3094 - going from password to encryption string entirely, allowing the  
3095 - raw encryption key to be specified directly. This is useful for  
3096 - forensic purposes or for brute-force recovery of files with  
3097 - unknown passwords.  
3098 - </para>  
3099 - </sect1>  
3100 - </chapter>  
3101 - <chapter id="ref.qdf">  
3102 - <title>QDF Mode</title>  
3103 - <para>  
3104 - In QDF mode, qpdf creates PDF files in what we call @1@firstterm@1@QDF  
3105 - form@2@firstterm@2@. A PDF file in QDF form, sometimes called a QDF  
3106 - file, is a completely valid PDF file that has  
3107 - <literal>%QDF-1.0</literal> as its third line (after the pdf header  
3108 - and binary characters) and has certain other characteristics. The  
3109 - purpose of QDF form is to make it possible to edit PDF files, with  
3110 - some restrictions, in an ordinary text editor. This can be very  
3111 - useful for experimenting with different PDF constructs or for  
3112 - making one-off edits to PDF files (though there are other reasons  
3113 - why this may not always work). Note that QDF mode does not support  
3114 - linearized files. If you enable linearization, QDF mode is  
3115 - automatically disabled.  
3116 - </para>  
3117 - <para>  
3118 - It is ordinarily very difficult to edit PDF files in a text editor  
3119 - for two reasons: most meaningful data in PDF files is compressed,  
3120 - and PDF files are full of offset and length information that makes  
3121 - it hard to add or remove data. A QDF file is organized in a manner  
3122 - such that, if edits are kept within certain constraints, the  
3123 - @1@command@1@fix-qdf@2@command@2@ program, distributed with qpdf, is able  
3124 - to restore edited files to a correct state. The  
3125 - @1@command@1@fix-qdf@2@command@2@ program takes no command-line  
3126 - arguments. It reads a possibly edited QDF file from standard input  
3127 - and writes a repaired file to standard output.  
3128 - </para>  
3129 - <para>  
3130 - The following attributes characterize a QDF file:  
3131 - <itemizedlist>  
3132 - <listitem>  
3133 - <para>  
3134 - All objects appear in numerical order in the PDF file, including  
3135 - when objects appear in object streams.  
3136 - </para>  
3137 - </listitem>  
3138 - <listitem>  
3139 - <para>  
3140 - Objects are printed in an easy-to-read format, and all line  
3141 - endings are normalized to UNIX line endings.  
3142 - </para>  
3143 - </listitem>  
3144 - <listitem>  
3145 - <para>  
3146 - Unless specifically overridden, streams appear uncompressed  
3147 - (when qpdf supports the filters and they are compressed with a  
3148 - non-lossy compression scheme), and most content streams are  
3149 - normalized (line endings are converted to just UNIX-style  
3150 - linefeeds).  
3151 - </para>  
3152 - </listitem>  
3153 - <listitem>  
3154 - <para>  
3155 - All stream lengths are represented as indirect objects, and the  
3156 - stream length object is always the next object after the stream.  
3157 - If the stream data does not end with a newline, an extra newline  
3158 - is inserted, and a special comment appears after the stream  
3159 - indicating that this has been done.  
3160 - </para>  
3161 - </listitem>  
3162 - <listitem>  
3163 - <para>  
3164 - If the PDF file contains object streams, if object stream  
3165 - <emphasis>n</emphasis> contains <emphasis>k</emphasis> objects,  
3166 - those objects are numbered from <emphasis>n+1</emphasis> through  
3167 - <emphasis>n+k</emphasis>, and the object number/offset pairs  
3168 - appear on a separate line for each object. Additionally, each  
3169 - object in the object stream is preceded by a comment indicating  
3170 - its object number and index. This makes it very easy to find  
3171 - objects in object streams.  
3172 - </para>  
3173 - </listitem>  
3174 - <listitem>  
3175 - <para>  
3176 - All beginnings of objects, <literal>stream</literal> tokens,  
3177 - <literal>endstream</literal> tokens, and  
3178 - <literal>endobj</literal> tokens appear on lines by themselves.  
3179 - A blank line follows every <literal>endobj</literal> token.  
3180 - </para>  
3181 - </listitem>  
3182 - <listitem>  
3183 - <para>  
3184 - If there is a cross-reference stream, it is unfiltered.  
3185 - </para>  
3186 - </listitem>  
3187 - <listitem>  
3188 - <para>  
3189 - Page dictionaries and page content streams are marked with  
3190 - special comments that make them easy to find.  
3191 - </para>  
3192 - </listitem>  
3193 - <listitem>  
3194 - <para>  
3195 - Comments precede each object indicating the object number of the  
3196 - corresponding object in the original file.  
3197 - </para>  
3198 - </listitem>  
3199 - </itemizedlist>  
3200 - </para>  
3201 - <para>  
3202 - When editing a QDF file, any edits can be made as long as the above  
3203 - constraints are maintained. This means that you can freely edit a  
3204 - page's content without worrying about messing up the QDF file. It  
3205 - is also possible to add new objects so long as those objects are  
3206 - added after the last object in the file or subsequent objects are  
3207 - renumbered. If a QDF file has object streams in it, you can always  
3208 - add the new objects before the xref stream and then change the  
3209 - number of the xref stream, since nothing generally ever references  
3210 - it by number.  
3211 - </para>  
3212 - <para>  
3213 - It is not generally practical to remove objects from QDF files  
3214 - without messing up object numbering, but if you remove all  
3215 - references to an object, you can run qpdf on the file (after  
3216 - running @1@command@1@fix-qdf@2@command@2@), and qpdf will omit the  
3217 - now-orphaned object.  
3218 - </para>  
3219 - <para>  
3220 - When @1@command@1@fix-qdf@2@command@2@ is run, it goes through the file  
3221 - and recomputes the following parts of the file:  
3222 - <itemizedlist>  
3223 - <listitem>  
3224 - <para>  
3225 - the <literal>/N</literal>, <literal>/W</literal>, and  
3226 - <literal>/First</literal> keys of all object stream dictionaries  
3227 - </para>  
3228 - </listitem>  
3229 - <listitem>  
3230 - <para>  
3231 - the pairs of numbers representing object numbers and offsets of  
3232 - objects in object streams  
3233 - </para>  
3234 - </listitem>  
3235 - <listitem>  
3236 - <para>  
3237 - all stream lengths  
3238 - </para>  
3239 - </listitem>  
3240 - <listitem>  
3241 - <para>  
3242 - the cross-reference table or cross-reference stream  
3243 - </para>  
3244 - </listitem>  
3245 - <listitem>  
3246 - <para>  
3247 - the offset to the cross-reference table or cross-reference  
3248 - stream following the <literal>startxref</literal> token  
3249 - </para>  
3250 - </listitem>  
3251 - </itemizedlist>  
3252 - </para>  
3253 - </chapter>  
3254 - <chapter id="ref.using-library">  
3255 - <title>Using the QPDF Library</title>  
3256 - <sect1 id="ref.using.from-cxx">  
3257 - <title>Using QPDF from C++</title>  
3258 - <para>  
3259 - The source tree for the qpdf package has an  
3260 - @1@filename@1@examples@2@filename@2@ directory that contains a few  
3261 - example programs. The @1@filename@1@qpdf/qpdf.cc@2@filename@2@ source  
3262 - file also serves as a useful example since it exercises almost all  
3263 - of the qpdf library's public interface. The best source of  
3264 - documentation on the library itself is reading comments in  
3265 - @1@filename@1@include/qpdf/QPDF.hh@2@filename@2@,  
3266 - @1@filename@1@include/qpdf/QPDFWriter.hh@2@filename@2@, and  
3267 - @1@filename@1@include/qpdf/QPDFObjectHandle.hh@2@filename@2@.  
3268 - </para>  
3269 - <para>  
3270 - All header files are installed in the @1@filename@1@include/qpdf@2@filename@2@ directory. It  
3271 - is recommended that you use <literal>#include  
3272 - &lt;qpdf/QPDF.hh&gt;</literal> rather than adding  
3273 - @1@filename@1@include/qpdf@2@filename@2@ to your include path.  
3274 - </para>  
3275 - <para>  
3276 - When linking against the qpdf static library, you may also need to  
3277 - specify <literal>-lz -ljpeg</literal> on your link command. If  
3278 - your system understands how to read libtool  
3279 - @1@filename@1@.la@2@filename@2@ files, this may not be necessary.  
3280 - </para>  
3281 - <para>  
3282 - The qpdf library is safe to use in a multithreaded program, but no  
3283 - individual <type>QPDF</type> object instance (including  
3284 - <type>QPDF</type>, <type>QPDFObjectHandle</type>, or  
3285 - <type>QPDFWriter</type>) can be used in more than one thread at a  
3286 - time. Multiple threads may simultaneously work with different  
3287 - instances of these and all other QPDF objects.  
3288 - </para>  
3289 - </sect1>  
3290 - <sect1 id="ref.using.other-languages">  
3291 - <title>Using QPDF from other languages</title>  
3292 - <para>  
3293 - The qpdf library is implemented in C++, which makes it hard to use  
3294 - directly in other languages. There are a few things that can help.  
3295 - </para>  
3296 - <variablelist>  
3297 - <varlistentry>  
3298 - <term>"C"</term>  
3299 - <listitem>  
3300 - <para>  
3301 - The qpdf library includes a "C" language interface  
3302 - that provides a subset of the overall capabilities. The header  
3303 - file @1@filename@1@qpdf/qpdf-c.h@2@filename@2@ includes information  
3304 - about its use. As long as you use a C++ linker, you can link C  
3305 - programs with qpdf and use the C API. For languages that can  
3306 - directly load methods from a shared library, the C API can also  
3307 - be useful. People have reported success using the C API from  
3308 - other languages on Windows by directly calling functions in the  
3309 - DLL.  
3310 - </para>  
3311 - </listitem>  
3312 - </varlistentry>  
3313 - <varlistentry>  
3314 - <term>Python</term>  
3315 - <listitem>  
3316 - <para>  
3317 - A Python module called <ulink  
3318 - url="https://pypi.org/project/pikepdf/">pikepdf</ulink>  
3319 - provides a clean and highly functional set of Python bindings  
3320 - to the qpdf library. Using pikepdf, you can work with PDF files  
3321 - in a natural way and combine qpdf's capabilities with other  
3322 - functionality provided by Python's rich standard library and  
3323 - available modules.  
3324 - </para>  
3325 - </listitem>  
3326 - </varlistentry>  
3327 - <varlistentry>  
3328 - <term>Other Languages</term>  
3329 - <listitem>  
3330 - <para>  
3331 - Starting with version 8.3.0, the @1@command@1@qpdf@2@command@2@  
3332 - command-line tool can produce a JSON representation of the PDF  
3333 - file's non-content data. This can facilitate interacting  
3334 - programmatically with PDF files through qpdf's command line  
3335 - interface. For more information, please see <xref  
3336 - linkend="ref.json"/>.  
3337 - </para>  
3338 - </listitem>  
3339 - </varlistentry>  
3340 - </variablelist>  
3341 - </sect1>  
3342 - <sect1 id="ref.unicode-files">  
3343 - <title>A Note About Unicode File Names</title>  
3344 - <para>  
3345 - When strings are passed to qpdf library routines either as  
3346 - <literal>char*</literal> or as <literal>std::string</literal>,  
3347 - they are treated as byte arrays except where otherwise noted. When  
3348 - Unicode is desired, qpdf wants UTF-8 unless otherwise noted in  
3349 - comments in header files. In modern UNIX/Linux environments, this  
3350 - generally does the right thing. In Windows, it's a bit more  
3351 - complicated. Starting in qpdf 8.4.0, passwords that contain  
3352 - Unicode characters are handled much better, and starting in qpdf  
3353 - 8.4.1, the library attempts to properly handle Unicode characters  
3354 - in filenames. In particular, in Windows, if a UTF-8 encoded string  
3355 - is used as a filename in either <classname>QPDF</classname> or  
3356 - <classname>QPDFWriter</classname>, it is internally converted to  
3357 - <literal>wchar_t*</literal>, and Unicode-aware Windows APIs are  
3358 - used. As such, qpdf will generally operate properly on files with  
3359 - non-ASCII characters in their names as long as the filenames are  
3360 - UTF-8 encoded for passing into the qpdf library API, but there are  
3361 - still some rough edges, such as the encoding of the filenames in  
3362 - error messages or CLI output messages. Patches or bug reports are  
3363 - welcome for any continuing issues with Unicode file names in  
3364 - Windows.  
3365 - </para>  
3366 - </sect1>  
3367 - </chapter>  
3368 - <chapter id="ref.weak-crypto">  
3369 - <title>Weak Cryptography</title>  
3370 - <para>  
3371 - Starting with version 10.4, qpdf is taking steps to reduce the  
3372 - likelihood of a user <emphasis>accidentally</emphasis> creating PDF  
3373 - files with insecure cryptography but will continue to allow  
3374 - creation of such files indefinitely with explicit acknowledgment.  
3375 - </para>  
3376 - <para>  
3377 - The PDF file format makes use of RC4, which is known to be a weak  
3378 - cryptography algorithm, and MD5, which is a weak hashing algorithm.  
3379 - In version 10.4, qpdf generates warnings for some (but not all)  
3380 - cases of writing files with weak cryptography when invoked from the  
3381 - command-line. These warnings can be suppressed using the  
3382 - @1@option@1@--allow-weak-crypto@2@option@2@ option.  
3383 - </para>  
3384 - <para>  
3385 - It is planned for qpdf version 11 to be stricter, making it an  
3386 - error to write files with insecure cryptography from the  
3387 - command-line tool in most cases without specifying the  
3388 - @1@option@1@--allow-weak-crypto@2@option@2@ flag and also to require  
3389 - explicit steps when using the C++ library to enable use of insecure  
3390 - cryptography.  
3391 - </para>  
3392 - <para>  
3393 - Note that qpdf must always retain support for weak cryptographic  
3394 - algorithms since this is required for reading older PDF files that  
3395 - use it. Additionally, qpdf will always retain the ability to create  
3396 - files using weak cryptographic algorithms since, as a development  
3397 - tool, qpdf explicitly supports creating older or deprecated types  
3398 - of PDF files since these are sometimes needed to test or work with  
3399 - older versions of software. Even if other cryptography libraries  
3400 - drop support for RC4 or MD5, qpdf can always fall back to its  
3401 - internal implementations of those algorithms, so they are not going  
3402 - to disappear from qpdf.  
3403 - </para>  
3404 - </chapter>  
3405 - <chapter id="ref.json">  
3406 - <title>QPDF JSON</title>  
3407 - <sect1 id="ref.json-overview">  
3408 - <title>Overview</title>  
3409 - <para>  
3410 - Beginning with qpdf version 8.3.0, the @1@command@1@qpdf@2@command@2@  
3411 - command-line program can produce a JSON representation of the  
3412 - non-content data in a PDF file. It includes a dump in JSON format  
3413 - of all objects in the PDF file excluding the content of streams.  
3414 - This JSON representation makes it very easy to look in detail at  
3415 - the structure of a given PDF file, and it also provides a great way  
3416 - to work with PDF files programmatically from the command-line in  
3417 - languages that can't call or link with the qpdf library directly.  
3418 - Note that stream data can be extracted from PDF files using other  
3419 - qpdf command-line options.  
3420 - </para>  
3421 - </sect1>  
3422 - <sect1 id="ref.json-guarantees">  
3423 - <title>JSON Guarantees</title>  
3424 - <para>  
3425 - The qpdf JSON representation includes a JSON serialization of the  
3426 - raw objects in the PDF file as well as some computed information in  
3427 - a more easily extracted format. QPDF provides some guarantees about  
3428 - its JSON format. These guarantees are designed to simplify the  
3429 - experience of a developer working with the JSON format.  
3430 - <variablelist>  
3431 - <varlistentry>  
3432 - <term>Compatibility</term>  
3433 - <listitem>  
3434 - <para>  
3435 - The top-level JSON object output is a dictionary. The JSON  
3436 - output contains various nested dictionaries and arrays. With  
3437 - the exception of dictionaries that are populated by the fields  
3438 - of objects from the file, all instances of a dictionary are  
3439 - guaranteed to have exactly the same keys. Future versions of  
3440 - qpdf are free to add additional keys but not to remove keys or  
3441 - change the type of object that a key points to. The qpdf  
3442 - program validates this guarantee, and in the unlikely event  
3443 - that a bug in qpdf should cause it to generate data that  
3444 - doesn't conform to this rule, it will ask you to file a bug  
3445 - report.  
3446 - </para>  
3447 - <para>  
3448 - The top-level JSON structure contains a  
3449 - "<literal>version</literal>" key whose value is  
3450 - a simple integer. The value of the <literal>version</literal> key  
3451 - will be incremented if a non-compatible change is made. A  
3452 - non-compatible change would be any change that involves removal  
3453 - of a key, a change to the format of data pointed to by a key,  
3454 - or a semantic change that requires a different interpretation  
3455 - of a previously existing key. A strong effort will be made to  
3456 - avoid breaking compatibility.  
3457 - </para>  
3458 - </listitem>  
3459 - </varlistentry>  
3460 - <varlistentry>  
3461 - <term>Documentation</term>  
3462 - <listitem>  
3463 - <para>  
3464 - The @1@command@1@qpdf@2@command@2@ command can be invoked with the  
3465 - @1@option@1@--json-help@2@option@2@ option. This will output a JSON  
3466 - structure that has the same structure as the JSON output that  
3467 - qpdf generates, except that each field in the help output is a  
3468 - description of the corresponding field in the JSON output. The  
3469 - specific guarantees are as follows:  
3470 - <itemizedlist>  
3471 - <listitem>  
3472 - <para>  
3473 - A dictionary in the help output means that the  
3474 - corresponding location in the actual JSON output is also a  
3475 - dictionary with exactly the same keys; that is, no keys  
3476 - present in help are absent in the real output, and no keys  
3477 - will be present in the real output that are not in help. As  
3478 - a special case, if the dictionary has a single key whose  
3479 - name starts with <literal>&lt;</literal> and ends with  
3480 - <literal>&gt;</literal>, it means that the JSON output is a  
3481 - dictionary that can have any keys, each of which conforms  
3482 - to the value of the special key. This is used for cases in  
3483 - which the keys of the dictionary are things like object  
3484 - IDs.  
3485 - </para>  
3486 - </listitem>  
3487 - <listitem>  
3488 - <para>  
3489 - A string in the help output is a description of the item  
3490 - that appears in the corresponding location of the actual  
3491 - output. The corresponding output can have any format.  
3492 - </para>  
3493 - </listitem>  
3494 - <listitem>  
3495 - <para>  
3496 - An array in the help output always contains a single  
3497 - element. It indicates that the corresponding location in the  
3498 - actual output is also an array, and that each element of the  
3499 - array has whatever format is implied by the single element  
3500 - of the help output's array.  
3501 - </para>  
3502 - </listitem>  
3503 - </itemizedlist>  
3504 - For example, the help output includes a  
3505 - "<literal>pagelabels</literal>" key whose value is  
3506 - an array of one element. That element is a dictionary with keys  
3507 - "<literal>index</literal>" and  
3508 - "<literal>label</literal>". In addition to  
3509 - describing the meaning of those keys, this tells you that the  
3510 - actual JSON output will contain a <literal>pagelabels</literal>  
3511 - array, each of whose elements is a dictionary that contains an  
3512 - <literal>index</literal> key, a <literal>label</literal> key,  
3513 - and no other keys.  
3514 - </para>  
3515 - </listitem>  
3516 - </varlistentry>  
3517 - <varlistentry>  
3518 - <term>Directness and Simplicity</term>  
3519 - <listitem>  
3520 - <para>  
3521 - The JSON output contains the value of every object in the file,  
3522 - but it also contains some processed data. This is analogous to  
3523 - how qpdf's library interface works. The processed data is  
3524 - similar to the helper functions in that it allows you to look  
3525 - at certain aspects of the PDF file without having to understand  
3526 - all the nuances of the PDF specification, while the raw objects  
3527 - allow you to mine the PDF for anything that the higher-level  
3528 - interfaces are lacking.  
3529 - </para>  
3530 - </listitem>  
3531 - </varlistentry>  
3532 - </variablelist>  
3533 - </para>  
3534 - </sect1>  
3535 - <sect1 id="json.limitations">  
3536 - <title>Limitations of JSON Representation</title>  
3537 - <para>  
3538 - There are a few limitations to be aware of with the JSON structure:  
3539 - <itemizedlist>  
3540 - <listitem>  
3541 - <para>  
3542 - Strings, names, and indirect object references in the original  
3543 - PDF file are all converted to strings in the JSON  
3544 - representation. In the case of a "normal" PDF file,  
3545 - you can tell the difference because a name starts with a slash  
3546 - (<literal>/</literal>), and an indirect object reference looks  
3547 - like <literal>n n R</literal>, but if there were to be a string  
3548 - that looked like a name or indirect object reference, there  
3549 - would be no way to tell this from the JSON output. Note that  
3550 - there are certain cases where you know for sure what something  
3551 - is, such as knowing that dictionary keys in objects are always  
3552 - names and that certain things in the higher-level computed data  
3553 - are known to contain indirect object references.  
3554 - </para>  
3555 - </listitem>  
3556 - <listitem>  
3557 - <para>  
3558 - The JSON format doesn't support binary data very well. Mostly  
3559 - the details are not important, but they are presented here for  
3560 - information. When qpdf outputs a string in the JSON  
3561 - representation, it converts the string to UTF-8, assuming usual  
3562 - PDF string semantics. Specifically, if the original string is  
3563 - UTF-16, it is converted to UTF-8. Otherwise, it is assumed to  
3564 - have PDF doc encoding, and is converted to UTF-8 with that  
3565 - assumption. This causes strange things to happen to binary  
3566 - strings. For example, if you had the binary string  
3567 - <literal>&lt;038051&gt;</literal>, this would be output to the  
3568 - JSON as <literal>\u0003•Q</literal> because  
3569 - <literal>03</literal> is not a printable character and  
3570 - <literal>80</literal> is the bullet character in PDF doc  
3571 - encoding and is mapped to the Unicode value  
3572 - <literal>2022</literal>. Since <literal>51</literal> is  
3573 - <literal>Q</literal>, it is output as is. If you wanted to  
3574 - convert back from here to a binary string, you would have to  
3575 - recognize Unicode values whose code points are higher than  
3576 - <literal>0xFF</literal> and map those back to their  
3577 - corresponding PDF doc encoding characters. There is no way to  
3578 - tell the difference between a Unicode string that was originally  
3579 - encoded as UTF-16 or one that was converted from PDF doc  
3580 - encoding. In other words, it's best if you don't try to use the  
3581 - JSON format to extract binary strings from the PDF file, but if  
3582 - you really had to, it could be done. Note that qpdf's  
3583 - @1@option@1@--show-object@2@option@2@ option does not have this  
3584 - limitation and will reveal the string as encoded in the original  
3585 - file.  
3586 - </para>  
3587 - </listitem>  
3588 - </itemizedlist>  
3589 - </para>  
3590 - </sect1>  
3591 - <sect1 id="json.considerations">  
3592 - <title>JSON: Special Considerations</title>  
3593 - <para>  
3594 - For the most part, the built-in JSON help tells you everything you  
3595 - need to know about the JSON format, but there are a few  
3596 - non-obvious things to be aware of:  
3597 - <itemizedlist>  
3598 - <listitem>  
3599 - <para>  
3600 - While qpdf guarantees that keys present in the help will be  
3601 - present in the output, those fields may be null or empty if the  
3602 - information is not known or absent in the file. Also, if you  
3603 - specify @1@option@1@--json-keys@2@option@2@, the keys that are not  
3604 - listed will be excluded entirely except for those that  
3605 - @1@option@1@--json-help@2@option@2@ says are always present.  
3606 - </para>  
3607 - </listitem>  
3608 - <listitem>  
3609 - <para>  
3610 - In a few places, there are keys with names containing  
3611 - <literal>pageposfrom1</literal>. The values of these keys are  
3612 - null or an integer. If an integer, they point to a page index  
3613 - within the file numbering from 1. Note that JSON indexes from  
3614 - 0, and you would also use 0-based indexing using the API.  
3615 - However, 1-based indexing is easier in this case because the  
3616 - command-line syntax for specifying page ranges is 1-based. If  
3617 - you were going to write a program that looked through the JSON  
3618 - for information about specific pages and then use the  
3619 - command-line to extract those pages, 1-based indexing is  
3620 - easier. Besides, it's more convenient to subtract 1 in a  
3621 - program written in a real programming language than it is to add 1  
3622 - in shell code.  
3623 - </para>  
3624 - </listitem>  
3625 - <listitem>  
3626 - <para>  
3627 - The image information included in the <literal>page</literal>  
3628 - section of the JSON output includes the key  
3629 - "<literal>filterable</literal>". Note that the  
3630 - value of this field may depend on the  
3631 - @1@option@1@--decode-level@2@option@2@ that you invoke qpdf with. The  
3632 - JSON output includes a top-level key  
3633 - "<literal>parameters</literal>" that indicates the  
3634 - decode level used for computing whether a stream was  
3635 - filterable. For example, jpeg images will be shown as not  
3636 - filterable by default, but they will be shown as filterable if  
3637 - you run @1@command@1@qpdf --json --decode-level=all@2@command@2@.  
3638 - </para>  
3639 - </listitem>  
3640 - </itemizedlist>  
3641 - </para>  
3642 - </sect1>  
3643 - </chapter>  
3644 - <chapter id="ref.design">  
3645 - <title>Design and Library Notes</title>  
3646 - <sect1 id="ref.design.intro">  
3647 - <title>Introduction</title>  
3648 - <para>  
3649 - This section was written prior to the implementation of the qpdf  
3650 - package and was subsequently modified to reflect the  
3651 - implementation. In some cases, for purposes of explanation, it  
3652 - may differ slightly from the actual implementation. As always,  
3653 - the source code and test suite are authoritative. Even if there  
3654 - are some errors, this document should serve as a road map to  
3655 - understanding how this code works.  
3656 - </para>  
3657 - <para>  
3658 - In general, one should adhere strictly to a specification when  
3659 - writing but be liberal in reading. This way, the product of our  
3660 - software will be accepted by the widest range of other programs,  
3661 - and we will accept the widest range of input files. This library  
3662 - attempts to conform to that philosophy whenever possible but also  
3663 - aims to provide strict checking for people who want to validate  
3664 - PDF files. If you don't want to see warnings and are trying to  
3665 - write something that is tolerant, you can call  
3666 - <literal>setSuppressWarnings(true)</literal>. If you want to fail  
3667 - on the first error, you can call  
3668 - <literal>setAttemptRecovery(false)</literal>. The default behavior  
3669 - is to generate warnings for recoverable problems. Note that  
3670 - recovery will not always produce the desired results even if it is  
3671 - able to get through the file. Unlike most other PDF tools that  
3672 - produce generic warnings such as "This file is  
3673 - damaged," qpdf generally issues a detailed error message  
3674 - that would be most useful to a PDF developer. This is by design as  
3675 - there seems to be a shortage of PDF validation tools out there.  
3676 - This was, in fact, one of the major motivations behind the initial  
3677 - creation of qpdf.  
3678 - </para>  
3679 - </sect1>  
3680 - <sect1 id="ref.design-goals">  
3681 - <title>Design Goals</title>  
3682 - <para>  
3683 - The QPDF package includes support for reading and rewriting PDF  
3684 - files. It aims to hide from the user details involving object  
3685 - locations, modified (appended) PDF files, the  
3686 - directness/indirectness of objects, and stream filters including  
3687 - encryption. It does not aim to hide knowledge of the object  
3688 - hierarchy or content stream contents. Put another way, a user of  
3689 - the qpdf library is expected to have knowledge about how PDF files  
3690 - work, but is not expected to have to keep track of bookkeeping  
3691 - details such as file positions.  
3692 - </para>  
3693 - <para>  
3694 - A user of the library never has to care whether an object is  
3695 - direct or indirect, though it is possible to determine whether an  
3696 - object is direct or not if this information is needed. All access  
3697 - to objects deals with this transparently. All memory management  
3698 - details are also handled by the library.  
3699 - </para>  
3700 - <para>  
3701 - The <classname>PointerHolder</classname> object is used internally  
3702 - by the library to deal with memory management. This is basically a  
3703 - smart pointer object very similar in spirit to C++11's  
3704 - <classname>std::shared_ptr</classname> object, but predating it by  
3705 - several years. This library also makes use of a technique for  
3706 - giving fine-grained access to methods in one class to other  
3707 - classes by using public subclasses with friends and only private  
3708 - members that in turn call private methods of the containing class.  
3709 - See <classname>QPDFObjectHandle::Factory</classname> as an  
3710 - example.  
3711 - </para>  
3712 - <para>  
3713 - The top-level qpdf class is <classname>QPDF</classname>. A  
3714 - <classname>QPDF</classname> object represents a PDF file. The  
3715 - library provides methods for both accessing and mutating PDF  
3716 - files.  
3717 - </para>  
3718 - <para>  
3719 - The primary class for interacting with PDF objects is  
3720 - <classname>QPDFObjectHandle</classname>. Instances of this class  
3721 - can be passed around by value, copied, stored in containers, etc.  
3722 - with very low overhead. Instances of  
3723 - <classname>QPDFObjectHandle</classname> created by reading from a  
3724 - file will always contain a reference back to the  
3725 - <classname>QPDF</classname> object from which they were created. A  
3726 - <classname>QPDFObjectHandle</classname> may be direct or indirect.  
3727 - If indirect, the <classname>QPDFObject</classname> the  
3728 - <classname>PointerHolder</classname> initially points to is a null  
3729 - pointer. In this case, the first attempt to access the underlying  
3730 - <classname>QPDFObject</classname> will result in the  
3731 - <classname>QPDFObject</classname> being resolved via a call to the  
3732 - referenced <classname>QPDF</classname> instance. This makes it  
3733 - essentially impossible to make coding errors in which certain  
3734 - things will work for some PDF files and not for others based on  
3735 - which objects are direct and which objects are indirect.  
3736 - </para>  
3737 - <para>  
3738 - Instances of <classname>QPDFObjectHandle</classname> can be  
3739 - directly created and modified using static factory methods in the  
3740 - <classname>QPDFObjectHandle</classname> class. There are factory  
3741 - methods for each type of object as well as a convenience method  
3742 - <function>QPDFObjectHandle::parse</function> that creates an  
3743 - object from a string representation of the object. Existing  
3744 - instances of <classname>QPDFObjectHandle</classname> can also be  
3745 - modified in several ways. See comments in  
3746 - @1@filename@1@QPDFObjectHandle.hh@2@filename@2@ for details.  
3747 - </para>  
3748 - <para>  
3749 - An instance of <classname>QPDF</classname> is constructed by using  
3750 - the class's default constructor. If desired, the  
3751 - <classname>QPDF</classname> object may be configured with various  
3752 - methods that change its default behavior. Then the  
3753 - <function>QPDF::processFile()</function> method is passed the name  
3754 - of a PDF file, which permanently associates the file with that  
3755 - QPDF object. A password may also be given for access to  
3756 - password-protected files. QPDF does not enforce encryption  
3757 - parameters and will treat user and owner passwords equivalently.  
3758 - Either password may be used to access an encrypted file.  
3759 - <classname>QPDF</classname> will allow recovery of a user password  
3760 - given an owner password. The input PDF file must be seekable.  
3761 - (Output files written by <classname>QPDFWriter</classname> need  
3762 - not be seekable, even when creating linearized files.) During  
3763 - construction, <classname>QPDF</classname> validates the PDF file's  
3764 - header, and then reads the cross reference tables and trailer  
3765 - dictionaries. The <classname>QPDF</classname> class keeps only  
3766 - the first trailer dictionary though it does read all of them so it  
3767 - can check the <literal>/Prev</literal> key.  
3768 - <classname>QPDF</classname> class users may request the root  
3769 - object and the trailer dictionary specifically. The cross  
3770 - reference table is kept private. Objects may then be requested by  
3771 - number or by walking the object tree.  
3772 - </para>  
3773 - <para>  
3774 - When a PDF file has a cross-reference stream instead of a  
3775 - cross-reference table and trailer, requesting the document's  
3776 - trailer dictionary returns the stream dictionary from the  
3777 - cross-reference stream instead.  
3778 - </para>  
3779 - <para>  
3780 - There are some convenience routines for very common operations  
3781 - such as walking the page tree and returning a vector of all page  
3782 - objects. For full details, please see the header files  
3783 - @1@filename@1@QPDF.hh@2@filename@2@ and  
3784 - @1@filename@1@QPDFObjectHandle.hh@2@filename@2@. There are also some  
3785 - additional helper classes that provide higher level API functions  
3786 - for certain document constructions. These are discussed in <xref  
3787 - linkend="ref.helper-classes"/>.  
3788 - </para>  
3789 - </sect1>  
3790 - <sect1 id="ref.helper-classes">  
3791 - <title>Helper Classes</title>  
3792 - <para>  
3793 - QPDF version 8.1 introduced the concept of helper classes. Helper  
3794 - classes are intended to contain higher level APIs that allow  
3795 - developers to work with certain document constructs at an  
3796 - abstraction level above that of  
3797 - <classname>QPDFObjectHandle</classname> while staying true to  
3798 - qpdf's philosophy of not hiding document structure from the  
3799 - developer. As with qpdf in general, the goal is to take away some of  
3800 - the more tedious bookkeeping aspects of working with PDF files,  
3801 - not to remove the need for the developer to understand how the PDF  
3802 - construction in question works. The driving factor behind the  
3803 - creation of helper classes was to allow the evolution of higher  
3804 - level interfaces in qpdf without polluting the interfaces of the  
3805 - main top-level classes <classname>QPDF</classname> and  
3806 - <classname>QPDFObjectHandle</classname>.  
3807 - </para>  
3808 - <para>  
3809 - There are two kinds of helper classes:  
3810 - <emphasis>document</emphasis> helpers and  
3811 - <emphasis>object</emphasis> helpers. Document helpers are  
3812 - constructed with a reference to a <classname>QPDF</classname>  
3813 - object and provide methods for working with structures that are at  
3814 - the document level. Object helpers are constructed with an  
3815 - instance of a <classname>QPDFObjectHandle</classname> and provide  
3816 - methods for working with specific types of objects.  
3817 - </para>  
3818 - <para>  
3819 - Examples of document helpers include  
3820 - <classname>QPDFPageDocumentHelper</classname>, which contains  
3821 - methods for operating on the document's page trees, such as  
3822 - enumerating all pages of a document and adding and removing pages;  
3823 - and <classname>QPDFAcroFormDocumentHelper</classname>, which  
3824 - contains document-level methods related to interactive forms, such  
3825 - as enumerating form fields and creating mappings between form  
3826 - fields and annotations.  
3827 - </para>  
3828 - <para>  
3829 - Examples of object helpers include  
3830 - <classname>QPDFPageObjectHelper</classname> for performing  
3831 - operations on pages such as page rotation and some operations on  
3832 - content streams, <classname>QPDFFormFieldObjectHelper</classname>  
3833 - for performing operations related to interactive form fields, and  
3834 - <classname>QPDFAnnotationObjectHelper</classname> for working with  
3835 - annotations.  
3836 - </para>  
3837 - <para>  
3838 - It is always possible to retrieve the underlying  
3839 - <classname>QPDF</classname> reference from a document helper and  
3840 - the underlying <classname>QPDFObjectHandle</classname> reference  
3841 - from an object helper. Helpers are designed to be helpers, not  
3842 - wrappers. The intention is that, in general, it is safe to freely  
3843 - intermix operations that use helpers with operations that use the  
3844 - underlying objects. Document and object helpers do not attempt to  
3845 - provide a complete interface for working with the things they are  
3846 - helping with, nor do they attempt to encapsulate underlying  
3847 - structures. They just provide a few methods to help with  
3848 - error-prone, repetitive, or complex tasks. In some cases, a helper  
3849 - object may cache some information that is expensive to gather. In  
3850 - such cases, the helper classes are implemented so that their own  
3851 - methods keep the cache consistent, and the header file will  
3852 - provide a method to invalidate the cache and a description of what  
3853 - kinds of operations would make the cache invalid. If in doubt, you  
3854 - can always discard a helper class and create a new one with the  
3855 - same underlying objects, which will ensure that you have discarded  
3856 - any stale information.  
3857 - </para>  
3858 - <para>  
3859 - By convention, document helpers are called  
3860 - <classname>QPDFSomethingDocumentHelper</classname> and are derived  
3861 - from <classname>QPDFDocumentHelper</classname>, and object helpers  
3862 - are called <classname>QPDFSomethingObjectHelper</classname> and  
3863 - are derived from <classname>QPDFObjectHelper</classname>. For  
3864 - details on specific helpers, please see their header files. You  
3865 - can find them by looking at  
3866 - @1@filename@1@include/qpdf/QPDF*DocumentHelper.hh@2@filename@2@ and  
3867 - @1@filename@1@include/qpdf/QPDF*ObjectHelper.hh@2@filename@2@.  
3868 - </para>  
3869 - <para>  
3870 - In order to avoid creation of circular dependencies, the following  
3871 - general guidelines are followed with helper classes:  
3872 - <itemizedlist>  
3873 - <listitem>  
3874 - <para>  
3875 - Core class interfaces do not know about helper classes. For  
3876 - example, no methods of <classname>QPDF</classname> or  
3877 - <classname>QPDFObjectHandle</classname> will include helper  
3878 - classes in their interfaces.  
3879 - </para>  
3880 - </listitem>  
3881 - <listitem>  
3882 - <para>  
3883 - Interfaces of object helpers will usually not use document  
3884 - helpers in their interfaces. This is because it is much more  
3885 - useful for document helpers to have methods that return object  
3886 - helpers. Most operations in PDF files start at the document  
3887 - level and go from there to the object level rather than the  
3888 - other way around. It can sometimes be useful to map back from  
3889 - object-level structures to document-level structures. If there  
3890 - is a desire to do this, it will generally be provided by a  
3891 - method in the document helper class.  
3892 - </para>  
3893 - </listitem>  
3894 - <listitem>  
3895 - <para>  
3896 - Most of the time, object helpers don't know about other object  
3897 - helpers. However, in some cases, one type of object may be a  
3898 - container for another type of object, in which case it may make  
3899 - sense for the outer object to know about the inner object. For  
3900 - example, there are methods in the  
3901 - <classname>QPDFPageObjectHelper</classname> that know  
3902 - <classname>QPDFAnnotationObjectHelper</classname> because  
3903 - references to annotations are contained in page dictionaries.  
3904 - </para>  
3905 - </listitem>  
3906 - <listitem>  
3907 - <para>  
3908 - Any helper or core library class may use helpers in their  
3909 - implementations.  
3910 - </para>  
3911 - </listitem>  
3912 - </itemizedlist>  
3913 - </para>  
3914 - <para>  
3915 - Prior to qpdf version 8.1, higher level interfaces were added as  
3916 - "convenience functions" in either  
3917 - <classname>QPDF</classname> or  
3918 - <classname>QPDFObjectHandle</classname>. For compatibility, older  
3919 - convenience functions for operating with pages will remain in  
3920 - those classes even as alternatives are provided in helper classes.  
3921 - Going forward, new higher level interfaces will be provided using  
3922 - helper classes.  
3923 - </para>  
3924 - </sect1>  
3925 - <sect1 id="ref.implementation-notes">  
3926 - <title>Implementation Notes</title>  
3927 - <para>  
3928 - This section contains a few notes about QPDF's internal  
3929 - implementation, particularly around what it does when it first  
3930 - processes a file. This section is a bit of a simplification of  
3931 - what it actually does, but it could serve as a starting point to  
3932 - someone trying to understand the implementation. There is nothing  
3933 - in this section that you need to know to use the qpdf library.  
3934 - </para>  
3935 - <para>  
3936 - <classname>QPDFObject</classname> is the basic PDF Object class.  
3937 - It is an abstract base class from which are derived classes for  
3938 - each type of PDF object. Clients do not interact with Objects  
3939 - directly but instead interact with  
3940 - <classname>QPDFObjectHandle</classname>.  
3941 - </para>  
3942 - <para>  
3943 - When the <classname>QPDF</classname> class creates a new object,  
3944 - it dynamically allocates the appropriate type of  
3945 - <classname>QPDFObject</classname> and immediately hands the  
3946 - pointer to an instance of <classname>QPDFObjectHandle</classname>.  
3947 - The parser reads a token from the current file position. If the  
3948 - token is neither a dictionary nor an array opener, an object is  
3949 - immediately constructed from the single token and the parser  
3950 - returns. Otherwise, the parser iterates in a special mode in which  
3951 - it accumulates objects until it finds a balancing closer. During  
3952 - this process, the "<literal>R</literal>" keyword is  
3953 - recognized and an indirect <classname>QPDFObjectHandle</classname>  
3954 - may be constructed.  
3955 - </para>  
3956 - <para>  
3957 - The <function>QPDF::resolve()</function> method, which is used to  
3958 - resolve an indirect object, may be invoked from the  
3959 - <classname>QPDFObjectHandle</classname> class. It first checks a  
3960 - cache to see whether this object has already been read. If not,  
3961 - it reads the object from the PDF file and caches it. It then  
3962 - returns the resulting <classname>QPDFObjectHandle</classname>.  
3963 - The calling object handle then replaces its  
3964 - <classname>PointerHolder&lt;QPDFObject&gt;</classname> with the one  
3965 - from the newly returned <classname>QPDFObjectHandle</classname>.  
3966 - In this way, only a single copy of any direct object need exist  
3967 - and clients can access objects transparently without knowing or  
3968 - caring whether they are direct or indirect objects. Additionally,  
3969 - no object is ever read from the file more than once. That means  
3970 - that only the portions of the PDF file that are actually needed  
3971 - are ever read from the input file, thus allowing the qpdf package  
3972 - to take advantage of this important design goal of PDF files.  
3973 - </para>  
3974 - <para>  
3975 - If the requested object is inside of an object stream, the object  
3976 - stream itself is first read into memory. Then the tokenizer reads  
3977 - objects from the memory stream based on the offset information  
3978 - stored in the stream. Those individual objects are cached, after  
3979 - which the temporary buffer holding the object stream contents is  
3980 - discarded. In this way, the first time an object in an object  
3981 - stream is requested, all objects in the stream are cached.  
3982 - </para>  
3983 - <para>  
3984 - The following example should clarify how  
3985 - <classname>QPDF</classname> processes a simple file.  
3986 - <itemizedlist>  
3987 - <listitem>  
3988 - <para>  
3989 - Client constructs <classname>QPDF</classname>  
3990 - <varname>pdf</varname> and calls  
3991 - <function>pdf.processFile("a.pdf");</function>.  
3992 - </para>  
3993 - </listitem>  
3994 - <listitem>  
3995 - <para>  
3996 - The <classname>QPDF</classname> class checks the beginning of  
3997 - @1@filename@1@a.pdf@2@filename@2@ for a PDF header. It then reads the  
3998 - cross reference table mentioned at the end of the file,  
3999 - ensuring that it is looking before the last  
4000 - <literal>%%EOF</literal>. After getting to the  
4001 - <literal>trailer</literal> keyword, it invokes the parser.  
4002 - </para>  
4003 - </listitem>  
4004 - <listitem>  
4005 - <para>  
4006 - The parser sees "<literal>&lt;&lt;</literal>", so  
4007 - it calls itself recursively in dictionary creation mode.  
4008 - </para>  
4009 - </listitem>  
4010 - <listitem>  
4011 - <para>  
4012 - In dictionary creation mode, the parser keeps accumulating  
4013 - objects until it encounters  
4014 - "<literal>&gt;&gt;</literal>". Each object that is  
4015 - read is pushed onto a stack. If  
4016 - "<literal>R</literal>" is read, the last two  
4017 - objects on the stack are inspected. If they are integers, they  
4018 - are popped off the stack and their values are used to construct  
4019 - an indirect object handle which is then pushed onto the stack.  
4020 - When "<literal>&gt;&gt;</literal>" is finally read,  
4021 - the stack is converted into a  
4022 - <classname>QPDF_Dictionary</classname> which is placed in a  
4023 - <classname>QPDFObjectHandle</classname> and returned.  
4024 - </para>  
4025 - </listitem>  
4026 - <listitem>  
4027 - <para>  
4028 - The resulting dictionary is saved as the trailer dictionary.  
4029 - </para>  
4030 - </listitem>  
4031 - <listitem>  
4032 - <para>  
4033 - The <literal>/Prev</literal> key is searched. If present,  
4034 - <classname>QPDF</classname> seeks to that point and repeats  
4035 - except that the new trailer dictionary is not saved. If  
4036 - <literal>/Prev</literal> is not present, the initial parsing  
4037 - process is complete.  
4038 - </para>  
4039 - <para>  
4040 - If there is an encryption dictionary, the document's encryption  
4041 - parameters are initialized.  
4042 - </para>  
4043 - </listitem>  
4044 - <listitem>  
4045 - <para>  
4046 - The client requests the root object. The  
4047 - <classname>QPDF</classname> class gets the value of the root key  
4048 - from the trailer dictionary and returns it. It is an unresolved  
4049 - indirect <classname>QPDFObjectHandle</classname>.  
4050 - </para>  
4051 - </listitem>  
4052 - <listitem>  
4053 - <para>  
4054 - The client requests the <literal>/Pages</literal> key from the root  
4055 - <classname>QPDFObjectHandle</classname>. The  
4056 - <classname>QPDFObjectHandle</classname> notices that it is  
4057 - indirect so it asks <classname>QPDF</classname> to resolve it.  
4058 - <classname>QPDF</classname> looks in the object cache for an  
4059 - object with the root dictionary's object ID and generation  
4060 - number. Upon not seeing it, it checks the cross reference  
4061 - table, gets the offset, and reads the object present at that  
4062 - offset. It stores the result in the object cache and returns  
4063 - the cached result. The calling  
4064 - <classname>QPDFObjectHandle</classname> replaces its object  
4065 - pointer with the one from the resolved  
4066 - <classname>QPDFObjectHandle</classname>, verifies that it is a  
4067 - valid dictionary object, and returns the (unresolved indirect)  
4068 - <classname>QPDFObject</classname> handle to the top of the  
4069 - Pages hierarchy.  
4070 - </para>  
4071 - <para>  
4072 - As the client continues to request objects, the same process is  
4073 - followed for each new requested object.  
4074 - </para>  
4075 - </listitem>  
4076 - </itemizedlist>  
4077 - </para>  
4078 - </sect1>  
4079 - <sect1 id="ref.casting">  
4080 - <title>Casting Policy</title>  
4081 - <para>  
4082 - This section describes the casting policy followed by qpdf's  
4083 - implementation. This is of no concern to qpdf's end users and  
4084 - largely of no concern to people writing code that uses qpdf, but  
4085 - it could be of interest to people who are porting qpdf to a new  
4086 - platform or who are making modifications to the code.  
4087 - </para>  
4088 - <para>  
4089 - The C++ code in qpdf is free of old-style casts except where  
4090 - unavoidable (e.g. where the old-style cast is in a macro provided  
4091 - by a third-party header file). When there is a need for a cast,  
4092 - it is handled, in order of preference, by rewriting the code to  
4093 - avoid the need for a cast, calling  
4094 - <function>const_cast</function>, calling  
4095 - <function>static_cast</function>, calling  
4096 - <function>reinterpret_cast</function>, or calling some combination  
4097 - of the above. As a last resort, a compiler-specific  
4098 - <literal>#pragma</literal> may be used to suppress a warning that  
4099 - we don't want to fix. Examples may include suppressing warnings  
4100 - about the use of old-style casts in code that is shared between C  
4101 - and C++ code.  
4102 - </para>  
4103 - <para>  
4104 - The <classname>QIntC</classname> namespace, provided by  
4105 - @1@filename@1@include/qpdf/QIntC.hh@2@filename@2@, implements safe  
4106 - functions for converting between integer types. These functions do  
4107 - range checking and throw a <type>std::range_error</type>, which is  
4108 - a subclass of <type>std::runtime_error</type>, if conversion from one  
4109 - integer type to another results in loss of information. There are  
4110 - many cases in which we have to move between different integer  
4111 - types because of incompatible integer types used in interoperable  
4112 - interfaces. Some are unavoidable, such as moving between sizes and  
4113 - offsets, and others are there because of old code that is too  
4114 - entrenched to be fixable without breaking source compatibility and  
4115 - causing pain for users. QPDF is compiled with extra warnings to  
4116 - detect conversions with potential data loss, and all such cases  
4117 - should be fixed by either using a function from  
4118 - <classname>QIntC</classname> or a  
4119 - <function>static_cast</function>.  
4120 - </para>  
4121 - <para>  
4122 - When the intention is just to switch the type because of  
4123 - exchanging data between incompatible interfaces, use  
4124 - <classname>QIntC</classname>. This is the usual case. However,  
4125 - there are some cases in which we are explicitly intending to use  
4126 - the exact same bit pattern with a different type. This is most  
4127 - common when switching between signed and unsigned characters. A  
4128 - lot of qpdf's code uses unsigned characters internally, but  
4129 - <type>std::string</type> and <type>char</type> are signed. Using  
4130 - <function>QIntC::to_char</function> would be wrong for converting  
4131 - from unsigned to signed characters because a negative  
4132 - <type>char</type> value and the corresponding <type>unsigned  
4133 - char</type> value greater than 127 <emphasis>mean the same  
4134 - thing</emphasis>. There are also cases in which we use  
4135 - <function>static_cast</function> when working with bit fields  
4136 - where we are not representing a numerical value but rather a bunch  
4137 - of bits packed together in some integer type. Also note that  
4138 - <type>size_t</type> and <type>long</type> both typically differ  
4139 - between 32-bit and 64-bit environments, so sometimes an explicit  
4140 - cast may not be needed to avoid warnings on one platform but may  
4141 - be needed on another. A conversion with  
4142 - <classname>QIntC</classname> should always be used when the types  
4143 - are different even if the underlying size is the same. QPDF's CI  
4144 - build builds on 32-bit and 64-bit platforms, and the test suite is  
4145 - very thorough, so it is hard to make any of the potential errors  
4146 - here without being caught in build or test.  
4147 - </para>  
4148 - <para>  
4149 - Non-const <type>unsigned char*</type> is used in the  
4150 - <type>Pipeline</type> interface. The pipeline interface has a  
4151 - <function>write</function> call that uses <type>unsigned  
4152 - char*</type> without a <type>const</type> qualifier. The main  
4153 - reason for this is to support pipelines that make calls to  
4154 - third-party libraries, such as zlib, that don't include  
4155 - <type>const</type> in their interfaces. Unfortunately, there are  
4156 - many places in the code where it is desirable to have <type>const  
4157 - char*</type> with pipelines. None of the pipeline implementations  
4158 - in qpdf currently modify the data passed to write, and doing so  
4159 - would be counter to the intent of <type>Pipeline</type>, but there  
4160 - is nothing in the code to prevent this from being done. There are  
4161 - places in the code where <function>const_cast</function> is used  
4162 - to remove the const-ness of pointers going into  
4163 - <type>Pipeline</type>s. This could theoretically be unsafe, but  
4164 - there is adequate testing to assert that it is safe and will  
4165 - remain safe in qpdf's code.  
4166 - </para>  
4167 - </sect1>  
4168 - <sect1 id="ref.encryption">  
4169 - <title>Encryption</title>  
4170 - <para>  
4171 - Encryption is supported transparently by qpdf. When opening a PDF  
4172 - file, if an encryption dictionary exists, the  
4173 - <classname>QPDF</classname> object processes this dictionary using  
4174 - the password (if any) provided. The primary decryption key is  
4175 - computed and cached. No further access is made to the encryption  
4176 - dictionary after that time. When an object is read from a file,  
4177 - the object ID and generation of the object in which it is  
4178 - contained is always known. Using this information along with the  
4179 - stored encryption key, all stream and string objects are  
4180 - transparently decrypted. Raw encrypted objects are never stored  
4181 - in memory. This way, nothing in the library ever has to know or  
4182 - care whether it is reading an encrypted file.  
4183 - </para>  
4184 - <para>  
4185 - An interface is also provided for writing encrypted streams and  
4186 - strings given an encryption key. This is used by  
4187 - <classname>QPDFWriter</classname> when it rewrites encrypted  
4188 - files.  
4189 - </para>  
4190 - <para>  
4191 - When copying encrypted files, unless otherwise directed, qpdf will  
4192 - preserve any encryption in force in the original file. qpdf can  
4193 - do this with either the user or the owner password. There is no  
4194 - difference in capability based on which password is used. When 40  
4195 - or 128 bit encryption keys are used, the user password can be  
4196 - recovered with the owner password. With 256-bit keys, the user and  
4197 - owner passwords are used independently to encrypt the actual  
4198 - encryption key, so while either can be used, the owner password  
4199 - can no longer be used to recover the user password.  
4200 - </para>  
4201 - <para>  
4202 - Starting with version 4.0.0, qpdf can read files that are not  
4203 - encrypted but that contain encrypted attachments, but it cannot  
4204 - write such files. qpdf also requires the password to be specified  
4205 - in order to open the file, not just to extract attachments, since  
4206 - once the file is open, all decryption is handled transparently.  
4207 - When copying files like this while preserving encryption, qpdf  
4208 - will apply the file's encryption to everything in the file, not  
4209 - just to the attachments. When decrypting the file, qpdf will  
4210 - decrypt the attachments. In general, when copying PDF files with  
4211 - multiple encryption formats, qpdf will choose the newest format.  
4212 - The only exception to this is that clear-text metadata will be  
4213 - preserved as clear-text if it is that way in the original file.  
4214 - </para>  
4215 - <para>  
4216 - One point of confusion some people have about encrypted PDF files  
4217 - is that encryption is not the same as password protection.  
4218 - Password protected files are always encrypted, but it is also  
4219 - possible to create encrypted files that do not have passwords.  
4220 - Internally, such files use the empty string as a password, and  
4221 - most readers try the empty string first to see if it works and  
4222 - prompt for a password only if the empty string doesn't work.  
4223 - Normally such files have an empty user password and a non-empty  
4224 - owner password. In that way, if the file is opened by an ordinary  
4225 - reader without specification of password, the restrictions  
4226 - specified in the encryption dictionary can be enforced. Most users  
4227 - wouldn't even realize such a file was encrypted. Since qpdf always  
4228 - ignores the restrictions (except for the purpose of reporting what  
4229 - they are), qpdf doesn't care which password you use. QPDF will  
4230 - allow you to create PDF files with non-empty user passwords and  
4231 - empty owner passwords. Some readers will require a password when  
4232 - you open these files, and others will open the files without a  
4233 - password and not enforce restrictions. Having a non-empty user  
4234 - password and an empty owner password doesn't really make sense  
4235 - because it would mean that opening the file with the user password  
4236 - would be more restrictive than not supplying a password at all.  
4237 - QPDF also allows you to create PDF files with the same password as  
4238 - both the user and owner password. Some readers will not ever allow  
4239 - such files to be accessed without restrictions because they never  
4240 - try the password as the owner password if it works as the user  
4241 - password. Nonetheless, one of the powerful aspects of qpdf is that  
4242 - it allows you to finely specify the way encrypted files are  
4243 - created, even if the results are not useful to some readers. One  
4244 - use case for this would be for testing a PDF reader to ensure that  
4245 - it handles odd configurations of input files.  
4246 - </para>  
4247 - </sect1>  
4248 - <sect1 id="ref.random-numbers">  
4249 - <title>Random Number Generation</title>  
4250 - <para>  
4251 - QPDF generates random numbers to support generation of encrypted  
4252 - data. Starting in qpdf 10.0.0, qpdf uses the crypto provider as  
4253 - its source of random numbers. Older versions used the OS-provided  
4254 - source of secure random numbers or, if allowed at build time,  
4255 - insecure random numbers from stdlib. Starting with version 5.1.0,  
4256 - you can disable use of OS-provided secure random numbers at build  
4257 - time. This is especially useful on Windows if you want to avoid a  
4258 - dependency on Microsoft's cryptography API. You can also supply  
4259 - your own random data provider. For details on how to do this,  
4260 - please refer to the top-level README.md file in the source  
4261 - distribution and to comments in @1@filename@1@QUtil.hh@2@filename@2@.  
4262 - </para>  
4263 - </sect1>  
4264 - <sect1 id="ref.adding-and-remove-pages">  
4265 - <title>Adding and Removing Pages</title>  
4266 - <para>  
4267 - While qpdf's API has supported adding and modifying objects for  
4268 - some time, version 3.0 introduces specific methods for adding and  
4269 - removing pages. These are largely convenience routines that  
4270 - handle two tricky issues: pushing inheritable resources from the  
4271 - <literal>/Pages</literal> tree down to individual pages and  
4272 - manipulation of the <literal>/Pages</literal> tree itself. For  
4273 - details, see <function>addPage</function> and surrounding methods  
4274 - in @1@filename@1@QPDF.hh@2@filename@2@.  
4275 - </para>  
4276 - </sect1>  
4277 - <sect1 id="ref.reserved-objects">  
4278 - <title>Reserving Object Numbers</title>  
4279 - <para>  
4280 - Version 3.0 of qpdf introduced the concept of reserved objects.  
4281 - These are seldom needed for ordinary operations, but there are  
4282 - cases in which you may want to add a series of indirect objects  
4283 - with references to each other to a <classname>QPDF</classname>  
4284 - object. This causes a problem because you can't determine the  
4285 - object ID that a new indirect object will have until you add it to  
4286 - the <classname>QPDF</classname> object with  
4287 - <function>QPDF::makeIndirectObject</function>. The only way to  
4288 - add two mutually referential objects to a  
4289 - <classname>QPDF</classname> object prior to version 3.0 would be  
4290 - to add the new objects first and then make them refer to each  
4291 - other after adding them. Now it is possible to create a  
4292 - @1@firstterm@1@reserved object@2@firstterm@2@ using  
4293 - <function>QPDFObjectHandle::newReserved</function>. This is an  
4294 - indirect object that stays "unresolved" even if it is  
4295 - queried for its type. So now, if you want to create a set of  
4296 - mutually referential objects, you can create reservations for each  
4297 - one of them and use those reservations to construct the  
4298 - references. When finished, you can call  
4299 - <function>QPDF::replaceReserved</function> to replace the reserved  
4300 - objects with the real ones. This functionality will never be  
4301 - needed by most applications, but it is used internally by QPDF  
4302 - when copying objects from other PDF files, as discussed in <xref  
4303 - linkend="ref.foreign-objects"/>. For an example of how to use  
4304 - reserved objects, search for <function>newReserved</function> in  
4305 - @1@filename@1@test_driver.cc@2@filename@2@ in qpdf's sources.  
4306 - </para>  
4307 - </sect1>  
4308 - <sect1 id="ref.foreign-objects">  
4309 - <title>Copying Objects From Other PDF Files</title>  
4310 - <para>  
4311 - Version 3.0 of qpdf introduced the ability to copy objects into a  
4312 - <classname>QPDF</classname> object from a different  
4313 - <classname>QPDF</classname> object, which we refer to as  
4314 - @1@firstterm@1@foreign objects@2@firstterm@2@. This allows arbitrary  
4315 - merging of PDF files. The "from"  
4316 - <classname>QPDF</classname> object must remain valid after the  
4317 - copy as discussed in the note below. The @1@command@1@qpdf@2@command@2@  
4318 - command-line tool provides limited support for basic page  
4319 - selection, including merging in pages from other files, but the  
4320 - library's API makes it possible to implement arbitrarily complex  
4321 - merging operations. The main method for copying foreign objects is  
4322 - <function>QPDF::copyForeignObject</function>. This takes an  
4323 - indirect object from another <classname>QPDF</classname> and  
4324 - copies it recursively into this object while preserving all object  
4325 - structure, including circular references. This means you can add a  
4326 - direct object that you create from scratch to a  
4327 - <classname>QPDF</classname> object with  
4328 - <function>QPDF::makeIndirectObject</function>, and you can add an  
4329 - indirect object from another file with  
4330 - <function>QPDF::copyForeignObject</function>. The fact that  
4331 - <function>QPDF::makeIndirectObject</function> does not  
4332 - automatically detect a foreign object and copy it is an explicit  
4333 - design decision. Copying a foreign object seems like a  
4334 - sufficiently significant thing to do that it should be done  
4335 - explicitly.  
4336 - </para>  
4337 - <para>  
4338 - The other way to copy foreign objects is by passing a page from  
4339 - one <classname>QPDF</classname> to another by calling  
4340 - <function>QPDF::addPage</function>. In contrast to  
4341 - <function>QPDF::makeIndirectObject</function>, this method  
4342 - automatically distinguishes between indirect objects in the  
4343 - current file, foreign objects, and direct objects.  
4344 - </para>  
4345 - <para>  
4346 - Please note: when you copy objects from one  
4347 - <classname>QPDF</classname> to another, the source  
4348 - <classname>QPDF</classname> object must remain valid until you  
4349 - have finished with the destination object. This is because the  
4350 - original object is still used to retrieve any referenced stream  
4351 - data from the copied object.  
4352 - </para>  
4353 - </sect1>  
4354 - <sect1 id="ref.rewriting">  
4355 - <title>Writing PDF Files</title>  
4356 - <para>  
4357 - The qpdf library supports file writing of  
4358 - <classname>QPDF</classname> objects to PDF files through the  
4359 - <classname>QPDFWriter</classname> class. The  
4360 - <classname>QPDFWriter</classname> class has two writing modes: one  
4361 - for non-linearized files, and one for linearized files. See <xref  
4362 - linkend="ref.linearization"/> for a description of how linearization  
4363 - is implemented. This section describes how we write  
4364 - non-linearized files including the creation of QDF files (see  
4365 - <xref linkend="ref.qdf"/>).  
4366 - </para>  
4367 - <para>  
4368 - This outline was written prior to implementation and is not  
4369 - exactly accurate, but it provides a correct "notional"  
4370 - idea of how writing works. Look at the code in  
4371 - <classname>QPDFWriter</classname> for exact details.  
4372 - <itemizedlist>  
4373 - <listitem>  
4374 - <para>  
4375 - Initialize state:  
4376 - <itemizedlist>  
4377 - <listitem>  
4378 - <para>  
4379 - next object number = 1  
4380 - </para>  
4381 - </listitem>  
4382 - <listitem>  
4383 - <para>  
4384 - object queue = empty  
4385 - </para>  
4386 - </listitem>  
4387 - <listitem>  
4388 - <para>  
4389 - renumber table: old object id/generation to new id/0 = empty  
4390 - </para>  
4391 - </listitem>  
4392 - <listitem>  
4393 - <para>  
4394 - xref table: new id -> offset = empty  
4395 - </para>  
4396 - </listitem>  
4397 - </itemizedlist>  
4398 - </para>  
4399 - </listitem>  
4400 - <listitem>  
4401 - <para>  
4402 - Create a QPDF object from a file.  
4403 - </para>  
4404 - </listitem>  
4405 - <listitem>  
4406 - <para>  
4407 - Write header for new PDF file.  
4408 - </para>  
4409 - </listitem>  
4410 - <listitem>  
4411 - <para>  
4412 - Request the trailer dictionary.  
4413 - </para>  
4414 - </listitem>  
4415 - <listitem>  
4416 - <para>  
4417 - For each value that is an indirect object, grab the next object  
4418 - number (via an operation that returns and increments the  
4419 - number). Map object to new number in renumber table. Push  
4420 - object onto queue.  
4421 - </para>  
4422 - </listitem>  
4423 - <listitem>  
4424 - <para>  
4425 - While there are more objects on the queue:  
4426 - <itemizedlist>  
4427 - <listitem>  
4428 - <para>  
4429 - Pop queue.  
4430 - </para>  
4431 - </listitem>  
4432 - <listitem>  
4433 - <para>  
4434 - Look up object's new number <emphasis>n</emphasis> in the  
4435 - renumbering table.  
4436 - </para>  
4437 - </listitem>  
4438 - <listitem>  
4439 - <para>  
4440 - Store current offset into xref table.  
4441 - </para>  
4442 - </listitem>  
4443 - <listitem>  
4444 - <para>  
4445 - Write <literal>@1@replaceable@1@n@2@replaceable@2@ 0 obj</literal>.  
4446 - </para>  
4447 - </listitem>  
4448 - <listitem>  
4449 - <para>  
4450 - If object is null, whether direct or indirect, write out  
4451 - null, thus eliminating unresolvable indirect object  
4452 - references.  
4453 - </para>  
4454 - </listitem>  
4455 - <listitem>  
4456 - <para>  
4457 - If the object is a stream, write stream contents,  
4458 - piped through any filters as required, to a memory buffer.  
4459 - Use this buffer to determine the stream length.  
4460 - </para>  
4461 - </listitem>  
4462 - <listitem>  
4463 - <para>  
4464 - If object is not a stream, array, or dictionary, write out  
4465 - its contents.  
4466 - </para>  
4467 - </listitem>  
4468 - <listitem>  
4469 - <para>  
4470 - If object is an array or dictionary (including stream),  
4471 - traverse its elements (for array) or values (for  
4472 - dictionaries), handling recursive dictionaries and arrays,  
4473 - looking for indirect objects. When an indirect object is  
4474 - found, if it is not resolvable, ignore. (This case is  
4475 - handled when writing it out.) Otherwise, look it up in the  
4476 - renumbering table. If not found, grab the next available  
4477 - object number, assign to the referenced object in the  
4478 - renumbering table, and push the referenced object onto the  
4479 - queue. As a special case, when writing out a stream  
4480 - dictionary, replace length, filters, and decode parameters  
4481 - as required.  
4482 - </para>  
4483 - <para>  
4484 - Write out dictionary or array, replacing any unresolvable  
4485 - indirect object references with null (pdf spec says  
4486 - reference to non-existent object is legal and resolves to  
4487 - null) and any resolvable ones with references to the  
4488 - renumbered objects.  
4489 - </para>  
4490 - </listitem>  
4491 - <listitem>  
4492 - <para>  
4493 - If the object is a stream, write  
4494 - <literal>stream\n</literal>, the stream contents (from the  
4495 - memory buffer), and <literal>\nendstream\n</literal>.  
4496 - </para>  
4497 - </listitem>  
4498 - <listitem>  
4499 - <para>  
4500 - When done, write <literal>endobj</literal>.  
4501 - </para>  
4502 - </listitem>  
4503 - </itemizedlist>  
4504 - </para>  
4505 - </listitem>  
4506 - </itemizedlist>  
4507 - </para>  
4508 - <para>  
4509 - Once we have finished the queue, all referenced objects will have  
4510 - been written out and all deleted objects or unreferenced objects  
4511 - will have been skipped. The new cross-reference table will  
4512 - contain an offset for every new object number from 1 up to the  
4513 - number of objects written. This can be used to write out a new  
4514 - xref table. Finally we can write out the trailer dictionary with  
4515 - appropriately computed /ID (see spec, 8.3, File Identifiers), the  
4516 - cross reference table offset, and <literal>%%EOF</literal>.  
4517 - </para>  
4518 - </sect1>  
4519 - <sect1 id="ref.filtered-streams">  
4520 - <title>Filtered Streams</title>  
4521 - <para>  
4522 - Support for streams is implemented through the  
4523 - <classname>Pipeline</classname> interface which was designed for  
4524 - this package.  
4525 - </para>  
4526 - <para>  
4527 - When reading streams, create a series of  
4528 - <classname>Pipeline</classname> objects. The  
4529 - <classname>Pipeline</classname> abstract base requires  
4530 - implementation of <function>write()</function> and  
4531 - <function>finish()</function> and provides an implementation of  
4532 - <function>getNext()</function>. Each pipeline object, upon  
4533 - receiving data, does whatever it is going to do and then writes  
4534 - the data (possibly modified) to its successor. Alternatively, a  
4535 - pipeline may be an end-of-the-line pipeline that does something  
4536 - like store its output to a file or a memory buffer ignoring a  
4537 - successor. For additional details, look at  
4538 - @1@filename@1@Pipeline.hh@2@filename@2@.  
4539 - </para>  
4540 - <para>  
4541 - <classname>QPDF</classname> can read raw or filtered streams.  
4542 - When reading a filtered stream, the <classname>QPDF</classname>  
4543 - class creates a <classname>Pipeline</classname> object for one of  
4544 - each appropriate filter object and chains them together. The last  
4545 - filter should write to whatever type of output is required. The  
4546 - <classname>QPDF</classname> class has an interface to write raw or  
4547 - filtered stream contents to a given pipeline.  
4548 - </para>  
4549 - </sect1>  
4550 - <sect1 id="ref.object-accessors">  
4551 - <title>Object Accessor Methods</title>  
4552 - <para>  
4553 - @1@comment: This section is referenced in QPDFObjectHandle.hh @1@  
4554 - </para>  
4555 - <para>  
4556 - For general information about how to access instances of  
4557 - <classname>QPDFObjectHandle</classname>, please see the comments  
4558 - in @1@filename@1@QPDFObjectHandle.hh@2@filename@2@. Search for  
4559 - "Accessor methods". This section provides a more  
4560 - in-depth discussion of the behavior and the rationale for the  
4561 - behavior.  
4562 - </para>  
4563 - <para>  
4564 - <emphasis>Why were type errors made into warnings?</emphasis> When  
4565 - type checks were introduced into qpdf in the early days, it was  
4566 - expected that type errors would only occur as a result of  
4567 - programmer error. However, in practice, type errors would occur  
4568 - with malformed PDF files because of assumptions made in code,  
4569 - including code within the qpdf library and code written by library  
4570 - users. The most common case would be chaining calls to  
4571 - <function>getKey()</function> to access keys deep within a  
4572 - dictionary. In many cases, qpdf would be able to recover from  
4573 - these situations, but the old behavior often resulted in crashes  
4574 - rather than graceful recovery. For this reason, the errors were  
4575 - changed to warnings.  
4576 - </para>  
4577 - <para>  
4578 - <emphasis>Why even warn about type errors when the user can't  
4579 - usually do anything about them?</emphasis> Type warnings are  
4580 - extremely valuable during development. Since it's impossible to  
4581 - catch at compile time things like typos in dictionary key names or  
4582 - logic errors around what the structure of a PDF file might be, the  
4583 - presence of type warnings can save lots of developer time. They  
4584 - have also proven useful in exposing issues in qpdf itself that  
4585 - would have otherwise gone undetected.  
4586 - </para>  
4587 - <para>  
4588 - <emphasis>Can there be a type-safe  
4589 - <classname>QPDFObjectHandle</classname>?</emphasis> It would be  
4590 - great if <classname>QPDFObjectHandle</classname> could be more  
4591 - strongly typed so that you'd have to check that something was  
4592 - of a particular type before calling type-specific accessor  
4593 - methods. However, implementing this at this stage of the library's  
4594 - history would be quite difficult, and it would make the common  
4595 - pattern of drilling into an object no longer work. While it would  
4596 - be possible to have a parallel interface, it would create a lot of  
4597 - extra code. If qpdf were written in a language like rust, an  
4598 - interface like this would make a lot of sense, but, for a variety  
4599 - of reasons, the qpdf API is consistent with other APIs of its  
4600 - time, relying on exception handling to catch errors. The  
4601 - underlying PDF objects are inherently not type-safe. Forcing  
4602 - stronger type safety in <classname>QPDFObjectHandle</classname>  
4603 - would ultimately cause a lot more code to have to be written and  
4604 - would likely make software that uses qpdf more brittle, and even so,  
4605 - checks would have to occur at runtime.  
4606 - </para>  
4607 - <para>  
4608 - <emphasis>Why do type errors sometimes raise  
4609 - exceptions?</emphasis> The way warnings work in qpdf requires a  
4610 - <classname>QPDF</classname> object to be associated with an object  
4611 - handle for a warning to be issued. It would be nice if this could  
4612 - be fixed, but it would require major changes to the API. Rather  
4613 - than throwing away these conditions, we convert them to  
4614 - exceptions. It's not that bad though. Since any object handle that  
4615 - was read from a file has an associated <classname>QPDF</classname>  
4616 - object, it would only be type errors on objects that were created  
4617 - explicitly that would cause exceptions, and in that case, type  
4618 - errors are much more likely to be the result of a coding error  
4619 - than invalid input.  
4620 - </para>  
4621 - <para>  
4622 - <emphasis>Why does the behavior of a type exception differ between  
4623 - the C and C++ API?</emphasis> There is no way to throw and catch  
4624 - exceptions in C short of something like  
4625 - <function>setjmp</function> and <function>longjmp</function>, and  
4626 - that approach is not portable across language barriers. Since the  
4627 - C API is often used from other languages, it's important to keep  
4628 - things as simple as possible. Starting in qpdf 10.5, exceptions  
4629 - that used to crash code using the C API will be written to stderr  
4630 - by default, and it is possible to register an error handler.  
4631 - There's no reason that the error handler can't simulate exception  
4632 - handling in some way, such as by using <function>setjmp</function>  
4633 - and <function>longjmp</function> or by setting some variable that  
4634 - can be checked after library calls are made. In retrospect, it  
4635 - might have been better if the C API object handle methods returned  
4636 - error codes like the other methods and set return values in  
4637 - passed-in pointers, but this would complicate both the  
4638 - implementation and the use of the library for a case that is  
4639 - actually quite rare and largely avoidable.  
4640 - </para>  
4641 - </sect1>  
4642 - </chapter>  
4643 - <chapter id="ref.linearization">  
4644 - <title>Linearization</title>  
4645 - <para>  
4646 - This chapter describes how <classname>QPDF</classname> and  
4647 - <classname>QPDFWriter</classname> implement creation and processing  
4648 - of linearized PDFs.  
4649 - </para>  
4650 - <sect1 id="ref.linearization-strategy">  
4651 - <title>Basic Strategy for Linearization</title>  
4652 - <para>  
4653 - To avoid the incestuous problem of having the qpdf library  
4654 - validate its own linearized files, we have a special linearized  
4655 - file checking mode which can be invoked via @1@command@1@qpdf  
4656 - --check-linearization@2@command@2@ (or @1@command@1@qpdf  
4657 - --check@2@command@2@). This mode reads the linearization parameter  
4658 - dictionary and the hint streams and validates that object  
4659 - ordering, parameters, and hint stream contents are correct. The  
4660 - validation code was first tested against linearized files created  
4661 - by external tools (Acrobat and pdlin) and then used to validate  
4662 - files created by <classname>QPDFWriter</classname> itself.  
4663 - </para>  
4664 - </sect1>  
4665 - <sect1 id="ref.linearized.preparation">  
4666 - <title>Preparing For Linearization</title>  
4667 - <para>  
4668 - Before creating a linearized PDF file from any other PDF file, the  
4669 - PDF file must be altered such that all page attributes are  
4670 - propagated down to the page level (and not inherited from parents  
4671 - in the <literal>/Pages</literal> tree). We also have to know  
4672 - which objects refer to which other objects, being concerned with  
4673 - page boundaries and a few other cases. We refer to this part of  
4674 - preparing the PDF file as @1@firstterm@1@optimization@2@firstterm@2@,  
4675 - discussed in <xref linkend="ref.optimization"/>. Note that, in  
4676 - this context, the term @1@firstterm@1@optimization@2@firstterm@2@ is a  
4677 - qpdf term, and the term @1@firstterm@1@linearization@2@firstterm@2@ is a  
4678 - term from the PDF specification. Do not be confused by the fact  
4679 - that many applications refer to linearization as optimization or  
4680 - web optimization.  
4681 - </para>  
4682 - <para>  
4683 - When creating linearized PDF files from optimized PDF files, there  
4684 - are really only a few issues that need to be dealt with:  
4685 - <itemizedlist>  
4686 - <listitem>  
4687 - <para>  
4688 - Creation of hints tables  
4689 - </para>  
4690 - </listitem>  
4691 - <listitem>  
4692 - <para>  
4693 - Placing objects in the correct order  
4694 - </para>  
4695 - </listitem>  
4696 - <listitem>  
4697 - <para>  
4698 - Filling in offsets and byte sizes  
4699 - </para>  
4700 - </listitem>  
4701 - </itemizedlist>  
4702 - </para>  
4703 - </sect1>  
4704 - <sect1 id="ref.optimization">  
4705 - <title>Optimization</title>  
4706 - <para>  
4707 - In order to perform various operations such as linearization and  
4708 - splitting files into pages, it is necessary to know which objects  
4709 - are referenced by which pages, page thumbnails, and root and  
4710 - trailer dictionary keys. It is also necessary to ensure that all  
4711 - page-level attributes appear directly at the page level and are  
4712 - not inherited from parents in the pages tree.  
4713 - </para>  
4714 - <para>  
4715 - We refer to the process of enforcing these constraints as  
4716 - @1@firstterm@1@optimization@2@firstterm@2@. As mentioned above, note  
4717 - that some applications refer to linearization as optimization.  
4718 - Although this optimization was initially motivated by the need to  
4719 - create linearized files, we are using these terms separately.  
4720 - </para>  
4721 - <para>  
4722 - PDF file optimization is implemented in the  
4723 - @1@filename@1@QPDF_optimization.cc@2@filename@2@ source file. That file  
4724 - is richly commented and serves as the primary reference for the  
4725 - optimization process.  
4726 - </para>  
4727 - <para>  
4728 - After optimization has been completed, the private member  
4729 - variables <varname>obj_user_to_objects</varname> and  
4730 - <varname>object_to_obj_users</varname> in  
4731 - <classname>QPDF</classname> have been populated. Any object that  
4732 - has more than one value in the  
4733 - <varname>object_to_obj_users</varname> table is shared. Any  
4734 - object that has exactly one value in the  
4735 - <varname>object_to_obj_users</varname> table is private. To find  
4736 - all the private objects in a page or a trailer or root dictionary  
4737 - key, one merely has to make this determination for each element in  
4738 - the <varname>obj_user_to_objects</varname> table for the given  
4739 - page or key.  
4740 - </para>  
4741 - <para>  
4742 - Note that pages and thumbnails have different object user types,  
4743 - so the above test on a page will not include objects referenced only by  
4744 - the page's thumbnail dictionary.  
4745 - </para>  
4746 - </sect1>  
4747 - <sect1 id="ref.linearization.writing">  
4748 - <title>Writing Linearized Files</title>  
4749 - <para>  
4750 - We will create files with only primary hint streams. We will  
4751 - never write overflow hint streams. (As of PDF version 1.4,  
4752 - Acrobat doesn't either, and they are never necessary.) The hint  
4753 - streams contain offset information to objects that point to where  
4754 - they would be if the hint stream were not present. This means  
4755 - that we have to calculate all object positions before we can  
4756 - generate and write the hint table. This means that we have to  
4757 - generate the file in two passes. To make this reliable,  
4758 - <classname>QPDFWriter</classname> in linearization mode invokes  
4759 - exactly the same code twice to write the file to a pipeline.  
4760 - </para>  
4761 - <para>  
4762 - In the first pass, the target pipeline is a count pipeline chained  
4763 - to a discard pipeline. The count pipeline simply passes its data  
4764 - through to the next pipeline in the chain but can return the  
4765 - number of bytes passed through it at any intermediate point. The  
4766 - discard pipeline is an end of line pipeline that just throws its  
4767 - data away. The hint stream is not written and dummy values with  
4768 - adequate padding are stored in the first cross reference table,  
4769 - linearization parameter dictionary, and /Prev key of the first  
4770 - trailer dictionary. All the offset, length, object renumbering  
4771 - information, and anything else we need for the second pass is  
4772 - stored.  
4773 - </para>  
4774 - <para>  
4775 - At the end of the first pass, this information is passed to the  
4776 - <classname>QPDF</classname> class which constructs a compressed  
4777 - hint stream in a memory buffer and returns it.  
4778 - <classname>QPDFWriter</classname> uses this information to write a  
4779 - complete hint stream object into a memory buffer. At this point,  
4780 - the length of the hint stream is known.  
4781 - </para>  
4782 - <para>  
4783 - In the second pass, the end of the pipeline chain is a regular  
4784 - file instead of a discard pipeline, and we have known values for  
4785 - all the offsets and lengths that we didn't have in the first pass.  
4786 - We have to adjust offsets that appear after the start of the hint  
4787 - stream by the length of the hint stream, which is known. Anything  
4788 - that is of variable length is padded, with the padding code  
4789 - surrounding any writing code that differs in the two passes. This  
4790 - ensures that changes to the way things are represented never  
4791 - result in offsets that were gathered during the first pass  
4792 - becoming incorrect for the second pass.  
4793 - </para>  
4794 - <para>  
4795 - Using this strategy, we can write linearized files to a  
4796 - non-seekable output stream with only a single pass to disk or  
4797 - wherever the output is going.  
4798 - </para>  
4799 - </sect1>  
4800 - <sect1 id="ref.linearization-data">  
4801 - <title>Calculating Linearization Data</title>  
4802 - <para>  
4803 - Once a file is optimized, we have information about which objects  
4804 - access which other objects. We can then process these tables to  
4805 - decide which part (as described in "Linearized PDF Document  
4806 - Structure" in the PDF specification) each object is  
4807 - contained within. This tells us the exact order in which objects  
4808 - are written. The <classname>QPDFWriter</classname> class asks for  
4809 - this information and enqueues objects for writing in the proper  
4810 - order. It also turns on a check that causes an exception to be  
4811 - thrown if an object is encountered that has not already been  
4812 - queued. (This could happen only if there were a bug in the  
4813 - traversal code used to calculate the linearization data.)  
4814 - </para>  
4815 - </sect1>  
4816 - <sect1 id="ref.linearization-issues">  
4817 - <title>Known Issues with Linearization</title>  
4818 - <para>  
4819 - There are a handful of known issues with this linearization code.  
4820 - These issues do not appear to impact the behavior of linearized  
4821 - files which still work as intended: it is possible for a web  
4822 - browser to begin to display them before they are fully  
4823 - downloaded. In fact, it seems that various other programs that  
4824 - create linearized files have many of these same issues. These  
4825 - items make reference to terminology used in the linearization  
4826 - appendix of the PDF specification.  
4827 - <itemizedlist>  
4828 - <listitem>  
4829 - <para>  
4830 - Thread Dictionary information keys appear in part 4 with the  
4831 - rest of Threads instead of in part 9. Objects in part 9 are  
4832 - not grouped together functionally.  
4833 - </para>  
4834 - </listitem>  
4835 - <listitem>  
4836 - <para>  
4837 - We are not calculating numerators for shared object positions  
4838 - within content streams or interleaving them within content  
4839 - streams.  
4840 - </para>  
4841 - </listitem>  
4842 - <listitem>  
4843 - <para>  
4844 - We generate only page offset, shared object, and outline hint  
4845 - tables. It would be relatively easy to add some additional  
4846 - tables. We gather most of the information needed to create  
4847 - thumbnail hint tables. There are comments in the code about  
4848 - this.  
4849 - </para>  
4850 - </listitem>  
4851 - </itemizedlist>  
4852 - </para>  
4853 - </sect1>  
4854 - <sect1 id="ref.linearization-debugging">  
4855 - <title>Debugging Note</title>  
4856 - <para>  
4857 - The @1@command@1@qpdf --show-linearization@2@command@2@ command can show  
4858 - the complete contents of linearization hint streams. To look at  
4859 - the raw data, you can extract the filtered contents of the  
4860 - linearization hint tables using @1@command@1@qpdf --show-object=n  
4861 - --filtered-stream-data@2@command@2@. Then, to convert this into a  
4862 - bit stream (since linearization tables are bit streams written  
4863 - without regard to byte boundaries), you can pipe the resulting  
4864 - data through the following perl code:  
4865 -  
4866 - <programlisting>use bytes;  
4867 -binmode STDIN;  
4868 -undef $/;  
4869 -my $a = &lt;STDIN&gt;;  
4870 -my @ch = split(//, $a);  
4871 -map { printf("%08b", ord($_)) } @ch;  
4872 -print "\n";  
4873 -</programlisting>  
4874 - </para>  
4875 - </sect1>  
4876 - </chapter>  
4877 - <chapter id="ref.object-and-xref-streams">  
4878 - <title>Object and Cross-Reference Streams</title>  
4879 - <para>  
4880 - This chapter provides information about the implementation of  
4881 - object stream and cross-reference stream support in qpdf.  
4882 - </para>  
4883 - <sect1 id="ref.object-streams">  
4884 - <title>Object Streams</title>  
4885 - <para>  
4886 - Object streams can contain any regular object except the  
4887 - following:  
4888 - <itemizedlist>  
4889 - <listitem>  
4890 - <para>  
4891 - stream objects  
4892 - </para>  
4893 - </listitem>  
4894 - <listitem>  
4895 - <para>  
4896 - objects with generation &gt; 0  
4897 - </para>  
4898 - </listitem>  
4899 - <listitem>  
4900 - <para>  
4901 - the encryption dictionary  
4902 - </para>  
4903 - </listitem>  
4904 - <listitem>  
4905 - <para>  
4906 - objects containing the /Length of another stream  
4907 - </para>  
4908 - </listitem>  
4909 - </itemizedlist>  
4910 - In addition, Adobe reader (at least as of version 8.0.0) appears  
4911 - to not be able to handle having the document catalog appear in an  
4912 - object stream if the file is encrypted, though this is not  
4913 - specifically disallowed by the specification.  
4914 - </para>  
4915 - <para>  
4916 - There are additional restrictions for linearized files. See <xref  
4917 - linkend="ref.object-streams-linearization"/> for details.  
4918 - </para>  
4919 - <para>  
4920 - The PDF specification refers to objects in object streams as  
4921 - "compressed objects" regardless of whether the object  
4922 - stream is compressed.  
4923 - </para>  
4924 - <para>  
4925 - The generation number of every object in an object stream must be  
4926 - zero. It is possible to delete and replace an object in an object  
4927 - stream with a regular object.  
4928 - </para>  
4929 - <para>  
4930 - The object stream dictionary has the following keys:  
4931 - <itemizedlist>  
4932 - <listitem>  
4933 - <para>  
4934 - <literal>/N</literal>: number of objects  
4935 - </para>  
4936 - </listitem>  
4937 - <listitem>  
4938 - <para>  
4939 - <literal>/First</literal>: byte offset of first object  
4940 - </para>  
4941 - </listitem>  
4942 - <listitem>  
4943 - <para>  
4944 - <literal>/Extends</literal>: indirect reference to stream that  
4945 - this extends  
4946 - </para>  
4947 - </listitem>  
4948 - </itemizedlist>  
4949 - </para>  
4950 - <para>  
4951 - Stream collections are formed with <literal>/Extends</literal>.  
4952 - They must form a directed acyclic graph. These can be used for  
4953 - semantic information and are not meaningful to the PDF document's  
4954 - syntactic structure. Although qpdf preserves stream collections,  
4955 - it never generates them and doesn't make use of this information  
4956 - in any way.  
4957 - </para>  
4958 - <para>  
4959 - The specification recommends limiting the number of objects in  
4960 - an object stream for efficiency in reading and decoding. Acrobat 6  
4961 - uses no more than 100 objects per object stream for linearized  
4962 - files and no more than 200 objects per stream for non-linearized files.  
4963 - <classname>QPDFWriter</classname>, in object stream generation  
4964 - mode, never puts more than 100 objects in an object stream.  
4965 - </para>  
4966 - <para>  
4967 - Object stream contents consist of <emphasis>N</emphasis> pairs of  
4968 - integers, each of which is the object number and the byte offset  
4969 - of the object relative to the first object in the stream, followed  
4970 - by the objects themselves, concatenated.  
4971 - </para>  
4972 - </sect1>  
4973 - <sect1 id="ref.xref-streams">  
4974 - <title>Cross-Reference Streams</title>  
4975 - <para>  
4976 - For non-hybrid files, the value following  
4977 - <literal>startxref</literal> is the byte offset to the xref stream  
4978 - rather than the word <literal>xref</literal>.  
4979 - </para>  
4980 - <para>  
4981 - For hybrid files (files containing both xref tables and  
4982 - cross-reference streams), the xref table's trailer dictionary  
4983 - contains the key <literal>/XRefStm</literal> whose value is the  
4984 - byte offset to a cross-reference stream that supplements the xref  
4985 - table. A PDF 1.5-compliant application should read the xref table  
4986 - first. Then it should replace any object that it has already seen  
4987 - with any defined in the xref stream. Then it should follow any  
4988 - <literal>/Prev</literal> pointer in the original xref table's  
4989 - trailer dictionary. The specification is not clear about what  
4990 - should be done, if anything, with a <literal>/Prev</literal>  
4991 - pointer in the xref stream referenced by an xref table. The  
4992 - <classname>QPDF</classname> class ignores it, which is probably  
4993 - reasonable since, if this case were to appear for any sensible PDF  
4994 - file, the previous xref table would probably have a corresponding  
4995 - <literal>/XRefStm</literal> pointer of its own. For example, if a  
4996 - hybrid file were appended, the appended section would have its own  
4997 - xref table and <literal>/XRefStm</literal>. The appended xref  
4998 - table would point to the previous xref table which would point to the  
4999 - <literal>/XRefStm</literal>, meaning that the new  
5000 - <literal>/XRefStm</literal> doesn't have to point to it.  
5001 - </para>  
5002 - <para>  
5003 - Since xref streams must be read very early, they may not be  
5004 - encrypted, and they may not contain indirect objects for keys  
5005 - required to read them, which are these:  
5006 - <itemizedlist>  
5007 - <listitem>  
5008 - <para>  
5009 - <literal>/Type</literal>: value <literal>/XRef</literal>  
5010 - </para>  
5011 - </listitem>  
5012 - <listitem>  
5013 - <para>  
5014 - <literal>/Size</literal>: value <emphasis>n+1</emphasis>, where  
5015 - <emphasis>n</emphasis> is the highest object number (same as  
5016 - <literal>/Size</literal> in the trailer dictionary)  
5017 - </para>  
5018 - </listitem>  
5019 - <listitem>  
5020 - <para>  
5021 - <literal>/Index</literal> (optional): value  
5022 - <literal>[@1@replaceable@1@n count@2@replaceable@2@ ...]</literal>  
5023 - used to determine which objects' information is stored in this  
5024 - stream. The default is <literal>[0 /Size]</literal>.  
5025 - </para>  
5026 - </listitem>  
5027 - <listitem>  
5028 - <para>  
5029 - <literal>/Prev</literal>: value  
5030 - @1@replaceable@1@offset@2@replaceable@2@: byte offset of previous xref  
5031 - stream (same as <literal>/Prev</literal> in the trailer  
5032 - dictionary)  
5033 - </para>  
5034 - </listitem>  
5035 - <listitem>  
5036 - <para>  
5037 - <literal>/W [...]</literal>: sizes of each field in the xref  
5038 - table  
5039 - </para>  
5040 - </listitem>  
5041 - </itemizedlist>  
5042 - </para>  
5043 - <para>  
5044 - The other fields in the xref stream, which may be indirect if  
5045 - desired, are the union of those from the xref table's trailer  
5046 - dictionary.  
5047 - </para>  
5048 - <sect2 id="ref.xref-stream-data">  
5049 - <title>Cross-Reference Stream Data</title>  
5050 - <para>  
5051 - The stream data is binary and encoded in big-endian byte order.  
5052 - Entries are concatenated, and each entry has a length equal to  
5053 - the total of the entries in <literal>/W</literal> above. Each  
5054 - entry consists of one or more fields, the first of which is the  
5055 - type of the field. The number of bytes for each field is given  
5056 - by <literal>/W</literal> above. A 0 in <literal>/W</literal>  
5057 - indicates that the field is omitted and has the default value.  
5058 - The default value for the field type is  
5059 - "<literal>1</literal>". All other default values are  
5060 - "<literal>0</literal>".  
5061 - </para>  
5062 - <para>  
5063 - PDF 1.5 has three field types:  
5064 - <itemizedlist>  
5065 - <listitem>  
5066 - <para>  
5067 - 0: for free objects. Format: <literal>0 obj  
5068 - next-generation</literal>, same as the free table in a  
5069 - traditional cross-reference table  
5070 - </para>  
5071 - </listitem>  
5072 - <listitem>  
5073 - <para>  
5074 - 1: regular non-compressed object. Format: <literal>1 offset  
5075 - generation</literal>  
5076 - </para>  
5077 - </listitem>  
5078 - <listitem>  
5079 - <para>  
5080 - 2: for objects in object streams. Format: <literal>2  
5081 - object-stream-number index</literal>, the number of the object  
5082 - stream containing the object and the index within the object  
5083 - stream of the object.  
5084 - </para>  
5085 - </listitem>  
5086 - </itemizedlist>  
5087 - </para>  
5088 - <para>  
5089 - It seems standard to have the first entry in the table be  
5090 - <literal>0 0 0</literal> instead of <literal>0 0 ffff</literal>  
5091 - if there are no deleted objects.  
5092 - </para>  
5093 - </sect2>  
5094 - </sect1>  
5095 - <sect1 id="ref.object-streams-linearization">  
5096 - <title>Implications for Linearized Files</title>  
5097 - <para>  
5098 - For linearized files, the linearization dictionary, document  
5099 - catalog, and page objects may not be contained in object streams.  
5100 - </para>  
5101 - <para>  
5102 - Objects stored within object streams are given the highest range  
5103 - of object numbers within the main and first-page cross-reference  
5104 - sections.  
5105 - </para>  
5106 - <para>  
5107 - It is okay to use cross-reference streams in place of regular xref  
5108 - tables. There are no special considerations.  
5109 - </para>  
5110 - <para>  
5111 - Hint data refers to object streams themselves, not the objects in  
5112 - the streams. Shared object references should also be made to the  
5113 - object streams. There are no references in any hint tables to the  
5114 - object numbers of compressed objects (objects within object  
5115 - streams).  
5116 - </para>  
5117 - <para>  
5118 - When numbering objects, all shared objects within both the first  
5119 - and second halves of the linearized files must be numbered  
5120 - consecutively after all normal uncompressed objects in that half.  
5121 - </para>  
5122 - </sect1>  
5123 - <sect1 id="ref.object-stream-implementation">  
5124 - <title>Implementation Notes</title>  
5125 - <para>  
5126 - There are three modes for writing object streams:  
5127 - @1@option@1@disable@2@option@2@, @1@option@1@preserve@2@option@2@, and  
5128 - @1@option@1@generate@2@option@2@. In disable mode, we do not generate  
5129 - any object streams, and we also generate an xref table rather than  
5130 - xref streams. This can be used to generate PDF files that are  
5131 - viewable with older readers. In preserve mode, we write object  
5132 - streams such that written object streams contain the same objects  
5133 - and <literal>/Extends</literal> relationships as in the original  
5134 - file. This is equal to disable if the file has no object streams.  
5135 - In generate, we create object streams ourselves by grouping  
5136 - objects that are allowed in object streams together in sets of no  
5137 - more than 100 objects. We also ensure that the PDF version is at  
5138 - least 1.5 in generate mode, but we preserve the version header in  
5139 - the other modes. The default is @1@option@1@preserve@2@option@2@.  
5140 - </para>  
5141 - <para>  
5142 - We do not support creation of hybrid files. When we write files,  
5143 - even in preserve mode, we will lose any xref tables and merge any  
5144 - appended sections.  
5145 - </para>  
5146 - </sect1>  
5147 - </chapter>  
5148 - <appendix id="ref.release-notes">  
5149 - <title>Release Notes</title>  
5150 - <para>  
5151 - For a detailed list of changes, please see the file  
5152 - @1@filename@1@ChangeLog@2@filename@2@ in the source distribution.  
5153 - </para>  
5154 - <variablelist>  
5155 -<!--  
5156 - <varlistentry>  
5157 - <term>x.y.z: Month dd, YYYY</term>  
5158 - <listitem>  
5159 - <itemizedlist>  
5160 - <listitem>  
5161 - <para>  
5162 - Category  
5163 - </para>  
5164 - <itemizedlist>  
5165 - <listitem>  
5166 - <para>  
5167 - Item  
5168 - </para>  
5169 - </listitem>  
5170 - <listitem>  
5171 - <para>  
5172 - Item  
5173 - </para>  
5174 - </listitem>  
5175 - </itemizedlist>  
5176 - </listitem>  
5177 - <listitem>  
5178 - <para>  
5179 - Category  
5180 - </para>  
5181 - <itemizedlist>  
5182 - <listitem>  
5183 - <para>  
5184 - Item  
5185 - </para>  
5186 - </listitem>  
5187 - <listitem>  
5188 - <para>  
5189 - Item  
5190 - </para>  
5191 - </listitem>  
5192 - </itemizedlist>  
5193 - </listitem>  
5194 - </itemizedlist>  
5195 - </listitem>  
5196 - </varlistentry>  
5197 --->  
5198 - <varlistentry>  
5199 - <term>10.5.0: XXX Month dd, YYYY</term>  
5200 - <listitem>  
5201 - <itemizedlist>  
5202 - <listitem>  
5203 - <para>  
5204 - Library Enhancements  
5205 - </para>  
5206 - <itemizedlist>  
5207 - <listitem>  
5208 - <para>  
5209 - Since qpdf version 8, using object accessor methods on an  
5210 - instance of <classname>QPDFObjectHandle</classname> may  
5211 - create warnings if the object is not of the expected type.  
5212 - These warnings now have an error code of  
5213 - <literal>qpdf_e_object</literal> instead of  
5214 - <literal>qpdf_e_damaged_pdf</literal>. Also, comments have  
5215 - been added to @1@filename@1@QPDFObjectHandle.hh@2@filename@2@ to  
5216 - explain in more detail what the behavior is. See <xref  
5217 - linkend="ref.object-accessors"/> for a more in-depth  
5218 - discussion.  
5219 - </para>  
5220 - </listitem>  
5221 - <listitem>  
5222 - <para>  
5223 - Overhaul error handling for the object handle functions in  
5224 - the C API. See comments in the "Object handling"  
5225 - section of @1@filename@1@include/qpdf/qpdf-c.h@2@filename@2@ for  
5226 - details. In particular, exceptions thrown by the underlying  
5227 - C++ code when calling object accessors are caught and  
5228 - converted into errors. The errors can be trapped by  
5229 - registering an error handler with  
5230 - <function>qpdf_register_oh_error_handler</function> or will  
5231 - be written to stderr if no handler is registered.  
5232 - </para>  
5233 - </listitem>  
5234 - <listitem>  
5235 - <para>  
5236 - Add <function>qpdf_get_last_string_length</function> to the  
5237 - C API to get the length of the last string that was  
5238 - returned. This is needed to handle strings that contain  
5239 - embedded null characters.  
5240 - </para>  
5241 - </listitem>  
5242 - <listitem>  
5243 - <para>  
5244 - Add <function>qpdf_oh_is_initialized</function> and  
5245 - <function>qpdf_oh_new_uninitialized</function> to the C API  
5246 - to make it possible to work with uninitialized objects.  
5247 - </para>  
5248 - </listitem>  
5249 - <listitem>  
5250 - <para>  
5251 - Add <function>qpdf_oh_new_object</function> to the  
5252 - C API. This allows you to clone an object handle.  
5253 - </para>  
5254 - </listitem>  
5255 - <listitem>  
5256 - <para>  
5257 - Add <function>qpdf_get_object_by_id</function>,  
5258 - <function>qpdf_make_indirect_object</function>, and  
5259 - <function>qpdf_replace_object</function>, exposing the  
5260 - corresponding methods in <classname>QPDF</classname> and  
5261 - <classname>QPDFObjectHandle</classname>.  
5262 - </para>  
5263 - </listitem>  
5264 - </itemizedlist>  
5265 - </listitem>  
5266 - </itemizedlist>  
5267 - </listitem>  
5268 - </varlistentry>  
5269 - <varlistentry>  
5270 - <term>10.4.0: November 16, 2021</term>  
5271 - <listitem>  
5272 - <itemizedlist>  
5273 - <listitem>  
5274 - <para>  
5275 - Handling of Weak Cryptography Algorithms  
5276 - </para>  
5277 - <itemizedlist>  
5278 - <listitem>  
5279 - <para>  
5280 - From the qpdf CLI, the @1@option@1@--allow-weak-crypto@2@option@2@  
5281 - is now required to suppress a warning when explicitly  
5282 - creating PDF files using RC4 encryption. While qpdf will  
5283 - always retain the ability to read and write such files,  
5284 - doing so will require explicit acknowledgment moving  
5285 - forward. For qpdf 10.4, this change only affects the  
5286 - command-line tool. Starting in qpdf 11, there will be small  
5287 - API changes to require explicit acknowledgment in those  
5288 - cases as well. For additional information, see <xref  
5289 - linkend="ref.weak-crypto"/>.  
5290 - </para>  
5291 - </listitem>  
5292 - </itemizedlist>  
5293 - </listitem>  
5294 - <listitem>  
5295 - <para>  
5296 - Bug Fixes  
5297 - </para>  
5298 - <itemizedlist>  
5299 - <listitem>  
5300 - <para>  
5301 - Fix potential bounds error when handling shell completion  
5302 - that could occur when given bogus input.  
5303 - </para>  
5304 - </listitem>  
5305 - <listitem>  
5306 - <para>  
5307 - Properly handle overlay/underlay on completely empty pages  
5308 - (with no resource dictionary).  
5309 - </para>  
5310 - </listitem>  
5311 - <listitem>  
5312 - <para>  
5313 - Fix crash that could occur under certain conditions when  
5314 - using @1@option@1@--pages@2@option@2@ with files that had form  
5315 - fields.  
5316 - </para>  
5317 - </listitem>  
5318 - </itemizedlist>  
5319 - </listitem>  
5320 - <listitem>  
5321 - <para>  
5322 - Library Enhancements  
5323 - </para>  
5324 - <itemizedlist>  
5325 - <listitem>  
5326 - <para>  
5327 - Make <function>QPDF::findPage</function> functions public.  
5328 - </para>  
5329 - </listitem>  
5330 - <listitem>  
5331 - <para>  
5332 - Add methods to <classname>Pl_Flate</classname> to be able to  
5333 - receive warnings on certain recoverable conditions.  
5334 - </para>  
5335 - </listitem>  
5336 - <listitem>  
5337 - <para>  
5338 - Add an extra check to the library to detect when foreign  
5339 - objects are inserted directly (instead of using  
5340 - <function>QPDF::copyForeignObject</function>) at the time of  
5341 - insertion rather than when the file is written. Catching the  
5342 - error sooner makes it much easier to locate the incorrect  
5343 - code.  
5344 - </para>  
5345 - </listitem>  
5346 - </itemizedlist>  
5347 - </listitem>  
5348 - <listitem>  
5349 - <para>  
5350 - CLI Enhancements  
5351 - </para>  
5352 - <itemizedlist>  
5353 - <listitem>  
5354 - <para>  
5355 - Improve diagnostics around parsing @1@option@1@--pages@2@option@2@  
5356 - command-line options  
5357 - </para>  
5358 - </listitem>  
5359 - </itemizedlist>  
5360 - </listitem>  
5361 - <listitem>  
5362 - <para>  
5363 - Packaging Changes  
5364 - </para>  
5365 - <itemizedlist>  
5366 - <listitem>  
5367 - <para>  
5368 - The Windows binary distribution is now built with crypto  
5369 - provided by OpenSSL 3.0.  
5370 - </para>  
5371 - </listitem>  
5372 - </itemizedlist>  
5373 - </listitem>  
5374 - </itemizedlist>  
5375 - </listitem>  
5376 - </varlistentry>  
5377 - <varlistentry>  
5378 - <term>10.3.2: May 8, 2021</term>  
5379 - <listitem>  
5380 - <itemizedlist>  
5381 - <listitem>  
5382 - <para>  
5383 - Bug Fixes  
5384 - </para>  
5385 - <itemizedlist>  
5386 - <listitem>  
5387 - <para>  
5388 - When generating a file while preserving object streams,  
5389 - unreferenced objects are correctly removed unless  
5390 - @1@option@1@--preserve-unreferenced@2@option@2@ is specified.  
5391 - </para>  
5392 - </listitem>  
5393 - </itemizedlist>  
5394 - </listitem>  
5395 - <listitem>  
5396 - <para>  
5397 - Library Enhancements  
5398 - </para>  
5399 - <itemizedlist>  
5400 - <listitem>  
5401 - <para>  
5402 - When adding a page that already exists, make a shallow copy  
5403 - instead of throwing an exception. This makes the library  
5404 - behavior consistent with the CLI behavior. See  
5405 - @1@filename@1@ChangeLog@2@filename@2@ for additional notes.  
5406 - </para>  
5407 - </listitem>  
5408 - </itemizedlist>  
5409 - </listitem>  
5410 - </itemizedlist>  
5411 - </listitem>  
5412 - </varlistentry>  
5413 - <varlistentry>  
5414 - <term>10.3.1: March 11, 2021</term>  
5415 - <listitem>  
5416 - <itemizedlist>  
5417 - <listitem>  
5418 - <para>  
5419 - Bug Fixes  
5420 - </para>  
5421 - <itemizedlist>  
5422 - <listitem>  
5423 - <para>  
5424 - Form field copying failed on files where /DR was a direct  
5425 - object in the document-level form dictionary.  
5426 - </para>  
5427 - </listitem>  
5428 - </itemizedlist>  
5429 - </listitem>  
5430 - </itemizedlist>  
5431 - </listitem>  
5432 - </varlistentry>  
5433 - <varlistentry>  
5434 - <term>10.3.0: March 4, 2021</term>  
5435 - <listitem>  
5436 - <itemizedlist>  
5437 - <listitem>  
5438 - <para>  
5439 - Bug Fixes  
5440 - </para>  
5441 - <itemizedlist>  
5442 - <listitem>  
5443 - <para>  
5444 - The code for handling form fields when copying pages from  
5445 - 10.2.0 was not quite right and didn't work in a number of  
5446 - situations, such as when the same page was copied multiple  
5447 - times or when there were conflicting resource or field names  
5448 - across multiple copies. The 10.3.0 code has been much more  
5449 - thoroughly tested with more complex cases and with a  
5450 - multitude of readers and should be much closer to correct.  
5451 - The 10.2.0 code worked well enough for page splitting or for  
5452 - copying pages with form fields into documents that didn't  
5453 - already have them but was still not quite correct in  
5454 - handling of field-level resources.  
5455 - </para>  
5456 - </listitem>  
5457 - <listitem>  
5458 - <para>  
5459 - When <function>QPDF::replaceObject</function> or  
5460 - <function>QPDF::swapObjects</function> is called, existing  
5461 - <classname>QPDFObjectHandle</classname> instances no longer  
5462 - point to the old objects. The next time they are  
5463 - accessed, they automatically notice the change to the  
5464 - underlying object and update themselves. This resolves a  
5465 - very longstanding source of confusion, albeit in a very  
5466 - rarely used method call.  
5467 - </para>  
5468 - </listitem>  
5469 - <listitem>  
5470 - <para>  
5471 - Fix form field handling code to look for default  
5472 - appearances, quadding, and default resources in the right  
5473 - places. The code was not looking for things in the  
5474 - document-level interactive form dictionary that it was  
5475 - supposed to be finding there. This required adding a few new  
5476 - methods to <classname>QPDFFormFieldObjectHelper</classname>.  
5477 - </para>  
5478 - </listitem>  
5479 - </itemizedlist>  
5480 - </listitem>  
5481 - <listitem>  
5482 - <para>  
5483 - Library Enhancements  
5484 - </para>  
5485 - <itemizedlist>  
5486 - <listitem>  
5487 - <para>  
5488 - Reworked the code that handles copying annotations and form  
5489 - fields during page operations. There were additional methods  
5490 - added to the public API from 10.2.0 and one deprecation of  
5491 - a method added in 10.2.0. The majority of the API changes  
5492 - are in methods most people would never call and that will  
5493 - hopefully be superseded by higher-level interfaces for  
5494 - handling page copies. Please see the  
5495 - @1@filename@1@ChangeLog@2@filename@2@ file for details.  
5496 - </para>  
5497 - </listitem>  
5498 - <listitem>  
5499 - <para>  
5500 - The method <function>QPDF::numWarnings</function> was added  
5501 - so that you can tell whether any warnings happened during a  
5502 - specific block of code.  
5503 - </para>  
5504 - </listitem>  
5505 - </itemizedlist>  
5506 - </listitem>  
5507 - </itemizedlist>  
5508 - </listitem>  
5509 - </varlistentry>  
5510 - <varlistentry>  
5511 - <term>10.2.0: February 23, 2021</term>  
5512 - <listitem>  
5513 - <itemizedlist>  
5514 - <listitem>  
5515 - <para>  
5516 - CLI Behavior Changes  
5517 - </para>  
5518 - <itemizedlist>  
5519 - <listitem>  
5520 - <para>  
5521 - Operations that work on combining pages are much better  
5522 - about protecting form fields. In particular,  
5523 - @1@option@1@--split-pages@2@option@2@ and @1@option@1@--pages@2@option@2@  
5524 - now preserve interactive form functionality by copying the  
5525 - relevant form field information from the original files.  
5526 - Additionally, if you use @1@option@1@--pages@2@option@2@ to select  
5527 - only some pages from the original input file, unused form  
5528 - fields are removed, which prevents lots of unused  
5529 - annotations from being retained.  
5530 - </para>  
5531 - </listitem>  
5532 - <listitem>  
5533 - <para>  
5534 - By default, @1@command@1@qpdf@2@command@2@ no longer allows  
5535 - creation of encrypted PDF files whose user password is  
5536 - non-empty and owner password is empty when a 256-bit key is  
5537 - in use. The @1@option@1@--allow-insecure@2@option@2@ option,  
5538 - specified inside the @1@option@1@--encrypt@2@option@2@ options,  
5539 - allows creation of such files. Behavior changes in the CLI  
5540 - are avoided when possible, but an exception was made here  
5541 - because this is security-related. qpdf must always allow  
5542 - creation of weird files for testing purposes, but it should  
5543 - not default to letting users unknowingly create insecure  
5544 - files.  
5545 - </para>  
5546 - </listitem>  
5547 - </itemizedlist>  
5548 - </listitem>  
5549 - <listitem>  
5550 - <para>  
5551 - Library Behavior Changes  
5552 - </para>  
5553 - <itemizedlist>  
5554 - <listitem>  
5555 - <para>  
5556 - Note: the changes in this section cause differences in  
5557 - output in some cases. These differences change the syntax of  
5558 - the PDF but do not change the semantics (meaning). I make a  
5559 - strong effort to avoid gratuitous changes in qpdf's output  
5560 - so that qpdf changes don't break people's tests. In this  
5561 - case, the changes significantly improve the readability of  
5562 - the generated PDF and don't affect any output that's  
5563 - generated by simple transformation. If you are annoyed by  
5564 - having to update test files, please rest assured that  
5565 - changes like this have been and will continue to be rare  
5566 - events.  
5567 - </para>  
5568 - </listitem>  
5569 - <listitem>  
5570 - <para>  
5571 - <function>QPDFObjectHandle::newUnicodeString</function> now  
5572 - uses whichever of ASCII, PDFDocEncoding, or UTF-16 is  
5573 - sufficient to encode all the characters in the string. This  
5574 - reduces needless encoding in UTF-16 of strings that can be  
5575 - encoded in ASCII. This change may cause qpdf to generate  
5576 - different output than before when form field values are set  
5577 - using <classname>QPDFFormFieldObjectHelper</classname> but  
5578 - does not change the meaning of the output.  
5579 - </para>  
5580 - </listitem>  
5581 - <listitem>  
5582 - <para>  
5583 - The code that places form XObjects and also the code that  
5584 - flattens rotations trim trailing zeroes from real numbers  
5585 - that they calculate. This causes slight (but semantically  
5586 - equivalent) differences in generated appearance streams and  
5587 - form XObject invocations in overlay/underlay code or in user  
5588 - code that calls the methods that place form XObjects on a  
5589 - page.  
5590 - </para>  
5591 - </listitem>  
5592 - </itemizedlist>  
5593 - </listitem>  
5594 - <listitem>  
5595 - <para>  
5596 - CLI Enhancements  
5597 - </para>  
5598 - <itemizedlist>  
5599 - <listitem>  
5600 - <para>  
5601 - Add new command line options for listing, saving, adding,  
5602 - removing, and copying file attachments. See <xref  
5603 - linkend="ref.attachments"/> for details.  
5604 - </para>  
5605 - </listitem>  
5606 - <listitem>  
5607 - <para>  
5608 - Page splitting and merging operations, as well as  
5609 - @1@option@1@--flatten-rotation@2@option@2@, are better behaved  
5610 - with respect to annotations and interactive form fields. In  
5611 - most cases, interactive form field functionality and proper  
5612 - formatting and functionality of annotations is preserved by  
5613 - these operations. There are still some cases that aren't  
5614 - perfect, such as when functionality of annotations depends  
5615 - on document-level data that qpdf doesn't yet understand or  
5616 - when there are problems with referential integrity among  
5617 - form fields and annotations (e.g., when a single form field  
5618 - object or its associated annotations are shared across  
5619 - multiple pages, a case that is out of spec but that works in  
5620 - most viewers anyway).  
5621 - </para>  
5622 - </listitem>  
5623 - <listitem>  
5624 - <para>  
5625 - The option  
5626 - @1@option@1@--password-file=@1@replaceable@1@filename@2@replaceable@2@@2@option@2@  
5627 - can now be used to read the decryption password from a file.  
5628 - You can use <literal>-</literal> as the file name to read  
5629 - the password from standard input. This is an easier/more  
5630 - obvious way to read passwords from files or standard input  
5631 - than using @1@option@1@@file@2@option@2@ for this purpose.  
5632 - </para>  
5633 - </listitem>  
5634 - <listitem>  
5635 - <para>  
5636 - Add some information about attachments to the json output,  
5637 - and added <literal>attachments</literal> as an additional  
5638 - json key. The information included here is limited to the  
5639 - preferred name and content stream and a reference to the  
5640 - file spec object. This is enough detail for clients to avoid  
5641 - the hassle of navigating a name tree and provides what is  
5642 - needed for basic enumeration and extraction of attachments.  
5643 - More detailed information can be obtained by following the  
5644 - reference to the file spec object.  
5645 - </para>  
5646 - </listitem>  
5647 - <listitem>  
5648 - <para>  
5649 - Add numeric option to @1@option@1@--collate@2@option@2@. If  
5650 - @1@option@1@--collate=@1@replaceable@1@n@2@replaceable@2@@2@option@2@ is  
5651 - given, take pages in groups of @1@replaceable@1@n@2@replaceable@2@  
5652 - from the given files.  
5653 - </para>  
5654 - </listitem>  
5655 - <listitem>  
5656 - <para>  
5657 - It is now valid to provide @1@option@1@--rotate=0@2@option@2@ to  
5658 - clear rotation from a page.  
5659 - </para>  
5660 - </listitem>  
5661 - </itemizedlist>  
5662 - </listitem>  
5663 - <listitem>  
5664 - <para>  
5665 - Library Enhancements  
5666 - </para>  
5667 - <itemizedlist>  
5668 - <listitem>  
5669 - <para>  
5670 - This release includes numerous additions to the API. Not all  
5671 - changes are listed here. Please see the  
5672 - @1@filename@1@ChangeLog@2@filename@2@ file in the source  
5673 - distribution for a comprehensive list. Highlights appear  
5674 - below.  
5675 - </para>  
5676 - </listitem>  
5677 - <listitem>  
5678 - <para>  
5679 - Add <function>QPDFObjectHandle::ditems()</function> and  
5680 - <function>QPDFObjectHandle::aitems()</function> that enable  
5681 - C++-style iteration, including range-for iteration, over  
5682 - dictionary and array QPDFObjectHandles. See comments in  
5683 - @1@filename@1@include/qpdf/QPDFObjectHandle.hh@2@filename@2@ and  
5684 - @1@filename@1@examples/pdf-name-number-tree.cc@2@filename@2@ for  
5685 - details.  
5686 - </para>  
5687 - </listitem>  
5688 - <listitem>  
5689 - <para>  
5690 - Add <function>QPDFObjectHandle::copyStream</function> for  
5691 - making a copy of a stream within the same  
5692 - <classname>QPDF</classname> instance.  
5693 - </para>  
5694 - </listitem>  
5695 - <listitem>  
5696 - <para>  
5697 - Add new helper classes for supporting file attachments, also  
5698 - known as embedded files. New classes are  
5699 - <classname>QPDFEmbeddedFileDocumentHelper</classname>,  
5700 - <classname>QPDFFileSpecObjectHelper</classname>, and  
5701 - <classname>QPDFEFStreamObjectHelper</classname>. See their  
5702 - respective headers for details and  
5703 - @1@filename@1@examples/pdf-attach-file.cc@2@filename@2@ for an  
5704 - example.  
5705 - </para>  
5706 - </listitem>  
5707 - <listitem>  
5708 - <para>  
5709 - Add a version of  
5710 - <function>QPDFObjectHandle::parse</function> that takes a  
5711 - <classname>QPDF</classname> pointer as context so that it  
5712 - can parse strings containing indirect object references.  
5713 - This is illustrated in  
5714 - @1@filename@1@examples/pdf-attach-file.cc@2@filename@2@.  
5715 - </para>  
5716 - </listitem>  
5717 - <listitem>  
5718 - <para>  
5719 - Re-implement <classname>QPDFNameTreeObjectHelper</classname>  
5720 - and <classname>QPDFNumberTreeObjectHelper</classname> to be  
5721 - more efficient, add an iterator-based API, give them the  
5722 - capability to repair broken trees, and create methods for  
5723 - modifying the trees. With this change, qpdf has a robust  
5724 - read/write implementation of name and number trees.  
5725 - </para>  
5726 - </listitem>  
5727 - <listitem>  
5728 - <para>  
5729 - Add new versions of  
5730 - <function>QPDFObjectHandle::replaceStreamData</function>  
5731 - that take <classname>std::function</classname> objects for  
5732 - cases when you need something between a static string and a  
5733 - full-fledged StreamDataProvider. Using this with  
5734 - <function>QUtil::file_provider</function> is a very easy way  
5735 - to create a stream from the contents of a file.  
5736 - </para>  
5737 - </listitem>  
5738 - <listitem>  
5739 - <para>  
5740 - The <classname>QPDFMatrix</classname> class, formerly a  
5741 - private, internal class, has been added to the public API.  
5742 - See @1@filename@1@include/qpdf/QPDFMatrix.hh@2@filename@2@ for  
5743 - details. This class is for working with transformation  
5744 - matrices. Some methods in  
5745 - <classname>QPDFPageObjectHelper</classname> make use of this  
5746 - to make information about transformation matrices available.  
5747 - For an example, see  
5748 - @1@filename@1@examples/pdf-overlay-page.cc@2@filename@2@.  
5749 - </para>  
5750 - </listitem>  
5751 - <listitem>  
5752 - <para>  
5753 - Several new methods were added to  
5754 - <classname>QPDFAcroFormDocumentHelper</classname> for  
5755 - adding, removing, getting information about, and enumerating  
5756 - form fields.  
5757 - </para>  
5758 - </listitem>  
5759 - <listitem>  
5760 - <para>  
5761 - Add method  
5762 - <function>QPDFAcroFormDocumentHelper::transformAnnotations</function>,  
5763 - which applies a transformation to each annotation on a page.  
5764 - </para>  
5765 - </listitem>  
5766 - <listitem>  
5767 - <para>  
5768 - Add  
5769 - <function>QPDFPageObjectHelper::copyAnnotations</function>,  
5770 - which copies annotations and, if applicable, associated form  
5771 - fields, from one page to another, possibly transforming the  
5772 - rectangles.  
5773 - </para>  
5774 - </listitem>  
5775 - </itemizedlist>  
5776 - </listitem>  
5777 - <listitem>  
5778 - <para>  
5779 - Build Changes  
5780 - </para>  
5781 - <itemizedlist>  
5782 - <listitem>  
5783 - <para>  
5784 - A C++-14 compiler is now required to build qpdf. There is no  
5785 - intention to require anything newer than that for a while.  
5786 - C++-14 includes modest enhancements to C++-11 and appears to  
5787 - be supported about as widely as C++-11.  
5788 - </para>  
5789 - </listitem>  
5790 - </itemizedlist>  
5791 - </listitem>  
5792 - <listitem>  
5793 - <para>  
5794 - Bug Fixes  
5795 - </para>  
5796 - <itemizedlist>  
5797 - <listitem>  
5798 - <para>  
5799 - The @1@option@1@--flatten-rotation@2@option@2@ option applies  
5800 - transformations to any annotations that may be on the page.  
5801 - </para>  
5802 - </listitem>  
5803 - <listitem>  
5804 - <para>  
5805 - If a form XObject lacks a resources dictionary, consider any  
5806 - names in that form XObject to be referenced from the  
5807 - containing page. This is compliant with older PDF versions.  
5808 - Also detect if any form XObjects have any unresolved names  
5809 - and, if so, don't remove unreferenced resources from them or  
5810 - from the page that contains them. Unfortunately this has the  
5811 - side effect of preventing removal of unreferenced resources  
5812 - in some cases where names appear that don't refer to  
5813 - resources, such as with tagged PDF. This is a bit of a  
5814 - corner case that is not likely to cause a significant  
5815 - problem in practice, but the only side effect would be lack  
5816 - of removal of shared resources. A future version of qpdf may  
5817 - be more sophisticated in its detection of names that refer  
5818 - to resources.  
5819 - </para>  
5820 - </listitem>  
5821 - <listitem>  
5822 - <para>  
5823 - Properly handle strings if they appear in inline image  
5824 - dictionaries while externalizing inline images.  
5825 - </para>  
5826 - </listitem>  
5827 - </itemizedlist>  
5828 - </listitem>  
5829 - </itemizedlist>  
5830 - </listitem>  
5831 - </varlistentry>  
5832 - <varlistentry>  
5833 - <term>10.1.0: January 5, 2021</term>  
5834 - <listitem>  
5835 - <itemizedlist>  
5836 - <listitem>  
5837 - <para>  
5838 - CLI Enhancements  
5839 - </para>  
5840 - <itemizedlist>  
5841 - <listitem>  
5842 - <para>  
5843 - Add @1@option@1@--flatten-rotation@2@option@2@ command-line option,  
5844 - which causes all pages that are rotated using parameters in  
5845 - the page's dictionary to instead be identically rotated in  
5846 - the page's contents. The change is not user-visible for  
5847 - compliant PDF readers but can be used to work around broken  
5848 - PDF applications that don't properly handle page rotation.  
5849 - </para>  
5850 - </listitem>  
5851 - </itemizedlist>  
5852 - </listitem>  
5853 - <listitem>  
5854 - <para>  
5855 - Library Enhancements  
5856 - </para>  
5857 - <itemizedlist>  
5858 - <listitem>  
5859 - <para>  
5860 - Support for user-provided (pluggable, modular) stream  
5861 - filters. It is now possible to derive a class from  
5862 - <classname>QPDFStreamFilter</classname> and register it with  
5863 - <classname>QPDF</classname> so that regular library methods,  
5864 - including those used by <classname>QPDFWriter</classname>,  
5865 - can decode streams with filters not directly supported by  
5866 - the library. The example  
5867 - @1@filename@1@examples/pdf-custom-filter.cc@2@filename@2@  
5868 - illustrates how to use this capability.  
5869 - </para>  
5870 - </listitem>  
5871 - <listitem>  
5872 - <para>  
5873 - Add methods to <classname>QPDFPageObjectHelper</classname>  
5874 - to iterate through XObjects on a page or form XObjects,  
5875 - possibly recursing into nested form XObjects:  
5876 - <function>forEachXObject</function>,  
5877 - <function>forEachImage</function>,  
5878 - <function>forEachFormXObject</function>.  
5879 - </para>  
5880 - </listitem>  
5881 - <listitem>  
5882 - <para>  
5883 - Enhance several methods in  
5884 - <classname>QPDFPageObjectHelper</classname> to work with  
5885 - form XObjects as well as pages, as noted in comments. See  
5886 - @1@filename@1@ChangeLog@2@filename@2@ for a full list.  
5887 - </para>  
5888 - </listitem>  
5889 - <listitem>  
5890 - <para>  
5891 - Rename some functions in  
5892 - <classname>QPDFPageObjectHelper</classname>, while keeping  
5893 - old names for compatibility:  
5894 - <itemizedlist>  
5895 - <listitem>  
5896 - <para>  
5897 - <function>getPageImages</function> to  
5898 - <function>getImages</function>  
5899 - </para>  
5900 - </listitem>  
5901 - <listitem>  
5902 - <para>  
5903 - <function>filterPageContents</function> to  
5904 - <function>filterContents</function>  
5905 - </para>  
5906 - </listitem>  
5907 - <listitem>  
5908 - <para>  
5909 - <function>pipePageContents</function> to  
5910 - <function>pipeContents</function>  
5911 - </para>  
5912 - </listitem>  
5913 - <listitem>  
5914 - <para>  
5915 - <function>parsePageContents</function> to  
5916 - <function>parseContents</function>  
5917 - </para>  
5918 - </listitem>  
5919 - </itemizedlist>  
5920 - </para>  
5921 - </listitem>  
5922 - <listitem>  
5923 - <para>  
5924 - Add method  
5925 - <function>QPDFPageObjectHelper::getFormXObjects</function>  
5926 - to return a map of form XObjects directly on a page or form  
5927 - XObject  
5928 - </para>  
5929 - </listitem>  
5930 - <listitem>  
5931 - <para>  
5932 - Add new helper methods to  
5933 - <classname>QPDFObjectHandle</classname>:  
5934 - <function>isFormXObject</function>, <function>isImage</function>  
5935 - </para>  
5936 - </listitem>  
5937 - <listitem>  
5938 - <para>  
5939 - Add the optional <function>allow_streams</function>  
5940 - parameter to <function>QPDFObjectHandle::makeDirect</function>.  
5941 - When <function>QPDFObjectHandle::makeDirect</function> is  
5942 - called in this way, it preserves references to streams  
5943 - rather than throwing an exception.  
5944 - </para>  
5945 - </listitem>  
5946 - <listitem>  
5947 - <para>  
5948 - Add <function>QPDFObjectHandle::setFilterOnWrite</function>  
5949 - method. Calling this on a stream prevents  
5950 - <classname>QPDFWriter</classname> from attempting to  
5951 - uncompress, recompress, or otherwise filter a stream even if  
5952 - it could. Developers can use this to protect streams that  
5953 - are optimized or should be protected from  
5954 - <classname>QPDFWriter</classname>'s default behavior for any  
5955 - other reason.  
5956 - </para>  
5957 - </listitem>  
5958 - <listitem>  
5959 - <para>  
5960 - Add <classname>ostream</classname>  
5961 - <literal>&lt;&lt;</literal> operator for  
5962 - <classname>QPDFObjGen</classname>. This is useful to have  
5963 - for debugging.  
5964 - </para>  
5965 - </listitem>  
5966 - <listitem>  
5967 - <para>  
5968 - Add method  
5969 - <function>QPDFPageObjectHelper::flattenRotation</function>,  
5970 - which replaces a page's <literal>/Rotate</literal> keyword  
5971 - by rotating the page within the content stream and altering  
5972 - the page's bounding boxes so the rendering is the same. This  
5973 - can be used to work around buggy PDF readers that can't  
5974 - properly handle page rotation.  
5975 - </para>  
5976 - </listitem>  
5977 - </itemizedlist>  
5978 - </listitem>  
5979 - <listitem>  
5980 - <para>  
5981 - C API Enhancements  
5982 - </para>  
5983 - <itemizedlist>  
5984 - <listitem>  
5985 - <para>  
5986 - Add several new functions to the C API for working with  
5987 - objects. These are wrappers around many of the methods in  
5988 - <classname>QPDFObjectHandle</classname>. Their inclusion  
5989 - adds considerable new capability to the C API.  
5990 - </para>  
5991 - </listitem>  
5992 - <listitem>  
5993 - <para>  
5994 - Add <function>qpdf_register_progress_reporter</function> to  
5995 - the C API, corresponding to  
5996 - <function>QPDFWriter::registerProgressReporter</function>.  
5997 - </para>  
5998 - </listitem>  
5999 - </itemizedlist>  
6000 - </listitem>  
6001 - <listitem>  
6002 - <para>  
6003 - Performance Enhancements  
6004 - </para>  
6005 - <itemizedlist>  
6006 - <listitem>  
6007 - <para>  
6008 - Improve steps <classname>QPDFWriter</classname> takes to  
6009 - prepare a <classname>QPDF</classname> object for writing,  
6010 - resulting in about an 8% improvement in write performance  
6011 - while allowing indirect objects to appear in  
6012 - <literal>/DecodeParms</literal>.  
6013 - </para>  
6014 - </listitem>  
6015 - <listitem>  
6016 - <para>  
6017 - When extracting pages, the @1@command@1@qpdf@2@command@2@ CLI only  
6018 - removes unreferenced resources from the pages that are being  
6019 - kept, resulting in a significant performance improvement  
6020 - when extracting small numbers of pages from large, complex  
6021 - documents.  
6022 - </para>  
6023 - </listitem>  
6024 - </itemizedlist>  
6025 - </listitem>  
6026 - <listitem>  
6027 - <para>  
6028 - Bug Fixes  
6029 - </para>  
6030 - <itemizedlist>  
6031 - <listitem>  
6032 - <para>  
6033 - <function>QPDFPageObjectHelper::externalizeInlineImages</function>  
6034 - was not externalizing images referenced from form XObjects  
6035 - that appeared on the page.  
6036 - </para>  
6037 - </listitem>  
6038 - <listitem>  
6039 - <para>  
6040 - <function>QPDFObjectHandle::filterPageContents</function>  
6041 - was broken for pages with multiple content streams.  
6042 - </para>  
6043 - </listitem>  
6044 - <listitem>  
6045 - <para>  
6046 - Tweak zsh completion code to behave a little better with  
6047 - respect to path completion.  
6048 - </para>  
6049 - </listitem>  
6050 - </itemizedlist>  
6051 - </listitem>  
6052 - </itemizedlist>  
6053 - </listitem>  
6054 - </varlistentry>  
6055 - <varlistentry>  
6056 - <term>10.0.4: November 21, 2020</term>  
6057 - <listitem>  
6058 - <itemizedlist>  
6059 - <listitem>  
6060 - <para>  
6061 - Bug Fixes  
6062 - </para>  
6063 - <itemizedlist>  
6064 - <listitem>  
6065 - <para>  
6066 - Fix a handful of integer overflows. This includes cases  
6067 - found by fuzzing as well as having qpdf not do range  
6068 - checking on unused values in the xref stream.  
6069 - </para>  
6070 - </listitem>  
6071 - </itemizedlist>  
6072 - </listitem>  
6073 - </itemizedlist>  
6074 - </listitem>  
6075 - </varlistentry>  
6076 - <varlistentry>  
6077 - <term>10.0.3: October 31, 2020</term>  
6078 - <listitem>  
6079 - <itemizedlist>  
6080 - <listitem>  
6081 - <para>  
6082 - Bug Fixes  
6083 - </para>  
6084 - <itemizedlist>  
6085 - <listitem>  
6086 - <para>  
6087 - The fix to the bug involving copying streams with indirect  
6088 - filters was incorrect and introduced a new, more serious  
6089 - bug. The original bug has been fixed correctly, as has the  
6090 - bug introduced in 10.0.2.  
6091 - </para>  
6092 - </listitem>  
6093 - </itemizedlist>  
6094 - </listitem>  
6095 - </itemizedlist>  
6096 - </listitem>  
6097 - </varlistentry>  
6098 - <varlistentry>  
6099 - <term>10.0.2: October 27, 2020</term>  
6100 - <listitem>  
6101 - <itemizedlist>  
6102 - <listitem>  
6103 - <para>  
6104 - Bug Fixes  
6105 - </para>  
6106 - <itemizedlist>  
6107 - <listitem>  
6108 - <para>  
6109 - When concatenating content streams, as with  
6110 - @1@option@1@--coalesce-contents@2@option@2@, there were cases in  
6111 - which qpdf would merge two lexical tokens together, creating  
6112 - invalid results. A newline is now inserted between  
6113 - merged content streams if one is not already present.  
6114 - </para>  
6115 - </listitem>  
6116 - <listitem>  
6117 - <para>  
6118 - Fix an internal error that could occur when copying foreign  
6119 - streams whose stream data had been replaced using a stream  
6120 - data provider if those streams had indirect filters or  
6121 - decode parameters. This is a rare corner case.  
6122 - </para>  
6123 - </listitem>  
6124 - <listitem>  
6125 - <para>  
6126 - Ensure that the caller's locale settings do not change the  
6127 - results of numeric conversions performed internally by the  
6128 - qpdf library. Note that the problem here could only be  
6129 - caused when the qpdf library was used programmatically.  
6130 - Using the qpdf CLI already ignored the user's locale for  
6131 - numeric conversion.  
6132 - </para>  
6133 - </listitem>  
6134 - <listitem>  
6135 - <para>  
6136 - Fix several instances in which warnings were not suppressed  
6137 - in spite of @1@option@1@--no-warn@2@option@2@ and/or errors or  
6138 - warnings were written to standard output rather than  
6139 - standard error.  
6140 - </para>  
6141 - </listitem>  
6142 - <listitem>  
6143 - <para>  
6144 - Fixed a memory leak that could occur under specific  
6145 - circumstances when  
6146 - @1@option@1@--object-streams=generate@2@option@2@ was used.  
6147 - </para>  
6148 - </listitem>  
6149 - <listitem>  
6150 - <para>  
6151 - Fix various integer overflows and similar conditions found  
6152 - by the OSS-Fuzz project.  
6153 - </para>  
6154 - </listitem>  
6155 - </itemizedlist>  
6156 - </listitem>  
6157 - <listitem>  
6158 - <para>  
6159 - Enhancements  
6160 - </para>  
6161 - <itemizedlist>  
6162 - <listitem>  
6163 - <para>  
6164 - New option @1@option@1@--warning-exit-0@2@option@2@ causes qpdf to  
6165 - exit with a status of <literal>0</literal> rather than  
6166 - <literal>3</literal> if there are warnings but no errors.  
6167 - Combine with @1@option@1@--no-warn@2@option@2@ to completely ignore  
6168 - warnings.  
6169 - </para>  
6170 - </listitem>  
6171 - <listitem>  
6172 - <para>  
6173 - Performance improvements have been made to  
6174 - <function>QPDF::processMemoryFile</function>.  
6175 - </para>  
6176 - </listitem>  
6177 - <listitem>  
6178 - <para>  
6179 - The OpenSSL crypto provider produces more detailed error  
6180 - messages.  
6181 - </para>  
6182 - </listitem>  
6183 - </itemizedlist>  
6184 - </listitem>  
6185 - <listitem>  
6186 - <para>  
6187 - Build Changes  
6188 - </para>  
6189 - <itemizedlist>  
6190 - <listitem>  
6191 - <para>  
6192 - The option @1@option@1@--disable-rpath@2@option@2@ is now supported  
6193 - by qpdf's @1@command@1@./configure@2@command@2@ script. Some  
6194 - distributions' packaging standards recommended the use of  
6195 - this option.  
6196 - </para>  
6197 - </listitem>  
6198 - <listitem>  
6199 - <para>  
6200 - Selection of a printf format string for <type>long  
6201 - long</type> has been moved from <literal>ifdefs</literal> to  
6202 - an autoconf test. If you are using your own build system,  
6203 - you will need to provide a value for  
6204 - <literal>LL_FMT</literal> in  
6205 - @1@filename@1@libqpdf/qpdf/qpdf-config.h@2@filename@2@, which would  
6206 - typically be <literal>&quot;%lld&quot;</literal> or, for  
6207 - some Windows compilers, <literal>&quot;%I64d&quot;</literal>.  
6208 - </para>  
6209 - </listitem>  
6210 - <listitem>  
6211 - <para>  
6212 - Several improvements were made to build-time configuration  
6213 - of the OpenSSL crypto provider.  
6214 - </para>  
6215 - </listitem>  
6216 - <listitem>  
6217 - <para>  
6218 - A nearly stand-alone Linux binary zip file is now included  
6219 - with the qpdf release. This is built on an older (but  
6220 - supported) Ubuntu LTS release, but would work on most  
6221 - reasonably recent Linux distributions. It contains only the  
6222 - executables and required shared libraries that would not be  
6223 - present on a minimal system. It can be used for including  
6224 - qpdf in a minimal environment, such as a docker container.  
6225 - The zip file is also known to work as a layer in AWS Lambda.  
6226 - </para>  
6227 - </listitem>  
6228 - <listitem>  
6229 - <para>  
6230 - QPDF's automated build has been migrated from Azure  
6231 - Pipelines to GitHub Actions.  
6232 - </para>  
6233 - </listitem>  
6234 - </itemizedlist>  
6235 - </listitem>  
6236 - <listitem>  
6237 - <para>  
6238 - Windows-specific Changes  
6239 - </para>  
6240 - <itemizedlist>  
6241 - <listitem>  
6242 - <para>  
6243 - The Windows executables distributed with qpdf releases now  
6244 - use the OpenSSL crypto provider by default. The native  
6245 - crypto provider is also compiled in and can be selected at  
6246 - runtime with the <literal>QPDF_CRYPTO_PROVIDER</literal>  
6247 - environment variable.  
6248 - </para>  
6249 - </listitem>  
6250 - <listitem>  
6251 - <para>  
6252 - Improvements have been made to how a cryptographic provider  
6253 - is obtained in the native Windows crypto implementation.  
6254 - However, this is mostly overshadowed by OpenSSL being used by  
6255 - default.  
6256 - </para>  
6257 - </listitem>  
6258 - </itemizedlist>  
6259 - </listitem>  
6260 - </itemizedlist>  
6261 - </listitem>  
6262 - </varlistentry>  
6263 - <varlistentry>  
6264 - <term>10.0.1: April 9, 2020</term>  
6265 - <listitem>  
6266 - <itemizedlist>  
6267 - <listitem>  
6268 - <para>  
6269 - Bug Fixes  
6270 - </para>  
6271 - <itemizedlist>  
6272 - <listitem>  
6273 - <para>  
6274 - 10.0.0 introduced a bug in which calling  
6275 - <function>QPDFObjectHandle::getStreamData</function> on a  
6276 - stream that can't be filtered was returning the raw data  
6277 - instead of throwing an exception. This is now fixed.  
6278 - </para>  
6279 - </listitem>  
6280 - <listitem>  
6281 - <para>  
6282 - Fix a bug that was preventing qpdf from linking with some  
6283 - versions of clang on some platforms.  
6284 - </para>  
6285 - </listitem>  
6286 - </itemizedlist>  
6287 - </listitem>  
6288 - <listitem>  
6289 - <para>  
6290 - Enhancements  
6291 - </para>  
6292 - <itemizedlist>  
6293 - <listitem>  
6294 - <para>  
6295 - Improve the @1@filename@1@pdf-invert-images@2@filename@2@ example  
6296 - to avoid having to load all the images into RAM at the same  
6297 - time.  
6298 - </para>  
6299 - </listitem>  
6300 - </itemizedlist>  
6301 - </listitem>  
6302 - </itemizedlist>  
6303 - </listitem>  
6304 - </varlistentry>  
6305 - <varlistentry>  
6306 - <term>10.0.0: April 6, 2020</term>  
6307 - <listitem>  
6308 - <itemizedlist>  
6309 - <listitem>  
6310 - <para>  
6311 - Performance Enhancements  
6312 - </para>  
6313 - <itemizedlist>  
6314 - <listitem>  
6315 - <para>  
6316 - The qpdf library and executable should run much faster in  
6317 - this version than in the last several releases. Several  
6318 - internal library optimizations have been made, and there has  
6319 - been improved behavior on page splitting as well. This  
6320 - version of qpdf should outperform any of the 8.x or 9.x  
6321 - versions.  
6322 - </para>  
6323 - </listitem>  
6324 - </itemizedlist>  
6325 - </listitem>  
6326 - <listitem>  
6327 - <para>  
6328 - Incompatible API (source-level) Changes (minor)  
6329 - </para>  
6330 - <itemizedlist>  
6331 - <listitem>  
6332 - <para>  
6333 - The <function>QUtil::srandom</function> method was removed.  
6334 - It didn't do anything unless insecure random numbers were  
6335 - compiled in, and they have been off by default for a long  
6336 - time. If you were calling it, just remove the call since it  
6337 - wasn't doing anything anyway.  
6338 - </para>  
6339 - </listitem>  
6340 - </itemizedlist>  
6341 - </listitem>  
6342 - <listitem>  
6343 - <para>  
6344 - Build/Packaging Changes  
6345 - </para>  
6346 - <itemizedlist>  
6347 - <listitem>  
6348 - <para>  
6349 - Add an <literal>openssl</literal> crypto provider, which is  
6350 - implemented with OpenSSL and also works with BoringSSL.  
6351 - Thanks to Dean Scarff for this contribution. If you maintain  
6352 - qpdf for a distribution, pay special attention to make sure  
6353 - that you are including support for the crypto providers you  
6354 - want. Package maintainers will have to weigh the advantages  
6355 - of allowing users to pick a crypto provider at runtime  
6356 - against the disadvantages of adding more dependencies to  
6357 - qpdf.  
6358 - </para>  
6359 - </listitem>  
6360 - <listitem>  
6361 - <para>  
6362 - Allow qpdf to be built on stripped down systems whose C/C++  
6363 - libraries lack the <classname>wchar_t</classname> type.  
6364 - Search for <classname>wchar_t</classname> in qpdf's  
6365 - README.md for details. This should be very rare, but it is  
6366 - known to be helpful in some embedded environments.  
6367 - </para>  
6368 - </listitem>  
6369 - </itemizedlist>  
6370 - </listitem>  
6371 - <listitem>  
6372 - <para>  
6373 - CLI Enhancements  
6374 - </para>  
6375 - <itemizedlist>  
6376 - <listitem>  
6377 - <para>  
6378 - Add <literal>objectinfo</literal> key to the JSON output.  
6379 - This will be a place to put computed metadata or other  
6380 - information about PDF objects that are not immediately  
6381 - evident in other ways or that seem useful for some other  
6382 - reason. In this version, information is provided about each  
6383 - object indicating whether it is a stream and, if so, what  
6384 - its length and filters are. Without this, it was not  
6385 - possible to tell conclusively from the JSON output alone  
6386 - whether or not an object was a stream. Run @1@command@1@qpdf  
6387 - --json-help@2@command@2@ for details.  
6388 - </para>  
6389 - </listitem>  
6390 - <listitem>  
6391 - <para>  
6392 - Add new option  
6393 - @1@option@1@--remove-unreferenced-resources@2@option@2@ which takes  
6394 - <literal>auto</literal>, <literal>yes</literal>, or  
6395 - <literal>no</literal> as arguments. The new  
6396 - <literal>auto</literal> mode, which is the default, performs  
6397 - a fast heuristic over a PDF file when splitting pages to  
6398 - determine whether the expensive process of finding and  
6399 - removing unreferenced resources is likely to be of benefit.  
6400 - For most files, this new default will result in a  
6401 - significant performance improvement for splitting pages. See  
6402 - <xref linkend="ref.advanced-transformation"/> for a more  
6403 - detailed discussion.  
6404 - </para>  
6405 - </listitem>  
6406 - <listitem>  
6407 - <para>  
6408 - The @1@option@1@--preserve-unreferenced-resources@2@option@2@ is  
6409 - now just a synonym for  
6410 - @1@option@1@--remove-unreferenced-resources=no@2@option@2@.  
6411 - </para>  
6412 - </listitem>  
6413 - <listitem>  
6414 - <para>  
6415 - If the <literal>QPDF_EXECUTABLE</literal> environment  
6416 - variable is set when invoking @1@command@1@qpdf  
6417 - --bash-completion@2@command@2@ or @1@command@1@qpdf  
6418 - --zsh-completion@2@command@2@, the completion command that it  
6419 - outputs will refer to qpdf using the value of that variable  
6420 - rather than what @1@command@1@qpdf@2@command@2@ determines its  
6421 - executable path to be. This can be useful when wrapping  
6422 - @1@command@1@qpdf@2@command@2@ with a script, working with a  
6423 - version in the source tree, using an AppImage, or other  
6424 - situations where there is some indirection.  
6425 - </para>  
6426 - </listitem>  
6427 - </itemizedlist>  
6428 - </listitem>  
6429 - <listitem>  
6430 - <para>  
6431 - Library Enhancements  
6432 - </para>  
6433 - <itemizedlist>  
6434 - <listitem>  
6435 - <para>  
6436 - Random number generation is now delegated to the crypto  
6437 - provider. The old behavior is still used by the native  
6438 - crypto provider. It is still possible to provide your own  
6439 - random number generator.  
6440 - </para>  
6441 - </listitem>  
6442 - <listitem>  
6443 - <para>  
6444 - Add a new version of  
6445 - <function>QPDFObjectHandle::StreamDataProvider::provideStreamData</function>  
6446 - that accepts the <function>suppress_warnings</function> and  
6447 - <function>will_retry</function> options and allows a success  
6448 - code to be returned. This makes it possible to implement a  
6449 - <classname>StreamDataProvider</classname> that calls  
6450 - <function>pipeStreamData</function> on another stream and to  
6451 - pass the response back to the caller, which enables better  
6452 - error handling on those proxied streams.  
6453 - </para>  
6454 - </listitem>  
6455 - <listitem>  
6456 - <para>  
6457 - Update <function>QPDFObjectHandle::pipeStreamData</function>  
6458 - to return an overall success code that goes beyond whether  
6459 - or not filtered data was written successfully. This allows  
6460 - better error handling of cases that were not filtering  
6461 - errors. You have to call this explicitly. Methods in  
6462 - previously existing APIs have the same semantics as before.  
6463 - </para>  
6464 - </listitem>  
6465 - <listitem>  
6466 - <para>  
6467 - The  
6468 - <function>QPDFPageObjectHelper::placeFormXObject</function>  
6469 - method now allows separate control over whether it should be  
6470 - willing to shrink or expand objects to fit them better into  
6471 - the destination rectangle. The previous behavior was that  
6472 - shrinking was allowed but expansion was not. The previous  
6473 - behavior is still the default.  
6474 - </para>  
6475 - </listitem>  
6476 - <listitem>  
6477 - <para>  
6478 - When calling the C API, any non-zero value passed to a  
6479 - boolean parameter is treated as <literal>TRUE</literal>.  
6480 - Previously only the value <literal>1</literal> was accepted.  
6481 - This makes the C API behave more like most C interfaces and  
6482 - is known to improve compatibility with some Windows  
6483 - environments that dynamically load the DLL and call  
6484 - functions from it.  
6485 - </para>  
6486 - </listitem>  
6487 - <listitem>  
6488 - <para>  
6489 - Add <function>QPDFObjectHandle::unsafeShallowCopy</function>  
6490 - for copying only top-level dictionary keys or array items.  
6491 - This is unsafe because it creates a situation in which  
6492 - changing a lower-level item in one object may also change it  
6493 - in another object, but for cases in which you  
6494 - <emphasis>know</emphasis> you are only inserting or  
6495 - replacing top-level items, it is much faster than  
6496 - <function>QPDFObjectHandle::shallowCopy</function>.  
6497 - </para>  
6498 - </listitem>  
6499 - <listitem>  
6500 - <para>  
6501 - Add <function>QPDFObjectHandle::filterAsContents</function>,  
6502 - which filters a stream's data as a content stream. This is  
6503 - useful for parsing the contents for form XObjects in the  
6504 - same way as parsing page content streams.  
6505 - </para>  
6506 - </listitem>  
6507 - </itemizedlist>  
6508 - </listitem>  
6509 - <listitem>  
6510 - <para>  
6511 - Bug Fixes  
6512 - </para>  
6513 - <itemizedlist>  
6514 - <listitem>  
6515 - <para>  
6516 - When detecting and removing unreferenced resources during  
6517 - page splitting, traverse into form XObjects and handle their  
6518 - resources dictionaries as well.  
6519 - </para>  
6520 - </listitem>  
6521 - <listitem>  
6522 - <para>  
6523 - The same error recovery is applied to streams in other than  
6524 - the primary input file when merging or splitting pages.  
6525 - </para>  
6526 - </listitem>  
6527 - </itemizedlist>  
6528 - </listitem>  
6529 - </itemizedlist>  
6530 - </listitem>  
6531 - </varlistentry>  
6532 - <varlistentry>  
6533 - <term>9.1.1: January 26, 2020</term>  
6534 - <listitem>  
6535 - <itemizedlist>  
6536 - <listitem>  
6537 - <para>  
6538 - Build/Packaging Changes  
6539 - </para>  
6540 - <itemizedlist>  
6541 - <listitem>  
6542 - <para>  
6543 - The fix-qdf program was converted from perl to C++. As such,  
6544 - qpdf no longer has a runtime dependency on perl.  
6545 - </para>  
6546 - </listitem>  
6547 - </itemizedlist>  
6548 - </listitem>  
6549 - <listitem>  
6550 - <para>  
6551 - Library Enhancements  
6552 - </para>  
6553 - <itemizedlist>  
6554 - <listitem>  
6555 - <para>  
6556 - Added new helper routine  
6557 - <function>QUtil::call_main_from_wmain</function> which  
6558 - converts <type>wchar_t</type> arguments to UTF-8 encoded  
6559 - strings. This is useful for qpdf because library methods  
6560 - expect file names to be UTF-8 encoded, even on Windows.  
6561 - </para>  
6562 - </listitem>  
6563 - <listitem>  
6564 - <para>  
6565 - Added new <function>QUtil::read_lines_from_file</function>  
6566 - methods that take <type>FILE*</type> arguments and that  
6567 - allow preservation of end-of-line characters. This also  
6568 - fixes a bug where  
6569 - <function>QUtil::read_lines_from_file</function> wouldn't  
6570 - work properly with Unicode filenames.  
6571 - </para>  
6572 - </listitem>  
6573 - </itemizedlist>  
6574 - </listitem>  
6575 - <listitem>  
6576 - <para>  
6577 - CLI Enhancements  
6578 - </para>  
6579 - <itemizedlist>  
6580 - <listitem>  
6581 - <para>  
6582 - Added options @1@option@1@--is-encrypted@2@option@2@ and  
6583 - @1@option@1@--requires-password@2@option@2@ for testing whether a  
6584 - file is encrypted or requires a password other than the  
6585 - supplied (or empty) password. These communicate via exit  
6586 - status, making them useful for shell scripts. They also work  
6587 - on encrypted files with unknown passwords.  
6588 - </para>  
6589 - </listitem>  
6590 - </itemizedlist>  
6591 - <itemizedlist>  
6592 - <listitem>  
6593 - <para>  
6594 - Added <literal>encrypt</literal> key to JSON options. With  
6595 - the exception of the reconstructed user password for older  
6596 - encryption formats, this provides the same information as  
6597 - @1@option@1@--show-encryption@2@option@2@ but in a consistent,  
6598 - parseable format. See output of @1@command@1@qpdf  
6599 - --json-help@2@command@2@ for details.  
6600 - </para>  
6601 - </listitem>  
6602 - </itemizedlist>  
6603 - </listitem>  
6604 - <listitem>  
6605 - <para>  
6606 - Bug Fixes  
6607 - </para>  
6608 - <itemizedlist>  
6609 - <listitem>  
6610 - <para>  
6611 - In QDF mode, be sure not to write more than one XRef stream  
6612 - to a file, even when  
6613 - @1@option@1@--preserve-unreferenced@2@option@2@ is used.  
6614 - @1@command@1@fix-qdf@2@command@2@ assumes that there is only one  
6615 - XRef stream, and that it appears at the end of the file.  
6616 - </para>  
6617 - </listitem>  
6618 - <listitem>  
6619 - <para>  
6620 - When externalizing inline images, properly handle images  
6621 - whose color space is a reference to an object in the page's  
6622 - resource dictionary.  
6623 - </para>  
6624 - </listitem>  
6625 - <listitem>  
6626 - <para>  
6627 - Windows-specific fix for acquiring crypt context with a new  
6628 - keyset.  
6629 - </para>  
6630 - </listitem>  
6631 - </itemizedlist>  
6632 - </listitem>  
6633 - </itemizedlist>  
6634 - </listitem>  
6635 - </varlistentry>  
6636 - <varlistentry>  
6637 - <term>9.1.0: November 17, 2019</term>  
6638 - <listitem>  
6639 - <itemizedlist>  
6640 - <listitem>  
6641 - <para>  
6642 - Build Changes  
6643 - </para>  
6644 - <itemizedlist>  
6645 - <listitem>  
6646 - <para>  
6647 - A C++-11 compiler is now required to build qpdf.  
6648 - </para>  
6649 - </listitem>  
6650 - <listitem>  
6651 - <para>  
6652 - A new crypto provider that uses gnutls for crypto functions  
6653 - is now available and can be enabled at build time. See <xref  
6654 - linkend="ref.crypto"/> for more information about crypto  
6655 - providers and <xref linkend="ref.crypto.build"/> for  
6656 - specific information about the build.  
6657 - </para>  
6658 - </listitem>  
6659 - </itemizedlist>  
6660 - </listitem>  
6661 - <listitem>  
6662 - <para>  
6663 - Library Enhancements  
6664 - </para>  
6665 - <itemizedlist>  
6666 - <listitem>  
6667 - <para>  
6668 - Incorporate contribution from Masamichi Hosoda to properly  
6669 - handle signature dictionaries by not including them in  
6670 - object streams, formatting the <literal>Contents</literal>  
6671 - key as a hexadecimal string, and excluding the  
6672 - <literal>/Contents</literal> key from encryption and  
6673 - decryption.  
6674 - </para>  
6675 - </listitem>  
6676 - <listitem>  
6677 - <para>  
6678 - Incorporate contribution from Masamichi Hosoda to provide  
6679 - new API calls for getting file-level information about  
6680 - input and output files, enabling certain operations on  
6681 - the files at the file level rather than the object level.  
6682 - New methods include  
6683 - <function>QPDF::getXRefTable()</function>,  
6684 - <function>QPDFObjectHandle::getParsedOffset()</function>,  
6685 - <function>QPDFWriter::getRenumberedObjGen(QPDFObjGen)</function>,  
6686 - and <function>QPDFWriter::getWrittenXRefTable()</function>.  
6687 - </para>  
6688 - </listitem>  
6689 - <listitem>  
6690 - <para>  
6691 - Support build-time and runtime selectable crypto providers.  
6692 - This includes the addition of new classes  
6693 - <classname>QPDFCryptoProvider</classname> and  
6694 - <classname>QPDFCryptoImpl</classname> and the recognition  
6695 - of the <literal>QPDF_CRYPTO_PROVIDER</literal> environment  
6696 - variable. Crypto providers are described in depth in <xref  
6697 - linkend="ref.crypto"/>.  
6698 - </para>  
6699 - </listitem>  
6700 - </itemizedlist>  
6701 - </listitem>  
6702 - </itemizedlist>  
6703 - <itemizedlist>  
6704 - <listitem>  
6705 - <para>  
6706 - CLI Enhancements  
6707 - </para>  
6708 - <itemizedlist>  
6709 - <listitem>  
6710 - <para>  
6711 - Addition of the @1@option@1@--show-crypto@2@option@2@ option in  
6712 - support of selectable crypto providers, as described in  
6713 - <xref linkend="ref.crypto"/>.  
6714 - </para>  
6715 - </listitem>  
6716 - <listitem>  
6717 - <para>  
6718 - Allow <literal>:even</literal> or <literal>:odd</literal> to  
6719 - be appended to numeric ranges for specification of the even  
6720 - or odd pages from among the pages specified in the range.  
6721 - </para>  
6722 - </listitem>  
6723 - <listitem>  
6724 - <para>  
6725 - Fix shell wildcard expansion behavior (<literal>*</literal>  
6726 - and <literal>?</literal>) of the @1@command@1@qpdf.exe@2@command@2@  
6727 - as built by MSVC.  
6728 - </para>  
6729 - </listitem>  
6730 - </itemizedlist>  
6731 - </listitem>  
6732 - </itemizedlist>  
6733 - </listitem>  
6734 - </varlistentry>  
6735 - <varlistentry>  
6736 - <term>9.0.2: October 12, 2019</term>  
6737 - <listitem>  
6738 - <itemizedlist>  
6739 - <listitem>  
6740 - <para>  
6741 - Bug Fix  
6742 - </para>  
6743 - <itemizedlist>  
6744 - <listitem>  
6745 - <para>  
6746 - Fix the name of the temporary file used by  
6747 - @1@option@1@--replace-input@2@option@2@ so that it doesn't require  
6748 - path splitting and works with paths that include directories.  
6749 - </para>  
6750 - </listitem>  
6751 - </itemizedlist>  
6752 - </listitem>  
6753 - </itemizedlist>  
6754 - </listitem>  
6755 - </varlistentry>  
6756 - <varlistentry>  
6757 - <term>9.0.1: September 20, 2019</term>  
6758 - <listitem>  
6759 - <itemizedlist>  
6760 - <listitem>  
6761 - <para>  
6762 - Bug Fixes/Enhancements  
6763 - </para>  
6764 - <itemizedlist>  
6765 - <listitem>  
6766 - <para>  
6767 - Fix some build and test issues on big-endian systems and  
6768 - compilers with characters that are unsigned by default.  
6769 - The problems were in build and test only. There were no  
6770 - actual bugs in the qpdf library itself relating to  
6771 - endianness or unsigned characters.  
6772 - </para>  
6773 - </listitem>  
6774 - <listitem>  
6775 - <para>  
6776 - When a dictionary has a duplicated key, report this with a  
6777 - warning. The behavior of the library in this case is  
6778 - unchanged, but the error condition is no longer silently  
6779 - ignored.  
6780 - </para>  
6781 - </listitem>  
6782 - <listitem>  
6783 - <para>  
6784 - When a form field's display rectangle is erroneously  
6785 - specified with inverted coordinates, detect and correct this  
6786 - situation. This prevents some form fields from being flipped  
6787 - when flattening annotations on files with this condition.  
6788 - </para>  
6789 - </listitem>  
6790 - </itemizedlist>  
6791 - </listitem>  
6792 - </itemizedlist>  
6793 - </listitem>  
6794 - </varlistentry>  
6795 - <varlistentry>  
6796 - <term>9.0.0: August 31, 2019</term>  
6797 - <listitem>  
6798 - <itemizedlist>  
6799 - <listitem>  
6800 - <para>  
6801 - Incompatible API (source-level) Changes (minor)  
6802 - </para>  
6803 - <itemizedlist>  
6804 - <listitem>  
6805 - <para>  
6806 - The method <function>QUtil::strcasecmp</function> has been  
6807 - renamed to <function>QUtil::str_compare_nocase</function>.  
6808 - This incompatible change is necessary to enable qpdf to  
6809 - build on platforms that define  
6810 - <function>strcasecmp</function> as a macro.  
6811 - </para>  
6812 - </listitem>  
6813 - <listitem>  
6814 - <para>  
6815 - The <function>QPDF::copyForeignObject</function> method had  
6816 - an overloaded version that took a boolean parameter that was  
6817 - not used. If you were using this version, just omit the  
6818 - extra parameter.  
6819 - </para>  
6820 - </listitem>  
6821 - <listitem>  
6822 - <para>  
6823 - There was a version of  
6824 - <function>QPDFTokenizer::expectInlineImage</function> that  
6825 - took no arguments. This version has been removed since it  
6826 - caused the tokenizer to return incorrect inline images. A  
6827 - new version was added some time ago that produces correct  
6828 - output. This is a very low level method that doesn't make  
6829 - sense to call outside of qpdf's lexical engine. There are  
6830 - higher level methods for tokenizing content streams.  
6831 - </para>  
6832 - </listitem>  
6833 - <listitem>  
6834 - <para>  
6835 - Change  
6836 - <function>QPDFOutlineDocumentHelper::getTopLevelOutlines</function>  
6837 - and <function>QPDFOutlineObjectHelper::getKids</function> to  
6838 - return a <type>std::vector</type> instead of a  
6839 - <type>std::list</type> of  
6840 - <classname>QPDFOutlineObjectHelper</classname> objects.  
6841 - </para>  
6842 - </listitem>  
6843 - <listitem>  
6844 - <para>  
6845 - Remove method  
6846 - <function>QPDFTokenizer::allowPoundAnywhereInName</function>.  
6847 - This function would allow creation of name tokens whose  
6848 - value would change when unparsed, which is never the correct  
6849 - behavior.  
6850 - </para>  
6851 - </listitem>  
6852 - </itemizedlist>  
6853 - </listitem>  
6854 - <listitem>  
6855 - <para>  
6856 - CLI Enhancements  
6857 - </para>  
6858 - <itemizedlist>  
6859 - <listitem>  
6860 - <para>  
6861 - The @1@option@1@--replace-input@2@option@2@ option may be given in  
6862 - place of an output file name. This causes qpdf to overwrite  
6863 - the input file with the output. See the description of  
6864 - @1@option@1@--replace-input@2@option@2@ in <xref  
6865 - linkend="ref.basic-options"/> for more details.  
6866 - </para>  
6867 - </listitem>  
6868 - <listitem>  
6869 - <para>  
6870 - The @1@option@1@--recompress-flate@2@option@2@ instructs  
6871 - @1@command@1@qpdf@2@command@2@ to recompress streams that are  
6872 - already compressed with <literal>/FlateDecode</literal>.  
6873 - Useful with @1@option@1@--compression-level@2@option@2@.  
6874 - </para>  
6875 - </listitem>  
6876 - <listitem>  
6877 - <para>  
6878 - The  
6879 - @1@option@1@--compression-level=@1@replaceable@1@level@2@replaceable@2@@2@option@2@  
6880 - sets the zlib compression level used for any streams  
6881 - compressed by <literal>/FlateDecode</literal>. Most  
6882 - effective when combined with  
6883 - @1@option@1@--recompress-flate@2@option@2@.  
6884 - </para>  
6885 - </listitem>  
6886 - </itemizedlist>  
6887 - </listitem>  
6888 - <listitem>  
6889 - <para>  
6890 - Library Enhancements  
6891 - </para>  
6892 - <itemizedlist>  
6893 - <listitem>  
6894 - <para>  
6895 - A new namespace <classname>QIntC</classname>, provided by  
6896 - @1@filename@1@qpdf/QIntC.hh@2@filename@2@, provides safe conversion  
6897 - methods between different integer types. These conversion  
6898 - methods do range checking to ensure that the cast can be  
6899 - performed with no loss of information. Every use of  
6900 - <function>static_cast</function> in the library was  
6901 - inspected to see if it could use one of these safe  
6902 - converters instead. See <xref linkend="ref.casting"/> for  
6903 - additional details.  
6904 - </para>  
6905 - </listitem>  
6906 - <listitem>  
6907 - <para>  
6908 - Method <function>QPDF::anyWarnings</function> tells whether  
6909 - there have been any warnings without clearing the list of  
6910 - warnings.  
6911 - </para>  
6912 - </listitem>  
6913 - <listitem>  
6914 - <para>  
6915 - Method <function>QPDF::closeInputSource</function> closes or  
6916 - otherwise releases the input source. This enables the input  
6917 - file to be deleted or renamed.  
6918 - </para>  
6919 - </listitem>  
6920 - <listitem>  
6921 - <para>  
6922 - New methods have been added to <classname>QUtil</classname>  
6923 - for converting back and forth between strings and unsigned  
6924 - integers: <function>uint_to_string</function>,  
6925 - <function>uint_to_string_base</function>,  
6926 - <function>string_to_uint</function>, and  
6927 - <function>string_to_ull</function>.  
6928 - </para>  
6929 - </listitem>  
6930 - <listitem>  
6931 - <para>  
6932 - New methods have been added to  
6933 - <classname>QPDFObjectHandle</classname> that return the  
6934 - value of <classname>Integer</classname> objects as  
6935 - <type>int</type> or <type>unsigned int</type> with range  
6936 - checking and sensible fallback values, and a new method was  
6937 - added to return an unsigned value. This makes it easier to  
6938 - write code that is safe from unintentional data loss.  
6939 - Functions: <function>getUIntValue</function>,  
6940 - <function>getIntValueAsInt</function>,  
6941 - <function>getUIntValueAsUInt</function>.  
6942 - </para>  
6943 - </listitem>  
6944 - <listitem>  
6945 - <para>  
6946 - When parsing content streams with  
6947 - <classname>QPDFObjectHandle::ParserCallbacks</classname>, in  
6948 - place of the method  
6949 - <function>handleObject(QPDFObjectHandle)</function>, the  
6950 - developer may override  
6951 - <function>handleObject(QPDFObjectHandle, size_t offset,  
6952 - size_t length)</function>. If this method is defined, it  
6953 - will be invoked with the object along with its offset and  
6954 - length within the overall contents being parsed. Intervening  
6955 - spaces and comments are not included in offset and length.  
6956 - Additionally, a new method  
6957 - <function>contentSize(size_t)</function> may be implemented.  
6958 - If present, it will be called prior to the first call to  
6959 - <function>handleObject</function> with the total size in  
6960 - bytes of the combined contents.  
6961 - </para>  
6962 - </listitem>  
6963 - <listitem>  
6964 - <para>  
6965 - New methods <function>QPDF::userPasswordMatched</function>  
6966 - and <function>QPDF::ownerPasswordMatched</function> have  
6967 - been added to enable a caller to determine whether the  
6968 - supplied password was the user password, the owner password,  
6969 - or both. This information is also displayed by @1@command@1@qpdf  
6970 - --show-encryption@2@command@2@ and @1@command@1@qpdf  
6971 - --check@2@command@2@.  
6972 - </para>  
6973 - </listitem>  
6974 - <listitem>  
6975 - <para>  
6976 - Static method  
6977 - <function>Pl_Flate::setCompressionLevel</function> can be  
6978 - called to set the zlib compression level globally used by  
6979 - all instances of Pl_Flate in deflate mode.  
6980 - </para>  
6981 - </listitem>  
6982 - <listitem>  
6983 - <para>  
6984 - The method  
6985 - <function>QPDFWriter::setRecompressFlate</function> can be  
6986 - called to tell <classname>QPDFWriter</classname> to  
6987 - uncompress and recompress streams already compressed with  
6988 - <literal>/FlateDecode</literal>.  
6989 - </para>  
6990 - </listitem>  
6991 - <listitem>  
6992 - <para>  
6993 - The underlying implementation of QPDF arrays has been  
6994 - enhanced to be much more memory efficient when dealing with  
6995 - arrays with lots of nulls. This enables qpdf to use  
6996 - drastically less memory for certain types of files.  
6997 - </para>  
6998 - </listitem>  
6999 - <listitem>  
7000 - <para>  
7001 - When traversing the pages tree, if nodes are encountered  
7002 - with invalid types, the types are fixed, and a warning is  
7003 - issued.  
7004 - </para>  
7005 - </listitem>  
7006 - <listitem>  
7007 - <para>  
7008 - A new helper method  
7009 - <function>QUtil::read_file_into_memory</function> was added.  
7010 - </para>  
7011 - </listitem>  
7012 - <listitem>  
7013 - <para>  
7014 - All conditions previously reported by  
7015 - <function>QPDF::checkLinearization()</function> as errors  
7016 - are now presented as warnings.  
7017 - </para>  
7018 - </listitem>  
7019 - <listitem>  
7020 - <para>  
7021 - Name tokens containing the <literal>#</literal> character  
7022 - not preceded by two hexadecimal digits, which is invalid in  
7023 - PDF 1.2 and above, are properly handled by the library: a  
7024 - warning is generated, and the name token is properly  
7025 - preserved, even if invalid, in the output. See  
7026 - @1@filename@1@ChangeLog@2@filename@2@ for a more complete  
7027 - description of this change.  
7028 - </para>  
7029 - </listitem>  
7030 - </itemizedlist>  
7031 - </listitem>  
7032 - <listitem>  
7033 - <para>  
7034 - Bug Fixes  
7035 - </para>  
7036 - <itemizedlist>  
7037 - <listitem>  
7038 - <para>  
7039 - A small handful of memory issues, assertion failures, and  
7040 - unhandled exceptions that could occur on badly mangled input  
7041 - files have been fixed. Most of these problems were found by  
7042 - Google's OSS-Fuzz project.  
7043 - </para>  
7044 - </listitem>  
7045 - <listitem>  
7046 - <para>  
7047 - When @1@command@1@qpdf --check@2@command@2@ or @1@command@1@qpdf  
7048 - --check-linearization@2@command@2@ encounters a file with  
7049 - linearization warnings but not errors, it now properly exits  
7050 - with exit code 3 instead of 2.  
7051 - </para>  
7052 - </listitem>  
7053 - <listitem>  
7054 - <para>  
7055 - The @1@option@1@--completion-bash@2@option@2@ and  
7056 - @1@option@1@--completion-zsh@2@option@2@ options now work properly  
7057 - when qpdf is invoked as an AppImage.  
7058 - </para>  
7059 - </listitem>  
7060 - <listitem>  
7061 - <para>  
7062 - Calling  
7063 - <function>QPDFWriter::set*EncryptionParameters</function> on  
7064 - a <classname>QPDFWriter</classname> object whose output  
7065 - filename has not yet been set no longer produces a  
7066 - segmentation fault.  
7067 - </para>  
7068 - </listitem>  
7069 - <listitem>  
7070 - <para>  
7071 - When reading encrypted files, follow the spec more closely  
7072 - regarding encryption key length. This allows qpdf to open  
7073 - encrypted files in most cases when they have invalid or  
7074 - missing /Length keys in the encryption dictionary.  
7075 - </para>  
7076 - </listitem>  
7077 - </itemizedlist>  
7078 - </listitem>  
7079 - <listitem>  
7080 - <para>  
7081 - Build Changes  
7082 - </para>  
7083 - <itemizedlist>  
7084 - <listitem>  
7085 - <para>  
7086 - On platforms that support it, qpdf now builds with  
7087 - @1@option@1@-fvisibility=hidden@2@option@2@. If you build qpdf with  
7088 - your own build system, this is now safe to use. This  
7089 - prevents methods that are not part of the public API from  
7090 - being exported by the shared library, and makes qpdf's ELF  
7091 - shared libraries (used on Linux, MacOS, and most other UNIX  
7092 - flavors) behave more like the Windows DLL. Since the DLL  
7093 - already behaves in much this way, it is unlikely that there  
7094 - are any methods that were accidentally not exported.  
7095 - However, with ELF shared libraries, typeinfo for some  
7096 - classes has to be explicitly exported. If there are problems  
7097 - in dynamically linked code catching exceptions or  
7098 - subclassing, this could be the reason. If you see this,  
7099 - please report a bug at <ulink  
7100 - url="https://github.com/qpdf/qpdf/issues/">https://github.com/qpdf/qpdf/issues/</ulink>.  
7101 - </para>  
7102 - </listitem>  
7103 - <listitem>  
7104 - <para>  
7105 - QPDF is now compiled with integer conversion and sign  
7106 - conversion warnings enabled. Numerous changes were made to  
7107 - the library to make this safe.  
7108 - </para>  
7109 - </listitem>  
7110 - <listitem>  
7111 - <para>  
7112 - QPDF's @1@command@1@make install@2@command@2@ target explicitly  
7113 - specifies the mode to use when installing files instead of  
7114 - relying on the user's umask. It was previously doing this for  
7115 - some files but not others.  
7116 - </para>  
7117 - </listitem>  
7118 - <listitem>  
7119 - <para>  
7120 - If @1@command@1@pkg-config@2@command@2@ is available, use it to  
7121 - locate @1@filename@1@libjpeg@2@filename@2@ and  
7122 - @1@filename@1@zlib@2@filename@2@ dependencies, falling back on old  
7123 - behavior if unsuccessful.  
7124 - </para>  
7125 - </listitem>  
7126 - </itemizedlist>  
7127 - </listitem>  
7128 - <listitem>  
7129 - <para>  
7130 - Other Notes  
7131 - </para>  
7132 - <itemizedlist>  
7133 - <listitem>  
7134 - <para>  
7135 - QPDF has been fully integrated into <ulink  
7136 - url="https://github.com/google/oss-fuzz">Google's OSS-Fuzz  
7137 - project</ulink>. This project exercises code with randomly  
7138 - mutated inputs and is great for discovering hidden  
7139 - crashes and security issues. Several bugs found by oss-fuzz  
7140 - have already been fixed in qpdf.  
7141 - </para>  
7142 - </listitem>  
7143 - </itemizedlist>  
7144 - </listitem>  
7145 - </itemizedlist>  
7146 - </listitem>  
7147 - </varlistentry>  
7148 - <varlistentry>  
7149 - <term>8.4.2: May 18, 2019</term>  
7150 - <listitem>  
7151 - <para>  
7152 - This release has just one change: correction of a buffer overrun  
7153 - in the Windows code used to open files. Windows users should  
7154 - take this update. There are no code changes that affect  
7155 - non-Windows releases.  
7156 - </para>  
7157 - </listitem>  
7158 - </varlistentry>  
7159 - <varlistentry>  
7160 - <term>8.4.1: April 27, 2019</term>  
7161 - <listitem>  
7162 - <itemizedlist>  
7163 - <listitem>  
7164 - <para>  
7165 - Enhancements  
7166 - </para>  
7167 - <itemizedlist>  
7168 - <listitem>  
7169 - <para>  
7170 - When @1@command@1@qpdf --version@2@command@2@ is run, it will  
7171 - detect if the qpdf CLI was built with a different version of  
7172 - qpdf than the library, which may indicate a problem with the  
7173 - installation.  
7174 - </para>  
7175 - </listitem>  
7176 - <listitem>  
7177 - <para>  
7178 - New option @1@option@1@--remove-page-labels@2@option@2@ will remove page  
7179 - labels before generating output. This used to happen if you  
7180 - ran @1@command@1@qpdf --empty --pages .. --@2@command@2@, but the  
7181 - behavior changed in qpdf 8.3.0. This option enables people  
7182 - who were relying on the old behavior to get it again.  
7183 - </para>  
7184 - </listitem>  
7185 - <listitem>  
7186 - <para>  
7187 - New option  
7188 - @1@option@1@--keep-files-open-threshold=@1@replaceable@1@count@2@replaceable@2@@2@option@2@  
7189 - can be used to override the number of files that qpdf will use  
7190 - to trigger the behavior of not keeping all files open when  
7191 - merging files. This may be necessary if your system allows  
7192 - fewer than the default value of 200 files to be open at the  
7193 - same time.  
7194 - </para>  
7195 - </listitem>  
7196 - </itemizedlist>  
7197 - </listitem>  
7198 - <listitem>  
7199 - <para>  
7200 - Bug Fixes  
7201 - </para>  
7202 - <itemizedlist>  
7203 - <listitem>  
7204 - <para>  
7205 - Handle Unicode characters in filenames on Windows. The  
7206 - changes to support Unicode on the CLI in Windows broke  
7207 - Unicode filenames for Windows.  
7208 - </para>  
7209 - </listitem>  
7210 - <listitem>  
7211 - <para>  
7212 - Slightly tighten logic that determines whether an object is  
7213 - a page. This should resolve problems in some rare files  
7214 - where some non-page objects were passing qpdf's test for  
7215 - whether something was a page, thus causing them to be  
7216 - erroneously lost during page splitting operations.  
7217 - </para>  
7218 - </listitem>  
7219 - <listitem>  
7220 - <para>  
7221 - Revert change that included preservation of outlines  
7222 - (bookmarks) in @1@option@1@--split-pages@2@option@2@. The way it  
7223 - was implemented in 8.3.0 and 8.4.0 caused a very significant  
7224 - degradation of performance for splitting certain files. A  
7225 - future release of qpdf may re-introduce the behavior in a  
7226 - more performant and also more correct fashion.  
7227 - </para>  
7228 - </listitem>  
7229 - <listitem>  
7230 - <para>  
7231 - In JSON mode, add missing leading 0 to decimal values  
7232 - between -1 and 1 even if not present in the input. The JSON  
7233 - specification requires the leading 0. The PDF specification  
7234 - does not.  
7235 - </para>  
7236 - </listitem>  
7237 - </itemizedlist>  
7238 - </listitem>  
7239 - </itemizedlist>  
7240 - </listitem>  
7241 - </varlistentry>  
7242 - <varlistentry>  
7243 - <term>8.4.0: February 1, 2019</term>  
7244 - <listitem>  
7245 - <itemizedlist>  
7246 - <listitem>  
7247 - <para>  
7248 - Command-line Enhancements  
7249 - </para>  
7250 - <itemizedlist>  
7251 - <listitem>  
7252 - <para>  
7253 - <emphasis>Non-compatible CLI change:</emphasis> The qpdf  
7254 - command-line tool interprets passwords given at the  
7255 - command-line differently from previous releases when the  
7256 - passwords contain non-ASCII characters. In some cases, the  
7257 - behavior differs from previous releases. For a discussion of  
7258 - the current behavior, please see <xref  
7259 - linkend="ref.unicode-passwords"/>. The incompatibilities are  
7260 - as follows:  
7261 - <itemizedlist>  
7262 - <listitem>  
7263 - <para>  
7264 - On Windows, qpdf now receives all command-line options as  
7265 - Unicode strings if it can figure out the appropriate  
7266 - compile/link options. This is enabled at least for MSVC  
7267 - and mingw builds. That means that if non-ASCII strings  
7268 - are passed to the qpdf CLI in Windows, qpdf will now  
7269 - correctly receive them. In the past, they would have  
7270 - either been encoded as Windows code page 1252 (also known  
7271 - as "Windows ANSI") or as something  
7272 - unintelligible. In almost all cases, qpdf is able to  
7273 - properly interpret Unicode arguments now, whereas in the  
7274 - past, it would almost never interpret them properly. The  
7275 - result is that non-ASCII passwords given to the qpdf CLI  
7276 - on Windows now have a much greater chance of creating PDF  
7277 - files that can be opened by a variety of readers. In the  
7278 - past, usually files encrypted from the Windows CLI using  
7279 - non-ASCII passwords would not be readable by most  
7280 - viewers. Note that the current version of qpdf is able to  
7281 - decrypt files that it previously created using the  
7282 - previously supplied password.  
7283 - </para>  
7284 - </listitem>  
7285 - <listitem>  
7286 - <para>  
7287 - The PDF specification requires passwords to be encoded as  
7288 - UTF-8 for 256-bit encryption and with PDF Doc encoding  
7289 - for 40-bit or 128-bit encryption. Older versions of qpdf  
7290 - left it up to the user to provide passwords with the  
7291 - correct encoding. The qpdf CLI now detects when a  
7292 - password is given with UTF-8 encoding and automatically  
7293 - transcodes it to what the PDF spec requires. While this  
7294 - is almost always the correct behavior, it is possible to  
7295 - override the behavior if there is some reason to do so.  
7296 - This is discussed in more depth in <xref  
7297 - linkend="ref.unicode-passwords"/>.  
7298 - </para>  
7299 - </listitem>  
7300 - </itemizedlist>  
7301 - </para>  
7302 - </listitem>  
7303 - <listitem>  
7304 - <para>  
7305 - New options @1@option@1@--externalize-inline-images@2@option@2@,  
7306 - @1@option@1@--ii-min-bytes@2@option@2@, and  
7307 - @1@option@1@--keep-inline-images@2@option@2@ control qpdf's  
7308 - handling of inline images and possible conversion of them to  
7309 - regular images. By default,  
7310 - @1@option@1@--optimize-images@2@option@2@ now also applies to  
7311 - inline images. These options are discussed in <xref  
7312 - linkend="ref.advanced-transformation"/>.  
7313 - </para>  
7314 - </listitem>  
7315 - <listitem>  
7316 - <para>  
7317 - Add options @1@option@1@--overlay@2@option@2@ and  
7318 - @1@option@1@--underlay@2@option@2@ for overlaying or underlaying  
7319 - pages of other files onto output pages. See <xref  
7320 - linkend="ref.overlay-underlay"/> for details.  
7321 - </para>  
7322 - </listitem>  
7323 - <listitem>  
7324 - <para>  
7325 - When opening an encrypted file with a password, if the  
7326 - specified password doesn't work and the password contains  
7327 - any non-ASCII characters, qpdf will try a number of  
7328 - alternative passwords to try to compensate for possible  
7329 - character encoding errors. This behavior can be suppressed  
7330 - with the @1@option@1@--suppress-password-recovery@2@option@2@  
7331 - option. See <xref linkend="ref.unicode-passwords"/> for a  
7332 - full discussion.  
7333 - </para>  
7334 - </listitem>  
7335 - <listitem>  
7336 - <para>  
7337 - Add the @1@option@1@--password-mode@2@option@2@ option to fine-tune  
7338 - how qpdf interprets password arguments, especially when they  
7339 - contain non-ASCII characters. See <xref  
7340 - linkend="ref.unicode-passwords"/> for more information.  
7341 - </para>  
7342 - </listitem>  
7343 - <listitem>  
7344 - <para>  
7345 - In the @1@option@1@--pages@2@option@2@ option, it is now possible  
7346 - to copy the same page more than once from the same file  
7347 - without using the previous workaround of specifying two  
7348 - different paths to the same file.  
7349 - </para>  
7350 - </listitem>  
7351 - <listitem>  
7352 - <para>  
7353 - In the @1@option@1@--pages@2@option@2@ option, allow use of  
7354 - "." as a shortcut for the primary input file.  
7355 - That way, you can do @1@command@1@qpdf in.pdf --pages . 1-2 --  
7356 - out.pdf@2@command@2@ instead of having to repeat  
7357 - @1@filename@1@in.pdf@2@filename@2@ in the command.  
7358 - </para>  
7359 - </listitem>  
7360 - <listitem>  
7361 - <para>  
7362 - When encrypting with 128-bit and 256-bit encryption, new  
7363 - encryption options @1@option@1@--assemble@2@option@2@,  
7364 - @1@option@1@--annotate@2@option@2@, @1@option@1@--form@2@option@2@, and  
7365 - @1@option@1@--modify-other@2@option@2@ allow finer  
7366 - granularity in configuring permissions. Before, the  
7367 - @1@option@1@--modify@2@option@2@ option only configured certain  
7368 - predefined groups of permissions.  
7369 - </para>  
7370 - </listitem>  
7371 - </itemizedlist>  
7372 - </listitem>  
7373 - <listitem>  
7374 - <para>  
7375 - Bug Fixes and Enhancements  
7376 - </para>  
7377 - <itemizedlist>  
7378 - <listitem>  
7379 - <para>  
7380 - <emphasis>Potential data-loss bug:</emphasis> Versions of  
7381 - qpdf between 8.1.0 and 8.3.0 had a bug that could cause page  
7382 - splitting and merging operations to drop some font or image  
7383 - resources if the PDF file's internal structure shared these  
7384 - resource lists across pages and if some but not all of the  
7385 - pages in the output did not reference all the fonts and  
7386 - images. Using the  
7387 - @1@option@1@--preserve-unreferenced-resources@2@option@2@ option  
7388 - would work around the incorrect behavior. This bug was the  
7389 - result of a typo in the code and a deficiency in the test  
7390 - suite. The case that triggered the error was known, just not  
7391 - handled properly. This case is now exercised in qpdf's test  
7392 - suite and properly handled.  
7393 - </para>  
7394 - </listitem>  
7395 - <listitem>  
7396 - <para>  
7397 - When optimizing images, detect and refuse to optimize  
7398 - images that can't be converted to JPEG because of bit depth  
7399 - or color space.  
7400 - </para>  
7401 - </listitem>  
7402 - <listitem>  
7403 - <para>  
7404 - Linearization and page manipulation APIs now detect and  
7405 - recover from files that have duplicate Page objects in the  
7406 - pages tree.  
7407 - </para>  
7408 - </listitem>  
7409 - <listitem>  
7410 - <para>  
7411 - When using the older option @1@option@1@--stream-data=compress@2@option@2@  
7412 - with object streams, object streams and xref streams were  
7413 - not compressed. This has been fixed.  
7414 - </para>  
7415 - </listitem>  
7416 - <listitem>  
7417 - <para>  
7418 - When the tokenizer returns inline image tokens, delimiters  
7419 - following <literal>ID</literal> and <literal>EI</literal>  
7420 - operators are no longer excluded. This makes it possible to  
7421 - reliably extract the actual image data.  
7422 - </para>  
7423 - </listitem>  
7424 - </itemizedlist>  
7425 - </listitem>  
7426 - <listitem>  
7427 - <para>  
7428 - Library Enhancements  
7429 - </para>  
7430 - <itemizedlist>  
7431 - <listitem>  
7432 - <para>  
7433 - Add method  
7434 - <function>QPDFPageObjectHelper::externalizeInlineImages</function>  
7435 - to convert inline images to regular images.  
7436 - </para>  
7437 - </listitem>  
7438 - <listitem>  
7439 - <para>  
7440 - Add method  
7441 - <function>QUtil::possible_repaired_encodings()</function> to  
7442 - generate a list of strings that represent other ways the  
7443 - given string could have been encoded. This is the method the  
7444 - QPDF CLI uses to generate the strings it tries when  
7445 - recovering incorrectly encoded Unicode passwords.  
7446 - </para>  
7447 - </listitem>  
7448 - <listitem>  
7449 - <para>  
7450 - Add new versions of  
7451 - <function>QPDFWriter::setR{3,4,5,6}EncryptionParameters</function>  
7452 - that allow more granular setting of permissions bits. See  
7453 - @1@filename@1@QPDFWriter.hh@2@filename@2@ for details.  
7454 - </para>  
7455 - </listitem>  
7456 - <listitem>  
7457 - <para>  
7458 - Add new versions of the transcoders from UTF-8 to  
7459 - single-byte coding systems in <classname>QUtil</classname>  
7460 - that report success or failure rather than just substituting  
7461 - a specified unknown character.  
7462 - </para>  
7463 - </listitem>  
7464 - <listitem>  
7465 - <para>  
7466 - Add method <function>QUtil::analyze_encoding()</function> to  
7467 - determine whether a string has high-bit characters and  
7468 - appears to be UTF-16 or valid UTF-8 encoding.  
7469 - </para>  
7470 - </listitem>  
7471 - <listitem>  
7472 - <para>  
7473 - Add new method  
7474 - <function>QPDFPageObjectHelper::shallowCopyPage()</function>  
7475 - to create a new page that is a "shallow copy" of a  
7476 - page. The resulting object is an indirect object ready to be  
7477 - passed to  
7478 - <function>QPDFPageDocumentHelper::addPage()</function> for  
7479 - either the original <classname>QPDF</classname> object or a  
7480 - different one. This is what the @1@command@1@qpdf@2@command@2@  
7481 - command-line tool uses to copy the same page multiple times  
7482 - from the same file during splitting and merging operations.  
7483 - </para>  
7484 - </listitem>  
7485 - <listitem>  
7486 - <para>  
7487 - Add method <function>QPDF::getUniqueId()</function>, which  
7488 - returns a unique identifier for the given QPDF object. The  
7489 - identifier will be unique across the life of the  
7490 - application. The returned value can be safely used as a map  
7491 - key.  
7492 - </para>  
7493 - </listitem>  
7494 - <listitem>  
7495 - <para>  
7496 - Add method <function>QPDF::setImmediateCopyFrom</function>.  
7497 - This further enhances qpdf's ability to allow a  
7498 - <classname>QPDF</classname> object from which objects are  
7499 - being copied to go out of scope before the destination  
7500 - object is written. If you call this method on a  
7501 - <classname>QPDF</classname> instance, objects copied  
7502 - <emphasis>from</emphasis> this instance will be copied  
7503 - immediately instead of lazily. This option uses more memory  
7504 - but allows the source object to go out of scope before the  
7505 - destination object is written in all cases. See comments in  
7506 - @1@filename@1@QPDF.hh@2@filename@2@ for details.  
7507 - </para>  
7508 - </listitem>  
7509 - <listitem>  
7510 - <para>  
7511 - Add method  
7512 - <function>QPDFPageObjectHelper::getAttribute</function> for  
7513 - retrieving an attribute from the page dictionary taking  
7514 - inheritance into consideration, and optionally making a copy  
7515 - if your intention is to modify the attribute.  
7516 - </para>  
7517 - </listitem>  
7518 - <listitem>  
7519 - <para>  
7520 - Fix long-standing limitation of  
7521 - <function>QPDFPageObjectHelper::getPageImages</function> so  
7522 - that it now properly reports images from inherited resources  
7523 - dictionaries, eliminating the need to call  
7524 - <function>QPDFPageDocumentHelper::pushInheritedAttributesToPage</function>  
7525 - in this case.  
7526 - </para>  
7527 - </listitem>  
7528 - <listitem>  
7529 - <para>  
7530 - Add method  
7531 - <function>QPDFObjectHandle::getUniqueResourceName</function>  
7532 - for finding an unused name in a resource dictionary.  
7533 - </para>  
7534 - </listitem>  
7535 - <listitem>  
7536 - <para>  
7537 - Add method  
7538 - <function>QPDFPageObjectHelper::getFormXObjectForPage</function>  
7539 - for generating a form XObject equivalent to a page. The  
7540 - resulting object can be used in the same file or copied to  
7541 - another file with <function>copyForeignObject</function>.  
7542 - This can be useful for implementing underlay, overlay, n-up,  
7543 - thumbnails, or any other functionality requiring replication  
7544 - of pages in other contexts.  
7545 - </para>  
7546 - </listitem>  
7547 - <listitem>  
7548 - <para>  
7549 - Add method  
7550 - <function>QPDFPageObjectHelper::placeFormXObject</function>  
7551 - for generating content stream text that places a given form  
7552 - XObject on a page, centered and fit within a specified  
7553 - rectangle. This method takes care of computing the proper  
7554 - transformation matrix and may optionally compensate for  
7555 - rotation or scaling of the destination page.  
7556 - </para>  
7557 - </listitem>  
7558 - </itemizedlist>  
7559 - </listitem>  
7560 - <listitem>  
7561 - <para>  
7562 - Build Improvements  
7563 - </para>  
7564 - <itemizedlist>  
7565 - <listitem>  
7566 - <para>  
7567 - Add new configure option  
7568 - @1@option@1@--enable-avoid-windows-handle@2@option@2@, which causes  
7569 - the preprocessor symbol  
7570 - <literal>AVOID_WINDOWS_HANDLE</literal> to be defined. When  
7571 - defined, qpdf will avoid referencing the Windows  
7572 - <classname>HANDLE</classname> type, which is disallowed with  
7573 - certain versions of the Windows SDK.  
7574 - </para>  
7575 - </listitem>  
7576 - <listitem>  
7577 - <para>  
7578 - For Windows builds, attempt to determine what options, if  
7579 - any, have to be passed to the compiler and linker to enable  
7580 - use of <function>wmain</function>. This causes the  
7581 - preprocessor symbol <literal>WINDOWS_WMAIN</literal> to be  
7582 - defined. If you do your own builds with other compilers, you  
7583 - can define this symbol to cause <function>wmain</function>  
7584 - to be used. This is needed to allow the Windows  
7585 - @1@command@1@qpdf@2@command@2@ command to receive Unicode  
7586 - command-line options.  
7587 - </para>  
7588 - </listitem>  
7589 - </itemizedlist>  
7590 - </listitem>  
7591 - </itemizedlist>  
7592 - </listitem>  
7593 - </varlistentry>  
7594 - <varlistentry>  
7595 - <term>8.3.0: January 7, 2019</term>  
7596 - <listitem>  
7597 - <itemizedlist>  
7598 - <listitem>  
7599 - <para>  
7600 - Command-line Enhancements  
7601 - </para>  
7602 - <itemizedlist>  
7603 - <listitem>  
7604 - <para>  
7605 - Shell completion: you can now use eval @1@command@1@$(qpdf  
7606 - --completion-bash)@2@command@2@ and eval @1@command@1@$(qpdf  
7607 - --completion-zsh)@2@command@2@ to enable shell completion for  
7608 - bash and zsh.  
7609 - </para>  
7610 - </listitem>  
7611 - <listitem>  
7612 - <para>  
7613 - Page numbers (also known as page labels) are now preserved  
7614 - when merging and splitting files with the  
7615 - @1@option@1@--pages@2@option@2@ and @1@option@1@--split-pages@2@option@2@  
7616 - options.  
7617 - </para>  
7618 - </listitem>  
7619 - <listitem>  
7620 - <para>  
7621 - Bookmarks are partially preserved when splitting pages with  
7622 - the @1@option@1@--split-pages@2@option@2@ option. Specifically, the  
7623 - outlines dictionary and some supporting metadata are copied  
7624 - into the split files. The result is that all bookmarks from  
7625 - the original file appear, those that point to pages that are  
7626 - preserved work, and those that point to pages that are not  
7627 - preserved don't do anything. This is an interim step toward  
7628 - proper support for bookmarks in splitting and merging  
7629 - operations.  
7630 - </para>  
7631 - </listitem>  
7632 - <listitem>  
7633 - <para>  
7634 - Page collation: add new option @1@option@1@--collate@2@option@2@.  
7635 - When specified, the semantics of @1@option@1@--pages@2@option@2@  
7636 - change from concatenation to collation. See <xref  
7637 - linkend="ref.page-selection"/> for examples and discussion.  
7638 - </para>  
7639 - </listitem>  
7640 - <listitem>  
7641 - <para>  
7642 - Generation of information in JSON format, primarily to  
7643 - facilitate use of qpdf from languages other than C++. Add  
7644 - new options @1@option@1@--json@2@option@2@,  
7645 - @1@option@1@--json-key@2@option@2@, and  
7646 - @1@option@1@--json-object@2@option@2@ to generate a JSON  
7647 - representation of the PDF file. Run @1@command@1@qpdf  
7648 - --json-help@2@command@2@ to get a description of the JSON  
7649 - format. For more information, see <xref linkend="ref.json"/>.  
7650 - </para>  
7651 - </listitem>  
7652 - <listitem>  
7653 - <para>  
7654 - The @1@option@1@--generate-appearances@2@option@2@ flag will cause  
7655 - qpdf to generate appearances for form fields if the PDF file  
7656 - indicates that form field appearances are out of date. This  
7657 - can happen when PDF forms are filled in by a program that  
7658 - doesn't know how to regenerate the appearances of the  
7659 - filled-in fields.  
7660 - </para>  
7661 - </listitem>  
7662 - <listitem>  
7663 - <para>  
7664 - The @1@option@1@--flatten-annotations@2@option@2@ flag can be used  
7665 - to <emphasis>flatten</emphasis> annotations, including form  
7666 - fields. Ordinarily, annotations are drawn separately from  
7667 - the page. Flattening annotations is the process of combining  
7668 - their appearances into the page's contents. You might want  
7669 - to do this if you are going to rotate or combine pages using  
7670 - a tool that doesn't understand about annotations. You may  
7671 - also want to use @1@option@1@--generate-appearances@2@option@2@  
7672 - when using this flag since annotations for outdated form  
7673 - fields are not flattened as that would cause loss of  
7674 - information.  
7675 - </para>  
7676 - </listitem>  
7677 - <listitem>  
7678 - <para>  
7679 - The @1@option@1@--optimize-images@2@option@2@ flag tells qpdf to  
7680 - recompress every image using DCT (JPEG) compression as  
7681 - long as the image is not already compressed with lossy  
7682 - compression and recompressing the image reduces its size.  
7683 - The additional options @1@option@1@--oi-min-width@2@option@2@,  
7684 - @1@option@1@--oi-min-height@2@option@2@, and  
7685 - @1@option@1@--oi-min-area@2@option@2@ prevent recompression of  
7686 - images whose width, height, or pixel area  
7687 - (width&#xa0;&#xd7;&#xa0;height) are below a specified  
7688 - threshold.  
7689 - </para>  
7690 - </listitem>  
7691 - <listitem>  
7692 - <para>  
7693 - The @1@option@1@--show-object@2@option@2@ option can now be given  
7694 - as @1@option@1@--show-object=trailer@2@option@2@ to show the  
7695 - trailer dictionary.  
7696 - </para>  
7697 - </listitem>  
7698 - </itemizedlist>  
7699 - </listitem>  
7700 - <listitem>  
7701 - <para>  
7702 - Bug Fixes and Enhancements  
7703 - </para>  
7704 - <itemizedlist>  
7705 - <listitem>  
7706 - <para>  
7707 - QPDF now automatically detects and recovers from dangling  
7708 - references. If a PDF file contained an indirect reference to  
7709 - a non-existent object, which is valid, when adding a new  
7710 - object to the file, it was possible for the new object to  
7711 - take the object ID of the dangling reference, thereby  
7712 - causing the dangling reference to point to the new object.  
7713 - This case is now prevented.  
7714 - </para>  
7715 - </listitem>  
7716 - <listitem>  
7717 - <para>  
7718 - Fixes to form field setting code: strings are always written  
7719 - in UTF-16 format, and checkboxes and radio buttons are  
7720 - handled properly with respect to synchronization of values  
7721 - and appearance states.  
7722 - </para>  
7723 - </listitem>  
7724 - <listitem>  
7725 - <para>  
7726 - The <function>QPDF::checkLinearization()</function> method no  
7727 - longer causes the program to crash when it detects problems  
7728 - with linearization data. Instead, it issues a normal warning  
7729 - or error.  
7730 - </para>  
7731 - </listitem>  
7732 - <listitem>  
7733 - <para>  
7734 - Ordinarily qpdf treats an argument of the form  
7735 - @1@option@1@@file@2@option@2@ to mean that command-line options  
7736 - should be read from @1@filename@1@file@2@filename@2@. Now, if  
7737 - @1@filename@1@file@2@filename@2@ does not exist but  
7738 - @1@filename@1@@file@2@filename@2@ does, qpdf will treat  
7739 - @1@filename@1@@file@2@filename@2@ as a regular option. This makes  
7740 - it possible to work more easily with PDF files whose names  
7741 - happen to start with the <literal>@</literal> character.  
7742 - </para>  
7743 - </listitem>  
7744 - </itemizedlist>  
7745 - </listitem>  
7746 - <listitem>  
7747 - <para>  
7748 - Library Enhancements  
7749 - </para>  
7750 - <itemizedlist>  
7751 - <listitem>  
7752 - <para>  
7753 - Remove the restriction in most cases that the source QPDF  
7754 - object used in a  
7755 - <function>QPDF::copyForeignObject</function> call has to  
7756 - stick around until the destination QPDF is written. The  
7757 - exceptional case is when the source stream gets its data  
7758 - using a QPDFObjectHandle::StreamDataProvider. For a more  
7759 - in-depth discussion, see comments around  
7760 - <function>copyForeignObject</function> in  
7761 - @1@filename@1@QPDF.hh@2@filename@2@.  
7762 - </para>  
7763 - </listitem>  
7764 - <listitem>  
7765 - <para>  
7766 - Add new method  
7767 - <function>QPDFWriter::getFinalVersion()</function>, which  
7768 - returns the PDF version that will ultimately be written to  
7769 - the final file. See comments in  
7770 - @1@filename@1@QPDFWriter.hh@2@filename@2@ for some restrictions on  
7771 - its use.  
7772 - </para>  
7773 - </listitem>  
7774 - <listitem>  
7775 - <para>  
7776 - Add several methods for transcoding strings to some of the  
7777 - character sets used in PDF files:  
7778 - <function>QUtil::utf8_to_ascii</function>,  
7779 - <function>QUtil::utf8_to_win_ansi</function>,  
7780 - <function>QUtil::utf8_to_mac_roman</function>, and  
7781 - <function>QUtil::utf8_to_utf16</function>. For the  
7782 - single-byte encodings that support only limited character  
7783 - sets, these methods replace unsupported characters with a  
7784 - specified substitute.  
7785 - </para>  
7786 - </listitem>  
7787 - <listitem>  
7788 - <para>  
7789 - Add new methods to  
7790 - <classname>QPDFAnnotationObjectHelper</classname> and  
7791 - <classname>QPDFFormFieldObjectHelper</classname> for  
7792 - querying flags and interpretation of different field types.  
7793 - Define constants in @1@filename@1@qpdf/Constants.h@2@filename@2@ to  
7794 - help with interpretation of flag values.  
7795 - </para>  
7796 - </listitem>  
7797 - <listitem>  
7798 - <para>  
7799 - Add new methods  
7800 - <function>QPDFAcroFormDocumentHelper::generateAppearancesIfNeeded</function>  
7801 - and  
7802 - <function>QPDFFormFieldObjectHelper::generateAppearance</function>  
7803 - for generating appearance streams. See discussion in  
7804 - @1@filename@1@QPDFFormFieldObjectHelper.hh@2@filename@2@ for  
7805 - limitations.  
7806 - </para>  
7807 - </listitem>  
7808 - <listitem>  
7809 - <para>  
7810 - Add two new helper functions for dealing with resource  
7811 - dictionaries:  
7812 - <function>QPDFObjectHandle::getResourceNames()</function>  
7813 - returns a list of all second-level keys, which correspond to  
7814 - the names of resources, and  
7815 - <function>QPDFObjectHandle::mergeResources()</function>  
7816 - merges two resources dictionaries as long as they have  
7817 - non-conflicting keys. These methods are useful for certain  
7818 - types of objects that resolve resources from multiple places,  
7819 - such as form fields.  
7820 - </para>  
7821 - </listitem>  
7822 - <listitem>  
7823 - <para>  
7824 - Add methods  
7825 - <function>QPDFPageDocumentHelper::flattenAnnotations()</function>  
7826 - and  
7827 - <function>QPDFAnnotationObjectHelper::getPageContentForAppearance()</function>  
7828 - for handling low-level details of annotation flattening.  
7829 - </para>  
7830 - </listitem>  
7831 - <listitem>  
7832 - <para>  
7833 - Add new helper classes:  
7834 - <classname>QPDFOutlineDocumentHelper</classname>,  
7835 - <classname>QPDFOutlineObjectHelper</classname>,  
7836 - <classname>QPDFPageLabelDocumentHelper</classname>,  
7837 - <classname>QPDFNameTreeObjectHelper</classname>, and  
7838 - <classname>QPDFNumberTreeObjectHelper</classname>.  
7839 - </para>  
7840 - </listitem>  
7841 - <listitem>  
7842 - <para>  
7843 - Add method <function>QPDFObjectHandle::getJSON()</function>  
7844 - that returns a JSON representation of the object. Call  
7845 - <function>serialize()</function> on the result to convert it  
7846 - to a string.  
7847 - </para>  
7848 - </listitem>  
7849 - <listitem>  
7850 - <para>  
7851 - Add a simple JSON serializer. This is not a complete or  
7852 - general-purpose JSON library. It allows assembly and  
7853 - serialization of JSON structures with some restrictions,  
7854 - which are described in the header file. This is the  
7855 - serializer used by qpdf's new JSON representation.  
7856 - </para>  
7857 - </listitem>  
7858 - <listitem>  
7859 - <para>  
7860 - Add new <classname>QPDFObjectHandle::Matrix</classname>  
7861 - class along with a few convenience methods for dealing with  
7862 - six-element numerical arrays as matrices.  
7863 - </para>  
7864 - </listitem>  
7865 - <listitem>  
7866 - <para>  
7867 - Add new method  
7868 - <function>QPDFObjectHandle::wrapInArray</function>, which returns  
7869 - the object itself if it is an array, or an array containing  
7870 - the object otherwise. This is a common construct in PDF.  
7871 - This method prevents you from having to explicitly test  
7872 - whether something is a single element or an array.  
7873 - </para>  
7874 - </listitem>  
7875 - </itemizedlist>  
7876 - </listitem>  
7877 - <listitem>  
7878 - <para>  
7879 - Build Improvements  
7880 - </para>  
7881 - <itemizedlist>  
7882 - <listitem>  
7883 - <para>  
7884 - It is no longer necessary to run  
7885 - @1@command@1@autogen.sh@2@command@2@ to build from a pristine  
7886 - checkout. Automatically generated files are now committed so  
7887 - that it is possible to build on platforms without autoconf  
7888 - directly from a clean checkout of the repository. The  
7889 - @1@command@1@configure@2@command@2@ script detects if the files are  
7890 - out of date when it also determines that the tools are  
7891 - present to regenerate them.  
7892 - </para>  
7893 - </listitem>  
7894 - <listitem>  
7895 - <para>  
7896 - Pull requests and the master branch are now built  
7897 - automatically in <ulink  
7898 - url="https://dev.azure.com/qpdf/qpdf/_build">Azure  
7899 - Pipelines</ulink>, which is free for open source projects.  
7900 - The build includes Linux, mac, Windows 32-bit and 64-bit  
7901 - with mingw and MSVC, and an AppImage build. Official qpdf  
7902 - releases are now built with Azure Pipelines.  
7903 - </para>  
7904 - </listitem>  
7905 - </itemizedlist>  
7906 - </listitem>  
7907 - <listitem>  
7908 - <para>  
7909 - Notes for Packagers  
7910 - </para>  
7911 - <itemizedlist>  
7912 - <listitem>  
7913 - <para>  
7914 - A new section has been added to the documentation with notes  
7915 - for packagers. Please see <xref linkend="ref.packaging"/>.  
7916 - </para>  
7917 - </listitem>  
7918 - <listitem>  
7919 - <para>  
7920 - The qpdf build detects out-of-date automatically generated files.  
7921 - If your packaging system automatically refreshes libtool or  
7922 - autoconf files, it could cause this check to fail. To avoid  
7923 - this problem, pass  
7924 - @1@option@1@--disable-check-autofiles@2@option@2@ to  
7925 - @1@command@1@configure@2@command@2@.  
7926 - </para>  
7927 - </listitem>  
7928 - <listitem>  
7929 - <para>  
7930 - If you would like to have qpdf completion enabled  
7931 - automatically, you can install completion files in the  
7932 - distribution's default location. You can find sample  
7933 - completion files to install in the  
7934 - @1@filename@1@completions@2@filename@2@ directory.  
7935 - </para>  
7936 - </listitem>  
7937 - </itemizedlist>  
7938 - </listitem>  
7939 - </itemizedlist>  
7940 - </listitem>  
7941 - </varlistentry>  
7942 - <varlistentry>  
7943 - <term>8.2.1: August 18, 2018</term>  
7944 - <listitem>  
7945 - <itemizedlist>  
7946 - <listitem>  
7947 - <para>  
7948 - Command-line Enhancements  
7949 - </para>  
7950 - <itemizedlist>  
7951 - <listitem>  
7952 - <para>  
7953 - Add  
7954 - @1@option@1@--keep-files-open=@1@replaceable@1@[yn]@2@replaceable@2@@2@option@2@  
7955 - to override default determination of whether to keep files  
7956 - open when merging. Please see the discussion of  
7957 - @1@option@1@--keep-files-open@2@option@2@ in <xref  
7958 - linkend="ref.basic-options"/> for additional details.  
7959 - </para>  
7960 - </listitem>  
7961 - </itemizedlist>  
7962 - </listitem>  
7963 - </itemizedlist>  
7964 - </listitem>  
7965 - </varlistentry>  
7966 - <varlistentry>  
7967 - <term>8.2.0: August 16, 2018</term>  
7968 - <listitem>  
7969 - <itemizedlist>  
7970 - <listitem>  
7971 - <para>  
7972 - Command-line Enhancements  
7973 - </para>  
7974 - <itemizedlist>  
7975 - <listitem>  
7976 - <para>  
7977 - Add @1@option@1@--no-warn@2@option@2@ option to suppress issuing  
7978 - warning messages. If there are any conditions that would  
7979 - have caused warnings to be issued, the exit status is still  
7980 - 3.  
7981 - </para>  
7982 - </listitem>  
7983 - </itemizedlist>  
7984 - </listitem>  
7985 - <listitem>  
7986 - <para>  
7987 - Bug Fixes and Optimizations  
7988 - </para>  
7989 - <itemizedlist>  
7990 - <listitem>  
7991 - <para>  
7992 - Performance fix: optimize page merging operation to avoid  
7993 - unnecessary open/close calls on files being merged. This  
7994 - solves a dramatic slow-down that was observed when merging  
7995 - certain types of files.  
7996 - </para>  
7997 - </listitem>  
7998 - <listitem>  
7999 - <para>  
8000 - Optimize how memory was used for the TIFF predictor,  
8001 - drastically improving performance and memory usage for files  
8002 - containing high-resolution images compressed with Flate  
8003 - using the TIFF predictor.  
8004 - </para>  
8005 - </listitem>  
8006 - <listitem>  
8007 - <para>  
8008 - Bug fix: end of line characters were not properly handled  
8009 - inside strings in some cases.  
8010 - </para>  
8011 - </listitem>  
8012 - <listitem>  
8013 - <para>  
8014 - Bug fix: using @1@option@1@--progress@2@option@2@ on very small  
8015 - files could cause an infinite loop.  
8016 - </para>  
8017 - </listitem>  
8018 - </itemizedlist>  
8019 - </listitem>  
8020 - <listitem>  
8021 - <para>  
8022 - API enhancements  
8023 - </para>  
8024 - <itemizedlist>  
8025 - <listitem>  
8026 - <para>  
8027 - Add new class <classname>QPDFSystemError</classname>, derived  
8028 - from <classname>std::runtime_error</classname>, which is now  
8029 - thrown by <function>QUtil::throw_system_error</function>.  
8030 - This enables the triggering <classname>errno</classname>  
8031 - value to be retrieved.  
8032 - </para>  
8033 - </listitem>  
8034 - <listitem>  
8035 - <para>  
8036 - Add <function>ClosedFileInputSource::stayOpen</function>  
8037 - method, enabling a  
8038 - <classname>ClosedFileInputSource</classname> to stay open  
8039 - during manually indicated periods of high activity, thus  
8040 - reducing the overhead of frequent open/close operations.  
8041 - </para>  
8042 - </listitem>  
8043 - </itemizedlist>  
8044 - </listitem>  
8045 - <listitem>  
8046 - <para>  
8047 - Build Changes  
8048 - </para>  
8049 - <itemizedlist>  
8050 - <listitem>  
8051 - <para>  
8052 - For the mingw builds, change the name of the DLL import  
8053 - library from @1@filename@1@libqpdf.a@2@filename@2@ to  
8054 - @1@filename@1@libqpdf.dll.a@2@filename@2@ to more accurately  
8055 - reflect that it is an import library rather than a static  
8056 - library. This potentially clears the way for supporting a  
8057 - static library in the future, though presently, the qpdf  
8058 - Windows build only builds the DLL and executables.  
8059 - </para>  
8060 - </listitem>  
8061 - </itemizedlist>  
8062 - </listitem>  
8063 - </itemizedlist>  
8064 - </listitem>  
8065 - </varlistentry>  
8066 - <varlistentry>  
8067 - <term>8.1.0: June 23, 2018</term>  
8068 - <listitem>  
8069 - <itemizedlist>  
8070 - <listitem>  
8071 - <para>  
8072 - Usability Improvements  
8073 - </para>  
8074 - <itemizedlist>  
8075 - <listitem>  
8076 - <para>  
8077 - When splitting files, qpdf detects fonts and images that the  
8078 - document metadata claims are referenced from a page but are  
8079 - not actually referenced and omits them from the output file.  
8080 - This change can cause a significant reduction in the size of  
8081 - split PDF files for files created by some software packages.  
8082 - In some cases, it can also make page splitting slower. Prior  
8083 - versions of qpdf would believe the document metadata and  
8084 - sometimes include all the images from all the other pages  
8085 - even though the pages were no longer present. In the  
8086 - unlikely event that the old behavior should be desired, or  
8087 - if you have a case where page splitting is very slow, the  
8088 - old behavior (and speed) can be enabled by specifying  
8089 - @1@option@1@--preserve-unreferenced-resources@2@option@2@. For  
8090 - additional details, please see <xref  
8091 - linkend="ref.advanced-transformation"/>.  
8092 - </para>  
8093 - </listitem>  
8094 - <listitem>  
8095 - <para>  
8096 - When merging multiple PDF files, qpdf no longer leaves all  
8097 - the files open. This makes it possible to merge numbers of  
8098 - files that may exceed the operating system's limit for the  
8099 - maximum number of open files.  
8100 - </para>  
8101 - </listitem>  
8102 - <listitem>  
8103 - <para>  
8104 - The @1@option@1@--rotate@2@option@2@ option's syntax has been  
8105 - extended to make the page range optional. If you specify  
8106 - @1@option@1@--rotate=@1@replaceable@1@angle@2@replaceable@2@@2@option@2@  
8107 - without specifying a page range, the rotation will be  
8108 - applied to all pages. This can be especially useful for  
8109 - adjusting a PDF created from a multi-page document that  
8110 - was scanned upside down.  
8111 - </para>  
8112 - </listitem>  
8113 - <listitem>  
8114 - <para>  
8115 - When merging multiple files, the @1@option@1@--verbose@2@option@2@  
8116 - option now prints information about each file as it operates  
8117 - on that file.  
8118 - </para>  
8119 - </listitem>  
8120 - <listitem>  
8121 - <para>  
8122 - When the @1@option@1@--progress@2@option@2@ option is specified,  
8123 - qpdf will print a running indicator of its best guess at how  
8124 - far through the writing process it is. Note that, as with  
8125 - all progress meters, it's an approximation. This option is  
8126 - implemented in a way that makes it useful for software that  
8127 - uses the qpdf library; see API Enhancements below.  
8128 - </para>  
8129 - </listitem>  
8130 - </itemizedlist>  
8131 - </listitem>  
8132 - <listitem>  
8133 - <para>  
8134 - Bug Fixes  
8135 - </para>  
8136 - <itemizedlist>  
8137 - <listitem>  
8138 - <para>  
8139 - Properly decrypt files that use revision 3 of the standard  
8140 - security handler but use 40 bit keys (even though revision 3  
8141 - supports 128-bit keys).  
8142 - </para>  
8143 - </listitem>  
8144 - <listitem>  
8145 - <para>  
8146 - Limit depth of nested data structures to prevent crashes  
8147 - from certain types of malformed (malicious) PDFs.  
8148 - </para>  
8149 - </listitem>  
8150 - <listitem>  
8151 - <para>  
8152 - In "newline before endstream" mode, insert the  
8153 - required extra newline before the  
8154 - <literal>endstream</literal> at the end of object streams.  
8155 - This one case was previously omitted.  
8156 - </para>  
8157 - </listitem>  
8158 - </itemizedlist>  
8159 - </listitem>  
8160 - <listitem>  
8161 - <para>  
8162 - API Enhancements  
8163 - </para>  
8164 - <itemizedlist>  
8165 - <listitem>  
8166 - <para>  
8167 - The first round of higher level "helper"  
8168 - interfaces has been introduced. These are designed to  
8169 - provide a more convenient way of interacting with certain  
8170 - document features than using  
8171 - <classname>QPDFObjectHandle</classname> directly. For  
8172 - details on helpers, see <xref  
8173 - linkend="ref.helper-classes"/>. Specific additional  
8174 - interfaces are described below.  
8175 - </para>  
8176 - </listitem>  
8177 - <listitem>  
8178 - <para>  
8179 - Add two new document helper classes:  
8180 - <classname>QPDFPageDocumentHelper</classname> for working  
8181 - with pages, and  
8182 - <classname>QPDFAcroFormDocumentHelper</classname> for  
8183 - working with interactive forms. No old methods have been  
8184 - removed, but <classname>QPDFPageDocumentHelper</classname>  
8185 - is now the preferred way to perform operations on pages  
8186 - rather than calling the old methods in  
8187 - <classname>QPDFObjectHandle</classname> and  
8188 - <classname>QPDF</classname> directly. Comments in the header  
8189 - files direct you to the new interfaces. Please see the  
8190 - header files and @1@filename@1@ChangeLog@2@filename@2@ for  
8191 - additional details.  
8192 - </para>  
8193 - </listitem>  
8194 - <listitem>  
8195 - <para>  
8196 - Add three new object helper classes:  
8197 - <classname>QPDFPageObjectHelper</classname> for pages,  
8198 - <classname>QPDFFormFieldObjectHelper</classname> for  
8199 - interactive form fields, and  
8200 - <classname>QPDFAnnotationObjectHelper</classname> for  
8201 - annotations. All three classes are fairly sparse at the  
8202 - moment, but they have some useful, basic functionality.  
8203 - </para>  
8204 - </listitem>  
8205 - <listitem>  
8206 - <para>  
8207 - A new example program  
8208 - @1@filename@1@examples/pdf-set-form-values.cc@2@filename@2@ has  
8209 - been added that illustrates use of the new document and  
8210 - object helpers.  
8211 - </para>  
8212 - </listitem>  
8213 - <listitem>  
8214 - <para>  
8215 - The method  
8216 - <function>QPDFWriter::registerProgressReporter</function>  
8217 - has been added. This method allows you to register a  
8218 - function that is called by <classname>QPDFWriter</classname>  
8219 - to update your idea of the percentage it thinks it is  
8220 - through writing its output. Client programs can use this to  
8221 - implement reasonably accurate progress meters. The  
8222 - @1@command@1@qpdf@2@command@2@ command line tool uses this to  
8223 - implement its @1@option@1@--progress@2@option@2@ option.  
8224 - </para>  
8225 - </listitem>  
8226 - <listitem>  
8227 - <para>  
8228 - New methods  
8229 - <function>QPDFObjectHandle::newUnicodeString</function> and  
8230 - <function>QPDFObject::unparseBinary</function> have been  
8231 - added to allow for more convenient creation of strings that  
8232 - are explicitly encoded using big-endian UTF-16. This is  
8233 - useful for creating strings that appear outside of content  
8234 - streams, such as labels, form fields, outlines, document  
8235 - metadata, etc.  
8236 - </para>  
8237 - </listitem>  
8238 - <listitem>  
8239 - <para>  
8240 - A new class  
8241 - <classname>QPDFObjectHandle::Rectangle</classname> has been  
8242 - added to ease working with PDF rectangles, which are just  
8243 - arrays of four numeric values.  
8244 - </para>  
8245 - </listitem>  
8246 - </itemizedlist>  
8247 - </listitem>  
8248 - </itemizedlist>  
8249 - </listitem>  
8250 - </varlistentry>  
8251 - <varlistentry>  
8252 - <term>8.0.2: March 6, 2018</term>  
8253 - <listitem>  
8254 - <itemizedlist>  
8255 - <listitem>  
8256 - <para>  
8257 - When a loop is detected while following cross reference  
8258 - streams or tables, treat this as damage instead of silently  
8259 - ignoring the previous table. This prevents loss of otherwise  
8260 - recoverable data in some damaged files.  
8261 - </para>  
8262 - </listitem>  
8263 - </itemizedlist>  
8264 - <itemizedlist>  
8265 - <listitem>  
8266 - <para>  
8267 - Properly handle pages with no contents.  
8268 - </para>  
8269 - </listitem>  
8270 - </itemizedlist>  
8271 - </listitem>  
8272 - </varlistentry>  
8273 - <varlistentry>  
8274 - <term>8.0.1: March 4, 2018</term>  
8275 - <listitem>  
8276 - <itemizedlist>  
8277 - <listitem>  
8278 - <para>  
8279 - Disregard data check errors when uncompressing  
8280 - <literal>/FlateDecode</literal> streams. This is consistent  
8281 - with most other PDF readers and allows qpdf to recover data  
8282 - from another class of malformed PDF files.  
8283 - </para>  
8284 - </listitem>  
8285 - <listitem>  
8286 - <para>  
8287 - On the command line when specifying page ranges, support  
8288 - preceding a page number by "r" to indicate that it  
8289 - should be counted from the end. For example, the range  
8290 - <literal>r3-r1</literal> would indicate the last three pages  
8291 - of a document.  
8292 - </para>  
8293 - </listitem>  
8294 - </itemizedlist>  
8295 - </listitem>  
8296 - </varlistentry>  
8297 - <varlistentry>  
8298 - <term>8.0.0: February 25, 2018</term>  
8299 - <listitem>  
8300 - <itemizedlist>  
8301 - <listitem>  
8302 - <para>  
8303 - Packaging and Distribution Changes  
8304 - </para>  
8305 - <itemizedlist>  
8306 - <listitem>  
8307 - <para>  
8308 - QPDF is now distributed as an <ulink  
8309 - url="https://appimage.org/">AppImage</ulink> in addition to  
8310 - all the other ways it is distributed. The AppImage can be  
8311 - found in the download area with the other packages. Thanks  
8312 - to Kurt Pfeifle and Simon Peter for their contributions.  
8313 - </para>  
8314 - </listitem>  
8315 - </itemizedlist>  
8316 - </listitem>  
8317 - <listitem>  
8318 - <para>  
8319 - Bug Fixes  
8320 - </para>  
8321 - <itemizedlist>  
8322 - <listitem>  
8323 - <para>  
8324 - <function>QPDFObjectHandle::getUTF8Val</function> now  
8325 - properly treats non-Unicode strings as encoded with PDF Doc  
8326 - Encoding.  
8327 - </para>  
8328 - </listitem>  
8329 - <listitem>  
8330 - <para>  
8331 - Improvements to handling of objects in PDF files that are  
8332 - not of the expected type. In most cases, qpdf will be able  
8333 - to warn for such cases rather than fail with an exception.  
8334 - Previous versions of qpdf would sometimes fail with errors  
8335 - such as "operation for dictionary object attempted on  
8336 - object of wrong type". This situation should be mostly  
8337 - or entirely eliminated now.  
8338 - </para>  
8339 - </listitem>  
8340 - </itemizedlist>  
8341 - </listitem>  
8342 - <listitem>  
8343 - <para>  
8344 - Enhancements to the @1@command@1@qpdf@2@command@2@ Command-line Tool.  
8345 - All new options listed here are documented in more detail in  
8346 - <xref linkend="ref.using"/>.  
8347 - </para>  
8348 - <itemizedlist>  
8349 - <listitem>  
8350 - <para>  
8351 - The option  
8352 - @1@option@1@--linearize-pass1=@1@replaceable@1@file@2@replaceable@2@@2@option@2@  
8353 - has been added for debugging qpdf's linearization code.  
8354 - </para>  
8355 - </listitem>  
8356 - <listitem>  
8357 - <para>  
8358 - The option @1@option@1@--coalesce-contents@2@option@2@ can be used  
8359 - to combine content streams of a page whose contents are an  
8360 - array of streams into a single stream.  
8361 - </para>  
8362 - </listitem>  
8363 - </itemizedlist>  
8364 - </listitem>  
8365 - <listitem>  
8366 - <para>  
8367 - API Enhancements. All new API calls are documented in their  
8368 - respective classes' header files. There are no non-compatible  
8369 - changes to the API.  
8370 - </para>  
8371 - <itemizedlist>  
8372 - <listitem>  
8373 - <para>  
8374 - Add function <function>qpdf_check_pdf</function> to the C API.  
8375 - This function does basic checking that is a subset of what  
8376 - @1@command@1@qpdf --check@2@command@2@ performs.  
8377 - </para>  
8378 - </listitem>  
8379 - <listitem>  
8380 - <para>  
8381 - Major enhancements to the lexical layer of qpdf. For a  
8382 - complete list of enhancements, please refer to the  
8383 - @1@filename@1@ChangeLog@2@filename@2@ file. Most of the changes  
8384 - result in improvements to qpdf's ability to handle erroneous  
8385 - files. It is also possible for programs to handle  
8386 - whitespace, comments, and inline images as tokens.  
8387 - </para>  
8388 - </listitem>  
8389 - <listitem>  
8390 - <para>  
8391 - New API for working with PDF content streams at a lexical  
8392 - level. The new class  
8393 - <classname>QPDFObjectHandle::TokenFilter</classname> allows  
8394 - the developer to provide token handlers. Token filters can be  
8395 - used with several different methods in  
8396 - <classname>QPDFObjectHandle</classname> as well as with a  
8397 - lower-level interface. See comments in  
8398 - @1@filename@1@QPDFObjectHandle.hh@2@filename@2@ as well as the new  
8399 - examples @1@filename@1@examples/pdf-filter-tokens.cc@2@filename@2@  
8400 - and @1@filename@1@examples/pdf-count-strings.cc@2@filename@2@ for  
8401 - details.  
8402 - </para>  
8403 - </listitem>  
8404 - </itemizedlist>  
8405 - </listitem>  
8406 - </itemizedlist>  
8407 - </listitem>  
8408 - </varlistentry>  
8409 - <varlistentry>  
8410 - <term>7.1.1: February 4, 2018</term>  
8411 - <listitem>  
8412 - <itemizedlist>  
8413 - <listitem>  
8414 - <para>  
8415 - Bug fix: files whose /ID fields were other than 16 bytes long  
7416 - can now be properly linearized.  
8417 - </para>  
8418 - </listitem>  
8419 - <listitem>  
8420 - <para>  
8421 - A few compile and link issues have been corrected for some  
8422 - platforms.  
8423 - </para>  
8424 - </listitem>  
8425 - </itemizedlist>  
8426 - </listitem>  
8427 - </varlistentry>  
8428 - <varlistentry>  
8429 - <term>7.1.0: January 14, 2018</term>  
8430 - <listitem>  
8431 - <itemizedlist>  
8432 - <listitem>  
8433 - <para>  
8434 - PDF files contain streams that may be compressed with various  
8435 - compression algorithms which, in some cases, may be enhanced  
8436 - by various predictor functions. Previously only the PNG up  
8437 - predictor was supported. In this version, all the PNG  
8438 - predictors as well as the TIFF predictor are supported. This  
8439 - increases the range of files that qpdf is able to handle.  
8440 - </para>  
8441 - </listitem>  
8442 - <listitem>  
8443 - <para>  
8444 - QPDF now allows a raw encryption key to be specified in place  
8445 - of a password when opening encrypted files, and will  
8446 - optionally display the encryption key used by a file. This is  
8447 - a non-standard operation, but it can be useful in certain  
8448 - situations. Please see the discussion of  
8449 - @1@option@1@--password-is-hex-key@2@option@2@ in <xref  
8450 - linkend="ref.basic-options"/> or the comments around  
8451 - <function>QPDF::setPasswordIsHexKey</function> in  
8452 - @1@filename@1@QPDF.hh@2@filename@2@ for additional details.  
8453 - </para>  
8454 - </listitem>  
8455 - <listitem>  
8456 - <para>  
8457 - Bug fix: numbers ending with a trailing decimal point are now  
8458 - properly recognized as numbers.  
8459 - </para>  
8460 - </listitem>  
8461 - <listitem>  
8462 - <para>  
8463 - Bug fix: when building qpdf from source on some platforms  
8464 - (especially MacOS), the build could get confused by older  
8465 - versions of qpdf installed on the system. This has been  
8466 - corrected.  
8467 - </para>  
8468 - </listitem>  
8469 - </itemizedlist>  
8470 - </listitem>  
8471 - </varlistentry>  
8472 - <varlistentry>  
8473 - <term>7.0.0: September 15, 2017</term>  
8474 - <listitem>  
8475 - <itemizedlist>  
8476 - <listitem>  
8477 - <para>  
8478 - Packaging and Distribution Changes  
8479 - </para>  
8480 - <itemizedlist>  
8481 - <listitem>  
8482 - <para>  
8483 - QPDF's primary license is now <ulink  
8484 - url="http://www.apache.org/licenses/LICENSE-2.0">version 2.0  
8485 - of the Apache License</ulink> rather than version 2.0 of the  
8486 - Artistic License. You may still, at your option, consider  
8487 - qpdf to be licensed with version 2.0 of the Artistic  
7488 - License.  
8489 - </para>  
8490 - </listitem>  
8491 - <listitem>  
8492 - <para>  
8493 - QPDF no longer has a dependency on the PCRE (Perl-Compatible  
8494 - Regular Expression) library. QPDF now has an added  
8495 - dependency on the JPEG library.  
8496 - </para>  
8497 - </listitem>  
8498 - </itemizedlist>  
8499 - </listitem>  
8500 - </itemizedlist>  
8501 - <itemizedlist>  
8502 - <listitem>  
8503 - <para>  
8504 - Bug Fixes  
8505 - </para>  
8506 - <itemizedlist>  
8507 - <listitem>  
8508 - <para>  
8509 - This release contains many bug fixes for various infinite  
8510 - loops, memory leaks, and other memory errors that could be  
8511 - encountered with specially crafted or otherwise erroneous  
8512 - PDF files.  
8513 - </para>  
8514 - </listitem>  
8515 - </itemizedlist>  
8516 - </listitem>  
8517 - </itemizedlist>  
8518 - <itemizedlist>  
8519 - <listitem>  
8520 - <para>  
8521 - New Features  
8522 - </para>  
8523 - <itemizedlist>  
8524 - <listitem>  
8525 - <para>  
8526 - QPDF now supports reading and writing streams encoded with  
8527 - JPEG or RunLength encoding. Library API enhancements and  
8528 - command-line options have been added to control this  
8529 - behavior. See command-line options  
8530 - @1@option@1@--compress-streams@2@option@2@ and  
8531 - @1@option@1@--decode-level@2@option@2@ and methods  
8532 - <function>QPDFWriter::setCompressStreams</function> and  
8533 - <function>QPDFWriter::setDecodeLevel</function>.  
8534 - </para>  
8535 - </listitem>  
8536 - <listitem>  
8537 - <para>  
8538 - QPDF is much better at recovering from broken files. In most  
8539 - cases, qpdf will skip invalid objects and will preserve  
8540 - broken stream data by not attempting to filter broken  
8541 - streams. QPDF is now able to recover or at least not crash  
8542 - on dozens of broken test files I have received over the past  
8543 - few years.  
8544 - </para>  
8545 - </listitem>  
8546 - <listitem>  
8547 - <para>  
8548 - Page rotation is now supported and accessible from both the  
8549 - library and the command line.  
8550 - </para>  
8551 - </listitem>  
8552 - <listitem>  
8553 - <para>  
8554 - <classname>QPDFWriter</classname> supports writing files in  
8555 - a way that preserves PCLm compliance in support of  
8556 - driverless printing. This is very specialized and is only  
8557 - useful to applications that already know how to create PCLm  
8558 - files.  
8559 - </para>  
8560 - </listitem>  
8561 - </itemizedlist>  
8562 - </listitem>  
8563 - </itemizedlist>  
8564 - <itemizedlist>  
8565 - <listitem>  
8566 - <para>  
8567 - Enhancements to the @1@command@1@qpdf@2@command@2@ Command-line Tool.  
8568 - All new options listed here are documented in more detail in  
8569 - <xref linkend="ref.using"/>.  
8570 - </para>  
8571 - <itemizedlist>  
8572 - <listitem>  
8573 - <para>  
8574 - Command-line arguments can now be read from files or  
8575 - standard input using <literal>@file</literal> or  
8576 - <literal>@-</literal> syntax. Please see <xref  
8577 - linkend="ref.invocation"/>.  
8578 - </para>  
8579 - </listitem>  
8580 - <listitem>  
8581 - <para>  
8582 - @1@option@1@--rotate@2@option@2@: request page rotation  
8583 - </para>  
8584 - </listitem>  
8585 - <listitem>  
8586 - <para>  
8587 - @1@option@1@--newline-before-endstream@2@option@2@: ensure that a  
8588 - newline appears before every <literal>endstream</literal>  
8589 - keyword in the file; used to prevent qpdf from breaking  
8590 - PDF/A compliance on already compliant files.  
8591 - </para>  
8592 - </listitem>  
8593 - <listitem>  
8594 - <para>  
8595 - @1@option@1@--preserve-unreferenced@2@option@2@: preserve  
8596 - unreferenced objects in the input PDF  
8597 - </para>  
8598 - </listitem>  
8599 - <listitem>  
8600 - <para>  
8601 - @1@option@1@--split-pages@2@option@2@: break output into chunks  
8602 - with fixed numbers of pages  
8603 - </para>  
8604 - </listitem>  
8605 - <listitem>  
8606 - <para>  
8607 - @1@option@1@--verbose@2@option@2@: print the name of each output  
8608 - file that is created  
8609 - </para>  
8610 - </listitem>  
8611 - <listitem>  
8612 - <para>  
8613 - @1@option@1@--compress-streams@2@option@2@ and  
8614 - @1@option@1@--decode-level@2@option@2@ replace  
8615 - @1@option@1@--stream-data@2@option@2@ for improving granularity of  
8616 - controlling compression and decompression of stream data.  
8617 - The @1@option@1@--stream-data@2@option@2@ option will remain  
8618 - available.  
8619 - </para>  
8620 - </listitem>  
8621 - <listitem>  
8622 - <para>  
8623 - When running @1@command@1@qpdf --check@2@command@2@ with other  
8624 - options, checks are always run first. This enables qpdf to  
8625 - perform its full recovery logic before outputting other  
8626 - information. This can be especially useful when manually  
8627 - recovering broken files, looking at qpdf's regenerated cross  
8628 - reference table, or other similar operations.  
8629 - </para>  
8630 - </listitem>  
8631 - <listitem>  
8632 - <para>  
8633 - Process @1@command@1@--pages@2@command@2@ earlier so that other  
8634 - options like @1@option@1@--show-pages@2@option@2@ or  
8635 - @1@option@1@--split-pages@2@option@2@ can operate on the file after  
8636 - page splitting/merging has occurred.  
8637 - </para>  
8638 - </listitem>  
8639 - </itemizedlist>  
8640 - </listitem>  
8641 - </itemizedlist>  
8642 - <itemizedlist>  
8643 - <listitem>  
8644 - <para>  
8645 - API Changes. All new API calls are documented in their  
8646 - respective classes' header files.  
8647 - </para>  
8648 - <itemizedlist>  
8649 - <listitem>  
8650 - <para>  
8651 - <function>QPDFObjectHandle::rotatePage</function>: apply  
8652 - rotation to a page object  
8653 - </para>  
8654 - </listitem>  
8655 - <listitem>  
8656 - <para>  
8657 - <function>QPDFWriter::setNewlineBeforeEndstream</function>:  
8658 - force newline to appear before <literal>endstream</literal>  
8659 - </para>  
8660 - </listitem>  
8661 - <listitem>  
8662 - <para>  
8663 - <function>QPDFWriter::setPreserveUnreferencedObjects</function>:  
8664 - preserve unreferenced objects that appear in the input PDF.  
8665 - The default behavior is to discard them.  
8666 - </para>  
8667 - </listitem>  
8668 - <listitem>  
8669 - <para>  
8670 - New <classname>Pipeline</classname> types  
8671 - <classname>Pl_RunLength</classname> and  
8672 - <classname>Pl_DCT</classname> are available for developers  
8673 - who wish to produce or consume RunLength or DCT stream data  
8674 - directly. The @1@filename@1@examples/pdf-create.cc@2@filename@2@  
8675 - example illustrates their use.  
8676 - </para>  
8677 - </listitem>  
8678 - <listitem>  
8679 - <para>  
8680 - <function>QPDFWriter::setCompressStreams</function> and  
8681 - <function>QPDFWriter::setDecodeLevel</function> methods  
8682 - control handling of different types of stream compression.  
8683 - </para>  
8684 - </listitem>  
8685 - <listitem>  
8686 - <para>  
8687 - Add new C API functions  
8688 - <function>qpdf_set_compress_streams</function>,  
8689 - <function>qpdf_set_decode_level</function>,  
8690 - <function>qpdf_set_preserve_unreferenced_objects</function>,  
8691 - and <function>qpdf_set_newline_before_endstream</function>  
8692 - corresponding to the new <classname>QPDFWriter</classname>  
8693 - methods.  
8694 - </para>  
8695 - </listitem>  
8696 - </itemizedlist>  
8697 - </listitem>  
8698 - </itemizedlist>  
8699 - </listitem>  
8700 - </varlistentry>  
8701 - <varlistentry>  
8702 - <term>6.0.0: November 10, 2015</term>  
8703 - <listitem>  
8704 - <itemizedlist>  
8705 - <listitem>  
8706 - <para>  
8707 - Implement @1@option@1@--deterministic-id@2@option@2@ command-line  
8708 - option and <function>QPDFWriter::setDeterministicID</function>  
8709 - as well as C API function  
8710 - <function>qpdf_set_deterministic_ID</function> for generating  
8711 - a deterministic ID for non-encrypted files. When this option  
8712 - is selected, the ID of the file depends on the contents of the  
8713 - output file, and not on transient items such as the timestamp  
8714 - or output file name.  
8715 - </para>  
8716 - </listitem>  
8717 - <listitem>  
8718 - <para>  
8719 - Make qpdf more tolerant of files whose xref table entries are  
8720 - not the correct length.  
8721 - </para>  
8722 - </listitem>  
8723 - </itemizedlist>  
8724 - </listitem>  
8725 - </varlistentry>  
8726 - <varlistentry>  
8727 - <term>5.1.3: May 24, 2015</term>  
8728 - <listitem>  
8729 - <itemizedlist>  
8730 - <listitem>  
8731 - <para>  
8732 - Bug fix: fix-qdf was not properly handling files that  
8733 - contained object streams with more than 255 objects in them.  
8734 - </para>  
8735 - </listitem>  
8736 - <listitem>  
8737 - <para>  
8738 - Bug fix: qpdf was not properly initializing Microsoft's secure  
8739 - crypto provider on fresh Windows installations that had not  
8740 - had any keys created yet.  
8741 - </para>  
8742 - </listitem>  
8743 - <listitem>  
8744 - <para>  
8745 - Fix a few errors found by Gynvael Coldwind and  
8746 - Mateusz Jurczyk of the Google Security Team. Please see the  
8747 - ChangeLog for details.  
8748 - </para>  
8749 - </listitem>  
8750 - <listitem>  
8751 - <para>  
8752 - Properly handle pages that have no contents at all. There were  
8753 - many cases in which qpdf handled this fine, but a few methods  
8754 - blindly obtained page contents without handling the possibility  
8755 - that there were no contents.  
8756 - </para>  
8757 - </listitem>  
8758 - <listitem>  
8759 - <para>  
8760 - Make qpdf more robust for a few more kinds of problems that  
8761 - may occur in invalid PDF files.  
8762 - </para>  
8763 - </listitem>  
8764 - </itemizedlist>  
8765 - </listitem>  
8766 - </varlistentry>  
8767 - <varlistentry>  
8768 - <term>5.1.2: June 7, 2014</term>  
8769 - <listitem>  
8770 - <itemizedlist>  
8771 - <listitem>  
8772 - <para>  
8773 - Bug fix: linearizing files could create a corrupted output  
8774 - file under extremely unlikely file size circumstances. See  
8775 - ChangeLog for details. The odds of getting hit by this are  
8776 - very low, though one person did.  
8777 - </para>  
8778 - </listitem>  
8779 - <listitem>  
8780 - <para>  
8781 - Bug fix: qpdf would fail to write files that had streams with  
8782 - decode parameters referencing other streams.  
8783 - </para>  
8784 - </listitem>  
8785 - <listitem>  
8786 - <para>  
8787 - New example program: @1@command@1@pdf-split-pages@2@command@2@:  
8788 - efficiently split PDF files into individual pages. The example  
8789 - program does this more efficiently than using @1@command@1@qpdf  
8790 - --pages@2@command@2@ to do it.  
8791 - </para>  
8792 - </listitem>  
8793 - <listitem>  
8794 - <para>  
8795 - Packaging fix: Visual C++ binaries did not support Windows XP.  
8796 - This has been rectified by updating the compilers used to  
8797 - generate the release binaries.  
8798 - </para>  
8799 - </listitem>  
8800 - </itemizedlist>  
8801 - </listitem>  
8802 - </varlistentry>  
8803 - <varlistentry>  
8804 - <term>5.1.1: January 14, 2014</term>  
8805 - <listitem>  
8806 - <itemizedlist>  
8807 - <listitem>  
8808 - <para>  
8809 - Performance fix: copying foreign objects could be very slow  
8810 - with certain types of files. This was most likely to be  
8811 - visible during page splitting and was due to traversing the  
8812 - same objects multiple times in some cases.  
8813 - </para>  
8814 - </listitem>  
8815 - </itemizedlist>  
8816 - </listitem>  
8817 - </varlistentry>  
8818 - <varlistentry>  
8819 - <term>5.1.0: December 17, 2013</term>  
8820 - <listitem>  
8821 - <itemizedlist>  
8822 - <listitem>  
8823 - <para>  
8824 - Added runtime option  
8825 - (<function>QUtil::setRandomDataProvider</function>) to supply  
8826 - your own random data provider. You can use this if you want  
8827 - to avoid using the OS-provided secure random number generation  
8828 - facility or stdlib's less secure version. See comments in  
8829 - include/qpdf/QUtil.hh for details.  
8830 - </para>  
8831 - </listitem>  
8832 - <listitem>  
8833 - <para>  
8834 - Fixed image comparison tests to not create 12-bit-per-pixel  
8835 - images since some versions of tiffcmp have bugs in comparing  
8836 - them in some cases. This increases the disk space required by  
8837 - the image comparison tests, which are off by default anyway.  
8838 - </para>  
8839 - </listitem>  
8840 - <listitem>  
8841 - <para>  
8842 - Introduce a number of small fixes for compilation on the  
8843 - latest clang in MacOS and the latest Visual C++ in Windows.  
8844 - </para>  
8845 - </listitem>  
8846 - <listitem>  
8847 - <para>  
8848 - Be able to handle broken files that end the xref table header  
8849 - with a space instead of a newline.  
8850 - </para>  
8851 - </listitem>  
8852 - </itemizedlist>  
8853 - </listitem>  
8854 - </varlistentry>  
8855 - <varlistentry>  
8856 - <term>5.0.1: October 18, 2013</term>  
8857 - <listitem>  
8858 - <itemizedlist>  
8859 - <listitem>  
8860 - <para>  
8861 - Thanks to a detailed review by Florian Weimer and the Red Hat  
8862 - Product Security Team, this release includes a number of  
8863 - non-user-visible security hardening changes. Please see the  
8864 - ChangeLog file in the source distribution for the complete  
8865 - list.  
8866 - </para>  
8867 - </listitem>  
8868 - <listitem>  
8869 - <para>  
8870 - When available, operating system-specific secure random number  
8871 - generation is used for generating initialization vectors and  
8872 - other random values used during encryption or file creation.  
8873 - For the Windows build, this results in an added dependency on  
8874 - Microsoft's cryptography API. To disable the OS-specific  
8875 - cryptography and use the old version, pass the  
8876 - @1@option@1@--enable-insecure-random@2@option@2@ option to  
8877 - @1@command@1@./configure@2@command@2@.  
8878 - </para>  
8879 - </listitem>  
8880 - <listitem>  
8881 - <para>  
8882 - The @1@command@1@qpdf@2@command@2@ command-line tool now issues a  
8883 - warning when @1@option@1@--accessibility=n@2@option@2@ is specified  
8884 - for newer encryption versions stating that the option is  
8885 - ignored. qpdf, per the spec, has always ignored this flag,  
8886 - but it previously did so silently. This warning is issued  
8887 - only by the command-line tool, not by the library. The  
8888 - library's handling of this flag is unchanged.  
8889 - </para>  
8890 - </listitem>  
8891 - </itemizedlist>  
8892 - </listitem>  
8893 - </varlistentry>  
8894 - <varlistentry>  
8895 - <term>5.0.0: July 10, 2013</term>  
8896 - <listitem>  
8897 - <itemizedlist>  
8898 - <listitem>  
8899 - <para>  
8900 - Bug fix: previous versions of qpdf would lose objects with  
8901 - generation != 0 when generating object streams. Fixing this  
8902 - required changes to the public API.  
8903 - </para>  
8904 - </listitem>  
8905 - <listitem>  
8906 - <para>  
8907 - Removed methods from public API that were only supposed to be  
8908 - called by QPDFWriter and couldn't realistically be called  
8909 - anywhere else. See ChangeLog for details.  
8910 - </para>  
8911 - </listitem>  
8912 - <listitem>  
8913 - <para>  
8914 - New <type>QPDFObjGen</type> class added to represent an object  
8915 - ID/generation pair.  
8916 - <function>QPDFObjectHandle::getObjGen()</function> is now  
8917 - preferred over  
8918 - <function>QPDFObjectHandle::getObjectID()</function> and  
8919 - <function>QPDFObjectHandle::getGeneration()</function> as it  
8920 - makes it less likely for people to accidentally write code  
8921 - that ignores the generation number. See  
8922 - @1@filename@1@QPDF.hh@2@filename@2@ and  
8923 - @1@filename@1@QPDFObjectHandle.hh@2@filename@2@ for additional notes.  
8924 - </para>  
8925 - </listitem>  
8926 - <listitem>  
8927 - <para>  
8928 - Add @1@option@1@--show-npages@2@option@2@ command-line option to the  
8929 - @1@command@1@qpdf@2@command@2@ command to show the number of pages in  
8930 - a file.  
8931 - </para>  
8932 - </listitem>  
8933 - <listitem>  
8934 - <para>  
8935 - Allow omission of the page range within  
8936 - @1@option@1@--pages@2@option@2@ for the @1@command@1@qpdf@2@command@2@  
8937 - command. When omitted, the page range is implicitly taken to  
8938 - be all the pages in the file.  
8939 - </para>  
8940 - </listitem>  
8941 - <listitem>  
8942 - <para>  
8943 - Various enhancements were made to support different types of  
8944 - broken files or broken readers. Details can be found in  
8945 - @1@filename@1@ChangeLog@2@filename@2@.  
8946 - </para>  
8947 - </listitem>  
8948 - </itemizedlist>  
8949 - </listitem>  
8950 - </varlistentry>  
8951 - <varlistentry>  
8952 - <term>4.1.0: April 14, 2013</term>  
8953 - <listitem>  
8954 - <itemizedlist>  
8955 - <listitem>  
8956 - <para>  
8957 - Note to people including qpdf in distributions: the  
8958 - @1@filename@1@.la@2@filename@2@ files generated by libtool are now  
8959 - installed by qpdf's @1@command@1@make install@2@command@2@ target.  
8960 - Before, they were not installed. This means that if your  
8961 - distribution does not want to include @1@filename@1@.la@2@filename@2@  
8962 - files, you must remove them as part of your packaging process.  
8963 - </para>  
8964 - </listitem>  
8965 - <listitem>  
8966 - <para>  
8967 - Major enhancement: API enhancements have been made to support  
8968 - parsing of content streams. This enhancement includes the  
8969 - following changes:  
8970 - <itemizedlist>  
8971 - <listitem>  
8972 - <para>  
8973 - <function>QPDFObjectHandle::parseContentStream</function>  
8974 - method parses objects in a content stream and calls  
8975 - handlers in a callback class. The example  
8976 - @1@filename@1@examples/pdf-parse-content.cc@2@filename@2@  
8977 - illustrates how this may be used.  
8978 - </para>  
8979 - </listitem>  
8980 - <listitem>  
8981 - <para>  
8982 - <type>QPDFObjectHandle</type> can now represent operators  
8983 - and inline images, object types that may only appear in  
8984 - content streams.  
8985 - </para>  
8986 - </listitem>  
8987 - <listitem>  
8988 - <para>  
8989 - Method <function>QPDFObjectHandle::getTypeCode()</function>  
8990 - returns an enumerated type value representing the  
8991 - underlying object type. Method  
8992 - <function>QPDFObjectHandle::getTypeName()</function>  
8993 - returns a text string describing the name of the type of a  
8994 - <type>QPDFObjectHandle</type> object. These methods can be  
8995 - used for more efficient parsing and debugging/diagnostic  
8996 - messages.  
8997 - </para>  
8998 - </listitem>  
8999 - </itemizedlist>  
9000 - </para>  
9001 - </listitem>  
9002 - <listitem>  
9003 - <para>  
9004 - @1@command@1@qpdf --check@2@command@2@ now parses all pages' content  
9005 - streams in addition to doing other checks. While there are  
9006 - still many types of errors that cannot be detected, syntactic  
9007 - errors in content streams will now be reported.  
9008 - </para>  
9009 - </listitem>  
9010 - <listitem>  
9011 - <para>  
9012 - Minor compilation enhancements have been made to facilitate  
9013 - easier support for a broader range of compilers and  
9014 - compiler versions.  
9015 - <itemizedlist>  
9016 - <listitem>  
9017 - <para>  
9018 - Warning flags have been moved into a separate variable in  
9019 - @1@filename@1@autoconf.mk@2@filename@2@  
9020 - </para>  
9021 - </listitem>  
9022 - <listitem>  
9023 - <para>  
9024 - The configure flag @1@option@1@--enable-werror@2@option@2@ works  
9025 - for Microsoft compilers  
9026 - </para>  
9027 - </listitem>  
9028 - <listitem>  
9029 - <para>  
9030 - All MSVC CRT security warnings have been resolved.  
9031 - </para>  
9032 - </listitem>  
9033 - <listitem>  
9034 - <para>  
9035 - All C-style casts in C++ Code have been replaced by C++  
9036 - casts, and many casts that had been included to suppress  
9037 - higher warning levels for some compilers have been removed,  
9038 - primarily for clarity. Places where integer type coercion  
9039 - occurs have been scrutinized. A new casting policy has  
9040 - been documented in the manual. This is of concern mainly  
9041 - to people porting qpdf to new platforms or compilers. It  
9042 - is not visible to programmers writing code that uses the  
9043 - library.  
9044 - </para>  
9045 - </listitem>  
9046 - <listitem>  
9047 - <para>  
9048 - Some internal limits have been removed in code that  
9049 - converts numbers to strings. This is largely invisible to  
9050 - users, but it does trigger a bug in some older versions of  
9051 - mingw-w64's C++ library. See  
9052 - @1@filename@1@README-windows.md@2@filename@2@ in the source  
9053 - distribution if you think this may affect you. The copy of  
9054 - the DLL distributed with qpdf's binary distribution is not  
9055 - affected by this problem.  
9056 - </para>  
9057 - </listitem>  
9058 - </itemizedlist>  
9059 - </para>  
9060 - </listitem>  
9061 - <listitem>  
9062 - <para>  
9063 - The RPM spec file previously included with qpdf has been  
9064 - removed. This is because virtually all Linux distributions  
9065 - include qpdf now that it is a dependency of CUPS filters.  
9066 - </para>  
9067 - </listitem>  
9068 - <listitem>  
9069 - <para>  
9070 - A few bug fixes are included:  
9071 - <itemizedlist>  
9072 - <listitem>  
9073 - <para>  
9074 - Overridden compressed objects are properly handled.  
9075 - Before, there were certain constructs that could cause qpdf  
9076 - to see old versions of some objects. The most usual  
9077 - manifestation of this was loss of filled in form values for  
9078 - certain files.  
9079 - </para>  
9080 - </listitem>  
9081 - <listitem>  
9082 - <para>  
9083 - Installation no longer uses GNU/Linux-specific versions of  
9084 - some commands, so @1@command@1@make install@2@command@2@ works on  
9085 - Solaris with native tools.  
9086 - </para>  
9087 - </listitem>  
9088 - <listitem>  
9089 - <para>  
9090 - The 64-bit mingw Windows binary package no longer includes  
9091 - a 32-bit DLL.  
9092 - </para>  
9093 - </listitem>  
9094 - </itemizedlist>  
9095 - </para>  
9096 - </listitem>  
9097 - </itemizedlist>  
9098 - </listitem>  
9099 - </varlistentry>  
9100 - <varlistentry>  
9101 - <term>4.0.1: January 17, 2013</term>  
9102 - <listitem>  
9103 - <itemizedlist>  
9104 - <listitem>  
9105 - <para>  
9106 - Fix detection of binary attachments in test suite to avoid  
9107 - false test failures on some platforms.  
9108 - </para>  
9109 - </listitem>  
9110 - <listitem>  
9111 - <para>  
9112 - Add clarifying comment in @1@filename@1@QPDF.hh@2@filename@2@ to  
9113 - methods that return the user password explaining that it is no  
9114 - longer possible with newer encryption formats to recover the  
9115 - user password knowing the owner password. In earlier  
9116 - encryption formats, the user password was encrypted in the  
9117 - file using the owner password. In newer encryption formats, a  
9118 - separate encryption key is used on the file, and that key is  
9119 - independently encrypted using both the user password and the  
9120 - owner password.  
9121 - </para>  
9122 - </listitem>  
9123 - </itemizedlist>  
9124 - </listitem>  
9125 - </varlistentry>  
9126 - <varlistentry>  
9127 - <term>4.0.0: December 31, 2012</term>  
9128 - <listitem>  
9129 - <itemizedlist>  
9130 - <listitem>  
9131 - <para>  
9132 - Major enhancement: support has been added for newer encryption  
9133 - schemes supported by version X of Adobe Acrobat. This  
9134 - includes use of 127-character passwords, 256-bit encryption  
9135 - keys, and the encryption scheme specified in ISO 32000-2, the  
9136 - PDF 2.0 specification. This scheme can be chosen from the  
9137 - command line by specifying use of 256-bit keys. qpdf also  
9138 - supports the deprecated encryption method used by Acrobat IX.  
9139 - This encryption style has known security weaknesses and should  
9140 - not be used in practice. However, such files exist "in  
9141 - the wild," so support for this scheme is still useful.  
9142 - New methods  
9143 - <function>QPDFWriter::setR6EncryptionParameters</function>  
9144 - (for the PDF 2.0 scheme) and  
9145 - <function>QPDFWriter::setR5EncryptionParameters</function>  
9146 - (for the deprecated scheme) have been added to enable these  
9147 - new encryption schemes. Corresponding functions have been  
9148 - added to the C API as well.  
9149 - </para>  
9150 - </listitem>  
9151 - <listitem>  
9152 - <para>  
9153 - Full support for Adobe extension levels in PDF version  
9154 - information. Starting with PDF version 1.7, corresponding to  
9155 - ISO 32000, Adobe adds new functionality by increasing the  
9156 - extension level rather than increasing the version. This  
9157 - support includes addition of the  
9158 - <function>QPDF::getExtensionLevel</function> method for  
9159 - retrieving the document's extension level, addition of  
9160 - versions of  
9161 - <function>QPDFWriter::setMinimumPDFVersion</function> and  
9162 - <function>QPDFWriter::forcePDFVersion</function> that accept  
9163 - an extension level, and extended syntax for specifying forced  
9164 - and minimum versions on the command line as described in <xref  
9165 - linkend="ref.advanced-transformation"/>. Corresponding  
9166 - functions have been added to the C API as well.  
9167 - </para>  
9168 - </listitem>  
9169 - <listitem>  
9170 - <para>  
9171 - Minor fixes to prevent qpdf from referencing objects in the  
9172 - file that are not referenced in the file's overall structure.  
9173 - Most files don't have any such objects, but some files  
9174 - contain unreferenced objects with errors, so these fixes  
9175 - prevent qpdf from needlessly rejecting or complaining about  
9176 - such objects.  
9177 - </para>  
9178 - </listitem>  
9179 - <listitem>  
9180 - <para>  
9181 - Add new generalized methods for reading and writing files  
9182 - from/to programmer-defined sources. The method  
9183 - <function>QPDF::processInputSource</function> allows the  
9184 - programmer to use any input source for the input file, and  
9185 - <function>QPDFWriter::setOutputPipeline</function> allows the  
9186 - programmer to write the output file through any pipeline.  
9187 - These methods would make it possible to perform any number of  
9188 - specialized operations, such as accessing external storage  
9189 - systems, creating bindings for qpdf in other programming  
9190 - languages that have their own I/O systems, etc.  
9191 - </para>  
9192 - </listitem>  
9193 - <listitem>  
9194 - <para>  
9195 - Add new method <function>QPDF::getEncryptionKey</function> for  
9196 - retrieving the underlying encryption key used in the file.  
9197 - </para>  
9198 - </listitem>  
9199 - <listitem>  
9200 - <para>  
9201 - This release includes a small handful of non-compatible API  
9202 - changes. While effort is made to avoid such changes, all the  
9203 - non-compatible API changes in this version were to parts of  
9204 - the API that would likely never be used outside the library  
9205 - itself. In all cases, the altered methods or structures were  
9206 - parts of the <classname>QPDF</classname> that were public to  
9207 - enable them to be called from either  
9208 - <classname>QPDFWriter</classname> or were part of validation  
9209 - code that was over-zealous in reporting problems in parts of  
9210 - the file that would not ordinarily be referenced. In no case  
9211 - did any of the removed methods do anything worse than falsely  
9212 - report error conditions in files that were broken in ways that  
9213 - didn't matter. The following public parts of the  
9214 - <classname>QPDF</classname> class were changed in a  
9215 - non-compatible way:  
9216 - <itemizedlist>  
9217 - <listitem>  
9218 - <para>  
9219 - Updated nested <classname>QPDF::EncryptionData</classname>  
9220 - class to add fields needed by the newer encryption formats,  
9221 - member variables changed to private so that future changes  
9222 - will not require breaking backward compatibility.  
9223 - </para>  
9224 - </listitem>  
9225 - <listitem>  
9226 - <para>  
9227 - Added additional parameters to  
9228 - <function>compute_data_key</function>, which is used by  
9229 - <classname>QPDFWriter</classname> to compute the encryption  
9230 - key used to encrypt a specific object.  
9231 - </para>  
9232 - </listitem>  
9233 - <listitem>  
9234 - <para>  
9235 - Removed the method  
9236 - <function>flattenScalarReferences</function>. This method  
9237 - was previously used prior to writing a new PDF file, but it  
9238 - has the undesired side effect of causing qpdf to read  
9239 - objects in the file that were not referenced. Some  
9240 - otherwise valid files have unreferenced objects with errors in  
9241 - them, so this could cause qpdf to reject files that would  
9242 - be accepted by virtually all other PDF readers. In fact,  
9243 - qpdf relied on only a very small part of what  
9244 - flattenScalarReferences did, so only this part has been  
9245 - preserved, and it is now done directly inside  
9246 - <classname>QPDFWriter</classname>.  
9247 - </para>  
9248 - </listitem>  
9249 - <listitem>  
9250 - <para>  
9251 - Removed the method <function>decodeStreams</function>.  
9252 - This method was used by the @1@option@1@--check@2@option@2@ option  
9253 - of the @1@command@1@qpdf@2@command@2@ command-line tool to force  
9254 - all streams in the file to be decoded, but it also suffered  
9255 - from the problem of opening otherwise unreferenced streams  
9256 - and thus could report false positives. The  
9257 - @1@option@1@--check@2@option@2@ option now causes qpdf to go  
9258 - through all the motions of writing a new file based on the  
9259 - original one, so it will always reference and check exactly  
9260 - those parts of a file that any ordinary viewer would check.  
9261 - </para>  
9262 - </listitem>  
9263 - <listitem>  
9264 - <para>  
9265 - Removed the method  
9266 - <function>trimTrailerForWrite</function>. This method was  
9267 - used by <classname>QPDFWriter</classname> to modify the  
9268 - original QPDF object by removing fields from the trailer  
9269 - dictionary that wouldn't apply to the newly written file.  
9270 - This functionality, though generally harmless, was a poor  
9271 - implementation and has been replaced by having QPDFWriter  
9272 - filter these out when copying the trailer rather than  
9273 - modifying the original QPDF object. (Note that qpdf never  
9274 - modifies the original file itself.)  
9275 - </para>  
9276 - </listitem>  
9277 - </itemizedlist>  
9278 - </para>  
9279 - </listitem>  
9280 - <listitem>  
9281 - <para>  
9282 - Allow the PDF header to appear anywhere in the first 1024  
9283 - bytes of the file. This is consistent with what other readers  
9284 - do.  
9285 - </para>  
9286 - </listitem>  
9287 - <listitem>  
9288 - <para>  
9289 - Fix the @1@command@1@pkg-config@2@command@2@ files to list zlib and  
9290 - pcre in <function>Requires.private</function> to better  
9291 - support static linking using @1@command@1@pkg-config@2@command@2@.  
9292 - </para>  
9293 - </listitem>  
9294 - </itemizedlist>  
9295 - </listitem>  
9296 - </varlistentry>  
9297 - <varlistentry>  
9298 - <term>3.0.2: September 6, 2012</term>  
9299 - <listitem>  
9300 - <itemizedlist>  
9301 - <listitem>  
9302 - <para>  
9303 - Bug fix: <function>QPDFWriter::setOutputMemory</function> did  
9304 - not work when not used with  
9305 - <function>QPDFWriter::setStaticID</function>, which made it  
9306 - pretty much useless. This has been fixed.  
9307 - </para>  
9308 - </listitem>  
9309 - <listitem>  
9310 - <para>  
9311 - New API call  
9312 - <function>QPDFWriter::setExtraHeaderText</function> inserts  
9313 - additional text near the header of the PDF file. The intended  
9314 - use case is to insert comments that may be consumed by a  
9315 - downstream application, though other use cases may exist.  
9316 - </para>  
9317 - </listitem>  
9318 - </itemizedlist>  
9319 - </listitem>  
9320 - </varlistentry>  
9321 - <varlistentry>  
9322 - <term>3.0.1: August 11, 2012</term>  
9323 - <listitem>  
9324 - <itemizedlist>  
9325 - <listitem>  
9326 - <para>  
9327 - Version 3.0.0 included addition of files for  
9328 - @1@command@1@pkg-config@2@command@2@, but this was not mentioned in  
9329 - the release notes. The release notes for 3.0.0 were updated  
9330 - to mention this.  
9331 - </para>  
9332 - </listitem>  
9333 - <listitem>  
9334 - <para>  
9335 - Bug fix: if an object stream ended with a scalar object not  
9336 - followed by space, qpdf would incorrectly report that it  
9337 - encountered a premature EOF. This bug has been in qpdf since  
9338 - version 2.0.  
9339 - </para>  
9340 - </listitem>  
9341 - </itemizedlist>  
9342 - </listitem>  
9343 - </varlistentry>  
9344 - <varlistentry>  
9345 - <term>3.0.0: August 2, 2012</term>  
9346 - <listitem>  
9347 - <itemizedlist>  
9348 - <listitem>  
9349 - <para>  
9350 - Acknowledgment: I would like to express gratitude for the  
9351 - contributions of Tobias Hoffmann toward the release of qpdf  
9352 - version 3.0. He is responsible for most of the implementation  
9353 - and design of the new API for manipulating pages, and  
9354 - contributed code and ideas for many of the improvements made  
9355 - in version 3.0. Without his work, this release would  
9356 - certainly not have happened as soon as it did, if at all.  
9357 - </para>  
9358 - </listitem>  
9359 - <listitem>  
9360 - <para>  
9361 - <emphasis>Non-compatible API change:</emphasis> The version of  
9362 - <function>QPDFObjectHandle::replaceStreamData</function> that  
9363 - uses a <classname>StreamDataProvider</classname> no longer  
9364 - requires (or accepts) a <varname>length</varname> parameter.  
9365 - See <xref linkend="ref.upgrading-to-3.0"/> for an explanation.  
9366 - While care is taken to avoid non-compatible API changes in  
9367 - general, an exception was made this time because the new  
9368 - interface offers an opportunity to significantly simplify  
9369 - calling code.  
9370 - </para>  
9371 - </listitem>  
9372 - <listitem>  
9373 - <para>  
9374 - Support has been added for large files. The test suite  
9375 - verifies support for files larger than 4 gigabytes, and manual  
9376 - testing has verified support for files larger than 10  
9377 - gigabytes. Large file support is available for both 32-bit  
9378 - and 64-bit platforms as long as the compiler and underlying  
9379 - platforms support it.  
9380 - </para>  
9381 - </listitem>  
9382 - <listitem>  
9383 - <para>  
9384 - Support for page selection (splitting and merging PDF files)  
9385 - has been added to the @1@command@1@qpdf@2@command@2@ command-line  
9386 - tool. See <xref linkend="ref.page-selection"/>.  
9387 - </para>  
9388 - </listitem>  
9389 - <listitem>  
9390 - <para>  
9391 - Options have been added to the @1@command@1@qpdf@2@command@2@  
9392 - command-line tool for copying encryption parameters from  
9393 - another file. See <xref linkend="ref.basic-options"/>.  
9394 - </para>  
9395 - </listitem>  
9396 - <listitem>  
9397 - <para>  
9398 - New methods have been added to the <classname>QPDF</classname>  
9399 - object for adding and removing pages. See <xref  
9400 - linkend="ref.adding-and-remove-pages"/>.  
9401 - </para>  
9402 - </listitem>  
9403 - <listitem>  
9404 - <para>  
9405 - New methods have been added to the <classname>QPDF</classname>  
9406 - object for copying objects from other PDF files. See <xref  
9407 - linkend="ref.foreign-objects"/>.  
9408 - </para>  
9409 - </listitem>  
9410 - <listitem>  
9411 - <para>  
9412 - A new method <function>QPDFObjectHandle::parse</function> has  
9413 - been added for constructing  
9414 - <classname>QPDFObjectHandle</classname> objects from a string  
9415 - description.  
9416 - </para>  
9417 - </listitem>  
9418 - <listitem>  
9419 - <para>  
9420 - Methods have been added to <classname>QPDFWriter</classname>  
9421 - to allow writing to an already open stdio <type>FILE*</type> in  
9422 - addition to writing to standard output or a named file.  
9423 - Methods have been added to <classname>QPDF</classname> to be  
9424 - able to process a file from an already open stdio  
9425 - <type>FILE*</type>. This makes it possible to read and write  
9426 - PDF from secure temporary files that have been unlinked prior  
9427 - to being fully read or written.  
9428 - </para>  
9429 - </listitem>  
9430 - <listitem>  
9431 - <para>  
9432 - The <function>QPDF::emptyPDF</function> method can be used to allow  
9433 - creation of PDF files from scratch. The example  
9434 - @1@filename@1@examples/pdf-create.cc@2@filename@2@ illustrates how it  
9435 - can be used.  
9436 - </para>  
9437 - </listitem>  
9438 - <listitem>  
9439 - <para>  
9440 - Several methods that take  
9441 - <classname>PointerHolder&lt;Buffer&gt;</classname> can now  
9442 - also accept <type>std::string</type> arguments.  
9443 - </para>  
9444 - </listitem>  
9445 - <listitem>  
9446 - <para>  
9447 - Many new convenience methods have been added to the library,  
9448 - most in <classname>QPDFObjectHandle</classname>. See  
9449 - @1@filename@1@ChangeLog@2@filename@2@ for a full list.  
9450 - </para>  
9451 - </listitem>  
9452 - <listitem>  
9453 - <para>  
9454 - When building on a platform that supports ELF shared libraries  
9455 - (such as Linux), symbol versions are enabled by default. They  
9456 - can be disabled by passing  
9457 - @1@option@1@--disable-ld-version-script@2@option@2@ to  
9458 - @1@command@1@./configure@2@command@2@.  
9459 - </para>  
9460 - </listitem>  
9461 - <listitem>  
9462 - <para>  
9463 - The file @1@filename@1@libqpdf.pc@2@filename@2@ is now installed to  
9464 - support @1@command@1@pkg-config@2@command@2@.  
9465 - </para>  
9466 - </listitem>  
9467 - <listitem>  
9468 - <para>  
9469 - Image comparison tests are off by default now since they are  
9470 - not needed to verify a correct build or port of qpdf. They  
9471 - are needed only when changing the actual PDF output generated  
9472 - by qpdf. You should enable them if you are making deep  
9473 - changes to qpdf itself. See @1@filename@1@README.md@2@filename@2@ for  
9474 - details.  
9475 - </para>  
9476 - </listitem>  
9477 - <listitem>  
9478 - <para>  
9479 - Large file tests are off by default but can be turned on with  
9480 - @1@command@1@./configure@2@command@2@ or by setting an environment  
9481 - variable before running the test suite. See  
9482 - @1@filename@1@README.md@2@filename@2@ for details.  
9483 - </para>  
9484 - </listitem>  
9485 - <listitem>  
9486 - <para>  
9487 - When qpdf's test suite fails, failures are not printed to the  
9488 - terminal anymore by default. Instead, find them in  
9489 - @1@filename@1@build/qtest.log@2@filename@2@. For packagers who are  
9490 - building with an autobuilder, you can add the  
9491 - @1@option@1@--enable-show-failed-test-output@2@option@2@ option to  
9492 - @1@command@1@./configure@2@command@2@ to restore the old behavior.  
9493 - </para>  
9494 - </listitem>  
9495 - </itemizedlist>  
9496 - </listitem>  
9497 - </varlistentry>  
9498 - <varlistentry>  
9499 - <term>2.3.1: December 28, 2011</term>  
9500 - <listitem>  
9501 - <itemizedlist>  
9502 - <listitem>  
9503 - <para>  
9504 - Fix thread-safety problem resulting from non-thread-safe use  
9505 - of the PCRE library.  
9506 - </para>  
9507 - </listitem>  
9508 - <listitem>  
9509 - <para>  
9510 - Made a few minor documentation fixes.  
9511 - </para>  
9512 - </listitem>  
9513 - <listitem>  
9514 - <para>  
9515 - Add workaround for a bug that appears in some versions of  
9516 - ghostscript to the test suite.  
9517 - </para>  
9518 - </listitem>  
9519 - <listitem>  
9520 - <para>  
9521 - Fix minor build issue for Visual C++ 2010.  
9522 - </para>  
9523 - </listitem>  
9524 - </itemizedlist>  
9525 - </listitem>  
9526 - </varlistentry>  
9527 - <varlistentry>  
9528 - <term>2.3.0: August 11, 2011</term>  
9529 - <listitem>  
9530 - <itemizedlist>  
9531 - <listitem>  
9532 - <para>  
9533 - Bug fix: when preserving existing encryption on encrypted  
9534 - files with cleartext metadata, older qpdf versions would  
9535 - generate password-protected files with no valid password.  
9536 - This operation now works. This bug only affected files  
9537 - created by copying existing encryption parameters; explicit  
9538 - encryption with specification of cleartext metadata worked  
9539 - before and continues to work.  
9540 - </para>  
9541 - </listitem>  
9542 - <listitem>  
9543 - <para>  
9544 - Enhance <classname>QPDFWriter</classname> with a new  
9545 - constructor that allows you to delay the specification of the  
9546 - output file. When using this constructor, you may now call  
9547 - <function>QPDFWriter::setOutputFilename</function> to specify  
9548 - the output file, or you may use  
9549 - <function>QPDFWriter::setOutputMemory</function> to cause  
9550 - <classname>QPDFWriter</classname> to write the resulting PDF  
9551 - file to a memory buffer. You may then use  
9552 - <function>QPDFWriter::getBuffer</function> to retrieve the  
9553 - memory buffer.  
9554 - </para>  
9555 - </listitem>  
9556 - <listitem>  
9557 - <para>  
9558 - Add new API call <function>QPDF::replaceObject</function> for  
9559 - replacing objects by object ID  
9560 - </para>  
9561 - </listitem>  
9562 - <listitem>  
9563 - <para>  
9564 - Add new API call <function>QPDF::swapObjects</function> for  
9565 - swapping two objects by object ID  
9566 - </para>  
9567 - </listitem>  
9568 - <listitem>  
9569 - <para>  
9570 - Add <function>QPDFObjectHandle::getDictAsMap</function> and  
9571 - <function>QPDFObjectHandle::getArrayAsVector</function> to  
9572 - allow retrieval of dictionary objects as maps and array  
9573 - objects as vectors.  
9574 - </para>  
9575 - </listitem>  
9576 - <listitem>  
9577 - <para>  
9578 - Add functions <function>qpdf_get_info_key</function> and  
9579 - <function>qpdf_set_info_key</function> to the C API for  
9580 - manipulating string fields of the document's  
9581 - <literal>/Info</literal> dictionary.  
9582 - </para>  
9583 - </listitem>  
9584 - <listitem>  
9585 - <para>  
9586 - Add functions <function>qpdf_init_write_memory</function>,  
9587 - <function>qpdf_get_buffer_length</function>, and  
9588 - <function>qpdf_get_buffer</function> to the C API for writing  
9589 - PDF files to a memory buffer instead of a file.  
9590 - </para>  
9591 - </listitem>  
9592 - </itemizedlist>  
9593 - </listitem>  
9594 - </varlistentry>  
9595 - <varlistentry>  
9596 - <term>2.2.4: June 25, 2011</term>  
9597 - <listitem>  
9598 - <itemizedlist>  
9599 - <listitem>  
9600 - <para>  
9601 - Fix installation and compilation issues; no functionality  
9602 - changes.  
9603 - </para>  
9604 - </listitem>  
9605 - </itemizedlist>  
9606 - </listitem>  
9607 - </varlistentry>  
9608 - <varlistentry>  
9609 - <term>2.2.3: April 30, 2011</term>  
9610 - <listitem>  
9611 - <itemizedlist>  
9612 - <listitem>  
9613 - <para>  
9614 - Handle some damaged streams with incorrect characters  
9615 - following the stream keyword.  
9616 - </para>  
9617 - </listitem>  
9618 - <listitem>  
9619 - <para>  
9620 - Improve handling of inline images when normalizing content  
9621 - streams.  
9622 - </para>  
9623 - </listitem>  
9624 - <listitem>  
9625 - <para>  
9626 - Enhance error recovery to properly handle files that use  
9627 - object 0 as a regular object, which is specifically disallowed  
9628 - by the spec.  
9629 - </para>  
9630 - </listitem>  
9631 - </itemizedlist>  
9632 - </listitem>  
9633 - </varlistentry>  
9634 - <varlistentry>  
9635 - <term>2.2.2: October 4, 2010</term>  
9636 - <listitem>  
9637 - <itemizedlist>  
9638 - <listitem>  
9639 - <para>  
9640 - Add new function <function>qpdf_read_memory</function>  
9641 - to the C API to call  
9642 - <function>QPDF::processMemoryFile</function>. This was an  
9643 - omission in qpdf 2.2.1.  
9644 - </para>  
9645 - </listitem>  
9646 - </itemizedlist>  
9647 - </listitem>  
9648 - </varlistentry>  
9649 - <varlistentry>  
9650 - <term>2.2.1: October 1, 2010</term>  
9651 - <listitem>  
9652 - <itemizedlist>  
9653 - <listitem>  
9654 - <para>  
9655 - Add new method <function>QPDF::setOutputStreams</function>  
9656 - to replace <varname>std::cout</varname> and  
9657 - <varname>std::cerr</varname> with other streams for generation  
9658 - of diagnostic messages and error messages. This can be useful  
9659 - for GUIs or other applications that want to capture any output  
9660 - generated by the library to present to the user in some other  
9661 - way. Note that QPDF does not write to  
9662 - <varname>std::cout</varname> (or the specified output stream)  
9663 - except where explicitly mentioned in  
9664 - @1@filename@1@QPDF.hh@2@filename@2@, and that the only use of the  
9665 - error stream is for warnings. Note also that output of  
9666 - warnings is suppressed when  
9667 - <literal>setSuppressWarnings(true)</literal> is called.  
9668 - </para>  
9669 - </listitem>  
9670 - <listitem>  
9671 - <para>  
9672 - Add new method <function>QPDF::processMemoryFile</function>  
9673 - for operating on PDF files that are loaded into memory rather  
9674 - than in a file on disk.  
9675 - </para>  
9676 - </listitem>  
9677 - <listitem>  
9678 - <para>  
9679 - Give a warning but otherwise ignore empty PDF objects by  
9680 - treating them as null. Empty objects are not permitted by the  
9681 - PDF specification but have been known to appear in some actual  
9682 - PDF files.  
9683 - </para>  
9684 - </listitem>  
9685 - <listitem>  
9686 - <para>  
9687 - Handle inline image filter abbreviations when they appear as  
9688 - stream filter abbreviations. The PDF specification does not  
9689 - allow use of stream filter abbreviations in this way, but  
9690 - Adobe Reader and some other PDF readers accept them since they  
9691 - sometimes appear incorrectly in actual PDF files.  
9692 - </para>  
9693 - </listitem>  
9694 - <listitem>  
9695 - <para>  
9696 - Implement miscellaneous enhancements to  
9697 - <classname>PointerHolder</classname> and  
9698 - <classname>Buffer</classname> to support other changes.  
9699 - </para>  
9700 - </listitem>  
9701 - </itemizedlist>  
9702 - </listitem>  
9703 - </varlistentry>  
9704 - <varlistentry>  
9705 - <term>2.2.0: August 14, 2010</term>  
9706 - <listitem>  
9707 - <itemizedlist>  
9708 - <listitem>  
9709 - <para>  
9710 - Add new methods to <classname>QPDFObjectHandle</classname>  
9711 - (<function>newStream</function> and  
9712 - <function>replaceStreamData</function>) for creating new  
9713 - streams and replacing stream data. This makes it possible to  
9714 - perform a wide range of operations that were not previously  
9715 - possible.  
9716 - </para>  
9717 - </listitem>  
9718 - <listitem>  
9719 - <para>  
9720 - Add new helper method in  
9721 - <classname>QPDFObjectHandle</classname>  
9722 - (<function>addPageContents</function>) for appending or  
9723 - prepending new content streams to a page. This method makes  
9724 - it possible to manipulate content streams without having to be  
9725 - concerned whether a page's contents are a single stream or an  
9726 - array of streams.  
9727 - </para>  
9728 - </listitem>  
9729 - <listitem>  
9730 - <para>  
9731 - Add new method in <classname>QPDFObjectHandle</classname>:  
9732 - <function>replaceOrRemoveKey</function>, which replaces a  
9733 - dictionary key  
9734 - with a given value unless the value is null, in which case it  
9735 - removes the key instead.  
9736 - </para>  
9737 - </listitem>  
9738 - <listitem>  
9739 - <para>  
9740 - Add new method in <classname>QPDFObjectHandle</classname>:  
9741 - <function>getRawStreamData</function>, which returns the raw  
9742 - (unfiltered) stream data into a buffer. This complements the  
9743 - <function>getStreamData</function> method, which returns the  
9744 - filtered (uncompressed) stream data and can only be used when  
9745 - the stream's data is filterable.  
9746 - </para>  
9747 - </listitem>  
9748 - <listitem>  
9749 - <para>  
9750 - Provide two new examples:  
9751 - @1@command@1@pdf-double-page-size@2@command@2@ and  
9752 - @1@command@1@pdf-invert-images@2@command@2@ that illustrate the newly  
9753 - added interfaces.  
9754 - </para>  
9755 - </listitem>  
9756 - <listitem>  
9757 - <para>  
9758 - Fix a memory leak that would cause loss of a few bytes for  
9759 - every object involved in a cycle of object references. Thanks  
9760 - to Jian Ma for calling my attention to the leak.  
9761 - </para>  
9762 - </listitem>  
9763 - </itemizedlist>  
9764 - </listitem>  
9765 - </varlistentry>  
9766 - <varlistentry>  
9767 - <term>2.1.5: April 25, 2010</term>  
9768 - <listitem>  
9769 - <itemizedlist>  
9770 - <listitem>  
9771 - <para>  
9772 - Remove restriction of file identifier strings to 16 bytes.  
9773 - This unnecessary restriction was preventing qpdf from being  
9774 - able to encrypt or decrypt files with identifier strings that  
9775 - were not exactly 16 bytes long. The specification imposes no  
9776 - such restriction.  
9777 - </para>  
9778 - </listitem>  
9779 - </itemizedlist>  
9780 - </listitem>  
9781 - </varlistentry>  
9782 - <varlistentry>  
9783 - <term>2.1.4: April 18, 2010</term>  
9784 - <listitem>  
9785 - <itemizedlist>  
9786 - <listitem>  
9787 - <para>  
9788 - Apply the same padding calculation fix from version 2.1.2 to  
9789 - the main cross reference stream as well.  
9790 - </para>  
9791 - </listitem>  
9792 - <listitem>  
9793 - <para>  
9794 - Since @1@command@1@qpdf --check@2@command@2@ only performs limited  
9795 - checks, clarify the output to make it clear that there still  
9796 - may be errors that qpdf can't check. This should make it less  
9797 - surprising to people when another PDF reader is unable to read  
9798 - a file that qpdf thinks is okay.  
9799 - </para>  
9800 - </listitem>  
9801 - </itemizedlist>  
9802 - </listitem>  
9803 - </varlistentry>  
9804 - <varlistentry>  
9805 - <term>2.1.3: March 27, 2010</term>  
9806 - <listitem>  
9807 - <itemizedlist>  
9808 - <listitem>  
9809 - <para>  
9810 - Fix bug that could cause a failure when rewriting PDF files  
9811 - that contain object streams with unreferenced objects that in  
9812 - turn reference indirect scalars.  
9813 - </para>  
9814 - </listitem>  
9815 - <listitem>  
9816 - <para>  
9817 - Don't complain about (invalid) AES streams that aren't a  
9818 - multiple of 16 bytes. Instead, pad them before decrypting.  
9819 - </para>  
9820 - </listitem>  
9821 - </itemizedlist>  
9822 - </listitem>  
9823 - </varlistentry>  
9824 - <varlistentry>  
9825 - <term>2.1.2: January 24, 2010</term>  
9826 - <listitem>  
9827 - <itemizedlist>  
9828 - <listitem>  
9829 - <para>  
9830 - Fix bug in padding around first half cross reference stream in  
9831 - linearized files. The bug could cause an assertion failure  
9832 - when linearizing certain unlucky files.  
9833 - </para>  
9834 - </listitem>  
9835 - </itemizedlist>  
9836 - </listitem>  
9837 - </varlistentry>  
9838 - <varlistentry>  
9839 - <term>2.1.1: December 14, 2009</term>  
9840 - <listitem>  
9841 - <itemizedlist>  
9842 - <listitem>  
9843 - <para>  
9844 - No changes in functionality; insert missing include in an  
9845 - internal library header file to support gcc 4.4, and update  
9846 - test suite to ignore broken Adobe Reader installations.  
9847 - </para>  
9848 - </listitem>  
9849 - </itemizedlist>  
9850 - </listitem>  
9851 - </varlistentry>  
9852 - <varlistentry>  
9853 - <term>2.1: October 30, 2009</term>  
9854 - <listitem>  
9855 - <itemizedlist>  
9856 - <listitem>  
9857 - <para>  
9858 - This is the first version of qpdf to include Windows support.  
9859 - On Windows, it is possible to build a DLL. Additionally, a  
9860 - partial C-language API has been introduced, which makes it  
9861 - possible to call qpdf functions from non-C++ environments. I  
9862 - am very grateful to Žarko Gajić (<ulink  
9863 - url="http://zarko-gajic.iz.hr/">http://zarko-gajic.iz.hr/</ulink>)  
9864 - for tirelessly testing numerous pre-release versions of this  
9865 - DLL and providing many excellent suggestions on improving the  
9866 - interface.  
9867 - </para>  
9868 - <para>  
9869 - For programming to the C interface, please see the header file  
9870 - @1@filename@1@qpdf/qpdf-c.h@2@filename@2@ and the example  
9871 - @1@filename@1@examples/pdf-linearize.c@2@filename@2@.  
9872 - </para>  
9873 - </listitem>  
9874 - <listitem>  
9875 - <para>  
9876 - Žarko Gajić has written a Delphi wrapper for qpdf, which can  
9877 - be downloaded from qpdf's download site. Žarko's Delphi  
9878 - wrapper is released with the same licensing terms as qpdf  
9879 - itself and comes with this disclaimer: "Delphi wrapper  
9880 - unit @1@filename@1@qpdf.pas@2@filename@2@ created by Žarko Gajić  
9881 - (<ulink  
9882 - url="http://zarko-gajic.iz.hr/">http://zarko-gajic.iz.hr/</ulink>).  
9883 - Use at your own risk and for whatever purpose you want. No  
9884 - support is provided. Sample code is provided."  
9885 - </para>  
9886 - </listitem>  
9887 - <listitem>  
9888 - <para>  
9889 - Support has been added for AES encryption and crypt filters.  
9890 - Although qpdf does not presently support files that use  
9891 - PKI-based encryption, with the addition of AES and crypt  
9892 - filters, qpdf is now able to open most encrypted files  
9893 - created with newer versions of Acrobat or other PDF creation  
9894 - software. Note that I have not been able to get very many  
9895 - files encrypted in this way, so it's possible there could  
9896 - still be some cases that qpdf can't handle. Please report  
9897 - them if you find them.  
9898 - </para>  
9899 - </listitem>  
9900 - <listitem>  
9901 - <para>  
9902 - Many error messages have been improved to include more  
9903 - information in hopes of making qpdf a more useful tool for PDF  
9904 - experts to use in manually recovering damaged PDF files.  
9905 - </para>  
9906 - </listitem>  
9907 - <listitem>  
9908 - <para>  
9909 - Attempt to avoid compressing metadata streams if possible.  
9910 - This is consistent with other PDF creation applications.  
9911 - </para>  
9912 - </listitem>  
9913 - <listitem>  
9914 - <para>  
9915 - Provide new command-line options for AES encryption, cleartext  
9916 - metadata, and setting the minimum and forced PDF versions of  
9917 - output files.  
9918 - </para>  
9919 - </listitem>  
9920 - <listitem>  
9921 - <para>  
9922 - Add additional methods to the <classname>QPDF</classname>  
9923 - object for querying the document's permissions. Although qpdf  
9924 - does not enforce these permissions, it does make them  
9925 - available so that applications that use qpdf can enforce  
9926 - permissions.  
9927 - </para>  
9928 - </listitem>  
9929 - <listitem>  
9930 - <para>  
9931 - The @1@option@1@--check@2@option@2@ option to @1@command@1@qpdf@2@command@2@  
9932 - has been extended to include some additional information.  
9933 - </para>  
9934 - </listitem>  
9935 - <listitem>  
9936 - <para>  
9937 - There have been a handful of non-compatible API changes. For  
9938 - details, see <xref linkend="ref.upgrading-to-2.1"/>.  
9939 - </para>  
9940 - </listitem>  
9941 - </itemizedlist>  
9942 - </listitem>  
9943 - </varlistentry>  
9944 - <varlistentry>  
9945 - <term>2.0.6: May 3, 2009</term>  
9946 - <listitem>  
9947 - <itemizedlist>  
9948 - <listitem>  
9949 - <para>  
9950 - Do not attempt to uncompress streams that have decode  
9951 - parameters we don't recognize. Earlier versions of qpdf would  
9952 - have rejected files with such streams.  
9953 - </para>  
9954 - </listitem>  
9955 - </itemizedlist>  
9956 - </listitem>  
9957 - </varlistentry>  
9958 - <varlistentry>  
9959 - <term>2.0.5: March 10, 2009</term>  
9960 - <listitem>  
9961 - <itemizedlist>  
9962 - <listitem>  
9963 - <para>  
9964 - Improve error handling in the LZW decoder, and fix a small  
9965 - error introduced in the previous version with regard to  
9966 - handling full tables. The LZW decoder has been more strongly  
9967 - verified in this release.  
9968 - </para>  
9969 - </listitem>  
9970 - </itemizedlist>  
9971 - </listitem>  
9972 - </varlistentry>  
9973 - <varlistentry>  
9974 - <term>2.0.4: February 21, 2009</term>  
9975 - <listitem>  
9976 - <itemizedlist>  
9977 - <listitem>  
9978 - <para>  
9979 - Include proper support for LZW streams encoded without the  
9980 - "early code change" flag. Special thanks to Atom  
9981 - Smasher who reported the problem and provided an input file  
9982 - compressed in this way, which I did not previously have.  
9983 - </para>  
9984 - </listitem>  
9985 - <listitem>  
9986 - <para>  
9987 - Implement some improvements to file recovery logic.  
9988 - </para>  
9989 - </listitem>  
9990 - </itemizedlist>  
9991 - </listitem>  
9992 - </varlistentry>  
9993 - <varlistentry>  
9994 - <term>2.0.3: February 15, 2009</term>  
9995 - <listitem>  
9996 - <itemizedlist>  
9997 - <listitem>  
9998 - <para>  
9999 - Compile cleanly with gcc 4.4.  
10000 - </para>  
10001 - </listitem>  
10002 - <listitem>  
10003 - <para>  
10004 - Handle strings encoded as UTF-16BE properly.  
10005 - </para>  
10006 - </listitem>  
10007 - </itemizedlist>  
10008 - </listitem>  
10009 - </varlistentry>  
10010 - <varlistentry>  
10011 - <term>2.0.2: June 30, 2008</term>  
10012 - <listitem>  
10013 - <itemizedlist>  
10014 - <listitem>  
10015 - <para>  
10016 - Update test suite to work properly with a  
10017 - non-@1@command@1@bash@2@command@2@ @1@filename@1@/bin/sh@2@filename@2@ and  
10018 - with Perl 5.10. No changes were made to the actual qpdf  
10019 - source code itself for this release.  
10020 - </para>  
10021 - </listitem>  
10022 - </itemizedlist>  
10023 - </listitem>  
10024 - </varlistentry>  
10025 - <varlistentry>  
10026 - <term>2.0.1: May 6, 2008</term>  
10027 - <listitem>  
10028 - <itemizedlist>  
10029 - <listitem>  
10030 - <para>  
10031 - No changes in functionality or interface. This release  
10032 - includes fixes to the source code so that qpdf compiles  
10033 - properly and passes its test suite on a broader range of  
10034 - platforms. See @1@filename@1@ChangeLog@2@filename@2@ in the source  
10035 - distribution for details.  
10036 - </para>  
10037 - </listitem>  
10038 - </itemizedlist>  
10039 - </listitem>  
10040 - </varlistentry>  
10041 - <varlistentry>  
10042 - <term>2.0: April 29, 2008</term>  
10043 - <listitem>  
10044 - <itemizedlist>  
10045 - <listitem>  
10046 - <para>  
10047 - First public release.  
10048 - </para>  
10049 - </listitem>  
10050 - </itemizedlist>  
10051 - </listitem>  
10052 - </varlistentry>  
10053 - </variablelist>  
10054 - </appendix>  
10055 - <appendix id="ref.upgrading-to-2.1">  
10056 - <title>Upgrading from 2.0 to 2.1</title>  
10057 - <para>  
10058 - Although, as a general rule, we like to avoid introducing  
10059 - source-level incompatibilities in qpdf's interface, there were a  
10060 - few non-compatible changes made in this version. A considerable  
10061 - amount of source code that uses qpdf will probably compile without  
10062 - any changes, but in some cases, you may have to update your code.  
10063 - The changes are enumerated here. There are also some new  
10064 - interfaces; for those, please refer to the header files.  
10065 - </para>  
10066 - <itemizedlist>  
10067 - <listitem>  
10068 - <para>  
10069 - QPDF's exception handling mechanism now uses  
10070 - <classname>std::logic_error</classname> for internal errors and  
10071 - <classname>std::runtime_error</classname> for runtime errors in  
10072 - favor of the now removed <classname>QEXC</classname> classes used  
10073 - in previous versions. The <classname>QEXC</classname> exception  
10074 - classes predated the addition of the  
10075 - @1@filename@1@&lt;stdexcept&gt;@2@filename@2@ header file to the C++  
10076 - standard library. Most of the exceptions thrown by the qpdf  
10077 - library itself are still of type <classname>QPDFExc</classname>  
10078 - which is now derived from  
10079 - <classname>std::runtime_error</classname>. Programs that caught  
10080 - an instance of <classname>std::exception</classname> and  
10081 - displayed it by calling the <function>what()</function> method  
10082 - will not need to be changed.  
10083 - </para>  
10084 - </listitem>  
10085 - <listitem>  
10086 - <para>  
10087 - The <classname>QPDFExc</classname> class now internally  
10088 - represents various fields of the error condition and provides  
10089 - interfaces for querying them. Among the fields is a numeric  
10090 - error code that can help applications act differently on (a small  
10091 - number of) different error conditions. See  
10092 - @1@filename@1@QPDFExc.hh@2@filename@2@ for details.  
10093 - </para>  
10094 - </listitem>  
10095 - <listitem>  
10096 - <para>  
10097 - Warnings can be retrieved from qpdf as instances of  
10098 - <classname>QPDFExc</classname> instead of strings.  
10099 - </para>  
10100 - </listitem>  
10101 - <listitem>  
10102 - <para>  
10103 - The nested <classname>QPDF::EncryptionData</classname> class's  
10104 - constructor takes an additional argument. This class is  
10105 - primarily intended to be used by  
10106 - <classname>QPDFWriter</classname>. There's not really anything  
10107 - useful an end-user application could do with it. It probably  
10108 - shouldn't really be part of the public interface to begin with.  
10109 - Likewise, some of the methods for computing internal encryption  
10110 - dictionary parameters have changed to support  
10111 - <literal>/R=4</literal> encryption.  
10112 - </para>  
10113 - </listitem>  
10114 - <listitem>  
10115 - <para>  
10116 - The method <function>QPDF::getUserPassword</function> has been  
10117 - removed since it didn't do what people would think it did. There  
10118 - are now two new methods:  
10119 - <function>QPDF::getPaddedUserPassword</function> and  
10120 - <function>QPDF::getTrimmedUserPassword</function>. The first one  
10121 - does what the old <function>QPDF::getUserPassword</function>  
10122 - method used to do, which is to return the password with possible  
10123 - binary padding as specified by the PDF specification. The second  
10124 - one returns a human-readable password string.  
10125 - </para>  
10126 - </listitem>  
10127 - <listitem>  
10128 - <para>  
10129 - The enumerated types that used to be nested in  
10130 - <classname>QPDFWriter</classname> have moved to top-level  
10131 - enumerated types and are now defined in the file  
10132 - @1@filename@1@qpdf/Constants.h@2@filename@2@. This enables them to be  
10133 - shared by both the C and C++ interfaces.  
10134 - </para>  
10135 - </listitem>  
10136 - </itemizedlist>  
10137 - </appendix>  
10138 - <appendix id="ref.upgrading-to-3.0">  
10139 - <title>Upgrading to 3.0</title>  
10140 - <para>  
10141 - For the most part, the API for qpdf version 3.0 is backward  
10142 - compatible with versions 2.1 and later. There are two exceptions:  
10143 - <itemizedlist>  
10144 - <listitem>  
10145 - <para>  
10146 - The method  
10147 - <function>QPDFObjectHandle::replaceStreamData</function> that  
10148 - uses a <classname>StreamDataProvider</classname> to provide the  
10149 - stream data no longer takes a <varname>length</varname>  
10150 - parameter. While it would have been easy enough to keep the  
10151 - parameter for backward compatibility, in this case, the  
10152 - parameter was removed since this provides the user an  
10153 - opportunity to simplify the calling code. This method was  
10154 - introduced in version 2.2. At the time, the  
10155 - <varname>length</varname> parameter was required in order to  
10156 - ensure that calls to the stream data provider returned the same  
10157 - length for a specific stream every time they were invoked. In  
10158 - particular, the linearization code depends on this. Instead,  
10159 - qpdf 3.0 and newer check for that constraint explicitly. The  
10160 - first time the stream data provider is called for a specific  
10161 - stream, the actual length is saved, and subsequent calls are  
10162 - required to return the same number of bytes. This means the  
10163 - calling code no longer has to compute the length in advance,  
10164 - which can be a significant simplification. If your code fails  
10165 - to compile because of the extra argument and you don't want to  
10166 - make other changes to your code, just omit the argument.  
10167 - </para>  
10168 - </listitem>  
10169 - <listitem>  
10170 - <para>  
10171 - Many methods take <type>long long</type> instead of other  
10172 - integer types. Most if not all existing code should compile  
10173 - fine with this change since such parameters had always  
10174 - previously been smaller types. This change was required to  
10175 - support files larger than two gigabytes in size.  
10176 - </para>  
10177 - </listitem>  
10178 - </itemizedlist>  
10179 - </para>  
10180 - </appendix>  
10181 - <appendix id="ref.upgrading-to-4.0">  
10182 - <title>Upgrading to 4.0</title>  
10183 - <para>  
10184 - While version 4.0 includes a few non-compatible API changes, it is  
10185 - very unlikely that anyone's code would have used any of those parts  
10186 - of the API since they generally required information that would  
10187 - only be available inside the library. In the unlikely event that  
10188 - you should run into trouble, please see the ChangeLog. See also  
10189 - <xref linkend="ref.release-notes"/> for a complete list of the  
10190 - non-compatible API changes made in this version.  
10191 - </para>  
10192 - </appendix>  
10193 -</book>