Commit 73ee000c33b1c51688f7a9b2a8ce9816da93e7ac

Authored by Jay Berkenbilt
Committed by GitHub
2 parents 9907700f db6c09b6

Merge pull request #975 from m-holger/reflow

Code tidy - reflow comments and strings
include/qpdf/Buffer.hh
@@ -2,22 +2,19 @@ @@ -2,22 +2,19 @@
2 // 2 //
3 // This file is part of qpdf. 3 // This file is part of qpdf.
4 // 4 //
5 -// Licensed under the Apache License, Version 2.0 (the "License");  
6 -// you may not use this file except in compliance with the License.  
7 -// You may obtain a copy of the License at 5 +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
  6 +// in compliance with the License. You may obtain a copy of the License at
8 // 7 //
9 // http://www.apache.org/licenses/LICENSE-2.0 8 // http://www.apache.org/licenses/LICENSE-2.0
10 // 9 //
11 -// Unless required by applicable law or agreed to in writing, software  
12 -// distributed under the License is distributed on an "AS IS" BASIS,  
13 -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
14 -// See the License for the specific language governing permissions and  
15 -// limitations under the License. 10 +// Unless required by applicable law or agreed to in writing, software distributed under the License
  11 +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
  12 +// or implied. See the License for the specific language governing permissions and limitations under
  13 +// the License.
16 // 14 //
17 -// Versions of qpdf prior to version 7 were released under the terms  
18 -// of version 2.0 of the Artistic License. At your option, you may  
19 -// continue to consider qpdf to be licensed under those terms. Please  
20 -// see the manual for additional information. 15 +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
  16 +// License. At your option, you may continue to consider qpdf to be licensed under those terms.
  17 +// Please see the manual for additional information.
21 18
22 #ifndef BUFFER_HH 19 #ifndef BUFFER_HH
23 #define BUFFER_HH 20 #define BUFFER_HH
@@ -34,13 +31,13 @@ class Buffer @@ -34,13 +31,13 @@ class Buffer
34 QPDF_DLL 31 QPDF_DLL
35 Buffer(); 32 Buffer();
36 33
37 - // Create a Buffer object whose memory is owned by the class and  
38 - // will be freed when the Buffer object is destroyed. 34 + // Create a Buffer object whose memory is owned by the class and will be freed when the Buffer
  35 + // object is destroyed.
39 QPDF_DLL 36 QPDF_DLL
40 Buffer(size_t size); 37 Buffer(size_t size);
41 38
42 - // Create a Buffer object whose memory is owned by the caller and  
43 - // will not be freed when the Buffer is destroyed. 39 + // Create a Buffer object whose memory is owned by the caller and will not be freed when the
  40 + // Buffer is destroyed.
44 QPDF_DLL 41 QPDF_DLL
45 Buffer(unsigned char* buf, size_t size); 42 Buffer(unsigned char* buf, size_t size);
46 43
include/qpdf/JSON.hh
@@ -2,38 +2,31 @@ @@ -2,38 +2,31 @@
2 // 2 //
3 // This file is part of qpdf. 3 // This file is part of qpdf.
4 // 4 //
5 -// Licensed under the Apache License, Version 2.0 (the "License");  
6 -// you may not use this file except in compliance with the License.  
7 -// You may obtain a copy of the License at 5 +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
  6 +// in compliance with the License. You may obtain a copy of the License at
8 // 7 //
9 // http://www.apache.org/licenses/LICENSE-2.0 8 // http://www.apache.org/licenses/LICENSE-2.0
10 // 9 //
11 -// Unless required by applicable law or agreed to in writing, software  
12 -// distributed under the License is distributed on an "AS IS" BASIS,  
13 -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
14 -// See the License for the specific language governing permissions and  
15 -// limitations under the License. 10 +// Unless required by applicable law or agreed to in writing, software distributed under the License
  11 +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
  12 +// or implied. See the License for the specific language governing permissions and limitations under
  13 +// the License.
16 // 14 //
17 -// Versions of qpdf prior to version 7 were released under the terms  
18 -// of version 2.0 of the Artistic License. At your option, you may  
19 -// continue to consider qpdf to be licensed under those terms. Please  
20 -// see the manual for additional information. 15 +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
  16 +// License. At your option, you may continue to consider qpdf to be licensed under those terms.
  17 +// Please see the manual for additional information.
21 18
22 #ifndef JSON_HH 19 #ifndef JSON_HH
23 #define JSON_HH 20 #define JSON_HH
24 21
25 -// This is a simple JSON serializer and parser, primarily designed for  
26 -// serializing QPDF Objects as JSON. While it may work as a  
27 -// general-purpose JSON parser/serializer, there are better options.  
28 -// JSON objects contain their data as smart pointers. When one JSON object  
29 -// is added to another, this pointer is copied. This means you can  
30 -// create temporary JSON objects on the stack, add them to other  
31 -// objects, and let them go out of scope safely. It also means that if  
32 -// a JSON object is added in more than one place, all copies  
33 -// share the underlying data. This makes them similar in structure and  
34 -// behavior to QPDFObjectHandle and may feel natural within the QPDF  
35 -// codebase, but it is also a good reason not to use this as a  
36 -// general-purpose JSON package. 22 +// This is a simple JSON serializer and parser, primarily designed for serializing QPDF Objects as
  23 +// JSON. While it may work as a general-purpose JSON parser/serializer, there are better options.
  24 +// JSON objects contain their data as smart pointers. When one JSON object is added to another, this
  25 +// pointer is copied. This means you can create temporary JSON objects on the stack, add them to
  26 +// other objects, and let them go out of scope safely. It also means that if a JSON object is added
  27 +// in more than one place, all copies share the underlying data. This makes them similar in
  28 +// structure and behavior to QPDFObjectHandle and may feel natural within the QPDF codebase, but it
  29 +// is also a good reason not to use this as a general-purpose JSON package.
37 30
38 #include <qpdf/DLL.h> 31 #include <qpdf/DLL.h>
39 #include <qpdf/PointerHolder.hh> // unused -- remove in qpdf 12 (see #785) 32 #include <qpdf/PointerHolder.hh> // unused -- remove in qpdf 12 (see #785)
@@ -61,71 +54,60 @@ class JSON @@ -61,71 +54,60 @@ class JSON
61 QPDF_DLL 54 QPDF_DLL
62 std::string unparse() const; 55 std::string unparse() const;
63 56
64 - // Write the JSON object through a pipeline. The `depth` parameter  
65 - // specifies how deeply nested this is in another JSON structure,  
66 - // which makes it possible to write clean-looking JSON 57 + // Write the JSON object through a pipeline. The `depth` parameter specifies how deeply nested
  58 + // this is in another JSON structure, which makes it possible to write clean-looking JSON
67 // incrementally. 59 // incrementally.
68 QPDF_DLL 60 QPDF_DLL
69 void write(Pipeline*, size_t depth = 0) const; 61 void write(Pipeline*, size_t depth = 0) const;
70 62
71 // Helper methods for writing JSON incrementally. 63 // Helper methods for writing JSON incrementally.
72 // 64 //
73 - // "first" -- Several methods take a `bool& first` parameter. The  
74 - // open methods always set it to true, and the methods to output  
75 - // items always set it to false. This way, the item and close  
76 - // methods can always know whether or not a first item is being  
77 - // written. The intended mode of operation is to start with a new  
78 - // `bool first = true` each time a new container is opened and  
79 - // to pass that `first` through to all the methods that are  
80 - // called to add top-level items to the container as well as to  
81 - // close the container. This lets the JSON object use it to keep  
82 - // track of when it's writing a first object and when it's not. If  
83 - // incrementally writing multiple levels of depth, a new `first`  
84 - // should used for each new container that is opened. 65 + // "first" -- Several methods take a `bool& first` parameter. The open methods always set it to
  66 + // true, and the methods to output items always set it to false. This way, the item and close
  67 + // methods can always know whether or not a first item is being written. The intended mode of
  68 + // operation is to start with a new `bool first = true` each time a new container is opened and
  69 + // to pass that `first` through to all the methods that are called to add top-level items to the
  70 + // container as well as to close the container. This lets the JSON object use it to keep track
  71 + // of when it's writing a first object and when it's not. If incrementally writing multiple
  72 + // levels of depth, a new `first` should be used for each new container that is opened.
85 // 73 //
86 - // "depth" -- Indicate the level of depth. This is used for  
87 - // consistent indentation. When writing incrementally, whenever  
88 - // you call a method to add an item to a container, the value of  
89 - // `depth` should be one more than whatever value is passed to the  
90 - // container open and close methods. 74 + // "depth" -- Indicate the level of depth. This is used for consistent indentation. When writing
  75 + // incrementally, whenever you call a method to add an item to a container, the value of `depth`
  76 + // should be one more than whatever value is passed to the container open and close methods.
91 77
92 // Open methods ignore the value of first and set it to true 78 // Open methods ignore the value of first and set it to true
93 QPDF_DLL 79 QPDF_DLL
94 static void writeDictionaryOpen(Pipeline*, bool& first, size_t depth = 0); 80 static void writeDictionaryOpen(Pipeline*, bool& first, size_t depth = 0);
95 QPDF_DLL 81 QPDF_DLL
96 static void writeArrayOpen(Pipeline*, bool& first, size_t depth = 0); 82 static void writeArrayOpen(Pipeline*, bool& first, size_t depth = 0);
97 - // Close methods don't modify first. A true value indicates that  
98 - // we are closing an empty object. 83 + // Close methods don't modify first. A true value indicates that we are closing an empty object.
99 QPDF_DLL 84 QPDF_DLL
100 static void writeDictionaryClose(Pipeline*, bool first, size_t depth = 0); 85 static void writeDictionaryClose(Pipeline*, bool first, size_t depth = 0);
101 QPDF_DLL 86 QPDF_DLL
102 static void writeArrayClose(Pipeline*, bool first, size_t depth = 0); 87 static void writeArrayClose(Pipeline*, bool first, size_t depth = 0);
103 - // The item methods use the value of first to determine if this is  
104 - // the first item and always set it to false. 88 + // The item methods use the value of first to determine if this is the first item and always set
  89 + // it to false.
105 QPDF_DLL 90 QPDF_DLL
106 static void writeDictionaryItem( 91 static void writeDictionaryItem(
107 Pipeline*, bool& first, std::string const& key, JSON const& value, size_t depth = 0); 92 Pipeline*, bool& first, std::string const& key, JSON const& value, size_t depth = 0);
108 - // Write just the key of a new dictionary item, useful if writing  
109 - // nested structures. Calls writeNext. 93 + // Write just the key of a new dictionary item, useful if writing nested structures. Calls
  94 + // writeNext.
110 QPDF_DLL 95 QPDF_DLL
111 static void 96 static void
112 writeDictionaryKey(Pipeline* p, bool& first, std::string const& key, size_t depth = 0); 97 writeDictionaryKey(Pipeline* p, bool& first, std::string const& key, size_t depth = 0);
113 QPDF_DLL 98 QPDF_DLL
114 static void writeArrayItem(Pipeline*, bool& first, JSON const& element, size_t depth = 0); 99 static void writeArrayItem(Pipeline*, bool& first, JSON const& element, size_t depth = 0);
115 - // If writing nested structures incrementally, call writeNext  
116 - // before opening a new array or container in the midst of an  
117 - // existing one. The `first` you pass to writeNext should be the  
118 - // one for the parent object. The depth should be the one for the  
119 - // child object. Then start a new `first` for the nested item.  
120 - // Note that writeDictionaryKey and writeArrayItem call writeNext  
121 - // for you, so this is most important when writing subsequent  
122 - // items or container openers to an array. 100 + // If writing nested structures incrementally, call writeNext before opening a new array or
  101 + // container in the midst of an existing one. The `first` you pass to writeNext should be the
  102 + // one for the parent object. The depth should be the one for the child object. Then start a new
  103 + // `first` for the nested item. Note that writeDictionaryKey and writeArrayItem call writeNext
  104 + // for you, so this is most important when writing subsequent items or container openers to an
  105 + // array.
123 QPDF_DLL 106 QPDF_DLL
124 static void writeNext(Pipeline* p, bool& first, size_t depth = 0); 107 static void writeNext(Pipeline* p, bool& first, size_t depth = 0);
125 108
126 - // The JSON spec calls dictionaries "objects", but that creates  
127 - // too much confusion when referring to instances of the JSON  
128 - // class. 109 + // The JSON spec calls dictionaries "objects", but that creates too much confusion when
  110 + // referring to instances of the JSON class.
129 QPDF_DLL 111 QPDF_DLL
130 static JSON makeDictionary(); 112 static JSON makeDictionary();
131 // addDictionaryMember returns the newly added item. 113 // addDictionaryMember returns the newly added item.
@@ -149,10 +131,9 @@ class JSON @@ -149,10 +131,9 @@ class JSON
149 QPDF_DLL 131 QPDF_DLL
150 static JSON makeNull(); 132 static JSON makeNull();
151 133
152 - // A blob serializes as a string. The function will be called by  
153 - // JSON with a pipeline and should write binary data to the  
154 - // pipeline but not call finish(). JSON will call finish() at the  
155 - // right time. 134 + // A blob serializes as a string. The function will be called by JSON with a pipeline and should
  135 + // write binary data to the pipeline but not call finish(). JSON will call finish() at the right
  136 + // time.
156 QPDF_DLL 137 QPDF_DLL
157 static JSON makeBlob(std::function<void(Pipeline*)>); 138 static JSON makeBlob(std::function<void(Pipeline*)>);
158 139
@@ -162,11 +143,9 @@ class JSON @@ -162,11 +143,9 @@ class JSON
162 QPDF_DLL 143 QPDF_DLL
163 bool isDictionary() const; 144 bool isDictionary() const;
164 145
165 - // If the key is already in the dictionary, return true.  
166 - // Otherwise, mark it as seen and return false. This is primarily  
167 - // intended to be used by the parser to detect duplicate keys when  
168 - // the reactor blocks them from being added to the final  
169 - // dictionary. 146 + // If the key is already in the dictionary, return true. Otherwise, mark it as seen and return
  147 + // false. This is primarily intended to be used by the parser to detect duplicate keys when the
  148 + // reactor blocks them from being added to the final dictionary.
170 QPDF_DLL 149 QPDF_DLL
171 bool checkDictionaryKeySeen(std::string const& key); 150 bool checkDictionaryKeySeen(std::string const& key);
172 151
@@ -187,45 +166,35 @@ class JSON @@ -187,45 +166,35 @@ class JSON
187 QPDF_DLL 166 QPDF_DLL
188 bool forEachArrayItem(std::function<void(JSON value)> fn) const; 167 bool forEachArrayItem(std::function<void(JSON value)> fn) const;
189 168
190 - // Check this JSON object against a "schema". This is not a schema  
191 - // according to any standard. It's just a template of what the  
192 - // JSON is supposed to contain. The checking does the following: 169 + // Check this JSON object against a "schema". This is not a schema according to any standard.
  170 + // It's just a template of what the JSON is supposed to contain. The checking does the
  171 + // following:
193 // 172 //
194 - // * The schema is a nested structure containing dictionaries,  
195 - // single-element arrays, and strings only.  
196 - // * Recursively walk the schema. In the items below, "schema  
197 - // object" refers to an object in the schema, and "checked  
198 - // object" refers to the corresponding part of the object  
199 - // being checked.  
200 - // * If the schema object is a dictionary, the checked object  
201 - // must have a dictionary in the same place with the same  
202 - // keys. If flags contains f_optional, a key in the schema  
203 - // does not have to be present in the object. Otherwise, all  
204 - // keys have to be present. Any key in the object must be  
205 - // present in the schema.  
206 - // * If the schema object is an array of length 1, the checked  
207 - // object may either be a single item or an array of items.  
208 - // The single item or each element of the checked object's  
209 - // array is validated against the single element of the  
210 - // schema's array. The rationale behind this logic is that a  
211 - // single element may appear wherever the schema allows a  
212 - // variable-length array. This makes it possible to start  
213 - // allowing an array in the future where a single element was  
214 - // previously required without breaking backward  
215 - // compatibility.  
216 - // * If the schema object is an array of length > 1, the checked  
217 - // object must be an array of the same length. In this case,  
218 - // each element of the checked object array is validated 173 + // * The schema is a nested structure containing dictionaries, single-element arrays, and
  174 + // strings only.
  175 + // * Recursively walk the schema. In the items below, "schema object" refers to an object in
  176 + // the schema, and "checked object" refers to the corresponding part of the object being
  177 + // checked.
  178 + // * If the schema object is a dictionary, the checked object must have a dictionary in the
  179 + // same place with the same keys. If flags contains f_optional, a key in the schema does not
  180 + // have to be present in the object. Otherwise, all keys have to be present. Any key in the
  181 + // object must be present in the schema.
  182 + // * If the schema object is an array of length 1, the checked object may either be a single
  183 + // item or an array of items. The single item or each element of the checked object's
  184 + // array is validated against the single element of the schema's array. The rationale behind
  185 + // this logic is that a single element may appear wherever the schema allows a
  186 + // variable-length array. This makes it possible to start allowing an array in the future
  187 + // where a single element was previously required without breaking backward compatibility.
  188 + // * If the schema object is an array of length > 1, the checked object must be an array of
  189 + // the same length. In this case, each element of the checked object array is validated
219 // against the corresponding element of the schema array. 190 // against the corresponding element of the schema array.
220 - // * Otherwise, the value must be a string whose value is a  
221 - // description of the object's corresponding value, which may  
222 - // have any type. 191 + // * Otherwise, the value must be a string whose value is a description of the object's
  192 + // corresponding value, which may have any type.
223 // 193 //
224 - // QPDF's JSON output conforms to certain strict compatibility  
225 - // rules as discussed in the manual. The idea is that a JSON  
226 - // structure created manually in qpdf.cc doubles as both JSON help  
227 - // information and a schema for validating the JSON that qpdf  
228 - // generates. Any discrepancies are a bug in qpdf. 194 + // QPDF's JSON output conforms to certain strict compatibility rules as discussed in the manual.
  195 + // The idea is that a JSON structure created manually in qpdf.cc doubles as both JSON help
  196 + // information and a schema for validating the JSON that qpdf generates. Any discrepancies are a
  197 + // bug in qpdf.
229 // 198 //
230 // Flags is a bitwise or of values from check_flags_e. 199 // Flags is a bitwise or of values from check_flags_e.
231 enum check_flags_e { 200 enum check_flags_e {
@@ -239,9 +208,8 @@ class JSON @@ -239,9 +208,8 @@ class JSON
239 QPDF_DLL 208 QPDF_DLL
240 bool checkSchema(JSON schema, std::list<std::string>& errors); 209 bool checkSchema(JSON schema, std::list<std::string>& errors);
241 210
242 - // An pointer to a Reactor class can be passed to parse, which  
243 - // will enable the caller to react to incremental events in the  
244 - // construction of the JSON object. This makes it possible to 211 + // A pointer to a Reactor class can be passed to parse, which will enable the caller to react
  212 + // to incremental events in the construction of the JSON object. This makes it possible to
245 // implement SAX-like handling of very large JSON objects. 213 // implement SAX-like handling of very large JSON objects.
246 class QPDF_DLL_CLASS Reactor 214 class QPDF_DLL_CLASS Reactor
247 { 215 {
@@ -249,17 +217,14 @@ class JSON @@ -249,17 +217,14 @@ class JSON
249 QPDF_DLL 217 QPDF_DLL
250 virtual ~Reactor() = default; 218 virtual ~Reactor() = default;
251 219
252 - // The start/end methods are called when parsing of a  
253 - // dictionary or array is started or ended. The item methods  
254 - // are called when an item is added to a dictionary or array.  
255 - // When adding a container to another container, the item  
256 - // method is called with an empty container before the lower  
257 - // container's start method is called. See important notes in 220 + // The start/end methods are called when parsing of a dictionary or array is started or
  221 + // ended. The item methods are called when an item is added to a dictionary or array. When
  222 + // adding a container to another container, the item method is called with an empty
  223 + // container before the lower container's start method is called. See important notes in
258 // "Item methods" below. 224 // "Item methods" below.
259 225
260 - // During parsing of a JSON string, the parser is operating on  
261 - // a single object at a time. When a dictionary or array is  
262 - // started, a new context begins, and when that dictionary or 226 + // During parsing of a JSON string, the parser is operating on a single object at a time.
  227 + // When a dictionary or array is started, a new context begins, and when that dictionary or
263 // array is ended, the previous context is resumed. So, for 228 // array is ended, the previous context is resumed. So, for
264 // example, if you have `{"a": [1]}`, you will receive the 229 // example, if you have `{"a": [1]}`, you will receive the
265 // following method calls 230 // following method calls
@@ -271,9 +236,8 @@ class JSON @@ -271,9 +236,8 @@ class JSON
271 // containerEnd -- now current object is the dictionary again 236 // containerEnd -- now current object is the dictionary again
272 // containerEnd -- current object is undefined 237 // containerEnd -- current object is undefined
273 // 238 //
274 - // If the top-level item in a JSON string is a scalar, the  
275 - // topLevelScalar() method will be called. No argument is  
276 - // passed since the object is the same as what is returned by 239 + // If the top-level item in a JSON string is a scalar, the topLevelScalar() method will be
  240 + // called. No argument is passed since the object is the same as what is returned by
277 // parse(). 241 // parse().
278 242
279 QPDF_DLL 243 QPDF_DLL
@@ -287,21 +251,17 @@ class JSON @@ -287,21 +251,17 @@ class JSON
287 251
288 // Item methods: 252 // Item methods:
289 // 253 //
290 - // The return value of the item methods indicate whether the  
291 - // item has been "consumed". If the item method returns true,  
292 - // then the item will not be added to the containing JSON 254 + // The return value of the item methods indicates whether the item has been "consumed". If
  255 + // the item method returns true, then the item will not be added to the containing JSON
293 // object. This is what allows arbitrarily large JSON objects 256 // object. This is what allows arbitrarily large JSON objects
294 // to be parsed and not have to be kept in memory. 257 // to be parsed and not have to be kept in memory.
295 // 258 //
296 - // NOTE: When a dictionary or an array is added to a  
297 - // container, the dictionaryItem or arrayItem method is called  
298 - // when the child item's start delimiter is encountered, so  
299 - // the JSON object passed in at that time will always be in  
300 - // its initial, empty state. Additionally, the child item's  
301 - // start method is not called until after the parent item's  
302 - // item method is called. This makes it possible to keep track  
303 - // of the current depth level by incrementing level on start  
304 - // methods and decrementing on end methods. 259 + // NOTE: When a dictionary or an array is added to a container, the dictionaryItem or
  260 + // arrayItem method is called when the child item's start delimiter is encountered, so the
  261 + // JSON object passed in at that time will always be in its initial, empty state.
  262 + // Additionally, the child item's start method is not called until after the parent item's
  263 + // item method is called. This makes it possible to keep track of the current depth level by
  264 + // incrementing level on start methods and decrementing on end methods.
305 265
306 QPDF_DLL 266 QPDF_DLL
307 virtual bool dictionaryItem(std::string const& key, JSON const& value) = 0; 267 virtual bool dictionaryItem(std::string const& key, JSON const& value) = 0;
@@ -312,14 +272,13 @@ class JSON @@ -312,14 +272,13 @@ class JSON
312 // Create a JSON object from a string. 272 // Create a JSON object from a string.
313 QPDF_DLL 273 QPDF_DLL
314 static JSON parse(std::string const&); 274 static JSON parse(std::string const&);
315 - // Create a JSON object from an input source. See above for  
316 - // information about how to use the Reactor. 275 + // Create a JSON object from an input source. See above for information about how to use the
  276 + // Reactor.
317 QPDF_DLL 277 QPDF_DLL
318 static JSON parse(InputSource&, Reactor* reactor = nullptr); 278 static JSON parse(InputSource&, Reactor* reactor = nullptr);
319 279
320 - // parse calls setOffsets to set the inclusive start and  
321 - // non-inclusive end offsets of an object relative to its input  
322 - // string. Otherwise, both values are 0. 280 + // parse calls setOffsets to set the inclusive start and non-inclusive end offsets of an object
  281 + // relative to its input string. Otherwise, both values are 0.
323 QPDF_DLL 282 QPDF_DLL
324 void setStart(qpdf_offset_t); 283 void setStart(qpdf_offset_t);
325 QPDF_DLL 284 QPDF_DLL
include/qpdf/Pipeline.hh
@@ -2,44 +2,36 @@ @@ -2,44 +2,36 @@
2 // 2 //
3 // This file is part of qpdf. 3 // This file is part of qpdf.
4 // 4 //
5 -// Licensed under the Apache License, Version 2.0 (the "License");  
6 -// you may not use this file except in compliance with the License.  
7 -// You may obtain a copy of the License at 5 +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
  6 +// in compliance with the License. You may obtain a copy of the License at
8 // 7 //
9 // http://www.apache.org/licenses/LICENSE-2.0 8 // http://www.apache.org/licenses/LICENSE-2.0
10 // 9 //
11 -// Unless required by applicable law or agreed to in writing, software  
12 -// distributed under the License is distributed on an "AS IS" BASIS,  
13 -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
14 -// See the License for the specific language governing permissions and  
15 -// limitations under the License. 10 +// Unless required by applicable law or agreed to in writing, software distributed under the License
  11 +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
  12 +// or implied. See the License for the specific language governing permissions and limitations under
  13 +// the License.
16 // 14 //
17 -// Versions of qpdf prior to version 7 were released under the terms  
18 -// of version 2.0 of the Artistic License. At your option, you may  
19 -// continue to consider qpdf to be licensed under those terms. Please  
20 -// see the manual for additional information. 15 +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
  16 +// License. At your option, you may continue to consider qpdf to be licensed under those terms.
  17 +// Please see the manual for additional information.
21 18
22 -// Generalized Pipeline interface. By convention, subclasses of  
23 -// Pipeline are called Pl_Something. 19 +// Generalized Pipeline interface. By convention, subclasses of Pipeline are called Pl_Something.
24 // 20 //
25 -// When an instance of Pipeline is created with a pointer to a next  
26 -// pipeline, that pipeline writes its data to the next one when it  
27 -// finishes with it. In order to make possible a usage style in which  
28 -// a pipeline may be passed to a function which may stick other  
29 -// pipelines in front of it, the allocator of a pipeline is  
30 -// responsible for its destruction. In other words, one pipeline  
31 -// object does not attempt to manage the memory of its successor. 21 +// When an instance of Pipeline is created with a pointer to a next pipeline, that pipeline writes
  22 +// its data to the next one when it finishes with it. In order to make possible a usage style in
  23 +// which a pipeline may be passed to a function which may stick other pipelines in front of it, the
  24 +// allocator of a pipeline is responsible for its destruction. In other words, one pipeline object
  25 +// does not attempt to manage the memory of its successor.
32 // 26 //
33 -// The client is required to call finish() before destroying a  
34 -// Pipeline in order to avoid loss of data. A Pipeline class should  
35 -// not throw an exception in the destructor if this hasn't been done 27 +// The client is required to call finish() before destroying a Pipeline in order to avoid loss of
  28 +// data. A Pipeline class should not throw an exception in the destructor if this hasn't been done
36 // though since doing so causes too much trouble when deleting 29 // though since doing so causes too much trouble when deleting
37 // pipelines during error conditions. 30 // pipelines during error conditions.
38 // 31 //
39 -// Some pipelines are reusable (i.e., you can call write() after  
40 -// calling finish() and can call finish() multiple times) while others  
41 -// are not. It is up to the caller to use a pipeline according to its  
42 -// own restrictions. 32 +// Some pipelines are reusable (i.e., you can call write() after calling finish() and can call
  33 +// finish() multiple times) while others are not. It is up to the caller to use a pipeline
  34 +// according to its own restrictions.
43 35
44 #ifndef PIPELINE_HH 36 #ifndef PIPELINE_HH
45 #define PIPELINE_HH 37 #define PIPELINE_HH
@@ -50,8 +42,8 @@ @@ -50,8 +42,8 @@
50 #include <memory> 42 #include <memory>
51 #include <string> 43 #include <string>
52 44
53 -// Remember to use QPDF_DLL_CLASS on anything derived from Pipeline so  
54 -// it will work with dynamic_cast across the shared object boundary. 45 +// Remember to use QPDF_DLL_CLASS on anything derived from Pipeline so it will work with
  46 +// dynamic_cast across the shared object boundary.
55 class QPDF_DLL_CLASS Pipeline 47 class QPDF_DLL_CLASS Pipeline
56 { 48 {
57 public: 49 public:
@@ -61,9 +53,8 @@ class QPDF_DLL_CLASS Pipeline @@ -61,9 +53,8 @@ class QPDF_DLL_CLASS Pipeline
61 QPDF_DLL 53 QPDF_DLL
62 virtual ~Pipeline() = default; 54 virtual ~Pipeline() = default;
63 55
64 - // Subclasses should implement write and finish to do their jobs  
65 - // and then, if they are not end-of-line pipelines, call  
66 - // getNext()->write or getNext()->finish. 56 + // Subclasses should implement write and finish to do their jobs and then, if they are not
  57 + // end-of-line pipelines, call getNext()->write or getNext()->finish.
67 QPDF_DLL 58 QPDF_DLL
68 virtual void write(unsigned char const* data, size_t len) = 0; 59 virtual void write(unsigned char const* data, size_t len) = 0;
69 QPDF_DLL 60 QPDF_DLL
@@ -71,17 +62,15 @@ class QPDF_DLL_CLASS Pipeline @@ -71,17 +62,15 @@ class QPDF_DLL_CLASS Pipeline
71 QPDF_DLL 62 QPDF_DLL
72 std::string getIdentifier() const; 63 std::string getIdentifier() const;
73 64
74 - // These are convenience methods for making it easier to write  
75 - // certain other types of data to pipelines without having to  
76 - // cast. The methods that take char const* expect null-terminated  
77 - // C strings and do not write the null terminators. 65 + // These are convenience methods for making it easier to write certain other types of data to
  66 + // pipelines without having to cast. The methods that take char const* expect null-terminated C
  67 + // strings and do not write the null terminators.
78 QPDF_DLL 68 QPDF_DLL
79 void writeCStr(char const* cstr); 69 void writeCStr(char const* cstr);
80 QPDF_DLL 70 QPDF_DLL
81 void writeString(std::string const&); 71 void writeString(std::string const&);
82 - // This allows *p << "x" << "y" but is not intended to be a  
83 - // general purpose << compatible with ostream and does not have  
84 - // local awareness or the ability to be "imbued" with properties. 72 + // This allows *p << "x" << "y" but is not intended to be a general purpose << compatible with
  73 + // ostream and does not have locale awareness or the ability to be "imbued" with properties.
85 QPDF_DLL 74 QPDF_DLL
86 Pipeline& operator<<(char const* cstr); 75 Pipeline& operator<<(char const* cstr);
87 QPDF_DLL 76 QPDF_DLL
include/qpdf/Pl_Buffer.hh
@@ -2,36 +2,31 @@ @@ -2,36 +2,31 @@
2 // 2 //
3 // This file is part of qpdf. 3 // This file is part of qpdf.
4 // 4 //
5 -// Licensed under the Apache License, Version 2.0 (the "License");  
6 -// you may not use this file except in compliance with the License.  
7 -// You may obtain a copy of the License at 5 +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
  6 +// in compliance with the License. You may obtain a copy of the License at
8 // 7 //
9 // http://www.apache.org/licenses/LICENSE-2.0 8 // http://www.apache.org/licenses/LICENSE-2.0
10 // 9 //
11 -// Unless required by applicable law or agreed to in writing, software  
12 -// distributed under the License is distributed on an "AS IS" BASIS,  
13 -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
14 -// See the License for the specific language governing permissions and  
15 -// limitations under the License. 10 +// Unless required by applicable law or agreed to in writing, software distributed under the License
  11 +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
  12 +// or implied. See the License for the specific language governing permissions and limitations under
  13 +// the License.
16 // 14 //
17 -// Versions of qpdf prior to version 7 were released under the terms  
18 -// of version 2.0 of the Artistic License. At your option, you may  
19 -// continue to consider qpdf to be licensed under those terms. Please  
20 -// see the manual for additional information. 15 +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
  16 +// License. At your option, you may continue to consider qpdf to be licensed under those terms.
  17 +// Please see the manual for additional information.
21 18
22 #ifndef PL_BUFFER_HH 19 #ifndef PL_BUFFER_HH
23 #define PL_BUFFER_HH 20 #define PL_BUFFER_HH
24 21
25 -// This pipeline accumulates the data passed to it into a memory  
26 -// buffer. Each subsequent use of this buffer appends to the data  
27 -// accumulated so far. getBuffer() may be called only after calling  
28 -// finish() and before calling any subsequent write(). At that point,  
29 -// a dynamically allocated Buffer object is returned and the internal  
30 -// buffer is reset. The caller is responsible for deleting the  
31 -// returned Buffer. 22 +// This pipeline accumulates the data passed to it into a memory buffer. Each subsequent use of
  23 +// this buffer appends to the data accumulated so far. getBuffer() may be called only after calling
  24 +// finish() and before calling any subsequent write(). At that point, a dynamically allocated
  25 +// Buffer object is returned and the internal buffer is reset. The caller is responsible for
  26 +// deleting the returned Buffer.
32 // 27 //
33 -// For this pipeline, "next" may be null. If a next pointer is  
34 -// provided, this pipeline will also pass the data through to it. 28 +// For this pipeline, "next" may be null. If a next pointer is provided, this pipeline will also
  29 +// pass the data through to it.
35 30
36 #include <qpdf/Buffer.hh> 31 #include <qpdf/Buffer.hh>
37 #include <qpdf/Pipeline.hh> 32 #include <qpdf/Pipeline.hh>
@@ -61,12 +56,10 @@ class QPDF_DLL_CLASS Pl_Buffer: public Pipeline @@ -61,12 +56,10 @@ class QPDF_DLL_CLASS Pl_Buffer: public Pipeline
61 QPDF_DLL 56 QPDF_DLL
62 std::shared_ptr<Buffer> getBufferSharedPointer(); 57 std::shared_ptr<Buffer> getBufferSharedPointer();
63 58
64 - // getMallocBuffer behaves in the same was as getBuffer except the  
65 - // buffer is allocated with malloc(), making it suitable for use  
66 - // when calling from other languages. If there is no data, *buf is  
67 - // set to a null pointer and *len is set to 0. Otherwise, *buf is  
68 - // a buffer of size *len allocated with malloc(). It is the  
69 - // caller's responsibility to call free() on the buffer. 59 + // getMallocBuffer behaves in the same was as getBuffer except the buffer is allocated with
  60 + // malloc(), making it suitable for use when calling from other languages. If there is no data,
  61 + // *buf is set to a null pointer and *len is set to 0. Otherwise, *buf is a buffer of size *len
  62 + // allocated with malloc(). It is the caller's responsibility to call free() on the buffer.
70 QPDF_DLL 63 QPDF_DLL
71 void getMallocBuffer(unsigned char** buf, size_t* len); 64 void getMallocBuffer(unsigned char** buf, size_t* len);
72 65
include/qpdf/QPDF.hh
@@ -2,22 +2,19 @@ @@ -2,22 +2,19 @@
2 // 2 //
3 // This file is part of qpdf. 3 // This file is part of qpdf.
4 // 4 //
5 -// Licensed under the Apache License, Version 2.0 (the "License");  
6 -// you may not use this file except in compliance with the License.  
7 -// You may obtain a copy of the License at 5 +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
  6 +// in compliance with the License. You may obtain a copy of the License at
8 // 7 //
9 // http://www.apache.org/licenses/LICENSE-2.0 8 // http://www.apache.org/licenses/LICENSE-2.0
10 // 9 //
11 -// Unless required by applicable law or agreed to in writing, software  
12 -// distributed under the License is distributed on an "AS IS" BASIS,  
13 -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
14 -// See the License for the specific language governing permissions and  
15 -// limitations under the License. 10 +// Unless required by applicable law or agreed to in writing, software distributed under the License
  11 +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
  12 +// or implied. See the License for the specific language governing permissions and limitations under
  13 +// the License.
16 // 14 //
17 -// Versions of qpdf prior to version 7 were released under the terms  
18 -// of version 2.0 of the Artistic License. At your option, you may  
19 -// continue to consider qpdf to be licensed under those terms. Please  
20 -// see the manual for additional information. 15 +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
  16 +// License. At your option, you may continue to consider qpdf to be licensed under those terms.
  17 +// Please see the manual for additional information.
21 18
22 #ifndef QPDF_HH 19 #ifndef QPDF_HH
23 #define QPDF_HH 20 #define QPDF_HH
@@ -55,8 +52,7 @@ class QPDFParser; @@ -55,8 +52,7 @@ class QPDFParser;
55 class QPDF 52 class QPDF
56 { 53 {
57 public: 54 public:
58 - // Get the current version of the QPDF software. See also  
59 - // qpdf/DLL.h 55 + // Get the current version of the QPDF software. See also qpdf/DLL.h
60 QPDF_DLL 56 QPDF_DLL
61 static std::string const& QPDFVersion(); 57 static std::string const& QPDFVersion();
62 58
@@ -68,92 +64,74 @@ class QPDF @@ -68,92 +64,74 @@ class QPDF
68 QPDF_DLL 64 QPDF_DLL
69 static std::shared_ptr<QPDF> create(); 65 static std::shared_ptr<QPDF> create();
70 66
71 - // Associate a file with a QPDF object and do initial parsing of  
72 - // the file. PDF objects are not read until they are needed. A  
73 - // QPDF object may be associated with only one file in its  
74 - // lifetime. This method must be called before any methods that  
75 - // potentially ask for information about the PDF file are called.  
76 - // Prior to calling this, the only methods that are allowed are  
77 - // those that set parameters. If the input file is not  
78 - // encrypted,either a null password or an empty password can be  
79 - // used. If the file is encrypted, either the user password or  
80 - // the owner password may be supplied. The method  
81 - // setPasswordIsHexKey may be called prior to calling this method  
82 - // or any of the other process methods to force the password to be  
83 - // interpreted as a raw encryption key. See comments on  
84 - // setPasswordIsHexKey for more information. 67 + // Associate a file with a QPDF object and do initial parsing of the file. PDF objects are not
  68 + // read until they are needed. A QPDF object may be associated with only one file in its
  69 + // lifetime. This method must be called before any methods that potentially ask for information
  70 + // about the PDF file are called. Prior to calling this, the only methods that are allowed are
  71 + // those that set parameters. If the input file is not encrypted, either a null password or an
  72 + // empty password can be used. If the file is encrypted, either the user password or the owner
  73 + // password may be supplied. The method setPasswordIsHexKey may be called prior to calling this
  74 + // method or any of the other process methods to force the password to be interpreted as a raw
  75 + // encryption key. See comments on setPasswordIsHexKey for more information.
85 QPDF_DLL 76 QPDF_DLL
86 void processFile(char const* filename, char const* password = nullptr); 77 void processFile(char const* filename, char const* password = nullptr);
87 78
88 - // Parse a PDF from a stdio FILE*. The FILE must be open in  
89 - // binary mode and must be seekable. It may be open read only.  
90 - // This works exactly like processFile except that the PDF file is  
91 - // read from an already opened FILE*. If close_file is true, the  
92 - // file will be closed at the end. Otherwise, the caller is  
93 - // responsible for closing the file. 79 + // Parse a PDF from a stdio FILE*. The FILE must be open in binary mode and must be seekable.
  80 + // It may be open read only. This works exactly like processFile except that the PDF file is
  81 + // read from an already opened FILE*. If close_file is true, the file will be closed at the
  82 + // end. Otherwise, the caller is responsible for closing the file.
94 QPDF_DLL 83 QPDF_DLL
95 void processFile( 84 void processFile(
96 char const* description, FILE* file, bool close_file, char const* password = nullptr); 85 char const* description, FILE* file, bool close_file, char const* password = nullptr);
97 86
98 - // Parse a PDF file loaded into a memory buffer. This works  
99 - // exactly like processFile except that the PDF file is in memory  
100 - // instead of on disk. The description appears in any warning or 87 + // Parse a PDF file loaded into a memory buffer. This works exactly like processFile except
  88 + // that the PDF file is in memory instead of on disk. The description appears in any warning or
101 // error message in place of the file name. 89 // error message in place of the file name.
102 QPDF_DLL 90 QPDF_DLL
103 void processMemoryFile( 91 void processMemoryFile(
104 char const* description, char const* buf, size_t length, char const* password = nullptr); 92 char const* description, char const* buf, size_t length, char const* password = nullptr);
105 93
106 - // Parse a PDF file loaded from a custom InputSource. If you have  
107 - // your own method of retrieving a PDF file, you can subclass  
108 - // InputSource and use this method. 94 + // Parse a PDF file loaded from a custom InputSource. If you have your own method of retrieving
  95 + // a PDF file, you can subclass InputSource and use this method.
109 QPDF_DLL 96 QPDF_DLL
110 void processInputSource(std::shared_ptr<InputSource>, char const* password = nullptr); 97 void processInputSource(std::shared_ptr<InputSource>, char const* password = nullptr);
111 98
112 - // Create a PDF from an input source that contains JSON as written  
113 - // by writeJSON (or qpdf --json-output, version 2 or higher). The  
114 - // JSON must be a complete representation of a PDF. See "qpdf  
115 - // JSON" in the manual for details. The input JSON may be  
116 - // arbitrarily large. QPDF does not load stream data into memory  
117 - // for more than one stream at a time, even if the stream data is 99 + // Create a PDF from an input source that contains JSON as written by writeJSON (or qpdf
  100 + // --json-output, version 2 or higher). The JSON must be a complete representation of a PDF. See
  101 + // "qpdf JSON" in the manual for details. The input JSON may be arbitrarily large. QPDF does not
  102 + // load stream data into memory for more than one stream at a time, even if the stream data is
118 // specified inline. 103 // specified inline.
119 QPDF_DLL 104 QPDF_DLL
120 void createFromJSON(std::string const& json_file); 105 void createFromJSON(std::string const& json_file);
121 QPDF_DLL 106 QPDF_DLL
122 void createFromJSON(std::shared_ptr<InputSource>); 107 void createFromJSON(std::shared_ptr<InputSource>);
123 108
124 - // Update a PDF from an input source that contains JSON in the  
125 - // same format as is written by writeJSON (or qpdf --json-output,  
126 - // version 2 or higher). Objects in the PDF and not in the JSON  
127 - // are not modified. See "qpdf JSON" in the manual for details. As  
128 - // with createFromJSON, the input JSON may be arbitrarily large. 109 + // Update a PDF from an input source that contains JSON in the same format as is written by
  110 + // writeJSON (or qpdf --json-output, version 2 or higher). Objects in the PDF and not in the
  111 + // JSON are not modified. See "qpdf JSON" in the manual for details. As with createFromJSON, the
  112 + // input JSON may be arbitrarily large.
129 QPDF_DLL 113 QPDF_DLL
130 void updateFromJSON(std::string const& json_file); 114 void updateFromJSON(std::string const& json_file);
131 QPDF_DLL 115 QPDF_DLL
132 void updateFromJSON(std::shared_ptr<InputSource>); 116 void updateFromJSON(std::shared_ptr<InputSource>);
133 117
134 - // Write qpdf JSON format to the pipeline "p". The only supported  
135 - // version is 2. The finish() method is not called on the  
136 - // pipeline. 118 + // Write qpdf JSON format to the pipeline "p". The only supported version is 2. The finish()
  119 + // method is not called on the pipeline.
137 // 120 //
138 - // The decode_level parameter controls which streams are  
139 - // uncompressed in the JSON. Use qpdf_dl_none to preserve all  
140 - // stream data exactly as it appears in the input. The possible  
141 - // values for json_stream_data can be found in qpdf/Constants.h  
142 - // and correspond to the --json-stream-data command-line argument.  
143 - // If json_stream_data is qpdf_sj_file, file_prefix must be  
144 - // specified. Each stream will be written to a file whose path is  
145 - // constructed by appending "-nnn" to file_prefix, where "nnn" is  
146 - // the object number (not zero-filled). If wanted_objects is  
147 - // empty, write all objects. Otherwise, write only objects whose  
148 - // keys are in wanted_objects. Keys may be either "trailer" or of  
149 - // the form "obj:n n R". Invalid keys are ignored. This  
150 - // corresponds to the --json-object command-line argument. 121 + // The decode_level parameter controls which streams are uncompressed in the JSON. Use
  122 + // qpdf_dl_none to preserve all stream data exactly as it appears in the input. The possible
  123 + // values for json_stream_data can be found in qpdf/Constants.h and correspond to the
  124 + // --json-stream-data command-line argument. If json_stream_data is qpdf_sj_file, file_prefix
  125 + // must be specified. Each stream will be written to a file whose path is constructed by
  126 + // appending "-nnn" to file_prefix, where "nnn" is the object number (not zero-filled). If
  127 + // wanted_objects is empty, write all objects. Otherwise, write only objects whose keys are in
  128 + // wanted_objects. Keys may be either "trailer" or of the form "obj:n n R". Invalid keys are
  129 + // ignored. This corresponds to the --json-object command-line argument.
151 // 130 //
152 - // QPDF is efficient with regard to memory when writing, allowing  
153 - // you to write arbitrarily large PDF files to a pipeline. You can  
154 - // use a pipeline like Pl_Buffer or Pl_String to capture the JSON  
155 - // output in memory, but do so with caution as this will allocate  
156 - // enough memory to hold the entire PDF file. 131 + // QPDF is efficient with regard to memory when writing, allowing you to write arbitrarily large
  132 + // PDF files to a pipeline. You can use a pipeline like Pl_Buffer or Pl_String to capture the
  133 + // JSON output in memory, but do so with caution as this will allocate enough memory to hold the
  134 + // entire PDF file.
157 QPDF_DLL 135 QPDF_DLL
158 void writeJSON( 136 void writeJSON(
159 int version, 137 int version,
@@ -163,17 +141,13 @@ class QPDF @@ -163,17 +141,13 @@ class QPDF
163 std::string const& file_prefix, 141 std::string const& file_prefix,
164 std::set<std::string> wanted_objects); 142 std::set<std::string> wanted_objects);
165 143
166 - // This version of writeJSON enables writing only the "qpdf" key  
167 - // of an in-progress dictionary. If the value of "complete" is  
168 - // true, a complete JSON object containing only the "qpdf" key is  
169 - // written to the pipeline. If the value of "complete" is false,  
170 - // the "qpdf" key and its value are written to the pipeline  
171 - // assuming that a dictionary is already open. The parameter  
172 - // first_key indicates whether this is the first key in an  
173 - // in-progress dictionary. It will be set to false by writeJSON.  
174 - // The "qpdf" key and value are written as if at depth 1 in a  
175 - // prettified JSON output. Remaining arguments are the same as the  
176 - // above version. 144 + // This version of writeJSON enables writing only the "qpdf" key of an in-progress dictionary.
  145 + // If the value of "complete" is true, a complete JSON object containing only the "qpdf" key is
  146 + // written to the pipeline. If the value of "complete" is false, the "qpdf" key and its value
  147 + // are written to the pipeline assuming that a dictionary is already open. The parameter
  148 + // first_key indicates whether this is the first key in an in-progress dictionary. It will be
  149 + // set to false by writeJSON. The "qpdf" key and value are written as if at depth 1 in a
  150 + // prettified JSON output. Remaining arguments are the same as the above version.
177 QPDF_DLL 151 QPDF_DLL
178 void writeJSON( 152 void writeJSON(
179 int version, 153 int version,
@@ -185,169 +159,135 @@ class QPDF @@ -185,169 +159,135 @@ class QPDF
185 std::string const& file_prefix, 159 std::string const& file_prefix,
186 std::set<std::string> wanted_objects); 160 std::set<std::string> wanted_objects);
187 161
188 - // Close or otherwise release the input source. Once this has been  
189 - // called, no other methods of qpdf can be called safely except  
190 - // for getWarnings and anyWarnings(). After this has been called,  
191 - // it is safe to perform operations on the input file such as  
192 - // deleting or renaming it. 162 + // Close or otherwise release the input source. Once this has been called, no other methods of
  163 + // qpdf can be called safely except for getWarnings and anyWarnings(). After this has been
  164 + // called, it is safe to perform operations on the input file such as deleting or renaming it.
193 QPDF_DLL 165 QPDF_DLL
194 void closeInputSource(); 166 void closeInputSource();
195 167
196 - // For certain forensic or investigatory purposes, it may  
197 - // sometimes be useful to specify the encryption key directly,  
198 - // even though regular PDF applications do not provide a way to do  
199 - // this. Calling setPasswordIsHexKey(true) before calling any of  
200 - // the process methods will bypass the normal encryption key  
201 - // computation or recovery mechanisms and interpret the bytes in  
202 - // the password as a hex-encoded encryption key. Note that we  
203 - // hex-encode the key because it may contain null bytes and  
204 - // therefore can't be represented in a char const*. 168 + // For certain forensic or investigatory purposes, it may sometimes be useful to specify the
  169 + // encryption key directly, even though regular PDF applications do not provide a way to do
  170 + // this. Calling setPasswordIsHexKey(true) before calling any of the process methods will bypass
  171 + // the normal encryption key computation or recovery mechanisms and interpret the bytes in the
  172 + // password as a hex-encoded encryption key. Note that we hex-encode the key because it may
  173 + // contain null bytes and therefore can't be represented in a char const*.
205 QPDF_DLL 174 QPDF_DLL
206 void setPasswordIsHexKey(bool); 175 void setPasswordIsHexKey(bool);
207 176
208 - // Create a QPDF object for an empty PDF. This PDF has no pages  
209 - // or objects other than a minimal trailer, a document catalog,  
210 - // and a /Pages tree containing zero pages. Pages and other  
211 - // objects can be added to the file in the normal way, and the  
212 - // trailer and document catalog can be mutated. Calling this  
213 - // method is equivalent to calling processFile on an equivalent  
214 - // PDF file. See the pdf-create.cc example for a demonstration of  
215 - // how to use this method to create a PDF file from scratch. 177 + // Create a QPDF object for an empty PDF. This PDF has no pages or objects other than a minimal
  178 + // trailer, a document catalog, and a /Pages tree containing zero pages. Pages and other
  179 + // objects can be added to the file in the normal way, and the trailer and document catalog can
  180 + // be mutated. Calling this method is equivalent to calling processFile on an equivalent PDF
  181 + // file. See the pdf-create.cc example for a demonstration of how to use this method to create
  182 + // a PDF file from scratch.
216 QPDF_DLL 183 QPDF_DLL
217 void emptyPDF(); 184 void emptyPDF();
218 185
219 - // From 10.1: register a new filter implementation for a specific  
220 - // stream filter. You can add your own implementations for new  
221 - // filter types or override existing ones provided by the library.  
222 - // Registered stream filters are used for decoding only as you can  
223 - // override encoding with stream data providers. For example, you  
224 - // could use this method to add support for one of the other filter  
225 - // types by using additional third-party libraries that qpdf does  
226 - // not presently use. The standard filters are implemented using  
227 - // QPDFStreamFilter classes. 186 + // From 10.1: register a new filter implementation for a specific stream filter. You can add
  187 + // your own implementations for new filter types or override existing ones provided by the
  188 + // library. Registered stream filters are used for decoding only as you can override encoding
  189 + // with stream data providers. For example, you could use this method to add support for one of
  190 + // the other filter types by using additional third-party libraries that qpdf does not presently
  191 + // use. The standard filters are implemented using QPDFStreamFilter classes.
228 QPDF_DLL 192 QPDF_DLL
229 static void registerStreamFilter( 193 static void registerStreamFilter(
230 std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory); 194 std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory);
231 195
232 // Parameter settings 196 // Parameter settings
233 197
234 - // To capture or redirect output, configure the logger returned by  
235 - // getLogger(). By default, all QPDF and QPDFJob objects share the  
236 - // global logger. If you need a private logger for some reason,  
237 - // pass a new one to setLogger(). See comments in QPDFLogger.hh  
238 - // for details on configuring the logger. 198 + // To capture or redirect output, configure the logger returned by getLogger(). By default, all
  199 + // QPDF and QPDFJob objects share the global logger. If you need a private logger for some
  200 + // reason, pass a new one to setLogger(). See comments in QPDFLogger.hh for details on
  201 + // configuring the logger.
239 // 202 //
240 - // Note that no normal QPDF operations generate output to standard  
241 - // output, so for applications that just wish to avoid creating  
242 - // output for warnings and don't call any check functions, calling  
243 - // setSuppressWarnings(true) is sufficient. 203 + // Note that no normal QPDF operations generate output to standard output, so for applications
  204 + // that just wish to avoid creating output for warnings and don't call any check functions,
  205 + // calling setSuppressWarnings(true) is sufficient.
244 QPDF_DLL 206 QPDF_DLL
245 std::shared_ptr<QPDFLogger> getLogger(); 207 std::shared_ptr<QPDFLogger> getLogger();
246 QPDF_DLL 208 QPDF_DLL
247 void setLogger(std::shared_ptr<QPDFLogger>); 209 void setLogger(std::shared_ptr<QPDFLogger>);
248 210
249 - // This deprecated method is the old way to capture output, but it  
250 - // didn't capture all output. See comments above for getLogger and  
251 - // setLogger. This will be removed in QPDF 12. For now, it  
252 - // configures a private logger, separating this object from the  
253 - // default logger, and calls setOutputStreams on that logger. See  
254 - // QPDFLogger.hh for additional details. 211 + // This deprecated method is the old way to capture output, but it didn't capture all output.
  212 + // See comments above for getLogger and setLogger. This will be removed in QPDF 12. For now, it
  213 + // configures a private logger, separating this object from the default logger, and calls
  214 + // setOutputStreams on that logger. See QPDFLogger.hh for additional details.
255 [[deprecated("configure logger from getLogger() or call setLogger()")]] QPDF_DLL void 215 [[deprecated("configure logger from getLogger() or call setLogger()")]] QPDF_DLL void
256 setOutputStreams(std::ostream* out_stream, std::ostream* err_stream); 216 setOutputStreams(std::ostream* out_stream, std::ostream* err_stream);
257 217
258 - // If true, ignore any cross-reference streams in a hybrid file  
259 - // (one that contains both cross-reference streams and  
260 - // cross-reference tables). This can be useful for testing to 218 + // If true, ignore any cross-reference streams in a hybrid file (one that contains both
  219 + // cross-reference streams and cross-reference tables). This can be useful for testing to
261 // ensure that a hybrid file would work with an older reader. 220 // ensure that a hybrid file would work with an older reader.
262 QPDF_DLL 221 QPDF_DLL
263 void setIgnoreXRefStreams(bool); 222 void setIgnoreXRefStreams(bool);
264 223
265 - // By default, any warnings are issued to std::cerr or the error  
266 - // stream specified in a call to setOutputStreams as they are  
267 - // encountered. If this method is called with a true value, reporting of  
268 - // warnings is suppressed. You may still retrieve warnings by  
269 - // calling getWarnings. 224 + // By default, any warnings are issued to std::cerr or the error stream specified in a call to
  225 + // setOutputStreams as they are encountered. If this method is called with a true value,
  226 + // reporting of warnings is suppressed. You may still retrieve warnings by calling getWarnings.
270 QPDF_DLL 227 QPDF_DLL
271 void setSuppressWarnings(bool); 228 void setSuppressWarnings(bool);
272 229
273 - // By default, QPDF will try to recover if it finds certain types  
274 - // of errors in PDF files. If turned off, it will throw an  
275 - // exception on the first such problem it finds without attempting 230 + // By default, QPDF will try to recover if it finds certain types of errors in PDF files. If
  231 + // turned off, it will throw an exception on the first such problem it finds without attempting
276 // recovery. 232 // recovery.
277 QPDF_DLL 233 QPDF_DLL
278 void setAttemptRecovery(bool); 234 void setAttemptRecovery(bool);
279 235
280 - // Tell other QPDF objects that streams copied from this QPDF need  
281 - // to be fully copied when copyForeignObject is called on them.  
282 - // Calling setIgnoreXRefStreams(true) on a QPDF object makes it  
283 - // possible for the object and its input source to disappear  
284 - // before streams copied from it are written with the destination  
285 - // QPDF object. Confused? Ordinarily, if you are going to copy  
286 - // objects from a source QPDF object to a destination QPDF object  
287 - // using copyForeignObject or addPage, the source object's input  
288 - // source must stick around until after the destination PDF is  
289 - // written. If you call this method on the source QPDF object, it  
290 - // sends a signal to the destination object that it must fully  
291 - // copy the stream data when copyForeignObject. It will do this by  
292 - // making a copy in RAM. Ordinarily the stream data is copied  
293 - // lazily to avoid unnecessary duplication of the stream data.  
294 - // Note that the stream data is copied into RAM only once  
295 - // regardless of how many objects the stream is copied into. The  
296 - // result is that, if you called setImmediateCopyFrom(true) on a  
297 - // given QPDF object prior to copying any of its streams, you do  
298 - // not need to keep it or its input source around after copying  
299 - // its objects to another QPDF. This is true even if the source  
300 - // streams use StreamDataProvider. Note that this method is called  
301 - // on the QPDF object you are copying FROM, not the one you are  
302 - // copying to. The reasoning for this is that there's no reason a  
303 - // given QPDF may not get objects copied to it from a variety of  
304 - // other objects, some transient and some not. Since what's  
305 - // relevant is whether the source QPDF is transient, the method  
306 - // must be called on the source QPDF, not the destination one.  
307 - // This method will make a copy of the stream in RAM, so be  
308 - // sure you have enough memory to simultaneously hold all the  
309 - // streams you're copying. 236 + // Tell other QPDF objects that streams copied from this QPDF need to be fully copied when
  237 + // copyForeignObject is called on them. Calling setImmediateCopyFrom(true) on a QPDF object
  238 + // makes it possible for the object and its input source to disappear before streams copied from
  239 + // it are written with the destination QPDF object. Confused? Ordinarily, if you are going to
  240 + // copy objects from a source QPDF object to a destination QPDF object using copyForeignObject
  241 + // or addPage, the source object's input source must stick around until after the destination
  242 + // PDF is written. If you call this method on the source QPDF object, it sends a signal to the
  243 + // destination object that it must fully copy the stream data when copyForeignObject is called.
  244 + // It will do this by making a copy in RAM. Ordinarily the stream data is copied lazily to avoid
  245 + // unnecessary duplication of the stream data. Note that the stream data is copied into RAM only
  246 + // once regardless of how many objects the stream is copied into. The result is that, if you
  247 + // called setImmediateCopyFrom(true) on a given QPDF object prior to copying any of its streams,
  248 + // you do not need to keep it or its input source around after copying its objects to another
  249 + // QPDF. This is true even if the source streams use StreamDataProvider. Note that this method
  250 + // is called on the QPDF object you are copying FROM, not the one you are copying to. The
  251 + // reasoning for this is that there's no reason a given QPDF may not get objects copied to it
  252 + // from a variety of other objects, some transient and some not. Since what's relevant is
  253 + // whether the source QPDF is transient, the method must be called on the source QPDF, not the
  254 + // destination one. This method will make a copy of the stream in RAM, so be sure you have
  255 + // enough memory to simultaneously hold all the streams you're copying.
310 QPDF_DLL 256 QPDF_DLL
311 void setImmediateCopyFrom(bool); 257 void setImmediateCopyFrom(bool);
312 258
313 // Other public methods 259 // Other public methods
314 260
315 - // Return the list of warnings that have been issued so far and  
316 - // clear the list. This method may be called even if processFile  
317 - // throws an exception. Note that if setSuppressWarnings was not  
318 - // called or was called with a false value, any warnings retrieved  
319 - // here will have already been output. 261 + // Return the list of warnings that have been issued so far and clear the list. This method may
  262 + // be called even if processFile throws an exception. Note that if setSuppressWarnings was not
  263 + // called or was called with a false value, any warnings retrieved here will have already been
  264 + // output.
320 QPDF_DLL 265 QPDF_DLL
321 std::vector<QPDFExc> getWarnings(); 266 std::vector<QPDFExc> getWarnings();
322 267
323 - // Indicate whether any warnings have been issued so far. Does not  
324 - // clear the list of warnings. 268 + // Indicate whether any warnings have been issued so far. Does not clear the list of warnings.
325 QPDF_DLL 269 QPDF_DLL
326 bool anyWarnings() const; 270 bool anyWarnings() const;
327 271
328 - // Indicate the number of warnings that have been issued since the last  
329 - // call to getWarnings. Does not clear the list of warnings. 272 + // Indicate the number of warnings that have been issued since the last call to getWarnings.
  273 + // Does not clear the list of warnings.
330 QPDF_DLL 274 QPDF_DLL
331 size_t numWarnings() const; 275 size_t numWarnings() const;
332 276
333 - // Return an application-scoped unique ID for this QPDF object.  
334 - // This is not a globally unique ID. It is constructed using a  
335 - // timestamp and a random number and is intended to be unique  
336 - // among QPDF objects that are created by a single run of an  
337 - // application. While it's very likely that these are actually  
338 - // globally unique, it is not recommended to use them for  
339 - // long-term purposes. 277 + // Return an application-scoped unique ID for this QPDF object. This is not a globally unique
  278 + // ID. It is constructed using a timestamp and a random number and is intended to be unique
  279 + // among QPDF objects that are created by a single run of an application. While it's very likely
  280 + // that these are actually globally unique, it is not recommended to use them for long-term
  281 + // purposes.
340 QPDF_DLL 282 QPDF_DLL
341 unsigned long long getUniqueId() const; 283 unsigned long long getUniqueId() const;
342 284
343 - // Issue a warning on behalf of this QPDF object. It will be  
344 - // emitted with other warnings, following warning suppression  
345 - // rules, and it will be available with getWarnings(). 285 + // Issue a warning on behalf of this QPDF object. It will be emitted with other warnings,
  286 + // following warning suppression rules, and it will be available with getWarnings().
346 QPDF_DLL 287 QPDF_DLL
347 void warn(QPDFExc const& e); 288 void warn(QPDFExc const& e);
348 - // Same as above but creates the QPDFExc object using the  
349 - // arguments passed to warn. The filename argument to QPDFExc is  
350 - // omitted. This method uses the filename associated with the QPDF 289 + // Same as above but creates the QPDFExc object using the arguments passed to warn. The filename
  290 + // argument to QPDFExc is omitted. This method uses the filename associated with the QPDF
351 // object. 291 // object.
352 QPDF_DLL 292 QPDF_DLL
353 void warn( 293 void warn(
@@ -376,60 +316,48 @@ class QPDF @@ -376,60 +316,48 @@ class QPDF
376 316
377 // Public factory methods 317 // Public factory methods
378 318
379 - // Create a new stream. A subsequent call must be made to  
380 - // replaceStreamData() to provide data for the stream. The stream's  
381 - // dictionary may be retrieved by calling getDict(), and the resulting  
382 - // dictionary may be modified. Alternatively, you can create a new  
383 - // dictionary and call replaceDict to install it. 319 + // Create a new stream. A subsequent call must be made to replaceStreamData() to provide data
  320 + // for the stream. The stream's dictionary may be retrieved by calling getDict(), and the
  321 + // resulting dictionary may be modified. Alternatively, you can create a new dictionary and
  322 + // call replaceDict to install it.
384 QPDF_DLL 323 QPDF_DLL
385 QPDFObjectHandle newStream(); 324 QPDFObjectHandle newStream();
386 325
387 - // Create a new stream. Use the given buffer as the stream data. The  
388 - // stream dictionary's /Length key will automatically be set to the size of  
389 - // the data buffer. If additional keys are required, the stream's  
390 - // dictionary may be retrieved by calling getDict(), and the resulting  
391 - // dictionary may be modified. This method is just a convenient wrapper  
392 - // around the newStream() and replaceStreamData(). It is a convenience  
393 - // methods for streams that require no parameters beyond the stream length.  
394 - // Note that you don't have to deal with compression yourself if you use  
395 - // QPDFWriter. By default, QPDFWriter will automatically compress  
396 - // uncompressed stream data. Example programs are provided that  
397 - // illustrate this. 326 + // Create a new stream. Use the given buffer as the stream data. The stream dictionary's
  327 + // /Length key will automatically be set to the size of the data buffer. If additional keys are
  328 + // required, the stream's dictionary may be retrieved by calling getDict(), and the resulting
  329 + // dictionary may be modified. This method is just a convenient wrapper around the newStream()
  330 + // and replaceStreamData(). It is a convenience method for streams that require no parameters
  331 + // beyond the stream length. Note that you don't have to deal with compression yourself if you
  332 + // use QPDFWriter. By default, QPDFWriter will automatically compress uncompressed stream data.
  333 + // Example programs are provided that illustrate this.
398 QPDF_DLL 334 QPDF_DLL
399 QPDFObjectHandle newStream(std::shared_ptr<Buffer> data); 335 QPDFObjectHandle newStream(std::shared_ptr<Buffer> data);
400 336
401 - // Create new stream with data from string. This method will  
402 - // create a copy of the data rather than using the user-provided  
403 - // buffer as in the std::shared_ptr<Buffer> version of newStream. 337 + // Create new stream with data from string. This method will create a copy of the data rather
  338 + // than using the user-provided buffer as in the std::shared_ptr<Buffer> version of newStream.
404 QPDF_DLL 339 QPDF_DLL
405 QPDFObjectHandle newStream(std::string const& data); 340 QPDFObjectHandle newStream(std::string const& data);
406 341
407 - // A reserved object is a special sentinel used for qpdf to  
408 - // reserve a spot for an object that is going to be added to the  
409 - // QPDF object. Normally you don't have to use this type since  
410 - // you can just call QPDF::makeIndirectObject. However, in some  
411 - // cases, if you have to create objects with circular references,  
412 - // you may need to create a reserved object so that you can have a  
413 - // reference to it and then replace the object later. Reserved  
414 - // objects have the special property that they can't be resolved  
415 - // to direct objects. This makes it possible to replace a  
416 - // reserved object with a new object while preserving existing  
417 - // references to them. When you are ready to replace a reserved  
418 - // object with its replacement, use QPDF::replaceReserved for this  
419 - // purpose rather than the more general QPDF::replaceObject. It  
420 - // is an error to try to write a QPDF with QPDFWriter if it has  
421 - // any reserved objects in it. 342 + // A reserved object is a special sentinel used for qpdf to reserve a spot for an object that is
  343 + // going to be added to the QPDF object. Normally you don't have to use this type since you can
  344 + // just call QPDF::makeIndirectObject. However, in some cases, if you have to create objects
  345 + // with circular references, you may need to create a reserved object so that you can have a
  346 + // reference to it and then replace the object later. Reserved objects have the special
  347 + // property that they can't be resolved to direct objects. This makes it possible to replace a
  348 + // reserved object with a new object while preserving existing references to it. When you are
  349 + // ready to replace a reserved object with its replacement, use QPDF::replaceReserved for this
  350 + // purpose rather than the more general QPDF::replaceObject. It is an error to try to write a
  351 + // QPDF with QPDFWriter if it has any reserved objects in it.
422 QPDF_DLL 352 QPDF_DLL
423 QPDFObjectHandle newReserved(); 353 QPDFObjectHandle newReserved();
424 354
425 - // Install this object handle as an indirect object and return an  
426 - // indirect reference to it. 355 + // Install this object handle as an indirect object and return an indirect reference to it.
427 QPDF_DLL 356 QPDF_DLL
428 QPDFObjectHandle makeIndirectObject(QPDFObjectHandle); 357 QPDFObjectHandle makeIndirectObject(QPDFObjectHandle);
429 358
430 - // Retrieve an object by object ID and generation. Returns an  
431 - // indirect reference to it. The getObject() methods were added  
432 - // for qpdf 11. 359 + // Retrieve an object by object ID and generation. Returns an indirect reference to it. The
  360 + // getObject() methods were added for qpdf 11.
433 QPDF_DLL 361 QPDF_DLL
434 QPDFObjectHandle getObject(QPDFObjGen const&); 362 QPDFObjectHandle getObject(QPDFObjGen const&);
435 QPDF_DLL 363 QPDF_DLL
@@ -441,83 +369,63 @@ class QPDF @@ -441,83 +369,63 @@ class QPDF
441 QPDF_DLL 369 QPDF_DLL
442 QPDFObjectHandle getObjectByID(int objid, int generation); 370 QPDFObjectHandle getObjectByID(int objid, int generation);
443 371
444 - // Replace the object with the given object id with the given  
445 - // object. The object handle passed in must be a direct object,  
446 - // though it may contain references to other indirect objects  
447 - // within it. Prior to qpdf 10.2.1, after calling this method,  
448 - // existing QPDFObjectHandle instances that pointed to the  
449 - // original object still pointed to the original object, resulting  
450 - // in confusing and incorrect behavior. This was fixed in 10.2.1,  
451 - // so existing QPDFObjectHandle objects will start pointing to the  
452 - // newly replaced object. Note that replacing an object with  
453 - // QPDFObjectHandle::newNull() effectively removes the object from  
454 - // the file since a non-existent object is treated as a null  
455 - // object. To replace a reserved object, call replaceReserved 372 + // Replace the object with the given object id with the given object. The object handle passed
  373 + // in must be a direct object, though it may contain references to other indirect objects within
  374 + // it. Prior to qpdf 10.2.1, after calling this method, existing QPDFObjectHandle instances that
  375 + // pointed to the original object still pointed to the original object, resulting in confusing
  376 + // and incorrect behavior. This was fixed in 10.2.1, so existing QPDFObjectHandle objects will
  377 + // start pointing to the newly replaced object. Note that replacing an object with
  378 + // QPDFObjectHandle::newNull() effectively removes the object from the file since a non-existent
  379 + // object is treated as a null object. To replace a reserved object, call replaceReserved
456 // instead. 380 // instead.
457 QPDF_DLL 381 QPDF_DLL
458 void replaceObject(QPDFObjGen const& og, QPDFObjectHandle); 382 void replaceObject(QPDFObjGen const& og, QPDFObjectHandle);
459 QPDF_DLL 383 QPDF_DLL
460 void replaceObject(int objid, int generation, QPDFObjectHandle); 384 void replaceObject(int objid, int generation, QPDFObjectHandle);
461 385
462 - // Swap two objects given by ID. Prior to qpdf 10.2.1, existing  
463 - // QPDFObjectHandle instances that reference them objects not  
464 - // notice the swap, but this was fixed in 10.2.1. 386 + // Swap two objects given by ID. Prior to qpdf 10.2.1, existing QPDFObjectHandle instances that
  387 + // referenced the objects did not notice the swap, but this was fixed in 10.2.1.
465 QPDF_DLL 388 QPDF_DLL
466 void swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2); 389 void swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2);
467 QPDF_DLL 390 QPDF_DLL
468 void swapObjects(int objid1, int generation1, int objid2, int generation2); 391 void swapObjects(int objid1, int generation1, int objid2, int generation2);
469 392
470 - // Replace a reserved object. This is a wrapper around  
471 - // replaceObject but it guarantees that the underlying object is a  
472 - // reserved object. After this call, reserved will be a reference  
473 - // to replacement. 393 + // Replace a reserved object. This is a wrapper around replaceObject but it guarantees that the
  394 + // underlying object is a reserved object. After this call, reserved will be a reference to
  395 + // replacement.
474 QPDF_DLL 396 QPDF_DLL
475 void replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement); 397 void replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement);
476 398
477 - // Copy an object from another QPDF to this one. Starting with  
478 - // qpdf version 8.3.0, it is no longer necessary to keep the  
479 - // original QPDF around after the call to copyForeignObject as  
480 - // long as the source of any copied stream data is still  
481 - // available. Usually this means you just have to keep the input  
482 - // file around, not the QPDF object. The exception to this is if  
483 - // you copy a stream that gets its data from a  
484 - // QPDFObjectHandle::StreamDataProvider. In this case only, the  
485 - // original stream's QPDF object must stick around because the  
486 - // QPDF object is itself the source of the original stream data.  
487 - // For a more in-depth discussion, please see the TODO file.  
488 - // Starting in 8.4.0, you can call setImmediateCopyFrom(true) on  
489 - // the SOURCE QPDF object (the one you're copying FROM). If you do  
490 - // this prior to copying any of its objects, then neither the  
491 - // source QPDF object nor its input source needs to stick around  
492 - // at all regardless of the source. The cost is that the stream  
493 - // data is copied into RAM at the time copyForeignObject is 399 + // Copy an object from another QPDF to this one. Starting with qpdf version 8.3.0, it is no
  400 + // longer necessary to keep the original QPDF around after the call to copyForeignObject as long
  401 + // as the source of any copied stream data is still available. Usually this means you just have
  402 + // to keep the input file around, not the QPDF object. The exception to this is if you copy a
  403 + // stream that gets its data from a QPDFObjectHandle::StreamDataProvider. In this case only, the
  404 + // original stream's QPDF object must stick around because the QPDF object is itself the source
  405 + // of the original stream data. For a more in-depth discussion, please see the TODO file.
  406 + // Starting in 8.4.0, you can call setImmediateCopyFrom(true) on the SOURCE QPDF object (the one
  407 + // you're copying FROM). If you do this prior to copying any of its objects, then neither the
  408 + // source QPDF object nor its input source needs to stick around at all regardless of the
  409 + // source. The cost is that the stream data is copied into RAM at the time copyForeignObject is
494 // called. See setImmediateCopyFrom for more information. 410 // called. See setImmediateCopyFrom for more information.
495 // 411 //
496 - // The return value of this method is an indirect reference to the  
497 - // copied object in this file. This method is intended to be used  
498 - // to copy non-page objects. To copy page objects, pass the  
499 - // foreign page object directly to addPage (or addPageAt). If you  
500 - // copy objects that contain references to pages, you should copy  
501 - // the pages first using addPage(At). Otherwise references to the  
502 - // pages that have not been copied will be replaced with nulls. It  
503 - // is possible to use copyForeignObject on page objects if you are  
504 - // not going to use them as pages. Doing so copies the object  
505 - // normally but does not update the page structure. For example,  
506 - // it is a valid use case to use copyForeignObject for a page that  
507 - // you are going to turn into a form XObject, though you can also  
508 - // use QPDFPageObjectHelper::getFormXObjectForPage for that  
509 - // purpose. 412 + // The return value of this method is an indirect reference to the copied object in this file.
  413 + // This method is intended to be used to copy non-page objects. To copy page objects, pass the
  414 + // foreign page object directly to addPage (or addPageAt). If you copy objects that contain
  415 + // references to pages, you should copy the pages first using addPage(At). Otherwise references
  416 + // to the pages that have not been copied will be replaced with nulls. It is possible to use
  417 + // copyForeignObject on page objects if you are not going to use them as pages. Doing so copies
  418 + // the object normally but does not update the page structure. For example, it is a valid use
  419 + // case to use copyForeignObject for a page that you are going to turn into a form XObject,
  420 + // though you can also use QPDFPageObjectHelper::getFormXObjectForPage for that purpose.
510 // 421 //
511 - // When copying objects with this method, object structure will be  
512 - // preserved, so all indirectly referenced indirect objects will  
513 - // be copied as well. This includes any circular references that  
514 - // may exist. The QPDF object keeps a record of what has already  
515 - // been copied, so shared objects will not be copied multiple  
516 - // times. This also means that if you mutate an object that has  
517 - // already been copied and try to copy it again, it won't work  
518 - // since the modified object will not be recopied. Therefore, you  
519 - // should do all mutation on the original file that you are going  
520 - // to do before you start copying its objects to a new file. 422 + // When copying objects with this method, object structure will be preserved, so all indirectly
  423 + // referenced indirect objects will be copied as well. This includes any circular references
  424 + // that may exist. The QPDF object keeps a record of what has already been copied, so shared
  425 + // objects will not be copied multiple times. This also means that if you mutate an object that
  426 + // has already been copied and try to copy it again, it won't work since the modified object
  427 + // will not be recopied. Therefore, you should do all mutation on the original file that you
  428 + // are going to do before you start copying its objects to a new file.
521 QPDF_DLL 429 QPDF_DLL
522 QPDFObjectHandle copyForeignObject(QPDFObjectHandle foreign); 430 QPDFObjectHandle copyForeignObject(QPDFObjectHandle foreign);
523 431
@@ -633,9 +541,8 @@ class QPDF @@ -633,9 +541,8 @@ class QPDF
633 QPDF_DLL 541 QPDF_DLL
634 bool allowModifyAll(); 542 bool allowModifyAll();
635 543
636 - // Helper function to trim padding from user password. Calling  
637 - // trim_user_password on the result of getPaddedUserPassword gives  
638 - // getTrimmedUserPassword's result. 544 + // Helper function to trim padding from user password. Calling trim_user_password on the result
  545 + // of getPaddedUserPassword gives getTrimmedUserPassword's result.
639 QPDF_DLL 546 QPDF_DLL
640 static void trim_user_password(std::string& user_password); 547 static void trim_user_password(std::string& user_password);
641 QPDF_DLL 548 QPDF_DLL
@@ -678,47 +585,40 @@ class QPDF @@ -678,47 +585,40 @@ class QPDF
678 std::string& OE, 585 std::string& OE,
679 std::string& UE, 586 std::string& UE,
680 std::string& Perms); 587 std::string& Perms);
681 - // Return the full user password as stored in the PDF file. For  
682 - // files encrypted with 40-bit or 128-bit keys, the user password  
683 - // can be recovered when the file is opened using the owner  
684 - // password. This is not possible with newer encryption formats.  
685 - // If you are attempting to recover the user password in a  
686 - // user-presentable form, call getTrimmedUserPassword() instead. 588 + // Return the full user password as stored in the PDF file. For files encrypted with 40-bit or
  589 + // 128-bit keys, the user password can be recovered when the file is opened using the owner
  590 + // password. This is not possible with newer encryption formats. If you are attempting to
  591 + // recover the user password in a user-presentable form, call getTrimmedUserPassword() instead.
687 QPDF_DLL 592 QPDF_DLL
688 std::string const& getPaddedUserPassword() const; 593 std::string const& getPaddedUserPassword() const;
689 - // Return human-readable form of user password subject to same  
690 - // limitations as getPaddedUserPassword(). 594 + // Return human-readable form of user password subject to same limitations as
  595 + // getPaddedUserPassword().
691 QPDF_DLL 596 QPDF_DLL
692 std::string getTrimmedUserPassword() const; 597 std::string getTrimmedUserPassword() const;
693 - // Return the previously computed or retrieved encryption key for  
694 - // this file 598 + // Return the previously computed or retrieved encryption key for this file
695 QPDF_DLL 599 QPDF_DLL
696 std::string getEncryptionKey() const; 600 std::string getEncryptionKey() const;
697 - // Remove security restrictions associated with digitally signed  
698 - // files. 601 + // Remove security restrictions associated with digitally signed files.
699 QPDF_DLL 602 QPDF_DLL
700 void removeSecurityRestrictions(); 603 void removeSecurityRestrictions();
701 604
702 // Linearization support 605 // Linearization support
703 606
704 - // Returns true iff the file starts with a linearization parameter  
705 - // dictionary. Does no additional validation. 607 + // Returns true iff the file starts with a linearization parameter dictionary. Does no
  608 + // additional validation.
706 QPDF_DLL 609 QPDF_DLL
707 bool isLinearized(); 610 bool isLinearized();
708 611
709 - // Performs various sanity checks on a linearized file. Return  
710 - // true if no errors or warnings. Otherwise, return false and  
711 - // output errors and warnings to the default output stream  
712 - // (std::cout or whatever is configured in the logger). It is  
713 - // recommended for linearization errors to be treated as warnings. 612 + // Performs various sanity checks on a linearized file. Return true if no errors or warnings.
  613 + // Otherwise, return false and output errors and warnings to the default output stream
  614 + // (std::cout or whatever is configured in the logger). It is recommended for linearization
  615 + // errors to be treated as warnings.
714 QPDF_DLL 616 QPDF_DLL
715 bool checkLinearization(); 617 bool checkLinearization();
716 618
717 - // Calls checkLinearization() and, if possible, prints normalized  
718 - // contents of some of the hints tables to the default output  
719 - // stream. Normalization includes adding min values to delta  
720 - // values and adjusting offsets based on the location and size of  
721 - // the primary hint stream. 619 + // Calls checkLinearization() and, if possible, prints normalized contents of some of the hints
  620 + // tables to the default output stream. Normalization includes adding min values to delta values
  621 + // and adjusting offsets based on the location and size of the primary hint stream.
722 QPDF_DLL 622 QPDF_DLL
723 void showLinearizationData(); 623 void showLinearizationData();
724 624
@@ -726,66 +626,52 @@ class QPDF @@ -726,66 +626,52 @@ class QPDF
726 QPDF_DLL 626 QPDF_DLL
727 void showXRefTable(); 627 void showXRefTable();
728 628
729 - // Starting from qpdf 11.0 user code should not need to call this method.  
730 - // Before 11.0 this method was used to detect all indirect references to  
731 - // objects that don't exist and resolve them by replacing them with null,  
732 - // which is how the PDF spec says to interpret such dangling references.  
733 - // This method is called automatically when you try to add any new objects,  
734 - // if you call getAllObjects, and before a file is written. The qpdf object  
735 - // caches whether it has run this to avoid running it multiple times.  
736 - // Before 11.2.1 you could pass true to force it to run again if you had  
737 - // explicitly added new objects that may have additional dangling  
738 - // references. 629 + // Starting from qpdf 11.0 user code should not need to call this method. Before 11.0 this
  630 + // method was used to detect all indirect references to objects that don't exist and resolve
  631 + // them by replacing them with null, which is how the PDF spec says to interpret such dangling
  632 + // references. This method is called automatically when you try to add any new objects, if you
  633 + // call getAllObjects, and before a file is written. The qpdf object caches whether it has run
  634 + // this to avoid running it multiple times. Before 11.2.1 you could pass true to force it to run
  635 + // again if you had explicitly added new objects that may have additional dangling references.
739 QPDF_DLL 636 QPDF_DLL
740 void fixDanglingReferences(bool force = false); 637 void fixDanglingReferences(bool force = false);
741 638
742 - // Return the approximate number of indirect objects. It is  
743 - // approximate because not all objects in the file are preserved  
744 - // in all cases, and gaps in object numbering are not preserved. 639 + // Return the approximate number of indirect objects. It is/ approximate because not all objects
  640 + // in the file are preserved in all cases, and gaps in object numbering are not preserved.
745 QPDF_DLL 641 QPDF_DLL
746 size_t getObjectCount(); 642 size_t getObjectCount();
747 643
748 - // Returns a list of indirect objects for every object in the xref  
749 - // table. Useful for discovering objects that are not otherwise  
750 - // referenced. 644 + // Returns a list of indirect objects for every object in the xref table. Useful for discovering
  645 + // objects that are not otherwise referenced.
751 QPDF_DLL 646 QPDF_DLL
752 std::vector<QPDFObjectHandle> getAllObjects(); 647 std::vector<QPDFObjectHandle> getAllObjects();
753 648
754 - // Optimization support -- see doc/optimization. Implemented in  
755 - // QPDF_optimization.cc  
756 -  
757 - // The object_stream_data map maps from a "compressed" object to  
758 - // the object stream that contains it. This enables optimize to  
759 - // populate the object <-> user maps with only uncompressed  
760 - // objects. If allow_changes is false, an exception will be thrown  
761 - // if any changes are made during the optimization process. This  
762 - // is available so that the test suite can make sure that a  
763 - // linearized file is already optimized. When called in this way,  
764 - // optimize() still populates the object <-> user maps. The  
765 - // optional skip_stream_parameters parameter, if present, is  
766 - // called for each stream object. The function should return 2 if  
767 - // optimization should discard /Length, /Filter, and /DecodeParms;  
768 - // 1 if it should discard /Length, and 0 if it should preserve all  
769 - // keys. This is used by QPDFWriter to avoid creation of dangling  
770 - // objects for stream dictionary keys it will be regenerating. 649 + // Optimization support -- see doc/optimization. Implemented in QPDF_optimization.cc
  650 +
  651 + // The object_stream_data map maps from a "compressed" object to the object stream that contains
  652 + // it. This enables optimize to populate the object <-> user maps with only uncompressed
  653 + // objects. If allow_changes is false, an exception will be thrown if any changes are made
  654 + // during the optimization process. This is available so that the test suite can make sure that
  655 + // a linearized file is already optimized. When called in this way, optimize() still populates
  656 + // the object <-> user maps. The optional skip_stream_parameters parameter, if present, is
  657 + // called for each stream object. The function should return 2 if optimization should discard
  658 + // /Length, /Filter, and /DecodeParms; 1 if it should discard /Length, and 0 if it should
  659 + // preserve all keys. This is used by QPDFWriter to avoid creation of dangling objects for
  660 + // stream dictionary keys it will be regenerating.
771 QPDF_DLL 661 QPDF_DLL
772 void optimize( 662 void optimize(
773 std::map<int, int> const& object_stream_data, 663 std::map<int, int> const& object_stream_data,
774 bool allow_changes = true, 664 bool allow_changes = true,
775 std::function<int(QPDFObjectHandle&)> skip_stream_parameters = nullptr); 665 std::function<int(QPDFObjectHandle&)> skip_stream_parameters = nullptr);
776 666
777 - // Traverse page tree return all /Page objects. It also detects  
778 - // and resolves cases in which the same /Page object is  
779 - // duplicated. For efficiency, this method returns a const  
780 - // reference to an internal vector of pages. Calls to addPage,  
781 - // addPageAt, and removePage safely update this, but directly  
782 - // manipulation of the pages tree or pushing inheritable objects  
783 - // to the page level may invalidate it. See comments for  
784 - // updateAllPagesCache() for additional notes. Newer code should  
785 - // use QPDFPageDocumentHelper::getAllPages instead. The decision  
786 - // to expose this internal cache was arguably incorrect, but it is  
787 - // being left here for compatibility. It is, however, completely  
788 - // safe to use this for files that you are not modifying. 667 + // Traverse page tree return all /Page objects. It also detects and resolves cases in which the
  668 + // same /Page object is duplicated. For efficiency, this method returns a const reference to an
  669 + // internal vector of pages. Calls to addPage, addPageAt, and removePage safely update this, but
  670 + // directly manipulation of the pages tree or pushing inheritable objects to the page level may
  671 + // invalidate it. See comments for updateAllPagesCache() for additional notes. Newer code should
  672 + // use QPDFPageDocumentHelper::getAllPages instead. The decision to expose this internal cache
  673 + // was arguably incorrect, but it is being left here for compatibility. It is, however,
  674 + // completely safe to use this for files that you are not modifying.
789 QPDF_DLL 675 QPDF_DLL
790 std::vector<QPDFObjectHandle> const& getAllPages(); 676 std::vector<QPDFObjectHandle> const& getAllPages();
791 677
@@ -794,40 +680,32 @@ class QPDF @@ -794,40 +680,32 @@ class QPDF
794 QPDF_DLL 680 QPDF_DLL
795 bool everPushedInheritedAttributesToPages() const; 681 bool everPushedInheritedAttributesToPages() const;
796 682
797 - // These methods, given a page object or its object/generation  
798 - // number, returns the 0-based index into the array returned by  
799 - // getAllPages() for that page. An exception is thrown if the page  
800 - // is not found. 683 + // These methods, given a page object or its object/generation number, return the 0-based index
  684 + // into the array returned by getAllPages() for that page. An exception is thrown if the page is
  685 + // not found.
801 QPDF_DLL 686 QPDF_DLL
802 int findPage(QPDFObjGen const& og); 687 int findPage(QPDFObjGen const& og);
803 QPDF_DLL 688 QPDF_DLL
804 int findPage(QPDFObjectHandle& page); 689 int findPage(QPDFObjectHandle& page);
805 690
806 - // This method synchronizes QPDF's cache of the page structure  
807 - // with the actual /Pages tree. If you restrict changes to the  
808 - // /Pages tree, including addition, removal, or replacement of  
809 - // pages or changes to any /Pages objects, to calls to these page  
810 - // handling APIs, you never need to call this method. If you  
811 - // modify /Pages structures directly, you must call this method  
812 - // afterwards. This method updates the internal list of pages, so  
813 - // after calling this method, any previous references returned by  
814 - // getAllPages() will be valid again. It also resets any state  
815 - // about having pushed inherited attributes in /Pages objects down  
816 - // to the pages, so if you add any inheritable attributes to a  
817 - // /Pages object, you should also call this method. 691 + // This method synchronizes QPDF's cache of the page structure with the actual /Pages tree. If
  692 + // you restrict changes to the /Pages tree, including addition, removal, or replacement of pages
  693 + // or changes to any /Pages objects, to calls to these page handling APIs, you never need to
  694 + // call this method. If you modify /Pages structures directly, you must call this method
  695 + // afterwards. This method updates the internal list of pages, so after calling this method,
  696 + // any previous references returned by getAllPages() will be valid again. It also resets any
  697 + // state about having pushed inherited attributes in /Pages objects down to the pages, so if you
  698 + // add any inheritable attributes to a /Pages object, you should also call this method.
818 QPDF_DLL 699 QPDF_DLL
819 void updateAllPagesCache(); 700 void updateAllPagesCache();
820 701
821 - // Legacy handling API. These methods are not going anywhere, and  
822 - // you should feel free to continue using them if it simplifies  
823 - // your code. Newer code should make use of QPDFPageDocumentHelper  
824 - // instead as future page handling methods will be added there.  
825 - // The functionality and specification of these legacy methods is  
826 - // identical to the identically named methods there, except that  
827 - // these versions use QPDFObjectHandle instead of  
828 - // QPDFPageObjectHelper, so please see comments in that file for  
829 - // descriptions. There are subtleties you need to know about, so  
830 - // please look at the comments there. 702 + // Legacy handling API. These methods are not going anywhere, and you should feel free to
  703 + // continue using them if it simplifies your code. Newer code should make use of
  704 + // QPDFPageDocumentHelper instead as future page handling methods will be added there. The
  705 + // functionality and specification of these legacy methods is identical to the identically named
  706 + // methods there, except that these versions use QPDFObjectHandle instead of
  707 + // QPDFPageObjectHelper, so please see comments in that file for descriptions. There are
  708 + // subtleties you need to know about, so please look at the comments there.
831 QPDF_DLL 709 QPDF_DLL
832 void pushInheritedAttributesToPage(); 710 void pushInheritedAttributesToPage();
833 QPDF_DLL 711 QPDF_DLL
@@ -838,8 +716,7 @@ class QPDF @@ -838,8 +716,7 @@ class QPDF
838 void removePage(QPDFObjectHandle page); 716 void removePage(QPDFObjectHandle page);
839 // End legacy page helpers 717 // End legacy page helpers
840 718
841 - // Writer class is restricted to QPDFWriter so that only it can  
842 - // call certain methods. 719 + // Writer class is restricted to QPDFWriter so that only it can call certain methods.
843 class Writer 720 class Writer
844 { 721 {
845 friend class QPDFWriter; 722 friend class QPDFWriter;
@@ -884,8 +761,8 @@ class QPDF @@ -884,8 +761,8 @@ class QPDF
884 } 761 }
885 }; 762 };
886 763
887 - // The Resolver class is restricted to QPDFObject so that only it  
888 - // can resolve indirect references. 764 + // The Resolver class is restricted to QPDFObject so that only it can resolve indirect
  765 + // references.
889 class Resolver 766 class Resolver
890 { 767 {
891 friend class QPDFObject; 768 friend class QPDFObject;
@@ -898,8 +775,7 @@ class QPDF @@ -898,8 +775,7 @@ class QPDF
898 } 775 }
899 }; 776 };
900 777
901 - // StreamCopier class is restricted to QPDFObjectHandle so it can  
902 - // copy stream data. 778 + // StreamCopier class is restricted to QPDFObjectHandle so it can copy stream data.
903 class StreamCopier 779 class StreamCopier
904 { 780 {
905 friend class QPDFObjectHandle; 781 friend class QPDFObjectHandle;
@@ -974,12 +850,10 @@ class QPDF @@ -974,12 +850,10 @@ class QPDF
974 static bool test_json_validators(); 850 static bool test_json_validators();
975 851
976 private: 852 private:
977 - // It has never been safe to copy QPDF objects as there is code in  
978 - // the library that assumes there are no copies of a QPDF object.  
979 - // Copying QPDF objects was not prevented by the API until qpdf  
980 - // 11. If you have been copying QPDF objects, use  
981 - // std::shared_ptr<QPDF> instead. From qpdf 11, you can use  
982 - // QPDF::create to create them. 853 + // It has never been safe to copy QPDF objects as there is code in the library that assumes
  854 + // there are no copies of a QPDF object. Copying QPDF objects was not prevented by the API until
  855 + // qpdf 11. If you have been copying QPDF objects, use std::shared_ptr<QPDF> instead. From qpdf
  856 + // 11, you can use QPDF::create to create them.
983 QPDF(QPDF const&) = delete; 857 QPDF(QPDF const&) = delete;
984 QPDF& operator=(QPDF const&) = delete; 858 QPDF& operator=(QPDF const&) = delete;
985 859
@@ -1200,8 +1074,8 @@ class QPDF @@ -1200,8 +1074,8 @@ class QPDF
1200 1074
1201 // For QPDFWriter: 1075 // For QPDFWriter:
1202 1076
1203 - // Get lists of all objects in order according to the part of a  
1204 - // linearized file that they belong to. 1077 + // Get lists of all objects in order according to the part of a linearized file that they belong
  1078 + // to.
1205 void getLinearizedParts( 1079 void getLinearizedParts(
1206 std::map<int, int> const& object_stream_data, 1080 std::map<int, int> const& object_stream_data,
1207 std::vector<QPDFObjectHandle>& part4, 1081 std::vector<QPDFObjectHandle>& part4,
@@ -1221,8 +1095,7 @@ class QPDF @@ -1221,8 +1095,7 @@ class QPDF
1221 // Map object to object stream that contains it 1095 // Map object to object stream that contains it
1222 void getObjectStreamData(std::map<int, int>&); 1096 void getObjectStreamData(std::map<int, int>&);
1223 1097
1224 - // Get a list of objects that would be permitted in an object  
1225 - // stream. 1098 + // Get a list of objects that would be permitted in an object stream.
1226 std::vector<QPDFObjGen> getCompressibleObjGens(); 1099 std::vector<QPDFObjGen> getCompressibleObjGens();
1227 1100
1228 // methods to support page handling 1101 // methods to support page handling
@@ -1418,20 +1291,16 @@ class QPDF @@ -1418,20 +1291,16 @@ class QPDF
1418 qpdf_offset_t H_length; // length of primary hint stream 1291 qpdf_offset_t H_length; // length of primary hint stream
1419 }; 1292 };
1420 1293
1421 - // Computed hint table value data structures. These tables  
1422 - // contain the computed values on which the hint table values are  
1423 - // based. They exclude things like number of bits and store  
1424 - // actual values instead of mins and deltas. File offsets are  
1425 - // also absolute rather than being offset by the size of the  
1426 - // primary hint table. We populate the hint table structures from  
1427 - // these during writing and compare the hint table values with  
1428 - // these during validation. We ignore some values for various  
1429 - // reasons described in the code. Those values are omitted from  
1430 - // these structures. Note also that object numbers are object  
1431 - // numbers from the input file, not the output file.  
1432 -  
1433 - // Naming convention: CHSomething is analogous to HSomething  
1434 - // above. "CH" is computed hint. 1294 + // Computed hint table value data structures. These tables contain the computed values on which
  1295 + // the hint table values are based. They exclude things like number of bits and store actual
  1296 + // values instead of mins and deltas. File offsets are also absolute rather than being offset
  1297 + // by the size of the primary hint table. We populate the hint table structures from these
  1298 + // during writing and compare the hint table values with these during validation. We ignore
  1299 + // some values for various reasons described in the code. Those values are omitted from these
  1300 + // structures. Note also that object numbers are object numbers from the input file, not the
  1301 + // output file.
  1302 +
  1303 + // Naming convention: CHSomething is analogous to HSomething above. "CH" is computed hint.
1435 1304
1436 struct CHPageOffsetEntry 1305 struct CHPageOffsetEntry
1437 { 1306 {
@@ -1482,8 +1351,7 @@ class QPDF @@ -1482,8 +1351,7 @@ class QPDF
1482 1351
1483 // No need for CHGeneric -- HGeneric is fine as is. 1352 // No need for CHGeneric -- HGeneric is fine as is.
1484 1353
1485 - // Data structures to support optimization -- implemented in  
1486 - // QPDF_optimization.cc 1354 + // Data structures to support optimization -- implemented in QPDF_optimization.cc
1487 1355
1488 class ObjUser 1356 class ObjUser
1489 { 1357 {
@@ -1535,8 +1403,7 @@ class QPDF @@ -1535,8 +1403,7 @@ class QPDF
1535 bool findStartxref(); 1403 bool findStartxref();
1536 bool findEndstream(); 1404 bool findEndstream();
1537 1405
1538 - // methods to support linearization checking -- implemented in  
1539 - // QPDF_linearization.cc 1406 + // methods to support linearization checking -- implemented in QPDF_linearization.cc
1540 void readLinearizationData(); 1407 void readLinearizationData();
1541 bool checkLinearizationInternal(); 1408 bool checkLinearizationInternal();
1542 void dumpLinearizationDataInternal(); 1409 void dumpLinearizationDataInternal();
@@ -1693,25 +1560,23 @@ class QPDF @@ -1693,25 +1560,23 @@ class QPDF
1693 bool uncompressed_after_compressed{false}; 1560 bool uncompressed_after_compressed{false};
1694 bool linearization_warnings{false}; 1561 bool linearization_warnings{false};
1695 1562
1696 - // Linearization parameter dictionary and hint table data: may be  
1697 - // read from file or computed prior to writing a linearized file 1563 + // Linearization parameter dictionary and hint table data: may be read from file or computed
  1564 + // prior to writing a linearized file
1698 QPDFObjectHandle lindict; 1565 QPDFObjectHandle lindict;
1699 LinParameters linp; 1566 LinParameters linp;
1700 HPageOffset page_offset_hints; 1567 HPageOffset page_offset_hints;
1701 HSharedObject shared_object_hints; 1568 HSharedObject shared_object_hints;
1702 HGeneric outline_hints; 1569 HGeneric outline_hints;
1703 1570
1704 - // Computed linearization data: used to populate above tables  
1705 - // during writing and to compare with them during validation.  
1706 - // c_ means computed. 1571 + // Computed linearization data: used to populate above tables during writing and to compare
  1572 + // with them during validation. c_ means computed.
1707 LinParameters c_linp; 1573 LinParameters c_linp;
1708 CHPageOffset c_page_offset_data; 1574 CHPageOffset c_page_offset_data;
1709 CHSharedObject c_shared_object_data; 1575 CHSharedObject c_shared_object_data;
1710 HGeneric c_outline_data; 1576 HGeneric c_outline_data;
1711 1577
1712 - // Object ordering data for linearized files: initialized by  
1713 - // calculateLinearizationData(). Part numbers refer to the PDF  
1714 - // 1.4 specification. 1578 + // Object ordering data for linearized files: initialized by calculateLinearizationData().
  1579 + // Part numbers refer to the PDF 1.4 specification.
1715 std::vector<QPDFObjectHandle> part4; 1580 std::vector<QPDFObjectHandle> part4;
1716 std::vector<QPDFObjectHandle> part6; 1581 std::vector<QPDFObjectHandle> part6;
1717 std::vector<QPDFObjectHandle> part7; 1582 std::vector<QPDFObjectHandle> part7;
@@ -1723,9 +1588,8 @@ class QPDF @@ -1723,9 +1588,8 @@ class QPDF
1723 std::map<QPDFObjGen, std::set<ObjUser>> object_to_obj_users; 1588 std::map<QPDFObjGen, std::set<ObjUser>> object_to_obj_users;
1724 }; 1589 };
1725 1590
1726 - // Keep all member variables inside the Members object, which we  
1727 - // dynamically allocate. This makes it possible to add new private  
1728 - // members without breaking binary compatibility. 1591 + // Keep all member variables inside the Members object, which we dynamically allocate. This
  1592 + // makes it possible to add new private members without breaking binary compatibility.
1729 std::shared_ptr<Members> m; 1593 std::shared_ptr<Members> m;
1730 }; 1594 };
1731 1595
include/qpdf/QPDFAcroFormDocumentHelper.hh
@@ -2,69 +2,55 @@ @@ -2,69 +2,55 @@
2 // 2 //
3 // This file is part of qpdf. 3 // This file is part of qpdf.
4 // 4 //
5 -// Licensed under the Apache License, Version 2.0 (the "License");  
6 -// you may not use this file except in compliance with the License.  
7 -// You may obtain a copy of the License at 5 +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
  6 +// in compliance with the License. You may obtain a copy of the License at
8 // 7 //
9 // http://www.apache.org/licenses/LICENSE-2.0 8 // http://www.apache.org/licenses/LICENSE-2.0
10 // 9 //
11 -// Unless required by applicable law or agreed to in writing, software  
12 -// distributed under the License is distributed on an "AS IS" BASIS,  
13 -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
14 -// See the License for the specific language governing permissions and  
15 -// limitations under the License. 10 +// Unless required by applicable law or agreed to in writing, software distributed under the License
  11 +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
  12 +// or implied. See the License for the specific language governing permissions and limitations under
  13 +// the License.
16 // 14 //
17 -// Versions of qpdf prior to version 7 were released under the terms  
18 -// of version 2.0 of the Artistic License. At your option, you may  
19 -// continue to consider qpdf to be licensed under those terms. Please  
20 -// see the manual for additional information. 15 +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
  16 +// License. At your option, you may continue to consider qpdf to be licensed under those terms.
  17 +// Please see the manual for additional information.
21 18
22 #ifndef QPDFACROFORMDOCUMENTHELPER_HH 19 #ifndef QPDFACROFORMDOCUMENTHELPER_HH
23 #define QPDFACROFORMDOCUMENTHELPER_HH 20 #define QPDFACROFORMDOCUMENTHELPER_HH
24 21
25 -// This document helper is intended to help with operations on  
26 -// interactive forms. Here are the key things to know: 22 +// This document helper is intended to help with operations on interactive forms. Here are the key
  23 +// things to know:
27 24
28 -// * The PDF specification talks about interactive forms and also  
29 -// about form XObjects. While form XObjects appear in parts of  
30 -// interactive forms, this class is concerned about interactive  
31 -// forms, not form XObjects. 25 +// * The PDF specification talks about interactive forms and also about form XObjects. While form
  26 +// XObjects appear in parts of interactive forms, this class is concerned about interactive forms,
  27 +// not form XObjects.
32 // 28 //
33 -// * Interactive forms are discussed in the PDF Specification (ISO PDF  
34 -// 32000-1:2008) section 12.7. Also relevant is the section about  
35 -// Widget annotations. Annotations are discussed in section 12.5  
36 -// with annotation dictionaries discussed in 12.5.1. Widget  
37 -// annotations are discussed specifically in section 12.5.6.19. 29 +// * Interactive forms are discussed in the PDF Specification (ISO PDF 32000-1:2008) section 12.7.
  30 +// Also relevant is the section about Widget annotations. Annotations are discussed in
  31 +// section 12.5 with annotation dictionaries discussed in 12.5.1. Widget annotations are discussed
  32 +// specifically in section 12.5.6.19.
38 // 33 //
39 -// * What you need to know about the structure of interactive forms in  
40 -// PDF files: 34 +// * What you need to know about the structure of interactive forms in PDF files:
41 // 35 //
42 -// - The document catalog contains the key "/AcroForm" which  
43 -// contains a list of fields. Fields are represented as a tree  
44 -// structure much like pages. Nodes in the fields tree may contain  
45 -// other fields. Fields may inherit values of many of their  
46 -// attributes from ancestors in the tree. 36 +// - The document catalog contains the key "/AcroForm" which contains a list of fields. Fields are
  37 +// represented as a tree structure much like pages. Nodes in the fields tree may contain other
  38 +// fields. Fields may inherit values of many of their attributes from ancestors in the tree.
47 // 39 //
48 -// - Fields may also have children that are widget annotations. As a  
49 -// special case, and a cause of considerable confusion, if a field  
50 -// has a single annotation as a child, the annotation dictionary  
51 -// may be merged with the field dictionary. In that case, the  
52 -// field and the annotation are in the same object. Note that,  
53 -// while field dictionary attributes are inherited, annotation  
54 -// dictionary attributes are not. 40 +// - Fields may also have children that are widget annotations. As a special case, and a cause of
  41 +// considerable confusion, if a field has a single annotation as a child, the annotation
  42 +// dictionary may be merged with the field dictionary. In that case, the field and the
  43 +// annotation are in the same object. Note that, while field dictionary attributes are
  44 +// inherited, annotation dictionary attributes are not.
55 // 45 //
56 -// - A page dictionary contains a key called "/Annots" which  
57 -// contains a simple list of annotations. For any given annotation  
58 -// of subtype "/Widget", you should encounter that annotation in  
59 -// the "/Annots" dictionary of a page, and you should also be able  
60 -// to reach it by traversing through the "/AcroForm" dictionary  
61 -// from the document catalog. In the simplest case (and also a  
62 -// very common case), a form field's widget annotation will be  
63 -// merged with the field object, and the object will appear  
64 -// directly both under "/Annots" in the page dictionary and under  
65 -// "/Fields" in the "/AcroForm" dictionary. In a more complex  
66 -// case, you may have to trace through various "/Kids" elements in  
67 -// the "/AcroForm" field entry until you find the annotation 46 +// - A page dictionary contains a key called "/Annots" which contains a simple list of
  47 +// annotations. For any given annotation of subtype "/Widget", you should encounter that
  48 +// annotation in the "/Annots" dictionary of a page, and you should also be able to reach it by
  49 +// traversing through the "/AcroForm" dictionary from the document catalog. In the simplest case
  50 +// (and also a very common case), a form field's widget annotation will be merged with the field
  51 +// object, and the object will appear directly both under "/Annots" in the page dictionary and
  52 +// under "/Fields" in the "/AcroForm" dictionary. In a more complex case, you may have to trace
  53 +// through various "/Kids" elements in the "/AcroForm" field entry until you find the annotation
68 // dictionary. 54 // dictionary.
69 55
70 #include <qpdf/QPDFDocumentHelper.hh> 56 #include <qpdf/QPDFDocumentHelper.hh>
@@ -87,34 +73,28 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper @@ -87,34 +73,28 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper
87 QPDF_DLL 73 QPDF_DLL
88 virtual ~QPDFAcroFormDocumentHelper() = default; 74 virtual ~QPDFAcroFormDocumentHelper() = default;
89 75
90 - // This class lazily creates an internal cache of the mapping  
91 - // among form fields, annotations, and pages. Methods within this  
92 - // class preserve the validity of this cache. However, if you  
93 - // modify pages' annotation dictionaries, the document's /AcroForm  
94 - // dictionary, or any form fields manually in a way that alters  
95 - // the association between forms, fields, annotations, and pages,  
96 - // it may cause this cache to become invalid. This method marks  
97 - // the cache invalid and forces it to be regenerated the next time  
98 - // it is needed. 76 + // This class lazily creates an internal cache of the mapping among form fields, annotations,
  77 + // and pages. Methods within this class preserve the validity of this cache. However, if you
  78 + // modify pages' annotation dictionaries, the document's /AcroForm dictionary, or any form
  79 + // fields manually in a way that alters the association between forms, fields, annotations, and
  80 + // pages, it may cause this cache to become invalid. This method marks the cache invalid and
  81 + // forces it to be regenerated the next time it is needed.
99 QPDF_DLL 82 QPDF_DLL
100 void invalidateCache(); 83 void invalidateCache();
101 84
102 QPDF_DLL 85 QPDF_DLL
103 bool hasAcroForm(); 86 bool hasAcroForm();
104 87
105 - // Add a form field, initializing the document's AcroForm  
106 - // dictionary if needed, updating the cache if necessary. Note  
107 - // that you are adding fields that are copies of other fields,  
108 - // this method may result in multiple fields existing with the  
109 - // same qualified name, which can have unexpected side effects. In  
110 - // that case, you should use addAndRenameFormFields() instead. 88 + // Add a form field, initializing the document's AcroForm dictionary if needed, updating the
  89 + // cache if necessary. Note that if you are adding fields that are copies of other fields, this
  90 + // method may result in multiple fields existing with the same qualified name, which can have
  91 + // unexpected side effects. In that case, you should use addAndRenameFormFields() instead.
111 QPDF_DLL 92 QPDF_DLL
112 void addFormField(QPDFFormFieldObjectHelper); 93 void addFormField(QPDFFormFieldObjectHelper);
113 94
114 - // Add a collection of form fields making sure that their fully  
115 - // qualified names don't conflict with already present form  
116 - // fields. Fields within the collection of new fields that have  
117 - // the same name as each other will continue to do so. 95 + // Add a collection of form fields making sure that their fully qualified names don't conflict
  96 + // with already present form fields. Fields within the collection of new fields that have the
  97 + // same name as each other will continue to do so.
118 QPDF_DLL 98 QPDF_DLL
119 void addAndRenameFormFields(std::vector<QPDFObjectHandle> fields); 99 void addAndRenameFormFields(std::vector<QPDFObjectHandle> fields);
120 100
@@ -122,31 +102,27 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper @@ -122,31 +102,27 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper
122 QPDF_DLL 102 QPDF_DLL
123 void removeFormFields(std::set<QPDFObjGen> const&); 103 void removeFormFields(std::set<QPDFObjGen> const&);
124 104
125 - // Set the name of a field, updating internal records of field  
126 - // names. Name should be UTF-8 encoded. 105 + // Set the name of a field, updating internal records of field names. Name should be UTF-8
  106 + // encoded.
127 QPDF_DLL 107 QPDF_DLL
128 void setFormFieldName(QPDFFormFieldObjectHelper, std::string const& name); 108 void setFormFieldName(QPDFFormFieldObjectHelper, std::string const& name);
129 109
130 - // Return a vector of all terminal fields in a document. Terminal  
131 - // fields are fields that have no children that are also fields.  
132 - // Terminal fields may still have children that are annotations.  
133 - // Intermediate nodes in the fields tree are not included in this  
134 - // list, but you can still reach them through the getParent method  
135 - // of the field object helper. 110 + // Return a vector of all terminal fields in a document. Terminal fields are fields that have no
  111 + // children that are also fields. Terminal fields may still have children that are annotations.
  112 + // Intermediate nodes in the fields tree are not included in this list, but you can still reach
  113 + // them through the getParent method of the field object helper.
136 QPDF_DLL 114 QPDF_DLL
137 std::vector<QPDFFormFieldObjectHelper> getFormFields(); 115 std::vector<QPDFFormFieldObjectHelper> getFormFields();
138 116
139 - // Return all the form fields that have the given fully-qualified  
140 - // name and also have an explicit "/T" attribute. For this  
141 - // information to be accurate, any changes to field names must be  
142 - // done through setFormFieldName() above. 117 + // Return all the form fields that have the given fully-qualified name and also have an explicit
  118 + // "/T" attribute. For this information to be accurate, any changes to field names must be done
  119 + // through setFormFieldName() above.
143 QPDF_DLL 120 QPDF_DLL
144 std::set<QPDFObjGen> getFieldsWithQualifiedName(std::string const& name); 121 std::set<QPDFObjGen> getFieldsWithQualifiedName(std::string const& name);
145 122
146 - // Return the annotations associated with a terminal field. Note  
147 - // that in the case of a field having a single annotation, the  
148 - // underlying object will typically be the same as the underlying  
149 - // object for the field. 123 + // Return the annotations associated with a terminal field. Note that in the case of a field
  124 + // having a single annotation, the underlying object will typically be the same as the
  125 + // underlying object for the field.
150 QPDF_DLL 126 QPDF_DLL
151 std::vector<QPDFAnnotationObjectHelper> getAnnotationsForField(QPDFFormFieldObjectHelper); 127 std::vector<QPDFAnnotationObjectHelper> getAnnotationsForField(QPDFFormFieldObjectHelper);
152 128
@@ -158,63 +134,49 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper @@ -158,63 +134,49 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper
158 QPDF_DLL 134 QPDF_DLL
159 std::vector<QPDFFormFieldObjectHelper> getFormFieldsForPage(QPDFPageObjectHelper); 135 std::vector<QPDFFormFieldObjectHelper> getFormFieldsForPage(QPDFPageObjectHelper);
160 136
161 - // Return the terminal field that is associated with this  
162 - // annotation. If the annotation dictionary is merged with the  
163 - // field dictionary, the underlying object will be the same, but  
164 - // this is not always the case. Note that if you call this method  
165 - // with an annotation that is not a widget annotation, there will  
166 - // not be an associated field, and this method will return a 137 + // Return the terminal field that is associated with this annotation. If the annotation
  138 + // dictionary is merged with the field dictionary, the underlying object will be the same, but
  139 + // this is not always the case. Note that if you call this method with an annotation that is not
  140 + // a widget annotation, there will not be an associated field, and this method will return a
167 // helper associated with a null object (isNull() == true). 141 // helper associated with a null object (isNull() == true).
168 QPDF_DLL 142 QPDF_DLL
169 QPDFFormFieldObjectHelper getFieldForAnnotation(QPDFAnnotationObjectHelper); 143 QPDFFormFieldObjectHelper getFieldForAnnotation(QPDFAnnotationObjectHelper);
170 144
171 - // Return the current value of /NeedAppearances. If  
172 - // /NeedAppearances is missing, return false as that is how PDF  
173 - // viewers are supposed to interpret it. 145 + // Return the current value of /NeedAppearances. If /NeedAppearances is missing, return false as
  146 + // that is how PDF viewers are supposed to interpret it.
174 QPDF_DLL 147 QPDF_DLL
175 bool getNeedAppearances(); 148 bool getNeedAppearances();
176 149
177 - // Indicate whether appearance streams must be regenerated. If you  
178 - // modify a field value, you should call setNeedAppearances(true)  
179 - // unless you also generate an appearance stream for the  
180 - // corresponding annotation at the same time. If you generate  
181 - // appearance streams for all fields, you can call  
182 - // setNeedAppearances(false). If you use  
183 - // QPDFFormFieldObjectHelper::setV, it will automatically call  
184 - // this method unless you tell it not to. 150 + // Indicate whether appearance streams must be regenerated. If you modify a field value, you
  151 + // should call setNeedAppearances(true) unless you also generate an appearance stream for the
  152 + // corresponding annotation at the same time. If you generate appearance streams for all fields,
  153 + // you can call setNeedAppearances(false). If you use QPDFFormFieldObjectHelper::setV, it will
  154 + // automatically call this method unless you tell it not to.
185 QPDF_DLL 155 QPDF_DLL
186 void setNeedAppearances(bool); 156 void setNeedAppearances(bool);
187 157
188 - // If /NeedAppearances is false, do nothing. Otherwise generate  
189 - // appearance streams for all widget annotations that need them.  
190 - // See comments in QPDFFormFieldObjectHelper.hh for  
191 - // generateAppearance for limitations. For checkbox and radio  
192 - // button fields, this code ensures that appearance state is  
193 - // consistent with the field's value and uses any pre-existing 158 + // If /NeedAppearances is false, do nothing. Otherwise generate appearance streams for all
  159 + // widget annotations that need them. See comments in QPDFFormFieldObjectHelper.hh for
  160 + // generateAppearance for limitations. For checkbox and radio button fields, this code ensures
  161 + // that appearance state is consistent with the field's value and uses any pre-existing
194 // appearance streams. 162 // appearance streams.
195 QPDF_DLL 163 QPDF_DLL
196 void generateAppearancesIfNeeded(); 164 void generateAppearancesIfNeeded();
197 165
198 - // Note: this method works on all annotations, not just ones with  
199 - // associated fields. For each annotation in old_annots, apply the  
200 - // given transformation matrix to create a new annotation. New  
201 - // annotations are appended to new_annots. If the annotation is  
202 - // associated with a form field, a new form field is created that  
203 - // points to the new annotation and is appended to new_fields, and  
204 - // the old field is added to old_fields. 166 + // Note: this method works on all annotations, not just ones with associated fields. For each
  167 + // annotation in old_annots, apply the given transformation matrix to create a new annotation.
  168 + // New annotations are appended to new_annots. If the annotation is associated with a form
  169 + // field, a new form field is created that points to the new annotation and is appended to
  170 + // new_fields, and the old field is added to old_fields.
205 // 171 //
206 - // old_annots may belong to a different QPDF object. In that case,  
207 - // you should pass in from_qpdf, and copyForeignObject will be  
208 - // called automatically. If this is the case, for efficiency, you  
209 - // may pass in a QPDFAcroFormDocumentHelper for the other file to  
210 - // avoid the expensive process of creating one for each call to  
211 - // transformAnnotations. New fields and annotations are not added  
212 - // to the document or pages. You have to do that yourself after  
213 - // calling transformAnnotations. If this operation will leave  
214 - // orphaned fields behind, such as if you are replacing the old  
215 - // annotations with the new ones on the same page and the fields  
216 - // and annotations are not shared, you will also need to remove  
217 - // the old fields to prevent them from hanging round unreferenced. 172 + // old_annots may belong to a different QPDF object. In that case, you should pass in from_qpdf,
  173 + // and copyForeignObject will be called automatically. If this is the case, for efficiency, you
  174 + // may pass in a QPDFAcroFormDocumentHelper for the other file to avoid the expensive process of
  175 + // creating one for each call to transformAnnotations. New fields and annotations are not added
  176 + // to the document or pages. You have to do that yourself after calling transformAnnotations. If
  177 + // this operation will leave orphaned fields behind, such as if you are replacing the old
  178 + // annotations with the new ones on the same page and the fields and annotations are not shared,
  179 + // you will also need to remove the old fields to prevent them from hanging round unreferenced.
218 QPDF_DLL 180 QPDF_DLL
219 void transformAnnotations( 181 void transformAnnotations(
220 QPDFObjectHandle old_annots, 182 QPDFObjectHandle old_annots,
@@ -225,18 +187,14 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper @@ -225,18 +187,14 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper
225 QPDF* from_qpdf = nullptr, 187 QPDF* from_qpdf = nullptr,
226 QPDFAcroFormDocumentHelper* from_afdh = nullptr); 188 QPDFAcroFormDocumentHelper* from_afdh = nullptr);
227 189
228 - // Copy form fields and annotations from one page to another,  
229 - // allowing the from page to be in a different QPDF or in the same  
230 - // QPDF. This would typically be called after calling addPage to  
231 - // add field/annotation awareness. When just copying the page by  
232 - // itself, annotations end up being shared, and fields end up  
233 - // being omitted because there is no reference to the field from  
234 - // the page. This method ensures that each separate copy of a page  
235 - // has private annotations and that fields and annotations are  
236 - // properly updated to resolve conflicts that may occur from  
237 - // common resource and field names across documents. It is  
238 - // basically a wrapper around transformAnnotations that handles  
239 - // updating the receiving page. If new_fields is non-null, any 190 + // Copy form fields and annotations from one page to another, allowing the from page to be in a
  191 + // different QPDF or in the same QPDF. This would typically be called after calling addPage to
  192 + // add field/annotation awareness. When just copying the page by itself, annotations end up
  193 + // being shared, and fields end up being omitted because there is no reference to the field from
  194 + // the page. This method ensures that each separate copy of a page has private annotations and
  195 + // that fields and annotations are properly updated to resolve conflicts that may occur from
  196 + // common resource and field names across documents. It is basically a wrapper around
  197 + // transformAnnotations that handles updating the receiving page. If new_fields is non-null, any
240 // newly created fields are added to it. 198 // newly created fields are added to it.
241 QPDF_DLL 199 QPDF_DLL
242 void fixCopiedAnnotations( 200 void fixCopiedAnnotations(
include/qpdf/QPDFExc.hh
@@ -2,22 +2,19 @@ @@ -2,22 +2,19 @@
2 // 2 //
3 // This file is part of qpdf. 3 // This file is part of qpdf.
4 // 4 //
5 -// Licensed under the Apache License, Version 2.0 (the "License");  
6 -// you may not use this file except in compliance with the License.  
7 -// You may obtain a copy of the License at 5 +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
  6 +// in compliance with the License. You may obtain a copy of the License at
8 // 7 //
9 // http://www.apache.org/licenses/LICENSE-2.0 8 // http://www.apache.org/licenses/LICENSE-2.0
10 // 9 //
11 -// Unless required by applicable law or agreed to in writing, software  
12 -// distributed under the License is distributed on an "AS IS" BASIS,  
13 -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
14 -// See the License for the specific language governing permissions and  
15 -// limitations under the License. 10 +// Unless required by applicable law or agreed to in writing, software distributed under the License
  11 +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
  12 +// or implied. See the License for the specific language governing permissions and limitations under
  13 +// the License.
16 // 14 //
17 -// Versions of qpdf prior to version 7 were released under the terms  
18 -// of version 2.0 of the Artistic License. At your option, you may  
19 -// continue to consider qpdf to be licensed under those terms. Please  
20 -// see the manual for additional information. 15 +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
  16 +// License. At your option, you may continue to consider qpdf to be licensed under those terms.
  17 +// Please see the manual for additional information.
21 18
22 #ifndef QPDFEXC_HH 19 #ifndef QPDFEXC_HH
23 #define QPDFEXC_HH 20 #define QPDFEXC_HH
@@ -42,14 +39,12 @@ class QPDF_DLL_CLASS QPDFExc: public std::runtime_error @@ -42,14 +39,12 @@ class QPDF_DLL_CLASS QPDFExc: public std::runtime_error
42 QPDF_DLL 39 QPDF_DLL
43 virtual ~QPDFExc() noexcept = default; 40 virtual ~QPDFExc() noexcept = default;
44 41
45 - // To get a complete error string, call what(), provided by  
46 - // std::exception. The accessors below return the original values  
47 - // used to create the exception. Only the error code and message  
48 - // are guaranteed to have non-zero/empty values. 42 + // To get a complete error string, call what(), provided by std::exception. The accessors below
  43 + // return the original values used to create the exception. Only the error code and message are
  44 + // guaranteed to have non-zero/empty values.
49 45
50 - // There is no lookup code that maps numeric error codes into  
51 - // strings. The numeric error code is just another way to get at  
52 - // the underlying issue, but it is more programmer-friendly than 46 + // There is no lookup code that maps numeric error codes into strings. The numeric error code
  47 + // is just another way to get at the underlying issue, but it is more programmer-friendly than
53 // trying to parse a string that is subject to change. 48 // trying to parse a string that is subject to change.
54 49
55 QPDF_DLL 50 QPDF_DLL
@@ -71,8 +66,8 @@ class QPDF_DLL_CLASS QPDFExc: public std::runtime_error @@ -71,8 +66,8 @@ class QPDF_DLL_CLASS QPDFExc: public std::runtime_error
71 qpdf_offset_t offset, 66 qpdf_offset_t offset,
72 std::string const& message); 67 std::string const& message);
73 68
74 - // This class does not use the Members pattern to avoid needless  
75 - // memory allocations during exception handling. 69 + // This class does not use the Members pattern to avoid needless memory allocations during
  70 + // exception handling.
76 71
77 qpdf_error_code_e error_code; 72 qpdf_error_code_e error_code;
78 std::string filename; 73 std::string filename;
include/qpdf/QPDFFormFieldObjectHelper.hh
@@ -2,29 +2,25 @@ @@ -2,29 +2,25 @@
2 // 2 //
3 // This file is part of qpdf. 3 // This file is part of qpdf.
4 // 4 //
5 -// Licensed under the Apache License, Version 2.0 (the "License");  
6 -// you may not use this file except in compliance with the License.  
7 -// You may obtain a copy of the License at 5 +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
  6 +// in compliance with the License. You may obtain a copy of the License at
8 // 7 //
9 // http://www.apache.org/licenses/LICENSE-2.0 8 // http://www.apache.org/licenses/LICENSE-2.0
10 // 9 //
11 -// Unless required by applicable law or agreed to in writing, software  
12 -// distributed under the License is distributed on an "AS IS" BASIS,  
13 -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
14 -// See the License for the specific language governing permissions and  
15 -// limitations under the License. 10 +// Unless required by applicable law or agreed to in writing, software distributed under the License
  11 +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
  12 +// or implied. See the License for the specific language governing permissions and limitations under
  13 +// the License.
16 // 14 //
17 -// Versions of qpdf prior to version 7 were released under the terms  
18 -// of version 2.0 of the Artistic License. At your option, you may  
19 -// continue to consider qpdf to be licensed under those terms. Please  
20 -// see the manual for additional information. 15 +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
  16 +// License. At your option, you may continue to consider qpdf to be licensed under those terms.
  17 +// Please see the manual for additional information.
21 18
22 #ifndef QPDFFORMFIELDOBJECTHELPER_HH 19 #ifndef QPDFFORMFIELDOBJECTHELPER_HH
23 #define QPDFFORMFIELDOBJECTHELPER_HH 20 #define QPDFFORMFIELDOBJECTHELPER_HH
24 21
25 -// This object helper helps with form fields for interactive forms.  
26 -// Please see comments in QPDFAcroFormDocumentHelper.hh for additional  
27 -// details. 22 +// This object helper helps with form fields for interactive forms. Please see comments in
  23 +// QPDFAcroFormDocumentHelper.hh for additional details.
28 24
29 #include <qpdf/QPDFObjectHelper.hh> 25 #include <qpdf/QPDFObjectHelper.hh>
30 26
@@ -46,37 +42,32 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper @@ -46,37 +42,32 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper
46 QPDF_DLL 42 QPDF_DLL
47 bool isNull(); 43 bool isNull();
48 44
49 - // Return the field's parent. A form field object helper whose  
50 - // underlying object is null is returned if there is no parent.  
51 - // This condition may be tested by calling isNull(). 45 + // Return the field's parent. A form field object helper whose underlying object is null is
  46 + // returned if there is no parent. This condition may be tested by calling isNull().
52 QPDF_DLL 47 QPDF_DLL
53 QPDFFormFieldObjectHelper getParent(); 48 QPDFFormFieldObjectHelper getParent();
54 49
55 - // Return the top-level field for this field. Typically this will  
56 - // be the field itself or its parent. If is_different is provided,  
57 - // it is set to true if the top-level field is different from the  
58 - // field itself; otherwise it is set to false. 50 + // Return the top-level field for this field. Typically this will be the field itself or its
  51 + // parent. If is_different is provided, it is set to true if the top-level field is different
  52 + // from the field itself; otherwise it is set to false.
59 QPDF_DLL 53 QPDF_DLL
60 QPDFFormFieldObjectHelper getTopLevelField(bool* is_different = nullptr); 54 QPDFFormFieldObjectHelper getTopLevelField(bool* is_different = nullptr);
61 55
62 - // Get a field value, possibly inheriting the value from an  
63 - // ancestor node. 56 + // Get a field value, possibly inheriting the value from an ancestor node.
64 QPDF_DLL 57 QPDF_DLL
65 QPDFObjectHandle getInheritableFieldValue(std::string const& name); 58 QPDFObjectHandle getInheritableFieldValue(std::string const& name);
66 59
67 - // Get an inherited field value as a string. If it is not a  
68 - // string, silently return the empty string. 60 + // Get an inherited field value as a string. If it is not a string, silently return the empty
  61 + // string.
69 QPDF_DLL 62 QPDF_DLL
70 std::string getInheritableFieldValueAsString(std::string const& name); 63 std::string getInheritableFieldValueAsString(std::string const& name);
71 64
72 - // Get an inherited field value of type name as a string  
73 - // representing the name. If it is not a name, silently return  
74 - // the empty string. 65 + // Get an inherited field value of type name as a string representing the name. If it is not a
  66 + // name, silently return the empty string.
75 QPDF_DLL 67 QPDF_DLL
76 std::string getInheritableFieldValueAsName(std::string const& name); 68 std::string getInheritableFieldValueAsName(std::string const& name);
77 69
78 - // Returns the value of /FT if present, otherwise returns the  
79 - // empty string. 70 + // Returns the value of /FT if present, otherwise returns the empty string.
80 QPDF_DLL 71 QPDF_DLL
81 std::string getFieldType(); 72 std::string getFieldType();
82 73
@@ -86,60 +77,53 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper @@ -86,60 +77,53 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper
86 QPDF_DLL 77 QPDF_DLL
87 std::string getPartialName(); 78 std::string getPartialName();
88 79
89 - // Return the alternative field name (/TU), which is the field  
90 - // name intended to be presented to users. If not present, fall  
91 - // back to the fully qualified name. 80 + // Return the alternative field name (/TU), which is the field name intended to be presented to
  81 + // users. If not present, fall back to the fully qualified name.
92 QPDF_DLL 82 QPDF_DLL
93 std::string getAlternativeName(); 83 std::string getAlternativeName();
94 84
95 - // Return the mapping field name (/TM). If not present, fall back  
96 - // to the alternative name, then to the partial name. 85 + // Return the mapping field name (/TM). If not present, fall back to the alternative name, then
  86 + // to the partial name.
97 QPDF_DLL 87 QPDF_DLL
98 std::string getMappingName(); 88 std::string getMappingName();
99 89
100 QPDF_DLL 90 QPDF_DLL
101 QPDFObjectHandle getValue(); 91 QPDFObjectHandle getValue();
102 92
103 - // Return the field's value as a string. If this is called with a  
104 - // field whose value is not a string, the empty string will be  
105 - // silently returned. 93 + // Return the field's value as a string. If this is called with a field whose value is not a
  94 + // string, the empty string will be silently returned.
106 QPDF_DLL 95 QPDF_DLL
107 std::string getValueAsString(); 96 std::string getValueAsString();
108 97
109 QPDF_DLL 98 QPDF_DLL
110 QPDFObjectHandle getDefaultValue(); 99 QPDFObjectHandle getDefaultValue();
111 100
112 - // Return the field's default value as a string. If this is called  
113 - // with a field whose value is not a string, the empty string will  
114 - // be silently returned. 101 + // Return the field's default value as a string. If this is called with a field whose value is
  102 + // not a string, the empty string will be silently returned.
115 QPDF_DLL 103 QPDF_DLL
116 std::string getDefaultValueAsString(); 104 std::string getDefaultValueAsString();
117 105
118 - // Return the default appearance string, taking inheritance from  
119 - // the field tree into account. Returns the empty string if the  
120 - // default appearance string is not available (because it's  
121 - // erroneously absent or because this is not a variable text  
122 - // field). If not found in the field hierarchy, look in /AcroForm. 106 + // Return the default appearance string, taking inheritance from the field tree into account.
  107 + // Returns the empty string if the default appearance string is not available (because it's
  108 + // erroneously absent or because this is not a variable text field). If not found in the field
  109 + // hierarchy, look in /AcroForm.
123 QPDF_DLL 110 QPDF_DLL
124 std::string getDefaultAppearance(); 111 std::string getDefaultAppearance();
125 112
126 - // Return the default resource dictionary for the field. This  
127 - // comes not from the field but from the document-level /AcroForm  
128 - // dictionary. While several PDF generates put a /DR key in the  
129 - // form field's dictionary, experimentation suggests that many  
130 - // popular readers, including Adobe Acrobat and Acrobat Reader,  
131 - // ignore any /DR item on the field. 113 + // Return the default resource dictionary for the field. This comes not from the field but from
  114 + // the document-level /AcroForm dictionary. While several PDF generators put a /DR key in the
  115 + // form field's dictionary, experimentation suggests that many popular readers, including Adobe
  116 + // Acrobat and Acrobat Reader, ignore any /DR item on the field.
132 QPDF_DLL 117 QPDF_DLL
133 QPDFObjectHandle getDefaultResources(); 118 QPDFObjectHandle getDefaultResources();
134 119
135 - // Return the quadding value, taking inheritance from the field  
136 - // tree into account. Returns 0 if quadding is not specified. Look  
137 - // in /AcroForm if not found in the field hierarchy. 120 + // Return the quadding value, taking inheritance from the field tree into account. Returns 0 if
  121 + // quadding is not specified. Look in /AcroForm if not found in the field hierarchy.
138 QPDF_DLL 122 QPDF_DLL
139 int getQuadding(); 123 int getQuadding();
140 124
141 - // Return field flags from /Ff. The value is a logical or of  
142 - // pdf_form_field_flag_e as defined in qpdf/Constants.h 125 + // Return field flags from /Ff. The value is a logical or of pdf_form_field_flag_e as defined in
  126 + // qpdf/Constants.h
143 QPDF_DLL 127 QPDF_DLL
144 int getFlags(); 128 int getFlags();
145 129
@@ -148,19 +132,16 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper @@ -148,19 +132,16 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper
148 // Returns true if field is of type /Tx 132 // Returns true if field is of type /Tx
149 QPDF_DLL 133 QPDF_DLL
150 bool isText(); 134 bool isText();
151 - // Returns true if field is of type /Btn and flags do not indicate  
152 - // some other type of button. 135 + // Returns true if field is of type /Btn and flags do not indicate some other type of button.
153 QPDF_DLL 136 QPDF_DLL
154 bool isCheckbox(); 137 bool isCheckbox();
155 // Returns true if field is a checkbox and is checked. 138 // Returns true if field is a checkbox and is checked.
156 QPDF_DLL 139 QPDF_DLL
157 bool isChecked(); 140 bool isChecked();
158 - // Returns true if field is of type /Btn and flags indicate that  
159 - // it is a radio button 141 + // Returns true if field is of type /Btn and flags indicate that it is a radio button
160 QPDF_DLL 142 QPDF_DLL
161 bool isRadioButton(); 143 bool isRadioButton();
162 - // Returns true if field is of type /Btn and flags indicate that  
163 - // it is a pushbutton 144 + // Returns true if field is of type /Btn and flags indicate that it is a pushbutton
164 QPDF_DLL 145 QPDF_DLL
165 bool isPushbutton(); 146 bool isPushbutton();
166 // Returns true if field is of type /Ch 147 // Returns true if field is of type /Ch
@@ -170,45 +151,36 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper @@ -170,45 +151,36 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper
170 QPDF_DLL 151 QPDF_DLL
171 std::vector<std::string> getChoices(); 152 std::vector<std::string> getChoices();
172 153
173 - // Set an attribute to the given value. If you have a  
174 - // QPDFAcroFormDocumentHelper and you want to set the name of a  
175 - // field, use QPDFAcroFormDocumentHelper::setFormFieldName  
176 - // instead. 154 + // Set an attribute to the given value. If you have a QPDFAcroFormDocumentHelper and you want to
  155 + // set the name of a field, use QPDFAcroFormDocumentHelper::setFormFieldName instead.
177 QPDF_DLL 156 QPDF_DLL
178 void setFieldAttribute(std::string const& key, QPDFObjectHandle value); 157 void setFieldAttribute(std::string const& key, QPDFObjectHandle value);
179 158
180 - // Set an attribute to the given value as a Unicode string (UTF-16  
181 - // BE encoded). The input string should be UTF-8 encoded. If you  
182 - // have a QPDFAcroFormDocumentHelper and you want to set the name  
183 - // of a field, use QPDFAcroFormDocumentHelper::setFormFieldName  
184 - // instead. 159 + // Set an attribute to the given value as a Unicode string (UTF-16 BE encoded). The input string
  160 + // should be UTF-8 encoded. If you have a QPDFAcroFormDocumentHelper and you want to set the
  161 + // name of a field, use QPDFAcroFormDocumentHelper::setFormFieldName instead.
185 QPDF_DLL 162 QPDF_DLL
186 void setFieldAttribute(std::string const& key, std::string const& utf8_value); 163 void setFieldAttribute(std::string const& key, std::string const& utf8_value);
187 164
188 - // Set /V (field value) to the given value. If need_appearances is  
189 - // true and the field type is either /Tx (text) or /Ch (choice),  
190 - // set /NeedAppearances to true. You can explicitly tell this  
191 - // method not to set /NeedAppearances if you are going to generate  
192 - // an appearance stream yourself. Starting with qpdf 8.3.0, this  
193 - // method handles fields of type /Btn (checkboxes, radio buttons,  
194 - // pushbuttons) specially. 165 + // Set /V (field value) to the given value. If need_appearances is true and the field type is
  166 + // either /Tx (text) or /Ch (choice), set /NeedAppearances to true. You can explicitly tell this
  167 + // method not to set /NeedAppearances if you are going to generate an appearance stream
  168 + // yourself. Starting with qpdf 8.3.0, this method handles fields of type /Btn (checkboxes,
  169 + // radio buttons, pushbuttons) specially.
195 QPDF_DLL 170 QPDF_DLL
196 void setV(QPDFObjectHandle value, bool need_appearances = true); 171 void setV(QPDFObjectHandle value, bool need_appearances = true);
197 172
198 - // Set /V (field value) to the given string value encoded as a  
199 - // Unicode string. The input value should be UTF-8 encoded. See  
200 - // comments above about /NeedAppearances. 173 + // Set /V (field value) to the given string value encoded as a Unicode string. The input value
  174 + // should be UTF-8 encoded. See comments above about /NeedAppearances.
201 QPDF_DLL 175 QPDF_DLL
202 void setV(std::string const& utf8_value, bool need_appearances = true); 176 void setV(std::string const& utf8_value, bool need_appearances = true);
203 177
204 - // Update the appearance stream for this field. Note that qpdf's  
205 - // ability to generate appearance streams is limited. We only  
206 - // generate appearance streams for streams of type text or choice.  
207 - // The appearance uses the default parameters provided in the  
208 - // file, and it only supports ASCII characters. Quadding is  
209 - // currently ignored. While this functionality is limited, it  
210 - // should do a decent job on properly constructed PDF files when  
211 - // field values are restricted to ASCII characters. 178 + // Update the appearance stream for this field. Note that qpdf's ability to generate appearance
  179 + // streams is limited. We only generate appearance streams for fields of type text or choice.
  180 + // The appearance uses the default parameters provided in the file, and it only supports ASCII
  181 + // characters. Quadding is currently ignored. While this functionality is limited, it should do
  182 + // a decent job on properly constructed PDF files when field values are restricted to ASCII
  183 + // characters.
212 QPDF_DLL 184 QPDF_DLL
213 void generateAppearance(QPDFAnnotationObjectHelper&); 185 void generateAppearance(QPDFAnnotationObjectHelper&);
214 186
include/qpdf/QPDFJob.hh
@@ -2,22 +2,19 @@ @@ -2,22 +2,19 @@
2 // 2 //
3 // This file is part of qpdf. 3 // This file is part of qpdf.
4 // 4 //
5 -// Licensed under the Apache License, Version 2.0 (the "License");  
6 -// you may not use this file except in compliance with the License.  
7 -// You may obtain a copy of the License at 5 +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
  6 +// in compliance with the License. You may obtain a copy of the License at
8 // 7 //
9 // http://www.apache.org/licenses/LICENSE-2.0 8 // http://www.apache.org/licenses/LICENSE-2.0
10 // 9 //
11 -// Unless required by applicable law or agreed to in writing, software  
12 -// distributed under the License is distributed on an "AS IS" BASIS,  
13 -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
14 -// See the License for the specific language governing permissions and  
15 -// limitations under the License. 10 +// Unless required by applicable law or agreed to in writing, software distributed under the License
  11 +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
  12 +// or implied. See the License for the specific language governing permissions and limitations under
  13 +// the License.
16 // 14 //
17 -// Versions of qpdf prior to version 7 were released under the terms  
18 -// of version 2.0 of the Artistic License. At your option, you may  
19 -// continue to consider qpdf to be licensed under those terms. Please  
20 -// see the manual for additional information. 15 +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
  16 +// License. At your option, you may continue to consider qpdf to be licensed under those terms.
  17 +// Please see the manual for additional information.
21 18
22 #ifndef QPDFJOB_HH 19 #ifndef QPDFJOB_HH
23 #define QPDFJOB_HH 20 #define QPDFJOB_HH
@@ -55,99 +52,80 @@ class QPDFJob @@ -55,99 +52,80 @@ class QPDFJob
55 static int constexpr EXIT_IS_NOT_ENCRYPTED = qpdf_exit_is_not_encrypted; 52 static int constexpr EXIT_IS_NOT_ENCRYPTED = qpdf_exit_is_not_encrypted;
56 static int constexpr EXIT_CORRECT_PASSWORD = qpdf_exit_correct_password; 53 static int constexpr EXIT_CORRECT_PASSWORD = qpdf_exit_correct_password;
57 54
58 - // QPDFUsage is thrown if there are any usage-like errors when  
59 - // calling Config methods. 55 + // QPDFUsage is thrown if there are any usage-like errors when calling Config methods.
60 QPDF_DLL 56 QPDF_DLL
61 QPDFJob(); 57 QPDFJob();
62 58
63 // SETUP FUNCTIONS 59 // SETUP FUNCTIONS
64 60
65 - // Initialize a QPDFJob object from argv, which must be a  
66 - // null-terminated array of null-terminated UTF-8-encoded C  
67 - // strings. The progname_env argument is the name of an  
68 - // environment variable which, if set, overrides the name of the  
69 - // executable for purposes of generating the --completion options.  
70 - // See QPDFArgParser for details. If a null pointer is passed in,  
71 - // the default value of "QPDF_EXECUTABLE" is used. This is used by  
72 - // the QPDF cli, which just initializes a QPDFJob from argv, calls  
73 - // run(), and handles errors and exit status issues. You can  
74 - // perform much of the cli functionality programmatically in this  
75 - // way rather than using the regular API. This is exposed in the C  
76 - // API, which makes it easier to get certain high-level qpdf  
77 - // functionality from other languages. If there are any  
78 - // command-line errors, this method will throw QPDFUsage which is  
79 - // derived from std::runtime_error. Other exceptions may be thrown  
80 - // in some cases. Note that argc, and argv should be UTF-8  
81 - // encoded. If you are calling this from a Windows Unicode-aware  
82 - // main (wmain), see QUtil::call_main_from_wmain for information  
83 - // about converting arguments to UTF-8. This method will mutate  
84 - // arguments that are passed to it. 61 + // Initialize a QPDFJob object from argv, which must be a null-terminated array of
  62 + // null-terminated UTF-8-encoded C strings. The progname_env argument is the name of an
  63 + // environment variable which, if set, overrides the name of the executable for purposes of
  64 + // generating the --completion options. See QPDFArgParser for details. If a null pointer is
  65 + // passed in, the default value of "QPDF_EXECUTABLE" is used. This is used by the QPDF cli,
  66 + // which just initializes a QPDFJob from argv, calls run(), and handles errors and exit status
  67 + // issues. You can perform much of the cli functionality programmatically in this way rather
  68 + // than using the regular API. This is exposed in the C API, which makes it easier to get
  69 + // certain high-level qpdf functionality from other languages. If there are any command-line
  70 + // errors, this method will throw QPDFUsage which is derived from std::runtime_error. Other
  71 + // exceptions may be thrown in some cases. Note that argv should be UTF-8 encoded. If
  72 + // you are calling this from a Windows Unicode-aware main (wmain), see
  73 + // QUtil::call_main_from_wmain for information about converting arguments to UTF-8. This method
  74 + // will mutate arguments that are passed to it.
85 QPDF_DLL 75 QPDF_DLL
86 void initializeFromArgv(char const* const argv[], char const* progname_env = nullptr); 76 void initializeFromArgv(char const* const argv[], char const* progname_env = nullptr);
87 77
88 - // Initialize a QPDFJob from json. Passing partial = true prevents  
89 - // this method from doing the final checks (calling  
90 - // checkConfiguration) after processing the json file. This makes  
91 - // it possible to initialize QPDFJob in stages using multiple json  
92 - // files or to have a json file that can be processed from the CLI  
93 - // with --job-json-file and be combined with other arguments. For  
94 - // example, you might include only encryption parameters, leaving  
95 - // it up to the rest of the command-line arguments to provide  
96 - // input and output files. initializeFromJson is called with  
97 - // partial = true when invoked from the command line. To make sure  
98 - // that the json file is fully valid on its own, just don't  
99 - // specify any other command-line flags. If there are any  
100 - // configuration errors, QPDFUsage is thrown. Some error messages  
101 - // may be CLI-centric. If an an exception tells you to use the  
102 - // "--some-option" option, set the "someOption" key in the JSON 78 + // Initialize a QPDFJob from json. Passing partial = true prevents this method from doing the
  79 + // final checks (calling checkConfiguration) after processing the json file. This makes it
  80 + // possible to initialize QPDFJob in stages using multiple json files or to have a json file
  81 + // that can be processed from the CLI with --job-json-file and be combined with other arguments.
  82 + // For example, you might include only encryption parameters, leaving it up to the rest of the
  83 + // command-line arguments to provide input and output files. initializeFromJson is called with
  84 + // partial = true when invoked from the command line. To make sure that the json file is fully
  85 + // valid on its own, just don't specify any other command-line flags. If there are any
  86 + // configuration errors, QPDFUsage is thrown. Some error messages may be CLI-centric. If an
  87 + // exception tells you to use the "--some-option" option, set the "someOption" key in the JSON
103 // object instead. 88 // object instead.
104 QPDF_DLL 89 QPDF_DLL
105 void initializeFromJson(std::string const& json, bool partial = false); 90 void initializeFromJson(std::string const& json, bool partial = false);
106 91
107 - // Set name that is used to prefix verbose messages, progress  
108 - // messages, and other things that the library writes to output  
109 - // and error streams on the caller's behalf. Defaults to "qpdf". 92 + // Set name that is used to prefix verbose messages, progress messages, and other things that
  93 + // the library writes to output and error streams on the caller's behalf. Defaults to "qpdf".
110 QPDF_DLL 94 QPDF_DLL
111 void setMessagePrefix(std::string const&); 95 void setMessagePrefix(std::string const&);
112 QPDF_DLL 96 QPDF_DLL
113 std::string getMessagePrefix() const; 97 std::string getMessagePrefix() const;
114 98
115 - // To capture or redirect output, configure the logger returned by  
116 - // getLogger(). By default, all QPDF and QPDFJob objects share the  
117 - // global logger. If you need a private logger for some reason,  
118 - // pass a new one to setLogger(). See comments in QPDFLogger.hh  
119 - // for details on configuring the logger. 99 + // To capture or redirect output, configure the logger returned by getLogger(). By default, all
  100 + // QPDF and QPDFJob objects share the global logger. If you need a private logger for some
  101 + // reason, pass a new one to setLogger(). See comments in QPDFLogger.hh for details on
  102 + // configuring the logger.
120 // 103 //
121 - // If you set a custom logger here, the logger will be passed to  
122 - // all subsequent QPDF objects created by this QPDFJob object. 104 + // If you set a custom logger here, the logger will be passed to all subsequent QPDF objects
  105 + // created by this QPDFJob object.
123 QPDF_DLL 106 QPDF_DLL
124 std::shared_ptr<QPDFLogger> getLogger(); 107 std::shared_ptr<QPDFLogger> getLogger();
125 QPDF_DLL 108 QPDF_DLL
126 void setLogger(std::shared_ptr<QPDFLogger>); 109 void setLogger(std::shared_ptr<QPDFLogger>);
127 110
128 - // This deprecated method is the old way to capture output, but it  
129 - // didn't capture all output. See comments above for getLogger and  
130 - // setLogger. This will be removed in QPDF 12. For now, it  
131 - // configures a private logger, separating this object from the  
132 - // default logger, and calls setOutputStreams on that logger. See  
133 - // QPDFLogger.hh for additional details. 111 + // This deprecated method is the old way to capture output, but it didn't capture all output.
  112 + // See comments above for getLogger and setLogger. This will be removed in QPDF 12. For now, it
  113 + // configures a private logger, separating this object from the default logger, and calls
  114 + // setOutputStreams on that logger. See QPDFLogger.hh for additional details.
134 [[deprecated("configure logger from getLogger() or call setLogger()")]] QPDF_DLL void 115 [[deprecated("configure logger from getLogger() or call setLogger()")]] QPDF_DLL void
135 setOutputStreams(std::ostream* out_stream, std::ostream* err_stream); 116 setOutputStreams(std::ostream* out_stream, std::ostream* err_stream);
136 117
137 - // You can register a custom progress reporter to be called by  
138 - // QPDFWriter (see QPDFWriter::registerProgressReporter). This is  
139 - // only called if you also request progress reporting through  
140 - // normal configuration methods (e.g., pass --progress, call 118 + // You can register a custom progress reporter to be called by QPDFWriter (see
  119 + // QPDFWriter::registerProgressReporter). This is only called if you also request progress
  120 + // reporting through normal configuration methods (e.g., pass --progress, call
141 // config()->progress, etc.) 121 // config()->progress, etc.)
142 QPDF_DLL 122 QPDF_DLL
143 void registerProgressReporter(std::function<void(int)>); 123 void registerProgressReporter(std::function<void(int)>);
144 124
145 - // Check to make sure no contradictory options have been  
146 - // specified. This is called automatically after initializing from  
147 - // argv or json and is also called by run, but you can call it  
148 - // manually as well. It throws a QPDFUsage exception if there are  
149 - // any errors. This Config object (see CONFIGURATION) also has a  
150 - // checkConfiguration method which calls this one. 125 + // Check to make sure no contradictory options have been specified. This is called automatically
  126 + // after initializing from argv or json and is also called by run, but you can call it manually
  127 + // as well. It throws a QPDFUsage exception if there are any errors. The Config object (see
  128 + // CONFIGURATION) also has a checkConfiguration method which calls this one.
151 QPDF_DLL 129 QPDF_DLL
152 void checkConfiguration(); 130 void checkConfiguration();
153 131
@@ -157,8 +135,7 @@ class QPDFJob @@ -157,8 +135,7 @@ class QPDFJob
157 135
158 // SEE BELOW FOR MORE PUBLIC METHODS AND CLASSES 136 // SEE BELOW FOR MORE PUBLIC METHODS AND CLASSES
159 private: 137 private:
160 - // These structures are private but we need to define them before  
161 - // the public Config classes. 138 + // These structures are private but we need to define them before the public Config classes.
162 struct CopyAttachmentFrom 139 struct CopyAttachmentFrom
163 { 140 {
164 std::string path; 141 std::string path;
@@ -197,33 +174,27 @@ class QPDFJob @@ -197,33 +174,27 @@ class QPDFJob
197 174
198 // Configuration classes are implemented in QPDFJob_config.cc. 175 // Configuration classes are implemented in QPDFJob_config.cc.
199 176
200 - // The config() method returns a shared pointer to a Config  
201 - // object. The Config object contains methods that correspond with  
202 - // qpdf command-line arguments. You can use a fluent interface to  
203 - // configure a QPDFJob object that would do exactly the same thing  
204 - // as a specific qpdf command. The example qpdf-job.cc contains an  
205 - // example of this usage. You can also use initializeFromJson or  
206 - // initializeFromArgv to initialize a QPDFJob object. 177 + // The config() method returns a shared pointer to a Config object. The Config object contains
  178 + // methods that correspond with qpdf command-line arguments. You can use a fluent interface to
  179 + // configure a QPDFJob object that would do exactly the same thing as a specific qpdf command.
  180 + // The example qpdf-job.cc contains an example of this usage. You can also use
  181 + // initializeFromJson or initializeFromArgv to initialize a QPDFJob object.
207 182
208 // Notes about the Config methods: 183 // Notes about the Config methods:
209 // 184 //
210 - // * Most of the method declarations are automatically generated  
211 - // in header files that are included within the class  
212 - // definitions. They correspond in predictable ways to the  
213 - // command-line arguments and are generated from the same code  
214 - // that generates the command-line argument parsing code. 185 + // * Most of the method declarations are automatically generated in header files that are
  186 + // included within the class definitions. They correspond in predictable ways to the
  187 + // command-line arguments and are generated from the same code that generates the command-line
  188 + // argument parsing code.
215 // 189 //
216 - // * Methods return pointers, rather than references, to  
217 - // configuration objects. References might feel more familiar to  
218 - // users of fluent interfaces, so why do we use pointers? The  
219 - // main methods that create them return smart pointers so that  
220 - // users can initialize them when needed, which you can't do  
221 - // with references. Returning pointers instead of references  
222 - // makes for a more uniform interface.  
223 -  
224 - // Maintainer documentation: see the section in README-maintainer  
225 - // called "HOW TO ADD A COMMAND-LINE ARGUMENT", which contains  
226 - // references to additional places in the documentation. 190 + // * Methods return pointers, rather than references, to configuration objects. References
  191 + // might feel more familiar to users of fluent interfaces, so why do we use pointers? The
  192 + // main methods that create them return smart pointers so that users can initialize them when
  193 + // needed, which you can't do with references. Returning pointers instead of references makes
  194 + // for a more uniform interface.
  195 +
  196 + // Maintainer documentation: see the section in README-maintainer called "HOW TO ADD A
  197 + // COMMAND-LINE ARGUMENT", which contains references to additional places in the documentation.
227 198
228 class Config; 199 class Config;
229 200
@@ -374,13 +345,11 @@ class QPDFJob @@ -374,13 +345,11 @@ class QPDFJob
374 QPDFJob& o; 345 QPDFJob& o;
375 }; 346 };
376 347
377 - // Return a top-level configuration item. See CONFIGURATION above  
378 - // for details. If an invalid configuration is created (such as  
379 - // supplying contradictory options, omitting an input file, etc.),  
380 - // QPDFUsage is thrown. Note that error messages are CLI-centric,  
381 - // but you can map them into config calls. For example, if an  
382 - // exception tells you to use the --some-option flag, you should  
383 - // call config()->someOption() instead. 348 + // Return a top-level configuration item. See CONFIGURATION above for details. If an invalid
  349 + // configuration is created (such as supplying contradictory options, omitting an input file,
  350 + // etc.), QPDFUsage is thrown. Note that error messages are CLI-centric, but you can map them
  351 + // into config calls. For example, if an exception tells you to use the --some-option flag, you
  352 + // should call config()->someOption() instead.
384 QPDF_DLL 353 QPDF_DLL
385 std::shared_ptr<Config> config(); 354 std::shared_ptr<Config> config();
386 355
@@ -388,33 +357,27 @@ class QPDFJob @@ -388,33 +357,27 @@ class QPDFJob
388 QPDF_DLL 357 QPDF_DLL
389 void run(); 358 void run();
390 359
391 - // The following two methods allow a job to be run in two stages - creation  
392 - // of a QPDF object and writing of the QPDF object. This allows the QPDF  
393 - // object to be modified prior to writing it out. See  
394 - // examples/qpdfjob-remove-annotations for an illustration of its use. 360 + // The following two methods allow a job to be run in two stages - creation of a QPDF object and
  361 + // writing of the QPDF object. This allows the QPDF object to be modified prior to writing it
  362 + // out. See examples/qpdfjob-remove-annotations for an illustration of its use.
395 363
396 - // Run the first stage of the job. Return a nullptr if the configuration is  
397 - // not valid. 364 + // Run the first stage of the job. Return a nullptr if the configuration is not valid.
398 QPDF_DLL 365 QPDF_DLL
399 std::unique_ptr<QPDF> createQPDF(); 366 std::unique_ptr<QPDF> createQPDF();
400 367
401 - // Run the second stage of the job. Do nothing if a nullptr is passed as  
402 - // parameter. 368 + // Run the second stage of the job. Do nothing if a nullptr is passed as parameter.
403 QPDF_DLL 369 QPDF_DLL
404 void writeQPDF(QPDF& qpdf); 370 void writeQPDF(QPDF& qpdf);
405 371
406 - // CHECK STATUS -- these methods provide information known after  
407 - // run() is called. 372 + // CHECK STATUS -- these methods provide information known after run() is called.
408 373
409 QPDF_DLL 374 QPDF_DLL
410 bool hasWarnings() const; 375 bool hasWarnings() const;
411 376
412 - // Return one of the EXIT_* constants defined at the top of the  
413 - // class declaration. This may be called after run() when run()  
414 - // did not throw an exception. Takes into consideration whether  
415 - // isEncrypted or requiresPassword was called. Note that this  
416 - // function does not know whether run() threw an exception, so  
417 - // code that uses this to determine how to exit should explicitly 377 + // Return one of the EXIT_* constants defined at the top of the class declaration. This may be
  378 + // called after run() when run() did not throw an exception. Takes into consideration whether
  379 + // isEncrypted or requiresPassword was called. Note that this function does not know whether
  380 + // run() threw an exception, so code that uses this to determine how to exit should explicitly
418 // use EXIT_ERROR if run() threw an exception. 381 // use EXIT_ERROR if run() threw an exception.
419 QPDF_DLL 382 QPDF_DLL
420 int getExitCode() const; 383 int getExitCode() const;
@@ -423,24 +386,22 @@ class QPDFJob @@ -423,24 +386,22 @@ class QPDFJob
423 QPDF_DLL 386 QPDF_DLL
424 unsigned long getEncryptionStatus(); 387 unsigned long getEncryptionStatus();
425 388
426 - // HELPER FUNCTIONS -- methods useful for calling in handlers that  
427 - // interact with QPDFJob during run or initialization. 389 + // HELPER FUNCTIONS -- methods useful for calling in handlers that interact with QPDFJob during
  390 + // run or initialization.
428 391
429 - // If in verbose mode, call the given function, passing in the  
430 - // output stream and message prefix. 392 + // If in verbose mode, call the given function, passing in the output stream and message prefix.
431 QPDF_DLL 393 QPDF_DLL
432 void doIfVerbose(std::function<void(Pipeline&, std::string const& prefix)> fn); 394 void doIfVerbose(std::function<void(Pipeline&, std::string const& prefix)> fn);
433 395
434 - // Provide a string that is the help information ("schema" for the  
435 - // qpdf-specific JSON object) for the specified version of JSON  
436 - // output. 396 + // Provide a string that is the help information ("schema" for the qpdf-specific JSON object)
  397 + // for the specified version of JSON output.
437 QPDF_DLL 398 QPDF_DLL
438 static std::string json_out_schema(int version); 399 static std::string json_out_schema(int version);
439 400
440 [[deprecated("use json_out_schema(version)")]] static std::string QPDF_DLL json_out_schema_v1(); 401 [[deprecated("use json_out_schema(version)")]] static std::string QPDF_DLL json_out_schema_v1();
441 402
442 - // Provide a string that is the help information for the specified  
443 - // version of JSON format for QPDFJob. 403 + // Provide a string that is the help information for the specified version of JSON format for
  404 + // QPDFJob.
444 QPDF_DLL 405 QPDF_DLL
445 static std::string job_json_schema(int version); 406 static std::string job_json_schema(int version);
446 407
include/qpdf/QPDFObjectHandle.hh
@@ -66,9 +66,8 @@ class QPDFObjectHandle @@ -66,9 +66,8 @@ class QPDFObjectHandle
66 friend class QPDFParser; 66 friend class QPDFParser;
67 67
68 public: 68 public:
69 - // This class is used by replaceStreamData. It provides an  
70 - // alternative way of associating stream data with a stream. See  
71 - // comments on replaceStreamData and newStream for additional 69 + // This class is used by replaceStreamData. It provides an alternative way of associating
  70 + // stream data with a stream. See comments on replaceStreamData and newStream for additional
72 // details. 71 // details.
73 class QPDF_DLL_CLASS StreamDataProvider 72 class QPDF_DLL_CLASS StreamDataProvider
74 { 73 {
@@ -78,55 +77,40 @@ class QPDFObjectHandle @@ -78,55 +77,40 @@ class QPDFObjectHandle
78 77
79 QPDF_DLL 78 QPDF_DLL
80 virtual ~StreamDataProvider(); 79 virtual ~StreamDataProvider();
81 - // The implementation of this function must write stream data  
82 - // to the given pipeline. The stream data must conform to  
83 - // whatever filters are explicitly associated with the stream.  
84 - // QPDFWriter may, in some cases, add compression, but if it  
85 - // does, it will update the filters as needed. Every call to  
86 - // provideStreamData for a given stream must write the same  
87 - // data. Note that, when writing linearized files, qpdf will  
88 - // call your provideStreamData twice, and if it generates  
89 - // different output, you risk generating invalid output or  
90 - // having qpdf throw an exception. The object ID and  
91 - // generation passed to this method are those that belong to  
92 - // the stream on behalf of which the provider is called. They  
93 - // may be ignored or used by the implementation for indexing  
94 - // or other purposes. This information is made available just  
95 - // to make it more convenient to use a single  
96 - // StreamDataProvider object to provide data for multiple  
97 - // streams. 80 + // The implementation of this function must write stream data to the given pipeline. The
  81 + // stream data must conform to whatever filters are explicitly associated with the stream.
  82 + // QPDFWriter may, in some cases, add compression, but if it does, it will update the
  83 + // filters as needed. Every call to provideStreamData for a given stream must write the same
  84 + // data. Note that, when writing linearized files, qpdf will call your provideStreamData
  85 + // twice, and if it generates different output, you risk generating invalid output or having
  86 + // qpdf throw an exception. The object ID and generation passed to this method are those
  87 + // that belong to the stream on behalf of which the provider is called. They may be ignored
  88 + // or used by the implementation for indexing or other purposes. This information is made
  89 + // available just to make it more convenient to use a single StreamDataProvider object to
  90 + // provide data for multiple streams.
98 91
99 // A few things to keep in mind: 92 // A few things to keep in mind:
100 // 93 //
101 - // * Stream data providers must not modify any objects since  
102 - // they may be called after some parts of the file have  
103 - // already been written. 94 + // * Stream data providers must not modify any objects since they may be called after some
  95 + // parts of the file have already been written.
104 // 96 //
105 - // * Since qpdf may call provideStreamData multiple times when  
106 - // writing linearized files, if the work done by your stream  
107 - // data provider is slow or computationally intensive, you 97 + // * Since qpdf may call provideStreamData multiple times when writing linearized files, if
  98 + // the work done by your stream data provider is slow or computationally intensive, you
108 // might want to implement your own cache. 99 // might want to implement your own cache.
109 // 100 //
110 - // * Once you have called replaceStreamData, the original  
111 - // stream data is no longer directly accessible from the  
112 - // stream, but this is easy to work around by copying the  
113 - // stream to a separate QPDF object. The qpdf library  
114 - // implements this very efficiently without actually making  
115 - // a copy of the stream data. You can find examples of this  
116 - // pattern in some of the examples, including  
117 - // pdf-custom-filter.cc and pdf-invert-images.cc.  
118 -  
119 - // Prior to qpdf 10.0.0, it was not possible to handle errors  
120 - // the way pipeStreamData does or to pass back success.  
121 - // Starting in qpdf 10.0.0, those capabilities have been added  
122 - // by allowing an alternative provideStreamData to be  
123 - // implemented. You must implement at least one of the  
124 - // versions of provideStreamData below. If you implement the  
125 - // version that supports retry and returns a value, you should  
126 - // pass true as the value of supports_retry in the base class  
127 - // constructor. This will cause the library to call that  
128 - // version of the method, which should also return a boolean  
129 - // indicating whether it ran without errors. 101 + // * Once you have called replaceStreamData, the original stream data is no longer directly
  102 + // accessible from the stream, but this is easy to work around by copying the stream to
  103 + // a separate QPDF object. The qpdf library implements this very efficiently without
  104 + // actually making a copy of the stream data. You can find examples of this pattern in
  105 + // some of the examples, including pdf-custom-filter.cc and pdf-invert-images.cc.
  106 +
  107 + // Prior to qpdf 10.0.0, it was not possible to handle errors the way pipeStreamData does or
  108 + // to pass back success. Starting in qpdf 10.0.0, those capabilities have been added by
  109 + // allowing an alternative provideStreamData to be implemented. You must implement at least
  110 + // one of the versions of provideStreamData below. If you implement the version that
  111 + // supports retry and returns a value, you should pass true as the value of supports_retry
  112 + // in the base class constructor. This will cause the library to call that version of the
  113 + // method, which should also return a boolean indicating whether it ran without errors.
130 QPDF_DLL 114 QPDF_DLL
131 virtual void provideStreamData(QPDFObjGen const& og, Pipeline* pipeline); 115 virtual void provideStreamData(QPDFObjGen const& og, Pipeline* pipeline);
132 QPDF_DLL 116 QPDF_DLL
@@ -142,41 +126,31 @@ class QPDFObjectHandle @@ -142,41 +126,31 @@ class QPDFObjectHandle
142 bool supports_retry; 126 bool supports_retry;
143 }; 127 };
144 128
145 - // The TokenFilter class provides a way to filter content streams  
146 - // in a lexically aware fashion. TokenFilters can be attached to  
147 - // streams using the addTokenFilter or addContentTokenFilter  
148 - // methods or can be applied on the spot by filterPageContents.  
149 - // You may also use Pl_QPDFTokenizer directly if you need full  
150 - // control. 129 + // The TokenFilter class provides a way to filter content streams in a lexically aware fashion.
  130 + // TokenFilters can be attached to streams using the addTokenFilter or addContentTokenFilter
  131 + // methods or can be applied on the spot by filterPageContents. You may also use
  132 + // Pl_QPDFTokenizer directly if you need full control.
151 // 133 //
152 - // The handleToken method is called for each token, including the  
153 - // eof token, and then handleEOF is called at the very end.  
154 - // Handlers may call write (or writeToken) to pass data  
155 - // downstream. Please see examples/pdf-filter-tokens.cc and  
156 - // examples/pdf-count-strings.cc for examples of using  
157 - // TokenFilters. 134 + // The handleToken method is called for each token, including the eof token, and then handleEOF
  135 + // is called at the very end. Handlers may call write (or writeToken) to pass data downstream.
  136 + // Please see examples/pdf-filter-tokens.cc and examples/pdf-count-strings.cc for examples of
  137 + // using TokenFilters.
158 // 138 //
159 - // Please note that when you call token.getValue() on a token of  
160 - // type tt_string or tt_name, you get the canonical, "parsed"  
161 - // representation of the token. For a string, this means that  
162 - // there are no delimiters, and for a name, it means that all  
163 - // escaping (# followed by two hex digits) has been resolved.  
164 - // qpdf's internal representation of a name includes the leading  
165 - // slash. As such, you can't write the value of token.getValue()  
166 - // directly to output that is supposed to be valid PDF syntax. If  
167 - // you want to do that, you need to call writeToken() instead, or  
168 - // you can retrieve the token as it appeared in the input with  
169 - // token.getRawValue(). To construct a new string or name token  
170 - // from a canonical representation, use 139 + // Please note that when you call token.getValue() on a token of type tt_string or tt_name, you
  140 + // get the canonical, "parsed" representation of the token. For a string, this means that there
  141 + // are no delimiters, and for a name, it means that all escaping (# followed by two hex digits)
  142 + // has been resolved. qpdf's internal representation of a name includes the leading slash. As
  143 + // such, you can't write the value of token.getValue() directly to output that is supposed to be
  144 + // valid PDF syntax. If you want to do that, you need to call writeToken() instead, or you can
  145 + // retrieve the token as it appeared in the input with token.getRawValue(). To construct a new
  146 + // string or name token from a canonical representation, use
171 // QPDFTokenizer::Token(QPDFTokenizer::tt_string, "parsed-str") or 147 // QPDFTokenizer::Token(QPDFTokenizer::tt_string, "parsed-str") or
172 // QPDFTokenizer::Token(QPDFTokenizer::tt_name, 148 // QPDFTokenizer::Token(QPDFTokenizer::tt_name,
173 - // "/Canonical-Name"). Tokens created this way won't have a  
174 - // PDF-syntax raw value, but you can still write them with  
175 - // writeToken(). Example: 149 + // "/Canonical-Name"). Tokens created this way won't have a PDF-syntax raw value, but you can
  150 + // still write them with writeToken(). Example:
176 // writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_name, "/text/plain")) 151 // writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_name, "/text/plain"))
177 // would write `/text#2fplain`, and 152 // would write `/text#2fplain`, and
178 - // writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_string, "a\\(b"))  
179 - // would write `(a\(b)`. 153 + // writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_string, "a\\(b")) would write `(a\(b)`.
180 class QPDF_DLL_CLASS TokenFilter 154 class QPDF_DLL_CLASS TokenFilter
181 { 155 {
182 public: 156 public:
@@ -215,8 +189,8 @@ class QPDFObjectHandle @@ -215,8 +189,8 @@ class QPDFObjectHandle
215 Pipeline* pipeline; 189 Pipeline* pipeline;
216 }; 190 };
217 191
218 - // This class is used by parse to decrypt strings when reading an  
219 - // object that contains encrypted strings. 192 + // This class is used by parse to decrypt strings when reading an object that contains encrypted
  193 + // strings.
220 class StringDecrypter 194 class StringDecrypter
221 { 195 {
222 public: 196 public:
@@ -225,9 +199,8 @@ class QPDFObjectHandle @@ -225,9 +199,8 @@ class QPDFObjectHandle
225 virtual void decryptString(std::string& val) = 0; 199 virtual void decryptString(std::string& val) = 0;
226 }; 200 };
227 201
228 - // This class is used by parsePageContents. Callers must  
229 - // instantiate a subclass of this with handlers defined to accept  
230 - // QPDFObjectHandles that are parsed from the stream. 202 + // This class is used by parsePageContents. Callers must instantiate a subclass of this with
  203 + // handlers defined to accept QPDFObjectHandles that are parsed from the stream.
231 class QPDF_DLL_CLASS ParserCallbacks 204 class QPDF_DLL_CLASS ParserCallbacks
232 { 205 {
233 public: 206 public:
@@ -241,17 +214,14 @@ class QPDFObjectHandle @@ -241,17 +214,14 @@ class QPDFObjectHandle
241 214
242 virtual void handleEOF() = 0; 215 virtual void handleEOF() = 0;
243 216
244 - // Override this if you want to know the full size of the  
245 - // contents, possibly after concatenation of multiple streams.  
246 - // This is called before the first call to handleObject. 217 + // Override this if you want to know the full size of the contents, possibly after
  218 + // concatenation of multiple streams. This is called before the first call to handleObject.
247 QPDF_DLL 219 QPDF_DLL
248 virtual void contentSize(size_t); 220 virtual void contentSize(size_t);
249 221
250 protected: 222 protected:
251 - // Implementors may call this method during parsing to  
252 - // terminate parsing early. This method throws an exception  
253 - // that is caught by parsePageContents, so its effect is  
254 - // immediate. 223 + // Implementors may call this method during parsing to terminate parsing early. This method
  224 + // throws an exception that is caught by parsePageContents, so its effect is immediate.
255 QPDF_DLL 225 QPDF_DLL
256 void terminateParsing(); 226 void terminateParsing();
257 }; 227 };
@@ -281,9 +251,8 @@ class QPDFObjectHandle @@ -281,9 +251,8 @@ class QPDFObjectHandle
281 double ury; 251 double ury;
282 }; 252 };
283 253
284 - // Convenience object for transformation matrices. See also  
285 - // QPDFMatrix. Unfortunately we can't replace this with QPDFMatrix  
286 - // because QPDFMatrix's default constructor creates the identity 254 + // Convenience object for transformation matrices. See also QPDFMatrix. Unfortunately we can't
  255 + // replace this with QPDFMatrix because QPDFMatrix's default constructor creates the identity
287 // transform matrix and this one is all zeroes. 256 // transform matrix and this one is all zeroes.
288 class Matrix 257 class Matrix
289 { 258 {
@@ -324,25 +293,22 @@ class QPDFObjectHandle @@ -324,25 +293,22 @@ class QPDFObjectHandle
324 QPDF_DLL 293 QPDF_DLL
325 inline bool isInitialized() const; 294 inline bool isInitialized() const;
326 295
327 - // This method returns true if the QPDFObjectHandle objects point  
328 - // to exactly the same underlying object, meaning that changes to  
329 - // one are reflected in the other, or "if you paint one, the other  
330 - // one changes color." This does not perform a structural  
331 - // comparison of the contents of the objects. 296 + // This method returns true if the QPDFObjectHandle objects point to exactly the same underlying
  297 + // object, meaning that changes to one are reflected in the other, or "if you paint one, the
  298 + // other one changes color." This does not perform a structural comparison of the contents of
  299 + // the objects.
332 QPDF_DLL 300 QPDF_DLL
333 bool isSameObjectAs(QPDFObjectHandle const&) const; 301 bool isSameObjectAs(QPDFObjectHandle const&) const;
334 302
335 - // Return type code and type name of underlying object. These are  
336 - // useful for doing rapid type tests (like switch statements) or  
337 - // for testing and debugging. 303 + // Return type code and type name of underlying object. These are useful for doing rapid type
  304 + // tests (like switch statements) or for testing and debugging.
338 QPDF_DLL 305 QPDF_DLL
339 qpdf_object_type_e getTypeCode(); 306 qpdf_object_type_e getTypeCode();
340 QPDF_DLL 307 QPDF_DLL
341 char const* getTypeName(); 308 char const* getTypeName();
342 309
343 - // Exactly one of these will return true for any initialized  
344 - // object. Operator and InlineImage are only allowed in content  
345 - // streams. 310 + // Exactly one of these will return true for any initialized object. Operator and InlineImage
  311 + // are only allowed in content streams.
346 QPDF_DLL 312 QPDF_DLL
347 bool isBool(); 313 bool isBool();
348 QPDF_DLL 314 QPDF_DLL
@@ -368,26 +334,22 @@ class QPDFObjectHandle @@ -368,26 +334,22 @@ class QPDFObjectHandle
368 QPDF_DLL 334 QPDF_DLL
369 bool isReserved(); 335 bool isReserved();
370 336
371 - // True for objects that are direct nulls. Does not attempt to  
372 - // resolve objects. This is intended for internal use, but it can  
373 - // be used as an efficient way to check for nulls that are not 337 + // True for objects that are direct nulls. Does not attempt to resolve objects. This is intended
  338 + // for internal use, but it can be used as an efficient way to check for nulls that are not
374 // indirect objects. 339 // indirect objects.
375 QPDF_DLL 340 QPDF_DLL
376 bool isDirectNull() const; 341 bool isDirectNull() const;
377 342
378 - // This returns true in addition to the query for the specific  
379 - // type for indirect objects. 343 + // This returns true in addition to the query for the specific type for indirect objects.
380 QPDF_DLL 344 QPDF_DLL
381 inline bool isIndirect() const; 345 inline bool isIndirect() const;
382 346
383 - // This returns true for indirect objects from a QPDF that has  
384 - // been destroyed. Trying to unparse such an object will throw a  
385 - // logic_error. 347 + // This returns true for indirect objects from a QPDF that has been destroyed. Trying to
  348 + // unparse such an object will throw a logic_error.
386 QPDF_DLL 349 QPDF_DLL
387 bool isDestroyed(); 350 bool isDestroyed();
388 351
389 - // True for everything except array, dictionary, stream, word, and  
390 - // inline image. 352 + // True for everything except array, dictionary, stream, word, and inline image.
391 QPDF_DLL 353 QPDF_DLL
392 bool isScalar(); 354 bool isScalar();
393 355
@@ -395,53 +357,44 @@ class QPDFObjectHandle @@ -395,53 +357,44 @@ class QPDFObjectHandle
395 QPDF_DLL 357 QPDF_DLL
396 bool isNameAndEquals(std::string const& name); 358 bool isNameAndEquals(std::string const& name);
397 359
398 - // True if the object is a dictionary of the specified type and  
399 - // subtype, if any. 360 + // True if the object is a dictionary of the specified type and subtype, if any.
400 QPDF_DLL 361 QPDF_DLL
401 bool isDictionaryOfType(std::string const& type, std::string const& subtype = ""); 362 bool isDictionaryOfType(std::string const& type, std::string const& subtype = "");
402 363
403 - // True if the object is a stream of the specified type and  
404 - // subtype, if any. 364 + // True if the object is a stream of the specified type and subtype, if any.
405 QPDF_DLL 365 QPDF_DLL
406 bool isStreamOfType(std::string const& type, std::string const& subtype = ""); 366 bool isStreamOfType(std::string const& type, std::string const& subtype = "");
407 367
408 // Public factory methods 368 // Public factory methods
409 369
410 - // Wrap an object in an array if it is not already an array. This  
411 - // is a helper for cases in which something in a PDF may either be  
412 - // a single item or an array of items, which is a common idiom. 370 + // Wrap an object in an array if it is not already an array. This is a helper for cases in which
  371 + // something in a PDF may either be a single item or an array of items, which is a common idiom.
413 QPDF_DLL 372 QPDF_DLL
414 QPDFObjectHandle wrapInArray(); 373 QPDFObjectHandle wrapInArray();
415 374
416 - // Construct an object of any type from a string representation of  
417 - // the object. Throws QPDFExc with an empty filename and an  
418 - // offset into the string if there is an error. Any indirect  
419 - // object syntax (obj gen R) will cause a logic_error exception to  
420 - // be thrown. If object_description is provided, it will appear  
421 - // in the message of any QPDFExc exception thrown for invalid  
422 - // syntax. See also the global `operator ""_qpdf` defined below. 375 + // Construct an object of any type from a string representation of the object. Throws QPDFExc
  376 + // with an empty filename and an offset into the string if there is an error. Any indirect
  377 + // object syntax (obj gen R) will cause a logic_error exception to be thrown. If
  378 + // object_description is provided, it will appear in the message of any QPDFExc exception thrown
  379 + // for invalid syntax. See also the global `operator ""_qpdf` defined below.
423 QPDF_DLL 380 QPDF_DLL
424 static QPDFObjectHandle 381 static QPDFObjectHandle
425 parse(std::string const& object_str, std::string const& object_description = ""); 382 parse(std::string const& object_str, std::string const& object_description = "");
426 383
427 - // Construct an object of any type from a string representation of  
428 - // the object. Indirect object syntax (obj gen R) is allowed and  
429 - // will create indirect references within the passed-in context.  
430 - // If object_description is provided, it will appear in the  
431 - // message of any QPDFExc exception thrown for invalid syntax.  
432 - // Note that you can't parse an indirect object reference all by  
433 - // itself as parse will stop at the end of the first complete  
434 - // object, which will just be the first number and will report  
435 - // that there is trailing data at the end of the string. 384 + // Construct an object of any type from a string representation of the object. Indirect object
  385 + // syntax (obj gen R) is allowed and will create indirect references within the passed-in
  386 + // context. If object_description is provided, it will appear in the message of any QPDFExc
  387 + // exception thrown for invalid syntax. Note that you can't parse an indirect object reference
  388 + // all by itself as parse will stop at the end of the first complete object, which will just be
  389 + // the first number and will report that there is trailing data at the end of the string.
436 QPDF_DLL 390 QPDF_DLL
437 static QPDFObjectHandle 391 static QPDFObjectHandle
438 parse(QPDF* context, std::string const& object_str, std::string const& object_description = ""); 392 parse(QPDF* context, std::string const& object_str, std::string const& object_description = "");
439 393
440 - // Construct an object as above by reading from the given  
441 - // InputSource at its current position and using the tokenizer you  
442 - // supply. Indirect objects and encrypted strings are permitted.  
443 - // This method was intended to be called by QPDF for parsing  
444 - // objects that are read from the object's input stream. 394 + // Construct an object as above by reading from the given InputSource at its current position
  395 + // and using the tokenizer you supply. Indirect objects and encrypted strings are permitted.
  396 + // This method was intended to be called by QPDF for parsing objects that are read from the
  397 + // object's input stream.
445 QPDF_DLL 398 QPDF_DLL
446 static QPDFObjectHandle parse( 399 static QPDFObjectHandle parse(
447 std::shared_ptr<InputSource> input, 400 std::shared_ptr<InputSource> input,
@@ -451,60 +404,46 @@ class QPDFObjectHandle @@ -451,60 +404,46 @@ class QPDFObjectHandle
451 StringDecrypter* decrypter, 404 StringDecrypter* decrypter,
452 QPDF* context); 405 QPDF* context);
453 406
454 - // Return the offset where the object was found when parsed. A  
455 - // negative value means that the object was created without  
456 - // parsing. If the object is in a stream, the offset is from the  
457 - // beginning of the stream. Otherwise, the offset is from the  
458 - // beginning of the file. 407 + // Return the offset where the object was found when parsed. A negative value means that the
  408 + // object was created without parsing. If the object is in a stream, the offset is from the
  409 + // beginning of the stream. Otherwise, the offset is from the beginning of the file.
459 QPDF_DLL 410 QPDF_DLL
460 qpdf_offset_t getParsedOffset(); 411 qpdf_offset_t getParsedOffset();
461 412
462 - // Older method: stream_or_array should be the value of /Contents  
463 - // from a page object. It's more convenient to just call  
464 - // QPDFPageObjectHelper::parsePageContents on the page object, and  
465 - // error messages will also be more useful because the page object  
466 - // information will be known. 413 + // Older method: stream_or_array should be the value of /Contents from a page object. It's more
  414 + // convenient to just call QPDFPageObjectHelper::parsePageContents on the page object, and error
  415 + // messages will also be more useful because the page object information will be known.
467 QPDF_DLL 416 QPDF_DLL
468 static void parseContentStream(QPDFObjectHandle stream_or_array, ParserCallbacks* callbacks); 417 static void parseContentStream(QPDFObjectHandle stream_or_array, ParserCallbacks* callbacks);
469 418
470 - // When called on a stream or stream array that is some page's  
471 - // content streams, do the same as pipePageContents. This method  
472 - // is a lower level way to do what  
473 - // QPDFPageObjectHelper::pipePageContents does, but it allows you  
474 - // to perform this operation on a contents object that is  
475 - // disconnected from a page object. The description argument  
476 - // should describe the containing page and is used in error  
477 - // messages. The all_description argument is initialized to  
478 - // something that could be used to describe the result of the  
479 - // pipeline. It is the description amended with the identifiers of  
480 - // the underlying objects. Please note that if there is an array  
481 - // of content streams, p->finish() is called after each stream. If  
482 - // you pass a pipeline that doesn't allow write() to be called  
483 - // after finish(), you can wrap it in an instance of  
484 - // Pl_Concatenate and then call manualFinish() on the  
485 - // Pl_Concatenate pipeline at the end. 419 + // When called on a stream or stream array that is some page's content streams, do the same as
  420 + // pipePageContents. This method is a lower level way to do what
  421 + // QPDFPageObjectHelper::pipePageContents does, but it allows you to perform this operation on a
  422 + // contents object that is disconnected from a page object. The description argument should
  423 + // describe the containing page and is used in error messages. The all_description argument is
  424 + // initialized to something that could be used to describe the result of the pipeline. It is the
  425 + // description amended with the identifiers of the underlying objects. Please note that if there
  426 + // is an array of content streams, p->finish() is called after each stream. If you pass a
  427 + // pipeline that doesn't allow write() to be called after finish(), you can wrap it in an
  428 + // instance of Pl_Concatenate and then call manualFinish() on the Pl_Concatenate pipeline at the
  429 + // end.
486 QPDF_DLL 430 QPDF_DLL
487 void 431 void
488 pipeContentStreams(Pipeline* p, std::string const& description, std::string& all_description); 432 pipeContentStreams(Pipeline* p, std::string const& description, std::string& all_description);
489 433
490 - // As of qpdf 8, it is possible to add custom token filters to a  
491 - // stream. The tokenized stream data is passed through the token  
492 - // filter after all original filters but before content stream  
493 - // normalization if requested. This is a low-level interface to  
494 - // add it to a stream. You will usually want to call  
495 - // QPDFPageObjectHelper::addContentTokenFilter instead, which can  
496 - // be applied to a page object, and which will automatically  
497 - // handle the case of pages whose contents are split across  
498 - // multiple streams. 434 + // As of qpdf 8, it is possible to add custom token filters to a stream. The tokenized stream
  435 + // data is passed through the token filter after all original filters but before content stream
  436 + // normalization if requested. This is a low-level interface to add it to a stream. You will
  437 + // usually want to call QPDFPageObjectHelper::addContentTokenFilter instead, which can be
  438 + // applied to a page object, and which will automatically handle the case of pages whose
  439 + // contents are split across multiple streams.
499 QPDF_DLL 440 QPDF_DLL
500 void addTokenFilter(std::shared_ptr<TokenFilter> token_filter); 441 void addTokenFilter(std::shared_ptr<TokenFilter> token_filter);
501 442
502 - // Legacy helpers for parsing content streams. These methods are  
503 - // not going away, but newer code should call the correspond  
504 - // methods in QPDFPageObjectHelper instead. The specification and  
505 - // behavior of these methods are the same as the identically named  
506 - // methods in that class, but newer functionality will be added  
507 - // there. 443 + // Legacy helpers for parsing content streams. These methods are not going away, but newer code
  444 + // should call the corresponding methods in QPDFPageObjectHelper instead. The specification and
  445 + // behavior of these methods are the same as the identically named methods in that class, but
  446 + // newer functionality will be added there.
508 QPDF_DLL 447 QPDF_DLL
509 void parsePageContents(ParserCallbacks* callbacks); 448 void parsePageContents(ParserCallbacks* callbacks);
510 QPDF_DLL 449 QPDF_DLL
@@ -516,13 +455,12 @@ class QPDFObjectHandle @@ -516,13 +455,12 @@ class QPDFObjectHandle
516 void addContentTokenFilter(std::shared_ptr<TokenFilter> token_filter); 455 void addContentTokenFilter(std::shared_ptr<TokenFilter> token_filter);
517 // End legacy content stream helpers 456 // End legacy content stream helpers
518 457
519 - // Called on a stream to filter the stream as if it were page  
520 - // contents. This can be used to apply a TokenFilter to a form  
521 - // XObject, whose data is in the same format as a content stream. 458 + // Called on a stream to filter the stream as if it were page contents. This can be used to
  459 + // apply a TokenFilter to a form XObject, whose data is in the same format as a content stream.
522 QPDF_DLL 460 QPDF_DLL
523 void filterAsContents(TokenFilter* filter, Pipeline* next = nullptr); 461 void filterAsContents(TokenFilter* filter, Pipeline* next = nullptr);
524 - // Called on a stream to parse the stream as page contents. This  
525 - // can be used to parse a form XObject. 462 + // Called on a stream to parse the stream as page contents. This can be used to parse a form
  463 + // XObject.
526 QPDF_DLL 464 QPDF_DLL
527 void parseAsContents(ParserCallbacks* callbacks); 465 void parseAsContents(ParserCallbacks* callbacks);
528 466
@@ -538,32 +476,25 @@ class QPDFObjectHandle @@ -538,32 +476,25 @@ class QPDFObjectHandle
538 QPDF_DLL 476 QPDF_DLL
539 static QPDFObjectHandle 477 static QPDFObjectHandle
540 newReal(double value, int decimal_places = 0, bool trim_trailing_zeroes = true); 478 newReal(double value, int decimal_places = 0, bool trim_trailing_zeroes = true);
541 - // Note about name objects: qpdf's internal representation of a  
542 - // PDF name is a sequence of bytes, excluding the NUL character,  
543 - // and starting with a slash. Name objects as represented in the  
544 - // PDF specification can contain characters escaped with #, but  
545 - // such escaping is not of concern when calling QPDFObjectHandle  
546 - // methods not directly relating to parsing. For example,  
547 - // newName("/text/plain").getName() and  
548 - // parse("/text#2fplain").getName() both return "/text/plain",  
549 - // while newName("/text/plain").unparse() and  
550 - // parse("/text#2fplain").unparse() both return "/text#2fplain".  
551 - // When working with the qpdf API for creating, retrieving, and  
552 - // modifying objects, you want to work with the internal,  
553 - // canonical representation. For names containing alphanumeric  
554 - // characters, dashes, and underscores, there is no difference  
555 - // between the two representations. For a lengthy discussion, see 479 + // Note about name objects: qpdf's internal representation of a PDF name is a sequence of bytes,
  480 + // excluding the NUL character, and starting with a slash. Name objects as represented in the
  481 + // PDF specification can contain characters escaped with #, but such escaping is not of concern
  482 + // when calling QPDFObjectHandle methods not directly relating to parsing. For example,
  483 + // newName("/text/plain").getName() and parse("/text#2fplain").getName() both return
  484 + // "/text/plain", while newName("/text/plain").unparse() and parse("/text#2fplain").unparse()
  485 + // both return "/text#2fplain". When working with the qpdf API for creating, retrieving, and
  486 + // modifying objects, you want to work with the internal, canonical representation. For names
  487 + // containing alphanumeric characters, dashes, and underscores, there is no difference between
  488 + // the two representations. For a lengthy discussion, see
556 // https://github.com/qpdf/qpdf/discussions/625. 489 // https://github.com/qpdf/qpdf/discussions/625.
557 QPDF_DLL 490 QPDF_DLL
558 static QPDFObjectHandle newName(std::string const& name); 491 static QPDFObjectHandle newName(std::string const& name);
559 QPDF_DLL 492 QPDF_DLL
560 static QPDFObjectHandle newString(std::string const& str); 493 static QPDFObjectHandle newString(std::string const& str);
561 - // Create a string encoded from the given utf8-encoded string  
562 - // appropriately encoded to appear in PDF files outside of content  
563 - // streams, such as in document metadata form field values, page  
564 - // labels, outlines, and similar locations. We try ASCII first,  
565 - // then PDFDocEncoding, then UTF-16 as needed to successfully  
566 - // encode all the characters. 494 + // Create a string encoded from the given utf8-encoded string appropriately encoded to appear in
  495 + // PDF files outside of content streams, such as in document metadata, form field values, page
  496 + // labels, outlines, and similar locations. We try ASCII first, then PDFDocEncoding, then UTF-16
  497 + // as needed to successfully encode all the characters.
567 QPDF_DLL 498 QPDF_DLL
568 static QPDFObjectHandle newUnicodeString(std::string const& utf8_str); 499 static QPDFObjectHandle newUnicodeString(std::string const& utf8_str);
569 QPDF_DLL 500 QPDF_DLL
@@ -585,86 +516,67 @@ class QPDFObjectHandle @@ -585,86 +516,67 @@ class QPDFObjectHandle
585 QPDF_DLL 516 QPDF_DLL
586 static QPDFObjectHandle newDictionary(std::map<std::string, QPDFObjectHandle> const& items); 517 static QPDFObjectHandle newDictionary(std::map<std::string, QPDFObjectHandle> const& items);
587 518
588 - // Create an array from a rectangle. Equivalent to the rectangle  
589 - // form of newArray. 519 + // Create an array from a rectangle. Equivalent to the rectangle form of newArray.
590 QPDF_DLL 520 QPDF_DLL
591 static QPDFObjectHandle newFromRectangle(Rectangle const&); 521 static QPDFObjectHandle newFromRectangle(Rectangle const&);
592 - // Create an array from a matrix. Equivalent to the matrix  
593 - // form of newArray. 522 + // Create an array from a matrix. Equivalent to the matrix form of newArray.
594 QPDF_DLL 523 QPDF_DLL
595 static QPDFObjectHandle newFromMatrix(Matrix const&); 524 static QPDFObjectHandle newFromMatrix(Matrix const&);
596 QPDF_DLL 525 QPDF_DLL
597 static QPDFObjectHandle newFromMatrix(QPDFMatrix const&); 526 static QPDFObjectHandle newFromMatrix(QPDFMatrix const&);
598 527
599 - // Note: new stream creation methods have were added to the QPDF  
600 - // class starting with version 11.2.0. The ones in this class are  
601 - // here for backward compatibility. 528 + // Note: new stream creation methods have were added to the QPDF class starting with
  529 + // version 11.2.0. The ones in this class are here for backward compatibility.
602 530
603 - // Create a new stream and associate it with the given qpdf  
604 - // object. A subsequent call must be made to replaceStreamData()  
605 - // to provide data for the stream. The stream's dictionary may be  
606 - // retrieved by calling getDict(), and the resulting dictionary  
607 - // may be modified. Alternatively, you can create a new dictionary  
608 - // and call replaceDict to install it. From QPDF 11.2, you can 531 + // Create a new stream and associate it with the given qpdf object. A subsequent call must be
  532 + // made to replaceStreamData() to provide data for the stream. The stream's dictionary may be
  533 + // retrieved by calling getDict(), and the resulting dictionary may be modified. Alternatively,
  534 + // you can create a new dictionary and call replaceDict to install it. From QPDF 11.2, you can
609 // call QPDF::newStream() instead. 535 // call QPDF::newStream() instead.
610 QPDF_DLL 536 QPDF_DLL
611 static QPDFObjectHandle newStream(QPDF* qpdf); 537 static QPDFObjectHandle newStream(QPDF* qpdf);
612 538
613 - // Create a new stream and associate it with the given qpdf  
614 - // object. Use the given buffer as the stream data. The stream  
615 - // dictionary's /Length key will automatically be set to the size  
616 - // of the data buffer. If additional keys are required, the  
617 - // stream's dictionary may be retrieved by calling getDict(), and  
618 - // the resulting dictionary may be modified. This method is just a  
619 - // convenient wrapper around the newStream() and  
620 - // replaceStreamData(). It is a convenience methods for streams  
621 - // that require no parameters beyond the stream length. Note that  
622 - // you don't have to deal with compression yourself if you use  
623 - // QPDFWriter. By default, QPDFWriter will automatically compress  
624 - // uncompressed stream data. Example programs are provided that 539 + // Create a new stream and associate it with the given qpdf object. Use the given buffer as the
  540 + // stream data. The stream dictionary's /Length key will automatically be set to the size of the
  541 + // data buffer. If additional keys are required, the stream's dictionary may be retrieved by
  542 + // calling getDict(), and the resulting dictionary may be modified. This method is just a
  543 + // convenient wrapper around the newStream() and replaceStreamData(). It is a convenience
  544 + // method for streams that require no parameters beyond the stream length. Note that you don't
  545 + // have to deal with compression yourself if you use QPDFWriter. By default, QPDFWriter will
  546 + // automatically compress uncompressed stream data. Example programs are provided that
625 // illustrate this. From QPDF 11.2, you can call QPDF::newStream() 547 // illustrate this. From QPDF 11.2, you can call QPDF::newStream()
626 // instead. 548 // instead.
627 QPDF_DLL 549 QPDF_DLL
628 static QPDFObjectHandle newStream(QPDF* qpdf, std::shared_ptr<Buffer> data); 550 static QPDFObjectHandle newStream(QPDF* qpdf, std::shared_ptr<Buffer> data);
629 551
630 - // Create new stream with data from string. This method will  
631 - // create a copy of the data rather than using the user-provided  
632 - // buffer as in the std::shared_ptr<Buffer> version of newStream. 552 + // Create new stream with data from string. This method will create a copy of the data rather
  553 + // than using the user-provided buffer as in the std::shared_ptr<Buffer> version of newStream.
633 // From QPDF 11.2, you can call QPDF::newStream() instead. 554 // From QPDF 11.2, you can call QPDF::newStream() instead.
634 QPDF_DLL 555 QPDF_DLL
635 static QPDFObjectHandle newStream(QPDF* qpdf, std::string const& data); 556 static QPDFObjectHandle newStream(QPDF* qpdf, std::string const& data);
636 557
637 - // A reserved object is a special sentinel used for qpdf to  
638 - // reserve a spot for an object that is going to be added to the  
639 - // QPDF object. Normally you don't have to use this type since  
640 - // you can just call QPDF::makeIndirectObject. However, in some  
641 - // cases, if you have to create objects with circular references,  
642 - // you may need to create a reserved object so that you can have a  
643 - // reference to it and then replace the object later. Reserved  
644 - // objects have the special property that they can't be resolved  
645 - // to direct objects. This makes it possible to replace a  
646 - // reserved object with a new object while preserving existing  
647 - // references to them. When you are ready to replace a reserved  
648 - // object with its replacement, use QPDF::replaceReserved for this  
649 - // purpose rather than the more general QPDF::replaceObject. It  
650 - // is an error to try to write a QPDF with QPDFWriter if it has  
651 - // any reserved objects in it. From QPDF 11.4, you can  
652 - // call QPDF::newReserved() instead. 558 + // A reserved object is a special sentinel used for qpdf to reserve a spot for an object that is
  559 + // going to be added to the QPDF object. Normally you don't have to use this type since you can
  560 + // just call QPDF::makeIndirectObject. However, in some cases, if you have to create objects
  561 + // with circular references, you may need to create a reserved object so that you can have a
  562 + // reference to it and then replace the object later. Reserved objects have the special
  563 + // property that they can't be resolved to direct objects. This makes it possible to replace a
  564 + // reserved object with a new object while preserving existing references to them. When you are
  565 + // ready to replace a reserved object with its replacement, use QPDF::replaceReserved for this
  566 + // purpose rather than the more general QPDF::replaceObject. It is an error to try to write a
  567 + // QPDF with QPDFWriter if it has any reserved objects in it. From QPDF 11.4, you can call
  568 + // QPDF::newReserved() instead.
653 QPDF_DLL 569 QPDF_DLL
654 static QPDFObjectHandle newReserved(QPDF* qpdf); 570 static QPDFObjectHandle newReserved(QPDF* qpdf);
655 571
656 - // Provide an owning qpdf and object description. The library does  
657 - // this automatically with objects that are read from the input  
658 - // PDF and with objects that are created programmatically and  
659 - // inserted into the QPDF as a new indirect object. Most end user  
660 - // code will not need to call this. If an object has an owning  
661 - // qpdf and object description, it enables qpdf to give warnings  
662 - // with proper context in some cases where it would otherwise  
663 - // raise exceptions. It is okay to add objects without an  
664 - // owning_qpdf to objects that have one, but it is an error to  
665 - // have a QPDF contain objects with owning_qpdf set to something  
666 - // else. To add objects from another qpdf, use copyForeignObject  
667 - // instead. 572 + // Provide an owning qpdf and object description. The library does this automatically with
  573 + // objects that are read from the input PDF and with objects that are created programmatically
  574 + // and inserted into the QPDF as a new indirect object. Most end user code will not need to call
  575 + // this. If an object has an owning qpdf and object description, it enables qpdf to give
  576 + // warnings with proper context in some cases where it would otherwise raise exceptions. It is
  577 + // okay to add objects without an owning_qpdf to objects that have one, but it is an error to
  578 + // have a QPDF contain objects with owning_qpdf set to something else. To add objects from
  579 + // another qpdf, use copyForeignObject instead.
668 QPDF_DLL 580 QPDF_DLL
669 void setObjectDescription(QPDF* owning_qpdf, std::string const& object_description); 581 void setObjectDescription(QPDF* owning_qpdf, std::string const& object_description);
670 QPDF_DLL 582 QPDF_DLL
@@ -674,62 +586,47 @@ class QPDFObjectHandle @@ -674,62 +586,47 @@ class QPDFObjectHandle
674 // 586 //
675 // (Note: this comment is referenced in qpdf-c.h and the manual.) 587 // (Note: this comment is referenced in qpdf-c.h and the manual.)
676 // 588 //
677 - // In PDF files, objects have specific types, but there is nothing  
678 - // that prevents PDF files from containing objects of types that  
679 - // aren't expected by the specification. 589 + // In PDF files, objects have specific types, but there is nothing that prevents PDF files from
  590 + // containing objects of types that aren't expected by the specification.
680 // 591 //
681 // There are two flavors of accessor methods: 592 // There are two flavors of accessor methods:
682 // 593 //
683 - // * getSomethingValue() returns the value and issues a type  
684 - // warning if the type is incorrect. 594 + // * getSomethingValue() returns the value and issues a type warning if the type is incorrect.
685 // 595 //
686 - // * getValueAsSomething() returns false if the value is the wrong  
687 - // type. Otherwise, it returns true and initializes a reference  
688 - // of the appropriate type. These methods never issue type 596 + // * getValueAsSomething() returns false if the value is the wrong type. Otherwise, it returns
  597 + // true and initializes a reference of the appropriate type. These methods never issue type
689 // warnings. 598 // warnings.
690 // 599 //
691 - // The getSomethingValue() accessors and some of the other methods  
692 - // expect objects of a particular type. Prior to qpdf 8, calling  
693 - // an accessor on a method of the wrong type, such as trying to  
694 - // get a dictionary key from an array, trying to get the string  
695 - // value of a number, etc., would throw an exception, but since  
696 - // qpdf 8, qpdf issues a warning and recovers using the following  
697 - // behavior: 600 + // The getSomethingValue() accessors and some of the other methods expect objects of a
  601 + // particular type. Prior to qpdf 8, calling an accessor on an object of the wrong type, such as
  602 + // trying to get a dictionary key from an array, trying to get the string value of a number,
  603 + // etc., would throw an exception, but since qpdf 8, qpdf issues a warning and recovers using
  604 + // the following behavior:
698 // 605 //
699 - // * Requesting a value of the wrong type (int value from string,  
700 - // array item from a scalar or dictionary, etc.) will return a  
701 - // zero-like value for that type: false for boolean, 0 for  
702 - // number, the empty string for string, or the null object for  
703 - // an object handle. 606 + // * Requesting a value of the wrong type (int value from string, array item from a scalar or
  607 + // dictionary, etc.) will return a zero-like value for that type: false for boolean, 0 for
  608 + // number, the empty string for string, or the null object for an object handle.
704 // 609 //
705 - // * Accessing an array item that is out of bounds will return a  
706 - // null object. 610 + // * Accessing an array item that is out of bounds will return a null object.
707 // 611 //
708 - // * Attempts to mutate an object of the wrong type (e.g.,  
709 - // attempting to add a dictionary key to a scalar or array) will  
710 - // be ignored. 612 + // * Attempts to mutate an object of the wrong type (e.g., attempting to add a dictionary key to
  613 + // a scalar or array) will be ignored.
711 // 614 //
712 - // When any of these fallback behaviors are used, qpdf issues a  
713 - // warning. Starting in qpdf 10.5, these warnings have the error  
714 - // code qpdf_e_object. Prior to 10.5, they had the error code  
715 - // qpdf_e_damaged_pdf. If the QPDFObjectHandle is associated with  
716 - // a QPDF object (as is the case for all objects whose origin was  
717 - // a PDF file), the warning is issued using the normal warning  
718 - // mechanism (as described in QPDF.hh), making it possible to  
719 - // suppress or otherwise detect them. If the QPDFObjectHandle is  
720 - // not associated with a QPDF object (meaning it was created 615 + // When any of these fallback behaviors are used, qpdf issues a warning. Starting in qpdf 10.5,
  616 + // these warnings have the error code qpdf_e_object. Prior to 10.5, they had the error code
  617 + // qpdf_e_damaged_pdf. If the QPDFObjectHandle is associated with a QPDF object (as is the case
  618 + // for all objects whose origin was a PDF file), the warning is issued using the normal warning
  619 + // mechanism (as described in QPDF.hh), making it possible to suppress or otherwise detect them.
  620 + // If the QPDFObjectHandle is not associated with a QPDF object (meaning it was created
721 // programmatically), an exception will be thrown. 621 // programmatically), an exception will be thrown.
722 // 622 //
723 - // The way to avoid getting any type warnings or exceptions, even  
724 - // when working with malformed PDF files, is to always check the  
725 - // type of a QPDFObjectHandle before accessing it (for example,  
726 - // make sure that isString() returns true before calling  
727 - // getStringValue()) and to always be sure that any array indices  
728 - // are in bounds. 623 + // The way to avoid getting any type warnings or exceptions, even when working with malformed
  624 + // PDF files, is to always check the type of a QPDFObjectHandle before accessing it (for
  625 + // example, make sure that isString() returns true before calling getStringValue()) and to
  626 + // always be sure that any array indices are in bounds.
729 // 627 //
730 - // For additional discussion and rationale for this behavior, see  
731 - // the section in the QPDF manual entitled "Object Accessor  
732 - // Methods". 628 + // For additional discussion and rationale for this behavior, see the section in the QPDF manual
  629 + // entitled "Object Accessor Methods".
733 630
734 // Methods for bool objects 631 // Methods for bool objects
735 QPDF_DLL 632 QPDF_DLL
@@ -737,12 +634,10 @@ class QPDFObjectHandle @@ -737,12 +634,10 @@ class QPDFObjectHandle
737 QPDF_DLL 634 QPDF_DLL
738 bool getValueAsBool(bool&); 635 bool getValueAsBool(bool&);
739 636
740 - // Methods for integer objects. Note: if an integer value is too  
741 - // big (too far away from zero in either direction) to fit in the  
742 - // requested return type, the maximum or minimum value for that  
743 - // return type may be returned. For example, on a system with  
744 - // 32-bit int, a numeric object with a value of 2^40 (or anything  
745 - // too big for 32 bits) will be returned as INT_MAX. 637 + // Methods for integer objects. Note: if an integer value is too big (too far away from zero in
  638 + // either direction) to fit in the requested return type, the maximum or minimum value for that
  639 + // return type may be returned. For example, on a system with 32-bit int, a numeric object with
  640 + // a value of 2^40 (or anything too big for 32 bits) will be returned as INT_MAX.
746 QPDF_DLL 641 QPDF_DLL
747 long long getIntValue(); 642 long long getIntValue();
748 QPDF_DLL 643 QPDF_DLL
@@ -774,9 +669,8 @@ class QPDFObjectHandle @@ -774,9 +669,8 @@ class QPDFObjectHandle
774 QPDF_DLL 669 QPDF_DLL
775 bool getValueAsNumber(double&); 670 bool getValueAsNumber(double&);
776 671
777 - // Methods for name objects. The returned name value is in qpdf's  
778 - // canonical form with all escaping resolved. See comments for  
779 - // newName() for details. 672 + // Methods for name objects. The returned name value is in qpdf's canonical form with all
  673 + // escaping resolved. See comments for newName() for details.
780 QPDF_DLL 674 QPDF_DLL
781 std::string getName(); 675 std::string getName();
782 QPDF_DLL 676 QPDF_DLL
@@ -788,12 +682,10 @@ class QPDFObjectHandle @@ -788,12 +682,10 @@ class QPDFObjectHandle
788 QPDF_DLL 682 QPDF_DLL
789 bool getValueAsString(std::string&); 683 bool getValueAsString(std::string&);
790 684
791 - // If a string starts with the UTF-16 marker, it is converted from  
792 - // UTF-16 to UTF-8. Otherwise, it is treated as a string encoded  
793 - // with PDF Doc Encoding. PDF Doc Encoding is identical to  
794 - // ISO-8859-1 except in the range from 0200 through 0240, where  
795 - // there is a mapping of characters to Unicode. QPDF versions  
796 - // prior to version 8.0.0 erroneously left characters in that range 685 + // If a string starts with the UTF-16 marker, it is converted from UTF-16 to UTF-8. Otherwise,
  686 + // it is treated as a string encoded with PDF Doc Encoding. PDF Doc Encoding is identical to
  687 + // ISO-8859-1 except in the range from 0200 through 0240, where there is a mapping of characters
  688 + // to Unicode. QPDF versions prior to version 8.0.0 erroneously left characters in that range
797 // unmapped. 689 // unmapped.
798 QPDF_DLL 690 QPDF_DLL
799 std::string getUTF8Value(); 691 std::string getUTF8Value();
@@ -812,8 +704,7 @@ class QPDFObjectHandle @@ -812,8 +704,7 @@ class QPDFObjectHandle
812 704
813 // Methods for array objects; see also name and array objects. 705 // Methods for array objects; see also name and array objects.
814 706
815 - // Return an object that enables iteration over members. You can  
816 - // do 707 + // Return an object that enables iteration over members. You can do
817 // 708 //
818 // for (auto iter: obj.aitems()) 709 // for (auto iter: obj.aitems())
819 // { 710 // {
@@ -827,32 +718,29 @@ class QPDFObjectHandle @@ -827,32 +718,29 @@ class QPDFObjectHandle
827 int getArrayNItems(); 718 int getArrayNItems();
828 QPDF_DLL 719 QPDF_DLL
829 QPDFObjectHandle getArrayItem(int n); 720 QPDFObjectHandle getArrayItem(int n);
830 - // Note: QPDF arrays internally optimize memory for arrays  
831 - // containing lots of nulls. Calling getArrayAsVector may cause a  
832 - // lot of memory to be allocated for very large arrays with lots  
833 - // of nulls. 721 + // Note: QPDF arrays internally optimize memory for arrays containing lots of nulls. Calling
  722 + // getArrayAsVector may cause a lot of memory to be allocated for very large arrays with lots of
  723 + // nulls.
834 QPDF_DLL 724 QPDF_DLL
835 std::vector<QPDFObjectHandle> getArrayAsVector(); 725 std::vector<QPDFObjectHandle> getArrayAsVector();
836 QPDF_DLL 726 QPDF_DLL
837 bool isRectangle(); 727 bool isRectangle();
838 - // If the array is an array of four numeric values, return as a  
839 - // rectangle. Otherwise, return the rectangle [0, 0, 0, 0] 728 + // If the array is an array of four numeric values, return as a rectangle. Otherwise, return the
  729 + // rectangle [0, 0, 0, 0]
840 QPDF_DLL 730 QPDF_DLL
841 Rectangle getArrayAsRectangle(); 731 Rectangle getArrayAsRectangle();
842 QPDF_DLL 732 QPDF_DLL
843 bool isMatrix(); 733 bool isMatrix();
844 - // If the array is an array of six numeric values, return as a  
845 - // matrix. Otherwise, return the matrix [1, 0, 0, 1, 0, 0] 734 + // If the array is an array of six numeric values, return as a matrix. Otherwise, return the
  735 + // matrix [1, 0, 0, 1, 0, 0]
846 QPDF_DLL 736 QPDF_DLL
847 Matrix getArrayAsMatrix(); 737 Matrix getArrayAsMatrix();
848 738
849 - // Methods for dictionary objects. In all dictionary methods, keys  
850 - // are specified/represented as canonical name strings starting  
851 - // with a leading slash and not containing any PDF syntax 739 + // Methods for dictionary objects. In all dictionary methods, keys are specified/represented as
  740 + // canonical name strings starting with a leading slash and not containing any PDF syntax
852 // escaping. See comments for getName() for details. 741 // escaping. See comments for getName() for details.
853 742
854 - // Return an object that enables iteration over members. You can  
855 - // do 743 + // Return an object that enables iteration over members. You can do
856 // 744 //
857 // for (auto iter: obj.ditems()) 745 // for (auto iter: obj.ditems())
858 // { 746 // {
@@ -863,185 +751,149 @@ class QPDFObjectHandle @@ -863,185 +751,149 @@ class QPDFObjectHandle
863 QPDF_DLL 751 QPDF_DLL
864 QPDFDictItems ditems(); 752 QPDFDictItems ditems();
865 753
866 - // Return true if key is present. Keys with null values are treated as if  
867 - // they are not present. This is as per the PDF spec. 754 + // Return true if key is present. Keys with null values are treated as if they are not present.
  755 + // This is as per the PDF spec.
868 QPDF_DLL 756 QPDF_DLL
869 bool hasKey(std::string const&); 757 bool hasKey(std::string const&);
870 - // Return the value for the key. If the key is not present, null is  
871 - // returned. 758 + // Return the value for the key. If the key is not present, null is returned.
872 QPDF_DLL 759 QPDF_DLL
873 QPDFObjectHandle getKey(std::string const&); 760 QPDFObjectHandle getKey(std::string const&);
874 - // If the object is null, return null. Otherwise, call getKey().  
875 - // This makes it easier to access lower-level dictionaries, as in 761 + // If the object is null, return null. Otherwise, call getKey(). This makes it easier to access
  762 + // lower-level dictionaries, as in
876 // auto font = page.getKeyIfDict("/Resources").getKeyIfDict("/Font"); 763 // auto font = page.getKeyIfDict("/Resources").getKeyIfDict("/Font");
877 QPDF_DLL 764 QPDF_DLL
878 QPDFObjectHandle getKeyIfDict(std::string const&); 765 QPDFObjectHandle getKeyIfDict(std::string const&);
879 - // Return all keys. Keys with null values are treated as if  
880 - // they are not present. This is as per the PDF spec. 766 + // Return all keys. Keys with null values are treated as if they are not present. This is as
  767 + // per the PDF spec.
881 QPDF_DLL 768 QPDF_DLL
882 std::set<std::string> getKeys(); 769 std::set<std::string> getKeys();
883 // Return dictionary as a map. Entries with null values are included. 770 // Return dictionary as a map. Entries with null values are included.
884 QPDF_DLL 771 QPDF_DLL
885 std::map<std::string, QPDFObjectHandle> getDictAsMap(); 772 std::map<std::string, QPDFObjectHandle> getDictAsMap();
886 773
887 - // Methods for name and array objects. The name value is in qpdf's  
888 - // canonical form with all escaping resolved. See comments for  
889 - // newName() for details. 774 + // Methods for name and array objects. The name value is in qpdf's canonical form with all
  775 + // escaping resolved. See comments for newName() for details.
890 QPDF_DLL 776 QPDF_DLL
891 bool isOrHasName(std::string const&); 777 bool isOrHasName(std::string const&);
892 778
893 - // Make all resources in a resource dictionary indirect. This just  
894 - // goes through all entries of top-level subdictionaries and  
895 - // converts any direct objects to indirect objects. This can be  
896 - // useful to call before mergeResources if it is going to be  
897 - // called multiple times to prevent resources from being copied  
898 - // multiple times. 779 + // Make all resources in a resource dictionary indirect. This just goes through all entries of
  780 + // top-level subdictionaries and converts any direct objects to indirect objects. This can be
  781 + // useful to call before mergeResources if it is going to be called multiple times to prevent
  782 + // resources from being copied multiple times.
899 QPDF_DLL 783 QPDF_DLL
900 void makeResourcesIndirect(QPDF& owning_qpdf); 784 void makeResourcesIndirect(QPDF& owning_qpdf);
901 785
902 - // Merge resource dictionaries. If the "conflicts" parameter is  
903 - // provided, conflicts in dictionary subitems are resolved, and  
904 - // "conflicts" is initialized to a map such that 786 + // Merge resource dictionaries. If the "conflicts" parameter is provided, conflicts in
  787 + // dictionary subitems are resolved, and "conflicts" is initialized to a map such that
905 // conflicts[resource_type][old_key] == [new_key] 788 // conflicts[resource_type][old_key] == [new_key]
906 // 789 //
907 - // See also makeResourcesIndirect, which can be useful to call  
908 - // before calling this. 790 + // See also makeResourcesIndirect, which can be useful to call before calling this.
909 // 791 //
910 - // This method does nothing if both this object and the other  
911 - // object are not dictionaries. Otherwise, it has following  
912 - // behavior, where "object" refers to the object whose method is 792 + // This method does nothing if both this object and the other object are not dictionaries.
  793 + // Otherwise, it has the following behavior, where "object" refers to the object whose method is
913 // invoked, and "other" refers to the argument: 794 // invoked, and "other" refers to the argument:
914 // 795 //
915 // * For each key in "other" whose value is an array: 796 // * For each key in "other" whose value is an array:
916 // * If "object" does not have that entry, shallow copy it. 797 // * If "object" does not have that entry, shallow copy it.
917 - // * Otherwise, if "object" has an array in the same place,  
918 - // append to that array any objects in "other"'s array that  
919 - // are not already present. 798 + // * Otherwise, if "object" has an array in the same place, append to that array any objects
  799 + // in "other"'s array that are not already present.
920 // * For each key in "other" whose value is a dictionary: 800 // * For each key in "other" whose value is a dictionary:
921 // * If "object" does not have that entry, shallow copy it. 801 // * If "object" does not have that entry, shallow copy it.
922 // * Otherwise, for each key in the subdictionary: 802 // * Otherwise, for each key in the subdictionary:
923 - // * If key is not present in "object"'s entry, shallow copy  
924 - // it if direct or just add it if indirect. 803 + // * If key is not present in "object"'s entry, shallow copy it if direct or just add it if
  804 + // indirect.
925 // * Otherwise, if conflicts are being detected: 805 // * Otherwise, if conflicts are being detected:
926 - // * If there is a key (oldkey) already in the dictionary  
927 - // that points to the same indirect destination as key,  
928 - // indicate that key was replaced by oldkey. This would  
929 - // happen if these two resource dictionaries have  
930 - // previously been merged.  
931 - // * Otherwise pick a new key (newkey) that is unique within  
932 - // the resource dictionary, store that in the resource  
933 - // dictionary with key's destination as its destination,  
934 - // and indicate that key was replaced by newkey. 806 + // * If there is a key (oldkey) already in the dictionary that points to the same indirect
  807 + // destination as key, indicate that key was replaced by oldkey. This would happen if
  808 + // these two resource dictionaries have previously been merged.
  809 + // * Otherwise pick a new key (newkey) that is unique within the resource dictionary,
  810 + // store that in the resource dictionary with key's destination as its destination, and
  811 + // indicate that key was replaced by newkey.
935 // 812 //
936 - // The primary purpose of this method is to facilitate merging of  
937 - // resource dictionaries that are supposed to have the same scope  
938 - // as each other. For example, this can be used to merge a form  
939 - // XObject's /Resources dictionary with a form field's /DR or to  
940 - // merge two /DR dictionaries. The "conflicts" parameter may be  
941 - // previously initialized. This method adds to whatever is already 813 + // The primary purpose of this method is to facilitate merging of resource dictionaries that are
  814 + // supposed to have the same scope as each other. For example, this can be used to merge a form
  815 + // XObject's /Resources dictionary with a form field's /DR or to merge two /DR dictionaries. The
  816 + // "conflicts" parameter may be previously initialized. This method adds to whatever is already
942 // there, which can be useful when merging with multiple things. 817 // there, which can be useful when merging with multiple things.
943 QPDF_DLL 818 QPDF_DLL
944 void mergeResources( 819 void mergeResources(
945 QPDFObjectHandle other, 820 QPDFObjectHandle other,
946 std::map<std::string, std::map<std::string, std::string>>* conflicts = nullptr); 821 std::map<std::string, std::map<std::string, std::string>>* conflicts = nullptr);
947 822
948 - // Get all resource names from a resource dictionary. If this  
949 - // object is a dictionary, this method returns a set of all the  
950 - // keys in all top-level subdictionaries. For resources  
951 - // dictionaries, this is the collection of names that may be  
952 - // referenced in the content stream. 823 + // Get all resource names from a resource dictionary. If this object is a dictionary, this
  824 + // method returns a set of all the keys in all top-level subdictionaries. For resources
  825 + // dictionaries, this is the collection of names that may be referenced in the content stream.
953 QPDF_DLL 826 QPDF_DLL
954 std::set<std::string> getResourceNames(); 827 std::set<std::string> getResourceNames();
955 828
956 - // Find a unique name within a resource dictionary starting with a  
957 - // given prefix. This method works by appending a number to the  
958 - // given prefix. It searches starting with min_suffix and sets  
959 - // min_suffix to selected value upon return. This can be used to  
960 - // increase efficiency if adding multiple items with the same  
961 - // prefix. (Why doesn't it set min_suffix to the next number?  
962 - // Well, maybe you aren't going to actually use the name it  
963 - // returns.) If you are calling this multiple times on the same  
964 - // resource dictionary, you can initialize resource_names by  
965 - // calling getResourceNames(), incrementally update it as you add  
966 - // resources, and keep passing it in so that getUniqueResourceName  
967 - // doesn't have to traverse the resource dictionary each time it's  
968 - // called. 829 + // Find a unique name within a resource dictionary starting with a given prefix. This method
  830 + // works by appending a number to the given prefix. It searches starting with min_suffix and
  831 + // sets min_suffix to selected value upon return. This can be used to increase efficiency if
  832 + // adding multiple items with the same prefix. (Why doesn't it set min_suffix to the next
  833 + // number? Well, maybe you aren't going to actually use the name it returns.) If you are calling
  834 + // this multiple times on the same resource dictionary, you can initialize resource_names by
  835 + // calling getResourceNames(), incrementally update it as you add resources, and keep passing it
  836 + // in so that getUniqueResourceName doesn't have to traverse the resource dictionary each time
  837 + // it's called.
969 QPDF_DLL 838 QPDF_DLL
970 std::string getUniqueResourceName( 839 std::string getUniqueResourceName(
971 std::string const& prefix, 840 std::string const& prefix,
972 int& min_suffix, 841 int& min_suffix,
973 std::set<std::string>* resource_names = nullptr); 842 std::set<std::string>* resource_names = nullptr);
974 843
975 - // A QPDFObjectHandle has an owning QPDF if it is associated with  
976 - // ("owned by") a specific QPDF object. Indirect objects always  
977 - // have an owning QPDF. Direct objects that are read from the  
978 - // input source will also have an owning QPDF. Programmatically  
979 - // created objects will only have one if setObjectDescription was  
980 - // called. 844 + // A QPDFObjectHandle has an owning QPDF if it is associated with ("owned by") a specific QPDF
  845 + // object. Indirect objects always have an owning QPDF. Direct objects that are read from the
  846 + // input source will also have an owning QPDF. Programmatically created objects will only have
  847 + // one if setObjectDescription was called.
981 // 848 //
982 - // When the QPDF object that owns an object is destroyed, the  
983 - // object is changed into a null, and its owner is cleared.  
984 - // Therefore you should not retain the value of an owning QPDF  
985 - // beyond the life of the QPDF. If in doubt, ask for it each time  
986 - // you need it. 849 + // When the QPDF object that owns an object is destroyed, the object is changed into a null, and
  850 + // its owner is cleared. Therefore you should not retain the value of an owning QPDF beyond the
  851 + // life of the QPDF. If in doubt, ask for it each time you need it.
987 852
988 - // getOwningQPDF returns a pointer to the owning QPDF is the  
989 - // object has one. Otherwise, it returns a null pointer. Use this  
990 - // when you are able to handle the case of an object that doesn't  
991 - // have an owning QPDF. 853 + // getOwningQPDF returns a pointer to the owning QPDF is the object has one. Otherwise, it
  854 + // returns a null pointer. Use this when you are able to handle the case of an object that
  855 + // doesn't have an owning QPDF.
992 QPDF_DLL 856 QPDF_DLL
993 QPDF* getOwningQPDF() const; 857 QPDF* getOwningQPDF() const;
994 - // getQPDF, new in qpdf 11, returns a reference owning QPDF. If  
995 - // there is none, it throws a runtime_error. Use this when you  
996 - // know the object has to have an owning QPDF, such as when it's a  
997 - // known indirect object. Since streams are always indirect  
998 - // objects, this method can be used safely for streams. If  
999 - // error_msg is specified, it will be used at the contents of the 858 + // getQPDF, new in qpdf 11, returns a reference to the owning QPDF. If there is none, it throws a
  859 + // runtime_error. Use this when you know the object has to have an owning QPDF, such as when
  860 + // it's a known indirect object. Since streams are always indirect objects, this method can be
  861 + // used safely for streams. If error_msg is specified, it will be used as the contents of the
1000 // runtime_error if there is no owner. 862 // runtime_error if there is no owner.
1001 QPDF_DLL 863 QPDF_DLL
1002 QPDF& getQPDF(std::string const& error_msg = "") const; 864 QPDF& getQPDF(std::string const& error_msg = "") const;
1003 865
1004 - // Create a shallow copy of an object as a direct object, but do not  
1005 - // traverse across indirect object boundaries. That means that,  
1006 - // for dictionaries and arrays, any keys or items that were  
1007 - // indirect objects will still be indirect objects that point to  
1008 - // the same place. In the strictest sense, this is not a shallow  
1009 - // copy because it recursively descends arrays and dictionaries;  
1010 - // it just doesn't cross over indirect objects. See also  
1011 - // unsafeShallowCopy(). You can't copy a stream this way. See  
1012 - // copyStream() instead. 866 + // Create a shallow copy of an object as a direct object, but do not traverse across indirect
  867 + // object boundaries. That means that, for dictionaries and arrays, any keys or items that were
  868 + // indirect objects will still be indirect objects that point to the same place. In the
  869 + // strictest sense, this is not a shallow copy because it recursively descends arrays and
  870 + // dictionaries; it just doesn't cross over indirect objects. See also unsafeShallowCopy(). You
  871 + // can't copy a stream this way. See copyStream() instead.
1013 QPDF_DLL 872 QPDF_DLL
1014 QPDFObjectHandle shallowCopy(); 873 QPDFObjectHandle shallowCopy();
1015 874
1016 - // Create a true shallow copy of an array or dictionary, just  
1017 - // copying the immediate items (array) or keys (dictionary). This  
1018 - // is "unsafe" because, if you *modify* any of the items in the  
1019 - // copy, you are modifying the original, which is almost never  
1020 - // what you want. However, if your intention is merely to  
1021 - // *replace* top-level items or keys and not to modify lower-level  
1022 - // items in the copy, this method is much faster than  
1023 - // shallowCopy(). 875 + // Create a true shallow copy of an array or dictionary, just copying the immediate items
  876 + // (array) or keys (dictionary). This is "unsafe" because, if you *modify* any of the items in
  877 + // the copy, you are modifying the original, which is almost never what you want. However, if
  878 + // your intention is merely to *replace* top-level items or keys and not to modify lower-level
  879 + // items in the copy, this method is much faster than shallowCopy().
1024 QPDF_DLL 880 QPDF_DLL
1025 QPDFObjectHandle unsafeShallowCopy(); 881 QPDFObjectHandle unsafeShallowCopy();
1026 882
1027 - // Create a copy of this stream. The new stream and the old stream  
1028 - // are independent: after the copy, either the original or the  
1029 - // copy's dictionary or data can be modified without affecting the  
1030 - // other. This uses StreamDataProvider internally, so no  
1031 - // unnecessary copies of the stream's data are made. If the source  
1032 - // stream's data is already being provided by a  
1033 - // StreamDataProvider, the new stream will use the same one, so  
1034 - // you have to make sure your StreamDataProvider can handle that  
1035 - // case. But if you're already using a StreamDataProvider, you  
1036 - // probably don't need to call this method. 883 + // Create a copy of this stream. The new stream and the old stream are independent: after the
  884 + // copy, either the original or the copy's dictionary or data can be modified without affecting
  885 + // the other. This uses StreamDataProvider internally, so no unnecessary copies of the stream's
  886 + // data are made. If the source stream's data is already being provided by a StreamDataProvider,
  887 + // the new stream will use the same one, so you have to make sure your StreamDataProvider can
  888 + // handle that case. But if you're already using a StreamDataProvider, you probably don't need
  889 + // to call this method.
1037 QPDF_DLL 890 QPDF_DLL
1038 QPDFObjectHandle copyStream(); 891 QPDFObjectHandle copyStream();
1039 892
1040 // Mutator methods. 893 // Mutator methods.
1041 894
1042 - // Since qpdf 11: for mutators that may add or remove an item,  
1043 - // there are additional versions whose names contain "AndGet" that  
1044 - // return the added or removed item. For example: 895 + // Since qpdf 11: for mutators that may add or remove an item, there are additional versions
  896 + // whose names contain "AndGet" that return the added or removed item. For example:
1045 // 897 //
1046 // auto new_dict = dict.replaceKeyAndGetNew( 898 // auto new_dict = dict.replaceKeyAndGetNew(
1047 // "/New", QPDFObjectHandle::newDictionary()); 899 // "/New", QPDFObjectHandle::newDictionary());
@@ -1049,15 +901,12 @@ class QPDFObjectHandle @@ -1049,15 +901,12 @@ class QPDFObjectHandle
1049 // auto old_value = dict.replaceKeyAndGetOld( 901 // auto old_value = dict.replaceKeyAndGetOld(
1050 // "/New", "(something)"_qpdf); 902 // "/New", "(something)"_qpdf);
1051 903
1052 - // Recursively copy this object, making it direct. An exception is  
1053 - // thrown if a loop is detected. With allow_streams true, keep  
1054 - // indirect object references to streams. Otherwise, throw an  
1055 - // exception if any sub-object is a stream. Note that, when  
1056 - // allow_streams is true and a stream is found, the resulting  
1057 - // object is still associated with the containing qpdf. When  
1058 - // allow_streams is false, the object will no longer be connected  
1059 - // to the original QPDF object after this call completes  
1060 - // successfully. 904 + // Recursively copy this object, making it direct. An exception is thrown if a loop is detected.
  905 + // With allow_streams true, keep indirect object references to streams. Otherwise, throw an
  906 + // exception if any sub-object is a stream. Note that, when allow_streams is true and a stream
  907 + // is found, the resulting object is still associated with the containing qpdf. When
  908 + // allow_streams is false, the object will no longer be connected to the original QPDF object
  909 + // after this call completes successfully.
1061 QPDF_DLL 910 QPDF_DLL
1062 void makeDirect(bool allow_streams = false); 911 void makeDirect(bool allow_streams = false);
1063 912
@@ -1066,9 +915,8 @@ class QPDFObjectHandle @@ -1066,9 +915,8 @@ class QPDFObjectHandle
1066 void setArrayItem(int, QPDFObjectHandle const&); 915 void setArrayItem(int, QPDFObjectHandle const&);
1067 QPDF_DLL 916 QPDF_DLL
1068 void setArrayFromVector(std::vector<QPDFObjectHandle> const& items); 917 void setArrayFromVector(std::vector<QPDFObjectHandle> const& items);
1069 - // Insert an item before the item at the given position ("at") so  
1070 - // that it has that position after insertion. If "at" is equal to  
1071 - // the size of the array, insert the item at the end. 918 + // Insert an item before the item at the given position ("at") so that it has that position
  919 + // after insertion. If "at" is equal to the size of the array, insert the item at the end.
1072 QPDF_DLL 920 QPDF_DLL
1073 void insertItem(int at, QPDFObjectHandle const& item); 921 void insertItem(int at, QPDFObjectHandle const& item);
1074 // Like insertItem but return the item that was inserted. 922 // Like insertItem but return the item that was inserted.
@@ -1080,8 +928,7 @@ class QPDFObjectHandle @@ -1080,8 +928,7 @@ class QPDFObjectHandle
1080 // Append an item, and return the newly added item. 928 // Append an item, and return the newly added item.
1081 QPDF_DLL 929 QPDF_DLL
1082 QPDFObjectHandle appendItemAndGetNew(QPDFObjectHandle const& item); 930 QPDFObjectHandle appendItemAndGetNew(QPDFObjectHandle const& item);
1083 - // Remove the item at that position, reducing the size of the  
1084 - // array by one. 931 + // Remove the item at that position, reducing the size of the array by one.
1085 QPDF_DLL 932 QPDF_DLL
1086 void eraseItem(int at); 933 void eraseItem(int at);
1087 // Erase an item and return the item that was removed. 934 // Erase an item and return the item that was removed.
@@ -1090,22 +937,19 @@ class QPDFObjectHandle @@ -1090,22 +937,19 @@ class QPDFObjectHandle
1090 937
1091 // Mutator methods for dictionary objects 938 // Mutator methods for dictionary objects
1092 939
1093 - // Replace value of key, adding it if it does not exist. If value  
1094 - // is null, remove the key. 940 + // Replace value of key, adding it if it does not exist. If value is null, remove the key.
1095 QPDF_DLL 941 QPDF_DLL
1096 void replaceKey(std::string const& key, QPDFObjectHandle const& value); 942 void replaceKey(std::string const& key, QPDFObjectHandle const& value);
1097 // Replace value of key and return the value. 943 // Replace value of key and return the value.
1098 QPDF_DLL 944 QPDF_DLL
1099 QPDFObjectHandle replaceKeyAndGetNew(std::string const& key, QPDFObjectHandle const& value); 945 QPDFObjectHandle replaceKeyAndGetNew(std::string const& key, QPDFObjectHandle const& value);
1100 - // Replace value of key and return the old value, or null if the  
1101 - // key was previously not present. 946 + // Replace value of key and return the old value, or null if the key was previously not present.
1102 QPDF_DLL 947 QPDF_DLL
1103 QPDFObjectHandle replaceKeyAndGetOld(std::string const& key, QPDFObjectHandle const& value); 948 QPDFObjectHandle replaceKeyAndGetOld(std::string const& key, QPDFObjectHandle const& value);
1104 // Remove key, doing nothing if key does not exist. 949 // Remove key, doing nothing if key does not exist.
1105 QPDF_DLL 950 QPDF_DLL
1106 void removeKey(std::string const& key); 951 void removeKey(std::string const& key);
1107 - // Remove key and return the old value. If the old value didn't  
1108 - // exist, return a null object. 952 + // Remove key and return the old value. If the old value didn't exist, return a null object.
1109 QPDF_DLL 953 QPDF_DLL
1110 QPDFObjectHandle removeKeyAndGetOld(std::string const& key); 954 QPDFObjectHandle removeKeyAndGetOld(std::string const& key);
1111 955
@@ -1117,31 +961,26 @@ class QPDFObjectHandle @@ -1117,31 +961,26 @@ class QPDFObjectHandle
1117 QPDF_DLL 961 QPDF_DLL
1118 QPDFObjectHandle getDict(); 962 QPDFObjectHandle getDict();
1119 963
1120 - // By default, or if true passed, QPDFWriter will attempt to  
1121 - // filter a stream based on decode level, whether compression is  
1122 - // enabled, and its ability to filter. Passing false will prevent  
1123 - // QPDFWriter from attempting to filter the stream even if it can.  
1124 - // This includes both decoding and compressing. This makes it  
1125 - // possible for you to prevent QPDFWriter from uncompressing and  
1126 - // recompressing a stream that it knows how to operate on for any  
1127 - // application-specific reason, such as that you have already  
1128 - // optimized its filtering. Note that this doesn't affect any  
1129 - // other ways to get the stream's data, such as pipeStreamData or  
1130 - // getStreamData. 964 + // By default, or if true passed, QPDFWriter will attempt to filter a stream based on decode
  965 + // level, whether compression is enabled, and its ability to filter. Passing false will prevent
  966 + // QPDFWriter from attempting to filter the stream even if it can. This includes both decoding
  967 + // and compressing. This makes it possible for you to prevent QPDFWriter from uncompressing and
  968 + // recompressing a stream that it knows how to operate on for any application-specific reason,
  969 + // such as that you have already optimized its filtering. Note that this doesn't affect any
  970 + // other ways to get the stream's data, such as pipeStreamData or getStreamData.
1131 QPDF_DLL 971 QPDF_DLL
1132 void setFilterOnWrite(bool); 972 void setFilterOnWrite(bool);
1133 QPDF_DLL 973 QPDF_DLL
1134 bool getFilterOnWrite(); 974 bool getFilterOnWrite();
1135 975
1136 - // If addTokenFilter has been called for this stream, then the  
1137 - // original data should be considered to be modified. This means we  
1138 - // should avoid optimizations such as not filtering a stream that  
1139 - // is already compressed. 976 + // If addTokenFilter has been called for this stream, then the original data should be
  977 + // considered to be modified. This means we should avoid optimizations such as not filtering a
  978 + // stream that is already compressed.
1140 QPDF_DLL 979 QPDF_DLL
1141 bool isDataModified(); 980 bool isDataModified();
1142 981
1143 - // Returns filtered (uncompressed) stream data. Throws an  
1144 - // exception if the stream is filtered and we can't decode it. 982 + // Returns filtered (uncompressed) stream data. Throws an exception if the stream is filtered
  983 + // and we can't decode it.
1145 QPDF_DLL 984 QPDF_DLL
1146 std::shared_ptr<Buffer> getStreamData(qpdf_stream_decode_level_e level = qpdf_dl_generalized); 985 std::shared_ptr<Buffer> getStreamData(qpdf_stream_decode_level_e level = qpdf_dl_generalized);
1147 986
@@ -1149,17 +988,15 @@ class QPDFObjectHandle @@ -1149,17 +988,15 @@ class QPDFObjectHandle
1149 QPDF_DLL 988 QPDF_DLL
1150 std::shared_ptr<Buffer> getRawStreamData(); 989 std::shared_ptr<Buffer> getRawStreamData();
1151 990
1152 - // Write stream data through the given pipeline. A null pipeline  
1153 - // value may be used if all you want to do is determine whether a  
1154 - // stream is filterable and would be filtered based on the  
1155 - // provided flags. If flags is 0, write raw stream data and return  
1156 - // false. Otherwise, the flags alter the behavior in the following  
1157 - // way: 991 + // Write stream data through the given pipeline. A null pipeline value may be used if all you
  992 + // want to do is determine whether a stream is filterable and would be filtered based on the
  993 + // provided flags. If flags is 0, write raw stream data and return false. Otherwise, the flags
  994 + // alter the behavior in the following way:
1158 // 995 //
1159 // encode_flags: 996 // encode_flags:
1160 // 997 //
1161 - // qpdf_sf_compress -- compress data with /FlateDecode if no other  
1162 - // compression filters are applied. 998 + // qpdf_sf_compress -- compress data with /FlateDecode if no other compression filters are
  999 + // applied.
1163 // 1000 //
1164 // qpdf_sf_normalize -- tokenize as content stream and normalize tokens 1001 // qpdf_sf_normalize -- tokenize as content stream and normalize tokens
1165 // 1002 //
@@ -1167,45 +1004,33 @@ class QPDFObjectHandle @@ -1167,45 +1004,33 @@ class QPDFObjectHandle
1167 // 1004 //
1168 // qpdf_dl_none -- do not decode any streams. 1005 // qpdf_dl_none -- do not decode any streams.
1169 // 1006 //
1170 - // qpdf_dl_generalized -- decode supported general-purpose  
1171 - // filters. This includes /ASCIIHexDecode, /ASCII85Decode,  
1172 - // /LZWDecode, and /FlateDecode. 1007 + // qpdf_dl_generalized -- decode supported general-purpose filters. This includes
  1008 + // /ASCIIHexDecode, /ASCII85Decode, /LZWDecode, and /FlateDecode.
1173 // 1009 //
1174 - // qpdf_dl_specialized -- in addition to generalized filters, also  
1175 - // decode supported non-lossy specialized filters. This includes  
1176 - // /RunLengthDecode. 1010 + // qpdf_dl_specialized -- in addition to generalized filters, also decode supported non-lossy
  1011 + // specialized filters. This includes /RunLengthDecode.
1177 // 1012 //
1178 - // qpdf_dl_all -- in addition to generalized and non-lossy  
1179 - // specialized filters, decode supported lossy filters. This  
1180 - // includes /DCTDecode. 1013 + // qpdf_dl_all -- in addition to generalized and non-lossy specialized filters, decode supported
  1014 + // lossy filters. This includes /DCTDecode.
1181 // 1015 //
1182 - // If, based on the flags and the filters and decode parameters,  
1183 - // we determine that we know how to apply all requested filters,  
1184 - // do so and return true if we are successful. 1016 + // If, based on the flags and the filters and decode parameters, we determine that we know how
  1017 + // to apply all requested filters, do so and return true if we are successful.
1185 // 1018 //
1186 - // The exact meaning of the return value differs the different  
1187 - // versions of this function, but for any version, the meaning has  
1188 - // been the same. For the main version, added in qpdf 10, the  
1189 - // return value indicates whether the overall operation succeeded.  
1190 - // The filter parameter, if specified, will be set to whether or  
1191 - // not filtering was attempted. If filtering was not requested,  
1192 - // this value will be false even if the overall operation  
1193 - // succeeded. 1019 + // The exact meaning of the return value differs the different versions of this function, but
  1020 + // for any version, the meaning has been the same. For the main version, added in qpdf 10, the
  1021 + // return value indicates whether the overall operation succeeded. The filter parameter, if
  1022 + // specified, will be set to whether or not filtering was attempted. If filtering was not
  1023 + // requested, this value will be false even if the overall operation succeeded.
1194 // 1024 //
1195 - // If filtering is requested but this method returns false, it  
1196 - // means there was some error in the filtering, in which case the  
1197 - // resulting data is likely partially filtered and/or incomplete  
1198 - // and may not be consistent with the configured filters.  
1199 - // QPDFWriter handles this by attempting to get the stream data  
1200 - // without filtering, but callers should consider a false return  
1201 - // value when decode_level is not qpdf_dl_none to be a potential  
1202 - // loss of data. If you intend to retry in that case, pass true as  
1203 - // the value of will_retry. This changes the warning issued by the  
1204 - // library to indicate that the operation will be retried without  
1205 - // filtering to avoid data loss.  
1206 -  
1207 - // Return value is overall success, even if filtering is not  
1208 - // requested. 1025 + // If filtering is requested but this method returns false, it means there was some error in the
  1026 + // filtering, in which case the resulting data is likely partially filtered and/or incomplete
  1027 + // and may not be consistent with the configured filters. QPDFWriter handles this by attempting
  1028 + // to get the stream data without filtering, but callers should consider a false return value
  1029 + // when decode_level is not qpdf_dl_none to be a potential loss of data. If you intend to retry
  1030 + // in that case, pass true as the value of will_retry. This changes the warning issued by the
  1031 + // library to indicate that the operation will be retried without filtering to avoid data loss.
  1032 +
  1033 + // Return value is overall success, even if filtering is not requested.
1209 QPDF_DLL 1034 QPDF_DLL
1210 bool pipeStreamData( 1035 bool pipeStreamData(
1211 Pipeline*, 1036 Pipeline*,
@@ -1215,9 +1040,8 @@ class QPDFObjectHandle @@ -1215,9 +1040,8 @@ class QPDFObjectHandle
1215 bool suppress_warnings = false, 1040 bool suppress_warnings = false,
1216 bool will_retry = false); 1041 bool will_retry = false);
1217 1042
1218 - // Legacy version. Return value is whether filtering was  
1219 - // attempted. There is no way to determine success if filtering  
1220 - // was not attempted. 1043 + // Legacy version. Return value is whether filtering was attempted. There is no way to determine
  1044 + // success if filtering was not attempted.
1221 QPDF_DLL 1045 QPDF_DLL
1222 bool pipeStreamData( 1046 bool pipeStreamData(
1223 Pipeline*, 1047 Pipeline*,
@@ -1226,8 +1050,7 @@ class QPDFObjectHandle @@ -1226,8 +1050,7 @@ class QPDFObjectHandle
1226 bool suppress_warnings = false, 1050 bool suppress_warnings = false,
1227 bool will_retry = false); 1051 bool will_retry = false);
1228 1052
1229 - // Legacy pipeStreamData. This maps to the the flags-based  
1230 - // pipeStreamData as follows: 1053 + // Legacy pipeStreamData. This maps to the flags-based pipeStreamData as follows:
1231 // filter = false -> encode_flags = 0 1054 // filter = false -> encode_flags = 0
1232 // filter = true -> decode_level = qpdf_dl_generalized 1055 // filter = true -> decode_level = qpdf_dl_generalized
1233 // normalize = true -> encode_flags |= qpdf_sf_normalize 1056 // normalize = true -> encode_flags |= qpdf_sf_normalize
@@ -1236,70 +1059,57 @@ class QPDFObjectHandle @@ -1236,70 +1059,57 @@ class QPDFObjectHandle
1236 QPDF_DLL 1059 QPDF_DLL
1237 bool pipeStreamData(Pipeline*, bool filter, bool normalize, bool compress); 1060 bool pipeStreamData(Pipeline*, bool filter, bool normalize, bool compress);
1238 1061
1239 - // Replace a stream's dictionary. The new dictionary must be  
1240 - // consistent with the stream's data. This is most appropriately  
1241 - // used when creating streams from scratch that will use a stream  
1242 - // data provider and therefore start with an empty dictionary. It  
1243 - // may be more convenient in this case than calling getDict and  
1244 - // modifying it for each key. The pdf-create example does this. 1062 + // Replace a stream's dictionary. The new dictionary must be consistent with the stream's data.
  1063 + // This is most appropriately used when creating streams from scratch that will use a stream
  1064 + // data provider and therefore start with an empty dictionary. It may be more convenient in
  1065 + // this case than calling getDict and modifying it for each key. The pdf-create example does
  1066 + // this.
1245 QPDF_DLL 1067 QPDF_DLL
1246 void replaceDict(QPDFObjectHandle const&); 1068 void replaceDict(QPDFObjectHandle const&);
1247 1069
1248 // REPLACING STREAM DATA 1070 // REPLACING STREAM DATA
1249 1071
1250 - // Note about all replaceStreamData methods: whatever values are  
1251 - // passed as filter and decode_parms will overwrite /Filter and  
1252 - // /DecodeParms in the stream. Passing a null object  
1253 - // (QPDFObjectHandle::newNull()) will remove those values from the  
1254 - // stream dictionary. From qpdf 11, passing an *uninitialized*  
1255 - // QPDFObjectHandle (QPDFObjectHandle()) will leave any existing 1072 + // Note about all replaceStreamData methods: whatever values are passed as filter and
  1073 + // decode_parms will overwrite /Filter and /DecodeParms in the stream. Passing a null object
  1074 + // (QPDFObjectHandle::newNull()) will remove those values from the stream dictionary. From qpdf
  1075 + // 11, passing an *uninitialized* QPDFObjectHandle (QPDFObjectHandle()) will leave any existing
1256 // values untouched. 1076 // values untouched.
1257 1077
1258 - // Replace this stream's stream data with the given data buffer.  
1259 - // The stream's /Length key is replaced with the length of the  
1260 - // data buffer. The stream is interpreted as if the data read from  
1261 - // the file, after any decryption filters have been applied, is as  
1262 - // presented. 1078 + // Replace this stream's stream data with the given data buffer. The stream's /Length key is
  1079 + // replaced with the length of the data buffer. The stream is interpreted as if the data read
  1080 + // from the file, after any decryption filters have been applied, is as presented.
1263 QPDF_DLL 1081 QPDF_DLL
1264 void replaceStreamData( 1082 void replaceStreamData(
1265 std::shared_ptr<Buffer> data, 1083 std::shared_ptr<Buffer> data,
1266 QPDFObjectHandle const& filter, 1084 QPDFObjectHandle const& filter,
1267 QPDFObjectHandle const& decode_parms); 1085 QPDFObjectHandle const& decode_parms);
1268 1086
1269 - // Replace the stream's stream data with the given string.  
1270 - // This method will create a copy of the data rather than using  
1271 - // the user-provided buffer as in the std::shared_ptr<Buffer> version  
1272 - // of replaceStreamData. 1087 + // Replace the stream's stream data with the given string. This method will create a copy of the
  1088 + // data rather than using the user-provided buffer as in the std::shared_ptr<Buffer> version of
  1089 + // replaceStreamData.
1273 QPDF_DLL 1090 QPDF_DLL
1274 void replaceStreamData( 1091 void replaceStreamData(
1275 std::string const& data, 1092 std::string const& data,
1276 QPDFObjectHandle const& filter, 1093 QPDFObjectHandle const& filter,
1277 QPDFObjectHandle const& decode_parms); 1094 QPDFObjectHandle const& decode_parms);
1278 1095
1279 - // As above, replace this stream's stream data. Instead of  
1280 - // directly providing a buffer with the stream data, call the  
1281 - // given provider's provideStreamData method. See comments on the  
1282 - // StreamDataProvider class (defined above) for details on the  
1283 - // method. The data must be consistent with filter and  
1284 - // decode_parms as provided. Although it is more complex to use  
1285 - // this form of replaceStreamData than the one that takes a  
1286 - // buffer, it makes it possible to avoid allocating memory for the  
1287 - // stream data. Example programs are provided that use both forms  
1288 - // of replaceStreamData.  
1289 -  
1290 - // Note about stream length: for any given stream, the provider  
1291 - // must provide the same amount of data each time it is called.  
1292 - // This is critical for making linearization work properly.  
1293 - // Versions of qpdf before 3.0.0 required a length to be specified  
1294 - // here. Starting with version 3.0.0, this is no longer necessary  
1295 - // (or permitted). The first time the stream data provider is  
1296 - // invoked for a given stream, the actual length is stored.  
1297 - // Subsequent times, it is enforced that the length be the same as  
1298 - // the first time.  
1299 -  
1300 - // If you have gotten a compile error here while building code  
1301 - // that worked with older versions of qpdf, just omit the length  
1302 - // parameter. You can also simplify your code by not having to 1096 + // As above, replace this stream's stream data. Instead of directly providing a buffer with the
  1097 + // stream data, call the given provider's provideStreamData method. See comments on the
  1098 + // StreamDataProvider class (defined above) for details on the method. The data must be
  1099 + // consistent with filter and decode_parms as provided. Although it is more complex to use this
  1100 + // form of replaceStreamData than the one that takes a buffer, it makes it possible to avoid
  1101 + // allocating memory for the stream data. Example programs are provided that use both forms of
  1102 + // replaceStreamData.
  1103 +
  1104 + // Note about stream length: for any given stream, the provider must provide the same amount of
  1105 + // data each time it is called. This is critical for making linearization work properly.
  1106 + // Versions of qpdf before 3.0.0 required a length to be specified here. Starting with
  1107 + // version 3.0.0, this is no longer necessary (or permitted). The first time the stream data
  1108 + // provider is invoked for a given stream, the actual length is stored. Subsequent times, it is
  1109 + // enforced that the length be the same as the first time.
  1110 +
  1111 + // If you have gotten a compile error here while building code that worked with older versions
  1112 + // of qpdf, just omit the length parameter. You can also simplify your code by not having to
1303 // compute the length in advance. 1113 // compute the length in advance.
1304 QPDF_DLL 1114 QPDF_DLL
1305 void replaceStreamData( 1115 void replaceStreamData(
@@ -1307,33 +1117,28 @@ class QPDFObjectHandle @@ -1307,33 +1117,28 @@ class QPDFObjectHandle
1307 QPDFObjectHandle const& filter, 1117 QPDFObjectHandle const& filter,
1308 QPDFObjectHandle const& decode_parms); 1118 QPDFObjectHandle const& decode_parms);
1309 1119
1310 - // Starting in qpdf 10.2, you can use C++-11 function objects  
1311 - // instead of StreamDataProvider. 1120 + // Starting in qpdf 10.2, you can use C++-11 function objects instead of StreamDataProvider.
1312 1121
1313 - // The provider should write the stream data to the pipeline. For  
1314 - // a one-liner to replace stream data with the contents of a file,  
1315 - // pass QUtil::file_provider(filename) as provider. 1122 + // The provider should write the stream data to the pipeline. For a one-liner to replace stream
  1123 + // data with the contents of a file, pass QUtil::file_provider(filename) as provider.
1316 QPDF_DLL 1124 QPDF_DLL
1317 void replaceStreamData( 1125 void replaceStreamData(
1318 std::function<void(Pipeline*)> provider, 1126 std::function<void(Pipeline*)> provider,
1319 QPDFObjectHandle const& filter, 1127 QPDFObjectHandle const& filter,
1320 QPDFObjectHandle const& decode_parms); 1128 QPDFObjectHandle const& decode_parms);
1321 - // The provider should write the stream data to the pipeline,  
1322 - // returning true if it succeeded without errors. 1129 + // The provider should write the stream data to the pipeline, returning true if it succeeded
  1130 + // without errors.
1323 QPDF_DLL 1131 QPDF_DLL
1324 void replaceStreamData( 1132 void replaceStreamData(
1325 std::function<bool(Pipeline*, bool suppress_warnings, bool will_retry)> provider, 1133 std::function<bool(Pipeline*, bool suppress_warnings, bool will_retry)> provider,
1326 QPDFObjectHandle const& filter, 1134 QPDFObjectHandle const& filter,
1327 QPDFObjectHandle const& decode_parms); 1135 QPDFObjectHandle const& decode_parms);
1328 1136
1329 - // Access object ID and generation. For direct objects, return  
1330 - // object ID 0. 1137 + // Access object ID and generation. For direct objects, return object ID 0.
1331 1138
1332 - // NOTE: Be careful about calling getObjectID() and  
1333 - // getGeneration() directly as this can lead to the pattern of  
1334 - // depending on object ID or generation without the other. In  
1335 - // general, when keeping track of object IDs, it's better to use  
1336 - // QPDFObjGen instead. 1139 + // NOTE: Be careful about calling getObjectID() and getGeneration() directly as this can lead to
  1140 + // the pattern of depending on object ID or generation without the other. In general, when
  1141 + // keeping track of object IDs, it's better to use QPDFObjGen instead.
1337 1142
1338 QPDF_DLL 1143 QPDF_DLL
1339 QPDFObjGen getObjGen() const; 1144 QPDFObjGen getObjGen() const;
@@ -1346,51 +1151,40 @@ class QPDFObjectHandle @@ -1346,51 +1151,40 @@ class QPDFObjectHandle
1346 std::string unparse(); 1151 std::string unparse();
1347 QPDF_DLL 1152 QPDF_DLL
1348 std::string unparseResolved(); 1153 std::string unparseResolved();
1349 - // For strings only, force binary representation. Otherwise, same  
1350 - // as unparse. 1154 + // For strings only, force binary representation. Otherwise, same as unparse.
1351 QPDF_DLL 1155 QPDF_DLL
1352 std::string unparseBinary(); 1156 std::string unparseBinary();
1353 1157
1354 - // Return encoded as JSON. The constant JSON::LATEST can be used  
1355 - // to specify the latest available JSON version. The JSON is  
1356 - // generated as follows:  
1357 - // * Arrays, dictionaries, booleans, nulls, integers, and real  
1358 - // numbers are represented by their native JSON types.  
1359 - // * Names are encoded as strings representing the canonical  
1360 - // representation (after parsing #xx) and preceded by a slash,  
1361 - // just as unparse() returns. For example, the JSON for the 1158 + // Return encoded as JSON. The constant JSON::LATEST can be used to specify the latest available
  1159 + // JSON version. The JSON is generated as follows:
  1160 + // * Arrays, dictionaries, booleans, nulls, integers, and real numbers are represented by their
  1161 + // native JSON types.
  1162 + // * Names are encoded as strings representing the canonical representation (after parsing #xx)
  1163 + // and preceded by a slash, just as unparse() returns. For example, the JSON for the
1362 // PDF-syntax name /Text#2fPlain would be "/Text/Plain". 1164 // PDF-syntax name /Text#2fPlain would be "/Text/Plain".
1363 // * Indirect references are encoded as strings containing "obj gen R" 1165 // * Indirect references are encoded as strings containing "obj gen R"
1364 // * Strings 1166 // * Strings
1365 - // * JSON v1: Strings are encoded as UTF-8 strings with  
1366 - // unrepresentable binary characters encoded as \uHHHH.  
1367 - // Characters in PDF Doc encoding that don't have  
1368 - // bidirectional unicode mappings are not reversible. There is  
1369 - // no way to tell the difference between a string that looks  
1370 - // like a name or indirect object from an actual name or  
1371 - // indirect object. 1167 + // * JSON v1: Strings are encoded as UTF-8 strings with unrepresentable binary characters
  1168 + // encoded as \uHHHH. Characters in PDF Doc encoding that don't have bidirectional unicode
  1169 + // mappings are not reversible. There is no way to tell the difference between a string that
  1170 + // looks like a name or indirect object from an actual name or indirect object.
1372 // * JSON v2: 1171 // * JSON v2:
1373 - // * Unicode strings and strings encoded with PDF Doc encoding  
1374 - // that can be bidrectionally mapped two Unicode (which is  
1375 - // all strings without undefined characters) are represented 1172 + // * Unicode strings and strings encoded with PDF Doc encoding that can be bidirectionally
  1173 + // mapped to Unicode (which is all strings without undefined characters) are represented
1376 // as "u:" followed by the UTF-8 encoded string. Example: 1174 // as "u:" followed by the UTF-8 encoded string. Example:
1377 // "u:potato". 1175 // "u:potato".
1378 - // * All other strings are represented as "b:" followed by a  
1379 - // hexadecimal encoding of the string. Example: "b:0102cacb" 1176 + // * All other strings are represented as "b:" followed by a hexadecimal encoding of the
  1177 + // string. Example: "b:0102cacb"
1380 // * Streams 1178 // * Streams
1381 - // * JSON v1: Only the stream's dictionary is encoded. There is  
1382 - // no way tell a stream from a dictionary other than context.  
1383 - // * JSON v2: A stream is encoded as {"dict": {...}} with the  
1384 - // value being the encoding of the stream's dictionary. Since  
1385 - // "dict" does not otherwise represent anything, this is  
1386 - // unambiguous. The getStreamJSON() call can be used to add  
1387 - // encoding of the stream's data.  
1388 - // * Object types that are only valid in content streams (inline  
1389 - // image, operator) are serialized as "null". Attempting to  
1390 - // serialize a "reserved" object is an error.  
1391 - // If dereference_indirect is true and this is an indirect object,  
1392 - // show the actual contents of the object. The effect of  
1393 - // dereference_indirect applies only to this object. It is not 1179 + // * JSON v1: Only the stream's dictionary is encoded. There is no way to tell a stream from a
  1180 + // dictionary other than context.
  1181 + // * JSON v2: A stream is encoded as {"dict": {...}} with the value being the encoding of the
  1182 + // stream's dictionary. Since "dict" does not otherwise represent anything, this is
  1183 + // unambiguous. The getStreamJSON() call can be used to add encoding of the stream's data.
  1184 + // * Object types that are only valid in content streams (inline image, operator) are serialized
  1185 + // as "null". Attempting to serialize a "reserved" object is an error.
  1186 + // If dereference_indirect is true and this is an indirect object, show the actual contents of
  1187 + // the object. The effect of dereference_indirect applies only to this object. It is not
1394 // recursive. 1188 // recursive.
1395 QPDF_DLL 1189 QPDF_DLL
1396 JSON getJSON(int json_version, bool dereference_indirect = false); 1190 JSON getJSON(int json_version, bool dereference_indirect = false);
@@ -1400,36 +1194,28 @@ class QPDFObjectHandle @@ -1400,36 +1194,28 @@ class QPDFObjectHandle
1400 [[deprecated("Use getJSON(int version)")]] QPDF_DLL JSON 1194 [[deprecated("Use getJSON(int version)")]] QPDF_DLL JSON
1401 getJSON(bool dereference_indirect = false); 1195 getJSON(bool dereference_indirect = false);
1402 1196
1403 - // This method can be called on a stream to get a more extended  
1404 - // JSON representation of the stream that includes the stream's  
1405 - // data. The JSON object returned is always a dictionary whose  
1406 - // "dict" key is an encoding of the stream's dictionary. The  
1407 - // representation of the data is determined by the json_data  
1408 - // field. 1197 + // This method can be called on a stream to get a more extended JSON representation of the
  1198 + // stream that includes the stream's data. The JSON object returned is always a dictionary whose
  1199 + // "dict" key is an encoding of the stream's dictionary. The representation of the data is
  1200 + // determined by the json_data field.
1409 // 1201 //
1410 - // The json_data field may have the value qpdf_sj_none,  
1411 - // qpdf_sj_inline, or qpdf_sj_file. 1202 + // The json_data field may have the value qpdf_sj_none, qpdf_sj_inline, or qpdf_sj_file.
1412 // 1203 //
1413 // If json_data is qpdf_sj_none, stream data is not represented. 1204 // If json_data is qpdf_sj_none, stream data is not represented.
1414 // 1205 //
1415 - // If json_data is qpdf_sj_inline or qpdf_sj_file, then stream  
1416 - // data is filtered or not based on the value of decode_level,  
1417 - // which has the same meaning as with pipeStreamData. 1206 + // If json_data is qpdf_sj_inline or qpdf_sj_file, then stream data is filtered or not based on
  1207 + // the value of decode_level, which has the same meaning as with pipeStreamData.
1418 // 1208 //
1419 - // If json_data is qpdf_sj_inline, the base64-encoded stream data  
1420 - // is included in the "data" field of the dictionary that is  
1421 - // returned. 1209 + // If json_data is qpdf_sj_inline, the base64-encoded stream data is included in the "data"
  1210 + // field of the dictionary that is returned.
1422 // 1211 //
1423 - // If json_data is qpdf_sj_file, then the Pipeline ("p") and  
1424 - // data_filename argument must be supplied. The value of  
1425 - // data_filename is stored in the resulting json in the "datafile"  
1426 - // key but is not otherwise use. The stream data itself (raw or  
1427 - // filtered depending on decode level), is written to the pipeline  
1428 - // via pipeStreamData(). 1212 + // If json_data is qpdf_sj_file, then the Pipeline ("p") and data_filename argument must be
  1213 + // supplied. The value of data_filename is stored in the resulting json in the "datafile" key
  1214 + // but is not otherwise used. The stream data itself (raw or filtered depending on decode level),
  1215 + // is written to the pipeline via pipeStreamData().
1429 // 1216 //
1430 - // NOTE: When json_data is qpdf_sj_inline, the QPDF object from  
1431 - // which the stream originates must remain valid until after the  
1432 - // JSON object is written. 1217 + // NOTE: When json_data is qpdf_sj_inline, the QPDF object from which the stream originates must
  1218 + // remain valid until after the JSON object is written.
1433 QPDF_DLL 1219 QPDF_DLL
1434 JSON getStreamJSON( 1220 JSON getStreamJSON(
1435 int json_version, 1221 int json_version,
@@ -1438,11 +1224,9 @@ class QPDFObjectHandle @@ -1438,11 +1224,9 @@ class QPDFObjectHandle
1438 Pipeline* p, 1224 Pipeline* p,
1439 std::string const& data_filename); 1225 std::string const& data_filename);
1440 1226
1441 - // Legacy helper methods for commonly performed operations on  
1442 - // pages. Newer code should use QPDFPageObjectHelper instead. The  
1443 - // specification and behavior of these methods are the same as the  
1444 - // identically named methods in that class, but newer  
1445 - // functionality will be added there. 1227 + // Legacy helper methods for commonly performed operations on pages. Newer code should use
  1228 + // QPDFPageObjectHelper instead. The specification and behavior of these methods are the same as
  1229 + // the identically named methods in that class, but newer functionality will be added there.
1446 QPDF_DLL 1230 QPDF_DLL
1447 std::map<std::string, QPDFObjectHandle> getPageImages(); 1231 std::map<std::string, QPDFObjectHandle> getPageImages();
1448 QPDF_DLL 1232 QPDF_DLL
@@ -1455,18 +1239,15 @@ class QPDFObjectHandle @@ -1455,18 +1239,15 @@ class QPDFObjectHandle
1455 void coalesceContentStreams(); 1239 void coalesceContentStreams();
1456 // End legacy page helpers 1240 // End legacy page helpers
1457 1241
1458 - // Issue a warning about this object if possible. If the object  
1459 - // has a description, a warning will be issued using the owning  
1460 - // QPDF as context. Otherwise, a message will be written to the  
1461 - // default logger's error stream, which is standard error if not  
1462 - // overridden. Objects read normally from the file have  
1463 - // descriptions. See comments on setObjectDescription for  
1464 - // additional details. 1242 + // Issue a warning about this object if possible. If the object has a description, a warning
  1243 + // will be issued using the owning QPDF as context. Otherwise, a message will be written to the
  1244 + // default logger's error stream, which is standard error if not overridden. Objects read
  1245 + // normally from the file have descriptions. See comments on setObjectDescription for additional
  1246 + // details.
1465 QPDF_DLL 1247 QPDF_DLL
1466 void warnIfPossible(std::string const& warning); 1248 void warnIfPossible(std::string const& warning);
1467 1249
1468 - // Provide access to specific classes for recursive  
1469 - // disconnected(). 1250 + // Provide access to specific classes for recursive disconnected().
1470 class DisconnectAccess 1251 class DisconnectAccess
1471 { 1252 {
1472 friend class QPDF_Dictionary; 1253 friend class QPDF_Dictionary;
@@ -1480,9 +1261,8 @@ class QPDFObjectHandle @@ -1480,9 +1261,8 @@ class QPDFObjectHandle
1480 } 1261 }
1481 }; 1262 };
1482 1263
1483 - // Convenience routine: Throws if the assumption is violated. Your  
1484 - // code will be better if you call one of the isType methods and  
1485 - // handle the case of the type being wrong, but these can be 1264 + // Convenience routine: Throws if the assumption is violated. Your code will be better if you
  1265 + // call one of the isType methods and handle the case of the type being wrong, but these can be
1486 // convenient if you have already verified the type. 1266 // convenient if you have already verified the type.
1487 QPDF_DLL 1267 QPDF_DLL
1488 void assertInitialized() const; 1268 void assertInitialized() const;
@@ -1519,11 +1299,10 @@ class QPDFObjectHandle @@ -1519,11 +1299,10 @@ class QPDFObjectHandle
1519 QPDF_DLL 1299 QPDF_DLL
1520 void assertNumber(); 1300 void assertNumber();
1521 1301
1522 - // The isPageObject method checks the /Type key of the object.  
1523 - // This is not completely reliable as there are some otherwise  
1524 - // valid files whose /Type is wrong for page objects. qpdf is  
1525 - // slightly more accepting but may still return false here when  
1526 - // treating the object as a page would work. Use this sparingly. 1302 + // The isPageObject method checks the /Type key of the object. This is not completely reliable
  1303 + // as there are some otherwise valid files whose /Type is wrong for page objects. qpdf is
  1304 + // slightly more accepting but may still return false here when treating the object as a page
  1305 + // would work. Use this sparingly.
1527 QPDF_DLL 1306 QPDF_DLL
1528 bool isPageObject(); 1307 bool isPageObject();
1529 QPDF_DLL 1308 QPDF_DLL
@@ -1534,13 +1313,12 @@ class QPDFObjectHandle @@ -1534,13 +1313,12 @@ class QPDFObjectHandle
1534 QPDF_DLL 1313 QPDF_DLL
1535 bool isFormXObject(); 1314 bool isFormXObject();
1536 1315
1537 - // Indicate if this is an image. If exclude_imagemask is true,  
1538 - // don't count image masks as images. 1316 + // Indicate if this is an image. If exclude_imagemask is true, don't count image masks as
  1317 + // images.
1539 QPDF_DLL 1318 QPDF_DLL
1540 bool isImage(bool exclude_imagemask = true); 1319 bool isImage(bool exclude_imagemask = true);
1541 1320
1542 - // The following methods do not form part of the public API and are for  
1543 - // internal use only. 1321 + // The following methods do not form part of the public API and are for internal use only.
1544 1322
1545 QPDFObjectHandle(std::shared_ptr<QPDFObject> const& obj) : 1323 QPDFObjectHandle(std::shared_ptr<QPDFObject> const& obj) :
1546 obj(obj) 1324 obj(obj)
@@ -1600,9 +1378,8 @@ class QPDFObjectHandle @@ -1600,9 +1378,8 @@ class QPDFObjectHandle
1600 static void warn(QPDF*, QPDFExc const&); 1378 static void warn(QPDF*, QPDFExc const&);
1601 void checkOwnership(QPDFObjectHandle const&) const; 1379 void checkOwnership(QPDFObjectHandle const&) const;
1602 1380
1603 - // Moving members of QPDFObjectHandle into a smart pointer incurs  
1604 - // a substantial performance penalty since QPDFObjectHandle  
1605 - // objects are copied around so frequently. 1381 + // Moving members of QPDFObjectHandle into a smart pointer incurs a substantial performance
  1382 + // penalty since QPDFObjectHandle objects are copied around so frequently.
1606 std::shared_ptr<QPDFObject> obj; 1383 std::shared_ptr<QPDFObject> obj;
1607 }; 1384 };
1608 1385
@@ -1611,13 +1388,12 @@ class QPDFObjectHandle @@ -1611,13 +1388,12 @@ class QPDFObjectHandle
1611 1388
1612 // auto oh = "<< /Key (value) >>"_qpdf; 1389 // auto oh = "<< /Key (value) >>"_qpdf;
1613 1390
1614 -// If this is causing problems in your code, define  
1615 -// QPDF_NO_QPDF_STRING to prevent the declaration from being here. 1391 +// If this is causing problems in your code, define QPDF_NO_QPDF_STRING to prevent the declaration
  1392 +// from being here.
1616 1393
1617 /* clang-format off */ 1394 /* clang-format off */
1618 -// Disable formatting for this declaration: emacs font-lock in cc-mode  
1619 -// (as of 28.1) treats the rest of the file as a string if  
1620 -// clang-format removes the space after "operator", and as of 1395 +// Disable formatting for this declaration: emacs font-lock in cc-mode (as of 28.1) treats the rest
  1396 +// of the file as a string if clang-format removes the space after "operator", and as of
1621 // clang-format 15, there's no way to prevent it from doing so. 1397 // clang-format 15, there's no way to prevent it from doing so.
1622 QPDF_DLL 1398 QPDF_DLL
1623 QPDFObjectHandle operator ""_qpdf(char const* v, size_t len); 1399 QPDFObjectHandle operator ""_qpdf(char const* v, size_t len);
@@ -1627,8 +1403,8 @@ QPDFObjectHandle operator ""_qpdf(char const* v, size_t len); @@ -1627,8 +1403,8 @@ QPDFObjectHandle operator ""_qpdf(char const* v, size_t len);
1627 1403
1628 class QPDFObjectHandle::QPDFDictItems 1404 class QPDFObjectHandle::QPDFDictItems
1629 { 1405 {
1630 - // This class allows C++-style iteration, including range-for  
1631 - // iteration, around dictionaries. You can write 1406 + // This class allows C++-style iteration, including range-for iteration, around dictionaries.
  1407 + // You can write
1632 1408
1633 // for (auto iter: QPDFDictItems(dictionary_obj)) 1409 // for (auto iter: QPDFDictItems(dictionary_obj))
1634 // { 1410 // {
@@ -1636,8 +1412,7 @@ class QPDFObjectHandle::QPDFDictItems @@ -1636,8 +1412,7 @@ class QPDFObjectHandle::QPDFDictItems
1636 // // iter.second is a QPDFObjectHandle 1412 // // iter.second is a QPDFObjectHandle
1637 // } 1413 // }
1638 1414
1639 - // See examples/pdf-name-number-tree.cc for a demonstration of  
1640 - // using this API. 1415 + // See examples/pdf-name-number-tree.cc for a demonstration of using this API.
1641 1416
1642 public: 1417 public:
1643 QPDF_DLL 1418 QPDF_DLL
@@ -1727,16 +1502,15 @@ class QPDFObjectHandle::QPDFDictItems @@ -1727,16 +1502,15 @@ class QPDFObjectHandle::QPDFDictItems
1727 1502
1728 class QPDFObjectHandle::QPDFArrayItems 1503 class QPDFObjectHandle::QPDFArrayItems
1729 { 1504 {
1730 - // This class allows C++-style iteration, including range-for  
1731 - // iteration, around arrays. You can write 1505 + // This class allows C++-style iteration, including range-for iteration, around arrays. You can
  1506 + // write
1732 1507
1733 // for (auto iter: QPDFArrayItems(array_obj)) 1508 // for (auto iter: QPDFArrayItems(array_obj))
1734 // { 1509 // {
1735 // // iter is a QPDFObjectHandle 1510 // // iter is a QPDFObjectHandle
1736 // } 1511 // }
1737 1512
1738 - // See examples/pdf-name-number-tree.cc for a demonstration of  
1739 - // using this API. 1513 + // See examples/pdf-name-number-tree.cc for a demonstration of using this API.
1740 1514
1741 public: 1515 public:
1742 QPDF_DLL 1516 QPDF_DLL
include/qpdf/QPDFPageObjectHelper.hh
@@ -2,22 +2,19 @@ @@ -2,22 +2,19 @@
2 // 2 //
3 // This file is part of qpdf. 3 // This file is part of qpdf.
4 // 4 //
5 -// Licensed under the Apache License, Version 2.0 (the "License");  
6 -// you may not use this file except in compliance with the License.  
7 -// You may obtain a copy of the License at 5 +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
  6 +// in compliance with the License. You may obtain a copy of the License at
8 // 7 //
9 // http://www.apache.org/licenses/LICENSE-2.0 8 // http://www.apache.org/licenses/LICENSE-2.0
10 // 9 //
11 -// Unless required by applicable law or agreed to in writing, software  
12 -// distributed under the License is distributed on an "AS IS" BASIS,  
13 -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
14 -// See the License for the specific language governing permissions and  
15 -// limitations under the License. 10 +// Unless required by applicable law or agreed to in writing, software distributed under the License
  11 +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
  12 +// or implied. See the License for the specific language governing permissions and limitations under
  13 +// the License.
16 // 14 //
17 -// Versions of qpdf prior to version 7 were released under the terms  
18 -// of version 2.0 of the Artistic License. At your option, you may  
19 -// continue to consider qpdf to be licensed under those terms. Please  
20 -// see the manual for additional information. 15 +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
  16 +// License. At your option, you may continue to consider qpdf to be licensed under those terms.
  17 +// Please see the manual for additional information.
21 18
22 #ifndef QPDFPAGEOBJECTHELPER_HH 19 #ifndef QPDFPAGEOBJECTHELPER_HH
23 #define QPDFPAGEOBJECTHELPER_HH 20 #define QPDFPAGEOBJECTHELPER_HH
@@ -35,9 +32,8 @@ class QPDFAcroFormDocumentHelper; @@ -35,9 +32,8 @@ class QPDFAcroFormDocumentHelper;
35 32
36 class QPDFPageObjectHelper: public QPDFObjectHelper 33 class QPDFPageObjectHelper: public QPDFObjectHelper
37 { 34 {
38 - // This is a helper class for page objects, but as of qpdf 10.1,  
39 - // many of the methods also work for form XObjects. When this is  
40 - // the case, it is noted in the comment. 35 + // This is a helper class for page objects, but as of qpdf 10.1, many of the methods also work
  36 + // for form XObjects. When this is the case, it is noted in the comment.
41 37
42 public: 38 public:
43 QPDF_DLL 39 QPDF_DLL
@@ -47,35 +43,30 @@ class QPDFPageObjectHelper: public QPDFObjectHelper @@ -47,35 +43,30 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
47 43
48 // PAGE ATTRIBUTES 44 // PAGE ATTRIBUTES
49 45
50 - // The getAttribute method works with pages and form XObjects. It  
51 - // return the value of the requested attribute from the page/form  
52 - // XObject's dictionary, taking inheritance from the pages tree  
53 - // into consideration. For pages, the attributes /MediaBox,  
54 - // /CropBox, /Resources, and /Rotate are inheritable, meaning that  
55 - // if they are not present directly on the page node, they may be  
56 - // inherited from ancestor nodes in the pages tree. 46 + // The getAttribute method works with pages and form XObjects. It returns the value of the
  47 + // requested attribute from the page/form XObject's dictionary, taking inheritance from the
  48 + // pages tree into consideration. For pages, the attributes /MediaBox, /CropBox, /Resources, and
  49 + // /Rotate are inheritable, meaning that if they are not present directly on the page node, they
  50 + // may be inherited from ancestor nodes in the pages tree.
57 // 51 //
58 // There are two ways that an attribute can be "shared": 52 // There are two ways that an attribute can be "shared":
59 // 53 //
60 - // * For inheritable attributes on pages, it may appear in a  
61 - // higher level node of the pages tree 54 + // * For inheritable attributes on pages, it may appear in a higher level node of the pages tree
62 // 55 //
63 - // * For any attribute, the attribute may be an indirect object  
64 - // which may be referenced by more than one page/form XObject. 56 + // * For any attribute, the attribute may be an indirect object which may be referenced by more
  57 + // than one page/form XObject.
65 // 58 //
66 - // If copy_if_shared is true, then this method will replace the  
67 - // attribute with a shallow copy if it is indirect or inherited  
68 - // and return the copy. You should do this if you are going to  
69 - // modify the returned object and want the modifications to apply  
70 - // to the current page/form XObject only. 59 + // If copy_if_shared is true, then this method will replace the attribute with a shallow copy if
  60 + // it is indirect or inherited and return the copy. You should do this if you are going to
  61 + // modify the returned object and want the modifications to apply to the current page/form
  62 + // XObject only.
71 QPDF_DLL 63 QPDF_DLL
72 QPDFObjectHandle getAttribute(std::string const& name, bool copy_if_shared); 64 QPDFObjectHandle getAttribute(std::string const& name, bool copy_if_shared);
73 65
74 // PAGE BOXES 66 // PAGE BOXES
75 // 67 //
76 - // Pages have various types of boundary boxes. These are described  
77 - // in detail in the PDF specification (section 14.11.2 Page  
78 - // boundaries). They are, by key in the page dictionary: 68 + // Pages have various types of boundary boxes. These are described in detail in the PDF
  69 + // specification (section 14.11.2 Page boundaries). They are, by key in the page dictionary:
79 // 70 //
80 // * /MediaBox -- boundaries of physical page 71 // * /MediaBox -- boundaries of physical page
81 // * /CropBox -- clipping region of what is displayed 72 // * /CropBox -- clipping region of what is displayed
@@ -87,114 +78,90 @@ class QPDFPageObjectHelper: public QPDFObjectHelper @@ -87,114 +78,90 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
87 // fallback value for /CropBox is /MediaBox, and the fallback 78 // fallback value for /CropBox is /MediaBox, and the fallback
88 // values for the other boxes are /CropBox. 79 // values for the other boxes are /CropBox.
89 // 80 //
90 - // As noted above (PAGE ATTRIBUTES), /MediaBox and /CropBox can be  
91 - // inherited from parent nodes in the pages tree. The other boxes  
92 - // can't be inherited. 81 + // As noted above (PAGE ATTRIBUTES), /MediaBox and /CropBox can be inherited from parent nodes
  82 + // in the pages tree. The other boxes can't be inherited.
93 // 83 //
94 - // When the comments below refer to the "effective value" of an  
95 - // box, this takes into consideration both inheritance through the  
96 - // pages tree (in the case of /MediaBox and /CropBox) and fallback  
97 - // values for missing attributes (for all except /MediaBox). 84 + // When the comments below refer to the "effective value" of a box, this takes into
  85 + // consideration both inheritance through the pages tree (in the case of /MediaBox and /CropBox)
  86 + // and fallback values for missing attributes (for all except /MediaBox).
98 // 87 //
99 - // For the methods below, copy_if_shared is passed to getAttribute  
100 - // and therefore refers only to indirect objects and values that  
101 - // are inherited through the pages tree. 88 + // For the methods below, copy_if_shared is passed to getAttribute and therefore refers only to
  89 + // indirect objects and values that are inherited through the pages tree.
102 // 90 //
103 - // If copy_if_fallback is true, a copy is made if the object's  
104 - // value was obtained by falling back to a different box. 91 + // If copy_if_fallback is true, a copy is made if the object's value was obtained by falling
  92 + // back to a different box.
105 // 93 //
106 - // The copy_if_shared and copy_if_fallback parameters carry across  
107 - // multiple layers. This is explained below. 94 + // The copy_if_shared and copy_if_fallback parameters carry across multiple layers. This is
  95 + // explained below.
108 // 96 //
109 - // You should set copy_if_shared to true if you want to modify a  
110 - // bounding box for the current page without affecting other pages  
111 - // but you don't want to change the fallback behavior. For  
112 - // example, if you want to modify the /TrimBox for the current  
113 - // page only but have it continue to fall back to the value of  
114 - // /CropBox or /MediaBox if they are not defined, you could set 97 + // You should set copy_if_shared to true if you want to modify a bounding box for the current
  98 + // page without affecting other pages but you don't want to change the fallback behavior. For
  99 + // example, if you want to modify the /TrimBox for the current page only but have it continue to
  100 + // fall back to the value of /CropBox or /MediaBox if they are not defined, you could set
115 // copy_if_shared to true. 101 // copy_if_shared to true.
116 // 102 //
117 - // You should set copy_if_fallback to true if you want to modify a  
118 - // specific box as distinct from any other box. For example, if  
119 - // you want to make /TrimBox differ from /CropBox, then you should  
120 - // set copy_if_fallback to true. 103 + // You should set copy_if_fallback to true if you want to modify a specific box as distinct from
  104 + // any other box. For example, if you want to make /TrimBox differ from /CropBox, then you
  105 + // should set copy_if_fallback to true.
121 // 106 //
122 // The copy_if_fallback flags were added in qpdf 11. 107 // The copy_if_fallback flags were added in qpdf 11.
123 // 108 //
124 - // For example, suppose that neither /CropBox nor /TrimBox is  
125 - // present on a page but /CropBox is present in the page's parent  
126 - // node in the page tree. 109 + // For example, suppose that neither /CropBox nor /TrimBox is present on a page but /CropBox is
  110 + // present in the page's parent node in the page tree.
127 // 111 //
128 - // * getTrimBox(false, false) would return the /CropBox from the  
129 - // parent node. 112 + // * getTrimBox(false, false) would return the /CropBox from the parent node.
130 // 113 //
131 - // * getTrimBox(true, false) would make a shallow copy of the  
132 - // /CropBox from the parent node into the current node and  
133 - // return it. 114 + // * getTrimBox(true, false) would make a shallow copy of the /CropBox from the parent node into
  115 + // the current node and return it.
134 // 116 //
135 - // * getTrimBox(false, true) would make a shallow copy of the  
136 - // /CropBox from the parent node into /TrimBox of the current  
137 - // node and return it. 117 + // * getTrimBox(false, true) would make a shallow copy of the /CropBox from the parent node into
  118 + // /TrimBox of the current node and return it.
138 // 119 //
139 - // * getTrimBox(true, true) would make a shallow copy of the  
140 - // /CropBox from the parent node into the current node, then  
141 - // make a shallow copy of the resulting copy to /TrimBox of the  
142 - // current node, and then return that. 120 + // * getTrimBox(true, true) would make a shallow copy of the /CropBox from the parent node into
  121 + // the current node, then make a shallow copy of the resulting copy to /TrimBox of the current
  122 + // node, and then return that.
143 // 123 //
144 - // To illustrate how these parameters carry across multiple  
145 - // layers, suppose that neither /MediaBox, /CropBox, nor /TrimBox  
146 - // is present on a page but /MediaBox is present on the parent. In  
147 - // this case: 124 + // To illustrate how these parameters carry across multiple layers, suppose that neither
  125 + // /MediaBox, /CropBox, nor /TrimBox is present on a page but /MediaBox is present on the
  126 + // parent. In this case:
148 // 127 //
149 - // * getTrimBox(false, false) would return the value of /MediaBox  
150 - // from the parent node. 128 + // * getTrimBox(false, false) would return the value of /MediaBox from the parent node.
151 // 129 //
152 - // * getTrimBox(true, false) would copy /MediaBox to the current  
153 - // node and return it. 130 + // * getTrimBox(true, false) would copy /MediaBox to the current node and return it.
154 // 131 //
155 - // * getTrimBox(false, true) would first copy /MediaBox from the  
156 - // parent to /CropBox, then copy /CropBox to /TrimBox, and then  
157 - // return the result. 132 + // * getTrimBox(false, true) would first copy /MediaBox from the parent to /CropBox, then copy
  133 + // /CropBox to /TrimBox, and then return the result.
158 // 134 //
159 - // * getTrimBox(true, true) would first copy /MediaBox from the  
160 - // parent to the current page, then copy it to /CropBox, then  
161 - // copy /CropBox to /TrimBox, and then return the result. 135 + // * getTrimBox(true, true) would first copy /MediaBox from the parent to the current page, then
  136 + // copy it to /CropBox, then copy /CropBox to /TrimBox, and then return the result.
162 // 137 //
163 - // If you need different behavior, call getAttribute directly and  
164 - // take care of your own copying. 138 + // If you need different behavior, call getAttribute directly and take care of your own copying.
165 139
166 // Return the effective MediaBox 140 // Return the effective MediaBox
167 QPDF_DLL 141 QPDF_DLL
168 QPDFObjectHandle getMediaBox(bool copy_if_shared = false); 142 QPDFObjectHandle getMediaBox(bool copy_if_shared = false);
169 143
170 - // Return the effective CropBox. If not defined, fall back to  
171 - // MediaBox 144 + // Return the effective CropBox. If not defined, fall back to MediaBox
172 QPDF_DLL 145 QPDF_DLL
173 QPDFObjectHandle getCropBox(bool copy_if_shared = false, bool copy_if_fallback = false); 146 QPDFObjectHandle getCropBox(bool copy_if_shared = false, bool copy_if_fallback = false);
174 147
175 - // Return the effective BleedBox. If not defined, fall back to  
176 - // CropBox. 148 + // Return the effective BleedBox. If not defined, fall back to CropBox.
177 QPDF_DLL 149 QPDF_DLL
178 QPDFObjectHandle getBleedBox(bool copy_if_shared = false, bool copy_if_fallback = false); 150 QPDFObjectHandle getBleedBox(bool copy_if_shared = false, bool copy_if_fallback = false);
179 151
180 - // Return the effective TrimBox. If not defined, fall back to  
181 - // CropBox. 152 + // Return the effective TrimBox. If not defined, fall back to CropBox.
182 QPDF_DLL 153 QPDF_DLL
183 QPDFObjectHandle getTrimBox(bool copy_if_shared = false, bool copy_if_fallback = false); 154 QPDFObjectHandle getTrimBox(bool copy_if_shared = false, bool copy_if_fallback = false);
184 155
185 - // Return the effective ArtBox. If not defined, fall back to  
186 - // CropBox. 156 + // Return the effective ArtBox. If not defined, fall back to CropBox.
187 QPDF_DLL 157 QPDF_DLL
188 QPDFObjectHandle getArtBox(bool copy_if_shared = false, bool copy_if_fallback = false); 158 QPDFObjectHandle getArtBox(bool copy_if_shared = false, bool copy_if_fallback = false);
189 159
190 - // Iterate through XObjects, possibly recursing into form  
191 - // XObjects. This works with pages or form XObjects. Call action  
192 - // on each XObject for which selector, if specified, returns true.  
193 - // With no selector, calls action for every object. In addition to  
194 - // the object being passed to action, the containing XObject  
195 - // dictionary and key are passed in. Remember that the XObject  
196 - // dictionary may be shared, and the object may appear in multiple  
197 - // XObject dictionaries. 160 + // Iterate through XObjects, possibly recursing into form XObjects. This works with pages or
  161 + // form XObjects. Call action on each XObject for which selector, if specified, returns true.
  162 + // With no selector, calls action for every object. In addition to the object being passed to
  163 + // action, the containing XObject dictionary and key are passed in. Remember that the XObject
  164 + // dictionary may be shared, and the object may appear in multiple XObject dictionaries.
198 QPDF_DLL 165 QPDF_DLL
199 void forEachXObject( 166 void forEachXObject(
200 bool recursive, 167 bool recursive,
@@ -214,12 +181,10 @@ class QPDFPageObjectHelper: public QPDFObjectHelper @@ -214,12 +181,10 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
214 std::function<void( 181 std::function<void(
215 QPDFObjectHandle& obj, QPDFObjectHandle& xobj_dict, std::string const& key)> action); 182 QPDFObjectHandle& obj, QPDFObjectHandle& xobj_dict, std::string const& key)> action);
216 183
217 - // Returns an empty map if there are no images or no resources.  
218 - // Prior to qpdf 8.4.0, this function did not support inherited  
219 - // resources, but it does now. Return value is a map from XObject  
220 - // name to the image object, which is always a stream. Works with  
221 - // form XObjects as well as pages. This method does not recurse  
222 - // into nested form XObjects. For that, use forEachImage. 184 + // Returns an empty map if there are no images or no resources. Prior to qpdf 8.4.0, this
  185 + // function did not support inherited resources, but it does now. Return value is a map from
  186 + // XObject name to the image object, which is always a stream. Works with form XObjects as well
  187 + // as pages. This method does not recurse into nested form XObjects. For that, use forEachImage.
223 QPDF_DLL 188 QPDF_DLL
224 std::map<std::string, QPDFObjectHandle> getImages(); 189 std::map<std::string, QPDFObjectHandle> getImages();
225 190
@@ -227,59 +192,48 @@ class QPDFPageObjectHelper: public QPDFObjectHelper @@ -227,59 +192,48 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
227 QPDF_DLL 192 QPDF_DLL
228 std::map<std::string, QPDFObjectHandle> getPageImages(); 193 std::map<std::string, QPDFObjectHandle> getPageImages();
229 194
230 - // Returns an empty map if there are no form XObjects or no  
231 - // resources. Otherwise, returns a map of keys to form XObjects  
232 - // directly referenced from this page or form XObjects. This does  
233 - // not recurse into nested form XObjects. For that, use  
234 - // forEachFormXObject. 195 + // Returns an empty map if there are no form XObjects or no resources. Otherwise, returns a map
  196 + // of keys to form XObjects directly referenced from this page or form XObjects. This does not
  197 + // recurse into nested form XObjects. For that, use forEachFormXObject.
235 QPDF_DLL 198 QPDF_DLL
236 std::map<std::string, QPDFObjectHandle> getFormXObjects(); 199 std::map<std::string, QPDFObjectHandle> getFormXObjects();
237 200
238 - // Converts each inline image to an external (normal) image if the  
239 - // size is at least the specified number of bytes. This method  
240 - // works with pages or form XObjects. By default, it recursively  
241 - // processes nested form XObjects. Pass true as shallow to avoid  
242 - // this behavior. Prior to qpdf 10.1, form XObjects were ignored,  
243 - // but this was considered a bug. 201 + // Converts each inline image to an external (normal) image if the size is at least the
  202 + // specified number of bytes. This method works with pages or form XObjects. By default, it
  203 + // recursively processes nested form XObjects. Pass true as shallow to avoid this behavior.
  204 + // Prior to qpdf 10.1, form XObjects were ignored, but this was considered a bug.
244 QPDF_DLL 205 QPDF_DLL
245 void externalizeInlineImages(size_t min_size = 0, bool shallow = false); 206 void externalizeInlineImages(size_t min_size = 0, bool shallow = false);
246 207
247 - // Return the annotations in the page's "/Annots" list, if any. If  
248 - // only_subtype is non-empty, only include annotations of the  
249 - // given subtype. 208 + // Return the annotations in the page's "/Annots" list, if any. If only_subtype is non-empty,
  209 + // only include annotations of the given subtype.
250 QPDF_DLL 210 QPDF_DLL
251 std::vector<QPDFAnnotationObjectHelper> getAnnotations(std::string const& only_subtype = ""); 211 std::vector<QPDFAnnotationObjectHelper> getAnnotations(std::string const& only_subtype = "");
252 212
253 - // Returns a vector of stream objects representing the content  
254 - // streams for the given page. This routine allows the caller to  
255 - // not care whether there are one or more than one content streams 213 + // Returns a vector of stream objects representing the content streams for the given page. This
  214 + // routine allows the caller to not care whether there are one or more than one content streams
256 // for a page. 215 // for a page.
257 QPDF_DLL 216 QPDF_DLL
258 std::vector<QPDFObjectHandle> getPageContents(); 217 std::vector<QPDFObjectHandle> getPageContents();
259 218
260 - // Add the given object as a new content stream for this page. If  
261 - // parameter 'first' is true, add to the beginning. Otherwise, add  
262 - // to the end. This routine automatically converts the page  
263 - // contents to an array if it is a scalar, allowing the caller not  
264 - // to care what the initial structure is. You can call  
265 - // coalesceContentStreams() afterwards if you want to force it to  
266 - // be a single stream. 219 + // Add the given object as a new content stream for this page. If parameter 'first' is true, add
  220 + // to the beginning. Otherwise, add to the end. This routine automatically converts the page
  221 + // contents to an array if it is a scalar, allowing the caller not to care what the initial
  222 + // structure is. You can call coalesceContentStreams() afterwards if you want to force it to be
  223 + // a single stream.
267 QPDF_DLL 224 QPDF_DLL
268 void addPageContents(QPDFObjectHandle contents, bool first); 225 void addPageContents(QPDFObjectHandle contents, bool first);
269 226
270 - // Rotate a page. If relative is false, set the rotation of the  
271 - // page to angle. Otherwise, add angle to the rotation of the  
272 - // page. Angle must be a multiple of 90. Adding 90 to the rotation 227 + // Rotate a page. If relative is false, set the rotation of the page to angle. Otherwise, add
  228 + // angle to the rotation of the page. Angle must be a multiple of 90. Adding 90 to the rotation
273 // rotates clockwise by 90 degrees. 229 // rotates clockwise by 90 degrees.
274 QPDF_DLL 230 QPDF_DLL
275 void rotatePage(int angle, bool relative); 231 void rotatePage(int angle, bool relative);
276 232
277 - // Coalesce a page's content streams. A page's content may be a  
278 - // stream or an array of streams. If this page's content is an  
279 - // array, concatenate the streams into a single stream. This can  
280 - // be useful when working with files that split content streams in  
281 - // arbitrary spots, such as in the middle of a token, as that can  
282 - // confuse some software. You could also call this after calling 233 + // Coalesce a page's content streams. A page's content may be a stream or an array of streams.
  234 + // If this page's content is an array, concatenate the streams into a single stream. This can be
  235 + // useful when working with files that split content streams in arbitrary spots, such as in the
  236 + // middle of a token, as that can confuse some software. You could also call this after calling
283 // addPageContents. 237 // addPageContents.
284 QPDF_DLL 238 QPDF_DLL
285 void coalesceContentStreams(); 239 void coalesceContentStreams();
@@ -288,25 +242,21 @@ class QPDFPageObjectHelper: public QPDFObjectHelper @@ -288,25 +242,21 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
288 // Content stream handling 242 // Content stream handling
289 // 243 //
290 244
291 - // Parse a page's contents through ParserCallbacks, described  
292 - // above. This method works whether the contents are a single  
293 - // stream or an array of streams. Call on a page object. Also  
294 - // works for form XObjects. 245 + // Parse a page's contents through ParserCallbacks, described above. This method works whether
  246 + // the contents are a single stream or an array of streams. Call on a page object. Also works
  247 + // for form XObjects.
295 QPDF_DLL 248 QPDF_DLL
296 void parseContents(QPDFObjectHandle::ParserCallbacks* callbacks); 249 void parseContents(QPDFObjectHandle::ParserCallbacks* callbacks);
297 // Old name 250 // Old name
298 QPDF_DLL 251 QPDF_DLL
299 void parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks); 252 void parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks);
300 253
301 - // Pass a page's or form XObject's contents through the given  
302 - // TokenFilter. If a pipeline is also provided, it will be the  
303 - // target of the write methods from the token filter. If a  
304 - // pipeline is not specified, any output generated by the token  
305 - // filter will be discarded. Use this interface if you need to  
306 - // pass a page's contents through filter for work purposes without  
307 - // having that filter automatically applied to the page's  
308 - // contents, as happens with addContentTokenFilter. See  
309 - // examples/pdf-count-strings.cc for an example. 254 + // Pass a page's or form XObject's contents through the given TokenFilter. If a pipeline is also
  255 + // provided, it will be the target of the write methods from the token filter. If a pipeline is
  256 + // not specified, any output generated by the token filter will be discarded. Use this interface
  257 + // if you need to pass a page's contents through filter for work purposes without having that
  258 + // filter automatically applied to the page's contents, as happens with addContentTokenFilter.
  259 + // See examples/pdf-count-strings.cc for an example.
310 QPDF_DLL 260 QPDF_DLL
311 void filterContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next = nullptr); 261 void filterContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next = nullptr);
312 262
@@ -314,95 +264,74 @@ class QPDFPageObjectHelper: public QPDFObjectHelper @@ -314,95 +264,74 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
314 QPDF_DLL 264 QPDF_DLL
315 void filterPageContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next = nullptr); 265 void filterPageContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next = nullptr);
316 266
317 - // Pipe a page's contents through the given pipeline. This method  
318 - // works whether the contents are a single stream or an array of  
319 - // streams. Also works on form XObjects. 267 + // Pipe a page's contents through the given pipeline. This method works whether the contents are
  268 + // a single stream or an array of streams. Also works on form XObjects.
320 QPDF_DLL 269 QPDF_DLL
321 void pipeContents(Pipeline* p); 270 void pipeContents(Pipeline* p);
322 // Old name 271 // Old name
323 QPDF_DLL 272 QPDF_DLL
324 void pipePageContents(Pipeline* p); 273 void pipePageContents(Pipeline* p);
325 274
326 - // Attach a token filter to a page's contents. If the page's  
327 - // contents is an array of streams, it is automatically coalesced.  
328 - // The token filter is applied to the page's contents as a single 275 + // Attach a token filter to a page's contents. If the page's contents is an array of streams, it
  276 + // is automatically coalesced. The token filter is applied to the page's contents as a single
329 // stream. Also works on form XObjects. 277 // stream. Also works on form XObjects.
330 QPDF_DLL 278 QPDF_DLL
331 void addContentTokenFilter(std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter); 279 void addContentTokenFilter(std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter);
332 280
333 - // A page's resources dictionary maps names to objects elsewhere  
334 - // in the file. This method walks through a page's contents and  
335 - // keeps tracks of which resources are referenced somewhere in the  
336 - // contents. Then it removes from the resources dictionary any  
337 - // object that is not referenced in the contents. This operation  
338 - // is most useful after calling  
339 - // QPDFPageDocumentHelper::pushInheritedAttributesToPage(). This  
340 - // method is used by page splitting code to avoid copying unused  
341 - // objects in files that used shared resource dictionaries across  
342 - // multiple pages. This method recurses into form XObjects and can  
343 - // be called with a form XObject as well as a page. 281 + // A page's resources dictionary maps names to objects elsewhere in the file. This method walks
  282 + // through a page's contents and keeps track of which resources are referenced somewhere in the
  283 + // contents. Then it removes from the resources dictionary any object that is not referenced in
  284 + // the contents. This operation is most useful after calling
  285 + // QPDFPageDocumentHelper::pushInheritedAttributesToPage(). This method is used by page
  286 + // splitting code to avoid copying unused objects in files that used shared resource
  287 + // dictionaries across multiple pages. This method recurses into form XObjects and can be called
  288 + // with a form XObject as well as a page.
344 QPDF_DLL 289 QPDF_DLL
345 void removeUnreferencedResources(); 290 void removeUnreferencedResources();
346 291
347 - // Return a new QPDFPageObjectHelper that is a duplicate of the  
348 - // page. The returned object is an indirect object that is ready  
349 - // to be inserted into the same or a different QPDF object using  
350 - // any of the addPage methods in QPDFPageDocumentHelper or QPDF.  
351 - // Without calling one of those methods, the page will not be  
352 - // added anywhere. The new page object shares all content streams  
353 - // and indirect object resources with the original page, so if you  
354 - // are going to modify the contents or other aspects of the page,  
355 - // you will need to handling copying of the component parts  
356 - // separately. 292 + // Return a new QPDFPageObjectHelper that is a duplicate of the page. The returned object is an
  293 + // indirect object that is ready to be inserted into the same or a different QPDF object using
  294 + // any of the addPage methods in QPDFPageDocumentHelper or QPDF. Without calling one of those
  295 + // methods, the page will not be added anywhere. The new page object shares all content streams
  296 + // and indirect object resources with the original page, so if you are going to modify the
  297 + // contents or other aspects of the page, you will need to handle copying of the component
  298 + // parts separately.
357 QPDF_DLL 299 QPDF_DLL
358 QPDFPageObjectHelper shallowCopyPage(); 300 QPDFPageObjectHelper shallowCopyPage();
359 301
360 - // Return a transformation matrix whose effect is the same as the  
361 - // page's /Rotate and /UserUnit parameters. If invert is true,  
362 - // return a matrix whose effect is the opposite. The regular  
363 - // matrix is suitable for taking something from this page to put  
364 - // elsewhere, and the second one is suitable for putting something  
365 - // else onto this page. The page's TrimBox is used as the bounding  
366 - // box for purposes of computing the matrix. 302 + // Return a transformation matrix whose effect is the same as the page's /Rotate and /UserUnit
  303 + // parameters. If invert is true, return a matrix whose effect is the opposite. The regular
  304 + // matrix is suitable for taking something from this page to put elsewhere, and the second one
  305 + // is suitable for putting something else onto this page. The page's TrimBox is used as the
  306 + // bounding box for purposes of computing the matrix.
367 QPDF_DLL 307 QPDF_DLL
368 QPDFObjectHandle::Matrix getMatrixForTransformations(bool invert = false); 308 QPDFObjectHandle::Matrix getMatrixForTransformations(bool invert = false);
369 309
370 - // Return a form XObject that draws this page. This is useful for  
371 - // n-up operations, underlay, overlay, thumbnail generation, or  
372 - // any other case in which it is useful to replicate the contents  
373 - // of a page in some other context. The dictionaries are shallow  
374 - // copies of the original page dictionary, and the contents are  
375 - // coalesced from the page's contents. The resulting object handle  
376 - // is not referenced anywhere. If handle_transformations is true,  
377 - // the resulting form XObject's /Matrix will be set to replicate  
378 - // rotation (/Rotate) and scaling (/UserUnit) in the page's  
379 - // dictionary. In this way, the page's transformations will be  
380 - // preserved when placing this object on another page. 310 + // Return a form XObject that draws this page. This is useful for n-up operations, underlay,
  311 + // overlay, thumbnail generation, or any other case in which it is useful to replicate the
  312 + // contents of a page in some other context. The dictionaries are shallow copies of the original
  313 + // page dictionary, and the contents are coalesced from the page's contents. The resulting
  314 + // object handle is not referenced anywhere. If handle_transformations is true, the resulting
  315 + // form XObject's /Matrix will be set to replicate rotation (/Rotate) and scaling (/UserUnit) in
  316 + // the page's dictionary. In this way, the page's transformations will be preserved when placing
  317 + // this object on another page.
381 QPDF_DLL 318 QPDF_DLL
382 QPDFObjectHandle getFormXObjectForPage(bool handle_transformations = true); 319 QPDFObjectHandle getFormXObjectForPage(bool handle_transformations = true);
383 320
384 - // Return content stream text that will place the given form  
385 - // XObject (fo) using the resource name "name" on this page  
386 - // centered within the given rectangle. If invert_transformations  
387 - // is true, the effect of any rotation (/Rotate) and scaling  
388 - // (/UserUnit) applied to the current page will be inverted in the  
389 - // form XObject placement. This will cause the form XObject's  
390 - // absolute orientation to be preserved. You could overlay one  
391 - // page on another by calling getFormXObjectForPage on the  
392 - // original page, QPDFObjectHandle::getUniqueResourceName on the  
393 - // destination page's Resources dictionary to generate a name for  
394 - // the resulting object, and calling placeFormXObject on the  
395 - // destination page. Then insert the new fo (or, if it comes from  
396 - // a different file, the result of calling copyForeignObject on  
397 - // it) into the resources dictionary using name, and append or  
398 - // prepend the content to the page's content streams. See the  
399 - // overlay/underlay code in qpdf.cc or  
400 - // examples/pdf-overlay-page.cc for an example. From qpdf 10.0.0,  
401 - // the allow_shrink and allow_expand parameters control whether  
402 - // the form XObject is allowed to be shrunk or expanded to stay  
403 - // within or maximally fill the destination rectangle. The default  
404 - // values are for backward compatibility with the pre-10.0.0  
405 - // behavior. 321 + // Return content stream text that will place the given form XObject (fo) using the resource
  322 + // name "name" on this page centered within the given rectangle. If invert_transformations is
  323 + // true, the effect of any rotation (/Rotate) and scaling (/UserUnit) applied to the current
  324 + // page will be inverted in the form XObject placement. This will cause the form XObject's
  325 + // absolute orientation to be preserved. You could overlay one page on another by calling
  326 + // getFormXObjectForPage on the original page, QPDFObjectHandle::getUniqueResourceName on the
  327 + // destination page's Resources dictionary to generate a name for the resulting object, and
  328 + // calling placeFormXObject on the destination page. Then insert the new fo (or, if it comes
  329 + // from a different file, the result of calling copyForeignObject on it) into the resources
  330 + // dictionary using name, and append or prepend the content to the page's content streams. See
  331 + // the overlay/underlay code in qpdf.cc or examples/pdf-overlay-page.cc for an example. From
  332 + // qpdf 10.0.0, the allow_shrink and allow_expand parameters control whether the form XObject is
  333 + // allowed to be shrunk or expanded to stay within or maximally fill the destination rectangle.
  334 + // The default values are for backward compatibility with the pre-10.0.0 behavior.
406 QPDF_DLL 335 QPDF_DLL
407 std::string placeFormXObject( 336 std::string placeFormXObject(
408 QPDFObjectHandle fo, 337 QPDFObjectHandle fo,
@@ -412,8 +341,7 @@ class QPDFPageObjectHelper: public QPDFObjectHelper @@ -412,8 +341,7 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
412 bool allow_shrink = true, 341 bool allow_shrink = true,
413 bool allow_expand = false); 342 bool allow_expand = false);
414 343
415 - // Alternative version that also fills in the transformation  
416 - // matrix that was used. 344 + // Alternative version that also fills in the transformation matrix that was used.
417 QPDF_DLL 345 QPDF_DLL
418 std::string placeFormXObject( 346 std::string placeFormXObject(
419 QPDFObjectHandle fo, 347 QPDFObjectHandle fo,
@@ -424,10 +352,9 @@ class QPDFPageObjectHelper: public QPDFObjectHelper @@ -424,10 +352,9 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
424 bool allow_shrink = true, 352 bool allow_shrink = true,
425 bool allow_expand = false); 353 bool allow_expand = false);
426 354
427 - // Return the transformation matrix that translates from the given  
428 - // form XObject's coordinate system into the given rectangular  
429 - // region on the page. The parameters have the same meaning as for  
430 - // placeFormXObject. 355 + // Return the transformation matrix that translates from the given form XObject's coordinate
  356 + // system into the given rectangular region on the page. The parameters have the same meaning as
  357 + // for placeFormXObject.
431 QPDF_DLL 358 QPDF_DLL
432 QPDFMatrix getMatrixForFormXObjectPlacement( 359 QPDFMatrix getMatrixForFormXObjectPlacement(
433 QPDFObjectHandle fo, 360 QPDFObjectHandle fo,
@@ -436,43 +363,32 @@ class QPDFPageObjectHelper: public QPDFObjectHelper @@ -436,43 +363,32 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
436 bool allow_shrink = true, 363 bool allow_shrink = true,
437 bool allow_expand = false); 364 bool allow_expand = false);
438 365
439 - // If a page is rotated using /Rotate in the page's dictionary,  
440 - // instead rotate the page by the same amount by altering the  
441 - // contents and removing the /Rotate key. This method adjusts the  
442 - // various page bounding boxes (/MediaBox, etc.) so that the page  
443 - // will have the same semantics. This can be useful to work around  
444 - // problems with PDF applications that can't properly handle  
445 - // rotated pages. If a QPDFAcroFormDocumentHelper is provided, it  
446 - // will be used for resolving any form fields that have to be  
447 - // rotated. If not, one will be created inside the function, which 366 + // If a page is rotated using /Rotate in the page's dictionary, instead rotate the page by the
  367 + // same amount by altering the contents and removing the /Rotate key. This method adjusts the
  368 + // various page bounding boxes (/MediaBox, etc.) so that the page will have the same semantics.
  369 + // This can be useful to work around problems with PDF applications that can't properly handle
  370 + // rotated pages. If a QPDFAcroFormDocumentHelper is provided, it will be used for resolving any
  371 + // form fields that have to be rotated. If not, one will be created inside the function, which
448 // is less efficient. 372 // is less efficient.
449 QPDF_DLL 373 QPDF_DLL
450 void flattenRotation(QPDFAcroFormDocumentHelper* afdh = nullptr); 374 void flattenRotation(QPDFAcroFormDocumentHelper* afdh = nullptr);
451 375
452 - // Copy annotations from another page into this page. The other  
453 - // page may be from the same QPDF or from a different QPDF. Each  
454 - // annotation's rectangle is transformed by the given matrix. If  
455 - // the annotation is a widget annotation that is associated with a  
456 - // form field, the form field is copied into this document's  
457 - // AcroForm dictionary as well. You can use this to copy  
458 - // annotations from a page that was converted to a form XObject  
459 - // and added to another page. For example of this, see  
460 - // examples/pdf-overlay-page.cc. This method calls  
461 - // QPDFAcroFormDocumentHelper::transformAnnotations, which will  
462 - // copy annotations and form fields so that you can copy  
463 - // annotations from a source page to any number of other pages,  
464 - // even with different matrices, and maintain independence from  
465 - // the original annotations. See also  
466 - // QPDFAcroFormDocumentHelper::fixCopiedAnnotations, which can be  
467 - // used if you copy a page and want to repair the annotations on  
468 - // the destination page to make them independent from the original  
469 - // page's annotations. 376 + // Copy annotations from another page into this page. The other page may be from the same QPDF
  377 + // or from a different QPDF. Each annotation's rectangle is transformed by the given matrix. If
  378 + // the annotation is a widget annotation that is associated with a form field, the form field is
  379 + // copied into this document's AcroForm dictionary as well. You can use this to copy annotations
  380 + // from a page that was converted to a form XObject and added to another page. For example of
  381 + // this, see examples/pdf-overlay-page.cc. This method calls
  382 + // QPDFAcroFormDocumentHelper::transformAnnotations, which will copy annotations and form fields
  383 + // so that you can copy annotations from a source page to any number of other pages, even with
  384 + // different matrices, and maintain independence from the original annotations. See also
  385 + // QPDFAcroFormDocumentHelper::fixCopiedAnnotations, which can be used if you copy a page and
  386 + // want to repair the annotations on the destination page to make them independent from the
  387 + // original page's annotations.
470 // 388 //
471 - // If you pass in a QPDFAcroFormDocumentHelper*, the method will  
472 - // use that instead of creating one in the function. Creating  
473 - // QPDFAcroFormDocumentHelper objects is expensive, so if you're  
474 - // doing a lot of copying, it can be more efficient to create  
475 - // these outside and pass them in. 389 + // If you pass in a QPDFAcroFormDocumentHelper*, the method will use that instead of creating
  390 + // one in the function. Creating QPDFAcroFormDocumentHelper objects is expensive, so if you're
  391 + // doing a lot of copying, it can be more efficient to create these outside and pass them in.
476 QPDF_DLL 392 QPDF_DLL
477 void copyAnnotations( 393 void copyAnnotations(
478 QPDFPageObjectHelper from_page, 394 QPDFPageObjectHelper from_page,
include/qpdf/QPDFTokenizer.hh
@@ -2,22 +2,19 @@ @@ -2,22 +2,19 @@
2 // 2 //
3 // This file is part of qpdf. 3 // This file is part of qpdf.
4 // 4 //
5 -// Licensed under the Apache License, Version 2.0 (the "License");  
6 -// you may not use this file except in compliance with the License.  
7 -// You may obtain a copy of the License at 5 +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
  6 +// in compliance with the License. You may obtain a copy of the License at
8 // 7 //
9 // http://www.apache.org/licenses/LICENSE-2.0 8 // http://www.apache.org/licenses/LICENSE-2.0
10 // 9 //
11 -// Unless required by applicable law or agreed to in writing, software  
12 -// distributed under the License is distributed on an "AS IS" BASIS,  
13 -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
14 -// See the License for the specific language governing permissions and  
15 -// limitations under the License. 10 +// Unless required by applicable law or agreed to in writing, software distributed under the License
  11 +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
  12 +// or implied. See the License for the specific language governing permissions and limitations under
  13 +// the License.
16 // 14 //
17 -// Versions of qpdf prior to version 7 were released under the terms  
18 -// of version 2.0 of the Artistic License. At your option, you may  
19 -// continue to consider qpdf to be licensed under those terms. Please  
20 -// see the manual for additional information. 15 +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
  16 +// License. At your option, you may continue to consider qpdf to be licensed under those terms.
  17 +// Please see the manual for additional information.
21 18
22 #ifndef QPDFTOKENIZER_HH 19 #ifndef QPDFTOKENIZER_HH
23 #define QPDFTOKENIZER_HH 20 #define QPDFTOKENIZER_HH
@@ -34,9 +31,8 @@ @@ -34,9 +31,8 @@
34 class QPDFTokenizer 31 class QPDFTokenizer
35 { 32 {
36 public: 33 public:
37 - // Token type tt_eof is only returned if allowEOF() is called on  
38 - // the tokenizer. tt_eof was introduced in QPDF version 4.1.  
39 - // tt_space, tt_comment, and tt_inline_image were added in QPDF 34 + // Token type tt_eof is only returned if allowEOF() is called on the tokenizer. tt_eof was
  35 + // introduced in QPDF version 4.1. tt_space, tt_comment, and tt_inline_image were added in QPDF
40 // version 8. 36 // version 8.
41 enum token_type_e { 37 enum token_type_e {
42 tt_bad, 38 tt_bad,
@@ -132,72 +128,65 @@ class QPDFTokenizer @@ -132,72 +128,65 @@ class QPDFTokenizer
132 QPDF_DLL 128 QPDF_DLL
133 QPDFTokenizer(); 129 QPDFTokenizer();
134 130
135 - // If called, treat EOF as a separate token type instead of an  
136 - // error. This was introduced in QPDF 4.1 to facilitate  
137 - // tokenizing content streams. 131 + // If called, treat EOF as a separate token type instead of an error. This was introduced in
  132 + // QPDF 4.1 to facilitate tokenizing content streams.
138 QPDF_DLL 133 QPDF_DLL
139 void allowEOF(); 134 void allowEOF();
140 135
141 - // If called, readToken will return "ignorable" tokens for space  
142 - // and comments. This was added in QPDF 8. 136 + // If called, readToken will return "ignorable" tokens for space and comments. This was added in
  137 + // QPDF 8.
143 QPDF_DLL 138 QPDF_DLL
144 void includeIgnorable(); 139 void includeIgnorable();
145 140
146 - // There are two modes of operation: push and pull. The pull  
147 - // method is easier but requires an input source. The push method  
148 - // is more complicated but can be used to tokenize a stream of 141 + // There are two modes of operation: push and pull. The pull method is easier but requires an
  142 + // input source. The push method is more complicated but can be used to tokenize a stream of
149 // incoming characters in a pipeline. 143 // incoming characters in a pipeline.
150 144
151 // Push mode: 145 // Push mode:
152 146
153 - // Keep presenting characters with presentCharacter() and  
154 - // presentEOF() and calling getToken() until getToken() returns  
155 - // true. When it does, be sure to check unread_char and to unread ch  
156 - // if it is true. 147 + // Keep presenting characters with presentCharacter() and presentEOF() and calling getToken()
  148 + // until getToken() returns true. When it does, be sure to check unread_char and to unread ch if
  149 + // it is true.
157 150
158 - // If these are called when a token is available, an exception  
159 - // will be thrown. 151 + // If these are called when a token is available, an exception will be thrown.
160 QPDF_DLL 152 QPDF_DLL
161 void presentCharacter(char ch); 153 void presentCharacter(char ch);
162 QPDF_DLL 154 QPDF_DLL
163 void presentEOF(); 155 void presentEOF();
164 156
165 - // If a token is available, return true and initialize token with  
166 - // the token, unread_char with whether or not we have to unread  
167 - // the last character, and if unread_char, ch with the character  
168 - // to unread. 157 + // If a token is available, return true and initialize token with the token, unread_char with
  158 + // whether or not we have to unread the last character, and if unread_char, ch with the
  159 + // character to unread.
169 QPDF_DLL 160 QPDF_DLL
170 bool getToken(Token& token, bool& unread_char, char& ch); 161 bool getToken(Token& token, bool& unread_char, char& ch);
171 162
172 - // This function returns true if the current character is between  
173 - // tokens (i.e., white space that is not part of a string) or is  
174 - // part of a comment. A tokenizing filter can call this to 163 + // This function returns true if the current character is between tokens (i.e., white space that
  164 + // is not part of a string) or is part of a comment. A tokenizing filter can call this to
175 // determine whether to output the character. 165 // determine whether to output the character.
176 QPDF_DLL 166 QPDF_DLL
177 bool betweenTokens(); 167 bool betweenTokens();
178 168
179 // Pull mode: 169 // Pull mode:
180 170
181 - // Read a token from an input source. Context describes the  
182 - // context in which the token is being read and is used in the  
183 - // exception thrown if there is an error. After a token is read,  
184 - // the position of the input source returned by input->tell()  
185 - // points to just after the token, and the input source's "last  
186 - // offset" as returned by input->getLastOffset() points to the 171 + // Read a token from an input source. Context describes the context in which the token is being
  172 + // read and is used in the exception thrown if there is an error. After a token is read, the
  173 + // position of the input source returned by input->tell() points to just after the token, and
  174 + // the input source's "last offset" as returned by input->getLastOffset() points to the
187 // beginning of the token. 175 // beginning of the token.
188 QPDF_DLL 176 QPDF_DLL
189 Token readToken( 177 Token readToken(
  178 + InputSource& input, std::string const& context, bool allow_bad = false, size_t max_len = 0);
  179 + QPDF_DLL
  180 + Token readToken(
190 std::shared_ptr<InputSource> input, 181 std::shared_ptr<InputSource> input,
191 std::string const& context, 182 std::string const& context,
192 bool allow_bad = false, 183 bool allow_bad = false,
193 size_t max_len = 0); 184 size_t max_len = 0);
194 185
195 - // Calling this method puts the tokenizer in a state for reading  
196 - // inline images. You should call this method after reading the  
197 - // character following the ID operator. In that state, it will  
198 - // return all data up to BUT NOT INCLUDING the next EI token.  
199 - // After you call this method, the next call to readToken (or the  
200 - // token created next time getToken returns true) will either be 186 + // Calling this method puts the tokenizer in a state for reading inline images. You should call
  187 + // this method after reading the character following the ID operator. In that state, it will
  188 + // return all data up to BUT NOT INCLUDING the next EI token. After you call this method, the
  189 + // next call to readToken (or the token created next time getToken returns true) will either be
201 // tt_inline_image or tt_bad. This is the only way readToken 190 // tt_inline_image or tt_bad. This is the only way readToken
202 // returns a tt_inline_image token. 191 // returns a tt_inline_image token.
203 QPDF_DLL 192 QPDF_DLL
@@ -206,21 +195,18 @@ class QPDFTokenizer @@ -206,21 +195,18 @@ class QPDFTokenizer
206 private: 195 private:
207 friend class QPDFParser; 196 friend class QPDFParser;
208 197
209 - // Read a token from an input source. Context describes the  
210 - // context in which the token is being read and is used in the  
211 - // exception thrown if there is an error. After a token is read,  
212 - // the position of the input source returned by input->tell()  
213 - // points to just after the token, and the input source's "last  
214 - // offset" as returned by input->getLastOffset() points to the  
215 - // beginning of the token. Returns false if the token is bad  
216 - // or if scanning produced an error message for any reason. 198 + // Read a token from an input source. Context describes the context in which the token is being
  199 + // read and is used in the exception thrown if there is an error. After a token is read, the
  200 + // position of the input source returned by input->tell() points to just after the token, and
  201 + // the input source's "last offset" as returned by input->getLastOffset() points to the
  202 + // beginning of the token. Returns false if the token is bad or if scanning produced an error
  203 + // message for any reason.
217 204
218 bool nextToken(InputSource& input, std::string const& context, size_t max_len = 0); 205 bool nextToken(InputSource& input, std::string const& context, size_t max_len = 0);
219 206
220 - // The following methods are only valid after nextToken has been called  
221 - // and until another QPDFTokenizer method is called. They allow the results  
222 - // of calling nextToken to be accessed without creating a Token, thus  
223 - // avoiding copying information that may not be needed. 207 + // The following methods are only valid after nextToken has been called and until another
  208 + // QPDFTokenizer method is called. They allow the results of calling nextToken to be accessed
  209 + // without creating a Token, thus avoiding copying information that may not be needed.
224 inline token_type_e getType() const noexcept; 210 inline token_type_e getType() const noexcept;
225 inline std::string const& getValue() const noexcept; 211 inline std::string const& getValue() const noexcept;
226 inline std::string const& getRawValue() const noexcept; 212 inline std::string const& getRawValue() const noexcept;
libqpdf/JSON.cc
@@ -218,13 +218,12 @@ JSON::encode_string(std::string const& str) @@ -218,13 +218,12 @@ JSON::encode_string(std::string const& str)
218 while (iter != end) { 218 while (iter != end) {
219 auto c = static_cast<unsigned char>(*iter); 219 auto c = static_cast<unsigned char>(*iter);
220 if ((c > 34 && c != '\\') || c == ' ' || c == 33) { 220 if ((c > 34 && c != '\\') || c == ' ' || c == 33) {
221 - // Optimistically check that no char in str requires escaping.  
222 - // Hopefully we can just return the input str. 221 + // Optimistically check that no char in str requires escaping. Hopefully we can just
  222 + // return the input str.
223 ++iter; 223 ++iter;
224 } else { 224 } else {
225 - // We found a char that requires escaping. Initialize result to the  
226 - // chars scanned so far, append/replace the rest of str one char at  
227 - // a time, and return the result. 225 + // We found a char that requires escaping. Initialize result to the chars scanned so
  226 + // far, append/replace the rest of str one char at a time, and return the result.
228 std::string result{begin, iter}; 227 std::string result{begin, iter};
229 228
230 for (; iter != end; ++iter) { 229 for (; iter != end; ++iter) {
@@ -532,12 +531,10 @@ JSON::checkSchemaInternal( @@ -532,12 +531,10 @@ JSON::checkSchemaInternal(
532 } else if (sch_arr) { 531 } else if (sch_arr) {
533 auto n_elements = sch_arr->elements.size(); 532 auto n_elements = sch_arr->elements.size();
534 if (n_elements == 1) { 533 if (n_elements == 1) {
535 - // A single-element array in the schema allows a single  
536 - // element in the object or a variable-length array, each  
537 - // of whose items must conform to the single element of  
538 - // the schema array. This doesn't apply to arrays of  
539 - // arrays -- we fall back to the behavior of allowing a  
540 - // single item only when the object is not an array. 534 + // A single-element array in the schema allows a single element in the object or a
  535 + // variable-length array, each of whose items must conform to the single element of the
  536 + // schema array. This doesn't apply to arrays of arrays -- we fall back to the behavior
  537 + // of allowing a single item only when the object is not an array.
541 if (this_arr) { 538 if (this_arr) {
542 int i = 0; 539 int i = 0;
543 for (auto const& element: this_arr->elements) { 540 for (auto const& element: this_arr->elements) {
@@ -560,10 +557,9 @@ JSON::checkSchemaInternal( @@ -560,10 +557,9 @@ JSON::checkSchemaInternal(
560 err_prefix + " is supposed to be an array of length " + std::to_string(n_elements)); 557 err_prefix + " is supposed to be an array of length " + std::to_string(n_elements));
561 return false; 558 return false;
562 } else { 559 } else {
563 - // A multi-element array in the schema must correspond to  
564 - // an element of the same length in the object. Each  
565 - // element in the object is validated against the  
566 - // corresponding element in the schema. 560 + // A multi-element array in the schema must correspond to an element of the same length
  561 + // in the object. Each element in the object is validated against the corresponding
  562 + // element in the schema.
567 size_t i = 0; 563 size_t i = 0;
568 for (auto const& element: this_arr->elements) { 564 for (auto const& element: this_arr->elements) {
569 checkSchemaInternal( 565 checkSchemaInternal(
@@ -701,8 +697,7 @@ JSONParser::handle_u_code( @@ -701,8 +697,7 @@ JSONParser::handle_u_code(
701 QTC::TC("libtests", "JSON 16 high high"); 697 QTC::TC("libtests", "JSON 16 high high");
702 throw std::runtime_error( 698 throw std::runtime_error(
703 "JSON: offset " + std::to_string(new_high_offset) + 699 "JSON: offset " + std::to_string(new_high_offset) +
704 - ": UTF-16 high surrogate found after previous high surrogate"  
705 - " at offset " + 700 + ": UTF-16 high surrogate found after previous high surrogate at offset " +
706 std::to_string(high_offset)); 701 std::to_string(high_offset));
707 } 702 }
708 high_offset = new_high_offset; 703 high_offset = new_high_offset;
@@ -713,8 +708,7 @@ JSONParser::handle_u_code( @@ -713,8 +708,7 @@ JSONParser::handle_u_code(
713 QTC::TC("libtests", "JSON 16 low not after high"); 708 QTC::TC("libtests", "JSON 16 low not after high");
714 throw std::runtime_error( 709 throw std::runtime_error(
715 "JSON: offset " + std::to_string(offset) + 710 "JSON: offset " + std::to_string(offset) +
716 - ": UTF-16 low surrogate found not immediately after high"  
717 - " surrogate"); 711 + ": UTF-16 low surrogate found not immediately after high surrogate");
718 } 712 }
719 high_offset = 0; 713 high_offset = 0;
720 codepoint = 0x10000U + ((high_surrogate & 0x3FFU) << 10U) + (codepoint & 0x3FF); 714 codepoint = 0x10000U + ((high_surrogate & 0x3FFU) << 10U) + (codepoint & 0x3FF);
@@ -797,8 +791,8 @@ JSONParser::append() @@ -797,8 +791,8 @@ JSONParser::append()
797 ++offset; 791 ++offset;
798 } 792 }
799 793
800 -// Append current character to token, advance to next input character and  
801 -// transition to 'next' lexer state. 794 +// Append current character to token, advance to next input character and transition to 'next' lexer
  795 +// state.
802 inline void 796 inline void
803 JSONParser::append(lex_state_e next) 797 JSONParser::append(lex_state_e next)
804 { 798 {
@@ -808,8 +802,7 @@ JSONParser::append(lex_state_e next) @@ -808,8 +802,7 @@ JSONParser::append(lex_state_e next)
808 ++offset; 802 ++offset;
809 } 803 }
810 804
811 -// Advance to next input character without appending the current character to  
812 -// token. 805 +// Advance to next input character without appending the current character to token.
813 inline void 806 inline void
814 JSONParser::ignore() 807 JSONParser::ignore()
815 { 808 {
@@ -817,8 +810,8 @@ JSONParser::ignore() @@ -817,8 +810,8 @@ JSONParser::ignore()
817 ++offset; 810 ++offset;
818 } 811 }
819 812
820 -// Advance to next input character without appending the current character to  
821 -// token and transition to 'next' lexer state. 813 +// Advance to next input character without appending the current character to token and transition
  814 +// to 'next' lexer state.
822 inline void 815 inline void
823 JSONParser::ignore(lex_state_e next) 816 JSONParser::ignore(lex_state_e next)
824 { 817 {
@@ -848,9 +841,8 @@ JSONParser::getToken() @@ -848,9 +841,8 @@ JSONParser::getToken()
848 841
849 if ((*p < 32 && *p >= 0)) { 842 if ((*p < 32 && *p >= 0)) {
850 if (*p == '\t' || *p == '\n' || *p == '\r') { 843 if (*p == '\t' || *p == '\n' || *p == '\r') {
851 - // Legal white space not permitted in strings. This will always  
852 - // end the current token (unless we are still before the start  
853 - // of the token). 844 + // Legal white space not permitted in strings. This will always end the current
  845 + // token (unless we are still before the start of the token).
854 if (lex_state == ls_top) { 846 if (lex_state == ls_top) {
855 ignore(); 847 ignore();
856 } else { 848 } else {
@@ -1044,8 +1036,7 @@ JSONParser::getToken() @@ -1044,8 +1036,7 @@ JSONParser::getToken()
1044 QTC::TC("libtests", "JSON 16 dangling high"); 1036 QTC::TC("libtests", "JSON 16 dangling high");
1045 throw std::runtime_error( 1037 throw std::runtime_error(
1046 "JSON: offset " + std::to_string(high_offset) + 1038 "JSON: offset " + std::to_string(high_offset) +
1047 - ": UTF-16 high surrogate not followed by low "  
1048 - "surrogate"); 1039 + ": UTF-16 high surrogate not followed by low surrogate");
1049 } 1040 }
1050 ignore(); 1041 ignore();
1051 return; 1042 return;
@@ -1062,8 +1053,7 @@ JSONParser::getToken() @@ -1062,8 +1053,7 @@ JSONParser::getToken()
1062 case '\\': 1053 case '\\':
1063 case '\"': 1054 case '\"':
1064 case '/': 1055 case '/':
1065 - // \/ is allowed in json input, but so is /, so we  
1066 - // don't map / to \/ in output. 1056 + // \/ is allowed in json input, but so is /, so we don't map / to \/ in output.
1067 token += *p; 1057 token += *p;
1068 break; 1058 break;
1069 case 'b': 1059 case 'b':
@@ -1113,8 +1103,8 @@ JSONParser::getToken() @@ -1113,8 +1103,8 @@ JSONParser::getToken()
1113 } 1103 }
1114 } 1104 }
1115 1105
1116 - // We only get here if on end of input or if the last character was a  
1117 - // control character or other delimiter. 1106 + // We only get here if on end of input or if the last character was a control character or other
  1107 + // delimiter.
1118 1108
1119 if (!token.empty()) { 1109 if (!token.empty()) {
1120 switch (lex_state) { 1110 switch (lex_state) {
@@ -1189,8 +1179,7 @@ JSONParser::handleToken() @@ -1189,8 +1179,7 @@ JSONParser::handleToken()
1189 } else if (parser_state == ps_array_after_item) { 1179 } else if (parser_state == ps_array_after_item) {
1190 parser_state = ps_array_after_comma; 1180 parser_state = ps_array_after_comma;
1191 } else { 1181 } else {
1192 - throw std::logic_error("JSONParser::handleToken: unexpected parser"  
1193 - " state for comma"); 1182 + throw std::logic_error("JSONParser::handleToken: unexpected parser state for comma");
1194 } 1183 }
1195 return; 1184 return;
1196 1185
@@ -1323,10 +1312,9 @@ JSONParser::handleToken() @@ -1323,10 +1312,9 @@ JSONParser::handleToken()
1323 1312
1324 if (item.isDictionary() || item.isArray()) { 1313 if (item.isDictionary() || item.isArray()) {
1325 stack.push_back({parser_state, item}); 1314 stack.push_back({parser_state, item});
1326 - // Calling container start method is postponed until after  
1327 - // adding the containers to their parent containers, if any.  
1328 - // This makes it much easier to keep track of the current  
1329 - // nesting level. 1315 + // Calling container start method is postponed until after adding the containers to their
  1316 + // parent containers, if any. This makes it much easier to keep track of the current nesting
  1317 + // level.
1330 if (item.isDictionary()) { 1318 if (item.isDictionary()) {
1331 if (reactor) { 1319 if (reactor) {
1332 reactor->dictionaryStart(); 1320 reactor->dictionaryStart();
libqpdf/Pl_Buffer.cc
@@ -13,8 +13,7 @@ Pl_Buffer::Pl_Buffer(char const* identifier, Pipeline* next) : @@ -13,8 +13,7 @@ Pl_Buffer::Pl_Buffer(char const* identifier, Pipeline* next) :
13 13
14 Pl_Buffer::~Pl_Buffer() 14 Pl_Buffer::~Pl_Buffer()
15 { 15 {
16 - // Must be explicit and not inline -- see QPDF_DLL_CLASS in  
17 - // README-maintainer 16 + // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer
18 } 17 }
19 18
20 void 19 void
libqpdf/QPDF.cc
@@ -32,8 +32,8 @@ @@ -32,8 +32,8 @@
32 #include <qpdf/QTC.hh> 32 #include <qpdf/QTC.hh>
33 #include <qpdf/QUtil.hh> 33 #include <qpdf/QUtil.hh>
34 34
35 -// This must be a fixed value. This API returns a const reference to  
36 -// it, and the C API relies on its being static as well. 35 +// This must be a fixed value. This API returns a const reference to it, and the C API relies on its
  36 +// being static as well.
37 std::string const QPDF::qpdf_version(QPDF_VERSION); 37 std::string const QPDF::qpdf_version(QPDF_VERSION);
38 38
39 static char const* EMPTY_PDF = ( 39 static char const* EMPTY_PDF = (
@@ -212,33 +212,26 @@ QPDF::QPDF() : @@ -212,33 +212,26 @@ QPDF::QPDF() :
212 m(new Members()) 212 m(new Members())
213 { 213 {
214 m->tokenizer.allowEOF(); 214 m->tokenizer.allowEOF();
215 - // Generate a unique ID. It just has to be unique among all QPDF  
216 - // objects allocated throughout the lifetime of this running  
217 - // application. 215 + // Generate a unique ID. It just has to be unique among all QPDF objects allocated throughout
  216 + // the lifetime of this running application.
218 static std::atomic<unsigned long long> unique_id{0}; 217 static std::atomic<unsigned long long> unique_id{0};
219 m->unique_id = unique_id.fetch_add(1ULL); 218 m->unique_id = unique_id.fetch_add(1ULL);
220 } 219 }
221 220
222 QPDF::~QPDF() 221 QPDF::~QPDF()
223 { 222 {
224 - // If two objects are mutually referential (through each object  
225 - // having an array or dictionary that contains an indirect  
226 - // reference to the other), the circular references in the  
227 - // std::shared_ptr objects will prevent the objects from being  
228 - // deleted. Walk through all objects in the object cache, which is  
229 - // those objects that we read from the file, and break all  
230 - // resolved indirect references by replacing them with an internal  
231 - // object type representing that they have been destroyed. Note  
232 - // that we can't break references like this at any time when the  
233 - // QPDF object is active. The call to reset also causes all direct  
234 - // QPDFObjectHandle objects that are reachable from this object to  
235 - // release their association with this QPDF. Direct objects are  
236 - // not destroyed since they can be moved to other QPDF objects  
237 - // safely.  
238 -  
239 - // At this point, obviously no one is still using the QPDF object,  
240 - // but we'll explicitly clear the xref table anyway just to  
241 - // prevent any possibility of resolve() succeeding. 223 + // If two objects are mutually referential (through each object having an array or dictionary
  224 + // that contains an indirect reference to the other), the circular references in the
  225 + // std::shared_ptr objects will prevent the objects from being deleted. Walk through all objects
  226 + // in the object cache, which is those objects that we read from the file, and break all
  227 + // resolved indirect references by replacing them with an internal object type representing that
  228 + // they have been destroyed. Note that we can't break references like this at any time when the
  229 + // QPDF object is active. The call to reset also causes all direct QPDFObjectHandle objects that
  230 + // are reachable from this object to release their association with this QPDF. Direct objects
  231 + // are not destroyed since they can be moved to other QPDF objects safely.
  232 +
  233 + // At this point, obviously no one is still using the QPDF object, but we'll explicitly clear
  234 + // the xref table anyway just to prevent any possibility of resolve() succeeding.
242 m->xref_table.clear(); 235 m->xref_table.clear();
243 for (auto const& iter: m->obj_cache) { 236 for (auto const& iter: m->obj_cache) {
244 iter.second.object->disconnect(); 237 iter.second.object->disconnect();
@@ -406,18 +399,15 @@ QPDF::findHeader() @@ -406,18 +399,15 @@ QPDF::findHeader()
406 } 399 }
407 p += 5; 400 p += 5;
408 std::string version; 401 std::string version;
409 - // Note: The string returned by line.c_str() is always  
410 - // null-terminated. The code below never overruns the buffer  
411 - // because a null character always short-circuits further  
412 - // advancement. 402 + // Note: The string returned by line.c_str() is always null-terminated. The code below never
  403 + // overruns the buffer because a null character always short-circuits further advancement.
413 bool valid = validatePDFVersion(p, version); 404 bool valid = validatePDFVersion(p, version);
414 if (valid) { 405 if (valid) {
415 m->pdf_version = version; 406 m->pdf_version = version;
416 if (global_offset != 0) { 407 if (global_offset != 0) {
417 - // Empirical evidence strongly suggests that when there is  
418 - // leading material prior to the PDF header, all explicit  
419 - // offsets in the file are such that 0 points to the  
420 - // beginning of the header. 408 + // Empirical evidence strongly suggests that when there is leading material prior to the
  409 + // PDF header, all explicit offsets in the file are such that 0 points to the beginning
  410 + // of the header.
421 QTC::TC("qpdf", "QPDF global offset"); 411 QTC::TC("qpdf", "QPDF global offset");
422 m->file = std::shared_ptr<InputSource>(new OffsetInputSource(m->file, global_offset)); 412 m->file = std::shared_ptr<InputSource>(new OffsetInputSource(m->file, global_offset));
423 } 413 }
@@ -448,14 +438,12 @@ QPDF::parse(char const* password) @@ -448,14 +438,12 @@ QPDF::parse(char const* password)
448 if (!m->file->findFirst("%PDF-", 0, 1024, hf)) { 438 if (!m->file->findFirst("%PDF-", 0, 1024, hf)) {
449 QTC::TC("qpdf", "QPDF not a pdf file"); 439 QTC::TC("qpdf", "QPDF not a pdf file");
450 warn(damagedPDF("", 0, "can't find PDF header")); 440 warn(damagedPDF("", 0, "can't find PDF header"));
451 - // QPDFWriter writes files that usually require at least  
452 - // version 1.2 for /FlateDecode 441 + // QPDFWriter writes files that usually require at least version 1.2 for /FlateDecode
453 m->pdf_version = "1.2"; 442 m->pdf_version = "1.2";
454 } 443 }
455 444
456 - // PDF spec says %%EOF must be found within the last 1024 bytes of  
457 - // the file. We add an extra 30 characters to leave room for the  
458 - // startxref stuff. 445 + // PDF spec says %%EOF must be found within the last 1024 bytes of the file. We add an extra
  446 + // 30 characters to leave room for the startxref stuff.
459 m->file->seek(0, SEEK_END); 447 m->file->seek(0, SEEK_END);
460 qpdf_offset_t end_offset = m->file->tell(); 448 qpdf_offset_t end_offset = m->file->tell();
461 qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0); 449 qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0);
@@ -494,8 +482,8 @@ void @@ -494,8 +482,8 @@ void
494 QPDF::inParse(bool v) 482 QPDF::inParse(bool v)
495 { 483 {
496 if (m->in_parse == v) { 484 if (m->in_parse == v) {
497 - // This happens if QPDFParser::parse tries to  
498 - // resolve an indirect object while it is parsing. 485 + // This happens if QPDFParser::parse tries to resolve an indirect object while it is
  486 + // parsing.
499 throw std::logic_error("QPDF: re-entrant parsing detected. This is a qpdf bug." 487 throw std::logic_error("QPDF: re-entrant parsing detected. This is a qpdf bug."
500 " Please report at https://github.com/qpdf/qpdf/issues."); 488 " Please report at https://github.com/qpdf/qpdf/issues.");
501 } 489 }
@@ -518,7 +506,7 @@ QPDF::warn( @@ -518,7 +506,7 @@ QPDF::warn(
518 qpdf_offset_t offset, 506 qpdf_offset_t offset,
519 std::string const& message) 507 std::string const& message)
520 { 508 {
521 - warn(QPDFExc(error_code, this->getFilename(), object, offset, message)); 509 + warn(QPDFExc(error_code, getFilename(), object, offset, message));
522 } 510 }
523 511
524 void 512 void
@@ -534,9 +522,8 @@ void @@ -534,9 +522,8 @@ void
534 QPDF::reconstruct_xref(QPDFExc& e) 522 QPDF::reconstruct_xref(QPDFExc& e)
535 { 523 {
536 if (m->reconstructed_xref) { 524 if (m->reconstructed_xref) {
537 - // Avoid xref reconstruction infinite loops. This is getting  
538 - // very hard to reproduce because qpdf is throwing many fewer  
539 - // exceptions while parsing. Most situations are warnings now. 525 + // Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because
  526 + // qpdf is throwing many fewer exceptions while parsing. Most situations are warnings now.
540 throw e; 527 throw e;
541 } 528 }
542 529
@@ -572,8 +559,7 @@ QPDF::reconstruct_xref(QPDFExc&amp; e) @@ -572,8 +559,7 @@ QPDF::reconstruct_xref(QPDFExc&amp; e)
572 QPDFTokenizer::Token t1 = readToken(m->file, MAX_LEN); 559 QPDFTokenizer::Token t1 = readToken(m->file, MAX_LEN);
573 qpdf_offset_t token_start = m->file->tell() - toO(t1.getValue().length()); 560 qpdf_offset_t token_start = m->file->tell() - toO(t1.getValue().length());
574 if (token_start >= next_line_start) { 561 if (token_start >= next_line_start) {
575 - // don't process yet -- wait until we get to the line  
576 - // containing this token 562 + // don't process yet -- wait until we get to the line containing this token
577 } else if (t1.isInteger()) { 563 } else if (t1.isInteger()) {
578 QPDFTokenizer::Token t2 = readToken(m->file, MAX_LEN); 564 QPDFTokenizer::Token t2 = readToken(m->file, MAX_LEN);
579 if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) { 565 if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) {
@@ -594,22 +580,18 @@ QPDF::reconstruct_xref(QPDFExc&amp; e) @@ -594,22 +580,18 @@ QPDF::reconstruct_xref(QPDFExc&amp; e)
594 } 580 }
595 581
596 if (!m->trailer.isInitialized()) { 582 if (!m->trailer.isInitialized()) {
597 - // We could check the last encountered object to see if it was  
598 - // an xref stream. If so, we could try to get the trailer  
599 - // from there. This may make it possible to recover files  
600 - // with bad startxref pointers even when they have object  
601 - // streams. 583 + // We could check the last encountered object to see if it was an xref stream. If so, we
  584 + // could try to get the trailer from there. This may make it possible to recover files with
  585 + // bad startxref pointers even when they have object streams.
602 586
603 throw damagedPDF("", 0, "unable to find trailer dictionary while recovering damaged file"); 587 throw damagedPDF("", 0, "unable to find trailer dictionary while recovering damaged file");
604 } 588 }
605 589
606 - // We could iterate through the objects looking for streams and  
607 - // try to find objects inside of them, but it's probably not worth  
608 - // the trouble. Acrobat can't recover files with any errors in an  
609 - // xref stream, and this would be a real long shot anyway. If we  
610 - // wanted to do anything that involved looking at stream contents,  
611 - // we'd also have to call initializeEncryption() here. It's safe  
612 - // to call it more than once. 590 + // We could iterate through the objects looking for streams and try to find objects inside of
  591 + // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors
  592 + // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything
  593 + // that involved looking at stream contents, we'd also have to call initializeEncryption() here.
  594 + // It's safe to call it more than once.
613 } 595 }
614 596
615 void 597 void
@@ -622,12 +604,10 @@ QPDF::read_xref(qpdf_offset_t xref_offset) @@ -622,12 +604,10 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
622 char buf[7]; 604 char buf[7];
623 memset(buf, 0, sizeof(buf)); 605 memset(buf, 0, sizeof(buf));
624 m->file->seek(xref_offset, SEEK_SET); 606 m->file->seek(xref_offset, SEEK_SET);
625 - // Some files miss the mark a little with startxref. We could  
626 - // do a better job of searching in the neighborhood for  
627 - // something that looks like either an xref table or stream,  
628 - // but the simple heuristic of skipping whitespace can help  
629 - // with the xref table case and is harmless with the stream  
630 - // case. 607 + // Some files miss the mark a little with startxref. We could do a better job of searching
  608 + // in the neighborhood for something that looks like either an xref table or stream, but the
  609 + // simple heuristic of skipping whitespace can help with the xref table case and is harmless
  610 + // with the stream case.
631 bool done = false; 611 bool done = false;
632 bool skipped_space = false; 612 bool skipped_space = false;
633 while (!done) { 613 while (!done) {
@@ -646,9 +626,8 @@ QPDF::read_xref(qpdf_offset_t xref_offset) @@ -646,9 +626,8 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
646 } 626 }
647 627
648 m->file->read(buf, sizeof(buf) - 1); 628 m->file->read(buf, sizeof(buf) - 1);
649 - // The PDF spec says xref must be followed by a line  
650 - // terminator, but files exist in the wild where it is  
651 - // terminated by arbitrary whitespace. 629 + // The PDF spec says xref must be followed by a line terminator, but files exist in the wild
  630 + // where it is terminated by arbitrary whitespace.
652 if ((strncmp(buf, "xref", 4) == 0) && QUtil::is_space(buf[4])) { 631 if ((strncmp(buf, "xref", 4) == 0) && QUtil::is_space(buf[4])) {
653 if (skipped_space) { 632 if (skipped_space) {
654 QTC::TC("qpdf", "QPDF xref skipped space"); 633 QTC::TC("qpdf", "QPDF xref skipped space");
@@ -662,8 +641,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) @@ -662,8 +641,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
662 : (buf[4] == ' ') ? 2 641 : (buf[4] == ' ') ? 2
663 : 9999)); 642 : 9999));
664 int skip = 4; 643 int skip = 4;
665 - // buf is null-terminated, and QUtil::is_space('\0') is  
666 - // false, so this won't overrun. 644 + // buf is null-terminated, and QUtil::is_space('\0') is false, so this won't overrun.
667 while (QUtil::is_space(buf[skip])) { 645 while (QUtil::is_space(buf[skip])) {
668 ++skip; 646 ++skip;
669 } 647 }
@@ -697,16 +675,16 @@ QPDF::read_xref(qpdf_offset_t xref_offset) @@ -697,16 +675,16 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
697 ") is not one plus the highest object number (" + std::to_string(max_obj) + ")"))); 675 ") is not one plus the highest object number (" + std::to_string(max_obj) + ")")));
698 } 676 }
699 677
700 - // We no longer need the deleted_objects table, so go ahead and  
701 - // clear it out to make sure we never depend on its being set. 678 + // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we
  679 + // never depend on its being set.
702 m->deleted_objects.clear(); 680 m->deleted_objects.clear();
703 } 681 }
704 682
705 bool 683 bool
706 QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes) 684 QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes)
707 { 685 {
708 - // is_space and is_digit both return false on '\0', so this will  
709 - // not overrun the null-terminated buffer. 686 + // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
  687 + // buffer.
710 char const* p = line.c_str(); 688 char const* p = line.c_str();
711 char const* start = line.c_str(); 689 char const* start = line.c_str();
712 690
@@ -753,8 +731,8 @@ QPDF::parse_xrefFirst(std::string const&amp; line, int&amp; obj, int&amp; num, int&amp; bytes) @@ -753,8 +731,8 @@ QPDF::parse_xrefFirst(std::string const&amp; line, int&amp; obj, int&amp; num, int&amp; bytes)
753 bool 731 bool
754 QPDF::parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& type) 732 QPDF::parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& type)
755 { 733 {
756 - // is_space and is_digit both return false on '\0', so this will  
757 - // not overrun the null-terminated buffer. 734 + // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
  735 + // buffer.
758 char const* p = line.c_str(); 736 char const* p = line.c_str();
759 737
760 // Skip zero or more spaces. There aren't supposed to be any. 738 // Skip zero or more spaces. There aren't supposed to be any.
@@ -862,8 +840,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) @@ -862,8 +840,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
862 "xref table", "invalid xref entry (obj=" + std::to_string(i) + ")"); 840 "xref table", "invalid xref entry (obj=" + std::to_string(i) + ")");
863 } 841 }
864 if (type == 'f') { 842 if (type == 'f') {
865 - // Save deleted items until after we've checked the  
866 - // XRefStm, if any. 843 + // Save deleted items until after we've checked the XRefStm, if any.
867 deleted_items.push_back(QPDFObjGen(toI(i), f2)); 844 deleted_items.push_back(QPDFObjGen(toI(i), f2));
868 } else { 845 } else {
869 insertXrefEntry(toI(i), 1, f1, f2); 846 insertXrefEntry(toI(i), 1, f1, f2);
@@ -902,9 +879,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) @@ -902,9 +879,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
902 QTC::TC("qpdf", "QPDF ignoring XRefStm in trailer"); 879 QTC::TC("qpdf", "QPDF ignoring XRefStm in trailer");
903 } else { 880 } else {
904 if (cur_trailer.getKey("/XRefStm").isInteger()) { 881 if (cur_trailer.getKey("/XRefStm").isInteger()) {
905 - // Read the xref stream but disregard any return value  
906 - // -- we'll use our trailer's /Prev key instead of the  
907 - // xref stream's. 882 + // Read the xref stream but disregard any return value -- we'll use our trailer's
  883 + // /Prev key instead of the xref stream's.
908 (void)read_xrefStream(cur_trailer.getKey("/XRefStm").getIntValue()); 884 (void)read_xrefStream(cur_trailer.getKey("/XRefStm").getIntValue());
909 } else { 885 } else {
910 throw damagedPDF("xref stream", xref_offset, "invalid /XRefStm"); 886 throw damagedPDF("xref stream", xref_offset, "invalid /XRefStm");
@@ -1035,8 +1011,8 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle&amp; xref_obj) @@ -1035,8 +1011,8 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle&amp; xref_obj)
1035 num_entries += toS(indx.at(i)); 1011 num_entries += toS(indx.at(i));
1036 } 1012 }
1037 1013
1038 - // entry_size and num_entries have both been validated to ensure  
1039 - // that this multiplication does not cause an overflow. 1014 + // entry_size and num_entries have both been validated to ensure that this multiplication does
  1015 + // not cause an overflow.
1040 size_t expected_size = entry_size * num_entries; 1016 size_t expected_size = entry_size * num_entries;
1041 1017
1042 std::shared_ptr<Buffer> bp = xref_obj.getStreamData(qpdf_dl_specialized); 1018 std::shared_ptr<Buffer> bp = xref_obj.getStreamData(qpdf_dl_specialized);
@@ -1060,9 +1036,8 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle&amp; xref_obj) @@ -1060,9 +1036,8 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle&amp; xref_obj)
1060 1036
1061 bool saw_first_compressed_object = false; 1037 bool saw_first_compressed_object = false;
1062 1038
1063 - // Actual size vs. expected size check above ensures that we will  
1064 - // not overflow any buffers here. We know that entry_size *  
1065 - // num_entries is equal to the size of the buffer. 1039 + // Actual size vs. expected size check above ensures that we will not overflow any buffers here.
  1040 + // We know that entry_size * num_entries is equal to the size of the buffer.
1066 unsigned char const* data = bp->getBuffer(); 1041 unsigned char const* data = bp->getBuffer();
1067 for (size_t i = 0; i < num_entries; ++i) { 1042 for (size_t i = 0; i < num_entries; ++i) {
1068 // Read this entry 1043 // Read this entry
@@ -1081,17 +1056,15 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle&amp; xref_obj) @@ -1081,17 +1056,15 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle&amp; xref_obj)
1081 } 1056 }
1082 } 1057 }
1083 1058
1084 - // Get the object and generation number. The object number is  
1085 - // based on /Index. The generation number is 0 unless this is  
1086 - // an uncompressed object record, in which case the generation  
1087 - // number appears as the third field. 1059 + // Get the object and generation number. The object number is based on /Index. The
  1060 + // generation number is 0 unless this is an uncompressed object record, in which case the
  1061 + // generation number appears as the third field.
1088 int obj = toI(indx.at(cur_chunk)); 1062 int obj = toI(indx.at(cur_chunk));
1089 if ((obj < 0) || ((std::numeric_limits<int>::max() - obj) < chunk_count)) { 1063 if ((obj < 0) || ((std::numeric_limits<int>::max() - obj) < chunk_count)) {
1090 std::ostringstream msg; 1064 std::ostringstream msg;
1091 msg.imbue(std::locale::classic()); 1065 msg.imbue(std::locale::classic());
1092 msg << "adding " << chunk_count << " to " << obj 1066 msg << "adding " << chunk_count << " to " << obj
1093 - << " while computing index in xref stream would cause"  
1094 - << " an integer overflow"; 1067 + << " while computing index in xref stream would cause an integer overflow";
1095 throw std::range_error(msg.str()); 1068 throw std::range_error(msg.str());
1096 } 1069 }
1097 obj += chunk_count; 1070 obj += chunk_count;
@@ -1113,10 +1086,8 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle&amp; xref_obj) @@ -1113,10 +1086,8 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle&amp; xref_obj)
1113 m->first_xref_item_offset = xref_offset; 1086 m->first_xref_item_offset = xref_offset;
1114 } 1087 }
1115 if (fields[0] == 0) { 1088 if (fields[0] == 0) {
1116 - // Ignore fields[2], which we don't care about in this  
1117 - // case. This works around the issue of some PDF files  
1118 - // that put invalid values, like -1, here for deleted  
1119 - // objects. 1089 + // Ignore fields[2], which we don't care about in this case. This works around the issue
  1090 + // of some PDF files that put invalid values, like -1, here for deleted objects.
1120 fields[2] = 0; 1091 fields[2] = 0;
1121 } 1092 }
1122 insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2])); 1093 insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2]));
@@ -1143,17 +1114,14 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle&amp; xref_obj) @@ -1143,17 +1114,14 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle&amp; xref_obj)
1143 void 1114 void
1144 QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite) 1115 QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite)
1145 { 1116 {
1146 - // Populate the xref table in such a way that the first reference  
1147 - // to an object that we see, which is the one in the latest xref  
1148 - // table in which it appears, is the one that gets stored. This  
1149 - // works because we are reading more recent appends before older  
1150 - // ones. Exception: if overwrite is true, then replace any  
1151 - // existing object. This is used in xref recovery mode, which  
1152 - // reads the file from beginning to end.  
1153 -  
1154 - // If there is already an entry for this object and generation in  
1155 - // the table, it means that a later xref table has registered this  
1156 - // object. Disregard this one. 1117 + // Populate the xref table in such a way that the first reference to an object that we see,
  1118 + // which is the one in the latest xref table in which it appears, is the one that gets stored.
  1119 + // This works because we are reading more recent appends before older ones. Exception: if
  1120 + // overwrite is true, then replace any existing object. This is used in xref recovery mode,
  1121 + // which reads the file from beginning to end.
  1122 +
  1123 + // If there is already an entry for this object and generation in the table, it means that a
  1124 + // later xref table has registered this object. Disregard this one.
1157 { // private scope 1125 { // private scope
1158 int gen = (f0 == 2 ? 0 : f2); 1126 int gen = (f0 == 2 ? 0 : f2);
1159 QPDFObjGen og(obj, gen); 1127 QPDFObjGen og(obj, gen);
@@ -1220,8 +1188,8 @@ QPDF::showXRefTable() @@ -1220,8 +1188,8 @@ QPDF::showXRefTable()
1220 } 1188 }
1221 } 1189 }
1222 1190
1223 -// Resolve all objects in the xref table. If this triggers a xref table  
1224 -// reconstruction abort and return false. Otherwise return true. 1191 +// Resolve all objects in the xref table. If this triggers an xref table reconstruction, abort and
  1192 +// return false. Otherwise return true.
1225 bool 1193 bool
1226 QPDF::resolveXRefTable() 1194 QPDF::resolveXRefTable()
1227 { 1195 {
@@ -1237,8 +1205,8 @@ QPDF::resolveXRefTable() @@ -1237,8 +1205,8 @@ QPDF::resolveXRefTable()
1237 return true; 1205 return true;
1238 } 1206 }
1239 1207
1240 -// Ensure all objects in the pdf file, including those in indirect  
1241 -// references, appear in the object cache. 1208 +// Ensure all objects in the pdf file, including those in indirect references, appear in the object
  1209 +// cache.
1242 void 1210 void
1243 QPDF::fixDanglingReferences(bool force) 1211 QPDF::fixDanglingReferences(bool force)
1244 { 1212 {
@@ -1255,10 +1223,9 @@ QPDF::fixDanglingReferences(bool force) @@ -1255,10 +1223,9 @@ QPDF::fixDanglingReferences(bool force)
1255 size_t 1223 size_t
1256 QPDF::getObjectCount() 1224 QPDF::getObjectCount()
1257 { 1225 {
1258 - // This method returns the next available indirect object number.  
1259 - // makeIndirectObject uses it for this purpose. After  
1260 - // fixDanglingReferences is called, all objects in the xref table  
1261 - // will also be in obj_cache. 1226 + // This method returns the next available indirect object number. makeIndirectObject uses it for
  1227 + // this purpose. After fixDanglingReferences is called, all objects in the xref table will also
  1228 + // be in obj_cache.
1262 fixDanglingReferences(); 1229 fixDanglingReferences();
1263 QPDFObjGen og; 1230 QPDFObjGen og;
1264 if (!m->obj_cache.empty()) { 1231 if (!m->obj_cache.empty()) {
@@ -1270,8 +1237,7 @@ QPDF::getObjectCount() @@ -1270,8 +1237,7 @@ QPDF::getObjectCount()
1270 std::vector<QPDFObjectHandle> 1237 std::vector<QPDFObjectHandle>
1271 QPDF::getAllObjects() 1238 QPDF::getAllObjects()
1272 { 1239 {
1273 - // After fixDanglingReferences is called, all objects are in the  
1274 - // object cache. 1240 + // After fixDanglingReferences is called, all objects are in the object cache.
1275 fixDanglingReferences(); 1241 fixDanglingReferences();
1276 std::vector<QPDFObjectHandle> result; 1242 std::vector<QPDFObjectHandle> result;
1277 for (auto const& iter: m->obj_cache) { 1243 for (auto const& iter: m->obj_cache) {
@@ -1315,34 +1281,27 @@ QPDF::readObject( @@ -1315,34 +1281,27 @@ QPDF::readObject(
1315 auto object = QPDFParser(input, m->last_object_description, m->tokenizer, decrypter, this) 1281 auto object = QPDFParser(input, m->last_object_description, m->tokenizer, decrypter, this)
1316 .parse(empty, false); 1282 .parse(empty, false);
1317 if (empty) { 1283 if (empty) {
1318 - // Nothing in the PDF spec appears to allow empty objects, but  
1319 - // they have been encountered in actual PDF files and Adobe  
1320 - // Reader appears to ignore them. 1284 + // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
  1285 + // actual PDF files and Adobe Reader appears to ignore them.
1321 warn(damagedPDF(input, input->getLastOffset(), "empty object treated as null")); 1286 warn(damagedPDF(input, input->getLastOffset(), "empty object treated as null"));
1322 } else if (object.isDictionary() && (!in_object_stream)) { 1287 } else if (object.isDictionary() && (!in_object_stream)) {
1323 // check for stream 1288 // check for stream
1324 qpdf_offset_t cur_offset = input->tell(); 1289 qpdf_offset_t cur_offset = input->tell();
1325 if (readToken(input).isWord("stream")) { 1290 if (readToken(input).isWord("stream")) {
1326 - // The PDF specification states that the word "stream"  
1327 - // should be followed by either a carriage return and  
1328 - // a newline or by a newline alone. It specifically  
1329 - // disallowed following it by a carriage return alone  
1330 - // since, in that case, there would be no way to tell  
1331 - // whether the NL in a CR NL sequence was part of the  
1332 - // stream data. However, some readers, including  
1333 - // Adobe reader, accept a carriage return by itself  
1334 - // when followed by a non-newline character, so that's  
1335 - // what we do here. We have also seen files that have  
1336 - // extraneous whitespace between the stream keyword and  
1337 - // the newline. 1291 + // The PDF specification states that the word "stream" should be followed by either a
  1292 + // carriage return and a newline or by a newline alone. It specifically disallowed
  1293 + // following it by a carriage return alone since, in that case, there would be no way to
  1294 + // tell whether the NL in a CR NL sequence was part of the stream data. However, some
  1295 + // readers, including Adobe reader, accept a carriage return by itself when followed by
  1296 + // a non-newline character, so that's what we do here. We have also seen files that have
  1297 + // extraneous whitespace between the stream keyword and the newline.
1338 bool done = false; 1298 bool done = false;
1339 while (!done) { 1299 while (!done) {
1340 done = true; 1300 done = true;
1341 char ch; 1301 char ch;
1342 if (input->read(&ch, 1) == 0) { 1302 if (input->read(&ch, 1) == 0) {
1343 - // A premature EOF here will result in some  
1344 - // other problem that will get reported at  
1345 - // another time. 1303 + // A premature EOF here will result in some other problem that will get reported
  1304 + // at another time.
1346 } else if (ch == '\n') { 1305 } else if (ch == '\n') {
1347 // ready to read stream data 1306 // ready to read stream data
1348 QTC::TC("qpdf", "QPDF stream with NL only"); 1307 QTC::TC("qpdf", "QPDF stream with NL only");
@@ -1353,10 +1312,8 @@ QPDF::readObject( @@ -1353,10 +1312,8 @@ QPDF::readObject(
1353 // Ready to read stream data 1312 // Ready to read stream data
1354 QTC::TC("qpdf", "QPDF stream with CRNL"); 1313 QTC::TC("qpdf", "QPDF stream with CRNL");
1355 } else { 1314 } else {
1356 - // Treat the \r by itself as the  
1357 - // whitespace after endstream and  
1358 - // start reading stream data in spite  
1359 - // of not having seen a newline. 1315 + // Treat the \r by itself as the whitespace after stream and start
  1316 + // reading stream data in spite of not having seen a newline.
1360 QTC::TC("qpdf", "QPDF stream with CR only"); 1317 QTC::TC("qpdf", "QPDF stream with CR only");
1361 input->unreadCh(ch); 1318 input->unreadCh(ch);
1362 warn(damagedPDF( 1319 warn(damagedPDF(
@@ -1381,9 +1338,8 @@ QPDF::readObject( @@ -1381,9 +1338,8 @@ QPDF::readObject(
1381 } 1338 }
1382 } 1339 }
1383 1340
1384 - // Must get offset before accessing any additional  
1385 - // objects since resolving a previously unresolved  
1386 - // indirect object will change file position. 1341 + // Must get offset before accessing any additional objects since resolving a previously
  1342 + // unresolved indirect object will change file position.
1387 qpdf_offset_t stream_offset = input->tell(); 1343 qpdf_offset_t stream_offset = input->tell();
1388 size_t length = 0; 1344 size_t length = 0;
1389 1345
@@ -1427,8 +1383,7 @@ QPDF::readObject( @@ -1427,8 +1383,7 @@ QPDF::readObject(
1427 } 1383 }
1428 } 1384 }
1429 1385
1430 - // Override last_offset so that it points to the beginning of the  
1431 - // object we just read 1386 + // Override last_offset so that it points to the beginning of the object we just read
1432 input->setLastOffset(offset); 1387 input->setLastOffset(offset);
1433 return object; 1388 return object;
1434 } 1389 }
@@ -1449,8 +1404,7 @@ size_t @@ -1449,8 +1404,7 @@ size_t
1449 QPDF::recoverStreamLength( 1404 QPDF::recoverStreamLength(
1450 std::shared_ptr<InputSource> input, QPDFObjGen const& og, qpdf_offset_t stream_offset) 1405 std::shared_ptr<InputSource> input, QPDFObjGen const& og, qpdf_offset_t stream_offset)
1451 { 1406 {
1452 - // Try to reconstruct stream length by looking for  
1453 - // endstream or endobj 1407 + // Try to reconstruct stream length by looking for endstream or endobj
1454 warn(damagedPDF(input, stream_offset, "attempting to recover stream length")); 1408 warn(damagedPDF(input, stream_offset, "attempting to recover stream length"));
1455 1409
1456 PatternFinder ef(*this, &QPDF::findEndstream); 1410 PatternFinder ef(*this, &QPDF::findEndstream);
@@ -1481,9 +1435,8 @@ QPDF::recoverStreamLength( @@ -1481,9 +1435,8 @@ QPDF::recoverStreamLength(
1481 } 1435 }
1482 } 1436 }
1483 if (this_obj_offset && (this_og == og)) { 1437 if (this_obj_offset && (this_og == og)) {
1484 - // Well, we found endstream\nendobj within the space  
1485 - // allowed for this object, so we're probably in good  
1486 - // shape. 1438 + // Well, we found endstream\nendobj within the space allowed for this object, so we're
  1439 + // probably in good shape.
1487 } else { 1440 } else {
1488 QTC::TC("qpdf", "QPDF found wrong endstream in recovery"); 1441 QTC::TC("qpdf", "QPDF found wrong endstream in recovery");
1489 } 1442 }
@@ -1518,14 +1471,12 @@ QPDF::readObjectAtOffset( @@ -1518,14 +1471,12 @@ QPDF::readObjectAtOffset(
1518 { 1471 {
1519 bool check_og = true; 1472 bool check_og = true;
1520 if (exp_og.getObj() == 0) { 1473 if (exp_og.getObj() == 0) {
1521 - // This method uses an expect object ID of 0 to indicate that  
1522 - // we don't know or don't care what the actual object ID is at  
1523 - // this offset. This is true when we read the xref stream and  
1524 - // linearization hint streams. In this case, we don't verify  
1525 - // the expect object ID/generation against what was read from  
1526 - // the file. There is also no reason to attempt xref recovery  
1527 - // if we get a failure in this case since the read attempt was  
1528 - // not triggered by an xref lookup. 1474 + // This method uses an expected object ID of 0 to indicate that we don't know or don't care
  1475 + // what the actual object ID is at this offset. This is true when we read the xref stream
  1476 + // and linearization hint streams. In this case, we don't verify the expected object
  1477 + // ID/generation against what was read from the file. There is also no reason to attempt
  1478 + // xref recovery if we get a failure in this case since the read attempt was not triggered
  1479 + // by an xref lookup.
1529 check_og = false; 1480 check_og = false;
1530 try_recovery = false; 1481 try_recovery = false;
1531 } 1482 }
@@ -1535,11 +1486,9 @@ QPDF::readObjectAtOffset( @@ -1535,11 +1486,9 @@ QPDF::readObjectAtOffset(
1535 try_recovery = false; 1486 try_recovery = false;
1536 } 1487 }
1537 1488
1538 - // Special case: if offset is 0, just return null. Some PDF  
1539 - // writers, in particular "Mac OS X 10.7.5 Quartz PDFContext", may  
1540 - // store deleted objects in the xref table as "0000000000 00000  
1541 - // n", which is not correct, but it won't hurt anything for to  
1542 - // ignore these. 1489 + // Special case: if offset is 0, just return null. Some PDF writers, in particular
  1490 + // "Mac OS X 10.7.5 Quartz PDFContext", may store deleted objects in the xref table as
  1491 + // "0000000000 00000 n", which is not correct, but it won't hurt anything to ignore these.
1543 if (offset == 0) { 1492 if (offset == 0) {
1544 QTC::TC("qpdf", "QPDF bogus 0 offset", 0); 1493 QTC::TC("qpdf", "QPDF bogus 0 offset", 0);
1545 warn(damagedPDF(0, "object has offset 0")); 1494 warn(damagedPDF(0, "object has offset 0"));
@@ -1579,8 +1528,7 @@ QPDF::readObjectAtOffset( @@ -1579,8 +1528,7 @@ QPDF::readObjectAtOffset(
1579 // Will be retried below 1528 // Will be retried below
1580 throw e; 1529 throw e;
1581 } else { 1530 } else {
1582 - // We can try reading the object anyway even if the ID  
1583 - // doesn't match. 1531 + // We can try reading the object anyway even if the ID doesn't match.
1584 warn(e); 1532 warn(e);
1585 } 1533 }
1586 } 1534 }
@@ -1617,16 +1565,13 @@ QPDF::readObjectAtOffset( @@ -1617,16 +1565,13 @@ QPDF::readObjectAtOffset(
1617 } 1565 }
1618 1566
1619 if (isUnresolved(og)) { 1567 if (isUnresolved(og)) {
1620 - // Store the object in the cache here so it gets cached  
1621 - // whether we first know the offset or whether we first know  
1622 - // the object ID and generation (in which we case we would get  
1623 - // here through resolve).  
1624 -  
1625 - // Determine the end offset of this object before and after  
1626 - // white space. We use these numbers to validate  
1627 - // linearization hint tables. Offsets and lengths of objects  
1628 - // may imply the end of an object to be anywhere between these  
1629 - // values. 1568 + // Store the object in the cache here so it gets cached whether we first know the offset or
  1569 + // whether we first know the object ID and generation (in which case we would get here
  1570 + // through resolve).
  1571 +
  1572 + // Determine the end offset of this object before and after white space. We use these
  1573 + // numbers to validate linearization hint tables. Offsets and lengths of objects may imply
  1574 + // the end of an object to be anywhere between these values.
1630 qpdf_offset_t end_before_space = m->file->tell(); 1575 qpdf_offset_t end_before_space = m->file->tell();
1631 1576
1632 // skip over spaces 1577 // skip over spaces
@@ -1643,41 +1588,31 @@ QPDF::readObjectAtOffset( @@ -1643,41 +1588,31 @@ QPDF::readObjectAtOffset(
1643 } 1588 }
1644 qpdf_offset_t end_after_space = m->file->tell(); 1589 qpdf_offset_t end_after_space = m->file->tell();
1645 if (skip_cache_if_in_xref && m->xref_table.count(og)) { 1590 if (skip_cache_if_in_xref && m->xref_table.count(og)) {
1646 - // Ordinarily, an object gets read here when resolved  
1647 - // through xref table or stream. In the special case of  
1648 - // the xref stream and linearization hint tables, the  
1649 - // offset comes from another source. For the specific case  
1650 - // of xref streams, the xref stream is read and loaded  
1651 - // into the object cache very early in parsing.  
1652 - // Ordinarily, when a file is updated by appending, items  
1653 - // inserted into the xref table in later updates take  
1654 - // precedence over earlier items. In the special case of  
1655 - // reusing the object number previously used as the xref  
1656 - // stream, we have the following order of events: 1591 + // Ordinarily, an object gets read here when resolved through xref table or stream. In
  1592 + // the special case of the xref stream and linearization hint tables, the offset comes
  1593 + // from another source. For the specific case of xref streams, the xref stream is read
  1594 + // and loaded into the object cache very early in parsing. Ordinarily, when a file is
  1595 + // updated by appending, items inserted into the xref table in later updates take
  1596 + // precedence over earlier items. In the special case of reusing the object number
  1597 + // previously used as the xref stream, we have the following order of events:
1657 // 1598 //
1658 // * reused object gets loaded into the xref table 1599 // * reused object gets loaded into the xref table
1659 // * old object is read here while reading xref streams 1600 // * old object is read here while reading xref streams
1660 // * original xref entry is ignored (since already in xref table) 1601 // * original xref entry is ignored (since already in xref table)
1661 // 1602 //
1662 - // It is the second step that causes a problem. Even  
1663 - // though the xref table is correct in this case, the old  
1664 - // object is already in the cache and so effectively  
1665 - // prevails over the reused object. To work around this  
1666 - // issue, we have a special case for the xref stream (via  
1667 - // the skip_cache_if_in_xref): if the object is already in  
1668 - // the xref stream, don't cache what we read here. 1603 + // It is the second step that causes a problem. Even though the xref table is correct in
  1604 + // this case, the old object is already in the cache and so effectively prevails over
  1605 + // the reused object. To work around this issue, we have a special case for the xref
  1606 + // stream (via the skip_cache_if_in_xref): if the object is already in the xref table,
  1607 + // don't cache what we read here.
1669 // 1608 //
1670 - // It is likely that the same bug may exist for  
1671 - // linearization hint tables, but the existing code uses  
1672 - // end_before_space and end_after_space from the cache, so  
1673 - // fixing that would require more significant rework. The  
1674 - // chances of a linearization hint stream being reused  
1675 - // seems smaller because the xref stream is probably the  
1676 - // highest object in the file and the linearization hint  
1677 - // stream would be some random place in the middle, so I'm  
1678 - // leaving that bug unfixed for now. If the bug were to be  
1679 - // fixed, we could use !check_og in place of  
1680 - // skip_cache_if_in_xref. 1609 + // It is likely that the same bug may exist for linearization hint tables, but the
  1610 + // existing code uses end_before_space and end_after_space from the cache, so fixing
  1611 + // that would require more significant rework. The chances of a linearization hint
  1612 + // stream being reused seems smaller because the xref stream is probably the highest
  1613 + // object in the file and the linearization hint stream would be some random place in
  1614 + // the middle, so I'm leaving that bug unfixed for now. If the bug were to be fixed, we
  1615 + // could use !check_og in place of skip_cache_if_in_xref.
1681 QTC::TC("qpdf", "QPDF skipping cache for known unchecked object"); 1616 QTC::TC("qpdf", "QPDF skipping cache for known unchecked object");
1682 } else { 1617 } else {
1683 updateCache(og, oh.getObj(), end_before_space, end_after_space); 1618 updateCache(og, oh.getObj(), end_before_space, end_after_space);
@@ -1695,9 +1630,8 @@ QPDF::resolve(QPDFObjGen og) @@ -1695,9 +1630,8 @@ QPDF::resolve(QPDFObjGen og)
1695 } 1630 }
1696 1631
1697 if (m->resolving.count(og)) { 1632 if (m->resolving.count(og)) {
1698 - // This can happen if an object references itself directly or  
1699 - // indirectly in some key that has to be resolved during  
1700 - // object parsing, such as stream length. 1633 + // This can happen if an object references itself directly or indirectly in some key that
  1634 + // has to be resolved during object parsing, such as stream length.
1701 QTC::TC("qpdf", "QPDF recursion loop in resolve"); 1635 QTC::TC("qpdf", "QPDF recursion loop in resolve");
1702 warn(damagedPDF("", "loop detected resolving object " + og.unparse(' '))); 1636 warn(damagedPDF("", "loop detected resolving object " + og.unparse(' ')));
1703 updateCache(og, QPDF_Null::create(), -1, -1); 1637 updateCache(og, QPDF_Null::create(), -1, -1);
@@ -1758,8 +1692,8 @@ QPDF::resolveObjectsInStream(int obj_stream_number) @@ -1758,8 +1692,8 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
1758 "supposed object stream " + std::to_string(obj_stream_number) + " is not a stream"); 1692 "supposed object stream " + std::to_string(obj_stream_number) + " is not a stream");
1759 } 1693 }
1760 1694
1761 - // For linearization data in the object, use the data from the  
1762 - // object stream for the objects in the stream. 1695 + // For linearization data in the object, use the data from the object stream for the objects in
  1696 + // the stream.
1763 QPDFObjGen stream_og(obj_stream_number, 0); 1697 QPDFObjGen stream_og(obj_stream_number, 0);
1764 qpdf_offset_t end_before_space = m->obj_cache[stream_og].end_before_space; 1698 qpdf_offset_t end_before_space = m->obj_cache[stream_og].end_before_space;
1765 qpdf_offset_t end_after_space = m->obj_cache[stream_og].end_after_space; 1699 qpdf_offset_t end_after_space = m->obj_cache[stream_og].end_after_space;
@@ -1804,11 +1738,10 @@ QPDF::resolveObjectsInStream(int obj_stream_number) @@ -1804,11 +1738,10 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
1804 offsets[num] = toI(offset + first); 1738 offsets[num] = toI(offset + first);
1805 } 1739 }
1806 1740
1807 - // To avoid having to read the object stream multiple times, store  
1808 - // all objects that would be found here in the cache. Remember  
1809 - // that some objects stored here might have been overridden by new  
1810 - // objects appended to the file, so it is necessary to recheck the  
1811 - // xref table and only cache what would actually be resolved here. 1741 + // To avoid having to read the object stream multiple times, store all objects that would be
  1742 + // found here in the cache. Remember that some objects stored here might have been overridden
  1743 + // by new objects appended to the file, so it is necessary to recheck the xref table and only
  1744 + // cache what would actually be resolved here.
1812 for (auto const& iter: offsets) { 1745 for (auto const& iter: offsets) {
1813 QPDFObjGen og(iter.first, 0); 1746 QPDFObjGen og(iter.first, 0);
1814 QPDFXRefEntry const& entry = m->xref_table[og]; 1747 QPDFXRefEntry const& entry = m->xref_table[og];
@@ -1936,8 +1869,7 @@ QPDF::reserveStream(QPDFObjGen const&amp; og) @@ -1936,8 +1869,7 @@ QPDF::reserveStream(QPDFObjGen const&amp; og)
1936 QPDFObjectHandle 1869 QPDFObjectHandle
1937 QPDF::getObject(QPDFObjGen const& og) 1870 QPDF::getObject(QPDFObjGen const& og)
1938 { 1871 {
1939 - // This method is called by the parser and therefore must not  
1940 - // resolve any objects. 1872 + // This method is called by the parser and therefore must not resolve any objects.
1941 if (!isCached(og)) { 1873 if (!isCached(og)) {
1942 m->obj_cache[og] = ObjCache(QPDF_Unresolved::create(this, og), -1, -1); 1874 m->obj_cache[og] = ObjCache(QPDF_Unresolved::create(this, og), -1, -1);
1943 } 1875 }
@@ -1991,48 +1923,38 @@ QPDF::copyForeignObject(QPDFObjectHandle foreign) @@ -1991,48 +1923,38 @@ QPDF::copyForeignObject(QPDFObjectHandle foreign)
1991 { 1923 {
1992 // Here's an explanation of what's going on here. 1924 // Here's an explanation of what's going on here.
1993 // 1925 //
1994 - // A QPDFObjectHandle that is an indirect object has an owning  
1995 - // QPDF. The object ID and generation refers to an object in the  
1996 - // owning QPDF. When we copy the QPDFObjectHandle from a foreign  
1997 - // QPDF into the local QPDF, we have to replace all indirect  
1998 - // object references with references to the corresponding object  
1999 - // in the local file. 1926 + // A QPDFObjectHandle that is an indirect object has an owning QPDF. The object ID and
  1927 + // generation refers to an object in the owning QPDF. When we copy the QPDFObjectHandle from a
  1928 + // foreign QPDF into the local QPDF, we have to replace all indirect object references with
  1929 + // references to the corresponding object in the local file.
2000 // 1930 //
2001 - // To do this, we maintain mappings from foreign object IDs to  
2002 - // local object IDs for each foreign QPDF that we are copying  
2003 - // from. The mapping is stored in an ObjCopier, which contains a 1931 + // To do this, we maintain mappings from foreign object IDs to local object IDs for each foreign
  1932 + // QPDF that we are copying from. The mapping is stored in an ObjCopier, which contains a
2004 // mapping from the foreign ObjGen to the local QPDFObjectHandle. 1933 // mapping from the foreign ObjGen to the local QPDFObjectHandle.
2005 // 1934 //
2006 - // To copy, we do a deep traversal of the foreign object with loop  
2007 - // detection to discover all indirect objects that are  
2008 - // encountered, stopping at page boundaries. Whenever we encounter  
2009 - // an indirect object, we check to see if we have already created  
2010 - // a local copy of it. If not, we allocate a "reserved" object  
2011 - // (or, for a stream, just a new stream) and store in the map the 1935 + // To copy, we do a deep traversal of the foreign object with loop detection to discover all
  1936 + // indirect objects that are encountered, stopping at page boundaries. Whenever we encounter an
  1937 + // indirect object, we check to see if we have already created a local copy of it. If not, we
  1938 + // allocate a "reserved" object (or, for a stream, just a new stream) and store in the map the
2012 // mapping from the foreign object ID to the new object. While we 1939 // mapping from the foreign object ID to the new object. While we
2013 // do this, we keep a list of objects to copy. 1940 // do this, we keep a list of objects to copy.
2014 // 1941 //
2015 - // Once we are done with the traversal, we copy all the objects  
2016 - // that we need to copy. However, the copies will contain indirect  
2017 - // object IDs that refer to objects in the foreign file. We need  
2018 - // to replace them with references to objects in the local file.  
2019 - // This is what replaceForeignIndirectObjects does. Once we have  
2020 - // created a copy of the foreign object with all the indirect  
2021 - // references replaced with new ones in the local context, we can  
2022 - // replace the local reserved object with the copy. This mechanism  
2023 - // allows us to copy objects with circular references in any  
2024 - // order.  
2025 -  
2026 - // For streams, rather than copying the objects, we set up the  
2027 - // stream data to pull from the original stream by using a stream  
2028 - // data provider. This is done in a manner that doesn't require  
2029 - // the original QPDF object but may require the original source of  
2030 - // the stream data with special handling for immediate_copy_from.  
2031 - // This logic is also in replaceForeignIndirectObjects.  
2032 -  
2033 - // Note that we explicitly allow use of copyForeignObject on page  
2034 - // objects. It is a documented use case to copy pages this way if  
2035 - // the intention is to not update the pages tree. 1942 + // Once we are done with the traversal, we copy all the objects that we need to copy. However,
  1943 + // the copies will contain indirect object IDs that refer to objects in the foreign file. We
  1944 + // need to replace them with references to objects in the local file. This is what
  1945 + // replaceForeignIndirectObjects does. Once we have created a copy of the foreign object with
  1946 + // all the indirect references replaced with new ones in the local context, we can replace the
  1947 + // local reserved object with the copy. This mechanism allows us to copy objects with circular
  1948 + // references in any order.
  1949 +
  1950 + // For streams, rather than copying the objects, we set up the stream data to pull from the
  1951 + // original stream by using a stream data provider. This is done in a manner that doesn't
  1952 + // require the original QPDF object but may require the original source of the stream data with
  1953 + // special handling for immediate_copy_from. This logic is also in
  1954 + // replaceForeignIndirectObjects.
  1955 +
  1956 + // Note that we explicitly allow use of copyForeignObject on page objects. It is a documented
  1957 + // use case to copy pages this way if the intention is to not update the pages tree.
2036 if (!foreign.isIndirect()) { 1958 if (!foreign.isIndirect()) {
2037 QTC::TC("qpdf", "QPDF copyForeign direct"); 1959 QTC::TC("qpdf", "QPDF copyForeign direct");
2038 throw std::logic_error("QPDF::copyForeign called with direct object handle"); 1960 throw std::logic_error("QPDF::copyForeign called with direct object handle");
@@ -2049,12 +1971,10 @@ QPDF::copyForeignObject(QPDFObjectHandle foreign) @@ -2049,12 +1971,10 @@ QPDF::copyForeignObject(QPDFObjectHandle foreign)
2049 " at the beginning of copyForeignObject"); 1971 " at the beginning of copyForeignObject");
2050 } 1972 }
2051 1973
2052 - // Make sure we have an object in this file for every referenced  
2053 - // object in the old file. obj_copier.object_map maps foreign  
2054 - // QPDFObjGen to local objects. For everything new that we have  
2055 - // to copy, the local object will be a reservation, unless it is a  
2056 - // stream, in which case the local object will already be a  
2057 - // stream. 1974 + // Make sure we have an object in this file for every referenced object in the old file.
  1975 + // obj_copier.object_map maps foreign QPDFObjGen to local objects. For everything new that we
  1976 + // have to copy, the local object will be a reservation, unless it is a stream, in which case
  1977 + // the local object will already be a stream.
2058 reserveObjects(foreign, obj_copier, true); 1978 reserveObjects(foreign, obj_copier, true);
2059 1979
2060 if (!obj_copier.visiting.empty()) { 1980 if (!obj_copier.visiting.empty()) {
@@ -2140,8 +2060,8 @@ QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier&amp; obj_cop @@ -2140,8 +2060,8 @@ QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier&amp; obj_cop
2140 QTC::TC("qpdf", "QPDF replace indirect"); 2060 QTC::TC("qpdf", "QPDF replace indirect");
2141 auto mapping = obj_copier.object_map.find(foreign.getObjGen()); 2061 auto mapping = obj_copier.object_map.find(foreign.getObjGen());
2142 if (mapping == obj_copier.object_map.end()) { 2062 if (mapping == obj_copier.object_map.end()) {
2143 - // This case would occur if this is a reference to a Page  
2144 - // or Pages object that we didn't traverse into. 2063 + // This case would occur if this is a reference to a Page or Pages object that we didn't
  2064 + // traverse into.
2145 QTC::TC("qpdf", "QPDF replace foreign indirect with null"); 2065 QTC::TC("qpdf", "QPDF replace foreign indirect with null");
2146 result = QPDFObjectHandle::newNull(); 2066 result = QPDFObjectHandle::newNull();
2147 } else { 2067 } else {
@@ -2192,9 +2112,8 @@ QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier&amp; obj_cop @@ -2192,9 +2112,8 @@ QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier&amp; obj_cop
2192 void 2112 void
2193 QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign) 2113 QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign)
2194 { 2114 {
2195 - // This method was originally written for copying foreign streams,  
2196 - // but it is used by QPDFObjectHandle to copy streams from the  
2197 - // same QPDF object as well. 2115 + // This method was originally written for copying foreign streams, but it is used by
  2116 + // QPDFObjectHandle to copy streams from the same QPDF object as well.
2198 2117
2199 QPDFObjectHandle dict = result.getDict(); 2118 QPDFObjectHandle dict = result.getDict();
2200 QPDFObjectHandle old_dict = foreign.getDict(); 2119 QPDFObjectHandle old_dict = foreign.getDict();
@@ -2204,8 +2123,8 @@ QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign) @@ -2204,8 +2123,8 @@ QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign)
2204 std::shared_ptr<QPDFObjectHandle::StreamDataProvider>(m->copied_stream_data_provider); 2123 std::shared_ptr<QPDFObjectHandle::StreamDataProvider>(m->copied_stream_data_provider);
2205 } 2124 }
2206 QPDFObjGen local_og(result.getObjGen()); 2125 QPDFObjGen local_og(result.getObjGen());
2207 - // Copy information from the foreign stream so we can pipe its  
2208 - // data later without keeping the original QPDF object around. 2126 + // Copy information from the foreign stream so we can pipe its data later without keeping the
  2127 + // original QPDF object around.
2209 2128
2210 QPDF& foreign_stream_qpdf = 2129 QPDF& foreign_stream_qpdf =
2211 foreign.getQPDF("unable to retrieve owning qpdf from foreign stream"); 2130 foreign.getQPDF("unable to retrieve owning qpdf from foreign stream");
@@ -2217,10 +2136,9 @@ QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign) @@ -2217,10 +2136,9 @@ QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign)
2217 } 2136 }
2218 std::shared_ptr<Buffer> stream_buffer = stream->getStreamDataBuffer(); 2137 std::shared_ptr<Buffer> stream_buffer = stream->getStreamDataBuffer();
2219 if ((foreign_stream_qpdf.m->immediate_copy_from) && (stream_buffer == nullptr)) { 2138 if ((foreign_stream_qpdf.m->immediate_copy_from) && (stream_buffer == nullptr)) {
2220 - // Pull the stream data into a buffer before attempting  
2221 - // the copy operation. Do it on the source stream so that  
2222 - // if the source stream is copied multiple times, we don't  
2223 - // have to keep duplicating the memory. 2139 + // Pull the stream data into a buffer before attempting the copy operation. Do it on the
  2140 + // source stream so that if the source stream is copied multiple times, we don't have to
  2141 + // keep duplicating the memory.
2224 QTC::TC("qpdf", "QPDF immediate copy stream data"); 2142 QTC::TC("qpdf", "QPDF immediate copy stream data");
2225 foreign.replaceStreamData( 2143 foreign.replaceStreamData(
2226 foreign.getRawStreamData(), 2144 foreign.getRawStreamData(),
@@ -2263,8 +2181,7 @@ QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2) @@ -2263,8 +2181,7 @@ QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2)
2263 void 2181 void
2264 QPDF::swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2) 2182 QPDF::swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2)
2265 { 2183 {
2266 - // Force objects to be read from the input source if needed, then  
2267 - // swap them in the cache. 2184 + // Force objects to be read from the input source if needed, then swap them in the cache.
2268 resolve(og1); 2185 resolve(og1);
2269 resolve(og2); 2186 resolve(og2);
2270 m->obj_cache[og1].object->swapWith(m->obj_cache[og2].object); 2187 m->obj_cache[og1].object->swapWith(m->obj_cache[og2].object);
@@ -2338,9 +2255,8 @@ QPDF::getRoot() @@ -2338,9 +2255,8 @@ QPDF::getRoot()
2338 if (!root.isDictionary()) { 2255 if (!root.isDictionary()) {
2339 throw damagedPDF("", 0, "unable to find /Root dictionary"); 2256 throw damagedPDF("", 0, "unable to find /Root dictionary");
2340 } else if ( 2257 } else if (
2341 - // Check_mode is an interim solution to request #810 pending a more  
2342 - // comprehensive review of the approach to more extensive checks and  
2343 - // warning levels. 2258 + // Check_mode is an interim solution to request #810 pending a more comprehensive review of
  2259 + // the approach to more extensive checks and warning levels.
2344 m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) { 2260 m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) {
2345 warn(damagedPDF("", 0, "catalog /Type entry missing or invalid")); 2261 warn(damagedPDF("", 0, "catalog /Type entry missing or invalid"));
2346 root.replaceKey("/Type", "/Catalog"_qpdf); 2262 root.replaceKey("/Type", "/Catalog"_qpdf);
@@ -2373,14 +2289,11 @@ QPDF::getObjectStreamData(std::map&lt;int, int&gt;&amp; omap) @@ -2373,14 +2289,11 @@ QPDF::getObjectStreamData(std::map&lt;int, int&gt;&amp; omap)
2373 std::vector<QPDFObjGen> 2289 std::vector<QPDFObjGen>
2374 QPDF::getCompressibleObjGens() 2290 QPDF::getCompressibleObjGens()
2375 { 2291 {
2376 - // Return a list of objects that are allowed to be in object  
2377 - // streams. Walk through the objects by traversing the document  
2378 - // from the root, including a traversal of the pages tree. This  
2379 - // makes that objects that are on the same page are more likely to  
2380 - // be in the same object stream, which is slightly more efficient,  
2381 - // particularly with linearized files. This is better than  
2382 - // iterating through the xref table since it avoids preserving  
2383 - // orphaned items. 2292 + // Return a list of objects that are allowed to be in object streams. Walk through the objects
  2293 + // by traversing the document from the root, including a traversal of the pages tree. This
  2294 + // means that objects that are on the same page are more likely to be in the same object stream,
  2295 + // which is slightly more efficient, particularly with linearized files. This is better than
  2296 + // iterating through the xref table since it avoids preserving orphaned items.
2384 2297
2385 // Exclude encryption dictionary, if any 2298 // Exclude encryption dictionary, if any
2386 QPDFObjectHandle encryption_dict = m->trailer.getKey("/Encrypt"); 2299 QPDFObjectHandle encryption_dict = m->trailer.getKey("/Encrypt");
@@ -2555,9 +2468,8 @@ QPDF::pipeForeignStreamData( @@ -2555,9 +2468,8 @@ QPDF::pipeForeignStreamData(
2555 will_retry); 2468 will_retry);
2556 } 2469 }
2557 2470
2558 -// Throw a generic exception when we lack context for something  
2559 -// more specific. New code should not use this. This method exists  
2560 -// to improve somewhat from calling assert in very old code. 2471 +// Throw a generic exception when we lack context for something more specific. New code should not
  2472 +// use this. This method exists to improve somewhat from calling assert in very old code.
2561 void 2473 void
2562 QPDF::stopOnError(std::string const& message) 2474 QPDF::stopOnError(std::string const& message)
2563 { 2475 {
@@ -2584,33 +2496,31 @@ QPDF::damagedPDF( @@ -2584,33 +2496,31 @@ QPDF::damagedPDF(
2584 return damagedPDF(input, m->last_object_description, offset, message); 2496 return damagedPDF(input, m->last_object_description, offset, message);
2585 } 2497 }
2586 2498
2587 -// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from  
2588 -// m->file. 2499 +// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file.
2589 QPDFExc 2500 QPDFExc
2590 QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message) 2501 QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message)
2591 { 2502 {
2592 return QPDFExc(qpdf_e_damaged_pdf, m->file->getName(), object, offset, message); 2503 return QPDFExc(qpdf_e_damaged_pdf, m->file->getName(), object, offset, message);
2593 } 2504 }
2594 2505
2595 -// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from  
2596 -// m->file and the offset from .m->file->getLastOffset(). 2506 +// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the
  2507 +// offset from m->file->getLastOffset().
2597 QPDFExc 2508 QPDFExc
2598 QPDF::damagedPDF(std::string const& object, std::string const& message) 2509 QPDF::damagedPDF(std::string const& object, std::string const& message)
2599 { 2510 {
2600 return damagedPDF(object, m->file->getLastOffset(), message); 2511 return damagedPDF(object, m->file->getLastOffset(), message);
2601 } 2512 }
2602 2513
2603 -// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from  
2604 -// m->file and the object from .m->last_object_description. 2514 +// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the object
  2515 +// from m->last_object_description.
2605 QPDFExc 2516 QPDFExc
2606 QPDF::damagedPDF(qpdf_offset_t offset, std::string const& message) 2517 QPDF::damagedPDF(qpdf_offset_t offset, std::string const& message)
2607 { 2518 {
2608 return damagedPDF(m->last_object_description, offset, message); 2519 return damagedPDF(m->last_object_description, offset, message);
2609 } 2520 }
2610 2521
2611 -// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from  
2612 -// m->file, the object from m->last_object_description and the offset from  
2613 -// m->file->getLastOffset(). 2522 +// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file, the object
  2523 +// from m->last_object_description and the offset from m->file->getLastOffset().
2614 QPDFExc 2524 QPDFExc
2615 QPDF::damagedPDF(std::string const& message) 2525 QPDF::damagedPDF(std::string const& message)
2616 { 2526 {
libqpdf/QPDFAcroFormDocumentHelper.cc
@@ -15,9 +15,8 @@ QPDFAcroFormDocumentHelper::QPDFAcroFormDocumentHelper(QPDF&amp; qpdf) : @@ -15,9 +15,8 @@ QPDFAcroFormDocumentHelper::QPDFAcroFormDocumentHelper(QPDF&amp; qpdf) :
15 QPDFDocumentHelper(qpdf), 15 QPDFDocumentHelper(qpdf),
16 m(new Members()) 16 m(new Members())
17 { 17 {
18 - // We have to analyze up front. Otherwise, when we are adding  
19 - // annotations and fields, we are in a temporarily unstable  
20 - // configuration where some widget annotations are not reachable. 18 + // We have to analyze up front. Otherwise, when we are adding annotations and fields, we are in
  19 + // a temporarily unstable configuration where some widget annotations are not reachable.
21 analyze(); 20 analyze();
22 } 21 }
23 22
@@ -77,14 +76,11 @@ QPDFAcroFormDocumentHelper::addAndRenameFormFields(std::vector&lt;QPDFObjectHandle&gt; @@ -77,14 +76,11 @@ QPDFAcroFormDocumentHelper::addAndRenameFormFields(std::vector&lt;QPDFObjectHandle&gt;
77 } 76 }
78 77
79 if (obj.hasKey("/T")) { 78 if (obj.hasKey("/T")) {
80 - // Find something we can append to the partial name that  
81 - // makes the fully qualified name unique. When we find  
82 - // something, reuse the same suffix for all fields in this  
83 - // group with the same name. We can only change the name  
84 - // of fields that have /T, and this field's /T is always  
85 - // at the end of the fully qualified name, appending to /T  
86 - // has the effect of appending the same thing to the fully  
87 - // qualified name. 79 + // Find something we can append to the partial name that makes the fully qualified
  80 + // name unique. When we find something, reuse the same suffix for all fields in this
  81 + // group with the same name. We can only change the name of fields that have /T, and
  82 + // this field's /T is always at the end of the fully qualified name, so appending to /T
  83 + // has the effect of appending the same thing to the fully qualified name.
88 std::string old_name = QPDFFormFieldObjectHelper(obj).getFullyQualifiedName(); 84 std::string old_name = QPDFFormFieldObjectHelper(obj).getFullyQualifiedName();
89 if (renames.count(old_name) == 0) { 85 if (renames.count(old_name) == 0) {
90 std::string new_name = old_name; 86 std::string new_name = old_name;
@@ -253,8 +249,7 @@ QPDFAcroFormDocumentHelper::analyze() @@ -253,8 +249,7 @@ QPDFAcroFormDocumentHelper::analyze()
253 fields = QPDFObjectHandle::newArray(); 249 fields = QPDFObjectHandle::newArray();
254 } 250 }
255 251
256 - // Traverse /AcroForm to find annotations and map them  
257 - // bidirectionally to fields. 252 + // Traverse /AcroForm to find annotations and map them bidirectionally to fields.
258 253
259 QPDFObjGen::set visited; 254 QPDFObjGen::set visited;
260 int nfields = fields.getArrayNItems(); 255 int nfields = fields.getArrayNItems();
@@ -263,12 +258,10 @@ QPDFAcroFormDocumentHelper::analyze() @@ -263,12 +258,10 @@ QPDFAcroFormDocumentHelper::analyze()
263 traverseField(fields.getArrayItem(i), null, 0, visited); 258 traverseField(fields.getArrayItem(i), null, 0, visited);
264 } 259 }
265 260
266 - // All Widget annotations should have been encountered by  
267 - // traversing /AcroForm, but in case any weren't, find them by  
268 - // walking through pages, and treat any widget annotation that is  
269 - // not associated with a field as its own field. This just ensures  
270 - // that requesting the field for any annotation we find through a  
271 - // page's /Annots list will have some associated field. Note that 261 + // All Widget annotations should have been encountered by traversing /AcroForm, but in case any
  262 + // weren't, find them by walking through pages, and treat any widget annotation that is not
  263 + // associated with a field as its own field. This just ensures that requesting the field for any
  264 + // annotation we find through a page's /Annots list will have some associated field. Note that
272 // a file that contains this kind of error will probably not 265 // a file that contains this kind of error will probably not
273 // actually work with most viewers. 266 // actually work with most viewers.
274 267
@@ -278,13 +271,11 @@ QPDFAcroFormDocumentHelper::analyze() @@ -278,13 +271,11 @@ QPDFAcroFormDocumentHelper::analyze()
278 QPDFObjGen og(annot.getObjGen()); 271 QPDFObjGen og(annot.getObjGen());
279 if (m->annotation_to_field.count(og) == 0) { 272 if (m->annotation_to_field.count(og) == 0) {
280 QTC::TC("qpdf", "QPDFAcroFormDocumentHelper orphaned widget"); 273 QTC::TC("qpdf", "QPDFAcroFormDocumentHelper orphaned widget");
281 - // This is not supposed to happen, but it's easy  
282 - // enough for us to handle this case. Treat the  
283 - // annotation as its own field. This could allow qpdf  
284 - // to sensibly handle a case such as a PDF creator  
285 - // adding a self-contained annotation (merged with the  
286 - // field dictionary) to the page's /Annots array and  
287 - // forgetting to also put it in /AcroForm. 274 + // This is not supposed to happen, but it's easy enough for us to handle this case.
  275 + // Treat the annotation as its own field. This could allow qpdf to sensibly handle a
  276 + // case such as a PDF creator adding a self-contained annotation (merged with the
  277 + // field dictionary) to the page's /Annots array and forgetting to also put it in
  278 + // /AcroForm.
288 annot.warnIfPossible("this widget annotation is not" 279 annot.warnIfPossible("this widget annotation is not"
289 " reachable from /AcroForm in the document catalog"); 280 " reachable from /AcroForm in the document catalog");
290 m->annotation_to_field[og] = QPDFFormFieldObjectHelper(annot); 281 m->annotation_to_field[og] = QPDFFormFieldObjectHelper(annot);
@@ -299,14 +290,14 @@ QPDFAcroFormDocumentHelper::traverseField( @@ -299,14 +290,14 @@ QPDFAcroFormDocumentHelper::traverseField(
299 QPDFObjectHandle field, QPDFObjectHandle parent, int depth, QPDFObjGen::set& visited) 290 QPDFObjectHandle field, QPDFObjectHandle parent, int depth, QPDFObjGen::set& visited)
300 { 291 {
301 if (depth > 100) { 292 if (depth > 100) {
302 - // Arbitrarily cut off recursion at a fixed depth to avoid  
303 - // specially crafted files that could cause stack overflow. 293 + // Arbitrarily cut off recursion at a fixed depth to avoid specially crafted files that
  294 + // could cause stack overflow.
304 return; 295 return;
305 } 296 }
306 if (!field.isIndirect()) { 297 if (!field.isIndirect()) {
307 QTC::TC("qpdf", "QPDFAcroFormDocumentHelper direct field"); 298 QTC::TC("qpdf", "QPDFAcroFormDocumentHelper direct field");
308 - field.warnIfPossible("encountered a direct object as a field or annotation while"  
309 - " traversing /AcroForm; ignoring field or annotation"); 299 + field.warnIfPossible("encountered a direct object as a field or annotation while "
  300 + "traversing /AcroForm; ignoring field or annotation");
310 return; 301 return;
311 } 302 }
312 if (!field.isDictionary()) { 303 if (!field.isDictionary()) {
@@ -322,13 +313,11 @@ QPDFAcroFormDocumentHelper::traverseField( @@ -322,13 +313,11 @@ QPDFAcroFormDocumentHelper::traverseField(
322 return; 313 return;
323 } 314 }
324 315
325 - // A dictionary encountered while traversing the /AcroForm field  
326 - // may be a form field, an annotation, or the merger of the two. A  
327 - // field that has no fields below it is a terminal. If a terminal  
328 - // field looks like an annotation, it is an annotation because  
329 - // annotation dictionary fields can be merged with terminal field  
330 - // dictionaries. Otherwise, the annotation fields might be there  
331 - // to be inherited by annotations below it. 316 + // A dictionary encountered while traversing the /AcroForm field may be a form field, an
  317 + // annotation, or the merger of the two. A field that has no fields below it is a terminal. If a
  318 + // terminal field looks like an annotation, it is an annotation because annotation dictionary
  319 + // fields can be merged with terminal field dictionaries. Otherwise, the annotation fields might
  320 + // be there to be inherited by annotations below it.
332 321
333 bool is_annotation = false; 322 bool is_annotation = false;
334 bool is_field = (0 == depth); 323 bool is_field = (0 == depth);
@@ -363,8 +352,7 @@ QPDFAcroFormDocumentHelper::traverseField( @@ -363,8 +352,7 @@ QPDFAcroFormDocumentHelper::traverseField(
363 std::string name = foh.getFullyQualifiedName(); 352 std::string name = foh.getFullyQualifiedName();
364 auto old = m->field_to_name.find(f_og); 353 auto old = m->field_to_name.find(f_og);
365 if (old != m->field_to_name.end()) { 354 if (old != m->field_to_name.end()) {
366 - // We might be updating after a name change, so remove any  
367 - // old information 355 + // We might be updating after a name change, so remove any old information
368 std::string old_name = old->second; 356 std::string old_name = old->second;
369 m->name_to_fields[old_name].erase(f_og); 357 m->name_to_fields[old_name].erase(f_og);
370 } 358 }
@@ -412,11 +400,9 @@ QPDFAcroFormDocumentHelper::generateAppearancesIfNeeded() @@ -412,11 +400,9 @@ QPDFAcroFormDocumentHelper::generateAppearancesIfNeeded()
412 for (auto& aoh: getWidgetAnnotationsForPage(page)) { 400 for (auto& aoh: getWidgetAnnotationsForPage(page)) {
413 QPDFFormFieldObjectHelper ffh = getFieldForAnnotation(aoh); 401 QPDFFormFieldObjectHelper ffh = getFieldForAnnotation(aoh);
414 if (ffh.getFieldType() == "/Btn") { 402 if (ffh.getFieldType() == "/Btn") {
415 - // Rather than generating appearances for button  
416 - // fields, rely on what's already there. Just make  
417 - // sure /AS is consistent with /V, which we can do by  
418 - // resetting the value of the field back to itself.  
419 - // This code is referenced in a comment in 403 + // Rather than generating appearances for button fields, rely on what's already
  404 + // there. Just make sure /AS is consistent with /V, which we can do by resetting the
  405 + // value of the field back to itself. This code is referenced in a comment in
420 // QPDFFormFieldObjectHelper::generateAppearance. 406 // QPDFFormFieldObjectHelper::generateAppearance.
421 if (ffh.isRadioButton() || ffh.isCheckbox()) { 407 if (ffh.isRadioButton() || ffh.isCheckbox()) {
422 ffh.setV(ffh.getValue()); 408 ffh.setV(ffh.getValue());
@@ -437,16 +423,13 @@ QPDFAcroFormDocumentHelper::adjustInheritedFields( @@ -437,16 +423,13 @@ QPDFAcroFormDocumentHelper::adjustInheritedFields(
437 bool override_q, 423 bool override_q,
438 int from_default_q) 424 int from_default_q)
439 { 425 {
440 - // Override /Q or /DA if needed. If this object has a field type,  
441 - // directly or inherited, it is a field and not just an  
442 - // annotation. In that case, we need to override if we are getting  
443 - // a value from the document that is different from the value we  
444 - // would have gotten from the old document. We must take care not  
445 - // to override an explicit value. It's possible that /FT may be  
446 - // inherited by lower fields that may explicitly set /DA or /Q or  
447 - // that this is a field whose type does not require /DA or /Q and  
448 - // we may put a value on the field that is unused. This is  
449 - // harmless, so it's not worth trying to work around. 426 + // Override /Q or /DA if needed. If this object has a field type, directly or inherited, it is a
  427 + // field and not just an annotation. In that case, we need to override if we are getting a value
  428 + // from the document that is different from the value we would have gotten from the old
  429 + // document. We must take care not to override an explicit value. It's possible that /FT may be
  430 + // inherited by lower fields that may explicitly set /DA or /Q or that this is a field whose
  431 + // type does not require /DA or /Q and we may put a value on the field that is unused. This
  432 + // is harmless, so it's not worth trying to work around.
450 433
451 auto has_explicit = [](QPDFFormFieldObjectHelper& field, std::string const& key) { 434 auto has_explicit = [](QPDFFormFieldObjectHelper& field, std::string const& key) {
452 if (field.getObjectHandle().hasKey(key)) { 435 if (field.getObjectHandle().hasKey(key)) {
@@ -550,45 +533,36 @@ void @@ -550,45 +533,36 @@ void
550 QPDFAcroFormDocumentHelper::adjustDefaultAppearances( 533 QPDFAcroFormDocumentHelper::adjustDefaultAppearances(
551 QPDFObjectHandle obj, std::map<std::string, std::map<std::string, std::string>> const& dr_map) 534 QPDFObjectHandle obj, std::map<std::string, std::map<std::string, std::string>> const& dr_map)
552 { 535 {
553 - // This method is called on a field that has been copied from  
554 - // another file but whose /DA still refers to resources in the  
555 - // original file's /DR.  
556 -  
557 - // When appearance streams are generated for variable text fields  
558 - // (see ISO 32000 PDF spec section 12.7.3.3), the field's /DA is  
559 - // used to generate content of the appearance stream. /DA contains  
560 - // references to resources that may be resolved in the document's  
561 - // /DR dictionary, which appears in the document's /AcroForm  
562 - // dictionary. For fields that we copied from other documents, we  
563 - // need to ensure that resources are mapped correctly in the case  
564 - // of conflicting names. For example, if a.pdf's /DR has /F1  
565 - // pointing to one font and b.pdf's /DR also has /F1 but it points  
566 - // elsewhere, we need to make sure appearance streams of fields  
567 - // copied from b.pdf into a.pdf use whatever font /F1 meant in  
568 - // b.pdf, not whatever it means in a.pdf. This method takes care  
569 - // of that. It is only called on fields copied from foreign files. 536 + // This method is called on a field that has been copied from another file but whose /DA still
  537 + // refers to resources in the original file's /DR.
  538 +
  539 + // When appearance streams are generated for variable text fields (see ISO 32000 PDF spec
  540 + // section 12.7.3.3), the field's /DA is used to generate content of the appearance stream. /DA
  541 + // contains references to resources that may be resolved in the document's /DR dictionary, which
  542 + // appears in the document's /AcroForm dictionary. For fields that we copied from other
  543 + // documents, we need to ensure that resources are mapped correctly in the case of conflicting
  544 + // names. For example, if a.pdf's /DR has /F1 pointing to one font and b.pdf's /DR also has /F1
  545 + // but it points elsewhere, we need to make sure appearance streams of fields copied from b.pdf
  546 + // into a.pdf use whatever font /F1 meant in b.pdf, not whatever it means in a.pdf. This method
  547 + // takes care of that. It is only called on fields copied from foreign files.
570 548
571 // A few notes: 549 // A few notes:
572 // 550 //
573 - // * If the from document's /DR and the current document's /DR  
574 - // have conflicting keys, we have already resolved the conflicts  
575 - // before calling this method. The dr_map parameter contains the  
576 - // mapping from old keys to new keys. 551 + // * If the from document's /DR and the current document's /DR have conflicting keys, we have
  552 + // already resolved the conflicts before calling this method. The dr_map parameter contains
  553 + // the mapping from old keys to new keys.
577 // 554 //
578 - // * /DA may be inherited from the document's /AcroForm  
579 - // dictionary. By the time this method has been called, we have  
580 - // already copied any document-level values into the fields to  
581 - // avoid having them inherit from the new document. This was  
582 - // done in adjustInheritedFields. 555 + // * /DA may be inherited from the document's /AcroForm dictionary. By the time this method has
  556 + // been called, we have already copied any document-level values into the fields to avoid
  557 + // having them inherit from the new document. This was done in adjustInheritedFields.
583 558
584 auto DA = obj.getKey("/DA"); 559 auto DA = obj.getKey("/DA");
585 if (!DA.isString()) { 560 if (!DA.isString()) {
586 return; 561 return;
587 } 562 }
588 563
589 - // Find names in /DA. /DA is a string that contains content  
590 - // stream-like code, so we create a stream out of the string and  
591 - // then filter it. We don't attach the stream to anything, so it 564 + // Find names in /DA. /DA is a string that contains content stream-like code, so we create a
  565 + // stream out of the string and then filter it. We don't attach the stream to anything, so it
592 // will get discarded. 566 // will get discarded.
593 ResourceFinder rf; 567 ResourceFinder rf;
594 auto da_stream = QPDFObjectHandle::newStream(&this->qpdf, DA.getUTF8Value()); 568 auto da_stream = QPDFObjectHandle::newStream(&this->qpdf, DA.getUTF8Value());
@@ -599,8 +573,8 @@ QPDFAcroFormDocumentHelper::adjustDefaultAppearances( @@ -599,8 +573,8 @@ QPDFAcroFormDocumentHelper::adjustDefaultAppearances(
599 QTC::TC("qpdf", "QPDFAcroFormDocumentHelper /DA parse error"); 573 QTC::TC("qpdf", "QPDFAcroFormDocumentHelper /DA parse error");
600 } 574 }
601 } catch (std::exception& e) { 575 } catch (std::exception& e) {
602 - // No way to reproduce in test suite right now since error  
603 - // conditions are converted to warnings. 576 + // No way to reproduce in test suite right now since error conditions are converted to
  577 + // warnings.
604 obj.warnIfPossible( 578 obj.warnIfPossible(
605 std::string("Unable to parse /DA: ") + e.what() + 579 std::string("Unable to parse /DA: ") + e.what() +
606 "; this form field may not update properly"); 580 "; this form field may not update properly");
@@ -620,15 +594,12 @@ void @@ -620,15 +594,12 @@ void
620 QPDFAcroFormDocumentHelper::adjustAppearanceStream( 594 QPDFAcroFormDocumentHelper::adjustAppearanceStream(
621 QPDFObjectHandle stream, std::map<std::string, std::map<std::string, std::string>> dr_map) 595 QPDFObjectHandle stream, std::map<std::string, std::map<std::string, std::string>> dr_map)
622 { 596 {
623 - // We don't have to modify appearance streams or their resource  
624 - // dictionaries for them to display properly, but we need to do so  
625 - // to make them safe to regenerate. Suppose an appearance stream  
626 - // has a font /F1 that is different from /F1 in /DR, and that when  
627 - // we copy the field, /F1 is remapped to /F1_1. When the field is  
628 - // regenerated, /F1_1 won't appear in the stream's resource  
629 - // dictionary, so the regenerated appearance stream will revert to  
630 - // the /F1_1 in /DR. If we adjust existing appearance streams, we  
631 - // are protected from this problem. 597 + // We don't have to modify appearance streams or their resource dictionaries for them to display
  598 + // properly, but we need to do so to make them safe to regenerate. Suppose an appearance stream
  599 + // has a font /F1 that is different from /F1 in /DR, and that when we copy the field, /F1 is
  600 + // remapped to /F1_1. When the field is regenerated, /F1_1 won't appear in the stream's resource
  601 + // dictionary, so the regenerated appearance stream will revert to the /F1_1 in /DR. If we
  602 + // adjust existing appearance streams, we are protected from this problem.
632 603
633 auto dict = stream.getDict(); 604 auto dict = stream.getDict();
634 auto resources = dict.getKey("/Resources"); 605 auto resources = dict.getKey("/Resources");
@@ -640,17 +611,15 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream( @@ -640,17 +611,15 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream(
640 resources = this->qpdf.makeIndirectObject(resources); 611 resources = this->qpdf.makeIndirectObject(resources);
641 } 612 }
642 dict.replaceKey("/Resources", resources); 613 dict.replaceKey("/Resources", resources);
643 - // Create a dictionary with top-level keys so we can use  
644 - // mergeResources to force them to be unshared. We will also use  
645 - // this to resolve conflicts that may already be in the resource 614 + // Create a dictionary with top-level keys so we can use mergeResources to force them to be
  615 + // unshared. We will also use this to resolve conflicts that may already be in the resource
646 // dictionary. 616 // dictionary.
647 auto merge_with = QPDFObjectHandle::newDictionary(); 617 auto merge_with = QPDFObjectHandle::newDictionary();
648 for (auto const& top_key: dr_map) { 618 for (auto const& top_key: dr_map) {
649 merge_with.replaceKey(top_key.first, QPDFObjectHandle::newDictionary()); 619 merge_with.replaceKey(top_key.first, QPDFObjectHandle::newDictionary());
650 } 620 }
651 resources.mergeResources(merge_with); 621 resources.mergeResources(merge_with);
652 - // Rename any keys in the resource dictionary that we  
653 - // remapped. 622 + // Rename any keys in the resource dictionary that we remapped.
654 for (auto const& i1: dr_map) { 623 for (auto const& i1: dr_map) {
655 std::string const& top_key = i1.first; 624 std::string const& top_key = i1.first;
656 auto subdict = resources.getKey(top_key); 625 auto subdict = resources.getKey(top_key);
@@ -662,12 +631,10 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream( @@ -662,12 +631,10 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream(
662 std::string const& new_key = i2.second; 631 std::string const& new_key = i2.second;
663 auto existing_new = subdict.getKey(new_key); 632 auto existing_new = subdict.getKey(new_key);
664 if (!existing_new.isNull()) { 633 if (!existing_new.isNull()) {
665 - // The resource dictionary already has a key in it  
666 - // matching what we remapped an old key to, so we'll  
667 - // have to move it out of the way. Stick it in  
668 - // merge_with, which we will re-merge with the  
669 - // dictionary when we're done. We know merge_with  
670 - // already has dictionaries for all the top keys. 634 + // The resource dictionary already has a key in it matching what we remapped an old
  635 + // key to, so we'll have to move it out of the way. Stick it in merge_with, which we
  636 + // will re-merge with the dictionary when we're done. We know merge_with already has
  637 + // dictionaries for all the top keys.
671 QTC::TC("qpdf", "QPDFAcroFormDocumentHelper ap conflict"); 638 QTC::TC("qpdf", "QPDFAcroFormDocumentHelper ap conflict");
672 merge_with.getKey(top_key).replaceKey(new_key, existing_new); 639 merge_with.getKey(top_key).replaceKey(new_key, existing_new);
673 } 640 }
@@ -679,9 +646,8 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream( @@ -679,9 +646,8 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream(
679 } 646 }
680 } 647 }
681 } 648 }
682 - // Deal with any conflicts by re-merging with merge_with and  
683 - // updating our local copy of dr_map, which we will use to modify  
684 - // the stream contents. 649 + // Deal with any conflicts by re-merging with merge_with and updating our local copy of
  650 + // dr_map, which we will use to modify the stream contents.
685 resources.mergeResources(merge_with, &dr_map); 651 resources.mergeResources(merge_with, &dr_map);
686 // Remove empty subdictionaries 652 // Remove empty subdictionaries
687 for (auto iter: resources.ditems()) { 653 for (auto iter: resources.ditems()) {
@@ -702,8 +668,8 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream( @@ -702,8 +668,8 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream(
702 auto tf = std::shared_ptr<QPDFObjectHandle::TokenFilter>(rr); 668 auto tf = std::shared_ptr<QPDFObjectHandle::TokenFilter>(rr);
703 stream.addTokenFilter(tf); 669 stream.addTokenFilter(tf);
704 } catch (std::exception& e) { 670 } catch (std::exception& e) {
705 - // No way to reproduce in test suite right now since error  
706 - // conditions are converted to warnings. 671 + // No way to reproduce in test suite right now since error conditions are converted to
  672 + // warnings.
707 stream.warnIfPossible(std::string("Unable to parse appearance stream: ") + e.what()); 673 stream.warnIfPossible(std::string("Unable to parse appearance stream: ") + e.what());
708 } 674 }
709 } 675 }
@@ -729,24 +695,22 @@ QPDFAcroFormDocumentHelper::transformAnnotations( @@ -729,24 +695,22 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
729 } 695 }
730 bool foreign = (from_qpdf != &this->qpdf); 696 bool foreign = (from_qpdf != &this->qpdf);
731 697
732 - // It's possible that we will transform annotations that don't  
733 - // include any form fields. This code takes care not to muck  
734 - // around with /AcroForm unless we have to. 698 + // It's possible that we will transform annotations that don't include any form fields. This
  699 + // code takes care not to muck around with /AcroForm unless we have to.
735 700
736 QPDFObjectHandle acroform = this->qpdf.getRoot().getKey("/AcroForm"); 701 QPDFObjectHandle acroform = this->qpdf.getRoot().getKey("/AcroForm");
737 QPDFObjectHandle from_acroform = from_qpdf->getRoot().getKey("/AcroForm"); 702 QPDFObjectHandle from_acroform = from_qpdf->getRoot().getKey("/AcroForm");
738 703
739 - // /DA and /Q may be inherited from the document-level /AcroForm  
740 - // dictionary. If we are copying a foreign stream and the stream  
741 - // is getting one of these values from its document's /AcroForm,  
742 - // we will need to copy the value explicitly so that it doesn't  
743 - // start getting its default from the destination document. 704 + // /DA and /Q may be inherited from the document-level /AcroForm dictionary. If we are copying a
  705 + // foreign stream and the stream is getting one of these values from its document's /AcroForm,
  706 + // we will need to copy the value explicitly so that it doesn't start getting its default from
  707 + // the destination document.
744 bool override_da = false; 708 bool override_da = false;
745 bool override_q = false; 709 bool override_q = false;
746 std::string from_default_da; 710 std::string from_default_da;
747 int from_default_q = 0; 711 int from_default_q = 0;
748 - // If we copy any form fields, we will need to merge the source  
749 - // document's /DR into this document's /DR. 712 + // If we copy any form fields, we will need to merge the source document's /DR into this
  713 + // document's /DR.
750 QPDFObjectHandle from_dr = QPDFObjectHandle::newNull(); 714 QPDFObjectHandle from_dr = QPDFObjectHandle::newNull();
751 if (foreign) { 715 if (foreign) {
752 std::string default_da; 716 std::string default_da;
@@ -782,9 +746,8 @@ QPDFAcroFormDocumentHelper::transformAnnotations( @@ -782,9 +746,8 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
782 } 746 }
783 } 747 }
784 748
785 - // If we have to merge /DR, we will need a mapping of conflicting  
786 - // keys for rewriting /DA. Set this up for lazy initialization in  
787 - // case we encounter any form fields. 749 + // If we have to merge /DR, we will need a mapping of conflicting keys for rewriting /DA. Set
  750 + // this up for lazy initialization in case we encounter any form fields.
788 std::map<std::string, std::map<std::string, std::string>> dr_map; 751 std::map<std::string, std::map<std::string, std::string>> dr_map;
789 bool initialized_dr_map = false; 752 bool initialized_dr_map = false;
790 QPDFObjectHandle dr = QPDFObjectHandle::newNull(); 753 QPDFObjectHandle dr = QPDFObjectHandle::newNull();
@@ -804,11 +767,9 @@ QPDFAcroFormDocumentHelper::transformAnnotations( @@ -804,11 +767,9 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
804 if (!dr.isIndirect()) { 767 if (!dr.isIndirect()) {
805 dr = acroform.replaceKeyAndGetNew("/DR", this->qpdf.makeIndirectObject(dr)); 768 dr = acroform.replaceKeyAndGetNew("/DR", this->qpdf.makeIndirectObject(dr));
806 } 769 }
807 - // Merge the other document's /DR, creating a conflict  
808 - // map. mergeResources checks to make sure both objects  
809 - // are dictionaries. By this point, if this is foreign,  
810 - // from_dr has been copied, so we use the target qpdf as  
811 - // the owning qpdf. 770 + // Merge the other document's /DR, creating a conflict map. mergeResources checks to
  771 + // make sure both objects are dictionaries. By this point, if this is foreign, from_dr
  772 + // has been copied, so we use the target qpdf as the owning qpdf.
812 from_dr.makeResourcesIndirect(this->qpdf); 773 from_dr.makeResourcesIndirect(this->qpdf);
813 dr.mergeResources(from_dr, &dr_map); 774 dr.mergeResources(from_dr, &dr_map);
814 775
@@ -818,8 +779,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations( @@ -818,8 +779,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
818 } 779 }
819 }; 780 };
820 781
821 - // This helper prevents us from copying the same object  
822 - // multiple times. 782 + // This helper prevents us from copying the same object multiple times.
823 std::map<QPDFObjGen, QPDFObjectHandle> orig_to_copy; 783 std::map<QPDFObjGen, QPDFObjectHandle> orig_to_copy;
824 auto maybe_copy_object = [&](QPDFObjectHandle& to_copy) { 784 auto maybe_copy_object = [&](QPDFObjectHandle& to_copy) {
825 auto og = to_copy.getObjGen(); 785 auto og = to_copy.getObjGen();
@@ -842,39 +802,28 @@ QPDFAcroFormDocumentHelper::transformAnnotations( @@ -842,39 +802,28 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
842 continue; 802 continue;
843 } 803 }
844 804
845 - // Make copies of annotations and fields down to the  
846 - // appearance streams, preserving all internal referential  
847 - // integrity. When the incoming annotations are from a  
848 - // different file, we first copy them locally. Then, whether  
849 - // local or foreign, we copy them again so that if we bring  
850 - // the same annotation in multiple times (e.g. overlaying a  
851 - // foreign page onto multiple local pages or a local page onto  
852 - // multiple other local pages), we don't create annotations  
853 - // that are referenced in more than one place. If we did that,  
854 - // the effect of applying transformations would be cumulative,  
855 - // which is definitely not what we want. Besides, annotations  
856 - // and fields are not intended to be referenced in multiple  
857 - // places.  
858 -  
859 - // Determine if this annotation is attached to a form field.  
860 - // If so, the annotation may be the same object as the form  
861 - // field, or the form field may have the annotation as a kid.  
862 - // In either case, we have to walk up the field structure to  
863 - // find the top-level field. Within one iteration through a  
864 - // set of annotations, we don't want to copy the same item  
865 - // more than once. For example, suppose we have field A with  
866 - // kids B, C, and D, each of which has annotations BA, CA, and  
867 - // DA. When we get to BA, we will find that BA is a kid of B  
868 - // which is under A. When we do a copyForeignObject of A, it  
869 - // will also copy everything else because of the indirect  
870 - // references. When we clone BA, we will want to clone A and  
871 - // then update A's clone's kid to point to B's clone and B's  
872 - // clone's parent to point to A's clone. The same thing holds  
873 - // for annotations. Next, when we get to CA, we will again  
874 - // discover that A is the top, but we don't want to re-copy A.  
875 - // We want CA's clone to be linked to the same clone as BA's.  
876 - // Failure to do this will break up things like radio button  
877 - // groups, which all have to be kids of the same parent. 805 + // Make copies of annotations and fields down to the appearance streams, preserving all
  806 + // internal referential integrity. When the incoming annotations are from a different file,
  807 + // we first copy them locally. Then, whether local or foreign, we copy them again so that if
  808 + // we bring the same annotation in multiple times (e.g. overlaying a foreign page onto
  809 + // multiple local pages or a local page onto multiple other local pages), we don't create
  810 + // annotations that are referenced in more than one place. If we did that, the effect of
  811 + // applying transformations would be cumulative, which is definitely not what we want.
  812 + // Besides, annotations and fields are not intended to be referenced in multiple places.
  813 +
  814 + // Determine if this annotation is attached to a form field. If so, the annotation may be
  815 + // the same object as the form field, or the form field may have the annotation as a kid. In
  816 + // either case, we have to walk up the field structure to find the top-level field. Within
  817 + // one iteration through a set of annotations, we don't want to copy the same item more than
  818 + // once. For example, suppose we have field A with kids B, C, and D, each of which has
  819 + // annotations BA, CA, and DA. When we get to BA, we will find that BA is a kid of B which
  820 + // is under A. When we do a copyForeignObject of A, it will also copy everything else
  821 + // because of the indirect references. When we clone BA, we will want to clone A and then
  822 + // update A's clone's kid to point to B's clone and B's clone's parent to point to A's clone.
  823 + // The same thing holds for annotations. Next, when we get to CA, we will again discover
  824 + // that A is the top, but we don't want to re-copy A. We want CA's clone to be linked to the
  825 + // same clone as BA's. Failure to do this will break up things like radio button groups,
  826 + // which all have to be kids of the same parent.
878 827
879 auto ffield = from_afdh->getFieldForAnnotation(annot); 828 auto ffield = from_afdh->getFieldForAnnotation(annot);
880 auto ffield_oh = ffield.getObjectHandle(); 829 auto ffield_oh = ffield.getObjectHandle();
@@ -886,36 +835,29 @@ QPDFAcroFormDocumentHelper::transformAnnotations( @@ -886,36 +835,29 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
886 } else if ((!ffield_oh.isNull()) && (!ffield_oh.isIndirect())) { 835 } else if ((!ffield_oh.isNull()) && (!ffield_oh.isIndirect())) {
887 ffield_oh.warnIfPossible("ignoring form field not indirect"); 836 ffield_oh.warnIfPossible("ignoring form field not indirect");
888 } else if (!ffield_oh.isNull()) { 837 } else if (!ffield_oh.isNull()) {
889 - // A field and its associated annotation can be the same  
890 - // object. This matters because we don't want to clone the  
891 - // annotation and field separately in this case. 838 + // A field and its associated annotation can be the same object. This matters because we
  839 + // don't want to clone the annotation and field separately in this case.
892 have_field = true; 840 have_field = true;
893 // Find the top-level field. It may be the field itself. 841 // Find the top-level field. It may be the field itself.
894 top_field = ffield.getTopLevelField(&have_parent).getObjectHandle(); 842 top_field = ffield.getTopLevelField(&have_parent).getObjectHandle();
895 if (foreign) { 843 if (foreign) {
896 - // copyForeignObject returns the same value if called  
897 - // multiple times with the same field. Create/retrieve  
898 - // the local copy of the original field. This pulls  
899 - // over everything the field references including  
900 - // annotations and appearance streams, but it's  
901 - // harmless to call copyForeignObject on them too.  
902 - // They will already be copied, so we'll get the right  
903 - // object back. 844 + // copyForeignObject returns the same value if called multiple times with the same
  845 + // field. Create/retrieve the local copy of the original field. This pulls over
  846 + // everything the field references including annotations and appearance streams, but
  847 + // it's harmless to call copyForeignObject on them too. They will already be copied,
  848 + // so we'll get the right object back.
904 849
905 // top_field and ffield_oh are known to be indirect. 850 // top_field and ffield_oh are known to be indirect.
906 top_field = this->qpdf.copyForeignObject(top_field); 851 top_field = this->qpdf.copyForeignObject(top_field);
907 ffield_oh = this->qpdf.copyForeignObject(ffield_oh); 852 ffield_oh = this->qpdf.copyForeignObject(ffield_oh);
908 } else { 853 } else {
909 - // We don't need to add top_field to old_fields if  
910 - // it's foreign because the new copy of the foreign  
911 - // field won't be referenced anywhere. It's just the  
912 - // starting point for us to make an additional local  
913 - // copy of. 854 + // We don't need to add top_field to old_fields if it's foreign because the new copy
  855 + // of the foreign field won't be referenced anywhere. It's just the starting point
  856 + // for us to make an additional local copy of.
914 old_fields.insert(top_field.getObjGen()); 857 old_fields.insert(top_field.getObjGen());
915 } 858 }
916 859
917 - // Traverse the field, copying kids, and preserving  
918 - // integrity. 860 + // Traverse the field, copying kids, and preserving integrity.
919 std::list<QPDFObjectHandle> queue; 861 std::list<QPDFObjectHandle> queue;
920 QPDFObjGen::set seen; 862 QPDFObjGen::set seen;
921 if (maybe_copy_object(top_field)) { 863 if (maybe_copy_object(top_field)) {
@@ -933,8 +875,8 @@ QPDFAcroFormDocumentHelper::transformAnnotations( @@ -933,8 +875,8 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
933 parent.warnIfPossible( 875 parent.warnIfPossible(
934 "while traversing field " + obj.getObjGen().unparse(',') + 876 "while traversing field " + obj.getObjGen().unparse(',') +
935 ", found parent (" + parent_og.unparse(',') + 877 ", found parent (" + parent_og.unparse(',') +
936 - ") that had not been seen, indicating likely"  
937 - " invalid field structure"); 878 + ") that had not been seen, indicating likely invalid field "
  879 + "structure");
938 } 880 }
939 } 881 }
940 auto kids = obj.getKey("/Kids"); 882 auto kids = obj.getKey("/Kids");
@@ -955,17 +897,13 @@ QPDFAcroFormDocumentHelper::transformAnnotations( @@ -955,17 +897,13 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
955 if (foreign) { 897 if (foreign) {
956 // Lazily initialize our /DR and the conflict map. 898 // Lazily initialize our /DR and the conflict map.
957 init_dr_map(); 899 init_dr_map();
958 - // The spec doesn't say anything about /DR on the  
959 - // field, but lots of writers put one there, and  
960 - // it is frequently the same as the document-level  
961 - // /DR. To avoid having the field's /DR point to  
962 - // information that we are not maintaining, just  
963 - // reset it to that if it exists. Empirical  
964 - // evidence suggests that many readers, including  
965 - // Acrobat, Adobe Acrobat Reader, chrome, firefox,  
966 - // the mac Preview application, and several of the  
967 - // free readers on Linux all ignore /DR at the  
968 - // field level. 900 + // The spec doesn't say anything about /DR on the field, but lots of writers
  901 + // put one there, and it is frequently the same as the document-level /DR.
  902 + // To avoid having the field's /DR point to information that we are not
  903 + // maintaining, just reset it to that if it exists. Empirical evidence
  904 + // suggests that many readers, including Acrobat, Adobe Acrobat Reader,
  905 + // chrome, firefox, the mac Preview application, and several of the free
  906 + // readers on Linux all ignore /DR at the field level.
969 if (obj.hasKey("/DR")) { 907 if (obj.hasKey("/DR")) {
970 obj.replaceKey("/DR", dr); 908 obj.replaceKey("/DR", dr);
971 } 909 }
@@ -1029,8 +967,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations( @@ -1029,8 +967,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
1029 } 967 }
1030 } 968 }
1031 969
1032 - // Now we can safely mutate the annotation and its appearance  
1033 - // streams. 970 + // Now we can safely mutate the annotation and its appearance streams.
1034 for (auto& stream: streams) { 971 for (auto& stream: streams) {
1035 auto dict = stream.getDict(); 972 auto dict = stream.getDict();
1036 auto omatrix = dict.getKey("/Matrix"); 973 auto omatrix = dict.getKey("/Matrix");
libqpdf/QPDFFormFieldObjectHelper.cc
@@ -310,15 +310,15 @@ QPDFFormFieldObjectHelper::setV(QPDFObjectHandle value, bool need_appearances) @@ -310,15 +310,15 @@ QPDFFormFieldObjectHelper::setV(QPDFObjectHandle value, bool need_appearances)
310 } 310 }
311 } 311 }
312 if (!okay) { 312 if (!okay) {
313 - this->oh.warnIfPossible("ignoring attempt to set a checkbox field to a"  
314 - " value of other than /Yes or /Off"); 313 + this->oh.warnIfPossible("ignoring attempt to set a checkbox field to a value of "
  314 + "other than /Yes or /Off");
315 } 315 }
316 } else if (isRadioButton()) { 316 } else if (isRadioButton()) {
317 if (value.isName()) { 317 if (value.isName()) {
318 setRadioButtonValue(value); 318 setRadioButtonValue(value);
319 } else { 319 } else {
320 - this->oh.warnIfPossible("ignoring attempt to set a radio button field to"  
321 - " an object that is not a name"); 320 + this->oh.warnIfPossible(
  321 + "ignoring attempt to set a radio button field to an object that is not a name");
322 } 322 }
323 } else if (isPushbutton()) { 323 } else if (isPushbutton()) {
324 this->oh.warnIfPossible("ignoring attempt set the value of a pushbutton field"); 324 this->oh.warnIfPossible("ignoring attempt set the value of a pushbutton field");
@@ -347,24 +347,19 @@ QPDFFormFieldObjectHelper::setV(std::string const& utf8_value, bool need_appeara @@ -347,24 +347,19 @@ QPDFFormFieldObjectHelper::setV(std::string const& utf8_value, bool need_appeara
347 void 347 void
348 QPDFFormFieldObjectHelper::setRadioButtonValue(QPDFObjectHandle name) 348 QPDFFormFieldObjectHelper::setRadioButtonValue(QPDFObjectHandle name)
349 { 349 {
350 - // Set the value of a radio button field. This has the following  
351 - // specific behavior:  
352 - // * If this is a radio button field that has a parent that is  
353 - // also a radio button field and has no explicit /V, call itself  
354 - // on the parent  
355 - // * If this is a radio button field with children, set /V to the  
356 - // given value. Then, for each child, if the child has the  
357 - // specified value as one of its keys in the /N subdictionary of  
358 - // its /AP (i.e. its normal appearance stream dictionary), set  
359 - // /AS to name; otherwise, if /Off is a member, set /AS to /Off.  
360 - // Note that we never turn on /NeedAppearances when setting a  
361 - // radio button field. 350 + // Set the value of a radio button field. This has the following specific behavior:
  351 + // * If this is a radio button field that has a parent that is also a radio button field and has
  352 + // no explicit /V, call itself on the parent
  353 + // * If this is a radio button field with children, set /V to the given value. Then, for each
  354 + // child, if the child has the specified value as one of its keys in the /N subdictionary of
  355 + // its /AP (i.e. its normal appearance stream dictionary), set /AS to name; otherwise, if /Off
  356 + // is a member, set /AS to /Off.
  357 + // Note that we never turn on /NeedAppearances when setting a radio button field.
362 QPDFObjectHandle parent = this->oh.getKey("/Parent"); 358 QPDFObjectHandle parent = this->oh.getKey("/Parent");
363 if (parent.isDictionary() && parent.getKey("/Parent").isNull()) { 359 if (parent.isDictionary() && parent.getKey("/Parent").isNull()) {
364 QPDFFormFieldObjectHelper ph(parent); 360 QPDFFormFieldObjectHelper ph(parent);
365 if (ph.isRadioButton()) { 361 if (ph.isRadioButton()) {
366 - // This is most likely one of the individual buttons. Try  
367 - // calling on the parent. 362 + // This is most likely one of the individual buttons. Try calling on the parent.
368 QTC::TC("qpdf", "QPDFFormFieldObjectHelper set parent radio button"); 363 QTC::TC("qpdf", "QPDFFormFieldObjectHelper set parent radio button");
369 ph.setRadioButtonValue(name); 364 ph.setRadioButtonValue(name);
370 return; 365 return;
@@ -384,8 +379,7 @@ QPDFFormFieldObjectHelper::setRadioButtonValue(QPDFObjectHandle name) @@ -384,8 +379,7 @@ QPDFFormFieldObjectHelper::setRadioButtonValue(QPDFObjectHandle name)
384 QPDFObjectHandle AP = kid.getKey("/AP"); 379 QPDFObjectHandle AP = kid.getKey("/AP");
385 QPDFObjectHandle annot; 380 QPDFObjectHandle annot;
386 if (AP.isNull()) { 381 if (AP.isNull()) {
387 - // The widget may be below. If there is more than one,  
388 - // just find the first one. 382 + // The widget may be below. If there is more than one, just find the first one.
389 QPDFObjectHandle grandkids = kid.getKey("/Kids"); 383 QPDFObjectHandle grandkids = kid.getKey("/Kids");
390 if (grandkids.isArray()) { 384 if (grandkids.isArray()) {
391 int ngrandkids = grandkids.getArrayNItems(); 385 int ngrandkids = grandkids.getArrayNItems();
@@ -458,9 +452,8 @@ void @@ -458,9 +452,8 @@ void
458 QPDFFormFieldObjectHelper::generateAppearance(QPDFAnnotationObjectHelper& aoh) 452 QPDFFormFieldObjectHelper::generateAppearance(QPDFAnnotationObjectHelper& aoh)
459 { 453 {
460 std::string ft = getFieldType(); 454 std::string ft = getFieldType();
461 - // Ignore field types we don't know how to generate appearances  
462 - // for. Button fields don't really need them -- see code in  
463 - // QPDFAcroFormDocumentHelper::generateAppearancesIfNeeded. 455 + // Ignore field types we don't know how to generate appearances for. Button fields don't really
  456 + // need them -- see code in QPDFAcroFormDocumentHelper::generateAppearancesIfNeeded.
464 if ((ft == "/Tx") || (ft == "/Ch")) { 457 if ((ft == "/Tx") || (ft == "/Ch")) {
465 generateTextAppearance(aoh); 458 generateTextAppearance(aoh);
466 } 459 }
@@ -562,15 +555,13 @@ ValueSetter::writeAppearance() @@ -562,15 +555,13 @@ ValueSetter::writeAppearance()
562 { 555 {
563 this->replaced = true; 556 this->replaced = true;
564 557
565 - // This code does not take quadding into consideration because  
566 - // doing so requires font metric information, which we don't  
567 - // have in many cases. 558 + // This code does not take quadding into consideration because doing so requires font metric
  559 + // information, which we don't have in many cases.
568 560
569 double tfh = 1.2 * tf; 561 double tfh = 1.2 * tf;
570 int dx = 1; 562 int dx = 1;
571 563
572 - // Write one or more lines, centered vertically, possibly with  
573 - // one row highlighted. 564 + // Write one or more lines, centered vertically, possibly with one row highlighted.
574 565
575 auto max_rows = static_cast<size_t>((bbox.ury - bbox.lly) / tfh); 566 auto max_rows = static_cast<size_t>((bbox.ury - bbox.lly) / tfh);
576 bool highlight = false; 567 bool highlight = false;
@@ -591,8 +582,7 @@ ValueSetter::writeAppearance() @@ -591,8 +582,7 @@ ValueSetter::writeAppearance()
591 } 582 }
592 } 583 }
593 if (found) { 584 if (found) {
594 - // Try to make the found item the second one, but  
595 - // adjust for under/overflow. 585 + // Try to make the found item the second one, but adjust for under/overflow.
596 int wanted_first = QIntC::to_int(found_idx) - 1; 586 int wanted_first = QIntC::to_int(found_idx) - 1;
597 int wanted_last = QIntC::to_int(found_idx + max_rows) - 2; 587 int wanted_last = QIntC::to_int(found_idx + max_rows) - 2;
598 QTC::TC("qpdf", "QPDFFormFieldObjectHelper list found"); 588 QTC::TC("qpdf", "QPDFFormFieldObjectHelper list found");
@@ -639,9 +629,8 @@ ValueSetter::writeAppearance() @@ -639,9 +629,8 @@ ValueSetter::writeAppearance()
639 dy -= tf; 629 dy -= tf;
640 write("q\nBT\n" + DA + "\n"); 630 write("q\nBT\n" + DA + "\n");
641 for (size_t i = 0; i < nlines; ++i) { 631 for (size_t i = 0; i < nlines; ++i) {
642 - // We could adjust Tm to translate to the beginning the first  
643 - // line, set TL to tfh, and use T* for each subsequent line,  
644 - // but doing this would require extracting any Tm from DA, 632 + // We could adjust Tm to translate to the beginning the first line, set TL to tfh, and use
  633 + // T* for each subsequent line, but doing this would require extracting any Tm from DA,
645 // which doesn't seem really worth the effort. 634 // which doesn't seem really worth the effort.
646 if (i == 0) { 635 if (i == 0) {
647 write( 636 write(
@@ -708,8 +697,8 @@ TfFinder::handleToken(QPDFTokenizer::Token const& token) @@ -708,8 +697,8 @@ TfFinder::handleToken(QPDFTokenizer::Token const& token)
708 case QPDFTokenizer::tt_word: 697 case QPDFTokenizer::tt_word:
709 if (token.isWord("Tf")) { 698 if (token.isWord("Tf")) {
710 if ((last_num > 1.0) && (last_num < 1000.0)) { 699 if ((last_num > 1.0) && (last_num < 1000.0)) {
711 - // These ranges are arbitrary but keep us from doing  
712 - // insane things or suffering from over/underflow 700 + // These ranges are arbitrary but keep us from doing insane things or suffering from
  701 + // over/underflow
713 tf = last_num; 702 tf = last_num;
714 } 703 }
715 tf_idx = last_num_idx; 704 tf_idx = last_num_idx;
@@ -738,8 +727,7 @@ TfFinder::getDA() @@ -738,8 +727,7 @@ TfFinder::getDA()
738 if (QIntC::to_int(i) == tf_idx) { 727 if (QIntC::to_int(i) == tf_idx) {
739 double delta = strtod(cur.c_str(), nullptr) - this->tf; 728 double delta = strtod(cur.c_str(), nullptr) - this->tf;
740 if ((delta > 0.001) || (delta < -0.001)) { 729 if ((delta > 0.001) || (delta < -0.001)) {
741 - // tf doesn't match the font size passed to Tf, so  
742 - // substitute. 730 + // tf doesn't match the font size passed to Tf, so substitute.
743 QTC::TC("qpdf", "QPDFFormFieldObjectHelper fallback Tf"); 731 QTC::TC("qpdf", "QPDFFormFieldObjectHelper fallback Tf");
744 cur = QUtil::double_to_string(tf); 732 cur = QUtil::double_to_string(tf);
745 } 733 }
@@ -852,6 +840,5 @@ QPDFFormFieldObjectHelper::generateTextAppearance(QPDFAnnotationObjectHelper& ao @@ -852,6 +840,5 @@ QPDFFormFieldObjectHelper::generateTextAppearance(QPDFAnnotationObjectHelper& ao
852 } 840 }
853 841
854 AS.addTokenFilter( 842 AS.addTokenFilter(
855 - // line-break  
856 std::shared_ptr<QPDFObjectHandle::TokenFilter>(new ValueSetter(DA, V, opt, tf, bbox))); 843 std::shared_ptr<QPDFObjectHandle::TokenFilter>(new ValueSetter(DA, V, opt, tf, bbox)));
857 } 844 }
libqpdf/QPDFJob.cc
@@ -130,8 +130,8 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next) @@ -130,8 +130,8 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next)
130 if (!(w_obj.isNumber() && h_obj.isNumber())) { 130 if (!(w_obj.isNumber() && h_obj.isNumber())) {
131 if (!description.empty()) { 131 if (!description.empty()) {
132 o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { 132 o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
133 - v << prefix << ": " << description << ": not optimizing because image dictionary"  
134 - << " is missing required keys\n"; 133 + v << prefix << ": " << description
  134 + << ": not optimizing because image dictionary is missing required keys\n";
135 }); 135 });
136 } 136 }
137 return result; 137 return result;
@@ -142,14 +142,13 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next) @@ -142,14 +142,13 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next)
142 if (!description.empty()) { 142 if (!description.empty()) {
143 o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { 143 o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
144 v << prefix << ": " << description 144 v << prefix << ": " << description
145 - << ": not optimizing because image has other than"  
146 - << " 8 bits per component\n"; 145 + << ": not optimizing because image has other than 8 bits per component\n";
147 }); 146 });
148 } 147 }
149 return result; 148 return result;
150 } 149 }
151 - // Files have been seen in the wild whose width and height are  
152 - // floating point, which is goofy, but we can deal with it. 150 + // Files have been seen in the wild whose width and height are floating point, which is goofy,
  151 + // but we can deal with it.
153 JDIMENSION w = 0; 152 JDIMENSION w = 0;
154 if (w_obj.isInteger()) { 153 if (w_obj.isInteger()) {
155 w = w_obj.getUIntValueAsUInt(); 154 w = w_obj.getUIntValueAsUInt();
@@ -178,8 +177,8 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next) @@ -178,8 +177,8 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next)
178 QTC::TC("qpdf", "QPDFJob image optimize colorspace"); 177 QTC::TC("qpdf", "QPDFJob image optimize colorspace");
179 if (!description.empty()) { 178 if (!description.empty()) {
180 o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { 179 o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
181 - v << prefix << ": " << description << ": not optimizing because qpdf can't optimize"  
182 - << " images with this colorspace\n"; 180 + v << prefix << ": " << description
  181 + << ": not optimizing because qpdf can't optimize images with this colorspace\n";
183 }); 182 });
184 } 183 }
185 return result; 184 return result;
@@ -190,8 +189,9 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next) @@ -190,8 +189,9 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next)
190 QTC::TC("qpdf", "QPDFJob image optimize too small"); 189 QTC::TC("qpdf", "QPDFJob image optimize too small");
191 if (!description.empty()) { 190 if (!description.empty()) {
192 o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { 191 o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
193 - v << prefix << ": " << description << ": not optimizing because image"  
194 - << " is smaller than requested minimum dimensions\n"; 192 + v << prefix << ": " << description
  193 + << ": not optimizing because image is smaller than requested minimum "
  194 + "dimensions\n";
195 }); 195 });
196 } 196 }
197 return result; 197 return result;
@@ -207,8 +207,8 @@ ImageOptimizer::evaluate(std::string const& description) @@ -207,8 +207,8 @@ ImageOptimizer::evaluate(std::string const& description)
207 if (!image.pipeStreamData(nullptr, 0, qpdf_dl_specialized, true)) { 207 if (!image.pipeStreamData(nullptr, 0, qpdf_dl_specialized, true)) {
208 QTC::TC("qpdf", "QPDFJob image optimize no pipeline"); 208 QTC::TC("qpdf", "QPDFJob image optimize no pipeline");
209 o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { 209 o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
210 - v << prefix << ": " << description << ": not optimizing because unable to decode data"  
211 - << " or data already uses DCT\n"; 210 + v << prefix << ": " << description
  211 + << ": not optimizing because unable to decode data or data already uses DCT\n";
212 }); 212 });
213 return false; 213 return false;
214 } 214 }
@@ -227,8 +227,7 @@ ImageOptimizer::evaluate(std::string const& description) @@ -227,8 +227,7 @@ ImageOptimizer::evaluate(std::string const& description)
227 QTC::TC("qpdf", "QPDFJob image optimize no shrink"); 227 QTC::TC("qpdf", "QPDFJob image optimize no shrink");
228 o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { 228 o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
229 v << prefix << ": " << description 229 v << prefix << ": " << description
230 - << ": not optimizing because DCT compression does not"  
231 - << " reduce image size\n"; 230 + << ": not optimizing because DCT compression does not reduce image size\n";
232 }); 231 });
233 return false; 232 return false;
234 } 233 }
@@ -245,8 +244,8 @@ ImageOptimizer::provideStreamData(QPDFObjGen const&, Pipeline* pipeline) @@ -245,8 +244,8 @@ ImageOptimizer::provideStreamData(QPDFObjGen const&, Pipeline* pipeline)
245 std::shared_ptr<Pipeline> p = makePipeline("", pipeline); 244 std::shared_ptr<Pipeline> p = makePipeline("", pipeline);
246 if (p == nullptr) { 245 if (p == nullptr) {
247 // Should not be possible 246 // Should not be possible
248 - image.warnIfPossible("unable to create pipeline after previous"  
249 - " success; image data will be lost"); 247 + image.warnIfPossible(
  248 + "unable to create pipeline after previous success; image data will be lost");
250 pipeline->finish(); 249 pipeline->finish();
251 return; 250 return;
252 } 251 }
@@ -441,8 +440,7 @@ QPDFJob::createQPDF() @@ -441,8 +440,7 @@ QPDFJob::createQPDF()
441 processFile(pdf_sp, m->infilename.get(), m->password.get(), true, true); 440 processFile(pdf_sp, m->infilename.get(), m->password.get(), true, true);
442 } catch (QPDFExc& e) { 441 } catch (QPDFExc& e) {
443 if (e.getErrorCode() == qpdf_e_password) { 442 if (e.getErrorCode() == qpdf_e_password) {
444 - // Allow certain operations to work when an incorrect  
445 - // password is supplied. 443 + // Allow certain operations to work when an incorrect password is supplied.
446 if (m->check_is_encrypted || m->check_requires_password) { 444 if (m->check_is_encrypted || m->check_requires_password) {
447 m->encryption_status = qpdf_es_encrypted | qpdf_es_password_incorrect; 445 m->encryption_status = qpdf_es_encrypted | qpdf_es_password_incorrect;
448 return nullptr; 446 return nullptr;
@@ -464,8 +462,8 @@ QPDFJob::createQPDF() @@ -464,8 +462,8 @@ QPDFJob::createQPDF()
464 return nullptr; 462 return nullptr;
465 } 463 }
466 464
467 - // If we are updating from JSON, this has to be done first before  
468 - // other options may cause transformations to the input. 465 + // If we are updating from JSON, this has to be done first before other options may cause
  466 + // transformations to the input.
469 if (!m->update_from_json.empty()) { 467 if (!m->update_from_json.empty()) {
470 pdf.updateFromJSON(m->update_from_json); 468 pdf.updateFromJSON(m->update_from_json);
471 } 469 }
@@ -497,16 +495,16 @@ QPDFJob::writeQPDF(QPDF& pdf) @@ -497,16 +495,16 @@ QPDFJob::writeQPDF(QPDF& pdf)
497 } 495 }
498 if (m->warnings && (!m->suppress_warnings)) { 496 if (m->warnings && (!m->suppress_warnings)) {
499 if (createsOutput()) { 497 if (createsOutput()) {
500 - *m->log->getWarn() << m->message_prefix << ": operation succeeded with warnings;"  
501 - << " resulting file may have some problems\n"; 498 + *m->log->getWarn()
  499 + << m->message_prefix
  500 + << ": operation succeeded with warnings; resulting file may have some problems\n";
502 } else { 501 } else {
503 *m->log->getWarn() << m->message_prefix << ": operation succeeded with warnings\n"; 502 *m->log->getWarn() << m->message_prefix << ": operation succeeded with warnings\n";
504 } 503 }
505 } 504 }
506 if (m->report_mem_usage) { 505 if (m->report_mem_usage) {
507 - // Call get_max_memory_usage before generating output. When  
508 - // debugging, it's easier if print statements from  
509 - // get_max_memory_usage are not interleaved with the output. 506 + // Call get_max_memory_usage before generating output. When debugging, it's easier if print
  507 + // statements from get_max_memory_usage are not interleaved with the output.
510 auto mem_usage = QUtil::get_max_memory_usage(); 508 auto mem_usage = QUtil::get_max_memory_usage();
511 *m->log->getWarn() << "qpdf-max-memory-usage " << mem_usage << "\n"; 509 *m->log->getWarn() << "qpdf-max-memory-usage " << mem_usage << "\n";
512 } 510 }
@@ -568,16 +566,13 @@ QPDFJob::getExitCode() const @@ -568,16 +566,13 @@ QPDFJob::getExitCode() const
568 void 566 void
569 QPDFJob::checkConfiguration() 567 QPDFJob::checkConfiguration()
570 { 568 {
571 - // Do final checks for command-line consistency. (I always think  
572 - // this is called doFinalChecks, so I'm putting that in a  
573 - // comment.) 569 + // Do final checks for command-line consistency. (I always think this is called doFinalChecks,
  570 + // so I'm putting that in a comment.)
574 571
575 if (m->replace_input) { 572 if (m->replace_input) {
576 - // Check for --empty appears later after we have checked  
577 - // m->infilename. 573 + // Check for --empty appears later after we have checked m->infilename.
578 if (m->outfilename) { 574 if (m->outfilename) {
579 - usage("--replace-input may not be used when"  
580 - " an output file is specified"); 575 + usage("--replace-input may not be used when an output file is specified");
581 } else if (m->split_pages) { 576 } else if (m->split_pages) {
582 usage("--split-pages may not be used with --replace-input"); 577 usage("--split-pages may not be used with --replace-input");
583 } else if (m->json_version) { 578 } else if (m->json_version) {
@@ -585,8 +580,8 @@ QPDFJob::checkConfiguration() @@ -585,8 +580,8 @@ QPDFJob::checkConfiguration()
585 } 580 }
586 } 581 }
587 if (m->json_version && (m->outfilename == nullptr)) { 582 if (m->json_version && (m->outfilename == nullptr)) {
588 - // The output file is optional with --json for backward  
589 - // compatibility and defaults to standard output. 583 + // The output file is optional with --json for backward compatibility and defaults to
  584 + // standard output.
590 m->outfilename = QUtil::make_shared_cstr("-"); 585 m->outfilename = QUtil::make_shared_cstr("-");
591 } 586 }
592 if (m->infilename == nullptr) { 587 if (m->infilename == nullptr) {
@@ -605,24 +600,21 @@ QPDFJob::checkConfiguration() @@ -605,24 +600,21 @@ QPDFJob::checkConfiguration()
605 600
606 if (m->encrypt && (!m->allow_insecure) && 601 if (m->encrypt && (!m->allow_insecure) &&
607 (m->owner_password.empty() && (!m->user_password.empty()) && (m->keylen == 256))) { 602 (m->owner_password.empty() && (!m->user_password.empty()) && (m->keylen == 256))) {
608 - // Note that empty owner passwords for R < 5 are copied from  
609 - // the user password, so this lack of security is not an issue  
610 - // for those files. Also we are consider only the ability to  
611 - // open the file without a password to be insecure. We are not  
612 - // concerned about whether the viewer enforces security  
613 - // settings when the user and owner password match.  
614 - usage("A PDF with a non-empty user password and an empty owner"  
615 - " password encrypted with a 256-bit key is insecure as it"  
616 - " can be opened without a password. If you really want to"  
617 - " do this, you must also give the --allow-insecure option"  
618 - " before the -- that follows --encrypt."); 603 + // Note that empty owner passwords for R < 5 are copied from the user password, so this lack
  604 + // of security is not an issue for those files. Also we are consider only the ability to
  605 + // open the file without a password to be insecure. We are not concerned about whether the
  606 + // viewer enforces security settings when the user and owner password match.
  607 + usage(
  608 + "A PDF with a non-empty user password and an empty owner password encrypted with a "
  609 + "256-bit key is insecure as it can be opened without a password. If you really want to"
  610 + " do this, you must also give the --allow-insecure option before the -- that follows "
  611 + "--encrypt.");
619 } 612 }
620 613
621 bool save_to_stdout = false; 614 bool save_to_stdout = false;
622 if (m->require_outfile && m->outfilename && (strcmp(m->outfilename.get(), "-") == 0)) { 615 if (m->require_outfile && m->outfilename && (strcmp(m->outfilename.get(), "-") == 0)) {
623 if (m->split_pages) { 616 if (m->split_pages) {
624 - usage("--split-pages may not be used when"  
625 - " writing to standard output"); 617 + usage("--split-pages may not be used when writing to standard output");
626 } 618 }
627 save_to_stdout = true; 619 save_to_stdout = true;
628 } 620 }
@@ -634,9 +626,8 @@ QPDFJob::checkConfiguration() @@ -634,9 +626,8 @@ QPDFJob::checkConfiguration()
634 } 626 }
635 if ((!m->split_pages) && QUtil::same_file(m->infilename.get(), m->outfilename.get())) { 627 if ((!m->split_pages) && QUtil::same_file(m->infilename.get(), m->outfilename.get())) {
636 QTC::TC("qpdf", "QPDFJob same file error"); 628 QTC::TC("qpdf", "QPDFJob same file error");
637 - usage("input file and output file are the same;"  
638 - " use --replace-input to intentionally"  
639 - " overwrite the input file"); 629 + usage("input file and output file are the same; use --replace-input to intentionally "
  630 + "overwrite the input file");
640 } 631 }
641 632
642 if (m->json_version == 1) { 633 if (m->json_version == 1) {
@@ -645,8 +636,7 @@ QPDFJob::checkConfiguration() @@ -645,8 +636,7 @@ QPDFJob::checkConfiguration()
645 } 636 }
646 } else { 637 } else {
647 if (m->json_keys.count("objectinfo") || m->json_keys.count("objects")) { 638 if (m->json_keys.count("objectinfo") || m->json_keys.count("objects")) {
648 - usage("json keys \"objects\" and \"objectinfo\" are only valid for"  
649 - " json version 1"); 639 + usage("json keys \"objects\" and \"objectinfo\" are only valid for json version 1");
650 } 640 }
651 } 641 }
652 } 642 }
@@ -754,10 +744,8 @@ QPDFJob::showEncryption(QPDF& pdf) @@ -754,10 +744,8 @@ QPDFJob::showEncryption(QPDF& pdf)
754 void 744 void
755 QPDFJob::doCheck(QPDF& pdf) 745 QPDFJob::doCheck(QPDF& pdf)
756 { 746 {
757 - // Code below may set okay to false but not to true.  
758 - // We assume okay until we prove otherwise but may  
759 - // continue to perform additional checks after finding  
760 - // errors. 747 + // Code below may set okay to false but not to true. We assume okay until we prove otherwise but
  748 + // may continue to perform additional checks after finding errors.
761 bool okay = true; 749 bool okay = true;
762 auto& cout = *m->log->getInfo(); 750 auto& cout = *m->log->getInfo();
763 cout << "checking " << m->infilename.get() << "\n"; 751 cout << "checking " << m->infilename.get() << "\n";
@@ -777,8 +765,7 @@ QPDFJob::doCheck(QPDF& pdf) @@ -777,8 +765,7 @@ QPDFJob::doCheck(QPDF& pdf)
777 cout << "File is not linearized\n"; 765 cout << "File is not linearized\n";
778 } 766 }
779 767
780 - // Write the file to nowhere, uncompressing  
781 - // streams. This causes full file traversal and 768 + // Write the file to nowhere, uncompressing streams. This causes full file traversal and
782 // decoding of all streams we can decode. 769 // decoding of all streams we can decode.
783 QPDFWriter w(pdf); 770 QPDFWriter w(pdf);
784 Pl_Discard discard; 771 Pl_Discard discard;
@@ -809,9 +796,9 @@ QPDFJob::doCheck(QPDF& pdf) @@ -809,9 +796,9 @@ QPDFJob::doCheck(QPDF& pdf)
809 if (!pdf.getWarnings().empty()) { 796 if (!pdf.getWarnings().empty()) {
810 m->warnings = true; 797 m->warnings = true;
811 } else { 798 } else {
812 - *m->log->getInfo() << "No syntax or stream encoding errors"  
813 - << " found; the file may still contain\n"  
814 - << "errors that qpdf cannot detect\n"; 799 + *m->log->getInfo()
  800 + << "No syntax or stream encoding errors found; the file may still contain\n"
  801 + << "errors that qpdf cannot detect\n";
815 } 802 }
816 } 803 }
817 804
@@ -833,8 +820,7 @@ QPDFJob::doShowObj(QPDF& pdf) @@ -833,8 +820,7 @@ QPDFJob::doShowObj(QPDF& pdf)
833 obj.warnIfPossible("unable to filter stream data"); 820 obj.warnIfPossible("unable to filter stream data");
834 error = true; 821 error = true;
835 } else { 822 } else {
836 - // If anything has been written to standard output,  
837 - // this will fail. 823 + // If anything has been written to standard output, this will fail.
838 m->log->saveToStandardOutput(true); 824 m->log->saveToStandardOutput(true);
839 obj.pipeStreamData( 825 obj.pipeStreamData(
840 m->log->getSave().get(), 826 m->log->getSave().get(),
@@ -933,8 +919,8 @@ QPDFJob::doShowAttachment(QPDF& pdf) @@ -933,8 +919,8 @@ QPDFJob::doShowAttachment(QPDF& pdf)
933 throw std::runtime_error("attachment " + m->attachment_to_show + " not found"); 919 throw std::runtime_error("attachment " + m->attachment_to_show + " not found");
934 } 920 }
935 auto efs = fs->getEmbeddedFileStream(); 921 auto efs = fs->getEmbeddedFileStream();
936 - // saveToStandardOutput has already been called, but it's harmless  
937 - // to call it again, so do as defensive coding. 922 + // saveToStandardOutput has already been called, but it's harmless to call it again, so do as
  923 + // defensive coding.
938 m->log->saveToStandardOutput(true); 924 m->log->saveToStandardOutput(true);
939 efs.pipeStreamData(m->log->getSave().get(), 0, qpdf_dl_all); 925 efs.pipeStreamData(m->log->getSave().get(), 0, qpdf_dl_all);
940 } 926 }
@@ -1132,9 +1118,8 @@ QPDFJob::doJSONPageLabels(Pipeline* p, bool& first, QPDF& pdf) @@ -1132,9 +1118,8 @@ QPDFJob::doJSONPageLabels(Pipeline* p, bool& first, QPDF& pdf)
1132 pldh.getLabelsForPageRange(0, npages - 1, 0, labels); 1118 pldh.getLabelsForPageRange(0, npages - 1, 0, labels);
1133 for (auto iter = labels.begin(); iter != labels.end(); ++iter) { 1119 for (auto iter = labels.begin(); iter != labels.end(); ++iter) {
1134 if ((iter + 1) == labels.end()) { 1120 if ((iter + 1) == labels.end()) {
1135 - // This can't happen, so ignore it. This could only  
1136 - // happen if getLabelsForPageRange somehow returned an  
1137 - // odd number of items. 1121 + // This can't happen, so ignore it. This could only happen if getLabelsForPageRange
  1122 + // somehow returned an odd number of items.
1138 break; 1123 break;
1139 } 1124 }
1140 JSON j_label = j_labels.addArrayElement(JSON::makeDictionary()); 1125 JSON j_label = j_labels.addArrayElement(JSON::makeDictionary());
@@ -1362,22 +1347,17 @@ QPDFJob::doJSONAttachments(Pipeline* p, bool& first, QPDF& pdf) @@ -1362,22 +1347,17 @@ QPDFJob::doJSONAttachments(Pipeline* p, bool& first, QPDF& pdf)
1362 JSON 1347 JSON
1363 QPDFJob::json_schema(int json_version, std::set<std::string>* keys) 1348 QPDFJob::json_schema(int json_version, std::set<std::string>* keys)
1364 { 1349 {
1365 - // Style: use all lower-case keys with no dashes or underscores.  
1366 - // Choose array or dictionary based on indexing. For example, we  
1367 - // use a dictionary for objects because we want to index by object  
1368 - // ID and an array for pages because we want to index by position.  
1369 - // The pages in the pages array contain references back to the  
1370 - // original object, which can be resolved in the objects  
1371 - // dictionary. When a PDF construct that maps back to an original  
1372 - // object is represented separately, use "object" as the key that  
1373 - // references the original object.  
1374 -  
1375 - // This JSON object doubles as a schema and as documentation for  
1376 - // our JSON output. Any schema mismatch is a bug in qpdf. This  
1377 - // helps to enforce our policy of consistently providing a known  
1378 - // structure where every documented key will always be present,  
1379 - // which makes it easier to consume our JSON. This is discussed in  
1380 - // more depth in the manual. 1350 + // Style: use all lower-case keys with no dashes or underscores. Choose array or dictionary
  1351 + // based on indexing. For example, we use a dictionary for objects because we want to index by
  1352 + // object ID and an array for pages because we want to index by position. The pages in the pages
  1353 + // array contain references back to the original object, which can be resolved in the objects
  1354 + // dictionary. When a PDF construct that maps back to an original object is represented
  1355 + // separately, use "object" as the key that references the original object.
  1356 +
  1357 + // This JSON object doubles as a schema and as documentation for our JSON output. Any schema
  1358 + // mismatch is a bug in qpdf. This helps to enforce our policy of consistently providing a known
  1359 + // structure where every documented key will always be present, which makes it easier to consume
  1360 + // our JSON. This is discussed in more depth in the manual.
1381 JSON schema = JSON::makeDictionary(); 1361 JSON schema = JSON::makeDictionary();
1382 schema.addDictionaryMember( 1362 schema.addDictionaryMember(
1383 "version", 1363 "version",
@@ -1388,9 +1368,8 @@ QPDFJob::json_schema(int json_version, std::set<std::string>* keys) @@ -1388,9 +1368,8 @@ QPDFJob::json_schema(int json_version, std::set<std::string>* keys)
1388 1368
1389 bool all_keys = ((keys == nullptr) || keys->empty()); 1369 bool all_keys = ((keys == nullptr) || keys->empty());
1390 1370
1391 - // The list of selectable top-level keys is duplicated in the  
1392 - // following places: job.yml, QPDFJob::json_schema, and  
1393 - // QPDFJob::doJSON. 1371 + // The list of selectable top-level keys is duplicated in the following places: job.yml,
  1372 + // QPDFJob::json_schema, and QPDFJob::doJSON.
1394 if (json_version == 1) { 1373 if (json_version == 1) {
1395 if (all_keys || keys->count("objects")) { 1374 if (all_keys || keys->count("objects")) {
1396 schema.addDictionaryMember("objects", JSON::parse(R"({ 1375 schema.addDictionaryMember("objects", JSON::parse(R"({
@@ -1581,8 +1560,8 @@ QPDFJob::json_out_schema_v1() @@ -1581,8 +1560,8 @@ QPDFJob::json_out_schema_v1()
1581 void 1560 void
1582 QPDFJob::doJSON(QPDF& pdf, Pipeline* p) 1561 QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
1583 { 1562 {
1584 - // qpdf guarantees that no new top-level keys whose names start  
1585 - // with "x-" will be added. These are reserved for users. 1563 + // qpdf guarantees that no new top-level keys whose names start with "x-" will be added. These
  1564 + // are reserved for users.
1586 1565
1587 std::string captured_json; 1566 std::string captured_json;
1588 std::shared_ptr<Pl_String> pl_str; 1567 std::shared_ptr<Pl_String> pl_str;
@@ -1595,14 +1574,12 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) @@ -1595,14 +1574,12 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
1595 JSON::writeDictionaryOpen(p, first, 0); 1574 JSON::writeDictionaryOpen(p, first, 0);
1596 1575
1597 if (m->json_output) { 1576 if (m->json_output) {
1598 - // Exclude version and parameters to keep the output file  
1599 - // minimal. The JSON version is inside the "qpdf" key for  
1600 - // version 2. 1577 + // Exclude version and parameters to keep the output file minimal. The JSON version is
  1578 + // inside the "qpdf" key for version 2.
1601 } else { 1579 } else {
1602 - // This version is updated every time a non-backward-compatible  
1603 - // change is made to the JSON format. Clients of the JSON are to  
1604 - // ignore unrecognized keys, so we only update the version if a  
1605 - // key disappears or if its value changes meaning. 1580 + // This version is updated every time a non-backward-compatible change is made to the JSON
  1581 + // format. Clients of the JSON are to ignore unrecognized keys, so we only update the
  1582 + // version if a key disappears or if its value changes meaning.
1606 JSON::writeDictionaryItem(p, first, "version", JSON::makeInt(m->json_version), 1); 1583 JSON::writeDictionaryItem(p, first, "version", JSON::makeInt(m->json_version), 1);
1607 JSON j_params = JSON::makeDictionary(); 1584 JSON j_params = JSON::makeDictionary();
1608 std::string decode_level_str; 1585 std::string decode_level_str;
@@ -1624,13 +1601,11 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) @@ -1624,13 +1601,11 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
1624 JSON::writeDictionaryItem(p, first, "parameters", j_params, 1); 1601 JSON::writeDictionaryItem(p, first, "parameters", j_params, 1);
1625 } 1602 }
1626 bool all_keys = m->json_keys.empty(); 1603 bool all_keys = m->json_keys.empty();
1627 - // The list of selectable top-level keys is duplicated in the  
1628 - // following places: job.yml, QPDFJob::json_schema, and  
1629 - // QPDFJob::doJSON. 1604 + // The list of selectable top-level keys is duplicated in the following places: job.yml,
  1605 + // QPDFJob::json_schema, and QPDFJob::doJSON.
1630 1606
1631 - // We do pages and pagelabels first since they have the side  
1632 - // effect of repairing the pages tree, which could potentially  
1633 - // impact object references in remaining items. 1607 + // We do pages and pagelabels first since they have the side effect of repairing the pages tree,
  1608 + // which could potentially impact object references in remaining items.
1634 if (all_keys || m->json_keys.count("pages")) { 1609 if (all_keys || m->json_keys.count("pages")) {
1635 doJSONPages(p, first, pdf); 1610 doJSONPages(p, first, pdf);
1636 } 1611 }
@@ -1638,8 +1613,7 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) @@ -1638,8 +1613,7 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
1638 doJSONPageLabels(p, first, pdf); 1613 doJSONPageLabels(p, first, pdf);
1639 } 1614 }
1640 1615
1641 - // The non-special keys are output in alphabetical order, but the  
1642 - // order doesn't actually matter. 1616 + // The non-special keys are output in alphabetical order, but the order doesn't actually matter.
1643 if (all_keys || m->json_keys.count("acroform")) { 1617 if (all_keys || m->json_keys.count("acroform")) {
1644 doJSONAcroform(p, first, pdf); 1618 doJSONAcroform(p, first, pdf);
1645 } 1619 }
@@ -1653,16 +1627,15 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) @@ -1653,16 +1627,15 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
1653 doJSONOutlines(p, first, pdf); 1627 doJSONOutlines(p, first, pdf);
1654 } 1628 }
1655 1629
1656 - // We do objects last so their information is consistent with  
1657 - // repairing the page tree. To see the original file with any page  
1658 - // tree problems and the page tree not flattened, select 1630 + // We do objects last so their information is consistent with repairing the page tree. To see
  1631 + // the original file with any page tree problems and the page tree not flattened, select
1659 // qpdf/objects/objectinfo without other keys. 1632 // qpdf/objects/objectinfo without other keys.
1660 if (all_keys || m->json_keys.count("objects") || m->json_keys.count("qpdf")) { 1633 if (all_keys || m->json_keys.count("objects") || m->json_keys.count("qpdf")) {
1661 doJSONObjects(p, first, pdf); 1634 doJSONObjects(p, first, pdf);
1662 } 1635 }
1663 if (m->json_version == 1) { 1636 if (m->json_version == 1) {
1664 - // "objectinfo" is not needed for version >1 since you can  
1665 - // tell streams from other objects in "objects". 1637 + // "objectinfo" is not needed for version >1 since you can tell streams from other objects
  1638 + // in "objects".
1666 if (all_keys || m->json_keys.count("objectinfo")) { 1639 if (all_keys || m->json_keys.count("objectinfo")) {
1667 doJSONObjectinfo(p, first, pdf); 1640 doJSONObjectinfo(p, first, pdf);
1668 } 1641 }
@@ -1677,8 +1650,7 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) @@ -1677,8 +1650,7 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
1677 std::list<std::string> errors; 1650 std::list<std::string> errors;
1678 JSON captured = JSON::parse(captured_json); 1651 JSON captured = JSON::parse(captured_json);
1679 if (!captured.checkSchema(schema, errors)) { 1652 if (!captured.checkSchema(schema, errors)) {
1680 - m->log->error("QPDFJob didn't create JSON that complies with "  
1681 - "its own rules.\n"); 1653 + m->log->error("QPDFJob didn't create JSON that complies with its own rules.\n");
1682 for (auto const& error: errors) { 1654 for (auto const& error: errors) {
1683 *m->log->getError() << error << "\n"; 1655 *m->log->getError() << error << "\n";
1684 } 1656 }
@@ -1768,53 +1740,46 @@ QPDFJob::doProcess( @@ -1768,53 +1740,46 @@ QPDFJob::doProcess(
1768 bool used_for_input, 1740 bool used_for_input,
1769 bool main_input) 1741 bool main_input)
1770 { 1742 {
1771 - // If a password has been specified but doesn't work, try other  
1772 - // passwords that are equivalent in different character encodings.  
1773 - // This makes it possible to open PDF files that were encrypted  
1774 - // using incorrect string encodings. For example, if someone used  
1775 - // a password encoded in PDF Doc encoding or Windows code page  
1776 - // 1252 for an AES-encrypted file or a UTF-8-encoded password on  
1777 - // an RC4-encrypted file, or if the password was properly encoded  
1778 - // but the password given here was incorrectly encoded, there's a  
1779 - // good chance we'd succeed here. 1743 + // If a password has been specified but doesn't work, try other passwords that are equivalent in
  1744 + // different character encodings. This makes it possible to open PDF files that were encrypted
  1745 + // using incorrect string encodings. For example, if someone used a password encoded in PDF Doc
  1746 + // encoding or Windows code page 1252 for an AES-encrypted file or a UTF-8-encoded password on
  1747 + // an RC4-encrypted file, or if the password was properly encoded but the password given here
  1748 + // was incorrectly encoded, there's a good chance we'd succeed here.
1780 1749
1781 std::string ptemp; 1750 std::string ptemp;
1782 if (password && (!m->password_is_hex_key)) { 1751 if (password && (!m->password_is_hex_key)) {
1783 if (m->password_mode == QPDFJob::pm_hex_bytes) { 1752 if (m->password_mode == QPDFJob::pm_hex_bytes) {
1784 - // Special case: handle --password-mode=hex-bytes for input  
1785 - // password as well as output password 1753 + // Special case: handle --password-mode=hex-bytes for input password as well as output
  1754 + // password
1786 QTC::TC("qpdf", "QPDFJob input password hex-bytes"); 1755 QTC::TC("qpdf", "QPDFJob input password hex-bytes");
1787 ptemp = QUtil::hex_decode(password); 1756 ptemp = QUtil::hex_decode(password);
1788 password = ptemp.c_str(); 1757 password = ptemp.c_str();
1789 } 1758 }
1790 } 1759 }
1791 if ((password == nullptr) || empty || m->password_is_hex_key || m->suppress_password_recovery) { 1760 if ((password == nullptr) || empty || m->password_is_hex_key || m->suppress_password_recovery) {
1792 - // There is no password, or we're not doing recovery, so just  
1793 - // do the normal processing with the supplied password. 1761 + // There is no password, or we're not doing recovery, so just do the normal processing with
  1762 + // the supplied password.
1794 doProcessOnce(pdf, fn, password, empty, used_for_input, main_input); 1763 doProcessOnce(pdf, fn, password, empty, used_for_input, main_input);
1795 return; 1764 return;
1796 } 1765 }
1797 1766
1798 - // Get a list of otherwise encoded strings. Keep in scope for this  
1799 - // method. 1767 + // Get a list of otherwise encoded strings. Keep in scope for this method.
1800 std::vector<std::string> passwords_str = QUtil::possible_repaired_encodings(password); 1768 std::vector<std::string> passwords_str = QUtil::possible_repaired_encodings(password);
1801 // Represent to char const*, as required by the QPDF class. 1769 // Represent to char const*, as required by the QPDF class.
1802 std::vector<char const*> passwords; 1770 std::vector<char const*> passwords;
1803 for (auto const& iter: passwords_str) { 1771 for (auto const& iter: passwords_str) {
1804 passwords.push_back(iter.c_str()); 1772 passwords.push_back(iter.c_str());
1805 } 1773 }
1806 - // We always try the supplied password first because it is the  
1807 - // first string returned by possible_repaired_encodings. If there  
1808 - // is more than one option, go ahead and put the supplied password  
1809 - // at the end so that it's that decoding attempt whose exception  
1810 - // is thrown. 1774 + // We always try the supplied password first because it is the first string returned by
  1775 + // possible_repaired_encodings. If there is more than one option, go ahead and put the supplied
  1776 + // password at the end so that it's that decoding attempt whose exception is thrown.
1811 if (passwords.size() > 1) { 1777 if (passwords.size() > 1) {
1812 passwords.push_back(password); 1778 passwords.push_back(password);
1813 } 1779 }
1814 1780
1815 - // Try each password. If one works, return the resulting object.  
1816 - // If they all fail, throw the exception thrown by the final  
1817 - // attempt, which, like the first attempt, will be with the 1781 + // Try each password. If one works, return the resulting object. If they all fail, throw the
  1782 + // exception thrown by the final attempt, which, like the first attempt, will be with the
1818 // supplied password. 1783 // supplied password.
1819 bool warned = false; 1784 bool warned = false;
1820 for (auto iter = passwords.begin(); iter != passwords.end(); ++iter) { 1785 for (auto iter = passwords.begin(); iter != passwords.end(); ++iter) {
@@ -1831,9 +1796,9 @@ QPDFJob::doProcess( @@ -1831,9 +1796,9 @@ QPDFJob::doProcess(
1831 if (!warned) { 1796 if (!warned) {
1832 warned = true; 1797 warned = true;
1833 doIfVerbose([&](Pipeline& v, std::string const& prefix) { 1798 doIfVerbose([&](Pipeline& v, std::string const& prefix) {
1834 - v << prefix << ": supplied password didn't work;"  
1835 - << " trying other passwords based on interpreting"  
1836 - << " password with different string encodings\n"; 1799 + v << prefix
  1800 + << ": supplied password didn't work; trying other passwords based on "
  1801 + "interpreting password with different string encodings\n";
1837 }); 1802 });
1838 } 1803 }
1839 } 1804 }
@@ -1943,10 +1908,8 @@ QPDFJob::doUnderOverlayForPage( @@ -1943,10 +1908,8 @@ QPDFJob::doUnderOverlayForPage(
1943 fo[from_pageno] = pdf.copyForeignObject(from_page.getFormXObjectForPage()); 1908 fo[from_pageno] = pdf.copyForeignObject(from_page.getFormXObjectForPage());
1944 } 1909 }
1945 1910
1946 - // If the same page is overlaid or underlaid multiple times,  
1947 - // we'll generate multiple names for it, but that's harmless  
1948 - // and also a pretty goofy case that's not worth coding  
1949 - // around. 1911 + // If the same page is overlaid or underlaid multiple times, we'll generate multiple names
  1912 + // for it, but that's harmless and also a pretty goofy case that's not worth coding around.
1950 std::string name = resources.getUniqueResourceName("/Fx", min_suffix); 1913 std::string name = resources.getUniqueResourceName("/Fx", min_suffix);
1951 QPDFMatrix cm; 1914 QPDFMatrix cm;
1952 std::string new_content = dest_page.placeFormXObject( 1915 std::string new_content = dest_page.placeFormXObject(
@@ -2017,18 +1980,15 @@ QPDFJob::handleUnderOverlay(QPDF& pdf) @@ -2017,18 +1980,15 @@ QPDFJob::handleUnderOverlay(QPDF& pdf)
2017 if (!(underlay_pagenos.count(pageno) || overlay_pagenos.count(pageno))) { 1980 if (!(underlay_pagenos.count(pageno) || overlay_pagenos.count(pageno))) {
2018 continue; 1981 continue;
2019 } 1982 }
2020 - // This code converts the original page, any underlays, and  
2021 - // any overlays to form XObjects. Then it concatenates display  
2022 - // of all underlays, the original page, and all overlays.  
2023 - // Prior to 11.3.0, the original page contents were wrapped in  
2024 - // q/Q, but this didn't work if the original page had  
2025 - // unbalanced q/Q operators. See github issue #904. 1983 + // This code converts the original page, any underlays, and any overlays to form XObjects.
  1984 + // Then it concatenates display of all underlays, the original page, and all overlays. Prior
  1985 + // to 11.3.0, the original page contents were wrapped in q/Q, but this didn't work if the
  1986 + // original page had unbalanced q/Q operators. See github issue #904.
2026 auto& dest_page = main_pages.at(i); 1987 auto& dest_page = main_pages.at(i);
2027 auto dest_page_oh = dest_page.getObjectHandle(); 1988 auto dest_page_oh = dest_page.getObjectHandle();
2028 auto this_page_fo = dest_page.getFormXObjectForPage(); 1989 auto this_page_fo = dest_page.getFormXObjectForPage();
2029 - // The resulting form xobject lazily reads the content from  
2030 - // the original page, which we are going to replace. Therefore  
2031 - // we have to explicitly copy it. 1990 + // The resulting form xobject lazily reads the content from the original page, which we are
  1991 + // going to replace. Therefore we have to explicitly copy it.
2032 auto content_data = this_page_fo.getRawStreamData(); 1992 auto content_data = this_page_fo.getRawStreamData();
2033 this_page_fo.replaceStreamData(content_data, QPDFObjectHandle(), QPDFObjectHandle()); 1993 this_page_fo.replaceStreamData(content_data, QPDFObjectHandle(), QPDFObjectHandle());
2034 auto resources = 1994 auto resources =
@@ -2097,8 +2057,7 @@ QPDFJob::addAttachments(QPDF& pdf) @@ -2097,8 +2057,7 @@ QPDFJob::addAttachments(QPDF& pdf)
2097 } 2057 }
2098 message = pdf.getFilename() + 2058 message = pdf.getFilename() +
2099 " already has attachments with the following keys: " + message + 2059 " already has attachments with the following keys: " + message +
2100 - "; use --replace to replace or --key to specify a different "  
2101 - "key"; 2060 + "; use --replace to replace or --key to specify a different key";
2102 throw std::runtime_error(message); 2061 throw std::runtime_error(message);
2103 } 2062 }
2104 } 2063 }
@@ -2144,11 +2103,9 @@ QPDFJob::copyAttachments(QPDF& pdf) @@ -2144,11 +2103,9 @@ QPDFJob::copyAttachments(QPDF& pdf)
2144 message += i; 2103 message += i;
2145 } 2104 }
2146 message = pdf.getFilename() + 2105 message = pdf.getFilename() +
2147 - " already has attachments with keys that conflict with"  
2148 - " attachments from other files: " + 2106 + " already has attachments with keys that conflict with attachments from other files: " +
2149 message + 2107 message +
2150 - ". Use --prefix with --copy-attachments-from"  
2151 - " or manually copy individual attachments."; 2108 + ". Use --prefix with --copy-attachments-from or manually copy individual attachments.";
2152 throw std::runtime_error(message); 2109 throw std::runtime_error(message);
2153 } 2110 }
2154 } 2111 }
@@ -2243,13 +2200,11 @@ QPDFJob::shouldRemoveUnreferencedResources(QPDF& pdf) @@ -2243,13 +2200,11 @@ QPDFJob::shouldRemoveUnreferencedResources(QPDF& pdf)
2243 return true; 2200 return true;
2244 } 2201 }
2245 2202
2246 - // Unreferenced resources are common in files where resources  
2247 - // dictionaries are shared across pages. As a heuristic, we look  
2248 - // in the file for shared resources dictionaries or shared XObject  
2249 - // subkeys of resources dictionaries either on pages or on form  
2250 - // XObjects in pages. If we find any, then there is a higher  
2251 - // likelihood that the expensive process of finding unreferenced  
2252 - // resources is worth it. 2203 + // Unreferenced resources are common in files where resources dictionaries are shared across
  2204 + // pages. As a heuristic, we look in the file for shared resources dictionaries or shared
  2205 + // XObject subkeys of resources dictionaries either on pages or on form XObjects in pages. If we
  2206 + // find any, then there is a higher likelihood that the expensive process of finding
  2207 + // unreferenced resources is worth it.
2253 2208
2254 // Return true as soon as we find any shared resources. 2209 // Return true as soon as we find any shared resources.
2255 2210
@@ -2332,8 +2287,8 @@ added_page(QPDF& pdf, QPDFObjectHandle page) @@ -2332,8 +2287,8 @@ added_page(QPDF& pdf, QPDFObjectHandle page)
2332 { 2287 {
2333 QPDFObjectHandle result = page; 2288 QPDFObjectHandle result = page;
2334 if (&page.getQPDF() != &pdf) { 2289 if (&page.getQPDF() != &pdf) {
2335 - // Calling copyForeignObject on an object we already copied  
2336 - // will give us the already existing copy. 2290 + // Calling copyForeignObject on an object we already copied will give us the already
  2291 + // existing copy.
2337 result = pdf.copyForeignObject(page); 2292 result = pdf.copyForeignObject(page);
2338 } 2293 }
2339 return result; 2294 return result;
@@ -2348,8 +2303,7 @@ added_page(QPDF& pdf, QPDFPageObjectHelper page) @@ -2348,8 +2303,7 @@ added_page(QPDF& pdf, QPDFPageObjectHelper page)
2348 void 2303 void
2349 QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_heap) 2304 QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_heap)
2350 { 2305 {
2351 - // Parse all page specifications and translate them into lists of  
2352 - // actual pages. 2306 + // Parse all page specifications and translate them into lists of actual pages.
2353 2307
2354 // Handle "." as a shortcut for the input file 2308 // Handle "." as a shortcut for the input file
2355 for (auto& page_spec: m->page_specs) { 2309 for (auto& page_spec: m->page_specs) {
@@ -2359,9 +2313,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea @@ -2359,9 +2313,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
2359 } 2313 }
2360 2314
2361 if (!m->keep_files_open_set) { 2315 if (!m->keep_files_open_set) {
2362 - // Count the number of distinct files to determine whether we  
2363 - // should keep files open or not. Rather than trying to code  
2364 - // some portable heuristic based on OS limits, just hard-code 2316 + // Count the number of distinct files to determine whether we should keep files open or not.
  2317 + // Rather than trying to code some portable heuristic based on OS limits, just hard-code
2365 // this at a given number and allow users to override. 2318 // this at a given number and allow users to override.
2366 std::set<std::string> filenames; 2319 std::set<std::string> filenames;
2367 for (auto& page_spec: m->page_specs) { 2320 for (auto& page_spec: m->page_specs) {
@@ -2383,16 +2336,13 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea @@ -2383,16 +2336,13 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
2383 std::map<unsigned long long, std::set<QPDFObjGen>> copied_pages; 2336 std::map<unsigned long long, std::set<QPDFObjGen>> copied_pages;
2384 for (auto& page_spec: m->page_specs) { 2337 for (auto& page_spec: m->page_specs) {
2385 if (page_spec_qpdfs.count(page_spec.filename) == 0) { 2338 if (page_spec_qpdfs.count(page_spec.filename) == 0) {
2386 - // Open the PDF file and store the QPDF object. Throw a  
2387 - // std::shared_ptr to the qpdf into a heap so that it  
2388 - // survives through copying to the output but gets cleaned up  
2389 - // automatically at the end. Do not canonicalize the file  
2390 - // name. Using two different paths to refer to the same  
2391 - // file is a documented workaround for duplicating a page.  
2392 - // If you are using this as an example of how to do this with  
2393 - // the API, you can just create two different QPDF objects  
2394 - // to the same underlying file with the same path to  
2395 - // achieve the same effect. 2339 + // Open the PDF file and store the QPDF object. Throw a std::shared_ptr to the qpdf into
  2340 + // a heap so that it survives through copying to the output but gets cleaned up
  2341 + // automatically at the end. Do not canonicalize the file name. Using two different
  2342 + // paths to refer to the same file is a documented workaround for duplicating a page. If
  2343 + // you are using this as an example of how to do this with the API, you can just create two
  2344 + // different QPDF objects to the same underlying file with the same path to achieve the
  2345 + // same effect.
2396 char const* password = page_spec.password.get(); 2346 char const* password = page_spec.password.get();
2397 if ((!m->encryption_file.empty()) && (password == nullptr) && 2347 if ((!m->encryption_file.empty()) && (password == nullptr) &&
2398 (page_spec.filename == m->encryption_file)) { 2348 (page_spec.filename == m->encryption_file)) {
@@ -2424,8 +2374,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea @@ -2424,8 +2374,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
2424 } 2374 }
2425 } 2375 }
2426 2376
2427 - // Read original pages from the PDF, and parse the page range  
2428 - // associated with this occurrence of the file. 2377 + // Read original pages from the PDF, and parse the page range associated with this
  2378 + // occurrence of the file.
2429 parsed_specs.push_back( 2379 parsed_specs.push_back(
2430 // line-break 2380 // line-break
2431 QPDFPageData(page_spec.filename, page_spec_qpdfs[page_spec.filename], page_spec.range)); 2381 QPDFPageData(page_spec.filename, page_spec_qpdfs[page_spec.filename], page_spec.range));
@@ -2451,11 +2401,9 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea @@ -2451,11 +2401,9 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
2451 } 2401 }
2452 } 2402 }
2453 2403
2454 - // Clear all pages out of the primary QPDF's pages tree but leave  
2455 - // the objects in place in the file so they can be re-added  
2456 - // without changing their object numbers. This enables other  
2457 - // things in the original file, such as outlines, to continue to  
2458 - // work. 2404 + // Clear all pages out of the primary QPDF's pages tree but leave the objects in place in the
  2405 + // file so they can be re-added without changing their object numbers. This enables other things
  2406 + // in the original file, such as outlines, to continue to work.
2459 doIfVerbose([&](Pipeline& v, std::string const& prefix) { 2407 doIfVerbose([&](Pipeline& v, std::string const& prefix) {
2460 v << prefix << ": removing unreferenced pages from primary input\n"; 2408 v << prefix << ": removing unreferenced pages from primary input\n";
2461 }); 2409 });
@@ -2466,9 +2414,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea @@ -2466,9 +2414,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
2466 } 2414 }
2467 2415
2468 if (m->collate && (parsed_specs.size() > 1)) { 2416 if (m->collate && (parsed_specs.size() > 1)) {
2469 - // Collate the pages by selecting one page from each spec in  
2470 - // order. When a spec runs out of pages, stop selecting from  
2471 - // it. 2417 + // Collate the pages by selecting one page from each spec in order. When a spec runs out of
  2418 + // pages, stop selecting from it.
2472 std::vector<QPDFPageData> new_parsed_specs; 2419 std::vector<QPDFPageData> new_parsed_specs;
2473 size_t nspecs = parsed_specs.size(); 2420 size_t nspecs = parsed_specs.size();
2474 size_t cur_page = 0; 2421 size_t cur_page = 0;
@@ -2491,9 +2438,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea @@ -2491,9 +2438,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
2491 parsed_specs = new_parsed_specs; 2438 parsed_specs = new_parsed_specs;
2492 } 2439 }
2493 2440
2494 - // Add all the pages from all the files in the order specified.  
2495 - // Keep track of any pages from the original file that we are  
2496 - // selecting. 2441 + // Add all the pages from all the files in the order specified. Keep track of any pages from the
  2442 + // original file that we are selecting.
2497 std::set<int> selected_from_orig; 2443 std::set<int> selected_from_orig;
2498 std::vector<QPDFObjectHandle> new_labels; 2444 std::vector<QPDFObjectHandle> new_labels;
2499 bool any_page_labels = false; 2445 bool any_page_labels = false;
@@ -2516,8 +2462,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea @@ -2516,8 +2462,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
2516 v << prefix << ": adding pages from " << page_data.filename << "\n"; 2462 v << prefix << ": adding pages from " << page_data.filename << "\n";
2517 }); 2463 });
2518 for (auto pageno_iter: page_data.selected_pages) { 2464 for (auto pageno_iter: page_data.selected_pages) {
2519 - // Pages are specified from 1 but numbered from 0 in the  
2520 - // vector 2465 + // Pages are specified from 1 but numbered from 0 in the vector
2521 int pageno = pageno_iter - 1; 2466 int pageno = pageno_iter - 1;
2522 pldh.getLabelsForPageRange(pageno, pageno, out_pageno++, new_labels); 2467 pldh.getLabelsForPageRange(pageno, pageno, out_pageno++, new_labels);
2523 QPDFPageObjectHelper to_copy = page_data.orig_pages.at(QIntC::to_size(pageno)); 2468 QPDFPageObjectHelper to_copy = page_data.orig_pages.at(QIntC::to_size(pageno));
@@ -2539,22 +2484,18 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea @@ -2539,22 +2484,18 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
2539 bool first_copy_from_orig = false; 2484 bool first_copy_from_orig = false;
2540 bool this_file = (page_data.qpdf == &pdf); 2485 bool this_file = (page_data.qpdf == &pdf);
2541 if (this_file) { 2486 if (this_file) {
2542 - // This is a page from the original file. Keep track  
2543 - // of the fact that we are using it. 2487 + // This is a page from the original file. Keep track of the fact that we are using
  2488 + // it.
2544 first_copy_from_orig = (selected_from_orig.count(pageno) == 0); 2489 first_copy_from_orig = (selected_from_orig.count(pageno) == 0);
2545 selected_from_orig.insert(pageno); 2490 selected_from_orig.insert(pageno);
2546 } 2491 }
2547 auto new_page = added_page(pdf, to_copy); 2492 auto new_page = added_page(pdf, to_copy);
2548 - // Try to avoid gratuitously renaming fields. In the case  
2549 - // of where we're just extracting a bunch of pages from  
2550 - // the original file and not copying any page more than  
2551 - // once, there's no reason to do anything with the fields.  
2552 - // Since we don't remove fields from the original file  
2553 - // until all copy operations are completed, any foreign  
2554 - // pages that conflict with original pages will be  
2555 - // adjusted. If we copy any page from the original file  
2556 - // more than once, that page would be in conflict with the  
2557 - // previous copy of itself. 2493 + // Try to avoid gratuitously renaming fields. In the case of where we're just extracting
  2494 + // a bunch of pages from the original file and not copying any page more than once,
  2495 + // there's no reason to do anything with the fields. Since we don't remove fields from
  2496 + // the original file until all copy operations are completed, any foreign pages that
  2497 + // conflict with original pages will be adjusted. If we copy any page from the original
  2498 + // file more than once, that page would be in conflict with the previous copy of itself.
2558 if (other_afdh->hasAcroForm() && ((!this_file) || (!first_copy_from_orig))) { 2499 if (other_afdh->hasAcroForm() && ((!this_file) || (!first_copy_from_orig))) {
2559 if (!this_file) { 2500 if (!this_file) {
2560 QTC::TC("qpdf", "QPDFJob copy fields not this file"); 2501 QTC::TC("qpdf", "QPDFJob copy fields not this file");
@@ -2569,8 +2510,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea @@ -2569,8 +2510,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
2569 qpdf_e_damaged_pdf, 2510 qpdf_e_damaged_pdf,
2570 "", 2511 "",
2571 0, 2512 0,
2572 - ("Exception caught while fixing copied"  
2573 - " annotations. This may be a qpdf bug. " + 2513 + ("Exception caught while fixing copied annotations. This may be a qpdf "
  2514 + "bug. " +
2574 std::string("Exception: ") + e.what())); 2515 std::string("Exception: ") + e.what()));
2575 } 2516 }
2576 } 2517 }
@@ -2585,10 +2526,9 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea @@ -2585,10 +2526,9 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
2585 pdf.getRoot().replaceKey("/PageLabels", page_labels); 2526 pdf.getRoot().replaceKey("/PageLabels", page_labels);
2586 } 2527 }
2587 2528
2588 - // Delete page objects for unused page in primary. This prevents  
2589 - // those objects from being preserved by being referred to from  
2590 - // other places, such as the outlines dictionary. Also make sure  
2591 - // we keep form fields from pages we preserved. 2529 + // Delete page objects for unused page in primary. This prevents those objects from being
  2530 + // preserved by being referred to from other places, such as the outlines dictionary. Also make
  2531 + // sure we keep form fields from pages we preserved.
2592 for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno) { 2532 for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno) {
2593 auto page = orig_pages.at(pageno); 2533 auto page = orig_pages.at(pageno);
2594 if (selected_from_orig.count(QIntC::to_int(pageno))) { 2534 if (selected_from_orig.count(QIntC::to_int(pageno))) {
@@ -2676,8 +2616,8 @@ QPDFJob::maybeFixWritePassword(int R, std::string& password) @@ -2676,8 +2616,8 @@ QPDFJob::maybeFixWritePassword(int R, std::string& password)
2676 std::string encoded; 2616 std::string encoded;
2677 if (!QUtil::utf8_to_pdf_doc(password, encoded)) { 2617 if (!QUtil::utf8_to_pdf_doc(password, encoded)) {
2678 QTC::TC("qpdf", "QPDFJob password not encodable"); 2618 QTC::TC("qpdf", "QPDFJob password not encodable");
2679 - throw std::runtime_error("supplied password cannot be encoded for"  
2680 - " 40-bit or 128-bit encryption formats"); 2619 + throw std::runtime_error("supplied password cannot be encoded for 40-bit "
  2620 + "or 128-bit encryption formats");
2681 } 2621 }
2682 password = encoded; 2622 password = encoded;
2683 } 2623 }
@@ -2687,31 +2627,27 @@ QPDFJob::maybeFixWritePassword(int R, std::string& password) @@ -2687,31 +2627,27 @@ QPDFJob::maybeFixWritePassword(int R, std::string& password)
2687 if (QUtil::utf8_to_pdf_doc(password, encoded)) { 2627 if (QUtil::utf8_to_pdf_doc(password, encoded)) {
2688 QTC::TC("qpdf", "QPDFJob auto-encode password"); 2628 QTC::TC("qpdf", "QPDFJob auto-encode password");
2689 doIfVerbose([&](Pipeline& v, std::string const& prefix) { 2629 doIfVerbose([&](Pipeline& v, std::string const& prefix) {
2690 - v << prefix << ": automatically converting Unicode"  
2691 - << " password to single-byte encoding as"  
2692 - << " required for 40-bit or 128-bit"  
2693 - << " encryption\n"; 2630 + v << prefix
  2631 + << ": automatically converting Unicode password to single-byte "
  2632 + "encoding as required for 40-bit or 128-bit encryption\n";
2694 }); 2633 });
2695 password = encoded; 2634 password = encoded;
2696 } else { 2635 } else {
2697 QTC::TC("qpdf", "QPDFJob bytes fallback warning"); 2636 QTC::TC("qpdf", "QPDFJob bytes fallback warning");
2698 - *m->log->getError() << m->message_prefix << ": WARNING: "  
2699 - << "supplied password looks like a Unicode"  
2700 - << " password with characters not allowed in"  
2701 - << " passwords for 40-bit and 128-bit "  
2702 - "encryption;"  
2703 - << " most readers will not be able to open this"  
2704 - << " file with the supplied password."  
2705 - << " (Use --password-mode=bytes to suppress "  
2706 - "this"  
2707 - << " warning and use the password anyway.)\n"; 2637 + *m->log->getError()
  2638 + << m->message_prefix
  2639 + << ": WARNING: supplied password looks like a Unicode password with "
  2640 + "characters not allowed in passwords for 40-bit and 128-bit "
  2641 + "encryption; most readers will not be able to open this file with "
  2642 + "the supplied password. (Use --password-mode=bytes to suppress this "
  2643 + "warning and use the password anyway.)\n";
2708 } 2644 }
2709 } else if ((R >= 5) && (!is_valid_utf8)) { 2645 } else if ((R >= 5) && (!is_valid_utf8)) {
2710 QTC::TC("qpdf", "QPDFJob invalid utf-8 in auto"); 2646 QTC::TC("qpdf", "QPDFJob invalid utf-8 in auto");
2711 - throw std::runtime_error("supplied password is not a valid Unicode password,"  
2712 - " which is required for 256-bit encryption; to"  
2713 - " really use this password, rerun with the"  
2714 - " --password-mode=bytes option"); 2647 + throw std::runtime_error(
  2648 + "supplied password is not a valid Unicode password, which is required for "
  2649 + "256-bit encryption; to really use this password, rerun with the "
  2650 + "--password-mode=bytes option");
2715 } 2651 }
2716 } 2652 }
2717 } 2653 }
@@ -2749,16 +2685,12 @@ QPDFJob::setEncryptionOptions(QPDF& pdf, QPDFWriter& w) @@ -2749,16 +2685,12 @@ QPDFJob::setEncryptionOptions(QPDF& pdf, QPDFWriter& w)
2749 if ((R < 4) || ((R == 4) && (!m->use_aes))) { 2685 if ((R < 4) || ((R == 4) && (!m->use_aes))) {
2750 if (!m->allow_weak_crypto) { 2686 if (!m->allow_weak_crypto) {
2751 QTC::TC("qpdf", "QPDFJob weak crypto error"); 2687 QTC::TC("qpdf", "QPDFJob weak crypto error");
2752 - *m->log->getError() << m->message_prefix  
2753 - << ": refusing to write a file with RC4, a weak "  
2754 - "cryptographic "  
2755 - "algorithm\n"  
2756 - << "Please use 256-bit keys for better security.\n"  
2757 - << "Pass --allow-weak-crypto to enable writing insecure "  
2758 - "files.\n"  
2759 - << "See also "  
2760 - "https://qpdf.readthedocs.io/en/stable/"  
2761 - "weak-crypto.html\n"; 2688 + *m->log->getError()
  2689 + << m->message_prefix
  2690 + << ": refusing to write a file with RC4, a weak cryptographic algorithm\n"
  2691 + "Please use 256-bit keys for better security.\n"
  2692 + "Pass --allow-weak-crypto to enable writing insecure files.\n"
  2693 + "See also https://qpdf.readthedocs.io/en/stable/weak-crypto.html\n";
2762 throw std::runtime_error("refusing to write a file with weak crypto"); 2694 throw std::runtime_error("refusing to write a file with weak crypto");
2763 } 2695 }
2764 } 2696 }
@@ -2996,8 +2928,8 @@ QPDFJob::doSplitPages(QPDF& pdf) @@ -2996,8 +2928,8 @@ QPDFJob::doSplitPages(QPDF& pdf)
2996 qpdf_e_damaged_pdf, 2928 qpdf_e_damaged_pdf,
2997 "", 2929 "",
2998 0, 2930 0,
2999 - ("Exception caught while fixing copied"  
3000 - " annotations. This may be a qpdf bug." + 2931 + ("Exception caught while fixing copied annotations. This may be a qpdf "
  2932 + "bug." +
3001 std::string("Exception: ") + e.what())); 2933 std::string("Exception: ") + e.what()));
3002 } 2934 }
3003 } 2935 }
@@ -3032,12 +2964,10 @@ QPDFJob::writeOutfile(QPDF& pdf) @@ -3032,12 +2964,10 @@ QPDFJob::writeOutfile(QPDF& pdf)
3032 { 2964 {
3033 std::shared_ptr<char> temp_out; 2965 std::shared_ptr<char> temp_out;
3034 if (m->replace_input) { 2966 if (m->replace_input) {
3035 - // Append but don't prepend to the path to generate a  
3036 - // temporary name. This saves us from having to split the path  
3037 - // by directory and non-directory. 2967 + // Append but don't prepend to the path to generate a temporary name. This saves us from
  2968 + // having to split the path by directory and non-directory.
3038 temp_out = QUtil::make_shared_cstr(std::string(m->infilename.get()) + ".~qpdf-temp#"); 2969 temp_out = QUtil::make_shared_cstr(std::string(m->infilename.get()) + ".~qpdf-temp#");
3039 - // m->outfilename will be restored to 0 before temp_out  
3040 - // goes out of scope. 2970 + // m->outfilename will be restored to 0 before temp_out goes out of scope.
3041 m->outfilename = temp_out; 2971 m->outfilename = temp_out;
3042 } else if (strcmp(m->outfilename.get(), "-") == 0) { 2972 } else if (strcmp(m->outfilename.get(), "-") == 0) {
3043 m->outfilename = nullptr; 2973 m->outfilename = nullptr;
@@ -3045,14 +2975,14 @@ QPDFJob::writeOutfile(QPDF& pdf) @@ -3045,14 +2975,14 @@ QPDFJob::writeOutfile(QPDF& pdf)
3045 if (m->json_version) { 2975 if (m->json_version) {
3046 writeJSON(pdf); 2976 writeJSON(pdf);
3047 } else { 2977 } else {
3048 - // QPDFWriter must have block scope so the output file will be  
3049 - // closed after write() finishes. 2978 + // QPDFWriter must have block scope so the output file will be closed after write()
  2979 + // finishes.
3050 QPDFWriter w(pdf); 2980 QPDFWriter w(pdf);
3051 if (m->outfilename) { 2981 if (m->outfilename) {
3052 w.setOutputFilename(m->outfilename.get()); 2982 w.setOutputFilename(m->outfilename.get());
3053 } else { 2983 } else {
3054 - // saveToStandardOutput has already been called, but  
3055 - // calling it again is defensive and harmless. 2984 + // saveToStandardOutput has already been called, but calling it again is defensive and
  2985 + // harmless.
3056 m->log->saveToStandardOutput(true); 2986 m->log->saveToStandardOutput(true);
3057 w.setOutputPipeline(m->log->getSave().get()); 2987 w.setOutputPipeline(m->log->getSave().get());
3058 } 2988 }
@@ -3096,8 +3026,7 @@ QPDFJob::writeOutfile(QPDF& pdf) @@ -3096,8 +3026,7 @@ QPDFJob::writeOutfile(QPDF& pdf)
3096 void 3026 void
3097 QPDFJob::writeJSON(QPDF& pdf) 3027 QPDFJob::writeJSON(QPDF& pdf)
3098 { 3028 {
3099 - // File pipeline must have block scope so it will be closed  
3100 - // after write. 3029 + // File pipeline must have block scope so it will be closed after write.
3101 std::shared_ptr<QUtil::FileCloser> fc; 3030 std::shared_ptr<QUtil::FileCloser> fc;
3102 std::shared_ptr<Pipeline> fp; 3031 std::shared_ptr<Pipeline> fp;
3103 if (m->outfilename.get()) { 3032 if (m->outfilename.get()) {
libqpdf/QPDFObjectHandle.cc
@@ -51,8 +51,7 @@ QPDFObjectHandle::StreamDataProvider::StreamDataProvider(bool supports_retry) : @@ -51,8 +51,7 @@ QPDFObjectHandle::StreamDataProvider::StreamDataProvider(bool supports_retry) :
51 51
52 QPDFObjectHandle::StreamDataProvider::~StreamDataProvider() 52 QPDFObjectHandle::StreamDataProvider::~StreamDataProvider()
53 { 53 {
54 - // Must be explicit and not inline -- see QPDF_DLL_CLASS in  
55 - // README-maintainer 54 + // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer
56 } 55 }
57 56
58 void 57 void
@@ -155,16 +154,14 @@ QPDFObjectHandle::TokenFilter::writeToken(QPDFTokenizer::Token const& token) @@ -155,16 +154,14 @@ QPDFObjectHandle::TokenFilter::writeToken(QPDFTokenizer::Token const& token)
155 void 154 void
156 QPDFObjectHandle::ParserCallbacks::handleObject(QPDFObjectHandle) 155 QPDFObjectHandle::ParserCallbacks::handleObject(QPDFObjectHandle)
157 { 156 {
158 - throw std::logic_error("You must override one of the"  
159 - " handleObject methods in ParserCallbacks"); 157 + throw std::logic_error("You must override one of the handleObject methods in ParserCallbacks");
160 } 158 }
161 159
162 void 160 void
163 QPDFObjectHandle::ParserCallbacks::handleObject(QPDFObjectHandle oh, size_t, size_t) 161 QPDFObjectHandle::ParserCallbacks::handleObject(QPDFObjectHandle oh, size_t, size_t)
164 { 162 {
165 - // This version of handleObject was added in qpdf 9. If the  
166 - // developer did not override it, fall back to the older  
167 - // interface. 163 + // This version of handleObject was added in qpdf 9. If the developer did not override it, fall
  164 + // back to the older interface.
168 handleObject(oh); 165 handleObject(oh);
169 } 166 }
170 167
@@ -592,8 +589,7 @@ QPDFObjectHandle::getUIntValueAsUInt() @@ -592,8 +589,7 @@ QPDFObjectHandle::getUIntValueAsUInt()
592 result = 0; 589 result = 0;
593 } else if (v > UINT_MAX) { 590 } else if (v > UINT_MAX) {
594 QTC::TC("qpdf", "QPDFObjectHandle uint returning UINT_MAX"); 591 QTC::TC("qpdf", "QPDFObjectHandle uint returning UINT_MAX");
595 - warnIfPossible("requested value of unsigned integer is too big;"  
596 - " returning UINT_MAX"); 592 + warnIfPossible("requested value of unsigned integer is too big; returning UINT_MAX");
597 result = UINT_MAX; 593 result = UINT_MAX;
598 } else { 594 } else {
599 result = static_cast<unsigned int>(v); 595 result = static_cast<unsigned int>(v);
@@ -1092,11 +1088,9 @@ QPDFObjectHandle::mergeResources( @@ -1092,11 +1088,9 @@ QPDFObjectHandle::mergeResources(
1092 QPDFObjectHandle this_val = getKey(rtype); 1088 QPDFObjectHandle this_val = getKey(rtype);
1093 if (this_val.isDictionary() && other_val.isDictionary()) { 1089 if (this_val.isDictionary() && other_val.isDictionary()) {
1094 if (this_val.isIndirect()) { 1090 if (this_val.isIndirect()) {
1095 - // Do this even if there are no keys. Various  
1096 - // places in the code call mergeResources with  
1097 - // resource dictionaries that contain empty  
1098 - // subdictionaries just to get this shallow copy  
1099 - // functionality. 1091 + // Do this even if there are no keys. Various places in the code call
  1092 + // mergeResources with resource dictionaries that contain empty subdictionaries
  1093 + // just to get this shallow copy functionality.
1100 QTC::TC("qpdf", "QPDFObjectHandle replace with copy"); 1094 QTC::TC("qpdf", "QPDFObjectHandle replace with copy");
1101 this_val = replaceKeyAndGetNew(rtype, this_val.shallowCopy()); 1095 this_val = replaceKeyAndGetNew(rtype, this_val.shallowCopy());
1102 } 1096 }
@@ -1476,8 +1470,7 @@ QPDFObjectHandle::arrayOrStreamToStreamArray( @@ -1476,8 +1470,7 @@ QPDFObjectHandle::arrayOrStreamToStreamArray(
1476 "", 1470 "",
1477 description, 1471 description,
1478 0, 1472 0,
1479 - " object is supposed to be a stream or an"  
1480 - " array of streams but is neither")); 1473 + " object is supposed to be a stream or an array of streams but is neither"));
1481 } 1474 }
1482 1475
1483 bool first = true; 1476 bool first = true;
@@ -1526,8 +1519,8 @@ void @@ -1526,8 +1519,8 @@ void
1526 QPDFObjectHandle::rotatePage(int angle, bool relative) 1519 QPDFObjectHandle::rotatePage(int angle, bool relative)
1527 { 1520 {
1528 if ((angle % 90) != 0) { 1521 if ((angle % 90) != 0) {
1529 - throw std::runtime_error("QPDF::rotatePage called with an"  
1530 - " angle that is not a multiple of 90"); 1522 + throw std::runtime_error(
  1523 + "QPDF::rotatePage called with an angle that is not a multiple of 90");
1531 } 1524 }
1532 int new_angle = angle; 1525 int new_angle = angle;
1533 if (relative) { 1526 if (relative) {
@@ -1551,8 +1544,7 @@ QPDFObjectHandle::rotatePage(int angle, bool relative) @@ -1551,8 +1544,7 @@ QPDFObjectHandle::rotatePage(int angle, bool relative)
1551 new_angle += old_angle; 1544 new_angle += old_angle;
1552 } 1545 }
1553 new_angle = (new_angle + 360) % 360; 1546 new_angle = (new_angle + 360) % 360;
1554 - // Make this explicit even with new_angle == 0 since /Rotate can  
1555 - // be inherited. 1547 + // Make this explicit even with new_angle == 0 since /Rotate can be inherited.
1556 replaceKey("/Rotate", QPDFObjectHandle::newInteger(new_angle)); 1548 replaceKey("/Rotate", QPDFObjectHandle::newInteger(new_angle));
1557 } 1549 }
1558 1550
@@ -1564,15 +1556,14 @@ QPDFObjectHandle::coalesceContentStreams() @@ -1564,15 +1556,14 @@ QPDFObjectHandle::coalesceContentStreams()
1564 QTC::TC("qpdf", "QPDFObjectHandle coalesce called on stream"); 1556 QTC::TC("qpdf", "QPDFObjectHandle coalesce called on stream");
1565 return; 1557 return;
1566 } else if (!contents.isArray()) { 1558 } else if (!contents.isArray()) {
1567 - // /Contents is optional for pages, and some very damaged  
1568 - // files may have pages that are invalid in other ways. 1559 + // /Contents is optional for pages, and some very damaged files may have pages that are
  1560 + // invalid in other ways.
1569 return; 1561 return;
1570 } 1562 }
1571 - // Should not be possible for a page object to not have an  
1572 - // owning PDF unless it was manually constructed in some  
1573 - // incorrect way. However, it can happen in a PDF file whose  
1574 - // page structure is direct, which is against spec but still  
1575 - // possible to hand construct, as in fuzz issue 27393. 1563 + // Should not be possible for a page object to not have an owning PDF unless it was manually
  1564 + // constructed in some incorrect way. However, it can happen in a PDF file whose page structure
  1565 + // is direct, which is against spec but still possible to hand construct, as in fuzz issue
  1566 + // 27393.
1576 QPDF& qpdf = getQPDF("coalesceContentStreams called on object with no associated PDF file"); 1567 QPDF& qpdf = getQPDF("coalesceContentStreams called on object with no associated PDF file");
1577 1568
1578 QPDFObjectHandle new_contents = newStream(&qpdf); 1569 QPDFObjectHandle new_contents = newStream(&qpdf);
@@ -1808,8 +1799,8 @@ QPDFObjectHandle::parseContentStream_data( @@ -1808,8 +1799,8 @@ QPDFObjectHandle::parseContentStream_data(
1808 1799
1809 callbacks->handleObject(obj, QIntC::to_size(offset), length); 1800 callbacks->handleObject(obj, QIntC::to_size(offset), length);
1810 if (obj.isOperator() && (obj.getOperatorValue() == "ID")) { 1801 if (obj.isOperator() && (obj.getOperatorValue() == "ID")) {
1811 - // Discard next character; it is the space after ID that  
1812 - // terminated the token. Read until end of inline image. 1802 + // Discard next character; it is the space after ID that terminated the token. Read
  1803 + // until end of inline image.
1813 char ch; 1804 char ch;
1814 input->read(&ch, 1); 1805 input->read(&ch, 1);
1815 tokenizer.expectInlineImage(input); 1806 tokenizer.expectInlineImage(input);
@@ -2052,8 +2043,8 @@ QPDFObjectHandle::newReserved(QPDF* qpdf) @@ -2052,8 +2043,8 @@ QPDFObjectHandle::newReserved(QPDF* qpdf)
2052 void 2043 void
2053 QPDFObjectHandle::setObjectDescription(QPDF* owning_qpdf, std::string const& object_description) 2044 QPDFObjectHandle::setObjectDescription(QPDF* owning_qpdf, std::string const& object_description)
2054 { 2045 {
2055 - // This is called during parsing on newly created direct objects,  
2056 - // so we can't call dereference() here. 2046 + // This is called during parsing on newly created direct objects, so we can't call dereference()
  2047 + // here.
2057 if (isInitialized() && obj.get()) { 2048 if (isInitialized() && obj.get()) {
2058 auto descr = std::make_shared<QPDFValue::Description>(object_description); 2049 auto descr = std::make_shared<QPDFValue::Description>(object_description);
2059 obj->setDescription(owning_qpdf, descr); 2050 obj->setDescription(owning_qpdf, descr);
@@ -2070,8 +2061,7 @@ QPDFObjectHandle @@ -2070,8 +2061,7 @@ QPDFObjectHandle
2070 QPDFObjectHandle::shallowCopy() 2061 QPDFObjectHandle::shallowCopy()
2071 { 2062 {
2072 if (!dereference()) { 2063 if (!dereference()) {
2073 - throw std::logic_error("operation attempted on uninitialized "  
2074 - "QPDFObjectHandle"); 2064 + throw std::logic_error("operation attempted on uninitialized QPDFObjectHandle");
2075 } 2065 }
2076 return QPDFObjectHandle(obj->copy()); 2066 return QPDFObjectHandle(obj->copy());
2077 } 2067 }
@@ -2080,8 +2070,7 @@ QPDFObjectHandle @@ -2080,8 +2070,7 @@ QPDFObjectHandle
2080 QPDFObjectHandle::unsafeShallowCopy() 2070 QPDFObjectHandle::unsafeShallowCopy()
2081 { 2071 {
2082 if (!dereference()) { 2072 if (!dereference()) {
2083 - throw std::logic_error("operation attempted on uninitialized "  
2084 - "QPDFObjectHandle"); 2073 + throw std::logic_error("operation attempted on uninitialized QPDFObjectHandle");
2085 } 2074 }
2086 return QPDFObjectHandle(obj->copy(true)); 2075 return QPDFObjectHandle(obj->copy(true));
2087 } 2076 }
@@ -2094,8 +2083,7 @@ QPDFObjectHandle::makeDirect(QPDFObjGen::set& visited, bool stop_at_streams) @@ -2094,8 +2083,7 @@ QPDFObjectHandle::makeDirect(QPDFObjGen::set& visited, bool stop_at_streams)
2094 auto cur_og = getObjGen(); 2083 auto cur_og = getObjGen();
2095 if (!visited.add(cur_og)) { 2084 if (!visited.add(cur_og)) {
2096 QTC::TC("qpdf", "QPDFObjectHandle makeDirect loop"); 2085 QTC::TC("qpdf", "QPDFObjectHandle makeDirect loop");
2097 - throw std::runtime_error("loop detected while converting object from "  
2098 - "indirect to direct"); 2086 + throw std::runtime_error("loop detected while converting object from indirect to direct");
2099 } 2087 }
2100 2088
2101 if (isBool() || isInteger() || isName() || isNull() || isReal() || isString()) { 2089 if (isBool() || isInteger() || isName() || isNull() || isReal() || isString()) {
@@ -2123,11 +2111,10 @@ QPDFObjectHandle::makeDirect(QPDFObjGen::set& visited, bool stop_at_streams) @@ -2123,11 +2111,10 @@ QPDFObjectHandle::makeDirect(QPDFObjGen::set& visited, bool stop_at_streams)
2123 throw std::runtime_error("attempt to make a stream into a direct object"); 2111 throw std::runtime_error("attempt to make a stream into a direct object");
2124 } 2112 }
2125 } else if (isReserved()) { 2113 } else if (isReserved()) {
2126 - throw std::logic_error("QPDFObjectHandle: attempting to make a"  
2127 - " reserved object handle direct"); 2114 + throw std::logic_error(
  2115 + "QPDFObjectHandle: attempting to make a reserved object handle direct");
2128 } else { 2116 } else {
2129 - throw std::logic_error("QPDFObjectHandle::makeDirectInternal: "  
2130 - "unknown object type"); 2117 + throw std::logic_error("QPDFObjectHandle::makeDirectInternal: unknown object type");
2131 } 2118 }
2132 2119
2133 visited.erase(cur_og); 2120 visited.erase(cur_og);
@@ -2162,8 +2149,7 @@ void @@ -2162,8 +2149,7 @@ void
2162 QPDFObjectHandle::assertInitialized() const 2149 QPDFObjectHandle::assertInitialized() const
2163 { 2150 {
2164 if (!isInitialized()) { 2151 if (!isInitialized()) {
2165 - throw std::logic_error("operation attempted on uninitialized "  
2166 - "QPDFObjectHandle"); 2152 + throw std::logic_error("operation attempted on uninitialized QPDFObjectHandle");
2167 } 2153 }
2168 } 2154 }
2169 2155
@@ -2172,8 +2158,8 @@ QPDFObjectHandle::typeWarning(char const* expected_type, std::string const& warn @@ -2172,8 +2158,8 @@ QPDFObjectHandle::typeWarning(char const* expected_type, std::string const& warn
2172 { 2158 {
2173 QPDF* context = nullptr; 2159 QPDF* context = nullptr;
2174 std::string description; 2160 std::string description;
2175 - // Type checks above guarantee that the object has been dereferenced.  
2176 - // Nevertheless, dereference throws exceptions in the test suite 2161 + // Type checks above guarantee that the object has been dereferenced. Nevertheless, dereference
  2162 + // throws exceptions in the test suite
2177 if (!dereference()) { 2163 if (!dereference()) {
2178 throw std::logic_error("attempted to dereference an uninitialized QPDFObjectHandle"); 2164 throw std::logic_error("attempted to dereference an uninitialized QPDFObjectHandle");
2179 } 2165 }
@@ -2376,8 +2362,8 @@ QPDFObjectHandle::checkOwnership(QPDFObjectHandle const& item) const @@ -2376,8 +2362,8 @@ QPDFObjectHandle::checkOwnership(QPDFObjectHandle const& item) const
2376 auto item_qpdf = item.getOwningQPDF(); 2362 auto item_qpdf = item.getOwningQPDF();
2377 if ((qpdf != nullptr) && (item_qpdf != nullptr) && (qpdf != item_qpdf)) { 2363 if ((qpdf != nullptr) && (item_qpdf != nullptr) && (qpdf != item_qpdf)) {
2378 QTC::TC("qpdf", "QPDFObjectHandle check ownership"); 2364 QTC::TC("qpdf", "QPDFObjectHandle check ownership");
2379 - throw std::logic_error("Attempting to add an object from a different QPDF."  
2380 - " Use QPDF::copyForeignObject to add objects from another file."); 2365 + throw std::logic_error("Attempting to add an object from a different QPDF. Use "
  2366 + "QPDF::copyForeignObject to add objects from another file.");
2381 } 2367 }
2382 } 2368 }
2383 2369
@@ -2402,9 +2388,8 @@ QPDFObjectHandle::dereference() @@ -2402,9 +2388,8 @@ QPDFObjectHandle::dereference()
2402 void 2388 void
2403 QPDFObjectHandle::warn(QPDF* qpdf, QPDFExc const& e) 2389 QPDFObjectHandle::warn(QPDF* qpdf, QPDFExc const& e)
2404 { 2390 {
2405 - // If parsing on behalf of a QPDF object and want to give a  
2406 - // warning, we can warn through the object. If parsing for some  
2407 - // other reason, such as an explicit creation of an object from a 2391 + // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the
  2392 + // object. If parsing for some other reason, such as an explicit creation of an object from a
2408 // string, then just throw the exception. 2393 // string, then just throw the exception.
2409 if (qpdf) { 2394 if (qpdf) {
2410 qpdf->warn(e); 2395 qpdf->warn(e);
@@ -2596,7 +2581,8 @@ QPDFObjectHandle::getQPDF(std::string const& error_msg) const @@ -2596,7 +2581,8 @@ QPDFObjectHandle::getQPDF(std::string const& error_msg) const
2596 { 2581 {
2597 auto result = isInitialized() ? this->obj->getQPDF() : nullptr; 2582 auto result = isInitialized() ? this->obj->getQPDF() : nullptr;
2598 if (result == nullptr) { 2583 if (result == nullptr) {
2599 - throw std::runtime_error(error_msg == "" ? "attempt to use a null qpdf object" : error_msg); 2584 + throw std::runtime_error(
  2585 + error_msg.empty() ? "attempt to use a null qpdf object" : error_msg);
2600 } 2586 }
2601 return *result; 2587 return *result;
2602 } 2588 }
libqpdf/QPDFPageObjectHelper.cc
@@ -110,10 +110,8 @@ InlineImageTracker::convertIIDict(QPDFObjectHandle odict) @@ -110,10 +110,8 @@ InlineImageTracker::convertIIDict(QPDFObjectHandle odict)
110 } else if (name == "/I") { 110 } else if (name == "/I") {
111 name = "/Indexed"; 111 name = "/Indexed";
112 } else { 112 } else {
113 - // This is a key in the page's /Resources ->  
114 - // /ColorSpace dictionary. We need to look it up  
115 - // and use its value as the color space for the  
116 - // image. 113 + // This is a key in the page's /Resources -> /ColorSpace dictionary. We need to
  114 + // look it up and use its value as the color space for the image.
117 QPDFObjectHandle colorspace = resources.getKey("/ColorSpace"); 115 QPDFObjectHandle colorspace = resources.getKey("/ColorSpace");
118 if (colorspace.isDictionary() && colorspace.hasKey(name)) { 116 if (colorspace.isDictionary() && colorspace.hasKey(name)) {
119 QTC::TC("qpdf", "QPDFPageObjectHelper colorspace lookup"); 117 QTC::TC("qpdf", "QPDFPageObjectHelper colorspace lookup");
@@ -407,8 +405,8 @@ QPDFPageObjectHelper::externalizeInlineImages(size_t min_size, bool shallow) @@ -407,8 +405,8 @@ QPDFPageObjectHelper::externalizeInlineImages(size_t min_size, bool shallow)
407 { 405 {
408 if (shallow) { 406 if (shallow) {
409 QPDFObjectHandle resources = getAttribute("/Resources", true); 407 QPDFObjectHandle resources = getAttribute("/Resources", true);
410 - // Calling mergeResources also ensures that /XObject becomes  
411 - // direct and is not shared with other pages. 408 + // Calling mergeResources also ensures that /XObject becomes direct and is not shared with
  409 + // other pages.
412 resources.mergeResources("<< /XObject << >> >>"_qpdf); 410 resources.mergeResources("<< /XObject << >> >>"_qpdf);
413 InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources); 411 InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources);
414 Pl_Buffer b("new page content"); 412 Pl_Buffer b("new page content");
@@ -573,11 +571,10 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( @@ -573,11 +571,10 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
573 return false; 571 return false;
574 } 572 }
575 573
576 - // We will walk through /Font and /XObject dictionaries, removing  
577 - // any resources that are not referenced. We must make copies of  
578 - // resource dictionaries down into the dictionaries are mutating  
579 - // to prevent mutating one dictionary from having the side effect  
580 - // of mutating the one it was copied from. 574 + // We will walk through /Font and /XObject dictionaries, removing any resources that are not
  575 + // referenced. We must make copies of resource dictionaries down into the dictionaries are
  576 + // mutating to prevent mutating one dictionary from having the side effect of mutating the one
  577 + // it was copied from.
581 QPDFObjectHandle resources = ph.getAttribute("/Resources", true); 578 QPDFObjectHandle resources = ph.getAttribute("/Resources", true);
582 std::vector<QPDFObjectHandle> rdicts; 579 std::vector<QPDFObjectHandle> rdicts;
583 std::set<std::string> known_names; 580 std::set<std::string> known_names;
@@ -605,33 +602,25 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( @@ -605,33 +602,25 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
605 } 602 }
606 } 603 }
607 } 604 }
608 - // Older versions of the PDF spec allowed form XObjects to omit  
609 - // their resources dictionaries, in which case names were resolved  
610 - // from the containing page. This behavior seems to be widely  
611 - // supported by viewers. If a form XObjects has a resources  
612 - // dictionary and has some unresolved names, some viewers fail to  
613 - // resolve them, and others allow them to be inherited from the  
614 - // page or from another form XObjects that contains them. Since  
615 - // this behavior is inconsistent across viewers, we consider an  
616 - // unresolved name when a resources dictionary is present to be  
617 - // reason not to remove unreferenced resources. An unresolved name  
618 - // in the absence of a resource dictionary is not considered a  
619 - // problem. For form XObjects, we just accumulate a list of  
620 - // unresolved names, and for page objects, we avoid removing any  
621 - // such names found in nested form XObjects. 605 + // Older versions of the PDF spec allowed form XObjects to omit their resources dictionaries, in
  606 + // which case names were resolved from the containing page. This behavior seems to be widely
  607 + // supported by viewers. If a form XObjects has a resources dictionary and has some unresolved
  608 + // names, some viewers fail to resolve them, and others allow them to be inherited from the page
  609 + // or from another form XObjects that contains them. Since this behavior is inconsistent across
  610 + // viewers, we consider an unresolved name when a resources dictionary is present to be reason
  611 + // not to remove unreferenced resources. An unresolved name in the absence of a resource
  612 + // dictionary is not considered a problem. For form XObjects, we just accumulate a list of
  613 + // unresolved names, and for page objects, we avoid removing any such names found in nested form
  614 + // XObjects.
622 615
623 if ((!local_unresolved.empty()) && resources.isDictionary()) { 616 if ((!local_unresolved.empty()) && resources.isDictionary()) {
624 - // It's not worth issuing a warning for this case. From qpdf  
625 - // 10.3, we are hopefully only looking at names that are  
626 - // referencing fonts and XObjects, but until we're certain  
627 - // that we know the meaning of every name in a content stream,  
628 - // we don't want to give warnings that might be false  
629 - // positives. Also, this can happen in legitimate cases with  
630 - // older PDFs, and there's nothing to be done about it, so  
631 - // there's no good reason to issue a warning. The only sad  
632 - // thing is that it was a false positive that alerted me to a  
633 - // logic error in the code, and any future such errors would  
634 - // now be hidden. 617 + // It's not worth issuing a warning for this case. From qpdf 10.3, we are hopefully only
  618 + // looking at names that are referencing fonts and XObjects, but until we're certain that we
  619 + // know the meaning of every name in a content stream, we don't want to give warnings that
  620 + // might be false positives. Also, this can happen in legitimate cases with older PDFs, and
  621 + // there's nothing to be done about it, so there's no good reason to issue a warning. The
  622 + // only sad thing is that it was a false positive that alerted me to a logic error in the
  623 + // code, and any future such errors would now be hidden.
635 QTC::TC("qpdf", "QPDFPageObjectHelper unresolved names"); 624 QTC::TC("qpdf", "QPDFPageObjectHelper unresolved names");
636 return false; 625 return false;
637 } 626 }
@@ -639,8 +628,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( @@ -639,8 +628,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
639 for (auto& dict: rdicts) { 628 for (auto& dict: rdicts) {
640 for (auto const& key: dict.getKeys()) { 629 for (auto const& key: dict.getKeys()) {
641 if (is_page && unresolved.count(key)) { 630 if (is_page && unresolved.count(key)) {
642 - // This name is referenced by some nested form  
643 - // xobject, so don't remove it. 631 + // This name is referenced by some nested form xobject, so don't remove it.
644 QTC::TC("qpdf", "QPDFPageObjectHelper resolving unresolved"); 632 QTC::TC("qpdf", "QPDFPageObjectHelper resolving unresolved");
645 } else if (!rf.getNames().count(key)) { 633 } else if (!rf.getNames().count(key)) {
646 dict.removeKey(key); 634 dict.removeKey(key);
@@ -653,8 +641,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( @@ -653,8 +641,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
653 void 641 void
654 QPDFPageObjectHelper::removeUnreferencedResources() 642 QPDFPageObjectHelper::removeUnreferencedResources()
655 { 643 {
656 - // Accumulate a list of unresolved names across all nested form  
657 - // XObjects. 644 + // Accumulate a list of unresolved names across all nested form XObjects.
658 std::set<std::string> unresolved; 645 std::set<std::string> unresolved;
659 bool any_failures = false; 646 bool any_failures = false;
660 forEachFormXObject( 647 forEachFormXObject(
@@ -724,10 +711,9 @@ QPDFPageObjectHelper::getMatrixForTransformations(bool invert) @@ -724,10 +711,9 @@ QPDFPageObjectHelper::getMatrixForTransformations(bool invert)
724 QPDFObjectHandle 711 QPDFObjectHandle
725 QPDFPageObjectHelper::getFormXObjectForPage(bool handle_transformations) 712 QPDFPageObjectHelper::getFormXObjectForPage(bool handle_transformations)
726 { 713 {
727 - auto result = this->oh  
728 - .getQPDF("QPDFPageObjectHelper::getFormXObjectForPage "  
729 - "called with a direct object")  
730 - .newStream(); 714 + auto result =
  715 + this->oh.getQPDF("QPDFPageObjectHelper::getFormXObjectForPage called with a direct object")
  716 + .newStream();
731 QPDFObjectHandle newdict = result.getDict(); 717 QPDFObjectHandle newdict = result.getDict();
732 newdict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject")); 718 newdict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject"));
733 newdict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Form")); 719 newdict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Form"));
@@ -759,18 +745,15 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement( @@ -759,18 +745,15 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement(
759 bool allow_shrink, 745 bool allow_shrink,
760 bool allow_expand) 746 bool allow_expand)
761 { 747 {
762 - // Calculate the transformation matrix that will place the given  
763 - // form XObject fully inside the given rectangle, center and  
764 - // shrinking or expanding as needed if requested.  
765 -  
766 - // When rendering a form XObject, the transformation in the  
767 - // graphics state (cm) is applied first (of course -- when it is  
768 - // applied, the PDF interpreter doesn't even know we're going to  
769 - // be drawing a form XObject yet), and then the object's matrix  
770 - // (M) is applied. The resulting matrix, when applied to the form  
771 - // XObject's bounding box, will generate a new rectangle. We want  
772 - // to create a transformation matrix that make the form XObject's  
773 - // bounding box land in exactly the right spot. 748 + // Calculate the transformation matrix that will place the given form XObject fully inside the
  749 + // given rectangle, center and shrinking or expanding as needed if requested.
  750 +
  751 + // When rendering a form XObject, the transformation in the graphics state (cm) is applied first
  752 + // (of course -- when it is applied, the PDF interpreter doesn't even know we're going to be
  753 + // drawing a form XObject yet), and then the object's matrix (M) is applied. The resulting
  754 + // matrix, when applied to the form XObject's bounding box, will generate a new rectangle. We
  755 + // want to create a transformation matrix that make the form XObject's bounding box land in
  756 + // exactly the right spot.
774 757
775 QPDFObjectHandle fdict = fo.getDict(); 758 QPDFObjectHandle fdict = fo.getDict();
776 QPDFObjectHandle bbox_obj = fdict.getKey("/BBox"); 759 QPDFObjectHandle bbox_obj = fdict.getKey("/BBox");
@@ -782,37 +765,32 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement( @@ -782,37 +765,32 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement(
782 QPDFMatrix tmatrix; // "to" matrix 765 QPDFMatrix tmatrix; // "to" matrix
783 QPDFMatrix fmatrix; // "from" matrix 766 QPDFMatrix fmatrix; // "from" matrix
784 if (invert_transformations) { 767 if (invert_transformations) {
785 - // tmatrix inverts scaling and rotation of the destination  
786 - // page. Applying this matrix allows the overlaid form  
787 - // XObject's to be absolute rather than relative to properties  
788 - // of the destination page. tmatrix is part of the computed  
789 - // transformation matrix. 768 + // tmatrix inverts scaling and rotation of the destination page. Applying this matrix allows
  769 + // the overlaid form XObject's to be absolute rather than relative to properties of the
  770 + // destination page. tmatrix is part of the computed transformation matrix.
790 tmatrix = QPDFMatrix(getMatrixForTransformations(true)); 771 tmatrix = QPDFMatrix(getMatrixForTransformations(true));
791 wmatrix.concat(tmatrix); 772 wmatrix.concat(tmatrix);
792 } 773 }
793 if (fdict.getKey("/Matrix").isMatrix()) { 774 if (fdict.getKey("/Matrix").isMatrix()) {
794 - // fmatrix is the transformation matrix that is applied to the  
795 - // form XObject itself. We need this for calculations, but we  
796 - // don't explicitly use it in the final result because the PDF 775 + // fmatrix is the transformation matrix that is applied to the form XObject itself. We need
  776 + // this for calculations, but we don't explicitly use it in the final result because the PDF
797 // rendering system automatically applies this last before 777 // rendering system automatically applies this last before
798 // drawing the form XObject. 778 // drawing the form XObject.
799 fmatrix = QPDFMatrix(fdict.getKey("/Matrix").getArrayAsMatrix()); 779 fmatrix = QPDFMatrix(fdict.getKey("/Matrix").getArrayAsMatrix());
800 wmatrix.concat(fmatrix); 780 wmatrix.concat(fmatrix);
801 } 781 }
802 782
803 - // The current wmatrix handles transformation from the form  
804 - // xobject and, if requested, the destination page. Next, we have  
805 - // to adjust this for scale and position. 783 + // The current wmatrix handles transformation from the form xobject and, if requested, the
  784 + // destination page. Next, we have to adjust this for scale and position.
806 785
807 - // Step 1: figure out what scale factor we need to make the form  
808 - // XObject's bounding box fit within the destination rectangle. 786 + // Step 1: figure out what scale factor we need to make the form XObject's bounding box fit
  787 + // within the destination rectangle.
809 788
810 // Transform bounding box 789 // Transform bounding box
811 QPDFObjectHandle::Rectangle bbox = bbox_obj.getArrayAsRectangle(); 790 QPDFObjectHandle::Rectangle bbox = bbox_obj.getArrayAsRectangle();
812 QPDFObjectHandle::Rectangle T = wmatrix.transformRectangle(bbox); 791 QPDFObjectHandle::Rectangle T = wmatrix.transformRectangle(bbox);
813 792
814 - // Calculate a scale factor, if needed. Shrink or expand if needed  
815 - // and allowed. 793 + // Calculate a scale factor, if needed. Shrink or expand if needed and allowed.
816 if ((T.urx == T.llx) || (T.ury == T.lly)) { 794 if ((T.urx == T.llx) || (T.ury == T.lly)) {
817 // avoid division by zero 795 // avoid division by zero
818 return QPDFMatrix(); 796 return QPDFMatrix();
@@ -834,8 +812,8 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement( @@ -834,8 +812,8 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement(
834 } 812 }
835 } 813 }
836 814
837 - // Step 2: figure out what translation is required to get the  
838 - // rectangle to the right spot: centered within the destination. 815 + // Step 2: figure out what translation is required to get the rectangle to the right spot:
  816 + // centered within the destination.
839 wmatrix = QPDFMatrix(); 817 wmatrix = QPDFMatrix();
840 wmatrix.scale(scale, scale); 818 wmatrix.scale(scale, scale);
841 wmatrix.concat(tmatrix); 819 wmatrix.concat(tmatrix);
@@ -849,9 +827,8 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement( @@ -849,9 +827,8 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement(
849 double tx = r_cx - t_cx; 827 double tx = r_cx - t_cx;
850 double ty = r_cy - t_cy; 828 double ty = r_cy - t_cy;
851 829
852 - // Now we can calculate the final matrix. The final matrix does  
853 - // not include fmatrix because that is applied automatically by  
854 - // the PDF interpreter. 830 + // Now we can calculate the final matrix. The final matrix does not include fmatrix because that
  831 + // is applied automatically by the PDF interpreter.
855 QPDFMatrix cm; 832 QPDFMatrix cm;
856 cm.translate(tx, ty); 833 cm.translate(tx, ty);
857 cm.scale(scale, scale); 834 cm.scale(scale, scale);
@@ -921,18 +898,15 @@ QPDFPageObjectHelper::flattenRotation(QPDFAcroFormDocumentHelper* afdh) @@ -921,18 +898,15 @@ QPDFPageObjectHelper::flattenRotation(QPDFAcroFormDocumentHelper* afdh)
921 auto rect = box.getArrayAsRectangle(); 898 auto rect = box.getArrayAsRectangle();
922 decltype(rect) new_rect; 899 decltype(rect) new_rect;
923 900
924 - // How far are the edges of our rectangle from the edges  
925 - // of the media box? 901 + // How far are the edges of our rectangle from the edges of the media box?
926 auto left_x = rect.llx - media_rect.llx; 902 auto left_x = rect.llx - media_rect.llx;
927 auto right_x = media_rect.urx - rect.urx; 903 auto right_x = media_rect.urx - rect.urx;
928 auto bottom_y = rect.lly - media_rect.lly; 904 auto bottom_y = rect.lly - media_rect.lly;
929 auto top_y = media_rect.ury - rect.ury; 905 auto top_y = media_rect.ury - rect.ury;
930 906
931 - // Rotating the page 180 degrees does not change  
932 - // /MediaBox. Rotating 90 or 270 degrees reverses llx and  
933 - // lly and also reverse urx and ury. For all the other  
934 - // boxes, we want the corners to be the correct distance  
935 - // away from the corners of the mediabox. 907 + // Rotating the page 180 degrees does not change /MediaBox. Rotating 90 or 270 degrees
  908 + // reverses llx and lly and also reverse urx and ury. For all the other boxes, we want the
  909 + // corners to be the correct distance away from the corners of the mediabox.
936 switch (rotate) { 910 switch (rotate) {
937 case 90: 911 case 90:
938 new_rect.llx = media_rect.lly + bottom_y; 912 new_rect.llx = media_rect.lly + bottom_y;
@@ -963,9 +937,8 @@ QPDFPageObjectHelper::flattenRotation(QPDFAcroFormDocumentHelper* afdh) @@ -963,9 +937,8 @@ QPDFPageObjectHelper::flattenRotation(QPDFAcroFormDocumentHelper* afdh)
963 this->oh.replaceKey(boxkey, QPDFObjectHandle::newFromRectangle(new_rect)); 937 this->oh.replaceKey(boxkey, QPDFObjectHandle::newFromRectangle(new_rect));
964 } 938 }
965 939
966 - // When we rotate the page, pivot about the point 0, 0 and then  
967 - // translate so the page is visible with the origin point being  
968 - // the same offset from the lower left corner of the media box. 940 + // When we rotate the page, pivot about the point 0, 0 and then translate so the page is visible
  941 + // with the origin point being the same offset from the lower left corner of the media box.
969 // These calculations have been verified empirically with various 942 // These calculations have been verified empirically with various
970 // PDF readers. 943 // PDF readers.
971 QPDFMatrix cm(0, 0, 0, 0, 0, 0); 944 QPDFMatrix cm(0, 0, 0, 0, 0, 0);
libqpdf/QPDFParser.cc
@@ -41,12 +41,10 @@ namespace @@ -41,12 +41,10 @@ namespace
41 QPDFObjectHandle 41 QPDFObjectHandle
42 QPDFParser::parse(bool& empty, bool content_stream) 42 QPDFParser::parse(bool& empty, bool content_stream)
43 { 43 {
44 - // This method must take care not to resolve any objects. Don't  
45 - // check the type of any object without first ensuring that it is  
46 - // a direct object. Otherwise, doing so may have the side effect  
47 - // of reading the object and changing the file pointer. If you do  
48 - // this, it will cause a logic error to be thrown from  
49 - // QPDF::inParse(). 44 + // This method must take care not to resolve any objects. Don't check the type of any object
  45 + // without first ensuring that it is a direct object. Otherwise, doing so may have the side
  46 + // effect of reading the object and changing the file pointer. If you do this, it will cause a
  47 + // logic error to be thrown from QPDF::inParse().
50 48
51 const static std::shared_ptr<QPDFObject> null_oh = QPDF_Null::create(); 49 const static std::shared_ptr<QPDFObject> null_oh = QPDF_Null::create();
52 QPDF::ParseGuard pg(context); 50 QPDF::ParseGuard pg(context);
@@ -193,18 +191,16 @@ QPDFParser::parse(bool&amp; empty, bool content_stream) @@ -193,18 +191,16 @@ QPDFParser::parse(bool&amp; empty, bool content_stream)
193 !olist.at(size - 2)->getObjGen().isIndirect()) { 191 !olist.at(size - 2)->getObjGen().isIndirect()) {
194 if (context == nullptr) { 192 if (context == nullptr) {
195 QTC::TC("qpdf", "QPDFParser indirect without context"); 193 QTC::TC("qpdf", "QPDFParser indirect without context");
196 - throw std::logic_error("QPDFObjectHandle::parse called without context"  
197 - " on an object with indirect references"); 194 + throw std::logic_error("QPDFObjectHandle::parse called without context on "
  195 + "an object with indirect references");
198 } 196 }
199 auto ref_og = QPDFObjGen( 197 auto ref_og = QPDFObjGen(
200 QPDFObjectHandle(olist.at(size - 2)).getIntValueAsInt(), 198 QPDFObjectHandle(olist.at(size - 2)).getIntValueAsInt(),
201 QPDFObjectHandle(olist.back()).getIntValueAsInt()); 199 QPDFObjectHandle(olist.back()).getIntValueAsInt());
202 if (ref_og.isIndirect()) { 200 if (ref_og.isIndirect()) {
203 - // This action has the desirable side effect  
204 - // of causing dangling references (references  
205 - // to indirect objects that don't appear in  
206 - // the PDF) in any parsed object to appear in  
207 - // the object cache. 201 + // This action has the desirable side effect of causing dangling references
  202 + // (references to indirect objects that don't appear in the PDF) in any
  203 + // parsed object to appear in the object cache.
208 object = context->getObject(ref_og).obj; 204 object = context->getObject(ref_og).obj;
209 indirect_ref = true; 205 indirect_ref = true;
210 } else { 206 } else {
@@ -214,16 +210,14 @@ QPDFParser::parse(bool&amp; empty, bool content_stream) @@ -214,16 +210,14 @@ QPDFParser::parse(bool&amp; empty, bool content_stream)
214 olist.pop_back(); 210 olist.pop_back();
215 olist.pop_back(); 211 olist.pop_back();
216 } else if ((value == "endobj") && (state == st_top)) { 212 } else if ((value == "endobj") && (state == st_top)) {
217 - // We just saw endobj without having read  
218 - // anything. Treat this as a null and do not move  
219 - // the input source's offset. 213 + // We just saw endobj without having read anything. Treat this as a null and do
  214 + // not move the input source's offset.
220 is_null = true; 215 is_null = true;
221 input->seek(input->getLastOffset(), SEEK_SET); 216 input->seek(input->getLastOffset(), SEEK_SET);
222 empty = true; 217 empty = true;
223 } else { 218 } else {
224 QTC::TC("qpdf", "QPDFParser treat word as string"); 219 QTC::TC("qpdf", "QPDFParser treat word as string");
225 - warn("unknown token while reading object;"  
226 - " treating as string"); 220 + warn("unknown token while reading object; treating as string");
227 bad = true; 221 bad = true;
228 object = QPDF_String::create(value); 222 object = QPDF_String::create(value);
229 } 223 }
@@ -250,8 +244,7 @@ QPDFParser::parse(bool&amp; empty, bool content_stream) @@ -250,8 +244,7 @@ QPDFParser::parse(bool&amp; empty, bool content_stream)
250 break; 244 break;
251 245
252 default: 246 default:
253 - warn("treating unknown token type as null while "  
254 - "reading object"); 247 + warn("treating unknown token type as null while reading object");
255 bad = true; 248 bad = true;
256 is_null = true; 249 is_null = true;
257 break; 250 break;
@@ -259,8 +252,7 @@ QPDFParser::parse(bool&amp; empty, bool content_stream) @@ -259,8 +252,7 @@ QPDFParser::parse(bool&amp; empty, bool content_stream)
259 252
260 if (object == nullptr && !is_null && 253 if (object == nullptr && !is_null &&
261 (!((state == st_start) || (state == st_stop) || (state == st_eof)))) { 254 (!((state == st_start) || (state == st_stop) || (state == st_eof)))) {
262 - throw std::logic_error("QPDFObjectHandle::parseInternal: "  
263 - "unexpected uninitialized object"); 255 + throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object");
264 is_null = true; 256 is_null = true;
265 } 257 }
266 258
@@ -274,8 +266,8 @@ QPDFParser::parse(bool&amp; empty, bool content_stream) @@ -274,8 +266,8 @@ QPDFParser::parse(bool&amp; empty, bool content_stream)
274 } 266 }
275 } 267 }
276 if (bad_count > 5) { 268 if (bad_count > 5) {
277 - // We had too many consecutive errors without enough  
278 - // intervening successful objects. Give up. 269 + // We had too many consecutive errors without enough intervening successful objects.
  270 + // Give up.
279 warn("too many errors; giving up on reading object"); 271 warn("too many errors; giving up on reading object");
280 state = st_top; 272 state = st_top;
281 is_null = true; 273 is_null = true;
@@ -287,8 +279,7 @@ QPDFParser::parse(bool&amp; empty, bool content_stream) @@ -287,8 +279,7 @@ QPDFParser::parse(bool&amp; empty, bool content_stream)
287 warn("parse error while reading object"); 279 warn("parse error while reading object");
288 } 280 }
289 done = true; 281 done = true;
290 - // In content stream mode, leave object uninitialized to  
291 - // indicate EOF 282 + // In content stream mode, leave object uninitialized to indicate EOF
292 if (!content_stream) { 283 if (!content_stream) {
293 is_null = true; 284 is_null = true;
294 } 285 }
@@ -298,8 +289,7 @@ QPDFParser::parse(bool&amp; empty, bool content_stream) @@ -298,8 +289,7 @@ QPDFParser::parse(bool&amp; empty, bool content_stream)
298 case st_array: 289 case st_array:
299 if (is_null) { 290 if (is_null) {
300 object = null_oh; 291 object = null_oh;
301 - // No need to set description for direct nulls - they probably  
302 - // will become implicit. 292 + // No need to set description for direct nulls - they probably will become implicit.
303 } else if (!indirect_ref) { 293 } else if (!indirect_ref) {
304 setDescription(object, input->getLastOffset()); 294 setDescription(object, input->getLastOffset());
305 } 295 }
@@ -316,23 +306,22 @@ QPDFParser::parse(bool&amp; empty, bool content_stream) @@ -316,23 +306,22 @@ QPDFParser::parse(bool&amp; empty, bool content_stream)
316 306
317 case st_stop: 307 case st_stop:
318 if ((state_stack.size() < 2) || (stack.size() < 2)) { 308 if ((state_stack.size() < 2) || (stack.size() < 2)) {
319 - throw std::logic_error("QPDFObjectHandle::parseInternal: st_stop encountered"  
320 - " with insufficient elements in stack"); 309 + throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with "
  310 + "insufficient elements in stack");
321 } 311 }
322 parser_state_e old_state = state_stack.back(); 312 parser_state_e old_state = state_stack.back();
323 state_stack.pop_back(); 313 state_stack.pop_back();
324 if (old_state == st_array) { 314 if (old_state == st_array) {
325 object = QPDF_Array::create(std::move(olist), frame.null_count > 100); 315 object = QPDF_Array::create(std::move(olist), frame.null_count > 100);
326 setDescription(object, offset - 1); 316 setDescription(object, offset - 1);
327 - // The `offset` points to the next of "[". Set the rewind  
328 - // offset to point to the beginning of "[". This has been  
329 - // explicitly tested with whitespace surrounding the array start  
330 - // delimiter. getLastOffset points to the array end token and  
331 - // therefore can't be used here. 317 + // The `offset` points to the next of "[". Set the rewind offset to point to the
  318 + // beginning of "[". This has been explicitly tested with whitespace surrounding the
  319 + // array start delimiter. getLastOffset points to the array end token and therefore
  320 + // can't be used here.
332 set_offset = true; 321 set_offset = true;
333 } else if (old_state == st_dictionary) { 322 } else if (old_state == st_dictionary) {
334 - // Convert list to map. Alternating elements are keys. Attempt  
335 - // to recover more or less gracefully from invalid dictionaries. 323 + // Convert list to map. Alternating elements are keys. Attempt to recover more or
  324 + // less gracefully from invalid dictionaries.
336 std::set<std::string> names; 325 std::set<std::string> names;
337 for (auto& obj: olist) { 326 for (auto& obj: olist) {
338 if (obj) { 327 if (obj) {
@@ -358,8 +347,7 @@ QPDFParser::parse(bool&amp; empty, bool content_stream) @@ -358,8 +347,7 @@ QPDFParser::parse(bool&amp; empty, bool content_stream)
358 } 347 }
359 warn( 348 warn(
360 offset, 349 offset,
361 - "expected dictionary key but found"  
362 - " non-name object; inserting key " + 350 + "expected dictionary key but found non-name object; inserting key " +
363 key); 351 key);
364 } 352 }
365 if (dict.count(key) > 0) { 353 if (dict.count(key) > 0) {
@@ -367,8 +355,7 @@ QPDFParser::parse(bool&amp; empty, bool content_stream) @@ -367,8 +355,7 @@ QPDFParser::parse(bool&amp; empty, bool content_stream)
367 warn( 355 warn(
368 offset, 356 offset,
369 "dictionary has duplicated key " + key + 357 "dictionary has duplicated key " + key +
370 - "; last occurrence overrides earlier "  
371 - "ones"); 358 + "; last occurrence overrides earlier ones");
372 } 359 }
373 360
374 // Calculate value. 361 // Calculate value.
@@ -380,8 +367,7 @@ QPDFParser::parse(bool&amp; empty, bool content_stream) @@ -380,8 +367,7 @@ QPDFParser::parse(bool&amp; empty, bool content_stream)
380 QTC::TC("qpdf", "QPDFParser no val for last key"); 367 QTC::TC("qpdf", "QPDFParser no val for last key");
381 warn( 368 warn(
382 offset, 369 offset,
383 - "dictionary ended prematurely; "  
384 - "using null as value for last key"); 370 + "dictionary ended prematurely; using null as value for last key");
385 val = QPDF_Null::create(); 371 val = QPDF_Null::create();
386 } 372 }
387 373
@@ -395,11 +381,10 @@ QPDFParser::parse(bool&amp; empty, bool content_stream) @@ -395,11 +381,10 @@ QPDFParser::parse(bool&amp; empty, bool content_stream)
395 } 381 }
396 object = QPDF_Dictionary::create(std::move(dict)); 382 object = QPDF_Dictionary::create(std::move(dict));
397 setDescription(object, offset - 2); 383 setDescription(object, offset - 2);
398 - // The `offset` points to the next of "<<". Set the rewind  
399 - // offset to point to the beginning of "<<". This has been  
400 - // explicitly tested with whitespace surrounding the dictionary  
401 - // start delimiter. getLastOffset points to the dictionary end  
402 - // token and therefore can't be used here. 384 + // The `offset` points to the next of "<<". Set the rewind offset to point to the
  385 + // beginning of "<<". This has been explicitly tested with whitespace surrounding
  386 + // the dictionary start delimiter. getLastOffset points to the dictionary end token
  387 + // and therefore can't be used here.
403 set_offset = true; 388 set_offset = true;
404 } 389 }
405 stack.pop_back(); 390 stack.pop_back();
@@ -431,9 +416,8 @@ QPDFParser::setDescription(std::shared_ptr&lt;QPDFObject&gt;&amp; obj, qpdf_offset_t parse @@ -431,9 +416,8 @@ QPDFParser::setDescription(std::shared_ptr&lt;QPDFObject&gt;&amp; obj, qpdf_offset_t parse
431 void 416 void
432 QPDFParser::warn(QPDFExc const& e) const 417 QPDFParser::warn(QPDFExc const& e) const
433 { 418 {
434 - // If parsing on behalf of a QPDF object and want to give a  
435 - // warning, we can warn through the object. If parsing for some  
436 - // other reason, such as an explicit creation of an object from a 419 + // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the
  420 + // object. If parsing for some other reason, such as an explicit creation of an object from a
437 // string, then just throw the exception. 421 // string, then just throw the exception.
438 if (context) { 422 if (context) {
439 context->warn(e); 423 context->warn(e);
libqpdf/QPDFTokenizer.cc
1 #include <qpdf/QPDFTokenizer.hh> 1 #include <qpdf/QPDFTokenizer.hh>
2 2
3 -// DO NOT USE ctype -- it is locale dependent for some things, and  
4 -// it's not worth the risk of including it in case it may accidentally  
5 -// be used. 3 +// DO NOT USE ctype -- it is locale dependent for some things, and it's not worth the risk of
  4 +// including it in case it may accidentally be used.
6 5
7 #include <qpdf/QIntC.hh> 6 #include <qpdf/QIntC.hh>
8 #include <qpdf/QPDFExc.hh> 7 #include <qpdf/QPDFExc.hh>
@@ -45,8 +44,8 @@ namespace @@ -45,8 +44,8 @@ namespace
45 bool 44 bool
46 QPDFWordTokenFinder::check() 45 QPDFWordTokenFinder::check()
47 { 46 {
48 - // Find a word token matching the given string, preceded by a  
49 - // delimiter, and followed by a delimiter or EOF. 47 + // Find a word token matching the given string, preceded by a delimiter, and followed by a
  48 + // delimiter or EOF.
50 QPDFTokenizer tokenizer; 49 QPDFTokenizer tokenizer;
51 QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true); 50 QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true);
52 qpdf_offset_t pos = is->tell(); 51 qpdf_offset_t pos = is->tell();
@@ -68,8 +67,7 @@ QPDFWordTokenFinder::check() @@ -68,8 +67,7 @@ QPDFWordTokenFinder::check()
68 return false; 67 return false;
69 } 68 }
70 if (token_start == 0) { 69 if (token_start == 0) {
71 - // Can't actually happen...we never start the search at the  
72 - // beginning of the input. 70 + // Can't actually happen...we never start the search at the beginning of the input.
73 return false; 71 return false;
74 } 72 }
75 return true; 73 return true;
@@ -147,9 +145,9 @@ QPDFTokenizer::presentCharacter(char ch) @@ -147,9 +145,9 @@ QPDFTokenizer::presentCharacter(char ch)
147 void 145 void
148 QPDFTokenizer::handleCharacter(char ch) 146 QPDFTokenizer::handleCharacter(char ch)
149 { 147 {
150 - // State machine is implemented such that the final character may not be  
151 - // handled. This happens whenever you have to use a character from the  
152 - // next token to detect the end of the current token. 148 + // State machine is implemented such that the final character may not be handled. This happens
  149 + // whenever you have to use a character from the next token to detect the end of the current
  150 + // token.
153 151
154 switch (this->state) { 152 switch (this->state) {
155 case st_top: 153 case st_top:
@@ -248,15 +246,14 @@ QPDFTokenizer::handleCharacter(char ch) @@ -248,15 +246,14 @@ QPDFTokenizer::handleCharacter(char ch)
248 void 246 void
249 QPDFTokenizer::inTokenReady(char ch) 247 QPDFTokenizer::inTokenReady(char ch)
250 { 248 {
251 - throw std::logic_error("INTERNAL ERROR: QPDF tokenizer presented character "  
252 - "while token is waiting"); 249 + throw std::logic_error(
  250 + "INTERNAL ERROR: QPDF tokenizer presented character while token is waiting");
253 } 251 }
254 252
255 void 253 void
256 QPDFTokenizer::inBeforeToken(char ch) 254 QPDFTokenizer::inBeforeToken(char ch)
257 { 255 {
258 - // Note: we specifically do not use ctype here. It is  
259 - // locale-dependent. 256 + // Note: we specifically do not use ctype here. It is locale-dependent.
260 if (isSpace(ch)) { 257 if (isSpace(ch)) {
261 this->before_token = !this->include_ignorable; 258 this->before_token = !this->include_ignorable;
262 this->in_token = this->include_ignorable; 259 this->in_token = this->include_ignorable;
@@ -421,11 +418,9 @@ void @@ -421,11 +418,9 @@ void
421 QPDFTokenizer::inName(char ch) 418 QPDFTokenizer::inName(char ch)
422 { 419 {
423 if (isDelimiter(ch)) { 420 if (isDelimiter(ch)) {
424 - // A C-locale whitespace character or delimiter terminates  
425 - // token. It is important to unread the whitespace  
426 - // character even though it is ignored since it may be the  
427 - // newline after a stream keyword. Removing it here could  
428 - // make the stream-reading code break on some files, 421 + // A C-locale whitespace character or delimiter terminates token. It is important to unread
  422 + // the whitespace character even though it is ignored since it may be the newline after a
  423 + // stream keyword. Removing it here could make the stream-reading code break on some files,
429 // though not on any files in the test suite as of this 424 // though not on any files in the test suite as of this
430 // writing. 425 // writing.
431 426
@@ -452,8 +447,7 @@ QPDFTokenizer::inNameHex1(char ch) @@ -452,8 +447,7 @@ QPDFTokenizer::inNameHex1(char ch)
452 } else { 447 } else {
453 QTC::TC("qpdf", "QPDFTokenizer bad name 1"); 448 QTC::TC("qpdf", "QPDFTokenizer bad name 1");
454 this->error_message = "name with stray # will not work with PDF >= 1.2"; 449 this->error_message = "name with stray # will not work with PDF >= 1.2";
455 - // Use null to encode a bad # -- this is reversed  
456 - // in QPDF_Name::normalizeName. 450 + // Use null to encode a bad # -- this is reversed in QPDF_Name::normalizeName.
457 this->val += '\0'; 451 this->val += '\0';
458 this->state = st_name; 452 this->state = st_name;
459 inName(ch); 453 inName(ch);
@@ -468,8 +462,7 @@ QPDFTokenizer::inNameHex2(char ch) @@ -468,8 +462,7 @@ QPDFTokenizer::inNameHex2(char ch)
468 } else { 462 } else {
469 QTC::TC("qpdf", "QPDFTokenizer bad name 2"); 463 QTC::TC("qpdf", "QPDFTokenizer bad name 2");
470 this->error_message = "name with stray # will not work with PDF >= 1.2"; 464 this->error_message = "name with stray # will not work with PDF >= 1.2";
471 - // Use null to encode a bad # -- this is reversed  
472 - // in QPDF_Name::normalizeName. 465 + // Use null to encode a bad # -- this is reversed in QPDF_Name::normalizeName.
473 this->val += '\0'; 466 this->val += '\0';
474 this->val += this->hex_char; 467 this->val += this->hex_char;
475 this->state = st_name; 468 this->state = st_name;
@@ -636,13 +629,10 @@ void @@ -636,13 +629,10 @@ void
636 QPDFTokenizer::inLiteral(char ch) 629 QPDFTokenizer::inLiteral(char ch)
637 { 630 {
638 if (isDelimiter(ch)) { 631 if (isDelimiter(ch)) {
639 - // A C-locale whitespace character or delimiter terminates  
640 - // token. It is important to unread the whitespace  
641 - // character even though it is ignored since it may be the  
642 - // newline after a stream keyword. Removing it here could  
643 - // make the stream-reading code break on some files,  
644 - // though not on any files in the test suite as of this  
645 - // writing. 632 + // A C-locale whitespace character or delimiter terminates token. It is important to unread
  633 + // the whitespace character even though it is ignored since it may be the newline after a
  634 + // stream keyword. Removing it here could make the stream-reading code break on some files,
  635 + // though not on any files in the test suite as of this writing.
646 636
647 this->in_token = false; 637 this->in_token = false;
648 this->char_to_unread = ch; 638 this->char_to_unread = ch;
@@ -707,8 +697,7 @@ QPDFTokenizer::inCharCode(char ch) @@ -707,8 +697,7 @@ QPDFTokenizer::inCharCode(char ch)
707 if (++(this->digit_count) < 3) { 697 if (++(this->digit_count) < 3) {
708 return; 698 return;
709 } 699 }
710 - // We've accumulated \ddd. PDF Spec says to ignore  
711 - // high-order overflow. 700 + // We've accumulated \ddd. PDF Spec says to ignore high-order overflow.
712 } 701 }
713 this->val += char(this->char_code % 256); 702 this->val += char(this->char_code % 256);
714 this->state = st_in_string; 703 this->state = st_in_string;
@@ -739,8 +728,7 @@ QPDFTokenizer::presentEOF() @@ -739,8 +728,7 @@ QPDFTokenizer::presentEOF()
739 case st_decimal: 728 case st_decimal:
740 case st_literal: 729 case st_literal:
741 QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token"); 730 QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token");
742 - // Push any delimiter to the state machine to finish off the final  
743 - // token. 731 + // Push any delimiter to the state machine to finish off the final token.
744 presentCharacter('\f'); 732 presentCharacter('\f');
745 this->in_token = true; 733 this->in_token = true;
746 break; 734 break;
@@ -794,14 +782,12 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input) @@ -794,14 +782,12 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input)
794 qpdf_offset_t last_offset = input->getLastOffset(); 782 qpdf_offset_t last_offset = input->getLastOffset();
795 qpdf_offset_t pos = input->tell(); 783 qpdf_offset_t pos = input->tell();
796 784
797 - // Use QPDFWordTokenFinder to find EI surrounded by delimiters.  
798 - // Then read the next several tokens or up to EOF. If we find any  
799 - // suspicious-looking or tokens, this is probably still part of  
800 - // the image data, so keep looking for EI. Stop at the first EI  
801 - // that passes. If we get to the end without finding one, return  
802 - // the last EI we found. Store the number of bytes expected in the  
803 - // inline image including the EI and use that to break out of  
804 - // inline image, falling back to the old method if needed. 785 + // Use QPDFWordTokenFinder to find EI surrounded by delimiters. Then read the next several
  786 + // tokens or up to EOF. If we find any suspicious-looking or tokens, this is probably still part
  787 + // of the image data, so keep looking for EI. Stop at the first EI that passes. If we get to the
  788 + // end without finding one, return the last EI we found. Store the number of bytes expected in
  789 + // the inline image including the EI and use that to break out of inline image, falling back to
  790 + // the old method if needed.
805 791
806 bool okay = false; 792 bool okay = false;
807 bool first_try = true; 793 bool first_try = true;
@@ -814,13 +800,11 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input) @@ -814,13 +800,11 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input)
814 800
815 QPDFTokenizer check; 801 QPDFTokenizer check;
816 bool found_bad = false; 802 bool found_bad = false;
817 - // Look at the next 10 tokens or up to EOF. The next inline  
818 - // image's image data would look like bad tokens, but there  
819 - // will always be at least 10 tokens between one inline  
820 - // image's EI and the next valid one's ID since width, height,  
821 - // bits per pixel, and color space are all required as well as  
822 - // a BI and ID. If we get 10 good tokens in a row or hit EOF,  
823 - // we can be pretty sure we've found the actual EI. 803 + // Look at the next 10 tokens or up to EOF. The next inline image's image data would look
  804 + // like bad tokens, but there will always be at least 10 tokens between one inline image's
  805 + // EI and the next valid one's ID since width, height, bits per pixel, and color space are
  806 + // all required as well as a BI and ID. If we get 10 good tokens in a row or hit EOF, we can
  807 + // be pretty sure we've found the actual EI.
824 for (int i = 0; i < 10; ++i) { 808 for (int i = 0; i < 10; ++i) {
825 QPDFTokenizer::Token t = check.readToken(input, "checker", true); 809 QPDFTokenizer::Token t = check.readToken(input, "checker", true);
826 token_type_e type = t.getType(); 810 token_type_e type = t.getType();
@@ -829,27 +813,22 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input) @@ -829,27 +813,22 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input)
829 } else if (type == tt_bad) { 813 } else if (type == tt_bad) {
830 found_bad = true; 814 found_bad = true;
831 } else if (t.isWord()) { 815 } else if (t.isWord()) {
832 - // The qpdf tokenizer lumps alphabetic and otherwise  
833 - // uncategorized characters into "words". We recognize  
834 - // strings of alphabetic characters as potential valid  
835 - // operators for purposes of telling whether we're in  
836 - // valid content or not. It's not perfect, but it  
837 - // should work more reliably than what we used to do,  
838 - // which was already good enough for the vast majority  
839 - // of files. 816 + // The qpdf tokenizer lumps alphabetic and otherwise uncategorized characters into
  817 + // "words". We recognize strings of alphabetic characters as potential valid
  818 + // operators for purposes of telling whether we're in valid content or not. It's not
  819 + // perfect, but it should work more reliably than what we used to do, which was
  820 + // already good enough for the vast majority of files.
840 bool found_alpha = false; 821 bool found_alpha = false;
841 bool found_non_printable = false; 822 bool found_non_printable = false;
842 bool found_other = false; 823 bool found_other = false;
843 for (char ch: t.getValue()) { 824 for (char ch: t.getValue()) {
844 if (((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A') && (ch <= 'Z')) || 825 if (((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A') && (ch <= 'Z')) ||
845 (ch == '*')) { 826 (ch == '*')) {
846 - // Treat '*' as alpha since there are valid  
847 - // PDF operators that contain * along with  
848 - // alphabetic characters. 827 + // Treat '*' as alpha since there are valid PDF operators that contain *
  828 + // along with alphabetic characters.
849 found_alpha = true; 829 found_alpha = true;
850 } else if ((static_cast<signed char>(ch) < 32) && (!isSpace(ch))) { 830 } else if ((static_cast<signed char>(ch) < 32) && (!isSpace(ch))) {
851 - // Compare ch as a signed char so characters  
852 - // outside of 7-bit will be < 0. 831 + // Compare ch as a signed char so characters outside of 7-bit will be < 0.
853 found_non_printable = true; 832 found_non_printable = true;
854 break; 833 break;
855 } else { 834 } else {
@@ -903,9 +882,9 @@ QPDFTokenizer::betweenTokens() @@ -903,9 +882,9 @@ QPDFTokenizer::betweenTokens()
903 882
904 QPDFTokenizer::Token 883 QPDFTokenizer::Token
905 QPDFTokenizer::readToken( 884 QPDFTokenizer::readToken(
906 - std::shared_ptr<InputSource> input, std::string const& context, bool allow_bad, size_t max_len) 885 + InputSource& input, std::string const& context, bool allow_bad, size_t max_len)
907 { 886 {
908 - nextToken(*input, context, max_len); 887 + nextToken(input, context, max_len);
909 888
910 Token token; 889 Token token;
911 bool unread_char; 890 bool unread_char;
@@ -918,15 +897,22 @@ QPDFTokenizer::readToken( @@ -918,15 +897,22 @@ QPDFTokenizer::readToken(
918 } else { 897 } else {
919 throw QPDFExc( 898 throw QPDFExc(
920 qpdf_e_damaged_pdf, 899 qpdf_e_damaged_pdf,
921 - input->getName(), 900 + input.getName(),
922 context, 901 context,
923 - input->getLastOffset(), 902 + input.getLastOffset(),
924 token.getErrorMessage()); 903 token.getErrorMessage());
925 } 904 }
926 } 905 }
927 return token; 906 return token;
928 } 907 }
929 908
  909 +QPDFTokenizer::Token
  910 +QPDFTokenizer::readToken(
  911 + std::shared_ptr<InputSource> input, std::string const& context, bool allow_bad, size_t max_len)
  912 +{
  913 + return readToken(*input, context, allow_bad, max_len);
  914 +}
  915 +
930 bool 916 bool
931 QPDFTokenizer::nextToken(InputSource& input, std::string const& context, size_t max_len) 917 QPDFTokenizer::nextToken(InputSource& input, std::string const& context, size_t max_len)
932 { 918 {
@@ -941,9 +927,8 @@ QPDFTokenizer::nextToken(InputSource& input, std::string const& context, size_t @@ -941,9 +927,8 @@ QPDFTokenizer::nextToken(InputSource& input, std::string const& context, size_t
941 presentEOF(); 927 presentEOF();
942 928
943 if ((this->type == tt_eof) && (!this->allow_eof)) { 929 if ((this->type == tt_eof) && (!this->allow_eof)) {
944 - // Nothing in the qpdf library calls readToken  
945 - // without allowEOF anymore, so this case is not  
946 - // exercised. 930 + // Nothing in the qpdf library calls readToken without allowEOF anymore, so this
  931 + // case is not exercised.
947 this->type = tt_bad; 932 this->type = tt_bad;
948 this->error_message = "unexpected EOF"; 933 this->error_message = "unexpected EOF";
949 offset = input.getLastOffset(); 934 offset = input.getLastOffset();
libqpdf/QPDF_Stream.cc
@@ -69,10 +69,9 @@ namespace @@ -69,10 +69,9 @@ namespace
69 } // namespace 69 } // namespace
70 70
71 std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = { 71 std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = {
72 - // The PDF specification provides these filter abbreviations for  
73 - // use in inline images, but according to table H.1 in the pre-ISO  
74 - // versions of the PDF specification, Adobe Reader also accepts  
75 - // them for stream filters. 72 + // The PDF specification provides these filter abbreviations for use in inline images, but
  73 + // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader also
  74 + // accepts them for stream filters.
76 {"/AHx", "/ASCIIHexDecode"}, 75 {"/AHx", "/ASCIIHexDecode"},
77 {"/A85", "/ASCII85Decode"}, 76 {"/A85", "/ASCII85Decode"},
78 {"/LZW", "/LZWDecode"}, 77 {"/LZW", "/LZWDecode"},
@@ -118,8 +117,8 @@ QPDF_Stream::QPDF_Stream( @@ -118,8 +117,8 @@ QPDF_Stream::QPDF_Stream(
118 length(length) 117 length(length)
119 { 118 {
120 if (!stream_dict.isDictionary()) { 119 if (!stream_dict.isDictionary()) {
121 - throw std::logic_error("stream object instantiated with non-dictionary "  
122 - "object for dictionary"); 120 + throw std::logic_error(
  121 + "stream object instantiated with non-dictionary object for dictionary");
123 } 122 }
124 auto descr = std::make_shared<QPDFValue::Description>( 123 auto descr = std::make_shared<QPDFValue::Description>(
125 qpdf->getFilename() + ", stream object " + og.unparse(' ')); 124 qpdf->getFilename() + ", stream object " + og.unparse(' '));
@@ -198,18 +197,18 @@ QPDF_Stream::getStreamJSON( @@ -198,18 +197,18 @@ QPDF_Stream::getStreamJSON(
198 case qpdf_sj_none: 197 case qpdf_sj_none:
199 case qpdf_sj_inline: 198 case qpdf_sj_inline:
200 if (p != nullptr) { 199 if (p != nullptr) {
201 - throw std::logic_error("QPDF_Stream::getStreamJSON: pipeline should "  
202 - "only be supplied when json_data is file"); 200 + throw std::logic_error("QPDF_Stream::getStreamJSON: pipeline should only be supplied "
  201 + "when json_data is file");
203 } 202 }
204 break; 203 break;
205 case qpdf_sj_file: 204 case qpdf_sj_file:
206 if (p == nullptr) { 205 if (p == nullptr) {
207 - throw std::logic_error("QPDF_Stream::getStreamJSON: pipeline must "  
208 - "be supplied when json_data is file"); 206 + throw std::logic_error(
  207 + "QPDF_Stream::getStreamJSON: pipeline must be supplied when json_data is file");
209 } 208 }
210 if (data_filename.empty()) { 209 if (data_filename.empty()) {
211 - throw std::logic_error("QPDF_Stream::getStreamJSON: data_filename "  
212 - "must be supplied when json_data is file"); 210 + throw std::logic_error("QPDF_Stream::getStreamJSON: data_filename must be supplied "
  211 + "when json_data is file");
213 } 212 }
214 break; 213 break;
215 } 214 }
@@ -244,8 +243,7 @@ QPDF_Stream::getStreamJSON( @@ -244,8 +243,7 @@ QPDF_Stream::getStreamJSON(
244 break; 243 break;
245 } 244 }
246 } 245 }
247 - // We can use unsafeShallowCopy because we are only  
248 - // touching top-level keys. 246 + // We can use unsafeShallowCopy because we are only touching top-level keys.
249 dict = this->stream_dict.unsafeShallowCopy(); 247 dict = this->stream_dict.unsafeShallowCopy();
250 dict.removeKey("/Length"); 248 dict.removeKey("/Length");
251 if (filter && filtered) { 249 if (filter && filtered) {
@@ -408,8 +406,7 @@ QPDF_Stream::filterable( @@ -408,8 +406,7 @@ QPDF_Stream::filterable(
408 return false; 406 return false;
409 } 407 }
410 408
411 - // filters now contains a list of filters to be applied in order.  
412 - // See which ones we can support. 409 + // filters now contains a list of filters to be applied in order. See which ones we can support.
413 410
414 // See if we can support any decode parameters that are specified. 411 // See if we can support any decode parameters that are specified.
415 412
@@ -428,9 +425,8 @@ QPDF_Stream::filterable( @@ -428,9 +425,8 @@ QPDF_Stream::filterable(
428 } 425 }
429 } 426 }
430 427
431 - // Ignore /DecodeParms entirely if /Filters is empty. At least  
432 - // one case of a file whose /DecodeParms was [ << >> ] when  
433 - // /Filters was empty has been seen in the wild. 428 + // Ignore /DecodeParms entirely if /Filters is empty. At least one case of a file whose
  429 + // /DecodeParms was [ << >> ] when /Filters was empty has been seen in the wild.
434 if ((filters.size() != 0) && (decode_parms.size() != filters.size())) { 430 if ((filters.size() != 0) && (decode_parms.size() != filters.size())) {
435 warn("stream /DecodeParms length is inconsistent with filters"); 431 warn("stream /DecodeParms length is inconsistent with filters");
436 filterable = false; 432 filterable = false;
@@ -502,9 +498,8 @@ QPDF_Stream::pipeStreamData( @@ -502,9 +498,8 @@ QPDF_Stream::pipeStreamData(
502 return filter; 498 return filter;
503 } 499 }
504 500
505 - // Construct the pipeline in reverse order. Force pipelines we  
506 - // create to be deleted when this function finishes. Pipelines  
507 - // created by QPDFStreamFilter objects will be deleted by those 501 + // Construct the pipeline in reverse order. Force pipelines we create to be deleted when this
  502 + // function finishes. Pipelines created by QPDFStreamFilter objects will be deleted by those
508 // objects. 503 // objects.
509 std::vector<std::shared_ptr<Pipeline>> to_delete; 504 std::vector<std::shared_ptr<Pipeline>> to_delete;
510 505
@@ -568,8 +563,8 @@ QPDF_Stream::pipeStreamData( @@ -568,8 +563,8 @@ QPDF_Stream::pipeStreamData(
568 QTC::TC("qpdf", "QPDF_Stream pipe use stream provider"); 563 QTC::TC("qpdf", "QPDF_Stream pipe use stream provider");
569 } else { 564 } else {
570 QTC::TC("qpdf", "QPDF_Stream provider length mismatch"); 565 QTC::TC("qpdf", "QPDF_Stream provider length mismatch");
571 - // This would be caused by programmer error on the  
572 - // part of a library user, not by invalid input data. 566 + // This would be caused by programmer error on the part of a library user, not by
  567 + // invalid input data.
573 throw std::runtime_error( 568 throw std::runtime_error(
574 "stream data provider for " + og.unparse(' ') + " provided " + 569 "stream data provider for " + og.unparse(' ') + " provided " +
575 std::to_string(actual_length) + " bytes instead of expected " + 570 std::to_string(actual_length) + " bytes instead of expected " +
@@ -602,14 +597,13 @@ QPDF_Stream::pipeStreamData( @@ -602,14 +597,13 @@ QPDF_Stream::pipeStreamData(
602 warn("content normalization encountered bad tokens"); 597 warn("content normalization encountered bad tokens");
603 if (normalizer->lastTokenWasBad()) { 598 if (normalizer->lastTokenWasBad()) {
604 QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize"); 599 QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize");
605 - warn("normalized content ended with a bad token; you may be able "  
606 - "to resolve this by coalescing content streams in combination "  
607 - "with normalizing content. From the command line, specify "  
608 - "--coalesce-contents"); 600 + warn("normalized content ended with a bad token; you may be able to resolve this by "
  601 + "coalescing content streams in combination with normalizing content. From the "
  602 + "command line, specify --coalesce-contents");
609 } 603 }
610 - warn("Resulting stream data may be corrupted but is may still useful "  
611 - "for manual inspection. For more information on this warning, "  
612 - "search for content normalization in the manual."); 604 + warn("Resulting stream data may be corrupted but is may still useful for manual "
  605 + "inspection. For more information on this warning, search for content normalization "
  606 + "in the manual.");
613 } 607 }
614 608
615 return success; 609 return success;
libqpdf/QPDF_encryption.cc
@@ -137,9 +137,8 @@ pad_or_truncate_password_V4(std::string const& password, char k1[key_bytes]) @@ -137,9 +137,8 @@ pad_or_truncate_password_V4(std::string const& password, char k1[key_bytes])
137 void 137 void
138 QPDF::trim_user_password(std::string& user_password) 138 QPDF::trim_user_password(std::string& user_password)
139 { 139 {
140 - // Although unnecessary, this routine trims the padding string  
141 - // from the end of a user password. Its only purpose is for  
142 - // recovery of user passwords which is done in the test suite. 140 + // Although unnecessary, this routine trims the padding string from the end of a user password.
  141 + // Its only purpose is for recovery of user passwords which is done in the test suite.
143 char const* cstr = user_password.c_str(); 142 char const* cstr = user_password.c_str();
144 size_t len = user_password.length(); 143 size_t len = user_password.length();
145 if (len < key_bytes) { 144 if (len < key_bytes) {
@@ -262,22 +261,17 @@ hash_V5( @@ -262,22 +261,17 @@ hash_V5(
262 int round_number = 0; 261 int round_number = 0;
263 bool done = false; 262 bool done = false;
264 while (!done) { 263 while (!done) {
265 - // The hash algorithm has us setting K initially to the R5  
266 - // value and then repeating a series of steps 64 times  
267 - // before starting with the termination case testing. The  
268 - // wording of the specification is very unclear as to the  
269 - // exact number of times it should be run since the  
270 - // wording about whether the initial setup counts as round  
271 - // 0 or not is ambiguous. This code counts the initial  
272 - // setup (R5) value as round 0, which appears to be  
273 - // correct. This was determined to be correct by  
274 - // increasing or decreasing the number of rounds by 1 or 2  
275 - // from this value and generating 20 test files. In this  
276 - // interpretation, all the test files worked with Adobe  
277 - // Reader X. In the other configurations, many of the  
278 - // files did not work, and we were accurately able to  
279 - // predict which files didn't work by looking at the  
280 - // conditions under which we terminated repetition. 264 + // The hash algorithm has us setting K initially to the R5 value and then repeating a
  265 + // series of steps 64 times before starting with the termination case testing. The
  266 + // wording of the specification is very unclear as to the exact number of times it
  267 + // should be run since the wording about whether the initial setup counts as round 0 or
  268 + // not is ambiguous. This code counts the initial setup (R5) value as round 0, which
  269 + // appears to be correct. This was determined to be correct by increasing or decreasing
  270 + // the number of rounds by 1 or 2 from this value and generating 20 test files. In this
  271 + // interpretation, all the test files worked with Adobe Reader X. In the other
  272 + // configurations, many of the files did not work, and we were accurately able to
  273 + // predict which files didn't work by looking at the conditions under which we
  274 + // terminated repetition.
281 275
282 ++round_number; 276 ++round_number;
283 std::string K1 = password + K + udata; 277 std::string K1 = password + K + udata;
@@ -291,11 +285,10 @@ hash_V5( @@ -291,11 +285,10 @@ hash_V5(
291 QUtil::unsigned_char_pointer(K.substr(16, 16)), 285 QUtil::unsigned_char_pointer(K.substr(16, 16)),
292 16); 286 16);
293 287
294 - // E_mod_3 is supposed to be mod 3 of the first 16 bytes  
295 - // of E taken as as a (128-bit) big-endian number. Since  
296 - // (xy mod n) is equal to ((x mod n) + (y mod n)) mod n  
297 - // and since 256 mod n is 1, we can just take the sums of  
298 - // the the mod 3s of each byte to get the same result. 288 + // E_mod_3 is supposed to be mod 3 of the first 16 bytes of E taken as as a (128-bit)
  289 + // big-endian number. Since (xy mod n) is equal to ((x mod n) + (y mod n)) mod n and
  290 + // since 256 mod n is 1, we can just take the sums of the the mod 3s of each byte to get
  291 + // the same result.
299 int E_mod_3 = 0; 292 int E_mod_3 = 0;
300 for (unsigned int i = 0; i < 16; ++i) { 293 for (unsigned int i = 0; i < 16; ++i) {
301 E_mod_3 += static_cast<unsigned char>(E.at(i)); 294 E_mod_3 += static_cast<unsigned char>(E.at(i));
@@ -344,8 +337,7 @@ QPDF::compute_data_key( @@ -344,8 +337,7 @@ QPDF::compute_data_key(
344 std::string result = encryption_key; 337 std::string result = encryption_key;
345 338
346 if (encryption_V >= 5) { 339 if (encryption_V >= 5) {
347 - // Algorithm 3.1a (PDF 1.7 extension level 3): just use  
348 - // encryption key straight. 340 + // Algorithm 3.1a (PDF 1.7 extension level 3): just use encryption key straight.
349 return result; 341 return result;
350 } 342 }
351 343
@@ -370,9 +362,8 @@ std::string @@ -370,9 +362,8 @@ std::string
370 QPDF::compute_encryption_key(std::string const& password, EncryptionData const& data) 362 QPDF::compute_encryption_key(std::string const& password, EncryptionData const& data)
371 { 363 {
372 if (data.getV() >= 5) { 364 if (data.getV() >= 5) {
373 - // For V >= 5, the encryption key is generated and stored in  
374 - // the file, encrypted separately with both user and owner  
375 - // passwords. 365 + // For V >= 5, the encryption key is generated and stored in the file, encrypted separately
  366 + // with both user and owner passwords.
376 return recover_encryption_key_with_password(password, data); 367 return recover_encryption_key_with_password(password, data);
377 } else { 368 } else {
378 // For V < 5, the encryption key is derived from the user 369 // For V < 5, the encryption key is derived from the user
@@ -386,12 +377,10 @@ QPDF::compute_encryption_key_from_password(std::string const& password, Encrypti @@ -386,12 +377,10 @@ QPDF::compute_encryption_key_from_password(std::string const& password, Encrypti
386 { 377 {
387 // Algorithm 3.2 from the PDF 1.7 Reference Manual 378 // Algorithm 3.2 from the PDF 1.7 Reference Manual
388 379
389 - // This code does not properly handle Unicode passwords.  
390 - // Passwords are supposed to be converted from OS codepage  
391 - // characters to PDFDocEncoding. Unicode passwords are supposed  
392 - // to be converted to OS codepage before converting to  
393 - // PDFDocEncoding. We instead require the password to be  
394 - // presented in its final form. 380 + // This code does not properly handle Unicode passwords. Passwords are supposed to be converted
  381 + // from OS codepage characters to PDFDocEncoding. Unicode passwords are supposed to be
  382 + // converted to OS codepage before converting to PDFDocEncoding. We instead require the
  383 + // password to be presented in its final form.
395 384
396 MD5 md5; 385 MD5 md5;
397 md5.encodeDataIncrementally(pad_or_truncate_password_V4(password).c_str(), key_bytes); 386 md5.encodeDataIncrementally(pad_or_truncate_password_V4(password).c_str(), key_bytes);
@@ -681,11 +670,9 @@ QPDF::recover_encryption_key_with_password( @@ -681,11 +670,9 @@ QPDF::recover_encryption_key_with_password(
681 { 670 {
682 // Algorithm 3.2a from the PDF 1.7 extension level 3 671 // Algorithm 3.2a from the PDF 1.7 extension level 3
683 672
684 - // This code does not handle Unicode passwords correctly.  
685 - // Empirical evidence suggests that most viewers don't. We are  
686 - // supposed to process the input string with the SASLprep (RFC  
687 - // 4013) profile of stringprep (RFC 3454) and then convert the  
688 - // result to UTF-8. 673 + // This code does not handle Unicode passwords correctly. Empirical evidence suggests that most
  674 + // viewers don't. We are supposed to process the input string with the SASLprep (RFC 4013)
  675 + // profile of stringprep (RFC 3454) and then convert the result to UTF-8.
689 676
690 perms_valid = false; 677 perms_valid = false;
691 std::string key_password = truncate_password_V5(password); 678 std::string key_password = truncate_password_V5(password);
@@ -738,18 +725,16 @@ QPDF::initializeEncryption() @@ -738,18 +725,16 @@ QPDF::initializeEncryption()
738 } 725 }
739 m->encp->encryption_initialized = true; 726 m->encp->encryption_initialized = true;
740 727
741 - // After we initialize encryption parameters, we must used stored  
742 - // key information and never look at /Encrypt again. Otherwise,  
743 - // things could go wrong if someone mutates the encryption 728 + // After we initialize encryption parameters, we must use stored key information and never look
  729 + // at /Encrypt again. Otherwise, things could go wrong if someone mutates the encryption
744 // dictionary. 730 // dictionary.
745 731
746 if (!m->trailer.hasKey("/Encrypt")) { 732 if (!m->trailer.hasKey("/Encrypt")) {
747 return; 733 return;
748 } 734 }
749 735
750 - // Go ahead and set m->encrypted here. That way, isEncrypted  
751 - // will return true even if there were errors reading the  
752 - // encryption dictionary. 736 + // Go ahead and set m->encrypted here. That way, isEncrypted will return true even if there
  737 + // were errors reading the encryption dictionary.
753 m->encp->encrypted = true; 738 m->encp->encrypted = true;
754 739
755 std::string id1; 740 std::string id1;
@@ -757,9 +742,8 @@ QPDF::initializeEncryption() @@ -757,9 +742,8 @@ QPDF::initializeEncryption()
757 if ((id_obj.isArray() && (id_obj.getArrayNItems() == 2) && id_obj.getArrayItem(0).isString())) { 742 if ((id_obj.isArray() && (id_obj.getArrayNItems() == 2) && id_obj.getArrayItem(0).isString())) {
758 id1 = id_obj.getArrayItem(0).getStringValue(); 743 id1 = id_obj.getArrayItem(0).getStringValue();
759 } else { 744 } else {
760 - // Treating a missing ID as the empty string enables qpdf to  
761 - // decrypt some invalid encrypted files with no /ID that  
762 - // poppler can read but Adobe Reader can't. 745 + // Treating a missing ID as the empty string enables qpdf to decrypt some invalid encrypted
  746 + // files with no /ID that poppler can read but Adobe Reader can't.
763 warn(damagedPDF("trailer", "invalid /ID in trailer dictionary")); 747 warn(damagedPDF("trailer", "invalid /ID in trailer dictionary"));
764 } 748 }
765 749
@@ -800,8 +784,8 @@ QPDF::initializeEncryption() @@ -800,8 +784,8 @@ QPDF::initializeEncryption()
800 std::string U = encryption_dict.getKey("/U").getStringValue(); 784 std::string U = encryption_dict.getKey("/U").getStringValue();
801 int P = static_cast<int>(encryption_dict.getKey("/P").getIntValue()); 785 int P = static_cast<int>(encryption_dict.getKey("/P").getIntValue());
802 786
803 - // If supporting new encryption R/V values, remember to update  
804 - // error message inside this if statement. 787 + // If supporting new encryption R/V values, remember to update error message inside this if
  788 + // statement.
805 if (!(((R >= 2) && (R <= 6)) && ((V == 1) || (V == 2) || (V == 4) || (V == 5)))) { 789 if (!(((R >= 2) && (R <= 6)) && ((V == 1) || (V == 2) || (V == 4) || (V == 5)))) {
806 throw QPDFExc( 790 throw QPDFExc(
807 qpdf_e_unsupported, 791 qpdf_e_unsupported,
@@ -893,8 +877,7 @@ QPDF::initializeEncryption() @@ -893,8 +877,7 @@ QPDF::initializeEncryption()
893 QTC::TC("qpdf", "QPDF_encryption CFM AESV3"); 877 QTC::TC("qpdf", "QPDF_encryption CFM AESV3");
894 method = e_aesv3; 878 method = e_aesv3;
895 } else { 879 } else {
896 - // Don't complain now -- maybe we won't need  
897 - // to reference this type. 880 + // Don't complain now -- maybe we won't need to reference this type.
898 method = e_unknown; 881 method = e_unknown;
899 } 882 }
900 } 883 }
@@ -908,20 +891,15 @@ QPDF::initializeEncryption() @@ -908,20 +891,15 @@ QPDF::initializeEncryption()
908 m->encp->cf_stream = interpretCF(m->encp, StmF); 891 m->encp->cf_stream = interpretCF(m->encp, StmF);
909 m->encp->cf_string = interpretCF(m->encp, StrF); 892 m->encp->cf_string = interpretCF(m->encp, StrF);
910 if (EFF.isName()) { 893 if (EFF.isName()) {
911 - // qpdf does not use this for anything other than  
912 - // informational purposes. This is intended to instruct  
913 - // conforming writers on which crypt filter should be used  
914 - // when new file attachments are added to a PDF file, but  
915 - // qpdf never generates encrypted files with non-default  
916 - // crypt filters. Prior to 10.2, I was under the mistaken  
917 - // impression that this was supposed to be used for  
918 - // decrypting attachments, but the code was wrong in a way  
919 - // that turns out not to have mattered because no writers  
920 - // were generating files the way I was imagining. Still,  
921 - // providing this information could be useful when looking  
922 - // at a file generated by something else, such as Acrobat  
923 - // when specifying that only attachments should be  
924 - // encrypted. 894 + // qpdf does not use this for anything other than informational purposes. This is
  895 + // intended to instruct conforming writers on which crypt filter should be used when new
  896 + // file attachments are added to a PDF file, but qpdf never generates encrypted files
  897 + // with non-default crypt filters. Prior to 10.2, I was under the mistaken impression
  898 + // that this was supposed to be used for decrypting attachments, but the code was wrong
  899 + // in a way that turns out not to have mattered because no writers were generating files
  900 + // the way I was imagining. Still, providing this information could be useful when
  901 + // looking at a file generated by something else, such as Acrobat when specifying that
  902 + // only attachments should be encrypted.
925 m->encp->cf_file = interpretCF(m->encp, EFF); 903 m->encp->cf_file = interpretCF(m->encp, EFF);
926 } else { 904 } else {
927 m->encp->cf_file = m->encp->cf_stream; 905 m->encp->cf_file = m->encp->cf_stream;
@@ -935,8 +913,7 @@ QPDF::initializeEncryption() @@ -935,8 +913,7 @@ QPDF::initializeEncryption()
935 m->encp->owner_password_matched = 913 m->encp->owner_password_matched =
936 check_owner_password(m->encp->user_password, m->encp->provided_password, data); 914 check_owner_password(m->encp->user_password, m->encp->provided_password, data);
937 if (m->encp->owner_password_matched && (V < 5)) { 915 if (m->encp->owner_password_matched && (V < 5)) {
938 - // password supplied was owner password; user_password has  
939 - // been initialized for V < 5 916 + // password supplied was owner password; user_password has been initialized for V < 5
940 if (getTrimmedUserPassword() == m->encp->provided_password) { 917 if (getTrimmedUserPassword() == m->encp->provided_password) {
941 m->encp->user_password_matched = true; 918 m->encp->user_password_matched = true;
942 QTC::TC("qpdf", "QPDF_encryption user matches owner V < 5"); 919 QTC::TC("qpdf", "QPDF_encryption user matches owner V < 5");
@@ -958,14 +935,12 @@ QPDF::initializeEncryption() @@ -958,14 +935,12 @@ QPDF::initializeEncryption()
958 if (m->provided_password_is_hex_key) { 935 if (m->provided_password_is_hex_key) {
959 m->encp->encryption_key = QUtil::hex_decode(m->encp->provided_password); 936 m->encp->encryption_key = QUtil::hex_decode(m->encp->provided_password);
960 } else if (V < 5) { 937 } else if (V < 5) {
961 - // For V < 5, the user password is encrypted with the owner  
962 - // password, and the user password is always used for  
963 - // computing the encryption key. 938 + // For V < 5, the user password is encrypted with the owner password, and the user password
  939 + // is always used for computing the encryption key.
964 m->encp->encryption_key = compute_encryption_key(m->encp->user_password, data); 940 m->encp->encryption_key = compute_encryption_key(m->encp->user_password, data);
965 } else { 941 } else {
966 - // For V >= 5, either password can be used independently to  
967 - // compute the encryption key, and neither password can be  
968 - // used to recover the other. 942 + // For V >= 5, either password can be used independently to compute the encryption key, and
  943 + // neither password can be used to recover the other.
969 bool perms_valid; 944 bool perms_valid;
970 m->encp->encryption_key = 945 m->encp->encryption_key =
971 recover_encryption_key_with_password(m->encp->provided_password, data, perms_valid); 946 recover_encryption_key_with_password(m->encp->provided_password, data, perms_valid);
@@ -1026,8 +1001,7 @@ QPDF::decryptString(std::string&amp; str, QPDFObjGen const&amp; og) @@ -1026,8 +1001,7 @@ QPDF::decryptString(std::string&amp; str, QPDFObjGen const&amp; og)
1026 default: 1001 default:
1027 warn(damagedPDF("unknown encryption filter for strings (check /StrF in " 1002 warn(damagedPDF("unknown encryption filter for strings (check /StrF in "
1028 "/Encrypt dictionary); strings may be decrypted improperly")); 1003 "/Encrypt dictionary); strings may be decrypted improperly"));
1029 - // To avoid repeated warnings, reset cf_string. Assume  
1030 - // we'd want to use AES if V == 4. 1004 + // To avoid repeated warnings, reset cf_string. Assume we'd want to use AES if V == 4.
1031 m->encp->cf_string = e_aes; 1005 m->encp->cf_string = e_aes;
1032 use_aes = true; 1006 use_aes = true;
1033 break; 1007 break;
@@ -1052,8 +1026,8 @@ QPDF::decryptString(std::string&amp; str, QPDFObjGen const&amp; og) @@ -1052,8 +1026,8 @@ QPDF::decryptString(std::string&amp; str, QPDFObjGen const&amp; og)
1052 } else { 1026 } else {
1053 QTC::TC("qpdf", "QPDF_encryption rc4 decode string"); 1027 QTC::TC("qpdf", "QPDF_encryption rc4 decode string");
1054 size_t vlen = str.length(); 1028 size_t vlen = str.length();
1055 - // Using std::shared_ptr guarantees that tmp will  
1056 - // be freed even if rc4.process throws an exception. 1029 + // Using std::shared_ptr guarantees that tmp will be freed even if rc4.process throws an
  1030 + // exception.
1057 auto tmp = QUtil::make_unique_cstr(str); 1031 auto tmp = QUtil::make_unique_cstr(str);
1058 RC4 rc4(QUtil::unsigned_char_pointer(key), toI(key.length())); 1032 RC4 rc4(QUtil::unsigned_char_pointer(key), toI(key.length()));
1059 auto data = QUtil::unsigned_char_pointer(tmp.get()); 1033 auto data = QUtil::unsigned_char_pointer(tmp.get());
@@ -1154,8 +1128,7 @@ QPDF::decryptStream( @@ -1154,8 +1128,7 @@ QPDF::decryptStream(
1154 file->getLastOffset(), 1128 file->getLastOffset(),
1155 "unknown encryption filter for streams (check " + method_source + 1129 "unknown encryption filter for streams (check " + method_source +
1156 "); streams may be decrypted improperly")); 1130 "); streams may be decrypted improperly"));
1157 - // To avoid repeated warnings, reset cf_stream. Assume  
1158 - // we'd want to use AES if V == 4. 1131 + // To avoid repeated warnings, reset cf_stream. Assume we'd want to use AES if V == 4.
1159 encp->cf_stream = e_aes; 1132 encp->cf_stream = e_aes;
1160 use_aes = true; 1133 use_aes = true;
1161 break; 1134 break;
libqpdf/QPDF_json.cc
@@ -12,8 +12,7 @@ @@ -12,8 +12,7 @@
12 #include <algorithm> 12 #include <algorithm>
13 #include <cstring> 13 #include <cstring>
14 14
15 -// This chart shows an example of the state transitions that would  
16 -// occur in parsing a minimal file. 15 +// This chart shows an example of the state transitions that would occur in parsing a minimal file.
17 16
18 // | st_initial 17 // | st_initial
19 // { | -> st_top 18 // { | -> st_top
@@ -414,9 +413,9 @@ QPDF::JSONReactor::containerEnd(JSON const&amp; value) @@ -414,9 +413,9 @@ QPDF::JSONReactor::containerEnd(JSON const&amp; value)
414 object_stack.pop_back(); 413 object_stack.pop_back();
415 } 414 }
416 } else if ((state == st_top) && (from_state == st_qpdf)) { 415 } else if ((state == st_top) && (from_state == st_qpdf)) {
417 - // Handle dangling indirect object references which the PDF spec says to  
418 - // treat as nulls. It's tempting to make this an error, but that would  
419 - // be wrong since valid input files may have these. 416 + // Handle dangling indirect object references which the PDF spec says to treat as nulls.
  417 + // It's tempting to make this an error, but that would be wrong since valid input files may
  418 + // have these.
420 for (auto& oc: pdf.m->obj_cache) { 419 for (auto& oc: pdf.m->obj_cache) {
421 if (oc.second.object->getTypeCode() == ::ot_reserved && reserved.count(oc.first) == 0) { 420 if (oc.second.object->getTypeCode() == ::ot_reserved && reserved.count(oc.first) == 0) {
422 QTC::TC("qpdf", "QPDF_json non-trivial null reserved"); 421 QTC::TC("qpdf", "QPDF_json non-trivial null reserved");
@@ -446,8 +445,7 @@ QPDF::JSONReactor::topLevelScalar() @@ -446,8 +445,7 @@ QPDF::JSONReactor::topLevelScalar()
446 void 445 void
447 QPDF::JSONReactor::nestedState(std::string const& key, JSON const& value, state_e next) 446 QPDF::JSONReactor::nestedState(std::string const& key, JSON const& value, state_e next)
448 { 447 {
449 - // Use this method when the next state is for processing a nested  
450 - // dictionary. 448 + // Use this method when the next state is for processing a nested dictionary.
451 if (value.isDictionary()) { 449 if (value.isDictionary()) {
452 this->next_state = next; 450 this->next_state = next;
453 } else { 451 } else {
@@ -531,8 +529,8 @@ QPDF::JSONReactor::dictionaryItem(std::string const&amp; key, JSON const&amp; value) @@ -531,8 +529,8 @@ QPDF::JSONReactor::dictionaryItem(std::string const&amp; key, JSON const&amp; value)
531 error(value.getStart(), "calledgetallpages must be a boolean"); 529 error(value.getStart(), "calledgetallpages must be a boolean");
532 } 530 }
533 } else { 531 } else {
534 - // ignore unknown keys for forward compatibility and to  
535 - // skip keys we don't care about like "maxobjectid". 532 + // ignore unknown keys for forward compatibility and to skip keys we don't care about
  533 + // like "maxobjectid".
536 QTC::TC("qpdf", "QPDF_json ignore second-level key"); 534 QTC::TC("qpdf", "QPDF_json ignore second-level key");
537 next_state = st_ignore; 535 next_state = st_ignore;
538 } 536 }
@@ -594,8 +592,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const&amp; key, JSON const&amp; value) @@ -594,8 +592,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const&amp; key, JSON const&amp; value)
594 this->pdf.m->trailer = makeObject(value); 592 this->pdf.m->trailer = makeObject(value);
595 setObjectDescription(this->pdf.m->trailer, value); 593 setObjectDescription(this->pdf.m->trailer, value);
596 } else if (key == "stream") { 594 } else if (key == "stream") {
597 - // Don't need to set saw_stream here since there's already  
598 - // an error. 595 + // Don't need to set saw_stream here since there's already an error.
599 QTC::TC("qpdf", "QPDF_json trailer stream"); 596 QTC::TC("qpdf", "QPDF_json trailer stream");
600 error(value.getStart(), "the trailer may not be a stream"); 597 error(value.getStart(), "the trailer may not be a stream");
601 next_state = st_ignore; 598 next_state = st_ignore;
@@ -616,8 +613,8 @@ QPDF::JSONReactor::dictionaryItem(std::string const&amp; key, JSON const&amp; value) @@ -616,8 +613,8 @@ QPDF::JSONReactor::dictionaryItem(std::string const&amp; key, JSON const&amp; value)
616 auto uninitialized = QPDFObjectHandle(); 613 auto uninitialized = QPDFObjectHandle();
617 if (key == "dict") { 614 if (key == "dict") {
618 this->saw_dict = true; 615 this->saw_dict = true;
619 - // Since a stream dictionary must be a dictionary, we can  
620 - // use nestedState to transition to st_value. 616 + // Since a stream dictionary must be a dictionary, we can use nestedState to transition
  617 + // to st_value.
621 nestedState("stream.dict", value, st_object); 618 nestedState("stream.dict", value, st_object);
622 auto dict = makeObject(value); 619 auto dict = makeObject(value);
623 if (dict.isDictionary()) { 620 if (dict.isDictionary()) {
libqpdf/QPDF_linearization.cc
@@ -22,8 +22,8 @@ load_vector_int( @@ -22,8 +22,8 @@ load_vector_int(
22 BitStream& bit_stream, int nitems, std::vector<T>& vec, int bits_wanted, int_type T::*field) 22 BitStream& bit_stream, int nitems, std::vector<T>& vec, int bits_wanted, int_type T::*field)
23 { 23 {
24 bool append = vec.empty(); 24 bool append = vec.empty();
25 - // nitems times, read bits_wanted from the given bit stream,  
26 - // storing results in the ith vector entry. 25 + // nitems times, read bits_wanted from the given bit stream, storing results in the ith vector
  26 + // entry.
27 27
28 for (size_t i = 0; i < QIntC::to_size(nitems); ++i) { 28 for (size_t i = 0; i < QIntC::to_size(nitems); ++i) {
29 if (append) { 29 if (append) {
@@ -34,8 +34,8 @@ load_vector_int( @@ -34,8 +34,8 @@ load_vector_int(
34 if (QIntC::to_int(vec.size()) != nitems) { 34 if (QIntC::to_int(vec.size()) != nitems) {
35 throw std::logic_error("vector has wrong size in load_vector_int"); 35 throw std::logic_error("vector has wrong size in load_vector_int");
36 } 36 }
37 - // The PDF spec says that each hint table starts at a byte  
38 - // boundary. Each "row" actually must start on a byte boundary. 37 + // The PDF spec says that each hint table starts at a byte boundary. Each "row" actually must
  38 + // start on a byte boundary.
39 bit_stream.skipToNextByte(); 39 bit_stream.skipToNextByte();
40 } 40 }
41 41
@@ -49,8 +49,8 @@ load_vector_vector( @@ -49,8 +49,8 @@ load_vector_vector(
49 int bits_wanted, 49 int bits_wanted,
50 std::vector<int> T::*vec2) 50 std::vector<int> T::*vec2)
51 { 51 {
52 - // nitems1 times, read nitems2 (from the ith element of vec1) items  
53 - // into the vec2 vector field of the ith item of vec1. 52 + // nitems1 times, read nitems2 (from the ith element of vec1) items into the vec2 vector field
  53 + // of the ith item of vec1.
54 for (size_t i1 = 0; i1 < QIntC::to_size(nitems1); ++i1) { 54 for (size_t i1 = 0; i1 < QIntC::to_size(nitems1); ++i1) {
55 for (int i2 = 0; i2 < vec1.at(i1).*nitems2; ++i2) { 55 for (int i2 = 0; i2 < vec1.at(i1).*nitems2; ++i2) {
56 (vec1.at(i1).*vec2).push_back(bit_stream.getBitsInt(QIntC::to_size(bits_wanted))); 56 (vec1.at(i1).*vec2).push_back(bit_stream.getBitsInt(QIntC::to_size(bits_wanted)));
@@ -83,18 +83,15 @@ QPDF::checkLinearization() @@ -83,18 +83,15 @@ QPDF::checkLinearization()
83 bool 83 bool
84 QPDF::isLinearized() 84 QPDF::isLinearized()
85 { 85 {
86 - // If the first object in the file is a dictionary with a suitable  
87 - // /Linearized key and has an /L key that accurately indicates the  
88 - // file size, initialize m->lindict and return true.  
89 -  
90 - // A linearized PDF spec's first object will be contained within  
91 - // the first 1024 bytes of the file and will be a dictionary with  
92 - // a valid /Linearized key. This routine looks for that and does  
93 - // no additional validation.  
94 -  
95 - // The PDF spec says the linearization dictionary must be  
96 - // completely contained within the first 1024 bytes of the file.  
97 - // Add a byte for a null terminator. 86 + // If the first object in the file is a dictionary with a suitable /Linearized key and has an /L
  87 + // key that accurately indicates the file size, initialize m->lindict and return true.
  88 +
  89 + // A linearized PDF spec's first object will be contained within the first 1024 bytes of the
  90 + // file and will be a dictionary with a valid /Linearized key. This routine looks for that and
  91 + // does no additional validation.
  92 +
  93 + // The PDF spec says the linearization dictionary must be completely contained within the first
  94 + // 1024 bytes of the file. Add a byte for a null terminator.
98 static int const tbuf_size = 1025; 95 static int const tbuf_size = 1025;
99 96
100 auto b = std::make_unique<char[]>(tbuf_size); 97 auto b = std::make_unique<char[]>(tbuf_size);
@@ -161,8 +158,8 @@ QPDF::isLinearized() @@ -161,8 +158,8 @@ QPDF::isLinearized()
161 void 158 void
162 QPDF::readLinearizationData() 159 QPDF::readLinearizationData()
163 { 160 {
164 - // This function throws an exception (which is trapped by  
165 - // checkLinearization()) for any errors that prevent loading. 161 + // This function throws an exception (which is trapped by checkLinearization()) for any errors
  162 + // that prevent loading.
166 163
167 if (!isLinearized()) { 164 if (!isLinearized()) {
168 throw std::logic_error("called readLinearizationData for file" 165 throw std::logic_error("called readLinearizationData for file"
@@ -206,8 +203,8 @@ QPDF::readLinearizationData() @@ -206,8 +203,8 @@ QPDF::readLinearizationData()
206 int H1_offset = 0; 203 int H1_offset = 0;
207 int H1_length = 0; 204 int H1_length = 0;
208 if (H_items.size() == 4) { 205 if (H_items.size() == 4) {
209 - // Acrobat doesn't read or write these (as PDF 1.4), so we  
210 - // don't have a way to generate a test case. 206 + // Acrobat doesn't read or write these (as PDF 1.4), so we don't have a way to generate a
  207 + // test case.
211 // QTC::TC("qpdf", "QPDF overflow hint table"); 208 // QTC::TC("qpdf", "QPDF overflow hint table");
212 H1_offset = H_items.at(2); 209 H1_offset = H_items.at(2);
213 H1_length = H_items.at(3); 210 H1_length = H_items.at(3);
@@ -224,9 +221,8 @@ QPDF::readLinearizationData() @@ -224,9 +221,8 @@ QPDF::readLinearizationData()
224 221
225 // Store linearization parameter data 222 // Store linearization parameter data
226 223
227 - // Various places in the code use linp.npages, which is  
228 - // initialized from N, to pre-allocate memory, so make sure it's  
229 - // accurate and bail right now if it's not. 224 + // Various places in the code use linp.npages, which is initialized from N, to pre-allocate
  225 + // memory, so make sure it's accurate and bail right now if it's not.
230 if (N.getIntValue() != static_cast<long long>(getAllPages().size())) { 226 if (N.getIntValue() != static_cast<long long>(getAllPages().size())) {
231 throw damagedPDF("linearization hint table", "/N does not match number of pages"); 227 throw damagedPDF("linearization hint table", "/N does not match number of pages");
232 } 228 }
@@ -299,11 +295,10 @@ QPDF::readHintStream(Pipeline&amp; pl, qpdf_offset_t offset, size_t length) @@ -299,11 +295,10 @@ QPDF::readHintStream(Pipeline&amp; pl, qpdf_offset_t offset, size_t length)
299 295
300 QPDFObjectHandle Hdict = H.getDict(); 296 QPDFObjectHandle Hdict = H.getDict();
301 297
302 - // Some versions of Acrobat make /Length indirect and place it  
303 - // immediately after the stream, increasing length to cover it,  
304 - // even though the specification says all objects in the  
305 - // linearization parameter dictionary must be direct. We have to  
306 - // get the file position of the end of length in this case. 298 + // Some versions of Acrobat make /Length indirect and place it immediately after the stream,
  299 + // increasing length to cover it, even though the specification says all objects in the
  300 + // linearization parameter dictionary must be direct. We have to get the file position of the
  301 + // end of length in this case.
307 QPDFObjectHandle length_obj = Hdict.getKey("/Length"); 302 QPDFObjectHandle length_obj = Hdict.getKey("/Length");
308 if (length_obj.isIndirect()) { 303 if (length_obj.isIndirect()) {
309 QTC::TC("qpdf", "QPDF hint table length indirect"); 304 QTC::TC("qpdf", "QPDF hint table length indirect");
@@ -329,8 +324,7 @@ QPDF::readHintStream(Pipeline&amp; pl, qpdf_offset_t offset, size_t length) @@ -329,8 +324,7 @@ QPDF::readHintStream(Pipeline&amp; pl, qpdf_offset_t offset, size_t length)
329 void 324 void
330 QPDF::readHPageOffset(BitStream h) 325 QPDF::readHPageOffset(BitStream h)
331 { 326 {
332 - // All comments referring to the PDF spec refer to the spec for  
333 - // version 1.4. 327 + // All comments referring to the PDF spec refer to the spec for version 1.4.
334 328
335 HPageOffset& t = m->page_offset_hints; 329 HPageOffset& t = m->page_offset_hints;
336 330
@@ -402,9 +396,8 @@ QPDF::readHSharedObject(BitStream h) @@ -402,9 +396,8 @@ QPDF::readHSharedObject(BitStream h)
402 load_vector_int(h, nitems, entries, 1, &HSharedObjectEntry::signature_present); 396 load_vector_int(h, nitems, entries, 1, &HSharedObjectEntry::signature_present);
403 for (size_t i = 0; i < toS(nitems); ++i) { 397 for (size_t i = 0; i < toS(nitems); ++i) {
404 if (entries.at(i).signature_present) { 398 if (entries.at(i).signature_present) {
405 - // Skip 128-bit MD5 hash. These are not supported by  
406 - // acrobat, so they should probably never be there. We  
407 - // have no test case for this. 399 + // Skip 128-bit MD5 hash. These are not supported by acrobat, so they should probably
  400 + // never be there. We have no test case for this.
408 for (int j = 0; j < 4; ++j) { 401 for (int j = 0; j < 4; ++j) {
409 (void)h.getBits(32); 402 (void)h.getBits(32);
410 } 403 }
@@ -425,8 +418,7 @@ QPDF::readHGeneric(BitStream h, HGeneric&amp; t) @@ -425,8 +418,7 @@ QPDF::readHGeneric(BitStream h, HGeneric&amp; t)
425 bool 418 bool
426 QPDF::checkLinearizationInternal() 419 QPDF::checkLinearizationInternal()
427 { 420 {
428 - // All comments referring to the PDF spec refer to the spec for  
429 - // version 1.4. 421 + // All comments referring to the PDF spec refer to the spec for version 1.4.
430 422
431 // Check all values in linearization parameter dictionary 423 // Check all values in linearization parameter dictionary
432 424
@@ -476,24 +468,21 @@ QPDF::checkLinearizationInternal() @@ -476,24 +468,21 @@ QPDF::checkLinearizationInternal()
476 "; file = " + std::to_string(m->file->tell())); 468 "; file = " + std::to_string(m->file->tell()));
477 } 469 }
478 470
479 - // P: first page number -- Implementation note 124 says Acrobat  
480 - // ignores this value, so we will too. 471 + // P: first page number -- Implementation note 124 says Acrobat ignores this value, so we will
  472 + // too.
481 473
482 - // Check numbering of compressed objects in each xref section.  
483 - // For linearized files, all compressed objects are supposed to be  
484 - // at the end of the containing xref section if any object streams  
485 - // are in use. 474 + // Check numbering of compressed objects in each xref section. For linearized files, all
  475 + // compressed objects are supposed to be at the end of the containing xref section if any object
  476 + // streams are in use.
486 477
487 if (m->uncompressed_after_compressed) { 478 if (m->uncompressed_after_compressed) {
488 - linearizationWarning("linearized file contains an uncompressed object"  
489 - " after a compressed one in a cross-reference stream"); 479 + linearizationWarning("linearized file contains an uncompressed object after a compressed "
  480 + "one in a cross-reference stream");
490 } 481 }
491 482
492 - // Further checking requires optimization and order calculation.  
493 - // Don't allow optimization to make changes. If it has to, then  
494 - // the file is not properly linearized. We use the xref table to  
495 - // figure out which objects are compressed and which are  
496 - // uncompressed. 483 + // Further checking requires optimization and order calculation. Don't allow optimization to
  484 + // make changes. If it has to, then the file is not properly linearized. We use the xref table
  485 + // to figure out which objects are compressed and which are uncompressed.
497 { // local scope 486 { // local scope
498 std::map<int, int> object_stream_data; 487 std::map<int, int> object_stream_data;
499 for (auto const& iter: m->xref_table) { 488 for (auto const& iter: m->xref_table) {
@@ -507,16 +496,13 @@ QPDF::checkLinearizationInternal() @@ -507,16 +496,13 @@ QPDF::checkLinearizationInternal()
507 calculateLinearizationData(object_stream_data); 496 calculateLinearizationData(object_stream_data);
508 } 497 }
509 498
510 - // E: offset of end of first page -- Implementation note 123 says  
511 - // Acrobat includes on extra object here by mistake. pdlin fails  
512 - // to place thumbnail images in section 9, so when thumbnails are  
513 - // present, it also gets the wrong value for /E. It also doesn't  
514 - // count outlines here when it should even though it places them  
515 - // in part 6. This code fails to put thread information  
516 - // dictionaries in part 9, so it actually gets the wrong value for  
517 - // E when threads are present. In that case, it would probably  
518 - // agree with pdlin. As of this writing, the test suite doesn't  
519 - // contain any files with threads. 499 + // E: offset of end of first page -- Implementation note 123 says Acrobat includes on extra
  500 + // object here by mistake. pdlin fails to place thumbnail images in section 9, so when
  501 + // thumbnails are present, it also gets the wrong value for /E. It also doesn't count outlines
  502 + // here when it should even though it places them in part 6. This code fails to put thread
  503 + // information dictionaries in part 9, so it actually gets the wrong value for E when threads
  504 + // are present. In that case, it would probably agree with pdlin. As of this writing, the test
  505 + // suite doesn't contain any files with threads.
520 506
521 if (m->part6.empty()) { 507 if (m->part6.empty()) {
522 stopOnError("linearization part 6 unexpectedly empty"); 508 stopOnError("linearization part 6 unexpectedly empty");
@@ -577,8 +563,7 @@ QPDF::getLinearizationOffset(QPDFObjGen const&amp; og) @@ -577,8 +563,7 @@ QPDF::getLinearizationOffset(QPDFObjGen const&amp; og)
577 break; 563 break;
578 564
579 case 2: 565 case 2:
580 - // For compressed objects, return the offset of the object  
581 - // stream that contains them. 566 + // For compressed objects, return the offset of the object stream that contains them.
582 result = getLinearizationOffset(QPDFObjGen(entry.getObjStreamNumber(), 0)); 567 result = getLinearizationOffset(QPDFObjGen(entry.getObjStreamNumber(), 0));
583 break; 568 break;
584 569
@@ -611,8 +596,7 @@ QPDF::lengthNextN(int first_object, int n) @@ -611,8 +596,7 @@ QPDF::lengthNextN(int first_object, int n)
611 "no xref table entry for " + std::to_string(first_object + i) + " 0"); 596 "no xref table entry for " + std::to_string(first_object + i) + " 0");
612 } else { 597 } else {
613 if (m->obj_cache.count(og) == 0) { 598 if (m->obj_cache.count(og) == 0) {
614 - stopOnError("found unknown object while"  
615 - " calculating length for linearization data"); 599 + stopOnError("found unknown object while calculating length for linearization data");
616 } 600 }
617 length += toI(m->obj_cache[og].end_after_space - getLinearizationOffset(og)); 601 length += toI(m->obj_cache[og].end_after_space - getLinearizationOffset(og));
618 } 602 }
@@ -624,22 +608,17 @@ void @@ -624,22 +608,17 @@ void
624 QPDF::checkHPageOffset( 608 QPDF::checkHPageOffset(
625 std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& shared_idx_to_obj) 609 std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& shared_idx_to_obj)
626 { 610 {
627 - // Implementation note 126 says Acrobat always sets  
628 - // delta_content_offset and delta_content_length in the page  
629 - // offset header dictionary to 0. It also states that  
630 - // min_content_offset in the per-page information is always 0,  
631 - // which is an incorrect value.  
632 -  
633 - // Implementation note 127 explains that Acrobat always sets item  
634 - // 8 (min_content_length) to zero, item 9  
635 - // (nbits_delta_content_length) to the value of item 5  
636 - // (nbits_delta_page_length), and item 7 of each per-page hint  
637 - // table (delta_content_length) to item 2 (delta_page_length) of  
638 - // that entry. Acrobat ignores these values when reading files.  
639 -  
640 - // Empirically, it also seems that Acrobat sometimes puts items  
641 - // under a page's /Resources dictionary in with shared objects  
642 - // even when they are private. 611 + // Implementation note 126 says Acrobat always sets delta_content_offset and
  612 + // delta_content_length in the page offset header dictionary to 0. It also states that
  613 + // min_content_offset in the per-page information is always 0, which is an incorrect value.
  614 +
  615 + // Implementation note 127 explains that Acrobat always sets item 8 (min_content_length) to
  616 + // zero, item 9 (nbits_delta_content_length) to the value of item 5 (nbits_delta_page_length),
  617 + // and item 7 of each per-page hint table (delta_content_length) to item 2 (delta_page_length)
  618 + // of that entry. Acrobat ignores these values when reading files.
  619 +
  620 + // Empirically, it also seems that Acrobat sometimes puts items under a page's /Resources
  621 + // dictionary in with shared objects even when they are private.
643 622
644 int npages = toI(pages.size()); 623 int npages = toI(pages.size());
645 qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset); 624 qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset);
@@ -670,13 +649,12 @@ QPDF::checkHPageOffset( @@ -670,13 +649,12 @@ QPDF::checkHPageOffset(
670 std::to_string(h_nobjects) + "; computed = " + std::to_string(ce.nobjects)); 649 std::to_string(h_nobjects) + "; computed = " + std::to_string(ce.nobjects));
671 } 650 }
672 651
673 - // Use value for number of objects in hint table rather than  
674 - // computed value if there is a discrepancy. 652 + // Use value for number of objects in hint table rather than computed value if there is a
  653 + // discrepancy.
675 int length = lengthNextN(first_object, h_nobjects); 654 int length = lengthNextN(first_object, h_nobjects);
676 int h_length = toI(he.delta_page_length + m->page_offset_hints.min_page_length); 655 int h_length = toI(he.delta_page_length + m->page_offset_hints.min_page_length);
677 if (length != h_length) { 656 if (length != h_length) {
678 - // This condition almost certainly indicates a bad hint  
679 - // table or a bug in this code. 657 + // This condition almost certainly indicates a bad hint table or a bug in this code.
680 linearizationWarning( 658 linearizationWarning(
681 "page length mismatch for page " + std::to_string(pageno) + ": hint table = " + 659 "page length mismatch for page " + std::to_string(pageno) + ": hint table = " +
682 std::to_string(h_length) + "; computed length = " + std::to_string(length) + 660 std::to_string(h_length) + "; computed length = " + std::to_string(length) +
@@ -690,8 +668,8 @@ QPDF::checkHPageOffset( @@ -690,8 +668,8 @@ QPDF::checkHPageOffset(
690 std::set<int> computed_shared; 668 std::set<int> computed_shared;
691 669
692 if ((pageno == 0) && (he.nshared_objects > 0)) { 670 if ((pageno == 0) && (he.nshared_objects > 0)) {
693 - // pdlin and Acrobat both do this even though the spec  
694 - // states clearly and unambiguously that they should not. 671 + // pdlin and Acrobat both do this even though the spec states clearly and unambiguously
  672 + // that they should not.
695 linearizationWarning("page 0 has shared identifier entries"); 673 linearizationWarning("page 0 has shared identifier entries");
696 } 674 }
697 675
@@ -724,9 +702,8 @@ QPDF::checkHPageOffset( @@ -724,9 +702,8 @@ QPDF::checkHPageOffset(
724 702
725 for (int iter: computed_shared) { 703 for (int iter: computed_shared) {
726 if (!hint_shared.count(iter)) { 704 if (!hint_shared.count(iter)) {
727 - // Acrobat does not put some things including at least  
728 - // built-in fonts and procsets here, at least in some  
729 - // cases. 705 + // Acrobat does not put some things including at least built-in fonts and procsets
  706 + // here, at least in some cases.
730 linearizationWarning( 707 linearizationWarning(
731 ("page " + std::to_string(pageno) + ": shared object " + std::to_string(iter) + 708 ("page " + std::to_string(pageno) + ": shared object " + std::to_string(iter) +
732 ": in computed list but not hint table")); 709 ": in computed list but not hint table"));
@@ -738,31 +715,26 @@ QPDF::checkHPageOffset( @@ -738,31 +715,26 @@ QPDF::checkHPageOffset(
738 void 715 void
739 QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj) 716 QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj)
740 { 717 {
741 - // Implementation note 125 says shared object groups always  
742 - // contain only one object. Implementation note 128 says that  
743 - // Acrobat always nbits_nobjects to zero. Implementation note 130  
744 - // says that Acrobat does not support more than one shared object  
745 - // per group. These are all consistent. 718 + // Implementation note 125 says shared object groups always contain only one object.
  719 + // Implementation note 128 says that Acrobat always sets nbits_nobjects to zero. Implementation
  720 + // note 130 says that Acrobat does not support more than one shared object per group. These are
  721 + // all consistent.
746 722
747 - // Implementation note 129 states that MD5 signatures are not  
748 - // implemented in Acrobat, so signature_present must always be  
749 - // zero. 723 + // Implementation note 129 states that MD5 signatures are not implemented in Acrobat, so
  724 + // signature_present must always be zero.
750 725
751 - // Implementation note 131 states that first_shared_obj and  
752 - // first_shared_offset have meaningless values for single-page  
753 - // files. 726 + // Implementation note 131 states that first_shared_obj and first_shared_offset have meaningless
  727 + // values for single-page files.
754 728
755 - // Empirically, Acrobat and pdlin generate incorrect values for  
756 - // these whenever there are no shared objects not referenced by  
757 - // the first page (i.e., nshared_total == nshared_first_page). 729 + // Empirically, Acrobat and pdlin generate incorrect values for these whenever there are no
  730 + // shared objects not referenced by the first page (i.e., nshared_total == nshared_first_page).
758 731
759 HSharedObject& so = m->shared_object_hints; 732 HSharedObject& so = m->shared_object_hints;
760 if (so.nshared_total < so.nshared_first_page) { 733 if (so.nshared_total < so.nshared_first_page) {
761 linearizationWarning("shared object hint table: ntotal < nfirst_page"); 734 linearizationWarning("shared object hint table: ntotal < nfirst_page");
762 } else { 735 } else {
763 - // The first nshared_first_page objects are consecutive  
764 - // objects starting with the first page object. The rest are  
765 - // consecutive starting from the first_shared_obj object. 736 + // The first nshared_first_page objects are consecutive objects starting with the first page
  737 + // object. The rest are consecutive starting from the first_shared_obj object.
766 int cur_object = pages.at(0).getObjectID(); 738 int cur_object = pages.at(0).getObjectID();
767 for (int i = 0; i < so.nshared_total; ++i) { 739 for (int i = 0; i < so.nshared_total; ++i) {
768 if (i == so.nshared_first_page) { 740 if (i == so.nshared_first_page) {
@@ -814,12 +786,10 @@ QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<in @@ -814,12 +786,10 @@ QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<in
814 void 786 void
815 QPDF::checkHOutlines() 787 QPDF::checkHOutlines()
816 { 788 {
817 - // Empirically, Acrobat generates the correct value for the object  
818 - // number but incorrectly stores the next object number's offset  
819 - // as the offset, at least when outlines appear in part 6. It  
820 - // also generates an incorrect value for length (specifically, the  
821 - // length that would cover the correct number of objects from the  
822 - // wrong starting place). pdlin appears to generate correct 789 + // Empirically, Acrobat generates the correct value for the object number but incorrectly stores
  790 + // the next object number's offset as the offset, at least when outlines appear in part 6. It
  791 + // also generates an incorrect value for length (specifically, the length that would cover the
  792 + // correct number of objects from the wrong starting place). pdlin appears to generate correct
823 // values in those cases. 793 // values in those cases.
824 794
825 if (m->c_outline_data.nobjects == m->outline_hints.nobjects) { 795 if (m->c_outline_data.nobjects == m->outline_hints.nobjects) {
@@ -831,9 +801,8 @@ QPDF::checkHOutlines() @@ -831,9 +801,8 @@ QPDF::checkHOutlines()
831 // Check length and offset. Acrobat gets these wrong. 801 // Check length and offset. Acrobat gets these wrong.
832 QPDFObjectHandle outlines = getRoot().getKey("/Outlines"); 802 QPDFObjectHandle outlines = getRoot().getKey("/Outlines");
833 if (!outlines.isIndirect()) { 803 if (!outlines.isIndirect()) {
834 - // This case is not exercised in test suite since not  
835 - // permitted by the spec, but if this does occur, the  
836 - // code below would fail. 804 + // This case is not exercised in test suite since not permitted by the spec, but if
  805 + // this does occur, the code below would fail.
837 linearizationWarning("/Outlines key of root dictionary is not indirect"); 806 linearizationWarning("/Outlines key of root dictionary is not indirect");
838 return; 807 return;
839 } 808 }
@@ -906,9 +875,8 @@ QPDF::dumpLinearizationDataInternal() @@ -906,9 +875,8 @@ QPDF::dumpLinearizationDataInternal()
906 qpdf_offset_t 875 qpdf_offset_t
907 QPDF::adjusted_offset(qpdf_offset_t offset) 876 QPDF::adjusted_offset(qpdf_offset_t offset)
908 { 877 {
909 - // All offsets >= H_offset have to be increased by H_length  
910 - // since all hint table location values disregard the hint table  
911 - // itself. 878 + // All offsets >= H_offset have to be increased by H_length since all hint table location values
  879 + // disregard the hint table itself.
912 if (offset >= m->linp.H_offset) { 880 if (offset >= m->linp.H_offset) {
913 return offset + m->linp.H_length; 881 return offset + m->linp.H_length;
914 } 882 }
@@ -971,8 +939,8 @@ QPDF::dumpHSharedObject() @@ -971,8 +939,8 @@ QPDF::dumpHSharedObject()
971 *m->log->getInfo() << "Shared Object " << i << ":\n" 939 *m->log->getInfo() << "Shared Object " << i << ":\n"
972 << " group length: " << se.delta_group_length + t.min_group_length 940 << " group length: " << se.delta_group_length + t.min_group_length
973 << "\n"; 941 << "\n";
974 - // PDF spec says signature present nobjects_minus_one are  
975 - // always 0, so print them only if they have a non-zero value. 942 + // PDF spec says signature present nobjects_minus_one are always 0, so print them only if
  943 + // they have a non-zero value.
976 if (se.signature_present) { 944 if (se.signature_present) {
977 *m->log->getInfo() << " signature present\n"; 945 *m->log->getInfo() << " signature present\n";
978 } 946 }
@@ -994,44 +962,38 @@ QPDF::dumpHGeneric(HGeneric& t) @@ -994,44 +962,38 @@ QPDF::dumpHGeneric(HGeneric& t)
994 void 962 void
995 QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) 963 QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
996 { 964 {
997 - // This function calculates the ordering of objects, divides them  
998 - // into the appropriate parts, and computes some values for the  
999 - // linearization parameter dictionary and hint tables. The file  
1000 - // must be optimized (via calling optimize()) prior to calling  
1001 - // this function. Note that actual offsets and lengths are not  
1002 - // computed here, but anything related to object ordering is. 965 + // This function calculates the ordering of objects, divides them into the appropriate parts,
  966 + // and computes some values for the linearization parameter dictionary and hint tables. The
  967 + // file must be optimized (via calling optimize()) prior to calling this function. Note that
  968 + // actual offsets and lengths are not computed here, but anything related to object ordering is.
1003 969
1004 if (m->object_to_obj_users.empty()) { 970 if (m->object_to_obj_users.empty()) {
1005 - // Note that we can't call optimize here because we don't know  
1006 - // whether it should be called with or without allow changes.  
1007 - throw std::logic_error("INTERNAL ERROR: QPDF::calculateLinearizationData "  
1008 - "called before optimize()"); 971 + // Note that we can't call optimize here because we don't know whether it should be called
  972 + // with or without allow changes.
  973 + throw std::logic_error(
  974 + "INTERNAL ERROR: QPDF::calculateLinearizationData called before optimize()");
1009 } 975 }
1010 976
1011 - // Separate objects into the categories sufficient for us to  
1012 - // determine which part of the linearized file should contain the  
1013 - // object. This categorization is useful for other purposes as  
1014 - // well. Part numbers refer to version 1.4 of the PDF spec. 977 + // Separate objects into the categories sufficient for us to determine which part of the
  978 + // linearized file should contain the object. This categorization is useful for other purposes
  979 + // as well. Part numbers refer to version 1.4 of the PDF spec.
1015 980
1016 - // Parts 1, 3, 5, 10, and 11 don't contain any objects from the  
1017 - // original file (except the trailer dictionary in part 11). 981 + // Parts 1, 3, 5, 10, and 11 don't contain any objects from the original file (except the
  982 + // trailer dictionary in part 11).
1018 983
1019 - // Part 4 is the document catalog (root) and the following root  
1020 - // keys: /ViewerPreferences, /PageMode, /Threads, /OpenAction,  
1021 - // /AcroForm, /Encrypt. Note that Thread information dictionaries  
1022 - // are supposed to appear in part 9, but we are disregarding that  
1023 - // recommendation for now. 984 + // Part 4 is the document catalog (root) and the following root keys: /ViewerPreferences,
  985 + // /PageMode, /Threads, /OpenAction, /AcroForm, /Encrypt. Note that Thread information
  986 + // dictionaries are supposed to appear in part 9, but we are disregarding that recommendation
  987 + // for now.
1024 988
1025 - // Part 6 is the first page section. It includes all remaining  
1026 - // objects referenced by the first page including shared objects  
1027 - // but not including thumbnails. Additionally, if /PageMode is 989 + // Part 6 is the first page section. It includes all remaining objects referenced by the first
  990 + // page including shared objects but not including thumbnails. Additionally, if /PageMode is
1028 // /Outlines, then information from /Outlines also appears here. 991 // /Outlines, then information from /Outlines also appears here.
1029 992
1030 - // Part 7 contains remaining objects private to pages other than  
1031 - // the first page. 993 + // Part 7 contains remaining objects private to pages other than the first page.
1032 994
1033 - // Part 8 contains all remaining shared objects except those that  
1034 - // are shared only within thumbnails. 995 + // Part 8 contains all remaining shared objects except those that are shared only within
  996 + // thumbnails.
1035 997
1036 // Part 9 contains all remaining objects. 998 // Part 9 contains all remaining objects.
1037 999
@@ -1176,42 +1138,35 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) @@ -1176,42 +1138,35 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
1176 } 1138 }
1177 } 1139 }
1178 1140
1179 - // Generate ordering for objects in the output file. Sometimes we  
1180 - // just dump right from a set into a vector. Rather than  
1181 - // optimizing this by going straight into the vector, we'll leave  
1182 - // these phases separate for now. That way, this section can be  
1183 - // concerned only with ordering, and the above section can be  
1184 - // considered only with categorization. Note that sets of  
1185 - // QPDFObjGens are sorted by QPDFObjGen. In a linearized file,  
1186 - // objects appear in sequence with the possible exception of hints  
1187 - // tables which we won't see here anyway. That means that running  
1188 - // calculateLinearizationData() on a linearized file should give  
1189 - // results identical to the original file ordering.  
1190 -  
1191 - // We seem to traverse the page tree a lot in this code, but we  
1192 - // can address this for a future code optimization if necessary.  
1193 - // Premature optimization is the root of all evil. 1141 + // Generate ordering for objects in the output file. Sometimes we just dump right from a set
  1142 + // into a vector. Rather than optimizing this by going straight into the vector, we'll leave
  1143 + // these phases separate for now. That way, this section can be concerned only with ordering,
  1144 + // and the above section can be concerned only with categorization. Note that sets of
  1145 + // QPDFObjGens are sorted by QPDFObjGen. In a linearized file, objects appear in sequence with
  1146 + // the possible exception of hints tables which we won't see here anyway. That means that
  1147 + // running calculateLinearizationData() on a linearized file should give results identical to
  1148 + // the original file ordering.
  1149 +
  1150 + // We seem to traverse the page tree a lot in this code, but we can address this for a future
  1151 + // code optimization if necessary. Premature optimization is the root of all evil.
1194 std::vector<QPDFObjectHandle> pages; 1152 std::vector<QPDFObjectHandle> pages;
1195 { // local scope 1153 { // local scope
1196 - // Map all page objects to the containing object stream. This  
1197 - // should be a no-op in a properly linearized file. 1154 + // Map all page objects to the containing object stream. This should be a no-op in a
  1155 + // properly linearized file.
1198 for (auto oh: getAllPages()) { 1156 for (auto oh: getAllPages()) {
1199 pages.push_back(getUncompressedObject(oh, object_stream_data)); 1157 pages.push_back(getUncompressedObject(oh, object_stream_data));
1200 } 1158 }
1201 } 1159 }
1202 int npages = toI(pages.size()); 1160 int npages = toI(pages.size());
1203 1161
1204 - // We will be initializing some values of the computed hint  
1205 - // tables. Specifically, we can initialize any items that deal  
1206 - // with object numbers or counts but not any items that deal with  
1207 - // lengths or offsets. The code that writes linearized files will  
1208 - // have to fill in these values during the first pass. The  
1209 - // validation code can compute them relatively easily given the  
1210 - // rest of the information.  
1211 -  
1212 - // npages is the size of the existing pages vector, which has been  
1213 - // created by traversing the pages tree, and as such is a  
1214 - // reasonable size. 1162 + // We will be initializing some values of the computed hint tables. Specifically, we can
  1163 + // initialize any items that deal with object numbers or counts but not any items that deal with
  1164 + // lengths or offsets. The code that writes linearized files will have to fill in these values
  1165 + // during the first pass. The validation code can compute them relatively easily given the rest
  1166 + // of the information.
  1167 +
  1168 + // npages is the size of the existing pages vector, which has been created by traversing the
  1169 + // pages tree, and as such is a reasonable size.
1215 m->c_linp.npages = npages; 1170 m->c_linp.npages = npages;
1216 m->c_page_offset_data.entries = std::vector<CHPageOffsetEntry>(toS(npages)); 1171 m->c_page_offset_data.entries = std::vector<CHPageOffsetEntry>(toS(npages));
1217 1172
@@ -1226,11 +1181,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) @@ -1226,11 +1181,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
1226 m->part4.push_back(getObject(og)); 1181 m->part4.push_back(getObject(og));
1227 } 1182 }
1228 1183
1229 - // Part 6: first page objects. Note: implementation note 124  
1230 - // states that Acrobat always treats page 0 as the first page for  
1231 - // linearization regardless of /OpenAction. pdlin doesn't provide  
1232 - // any option to set this and also disregards /OpenAction. We  
1233 - // will do the same. 1184 + // Part 6: first page objects. Note: implementation note 124 states that Acrobat always treats
  1185 + // page 0 as the first page for linearization regardless of /OpenAction. pdlin doesn't provide
  1186 + // any option to set this and also disregards /OpenAction. We will do the same.
1234 1187
1235 // First, place the actual first page object itself. 1188 // First, place the actual first page object itself.
1236 if (pages.empty()) { 1189 if (pages.empty()) {
@@ -1245,10 +1198,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) @@ -1245,10 +1198,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
1245 m->c_linp.first_page_object = pages.at(0).getObjectID(); 1198 m->c_linp.first_page_object = pages.at(0).getObjectID();
1246 m->part6.push_back(pages.at(0)); 1199 m->part6.push_back(pages.at(0));
1247 1200
1248 - // The PDF spec "recommends" an order for the rest of the objects,  
1249 - // but we are going to disregard it except to the extent that it  
1250 - // groups private and shared objects contiguously for the sake of  
1251 - // hint tables. 1201 + // The PDF spec "recommends" an order for the rest of the objects, but we are going to disregard
  1202 + // it except to the extent that it groups private and shared objects contiguously for the sake
  1203 + // of hint tables.
1252 1204
1253 for (auto const& og: lc_first_page_private) { 1205 for (auto const& og: lc_first_page_private) {
1254 m->part6.push_back(getObject(og)); 1206 m->part6.push_back(getObject(og));
@@ -1263,11 +1215,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) @@ -1263,11 +1215,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
1263 pushOutlinesToPart(m->part6, lc_outlines, object_stream_data); 1215 pushOutlinesToPart(m->part6, lc_outlines, object_stream_data);
1264 } 1216 }
1265 1217
1266 - // Fill in page offset hint table information for the first page.  
1267 - // The PDF spec says that nshared_objects should be zero for the  
1268 - // first page. pdlin does not appear to obey this, but it fills  
1269 - // in garbage values for all the shared object identifiers on the  
1270 - // first page. 1218 + // Fill in page offset hint table information for the first page. The PDF spec says that
  1219 + // nshared_objects should be zero for the first page. pdlin does not appear to obey this, but
  1220 + // it fills in garbage values for all the shared object identifiers on the first page.
1271 1221
1272 m->c_page_offset_data.entries.at(0).nobjects = toI(m->part6.size()); 1222 m->c_page_offset_data.entries.at(0).nobjects = toI(m->part6.size());
1273 1223
@@ -1287,8 +1237,8 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) @@ -1287,8 +1237,8 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
1287 lc_other_page_private.erase(page_og); 1237 lc_other_page_private.erase(page_og);
1288 m->part7.push_back(pages.at(i)); 1238 m->part7.push_back(pages.at(i));
1289 1239
1290 - // Place all non-shared objects referenced by this page,  
1291 - // updating the page object count for the hint table. 1240 + // Place all non-shared objects referenced by this page, updating the page object count for
  1241 + // the hint table.
1292 1242
1293 m->c_page_offset_data.entries.at(i).nobjects = 1; 1243 m->c_page_offset_data.entries.at(i).nobjects = 1;
1294 1244
@@ -1321,12 +1271,10 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) @@ -1321,12 +1271,10 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
1321 1271
1322 // Part 9: other objects 1272 // Part 9: other objects
1323 1273
1324 - // The PDF specification makes recommendations on ordering here.  
1325 - // We follow them only to a limited extent. Specifically, we put  
1326 - // the pages tree first, then private thumbnail objects in page  
1327 - // order, then shared thumbnail objects, and then outlines (unless  
1328 - // in part 6). After that, we throw all remaining objects in  
1329 - // arbitrary order. 1274 + // The PDF specification makes recommendations on ordering here. We follow them only to a
  1275 + // limited extent. Specifically, we put the pages tree first, then private thumbnail objects in
  1276 + // page order, then shared thumbnail objects, and then outlines (unless in part 6). After that,
  1277 + // we throw all remaining objects in arbitrary order.
1330 1278
1331 // Place the pages tree. 1279 // Place the pages tree.
1332 std::set<QPDFObjGen> pages_ogs = 1280 std::set<QPDFObjGen> pages_ogs =
@@ -1342,9 +1290,8 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) @@ -1342,9 +1290,8 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
1342 } 1290 }
1343 } 1291 }
1344 1292
1345 - // Place private thumbnail images in page order. Slightly more  
1346 - // information would be required if we were going to bother with  
1347 - // thumbnail hint tables. 1293 + // Place private thumbnail images in page order. Slightly more information would be required if
  1294 + // we were going to bother with thumbnail hint tables.
1348 for (size_t i = 0; i < toS(npages); ++i) { 1295 for (size_t i = 0; i < toS(npages); ++i) {
1349 QPDFObjectHandle thumb = pages.at(i).getKey("/Thumb"); 1296 QPDFObjectHandle thumb = pages.at(i).getKey("/Thumb");
1350 thumb = getUncompressedObject(thumb, object_stream_data); 1297 thumb = getUncompressedObject(thumb, object_stream_data);
@@ -1355,11 +1302,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) @@ -1355,11 +1302,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
1355 lc_thumbnail_private.erase(thumb_og); 1302 lc_thumbnail_private.erase(thumb_og);
1356 m->part9.push_back(thumb); 1303 m->part9.push_back(thumb);
1357 } else { 1304 } else {
1358 - // No internal error this time...there's nothing to  
1359 - // stop this object from having been referred to  
1360 - // somewhere else outside of a page's /Thumb, and if  
1361 - // it had been, there's nothing to prevent it from  
1362 - // having been in some set other than 1305 + // No internal error this time...there's nothing to stop this object from having
  1306 + // been referred to somewhere else outside of a page's /Thumb, and if it had been,
  1307 + // there's nothing to prevent it from having been in some set other than
1363 // lc_thumbnail_private. 1308 // lc_thumbnail_private.
1364 } 1309 }
1365 std::set<QPDFObjGen>& ogs = m->obj_user_to_objects[ObjUser(ObjUser::ou_thumb, toI(i))]; 1310 std::set<QPDFObjGen>& ogs = m->obj_user_to_objects[ObjUser(ObjUser::ou_thumb, toI(i))];
@@ -1372,9 +1317,8 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) @@ -1372,9 +1317,8 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
1372 } 1317 }
1373 } 1318 }
1374 if (!lc_thumbnail_private.empty()) { 1319 if (!lc_thumbnail_private.empty()) {
1375 - stopOnError("INTERNAL ERROR: "  
1376 - "QPDF::calculateLinearizationData: lc_thumbnail_private "  
1377 - "not empty after placing thumbnails"); 1320 + stopOnError("INTERNAL ERROR: QPDF::calculateLinearizationData: lc_thumbnail_private not "
  1321 + "empty after placing thumbnails");
1378 } 1322 }
1379 1323
1380 // Place shared thumbnail objects 1324 // Place shared thumbnail objects
@@ -1404,17 +1348,15 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) @@ -1404,17 +1348,15 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
1404 std::to_string(num_placed) + "; number of objects: " + std::to_string(num_wanted)); 1348 std::to_string(num_placed) + "; number of objects: " + std::to_string(num_wanted));
1405 } 1349 }
1406 1350
1407 - // Calculate shared object hint table information including  
1408 - // references to shared objects from page offset hint data. 1351 + // Calculate shared object hint table information including references to shared objects from
  1352 + // page offset hint data.
1409 1353
1410 - // The shared object hint table consists of all part 6 (whether  
1411 - // shared or not) in order followed by all part 8 objects in  
1412 - // order. Add the objects to shared object data keeping a map of  
1413 - // object number to index. Then populate the shared object  
1414 - // information for the pages. 1354 + // The shared object hint table consists of all part 6 (whether shared or not) in order followed
  1355 + // by all part 8 objects in order. Add the objects to shared object data keeping a map of
  1356 + // object number to index. Then populate the shared object information for the pages.
1415 1357
1416 - // Note that two objects never have the same object number, so we  
1417 - // can map from object number only without regards to generation. 1358 + // Note that two objects never have the same object number, so we can map from object number
  1359 + // only without regard to generation.
1418 std::map<int, int> obj_to_index; 1360 std::map<int, int> obj_to_index;
1419 1361
1420 m->c_shared_object_data.nshared_first_page = toI(m->part6.size()); 1362 m->c_shared_object_data.nshared_first_page = toI(m->part6.size());
@@ -1441,8 +1383,7 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) @@ -1441,8 +1383,7 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
1441 stopOnError("shared object hint table has wrong number of entries"); 1383 stopOnError("shared object hint table has wrong number of entries");
1442 } 1384 }
1443 1385
1444 - // Now compute the list of shared objects for each page after the  
1445 - // first page. 1386 + // Now compute the list of shared objects for each page after the first page.
1446 1387
1447 for (size_t i = 1; i < toS(npages); ++i) { 1388 for (size_t i = 1; i < toS(npages); ++i) {
1448 CHPageOffsetEntry& pe = m->c_page_offset_data.entries.at(i); 1389 CHPageOffsetEntry& pe = m->c_page_offset_data.entries.at(i);
@@ -1520,20 +1461,17 @@ QPDF::outputLengthNextN( @@ -1520,20 +1461,17 @@ QPDF::outputLengthNextN(
1520 std::map<int, qpdf_offset_t> const& lengths, 1461 std::map<int, qpdf_offset_t> const& lengths,
1521 std::map<int, int> const& obj_renumber) 1462 std::map<int, int> const& obj_renumber)
1522 { 1463 {
1523 - // Figure out the length of a series of n consecutive objects in  
1524 - // the output file starting with whatever object in_object from  
1525 - // the input file mapped to. 1464 + // Figure out the length of a series of n consecutive objects in the output file starting with
  1465 + // whatever object in_object from the input file mapped to.
1526 1466
1527 if (obj_renumber.count(in_object) == 0) { 1467 if (obj_renumber.count(in_object) == 0) {
1528 - stopOnError("found object that is not renumbered while"  
1529 - " writing linearization data"); 1468 + stopOnError("found object that is not renumbered while writing linearization data");
1530 } 1469 }
1531 int first = (*(obj_renumber.find(in_object))).second; 1470 int first = (*(obj_renumber.find(in_object))).second;
1532 int length = 0; 1471 int length = 0;
1533 for (int i = 0; i < n; ++i) { 1472 for (int i = 0; i < n; ++i) {
1534 if (lengths.count(first + i) == 0) { 1473 if (lengths.count(first + i) == 0) {
1535 - stopOnError("found item with unknown length"  
1536 - " while writing linearization data"); 1474 + stopOnError("found item with unknown length while writing linearization data");
1537 } 1475 }
1538 length += toI((*(lengths.find(first + toI(i)))).second); 1476 length += toI((*(lengths.find(first + toI(i)))).second);
1539 } 1477 }
@@ -1548,16 +1486,14 @@ QPDF::calculateHPageOffset( @@ -1548,16 +1486,14 @@ QPDF::calculateHPageOffset(
1548 { 1486 {
1549 // Page Offset Hint Table 1487 // Page Offset Hint Table
1550 1488
1551 - // We are purposely leaving some values set to their initial zero  
1552 - // values. 1489 + // We are purposely leaving some values set to their initial zero values.
1553 1490
1554 std::vector<QPDFObjectHandle> const& pages = getAllPages(); 1491 std::vector<QPDFObjectHandle> const& pages = getAllPages();
1555 size_t npages = pages.size(); 1492 size_t npages = pages.size();
1556 CHPageOffset& cph = m->c_page_offset_data; 1493 CHPageOffset& cph = m->c_page_offset_data;
1557 std::vector<CHPageOffsetEntry>& cphe = cph.entries; 1494 std::vector<CHPageOffsetEntry>& cphe = cph.entries;
1558 1495
1559 - // Calculate minimum and maximum values for number of objects per  
1560 - // page and page length. 1496 + // Calculate minimum and maximum values for number of objects per page and page length.
1561 1497
1562 int min_nobjects = cphe.at(0).nobjects; 1498 int min_nobjects = cphe.at(0).nobjects;
1563 int max_nobjects = min_nobjects; 1499 int max_nobjects = min_nobjects;
@@ -1572,11 +1508,11 @@ QPDF::calculateHPageOffset( @@ -1572,11 +1508,11 @@ QPDF::calculateHPageOffset(
1572 phe = std::vector<HPageOffsetEntry>(npages); 1508 phe = std::vector<HPageOffsetEntry>(npages);
1573 1509
1574 for (unsigned int i = 0; i < npages; ++i) { 1510 for (unsigned int i = 0; i < npages; ++i) {
1575 - // Calculate values for each page, assigning full values to  
1576 - // the delta items. They will be adjusted later. 1511 + // Calculate values for each page, assigning full values to the delta items. They will be
  1512 + // adjusted later.
1577 1513
1578 - // Repeat calculations for page 0 so we can assign to phe[i]  
1579 - // without duplicating those assignments. 1514 + // Repeat calculations for page 0 so we can assign to phe[i] without duplicating those
  1515 + // assignments.
1580 1516
1581 int nobjects = cphe.at(i).nobjects; 1517 int nobjects = cphe.at(i).nobjects;
1582 int length = outputLengthNextN(pages.at(i).getObjectID(), nobjects, lengths, obj_renumber); 1518 int length = outputLengthNextN(pages.at(i).getObjectID(), nobjects, lengths, obj_renumber);
@@ -1604,11 +1540,10 @@ QPDF::calculateHPageOffset( @@ -1604,11 +1540,10 @@ QPDF::calculateHPageOffset(
1604 ph.nbits_shared_identifier = nbits(m->c_shared_object_data.nshared_total); 1540 ph.nbits_shared_identifier = nbits(m->c_shared_object_data.nshared_total);
1605 ph.shared_denominator = 4; // doesn't matter 1541 ph.shared_denominator = 4; // doesn't matter
1606 1542
1607 - // It isn't clear how to compute content offset and content  
1608 - // length. Since we are not interleaving page objects with the  
1609 - // content stream, we'll use the same values for content length as  
1610 - // page length. We will use 0 as content offset because this is  
1611 - // what Adobe does (implementation note 127) and pdlin as well. 1543 + // It isn't clear how to compute content offset and content length. Since we are not
  1544 + // interleaving page objects with the content stream, we'll use the same values for content
  1545 + // length as page length. We will use 0 as content offset because this is what Adobe does
  1546 + // (implementation note 127) and pdlin as well.
1612 ph.nbits_delta_content_length = ph.nbits_delta_page_length; 1547 ph.nbits_delta_content_length = ph.nbits_delta_page_length;
1613 ph.min_content_length = ph.min_page_length; 1548 ph.min_content_length = ph.min_page_length;
1614 1549
@@ -1616,8 +1551,8 @@ QPDF::calculateHPageOffset( @@ -1616,8 +1551,8 @@ QPDF::calculateHPageOffset(
1616 // Adjust delta entries 1551 // Adjust delta entries
1617 if ((phe.at(i).delta_nobjects < min_nobjects) || 1552 if ((phe.at(i).delta_nobjects < min_nobjects) ||
1618 (phe.at(i).delta_page_length < min_length)) { 1553 (phe.at(i).delta_page_length < min_length)) {
1619 - stopOnError("found too small delta nobjects or delta page length"  
1620 - " while writing linearization data"); 1554 + stopOnError("found too small delta nobjects or delta page length while writing "
  1555 + "linearization data");
1621 } 1556 }
1622 phe.at(i).delta_nobjects -= min_nobjects; 1557 phe.at(i).delta_nobjects -= min_nobjects;
1623 phe.at(i).delta_page_length -= min_length; 1558 phe.at(i).delta_page_length -= min_length;
@@ -1669,8 +1604,7 @@ QPDF::calculateHSharedObject( @@ -1669,8 +1604,7 @@ QPDF::calculateHSharedObject(
1669 for (size_t i = 0; i < toS(cso.nshared_total); ++i) { 1604 for (size_t i = 0; i < toS(cso.nshared_total); ++i) {
1670 // Adjust deltas 1605 // Adjust deltas
1671 if (soe.at(i).delta_group_length < min_length) { 1606 if (soe.at(i).delta_group_length < min_length) {
1672 - stopOnError("found too small group length while"  
1673 - " writing linearization data"); 1607 + stopOnError("found too small group length while writing linearization data");
1674 } 1608 }
1675 soe.at(i).delta_group_length -= min_length; 1609 soe.at(i).delta_group_length -= min_length;
1676 } 1610 }
@@ -1700,14 +1634,13 @@ template <class T, class int_type> @@ -1700,14 +1634,13 @@ template <class T, class int_type>
1700 static void 1634 static void
1701 write_vector_int(BitWriter& w, int nitems, std::vector<T>& vec, int bits, int_type T::*field) 1635 write_vector_int(BitWriter& w, int nitems, std::vector<T>& vec, int bits, int_type T::*field)
1702 { 1636 {
1703 - // nitems times, write bits bits from the given field of the ith  
1704 - // vector to the given bit writer. 1637 + // nitems times, write bits bits from the given field of the ith vector to the given bit writer.
1705 1638
1706 for (size_t i = 0; i < QIntC::to_size(nitems); ++i) { 1639 for (size_t i = 0; i < QIntC::to_size(nitems); ++i) {
1707 w.writeBits(QIntC::to_ulonglong(vec.at(i).*field), QIntC::to_size(bits)); 1640 w.writeBits(QIntC::to_ulonglong(vec.at(i).*field), QIntC::to_size(bits));
1708 } 1641 }
1709 - // The PDF spec says that each hint table starts at a byte  
1710 - // boundary. Each "row" actually must start on a byte boundary. 1642 + // The PDF spec says that each hint table starts at a byte boundary. Each "row" actually must
  1643 + // start on a byte boundary.
1711 w.flush(); 1644 w.flush();
1712 } 1645 }
1713 1646
@@ -1721,8 +1654,8 @@ write_vector_vector( @@ -1721,8 +1654,8 @@ write_vector_vector(
1721 int bits, 1654 int bits,
1722 std::vector<int> T::*vec2) 1655 std::vector<int> T::*vec2)
1723 { 1656 {
1724 - // nitems1 times, write nitems2 (from the ith element of vec1) items  
1725 - // from the vec2 vector field of the ith item of vec1. 1657 + // nitems1 times, write nitems2 (from the ith element of vec1) items from the vec2 vector field
  1658 + // of the ith item of vec1.
1726 for (size_t i1 = 0; i1 < QIntC::to_size(nitems1); ++i1) { 1659 for (size_t i1 = 0; i1 < QIntC::to_size(nitems1); ++i1) {
1727 for (size_t i2 = 0; i2 < QIntC::to_size(vec1.at(i1).*nitems2); ++i2) { 1660 for (size_t i2 = 0; i2 < QIntC::to_size(vec1.at(i1).*nitems2); ++i2) {
1728 w.writeBits(QIntC::to_ulonglong((vec1.at(i1).*vec2).at(i2)), QIntC::to_size(bits)); 1661 w.writeBits(QIntC::to_ulonglong((vec1.at(i1).*vec2).at(i2)), QIntC::to_size(bits));
@@ -1835,8 +1768,8 @@ QPDF::generateHintStream( @@ -1835,8 +1768,8 @@ QPDF::generateHintStream(
1835 calculateHSharedObject(xref, lengths, obj_renumber); 1768 calculateHSharedObject(xref, lengths, obj_renumber);
1836 calculateHOutline(xref, lengths, obj_renumber); 1769 calculateHOutline(xref, lengths, obj_renumber);
1837 1770
1838 - // Write the hint stream itself into a compressed memory buffer.  
1839 - // Write through a counter so we can get offsets. 1771 + // Write the hint stream itself into a compressed memory buffer. Write through a counter so we
  1772 + // can get offsets.
1840 Pl_Buffer hint_stream("hint stream"); 1773 Pl_Buffer hint_stream("hint stream");
1841 Pl_Flate f("compress hint stream", &hint_stream, Pl_Flate::a_deflate); 1774 Pl_Flate f("compress hint stream", &hint_stream, Pl_Flate::a_deflate);
1842 Pl_Count c("count", &f); 1775 Pl_Count c("count", &f);
libqpdf/QPDF_optimization.cc
@@ -64,9 +64,8 @@ QPDF::optimize( @@ -64,9 +64,8 @@ QPDF::optimize(
64 return; 64 return;
65 } 65 }
66 66
67 - // The PDF specification indicates that /Outlines is supposed to  
68 - // be an indirect reference. Force it to be so if it exists and  
69 - // is direct. (This has been seen in the wild.) 67 + // The PDF specification indicates that /Outlines is supposed to be an indirect reference. Force
  68 + // it to be so if it exists and is direct. (This has been seen in the wild.)
70 QPDFObjectHandle root = getRoot(); 69 QPDFObjectHandle root = getRoot();
71 if (root.getKey("/Outlines").isDictionary()) { 70 if (root.getKey("/Outlines").isDictionary()) {
72 QPDFObjectHandle outlines = root.getKey("/Outlines"); 71 QPDFObjectHandle outlines = root.getKey("/Outlines");
@@ -76,8 +75,8 @@ QPDF::optimize( @@ -76,8 +75,8 @@ QPDF::optimize(
76 } 75 }
77 } 76 }
78 77
79 - // Traverse pages tree pushing all inherited resources down to the  
80 - // page level. This also initializes m->all_pages. 78 + // Traverse pages tree pushing all inherited resources down to the page level. This also
  79 + // initializes m->all_pages.
81 pushInheritedAttributesToPage(allow_changes, false); 80 pushInheritedAttributesToPage(allow_changes, false);
82 81
83 // Traverse pages 82 // Traverse pages
@@ -102,12 +101,10 @@ QPDF::optimize( @@ -102,12 +101,10 @@ QPDF::optimize(
102 } 101 }
103 102
104 for (auto const& key: root.getKeys()) { 103 for (auto const& key: root.getKeys()) {
105 - // Technically, /I keys from /Thread dictionaries are supposed  
106 - // to be handled separately, but we are going to disregard  
107 - // that specification for now. There is loads of evidence  
108 - // that pdlin and Acrobat both disregard things like this from  
109 - // time to time, so this is almost certain not to cause any  
110 - // problems. 104 + // Technically, /I keys from /Thread dictionaries are supposed to be handled separately, but
  105 + // we are going to disregard that specification for now. There is loads of evidence that
  106 + // pdlin and Acrobat both disregard things like this from time to time, so this is almost
  107 + // certain not to cause any problems.
111 updateObjectMaps( 108 updateObjectMaps(
112 ObjUser(ObjUser::ou_root_key, key), root.getKey(key), skip_stream_parameters); 109 ObjUser(ObjUser::ou_root_key, key), root.getKey(key), skip_stream_parameters);
113 } 110 }
@@ -130,23 +127,20 @@ QPDF::pushInheritedAttributesToPage() @@ -130,23 +127,20 @@ QPDF::pushInheritedAttributesToPage()
130 void 127 void
131 QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) 128 QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
132 { 129 {
133 - // Traverse pages tree pushing all inherited resources down to the  
134 - // page level. 130 + // Traverse pages tree pushing all inherited resources down to the page level.
135 131
136 - // The record of whether we've done this is cleared by  
137 - // updateAllPagesCache(). If we're warning for skipped keys,  
138 - // re-traverse unconditionally. 132 + // The record of whether we've done this is cleared by updateAllPagesCache(). If we're warning
  133 + // for skipped keys, re-traverse unconditionally.
139 if (m->pushed_inherited_attributes_to_pages && (!warn_skipped_keys)) { 134 if (m->pushed_inherited_attributes_to_pages && (!warn_skipped_keys)) {
140 return; 135 return;
141 } 136 }
142 137
143 - // Calling getAllPages() resolves any duplicated page objects,  
144 - // repairs broken nodes, and detects loops, so we don't have to do  
145 - // those activities here. 138 + // Calling getAllPages() resolves any duplicated page objects, repairs broken nodes, and detects
  139 + // loops, so we don't have to do those activities here.
146 getAllPages(); 140 getAllPages();
147 141
148 - // key_ancestors is a mapping of page attribute keys to a stack of  
149 - // Pages nodes that contain values for them. 142 + // key_ancestors is a mapping of page attribute keys to a stack of Pages nodes that contain
  143 + // values for them.
150 std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors; 144 std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors;
151 pushInheritedAttributesToPageInternal( 145 pushInheritedAttributesToPageInternal(
152 m->trailer.getKey("/Root").getKey("/Pages"), 146 m->trailer.getKey("/Root").getKey("/Pages"),
@@ -168,10 +162,9 @@ QPDF::pushInheritedAttributesToPageInternal( @@ -168,10 +162,9 @@ QPDF::pushInheritedAttributesToPageInternal(
168 bool allow_changes, 162 bool allow_changes,
169 bool warn_skipped_keys) 163 bool warn_skipped_keys)
170 { 164 {
171 - // Make a list of inheritable keys. Only the keys /MediaBox,  
172 - // /CropBox, /Resources, and /Rotate are inheritable  
173 - // attributes. Push this object onto the stack of pages nodes  
174 - // that have values for this attribute. 165 + // Make a list of inheritable keys. Only the keys /MediaBox, /CropBox, /Resources, and /Rotate
  166 + // are inheritable attributes. Push this object onto the stack of pages nodes that have values
  167 + // for this attribute.
175 168
176 std::set<std::string> inheritable_keys; 169 std::set<std::string> inheritable_keys;
177 for (auto const& key: cur_pages.getKeys()) { 170 for (auto const& key: cur_pages.getKeys()) {
@@ -183,9 +176,7 @@ QPDF::pushInheritedAttributesToPageInternal( @@ -183,9 +176,7 @@ QPDF::pushInheritedAttributesToPageInternal(
183 m->file->getName(), 176 m->file->getName(),
184 m->last_object_description, 177 m->last_object_description,
185 m->file->getLastOffset(), 178 m->file->getLastOffset(),
186 - "optimize detected an "  
187 - "inheritable attribute when called "  
188 - "in no-change mode"); 179 + "optimize detected an inheritable attribute when called in no-change mode");
189 } 180 }
190 181
191 // This is an inheritable resource 182 // This is an inheritable resource
@@ -194,9 +185,8 @@ QPDF::pushInheritedAttributesToPageInternal( @@ -194,9 +185,8 @@ QPDF::pushInheritedAttributesToPageInternal(
194 QTC::TC("qpdf", "QPDF opt direct pages resource", oh.isIndirect() ? 0 : 1); 185 QTC::TC("qpdf", "QPDF opt direct pages resource", oh.isIndirect() ? 0 : 1);
195 if (!oh.isIndirect()) { 186 if (!oh.isIndirect()) {
196 if (!oh.isScalar()) { 187 if (!oh.isScalar()) {
197 - // Replace shared direct object non-scalar  
198 - // resources with indirect objects to avoid  
199 - // copying large structures around. 188 + // Replace shared direct object non-scalar resources with indirect objects to
  189 + // avoid copying large structures around.
200 cur_pages.replaceKey(key, makeIndirectObject(oh)); 190 cur_pages.replaceKey(key, makeIndirectObject(oh));
201 oh = cur_pages.getKey(key); 191 oh = cur_pages.getKey(key);
202 } else { 192 } else {
@@ -208,14 +198,12 @@ QPDF::pushInheritedAttributesToPageInternal( @@ -208,14 +198,12 @@ QPDF::pushInheritedAttributesToPageInternal(
208 if (key_ancestors[key].size() > 1) { 198 if (key_ancestors[key].size() > 1) {
209 QTC::TC("qpdf", "QPDF opt key ancestors depth > 1"); 199 QTC::TC("qpdf", "QPDF opt key ancestors depth > 1");
210 } 200 }
211 - // Remove this resource from this node. It will be  
212 - // reattached at the page level. 201 + // Remove this resource from this node. It will be reattached at the page level.
213 cur_pages.removeKey(key); 202 cur_pages.removeKey(key);
214 } else if (!((key == "/Type") || (key == "/Parent") || (key == "/Kids") || 203 } else if (!((key == "/Type") || (key == "/Parent") || (key == "/Kids") ||
215 (key == "/Count"))) { 204 (key == "/Count"))) {
216 - // Warn when flattening, but not if the key is at the top  
217 - // level (i.e. "/Parent" not set), as we don't change these;  
218 - // but flattening removes intermediate /Pages nodes. 205 + // Warn when flattening, but not if the key is at the top level (i.e. "/Parent" not
  206 + // set), as we don't change these; but flattening removes intermediate /Pages nodes.
219 if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) { 207 if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) {
220 QTC::TC("qpdf", "QPDF unknown key not inherited"); 208 QTC::TC("qpdf", "QPDF unknown key not inherited");
221 setLastObjectDescription("Pages object", cur_pages.getObjGen()); 209 setLastObjectDescription("Pages object", cur_pages.getObjGen());
@@ -224,24 +212,21 @@ QPDF::pushInheritedAttributesToPageInternal( @@ -224,24 +212,21 @@ QPDF::pushInheritedAttributesToPageInternal(
224 m->last_object_description, 212 m->last_object_description,
225 0, 213 0,
226 ("Unknown key " + key + 214 ("Unknown key " + key +
227 - " in /Pages object"  
228 - " is being discarded as a result of"  
229 - " flattening the /Pages tree")); 215 + " in /Pages object is being discarded as a result of flattening the /Pages "
  216 + "tree"));
230 } 217 }
231 } 218 }
232 } 219 }
233 220
234 - // Process descendant nodes. This method does not perform loop  
235 - // detection because all code paths that lead here follow a call  
236 - // to getAllPages, which already throws an exception in the event 221 + // Process descendant nodes. This method does not perform loop detection because all code paths
  222 + // that lead here follow a call to getAllPages, which already throws an exception in the event
237 // of a loop in the pages tree. 223 // of a loop in the pages tree.
238 for (auto& kid: cur_pages.getKey("/Kids").aitems()) { 224 for (auto& kid: cur_pages.getKey("/Kids").aitems()) {
239 if (kid.isDictionaryOfType("/Pages")) { 225 if (kid.isDictionaryOfType("/Pages")) {
240 pushInheritedAttributesToPageInternal( 226 pushInheritedAttributesToPageInternal(
241 kid, key_ancestors, allow_changes, warn_skipped_keys); 227 kid, key_ancestors, allow_changes, warn_skipped_keys);
242 } else { 228 } else {
243 - // Add all available inheritable attributes not present in  
244 - // this object to this object. 229 + // Add all available inheritable attributes not present in this object to this object.
245 for (auto const& iter: key_ancestors) { 230 for (auto const& iter: key_ancestors) {
246 std::string const& key = iter.first; 231 std::string const& key = iter.first;
247 if (!kid.hasKey(key)) { 232 if (!kid.hasKey(key)) {
@@ -254,10 +239,9 @@ QPDF::pushInheritedAttributesToPageInternal( @@ -254,10 +239,9 @@ QPDF::pushInheritedAttributesToPageInternal(
254 } 239 }
255 } 240 }
256 241
257 - // For each inheritable key, pop the stack. If the stack  
258 - // becomes empty, remove it from the map. That way, the  
259 - // invariant that the list of keys in key_ancestors is exactly  
260 - // those keys for which inheritable attributes are available. 242 + // For each inheritable key, pop the stack. If the stack becomes empty, remove it from the map.
  243 + // That way, the invariant that the list of keys in key_ancestors is exactly those keys for
  244 + // which inheritable attributes are available.
261 245
262 if (!inheritable_keys.empty()) { 246 if (!inheritable_keys.empty()) {
263 QTC::TC("qpdf", "QPDF opt inheritable keys"); 247 QTC::TC("qpdf", "QPDF opt inheritable keys");
@@ -291,8 +275,7 @@ QPDF::updateObjectMapsInternal( @@ -291,8 +275,7 @@ QPDF::updateObjectMapsInternal(
291 QPDFObjGen::set& visited, 275 QPDFObjGen::set& visited,
292 bool top) 276 bool top)
293 { 277 {
294 - // Traverse the object tree from this point taking care to avoid  
295 - // crossing page boundaries. 278 + // Traverse the object tree from this point taking care to avoid crossing page boundaries.
296 279
297 bool is_page_node = false; 280 bool is_page_node = false;
298 281
@@ -332,8 +315,7 @@ QPDF::updateObjectMapsInternal( @@ -332,8 +315,7 @@ QPDF::updateObjectMapsInternal(
332 315
333 for (auto const& key: dict.getKeys()) { 316 for (auto const& key: dict.getKeys()) {
334 if (is_page_node && (key == "/Thumb")) { 317 if (is_page_node && (key == "/Thumb")) {
335 - // Traverse page thumbnail dictionaries as a special  
336 - // case. 318 + // Traverse page thumbnail dictionaries as a special case.
337 updateObjectMapsInternal( 319 updateObjectMapsInternal(
338 ObjUser(ObjUser::ou_thumb, ou.pageno), 320 ObjUser(ObjUser::ou_thumb, ou.pageno),
339 dict.getKey(key), 321 dict.getKey(key),
@@ -345,8 +327,7 @@ QPDF::updateObjectMapsInternal( @@ -345,8 +327,7 @@ QPDF::updateObjectMapsInternal(
345 } else if ( 327 } else if (
346 ((ssp >= 1) && (key == "/Length")) || 328 ((ssp >= 1) && (key == "/Length")) ||
347 ((ssp >= 2) && ((key == "/Filter") || (key == "/DecodeParms")))) { 329 ((ssp >= 2) && ((key == "/Filter") || (key == "/DecodeParms")))) {
348 - // Don't traverse into stream parameters that we are  
349 - // not going to write. 330 + // Don't traverse into stream parameters that we are not going to write.
350 } else { 331 } else {
351 updateObjectMapsInternal( 332 updateObjectMapsInternal(
352 ou, dict.getKey(key), skip_stream_parameters, visited, false); 333 ou, dict.getKey(key), skip_stream_parameters, visited, false);
@@ -362,9 +343,8 @@ QPDF::filterCompressedObjects(std::map<int, int> const& object_stream_data) @@ -362,9 +343,8 @@ QPDF::filterCompressedObjects(std::map<int, int> const& object_stream_data)
362 return; 343 return;
363 } 344 }
364 345
365 - // Transform object_to_obj_users and obj_user_to_objects so that  
366 - // they refer only to uncompressed objects. If something is a  
367 - // user of a compressed object, then it is really a user of the 346 + // Transform object_to_obj_users and obj_user_to_objects so that they refer only to uncompressed
  347 + // objects. If something is a user of a compressed object, then it is really a user of the
368 // object stream that contains it. 348 // object stream that contains it.
369 349
370 std::map<ObjUser, std::set<QPDFObjGen>> t_obj_user_to_objects; 350 std::map<ObjUser, std::set<QPDFObjGen>> t_obj_user_to_objects;
libqpdf/QPDF_pages.cc
@@ -4,55 +4,42 @@ @@ -4,55 +4,42 @@
4 #include <qpdf/QTC.hh> 4 #include <qpdf/QTC.hh>
5 #include <qpdf/QUtil.hh> 5 #include <qpdf/QUtil.hh>
6 6
7 -// In support of page manipulation APIs, these methods internally  
8 -// maintain state about pages in a pair of data structures: all_pages,  
9 -// which is a vector of page objects, and pageobj_to_pages_pos, which  
10 -// maps a page object to its position in the all_pages array.  
11 -// Unfortunately, the getAllPages() method returns a const reference  
12 -// to all_pages and has been in the public API long before the  
13 -// introduction of mutation APIs, so we're pretty much stuck with it.  
14 -// Anyway, there are lots of calls to it in the library, so the  
15 -// efficiency of having it cached is probably worth keeping it. At one  
16 -// point, I had partially implemented a helper class specifically for  
17 -// the pages tree, but once you work in all the logic that handles  
18 -// repairing the /Type keys of page tree nodes (both /Pages and /Page)  
19 -// and deal with duplicate pages, it's just as complex and less  
20 -// efficient than what's here. So, in spite of the fact that a const  
21 -// reference is returned, the current code is fine and does not need  
22 -// to be replaced. A partial implementation of QPDFPagesTree is in  
23 -// github in attic in case there is ever a reason to resurrect it.  
24 -// There are additional notes in README-maintainer, which also refers  
25 -// to this comment. 7 +// In support of page manipulation APIs, these methods internally maintain state about pages in a
  8 +// pair of data structures: all_pages, which is a vector of page objects, and pageobj_to_pages_pos,
  9 +// which maps a page object to its position in the all_pages array. Unfortunately, the getAllPages()
  10 +// method returns a const reference to all_pages and has been in the public API long before the
  11 +// introduction of mutation APIs, so we're pretty much stuck with it. Anyway, there are lots of
  12 +// calls to it in the library, so the efficiency of having it cached is probably worth keeping it.
  13 +// At one point, I had partially implemented a helper class specifically for the pages tree, but
  14 +// once you work in all the logic that handles repairing the /Type keys of page tree nodes (both
  15 +// /Pages and /Page) and deal with duplicate pages, it's just as complex and less efficient than
  16 +// what's here. So, in spite of the fact that a const reference is returned, the current code is
  17 +// fine and does not need to be replaced. A partial implementation of QPDFPagesTree is in github in
  18 +// attic in case there is ever a reason to resurrect it. There are additional notes in
  19 +// README-maintainer, which also refers to this comment.
26 20
27 -// The goal of this code is to ensure that the all_pages vector, which  
28 -// users may have a reference to, and the pageobj_to_pages_pos map,  
29 -// which users will not have access to, remain consistent outside of  
30 -// any call to the library. As long as users only touch the /Pages  
31 -// structure through page-specific API calls, they never have to worry  
32 -// about anything, and this will also stay consistent. If a user  
33 -// touches anything about the /Pages structure outside of these calls  
34 -// (such as by directly looking up and manipulating the underlying  
35 -// objects), they can call updatePagesCache() to bring things back in  
36 -// sync. 21 +// The goal of this code is to ensure that the all_pages vector, which users may have a reference
  22 +// to, and the pageobj_to_pages_pos map, which users will not have access to, remain consistent
  23 +// outside of any call to the library. As long as users only touch the /Pages structure through
  24 +// page-specific API calls, they never have to worry about anything, and this will also stay
  25 +// consistent. If a user touches anything about the /Pages structure outside of these calls (such
  26 +// as by directly looking up and manipulating the underlying objects), they can call
  27 +// updatePagesCache() to bring things back in sync.
37 28
38 -// If the user doesn't ever use the page manipulation APIs, then qpdf  
39 -// leaves the /Pages structure alone. If the user does use the APIs,  
40 -// then we push all inheritable objects down and flatten the /Pages  
41 -// tree. This makes it easier for us to keep /Pages, all_pages, and  
42 -// pageobj_to_pages_pos internally consistent at all times. 29 +// If the user doesn't ever use the page manipulation APIs, then qpdf leaves the /Pages structure
  30 +// alone. If the user does use the APIs, then we push all inheritable objects down and flatten the
  31 +// /Pages tree. This makes it easier for us to keep /Pages, all_pages, and pageobj_to_pages_pos
  32 +// internally consistent at all times.
43 33
44 -// Responsibility for keeping all_pages, pageobj_to_pages_pos, and the  
45 -// Pages structure consistent should remain in as few places as  
46 -// possible. As of initial writing, only flattenPagesTree,  
47 -// insertPage, and removePage, along with methods they call, are  
48 -// concerned with it. Everything else goes through one of those  
49 -// methods. 34 +// Responsibility for keeping all_pages, pageobj_to_pages_pos, and the Pages structure consistent
  35 +// should remain in as few places as possible. As of initial writing, only flattenPagesTree,
  36 +// insertPage, and removePage, along with methods they call, are concerned with it. Everything else
  37 +// goes through one of those methods.
50 38
51 std::vector<QPDFObjectHandle> const& 39 std::vector<QPDFObjectHandle> const&
52 QPDF::getAllPages() 40 QPDF::getAllPages()
53 { 41 {
54 - // Note that pushInheritedAttributesToPage may also be used to  
55 - // initialize m->all_pages. 42 + // Note that pushInheritedAttributesToPage may also be used to initialize m->all_pages.
56 if (m->all_pages.empty()) { 43 if (m->all_pages.empty()) {
57 m->ever_called_get_all_pages = true; 44 m->ever_called_get_all_pages = true;
58 QPDFObjGen::set visited; 45 QPDFObjGen::set visited;
@@ -65,9 +52,8 @@ QPDF::getAllPages() @@ -65,9 +52,8 @@ QPDF::getAllPages()
65 // loop -- will be detected again and reported later 52 // loop -- will be detected again and reported later
66 break; 53 break;
67 } 54 }
68 - // Files have been found in the wild where /Pages in the  
69 - // catalog points to the first page. Try to work around  
70 - // this and similar cases with this heuristic. 55 + // Files have been found in the wild where /Pages in the catalog points to the first
  56 + // page. Try to work around this and similar cases with this heuristic.
71 if (!warned) { 57 if (!warned) {
72 getRoot().warnIfPossible("document page tree root (root -> /Pages) doesn't point" 58 getRoot().warnIfPossible("document page tree root (root -> /Pages) doesn't point"
73 " to the root of the page tree; attempting to correct"); 59 " to the root of the page tree; attempting to correct");
@@ -118,8 +104,8 @@ QPDF::getAllPagesInternal( @@ -118,8 +104,8 @@ QPDF::getAllPagesInternal(
118 kid = makeIndirectObject(kid); 104 kid = makeIndirectObject(kid);
119 kids.setArrayItem(i, kid); 105 kids.setArrayItem(i, kid);
120 } else if (!seen.add(kid)) { 106 } else if (!seen.add(kid)) {
121 - // Make a copy of the page. This does the same as  
122 - // shallowCopyPage in QPDFPageObjectHelper. 107 + // Make a copy of the page. This does the same as shallowCopyPage in
  108 + // QPDFPageObjectHelper.
123 QTC::TC("qpdf", "QPDF resolve duplicated page object"); 109 QTC::TC("qpdf", "QPDF resolve duplicated page object");
124 cur_node.warnIfPossible( 110 cur_node.warnIfPossible(
125 "kid " + std::to_string(i) + 111 "kid " + std::to_string(i) +
@@ -141,9 +127,8 @@ QPDF::getAllPagesInternal( @@ -141,9 +127,8 @@ QPDF::getAllPagesInternal(
141 void 127 void
142 QPDF::updateAllPagesCache() 128 QPDF::updateAllPagesCache()
143 { 129 {
144 - // Force regeneration of the pages cache. We force immediate  
145 - // recalculation of all_pages since users may have references to  
146 - // it that they got from calls to getAllPages(). We can defer 130 + // Force regeneration of the pages cache. We force immediate recalculation of all_pages since
  131 + // users may have references to it that they got from calls to getAllPages(). We can defer
147 // recalculation of pageobj_to_pages_pos until needed. 132 // recalculation of pageobj_to_pages_pos until needed.
148 QTC::TC("qpdf", "QPDF updateAllPagesCache"); 133 QTC::TC("qpdf", "QPDF updateAllPagesCache");
149 m->all_pages.clear(); 134 m->all_pages.clear();
@@ -155,25 +140,23 @@ QPDF::updateAllPagesCache() @@ -155,25 +140,23 @@ QPDF::updateAllPagesCache()
155 void 140 void
156 QPDF::flattenPagesTree() 141 QPDF::flattenPagesTree()
157 { 142 {
158 - // If not already done, flatten the /Pages structure and  
159 - // initialize pageobj_to_pages_pos. 143 + // If not already done, flatten the /Pages structure and initialize pageobj_to_pages_pos.
160 144
161 if (!m->pageobj_to_pages_pos.empty()) { 145 if (!m->pageobj_to_pages_pos.empty()) {
162 return; 146 return;
163 } 147 }
164 148
165 - // Push inherited objects down to the /Page level. As a side  
166 - // effect m->all_pages will also be generated. 149 + // Push inherited objects down to the /Page level. As a side effect m->all_pages will also be
  150 + // generated.
167 pushInheritedAttributesToPage(true, true); 151 pushInheritedAttributesToPage(true, true);
168 152
169 QPDFObjectHandle pages = getRoot().getKey("/Pages"); 153 QPDFObjectHandle pages = getRoot().getKey("/Pages");
170 154
171 size_t const len = m->all_pages.size(); 155 size_t const len = m->all_pages.size();
172 for (size_t pos = 0; pos < len; ++pos) { 156 for (size_t pos = 0; pos < len; ++pos) {
173 - // Populate pageobj_to_pages_pos and fix parent pointer. There  
174 - // should be no duplicates at this point because  
175 - // pushInheritedAttributesToPage calls getAllPages which  
176 - // resolves duplicates. 157 + // Populate pageobj_to_pages_pos and fix parent pointer. There should be no duplicates at
  158 + // this point because pushInheritedAttributesToPage calls getAllPages which resolves
  159 + // duplicates.
177 insertPageobjToPage(m->all_pages.at(pos), toI(pos), true); 160 insertPageobjToPage(m->all_pages.at(pos), toI(pos), true);
178 m->all_pages.at(pos).replaceKey("/Parent", pages); 161 m->all_pages.at(pos).replaceKey("/Parent", pages);
179 } 162 }
@@ -191,16 +174,14 @@ QPDF::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_dupli @@ -191,16 +174,14 @@ QPDF::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_dupli
191 QPDFObjGen og(obj.getObjGen()); 174 QPDFObjGen og(obj.getObjGen());
192 if (check_duplicate) { 175 if (check_duplicate) {
193 if (!m->pageobj_to_pages_pos.insert(std::make_pair(og, pos)).second) { 176 if (!m->pageobj_to_pages_pos.insert(std::make_pair(og, pos)).second) {
194 - // The library never calls insertPageobjToPage in a way  
195 - // that causes this to happen. 177 + // The library never calls insertPageobjToPage in a way that causes this to happen.
196 setLastObjectDescription("page " + std::to_string(pos) + " (numbered from zero)", og); 178 setLastObjectDescription("page " + std::to_string(pos) + " (numbered from zero)", og);
197 throw QPDFExc( 179 throw QPDFExc(
198 qpdf_e_pages, 180 qpdf_e_pages,
199 m->file->getName(), 181 m->file->getName(),
200 m->last_object_description, 182 m->last_object_description,
201 0, 183 0,
202 - "duplicate page reference found;"  
203 - " this would cause loss of data"); 184 + "duplicate page reference found; this would cause loss of data");
204 } 185 }
205 } else { 186 } else {
206 m->pageobj_to_pages_pos[og] = pos; 187 m->pageobj_to_pages_pos[og] = pos;
@@ -210,8 +191,7 @@ QPDF::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_dupli @@ -210,8 +191,7 @@ QPDF::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_dupli
210 void 191 void
211 QPDF::insertPage(QPDFObjectHandle newpage, int pos) 192 QPDF::insertPage(QPDFObjectHandle newpage, int pos)
212 { 193 {
213 - // pos is numbered from 0, so pos = 0 inserts at the beginning and  
214 - // pos = npages adds to the end. 194 + // pos is numbered from 0, so pos = 0 inserts at the beginning and pos = npages adds to the end.
215 195
216 flattenPagesTree(); 196 flattenPagesTree();
217 197
@@ -233,10 +213,9 @@ QPDF::insertPage(QPDFObjectHandle newpage, int pos) @@ -233,10 +213,9 @@ QPDF::insertPage(QPDFObjectHandle newpage, int pos)
233 QTC::TC( 213 QTC::TC(
234 "qpdf", 214 "qpdf",
235 "QPDF insert page", 215 "QPDF insert page",
236 - (pos == 0) ? 0 : // insert at beginning  
237 - (pos == toI(m->all_pages.size())) ? 1  
238 - : // at end  
239 - 2); // insert in middle 216 + (pos == 0) ? 0 : // insert at beginning
  217 + (pos == toI(m->all_pages.size())) ? 1 // at end
  218 + : 2); // insert in middle
240 219
241 auto og = newpage.getObjGen(); 220 auto og = newpage.getObjGen();
242 if (m->pageobj_to_pages_pos.count(og)) { 221 if (m->pageobj_to_pages_pos.count(og)) {
@@ -265,10 +244,9 @@ QPDF::removePage(QPDFObjectHandle page) @@ -265,10 +244,9 @@ QPDF::removePage(QPDFObjectHandle page)
265 QTC::TC( 244 QTC::TC(
266 "qpdf", 245 "qpdf",
267 "QPDF remove page", 246 "QPDF remove page",
268 - (pos == 0) ? 0 : // remove at beginning  
269 - (pos == toI(m->all_pages.size() - 1)) ? 1  
270 - : // end  
271 - 2); // remove in middle 247 + (pos == 0) ? 0 : // remove at beginning
  248 + (pos == toI(m->all_pages.size() - 1)) ? 1 // end
  249 + : 2); // remove in middle
272 250
273 QPDFObjectHandle pages = getRoot().getKey("/Pages"); 251 QPDFObjectHandle pages = getRoot().getKey("/Pages");
274 QPDFObjectHandle kids = pages.getKey("/Kids"); 252 QPDFObjectHandle kids = pages.getKey("/Kids");