Commit 73ee000c33b1c51688f7a9b2a8ce9816da93e7ac
Committed by
GitHub
Merge pull request #975 from m-holger/reflow
Code tidy - reflow comments and strings
Showing
28 changed files
with
2533 additions
and
3636 deletions
include/qpdf/Buffer.hh
| @@ -2,22 +2,19 @@ | @@ -2,22 +2,19 @@ | ||
| 2 | // | 2 | // |
| 3 | // This file is part of qpdf. | 3 | // This file is part of qpdf. |
| 4 | // | 4 | // |
| 5 | -// Licensed under the Apache License, Version 2.0 (the "License"); | ||
| 6 | -// you may not use this file except in compliance with the License. | ||
| 7 | -// You may obtain a copy of the License at | 5 | +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
| 6 | +// in compliance with the License. You may obtain a copy of the License at | ||
| 8 | // | 7 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 | 8 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // | 9 | // |
| 11 | -// Unless required by applicable law or agreed to in writing, software | ||
| 12 | -// distributed under the License is distributed on an "AS IS" BASIS, | ||
| 13 | -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 14 | -// See the License for the specific language governing permissions and | ||
| 15 | -// limitations under the License. | 10 | +// Unless required by applicable law or agreed to in writing, software distributed under the License |
| 11 | +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | ||
| 12 | +// or implied. See the License for the specific language governing permissions and limitations under | ||
| 13 | +// the License. | ||
| 16 | // | 14 | // |
| 17 | -// Versions of qpdf prior to version 7 were released under the terms | ||
| 18 | -// of version 2.0 of the Artistic License. At your option, you may | ||
| 19 | -// continue to consider qpdf to be licensed under those terms. Please | ||
| 20 | -// see the manual for additional information. | 15 | +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic |
| 16 | +// License. At your option, you may continue to consider qpdf to be licensed under those terms. | ||
| 17 | +// Please see the manual for additional information. | ||
| 21 | 18 | ||
| 22 | #ifndef BUFFER_HH | 19 | #ifndef BUFFER_HH |
| 23 | #define BUFFER_HH | 20 | #define BUFFER_HH |
| @@ -34,13 +31,13 @@ class Buffer | @@ -34,13 +31,13 @@ class Buffer | ||
| 34 | QPDF_DLL | 31 | QPDF_DLL |
| 35 | Buffer(); | 32 | Buffer(); |
| 36 | 33 | ||
| 37 | - // Create a Buffer object whose memory is owned by the class and | ||
| 38 | - // will be freed when the Buffer object is destroyed. | 34 | + // Create a Buffer object whose memory is owned by the class and will be freed when the Buffer |
| 35 | + // object is destroyed. | ||
| 39 | QPDF_DLL | 36 | QPDF_DLL |
| 40 | Buffer(size_t size); | 37 | Buffer(size_t size); |
| 41 | 38 | ||
| 42 | - // Create a Buffer object whose memory is owned by the caller and | ||
| 43 | - // will not be freed when the Buffer is destroyed. | 39 | + // Create a Buffer object whose memory is owned by the caller and will not be freed when the |
| 40 | + // Buffer is destroyed. | ||
| 44 | QPDF_DLL | 41 | QPDF_DLL |
| 45 | Buffer(unsigned char* buf, size_t size); | 42 | Buffer(unsigned char* buf, size_t size); |
| 46 | 43 |
include/qpdf/JSON.hh
| @@ -2,38 +2,31 @@ | @@ -2,38 +2,31 @@ | ||
| 2 | // | 2 | // |
| 3 | // This file is part of qpdf. | 3 | // This file is part of qpdf. |
| 4 | // | 4 | // |
| 5 | -// Licensed under the Apache License, Version 2.0 (the "License"); | ||
| 6 | -// you may not use this file except in compliance with the License. | ||
| 7 | -// You may obtain a copy of the License at | 5 | +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
| 6 | +// in compliance with the License. You may obtain a copy of the License at | ||
| 8 | // | 7 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 | 8 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // | 9 | // |
| 11 | -// Unless required by applicable law or agreed to in writing, software | ||
| 12 | -// distributed under the License is distributed on an "AS IS" BASIS, | ||
| 13 | -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 14 | -// See the License for the specific language governing permissions and | ||
| 15 | -// limitations under the License. | 10 | +// Unless required by applicable law or agreed to in writing, software distributed under the License |
| 11 | +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | ||
| 12 | +// or implied. See the License for the specific language governing permissions and limitations under | ||
| 13 | +// the License. | ||
| 16 | // | 14 | // |
| 17 | -// Versions of qpdf prior to version 7 were released under the terms | ||
| 18 | -// of version 2.0 of the Artistic License. At your option, you may | ||
| 19 | -// continue to consider qpdf to be licensed under those terms. Please | ||
| 20 | -// see the manual for additional information. | 15 | +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic |
| 16 | +// License. At your option, you may continue to consider qpdf to be licensed under those terms. | ||
| 17 | +// Please see the manual for additional information. | ||
| 21 | 18 | ||
| 22 | #ifndef JSON_HH | 19 | #ifndef JSON_HH |
| 23 | #define JSON_HH | 20 | #define JSON_HH |
| 24 | 21 | ||
| 25 | -// This is a simple JSON serializer and parser, primarily designed for | ||
| 26 | -// serializing QPDF Objects as JSON. While it may work as a | ||
| 27 | -// general-purpose JSON parser/serializer, there are better options. | ||
| 28 | -// JSON objects contain their data as smart pointers. When one JSON object | ||
| 29 | -// is added to another, this pointer is copied. This means you can | ||
| 30 | -// create temporary JSON objects on the stack, add them to other | ||
| 31 | -// objects, and let them go out of scope safely. It also means that if | ||
| 32 | -// a JSON object is added in more than one place, all copies | ||
| 33 | -// share the underlying data. This makes them similar in structure and | ||
| 34 | -// behavior to QPDFObjectHandle and may feel natural within the QPDF | ||
| 35 | -// codebase, but it is also a good reason not to use this as a | ||
| 36 | -// general-purpose JSON package. | 22 | +// This is a simple JSON serializer and parser, primarily designed for serializing QPDF Objects as |
| 23 | +// JSON. While it may work as a general-purpose JSON parser/serializer, there are better options. | ||
| 24 | +// JSON objects contain their data as smart pointers. When one JSON object is added to another, this | ||
| 25 | +// pointer is copied. This means you can create temporary JSON objects on the stack, add them to | ||
| 26 | +// other objects, and let them go out of scope safely. It also means that if a JSON object is added | ||
| 27 | +// in more than one place, all copies share the underlying data. This makes them similar in | ||
| 28 | +// structure and behavior to QPDFObjectHandle and may feel natural within the QPDF codebase, but it | ||
| 29 | +// is also a good reason not to use this as a general-purpose JSON package. | ||
| 37 | 30 | ||
| 38 | #include <qpdf/DLL.h> | 31 | #include <qpdf/DLL.h> |
| 39 | #include <qpdf/PointerHolder.hh> // unused -- remove in qpdf 12 (see #785) | 32 | #include <qpdf/PointerHolder.hh> // unused -- remove in qpdf 12 (see #785) |
| @@ -61,71 +54,60 @@ class JSON | @@ -61,71 +54,60 @@ class JSON | ||
| 61 | QPDF_DLL | 54 | QPDF_DLL |
| 62 | std::string unparse() const; | 55 | std::string unparse() const; |
| 63 | 56 | ||
| 64 | - // Write the JSON object through a pipeline. The `depth` parameter | ||
| 65 | - // specifies how deeply nested this is in another JSON structure, | ||
| 66 | - // which makes it possible to write clean-looking JSON | 57 | + // Write the JSON object through a pipeline. The `depth` parameter specifies how deeply nested |
| 58 | + // this is in another JSON structure, which makes it possible to write clean-looking JSON | ||
| 67 | // incrementally. | 59 | // incrementally. |
| 68 | QPDF_DLL | 60 | QPDF_DLL |
| 69 | void write(Pipeline*, size_t depth = 0) const; | 61 | void write(Pipeline*, size_t depth = 0) const; |
| 70 | 62 | ||
| 71 | // Helper methods for writing JSON incrementally. | 63 | // Helper methods for writing JSON incrementally. |
| 72 | // | 64 | // |
| 73 | - // "first" -- Several methods take a `bool& first` parameter. The | ||
| 74 | - // open methods always set it to true, and the methods to output | ||
| 75 | - // items always set it to false. This way, the item and close | ||
| 76 | - // methods can always know whether or not a first item is being | ||
| 77 | - // written. The intended mode of operation is to start with a new | ||
| 78 | - // `bool first = true` each time a new container is opened and | ||
| 79 | - // to pass that `first` through to all the methods that are | ||
| 80 | - // called to add top-level items to the container as well as to | ||
| 81 | - // close the container. This lets the JSON object use it to keep | ||
| 82 | - // track of when it's writing a first object and when it's not. If | ||
| 83 | - // incrementally writing multiple levels of depth, a new `first` | ||
| 84 | - // should used for each new container that is opened. | 65 | + // "first" -- Several methods take a `bool& first` parameter. The open methods always set it to |
| 66 | + // true, and the methods to output items always set it to false. This way, the item and close | ||
| 67 | + // methods can always know whether or not a first item is being written. The intended mode of | ||
| 68 | + // operation is to start with a new `bool first = true` each time a new container is opened and | ||
| 69 | + // to pass that `first` through to all the methods that are called to add top-level items to the | ||
| 70 | + // container as well as to close the container. This lets the JSON object use it to keep track | ||
| 71 | + // of when it's writing a first object and when it's not. If incrementally writing multiple | ||
| 72 | + // levels of depth, a new `first` should be used for each new container that is opened. | ||
| 85 | // | 73 | // |
| 86 | - // "depth" -- Indicate the level of depth. This is used for | ||
| 87 | - // consistent indentation. When writing incrementally, whenever | ||
| 88 | - // you call a method to add an item to a container, the value of | ||
| 89 | - // `depth` should be one more than whatever value is passed to the | ||
| 90 | - // container open and close methods. | 74 | + // "depth" -- Indicate the level of depth. This is used for consistent indentation. When writing |
| 75 | + // incrementally, whenever you call a method to add an item to a container, the value of `depth` | ||
| 76 | + // should be one more than whatever value is passed to the container open and close methods. | ||
| 91 | 77 | ||
| 92 | // Open methods ignore the value of first and set it to true | 78 | // Open methods ignore the value of first and set it to true |
| 93 | QPDF_DLL | 79 | QPDF_DLL |
| 94 | static void writeDictionaryOpen(Pipeline*, bool& first, size_t depth = 0); | 80 | static void writeDictionaryOpen(Pipeline*, bool& first, size_t depth = 0); |
| 95 | QPDF_DLL | 81 | QPDF_DLL |
| 96 | static void writeArrayOpen(Pipeline*, bool& first, size_t depth = 0); | 82 | static void writeArrayOpen(Pipeline*, bool& first, size_t depth = 0); |
| 97 | - // Close methods don't modify first. A true value indicates that | ||
| 98 | - // we are closing an empty object. | 83 | + // Close methods don't modify first. A true value indicates that we are closing an empty object. |
| 99 | QPDF_DLL | 84 | QPDF_DLL |
| 100 | static void writeDictionaryClose(Pipeline*, bool first, size_t depth = 0); | 85 | static void writeDictionaryClose(Pipeline*, bool first, size_t depth = 0); |
| 101 | QPDF_DLL | 86 | QPDF_DLL |
| 102 | static void writeArrayClose(Pipeline*, bool first, size_t depth = 0); | 87 | static void writeArrayClose(Pipeline*, bool first, size_t depth = 0); |
| 103 | - // The item methods use the value of first to determine if this is | ||
| 104 | - // the first item and always set it to false. | 88 | + // The item methods use the value of first to determine if this is the first item and always set |
| 89 | + // it to false. | ||
| 105 | QPDF_DLL | 90 | QPDF_DLL |
| 106 | static void writeDictionaryItem( | 91 | static void writeDictionaryItem( |
| 107 | Pipeline*, bool& first, std::string const& key, JSON const& value, size_t depth = 0); | 92 | Pipeline*, bool& first, std::string const& key, JSON const& value, size_t depth = 0); |
| 108 | - // Write just the key of a new dictionary item, useful if writing | ||
| 109 | - // nested structures. Calls writeNext. | 93 | + // Write just the key of a new dictionary item, useful if writing nested structures. Calls |
| 94 | + // writeNext. | ||
| 110 | QPDF_DLL | 95 | QPDF_DLL |
| 111 | static void | 96 | static void |
| 112 | writeDictionaryKey(Pipeline* p, bool& first, std::string const& key, size_t depth = 0); | 97 | writeDictionaryKey(Pipeline* p, bool& first, std::string const& key, size_t depth = 0); |
| 113 | QPDF_DLL | 98 | QPDF_DLL |
| 114 | static void writeArrayItem(Pipeline*, bool& first, JSON const& element, size_t depth = 0); | 99 | static void writeArrayItem(Pipeline*, bool& first, JSON const& element, size_t depth = 0); |
| 115 | - // If writing nested structures incrementally, call writeNext | ||
| 116 | - // before opening a new array or container in the midst of an | ||
| 117 | - // existing one. The `first` you pass to writeNext should be the | ||
| 118 | - // one for the parent object. The depth should be the one for the | ||
| 119 | - // child object. Then start a new `first` for the nested item. | ||
| 120 | - // Note that writeDictionaryKey and writeArrayItem call writeNext | ||
| 121 | - // for you, so this is most important when writing subsequent | ||
| 122 | - // items or container openers to an array. | 100 | + // If writing nested structures incrementally, call writeNext before opening a new array or |
| 101 | + // container in the midst of an existing one. The `first` you pass to writeNext should be the | ||
| 102 | + // one for the parent object. The depth should be the one for the child object. Then start a new | ||
| 103 | + // `first` for the nested item. Note that writeDictionaryKey and writeArrayItem call writeNext | ||
| 104 | + // for you, so this is most important when writing subsequent items or container openers to an | ||
| 105 | + // array. | ||
| 123 | QPDF_DLL | 106 | QPDF_DLL |
| 124 | static void writeNext(Pipeline* p, bool& first, size_t depth = 0); | 107 | static void writeNext(Pipeline* p, bool& first, size_t depth = 0); |
| 125 | 108 | ||
| 126 | - // The JSON spec calls dictionaries "objects", but that creates | ||
| 127 | - // too much confusion when referring to instances of the JSON | ||
| 128 | - // class. | 109 | + // The JSON spec calls dictionaries "objects", but that creates too much confusion when |
| 110 | + // referring to instances of the JSON class. | ||
| 129 | QPDF_DLL | 111 | QPDF_DLL |
| 130 | static JSON makeDictionary(); | 112 | static JSON makeDictionary(); |
| 131 | // addDictionaryMember returns the newly added item. | 113 | // addDictionaryMember returns the newly added item. |
| @@ -149,10 +131,9 @@ class JSON | @@ -149,10 +131,9 @@ class JSON | ||
| 149 | QPDF_DLL | 131 | QPDF_DLL |
| 150 | static JSON makeNull(); | 132 | static JSON makeNull(); |
| 151 | 133 | ||
| 152 | - // A blob serializes as a string. The function will be called by | ||
| 153 | - // JSON with a pipeline and should write binary data to the | ||
| 154 | - // pipeline but not call finish(). JSON will call finish() at the | ||
| 155 | - // right time. | 134 | + // A blob serializes as a string. The function will be called by JSON with a pipeline and should |
| 135 | + // write binary data to the pipeline but not call finish(). JSON will call finish() at the right | ||
| 136 | + // time. | ||
| 156 | QPDF_DLL | 137 | QPDF_DLL |
| 157 | static JSON makeBlob(std::function<void(Pipeline*)>); | 138 | static JSON makeBlob(std::function<void(Pipeline*)>); |
| 158 | 139 | ||
| @@ -162,11 +143,9 @@ class JSON | @@ -162,11 +143,9 @@ class JSON | ||
| 162 | QPDF_DLL | 143 | QPDF_DLL |
| 163 | bool isDictionary() const; | 144 | bool isDictionary() const; |
| 164 | 145 | ||
| 165 | - // If the key is already in the dictionary, return true. | ||
| 166 | - // Otherwise, mark it as seen and return false. This is primarily | ||
| 167 | - // intended to be used by the parser to detect duplicate keys when | ||
| 168 | - // the reactor blocks them from being added to the final | ||
| 169 | - // dictionary. | 146 | + // If the key is already in the dictionary, return true. Otherwise, mark it as seen and return |
| 147 | + // false. This is primarily intended to be used by the parser to detect duplicate keys when the | ||
| 148 | + // reactor blocks them from being added to the final dictionary. | ||
| 170 | QPDF_DLL | 149 | QPDF_DLL |
| 171 | bool checkDictionaryKeySeen(std::string const& key); | 150 | bool checkDictionaryKeySeen(std::string const& key); |
| 172 | 151 | ||
| @@ -187,45 +166,35 @@ class JSON | @@ -187,45 +166,35 @@ class JSON | ||
| 187 | QPDF_DLL | 166 | QPDF_DLL |
| 188 | bool forEachArrayItem(std::function<void(JSON value)> fn) const; | 167 | bool forEachArrayItem(std::function<void(JSON value)> fn) const; |
| 189 | 168 | ||
| 190 | - // Check this JSON object against a "schema". This is not a schema | ||
| 191 | - // according to any standard. It's just a template of what the | ||
| 192 | - // JSON is supposed to contain. The checking does the following: | 169 | + // Check this JSON object against a "schema". This is not a schema according to any standard. |
| 170 | + // It's just a template of what the JSON is supposed to contain. The checking does the | ||
| 171 | + // following: | ||
| 193 | // | 172 | // |
| 194 | - // * The schema is a nested structure containing dictionaries, | ||
| 195 | - // single-element arrays, and strings only. | ||
| 196 | - // * Recursively walk the schema. In the items below, "schema | ||
| 197 | - // object" refers to an object in the schema, and "checked | ||
| 198 | - // object" refers to the corresponding part of the object | ||
| 199 | - // being checked. | ||
| 200 | - // * If the schema object is a dictionary, the checked object | ||
| 201 | - // must have a dictionary in the same place with the same | ||
| 202 | - // keys. If flags contains f_optional, a key in the schema | ||
| 203 | - // does not have to be present in the object. Otherwise, all | ||
| 204 | - // keys have to be present. Any key in the object must be | ||
| 205 | - // present in the schema. | ||
| 206 | - // * If the schema object is an array of length 1, the checked | ||
| 207 | - // object may either be a single item or an array of items. | ||
| 208 | - // The single item or each element of the checked object's | ||
| 209 | - // array is validated against the single element of the | ||
| 210 | - // schema's array. The rationale behind this logic is that a | ||
| 211 | - // single element may appear wherever the schema allows a | ||
| 212 | - // variable-length array. This makes it possible to start | ||
| 213 | - // allowing an array in the future where a single element was | ||
| 214 | - // previously required without breaking backward | ||
| 215 | - // compatibility. | ||
| 216 | - // * If the schema object is an array of length > 1, the checked | ||
| 217 | - // object must be an array of the same length. In this case, | ||
| 218 | - // each element of the checked object array is validated | 173 | + // * The schema is a nested structure containing dictionaries, single-element arrays, and |
| 174 | + // strings only. | ||
| 175 | + // * Recursively walk the schema. In the items below, "schema object" refers to an object in | ||
| 176 | + // the schema, and "checked object" refers to the corresponding part of the object being | ||
| 177 | + // checked. | ||
| 178 | + // * If the schema object is a dictionary, the checked object must have a dictionary in the | ||
| 179 | + // same place with the same keys. If flags contains f_optional, a key in the schema does not | ||
| 180 | + // have to be present in the object. Otherwise, all keys have to be present. Any key in the | ||
| 181 | + // object must be present in the schema. | ||
| 182 | + // * If the schema object is an array of length 1, the checked object may either be a single | ||
| 183 | + // item or an array of items. The single item or each element of the checked object's | ||
| 184 | + // array is validated against the single element of the schema's array. The rationale behind | ||
| 185 | + // this logic is that a single element may appear wherever the schema allows a | ||
| 186 | + // variable-length array. This makes it possible to start allowing an array in the future | ||
| 187 | + // where a single element was previously required without breaking backward compatibility. | ||
| 188 | + // * If the schema object is an array of length > 1, the checked object must be an array of | ||
| 189 | + // the same length. In this case, each element of the checked object array is validated | ||
| 219 | // against the corresponding element of the schema array. | 190 | // against the corresponding element of the schema array. |
| 220 | - // * Otherwise, the value must be a string whose value is a | ||
| 221 | - // description of the object's corresponding value, which may | ||
| 222 | - // have any type. | 191 | + // * Otherwise, the value must be a string whose value is a description of the object's |
| 192 | + // corresponding value, which may have any type. | ||
| 223 | // | 193 | // |
| 224 | - // QPDF's JSON output conforms to certain strict compatibility | ||
| 225 | - // rules as discussed in the manual. The idea is that a JSON | ||
| 226 | - // structure created manually in qpdf.cc doubles as both JSON help | ||
| 227 | - // information and a schema for validating the JSON that qpdf | ||
| 228 | - // generates. Any discrepancies are a bug in qpdf. | 194 | + // QPDF's JSON output conforms to certain strict compatibility rules as discussed in the manual. |
| 195 | + // The idea is that a JSON structure created manually in qpdf.cc doubles as both JSON help | ||
| 196 | + // information and a schema for validating the JSON that qpdf generates. Any discrepancies are a | ||
| 197 | + // bug in qpdf. | ||
| 229 | // | 198 | // |
| 230 | // Flags is a bitwise or of values from check_flags_e. | 199 | // Flags is a bitwise or of values from check_flags_e. |
| 231 | enum check_flags_e { | 200 | enum check_flags_e { |
| @@ -239,9 +208,8 @@ class JSON | @@ -239,9 +208,8 @@ class JSON | ||
| 239 | QPDF_DLL | 208 | QPDF_DLL |
| 240 | bool checkSchema(JSON schema, std::list<std::string>& errors); | 209 | bool checkSchema(JSON schema, std::list<std::string>& errors); |
| 241 | 210 | ||
| 242 | - // An pointer to a Reactor class can be passed to parse, which | ||
| 243 | - // will enable the caller to react to incremental events in the | ||
| 244 | - // construction of the JSON object. This makes it possible to | 211 | + // A pointer to a Reactor class can be passed to parse, which will enable the caller to react |
| 212 | + // to incremental events in the construction of the JSON object. This makes it possible to | ||
| 245 | // implement SAX-like handling of very large JSON objects. | 213 | // implement SAX-like handling of very large JSON objects. |
| 246 | class QPDF_DLL_CLASS Reactor | 214 | class QPDF_DLL_CLASS Reactor |
| 247 | { | 215 | { |
| @@ -249,17 +217,14 @@ class JSON | @@ -249,17 +217,14 @@ class JSON | ||
| 249 | QPDF_DLL | 217 | QPDF_DLL |
| 250 | virtual ~Reactor() = default; | 218 | virtual ~Reactor() = default; |
| 251 | 219 | ||
| 252 | - // The start/end methods are called when parsing of a | ||
| 253 | - // dictionary or array is started or ended. The item methods | ||
| 254 | - // are called when an item is added to a dictionary or array. | ||
| 255 | - // When adding a container to another container, the item | ||
| 256 | - // method is called with an empty container before the lower | ||
| 257 | - // container's start method is called. See important notes in | 220 | + // The start/end methods are called when parsing of a dictionary or array is started or |
| 221 | + // ended. The item methods are called when an item is added to a dictionary or array. When | ||
| 222 | + // adding a container to another container, the item method is called with an empty | ||
| 223 | + // container before the lower container's start method is called. See important notes in | ||
| 258 | // "Item methods" below. | 224 | // "Item methods" below. |
| 259 | 225 | ||
| 260 | - // During parsing of a JSON string, the parser is operating on | ||
| 261 | - // a single object at a time. When a dictionary or array is | ||
| 262 | - // started, a new context begins, and when that dictionary or | 226 | + // During parsing of a JSON string, the parser is operating on a single object at a time. |
| 227 | + // When a dictionary or array is started, a new context begins, and when that dictionary or | ||
| 263 | // array is ended, the previous context is resumed. So, for | 228 | // array is ended, the previous context is resumed. So, for |
| 264 | // example, if you have `{"a": [1]}`, you will receive the | 229 | // example, if you have `{"a": [1]}`, you will receive the |
| 265 | // following method calls | 230 | // following method calls |
| @@ -271,9 +236,8 @@ class JSON | @@ -271,9 +236,8 @@ class JSON | ||
| 271 | // containerEnd -- now current object is the dictionary again | 236 | // containerEnd -- now current object is the dictionary again |
| 272 | // containerEnd -- current object is undefined | 237 | // containerEnd -- current object is undefined |
| 273 | // | 238 | // |
| 274 | - // If the top-level item in a JSON string is a scalar, the | ||
| 275 | - // topLevelScalar() method will be called. No argument is | ||
| 276 | - // passed since the object is the same as what is returned by | 239 | + // If the top-level item in a JSON string is a scalar, the topLevelScalar() method will be |
| 240 | + // called. No argument is passed since the object is the same as what is returned by | ||
| 277 | // parse(). | 241 | // parse(). |
| 278 | 242 | ||
| 279 | QPDF_DLL | 243 | QPDF_DLL |
| @@ -287,21 +251,17 @@ class JSON | @@ -287,21 +251,17 @@ class JSON | ||
| 287 | 251 | ||
| 288 | // Item methods: | 252 | // Item methods: |
| 289 | // | 253 | // |
| 290 | - // The return value of the item methods indicate whether the | ||
| 291 | - // item has been "consumed". If the item method returns true, | ||
| 292 | - // then the item will not be added to the containing JSON | 254 | + // The return value of the item methods indicates whether the item has been "consumed". If |
| 255 | + // the item method returns true, then the item will not be added to the containing JSON | ||
| 293 | // object. This is what allows arbitrarily large JSON objects | 256 | // object. This is what allows arbitrarily large JSON objects |
| 294 | // to be parsed and not have to be kept in memory. | 257 | // to be parsed and not have to be kept in memory. |
| 295 | // | 258 | // |
| 296 | - // NOTE: When a dictionary or an array is added to a | ||
| 297 | - // container, the dictionaryItem or arrayItem method is called | ||
| 298 | - // when the child item's start delimiter is encountered, so | ||
| 299 | - // the JSON object passed in at that time will always be in | ||
| 300 | - // its initial, empty state. Additionally, the child item's | ||
| 301 | - // start method is not called until after the parent item's | ||
| 302 | - // item method is called. This makes it possible to keep track | ||
| 303 | - // of the current depth level by incrementing level on start | ||
| 304 | - // methods and decrementing on end methods. | 259 | + // NOTE: When a dictionary or an array is added to a container, the dictionaryItem or |
| 260 | + // arrayItem method is called when the child item's start delimiter is encountered, so the | ||
| 261 | + // JSON object passed in at that time will always be in its initial, empty state. | ||
| 262 | + // Additionally, the child item's start method is not called until after the parent item's | ||
| 263 | + // item method is called. This makes it possible to keep track of the current depth level by | ||
| 264 | + // incrementing level on start methods and decrementing on end methods. | ||
| 305 | 265 | ||
| 306 | QPDF_DLL | 266 | QPDF_DLL |
| 307 | virtual bool dictionaryItem(std::string const& key, JSON const& value) = 0; | 267 | virtual bool dictionaryItem(std::string const& key, JSON const& value) = 0; |
| @@ -312,14 +272,13 @@ class JSON | @@ -312,14 +272,13 @@ class JSON | ||
| 312 | // Create a JSON object from a string. | 272 | // Create a JSON object from a string. |
| 313 | QPDF_DLL | 273 | QPDF_DLL |
| 314 | static JSON parse(std::string const&); | 274 | static JSON parse(std::string const&); |
| 315 | - // Create a JSON object from an input source. See above for | ||
| 316 | - // information about how to use the Reactor. | 275 | + // Create a JSON object from an input source. See above for information about how to use the |
| 276 | + // Reactor. | ||
| 317 | QPDF_DLL | 277 | QPDF_DLL |
| 318 | static JSON parse(InputSource&, Reactor* reactor = nullptr); | 278 | static JSON parse(InputSource&, Reactor* reactor = nullptr); |
| 319 | 279 | ||
| 320 | - // parse calls setOffsets to set the inclusive start and | ||
| 321 | - // non-inclusive end offsets of an object relative to its input | ||
| 322 | - // string. Otherwise, both values are 0. | 280 | + // parse calls setOffsets to set the inclusive start and non-inclusive end offsets of an object |
| 281 | + // relative to its input string. Otherwise, both values are 0. | ||
| 323 | QPDF_DLL | 282 | QPDF_DLL |
| 324 | void setStart(qpdf_offset_t); | 283 | void setStart(qpdf_offset_t); |
| 325 | QPDF_DLL | 284 | QPDF_DLL |
include/qpdf/Pipeline.hh
| @@ -2,44 +2,36 @@ | @@ -2,44 +2,36 @@ | ||
| 2 | // | 2 | // |
| 3 | // This file is part of qpdf. | 3 | // This file is part of qpdf. |
| 4 | // | 4 | // |
| 5 | -// Licensed under the Apache License, Version 2.0 (the "License"); | ||
| 6 | -// you may not use this file except in compliance with the License. | ||
| 7 | -// You may obtain a copy of the License at | 5 | +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
| 6 | +// in compliance with the License. You may obtain a copy of the License at | ||
| 8 | // | 7 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 | 8 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // | 9 | // |
| 11 | -// Unless required by applicable law or agreed to in writing, software | ||
| 12 | -// distributed under the License is distributed on an "AS IS" BASIS, | ||
| 13 | -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 14 | -// See the License for the specific language governing permissions and | ||
| 15 | -// limitations under the License. | 10 | +// Unless required by applicable law or agreed to in writing, software distributed under the License |
| 11 | +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | ||
| 12 | +// or implied. See the License for the specific language governing permissions and limitations under | ||
| 13 | +// the License. | ||
| 16 | // | 14 | // |
| 17 | -// Versions of qpdf prior to version 7 were released under the terms | ||
| 18 | -// of version 2.0 of the Artistic License. At your option, you may | ||
| 19 | -// continue to consider qpdf to be licensed under those terms. Please | ||
| 20 | -// see the manual for additional information. | 15 | +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic |
| 16 | +// License. At your option, you may continue to consider qpdf to be licensed under those terms. | ||
| 17 | +// Please see the manual for additional information. | ||
| 21 | 18 | ||
| 22 | -// Generalized Pipeline interface. By convention, subclasses of | ||
| 23 | -// Pipeline are called Pl_Something. | 19 | +// Generalized Pipeline interface. By convention, subclasses of Pipeline are called Pl_Something. |
| 24 | // | 20 | // |
| 25 | -// When an instance of Pipeline is created with a pointer to a next | ||
| 26 | -// pipeline, that pipeline writes its data to the next one when it | ||
| 27 | -// finishes with it. In order to make possible a usage style in which | ||
| 28 | -// a pipeline may be passed to a function which may stick other | ||
| 29 | -// pipelines in front of it, the allocator of a pipeline is | ||
| 30 | -// responsible for its destruction. In other words, one pipeline | ||
| 31 | -// object does not attempt to manage the memory of its successor. | 21 | +// When an instance of Pipeline is created with a pointer to a next pipeline, that pipeline writes |
| 22 | +// its data to the next one when it finishes with it. In order to make possible a usage style in | ||
| 23 | +// which a pipeline may be passed to a function which may stick other pipelines in front of it, the | ||
| 24 | +// allocator of a pipeline is responsible for its destruction. In other words, one pipeline object | ||
| 25 | +// does not attempt to manage the memory of its successor. | ||
| 32 | // | 26 | // |
| 33 | -// The client is required to call finish() before destroying a | ||
| 34 | -// Pipeline in order to avoid loss of data. A Pipeline class should | ||
| 35 | -// not throw an exception in the destructor if this hasn't been done | 27 | +// The client is required to call finish() before destroying a Pipeline in order to avoid loss of |
| 28 | +// data. A Pipeline class should not throw an exception in the destructor if this hasn't been done | ||
| 36 | // though since doing so causes too much trouble when deleting | 29 | // though since doing so causes too much trouble when deleting |
| 37 | // pipelines during error conditions. | 30 | // pipelines during error conditions. |
| 38 | // | 31 | // |
| 39 | -// Some pipelines are reusable (i.e., you can call write() after | ||
| 40 | -// calling finish() and can call finish() multiple times) while others | ||
| 41 | -// are not. It is up to the caller to use a pipeline according to its | ||
| 42 | -// own restrictions. | 32 | +// Some pipelines are reusable (i.e., you can call write() after calling finish() and can call |
| 33 | +// finish() multiple times) while others are not. It is up to the caller to use a pipeline | ||
| 34 | +// according to its own restrictions. | ||
| 43 | 35 | ||
| 44 | #ifndef PIPELINE_HH | 36 | #ifndef PIPELINE_HH |
| 45 | #define PIPELINE_HH | 37 | #define PIPELINE_HH |
| @@ -50,8 +42,8 @@ | @@ -50,8 +42,8 @@ | ||
| 50 | #include <memory> | 42 | #include <memory> |
| 51 | #include <string> | 43 | #include <string> |
| 52 | 44 | ||
| 53 | -// Remember to use QPDF_DLL_CLASS on anything derived from Pipeline so | ||
| 54 | -// it will work with dynamic_cast across the shared object boundary. | 45 | +// Remember to use QPDF_DLL_CLASS on anything derived from Pipeline so it will work with |
| 46 | +// dynamic_cast across the shared object boundary. | ||
| 55 | class QPDF_DLL_CLASS Pipeline | 47 | class QPDF_DLL_CLASS Pipeline |
| 56 | { | 48 | { |
| 57 | public: | 49 | public: |
| @@ -61,9 +53,8 @@ class QPDF_DLL_CLASS Pipeline | @@ -61,9 +53,8 @@ class QPDF_DLL_CLASS Pipeline | ||
| 61 | QPDF_DLL | 53 | QPDF_DLL |
| 62 | virtual ~Pipeline() = default; | 54 | virtual ~Pipeline() = default; |
| 63 | 55 | ||
| 64 | - // Subclasses should implement write and finish to do their jobs | ||
| 65 | - // and then, if they are not end-of-line pipelines, call | ||
| 66 | - // getNext()->write or getNext()->finish. | 56 | + // Subclasses should implement write and finish to do their jobs and then, if they are not |
| 57 | + // end-of-line pipelines, call getNext()->write or getNext()->finish. | ||
| 67 | QPDF_DLL | 58 | QPDF_DLL |
| 68 | virtual void write(unsigned char const* data, size_t len) = 0; | 59 | virtual void write(unsigned char const* data, size_t len) = 0; |
| 69 | QPDF_DLL | 60 | QPDF_DLL |
| @@ -71,17 +62,15 @@ class QPDF_DLL_CLASS Pipeline | @@ -71,17 +62,15 @@ class QPDF_DLL_CLASS Pipeline | ||
| 71 | QPDF_DLL | 62 | QPDF_DLL |
| 72 | std::string getIdentifier() const; | 63 | std::string getIdentifier() const; |
| 73 | 64 | ||
| 74 | - // These are convenience methods for making it easier to write | ||
| 75 | - // certain other types of data to pipelines without having to | ||
| 76 | - // cast. The methods that take char const* expect null-terminated | ||
| 77 | - // C strings and do not write the null terminators. | 65 | + // These are convenience methods for making it easier to write certain other types of data to |
| 66 | + // pipelines without having to cast. The methods that take char const* expect null-terminated C | ||
| 67 | + // strings and do not write the null terminators. | ||
| 78 | QPDF_DLL | 68 | QPDF_DLL |
| 79 | void writeCStr(char const* cstr); | 69 | void writeCStr(char const* cstr); |
| 80 | QPDF_DLL | 70 | QPDF_DLL |
| 81 | void writeString(std::string const&); | 71 | void writeString(std::string const&); |
| 82 | - // This allows *p << "x" << "y" but is not intended to be a | ||
| 83 | - // general purpose << compatible with ostream and does not have | ||
| 84 | - // locale awareness or the ability to be "imbued" with properties. | 72 | + // This allows *p << "x" << "y" but is not intended to be a general purpose << compatible with |
| 73 | + // ostream and does not have locale awareness or the ability to be "imbued" with properties. | ||
| 85 | QPDF_DLL | 74 | QPDF_DLL |
| 86 | Pipeline& operator<<(char const* cstr); | 75 | Pipeline& operator<<(char const* cstr); |
| 87 | QPDF_DLL | 76 | QPDF_DLL |
include/qpdf/Pl_Buffer.hh
| @@ -2,36 +2,31 @@ | @@ -2,36 +2,31 @@ | ||
| 2 | // | 2 | // |
| 3 | // This file is part of qpdf. | 3 | // This file is part of qpdf. |
| 4 | // | 4 | // |
| 5 | -// Licensed under the Apache License, Version 2.0 (the "License"); | ||
| 6 | -// you may not use this file except in compliance with the License. | ||
| 7 | -// You may obtain a copy of the License at | 5 | +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
| 6 | +// in compliance with the License. You may obtain a copy of the License at | ||
| 8 | // | 7 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 | 8 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // | 9 | // |
| 11 | -// Unless required by applicable law or agreed to in writing, software | ||
| 12 | -// distributed under the License is distributed on an "AS IS" BASIS, | ||
| 13 | -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 14 | -// See the License for the specific language governing permissions and | ||
| 15 | -// limitations under the License. | 10 | +// Unless required by applicable law or agreed to in writing, software distributed under the License |
| 11 | +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | ||
| 12 | +// or implied. See the License for the specific language governing permissions and limitations under | ||
| 13 | +// the License. | ||
| 16 | // | 14 | // |
| 17 | -// Versions of qpdf prior to version 7 were released under the terms | ||
| 18 | -// of version 2.0 of the Artistic License. At your option, you may | ||
| 19 | -// continue to consider qpdf to be licensed under those terms. Please | ||
| 20 | -// see the manual for additional information. | 15 | +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic |
| 16 | +// License. At your option, you may continue to consider qpdf to be licensed under those terms. | ||
| 17 | +// Please see the manual for additional information. | ||
| 21 | 18 | ||
| 22 | #ifndef PL_BUFFER_HH | 19 | #ifndef PL_BUFFER_HH |
| 23 | #define PL_BUFFER_HH | 20 | #define PL_BUFFER_HH |
| 24 | 21 | ||
| 25 | -// This pipeline accumulates the data passed to it into a memory | ||
| 26 | -// buffer. Each subsequent use of this buffer appends to the data | ||
| 27 | -// accumulated so far. getBuffer() may be called only after calling | ||
| 28 | -// finish() and before calling any subsequent write(). At that point, | ||
| 29 | -// a dynamically allocated Buffer object is returned and the internal | ||
| 30 | -// buffer is reset. The caller is responsible for deleting the | ||
| 31 | -// returned Buffer. | 22 | +// This pipeline accumulates the data passed to it into a memory buffer. Each subsequent use of |
| 23 | +// this buffer appends to the data accumulated so far. getBuffer() may be called only after calling | ||
| 24 | +// finish() and before calling any subsequent write(). At that point, a dynamically allocated | ||
| 25 | +// Buffer object is returned and the internal buffer is reset. The caller is responsible for | ||
| 26 | +// deleting the returned Buffer. | ||
| 32 | // | 27 | // |
| 33 | -// For this pipeline, "next" may be null. If a next pointer is | ||
| 34 | -// provided, this pipeline will also pass the data through to it. | 28 | +// For this pipeline, "next" may be null. If a next pointer is provided, this pipeline will also |
| 29 | +// pass the data through to it. | ||
| 35 | 30 | ||
| 36 | #include <qpdf/Buffer.hh> | 31 | #include <qpdf/Buffer.hh> |
| 37 | #include <qpdf/Pipeline.hh> | 32 | #include <qpdf/Pipeline.hh> |
| @@ -61,12 +56,10 @@ class QPDF_DLL_CLASS Pl_Buffer: public Pipeline | @@ -61,12 +56,10 @@ class QPDF_DLL_CLASS Pl_Buffer: public Pipeline | ||
| 61 | QPDF_DLL | 56 | QPDF_DLL |
| 62 | std::shared_ptr<Buffer> getBufferSharedPointer(); | 57 | std::shared_ptr<Buffer> getBufferSharedPointer(); |
| 63 | 58 | ||
| 64 | - // getMallocBuffer behaves in the same way as getBuffer except the | ||
| 65 | - // buffer is allocated with malloc(), making it suitable for use | ||
| 66 | - // when calling from other languages. If there is no data, *buf is | ||
| 67 | - // set to a null pointer and *len is set to 0. Otherwise, *buf is | ||
| 68 | - // a buffer of size *len allocated with malloc(). It is the | ||
| 69 | - // caller's responsibility to call free() on the buffer. | 59 | + // getMallocBuffer behaves in the same way as getBuffer except the buffer is allocated with |
| 60 | + // malloc(), making it suitable for use when calling from other languages. If there is no data, | ||
| 61 | + // *buf is set to a null pointer and *len is set to 0. Otherwise, *buf is a buffer of size *len | ||
| 62 | + // allocated with malloc(). It is the caller's responsibility to call free() on the buffer. | ||
| 70 | QPDF_DLL | 63 | QPDF_DLL |
| 71 | void getMallocBuffer(unsigned char** buf, size_t* len); | 64 | void getMallocBuffer(unsigned char** buf, size_t* len); |
| 72 | 65 |
include/qpdf/QPDF.hh
| @@ -2,22 +2,19 @@ | @@ -2,22 +2,19 @@ | ||
| 2 | // | 2 | // |
| 3 | // This file is part of qpdf. | 3 | // This file is part of qpdf. |
| 4 | // | 4 | // |
| 5 | -// Licensed under the Apache License, Version 2.0 (the "License"); | ||
| 6 | -// you may not use this file except in compliance with the License. | ||
| 7 | -// You may obtain a copy of the License at | 5 | +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
| 6 | +// in compliance with the License. You may obtain a copy of the License at | ||
| 8 | // | 7 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 | 8 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // | 9 | // |
| 11 | -// Unless required by applicable law or agreed to in writing, software | ||
| 12 | -// distributed under the License is distributed on an "AS IS" BASIS, | ||
| 13 | -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 14 | -// See the License for the specific language governing permissions and | ||
| 15 | -// limitations under the License. | 10 | +// Unless required by applicable law or agreed to in writing, software distributed under the License |
| 11 | +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | ||
| 12 | +// or implied. See the License for the specific language governing permissions and limitations under | ||
| 13 | +// the License. | ||
| 16 | // | 14 | // |
| 17 | -// Versions of qpdf prior to version 7 were released under the terms | ||
| 18 | -// of version 2.0 of the Artistic License. At your option, you may | ||
| 19 | -// continue to consider qpdf to be licensed under those terms. Please | ||
| 20 | -// see the manual for additional information. | 15 | +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic |
| 16 | +// License. At your option, you may continue to consider qpdf to be licensed under those terms. | ||
| 17 | +// Please see the manual for additional information. | ||
| 21 | 18 | ||
| 22 | #ifndef QPDF_HH | 19 | #ifndef QPDF_HH |
| 23 | #define QPDF_HH | 20 | #define QPDF_HH |
| @@ -55,8 +52,7 @@ class QPDFParser; | @@ -55,8 +52,7 @@ class QPDFParser; | ||
| 55 | class QPDF | 52 | class QPDF |
| 56 | { | 53 | { |
| 57 | public: | 54 | public: |
| 58 | - // Get the current version of the QPDF software. See also | ||
| 59 | - // qpdf/DLL.h | 55 | + // Get the current version of the QPDF software. See also qpdf/DLL.h |
| 60 | QPDF_DLL | 56 | QPDF_DLL |
| 61 | static std::string const& QPDFVersion(); | 57 | static std::string const& QPDFVersion(); |
| 62 | 58 | ||
| @@ -68,92 +64,74 @@ class QPDF | @@ -68,92 +64,74 @@ class QPDF | ||
| 68 | QPDF_DLL | 64 | QPDF_DLL |
| 69 | static std::shared_ptr<QPDF> create(); | 65 | static std::shared_ptr<QPDF> create(); |
| 70 | 66 | ||
| 71 | - // Associate a file with a QPDF object and do initial parsing of | ||
| 72 | - // the file. PDF objects are not read until they are needed. A | ||
| 73 | - // QPDF object may be associated with only one file in its | ||
| 74 | - // lifetime. This method must be called before any methods that | ||
| 75 | - // potentially ask for information about the PDF file are called. | ||
| 76 | - // Prior to calling this, the only methods that are allowed are | ||
| 77 | - // those that set parameters. If the input file is not | ||
| 78 | - // encrypted, either a null password or an empty password can be | ||
| 79 | - // used. If the file is encrypted, either the user password or | ||
| 80 | - // the owner password may be supplied. The method | ||
| 81 | - // setPasswordIsHexKey may be called prior to calling this method | ||
| 82 | - // or any of the other process methods to force the password to be | ||
| 83 | - // interpreted as a raw encryption key. See comments on | ||
| 84 | - // setPasswordIsHexKey for more information. | 67 | + // Associate a file with a QPDF object and do initial parsing of the file. PDF objects are not |
| 68 | + // read until they are needed. A QPDF object may be associated with only one file in its | ||
| 69 | + // lifetime. This method must be called before any methods that potentially ask for information | ||
| 70 | + // about the PDF file are called. Prior to calling this, the only methods that are allowed are | ||
| 71 | + // those that set parameters. If the input file is not encrypted, either a null password or an | ||
| 72 | + // empty password can be used. If the file is encrypted, either the user password or the owner | ||
| 73 | + // password may be supplied. The method setPasswordIsHexKey may be called prior to calling this | ||
| 74 | + // method or any of the other process methods to force the password to be interpreted as a raw | ||
| 75 | + // encryption key. See comments on setPasswordIsHexKey for more information. | ||
| 85 | QPDF_DLL | 76 | QPDF_DLL |
| 86 | void processFile(char const* filename, char const* password = nullptr); | 77 | void processFile(char const* filename, char const* password = nullptr); |
| 87 | 78 | ||
| 88 | - // Parse a PDF from a stdio FILE*. The FILE must be open in | ||
| 89 | - // binary mode and must be seekable. It may be open read only. | ||
| 90 | - // This works exactly like processFile except that the PDF file is | ||
| 91 | - // read from an already opened FILE*. If close_file is true, the | ||
| 92 | - // file will be closed at the end. Otherwise, the caller is | ||
| 93 | - // responsible for closing the file. | 79 | + // Parse a PDF from a stdio FILE*. The FILE must be open in binary mode and must be seekable. |
| 80 | + // It may be open read only. This works exactly like processFile except that the PDF file is | ||
| 81 | + // read from an already opened FILE*. If close_file is true, the file will be closed at the | ||
| 82 | + // end. Otherwise, the caller is responsible for closing the file. | ||
| 94 | QPDF_DLL | 83 | QPDF_DLL |
| 95 | void processFile( | 84 | void processFile( |
| 96 | char const* description, FILE* file, bool close_file, char const* password = nullptr); | 85 | char const* description, FILE* file, bool close_file, char const* password = nullptr); |
| 97 | 86 | ||
| 98 | - // Parse a PDF file loaded into a memory buffer. This works | ||
| 99 | - // exactly like processFile except that the PDF file is in memory | ||
| 100 | - // instead of on disk. The description appears in any warning or | 87 | + // Parse a PDF file loaded into a memory buffer. This works exactly like processFile except |
| 88 | + // that the PDF file is in memory instead of on disk. The description appears in any warning or | ||
| 101 | // error message in place of the file name. | 89 | // error message in place of the file name. |
| 102 | QPDF_DLL | 90 | QPDF_DLL |
| 103 | void processMemoryFile( | 91 | void processMemoryFile( |
| 104 | char const* description, char const* buf, size_t length, char const* password = nullptr); | 92 | char const* description, char const* buf, size_t length, char const* password = nullptr); |
| 105 | 93 | ||
| 106 | - // Parse a PDF file loaded from a custom InputSource. If you have | ||
| 107 | - // your own method of retrieving a PDF file, you can subclass | ||
| 108 | - // InputSource and use this method. | 94 | + // Parse a PDF file loaded from a custom InputSource. If you have your own method of retrieving |
| 95 | + // a PDF file, you can subclass InputSource and use this method. | ||
| 109 | QPDF_DLL | 96 | QPDF_DLL |
| 110 | void processInputSource(std::shared_ptr<InputSource>, char const* password = nullptr); | 97 | void processInputSource(std::shared_ptr<InputSource>, char const* password = nullptr); |
| 111 | 98 | ||
| 112 | - // Create a PDF from an input source that contains JSON as written | ||
| 113 | - // by writeJSON (or qpdf --json-output, version 2 or higher). The | ||
| 114 | - // JSON must be a complete representation of a PDF. See "qpdf | ||
| 115 | - // JSON" in the manual for details. The input JSON may be | ||
| 116 | - // arbitrarily large. QPDF does not load stream data into memory | ||
| 117 | - // for more than one stream at a time, even if the stream data is | 99 | + // Create a PDF from an input source that contains JSON as written by writeJSON (or qpdf |
| 100 | + // --json-output, version 2 or higher). The JSON must be a complete representation of a PDF. See | ||
| 101 | + // "qpdf JSON" in the manual for details. The input JSON may be arbitrarily large. QPDF does not | ||
| 102 | + // load stream data into memory for more than one stream at a time, even if the stream data is | ||
| 118 | // specified inline. | 103 | // specified inline. |
| 119 | QPDF_DLL | 104 | QPDF_DLL |
| 120 | void createFromJSON(std::string const& json_file); | 105 | void createFromJSON(std::string const& json_file); |
| 121 | QPDF_DLL | 106 | QPDF_DLL |
| 122 | void createFromJSON(std::shared_ptr<InputSource>); | 107 | void createFromJSON(std::shared_ptr<InputSource>); |
| 123 | 108 | ||
| 124 | - // Update a PDF from an input source that contains JSON in the | ||
| 125 | - // same format as is written by writeJSON (or qpdf --json-output, | ||
| 126 | - // version 2 or higher). Objects in the PDF and not in the JSON | ||
| 127 | - // are not modified. See "qpdf JSON" in the manual for details. As | ||
| 128 | - // with createFromJSON, the input JSON may be arbitrarily large. | 109 | + // Update a PDF from an input source that contains JSON in the same format as is written by |
| 110 | + // writeJSON (or qpdf --json-output, version 2 or higher). Objects in the PDF and not in the | ||
| 111 | + // JSON are not modified. See "qpdf JSON" in the manual for details. As with createFromJSON, the | ||
| 112 | + // input JSON may be arbitrarily large. | ||
| 129 | QPDF_DLL | 113 | QPDF_DLL |
| 130 | void updateFromJSON(std::string const& json_file); | 114 | void updateFromJSON(std::string const& json_file); |
| 131 | QPDF_DLL | 115 | QPDF_DLL |
| 132 | void updateFromJSON(std::shared_ptr<InputSource>); | 116 | void updateFromJSON(std::shared_ptr<InputSource>); |
| 133 | 117 | ||
| 134 | - // Write qpdf JSON format to the pipeline "p". The only supported | ||
| 135 | - // version is 2. The finish() method is not called on the | ||
| 136 | - // pipeline. | 118 | + // Write qpdf JSON format to the pipeline "p". The only supported version is 2. The finish() |
| 119 | + // method is not called on the pipeline. | ||
| 137 | // | 120 | // |
| 138 | - // The decode_level parameter controls which streams are | ||
| 139 | - // uncompressed in the JSON. Use qpdf_dl_none to preserve all | ||
| 140 | - // stream data exactly as it appears in the input. The possible | ||
| 141 | - // values for json_stream_data can be found in qpdf/Constants.h | ||
| 142 | - // and correspond to the --json-stream-data command-line argument. | ||
| 143 | - // If json_stream_data is qpdf_sj_file, file_prefix must be | ||
| 144 | - // specified. Each stream will be written to a file whose path is | ||
| 145 | - // constructed by appending "-nnn" to file_prefix, where "nnn" is | ||
| 146 | - // the object number (not zero-filled). If wanted_objects is | ||
| 147 | - // empty, write all objects. Otherwise, write only objects whose | ||
| 148 | - // keys are in wanted_objects. Keys may be either "trailer" or of | ||
| 149 | - // the form "obj:n n R". Invalid keys are ignored. This | ||
| 150 | - // corresponds to the --json-object command-line argument. | 121 | + // The decode_level parameter controls which streams are uncompressed in the JSON. Use |
| 122 | + // qpdf_dl_none to preserve all stream data exactly as it appears in the input. The possible | ||
| 123 | + // values for json_stream_data can be found in qpdf/Constants.h and correspond to the | ||
| 124 | + // --json-stream-data command-line argument. If json_stream_data is qpdf_sj_file, file_prefix | ||
| 125 | + // must be specified. Each stream will be written to a file whose path is constructed by | ||
| 126 | + // appending "-nnn" to file_prefix, where "nnn" is the object number (not zero-filled). If | ||
| 127 | + // wanted_objects is empty, write all objects. Otherwise, write only objects whose keys are in | ||
| 128 | + // wanted_objects. Keys may be either "trailer" or of the form "obj:n n R". Invalid keys are | ||
| 129 | + // ignored. This corresponds to the --json-object command-line argument. | ||
| 151 | // | 130 | // |
| 152 | - // QPDF is efficient with regard to memory when writing, allowing | ||
| 153 | - // you to write arbitrarily large PDF files to a pipeline. You can | ||
| 154 | - // use a pipeline like Pl_Buffer or Pl_String to capture the JSON | ||
| 155 | - // output in memory, but do so with caution as this will allocate | ||
| 156 | - // enough memory to hold the entire PDF file. | 131 | + // QPDF is efficient with regard to memory when writing, allowing you to write arbitrarily large |
| 132 | + // PDF files to a pipeline. You can use a pipeline like Pl_Buffer or Pl_String to capture the | ||
| 133 | + // JSON output in memory, but do so with caution as this will allocate enough memory to hold the | ||
| 134 | + // entire PDF file. | ||
| 157 | QPDF_DLL | 135 | QPDF_DLL |
| 158 | void writeJSON( | 136 | void writeJSON( |
| 159 | int version, | 137 | int version, |
| @@ -163,17 +141,13 @@ class QPDF | @@ -163,17 +141,13 @@ class QPDF | ||
| 163 | std::string const& file_prefix, | 141 | std::string const& file_prefix, |
| 164 | std::set<std::string> wanted_objects); | 142 | std::set<std::string> wanted_objects); |
| 165 | 143 | ||
| 166 | - // This version of writeJSON enables writing only the "qpdf" key | ||
| 167 | - // of an in-progress dictionary. If the value of "complete" is | ||
| 168 | - // true, a complete JSON object containing only the "qpdf" key is | ||
| 169 | - // written to the pipeline. If the value of "complete" is false, | ||
| 170 | - // the "qpdf" key and its value are written to the pipeline | ||
| 171 | - // assuming that a dictionary is already open. The parameter | ||
| 172 | - // first_key indicates whether this is the first key in an | ||
| 173 | - // in-progress dictionary. It will be set to false by writeJSON. | ||
| 174 | - // The "qpdf" key and value are written as if at depth 1 in a | ||
| 175 | - // prettified JSON output. Remaining arguments are the same as the | ||
| 176 | - // above version. | 144 | + // This version of writeJSON enables writing only the "qpdf" key of an in-progress dictionary. |
| 145 | + // If the value of "complete" is true, a complete JSON object containing only the "qpdf" key is | ||
| 146 | + // written to the pipeline. If the value of "complete" is false, the "qpdf" key and its value | ||
| 147 | + // are written to the pipeline assuming that a dictionary is already open. The parameter | ||
| 148 | + // first_key indicates whether this is the first key in an in-progress dictionary. It will be | ||
| 149 | + // set to false by writeJSON. The "qpdf" key and value are written as if at depth 1 in a | ||
| 150 | + // prettified JSON output. Remaining arguments are the same as the above version. | ||
| 177 | QPDF_DLL | 151 | QPDF_DLL |
| 178 | void writeJSON( | 152 | void writeJSON( |
| 179 | int version, | 153 | int version, |
| @@ -185,169 +159,135 @@ class QPDF | @@ -185,169 +159,135 @@ class QPDF | ||
| 185 | std::string const& file_prefix, | 159 | std::string const& file_prefix, |
| 186 | std::set<std::string> wanted_objects); | 160 | std::set<std::string> wanted_objects); |
| 187 | 161 | ||
| 188 | - // Close or otherwise release the input source. Once this has been | ||
| 189 | - // called, no other methods of qpdf can be called safely except | ||
| 190 | - // for getWarnings and anyWarnings(). After this has been called, | ||
| 191 | - // it is safe to perform operations on the input file such as | ||
| 192 | - // deleting or renaming it. | 162 | + // Close or otherwise release the input source. Once this has been called, no other methods of |
| 163 | + // qpdf can be called safely except for getWarnings and anyWarnings(). After this has been | ||
| 164 | + // called, it is safe to perform operations on the input file such as deleting or renaming it. | ||
| 193 | QPDF_DLL | 165 | QPDF_DLL |
| 194 | void closeInputSource(); | 166 | void closeInputSource(); |
| 195 | 167 | ||
| 196 | - // For certain forensic or investigatory purposes, it may | ||
| 197 | - // sometimes be useful to specify the encryption key directly, | ||
| 198 | - // even though regular PDF applications do not provide a way to do | ||
| 199 | - // this. Calling setPasswordIsHexKey(true) before calling any of | ||
| 200 | - // the process methods will bypass the normal encryption key | ||
| 201 | - // computation or recovery mechanisms and interpret the bytes in | ||
| 202 | - // the password as a hex-encoded encryption key. Note that we | ||
| 203 | - // hex-encode the key because it may contain null bytes and | ||
| 204 | - // therefore can't be represented in a char const*. | 168 | + // For certain forensic or investigatory purposes, it may sometimes be useful to specify the |
| 169 | + // encryption key directly, even though regular PDF applications do not provide a way to do | ||
| 170 | + // this. Calling setPasswordIsHexKey(true) before calling any of the process methods will bypass | ||
| 171 | + // the normal encryption key computation or recovery mechanisms and interpret the bytes in the | ||
| 172 | + // password as a hex-encoded encryption key. Note that we hex-encode the key because it may | ||
| 173 | + // contain null bytes and therefore can't be represented in a char const*. | ||
| 205 | QPDF_DLL | 174 | QPDF_DLL |
| 206 | void setPasswordIsHexKey(bool); | 175 | void setPasswordIsHexKey(bool); |
| 207 | 176 | ||
| 208 | - // Create a QPDF object for an empty PDF. This PDF has no pages | ||
| 209 | - // or objects other than a minimal trailer, a document catalog, | ||
| 210 | - // and a /Pages tree containing zero pages. Pages and other | ||
| 211 | - // objects can be added to the file in the normal way, and the | ||
| 212 | - // trailer and document catalog can be mutated. Calling this | ||
| 213 | - // method is equivalent to calling processFile on an equivalent | ||
| 214 | - // PDF file. See the pdf-create.cc example for a demonstration of | ||
| 215 | - // how to use this method to create a PDF file from scratch. | 177 | + // Create a QPDF object for an empty PDF. This PDF has no pages or objects other than a minimal |
| 178 | + // trailer, a document catalog, and a /Pages tree containing zero pages. Pages and other | ||
| 179 | + // objects can be added to the file in the normal way, and the trailer and document catalog can | ||
| 180 | + // be mutated. Calling this method is equivalent to calling processFile on an equivalent PDF | ||
| 181 | + // file. See the pdf-create.cc example for a demonstration of how to use this method to create | ||
| 182 | + // a PDF file from scratch. | ||
| 216 | QPDF_DLL | 183 | QPDF_DLL |
| 217 | void emptyPDF(); | 184 | void emptyPDF(); |
| 218 | 185 | ||
| 219 | - // From 10.1: register a new filter implementation for a specific | ||
| 220 | - // stream filter. You can add your own implementations for new | ||
| 221 | - // filter types or override existing ones provided by the library. | ||
| 222 | - // Registered stream filters are used for decoding only as you can | ||
| 223 | - // override encoding with stream data providers. For example, you | ||
| 224 | - // could use this method to add support for one of the other filter | ||
| 225 | - // types by using additional third-party libraries that qpdf does | ||
| 226 | - // not presently use. The standard filters are implemented using | ||
| 227 | - // QPDFStreamFilter classes. | 186 | + // From 10.1: register a new filter implementation for a specific stream filter. You can add |
| 187 | + // your own implementations for new filter types or override existing ones provided by the | ||
| 188 | + // library. Registered stream filters are used for decoding only as you can override encoding | ||
| 189 | + // with stream data providers. For example, you could use this method to add support for one of | ||
| 190 | + // the other filter types by using additional third-party libraries that qpdf does not presently | ||
| 191 | + // use. The standard filters are implemented using QPDFStreamFilter classes. | ||
| 228 | QPDF_DLL | 192 | QPDF_DLL |
| 229 | static void registerStreamFilter( | 193 | static void registerStreamFilter( |
| 230 | std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory); | 194 | std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory); |
| 231 | 195 | ||
| 232 | // Parameter settings | 196 | // Parameter settings |
| 233 | 197 | ||
| 234 | - // To capture or redirect output, configure the logger returned by | ||
| 235 | - // getLogger(). By default, all QPDF and QPDFJob objects share the | ||
| 236 | - // global logger. If you need a private logger for some reason, | ||
| 237 | - // pass a new one to setLogger(). See comments in QPDFLogger.hh | ||
| 238 | - // for details on configuring the logger. | 198 | + // To capture or redirect output, configure the logger returned by getLogger(). By default, all |
| 199 | + // QPDF and QPDFJob objects share the global logger. If you need a private logger for some | ||
| 200 | + // reason, pass a new one to setLogger(). See comments in QPDFLogger.hh for details on | ||
| 201 | + // configuring the logger. | ||
| 239 | // | 202 | // |
| 240 | - // Note that no normal QPDF operations generate output to standard | ||
| 241 | - // output, so for applications that just wish to avoid creating | ||
| 242 | - // output for warnings and don't call any check functions, calling | ||
| 243 | - // setSuppressWarnings(true) is sufficient. | 203 | + // Note that no normal QPDF operations generate output to standard output, so for applications |
| 204 | + // that just wish to avoid creating output for warnings and don't call any check functions, | ||
| 205 | + // calling setSuppressWarnings(true) is sufficient. | ||
| 244 | QPDF_DLL | 206 | QPDF_DLL |
| 245 | std::shared_ptr<QPDFLogger> getLogger(); | 207 | std::shared_ptr<QPDFLogger> getLogger(); |
| 246 | QPDF_DLL | 208 | QPDF_DLL |
| 247 | void setLogger(std::shared_ptr<QPDFLogger>); | 209 | void setLogger(std::shared_ptr<QPDFLogger>); |
| 248 | 210 | ||
| 249 | - // This deprecated method is the old way to capture output, but it | ||
| 250 | - // didn't capture all output. See comments above for getLogger and | ||
| 251 | - // setLogger. This will be removed in QPDF 12. For now, it | ||
| 252 | - // configures a private logger, separating this object from the | ||
| 253 | - // default logger, and calls setOutputStreams on that logger. See | ||
| 254 | - // QPDFLogger.hh for additional details. | 211 | + // This deprecated method is the old way to capture output, but it didn't capture all output. |
| 212 | + // See comments above for getLogger and setLogger. This will be removed in QPDF 12. For now, it | ||
| 213 | + // configures a private logger, separating this object from the default logger, and calls | ||
| 214 | + // setOutputStreams on that logger. See QPDFLogger.hh for additional details. | ||
| 255 | [[deprecated("configure logger from getLogger() or call setLogger()")]] QPDF_DLL void | 215 | [[deprecated("configure logger from getLogger() or call setLogger()")]] QPDF_DLL void |
| 256 | setOutputStreams(std::ostream* out_stream, std::ostream* err_stream); | 216 | setOutputStreams(std::ostream* out_stream, std::ostream* err_stream); |
| 257 | 217 | ||
| 258 | - // If true, ignore any cross-reference streams in a hybrid file | ||
| 259 | - // (one that contains both cross-reference streams and | ||
| 260 | - // cross-reference tables). This can be useful for testing to | 218 | + // If true, ignore any cross-reference streams in a hybrid file (one that contains both |
| 219 | + // cross-reference streams and cross-reference tables). This can be useful for testing to | ||
| 261 | // ensure that a hybrid file would work with an older reader. | 220 | // ensure that a hybrid file would work with an older reader. |
| 262 | QPDF_DLL | 221 | QPDF_DLL |
| 263 | void setIgnoreXRefStreams(bool); | 222 | void setIgnoreXRefStreams(bool); |
| 264 | 223 | ||
| 265 | - // By default, any warnings are issued to std::cerr or the error | ||
| 266 | - // stream specified in a call to setOutputStreams as they are | ||
| 267 | - // encountered. If this method is called with a true value, reporting of | ||
| 268 | - // warnings is suppressed. You may still retrieve warnings by | ||
| 269 | - // calling getWarnings. | 224 | + // By default, any warnings are issued to std::cerr or the error stream specified in a call to |
| 225 | + // setOutputStreams as they are encountered. If this method is called with a true value, | ||
| 226 | + // reporting of warnings is suppressed. You may still retrieve warnings by calling getWarnings. | ||
| 270 | QPDF_DLL | 227 | QPDF_DLL |
| 271 | void setSuppressWarnings(bool); | 228 | void setSuppressWarnings(bool); |
| 272 | 229 | ||
| 273 | - // By default, QPDF will try to recover if it finds certain types | ||
| 274 | - // of errors in PDF files. If turned off, it will throw an | ||
| 275 | - // exception on the first such problem it finds without attempting | 230 | + // By default, QPDF will try to recover if it finds certain types of errors in PDF files. If |
| 231 | + // turned off, it will throw an exception on the first such problem it finds without attempting | ||
| 276 | // recovery. | 232 | // recovery. |
| 277 | QPDF_DLL | 233 | QPDF_DLL |
| 278 | void setAttemptRecovery(bool); | 234 | void setAttemptRecovery(bool); |
| 279 | 235 | ||
| 280 | - // Tell other QPDF objects that streams copied from this QPDF need | ||
| 281 | - // to be fully copied when copyForeignObject is called on them. | ||
| 282 | - // Calling setImmediateCopyFrom(true) on a QPDF object makes it | ||
| 283 | - // possible for the object and its input source to disappear | ||
| 284 | - // before streams copied from it are written with the destination | ||
| 285 | - // QPDF object. Confused? Ordinarily, if you are going to copy | ||
| 286 | - // objects from a source QPDF object to a destination QPDF object | ||
| 287 | - // using copyForeignObject or addPage, the source object's input | ||
| 288 | - // source must stick around until after the destination PDF is | ||
| 289 | - // written. If you call this method on the source QPDF object, it | ||
| 290 | - // sends a signal to the destination object that it must fully | ||
| 291 | - // copy the stream data when copyForeignObject is called. It will do this by | ||
| 292 | - // making a copy in RAM. Ordinarily the stream data is copied | ||
| 293 | - // lazily to avoid unnecessary duplication of the stream data. | ||
| 294 | - // Note that the stream data is copied into RAM only once | ||
| 295 | - // regardless of how many objects the stream is copied into. The | ||
| 296 | - // result is that, if you called setImmediateCopyFrom(true) on a | ||
| 297 | - // given QPDF object prior to copying any of its streams, you do | ||
| 298 | - // not need to keep it or its input source around after copying | ||
| 299 | - // its objects to another QPDF. This is true even if the source | ||
| 300 | - // streams use StreamDataProvider. Note that this method is called | ||
| 301 | - // on the QPDF object you are copying FROM, not the one you are | ||
| 302 | - // copying to. The reasoning for this is that there's no reason a | ||
| 303 | - // given QPDF may not get objects copied to it from a variety of | ||
| 304 | - // other objects, some transient and some not. Since what's | ||
| 305 | - // relevant is whether the source QPDF is transient, the method | ||
| 306 | - // must be called on the source QPDF, not the destination one. | ||
| 307 | - // This method will make a copy of the stream in RAM, so be | ||
| 308 | - // sure you have enough memory to simultaneously hold all the | ||
| 309 | - // streams you're copying. | 236 | + // Tell other QPDF objects that streams copied from this QPDF need to be fully copied when |
| 237 | + // copyForeignObject is called on them. Calling setImmediateCopyFrom(true) on a QPDF object | ||
| 238 | + // makes it possible for the object and its input source to disappear before streams copied from | ||
| 239 | + // it are written with the destination QPDF object. Confused? Ordinarily, if you are going to | ||
| 240 | + // copy objects from a source QPDF object to a destination QPDF object using copyForeignObject | ||
| 241 | + // or addPage, the source object's input source must stick around until after the destination | ||
| 242 | + // PDF is written. If you call this method on the source QPDF object, it sends a signal to the | ||
| 243 | + // destination object that it must fully copy the stream data when copyForeignObject is called. | ||
| 244 | + // It will do this by making a copy in RAM. Ordinarily the stream data is copied lazily to avoid | ||
| 245 | + // unnecessary duplication of the stream data. Note that the stream data is copied into RAM only | ||
| 246 | + // once regardless of how many objects the stream is copied into. The result is that, if you | ||
| 247 | + // called setImmediateCopyFrom(true) on a given QPDF object prior to copying any of its streams, | ||
| 248 | + // you do not need to keep it or its input source around after copying its objects to another | ||
| 249 | + // QPDF. This is true even if the source streams use StreamDataProvider. Note that this method | ||
| 250 | + // is called on the QPDF object you are copying FROM, not the one you are copying to. The | ||
| 251 | + // reasoning for this is that there's no reason a given QPDF may not get objects copied to it | ||
| 252 | + // from a variety of other objects, some transient and some not. Since what's relevant is | ||
| 253 | + // whether the source QPDF is transient, the method must be called on the source QPDF, not the | ||
| 254 | + // destination one. This method will make a copy of the stream in RAM, so be sure you have | ||
| 255 | + // enough memory to simultaneously hold all the streams you're copying. | ||
| 310 | QPDF_DLL | 256 | QPDF_DLL |
| 311 | void setImmediateCopyFrom(bool); | 257 | void setImmediateCopyFrom(bool); |
| 312 | 258 | ||
| 313 | // Other public methods | 259 | // Other public methods |
| 314 | 260 | ||
| 315 | - // Return the list of warnings that have been issued so far and | ||
| 316 | - // clear the list. This method may be called even if processFile | ||
| 317 | - // throws an exception. Note that if setSuppressWarnings was not | ||
| 318 | - // called or was called with a false value, any warnings retrieved | ||
| 319 | - // here will have already been output. | 261 | + // Return the list of warnings that have been issued so far and clear the list. This method may |
| 262 | + // be called even if processFile throws an exception. Note that if setSuppressWarnings was not | ||
| 263 | + // called or was called with a false value, any warnings retrieved here will have already been | ||
| 264 | + // output. | ||
| 320 | QPDF_DLL | 265 | QPDF_DLL |
| 321 | std::vector<QPDFExc> getWarnings(); | 266 | std::vector<QPDFExc> getWarnings(); |
| 322 | 267 | ||
| 323 | - // Indicate whether any warnings have been issued so far. Does not | ||
| 324 | - // clear the list of warnings. | 268 | + // Indicate whether any warnings have been issued so far. Does not clear the list of warnings. |
| 325 | QPDF_DLL | 269 | QPDF_DLL |
| 326 | bool anyWarnings() const; | 270 | bool anyWarnings() const; |
| 327 | 271 | ||
| 328 | - // Indicate the number of warnings that have been issued since the last | ||
| 329 | - // call to getWarnings. Does not clear the list of warnings. | 272 | + // Indicate the number of warnings that have been issued since the last call to getWarnings. |
| 273 | + // Does not clear the list of warnings. | ||
| 330 | QPDF_DLL | 274 | QPDF_DLL |
| 331 | size_t numWarnings() const; | 275 | size_t numWarnings() const; |
| 332 | 276 | ||
| 333 | - // Return an application-scoped unique ID for this QPDF object. | ||
| 334 | - // This is not a globally unique ID. It is constructed using a | ||
| 335 | - // timestamp and a random number and is intended to be unique | ||
| 336 | - // among QPDF objects that are created by a single run of an | ||
| 337 | - // application. While it's very likely that these are actually | ||
| 338 | - // globally unique, it is not recommended to use them for | ||
| 339 | - // long-term purposes. | 277 | + // Return an application-scoped unique ID for this QPDF object. This is not a globally unique |
| 278 | + // ID. It is constructed using a timestamp and a random number and is intended to be unique | ||
| 279 | + // among QPDF objects that are created by a single run of an application. While it's very likely | ||
| 280 | + // that these are actually globally unique, it is not recommended to use them for long-term | ||
| 281 | + // purposes. | ||
| 340 | QPDF_DLL | 282 | QPDF_DLL |
| 341 | unsigned long long getUniqueId() const; | 283 | unsigned long long getUniqueId() const; |
| 342 | 284 | ||
| 343 | - // Issue a warning on behalf of this QPDF object. It will be | ||
| 344 | - // emitted with other warnings, following warning suppression | ||
| 345 | - // rules, and it will be available with getWarnings(). | 285 | + // Issue a warning on behalf of this QPDF object. It will be emitted with other warnings, |
| 286 | + // following warning suppression rules, and it will be available with getWarnings(). | ||
| 346 | QPDF_DLL | 287 | QPDF_DLL |
| 347 | void warn(QPDFExc const& e); | 288 | void warn(QPDFExc const& e); |
| 348 | - // Same as above but creates the QPDFExc object using the | ||
| 349 | - // arguments passed to warn. The filename argument to QPDFExc is | ||
| 350 | - // omitted. This method uses the filename associated with the QPDF | 289 | + // Same as above but creates the QPDFExc object using the arguments passed to warn. The filename |
| 290 | + // argument to QPDFExc is omitted. This method uses the filename associated with the QPDF | ||
| 351 | // object. | 291 | // object. |
| 352 | QPDF_DLL | 292 | QPDF_DLL |
| 353 | void warn( | 293 | void warn( |
| @@ -376,60 +316,48 @@ class QPDF | @@ -376,60 +316,48 @@ class QPDF | ||
| 376 | 316 | ||
| 377 | // Public factory methods | 317 | // Public factory methods |
| 378 | 318 | ||
| 379 | - // Create a new stream. A subsequent call must be made to | ||
| 380 | - // replaceStreamData() to provide data for the stream. The stream's | ||
| 381 | - // dictionary may be retrieved by calling getDict(), and the resulting | ||
| 382 | - // dictionary may be modified. Alternatively, you can create a new | ||
| 383 | - // dictionary and call replaceDict to install it. | 319 | + // Create a new stream. A subsequent call must be made to replaceStreamData() to provide data |
| 320 | + // for the stream. The stream's dictionary may be retrieved by calling getDict(), and the | ||
| 321 | + // resulting dictionary may be modified. Alternatively, you can create a new dictionary and | ||
| 322 | + // call replaceDict to install it. | ||
| 384 | QPDF_DLL | 323 | QPDF_DLL |
| 385 | QPDFObjectHandle newStream(); | 324 | QPDFObjectHandle newStream(); |
| 386 | 325 | ||
| 387 | - // Create a new stream. Use the given buffer as the stream data. The | ||
| 388 | - // stream dictionary's /Length key will automatically be set to the size of | ||
| 389 | - // the data buffer. If additional keys are required, the stream's | ||
| 390 | - // dictionary may be retrieved by calling getDict(), and the resulting | ||
| 391 | - // dictionary may be modified. This method is just a convenient wrapper | ||
| 392 | - // around the newStream() and replaceStreamData(). It is a convenience | ||
| 393 | - // method for streams that require no parameters beyond the stream length. | ||
| 394 | - // Note that you don't have to deal with compression yourself if you use | ||
| 395 | - // QPDFWriter. By default, QPDFWriter will automatically compress | ||
| 396 | - // uncompressed stream data. Example programs are provided that | ||
| 397 | - // illustrate this. | 326 | + // Create a new stream. Use the given buffer as the stream data. The stream dictionary's |
| 327 | + // /Length key will automatically be set to the size of the data buffer. If additional keys are | ||
| 328 | + // required, the stream's dictionary may be retrieved by calling getDict(), and the resulting | ||
| 329 | + // dictionary may be modified. This method is just a convenient wrapper around the newStream() | ||
| 331 | + // and replaceStreamData(). It is a convenience method for streams that require no parameters | ||
| 331 | + // beyond the stream length. Note that you don't have to deal with compression yourself if you | ||
| 332 | + // use QPDFWriter. By default, QPDFWriter will automatically compress uncompressed stream data. | ||
| 333 | + // Example programs are provided that illustrate this. | ||
| 398 | QPDF_DLL | 334 | QPDF_DLL |
| 399 | QPDFObjectHandle newStream(std::shared_ptr<Buffer> data); | 335 | QPDFObjectHandle newStream(std::shared_ptr<Buffer> data); |
| 400 | 336 | ||
| 401 | - // Create new stream with data from string. This method will | ||
| 402 | - // create a copy of the data rather than using the user-provided | ||
| 403 | - // buffer as in the std::shared_ptr<Buffer> version of newStream. | 337 | + // Create new stream with data from string. This method will create a copy of the data rather |
| 338 | + // than using the user-provided buffer as in the std::shared_ptr<Buffer> version of newStream. | ||
| 404 | QPDF_DLL | 339 | QPDF_DLL |
| 405 | QPDFObjectHandle newStream(std::string const& data); | 340 | QPDFObjectHandle newStream(std::string const& data); |
| 406 | 341 | ||
| 407 | - // A reserved object is a special sentinel used for qpdf to | ||
| 408 | - // reserve a spot for an object that is going to be added to the | ||
| 409 | - // QPDF object. Normally you don't have to use this type since | ||
| 410 | - // you can just call QPDF::makeIndirectObject. However, in some | ||
| 411 | - // cases, if you have to create objects with circular references, | ||
| 412 | - // you may need to create a reserved object so that you can have a | ||
| 413 | - // reference to it and then replace the object later. Reserved | ||
| 414 | - // objects have the special property that they can't be resolved | ||
| 415 | - // to direct objects. This makes it possible to replace a | ||
| 416 | - // reserved object with a new object while preserving existing | ||
| 417 | - // references to them. When you are ready to replace a reserved | ||
| 418 | - // object with its replacement, use QPDF::replaceReserved for this | ||
| 419 | - // purpose rather than the more general QPDF::replaceObject. It | ||
| 420 | - // is an error to try to write a QPDF with QPDFWriter if it has | ||
| 421 | - // any reserved objects in it. | 342 | + // A reserved object is a special sentinel used for qpdf to reserve a spot for an object that is |
| 343 | + // going to be added to the QPDF object. Normally you don't have to use this type since you can | ||
| 344 | + // just call QPDF::makeIndirectObject. However, in some cases, if you have to create objects | ||
| 345 | + // with circular references, you may need to create a reserved object so that you can have a | ||
| 346 | + // reference to it and then replace the object later. Reserved objects have the special | ||
| 347 | + // property that they can't be resolved to direct objects. This makes it possible to replace a | ||
| 348 | + // reserved object with a new object while preserving existing references to them. When you are | ||
| 349 | + // ready to replace a reserved object with its replacement, use QPDF::replaceReserved for this | ||
| 350 | + // purpose rather than the more general QPDF::replaceObject. It is an error to try to write a | ||
| 351 | + // QPDF with QPDFWriter if it has any reserved objects in it. | ||
| 422 | QPDF_DLL | 352 | QPDF_DLL |
| 423 | QPDFObjectHandle newReserved(); | 353 | QPDFObjectHandle newReserved(); |
| 424 | 354 | ||
| 425 | - // Install this object handle as an indirect object and return an | ||
| 426 | - // indirect reference to it. | 355 | + // Install this object handle as an indirect object and return an indirect reference to it. |
| 427 | QPDF_DLL | 356 | QPDF_DLL |
| 428 | QPDFObjectHandle makeIndirectObject(QPDFObjectHandle); | 357 | QPDFObjectHandle makeIndirectObject(QPDFObjectHandle); |
| 429 | 358 | ||
| 430 | - // Retrieve an object by object ID and generation. Returns an | ||
| 431 | - // indirect reference to it. The getObject() methods were added | ||
| 432 | - // for qpdf 11. | 359 | + // Retrieve an object by object ID and generation. Returns an indirect reference to it. The |
| 360 | + // getObject() methods were added for qpdf 11. | ||
| 433 | QPDF_DLL | 361 | QPDF_DLL |
| 434 | QPDFObjectHandle getObject(QPDFObjGen const&); | 362 | QPDFObjectHandle getObject(QPDFObjGen const&); |
| 435 | QPDF_DLL | 363 | QPDF_DLL |
| @@ -441,83 +369,63 @@ class QPDF | @@ -441,83 +369,63 @@ class QPDF | ||
| 441 | QPDF_DLL | 369 | QPDF_DLL |
| 442 | QPDFObjectHandle getObjectByID(int objid, int generation); | 370 | QPDFObjectHandle getObjectByID(int objid, int generation); |
| 443 | 371 | ||
| 444 | - // Replace the object with the given object id with the given | ||
| 445 | - // object. The object handle passed in must be a direct object, | ||
| 446 | - // though it may contain references to other indirect objects | ||
| 447 | - // within it. Prior to qpdf 10.2.1, after calling this method, | ||
| 448 | - // existing QPDFObjectHandle instances that pointed to the | ||
| 449 | - // original object still pointed to the original object, resulting | ||
| 450 | - // in confusing and incorrect behavior. This was fixed in 10.2.1, | ||
| 451 | - // so existing QPDFObjectHandle objects will start pointing to the | ||
| 452 | - // newly replaced object. Note that replacing an object with | ||
| 453 | - // QPDFObjectHandle::newNull() effectively removes the object from | ||
| 454 | - // the file since a non-existent object is treated as a null | ||
| 455 | - // object. To replace a reserved object, call replaceReserved | 372 | + // Replace the object with the given object id with the given object. The object handle passed |
| 373 | + // in must be a direct object, though it may contain references to other indirect objects within | ||
| 374 | + // it. Prior to qpdf 10.2.1, after calling this method, existing QPDFObjectHandle instances that | ||
| 375 | + // pointed to the original object still pointed to the original object, resulting in confusing | ||
| 376 | + // and incorrect behavior. This was fixed in 10.2.1, so existing QPDFObjectHandle objects will | ||
| 377 | + // start pointing to the newly replaced object. Note that replacing an object with | ||
| 378 | + // QPDFObjectHandle::newNull() effectively removes the object from the file since a non-existent | ||
| 379 | + // object is treated as a null object. To replace a reserved object, call replaceReserved | ||
| 456 | // instead. | 380 | // instead. |
| 457 | QPDF_DLL | 381 | QPDF_DLL |
| 458 | void replaceObject(QPDFObjGen const& og, QPDFObjectHandle); | 382 | void replaceObject(QPDFObjGen const& og, QPDFObjectHandle); |
| 459 | QPDF_DLL | 383 | QPDF_DLL |
| 460 | void replaceObject(int objid, int generation, QPDFObjectHandle); | 384 | void replaceObject(int objid, int generation, QPDFObjectHandle); |
| 461 | 385 | ||
| 462 | - // Swap two objects given by ID. Prior to qpdf 10.2.1, existing | ||
| 463 | - // QPDFObjectHandle instances that referenced those objects did not | ||
| 464 | - // notice the swap, but this was fixed in 10.2.1. | 386 | + // Swap two objects given by ID. Prior to qpdf 10.2.1, existing QPDFObjectHandle instances that |
| 387 | + // referenced those objects did not notice the swap, but this was fixed in 10.2.1. | ||
| 465 | QPDF_DLL | 388 | QPDF_DLL |
| 466 | void swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2); | 389 | void swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2); |
| 467 | QPDF_DLL | 390 | QPDF_DLL |
| 468 | void swapObjects(int objid1, int generation1, int objid2, int generation2); | 391 | void swapObjects(int objid1, int generation1, int objid2, int generation2); |
| 469 | 392 | ||
| 470 | - // Replace a reserved object. This is a wrapper around | ||
| 471 | - // replaceObject but it guarantees that the underlying object is a | ||
| 472 | - // reserved object. After this call, reserved will be a reference | ||
| 473 | - // to replacement. | 393 | + // Replace a reserved object. This is a wrapper around replaceObject but it guarantees that the |
| 394 | + // underlying object is a reserved object. After this call, reserved will be a reference to | ||
| 395 | + // replacement. | ||
| 474 | QPDF_DLL | 396 | QPDF_DLL |
| 475 | void replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement); | 397 | void replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement); |
| 476 | 398 | ||
| 477 | - // Copy an object from another QPDF to this one. Starting with | ||
| 478 | - // qpdf version 8.3.0, it is no longer necessary to keep the | ||
| 479 | - // original QPDF around after the call to copyForeignObject as | ||
| 480 | - // long as the source of any copied stream data is still | ||
| 481 | - // available. Usually this means you just have to keep the input | ||
| 482 | - // file around, not the QPDF object. The exception to this is if | ||
| 483 | - // you copy a stream that gets its data from a | ||
| 484 | - // QPDFObjectHandle::StreamDataProvider. In this case only, the | ||
| 485 | - // original stream's QPDF object must stick around because the | ||
| 486 | - // QPDF object is itself the source of the original stream data. | ||
| 487 | - // For a more in-depth discussion, please see the TODO file. | ||
| 488 | - // Starting in 8.4.0, you can call setImmediateCopyFrom(true) on | ||
| 489 | - // the SOURCE QPDF object (the one you're copying FROM). If you do | ||
| 490 | - // this prior to copying any of its objects, then neither the | ||
| 491 | - // source QPDF object nor its input source needs to stick around | ||
| 492 | - // at all regardless of the source. The cost is that the stream | ||
| 493 | - // data is copied into RAM at the time copyForeignObject is | 399 | + // Copy an object from another QPDF to this one. Starting with qpdf version 8.3.0, it is no |
| 400 | + // longer necessary to keep the original QPDF around after the call to copyForeignObject as long | ||
| 401 | + // as the source of any copied stream data is still available. Usually this means you just have | ||
| 402 | + // to keep the input file around, not the QPDF object. The exception to this is if you copy a | ||
| 403 | + // stream that gets its data from a QPDFObjectHandle::StreamDataProvider. In this case only, the | ||
| 404 | + // original stream's QPDF object must stick around because the QPDF object is itself the source | ||
| 405 | + // of the original stream data. For a more in-depth discussion, please see the TODO file. | ||
| 406 | + // Starting in 8.4.0, you can call setImmediateCopyFrom(true) on the SOURCE QPDF object (the one | ||
| 407 | + // you're copying FROM). If you do this prior to copying any of its objects, then neither the | ||
| 408 | + // source QPDF object nor its input source needs to stick around at all regardless of the | ||
| 409 | + // source. The cost is that the stream data is copied into RAM at the time copyForeignObject is | ||
| 494 | // called. See setImmediateCopyFrom for more information. | 410 | // called. See setImmediateCopyFrom for more information. |
| 495 | // | 411 | // |
| 496 | - // The return value of this method is an indirect reference to the | ||
| 497 | - // copied object in this file. This method is intended to be used | ||
| 498 | - // to copy non-page objects. To copy page objects, pass the | ||
| 499 | - // foreign page object directly to addPage (or addPageAt). If you | ||
| 500 | - // copy objects that contain references to pages, you should copy | ||
| 501 | - // the pages first using addPage(At). Otherwise references to the | ||
| 502 | - // pages that have not been copied will be replaced with nulls. It | ||
| 503 | - // is possible to use copyForeignObject on page objects if you are | ||
| 504 | - // not going to use them as pages. Doing so copies the object | ||
| 505 | - // normally but does not update the page structure. For example, | ||
| 506 | - // it is a valid use case to use copyForeignObject for a page that | ||
| 507 | - // you are going to turn into a form XObject, though you can also | ||
| 508 | - // use QPDFPageObjectHelper::getFormXObjectForPage for that | ||
| 509 | - // purpose. | 412 | + // The return value of this method is an indirect reference to the copied object in this file. |
| 413 | + // This method is intended to be used to copy non-page objects. To copy page objects, pass the | ||
| 414 | + // foreign page object directly to addPage (or addPageAt). If you copy objects that contain | ||
| 415 | + // references to pages, you should copy the pages first using addPage(At). Otherwise references | ||
| 416 | + // to the pages that have not been copied will be replaced with nulls. It is possible to use | ||
| 417 | + // copyForeignObject on page objects if you are not going to use them as pages. Doing so copies | ||
| 418 | + // the object normally but does not update the page structure. For example, it is a valid use | ||
| 419 | + // case to use copyForeignObject for a page that you are going to turn into a form XObject, | ||
| 420 | + // though you can also use QPDFPageObjectHelper::getFormXObjectForPage for that purpose. | ||
| 510 | // | 421 | // |
| 511 | - // When copying objects with this method, object structure will be | ||
| 512 | - // preserved, so all indirectly referenced indirect objects will | ||
| 513 | - // be copied as well. This includes any circular references that | ||
| 514 | - // may exist. The QPDF object keeps a record of what has already | ||
| 515 | - // been copied, so shared objects will not be copied multiple | ||
| 516 | - // times. This also means that if you mutate an object that has | ||
| 517 | - // already been copied and try to copy it again, it won't work | ||
| 518 | - // since the modified object will not be recopied. Therefore, you | ||
| 519 | - // should do all mutation on the original file that you are going | ||
| 520 | - // to do before you start copying its objects to a new file. | 422 | + // When copying objects with this method, object structure will be preserved, so all indirectly |
| 423 | + // referenced indirect objects will be copied as well. This includes any circular references | ||
| 424 | + // that may exist. The QPDF object keeps a record of what has already been copied, so shared | ||
| 425 | + // objects will not be copied multiple times. This also means that if you mutate an object that | ||
| 426 | + // has already been copied and try to copy it again, it won't work since the modified object | ||
| 427 | + // will not be recopied. Therefore, you should do all mutation on the original file that you | ||
| 428 | + // are going to do before you start copying its objects to a new file. | ||
| 521 | QPDF_DLL | 429 | QPDF_DLL |
| 522 | QPDFObjectHandle copyForeignObject(QPDFObjectHandle foreign); | 430 | QPDFObjectHandle copyForeignObject(QPDFObjectHandle foreign); |
| 523 | 431 | ||
| @@ -633,9 +541,8 @@ class QPDF | @@ -633,9 +541,8 @@ class QPDF | ||
| 633 | QPDF_DLL | 541 | QPDF_DLL |
| 634 | bool allowModifyAll(); | 542 | bool allowModifyAll(); |
| 635 | 543 | ||
| 636 | - // Helper function to trim padding from user password. Calling | ||
| 637 | - // trim_user_password on the result of getPaddedUserPassword gives | ||
| 638 | - // getTrimmedUserPassword's result. | 544 | + // Helper function to trim padding from user password. Calling trim_user_password on the result |
| 545 | + // of getPaddedUserPassword gives getTrimmedUserPassword's result. | ||
| 639 | QPDF_DLL | 546 | QPDF_DLL |
| 640 | static void trim_user_password(std::string& user_password); | 547 | static void trim_user_password(std::string& user_password); |
| 641 | QPDF_DLL | 548 | QPDF_DLL |
| @@ -678,47 +585,40 @@ class QPDF | @@ -678,47 +585,40 @@ class QPDF | ||
| 678 | std::string& OE, | 585 | std::string& OE, |
| 679 | std::string& UE, | 586 | std::string& UE, |
| 680 | std::string& Perms); | 587 | std::string& Perms); |
| 681 | - // Return the full user password as stored in the PDF file. For | ||
| 682 | - // files encrypted with 40-bit or 128-bit keys, the user password | ||
| 683 | - // can be recovered when the file is opened using the owner | ||
| 684 | - // password. This is not possible with newer encryption formats. | ||
| 685 | - // If you are attempting to recover the user password in a | ||
| 686 | - // user-presentable form, call getTrimmedUserPassword() instead. | 588 | + // Return the full user password as stored in the PDF file. For files encrypted with 40-bit or |
| 589 | + // 128-bit keys, the user password can be recovered when the file is opened using the owner | ||
| 590 | + // password. This is not possible with newer encryption formats. If you are attempting to | ||
| 591 | + // recover the user password in a user-presentable form, call getTrimmedUserPassword() instead. | ||
| 687 | QPDF_DLL | 592 | QPDF_DLL |
| 688 | std::string const& getPaddedUserPassword() const; | 593 | std::string const& getPaddedUserPassword() const; |
| 689 | - // Return human-readable form of user password subject to same | ||
| 690 | - // limitations as getPaddedUserPassword(). | 594 | + // Return human-readable form of user password subject to same limitations as |
| 595 | + // getPaddedUserPassword(). | ||
| 691 | QPDF_DLL | 596 | QPDF_DLL |
| 692 | std::string getTrimmedUserPassword() const; | 597 | std::string getTrimmedUserPassword() const; |
| 693 | - // Return the previously computed or retrieved encryption key for | ||
| 694 | - // this file | 598 | + // Return the previously computed or retrieved encryption key for this file |
| 695 | QPDF_DLL | 599 | QPDF_DLL |
| 696 | std::string getEncryptionKey() const; | 600 | std::string getEncryptionKey() const; |
| 697 | - // Remove security restrictions associated with digitally signed | ||
| 698 | - // files. | 601 | + // Remove security restrictions associated with digitally signed files. |
| 699 | QPDF_DLL | 602 | QPDF_DLL |
| 700 | void removeSecurityRestrictions(); | 603 | void removeSecurityRestrictions(); |
| 701 | 604 | ||
| 702 | // Linearization support | 605 | // Linearization support |
| 703 | 606 | ||
| 704 | - // Returns true iff the file starts with a linearization parameter | ||
| 705 | - // dictionary. Does no additional validation. | 607 | + // Returns true iff the file starts with a linearization parameter dictionary. Does no |
| 608 | + // additional validation. | ||
| 706 | QPDF_DLL | 609 | QPDF_DLL |
| 707 | bool isLinearized(); | 610 | bool isLinearized(); |
| 708 | 611 | ||
| 709 | - // Performs various sanity checks on a linearized file. Return | ||
| 710 | - // true if no errors or warnings. Otherwise, return false and | ||
| 711 | - // output errors and warnings to the default output stream | ||
| 712 | - // (std::cout or whatever is configured in the logger). It is | ||
| 713 | - // recommended for linearization errors to be treated as warnings. | 612 | + // Performs various sanity checks on a linearized file. Return true if no errors or warnings. |
| 613 | + // Otherwise, return false and output errors and warnings to the default output stream | ||
| 614 | + // (std::cout or whatever is configured in the logger). It is recommended for linearization | ||
| 615 | + // errors to be treated as warnings. | ||
| 714 | QPDF_DLL | 616 | QPDF_DLL |
| 715 | bool checkLinearization(); | 617 | bool checkLinearization(); |
| 716 | 618 | ||
| 717 | - // Calls checkLinearization() and, if possible, prints normalized | ||
| 718 | - // contents of some of the hints tables to the default output | ||
| 719 | - // stream. Normalization includes adding min values to delta | ||
| 720 | - // values and adjusting offsets based on the location and size of | ||
| 721 | - // the primary hint stream. | 619 | + // Calls checkLinearization() and, if possible, prints normalized contents of some of the hints |
| 620 | + // tables to the default output stream. Normalization includes adding min values to delta values | ||
| 621 | + // and adjusting offsets based on the location and size of the primary hint stream. | ||
| 722 | QPDF_DLL | 622 | QPDF_DLL |
| 723 | void showLinearizationData(); | 623 | void showLinearizationData(); |
| 724 | 624 | ||
| @@ -726,66 +626,52 @@ class QPDF | @@ -726,66 +626,52 @@ class QPDF | ||
| 726 | QPDF_DLL | 626 | QPDF_DLL |
| 727 | void showXRefTable(); | 627 | void showXRefTable(); |
| 728 | 628 | ||
| 729 | - // Starting from qpdf 11.0 user code should not need to call this method. | ||
| 730 | - // Before 11.0 this method was used to detect all indirect references to | ||
| 731 | - // objects that don't exist and resolve them by replacing them with null, | ||
| 732 | - // which is how the PDF spec says to interpret such dangling references. | ||
| 733 | - // This method is called automatically when you try to add any new objects, | ||
| 734 | - // if you call getAllObjects, and before a file is written. The qpdf object | ||
| 735 | - // caches whether it has run this to avoid running it multiple times. | ||
| 736 | - // Before 11.2.1 you could pass true to force it to run again if you had | ||
| 737 | - // explicitly added new objects that may have additional dangling | ||
| 738 | - // references. | 629 | + // Starting from qpdf 11.0 user code should not need to call this method. Before 11.0 this |
| 630 | + // method was used to detect all indirect references to objects that don't exist and resolve | ||
| 631 | + // them by replacing them with null, which is how the PDF spec says to interpret such dangling | ||
| 632 | + // references. This method is called automatically when you try to add any new objects, if you | ||
| 633 | + // call getAllObjects, and before a file is written. The qpdf object caches whether it has run | ||
| 634 | + // this to avoid running it multiple times. Before 11.2.1 you could pass true to force it to run | ||
| 635 | + // again if you had explicitly added new objects that may have additional dangling references. | ||
| 739 | QPDF_DLL | 636 | QPDF_DLL |
| 740 | void fixDanglingReferences(bool force = false); | 637 | void fixDanglingReferences(bool force = false); |
| 741 | 638 | ||
| 742 | - // Return the approximate number of indirect objects. It is | ||
| 743 | - // approximate because not all objects in the file are preserved | ||
| 744 | - // in all cases, and gaps in object numbering are not preserved. | 639 | + // Return the approximate number of indirect objects. It is/ approximate because not all objects |
| 640 | + // in the file are preserved in all cases, and gaps in object numbering are not preserved. | ||
| 745 | QPDF_DLL | 641 | QPDF_DLL |
| 746 | size_t getObjectCount(); | 642 | size_t getObjectCount(); |
| 747 | 643 | ||
| 748 | - // Returns a list of indirect objects for every object in the xref | ||
| 749 | - // table. Useful for discovering objects that are not otherwise | ||
| 750 | - // referenced. | 644 | + // Returns a list of indirect objects for every object in the xref table. Useful for discovering |
| 645 | + // objects that are not otherwise referenced. | ||
| 751 | QPDF_DLL | 646 | QPDF_DLL |
| 752 | std::vector<QPDFObjectHandle> getAllObjects(); | 647 | std::vector<QPDFObjectHandle> getAllObjects(); |
| 753 | 648 | ||
| 754 | - // Optimization support -- see doc/optimization. Implemented in | ||
| 755 | - // QPDF_optimization.cc | ||
| 756 | - | ||
| 757 | - // The object_stream_data map maps from a "compressed" object to | ||
| 758 | - // the object stream that contains it. This enables optimize to | ||
| 759 | - // populate the object <-> user maps with only uncompressed | ||
| 760 | - // objects. If allow_changes is false, an exception will be thrown | ||
| 761 | - // if any changes are made during the optimization process. This | ||
| 762 | - // is available so that the test suite can make sure that a | ||
| 763 | - // linearized file is already optimized. When called in this way, | ||
| 764 | - // optimize() still populates the object <-> user maps. The | ||
| 765 | - // optional skip_stream_parameters parameter, if present, is | ||
| 766 | - // called for each stream object. The function should return 2 if | ||
| 767 | - // optimization should discard /Length, /Filter, and /DecodeParms; | ||
| 768 | - // 1 if it should discard /Length, and 0 if it should preserve all | ||
| 769 | - // keys. This is used by QPDFWriter to avoid creation of dangling | ||
| 770 | - // objects for stream dictionary keys it will be regenerating. | 649 | + // Optimization support -- see doc/optimization. Implemented in QPDF_optimization.cc |
| 650 | + | ||
| 651 | + // The object_stream_data map maps from a "compressed" object to the object stream that contains | ||
| 652 | + // it. This enables optimize to populate the object <-> user maps with only uncompressed | ||
| 653 | + // objects. If allow_changes is false, an exception will be thrown if any changes are made | ||
| 654 | + // during the optimization process. This is available so that the test suite can make sure that | ||
| 655 | + // a linearized file is already optimized. When called in this way, optimize() still populates | ||
| 656 | + // the object <-> user maps. The optional skip_stream_parameters parameter, if present, is | ||
| 657 | + // called for each stream object. The function should return 2 if optimization should discard | ||
| 658 | + // /Length, /Filter, and /DecodeParms; 1 if it should discard /Length, and 0 if it should | ||
| 659 | + // preserve all keys. This is used by QPDFWriter to avoid creation of dangling objects for | ||
| 660 | + // stream dictionary keys it will be regenerating. | ||
| 771 | QPDF_DLL | 661 | QPDF_DLL |
| 772 | void optimize( | 662 | void optimize( |
| 773 | std::map<int, int> const& object_stream_data, | 663 | std::map<int, int> const& object_stream_data, |
| 774 | bool allow_changes = true, | 664 | bool allow_changes = true, |
| 775 | std::function<int(QPDFObjectHandle&)> skip_stream_parameters = nullptr); | 665 | std::function<int(QPDFObjectHandle&)> skip_stream_parameters = nullptr); |
| 776 | 666 | ||
| 777 | - // Traverse page tree return all /Page objects. It also detects | ||
| 778 | - // and resolves cases in which the same /Page object is | ||
| 779 | - // duplicated. For efficiency, this method returns a const | ||
| 780 | - // reference to an internal vector of pages. Calls to addPage, | ||
| 781 | - // addPageAt, and removePage safely update this, but directly | ||
| 782 | - // manipulation of the pages tree or pushing inheritable objects | ||
| 783 | - // to the page level may invalidate it. See comments for | ||
| 784 | - // updateAllPagesCache() for additional notes. Newer code should | ||
| 785 | - // use QPDFPageDocumentHelper::getAllPages instead. The decision | ||
| 786 | - // to expose this internal cache was arguably incorrect, but it is | ||
| 787 | - // being left here for compatibility. It is, however, completely | ||
| 788 | - // safe to use this for files that you are not modifying. | 667 | + // Traverse page tree and return all /Page objects. It also detects and resolves cases in which the |
| 668 | + // same /Page object is duplicated. For efficiency, this method returns a const reference to an | ||
| 669 | + // internal vector of pages. Calls to addPage, addPageAt, and removePage safely update this, but | ||
| 670 | + // direct manipulation of the pages tree or pushing inheritable objects to the page level may | ||
| 671 | + // invalidate it. See comments for updateAllPagesCache() for additional notes. Newer code should | ||
| 672 | + // use QPDFPageDocumentHelper::getAllPages instead. The decision to expose this internal cache | ||
| 673 | + // was arguably incorrect, but it is being left here for compatibility. It is, however, | ||
| 674 | + // completely safe to use this for files that you are not modifying. | ||
| 789 | QPDF_DLL | 675 | QPDF_DLL |
| 790 | std::vector<QPDFObjectHandle> const& getAllPages(); | 676 | std::vector<QPDFObjectHandle> const& getAllPages(); |
| 791 | 677 | ||
| @@ -794,40 +680,32 @@ class QPDF | @@ -794,40 +680,32 @@ class QPDF | ||
| 794 | QPDF_DLL | 680 | QPDF_DLL |
| 795 | bool everPushedInheritedAttributesToPages() const; | 681 | bool everPushedInheritedAttributesToPages() const; |
| 796 | 682 | ||
| 797 | - // These methods, given a page object or its object/generation | ||
| 798 | - // number, returns the 0-based index into the array returned by | ||
| 799 | - // getAllPages() for that page. An exception is thrown if the page | ||
| 800 | - // is not found. | 683 | + // These methods, given a page object or its object/generation number, return the 0-based index |
| 684 | + // into the array returned by getAllPages() for that page. An exception is thrown if the page is | ||
| 685 | + // not found. | ||
| 801 | QPDF_DLL | 686 | QPDF_DLL |
| 802 | int findPage(QPDFObjGen const& og); | 687 | int findPage(QPDFObjGen const& og); |
| 803 | QPDF_DLL | 688 | QPDF_DLL |
| 804 | int findPage(QPDFObjectHandle& page); | 689 | int findPage(QPDFObjectHandle& page); |
| 805 | 690 | ||
| 806 | - // This method synchronizes QPDF's cache of the page structure | ||
| 807 | - // with the actual /Pages tree. If you restrict changes to the | ||
| 808 | - // /Pages tree, including addition, removal, or replacement of | ||
| 809 | - // pages or changes to any /Pages objects, to calls to these page | ||
| 810 | - // handling APIs, you never need to call this method. If you | ||
| 811 | - // modify /Pages structures directly, you must call this method | ||
| 812 | - // afterwards. This method updates the internal list of pages, so | ||
| 813 | - // after calling this method, any previous references returned by | ||
| 814 | - // getAllPages() will be valid again. It also resets any state | ||
| 815 | - // about having pushed inherited attributes in /Pages objects down | ||
| 816 | - // to the pages, so if you add any inheritable attributes to a | ||
| 817 | - // /Pages object, you should also call this method. | 691 | + // This method synchronizes QPDF's cache of the page structure with the actual /Pages tree. If |
| 692 | + // you restrict changes to the /Pages tree, including addition, removal, or replacement of pages | ||
| 693 | + // or changes to any /Pages objects, to calls to these page handling APIs, you never need to | ||
| 694 | + // call this method. If you modify /Pages structures directly, you must call this method | ||
| 695 | + // afterwards. This method updates the internal list of pages, so after calling this method, | ||
| 696 | + // any previous references returned by getAllPages() will be valid again. It also resets any | ||
| 697 | + // state about having pushed inherited attributes in /Pages objects down to the pages, so if you | ||
| 698 | + // add any inheritable attributes to a /Pages object, you should also call this method. | ||
| 818 | QPDF_DLL | 699 | QPDF_DLL |
| 819 | void updateAllPagesCache(); | 700 | void updateAllPagesCache(); |
| 820 | 701 | ||
| 821 | - // Legacy handling API. These methods are not going anywhere, and | ||
| 822 | - // you should feel free to continue using them if it simplifies | ||
| 823 | - // your code. Newer code should make use of QPDFPageDocumentHelper | ||
| 824 | - // instead as future page handling methods will be added there. | ||
| 825 | - // The functionality and specification of these legacy methods is | ||
| 826 | - // identical to the identically named methods there, except that | ||
| 827 | - // these versions use QPDFObjectHandle instead of | ||
| 828 | - // QPDFPageObjectHelper, so please see comments in that file for | ||
| 829 | - // descriptions. There are subtleties you need to know about, so | ||
| 830 | - // please look at the comments there. | 702 | + // Legacy handling API. These methods are not going anywhere, and you should feel free to |
| 703 | + // continue using them if it simplifies your code. Newer code should make use of | ||
| 704 | + // QPDFPageDocumentHelper instead as future page handling methods will be added there. The | ||
| 705 | + // functionality and specification of these legacy methods is identical to the identically named | ||
| 706 | + // methods there, except that these versions use QPDFObjectHandle instead of | ||
| 707 | + // QPDFPageObjectHelper, so please see comments in that file for descriptions. There are | ||
| 708 | + // subtleties you need to know about, so please look at the comments there. | ||
| 831 | QPDF_DLL | 709 | QPDF_DLL |
| 832 | void pushInheritedAttributesToPage(); | 710 | void pushInheritedAttributesToPage(); |
| 833 | QPDF_DLL | 711 | QPDF_DLL |
| @@ -838,8 +716,7 @@ class QPDF | @@ -838,8 +716,7 @@ class QPDF | ||
| 838 | void removePage(QPDFObjectHandle page); | 716 | void removePage(QPDFObjectHandle page); |
| 839 | // End legacy page helpers | 717 | // End legacy page helpers |
| 840 | 718 | ||
| 841 | - // Writer class is restricted to QPDFWriter so that only it can | ||
| 842 | - // call certain methods. | 719 | + // Writer class is restricted to QPDFWriter so that only it can call certain methods. |
| 843 | class Writer | 720 | class Writer |
| 844 | { | 721 | { |
| 845 | friend class QPDFWriter; | 722 | friend class QPDFWriter; |
| @@ -884,8 +761,8 @@ class QPDF | @@ -884,8 +761,8 @@ class QPDF | ||
| 884 | } | 761 | } |
| 885 | }; | 762 | }; |
| 886 | 763 | ||
| 887 | - // The Resolver class is restricted to QPDFObject so that only it | ||
| 888 | - // can resolve indirect references. | 764 | + // The Resolver class is restricted to QPDFObject so that only it can resolve indirect |
| 765 | + // references. | ||
| 889 | class Resolver | 766 | class Resolver |
| 890 | { | 767 | { |
| 891 | friend class QPDFObject; | 768 | friend class QPDFObject; |
| @@ -898,8 +775,7 @@ class QPDF | @@ -898,8 +775,7 @@ class QPDF | ||
| 898 | } | 775 | } |
| 899 | }; | 776 | }; |
| 900 | 777 | ||
| 901 | - // StreamCopier class is restricted to QPDFObjectHandle so it can | ||
| 902 | - // copy stream data. | 778 | + // StreamCopier class is restricted to QPDFObjectHandle so it can copy stream data. |
| 903 | class StreamCopier | 779 | class StreamCopier |
| 904 | { | 780 | { |
| 905 | friend class QPDFObjectHandle; | 781 | friend class QPDFObjectHandle; |
| @@ -974,12 +850,10 @@ class QPDF | @@ -974,12 +850,10 @@ class QPDF | ||
| 974 | static bool test_json_validators(); | 850 | static bool test_json_validators(); |
| 975 | 851 | ||
| 976 | private: | 852 | private: |
| 977 | - // It has never been safe to copy QPDF objects as there is code in | ||
| 978 | - // the library that assumes there are no copies of a QPDF object. | ||
| 979 | - // Copying QPDF objects was not prevented by the API until qpdf | ||
| 980 | - // 11. If you have been copying QPDF objects, use | ||
| 981 | - // std::shared_ptr<QPDF> instead. From qpdf 11, you can use | ||
| 982 | - // QPDF::create to create them. | 853 | + // It has never been safe to copy QPDF objects as there is code in the library that assumes |
| 854 | + // there are no copies of a QPDF object. Copying QPDF objects was not prevented by the API until | ||
| 855 | + // qpdf 11. If you have been copying QPDF objects, use std::shared_ptr<QPDF> instead. From qpdf | ||
| 856 | + // 11, you can use QPDF::create to create them. | ||
| 983 | QPDF(QPDF const&) = delete; | 857 | QPDF(QPDF const&) = delete; |
| 984 | QPDF& operator=(QPDF const&) = delete; | 858 | QPDF& operator=(QPDF const&) = delete; |
| 985 | 859 | ||
| @@ -1200,8 +1074,8 @@ class QPDF | @@ -1200,8 +1074,8 @@ class QPDF | ||
| 1200 | 1074 | ||
| 1201 | // For QPDFWriter: | 1075 | // For QPDFWriter: |
| 1202 | 1076 | ||
| 1203 | - // Get lists of all objects in order according to the part of a | ||
| 1204 | - // linearized file that they belong to. | 1077 | + // Get lists of all objects in order according to the part of a linearized file that they belong |
| 1078 | + // to. | ||
| 1205 | void getLinearizedParts( | 1079 | void getLinearizedParts( |
| 1206 | std::map<int, int> const& object_stream_data, | 1080 | std::map<int, int> const& object_stream_data, |
| 1207 | std::vector<QPDFObjectHandle>& part4, | 1081 | std::vector<QPDFObjectHandle>& part4, |
| @@ -1221,8 +1095,7 @@ class QPDF | @@ -1221,8 +1095,7 @@ class QPDF | ||
| 1221 | // Map object to object stream that contains it | 1095 | // Map object to object stream that contains it |
| 1222 | void getObjectStreamData(std::map<int, int>&); | 1096 | void getObjectStreamData(std::map<int, int>&); |
| 1223 | 1097 | ||
| 1224 | - // Get a list of objects that would be permitted in an object | ||
| 1225 | - // stream. | 1098 | + // Get a list of objects that would be permitted in an object stream. |
| 1226 | std::vector<QPDFObjGen> getCompressibleObjGens(); | 1099 | std::vector<QPDFObjGen> getCompressibleObjGens(); |
| 1227 | 1100 | ||
| 1228 | // methods to support page handling | 1101 | // methods to support page handling |
| @@ -1418,20 +1291,16 @@ class QPDF | @@ -1418,20 +1291,16 @@ class QPDF | ||
| 1418 | qpdf_offset_t H_length; // length of primary hint stream | 1291 | qpdf_offset_t H_length; // length of primary hint stream |
| 1419 | }; | 1292 | }; |
| 1420 | 1293 | ||
| 1421 | - // Computed hint table value data structures. These tables | ||
| 1422 | - // contain the computed values on which the hint table values are | ||
| 1423 | - // based. They exclude things like number of bits and store | ||
| 1424 | - // actual values instead of mins and deltas. File offsets are | ||
| 1425 | - // also absolute rather than being offset by the size of the | ||
| 1426 | - // primary hint table. We populate the hint table structures from | ||
| 1427 | - // these during writing and compare the hint table values with | ||
| 1428 | - // these during validation. We ignore some values for various | ||
| 1429 | - // reasons described in the code. Those values are omitted from | ||
| 1430 | - // these structures. Note also that object numbers are object | ||
| 1431 | - // numbers from the input file, not the output file. | ||
| 1432 | - | ||
| 1433 | - // Naming convention: CHSomething is analogous to HSomething | ||
| 1434 | - // above. "CH" is computed hint. | 1294 | + // Computed hint table value data structures. These tables contain the computed values on which |
| 1295 | + // the hint table values are based. They exclude things like number of bits and store actual | ||
| 1296 | + // values instead of mins and deltas. File offsets are also absolute rather than being offset | ||
| 1297 | + // by the size of the primary hint table. We populate the hint table structures from these | ||
| 1298 | + // during writing and compare the hint table values with these during validation. We ignore | ||
| 1299 | + // some values for various reasons described in the code. Those values are omitted from these | ||
| 1300 | + // structures. Note also that object numbers are object numbers from the input file, not the | ||
| 1301 | + // output file. | ||
| 1302 | + | ||
| 1303 | + // Naming convention: CHSomething is analogous to HSomething above. "CH" is computed hint. | ||
| 1435 | 1304 | ||
| 1436 | struct CHPageOffsetEntry | 1305 | struct CHPageOffsetEntry |
| 1437 | { | 1306 | { |
| @@ -1482,8 +1351,7 @@ class QPDF | @@ -1482,8 +1351,7 @@ class QPDF | ||
| 1482 | 1351 | ||
| 1483 | // No need for CHGeneric -- HGeneric is fine as is. | 1352 | // No need for CHGeneric -- HGeneric is fine as is. |
| 1484 | 1353 | ||
| 1485 | - // Data structures to support optimization -- implemented in | ||
| 1486 | - // QPDF_optimization.cc | 1354 | + // Data structures to support optimization -- implemented in QPDF_optimization.cc |
| 1487 | 1355 | ||
| 1488 | class ObjUser | 1356 | class ObjUser |
| 1489 | { | 1357 | { |
| @@ -1535,8 +1403,7 @@ class QPDF | @@ -1535,8 +1403,7 @@ class QPDF | ||
| 1535 | bool findStartxref(); | 1403 | bool findStartxref(); |
| 1536 | bool findEndstream(); | 1404 | bool findEndstream(); |
| 1537 | 1405 | ||
| 1538 | - // methods to support linearization checking -- implemented in | ||
| 1539 | - // QPDF_linearization.cc | 1406 | + // methods to support linearization checking -- implemented in QPDF_linearization.cc |
| 1540 | void readLinearizationData(); | 1407 | void readLinearizationData(); |
| 1541 | bool checkLinearizationInternal(); | 1408 | bool checkLinearizationInternal(); |
| 1542 | void dumpLinearizationDataInternal(); | 1409 | void dumpLinearizationDataInternal(); |
| @@ -1693,25 +1560,23 @@ class QPDF | @@ -1693,25 +1560,23 @@ class QPDF | ||
| 1693 | bool uncompressed_after_compressed{false}; | 1560 | bool uncompressed_after_compressed{false}; |
| 1694 | bool linearization_warnings{false}; | 1561 | bool linearization_warnings{false}; |
| 1695 | 1562 | ||
| 1696 | - // Linearization parameter dictionary and hint table data: may be | ||
| 1697 | - // read from file or computed prior to writing a linearized file | 1563 | + // Linearization parameter dictionary and hint table data: may be read from file or computed |
| 1564 | + // prior to writing a linearized file | ||
| 1698 | QPDFObjectHandle lindict; | 1565 | QPDFObjectHandle lindict; |
| 1699 | LinParameters linp; | 1566 | LinParameters linp; |
| 1700 | HPageOffset page_offset_hints; | 1567 | HPageOffset page_offset_hints; |
| 1701 | HSharedObject shared_object_hints; | 1568 | HSharedObject shared_object_hints; |
| 1702 | HGeneric outline_hints; | 1569 | HGeneric outline_hints; |
| 1703 | 1570 | ||
| 1704 | - // Computed linearization data: used to populate above tables | ||
| 1705 | - // during writing and to compare with them during validation. | ||
| 1706 | - // c_ means computed. | 1571 | + // Computed linearization data: used to populate above tables during writing and to compare |
| 1572 | + // with them during validation. c_ means computed. | ||
| 1707 | LinParameters c_linp; | 1573 | LinParameters c_linp; |
| 1708 | CHPageOffset c_page_offset_data; | 1574 | CHPageOffset c_page_offset_data; |
| 1709 | CHSharedObject c_shared_object_data; | 1575 | CHSharedObject c_shared_object_data; |
| 1710 | HGeneric c_outline_data; | 1576 | HGeneric c_outline_data; |
| 1711 | 1577 | ||
| 1712 | - // Object ordering data for linearized files: initialized by | ||
| 1713 | - // calculateLinearizationData(). Part numbers refer to the PDF | ||
| 1714 | - // 1.4 specification. | 1578 | + // Object ordering data for linearized files: initialized by calculateLinearizationData(). |
| 1579 | + // Part numbers refer to the PDF 1.4 specification. | ||
| 1715 | std::vector<QPDFObjectHandle> part4; | 1580 | std::vector<QPDFObjectHandle> part4; |
| 1716 | std::vector<QPDFObjectHandle> part6; | 1581 | std::vector<QPDFObjectHandle> part6; |
| 1717 | std::vector<QPDFObjectHandle> part7; | 1582 | std::vector<QPDFObjectHandle> part7; |
| @@ -1723,9 +1588,8 @@ class QPDF | @@ -1723,9 +1588,8 @@ class QPDF | ||
| 1723 | std::map<QPDFObjGen, std::set<ObjUser>> object_to_obj_users; | 1588 | std::map<QPDFObjGen, std::set<ObjUser>> object_to_obj_users; |
| 1724 | }; | 1589 | }; |
| 1725 | 1590 | ||
| 1726 | - // Keep all member variables inside the Members object, which we | ||
| 1727 | - // dynamically allocate. This makes it possible to add new private | ||
| 1728 | - // members without breaking binary compatibility. | 1591 | + // Keep all member variables inside the Members object, which we dynamically allocate. This |
| 1592 | + // makes it possible to add new private members without breaking binary compatibility. | ||
| 1729 | std::shared_ptr<Members> m; | 1593 | std::shared_ptr<Members> m; |
| 1730 | }; | 1594 | }; |
| 1731 | 1595 |
include/qpdf/QPDFAcroFormDocumentHelper.hh
| @@ -2,69 +2,55 @@ | @@ -2,69 +2,55 @@ | ||
| 2 | // | 2 | // |
| 3 | // This file is part of qpdf. | 3 | // This file is part of qpdf. |
| 4 | // | 4 | // |
| 5 | -// Licensed under the Apache License, Version 2.0 (the "License"); | ||
| 6 | -// you may not use this file except in compliance with the License. | ||
| 7 | -// You may obtain a copy of the License at | 5 | +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
| 6 | +// in compliance with the License. You may obtain a copy of the License at | ||
| 8 | // | 7 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 | 8 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // | 9 | // |
| 11 | -// Unless required by applicable law or agreed to in writing, software | ||
| 12 | -// distributed under the License is distributed on an "AS IS" BASIS, | ||
| 13 | -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 14 | -// See the License for the specific language governing permissions and | ||
| 15 | -// limitations under the License. | 10 | +// Unless required by applicable law or agreed to in writing, software distributed under the License |
| 11 | +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | ||
| 12 | +// or implied. See the License for the specific language governing permissions and limitations under | ||
| 13 | +// the License. | ||
| 16 | // | 14 | // |
| 17 | -// Versions of qpdf prior to version 7 were released under the terms | ||
| 18 | -// of version 2.0 of the Artistic License. At your option, you may | ||
| 19 | -// continue to consider qpdf to be licensed under those terms. Please | ||
| 20 | -// see the manual for additional information. | 15 | +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic |
| 16 | +// License. At your option, you may continue to consider qpdf to be licensed under those terms. | ||
| 17 | +// Please see the manual for additional information. | ||
| 21 | 18 | ||
| 22 | #ifndef QPDFACROFORMDOCUMENTHELPER_HH | 19 | #ifndef QPDFACROFORMDOCUMENTHELPER_HH |
| 23 | #define QPDFACROFORMDOCUMENTHELPER_HH | 20 | #define QPDFACROFORMDOCUMENTHELPER_HH |
| 24 | 21 | ||
| 25 | -// This document helper is intended to help with operations on | ||
| 26 | -// interactive forms. Here are the key things to know: | 22 | +// This document helper is intended to help with operations on interactive forms. Here are the key |
| 23 | +// things to know: | ||
| 27 | 24 | ||
| 28 | -// * The PDF specification talks about interactive forms and also | ||
| 29 | -// about form XObjects. While form XObjects appear in parts of | ||
| 30 | -// interactive forms, this class is concerned about interactive | ||
| 31 | -// forms, not form XObjects. | 25 | +// * The PDF specification talks about interactive forms and also about form XObjects. While form |
| 26 | +// XObjects appear in parts of interactive forms, this class is concerned about interactive forms, | ||
| 27 | +// not form XObjects. | ||
| 32 | // | 28 | // |
| 33 | -// * Interactive forms are discussed in the PDF Specification (ISO PDF | ||
| 34 | -// 32000-1:2008) section 12.7. Also relevant is the section about | ||
| 35 | -// Widget annotations. Annotations are discussed in section 12.5 | ||
| 36 | -// with annotation dictionaries discussed in 12.5.1. Widget | ||
| 37 | -// annotations are discussed specifically in section 12.5.6.19. | 29 | +// * Interactive forms are discussed in the PDF Specification (ISO PDF 32000-1:2008) section 12.7. |
| 30 | +// Also relevant is the section about Widget annotations. Annotations are discussed in | ||
| 31 | +// section 12.5 with annotation dictionaries discussed in 12.5.1. Widget annotations are discussed | ||
| 32 | +// specifically in section 12.5.6.19. | ||
| 38 | // | 33 | // |
| 39 | -// * What you need to know about the structure of interactive forms in | ||
| 40 | -// PDF files: | 34 | +// * What you need to know about the structure of interactive forms in PDF files: |
| 41 | // | 35 | // |
| 42 | -// - The document catalog contains the key "/AcroForm" which | ||
| 43 | -// contains a list of fields. Fields are represented as a tree | ||
| 44 | -// structure much like pages. Nodes in the fields tree may contain | ||
| 45 | -// other fields. Fields may inherit values of many of their | ||
| 46 | -// attributes from ancestors in the tree. | 36 | +// - The document catalog contains the key "/AcroForm" which contains a list of fields. Fields are |
| 37 | +// represented as a tree structure much like pages. Nodes in the fields tree may contain other | ||
| 38 | +// fields. Fields may inherit values of many of their attributes from ancestors in the tree. | ||
| 47 | // | 39 | // |
| 48 | -// - Fields may also have children that are widget annotations. As a | ||
| 49 | -// special case, and a cause of considerable confusion, if a field | ||
| 50 | -// has a single annotation as a child, the annotation dictionary | ||
| 51 | -// may be merged with the field dictionary. In that case, the | ||
| 52 | -// field and the annotation are in the same object. Note that, | ||
| 53 | -// while field dictionary attributes are inherited, annotation | ||
| 54 | -// dictionary attributes are not. | 40 | +// - Fields may also have children that are widget annotations. As a special case, and a cause of |
| 41 | +// considerable confusion, if a field has a single annotation as a child, the annotation | ||
| 42 | +// dictionary may be merged with the field dictionary. In that case, the field and the | ||
| 43 | +// annotation are in the same object. Note that, while field dictionary attributes are | ||
| 44 | +// inherited, annotation dictionary attributes are not. | ||
| 55 | // | 45 | // |
| 56 | -// - A page dictionary contains a key called "/Annots" which | ||
| 57 | -// contains a simple list of annotations. For any given annotation | ||
| 58 | -// of subtype "/Widget", you should encounter that annotation in | ||
| 59 | -// the "/Annots" array of a page, and you should also be able | ||
| 60 | -// to reach it by traversing through the "/AcroForm" dictionary | ||
| 61 | -// from the document catalog. In the simplest case (and also a | ||
| 62 | -// very common case), a form field's widget annotation will be | ||
| 63 | -// merged with the field object, and the object will appear | ||
| 64 | -// directly both under "/Annots" in the page dictionary and under | ||
| 65 | -// "/Fields" in the "/AcroForm" dictionary. In a more complex | ||
| 66 | -// case, you may have to trace through various "/Kids" elements in | ||
| 67 | -// the "/AcroForm" field entry until you find the annotation | 46 | +// - A page dictionary contains a key called "/Annots" which contains a simple list of |
| 47 | +// annotations. For any given annotation of subtype "/Widget", you should encounter that | ||
| 48 | +// annotation in the "/Annots" array of a page, and you should also be able to reach it by | ||
| 49 | +// traversing through the "/AcroForm" dictionary from the document catalog. In the simplest case | ||
| 50 | +// (and also a very common case), a form field's widget annotation will be merged with the field | ||
| 51 | +// object, and the object will appear directly both under "/Annots" in the page dictionary and | ||
| 52 | +// under "/Fields" in the "/AcroForm" dictionary. In a more complex case, you may have to trace | ||
| 53 | +// through various "/Kids" elements in the "/AcroForm" field entry until you find the annotation | ||
| 68 | // dictionary. | 54 | // dictionary. |
| 69 | 55 | ||
| 70 | #include <qpdf/QPDFDocumentHelper.hh> | 56 | #include <qpdf/QPDFDocumentHelper.hh> |
| @@ -87,34 +73,28 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper | @@ -87,34 +73,28 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper | ||
| 87 | QPDF_DLL | 73 | QPDF_DLL |
| 88 | virtual ~QPDFAcroFormDocumentHelper() = default; | 74 | virtual ~QPDFAcroFormDocumentHelper() = default; |
| 89 | 75 | ||
| 90 | - // This class lazily creates an internal cache of the mapping | ||
| 91 | - // among form fields, annotations, and pages. Methods within this | ||
| 92 | - // class preserve the validity of this cache. However, if you | ||
| 93 | - // modify pages' annotation dictionaries, the document's /AcroForm | ||
| 94 | - // dictionary, or any form fields manually in a way that alters | ||
| 95 | - // the association between forms, fields, annotations, and pages, | ||
| 96 | - // it may cause this cache to become invalid. This method marks | ||
| 97 | - // the cache invalid and forces it to be regenerated the next time | ||
| 98 | - // it is needed. | 76 | + // This class lazily creates an internal cache of the mapping among form fields, annotations, |
| 77 | + // and pages. Methods within this class preserve the validity of this cache. However, if you | ||
| 78 | + // modify pages' annotation dictionaries, the document's /AcroForm dictionary, or any form | ||
| 79 | + // fields manually in a way that alters the association between forms, fields, annotations, and | ||
| 80 | + // pages, it may cause this cache to become invalid. This method marks the cache invalid and | ||
| 81 | + // forces it to be regenerated the next time it is needed. | ||
| 99 | QPDF_DLL | 82 | QPDF_DLL |
| 100 | void invalidateCache(); | 83 | void invalidateCache(); |
| 101 | 84 | ||
| 102 | QPDF_DLL | 85 | QPDF_DLL |
| 103 | bool hasAcroForm(); | 86 | bool hasAcroForm(); |
| 104 | 87 | ||
| 105 | - // Add a form field, initializing the document's AcroForm | ||
| 106 | - // dictionary if needed, updating the cache if necessary. Note | ||
| 107 | - // that if you are adding fields that are copies of other fields, | ||
| 108 | - // this method may result in multiple fields existing with the | ||
| 109 | - // same qualified name, which can have unexpected side effects. In | ||
| 110 | - // that case, you should use addAndRenameFormFields() instead. | 88 | + // Add a form field, initializing the document's AcroForm dictionary if needed, updating the |
| 89 | + // cache if necessary. Note that if you are adding fields that are copies of other fields, this | ||
| 90 | + // method may result in multiple fields existing with the same qualified name, which can have | ||
| 91 | + // unexpected side effects. In that case, you should use addAndRenameFormFields() instead. | ||
| 111 | QPDF_DLL | 92 | QPDF_DLL |
| 112 | void addFormField(QPDFFormFieldObjectHelper); | 93 | void addFormField(QPDFFormFieldObjectHelper); |
| 113 | 94 | ||
| 114 | - // Add a collection of form fields making sure that their fully | ||
| 115 | - // qualified names don't conflict with already present form | ||
| 116 | - // fields. Fields within the collection of new fields that have | ||
| 117 | - // the same name as each other will continue to do so. | 95 | + // Add a collection of form fields making sure that their fully qualified names don't conflict |
| 96 | + // with already present form fields. Fields within the collection of new fields that have the | ||
| 97 | + // same name as each other will continue to do so. | ||
| 118 | QPDF_DLL | 98 | QPDF_DLL |
| 119 | void addAndRenameFormFields(std::vector<QPDFObjectHandle> fields); | 99 | void addAndRenameFormFields(std::vector<QPDFObjectHandle> fields); |
| 120 | 100 | ||
| @@ -122,31 +102,27 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper | @@ -122,31 +102,27 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper | ||
| 122 | QPDF_DLL | 102 | QPDF_DLL |
| 123 | void removeFormFields(std::set<QPDFObjGen> const&); | 103 | void removeFormFields(std::set<QPDFObjGen> const&); |
| 124 | 104 | ||
| 125 | - // Set the name of a field, updating internal records of field | ||
| 126 | - // names. Name should be UTF-8 encoded. | 105 | + // Set the name of a field, updating internal records of field names. Name should be UTF-8 |
| 106 | + // encoded. | ||
| 127 | QPDF_DLL | 107 | QPDF_DLL |
| 128 | void setFormFieldName(QPDFFormFieldObjectHelper, std::string const& name); | 108 | void setFormFieldName(QPDFFormFieldObjectHelper, std::string const& name); |
| 129 | 109 | ||
| 130 | - // Return a vector of all terminal fields in a document. Terminal | ||
| 131 | - // fields are fields that have no children that are also fields. | ||
| 132 | - // Terminal fields may still have children that are annotations. | ||
| 133 | - // Intermediate nodes in the fields tree are not included in this | ||
| 134 | - // list, but you can still reach them through the getParent method | ||
| 135 | - // of the field object helper. | 110 | + // Return a vector of all terminal fields in a document. Terminal fields are fields that have no |
| 111 | + // children that are also fields. Terminal fields may still have children that are annotations. | ||
| 112 | + // Intermediate nodes in the fields tree are not included in this list, but you can still reach | ||
| 113 | + // them through the getParent method of the field object helper. | ||
| 136 | QPDF_DLL | 114 | QPDF_DLL |
| 137 | std::vector<QPDFFormFieldObjectHelper> getFormFields(); | 115 | std::vector<QPDFFormFieldObjectHelper> getFormFields(); |
| 138 | 116 | ||
| 139 | - // Return all the form fields that have the given fully-qualified | ||
| 140 | - // name and also have an explicit "/T" attribute. For this | ||
| 141 | - // information to be accurate, any changes to field names must be | ||
| 142 | - // done through setFormFieldName() above. | 117 | + // Return all the form fields that have the given fully-qualified name and also have an explicit |
| 118 | + // "/T" attribute. For this information to be accurate, any changes to field names must be done | ||
| 119 | + // through setFormFieldName() above. | ||
| 143 | QPDF_DLL | 120 | QPDF_DLL |
| 144 | std::set<QPDFObjGen> getFieldsWithQualifiedName(std::string const& name); | 121 | std::set<QPDFObjGen> getFieldsWithQualifiedName(std::string const& name); |
| 145 | 122 | ||
| 146 | - // Return the annotations associated with a terminal field. Note | ||
| 147 | - // that in the case of a field having a single annotation, the | ||
| 148 | - // underlying object will typically be the same as the underlying | ||
| 149 | - // object for the field. | 123 | + // Return the annotations associated with a terminal field. Note that in the case of a field |
| 124 | + // having a single annotation, the underlying object will typically be the same as the | ||
| 125 | + // underlying object for the field. | ||
| 150 | QPDF_DLL | 126 | QPDF_DLL |
| 151 | std::vector<QPDFAnnotationObjectHelper> getAnnotationsForField(QPDFFormFieldObjectHelper); | 127 | std::vector<QPDFAnnotationObjectHelper> getAnnotationsForField(QPDFFormFieldObjectHelper); |
| 152 | 128 | ||
| @@ -158,63 +134,49 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper | @@ -158,63 +134,49 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper | ||
| 158 | QPDF_DLL | 134 | QPDF_DLL |
| 159 | std::vector<QPDFFormFieldObjectHelper> getFormFieldsForPage(QPDFPageObjectHelper); | 135 | std::vector<QPDFFormFieldObjectHelper> getFormFieldsForPage(QPDFPageObjectHelper); |
| 160 | 136 | ||
| 161 | - // Return the terminal field that is associated with this | ||
| 162 | - // annotation. If the annotation dictionary is merged with the | ||
| 163 | - // field dictionary, the underlying object will be the same, but | ||
| 164 | - // this is not always the case. Note that if you call this method | ||
| 165 | - // with an annotation that is not a widget annotation, there will | ||
| 166 | - // not be an associated field, and this method will return a | 137 | + // Return the terminal field that is associated with this annotation. If the annotation |
| 138 | + // dictionary is merged with the field dictionary, the underlying object will be the same, but | ||
| 139 | + // this is not always the case. Note that if you call this method with an annotation that is not | ||
| 140 | + // a widget annotation, there will not be an associated field, and this method will return a | ||
| 167 | // helper associated with a null object (isNull() == true). | 141 | // helper associated with a null object (isNull() == true). |
| 168 | QPDF_DLL | 142 | QPDF_DLL |
| 169 | QPDFFormFieldObjectHelper getFieldForAnnotation(QPDFAnnotationObjectHelper); | 143 | QPDFFormFieldObjectHelper getFieldForAnnotation(QPDFAnnotationObjectHelper); |
| 170 | 144 | ||
| 171 | - // Return the current value of /NeedAppearances. If | ||
| 172 | - // /NeedAppearances is missing, return false as that is how PDF | ||
| 173 | - // viewers are supposed to interpret it. | 145 | + // Return the current value of /NeedAppearances. If /NeedAppearances is missing, return false as |
| 146 | + // that is how PDF viewers are supposed to interpret it. | ||
| 174 | QPDF_DLL | 147 | QPDF_DLL |
| 175 | bool getNeedAppearances(); | 148 | bool getNeedAppearances(); |
| 176 | 149 | ||
| 177 | - // Indicate whether appearance streams must be regenerated. If you | ||
| 178 | - // modify a field value, you should call setNeedAppearances(true) | ||
| 179 | - // unless you also generate an appearance stream for the | ||
| 180 | - // corresponding annotation at the same time. If you generate | ||
| 181 | - // appearance streams for all fields, you can call | ||
| 182 | - // setNeedAppearances(false). If you use | ||
| 183 | - // QPDFFormFieldObjectHelper::setV, it will automatically call | ||
| 184 | - // this method unless you tell it not to. | 150 | + // Indicate whether appearance streams must be regenerated. If you modify a field value, you |
| 151 | + // should call setNeedAppearances(true) unless you also generate an appearance stream for the | ||
| 152 | + // corresponding annotation at the same time. If you generate appearance streams for all fields, | ||
| 153 | + // you can call setNeedAppearances(false). If you use QPDFFormFieldObjectHelper::setV, it will | ||
| 154 | + // automatically call this method unless you tell it not to. | ||
| 185 | QPDF_DLL | 155 | QPDF_DLL |
| 186 | void setNeedAppearances(bool); | 156 | void setNeedAppearances(bool); |
| 187 | 157 | ||
| 188 | - // If /NeedAppearances is false, do nothing. Otherwise generate | ||
| 189 | - // appearance streams for all widget annotations that need them. | ||
| 190 | - // See comments in QPDFFormFieldObjectHelper.hh for | ||
| 191 | - // generateAppearance for limitations. For checkbox and radio | ||
| 192 | - // button fields, this code ensures that appearance state is | ||
| 193 | - // consistent with the field's value and uses any pre-existing | 158 | + // If /NeedAppearances is false, do nothing. Otherwise generate appearance streams for all |
| 159 | + // widget annotations that need them. See comments in QPDFFormFieldObjectHelper.hh for | ||
| 160 | + // generateAppearance for limitations. For checkbox and radio button fields, this code ensures | ||
| 161 | + // that appearance state is consistent with the field's value and uses any pre-existing | ||
| 194 | // appearance streams. | 162 | // appearance streams. |
| 195 | QPDF_DLL | 163 | QPDF_DLL |
| 196 | void generateAppearancesIfNeeded(); | 164 | void generateAppearancesIfNeeded(); |
| 197 | 165 | ||
| 198 | - // Note: this method works on all annotations, not just ones with | ||
| 199 | - // associated fields. For each annotation in old_annots, apply the | ||
| 200 | - // given transformation matrix to create a new annotation. New | ||
| 201 | - // annotations are appended to new_annots. If the annotation is | ||
| 202 | - // associated with a form field, a new form field is created that | ||
| 203 | - // points to the new annotation and is appended to new_fields, and | ||
| 204 | - // the old field is added to old_fields. | 166 | + // Note: this method works on all annotations, not just ones with associated fields. For each |
| 167 | + // annotation in old_annots, apply the given transformation matrix to create a new annotation. | ||
| 168 | + // New annotations are appended to new_annots. If the annotation is associated with a form | ||
| 169 | + // field, a new form field is created that points to the new annotation and is appended to | ||
| 170 | + // new_fields, and the old field is added to old_fields. | ||
| 205 | // | 171 | // |
| 206 | - // old_annots may belong to a different QPDF object. In that case, | ||
| 207 | - // you should pass in from_qpdf, and copyForeignObject will be | ||
| 208 | - // called automatically. If this is the case, for efficiency, you | ||
| 209 | - // may pass in a QPDFAcroFormDocumentHelper for the other file to | ||
| 210 | - // avoid the expensive process of creating one for each call to | ||
| 211 | - // transformAnnotations. New fields and annotations are not added | ||
| 212 | - // to the document or pages. You have to do that yourself after | ||
| 213 | - // calling transformAnnotations. If this operation will leave | ||
| 214 | - // orphaned fields behind, such as if you are replacing the old | ||
| 215 | - // annotations with the new ones on the same page and the fields | ||
| 216 | - // and annotations are not shared, you will also need to remove | ||
| 217 | - // the old fields to prevent them from hanging round unreferenced. | 172 | + // old_annots may belong to a different QPDF object. In that case, you should pass in from_qpdf, |
| 173 | + // and copyForeignObject will be called automatically. If this is the case, for efficiency, you | ||
| 174 | + // may pass in a QPDFAcroFormDocumentHelper for the other file to avoid the expensive process of | ||
| 175 | + // creating one for each call to transformAnnotations. New fields and annotations are not added | ||
| 176 | + // to the document or pages. You have to do that yourself after calling transformAnnotations. If | ||
| 177 | + // this operation will leave orphaned fields behind, such as if you are replacing the old | ||
| 178 | + // annotations with the new ones on the same page and the fields and annotations are not shared, | ||
| 179 | + // you will also need to remove the old fields to prevent them from hanging round unreferenced. | ||
| 218 | QPDF_DLL | 180 | QPDF_DLL |
| 219 | void transformAnnotations( | 181 | void transformAnnotations( |
| 220 | QPDFObjectHandle old_annots, | 182 | QPDFObjectHandle old_annots, |
| @@ -225,18 +187,14 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper | @@ -225,18 +187,14 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper | ||
| 225 | QPDF* from_qpdf = nullptr, | 187 | QPDF* from_qpdf = nullptr, |
| 226 | QPDFAcroFormDocumentHelper* from_afdh = nullptr); | 188 | QPDFAcroFormDocumentHelper* from_afdh = nullptr); |
| 227 | 189 | ||
| 228 | - // Copy form fields and annotations from one page to another, | ||
| 229 | - // allowing the from page to be in a different QPDF or in the same | ||
| 230 | - // QPDF. This would typically be called after calling addPage to | ||
| 231 | - // add field/annotation awareness. When just copying the page by | ||
| 232 | - // itself, annotations end up being shared, and fields end up | ||
| 233 | - // being omitted because there is no reference to the field from | ||
| 234 | - // the page. This method ensures that each separate copy of a page | ||
| 235 | - // has private annotations and that fields and annotations are | ||
| 236 | - // properly updated to resolve conflicts that may occur from | ||
| 237 | - // common resource and field names across documents. It is | ||
| 238 | - // basically a wrapper around transformAnnotations that handles | ||
| 239 | - // updating the receiving page. If new_fields is non-null, any | 190 | + // Copy form fields and annotations from one page to another, allowing the from page to be in a |
| 191 | + // different QPDF or in the same QPDF. This would typically be called after calling addPage to | ||
| 192 | + // add field/annotation awareness. When just copying the page by itself, annotations end up | ||
| 193 | + // being shared, and fields end up being omitted because there is no reference to the field from | ||
| 194 | + // the page. This method ensures that each separate copy of a page has private annotations and | ||
| 195 | + // that fields and annotations are properly updated to resolve conflicts that may occur from | ||
| 196 | + // common resource and field names across documents. It is basically a wrapper around | ||
| 197 | + // transformAnnotations that handles updating the receiving page. If new_fields is non-null, any | ||
| 240 | // newly created fields are added to it. | 198 | // newly created fields are added to it. |
| 241 | QPDF_DLL | 199 | QPDF_DLL |
| 242 | void fixCopiedAnnotations( | 200 | void fixCopiedAnnotations( |
include/qpdf/QPDFExc.hh
| @@ -2,22 +2,19 @@ | @@ -2,22 +2,19 @@ | ||
| 2 | // | 2 | // |
| 3 | // This file is part of qpdf. | 3 | // This file is part of qpdf. |
| 4 | // | 4 | // |
| 5 | -// Licensed under the Apache License, Version 2.0 (the "License"); | ||
| 6 | -// you may not use this file except in compliance with the License. | ||
| 7 | -// You may obtain a copy of the License at | 5 | +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
| 6 | +// in compliance with the License. You may obtain a copy of the License at | ||
| 8 | // | 7 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 | 8 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // | 9 | // |
| 11 | -// Unless required by applicable law or agreed to in writing, software | ||
| 12 | -// distributed under the License is distributed on an "AS IS" BASIS, | ||
| 13 | -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 14 | -// See the License for the specific language governing permissions and | ||
| 15 | -// limitations under the License. | 10 | +// Unless required by applicable law or agreed to in writing, software distributed under the License |
| 11 | +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | ||
| 12 | +// or implied. See the License for the specific language governing permissions and limitations under | ||
| 13 | +// the License. | ||
| 16 | // | 14 | // |
| 17 | -// Versions of qpdf prior to version 7 were released under the terms | ||
| 18 | -// of version 2.0 of the Artistic License. At your option, you may | ||
| 19 | -// continue to consider qpdf to be licensed under those terms. Please | ||
| 20 | -// see the manual for additional information. | 15 | +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic |
| 16 | +// License. At your option, you may continue to consider qpdf to be licensed under those terms. | ||
| 17 | +// Please see the manual for additional information. | ||
| 21 | 18 | ||
| 22 | #ifndef QPDFEXC_HH | 19 | #ifndef QPDFEXC_HH |
| 23 | #define QPDFEXC_HH | 20 | #define QPDFEXC_HH |
| @@ -42,14 +39,12 @@ class QPDF_DLL_CLASS QPDFExc: public std::runtime_error | @@ -42,14 +39,12 @@ class QPDF_DLL_CLASS QPDFExc: public std::runtime_error | ||
| 42 | QPDF_DLL | 39 | QPDF_DLL |
| 43 | virtual ~QPDFExc() noexcept = default; | 40 | virtual ~QPDFExc() noexcept = default; |
| 44 | 41 | ||
| 45 | - // To get a complete error string, call what(), provided by | ||
| 46 | - // std::exception. The accessors below return the original values | ||
| 47 | - // used to create the exception. Only the error code and message | ||
| 48 | - // are guaranteed to have non-zero/non-empty values. | 42 | + // To get a complete error string, call what(), provided by std::exception. The accessors below |
| 43 | + // return the original values used to create the exception. Only the error code and message are | ||
| 44 | + // guaranteed to have non-zero/non-empty values. | ||
| 49 | 45 | ||
| 50 | - // There is no lookup code that maps numeric error codes into | ||
| 51 | - // strings. The numeric error code is just another way to get at | ||
| 52 | - // the underlying issue, but it is more programmer-friendly than | 46 | + // There is no lookup code that maps numeric error codes into strings. The numeric error code |
| 47 | + // is just another way to get at the underlying issue, but it is more programmer-friendly than | ||
| 53 | // trying to parse a string that is subject to change. | 48 | // trying to parse a string that is subject to change. |
| 54 | 49 | ||
| 55 | QPDF_DLL | 50 | QPDF_DLL |
| @@ -71,8 +66,8 @@ class QPDF_DLL_CLASS QPDFExc: public std::runtime_error | @@ -71,8 +66,8 @@ class QPDF_DLL_CLASS QPDFExc: public std::runtime_error | ||
| 71 | qpdf_offset_t offset, | 66 | qpdf_offset_t offset, |
| 72 | std::string const& message); | 67 | std::string const& message); |
| 73 | 68 | ||
| 74 | - // This class does not use the Members pattern to avoid needless | ||
| 75 | - // memory allocations during exception handling. | 69 | + // This class does not use the Members pattern to avoid needless memory allocations during |
| 70 | + // exception handling. | ||
| 76 | 71 | ||
| 77 | qpdf_error_code_e error_code; | 72 | qpdf_error_code_e error_code; |
| 78 | std::string filename; | 73 | std::string filename; |
include/qpdf/QPDFFormFieldObjectHelper.hh
| @@ -2,29 +2,25 @@ | @@ -2,29 +2,25 @@ | ||
| 2 | // | 2 | // |
| 3 | // This file is part of qpdf. | 3 | // This file is part of qpdf. |
| 4 | // | 4 | // |
| 5 | -// Licensed under the Apache License, Version 2.0 (the "License"); | ||
| 6 | -// you may not use this file except in compliance with the License. | ||
| 7 | -// You may obtain a copy of the License at | 5 | +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
| 6 | +// in compliance with the License. You may obtain a copy of the License at | ||
| 8 | // | 7 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 | 8 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // | 9 | // |
| 11 | -// Unless required by applicable law or agreed to in writing, software | ||
| 12 | -// distributed under the License is distributed on an "AS IS" BASIS, | ||
| 13 | -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 14 | -// See the License for the specific language governing permissions and | ||
| 15 | -// limitations under the License. | 10 | +// Unless required by applicable law or agreed to in writing, software distributed under the License |
| 11 | +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | ||
| 12 | +// or implied. See the License for the specific language governing permissions and limitations under | ||
| 13 | +// the License. | ||
| 16 | // | 14 | // |
| 17 | -// Versions of qpdf prior to version 7 were released under the terms | ||
| 18 | -// of version 2.0 of the Artistic License. At your option, you may | ||
| 19 | -// continue to consider qpdf to be licensed under those terms. Please | ||
| 20 | -// see the manual for additional information. | 15 | +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic |
| 16 | +// License. At your option, you may continue to consider qpdf to be licensed under those terms. | ||
| 17 | +// Please see the manual for additional information. | ||
| 21 | 18 | ||
| 22 | #ifndef QPDFFORMFIELDOBJECTHELPER_HH | 19 | #ifndef QPDFFORMFIELDOBJECTHELPER_HH |
| 23 | #define QPDFFORMFIELDOBJECTHELPER_HH | 20 | #define QPDFFORMFIELDOBJECTHELPER_HH |
| 24 | 21 | ||
| 25 | -// This object helper helps with form fields for interactive forms. | ||
| 26 | -// Please see comments in QPDFAcroFormDocumentHelper.hh for additional | ||
| 27 | -// details. | 22 | +// This object helper helps with form fields for interactive forms. Please see comments in |
| 23 | +// QPDFAcroFormDocumentHelper.hh for additional details. | ||
| 28 | 24 | ||
| 29 | #include <qpdf/QPDFObjectHelper.hh> | 25 | #include <qpdf/QPDFObjectHelper.hh> |
| 30 | 26 | ||
| @@ -46,37 +42,32 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper | @@ -46,37 +42,32 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper | ||
| 46 | QPDF_DLL | 42 | QPDF_DLL |
| 47 | bool isNull(); | 43 | bool isNull(); |
| 48 | 44 | ||
| 49 | - // Return the field's parent. A form field object helper whose | ||
| 50 | - // underlying object is null is returned if there is no parent. | ||
| 51 | - // This condition may be tested by calling isNull(). | 45 | + // Return the field's parent. A form field object helper whose underlying object is null is |
| 46 | + // returned if there is no parent. This condition may be tested by calling isNull(). | ||
| 52 | QPDF_DLL | 47 | QPDF_DLL |
| 53 | QPDFFormFieldObjectHelper getParent(); | 48 | QPDFFormFieldObjectHelper getParent(); |
| 54 | 49 | ||
| 55 | - // Return the top-level field for this field. Typically this will | ||
| 56 | - // be the field itself or its parent. If is_different is provided, | ||
| 57 | - // it is set to true if the top-level field is different from the | ||
| 58 | - // field itself; otherwise it is set to false. | 50 | + // Return the top-level field for this field. Typically this will be the field itself or its |
| 51 | + // parent. If is_different is provided, it is set to true if the top-level field is different | ||
| 52 | + // from the field itself; otherwise it is set to false. | ||
| 59 | QPDF_DLL | 53 | QPDF_DLL |
| 60 | QPDFFormFieldObjectHelper getTopLevelField(bool* is_different = nullptr); | 54 | QPDFFormFieldObjectHelper getTopLevelField(bool* is_different = nullptr); |
| 61 | 55 | ||
| 62 | - // Get a field value, possibly inheriting the value from an | ||
| 63 | - // ancestor node. | 56 | + // Get a field value, possibly inheriting the value from an ancestor node. |
| 64 | QPDF_DLL | 57 | QPDF_DLL |
| 65 | QPDFObjectHandle getInheritableFieldValue(std::string const& name); | 58 | QPDFObjectHandle getInheritableFieldValue(std::string const& name); |
| 66 | 59 | ||
| 67 | - // Get an inherited field value as a string. If it is not a | ||
| 68 | - // string, silently return the empty string. | 60 | + // Get an inherited field value as a string. If it is not a string, silently return the empty |
| 61 | + // string. | ||
| 69 | QPDF_DLL | 62 | QPDF_DLL |
| 70 | std::string getInheritableFieldValueAsString(std::string const& name); | 63 | std::string getInheritableFieldValueAsString(std::string const& name); |
| 71 | 64 | ||
| 72 | - // Get an inherited field value of type name as a string | ||
| 73 | - // representing the name. If it is not a name, silently return | ||
| 74 | - // the empty string. | 65 | + // Get an inherited field value of type name as a string representing the name. If it is not a |
| 66 | + // name, silently return the empty string. | ||
| 75 | QPDF_DLL | 67 | QPDF_DLL |
| 76 | std::string getInheritableFieldValueAsName(std::string const& name); | 68 | std::string getInheritableFieldValueAsName(std::string const& name); |
| 77 | 69 | ||
| 78 | - // Returns the value of /FT if present, otherwise returns the | ||
| 79 | - // empty string. | 70 | + // Returns the value of /FT if present, otherwise returns the empty string. |
| 80 | QPDF_DLL | 71 | QPDF_DLL |
| 81 | std::string getFieldType(); | 72 | std::string getFieldType(); |
| 82 | 73 | ||
| @@ -86,60 +77,53 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper | @@ -86,60 +77,53 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper | ||
| 86 | QPDF_DLL | 77 | QPDF_DLL |
| 87 | std::string getPartialName(); | 78 | std::string getPartialName(); |
| 88 | 79 | ||
| 89 | - // Return the alternative field name (/TU), which is the field | ||
| 90 | - // name intended to be presented to users. If not present, fall | ||
| 91 | - // back to the fully qualified name. | 80 | + // Return the alternative field name (/TU), which is the field name intended to be presented to |
| 81 | + // users. If not present, fall back to the fully qualified name. | ||
| 92 | QPDF_DLL | 82 | QPDF_DLL |
| 93 | std::string getAlternativeName(); | 83 | std::string getAlternativeName(); |
| 94 | 84 | ||
| 95 | - // Return the mapping field name (/TM). If not present, fall back | ||
| 96 | - // to the alternative name, then to the partial name. | 85 | + // Return the mapping field name (/TM). If not present, fall back to the alternative name, then |
| 86 | + // to the partial name. | ||
| 97 | QPDF_DLL | 87 | QPDF_DLL |
| 98 | std::string getMappingName(); | 88 | std::string getMappingName(); |
| 99 | 89 | ||
| 100 | QPDF_DLL | 90 | QPDF_DLL |
| 101 | QPDFObjectHandle getValue(); | 91 | QPDFObjectHandle getValue(); |
| 102 | 92 | ||
| 103 | - // Return the field's value as a string. If this is called with a | ||
| 104 | - // field whose value is not a string, the empty string will be | ||
| 105 | - // silently returned. | 93 | + // Return the field's value as a string. If this is called with a field whose value is not a |
| 94 | + // string, the empty string will be silently returned. | ||
| 106 | QPDF_DLL | 95 | QPDF_DLL |
| 107 | std::string getValueAsString(); | 96 | std::string getValueAsString(); |
| 108 | 97 | ||
| 109 | QPDF_DLL | 98 | QPDF_DLL |
| 110 | QPDFObjectHandle getDefaultValue(); | 99 | QPDFObjectHandle getDefaultValue(); |
| 111 | 100 | ||
| 112 | - // Return the field's default value as a string. If this is called | ||
| 113 | - // with a field whose value is not a string, the empty string will | ||
| 114 | - // be silently returned. | 101 | + // Return the field's default value as a string. If this is called with a field whose value is |
| 102 | + // not a string, the empty string will be silently returned. | ||
| 115 | QPDF_DLL | 103 | QPDF_DLL |
| 116 | std::string getDefaultValueAsString(); | 104 | std::string getDefaultValueAsString(); |
| 117 | 105 | ||
| 118 | - // Return the default appearance string, taking inheritance from | ||
| 119 | - // the field tree into account. Returns the empty string if the | ||
| 120 | - // default appearance string is not available (because it's | ||
| 121 | - // erroneously absent or because this is not a variable text | ||
| 122 | - // field). If not found in the field hierarchy, look in /AcroForm. | 106 | + // Return the default appearance string, taking inheritance from the field tree into account. |
| 107 | + // Returns the empty string if the default appearance string is not available (because it's | ||
| 108 | + // erroneously absent or because this is not a variable text field). If not found in the field | ||
| 109 | + // hierarchy, look in /AcroForm. | ||
| 123 | QPDF_DLL | 110 | QPDF_DLL |
| 124 | std::string getDefaultAppearance(); | 111 | std::string getDefaultAppearance(); |
| 125 | 112 | ||
| 126 | - // Return the default resource dictionary for the field. This | ||
| 127 | - // comes not from the field but from the document-level /AcroForm | ||
| 128 | - // dictionary. While several PDF generates put a /DR key in the | ||
| 129 | - // form field's dictionary, experimentation suggests that many | ||
| 130 | - // popular readers, including Adobe Acrobat and Acrobat Reader, | ||
| 131 | - // ignore any /DR item on the field. | 113 | + // Return the default resource dictionary for the field. This comes not from the field but from |
| 114 | + // the document-level /AcroForm dictionary. While several PDF generators put a /DR key in the | ||
| 115 | + // form field's dictionary, experimentation suggests that many popular readers, including Adobe | ||
| 116 | + // Acrobat and Acrobat Reader, ignore any /DR item on the field. | ||
| 132 | QPDF_DLL | 117 | QPDF_DLL |
| 133 | QPDFObjectHandle getDefaultResources(); | 118 | QPDFObjectHandle getDefaultResources(); |
| 134 | 119 | ||
| 135 | - // Return the quadding value, taking inheritance from the field | ||
| 136 | - // tree into account. Returns 0 if quadding is not specified. Look | ||
| 137 | - // in /AcroForm if not found in the field hierarchy. | 120 | + // Return the quadding value, taking inheritance from the field tree into account. Returns 0 if |
| 121 | + // quadding is not specified. Look in /AcroForm if not found in the field hierarchy. | ||
| 138 | QPDF_DLL | 122 | QPDF_DLL |
| 139 | int getQuadding(); | 123 | int getQuadding(); |
| 140 | 124 | ||
| 141 | - // Return field flags from /Ff. The value is a logical or of | ||
| 142 | - // pdf_form_field_flag_e as defined in qpdf/Constants.h | 125 | + // Return field flags from /Ff. The value is a logical or of pdf_form_field_flag_e as defined in |
| 126 | + // qpdf/Constants.h | ||
| 143 | QPDF_DLL | 127 | QPDF_DLL |
| 144 | int getFlags(); | 128 | int getFlags(); |
| 145 | 129 | ||
| @@ -148,19 +132,16 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper | @@ -148,19 +132,16 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper | ||
| 148 | // Returns true if field is of type /Tx | 132 | // Returns true if field is of type /Tx |
| 149 | QPDF_DLL | 133 | QPDF_DLL |
| 150 | bool isText(); | 134 | bool isText(); |
| 151 | - // Returns true if field is of type /Btn and flags do not indicate | ||
| 152 | - // some other type of button. | 135 | + // Returns true if field is of type /Btn and flags do not indicate some other type of button. |
| 153 | QPDF_DLL | 136 | QPDF_DLL |
| 154 | bool isCheckbox(); | 137 | bool isCheckbox(); |
| 155 | // Returns true if field is a checkbox and is checked. | 138 | // Returns true if field is a checkbox and is checked. |
| 156 | QPDF_DLL | 139 | QPDF_DLL |
| 157 | bool isChecked(); | 140 | bool isChecked(); |
| 158 | - // Returns true if field is of type /Btn and flags indicate that | ||
| 159 | - // it is a radio button | 141 | + // Returns true if field is of type /Btn and flags indicate that it is a radio button |
| 160 | QPDF_DLL | 142 | QPDF_DLL |
| 161 | bool isRadioButton(); | 143 | bool isRadioButton(); |
| 162 | - // Returns true if field is of type /Btn and flags indicate that | ||
| 163 | - // it is a pushbutton | 144 | + // Returns true if field is of type /Btn and flags indicate that it is a pushbutton |
| 164 | QPDF_DLL | 145 | QPDF_DLL |
| 165 | bool isPushbutton(); | 146 | bool isPushbutton(); |
| 166 | // Returns true if field is of type /Ch | 147 | // Returns true if field is of type /Ch |
| @@ -170,45 +151,36 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper | @@ -170,45 +151,36 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper | ||
| 170 | QPDF_DLL | 151 | QPDF_DLL |
| 171 | std::vector<std::string> getChoices(); | 152 | std::vector<std::string> getChoices(); |
| 172 | 153 | ||
| 173 | - // Set an attribute to the given value. If you have a | ||
| 174 | - // QPDFAcroFormDocumentHelper and you want to set the name of a | ||
| 175 | - // field, use QPDFAcroFormDocumentHelper::setFormFieldName | ||
| 176 | - // instead. | 154 | + // Set an attribute to the given value. If you have a QPDFAcroFormDocumentHelper and you want to |
| 155 | + // set the name of a field, use QPDFAcroFormDocumentHelper::setFormFieldName instead. | ||
| 177 | QPDF_DLL | 156 | QPDF_DLL |
| 178 | void setFieldAttribute(std::string const& key, QPDFObjectHandle value); | 157 | void setFieldAttribute(std::string const& key, QPDFObjectHandle value); |
| 179 | 158 | ||
| 180 | - // Set an attribute to the given value as a Unicode string (UTF-16 | ||
| 181 | - // BE encoded). The input string should be UTF-8 encoded. If you | ||
| 182 | - // have a QPDFAcroFormDocumentHelper and you want to set the name | ||
| 183 | - // of a field, use QPDFAcroFormDocumentHelper::setFormFieldName | ||
| 184 | - // instead. | 159 | + // Set an attribute to the given value as a Unicode string (UTF-16 BE encoded). The input string |
| 160 | + // should be UTF-8 encoded. If you have a QPDFAcroFormDocumentHelper and you want to set the | ||
| 161 | + // name of a field, use QPDFAcroFormDocumentHelper::setFormFieldName instead. | ||
| 185 | QPDF_DLL | 162 | QPDF_DLL |
| 186 | void setFieldAttribute(std::string const& key, std::string const& utf8_value); | 163 | void setFieldAttribute(std::string const& key, std::string const& utf8_value); |
| 187 | 164 | ||
| 188 | - // Set /V (field value) to the given value. If need_appearances is | ||
| 189 | - // true and the field type is either /Tx (text) or /Ch (choice), | ||
| 190 | - // set /NeedAppearances to true. You can explicitly tell this | ||
| 191 | - // method not to set /NeedAppearances if you are going to generate | ||
| 192 | - // an appearance stream yourself. Starting with qpdf 8.3.0, this | ||
| 193 | - // method handles fields of type /Btn (checkboxes, radio buttons, | ||
| 194 | - // pushbuttons) specially. | 165 | + // Set /V (field value) to the given value. If need_appearances is true and the field type is |
| 166 | + // either /Tx (text) or /Ch (choice), set /NeedAppearances to true. You can explicitly tell this | ||
| 167 | + // method not to set /NeedAppearances if you are going to generate an appearance stream | ||
| 168 | + // yourself. Starting with qpdf 8.3.0, this method handles fields of type /Btn (checkboxes, | ||
| 169 | + // radio buttons, pushbuttons) specially. | ||
| 195 | QPDF_DLL | 170 | QPDF_DLL |
| 196 | void setV(QPDFObjectHandle value, bool need_appearances = true); | 171 | void setV(QPDFObjectHandle value, bool need_appearances = true); |
| 197 | 172 | ||
| 198 | - // Set /V (field value) to the given string value encoded as a | ||
| 199 | - // Unicode string. The input value should be UTF-8 encoded. See | ||
| 200 | - // comments above about /NeedAppearances. | 173 | + // Set /V (field value) to the given string value encoded as a Unicode string. The input value |
| 174 | + // should be UTF-8 encoded. See comments above about /NeedAppearances. | ||
| 201 | QPDF_DLL | 175 | QPDF_DLL |
| 202 | void setV(std::string const& utf8_value, bool need_appearances = true); | 176 | void setV(std::string const& utf8_value, bool need_appearances = true); |
| 203 | 177 | ||
| 204 | - // Update the appearance stream for this field. Note that qpdf's | ||
| 205 | - // ability to generate appearance streams is limited. We only | ||
| 206 | - // generate appearance streams for streams of type text or choice. | ||
| 207 | - // The appearance uses the default parameters provided in the | ||
| 208 | - // file, and it only supports ASCII characters. Quadding is | ||
| 209 | - // currently ignored. While this functionality is limited, it | ||
| 210 | - // should do a decent job on properly constructed PDF files when | ||
| 211 | - // field values are restricted to ASCII characters. | 178 | + // Update the appearance stream for this field. Note that qpdf's ability to generate appearance |
| 179 | + // streams is limited. We only generate appearance streams for fields of type text or choice. | ||
| 180 | + // The appearance uses the default parameters provided in the file, and it only supports ASCII | ||
| 181 | + // characters. Quadding is currently ignored. While this functionality is limited, it should do | ||
| 182 | + // a decent job on properly constructed PDF files when field values are restricted to ASCII | ||
| 183 | + // characters. | ||
| 212 | QPDF_DLL | 184 | QPDF_DLL |
| 213 | void generateAppearance(QPDFAnnotationObjectHelper&); | 185 | void generateAppearance(QPDFAnnotationObjectHelper&); |
| 214 | 186 |
include/qpdf/QPDFJob.hh
| @@ -2,22 +2,19 @@ | @@ -2,22 +2,19 @@ | ||
| 2 | // | 2 | // |
| 3 | // This file is part of qpdf. | 3 | // This file is part of qpdf. |
| 4 | // | 4 | // |
| 5 | -// Licensed under the Apache License, Version 2.0 (the "License"); | ||
| 6 | -// you may not use this file except in compliance with the License. | ||
| 7 | -// You may obtain a copy of the License at | 5 | +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
| 6 | +// in compliance with the License. You may obtain a copy of the License at | ||
| 8 | // | 7 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 | 8 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // | 9 | // |
| 11 | -// Unless required by applicable law or agreed to in writing, software | ||
| 12 | -// distributed under the License is distributed on an "AS IS" BASIS, | ||
| 13 | -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 14 | -// See the License for the specific language governing permissions and | ||
| 15 | -// limitations under the License. | 10 | +// Unless required by applicable law or agreed to in writing, software distributed under the License |
| 11 | +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | ||
| 12 | +// or implied. See the License for the specific language governing permissions and limitations under | ||
| 13 | +// the License. | ||
| 16 | // | 14 | // |
| 17 | -// Versions of qpdf prior to version 7 were released under the terms | ||
| 18 | -// of version 2.0 of the Artistic License. At your option, you may | ||
| 19 | -// continue to consider qpdf to be licensed under those terms. Please | ||
| 20 | -// see the manual for additional information. | 15 | +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic |
| 16 | +// License. At your option, you may continue to consider qpdf to be licensed under those terms. | ||
| 17 | +// Please see the manual for additional information. | ||
| 21 | 18 | ||
| 22 | #ifndef QPDFJOB_HH | 19 | #ifndef QPDFJOB_HH |
| 23 | #define QPDFJOB_HH | 20 | #define QPDFJOB_HH |
| @@ -55,99 +52,80 @@ class QPDFJob | @@ -55,99 +52,80 @@ class QPDFJob | ||
| 55 | static int constexpr EXIT_IS_NOT_ENCRYPTED = qpdf_exit_is_not_encrypted; | 52 | static int constexpr EXIT_IS_NOT_ENCRYPTED = qpdf_exit_is_not_encrypted; |
| 56 | static int constexpr EXIT_CORRECT_PASSWORD = qpdf_exit_correct_password; | 53 | static int constexpr EXIT_CORRECT_PASSWORD = qpdf_exit_correct_password; |
| 57 | 54 | ||
| 58 | - // QPDFUsage is thrown if there are any usage-like errors when | ||
| 59 | - // calling Config methods. | 55 | + // QPDFUsage is thrown if there are any usage-like errors when calling Config methods. |
| 60 | QPDF_DLL | 56 | QPDF_DLL |
| 61 | QPDFJob(); | 57 | QPDFJob(); |
| 62 | 58 | ||
| 63 | // SETUP FUNCTIONS | 59 | // SETUP FUNCTIONS |
| 64 | 60 | ||
| 65 | - // Initialize a QPDFJob object from argv, which must be a | ||
| 66 | - // null-terminated array of null-terminated UTF-8-encoded C | ||
| 67 | - // strings. The progname_env argument is the name of an | ||
| 68 | - // environment variable which, if set, overrides the name of the | ||
| 69 | - // executable for purposes of generating the --completion options. | ||
| 70 | - // See QPDFArgParser for details. If a null pointer is passed in, | ||
| 71 | - // the default value of "QPDF_EXECUTABLE" is used. This is used by | ||
| 72 | - // the QPDF cli, which just initializes a QPDFJob from argv, calls | ||
| 73 | - // run(), and handles errors and exit status issues. You can | ||
| 74 | - // perform much of the cli functionality programmatically in this | ||
| 75 | - // way rather than using the regular API. This is exposed in the C | ||
| 76 | - // API, which makes it easier to get certain high-level qpdf | ||
| 77 | - // functionality from other languages. If there are any | ||
| 78 | - // command-line errors, this method will throw QPDFUsage which is | ||
| 79 | - // derived from std::runtime_error. Other exceptions may be thrown | ||
| 80 | - // in some cases. Note that argc, and argv should be UTF-8 | ||
| 81 | - // encoded. If you are calling this from a Windows Unicode-aware | ||
| 82 | - // main (wmain), see QUtil::call_main_from_wmain for information | ||
| 83 | - // about converting arguments to UTF-8. This method will mutate | ||
| 84 | - // arguments that are passed to it. | 61 | + // Initialize a QPDFJob object from argv, which must be a null-terminated array of |
| 62 | + // null-terminated UTF-8-encoded C strings. The progname_env argument is the name of an | ||
| 63 | + // environment variable which, if set, overrides the name of the executable for purposes of | ||
| 64 | + // generating the --completion options. See QPDFArgParser for details. If a null pointer is | ||
| 65 | + // passed in, the default value of "QPDF_EXECUTABLE" is used. This is used by the QPDF cli, | ||
| 66 | + // which just initializes a QPDFJob from argv, calls run(), and handles errors and exit status | ||
| 67 | + // issues. You can perform much of the cli functionality programmatically in this way rather | ||
| 68 | + // than using the regular API. This is exposed in the C API, which makes it easier to get | ||
| 69 | + // certain high-level qpdf functionality from other languages. If there are any command-line | ||
| 70 | + // errors, this method will throw QPDFUsage which is derived from std::runtime_error. Other | ||
| 71 | + // exceptions may be thrown in some cases. Note that argv should be UTF-8 encoded. If | ||
| 72 | + // you are calling this from a Windows Unicode-aware main (wmain), see | ||
| 73 | + // QUtil::call_main_from_wmain for information about converting arguments to UTF-8. This method | ||
| 74 | + // will mutate arguments that are passed to it. | ||
| 85 | QPDF_DLL | 75 | QPDF_DLL |
| 86 | void initializeFromArgv(char const* const argv[], char const* progname_env = nullptr); | 76 | void initializeFromArgv(char const* const argv[], char const* progname_env = nullptr); |
| 87 | 77 | ||
| 88 | - // Initialize a QPDFJob from json. Passing partial = true prevents | ||
| 89 | - // this method from doing the final checks (calling | ||
| 90 | - // checkConfiguration) after processing the json file. This makes | ||
| 91 | - // it possible to initialize QPDFJob in stages using multiple json | ||
| 92 | - // files or to have a json file that can be processed from the CLI | ||
| 93 | - // with --job-json-file and be combined with other arguments. For | ||
| 94 | - // example, you might include only encryption parameters, leaving | ||
| 95 | - // it up to the rest of the command-line arguments to provide | ||
| 96 | - // input and output files. initializeFromJson is called with | ||
| 97 | - // partial = true when invoked from the command line. To make sure | ||
| 98 | - // that the json file is fully valid on its own, just don't | ||
| 99 | - // specify any other command-line flags. If there are any | ||
| 100 | - // configuration errors, QPDFUsage is thrown. Some error messages | ||
| 101 | - // may be CLI-centric. If an an exception tells you to use the | ||
| 102 | - // "--some-option" option, set the "someOption" key in the JSON | 78 | + // Initialize a QPDFJob from json. Passing partial = true prevents this method from doing the |
| 79 | + // final checks (calling checkConfiguration) after processing the json file. This makes it | ||
| 80 | + // possible to initialize QPDFJob in stages using multiple json files or to have a json file | ||
| 81 | + // that can be processed from the CLI with --job-json-file and be combined with other arguments. | ||
| 82 | + // For example, you might include only encryption parameters, leaving it up to the rest of the | ||
| 83 | + // command-line arguments to provide input and output files. initializeFromJson is called with | ||
| 84 | + // partial = true when invoked from the command line. To make sure that the json file is fully | ||
| 85 | + // valid on its own, just don't specify any other command-line flags. If there are any | ||
| 86 | + // configuration errors, QPDFUsage is thrown. Some error messages may be CLI-centric. If an | ||
| 87 | + // exception tells you to use the "--some-option" option, set the "someOption" key in the JSON | ||
| 103 | // object instead. | 88 | // object instead. |
| 104 | QPDF_DLL | 89 | QPDF_DLL |
| 105 | void initializeFromJson(std::string const& json, bool partial = false); | 90 | void initializeFromJson(std::string const& json, bool partial = false); |
| 106 | 91 | ||
| 107 | - // Set name that is used to prefix verbose messages, progress | ||
| 108 | - // messages, and other things that the library writes to output | ||
| 109 | - // and error streams on the caller's behalf. Defaults to "qpdf". | 92 | + // Set name that is used to prefix verbose messages, progress messages, and other things that |
| 93 | + // the library writes to output and error streams on the caller's behalf. Defaults to "qpdf". | ||
| 110 | QPDF_DLL | 94 | QPDF_DLL |
| 111 | void setMessagePrefix(std::string const&); | 95 | void setMessagePrefix(std::string const&); |
| 112 | QPDF_DLL | 96 | QPDF_DLL |
| 113 | std::string getMessagePrefix() const; | 97 | std::string getMessagePrefix() const; |
| 114 | 98 | ||
| 115 | - // To capture or redirect output, configure the logger returned by | ||
| 116 | - // getLogger(). By default, all QPDF and QPDFJob objects share the | ||
| 117 | - // global logger. If you need a private logger for some reason, | ||
| 118 | - // pass a new one to setLogger(). See comments in QPDFLogger.hh | ||
| 119 | - // for details on configuring the logger. | 99 | + // To capture or redirect output, configure the logger returned by getLogger(). By default, all |
| 100 | + // QPDF and QPDFJob objects share the global logger. If you need a private logger for some | ||
| 101 | + // reason, pass a new one to setLogger(). See comments in QPDFLogger.hh for details on | ||
| 102 | + // configuring the logger. | ||
| 120 | // | 103 | // |
| 121 | - // If you set a custom logger here, the logger will be passed to | ||
| 122 | - // all subsequent QPDF objects created by this QPDFJob object. | 104 | + // If you set a custom logger here, the logger will be passed to all subsequent QPDF objects |
| 105 | + // created by this QPDFJob object. | ||
| 123 | QPDF_DLL | 106 | QPDF_DLL |
| 124 | std::shared_ptr<QPDFLogger> getLogger(); | 107 | std::shared_ptr<QPDFLogger> getLogger(); |
| 125 | QPDF_DLL | 108 | QPDF_DLL |
| 126 | void setLogger(std::shared_ptr<QPDFLogger>); | 109 | void setLogger(std::shared_ptr<QPDFLogger>); |
| 127 | 110 | ||
| 128 | - // This deprecated method is the old way to capture output, but it | ||
| 129 | - // didn't capture all output. See comments above for getLogger and | ||
| 130 | - // setLogger. This will be removed in QPDF 12. For now, it | ||
| 131 | - // configures a private logger, separating this object from the | ||
| 132 | - // default logger, and calls setOutputStreams on that logger. See | ||
| 133 | - // QPDFLogger.hh for additional details. | 111 | + // This deprecated method is the old way to capture output, but it didn't capture all output. |
| 112 | + // See comments above for getLogger and setLogger. This will be removed in QPDF 12. For now, it | ||
| 113 | + // configures a private logger, separating this object from the default logger, and calls | ||
| 114 | + // setOutputStreams on that logger. See QPDFLogger.hh for additional details. | ||
| 134 | [[deprecated("configure logger from getLogger() or call setLogger()")]] QPDF_DLL void | 115 | [[deprecated("configure logger from getLogger() or call setLogger()")]] QPDF_DLL void |
| 135 | setOutputStreams(std::ostream* out_stream, std::ostream* err_stream); | 116 | setOutputStreams(std::ostream* out_stream, std::ostream* err_stream); |
| 136 | 117 | ||
| 137 | - // You can register a custom progress reporter to be called by | ||
| 138 | - // QPDFWriter (see QPDFWriter::registerProgressReporter). This is | ||
| 139 | - // only called if you also request progress reporting through | ||
| 140 | - // normal configuration methods (e.g., pass --progress, call | 118 | + // You can register a custom progress reporter to be called by QPDFWriter (see |
| 119 | + // QPDFWriter::registerProgressReporter). This is only called if you also request progress | ||
| 120 | + // reporting through normal configuration methods (e.g., pass --progress, call | ||
| 141 | // config()->progress, etc.) | 121 | // config()->progress, etc.) |
| 142 | QPDF_DLL | 122 | QPDF_DLL |
| 143 | void registerProgressReporter(std::function<void(int)>); | 123 | void registerProgressReporter(std::function<void(int)>); |
| 144 | 124 | ||
| 145 | - // Check to make sure no contradictory options have been | ||
| 146 | - // specified. This is called automatically after initializing from | ||
| 147 | - // argv or json and is also called by run, but you can call it | ||
| 148 | - // manually as well. It throws a QPDFUsage exception if there are | ||
| 149 | - // any errors. This Config object (see CONFIGURATION) also has a | ||
| 150 | - // checkConfiguration method which calls this one. | 125 | + // Check to make sure no contradictory options have been specified. This is called automatically |
| 126 | + // after initializing from argv or json and is also called by run, but you can call it manually | ||
| 127 | + // as well. It throws a QPDFUsage exception if there are any errors. The Config object (see | ||
| 128 | + // CONFIGURATION) also has a checkConfiguration method which calls this one. | ||
| 151 | QPDF_DLL | 129 | QPDF_DLL |
| 152 | void checkConfiguration(); | 130 | void checkConfiguration(); |
| 153 | 131 | ||
| @@ -157,8 +135,7 @@ class QPDFJob | @@ -157,8 +135,7 @@ class QPDFJob | ||
| 157 | 135 | ||
| 158 | // SEE BELOW FOR MORE PUBLIC METHODS AND CLASSES | 136 | // SEE BELOW FOR MORE PUBLIC METHODS AND CLASSES |
| 159 | private: | 137 | private: |
| 160 | - // These structures are private but we need to define them before | ||
| 161 | - // the public Config classes. | 138 | + // These structures are private but we need to define them before the public Config classes. |
| 162 | struct CopyAttachmentFrom | 139 | struct CopyAttachmentFrom |
| 163 | { | 140 | { |
| 164 | std::string path; | 141 | std::string path; |
| @@ -197,33 +174,27 @@ class QPDFJob | @@ -197,33 +174,27 @@ class QPDFJob | ||
| 197 | 174 | ||
| 198 | // Configuration classes are implemented in QPDFJob_config.cc. | 175 | // Configuration classes are implemented in QPDFJob_config.cc. |
| 199 | 176 | ||
| 200 | - // The config() method returns a shared pointer to a Config | ||
| 201 | - // object. The Config object contains methods that correspond with | ||
| 202 | - // qpdf command-line arguments. You can use a fluent interface to | ||
| 203 | - // configure a QPDFJob object that would do exactly the same thing | ||
| 204 | - // as a specific qpdf command. The example qpdf-job.cc contains an | ||
| 205 | - // example of this usage. You can also use initializeFromJson or | ||
| 206 | - // initializeFromArgv to initialize a QPDFJob object. | 177 | + // The config() method returns a shared pointer to a Config object. The Config object contains |
| 178 | + // methods that correspond with qpdf command-line arguments. You can use a fluent interface to | ||
| 179 | + // configure a QPDFJob object that would do exactly the same thing as a specific qpdf command. | ||
| 180 | + // The example qpdf-job.cc contains an example of this usage. You can also use | ||
| 181 | + // initializeFromJson or initializeFromArgv to initialize a QPDFJob object. | ||
| 207 | 182 | ||
| 208 | // Notes about the Config methods: | 183 | // Notes about the Config methods: |
| 209 | // | 184 | // |
| 210 | - // * Most of the method declarations are automatically generated | ||
| 211 | - // in header files that are included within the class | ||
| 212 | - // definitions. They correspond in predictable ways to the | ||
| 213 | - // command-line arguments and are generated from the same code | ||
| 214 | - // that generates the command-line argument parsing code. | 185 | + // * Most of the method declarations are automatically generated in header files that are |
| 186 | + // included within the class definitions. They correspond in predictable ways to the | ||
| 187 | + // command-line arguments and are generated from the same code that generates the command-line | ||
| 188 | + // argument parsing code. | ||
| 215 | // | 189 | // |
| 216 | - // * Methods return pointers, rather than references, to | ||
| 217 | - // configuration objects. References might feel more familiar to | ||
| 218 | - // users of fluent interfaces, so why do we use pointers? The | ||
| 219 | - // main methods that create them return smart pointers so that | ||
| 220 | - // users can initialize them when needed, which you can't do | ||
| 221 | - // with references. Returning pointers instead of references | ||
| 222 | - // makes for a more uniform interface. | ||
| 223 | - | ||
| 224 | - // Maintainer documentation: see the section in README-maintainer | ||
| 225 | - // called "HOW TO ADD A COMMAND-LINE ARGUMENT", which contains | ||
| 226 | - // references to additional places in the documentation. | 190 | + // * Methods return pointers, rather than references, to configuration objects. References |
| 191 | + // might feel more familiar to users of fluent interfaces, so why do we use pointers? The | ||
| 192 | + // main methods that create them return smart pointers so that users can initialize them when | ||
| 193 | + // needed, which you can't do with references. Returning pointers instead of references makes | ||
| 194 | + // for a more uniform interface. | ||
| 195 | + | ||
| 196 | + // Maintainer documentation: see the section in README-maintainer called "HOW TO ADD A | ||
| 197 | + // COMMAND-LINE ARGUMENT", which contains references to additional places in the documentation. | ||
| 227 | 198 | ||
| 228 | class Config; | 199 | class Config; |
| 229 | 200 | ||
| @@ -374,13 +345,11 @@ class QPDFJob | @@ -374,13 +345,11 @@ class QPDFJob | ||
| 374 | QPDFJob& o; | 345 | QPDFJob& o; |
| 375 | }; | 346 | }; |
| 376 | 347 | ||
| 377 | - // Return a top-level configuration item. See CONFIGURATION above | ||
| 378 | - // for details. If an invalid configuration is created (such as | ||
| 379 | - // supplying contradictory options, omitting an input file, etc.), | ||
| 380 | - // QPDFUsage is thrown. Note that error messages are CLI-centric, | ||
| 381 | - // but you can map them into config calls. For example, if an | ||
| 382 | - // exception tells you to use the --some-option flag, you should | ||
| 383 | - // call config()->someOption() instead. | 348 | + // Return a top-level configuration item. See CONFIGURATION above for details. If an invalid |
| 349 | + // configuration is created (such as supplying contradictory options, omitting an input file, | ||
| 350 | + // etc.), QPDFUsage is thrown. Note that error messages are CLI-centric, but you can map them | ||
| 351 | + // into config calls. For example, if an exception tells you to use the --some-option flag, you | ||
| 352 | + // should call config()->someOption() instead. | ||
| 384 | QPDF_DLL | 353 | QPDF_DLL |
| 385 | std::shared_ptr<Config> config(); | 354 | std::shared_ptr<Config> config(); |
| 386 | 355 | ||
| @@ -388,33 +357,27 @@ class QPDFJob | @@ -388,33 +357,27 @@ class QPDFJob | ||
| 388 | QPDF_DLL | 357 | QPDF_DLL |
| 389 | void run(); | 358 | void run(); |
| 390 | 359 | ||
| 391 | - // The following two methods allow a job to be run in two stages - creation | ||
| 392 | - // of a QPDF object and writing of the QPDF object. This allows the QPDF | ||
| 393 | - // object to be modified prior to writing it out. See | ||
| 394 | - // examples/qpdfjob-remove-annotations for an illustration of its use. | 360 | + // The following two methods allow a job to be run in two stages - creation of a QPDF object and |
| 361 | + // writing of the QPDF object. This allows the QPDF object to be modified prior to writing it | ||
| 362 | + // out. See examples/qpdfjob-remove-annotations for an illustration of its use. | ||
| 395 | 363 | ||
| 396 | - // Run the first stage of the job. Return a nullptr if the configuration is | ||
| 397 | - // not valid. | 364 | + // Run the first stage of the job. Return a nullptr if the configuration is not valid. |
| 398 | QPDF_DLL | 365 | QPDF_DLL |
| 399 | std::unique_ptr<QPDF> createQPDF(); | 366 | std::unique_ptr<QPDF> createQPDF(); |
| 400 | 367 | ||
| 401 | - // Run the second stage of the job. Do nothing if a nullptr is passed as | ||
| 402 | - // parameter. | 368 | + // Run the second stage of the job. Do nothing if a nullptr is passed as parameter. |
| 403 | QPDF_DLL | 369 | QPDF_DLL |
| 404 | void writeQPDF(QPDF& qpdf); | 370 | void writeQPDF(QPDF& qpdf); |
| 405 | 371 | ||
| 406 | - // CHECK STATUS -- these methods provide information known after | ||
| 407 | - // run() is called. | 372 | + // CHECK STATUS -- these methods provide information known after run() is called. |
| 408 | 373 | ||
| 409 | QPDF_DLL | 374 | QPDF_DLL |
| 410 | bool hasWarnings() const; | 375 | bool hasWarnings() const; |
| 411 | 376 | ||
| 412 | - // Return one of the EXIT_* constants defined at the top of the | ||
| 413 | - // class declaration. This may be called after run() when run() | ||
| 414 | - // did not throw an exception. Takes into consideration whether | ||
| 415 | - // isEncrypted or requiresPassword was called. Note that this | ||
| 416 | - // function does not know whether run() threw an exception, so | ||
| 417 | - // code that uses this to determine how to exit should explicitly | 377 | + // Return one of the EXIT_* constants defined at the top of the class declaration. This may be |
| 378 | + // called after run() when run() did not throw an exception. Takes into consideration whether | ||
| 379 | + // isEncrypted or requiresPassword was called. Note that this function does not know whether | ||
| 380 | + // run() threw an exception, so code that uses this to determine how to exit should explicitly | ||
| 418 | // use EXIT_ERROR if run() threw an exception. | 381 | // use EXIT_ERROR if run() threw an exception. |
| 419 | QPDF_DLL | 382 | QPDF_DLL |
| 420 | int getExitCode() const; | 383 | int getExitCode() const; |
| @@ -423,24 +386,22 @@ class QPDFJob | @@ -423,24 +386,22 @@ class QPDFJob | ||
| 423 | QPDF_DLL | 386 | QPDF_DLL |
| 424 | unsigned long getEncryptionStatus(); | 387 | unsigned long getEncryptionStatus(); |
| 425 | 388 | ||
| 426 | - // HELPER FUNCTIONS -- methods useful for calling in handlers that | ||
| 427 | - // interact with QPDFJob during run or initialization. | 389 | + // HELPER FUNCTIONS -- methods useful for calling in handlers that interact with QPDFJob during |
| 390 | + // run or initialization. | ||
| 428 | 391 | ||
| 429 | - // If in verbose mode, call the given function, passing in the | ||
| 430 | - // output stream and message prefix. | 392 | + // If in verbose mode, call the given function, passing in the output stream and message prefix. |
| 431 | QPDF_DLL | 393 | QPDF_DLL |
| 432 | void doIfVerbose(std::function<void(Pipeline&, std::string const& prefix)> fn); | 394 | void doIfVerbose(std::function<void(Pipeline&, std::string const& prefix)> fn); |
| 433 | 395 | ||
| 434 | - // Provide a string that is the help information ("schema" for the | ||
| 435 | - // qpdf-specific JSON object) for the specified version of JSON | ||
| 436 | - // output. | 396 | + // Provide a string that is the help information ("schema" for the qpdf-specific JSON object) |
| 397 | + // for the specified version of JSON output. | ||
| 437 | QPDF_DLL | 398 | QPDF_DLL |
| 438 | static std::string json_out_schema(int version); | 399 | static std::string json_out_schema(int version); |
| 439 | 400 | ||
| 440 | [[deprecated("use json_out_schema(version)")]] static std::string QPDF_DLL json_out_schema_v1(); | 401 | [[deprecated("use json_out_schema(version)")]] static std::string QPDF_DLL json_out_schema_v1(); |
| 441 | 402 | ||
| 442 | - // Provide a string that is the help information for the specified | ||
| 443 | - // version of JSON format for QPDFJob. | 403 | + // Provide a string that is the help information for the specified version of JSON format |
| 404 | + // for QPDFJob. | ||
| 444 | QPDF_DLL | 405 | QPDF_DLL |
| 445 | static std::string job_json_schema(int version); | 406 | static std::string job_json_schema(int version); |
| 446 | 407 |
include/qpdf/QPDFObjectHandle.hh
| @@ -66,9 +66,8 @@ class QPDFObjectHandle | @@ -66,9 +66,8 @@ class QPDFObjectHandle | ||
| 66 | friend class QPDFParser; | 66 | friend class QPDFParser; |
| 67 | 67 | ||
| 68 | public: | 68 | public: |
| 69 | - // This class is used by replaceStreamData. It provides an | ||
| 70 | - // alternative way of associating stream data with a stream. See | ||
| 71 | - // comments on replaceStreamData and newStream for additional | 69 | + // This class is used by replaceStreamData. It provides an alternative way of associating |
| 70 | + // stream data with a stream. See comments on replaceStreamData and newStream for additional | ||
| 72 | // details. | 71 | // details. |
| 73 | class QPDF_DLL_CLASS StreamDataProvider | 72 | class QPDF_DLL_CLASS StreamDataProvider |
| 74 | { | 73 | { |
| @@ -78,55 +77,40 @@ class QPDFObjectHandle | @@ -78,55 +77,40 @@ class QPDFObjectHandle | ||
| 78 | 77 | ||
| 79 | QPDF_DLL | 78 | QPDF_DLL |
| 80 | virtual ~StreamDataProvider(); | 79 | virtual ~StreamDataProvider(); |
| 81 | - // The implementation of this function must write stream data | ||
| 82 | - // to the given pipeline. The stream data must conform to | ||
| 83 | - // whatever filters are explicitly associated with the stream. | ||
| 84 | - // QPDFWriter may, in some cases, add compression, but if it | ||
| 85 | - // does, it will update the filters as needed. Every call to | ||
| 86 | - // provideStreamData for a given stream must write the same | ||
| 87 | - // data. Note that, when writing linearized files, qpdf will | ||
| 88 | - // call your provideStreamData twice, and if it generates | ||
| 89 | - // different output, you risk generating invalid output or | ||
| 90 | - // having qpdf throw an exception. The object ID and | ||
| 91 | - // generation passed to this method are those that belong to | ||
| 92 | - // the stream on behalf of which the provider is called. They | ||
| 93 | - // may be ignored or used by the implementation for indexing | ||
| 94 | - // or other purposes. This information is made available just | ||
| 95 | - // to make it more convenient to use a single | ||
| 96 | - // StreamDataProvider object to provide data for multiple | ||
| 97 | - // streams. | 80 | + // The implementation of this function must write stream data to the given pipeline. The |
| 81 | + // stream data must conform to whatever filters are explicitly associated with the stream. | ||
| 82 | + // QPDFWriter may, in some cases, add compression, but if it does, it will update the | ||
| 83 | + // filters as needed. Every call to provideStreamData for a given stream must write the same | ||
| 84 | + // data. Note that, when writing linearized files, qpdf will call your provideStreamData | ||
| 85 | + // twice, and if it generates different output, you risk generating invalid output or having | ||
| 86 | + // qpdf throw an exception. The object ID and generation passed to this method are those | ||
| 87 | + // that belong to the stream on behalf of which the provider is called. They may be ignored | ||
| 88 | + // or used by the implementation for indexing or other purposes. This information is made | ||
| 89 | + // available just to make it more convenient to use a single StreamDataProvider object to | ||
| 90 | + // provide data for multiple streams. | ||
| 98 | 91 | ||
| 99 | // A few things to keep in mind: | 92 | // A few things to keep in mind: |
| 100 | // | 93 | // |
| 101 | - // * Stream data providers must not modify any objects since | ||
| 102 | - // they may be called after some parts of the file have | ||
| 103 | - // already been written. | 94 | + // * Stream data providers must not modify any objects since they may be called after some |
| 95 | + // parts of the file have already been written. | ||
| 104 | // | 96 | // |
| 105 | - // * Since qpdf may call provideStreamData multiple times when | ||
| 106 | - // writing linearized files, if the work done by your stream | ||
| 107 | - // data provider is slow or computationally intensive, you | 97 | + // * Since qpdf may call provideStreamData multiple times when writing linearized files, if |
| 98 | + // the work done by your stream data provider is slow or computationally intensive, you | ||
| 108 | // might want to implement your own cache. | 99 | // might want to implement your own cache. |
| 109 | // | 100 | // |
| 110 | - // * Once you have called replaceStreamData, the original | ||
| 111 | - // stream data is no longer directly accessible from the | ||
| 112 | - // stream, but this is easy to work around by copying the | ||
| 113 | - // stream to a separate QPDF object. The qpdf library | ||
| 114 | - // implements this very efficiently without actually making | ||
| 115 | - // a copy of the stream data. You can find examples of this | ||
| 116 | - // pattern in some of the examples, including | ||
| 117 | - // pdf-custom-filter.cc and pdf-invert-images.cc. | ||
| 118 | - | ||
| 119 | - // Prior to qpdf 10.0.0, it was not possible to handle errors | ||
| 120 | - // the way pipeStreamData does or to pass back success. | ||
| 121 | - // Starting in qpdf 10.0.0, those capabilities have been added | ||
| 122 | - // by allowing an alternative provideStreamData to be | ||
| 123 | - // implemented. You must implement at least one of the | ||
| 124 | - // versions of provideStreamData below. If you implement the | ||
| 125 | - // version that supports retry and returns a value, you should | ||
| 126 | - // pass true as the value of supports_retry in the base class | ||
| 127 | - // constructor. This will cause the library to call that | ||
| 128 | - // version of the method, which should also return a boolean | ||
| 129 | - // indicating whether it ran without errors. | 101 | + // * Once you have called replaceStreamData, the original stream data is no longer directly |
| 102 | + // accessible from the stream, but this is easy to work around by copying the stream to | ||
| 103 | + // a separate QPDF object. The qpdf library implements this very efficiently without | ||
| 104 | + // actually making a copy of the stream data. You can find examples of this pattern in | ||
| 105 | + // some of the examples, including pdf-custom-filter.cc and pdf-invert-images.cc. | ||
| 106 | + | ||
| 107 | + // Prior to qpdf 10.0.0, it was not possible to handle errors the way pipeStreamData does or | ||
| 108 | + // to pass back success. Starting in qpdf 10.0.0, those capabilities have been added by | ||
| 109 | + // allowing an alternative provideStreamData to be implemented. You must implement at least | ||
| 110 | + // one of the versions of provideStreamData below. If you implement the version that | ||
| 111 | + // supports retry and returns a value, you should pass true as the value of supports_retry | ||
| 112 | + // in the base class constructor. This will cause the library to call that version of the | ||
| 113 | + // method, which should also return a boolean indicating whether it ran without errors. | ||
| 130 | QPDF_DLL | 114 | QPDF_DLL |
| 131 | virtual void provideStreamData(QPDFObjGen const& og, Pipeline* pipeline); | 115 | virtual void provideStreamData(QPDFObjGen const& og, Pipeline* pipeline); |
| 132 | QPDF_DLL | 116 | QPDF_DLL |
| @@ -142,41 +126,31 @@ class QPDFObjectHandle | @@ -142,41 +126,31 @@ class QPDFObjectHandle | ||
| 142 | bool supports_retry; | 126 | bool supports_retry; |
| 143 | }; | 127 | }; |
| 144 | 128 | ||
| 145 | - // The TokenFilter class provides a way to filter content streams | ||
| 146 | - // in a lexically aware fashion. TokenFilters can be attached to | ||
| 147 | - // streams using the addTokenFilter or addContentTokenFilter | ||
| 148 | - // methods or can be applied on the spot by filterPageContents. | ||
| 149 | - // You may also use Pl_QPDFTokenizer directly if you need full | ||
| 150 | - // control. | 129 | + // The TokenFilter class provides a way to filter content streams in a lexically aware fashion. |
| 130 | + // TokenFilters can be attached to streams using the addTokenFilter or addContentTokenFilter | ||
| 131 | + // methods or can be applied on the spot by filterPageContents. You may also use | ||
| 132 | + // Pl_QPDFTokenizer directly if you need full control. | ||
| 151 | // | 133 | // |
| 152 | - // The handleToken method is called for each token, including the | ||
| 153 | - // eof token, and then handleEOF is called at the very end. | ||
| 154 | - // Handlers may call write (or writeToken) to pass data | ||
| 155 | - // downstream. Please see examples/pdf-filter-tokens.cc and | ||
| 156 | - // examples/pdf-count-strings.cc for examples of using | ||
| 157 | - // TokenFilters. | 134 | + // The handleToken method is called for each token, including the eof token, and then handleEOF |
| 135 | + // is called at the very end. Handlers may call write (or writeToken) to pass data downstream. | ||
| 136 | + // Please see examples/pdf-filter-tokens.cc and examples/pdf-count-strings.cc for examples of | ||
| 137 | + // using TokenFilters. | ||
| 158 | // | 138 | // |
| 159 | - // Please note that when you call token.getValue() on a token of | ||
| 160 | - // type tt_string or tt_name, you get the canonical, "parsed" | ||
| 161 | - // representation of the token. For a string, this means that | ||
| 162 | - // there are no delimiters, and for a name, it means that all | ||
| 163 | - // escaping (# followed by two hex digits) has been resolved. | ||
| 164 | - // qpdf's internal representation of a name includes the leading | ||
| 165 | - // slash. As such, you can't write the value of token.getValue() | ||
| 166 | - // directly to output that is supposed to be valid PDF syntax. If | ||
| 167 | - // you want to do that, you need to call writeToken() instead, or | ||
| 168 | - // you can retrieve the token as it appeared in the input with | ||
| 169 | - // token.getRawValue(). To construct a new string or name token | ||
| 170 | - // from a canonical representation, use | 139 | + // Please note that when you call token.getValue() on a token of type tt_string or tt_name, you |
| 140 | + // get the canonical, "parsed" representation of the token. For a string, this means that there | ||
| 141 | + // are no delimiters, and for a name, it means that all escaping (# followed by two hex digits) | ||
| 142 | + // has been resolved. qpdf's internal representation of a name includes the leading slash. As | ||
| 143 | + // such, you can't write the value of token.getValue() directly to output that is supposed to be | ||
| 144 | + // valid PDF syntax. If you want to do that, you need to call writeToken() instead, or you can | ||
| 145 | + // retrieve the token as it appeared in the input with token.getRawValue(). To construct a new | ||
| 146 | + // string or name token from a canonical representation, use | ||
| 171 | // QPDFTokenizer::Token(QPDFTokenizer::tt_string, "parsed-str") or | 147 | // QPDFTokenizer::Token(QPDFTokenizer::tt_string, "parsed-str") or |
| 172 | // QPDFTokenizer::Token(QPDFTokenizer::tt_name, | 148 | // QPDFTokenizer::Token(QPDFTokenizer::tt_name, |
| 173 | - // "/Canonical-Name"). Tokens created this way won't have a | ||
| 174 | - // PDF-syntax raw value, but you can still write them with | ||
| 175 | - // writeToken(). Example: | 149 | + // "/Canonical-Name"). Tokens created this way won't have a PDF-syntax raw value, but you can |
| 150 | + // still write them with writeToken(). Example: | ||
| 176 | // writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_name, "/text/plain")) | 151 | // writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_name, "/text/plain")) |
| 177 | // would write `/text#2fplain`, and | 152 | // would write `/text#2fplain`, and |
| 178 | - // writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_string, "a\\(b")) | ||
| 179 | - // would write `(a\(b)`. | 153 | + // writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_string, "a\\(b")) would write `(a\(b)`. |
| 180 | class QPDF_DLL_CLASS TokenFilter | 154 | class QPDF_DLL_CLASS TokenFilter |
| 181 | { | 155 | { |
| 182 | public: | 156 | public: |
| @@ -215,8 +189,8 @@ class QPDFObjectHandle | @@ -215,8 +189,8 @@ class QPDFObjectHandle | ||
| 215 | Pipeline* pipeline; | 189 | Pipeline* pipeline; |
| 216 | }; | 190 | }; |
| 217 | 191 | ||
| 218 | - // This class is used by parse to decrypt strings when reading an | ||
| 219 | - // object that contains encrypted strings. | 192 | + // This class is used by parse to decrypt strings when reading an object that contains encrypted |
| 193 | + // strings. | ||
| 220 | class StringDecrypter | 194 | class StringDecrypter |
| 221 | { | 195 | { |
| 222 | public: | 196 | public: |
| @@ -225,9 +199,8 @@ class QPDFObjectHandle | @@ -225,9 +199,8 @@ class QPDFObjectHandle | ||
| 225 | virtual void decryptString(std::string& val) = 0; | 199 | virtual void decryptString(std::string& val) = 0; |
| 226 | }; | 200 | }; |
| 227 | 201 | ||
| 228 | - // This class is used by parsePageContents. Callers must | ||
| 229 | - // instantiate a subclass of this with handlers defined to accept | ||
| 230 | - // QPDFObjectHandles that are parsed from the stream. | 202 | + // This class is used by parsePageContents. Callers must instantiate a subclass of this with |
| 203 | + // handlers defined to accept QPDFObjectHandles that are parsed from the stream. | ||
| 231 | class QPDF_DLL_CLASS ParserCallbacks | 204 | class QPDF_DLL_CLASS ParserCallbacks |
| 232 | { | 205 | { |
| 233 | public: | 206 | public: |
| @@ -241,17 +214,14 @@ class QPDFObjectHandle | @@ -241,17 +214,14 @@ class QPDFObjectHandle | ||
| 241 | 214 | ||
| 242 | virtual void handleEOF() = 0; | 215 | virtual void handleEOF() = 0; |
| 243 | 216 | ||
| 244 | - // Override this if you want to know the full size of the | ||
| 245 | - // contents, possibly after concatenation of multiple streams. | ||
| 246 | - // This is called before the first call to handleObject. | 217 | + // Override this if you want to know the full size of the contents, possibly after |
| 218 | + // concatenation of multiple streams. This is called before the first call to handleObject. | ||
| 247 | QPDF_DLL | 219 | QPDF_DLL |
| 248 | virtual void contentSize(size_t); | 220 | virtual void contentSize(size_t); |
| 249 | 221 | ||
| 250 | protected: | 222 | protected: |
| 251 | - // Implementors may call this method during parsing to | ||
| 252 | - // terminate parsing early. This method throws an exception | ||
| 253 | - // that is caught by parsePageContents, so its effect is | ||
| 254 | - // immediate. | 223 | + // Implementors may call this method during parsing to terminate parsing early. This method |
| 224 | + // throws an exception that is caught by parsePageContents, so its effect is immediate. | ||
| 255 | QPDF_DLL | 225 | QPDF_DLL |
| 256 | void terminateParsing(); | 226 | void terminateParsing(); |
| 257 | }; | 227 | }; |
| @@ -281,9 +251,8 @@ class QPDFObjectHandle | @@ -281,9 +251,8 @@ class QPDFObjectHandle | ||
| 281 | double ury; | 251 | double ury; |
| 282 | }; | 252 | }; |
| 283 | 253 | ||
| 284 | - // Convenience object for transformation matrices. See also | ||
| 285 | - // QPDFMatrix. Unfortunately we can't replace this with QPDFMatrix | ||
| 286 | - // because QPDFMatrix's default constructor creates the identity | 254 | + // Convenience object for transformation matrices. See also QPDFMatrix. Unfortunately we can't |
| 255 | + // replace this with QPDFMatrix because QPDFMatrix's default constructor creates the identity | ||
| 287 | // transform matrix and this one is all zeroes. | 256 | // transform matrix and this one is all zeroes. |
| 288 | class Matrix | 257 | class Matrix |
| 289 | { | 258 | { |
| @@ -324,25 +293,22 @@ class QPDFObjectHandle | @@ -324,25 +293,22 @@ class QPDFObjectHandle | ||
| 324 | QPDF_DLL | 293 | QPDF_DLL |
| 325 | inline bool isInitialized() const; | 294 | inline bool isInitialized() const; |
| 326 | 295 | ||
| 327 | - // This method returns true if the QPDFObjectHandle objects point | ||
| 328 | - // to exactly the same underlying object, meaning that changes to | ||
| 329 | - // one are reflected in the other, or "if you paint one, the other | ||
| 330 | - // one changes color." This does not perform a structural | ||
| 331 | - // comparison of the contents of the objects. | 296 | + // This method returns true if the QPDFObjectHandle objects point to exactly the same underlying |
| 297 | + // object, meaning that changes to one are reflected in the other, or "if you paint one, the | ||
| 298 | + // other one changes color." This does not perform a structural comparison of the contents of | ||
| 299 | + // the objects. | ||
| 332 | QPDF_DLL | 300 | QPDF_DLL |
| 333 | bool isSameObjectAs(QPDFObjectHandle const&) const; | 301 | bool isSameObjectAs(QPDFObjectHandle const&) const; |
| 334 | 302 | ||
| 335 | - // Return type code and type name of underlying object. These are | ||
| 336 | - // useful for doing rapid type tests (like switch statements) or | ||
| 337 | - // for testing and debugging. | 303 | + // Return type code and type name of underlying object. These are useful for doing rapid type |
| 304 | + // tests (like switch statements) or for testing and debugging. | ||
| 338 | QPDF_DLL | 305 | QPDF_DLL |
| 339 | qpdf_object_type_e getTypeCode(); | 306 | qpdf_object_type_e getTypeCode(); |
| 340 | QPDF_DLL | 307 | QPDF_DLL |
| 341 | char const* getTypeName(); | 308 | char const* getTypeName(); |
| 342 | 309 | ||
| 343 | - // Exactly one of these will return true for any initialized | ||
| 344 | - // object. Operator and InlineImage are only allowed in content | ||
| 345 | - // streams. | 310 | + // Exactly one of these will return true for any initialized object. Operator and InlineImage |
| 311 | + // are only allowed in content streams. | ||
| 346 | QPDF_DLL | 312 | QPDF_DLL |
| 347 | bool isBool(); | 313 | bool isBool(); |
| 348 | QPDF_DLL | 314 | QPDF_DLL |
| @@ -368,26 +334,22 @@ class QPDFObjectHandle | @@ -368,26 +334,22 @@ class QPDFObjectHandle | ||
| 368 | QPDF_DLL | 334 | QPDF_DLL |
| 369 | bool isReserved(); | 335 | bool isReserved(); |
| 370 | 336 | ||
| 371 | - // True for objects that are direct nulls. Does not attempt to | ||
| 372 | - // resolve objects. This is intended for internal use, but it can | ||
| 373 | - // be used as an efficient way to check for nulls that are not | 337 | + // True for objects that are direct nulls. Does not attempt to resolve objects. This is intended |
| 338 | + // for internal use, but it can be used as an efficient way to check for nulls that are not | ||
| 374 | // indirect objects. | 339 | // indirect objects. |
| 375 | QPDF_DLL | 340 | QPDF_DLL |
| 376 | bool isDirectNull() const; | 341 | bool isDirectNull() const; |
| 377 | 342 | ||
| 378 | - // This returns true in addition to the query for the specific | ||
| 379 | - // type for indirect objects. | 343 | + // This returns true in addition to the query for the specific type for indirect objects. |
| 380 | QPDF_DLL | 344 | QPDF_DLL |
| 381 | inline bool isIndirect() const; | 345 | inline bool isIndirect() const; |
| 382 | 346 | ||
| 383 | - // This returns true for indirect objects from a QPDF that has | ||
| 384 | - // been destroyed. Trying unparse such an object will throw a | ||
| 385 | - // logic_error. | 347 | + // This returns true for indirect objects from a QPDF that has been destroyed. Trying unparse |
| 348 | + // such an object will throw a logic_error. | ||
| 386 | QPDF_DLL | 349 | QPDF_DLL |
| 387 | bool isDestroyed(); | 350 | bool isDestroyed(); |
| 388 | 351 | ||
| 389 | - // True for everything except array, dictionary, stream, word, and | ||
| 390 | - // inline image. | 352 | + // True for everything except array, dictionary, stream, word, and inline image. |
| 391 | QPDF_DLL | 353 | QPDF_DLL |
| 392 | bool isScalar(); | 354 | bool isScalar(); |
| 393 | 355 | ||
| @@ -395,53 +357,44 @@ class QPDFObjectHandle | @@ -395,53 +357,44 @@ class QPDFObjectHandle | ||
| 395 | QPDF_DLL | 357 | QPDF_DLL |
| 396 | bool isNameAndEquals(std::string const& name); | 358 | bool isNameAndEquals(std::string const& name); |
| 397 | 359 | ||
| 398 | - // True if the object is a dictionary of the specified type and | ||
| 399 | - // subtype, if any. | 360 | + // True if the object is a dictionary of the specified type and subtype, if any. |
| 400 | QPDF_DLL | 361 | QPDF_DLL |
| 401 | bool isDictionaryOfType(std::string const& type, std::string const& subtype = ""); | 362 | bool isDictionaryOfType(std::string const& type, std::string const& subtype = ""); |
| 402 | 363 | ||
| 403 | - // True if the object is a stream of the specified type and | ||
| 404 | - // subtype, if any. | 364 | + // True if the object is a stream of the specified type and subtype, if any. |
| 405 | QPDF_DLL | 365 | QPDF_DLL |
| 406 | bool isStreamOfType(std::string const& type, std::string const& subtype = ""); | 366 | bool isStreamOfType(std::string const& type, std::string const& subtype = ""); |
| 407 | 367 | ||
| 408 | // Public factory methods | 368 | // Public factory methods |
| 409 | 369 | ||
| 410 | - // Wrap an object in an array if it is not already an array. This | ||
| 411 | - // is a helper for cases in which something in a PDF may either be | ||
| 412 | - // a single item or an array of items, which is a common idiom. | 370 | + // Wrap an object in an array if it is not already an array. This is a helper for cases in which |
| 371 | + // something in a PDF may either be a single item or an array of items, which is a common idiom. | ||
| 413 | QPDF_DLL | 372 | QPDF_DLL |
| 414 | QPDFObjectHandle wrapInArray(); | 373 | QPDFObjectHandle wrapInArray(); |
| 415 | 374 | ||
| 416 | - // Construct an object of any type from a string representation of | ||
| 417 | - // the object. Throws QPDFExc with an empty filename and an | ||
| 418 | - // offset into the string if there is an error. Any indirect | ||
| 419 | - // object syntax (obj gen R) will cause a logic_error exception to | ||
| 420 | - // be thrown. If object_description is provided, it will appear | ||
| 421 | - // in the message of any QPDFExc exception thrown for invalid | ||
| 422 | - // syntax. See also the global `operator ""_qpdf` defined below. | 375 | + // Construct an object of any type from a string representation of the object. Throws QPDFExc |
| 376 | + // with an empty filename and an offset into the string if there is an error. Any indirect | ||
| 377 | + // object syntax (obj gen R) will cause a logic_error exception to be thrown. If | ||
| 378 | + // object_description is provided, it will appear in the message of any QPDFExc exception thrown | ||
| 379 | + // for invalid syntax. See also the global `operator ""_qpdf` defined below. | ||
| 423 | QPDF_DLL | 380 | QPDF_DLL |
| 424 | static QPDFObjectHandle | 381 | static QPDFObjectHandle |
| 425 | parse(std::string const& object_str, std::string const& object_description = ""); | 382 | parse(std::string const& object_str, std::string const& object_description = ""); |
| 426 | 383 | ||
| 427 | - // Construct an object of any type from a string representation of | ||
| 428 | - // the object. Indirect object syntax (obj gen R) is allowed and | ||
| 429 | - // will create indirect references within the passed-in context. | ||
| 430 | - // If object_description is provided, it will appear in the | ||
| 431 | - // message of any QPDFExc exception thrown for invalid syntax. | ||
| 432 | - // Note that you can't parse an indirect object reference all by | ||
| 433 | - // itself as parse will stop at the end of the first complete | ||
| 434 | - // object, which will just be the first number and will report | ||
| 435 | - // that there is trailing data at the end of the string. | 384 | + // Construct an object of any type from a string representation of the object. Indirect object |
| 385 | + // syntax (obj gen R) is allowed and will create indirect references within the passed-in | ||
| 386 | + // context. If object_description is provided, it will appear in the message of any QPDFExc | ||
| 387 | + // exception thrown for invalid syntax. Note that you can't parse an indirect object reference | ||
| 388 | + // all by itself as parse will stop at the end of the first complete object, which will just be | ||
| 389 | + // the first number and will report that there is trailing data at the end of the string. | ||
| 436 | QPDF_DLL | 390 | QPDF_DLL |
| 437 | static QPDFObjectHandle | 391 | static QPDFObjectHandle |
| 438 | parse(QPDF* context, std::string const& object_str, std::string const& object_description = ""); | 392 | parse(QPDF* context, std::string const& object_str, std::string const& object_description = ""); |
| 439 | 393 | ||
| 440 | - // Construct an object as above by reading from the given | ||
| 441 | - // InputSource at its current position and using the tokenizer you | ||
| 442 | - // supply. Indirect objects and encrypted strings are permitted. | ||
| 443 | - // This method was intended to be called by QPDF for parsing | ||
| 444 | - // objects that are read from the object's input stream. | 394 | + // Construct an object as above by reading from the given InputSource at its current position |
| 395 | + // and using the tokenizer you supply. Indirect objects and encrypted strings are permitted. | ||
| 396 | + // This method was intended to be called by QPDF for parsing objects that are read from the | ||
| 397 | + // object's input stream. | ||
| 445 | QPDF_DLL | 398 | QPDF_DLL |
| 446 | static QPDFObjectHandle parse( | 399 | static QPDFObjectHandle parse( |
| 447 | std::shared_ptr<InputSource> input, | 400 | std::shared_ptr<InputSource> input, |
| @@ -451,60 +404,46 @@ class QPDFObjectHandle | @@ -451,60 +404,46 @@ class QPDFObjectHandle | ||
| 451 | StringDecrypter* decrypter, | 404 | StringDecrypter* decrypter, |
| 452 | QPDF* context); | 405 | QPDF* context); |
| 453 | 406 | ||
| 454 | - // Return the offset where the object was found when parsed. A | ||
| 455 | - // negative value means that the object was created without | ||
| 456 | - // parsing. If the object is in a stream, the offset is from the | ||
| 457 | - // beginning of the stream. Otherwise, the offset is from the | ||
| 458 | - // beginning of the file. | 407 | + // Return the offset where the object was found when parsed. A negative value means that the |
| 408 | + // object was created without parsing. If the object is in a stream, the offset is from the | ||
| 409 | + // beginning of the stream. Otherwise, the offset is from the beginning of the file. | ||
| 459 | QPDF_DLL | 410 | QPDF_DLL |
| 460 | qpdf_offset_t getParsedOffset(); | 411 | qpdf_offset_t getParsedOffset(); |
| 461 | 412 | ||
| 462 | - // Older method: stream_or_array should be the value of /Contents | ||
| 463 | - // from a page object. It's more convenient to just call | ||
| 464 | - // QPDFPageObjectHelper::parsePageContents on the page object, and | ||
| 465 | - // error messages will also be more useful because the page object | ||
| 466 | - // information will be known. | 413 | + // Older method: stream_or_array should be the value of /Contents from a page object. It's more |
| 414 | + // convenient to just call QPDFPageObjectHelper::parsePageContents on the page object, and error | ||
| 415 | + // messages will also be more useful because the page object information will be known. | ||
| 467 | QPDF_DLL | 416 | QPDF_DLL |
| 468 | static void parseContentStream(QPDFObjectHandle stream_or_array, ParserCallbacks* callbacks); | 417 | static void parseContentStream(QPDFObjectHandle stream_or_array, ParserCallbacks* callbacks); |
| 469 | 418 | ||
| 470 | - // When called on a stream or stream array that is some page's | ||
| 471 | - // content streams, do the same as pipePageContents. This method | ||
| 472 | - // is a lower level way to do what | ||
| 473 | - // QPDFPageObjectHelper::pipePageContents does, but it allows you | ||
| 474 | - // to perform this operation on a contents object that is | ||
| 475 | - // disconnected from a page object. The description argument | ||
| 476 | - // should describe the containing page and is used in error | ||
| 477 | - // messages. The all_description argument is initialized to | ||
| 478 | - // something that could be used to describe the result of the | ||
| 479 | - // pipeline. It is the description amended with the identifiers of | ||
| 480 | - // the underlying objects. Please note that if there is an array | ||
| 481 | - // of content streams, p->finish() is called after each stream. If | ||
| 482 | - // you pass a pipeline that doesn't allow write() to be called | ||
| 483 | - // after finish(), you can wrap it in an instance of | ||
| 484 | - // Pl_Concatenate and then call manualFinish() on the | ||
| 485 | - // Pl_Concatenate pipeline at the end. | 419 | + // When called on a stream or stream array that is some page's content streams, do the same as |
| 420 | + // pipePageContents. This method is a lower level way to do what | ||
| 421 | + // QPDFPageObjectHelper::pipePageContents does, but it allows you to perform this operation on a | ||
| 422 | + // contents object that is disconnected from a page object. The description argument should | ||
| 423 | + // describe the containing page and is used in error messages. The all_description argument is | ||
| 424 | + // initialized to something that could be used to describe the result of the pipeline. It is the | ||
| 425 | + // description amended with the identifiers of the underlying objects. Please note that if there | ||
| 426 | + // is an array of content streams, p->finish() is called after each stream. If you pass a | ||
| 427 | + // pipeline that doesn't allow write() to be called after finish(), you can wrap it in an | ||
| 428 | + // instance of Pl_Concatenate and then call manualFinish() on the Pl_Concatenate pipeline at the | ||
| 429 | + // end. | ||
| 486 | QPDF_DLL | 430 | QPDF_DLL |
| 487 | void | 431 | void |
| 488 | pipeContentStreams(Pipeline* p, std::string const& description, std::string& all_description); | 432 | pipeContentStreams(Pipeline* p, std::string const& description, std::string& all_description); |
| 489 | 433 | ||
| 490 | - // As of qpdf 8, it is possible to add custom token filters to a | ||
| 491 | - // stream. The tokenized stream data is passed through the token | ||
| 492 | - // filter after all original filters but before content stream | ||
| 493 | - // normalization if requested. This is a low-level interface to | ||
| 494 | - // add it to a stream. You will usually want to call | ||
| 495 | - // QPDFPageObjectHelper::addContentTokenFilter instead, which can | ||
| 496 | - // be applied to a page object, and which will automatically | ||
| 497 | - // handle the case of pages whose contents are split across | ||
| 498 | - // multiple streams. | 434 | + // As of qpdf 8, it is possible to add custom token filters to a stream. The tokenized stream |
| 435 | + // data is passed through the token filter after all original filters but before content stream | ||
| 436 | + // normalization if requested. This is a low-level interface to add it to a stream. You will | ||
| 437 | + // usually want to call QPDFPageObjectHelper::addContentTokenFilter instead, which can be | ||
| 438 | + // applied to a page object, and which will automatically handle the case of pages whose | ||
| 439 | + // contents are split across multiple streams. | ||
| 499 | QPDF_DLL | 440 | QPDF_DLL |
| 500 | void addTokenFilter(std::shared_ptr<TokenFilter> token_filter); | 441 | void addTokenFilter(std::shared_ptr<TokenFilter> token_filter); |
| 501 | 442 | ||
| 502 | - // Legacy helpers for parsing content streams. These methods are | ||
| 503 | - // not going away, but newer code should call the correspond | ||
| 504 | - // methods in QPDFPageObjectHelper instead. The specification and | ||
| 505 | - // behavior of these methods are the same as the identically named | ||
| 506 | - // methods in that class, but newer functionality will be added | ||
| 507 | - // there. | 443 | + // Legacy helpers for parsing content streams. These methods are not going away, but newer code |
| 444 | + // should call the corresponding methods in QPDFPageObjectHelper instead. The specification and | ||
| 445 | + // behavior of these methods are the same as the identically named methods in that class, but | ||
| 446 | + // newer functionality will be added there. | ||
| 508 | QPDF_DLL | 447 | QPDF_DLL |
| 509 | void parsePageContents(ParserCallbacks* callbacks); | 448 | void parsePageContents(ParserCallbacks* callbacks); |
| 510 | QPDF_DLL | 449 | QPDF_DLL |
| @@ -516,13 +455,12 @@ class QPDFObjectHandle | @@ -516,13 +455,12 @@ class QPDFObjectHandle | ||
| 516 | void addContentTokenFilter(std::shared_ptr<TokenFilter> token_filter); | 455 | void addContentTokenFilter(std::shared_ptr<TokenFilter> token_filter); |
| 517 | // End legacy content stream helpers | 456 | // End legacy content stream helpers |
| 518 | 457 | ||
| 519 | - // Called on a stream to filter the stream as if it were page | ||
| 520 | - // contents. This can be used to apply a TokenFilter to a form | ||
| 521 | - // XObject, whose data is in the same format as a content stream. | 458 | + // Called on a stream to filter the stream as if it were page contents. This can be used to |
| 459 | + // apply a TokenFilter to a form XObject, whose data is in the same format as a content stream. | ||
| 522 | QPDF_DLL | 460 | QPDF_DLL |
| 523 | void filterAsContents(TokenFilter* filter, Pipeline* next = nullptr); | 461 | void filterAsContents(TokenFilter* filter, Pipeline* next = nullptr); |
| 524 | - // Called on a stream to parse the stream as page contents. This | ||
| 525 | - // can be used to parse a form XObject. | 462 | + // Called on a stream to parse the stream as page contents. This can be used to parse a form |
| 463 | + // XObject. | ||
| 526 | QPDF_DLL | 464 | QPDF_DLL |
| 527 | void parseAsContents(ParserCallbacks* callbacks); | 465 | void parseAsContents(ParserCallbacks* callbacks); |
| 528 | 466 | ||
| @@ -538,32 +476,25 @@ class QPDFObjectHandle | @@ -538,32 +476,25 @@ class QPDFObjectHandle | ||
| 538 | QPDF_DLL | 476 | QPDF_DLL |
| 539 | static QPDFObjectHandle | 477 | static QPDFObjectHandle |
| 540 | newReal(double value, int decimal_places = 0, bool trim_trailing_zeroes = true); | 478 | newReal(double value, int decimal_places = 0, bool trim_trailing_zeroes = true); |
| 541 | - // Note about name objects: qpdf's internal representation of a | ||
| 542 | - // PDF name is a sequence of bytes, excluding the NUL character, | ||
| 543 | - // and starting with a slash. Name objects as represented in the | ||
| 544 | - // PDF specification can contain characters escaped with #, but | ||
| 545 | - // such escaping is not of concern when calling QPDFObjectHandle | ||
| 546 | - // methods not directly relating to parsing. For example, | ||
| 547 | - // newName("/text/plain").getName() and | ||
| 548 | - // parse("/text#2fplain").getName() both return "/text/plain", | ||
| 549 | - // while newName("/text/plain").unparse() and | ||
| 550 | - // parse("/text#2fplain").unparse() both return "/text#2fplain". | ||
| 551 | - // When working with the qpdf API for creating, retrieving, and | ||
| 552 | - // modifying objects, you want to work with the internal, | ||
| 553 | - // canonical representation. For names containing alphanumeric | ||
| 554 | - // characters, dashes, and underscores, there is no difference | ||
| 555 | - // between the two representations. For a lengthy discussion, see | 479 | + // Note about name objects: qpdf's internal representation of a PDF name is a sequence of bytes, |
| 480 | + // excluding the NUL character, and starting with a slash. Name objects as represented in the | ||
| 481 | + // PDF specification can contain characters escaped with #, but such escaping is not of concern | ||
| 482 | + // when calling QPDFObjectHandle methods not directly relating to parsing. For example, | ||
| 483 | + // newName("/text/plain").getName() and parse("/text#2fplain").getName() both return | ||
| 484 | + // "/text/plain", while newName("/text/plain").unparse() and parse("/text#2fplain").unparse() | ||
| 485 | + // both return "/text#2fplain". When working with the qpdf API for creating, retrieving, and | ||
| 486 | + // modifying objects, you want to work with the internal, canonical representation. For names | ||
| 487 | + // containing alphanumeric characters, dashes, and underscores, there is no difference between | ||
| 488 | + // the two representations. For a lengthy discussion, see | ||
| 556 | // https://github.com/qpdf/qpdf/discussions/625. | 489 | // https://github.com/qpdf/qpdf/discussions/625. |
| 557 | QPDF_DLL | 490 | QPDF_DLL |
| 558 | static QPDFObjectHandle newName(std::string const& name); | 491 | static QPDFObjectHandle newName(std::string const& name); |
| 559 | QPDF_DLL | 492 | QPDF_DLL |
| 560 | static QPDFObjectHandle newString(std::string const& str); | 493 | static QPDFObjectHandle newString(std::string const& str); |
| 561 | - // Create a string encoded from the given utf8-encoded string | ||
| 562 | - // appropriately encoded to appear in PDF files outside of content | ||
| 563 | - // streams, such as in document metadata form field values, page | ||
| 564 | - // labels, outlines, and similar locations. We try ASCII first, | ||
| 565 | - // then PDFDocEncoding, then UTF-16 as needed to successfully | ||
| 566 | - // encode all the characters. | 494 | + // Create a string encoded from the given utf8-encoded string appropriately encoded to appear in |
| 495 | + // PDF files outside of content streams, such as in document metadata, form field values, page | ||
| 496 | + // labels, outlines, and similar locations. We try ASCII first, then PDFDocEncoding, then UTF-16 | ||
| 497 | + // as needed to successfully encode all the characters. | ||
| 567 | QPDF_DLL | 498 | QPDF_DLL |
| 568 | static QPDFObjectHandle newUnicodeString(std::string const& utf8_str); | 499 | static QPDFObjectHandle newUnicodeString(std::string const& utf8_str); |
| 569 | QPDF_DLL | 500 | QPDF_DLL |
| @@ -585,86 +516,67 @@ class QPDFObjectHandle | @@ -585,86 +516,67 @@ class QPDFObjectHandle | ||
| 585 | QPDF_DLL | 516 | QPDF_DLL |
| 586 | static QPDFObjectHandle newDictionary(std::map<std::string, QPDFObjectHandle> const& items); | 517 | static QPDFObjectHandle newDictionary(std::map<std::string, QPDFObjectHandle> const& items); |
| 587 | 518 | ||
| 588 | - // Create an array from a rectangle. Equivalent to the rectangle | ||
| 589 | - // form of newArray. | 519 | + // Create an array from a rectangle. Equivalent to the rectangle form of newArray. |
| 590 | QPDF_DLL | 520 | QPDF_DLL |
| 591 | static QPDFObjectHandle newFromRectangle(Rectangle const&); | 521 | static QPDFObjectHandle newFromRectangle(Rectangle const&); |
| 592 | - // Create an array from a matrix. Equivalent to the matrix | ||
| 593 | - // form of newArray. | 522 | + // Create an array from a matrix. Equivalent to the matrix form of newArray. |
| 594 | QPDF_DLL | 523 | QPDF_DLL |
| 595 | static QPDFObjectHandle newFromMatrix(Matrix const&); | 524 | static QPDFObjectHandle newFromMatrix(Matrix const&); |
| 596 | QPDF_DLL | 525 | QPDF_DLL |
| 597 | static QPDFObjectHandle newFromMatrix(QPDFMatrix const&); | 526 | static QPDFObjectHandle newFromMatrix(QPDFMatrix const&); |
| 598 | 527 | ||
| 599 | - // Note: new stream creation methods have were added to the QPDF | ||
| 600 | - // class starting with version 11.2.0. The ones in this class are | ||
| 601 | - // here for backward compatibility. | 528 | + // Note: new stream creation methods were added to the QPDF class starting with |
| 529 | + // version 11.2.0. The ones in this class are here for backward compatibility. | ||
| 602 | 530 | ||
| 603 | - // Create a new stream and associate it with the given qpdf | ||
| 604 | - // object. A subsequent call must be made to replaceStreamData() | ||
| 605 | - // to provide data for the stream. The stream's dictionary may be | ||
| 606 | - // retrieved by calling getDict(), and the resulting dictionary | ||
| 607 | - // may be modified. Alternatively, you can create a new dictionary | ||
| 608 | - // and call replaceDict to install it. From QPDF 11.2, you can | 531 | + // Create a new stream and associate it with the given qpdf object. A subsequent call must be |
| 532 | + // made to replaceStreamData() to provide data for the stream. The stream's dictionary may be | ||
| 533 | + // retrieved by calling getDict(), and the resulting dictionary may be modified. Alternatively, | ||
| 534 | + // you can create a new dictionary and call replaceDict to install it. From QPDF 11.2, you can | ||
| 609 | // call QPDF::newStream() instead. | 535 | // call QPDF::newStream() instead. |
| 610 | QPDF_DLL | 536 | QPDF_DLL |
| 611 | static QPDFObjectHandle newStream(QPDF* qpdf); | 537 | static QPDFObjectHandle newStream(QPDF* qpdf); |
| 612 | 538 | ||
| 613 | - // Create a new stream and associate it with the given qpdf | ||
| 614 | - // object. Use the given buffer as the stream data. The stream | ||
| 615 | - // dictionary's /Length key will automatically be set to the size | ||
| 616 | - // of the data buffer. If additional keys are required, the | ||
| 617 | - // stream's dictionary may be retrieved by calling getDict(), and | ||
| 618 | - // the resulting dictionary may be modified. This method is just a | ||
| 619 | - // convenient wrapper around the newStream() and | ||
| 620 | - // replaceStreamData(). It is a convenience methods for streams | ||
| 621 | - // that require no parameters beyond the stream length. Note that | ||
| 622 | - // you don't have to deal with compression yourself if you use | ||
| 623 | - // QPDFWriter. By default, QPDFWriter will automatically compress | ||
| 624 | - // uncompressed stream data. Example programs are provided that | 539 | + // Create a new stream and associate it with the given qpdf object. Use the given buffer as the |
| 540 | + // stream data. The stream dictionary's /Length key will automatically be set to the size of the | ||
| 541 | + // data buffer. If additional keys are required, the stream's dictionary may be retrieved by | ||
| 542 | + // calling getDict(), and the resulting dictionary may be modified. This method is just a | ||
| 543 | + // convenient wrapper around the newStream() and replaceStreamData(). It is a convenience | ||
| 544 | + // method for streams that require no parameters beyond the stream length. Note that you don't | ||
| 545 | + // have to deal with compression yourself if you use QPDFWriter. By default, QPDFWriter will | ||
| 546 | + // automatically compress uncompressed stream data. Example programs are provided that | ||
| 625 | // illustrate this. From QPDF 11.2, you can call QPDF::newStream() | 547 | // illustrate this. From QPDF 11.2, you can call QPDF::newStream() |
| 626 | // instead. | 548 | // instead. |
| 627 | QPDF_DLL | 549 | QPDF_DLL |
| 628 | static QPDFObjectHandle newStream(QPDF* qpdf, std::shared_ptr<Buffer> data); | 550 | static QPDFObjectHandle newStream(QPDF* qpdf, std::shared_ptr<Buffer> data); |
| 629 | 551 | ||
| 630 | - // Create new stream with data from string. This method will | ||
| 631 | - // create a copy of the data rather than using the user-provided | ||
| 632 | - // buffer as in the std::shared_ptr<Buffer> version of newStream. | 552 | + // Create new stream with data from string. This method will create a copy of the data rather |
| 553 | + // than using the user-provided buffer as in the std::shared_ptr<Buffer> version of newStream. | ||
| 633 | // From QPDF 11.2, you can call QPDF::newStream() instead. | 554 | // From QPDF 11.2, you can call QPDF::newStream() instead. |
| 634 | QPDF_DLL | 555 | QPDF_DLL |
| 635 | static QPDFObjectHandle newStream(QPDF* qpdf, std::string const& data); | 556 | static QPDFObjectHandle newStream(QPDF* qpdf, std::string const& data); |
| 636 | 557 | ||
| 637 | - // A reserved object is a special sentinel used for qpdf to | ||
| 638 | - // reserve a spot for an object that is going to be added to the | ||
| 639 | - // QPDF object. Normally you don't have to use this type since | ||
| 640 | - // you can just call QPDF::makeIndirectObject. However, in some | ||
| 641 | - // cases, if you have to create objects with circular references, | ||
| 642 | - // you may need to create a reserved object so that you can have a | ||
| 643 | - // reference to it and then replace the object later. Reserved | ||
| 644 | - // objects have the special property that they can't be resolved | ||
| 645 | - // to direct objects. This makes it possible to replace a | ||
| 646 | - // reserved object with a new object while preserving existing | ||
| 647 | - // references to them. When you are ready to replace a reserved | ||
| 648 | - // object with its replacement, use QPDF::replaceReserved for this | ||
| 649 | - // purpose rather than the more general QPDF::replaceObject. It | ||
| 650 | - // is an error to try to write a QPDF with QPDFWriter if it has | ||
| 651 | - // any reserved objects in it. From QPDF 11.4, you can | ||
| 652 | - // call QPDF::newReserved() instead. | 558 | + // A reserved object is a special sentinel used for qpdf to reserve a spot for an object that is |
| 559 | + // going to be added to the QPDF object. Normally you don't have to use this type since you can | ||
| 560 | + // just call QPDF::makeIndirectObject. However, in some cases, if you have to create objects | ||
| 561 | + // with circular references, you may need to create a reserved object so that you can have a | ||
| 562 | + // reference to it and then replace the object later. Reserved objects have the special | ||
| 563 | + // property that they can't be resolved to direct objects. This makes it possible to replace a | ||
| 564 | + // reserved object with a new object while preserving existing references to them. When you are | ||
| 565 | + // ready to replace a reserved object with its replacement, use QPDF::replaceReserved for this | ||
| 566 | + // purpose rather than the more general QPDF::replaceObject. It is an error to try to write a | ||
| 567 | + // QPDF with QPDFWriter if it has any reserved objects in it. From QPDF 11.4, you can call | ||
| 568 | + // QPDF::newReserved() instead. | ||
| 653 | QPDF_DLL | 569 | QPDF_DLL |
| 654 | static QPDFObjectHandle newReserved(QPDF* qpdf); | 570 | static QPDFObjectHandle newReserved(QPDF* qpdf); |
| 655 | 571 | ||
| 656 | - // Provide an owning qpdf and object description. The library does | ||
| 657 | - // this automatically with objects that are read from the input | ||
| 658 | - // PDF and with objects that are created programmatically and | ||
| 659 | - // inserted into the QPDF as a new indirect object. Most end user | ||
| 660 | - // code will not need to call this. If an object has an owning | ||
| 661 | - // qpdf and object description, it enables qpdf to give warnings | ||
| 662 | - // with proper context in some cases where it would otherwise | ||
| 663 | - // raise exceptions. It is okay to add objects without an | ||
| 664 | - // owning_qpdf to objects that have one, but it is an error to | ||
| 665 | - // have a QPDF contain objects with owning_qpdf set to something | ||
| 666 | - // else. To add objects from another qpdf, use copyForeignObject | ||
| 667 | - // instead. | 572 | + // Provide an owning qpdf and object description. The library does this automatically with |
| 573 | + // objects that are read from the input PDF and with objects that are created programmatically | ||
| 574 | + // and inserted into the QPDF as a new indirect object. Most end user code will not need to call | ||
| 575 | + // this. If an object has an owning qpdf and object description, it enables qpdf to give | ||
| 576 | + // warnings with proper context in some cases where it would otherwise raise exceptions. It is | ||
| 577 | + // okay to add objects without an owning_qpdf to objects that have one, but it is an error to | ||
| 578 | + // have a QPDF contain objects with owning_qpdf set to something else. To add objects from | ||
| 579 | + // another qpdf, use copyForeignObject instead. | ||
| 668 | QPDF_DLL | 580 | QPDF_DLL |
| 669 | void setObjectDescription(QPDF* owning_qpdf, std::string const& object_description); | 581 | void setObjectDescription(QPDF* owning_qpdf, std::string const& object_description); |
| 670 | QPDF_DLL | 582 | QPDF_DLL |
| @@ -674,62 +586,47 @@ class QPDFObjectHandle | @@ -674,62 +586,47 @@ class QPDFObjectHandle | ||
| 674 | // | 586 | // |
| 675 | // (Note: this comment is referenced in qpdf-c.h and the manual.) | 587 | // (Note: this comment is referenced in qpdf-c.h and the manual.) |
| 676 | // | 588 | // |
| 677 | - // In PDF files, objects have specific types, but there is nothing | ||
| 678 | - // that prevents PDF files from containing objects of types that | ||
| 679 | - // aren't expected by the specification. | 589 | + // In PDF files, objects have specific types, but there is nothing that prevents PDF files from |
| 590 | + // containing objects of types that aren't expected by the specification. | ||
| 680 | // | 591 | // |
| 681 | // There are two flavors of accessor methods: | 592 | // There are two flavors of accessor methods: |
| 682 | // | 593 | // |
| 683 | - // * getSomethingValue() returns the value and issues a type | ||
| 684 | - // warning if the type is incorrect. | 594 | + // * getSomethingValue() returns the value and issues a type warning if the type is incorrect. |
| 685 | // | 595 | // |
| 686 | - // * getValueAsSomething() returns false if the value is the wrong | ||
| 687 | - // type. Otherwise, it returns true and initializes a reference | ||
| 688 | - // of the appropriate type. These methods never issue type | 596 | + // * getValueAsSomething() returns false if the value is the wrong type. Otherwise, it returns |
| 597 | + // true and initializes a reference of the appropriate type. These methods never issue type | ||
| 689 | // warnings. | 598 | // warnings. |
| 690 | // | 599 | // |
| 691 | - // The getSomethingValue() accessors and some of the other methods | ||
| 692 | - // expect objects of a particular type. Prior to qpdf 8, calling | ||
| 693 | - // an accessor on a method of the wrong type, such as trying to | ||
| 694 | - // get a dictionary key from an array, trying to get the string | ||
| 695 | - // value of a number, etc., would throw an exception, but since | ||
| 696 | - // qpdf 8, qpdf issues a warning and recovers using the following | ||
| 697 | - // behavior: | 600 | + // The getSomethingValue() accessors and some of the other methods expect objects of a |
| 601 | + // particular type. Prior to qpdf 8, calling an accessor on an object of the wrong type, such as | ||
| 602 | + // trying to get a dictionary key from an array, trying to get the string value of a number, | ||
| 603 | + // etc., would throw an exception, but since qpdf 8, qpdf issues a warning and recovers using | ||
| 604 | + // the following behavior: | ||
| 698 | // | 605 | // |
| 699 | - // * Requesting a value of the wrong type (int value from string, | ||
| 700 | - // array item from a scalar or dictionary, etc.) will return a | ||
| 701 | - // zero-like value for that type: false for boolean, 0 for | ||
| 702 | - // number, the empty string for string, or the null object for | ||
| 703 | - // an object handle. | 606 | + // * Requesting a value of the wrong type (int value from string, array item from a scalar or |
| 607 | + // dictionary, etc.) will return a zero-like value for that type: false for boolean, 0 for | ||
| 608 | + // number, the empty string for string, or the null object for an object handle. | ||
| 704 | // | 609 | // |
| 705 | - // * Accessing an array item that is out of bounds will return a | ||
| 706 | - // null object. | 610 | + // * Accessing an array item that is out of bounds will return a null object. |
| 707 | // | 611 | // |
| 708 | - // * Attempts to mutate an object of the wrong type (e.g., | ||
| 709 | - // attempting to add a dictionary key to a scalar or array) will | ||
| 710 | - // be ignored. | 612 | + // * Attempts to mutate an object of the wrong type (e.g., attempting to add a dictionary key to |
| 613 | + // a scalar or array) will be ignored. | ||
| 711 | // | 614 | // |
| 712 | - // When any of these fallback behaviors are used, qpdf issues a | ||
| 713 | - // warning. Starting in qpdf 10.5, these warnings have the error | ||
| 714 | - // code qpdf_e_object. Prior to 10.5, they had the error code | ||
| 715 | - // qpdf_e_damaged_pdf. If the QPDFObjectHandle is associated with | ||
| 716 | - // a QPDF object (as is the case for all objects whose origin was | ||
| 717 | - // a PDF file), the warning is issued using the normal warning | ||
| 718 | - // mechanism (as described in QPDF.hh), making it possible to | ||
| 719 | - // suppress or otherwise detect them. If the QPDFObjectHandle is | ||
| 720 | - // not associated with a QPDF object (meaning it was created | 615 | + // When any of these fallback behaviors are used, qpdf issues a warning. Starting in qpdf 10.5, |
| 616 | + // these warnings have the error code qpdf_e_object. Prior to 10.5, they had the error code | ||
| 617 | + // qpdf_e_damaged_pdf. If the QPDFObjectHandle is associated with a QPDF object (as is the case | ||
| 618 | + // for all objects whose origin was a PDF file), the warning is issued using the normal warning | ||
| 619 | + // mechanism (as described in QPDF.hh), making it possible to suppress or otherwise detect them. | ||
| 620 | + // If the QPDFObjectHandle is not associated with a QPDF object (meaning it was created | ||
| 721 | // programmatically), an exception will be thrown. | 621 | // programmatically), an exception will be thrown. |
| 722 | // | 622 | // |
| 723 | - // The way to avoid getting any type warnings or exceptions, even | ||
| 724 | - // when working with malformed PDF files, is to always check the | ||
| 725 | - // type of a QPDFObjectHandle before accessing it (for example, | ||
| 726 | - // make sure that isString() returns true before calling | ||
| 727 | - // getStringValue()) and to always be sure that any array indices | ||
| 728 | - // are in bounds. | 623 | + // The way to avoid getting any type warnings or exceptions, even when working with malformed |
| 624 | + // PDF files, is to always check the type of a QPDFObjectHandle before accessing it (for | ||
| 625 | + // example, make sure that isString() returns true before calling getStringValue()) and to | ||
| 626 | + // always be sure that any array indices are in bounds. | ||
| 729 | // | 627 | // |
| 730 | - // For additional discussion and rationale for this behavior, see | ||
| 731 | - // the section in the QPDF manual entitled "Object Accessor | ||
| 732 | - // Methods". | 628 | + // For additional discussion and rationale for this behavior, see the section in the QPDF manual |
| 629 | + // entitled "Object Accessor Methods". | ||
| 733 | 630 | ||
| 734 | // Methods for bool objects | 631 | // Methods for bool objects |
| 735 | QPDF_DLL | 632 | QPDF_DLL |
| @@ -737,12 +634,10 @@ class QPDFObjectHandle | @@ -737,12 +634,10 @@ class QPDFObjectHandle | ||
| 737 | QPDF_DLL | 634 | QPDF_DLL |
| 738 | bool getValueAsBool(bool&); | 635 | bool getValueAsBool(bool&); |
| 739 | 636 | ||
| 740 | - // Methods for integer objects. Note: if an integer value is too | ||
| 741 | - // big (too far away from zero in either direction) to fit in the | ||
| 742 | - // requested return type, the maximum or minimum value for that | ||
| 743 | - // return type may be returned. For example, on a system with | ||
| 744 | - // 32-bit int, a numeric object with a value of 2^40 (or anything | ||
| 745 | - // too big for 32 bits) will be returned as INT_MAX. | 637 | + // Methods for integer objects. Note: if an integer value is too big (too far away from zero in |
| 638 | + // either direction) to fit in the requested return type, the maximum or minimum value for that | ||
| 639 | + // return type may be returned. For example, on a system with 32-bit int, a numeric object with | ||
| 640 | + // a value of 2^40 (or anything too big for 32 bits) will be returned as INT_MAX. | ||
| 746 | QPDF_DLL | 641 | QPDF_DLL |
| 747 | long long getIntValue(); | 642 | long long getIntValue(); |
| 748 | QPDF_DLL | 643 | QPDF_DLL |
| @@ -774,9 +669,8 @@ class QPDFObjectHandle | @@ -774,9 +669,8 @@ class QPDFObjectHandle | ||
| 774 | QPDF_DLL | 669 | QPDF_DLL |
| 775 | bool getValueAsNumber(double&); | 670 | bool getValueAsNumber(double&); |
| 776 | 671 | ||
| 777 | - // Methods for name objects. The returned name value is in qpdf's | ||
| 778 | - // canonical form with all escaping resolved. See comments for | ||
| 779 | - // newName() for details. | 672 | + // Methods for name objects. The returned name value is in qpdf's canonical form with all |
| 673 | + // escaping resolved. See comments for newName() for details. | ||
| 780 | QPDF_DLL | 674 | QPDF_DLL |
| 781 | std::string getName(); | 675 | std::string getName(); |
| 782 | QPDF_DLL | 676 | QPDF_DLL |
| @@ -788,12 +682,10 @@ class QPDFObjectHandle | @@ -788,12 +682,10 @@ class QPDFObjectHandle | ||
| 788 | QPDF_DLL | 682 | QPDF_DLL |
| 789 | bool getValueAsString(std::string&); | 683 | bool getValueAsString(std::string&); |
| 790 | 684 | ||
| 791 | - // If a string starts with the UTF-16 marker, it is converted from | ||
| 792 | - // UTF-16 to UTF-8. Otherwise, it is treated as a string encoded | ||
| 793 | - // with PDF Doc Encoding. PDF Doc Encoding is identical to | ||
| 794 | - // ISO-8859-1 except in the range from 0200 through 0240, where | ||
| 795 | - // there is a mapping of characters to Unicode. QPDF versions | ||
| 796 | - // prior to version 8.0.0 erroneously left characters in that range | 685 | + // If a string starts with the UTF-16 marker, it is converted from UTF-16 to UTF-8. Otherwise, |
| 686 | + // it is treated as a string encoded with PDF Doc Encoding. PDF Doc Encoding is identical to | ||
| 687 | + // ISO-8859-1 except in the range from 0200 through 0240, where there is a mapping of characters | ||
| 688 | + // to Unicode. QPDF versions prior to version 8.0.0 erroneously left characters in that range | ||
| 797 | // unmapped. | 689 | // unmapped. |
| 798 | QPDF_DLL | 690 | QPDF_DLL |
| 799 | std::string getUTF8Value(); | 691 | std::string getUTF8Value(); |
| @@ -812,8 +704,7 @@ class QPDFObjectHandle | @@ -812,8 +704,7 @@ class QPDFObjectHandle | ||
| 812 | 704 | ||
| 813 | // Methods for array objects; see also name and array objects. | 705 | // Methods for array objects; see also name and array objects. |
| 814 | 706 | ||
| 815 | - // Return an object that enables iteration over members. You can | ||
| 816 | - // do | 707 | + // Return an object that enables iteration over members. You can do |
| 817 | // | 708 | // |
| 818 | // for (auto iter: obj.aitems()) | 709 | // for (auto iter: obj.aitems()) |
| 819 | // { | 710 | // { |
| @@ -827,32 +718,29 @@ class QPDFObjectHandle | @@ -827,32 +718,29 @@ class QPDFObjectHandle | ||
| 827 | int getArrayNItems(); | 718 | int getArrayNItems(); |
| 828 | QPDF_DLL | 719 | QPDF_DLL |
| 829 | QPDFObjectHandle getArrayItem(int n); | 720 | QPDFObjectHandle getArrayItem(int n); |
| 830 | - // Note: QPDF arrays internally optimize memory for arrays | ||
| 831 | - // containing lots of nulls. Calling getArrayAsVector may cause a | ||
| 832 | - // lot of memory to be allocated for very large arrays with lots | ||
| 833 | - // of nulls. | 721 | + // Note: QPDF arrays internally optimize memory for arrays containing lots of nulls. Calling |
| 722 | + // getArrayAsVector may cause a lot of memory to be allocated for very large arrays with lots of | ||
| 723 | + // nulls. | ||
| 834 | QPDF_DLL | 724 | QPDF_DLL |
| 835 | std::vector<QPDFObjectHandle> getArrayAsVector(); | 725 | std::vector<QPDFObjectHandle> getArrayAsVector(); |
| 836 | QPDF_DLL | 726 | QPDF_DLL |
| 837 | bool isRectangle(); | 727 | bool isRectangle(); |
| 838 | - // If the array is an array of four numeric values, return as a | ||
| 839 | - // rectangle. Otherwise, return the rectangle [0, 0, 0, 0] | 728 | + // If the array is an array of four numeric values, return as a rectangle. Otherwise, return the |
| 729 | + // rectangle [0, 0, 0, 0] | ||
| 840 | QPDF_DLL | 730 | QPDF_DLL |
| 841 | Rectangle getArrayAsRectangle(); | 731 | Rectangle getArrayAsRectangle(); |
| 842 | QPDF_DLL | 732 | QPDF_DLL |
| 843 | bool isMatrix(); | 733 | bool isMatrix(); |
| 844 | - // If the array is an array of six numeric values, return as a | ||
| 845 | - // matrix. Otherwise, return the matrix [1, 0, 0, 1, 0, 0] | 734 | + // If the array is an array of six numeric values, return as a matrix. Otherwise, return the |
| 735 | + // matrix [1, 0, 0, 1, 0, 0] | ||
| 846 | QPDF_DLL | 736 | QPDF_DLL |
| 847 | Matrix getArrayAsMatrix(); | 737 | Matrix getArrayAsMatrix(); |
| 848 | 738 | ||
| 849 | - // Methods for dictionary objects. In all dictionary methods, keys | ||
| 850 | - // are specified/represented as canonical name strings starting | ||
| 851 | - // with a leading slash and not containing any PDF syntax | 739 | + // Methods for dictionary objects. In all dictionary methods, keys are specified/represented as |
| 740 | + // canonical name strings starting with a leading slash and not containing any PDF syntax | ||
| 852 | // escaping. See comments for getName() for details. | 741 | // escaping. See comments for getName() for details. |
| 853 | 742 | ||
| 854 | - // Return an object that enables iteration over members. You can | ||
| 855 | - // do | 743 | + // Return an object that enables iteration over members. You can do |
| 856 | // | 744 | // |
| 857 | // for (auto iter: obj.ditems()) | 745 | // for (auto iter: obj.ditems()) |
| 858 | // { | 746 | // { |
| @@ -863,185 +751,149 @@ class QPDFObjectHandle | @@ -863,185 +751,149 @@ class QPDFObjectHandle | ||
| 863 | QPDF_DLL | 751 | QPDF_DLL |
| 864 | QPDFDictItems ditems(); | 752 | QPDFDictItems ditems(); |
| 865 | 753 | ||
| 866 | - // Return true if key is present. Keys with null values are treated as if | ||
| 867 | - // they are not present. This is as per the PDF spec. | 754 | + // Return true if key is present. Keys with null values are treated as if they are not present. |
| 755 | + // This is as per the PDF spec. | ||
| 868 | QPDF_DLL | 756 | QPDF_DLL |
| 869 | bool hasKey(std::string const&); | 757 | bool hasKey(std::string const&); |
| 870 | - // Return the value for the key. If the key is not present, null is | ||
| 871 | - // returned. | 758 | + // Return the value for the key. If the key is not present, null is returned. |
| 872 | QPDF_DLL | 759 | QPDF_DLL |
| 873 | QPDFObjectHandle getKey(std::string const&); | 760 | QPDFObjectHandle getKey(std::string const&); |
| 874 | - // If the object is null, return null. Otherwise, call getKey(). | ||
| 875 | - // This makes it easier to access lower-level dictionaries, as in | 761 | + // If the object is null, return null. Otherwise, call getKey(). This makes it easier to access |
| 762 | + // lower-level dictionaries, as in | ||
| 876 | // auto font = page.getKeyIfDict("/Resources").getKeyIfDict("/Font"); | 763 | // auto font = page.getKeyIfDict("/Resources").getKeyIfDict("/Font"); |
| 877 | QPDF_DLL | 764 | QPDF_DLL |
| 878 | QPDFObjectHandle getKeyIfDict(std::string const&); | 765 | QPDFObjectHandle getKeyIfDict(std::string const&); |
| 879 | - // Return all keys. Keys with null values are treated as if | ||
| 880 | - // they are not present. This is as per the PDF spec. | 766 | + // Return all keys. Keys with null values are treated as if they are not present. This is as |
| 767 | + // per the PDF spec. | ||
| 881 | QPDF_DLL | 768 | QPDF_DLL |
| 882 | std::set<std::string> getKeys(); | 769 | std::set<std::string> getKeys(); |
| 883 | // Return dictionary as a map. Entries with null values are included. | 770 | // Return dictionary as a map. Entries with null values are included. |
| 884 | QPDF_DLL | 771 | QPDF_DLL |
| 885 | std::map<std::string, QPDFObjectHandle> getDictAsMap(); | 772 | std::map<std::string, QPDFObjectHandle> getDictAsMap(); |
| 886 | 773 | ||
| 887 | - // Methods for name and array objects. The name value is in qpdf's | ||
| 888 | - // canonical form with all escaping resolved. See comments for | ||
| 889 | - // newName() for details. | 774 | + // Methods for name and array objects. The name value is in qpdf's canonical form with all |
| 775 | + // escaping resolved. See comments for newName() for details. | ||
| 890 | QPDF_DLL | 776 | QPDF_DLL |
| 891 | bool isOrHasName(std::string const&); | 777 | bool isOrHasName(std::string const&); |
| 892 | 778 | ||
| 893 | - // Make all resources in a resource dictionary indirect. This just | ||
| 894 | - // goes through all entries of top-level subdictionaries and | ||
| 895 | - // converts any direct objects to indirect objects. This can be | ||
| 896 | - // useful to call before mergeResources if it is going to be | ||
| 897 | - // called multiple times to prevent resources from being copied | ||
| 898 | - // multiple times. | 779 | + // Make all resources in a resource dictionary indirect. This just goes through all entries of |
| 780 | + // top-level subdictionaries and converts any direct objects to indirect objects. This can be | ||
| 781 | + // useful to call before mergeResources if it is going to be called multiple times to prevent | ||
| 782 | + // resources from being copied multiple times. | ||
| 899 | QPDF_DLL | 783 | QPDF_DLL |
| 900 | void makeResourcesIndirect(QPDF& owning_qpdf); | 784 | void makeResourcesIndirect(QPDF& owning_qpdf); |
| 901 | 785 | ||
| 902 | - // Merge resource dictionaries. If the "conflicts" parameter is | ||
| 903 | - // provided, conflicts in dictionary subitems are resolved, and | ||
| 904 | - // "conflicts" is initialized to a map such that | 786 | + // Merge resource dictionaries. If the "conflicts" parameter is provided, conflicts in |
| 787 | + // dictionary subitems are resolved, and "conflicts" is initialized to a map such that | ||
| 905 | // conflicts[resource_type][old_key] == [new_key] | 788 | // conflicts[resource_type][old_key] == [new_key] |
| 906 | // | 789 | // |
| 907 | - // See also makeResourcesIndirect, which can be useful to call | ||
| 908 | - // before calling this. | 790 | + // See also makeResourcesIndirect, which can be useful to call before calling this. |
| 909 | // | 791 | // |
| 910 | - // This method does nothing if both this object and the other | ||
| 911 | - // object are not dictionaries. Otherwise, it has following | ||
| 912 | - // behavior, where "object" refers to the object whose method is | 792 | + // This method does nothing if both this object and the other object are not dictionaries. |
| 793 | + // Otherwise, it has following behavior, where "object" refers to the object whose method is | ||
| 913 | // invoked, and "other" refers to the argument: | 794 | // invoked, and "other" refers to the argument: |
| 914 | // | 795 | // |
| 915 | // * For each key in "other" whose value is an array: | 796 | // * For each key in "other" whose value is an array: |
| 916 | // * If "object" does not have that entry, shallow copy it. | 797 | // * If "object" does not have that entry, shallow copy it. |
| 917 | - // * Otherwise, if "object" has an array in the same place, | ||
| 918 | - // append to that array any objects in "other"'s array that | ||
| 919 | - // are not already present. | 798 | + // * Otherwise, if "object" has an array in the same place, append to that array any objects |
| 799 | + // in "other"'s array that are not already present. | ||
| 920 | // * For each key in "other" whose value is a dictionary: | 800 | // * For each key in "other" whose value is a dictionary: |
| 921 | // * If "object" does not have that entry, shallow copy it. | 801 | // * If "object" does not have that entry, shallow copy it. |
| 922 | // * Otherwise, for each key in the subdictionary: | 802 | // * Otherwise, for each key in the subdictionary: |
| 923 | - // * If key is not present in "object"'s entry, shallow copy | ||
| 924 | - // it if direct or just add it if indirect. | 803 | + // * If key is not present in "object"'s entry, shallow copy it if direct or just add it if |
| 804 | + // indirect. | ||
| 925 | // * Otherwise, if conflicts are being detected: | 805 | // * Otherwise, if conflicts are being detected: |
| 926 | - // * If there is a key (oldkey) already in the dictionary | ||
| 927 | - // that points to the same indirect destination as key, | ||
| 928 | - // indicate that key was replaced by oldkey. This would | ||
| 929 | - // happen if these two resource dictionaries have | ||
| 930 | - // previously been merged. | ||
| 931 | - // * Otherwise pick a new key (newkey) that is unique within | ||
| 932 | - // the resource dictionary, store that in the resource | ||
| 933 | - // dictionary with key's destination as its destination, | ||
| 934 | - // and indicate that key was replaced by newkey. | 806 | + // * If there is a key (oldkey) already in the dictionary that points to the same indirect |
| 807 | + // destination as key, indicate that key was replaced by oldkey. This would happen if | ||
| 808 | + // these two resource dictionaries have previously been merged. | ||
| 809 | + // * Otherwise pick a new key (newkey) that is unique within the resource dictionary, | ||
| 810 | + // store that in the resource dictionary with key's destination as its destination, and | ||
| 811 | + // indicate that key was replaced by newkey. | ||
| 935 | // | 812 | // |
| 936 | - // The primary purpose of this method is to facilitate merging of | ||
| 937 | - // resource dictionaries that are supposed to have the same scope | ||
| 938 | - // as each other. For example, this can be used to merge a form | ||
| 939 | - // XObject's /Resources dictionary with a form field's /DR or to | ||
| 940 | - // merge two /DR dictionaries. The "conflicts" parameter may be | ||
| 941 | - // previously initialized. This method adds to whatever is already | 813 | + // The primary purpose of this method is to facilitate merging of resource dictionaries that are |
| 814 | + // supposed to have the same scope as each other. For example, this can be used to merge a form | ||
| 815 | + // XObject's /Resources dictionary with a form field's /DR or to merge two /DR dictionaries. The | ||
| 816 | + // "conflicts" parameter may be previously initialized. This method adds to whatever is already | ||
| 942 | // there, which can be useful when merging with multiple things. | 817 | // there, which can be useful when merging with multiple things. |
| 943 | QPDF_DLL | 818 | QPDF_DLL |
| 944 | void mergeResources( | 819 | void mergeResources( |
| 945 | QPDFObjectHandle other, | 820 | QPDFObjectHandle other, |
| 946 | std::map<std::string, std::map<std::string, std::string>>* conflicts = nullptr); | 821 | std::map<std::string, std::map<std::string, std::string>>* conflicts = nullptr); |
| 947 | 822 | ||
| 948 | - // Get all resource names from a resource dictionary. If this | ||
| 949 | - // object is a dictionary, this method returns a set of all the | ||
| 950 | - // keys in all top-level subdictionaries. For resources | ||
| 951 | - // dictionaries, this is the collection of names that may be | ||
| 952 | - // referenced in the content stream. | 823 | + // Get all resource names from a resource dictionary. If this object is a dictionary, this |
| 824 | + // method returns a set of all the keys in all top-level subdictionaries. For resources | ||
| 825 | + // dictionaries, this is the collection of names that may be referenced in the content stream. | ||
| 953 | QPDF_DLL | 826 | QPDF_DLL |
| 954 | std::set<std::string> getResourceNames(); | 827 | std::set<std::string> getResourceNames(); |
| 955 | 828 | ||
| 956 | - // Find a unique name within a resource dictionary starting with a | ||
| 957 | - // given prefix. This method works by appending a number to the | ||
| 958 | - // given prefix. It searches starting with min_suffix and sets | ||
| 959 | - // min_suffix to selected value upon return. This can be used to | ||
| 960 | - // increase efficiency if adding multiple items with the same | ||
| 961 | - // prefix. (Why doesn't it set min_suffix to the next number? | ||
| 962 | - // Well, maybe you aren't going to actually use the name it | ||
| 963 | - // returns.) If you are calling this multiple times on the same | ||
| 964 | - // resource dictionary, you can initialize resource_names by | ||
| 965 | - // calling getResourceNames(), incrementally update it as you add | ||
| 966 | - // resources, and keep passing it in so that getUniqueResourceName | ||
| 967 | - // doesn't have to traverse the resource dictionary each time it's | ||
| 968 | - // called. | 829 | + // Find a unique name within a resource dictionary starting with a given prefix. This method |
| 830 | + // works by appending a number to the given prefix. It searches starting with min_suffix and | ||
| 831 | + // sets min_suffix to selected value upon return. This can be used to increase efficiency if | ||
| 832 | + // adding multiple items with the same prefix. (Why doesn't it set min_suffix to the next | ||
| 833 | + // number? Well, maybe you aren't going to actually use the name it returns.) If you are calling | ||
| 834 | + // this multiple times on the same resource dictionary, you can initialize resource_names by | ||
| 835 | + // calling getResourceNames(), incrementally update it as you add resources, and keep passing it | ||
| 836 | + // in so that getUniqueResourceName doesn't have to traverse the resource dictionary each time | ||
| 837 | + // it's called. | ||
| 969 | QPDF_DLL | 838 | QPDF_DLL |
| 970 | std::string getUniqueResourceName( | 839 | std::string getUniqueResourceName( |
| 971 | std::string const& prefix, | 840 | std::string const& prefix, |
| 972 | int& min_suffix, | 841 | int& min_suffix, |
| 973 | std::set<std::string>* resource_names = nullptr); | 842 | std::set<std::string>* resource_names = nullptr); |
| 974 | 843 | ||
| 975 | - // A QPDFObjectHandle has an owning QPDF if it is associated with | ||
| 976 | - // ("owned by") a specific QPDF object. Indirect objects always | ||
| 977 | - // have an owning QPDF. Direct objects that are read from the | ||
| 978 | - // input source will also have an owning QPDF. Programmatically | ||
| 979 | - // created objects will only have one if setObjectDescription was | ||
| 980 | - // called. | 844 | + // A QPDFObjectHandle has an owning QPDF if it is associated with ("owned by") a specific QPDF |
| 845 | + // object. Indirect objects always have an owning QPDF. Direct objects that are read from the | ||
| 846 | + // input source will also have an owning QPDF. Programmatically created objects will only have | ||
| 847 | + // one if setObjectDescription was called. | ||
| 981 | // | 848 | // |
| 982 | - // When the QPDF object that owns an object is destroyed, the | ||
| 983 | - // object is changed into a null, and its owner is cleared. | ||
| 984 | - // Therefore you should not retain the value of an owning QPDF | ||
| 985 | - // beyond the life of the QPDF. If in doubt, ask for it each time | ||
| 986 | - // you need it. | 849 | + // When the QPDF object that owns an object is destroyed, the object is changed into a null, and |
| 850 | + // its owner is cleared. Therefore you should not retain the value of an owning QPDF beyond the | ||
| 851 | + // life of the QPDF. If in doubt, ask for it each time you need it. | ||
| 987 | 852 | ||
| 988 | - // getOwningQPDF returns a pointer to the owning QPDF is the | ||
| 989 | - // object has one. Otherwise, it returns a null pointer. Use this | ||
| 990 | - // when you are able to handle the case of an object that doesn't | ||
| 991 | - // have an owning QPDF. | 853 | + // getOwningQPDF returns a pointer to the owning QPDF is the object has one. Otherwise, it |
| 854 | + // returns a null pointer. Use this when you are able to handle the case of an object that | ||
| 855 | + // doesn't have an owning QPDF. | ||
| 992 | QPDF_DLL | 856 | QPDF_DLL |
| 993 | QPDF* getOwningQPDF() const; | 857 | QPDF* getOwningQPDF() const; |
| 994 | - // getQPDF, new in qpdf 11, returns a reference owning QPDF. If | ||
| 995 | - // there is none, it throws a runtime_error. Use this when you | ||
| 996 | - // know the object has to have an owning QPDF, such as when it's a | ||
| 997 | - // known indirect object. Since streams are always indirect | ||
| 998 | - // objects, this method can be used safely for streams. If | ||
| 999 | - // error_msg is specified, it will be used at the contents of the | 858 | + // getQPDF, new in qpdf 11, returns a reference owning QPDF. If there is none, it throws a |
| 859 | + // runtime_error. Use this when you know the object has to have an owning QPDF, such as when | ||
| 860 | + // it's a known indirect object. Since streams are always indirect objects, this method can be | ||
| 861 | + // used safely for streams. If error_msg is specified, it will be used at the contents of the | ||
| 1000 | // runtime_error if there is no owner. | 862 | // runtime_error if there is no owner. |
| 1001 | QPDF_DLL | 863 | QPDF_DLL |
| 1002 | QPDF& getQPDF(std::string const& error_msg = "") const; | 864 | QPDF& getQPDF(std::string const& error_msg = "") const; |
| 1003 | 865 | ||
| 1004 | - // Create a shallow copy of an object as a direct object, but do not | ||
| 1005 | - // traverse across indirect object boundaries. That means that, | ||
| 1006 | - // for dictionaries and arrays, any keys or items that were | ||
| 1007 | - // indirect objects will still be indirect objects that point to | ||
| 1008 | - // the same place. In the strictest sense, this is not a shallow | ||
| 1009 | - // copy because it recursively descends arrays and dictionaries; | ||
| 1010 | - // it just doesn't cross over indirect objects. See also | ||
| 1011 | - // unsafeShallowCopy(). You can't copy a stream this way. See | ||
| 1012 | - // copyStream() instead. | 866 | + // Create a shallow copy of an object as a direct object, but do not traverse across indirect |
| 867 | + // object boundaries. That means that, for dictionaries and arrays, any keys or items that were | ||
| 868 | + // indirect objects will still be indirect objects that point to the same place. In the | ||
| 869 | + // strictest sense, this is not a shallow copy because it recursively descends arrays and | ||
| 870 | + // dictionaries; it just doesn't cross over indirect objects. See also unsafeShallowCopy(). You | ||
| 871 | + // can't copy a stream this way. See copyStream() instead. | ||
| 1013 | QPDF_DLL | 872 | QPDF_DLL |
| 1014 | QPDFObjectHandle shallowCopy(); | 873 | QPDFObjectHandle shallowCopy(); |
| 1015 | 874 | ||
| 1016 | - // Create a true shallow copy of an array or dictionary, just | ||
| 1017 | - // copying the immediate items (array) or keys (dictionary). This | ||
| 1018 | - // is "unsafe" because, if you *modify* any of the items in the | ||
| 1019 | - // copy, you are modifying the original, which is almost never | ||
| 1020 | - // what you want. However, if your intention is merely to | ||
| 1021 | - // *replace* top-level items or keys and not to modify lower-level | ||
| 1022 | - // items in the copy, this method is much faster than | ||
| 1023 | - // shallowCopy(). | 875 | + // Create a true shallow copy of an array or dictionary, just copying the immediate items |
| 876 | + // (array) or keys (dictionary). This is "unsafe" because, if you *modify* any of the items in | ||
| 877 | + // the copy, you are modifying the original, which is almost never what you want. However, if | ||
| 878 | + // your intention is merely to *replace* top-level items or keys and not to modify lower-level | ||
| 879 | + // items in the copy, this method is much faster than shallowCopy(). | ||
| 1024 | QPDF_DLL | 880 | QPDF_DLL |
| 1025 | QPDFObjectHandle unsafeShallowCopy(); | 881 | QPDFObjectHandle unsafeShallowCopy(); |
| 1026 | 882 | ||
| 1027 | - // Create a copy of this stream. The new stream and the old stream | ||
| 1028 | - // are independent: after the copy, either the original or the | ||
| 1029 | - // copy's dictionary or data can be modified without affecting the | ||
| 1030 | - // other. This uses StreamDataProvider internally, so no | ||
| 1031 | - // unnecessary copies of the stream's data are made. If the source | ||
| 1032 | - // stream's data is already being provided by a | ||
| 1033 | - // StreamDataProvider, the new stream will use the same one, so | ||
| 1034 | - // you have to make sure your StreamDataProvider can handle that | ||
| 1035 | - // case. But if you're already using a StreamDataProvider, you | ||
| 1036 | - // probably don't need to call this method. | 883 | + // Create a copy of this stream. The new stream and the old stream are independent: after the |
| 884 | + // copy, either the original or the copy's dictionary or data can be modified without affecting | ||
| 885 | + // the other. This uses StreamDataProvider internally, so no unnecessary copies of the stream's | ||
| 886 | + // data are made. If the source stream's data is already being provided by a StreamDataProvider, | ||
| 887 | + // the new stream will use the same one, so you have to make sure your StreamDataProvider can | ||
| 888 | + // handle that case. But if you're already using a StreamDataProvider, you probably don't need | ||
| 889 | + // to call this method. | ||
| 1037 | QPDF_DLL | 890 | QPDF_DLL |
| 1038 | QPDFObjectHandle copyStream(); | 891 | QPDFObjectHandle copyStream(); |
| 1039 | 892 | ||
| 1040 | // Mutator methods. | 893 | // Mutator methods. |
| 1041 | 894 | ||
| 1042 | - // Since qpdf 11: for mutators that may add or remove an item, | ||
| 1043 | - // there are additional versions whose names contain "AndGet" that | ||
| 1044 | - // return the added or removed item. For example: | 895 | + // Since qpdf 11: for mutators that may add or remove an item, there are additional versions |
| 896 | + // whose names contain "AndGet" that return the added or removed item. For example: | ||
| 1045 | // | 897 | // |
| 1046 | // auto new_dict = dict.replaceKeyAndGetNew( | 898 | // auto new_dict = dict.replaceKeyAndGetNew( |
| 1047 | // "/New", QPDFObjectHandle::newDictionary()); | 899 | // "/New", QPDFObjectHandle::newDictionary()); |
| @@ -1049,15 +901,12 @@ class QPDFObjectHandle | @@ -1049,15 +901,12 @@ class QPDFObjectHandle | ||
| 1049 | // auto old_value = dict.replaceKeyAndGetOld( | 901 | // auto old_value = dict.replaceKeyAndGetOld( |
| 1050 | // "/New", "(something)"_qpdf); | 902 | // "/New", "(something)"_qpdf); |
| 1051 | 903 | ||
| 1052 | - // Recursively copy this object, making it direct. An exception is | ||
| 1053 | - // thrown if a loop is detected. With allow_streams true, keep | ||
| 1054 | - // indirect object references to streams. Otherwise, throw an | ||
| 1055 | - // exception if any sub-object is a stream. Note that, when | ||
| 1056 | - // allow_streams is true and a stream is found, the resulting | ||
| 1057 | - // object is still associated with the containing qpdf. When | ||
| 1058 | - // allow_streams is false, the object will no longer be connected | ||
| 1059 | - // to the original QPDF object after this call completes | ||
| 1060 | - // successfully. | 904 | + // Recursively copy this object, making it direct. An exception is thrown if a loop is detected. |
| 905 | + // With allow_streams true, keep indirect object references to streams. Otherwise, throw an | ||
| 906 | + // exception if any sub-object is a stream. Note that, when allow_streams is true and a stream | ||
| 907 | + // is found, the resulting object is still associated with the containing qpdf. When | ||
| 908 | + // allow_streams is false, the object will no longer be connected to the original QPDF object | ||
| 909 | + // after this call completes successfully. | ||
| 1061 | QPDF_DLL | 910 | QPDF_DLL |
| 1062 | void makeDirect(bool allow_streams = false); | 911 | void makeDirect(bool allow_streams = false); |
| 1063 | 912 | ||
| @@ -1066,9 +915,8 @@ class QPDFObjectHandle | @@ -1066,9 +915,8 @@ class QPDFObjectHandle | ||
| 1066 | void setArrayItem(int, QPDFObjectHandle const&); | 915 | void setArrayItem(int, QPDFObjectHandle const&); |
| 1067 | QPDF_DLL | 916 | QPDF_DLL |
| 1068 | void setArrayFromVector(std::vector<QPDFObjectHandle> const& items); | 917 | void setArrayFromVector(std::vector<QPDFObjectHandle> const& items); |
| 1069 | - // Insert an item before the item at the given position ("at") so | ||
| 1070 | - // that it has that position after insertion. If "at" is equal to | ||
| 1071 | - // the size of the array, insert the item at the end. | 918 | + // Insert an item before the item at the given position ("at") so that it has that position |
| 919 | + // after insertion. If "at" is equal to the size of the array, insert the item at the end. | ||
| 1072 | QPDF_DLL | 920 | QPDF_DLL |
| 1073 | void insertItem(int at, QPDFObjectHandle const& item); | 921 | void insertItem(int at, QPDFObjectHandle const& item); |
| 1074 | // Like insertItem but return the item that was inserted. | 922 | // Like insertItem but return the item that was inserted. |
| @@ -1080,8 +928,7 @@ class QPDFObjectHandle | @@ -1080,8 +928,7 @@ class QPDFObjectHandle | ||
| 1080 | // Append an item, and return the newly added item. | 928 | // Append an item, and return the newly added item. |
| 1081 | QPDF_DLL | 929 | QPDF_DLL |
| 1082 | QPDFObjectHandle appendItemAndGetNew(QPDFObjectHandle const& item); | 930 | QPDFObjectHandle appendItemAndGetNew(QPDFObjectHandle const& item); |
| 1083 | - // Remove the item at that position, reducing the size of the | ||
| 1084 | - // array by one. | 931 | + // Remove the item at that position, reducing the size of the array by one. |
| 1085 | QPDF_DLL | 932 | QPDF_DLL |
| 1086 | void eraseItem(int at); | 933 | void eraseItem(int at); |
| 1087 | // Erase an item and return the item that was removed. | 934 | // Erase an item and return the item that was removed. |
| @@ -1090,22 +937,19 @@ class QPDFObjectHandle | @@ -1090,22 +937,19 @@ class QPDFObjectHandle | ||
| 1090 | 937 | ||
| 1091 | // Mutator methods for dictionary objects | 938 | // Mutator methods for dictionary objects |
| 1092 | 939 | ||
| 1093 | - // Replace value of key, adding it if it does not exist. If value | ||
| 1094 | - // is null, remove the key. | 940 | + // Replace value of key, adding it if it does not exist. If value is null, remove the key. |
| 1095 | QPDF_DLL | 941 | QPDF_DLL |
| 1096 | void replaceKey(std::string const& key, QPDFObjectHandle const& value); | 942 | void replaceKey(std::string const& key, QPDFObjectHandle const& value); |
| 1097 | // Replace value of key and return the value. | 943 | // Replace value of key and return the value. |
| 1098 | QPDF_DLL | 944 | QPDF_DLL |
| 1099 | QPDFObjectHandle replaceKeyAndGetNew(std::string const& key, QPDFObjectHandle const& value); | 945 | QPDFObjectHandle replaceKeyAndGetNew(std::string const& key, QPDFObjectHandle const& value); |
| 1100 | - // Replace value of key and return the old value, or null if the | ||
| 1101 | - // key was previously not present. | 946 | + // Replace value of key and return the old value, or null if the key was previously not present. |
| 1102 | QPDF_DLL | 947 | QPDF_DLL |
| 1103 | QPDFObjectHandle replaceKeyAndGetOld(std::string const& key, QPDFObjectHandle const& value); | 948 | QPDFObjectHandle replaceKeyAndGetOld(std::string const& key, QPDFObjectHandle const& value); |
| 1104 | // Remove key, doing nothing if key does not exist. | 949 | // Remove key, doing nothing if key does not exist. |
| 1105 | QPDF_DLL | 950 | QPDF_DLL |
| 1106 | void removeKey(std::string const& key); | 951 | void removeKey(std::string const& key); |
| 1107 | - // Remove key and return the old value. If the old value didn't | ||
| 1108 | - // exist, return a null object. | 952 | + // Remove key and return the old value. If the old value didn't exist, return a null object. |
| 1109 | QPDF_DLL | 953 | QPDF_DLL |
| 1110 | QPDFObjectHandle removeKeyAndGetOld(std::string const& key); | 954 | QPDFObjectHandle removeKeyAndGetOld(std::string const& key); |
| 1111 | 955 | ||
| @@ -1117,31 +961,26 @@ class QPDFObjectHandle | @@ -1117,31 +961,26 @@ class QPDFObjectHandle | ||
| 1117 | QPDF_DLL | 961 | QPDF_DLL |
| 1118 | QPDFObjectHandle getDict(); | 962 | QPDFObjectHandle getDict(); |
| 1119 | 963 | ||
| 1120 | - // By default, or if true passed, QPDFWriter will attempt to | ||
| 1121 | - // filter a stream based on decode level, whether compression is | ||
| 1122 | - // enabled, and its ability to filter. Passing false will prevent | ||
| 1123 | - // QPDFWriter from attempting to filter the stream even if it can. | ||
| 1124 | - // This includes both decoding and compressing. This makes it | ||
| 1125 | - // possible for you to prevent QPDFWriter from uncompressing and | ||
| 1126 | - // recompressing a stream that it knows how to operate on for any | ||
| 1127 | - // application-specific reason, such as that you have already | ||
| 1128 | - // optimized its filtering. Note that this doesn't affect any | ||
| 1129 | - // other ways to get the stream's data, such as pipeStreamData or | ||
| 1130 | - // getStreamData. | 964 | + // By default, or if true passed, QPDFWriter will attempt to filter a stream based on decode |
| 965 | + // level, whether compression is enabled, and its ability to filter. Passing false will prevent | ||
| 966 | + // QPDFWriter from attempting to filter the stream even if it can. This includes both decoding | ||
| 967 | + // and compressing. This makes it possible for you to prevent QPDFWriter from uncompressing and | ||
| 968 | + // recompressing a stream that it knows how to operate on for any application-specific reason, | ||
| 969 | + // such as that you have already optimized its filtering. Note that this doesn't affect any | ||
| 970 | + // other ways to get the stream's data, such as pipeStreamData or getStreamData. | ||
| 1131 | QPDF_DLL | 971 | QPDF_DLL |
| 1132 | void setFilterOnWrite(bool); | 972 | void setFilterOnWrite(bool); |
| 1133 | QPDF_DLL | 973 | QPDF_DLL |
| 1134 | bool getFilterOnWrite(); | 974 | bool getFilterOnWrite(); |
| 1135 | 975 | ||
| 1136 | - // If addTokenFilter has been called for this stream, then the | ||
| 1137 | - // original data should be considered to be modified. This means we | ||
| 1138 | - // should avoid optimizations such as not filtering a stream that | ||
| 1139 | - // is already compressed. | 976 | + // If addTokenFilter has been called for this stream, then the original data should be |
| 977 | + // considered to be modified. This means we should avoid optimizations such as not filtering a | ||
| 978 | + // stream that is already compressed. | ||
| 1140 | QPDF_DLL | 979 | QPDF_DLL |
| 1141 | bool isDataModified(); | 980 | bool isDataModified(); |
| 1142 | 981 | ||
| 1143 | - // Returns filtered (uncompressed) stream data. Throws an | ||
| 1144 | - // exception if the stream is filtered and we can't decode it. | 982 | + // Returns filtered (uncompressed) stream data. Throws an exception if the stream is filtered |
| 983 | + // and we can't decode it. | ||
| 1145 | QPDF_DLL | 984 | QPDF_DLL |
| 1146 | std::shared_ptr<Buffer> getStreamData(qpdf_stream_decode_level_e level = qpdf_dl_generalized); | 985 | std::shared_ptr<Buffer> getStreamData(qpdf_stream_decode_level_e level = qpdf_dl_generalized); |
| 1147 | 986 | ||
| @@ -1149,17 +988,15 @@ class QPDFObjectHandle | @@ -1149,17 +988,15 @@ class QPDFObjectHandle | ||
| 1149 | QPDF_DLL | 988 | QPDF_DLL |
| 1150 | std::shared_ptr<Buffer> getRawStreamData(); | 989 | std::shared_ptr<Buffer> getRawStreamData(); |
| 1151 | 990 | ||
| 1152 | - // Write stream data through the given pipeline. A null pipeline | ||
| 1153 | - // value may be used if all you want to do is determine whether a | ||
| 1154 | - // stream is filterable and would be filtered based on the | ||
| 1155 | - // provided flags. If flags is 0, write raw stream data and return | ||
| 1156 | - // false. Otherwise, the flags alter the behavior in the following | ||
| 1157 | - // way: | 991 | + // Write stream data through the given pipeline. A null pipeline value may be used if all you |
| 992 | + // want to do is determine whether a stream is filterable and would be filtered based on the | ||
| 993 | + // provided flags. If flags is 0, write raw stream data and return false. Otherwise, the flags | ||
| 994 | + // alter the behavior in the following way: | ||
| 1158 | // | 995 | // |
| 1159 | // encode_flags: | 996 | // encode_flags: |
| 1160 | // | 997 | // |
| 1161 | - // qpdf_sf_compress -- compress data with /FlateDecode if no other | ||
| 1162 | - // compression filters are applied. | 998 | + // qpdf_sf_compress -- compress data with /FlateDecode if no other compression filters are |
| 999 | + // applied. | ||
| 1163 | // | 1000 | // |
| 1164 | // qpdf_sf_normalize -- tokenize as content stream and normalize tokens | 1001 | // qpdf_sf_normalize -- tokenize as content stream and normalize tokens |
| 1165 | // | 1002 | // |
| @@ -1167,45 +1004,33 @@ class QPDFObjectHandle | @@ -1167,45 +1004,33 @@ class QPDFObjectHandle | ||
| 1167 | // | 1004 | // |
| 1168 | // qpdf_dl_none -- do not decode any streams. | 1005 | // qpdf_dl_none -- do not decode any streams. |
| 1169 | // | 1006 | // |
| 1170 | - // qpdf_dl_generalized -- decode supported general-purpose | ||
| 1171 | - // filters. This includes /ASCIIHexDecode, /ASCII85Decode, | ||
| 1172 | - // /LZWDecode, and /FlateDecode. | 1007 | + // qpdf_dl_generalized -- decode supported general-purpose filters. This includes |
| 1008 | + // /ASCIIHexDecode, /ASCII85Decode, /LZWDecode, and /FlateDecode. | ||
| 1173 | // | 1009 | // |
| 1174 | - // qpdf_dl_specialized -- in addition to generalized filters, also | ||
| 1175 | - // decode supported non-lossy specialized filters. This includes | ||
| 1176 | - // /RunLengthDecode. | 1010 | + // qpdf_dl_specialized -- in addition to generalized filters, also decode supported non-lossy |
| 1011 | + // specialized filters. This includes /RunLengthDecode. | ||
| 1177 | // | 1012 | // |
| 1178 | - // qpdf_dl_all -- in addition to generalized and non-lossy | ||
| 1179 | - // specialized filters, decode supported lossy filters. This | ||
| 1180 | - // includes /DCTDecode. | 1013 | + // qpdf_dl_all -- in addition to generalized and non-lossy specialized filters, decode supported |
| 1014 | + // lossy filters. This includes /DCTDecode. | ||
| 1181 | // | 1015 | // |
| 1182 | - // If, based on the flags and the filters and decode parameters, | ||
| 1183 | - // we determine that we know how to apply all requested filters, | ||
| 1184 | - // do so and return true if we are successful. | 1016 | + // If, based on the flags and the filters and decode parameters, we determine that we know how |
| 1017 | + // to apply all requested filters, do so and return true if we are successful. | ||
| 1185 | // | 1018 | // |
| 1186 | - // The exact meaning of the return value differs the different | ||
| 1187 | - // versions of this function, but for any version, the meaning has | ||
| 1188 | - // been the same. For the main version, added in qpdf 10, the | ||
| 1189 | - // return value indicates whether the overall operation succeeded. | ||
| 1190 | - // The filter parameter, if specified, will be set to whether or | ||
| 1191 | - // not filtering was attempted. If filtering was not requested, | ||
| 1192 | - // this value will be false even if the overall operation | ||
| 1193 | - // succeeded. | 1019 | + // The exact meaning of the return value differs among the different versions of this function, but |
| 1020 | + // for any version, the meaning has been the same. For the main version, added in qpdf 10, the | ||
| 1021 | + // return value indicates whether the overall operation succeeded. The filter parameter, if | ||
| 1022 | + // specified, will be set to whether or not filtering was attempted. If filtering was not | ||
| 1023 | + // requested, this value will be false even if the overall operation succeeded. | ||
| 1194 | // | 1024 | // |
| 1195 | - // If filtering is requested but this method returns false, it | ||
| 1196 | - // means there was some error in the filtering, in which case the | ||
| 1197 | - // resulting data is likely partially filtered and/or incomplete | ||
| 1198 | - // and may not be consistent with the configured filters. | ||
| 1199 | - // QPDFWriter handles this by attempting to get the stream data | ||
| 1200 | - // without filtering, but callers should consider a false return | ||
| 1201 | - // value when decode_level is not qpdf_dl_none to be a potential | ||
| 1202 | - // loss of data. If you intend to retry in that case, pass true as | ||
| 1203 | - // the value of will_retry. This changes the warning issued by the | ||
| 1204 | - // library to indicate that the operation will be retried without | ||
| 1205 | - // filtering to avoid data loss. | ||
| 1206 | - | ||
| 1207 | - // Return value is overall success, even if filtering is not | ||
| 1208 | - // requested. | 1025 | + // If filtering is requested but this method returns false, it means there was some error in the |
| 1026 | + // filtering, in which case the resulting data is likely partially filtered and/or incomplete | ||
| 1027 | + // and may not be consistent with the configured filters. QPDFWriter handles this by attempting | ||
| 1028 | + // to get the stream data without filtering, but callers should consider a false return value | ||
| 1029 | + // when decode_level is not qpdf_dl_none to be a potential loss of data. If you intend to retry | ||
| 1030 | + // in that case, pass true as the value of will_retry. This changes the warning issued by the | ||
| 1031 | + // library to indicate that the operation will be retried without filtering to avoid data loss. | ||
| 1032 | + | ||
| 1033 | + // Return value is overall success, even if filtering is not requested. | ||
| 1209 | QPDF_DLL | 1034 | QPDF_DLL |
| 1210 | bool pipeStreamData( | 1035 | bool pipeStreamData( |
| 1211 | Pipeline*, | 1036 | Pipeline*, |
| @@ -1215,9 +1040,8 @@ class QPDFObjectHandle | @@ -1215,9 +1040,8 @@ class QPDFObjectHandle | ||
| 1215 | bool suppress_warnings = false, | 1040 | bool suppress_warnings = false, |
| 1216 | bool will_retry = false); | 1041 | bool will_retry = false); |
| 1217 | 1042 | ||
| 1218 | - // Legacy version. Return value is whether filtering was | ||
| 1219 | - // attempted. There is no way to determine success if filtering | ||
| 1220 | - // was not attempted. | 1043 | + // Legacy version. Return value is whether filtering was attempted. There is no way to determine |
| 1044 | + // success if filtering was not attempted. | ||
| 1221 | QPDF_DLL | 1045 | QPDF_DLL |
| 1222 | bool pipeStreamData( | 1046 | bool pipeStreamData( |
| 1223 | Pipeline*, | 1047 | Pipeline*, |
| @@ -1226,8 +1050,7 @@ class QPDFObjectHandle | @@ -1226,8 +1050,7 @@ class QPDFObjectHandle | ||
| 1226 | bool suppress_warnings = false, | 1050 | bool suppress_warnings = false, |
| 1227 | bool will_retry = false); | 1051 | bool will_retry = false); |
| 1228 | 1052 | ||
| 1229 | - // Legacy pipeStreamData. This maps to the the flags-based | ||
| 1230 | - // pipeStreamData as follows: | 1053 | + // Legacy pipeStreamData. This maps to the flags-based pipeStreamData as follows: |
| 1231 | // filter = false -> encode_flags = 0 | 1054 | // filter = false -> encode_flags = 0 |
| 1232 | // filter = true -> decode_level = qpdf_dl_generalized | 1055 | // filter = true -> decode_level = qpdf_dl_generalized |
| 1233 | // normalize = true -> encode_flags |= qpdf_sf_normalize | 1056 | // normalize = true -> encode_flags |= qpdf_sf_normalize |
| @@ -1236,70 +1059,57 @@ class QPDFObjectHandle | @@ -1236,70 +1059,57 @@ class QPDFObjectHandle | ||
| 1236 | QPDF_DLL | 1059 | QPDF_DLL |
| 1237 | bool pipeStreamData(Pipeline*, bool filter, bool normalize, bool compress); | 1060 | bool pipeStreamData(Pipeline*, bool filter, bool normalize, bool compress); |
| 1238 | 1061 | ||
| 1239 | - // Replace a stream's dictionary. The new dictionary must be | ||
| 1240 | - // consistent with the stream's data. This is most appropriately | ||
| 1241 | - // used when creating streams from scratch that will use a stream | ||
| 1242 | - // data provider and therefore start with an empty dictionary. It | ||
| 1243 | - // may be more convenient in this case than calling getDict and | ||
| 1244 | - // modifying it for each key. The pdf-create example does this. | 1062 | + // Replace a stream's dictionary. The new dictionary must be consistent with the stream's data. |
| 1063 | + // This is most appropriately used when creating streams from scratch that will use a stream | ||
| 1064 | + // data provider and therefore start with an empty dictionary. It may be more convenient in | ||
| 1065 | + // this case than calling getDict and modifying it for each key. The pdf-create example does | ||
| 1066 | + // this. | ||
| 1245 | QPDF_DLL | 1067 | QPDF_DLL |
| 1246 | void replaceDict(QPDFObjectHandle const&); | 1068 | void replaceDict(QPDFObjectHandle const&); |
| 1247 | 1069 | ||
| 1248 | // REPLACING STREAM DATA | 1070 | // REPLACING STREAM DATA |
| 1249 | 1071 | ||
| 1250 | - // Note about all replaceStreamData methods: whatever values are | ||
| 1251 | - // passed as filter and decode_parms will overwrite /Filter and | ||
| 1252 | - // /DecodeParms in the stream. Passing a null object | ||
| 1253 | - // (QPDFObjectHandle::newNull()) will remove those values from the | ||
| 1254 | - // stream dictionary. From qpdf 11, passing an *uninitialized* | ||
| 1255 | - // QPDFObjectHandle (QPDFObjectHandle()) will leave any existing | 1072 | + // Note about all replaceStreamData methods: whatever values are passed as filter and |
| 1073 | + // decode_parms will overwrite /Filter and /DecodeParms in the stream. Passing a null object | ||
| 1074 | + // (QPDFObjectHandle::newNull()) will remove those values from the stream dictionary. From qpdf | ||
| 1075 | + // 11, passing an *uninitialized* QPDFObjectHandle (QPDFObjectHandle()) will leave any existing | ||
| 1256 | // values untouched. | 1076 | // values untouched. |
| 1257 | 1077 | ||
| 1258 | - // Replace this stream's stream data with the given data buffer. | ||
| 1259 | - // The stream's /Length key is replaced with the length of the | ||
| 1260 | - // data buffer. The stream is interpreted as if the data read from | ||
| 1261 | - // the file, after any decryption filters have been applied, is as | ||
| 1262 | - // presented. | 1078 | + // Replace this stream's stream data with the given data buffer. The stream's /Length key is |
| 1079 | + // replaced with the length of the data buffer. The stream is interpreted as if the data read | ||
| 1080 | + // from the file, after any decryption filters have been applied, is as presented. | ||
| 1263 | QPDF_DLL | 1081 | QPDF_DLL |
| 1264 | void replaceStreamData( | 1082 | void replaceStreamData( |
| 1265 | std::shared_ptr<Buffer> data, | 1083 | std::shared_ptr<Buffer> data, |
| 1266 | QPDFObjectHandle const& filter, | 1084 | QPDFObjectHandle const& filter, |
| 1267 | QPDFObjectHandle const& decode_parms); | 1085 | QPDFObjectHandle const& decode_parms); |
| 1268 | 1086 | ||
| 1269 | - // Replace the stream's stream data with the given string. | ||
| 1270 | - // This method will create a copy of the data rather than using | ||
| 1271 | - // the user-provided buffer as in the std::shared_ptr<Buffer> version | ||
| 1272 | - // of replaceStreamData. | 1087 | + // Replace the stream's stream data with the given string. This method will create a copy of the |
| 1088 | + // data rather than using the user-provided buffer as in the std::shared_ptr<Buffer> version of | ||
| 1089 | + // replaceStreamData. | ||
| 1273 | QPDF_DLL | 1090 | QPDF_DLL |
| 1274 | void replaceStreamData( | 1091 | void replaceStreamData( |
| 1275 | std::string const& data, | 1092 | std::string const& data, |
| 1276 | QPDFObjectHandle const& filter, | 1093 | QPDFObjectHandle const& filter, |
| 1277 | QPDFObjectHandle const& decode_parms); | 1094 | QPDFObjectHandle const& decode_parms); |
| 1278 | 1095 | ||
| 1279 | - // As above, replace this stream's stream data. Instead of | ||
| 1280 | - // directly providing a buffer with the stream data, call the | ||
| 1281 | - // given provider's provideStreamData method. See comments on the | ||
| 1282 | - // StreamDataProvider class (defined above) for details on the | ||
| 1283 | - // method. The data must be consistent with filter and | ||
| 1284 | - // decode_parms as provided. Although it is more complex to use | ||
| 1285 | - // this form of replaceStreamData than the one that takes a | ||
| 1286 | - // buffer, it makes it possible to avoid allocating memory for the | ||
| 1287 | - // stream data. Example programs are provided that use both forms | ||
| 1288 | - // of replaceStreamData. | ||
| 1289 | - | ||
| 1290 | - // Note about stream length: for any given stream, the provider | ||
| 1291 | - // must provide the same amount of data each time it is called. | ||
| 1292 | - // This is critical for making linearization work properly. | ||
| 1293 | - // Versions of qpdf before 3.0.0 required a length to be specified | ||
| 1294 | - // here. Starting with version 3.0.0, this is no longer necessary | ||
| 1295 | - // (or permitted). The first time the stream data provider is | ||
| 1296 | - // invoked for a given stream, the actual length is stored. | ||
| 1297 | - // Subsequent times, it is enforced that the length be the same as | ||
| 1298 | - // the first time. | ||
| 1299 | - | ||
| 1300 | - // If you have gotten a compile error here while building code | ||
| 1301 | - // that worked with older versions of qpdf, just omit the length | ||
| 1302 | - // parameter. You can also simplify your code by not having to | 1096 | + // As above, replace this stream's stream data. Instead of directly providing a buffer with the |
| 1097 | + // stream data, call the given provider's provideStreamData method. See comments on the | ||
| 1098 | + // StreamDataProvider class (defined above) for details on the method. The data must be | ||
| 1099 | + // consistent with filter and decode_parms as provided. Although it is more complex to use this | ||
| 1100 | + // form of replaceStreamData than the one that takes a buffer, it makes it possible to avoid | ||
| 1101 | + // allocating memory for the stream data. Example programs are provided that use both forms of | ||
| 1102 | + // replaceStreamData. | ||
| 1103 | + | ||
| 1104 | + // Note about stream length: for any given stream, the provider must provide the same amount of | ||
| 1105 | + // data each time it is called. This is critical for making linearization work properly. | ||
| 1106 | + // Versions of qpdf before 3.0.0 required a length to be specified here. Starting with | ||
| 1107 | + // version 3.0.0, this is no longer necessary (or permitted). The first time the stream data | ||
| 1108 | + // provider is invoked for a given stream, the actual length is stored. Subsequent times, it is | ||
| 1109 | + // enforced that the length be the same as the first time. | ||
| 1110 | + | ||
| 1111 | + // If you have gotten a compile error here while building code that worked with older versions | ||
| 1112 | + // of qpdf, just omit the length parameter. You can also simplify your code by not having to | ||
| 1303 | // compute the length in advance. | 1113 | // compute the length in advance. |
| 1304 | QPDF_DLL | 1114 | QPDF_DLL |
| 1305 | void replaceStreamData( | 1115 | void replaceStreamData( |
| @@ -1307,33 +1117,28 @@ class QPDFObjectHandle | @@ -1307,33 +1117,28 @@ class QPDFObjectHandle | ||
| 1307 | QPDFObjectHandle const& filter, | 1117 | QPDFObjectHandle const& filter, |
| 1308 | QPDFObjectHandle const& decode_parms); | 1118 | QPDFObjectHandle const& decode_parms); |
| 1309 | 1119 | ||
| 1310 | - // Starting in qpdf 10.2, you can use C++-11 function objects | ||
| 1311 | - // instead of StreamDataProvider. | 1120 | + // Starting in qpdf 10.2, you can use C++11 function objects instead of StreamDataProvider. |
| 1312 | 1121 | ||
| 1313 | - // The provider should write the stream data to the pipeline. For | ||
| 1314 | - // a one-liner to replace stream data with the contents of a file, | ||
| 1315 | - // pass QUtil::file_provider(filename) as provider. | 1122 | + // The provider should write the stream data to the pipeline. For a one-liner to replace stream |
| 1123 | + // data with the contents of a file, pass QUtil::file_provider(filename) as provider. | ||
| 1316 | QPDF_DLL | 1124 | QPDF_DLL |
| 1317 | void replaceStreamData( | 1125 | void replaceStreamData( |
| 1318 | std::function<void(Pipeline*)> provider, | 1126 | std::function<void(Pipeline*)> provider, |
| 1319 | QPDFObjectHandle const& filter, | 1127 | QPDFObjectHandle const& filter, |
| 1320 | QPDFObjectHandle const& decode_parms); | 1128 | QPDFObjectHandle const& decode_parms); |
| 1321 | - // The provider should write the stream data to the pipeline, | ||
| 1322 | - // returning true if it succeeded without errors. | 1129 | + // The provider should write the stream data to the pipeline, returning true if it succeeded |
| 1130 | + // without errors. | ||
| 1323 | QPDF_DLL | 1131 | QPDF_DLL |
| 1324 | void replaceStreamData( | 1132 | void replaceStreamData( |
| 1325 | std::function<bool(Pipeline*, bool suppress_warnings, bool will_retry)> provider, | 1133 | std::function<bool(Pipeline*, bool suppress_warnings, bool will_retry)> provider, |
| 1326 | QPDFObjectHandle const& filter, | 1134 | QPDFObjectHandle const& filter, |
| 1327 | QPDFObjectHandle const& decode_parms); | 1135 | QPDFObjectHandle const& decode_parms); |
| 1328 | 1136 | ||
| 1329 | - // Access object ID and generation. For direct objects, return | ||
| 1330 | - // object ID 0. | 1137 | + // Access object ID and generation. For direct objects, return object ID 0. |
| 1331 | 1138 | ||
| 1332 | - // NOTE: Be careful about calling getObjectID() and | ||
| 1333 | - // getGeneration() directly as this can lead to the pattern of | ||
| 1334 | - // depending on object ID or generation without the other. In | ||
| 1335 | - // general, when keeping track of object IDs, it's better to use | ||
| 1336 | - // QPDFObjGen instead. | 1139 | + // NOTE: Be careful about calling getObjectID() and getGeneration() directly as this can lead to |
| 1140 | + // the pattern of depending on object ID or generation without the other. In general, when | ||
| 1141 | + // keeping track of object IDs, it's better to use QPDFObjGen instead. | ||
| 1337 | 1142 | ||
| 1338 | QPDF_DLL | 1143 | QPDF_DLL |
| 1339 | QPDFObjGen getObjGen() const; | 1144 | QPDFObjGen getObjGen() const; |
| @@ -1346,51 +1151,40 @@ class QPDFObjectHandle | @@ -1346,51 +1151,40 @@ class QPDFObjectHandle | ||
| 1346 | std::string unparse(); | 1151 | std::string unparse(); |
| 1347 | QPDF_DLL | 1152 | QPDF_DLL |
| 1348 | std::string unparseResolved(); | 1153 | std::string unparseResolved(); |
| 1349 | - // For strings only, force binary representation. Otherwise, same | ||
| 1350 | - // as unparse. | 1154 | + // For strings only, force binary representation. Otherwise, same as unparse. |
| 1351 | QPDF_DLL | 1155 | QPDF_DLL |
| 1352 | std::string unparseBinary(); | 1156 | std::string unparseBinary(); |
| 1353 | 1157 | ||
| 1354 | - // Return encoded as JSON. The constant JSON::LATEST can be used | ||
| 1355 | - // to specify the latest available JSON version. The JSON is | ||
| 1356 | - // generated as follows: | ||
| 1357 | - // * Arrays, dictionaries, booleans, nulls, integers, and real | ||
| 1358 | - // numbers are represented by their native JSON types. | ||
| 1359 | - // * Names are encoded as strings representing the canonical | ||
| 1360 | - // representation (after parsing #xx) and preceded by a slash, | ||
| 1361 | - // just as unparse() returns. For example, the JSON for the | 1158 | + // Return encoded as JSON. The constant JSON::LATEST can be used to specify the latest available |
| 1159 | + // JSON version. The JSON is generated as follows: | ||
| 1160 | + // * Arrays, dictionaries, booleans, nulls, integers, and real numbers are represented by their | ||
| 1161 | + // native JSON types. | ||
| 1162 | + // * Names are encoded as strings representing the canonical representation (after parsing #xx) | ||
| 1163 | + // and preceded by a slash, just as unparse() returns. For example, the JSON for the | ||
| 1362 | // PDF-syntax name /Text#2fPlain would be "/Text/Plain". | 1164 | // PDF-syntax name /Text#2fPlain would be "/Text/Plain". |
| 1363 | // * Indirect references are encoded as strings containing "obj gen R" | 1165 | // * Indirect references are encoded as strings containing "obj gen R" |
| 1364 | // * Strings | 1166 | // * Strings |
| 1365 | - // * JSON v1: Strings are encoded as UTF-8 strings with | ||
| 1366 | - // unrepresentable binary characters encoded as \uHHHH. | ||
| 1367 | - // Characters in PDF Doc encoding that don't have | ||
| 1368 | - // bidirectional unicode mappings are not reversible. There is | ||
| 1369 | - // no way to tell the difference between a string that looks | ||
| 1370 | - // like a name or indirect object from an actual name or | ||
| 1371 | - // indirect object. | 1167 | + // * JSON v1: Strings are encoded as UTF-8 strings with unrepresentable binary characters |
| 1168 | + // encoded as \uHHHH. Characters in PDF Doc encoding that don't have bidirectional unicode | ||
| 1169 | + // mappings are not reversible. There is no way to tell the difference between a string that | ||
| 1170 | + // looks like a name or indirect object and an actual name or indirect object. | ||
| 1372 | // * JSON v2: | 1171 | // * JSON v2: |
| 1373 | - // * Unicode strings and strings encoded with PDF Doc encoding | ||
| 1374 | - // that can be bidrectionally mapped two Unicode (which is | ||
| 1375 | - // all strings without undefined characters) are represented | 1172 | + // * Unicode strings and strings encoded with PDF Doc encoding that can be bidirectionally |
| 1173 | + // mapped to Unicode (which is all strings without undefined characters) are represented | ||
| 1376 | // as "u:" followed by the UTF-8 encoded string. Example: | 1174 | // as "u:" followed by the UTF-8 encoded string. Example: |
| 1377 | // "u:potato". | 1175 | // "u:potato". |
| 1378 | - // * All other strings are represented as "b:" followed by a | ||
| 1379 | - // hexadecimal encoding of the string. Example: "b:0102cacb" | 1176 | + // * All other strings are represented as "b:" followed by a hexadecimal encoding of the |
| 1177 | + // string. Example: "b:0102cacb" | ||
| 1380 | // * Streams | 1178 | // * Streams |
| 1381 | - // * JSON v1: Only the stream's dictionary is encoded. There is | ||
| 1382 | - // no way tell a stream from a dictionary other than context. | ||
| 1383 | - // * JSON v2: A stream is encoded as {"dict": {...}} with the | ||
| 1384 | - // value being the encoding of the stream's dictionary. Since | ||
| 1385 | - // "dict" does not otherwise represent anything, this is | ||
| 1386 | - // unambiguous. The getStreamJSON() call can be used to add | ||
| 1387 | - // encoding of the stream's data. | ||
| 1388 | - // * Object types that are only valid in content streams (inline | ||
| 1389 | - // image, operator) are serialized as "null". Attempting to | ||
| 1390 | - // serialize a "reserved" object is an error. | ||
| 1391 | - // If dereference_indirect is true and this is an indirect object, | ||
| 1392 | - // show the actual contents of the object. The effect of | ||
| 1393 | - // dereference_indirect applies only to this object. It is not | 1179 | + // * JSON v1: Only the stream's dictionary is encoded. There is no way to tell a stream from a |
| 1180 | + // dictionary other than context. | ||
| 1181 | + // * JSON v2: A stream is encoded as {"dict": {...}} with the value being the encoding of the | ||
| 1182 | + // stream's dictionary. Since "dict" does not otherwise represent anything, this is | ||
| 1183 | + // unambiguous. The getStreamJSON() call can be used to add encoding of the stream's data. | ||
| 1184 | + // * Object types that are only valid in content streams (inline image, operator) are serialized | ||
| 1185 | + // as "null". Attempting to serialize a "reserved" object is an error. | ||
| 1186 | + // If dereference_indirect is true and this is an indirect object, show the actual contents of | ||
| 1187 | + // the object. The effect of dereference_indirect applies only to this object. It is not | ||
| 1394 | // recursive. | 1188 | // recursive. |
| 1395 | QPDF_DLL | 1189 | QPDF_DLL |
| 1396 | JSON getJSON(int json_version, bool dereference_indirect = false); | 1190 | JSON getJSON(int json_version, bool dereference_indirect = false); |
| @@ -1400,36 +1194,28 @@ class QPDFObjectHandle | @@ -1400,36 +1194,28 @@ class QPDFObjectHandle | ||
| 1400 | [[deprecated("Use getJSON(int version)")]] QPDF_DLL JSON | 1194 | [[deprecated("Use getJSON(int version)")]] QPDF_DLL JSON |
| 1401 | getJSON(bool dereference_indirect = false); | 1195 | getJSON(bool dereference_indirect = false); |
| 1402 | 1196 | ||
| 1403 | - // This method can be called on a stream to get a more extended | ||
| 1404 | - // JSON representation of the stream that includes the stream's | ||
| 1405 | - // data. The JSON object returned is always a dictionary whose | ||
| 1406 | - // "dict" key is an encoding of the stream's dictionary. The | ||
| 1407 | - // representation of the data is determined by the json_data | ||
| 1408 | - // field. | 1197 | + // This method can be called on a stream to get a more extended JSON representation of the |
| 1198 | + // stream that includes the stream's data. The JSON object returned is always a dictionary whose | ||
| 1199 | + // "dict" key is an encoding of the stream's dictionary. The representation of the data is | ||
| 1200 | + // determined by the json_data field. | ||
| 1409 | // | 1201 | // |
| 1410 | - // The json_data field may have the value qpdf_sj_none, | ||
| 1411 | - // qpdf_sj_inline, or qpdf_sj_file. | 1202 | + // The json_data field may have the value qpdf_sj_none, qpdf_sj_inline, or qpdf_sj_file. |
| 1412 | // | 1203 | // |
| 1413 | // If json_data is qpdf_sj_none, stream data is not represented. | 1204 | // If json_data is qpdf_sj_none, stream data is not represented. |
| 1414 | // | 1205 | // |
| 1415 | - // If json_data is qpdf_sj_inline or qpdf_sj_file, then stream | ||
| 1416 | - // data is filtered or not based on the value of decode_level, | ||
| 1417 | - // which has the same meaning as with pipeStreamData. | 1206 | + // If json_data is qpdf_sj_inline or qpdf_sj_file, then stream data is filtered or not based on |
| 1207 | + // the value of decode_level, which has the same meaning as with pipeStreamData. | ||
| 1418 | // | 1208 | // |
| 1419 | - // If json_data is qpdf_sj_inline, the base64-encoded stream data | ||
| 1420 | - // is included in the "data" field of the dictionary that is | ||
| 1421 | - // returned. | 1209 | + // If json_data is qpdf_sj_inline, the base64-encoded stream data is included in the "data" |
| 1210 | + // field of the dictionary that is returned. | ||
| 1422 | // | 1211 | // |
| 1423 | - // If json_data is qpdf_sj_file, then the Pipeline ("p") and | ||
| 1424 | - // data_filename argument must be supplied. The value of | ||
| 1425 | - // data_filename is stored in the resulting json in the "datafile" | ||
| 1426 | - // key but is not otherwise use. The stream data itself (raw or | ||
| 1427 | - // filtered depending on decode level), is written to the pipeline | ||
| 1428 | - // via pipeStreamData(). | 1212 | + // If json_data is qpdf_sj_file, then the Pipeline ("p") and data_filename argument must be |
| 1213 | + // supplied. The value of data_filename is stored in the resulting json in the "datafile" key | ||
| 1214 | + // but is not otherwise use. The stream data itself (raw or filtered depending on decode level), | ||
| 1215 | + // is written to the pipeline via pipeStreamData(). | ||
| 1429 | // | 1216 | // |
| 1430 | - // NOTE: When json_data is qpdf_sj_inline, the QPDF object from | ||
| 1431 | - // which the stream originates must remain valid until after the | ||
| 1432 | - // JSON object is written. | 1217 | + // NOTE: When json_data is qpdf_sj_inline, the QPDF object from which the stream originates must |
| 1218 | + // remain valid until after the JSON object is written. | ||
| 1433 | QPDF_DLL | 1219 | QPDF_DLL |
| 1434 | JSON getStreamJSON( | 1220 | JSON getStreamJSON( |
| 1435 | int json_version, | 1221 | int json_version, |
| @@ -1438,11 +1224,9 @@ class QPDFObjectHandle | @@ -1438,11 +1224,9 @@ class QPDFObjectHandle | ||
| 1438 | Pipeline* p, | 1224 | Pipeline* p, |
| 1439 | std::string const& data_filename); | 1225 | std::string const& data_filename); |
| 1440 | 1226 | ||
| 1441 | - // Legacy helper methods for commonly performed operations on | ||
| 1442 | - // pages. Newer code should use QPDFPageObjectHelper instead. The | ||
| 1443 | - // specification and behavior of these methods are the same as the | ||
| 1444 | - // identically named methods in that class, but newer | ||
| 1445 | - // functionality will be added there. | 1227 | + // Legacy helper methods for commonly performed operations on pages. Newer code should use |
| 1228 | + // QPDFPageObjectHelper instead. The specification and behavior of these methods are the same as | ||
| 1229 | + // the identically named methods in that class, but newer functionality will be added there. | ||
| 1446 | QPDF_DLL | 1230 | QPDF_DLL |
| 1447 | std::map<std::string, QPDFObjectHandle> getPageImages(); | 1231 | std::map<std::string, QPDFObjectHandle> getPageImages(); |
| 1448 | QPDF_DLL | 1232 | QPDF_DLL |
| @@ -1455,18 +1239,15 @@ class QPDFObjectHandle | @@ -1455,18 +1239,15 @@ class QPDFObjectHandle | ||
| 1455 | void coalesceContentStreams(); | 1239 | void coalesceContentStreams(); |
| 1456 | // End legacy page helpers | 1240 | // End legacy page helpers |
| 1457 | 1241 | ||
| 1458 | - // Issue a warning about this object if possible. If the object | ||
| 1459 | - // has a description, a warning will be issued using the owning | ||
| 1460 | - // QPDF as context. Otherwise, a message will be written to the | ||
| 1461 | - // default logger's error stream, which is standard error if not | ||
| 1462 | - // overridden. Objects read normally from the file have | ||
| 1463 | - // descriptions. See comments on setObjectDescription for | ||
| 1464 | - // additional details. | 1242 | + // Issue a warning about this object if possible. If the object has a description, a warning |
| 1243 | + // will be issued using the owning QPDF as context. Otherwise, a message will be written to the | ||
| 1244 | + // default logger's error stream, which is standard error if not overridden. Objects read | ||
| 1245 | + // normally from the file have descriptions. See comments on setObjectDescription for additional | ||
| 1246 | + // details. | ||
| 1465 | QPDF_DLL | 1247 | QPDF_DLL |
| 1466 | void warnIfPossible(std::string const& warning); | 1248 | void warnIfPossible(std::string const& warning); |
| 1467 | 1249 | ||
| 1468 | - // Provide access to specific classes for recursive | ||
| 1469 | - // disconnected(). | 1250 | + // Provide access to specific classes for recursive disconnected(). |
| 1470 | class DisconnectAccess | 1251 | class DisconnectAccess |
| 1471 | { | 1252 | { |
| 1472 | friend class QPDF_Dictionary; | 1253 | friend class QPDF_Dictionary; |
| @@ -1480,9 +1261,8 @@ class QPDFObjectHandle | @@ -1480,9 +1261,8 @@ class QPDFObjectHandle | ||
| 1480 | } | 1261 | } |
| 1481 | }; | 1262 | }; |
| 1482 | 1263 | ||
| 1483 | - // Convenience routine: Throws if the assumption is violated. Your | ||
| 1484 | - // code will be better if you call one of the isType methods and | ||
| 1485 | - // handle the case of the type being wrong, but these can be | 1264 | + // Convenience routine: Throws if the assumption is violated. Your code will be better if you |
| 1265 | + // call one of the isType methods and handle the case of the type being wrong, but these can be | ||
| 1486 | // convenient if you have already verified the type. | 1266 | // convenient if you have already verified the type. |
| 1487 | QPDF_DLL | 1267 | QPDF_DLL |
| 1488 | void assertInitialized() const; | 1268 | void assertInitialized() const; |
| @@ -1519,11 +1299,10 @@ class QPDFObjectHandle | @@ -1519,11 +1299,10 @@ class QPDFObjectHandle | ||
| 1519 | QPDF_DLL | 1299 | QPDF_DLL |
| 1520 | void assertNumber(); | 1300 | void assertNumber(); |
| 1521 | 1301 | ||
| 1522 | - // The isPageObject method checks the /Type key of the object. | ||
| 1523 | - // This is not completely reliable as there are some otherwise | ||
| 1524 | - // valid files whose /Type is wrong for page objects. qpdf is | ||
| 1525 | - // slightly more accepting but may still return false here when | ||
| 1526 | - // treating the object as a page would work. Use this sparingly. | 1302 | + // The isPageObject method checks the /Type key of the object. This is not completely reliable |
| 1303 | + // as there are some otherwise valid files whose /Type is wrong for page objects. qpdf is | ||
| 1304 | + // slightly more accepting but may still return false here when treating the object as a page | ||
| 1305 | + // would work. Use this sparingly. | ||
| 1527 | QPDF_DLL | 1306 | QPDF_DLL |
| 1528 | bool isPageObject(); | 1307 | bool isPageObject(); |
| 1529 | QPDF_DLL | 1308 | QPDF_DLL |
| @@ -1534,13 +1313,12 @@ class QPDFObjectHandle | @@ -1534,13 +1313,12 @@ class QPDFObjectHandle | ||
| 1534 | QPDF_DLL | 1313 | QPDF_DLL |
| 1535 | bool isFormXObject(); | 1314 | bool isFormXObject(); |
| 1536 | 1315 | ||
| 1537 | - // Indicate if this is an image. If exclude_imagemask is true, | ||
| 1538 | - // don't count image masks as images. | 1316 | + // Indicate if this is an image. If exclude_imagemask is true, don't count image masks as |
| 1317 | + // images. | ||
| 1539 | QPDF_DLL | 1318 | QPDF_DLL |
| 1540 | bool isImage(bool exclude_imagemask = true); | 1319 | bool isImage(bool exclude_imagemask = true); |
| 1541 | 1320 | ||
| 1542 | - // The following methods do not form part of the public API and are for | ||
| 1543 | - // internal use only. | 1321 | + // The following methods do not form part of the public API and are for internal use only. |
| 1544 | 1322 | ||
| 1545 | QPDFObjectHandle(std::shared_ptr<QPDFObject> const& obj) : | 1323 | QPDFObjectHandle(std::shared_ptr<QPDFObject> const& obj) : |
| 1546 | obj(obj) | 1324 | obj(obj) |
| @@ -1600,9 +1378,8 @@ class QPDFObjectHandle | @@ -1600,9 +1378,8 @@ class QPDFObjectHandle | ||
| 1600 | static void warn(QPDF*, QPDFExc const&); | 1378 | static void warn(QPDF*, QPDFExc const&); |
| 1601 | void checkOwnership(QPDFObjectHandle const&) const; | 1379 | void checkOwnership(QPDFObjectHandle const&) const; |
| 1602 | 1380 | ||
| 1603 | - // Moving members of QPDFObjectHandle into a smart pointer incurs | ||
| 1604 | - // a substantial performance penalty since QPDFObjectHandle | ||
| 1605 | - // objects are copied around so frequently. | 1381 | + // Moving members of QPDFObjectHandle into a smart pointer incurs a substantial performance |
| 1382 | + // penalty since QPDFObjectHandle objects are copied around so frequently. | ||
| 1606 | std::shared_ptr<QPDFObject> obj; | 1383 | std::shared_ptr<QPDFObject> obj; |
| 1607 | }; | 1384 | }; |
| 1608 | 1385 | ||
| @@ -1611,13 +1388,12 @@ class QPDFObjectHandle | @@ -1611,13 +1388,12 @@ class QPDFObjectHandle | ||
| 1611 | 1388 | ||
| 1612 | // auto oh = "<< /Key (value) >>"_qpdf; | 1389 | // auto oh = "<< /Key (value) >>"_qpdf; |
| 1613 | 1390 | ||
| 1614 | -// If this is causing problems in your code, define | ||
| 1615 | -// QPDF_NO_QPDF_STRING to prevent the declaration from being here. | 1391 | +// If this is causing problems in your code, define QPDF_NO_QPDF_STRING to prevent the declaration |
| 1392 | +// from being here. | ||
| 1616 | 1393 | ||
| 1617 | /* clang-format off */ | 1394 | /* clang-format off */ |
| 1618 | -// Disable formatting for this declaration: emacs font-lock in cc-mode | ||
| 1619 | -// (as of 28.1) treats the rest of the file as a string if | ||
| 1620 | -// clang-format removes the space after "operator", and as of | 1395 | +// Disable formatting for this declaration: emacs font-lock in cc-mode (as of 28.1) treats the rest |
| 1396 | +// of the file as a string if clang-format removes the space after "operator", and as of | ||
| 1621 | // clang-format 15, there's no way to prevent it from doing so. | 1397 | // clang-format 15, there's no way to prevent it from doing so. |
| 1622 | QPDF_DLL | 1398 | QPDF_DLL |
| 1623 | QPDFObjectHandle operator ""_qpdf(char const* v, size_t len); | 1399 | QPDFObjectHandle operator ""_qpdf(char const* v, size_t len); |
| @@ -1627,8 +1403,8 @@ QPDFObjectHandle operator ""_qpdf(char const* v, size_t len); | @@ -1627,8 +1403,8 @@ QPDFObjectHandle operator ""_qpdf(char const* v, size_t len); | ||
| 1627 | 1403 | ||
| 1628 | class QPDFObjectHandle::QPDFDictItems | 1404 | class QPDFObjectHandle::QPDFDictItems |
| 1629 | { | 1405 | { |
| 1630 | - // This class allows C++-style iteration, including range-for | ||
| 1631 | - // iteration, around dictionaries. You can write | 1406 | + // This class allows C++-style iteration, including range-for iteration, around dictionaries. |
| 1407 | + // You can write | ||
| 1632 | 1408 | ||
| 1633 | // for (auto iter: QPDFDictItems(dictionary_obj)) | 1409 | // for (auto iter: QPDFDictItems(dictionary_obj)) |
| 1634 | // { | 1410 | // { |
| @@ -1636,8 +1412,7 @@ class QPDFObjectHandle::QPDFDictItems | @@ -1636,8 +1412,7 @@ class QPDFObjectHandle::QPDFDictItems | ||
| 1636 | // // iter.second is a QPDFObjectHandle | 1412 | // // iter.second is a QPDFObjectHandle |
| 1637 | // } | 1413 | // } |
| 1638 | 1414 | ||
| 1639 | - // See examples/pdf-name-number-tree.cc for a demonstration of | ||
| 1640 | - // using this API. | 1415 | + // See examples/pdf-name-number-tree.cc for a demonstration of using this API. |
| 1641 | 1416 | ||
| 1642 | public: | 1417 | public: |
| 1643 | QPDF_DLL | 1418 | QPDF_DLL |
| @@ -1727,16 +1502,15 @@ class QPDFObjectHandle::QPDFDictItems | @@ -1727,16 +1502,15 @@ class QPDFObjectHandle::QPDFDictItems | ||
| 1727 | 1502 | ||
| 1728 | class QPDFObjectHandle::QPDFArrayItems | 1503 | class QPDFObjectHandle::QPDFArrayItems |
| 1729 | { | 1504 | { |
| 1730 | - // This class allows C++-style iteration, including range-for | ||
| 1731 | - // iteration, around arrays. You can write | 1505 | + // This class allows C++-style iteration, including range-for iteration, around arrays. You can |
| 1506 | + // write | ||
| 1732 | 1507 | ||
| 1733 | // for (auto iter: QPDFArrayItems(array_obj)) | 1508 | // for (auto iter: QPDFArrayItems(array_obj)) |
| 1734 | // { | 1509 | // { |
| 1735 | // // iter is a QPDFObjectHandle | 1510 | // // iter is a QPDFObjectHandle |
| 1736 | // } | 1511 | // } |
| 1737 | 1512 | ||
| 1738 | - // See examples/pdf-name-number-tree.cc for a demonstration of | ||
| 1739 | - // using this API. | 1513 | + // See examples/pdf-name-number-tree.cc for a demonstration of using this API. |
| 1740 | 1514 | ||
| 1741 | public: | 1515 | public: |
| 1742 | QPDF_DLL | 1516 | QPDF_DLL |
include/qpdf/QPDFPageObjectHelper.hh
| @@ -2,22 +2,19 @@ | @@ -2,22 +2,19 @@ | ||
| 2 | // | 2 | // |
| 3 | // This file is part of qpdf. | 3 | // This file is part of qpdf. |
| 4 | // | 4 | // |
| 5 | -// Licensed under the Apache License, Version 2.0 (the "License"); | ||
| 6 | -// you may not use this file except in compliance with the License. | ||
| 7 | -// You may obtain a copy of the License at | 5 | +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
| 6 | +// in compliance with the License. You may obtain a copy of the License at | ||
| 8 | // | 7 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 | 8 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // | 9 | // |
| 11 | -// Unless required by applicable law or agreed to in writing, software | ||
| 12 | -// distributed under the License is distributed on an "AS IS" BASIS, | ||
| 13 | -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 14 | -// See the License for the specific language governing permissions and | ||
| 15 | -// limitations under the License. | 10 | +// Unless required by applicable law or agreed to in writing, software distributed under the License |
| 11 | +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | ||
| 12 | +// or implied. See the License for the specific language governing permissions and limitations under | ||
| 13 | +// the License. | ||
| 16 | // | 14 | // |
| 17 | -// Versions of qpdf prior to version 7 were released under the terms | ||
| 18 | -// of version 2.0 of the Artistic License. At your option, you may | ||
| 19 | -// continue to consider qpdf to be licensed under those terms. Please | ||
| 20 | -// see the manual for additional information. | 15 | +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic |
| 16 | +// License. At your option, you may continue to consider qpdf to be licensed under those terms. | ||
| 17 | +// Please see the manual for additional information. | ||
| 21 | 18 | ||
| 22 | #ifndef QPDFPAGEOBJECTHELPER_HH | 19 | #ifndef QPDFPAGEOBJECTHELPER_HH |
| 23 | #define QPDFPAGEOBJECTHELPER_HH | 20 | #define QPDFPAGEOBJECTHELPER_HH |
| @@ -35,9 +32,8 @@ class QPDFAcroFormDocumentHelper; | @@ -35,9 +32,8 @@ class QPDFAcroFormDocumentHelper; | ||
| 35 | 32 | ||
| 36 | class QPDFPageObjectHelper: public QPDFObjectHelper | 33 | class QPDFPageObjectHelper: public QPDFObjectHelper |
| 37 | { | 34 | { |
| 38 | - // This is a helper class for page objects, but as of qpdf 10.1, | ||
| 39 | - // many of the methods also work for form XObjects. When this is | ||
| 40 | - // the case, it is noted in the comment. | 35 | + // This is a helper class for page objects, but as of qpdf 10.1, many of the methods also work |
| 36 | + // for form XObjects. When this is the case, it is noted in the comment. | ||
| 41 | 37 | ||
| 42 | public: | 38 | public: |
| 43 | QPDF_DLL | 39 | QPDF_DLL |
| @@ -47,35 +43,30 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | @@ -47,35 +43,30 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | ||
| 47 | 43 | ||
| 48 | // PAGE ATTRIBUTES | 44 | // PAGE ATTRIBUTES |
| 49 | 45 | ||
| 50 | - // The getAttribute method works with pages and form XObjects. It | ||
| 51 | - // return the value of the requested attribute from the page/form | ||
| 52 | - // XObject's dictionary, taking inheritance from the pages tree | ||
| 53 | - // into consideration. For pages, the attributes /MediaBox, | ||
| 54 | - // /CropBox, /Resources, and /Rotate are inheritable, meaning that | ||
| 55 | - // if they are not present directly on the page node, they may be | ||
| 56 | - // inherited from ancestor nodes in the pages tree. | 46 | + // The getAttribute method works with pages and form XObjects. It returns the value of the |
| 47 | + // requested attribute from the page/form XObject's dictionary, taking inheritance from the | ||
| 48 | + // pages tree into consideration. For pages, the attributes /MediaBox, /CropBox, /Resources, and | ||
| 49 | + // /Rotate are inheritable, meaning that if they are not present directly on the page node, they | ||
| 50 | + // may be inherited from ancestor nodes in the pages tree. | ||
| 57 | // | 51 | // |
| 58 | // There are two ways that an attribute can be "shared": | 52 | // There are two ways that an attribute can be "shared": |
| 59 | // | 53 | // |
| 60 | - // * For inheritable attributes on pages, it may appear in a | ||
| 61 | - // higher level node of the pages tree | 54 | + // * For inheritable attributes on pages, it may appear in a higher level node of the pages tree |
| 62 | // | 55 | // |
| 63 | - // * For any attribute, the attribute may be an indirect object | ||
| 64 | - // which may be referenced by more than one page/form XObject. | 56 | + // * For any attribute, the attribute may be an indirect object which may be referenced by more |
| 57 | + // than one page/form XObject. | ||
| 65 | // | 58 | // |
| 66 | - // If copy_if_shared is true, then this method will replace the | ||
| 67 | - // attribute with a shallow copy if it is indirect or inherited | ||
| 68 | - // and return the copy. You should do this if you are going to | ||
| 69 | - // modify the returned object and want the modifications to apply | ||
| 70 | - // to the current page/form XObject only. | 59 | + // If copy_if_shared is true, then this method will replace the attribute with a shallow copy if |
| 60 | + // it is indirect or inherited and return the copy. You should do this if you are going to | ||
| 61 | + // modify the returned object and want the modifications to apply to the current page/form | ||
| 62 | + // XObject only. | ||
| 71 | QPDF_DLL | 63 | QPDF_DLL |
| 72 | QPDFObjectHandle getAttribute(std::string const& name, bool copy_if_shared); | 64 | QPDFObjectHandle getAttribute(std::string const& name, bool copy_if_shared); |
| 73 | 65 | ||
| 74 | // PAGE BOXES | 66 | // PAGE BOXES |
| 75 | // | 67 | // |
| 76 | - // Pages have various types of boundary boxes. These are described | ||
| 77 | - // in detail in the PDF specification (section 14.11.2 Page | ||
| 78 | - // boundaries). They are, by key in the page dictionary: | 68 | + // Pages have various types of boundary boxes. These are described in detail in the PDF |
| 69 | + // specification (section 14.11.2 Page boundaries). They are, by key in the page dictionary: | ||
| 79 | // | 70 | // |
| 80 | // * /MediaBox -- boundaries of physical page | 71 | // * /MediaBox -- boundaries of physical page |
| 81 | // * /CropBox -- clipping region of what is displayed | 72 | // * /CropBox -- clipping region of what is displayed |
| @@ -87,114 +78,90 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | @@ -87,114 +78,90 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | ||
| 87 | // fallback value for /CropBox is /MediaBox, and the fallback | 78 | // fallback value for /CropBox is /MediaBox, and the fallback |
| 88 | // values for the other boxes are /CropBox. | 79 | // values for the other boxes are /CropBox. |
| 89 | // | 80 | // |
| 90 | - // As noted above (PAGE ATTRIBUTES), /MediaBox and /CropBox can be | ||
| 91 | - // inherited from parent nodes in the pages tree. The other boxes | ||
| 92 | - // can't be inherited. | 81 | + // As noted above (PAGE ATTRIBUTES), /MediaBox and /CropBox can be inherited from parent nodes |
| 82 | + // in the pages tree. The other boxes can't be inherited. | ||
| 93 | // | 83 | // |
| 94 | - // When the comments below refer to the "effective value" of an | ||
| 95 | - // box, this takes into consideration both inheritance through the | ||
| 96 | - // pages tree (in the case of /MediaBox and /CropBox) and fallback | ||
| 97 | - // values for missing attributes (for all except /MediaBox). | 84 | + // When the comments below refer to the "effective value" of a box, this takes into |
| 85 | + // consideration both inheritance through the pages tree (in the case of /MediaBox and /CropBox) | ||
| 86 | + // and fallback values for missing attributes (for all except /MediaBox). | ||
| 98 | // | 87 | // |
| 99 | - // For the methods below, copy_if_shared is passed to getAttribute | ||
| 100 | - // and therefore refers only to indirect objects and values that | ||
| 101 | - // are inherited through the pages tree. | 88 | + // For the methods below, copy_if_shared is passed to getAttribute and therefore refers only to |
| 89 | + // indirect objects and values that are inherited through the pages tree. | ||
| 102 | // | 90 | // |
| 103 | - // If copy_if_fallback is true, a copy is made if the object's | ||
| 104 | - // value was obtained by falling back to a different box. | 91 | + // If copy_if_fallback is true, a copy is made if the object's value was obtained by falling |
| 92 | + // back to a different box. | ||
| 105 | // | 93 | // |
| 106 | - // The copy_if_shared and copy_if_fallback parameters carry across | ||
| 107 | - // multiple layers. This is explained below. | 94 | + // The copy_if_shared and copy_if_fallback parameters carry across multiple layers. This is |
| 95 | + // explained below. | ||
| 108 | // | 96 | // |
| 109 | - // You should set copy_if_shared to true if you want to modify a | ||
| 110 | - // bounding box for the current page without affecting other pages | ||
| 111 | - // but you don't want to change the fallback behavior. For | ||
| 112 | - // example, if you want to modify the /TrimBox for the current | ||
| 113 | - // page only but have it continue to fall back to the value of | ||
| 114 | - // /CropBox or /MediaBox if they are not defined, you could set | 97 | + // You should set copy_if_shared to true if you want to modify a bounding box for the current |
| 98 | + // page without affecting other pages but you don't want to change the fallback behavior. For | ||
| 99 | + // example, if you want to modify the /TrimBox for the current page only but have it continue to | ||
| 100 | + // fall back to the value of /CropBox or /MediaBox if they are not defined, you could set | ||
| 115 | // copy_if_shared to true. | 101 | // copy_if_shared to true. |
| 116 | // | 102 | // |
| 117 | - // You should set copy_if_fallback to true if you want to modify a | ||
| 118 | - // specific box as distinct from any other box. For example, if | ||
| 119 | - // you want to make /TrimBox differ from /CropBox, then you should | ||
| 120 | - // set copy_if_fallback to true. | 103 | + // You should set copy_if_fallback to true if you want to modify a specific box as distinct from |
| 104 | + // any other box. For example, if you want to make /TrimBox differ from /CropBox, then you | ||
| 105 | + // should set copy_if_fallback to true. | ||
| 121 | // | 106 | // |
| 122 | // The copy_if_fallback flags were added in qpdf 11. | 107 | // The copy_if_fallback flags were added in qpdf 11. |
| 123 | // | 108 | // |
| 124 | - // For example, suppose that neither /CropBox nor /TrimBox is | ||
| 125 | - // present on a page but /CropBox is present in the page's parent | ||
| 126 | - // node in the page tree. | 109 | + // For example, suppose that neither /CropBox nor /TrimBox is present on a page but /CropBox is |
| 110 | + // present in the page's parent node in the page tree. | ||
| 127 | // | 111 | // |
| 128 | - // * getTrimBox(false, false) would return the /CropBox from the | ||
| 129 | - // parent node. | 112 | + // * getTrimBox(false, false) would return the /CropBox from the parent node. |
| 130 | // | 113 | // |
| 131 | - // * getTrimBox(true, false) would make a shallow copy of the | ||
| 132 | - // /CropBox from the parent node into the current node and | ||
| 133 | - // return it. | 114 | + // * getTrimBox(true, false) would make a shallow copy of the /CropBox from the parent node into |
| 115 | + // the current node and return it. | ||
| 134 | // | 116 | // |
| 135 | - // * getTrimBox(false, true) would make a shallow copy of the | ||
| 136 | - // /CropBox from the parent node into /TrimBox of the current | ||
| 137 | - // node and return it. | 117 | + // * getTrimBox(false, true) would make a shallow copy of the /CropBox from the parent node into |
| 118 | + // /TrimBox of the current node and return it. | ||
| 138 | // | 119 | // |
| 139 | - // * getTrimBox(true, true) would make a shallow copy of the | ||
| 140 | - // /CropBox from the parent node into the current node, then | ||
| 141 | - // make a shallow copy of the resulting copy to /TrimBox of the | ||
| 142 | - // current node, and then return that. | 120 | + // * getTrimBox(true, true) would make a shallow copy of the /CropBox from the parent node into |
| 121 | + // the current node, then make a shallow copy of the resulting copy to /TrimBox of the current | ||
| 122 | + // node, and then return that. | ||
| 143 | // | 123 | // |
| 144 | - // To illustrate how these parameters carry across multiple | ||
| 145 | - // layers, suppose that neither /MediaBox, /CropBox, nor /TrimBox | ||
| 146 | - // is present on a page but /MediaBox is present on the parent. In | ||
| 147 | - // this case: | 124 | + // To illustrate how these parameters carry across multiple layers, suppose that neither |
| 125 | + // /MediaBox, /CropBox, nor /TrimBox is present on a page but /MediaBox is present on the | ||
| 126 | + // parent. In this case: | ||
| 148 | // | 127 | // |
| 149 | - // * getTrimBox(false, false) would return the value of /MediaBox | ||
| 150 | - // from the parent node. | 128 | + // * getTrimBox(false, false) would return the value of /MediaBox from the parent node. |
| 151 | // | 129 | // |
| 152 | - // * getTrimBox(true, false) would copy /MediaBox to the current | ||
| 153 | - // node and return it. | 130 | + // * getTrimBox(true, false) would copy /MediaBox to the current node and return it. |
| 154 | // | 131 | // |
| 155 | - // * getTrimBox(false, true) would first copy /MediaBox from the | ||
| 156 | - // parent to /CropBox, then copy /CropBox to /TrimBox, and then | ||
| 157 | - // return the result. | 132 | + // * getTrimBox(false, true) would first copy /MediaBox from the parent to /CropBox, then copy |
| 133 | + // /CropBox to /TrimBox, and then return the result. | ||
| 158 | // | 134 | // |
| 159 | - // * getTrimBox(true, true) would first copy /MediaBox from the | ||
| 160 | - // parent to the current page, then copy it to /CropBox, then | ||
| 161 | - // copy /CropBox to /TrimBox, and then return the result. | 135 | + // * getTrimBox(true, true) would first copy /MediaBox from the parent to the current page, then |
| 136 | + // copy it to /CropBox, then copy /CropBox to /TrimBox, and then return the result. | ||
| 162 | // | 137 | // |
| 163 | - // If you need different behavior, call getAttribute directly and | ||
| 164 | - // take care of your own copying. | 138 | + // If you need different behavior, call getAttribute directly and take care of your own copying. |
| 165 | 139 | ||
| 166 | // Return the effective MediaBox | 140 | // Return the effective MediaBox |
| 167 | QPDF_DLL | 141 | QPDF_DLL |
| 168 | QPDFObjectHandle getMediaBox(bool copy_if_shared = false); | 142 | QPDFObjectHandle getMediaBox(bool copy_if_shared = false); |
| 169 | 143 | ||
| 170 | - // Return the effective CropBox. If not defined, fall back to | ||
| 171 | - // MediaBox | 144 | + // Return the effective CropBox. If not defined, fall back to MediaBox |
| 172 | QPDF_DLL | 145 | QPDF_DLL |
| 173 | QPDFObjectHandle getCropBox(bool copy_if_shared = false, bool copy_if_fallback = false); | 146 | QPDFObjectHandle getCropBox(bool copy_if_shared = false, bool copy_if_fallback = false); |
| 174 | 147 | ||
| 175 | - // Return the effective BleedBox. If not defined, fall back to | ||
| 176 | - // CropBox. | 148 | + // Return the effective BleedBox. If not defined, fall back to CropBox. |
| 177 | QPDF_DLL | 149 | QPDF_DLL |
| 178 | QPDFObjectHandle getBleedBox(bool copy_if_shared = false, bool copy_if_fallback = false); | 150 | QPDFObjectHandle getBleedBox(bool copy_if_shared = false, bool copy_if_fallback = false); |
| 179 | 151 | ||
| 180 | - // Return the effective TrimBox. If not defined, fall back to | ||
| 181 | - // CropBox. | 152 | + // Return the effective TrimBox. If not defined, fall back to CropBox. |
| 182 | QPDF_DLL | 153 | QPDF_DLL |
| 183 | QPDFObjectHandle getTrimBox(bool copy_if_shared = false, bool copy_if_fallback = false); | 154 | QPDFObjectHandle getTrimBox(bool copy_if_shared = false, bool copy_if_fallback = false); |
| 184 | 155 | ||
| 185 | - // Return the effective ArtBox. If not defined, fall back to | ||
| 186 | - // CropBox. | 156 | + // Return the effective ArtBox. If not defined, fall back to CropBox. |
| 187 | QPDF_DLL | 157 | QPDF_DLL |
| 188 | QPDFObjectHandle getArtBox(bool copy_if_shared = false, bool copy_if_fallback = false); | 158 | QPDFObjectHandle getArtBox(bool copy_if_shared = false, bool copy_if_fallback = false); |
| 189 | 159 | ||
| 190 | - // Iterate through XObjects, possibly recursing into form | ||
| 191 | - // XObjects. This works with pages or form XObjects. Call action | ||
| 192 | - // on each XObject for which selector, if specified, returns true. | ||
| 193 | - // With no selector, calls action for every object. In addition to | ||
| 194 | - // the object being passed to action, the containing XObject | ||
| 195 | - // dictionary and key are passed in. Remember that the XObject | ||
| 196 | - // dictionary may be shared, and the object may appear in multiple | ||
| 197 | - // XObject dictionaries. | 160 | + // Iterate through XObjects, possibly recursing into form XObjects. This works with pages or |
| 161 | + // form XObjects. Call action on each XObject for which selector, if specified, returns true. | ||
| 162 | + // With no selector, calls action for every object. In addition to the object being passed to | ||
| 163 | + // action, the containing XObject dictionary and key are passed in. Remember that the XObject | ||
| 164 | + // dictionary may be shared, and the object may appear in multiple XObject dictionaries. | ||
| 198 | QPDF_DLL | 165 | QPDF_DLL |
| 199 | void forEachXObject( | 166 | void forEachXObject( |
| 200 | bool recursive, | 167 | bool recursive, |
| @@ -214,12 +181,10 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | @@ -214,12 +181,10 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | ||
| 214 | std::function<void( | 181 | std::function<void( |
| 215 | QPDFObjectHandle& obj, QPDFObjectHandle& xobj_dict, std::string const& key)> action); | 182 | QPDFObjectHandle& obj, QPDFObjectHandle& xobj_dict, std::string const& key)> action); |
| 216 | 183 | ||
| 217 | - // Returns an empty map if there are no images or no resources. | ||
| 218 | - // Prior to qpdf 8.4.0, this function did not support inherited | ||
| 219 | - // resources, but it does now. Return value is a map from XObject | ||
| 220 | - // name to the image object, which is always a stream. Works with | ||
| 221 | - // form XObjects as well as pages. This method does not recurse | ||
| 222 | - // into nested form XObjects. For that, use forEachImage. | 184 | + // Returns an empty map if there are no images or no resources. Prior to qpdf 8.4.0, this |
| 185 | + // function did not support inherited resources, but it does now. Return value is a map from | ||
| 186 | + // XObject name to the image object, which is always a stream. Works with form XObjects as well | ||
| 187 | + // as pages. This method does not recurse into nested form XObjects. For that, use forEachImage. | ||
| 223 | QPDF_DLL | 188 | QPDF_DLL |
| 224 | std::map<std::string, QPDFObjectHandle> getImages(); | 189 | std::map<std::string, QPDFObjectHandle> getImages(); |
| 225 | 190 | ||
| @@ -227,59 +192,48 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | @@ -227,59 +192,48 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | ||
| 227 | QPDF_DLL | 192 | QPDF_DLL |
| 228 | std::map<std::string, QPDFObjectHandle> getPageImages(); | 193 | std::map<std::string, QPDFObjectHandle> getPageImages(); |
| 229 | 194 | ||
| 230 | - // Returns an empty map if there are no form XObjects or no | ||
| 231 | - // resources. Otherwise, returns a map of keys to form XObjects | ||
| 232 | - // directly referenced from this page or form XObjects. This does | ||
| 233 | - // not recurse into nested form XObjects. For that, use | ||
| 234 | - // forEachFormXObject. | 195 | + // Returns an empty map if there are no form XObjects or no resources. Otherwise, returns a map |
| 196 | + // of keys to form XObjects directly referenced from this page or form XObjects. This does not | ||
| 197 | + // recurse into nested form XObjects. For that, use forEachFormXObject. | ||
| 235 | QPDF_DLL | 198 | QPDF_DLL |
| 236 | std::map<std::string, QPDFObjectHandle> getFormXObjects(); | 199 | std::map<std::string, QPDFObjectHandle> getFormXObjects(); |
| 237 | 200 | ||
| 238 | - // Converts each inline image to an external (normal) image if the | ||
| 239 | - // size is at least the specified number of bytes. This method | ||
| 240 | - // works with pages or form XObjects. By default, it recursively | ||
| 241 | - // processes nested form XObjects. Pass true as shallow to avoid | ||
| 242 | - // this behavior. Prior to qpdf 10.1, form XObjects were ignored, | ||
| 243 | - // but this was considered a bug. | 201 | + // Converts each inline image to an external (normal) image if the size is at least the |
| 202 | + // specified number of bytes. This method works with pages or form XObjects. By default, it | ||
| 203 | + // recursively processes nested form XObjects. Pass true as shallow to avoid this behavior. | ||
| 204 | + // Prior to qpdf 10.1, form XObjects were ignored, but this was considered a bug. | ||
| 244 | QPDF_DLL | 205 | QPDF_DLL |
| 245 | void externalizeInlineImages(size_t min_size = 0, bool shallow = false); | 206 | void externalizeInlineImages(size_t min_size = 0, bool shallow = false); |
| 246 | 207 | ||
| 247 | - // Return the annotations in the page's "/Annots" list, if any. If | ||
| 248 | - // only_subtype is non-empty, only include annotations of the | ||
| 249 | - // given subtype. | 208 | + // Return the annotations in the page's "/Annots" list, if any. If only_subtype is non-empty, |
| 209 | + // only include annotations of the given subtype. | ||
| 250 | QPDF_DLL | 210 | QPDF_DLL |
| 251 | std::vector<QPDFAnnotationObjectHelper> getAnnotations(std::string const& only_subtype = ""); | 211 | std::vector<QPDFAnnotationObjectHelper> getAnnotations(std::string const& only_subtype = ""); |
| 252 | 212 | ||
| 253 | - // Returns a vector of stream objects representing the content | ||
| 254 | - // streams for the given page. This routine allows the caller to | ||
| 255 | - // not care whether there are one or more than one content streams | 213 | + // Returns a vector of stream objects representing the content streams for the given page. This |
| 214 | + // routine allows the caller to not care whether there are one or more than one content streams | ||
| 256 | // for a page. | 215 | // for a page. |
| 257 | QPDF_DLL | 216 | QPDF_DLL |
| 258 | std::vector<QPDFObjectHandle> getPageContents(); | 217 | std::vector<QPDFObjectHandle> getPageContents(); |
| 259 | 218 | ||
| 260 | - // Add the given object as a new content stream for this page. If | ||
| 261 | - // parameter 'first' is true, add to the beginning. Otherwise, add | ||
| 262 | - // to the end. This routine automatically converts the page | ||
| 263 | - // contents to an array if it is a scalar, allowing the caller not | ||
| 264 | - // to care what the initial structure is. You can call | ||
| 265 | - // coalesceContentStreams() afterwards if you want to force it to | ||
| 266 | - // be a single stream. | 219 | + // Add the given object as a new content stream for this page. If parameter 'first' is true, add |
| 220 | + // to the beginning. Otherwise, add to the end. This routine automatically converts the page | ||
| 221 | + // contents to an array if it is a scalar, allowing the caller not to care what the initial | ||
| 222 | + // structure is. You can call coalesceContentStreams() afterwards if you want to force it to be | ||
| 223 | + // a single stream. | ||
| 267 | QPDF_DLL | 224 | QPDF_DLL |
| 268 | void addPageContents(QPDFObjectHandle contents, bool first); | 225 | void addPageContents(QPDFObjectHandle contents, bool first); |
| 269 | 226 | ||
| 270 | - // Rotate a page. If relative is false, set the rotation of the | ||
| 271 | - // page to angle. Otherwise, add angle to the rotation of the | ||
| 272 | - // page. Angle must be a multiple of 90. Adding 90 to the rotation | 227 | + // Rotate a page. If relative is false, set the rotation of the page to angle. Otherwise, add |
| 228 | + // angle to the rotation of the page. Angle must be a multiple of 90. Adding 90 to the rotation | ||
| 273 | // rotates clockwise by 90 degrees. | 229 | // rotates clockwise by 90 degrees. |
| 274 | QPDF_DLL | 230 | QPDF_DLL |
| 275 | void rotatePage(int angle, bool relative); | 231 | void rotatePage(int angle, bool relative); |
| 276 | 232 | ||
| 277 | - // Coalesce a page's content streams. A page's content may be a | ||
| 278 | - // stream or an array of streams. If this page's content is an | ||
| 279 | - // array, concatenate the streams into a single stream. This can | ||
| 280 | - // be useful when working with files that split content streams in | ||
| 281 | - // arbitrary spots, such as in the middle of a token, as that can | ||
| 282 | - // confuse some software. You could also call this after calling | 233 | + // Coalesce a page's content streams. A page's content may be a stream or an array of streams. |
| 234 | + // If this page's content is an array, concatenate the streams into a single stream. This can be | ||
| 235 | + // useful when working with files that split content streams in arbitrary spots, such as in the | ||
| 236 | + // middle of a token, as that can confuse some software. You could also call this after calling | ||
| 283 | // addPageContents. | 237 | // addPageContents. |
| 284 | QPDF_DLL | 238 | QPDF_DLL |
| 285 | void coalesceContentStreams(); | 239 | void coalesceContentStreams(); |
| @@ -288,25 +242,21 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | @@ -288,25 +242,21 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | ||
| 288 | // Content stream handling | 242 | // Content stream handling |
| 289 | // | 243 | // |
| 290 | 244 | ||
| 291 | - // Parse a page's contents through ParserCallbacks, described | ||
| 292 | - // above. This method works whether the contents are a single | ||
| 293 | - // stream or an array of streams. Call on a page object. Also | ||
| 294 | - // works for form XObjects. | 245 | + // Parse a page's contents through ParserCallbacks, described above. This method works whether |
| 246 | + // the contents are a single stream or an array of streams. Call on a page object. Also works | ||
| 247 | + // for form XObjects. | ||
| 295 | QPDF_DLL | 248 | QPDF_DLL |
| 296 | void parseContents(QPDFObjectHandle::ParserCallbacks* callbacks); | 249 | void parseContents(QPDFObjectHandle::ParserCallbacks* callbacks); |
| 297 | // Old name | 250 | // Old name |
| 298 | QPDF_DLL | 251 | QPDF_DLL |
| 299 | void parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks); | 252 | void parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks); |
| 300 | 253 | ||
| 301 | - // Pass a page's or form XObject's contents through the given | ||
| 302 | - // TokenFilter. If a pipeline is also provided, it will be the | ||
| 303 | - // target of the write methods from the token filter. If a | ||
| 304 | - // pipeline is not specified, any output generated by the token | ||
| 305 | - // filter will be discarded. Use this interface if you need to | ||
| 306 | - // pass a page's contents through a filter for work purposes without | ||
| 307 | - // having that filter automatically applied to the page's | ||
| 308 | - // contents, as happens with addContentTokenFilter. See | ||
| 309 | - // examples/pdf-count-strings.cc for an example. | 254 | + // Pass a page's or form XObject's contents through the given TokenFilter. If a pipeline is also |
| 255 | + // provided, it will be the target of the write methods from the token filter. If a pipeline is | ||
| 256 | + // not specified, any output generated by the token filter will be discarded. Use this interface | ||
| 257 | + // if you need to pass a page's contents through a filter for work purposes without having that | ||
| 258 | + // filter automatically applied to the page's contents, as happens with addContentTokenFilter. | ||
| 259 | + // See examples/pdf-count-strings.cc for an example. | ||
| 310 | QPDF_DLL | 260 | QPDF_DLL |
| 311 | void filterContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next = nullptr); | 261 | void filterContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next = nullptr); |
| 312 | 262 | ||
| @@ -314,95 +264,74 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | @@ -314,95 +264,74 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | ||
| 314 | QPDF_DLL | 264 | QPDF_DLL |
| 315 | void filterPageContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next = nullptr); | 265 | void filterPageContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next = nullptr); |
| 316 | 266 | ||
| 317 | - // Pipe a page's contents through the given pipeline. This method | ||
| 318 | - // works whether the contents are a single stream or an array of | ||
| 319 | - // streams. Also works on form XObjects. | 267 | + // Pipe a page's contents through the given pipeline. This method works whether the contents are |
| 268 | + // a single stream or an array of streams. Also works on form XObjects. | ||
| 320 | QPDF_DLL | 269 | QPDF_DLL |
| 321 | void pipeContents(Pipeline* p); | 270 | void pipeContents(Pipeline* p); |
| 322 | // Old name | 271 | // Old name |
| 323 | QPDF_DLL | 272 | QPDF_DLL |
| 324 | void pipePageContents(Pipeline* p); | 273 | void pipePageContents(Pipeline* p); |
| 325 | 274 | ||
| 326 | - // Attach a token filter to a page's contents. If the page's | ||
| 327 | - // contents is an array of streams, it is automatically coalesced. | ||
| 328 | - // The token filter is applied to the page's contents as a single | 275 | + // Attach a token filter to a page's contents. If the page's contents is an array of streams, it |
| 276 | + // is automatically coalesced. The token filter is applied to the page's contents as a single | ||
| 329 | // stream. Also works on form XObjects. | 277 | // stream. Also works on form XObjects. |
| 330 | QPDF_DLL | 278 | QPDF_DLL |
| 331 | void addContentTokenFilter(std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter); | 279 | void addContentTokenFilter(std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter); |
| 332 | 280 | ||
| 333 | - // A page's resources dictionary maps names to objects elsewhere | ||
| 334 | - // in the file. This method walks through a page's contents and | ||
| 335 | - // keeps track of which resources are referenced somewhere in the | ||
| 336 | - // contents. Then it removes from the resources dictionary any | ||
| 337 | - // object that is not referenced in the contents. This operation | ||
| 338 | - // is most useful after calling | ||
| 339 | - // QPDFPageDocumentHelper::pushInheritedAttributesToPage(). This | ||
| 340 | - // method is used by page splitting code to avoid copying unused | ||
| 341 | - // objects in files that used shared resource dictionaries across | ||
| 342 | - // multiple pages. This method recurses into form XObjects and can | ||
| 343 | - // be called with a form XObject as well as a page. | 281 | + // A page's resources dictionary maps names to objects elsewhere in the file. This method walks |
| 282 | + // through a page's contents and keeps track of which resources are referenced somewhere in the | ||
| 283 | + // contents. Then it removes from the resources dictionary any object that is not referenced in | ||
| 284 | + // the contents. This operation is most useful after calling | ||
| 285 | + // QPDFPageDocumentHelper::pushInheritedAttributesToPage(). This method is used by page | ||
| 286 | + // splitting code to avoid copying unused objects in files that used shared resource | ||
| 287 | + // dictionaries across multiple pages. This method recurses into form XObjects and can be called | ||
| 288 | + // with a form XObject as well as a page. | ||
| 344 | QPDF_DLL | 289 | QPDF_DLL |
| 345 | void removeUnreferencedResources(); | 290 | void removeUnreferencedResources(); |
| 346 | 291 | ||
| 347 | - // Return a new QPDFPageObjectHelper that is a duplicate of the | ||
| 348 | - // page. The returned object is an indirect object that is ready | ||
| 349 | - // to be inserted into the same or a different QPDF object using | ||
| 350 | - // any of the addPage methods in QPDFPageDocumentHelper or QPDF. | ||
| 351 | - // Without calling one of those methods, the page will not be | ||
| 352 | - // added anywhere. The new page object shares all content streams | ||
| 353 | - // and indirect object resources with the original page, so if you | ||
| 354 | - // are going to modify the contents or other aspects of the page, | ||
| 355 | - // you will need to handle copying of the component parts | ||
| 356 | - // separately. | 292 | + // Return a new QPDFPageObjectHelper that is a duplicate of the page. The returned object is an |
| 293 | + // indirect object that is ready to be inserted into the same or a different QPDF object using | ||
| 294 | + // any of the addPage methods in QPDFPageDocumentHelper or QPDF. Without calling one of those | ||
| 295 | + // methods, the page will not be added anywhere. The new page object shares all content streams | ||
| 296 | + // and indirect object resources with the original page, so if you are going to modify the | ||
| 297 | + // contents or other aspects of the page, you will need to handle copying of the component | ||
| 298 | + // parts separately. | ||
| 357 | QPDF_DLL | 299 | QPDF_DLL |
| 358 | QPDFPageObjectHelper shallowCopyPage(); | 300 | QPDFPageObjectHelper shallowCopyPage(); |
| 359 | 301 | ||
| 360 | - // Return a transformation matrix whose effect is the same as the | ||
| 361 | - // page's /Rotate and /UserUnit parameters. If invert is true, | ||
| 362 | - // return a matrix whose effect is the opposite. The regular | ||
| 363 | - // matrix is suitable for taking something from this page to put | ||
| 364 | - // elsewhere, and the second one is suitable for putting something | ||
| 365 | - // else onto this page. The page's TrimBox is used as the bounding | ||
| 366 | - // box for purposes of computing the matrix. | 302 | + // Return a transformation matrix whose effect is the same as the page's /Rotate and /UserUnit |
| 303 | + // parameters. If invert is true, return a matrix whose effect is the opposite. The regular | ||
| 304 | + // matrix is suitable for taking something from this page to put elsewhere, and the second one | ||
| 305 | + // is suitable for putting something else onto this page. The page's TrimBox is used as the | ||
| 306 | + // bounding box for purposes of computing the matrix. | ||
| 367 | QPDF_DLL | 307 | QPDF_DLL |
| 368 | QPDFObjectHandle::Matrix getMatrixForTransformations(bool invert = false); | 308 | QPDFObjectHandle::Matrix getMatrixForTransformations(bool invert = false); |
| 369 | 309 | ||
| 370 | - // Return a form XObject that draws this page. This is useful for | ||
| 371 | - // n-up operations, underlay, overlay, thumbnail generation, or | ||
| 372 | - // any other case in which it is useful to replicate the contents | ||
| 373 | - // of a page in some other context. The dictionaries are shallow | ||
| 374 | - // copies of the original page dictionary, and the contents are | ||
| 375 | - // coalesced from the page's contents. The resulting object handle | ||
| 376 | - // is not referenced anywhere. If handle_transformations is true, | ||
| 377 | - // the resulting form XObject's /Matrix will be set to replicate | ||
| 378 | - // rotation (/Rotate) and scaling (/UserUnit) in the page's | ||
| 379 | - // dictionary. In this way, the page's transformations will be | ||
| 380 | - // preserved when placing this object on another page. | 310 | + // Return a form XObject that draws this page. This is useful for n-up operations, underlay, |
| 311 | + // overlay, thumbnail generation, or any other case in which it is useful to replicate the | ||
| 312 | + // contents of a page in some other context. The dictionaries are shallow copies of the original | ||
| 313 | + // page dictionary, and the contents are coalesced from the page's contents. The resulting | ||
| 314 | + // object handle is not referenced anywhere. If handle_transformations is true, the resulting | ||
| 315 | + // form XObject's /Matrix will be set to replicate rotation (/Rotate) and scaling (/UserUnit) in | ||
| 316 | + // the page's dictionary. In this way, the page's transformations will be preserved when placing | ||
| 317 | + // this object on another page. | ||
| 381 | QPDF_DLL | 318 | QPDF_DLL |
| 382 | QPDFObjectHandle getFormXObjectForPage(bool handle_transformations = true); | 319 | QPDFObjectHandle getFormXObjectForPage(bool handle_transformations = true); |
| 383 | 320 | ||
| 384 | - // Return content stream text that will place the given form | ||
| 385 | - // XObject (fo) using the resource name "name" on this page | ||
| 386 | - // centered within the given rectangle. If invert_transformations | ||
| 387 | - // is true, the effect of any rotation (/Rotate) and scaling | ||
| 388 | - // (/UserUnit) applied to the current page will be inverted in the | ||
| 389 | - // form XObject placement. This will cause the form XObject's | ||
| 390 | - // absolute orientation to be preserved. You could overlay one | ||
| 391 | - // page on another by calling getFormXObjectForPage on the | ||
| 392 | - // original page, QPDFObjectHandle::getUniqueResourceName on the | ||
| 393 | - // destination page's Resources dictionary to generate a name for | ||
| 394 | - // the resulting object, and calling placeFormXObject on the | ||
| 395 | - // destination page. Then insert the new fo (or, if it comes from | ||
| 396 | - // a different file, the result of calling copyForeignObject on | ||
| 397 | - // it) into the resources dictionary using name, and append or | ||
| 398 | - // prepend the content to the page's content streams. See the | ||
| 399 | - // overlay/underlay code in qpdf.cc or | ||
| 400 | - // examples/pdf-overlay-page.cc for an example. From qpdf 10.0.0, | ||
| 401 | - // the allow_shrink and allow_expand parameters control whether | ||
| 402 | - // the form XObject is allowed to be shrunk or expanded to stay | ||
| 403 | - // within or maximally fill the destination rectangle. The default | ||
| 404 | - // values are for backward compatibility with the pre-10.0.0 | ||
| 405 | - // behavior. | 321 | + // Return content stream text that will place the given form XObject (fo) using the resource |
| 322 | + // name "name" on this page centered within the given rectangle. If invert_transformations is | ||
| 323 | + // true, the effect of any rotation (/Rotate) and scaling (/UserUnit) applied to the current | ||
| 324 | + // page will be inverted in the form XObject placement. This will cause the form XObject's | ||
| 325 | + // absolute orientation to be preserved. You could overlay one page on another by calling | ||
| 326 | + // getFormXObjectForPage on the original page, QPDFObjectHandle::getUniqueResourceName on the | ||
| 327 | + // destination page's Resources dictionary to generate a name for the resulting object, and | ||
| 328 | + // calling placeFormXObject on the destination page. Then insert the new fo (or, if it comes | ||
| 329 | + // from a different file, the result of calling copyForeignObject on it) into the resources | ||
| 330 | + // dictionary using name, and append or prepend the content to the page's content streams. See | ||
| 331 | + // the overlay/underlay code in qpdf.cc or examples/pdf-overlay-page.cc for an example. From | ||
| 332 | + // qpdf 10.0.0, the allow_shrink and allow_expand parameters control whether the form XObject is | ||
| 333 | + // allowed to be shrunk or expanded to stay within or maximally fill the destination rectangle. | ||
| 334 | + // The default values are for backward compatibility with the pre-10.0.0 behavior. | ||
| 406 | QPDF_DLL | 335 | QPDF_DLL |
| 407 | std::string placeFormXObject( | 336 | std::string placeFormXObject( |
| 408 | QPDFObjectHandle fo, | 337 | QPDFObjectHandle fo, |
| @@ -412,8 +341,7 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | @@ -412,8 +341,7 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | ||
| 412 | bool allow_shrink = true, | 341 | bool allow_shrink = true, |
| 413 | bool allow_expand = false); | 342 | bool allow_expand = false); |
| 414 | 343 | ||
| 415 | - // Alternative version that also fills in the transformation | ||
| 416 | - // matrix that was used. | 344 | + // Alternative version that also fills in the transformation matrix that was used. |
| 417 | QPDF_DLL | 345 | QPDF_DLL |
| 418 | std::string placeFormXObject( | 346 | std::string placeFormXObject( |
| 419 | QPDFObjectHandle fo, | 347 | QPDFObjectHandle fo, |
| @@ -424,10 +352,9 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | @@ -424,10 +352,9 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | ||
| 424 | bool allow_shrink = true, | 352 | bool allow_shrink = true, |
| 425 | bool allow_expand = false); | 353 | bool allow_expand = false); |
| 426 | 354 | ||
| 427 | - // Return the transformation matrix that translates from the given | ||
| 428 | - // form XObject's coordinate system into the given rectangular | ||
| 429 | - // region on the page. The parameters have the same meaning as for | ||
| 430 | - // placeFormXObject. | 355 | + // Return the transformation matrix that translates from the given form XObject's coordinate |
| 356 | + // system into the given rectangular region on the page. The parameters have the same meaning as | ||
| 357 | + // for placeFormXObject. | ||
| 431 | QPDF_DLL | 358 | QPDF_DLL |
| 432 | QPDFMatrix getMatrixForFormXObjectPlacement( | 359 | QPDFMatrix getMatrixForFormXObjectPlacement( |
| 433 | QPDFObjectHandle fo, | 360 | QPDFObjectHandle fo, |
| @@ -436,43 +363,32 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | @@ -436,43 +363,32 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | ||
| 436 | bool allow_shrink = true, | 363 | bool allow_shrink = true, |
| 437 | bool allow_expand = false); | 364 | bool allow_expand = false); |
| 438 | 365 | ||
| 439 | - // If a page is rotated using /Rotate in the page's dictionary, | ||
| 440 | - // instead rotate the page by the same amount by altering the | ||
| 441 | - // contents and removing the /Rotate key. This method adjusts the | ||
| 442 | - // various page bounding boxes (/MediaBox, etc.) so that the page | ||
| 443 | - // will have the same semantics. This can be useful to work around | ||
| 444 | - // problems with PDF applications that can't properly handle | ||
| 445 | - // rotated pages. If a QPDFAcroFormDocumentHelper is provided, it | ||
| 446 | - // will be used for resolving any form fields that have to be | ||
| 447 | - // rotated. If not, one will be created inside the function, which | 366 | + // If a page is rotated using /Rotate in the page's dictionary, instead rotate the page by the |
| 367 | + // same amount by altering the contents and removing the /Rotate key. This method adjusts the | ||
| 368 | + // various page bounding boxes (/MediaBox, etc.) so that the page will have the same semantics. | ||
| 369 | + // This can be useful to work around problems with PDF applications that can't properly handle | ||
| 370 | + // rotated pages. If a QPDFAcroFormDocumentHelper is provided, it will be used for resolving any | ||
| 371 | + // form fields that have to be rotated. If not, one will be created inside the function, which | ||
| 448 | // is less efficient. | 372 | // is less efficient. |
| 449 | QPDF_DLL | 373 | QPDF_DLL |
| 450 | void flattenRotation(QPDFAcroFormDocumentHelper* afdh = nullptr); | 374 | void flattenRotation(QPDFAcroFormDocumentHelper* afdh = nullptr); |
| 451 | 375 | ||
| 452 | - // Copy annotations from another page into this page. The other | ||
| 453 | - // page may be from the same QPDF or from a different QPDF. Each | ||
| 454 | - // annotation's rectangle is transformed by the given matrix. If | ||
| 455 | - // the annotation is a widget annotation that is associated with a | ||
| 456 | - // form field, the form field is copied into this document's | ||
| 457 | - // AcroForm dictionary as well. You can use this to copy | ||
| 458 | - // annotations from a page that was converted to a form XObject | ||
| 459 | - // and added to another page. For example of this, see | ||
| 460 | - // examples/pdf-overlay-page.cc. This method calls | ||
| 461 | - // QPDFAcroFormDocumentHelper::transformAnnotations, which will | ||
| 462 | - // copy annotations and form fields so that you can copy | ||
| 463 | - // annotations from a source page to any number of other pages, | ||
| 464 | - // even with different matrices, and maintain independence from | ||
| 465 | - // the original annotations. See also | ||
| 466 | - // QPDFAcroFormDocumentHelper::fixCopiedAnnotations, which can be | ||
| 467 | - // used if you copy a page and want to repair the annotations on | ||
| 468 | - // the destination page to make them independent from the original | ||
| 469 | - // page's annotations. | 376 | + // Copy annotations from another page into this page. The other page may be from the same QPDF |
| 377 | + // or from a different QPDF. Each annotation's rectangle is transformed by the given matrix. If | ||
| 378 | + // the annotation is a widget annotation that is associated with a form field, the form field is | ||
| 379 | + // copied into this document's AcroForm dictionary as well. You can use this to copy annotations | ||
| 380 | + // from a page that was converted to a form XObject and added to another page. For example of | ||
| 381 | + // this, see examples/pdf-overlay-page.cc. This method calls | ||
| 382 | + // QPDFAcroFormDocumentHelper::transformAnnotations, which will copy annotations and form fields | ||
| 383 | + // so that you can copy annotations from a source page to any number of other pages, even with | ||
| 384 | + // different matrices, and maintain independence from the original annotations. See also | ||
| 385 | + // QPDFAcroFormDocumentHelper::fixCopiedAnnotations, which can be used if you copy a page and | ||
| 386 | + // want to repair the annotations on the destination page to make them independent from the | ||
| 387 | + // original page's annotations. | ||
| 470 | // | 388 | // |
| 471 | - // If you pass in a QPDFAcroFormDocumentHelper*, the method will | ||
| 472 | - // use that instead of creating one in the function. Creating | ||
| 473 | - // QPDFAcroFormDocumentHelper objects is expensive, so if you're | ||
| 474 | - // doing a lot of copying, it can be more efficient to create | ||
| 475 | - // these outside and pass them in. | 389 | + // If you pass in a QPDFAcroFormDocumentHelper*, the method will use that instead of creating |
| 390 | + // one in the function. Creating QPDFAcroFormDocumentHelper objects is expensive, so if you're | ||
| 391 | + // doing a lot of copying, it can be more efficient to create these outside and pass them in. | ||
| 476 | QPDF_DLL | 392 | QPDF_DLL |
| 477 | void copyAnnotations( | 393 | void copyAnnotations( |
| 478 | QPDFPageObjectHelper from_page, | 394 | QPDFPageObjectHelper from_page, |
include/qpdf/QPDFTokenizer.hh
| @@ -2,22 +2,19 @@ | @@ -2,22 +2,19 @@ | ||
| 2 | // | 2 | // |
| 3 | // This file is part of qpdf. | 3 | // This file is part of qpdf. |
| 4 | // | 4 | // |
| 5 | -// Licensed under the Apache License, Version 2.0 (the "License"); | ||
| 6 | -// you may not use this file except in compliance with the License. | ||
| 7 | -// You may obtain a copy of the License at | 5 | +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
| 6 | +// in compliance with the License. You may obtain a copy of the License at | ||
| 8 | // | 7 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 | 8 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // | 9 | // |
| 11 | -// Unless required by applicable law or agreed to in writing, software | ||
| 12 | -// distributed under the License is distributed on an "AS IS" BASIS, | ||
| 13 | -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 14 | -// See the License for the specific language governing permissions and | ||
| 15 | -// limitations under the License. | 10 | +// Unless required by applicable law or agreed to in writing, software distributed under the License |
| 11 | +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | ||
| 12 | +// or implied. See the License for the specific language governing permissions and limitations under | ||
| 13 | +// the License. | ||
| 16 | // | 14 | // |
| 17 | -// Versions of qpdf prior to version 7 were released under the terms | ||
| 18 | -// of version 2.0 of the Artistic License. At your option, you may | ||
| 19 | -// continue to consider qpdf to be licensed under those terms. Please | ||
| 20 | -// see the manual for additional information. | 15 | +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic |
| 16 | +// License. At your option, you may continue to consider qpdf to be licensed under those terms. | ||
| 17 | +// Please see the manual for additional information. | ||
| 21 | 18 | ||
| 22 | #ifndef QPDFTOKENIZER_HH | 19 | #ifndef QPDFTOKENIZER_HH |
| 23 | #define QPDFTOKENIZER_HH | 20 | #define QPDFTOKENIZER_HH |
| @@ -34,9 +31,8 @@ | @@ -34,9 +31,8 @@ | ||
| 34 | class QPDFTokenizer | 31 | class QPDFTokenizer |
| 35 | { | 32 | { |
| 36 | public: | 33 | public: |
| 37 | - // Token type tt_eof is only returned if allowEOF() is called on | ||
| 38 | - // the tokenizer. tt_eof was introduced in QPDF version 4.1. | ||
| 39 | - // tt_space, tt_comment, and tt_inline_image were added in QPDF | 34 | + // Token type tt_eof is only returned if allowEOF() is called on the tokenizer. tt_eof was |
| 35 | + // introduced in QPDF version 4.1. tt_space, tt_comment, and tt_inline_image were added in QPDF | ||
| 40 | // version 8. | 36 | // version 8. |
| 41 | enum token_type_e { | 37 | enum token_type_e { |
| 42 | tt_bad, | 38 | tt_bad, |
| @@ -132,72 +128,65 @@ class QPDFTokenizer | @@ -132,72 +128,65 @@ class QPDFTokenizer | ||
| 132 | QPDF_DLL | 128 | QPDF_DLL |
| 133 | QPDFTokenizer(); | 129 | QPDFTokenizer(); |
| 134 | 130 | ||
| 135 | - // If called, treat EOF as a separate token type instead of an | ||
| 136 | - // error. This was introduced in QPDF 4.1 to facilitate | ||
| 137 | - // tokenizing content streams. | 131 | + // If called, treat EOF as a separate token type instead of an error. This was introduced in |
| 132 | + // QPDF 4.1 to facilitate tokenizing content streams. | ||
| 138 | QPDF_DLL | 133 | QPDF_DLL |
| 139 | void allowEOF(); | 134 | void allowEOF(); |
| 140 | 135 | ||
| 141 | - // If called, readToken will return "ignorable" tokens for space | ||
| 142 | - // and comments. This was added in QPDF 8. | 136 | + // If called, readToken will return "ignorable" tokens for space and comments. This was added in |
| 137 | + // QPDF 8. | ||
| 143 | QPDF_DLL | 138 | QPDF_DLL |
| 144 | void includeIgnorable(); | 139 | void includeIgnorable(); |
| 145 | 140 | ||
| 146 | - // There are two modes of operation: push and pull. The pull | ||
| 147 | - // method is easier but requires an input source. The push method | ||
| 148 | - // is more complicated but can be used to tokenize a stream of | 141 | + // There are two modes of operation: push and pull. The pull method is easier but requires an |
| 142 | + // input source. The push method is more complicated but can be used to tokenize a stream of | ||
| 149 | // incoming characters in a pipeline. | 143 | // incoming characters in a pipeline. |
| 150 | 144 | ||
| 151 | // Push mode: | 145 | // Push mode: |
| 152 | 146 | ||
| 153 | - // Keep presenting characters with presentCharacter() and | ||
| 154 | - // presentEOF() and calling getToken() until getToken() returns | ||
| 155 | - // true. When it does, be sure to check unread_ch and to unread ch | ||
| 156 | - // if it is true. | 147 | + // Keep presenting characters with presentCharacter() and presentEOF() and calling getToken() |
| 148 | + // until getToken() returns true. When it does, be sure to check unread_ch and to unread ch if | ||
| 149 | + // it is true. | ||
| 157 | 150 | ||
| 158 | - // If these are called when a token is available, an exception | ||
| 159 | - // will be thrown. | 151 | + // If these are called when a token is available, an exception will be thrown. |
| 160 | QPDF_DLL | 152 | QPDF_DLL |
| 161 | void presentCharacter(char ch); | 153 | void presentCharacter(char ch); |
| 162 | QPDF_DLL | 154 | QPDF_DLL |
| 163 | void presentEOF(); | 155 | void presentEOF(); |
| 164 | 156 | ||
| 165 | - // If a token is available, return true and initialize token with | ||
| 166 | - // the token, unread_char with whether or not we have to unread | ||
| 167 | - // the last character, and if unread_char, ch with the character | ||
| 168 | - // to unread. | 157 | + // If a token is available, return true and initialize token with the token, unread_char with |
| 158 | + // whether or not we have to unread the last character, and if unread_char, ch with the | ||
| 159 | + // character to unread. | ||
| 169 | QPDF_DLL | 160 | QPDF_DLL |
| 170 | bool getToken(Token& token, bool& unread_char, char& ch); | 161 | bool getToken(Token& token, bool& unread_char, char& ch); |
| 171 | 162 | ||
| 172 | - // This function returns true if the current character is between | ||
| 173 | - // tokens (i.e., white space that is not part of a string) or is | ||
| 174 | - // part of a comment. A tokenizing filter can call this to | 163 | + // This function returns true if the current character is between tokens (i.e., white space that |
| 164 | + // is not part of a string) or is part of a comment. A tokenizing filter can call this to | ||
| 175 | // determine whether to output the character. | 165 | // determine whether to output the character. |
| 176 | QPDF_DLL | 166 | QPDF_DLL |
| 177 | bool betweenTokens(); | 167 | bool betweenTokens(); |
| 178 | 168 | ||
| 179 | // Pull mode: | 169 | // Pull mode: |
| 180 | 170 | ||
| 181 | - // Read a token from an input source. Context describes the | ||
| 182 | - // context in which the token is being read and is used in the | ||
| 183 | - // exception thrown if there is an error. After a token is read, | ||
| 184 | - // the position of the input source returned by input->tell() | ||
| 185 | - // points to just after the token, and the input source's "last | ||
| 186 | - // offset" as returned by input->getLastOffset() points to the | 171 | + // Read a token from an input source. Context describes the context in which the token is being |
| 172 | + // read and is used in the exception thrown if there is an error. After a token is read, the | ||
| 173 | + // position of the input source returned by input->tell() points to just after the token, and | ||
| 174 | + // the input source's "last offset" as returned by input->getLastOffset() points to the | ||
| 187 | // beginning of the token. | 175 | // beginning of the token. |
| 188 | QPDF_DLL | 176 | QPDF_DLL |
| 189 | Token readToken( | 177 | Token readToken( |
| 178 | + InputSource& input, std::string const& context, bool allow_bad = false, size_t max_len = 0); | ||
| 179 | + QPDF_DLL | ||
| 180 | + Token readToken( | ||
| 190 | std::shared_ptr<InputSource> input, | 181 | std::shared_ptr<InputSource> input, |
| 191 | std::string const& context, | 182 | std::string const& context, |
| 192 | bool allow_bad = false, | 183 | bool allow_bad = false, |
| 193 | size_t max_len = 0); | 184 | size_t max_len = 0); |
| 194 | 185 | ||
| 195 | - // Calling this method puts the tokenizer in a state for reading | ||
| 196 | - // inline images. You should call this method after reading the | ||
| 197 | - // character following the ID operator. In that state, it will | ||
| 198 | - // return all data up to BUT NOT INCLUDING the next EI token. | ||
| 199 | - // After you call this method, the next call to readToken (or the | ||
| 200 | - // token created next time getToken returns true) will either be | 186 | + // Calling this method puts the tokenizer in a state for reading inline images. You should call |
| 187 | + // this method after reading the character following the ID operator. In that state, it will | ||
| 188 | + // return all data up to BUT NOT INCLUDING the next EI token. After you call this method, the | ||
| 189 | + // next call to readToken (or the token created next time getToken returns true) will either be | ||
| 201 | // tt_inline_image or tt_bad. This is the only way readToken | 190 | // tt_inline_image or tt_bad. This is the only way readToken |
| 202 | // returns a tt_inline_image token. | 191 | // returns a tt_inline_image token. |
| 203 | QPDF_DLL | 192 | QPDF_DLL |
| @@ -206,21 +195,18 @@ class QPDFTokenizer | @@ -206,21 +195,18 @@ class QPDFTokenizer | ||
| 206 | private: | 195 | private: |
| 207 | friend class QPDFParser; | 196 | friend class QPDFParser; |
| 208 | 197 | ||
| 209 | - // Read a token from an input source. Context describes the | ||
| 210 | - // context in which the token is being read and is used in the | ||
| 211 | - // exception thrown if there is an error. After a token is read, | ||
| 212 | - // the position of the input source returned by input->tell() | ||
| 213 | - // points to just after the token, and the input source's "last | ||
| 214 | - // offset" as returned by input->getLastOffset() points to the | ||
| 215 | - // beginning of the token. Returns false if the token is bad | ||
| 216 | - // or if scanning produced an error message for any reason. | 198 | + // Read a token from an input source. Context describes the context in which the token is being |
| 199 | + // read and is used in the exception thrown if there is an error. After a token is read, the | ||
| 200 | + // position of the input source returned by input->tell() points to just after the token, and | ||
| 201 | + // the input source's "last offset" as returned by input->getLastOffset() points to the | ||
| 202 | + // beginning of the token. Returns false if the token is bad or if scanning produced an error | ||
| 203 | + // message for any reason. | ||
| 217 | 204 | ||
| 218 | bool nextToken(InputSource& input, std::string const& context, size_t max_len = 0); | 205 | bool nextToken(InputSource& input, std::string const& context, size_t max_len = 0); |
| 219 | 206 | ||
| 220 | - // The following methods are only valid after nextToken has been called | ||
| 221 | - // and until another QPDFTokenizer method is called. They allow the results | ||
| 222 | - // of calling nextToken to be accessed without creating a Token, thus | ||
| 223 | - // avoiding copying information that may not be needed. | 207 | + // The following methods are only valid after nextToken has been called and until another |
| 208 | + // QPDFTokenizer method is called. They allow the results of calling nextToken to be accessed | ||
| 209 | + // without creating a Token, thus avoiding copying information that may not be needed. | ||
| 224 | inline token_type_e getType() const noexcept; | 210 | inline token_type_e getType() const noexcept; |
| 225 | inline std::string const& getValue() const noexcept; | 211 | inline std::string const& getValue() const noexcept; |
| 226 | inline std::string const& getRawValue() const noexcept; | 212 | inline std::string const& getRawValue() const noexcept; |
libqpdf/JSON.cc
| @@ -218,13 +218,12 @@ JSON::encode_string(std::string const& str) | @@ -218,13 +218,12 @@ JSON::encode_string(std::string const& str) | ||
| 218 | while (iter != end) { | 218 | while (iter != end) { |
| 219 | auto c = static_cast<unsigned char>(*iter); | 219 | auto c = static_cast<unsigned char>(*iter); |
| 220 | if ((c > 34 && c != '\\') || c == ' ' || c == 33) { | 220 | if ((c > 34 && c != '\\') || c == ' ' || c == 33) { |
| 221 | - // Optimistically check that no char in str requires escaping. | ||
| 222 | - // Hopefully we can just return the input str. | 221 | + // Optimistically check that no char in str requires escaping. Hopefully we can just |
| 222 | + // return the input str. | ||
| 223 | ++iter; | 223 | ++iter; |
| 224 | } else { | 224 | } else { |
| 225 | - // We found a char that requires escaping. Initialize result to the | ||
| 226 | - // chars scanned so far, append/replace the rest of str one char at | ||
| 227 | - // a time, and return the result. | 225 | + // We found a char that requires escaping. Initialize result to the chars scanned so |
| 226 | + // far, append/replace the rest of str one char at a time, and return the result. | ||
| 228 | std::string result{begin, iter}; | 227 | std::string result{begin, iter}; |
| 229 | 228 | ||
| 230 | for (; iter != end; ++iter) { | 229 | for (; iter != end; ++iter) { |
| @@ -532,12 +531,10 @@ JSON::checkSchemaInternal( | @@ -532,12 +531,10 @@ JSON::checkSchemaInternal( | ||
| 532 | } else if (sch_arr) { | 531 | } else if (sch_arr) { |
| 533 | auto n_elements = sch_arr->elements.size(); | 532 | auto n_elements = sch_arr->elements.size(); |
| 534 | if (n_elements == 1) { | 533 | if (n_elements == 1) { |
| 535 | - // A single-element array in the schema allows a single | ||
| 536 | - // element in the object or a variable-length array, each | ||
| 537 | - // of whose items must conform to the single element of | ||
| 538 | - // the schema array. This doesn't apply to arrays of | ||
| 539 | - // arrays -- we fall back to the behavior of allowing a | ||
| 540 | - // single item only when the object is not an array. | 534 | + // A single-element array in the schema allows a single element in the object or a |
| 535 | + // variable-length array, each of whose items must conform to the single element of the | ||
| 536 | + // schema array. This doesn't apply to arrays of arrays -- we fall back to the behavior | ||
| 537 | + // of allowing a single item only when the object is not an array. | ||
| 541 | if (this_arr) { | 538 | if (this_arr) { |
| 542 | int i = 0; | 539 | int i = 0; |
| 543 | for (auto const& element: this_arr->elements) { | 540 | for (auto const& element: this_arr->elements) { |
| @@ -560,10 +557,9 @@ JSON::checkSchemaInternal( | @@ -560,10 +557,9 @@ JSON::checkSchemaInternal( | ||
| 560 | err_prefix + " is supposed to be an array of length " + std::to_string(n_elements)); | 557 | err_prefix + " is supposed to be an array of length " + std::to_string(n_elements)); |
| 561 | return false; | 558 | return false; |
| 562 | } else { | 559 | } else { |
| 563 | - // A multi-element array in the schema must correspond to | ||
| 564 | - // an element of the same length in the object. Each | ||
| 565 | - // element in the object is validated against the | ||
| 566 | - // corresponding element in the schema. | 560 | + // A multi-element array in the schema must correspond to an element of the same length |
| 561 | + // in the object. Each element in the object is validated against the corresponding | ||
| 562 | + // element in the schema. | ||
| 567 | size_t i = 0; | 563 | size_t i = 0; |
| 568 | for (auto const& element: this_arr->elements) { | 564 | for (auto const& element: this_arr->elements) { |
| 569 | checkSchemaInternal( | 565 | checkSchemaInternal( |
| @@ -701,8 +697,7 @@ JSONParser::handle_u_code( | @@ -701,8 +697,7 @@ JSONParser::handle_u_code( | ||
| 701 | QTC::TC("libtests", "JSON 16 high high"); | 697 | QTC::TC("libtests", "JSON 16 high high"); |
| 702 | throw std::runtime_error( | 698 | throw std::runtime_error( |
| 703 | "JSON: offset " + std::to_string(new_high_offset) + | 699 | "JSON: offset " + std::to_string(new_high_offset) + |
| 704 | - ": UTF-16 high surrogate found after previous high surrogate" | ||
| 705 | - " at offset " + | 700 | + ": UTF-16 high surrogate found after previous high surrogate at offset " + |
| 706 | std::to_string(high_offset)); | 701 | std::to_string(high_offset)); |
| 707 | } | 702 | } |
| 708 | high_offset = new_high_offset; | 703 | high_offset = new_high_offset; |
| @@ -713,8 +708,7 @@ JSONParser::handle_u_code( | @@ -713,8 +708,7 @@ JSONParser::handle_u_code( | ||
| 713 | QTC::TC("libtests", "JSON 16 low not after high"); | 708 | QTC::TC("libtests", "JSON 16 low not after high"); |
| 714 | throw std::runtime_error( | 709 | throw std::runtime_error( |
| 715 | "JSON: offset " + std::to_string(offset) + | 710 | "JSON: offset " + std::to_string(offset) + |
| 716 | - ": UTF-16 low surrogate found not immediately after high" | ||
| 717 | - " surrogate"); | 711 | + ": UTF-16 low surrogate found not immediately after high surrogate"); |
| 718 | } | 712 | } |
| 719 | high_offset = 0; | 713 | high_offset = 0; |
| 720 | codepoint = 0x10000U + ((high_surrogate & 0x3FFU) << 10U) + (codepoint & 0x3FF); | 714 | codepoint = 0x10000U + ((high_surrogate & 0x3FFU) << 10U) + (codepoint & 0x3FF); |
| @@ -797,8 +791,8 @@ JSONParser::append() | @@ -797,8 +791,8 @@ JSONParser::append() | ||
| 797 | ++offset; | 791 | ++offset; |
| 798 | } | 792 | } |
| 799 | 793 | ||
| 800 | -// Append current character to token, advance to next input character and | ||
| 801 | -// transition to 'next' lexer state. | 794 | +// Append current character to token, advance to next input character and transition to 'next' lexer |
| 795 | +// state. | ||
| 802 | inline void | 796 | inline void |
| 803 | JSONParser::append(lex_state_e next) | 797 | JSONParser::append(lex_state_e next) |
| 804 | { | 798 | { |
| @@ -808,8 +802,7 @@ JSONParser::append(lex_state_e next) | @@ -808,8 +802,7 @@ JSONParser::append(lex_state_e next) | ||
| 808 | ++offset; | 802 | ++offset; |
| 809 | } | 803 | } |
| 810 | 804 | ||
| 811 | -// Advance to next input character without appending the current character to | ||
| 812 | -// token. | 805 | +// Advance to next input character without appending the current character to token. |
| 813 | inline void | 806 | inline void |
| 814 | JSONParser::ignore() | 807 | JSONParser::ignore() |
| 815 | { | 808 | { |
| @@ -817,8 +810,8 @@ JSONParser::ignore() | @@ -817,8 +810,8 @@ JSONParser::ignore() | ||
| 817 | ++offset; | 810 | ++offset; |
| 818 | } | 811 | } |
| 819 | 812 | ||
| 820 | -// Advance to next input character without appending the current character to | ||
| 821 | -// token and transition to 'next' lexer state. | 813 | +// Advance to next input character without appending the current character to token and transition |
| 814 | +// to 'next' lexer state. | ||
| 822 | inline void | 815 | inline void |
| 823 | JSONParser::ignore(lex_state_e next) | 816 | JSONParser::ignore(lex_state_e next) |
| 824 | { | 817 | { |
| @@ -848,9 +841,8 @@ JSONParser::getToken() | @@ -848,9 +841,8 @@ JSONParser::getToken() | ||
| 848 | 841 | ||
| 849 | if ((*p < 32 && *p >= 0)) { | 842 | if ((*p < 32 && *p >= 0)) { |
| 850 | if (*p == '\t' || *p == '\n' || *p == '\r') { | 843 | if (*p == '\t' || *p == '\n' || *p == '\r') { |
| 851 | - // Legal white space not permitted in strings. This will always | ||
| 852 | - // end the current token (unless we are still before the start | ||
| 853 | - // of the token). | 844 | + // Legal white space not permitted in strings. This will always end the current |
| 845 | + // token (unless we are still before the start of the token). | ||
| 854 | if (lex_state == ls_top) { | 846 | if (lex_state == ls_top) { |
| 855 | ignore(); | 847 | ignore(); |
| 856 | } else { | 848 | } else { |
| @@ -1044,8 +1036,7 @@ JSONParser::getToken() | @@ -1044,8 +1036,7 @@ JSONParser::getToken() | ||
| 1044 | QTC::TC("libtests", "JSON 16 dangling high"); | 1036 | QTC::TC("libtests", "JSON 16 dangling high"); |
| 1045 | throw std::runtime_error( | 1037 | throw std::runtime_error( |
| 1046 | "JSON: offset " + std::to_string(high_offset) + | 1038 | "JSON: offset " + std::to_string(high_offset) + |
| 1047 | - ": UTF-16 high surrogate not followed by low " | ||
| 1048 | - "surrogate"); | 1039 | + ": UTF-16 high surrogate not followed by low surrogate"); |
| 1049 | } | 1040 | } |
| 1050 | ignore(); | 1041 | ignore(); |
| 1051 | return; | 1042 | return; |
| @@ -1062,8 +1053,7 @@ JSONParser::getToken() | @@ -1062,8 +1053,7 @@ JSONParser::getToken() | ||
| 1062 | case '\\': | 1053 | case '\\': |
| 1063 | case '\"': | 1054 | case '\"': |
| 1064 | case '/': | 1055 | case '/': |
| 1065 | - // \/ is allowed in json input, but so is /, so we | ||
| 1066 | - // don't map / to \/ in output. | 1056 | + // \/ is allowed in json input, but so is /, so we don't map / to \/ in output. |
| 1067 | token += *p; | 1057 | token += *p; |
| 1068 | break; | 1058 | break; |
| 1069 | case 'b': | 1059 | case 'b': |
| @@ -1113,8 +1103,8 @@ JSONParser::getToken() | @@ -1113,8 +1103,8 @@ JSONParser::getToken() | ||
| 1113 | } | 1103 | } |
| 1114 | } | 1104 | } |
| 1115 | 1105 | ||
| 1116 | - // We only get here if on end of input or if the last character was a | ||
| 1117 | - // control character or other delimiter. | 1106 | + // We only get here if on end of input or if the last character was a control character or other |
| 1107 | + // delimiter. | ||
| 1118 | 1108 | ||
| 1119 | if (!token.empty()) { | 1109 | if (!token.empty()) { |
| 1120 | switch (lex_state) { | 1110 | switch (lex_state) { |
| @@ -1189,8 +1179,7 @@ JSONParser::handleToken() | @@ -1189,8 +1179,7 @@ JSONParser::handleToken() | ||
| 1189 | } else if (parser_state == ps_array_after_item) { | 1179 | } else if (parser_state == ps_array_after_item) { |
| 1190 | parser_state = ps_array_after_comma; | 1180 | parser_state = ps_array_after_comma; |
| 1191 | } else { | 1181 | } else { |
| 1192 | - throw std::logic_error("JSONParser::handleToken: unexpected parser" | ||
| 1193 | - " state for comma"); | 1182 | + throw std::logic_error("JSONParser::handleToken: unexpected parser state for comma"); |
| 1194 | } | 1183 | } |
| 1195 | return; | 1184 | return; |
| 1196 | 1185 | ||
| @@ -1323,10 +1312,9 @@ JSONParser::handleToken() | @@ -1323,10 +1312,9 @@ JSONParser::handleToken() | ||
| 1323 | 1312 | ||
| 1324 | if (item.isDictionary() || item.isArray()) { | 1313 | if (item.isDictionary() || item.isArray()) { |
| 1325 | stack.push_back({parser_state, item}); | 1314 | stack.push_back({parser_state, item}); |
| 1326 | - // Calling container start method is postponed until after | ||
| 1327 | - // adding the containers to their parent containers, if any. | ||
| 1328 | - // This makes it much easier to keep track of the current | ||
| 1329 | - // nesting level. | 1315 | + // Calling container start method is postponed until after adding the containers to their |
| 1316 | + // parent containers, if any. This makes it much easier to keep track of the current nesting | ||
| 1317 | + // level. | ||
| 1330 | if (item.isDictionary()) { | 1318 | if (item.isDictionary()) { |
| 1331 | if (reactor) { | 1319 | if (reactor) { |
| 1332 | reactor->dictionaryStart(); | 1320 | reactor->dictionaryStart(); |
libqpdf/Pl_Buffer.cc
| @@ -13,8 +13,7 @@ Pl_Buffer::Pl_Buffer(char const* identifier, Pipeline* next) : | @@ -13,8 +13,7 @@ Pl_Buffer::Pl_Buffer(char const* identifier, Pipeline* next) : | ||
| 13 | 13 | ||
| 14 | Pl_Buffer::~Pl_Buffer() | 14 | Pl_Buffer::~Pl_Buffer() |
| 15 | { | 15 | { |
| 16 | - // Must be explicit and not inline -- see QPDF_DLL_CLASS in | ||
| 17 | - // README-maintainer | 16 | + // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer |
| 18 | } | 17 | } |
| 19 | 18 | ||
| 20 | void | 19 | void |
libqpdf/QPDF.cc
| @@ -32,8 +32,8 @@ | @@ -32,8 +32,8 @@ | ||
| 32 | #include <qpdf/QTC.hh> | 32 | #include <qpdf/QTC.hh> |
| 33 | #include <qpdf/QUtil.hh> | 33 | #include <qpdf/QUtil.hh> |
| 34 | 34 | ||
| 35 | -// This must be a fixed value. This API returns a const reference to | ||
| 36 | -// it, and the C API relies on its being static as well. | 35 | +// This must be a fixed value. This API returns a const reference to it, and the C API relies on its |
| 36 | +// being static as well. | ||
| 37 | std::string const QPDF::qpdf_version(QPDF_VERSION); | 37 | std::string const QPDF::qpdf_version(QPDF_VERSION); |
| 38 | 38 | ||
| 39 | static char const* EMPTY_PDF = ( | 39 | static char const* EMPTY_PDF = ( |
| @@ -212,33 +212,26 @@ QPDF::QPDF() : | @@ -212,33 +212,26 @@ QPDF::QPDF() : | ||
| 212 | m(new Members()) | 212 | m(new Members()) |
| 213 | { | 213 | { |
| 214 | m->tokenizer.allowEOF(); | 214 | m->tokenizer.allowEOF(); |
| 215 | - // Generate a unique ID. It just has to be unique among all QPDF | ||
| 216 | - // objects allocated throughout the lifetime of this running | ||
| 217 | - // application. | 215 | + // Generate a unique ID. It just has to be unique among all QPDF objects allocated throughout |
| 216 | + // the lifetime of this running application. | ||
| 218 | static std::atomic<unsigned long long> unique_id{0}; | 217 | static std::atomic<unsigned long long> unique_id{0}; |
| 219 | m->unique_id = unique_id.fetch_add(1ULL); | 218 | m->unique_id = unique_id.fetch_add(1ULL); |
| 220 | } | 219 | } |
| 221 | 220 | ||
| 222 | QPDF::~QPDF() | 221 | QPDF::~QPDF() |
| 223 | { | 222 | { |
| 224 | - // If two objects are mutually referential (through each object | ||
| 225 | - // having an array or dictionary that contains an indirect | ||
| 226 | - // reference to the other), the circular references in the | ||
| 227 | - // std::shared_ptr objects will prevent the objects from being | ||
| 228 | - // deleted. Walk through all objects in the object cache, which is | ||
| 229 | - // those objects that we read from the file, and break all | ||
| 230 | - // resolved indirect references by replacing them with an internal | ||
| 231 | - // object type representing that they have been destroyed. Note | ||
| 232 | - // that we can't break references like this at any time when the | ||
| 233 | - // QPDF object is active. The call to reset also causes all direct | ||
| 234 | - // QPDFObjectHandle objects that are reachable from this object to | ||
| 235 | - // release their association with this QPDF. Direct objects are | ||
| 236 | - // not destroyed since they can be moved to other QPDF objects | ||
| 237 | - // safely. | ||
| 238 | - | ||
| 239 | - // At this point, obviously no one is still using the QPDF object, | ||
| 240 | - // but we'll explicitly clear the xref table anyway just to | ||
| 241 | - // prevent any possibility of resolve() succeeding. | 223 | + // If two objects are mutually referential (through each object having an array or dictionary |
| 224 | + // that contains an indirect reference to the other), the circular references in the | ||
| 225 | + // std::shared_ptr objects will prevent the objects from being deleted. Walk through all objects | ||
| 226 | + // in the object cache, which is those objects that we read from the file, and break all | ||
| 227 | + // resolved indirect references by replacing them with an internal object type representing that | ||
| 228 | + // they have been destroyed. Note that we can't break references like this at any time when the | ||
| 229 | + // QPDF object is active. The call to reset also causes all direct QPDFObjectHandle objects that | ||
| 230 | + // are reachable from this object to release their association with this QPDF. Direct objects | ||
| 231 | + // are not destroyed since they can be moved to other QPDF objects safely. | ||
| 232 | + | ||
| 233 | + // At this point, obviously no one is still using the QPDF object, but we'll explicitly clear | ||
| 234 | + // the xref table anyway just to prevent any possibility of resolve() succeeding. | ||
| 242 | m->xref_table.clear(); | 235 | m->xref_table.clear(); |
| 243 | for (auto const& iter: m->obj_cache) { | 236 | for (auto const& iter: m->obj_cache) { |
| 244 | iter.second.object->disconnect(); | 237 | iter.second.object->disconnect(); |
| @@ -406,18 +399,15 @@ QPDF::findHeader() | @@ -406,18 +399,15 @@ QPDF::findHeader() | ||
| 406 | } | 399 | } |
| 407 | p += 5; | 400 | p += 5; |
| 408 | std::string version; | 401 | std::string version; |
| 409 | - // Note: The string returned by line.c_str() is always | ||
| 410 | - // null-terminated. The code below never overruns the buffer | ||
| 411 | - // because a null character always short-circuits further | ||
| 412 | - // advancement. | 402 | + // Note: The string returned by line.c_str() is always null-terminated. The code below never |
| 403 | + // overruns the buffer because a null character always short-circuits further advancement. | ||
| 413 | bool valid = validatePDFVersion(p, version); | 404 | bool valid = validatePDFVersion(p, version); |
| 414 | if (valid) { | 405 | if (valid) { |
| 415 | m->pdf_version = version; | 406 | m->pdf_version = version; |
| 416 | if (global_offset != 0) { | 407 | if (global_offset != 0) { |
| 417 | - // Empirical evidence strongly suggests that when there is | ||
| 418 | - // leading material prior to the PDF header, all explicit | ||
| 419 | - // offsets in the file are such that 0 points to the | ||
| 420 | - // beginning of the header. | 408 | + // Empirical evidence strongly suggests that when there is leading material prior to the |
| 409 | + // PDF header, all explicit offsets in the file are such that 0 points to the beginning | ||
| 410 | + // of the header. | ||
| 421 | QTC::TC("qpdf", "QPDF global offset"); | 411 | QTC::TC("qpdf", "QPDF global offset"); |
| 422 | m->file = std::shared_ptr<InputSource>(new OffsetInputSource(m->file, global_offset)); | 412 | m->file = std::shared_ptr<InputSource>(new OffsetInputSource(m->file, global_offset)); |
| 423 | } | 413 | } |
| @@ -448,14 +438,12 @@ QPDF::parse(char const* password) | @@ -448,14 +438,12 @@ QPDF::parse(char const* password) | ||
| 448 | if (!m->file->findFirst("%PDF-", 0, 1024, hf)) { | 438 | if (!m->file->findFirst("%PDF-", 0, 1024, hf)) { |
| 449 | QTC::TC("qpdf", "QPDF not a pdf file"); | 439 | QTC::TC("qpdf", "QPDF not a pdf file"); |
| 450 | warn(damagedPDF("", 0, "can't find PDF header")); | 440 | warn(damagedPDF("", 0, "can't find PDF header")); |
| 451 | - // QPDFWriter writes files that usually require at least | ||
| 452 | - // version 1.2 for /FlateDecode | 441 | + // QPDFWriter writes files that usually require at least version 1.2 for /FlateDecode |
| 453 | m->pdf_version = "1.2"; | 442 | m->pdf_version = "1.2"; |
| 454 | } | 443 | } |
| 455 | 444 | ||
| 456 | - // PDF spec says %%EOF must be found within the last 1024 bytes of | ||
| 457 | - // the file. We add an extra 30 characters to leave room for the | ||
| 458 | - // startxref stuff. | 445 | + // PDF spec says %%EOF must be found within the last 1024 bytes of the file. We add an extra |
| 446 | + // 30 characters to leave room for the startxref stuff. | ||
| 459 | m->file->seek(0, SEEK_END); | 447 | m->file->seek(0, SEEK_END); |
| 460 | qpdf_offset_t end_offset = m->file->tell(); | 448 | qpdf_offset_t end_offset = m->file->tell(); |
| 461 | qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0); | 449 | qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0); |
| @@ -494,8 +482,8 @@ void | @@ -494,8 +482,8 @@ void | ||
| 494 | QPDF::inParse(bool v) | 482 | QPDF::inParse(bool v) |
| 495 | { | 483 | { |
| 496 | if (m->in_parse == v) { | 484 | if (m->in_parse == v) { |
| 497 | - // This happens if QPDFParser::parse tries to | ||
| 498 | - // resolve an indirect object while it is parsing. | 485 | + // This happens if QPDFParser::parse tries to resolve an indirect object while it is |
| 486 | + // parsing. | ||
| 499 | throw std::logic_error("QPDF: re-entrant parsing detected. This is a qpdf bug." | 487 | throw std::logic_error("QPDF: re-entrant parsing detected. This is a qpdf bug." |
| 500 | " Please report at https://github.com/qpdf/qpdf/issues."); | 488 | " Please report at https://github.com/qpdf/qpdf/issues."); |
| 501 | } | 489 | } |
| @@ -518,7 +506,7 @@ QPDF::warn( | @@ -518,7 +506,7 @@ QPDF::warn( | ||
| 518 | qpdf_offset_t offset, | 506 | qpdf_offset_t offset, |
| 519 | std::string const& message) | 507 | std::string const& message) |
| 520 | { | 508 | { |
| 521 | - warn(QPDFExc(error_code, this->getFilename(), object, offset, message)); | 509 | + warn(QPDFExc(error_code, getFilename(), object, offset, message)); |
| 522 | } | 510 | } |
| 523 | 511 | ||
| 524 | void | 512 | void |
| @@ -534,9 +522,8 @@ void | @@ -534,9 +522,8 @@ void | ||
| 534 | QPDF::reconstruct_xref(QPDFExc& e) | 522 | QPDF::reconstruct_xref(QPDFExc& e) |
| 535 | { | 523 | { |
| 536 | if (m->reconstructed_xref) { | 524 | if (m->reconstructed_xref) { |
| 537 | - // Avoid xref reconstruction infinite loops. This is getting | ||
| 538 | - // very hard to reproduce because qpdf is throwing many fewer | ||
| 539 | - // exceptions while parsing. Most situations are warnings now. | 525 | + // Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because |
| 526 | + // qpdf is throwing many fewer exceptions while parsing. Most situations are warnings now. | ||
| 540 | throw e; | 527 | throw e; |
| 541 | } | 528 | } |
| 542 | 529 | ||
| @@ -572,8 +559,7 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -572,8 +559,7 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 572 | QPDFTokenizer::Token t1 = readToken(m->file, MAX_LEN); | 559 | QPDFTokenizer::Token t1 = readToken(m->file, MAX_LEN); |
| 573 | qpdf_offset_t token_start = m->file->tell() - toO(t1.getValue().length()); | 560 | qpdf_offset_t token_start = m->file->tell() - toO(t1.getValue().length()); |
| 574 | if (token_start >= next_line_start) { | 561 | if (token_start >= next_line_start) { |
| 575 | - // don't process yet -- wait until we get to the line | ||
| 576 | - // containing this token | 562 | + // don't process yet -- wait until we get to the line containing this token |
| 577 | } else if (t1.isInteger()) { | 563 | } else if (t1.isInteger()) { |
| 578 | QPDFTokenizer::Token t2 = readToken(m->file, MAX_LEN); | 564 | QPDFTokenizer::Token t2 = readToken(m->file, MAX_LEN); |
| 579 | if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) { | 565 | if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) { |
| @@ -594,22 +580,18 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -594,22 +580,18 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 594 | } | 580 | } |
| 595 | 581 | ||
| 596 | if (!m->trailer.isInitialized()) { | 582 | if (!m->trailer.isInitialized()) { |
| 597 | - // We could check the last encountered object to see if it was | ||
| 598 | - // an xref stream. If so, we could try to get the trailer | ||
| 599 | - // from there. This may make it possible to recover files | ||
| 600 | - // with bad startxref pointers even when they have object | ||
| 601 | - // streams. | 583 | + // We could check the last encountered object to see if it was an xref stream. If so, we |
| 584 | + // could try to get the trailer from there. This may make it possible to recover files with | ||
| 585 | + // bad startxref pointers even when they have object streams. | ||
| 602 | 586 | ||
| 603 | throw damagedPDF("", 0, "unable to find trailer dictionary while recovering damaged file"); | 587 | throw damagedPDF("", 0, "unable to find trailer dictionary while recovering damaged file"); |
| 604 | } | 588 | } |
| 605 | 589 | ||
| 606 | - // We could iterate through the objects looking for streams and | ||
| 607 | - // try to find objects inside of them, but it's probably not worth | ||
| 608 | - // the trouble. Acrobat can't recover files with any errors in an | ||
| 609 | - // xref stream, and this would be a real long shot anyway. If we | ||
| 610 | - // wanted to do anything that involved looking at stream contents, | ||
| 611 | - // we'd also have to call initializeEncryption() here. It's safe | ||
| 612 | - // to call it more than once. | 590 | + // We could iterate through the objects looking for streams and try to find objects inside of |
| 591 | + // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors | ||
| 592 | + // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything | ||
| 593 | + // that involved looking at stream contents, we'd also have to call initializeEncryption() here. | ||
| 594 | + // It's safe to call it more than once. | ||
| 613 | } | 595 | } |
| 614 | 596 | ||
| 615 | void | 597 | void |
| @@ -622,12 +604,10 @@ QPDF::read_xref(qpdf_offset_t xref_offset) | @@ -622,12 +604,10 @@ QPDF::read_xref(qpdf_offset_t xref_offset) | ||
| 622 | char buf[7]; | 604 | char buf[7]; |
| 623 | memset(buf, 0, sizeof(buf)); | 605 | memset(buf, 0, sizeof(buf)); |
| 624 | m->file->seek(xref_offset, SEEK_SET); | 606 | m->file->seek(xref_offset, SEEK_SET); |
| 625 | - // Some files miss the mark a little with startxref. We could | ||
| 626 | - // do a better job of searching in the neighborhood for | ||
| 627 | - // something that looks like either an xref table or stream, | ||
| 628 | - // but the simple heuristic of skipping whitespace can help | ||
| 629 | - // with the xref table case and is harmless with the stream | ||
| 630 | - // case. | 607 | + // Some files miss the mark a little with startxref. We could do a better job of searching |
| 608 | + // in the neighborhood for something that looks like either an xref table or stream, but the | ||
| 609 | + // simple heuristic of skipping whitespace can help with the xref table case and is harmless | ||
| 610 | + // with the stream case. | ||
| 631 | bool done = false; | 611 | bool done = false; |
| 632 | bool skipped_space = false; | 612 | bool skipped_space = false; |
| 633 | while (!done) { | 613 | while (!done) { |
| @@ -646,9 +626,8 @@ QPDF::read_xref(qpdf_offset_t xref_offset) | @@ -646,9 +626,8 @@ QPDF::read_xref(qpdf_offset_t xref_offset) | ||
| 646 | } | 626 | } |
| 647 | 627 | ||
| 648 | m->file->read(buf, sizeof(buf) - 1); | 628 | m->file->read(buf, sizeof(buf) - 1); |
| 649 | - // The PDF spec says xref must be followed by a line | ||
| 650 | - // terminator, but files exist in the wild where it is | ||
| 651 | - // terminated by arbitrary whitespace. | 629 | + // The PDF spec says xref must be followed by a line terminator, but files exist in the wild |
| 630 | + // where it is terminated by arbitrary whitespace. | ||
| 652 | if ((strncmp(buf, "xref", 4) == 0) && QUtil::is_space(buf[4])) { | 631 | if ((strncmp(buf, "xref", 4) == 0) && QUtil::is_space(buf[4])) { |
| 653 | if (skipped_space) { | 632 | if (skipped_space) { |
| 654 | QTC::TC("qpdf", "QPDF xref skipped space"); | 633 | QTC::TC("qpdf", "QPDF xref skipped space"); |
| @@ -662,8 +641,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) | @@ -662,8 +641,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) | ||
| 662 | : (buf[4] == ' ') ? 2 | 641 | : (buf[4] == ' ') ? 2 |
| 663 | : 9999)); | 642 | : 9999)); |
| 664 | int skip = 4; | 643 | int skip = 4; |
| 665 | - // buf is null-terminated, and QUtil::is_space('\0') is | ||
| 666 | - // false, so this won't overrun. | 644 | + // buf is null-terminated, and QUtil::is_space('\0') is false, so this won't overrun. |
| 667 | while (QUtil::is_space(buf[skip])) { | 645 | while (QUtil::is_space(buf[skip])) { |
| 668 | ++skip; | 646 | ++skip; |
| 669 | } | 647 | } |
| @@ -697,16 +675,16 @@ QPDF::read_xref(qpdf_offset_t xref_offset) | @@ -697,16 +675,16 @@ QPDF::read_xref(qpdf_offset_t xref_offset) | ||
| 697 | ") is not one plus the highest object number (" + std::to_string(max_obj) + ")"))); | 675 | ") is not one plus the highest object number (" + std::to_string(max_obj) + ")"))); |
| 698 | } | 676 | } |
| 699 | 677 | ||
| 700 | - // We no longer need the deleted_objects table, so go ahead and | ||
| 701 | - // clear it out to make sure we never depend on its being set. | 678 | + // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we |
| 679 | + // never depend on its being set. | ||
| 702 | m->deleted_objects.clear(); | 680 | m->deleted_objects.clear(); |
| 703 | } | 681 | } |
| 704 | 682 | ||
| 705 | bool | 683 | bool |
| 706 | QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes) | 684 | QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes) |
| 707 | { | 685 | { |
| 708 | - // is_space and is_digit both return false on '\0', so this will | ||
| 709 | - // not overrun the null-terminated buffer. | 686 | + // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated |
| 687 | + // buffer. | ||
| 710 | char const* p = line.c_str(); | 688 | char const* p = line.c_str(); |
| 711 | char const* start = line.c_str(); | 689 | char const* start = line.c_str(); |
| 712 | 690 | ||
| @@ -753,8 +731,8 @@ QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes) | @@ -753,8 +731,8 @@ QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes) | ||
| 753 | bool | 731 | bool |
| 754 | QPDF::parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& type) | 732 | QPDF::parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& type) |
| 755 | { | 733 | { |
| 756 | - // is_space and is_digit both return false on '\0', so this will | ||
| 757 | - // not overrun the null-terminated buffer. | 734 | + // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated |
| 735 | + // buffer. | ||
| 758 | char const* p = line.c_str(); | 736 | char const* p = line.c_str(); |
| 759 | 737 | ||
| 760 | // Skip zero or more spaces. There aren't supposed to be any. | 738 | // Skip zero or more spaces. There aren't supposed to be any. |
| @@ -862,8 +840,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | @@ -862,8 +840,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | ||
| 862 | "xref table", "invalid xref entry (obj=" + std::to_string(i) + ")"); | 840 | "xref table", "invalid xref entry (obj=" + std::to_string(i) + ")"); |
| 863 | } | 841 | } |
| 864 | if (type == 'f') { | 842 | if (type == 'f') { |
| 865 | - // Save deleted items until after we've checked the | ||
| 866 | - // XRefStm, if any. | 843 | + // Save deleted items until after we've checked the XRefStm, if any. |
| 867 | deleted_items.push_back(QPDFObjGen(toI(i), f2)); | 844 | deleted_items.push_back(QPDFObjGen(toI(i), f2)); |
| 868 | } else { | 845 | } else { |
| 869 | insertXrefEntry(toI(i), 1, f1, f2); | 846 | insertXrefEntry(toI(i), 1, f1, f2); |
| @@ -902,9 +879,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | @@ -902,9 +879,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | ||
| 902 | QTC::TC("qpdf", "QPDF ignoring XRefStm in trailer"); | 879 | QTC::TC("qpdf", "QPDF ignoring XRefStm in trailer"); |
| 903 | } else { | 880 | } else { |
| 904 | if (cur_trailer.getKey("/XRefStm").isInteger()) { | 881 | if (cur_trailer.getKey("/XRefStm").isInteger()) { |
| 905 | - // Read the xref stream but disregard any return value | ||
| 906 | - // -- we'll use our trailer's /Prev key instead of the | ||
| 907 | - // xref stream's. | 882 | + // Read the xref stream but disregard any return value -- we'll use our trailer's |
| 883 | + // /Prev key instead of the xref stream's. | ||
| 908 | (void)read_xrefStream(cur_trailer.getKey("/XRefStm").getIntValue()); | 884 | (void)read_xrefStream(cur_trailer.getKey("/XRefStm").getIntValue()); |
| 909 | } else { | 885 | } else { |
| 910 | throw damagedPDF("xref stream", xref_offset, "invalid /XRefStm"); | 886 | throw damagedPDF("xref stream", xref_offset, "invalid /XRefStm"); |
| @@ -1035,8 +1011,8 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) | @@ -1035,8 +1011,8 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) | ||
| 1035 | num_entries += toS(indx.at(i)); | 1011 | num_entries += toS(indx.at(i)); |
| 1036 | } | 1012 | } |
| 1037 | 1013 | ||
| 1038 | - // entry_size and num_entries have both been validated to ensure | ||
| 1039 | - // that this multiplication does not cause an overflow. | 1014 | + // entry_size and num_entries have both been validated to ensure that this multiplication does |
| 1015 | + // not cause an overflow. | ||
| 1040 | size_t expected_size = entry_size * num_entries; | 1016 | size_t expected_size = entry_size * num_entries; |
| 1041 | 1017 | ||
| 1042 | std::shared_ptr<Buffer> bp = xref_obj.getStreamData(qpdf_dl_specialized); | 1018 | std::shared_ptr<Buffer> bp = xref_obj.getStreamData(qpdf_dl_specialized); |
| @@ -1060,9 +1036,8 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) | @@ -1060,9 +1036,8 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) | ||
| 1060 | 1036 | ||
| 1061 | bool saw_first_compressed_object = false; | 1037 | bool saw_first_compressed_object = false; |
| 1062 | 1038 | ||
| 1063 | - // Actual size vs. expected size check above ensures that we will | ||
| 1064 | - // not overflow any buffers here. We know that entry_size * | ||
| 1065 | - // num_entries is equal to the size of the buffer. | 1039 | + // Actual size vs. expected size check above ensures that we will not overflow any buffers here. |
| 1040 | + // We know that entry_size * num_entries is equal to the size of the buffer. | ||
| 1066 | unsigned char const* data = bp->getBuffer(); | 1041 | unsigned char const* data = bp->getBuffer(); |
| 1067 | for (size_t i = 0; i < num_entries; ++i) { | 1042 | for (size_t i = 0; i < num_entries; ++i) { |
| 1068 | // Read this entry | 1043 | // Read this entry |
| @@ -1081,17 +1056,15 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) | @@ -1081,17 +1056,15 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) | ||
| 1081 | } | 1056 | } |
| 1082 | } | 1057 | } |
| 1083 | 1058 | ||
| 1084 | - // Get the object and generation number. The object number is | ||
| 1085 | - // based on /Index. The generation number is 0 unless this is | ||
| 1086 | - // an uncompressed object record, in which case the generation | ||
| 1087 | - // number appears as the third field. | 1059 | + // Get the object and generation number. The object number is based on /Index. The |
| 1060 | + // generation number is 0 unless this is an uncompressed object record, in which case the | ||
| 1061 | + // generation number appears as the third field. | ||
| 1088 | int obj = toI(indx.at(cur_chunk)); | 1062 | int obj = toI(indx.at(cur_chunk)); |
| 1089 | if ((obj < 0) || ((std::numeric_limits<int>::max() - obj) < chunk_count)) { | 1063 | if ((obj < 0) || ((std::numeric_limits<int>::max() - obj) < chunk_count)) { |
| 1090 | std::ostringstream msg; | 1064 | std::ostringstream msg; |
| 1091 | msg.imbue(std::locale::classic()); | 1065 | msg.imbue(std::locale::classic()); |
| 1092 | msg << "adding " << chunk_count << " to " << obj | 1066 | msg << "adding " << chunk_count << " to " << obj |
| 1093 | - << " while computing index in xref stream would cause" | ||
| 1094 | - << " an integer overflow"; | 1067 | + << " while computing index in xref stream would cause an integer overflow"; |
| 1095 | throw std::range_error(msg.str()); | 1068 | throw std::range_error(msg.str()); |
| 1096 | } | 1069 | } |
| 1097 | obj += chunk_count; | 1070 | obj += chunk_count; |
| @@ -1113,10 +1086,8 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) | @@ -1113,10 +1086,8 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) | ||
| 1113 | m->first_xref_item_offset = xref_offset; | 1086 | m->first_xref_item_offset = xref_offset; |
| 1114 | } | 1087 | } |
| 1115 | if (fields[0] == 0) { | 1088 | if (fields[0] == 0) { |
| 1116 | - // Ignore fields[2], which we don't care about in this | ||
| 1117 | - // case. This works around the issue of some PDF files | ||
| 1118 | - // that put invalid values, like -1, here for deleted | ||
| 1119 | - // objects. | 1089 | + // Ignore fields[2], which we don't care about in this case. This works around the issue |
| 1090 | + // of some PDF files that put invalid values, like -1, here for deleted objects. | ||
| 1120 | fields[2] = 0; | 1091 | fields[2] = 0; |
| 1121 | } | 1092 | } |
| 1122 | insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2])); | 1093 | insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2])); |
| @@ -1143,17 +1114,14 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) | @@ -1143,17 +1114,14 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) | ||
| 1143 | void | 1114 | void |
| 1144 | QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite) | 1115 | QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite) |
| 1145 | { | 1116 | { |
| 1146 | - // Populate the xref table in such a way that the first reference | ||
| 1147 | - // to an object that we see, which is the one in the latest xref | ||
| 1148 | - // table in which it appears, is the one that gets stored. This | ||
| 1149 | - // works because we are reading more recent appends before older | ||
| 1150 | - // ones. Exception: if overwrite is true, then replace any | ||
| 1151 | - // existing object. This is used in xref recovery mode, which | ||
| 1152 | - // reads the file from beginning to end. | ||
| 1153 | - | ||
| 1154 | - // If there is already an entry for this object and generation in | ||
| 1155 | - // the table, it means that a later xref table has registered this | ||
| 1156 | - // object. Disregard this one. | 1117 | + // Populate the xref table in such a way that the first reference to an object that we see, |
| 1118 | + // which is the one in the latest xref table in which it appears, is the one that gets stored. | ||
| 1119 | + // This works because we are reading more recent appends before older ones. Exception: if | ||
| 1120 | + // overwrite is true, then replace any existing object. This is used in xref recovery mode, | ||
| 1121 | + // which reads the file from beginning to end. | ||
| 1122 | + | ||
| 1123 | + // If there is already an entry for this object and generation in the table, it means that a | ||
| 1124 | + // later xref table has registered this object. Disregard this one. | ||
| 1157 | { // private scope | 1125 | { // private scope |
| 1158 | int gen = (f0 == 2 ? 0 : f2); | 1126 | int gen = (f0 == 2 ? 0 : f2); |
| 1159 | QPDFObjGen og(obj, gen); | 1127 | QPDFObjGen og(obj, gen); |
| @@ -1220,8 +1188,8 @@ QPDF::showXRefTable() | @@ -1220,8 +1188,8 @@ QPDF::showXRefTable() | ||
| 1220 | } | 1188 | } |
| 1221 | } | 1189 | } |
| 1222 | 1190 | ||
| 1223 | -// Resolve all objects in the xref table. If this triggers a xref table | ||
| 1224 | -// reconstruction abort and return false. Otherwise return true. | 1191 | +// Resolve all objects in the xref table. If this triggers a xref table reconstruction abort and |
| 1192 | +// return false. Otherwise return true. | ||
| 1225 | bool | 1193 | bool |
| 1226 | QPDF::resolveXRefTable() | 1194 | QPDF::resolveXRefTable() |
| 1227 | { | 1195 | { |
| @@ -1237,8 +1205,8 @@ QPDF::resolveXRefTable() | @@ -1237,8 +1205,8 @@ QPDF::resolveXRefTable() | ||
| 1237 | return true; | 1205 | return true; |
| 1238 | } | 1206 | } |
| 1239 | 1207 | ||
| 1240 | -// Ensure all objects in the pdf file, including those in indirect | ||
| 1241 | -// references, appear in the object cache. | 1208 | +// Ensure all objects in the pdf file, including those in indirect references, appear in the object |
| 1209 | +// cache. | ||
| 1242 | void | 1210 | void |
| 1243 | QPDF::fixDanglingReferences(bool force) | 1211 | QPDF::fixDanglingReferences(bool force) |
| 1244 | { | 1212 | { |
| @@ -1255,10 +1223,9 @@ QPDF::fixDanglingReferences(bool force) | @@ -1255,10 +1223,9 @@ QPDF::fixDanglingReferences(bool force) | ||
| 1255 | size_t | 1223 | size_t |
| 1256 | QPDF::getObjectCount() | 1224 | QPDF::getObjectCount() |
| 1257 | { | 1225 | { |
| 1258 | - // This method returns the next available indirect object number. | ||
| 1259 | - // makeIndirectObject uses it for this purpose. After | ||
| 1260 | - // fixDanglingReferences is called, all objects in the xref table | ||
| 1261 | - // will also be in obj_cache. | 1226 | + // This method returns the next available indirect object number. makeIndirectObject uses it for |
| 1227 | + // this purpose. After fixDanglingReferences is called, all objects in the xref table will also | ||
| 1228 | + // be in obj_cache. | ||
| 1262 | fixDanglingReferences(); | 1229 | fixDanglingReferences(); |
| 1263 | QPDFObjGen og; | 1230 | QPDFObjGen og; |
| 1264 | if (!m->obj_cache.empty()) { | 1231 | if (!m->obj_cache.empty()) { |
| @@ -1270,8 +1237,7 @@ QPDF::getObjectCount() | @@ -1270,8 +1237,7 @@ QPDF::getObjectCount() | ||
| 1270 | std::vector<QPDFObjectHandle> | 1237 | std::vector<QPDFObjectHandle> |
| 1271 | QPDF::getAllObjects() | 1238 | QPDF::getAllObjects() |
| 1272 | { | 1239 | { |
| 1273 | - // After fixDanglingReferences is called, all objects are in the | ||
| 1274 | - // object cache. | 1240 | + // After fixDanglingReferences is called, all objects are in the object cache. |
| 1275 | fixDanglingReferences(); | 1241 | fixDanglingReferences(); |
| 1276 | std::vector<QPDFObjectHandle> result; | 1242 | std::vector<QPDFObjectHandle> result; |
| 1277 | for (auto const& iter: m->obj_cache) { | 1243 | for (auto const& iter: m->obj_cache) { |
| @@ -1315,34 +1281,27 @@ QPDF::readObject( | @@ -1315,34 +1281,27 @@ QPDF::readObject( | ||
| 1315 | auto object = QPDFParser(input, m->last_object_description, m->tokenizer, decrypter, this) | 1281 | auto object = QPDFParser(input, m->last_object_description, m->tokenizer, decrypter, this) |
| 1316 | .parse(empty, false); | 1282 | .parse(empty, false); |
| 1317 | if (empty) { | 1283 | if (empty) { |
| 1318 | - // Nothing in the PDF spec appears to allow empty objects, but | ||
| 1319 | - // they have been encountered in actual PDF files and Adobe | ||
| 1320 | - // Reader appears to ignore them. | 1284 | + // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in |
| 1285 | + // actual PDF files and Adobe Reader appears to ignore them. | ||
| 1321 | warn(damagedPDF(input, input->getLastOffset(), "empty object treated as null")); | 1286 | warn(damagedPDF(input, input->getLastOffset(), "empty object treated as null")); |
| 1322 | } else if (object.isDictionary() && (!in_object_stream)) { | 1287 | } else if (object.isDictionary() && (!in_object_stream)) { |
| 1323 | // check for stream | 1288 | // check for stream |
| 1324 | qpdf_offset_t cur_offset = input->tell(); | 1289 | qpdf_offset_t cur_offset = input->tell(); |
| 1325 | if (readToken(input).isWord("stream")) { | 1290 | if (readToken(input).isWord("stream")) { |
| 1326 | - // The PDF specification states that the word "stream" | ||
| 1327 | - // should be followed by either a carriage return and | ||
| 1328 | - // a newline or by a newline alone. It specifically | ||
| 1329 | - // disallowed following it by a carriage return alone | ||
| 1330 | - // since, in that case, there would be no way to tell | ||
| 1331 | - // whether the NL in a CR NL sequence was part of the | ||
| 1332 | - // stream data. However, some readers, including | ||
| 1333 | - // Adobe reader, accept a carriage return by itself | ||
| 1334 | - // when followed by a non-newline character, so that's | ||
| 1335 | - // what we do here. We have also seen files that have | ||
| 1336 | - // extraneous whitespace between the stream keyword and | ||
| 1337 | - // the newline. | 1291 | + // The PDF specification states that the word "stream" should be followed by either a |
| 1292 | + // carriage return and a newline or by a newline alone. It specifically disallowed | ||
| 1293 | + // following it by a carriage return alone since, in that case, there would be no way to | ||
| 1294 | + // tell whether the NL in a CR NL sequence was part of the stream data. However, some | ||
| 1295 | + // readers, including Adobe reader, accept a carriage return by itself when followed by | ||
| 1296 | + // a non-newline character, so that's what we do here. We have also seen files that have | ||
| 1297 | + // extraneous whitespace between the stream keyword and the newline. | ||
| 1338 | bool done = false; | 1298 | bool done = false; |
| 1339 | while (!done) { | 1299 | while (!done) { |
| 1340 | done = true; | 1300 | done = true; |
| 1341 | char ch; | 1301 | char ch; |
| 1342 | if (input->read(&ch, 1) == 0) { | 1302 | if (input->read(&ch, 1) == 0) { |
| 1343 | - // A premature EOF here will result in some | ||
| 1344 | - // other problem that will get reported at | ||
| 1345 | - // another time. | 1303 | + // A premature EOF here will result in some other problem that will get reported |
| 1304 | + // at another time. | ||
| 1346 | } else if (ch == '\n') { | 1305 | } else if (ch == '\n') { |
| 1347 | // ready to read stream data | 1306 | // ready to read stream data |
| 1348 | QTC::TC("qpdf", "QPDF stream with NL only"); | 1307 | QTC::TC("qpdf", "QPDF stream with NL only"); |
| @@ -1353,10 +1312,8 @@ QPDF::readObject( | @@ -1353,10 +1312,8 @@ QPDF::readObject( | ||
| 1353 | // Ready to read stream data | 1312 | // Ready to read stream data |
| 1354 | QTC::TC("qpdf", "QPDF stream with CRNL"); | 1313 | QTC::TC("qpdf", "QPDF stream with CRNL"); |
| 1355 | } else { | 1314 | } else { |
| 1356 | - // Treat the \r by itself as the | ||
| 1357 | - // whitespace after endstream and | ||
| 1358 | - // start reading stream data in spite | ||
| 1359 | - // of not having seen a newline. | 1315 | + // Treat the \r by itself as the whitespace after endstream and start |
| 1316 | + // reading stream data in spite of not having seen a newline. | ||
| 1360 | QTC::TC("qpdf", "QPDF stream with CR only"); | 1317 | QTC::TC("qpdf", "QPDF stream with CR only"); |
| 1361 | input->unreadCh(ch); | 1318 | input->unreadCh(ch); |
| 1362 | warn(damagedPDF( | 1319 | warn(damagedPDF( |
| @@ -1381,9 +1338,8 @@ QPDF::readObject( | @@ -1381,9 +1338,8 @@ QPDF::readObject( | ||
| 1381 | } | 1338 | } |
| 1382 | } | 1339 | } |
| 1383 | 1340 | ||
| 1384 | - // Must get offset before accessing any additional | ||
| 1385 | - // objects since resolving a previously unresolved | ||
| 1386 | - // indirect object will change file position. | 1341 | + // Must get offset before accessing any additional objects since resolving a previously |
| 1342 | + // unresolved indirect object will change file position. | ||
| 1387 | qpdf_offset_t stream_offset = input->tell(); | 1343 | qpdf_offset_t stream_offset = input->tell(); |
| 1388 | size_t length = 0; | 1344 | size_t length = 0; |
| 1389 | 1345 | ||
| @@ -1427,8 +1383,7 @@ QPDF::readObject( | @@ -1427,8 +1383,7 @@ QPDF::readObject( | ||
| 1427 | } | 1383 | } |
| 1428 | } | 1384 | } |
| 1429 | 1385 | ||
| 1430 | - // Override last_offset so that it points to the beginning of the | ||
| 1431 | - // object we just read | 1386 | + // Override last_offset so that it points to the beginning of the object we just read |
| 1432 | input->setLastOffset(offset); | 1387 | input->setLastOffset(offset); |
| 1433 | return object; | 1388 | return object; |
| 1434 | } | 1389 | } |
| @@ -1449,8 +1404,7 @@ size_t | @@ -1449,8 +1404,7 @@ size_t | ||
| 1449 | QPDF::recoverStreamLength( | 1404 | QPDF::recoverStreamLength( |
| 1450 | std::shared_ptr<InputSource> input, QPDFObjGen const& og, qpdf_offset_t stream_offset) | 1405 | std::shared_ptr<InputSource> input, QPDFObjGen const& og, qpdf_offset_t stream_offset) |
| 1451 | { | 1406 | { |
| 1452 | - // Try to reconstruct stream length by looking for | ||
| 1453 | - // endstream or endobj | 1407 | + // Try to reconstruct stream length by looking for endstream or endobj |
| 1454 | warn(damagedPDF(input, stream_offset, "attempting to recover stream length")); | 1408 | warn(damagedPDF(input, stream_offset, "attempting to recover stream length")); |
| 1455 | 1409 | ||
| 1456 | PatternFinder ef(*this, &QPDF::findEndstream); | 1410 | PatternFinder ef(*this, &QPDF::findEndstream); |
| @@ -1481,9 +1435,8 @@ QPDF::recoverStreamLength( | @@ -1481,9 +1435,8 @@ QPDF::recoverStreamLength( | ||
| 1481 | } | 1435 | } |
| 1482 | } | 1436 | } |
| 1483 | if (this_obj_offset && (this_og == og)) { | 1437 | if (this_obj_offset && (this_og == og)) { |
| 1484 | - // Well, we found endstream\nendobj within the space | ||
| 1485 | - // allowed for this object, so we're probably in good | ||
| 1486 | - // shape. | 1438 | + // Well, we found endstream\nendobj within the space allowed for this object, so we're |
| 1439 | + // probably in good shape. | ||
| 1487 | } else { | 1440 | } else { |
| 1488 | QTC::TC("qpdf", "QPDF found wrong endstream in recovery"); | 1441 | QTC::TC("qpdf", "QPDF found wrong endstream in recovery"); |
| 1489 | } | 1442 | } |
| @@ -1518,14 +1471,12 @@ QPDF::readObjectAtOffset( | @@ -1518,14 +1471,12 @@ QPDF::readObjectAtOffset( | ||
| 1518 | { | 1471 | { |
| 1519 | bool check_og = true; | 1472 | bool check_og = true; |
| 1520 | if (exp_og.getObj() == 0) { | 1473 | if (exp_og.getObj() == 0) { |
| 1521 | - // This method uses an expect object ID of 0 to indicate that | ||
| 1522 | - // we don't know or don't care what the actual object ID is at | ||
| 1523 | - // this offset. This is true when we read the xref stream and | ||
| 1524 | - // linearization hint streams. In this case, we don't verify | ||
| 1525 | - // the expect object ID/generation against what was read from | ||
| 1526 | - // the file. There is also no reason to attempt xref recovery | ||
| 1527 | - // if we get a failure in this case since the read attempt was | ||
| 1528 | - // not triggered by an xref lookup. | 1474 | + // This method uses an expect object ID of 0 to indicate that we don't know or don't care |
| 1475 | + // what the actual object ID is at this offset. This is true when we read the xref stream | ||
| 1476 | + // and linearization hint streams. In this case, we don't verify the expect object | ||
| 1477 | + // ID/generation against what was read from the file. There is also no reason to attempt | ||
| 1478 | + // xref recovery if we get a failure in this case since the read attempt was not triggered | ||
| 1479 | + // by an xref lookup. | ||
| 1529 | check_og = false; | 1480 | check_og = false; |
| 1530 | try_recovery = false; | 1481 | try_recovery = false; |
| 1531 | } | 1482 | } |
| @@ -1535,11 +1486,9 @@ QPDF::readObjectAtOffset( | @@ -1535,11 +1486,9 @@ QPDF::readObjectAtOffset( | ||
| 1535 | try_recovery = false; | 1486 | try_recovery = false; |
| 1536 | } | 1487 | } |
| 1537 | 1488 | ||
| 1538 | - // Special case: if offset is 0, just return null. Some PDF | ||
| 1539 | - // writers, in particular "Mac OS X 10.7.5 Quartz PDFContext", may | ||
| 1540 | - // store deleted objects in the xref table as "0000000000 00000 | ||
| 1541 | - // n", which is not correct, but it won't hurt anything for to | ||
| 1542 | - // ignore these. | 1489 | + // Special case: if offset is 0, just return null. Some PDF writers, in particular |
| 1490 | + // "Mac OS X 10.7.5 Quartz PDFContext", may store deleted objects in the xref table as | ||
| 1491 | + // "0000000000 00000 n", which is not correct, but it won't hurt anything for to ignore these. | ||
| 1543 | if (offset == 0) { | 1492 | if (offset == 0) { |
| 1544 | QTC::TC("qpdf", "QPDF bogus 0 offset", 0); | 1493 | QTC::TC("qpdf", "QPDF bogus 0 offset", 0); |
| 1545 | warn(damagedPDF(0, "object has offset 0")); | 1494 | warn(damagedPDF(0, "object has offset 0")); |
| @@ -1579,8 +1528,7 @@ QPDF::readObjectAtOffset( | @@ -1579,8 +1528,7 @@ QPDF::readObjectAtOffset( | ||
| 1579 | // Will be retried below | 1528 | // Will be retried below |
| 1580 | throw e; | 1529 | throw e; |
| 1581 | } else { | 1530 | } else { |
| 1582 | - // We can try reading the object anyway even if the ID | ||
| 1583 | - // doesn't match. | 1531 | + // We can try reading the object anyway even if the ID doesn't match. |
| 1584 | warn(e); | 1532 | warn(e); |
| 1585 | } | 1533 | } |
| 1586 | } | 1534 | } |
| @@ -1617,16 +1565,13 @@ QPDF::readObjectAtOffset( | @@ -1617,16 +1565,13 @@ QPDF::readObjectAtOffset( | ||
| 1617 | } | 1565 | } |
| 1618 | 1566 | ||
| 1619 | if (isUnresolved(og)) { | 1567 | if (isUnresolved(og)) { |
| 1620 | - // Store the object in the cache here so it gets cached | ||
| 1621 | - // whether we first know the offset or whether we first know | ||
| 1622 | - // the object ID and generation (in which we case we would get | ||
| 1623 | - // here through resolve). | ||
| 1624 | - | ||
| 1625 | - // Determine the end offset of this object before and after | ||
| 1626 | - // white space. We use these numbers to validate | ||
| 1627 | - // linearization hint tables. Offsets and lengths of objects | ||
| 1628 | - // may imply the end of an object to be anywhere between these | ||
| 1629 | - // values. | 1568 | + // Store the object in the cache here so it gets cached whether we first know the offset or |
| 1569 | + // whether we first know the object ID and generation (in which we case we would get here | ||
| 1570 | + // through resolve). | ||
| 1571 | + | ||
| 1572 | + // Determine the end offset of this object before and after white space. We use these | ||
| 1573 | + // numbers to validate linearization hint tables. Offsets and lengths of objects may imply | ||
| 1574 | + // the end of an object to be anywhere between these values. | ||
| 1630 | qpdf_offset_t end_before_space = m->file->tell(); | 1575 | qpdf_offset_t end_before_space = m->file->tell(); |
| 1631 | 1576 | ||
| 1632 | // skip over spaces | 1577 | // skip over spaces |
| @@ -1643,41 +1588,31 @@ QPDF::readObjectAtOffset( | @@ -1643,41 +1588,31 @@ QPDF::readObjectAtOffset( | ||
| 1643 | } | 1588 | } |
| 1644 | qpdf_offset_t end_after_space = m->file->tell(); | 1589 | qpdf_offset_t end_after_space = m->file->tell(); |
| 1645 | if (skip_cache_if_in_xref && m->xref_table.count(og)) { | 1590 | if (skip_cache_if_in_xref && m->xref_table.count(og)) { |
| 1646 | - // Ordinarily, an object gets read here when resolved | ||
| 1647 | - // through xref table or stream. In the special case of | ||
| 1648 | - // the xref stream and linearization hint tables, the | ||
| 1649 | - // offset comes from another source. For the specific case | ||
| 1650 | - // of xref streams, the xref stream is read and loaded | ||
| 1651 | - // into the object cache very early in parsing. | ||
| 1652 | - // Ordinarily, when a file is updated by appending, items | ||
| 1653 | - // inserted into the xref table in later updates take | ||
| 1654 | - // precedence over earlier items. In the special case of | ||
| 1655 | - // reusing the object number previously used as the xref | ||
| 1656 | - // stream, we have the following order of events: | 1591 | + // Ordinarily, an object gets read here when resolved through xref table or stream. In |
| 1592 | + // the special case of the xref stream and linearization hint tables, the offset comes | ||
| 1593 | + // from another source. For the specific case of xref streams, the xref stream is read | ||
| 1594 | + // and loaded into the object cache very early in parsing. Ordinarily, when a file is | ||
| 1595 | + // updated by appending, items inserted into the xref table in later updates take | ||
| 1596 | + // precedence over earlier items. In the special case of reusing the object number | ||
| 1597 | + // previously used as the xref stream, we have the following order of events: | ||
| 1657 | // | 1598 | // |
| 1658 | // * reused object gets loaded into the xref table | 1599 | // * reused object gets loaded into the xref table |
| 1659 | // * old object is read here while reading xref streams | 1600 | // * old object is read here while reading xref streams |
| 1660 | // * original xref entry is ignored (since already in xref table) | 1601 | // * original xref entry is ignored (since already in xref table) |
| 1661 | // | 1602 | // |
| 1662 | - // It is the second step that causes a problem. Even | ||
| 1663 | - // though the xref table is correct in this case, the old | ||
| 1664 | - // object is already in the cache and so effectively | ||
| 1665 | - // prevails over the reused object. To work around this | ||
| 1666 | - // issue, we have a special case for the xref stream (via | ||
| 1667 | - // the skip_cache_if_in_xref): if the object is already in | ||
| 1668 | - // the xref stream, don't cache what we read here. | 1603 | + // It is the second step that causes a problem. Even though the xref table is correct in |
| 1604 | + // this case, the old object is already in the cache and so effectively prevails over | ||
| 1605 | + // the reused object. To work around this issue, we have a special case for the xref | ||
| 1606 | + // stream (via the skip_cache_if_in_xref): if the object is already in the xref stream, | ||
| 1607 | + // don't cache what we read here. | ||
| 1669 | // | 1608 | // |
| 1670 | - // It is likely that the same bug may exist for | ||
| 1671 | - // linearization hint tables, but the existing code uses | ||
| 1672 | - // end_before_space and end_after_space from the cache, so | ||
| 1673 | - // fixing that would require more significant rework. The | ||
| 1674 | - // chances of a linearization hint stream being reused | ||
| 1675 | - // seems smaller because the xref stream is probably the | ||
| 1676 | - // highest object in the file and the linearization hint | ||
| 1677 | - // stream would be some random place in the middle, so I'm | ||
| 1678 | - // leaving that bug unfixed for now. If the bug were to be | ||
| 1679 | - // fixed, we could use !check_og in place of | ||
| 1680 | - // skip_cache_if_in_xref. | 1609 | + // It is likely that the same bug may exist for linearization hint tables, but the |
| 1610 | + // existing code uses end_before_space and end_after_space from the cache, so fixing | ||
| 1611 | + // that would require more significant rework. The chances of a linearization hint | ||
| 1612 | + // stream being reused seems smaller because the xref stream is probably the highest | ||
| 1613 | + // object in the file and the linearization hint stream would be some random place in | ||
| 1614 | + // the middle, so I'm leaving that bug unfixed for now. If the bug were to be fixed, we | ||
| 1615 | + // could use !check_og in place of skip_cache_if_in_xref. | ||
| 1681 | QTC::TC("qpdf", "QPDF skipping cache for known unchecked object"); | 1616 | QTC::TC("qpdf", "QPDF skipping cache for known unchecked object"); |
| 1682 | } else { | 1617 | } else { |
| 1683 | updateCache(og, oh.getObj(), end_before_space, end_after_space); | 1618 | updateCache(og, oh.getObj(), end_before_space, end_after_space); |
| @@ -1695,9 +1630,8 @@ QPDF::resolve(QPDFObjGen og) | @@ -1695,9 +1630,8 @@ QPDF::resolve(QPDFObjGen og) | ||
| 1695 | } | 1630 | } |
| 1696 | 1631 | ||
| 1697 | if (m->resolving.count(og)) { | 1632 | if (m->resolving.count(og)) { |
| 1698 | - // This can happen if an object references itself directly or | ||
| 1699 | - // indirectly in some key that has to be resolved during | ||
| 1700 | - // object parsing, such as stream length. | 1633 | + // This can happen if an object references itself directly or indirectly in some key that |
| 1634 | + // has to be resolved during object parsing, such as stream length. | ||
| 1701 | QTC::TC("qpdf", "QPDF recursion loop in resolve"); | 1635 | QTC::TC("qpdf", "QPDF recursion loop in resolve"); |
| 1702 | warn(damagedPDF("", "loop detected resolving object " + og.unparse(' '))); | 1636 | warn(damagedPDF("", "loop detected resolving object " + og.unparse(' '))); |
| 1703 | updateCache(og, QPDF_Null::create(), -1, -1); | 1637 | updateCache(og, QPDF_Null::create(), -1, -1); |
| @@ -1758,8 +1692,8 @@ QPDF::resolveObjectsInStream(int obj_stream_number) | @@ -1758,8 +1692,8 @@ QPDF::resolveObjectsInStream(int obj_stream_number) | ||
| 1758 | "supposed object stream " + std::to_string(obj_stream_number) + " is not a stream"); | 1692 | "supposed object stream " + std::to_string(obj_stream_number) + " is not a stream"); |
| 1759 | } | 1693 | } |
| 1760 | 1694 | ||
| 1761 | - // For linearization data in the object, use the data from the | ||
| 1762 | - // object stream for the objects in the stream. | 1695 | + // For linearization data in the object, use the data from the object stream for the objects in |
| 1696 | + // the stream. | ||
| 1763 | QPDFObjGen stream_og(obj_stream_number, 0); | 1697 | QPDFObjGen stream_og(obj_stream_number, 0); |
| 1764 | qpdf_offset_t end_before_space = m->obj_cache[stream_og].end_before_space; | 1698 | qpdf_offset_t end_before_space = m->obj_cache[stream_og].end_before_space; |
| 1765 | qpdf_offset_t end_after_space = m->obj_cache[stream_og].end_after_space; | 1699 | qpdf_offset_t end_after_space = m->obj_cache[stream_og].end_after_space; |
| @@ -1804,11 +1738,10 @@ QPDF::resolveObjectsInStream(int obj_stream_number) | @@ -1804,11 +1738,10 @@ QPDF::resolveObjectsInStream(int obj_stream_number) | ||
| 1804 | offsets[num] = toI(offset + first); | 1738 | offsets[num] = toI(offset + first); |
| 1805 | } | 1739 | } |
| 1806 | 1740 | ||
| 1807 | - // To avoid having to read the object stream multiple times, store | ||
| 1808 | - // all objects that would be found here in the cache. Remember | ||
| 1809 | - // that some objects stored here might have been overridden by new | ||
| 1810 | - // objects appended to the file, so it is necessary to recheck the | ||
| 1811 | - // xref table and only cache what would actually be resolved here. | 1741 | + // To avoid having to read the object stream multiple times, store all objects that would be |
| 1742 | + // found here in the cache. Remember that some objects stored here might have been overridden | ||
| 1743 | + // by new objects appended to the file, so it is necessary to recheck the xref table and only | ||
| 1744 | + // cache what would actually be resolved here. | ||
| 1812 | for (auto const& iter: offsets) { | 1745 | for (auto const& iter: offsets) { |
| 1813 | QPDFObjGen og(iter.first, 0); | 1746 | QPDFObjGen og(iter.first, 0); |
| 1814 | QPDFXRefEntry const& entry = m->xref_table[og]; | 1747 | QPDFXRefEntry const& entry = m->xref_table[og]; |
| @@ -1936,8 +1869,7 @@ QPDF::reserveStream(QPDFObjGen const& og) | @@ -1936,8 +1869,7 @@ QPDF::reserveStream(QPDFObjGen const& og) | ||
| 1936 | QPDFObjectHandle | 1869 | QPDFObjectHandle |
| 1937 | QPDF::getObject(QPDFObjGen const& og) | 1870 | QPDF::getObject(QPDFObjGen const& og) |
| 1938 | { | 1871 | { |
| 1939 | - // This method is called by the parser and therefore must not | ||
| 1940 | - // resolve any objects. | 1872 | + // This method is called by the parser and therefore must not resolve any objects. |
| 1941 | if (!isCached(og)) { | 1873 | if (!isCached(og)) { |
| 1942 | m->obj_cache[og] = ObjCache(QPDF_Unresolved::create(this, og), -1, -1); | 1874 | m->obj_cache[og] = ObjCache(QPDF_Unresolved::create(this, og), -1, -1); |
| 1943 | } | 1875 | } |
| @@ -1991,48 +1923,38 @@ QPDF::copyForeignObject(QPDFObjectHandle foreign) | @@ -1991,48 +1923,38 @@ QPDF::copyForeignObject(QPDFObjectHandle foreign) | ||
| 1991 | { | 1923 | { |
| 1992 | // Here's an explanation of what's going on here. | 1924 | // Here's an explanation of what's going on here. |
| 1993 | // | 1925 | // |
| 1994 | - // A QPDFObjectHandle that is an indirect object has an owning | ||
| 1995 | - // QPDF. The object ID and generation refers to an object in the | ||
| 1996 | - // owning QPDF. When we copy the QPDFObjectHandle from a foreign | ||
| 1997 | - // QPDF into the local QPDF, we have to replace all indirect | ||
| 1998 | - // object references with references to the corresponding object | ||
| 1999 | - // in the local file. | 1926 | + // A QPDFObjectHandle that is an indirect object has an owning QPDF. The object ID and |
| 1927 | + // generation refers to an object in the owning QPDF. When we copy the QPDFObjectHandle from a | ||
| 1928 | + // foreign QPDF into the local QPDF, we have to replace all indirect object references with | ||
| 1929 | + // references to the corresponding object in the local file. | ||
| 2000 | // | 1930 | // |
| 2001 | - // To do this, we maintain mappings from foreign object IDs to | ||
| 2002 | - // local object IDs for each foreign QPDF that we are copying | ||
| 2003 | - // from. The mapping is stored in an ObjCopier, which contains a | 1931 | + // To do this, we maintain mappings from foreign object IDs to local object IDs for each foreign |
| 1932 | + // QPDF that we are copying from. The mapping is stored in an ObjCopier, which contains a | ||
| 2004 | // mapping from the foreign ObjGen to the local QPDFObjectHandle. | 1933 | // mapping from the foreign ObjGen to the local QPDFObjectHandle. |
| 2005 | // | 1934 | // |
| 2006 | - // To copy, we do a deep traversal of the foreign object with loop | ||
| 2007 | - // detection to discover all indirect objects that are | ||
| 2008 | - // encountered, stopping at page boundaries. Whenever we encounter | ||
| 2009 | - // an indirect object, we check to see if we have already created | ||
| 2010 | - // a local copy of it. If not, we allocate a "reserved" object | ||
| 2011 | - // (or, for a stream, just a new stream) and store in the map the | 1935 | + // To copy, we do a deep traversal of the foreign object with loop detection to discover all |
| 1936 | + // indirect objects that are encountered, stopping at page boundaries. Whenever we encounter an | ||
| 1937 | + // indirect object, we check to see if we have already created a local copy of it. If not, we | ||
| 1938 | + // allocate a "reserved" object (or, for a stream, just a new stream) and store in the map the | ||
| 2012 | // mapping from the foreign object ID to the new object. While we | 1939 | // mapping from the foreign object ID to the new object. While we |
| 2013 | // do this, we keep a list of objects to copy. | 1940 | // do this, we keep a list of objects to copy. |
| 2014 | // | 1941 | // |
| 2015 | - // Once we are done with the traversal, we copy all the objects | ||
| 2016 | - // that we need to copy. However, the copies will contain indirect | ||
| 2017 | - // object IDs that refer to objects in the foreign file. We need | ||
| 2018 | - // to replace them with references to objects in the local file. | ||
| 2019 | - // This is what replaceForeignIndirectObjects does. Once we have | ||
| 2020 | - // created a copy of the foreign object with all the indirect | ||
| 2021 | - // references replaced with new ones in the local context, we can | ||
| 2022 | - // replace the local reserved object with the copy. This mechanism | ||
| 2023 | - // allows us to copy objects with circular references in any | ||
| 2024 | - // order. | ||
| 2025 | - | ||
| 2026 | - // For streams, rather than copying the objects, we set up the | ||
| 2027 | - // stream data to pull from the original stream by using a stream | ||
| 2028 | - // data provider. This is done in a manner that doesn't require | ||
| 2029 | - // the original QPDF object but may require the original source of | ||
| 2030 | - // the stream data with special handling for immediate_copy_from. | ||
| 2031 | - // This logic is also in replaceForeignIndirectObjects. | ||
| 2032 | - | ||
| 2033 | - // Note that we explicitly allow use of copyForeignObject on page | ||
| 2034 | - // objects. It is a documented use case to copy pages this way if | ||
| 2035 | - // the intention is to not update the pages tree. | 1942 | + // Once we are done with the traversal, we copy all the objects that we need to copy. However, |
| 1943 | + // the copies will contain indirect object IDs that refer to objects in the foreign file. We | ||
| 1944 | + // need to replace them with references to objects in the local file. This is what | ||
| 1945 | + // replaceForeignIndirectObjects does. Once we have created a copy of the foreign object with | ||
| 1946 | + // all the indirect references replaced with new ones in the local context, we can replace the | ||
| 1947 | + // local reserved object with the copy. This mechanism allows us to copy objects with circular | ||
| 1948 | + // references in any order. | ||
| 1949 | + | ||
| 1950 | + // For streams, rather than copying the objects, we set up the stream data to pull from the | ||
| 1951 | + // original stream by using a stream data provider. This is done in a manner that doesn't | ||
| 1952 | + // require the original QPDF object but may require the original source of the stream data with | ||
| 1953 | + // special handling for immediate_copy_from. This logic is also in | ||
| 1954 | + // replaceForeignIndirectObjects. | ||
| 1955 | + | ||
| 1956 | + // Note that we explicitly allow use of copyForeignObject on page objects. It is a documented | ||
| 1957 | + // use case to copy pages this way if the intention is to not update the pages tree. | ||
| 2036 | if (!foreign.isIndirect()) { | 1958 | if (!foreign.isIndirect()) { |
| 2037 | QTC::TC("qpdf", "QPDF copyForeign direct"); | 1959 | QTC::TC("qpdf", "QPDF copyForeign direct"); |
| 2038 | throw std::logic_error("QPDF::copyForeign called with direct object handle"); | 1960 | throw std::logic_error("QPDF::copyForeign called with direct object handle"); |
| @@ -2049,12 +1971,10 @@ QPDF::copyForeignObject(QPDFObjectHandle foreign) | @@ -2049,12 +1971,10 @@ QPDF::copyForeignObject(QPDFObjectHandle foreign) | ||
| 2049 | " at the beginning of copyForeignObject"); | 1971 | " at the beginning of copyForeignObject"); |
| 2050 | } | 1972 | } |
| 2051 | 1973 | ||
| 2052 | - // Make sure we have an object in this file for every referenced | ||
| 2053 | - // object in the old file. obj_copier.object_map maps foreign | ||
| 2054 | - // QPDFObjGen to local objects. For everything new that we have | ||
| 2055 | - // to copy, the local object will be a reservation, unless it is a | ||
| 2056 | - // stream, in which case the local object will already be a | ||
| 2057 | - // stream. | 1974 | + // Make sure we have an object in this file for every referenced object in the old file. |
| 1975 | + // obj_copier.object_map maps foreign QPDFObjGen to local objects. For everything new that we | ||
| 1976 | + // have to copy, the local object will be a reservation, unless it is a stream, in which case | ||
| 1977 | + // the local object will already be a stream. | ||
| 2058 | reserveObjects(foreign, obj_copier, true); | 1978 | reserveObjects(foreign, obj_copier, true); |
| 2059 | 1979 | ||
| 2060 | if (!obj_copier.visiting.empty()) { | 1980 | if (!obj_copier.visiting.empty()) { |
| @@ -2140,8 +2060,8 @@ QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_cop | @@ -2140,8 +2060,8 @@ QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_cop | ||
| 2140 | QTC::TC("qpdf", "QPDF replace indirect"); | 2060 | QTC::TC("qpdf", "QPDF replace indirect"); |
| 2141 | auto mapping = obj_copier.object_map.find(foreign.getObjGen()); | 2061 | auto mapping = obj_copier.object_map.find(foreign.getObjGen()); |
| 2142 | if (mapping == obj_copier.object_map.end()) { | 2062 | if (mapping == obj_copier.object_map.end()) { |
| 2143 | - // This case would occur if this is a reference to a Page | ||
| 2144 | - // or Pages object that we didn't traverse into. | 2063 | + // This case would occur if this is a reference to a Page or Pages object that we didn't |
| 2064 | + // traverse into. | ||
| 2145 | QTC::TC("qpdf", "QPDF replace foreign indirect with null"); | 2065 | QTC::TC("qpdf", "QPDF replace foreign indirect with null"); |
| 2146 | result = QPDFObjectHandle::newNull(); | 2066 | result = QPDFObjectHandle::newNull(); |
| 2147 | } else { | 2067 | } else { |
| @@ -2192,9 +2112,8 @@ QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_cop | @@ -2192,9 +2112,8 @@ QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_cop | ||
| 2192 | void | 2112 | void |
| 2193 | QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign) | 2113 | QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign) |
| 2194 | { | 2114 | { |
| 2195 | - // This method was originally written for copying foreign streams, | ||
| 2196 | - // but it is used by QPDFObjectHandle to copy streams from the | ||
| 2197 | - // same QPDF object as well. | 2115 | + // This method was originally written for copying foreign streams, but it is used by |
| 2116 | + // QPDFObjectHandle to copy streams from the same QPDF object as well. | ||
| 2198 | 2117 | ||
| 2199 | QPDFObjectHandle dict = result.getDict(); | 2118 | QPDFObjectHandle dict = result.getDict(); |
| 2200 | QPDFObjectHandle old_dict = foreign.getDict(); | 2119 | QPDFObjectHandle old_dict = foreign.getDict(); |
| @@ -2204,8 +2123,8 @@ QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign) | @@ -2204,8 +2123,8 @@ QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign) | ||
| 2204 | std::shared_ptr<QPDFObjectHandle::StreamDataProvider>(m->copied_stream_data_provider); | 2123 | std::shared_ptr<QPDFObjectHandle::StreamDataProvider>(m->copied_stream_data_provider); |
| 2205 | } | 2124 | } |
| 2206 | QPDFObjGen local_og(result.getObjGen()); | 2125 | QPDFObjGen local_og(result.getObjGen()); |
| 2207 | - // Copy information from the foreign stream so we can pipe its | ||
| 2208 | - // data later without keeping the original QPDF object around. | 2126 | + // Copy information from the foreign stream so we can pipe its data later without keeping the |
| 2127 | + // original QPDF object around. | ||
| 2209 | 2128 | ||
| 2210 | QPDF& foreign_stream_qpdf = | 2129 | QPDF& foreign_stream_qpdf = |
| 2211 | foreign.getQPDF("unable to retrieve owning qpdf from foreign stream"); | 2130 | foreign.getQPDF("unable to retrieve owning qpdf from foreign stream"); |
| @@ -2217,10 +2136,9 @@ QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign) | @@ -2217,10 +2136,9 @@ QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign) | ||
| 2217 | } | 2136 | } |
| 2218 | std::shared_ptr<Buffer> stream_buffer = stream->getStreamDataBuffer(); | 2137 | std::shared_ptr<Buffer> stream_buffer = stream->getStreamDataBuffer(); |
| 2219 | if ((foreign_stream_qpdf.m->immediate_copy_from) && (stream_buffer == nullptr)) { | 2138 | if ((foreign_stream_qpdf.m->immediate_copy_from) && (stream_buffer == nullptr)) { |
| 2220 | - // Pull the stream data into a buffer before attempting | ||
| 2221 | - // the copy operation. Do it on the source stream so that | ||
| 2222 | - // if the source stream is copied multiple times, we don't | ||
| 2223 | - // have to keep duplicating the memory. | 2139 | + // Pull the stream data into a buffer before attempting the copy operation. Do it on the |
| 2140 | + // source stream so that if the source stream is copied multiple times, we don't have to | ||
| 2141 | + // keep duplicating the memory. | ||
| 2224 | QTC::TC("qpdf", "QPDF immediate copy stream data"); | 2142 | QTC::TC("qpdf", "QPDF immediate copy stream data"); |
| 2225 | foreign.replaceStreamData( | 2143 | foreign.replaceStreamData( |
| 2226 | foreign.getRawStreamData(), | 2144 | foreign.getRawStreamData(), |
| @@ -2263,8 +2181,7 @@ QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2) | @@ -2263,8 +2181,7 @@ QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2) | ||
| 2263 | void | 2181 | void |
| 2264 | QPDF::swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2) | 2182 | QPDF::swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2) |
| 2265 | { | 2183 | { |
| 2266 | - // Force objects to be read from the input source if needed, then | ||
| 2267 | - // swap them in the cache. | 2184 | + // Force objects to be read from the input source if needed, then swap them in the cache. |
| 2268 | resolve(og1); | 2185 | resolve(og1); |
| 2269 | resolve(og2); | 2186 | resolve(og2); |
| 2270 | m->obj_cache[og1].object->swapWith(m->obj_cache[og2].object); | 2187 | m->obj_cache[og1].object->swapWith(m->obj_cache[og2].object); |
| @@ -2338,9 +2255,8 @@ QPDF::getRoot() | @@ -2338,9 +2255,8 @@ QPDF::getRoot() | ||
| 2338 | if (!root.isDictionary()) { | 2255 | if (!root.isDictionary()) { |
| 2339 | throw damagedPDF("", 0, "unable to find /Root dictionary"); | 2256 | throw damagedPDF("", 0, "unable to find /Root dictionary"); |
| 2340 | } else if ( | 2257 | } else if ( |
| 2341 | - // Check_mode is an interim solution to request #810 pending a more | ||
| 2342 | - // comprehensive review of the approach to more extensive checks and | ||
| 2343 | - // warning levels. | 2258 | + // Check_mode is an interim solution to request #810 pending a more comprehensive review of |
| 2259 | + // the approach to more extensive checks and warning levels. | ||
| 2344 | m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) { | 2260 | m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) { |
| 2345 | warn(damagedPDF("", 0, "catalog /Type entry missing or invalid")); | 2261 | warn(damagedPDF("", 0, "catalog /Type entry missing or invalid")); |
| 2346 | root.replaceKey("/Type", "/Catalog"_qpdf); | 2262 | root.replaceKey("/Type", "/Catalog"_qpdf); |
| @@ -2373,14 +2289,11 @@ QPDF::getObjectStreamData(std::map<int, int>& omap) | @@ -2373,14 +2289,11 @@ QPDF::getObjectStreamData(std::map<int, int>& omap) | ||
| 2373 | std::vector<QPDFObjGen> | 2289 | std::vector<QPDFObjGen> |
| 2374 | QPDF::getCompressibleObjGens() | 2290 | QPDF::getCompressibleObjGens() |
| 2375 | { | 2291 | { |
| 2376 | - // Return a list of objects that are allowed to be in object | ||
| 2377 | - // streams. Walk through the objects by traversing the document | ||
| 2378 | - // from the root, including a traversal of the pages tree. This | ||
| 2379 | - // makes that objects that are on the same page are more likely to | ||
| 2380 | - // be in the same object stream, which is slightly more efficient, | ||
| 2381 | - // particularly with linearized files. This is better than | ||
| 2382 | - // iterating through the xref table since it avoids preserving | ||
| 2383 | - // orphaned items. | 2292 | + // Return a list of objects that are allowed to be in object streams. Walk through the objects |
| 2293 | + // by traversing the document from the root, including a traversal of the pages tree. This | ||
| 2294 | + // makes objects that are on the same page more likely to be in the same object stream, | ||
| 2295 | + // which is slightly more efficient, particularly with linearized files. This is better than | ||
| 2296 | + // iterating through the xref table since it avoids preserving orphaned items. | ||
| 2384 | 2297 | ||
| 2385 | // Exclude encryption dictionary, if any | 2298 | // Exclude encryption dictionary, if any |
| 2386 | QPDFObjectHandle encryption_dict = m->trailer.getKey("/Encrypt"); | 2299 | QPDFObjectHandle encryption_dict = m->trailer.getKey("/Encrypt"); |
| @@ -2555,9 +2468,8 @@ QPDF::pipeForeignStreamData( | @@ -2555,9 +2468,8 @@ QPDF::pipeForeignStreamData( | ||
| 2555 | will_retry); | 2468 | will_retry); |
| 2556 | } | 2469 | } |
| 2557 | 2470 | ||
| 2558 | -// Throw a generic exception when we lack context for something | ||
| 2559 | -// more specific. New code should not use this. This method exists | ||
| 2560 | -// to improve somewhat from calling assert in very old code. | 2471 | +// Throw a generic exception when we lack context for something more specific. New code should not |
| 2472 | +// use this. This method exists to improve somewhat from calling assert in very old code. | ||
| 2561 | void | 2473 | void |
| 2562 | QPDF::stopOnError(std::string const& message) | 2474 | QPDF::stopOnError(std::string const& message) |
| 2563 | { | 2475 | { |
| @@ -2584,33 +2496,31 @@ QPDF::damagedPDF( | @@ -2584,33 +2496,31 @@ QPDF::damagedPDF( | ||
| 2584 | return damagedPDF(input, m->last_object_description, offset, message); | 2496 | return damagedPDF(input, m->last_object_description, offset, message); |
| 2585 | } | 2497 | } |
| 2586 | 2498 | ||
| 2587 | -// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from | ||
| 2588 | -// m->file. | 2499 | +// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file. |
| 2589 | QPDFExc | 2500 | QPDFExc |
| 2590 | QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message) | 2501 | QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message) |
| 2591 | { | 2502 | { |
| 2592 | return QPDFExc(qpdf_e_damaged_pdf, m->file->getName(), object, offset, message); | 2503 | return QPDFExc(qpdf_e_damaged_pdf, m->file->getName(), object, offset, message); |
| 2593 | } | 2504 | } |
| 2594 | 2505 | ||
| 2595 | -// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from | ||
| 2596 | -// m->file and the offset from .m->file->getLastOffset(). | 2506 | +// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the |
| 2507 | +// offset from m->file->getLastOffset(). | ||
| 2597 | QPDFExc | 2508 | QPDFExc |
| 2598 | QPDF::damagedPDF(std::string const& object, std::string const& message) | 2509 | QPDF::damagedPDF(std::string const& object, std::string const& message) |
| 2599 | { | 2510 | { |
| 2600 | return damagedPDF(object, m->file->getLastOffset(), message); | 2511 | return damagedPDF(object, m->file->getLastOffset(), message); |
| 2601 | } | 2512 | } |
| 2602 | 2513 | ||
| 2603 | -// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from | ||
| 2604 | -// m->file and the object from .m->last_object_description. | 2514 | +// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the object |
| 2515 | +// from m->last_object_description. | ||
| 2605 | QPDFExc | 2516 | QPDFExc |
| 2606 | QPDF::damagedPDF(qpdf_offset_t offset, std::string const& message) | 2517 | QPDF::damagedPDF(qpdf_offset_t offset, std::string const& message) |
| 2607 | { | 2518 | { |
| 2608 | return damagedPDF(m->last_object_description, offset, message); | 2519 | return damagedPDF(m->last_object_description, offset, message); |
| 2609 | } | 2520 | } |
| 2610 | 2521 | ||
| 2611 | -// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from | ||
| 2612 | -// m->file, the object from m->last_object_description and the offset from | ||
| 2613 | -// m->file->getLastOffset(). | 2522 | +// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file, the object |
| 2523 | +// from m->last_object_description and the offset from m->file->getLastOffset(). | ||
| 2614 | QPDFExc | 2524 | QPDFExc |
| 2615 | QPDF::damagedPDF(std::string const& message) | 2525 | QPDF::damagedPDF(std::string const& message) |
| 2616 | { | 2526 | { |
libqpdf/QPDFAcroFormDocumentHelper.cc
| @@ -15,9 +15,8 @@ QPDFAcroFormDocumentHelper::QPDFAcroFormDocumentHelper(QPDF& qpdf) : | @@ -15,9 +15,8 @@ QPDFAcroFormDocumentHelper::QPDFAcroFormDocumentHelper(QPDF& qpdf) : | ||
| 15 | QPDFDocumentHelper(qpdf), | 15 | QPDFDocumentHelper(qpdf), |
| 16 | m(new Members()) | 16 | m(new Members()) |
| 17 | { | 17 | { |
| 18 | - // We have to analyze up front. Otherwise, when we are adding | ||
| 19 | - // annotations and fields, we are in a temporarily unstable | ||
| 20 | - // configuration where some widget annotations are not reachable. | 18 | + // We have to analyze up front. Otherwise, when we are adding annotations and fields, we are in |
| 19 | + // a temporarily unstable configuration where some widget annotations are not reachable. | ||
| 21 | analyze(); | 20 | analyze(); |
| 22 | } | 21 | } |
| 23 | 22 | ||
| @@ -77,14 +76,11 @@ QPDFAcroFormDocumentHelper::addAndRenameFormFields(std::vector<QPDFObjectHandle> | @@ -77,14 +76,11 @@ QPDFAcroFormDocumentHelper::addAndRenameFormFields(std::vector<QPDFObjectHandle> | ||
| 77 | } | 76 | } |
| 78 | 77 | ||
| 79 | if (obj.hasKey("/T")) { | 78 | if (obj.hasKey("/T")) { |
| 80 | - // Find something we can append to the partial name that | ||
| 81 | - // makes the fully qualified name unique. When we find | ||
| 82 | - // something, reuse the same suffix for all fields in this | ||
| 83 | - // group with the same name. We can only change the name | ||
| 84 | - // of fields that have /T, and this field's /T is always | ||
| 85 | - // at the end of the fully qualified name, appending to /T | ||
| 86 | - // has the effect of appending the same thing to the fully | ||
| 87 | - // qualified name. | 79 | + // Find something we can append to the partial name that makes the fully qualified |
| 80 | + // name unique. When we find something, reuse the same suffix for all fields in this | ||
| 81 | + // group with the same name. We can only change the name of fields that have /T, and | ||
| 82 | + // this field's /T is always at the end of the fully qualified name, so appending to /T | ||
| 83 | + // has the effect of appending the same thing to the fully qualified name. | ||
| 88 | std::string old_name = QPDFFormFieldObjectHelper(obj).getFullyQualifiedName(); | 84 | std::string old_name = QPDFFormFieldObjectHelper(obj).getFullyQualifiedName(); |
| 89 | if (renames.count(old_name) == 0) { | 85 | if (renames.count(old_name) == 0) { |
| 90 | std::string new_name = old_name; | 86 | std::string new_name = old_name; |
| @@ -253,8 +249,7 @@ QPDFAcroFormDocumentHelper::analyze() | @@ -253,8 +249,7 @@ QPDFAcroFormDocumentHelper::analyze() | ||
| 253 | fields = QPDFObjectHandle::newArray(); | 249 | fields = QPDFObjectHandle::newArray(); |
| 254 | } | 250 | } |
| 255 | 251 | ||
| 256 | - // Traverse /AcroForm to find annotations and map them | ||
| 257 | - // bidirectionally to fields. | 252 | + // Traverse /AcroForm to find annotations and map them bidirectionally to fields. |
| 258 | 253 | ||
| 259 | QPDFObjGen::set visited; | 254 | QPDFObjGen::set visited; |
| 260 | int nfields = fields.getArrayNItems(); | 255 | int nfields = fields.getArrayNItems(); |
| @@ -263,12 +258,10 @@ QPDFAcroFormDocumentHelper::analyze() | @@ -263,12 +258,10 @@ QPDFAcroFormDocumentHelper::analyze() | ||
| 263 | traverseField(fields.getArrayItem(i), null, 0, visited); | 258 | traverseField(fields.getArrayItem(i), null, 0, visited); |
| 264 | } | 259 | } |
| 265 | 260 | ||
| 266 | - // All Widget annotations should have been encountered by | ||
| 267 | - // traversing /AcroForm, but in case any weren't, find them by | ||
| 268 | - // walking through pages, and treat any widget annotation that is | ||
| 269 | - // not associated with a field as its own field. This just ensures | ||
| 270 | - // that requesting the field for any annotation we find through a | ||
| 271 | - // page's /Annots list will have some associated field. Note that | 261 | + // All Widget annotations should have been encountered by traversing /AcroForm, but in case any |
| 262 | + // weren't, find them by walking through pages, and treat any widget annotation that is not | ||
| 263 | + // associated with a field as its own field. This just ensures that requesting the field for any | ||
| 264 | + // annotation we find through a page's /Annots list will have some associated field. Note that | ||
| 272 | // a file that contains this kind of error will probably not | 265 | // a file that contains this kind of error will probably not |
| 273 | // actually work with most viewers. | 266 | // actually work with most viewers. |
| 274 | 267 | ||
| @@ -278,13 +271,11 @@ QPDFAcroFormDocumentHelper::analyze() | @@ -278,13 +271,11 @@ QPDFAcroFormDocumentHelper::analyze() | ||
| 278 | QPDFObjGen og(annot.getObjGen()); | 271 | QPDFObjGen og(annot.getObjGen()); |
| 279 | if (m->annotation_to_field.count(og) == 0) { | 272 | if (m->annotation_to_field.count(og) == 0) { |
| 280 | QTC::TC("qpdf", "QPDFAcroFormDocumentHelper orphaned widget"); | 273 | QTC::TC("qpdf", "QPDFAcroFormDocumentHelper orphaned widget"); |
| 281 | - // This is not supposed to happen, but it's easy | ||
| 282 | - // enough for us to handle this case. Treat the | ||
| 283 | - // annotation as its own field. This could allow qpdf | ||
| 284 | - // to sensibly handle a case such as a PDF creator | ||
| 285 | - // adding a self-contained annotation (merged with the | ||
| 286 | - // field dictionary) to the page's /Annots array and | ||
| 287 | - // forgetting to also put it in /AcroForm. | 274 | + // This is not supposed to happen, but it's easy enough for us to handle this case. |
| 275 | + // Treat the annotation as its own field. This could allow qpdf to sensibly handle a | ||
| 276 | + // case such as a PDF creator adding a self-contained annotation (merged with the | ||
| 277 | + // field dictionary) to the page's /Annots array and forgetting to also put it in | ||
| 278 | + // /AcroForm. | ||
| 288 | annot.warnIfPossible("this widget annotation is not" | 279 | annot.warnIfPossible("this widget annotation is not" |
| 289 | " reachable from /AcroForm in the document catalog"); | 280 | " reachable from /AcroForm in the document catalog"); |
| 290 | m->annotation_to_field[og] = QPDFFormFieldObjectHelper(annot); | 281 | m->annotation_to_field[og] = QPDFFormFieldObjectHelper(annot); |
| @@ -299,14 +290,14 @@ QPDFAcroFormDocumentHelper::traverseField( | @@ -299,14 +290,14 @@ QPDFAcroFormDocumentHelper::traverseField( | ||
| 299 | QPDFObjectHandle field, QPDFObjectHandle parent, int depth, QPDFObjGen::set& visited) | 290 | QPDFObjectHandle field, QPDFObjectHandle parent, int depth, QPDFObjGen::set& visited) |
| 300 | { | 291 | { |
| 301 | if (depth > 100) { | 292 | if (depth > 100) { |
| 302 | - // Arbitrarily cut off recursion at a fixed depth to avoid | ||
| 303 | - // specially crafted files that could cause stack overflow. | 293 | + // Arbitrarily cut off recursion at a fixed depth to avoid specially crafted files that |
| 294 | + // could cause stack overflow. | ||
| 304 | return; | 295 | return; |
| 305 | } | 296 | } |
| 306 | if (!field.isIndirect()) { | 297 | if (!field.isIndirect()) { |
| 307 | QTC::TC("qpdf", "QPDFAcroFormDocumentHelper direct field"); | 298 | QTC::TC("qpdf", "QPDFAcroFormDocumentHelper direct field"); |
| 308 | - field.warnIfPossible("encountered a direct object as a field or annotation while" | ||
| 309 | - " traversing /AcroForm; ignoring field or annotation"); | 299 | + field.warnIfPossible("encountered a direct object as a field or annotation while " |
| 300 | + "traversing /AcroForm; ignoring field or annotation"); | ||
| 310 | return; | 301 | return; |
| 311 | } | 302 | } |
| 312 | if (!field.isDictionary()) { | 303 | if (!field.isDictionary()) { |
| @@ -322,13 +313,11 @@ QPDFAcroFormDocumentHelper::traverseField( | @@ -322,13 +313,11 @@ QPDFAcroFormDocumentHelper::traverseField( | ||
| 322 | return; | 313 | return; |
| 323 | } | 314 | } |
| 324 | 315 | ||
| 325 | - // A dictionary encountered while traversing the /AcroForm field | ||
| 326 | - // may be a form field, an annotation, or the merger of the two. A | ||
| 327 | - // field that has no fields below it is a terminal. If a terminal | ||
| 328 | - // field looks like an annotation, it is an annotation because | ||
| 329 | - // annotation dictionary fields can be merged with terminal field | ||
| 330 | - // dictionaries. Otherwise, the annotation fields might be there | ||
| 331 | - // to be inherited by annotations below it. | 316 | + // A dictionary encountered while traversing the /AcroForm field may be a form field, an |
| 317 | + // annotation, or the merger of the two. A field that has no fields below it is a terminal. If a | ||
| 318 | + // terminal field looks like an annotation, it is an annotation because annotation dictionary | ||
| 319 | + // fields can be merged with terminal field dictionaries. Otherwise, the annotation fields might | ||
| 320 | + // be there to be inherited by annotations below it. | ||
| 332 | 321 | ||
| 333 | bool is_annotation = false; | 322 | bool is_annotation = false; |
| 334 | bool is_field = (0 == depth); | 323 | bool is_field = (0 == depth); |
| @@ -363,8 +352,7 @@ QPDFAcroFormDocumentHelper::traverseField( | @@ -363,8 +352,7 @@ QPDFAcroFormDocumentHelper::traverseField( | ||
| 363 | std::string name = foh.getFullyQualifiedName(); | 352 | std::string name = foh.getFullyQualifiedName(); |
| 364 | auto old = m->field_to_name.find(f_og); | 353 | auto old = m->field_to_name.find(f_og); |
| 365 | if (old != m->field_to_name.end()) { | 354 | if (old != m->field_to_name.end()) { |
| 366 | - // We might be updating after a name change, so remove any | ||
| 367 | - // old information | 355 | + // We might be updating after a name change, so remove any old information |
| 368 | std::string old_name = old->second; | 356 | std::string old_name = old->second; |
| 369 | m->name_to_fields[old_name].erase(f_og); | 357 | m->name_to_fields[old_name].erase(f_og); |
| 370 | } | 358 | } |
| @@ -412,11 +400,9 @@ QPDFAcroFormDocumentHelper::generateAppearancesIfNeeded() | @@ -412,11 +400,9 @@ QPDFAcroFormDocumentHelper::generateAppearancesIfNeeded() | ||
| 412 | for (auto& aoh: getWidgetAnnotationsForPage(page)) { | 400 | for (auto& aoh: getWidgetAnnotationsForPage(page)) { |
| 413 | QPDFFormFieldObjectHelper ffh = getFieldForAnnotation(aoh); | 401 | QPDFFormFieldObjectHelper ffh = getFieldForAnnotation(aoh); |
| 414 | if (ffh.getFieldType() == "/Btn") { | 402 | if (ffh.getFieldType() == "/Btn") { |
| 415 | - // Rather than generating appearances for button | ||
| 416 | - // fields, rely on what's already there. Just make | ||
| 417 | - // sure /AS is consistent with /V, which we can do by | ||
| 418 | - // resetting the value of the field back to itself. | ||
| 419 | - // This code is referenced in a comment in | 403 | + // Rather than generating appearances for button fields, rely on what's already |
| 404 | + // there. Just make sure /AS is consistent with /V, which we can do by resetting the | ||
| 405 | + // value of the field back to itself. This code is referenced in a comment in | ||
| 420 | // QPDFFormFieldObjectHelper::generateAppearance. | 406 | // QPDFFormFieldObjectHelper::generateAppearance. |
| 421 | if (ffh.isRadioButton() || ffh.isCheckbox()) { | 407 | if (ffh.isRadioButton() || ffh.isCheckbox()) { |
| 422 | ffh.setV(ffh.getValue()); | 408 | ffh.setV(ffh.getValue()); |
| @@ -437,16 +423,13 @@ QPDFAcroFormDocumentHelper::adjustInheritedFields( | @@ -437,16 +423,13 @@ QPDFAcroFormDocumentHelper::adjustInheritedFields( | ||
| 437 | bool override_q, | 423 | bool override_q, |
| 438 | int from_default_q) | 424 | int from_default_q) |
| 439 | { | 425 | { |
| 440 | - // Override /Q or /DA if needed. If this object has a field type, | ||
| 441 | - // directly or inherited, it is a field and not just an | ||
| 442 | - // annotation. In that case, we need to override if we are getting | ||
| 443 | - // a value from the document that is different from the value we | ||
| 444 | - // would have gotten from the old document. We must take care not | ||
| 445 | - // to override an explicit value. It's possible that /FT may be | ||
| 446 | - // inherited by lower fields that may explicitly set /DA or /Q or | ||
| 447 | - // that this is a field whose type does not require /DA or /Q and | ||
| 448 | - // we may be put a value on the field that is unused. This is | ||
| 449 | - // harmless, so it's not worth trying to work around. | 426 | + // Override /Q or /DA if needed. If this object has a field type, directly or inherited, it is a |
| 427 | + // field and not just an annotation. In that case, we need to override if we are getting a value | ||
| 428 | + // from the document that is different from the value we would have gotten from the old | ||
| 429 | + // document. We must take care not to override an explicit value. It's possible that /FT may be | ||
| 430 | + // inherited by lower fields that may explicitly set /DA or /Q or that this is a field whose | ||
| 431 | + // type does not require /DA or /Q and we may put a value on the field that is unused. This | |
| 432 | + // is harmless, so it's not worth trying to work around. | ||
| 450 | 433 | ||
| 451 | auto has_explicit = [](QPDFFormFieldObjectHelper& field, std::string const& key) { | 434 | auto has_explicit = [](QPDFFormFieldObjectHelper& field, std::string const& key) { |
| 452 | if (field.getObjectHandle().hasKey(key)) { | 435 | if (field.getObjectHandle().hasKey(key)) { |
| @@ -550,45 +533,36 @@ void | @@ -550,45 +533,36 @@ void | ||
| 550 | QPDFAcroFormDocumentHelper::adjustDefaultAppearances( | 533 | QPDFAcroFormDocumentHelper::adjustDefaultAppearances( |
| 551 | QPDFObjectHandle obj, std::map<std::string, std::map<std::string, std::string>> const& dr_map) | 534 | QPDFObjectHandle obj, std::map<std::string, std::map<std::string, std::string>> const& dr_map) |
| 552 | { | 535 | { |
| 553 | - // This method is called on a field that has been copied from | ||
| 554 | - // another file but whose /DA still refers to resources in the | ||
| 555 | - // original file's /DR. | ||
| 556 | - | ||
| 557 | - // When appearance streams are generated for variable text fields | ||
| 558 | - // (see ISO 32000 PDF spec section 12.7.3.3), the field's /DA is | ||
| 559 | - // used to generate content of the appearance stream. /DA contains | ||
| 560 | - // references to resources that may be resolved in the document's | ||
| 561 | - // /DR dictionary, which appears in the document's /AcroForm | ||
| 562 | - // dictionary. For fields that we copied from other documents, we | ||
| 563 | - // need to ensure that resources are mapped correctly in the case | ||
| 564 | - // of conflicting names. For example, if a.pdf's /DR has /F1 | ||
| 565 | - // pointing to one font and b.pdf's /DR also has /F1 but it points | ||
| 566 | - // elsewhere, we need to make sure appearance streams of fields | ||
| 567 | - // copied from b.pdf into a.pdf use whatever font /F1 meant in | ||
| 568 | - // b.pdf, not whatever it means in a.pdf. This method takes care | ||
| 569 | - // of that. It is only called on fields copied from foreign files. | 536 | + // This method is called on a field that has been copied from another file but whose /DA still |
| 537 | + // refers to resources in the original file's /DR. | ||
| 538 | + | ||
| 539 | + // When appearance streams are generated for variable text fields (see ISO 32000 PDF spec | ||
| 540 | + // section 12.7.3.3), the field's /DA is used to generate content of the appearance stream. /DA | ||
| 541 | + // contains references to resources that may be resolved in the document's /DR dictionary, which | ||
| 542 | + // appears in the document's /AcroForm dictionary. For fields that we copied from other | ||
| 543 | + // documents, we need to ensure that resources are mapped correctly in the case of conflicting | ||
| 544 | + // names. For example, if a.pdf's /DR has /F1 pointing to one font and b.pdf's /DR also has /F1 | ||
| 545 | + // but it points elsewhere, we need to make sure appearance streams of fields copied from b.pdf | ||
| 546 | + // into a.pdf use whatever font /F1 meant in b.pdf, not whatever it means in a.pdf. This method | ||
| 547 | + // takes care of that. It is only called on fields copied from foreign files. | ||
| 570 | 548 | ||
| 571 | // A few notes: | 549 | // A few notes: |
| 572 | // | 550 | // |
| 573 | - // * If the from document's /DR and the current document's /DR | ||
| 574 | - // have conflicting keys, we have already resolved the conflicts | ||
| 575 | - // before calling this method. The dr_map parameter contains the | ||
| 576 | - // mapping from old keys to new keys. | 551 | + // * If the from document's /DR and the current document's /DR have conflicting keys, we have |
| 552 | + // already resolved the conflicts before calling this method. The dr_map parameter contains | ||
| 553 | + // the mapping from old keys to new keys. | ||
| 577 | // | 554 | // |
| 578 | - // * /DA may be inherited from the document's /AcroForm | ||
| 579 | - // dictionary. By the time this method has been called, we have | ||
| 580 | - // already copied any document-level values into the fields to | ||
| 581 | - // avoid having them inherit from the new document. This was | ||
| 582 | - // done in adjustInheritedFields. | 555 | + // * /DA may be inherited from the document's /AcroForm dictionary. By the time this method has |
| 556 | + // been called, we have already copied any document-level values into the fields to avoid | ||
| 557 | + // having them inherit from the new document. This was done in adjustInheritedFields. | ||
| 583 | 558 | ||
| 584 | auto DA = obj.getKey("/DA"); | 559 | auto DA = obj.getKey("/DA"); |
| 585 | if (!DA.isString()) { | 560 | if (!DA.isString()) { |
| 586 | return; | 561 | return; |
| 587 | } | 562 | } |
| 588 | 563 | ||
| 589 | - // Find names in /DA. /DA is a string that contains content | ||
| 590 | - // stream-like code, so we create a stream out of the string and | ||
| 591 | - // then filter it. We don't attach the stream to anything, so it | 564 | + // Find names in /DA. /DA is a string that contains content stream-like code, so we create a |
| 565 | + // stream out of the string and then filter it. We don't attach the stream to anything, so it | ||
| 592 | // will get discarded. | 566 | // will get discarded. |
| 593 | ResourceFinder rf; | 567 | ResourceFinder rf; |
| 594 | auto da_stream = QPDFObjectHandle::newStream(&this->qpdf, DA.getUTF8Value()); | 568 | auto da_stream = QPDFObjectHandle::newStream(&this->qpdf, DA.getUTF8Value()); |
| @@ -599,8 +573,8 @@ QPDFAcroFormDocumentHelper::adjustDefaultAppearances( | @@ -599,8 +573,8 @@ QPDFAcroFormDocumentHelper::adjustDefaultAppearances( | ||
| 599 | QTC::TC("qpdf", "QPDFAcroFormDocumentHelper /DA parse error"); | 573 | QTC::TC("qpdf", "QPDFAcroFormDocumentHelper /DA parse error"); |
| 600 | } | 574 | } |
| 601 | } catch (std::exception& e) { | 575 | } catch (std::exception& e) { |
| 602 | - // No way to reproduce in test suite right now since error | ||
| 603 | - // conditions are converted to warnings. | 576 | + // No way to reproduce in test suite right now since error conditions are converted to |
| 577 | + // warnings. | ||
| 604 | obj.warnIfPossible( | 578 | obj.warnIfPossible( |
| 605 | std::string("Unable to parse /DA: ") + e.what() + | 579 | std::string("Unable to parse /DA: ") + e.what() + |
| 606 | "; this form field may not update properly"); | 580 | "; this form field may not update properly"); |
| @@ -620,15 +594,12 @@ void | @@ -620,15 +594,12 @@ void | ||
| 620 | QPDFAcroFormDocumentHelper::adjustAppearanceStream( | 594 | QPDFAcroFormDocumentHelper::adjustAppearanceStream( |
| 621 | QPDFObjectHandle stream, std::map<std::string, std::map<std::string, std::string>> dr_map) | 595 | QPDFObjectHandle stream, std::map<std::string, std::map<std::string, std::string>> dr_map) |
| 622 | { | 596 | { |
| 623 | - // We don't have to modify appearance streams or their resource | ||
| 624 | - // dictionaries for them to display properly, but we need to do so | ||
| 625 | - // to make them save to regenerate. Suppose an appearance stream | ||
| 626 | - // as a font /F1 that is different from /F1 in /DR, and that when | ||
| 627 | - // we copy the field, /F1 is remapped to /F1_1. When the field is | ||
| 628 | - // regenerated, /F1_1 won't appear in the stream's resource | ||
| 629 | - // dictionary, so the regenerated appearance stream will revert to | ||
| 630 | - // the /F1_1 in /DR. If we adjust existing appearance streams, we | ||
| 631 | - // are protected from this problem. | 597 | + // We don't have to modify appearance streams or their resource dictionaries for them to display |
| 598 | + // properly, but we need to do so to make them safe to regenerate. Suppose an appearance stream | |
| 599 | + // has a font /F1 that is different from /F1 in /DR, and that when we copy the field, /F1 is | |
| 600 | + // remapped to /F1_1. When the field is regenerated, /F1_1 won't appear in the stream's resource | ||
| 601 | + // dictionary, so the regenerated appearance stream will revert to the /F1_1 in /DR. If we | ||
| 602 | + // adjust existing appearance streams, we are protected from this problem. | ||
| 632 | 603 | ||
| 633 | auto dict = stream.getDict(); | 604 | auto dict = stream.getDict(); |
| 634 | auto resources = dict.getKey("/Resources"); | 605 | auto resources = dict.getKey("/Resources"); |
| @@ -640,17 +611,15 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream( | @@ -640,17 +611,15 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream( | ||
| 640 | resources = this->qpdf.makeIndirectObject(resources); | 611 | resources = this->qpdf.makeIndirectObject(resources); |
| 641 | } | 612 | } |
| 642 | dict.replaceKey("/Resources", resources); | 613 | dict.replaceKey("/Resources", resources); |
| 643 | - // Create a dictionary with top-level keys so we can use | ||
| 644 | - // mergeResources to force them to be unshared. We will also use | ||
| 645 | - // this to resolve conflicts that may already be in the resource | 614 | + // Create a dictionary with top-level keys so we can use mergeResources to force them to be |
| 615 | + // unshared. We will also use this to resolve conflicts that may already be in the resource | ||
| 646 | // dictionary. | 616 | // dictionary. |
| 647 | auto merge_with = QPDFObjectHandle::newDictionary(); | 617 | auto merge_with = QPDFObjectHandle::newDictionary(); |
| 648 | for (auto const& top_key: dr_map) { | 618 | for (auto const& top_key: dr_map) { |
| 649 | merge_with.replaceKey(top_key.first, QPDFObjectHandle::newDictionary()); | 619 | merge_with.replaceKey(top_key.first, QPDFObjectHandle::newDictionary()); |
| 650 | } | 620 | } |
| 651 | resources.mergeResources(merge_with); | 621 | resources.mergeResources(merge_with); |
| 652 | - // Rename any keys in the resource dictionary that we | ||
| 653 | - // remapped. | 622 | + // Rename any keys in the resource dictionary that we remapped. |
| 654 | for (auto const& i1: dr_map) { | 623 | for (auto const& i1: dr_map) { |
| 655 | std::string const& top_key = i1.first; | 624 | std::string const& top_key = i1.first; |
| 656 | auto subdict = resources.getKey(top_key); | 625 | auto subdict = resources.getKey(top_key); |
| @@ -662,12 +631,10 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream( | @@ -662,12 +631,10 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream( | ||
| 662 | std::string const& new_key = i2.second; | 631 | std::string const& new_key = i2.second; |
| 663 | auto existing_new = subdict.getKey(new_key); | 632 | auto existing_new = subdict.getKey(new_key); |
| 664 | if (!existing_new.isNull()) { | 633 | if (!existing_new.isNull()) { |
| 665 | - // The resource dictionary already has a key in it | ||
| 666 | - // matching what we remapped an old key to, so we'll | ||
| 667 | - // have to move it out of the way. Stick it in | ||
| 668 | - // merge_with, which we will re-merge with the | ||
| 669 | - // dictionary when we're done. We know merge_with | ||
| 670 | - // already has dictionaries for all the top keys. | 634 | + // The resource dictionary already has a key in it matching what we remapped an old |
| 635 | + // key to, so we'll have to move it out of the way. Stick it in merge_with, which we | ||
| 636 | + // will re-merge with the dictionary when we're done. We know merge_with already has | ||
| 637 | + // dictionaries for all the top keys. | ||
| 671 | QTC::TC("qpdf", "QPDFAcroFormDocumentHelper ap conflict"); | 638 | QTC::TC("qpdf", "QPDFAcroFormDocumentHelper ap conflict"); |
| 672 | merge_with.getKey(top_key).replaceKey(new_key, existing_new); | 639 | merge_with.getKey(top_key).replaceKey(new_key, existing_new); |
| 673 | } | 640 | } |
| @@ -679,9 +646,8 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream( | @@ -679,9 +646,8 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream( | ||
| 679 | } | 646 | } |
| 680 | } | 647 | } |
| 681 | } | 648 | } |
| 682 | - // Deal with any any conflicts by re-merging with merge_with and | ||
| 683 | - // updating our local copy of dr_map, which we will use to modify | ||
| 684 | - // the stream contents. | 649 | + // Deal with any any conflicts by re-merging with merge_with and updating our local copy of |
| 650 | + // dr_map, which we will use to modify the stream contents. | ||
| 685 | resources.mergeResources(merge_with, &dr_map); | 651 | resources.mergeResources(merge_with, &dr_map); |
| 686 | // Remove empty subdictionaries | 652 | // Remove empty subdictionaries |
| 687 | for (auto iter: resources.ditems()) { | 653 | for (auto iter: resources.ditems()) { |
| @@ -702,8 +668,8 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream( | @@ -702,8 +668,8 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream( | ||
| 702 | auto tf = std::shared_ptr<QPDFObjectHandle::TokenFilter>(rr); | 668 | auto tf = std::shared_ptr<QPDFObjectHandle::TokenFilter>(rr); |
| 703 | stream.addTokenFilter(tf); | 669 | stream.addTokenFilter(tf); |
| 704 | } catch (std::exception& e) { | 670 | } catch (std::exception& e) { |
| 705 | - // No way to reproduce in test suite right now since error | ||
| 706 | - // conditions are converted to warnings. | 671 | + // No way to reproduce in test suite right now since error conditions are converted to |
| 672 | + // warnings. | ||
| 707 | stream.warnIfPossible(std::string("Unable to parse appearance stream: ") + e.what()); | 673 | stream.warnIfPossible(std::string("Unable to parse appearance stream: ") + e.what()); |
| 708 | } | 674 | } |
| 709 | } | 675 | } |
| @@ -729,24 +695,22 @@ QPDFAcroFormDocumentHelper::transformAnnotations( | @@ -729,24 +695,22 @@ QPDFAcroFormDocumentHelper::transformAnnotations( | ||
| 729 | } | 695 | } |
| 730 | bool foreign = (from_qpdf != &this->qpdf); | 696 | bool foreign = (from_qpdf != &this->qpdf); |
| 731 | 697 | ||
| 732 | - // It's possible that we will transform annotations that don't | ||
| 733 | - // include any form fields. This code takes care not to muck | ||
| 734 | - // around with /AcroForm unless we have to. | 698 | + // It's possible that we will transform annotations that don't include any form fields. This |
| 699 | + // code takes care not to muck around with /AcroForm unless we have to. | ||
| 735 | 700 | ||
| 736 | QPDFObjectHandle acroform = this->qpdf.getRoot().getKey("/AcroForm"); | 701 | QPDFObjectHandle acroform = this->qpdf.getRoot().getKey("/AcroForm"); |
| 737 | QPDFObjectHandle from_acroform = from_qpdf->getRoot().getKey("/AcroForm"); | 702 | QPDFObjectHandle from_acroform = from_qpdf->getRoot().getKey("/AcroForm"); |
| 738 | 703 | ||
| 739 | - // /DA and /Q may be inherited from the document-level /AcroForm | ||
| 740 | - // dictionary. If we are copying a foreign stream and the stream | ||
| 741 | - // is getting one of these values from its document's /AcroForm, | ||
| 742 | - // we will need to copy the value explicitly so that it doesn't | ||
| 743 | - // start getting its default from the destination document. | 704 | + // /DA and /Q may be inherited from the document-level /AcroForm dictionary. If we are copying a |
| 705 | + // foreign stream and the stream is getting one of these values from its document's /AcroForm, | ||
| 706 | + // we will need to copy the value explicitly so that it doesn't start getting its default from | ||
| 707 | + // the destination document. | ||
| 744 | bool override_da = false; | 708 | bool override_da = false; |
| 745 | bool override_q = false; | 709 | bool override_q = false; |
| 746 | std::string from_default_da; | 710 | std::string from_default_da; |
| 747 | int from_default_q = 0; | 711 | int from_default_q = 0; |
| 748 | - // If we copy any form fields, we will need to merge the source | ||
| 749 | - // document's /DR into this document's /DR. | 712 | + // If we copy any form fields, we will need to merge the source document's /DR into this |
| 713 | + // document's /DR. | ||
| 750 | QPDFObjectHandle from_dr = QPDFObjectHandle::newNull(); | 714 | QPDFObjectHandle from_dr = QPDFObjectHandle::newNull(); |
| 751 | if (foreign) { | 715 | if (foreign) { |
| 752 | std::string default_da; | 716 | std::string default_da; |
| @@ -782,9 +746,8 @@ QPDFAcroFormDocumentHelper::transformAnnotations( | @@ -782,9 +746,8 @@ QPDFAcroFormDocumentHelper::transformAnnotations( | ||
| 782 | } | 746 | } |
| 783 | } | 747 | } |
| 784 | 748 | ||
| 785 | - // If we have to merge /DR, we will need a mapping of conflicting | ||
| 786 | - // keys for rewriting /DA. Set this up for lazy initialization in | ||
| 787 | - // case we encounter any form fields. | 749 | + // If we have to merge /DR, we will need a mapping of conflicting keys for rewriting /DA. Set |
| 750 | + // this up for lazy initialization in case we encounter any form fields. | ||
| 788 | std::map<std::string, std::map<std::string, std::string>> dr_map; | 751 | std::map<std::string, std::map<std::string, std::string>> dr_map; |
| 789 | bool initialized_dr_map = false; | 752 | bool initialized_dr_map = false; |
| 790 | QPDFObjectHandle dr = QPDFObjectHandle::newNull(); | 753 | QPDFObjectHandle dr = QPDFObjectHandle::newNull(); |
| @@ -804,11 +767,9 @@ QPDFAcroFormDocumentHelper::transformAnnotations( | @@ -804,11 +767,9 @@ QPDFAcroFormDocumentHelper::transformAnnotations( | ||
| 804 | if (!dr.isIndirect()) { | 767 | if (!dr.isIndirect()) { |
| 805 | dr = acroform.replaceKeyAndGetNew("/DR", this->qpdf.makeIndirectObject(dr)); | 768 | dr = acroform.replaceKeyAndGetNew("/DR", this->qpdf.makeIndirectObject(dr)); |
| 806 | } | 769 | } |
| 807 | - // Merge the other document's /DR, creating a conflict | ||
| 808 | - // map. mergeResources checks to make sure both objects | ||
| 809 | - // are dictionaries. By this point, if this is foreign, | ||
| 810 | - // from_dr has been copied, so we use the target qpdf as | ||
| 811 | - // the owning qpdf. | 770 | + // Merge the other document's /DR, creating a conflict map. mergeResources checks to |
| 771 | + // make sure both objects are dictionaries. By this point, if this is foreign, from_dr | ||
| 772 | + // has been copied, so we use the target qpdf as the owning qpdf. | ||
| 812 | from_dr.makeResourcesIndirect(this->qpdf); | 773 | from_dr.makeResourcesIndirect(this->qpdf); |
| 813 | dr.mergeResources(from_dr, &dr_map); | 774 | dr.mergeResources(from_dr, &dr_map); |
| 814 | 775 | ||
| @@ -818,8 +779,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations( | @@ -818,8 +779,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations( | ||
| 818 | } | 779 | } |
| 819 | }; | 780 | }; |
| 820 | 781 | ||
| 821 | - // This helper prevents us from copying the same object | ||
| 822 | - // multiple times. | 782 | + // This helper prevents us from copying the same object multiple times. |
| 823 | std::map<QPDFObjGen, QPDFObjectHandle> orig_to_copy; | 783 | std::map<QPDFObjGen, QPDFObjectHandle> orig_to_copy; |
| 824 | auto maybe_copy_object = [&](QPDFObjectHandle& to_copy) { | 784 | auto maybe_copy_object = [&](QPDFObjectHandle& to_copy) { |
| 825 | auto og = to_copy.getObjGen(); | 785 | auto og = to_copy.getObjGen(); |
| @@ -842,39 +802,28 @@ QPDFAcroFormDocumentHelper::transformAnnotations( | @@ -842,39 +802,28 @@ QPDFAcroFormDocumentHelper::transformAnnotations( | ||
| 842 | continue; | 802 | continue; |
| 843 | } | 803 | } |
| 844 | 804 | ||
| 845 | - // Make copies of annotations and fields down to the | ||
| 846 | - // appearance streams, preserving all internal referential | ||
| 847 | - // integrity. When the incoming annotations are from a | ||
| 848 | - // different file, we first copy them locally. Then, whether | ||
| 849 | - // local or foreign, we copy them again so that if we bring | ||
| 850 | - // the same annotation in multiple times (e.g. overlaying a | ||
| 851 | - // foreign page onto multiple local pages or a local page onto | ||
| 852 | - // multiple other local pages), we don't create annotations | ||
| 853 | - // that are referenced in more than one place. If we did that, | ||
| 854 | - // the effect of applying transformations would be cumulative, | ||
| 855 | - // which is definitely not what we want. Besides, annotations | ||
| 856 | - // and fields are not intended to be referenced in multiple | ||
| 857 | - // places. | ||
| 858 | - | ||
| 859 | - // Determine if this annotation is attached to a form field. | ||
| 860 | - // If so, the annotation may be the same object as the form | ||
| 861 | - // field, or the form field may have the annotation as a kid. | ||
| 862 | - // In either case, we have to walk up the field structure to | ||
| 863 | - // find the top-level field. Within one iteration through a | ||
| 864 | - // set of annotations, we don't want to copy the same item | ||
| 865 | - // more than once. For example, suppose we have field A with | ||
| 866 | - // kids B, C, and D, each of which has annotations BA, CA, and | ||
| 867 | - // DA. When we get to BA, we will find that BA is a kid of B | ||
| 868 | - // which is under A. When we do a copyForeignObject of A, it | ||
| 869 | - // will also copy everything else because of the indirect | ||
| 870 | - // references. When we clone BA, we will want to clone A and | ||
| 871 | - // then update A's clone's kid to point B's clone and B's | ||
| 872 | - // clone's parent to point to A's clone. The same thing holds | ||
| 873 | - // for annotations. Next, when we get to CA, we will again | ||
| 874 | - // discover that A is the top, but we don't want to re-copy A. | ||
| 875 | - // We want CA's clone to be linked to the same clone as BA's. | ||
| 876 | - // Failure to do this will break up things like radio button | ||
| 877 | - // groups, which all have to kids of the same parent. | 805 | + // Make copies of annotations and fields down to the appearance streams, preserving all |
| 806 | + // internal referential integrity. When the incoming annotations are from a different file, | ||
| 807 | + // we first copy them locally. Then, whether local or foreign, we copy them again so that if | ||
| 808 | + // we bring the same annotation in multiple times (e.g. overlaying a foreign page onto | ||
| 809 | + // multiple local pages or a local page onto multiple other local pages), we don't create | ||
| 810 | + // annotations that are referenced in more than one place. If we did that, the effect of | ||
| 811 | + // applying transformations would be cumulative, which is definitely not what we want. | ||
| 812 | + // Besides, annotations and fields are not intended to be referenced in multiple places. | ||
| 813 | + | ||
| 814 | + // Determine if this annotation is attached to a form field. If so, the annotation may be | ||
| 815 | + // the same object as the form field, or the form field may have the annotation as a kid. In | ||
| 816 | + // either case, we have to walk up the field structure to find the top-level field. Within | ||
| 817 | + // one iteration through a set of annotations, we don't want to copy the same item more than | ||
| 818 | + // once. For example, suppose we have field A with kids B, C, and D, each of which has | ||
| 819 | + // annotations BA, CA, and DA. When we get to BA, we will find that BA is a kid of B which | ||
| 820 | + // is under A. When we do a copyForeignObject of A, it will also copy everything else | ||
| 821 | + // because of the indirect references. When we clone BA, we will want to clone A and then | ||
| 822 | + // update A's clone's kid to point B's clone and B's clone's parent to point to A's clone. | ||
| 823 | + // The same thing holds for annotations. Next, when we get to CA, we will again discover | ||
| 824 | + // that A is the top, but we don't want to re-copy A. We want CA's clone to be linked to the | ||
| 825 | + // same clone as BA's. Failure to do this will break up things like radio button groups, | ||
| 826 | + // which all have to be kids of the same parent. | |
| 878 | 827 | ||
| 879 | auto ffield = from_afdh->getFieldForAnnotation(annot); | 828 | auto ffield = from_afdh->getFieldForAnnotation(annot); |
| 880 | auto ffield_oh = ffield.getObjectHandle(); | 829 | auto ffield_oh = ffield.getObjectHandle(); |
| @@ -886,36 +835,29 @@ QPDFAcroFormDocumentHelper::transformAnnotations( | @@ -886,36 +835,29 @@ QPDFAcroFormDocumentHelper::transformAnnotations( | ||
| 886 | } else if ((!ffield_oh.isNull()) && (!ffield_oh.isIndirect())) { | 835 | } else if ((!ffield_oh.isNull()) && (!ffield_oh.isIndirect())) { |
| 887 | ffield_oh.warnIfPossible("ignoring form field not indirect"); | 836 | ffield_oh.warnIfPossible("ignoring form field not indirect"); |
| 888 | } else if (!ffield_oh.isNull()) { | 837 | } else if (!ffield_oh.isNull()) { |
| 889 | - // A field and its associated annotation can be the same | ||
| 890 | - // object. This matters because we don't want to clone the | ||
| 891 | - // annotation and field separately in this case. | 838 | + // A field and its associated annotation can be the same object. This matters because we |
| 839 | + // don't want to clone the annotation and field separately in this case. | ||
| 892 | have_field = true; | 840 | have_field = true; |
| 893 | // Find the top-level field. It may be the field itself. | 841 | // Find the top-level field. It may be the field itself. |
| 894 | top_field = ffield.getTopLevelField(&have_parent).getObjectHandle(); | 842 | top_field = ffield.getTopLevelField(&have_parent).getObjectHandle(); |
| 895 | if (foreign) { | 843 | if (foreign) { |
| 896 | - // copyForeignObject returns the same value if called | ||
| 897 | - // multiple times with the same field. Create/retrieve | ||
| 898 | - // the local copy of the original field. This pulls | ||
| 899 | - // over everything the field references including | ||
| 900 | - // annotations and appearance streams, but it's | ||
| 901 | - // harmless to call copyForeignObject on them too. | ||
| 902 | - // They will already be copied, so we'll get the right | ||
| 903 | - // object back. | 844 | + // copyForeignObject returns the same value if called multiple times with the same |
| 845 | + // field. Create/retrieve the local copy of the original field. This pulls over | ||
| 846 | + // everything the field references including annotations and appearance streams, but | ||
| 847 | + // it's harmless to call copyForeignObject on them too. They will already be copied, | ||
| 848 | + // so we'll get the right object back. | ||
| 904 | 849 | ||
| 905 | // top_field and ffield_oh are known to be indirect. | 850 | // top_field and ffield_oh are known to be indirect. |
| 906 | top_field = this->qpdf.copyForeignObject(top_field); | 851 | top_field = this->qpdf.copyForeignObject(top_field); |
| 907 | ffield_oh = this->qpdf.copyForeignObject(ffield_oh); | 852 | ffield_oh = this->qpdf.copyForeignObject(ffield_oh); |
| 908 | } else { | 853 | } else { |
| 909 | - // We don't need to add top_field to old_fields if | ||
| 910 | - // it's foreign because the new copy of the foreign | ||
| 911 | - // field won't be referenced anywhere. It's just the | ||
| 912 | - // starting point for us to make an additional local | ||
| 913 | - // copy of. | 854 | + // We don't need to add top_field to old_fields if it's foreign because the new copy |
| 855 | + // of the foreign field won't be referenced anywhere. It's just the starting point | ||
| 856 | + // for us to make an additional local copy of. | ||
| 914 | old_fields.insert(top_field.getObjGen()); | 857 | old_fields.insert(top_field.getObjGen()); |
| 915 | } | 858 | } |
| 916 | 859 | ||
| 917 | - // Traverse the field, copying kids, and preserving | ||
| 918 | - // integrity. | 860 | + // Traverse the field, copying kids, and preserving integrity. |
| 919 | std::list<QPDFObjectHandle> queue; | 861 | std::list<QPDFObjectHandle> queue; |
| 920 | QPDFObjGen::set seen; | 862 | QPDFObjGen::set seen; |
| 921 | if (maybe_copy_object(top_field)) { | 863 | if (maybe_copy_object(top_field)) { |
| @@ -933,8 +875,8 @@ QPDFAcroFormDocumentHelper::transformAnnotations( | @@ -933,8 +875,8 @@ QPDFAcroFormDocumentHelper::transformAnnotations( | ||
| 933 | parent.warnIfPossible( | 875 | parent.warnIfPossible( |
| 934 | "while traversing field " + obj.getObjGen().unparse(',') + | 876 | "while traversing field " + obj.getObjGen().unparse(',') + |
| 935 | ", found parent (" + parent_og.unparse(',') + | 877 | ", found parent (" + parent_og.unparse(',') + |
| 936 | - ") that had not been seen, indicating likely" | ||
| 937 | - " invalid field structure"); | 878 | + ") that had not been seen, indicating likely invalid field " |
| 879 | + "structure"); | ||
| 938 | } | 880 | } |
| 939 | } | 881 | } |
| 940 | auto kids = obj.getKey("/Kids"); | 882 | auto kids = obj.getKey("/Kids"); |
| @@ -955,17 +897,13 @@ QPDFAcroFormDocumentHelper::transformAnnotations( | @@ -955,17 +897,13 @@ QPDFAcroFormDocumentHelper::transformAnnotations( | ||
| 955 | if (foreign) { | 897 | if (foreign) { |
| 956 | // Lazily initialize our /DR and the conflict map. | 898 | // Lazily initialize our /DR and the conflict map. |
| 957 | init_dr_map(); | 899 | init_dr_map(); |
| 958 | - // The spec doesn't say anything about /DR on the | ||
| 959 | - // field, but lots of writers put one there, and | ||
| 960 | - // it is frequently the same as the document-level | ||
| 961 | - // /DR. To avoid having the field's /DR point to | ||
| 962 | - // information that we are not maintaining, just | ||
| 963 | - // reset it to that if it exists. Empirical | ||
| 964 | - // evidence suggests that many readers, including | ||
| 965 | - // Acrobat, Adobe Acrobat Reader, chrome, firefox, | ||
| 966 | - // the mac Preview application, and several of the | ||
| 967 | - // free readers on Linux all ignore /DR at the | ||
| 968 | - // field level. | 900 | + // The spec doesn't say anything about /DR on the field, but lots of writers |
| 901 | + // put one there, and it is frequently the same as the document-level /DR. | ||
| 902 | + // To avoid having the field's /DR point to information that we are not | ||
| 903 | + // maintaining, just reset it to that if it exists. Empirical evidence | ||
| 904 | + // suggests that many readers, including Acrobat, Adobe Acrobat Reader, | ||
| 905 | + // chrome, firefox, the mac Preview application, and several of the free | ||
| 906 | + // readers on Linux all ignore /DR at the field level. | ||
| 969 | if (obj.hasKey("/DR")) { | 907 | if (obj.hasKey("/DR")) { |
| 970 | obj.replaceKey("/DR", dr); | 908 | obj.replaceKey("/DR", dr); |
| 971 | } | 909 | } |
| @@ -1029,8 +967,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations( | @@ -1029,8 +967,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations( | ||
| 1029 | } | 967 | } |
| 1030 | } | 968 | } |
| 1031 | 969 | ||
| 1032 | - // Now we can safely mutate the annotation and its appearance | ||
| 1033 | - // streams. | 970 | + // Now we can safely mutate the annotation and its appearance streams. |
| 1034 | for (auto& stream: streams) { | 971 | for (auto& stream: streams) { |
| 1035 | auto dict = stream.getDict(); | 972 | auto dict = stream.getDict(); |
| 1036 | auto omatrix = dict.getKey("/Matrix"); | 973 | auto omatrix = dict.getKey("/Matrix"); |
libqpdf/QPDFFormFieldObjectHelper.cc
| @@ -310,15 +310,15 @@ QPDFFormFieldObjectHelper::setV(QPDFObjectHandle value, bool need_appearances) | @@ -310,15 +310,15 @@ QPDFFormFieldObjectHelper::setV(QPDFObjectHandle value, bool need_appearances) | ||
| 310 | } | 310 | } |
| 311 | } | 311 | } |
| 312 | if (!okay) { | 312 | if (!okay) { |
| 313 | - this->oh.warnIfPossible("ignoring attempt to set a checkbox field to a" | ||
| 314 | - " value of other than /Yes or /Off"); | 313 | + this->oh.warnIfPossible("ignoring attempt to set a checkbox field to a value of " |
| 314 | + "other than /Yes or /Off"); | ||
| 315 | } | 315 | } |
| 316 | } else if (isRadioButton()) { | 316 | } else if (isRadioButton()) { |
| 317 | if (value.isName()) { | 317 | if (value.isName()) { |
| 318 | setRadioButtonValue(value); | 318 | setRadioButtonValue(value); |
| 319 | } else { | 319 | } else { |
| 320 | - this->oh.warnIfPossible("ignoring attempt to set a radio button field to" | ||
| 321 | - " an object that is not a name"); | 320 | + this->oh.warnIfPossible( |
| 321 | + "ignoring attempt to set a radio button field to an object that is not a name"); | ||
| 322 | } | 322 | } |
| 323 | } else if (isPushbutton()) { | 323 | } else if (isPushbutton()) { |
| 324 | this->oh.warnIfPossible("ignoring attempt set the value of a pushbutton field"); | 324 | this->oh.warnIfPossible("ignoring attempt set the value of a pushbutton field"); |
| @@ -347,24 +347,19 @@ QPDFFormFieldObjectHelper::setV(std::string const& utf8_value, bool need_appeara | @@ -347,24 +347,19 @@ QPDFFormFieldObjectHelper::setV(std::string const& utf8_value, bool need_appeara | ||
| 347 | void | 347 | void |
| 348 | QPDFFormFieldObjectHelper::setRadioButtonValue(QPDFObjectHandle name) | 348 | QPDFFormFieldObjectHelper::setRadioButtonValue(QPDFObjectHandle name) |
| 349 | { | 349 | { |
| 350 | - // Set the value of a radio button field. This has the following | ||
| 351 | - // specific behavior: | ||
| 352 | - // * If this is a radio button field that has a parent that is | ||
| 353 | - // also a radio button field and has no explicit /V, call itself | ||
| 354 | - // on the parent | ||
| 355 | - // * If this is a radio button field with children, set /V to the | ||
| 356 | - // given value. Then, for each child, if the child has the | ||
| 357 | - // specified value as one of its keys in the /N subdictionary of | ||
| 358 | - // its /AP (i.e. its normal appearance stream dictionary), set | ||
| 359 | - // /AS to name; otherwise, if /Off is a member, set /AS to /Off. | ||
| 360 | - // Note that we never turn on /NeedAppearances when setting a | ||
| 361 | - // radio button field. | 350 | + // Set the value of a radio button field. This has the following specific behavior: |
| 351 | + // * If this is a radio button field that has a parent that is also a radio button field and has | ||
| 352 | + // no explicit /V, call itself on the parent | ||
| 353 | + // * If this is a radio button field with children, set /V to the given value. Then, for each | ||
| 354 | + // child, if the child has the specified value as one of its keys in the /N subdictionary of | ||
| 355 | + // its /AP (i.e. its normal appearance stream dictionary), set /AS to name; otherwise, if /Off | ||
| 356 | + // is a member, set /AS to /Off. | ||
| 357 | + // Note that we never turn on /NeedAppearances when setting a radio button field. | ||
| 362 | QPDFObjectHandle parent = this->oh.getKey("/Parent"); | 358 | QPDFObjectHandle parent = this->oh.getKey("/Parent"); |
| 363 | if (parent.isDictionary() && parent.getKey("/Parent").isNull()) { | 359 | if (parent.isDictionary() && parent.getKey("/Parent").isNull()) { |
| 364 | QPDFFormFieldObjectHelper ph(parent); | 360 | QPDFFormFieldObjectHelper ph(parent); |
| 365 | if (ph.isRadioButton()) { | 361 | if (ph.isRadioButton()) { |
| 366 | - // This is most likely one of the individual buttons. Try | ||
| 367 | - // calling on the parent. | 362 | + // This is most likely one of the individual buttons. Try calling on the parent. |
| 368 | QTC::TC("qpdf", "QPDFFormFieldObjectHelper set parent radio button"); | 363 | QTC::TC("qpdf", "QPDFFormFieldObjectHelper set parent radio button"); |
| 369 | ph.setRadioButtonValue(name); | 364 | ph.setRadioButtonValue(name); |
| 370 | return; | 365 | return; |
| @@ -384,8 +379,7 @@ QPDFFormFieldObjectHelper::setRadioButtonValue(QPDFObjectHandle name) | @@ -384,8 +379,7 @@ QPDFFormFieldObjectHelper::setRadioButtonValue(QPDFObjectHandle name) | ||
| 384 | QPDFObjectHandle AP = kid.getKey("/AP"); | 379 | QPDFObjectHandle AP = kid.getKey("/AP"); |
| 385 | QPDFObjectHandle annot; | 380 | QPDFObjectHandle annot; |
| 386 | if (AP.isNull()) { | 381 | if (AP.isNull()) { |
| 387 | - // The widget may be below. If there is more than one, | ||
| 388 | - // just find the first one. | 382 | + // The widget may be below. If there is more than one, just find the first one. |
| 389 | QPDFObjectHandle grandkids = kid.getKey("/Kids"); | 383 | QPDFObjectHandle grandkids = kid.getKey("/Kids"); |
| 390 | if (grandkids.isArray()) { | 384 | if (grandkids.isArray()) { |
| 391 | int ngrandkids = grandkids.getArrayNItems(); | 385 | int ngrandkids = grandkids.getArrayNItems(); |
| @@ -458,9 +452,8 @@ void | @@ -458,9 +452,8 @@ void | ||
| 458 | QPDFFormFieldObjectHelper::generateAppearance(QPDFAnnotationObjectHelper& aoh) | 452 | QPDFFormFieldObjectHelper::generateAppearance(QPDFAnnotationObjectHelper& aoh) |
| 459 | { | 453 | { |
| 460 | std::string ft = getFieldType(); | 454 | std::string ft = getFieldType(); |
| 461 | - // Ignore field types we don't know how to generate appearances | ||
| 462 | - // for. Button fields don't really need them -- see code in | ||
| 463 | - // QPDFAcroFormDocumentHelper::generateAppearancesIfNeeded. | 455 | + // Ignore field types we don't know how to generate appearances for. Button fields don't really |
| 456 | + // need them -- see code in QPDFAcroFormDocumentHelper::generateAppearancesIfNeeded. | ||
| 464 | if ((ft == "/Tx") || (ft == "/Ch")) { | 457 | if ((ft == "/Tx") || (ft == "/Ch")) { |
| 465 | generateTextAppearance(aoh); | 458 | generateTextAppearance(aoh); |
| 466 | } | 459 | } |
| @@ -562,15 +555,13 @@ ValueSetter::writeAppearance() | @@ -562,15 +555,13 @@ ValueSetter::writeAppearance() | ||
| 562 | { | 555 | { |
| 563 | this->replaced = true; | 556 | this->replaced = true; |
| 564 | 557 | ||
| 565 | - // This code does not take quadding into consideration because | ||
| 566 | - // doing so requires font metric information, which we don't | ||
| 567 | - // have in many cases. | 558 | + // This code does not take quadding into consideration because doing so requires font metric |
| 559 | + // information, which we don't have in many cases. | ||
| 568 | 560 | ||
| 569 | double tfh = 1.2 * tf; | 561 | double tfh = 1.2 * tf; |
| 570 | int dx = 1; | 562 | int dx = 1; |
| 571 | 563 | ||
| 572 | - // Write one or more lines, centered vertically, possibly with | ||
| 573 | - // one row highlighted. | 564 | + // Write one or more lines, centered vertically, possibly with one row highlighted. |
| 574 | 565 | ||
| 575 | auto max_rows = static_cast<size_t>((bbox.ury - bbox.lly) / tfh); | 566 | auto max_rows = static_cast<size_t>((bbox.ury - bbox.lly) / tfh); |
| 576 | bool highlight = false; | 567 | bool highlight = false; |
| @@ -591,8 +582,7 @@ ValueSetter::writeAppearance() | @@ -591,8 +582,7 @@ ValueSetter::writeAppearance() | ||
| 591 | } | 582 | } |
| 592 | } | 583 | } |
| 593 | if (found) { | 584 | if (found) { |
| 594 | - // Try to make the found item the second one, but | ||
| 595 | - // adjust for under/overflow. | 585 | + // Try to make the found item the second one, but adjust for under/overflow. |
| 596 | int wanted_first = QIntC::to_int(found_idx) - 1; | 586 | int wanted_first = QIntC::to_int(found_idx) - 1; |
| 597 | int wanted_last = QIntC::to_int(found_idx + max_rows) - 2; | 587 | int wanted_last = QIntC::to_int(found_idx + max_rows) - 2; |
| 598 | QTC::TC("qpdf", "QPDFFormFieldObjectHelper list found"); | 588 | QTC::TC("qpdf", "QPDFFormFieldObjectHelper list found"); |
| @@ -639,9 +629,8 @@ ValueSetter::writeAppearance() | @@ -639,9 +629,8 @@ ValueSetter::writeAppearance() | ||
| 639 | dy -= tf; | 629 | dy -= tf; |
| 640 | write("q\nBT\n" + DA + "\n"); | 630 | write("q\nBT\n" + DA + "\n"); |
| 641 | for (size_t i = 0; i < nlines; ++i) { | 631 | for (size_t i = 0; i < nlines; ++i) { |
| 642 | - // We could adjust Tm to translate to the beginning the first | ||
| 643 | - // line, set TL to tfh, and use T* for each subsequent line, | ||
| 644 | - // but doing this would require extracting any Tm from DA, | 632 | + // We could adjust Tm to translate to the beginning of the first line, set TL to tfh, and use |
| 633 | + // T* for each subsequent line, but doing this would require extracting any Tm from DA, | ||
| 645 | // which doesn't seem really worth the effort. | 634 | // which doesn't seem really worth the effort. |
| 646 | if (i == 0) { | 635 | if (i == 0) { |
| 647 | write( | 636 | write( |
| @@ -708,8 +697,8 @@ TfFinder::handleToken(QPDFTokenizer::Token const& token) | @@ -708,8 +697,8 @@ TfFinder::handleToken(QPDFTokenizer::Token const& token) | ||
| 708 | case QPDFTokenizer::tt_word: | 697 | case QPDFTokenizer::tt_word: |
| 709 | if (token.isWord("Tf")) { | 698 | if (token.isWord("Tf")) { |
| 710 | if ((last_num > 1.0) && (last_num < 1000.0)) { | 699 | if ((last_num > 1.0) && (last_num < 1000.0)) { |
| 711 | - // These ranges are arbitrary but keep us from doing | ||
| 712 | - // insane things or suffering from over/underflow | 700 | + // These ranges are arbitrary but keep us from doing insane things or suffering from |
| 701 | + // over/underflow | ||
| 713 | tf = last_num; | 702 | tf = last_num; |
| 714 | } | 703 | } |
| 715 | tf_idx = last_num_idx; | 704 | tf_idx = last_num_idx; |
| @@ -738,8 +727,7 @@ TfFinder::getDA() | @@ -738,8 +727,7 @@ TfFinder::getDA() | ||
| 738 | if (QIntC::to_int(i) == tf_idx) { | 727 | if (QIntC::to_int(i) == tf_idx) { |
| 739 | double delta = strtod(cur.c_str(), nullptr) - this->tf; | 728 | double delta = strtod(cur.c_str(), nullptr) - this->tf; |
| 740 | if ((delta > 0.001) || (delta < -0.001)) { | 729 | if ((delta > 0.001) || (delta < -0.001)) { |
| 741 | - // tf doesn't match the font size passed to Tf, so | ||
| 742 | - // substitute. | 730 | + // tf doesn't match the font size passed to Tf, so substitute. |
| 743 | QTC::TC("qpdf", "QPDFFormFieldObjectHelper fallback Tf"); | 731 | QTC::TC("qpdf", "QPDFFormFieldObjectHelper fallback Tf"); |
| 744 | cur = QUtil::double_to_string(tf); | 732 | cur = QUtil::double_to_string(tf); |
| 745 | } | 733 | } |
| @@ -852,6 +840,5 @@ QPDFFormFieldObjectHelper::generateTextAppearance(QPDFAnnotationObjectHelper& ao | @@ -852,6 +840,5 @@ QPDFFormFieldObjectHelper::generateTextAppearance(QPDFAnnotationObjectHelper& ao | ||
| 852 | } | 840 | } |
| 853 | 841 | ||
| 854 | AS.addTokenFilter( | 842 | AS.addTokenFilter( |
| 855 | - // line-break | ||
| 856 | std::shared_ptr<QPDFObjectHandle::TokenFilter>(new ValueSetter(DA, V, opt, tf, bbox))); | 843 | std::shared_ptr<QPDFObjectHandle::TokenFilter>(new ValueSetter(DA, V, opt, tf, bbox))); |
| 857 | } | 844 | } |
libqpdf/QPDFJob.cc
| @@ -130,8 +130,8 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next) | @@ -130,8 +130,8 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next) | ||
| 130 | if (!(w_obj.isNumber() && h_obj.isNumber())) { | 130 | if (!(w_obj.isNumber() && h_obj.isNumber())) { |
| 131 | if (!description.empty()) { | 131 | if (!description.empty()) { |
| 132 | o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { | 132 | o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { |
| 133 | - v << prefix << ": " << description << ": not optimizing because image dictionary" | ||
| 134 | - << " is missing required keys\n"; | 133 | + v << prefix << ": " << description |
| 134 | + << ": not optimizing because image dictionary is missing required keys\n"; | ||
| 135 | }); | 135 | }); |
| 136 | } | 136 | } |
| 137 | return result; | 137 | return result; |
| @@ -142,14 +142,13 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next) | @@ -142,14 +142,13 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next) | ||
| 142 | if (!description.empty()) { | 142 | if (!description.empty()) { |
| 143 | o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { | 143 | o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { |
| 144 | v << prefix << ": " << description | 144 | v << prefix << ": " << description |
| 145 | - << ": not optimizing because image has other than" | ||
| 146 | - << " 8 bits per component\n"; | 145 | + << ": not optimizing because image has other than 8 bits per component\n"; |
| 147 | }); | 146 | }); |
| 148 | } | 147 | } |
| 149 | return result; | 148 | return result; |
| 150 | } | 149 | } |
| 151 | - // Files have been seen in the wild whose width and height are | ||
| 152 | - // floating point, which is goofy, but we can deal with it. | 150 | + // Files have been seen in the wild whose width and height are floating point, which is goofy, |
| 151 | + // but we can deal with it. | ||
| 153 | JDIMENSION w = 0; | 152 | JDIMENSION w = 0; |
| 154 | if (w_obj.isInteger()) { | 153 | if (w_obj.isInteger()) { |
| 155 | w = w_obj.getUIntValueAsUInt(); | 154 | w = w_obj.getUIntValueAsUInt(); |
| @@ -178,8 +177,8 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next) | @@ -178,8 +177,8 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next) | ||
| 178 | QTC::TC("qpdf", "QPDFJob image optimize colorspace"); | 177 | QTC::TC("qpdf", "QPDFJob image optimize colorspace"); |
| 179 | if (!description.empty()) { | 178 | if (!description.empty()) { |
| 180 | o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { | 179 | o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { |
| 181 | - v << prefix << ": " << description << ": not optimizing because qpdf can't optimize" | ||
| 182 | - << " images with this colorspace\n"; | 180 | + v << prefix << ": " << description |
| 181 | + << ": not optimizing because qpdf can't optimize images with this colorspace\n"; | ||
| 183 | }); | 182 | }); |
| 184 | } | 183 | } |
| 185 | return result; | 184 | return result; |
| @@ -190,8 +189,9 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next) | @@ -190,8 +189,9 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next) | ||
| 190 | QTC::TC("qpdf", "QPDFJob image optimize too small"); | 189 | QTC::TC("qpdf", "QPDFJob image optimize too small"); |
| 191 | if (!description.empty()) { | 190 | if (!description.empty()) { |
| 192 | o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { | 191 | o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { |
| 193 | - v << prefix << ": " << description << ": not optimizing because image" | ||
| 194 | - << " is smaller than requested minimum dimensions\n"; | 192 | + v << prefix << ": " << description |
| 193 | + << ": not optimizing because image is smaller than requested minimum " | ||
| 194 | + "dimensions\n"; | ||
| 195 | }); | 195 | }); |
| 196 | } | 196 | } |
| 197 | return result; | 197 | return result; |
| @@ -207,8 +207,8 @@ ImageOptimizer::evaluate(std::string const& description) | @@ -207,8 +207,8 @@ ImageOptimizer::evaluate(std::string const& description) | ||
| 207 | if (!image.pipeStreamData(nullptr, 0, qpdf_dl_specialized, true)) { | 207 | if (!image.pipeStreamData(nullptr, 0, qpdf_dl_specialized, true)) { |
| 208 | QTC::TC("qpdf", "QPDFJob image optimize no pipeline"); | 208 | QTC::TC("qpdf", "QPDFJob image optimize no pipeline"); |
| 209 | o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { | 209 | o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { |
| 210 | - v << prefix << ": " << description << ": not optimizing because unable to decode data" | ||
| 211 | - << " or data already uses DCT\n"; | 210 | + v << prefix << ": " << description |
| 211 | + << ": not optimizing because unable to decode data or data already uses DCT\n"; | ||
| 212 | }); | 212 | }); |
| 213 | return false; | 213 | return false; |
| 214 | } | 214 | } |
| @@ -227,8 +227,7 @@ ImageOptimizer::evaluate(std::string const& description) | @@ -227,8 +227,7 @@ ImageOptimizer::evaluate(std::string const& description) | ||
| 227 | QTC::TC("qpdf", "QPDFJob image optimize no shrink"); | 227 | QTC::TC("qpdf", "QPDFJob image optimize no shrink"); |
| 228 | o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { | 228 | o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { |
| 229 | v << prefix << ": " << description | 229 | v << prefix << ": " << description |
| 230 | - << ": not optimizing because DCT compression does not" | ||
| 231 | - << " reduce image size\n"; | 230 | + << ": not optimizing because DCT compression does not reduce image size\n"; |
| 232 | }); | 231 | }); |
| 233 | return false; | 232 | return false; |
| 234 | } | 233 | } |
| @@ -245,8 +244,8 @@ ImageOptimizer::provideStreamData(QPDFObjGen const&, Pipeline* pipeline) | @@ -245,8 +244,8 @@ ImageOptimizer::provideStreamData(QPDFObjGen const&, Pipeline* pipeline) | ||
| 245 | std::shared_ptr<Pipeline> p = makePipeline("", pipeline); | 244 | std::shared_ptr<Pipeline> p = makePipeline("", pipeline); |
| 246 | if (p == nullptr) { | 245 | if (p == nullptr) { |
| 247 | // Should not be possible | 246 | // Should not be possible |
| 248 | - image.warnIfPossible("unable to create pipeline after previous" | ||
| 249 | - " success; image data will be lost"); | 247 | + image.warnIfPossible( |
| 248 | + "unable to create pipeline after previous success; image data will be lost"); | ||
| 250 | pipeline->finish(); | 249 | pipeline->finish(); |
| 251 | return; | 250 | return; |
| 252 | } | 251 | } |
| @@ -441,8 +440,7 @@ QPDFJob::createQPDF() | @@ -441,8 +440,7 @@ QPDFJob::createQPDF() | ||
| 441 | processFile(pdf_sp, m->infilename.get(), m->password.get(), true, true); | 440 | processFile(pdf_sp, m->infilename.get(), m->password.get(), true, true); |
| 442 | } catch (QPDFExc& e) { | 441 | } catch (QPDFExc& e) { |
| 443 | if (e.getErrorCode() == qpdf_e_password) { | 442 | if (e.getErrorCode() == qpdf_e_password) { |
| 444 | - // Allow certain operations to work when an incorrect | ||
| 445 | - // password is supplied. | 443 | + // Allow certain operations to work when an incorrect password is supplied. |
| 446 | if (m->check_is_encrypted || m->check_requires_password) { | 444 | if (m->check_is_encrypted || m->check_requires_password) { |
| 447 | m->encryption_status = qpdf_es_encrypted | qpdf_es_password_incorrect; | 445 | m->encryption_status = qpdf_es_encrypted | qpdf_es_password_incorrect; |
| 448 | return nullptr; | 446 | return nullptr; |
| @@ -464,8 +462,8 @@ QPDFJob::createQPDF() | @@ -464,8 +462,8 @@ QPDFJob::createQPDF() | ||
| 464 | return nullptr; | 462 | return nullptr; |
| 465 | } | 463 | } |
| 466 | 464 | ||
| 467 | - // If we are updating from JSON, this has to be done first before | ||
| 468 | - // other options may cause transformations to the input. | 465 | + // If we are updating from JSON, this has to be done first before other options may cause |
| 466 | + // transformations to the input. | ||
| 469 | if (!m->update_from_json.empty()) { | 467 | if (!m->update_from_json.empty()) { |
| 470 | pdf.updateFromJSON(m->update_from_json); | 468 | pdf.updateFromJSON(m->update_from_json); |
| 471 | } | 469 | } |
| @@ -497,16 +495,16 @@ QPDFJob::writeQPDF(QPDF& pdf) | @@ -497,16 +495,16 @@ QPDFJob::writeQPDF(QPDF& pdf) | ||
| 497 | } | 495 | } |
| 498 | if (m->warnings && (!m->suppress_warnings)) { | 496 | if (m->warnings && (!m->suppress_warnings)) { |
| 499 | if (createsOutput()) { | 497 | if (createsOutput()) { |
| 500 | - *m->log->getWarn() << m->message_prefix << ": operation succeeded with warnings;" | ||
| 501 | - << " resulting file may have some problems\n"; | 498 | + *m->log->getWarn() |
| 499 | + << m->message_prefix | ||
| 500 | + << ": operation succeeded with warnings; resulting file may have some problems\n"; | ||
| 502 | } else { | 501 | } else { |
| 503 | *m->log->getWarn() << m->message_prefix << ": operation succeeded with warnings\n"; | 502 | *m->log->getWarn() << m->message_prefix << ": operation succeeded with warnings\n"; |
| 504 | } | 503 | } |
| 505 | } | 504 | } |
| 506 | if (m->report_mem_usage) { | 505 | if (m->report_mem_usage) { |
| 507 | - // Call get_max_memory_usage before generating output. When | ||
| 508 | - // debugging, it's easier if print statements from | ||
| 509 | - // get_max_memory_usage are not interleaved with the output. | 506 | + // Call get_max_memory_usage before generating output. When debugging, it's easier if print |
| 507 | + // statements from get_max_memory_usage are not interleaved with the output. | ||
| 510 | auto mem_usage = QUtil::get_max_memory_usage(); | 508 | auto mem_usage = QUtil::get_max_memory_usage(); |
| 511 | *m->log->getWarn() << "qpdf-max-memory-usage " << mem_usage << "\n"; | 509 | *m->log->getWarn() << "qpdf-max-memory-usage " << mem_usage << "\n"; |
| 512 | } | 510 | } |
| @@ -568,16 +566,13 @@ QPDFJob::getExitCode() const | @@ -568,16 +566,13 @@ QPDFJob::getExitCode() const | ||
| 568 | void | 566 | void |
| 569 | QPDFJob::checkConfiguration() | 567 | QPDFJob::checkConfiguration() |
| 570 | { | 568 | { |
| 571 | - // Do final checks for command-line consistency. (I always think | ||
| 572 | - // this is called doFinalChecks, so I'm putting that in a | ||
| 573 | - // comment.) | 569 | + // Do final checks for command-line consistency. (I always think this is called doFinalChecks, |
| 570 | + // so I'm putting that in a comment.) | ||
| 574 | 571 | ||
| 575 | if (m->replace_input) { | 572 | if (m->replace_input) { |
| 576 | - // Check for --empty appears later after we have checked | ||
| 577 | - // m->infilename. | 573 | + // Check for --empty appears later after we have checked m->infilename. |
| 578 | if (m->outfilename) { | 574 | if (m->outfilename) { |
| 579 | - usage("--replace-input may not be used when" | ||
| 580 | - " an output file is specified"); | 575 | + usage("--replace-input may not be used when an output file is specified"); |
| 581 | } else if (m->split_pages) { | 576 | } else if (m->split_pages) { |
| 582 | usage("--split-pages may not be used with --replace-input"); | 577 | usage("--split-pages may not be used with --replace-input"); |
| 583 | } else if (m->json_version) { | 578 | } else if (m->json_version) { |
| @@ -585,8 +580,8 @@ QPDFJob::checkConfiguration() | @@ -585,8 +580,8 @@ QPDFJob::checkConfiguration() | ||
| 585 | } | 580 | } |
| 586 | } | 581 | } |
| 587 | if (m->json_version && (m->outfilename == nullptr)) { | 582 | if (m->json_version && (m->outfilename == nullptr)) { |
| 588 | - // The output file is optional with --json for backward | ||
| 589 | - // compatibility and defaults to standard output. | 583 | + // The output file is optional with --json for backward compatibility and defaults to |
| 584 | + // standard output. | ||
| 590 | m->outfilename = QUtil::make_shared_cstr("-"); | 585 | m->outfilename = QUtil::make_shared_cstr("-"); |
| 591 | } | 586 | } |
| 592 | if (m->infilename == nullptr) { | 587 | if (m->infilename == nullptr) { |
| @@ -605,24 +600,21 @@ QPDFJob::checkConfiguration() | @@ -605,24 +600,21 @@ QPDFJob::checkConfiguration() | ||
| 605 | 600 | ||
| 606 | if (m->encrypt && (!m->allow_insecure) && | 601 | if (m->encrypt && (!m->allow_insecure) && |
| 607 | (m->owner_password.empty() && (!m->user_password.empty()) && (m->keylen == 256))) { | 602 | (m->owner_password.empty() && (!m->user_password.empty()) && (m->keylen == 256))) { |
| 608 | - // Note that empty owner passwords for R < 5 are copied from | ||
| 609 | - // the user password, so this lack of security is not an issue | ||
| 610 | - // for those files. Also we are consider only the ability to | ||
| 611 | - // open the file without a password to be insecure. We are not | ||
| 612 | - // concerned about whether the viewer enforces security | ||
| 613 | - // settings when the user and owner password match. | ||
| 614 | - usage("A PDF with a non-empty user password and an empty owner" | ||
| 615 | - " password encrypted with a 256-bit key is insecure as it" | ||
| 616 | - " can be opened without a password. If you really want to" | ||
| 617 | - " do this, you must also give the --allow-insecure option" | ||
| 618 | - " before the -- that follows --encrypt."); | 603 | + // Note that empty owner passwords for R < 5 are copied from the user password, so this lack |
| 604 | + // of security is not an issue for those files. Also, we consider only the ability to | ||
| 605 | + // open the file without a password to be insecure. We are not concerned about whether the | ||
| 606 | + // viewer enforces security settings when the user and owner password match. | ||
| 607 | + usage( | ||
| 608 | + "A PDF with a non-empty user password and an empty owner password encrypted with a " | ||
| 609 | + "256-bit key is insecure as it can be opened without a password. If you really want to" | ||
| 610 | + " do this, you must also give the --allow-insecure option before the -- that follows " | ||
| 611 | + "--encrypt."); | ||
| 619 | } | 612 | } |
| 620 | 613 | ||
| 621 | bool save_to_stdout = false; | 614 | bool save_to_stdout = false; |
| 622 | if (m->require_outfile && m->outfilename && (strcmp(m->outfilename.get(), "-") == 0)) { | 615 | if (m->require_outfile && m->outfilename && (strcmp(m->outfilename.get(), "-") == 0)) { |
| 623 | if (m->split_pages) { | 616 | if (m->split_pages) { |
| 624 | - usage("--split-pages may not be used when" | ||
| 625 | - " writing to standard output"); | 617 | + usage("--split-pages may not be used when writing to standard output"); |
| 626 | } | 618 | } |
| 627 | save_to_stdout = true; | 619 | save_to_stdout = true; |
| 628 | } | 620 | } |
| @@ -634,9 +626,8 @@ QPDFJob::checkConfiguration() | @@ -634,9 +626,8 @@ QPDFJob::checkConfiguration() | ||
| 634 | } | 626 | } |
| 635 | if ((!m->split_pages) && QUtil::same_file(m->infilename.get(), m->outfilename.get())) { | 627 | if ((!m->split_pages) && QUtil::same_file(m->infilename.get(), m->outfilename.get())) { |
| 636 | QTC::TC("qpdf", "QPDFJob same file error"); | 628 | QTC::TC("qpdf", "QPDFJob same file error"); |
| 637 | - usage("input file and output file are the same;" | ||
| 638 | - " use --replace-input to intentionally" | ||
| 639 | - " overwrite the input file"); | 629 | + usage("input file and output file are the same; use --replace-input to intentionally " |
| 630 | + "overwrite the input file"); | ||
| 640 | } | 631 | } |
| 641 | 632 | ||
| 642 | if (m->json_version == 1) { | 633 | if (m->json_version == 1) { |
| @@ -645,8 +636,7 @@ QPDFJob::checkConfiguration() | @@ -645,8 +636,7 @@ QPDFJob::checkConfiguration() | ||
| 645 | } | 636 | } |
| 646 | } else { | 637 | } else { |
| 647 | if (m->json_keys.count("objectinfo") || m->json_keys.count("objects")) { | 638 | if (m->json_keys.count("objectinfo") || m->json_keys.count("objects")) { |
| 648 | - usage("json keys \"objects\" and \"objectinfo\" are only valid for" | ||
| 649 | - " json version 1"); | 639 | + usage("json keys \"objects\" and \"objectinfo\" are only valid for json version 1"); |
| 650 | } | 640 | } |
| 651 | } | 641 | } |
| 652 | } | 642 | } |
| @@ -754,10 +744,8 @@ QPDFJob::showEncryption(QPDF& pdf) | @@ -754,10 +744,8 @@ QPDFJob::showEncryption(QPDF& pdf) | ||
| 754 | void | 744 | void |
| 755 | QPDFJob::doCheck(QPDF& pdf) | 745 | QPDFJob::doCheck(QPDF& pdf) |
| 756 | { | 746 | { |
| 757 | - // Code below may set okay to false but not to true. | ||
| 758 | - // We assume okay until we prove otherwise but may | ||
| 759 | - // continue to perform additional checks after finding | ||
| 760 | - // errors. | 747 | + // Code below may set okay to false but not to true. We assume okay until we prove otherwise but |
| 748 | + // may continue to perform additional checks after finding errors. | ||
| 761 | bool okay = true; | 749 | bool okay = true; |
| 762 | auto& cout = *m->log->getInfo(); | 750 | auto& cout = *m->log->getInfo(); |
| 763 | cout << "checking " << m->infilename.get() << "\n"; | 751 | cout << "checking " << m->infilename.get() << "\n"; |
| @@ -777,8 +765,7 @@ QPDFJob::doCheck(QPDF& pdf) | @@ -777,8 +765,7 @@ QPDFJob::doCheck(QPDF& pdf) | ||
| 777 | cout << "File is not linearized\n"; | 765 | cout << "File is not linearized\n"; |
| 778 | } | 766 | } |
| 779 | 767 | ||
| 780 | - // Write the file to nowhere, uncompressing | ||
| 781 | - // streams. This causes full file traversal and | 768 | + // Write the file to nowhere, uncompressing streams. This causes full file traversal and |
| 782 | // decoding of all streams we can decode. | 769 | // decoding of all streams we can decode. |
| 783 | QPDFWriter w(pdf); | 770 | QPDFWriter w(pdf); |
| 784 | Pl_Discard discard; | 771 | Pl_Discard discard; |
| @@ -809,9 +796,9 @@ QPDFJob::doCheck(QPDF& pdf) | @@ -809,9 +796,9 @@ QPDFJob::doCheck(QPDF& pdf) | ||
| 809 | if (!pdf.getWarnings().empty()) { | 796 | if (!pdf.getWarnings().empty()) { |
| 810 | m->warnings = true; | 797 | m->warnings = true; |
| 811 | } else { | 798 | } else { |
| 812 | - *m->log->getInfo() << "No syntax or stream encoding errors" | ||
| 813 | - << " found; the file may still contain\n" | ||
| 814 | - << "errors that qpdf cannot detect\n"; | 799 | + *m->log->getInfo() |
| 800 | + << "No syntax or stream encoding errors found; the file may still contain\n" | ||
| 801 | + << "errors that qpdf cannot detect\n"; | ||
| 815 | } | 802 | } |
| 816 | } | 803 | } |
| 817 | 804 | ||
| @@ -833,8 +820,7 @@ QPDFJob::doShowObj(QPDF& pdf) | @@ -833,8 +820,7 @@ QPDFJob::doShowObj(QPDF& pdf) | ||
| 833 | obj.warnIfPossible("unable to filter stream data"); | 820 | obj.warnIfPossible("unable to filter stream data"); |
| 834 | error = true; | 821 | error = true; |
| 835 | } else { | 822 | } else { |
| 836 | - // If anything has been written to standard output, | ||
| 837 | - // this will fail. | 823 | + // If anything has been written to standard output, this will fail. |
| 838 | m->log->saveToStandardOutput(true); | 824 | m->log->saveToStandardOutput(true); |
| 839 | obj.pipeStreamData( | 825 | obj.pipeStreamData( |
| 840 | m->log->getSave().get(), | 826 | m->log->getSave().get(), |
| @@ -933,8 +919,8 @@ QPDFJob::doShowAttachment(QPDF& pdf) | @@ -933,8 +919,8 @@ QPDFJob::doShowAttachment(QPDF& pdf) | ||
| 933 | throw std::runtime_error("attachment " + m->attachment_to_show + " not found"); | 919 | throw std::runtime_error("attachment " + m->attachment_to_show + " not found"); |
| 934 | } | 920 | } |
| 935 | auto efs = fs->getEmbeddedFileStream(); | 921 | auto efs = fs->getEmbeddedFileStream(); |
| 936 | - // saveToStandardOutput has already been called, but it's harmless | ||
| 937 | - // to call it again, so do as defensive coding. | 922 | + // saveToStandardOutput has already been called, but it's harmless to call it again, so do as |
| 923 | + // defensive coding. | ||
| 938 | m->log->saveToStandardOutput(true); | 924 | m->log->saveToStandardOutput(true); |
| 939 | efs.pipeStreamData(m->log->getSave().get(), 0, qpdf_dl_all); | 925 | efs.pipeStreamData(m->log->getSave().get(), 0, qpdf_dl_all); |
| 940 | } | 926 | } |
| @@ -1132,9 +1118,8 @@ QPDFJob::doJSONPageLabels(Pipeline* p, bool& first, QPDF& pdf) | @@ -1132,9 +1118,8 @@ QPDFJob::doJSONPageLabels(Pipeline* p, bool& first, QPDF& pdf) | ||
| 1132 | pldh.getLabelsForPageRange(0, npages - 1, 0, labels); | 1118 | pldh.getLabelsForPageRange(0, npages - 1, 0, labels); |
| 1133 | for (auto iter = labels.begin(); iter != labels.end(); ++iter) { | 1119 | for (auto iter = labels.begin(); iter != labels.end(); ++iter) { |
| 1134 | if ((iter + 1) == labels.end()) { | 1120 | if ((iter + 1) == labels.end()) { |
| 1135 | - // This can't happen, so ignore it. This could only | ||
| 1136 | - // happen if getLabelsForPageRange somehow returned an | ||
| 1137 | - // odd number of items. | 1121 | + // This can't happen, so ignore it. This could only happen if getLabelsForPageRange |
| 1122 | + // somehow returned an odd number of items. | ||
| 1138 | break; | 1123 | break; |
| 1139 | } | 1124 | } |
| 1140 | JSON j_label = j_labels.addArrayElement(JSON::makeDictionary()); | 1125 | JSON j_label = j_labels.addArrayElement(JSON::makeDictionary()); |
| @@ -1362,22 +1347,17 @@ QPDFJob::doJSONAttachments(Pipeline* p, bool& first, QPDF& pdf) | @@ -1362,22 +1347,17 @@ QPDFJob::doJSONAttachments(Pipeline* p, bool& first, QPDF& pdf) | ||
| 1362 | JSON | 1347 | JSON |
| 1363 | QPDFJob::json_schema(int json_version, std::set<std::string>* keys) | 1348 | QPDFJob::json_schema(int json_version, std::set<std::string>* keys) |
| 1364 | { | 1349 | { |
| 1365 | - // Style: use all lower-case keys with no dashes or underscores. | ||
| 1366 | - // Choose array or dictionary based on indexing. For example, we | ||
| 1367 | - // use a dictionary for objects because we want to index by object | ||
| 1368 | - // ID and an array for pages because we want to index by position. | ||
| 1369 | - // The pages in the pages array contain references back to the | ||
| 1370 | - // original object, which can be resolved in the objects | ||
| 1371 | - // dictionary. When a PDF construct that maps back to an original | ||
| 1372 | - // object is represented separately, use "object" as the key that | ||
| 1373 | - // references the original object. | ||
| 1374 | - | ||
| 1375 | - // This JSON object doubles as a schema and as documentation for | ||
| 1376 | - // our JSON output. Any schema mismatch is a bug in qpdf. This | ||
| 1377 | - // helps to enforce our policy of consistently providing a known | ||
| 1378 | - // structure where every documented key will always be present, | ||
| 1379 | - // which makes it easier to consume our JSON. This is discussed in | ||
| 1380 | - // more depth in the manual. | 1350 | + // Style: use all lower-case keys with no dashes or underscores. Choose array or dictionary |
| 1351 | + // based on indexing. For example, we use a dictionary for objects because we want to index by | ||
| 1352 | + // object ID and an array for pages because we want to index by position. The pages in the pages | ||
| 1353 | + // array contain references back to the original object, which can be resolved in the objects | ||
| 1354 | + // dictionary. When a PDF construct that maps back to an original object is represented | ||
| 1355 | + // separately, use "object" as the key that references the original object. | ||
| 1356 | + | ||
| 1357 | + // This JSON object doubles as a schema and as documentation for our JSON output. Any schema | ||
| 1358 | + // mismatch is a bug in qpdf. This helps to enforce our policy of consistently providing a known | ||
| 1359 | + // structure where every documented key will always be present, which makes it easier to consume | ||
| 1360 | + // our JSON. This is discussed in more depth in the manual. | ||
| 1381 | JSON schema = JSON::makeDictionary(); | 1361 | JSON schema = JSON::makeDictionary(); |
| 1382 | schema.addDictionaryMember( | 1362 | schema.addDictionaryMember( |
| 1383 | "version", | 1363 | "version", |
| @@ -1388,9 +1368,8 @@ QPDFJob::json_schema(int json_version, std::set<std::string>* keys) | @@ -1388,9 +1368,8 @@ QPDFJob::json_schema(int json_version, std::set<std::string>* keys) | ||
| 1388 | 1368 | ||
| 1389 | bool all_keys = ((keys == nullptr) || keys->empty()); | 1369 | bool all_keys = ((keys == nullptr) || keys->empty()); |
| 1390 | 1370 | ||
| 1391 | - // The list of selectable top-level keys is duplicated in the | ||
| 1392 | - // following places: job.yml, QPDFJob::json_schema, and | ||
| 1393 | - // QPDFJob::doJSON. | 1371 | + // The list of selectable top-level keys is duplicated in the following places: job.yml, |
| 1372 | + // QPDFJob::json_schema, and QPDFJob::doJSON. | ||
| 1394 | if (json_version == 1) { | 1373 | if (json_version == 1) { |
| 1395 | if (all_keys || keys->count("objects")) { | 1374 | if (all_keys || keys->count("objects")) { |
| 1396 | schema.addDictionaryMember("objects", JSON::parse(R"({ | 1375 | schema.addDictionaryMember("objects", JSON::parse(R"({ |
| @@ -1581,8 +1560,8 @@ QPDFJob::json_out_schema_v1() | @@ -1581,8 +1560,8 @@ QPDFJob::json_out_schema_v1() | ||
| 1581 | void | 1560 | void |
| 1582 | QPDFJob::doJSON(QPDF& pdf, Pipeline* p) | 1561 | QPDFJob::doJSON(QPDF& pdf, Pipeline* p) |
| 1583 | { | 1562 | { |
| 1584 | - // qpdf guarantees that no new top-level keys whose names start | ||
| 1585 | - // with "x-" will be added. These are reserved for users. | 1563 | + // qpdf guarantees that no new top-level keys whose names start with "x-" will be added. These |
| 1564 | + // are reserved for users. | ||
| 1586 | 1565 | ||
| 1587 | std::string captured_json; | 1566 | std::string captured_json; |
| 1588 | std::shared_ptr<Pl_String> pl_str; | 1567 | std::shared_ptr<Pl_String> pl_str; |
| @@ -1595,14 +1574,12 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) | @@ -1595,14 +1574,12 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) | ||
| 1595 | JSON::writeDictionaryOpen(p, first, 0); | 1574 | JSON::writeDictionaryOpen(p, first, 0); |
| 1596 | 1575 | ||
| 1597 | if (m->json_output) { | 1576 | if (m->json_output) { |
| 1598 | - // Exclude version and parameters to keep the output file | ||
| 1599 | - // minimal. The JSON version is inside the "qpdf" key for | ||
| 1600 | - // version 2. | 1577 | + // Exclude version and parameters to keep the output file minimal. The JSON version is |
| 1578 | + // inside the "qpdf" key for version 2. | ||
| 1601 | } else { | 1579 | } else { |
| 1602 | - // This version is updated every time a non-backward-compatible | ||
| 1603 | - // change is made to the JSON format. Clients of the JSON are to | ||
| 1604 | - // ignore unrecognized keys, so we only update the version if a | ||
| 1605 | - // key disappears or if its value changes meaning. | 1580 | + // This version is updated every time a non-backward-compatible change is made to the JSON |
| 1581 | + // format. Clients of the JSON are to ignore unrecognized keys, so we only update the | ||
| 1582 | + // version if a key disappears or if its value changes meaning. | ||
| 1606 | JSON::writeDictionaryItem(p, first, "version", JSON::makeInt(m->json_version), 1); | 1583 | JSON::writeDictionaryItem(p, first, "version", JSON::makeInt(m->json_version), 1); |
| 1607 | JSON j_params = JSON::makeDictionary(); | 1584 | JSON j_params = JSON::makeDictionary(); |
| 1608 | std::string decode_level_str; | 1585 | std::string decode_level_str; |
| @@ -1624,13 +1601,11 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) | @@ -1624,13 +1601,11 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) | ||
| 1624 | JSON::writeDictionaryItem(p, first, "parameters", j_params, 1); | 1601 | JSON::writeDictionaryItem(p, first, "parameters", j_params, 1); |
| 1625 | } | 1602 | } |
| 1626 | bool all_keys = m->json_keys.empty(); | 1603 | bool all_keys = m->json_keys.empty(); |
| 1627 | - // The list of selectable top-level keys is duplicated in the | ||
| 1628 | - // following places: job.yml, QPDFJob::json_schema, and | ||
| 1629 | - // QPDFJob::doJSON. | 1604 | + // The list of selectable top-level keys is duplicated in the following places: job.yml, |
| 1605 | + // QPDFJob::json_schema, and QPDFJob::doJSON. | ||
| 1630 | 1606 | ||
| 1631 | - // We do pages and pagelabels first since they have the side | ||
| 1632 | - // effect of repairing the pages tree, which could potentially | ||
| 1633 | - // impact object references in remaining items. | 1607 | + // We do pages and pagelabels first since they have the side effect of repairing the pages tree, |
| 1608 | + // which could potentially impact object references in remaining items. | ||
| 1634 | if (all_keys || m->json_keys.count("pages")) { | 1609 | if (all_keys || m->json_keys.count("pages")) { |
| 1635 | doJSONPages(p, first, pdf); | 1610 | doJSONPages(p, first, pdf); |
| 1636 | } | 1611 | } |
| @@ -1638,8 +1613,7 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) | @@ -1638,8 +1613,7 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) | ||
| 1638 | doJSONPageLabels(p, first, pdf); | 1613 | doJSONPageLabels(p, first, pdf); |
| 1639 | } | 1614 | } |
| 1640 | 1615 | ||
| 1641 | - // The non-special keys are output in alphabetical order, but the | ||
| 1642 | - // order doesn't actually matter. | 1616 | + // The non-special keys are output in alphabetical order, but the order doesn't actually matter. |
| 1643 | if (all_keys || m->json_keys.count("acroform")) { | 1617 | if (all_keys || m->json_keys.count("acroform")) { |
| 1644 | doJSONAcroform(p, first, pdf); | 1618 | doJSONAcroform(p, first, pdf); |
| 1645 | } | 1619 | } |
| @@ -1653,16 +1627,15 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) | @@ -1653,16 +1627,15 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) | ||
| 1653 | doJSONOutlines(p, first, pdf); | 1627 | doJSONOutlines(p, first, pdf); |
| 1654 | } | 1628 | } |
| 1655 | 1629 | ||
| 1656 | - // We do objects last so their information is consistent with | ||
| 1657 | - // repairing the page tree. To see the original file with any page | ||
| 1658 | - // tree problems and the page tree not flattened, select | 1630 | + // We do objects last so their information is consistent with repairing the page tree. To see |
| 1631 | + // the original file with any page tree problems and the page tree not flattened, select | ||
| 1659 | // qpdf/objects/objectinfo without other keys. | 1632 | // qpdf/objects/objectinfo without other keys. |
| 1660 | if (all_keys || m->json_keys.count("objects") || m->json_keys.count("qpdf")) { | 1633 | if (all_keys || m->json_keys.count("objects") || m->json_keys.count("qpdf")) { |
| 1661 | doJSONObjects(p, first, pdf); | 1634 | doJSONObjects(p, first, pdf); |
| 1662 | } | 1635 | } |
| 1663 | if (m->json_version == 1) { | 1636 | if (m->json_version == 1) { |
| 1664 | - // "objectinfo" is not needed for version >1 since you can | ||
| 1665 | - // tell streams from other objects in "objects". | 1637 | + // "objectinfo" is not needed for version >1 since you can tell streams from other objects |
| 1638 | + // in "objects". | ||
| 1666 | if (all_keys || m->json_keys.count("objectinfo")) { | 1639 | if (all_keys || m->json_keys.count("objectinfo")) { |
| 1667 | doJSONObjectinfo(p, first, pdf); | 1640 | doJSONObjectinfo(p, first, pdf); |
| 1668 | } | 1641 | } |
| @@ -1677,8 +1650,7 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) | @@ -1677,8 +1650,7 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) | ||
| 1677 | std::list<std::string> errors; | 1650 | std::list<std::string> errors; |
| 1678 | JSON captured = JSON::parse(captured_json); | 1651 | JSON captured = JSON::parse(captured_json); |
| 1679 | if (!captured.checkSchema(schema, errors)) { | 1652 | if (!captured.checkSchema(schema, errors)) { |
| 1680 | - m->log->error("QPDFJob didn't create JSON that complies with " | ||
| 1681 | - "its own rules.\n"); | 1653 | + m->log->error("QPDFJob didn't create JSON that complies with its own rules.\n"); |
| 1682 | for (auto const& error: errors) { | 1654 | for (auto const& error: errors) { |
| 1683 | *m->log->getError() << error << "\n"; | 1655 | *m->log->getError() << error << "\n"; |
| 1684 | } | 1656 | } |
| @@ -1768,53 +1740,46 @@ QPDFJob::doProcess( | @@ -1768,53 +1740,46 @@ QPDFJob::doProcess( | ||
| 1768 | bool used_for_input, | 1740 | bool used_for_input, |
| 1769 | bool main_input) | 1741 | bool main_input) |
| 1770 | { | 1742 | { |
| 1771 | - // If a password has been specified but doesn't work, try other | ||
| 1772 | - // passwords that are equivalent in different character encodings. | ||
| 1773 | - // This makes it possible to open PDF files that were encrypted | ||
| 1774 | - // using incorrect string encodings. For example, if someone used | ||
| 1775 | - // a password encoded in PDF Doc encoding or Windows code page | ||
| 1776 | - // 1252 for an AES-encrypted file or a UTF-8-encoded password on | ||
| 1777 | - // an RC4-encrypted file, or if the password was properly encoded | ||
| 1778 | - // but the password given here was incorrectly encoded, there's a | ||
| 1779 | - // good chance we'd succeed here. | 1743 | + // If a password has been specified but doesn't work, try other passwords that are equivalent in |
| 1744 | + // different character encodings. This makes it possible to open PDF files that were encrypted | ||
| 1745 | + // using incorrect string encodings. For example, if someone used a password encoded in PDF Doc | ||
| 1746 | + // encoding or Windows code page 1252 for an AES-encrypted file or a UTF-8-encoded password on | ||
| 1747 | + // an RC4-encrypted file, or if the password was properly encoded but the password given here | ||
| 1748 | + // was incorrectly encoded, there's a good chance we'd succeed here. | ||
| 1780 | 1749 | ||
| 1781 | std::string ptemp; | 1750 | std::string ptemp; |
| 1782 | if (password && (!m->password_is_hex_key)) { | 1751 | if (password && (!m->password_is_hex_key)) { |
| 1783 | if (m->password_mode == QPDFJob::pm_hex_bytes) { | 1752 | if (m->password_mode == QPDFJob::pm_hex_bytes) { |
| 1784 | - // Special case: handle --password-mode=hex-bytes for input | ||
| 1785 | - // password as well as output password | 1753 | + // Special case: handle --password-mode=hex-bytes for input password as well as output |
| 1754 | + // password | ||
| 1786 | QTC::TC("qpdf", "QPDFJob input password hex-bytes"); | 1755 | QTC::TC("qpdf", "QPDFJob input password hex-bytes"); |
| 1787 | ptemp = QUtil::hex_decode(password); | 1756 | ptemp = QUtil::hex_decode(password); |
| 1788 | password = ptemp.c_str(); | 1757 | password = ptemp.c_str(); |
| 1789 | } | 1758 | } |
| 1790 | } | 1759 | } |
| 1791 | if ((password == nullptr) || empty || m->password_is_hex_key || m->suppress_password_recovery) { | 1760 | if ((password == nullptr) || empty || m->password_is_hex_key || m->suppress_password_recovery) { |
| 1792 | - // There is no password, or we're not doing recovery, so just | ||
| 1793 | - // do the normal processing with the supplied password. | 1761 | + // There is no password, or we're not doing recovery, so just do the normal processing with |
| 1762 | + // the supplied password. | ||
| 1794 | doProcessOnce(pdf, fn, password, empty, used_for_input, main_input); | 1763 | doProcessOnce(pdf, fn, password, empty, used_for_input, main_input); |
| 1795 | return; | 1764 | return; |
| 1796 | } | 1765 | } |
| 1797 | 1766 | ||
| 1798 | - // Get a list of otherwise encoded strings. Keep in scope for this | ||
| 1799 | - // method. | 1767 | + // Get a list of otherwise encoded strings. Keep in scope for this method. |
| 1800 | std::vector<std::string> passwords_str = QUtil::possible_repaired_encodings(password); | 1768 | std::vector<std::string> passwords_str = QUtil::possible_repaired_encodings(password); |
| 1801 | // Represent to char const*, as required by the QPDF class. | 1769 | // Represent to char const*, as required by the QPDF class. |
| 1802 | std::vector<char const*> passwords; | 1770 | std::vector<char const*> passwords; |
| 1803 | for (auto const& iter: passwords_str) { | 1771 | for (auto const& iter: passwords_str) { |
| 1804 | passwords.push_back(iter.c_str()); | 1772 | passwords.push_back(iter.c_str()); |
| 1805 | } | 1773 | } |
| 1806 | - // We always try the supplied password first because it is the | ||
| 1807 | - // first string returned by possible_repaired_encodings. If there | ||
| 1808 | - // is more than one option, go ahead and put the supplied password | ||
| 1809 | - // at the end so that it's that decoding attempt whose exception | ||
| 1810 | - // is thrown. | 1774 | + // We always try the supplied password first because it is the first string returned by |
| 1775 | + // possible_repaired_encodings. If there is more than one option, go ahead and put the supplied | ||
| 1776 | + // password at the end so that it's that decoding attempt whose exception is thrown. | ||
| 1811 | if (passwords.size() > 1) { | 1777 | if (passwords.size() > 1) { |
| 1812 | passwords.push_back(password); | 1778 | passwords.push_back(password); |
| 1813 | } | 1779 | } |
| 1814 | 1780 | ||
| 1815 | - // Try each password. If one works, return the resulting object. | ||
| 1816 | - // If they all fail, throw the exception thrown by the final | ||
| 1817 | - // attempt, which, like the first attempt, will be with the | 1781 | + // Try each password. If one works, return the resulting object. If they all fail, throw the |
| 1782 | + // exception thrown by the final attempt, which, like the first attempt, will be with the | ||
| 1818 | // supplied password. | 1783 | // supplied password. |
| 1819 | bool warned = false; | 1784 | bool warned = false; |
| 1820 | for (auto iter = passwords.begin(); iter != passwords.end(); ++iter) { | 1785 | for (auto iter = passwords.begin(); iter != passwords.end(); ++iter) { |
| @@ -1831,9 +1796,9 @@ QPDFJob::doProcess( | @@ -1831,9 +1796,9 @@ QPDFJob::doProcess( | ||
| 1831 | if (!warned) { | 1796 | if (!warned) { |
| 1832 | warned = true; | 1797 | warned = true; |
| 1833 | doIfVerbose([&](Pipeline& v, std::string const& prefix) { | 1798 | doIfVerbose([&](Pipeline& v, std::string const& prefix) { |
| 1834 | - v << prefix << ": supplied password didn't work;" | ||
| 1835 | - << " trying other passwords based on interpreting" | ||
| 1836 | - << " password with different string encodings\n"; | 1799 | + v << prefix |
| 1800 | + << ": supplied password didn't work; trying other passwords based on " | ||
| 1801 | + "interpreting password with different string encodings\n"; | ||
| 1837 | }); | 1802 | }); |
| 1838 | } | 1803 | } |
| 1839 | } | 1804 | } |
| @@ -1943,10 +1908,8 @@ QPDFJob::doUnderOverlayForPage( | @@ -1943,10 +1908,8 @@ QPDFJob::doUnderOverlayForPage( | ||
| 1943 | fo[from_pageno] = pdf.copyForeignObject(from_page.getFormXObjectForPage()); | 1908 | fo[from_pageno] = pdf.copyForeignObject(from_page.getFormXObjectForPage()); |
| 1944 | } | 1909 | } |
| 1945 | 1910 | ||
| 1946 | - // If the same page is overlaid or underlaid multiple times, | ||
| 1947 | - // we'll generate multiple names for it, but that's harmless | ||
| 1948 | - // and also a pretty goofy case that's not worth coding | ||
| 1949 | - // around. | 1911 | + // If the same page is overlaid or underlaid multiple times, we'll generate multiple names |
| 1912 | + // for it, but that's harmless and also a pretty goofy case that's not worth coding around. | ||
| 1950 | std::string name = resources.getUniqueResourceName("/Fx", min_suffix); | 1913 | std::string name = resources.getUniqueResourceName("/Fx", min_suffix); |
| 1951 | QPDFMatrix cm; | 1914 | QPDFMatrix cm; |
| 1952 | std::string new_content = dest_page.placeFormXObject( | 1915 | std::string new_content = dest_page.placeFormXObject( |
| @@ -2017,18 +1980,15 @@ QPDFJob::handleUnderOverlay(QPDF& pdf) | @@ -2017,18 +1980,15 @@ QPDFJob::handleUnderOverlay(QPDF& pdf) | ||
| 2017 | if (!(underlay_pagenos.count(pageno) || overlay_pagenos.count(pageno))) { | 1980 | if (!(underlay_pagenos.count(pageno) || overlay_pagenos.count(pageno))) { |
| 2018 | continue; | 1981 | continue; |
| 2019 | } | 1982 | } |
| 2020 | - // This code converts the original page, any underlays, and | ||
| 2021 | - // any overlays to form XObjects. Then it concatenates display | ||
| 2022 | - // of all underlays, the original page, and all overlays. | ||
| 2023 | - // Prior to 11.3.0, the original page contents were wrapped in | ||
| 2024 | - // q/Q, but this didn't work if the original page had | ||
| 2025 | - // unbalanced q/Q operators. See github issue #904. | 1983 | + // This code converts the original page, any underlays, and any overlays to form XObjects. |
| 1984 | + // Then it concatenates display of all underlays, the original page, and all overlays. Prior | ||
| 1985 | + // to 11.3.0, the original page contents were wrapped in q/Q, but this didn't work if the | ||
| 1986 | + // original page had unbalanced q/Q operators. See github issue #904. | ||
| 2026 | auto& dest_page = main_pages.at(i); | 1987 | auto& dest_page = main_pages.at(i); |
| 2027 | auto dest_page_oh = dest_page.getObjectHandle(); | 1988 | auto dest_page_oh = dest_page.getObjectHandle(); |
| 2028 | auto this_page_fo = dest_page.getFormXObjectForPage(); | 1989 | auto this_page_fo = dest_page.getFormXObjectForPage(); |
| 2029 | - // The resulting form xobject lazily reads the content from | ||
| 2030 | - // the original page, which we are going to replace. Therefore | ||
| 2031 | - // we have to explicitly copy it. | 1990 | + // The resulting form xobject lazily reads the content from the original page, which we are |
| 1991 | + // going to replace. Therefore we have to explicitly copy it. | ||
| 2032 | auto content_data = this_page_fo.getRawStreamData(); | 1992 | auto content_data = this_page_fo.getRawStreamData(); |
| 2033 | this_page_fo.replaceStreamData(content_data, QPDFObjectHandle(), QPDFObjectHandle()); | 1993 | this_page_fo.replaceStreamData(content_data, QPDFObjectHandle(), QPDFObjectHandle()); |
| 2034 | auto resources = | 1994 | auto resources = |
| @@ -2097,8 +2057,7 @@ QPDFJob::addAttachments(QPDF& pdf) | @@ -2097,8 +2057,7 @@ QPDFJob::addAttachments(QPDF& pdf) | ||
| 2097 | } | 2057 | } |
| 2098 | message = pdf.getFilename() + | 2058 | message = pdf.getFilename() + |
| 2099 | " already has attachments with the following keys: " + message + | 2059 | " already has attachments with the following keys: " + message + |
| 2100 | - "; use --replace to replace or --key to specify a different " | ||
| 2101 | - "key"; | 2060 | + "; use --replace to replace or --key to specify a different key"; |
| 2102 | throw std::runtime_error(message); | 2061 | throw std::runtime_error(message); |
| 2103 | } | 2062 | } |
| 2104 | } | 2063 | } |
| @@ -2144,11 +2103,9 @@ QPDFJob::copyAttachments(QPDF& pdf) | @@ -2144,11 +2103,9 @@ QPDFJob::copyAttachments(QPDF& pdf) | ||
| 2144 | message += i; | 2103 | message += i; |
| 2145 | } | 2104 | } |
| 2146 | message = pdf.getFilename() + | 2105 | message = pdf.getFilename() + |
| 2147 | - " already has attachments with keys that conflict with" | ||
| 2148 | - " attachments from other files: " + | 2106 | + " already has attachments with keys that conflict with attachments from other files: " + |
| 2149 | message + | 2107 | message + |
| 2150 | - ". Use --prefix with --copy-attachments-from" | ||
| 2151 | - " or manually copy individual attachments."; | 2108 | + ". Use --prefix with --copy-attachments-from or manually copy individual attachments."; |
| 2152 | throw std::runtime_error(message); | 2109 | throw std::runtime_error(message); |
| 2153 | } | 2110 | } |
| 2154 | } | 2111 | } |
| @@ -2243,13 +2200,11 @@ QPDFJob::shouldRemoveUnreferencedResources(QPDF& pdf) | @@ -2243,13 +2200,11 @@ QPDFJob::shouldRemoveUnreferencedResources(QPDF& pdf) | ||
| 2243 | return true; | 2200 | return true; |
| 2244 | } | 2201 | } |
| 2245 | 2202 | ||
| 2246 | - // Unreferenced resources are common in files where resources | ||
| 2247 | - // dictionaries are shared across pages. As a heuristic, we look | ||
| 2248 | - // in the file for shared resources dictionaries or shared XObject | ||
| 2249 | - // subkeys of resources dictionaries either on pages or on form | ||
| 2250 | - // XObjects in pages. If we find any, then there is a higher | ||
| 2251 | - // likelihood that the expensive process of finding unreferenced | ||
| 2252 | - // resources is worth it. | 2203 | + // Unreferenced resources are common in files where resources dictionaries are shared across |
| 2204 | + // pages. As a heuristic, we look in the file for shared resources dictionaries or shared | ||
| 2205 | + // XObject subkeys of resources dictionaries either on pages or on form XObjects in pages. If we | ||
| 2206 | + // find any, then there is a higher likelihood that the expensive process of finding | ||
| 2207 | + // unreferenced resources is worth it. | ||
| 2253 | 2208 | ||
| 2254 | // Return true as soon as we find any shared resources. | 2209 | // Return true as soon as we find any shared resources. |
| 2255 | 2210 | ||
| @@ -2332,8 +2287,8 @@ added_page(QPDF& pdf, QPDFObjectHandle page) | @@ -2332,8 +2287,8 @@ added_page(QPDF& pdf, QPDFObjectHandle page) | ||
| 2332 | { | 2287 | { |
| 2333 | QPDFObjectHandle result = page; | 2288 | QPDFObjectHandle result = page; |
| 2334 | if (&page.getQPDF() != &pdf) { | 2289 | if (&page.getQPDF() != &pdf) { |
| 2335 | - // Calling copyForeignObject on an object we already copied | ||
| 2336 | - // will give us the already existing copy. | 2290 | + // Calling copyForeignObject on an object we already copied will give us the already |
| 2291 | + // existing copy. | ||
| 2337 | result = pdf.copyForeignObject(page); | 2292 | result = pdf.copyForeignObject(page); |
| 2338 | } | 2293 | } |
| 2339 | return result; | 2294 | return result; |
| @@ -2348,8 +2303,7 @@ added_page(QPDF& pdf, QPDFPageObjectHelper page) | @@ -2348,8 +2303,7 @@ added_page(QPDF& pdf, QPDFPageObjectHelper page) | ||
| 2348 | void | 2303 | void |
| 2349 | QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_heap) | 2304 | QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_heap) |
| 2350 | { | 2305 | { |
| 2351 | - // Parse all page specifications and translate them into lists of | ||
| 2352 | - // actual pages. | 2306 | + // Parse all page specifications and translate them into lists of actual pages. |
| 2353 | 2307 | ||
| 2354 | // Handle "." as a shortcut for the input file | 2308 | // Handle "." as a shortcut for the input file |
| 2355 | for (auto& page_spec: m->page_specs) { | 2309 | for (auto& page_spec: m->page_specs) { |
| @@ -2359,9 +2313,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea | @@ -2359,9 +2313,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea | ||
| 2359 | } | 2313 | } |
| 2360 | 2314 | ||
| 2361 | if (!m->keep_files_open_set) { | 2315 | if (!m->keep_files_open_set) { |
| 2362 | - // Count the number of distinct files to determine whether we | ||
| 2363 | - // should keep files open or not. Rather than trying to code | ||
| 2364 | - // some portable heuristic based on OS limits, just hard-code | 2316 | + // Count the number of distinct files to determine whether we should keep files open or not. |
| 2317 | + // Rather than trying to code some portable heuristic based on OS limits, just hard-code | ||
| 2365 | // this at a given number and allow users to override. | 2318 | // this at a given number and allow users to override. |
| 2366 | std::set<std::string> filenames; | 2319 | std::set<std::string> filenames; |
| 2367 | for (auto& page_spec: m->page_specs) { | 2320 | for (auto& page_spec: m->page_specs) { |
| @@ -2383,16 +2336,13 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea | @@ -2383,16 +2336,13 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea | ||
| 2383 | std::map<unsigned long long, std::set<QPDFObjGen>> copied_pages; | 2336 | std::map<unsigned long long, std::set<QPDFObjGen>> copied_pages; |
| 2384 | for (auto& page_spec: m->page_specs) { | 2337 | for (auto& page_spec: m->page_specs) { |
| 2385 | if (page_spec_qpdfs.count(page_spec.filename) == 0) { | 2338 | if (page_spec_qpdfs.count(page_spec.filename) == 0) { |
| 2386 | - // Open the PDF file and store the QPDF object. Throw a | ||
| 2387 | - // std::shared_ptr to the qpdf into a heap so that it | ||
| 2388 | - // survives through copying to the output but gets cleaned up | ||
| 2389 | - // automatically at the end. Do not canonicalize the file | ||
| 2390 | - // name. Using two different paths to refer to the same | ||
| 2391 | - // file is a documented workaround for duplicating a page. | ||
| 2392 | - // If you are using this as an example of how to do this with | ||
| 2393 | - // the API, you can just create two different QPDF objects | ||
| 2394 | - // to the same underlying file with the same path to | ||
| 2395 | - // achieve the same effect. | 2339 | + // Open the PDF file and store the QPDF object. Throw a std::shared_ptr to the qpdf into |
| 2340 | + // a heap so that it survives through copying to the output but gets cleaned up | ||
| 2341 | + // automatically at the end. Do not canonicalize the file name. Using two different | ||
| 2342 | + // paths to refer to the same file is a documented workaround for duplicating a page. If | ||
| 2343 | + // you are using this as an example of how to do this with the API, you can just create two | ||
| 2344 | + // different QPDF objects to the same underlying file with the same path to achieve the | ||
| 2345 | + // same effect. | ||
| 2396 | char const* password = page_spec.password.get(); | 2346 | char const* password = page_spec.password.get(); |
| 2397 | if ((!m->encryption_file.empty()) && (password == nullptr) && | 2347 | if ((!m->encryption_file.empty()) && (password == nullptr) && |
| 2398 | (page_spec.filename == m->encryption_file)) { | 2348 | (page_spec.filename == m->encryption_file)) { |
| @@ -2424,8 +2374,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea | @@ -2424,8 +2374,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea | ||
| 2424 | } | 2374 | } |
| 2425 | } | 2375 | } |
| 2426 | 2376 | ||
| 2427 | - // Read original pages from the PDF, and parse the page range | ||
| 2428 | - // associated with this occurrence of the file. | 2377 | + // Read original pages from the PDF, and parse the page range associated with this |
| 2378 | + // occurrence of the file. | ||
| 2429 | parsed_specs.push_back( | 2379 | parsed_specs.push_back( |
| 2430 | // line-break | 2380 | // line-break |
| 2431 | QPDFPageData(page_spec.filename, page_spec_qpdfs[page_spec.filename], page_spec.range)); | 2381 | QPDFPageData(page_spec.filename, page_spec_qpdfs[page_spec.filename], page_spec.range)); |
| @@ -2451,11 +2401,9 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea | @@ -2451,11 +2401,9 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea | ||
| 2451 | } | 2401 | } |
| 2452 | } | 2402 | } |
| 2453 | 2403 | ||
| 2454 | - // Clear all pages out of the primary QPDF's pages tree but leave | ||
| 2455 | - // the objects in place in the file so they can be re-added | ||
| 2456 | - // without changing their object numbers. This enables other | ||
| 2457 | - // things in the original file, such as outlines, to continue to | ||
| 2458 | - // work. | 2404 | + // Clear all pages out of the primary QPDF's pages tree but leave the objects in place in the |
| 2405 | + // file so they can be re-added without changing their object numbers. This enables other things | ||
| 2406 | + // in the original file, such as outlines, to continue to work. | ||
| 2459 | doIfVerbose([&](Pipeline& v, std::string const& prefix) { | 2407 | doIfVerbose([&](Pipeline& v, std::string const& prefix) { |
| 2460 | v << prefix << ": removing unreferenced pages from primary input\n"; | 2408 | v << prefix << ": removing unreferenced pages from primary input\n"; |
| 2461 | }); | 2409 | }); |
| @@ -2466,9 +2414,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea | @@ -2466,9 +2414,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea | ||
| 2466 | } | 2414 | } |
| 2467 | 2415 | ||
| 2468 | if (m->collate && (parsed_specs.size() > 1)) { | 2416 | if (m->collate && (parsed_specs.size() > 1)) { |
| 2469 | - // Collate the pages by selecting one page from each spec in | ||
| 2470 | - // order. When a spec runs out of pages, stop selecting from | ||
| 2471 | - // it. | 2417 | + // Collate the pages by selecting one page from each spec in order. When a spec runs out of |
| 2418 | + // pages, stop selecting from it. | ||
| 2472 | std::vector<QPDFPageData> new_parsed_specs; | 2419 | std::vector<QPDFPageData> new_parsed_specs; |
| 2473 | size_t nspecs = parsed_specs.size(); | 2420 | size_t nspecs = parsed_specs.size(); |
| 2474 | size_t cur_page = 0; | 2421 | size_t cur_page = 0; |
| @@ -2491,9 +2438,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea | @@ -2491,9 +2438,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea | ||
| 2491 | parsed_specs = new_parsed_specs; | 2438 | parsed_specs = new_parsed_specs; |
| 2492 | } | 2439 | } |
| 2493 | 2440 | ||
| 2494 | - // Add all the pages from all the files in the order specified. | ||
| 2495 | - // Keep track of any pages from the original file that we are | ||
| 2496 | - // selecting. | 2441 | + // Add all the pages from all the files in the order specified. Keep track of any pages from the |
| 2442 | + // original file that we are selecting. | ||
| 2497 | std::set<int> selected_from_orig; | 2443 | std::set<int> selected_from_orig; |
| 2498 | std::vector<QPDFObjectHandle> new_labels; | 2444 | std::vector<QPDFObjectHandle> new_labels; |
| 2499 | bool any_page_labels = false; | 2445 | bool any_page_labels = false; |
| @@ -2516,8 +2462,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea | @@ -2516,8 +2462,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea | ||
| 2516 | v << prefix << ": adding pages from " << page_data.filename << "\n"; | 2462 | v << prefix << ": adding pages from " << page_data.filename << "\n"; |
| 2517 | }); | 2463 | }); |
| 2518 | for (auto pageno_iter: page_data.selected_pages) { | 2464 | for (auto pageno_iter: page_data.selected_pages) { |
| 2519 | - // Pages are specified from 1 but numbered from 0 in the | ||
| 2520 | - // vector | 2465 | + // Pages are specified from 1 but numbered from 0 in the vector |
| 2521 | int pageno = pageno_iter - 1; | 2466 | int pageno = pageno_iter - 1; |
| 2522 | pldh.getLabelsForPageRange(pageno, pageno, out_pageno++, new_labels); | 2467 | pldh.getLabelsForPageRange(pageno, pageno, out_pageno++, new_labels); |
| 2523 | QPDFPageObjectHelper to_copy = page_data.orig_pages.at(QIntC::to_size(pageno)); | 2468 | QPDFPageObjectHelper to_copy = page_data.orig_pages.at(QIntC::to_size(pageno)); |
| @@ -2539,22 +2484,18 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea | @@ -2539,22 +2484,18 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea | ||
| 2539 | bool first_copy_from_orig = false; | 2484 | bool first_copy_from_orig = false; |
| 2540 | bool this_file = (page_data.qpdf == &pdf); | 2485 | bool this_file = (page_data.qpdf == &pdf); |
| 2541 | if (this_file) { | 2486 | if (this_file) { |
| 2542 | - // This is a page from the original file. Keep track | ||
| 2543 | - // of the fact that we are using it. | 2487 | + // This is a page from the original file. Keep track of the fact that we are using |
| 2488 | + // it. | ||
| 2544 | first_copy_from_orig = (selected_from_orig.count(pageno) == 0); | 2489 | first_copy_from_orig = (selected_from_orig.count(pageno) == 0); |
| 2545 | selected_from_orig.insert(pageno); | 2490 | selected_from_orig.insert(pageno); |
| 2546 | } | 2491 | } |
| 2547 | auto new_page = added_page(pdf, to_copy); | 2492 | auto new_page = added_page(pdf, to_copy); |
| 2548 | - // Try to avoid gratuitously renaming fields. In the case | ||
| 2549 | - // of where we're just extracting a bunch of pages from | ||
| 2550 | - // the original file and not copying any page more than | ||
| 2551 | - // once, there's no reason to do anything with the fields. | ||
| 2552 | - // Since we don't remove fields from the original file | ||
| 2553 | - // until all copy operations are completed, any foreign | ||
| 2554 | - // pages that conflict with original pages will be | ||
| 2555 | - // adjusted. If we copy any page from the original file | ||
| 2556 | - // more than once, that page would be in conflict with the | ||
| 2557 | - // previous copy of itself. | 2493 | + // Try to avoid gratuitously renaming fields. In the case where we're just extracting |
| 2494 | + // a bunch of pages from the original file and not copying any page more than once, | ||
| 2495 | + // there's no reason to do anything with the fields. Since we don't remove fields from | ||
| 2496 | + // the original file until all copy operations are completed, any foreign pages that | ||
| 2497 | + // conflict with original pages will be adjusted. If we copy any page from the original | ||
| 2498 | + // file more than once, that page would be in conflict with the previous copy of itself. | ||
| 2558 | if (other_afdh->hasAcroForm() && ((!this_file) || (!first_copy_from_orig))) { | 2499 | if (other_afdh->hasAcroForm() && ((!this_file) || (!first_copy_from_orig))) { |
| 2559 | if (!this_file) { | 2500 | if (!this_file) { |
| 2560 | QTC::TC("qpdf", "QPDFJob copy fields not this file"); | 2501 | QTC::TC("qpdf", "QPDFJob copy fields not this file"); |
| @@ -2569,8 +2510,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea | @@ -2569,8 +2510,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea | ||
| 2569 | qpdf_e_damaged_pdf, | 2510 | qpdf_e_damaged_pdf, |
| 2570 | "", | 2511 | "", |
| 2571 | 0, | 2512 | 0, |
| 2572 | - ("Exception caught while fixing copied" | ||
| 2573 | - " annotations. This may be a qpdf bug. " + | 2513 | + ("Exception caught while fixing copied annotations. This may be a qpdf " |
| 2514 | + "bug. " + | ||
| 2574 | std::string("Exception: ") + e.what())); | 2515 | std::string("Exception: ") + e.what())); |
| 2575 | } | 2516 | } |
| 2576 | } | 2517 | } |
| @@ -2585,10 +2526,9 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea | @@ -2585,10 +2526,9 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea | ||
| 2585 | pdf.getRoot().replaceKey("/PageLabels", page_labels); | 2526 | pdf.getRoot().replaceKey("/PageLabels", page_labels); |
| 2586 | } | 2527 | } |
| 2587 | 2528 | ||
| 2588 | - // Delete page objects for unused page in primary. This prevents | ||
| 2589 | - // those objects from being preserved by being referred to from | ||
| 2590 | - // other places, such as the outlines dictionary. Also make sure | ||
| 2591 | - // we keep form fields from pages we preserved. | 2529 | + // Delete page objects for unused pages in primary. This prevents those objects from being |
| 2530 | + // preserved by being referred to from other places, such as the outlines dictionary. Also make | ||
| 2531 | + // sure we keep form fields from pages we preserved. | ||
| 2592 | for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno) { | 2532 | for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno) { |
| 2593 | auto page = orig_pages.at(pageno); | 2533 | auto page = orig_pages.at(pageno); |
| 2594 | if (selected_from_orig.count(QIntC::to_int(pageno))) { | 2534 | if (selected_from_orig.count(QIntC::to_int(pageno))) { |
| @@ -2676,8 +2616,8 @@ QPDFJob::maybeFixWritePassword(int R, std::string& password) | @@ -2676,8 +2616,8 @@ QPDFJob::maybeFixWritePassword(int R, std::string& password) | ||
| 2676 | std::string encoded; | 2616 | std::string encoded; |
| 2677 | if (!QUtil::utf8_to_pdf_doc(password, encoded)) { | 2617 | if (!QUtil::utf8_to_pdf_doc(password, encoded)) { |
| 2678 | QTC::TC("qpdf", "QPDFJob password not encodable"); | 2618 | QTC::TC("qpdf", "QPDFJob password not encodable"); |
| 2679 | - throw std::runtime_error("supplied password cannot be encoded for" | ||
| 2680 | - " 40-bit or 128-bit encryption formats"); | 2619 | + throw std::runtime_error("supplied password cannot be encoded for 40-bit " |
| 2620 | + "or 128-bit encryption formats"); | ||
| 2681 | } | 2621 | } |
| 2682 | password = encoded; | 2622 | password = encoded; |
| 2683 | } | 2623 | } |
| @@ -2687,31 +2627,27 @@ QPDFJob::maybeFixWritePassword(int R, std::string& password) | @@ -2687,31 +2627,27 @@ QPDFJob::maybeFixWritePassword(int R, std::string& password) | ||
| 2687 | if (QUtil::utf8_to_pdf_doc(password, encoded)) { | 2627 | if (QUtil::utf8_to_pdf_doc(password, encoded)) { |
| 2688 | QTC::TC("qpdf", "QPDFJob auto-encode password"); | 2628 | QTC::TC("qpdf", "QPDFJob auto-encode password"); |
| 2689 | doIfVerbose([&](Pipeline& v, std::string const& prefix) { | 2629 | doIfVerbose([&](Pipeline& v, std::string const& prefix) { |
| 2690 | - v << prefix << ": automatically converting Unicode" | ||
| 2691 | - << " password to single-byte encoding as" | ||
| 2692 | - << " required for 40-bit or 128-bit" | ||
| 2693 | - << " encryption\n"; | 2630 | + v << prefix |
| 2631 | + << ": automatically converting Unicode password to single-byte " | ||
| 2632 | + "encoding as required for 40-bit or 128-bit encryption\n"; | ||
| 2694 | }); | 2633 | }); |
| 2695 | password = encoded; | 2634 | password = encoded; |
| 2696 | } else { | 2635 | } else { |
| 2697 | QTC::TC("qpdf", "QPDFJob bytes fallback warning"); | 2636 | QTC::TC("qpdf", "QPDFJob bytes fallback warning"); |
| 2698 | - *m->log->getError() << m->message_prefix << ": WARNING: " | ||
| 2699 | - << "supplied password looks like a Unicode" | ||
| 2700 | - << " password with characters not allowed in" | ||
| 2701 | - << " passwords for 40-bit and 128-bit " | ||
| 2702 | - "encryption;" | ||
| 2703 | - << " most readers will not be able to open this" | ||
| 2704 | - << " file with the supplied password." | ||
| 2705 | - << " (Use --password-mode=bytes to suppress " | ||
| 2706 | - "this" | ||
| 2707 | - << " warning and use the password anyway.)\n"; | 2637 | + *m->log->getError() |
| 2638 | + << m->message_prefix | ||
| 2639 | + << ": WARNING: supplied password looks like a Unicode password with " | ||
| 2640 | + "characters not allowed in passwords for 40-bit and 128-bit " | ||
| 2641 | + "encryption; most readers will not be able to open this file with " | ||
| 2642 | + "the supplied password. (Use --password-mode=bytes to suppress this " | ||
| 2643 | + "warning and use the password anyway.)\n"; | ||
| 2708 | } | 2644 | } |
| 2709 | } else if ((R >= 5) && (!is_valid_utf8)) { | 2645 | } else if ((R >= 5) && (!is_valid_utf8)) { |
| 2710 | QTC::TC("qpdf", "QPDFJob invalid utf-8 in auto"); | 2646 | QTC::TC("qpdf", "QPDFJob invalid utf-8 in auto"); |
| 2711 | - throw std::runtime_error("supplied password is not a valid Unicode password," | ||
| 2712 | - " which is required for 256-bit encryption; to" | ||
| 2713 | - " really use this password, rerun with the" | ||
| 2714 | - " --password-mode=bytes option"); | 2647 | + throw std::runtime_error( |
| 2648 | + "supplied password is not a valid Unicode password, which is required for " | ||
| 2649 | + "256-bit encryption; to really use this password, rerun with the " | ||
| 2650 | + "--password-mode=bytes option"); | ||
| 2715 | } | 2651 | } |
| 2716 | } | 2652 | } |
| 2717 | } | 2653 | } |
| @@ -2749,16 +2685,12 @@ QPDFJob::setEncryptionOptions(QPDF& pdf, QPDFWriter& w) | @@ -2749,16 +2685,12 @@ QPDFJob::setEncryptionOptions(QPDF& pdf, QPDFWriter& w) | ||
| 2749 | if ((R < 4) || ((R == 4) && (!m->use_aes))) { | 2685 | if ((R < 4) || ((R == 4) && (!m->use_aes))) { |
| 2750 | if (!m->allow_weak_crypto) { | 2686 | if (!m->allow_weak_crypto) { |
| 2751 | QTC::TC("qpdf", "QPDFJob weak crypto error"); | 2687 | QTC::TC("qpdf", "QPDFJob weak crypto error"); |
| 2752 | - *m->log->getError() << m->message_prefix | ||
| 2753 | - << ": refusing to write a file with RC4, a weak " | ||
| 2754 | - "cryptographic " | ||
| 2755 | - "algorithm\n" | ||
| 2756 | - << "Please use 256-bit keys for better security.\n" | ||
| 2757 | - << "Pass --allow-weak-crypto to enable writing insecure " | ||
| 2758 | - "files.\n" | ||
| 2759 | - << "See also " | ||
| 2760 | - "https://qpdf.readthedocs.io/en/stable/" | ||
| 2761 | - "weak-crypto.html\n"; | 2688 | + *m->log->getError() |
| 2689 | + << m->message_prefix | ||
| 2690 | + << ": refusing to write a file with RC4, a weak cryptographic algorithm\n" | ||
| 2691 | + "Please use 256-bit keys for better security.\n" | ||
| 2692 | + "Pass --allow-weak-crypto to enable writing insecure files.\n" | ||
| 2693 | + "See also https://qpdf.readthedocs.io/en/stable/weak-crypto.html\n"; | ||
| 2762 | throw std::runtime_error("refusing to write a file with weak crypto"); | 2694 | throw std::runtime_error("refusing to write a file with weak crypto"); |
| 2763 | } | 2695 | } |
| 2764 | } | 2696 | } |
| @@ -2996,8 +2928,8 @@ QPDFJob::doSplitPages(QPDF& pdf) | @@ -2996,8 +2928,8 @@ QPDFJob::doSplitPages(QPDF& pdf) | ||
| 2996 | qpdf_e_damaged_pdf, | 2928 | qpdf_e_damaged_pdf, |
| 2997 | "", | 2929 | "", |
| 2998 | 0, | 2930 | 0, |
| 2999 | - ("Exception caught while fixing copied" | ||
| 3000 | - " annotations. This may be a qpdf bug." + | 2931 | + ("Exception caught while fixing copied annotations. This may be a qpdf " |
| 2932 | + "bug." + | ||
| 3001 | std::string("Exception: ") + e.what())); | 2933 | std::string("Exception: ") + e.what())); |
| 3002 | } | 2934 | } |
| 3003 | } | 2935 | } |
| @@ -3032,12 +2964,10 @@ QPDFJob::writeOutfile(QPDF& pdf) | @@ -3032,12 +2964,10 @@ QPDFJob::writeOutfile(QPDF& pdf) | ||
| 3032 | { | 2964 | { |
| 3033 | std::shared_ptr<char> temp_out; | 2965 | std::shared_ptr<char> temp_out; |
| 3034 | if (m->replace_input) { | 2966 | if (m->replace_input) { |
| 3035 | - // Append but don't prepend to the path to generate a | ||
| 3036 | - // temporary name. This saves us from having to split the path | ||
| 3037 | - // by directory and non-directory. | 2967 | + // Append but don't prepend to the path to generate a temporary name. This saves us from |
| 2968 | + // having to split the path by directory and non-directory. | ||
| 3038 | temp_out = QUtil::make_shared_cstr(std::string(m->infilename.get()) + ".~qpdf-temp#"); | 2969 | temp_out = QUtil::make_shared_cstr(std::string(m->infilename.get()) + ".~qpdf-temp#"); |
| 3039 | - // m->outfilename will be restored to 0 before temp_out | ||
| 3040 | - // goes out of scope. | 2970 | + // m->outfilename will be restored to 0 before temp_out goes out of scope. |
| 3041 | m->outfilename = temp_out; | 2971 | m->outfilename = temp_out; |
| 3042 | } else if (strcmp(m->outfilename.get(), "-") == 0) { | 2972 | } else if (strcmp(m->outfilename.get(), "-") == 0) { |
| 3043 | m->outfilename = nullptr; | 2973 | m->outfilename = nullptr; |
| @@ -3045,14 +2975,14 @@ QPDFJob::writeOutfile(QPDF& pdf) | @@ -3045,14 +2975,14 @@ QPDFJob::writeOutfile(QPDF& pdf) | ||
| 3045 | if (m->json_version) { | 2975 | if (m->json_version) { |
| 3046 | writeJSON(pdf); | 2976 | writeJSON(pdf); |
| 3047 | } else { | 2977 | } else { |
| 3048 | - // QPDFWriter must have block scope so the output file will be | ||
| 3049 | - // closed after write() finishes. | 2978 | + // QPDFWriter must have block scope so the output file will be closed after write() |
| 2979 | + // finishes. | ||
| 3050 | QPDFWriter w(pdf); | 2980 | QPDFWriter w(pdf); |
| 3051 | if (m->outfilename) { | 2981 | if (m->outfilename) { |
| 3052 | w.setOutputFilename(m->outfilename.get()); | 2982 | w.setOutputFilename(m->outfilename.get()); |
| 3053 | } else { | 2983 | } else { |
| 3054 | - // saveToStandardOutput has already been called, but | ||
| 3055 | - // calling it again is defensive and harmless. | 2984 | + // saveToStandardOutput has already been called, but calling it again is defensive and |
| 2985 | + // harmless. | ||
| 3056 | m->log->saveToStandardOutput(true); | 2986 | m->log->saveToStandardOutput(true); |
| 3057 | w.setOutputPipeline(m->log->getSave().get()); | 2987 | w.setOutputPipeline(m->log->getSave().get()); |
| 3058 | } | 2988 | } |
| @@ -3096,8 +3026,7 @@ QPDFJob::writeOutfile(QPDF& pdf) | @@ -3096,8 +3026,7 @@ QPDFJob::writeOutfile(QPDF& pdf) | ||
| 3096 | void | 3026 | void |
| 3097 | QPDFJob::writeJSON(QPDF& pdf) | 3027 | QPDFJob::writeJSON(QPDF& pdf) |
| 3098 | { | 3028 | { |
| 3099 | - // File pipeline must have block scope so it will be closed | ||
| 3100 | - // after write. | 3029 | + // File pipeline must have block scope so it will be closed after write. |
| 3101 | std::shared_ptr<QUtil::FileCloser> fc; | 3030 | std::shared_ptr<QUtil::FileCloser> fc; |
| 3102 | std::shared_ptr<Pipeline> fp; | 3031 | std::shared_ptr<Pipeline> fp; |
| 3103 | if (m->outfilename.get()) { | 3032 | if (m->outfilename.get()) { |
libqpdf/QPDFObjectHandle.cc
| @@ -51,8 +51,7 @@ QPDFObjectHandle::StreamDataProvider::StreamDataProvider(bool supports_retry) : | @@ -51,8 +51,7 @@ QPDFObjectHandle::StreamDataProvider::StreamDataProvider(bool supports_retry) : | ||
| 51 | 51 | ||
| 52 | QPDFObjectHandle::StreamDataProvider::~StreamDataProvider() | 52 | QPDFObjectHandle::StreamDataProvider::~StreamDataProvider() |
| 53 | { | 53 | { |
| 54 | - // Must be explicit and not inline -- see QPDF_DLL_CLASS in | ||
| 55 | - // README-maintainer | 54 | + // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer |
| 56 | } | 55 | } |
| 57 | 56 | ||
| 58 | void | 57 | void |
| @@ -155,16 +154,14 @@ QPDFObjectHandle::TokenFilter::writeToken(QPDFTokenizer::Token const& token) | @@ -155,16 +154,14 @@ QPDFObjectHandle::TokenFilter::writeToken(QPDFTokenizer::Token const& token) | ||
| 155 | void | 154 | void |
| 156 | QPDFObjectHandle::ParserCallbacks::handleObject(QPDFObjectHandle) | 155 | QPDFObjectHandle::ParserCallbacks::handleObject(QPDFObjectHandle) |
| 157 | { | 156 | { |
| 158 | - throw std::logic_error("You must override one of the" | ||
| 159 | - " handleObject methods in ParserCallbacks"); | 157 | + throw std::logic_error("You must override one of the handleObject methods in ParserCallbacks"); |
| 160 | } | 158 | } |
| 161 | 159 | ||
| 162 | void | 160 | void |
| 163 | QPDFObjectHandle::ParserCallbacks::handleObject(QPDFObjectHandle oh, size_t, size_t) | 161 | QPDFObjectHandle::ParserCallbacks::handleObject(QPDFObjectHandle oh, size_t, size_t) |
| 164 | { | 162 | { |
| 165 | - // This version of handleObject was added in qpdf 9. If the | ||
| 166 | - // developer did not override it, fall back to the older | ||
| 167 | - // interface. | 163 | + // This version of handleObject was added in qpdf 9. If the developer did not override it, fall |
| 164 | + // back to the older interface. | ||
| 168 | handleObject(oh); | 165 | handleObject(oh); |
| 169 | } | 166 | } |
| 170 | 167 | ||
| @@ -592,8 +589,7 @@ QPDFObjectHandle::getUIntValueAsUInt() | @@ -592,8 +589,7 @@ QPDFObjectHandle::getUIntValueAsUInt() | ||
| 592 | result = 0; | 589 | result = 0; |
| 593 | } else if (v > UINT_MAX) { | 590 | } else if (v > UINT_MAX) { |
| 594 | QTC::TC("qpdf", "QPDFObjectHandle uint returning UINT_MAX"); | 591 | QTC::TC("qpdf", "QPDFObjectHandle uint returning UINT_MAX"); |
| 595 | - warnIfPossible("requested value of unsigned integer is too big;" | ||
| 596 | - " returning UINT_MAX"); | 592 | + warnIfPossible("requested value of unsigned integer is too big; returning UINT_MAX"); |
| 597 | result = UINT_MAX; | 593 | result = UINT_MAX; |
| 598 | } else { | 594 | } else { |
| 599 | result = static_cast<unsigned int>(v); | 595 | result = static_cast<unsigned int>(v); |
| @@ -1092,11 +1088,9 @@ QPDFObjectHandle::mergeResources( | @@ -1092,11 +1088,9 @@ QPDFObjectHandle::mergeResources( | ||
| 1092 | QPDFObjectHandle this_val = getKey(rtype); | 1088 | QPDFObjectHandle this_val = getKey(rtype); |
| 1093 | if (this_val.isDictionary() && other_val.isDictionary()) { | 1089 | if (this_val.isDictionary() && other_val.isDictionary()) { |
| 1094 | if (this_val.isIndirect()) { | 1090 | if (this_val.isIndirect()) { |
| 1095 | - // Do this even if there are no keys. Various | ||
| 1096 | - // places in the code call mergeResources with | ||
| 1097 | - // resource dictionaries that contain empty | ||
| 1098 | - // subdictionaries just to get this shallow copy | ||
| 1099 | - // functionality. | 1091 | + // Do this even if there are no keys. Various places in the code call |
| 1092 | + // mergeResources with resource dictionaries that contain empty subdictionaries | ||
| 1093 | + // just to get this shallow copy functionality. | ||
| 1100 | QTC::TC("qpdf", "QPDFObjectHandle replace with copy"); | 1094 | QTC::TC("qpdf", "QPDFObjectHandle replace with copy"); |
| 1101 | this_val = replaceKeyAndGetNew(rtype, this_val.shallowCopy()); | 1095 | this_val = replaceKeyAndGetNew(rtype, this_val.shallowCopy()); |
| 1102 | } | 1096 | } |
| @@ -1476,8 +1470,7 @@ QPDFObjectHandle::arrayOrStreamToStreamArray( | @@ -1476,8 +1470,7 @@ QPDFObjectHandle::arrayOrStreamToStreamArray( | ||
| 1476 | "", | 1470 | "", |
| 1477 | description, | 1471 | description, |
| 1478 | 0, | 1472 | 0, |
| 1479 | - " object is supposed to be a stream or an" | ||
| 1480 | - " array of streams but is neither")); | 1473 | + " object is supposed to be a stream or an array of streams but is neither")); |
| 1481 | } | 1474 | } |
| 1482 | 1475 | ||
| 1483 | bool first = true; | 1476 | bool first = true; |
| @@ -1526,8 +1519,8 @@ void | @@ -1526,8 +1519,8 @@ void | ||
| 1526 | QPDFObjectHandle::rotatePage(int angle, bool relative) | 1519 | QPDFObjectHandle::rotatePage(int angle, bool relative) |
| 1527 | { | 1520 | { |
| 1528 | if ((angle % 90) != 0) { | 1521 | if ((angle % 90) != 0) { |
| 1529 | - throw std::runtime_error("QPDF::rotatePage called with an" | ||
| 1530 | - " angle that is not a multiple of 90"); | 1522 | + throw std::runtime_error( |
| 1523 | + "QPDF::rotatePage called with an angle that is not a multiple of 90"); | ||
| 1531 | } | 1524 | } |
| 1532 | int new_angle = angle; | 1525 | int new_angle = angle; |
| 1533 | if (relative) { | 1526 | if (relative) { |
| @@ -1551,8 +1544,7 @@ QPDFObjectHandle::rotatePage(int angle, bool relative) | @@ -1551,8 +1544,7 @@ QPDFObjectHandle::rotatePage(int angle, bool relative) | ||
| 1551 | new_angle += old_angle; | 1544 | new_angle += old_angle; |
| 1552 | } | 1545 | } |
| 1553 | new_angle = (new_angle + 360) % 360; | 1546 | new_angle = (new_angle + 360) % 360; |
| 1554 | - // Make this explicit even with new_angle == 0 since /Rotate can | ||
| 1555 | - // be inherited. | 1547 | + // Make this explicit even with new_angle == 0 since /Rotate can be inherited. |
| 1556 | replaceKey("/Rotate", QPDFObjectHandle::newInteger(new_angle)); | 1548 | replaceKey("/Rotate", QPDFObjectHandle::newInteger(new_angle)); |
| 1557 | } | 1549 | } |
| 1558 | 1550 | ||
| @@ -1564,15 +1556,14 @@ QPDFObjectHandle::coalesceContentStreams() | @@ -1564,15 +1556,14 @@ QPDFObjectHandle::coalesceContentStreams() | ||
| 1564 | QTC::TC("qpdf", "QPDFObjectHandle coalesce called on stream"); | 1556 | QTC::TC("qpdf", "QPDFObjectHandle coalesce called on stream"); |
| 1565 | return; | 1557 | return; |
| 1566 | } else if (!contents.isArray()) { | 1558 | } else if (!contents.isArray()) { |
| 1567 | - // /Contents is optional for pages, and some very damaged | ||
| 1568 | - // files may have pages that are invalid in other ways. | 1559 | + // /Contents is optional for pages, and some very damaged files may have pages that are |
| 1560 | + // invalid in other ways. | ||
| 1569 | return; | 1561 | return; |
| 1570 | } | 1562 | } |
| 1571 | - // Should not be possible for a page object to not have an | ||
| 1572 | - // owning PDF unless it was manually constructed in some | ||
| 1573 | - // incorrect way. However, it can happen in a PDF file whose | ||
| 1574 | - // page structure is direct, which is against spec but still | ||
| 1575 | - // possible to hand construct, as in fuzz issue 27393. | 1563 | + // Should not be possible for a page object to not have an owning PDF unless it was manually |
| 1564 | + // constructed in some incorrect way. However, it can happen in a PDF file whose page structure | ||
| 1565 | + // is direct, which is against spec but still possible to hand construct, as in fuzz issue | ||
| 1566 | + // 27393. | ||
| 1576 | QPDF& qpdf = getQPDF("coalesceContentStreams called on object with no associated PDF file"); | 1567 | QPDF& qpdf = getQPDF("coalesceContentStreams called on object with no associated PDF file"); |
| 1577 | 1568 | ||
| 1578 | QPDFObjectHandle new_contents = newStream(&qpdf); | 1569 | QPDFObjectHandle new_contents = newStream(&qpdf); |
| @@ -1808,8 +1799,8 @@ QPDFObjectHandle::parseContentStream_data( | @@ -1808,8 +1799,8 @@ QPDFObjectHandle::parseContentStream_data( | ||
| 1808 | 1799 | ||
| 1809 | callbacks->handleObject(obj, QIntC::to_size(offset), length); | 1800 | callbacks->handleObject(obj, QIntC::to_size(offset), length); |
| 1810 | if (obj.isOperator() && (obj.getOperatorValue() == "ID")) { | 1801 | if (obj.isOperator() && (obj.getOperatorValue() == "ID")) { |
| 1811 | - // Discard next character; it is the space after ID that | ||
| 1812 | - // terminated the token. Read until end of inline image. | 1802 | + // Discard next character; it is the space after ID that terminated the token. Read |
| 1803 | + // until end of inline image. | ||
| 1813 | char ch; | 1804 | char ch; |
| 1814 | input->read(&ch, 1); | 1805 | input->read(&ch, 1); |
| 1815 | tokenizer.expectInlineImage(input); | 1806 | tokenizer.expectInlineImage(input); |
| @@ -2052,8 +2043,8 @@ QPDFObjectHandle::newReserved(QPDF* qpdf) | @@ -2052,8 +2043,8 @@ QPDFObjectHandle::newReserved(QPDF* qpdf) | ||
| 2052 | void | 2043 | void |
| 2053 | QPDFObjectHandle::setObjectDescription(QPDF* owning_qpdf, std::string const& object_description) | 2044 | QPDFObjectHandle::setObjectDescription(QPDF* owning_qpdf, std::string const& object_description) |
| 2054 | { | 2045 | { |
| 2055 | - // This is called during parsing on newly created direct objects, | ||
| 2056 | - // so we can't call dereference() here. | 2046 | + // This is called during parsing on newly created direct objects, so we can't call dereference() |
| 2047 | + // here. | ||
| 2057 | if (isInitialized() && obj.get()) { | 2048 | if (isInitialized() && obj.get()) { |
| 2058 | auto descr = std::make_shared<QPDFValue::Description>(object_description); | 2049 | auto descr = std::make_shared<QPDFValue::Description>(object_description); |
| 2059 | obj->setDescription(owning_qpdf, descr); | 2050 | obj->setDescription(owning_qpdf, descr); |
| @@ -2070,8 +2061,7 @@ QPDFObjectHandle | @@ -2070,8 +2061,7 @@ QPDFObjectHandle | ||
| 2070 | QPDFObjectHandle::shallowCopy() | 2061 | QPDFObjectHandle::shallowCopy() |
| 2071 | { | 2062 | { |
| 2072 | if (!dereference()) { | 2063 | if (!dereference()) { |
| 2073 | - throw std::logic_error("operation attempted on uninitialized " | ||
| 2074 | - "QPDFObjectHandle"); | 2064 | + throw std::logic_error("operation attempted on uninitialized QPDFObjectHandle"); |
| 2075 | } | 2065 | } |
| 2076 | return QPDFObjectHandle(obj->copy()); | 2066 | return QPDFObjectHandle(obj->copy()); |
| 2077 | } | 2067 | } |
| @@ -2080,8 +2070,7 @@ QPDFObjectHandle | @@ -2080,8 +2070,7 @@ QPDFObjectHandle | ||
| 2080 | QPDFObjectHandle::unsafeShallowCopy() | 2070 | QPDFObjectHandle::unsafeShallowCopy() |
| 2081 | { | 2071 | { |
| 2082 | if (!dereference()) { | 2072 | if (!dereference()) { |
| 2083 | - throw std::logic_error("operation attempted on uninitialized " | ||
| 2084 | - "QPDFObjectHandle"); | 2073 | + throw std::logic_error("operation attempted on uninitialized QPDFObjectHandle"); |
| 2085 | } | 2074 | } |
| 2086 | return QPDFObjectHandle(obj->copy(true)); | 2075 | return QPDFObjectHandle(obj->copy(true)); |
| 2087 | } | 2076 | } |
| @@ -2094,8 +2083,7 @@ QPDFObjectHandle::makeDirect(QPDFObjGen::set& visited, bool stop_at_streams) | @@ -2094,8 +2083,7 @@ QPDFObjectHandle::makeDirect(QPDFObjGen::set& visited, bool stop_at_streams) | ||
| 2094 | auto cur_og = getObjGen(); | 2083 | auto cur_og = getObjGen(); |
| 2095 | if (!visited.add(cur_og)) { | 2084 | if (!visited.add(cur_og)) { |
| 2096 | QTC::TC("qpdf", "QPDFObjectHandle makeDirect loop"); | 2085 | QTC::TC("qpdf", "QPDFObjectHandle makeDirect loop"); |
| 2097 | - throw std::runtime_error("loop detected while converting object from " | ||
| 2098 | - "indirect to direct"); | 2086 | + throw std::runtime_error("loop detected while converting object from indirect to direct"); |
| 2099 | } | 2087 | } |
| 2100 | 2088 | ||
| 2101 | if (isBool() || isInteger() || isName() || isNull() || isReal() || isString()) { | 2089 | if (isBool() || isInteger() || isName() || isNull() || isReal() || isString()) { |
| @@ -2123,11 +2111,10 @@ QPDFObjectHandle::makeDirect(QPDFObjGen::set& visited, bool stop_at_streams) | @@ -2123,11 +2111,10 @@ QPDFObjectHandle::makeDirect(QPDFObjGen::set& visited, bool stop_at_streams) | ||
| 2123 | throw std::runtime_error("attempt to make a stream into a direct object"); | 2111 | throw std::runtime_error("attempt to make a stream into a direct object"); |
| 2124 | } | 2112 | } |
| 2125 | } else if (isReserved()) { | 2113 | } else if (isReserved()) { |
| 2126 | - throw std::logic_error("QPDFObjectHandle: attempting to make a" | ||
| 2127 | - " reserved object handle direct"); | 2114 | + throw std::logic_error( |
| 2115 | + "QPDFObjectHandle: attempting to make a reserved object handle direct"); | ||
| 2128 | } else { | 2116 | } else { |
| 2129 | - throw std::logic_error("QPDFObjectHandle::makeDirectInternal: " | ||
| 2130 | - "unknown object type"); | 2117 | + throw std::logic_error("QPDFObjectHandle::makeDirectInternal: unknown object type"); |
| 2131 | } | 2118 | } |
| 2132 | 2119 | ||
| 2133 | visited.erase(cur_og); | 2120 | visited.erase(cur_og); |
| @@ -2162,8 +2149,7 @@ void | @@ -2162,8 +2149,7 @@ void | ||
| 2162 | QPDFObjectHandle::assertInitialized() const | 2149 | QPDFObjectHandle::assertInitialized() const |
| 2163 | { | 2150 | { |
| 2164 | if (!isInitialized()) { | 2151 | if (!isInitialized()) { |
| 2165 | - throw std::logic_error("operation attempted on uninitialized " | ||
| 2166 | - "QPDFObjectHandle"); | 2152 | + throw std::logic_error("operation attempted on uninitialized QPDFObjectHandle"); |
| 2167 | } | 2153 | } |
| 2168 | } | 2154 | } |
| 2169 | 2155 | ||
| @@ -2172,8 +2158,8 @@ QPDFObjectHandle::typeWarning(char const* expected_type, std::string const& warn | @@ -2172,8 +2158,8 @@ QPDFObjectHandle::typeWarning(char const* expected_type, std::string const& warn | ||
| 2172 | { | 2158 | { |
| 2173 | QPDF* context = nullptr; | 2159 | QPDF* context = nullptr; |
| 2174 | std::string description; | 2160 | std::string description; |
| 2175 | - // Type checks above guarantee that the object has been dereferenced. | ||
| 2176 | - // Nevertheless, dereference throws exceptions in the test suite | 2161 | + // Type checks above guarantee that the object has been dereferenced. Nevertheless, dereference |
| 2162 | + // throws exceptions in the test suite | ||
| 2177 | if (!dereference()) { | 2163 | if (!dereference()) { |
| 2178 | throw std::logic_error("attempted to dereference an uninitialized QPDFObjectHandle"); | 2164 | throw std::logic_error("attempted to dereference an uninitialized QPDFObjectHandle"); |
| 2179 | } | 2165 | } |
| @@ -2376,8 +2362,8 @@ QPDFObjectHandle::checkOwnership(QPDFObjectHandle const& item) const | @@ -2376,8 +2362,8 @@ QPDFObjectHandle::checkOwnership(QPDFObjectHandle const& item) const | ||
| 2376 | auto item_qpdf = item.getOwningQPDF(); | 2362 | auto item_qpdf = item.getOwningQPDF(); |
| 2377 | if ((qpdf != nullptr) && (item_qpdf != nullptr) && (qpdf != item_qpdf)) { | 2363 | if ((qpdf != nullptr) && (item_qpdf != nullptr) && (qpdf != item_qpdf)) { |
| 2378 | QTC::TC("qpdf", "QPDFObjectHandle check ownership"); | 2364 | QTC::TC("qpdf", "QPDFObjectHandle check ownership"); |
| 2379 | - throw std::logic_error("Attempting to add an object from a different QPDF." | ||
| 2380 | - " Use QPDF::copyForeignObject to add objects from another file."); | 2365 | + throw std::logic_error("Attempting to add an object from a different QPDF. Use " |
| 2366 | + "QPDF::copyForeignObject to add objects from another file."); | ||
| 2381 | } | 2367 | } |
| 2382 | } | 2368 | } |
| 2383 | 2369 | ||
| @@ -2402,9 +2388,8 @@ QPDFObjectHandle::dereference() | @@ -2402,9 +2388,8 @@ QPDFObjectHandle::dereference() | ||
| 2402 | void | 2388 | void |
| 2403 | QPDFObjectHandle::warn(QPDF* qpdf, QPDFExc const& e) | 2389 | QPDFObjectHandle::warn(QPDF* qpdf, QPDFExc const& e) |
| 2404 | { | 2390 | { |
| 2405 | - // If parsing on behalf of a QPDF object and want to give a | ||
| 2406 | - // warning, we can warn through the object. If parsing for some | ||
| 2407 | - // other reason, such as an explicit creation of an object from a | 2391 | + // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the |
| 2392 | + // object. If parsing for some other reason, such as an explicit creation of an object from a | ||
| 2408 | // string, then just throw the exception. | 2393 | // string, then just throw the exception. |
| 2409 | if (qpdf) { | 2394 | if (qpdf) { |
| 2410 | qpdf->warn(e); | 2395 | qpdf->warn(e); |
| @@ -2596,7 +2581,8 @@ QPDFObjectHandle::getQPDF(std::string const& error_msg) const | @@ -2596,7 +2581,8 @@ QPDFObjectHandle::getQPDF(std::string const& error_msg) const | ||
| 2596 | { | 2581 | { |
| 2597 | auto result = isInitialized() ? this->obj->getQPDF() : nullptr; | 2582 | auto result = isInitialized() ? this->obj->getQPDF() : nullptr; |
| 2598 | if (result == nullptr) { | 2583 | if (result == nullptr) { |
| 2599 | - throw std::runtime_error(error_msg == "" ? "attempt to use a null qpdf object" : error_msg); | 2584 | + throw std::runtime_error( |
| 2585 | + error_msg.empty() ? "attempt to use a null qpdf object" : error_msg); | ||
| 2600 | } | 2586 | } |
| 2601 | return *result; | 2587 | return *result; |
| 2602 | } | 2588 | } |
libqpdf/QPDFPageObjectHelper.cc
| @@ -110,10 +110,8 @@ InlineImageTracker::convertIIDict(QPDFObjectHandle odict) | @@ -110,10 +110,8 @@ InlineImageTracker::convertIIDict(QPDFObjectHandle odict) | ||
| 110 | } else if (name == "/I") { | 110 | } else if (name == "/I") { |
| 111 | name = "/Indexed"; | 111 | name = "/Indexed"; |
| 112 | } else { | 112 | } else { |
| 113 | - // This is a key in the page's /Resources -> | ||
| 114 | - // /ColorSpace dictionary. We need to look it up | ||
| 115 | - // and use its value as the color space for the | ||
| 116 | - // image. | 113 | + // This is a key in the page's /Resources -> /ColorSpace dictionary. We need to |
| 114 | + // look it up and use its value as the color space for the image. | ||
| 117 | QPDFObjectHandle colorspace = resources.getKey("/ColorSpace"); | 115 | QPDFObjectHandle colorspace = resources.getKey("/ColorSpace"); |
| 118 | if (colorspace.isDictionary() && colorspace.hasKey(name)) { | 116 | if (colorspace.isDictionary() && colorspace.hasKey(name)) { |
| 119 | QTC::TC("qpdf", "QPDFPageObjectHelper colorspace lookup"); | 117 | QTC::TC("qpdf", "QPDFPageObjectHelper colorspace lookup"); |
| @@ -407,8 +405,8 @@ QPDFPageObjectHelper::externalizeInlineImages(size_t min_size, bool shallow) | @@ -407,8 +405,8 @@ QPDFPageObjectHelper::externalizeInlineImages(size_t min_size, bool shallow) | ||
| 407 | { | 405 | { |
| 408 | if (shallow) { | 406 | if (shallow) { |
| 409 | QPDFObjectHandle resources = getAttribute("/Resources", true); | 407 | QPDFObjectHandle resources = getAttribute("/Resources", true); |
| 410 | - // Calling mergeResources also ensures that /XObject becomes | ||
| 411 | - // direct and is not shared with other pages. | 408 | + // Calling mergeResources also ensures that /XObject becomes direct and is not shared with |
| 409 | + // other pages. | ||
| 412 | resources.mergeResources("<< /XObject << >> >>"_qpdf); | 410 | resources.mergeResources("<< /XObject << >> >>"_qpdf); |
| 413 | InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources); | 411 | InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources); |
| 414 | Pl_Buffer b("new page content"); | 412 | Pl_Buffer b("new page content"); |
| @@ -573,11 +571,10 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( | @@ -573,11 +571,10 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( | ||
| 573 | return false; | 571 | return false; |
| 574 | } | 572 | } |
| 575 | 573 | ||
| 576 | - // We will walk through /Font and /XObject dictionaries, removing | ||
| 577 | - // any resources that are not referenced. We must make copies of | ||
| 578 | - // resource dictionaries down into the dictionaries are mutating | ||
| 579 | - // to prevent mutating one dictionary from having the side effect | ||
| 580 | - // of mutating the one it was copied from. | 574 | + // We will walk through /Font and /XObject dictionaries, removing any resources that are not |
| 575 | + // referenced. We must make copies of resource dictionaries down into the dictionaries are | ||
| 576 | + // mutating to prevent mutating one dictionary from having the side effect of mutating the one | ||
| 577 | + // it was copied from. | ||
| 581 | QPDFObjectHandle resources = ph.getAttribute("/Resources", true); | 578 | QPDFObjectHandle resources = ph.getAttribute("/Resources", true); |
| 582 | std::vector<QPDFObjectHandle> rdicts; | 579 | std::vector<QPDFObjectHandle> rdicts; |
| 583 | std::set<std::string> known_names; | 580 | std::set<std::string> known_names; |
| @@ -605,33 +602,25 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( | @@ -605,33 +602,25 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( | ||
| 605 | } | 602 | } |
| 606 | } | 603 | } |
| 607 | } | 604 | } |
| 608 | - // Older versions of the PDF spec allowed form XObjects to omit | ||
| 609 | - // their resources dictionaries, in which case names were resolved | ||
| 610 | - // from the containing page. This behavior seems to be widely | ||
| 611 | - // supported by viewers. If a form XObjects has a resources | ||
| 612 | - // dictionary and has some unresolved names, some viewers fail to | ||
| 613 | - // resolve them, and others allow them to be inherited from the | ||
| 614 | - // page or from another form XObjects that contains them. Since | ||
| 615 | - // this behavior is inconsistent across viewers, we consider an | ||
| 616 | - // unresolved name when a resources dictionary is present to be | ||
| 617 | - // reason not to remove unreferenced resources. An unresolved name | ||
| 618 | - // in the absence of a resource dictionary is not considered a | ||
| 619 | - // problem. For form XObjects, we just accumulate a list of | ||
| 620 | - // unresolved names, and for page objects, we avoid removing any | ||
| 621 | - // such names found in nested form XObjects. | 605 | + // Older versions of the PDF spec allowed form XObjects to omit their resources dictionaries, in |
| 606 | + // which case names were resolved from the containing page. This behavior seems to be widely | ||
| 607 | + // supported by viewers. If a form XObjects has a resources dictionary and has some unresolved | ||
| 608 | + // names, some viewers fail to resolve them, and others allow them to be inherited from the page | ||
| 609 | + // or from another form XObjects that contains them. Since this behavior is inconsistent across | ||
| 610 | + // viewers, we consider an unresolved name when a resources dictionary is present to be reason | ||
| 611 | + // not to remove unreferenced resources. An unresolved name in the absence of a resource | ||
| 612 | + // dictionary is not considered a problem. For form XObjects, we just accumulate a list of | ||
| 613 | + // unresolved names, and for page objects, we avoid removing any such names found in nested form | ||
| 614 | + // XObjects. | ||
| 622 | 615 | ||
| 623 | if ((!local_unresolved.empty()) && resources.isDictionary()) { | 616 | if ((!local_unresolved.empty()) && resources.isDictionary()) { |
| 624 | - // It's not worth issuing a warning for this case. From qpdf | ||
| 625 | - // 10.3, we are hopefully only looking at names that are | ||
| 626 | - // referencing fonts and XObjects, but until we're certain | ||
| 627 | - // that we know the meaning of every name in a content stream, | ||
| 628 | - // we don't want to give warnings that might be false | ||
| 629 | - // positives. Also, this can happen in legitimate cases with | ||
| 630 | - // older PDFs, and there's nothing to be done about it, so | ||
| 631 | - // there's no good reason to issue a warning. The only sad | ||
| 632 | - // thing is that it was a false positive that alerted me to a | ||
| 633 | - // logic error in the code, and any future such errors would | ||
| 634 | - // now be hidden. | 617 | + // It's not worth issuing a warning for this case. From qpdf 10.3, we are hopefully only |
| 618 | + // looking at names that are referencing fonts and XObjects, but until we're certain that we | ||
| 619 | + // know the meaning of every name in a content stream, we don't want to give warnings that | ||
| 620 | + // might be false positives. Also, this can happen in legitimate cases with older PDFs, and | ||
| 621 | + // there's nothing to be done about it, so there's no good reason to issue a warning. The | ||
| 622 | + // only sad thing is that it was a false positive that alerted me to a logic error in the | ||
| 623 | + // code, and any future such errors would now be hidden. | ||
| 635 | QTC::TC("qpdf", "QPDFPageObjectHelper unresolved names"); | 624 | QTC::TC("qpdf", "QPDFPageObjectHelper unresolved names"); |
| 636 | return false; | 625 | return false; |
| 637 | } | 626 | } |
| @@ -639,8 +628,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( | @@ -639,8 +628,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( | ||
| 639 | for (auto& dict: rdicts) { | 628 | for (auto& dict: rdicts) { |
| 640 | for (auto const& key: dict.getKeys()) { | 629 | for (auto const& key: dict.getKeys()) { |
| 641 | if (is_page && unresolved.count(key)) { | 630 | if (is_page && unresolved.count(key)) { |
| 642 | - // This name is referenced by some nested form | ||
| 643 | - // xobject, so don't remove it. | 631 | + // This name is referenced by some nested form xobject, so don't remove it. |
| 644 | QTC::TC("qpdf", "QPDFPageObjectHelper resolving unresolved"); | 632 | QTC::TC("qpdf", "QPDFPageObjectHelper resolving unresolved"); |
| 645 | } else if (!rf.getNames().count(key)) { | 633 | } else if (!rf.getNames().count(key)) { |
| 646 | dict.removeKey(key); | 634 | dict.removeKey(key); |
| @@ -653,8 +641,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( | @@ -653,8 +641,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( | ||
| 653 | void | 641 | void |
| 654 | QPDFPageObjectHelper::removeUnreferencedResources() | 642 | QPDFPageObjectHelper::removeUnreferencedResources() |
| 655 | { | 643 | { |
| 656 | - // Accumulate a list of unresolved names across all nested form | ||
| 657 | - // XObjects. | 644 | + // Accumulate a list of unresolved names across all nested form XObjects. |
| 658 | std::set<std::string> unresolved; | 645 | std::set<std::string> unresolved; |
| 659 | bool any_failures = false; | 646 | bool any_failures = false; |
| 660 | forEachFormXObject( | 647 | forEachFormXObject( |
| @@ -724,10 +711,9 @@ QPDFPageObjectHelper::getMatrixForTransformations(bool invert) | @@ -724,10 +711,9 @@ QPDFPageObjectHelper::getMatrixForTransformations(bool invert) | ||
| 724 | QPDFObjectHandle | 711 | QPDFObjectHandle |
| 725 | QPDFPageObjectHelper::getFormXObjectForPage(bool handle_transformations) | 712 | QPDFPageObjectHelper::getFormXObjectForPage(bool handle_transformations) |
| 726 | { | 713 | { |
| 727 | - auto result = this->oh | ||
| 728 | - .getQPDF("QPDFPageObjectHelper::getFormXObjectForPage " | ||
| 729 | - "called with a direct object") | ||
| 730 | - .newStream(); | 714 | + auto result = |
| 715 | + this->oh.getQPDF("QPDFPageObjectHelper::getFormXObjectForPage called with a direct object") | ||
| 716 | + .newStream(); | ||
| 731 | QPDFObjectHandle newdict = result.getDict(); | 717 | QPDFObjectHandle newdict = result.getDict(); |
| 732 | newdict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject")); | 718 | newdict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject")); |
| 733 | newdict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Form")); | 719 | newdict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Form")); |
| @@ -759,18 +745,15 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement( | @@ -759,18 +745,15 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement( | ||
| 759 | bool allow_shrink, | 745 | bool allow_shrink, |
| 760 | bool allow_expand) | 746 | bool allow_expand) |
| 761 | { | 747 | { |
| 762 | - // Calculate the transformation matrix that will place the given | ||
| 763 | - // form XObject fully inside the given rectangle, center and | ||
| 764 | - // shrinking or expanding as needed if requested. | ||
| 765 | - | ||
| 766 | - // When rendering a form XObject, the transformation in the | ||
| 767 | - // graphics state (cm) is applied first (of course -- when it is | ||
| 768 | - // applied, the PDF interpreter doesn't even know we're going to | ||
| 769 | - // be drawing a form XObject yet), and then the object's matrix | ||
| 770 | - // (M) is applied. The resulting matrix, when applied to the form | ||
| 771 | - // XObject's bounding box, will generate a new rectangle. We want | ||
| 772 | - // to create a transformation matrix that make the form XObject's | ||
| 773 | - // bounding box land in exactly the right spot. | 748 | + // Calculate the transformation matrix that will place the given form XObject fully inside the |
| 749 | + // given rectangle, center and shrinking or expanding as needed if requested. | ||
| 750 | + | ||
| 751 | + // When rendering a form XObject, the transformation in the graphics state (cm) is applied first | ||
| 752 | + // (of course -- when it is applied, the PDF interpreter doesn't even know we're going to be | ||
| 753 | + // drawing a form XObject yet), and then the object's matrix (M) is applied. The resulting | ||
| 754 | + // matrix, when applied to the form XObject's bounding box, will generate a new rectangle. We | ||
| 755 | + // want to create a transformation matrix that make the form XObject's bounding box land in | ||
| 756 | + // exactly the right spot. | ||
| 774 | 757 | ||
| 775 | QPDFObjectHandle fdict = fo.getDict(); | 758 | QPDFObjectHandle fdict = fo.getDict(); |
| 776 | QPDFObjectHandle bbox_obj = fdict.getKey("/BBox"); | 759 | QPDFObjectHandle bbox_obj = fdict.getKey("/BBox"); |
| @@ -782,37 +765,32 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement( | @@ -782,37 +765,32 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement( | ||
| 782 | QPDFMatrix tmatrix; // "to" matrix | 765 | QPDFMatrix tmatrix; // "to" matrix |
| 783 | QPDFMatrix fmatrix; // "from" matrix | 766 | QPDFMatrix fmatrix; // "from" matrix |
| 784 | if (invert_transformations) { | 767 | if (invert_transformations) { |
| 785 | - // tmatrix inverts scaling and rotation of the destination | ||
| 786 | - // page. Applying this matrix allows the overlaid form | ||
| 787 | - // XObject's to be absolute rather than relative to properties | ||
| 788 | - // of the destination page. tmatrix is part of the computed | ||
| 789 | - // transformation matrix. | 768 | + // tmatrix inverts scaling and rotation of the destination page. Applying this matrix allows |
| 769 | + // the overlaid form XObject's to be absolute rather than relative to properties of the | ||
| 770 | + // destination page. tmatrix is part of the computed transformation matrix. | ||
| 790 | tmatrix = QPDFMatrix(getMatrixForTransformations(true)); | 771 | tmatrix = QPDFMatrix(getMatrixForTransformations(true)); |
| 791 | wmatrix.concat(tmatrix); | 772 | wmatrix.concat(tmatrix); |
| 792 | } | 773 | } |
| 793 | if (fdict.getKey("/Matrix").isMatrix()) { | 774 | if (fdict.getKey("/Matrix").isMatrix()) { |
| 794 | - // fmatrix is the transformation matrix that is applied to the | ||
| 795 | - // form XObject itself. We need this for calculations, but we | ||
| 796 | - // don't explicitly use it in the final result because the PDF | 775 | + // fmatrix is the transformation matrix that is applied to the form XObject itself. We need |
| 776 | + // this for calculations, but we don't explicitly use it in the final result because the PDF | ||
| 797 | // rendering system automatically applies this last before | 777 | // rendering system automatically applies this last before |
| 798 | // drawing the form XObject. | 778 | // drawing the form XObject. |
| 799 | fmatrix = QPDFMatrix(fdict.getKey("/Matrix").getArrayAsMatrix()); | 779 | fmatrix = QPDFMatrix(fdict.getKey("/Matrix").getArrayAsMatrix()); |
| 800 | wmatrix.concat(fmatrix); | 780 | wmatrix.concat(fmatrix); |
| 801 | } | 781 | } |
| 802 | 782 | ||
| 803 | - // The current wmatrix handles transformation from the form | ||
| 804 | - // xobject and, if requested, the destination page. Next, we have | ||
| 805 | - // to adjust this for scale and position. | 783 | + // The current wmatrix handles transformation from the form xobject and, if requested, the |
| 784 | + // destination page. Next, we have to adjust this for scale and position. | ||
| 806 | 785 | ||
| 807 | - // Step 1: figure out what scale factor we need to make the form | ||
| 808 | - // XObject's bounding box fit within the destination rectangle. | 786 | + // Step 1: figure out what scale factor we need to make the form XObject's bounding box fit |
| 787 | + // within the destination rectangle. | ||
| 809 | 788 | ||
| 810 | // Transform bounding box | 789 | // Transform bounding box |
| 811 | QPDFObjectHandle::Rectangle bbox = bbox_obj.getArrayAsRectangle(); | 790 | QPDFObjectHandle::Rectangle bbox = bbox_obj.getArrayAsRectangle(); |
| 812 | QPDFObjectHandle::Rectangle T = wmatrix.transformRectangle(bbox); | 791 | QPDFObjectHandle::Rectangle T = wmatrix.transformRectangle(bbox); |
| 813 | 792 | ||
| 814 | - // Calculate a scale factor, if needed. Shrink or expand if needed | ||
| 815 | - // and allowed. | 793 | + // Calculate a scale factor, if needed. Shrink or expand if needed and allowed. |
| 816 | if ((T.urx == T.llx) || (T.ury == T.lly)) { | 794 | if ((T.urx == T.llx) || (T.ury == T.lly)) { |
| 817 | // avoid division by zero | 795 | // avoid division by zero |
| 818 | return QPDFMatrix(); | 796 | return QPDFMatrix(); |
| @@ -834,8 +812,8 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement( | @@ -834,8 +812,8 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement( | ||
| 834 | } | 812 | } |
| 835 | } | 813 | } |
| 836 | 814 | ||
| 837 | - // Step 2: figure out what translation is required to get the | ||
| 838 | - // rectangle to the right spot: centered within the destination. | 815 | + // Step 2: figure out what translation is required to get the rectangle to the right spot: |
| 816 | + // centered within the destination. | ||
| 839 | wmatrix = QPDFMatrix(); | 817 | wmatrix = QPDFMatrix(); |
| 840 | wmatrix.scale(scale, scale); | 818 | wmatrix.scale(scale, scale); |
| 841 | wmatrix.concat(tmatrix); | 819 | wmatrix.concat(tmatrix); |
| @@ -849,9 +827,8 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement( | @@ -849,9 +827,8 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement( | ||
| 849 | double tx = r_cx - t_cx; | 827 | double tx = r_cx - t_cx; |
| 850 | double ty = r_cy - t_cy; | 828 | double ty = r_cy - t_cy; |
| 851 | 829 | ||
| 852 | - // Now we can calculate the final matrix. The final matrix does | ||
| 853 | - // not include fmatrix because that is applied automatically by | ||
| 854 | - // the PDF interpreter. | 830 | + // Now we can calculate the final matrix. The final matrix does not include fmatrix because that |
| 831 | + // is applied automatically by the PDF interpreter. | ||
| 855 | QPDFMatrix cm; | 832 | QPDFMatrix cm; |
| 856 | cm.translate(tx, ty); | 833 | cm.translate(tx, ty); |
| 857 | cm.scale(scale, scale); | 834 | cm.scale(scale, scale); |
| @@ -921,18 +898,15 @@ QPDFPageObjectHelper::flattenRotation(QPDFAcroFormDocumentHelper* afdh) | @@ -921,18 +898,15 @@ QPDFPageObjectHelper::flattenRotation(QPDFAcroFormDocumentHelper* afdh) | ||
| 921 | auto rect = box.getArrayAsRectangle(); | 898 | auto rect = box.getArrayAsRectangle(); |
| 922 | decltype(rect) new_rect; | 899 | decltype(rect) new_rect; |
| 923 | 900 | ||
| 924 | - // How far are the edges of our rectangle from the edges | ||
| 925 | - // of the media box? | 901 | + // How far are the edges of our rectangle from the edges of the media box? |
| 926 | auto left_x = rect.llx - media_rect.llx; | 902 | auto left_x = rect.llx - media_rect.llx; |
| 927 | auto right_x = media_rect.urx - rect.urx; | 903 | auto right_x = media_rect.urx - rect.urx; |
| 928 | auto bottom_y = rect.lly - media_rect.lly; | 904 | auto bottom_y = rect.lly - media_rect.lly; |
| 929 | auto top_y = media_rect.ury - rect.ury; | 905 | auto top_y = media_rect.ury - rect.ury; |
| 930 | 906 | ||
| 931 | - // Rotating the page 180 degrees does not change | ||
| 932 | - // /MediaBox. Rotating 90 or 270 degrees reverses llx and | ||
| 933 | - // lly and also reverse urx and ury. For all the other | ||
| 934 | - // boxes, we want the corners to be the correct distance | ||
| 935 | - // away from the corners of the mediabox. | 907 | + // Rotating the page 180 degrees does not change /MediaBox. Rotating 90 or 270 degrees |
| 908 | + // reverses llx and lly and also reverse urx and ury. For all the other boxes, we want the | ||
| 909 | + // corners to be the correct distance away from the corners of the mediabox. | ||
| 936 | switch (rotate) { | 910 | switch (rotate) { |
| 937 | case 90: | 911 | case 90: |
| 938 | new_rect.llx = media_rect.lly + bottom_y; | 912 | new_rect.llx = media_rect.lly + bottom_y; |
| @@ -963,9 +937,8 @@ QPDFPageObjectHelper::flattenRotation(QPDFAcroFormDocumentHelper* afdh) | @@ -963,9 +937,8 @@ QPDFPageObjectHelper::flattenRotation(QPDFAcroFormDocumentHelper* afdh) | ||
| 963 | this->oh.replaceKey(boxkey, QPDFObjectHandle::newFromRectangle(new_rect)); | 937 | this->oh.replaceKey(boxkey, QPDFObjectHandle::newFromRectangle(new_rect)); |
| 964 | } | 938 | } |
| 965 | 939 | ||
| 966 | - // When we rotate the page, pivot about the point 0, 0 and then | ||
| 967 | - // translate so the page is visible with the origin point being | ||
| 968 | - // the same offset from the lower left corner of the media box. | 940 | + // When we rotate the page, pivot about the point 0, 0 and then translate so the page is visible |
| 941 | + // with the origin point being the same offset from the lower left corner of the media box. | ||
| 969 | // These calculations have been verified empirically with various | 942 | // These calculations have been verified empirically with various |
| 970 | // PDF readers. | 943 | // PDF readers. |
| 971 | QPDFMatrix cm(0, 0, 0, 0, 0, 0); | 944 | QPDFMatrix cm(0, 0, 0, 0, 0, 0); |
libqpdf/QPDFParser.cc
| @@ -41,12 +41,10 @@ namespace | @@ -41,12 +41,10 @@ namespace | ||
| 41 | QPDFObjectHandle | 41 | QPDFObjectHandle |
| 42 | QPDFParser::parse(bool& empty, bool content_stream) | 42 | QPDFParser::parse(bool& empty, bool content_stream) |
| 43 | { | 43 | { |
| 44 | - // This method must take care not to resolve any objects. Don't | ||
| 45 | - // check the type of any object without first ensuring that it is | ||
| 46 | - // a direct object. Otherwise, doing so may have the side effect | ||
| 47 | - // of reading the object and changing the file pointer. If you do | ||
| 48 | - // this, it will cause a logic error to be thrown from | ||
| 49 | - // QPDF::inParse(). | 44 | + // This method must take care not to resolve any objects. Don't check the type of any object |
| 45 | + // without first ensuring that it is a direct object. Otherwise, doing so may have the side | ||
| 46 | + // effect of reading the object and changing the file pointer. If you do this, it will cause a | ||
| 47 | + // logic error to be thrown from QPDF::inParse(). | ||
| 50 | 48 | ||
| 51 | const static std::shared_ptr<QPDFObject> null_oh = QPDF_Null::create(); | 49 | const static std::shared_ptr<QPDFObject> null_oh = QPDF_Null::create(); |
| 52 | QPDF::ParseGuard pg(context); | 50 | QPDF::ParseGuard pg(context); |
| @@ -193,18 +191,16 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -193,18 +191,16 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 193 | !olist.at(size - 2)->getObjGen().isIndirect()) { | 191 | !olist.at(size - 2)->getObjGen().isIndirect()) { |
| 194 | if (context == nullptr) { | 192 | if (context == nullptr) { |
| 195 | QTC::TC("qpdf", "QPDFParser indirect without context"); | 193 | QTC::TC("qpdf", "QPDFParser indirect without context"); |
| 196 | - throw std::logic_error("QPDFObjectHandle::parse called without context" | ||
| 197 | - " on an object with indirect references"); | 194 | + throw std::logic_error("QPDFObjectHandle::parse called without context on " |
| 195 | + "an object with indirect references"); | ||
| 198 | } | 196 | } |
| 199 | auto ref_og = QPDFObjGen( | 197 | auto ref_og = QPDFObjGen( |
| 200 | QPDFObjectHandle(olist.at(size - 2)).getIntValueAsInt(), | 198 | QPDFObjectHandle(olist.at(size - 2)).getIntValueAsInt(), |
| 201 | QPDFObjectHandle(olist.back()).getIntValueAsInt()); | 199 | QPDFObjectHandle(olist.back()).getIntValueAsInt()); |
| 202 | if (ref_og.isIndirect()) { | 200 | if (ref_og.isIndirect()) { |
| 203 | - // This action has the desirable side effect | ||
| 204 | - // of causing dangling references (references | ||
| 205 | - // to indirect objects that don't appear in | ||
| 206 | - // the PDF) in any parsed object to appear in | ||
| 207 | - // the object cache. | 201 | + // This action has the desirable side effect of causing dangling references |
| 202 | + // (references to indirect objects that don't appear in the PDF) in any | ||
| 203 | + // parsed object to appear in the object cache. | ||
| 208 | object = context->getObject(ref_og).obj; | 204 | object = context->getObject(ref_og).obj; |
| 209 | indirect_ref = true; | 205 | indirect_ref = true; |
| 210 | } else { | 206 | } else { |
| @@ -214,16 +210,14 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -214,16 +210,14 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 214 | olist.pop_back(); | 210 | olist.pop_back(); |
| 215 | olist.pop_back(); | 211 | olist.pop_back(); |
| 216 | } else if ((value == "endobj") && (state == st_top)) { | 212 | } else if ((value == "endobj") && (state == st_top)) { |
| 217 | - // We just saw endobj without having read | ||
| 218 | - // anything. Treat this as a null and do not move | ||
| 219 | - // the input source's offset. | 213 | + // We just saw endobj without having read anything. Treat this as a null and do |
| 214 | + // not move the input source's offset. | ||
| 220 | is_null = true; | 215 | is_null = true; |
| 221 | input->seek(input->getLastOffset(), SEEK_SET); | 216 | input->seek(input->getLastOffset(), SEEK_SET); |
| 222 | empty = true; | 217 | empty = true; |
| 223 | } else { | 218 | } else { |
| 224 | QTC::TC("qpdf", "QPDFParser treat word as string"); | 219 | QTC::TC("qpdf", "QPDFParser treat word as string"); |
| 225 | - warn("unknown token while reading object;" | ||
| 226 | - " treating as string"); | 220 | + warn("unknown token while reading object; treating as string"); |
| 227 | bad = true; | 221 | bad = true; |
| 228 | object = QPDF_String::create(value); | 222 | object = QPDF_String::create(value); |
| 229 | } | 223 | } |
| @@ -250,8 +244,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -250,8 +244,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 250 | break; | 244 | break; |
| 251 | 245 | ||
| 252 | default: | 246 | default: |
| 253 | - warn("treating unknown token type as null while " | ||
| 254 | - "reading object"); | 247 | + warn("treating unknown token type as null while reading object"); |
| 255 | bad = true; | 248 | bad = true; |
| 256 | is_null = true; | 249 | is_null = true; |
| 257 | break; | 250 | break; |
| @@ -259,8 +252,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -259,8 +252,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 259 | 252 | ||
| 260 | if (object == nullptr && !is_null && | 253 | if (object == nullptr && !is_null && |
| 261 | (!((state == st_start) || (state == st_stop) || (state == st_eof)))) { | 254 | (!((state == st_start) || (state == st_stop) || (state == st_eof)))) { |
| 262 | - throw std::logic_error("QPDFObjectHandle::parseInternal: " | ||
| 263 | - "unexpected uninitialized object"); | 255 | + throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object"); |
| 264 | is_null = true; | 256 | is_null = true; |
| 265 | } | 257 | } |
| 266 | 258 | ||
| @@ -274,8 +266,8 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -274,8 +266,8 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 274 | } | 266 | } |
| 275 | } | 267 | } |
| 276 | if (bad_count > 5) { | 268 | if (bad_count > 5) { |
| 277 | - // We had too many consecutive errors without enough | ||
| 278 | - // intervening successful objects. Give up. | 269 | + // We had too many consecutive errors without enough intervening successful objects. |
| 270 | + // Give up. | ||
| 279 | warn("too many errors; giving up on reading object"); | 271 | warn("too many errors; giving up on reading object"); |
| 280 | state = st_top; | 272 | state = st_top; |
| 281 | is_null = true; | 273 | is_null = true; |
| @@ -287,8 +279,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -287,8 +279,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 287 | warn("parse error while reading object"); | 279 | warn("parse error while reading object"); |
| 288 | } | 280 | } |
| 289 | done = true; | 281 | done = true; |
| 290 | - // In content stream mode, leave object uninitialized to | ||
| 291 | - // indicate EOF | 282 | + // In content stream mode, leave object uninitialized to indicate EOF |
| 292 | if (!content_stream) { | 283 | if (!content_stream) { |
| 293 | is_null = true; | 284 | is_null = true; |
| 294 | } | 285 | } |
| @@ -298,8 +289,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -298,8 +289,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 298 | case st_array: | 289 | case st_array: |
| 299 | if (is_null) { | 290 | if (is_null) { |
| 300 | object = null_oh; | 291 | object = null_oh; |
| 301 | - // No need to set description for direct nulls - they probably | ||
| 302 | - // will become implicit. | 292 | + // No need to set description for direct nulls - they probably will become implicit. |
| 303 | } else if (!indirect_ref) { | 293 | } else if (!indirect_ref) { |
| 304 | setDescription(object, input->getLastOffset()); | 294 | setDescription(object, input->getLastOffset()); |
| 305 | } | 295 | } |
| @@ -316,23 +306,22 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -316,23 +306,22 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 316 | 306 | ||
| 317 | case st_stop: | 307 | case st_stop: |
| 318 | if ((state_stack.size() < 2) || (stack.size() < 2)) { | 308 | if ((state_stack.size() < 2) || (stack.size() < 2)) { |
| 319 | - throw std::logic_error("QPDFObjectHandle::parseInternal: st_stop encountered" | ||
| 320 | - " with insufficient elements in stack"); | 309 | + throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with " |
| 310 | + "insufficient elements in stack"); | ||
| 321 | } | 311 | } |
| 322 | parser_state_e old_state = state_stack.back(); | 312 | parser_state_e old_state = state_stack.back(); |
| 323 | state_stack.pop_back(); | 313 | state_stack.pop_back(); |
| 324 | if (old_state == st_array) { | 314 | if (old_state == st_array) { |
| 325 | object = QPDF_Array::create(std::move(olist), frame.null_count > 100); | 315 | object = QPDF_Array::create(std::move(olist), frame.null_count > 100); |
| 326 | setDescription(object, offset - 1); | 316 | setDescription(object, offset - 1); |
| 327 | - // The `offset` points to the next of "[". Set the rewind | ||
| 328 | - // offset to point to the beginning of "[". This has been | ||
| 329 | - // explicitly tested with whitespace surrounding the array start | ||
| 330 | - // delimiter. getLastOffset points to the array end token and | ||
| 331 | - // therefore can't be used here. | 317 | + // The `offset` points just past "[". Set the rewind offset to point to the |
| 318 | + // beginning of "[". This has been explicitly tested with whitespace surrounding the | ||
| 319 | + // array start delimiter. getLastOffset points to the array end token and therefore | ||
| 320 | + // can't be used here. | ||
| 332 | set_offset = true; | 321 | set_offset = true; |
| 333 | } else if (old_state == st_dictionary) { | 322 | } else if (old_state == st_dictionary) { |
| 334 | - // Convert list to map. Alternating elements are keys. Attempt | ||
| 335 | - // to recover more or less gracefully from invalid dictionaries. | 323 | + // Convert list to map. Alternating elements are keys. Attempt to recover more or |
| 324 | + // less gracefully from invalid dictionaries. | ||
| 336 | std::set<std::string> names; | 325 | std::set<std::string> names; |
| 337 | for (auto& obj: olist) { | 326 | for (auto& obj: olist) { |
| 338 | if (obj) { | 327 | if (obj) { |
| @@ -358,8 +347,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -358,8 +347,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 358 | } | 347 | } |
| 359 | warn( | 348 | warn( |
| 360 | offset, | 349 | offset, |
| 361 | - "expected dictionary key but found" | ||
| 362 | - " non-name object; inserting key " + | 350 | + "expected dictionary key but found non-name object; inserting key " + |
| 363 | key); | 351 | key); |
| 364 | } | 352 | } |
| 365 | if (dict.count(key) > 0) { | 353 | if (dict.count(key) > 0) { |
| @@ -367,8 +355,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -367,8 +355,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 367 | warn( | 355 | warn( |
| 368 | offset, | 356 | offset, |
| 369 | "dictionary has duplicated key " + key + | 357 | "dictionary has duplicated key " + key + |
| 370 | - "; last occurrence overrides earlier " | ||
| 371 | - "ones"); | 358 | + "; last occurrence overrides earlier ones"); |
| 372 | } | 359 | } |
| 373 | 360 | ||
| 374 | // Calculate value. | 361 | // Calculate value. |
| @@ -380,8 +367,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -380,8 +367,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 380 | QTC::TC("qpdf", "QPDFParser no val for last key"); | 367 | QTC::TC("qpdf", "QPDFParser no val for last key"); |
| 381 | warn( | 368 | warn( |
| 382 | offset, | 369 | offset, |
| 383 | - "dictionary ended prematurely; " | ||
| 384 | - "using null as value for last key"); | 370 | + "dictionary ended prematurely; using null as value for last key"); |
| 385 | val = QPDF_Null::create(); | 371 | val = QPDF_Null::create(); |
| 386 | } | 372 | } |
| 387 | 373 | ||
| @@ -395,11 +381,10 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -395,11 +381,10 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 395 | } | 381 | } |
| 396 | object = QPDF_Dictionary::create(std::move(dict)); | 382 | object = QPDF_Dictionary::create(std::move(dict)); |
| 397 | setDescription(object, offset - 2); | 383 | setDescription(object, offset - 2); |
| 398 | - // The `offset` points to the next of "<<". Set the rewind | ||
| 399 | - // offset to point to the beginning of "<<". This has been | ||
| 400 | - // explicitly tested with whitespace surrounding the dictionary | ||
| 401 | - // start delimiter. getLastOffset points to the dictionary end | ||
| 402 | - // token and therefore can't be used here. | 384 | + // The `offset` points just past "<<". Set the rewind offset to point to the |
| 385 | + // beginning of "<<". This has been explicitly tested with whitespace surrounding | ||
| 386 | + // the dictionary start delimiter. getLastOffset points to the dictionary end token | ||
| 387 | + // and therefore can't be used here. | ||
| 403 | set_offset = true; | 388 | set_offset = true; |
| 404 | } | 389 | } |
| 405 | stack.pop_back(); | 390 | stack.pop_back(); |
| @@ -431,9 +416,8 @@ QPDFParser::setDescription(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parse | @@ -431,9 +416,8 @@ QPDFParser::setDescription(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parse | ||
| 431 | void | 416 | void |
| 432 | QPDFParser::warn(QPDFExc const& e) const | 417 | QPDFParser::warn(QPDFExc const& e) const |
| 433 | { | 418 | { |
| 434 | - // If parsing on behalf of a QPDF object and want to give a | ||
| 435 | - // warning, we can warn through the object. If parsing for some | ||
| 436 | - // other reason, such as an explicit creation of an object from a | 419 | + // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the |
| 420 | + // object. If parsing for some other reason, such as an explicit creation of an object from a | ||
| 437 | // string, then just throw the exception. | 421 | // string, then just throw the exception. |
| 438 | if (context) { | 422 | if (context) { |
| 439 | context->warn(e); | 423 | context->warn(e); |
libqpdf/QPDFTokenizer.cc
| 1 | #include <qpdf/QPDFTokenizer.hh> | 1 | #include <qpdf/QPDFTokenizer.hh> |
| 2 | 2 | ||
| 3 | -// DO NOT USE ctype -- it is locale dependent for some things, and | ||
| 4 | -// it's not worth the risk of including it in case it may accidentally | ||
| 5 | -// be used. | 3 | +// DO NOT USE ctype -- it is locale dependent for some things, and it's not worth the risk of |
| 4 | +// including it in case it may accidentally be used. | ||
| 6 | 5 | ||
| 7 | #include <qpdf/QIntC.hh> | 6 | #include <qpdf/QIntC.hh> |
| 8 | #include <qpdf/QPDFExc.hh> | 7 | #include <qpdf/QPDFExc.hh> |
| @@ -45,8 +44,8 @@ namespace | @@ -45,8 +44,8 @@ namespace | ||
| 45 | bool | 44 | bool |
| 46 | QPDFWordTokenFinder::check() | 45 | QPDFWordTokenFinder::check() |
| 47 | { | 46 | { |
| 48 | - // Find a word token matching the given string, preceded by a | ||
| 49 | - // delimiter, and followed by a delimiter or EOF. | 47 | + // Find a word token matching the given string, preceded by a delimiter, and followed by a |
| 48 | + // delimiter or EOF. | ||
| 50 | QPDFTokenizer tokenizer; | 49 | QPDFTokenizer tokenizer; |
| 51 | QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true); | 50 | QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true); |
| 52 | qpdf_offset_t pos = is->tell(); | 51 | qpdf_offset_t pos = is->tell(); |
| @@ -68,8 +67,7 @@ QPDFWordTokenFinder::check() | @@ -68,8 +67,7 @@ QPDFWordTokenFinder::check() | ||
| 68 | return false; | 67 | return false; |
| 69 | } | 68 | } |
| 70 | if (token_start == 0) { | 69 | if (token_start == 0) { |
| 71 | - // Can't actually happen...we never start the search at the | ||
| 72 | - // beginning of the input. | 70 | + // Can't actually happen...we never start the search at the beginning of the input. |
| 73 | return false; | 71 | return false; |
| 74 | } | 72 | } |
| 75 | return true; | 73 | return true; |
| @@ -147,9 +145,9 @@ QPDFTokenizer::presentCharacter(char ch) | @@ -147,9 +145,9 @@ QPDFTokenizer::presentCharacter(char ch) | ||
| 147 | void | 145 | void |
| 148 | QPDFTokenizer::handleCharacter(char ch) | 146 | QPDFTokenizer::handleCharacter(char ch) |
| 149 | { | 147 | { |
| 150 | - // State machine is implemented such that the final character may not be | ||
| 151 | - // handled. This happens whenever you have to use a character from the | ||
| 152 | - // next token to detect the end of the current token. | 148 | + // State machine is implemented such that the final character may not be handled. This happens |
| 149 | + // whenever you have to use a character from the next token to detect the end of the current | ||
| 150 | + // token. | ||
| 153 | 151 | ||
| 154 | switch (this->state) { | 152 | switch (this->state) { |
| 155 | case st_top: | 153 | case st_top: |
| @@ -248,15 +246,14 @@ QPDFTokenizer::handleCharacter(char ch) | @@ -248,15 +246,14 @@ QPDFTokenizer::handleCharacter(char ch) | ||
| 248 | void | 246 | void |
| 249 | QPDFTokenizer::inTokenReady(char ch) | 247 | QPDFTokenizer::inTokenReady(char ch) |
| 250 | { | 248 | { |
| 251 | - throw std::logic_error("INTERNAL ERROR: QPDF tokenizer presented character " | ||
| 252 | - "while token is waiting"); | 249 | + throw std::logic_error( |
| 250 | + "INTERNAL ERROR: QPDF tokenizer presented character while token is waiting"); | ||
| 253 | } | 251 | } |
| 254 | 252 | ||
| 255 | void | 253 | void |
| 256 | QPDFTokenizer::inBeforeToken(char ch) | 254 | QPDFTokenizer::inBeforeToken(char ch) |
| 257 | { | 255 | { |
| 258 | - // Note: we specifically do not use ctype here. It is | ||
| 259 | - // locale-dependent. | 256 | + // Note: we specifically do not use ctype here. It is locale-dependent. |
| 260 | if (isSpace(ch)) { | 257 | if (isSpace(ch)) { |
| 261 | this->before_token = !this->include_ignorable; | 258 | this->before_token = !this->include_ignorable; |
| 262 | this->in_token = this->include_ignorable; | 259 | this->in_token = this->include_ignorable; |
| @@ -421,11 +418,9 @@ void | @@ -421,11 +418,9 @@ void | ||
| 421 | QPDFTokenizer::inName(char ch) | 418 | QPDFTokenizer::inName(char ch) |
| 422 | { | 419 | { |
| 423 | if (isDelimiter(ch)) { | 420 | if (isDelimiter(ch)) { |
| 424 | - // A C-locale whitespace character or delimiter terminates | ||
| 425 | - // token. It is important to unread the whitespace | ||
| 426 | - // character even though it is ignored since it may be the | ||
| 427 | - // newline after a stream keyword. Removing it here could | ||
| 428 | - // make the stream-reading code break on some files, | 421 | + // A C-locale whitespace character or delimiter terminates token. It is important to unread |
| 422 | + // the whitespace character even though it is ignored since it may be the newline after a | ||
| 423 | + // stream keyword. Removing it here could make the stream-reading code break on some files, | ||
| 429 | // though not on any files in the test suite as of this | 424 | // though not on any files in the test suite as of this |
| 430 | // writing. | 425 | // writing. |
| 431 | 426 | ||
| @@ -452,8 +447,7 @@ QPDFTokenizer::inNameHex1(char ch) | @@ -452,8 +447,7 @@ QPDFTokenizer::inNameHex1(char ch) | ||
| 452 | } else { | 447 | } else { |
| 453 | QTC::TC("qpdf", "QPDFTokenizer bad name 1"); | 448 | QTC::TC("qpdf", "QPDFTokenizer bad name 1"); |
| 454 | this->error_message = "name with stray # will not work with PDF >= 1.2"; | 449 | this->error_message = "name with stray # will not work with PDF >= 1.2"; |
| 455 | - // Use null to encode a bad # -- this is reversed | ||
| 456 | - // in QPDF_Name::normalizeName. | 450 | + // Use null to encode a bad # -- this is reversed in QPDF_Name::normalizeName. |
| 457 | this->val += '\0'; | 451 | this->val += '\0'; |
| 458 | this->state = st_name; | 452 | this->state = st_name; |
| 459 | inName(ch); | 453 | inName(ch); |
| @@ -468,8 +462,7 @@ QPDFTokenizer::inNameHex2(char ch) | @@ -468,8 +462,7 @@ QPDFTokenizer::inNameHex2(char ch) | ||
| 468 | } else { | 462 | } else { |
| 469 | QTC::TC("qpdf", "QPDFTokenizer bad name 2"); | 463 | QTC::TC("qpdf", "QPDFTokenizer bad name 2"); |
| 470 | this->error_message = "name with stray # will not work with PDF >= 1.2"; | 464 | this->error_message = "name with stray # will not work with PDF >= 1.2"; |
| 471 | - // Use null to encode a bad # -- this is reversed | ||
| 472 | - // in QPDF_Name::normalizeName. | 465 | + // Use null to encode a bad # -- this is reversed in QPDF_Name::normalizeName. |
| 473 | this->val += '\0'; | 466 | this->val += '\0'; |
| 474 | this->val += this->hex_char; | 467 | this->val += this->hex_char; |
| 475 | this->state = st_name; | 468 | this->state = st_name; |
| @@ -636,13 +629,10 @@ void | @@ -636,13 +629,10 @@ void | ||
| 636 | QPDFTokenizer::inLiteral(char ch) | 629 | QPDFTokenizer::inLiteral(char ch) |
| 637 | { | 630 | { |
| 638 | if (isDelimiter(ch)) { | 631 | if (isDelimiter(ch)) { |
| 639 | - // A C-locale whitespace character or delimiter terminates | ||
| 640 | - // token. It is important to unread the whitespace | ||
| 641 | - // character even though it is ignored since it may be the | ||
| 642 | - // newline after a stream keyword. Removing it here could | ||
| 643 | - // make the stream-reading code break on some files, | ||
| 644 | - // though not on any files in the test suite as of this | ||
| 645 | - // writing. | 632 | + // A C-locale whitespace character or delimiter terminates token. It is important to unread |
| 633 | + // the whitespace character even though it is ignored since it may be the newline after a | ||
| 634 | + // stream keyword. Removing it here could make the stream-reading code break on some files, | ||
| 635 | + // though not on any files in the test suite as of this writing. | ||
| 646 | 636 | ||
| 647 | this->in_token = false; | 637 | this->in_token = false; |
| 648 | this->char_to_unread = ch; | 638 | this->char_to_unread = ch; |
| @@ -707,8 +697,7 @@ QPDFTokenizer::inCharCode(char ch) | @@ -707,8 +697,7 @@ QPDFTokenizer::inCharCode(char ch) | ||
| 707 | if (++(this->digit_count) < 3) { | 697 | if (++(this->digit_count) < 3) { |
| 708 | return; | 698 | return; |
| 709 | } | 699 | } |
| 710 | - // We've accumulated \ddd. PDF Spec says to ignore | ||
| 711 | - // high-order overflow. | 700 | + // We've accumulated \ddd. PDF Spec says to ignore high-order overflow. |
| 712 | } | 701 | } |
| 713 | this->val += char(this->char_code % 256); | 702 | this->val += char(this->char_code % 256); |
| 714 | this->state = st_in_string; | 703 | this->state = st_in_string; |
| @@ -739,8 +728,7 @@ QPDFTokenizer::presentEOF() | @@ -739,8 +728,7 @@ QPDFTokenizer::presentEOF() | ||
| 739 | case st_decimal: | 728 | case st_decimal: |
| 740 | case st_literal: | 729 | case st_literal: |
| 741 | QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token"); | 730 | QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token"); |
| 742 | - // Push any delimiter to the state machine to finish off the final | ||
| 743 | - // token. | 731 | + // Push any delimiter to the state machine to finish off the final token. |
| 744 | presentCharacter('\f'); | 732 | presentCharacter('\f'); |
| 745 | this->in_token = true; | 733 | this->in_token = true; |
| 746 | break; | 734 | break; |
| @@ -794,14 +782,12 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input) | @@ -794,14 +782,12 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input) | ||
| 794 | qpdf_offset_t last_offset = input->getLastOffset(); | 782 | qpdf_offset_t last_offset = input->getLastOffset(); |
| 795 | qpdf_offset_t pos = input->tell(); | 783 | qpdf_offset_t pos = input->tell(); |
| 796 | 784 | ||
| 797 | - // Use QPDFWordTokenFinder to find EI surrounded by delimiters. | ||
| 798 | - // Then read the next several tokens or up to EOF. If we find any | ||
| 799 | - // suspicious-looking or tokens, this is probably still part of | ||
| 800 | - // the image data, so keep looking for EI. Stop at the first EI | ||
| 801 | - // that passes. If we get to the end without finding one, return | ||
| 802 | - // the last EI we found. Store the number of bytes expected in the | ||
| 803 | - // inline image including the EI and use that to break out of | ||
| 804 | - // inline image, falling back to the old method if needed. | 785 | + // Use QPDFWordTokenFinder to find EI surrounded by delimiters. Then read the next several |
| 786 | + // tokens or up to EOF. If we find any suspicious-looking tokens, this is probably still part | ||
| 787 | + // of the image data, so keep looking for EI. Stop at the first EI that passes. If we get to the | ||
| 788 | + // end without finding one, return the last EI we found. Store the number of bytes expected in | ||
| 789 | + // the inline image including the EI and use that to break out of inline image, falling back to | ||
| 790 | + // the old method if needed. | ||
| 805 | 791 | ||
| 806 | bool okay = false; | 792 | bool okay = false; |
| 807 | bool first_try = true; | 793 | bool first_try = true; |
| @@ -814,13 +800,11 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input) | @@ -814,13 +800,11 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input) | ||
| 814 | 800 | ||
| 815 | QPDFTokenizer check; | 801 | QPDFTokenizer check; |
| 816 | bool found_bad = false; | 802 | bool found_bad = false; |
| 817 | - // Look at the next 10 tokens or up to EOF. The next inline | ||
| 818 | - // image's image data would look like bad tokens, but there | ||
| 819 | - // will always be at least 10 tokens between one inline | ||
| 820 | - // image's EI and the next valid one's ID since width, height, | ||
| 821 | - // bits per pixel, and color space are all required as well as | ||
| 822 | - // a BI and ID. If we get 10 good tokens in a row or hit EOF, | ||
| 823 | - // we can be pretty sure we've found the actual EI. | 803 | + // Look at the next 10 tokens or up to EOF. The next inline image's image data would look |
| 804 | + // like bad tokens, but there will always be at least 10 tokens between one inline image's | ||
| 805 | + // EI and the next valid one's ID since width, height, bits per pixel, and color space are | ||
| 806 | + // all required as well as a BI and ID. If we get 10 good tokens in a row or hit EOF, we can | ||
| 807 | + // be pretty sure we've found the actual EI. | ||
| 824 | for (int i = 0; i < 10; ++i) { | 808 | for (int i = 0; i < 10; ++i) { |
| 825 | QPDFTokenizer::Token t = check.readToken(input, "checker", true); | 809 | QPDFTokenizer::Token t = check.readToken(input, "checker", true); |
| 826 | token_type_e type = t.getType(); | 810 | token_type_e type = t.getType(); |
| @@ -829,27 +813,22 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input) | @@ -829,27 +813,22 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input) | ||
| 829 | } else if (type == tt_bad) { | 813 | } else if (type == tt_bad) { |
| 830 | found_bad = true; | 814 | found_bad = true; |
| 831 | } else if (t.isWord()) { | 815 | } else if (t.isWord()) { |
| 832 | - // The qpdf tokenizer lumps alphabetic and otherwise | ||
| 833 | - // uncategorized characters into "words". We recognize | ||
| 834 | - // strings of alphabetic characters as potential valid | ||
| 835 | - // operators for purposes of telling whether we're in | ||
| 836 | - // valid content or not. It's not perfect, but it | ||
| 837 | - // should work more reliably than what we used to do, | ||
| 838 | - // which was already good enough for the vast majority | ||
| 839 | - // of files. | 816 | + // The qpdf tokenizer lumps alphabetic and otherwise uncategorized characters into |
| 817 | + // "words". We recognize strings of alphabetic characters as potential valid | ||
| 818 | + // operators for purposes of telling whether we're in valid content or not. It's not | ||
| 819 | + // perfect, but it should work more reliably than what we used to do, which was | ||
| 820 | + // already good enough for the vast majority of files. | ||
| 840 | bool found_alpha = false; | 821 | bool found_alpha = false; |
| 841 | bool found_non_printable = false; | 822 | bool found_non_printable = false; |
| 842 | bool found_other = false; | 823 | bool found_other = false; |
| 843 | for (char ch: t.getValue()) { | 824 | for (char ch: t.getValue()) { |
| 844 | if (((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A') && (ch <= 'Z')) || | 825 | if (((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A') && (ch <= 'Z')) || |
| 845 | (ch == '*')) { | 826 | (ch == '*')) { |
| 846 | - // Treat '*' as alpha since there are valid | ||
| 847 | - // PDF operators that contain * along with | ||
| 848 | - // alphabetic characters. | 827 | + // Treat '*' as alpha since there are valid PDF operators that contain * |
| 828 | + // along with alphabetic characters. | ||
| 849 | found_alpha = true; | 829 | found_alpha = true; |
| 850 | } else if ((static_cast<signed char>(ch) < 32) && (!isSpace(ch))) { | 830 | } else if ((static_cast<signed char>(ch) < 32) && (!isSpace(ch))) { |
| 851 | - // Compare ch as a signed char so characters | ||
| 852 | - // outside of 7-bit will be < 0. | 831 | + // Compare ch as a signed char so characters outside of 7-bit will be < 0. |
| 853 | found_non_printable = true; | 832 | found_non_printable = true; |
| 854 | break; | 833 | break; |
| 855 | } else { | 834 | } else { |
| @@ -903,9 +882,9 @@ QPDFTokenizer::betweenTokens() | @@ -903,9 +882,9 @@ QPDFTokenizer::betweenTokens() | ||
| 903 | 882 | ||
| 904 | QPDFTokenizer::Token | 883 | QPDFTokenizer::Token |
| 905 | QPDFTokenizer::readToken( | 884 | QPDFTokenizer::readToken( |
| 906 | - std::shared_ptr<InputSource> input, std::string const& context, bool allow_bad, size_t max_len) | 885 | + InputSource& input, std::string const& context, bool allow_bad, size_t max_len) |
| 907 | { | 886 | { |
| 908 | - nextToken(*input, context, max_len); | 887 | + nextToken(input, context, max_len); |
| 909 | 888 | ||
| 910 | Token token; | 889 | Token token; |
| 911 | bool unread_char; | 890 | bool unread_char; |
| @@ -918,15 +897,22 @@ QPDFTokenizer::readToken( | @@ -918,15 +897,22 @@ QPDFTokenizer::readToken( | ||
| 918 | } else { | 897 | } else { |
| 919 | throw QPDFExc( | 898 | throw QPDFExc( |
| 920 | qpdf_e_damaged_pdf, | 899 | qpdf_e_damaged_pdf, |
| 921 | - input->getName(), | 900 | + input.getName(), |
| 922 | context, | 901 | context, |
| 923 | - input->getLastOffset(), | 902 | + input.getLastOffset(), |
| 924 | token.getErrorMessage()); | 903 | token.getErrorMessage()); |
| 925 | } | 904 | } |
| 926 | } | 905 | } |
| 927 | return token; | 906 | return token; |
| 928 | } | 907 | } |
| 929 | 908 | ||
| 909 | +QPDFTokenizer::Token | ||
| 910 | +QPDFTokenizer::readToken( | ||
| 911 | + std::shared_ptr<InputSource> input, std::string const& context, bool allow_bad, size_t max_len) | ||
| 912 | +{ | ||
| 913 | + return readToken(*input, context, allow_bad, max_len); | ||
| 914 | +} | ||
| 915 | + | ||
| 930 | bool | 916 | bool |
| 931 | QPDFTokenizer::nextToken(InputSource& input, std::string const& context, size_t max_len) | 917 | QPDFTokenizer::nextToken(InputSource& input, std::string const& context, size_t max_len) |
| 932 | { | 918 | { |
| @@ -941,9 +927,8 @@ QPDFTokenizer::nextToken(InputSource& input, std::string const& context, size_t | @@ -941,9 +927,8 @@ QPDFTokenizer::nextToken(InputSource& input, std::string const& context, size_t | ||
| 941 | presentEOF(); | 927 | presentEOF(); |
| 942 | 928 | ||
| 943 | if ((this->type == tt_eof) && (!this->allow_eof)) { | 929 | if ((this->type == tt_eof) && (!this->allow_eof)) { |
| 944 | - // Nothing in the qpdf library calls readToken | ||
| 945 | - // without allowEOF anymore, so this case is not | ||
| 946 | - // exercised. | 930 | + // Nothing in the qpdf library calls readToken without allowEOF anymore, so this |
| 931 | + // case is not exercised. | ||
| 947 | this->type = tt_bad; | 932 | this->type = tt_bad; |
| 948 | this->error_message = "unexpected EOF"; | 933 | this->error_message = "unexpected EOF"; |
| 949 | offset = input.getLastOffset(); | 934 | offset = input.getLastOffset(); |
libqpdf/QPDF_Stream.cc
| @@ -69,10 +69,9 @@ namespace | @@ -69,10 +69,9 @@ namespace | ||
| 69 | } // namespace | 69 | } // namespace |
| 70 | 70 | ||
| 71 | std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = { | 71 | std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = { |
| 72 | - // The PDF specification provides these filter abbreviations for | ||
| 73 | - // use in inline images, but according to table H.1 in the pre-ISO | ||
| 74 | - // versions of the PDF specification, Adobe Reader also accepts | ||
| 75 | - // them for stream filters. | 72 | + // The PDF specification provides these filter abbreviations for use in inline images, but |
| 73 | + // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader also | ||
| 74 | + // accepts them for stream filters. | ||
| 76 | {"/AHx", "/ASCIIHexDecode"}, | 75 | {"/AHx", "/ASCIIHexDecode"}, |
| 77 | {"/A85", "/ASCII85Decode"}, | 76 | {"/A85", "/ASCII85Decode"}, |
| 78 | {"/LZW", "/LZWDecode"}, | 77 | {"/LZW", "/LZWDecode"}, |
| @@ -118,8 +117,8 @@ QPDF_Stream::QPDF_Stream( | @@ -118,8 +117,8 @@ QPDF_Stream::QPDF_Stream( | ||
| 118 | length(length) | 117 | length(length) |
| 119 | { | 118 | { |
| 120 | if (!stream_dict.isDictionary()) { | 119 | if (!stream_dict.isDictionary()) { |
| 121 | - throw std::logic_error("stream object instantiated with non-dictionary " | ||
| 122 | - "object for dictionary"); | 120 | + throw std::logic_error( |
| 121 | + "stream object instantiated with non-dictionary object for dictionary"); | ||
| 123 | } | 122 | } |
| 124 | auto descr = std::make_shared<QPDFValue::Description>( | 123 | auto descr = std::make_shared<QPDFValue::Description>( |
| 125 | qpdf->getFilename() + ", stream object " + og.unparse(' ')); | 124 | qpdf->getFilename() + ", stream object " + og.unparse(' ')); |
| @@ -198,18 +197,18 @@ QPDF_Stream::getStreamJSON( | @@ -198,18 +197,18 @@ QPDF_Stream::getStreamJSON( | ||
| 198 | case qpdf_sj_none: | 197 | case qpdf_sj_none: |
| 199 | case qpdf_sj_inline: | 198 | case qpdf_sj_inline: |
| 200 | if (p != nullptr) { | 199 | if (p != nullptr) { |
| 201 | - throw std::logic_error("QPDF_Stream::getStreamJSON: pipeline should " | ||
| 202 | - "only be supplied when json_data is file"); | 200 | + throw std::logic_error("QPDF_Stream::getStreamJSON: pipeline should only be supplied " |
| 201 | + "when json_data is file"); | ||
| 203 | } | 202 | } |
| 204 | break; | 203 | break; |
| 205 | case qpdf_sj_file: | 204 | case qpdf_sj_file: |
| 206 | if (p == nullptr) { | 205 | if (p == nullptr) { |
| 207 | - throw std::logic_error("QPDF_Stream::getStreamJSON: pipeline must " | ||
| 208 | - "be supplied when json_data is file"); | 206 | + throw std::logic_error( |
| 207 | + "QPDF_Stream::getStreamJSON: pipeline must be supplied when json_data is file"); | ||
| 209 | } | 208 | } |
| 210 | if (data_filename.empty()) { | 209 | if (data_filename.empty()) { |
| 211 | - throw std::logic_error("QPDF_Stream::getStreamJSON: data_filename " | ||
| 212 | - "must be supplied when json_data is file"); | 210 | + throw std::logic_error("QPDF_Stream::getStreamJSON: data_filename must be supplied " |
| 211 | + "when json_data is file"); | ||
| 213 | } | 212 | } |
| 214 | break; | 213 | break; |
| 215 | } | 214 | } |
| @@ -244,8 +243,7 @@ QPDF_Stream::getStreamJSON( | @@ -244,8 +243,7 @@ QPDF_Stream::getStreamJSON( | ||
| 244 | break; | 243 | break; |
| 245 | } | 244 | } |
| 246 | } | 245 | } |
| 247 | - // We can use unsafeShallowCopy because we are only | ||
| 248 | - // touching top-level keys. | 246 | + // We can use unsafeShallowCopy because we are only touching top-level keys. |
| 249 | dict = this->stream_dict.unsafeShallowCopy(); | 247 | dict = this->stream_dict.unsafeShallowCopy(); |
| 250 | dict.removeKey("/Length"); | 248 | dict.removeKey("/Length"); |
| 251 | if (filter && filtered) { | 249 | if (filter && filtered) { |
| @@ -408,8 +406,7 @@ QPDF_Stream::filterable( | @@ -408,8 +406,7 @@ QPDF_Stream::filterable( | ||
| 408 | return false; | 406 | return false; |
| 409 | } | 407 | } |
| 410 | 408 | ||
| 411 | - // filters now contains a list of filters to be applied in order. | ||
| 412 | - // See which ones we can support. | 409 | + // filters now contains a list of filters to be applied in order. See which ones we can support. |
| 413 | 410 | ||
| 414 | // See if we can support any decode parameters that are specified. | 411 | // See if we can support any decode parameters that are specified. |
| 415 | 412 | ||
| @@ -428,9 +425,8 @@ QPDF_Stream::filterable( | @@ -428,9 +425,8 @@ QPDF_Stream::filterable( | ||
| 428 | } | 425 | } |
| 429 | } | 426 | } |
| 430 | 427 | ||
| 431 | - // Ignore /DecodeParms entirely if /Filters is empty. At least | ||
| 432 | - // one case of a file whose /DecodeParms was [ << >> ] when | ||
| 433 | - // /Filters was empty has been seen in the wild. | 428 | + // Ignore /DecodeParms entirely if /Filters is empty. At least one case of a file whose |
| 429 | + // /DecodeParms was [ << >> ] when /Filters was empty has been seen in the wild. | ||
| 434 | if ((filters.size() != 0) && (decode_parms.size() != filters.size())) { | 430 | if ((filters.size() != 0) && (decode_parms.size() != filters.size())) { |
| 435 | warn("stream /DecodeParms length is inconsistent with filters"); | 431 | warn("stream /DecodeParms length is inconsistent with filters"); |
| 436 | filterable = false; | 432 | filterable = false; |
| @@ -502,9 +498,8 @@ QPDF_Stream::pipeStreamData( | @@ -502,9 +498,8 @@ QPDF_Stream::pipeStreamData( | ||
| 502 | return filter; | 498 | return filter; |
| 503 | } | 499 | } |
| 504 | 500 | ||
| 505 | - // Construct the pipeline in reverse order. Force pipelines we | ||
| 506 | - // create to be deleted when this function finishes. Pipelines | ||
| 507 | - // created by QPDFStreamFilter objects will be deleted by those | 501 | + // Construct the pipeline in reverse order. Force pipelines we create to be deleted when this |
| 502 | + // function finishes. Pipelines created by QPDFStreamFilter objects will be deleted by those | ||
| 508 | // objects. | 503 | // objects. |
| 509 | std::vector<std::shared_ptr<Pipeline>> to_delete; | 504 | std::vector<std::shared_ptr<Pipeline>> to_delete; |
| 510 | 505 | ||
| @@ -568,8 +563,8 @@ QPDF_Stream::pipeStreamData( | @@ -568,8 +563,8 @@ QPDF_Stream::pipeStreamData( | ||
| 568 | QTC::TC("qpdf", "QPDF_Stream pipe use stream provider"); | 563 | QTC::TC("qpdf", "QPDF_Stream pipe use stream provider"); |
| 569 | } else { | 564 | } else { |
| 570 | QTC::TC("qpdf", "QPDF_Stream provider length mismatch"); | 565 | QTC::TC("qpdf", "QPDF_Stream provider length mismatch"); |
| 571 | - // This would be caused by programmer error on the | ||
| 572 | - // part of a library user, not by invalid input data. | 566 | + // This would be caused by programmer error on the part of a library user, not by |
| 567 | + // invalid input data. | ||
| 573 | throw std::runtime_error( | 568 | throw std::runtime_error( |
| 574 | "stream data provider for " + og.unparse(' ') + " provided " + | 569 | "stream data provider for " + og.unparse(' ') + " provided " + |
| 575 | std::to_string(actual_length) + " bytes instead of expected " + | 570 | std::to_string(actual_length) + " bytes instead of expected " + |
| @@ -602,14 +597,13 @@ QPDF_Stream::pipeStreamData( | @@ -602,14 +597,13 @@ QPDF_Stream::pipeStreamData( | ||
| 602 | warn("content normalization encountered bad tokens"); | 597 | warn("content normalization encountered bad tokens"); |
| 603 | if (normalizer->lastTokenWasBad()) { | 598 | if (normalizer->lastTokenWasBad()) { |
| 604 | QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize"); | 599 | QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize"); |
| 605 | - warn("normalized content ended with a bad token; you may be able " | ||
| 606 | - "to resolve this by coalescing content streams in combination " | ||
| 607 | - "with normalizing content. From the command line, specify " | ||
| 608 | - "--coalesce-contents"); | 600 | + warn("normalized content ended with a bad token; you may be able to resolve this by " |
| 601 | + "coalescing content streams in combination with normalizing content. From the " | ||
| 602 | + "command line, specify --coalesce-contents"); | ||
| 609 | } | 603 | } |
| 610 | - warn("Resulting stream data may be corrupted but is may still useful " | ||
| 611 | - "for manual inspection. For more information on this warning, " | ||
| 612 | - "search for content normalization in the manual."); | 604 | + warn("Resulting stream data may be corrupted but is may still useful for manual " |
| 605 | + "inspection. For more information on this warning, search for content normalization " | ||
| 606 | + "in the manual."); | ||
| 613 | } | 607 | } |
| 614 | 608 | ||
| 615 | return success; | 609 | return success; |
libqpdf/QPDF_encryption.cc
| @@ -137,9 +137,8 @@ pad_or_truncate_password_V4(std::string const& password, char k1[key_bytes]) | @@ -137,9 +137,8 @@ pad_or_truncate_password_V4(std::string const& password, char k1[key_bytes]) | ||
| 137 | void | 137 | void |
| 138 | QPDF::trim_user_password(std::string& user_password) | 138 | QPDF::trim_user_password(std::string& user_password) |
| 139 | { | 139 | { |
| 140 | - // Although unnecessary, this routine trims the padding string | ||
| 141 | - // from the end of a user password. Its only purpose is for | ||
| 142 | - // recovery of user passwords which is done in the test suite. | 140 | + // Although unnecessary, this routine trims the padding string from the end of a user password. |
| 141 | + // Its only purpose is for recovery of user passwords which is done in the test suite. | ||
| 143 | char const* cstr = user_password.c_str(); | 142 | char const* cstr = user_password.c_str(); |
| 144 | size_t len = user_password.length(); | 143 | size_t len = user_password.length(); |
| 145 | if (len < key_bytes) { | 144 | if (len < key_bytes) { |
| @@ -262,22 +261,17 @@ hash_V5( | @@ -262,22 +261,17 @@ hash_V5( | ||
| 262 | int round_number = 0; | 261 | int round_number = 0; |
| 263 | bool done = false; | 262 | bool done = false; |
| 264 | while (!done) { | 263 | while (!done) { |
| 265 | - // The hash algorithm has us setting K initially to the R5 | ||
| 266 | - // value and then repeating a series of steps 64 times | ||
| 267 | - // before starting with the termination case testing. The | ||
| 268 | - // wording of the specification is very unclear as to the | ||
| 269 | - // exact number of times it should be run since the | ||
| 270 | - // wording about whether the initial setup counts as round | ||
| 271 | - // 0 or not is ambiguous. This code counts the initial | ||
| 272 | - // setup (R5) value as round 0, which appears to be | ||
| 273 | - // correct. This was determined to be correct by | ||
| 274 | - // increasing or decreasing the number of rounds by 1 or 2 | ||
| 275 | - // from this value and generating 20 test files. In this | ||
| 276 | - // interpretation, all the test files worked with Adobe | ||
| 277 | - // Reader X. In the other configurations, many of the | ||
| 278 | - // files did not work, and we were accurately able to | ||
| 279 | - // predict which files didn't work by looking at the | ||
| 280 | - // conditions under which we terminated repetition. | 264 | + // The hash algorithm has us setting K initially to the R5 value and then repeating a |
| 265 | + // series of steps 64 times before starting with the termination case testing. The | ||
| 266 | + // wording of the specification is very unclear as to the exact number of times it | ||
| 267 | + // should be run since the wording about whether the initial setup counts as round 0 or | ||
| 268 | + // not is ambiguous. This code counts the initial setup (R5) value as round 0, which | ||
| 269 | + // appears to be correct. This was determined to be correct by increasing or decreasing | ||
| 270 | + // the number of rounds by 1 or 2 from this value and generating 20 test files. In this | ||
| 271 | + // interpretation, all the test files worked with Adobe Reader X. In the other | ||
| 272 | + // configurations, many of the files did not work, and we were accurately able to | ||
| 273 | + // predict which files didn't work by looking at the conditions under which we | ||
| 274 | + // terminated repetition. | ||
| 281 | 275 | ||
| 282 | ++round_number; | 276 | ++round_number; |
| 283 | std::string K1 = password + K + udata; | 277 | std::string K1 = password + K + udata; |
| @@ -291,11 +285,10 @@ hash_V5( | @@ -291,11 +285,10 @@ hash_V5( | ||
| 291 | QUtil::unsigned_char_pointer(K.substr(16, 16)), | 285 | QUtil::unsigned_char_pointer(K.substr(16, 16)), |
| 292 | 16); | 286 | 16); |
| 293 | 287 | ||
| 294 | - // E_mod_3 is supposed to be mod 3 of the first 16 bytes | ||
| 295 | - // of E taken as as a (128-bit) big-endian number. Since | ||
| 296 | - // (xy mod n) is equal to ((x mod n) + (y mod n)) mod n | ||
| 297 | - // and since 256 mod n is 1, we can just take the sums of | ||
| 298 | - // the the mod 3s of each byte to get the same result. | 288 | + // E_mod_3 is supposed to be mod 3 of the first 16 bytes of E taken as as a (128-bit) |
| 289 | + // big-endian number. Since ((x + y) mod n) is equal to ((x mod n) + (y mod n)) mod n and | | |
| 290 | + // since 256 mod n is 1, we can just take the sums of the mod 3s of each byte to get | | |
| 291 | + // the same result. | ||
| 299 | int E_mod_3 = 0; | 292 | int E_mod_3 = 0; |
| 300 | for (unsigned int i = 0; i < 16; ++i) { | 293 | for (unsigned int i = 0; i < 16; ++i) { |
| 301 | E_mod_3 += static_cast<unsigned char>(E.at(i)); | 294 | E_mod_3 += static_cast<unsigned char>(E.at(i)); |
| @@ -344,8 +337,7 @@ QPDF::compute_data_key( | @@ -344,8 +337,7 @@ QPDF::compute_data_key( | ||
| 344 | std::string result = encryption_key; | 337 | std::string result = encryption_key; |
| 345 | 338 | ||
| 346 | if (encryption_V >= 5) { | 339 | if (encryption_V >= 5) { |
| 347 | - // Algorithm 3.1a (PDF 1.7 extension level 3): just use | ||
| 348 | - // encryption key straight. | 340 | + // Algorithm 3.1a (PDF 1.7 extension level 3): just use encryption key straight. |
| 349 | return result; | 341 | return result; |
| 350 | } | 342 | } |
| 351 | 343 | ||
| @@ -370,9 +362,8 @@ std::string | @@ -370,9 +362,8 @@ std::string | ||
| 370 | QPDF::compute_encryption_key(std::string const& password, EncryptionData const& data) | 362 | QPDF::compute_encryption_key(std::string const& password, EncryptionData const& data) |
| 371 | { | 363 | { |
| 372 | if (data.getV() >= 5) { | 364 | if (data.getV() >= 5) { |
| 373 | - // For V >= 5, the encryption key is generated and stored in | ||
| 374 | - // the file, encrypted separately with both user and owner | ||
| 375 | - // passwords. | 365 | + // For V >= 5, the encryption key is generated and stored in the file, encrypted separately |
| 366 | + // with both user and owner passwords. | ||
| 376 | return recover_encryption_key_with_password(password, data); | 367 | return recover_encryption_key_with_password(password, data); |
| 377 | } else { | 368 | } else { |
| 378 | // For V < 5, the encryption key is derived from the user | 369 | // For V < 5, the encryption key is derived from the user |
| @@ -386,12 +377,10 @@ QPDF::compute_encryption_key_from_password(std::string const& password, Encrypti | @@ -386,12 +377,10 @@ QPDF::compute_encryption_key_from_password(std::string const& password, Encrypti | ||
| 386 | { | 377 | { |
| 387 | // Algorithm 3.2 from the PDF 1.7 Reference Manual | 378 | // Algorithm 3.2 from the PDF 1.7 Reference Manual |
| 388 | 379 | ||
| 389 | - // This code does not properly handle Unicode passwords. | ||
| 390 | - // Passwords are supposed to be converted from OS codepage | ||
| 391 | - // characters to PDFDocEncoding. Unicode passwords are supposed | ||
| 392 | - // to be converted to OS codepage before converting to | ||
| 393 | - // PDFDocEncoding. We instead require the password to be | ||
| 394 | - // presented in its final form. | 380 | + // This code does not properly handle Unicode passwords. Passwords are supposed to be converted |
| 381 | + // from OS codepage characters to PDFDocEncoding. Unicode passwords are supposed to be | ||
| 382 | + // converted to OS codepage before converting to PDFDocEncoding. We instead require the | ||
| 383 | + // password to be presented in its final form. | ||
| 395 | 384 | ||
| 396 | MD5 md5; | 385 | MD5 md5; |
| 397 | md5.encodeDataIncrementally(pad_or_truncate_password_V4(password).c_str(), key_bytes); | 386 | md5.encodeDataIncrementally(pad_or_truncate_password_V4(password).c_str(), key_bytes); |
| @@ -681,11 +670,9 @@ QPDF::recover_encryption_key_with_password( | @@ -681,11 +670,9 @@ QPDF::recover_encryption_key_with_password( | ||
| 681 | { | 670 | { |
| 682 | // Algorithm 3.2a from the PDF 1.7 extension level 3 | 671 | // Algorithm 3.2a from the PDF 1.7 extension level 3 |
| 683 | 672 | ||
| 684 | - // This code does not handle Unicode passwords correctly. | ||
| 685 | - // Empirical evidence suggests that most viewers don't. We are | ||
| 686 | - // supposed to process the input string with the SASLprep (RFC | ||
| 687 | - // 4013) profile of stringprep (RFC 3454) and then convert the | ||
| 688 | - // result to UTF-8. | 673 | + // This code does not handle Unicode passwords correctly. Empirical evidence suggests that most |
| 674 | + // viewers don't. We are supposed to process the input string with the SASLprep (RFC 4013) | ||
| 675 | + // profile of stringprep (RFC 3454) and then convert the result to UTF-8. | ||
| 689 | 676 | ||
| 690 | perms_valid = false; | 677 | perms_valid = false; |
| 691 | std::string key_password = truncate_password_V5(password); | 678 | std::string key_password = truncate_password_V5(password); |
| @@ -738,18 +725,16 @@ QPDF::initializeEncryption() | @@ -738,18 +725,16 @@ QPDF::initializeEncryption() | ||
| 738 | } | 725 | } |
| 739 | m->encp->encryption_initialized = true; | 726 | m->encp->encryption_initialized = true; |
| 740 | 727 | ||
| 741 | - // After we initialize encryption parameters, we must used stored | ||
| 742 | - // key information and never look at /Encrypt again. Otherwise, | ||
| 743 | - // things could go wrong if someone mutates the encryption | 728 | + // After we initialize encryption parameters, we must use stored key information and never look |
| 729 | + // at /Encrypt again. Otherwise, things could go wrong if someone mutates the encryption | ||
| 744 | // dictionary. | 730 | // dictionary. |
| 745 | 731 | ||
| 746 | if (!m->trailer.hasKey("/Encrypt")) { | 732 | if (!m->trailer.hasKey("/Encrypt")) { |
| 747 | return; | 733 | return; |
| 748 | } | 734 | } |
| 749 | 735 | ||
| 750 | - // Go ahead and set m->encrypted here. That way, isEncrypted | ||
| 751 | - // will return true even if there were errors reading the | ||
| 752 | - // encryption dictionary. | 736 | + // Go ahead and set m->encrypted here. That way, isEncrypted will return true even if there |
| 737 | + // were errors reading the encryption dictionary. | ||
| 753 | m->encp->encrypted = true; | 738 | m->encp->encrypted = true; |
| 754 | 739 | ||
| 755 | std::string id1; | 740 | std::string id1; |
| @@ -757,9 +742,8 @@ QPDF::initializeEncryption() | @@ -757,9 +742,8 @@ QPDF::initializeEncryption() | ||
| 757 | if ((id_obj.isArray() && (id_obj.getArrayNItems() == 2) && id_obj.getArrayItem(0).isString())) { | 742 | if ((id_obj.isArray() && (id_obj.getArrayNItems() == 2) && id_obj.getArrayItem(0).isString())) { |
| 758 | id1 = id_obj.getArrayItem(0).getStringValue(); | 743 | id1 = id_obj.getArrayItem(0).getStringValue(); |
| 759 | } else { | 744 | } else { |
| 760 | - // Treating a missing ID as the empty string enables qpdf to | ||
| 761 | - // decrypt some invalid encrypted files with no /ID that | ||
| 762 | - // poppler can read but Adobe Reader can't. | 745 | + // Treating a missing ID as the empty string enables qpdf to decrypt some invalid encrypted |
| 746 | + // files with no /ID that poppler can read but Adobe Reader can't. | ||
| 763 | warn(damagedPDF("trailer", "invalid /ID in trailer dictionary")); | 747 | warn(damagedPDF("trailer", "invalid /ID in trailer dictionary")); |
| 764 | } | 748 | } |
| 765 | 749 | ||
| @@ -800,8 +784,8 @@ QPDF::initializeEncryption() | @@ -800,8 +784,8 @@ QPDF::initializeEncryption() | ||
| 800 | std::string U = encryption_dict.getKey("/U").getStringValue(); | 784 | std::string U = encryption_dict.getKey("/U").getStringValue(); |
| 801 | int P = static_cast<int>(encryption_dict.getKey("/P").getIntValue()); | 785 | int P = static_cast<int>(encryption_dict.getKey("/P").getIntValue()); |
| 802 | 786 | ||
| 803 | - // If supporting new encryption R/V values, remember to update | ||
| 804 | - // error message inside this if statement. | 787 | + // If supporting new encryption R/V values, remember to update error message inside this if |
| 788 | + // statement. | ||
| 805 | if (!(((R >= 2) && (R <= 6)) && ((V == 1) || (V == 2) || (V == 4) || (V == 5)))) { | 789 | if (!(((R >= 2) && (R <= 6)) && ((V == 1) || (V == 2) || (V == 4) || (V == 5)))) { |
| 806 | throw QPDFExc( | 790 | throw QPDFExc( |
| 807 | qpdf_e_unsupported, | 791 | qpdf_e_unsupported, |
| @@ -893,8 +877,7 @@ QPDF::initializeEncryption() | @@ -893,8 +877,7 @@ QPDF::initializeEncryption() | ||
| 893 | QTC::TC("qpdf", "QPDF_encryption CFM AESV3"); | 877 | QTC::TC("qpdf", "QPDF_encryption CFM AESV3"); |
| 894 | method = e_aesv3; | 878 | method = e_aesv3; |
| 895 | } else { | 879 | } else { |
| 896 | - // Don't complain now -- maybe we won't need | ||
| 897 | - // to reference this type. | 880 | + // Don't complain now -- maybe we won't need to reference this type. |
| 898 | method = e_unknown; | 881 | method = e_unknown; |
| 899 | } | 882 | } |
| 900 | } | 883 | } |
| @@ -908,20 +891,15 @@ QPDF::initializeEncryption() | @@ -908,20 +891,15 @@ QPDF::initializeEncryption() | ||
| 908 | m->encp->cf_stream = interpretCF(m->encp, StmF); | 891 | m->encp->cf_stream = interpretCF(m->encp, StmF); |
| 909 | m->encp->cf_string = interpretCF(m->encp, StrF); | 892 | m->encp->cf_string = interpretCF(m->encp, StrF); |
| 910 | if (EFF.isName()) { | 893 | if (EFF.isName()) { |
| 911 | - // qpdf does not use this for anything other than | ||
| 912 | - // informational purposes. This is intended to instruct | ||
| 913 | - // conforming writers on which crypt filter should be used | ||
| 914 | - // when new file attachments are added to a PDF file, but | ||
| 915 | - // qpdf never generates encrypted files with non-default | ||
| 916 | - // crypt filters. Prior to 10.2, I was under the mistaken | ||
| 917 | - // impression that this was supposed to be used for | ||
| 918 | - // decrypting attachments, but the code was wrong in a way | ||
| 919 | - // that turns out not to have mattered because no writers | ||
| 920 | - // were generating files the way I was imagining. Still, | ||
| 921 | - // providing this information could be useful when looking | ||
| 922 | - // at a file generated by something else, such as Acrobat | ||
| 923 | - // when specifying that only attachments should be | ||
| 924 | - // encrypted. | 894 | + // qpdf does not use this for anything other than informational purposes. This is |
| 895 | + // intended to instruct conforming writers on which crypt filter should be used when new | ||
| 896 | + // file attachments are added to a PDF file, but qpdf never generates encrypted files | ||
| 897 | + // with non-default crypt filters. Prior to 10.2, I was under the mistaken impression | ||
| 898 | + // that this was supposed to be used for decrypting attachments, but the code was wrong | ||
| 899 | + // in a way that turns out not to have mattered because no writers were generating files | ||
| 900 | + // the way I was imagining. Still, providing this information could be useful when | ||
| 901 | + // looking at a file generated by something else, such as Acrobat when specifying that | ||
| 902 | + // only attachments should be encrypted. | ||
| 925 | m->encp->cf_file = interpretCF(m->encp, EFF); | 903 | m->encp->cf_file = interpretCF(m->encp, EFF); |
| 926 | } else { | 904 | } else { |
| 927 | m->encp->cf_file = m->encp->cf_stream; | 905 | m->encp->cf_file = m->encp->cf_stream; |
| @@ -935,8 +913,7 @@ QPDF::initializeEncryption() | @@ -935,8 +913,7 @@ QPDF::initializeEncryption() | ||
| 935 | m->encp->owner_password_matched = | 913 | m->encp->owner_password_matched = |
| 936 | check_owner_password(m->encp->user_password, m->encp->provided_password, data); | 914 | check_owner_password(m->encp->user_password, m->encp->provided_password, data); |
| 937 | if (m->encp->owner_password_matched && (V < 5)) { | 915 | if (m->encp->owner_password_matched && (V < 5)) { |
| 938 | - // password supplied was owner password; user_password has | ||
| 939 | - // been initialized for V < 5 | 916 | + // password supplied was owner password; user_password has been initialized for V < 5 |
| 940 | if (getTrimmedUserPassword() == m->encp->provided_password) { | 917 | if (getTrimmedUserPassword() == m->encp->provided_password) { |
| 941 | m->encp->user_password_matched = true; | 918 | m->encp->user_password_matched = true; |
| 942 | QTC::TC("qpdf", "QPDF_encryption user matches owner V < 5"); | 919 | QTC::TC("qpdf", "QPDF_encryption user matches owner V < 5"); |
| @@ -958,14 +935,12 @@ QPDF::initializeEncryption() | @@ -958,14 +935,12 @@ QPDF::initializeEncryption() | ||
| 958 | if (m->provided_password_is_hex_key) { | 935 | if (m->provided_password_is_hex_key) { |
| 959 | m->encp->encryption_key = QUtil::hex_decode(m->encp->provided_password); | 936 | m->encp->encryption_key = QUtil::hex_decode(m->encp->provided_password); |
| 960 | } else if (V < 5) { | 937 | } else if (V < 5) { |
| 961 | - // For V < 5, the user password is encrypted with the owner | ||
| 962 | - // password, and the user password is always used for | ||
| 963 | - // computing the encryption key. | 938 | + // For V < 5, the user password is encrypted with the owner password, and the user password |
| 939 | + // is always used for computing the encryption key. | ||
| 964 | m->encp->encryption_key = compute_encryption_key(m->encp->user_password, data); | 940 | m->encp->encryption_key = compute_encryption_key(m->encp->user_password, data); |
| 965 | } else { | 941 | } else { |
| 966 | - // For V >= 5, either password can be used independently to | ||
| 967 | - // compute the encryption key, and neither password can be | ||
| 968 | - // used to recover the other. | 942 | + // For V >= 5, either password can be used independently to compute the encryption key, and |
| 943 | + // neither password can be used to recover the other. | ||
| 969 | bool perms_valid; | 944 | bool perms_valid; |
| 970 | m->encp->encryption_key = | 945 | m->encp->encryption_key = |
| 971 | recover_encryption_key_with_password(m->encp->provided_password, data, perms_valid); | 946 | recover_encryption_key_with_password(m->encp->provided_password, data, perms_valid); |
| @@ -1026,8 +1001,7 @@ QPDF::decryptString(std::string& str, QPDFObjGen const& og) | @@ -1026,8 +1001,7 @@ QPDF::decryptString(std::string& str, QPDFObjGen const& og) | ||
| 1026 | default: | 1001 | default: |
| 1027 | warn(damagedPDF("unknown encryption filter for strings (check /StrF in " | 1002 | warn(damagedPDF("unknown encryption filter for strings (check /StrF in " |
| 1028 | "/Encrypt dictionary); strings may be decrypted improperly")); | 1003 | "/Encrypt dictionary); strings may be decrypted improperly")); |
| 1029 | - // To avoid repeated warnings, reset cf_string. Assume | ||
| 1030 | - // we'd want to use AES if V == 4. | 1004 | + // To avoid repeated warnings, reset cf_string. Assume we'd want to use AES if V == 4. |
| 1031 | m->encp->cf_string = e_aes; | 1005 | m->encp->cf_string = e_aes; |
| 1032 | use_aes = true; | 1006 | use_aes = true; |
| 1033 | break; | 1007 | break; |
| @@ -1052,8 +1026,8 @@ QPDF::decryptString(std::string& str, QPDFObjGen const& og) | @@ -1052,8 +1026,8 @@ QPDF::decryptString(std::string& str, QPDFObjGen const& og) | ||
| 1052 | } else { | 1026 | } else { |
| 1053 | QTC::TC("qpdf", "QPDF_encryption rc4 decode string"); | 1027 | QTC::TC("qpdf", "QPDF_encryption rc4 decode string"); |
| 1054 | size_t vlen = str.length(); | 1028 | size_t vlen = str.length(); |
| 1055 | - // Using std::shared_ptr guarantees that tmp will | ||
| 1056 | - // be freed even if rc4.process throws an exception. | 1029 | + // Using std::shared_ptr guarantees that tmp will be freed even if rc4.process throws an |
| 1030 | + // exception. | ||
| 1057 | auto tmp = QUtil::make_unique_cstr(str); | 1031 | auto tmp = QUtil::make_unique_cstr(str); |
| 1058 | RC4 rc4(QUtil::unsigned_char_pointer(key), toI(key.length())); | 1032 | RC4 rc4(QUtil::unsigned_char_pointer(key), toI(key.length())); |
| 1059 | auto data = QUtil::unsigned_char_pointer(tmp.get()); | 1033 | auto data = QUtil::unsigned_char_pointer(tmp.get()); |
| @@ -1154,8 +1128,7 @@ QPDF::decryptStream( | @@ -1154,8 +1128,7 @@ QPDF::decryptStream( | ||
| 1154 | file->getLastOffset(), | 1128 | file->getLastOffset(), |
| 1155 | "unknown encryption filter for streams (check " + method_source + | 1129 | "unknown encryption filter for streams (check " + method_source + |
| 1156 | "); streams may be decrypted improperly")); | 1130 | "); streams may be decrypted improperly")); |
| 1157 | - // To avoid repeated warnings, reset cf_stream. Assume | ||
| 1158 | - // we'd want to use AES if V == 4. | 1131 | + // To avoid repeated warnings, reset cf_stream. Assume we'd want to use AES if V == 4. |
| 1159 | encp->cf_stream = e_aes; | 1132 | encp->cf_stream = e_aes; |
| 1160 | use_aes = true; | 1133 | use_aes = true; |
| 1161 | break; | 1134 | break; |
libqpdf/QPDF_json.cc
| @@ -12,8 +12,7 @@ | @@ -12,8 +12,7 @@ | ||
| 12 | #include <algorithm> | 12 | #include <algorithm> |
| 13 | #include <cstring> | 13 | #include <cstring> |
| 14 | 14 | ||
| 15 | -// This chart shows an example of the state transitions that would | ||
| 16 | -// occur in parsing a minimal file. | 15 | +// This chart shows an example of the state transitions that would occur in parsing a minimal file. |
| 17 | 16 | ||
| 18 | // | st_initial | 17 | // | st_initial |
| 19 | // { | -> st_top | 18 | // { | -> st_top |
| @@ -414,9 +413,9 @@ QPDF::JSONReactor::containerEnd(JSON const& value) | @@ -414,9 +413,9 @@ QPDF::JSONReactor::containerEnd(JSON const& value) | ||
| 414 | object_stack.pop_back(); | 413 | object_stack.pop_back(); |
| 415 | } | 414 | } |
| 416 | } else if ((state == st_top) && (from_state == st_qpdf)) { | 415 | } else if ((state == st_top) && (from_state == st_qpdf)) { |
| 417 | - // Handle dangling indirect object references which the PDF spec says to | ||
| 418 | - // treat as nulls. It's tempting to make this an error, but that would | ||
| 419 | - // be wrong since valid input files may have these. | 416 | + // Handle dangling indirect object references which the PDF spec says to treat as nulls. |
| 417 | + // It's tempting to make this an error, but that would be wrong since valid input files may | ||
| 418 | + // have these. | ||
| 420 | for (auto& oc: pdf.m->obj_cache) { | 419 | for (auto& oc: pdf.m->obj_cache) { |
| 421 | if (oc.second.object->getTypeCode() == ::ot_reserved && reserved.count(oc.first) == 0) { | 420 | if (oc.second.object->getTypeCode() == ::ot_reserved && reserved.count(oc.first) == 0) { |
| 422 | QTC::TC("qpdf", "QPDF_json non-trivial null reserved"); | 421 | QTC::TC("qpdf", "QPDF_json non-trivial null reserved"); |
| @@ -446,8 +445,7 @@ QPDF::JSONReactor::topLevelScalar() | @@ -446,8 +445,7 @@ QPDF::JSONReactor::topLevelScalar() | ||
| 446 | void | 445 | void |
| 447 | QPDF::JSONReactor::nestedState(std::string const& key, JSON const& value, state_e next) | 446 | QPDF::JSONReactor::nestedState(std::string const& key, JSON const& value, state_e next) |
| 448 | { | 447 | { |
| 449 | - // Use this method when the next state is for processing a nested | ||
| 450 | - // dictionary. | 448 | + // Use this method when the next state is for processing a nested dictionary. |
| 451 | if (value.isDictionary()) { | 449 | if (value.isDictionary()) { |
| 452 | this->next_state = next; | 450 | this->next_state = next; |
| 453 | } else { | 451 | } else { |
| @@ -531,8 +529,8 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) | @@ -531,8 +529,8 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) | ||
| 531 | error(value.getStart(), "calledgetallpages must be a boolean"); | 529 | error(value.getStart(), "calledgetallpages must be a boolean"); |
| 532 | } | 530 | } |
| 533 | } else { | 531 | } else { |
| 534 | - // ignore unknown keys for forward compatibility and to | ||
| 535 | - // skip keys we don't care about like "maxobjectid". | 532 | + // ignore unknown keys for forward compatibility and to skip keys we don't care about |
| 533 | + // like "maxobjectid". | ||
| 536 | QTC::TC("qpdf", "QPDF_json ignore second-level key"); | 534 | QTC::TC("qpdf", "QPDF_json ignore second-level key"); |
| 537 | next_state = st_ignore; | 535 | next_state = st_ignore; |
| 538 | } | 536 | } |
| @@ -594,8 +592,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) | @@ -594,8 +592,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) | ||
| 594 | this->pdf.m->trailer = makeObject(value); | 592 | this->pdf.m->trailer = makeObject(value); |
| 595 | setObjectDescription(this->pdf.m->trailer, value); | 593 | setObjectDescription(this->pdf.m->trailer, value); |
| 596 | } else if (key == "stream") { | 594 | } else if (key == "stream") { |
| 597 | - // Don't need to set saw_stream here since there's already | ||
| 598 | - // an error. | 595 | + // Don't need to set saw_stream here since there's already an error. |
| 599 | QTC::TC("qpdf", "QPDF_json trailer stream"); | 596 | QTC::TC("qpdf", "QPDF_json trailer stream"); |
| 600 | error(value.getStart(), "the trailer may not be a stream"); | 597 | error(value.getStart(), "the trailer may not be a stream"); |
| 601 | next_state = st_ignore; | 598 | next_state = st_ignore; |
| @@ -616,8 +613,8 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) | @@ -616,8 +613,8 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) | ||
| 616 | auto uninitialized = QPDFObjectHandle(); | 613 | auto uninitialized = QPDFObjectHandle(); |
| 617 | if (key == "dict") { | 614 | if (key == "dict") { |
| 618 | this->saw_dict = true; | 615 | this->saw_dict = true; |
| 619 | - // Since a stream dictionary must be a dictionary, we can | ||
| 620 | - // use nestedState to transition to st_value. | 616 | + // Since a stream dictionary must be a dictionary, we can use nestedState to transition |
| 617 | + // to st_value. | ||
| 621 | nestedState("stream.dict", value, st_object); | 618 | nestedState("stream.dict", value, st_object); |
| 622 | auto dict = makeObject(value); | 619 | auto dict = makeObject(value); |
| 623 | if (dict.isDictionary()) { | 620 | if (dict.isDictionary()) { |
libqpdf/QPDF_linearization.cc
| @@ -22,8 +22,8 @@ load_vector_int( | @@ -22,8 +22,8 @@ load_vector_int( | ||
| 22 | BitStream& bit_stream, int nitems, std::vector<T>& vec, int bits_wanted, int_type T::*field) | 22 | BitStream& bit_stream, int nitems, std::vector<T>& vec, int bits_wanted, int_type T::*field) |
| 23 | { | 23 | { |
| 24 | bool append = vec.empty(); | 24 | bool append = vec.empty(); |
| 25 | - // nitems times, read bits_wanted from the given bit stream, | ||
| 26 | - // storing results in the ith vector entry. | 25 | + // nitems times, read bits_wanted from the given bit stream, storing results in the ith vector |
| 26 | + // entry. | ||
| 27 | 27 | ||
| 28 | for (size_t i = 0; i < QIntC::to_size(nitems); ++i) { | 28 | for (size_t i = 0; i < QIntC::to_size(nitems); ++i) { |
| 29 | if (append) { | 29 | if (append) { |
| @@ -34,8 +34,8 @@ load_vector_int( | @@ -34,8 +34,8 @@ load_vector_int( | ||
| 34 | if (QIntC::to_int(vec.size()) != nitems) { | 34 | if (QIntC::to_int(vec.size()) != nitems) { |
| 35 | throw std::logic_error("vector has wrong size in load_vector_int"); | 35 | throw std::logic_error("vector has wrong size in load_vector_int"); |
| 36 | } | 36 | } |
| 37 | - // The PDF spec says that each hint table starts at a byte | ||
| 38 | - // boundary. Each "row" actually must start on a byte boundary. | 37 | + // The PDF spec says that each hint table starts at a byte boundary. Each "row" actually must |
| 38 | + // start on a byte boundary. | ||
| 39 | bit_stream.skipToNextByte(); | 39 | bit_stream.skipToNextByte(); |
| 40 | } | 40 | } |
| 41 | 41 | ||
| @@ -49,8 +49,8 @@ load_vector_vector( | @@ -49,8 +49,8 @@ load_vector_vector( | ||
| 49 | int bits_wanted, | 49 | int bits_wanted, |
| 50 | std::vector<int> T::*vec2) | 50 | std::vector<int> T::*vec2) |
| 51 | { | 51 | { |
| 52 | - // nitems1 times, read nitems2 (from the ith element of vec1) items | ||
| 53 | - // into the vec2 vector field of the ith item of vec1. | 52 | + // nitems1 times, read nitems2 (from the ith element of vec1) items into the vec2 vector field |
| 53 | + // of the ith item of vec1. | ||
| 54 | for (size_t i1 = 0; i1 < QIntC::to_size(nitems1); ++i1) { | 54 | for (size_t i1 = 0; i1 < QIntC::to_size(nitems1); ++i1) { |
| 55 | for (int i2 = 0; i2 < vec1.at(i1).*nitems2; ++i2) { | 55 | for (int i2 = 0; i2 < vec1.at(i1).*nitems2; ++i2) { |
| 56 | (vec1.at(i1).*vec2).push_back(bit_stream.getBitsInt(QIntC::to_size(bits_wanted))); | 56 | (vec1.at(i1).*vec2).push_back(bit_stream.getBitsInt(QIntC::to_size(bits_wanted))); |
| @@ -83,18 +83,15 @@ QPDF::checkLinearization() | @@ -83,18 +83,15 @@ QPDF::checkLinearization() | ||
| 83 | bool | 83 | bool |
| 84 | QPDF::isLinearized() | 84 | QPDF::isLinearized() |
| 85 | { | 85 | { |
| 86 | - // If the first object in the file is a dictionary with a suitable | ||
| 87 | - // /Linearized key and has an /L key that accurately indicates the | ||
| 88 | - // file size, initialize m->lindict and return true. | ||
| 89 | - | ||
| 90 | - // A linearized PDF spec's first object will be contained within | ||
| 91 | - // the first 1024 bytes of the file and will be a dictionary with | ||
| 92 | - // a valid /Linearized key. This routine looks for that and does | ||
| 93 | - // no additional validation. | ||
| 94 | - | ||
| 95 | - // The PDF spec says the linearization dictionary must be | ||
| 96 | - // completely contained within the first 1024 bytes of the file. | ||
| 97 | - // Add a byte for a null terminator. | 86 | + // If the first object in the file is a dictionary with a suitable /Linearized key and has an /L |
| 87 | + // key that accurately indicates the file size, initialize m->lindict and return true. | ||
| 88 | + | ||
| 89 | + // A linearized PDF spec's first object will be contained within the first 1024 bytes of the | ||
| 90 | + // file and will be a dictionary with a valid /Linearized key. This routine looks for that and | ||
| 91 | + // does no additional validation. | ||
| 92 | + | ||
| 93 | + // The PDF spec says the linearization dictionary must be completely contained within the first | ||
| 94 | + // 1024 bytes of the file. Add a byte for a null terminator. | ||
| 98 | static int const tbuf_size = 1025; | 95 | static int const tbuf_size = 1025; |
| 99 | 96 | ||
| 100 | auto b = std::make_unique<char[]>(tbuf_size); | 97 | auto b = std::make_unique<char[]>(tbuf_size); |
| @@ -161,8 +158,8 @@ QPDF::isLinearized() | @@ -161,8 +158,8 @@ QPDF::isLinearized() | ||
| 161 | void | 158 | void |
| 162 | QPDF::readLinearizationData() | 159 | QPDF::readLinearizationData() |
| 163 | { | 160 | { |
| 164 | - // This function throws an exception (which is trapped by | ||
| 165 | - // checkLinearization()) for any errors that prevent loading. | 161 | + // This function throws an exception (which is trapped by checkLinearization()) for any errors |
| 162 | + // that prevent loading. | ||
| 166 | 163 | ||
| 167 | if (!isLinearized()) { | 164 | if (!isLinearized()) { |
| 168 | throw std::logic_error("called readLinearizationData for file" | 165 | throw std::logic_error("called readLinearizationData for file" |
| @@ -206,8 +203,8 @@ QPDF::readLinearizationData() | @@ -206,8 +203,8 @@ QPDF::readLinearizationData() | ||
| 206 | int H1_offset = 0; | 203 | int H1_offset = 0; |
| 207 | int H1_length = 0; | 204 | int H1_length = 0; |
| 208 | if (H_items.size() == 4) { | 205 | if (H_items.size() == 4) { |
| 209 | - // Acrobat doesn't read or write these (as PDF 1.4), so we | ||
| 210 | - // don't have a way to generate a test case. | 206 | + // Acrobat doesn't read or write these (as of PDF 1.4), so we don't have a way to generate a |
| 207 | + // test case. | ||
| 211 | // QTC::TC("qpdf", "QPDF overflow hint table"); | 208 | // QTC::TC("qpdf", "QPDF overflow hint table"); |
| 212 | H1_offset = H_items.at(2); | 209 | H1_offset = H_items.at(2); |
| 213 | H1_length = H_items.at(3); | 210 | H1_length = H_items.at(3); |
| @@ -224,9 +221,8 @@ QPDF::readLinearizationData() | @@ -224,9 +221,8 @@ QPDF::readLinearizationData() | ||
| 224 | 221 | ||
| 225 | // Store linearization parameter data | 222 | // Store linearization parameter data |
| 226 | 223 | ||
| 227 | - // Various places in the code use linp.npages, which is | ||
| 228 | - // initialized from N, to pre-allocate memory, so make sure it's | ||
| 229 | - // accurate and bail right now if it's not. | 224 | + // Various places in the code use linp.npages, which is initialized from N, to pre-allocate |
| 225 | + // memory, so make sure it's accurate and bail right now if it's not. | ||
| 230 | if (N.getIntValue() != static_cast<long long>(getAllPages().size())) { | 226 | if (N.getIntValue() != static_cast<long long>(getAllPages().size())) { |
| 231 | throw damagedPDF("linearization hint table", "/N does not match number of pages"); | 227 | throw damagedPDF("linearization hint table", "/N does not match number of pages"); |
| 232 | } | 228 | } |
| @@ -299,11 +295,10 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) | @@ -299,11 +295,10 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) | ||
| 299 | 295 | ||
| 300 | QPDFObjectHandle Hdict = H.getDict(); | 296 | QPDFObjectHandle Hdict = H.getDict(); |
| 301 | 297 | ||
| 302 | - // Some versions of Acrobat make /Length indirect and place it | ||
| 303 | - // immediately after the stream, increasing length to cover it, | ||
| 304 | - // even though the specification says all objects in the | ||
| 305 | - // linearization parameter dictionary must be direct. We have to | ||
| 306 | - // get the file position of the end of length in this case. | 298 | + // Some versions of Acrobat make /Length indirect and place it immediately after the stream, |
| 299 | + // increasing length to cover it, even though the specification says all objects in the | ||
| 300 | + // linearization parameter dictionary must be direct. We have to get the file position of the | ||
| 301 | + // end of length in this case. | ||
| 307 | QPDFObjectHandle length_obj = Hdict.getKey("/Length"); | 302 | QPDFObjectHandle length_obj = Hdict.getKey("/Length"); |
| 308 | if (length_obj.isIndirect()) { | 303 | if (length_obj.isIndirect()) { |
| 309 | QTC::TC("qpdf", "QPDF hint table length indirect"); | 304 | QTC::TC("qpdf", "QPDF hint table length indirect"); |
| @@ -329,8 +324,7 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) | @@ -329,8 +324,7 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) | ||
| 329 | void | 324 | void |
| 330 | QPDF::readHPageOffset(BitStream h) | 325 | QPDF::readHPageOffset(BitStream h) |
| 331 | { | 326 | { |
| 332 | - // All comments referring to the PDF spec refer to the spec for | ||
| 333 | - // version 1.4. | 327 | + // All comments referring to the PDF spec refer to the spec for version 1.4. |
| 334 | 328 | ||
| 335 | HPageOffset& t = m->page_offset_hints; | 329 | HPageOffset& t = m->page_offset_hints; |
| 336 | 330 | ||
| @@ -402,9 +396,8 @@ QPDF::readHSharedObject(BitStream h) | @@ -402,9 +396,8 @@ QPDF::readHSharedObject(BitStream h) | ||
| 402 | load_vector_int(h, nitems, entries, 1, &HSharedObjectEntry::signature_present); | 396 | load_vector_int(h, nitems, entries, 1, &HSharedObjectEntry::signature_present); |
| 403 | for (size_t i = 0; i < toS(nitems); ++i) { | 397 | for (size_t i = 0; i < toS(nitems); ++i) { |
| 404 | if (entries.at(i).signature_present) { | 398 | if (entries.at(i).signature_present) { |
| 405 | - // Skip 128-bit MD5 hash. These are not supported by | ||
| 406 | - // acrobat, so they should probably never be there. We | ||
| 407 | - // have no test case for this. | 399 | + // Skip 128-bit MD5 hash. These are not supported by acrobat, so they should probably |
| 400 | + // never be there. We have no test case for this. | ||
| 408 | for (int j = 0; j < 4; ++j) { | 401 | for (int j = 0; j < 4; ++j) { |
| 409 | (void)h.getBits(32); | 402 | (void)h.getBits(32); |
| 410 | } | 403 | } |
| @@ -425,8 +418,7 @@ QPDF::readHGeneric(BitStream h, HGeneric& t) | @@ -425,8 +418,7 @@ QPDF::readHGeneric(BitStream h, HGeneric& t) | ||
| 425 | bool | 418 | bool |
| 426 | QPDF::checkLinearizationInternal() | 419 | QPDF::checkLinearizationInternal() |
| 427 | { | 420 | { |
| 428 | - // All comments referring to the PDF spec refer to the spec for | ||
| 429 | - // version 1.4. | 421 | + // All comments referring to the PDF spec refer to the spec for version 1.4. |
| 430 | 422 | ||
| 431 | // Check all values in linearization parameter dictionary | 423 | // Check all values in linearization parameter dictionary |
| 432 | 424 | ||
| @@ -476,24 +468,21 @@ QPDF::checkLinearizationInternal() | @@ -476,24 +468,21 @@ QPDF::checkLinearizationInternal() | ||
| 476 | "; file = " + std::to_string(m->file->tell())); | 468 | "; file = " + std::to_string(m->file->tell())); |
| 477 | } | 469 | } |
| 478 | 470 | ||
| 479 | - // P: first page number -- Implementation note 124 says Acrobat | ||
| 480 | - // ignores this value, so we will too. | 471 | + // P: first page number -- Implementation note 124 says Acrobat ignores this value, so we will |
| 472 | + // too. | ||
| 481 | 473 | ||
| 482 | - // Check numbering of compressed objects in each xref section. | ||
| 483 | - // For linearized files, all compressed objects are supposed to be | ||
| 484 | - // at the end of the containing xref section if any object streams | ||
| 485 | - // are in use. | 474 | + // Check numbering of compressed objects in each xref section. For linearized files, all |
| 475 | + // compressed objects are supposed to be at the end of the containing xref section if any object | ||
| 476 | + // streams are in use. | ||
| 486 | 477 | ||
| 487 | if (m->uncompressed_after_compressed) { | 478 | if (m->uncompressed_after_compressed) { |
| 488 | - linearizationWarning("linearized file contains an uncompressed object" | ||
| 489 | - " after a compressed one in a cross-reference stream"); | 479 | + linearizationWarning("linearized file contains an uncompressed object after a compressed " |
| 480 | + "one in a cross-reference stream"); | ||
| 490 | } | 481 | } |
| 491 | 482 | ||
| 492 | - // Further checking requires optimization and order calculation. | ||
| 493 | - // Don't allow optimization to make changes. If it has to, then | ||
| 494 | - // the file is not properly linearized. We use the xref table to | ||
| 495 | - // figure out which objects are compressed and which are | ||
| 496 | - // uncompressed. | 483 | + // Further checking requires optimization and order calculation. Don't allow optimization to |
| 484 | + // make changes. If it has to, then the file is not properly linearized. We use the xref table | ||
| 485 | + // to figure out which objects are compressed and which are uncompressed. | ||
| 497 | { // local scope | 486 | { // local scope |
| 498 | std::map<int, int> object_stream_data; | 487 | std::map<int, int> object_stream_data; |
| 499 | for (auto const& iter: m->xref_table) { | 488 | for (auto const& iter: m->xref_table) { |
| @@ -507,16 +496,13 @@ QPDF::checkLinearizationInternal() | @@ -507,16 +496,13 @@ QPDF::checkLinearizationInternal() | ||
| 507 | calculateLinearizationData(object_stream_data); | 496 | calculateLinearizationData(object_stream_data); |
| 508 | } | 497 | } |
| 509 | 498 | ||
| 510 | - // E: offset of end of first page -- Implementation note 123 says | ||
| 511 | - // Acrobat includes on extra object here by mistake. pdlin fails | ||
| 512 | - // to place thumbnail images in section 9, so when thumbnails are | ||
| 513 | - // present, it also gets the wrong value for /E. It also doesn't | ||
| 514 | - // count outlines here when it should even though it places them | ||
| 515 | - // in part 6. This code fails to put thread information | ||
| 516 | - // dictionaries in part 9, so it actually gets the wrong value for | ||
| 517 | - // E when threads are present. In that case, it would probably | ||
| 518 | - // agree with pdlin. As of this writing, the test suite doesn't | ||
| 519 | - // contain any files with threads. | 499 | + // E: offset of end of first page -- Implementation note 123 says Acrobat includes one extra |
| 500 | + // object here by mistake. pdlin fails to place thumbnail images in section 9, so when | ||
| 501 | + // thumbnails are present, it also gets the wrong value for /E. It also doesn't count outlines | ||
| 502 | + // here when it should even though it places them in part 6. This code fails to put thread | ||
| 503 | + // information dictionaries in part 9, so it actually gets the wrong value for E when threads | ||
| 504 | + // are present. In that case, it would probably agree with pdlin. As of this writing, the test | ||
| 505 | + // suite doesn't contain any files with threads. | ||
| 520 | 506 | ||
| 521 | if (m->part6.empty()) { | 507 | if (m->part6.empty()) { |
| 522 | stopOnError("linearization part 6 unexpectedly empty"); | 508 | stopOnError("linearization part 6 unexpectedly empty"); |
| @@ -577,8 +563,7 @@ QPDF::getLinearizationOffset(QPDFObjGen const& og) | @@ -577,8 +563,7 @@ QPDF::getLinearizationOffset(QPDFObjGen const& og) | ||
| 577 | break; | 563 | break; |
| 578 | 564 | ||
| 579 | case 2: | 565 | case 2: |
| 580 | - // For compressed objects, return the offset of the object | ||
| 581 | - // stream that contains them. | 566 | + // For compressed objects, return the offset of the object stream that contains them. |
| 582 | result = getLinearizationOffset(QPDFObjGen(entry.getObjStreamNumber(), 0)); | 567 | result = getLinearizationOffset(QPDFObjGen(entry.getObjStreamNumber(), 0)); |
| 583 | break; | 568 | break; |
| 584 | 569 | ||
| @@ -611,8 +596,7 @@ QPDF::lengthNextN(int first_object, int n) | @@ -611,8 +596,7 @@ QPDF::lengthNextN(int first_object, int n) | ||
| 611 | "no xref table entry for " + std::to_string(first_object + i) + " 0"); | 596 | "no xref table entry for " + std::to_string(first_object + i) + " 0"); |
| 612 | } else { | 597 | } else { |
| 613 | if (m->obj_cache.count(og) == 0) { | 598 | if (m->obj_cache.count(og) == 0) { |
| 614 | - stopOnError("found unknown object while" | ||
| 615 | - " calculating length for linearization data"); | 599 | + stopOnError("found unknown object while calculating length for linearization data"); |
| 616 | } | 600 | } |
| 617 | length += toI(m->obj_cache[og].end_after_space - getLinearizationOffset(og)); | 601 | length += toI(m->obj_cache[og].end_after_space - getLinearizationOffset(og)); |
| 618 | } | 602 | } |
| @@ -624,22 +608,17 @@ void | @@ -624,22 +608,17 @@ void | ||
| 624 | QPDF::checkHPageOffset( | 608 | QPDF::checkHPageOffset( |
| 625 | std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& shared_idx_to_obj) | 609 | std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& shared_idx_to_obj) |
| 626 | { | 610 | { |
| 627 | - // Implementation note 126 says Acrobat always sets | ||
| 628 | - // delta_content_offset and delta_content_length in the page | ||
| 629 | - // offset header dictionary to 0. It also states that | ||
| 630 | - // min_content_offset in the per-page information is always 0, | ||
| 631 | - // which is an incorrect value. | ||
| 632 | - | ||
| 633 | - // Implementation note 127 explains that Acrobat always sets item | ||
| 634 | - // 8 (min_content_length) to zero, item 9 | ||
| 635 | - // (nbits_delta_content_length) to the value of item 5 | ||
| 636 | - // (nbits_delta_page_length), and item 7 of each per-page hint | ||
| 637 | - // table (delta_content_length) to item 2 (delta_page_length) of | ||
| 638 | - // that entry. Acrobat ignores these values when reading files. | ||
| 639 | - | ||
| 640 | - // Empirically, it also seems that Acrobat sometimes puts items | ||
| 641 | - // under a page's /Resources dictionary in with shared objects | ||
| 642 | - // even when they are private. | 611 | + // Implementation note 126 says Acrobat always sets delta_content_offset and |
| 612 | + // delta_content_length in the page offset header dictionary to 0. It also states that | ||
| 613 | + // min_content_offset in the per-page information is always 0, which is an incorrect value. | ||
| 614 | + | ||
| 615 | + // Implementation note 127 explains that Acrobat always sets item 8 (min_content_length) to | ||
| 616 | + // zero, item 9 (nbits_delta_content_length) to the value of item 5 (nbits_delta_page_length), | ||
| 617 | + // and item 7 of each per-page hint table (delta_content_length) to item 2 (delta_page_length) | ||
| 618 | + // of that entry. Acrobat ignores these values when reading files. | ||
| 619 | + | ||
| 620 | + // Empirically, it also seems that Acrobat sometimes puts items under a page's /Resources | ||
| 621 | + // dictionary in with shared objects even when they are private. | ||
| 643 | 622 | ||
| 644 | int npages = toI(pages.size()); | 623 | int npages = toI(pages.size()); |
| 645 | qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset); | 624 | qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset); |
| @@ -670,13 +649,12 @@ QPDF::checkHPageOffset( | @@ -670,13 +649,12 @@ QPDF::checkHPageOffset( | ||
| 670 | std::to_string(h_nobjects) + "; computed = " + std::to_string(ce.nobjects)); | 649 | std::to_string(h_nobjects) + "; computed = " + std::to_string(ce.nobjects)); |
| 671 | } | 650 | } |
| 672 | 651 | ||
| 673 | - // Use value for number of objects in hint table rather than | ||
| 674 | - // computed value if there is a discrepancy. | 652 | + // Use value for number of objects in hint table rather than computed value if there is a |
| 653 | + // discrepancy. | ||
| 675 | int length = lengthNextN(first_object, h_nobjects); | 654 | int length = lengthNextN(first_object, h_nobjects); |
| 676 | int h_length = toI(he.delta_page_length + m->page_offset_hints.min_page_length); | 655 | int h_length = toI(he.delta_page_length + m->page_offset_hints.min_page_length); |
| 677 | if (length != h_length) { | 656 | if (length != h_length) { |
| 678 | - // This condition almost certainly indicates a bad hint | ||
| 679 | - // table or a bug in this code. | 657 | + // This condition almost certainly indicates a bad hint table or a bug in this code. |
| 680 | linearizationWarning( | 658 | linearizationWarning( |
| 681 | "page length mismatch for page " + std::to_string(pageno) + ": hint table = " + | 659 | "page length mismatch for page " + std::to_string(pageno) + ": hint table = " + |
| 682 | std::to_string(h_length) + "; computed length = " + std::to_string(length) + | 660 | std::to_string(h_length) + "; computed length = " + std::to_string(length) + |
| @@ -690,8 +668,8 @@ QPDF::checkHPageOffset( | @@ -690,8 +668,8 @@ QPDF::checkHPageOffset( | ||
| 690 | std::set<int> computed_shared; | 668 | std::set<int> computed_shared; |
| 691 | 669 | ||
| 692 | if ((pageno == 0) && (he.nshared_objects > 0)) { | 670 | if ((pageno == 0) && (he.nshared_objects > 0)) { |
| 693 | - // pdlin and Acrobat both do this even though the spec | ||
| 694 | - // states clearly and unambiguously that they should not. | 671 | + // pdlin and Acrobat both do this even though the spec states clearly and unambiguously |
| 672 | + // that they should not. | ||
| 695 | linearizationWarning("page 0 has shared identifier entries"); | 673 | linearizationWarning("page 0 has shared identifier entries"); |
| 696 | } | 674 | } |
| 697 | 675 | ||
| @@ -724,9 +702,8 @@ QPDF::checkHPageOffset( | @@ -724,9 +702,8 @@ QPDF::checkHPageOffset( | ||
| 724 | 702 | ||
| 725 | for (int iter: computed_shared) { | 703 | for (int iter: computed_shared) { |
| 726 | if (!hint_shared.count(iter)) { | 704 | if (!hint_shared.count(iter)) { |
| 727 | - // Acrobat does not put some things including at least | ||
| 728 | - // built-in fonts and procsets here, at least in some | ||
| 729 | - // cases. | 705 | + // Acrobat does not put some things including at least built-in fonts and procsets |
| 706 | + // here, at least in some cases. | ||
| 730 | linearizationWarning( | 707 | linearizationWarning( |
| 731 | ("page " + std::to_string(pageno) + ": shared object " + std::to_string(iter) + | 708 | ("page " + std::to_string(pageno) + ": shared object " + std::to_string(iter) + |
| 732 | ": in computed list but not hint table")); | 709 | ": in computed list but not hint table")); |
| @@ -738,31 +715,26 @@ QPDF::checkHPageOffset( | @@ -738,31 +715,26 @@ QPDF::checkHPageOffset( | ||
| 738 | void | 715 | void |
| 739 | QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj) | 716 | QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj) |
| 740 | { | 717 | { |
| 741 | - // Implementation note 125 says shared object groups always | ||
| 742 | - // contain only one object. Implementation note 128 says that | ||
| 743 | - // Acrobat always nbits_nobjects to zero. Implementation note 130 | ||
| 744 | - // says that Acrobat does not support more than one shared object | ||
| 745 | - // per group. These are all consistent. | 718 | + // Implementation note 125 says shared object groups always contain only one object. |
| 719 | + // Implementation note 128 says that Acrobat always sets nbits_nobjects to zero. Implementation | ||
| 720 | + // note 130 says that Acrobat does not support more than one shared object per group. These are | ||
| 721 | + // all consistent. | ||
| 746 | 722 | ||
| 747 | - // Implementation note 129 states that MD5 signatures are not | ||
| 748 | - // implemented in Acrobat, so signature_present must always be | ||
| 749 | - // zero. | 723 | + // Implementation note 129 states that MD5 signatures are not implemented in Acrobat, so |
| 724 | + // signature_present must always be zero. | ||
| 750 | 725 | ||
| 751 | - // Implementation note 131 states that first_shared_obj and | ||
| 752 | - // first_shared_offset have meaningless values for single-page | ||
| 753 | - // files. | 726 | + // Implementation note 131 states that first_shared_obj and first_shared_offset have meaningless |
| 727 | + // values for single-page files. | ||
| 754 | 728 | ||
| 755 | - // Empirically, Acrobat and pdlin generate incorrect values for | ||
| 756 | - // these whenever there are no shared objects not referenced by | ||
| 757 | - // the first page (i.e., nshared_total == nshared_first_page). | 729 | + // Empirically, Acrobat and pdlin generate incorrect values for these whenever there are no |
| 730 | + // shared objects not referenced by the first page (i.e., nshared_total == nshared_first_page). | ||
| 758 | 731 | ||
| 759 | HSharedObject& so = m->shared_object_hints; | 732 | HSharedObject& so = m->shared_object_hints; |
| 760 | if (so.nshared_total < so.nshared_first_page) { | 733 | if (so.nshared_total < so.nshared_first_page) { |
| 761 | linearizationWarning("shared object hint table: ntotal < nfirst_page"); | 734 | linearizationWarning("shared object hint table: ntotal < nfirst_page"); |
| 762 | } else { | 735 | } else { |
| 763 | - // The first nshared_first_page objects are consecutive | ||
| 764 | - // objects starting with the first page object. The rest are | ||
| 765 | - // consecutive starting from the first_shared_obj object. | 736 | + // The first nshared_first_page objects are consecutive objects starting with the first page |
| 737 | + // object. The rest are consecutive starting from the first_shared_obj object. | ||
| 766 | int cur_object = pages.at(0).getObjectID(); | 738 | int cur_object = pages.at(0).getObjectID(); |
| 767 | for (int i = 0; i < so.nshared_total; ++i) { | 739 | for (int i = 0; i < so.nshared_total; ++i) { |
| 768 | if (i == so.nshared_first_page) { | 740 | if (i == so.nshared_first_page) { |
| @@ -814,12 +786,10 @@ QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<in | @@ -814,12 +786,10 @@ QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<in | ||
| 814 | void | 786 | void |
| 815 | QPDF::checkHOutlines() | 787 | QPDF::checkHOutlines() |
| 816 | { | 788 | { |
| 817 | - // Empirically, Acrobat generates the correct value for the object | ||
| 818 | - // number but incorrectly stores the next object number's offset | ||
| 819 | - // as the offset, at least when outlines appear in part 6. It | ||
| 820 | - // also generates an incorrect value for length (specifically, the | ||
| 821 | - // length that would cover the correct number of objects from the | ||
| 822 | - // wrong starting place). pdlin appears to generate correct | 789 | + // Empirically, Acrobat generates the correct value for the object number but incorrectly stores |
| 790 | + // the next object number's offset as the offset, at least when outlines appear in part 6. It | ||
| 791 | + // also generates an incorrect value for length (specifically, the length that would cover the | ||
| 792 | + // correct number of objects from the wrong starting place). pdlin appears to generate correct | ||
| 823 | // values in those cases. | 793 | // values in those cases. |
| 824 | 794 | ||
| 825 | if (m->c_outline_data.nobjects == m->outline_hints.nobjects) { | 795 | if (m->c_outline_data.nobjects == m->outline_hints.nobjects) { |
| @@ -831,9 +801,8 @@ QPDF::checkHOutlines() | @@ -831,9 +801,8 @@ QPDF::checkHOutlines() | ||
| 831 | // Check length and offset. Acrobat gets these wrong. | 801 | // Check length and offset. Acrobat gets these wrong. |
| 832 | QPDFObjectHandle outlines = getRoot().getKey("/Outlines"); | 802 | QPDFObjectHandle outlines = getRoot().getKey("/Outlines"); |
| 833 | if (!outlines.isIndirect()) { | 803 | if (!outlines.isIndirect()) { |
| 834 | - // This case is not exercised in test suite since not | ||
| 835 | - // permitted by the spec, but if this does occur, the | ||
| 836 | - // code below would fail. | 804 | + // This case is not exercised in test suite since not permitted by the spec, but if |
| 805 | + // this does occur, the code below would fail. | ||
| 837 | linearizationWarning("/Outlines key of root dictionary is not indirect"); | 806 | linearizationWarning("/Outlines key of root dictionary is not indirect"); |
| 838 | return; | 807 | return; |
| 839 | } | 808 | } |
| @@ -906,9 +875,8 @@ QPDF::dumpLinearizationDataInternal() | @@ -906,9 +875,8 @@ QPDF::dumpLinearizationDataInternal() | ||
| 906 | qpdf_offset_t | 875 | qpdf_offset_t |
| 907 | QPDF::adjusted_offset(qpdf_offset_t offset) | 876 | QPDF::adjusted_offset(qpdf_offset_t offset) |
| 908 | { | 877 | { |
| 909 | - // All offsets >= H_offset have to be increased by H_length | ||
| 910 | - // since all hint table location values disregard the hint table | ||
| 911 | - // itself. | 878 | + // All offsets >= H_offset have to be increased by H_length since all hint table location values |
| 879 | + // disregard the hint table itself. | ||
| 912 | if (offset >= m->linp.H_offset) { | 880 | if (offset >= m->linp.H_offset) { |
| 913 | return offset + m->linp.H_length; | 881 | return offset + m->linp.H_length; |
| 914 | } | 882 | } |
| @@ -971,8 +939,8 @@ QPDF::dumpHSharedObject() | @@ -971,8 +939,8 @@ QPDF::dumpHSharedObject() | ||
| 971 | *m->log->getInfo() << "Shared Object " << i << ":\n" | 939 | *m->log->getInfo() << "Shared Object " << i << ":\n" |
| 972 | << " group length: " << se.delta_group_length + t.min_group_length | 940 | << " group length: " << se.delta_group_length + t.min_group_length |
| 973 | << "\n"; | 941 | << "\n"; |
| 974 | - // PDF spec says signature present nobjects_minus_one are | ||
| 975 | - // always 0, so print them only if they have a non-zero value. | 942 | + // PDF spec says signature_present and nobjects_minus_one are always 0, so print them only |
| 943 | + // if they have a non-zero value. | ||
| 976 | if (se.signature_present) { | 944 | if (se.signature_present) { |
| 977 | *m->log->getInfo() << " signature present\n"; | 945 | *m->log->getInfo() << " signature present\n"; |
| 978 | } | 946 | } |
| @@ -994,44 +962,38 @@ QPDF::dumpHGeneric(HGeneric& t) | @@ -994,44 +962,38 @@ QPDF::dumpHGeneric(HGeneric& t) | ||
| 994 | void | 962 | void |
| 995 | QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | 963 | QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) |
| 996 | { | 964 | { |
| 997 | - // This function calculates the ordering of objects, divides them | ||
| 998 | - // into the appropriate parts, and computes some values for the | ||
| 999 | - // linearization parameter dictionary and hint tables. The file | ||
| 1000 | - // must be optimized (via calling optimize()) prior to calling | ||
| 1001 | - // this function. Note that actual offsets and lengths are not | ||
| 1002 | - // computed here, but anything related to object ordering is. | 965 | + // This function calculates the ordering of objects, divides them into the appropriate parts, |
| 966 | + // and computes some values for the linearization parameter dictionary and hint tables. The | ||
| 967 | + // file must be optimized (via calling optimize()) prior to calling this function. Note that | ||
| 968 | + // actual offsets and lengths are not computed here, but anything related to object ordering is. | ||
| 1003 | 969 | ||
| 1004 | if (m->object_to_obj_users.empty()) { | 970 | if (m->object_to_obj_users.empty()) { |
| 1005 | - // Note that we can't call optimize here because we don't know | ||
| 1006 | - // whether it should be called with or without allow changes. | ||
| 1007 | - throw std::logic_error("INTERNAL ERROR: QPDF::calculateLinearizationData " | ||
| 1008 | - "called before optimize()"); | 971 | + // Note that we can't call optimize here because we don't know whether it should be called |
| 972 | + // with or without allow changes. | ||
| 973 | + throw std::logic_error( | ||
| 974 | + "INTERNAL ERROR: QPDF::calculateLinearizationData called before optimize()"); | ||
| 1009 | } | 975 | } |
| 1010 | 976 | ||
| 1011 | - // Separate objects into the categories sufficient for us to | ||
| 1012 | - // determine which part of the linearized file should contain the | ||
| 1013 | - // object. This categorization is useful for other purposes as | ||
| 1014 | - // well. Part numbers refer to version 1.4 of the PDF spec. | 977 | + // Separate objects into the categories sufficient for us to determine which part of the |
| 978 | + // linearized file should contain the object. This categorization is useful for other purposes | ||
| 979 | + // as well. Part numbers refer to version 1.4 of the PDF spec. | ||
| 1015 | 980 | ||
| 1016 | - // Parts 1, 3, 5, 10, and 11 don't contain any objects from the | ||
| 1017 | - // original file (except the trailer dictionary in part 11). | 981 | + // Parts 1, 3, 5, 10, and 11 don't contain any objects from the original file (except the |
| 982 | + // trailer dictionary in part 11). | ||
| 1018 | 983 | ||
| 1019 | - // Part 4 is the document catalog (root) and the following root | ||
| 1020 | - // keys: /ViewerPreferences, /PageMode, /Threads, /OpenAction, | ||
| 1021 | - // /AcroForm, /Encrypt. Note that Thread information dictionaries | ||
| 1022 | - // are supposed to appear in part 9, but we are disregarding that | ||
| 1023 | - // recommendation for now. | 984 | + // Part 4 is the document catalog (root) and the following root keys: /ViewerPreferences, |
| 985 | + // /PageMode, /Threads, /OpenAction, /AcroForm, /Encrypt. Note that Thread information | ||
| 986 | + // dictionaries are supposed to appear in part 9, but we are disregarding that recommendation | ||
| 987 | + // for now. | ||
| 1024 | 988 | ||
| 1025 | - // Part 6 is the first page section. It includes all remaining | ||
| 1026 | - // objects referenced by the first page including shared objects | ||
| 1027 | - // but not including thumbnails. Additionally, if /PageMode is | 989 | + // Part 6 is the first page section. It includes all remaining objects referenced by the first |
| 990 | + // page including shared objects but not including thumbnails. Additionally, if /PageMode is | ||
| 1028 | // /Outlines, then information from /Outlines also appears here. | 991 | // /Outlines, then information from /Outlines also appears here. |
| 1029 | 992 | ||
| 1030 | - // Part 7 contains remaining objects private to pages other than | ||
| 1031 | - // the first page. | 993 | + // Part 7 contains remaining objects private to pages other than the first page. |
| 1032 | 994 | ||
| 1033 | - // Part 8 contains all remaining shared objects except those that | ||
| 1034 | - // are shared only within thumbnails. | 995 | + // Part 8 contains all remaining shared objects except those that are shared only within |
| 996 | + // thumbnails. | ||
| 1035 | 997 | ||
| 1036 | // Part 9 contains all remaining objects. | 998 | // Part 9 contains all remaining objects. |
| 1037 | 999 | ||
| @@ -1176,42 +1138,35 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | @@ -1176,42 +1138,35 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | ||
| 1176 | } | 1138 | } |
| 1177 | } | 1139 | } |
| 1178 | 1140 | ||
| 1179 | - // Generate ordering for objects in the output file. Sometimes we | ||
| 1180 | - // just dump right from a set into a vector. Rather than | ||
| 1181 | - // optimizing this by going straight into the vector, we'll leave | ||
| 1182 | - // these phases separate for now. That way, this section can be | ||
| 1183 | - // concerned only with ordering, and the above section can be | ||
| 1184 | - // concerned only with categorization. Note that sets of | ||
| 1185 | - // QPDFObjGens are sorted by QPDFObjGen. In a linearized file, | ||
| 1186 | - // objects appear in sequence with the possible exception of hints | ||
| 1187 | - // tables which we won't see here anyway. That means that running | ||
| 1188 | - // calculateLinearizationData() on a linearized file should give | ||
| 1189 | - // results identical to the original file ordering. | ||
| 1190 | - | ||
| 1191 | - // We seem to traverse the page tree a lot in this code, but we | ||
| 1192 | - // can address this for a future code optimization if necessary. | ||
| 1193 | - // Premature optimization is the root of all evil. | 1141 | + // Generate ordering for objects in the output file. Sometimes we just dump right from a set |
| 1142 | + // into a vector. Rather than optimizing this by going straight into the vector, we'll leave | ||
| 1143 | + // these phases separate for now. That way, this section can be concerned only with ordering, | ||
| 1144 | + // and the above section can be concerned only with categorization. Note that sets of | ||
| 1145 | + // QPDFObjGens are sorted by QPDFObjGen. In a linearized file, objects appear in sequence with | ||
| 1146 | + // the possible exception of hints tables which we won't see here anyway. That means that | ||
| 1147 | + // running calculateLinearizationData() on a linearized file should give results identical to | ||
| 1148 | + // the original file ordering. | ||
| 1149 | + | ||
| 1150 | + // We seem to traverse the page tree a lot in this code, but we can address this for a future | ||
| 1151 | + // code optimization if necessary. Premature optimization is the root of all evil. | ||
| 1194 | std::vector<QPDFObjectHandle> pages; | 1152 | std::vector<QPDFObjectHandle> pages; |
| 1195 | { // local scope | 1153 | { // local scope |
| 1196 | - // Map all page objects to the containing object stream. This | ||
| 1197 | - // should be a no-op in a properly linearized file. | 1154 | + // Map all page objects to the containing object stream. This should be a no-op in a |
| 1155 | + // properly linearized file. | ||
| 1198 | for (auto oh: getAllPages()) { | 1156 | for (auto oh: getAllPages()) { |
| 1199 | pages.push_back(getUncompressedObject(oh, object_stream_data)); | 1157 | pages.push_back(getUncompressedObject(oh, object_stream_data)); |
| 1200 | } | 1158 | } |
| 1201 | } | 1159 | } |
| 1202 | int npages = toI(pages.size()); | 1160 | int npages = toI(pages.size()); |
| 1203 | 1161 | ||
| 1204 | - // We will be initializing some values of the computed hint | ||
| 1205 | - // tables. Specifically, we can initialize any items that deal | ||
| 1206 | - // with object numbers or counts but not any items that deal with | ||
| 1207 | - // lengths or offsets. The code that writes linearized files will | ||
| 1208 | - // have to fill in these values during the first pass. The | ||
| 1209 | - // validation code can compute them relatively easily given the | ||
| 1210 | - // rest of the information. | ||
| 1211 | - | ||
| 1212 | - // npages is the size of the existing pages vector, which has been | ||
| 1213 | - // created by traversing the pages tree, and as such is a | ||
| 1214 | - // reasonable size. | 1162 | + // We will be initializing some values of the computed hint tables. Specifically, we can |
| 1163 | + // initialize any items that deal with object numbers or counts but not any items that deal with | ||
| 1164 | + // lengths or offsets. The code that writes linearized files will have to fill in these values | ||
| 1165 | + // during the first pass. The validation code can compute them relatively easily given the rest | ||
| 1166 | + // of the information. | ||
| 1167 | + | ||
| 1168 | + // npages is the size of the existing pages vector, which has been created by traversing the | ||
| 1169 | + // pages tree, and as such is a reasonable size. | ||
| 1215 | m->c_linp.npages = npages; | 1170 | m->c_linp.npages = npages; |
| 1216 | m->c_page_offset_data.entries = std::vector<CHPageOffsetEntry>(toS(npages)); | 1171 | m->c_page_offset_data.entries = std::vector<CHPageOffsetEntry>(toS(npages)); |
| 1217 | 1172 | ||
| @@ -1226,11 +1181,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | @@ -1226,11 +1181,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | ||
| 1226 | m->part4.push_back(getObject(og)); | 1181 | m->part4.push_back(getObject(og)); |
| 1227 | } | 1182 | } |
| 1228 | 1183 | ||
| 1229 | - // Part 6: first page objects. Note: implementation note 124 | ||
| 1230 | - // states that Acrobat always treats page 0 as the first page for | ||
| 1231 | - // linearization regardless of /OpenAction. pdlin doesn't provide | ||
| 1232 | - // any option to set this and also disregards /OpenAction. We | ||
| 1233 | - // will do the same. | 1184 | + // Part 6: first page objects. Note: implementation note 124 states that Acrobat always treats |
| 1185 | + // page 0 as the first page for linearization regardless of /OpenAction. pdlin doesn't provide | ||
| 1186 | + // any option to set this and also disregards /OpenAction. We will do the same. | ||
| 1234 | 1187 | ||
| 1235 | // First, place the actual first page object itself. | 1188 | // First, place the actual first page object itself. |
| 1236 | if (pages.empty()) { | 1189 | if (pages.empty()) { |
| @@ -1245,10 +1198,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | @@ -1245,10 +1198,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | ||
| 1245 | m->c_linp.first_page_object = pages.at(0).getObjectID(); | 1198 | m->c_linp.first_page_object = pages.at(0).getObjectID(); |
| 1246 | m->part6.push_back(pages.at(0)); | 1199 | m->part6.push_back(pages.at(0)); |
| 1247 | 1200 | ||
| 1248 | - // The PDF spec "recommends" an order for the rest of the objects, | ||
| 1249 | - // but we are going to disregard it except to the extent that it | ||
| 1250 | - // groups private and shared objects contiguously for the sake of | ||
| 1251 | - // hint tables. | 1201 | + // The PDF spec "recommends" an order for the rest of the objects, but we are going to disregard |
| 1202 | + // it except to the extent that it groups private and shared objects contiguously for the sake | ||
| 1203 | + // of hint tables. | ||
| 1252 | 1204 | ||
| 1253 | for (auto const& og: lc_first_page_private) { | 1205 | for (auto const& og: lc_first_page_private) { |
| 1254 | m->part6.push_back(getObject(og)); | 1206 | m->part6.push_back(getObject(og)); |
| @@ -1263,11 +1215,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | @@ -1263,11 +1215,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | ||
| 1263 | pushOutlinesToPart(m->part6, lc_outlines, object_stream_data); | 1215 | pushOutlinesToPart(m->part6, lc_outlines, object_stream_data); |
| 1264 | } | 1216 | } |
| 1265 | 1217 | ||
| 1266 | - // Fill in page offset hint table information for the first page. | ||
| 1267 | - // The PDF spec says that nshared_objects should be zero for the | ||
| 1268 | - // first page. pdlin does not appear to obey this, but it fills | ||
| 1269 | - // in garbage values for all the shared object identifiers on the | ||
| 1270 | - // first page. | 1218 | + // Fill in page offset hint table information for the first page. The PDF spec says that |
| 1219 | + // nshared_objects should be zero for the first page. pdlin does not appear to obey this, but | ||
| 1220 | + // it fills in garbage values for all the shared object identifiers on the first page. | ||
| 1271 | 1221 | ||
| 1272 | m->c_page_offset_data.entries.at(0).nobjects = toI(m->part6.size()); | 1222 | m->c_page_offset_data.entries.at(0).nobjects = toI(m->part6.size()); |
| 1273 | 1223 | ||
| @@ -1287,8 +1237,8 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | @@ -1287,8 +1237,8 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | ||
| 1287 | lc_other_page_private.erase(page_og); | 1237 | lc_other_page_private.erase(page_og); |
| 1288 | m->part7.push_back(pages.at(i)); | 1238 | m->part7.push_back(pages.at(i)); |
| 1289 | 1239 | ||
| 1290 | - // Place all non-shared objects referenced by this page, | ||
| 1291 | - // updating the page object count for the hint table. | 1240 | + // Place all non-shared objects referenced by this page, updating the page object count for |
| 1241 | + // the hint table. | ||
| 1292 | 1242 | ||
| 1293 | m->c_page_offset_data.entries.at(i).nobjects = 1; | 1243 | m->c_page_offset_data.entries.at(i).nobjects = 1; |
| 1294 | 1244 | ||
| @@ -1321,12 +1271,10 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | @@ -1321,12 +1271,10 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | ||
| 1321 | 1271 | ||
| 1322 | // Part 9: other objects | 1272 | // Part 9: other objects |
| 1323 | 1273 | ||
| 1324 | - // The PDF specification makes recommendations on ordering here. | ||
| 1325 | - // We follow them only to a limited extent. Specifically, we put | ||
| 1326 | - // the pages tree first, then private thumbnail objects in page | ||
| 1327 | - // order, then shared thumbnail objects, and then outlines (unless | ||
| 1328 | - // in part 6). After that, we throw all remaining objects in | ||
| 1329 | - // arbitrary order. | 1274 | + // The PDF specification makes recommendations on ordering here. We follow them only to a |
| 1275 | + // limited extent. Specifically, we put the pages tree first, then private thumbnail objects in | ||
| 1276 | + // page order, then shared thumbnail objects, and then outlines (unless in part 6). After that, | ||
| 1277 | + // we throw all remaining objects in arbitrary order. | ||
| 1330 | 1278 | ||
| 1331 | // Place the pages tree. | 1279 | // Place the pages tree. |
| 1332 | std::set<QPDFObjGen> pages_ogs = | 1280 | std::set<QPDFObjGen> pages_ogs = |
| @@ -1342,9 +1290,8 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | @@ -1342,9 +1290,8 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | ||
| 1342 | } | 1290 | } |
| 1343 | } | 1291 | } |
| 1344 | 1292 | ||
| 1345 | - // Place private thumbnail images in page order. Slightly more | ||
| 1346 | - // information would be required if we were going to bother with | ||
| 1347 | - // thumbnail hint tables. | 1293 | + // Place private thumbnail images in page order. Slightly more information would be required if |
| 1294 | + // we were going to bother with thumbnail hint tables. | ||
| 1348 | for (size_t i = 0; i < toS(npages); ++i) { | 1295 | for (size_t i = 0; i < toS(npages); ++i) { |
| 1349 | QPDFObjectHandle thumb = pages.at(i).getKey("/Thumb"); | 1296 | QPDFObjectHandle thumb = pages.at(i).getKey("/Thumb"); |
| 1350 | thumb = getUncompressedObject(thumb, object_stream_data); | 1297 | thumb = getUncompressedObject(thumb, object_stream_data); |
| @@ -1355,11 +1302,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | @@ -1355,11 +1302,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | ||
| 1355 | lc_thumbnail_private.erase(thumb_og); | 1302 | lc_thumbnail_private.erase(thumb_og); |
| 1356 | m->part9.push_back(thumb); | 1303 | m->part9.push_back(thumb); |
| 1357 | } else { | 1304 | } else { |
| 1358 | - // No internal error this time...there's nothing to | ||
| 1359 | - // stop this object from having been referred to | ||
| 1360 | - // somewhere else outside of a page's /Thumb, and if | ||
| 1361 | - // it had been, there's nothing to prevent it from | ||
| 1362 | - // having been in some set other than | 1305 | + // No internal error this time...there's nothing to stop this object from having |
| 1306 | + // been referred to somewhere else outside of a page's /Thumb, and if it had been, | ||
| 1307 | + // there's nothing to prevent it from having been in some set other than | ||
| 1363 | // lc_thumbnail_private. | 1308 | // lc_thumbnail_private. |
| 1364 | } | 1309 | } |
| 1365 | std::set<QPDFObjGen>& ogs = m->obj_user_to_objects[ObjUser(ObjUser::ou_thumb, toI(i))]; | 1310 | std::set<QPDFObjGen>& ogs = m->obj_user_to_objects[ObjUser(ObjUser::ou_thumb, toI(i))]; |
| @@ -1372,9 +1317,8 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | @@ -1372,9 +1317,8 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | ||
| 1372 | } | 1317 | } |
| 1373 | } | 1318 | } |
| 1374 | if (!lc_thumbnail_private.empty()) { | 1319 | if (!lc_thumbnail_private.empty()) { |
| 1375 | - stopOnError("INTERNAL ERROR: " | ||
| 1376 | - "QPDF::calculateLinearizationData: lc_thumbnail_private " | ||
| 1377 | - "not empty after placing thumbnails"); | 1320 | + stopOnError("INTERNAL ERROR: QPDF::calculateLinearizationData: lc_thumbnail_private not " |
| 1321 | + "empty after placing thumbnails"); | ||
| 1378 | } | 1322 | } |
| 1379 | 1323 | ||
| 1380 | // Place shared thumbnail objects | 1324 | // Place shared thumbnail objects |
| @@ -1404,17 +1348,15 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | @@ -1404,17 +1348,15 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | ||
| 1404 | std::to_string(num_placed) + "; number of objects: " + std::to_string(num_wanted)); | 1348 | std::to_string(num_placed) + "; number of objects: " + std::to_string(num_wanted)); |
| 1405 | } | 1349 | } |
| 1406 | 1350 | ||
| 1407 | - // Calculate shared object hint table information including | ||
| 1408 | - // references to shared objects from page offset hint data. | 1351 | + // Calculate shared object hint table information including references to shared objects from |
| 1352 | + // page offset hint data. | ||
| 1409 | 1353 | ||
| 1410 | - // The shared object hint table consists of all part 6 (whether | ||
| 1411 | - // shared or not) in order followed by all part 8 objects in | ||
| 1412 | - // order. Add the objects to shared object data keeping a map of | ||
| 1413 | - // object number to index. Then populate the shared object | ||
| 1414 | - // information for the pages. | 1354 | + // The shared object hint table consists of all part 6 (whether shared or not) in order followed |
| 1355 | + // by all part 8 objects in order. Add the objects to shared object data keeping a map of | ||
| 1356 | + // object number to index. Then populate the shared object information for the pages. | ||
| 1415 | 1357 | ||
| 1416 | - // Note that two objects never have the same object number, so we | ||
| 1417 | - // can map from object number only without regard to generation. | 1358 | + // Note that two objects never have the same object number, so we can map from object number |
| 1359 | + // only without regard to generation. | ||
| 1418 | std::map<int, int> obj_to_index; | 1360 | std::map<int, int> obj_to_index; |
| 1419 | 1361 | ||
| 1420 | m->c_shared_object_data.nshared_first_page = toI(m->part6.size()); | 1362 | m->c_shared_object_data.nshared_first_page = toI(m->part6.size()); |
| @@ -1441,8 +1383,7 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | @@ -1441,8 +1383,7 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | ||
| 1441 | stopOnError("shared object hint table has wrong number of entries"); | 1383 | stopOnError("shared object hint table has wrong number of entries"); |
| 1442 | } | 1384 | } |
| 1443 | 1385 | ||
| 1444 | - // Now compute the list of shared objects for each page after the | ||
| 1445 | - // first page. | 1386 | + // Now compute the list of shared objects for each page after the first page. |
| 1446 | 1387 | ||
| 1447 | for (size_t i = 1; i < toS(npages); ++i) { | 1388 | for (size_t i = 1; i < toS(npages); ++i) { |
| 1448 | CHPageOffsetEntry& pe = m->c_page_offset_data.entries.at(i); | 1389 | CHPageOffsetEntry& pe = m->c_page_offset_data.entries.at(i); |
| @@ -1520,20 +1461,17 @@ QPDF::outputLengthNextN( | @@ -1520,20 +1461,17 @@ QPDF::outputLengthNextN( | ||
| 1520 | std::map<int, qpdf_offset_t> const& lengths, | 1461 | std::map<int, qpdf_offset_t> const& lengths, |
| 1521 | std::map<int, int> const& obj_renumber) | 1462 | std::map<int, int> const& obj_renumber) |
| 1522 | { | 1463 | { |
| 1523 | - // Figure out the length of a series of n consecutive objects in | ||
| 1524 | - // the output file starting with whatever object in_object from | ||
| 1525 | - // the input file mapped to. | 1464 | + // Figure out the length of a series of n consecutive objects in the output file starting with |
| 1465 | + // whatever object in_object from the input file mapped to. | ||
| 1526 | 1466 | ||
| 1527 | if (obj_renumber.count(in_object) == 0) { | 1467 | if (obj_renumber.count(in_object) == 0) { |
| 1528 | - stopOnError("found object that is not renumbered while" | ||
| 1529 | - " writing linearization data"); | 1468 | + stopOnError("found object that is not renumbered while writing linearization data"); |
| 1530 | } | 1469 | } |
| 1531 | int first = (*(obj_renumber.find(in_object))).second; | 1470 | int first = (*(obj_renumber.find(in_object))).second; |
| 1532 | int length = 0; | 1471 | int length = 0; |
| 1533 | for (int i = 0; i < n; ++i) { | 1472 | for (int i = 0; i < n; ++i) { |
| 1534 | if (lengths.count(first + i) == 0) { | 1473 | if (lengths.count(first + i) == 0) { |
| 1535 | - stopOnError("found item with unknown length" | ||
| 1536 | - " while writing linearization data"); | 1474 | + stopOnError("found item with unknown length while writing linearization data"); |
| 1537 | } | 1475 | } |
| 1538 | length += toI((*(lengths.find(first + toI(i)))).second); | 1476 | length += toI((*(lengths.find(first + toI(i)))).second); |
| 1539 | } | 1477 | } |
| @@ -1548,16 +1486,14 @@ QPDF::calculateHPageOffset( | @@ -1548,16 +1486,14 @@ QPDF::calculateHPageOffset( | ||
| 1548 | { | 1486 | { |
| 1549 | // Page Offset Hint Table | 1487 | // Page Offset Hint Table |
| 1550 | 1488 | ||
| 1551 | - // We are purposely leaving some values set to their initial zero | ||
| 1552 | - // values. | 1489 | + // We are purposely leaving some values set to their initial zero values. |
| 1553 | 1490 | ||
| 1554 | std::vector<QPDFObjectHandle> const& pages = getAllPages(); | 1491 | std::vector<QPDFObjectHandle> const& pages = getAllPages(); |
| 1555 | size_t npages = pages.size(); | 1492 | size_t npages = pages.size(); |
| 1556 | CHPageOffset& cph = m->c_page_offset_data; | 1493 | CHPageOffset& cph = m->c_page_offset_data; |
| 1557 | std::vector<CHPageOffsetEntry>& cphe = cph.entries; | 1494 | std::vector<CHPageOffsetEntry>& cphe = cph.entries; |
| 1558 | 1495 | ||
| 1559 | - // Calculate minimum and maximum values for number of objects per | ||
| 1560 | - // page and page length. | 1496 | + // Calculate minimum and maximum values for number of objects per page and page length. |
| 1561 | 1497 | ||
| 1562 | int min_nobjects = cphe.at(0).nobjects; | 1498 | int min_nobjects = cphe.at(0).nobjects; |
| 1563 | int max_nobjects = min_nobjects; | 1499 | int max_nobjects = min_nobjects; |
| @@ -1572,11 +1508,11 @@ QPDF::calculateHPageOffset( | @@ -1572,11 +1508,11 @@ QPDF::calculateHPageOffset( | ||
| 1572 | phe = std::vector<HPageOffsetEntry>(npages); | 1508 | phe = std::vector<HPageOffsetEntry>(npages); |
| 1573 | 1509 | ||
| 1574 | for (unsigned int i = 0; i < npages; ++i) { | 1510 | for (unsigned int i = 0; i < npages; ++i) { |
| 1575 | - // Calculate values for each page, assigning full values to | ||
| 1576 | - // the delta items. They will be adjusted later. | 1511 | + // Calculate values for each page, assigning full values to the delta items. They will be |
| 1512 | + // adjusted later. | ||
| 1577 | 1513 | ||
| 1578 | - // Repeat calculations for page 0 so we can assign to phe[i] | ||
| 1579 | - // without duplicating those assignments. | 1514 | + // Repeat calculations for page 0 so we can assign to phe[i] without duplicating those |
| 1515 | + // assignments. | ||
| 1580 | 1516 | ||
| 1581 | int nobjects = cphe.at(i).nobjects; | 1517 | int nobjects = cphe.at(i).nobjects; |
| 1582 | int length = outputLengthNextN(pages.at(i).getObjectID(), nobjects, lengths, obj_renumber); | 1518 | int length = outputLengthNextN(pages.at(i).getObjectID(), nobjects, lengths, obj_renumber); |
| @@ -1604,11 +1540,10 @@ QPDF::calculateHPageOffset( | @@ -1604,11 +1540,10 @@ QPDF::calculateHPageOffset( | ||
| 1604 | ph.nbits_shared_identifier = nbits(m->c_shared_object_data.nshared_total); | 1540 | ph.nbits_shared_identifier = nbits(m->c_shared_object_data.nshared_total); |
| 1605 | ph.shared_denominator = 4; // doesn't matter | 1541 | ph.shared_denominator = 4; // doesn't matter |
| 1606 | 1542 | ||
| 1607 | - // It isn't clear how to compute content offset and content | ||
| 1608 | - // length. Since we are not interleaving page objects with the | ||
| 1609 | - // content stream, we'll use the same values for content length as | ||
| 1610 | - // page length. We will use 0 as content offset because this is | ||
| 1611 | - // what Adobe does (implementation note 127) and pdlin as well. | 1543 | + // It isn't clear how to compute content offset and content length. Since we are not |
| 1544 | + // interleaving page objects with the content stream, we'll use the same values for content | ||
| 1545 | + // length as page length. We will use 0 as content offset because this is what Adobe does | ||
| 1546 | + // (implementation note 127) and pdlin as well. | ||
| 1612 | ph.nbits_delta_content_length = ph.nbits_delta_page_length; | 1547 | ph.nbits_delta_content_length = ph.nbits_delta_page_length; |
| 1613 | ph.min_content_length = ph.min_page_length; | 1548 | ph.min_content_length = ph.min_page_length; |
| 1614 | 1549 | ||
| @@ -1616,8 +1551,8 @@ QPDF::calculateHPageOffset( | @@ -1616,8 +1551,8 @@ QPDF::calculateHPageOffset( | ||
| 1616 | // Adjust delta entries | 1551 | // Adjust delta entries |
| 1617 | if ((phe.at(i).delta_nobjects < min_nobjects) || | 1552 | if ((phe.at(i).delta_nobjects < min_nobjects) || |
| 1618 | (phe.at(i).delta_page_length < min_length)) { | 1553 | (phe.at(i).delta_page_length < min_length)) { |
| 1619 | - stopOnError("found too small delta nobjects or delta page length" | ||
| 1620 | - " while writing linearization data"); | 1554 | + stopOnError("found too small delta nobjects or delta page length while writing " |
| 1555 | + "linearization data"); | ||
| 1621 | } | 1556 | } |
| 1622 | phe.at(i).delta_nobjects -= min_nobjects; | 1557 | phe.at(i).delta_nobjects -= min_nobjects; |
| 1623 | phe.at(i).delta_page_length -= min_length; | 1558 | phe.at(i).delta_page_length -= min_length; |
| @@ -1669,8 +1604,7 @@ QPDF::calculateHSharedObject( | @@ -1669,8 +1604,7 @@ QPDF::calculateHSharedObject( | ||
| 1669 | for (size_t i = 0; i < toS(cso.nshared_total); ++i) { | 1604 | for (size_t i = 0; i < toS(cso.nshared_total); ++i) { |
| 1670 | // Adjust deltas | 1605 | // Adjust deltas |
| 1671 | if (soe.at(i).delta_group_length < min_length) { | 1606 | if (soe.at(i).delta_group_length < min_length) { |
| 1672 | - stopOnError("found too small group length while" | ||
| 1673 | - " writing linearization data"); | 1607 | + stopOnError("found too small group length while writing linearization data"); |
| 1674 | } | 1608 | } |
| 1675 | soe.at(i).delta_group_length -= min_length; | 1609 | soe.at(i).delta_group_length -= min_length; |
| 1676 | } | 1610 | } |
| @@ -1700,14 +1634,13 @@ template <class T, class int_type> | @@ -1700,14 +1634,13 @@ template <class T, class int_type> | ||
| 1700 | static void | 1634 | static void |
| 1701 | write_vector_int(BitWriter& w, int nitems, std::vector<T>& vec, int bits, int_type T::*field) | 1635 | write_vector_int(BitWriter& w, int nitems, std::vector<T>& vec, int bits, int_type T::*field) |
| 1702 | { | 1636 | { |
| 1703 | - // nitems times, write bits bits from the given field of the ith | ||
| 1704 | - // vector to the given bit writer. | 1637 | + // nitems times, write bits bits from the given field of the ith vector to the given bit writer. |
| 1705 | 1638 | ||
| 1706 | for (size_t i = 0; i < QIntC::to_size(nitems); ++i) { | 1639 | for (size_t i = 0; i < QIntC::to_size(nitems); ++i) { |
| 1707 | w.writeBits(QIntC::to_ulonglong(vec.at(i).*field), QIntC::to_size(bits)); | 1640 | w.writeBits(QIntC::to_ulonglong(vec.at(i).*field), QIntC::to_size(bits)); |
| 1708 | } | 1641 | } |
| 1709 | - // The PDF spec says that each hint table starts at a byte | ||
| 1710 | - // boundary. Each "row" actually must start on a byte boundary. | 1642 | + // The PDF spec says that each hint table starts at a byte boundary. Each "row" actually must |
| 1643 | + // start on a byte boundary. | ||
| 1711 | w.flush(); | 1644 | w.flush(); |
| 1712 | } | 1645 | } |
| 1713 | 1646 | ||
| @@ -1721,8 +1654,8 @@ write_vector_vector( | @@ -1721,8 +1654,8 @@ write_vector_vector( | ||
| 1721 | int bits, | 1654 | int bits, |
| 1722 | std::vector<int> T::*vec2) | 1655 | std::vector<int> T::*vec2) |
| 1723 | { | 1656 | { |
| 1724 | - // nitems1 times, write nitems2 (from the ith element of vec1) items | ||
| 1725 | - // from the vec2 vector field of the ith item of vec1. | 1657 | + // nitems1 times, write nitems2 (from the ith element of vec1) items from the vec2 vector field |
| 1658 | + // of the ith item of vec1. | ||
| 1726 | for (size_t i1 = 0; i1 < QIntC::to_size(nitems1); ++i1) { | 1659 | for (size_t i1 = 0; i1 < QIntC::to_size(nitems1); ++i1) { |
| 1727 | for (size_t i2 = 0; i2 < QIntC::to_size(vec1.at(i1).*nitems2); ++i2) { | 1660 | for (size_t i2 = 0; i2 < QIntC::to_size(vec1.at(i1).*nitems2); ++i2) { |
| 1728 | w.writeBits(QIntC::to_ulonglong((vec1.at(i1).*vec2).at(i2)), QIntC::to_size(bits)); | 1661 | w.writeBits(QIntC::to_ulonglong((vec1.at(i1).*vec2).at(i2)), QIntC::to_size(bits)); |
| @@ -1835,8 +1768,8 @@ QPDF::generateHintStream( | @@ -1835,8 +1768,8 @@ QPDF::generateHintStream( | ||
| 1835 | calculateHSharedObject(xref, lengths, obj_renumber); | 1768 | calculateHSharedObject(xref, lengths, obj_renumber); |
| 1836 | calculateHOutline(xref, lengths, obj_renumber); | 1769 | calculateHOutline(xref, lengths, obj_renumber); |
| 1837 | 1770 | ||
| 1838 | - // Write the hint stream itself into a compressed memory buffer. | ||
| 1839 | - // Write through a counter so we can get offsets. | 1771 | + // Write the hint stream itself into a compressed memory buffer. Write through a counter so we |
| 1772 | + // can get offsets. | ||
| 1840 | Pl_Buffer hint_stream("hint stream"); | 1773 | Pl_Buffer hint_stream("hint stream"); |
| 1841 | Pl_Flate f("compress hint stream", &hint_stream, Pl_Flate::a_deflate); | 1774 | Pl_Flate f("compress hint stream", &hint_stream, Pl_Flate::a_deflate); |
| 1842 | Pl_Count c("count", &f); | 1775 | Pl_Count c("count", &f); |
libqpdf/QPDF_optimization.cc
| @@ -64,9 +64,8 @@ QPDF::optimize( | @@ -64,9 +64,8 @@ QPDF::optimize( | ||
| 64 | return; | 64 | return; |
| 65 | } | 65 | } |
| 66 | 66 | ||
| 67 | - // The PDF specification indicates that /Outlines is supposed to | ||
| 68 | - // be an indirect reference. Force it to be so if it exists and | ||
| 69 | - // is direct. (This has been seen in the wild.) | 67 | + // The PDF specification indicates that /Outlines is supposed to be an indirect reference. Force |
| 68 | + // it to be so if it exists and is direct. (This has been seen in the wild.) | ||
| 70 | QPDFObjectHandle root = getRoot(); | 69 | QPDFObjectHandle root = getRoot(); |
| 71 | if (root.getKey("/Outlines").isDictionary()) { | 70 | if (root.getKey("/Outlines").isDictionary()) { |
| 72 | QPDFObjectHandle outlines = root.getKey("/Outlines"); | 71 | QPDFObjectHandle outlines = root.getKey("/Outlines"); |
| @@ -76,8 +75,8 @@ QPDF::optimize( | @@ -76,8 +75,8 @@ QPDF::optimize( | ||
| 76 | } | 75 | } |
| 77 | } | 76 | } |
| 78 | 77 | ||
| 79 | - // Traverse pages tree pushing all inherited resources down to the | ||
| 80 | - // page level. This also initializes m->all_pages. | 78 | + // Traverse pages tree pushing all inherited resources down to the page level. This also |
| 79 | + // initializes m->all_pages. | ||
| 81 | pushInheritedAttributesToPage(allow_changes, false); | 80 | pushInheritedAttributesToPage(allow_changes, false); |
| 82 | 81 | ||
| 83 | // Traverse pages | 82 | // Traverse pages |
| @@ -102,12 +101,10 @@ QPDF::optimize( | @@ -102,12 +101,10 @@ QPDF::optimize( | ||
| 102 | } | 101 | } |
| 103 | 102 | ||
| 104 | for (auto const& key: root.getKeys()) { | 103 | for (auto const& key: root.getKeys()) { |
| 105 | - // Technically, /I keys from /Thread dictionaries are supposed | ||
| 106 | - // to be handled separately, but we are going to disregard | ||
| 107 | - // that specification for now. There is loads of evidence | ||
| 108 | - // that pdlin and Acrobat both disregard things like this from | ||
| 109 | - // time to time, so this is almost certain not to cause any | ||
| 110 | - // problems. | 104 | + // Technically, /I keys from /Thread dictionaries are supposed to be handled separately, but |
| 105 | + // we are going to disregard that specification for now. There is loads of evidence that | ||
| 106 | + // pdlin and Acrobat both disregard things like this from time to time, so this is almost | ||
| 107 | + // certain not to cause any problems. | ||
| 111 | updateObjectMaps( | 108 | updateObjectMaps( |
| 112 | ObjUser(ObjUser::ou_root_key, key), root.getKey(key), skip_stream_parameters); | 109 | ObjUser(ObjUser::ou_root_key, key), root.getKey(key), skip_stream_parameters); |
| 113 | } | 110 | } |
| @@ -130,23 +127,20 @@ QPDF::pushInheritedAttributesToPage() | @@ -130,23 +127,20 @@ QPDF::pushInheritedAttributesToPage() | ||
| 130 | void | 127 | void |
| 131 | QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) | 128 | QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) |
| 132 | { | 129 | { |
| 133 | - // Traverse pages tree pushing all inherited resources down to the | ||
| 134 | - // page level. | 130 | + // Traverse pages tree pushing all inherited resources down to the page level. |
| 135 | 131 | ||
| 136 | - // The record of whether we've done this is cleared by | ||
| 137 | - // updateAllPagesCache(). If we're warning for skipped keys, | ||
| 138 | - // re-traverse unconditionally. | 132 | + // The record of whether we've done this is cleared by updateAllPagesCache(). If we're warning |
| 133 | + // for skipped keys, re-traverse unconditionally. | ||
| 139 | if (m->pushed_inherited_attributes_to_pages && (!warn_skipped_keys)) { | 134 | if (m->pushed_inherited_attributes_to_pages && (!warn_skipped_keys)) { |
| 140 | return; | 135 | return; |
| 141 | } | 136 | } |
| 142 | 137 | ||
| 143 | - // Calling getAllPages() resolves any duplicated page objects, | ||
| 144 | - // repairs broken nodes, and detects loops, so we don't have to do | ||
| 145 | - // those activities here. | 138 | + // Calling getAllPages() resolves any duplicated page objects, repairs broken nodes, and detects |
| 139 | + // loops, so we don't have to do those activities here. | ||
| 146 | getAllPages(); | 140 | getAllPages(); |
| 147 | 141 | ||
| 148 | - // key_ancestors is a mapping of page attribute keys to a stack of | ||
| 149 | - // Pages nodes that contain values for them. | 142 | + // key_ancestors is a mapping of page attribute keys to a stack of Pages nodes that contain |
| 143 | + // values for them. | ||
| 150 | std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors; | 144 | std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors; |
| 151 | pushInheritedAttributesToPageInternal( | 145 | pushInheritedAttributesToPageInternal( |
| 152 | m->trailer.getKey("/Root").getKey("/Pages"), | 146 | m->trailer.getKey("/Root").getKey("/Pages"), |
| @@ -168,10 +162,9 @@ QPDF::pushInheritedAttributesToPageInternal( | @@ -168,10 +162,9 @@ QPDF::pushInheritedAttributesToPageInternal( | ||
| 168 | bool allow_changes, | 162 | bool allow_changes, |
| 169 | bool warn_skipped_keys) | 163 | bool warn_skipped_keys) |
| 170 | { | 164 | { |
| 171 | - // Make a list of inheritable keys. Only the keys /MediaBox, | ||
| 172 | - // /CropBox, /Resources, and /Rotate are inheritable | ||
| 173 | - // attributes. Push this object onto the stack of pages nodes | ||
| 174 | - // that have values for this attribute. | 165 | + // Make a list of inheritable keys. Only the keys /MediaBox, /CropBox, /Resources, and /Rotate |
| 166 | + // are inheritable attributes. Push this object onto the stack of pages nodes that have values | ||
| 167 | + // for this attribute. | ||
| 175 | 168 | ||
| 176 | std::set<std::string> inheritable_keys; | 169 | std::set<std::string> inheritable_keys; |
| 177 | for (auto const& key: cur_pages.getKeys()) { | 170 | for (auto const& key: cur_pages.getKeys()) { |
| @@ -183,9 +176,7 @@ QPDF::pushInheritedAttributesToPageInternal( | @@ -183,9 +176,7 @@ QPDF::pushInheritedAttributesToPageInternal( | ||
| 183 | m->file->getName(), | 176 | m->file->getName(), |
| 184 | m->last_object_description, | 177 | m->last_object_description, |
| 185 | m->file->getLastOffset(), | 178 | m->file->getLastOffset(), |
| 186 | - "optimize detected an " | ||
| 187 | - "inheritable attribute when called " | ||
| 188 | - "in no-change mode"); | 179 | + "optimize detected an inheritable attribute when called in no-change mode"); |
| 189 | } | 180 | } |
| 190 | 181 | ||
| 191 | // This is an inheritable resource | 182 | // This is an inheritable resource |
| @@ -194,9 +185,8 @@ QPDF::pushInheritedAttributesToPageInternal( | @@ -194,9 +185,8 @@ QPDF::pushInheritedAttributesToPageInternal( | ||
| 194 | QTC::TC("qpdf", "QPDF opt direct pages resource", oh.isIndirect() ? 0 : 1); | 185 | QTC::TC("qpdf", "QPDF opt direct pages resource", oh.isIndirect() ? 0 : 1); |
| 195 | if (!oh.isIndirect()) { | 186 | if (!oh.isIndirect()) { |
| 196 | if (!oh.isScalar()) { | 187 | if (!oh.isScalar()) { |
| 197 | - // Replace shared direct object non-scalar | ||
| 198 | - // resources with indirect objects to avoid | ||
| 199 | - // copying large structures around. | 188 | + // Replace shared direct object non-scalar resources with indirect objects to |
| 189 | + // avoid copying large structures around. | ||
| 200 | cur_pages.replaceKey(key, makeIndirectObject(oh)); | 190 | cur_pages.replaceKey(key, makeIndirectObject(oh)); |
| 201 | oh = cur_pages.getKey(key); | 191 | oh = cur_pages.getKey(key); |
| 202 | } else { | 192 | } else { |
| @@ -208,14 +198,12 @@ QPDF::pushInheritedAttributesToPageInternal( | @@ -208,14 +198,12 @@ QPDF::pushInheritedAttributesToPageInternal( | ||
| 208 | if (key_ancestors[key].size() > 1) { | 198 | if (key_ancestors[key].size() > 1) { |
| 209 | QTC::TC("qpdf", "QPDF opt key ancestors depth > 1"); | 199 | QTC::TC("qpdf", "QPDF opt key ancestors depth > 1"); |
| 210 | } | 200 | } |
| 211 | - // Remove this resource from this node. It will be | ||
| 212 | - // reattached at the page level. | 201 | + // Remove this resource from this node. It will be reattached at the page level. |
| 213 | cur_pages.removeKey(key); | 202 | cur_pages.removeKey(key); |
| 214 | } else if (!((key == "/Type") || (key == "/Parent") || (key == "/Kids") || | 203 | } else if (!((key == "/Type") || (key == "/Parent") || (key == "/Kids") || |
| 215 | (key == "/Count"))) { | 204 | (key == "/Count"))) { |
| 216 | - // Warn when flattening, but not if the key is at the top | ||
| 217 | - // level (i.e. "/Parent" not set), as we don't change these; | ||
| 218 | - // but flattening removes intermediate /Pages nodes. | 205 | + // Warn when flattening, but not if the key is at the top level (i.e. "/Parent" not |
| 206 | + // set), as we don't change these; but flattening removes intermediate /Pages nodes. | ||
| 219 | if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) { | 207 | if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) { |
| 220 | QTC::TC("qpdf", "QPDF unknown key not inherited"); | 208 | QTC::TC("qpdf", "QPDF unknown key not inherited"); |
| 221 | setLastObjectDescription("Pages object", cur_pages.getObjGen()); | 209 | setLastObjectDescription("Pages object", cur_pages.getObjGen()); |
| @@ -224,24 +212,21 @@ QPDF::pushInheritedAttributesToPageInternal( | @@ -224,24 +212,21 @@ QPDF::pushInheritedAttributesToPageInternal( | ||
| 224 | m->last_object_description, | 212 | m->last_object_description, |
| 225 | 0, | 213 | 0, |
| 226 | ("Unknown key " + key + | 214 | ("Unknown key " + key + |
| 227 | - " in /Pages object" | ||
| 228 | - " is being discarded as a result of" | ||
| 229 | - " flattening the /Pages tree")); | 215 | + " in /Pages object is being discarded as a result of flattening the /Pages " |
| 216 | + "tree")); | ||
| 230 | } | 217 | } |
| 231 | } | 218 | } |
| 232 | } | 219 | } |
| 233 | 220 | ||
| 234 | - // Process descendant nodes. This method does not perform loop | ||
| 235 | - // detection because all code paths that lead here follow a call | ||
| 236 | - // to getAllPages, which already throws an exception in the event | 221 | + // Process descendant nodes. This method does not perform loop detection because all code paths |
| 222 | + // that lead here follow a call to getAllPages, which already throws an exception in the event | ||
| 237 | // of a loop in the pages tree. | 223 | // of a loop in the pages tree. |
| 238 | for (auto& kid: cur_pages.getKey("/Kids").aitems()) { | 224 | for (auto& kid: cur_pages.getKey("/Kids").aitems()) { |
| 239 | if (kid.isDictionaryOfType("/Pages")) { | 225 | if (kid.isDictionaryOfType("/Pages")) { |
| 240 | pushInheritedAttributesToPageInternal( | 226 | pushInheritedAttributesToPageInternal( |
| 241 | kid, key_ancestors, allow_changes, warn_skipped_keys); | 227 | kid, key_ancestors, allow_changes, warn_skipped_keys); |
| 242 | } else { | 228 | } else { |
| 243 | - // Add all available inheritable attributes not present in | ||
| 244 | - // this object to this object. | 229 | + // Add all available inheritable attributes not present in this object to this object. |
| 245 | for (auto const& iter: key_ancestors) { | 230 | for (auto const& iter: key_ancestors) { |
| 246 | std::string const& key = iter.first; | 231 | std::string const& key = iter.first; |
| 247 | if (!kid.hasKey(key)) { | 232 | if (!kid.hasKey(key)) { |
| @@ -254,10 +239,9 @@ QPDF::pushInheritedAttributesToPageInternal( | @@ -254,10 +239,9 @@ QPDF::pushInheritedAttributesToPageInternal( | ||
| 254 | } | 239 | } |
| 255 | } | 240 | } |
| 256 | 241 | ||
| 257 | - // For each inheritable key, pop the stack. If the stack | ||
| 258 | - // becomes empty, remove it from the map. That way, the | ||
| 259 | - // invariant that the list of keys in key_ancestors is exactly | ||
| 260 | - // those keys for which inheritable attributes are available. | 242 | + // For each inheritable key, pop the stack. If the stack becomes empty, remove it from the map. |
| 243 | + // That way, the invariant that the list of keys in key_ancestors is exactly those keys for | ||
| 244 | + // which inheritable attributes are available. | ||
| 261 | 245 | ||
| 262 | if (!inheritable_keys.empty()) { | 246 | if (!inheritable_keys.empty()) { |
| 263 | QTC::TC("qpdf", "QPDF opt inheritable keys"); | 247 | QTC::TC("qpdf", "QPDF opt inheritable keys"); |
| @@ -291,8 +275,7 @@ QPDF::updateObjectMapsInternal( | @@ -291,8 +275,7 @@ QPDF::updateObjectMapsInternal( | ||
| 291 | QPDFObjGen::set& visited, | 275 | QPDFObjGen::set& visited, |
| 292 | bool top) | 276 | bool top) |
| 293 | { | 277 | { |
| 294 | - // Traverse the object tree from this point taking care to avoid | ||
| 295 | - // crossing page boundaries. | 278 | + // Traverse the object tree from this point taking care to avoid crossing page boundaries. |
| 296 | 279 | ||
| 297 | bool is_page_node = false; | 280 | bool is_page_node = false; |
| 298 | 281 | ||
| @@ -332,8 +315,7 @@ QPDF::updateObjectMapsInternal( | @@ -332,8 +315,7 @@ QPDF::updateObjectMapsInternal( | ||
| 332 | 315 | ||
| 333 | for (auto const& key: dict.getKeys()) { | 316 | for (auto const& key: dict.getKeys()) { |
| 334 | if (is_page_node && (key == "/Thumb")) { | 317 | if (is_page_node && (key == "/Thumb")) { |
| 335 | - // Traverse page thumbnail dictionaries as a special | ||
| 336 | - // case. | 318 | + // Traverse page thumbnail dictionaries as a special case. |
| 337 | updateObjectMapsInternal( | 319 | updateObjectMapsInternal( |
| 338 | ObjUser(ObjUser::ou_thumb, ou.pageno), | 320 | ObjUser(ObjUser::ou_thumb, ou.pageno), |
| 339 | dict.getKey(key), | 321 | dict.getKey(key), |
| @@ -345,8 +327,7 @@ QPDF::updateObjectMapsInternal( | @@ -345,8 +327,7 @@ QPDF::updateObjectMapsInternal( | ||
| 345 | } else if ( | 327 | } else if ( |
| 346 | ((ssp >= 1) && (key == "/Length")) || | 328 | ((ssp >= 1) && (key == "/Length")) || |
| 347 | ((ssp >= 2) && ((key == "/Filter") || (key == "/DecodeParms")))) { | 329 | ((ssp >= 2) && ((key == "/Filter") || (key == "/DecodeParms")))) { |
| 348 | - // Don't traverse into stream parameters that we are | ||
| 349 | - // not going to write. | 330 | + // Don't traverse into stream parameters that we are not going to write. |
| 350 | } else { | 331 | } else { |
| 351 | updateObjectMapsInternal( | 332 | updateObjectMapsInternal( |
| 352 | ou, dict.getKey(key), skip_stream_parameters, visited, false); | 333 | ou, dict.getKey(key), skip_stream_parameters, visited, false); |
| @@ -362,9 +343,8 @@ QPDF::filterCompressedObjects(std::map<int, int> const& object_stream_data) | @@ -362,9 +343,8 @@ QPDF::filterCompressedObjects(std::map<int, int> const& object_stream_data) | ||
| 362 | return; | 343 | return; |
| 363 | } | 344 | } |
| 364 | 345 | ||
| 365 | - // Transform object_to_obj_users and obj_user_to_objects so that | ||
| 366 | - // they refer only to uncompressed objects. If something is a | ||
| 367 | - // user of a compressed object, then it is really a user of the | 346 | + // Transform object_to_obj_users and obj_user_to_objects so that they refer only to uncompressed |
| 347 | + // objects. If something is a user of a compressed object, then it is really a user of the | ||
| 368 | // object stream that contains it. | 348 | // object stream that contains it. |
| 369 | 349 | ||
| 370 | std::map<ObjUser, std::set<QPDFObjGen>> t_obj_user_to_objects; | 350 | std::map<ObjUser, std::set<QPDFObjGen>> t_obj_user_to_objects; |
libqpdf/QPDF_pages.cc
| @@ -4,55 +4,42 @@ | @@ -4,55 +4,42 @@ | ||
| 4 | #include <qpdf/QTC.hh> | 4 | #include <qpdf/QTC.hh> |
| 5 | #include <qpdf/QUtil.hh> | 5 | #include <qpdf/QUtil.hh> |
| 6 | 6 | ||
| 7 | -// In support of page manipulation APIs, these methods internally | ||
| 8 | -// maintain state about pages in a pair of data structures: all_pages, | ||
| 9 | -// which is a vector of page objects, and pageobj_to_pages_pos, which | ||
| 10 | -// maps a page object to its position in the all_pages array. | ||
| 11 | -// Unfortunately, the getAllPages() method returns a const reference | ||
| 12 | -// to all_pages and has been in the public API long before the | ||
| 13 | -// introduction of mutation APIs, so we're pretty much stuck with it. | ||
| 14 | -// Anyway, there are lots of calls to it in the library, so the | ||
| 15 | -// efficiency of having it cached is probably worth keeping it. At one | ||
| 16 | -// point, I had partially implemented a helper class specifically for | ||
| 17 | -// the pages tree, but once you work in all the logic that handles | ||
| 18 | -// repairing the /Type keys of page tree nodes (both /Pages and /Page) | ||
| 19 | -// and deal with duplicate pages, it's just as complex and less | ||
| 20 | -// efficient than what's here. So, in spite of the fact that a const | ||
| 21 | -// reference is returned, the current code is fine and does not need | ||
| 22 | -// to be replaced. A partial implementation of QPDFPagesTree is in | ||
| 23 | -// github in attic in case there is ever a reason to resurrect it. | ||
| 24 | -// There are additional notes in README-maintainer, which also refers | ||
| 25 | -// to this comment. | 7 | +// In support of page manipulation APIs, these methods internally maintain state about pages in a |
| 8 | +// pair of data structures: all_pages, which is a vector of page objects, and pageobj_to_pages_pos, | ||
| 9 | +// which maps a page object to its position in the all_pages array. Unfortunately, the getAllPages() | ||
| 10 | +// method returns a const reference to all_pages and has been in the public API long before the | ||
| 11 | +// introduction of mutation APIs, so we're pretty much stuck with it. Anyway, there are lots of | ||
| 12 | +// calls to it in the library, so the efficiency of having it cached is probably worth keeping it. | ||
| 13 | +// At one point, I had partially implemented a helper class specifically for the pages tree, but | ||
| 14 | +// once you work in all the logic that handles repairing the /Type keys of page tree nodes (both | ||
| 15 | +// /Pages and /Page) and deal with duplicate pages, it's just as complex and less efficient than | ||
| 16 | +// what's here. So, in spite of the fact that a const reference is returned, the current code is | ||
| 17 | +// fine and does not need to be replaced. A partial implementation of QPDFPagesTree is in github in | ||
| 18 | +// attic in case there is ever a reason to resurrect it. There are additional notes in | ||
| 19 | +// README-maintainer, which also refers to this comment. | ||
| 26 | 20 | ||
| 27 | -// The goal of this code is to ensure that the all_pages vector, which | ||
| 28 | -// users may have a reference to, and the pageobj_to_pages_pos map, | ||
| 29 | -// which users will not have access to, remain consistent outside of | ||
| 30 | -// any call to the library. As long as users only touch the /Pages | ||
| 31 | -// structure through page-specific API calls, they never have to worry | ||
| 32 | -// about anything, and this will also stay consistent. If a user | ||
| 33 | -// touches anything about the /Pages structure outside of these calls | ||
| 34 | -// (such as by directly looking up and manipulating the underlying | ||
| 35 | -// objects), they can call updatePagesCache() to bring things back in | ||
| 36 | -// sync. | 21 | +// The goal of this code is to ensure that the all_pages vector, which users may have a reference |
| 22 | +// to, and the pageobj_to_pages_pos map, which users will not have access to, remain consistent | ||
| 23 | +// outside of any call to the library. As long as users only touch the /Pages structure through | ||
| 24 | +// page-specific API calls, they never have to worry about anything, and this will also stay | ||
| 25 | +// consistent. If a user touches anything about the /Pages structure outside of these calls (such | ||
| 26 | +// as by directly looking up and manipulating the underlying objects), they can call | ||
| 27 | +// updatePagesCache() to bring things back in sync. | ||
| 37 | 28 | ||
| 38 | -// If the user doesn't ever use the page manipulation APIs, then qpdf | ||
| 39 | -// leaves the /Pages structure alone. If the user does use the APIs, | ||
| 40 | -// then we push all inheritable objects down and flatten the /Pages | ||
| 41 | -// tree. This makes it easier for us to keep /Pages, all_pages, and | ||
| 42 | -// pageobj_to_pages_pos internally consistent at all times. | 29 | +// If the user doesn't ever use the page manipulation APIs, then qpdf leaves the /Pages structure |
| 30 | +// alone. If the user does use the APIs, then we push all inheritable objects down and flatten the | ||
| 31 | +// /Pages tree. This makes it easier for us to keep /Pages, all_pages, and pageobj_to_pages_pos | ||
| 32 | +// internally consistent at all times. | ||
| 43 | 33 | ||
| 44 | -// Responsibility for keeping all_pages, pageobj_to_pages_pos, and the | ||
| 45 | -// Pages structure consistent should remain in as few places as | ||
| 46 | -// possible. As of initial writing, only flattenPagesTree, | ||
| 47 | -// insertPage, and removePage, along with methods they call, are | ||
| 48 | -// concerned with it. Everything else goes through one of those | ||
| 49 | -// methods. | 34 | +// Responsibility for keeping all_pages, pageobj_to_pages_pos, and the Pages structure consistent |
| 35 | +// should remain in as few places as possible. As of initial writing, only flattenPagesTree, | ||
| 36 | +// insertPage, and removePage, along with methods they call, are concerned with it. Everything else | ||
| 37 | +// goes through one of those methods. | ||
| 50 | 38 | ||
| 51 | std::vector<QPDFObjectHandle> const& | 39 | std::vector<QPDFObjectHandle> const& |
| 52 | QPDF::getAllPages() | 40 | QPDF::getAllPages() |
| 53 | { | 41 | { |
| 54 | - // Note that pushInheritedAttributesToPage may also be used to | ||
| 55 | - // initialize m->all_pages. | 42 | + // Note that pushInheritedAttributesToPage may also be used to initialize m->all_pages. |
| 56 | if (m->all_pages.empty()) { | 43 | if (m->all_pages.empty()) { |
| 57 | m->ever_called_get_all_pages = true; | 44 | m->ever_called_get_all_pages = true; |
| 58 | QPDFObjGen::set visited; | 45 | QPDFObjGen::set visited; |
| @@ -65,9 +52,8 @@ QPDF::getAllPages() | @@ -65,9 +52,8 @@ QPDF::getAllPages() | ||
| 65 | // loop -- will be detected again and reported later | 52 | // loop -- will be detected again and reported later |
| 66 | break; | 53 | break; |
| 67 | } | 54 | } |
| 68 | - // Files have been found in the wild where /Pages in the | ||
| 69 | - // catalog points to the first page. Try to work around | ||
| 70 | - // this and similar cases with this heuristic. | 55 | + // Files have been found in the wild where /Pages in the catalog points to the first |
| 56 | + // page. Try to work around this and similar cases with this heuristic. | ||
| 71 | if (!warned) { | 57 | if (!warned) { |
| 72 | getRoot().warnIfPossible("document page tree root (root -> /Pages) doesn't point" | 58 | getRoot().warnIfPossible("document page tree root (root -> /Pages) doesn't point" |
| 73 | " to the root of the page tree; attempting to correct"); | 59 | " to the root of the page tree; attempting to correct"); |
| @@ -118,8 +104,8 @@ QPDF::getAllPagesInternal( | @@ -118,8 +104,8 @@ QPDF::getAllPagesInternal( | ||
| 118 | kid = makeIndirectObject(kid); | 104 | kid = makeIndirectObject(kid); |
| 119 | kids.setArrayItem(i, kid); | 105 | kids.setArrayItem(i, kid); |
| 120 | } else if (!seen.add(kid)) { | 106 | } else if (!seen.add(kid)) { |
| 121 | - // Make a copy of the page. This does the same as | ||
| 122 | - // shallowCopyPage in QPDFPageObjectHelper. | 107 | + // Make a copy of the page. This does the same as shallowCopyPage in |
| 108 | + // QPDFPageObjectHelper. | ||
| 123 | QTC::TC("qpdf", "QPDF resolve duplicated page object"); | 109 | QTC::TC("qpdf", "QPDF resolve duplicated page object"); |
| 124 | cur_node.warnIfPossible( | 110 | cur_node.warnIfPossible( |
| 125 | "kid " + std::to_string(i) + | 111 | "kid " + std::to_string(i) + |
| @@ -141,9 +127,8 @@ QPDF::getAllPagesInternal( | @@ -141,9 +127,8 @@ QPDF::getAllPagesInternal( | ||
| 141 | void | 127 | void |
| 142 | QPDF::updateAllPagesCache() | 128 | QPDF::updateAllPagesCache() |
| 143 | { | 129 | { |
| 144 | - // Force regeneration of the pages cache. We force immediate | ||
| 145 | - // recalculation of all_pages since users may have references to | ||
| 146 | - // it that they got from calls to getAllPages(). We can defer | 130 | + // Force regeneration of the pages cache. We force immediate recalculation of all_pages since |
| 131 | + // users may have references to it that they got from calls to getAllPages(). We can defer | ||
| 147 | // recalculation of pageobj_to_pages_pos until needed. | 132 | // recalculation of pageobj_to_pages_pos until needed. |
| 148 | QTC::TC("qpdf", "QPDF updateAllPagesCache"); | 133 | QTC::TC("qpdf", "QPDF updateAllPagesCache"); |
| 149 | m->all_pages.clear(); | 134 | m->all_pages.clear(); |
| @@ -155,25 +140,23 @@ QPDF::updateAllPagesCache() | @@ -155,25 +140,23 @@ QPDF::updateAllPagesCache() | ||
| 155 | void | 140 | void |
| 156 | QPDF::flattenPagesTree() | 141 | QPDF::flattenPagesTree() |
| 157 | { | 142 | { |
| 158 | - // If not already done, flatten the /Pages structure and | ||
| 159 | - // initialize pageobj_to_pages_pos. | 143 | + // If not already done, flatten the /Pages structure and initialize pageobj_to_pages_pos. |
| 160 | 144 | ||
| 161 | if (!m->pageobj_to_pages_pos.empty()) { | 145 | if (!m->pageobj_to_pages_pos.empty()) { |
| 162 | return; | 146 | return; |
| 163 | } | 147 | } |
| 164 | 148 | ||
| 165 | - // Push inherited objects down to the /Page level. As a side | ||
| 166 | - // effect m->all_pages will also be generated. | 149 | + // Push inherited objects down to the /Page level. As a side effect m->all_pages will also be |
| 150 | + // generated. | ||
| 167 | pushInheritedAttributesToPage(true, true); | 151 | pushInheritedAttributesToPage(true, true); |
| 168 | 152 | ||
| 169 | QPDFObjectHandle pages = getRoot().getKey("/Pages"); | 153 | QPDFObjectHandle pages = getRoot().getKey("/Pages"); |
| 170 | 154 | ||
| 171 | size_t const len = m->all_pages.size(); | 155 | size_t const len = m->all_pages.size(); |
| 172 | for (size_t pos = 0; pos < len; ++pos) { | 156 | for (size_t pos = 0; pos < len; ++pos) { |
| 173 | - // Populate pageobj_to_pages_pos and fix parent pointer. There | ||
| 174 | - // should be no duplicates at this point because | ||
| 175 | - // pushInheritedAttributesToPage calls getAllPages which | ||
| 176 | - // resolves duplicates. | 157 | + // Populate pageobj_to_pages_pos and fix parent pointer. There should be no duplicates at |
| 158 | + // this point because pushInheritedAttributesToPage calls getAllPages which resolves | ||
| 159 | + // duplicates. | ||
| 177 | insertPageobjToPage(m->all_pages.at(pos), toI(pos), true); | 160 | insertPageobjToPage(m->all_pages.at(pos), toI(pos), true); |
| 178 | m->all_pages.at(pos).replaceKey("/Parent", pages); | 161 | m->all_pages.at(pos).replaceKey("/Parent", pages); |
| 179 | } | 162 | } |
| @@ -191,16 +174,14 @@ QPDF::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_dupli | @@ -191,16 +174,14 @@ QPDF::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_dupli | ||
| 191 | QPDFObjGen og(obj.getObjGen()); | 174 | QPDFObjGen og(obj.getObjGen()); |
| 192 | if (check_duplicate) { | 175 | if (check_duplicate) { |
| 193 | if (!m->pageobj_to_pages_pos.insert(std::make_pair(og, pos)).second) { | 176 | if (!m->pageobj_to_pages_pos.insert(std::make_pair(og, pos)).second) { |
| 194 | - // The library never calls insertPageobjToPage in a way | ||
| 195 | - // that causes this to happen. | 177 | + // The library never calls insertPageobjToPage in a way that causes this to happen. |
| 196 | setLastObjectDescription("page " + std::to_string(pos) + " (numbered from zero)", og); | 178 | setLastObjectDescription("page " + std::to_string(pos) + " (numbered from zero)", og); |
| 197 | throw QPDFExc( | 179 | throw QPDFExc( |
| 198 | qpdf_e_pages, | 180 | qpdf_e_pages, |
| 199 | m->file->getName(), | 181 | m->file->getName(), |
| 200 | m->last_object_description, | 182 | m->last_object_description, |
| 201 | 0, | 183 | 0, |
| 202 | - "duplicate page reference found;" | ||
| 203 | - " this would cause loss of data"); | 184 | + "duplicate page reference found; this would cause loss of data"); |
| 204 | } | 185 | } |
| 205 | } else { | 186 | } else { |
| 206 | m->pageobj_to_pages_pos[og] = pos; | 187 | m->pageobj_to_pages_pos[og] = pos; |
| @@ -210,8 +191,7 @@ QPDF::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_dupli | @@ -210,8 +191,7 @@ QPDF::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_dupli | ||
| 210 | void | 191 | void |
| 211 | QPDF::insertPage(QPDFObjectHandle newpage, int pos) | 192 | QPDF::insertPage(QPDFObjectHandle newpage, int pos) |
| 212 | { | 193 | { |
| 213 | - // pos is numbered from 0, so pos = 0 inserts at the beginning and | ||
| 214 | - // pos = npages adds to the end. | 194 | + // pos is numbered from 0, so pos = 0 inserts at the beginning and pos = npages adds to the end. |
| 215 | 195 | ||
| 216 | flattenPagesTree(); | 196 | flattenPagesTree(); |
| 217 | 197 | ||
| @@ -233,10 +213,9 @@ QPDF::insertPage(QPDFObjectHandle newpage, int pos) | @@ -233,10 +213,9 @@ QPDF::insertPage(QPDFObjectHandle newpage, int pos) | ||
| 233 | QTC::TC( | 213 | QTC::TC( |
| 234 | "qpdf", | 214 | "qpdf", |
| 235 | "QPDF insert page", | 215 | "QPDF insert page", |
| 236 | - (pos == 0) ? 0 : // insert at beginning | ||
| 237 | - (pos == toI(m->all_pages.size())) ? 1 | ||
| 238 | - : // at end | ||
| 239 | - 2); // insert in middle | 216 | + (pos == 0) ? 0 : // insert at beginning |
| 217 | + (pos == toI(m->all_pages.size())) ? 1 // at end | ||
| 218 | + : 2); // insert in middle | ||
| 240 | 219 | ||
| 241 | auto og = newpage.getObjGen(); | 220 | auto og = newpage.getObjGen(); |
| 242 | if (m->pageobj_to_pages_pos.count(og)) { | 221 | if (m->pageobj_to_pages_pos.count(og)) { |
| @@ -265,10 +244,9 @@ QPDF::removePage(QPDFObjectHandle page) | @@ -265,10 +244,9 @@ QPDF::removePage(QPDFObjectHandle page) | ||
| 265 | QTC::TC( | 244 | QTC::TC( |
| 266 | "qpdf", | 245 | "qpdf", |
| 267 | "QPDF remove page", | 246 | "QPDF remove page", |
| 268 | - (pos == 0) ? 0 : // remove at beginning | ||
| 269 | - (pos == toI(m->all_pages.size() - 1)) ? 1 | ||
| 270 | - : // end | ||
| 271 | - 2); // remove in middle | 247 | + (pos == 0) ? 0 : // remove at beginning |
| 248 | + (pos == toI(m->all_pages.size() - 1)) ? 1 // end | ||
| 249 | + : 2); // remove in middle | ||
| 272 | 250 | ||
| 273 | QPDFObjectHandle pages = getRoot().getKey("/Pages"); | 251 | QPDFObjectHandle pages = getRoot().getKey("/Pages"); |
| 274 | QPDFObjectHandle kids = pages.getKey("/Kids"); | 252 | QPDFObjectHandle kids = pages.getKey("/Kids"); |