Commit b8fd18ae562ab8bae1d2e67c1ab63ff4ea62124b
Merge branch 'parse_ref' into work
Showing
12 changed files
with
555 additions
and
305 deletions
libqpdf/QPDFParser.cc
| @@ -21,22 +21,7 @@ | @@ -21,22 +21,7 @@ | ||
| 21 | 21 | ||
| 22 | #include <memory> | 22 | #include <memory> |
| 23 | 23 | ||
| 24 | -namespace | ||
| 25 | -{ | ||
| 26 | - struct StackFrame | ||
| 27 | - { | ||
| 28 | - StackFrame(std::shared_ptr<InputSource> input) : | ||
| 29 | - offset(input->tell()) | ||
| 30 | - { | ||
| 31 | - } | ||
| 32 | - | ||
| 33 | - std::vector<std::shared_ptr<QPDFObject>> olist; | ||
| 34 | - qpdf_offset_t offset; | ||
| 35 | - std::string contents_string{""}; | ||
| 36 | - qpdf_offset_t contents_offset{-1}; | ||
| 37 | - int null_count{0}; | ||
| 38 | - }; | ||
| 39 | -} // namespace | 24 | +using ObjectPtr = std::shared_ptr<QPDFObject>; |
| 40 | 25 | ||
| 41 | QPDFObjectHandle | 26 | QPDFObjectHandle |
| 42 | QPDFParser::parse(bool& empty, bool content_stream) | 27 | QPDFParser::parse(bool& empty, bool content_stream) |
| @@ -46,371 +31,457 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -46,371 +31,457 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 46 | // effect of reading the object and changing the file pointer. If you do this, it will cause a | 31 | // effect of reading the object and changing the file pointer. If you do this, it will cause a |
| 47 | // logic error to be thrown from QPDF::inParse(). | 32 | // logic error to be thrown from QPDF::inParse(). |
| 48 | 33 | ||
| 49 | - const static std::shared_ptr<QPDFObject> null_oh = QPDF_Null::create(); | ||
| 50 | QPDF::ParseGuard pg(context); | 34 | QPDF::ParseGuard pg(context); |
| 51 | - | ||
| 52 | empty = false; | 35 | empty = false; |
| 36 | + start = input->tell(); | ||
| 53 | 37 | ||
| 54 | - std::shared_ptr<QPDFObject> object; | ||
| 55 | - bool set_offset = false; | ||
| 56 | - | ||
| 57 | - std::vector<StackFrame> stack; | ||
| 58 | - stack.emplace_back(input); | ||
| 59 | - std::vector<parser_state_e> state_stack; | ||
| 60 | - state_stack.push_back(st_top); | ||
| 61 | - qpdf_offset_t offset; | ||
| 62 | - bool done = false; | ||
| 63 | - int bad_count = 0; | ||
| 64 | - int good_count = 0; | ||
| 65 | - bool b_contents = false; | ||
| 66 | - bool is_null = false; | 38 | + if (!tokenizer.nextToken(*input, object_description)) { |
| 39 | + warn(tokenizer.getErrorMessage()); | ||
| 40 | + } | ||
| 41 | + | ||
| 42 | + switch (tokenizer.getType()) { | ||
| 43 | + case QPDFTokenizer::tt_eof: | ||
| 44 | + if (content_stream) { | ||
| 45 | + // In content stream mode, leave object uninitialized to indicate EOF | ||
| 46 | + return {}; | ||
| 47 | + } | ||
| 48 | + QTC::TC("qpdf", "QPDFParser eof in parse"); | ||
| 49 | + warn("unexpected EOF"); | ||
| 50 | + return {QPDF_Null::create()}; | ||
| 51 | + | ||
| 52 | + case QPDFTokenizer::tt_bad: | ||
| 53 | + QTC::TC("qpdf", "QPDFParser bad token in parse"); | ||
| 54 | + return {QPDF_Null::create()}; | ||
| 55 | + | ||
| 56 | + case QPDFTokenizer::tt_brace_open: | ||
| 57 | + case QPDFTokenizer::tt_brace_close: | ||
| 58 | + QTC::TC("qpdf", "QPDFParser bad brace"); | ||
| 59 | + warn("treating unexpected brace token as null"); | ||
| 60 | + return {QPDF_Null::create()}; | ||
| 61 | + | ||
| 62 | + case QPDFTokenizer::tt_array_close: | ||
| 63 | + QTC::TC("qpdf", "QPDFParser bad array close"); | ||
| 64 | + warn("treating unexpected array close token as null"); | ||
| 65 | + return {QPDF_Null::create()}; | ||
| 66 | + | ||
| 67 | + case QPDFTokenizer::tt_dict_close: | ||
| 68 | + QTC::TC("qpdf", "QPDFParser bad dictionary close"); | ||
| 69 | + warn("unexpected dictionary close token"); | ||
| 70 | + return {QPDF_Null::create()}; | ||
| 71 | + | ||
| 72 | + case QPDFTokenizer::tt_array_open: | ||
| 73 | + case QPDFTokenizer::tt_dict_open: | ||
| 74 | + stack.clear(); | ||
| 75 | + stack.emplace_back( | ||
| 76 | + input, | ||
| 77 | + (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key); | ||
| 78 | + frame = &stack.back(); | ||
| 79 | + return parseRemainder(content_stream); | ||
| 80 | + | ||
| 81 | + case QPDFTokenizer::tt_bool: | ||
| 82 | + return withDescription<QPDF_Bool>(tokenizer.getValue() == "true"); | ||
| 83 | + | ||
| 84 | + case QPDFTokenizer::tt_null: | ||
| 85 | + return {QPDF_Null::create()}; | ||
| 86 | + | ||
| 87 | + case QPDFTokenizer::tt_integer: | ||
| 88 | + return withDescription<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str())); | ||
| 89 | + | ||
| 90 | + case QPDFTokenizer::tt_real: | ||
| 91 | + return withDescription<QPDF_Real>(tokenizer.getValue()); | ||
| 92 | + | ||
| 93 | + case QPDFTokenizer::tt_name: | ||
| 94 | + return withDescription<QPDF_Name>(tokenizer.getValue()); | ||
| 95 | + | ||
| 96 | + case QPDFTokenizer::tt_word: | ||
| 97 | + { | ||
| 98 | + auto const& value = tokenizer.getValue(); | ||
| 99 | + if (content_stream) { | ||
| 100 | + return withDescription<QPDF_Operator>(value); | ||
| 101 | + } else if (value == "endobj") { | ||
| 102 | + // We just saw endobj without having read anything. Treat this as a null and do | ||
| 103 | + // not move the input source's offset. | ||
| 104 | + input->seek(input->getLastOffset(), SEEK_SET); | ||
| 105 | + empty = true; | ||
| 106 | + return {QPDF_Null::create()}; | ||
| 107 | + } else { | ||
| 108 | + QTC::TC("qpdf", "QPDFParser treat word as string"); | ||
| 109 | + warn("unknown token while reading object; treating as string"); | ||
| 110 | + return withDescription<QPDF_String>(value); | ||
| 111 | + } | ||
| 112 | + } | ||
| 113 | + | ||
| 114 | + case QPDFTokenizer::tt_string: | ||
| 115 | + if (decrypter) { | ||
| 116 | + std::string s{tokenizer.getValue()}; | ||
| 117 | + decrypter->decryptString(s); | ||
| 118 | + return withDescription<QPDF_String>(s); | ||
| 119 | + } else { | ||
| 120 | + return withDescription<QPDF_String>(tokenizer.getValue()); | ||
| 121 | + } | ||
| 122 | + | ||
| 123 | + default: | ||
| 124 | + warn("treating unknown token type as null while reading object"); | ||
| 125 | + return {QPDF_Null::create()}; | ||
| 126 | + } | ||
| 127 | +} | ||
| 67 | 128 | ||
| 68 | - while (!done) { | ||
| 69 | - bool bad = false; | ||
| 70 | - bool indirect_ref = false; | ||
| 71 | - is_null = false; | ||
| 72 | - auto& frame = stack.back(); | ||
| 73 | - auto& olist = frame.olist; | ||
| 74 | - parser_state_e state = state_stack.back(); | ||
| 75 | - offset = frame.offset; | 129 | +QPDFObjectHandle |
| 130 | +QPDFParser::parseRemainder(bool content_stream) | ||
| 131 | +{ | ||
| 132 | + // This method must take care not to resolve any objects. Don't check the type of any object | ||
| 133 | + // without first ensuring that it is a direct object. Otherwise, doing so may have the side | ||
| 134 | + // effect of reading the object and changing the file pointer. If you do this, it will cause a | ||
| 135 | + // logic error to be thrown from QPDF::inParse(). | ||
| 76 | 136 | ||
| 77 | - object = nullptr; | ||
| 78 | - set_offset = false; | 137 | + bad_count = 0; |
| 138 | + bool b_contents = false; | ||
| 79 | 139 | ||
| 140 | + while (true) { | ||
| 80 | if (!tokenizer.nextToken(*input, object_description)) { | 141 | if (!tokenizer.nextToken(*input, object_description)) { |
| 81 | warn(tokenizer.getErrorMessage()); | 142 | warn(tokenizer.getErrorMessage()); |
| 82 | } | 143 | } |
| 144 | + ++good_count; // optimistically | ||
| 145 | + | ||
| 146 | + if (int_count != 0) { | ||
| 147 | + // Special handling of indirect references. Treat integer tokens as part of an indirect | ||
| 148 | + // reference until proven otherwise. | ||
| 149 | + if (tokenizer.getType() == QPDFTokenizer::tt_integer) { | ||
| 150 | + if (++int_count > 2) { | ||
| 151 | + // Process the oldest buffered integer. | ||
| 152 | + addInt(int_count); | ||
| 153 | + } | ||
| 154 | + last_offset_buffer[int_count % 2] = input->getLastOffset(); | ||
| 155 | + int_buffer[int_count % 2] = QUtil::string_to_ll(tokenizer.getValue().c_str()); | ||
| 156 | + continue; | ||
| 157 | + | ||
| 158 | + } else if ( | ||
| 159 | + int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word && | ||
| 160 | + tokenizer.getValue() == "R") { | ||
| 161 | + if (context == nullptr) { | ||
| 162 | + QTC::TC("qpdf", "QPDFParser indirect without context"); | ||
| 163 | + throw std::logic_error("QPDFParser::parse called without context on an object " | ||
| 164 | + "with indirect references"); | ||
| 165 | + } | ||
| 166 | + auto ref_og = QPDFObjGen( | ||
| 167 | + QIntC::to_int(int_buffer[(int_count - 1) % 2]), | ||
| 168 | + QIntC::to_int(int_buffer[(int_count) % 2])); | ||
| 169 | + if (ref_og.isIndirect()) { | ||
| 170 | + // This action has the desirable side effect of causing dangling references | ||
| 171 | + // (references to indirect objects that don't appear in the PDF) in any parsed | ||
| 172 | + // object to appear in the object cache. | ||
| 173 | + add(std::move(context->getObject(ref_og).obj)); | ||
| 174 | + } else { | ||
| 175 | + QTC::TC("qpdf", "QPDFParser indirect with 0 objid"); | ||
| 176 | + addNull(); | ||
| 177 | + } | ||
| 178 | + int_count = 0; | ||
| 179 | + continue; | ||
| 180 | + | ||
| 181 | + } else if (int_count > 0) { | ||
| 182 | + // Process the buffered integers before processing the current token. | ||
| 183 | + if (int_count > 1) { | ||
| 184 | + addInt(int_count - 1); | ||
| 185 | + } | ||
| 186 | + addInt(int_count); | ||
| 187 | + int_count = 0; | ||
| 188 | + } | ||
| 189 | + } | ||
| 83 | 190 | ||
| 84 | switch (tokenizer.getType()) { | 191 | switch (tokenizer.getType()) { |
| 85 | case QPDFTokenizer::tt_eof: | 192 | case QPDFTokenizer::tt_eof: |
| 86 | - if (!content_stream) { | ||
| 87 | - QTC::TC("qpdf", "QPDFParser eof in parse"); | ||
| 88 | - warn("unexpected EOF"); | 193 | + warn("parse error while reading object"); |
| 194 | + if (content_stream) { | ||
| 195 | + // In content stream mode, leave object uninitialized to indicate EOF | ||
| 196 | + return {}; | ||
| 89 | } | 197 | } |
| 90 | - bad = true; | ||
| 91 | - state = st_eof; | ||
| 92 | - break; | 198 | + QTC::TC("qpdf", "QPDFParser eof in parseRemainder"); |
| 199 | + warn("unexpected EOF"); | ||
| 200 | + return {QPDF_Null::create()}; | ||
| 93 | 201 | ||
| 94 | case QPDFTokenizer::tt_bad: | 202 | case QPDFTokenizer::tt_bad: |
| 95 | - QTC::TC("qpdf", "QPDFParser bad token in parse"); | ||
| 96 | - bad = true; | ||
| 97 | - is_null = true; | ||
| 98 | - break; | 203 | + QTC::TC("qpdf", "QPDFParser bad token in parseRemainder"); |
| 204 | + if (tooManyBadTokens()) { | ||
| 205 | + return {QPDF_Null::create()}; | ||
| 206 | + } | ||
| 207 | + addNull(); | ||
| 208 | + continue; | ||
| 99 | 209 | ||
| 100 | case QPDFTokenizer::tt_brace_open: | 210 | case QPDFTokenizer::tt_brace_open: |
| 101 | case QPDFTokenizer::tt_brace_close: | 211 | case QPDFTokenizer::tt_brace_close: |
| 102 | - QTC::TC("qpdf", "QPDFParser bad brace"); | 212 | + QTC::TC("qpdf", "QPDFParser bad brace in parseRemainder"); |
| 103 | warn("treating unexpected brace token as null"); | 213 | warn("treating unexpected brace token as null"); |
| 104 | - bad = true; | ||
| 105 | - is_null = true; | ||
| 106 | - break; | 214 | + if (tooManyBadTokens()) { |
| 215 | + return {QPDF_Null::create()}; | ||
| 216 | + } | ||
| 217 | + addNull(); | ||
| 218 | + continue; | ||
| 107 | 219 | ||
| 108 | case QPDFTokenizer::tt_array_close: | 220 | case QPDFTokenizer::tt_array_close: |
| 109 | - if (state == st_array) { | ||
| 110 | - state = st_stop; | 221 | + if (frame->state == st_array) { |
| 222 | + auto object = QPDF_Array::create(std::move(frame->olist), frame->null_count > 100); | ||
| 223 | + setDescription(object, frame->offset - 1); | ||
| 224 | + // The `offset` points to the next of "[". Set the rewind offset to point to the | ||
| 225 | + // beginning of "[". This has been explicitly tested with whitespace surrounding the | ||
| 226 | + // array start delimiter. getLastOffset points to the array end token and therefore | ||
| 227 | + // can't be used here. | ||
| 228 | + if (stack.size() <= 1) { | ||
| 229 | + return object; | ||
| 230 | + } | ||
| 231 | + stack.pop_back(); | ||
| 232 | + frame = &stack.back(); | ||
| 233 | + add(std::move(object)); | ||
| 111 | } else { | 234 | } else { |
| 112 | - QTC::TC("qpdf", "QPDFParser bad array close"); | 235 | + QTC::TC("qpdf", "QPDFParser bad array close in parseRemainder"); |
| 113 | warn("treating unexpected array close token as null"); | 236 | warn("treating unexpected array close token as null"); |
| 114 | - bad = true; | ||
| 115 | - is_null = true; | 237 | + if (tooManyBadTokens()) { |
| 238 | + return {QPDF_Null::create()}; | ||
| 239 | + } | ||
| 240 | + addNull(); | ||
| 116 | } | 241 | } |
| 117 | - break; | 242 | + continue; |
| 118 | 243 | ||
| 119 | case QPDFTokenizer::tt_dict_close: | 244 | case QPDFTokenizer::tt_dict_close: |
| 120 | - if (state == st_dictionary) { | ||
| 121 | - state = st_stop; | 245 | + if (frame->state <= st_dictionary_value) { |
| 246 | + // Attempt to recover more or less gracefully from invalid dictionaries. | ||
| 247 | + auto& dict = frame->dict; | ||
| 248 | + | ||
| 249 | + if (frame->state == st_dictionary_value) { | ||
| 250 | + QTC::TC("qpdf", "QPDFParser no val for last key"); | ||
| 251 | + warn( | ||
| 252 | + frame->offset, | ||
| 253 | + "dictionary ended prematurely; using null as value for last key"); | ||
| 254 | + dict[frame->key] = QPDF_Null::create(); | ||
| 255 | + } | ||
| 256 | + | ||
| 257 | + if (!frame->olist.empty()) | ||
| 258 | + fixMissingKeys(); | ||
| 259 | + | ||
| 260 | + if (!frame->contents_string.empty() && dict.count("/Type") && | ||
| 261 | + dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") && | ||
| 262 | + dict.count("/Contents") && dict["/Contents"].isString()) { | ||
| 263 | + dict["/Contents"] = QPDFObjectHandle::newString(frame->contents_string); | ||
| 264 | + dict["/Contents"].setParsedOffset(frame->contents_offset); | ||
| 265 | + } | ||
| 266 | + auto object = QPDF_Dictionary::create(std::move(dict)); | ||
| 267 | + setDescription(object, frame->offset - 2); | ||
| 268 | + // The `offset` points to the next of "<<". Set the rewind offset to point to the | ||
| 269 | + // beginning of "<<". This has been explicitly tested with whitespace surrounding | ||
| 270 | + // the dictionary start delimiter. getLastOffset points to the dictionary end token | ||
| 271 | + // and therefore can't be used here. | ||
| 272 | + if (stack.size() <= 1) { | ||
| 273 | + return object; | ||
| 274 | + } | ||
| 275 | + stack.pop_back(); | ||
| 276 | + frame = &stack.back(); | ||
| 277 | + add(std::move(object)); | ||
| 122 | } else { | 278 | } else { |
| 123 | - QTC::TC("qpdf", "QPDFParser bad dictionary close"); | 279 | + QTC::TC("qpdf", "QPDFParser bad dictionary close in parseRemainder"); |
| 124 | warn("unexpected dictionary close token"); | 280 | warn("unexpected dictionary close token"); |
| 125 | - bad = true; | ||
| 126 | - is_null = true; | 281 | + if (tooManyBadTokens()) { |
| 282 | + return {QPDF_Null::create()}; | ||
| 283 | + } | ||
| 284 | + addNull(); | ||
| 127 | } | 285 | } |
| 128 | - break; | 286 | + continue; |
| 129 | 287 | ||
| 130 | case QPDFTokenizer::tt_array_open: | 288 | case QPDFTokenizer::tt_array_open: |
| 131 | case QPDFTokenizer::tt_dict_open: | 289 | case QPDFTokenizer::tt_dict_open: |
| 132 | - if (stack.size() > 500) { | 290 | + if (stack.size() > 499) { |
| 133 | QTC::TC("qpdf", "QPDFParser too deep"); | 291 | QTC::TC("qpdf", "QPDFParser too deep"); |
| 134 | warn("ignoring excessively deeply nested data structure"); | 292 | warn("ignoring excessively deeply nested data structure"); |
| 135 | - bad = true; | ||
| 136 | - is_null = true; | ||
| 137 | - state = st_top; | 293 | + return {QPDF_Null::create()}; |
| 138 | } else { | 294 | } else { |
| 139 | - state = st_start; | ||
| 140 | - state_stack.push_back( | ||
| 141 | - (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array | ||
| 142 | - : st_dictionary); | ||
| 143 | b_contents = false; | 295 | b_contents = false; |
| 144 | - stack.emplace_back(input); | 296 | + stack.emplace_back( |
| 297 | + input, | ||
| 298 | + (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array | ||
| 299 | + : st_dictionary_key); | ||
| 300 | + frame = &stack.back(); | ||
| 301 | + continue; | ||
| 145 | } | 302 | } |
| 146 | - break; | ||
| 147 | 303 | ||
| 148 | case QPDFTokenizer::tt_bool: | 304 | case QPDFTokenizer::tt_bool: |
| 149 | - object = QPDF_Bool::create((tokenizer.getValue() == "true")); | ||
| 150 | - break; | 305 | + addScalar<QPDF_Bool>(tokenizer.getValue() == "true"); |
| 306 | + continue; | ||
| 151 | 307 | ||
| 152 | case QPDFTokenizer::tt_null: | 308 | case QPDFTokenizer::tt_null: |
| 153 | - is_null = true; | ||
| 154 | - ++frame.null_count; | ||
| 155 | - | ||
| 156 | - break; | 309 | + addNull(); |
| 310 | + continue; | ||
| 157 | 311 | ||
| 158 | case QPDFTokenizer::tt_integer: | 312 | case QPDFTokenizer::tt_integer: |
| 159 | - object = QPDF_Integer::create( | ||
| 160 | - QUtil::string_to_ll(std::string(tokenizer.getValue()).c_str())); | ||
| 161 | - break; | 313 | + if (!content_stream) { |
| 314 | + // Buffer token in case it is part of an indirect reference. | ||
| 315 | + last_offset_buffer[1] = input->getLastOffset(); | ||
| 316 | + int_buffer[1] = QUtil::string_to_ll(tokenizer.getValue().c_str()); | ||
| 317 | + int_count = 1; | ||
| 318 | + } else { | ||
| 319 | + addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str())); | ||
| 320 | + } | ||
| 321 | + continue; | ||
| 162 | 322 | ||
| 163 | case QPDFTokenizer::tt_real: | 323 | case QPDFTokenizer::tt_real: |
| 164 | - object = QPDF_Real::create(tokenizer.getValue()); | ||
| 165 | - break; | 324 | + addScalar<QPDF_Real>(tokenizer.getValue()); |
| 325 | + continue; | ||
| 166 | 326 | ||
| 167 | case QPDFTokenizer::tt_name: | 327 | case QPDFTokenizer::tt_name: |
| 168 | - { | ||
| 169 | - auto name = tokenizer.getValue(); | ||
| 170 | - object = QPDF_Name::create(name); | ||
| 171 | - | ||
| 172 | - if (name == "/Contents") { | ||
| 173 | - b_contents = true; | ||
| 174 | - } else { | ||
| 175 | - b_contents = false; | ||
| 176 | - } | 328 | + if (frame->state == st_dictionary_key) { |
| 329 | + frame->key = tokenizer.getValue(); | ||
| 330 | + frame->state = st_dictionary_value; | ||
| 331 | + b_contents = decrypter && frame->key == "/Contents"; | ||
| 332 | + continue; | ||
| 333 | + } else { | ||
| 334 | + addScalar<QPDF_Name>(tokenizer.getValue()); | ||
| 177 | } | 335 | } |
| 178 | - break; | 336 | + continue; |
| 179 | 337 | ||
| 180 | case QPDFTokenizer::tt_word: | 338 | case QPDFTokenizer::tt_word: |
| 181 | - { | ||
| 182 | - auto value = tokenizer.getValue(); | ||
| 183 | - auto size = olist.size(); | ||
| 184 | - if (content_stream) { | ||
| 185 | - object = QPDF_Operator::create(value); | ||
| 186 | - } else if ( | ||
| 187 | - value == "R" && state != st_top && size >= 2 && olist.back() && | ||
| 188 | - olist.back()->getTypeCode() == ::ot_integer && | ||
| 189 | - !olist.back()->getObjGen().isIndirect() && olist.at(size - 2) && | ||
| 190 | - olist.at(size - 2)->getTypeCode() == ::ot_integer && | ||
| 191 | - !olist.at(size - 2)->getObjGen().isIndirect()) { | ||
| 192 | - if (context == nullptr) { | ||
| 193 | - QTC::TC("qpdf", "QPDFParser indirect without context"); | ||
| 194 | - throw std::logic_error("QPDFObjectHandle::parse called without context on " | ||
| 195 | - "an object with indirect references"); | ||
| 196 | - } | ||
| 197 | - auto ref_og = QPDFObjGen( | ||
| 198 | - QPDFObjectHandle(olist.at(size - 2)).getIntValueAsInt(), | ||
| 199 | - QPDFObjectHandle(olist.back()).getIntValueAsInt()); | ||
| 200 | - if (ref_og.isIndirect()) { | ||
| 201 | - // This action has the desirable side effect of causing dangling references | ||
| 202 | - // (references to indirect objects that don't appear in the PDF) in any | ||
| 203 | - // parsed object to appear in the object cache. | ||
| 204 | - object = context->getObject(ref_og).obj; | ||
| 205 | - indirect_ref = true; | ||
| 206 | - } else { | ||
| 207 | - QTC::TC("qpdf", "QPDFParser indirect with 0 objid"); | ||
| 208 | - is_null = true; | ||
| 209 | - } | ||
| 210 | - olist.pop_back(); | ||
| 211 | - olist.pop_back(); | ||
| 212 | - } else if ((value == "endobj") && (state == st_top)) { | ||
| 213 | - // We just saw endobj without having read anything. Treat this as a null and do | ||
| 214 | - // not move the input source's offset. | ||
| 215 | - is_null = true; | ||
| 216 | - input->seek(input->getLastOffset(), SEEK_SET); | ||
| 217 | - empty = true; | ||
| 218 | - } else { | ||
| 219 | - QTC::TC("qpdf", "QPDFParser treat word as string"); | ||
| 220 | - warn("unknown token while reading object; treating as string"); | ||
| 221 | - bad = true; | ||
| 222 | - object = QPDF_String::create(value); | 339 | + if (content_stream) { |
| 340 | + addScalar<QPDF_Operator>(tokenizer.getValue()); | ||
| 341 | + } else { | ||
| 342 | + QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder"); | ||
| 343 | + warn("unknown token while reading object; treating as string"); | ||
| 344 | + if (tooManyBadTokens()) { | ||
| 345 | + return {QPDF_Null::create()}; | ||
| 223 | } | 346 | } |
| 347 | + addScalar<QPDF_String>(tokenizer.getValue()); | ||
| 224 | } | 348 | } |
| 225 | - break; | 349 | + continue; |
| 226 | 350 | ||
| 227 | case QPDFTokenizer::tt_string: | 351 | case QPDFTokenizer::tt_string: |
| 228 | { | 352 | { |
| 229 | - auto val = tokenizer.getValue(); | 353 | + auto const& val = tokenizer.getValue(); |
| 230 | if (decrypter) { | 354 | if (decrypter) { |
| 231 | if (b_contents) { | 355 | if (b_contents) { |
| 232 | - frame.contents_string = val; | ||
| 233 | - frame.contents_offset = input->getLastOffset(); | 356 | + frame->contents_string = val; |
| 357 | + frame->contents_offset = input->getLastOffset(); | ||
| 234 | b_contents = false; | 358 | b_contents = false; |
| 235 | } | 359 | } |
| 236 | std::string s{val}; | 360 | std::string s{val}; |
| 237 | decrypter->decryptString(s); | 361 | decrypter->decryptString(s); |
| 238 | - object = QPDF_String::create(s); | 362 | + addScalar<QPDF_String>(s); |
| 239 | } else { | 363 | } else { |
| 240 | - object = QPDF_String::create(val); | 364 | + addScalar<QPDF_String>(val); |
| 241 | } | 365 | } |
| 242 | } | 366 | } |
| 243 | - | ||
| 244 | - break; | 367 | + continue; |
| 245 | 368 | ||
| 246 | default: | 369 | default: |
| 247 | warn("treating unknown token type as null while reading object"); | 370 | warn("treating unknown token type as null while reading object"); |
| 248 | - bad = true; | ||
| 249 | - is_null = true; | ||
| 250 | - break; | ||
| 251 | - } | ||
| 252 | - | ||
| 253 | - if (object == nullptr && !is_null && | ||
| 254 | - (!((state == st_start) || (state == st_stop) || (state == st_eof)))) { | ||
| 255 | - throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object"); | ||
| 256 | - is_null = true; | ||
| 257 | - } | ||
| 258 | - | ||
| 259 | - if (bad) { | ||
| 260 | - ++bad_count; | ||
| 261 | - good_count = 0; | ||
| 262 | - } else { | ||
| 263 | - ++good_count; | ||
| 264 | - if (good_count > 3) { | ||
| 265 | - bad_count = 0; | 371 | + if (tooManyBadTokens()) { |
| 372 | + return {QPDF_Null::create()}; | ||
| 266 | } | 373 | } |
| 374 | + addNull(); | ||
| 267 | } | 375 | } |
| 268 | - if (bad_count > 5) { | ||
| 269 | - // We had too many consecutive errors without enough intervening successful objects. | ||
| 270 | - // Give up. | ||
| 271 | - warn("too many errors; giving up on reading object"); | ||
| 272 | - state = st_top; | ||
| 273 | - is_null = true; | ||
| 274 | - } | 376 | + } |
| 377 | +} | ||
| 275 | 378 | ||
| 276 | - switch (state) { | ||
| 277 | - case st_eof: | ||
| 278 | - if (state_stack.size() > 1) { | ||
| 279 | - warn("parse error while reading object"); | ||
| 280 | - } | ||
| 281 | - done = true; | ||
| 282 | - // In content stream mode, leave object uninitialized to indicate EOF | ||
| 283 | - if (!content_stream) { | ||
| 284 | - is_null = true; | ||
| 285 | - } | ||
| 286 | - break; | ||
| 287 | - | ||
| 288 | - case st_dictionary: | ||
| 289 | - case st_array: | ||
| 290 | - if (is_null) { | ||
| 291 | - object = null_oh; | ||
| 292 | - // No need to set description for direct nulls - they probably will become implicit. | ||
| 293 | - } else if (!indirect_ref) { | ||
| 294 | - setDescription(object, input->getLastOffset()); | ||
| 295 | - } | ||
| 296 | - set_offset = true; | ||
| 297 | - olist.push_back(object); | ||
| 298 | - break; | 379 | +void |
| 380 | +QPDFParser::add(std::shared_ptr<QPDFObject>&& obj) | ||
| 381 | +{ | ||
| 382 | + if (frame->state != st_dictionary_value) { | ||
| 383 | + // If state is st_dictionary_key then there is a missing key. Push onto olist for | ||
| 384 | + // processing once the tt_dict_close token has been found. | ||
| 385 | + frame->olist.emplace_back(std::move(obj)); | ||
| 386 | + } else { | ||
| 387 | + if (auto res = frame->dict.insert_or_assign(frame->key, std::move(obj)); !res.second) { | ||
| 388 | + warnDuplicateKey(); | ||
| 389 | + } | ||
| 390 | + frame->state = st_dictionary_key; | ||
| 391 | + } | ||
| 392 | +} | ||
| 299 | 393 | ||
| 300 | - case st_top: | ||
| 301 | - done = true; | ||
| 302 | - break; | 394 | +void |
| 395 | +QPDFParser::addNull() | ||
| 396 | +{ | ||
| 397 | + const static ObjectPtr null_obj = QPDF_Null::create(); | ||
| 303 | 398 | ||
| 304 | - case st_start: | ||
| 305 | - break; | 399 | + if (frame->state != st_dictionary_value) { |
| 400 | + // If state is st_dictionary_key then there is a missing key. Push onto olist for | ||
| 401 | + // processing once the tt_dict_close token has been found. | ||
| 402 | + frame->olist.emplace_back(null_obj); | ||
| 403 | + } else { | ||
| 404 | + if (auto res = frame->dict.insert_or_assign(frame->key, null_obj); !res.second) { | ||
| 405 | + warnDuplicateKey(); | ||
| 406 | + } | ||
| 407 | + frame->state = st_dictionary_key; | ||
| 408 | + } | ||
| 409 | + ++frame->null_count; | ||
| 410 | +} | ||
| 306 | 411 | ||
| 307 | - case st_stop: | ||
| 308 | - if ((state_stack.size() < 2) || (stack.size() < 2)) { | ||
| 309 | - throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with " | ||
| 310 | - "insufficient elements in stack"); | ||
| 311 | - } | ||
| 312 | - parser_state_e old_state = state_stack.back(); | ||
| 313 | - state_stack.pop_back(); | ||
| 314 | - if (old_state == st_array) { | ||
| 315 | - object = QPDF_Array::create(std::move(olist), frame.null_count > 100); | ||
| 316 | - setDescription(object, offset - 1); | ||
| 317 | - // The `offset` points to the next of "[". Set the rewind offset to point to the | ||
| 318 | - // beginning of "[". This has been explicitly tested with whitespace surrounding the | ||
| 319 | - // array start delimiter. getLastOffset points to the array end token and therefore | ||
| 320 | - // can't be used here. | ||
| 321 | - set_offset = true; | ||
| 322 | - } else if (old_state == st_dictionary) { | ||
| 323 | - // Convert list to map. Alternating elements are keys. Attempt to recover more or | ||
| 324 | - // less gracefully from invalid dictionaries. | ||
| 325 | - std::set<std::string> names; | ||
| 326 | - for (auto& obj: olist) { | ||
| 327 | - if (obj) { | ||
| 328 | - if (obj->getTypeCode() == ::ot_name) { | ||
| 329 | - names.insert(obj->getStringValue()); | ||
| 330 | - } | ||
| 331 | - } | ||
| 332 | - } | 412 | +void |
| 413 | +QPDFParser::addInt(int count) | ||
| 414 | +{ | ||
| 415 | + auto obj = QPDF_Integer::create(int_buffer[count % 2]); | ||
| 416 | + obj->setDescription(context, description, last_offset_buffer[count % 2]); | ||
| 417 | + add(std::move(obj)); | ||
| 418 | +} | ||
| 333 | 419 | ||
| 334 | - std::map<std::string, QPDFObjectHandle> dict; | ||
| 335 | - int next_fake_key = 1; | ||
| 336 | - for (auto iter = olist.begin(); iter != olist.end();) { | ||
| 337 | - // Calculate key. | ||
| 338 | - std::string key; | ||
| 339 | - if (*iter && (*iter)->getTypeCode() == ::ot_name) { | ||
| 340 | - key = (*iter)->getStringValue(); | ||
| 341 | - ++iter; | ||
| 342 | - } else { | ||
| 343 | - for (bool found_fake = false; !found_fake;) { | ||
| 344 | - key = "/QPDFFake" + std::to_string(next_fake_key++); | ||
| 345 | - found_fake = (names.count(key) == 0); | ||
| 346 | - QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1)); | ||
| 347 | - } | ||
| 348 | - warn( | ||
| 349 | - offset, | ||
| 350 | - "expected dictionary key but found non-name object; inserting key " + | ||
| 351 | - key); | ||
| 352 | - } | ||
| 353 | - if (dict.count(key) > 0) { | ||
| 354 | - QTC::TC("qpdf", "QPDFParser duplicate dict key"); | ||
| 355 | - warn( | ||
| 356 | - offset, | ||
| 357 | - "dictionary has duplicated key " + key + | ||
| 358 | - "; last occurrence overrides earlier ones"); | ||
| 359 | - } | 420 | +template <typename T, typename... Args> |
| 421 | +void | ||
| 422 | +QPDFParser::addScalar(Args&&... args) | ||
| 423 | +{ | ||
| 424 | + auto obj = T::create(args...); | ||
| 425 | + obj->setDescription(context, description, input->getLastOffset()); | ||
| 426 | + add(std::move(obj)); | ||
| 427 | +} | ||
| 360 | 428 | ||
| 361 | - // Calculate value. | ||
| 362 | - std::shared_ptr<QPDFObject> val; | ||
| 363 | - if (iter != olist.end()) { | ||
| 364 | - val = *iter; | ||
| 365 | - ++iter; | ||
| 366 | - } else { | ||
| 367 | - QTC::TC("qpdf", "QPDFParser no val for last key"); | ||
| 368 | - warn( | ||
| 369 | - offset, | ||
| 370 | - "dictionary ended prematurely; using null as value for last key"); | ||
| 371 | - val = QPDF_Null::create(); | ||
| 372 | - } | 429 | +template <typename T, typename... Args> |
| 430 | +QPDFObjectHandle | ||
| 431 | +QPDFParser::withDescription(Args&&... args) | ||
| 432 | +{ | ||
| 433 | + auto obj = T::create(args...); | ||
| 434 | + obj->setDescription(context, description, start); | ||
| 435 | + return {obj}; | ||
| 436 | +} | ||
| 373 | 437 | ||
| 374 | - dict[std::move(key)] = std::move(val); | ||
| 375 | - } | ||
| 376 | - if (!frame.contents_string.empty() && dict.count("/Type") && | ||
| 377 | - dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") && | ||
| 378 | - dict.count("/Contents") && dict["/Contents"].isString()) { | ||
| 379 | - dict["/Contents"] = QPDFObjectHandle::newString(frame.contents_string); | ||
| 380 | - dict["/Contents"].setParsedOffset(frame.contents_offset); | ||
| 381 | - } | ||
| 382 | - object = QPDF_Dictionary::create(std::move(dict)); | ||
| 383 | - setDescription(object, offset - 2); | ||
| 384 | - // The `offset` points to the next of "<<". Set the rewind offset to point to the | ||
| 385 | - // beginning of "<<". This has been explicitly tested with whitespace surrounding | ||
| 386 | - // the dictionary start delimiter. getLastOffset points to the dictionary end token | ||
| 387 | - // and therefore can't be used here. | ||
| 388 | - set_offset = true; | ||
| 389 | - } | ||
| 390 | - stack.pop_back(); | ||
| 391 | - if (state_stack.back() == st_top) { | ||
| 392 | - done = true; | ||
| 393 | - } else { | ||
| 394 | - stack.back().olist.push_back(object); | ||
| 395 | - } | ||
| 396 | - } | 438 | +void |
| 439 | +QPDFParser::setDescription(ObjectPtr& obj, qpdf_offset_t parsed_offset) | ||
| 440 | +{ | ||
| 441 | + if (obj) { | ||
| 442 | + obj->setDescription(context, description, parsed_offset); | ||
| 397 | } | 443 | } |
| 444 | +} | ||
| 398 | 445 | ||
| 399 | - if (is_null) { | ||
| 400 | - object = QPDF_Null::create(); | 446 | +void |
| 447 | +QPDFParser::fixMissingKeys() | ||
| 448 | +{ | ||
| 449 | + std::set<std::string> names; | ||
| 450 | + for (auto& obj: frame->olist) { | ||
| 451 | + if (obj->getTypeCode() == ::ot_name) { | ||
| 452 | + names.insert(obj->getStringValue()); | ||
| 453 | + } | ||
| 401 | } | 454 | } |
| 402 | - if (!set_offset) { | ||
| 403 | - setDescription(object, offset); | 455 | + int next_fake_key = 1; |
| 456 | + for (auto const& item: frame->olist) { | ||
| 457 | + while (true) { | ||
| 458 | + const std::string key = "/QPDFFake" + std::to_string(next_fake_key++); | ||
| 459 | + const bool found_fake = frame->dict.count(key) == 0 && names.count(key) == 0; | ||
| 460 | + QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1)); | ||
| 461 | + if (found_fake) { | ||
| 462 | + warn( | ||
| 463 | + frame->offset, | ||
| 464 | + "expected dictionary key but found non-name object; inserting key " + key); | ||
| 465 | + frame->dict[key] = item; | ||
| 466 | + break; | ||
| 467 | + } | ||
| 468 | + } | ||
| 404 | } | 469 | } |
| 405 | - return object; | ||
| 406 | } | 470 | } |
| 407 | 471 | ||
| 408 | -void | ||
| 409 | -QPDFParser::setDescription(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parsed_offset) | 472 | +bool |
| 473 | +QPDFParser::tooManyBadTokens() | ||
| 410 | { | 474 | { |
| 411 | - if (obj) { | ||
| 412 | - obj->setDescription(context, description, parsed_offset); | 475 | + if (good_count <= 4) { |
| 476 | + if (++bad_count > 5) { | ||
| 477 | + warn("too many errors; giving up on reading object"); | ||
| 478 | + return true; | ||
| 479 | + } | ||
| 480 | + } else { | ||
| 481 | + bad_count = 1; | ||
| 413 | } | 482 | } |
| 483 | + good_count = 0; | ||
| 484 | + return false; | ||
| 414 | } | 485 | } |
| 415 | 486 | ||
| 416 | void | 487 | void |
| @@ -427,6 +498,15 @@ QPDFParser::warn(QPDFExc const& e) const | @@ -427,6 +498,15 @@ QPDFParser::warn(QPDFExc const& e) const | ||
| 427 | } | 498 | } |
| 428 | 499 | ||
| 429 | void | 500 | void |
| 501 | +QPDFParser::warnDuplicateKey() | ||
| 502 | +{ | ||
| 503 | + QTC::TC("qpdf", "QPDFParser duplicate dict key"); | ||
| 504 | + warn( | ||
| 505 | + frame->offset, | ||
| 506 | + "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones"); | ||
| 507 | +} | ||
| 508 | + | ||
| 509 | +void | ||
| 430 | QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const | 510 | QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const |
| 431 | { | 511 | { |
| 432 | warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), object_description, offset, msg)); | 512 | warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), object_description, offset, msg)); |
libqpdf/qpdf/QPDFParser.hh
| @@ -31,11 +31,44 @@ class QPDFParser | @@ -31,11 +31,44 @@ class QPDFParser | ||
| 31 | QPDFObjectHandle parse(bool& empty, bool content_stream); | 31 | QPDFObjectHandle parse(bool& empty, bool content_stream); |
| 32 | 32 | ||
| 33 | private: | 33 | private: |
| 34 | - enum parser_state_e { st_top, st_start, st_stop, st_eof, st_dictionary, st_array }; | 34 | + // Parser state. Note: |
| 35 | + // state <= st_dictionary_value == (state == st_dictionary_key || state == st_dictionary_value) | ||
| 36 | + enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array }; | ||
| 35 | 37 | ||
| 38 | + struct StackFrame | ||
| 39 | + { | ||
| 40 | + StackFrame(std::shared_ptr<InputSource> const& input, parser_state_e state) : | ||
| 41 | + state(state), | ||
| 42 | + offset(input->tell()) | ||
| 43 | + { | ||
| 44 | + } | ||
| 45 | + | ||
| 46 | + std::vector<std::shared_ptr<QPDFObject>> olist; | ||
| 47 | + std::map<std::string, QPDFObjectHandle> dict; | ||
| 48 | + parser_state_e state; | ||
| 49 | + std::string key; | ||
| 50 | + qpdf_offset_t offset; | ||
| 51 | + std::string contents_string; | ||
| 52 | + qpdf_offset_t contents_offset{-1}; | ||
| 53 | + int null_count{0}; | ||
| 54 | + }; | ||
| 55 | + | ||
| 56 | + QPDFObjectHandle parseRemainder(bool content_stream); | ||
| 57 | + void add(std::shared_ptr<QPDFObject>&& obj); | ||
| 58 | + void addNull(); | ||
| 59 | + void addInt(int count); | ||
| 60 | + template <typename T, typename... Args> | ||
| 61 | + void addScalar(Args&&... args); | ||
| 62 | + bool tooManyBadTokens(); | ||
| 63 | + void warnDuplicateKey(); | ||
| 64 | + void fixMissingKeys(); | ||
| 36 | void warn(qpdf_offset_t offset, std::string const& msg) const; | 65 | void warn(qpdf_offset_t offset, std::string const& msg) const; |
| 37 | void warn(std::string const& msg) const; | 66 | void warn(std::string const& msg) const; |
| 38 | void warn(QPDFExc const&) const; | 67 | void warn(QPDFExc const&) const; |
| 68 | + template <typename T, typename... Args> | ||
| 69 | + // Create a new scalar object complete with parsed offset and description. | ||
| 70 | + // NB the offset includes any leading whitespace. | ||
| 71 | + QPDFObjectHandle withDescription(Args&&... args); | ||
| 39 | void setDescription(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parsed_offset); | 72 | void setDescription(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parsed_offset); |
| 40 | std::shared_ptr<InputSource> input; | 73 | std::shared_ptr<InputSource> input; |
| 41 | std::string const& object_description; | 74 | std::string const& object_description; |
| @@ -43,6 +76,18 @@ class QPDFParser | @@ -43,6 +76,18 @@ class QPDFParser | ||
| 43 | QPDFObjectHandle::StringDecrypter* decrypter; | 76 | QPDFObjectHandle::StringDecrypter* decrypter; |
| 44 | QPDF* context; | 77 | QPDF* context; |
| 45 | std::shared_ptr<QPDFValue::Description> description; | 78 | std::shared_ptr<QPDFValue::Description> description; |
| 79 | + std::vector<StackFrame> stack; | ||
| 80 | + StackFrame* frame; | ||
| 81 | + // Number of recent bad tokens. | ||
| 82 | + int bad_count = 0; | ||
| 83 | + // Number of good tokens since last bad token. Irrelevant if bad_count == 0. | ||
| 84 | + int good_count = 0; | ||
| 85 | + // Start offset including any leading whitespace. | ||
| 86 | + qpdf_offset_t start; | ||
| 87 | + // Number of successive integer tokens. | ||
| 88 | + int int_count = 0; | ||
| 89 | + long long int_buffer[2]{0, 0}; | ||
| 90 | + qpdf_offset_t last_offset_buffer[2]{0, 0}; | ||
| 46 | }; | 91 | }; |
| 47 | 92 | ||
| 48 | #endif // QPDFPARSER_HH | 93 | #endif // QPDFPARSER_HH |
qpdf/qpdf.testcov
| @@ -57,11 +57,14 @@ QPDF trailer lacks size 0 | @@ -57,11 +57,14 @@ QPDF trailer lacks size 0 | ||
| 57 | QPDF trailer size not integer 0 | 57 | QPDF trailer size not integer 0 |
| 58 | QPDF trailer prev not integer 0 | 58 | QPDF trailer prev not integer 0 |
| 59 | QPDFParser bad brace 0 | 59 | QPDFParser bad brace 0 |
| 60 | +QPDFParser bad brace in parseRemainder 0 | ||
| 60 | QPDFParser bad array close 0 | 61 | QPDFParser bad array close 0 |
| 62 | +QPDFParser bad array close in parseRemainder 0 | ||
| 61 | QPDF stream without length 0 | 63 | QPDF stream without length 0 |
| 62 | QPDF stream length not integer 0 | 64 | QPDF stream length not integer 0 |
| 63 | QPDF missing endstream 0 | 65 | QPDF missing endstream 0 |
| 64 | QPDFParser bad dictionary close 0 | 66 | QPDFParser bad dictionary close 0 |
| 67 | +QPDFParser bad dictionary close in parseRemainder 0 | ||
| 65 | QPDF can't find xref 0 | 68 | QPDF can't find xref 0 |
| 66 | QPDFTokenizer bad ) 0 | 69 | QPDFTokenizer bad ) 0 |
| 67 | QPDFTokenizer bad > 0 | 70 | QPDFTokenizer bad > 0 |
| @@ -258,6 +261,7 @@ QPDFParser indirect with 0 objid 0 | @@ -258,6 +261,7 @@ QPDFParser indirect with 0 objid 0 | ||
| 258 | QPDF object id 0 0 | 261 | QPDF object id 0 0 |
| 259 | QPDF recursion loop in resolve 0 | 262 | QPDF recursion loop in resolve 0 |
| 260 | QPDFParser treat word as string 0 | 263 | QPDFParser treat word as string 0 |
| 264 | +QPDFParser treat word as string in parseRemainder 0 | ||
| 261 | QPDFParser found fake 1 | 265 | QPDFParser found fake 1 |
| 262 | QPDFParser no val for last key 0 | 266 | QPDFParser no val for last key 0 |
| 263 | QPDF resolve failure to null 0 | 267 | QPDF resolve failure to null 0 |
| @@ -289,7 +293,9 @@ QPDFObjectHandle coalesce called on stream 0 | @@ -289,7 +293,9 @@ QPDFObjectHandle coalesce called on stream 0 | ||
| 289 | QPDFObjectHandle coalesce provide stream data 0 | 293 | QPDFObjectHandle coalesce provide stream data 0 |
| 290 | QPDF_Stream bad token at end during normalize 0 | 294 | QPDF_Stream bad token at end during normalize 0 |
| 291 | QPDFParser bad token in parse 0 | 295 | QPDFParser bad token in parse 0 |
| 296 | +QPDFParser bad token in parseRemainder 0 | ||
| 292 | QPDFParser eof in parse 0 | 297 | QPDFParser eof in parse 0 |
| 298 | +QPDFParser eof in parseRemainder 0 | ||
| 293 | QPDFObjectHandle array bounds 0 | 299 | QPDFObjectHandle array bounds 0 |
| 294 | QPDFObjectHandle boolean returning false 0 | 300 | QPDFObjectHandle boolean returning false 0 |
| 295 | QPDFObjectHandle integer returning 0 0 | 301 | QPDFObjectHandle integer returning 0 0 |
qpdf/qtest/parsing.test
| @@ -17,7 +17,7 @@ my $td = new TestDriver('parsing'); | @@ -17,7 +17,7 @@ my $td = new TestDriver('parsing'); | ||
| 17 | my $n_tests = 17; | 17 | my $n_tests = 17; |
| 18 | 18 | ||
| 19 | $td->runtest("parse objects from string", | 19 | $td->runtest("parse objects from string", |
| 20 | - {$td->COMMAND => "test_driver 31 good1.qdf"}, | 20 | + {$td->COMMAND => "test_driver 31 bad39.qdf"}, |
| 21 | {$td->FILE => "parse-object.out", $td->EXIT_STATUS => 0}, | 21 | {$td->FILE => "parse-object.out", $td->EXIT_STATUS => 0}, |
| 22 | $td->NORMALIZE_NEWLINES); | 22 | $td->NORMALIZE_NEWLINES); |
| 23 | $td->runtest("EOF terminating literal tokens", | 23 | $td->runtest("EOF terminating literal tokens", |
qpdf/qtest/qpdf/bad16-recover.out
| 1 | WARNING: bad16.pdf (trailer, offset 753): unexpected dictionary close token | 1 | WARNING: bad16.pdf (trailer, offset 753): unexpected dictionary close token |
| 2 | WARNING: bad16.pdf (trailer, offset 756): unexpected dictionary close token | 2 | WARNING: bad16.pdf (trailer, offset 756): unexpected dictionary close token |
| 3 | WARNING: bad16.pdf (trailer, offset 759): unknown token while reading object; treating as string | 3 | WARNING: bad16.pdf (trailer, offset 759): unknown token while reading object; treating as string |
| 4 | -WARNING: bad16.pdf (trailer, offset 779): unexpected EOF | ||
| 5 | WARNING: bad16.pdf (trailer, offset 779): parse error while reading object | 4 | WARNING: bad16.pdf (trailer, offset 779): parse error while reading object |
| 5 | +WARNING: bad16.pdf (trailer, offset 779): unexpected EOF | ||
| 6 | WARNING: bad16.pdf: file is damaged | 6 | WARNING: bad16.pdf: file is damaged |
| 7 | WARNING: bad16.pdf (offset 712): expected trailer dictionary | 7 | WARNING: bad16.pdf (offset 712): expected trailer dictionary |
| 8 | WARNING: bad16.pdf: Attempting to reconstruct cross-reference table | 8 | WARNING: bad16.pdf: Attempting to reconstruct cross-reference table |
| 9 | WARNING: bad16.pdf (trailer, offset 753): unexpected dictionary close token | 9 | WARNING: bad16.pdf (trailer, offset 753): unexpected dictionary close token |
| 10 | WARNING: bad16.pdf (trailer, offset 756): unexpected dictionary close token | 10 | WARNING: bad16.pdf (trailer, offset 756): unexpected dictionary close token |
| 11 | WARNING: bad16.pdf (trailer, offset 759): unknown token while reading object; treating as string | 11 | WARNING: bad16.pdf (trailer, offset 759): unknown token while reading object; treating as string |
| 12 | -WARNING: bad16.pdf (trailer, offset 779): unexpected EOF | ||
| 13 | WARNING: bad16.pdf (trailer, offset 779): parse error while reading object | 12 | WARNING: bad16.pdf (trailer, offset 779): parse error while reading object |
| 13 | +WARNING: bad16.pdf (trailer, offset 779): unexpected EOF | ||
| 14 | bad16.pdf: unable to find trailer dictionary while recovering damaged file | 14 | bad16.pdf: unable to find trailer dictionary while recovering damaged file |
qpdf/qtest/qpdf/bad16.out
| 1 | WARNING: bad16.pdf (trailer, offset 753): unexpected dictionary close token | 1 | WARNING: bad16.pdf (trailer, offset 753): unexpected dictionary close token |
| 2 | WARNING: bad16.pdf (trailer, offset 756): unexpected dictionary close token | 2 | WARNING: bad16.pdf (trailer, offset 756): unexpected dictionary close token |
| 3 | WARNING: bad16.pdf (trailer, offset 759): unknown token while reading object; treating as string | 3 | WARNING: bad16.pdf (trailer, offset 759): unknown token while reading object; treating as string |
| 4 | -WARNING: bad16.pdf (trailer, offset 779): unexpected EOF | ||
| 5 | WARNING: bad16.pdf (trailer, offset 779): parse error while reading object | 4 | WARNING: bad16.pdf (trailer, offset 779): parse error while reading object |
| 5 | +WARNING: bad16.pdf (trailer, offset 779): unexpected EOF | ||
| 6 | bad16.pdf (offset 712): expected trailer dictionary | 6 | bad16.pdf (offset 712): expected trailer dictionary |
qpdf/qtest/qpdf/bad36-recover.out
| 1 | WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string | 1 | WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string |
| 2 | -WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2 | ||
| 3 | WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key | 2 | WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key |
| 3 | +WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2 | ||
| 4 | /QTest is implicit | 4 | /QTest is implicit |
| 5 | /QTest is direct and has type null (2) | 5 | /QTest is direct and has type null (2) |
| 6 | /QTest is null | 6 | /QTest is null |
qpdf/qtest/qpdf/bad36.out
| 1 | WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string | 1 | WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string |
| 2 | -WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2 | ||
| 3 | WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key | 2 | WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key |
| 3 | +WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2 | ||
| 4 | /QTest is implicit | 4 | /QTest is implicit |
| 5 | /QTest is direct and has type null (2) | 5 | /QTest is direct and has type null (2) |
| 6 | /QTest is null | 6 | /QTest is null |
qpdf/qtest/qpdf/bad39.qdf
0 → 100644
| 1 | +%PDF-1.3 | ||
| 2 | +%¿÷¢þ | ||
| 3 | +%QDF-1.0 | ||
| 4 | + | ||
| 5 | +%% Original object ID: 1 0 | ||
| 6 | +1 0 obj | ||
| 7 | +<< | ||
| 8 | + /Pages 2 0 R | ||
| 9 | + /Type /Catalog | ||
| 10 | +>> | ||
| 11 | +endobj | ||
| 12 | + | ||
| 13 | +%% Original object ID: 2 0 | ||
| 14 | +2 0 obj | ||
| 15 | +<< | ||
| 16 | + /Count 1 | ||
| 17 | + /Kids [ | ||
| 18 | + 3 0 R | ||
| 19 | + ] | ||
| 20 | + /Type /Pages | ||
| 21 | +>> | ||
| 22 | +endobj | ||
| 23 | + | ||
| 24 | +%% Page 1 | ||
| 25 | +%% Original object ID: 3 0 | ||
| 26 | +3 0 obj | ||
| 27 | +<< | ||
| 28 | + /Contents 4 0 R | ||
| 29 | + /MediaBox [ | ||
| 30 | + 0 | ||
| 31 | + 0 | ||
| 32 | + 612 | ||
| 33 | + 792 | ||
| 34 | + ] | ||
| 35 | + /Parent 2 0 R | ||
| 36 | + /Resources << | ||
| 37 | + /Font << | ||
| 38 | + /F1 6 0 R | ||
| 39 | + >> | ||
| 40 | + /ProcSet 7 0 R | ||
| 41 | + >> | ||
| 42 | + /Type /Page | ||
| 43 | +>> | ||
| 44 | +endobj | ||
| 45 | + | ||
| 46 | +%% Contents for page 1 | ||
| 47 | +%% Original object ID: 4 0 | ||
| 48 | +4 0 obj | ||
| 49 | +<< | ||
| 50 | + /Length 5 0 R | ||
| 51 | +>> | ||
| 52 | +stream | ||
| 53 | +BT | ||
| 54 | + /F1 24 Tf | ||
| 55 | + 72 720 Td | ||
| 56 | + (Potato) Tj | ||
| 57 | +ET | ||
| 58 | +endstream | ||
| 59 | +endobj | ||
| 60 | + | ||
| 61 | +5 0 obj | ||
| 62 | +44 | ||
| 63 | +endobj | ||
| 64 | + | ||
| 65 | +%% Original object ID: 6 0 | ||
| 66 | +6 0 obj | ||
| 67 | +<< | ||
| 68 | + /BaseFont /Helvetica | ||
| 69 | + /Encoding /WinAnsiEncoding | ||
| 70 | + /Name /F1 | ||
| 71 | + /Subtype /Type1 | ||
| 72 | + /Type /Font | ||
| 73 | +>> | ||
| 74 | +endobj | ||
| 75 | + | ||
| 76 | +%% Original object ID: 5 0 | ||
| 77 | +7 0 obj | ||
| 78 | +[ | ||
| 79 | |||
| 80 | + /Text | ||
| 81 | +] | ||
| 82 | +endobj | ||
| 83 | + | ||
| 84 | +xref | ||
| 85 | +0 8 | ||
| 86 | +0000000000 65535 f | ||
| 87 | +0000000052 00000 n | ||
| 88 | +0000000133 00000 n | ||
| 89 | +0000000242 00000 n | ||
| 90 | +0000000484 00000 n | ||
| 91 | +0000000583 00000 n | ||
| 92 | +0000000629 00000 n | ||
| 93 | +0000001113 00000 n | ||
| 94 | +trailer << | ||
| 95 | + /Root 1 0 R | ||
| 96 | + /Size 8 | ||
| 97 | + /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] | ||
| 98 | +>> | ||
| 99 | +startxref | ||
| 100 | +809 | ||
| 101 | +%%EOF | ||
| 102 | +7 0 obj |
qpdf/qtest/qpdf/issue-335a.out
| @@ -51,6 +51,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected ) | @@ -51,6 +51,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected ) | ||
| 51 | WARNING: issue-335a.pdf (trailer, offset 596): unexpected ) | 51 | WARNING: issue-335a.pdf (trailer, offset 596): unexpected ) |
| 52 | WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2 | 52 | WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2 |
| 53 | WARNING: issue-335a.pdf (trailer, offset 600): unexpected ) | 53 | WARNING: issue-335a.pdf (trailer, offset 600): unexpected ) |
| 54 | +WARNING: issue-335a.pdf (trailer, offset 134): dictionary has duplicated key /L; last occurrence overrides earlier ones | ||
| 54 | WARNING: issue-335a.pdf (trailer, offset 601): unexpected ) | 55 | WARNING: issue-335a.pdf (trailer, offset 601): unexpected ) |
| 55 | WARNING: issue-335a.pdf (trailer, offset 648): unexpected ) | 56 | WARNING: issue-335a.pdf (trailer, offset 648): unexpected ) |
| 56 | WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2 | 57 | WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2 |
| @@ -74,6 +75,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected ) | @@ -74,6 +75,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected ) | ||
| 74 | WARNING: issue-335a.pdf (trailer, offset 596): unexpected ) | 75 | WARNING: issue-335a.pdf (trailer, offset 596): unexpected ) |
| 75 | WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2 | 76 | WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2 |
| 76 | WARNING: issue-335a.pdf (trailer, offset 600): unexpected ) | 77 | WARNING: issue-335a.pdf (trailer, offset 600): unexpected ) |
| 78 | +WARNING: issue-335a.pdf (trailer, offset 164): dictionary has duplicated key /L; last occurrence overrides earlier ones | ||
| 77 | WARNING: issue-335a.pdf (trailer, offset 601): unexpected ) | 79 | WARNING: issue-335a.pdf (trailer, offset 601): unexpected ) |
| 78 | WARNING: issue-335a.pdf (trailer, offset 648): unexpected ) | 80 | WARNING: issue-335a.pdf (trailer, offset 648): unexpected ) |
| 79 | WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2 | 81 | WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2 |
| @@ -97,6 +99,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected ) | @@ -97,6 +99,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected ) | ||
| 97 | WARNING: issue-335a.pdf (trailer, offset 596): unexpected ) | 99 | WARNING: issue-335a.pdf (trailer, offset 596): unexpected ) |
| 98 | WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2 | 100 | WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2 |
| 99 | WARNING: issue-335a.pdf (trailer, offset 600): unexpected ) | 101 | WARNING: issue-335a.pdf (trailer, offset 600): unexpected ) |
| 102 | +WARNING: issue-335a.pdf (trailer, offset 231): dictionary has duplicated key /L; last occurrence overrides earlier ones | ||
| 100 | WARNING: issue-335a.pdf (trailer, offset 601): unexpected ) | 103 | WARNING: issue-335a.pdf (trailer, offset 601): unexpected ) |
| 101 | WARNING: issue-335a.pdf (trailer, offset 648): unexpected ) | 104 | WARNING: issue-335a.pdf (trailer, offset 648): unexpected ) |
| 102 | WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2 | 105 | WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2 |
| @@ -448,6 +451,7 @@ WARNING: issue-335a.pdf (trailer, offset 1168): unexpected ) | @@ -448,6 +451,7 @@ WARNING: issue-335a.pdf (trailer, offset 1168): unexpected ) | ||
| 448 | WARNING: issue-335a.pdf (trailer, offset 1328): unexpected ) | 451 | WARNING: issue-335a.pdf (trailer, offset 1328): unexpected ) |
| 449 | WARNING: issue-335a.pdf (trailer, offset 1329): name with stray # will not work with PDF >= 1.2 | 452 | WARNING: issue-335a.pdf (trailer, offset 1329): name with stray # will not work with PDF >= 1.2 |
| 450 | WARNING: issue-335a.pdf (trailer, offset 1332): unexpected ) | 453 | WARNING: issue-335a.pdf (trailer, offset 1332): unexpected ) |
| 454 | +WARNING: issue-335a.pdf (trailer, offset 1033): dictionary has duplicated key /L; last occurrence overrides earlier ones | ||
| 451 | WARNING: issue-335a.pdf (trailer, offset 1333): unexpected ) | 455 | WARNING: issue-335a.pdf (trailer, offset 1333): unexpected ) |
| 452 | WARNING: issue-335a.pdf (trailer, offset 1344): unexpected ) | 456 | WARNING: issue-335a.pdf (trailer, offset 1344): unexpected ) |
| 453 | WARNING: issue-335a.pdf (trailer, offset 1428): unexpected ) | 457 | WARNING: issue-335a.pdf (trailer, offset 1428): unexpected ) |
qpdf/qtest/qpdf/parse-object.out
| 1 | [ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ] | 1 | [ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ] |
| 2 | -logic error parsing indirect: QPDFObjectHandle::parse called without context on an object with indirect references | 2 | +logic error parsing indirect: QPDFParser::parse called without context on an object with indirect references |
| 3 | trailing data: parsed object (trailing test): trailing data found parsing object from string | 3 | trailing data: parsed object (trailing test): trailing data found parsing object from string |
| 4 | WARNING: parsed object (offset 9): unknown token while reading object; treating as string | 4 | WARNING: parsed object (offset 9): unknown token while reading object; treating as string |
| 5 | +WARNING: parsed object: treating unexpected brace token as null | ||
| 6 | +WARNING: parsed object: treating unexpected brace token as null | ||
| 7 | +WARNING: parsed object: unexpected dictionary close token | ||
| 8 | +WARNING: bad39.qdf (object 7 0, offset 1121): unexpected EOF | ||
| 9 | +WARNING: bad39.qdf (object 7 0, offset 1121): expected endobj | ||
| 10 | +WARNING: bad39.qdf (object 7 0, offset 1121): EOF after endobj | ||
| 5 | test 31 done | 11 | test 31 done |
qpdf/test_driver.cc
| @@ -1195,6 +1195,13 @@ test_31(QPDF& pdf, char const* arg2) | @@ -1195,6 +1195,13 @@ test_31(QPDF& pdf, char const* arg2) | ||
| 1195 | // mistakenly parsed as an indirect object. | 1195 | // mistakenly parsed as an indirect object. |
| 1196 | assert(QPDFObjectHandle::parse(&pdf, "[5 0 R 0 R /X]").unparse() == "[ 5 0 R 0 (R) /X ]"); | 1196 | assert(QPDFObjectHandle::parse(&pdf, "[5 0 R 0 R /X]").unparse() == "[ 5 0 R 0 (R) /X ]"); |
| 1197 | assert(QPDFObjectHandle::parse(&pdf, "[1 0 R]", "indirect test").unparse() == "[ 1 0 R ]"); | 1197 | assert(QPDFObjectHandle::parse(&pdf, "[1 0 R]", "indirect test").unparse() == "[ 1 0 R ]"); |
| 1198 | + // TC:QPDFParser bad brace | ||
| 1199 | + assert(QPDFObjectHandle::parse(&pdf, "}").unparse() == "null"); | ||
| 1200 | + assert(QPDFObjectHandle::parse(&pdf, "{").unparse() == "null"); | ||
| 1201 | + // TC:QPDFParser bad dictionary close | ||
| 1202 | + assert(QPDFObjectHandle::parse(&pdf, ">>").unparse() == "null"); | ||
| 1203 | + // TC:QPDFParser eof in parse | ||
| 1204 | + assert(QPDFObjectHandle::parse(&pdf, "[7 0 R]").getArrayItem(0).isNull()); | ||
| 1198 | } | 1205 | } |
| 1199 | 1206 | ||
| 1200 | static void | 1207 | static void |