Commit 172cc6130583d3c30df3fcea22528afca4b12e5f
1 parent
5a1bf035
Remove redundant code in QPDFParser::parse and parseRemainder
Also, fix test cases.
Showing
6 changed files
with
219 additions
and
327 deletions
libqpdf/QPDFParser.cc
| ... | ... | @@ -21,7 +21,6 @@ |
| 21 | 21 | |
| 22 | 22 | #include <memory> |
| 23 | 23 | |
| 24 | - | |
| 25 | 24 | QPDFObjectHandle |
| 26 | 25 | QPDFParser::parse(bool& empty, bool content_stream) |
| 27 | 26 | { |
| ... | ... | @@ -30,327 +29,110 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 30 | 29 | // effect of reading the object and changing the file pointer. If you do this, it will cause a |
| 31 | 30 | // logic error to be thrown from QPDF::inParse(). |
| 32 | 31 | |
| 33 | - const static std::shared_ptr<QPDFObject> null_oh = QPDF_Null::create(); | |
| 34 | 32 | QPDF::ParseGuard pg(context); |
| 35 | - | |
| 36 | 33 | empty = false; |
| 37 | 34 | |
| 38 | 35 | std::shared_ptr<QPDFObject> object; |
| 39 | - bool set_offset = false; | |
| 40 | - | |
| 41 | -// std::vector<StackFrame> stack{{input, st_top}}; | |
| 42 | - stack.clear(); // NEW | |
| 43 | - stack.emplace_back(input, st_top); // NEW | |
| 44 | - bool done = false; | |
| 45 | - bool b_contents = false; | |
| 46 | - bool is_null = false; | |
| 47 | - frame = &stack.back(); // CHANGED | |
| 36 | + stack.clear(); | |
| 37 | + stack.emplace_back(input, st_top); | |
| 38 | + frame = &stack.back(); | |
| 39 | + object = nullptr; | |
| 48 | 40 | |
| 49 | - while (!done) { | |
| 50 | - bool indirect_ref = false; | |
| 51 | - is_null = false; | |
| 52 | - object = nullptr; | |
| 53 | - set_offset = false; | |
| 41 | + if (!tokenizer.nextToken(*input, object_description)) { | |
| 42 | + warn(tokenizer.getErrorMessage()); | |
| 43 | + } | |
| 54 | 44 | |
| 55 | - if (!tokenizer.nextToken(*input, object_description)) { | |
| 56 | - warn(tokenizer.getErrorMessage()); | |
| 45 | + switch (tokenizer.getType()) { | |
| 46 | + case QPDFTokenizer::tt_eof: | |
| 47 | + if (content_stream) { | |
| 48 | + // In content stream mode, leave object uninitialized to indicate EOF | |
| 49 | + return {}; | |
| 57 | 50 | } |
| 58 | - ++good_count; // optimistically | |
| 59 | - | |
| 60 | - switch (tokenizer.getType()) { | |
| 61 | - case QPDFTokenizer::tt_eof: | |
| 62 | - if (stack.size() > 1) { | |
| 63 | - warn("parse error while reading object"); | |
| 64 | - } | |
| 51 | + QTC::TC("qpdf", "QPDFParser eof in parse"); | |
| 52 | + warn("unexpected EOF"); | |
| 53 | + return {QPDF_Null::create()}; | |
| 54 | + | |
| 55 | + case QPDFTokenizer::tt_bad: | |
| 56 | + QTC::TC("qpdf", "QPDFParser bad token in parse"); | |
| 57 | + return {QPDF_Null::create()}; | |
| 58 | + | |
| 59 | + case QPDFTokenizer::tt_brace_open: | |
| 60 | + case QPDFTokenizer::tt_brace_close: | |
| 61 | + QTC::TC("qpdf", "QPDFParser bad brace"); | |
| 62 | + warn("treating unexpected brace token as null"); | |
| 63 | + return {QPDF_Null::create()}; | |
| 64 | + | |
| 65 | + case QPDFTokenizer::tt_array_close: | |
| 66 | + QTC::TC("qpdf", "QPDFParser bad array close"); | |
| 67 | + warn("treating unexpected array close token as null"); | |
| 68 | + return {QPDF_Null::create()}; | |
| 69 | + | |
| 70 | + case QPDFTokenizer::tt_dict_close: | |
| 71 | + QTC::TC("qpdf", "QPDFParser bad dictionary close"); | |
| 72 | + warn("unexpected dictionary close token"); | |
| 73 | + return {QPDF_Null::create()}; | |
| 74 | + | |
| 75 | + case QPDFTokenizer::tt_array_open: | |
| 76 | + case QPDFTokenizer::tt_dict_open: | |
| 77 | + stack.emplace_back( | |
| 78 | + input, | |
| 79 | + (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary); | |
| 80 | + return parseRemainder(content_stream); | |
| 81 | + | |
| 82 | + case QPDFTokenizer::tt_bool: | |
| 83 | + object = QPDF_Bool::create((tokenizer.getValue() == "true")); | |
| 84 | + break; | |
| 85 | + | |
| 86 | + case QPDFTokenizer::tt_null: | |
| 87 | + return {QPDF_Null::create()}; | |
| 88 | + | |
| 89 | + case QPDFTokenizer::tt_integer: | |
| 90 | + object = QPDF_Integer::create(QUtil::string_to_ll(tokenizer.getValue().c_str())); | |
| 91 | + break; | |
| 92 | + | |
| 93 | + case QPDFTokenizer::tt_real: | |
| 94 | + object = QPDF_Real::create(tokenizer.getValue()); | |
| 95 | + break; | |
| 96 | + | |
| 97 | + case QPDFTokenizer::tt_name: | |
| 98 | + object = QPDF_Name::create(tokenizer.getValue()); | |
| 99 | + break; | |
| 100 | + | |
| 101 | + case QPDFTokenizer::tt_word: | |
| 102 | + { | |
| 103 | + auto const& value = tokenizer.getValue(); | |
| 65 | 104 | if (content_stream) { |
| 66 | - // In content stream mode, leave object uninitialized to indicate EOF | |
| 67 | - return {}; | |
| 68 | - } | |
| 69 | -// QTC::TC("qpdf", "QPDFParser eof in parse"); | |
| 70 | - warn("unexpected EOF"); | |
| 71 | - return {QPDF_Null::create()}; | |
| 72 | - | |
| 73 | - case QPDFTokenizer::tt_bad: | |
| 74 | -// QTC::TC("qpdf", "QPDFParser bad token in parse"); | |
| 75 | - if (tooManyBadTokens()) { | |
| 76 | - return {QPDF_Null::create()}; | |
| 77 | - } | |
| 78 | - is_null = true; | |
| 79 | - break; | |
| 80 | - | |
| 81 | - case QPDFTokenizer::tt_brace_open: | |
| 82 | - case QPDFTokenizer::tt_brace_close: | |
| 83 | -// QTC::TC("qpdf", "QPDFParser bad brace"); | |
| 84 | - warn("treating unexpected brace token as null"); | |
| 85 | - if (tooManyBadTokens()) { | |
| 86 | - return {QPDF_Null::create()}; | |
| 87 | - } | |
| 88 | - is_null = true; | |
| 89 | - break; | |
| 90 | - | |
| 91 | - case QPDFTokenizer::tt_array_close: | |
| 92 | - if (frame->state == st_array) { | |
| 93 | - if (stack.size() < 2) { | |
| 94 | - throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with " | |
| 95 | - "insufficient elements in stack"); | |
| 96 | - } | |
| 97 | - object = QPDF_Array::create(std::move(frame->olist), frame->null_count > 100); | |
| 98 | - setDescription(object, frame->offset - 1); | |
| 99 | - // The `offset` points to the next of "[". Set the rewind offset to point to the | |
| 100 | - // beginning of "[". This has been explicitly tested with whitespace surrounding the | |
| 101 | - // array start delimiter. getLastOffset points to the array end token and therefore | |
| 102 | - // can't be used here. | |
| 103 | - set_offset = true; | |
| 104 | - stack.pop_back(); | |
| 105 | - frame = &stack.back(); | |
| 106 | - } else { | |
| 107 | -// QTC::TC("qpdf", "QPDFParser bad array close"); | |
| 108 | - warn("treating unexpected array close token as null"); | |
| 109 | - if (tooManyBadTokens()) { | |
| 110 | - return {QPDF_Null::create()}; | |
| 111 | - } | |
| 112 | - is_null = true; | |
| 113 | - } | |
| 114 | - break; | |
| 115 | - | |
| 116 | - case QPDFTokenizer::tt_dict_close: | |
| 117 | - if (frame->state == st_dictionary) { | |
| 118 | - if (stack.size() < 2) { | |
| 119 | - throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with " | |
| 120 | - "insufficient elements in stack"); | |
| 121 | - } | |
| 122 | - | |
| 123 | - // Convert list to map. Alternating elements are keys. Attempt to recover more or | |
| 124 | - // less gracefully from invalid dictionaries. | |
| 125 | - std::set<std::string> names; | |
| 126 | - for (auto& obj: frame->olist) { | |
| 127 | - if (obj) { | |
| 128 | - if (obj->getTypeCode() == ::ot_name) { | |
| 129 | - names.insert(obj->getStringValue()); | |
| 130 | - } | |
| 131 | - } | |
| 132 | - } | |
| 133 | - | |
| 134 | - std::map<std::string, QPDFObjectHandle> dict; | |
| 135 | - int next_fake_key = 1; | |
| 136 | - for (auto iter = frame->olist.begin(); iter != frame->olist.end();) { | |
| 137 | - // Calculate key. | |
| 138 | - std::string key; | |
| 139 | - if (*iter && (*iter)->getTypeCode() == ::ot_name) { | |
| 140 | - key = (*iter)->getStringValue(); | |
| 141 | - ++iter; | |
| 142 | - } else { | |
| 143 | - for (bool found_fake = false; !found_fake;) { | |
| 144 | - key = "/QPDFFake" + std::to_string(next_fake_key++); | |
| 145 | - found_fake = (names.count(key) == 0); | |
| 146 | -// QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1)); | |
| 147 | - } | |
| 148 | - warn( | |
| 149 | - frame->offset, | |
| 150 | - "expected dictionary key but found non-name object; inserting key " + | |
| 151 | - key); | |
| 152 | - } | |
| 153 | - if (dict.count(key) > 0) { | |
| 154 | -// QTC::TC("qpdf", "QPDFParser duplicate dict key"); | |
| 155 | - warn( | |
| 156 | - frame->offset, | |
| 157 | - "dictionary has duplicated key " + key + | |
| 158 | - "; last occurrence overrides earlier ones"); | |
| 159 | - } | |
| 160 | - | |
| 161 | - // Calculate value. | |
| 162 | - std::shared_ptr<QPDFObject> val; | |
| 163 | - if (iter != frame->olist.end()) { | |
| 164 | - val = *iter; | |
| 165 | - ++iter; | |
| 166 | - } else { | |
| 167 | -// QTC::TC("qpdf", "QPDFParser no val for last key"); | |
| 168 | - warn( | |
| 169 | - frame->offset, | |
| 170 | - "dictionary ended prematurely; using null as value for last key"); | |
| 171 | - val = QPDF_Null::create(); | |
| 172 | - } | |
| 173 | - | |
| 174 | - dict[std::move(key)] = std::move(val); | |
| 175 | - } | |
| 176 | - if (!frame->contents_string.empty() && dict.count("/Type") && | |
| 177 | - dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") && | |
| 178 | - dict.count("/Contents") && dict["/Contents"].isString()) { | |
| 179 | - dict["/Contents"] = QPDFObjectHandle::newString(frame->contents_string); | |
| 180 | - dict["/Contents"].setParsedOffset(frame->contents_offset); | |
| 181 | - } | |
| 182 | - object = QPDF_Dictionary::create(std::move(dict)); | |
| 183 | - setDescription(object, frame->offset - 2); | |
| 184 | - // The `offset` points to the next of "<<". Set the rewind offset to point to the | |
| 185 | - // beginning of "<<". This has been explicitly tested with whitespace surrounding | |
| 186 | - // the dictionary start delimiter. getLastOffset points to the dictionary end token | |
| 187 | - // and therefore can't be used here. | |
| 188 | - set_offset = true; | |
| 189 | - stack.pop_back(); | |
| 190 | - frame = &stack.back(); | |
| 191 | - } else { | |
| 192 | -// QTC::TC("qpdf", "QPDFParser bad dictionary close"); | |
| 193 | - warn("unexpected dictionary close token"); | |
| 194 | - if (tooManyBadTokens()) { | |
| 195 | - return {QPDF_Null::create()}; | |
| 196 | - } | |
| 197 | - is_null = true; | |
| 198 | - } | |
| 199 | - break; | |
| 200 | - | |
| 201 | - case QPDFTokenizer::tt_array_open: | |
| 202 | - case QPDFTokenizer::tt_dict_open: | |
| 203 | - if (stack.size() > 500) { | |
| 204 | -// QTC::TC("qpdf", "QPDFParser too deep"); | |
| 205 | - warn("ignoring excessively deeply nested data structure"); | |
| 105 | + object = QPDF_Operator::create(value); | |
| 106 | + } else if (value == "endobj") { | |
| 107 | + // We just saw endobj without having read anything. Treat this as a null and do | |
| 108 | + // not move the input source's offset. | |
| 109 | + input->seek(input->getLastOffset(), SEEK_SET); | |
| 110 | + empty = true; | |
| 206 | 111 | return {QPDF_Null::create()}; |
| 207 | 112 | } else { |
| 208 | - b_contents = false; | |
| 209 | - stack.emplace_back( | |
| 210 | - input, | |
| 211 | - (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array | |
| 212 | - : st_dictionary); | |
| 213 | - frame = &stack.back(); | |
| 214 | - return parseRemainder(content_stream); // NEW | |
| 215 | - continue; | |
| 216 | - } | |
| 217 | - | |
| 218 | - case QPDFTokenizer::tt_bool: | |
| 219 | - object = QPDF_Bool::create((tokenizer.getValue() == "true")); | |
| 220 | - break; | |
| 221 | - | |
| 222 | - case QPDFTokenizer::tt_null: | |
| 223 | - is_null = true; | |
| 224 | - ++frame->null_count; | |
| 225 | - | |
| 226 | - break; | |
| 227 | - | |
| 228 | - case QPDFTokenizer::tt_integer: | |
| 229 | - object = QPDF_Integer::create(QUtil::string_to_ll(tokenizer.getValue().c_str())); | |
| 230 | - break; | |
| 231 | - | |
| 232 | - case QPDFTokenizer::tt_real: | |
| 233 | - object = QPDF_Real::create(tokenizer.getValue()); | |
| 234 | - break; | |
| 235 | - | |
| 236 | - case QPDFTokenizer::tt_name: | |
| 237 | - { | |
| 238 | - auto const& name = tokenizer.getValue(); | |
| 239 | - object = QPDF_Name::create(name); | |
| 240 | - | |
| 241 | - if (name == "/Contents") { | |
| 242 | - b_contents = true; | |
| 243 | - } else { | |
| 244 | - b_contents = false; | |
| 245 | - } | |
| 246 | - } | |
| 247 | - break; | |
| 248 | - | |
| 249 | - case QPDFTokenizer::tt_word: | |
| 250 | - { | |
| 251 | - auto const& value = tokenizer.getValue(); | |
| 252 | - auto size = frame->olist.size(); | |
| 253 | - if (content_stream) { | |
| 254 | - object = QPDF_Operator::create(value); | |
| 255 | - } else if ( | |
| 256 | - value == "R" && frame->state != st_top && size >= 2 && frame->olist.back() && | |
| 257 | - frame->olist.back()->getTypeCode() == ::ot_integer && | |
| 258 | - !frame->olist.back()->getObjGen().isIndirect() && frame->olist.at(size - 2) && | |
| 259 | - frame->olist.at(size - 2)->getTypeCode() == ::ot_integer && | |
| 260 | - !frame->olist.at(size - 2)->getObjGen().isIndirect()) { | |
| 261 | - if (context == nullptr) { | |
| 262 | -// QTC::TC("qpdf", "QPDFParser indirect without context"); | |
| 263 | - throw std::logic_error("QPDFObjectHandle::parse called without context on " | |
| 264 | - "an object with indirect references"); | |
| 265 | - } | |
| 266 | - auto ref_og = QPDFObjGen( | |
| 267 | - QPDFObjectHandle(frame->olist.at(size - 2)).getIntValueAsInt(), | |
| 268 | - QPDFObjectHandle(frame->olist.back()).getIntValueAsInt()); | |
| 269 | - if (ref_og.isIndirect()) { | |
| 270 | - // This action has the desirable side effect of causing dangling references | |
| 271 | - // (references to indirect objects that don't appear in the PDF) in any | |
| 272 | - // parsed object to appear in the object cache. | |
| 273 | - object = context->getObject(ref_og).obj; | |
| 274 | - indirect_ref = true; | |
| 275 | - } else { | |
| 276 | -// QTC::TC("qpdf", "QPDFParser indirect with 0 objid"); | |
| 277 | - is_null = true; | |
| 278 | - } | |
| 279 | - frame->olist.pop_back(); | |
| 280 | - frame->olist.pop_back(); | |
| 281 | - } else if ((value == "endobj") && (frame->state == st_top)) { | |
| 282 | - // We just saw endobj without having read anything. Treat this as a null and do | |
| 283 | - // not move the input source's offset. | |
| 284 | - is_null = true; | |
| 285 | - input->seek(input->getLastOffset(), SEEK_SET); | |
| 286 | - empty = true; | |
| 287 | - } else { | |
| 288 | -// QTC::TC("qpdf", "QPDFParser treat word as string"); | |
| 289 | - warn("unknown token while reading object; treating as string"); | |
| 290 | - if (tooManyBadTokens()) { | |
| 291 | - return {QPDF_Null::create()}; | |
| 292 | - } | |
| 293 | - object = QPDF_String::create(value); | |
| 294 | - } | |
| 295 | - } | |
| 296 | - break; | |
| 297 | - | |
| 298 | - case QPDFTokenizer::tt_string: | |
| 299 | - { | |
| 300 | - auto const& val = tokenizer.getValue(); | |
| 301 | - if (decrypter) { | |
| 302 | - if (b_contents) { | |
| 303 | - frame->contents_string = val; | |
| 304 | - frame->contents_offset = input->getLastOffset(); | |
| 305 | - b_contents = false; | |
| 306 | - } | |
| 307 | - std::string s{val}; | |
| 308 | - decrypter->decryptString(s); | |
| 309 | - object = QPDF_String::create(s); | |
| 310 | - } else { | |
| 311 | - object = QPDF_String::create(val); | |
| 312 | - } | |
| 113 | + QTC::TC("qpdf", "QPDFParser treat word as string"); | |
| 114 | + warn("unknown token while reading object; treating as string"); | |
| 115 | + object = QPDF_String::create(value); | |
| 313 | 116 | } |
| 314 | - break; | |
| 315 | - | |
| 316 | - default: | |
| 317 | - warn("treating unknown token type as null while reading object"); | |
| 318 | - if (tooManyBadTokens()) { | |
| 319 | - return {QPDF_Null::create()}; | |
| 320 | - } | |
| 321 | - is_null = true; | |
| 322 | - break; | |
| 323 | 117 | } |
| 324 | - | |
| 325 | - if (object == nullptr && !is_null) { | |
| 326 | - throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object"); | |
| 118 | + break; | |
| 119 | + | |
| 120 | + case QPDFTokenizer::tt_string: | |
| 121 | + if (decrypter) { | |
| 122 | + std::string s{tokenizer.getValue()}; | |
| 123 | + decrypter->decryptString(s); | |
| 124 | + object = QPDF_String::create(s); | |
| 125 | + } else { | |
| 126 | + object = QPDF_String::create(tokenizer.getValue()); | |
| 327 | 127 | } |
| 128 | + break; | |
| 328 | 129 | |
| 329 | - switch (frame->state) { | |
| 330 | - case st_dictionary: | |
| 331 | - case st_array: | |
| 332 | - if (is_null) { | |
| 333 | - object = null_oh; | |
| 334 | - // No need to set description for direct nulls - they probably will become implicit. | |
| 335 | - } else if (!indirect_ref && !set_offset) { | |
| 336 | - setDescription(object, input->getLastOffset()); | |
| 337 | - } | |
| 338 | - set_offset = true; | |
| 339 | - frame->olist.push_back(object); | |
| 340 | - break; | |
| 341 | - | |
| 342 | - case st_top: | |
| 343 | - done = true; | |
| 344 | - break; | |
| 345 | - } | |
| 130 | + default: | |
| 131 | + warn("treating unknown token type as null while reading object"); | |
| 132 | + return {QPDF_Null::create()}; | |
| 346 | 133 | } |
| 347 | 134 | |
| 348 | - if (is_null) { | |
| 349 | - object = QPDF_Null::create(); | |
| 350 | - } | |
| 351 | - if (!set_offset) { | |
| 352 | - setDescription(object, frame->offset); | |
| 353 | - } | |
| 135 | + setDescription(object, frame->offset); | |
| 354 | 136 | return object; |
| 355 | 137 | } |
| 356 | 138 | |
| ... | ... | @@ -363,18 +145,15 @@ QPDFParser::parseRemainder(bool content_stream) |
| 363 | 145 | // logic error to be thrown from QPDF::inParse(). |
| 364 | 146 | |
| 365 | 147 | const static std::shared_ptr<QPDFObject> null_oh = QPDF_Null::create(); |
| 366 | -// QPDF::ParseGuard pg(context); | |
| 367 | - | |
| 368 | -// empty = false; | |
| 369 | 148 | |
| 370 | 149 | std::shared_ptr<QPDFObject> object; |
| 371 | 150 | bool set_offset = false; |
| 372 | 151 | |
| 373 | -// std::vector<StackFrame> stack{{input, st_top},}; | |
| 374 | 152 | bool done = false; |
| 375 | 153 | bool b_contents = false; |
| 376 | 154 | bool is_null = false; |
| 377 | 155 | frame = &stack.back(); // CHANGED |
| 156 | + bad_count = 0; | |
| 378 | 157 | |
| 379 | 158 | while (!done) { |
| 380 | 159 | bool indirect_ref = false; |
| ... | ... | @@ -389,19 +168,17 @@ QPDFParser::parseRemainder(bool content_stream) |
| 389 | 168 | |
| 390 | 169 | switch (tokenizer.getType()) { |
| 391 | 170 | case QPDFTokenizer::tt_eof: |
| 392 | - if (stack.size() > 1) { | |
| 393 | - warn("parse error while reading object"); | |
| 394 | - } | |
| 171 | + warn("parse error while reading object"); | |
| 395 | 172 | if (content_stream) { |
| 396 | 173 | // In content stream mode, leave object uninitialized to indicate EOF |
| 397 | 174 | return {}; |
| 398 | 175 | } |
| 399 | - QTC::TC("qpdf", "QPDFParser eof in parse"); | |
| 176 | + QTC::TC("qpdf", "QPDFParser eof in parseRemainder"); | |
| 400 | 177 | warn("unexpected EOF"); |
| 401 | 178 | return {QPDF_Null::create()}; |
| 402 | 179 | |
| 403 | 180 | case QPDFTokenizer::tt_bad: |
| 404 | - QTC::TC("qpdf", "QPDFParser bad token in parse"); | |
| 181 | + QTC::TC("qpdf", "QPDFParser bad token in parseRemainder"); | |
| 405 | 182 | if (tooManyBadTokens()) { |
| 406 | 183 | return {QPDF_Null::create()}; |
| 407 | 184 | } |
| ... | ... | @@ -410,7 +187,7 @@ QPDFParser::parseRemainder(bool content_stream) |
| 410 | 187 | |
| 411 | 188 | case QPDFTokenizer::tt_brace_open: |
| 412 | 189 | case QPDFTokenizer::tt_brace_close: |
| 413 | - QTC::TC("qpdf", "QPDFParser bad brace"); | |
| 190 | + QTC::TC("qpdf", "QPDFParser bad brace in parseRemainder"); | |
| 414 | 191 | warn("treating unexpected brace token as null"); |
| 415 | 192 | if (tooManyBadTokens()) { |
| 416 | 193 | return {QPDF_Null::create()}; |
| ... | ... | @@ -434,7 +211,7 @@ QPDFParser::parseRemainder(bool content_stream) |
| 434 | 211 | stack.pop_back(); |
| 435 | 212 | frame = &stack.back(); |
| 436 | 213 | } else { |
| 437 | - QTC::TC("qpdf", "QPDFParser bad array close"); | |
| 214 | + QTC::TC("qpdf", "QPDFParser bad array close in parseRemainder"); | |
| 438 | 215 | warn("treating unexpected array close token as null"); |
| 439 | 216 | if (tooManyBadTokens()) { |
| 440 | 217 | return {QPDF_Null::create()}; |
| ... | ... | @@ -519,7 +296,7 @@ QPDFParser::parseRemainder(bool content_stream) |
| 519 | 296 | stack.pop_back(); |
| 520 | 297 | frame = &stack.back(); |
| 521 | 298 | } else { |
| 522 | - QTC::TC("qpdf", "QPDFParser bad dictionary close"); | |
| 299 | + QTC::TC("qpdf", "QPDFParser bad dictionary close in parseRemainder"); | |
| 523 | 300 | warn("unexpected dictionary close token"); |
| 524 | 301 | if (tooManyBadTokens()) { |
| 525 | 302 | return {QPDF_Null::create()}; |
| ... | ... | @@ -582,7 +359,7 @@ QPDFParser::parseRemainder(bool content_stream) |
| 582 | 359 | if (content_stream) { |
| 583 | 360 | object = QPDF_Operator::create(value); |
| 584 | 361 | } else if ( |
| 585 | - value == "R" && frame->state != st_top && size >= 2 && frame->olist.back() && | |
| 362 | + value == "R" && size >= 2 && frame->olist.back() && | |
| 586 | 363 | frame->olist.back()->getTypeCode() == ::ot_integer && |
| 587 | 364 | !frame->olist.back()->getObjGen().isIndirect() && frame->olist.at(size - 2) && |
| 588 | 365 | frame->olist.at(size - 2)->getTypeCode() == ::ot_integer && |
| ... | ... | @@ -607,14 +384,8 @@ QPDFParser::parseRemainder(bool content_stream) |
| 607 | 384 | } |
| 608 | 385 | frame->olist.pop_back(); |
| 609 | 386 | frame->olist.pop_back(); |
| 610 | - } else if ((value == "endobj") && (frame->state == st_top)) { | |
| 611 | - // We just saw endobj without having read anything. Treat this as a null and do | |
| 612 | - // not move the input source's offset. | |
| 613 | - is_null = true; | |
| 614 | - input->seek(input->getLastOffset(), SEEK_SET); | |
| 615 | -// empty = true; | |
| 616 | 387 | } else { |
| 617 | - QTC::TC("qpdf", "QPDFParser treat word as string"); | |
| 388 | + QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder"); | |
| 618 | 389 | warn("unknown token while reading object; treating as string"); |
| 619 | 390 | if (tooManyBadTokens()) { |
| 620 | 391 | return {QPDF_Null::create()}; | ... | ... |
qpdf/qpdf.testcov
| ... | ... | @@ -57,11 +57,14 @@ QPDF trailer lacks size 0 |
| 57 | 57 | QPDF trailer size not integer 0 |
| 58 | 58 | QPDF trailer prev not integer 0 |
| 59 | 59 | QPDFParser bad brace 0 |
| 60 | +QPDFParser bad brace in parseRemainder 0 | |
| 60 | 61 | QPDFParser bad array close 0 |
| 62 | +QPDFParser bad array close in parseRemainder 0 | |
| 61 | 63 | QPDF stream without length 0 |
| 62 | 64 | QPDF stream length not integer 0 |
| 63 | 65 | QPDF missing endstream 0 |
| 64 | 66 | QPDFParser bad dictionary close 0 |
| 67 | +QPDFParser bad dictionary close in parseRemainder 0 | |
| 65 | 68 | QPDF can't find xref 0 |
| 66 | 69 | QPDFTokenizer bad ) 0 |
| 67 | 70 | QPDFTokenizer bad > 0 |
| ... | ... | @@ -258,6 +261,7 @@ QPDFParser indirect with 0 objid 0 |
| 258 | 261 | QPDF object id 0 0 |
| 259 | 262 | QPDF recursion loop in resolve 0 |
| 260 | 263 | QPDFParser treat word as string 0 |
| 264 | +QPDFParser treat word as string in parseRemainder 0 | |
| 261 | 265 | QPDFParser found fake 1 |
| 262 | 266 | QPDFParser no val for last key 0 |
| 263 | 267 | QPDF resolve failure to null 0 |
| ... | ... | @@ -289,7 +293,9 @@ QPDFObjectHandle coalesce called on stream 0 |
| 289 | 293 | QPDFObjectHandle coalesce provide stream data 0 |
| 290 | 294 | QPDF_Stream bad token at end during normalize 0 |
| 291 | 295 | QPDFParser bad token in parse 0 |
| 296 | +QPDFParser bad token in parseRemainder 0 | |
| 292 | 297 | QPDFParser eof in parse 0 |
| 298 | +QPDFParser eof in parseRemainder 0 | |
| 293 | 299 | QPDFObjectHandle array bounds 0 |
| 294 | 300 | QPDFObjectHandle boolean returning false 0 |
| 295 | 301 | QPDFObjectHandle integer returning 0 0 | ... | ... |
qpdf/qtest/parsing.test
| ... | ... | @@ -17,7 +17,7 @@ my $td = new TestDriver('parsing'); |
| 17 | 17 | my $n_tests = 17; |
| 18 | 18 | |
| 19 | 19 | $td->runtest("parse objects from string", |
| 20 | - {$td->COMMAND => "test_driver 31 good1.qdf"}, | |
| 20 | + {$td->COMMAND => "test_driver 31 bad39.qdf"}, | |
| 21 | 21 | {$td->FILE => "parse-object.out", $td->EXIT_STATUS => 0}, |
| 22 | 22 | $td->NORMALIZE_NEWLINES); |
| 23 | 23 | $td->runtest("EOF terminating literal tokens", | ... | ... |
qpdf/qtest/qpdf/bad39.qdf
0 → 100644
| 1 | +%PDF-1.3 | |
| 2 | +%¿÷¢þ | |
| 3 | +%QDF-1.0 | |
| 4 | + | |
| 5 | +%% Original object ID: 1 0 | |
| 6 | +1 0 obj | |
| 7 | +<< | |
| 8 | + /Pages 2 0 R | |
| 9 | + /Type /Catalog | |
| 10 | +>> | |
| 11 | +endobj | |
| 12 | + | |
| 13 | +%% Original object ID: 2 0 | |
| 14 | +2 0 obj | |
| 15 | +<< | |
| 16 | + /Count 1 | |
| 17 | + /Kids [ | |
| 18 | + 3 0 R | |
| 19 | + ] | |
| 20 | + /Type /Pages | |
| 21 | +>> | |
| 22 | +endobj | |
| 23 | + | |
| 24 | +%% Page 1 | |
| 25 | +%% Original object ID: 3 0 | |
| 26 | +3 0 obj | |
| 27 | +<< | |
| 28 | + /Contents 4 0 R | |
| 29 | + /MediaBox [ | |
| 30 | + 0 | |
| 31 | + 0 | |
| 32 | + 612 | |
| 33 | + 792 | |
| 34 | + ] | |
| 35 | + /Parent 2 0 R | |
| 36 | + /Resources << | |
| 37 | + /Font << | |
| 38 | + /F1 6 0 R | |
| 39 | + >> | |
| 40 | + /ProcSet 7 0 R | |
| 41 | + >> | |
| 42 | + /Type /Page | |
| 43 | +>> | |
| 44 | +endobj | |
| 45 | + | |
| 46 | +%% Contents for page 1 | |
| 47 | +%% Original object ID: 4 0 | |
| 48 | +4 0 obj | |
| 49 | +<< | |
| 50 | + /Length 5 0 R | |
| 51 | +>> | |
| 52 | +stream | |
| 53 | +BT | |
| 54 | + /F1 24 Tf | |
| 55 | + 72 720 Td | |
| 56 | + (Potato) Tj | |
| 57 | +ET | |
| 58 | +endstream | |
| 59 | +endobj | |
| 60 | + | |
| 61 | +5 0 obj | |
| 62 | +44 | |
| 63 | +endobj | |
| 64 | + | |
| 65 | +%% Original object ID: 6 0 | |
| 66 | +6 0 obj | |
| 67 | +<< | |
| 68 | + /BaseFont /Helvetica | |
| 69 | + /Encoding /WinAnsiEncoding | |
| 70 | + /Name /F1 | |
| 71 | + /Subtype /Type1 | |
| 72 | + /Type /Font | |
| 73 | +>> | |
| 74 | +endobj | |
| 75 | + | |
| 76 | +%% Original object ID: 5 0 | |
| 77 | +7 0 obj | |
| 78 | +[ | |
| 79 | ||
| 80 | + /Text | |
| 81 | +] | |
| 82 | +endobj | |
| 83 | + | |
| 84 | +xref | |
| 85 | +0 8 | |
| 86 | +0000000000 65535 f | |
| 87 | +0000000052 00000 n | |
| 88 | +0000000133 00000 n | |
| 89 | +0000000242 00000 n | |
| 90 | +0000000484 00000 n | |
| 91 | +0000000583 00000 n | |
| 92 | +0000000629 00000 n | |
| 93 | +0000001113 00000 n | |
| 94 | +trailer << | |
| 95 | + /Root 1 0 R | |
| 96 | + /Size 8 | |
| 97 | + /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] | |
| 98 | +>> | |
| 99 | +startxref | |
| 100 | +809 | |
| 101 | +%%EOF | |
| 102 | +7 0 obj | ... | ... |
qpdf/qtest/qpdf/parse-object.out
| ... | ... | @@ -2,4 +2,10 @@ |
| 2 | 2 | logic error parsing indirect: QPDFObjectHandle::parse called without context on an object with indirect references |
| 3 | 3 | trailing data: parsed object (trailing test): trailing data found parsing object from string |
| 4 | 4 | WARNING: parsed object (offset 9): unknown token while reading object; treating as string |
| 5 | +WARNING: parsed object: treating unexpected brace token as null | |
| 6 | +WARNING: parsed object: treating unexpected brace token as null | |
| 7 | +WARNING: parsed object: unexpected dictionary close token | |
| 8 | +WARNING: bad39.qdf (object 7 0, offset 1121): unexpected EOF | |
| 9 | +WARNING: bad39.qdf (object 7 0, offset 1121): expected endobj | |
| 10 | +WARNING: bad39.qdf (object 7 0, offset 1121): EOF after endobj | |
| 5 | 11 | test 31 done | ... | ... |
qpdf/test_driver.cc
| ... | ... | @@ -1195,6 +1195,13 @@ test_31(QPDF& pdf, char const* arg2) |
| 1195 | 1195 | // mistakenly parsed as an indirect object. |
| 1196 | 1196 | assert(QPDFObjectHandle::parse(&pdf, "[5 0 R 0 R /X]").unparse() == "[ 5 0 R 0 (R) /X ]"); |
| 1197 | 1197 | assert(QPDFObjectHandle::parse(&pdf, "[1 0 R]", "indirect test").unparse() == "[ 1 0 R ]"); |
| 1198 | + // TC:QPDFParser bad brace | |
| 1199 | + assert(QPDFObjectHandle::parse(&pdf, "}").unparse() == "null"); | |
| 1200 | + assert(QPDFObjectHandle::parse(&pdf, "{").unparse() == "null"); | |
| 1201 | + // TC:QPDFParser bad dictionary close | |
| 1202 | + assert(QPDFObjectHandle::parse(&pdf, ">>").unparse() == "null"); | |
| 1203 | + // TC:QPDFParser eof in parse | |
| 1204 | + assert(QPDFObjectHandle::parse(&pdf, "[7 0 R]").getArrayItem(0).isNull()); | |
| 1198 | 1205 | } |
| 1199 | 1206 | |
| 1200 | 1207 | static void | ... | ... |