Commit 172cc6130583d3c30df3fcea22528afca4b12e5f
1 parent
5a1bf035
Remove redundant code in QPDFParser::parse and parseRemainder
Also, fix test cases.
Showing
6 changed files
with
219 additions
and
327 deletions
libqpdf/QPDFParser.cc
| @@ -21,7 +21,6 @@ | @@ -21,7 +21,6 @@ | ||
| 21 | 21 | ||
| 22 | #include <memory> | 22 | #include <memory> |
| 23 | 23 | ||
| 24 | - | ||
| 25 | QPDFObjectHandle | 24 | QPDFObjectHandle |
| 26 | QPDFParser::parse(bool& empty, bool content_stream) | 25 | QPDFParser::parse(bool& empty, bool content_stream) |
| 27 | { | 26 | { |
| @@ -30,327 +29,110 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -30,327 +29,110 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 30 | // effect of reading the object and changing the file pointer. If you do this, it will cause a | 29 | // effect of reading the object and changing the file pointer. If you do this, it will cause a |
| 31 | // logic error to be thrown from QPDF::inParse(). | 30 | // logic error to be thrown from QPDF::inParse(). |
| 32 | 31 | ||
| 33 | - const static std::shared_ptr<QPDFObject> null_oh = QPDF_Null::create(); | ||
| 34 | QPDF::ParseGuard pg(context); | 32 | QPDF::ParseGuard pg(context); |
| 35 | - | ||
| 36 | empty = false; | 33 | empty = false; |
| 37 | 34 | ||
| 38 | std::shared_ptr<QPDFObject> object; | 35 | std::shared_ptr<QPDFObject> object; |
| 39 | - bool set_offset = false; | ||
| 40 | - | ||
| 41 | -// std::vector<StackFrame> stack{{input, st_top}}; | ||
| 42 | - stack.clear(); // NEW | ||
| 43 | - stack.emplace_back(input, st_top); // NEW | ||
| 44 | - bool done = false; | ||
| 45 | - bool b_contents = false; | ||
| 46 | - bool is_null = false; | ||
| 47 | - frame = &stack.back(); // CHANGED | 36 | + stack.clear(); |
| 37 | + stack.emplace_back(input, st_top); | ||
| 38 | + frame = &stack.back(); | ||
| 39 | + object = nullptr; | ||
| 48 | 40 | ||
| 49 | - while (!done) { | ||
| 50 | - bool indirect_ref = false; | ||
| 51 | - is_null = false; | ||
| 52 | - object = nullptr; | ||
| 53 | - set_offset = false; | 41 | + if (!tokenizer.nextToken(*input, object_description)) { |
| 42 | + warn(tokenizer.getErrorMessage()); | ||
| 43 | + } | ||
| 54 | 44 | ||
| 55 | - if (!tokenizer.nextToken(*input, object_description)) { | ||
| 56 | - warn(tokenizer.getErrorMessage()); | 45 | + switch (tokenizer.getType()) { |
| 46 | + case QPDFTokenizer::tt_eof: | ||
| 47 | + if (content_stream) { | ||
| 48 | + // In content stream mode, leave object uninitialized to indicate EOF | ||
| 49 | + return {}; | ||
| 57 | } | 50 | } |
| 58 | - ++good_count; // optimistically | ||
| 59 | - | ||
| 60 | - switch (tokenizer.getType()) { | ||
| 61 | - case QPDFTokenizer::tt_eof: | ||
| 62 | - if (stack.size() > 1) { | ||
| 63 | - warn("parse error while reading object"); | ||
| 64 | - } | 51 | + QTC::TC("qpdf", "QPDFParser eof in parse"); |
| 52 | + warn("unexpected EOF"); | ||
| 53 | + return {QPDF_Null::create()}; | ||
| 54 | + | ||
| 55 | + case QPDFTokenizer::tt_bad: | ||
| 56 | + QTC::TC("qpdf", "QPDFParser bad token in parse"); | ||
| 57 | + return {QPDF_Null::create()}; | ||
| 58 | + | ||
| 59 | + case QPDFTokenizer::tt_brace_open: | ||
| 60 | + case QPDFTokenizer::tt_brace_close: | ||
| 61 | + QTC::TC("qpdf", "QPDFParser bad brace"); | ||
| 62 | + warn("treating unexpected brace token as null"); | ||
| 63 | + return {QPDF_Null::create()}; | ||
| 64 | + | ||
| 65 | + case QPDFTokenizer::tt_array_close: | ||
| 66 | + QTC::TC("qpdf", "QPDFParser bad array close"); | ||
| 67 | + warn("treating unexpected array close token as null"); | ||
| 68 | + return {QPDF_Null::create()}; | ||
| 69 | + | ||
| 70 | + case QPDFTokenizer::tt_dict_close: | ||
| 71 | + QTC::TC("qpdf", "QPDFParser bad dictionary close"); | ||
| 72 | + warn("unexpected dictionary close token"); | ||
| 73 | + return {QPDF_Null::create()}; | ||
| 74 | + | ||
| 75 | + case QPDFTokenizer::tt_array_open: | ||
| 76 | + case QPDFTokenizer::tt_dict_open: | ||
| 77 | + stack.emplace_back( | ||
| 78 | + input, | ||
| 79 | + (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary); | ||
| 80 | + return parseRemainder(content_stream); | ||
| 81 | + | ||
| 82 | + case QPDFTokenizer::tt_bool: | ||
| 83 | + object = QPDF_Bool::create((tokenizer.getValue() == "true")); | ||
| 84 | + break; | ||
| 85 | + | ||
| 86 | + case QPDFTokenizer::tt_null: | ||
| 87 | + return {QPDF_Null::create()}; | ||
| 88 | + | ||
| 89 | + case QPDFTokenizer::tt_integer: | ||
| 90 | + object = QPDF_Integer::create(QUtil::string_to_ll(tokenizer.getValue().c_str())); | ||
| 91 | + break; | ||
| 92 | + | ||
| 93 | + case QPDFTokenizer::tt_real: | ||
| 94 | + object = QPDF_Real::create(tokenizer.getValue()); | ||
| 95 | + break; | ||
| 96 | + | ||
| 97 | + case QPDFTokenizer::tt_name: | ||
| 98 | + object = QPDF_Name::create(tokenizer.getValue()); | ||
| 99 | + break; | ||
| 100 | + | ||
| 101 | + case QPDFTokenizer::tt_word: | ||
| 102 | + { | ||
| 103 | + auto const& value = tokenizer.getValue(); | ||
| 65 | if (content_stream) { | 104 | if (content_stream) { |
| 66 | - // In content stream mode, leave object uninitialized to indicate EOF | ||
| 67 | - return {}; | ||
| 68 | - } | ||
| 69 | -// QTC::TC("qpdf", "QPDFParser eof in parse"); | ||
| 70 | - warn("unexpected EOF"); | ||
| 71 | - return {QPDF_Null::create()}; | ||
| 72 | - | ||
| 73 | - case QPDFTokenizer::tt_bad: | ||
| 74 | -// QTC::TC("qpdf", "QPDFParser bad token in parse"); | ||
| 75 | - if (tooManyBadTokens()) { | ||
| 76 | - return {QPDF_Null::create()}; | ||
| 77 | - } | ||
| 78 | - is_null = true; | ||
| 79 | - break; | ||
| 80 | - | ||
| 81 | - case QPDFTokenizer::tt_brace_open: | ||
| 82 | - case QPDFTokenizer::tt_brace_close: | ||
| 83 | -// QTC::TC("qpdf", "QPDFParser bad brace"); | ||
| 84 | - warn("treating unexpected brace token as null"); | ||
| 85 | - if (tooManyBadTokens()) { | ||
| 86 | - return {QPDF_Null::create()}; | ||
| 87 | - } | ||
| 88 | - is_null = true; | ||
| 89 | - break; | ||
| 90 | - | ||
| 91 | - case QPDFTokenizer::tt_array_close: | ||
| 92 | - if (frame->state == st_array) { | ||
| 93 | - if (stack.size() < 2) { | ||
| 94 | - throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with " | ||
| 95 | - "insufficient elements in stack"); | ||
| 96 | - } | ||
| 97 | - object = QPDF_Array::create(std::move(frame->olist), frame->null_count > 100); | ||
| 98 | - setDescription(object, frame->offset - 1); | ||
| 99 | - // The `offset` points to the next of "[". Set the rewind offset to point to the | ||
| 100 | - // beginning of "[". This has been explicitly tested with whitespace surrounding the | ||
| 101 | - // array start delimiter. getLastOffset points to the array end token and therefore | ||
| 102 | - // can't be used here. | ||
| 103 | - set_offset = true; | ||
| 104 | - stack.pop_back(); | ||
| 105 | - frame = &stack.back(); | ||
| 106 | - } else { | ||
| 107 | -// QTC::TC("qpdf", "QPDFParser bad array close"); | ||
| 108 | - warn("treating unexpected array close token as null"); | ||
| 109 | - if (tooManyBadTokens()) { | ||
| 110 | - return {QPDF_Null::create()}; | ||
| 111 | - } | ||
| 112 | - is_null = true; | ||
| 113 | - } | ||
| 114 | - break; | ||
| 115 | - | ||
| 116 | - case QPDFTokenizer::tt_dict_close: | ||
| 117 | - if (frame->state == st_dictionary) { | ||
| 118 | - if (stack.size() < 2) { | ||
| 119 | - throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with " | ||
| 120 | - "insufficient elements in stack"); | ||
| 121 | - } | ||
| 122 | - | ||
| 123 | - // Convert list to map. Alternating elements are keys. Attempt to recover more or | ||
| 124 | - // less gracefully from invalid dictionaries. | ||
| 125 | - std::set<std::string> names; | ||
| 126 | - for (auto& obj: frame->olist) { | ||
| 127 | - if (obj) { | ||
| 128 | - if (obj->getTypeCode() == ::ot_name) { | ||
| 129 | - names.insert(obj->getStringValue()); | ||
| 130 | - } | ||
| 131 | - } | ||
| 132 | - } | ||
| 133 | - | ||
| 134 | - std::map<std::string, QPDFObjectHandle> dict; | ||
| 135 | - int next_fake_key = 1; | ||
| 136 | - for (auto iter = frame->olist.begin(); iter != frame->olist.end();) { | ||
| 137 | - // Calculate key. | ||
| 138 | - std::string key; | ||
| 139 | - if (*iter && (*iter)->getTypeCode() == ::ot_name) { | ||
| 140 | - key = (*iter)->getStringValue(); | ||
| 141 | - ++iter; | ||
| 142 | - } else { | ||
| 143 | - for (bool found_fake = false; !found_fake;) { | ||
| 144 | - key = "/QPDFFake" + std::to_string(next_fake_key++); | ||
| 145 | - found_fake = (names.count(key) == 0); | ||
| 146 | -// QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1)); | ||
| 147 | - } | ||
| 148 | - warn( | ||
| 149 | - frame->offset, | ||
| 150 | - "expected dictionary key but found non-name object; inserting key " + | ||
| 151 | - key); | ||
| 152 | - } | ||
| 153 | - if (dict.count(key) > 0) { | ||
| 154 | -// QTC::TC("qpdf", "QPDFParser duplicate dict key"); | ||
| 155 | - warn( | ||
| 156 | - frame->offset, | ||
| 157 | - "dictionary has duplicated key " + key + | ||
| 158 | - "; last occurrence overrides earlier ones"); | ||
| 159 | - } | ||
| 160 | - | ||
| 161 | - // Calculate value. | ||
| 162 | - std::shared_ptr<QPDFObject> val; | ||
| 163 | - if (iter != frame->olist.end()) { | ||
| 164 | - val = *iter; | ||
| 165 | - ++iter; | ||
| 166 | - } else { | ||
| 167 | -// QTC::TC("qpdf", "QPDFParser no val for last key"); | ||
| 168 | - warn( | ||
| 169 | - frame->offset, | ||
| 170 | - "dictionary ended prematurely; using null as value for last key"); | ||
| 171 | - val = QPDF_Null::create(); | ||
| 172 | - } | ||
| 173 | - | ||
| 174 | - dict[std::move(key)] = std::move(val); | ||
| 175 | - } | ||
| 176 | - if (!frame->contents_string.empty() && dict.count("/Type") && | ||
| 177 | - dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") && | ||
| 178 | - dict.count("/Contents") && dict["/Contents"].isString()) { | ||
| 179 | - dict["/Contents"] = QPDFObjectHandle::newString(frame->contents_string); | ||
| 180 | - dict["/Contents"].setParsedOffset(frame->contents_offset); | ||
| 181 | - } | ||
| 182 | - object = QPDF_Dictionary::create(std::move(dict)); | ||
| 183 | - setDescription(object, frame->offset - 2); | ||
| 184 | - // The `offset` points to the next of "<<". Set the rewind offset to point to the | ||
| 185 | - // beginning of "<<". This has been explicitly tested with whitespace surrounding | ||
| 186 | - // the dictionary start delimiter. getLastOffset points to the dictionary end token | ||
| 187 | - // and therefore can't be used here. | ||
| 188 | - set_offset = true; | ||
| 189 | - stack.pop_back(); | ||
| 190 | - frame = &stack.back(); | ||
| 191 | - } else { | ||
| 192 | -// QTC::TC("qpdf", "QPDFParser bad dictionary close"); | ||
| 193 | - warn("unexpected dictionary close token"); | ||
| 194 | - if (tooManyBadTokens()) { | ||
| 195 | - return {QPDF_Null::create()}; | ||
| 196 | - } | ||
| 197 | - is_null = true; | ||
| 198 | - } | ||
| 199 | - break; | ||
| 200 | - | ||
| 201 | - case QPDFTokenizer::tt_array_open: | ||
| 202 | - case QPDFTokenizer::tt_dict_open: | ||
| 203 | - if (stack.size() > 500) { | ||
| 204 | -// QTC::TC("qpdf", "QPDFParser too deep"); | ||
| 205 | - warn("ignoring excessively deeply nested data structure"); | 105 | + object = QPDF_Operator::create(value); |
| 106 | + } else if (value == "endobj") { | ||
| 107 | + // We just saw endobj without having read anything. Treat this as a null and do | ||
| 108 | + // not move the input source's offset. | ||
| 109 | + input->seek(input->getLastOffset(), SEEK_SET); | ||
| 110 | + empty = true; | ||
| 206 | return {QPDF_Null::create()}; | 111 | return {QPDF_Null::create()}; |
| 207 | } else { | 112 | } else { |
| 208 | - b_contents = false; | ||
| 209 | - stack.emplace_back( | ||
| 210 | - input, | ||
| 211 | - (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array | ||
| 212 | - : st_dictionary); | ||
| 213 | - frame = &stack.back(); | ||
| 214 | - return parseRemainder(content_stream); // NEW | ||
| 215 | - continue; | ||
| 216 | - } | ||
| 217 | - | ||
| 218 | - case QPDFTokenizer::tt_bool: | ||
| 219 | - object = QPDF_Bool::create((tokenizer.getValue() == "true")); | ||
| 220 | - break; | ||
| 221 | - | ||
| 222 | - case QPDFTokenizer::tt_null: | ||
| 223 | - is_null = true; | ||
| 224 | - ++frame->null_count; | ||
| 225 | - | ||
| 226 | - break; | ||
| 227 | - | ||
| 228 | - case QPDFTokenizer::tt_integer: | ||
| 229 | - object = QPDF_Integer::create(QUtil::string_to_ll(tokenizer.getValue().c_str())); | ||
| 230 | - break; | ||
| 231 | - | ||
| 232 | - case QPDFTokenizer::tt_real: | ||
| 233 | - object = QPDF_Real::create(tokenizer.getValue()); | ||
| 234 | - break; | ||
| 235 | - | ||
| 236 | - case QPDFTokenizer::tt_name: | ||
| 237 | - { | ||
| 238 | - auto const& name = tokenizer.getValue(); | ||
| 239 | - object = QPDF_Name::create(name); | ||
| 240 | - | ||
| 241 | - if (name == "/Contents") { | ||
| 242 | - b_contents = true; | ||
| 243 | - } else { | ||
| 244 | - b_contents = false; | ||
| 245 | - } | ||
| 246 | - } | ||
| 247 | - break; | ||
| 248 | - | ||
| 249 | - case QPDFTokenizer::tt_word: | ||
| 250 | - { | ||
| 251 | - auto const& value = tokenizer.getValue(); | ||
| 252 | - auto size = frame->olist.size(); | ||
| 253 | - if (content_stream) { | ||
| 254 | - object = QPDF_Operator::create(value); | ||
| 255 | - } else if ( | ||
| 256 | - value == "R" && frame->state != st_top && size >= 2 && frame->olist.back() && | ||
| 257 | - frame->olist.back()->getTypeCode() == ::ot_integer && | ||
| 258 | - !frame->olist.back()->getObjGen().isIndirect() && frame->olist.at(size - 2) && | ||
| 259 | - frame->olist.at(size - 2)->getTypeCode() == ::ot_integer && | ||
| 260 | - !frame->olist.at(size - 2)->getObjGen().isIndirect()) { | ||
| 261 | - if (context == nullptr) { | ||
| 262 | -// QTC::TC("qpdf", "QPDFParser indirect without context"); | ||
| 263 | - throw std::logic_error("QPDFObjectHandle::parse called without context on " | ||
| 264 | - "an object with indirect references"); | ||
| 265 | - } | ||
| 266 | - auto ref_og = QPDFObjGen( | ||
| 267 | - QPDFObjectHandle(frame->olist.at(size - 2)).getIntValueAsInt(), | ||
| 268 | - QPDFObjectHandle(frame->olist.back()).getIntValueAsInt()); | ||
| 269 | - if (ref_og.isIndirect()) { | ||
| 270 | - // This action has the desirable side effect of causing dangling references | ||
| 271 | - // (references to indirect objects that don't appear in the PDF) in any | ||
| 272 | - // parsed object to appear in the object cache. | ||
| 273 | - object = context->getObject(ref_og).obj; | ||
| 274 | - indirect_ref = true; | ||
| 275 | - } else { | ||
| 276 | -// QTC::TC("qpdf", "QPDFParser indirect with 0 objid"); | ||
| 277 | - is_null = true; | ||
| 278 | - } | ||
| 279 | - frame->olist.pop_back(); | ||
| 280 | - frame->olist.pop_back(); | ||
| 281 | - } else if ((value == "endobj") && (frame->state == st_top)) { | ||
| 282 | - // We just saw endobj without having read anything. Treat this as a null and do | ||
| 283 | - // not move the input source's offset. | ||
| 284 | - is_null = true; | ||
| 285 | - input->seek(input->getLastOffset(), SEEK_SET); | ||
| 286 | - empty = true; | ||
| 287 | - } else { | ||
| 288 | -// QTC::TC("qpdf", "QPDFParser treat word as string"); | ||
| 289 | - warn("unknown token while reading object; treating as string"); | ||
| 290 | - if (tooManyBadTokens()) { | ||
| 291 | - return {QPDF_Null::create()}; | ||
| 292 | - } | ||
| 293 | - object = QPDF_String::create(value); | ||
| 294 | - } | ||
| 295 | - } | ||
| 296 | - break; | ||
| 297 | - | ||
| 298 | - case QPDFTokenizer::tt_string: | ||
| 299 | - { | ||
| 300 | - auto const& val = tokenizer.getValue(); | ||
| 301 | - if (decrypter) { | ||
| 302 | - if (b_contents) { | ||
| 303 | - frame->contents_string = val; | ||
| 304 | - frame->contents_offset = input->getLastOffset(); | ||
| 305 | - b_contents = false; | ||
| 306 | - } | ||
| 307 | - std::string s{val}; | ||
| 308 | - decrypter->decryptString(s); | ||
| 309 | - object = QPDF_String::create(s); | ||
| 310 | - } else { | ||
| 311 | - object = QPDF_String::create(val); | ||
| 312 | - } | 113 | + QTC::TC("qpdf", "QPDFParser treat word as string"); |
| 114 | + warn("unknown token while reading object; treating as string"); | ||
| 115 | + object = QPDF_String::create(value); | ||
| 313 | } | 116 | } |
| 314 | - break; | ||
| 315 | - | ||
| 316 | - default: | ||
| 317 | - warn("treating unknown token type as null while reading object"); | ||
| 318 | - if (tooManyBadTokens()) { | ||
| 319 | - return {QPDF_Null::create()}; | ||
| 320 | - } | ||
| 321 | - is_null = true; | ||
| 322 | - break; | ||
| 323 | } | 117 | } |
| 324 | - | ||
| 325 | - if (object == nullptr && !is_null) { | ||
| 326 | - throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object"); | 118 | + break; |
| 119 | + | ||
| 120 | + case QPDFTokenizer::tt_string: | ||
| 121 | + if (decrypter) { | ||
| 122 | + std::string s{tokenizer.getValue()}; | ||
| 123 | + decrypter->decryptString(s); | ||
| 124 | + object = QPDF_String::create(s); | ||
| 125 | + } else { | ||
| 126 | + object = QPDF_String::create(tokenizer.getValue()); | ||
| 327 | } | 127 | } |
| 128 | + break; | ||
| 328 | 129 | ||
| 329 | - switch (frame->state) { | ||
| 330 | - case st_dictionary: | ||
| 331 | - case st_array: | ||
| 332 | - if (is_null) { | ||
| 333 | - object = null_oh; | ||
| 334 | - // No need to set description for direct nulls - they probably will become implicit. | ||
| 335 | - } else if (!indirect_ref && !set_offset) { | ||
| 336 | - setDescription(object, input->getLastOffset()); | ||
| 337 | - } | ||
| 338 | - set_offset = true; | ||
| 339 | - frame->olist.push_back(object); | ||
| 340 | - break; | ||
| 341 | - | ||
| 342 | - case st_top: | ||
| 343 | - done = true; | ||
| 344 | - break; | ||
| 345 | - } | 130 | + default: |
| 131 | + warn("treating unknown token type as null while reading object"); | ||
| 132 | + return {QPDF_Null::create()}; | ||
| 346 | } | 133 | } |
| 347 | 134 | ||
| 348 | - if (is_null) { | ||
| 349 | - object = QPDF_Null::create(); | ||
| 350 | - } | ||
| 351 | - if (!set_offset) { | ||
| 352 | - setDescription(object, frame->offset); | ||
| 353 | - } | 135 | + setDescription(object, frame->offset); |
| 354 | return object; | 136 | return object; |
| 355 | } | 137 | } |
| 356 | 138 | ||
| @@ -363,18 +145,15 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -363,18 +145,15 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 363 | // logic error to be thrown from QPDF::inParse(). | 145 | // logic error to be thrown from QPDF::inParse(). |
| 364 | 146 | ||
| 365 | const static std::shared_ptr<QPDFObject> null_oh = QPDF_Null::create(); | 147 | const static std::shared_ptr<QPDFObject> null_oh = QPDF_Null::create(); |
| 366 | -// QPDF::ParseGuard pg(context); | ||
| 367 | - | ||
| 368 | -// empty = false; | ||
| 369 | 148 | ||
| 370 | std::shared_ptr<QPDFObject> object; | 149 | std::shared_ptr<QPDFObject> object; |
| 371 | bool set_offset = false; | 150 | bool set_offset = false; |
| 372 | 151 | ||
| 373 | -// std::vector<StackFrame> stack{{input, st_top},}; | ||
| 374 | bool done = false; | 152 | bool done = false; |
| 375 | bool b_contents = false; | 153 | bool b_contents = false; |
| 376 | bool is_null = false; | 154 | bool is_null = false; |
| 377 | frame = &stack.back(); // CHANGED | 155 | frame = &stack.back(); // CHANGED |
| 156 | + bad_count = 0; | ||
| 378 | 157 | ||
| 379 | while (!done) { | 158 | while (!done) { |
| 380 | bool indirect_ref = false; | 159 | bool indirect_ref = false; |
| @@ -389,19 +168,17 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -389,19 +168,17 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 389 | 168 | ||
| 390 | switch (tokenizer.getType()) { | 169 | switch (tokenizer.getType()) { |
| 391 | case QPDFTokenizer::tt_eof: | 170 | case QPDFTokenizer::tt_eof: |
| 392 | - if (stack.size() > 1) { | ||
| 393 | - warn("parse error while reading object"); | ||
| 394 | - } | 171 | + warn("parse error while reading object"); |
| 395 | if (content_stream) { | 172 | if (content_stream) { |
| 396 | // In content stream mode, leave object uninitialized to indicate EOF | 173 | // In content stream mode, leave object uninitialized to indicate EOF |
| 397 | return {}; | 174 | return {}; |
| 398 | } | 175 | } |
| 399 | - QTC::TC("qpdf", "QPDFParser eof in parse"); | 176 | + QTC::TC("qpdf", "QPDFParser eof in parseRemainder"); |
| 400 | warn("unexpected EOF"); | 177 | warn("unexpected EOF"); |
| 401 | return {QPDF_Null::create()}; | 178 | return {QPDF_Null::create()}; |
| 402 | 179 | ||
| 403 | case QPDFTokenizer::tt_bad: | 180 | case QPDFTokenizer::tt_bad: |
| 404 | - QTC::TC("qpdf", "QPDFParser bad token in parse"); | 181 | + QTC::TC("qpdf", "QPDFParser bad token in parseRemainder"); |
| 405 | if (tooManyBadTokens()) { | 182 | if (tooManyBadTokens()) { |
| 406 | return {QPDF_Null::create()}; | 183 | return {QPDF_Null::create()}; |
| 407 | } | 184 | } |
| @@ -410,7 +187,7 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -410,7 +187,7 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 410 | 187 | ||
| 411 | case QPDFTokenizer::tt_brace_open: | 188 | case QPDFTokenizer::tt_brace_open: |
| 412 | case QPDFTokenizer::tt_brace_close: | 189 | case QPDFTokenizer::tt_brace_close: |
| 413 | - QTC::TC("qpdf", "QPDFParser bad brace"); | 190 | + QTC::TC("qpdf", "QPDFParser bad brace in parseRemainder"); |
| 414 | warn("treating unexpected brace token as null"); | 191 | warn("treating unexpected brace token as null"); |
| 415 | if (tooManyBadTokens()) { | 192 | if (tooManyBadTokens()) { |
| 416 | return {QPDF_Null::create()}; | 193 | return {QPDF_Null::create()}; |
| @@ -434,7 +211,7 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -434,7 +211,7 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 434 | stack.pop_back(); | 211 | stack.pop_back(); |
| 435 | frame = &stack.back(); | 212 | frame = &stack.back(); |
| 436 | } else { | 213 | } else { |
| 437 | - QTC::TC("qpdf", "QPDFParser bad array close"); | 214 | + QTC::TC("qpdf", "QPDFParser bad array close in parseRemainder"); |
| 438 | warn("treating unexpected array close token as null"); | 215 | warn("treating unexpected array close token as null"); |
| 439 | if (tooManyBadTokens()) { | 216 | if (tooManyBadTokens()) { |
| 440 | return {QPDF_Null::create()}; | 217 | return {QPDF_Null::create()}; |
| @@ -519,7 +296,7 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -519,7 +296,7 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 519 | stack.pop_back(); | 296 | stack.pop_back(); |
| 520 | frame = &stack.back(); | 297 | frame = &stack.back(); |
| 521 | } else { | 298 | } else { |
| 522 | - QTC::TC("qpdf", "QPDFParser bad dictionary close"); | 299 | + QTC::TC("qpdf", "QPDFParser bad dictionary close in parseRemainder"); |
| 523 | warn("unexpected dictionary close token"); | 300 | warn("unexpected dictionary close token"); |
| 524 | if (tooManyBadTokens()) { | 301 | if (tooManyBadTokens()) { |
| 525 | return {QPDF_Null::create()}; | 302 | return {QPDF_Null::create()}; |
| @@ -582,7 +359,7 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -582,7 +359,7 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 582 | if (content_stream) { | 359 | if (content_stream) { |
| 583 | object = QPDF_Operator::create(value); | 360 | object = QPDF_Operator::create(value); |
| 584 | } else if ( | 361 | } else if ( |
| 585 | - value == "R" && frame->state != st_top && size >= 2 && frame->olist.back() && | 362 | + value == "R" && size >= 2 && frame->olist.back() && |
| 586 | frame->olist.back()->getTypeCode() == ::ot_integer && | 363 | frame->olist.back()->getTypeCode() == ::ot_integer && |
| 587 | !frame->olist.back()->getObjGen().isIndirect() && frame->olist.at(size - 2) && | 364 | !frame->olist.back()->getObjGen().isIndirect() && frame->olist.at(size - 2) && |
| 588 | frame->olist.at(size - 2)->getTypeCode() == ::ot_integer && | 365 | frame->olist.at(size - 2)->getTypeCode() == ::ot_integer && |
| @@ -607,14 +384,8 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -607,14 +384,8 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 607 | } | 384 | } |
| 608 | frame->olist.pop_back(); | 385 | frame->olist.pop_back(); |
| 609 | frame->olist.pop_back(); | 386 | frame->olist.pop_back(); |
| 610 | - } else if ((value == "endobj") && (frame->state == st_top)) { | ||
| 611 | - // We just saw endobj without having read anything. Treat this as a null and do | ||
| 612 | - // not move the input source's offset. | ||
| 613 | - is_null = true; | ||
| 614 | - input->seek(input->getLastOffset(), SEEK_SET); | ||
| 615 | -// empty = true; | ||
| 616 | } else { | 387 | } else { |
| 617 | - QTC::TC("qpdf", "QPDFParser treat word as string"); | 388 | + QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder"); |
| 618 | warn("unknown token while reading object; treating as string"); | 389 | warn("unknown token while reading object; treating as string"); |
| 619 | if (tooManyBadTokens()) { | 390 | if (tooManyBadTokens()) { |
| 620 | return {QPDF_Null::create()}; | 391 | return {QPDF_Null::create()}; |
qpdf/qpdf.testcov
| @@ -57,11 +57,14 @@ QPDF trailer lacks size 0 | @@ -57,11 +57,14 @@ QPDF trailer lacks size 0 | ||
| 57 | QPDF trailer size not integer 0 | 57 | QPDF trailer size not integer 0 |
| 58 | QPDF trailer prev not integer 0 | 58 | QPDF trailer prev not integer 0 |
| 59 | QPDFParser bad brace 0 | 59 | QPDFParser bad brace 0 |
| 60 | +QPDFParser bad brace in parseRemainder 0 | ||
| 60 | QPDFParser bad array close 0 | 61 | QPDFParser bad array close 0 |
| 62 | +QPDFParser bad array close in parseRemainder 0 | ||
| 61 | QPDF stream without length 0 | 63 | QPDF stream without length 0 |
| 62 | QPDF stream length not integer 0 | 64 | QPDF stream length not integer 0 |
| 63 | QPDF missing endstream 0 | 65 | QPDF missing endstream 0 |
| 64 | QPDFParser bad dictionary close 0 | 66 | QPDFParser bad dictionary close 0 |
| 67 | +QPDFParser bad dictionary close in parseRemainder 0 | ||
| 65 | QPDF can't find xref 0 | 68 | QPDF can't find xref 0 |
| 66 | QPDFTokenizer bad ) 0 | 69 | QPDFTokenizer bad ) 0 |
| 67 | QPDFTokenizer bad > 0 | 70 | QPDFTokenizer bad > 0 |
| @@ -258,6 +261,7 @@ QPDFParser indirect with 0 objid 0 | @@ -258,6 +261,7 @@ QPDFParser indirect with 0 objid 0 | ||
| 258 | QPDF object id 0 0 | 261 | QPDF object id 0 0 |
| 259 | QPDF recursion loop in resolve 0 | 262 | QPDF recursion loop in resolve 0 |
| 260 | QPDFParser treat word as string 0 | 263 | QPDFParser treat word as string 0 |
| 264 | +QPDFParser treat word as string in parseRemainder 0 | ||
| 261 | QPDFParser found fake 1 | 265 | QPDFParser found fake 1 |
| 262 | QPDFParser no val for last key 0 | 266 | QPDFParser no val for last key 0 |
| 263 | QPDF resolve failure to null 0 | 267 | QPDF resolve failure to null 0 |
| @@ -289,7 +293,9 @@ QPDFObjectHandle coalesce called on stream 0 | @@ -289,7 +293,9 @@ QPDFObjectHandle coalesce called on stream 0 | ||
| 289 | QPDFObjectHandle coalesce provide stream data 0 | 293 | QPDFObjectHandle coalesce provide stream data 0 |
| 290 | QPDF_Stream bad token at end during normalize 0 | 294 | QPDF_Stream bad token at end during normalize 0 |
| 291 | QPDFParser bad token in parse 0 | 295 | QPDFParser bad token in parse 0 |
| 296 | +QPDFParser bad token in parseRemainder 0 | ||
| 292 | QPDFParser eof in parse 0 | 297 | QPDFParser eof in parse 0 |
| 298 | +QPDFParser eof in parseRemainder 0 | ||
| 293 | QPDFObjectHandle array bounds 0 | 299 | QPDFObjectHandle array bounds 0 |
| 294 | QPDFObjectHandle boolean returning false 0 | 300 | QPDFObjectHandle boolean returning false 0 |
| 295 | QPDFObjectHandle integer returning 0 0 | 301 | QPDFObjectHandle integer returning 0 0 |
qpdf/qtest/parsing.test
| @@ -17,7 +17,7 @@ my $td = new TestDriver('parsing'); | @@ -17,7 +17,7 @@ my $td = new TestDriver('parsing'); | ||
| 17 | my $n_tests = 17; | 17 | my $n_tests = 17; |
| 18 | 18 | ||
| 19 | $td->runtest("parse objects from string", | 19 | $td->runtest("parse objects from string", |
| 20 | - {$td->COMMAND => "test_driver 31 good1.qdf"}, | 20 | + {$td->COMMAND => "test_driver 31 bad39.qdf"}, |
| 21 | {$td->FILE => "parse-object.out", $td->EXIT_STATUS => 0}, | 21 | {$td->FILE => "parse-object.out", $td->EXIT_STATUS => 0}, |
| 22 | $td->NORMALIZE_NEWLINES); | 22 | $td->NORMALIZE_NEWLINES); |
| 23 | $td->runtest("EOF terminating literal tokens", | 23 | $td->runtest("EOF terminating literal tokens", |
qpdf/qtest/qpdf/bad39.qdf
0 → 100644
| 1 | +%PDF-1.3 | ||
| 2 | +%¿÷¢þ | ||
| 3 | +%QDF-1.0 | ||
| 4 | + | ||
| 5 | +%% Original object ID: 1 0 | ||
| 6 | +1 0 obj | ||
| 7 | +<< | ||
| 8 | + /Pages 2 0 R | ||
| 9 | + /Type /Catalog | ||
| 10 | +>> | ||
| 11 | +endobj | ||
| 12 | + | ||
| 13 | +%% Original object ID: 2 0 | ||
| 14 | +2 0 obj | ||
| 15 | +<< | ||
| 16 | + /Count 1 | ||
| 17 | + /Kids [ | ||
| 18 | + 3 0 R | ||
| 19 | + ] | ||
| 20 | + /Type /Pages | ||
| 21 | +>> | ||
| 22 | +endobj | ||
| 23 | + | ||
| 24 | +%% Page 1 | ||
| 25 | +%% Original object ID: 3 0 | ||
| 26 | +3 0 obj | ||
| 27 | +<< | ||
| 28 | + /Contents 4 0 R | ||
| 29 | + /MediaBox [ | ||
| 30 | + 0 | ||
| 31 | + 0 | ||
| 32 | + 612 | ||
| 33 | + 792 | ||
| 34 | + ] | ||
| 35 | + /Parent 2 0 R | ||
| 36 | + /Resources << | ||
| 37 | + /Font << | ||
| 38 | + /F1 6 0 R | ||
| 39 | + >> | ||
| 40 | + /ProcSet 7 0 R | ||
| 41 | + >> | ||
| 42 | + /Type /Page | ||
| 43 | +>> | ||
| 44 | +endobj | ||
| 45 | + | ||
| 46 | +%% Contents for page 1 | ||
| 47 | +%% Original object ID: 4 0 | ||
| 48 | +4 0 obj | ||
| 49 | +<< | ||
| 50 | + /Length 5 0 R | ||
| 51 | +>> | ||
| 52 | +stream | ||
| 53 | +BT | ||
| 54 | + /F1 24 Tf | ||
| 55 | + 72 720 Td | ||
| 56 | + (Potato) Tj | ||
| 57 | +ET | ||
| 58 | +endstream | ||
| 59 | +endobj | ||
| 60 | + | ||
| 61 | +5 0 obj | ||
| 62 | +44 | ||
| 63 | +endobj | ||
| 64 | + | ||
| 65 | +%% Original object ID: 6 0 | ||
| 66 | +6 0 obj | ||
| 67 | +<< | ||
| 68 | + /BaseFont /Helvetica | ||
| 69 | + /Encoding /WinAnsiEncoding | ||
| 70 | + /Name /F1 | ||
| 71 | + /Subtype /Type1 | ||
| 72 | + /Type /Font | ||
| 73 | +>> | ||
| 74 | +endobj | ||
| 75 | + | ||
| 76 | +%% Original object ID: 5 0 | ||
| 77 | +7 0 obj | ||
| 78 | +[ | ||
| 79 | |||
| 80 | + /Text | ||
| 81 | +] | ||
| 82 | +endobj | ||
| 83 | + | ||
| 84 | +xref | ||
| 85 | +0 8 | ||
| 86 | +0000000000 65535 f | ||
| 87 | +0000000052 00000 n | ||
| 88 | +0000000133 00000 n | ||
| 89 | +0000000242 00000 n | ||
| 90 | +0000000484 00000 n | ||
| 91 | +0000000583 00000 n | ||
| 92 | +0000000629 00000 n | ||
| 93 | +0000001113 00000 n | ||
| 94 | +trailer << | ||
| 95 | + /Root 1 0 R | ||
| 96 | + /Size 8 | ||
| 97 | + /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] | ||
| 98 | +>> | ||
| 99 | +startxref | ||
| 100 | +809 | ||
| 101 | +%%EOF | ||
| 102 | +7 0 obj |
qpdf/qtest/qpdf/parse-object.out
| @@ -2,4 +2,10 @@ | @@ -2,4 +2,10 @@ | ||
| 2 | logic error parsing indirect: QPDFObjectHandle::parse called without context on an object with indirect references | 2 | logic error parsing indirect: QPDFObjectHandle::parse called without context on an object with indirect references |
| 3 | trailing data: parsed object (trailing test): trailing data found parsing object from string | 3 | trailing data: parsed object (trailing test): trailing data found parsing object from string |
| 4 | WARNING: parsed object (offset 9): unknown token while reading object; treating as string | 4 | WARNING: parsed object (offset 9): unknown token while reading object; treating as string |
| 5 | +WARNING: parsed object: treating unexpected brace token as null | ||
| 6 | +WARNING: parsed object: treating unexpected brace token as null | ||
| 7 | +WARNING: parsed object: unexpected dictionary close token | ||
| 8 | +WARNING: bad39.qdf (object 7 0, offset 1121): unexpected EOF | ||
| 9 | +WARNING: bad39.qdf (object 7 0, offset 1121): expected endobj | ||
| 10 | +WARNING: bad39.qdf (object 7 0, offset 1121): EOF after endobj | ||
| 5 | test 31 done | 11 | test 31 done |
qpdf/test_driver.cc
| @@ -1195,6 +1195,13 @@ test_31(QPDF& pdf, char const* arg2) | @@ -1195,6 +1195,13 @@ test_31(QPDF& pdf, char const* arg2) | ||
| 1195 | // mistakenly parsed as an indirect object. | 1195 | // mistakenly parsed as an indirect object. |
| 1196 | assert(QPDFObjectHandle::parse(&pdf, "[5 0 R 0 R /X]").unparse() == "[ 5 0 R 0 (R) /X ]"); | 1196 | assert(QPDFObjectHandle::parse(&pdf, "[5 0 R 0 R /X]").unparse() == "[ 5 0 R 0 (R) /X ]"); |
| 1197 | assert(QPDFObjectHandle::parse(&pdf, "[1 0 R]", "indirect test").unparse() == "[ 1 0 R ]"); | 1197 | assert(QPDFObjectHandle::parse(&pdf, "[1 0 R]", "indirect test").unparse() == "[ 1 0 R ]"); |
| 1198 | + // TC:QPDFParser bad brace | ||
| 1199 | + assert(QPDFObjectHandle::parse(&pdf, "}").unparse() == "null"); | ||
| 1200 | + assert(QPDFObjectHandle::parse(&pdf, "{").unparse() == "null"); | ||
| 1201 | + // TC:QPDFParser bad dictionary close | ||
| 1202 | + assert(QPDFObjectHandle::parse(&pdf, ">>").unparse() == "null"); | ||
| 1203 | + // TC:QPDFParser eof in parse | ||
| 1204 | + assert(QPDFObjectHandle::parse(&pdf, "[7 0 R]").getArrayItem(0).isNull()); | ||
| 1198 | } | 1205 | } |
| 1199 | 1206 | ||
| 1200 | static void | 1207 | static void |