Commit 0328d8723793fa8c7f3cb4d243bfc7ed051e85bb
1 parent: 1548b8d8
In QPDFParser::parse refactor parsing of indirect references
Showing 3 changed files with 75 additions and 41 deletions
libqpdf/QPDFParser.cc
| ... | ... | @@ -143,6 +143,51 @@ QPDFParser::parseRemainder(bool content_stream) |
| 143 | 143 | } |
| 144 | 144 | ++good_count; // optimistically |
| 145 | 145 | |
| 146 | + if (int_count != 0) { | |
| 147 | + // Special handling of indirect references. Treat integer tokens as part of an indirect | |
| 148 | + // reference until proven otherwise. | |
| 149 | + if (tokenizer.getType() == QPDFTokenizer::tt_integer) { | |
| 150 | + if (++int_count > 2) { | |
| 151 | + // Process the oldest buffered integer. | |
| 152 | + addInt(int_count); | |
| 153 | + } | |
| 154 | + last_offset_buffer[int_count % 2] = input->getLastOffset(); | |
| 155 | + int_buffer[int_count % 2] = QUtil::string_to_ll(tokenizer.getValue().c_str()); | |
| 156 | + continue; | |
| 157 | + | |
| 158 | + } else if ( | |
| 159 | + int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word && | |
| 160 | + tokenizer.getValue() == "R") { | |
| 161 | + if (context == nullptr) { | |
| 162 | + QTC::TC("qpdf", "QPDFParser indirect without context"); | |
| 163 | + throw std::logic_error("QPDFParser::parse called without context on an object " | |
| 164 | + "with indirect references"); | |
| 165 | + } | |
| 166 | + auto ref_og = QPDFObjGen( | |
| 167 | + QIntC::to_int(int_buffer[(int_count - 1) % 2]), | |
| 168 | + QIntC::to_int(int_buffer[(int_count) % 2])); | |
| 169 | + if (ref_og.isIndirect()) { | |
| 170 | + // This action has the desirable side effect of causing dangling references | |
| 171 | + // (references to indirect objects that don't appear in the PDF) in any parsed | |
| 172 | + // object to appear in the object cache. | |
| 173 | + add(std::move(context->getObject(ref_og).obj)); | |
| 174 | + } else { | |
| 175 | + QTC::TC("qpdf", "QPDFParser indirect with 0 objid"); | |
| 176 | + addNull(); | |
| 177 | + } | |
| 178 | + int_count = 0; | |
| 179 | + continue; | |
| 180 | + | |
| 181 | + } else if (int_count > 0) { | |
| 182 | + // Process the buffered integers before processing the current token. | |
| 183 | + if (int_count > 1) { | |
| 184 | + addInt(int_count - 1); | |
| 185 | + } | |
| 186 | + addInt(int_count); | |
| 187 | + int_count = 0; | |
| 188 | + } | |
| 189 | + } | |
| 190 | + | |
| 146 | 191 | switch (tokenizer.getType()) { |
| 147 | 192 | case QPDFTokenizer::tt_eof: |
| 148 | 193 | warn("parse error while reading object"); |
| ... | ... | @@ -304,7 +349,14 @@ QPDFParser::parseRemainder(bool content_stream) |
| 304 | 349 | continue; |
| 305 | 350 | |
| 306 | 351 | case QPDFTokenizer::tt_integer: |
| 307 | - addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str())); | |
| 352 | + if (!content_stream) { | |
| 353 | + // Buffer token in case it is part of an indirect reference. | |
| 354 | + last_offset_buffer[1] = input->getLastOffset(); | |
| 355 | + int_buffer[1] = QUtil::string_to_ll(tokenizer.getValue().c_str()); | |
| 356 | + int_count = 1; | |
| 357 | + } else { | |
| 358 | + addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str())); | |
| 359 | + } | |
| 308 | 360 | continue; |
| 309 | 361 | |
| 310 | 362 | case QPDFTokenizer::tt_real: |
| ... | ... | @@ -325,46 +377,15 @@ QPDFParser::parseRemainder(bool content_stream) |
| 325 | 377 | continue; |
| 326 | 378 | |
| 327 | 379 | case QPDFTokenizer::tt_word: |
| 328 | - { | |
| 329 | - auto const& value = tokenizer.getValue(); | |
| 330 | - auto size = frame->olist.size(); | |
| 331 | - if (content_stream) { | |
| 332 | - addScalar<QPDF_Operator>(value); | |
| 333 | - } else if ( | |
| 334 | - value == "R" && size >= 2 && frame->olist.back() && | |
| 335 | - frame->olist.back()->getTypeCode() == ::ot_integer && | |
| 336 | - !frame->olist.back()->getObjGen().isIndirect() && frame->olist.at(size - 2) && | |
| 337 | - frame->olist.at(size - 2)->getTypeCode() == ::ot_integer && | |
| 338 | - !frame->olist.at(size - 2)->getObjGen().isIndirect()) { | |
| 339 | - if (context == nullptr) { | |
| 340 | - QTC::TC("qpdf", "QPDFParser indirect without context"); | |
| 341 | - throw std::logic_error("QPDFObjectHandle::parse called without context on " | |
| 342 | - "an object with indirect references"); | |
| 343 | - } | |
| 344 | - auto ref_og = QPDFObjGen( | |
| 345 | - QPDFObjectHandle(frame->olist.at(size - 2)).getIntValueAsInt(), | |
| 346 | - QPDFObjectHandle(frame->olist.back()).getIntValueAsInt()); | |
| 347 | - if (ref_og.isIndirect()) { | |
| 348 | - // This action has the desirable side effect of causing dangling references | |
| 349 | - // (references to indirect objects that don't appear in the PDF) in any | |
| 350 | - // parsed object to appear in the object cache. | |
| 351 | - frame->olist.pop_back(); | |
| 352 | - frame->olist.pop_back(); | |
| 353 | - add(std::move(context->getObject(ref_og).obj)); | |
| 354 | - } else { | |
| 355 | - QTC::TC("qpdf", "QPDFParser indirect with 0 objid"); | |
| 356 | - frame->olist.pop_back(); | |
| 357 | - frame->olist.pop_back(); | |
| 358 | - addNull(); | |
| 359 | - } | |
| 360 | - } else { | |
| 361 | - QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder"); | |
| 362 | - warn("unknown token while reading object; treating as string"); | |
| 363 | - if (tooManyBadTokens()) { | |
| 364 | - return {QPDF_Null::create()}; | |
| 365 | - } | |
| 366 | - addScalar<QPDF_String>(value); | |
| 380 | + if (content_stream) { | |
| 381 | + addScalar<QPDF_Operator>(tokenizer.getValue()); | |
| 382 | + } else { | |
| 383 | + QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder"); | |
| 384 | + warn("unknown token while reading object; treating as string"); | |
| 385 | + if (tooManyBadTokens()) { | |
| 386 | + return {QPDF_Null::create()}; | |
| 367 | 387 | } |
| 388 | + addScalar<QPDF_String>(tokenizer.getValue()); | |
| 368 | 389 | } |
| 369 | 390 | continue; |
| 370 | 391 | |
| ... | ... | @@ -412,6 +433,14 @@ QPDFParser::addNull() |
| 412 | 433 | ++frame->null_count; |
| 413 | 434 | } |
| 414 | 435 | |
| 436 | +void | |
| 437 | +QPDFParser::addInt(int count) | |
| 438 | +{ | |
| 439 | + auto obj = QPDF_Integer::create(int_buffer[count % 2]); | |
| 440 | + obj->setDescription(context, description, last_offset_buffer[count % 2]); | |
| 441 | + add(std::move(obj)); | |
| 442 | +} | |
| 443 | + | |
| 415 | 444 | template <typename T, typename... Args> |
| 416 | 445 | void |
| 417 | 446 | QPDFParser::addScalar(Args&&... args) | ... | ... |
libqpdf/qpdf/QPDFParser.hh
| ... | ... | @@ -53,6 +53,7 @@ class QPDFParser |
| 53 | 53 | QPDFObjectHandle parseRemainder(bool content_stream); |
| 54 | 54 | void add(std::shared_ptr<QPDFObject>&& obj); |
| 55 | 55 | void addNull(); |
| 56 | + void addInt(int count); | |
| 56 | 57 | template <typename T, typename... Args> |
| 57 | 58 | void addScalar(Args&&... args); |
| 58 | 59 | bool tooManyBadTokens(); |
| ... | ... | @@ -78,6 +79,10 @@ class QPDFParser |
| 78 | 79 | int good_count = 0; |
| 79 | 80 | // Start offset including any leading whitespace. |
| 80 | 81 | qpdf_offset_t start; |
| 82 | + // Number of successive integer tokens. | |
| 83 | + int int_count = 0; | |
| 84 | + long long int_buffer[2]{0, 0}; | |
| 85 | + qpdf_offset_t last_offset_buffer[2]{0, 0}; | |
| 81 | 86 | |
| 82 | 87 | }; |
| 83 | 88 | ... | ... |
qpdf/qtest/qpdf/parse-object.out
| 1 | 1 | [ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ] |
| 2 | -logic error parsing indirect: QPDFObjectHandle::parse called without context on an object with indirect references | |
| 2 | +logic error parsing indirect: QPDFParser::parse called without context on an object with indirect references | |
| 3 | 3 | trailing data: parsed object (trailing test): trailing data found parsing object from string |
| 4 | 4 | WARNING: parsed object (offset 9): unknown token while reading object; treating as string |
| 5 | 5 | WARNING: parsed object: treating unexpected brace token as null | ... | ... |