Commit 0328d8723793fa8c7f3cb4d243bfc7ed051e85bb
1 parent
1548b8d8
In QPDFParser::parse refactor parsing of indirect references
Showing 3 changed files with 75 additions and 41 deletions
libqpdf/QPDFParser.cc
| @@ -143,6 +143,51 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -143,6 +143,51 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 143 | } | 143 | } |
| 144 | ++good_count; // optimistically | 144 | ++good_count; // optimistically |
| 145 | 145 | ||
| 146 | + if (int_count != 0) { | ||
| 147 | + // Special handling of indirect references. Treat integer tokens as part of an indirect | ||
| 148 | + // reference until proven otherwise. | ||
| 149 | + if (tokenizer.getType() == QPDFTokenizer::tt_integer) { | ||
| 150 | + if (++int_count > 2) { | ||
| 151 | + // Process the oldest buffered integer. | ||
| 152 | + addInt(int_count); | ||
| 153 | + } | ||
| 154 | + last_offset_buffer[int_count % 2] = input->getLastOffset(); | ||
| 155 | + int_buffer[int_count % 2] = QUtil::string_to_ll(tokenizer.getValue().c_str()); | ||
| 156 | + continue; | ||
| 157 | + | ||
| 158 | + } else if ( | ||
| 159 | + int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word && | ||
| 160 | + tokenizer.getValue() == "R") { | ||
| 161 | + if (context == nullptr) { | ||
| 162 | + QTC::TC("qpdf", "QPDFParser indirect without context"); | ||
| 163 | + throw std::logic_error("QPDFParser::parse called without context on an object " | ||
| 164 | + "with indirect references"); | ||
| 165 | + } | ||
| 166 | + auto ref_og = QPDFObjGen( | ||
| 167 | + QIntC::to_int(int_buffer[(int_count - 1) % 2]), | ||
| 168 | + QIntC::to_int(int_buffer[(int_count) % 2])); | ||
| 169 | + if (ref_og.isIndirect()) { | ||
| 170 | + // This action has the desirable side effect of causing dangling references | ||
| 171 | + // (references to indirect objects that don't appear in the PDF) in any parsed | ||
| 172 | + // object to appear in the object cache. | ||
| 173 | + add(std::move(context->getObject(ref_og).obj)); | ||
| 174 | + } else { | ||
| 175 | + QTC::TC("qpdf", "QPDFParser indirect with 0 objid"); | ||
| 176 | + addNull(); | ||
| 177 | + } | ||
| 178 | + int_count = 0; | ||
| 179 | + continue; | ||
| 180 | + | ||
| 181 | + } else if (int_count > 0) { | ||
| 182 | + // Process the buffered integers before processing the current token. | ||
| 183 | + if (int_count > 1) { | ||
| 184 | + addInt(int_count - 1); | ||
| 185 | + } | ||
| 186 | + addInt(int_count); | ||
| 187 | + int_count = 0; | ||
| 188 | + } | ||
| 189 | + } | ||
| 190 | + | ||
| 146 | switch (tokenizer.getType()) { | 191 | switch (tokenizer.getType()) { |
| 147 | case QPDFTokenizer::tt_eof: | 192 | case QPDFTokenizer::tt_eof: |
| 148 | warn("parse error while reading object"); | 193 | warn("parse error while reading object"); |
| @@ -304,7 +349,14 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -304,7 +349,14 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 304 | continue; | 349 | continue; |
| 305 | 350 | ||
| 306 | case QPDFTokenizer::tt_integer: | 351 | case QPDFTokenizer::tt_integer: |
| 307 | - addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str())); | 352 | + if (!content_stream) { |
| 353 | + // Buffer token in case it is part of an indirect reference. | ||
| 354 | + last_offset_buffer[1] = input->getLastOffset(); | ||
| 355 | + int_buffer[1] = QUtil::string_to_ll(tokenizer.getValue().c_str()); | ||
| 356 | + int_count = 1; | ||
| 357 | + } else { | ||
| 358 | + addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str())); | ||
| 359 | + } | ||
| 308 | continue; | 360 | continue; |
| 309 | 361 | ||
| 310 | case QPDFTokenizer::tt_real: | 362 | case QPDFTokenizer::tt_real: |
| @@ -325,46 +377,15 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -325,46 +377,15 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 325 | continue; | 377 | continue; |
| 326 | 378 | ||
| 327 | case QPDFTokenizer::tt_word: | 379 | case QPDFTokenizer::tt_word: |
| 328 | - { | ||
| 329 | - auto const& value = tokenizer.getValue(); | ||
| 330 | - auto size = frame->olist.size(); | ||
| 331 | - if (content_stream) { | ||
| 332 | - addScalar<QPDF_Operator>(value); | ||
| 333 | - } else if ( | ||
| 334 | - value == "R" && size >= 2 && frame->olist.back() && | ||
| 335 | - frame->olist.back()->getTypeCode() == ::ot_integer && | ||
| 336 | - !frame->olist.back()->getObjGen().isIndirect() && frame->olist.at(size - 2) && | ||
| 337 | - frame->olist.at(size - 2)->getTypeCode() == ::ot_integer && | ||
| 338 | - !frame->olist.at(size - 2)->getObjGen().isIndirect()) { | ||
| 339 | - if (context == nullptr) { | ||
| 340 | - QTC::TC("qpdf", "QPDFParser indirect without context"); | ||
| 341 | - throw std::logic_error("QPDFObjectHandle::parse called without context on " | ||
| 342 | - "an object with indirect references"); | ||
| 343 | - } | ||
| 344 | - auto ref_og = QPDFObjGen( | ||
| 345 | - QPDFObjectHandle(frame->olist.at(size - 2)).getIntValueAsInt(), | ||
| 346 | - QPDFObjectHandle(frame->olist.back()).getIntValueAsInt()); | ||
| 347 | - if (ref_og.isIndirect()) { | ||
| 348 | - // This action has the desirable side effect of causing dangling references | ||
| 349 | - // (references to indirect objects that don't appear in the PDF) in any | ||
| 350 | - // parsed object to appear in the object cache. | ||
| 351 | - frame->olist.pop_back(); | ||
| 352 | - frame->olist.pop_back(); | ||
| 353 | - add(std::move(context->getObject(ref_og).obj)); | ||
| 354 | - } else { | ||
| 355 | - QTC::TC("qpdf", "QPDFParser indirect with 0 objid"); | ||
| 356 | - frame->olist.pop_back(); | ||
| 357 | - frame->olist.pop_back(); | ||
| 358 | - addNull(); | ||
| 359 | - } | ||
| 360 | - } else { | ||
| 361 | - QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder"); | ||
| 362 | - warn("unknown token while reading object; treating as string"); | ||
| 363 | - if (tooManyBadTokens()) { | ||
| 364 | - return {QPDF_Null::create()}; | ||
| 365 | - } | ||
| 366 | - addScalar<QPDF_String>(value); | 380 | + if (content_stream) { |
| 381 | + addScalar<QPDF_Operator>(tokenizer.getValue()); | ||
| 382 | + } else { | ||
| 383 | + QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder"); | ||
| 384 | + warn("unknown token while reading object; treating as string"); | ||
| 385 | + if (tooManyBadTokens()) { | ||
| 386 | + return {QPDF_Null::create()}; | ||
| 367 | } | 387 | } |
| 388 | + addScalar<QPDF_String>(tokenizer.getValue()); | ||
| 368 | } | 389 | } |
| 369 | continue; | 390 | continue; |
| 370 | 391 | ||
| @@ -412,6 +433,14 @@ QPDFParser::addNull() | @@ -412,6 +433,14 @@ QPDFParser::addNull() | ||
| 412 | ++frame->null_count; | 433 | ++frame->null_count; |
| 413 | } | 434 | } |
| 414 | 435 | ||
| 436 | +void | ||
| 437 | +QPDFParser::addInt(int count) | ||
| 438 | +{ | ||
| 439 | + auto obj = QPDF_Integer::create(int_buffer[count % 2]); | ||
| 440 | + obj->setDescription(context, description, last_offset_buffer[count % 2]); | ||
| 441 | + add(std::move(obj)); | ||
| 442 | +} | ||
| 443 | + | ||
| 415 | template <typename T, typename... Args> | 444 | template <typename T, typename... Args> |
| 416 | void | 445 | void |
| 417 | QPDFParser::addScalar(Args&&... args) | 446 | QPDFParser::addScalar(Args&&... args) |
libqpdf/qpdf/QPDFParser.hh
| @@ -53,6 +53,7 @@ class QPDFParser | @@ -53,6 +53,7 @@ class QPDFParser | ||
| 53 | QPDFObjectHandle parseRemainder(bool content_stream); | 53 | QPDFObjectHandle parseRemainder(bool content_stream); |
| 54 | void add(std::shared_ptr<QPDFObject>&& obj); | 54 | void add(std::shared_ptr<QPDFObject>&& obj); |
| 55 | void addNull(); | 55 | void addNull(); |
| 56 | + void addInt(int count); | ||
| 56 | template <typename T, typename... Args> | 57 | template <typename T, typename... Args> |
| 57 | void addScalar(Args&&... args); | 58 | void addScalar(Args&&... args); |
| 58 | bool tooManyBadTokens(); | 59 | bool tooManyBadTokens(); |
| @@ -78,6 +79,10 @@ class QPDFParser | @@ -78,6 +79,10 @@ class QPDFParser | ||
| 78 | int good_count = 0; | 79 | int good_count = 0; |
| 79 | // Start offset including any leading whitespace. | 80 | // Start offset including any leading whitespace. |
| 80 | qpdf_offset_t start; | 81 | qpdf_offset_t start; |
| 82 | + // Number of successive integer tokens. | ||
| 83 | + int int_count = 0; | ||
| 84 | + long long int_buffer[2]{0, 0}; | ||
| 85 | + qpdf_offset_t last_offset_buffer[2]{0, 0}; | ||
| 81 | 86 | ||
| 82 | }; | 87 | }; |
| 83 | 88 |
qpdf/qtest/qpdf/parse-object.out
| 1 | [ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ] | 1 | [ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ] |
| 2 | -logic error parsing indirect: QPDFObjectHandle::parse called without context on an object with indirect references | 2 | +logic error parsing indirect: QPDFParser::parse called without context on an object with indirect references |
| 3 | trailing data: parsed object (trailing test): trailing data found parsing object from string | 3 | trailing data: parsed object (trailing test): trailing data found parsing object from string |
| 4 | WARNING: parsed object (offset 9): unknown token while reading object; treating as string | 4 | WARNING: parsed object (offset 9): unknown token while reading object; treating as string |
| 5 | WARNING: parsed object: treating unexpected brace token as null | 5 | WARNING: parsed object: treating unexpected brace token as null |