Commit 0328d8723793fa8c7f3cb4d243bfc7ed051e85bb

Authored by m-holger
1 parent 1548b8d8

In QPDFParser::parse, refactor parsing of indirect references

libqpdf/QPDFParser.cc
... ... @@ -143,6 +143,51 @@ QPDFParser::parseRemainder(bool content_stream)
143 143 }
144 144 ++good_count; // optimistically
145 145  
  146 + if (int_count != 0) {
  147 + // Special handling of indirect references. Treat integer tokens as part of an indirect
  148 + // reference until proven otherwise.
  149 + if (tokenizer.getType() == QPDFTokenizer::tt_integer) {
  150 + if (++int_count > 2) {
  151 + // Process the oldest buffered integer.
  152 + addInt(int_count);
  153 + }
  154 + last_offset_buffer[int_count % 2] = input->getLastOffset();
  155 + int_buffer[int_count % 2] = QUtil::string_to_ll(tokenizer.getValue().c_str());
  156 + continue;
  157 +
  158 + } else if (
  159 + int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word &&
  160 + tokenizer.getValue() == "R") {
  161 + if (context == nullptr) {
  162 + QTC::TC("qpdf", "QPDFParser indirect without context");
  163 + throw std::logic_error("QPDFParser::parse called without context on an object "
  164 + "with indirect references");
  165 + }
  166 + auto ref_og = QPDFObjGen(
  167 + QIntC::to_int(int_buffer[(int_count - 1) % 2]),
  168 + QIntC::to_int(int_buffer[(int_count) % 2]));
  169 + if (ref_og.isIndirect()) {
  170 + // This action has the desirable side effect of causing dangling references
  171 + // (references to indirect objects that don't appear in the PDF) in any parsed
  172 + // object to appear in the object cache.
  173 + add(std::move(context->getObject(ref_og).obj));
  174 + } else {
  175 + QTC::TC("qpdf", "QPDFParser indirect with 0 objid");
  176 + addNull();
  177 + }
  178 + int_count = 0;
  179 + continue;
  180 +
  181 + } else if (int_count > 0) {
  182 + // Process the buffered integers before processing the current token.
  183 + if (int_count > 1) {
  184 + addInt(int_count - 1);
  185 + }
  186 + addInt(int_count);
  187 + int_count = 0;
  188 + }
  189 + }
  190 +
146 191 switch (tokenizer.getType()) {
147 192 case QPDFTokenizer::tt_eof:
148 193 warn("parse error while reading object");
... ... @@ -304,7 +349,14 @@ QPDFParser::parseRemainder(bool content_stream)
304 349 continue;
305 350  
306 351 case QPDFTokenizer::tt_integer:
307   - addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str()));
  352 + if (!content_stream) {
  353 + // Buffer token in case it is part of an indirect reference.
  354 + last_offset_buffer[1] = input->getLastOffset();
  355 + int_buffer[1] = QUtil::string_to_ll(tokenizer.getValue().c_str());
  356 + int_count = 1;
  357 + } else {
  358 + addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str()));
  359 + }
308 360 continue;
309 361  
310 362 case QPDFTokenizer::tt_real:
... ... @@ -325,46 +377,15 @@ QPDFParser::parseRemainder(bool content_stream)
325 377 continue;
326 378  
327 379 case QPDFTokenizer::tt_word:
328   - {
329   - auto const& value = tokenizer.getValue();
330   - auto size = frame->olist.size();
331   - if (content_stream) {
332   - addScalar<QPDF_Operator>(value);
333   - } else if (
334   - value == "R" && size >= 2 && frame->olist.back() &&
335   - frame->olist.back()->getTypeCode() == ::ot_integer &&
336   - !frame->olist.back()->getObjGen().isIndirect() && frame->olist.at(size - 2) &&
337   - frame->olist.at(size - 2)->getTypeCode() == ::ot_integer &&
338   - !frame->olist.at(size - 2)->getObjGen().isIndirect()) {
339   - if (context == nullptr) {
340   - QTC::TC("qpdf", "QPDFParser indirect without context");
341   - throw std::logic_error("QPDFObjectHandle::parse called without context on "
342   - "an object with indirect references");
343   - }
344   - auto ref_og = QPDFObjGen(
345   - QPDFObjectHandle(frame->olist.at(size - 2)).getIntValueAsInt(),
346   - QPDFObjectHandle(frame->olist.back()).getIntValueAsInt());
347   - if (ref_og.isIndirect()) {
348   - // This action has the desirable side effect of causing dangling references
349   - // (references to indirect objects that don't appear in the PDF) in any
350   - // parsed object to appear in the object cache.
351   - frame->olist.pop_back();
352   - frame->olist.pop_back();
353   - add(std::move(context->getObject(ref_og).obj));
354   - } else {
355   - QTC::TC("qpdf", "QPDFParser indirect with 0 objid");
356   - frame->olist.pop_back();
357   - frame->olist.pop_back();
358   - addNull();
359   - }
360   - } else {
361   - QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder");
362   - warn("unknown token while reading object; treating as string");
363   - if (tooManyBadTokens()) {
364   - return {QPDF_Null::create()};
365   - }
366   - addScalar<QPDF_String>(value);
  380 + if (content_stream) {
  381 + addScalar<QPDF_Operator>(tokenizer.getValue());
  382 + } else {
  383 + QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder");
  384 + warn("unknown token while reading object; treating as string");
  385 + if (tooManyBadTokens()) {
  386 + return {QPDF_Null::create()};
367 387 }
  388 + addScalar<QPDF_String>(tokenizer.getValue());
368 389 }
369 390 continue;
370 391  
... ... @@ -412,6 +433,14 @@ QPDFParser::addNull()
412 433 ++frame->null_count;
413 434 }
414 435  
  // addInt: emit one buffered integer token as a QPDF_Integer object.
  // `count` is the running integer-token count of the token to emit; its parity
  // (count % 2) selects the slot of int_buffer / last_offset_buffer that holds the
  // token's value and the input offset recorded when it was read. The new object
  // gets its description (context, description, that offset) and is then passed
  // to add().
  436 +void
  437 +QPDFParser::addInt(int count)
  438 +{
  439 + auto obj = QPDF_Integer::create(int_buffer[count % 2]);
  440 + obj->setDescription(context, description, last_offset_buffer[count % 2]);
  441 + add(std::move(obj));
  442 +}
  443 +
415 444 template <typename T, typename... Args>
416 445 void
417 446 QPDFParser::addScalar(Args&&... args)
... ...
libqpdf/qpdf/QPDFParser.hh
... ... @@ -53,6 +53,7 @@ class QPDFParser
53 53 QPDFObjectHandle parseRemainder(bool content_stream);
54 54 void add(std::shared_ptr<QPDFObject>&& obj);
55 55 void addNull();
  56 + void addInt(int count);
56 57 template <typename T, typename... Args>
57 58 void addScalar(Args&&... args);
58 59 bool tooManyBadTokens();
... ... @@ -78,6 +79,10 @@ class QPDFParser
78 79 int good_count = 0;
79 80 // Start offset including any leading whitespace.
80 81 qpdf_offset_t start;
  82 + // Number of successive integer tokens.
  83 + int int_count = 0;
  84 + long long int_buffer[2]{0, 0};
  85 + qpdf_offset_t last_offset_buffer[2]{0, 0};
81 86  
82 87 };
83 88  
... ...
qpdf/qtest/qpdf/parse-object.out
1 1 [ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ]
2   -logic error parsing indirect: QPDFObjectHandle::parse called without context on an object with indirect references
  2 +logic error parsing indirect: QPDFParser::parse called without context on an object with indirect references
3 3 trailing data: parsed object (trailing test): trailing data found parsing object from string
4 4 WARNING: parsed object (offset 9): unknown token while reading object; treating as string
5 5 WARNING: parsed object: treating unexpected brace token as null
... ...