Commit 0328d8723793fa8c7f3cb4d243bfc7ed051e85bb

Authored by m-holger
1 parent 1548b8d8

In QPDFParser::parse refactor parsing of indirect references

libqpdf/QPDFParser.cc
@@ -143,6 +143,51 @@ QPDFParser::parseRemainder(bool content_stream) @@ -143,6 +143,51 @@ QPDFParser::parseRemainder(bool content_stream)
143 } 143 }
144 ++good_count; // optimistically 144 ++good_count; // optimistically
145 145
  146 + if (int_count != 0) {
  147 + // Special handling of indirect references. Treat integer tokens as part of an indirect
  148 + // reference until proven otherwise.
  149 + if (tokenizer.getType() == QPDFTokenizer::tt_integer) {
  150 + if (++int_count > 2) {
  151 + // Process the oldest buffered integer.
  152 + addInt(int_count);
  153 + }
  154 + last_offset_buffer[int_count % 2] = input->getLastOffset();
  155 + int_buffer[int_count % 2] = QUtil::string_to_ll(tokenizer.getValue().c_str());
  156 + continue;
  157 +
  158 + } else if (
  159 + int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word &&
  160 + tokenizer.getValue() == "R") {
  161 + if (context == nullptr) {
  162 + QTC::TC("qpdf", "QPDFParser indirect without context");
  163 + throw std::logic_error("QPDFParser::parse called without context on an object "
  164 + "with indirect references");
  165 + }
  166 + auto ref_og = QPDFObjGen(
  167 + QIntC::to_int(int_buffer[(int_count - 1) % 2]),
  168 + QIntC::to_int(int_buffer[(int_count) % 2]));
  169 + if (ref_og.isIndirect()) {
  170 + // This action has the desirable side effect of causing dangling references
  171 + // (references to indirect objects that don't appear in the PDF) in any parsed
  172 + // object to appear in the object cache.
  173 + add(std::move(context->getObject(ref_og).obj));
  174 + } else {
  175 + QTC::TC("qpdf", "QPDFParser indirect with 0 objid");
  176 + addNull();
  177 + }
  178 + int_count = 0;
  179 + continue;
  180 +
  181 + } else if (int_count > 0) {
  182 + // Process the buffered integers before processing the current token.
  183 + if (int_count > 1) {
  184 + addInt(int_count - 1);
  185 + }
  186 + addInt(int_count);
  187 + int_count = 0;
  188 + }
  189 + }
  190 +
146 switch (tokenizer.getType()) { 191 switch (tokenizer.getType()) {
147 case QPDFTokenizer::tt_eof: 192 case QPDFTokenizer::tt_eof:
148 warn("parse error while reading object"); 193 warn("parse error while reading object");
@@ -304,7 +349,14 @@ QPDFParser::parseRemainder(bool content_stream) @@ -304,7 +349,14 @@ QPDFParser::parseRemainder(bool content_stream)
304 continue; 349 continue;
305 350
306 case QPDFTokenizer::tt_integer: 351 case QPDFTokenizer::tt_integer:
307 - addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str())); 352 + if (!content_stream) {
  353 + // Buffer token in case it is part of an indirect reference.
  354 + last_offset_buffer[1] = input->getLastOffset();
  355 + int_buffer[1] = QUtil::string_to_ll(tokenizer.getValue().c_str());
  356 + int_count = 1;
  357 + } else {
  358 + addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str()));
  359 + }
308 continue; 360 continue;
309 361
310 case QPDFTokenizer::tt_real: 362 case QPDFTokenizer::tt_real:
@@ -325,46 +377,15 @@ QPDFParser::parseRemainder(bool content_stream) @@ -325,46 +377,15 @@ QPDFParser::parseRemainder(bool content_stream)
325 continue; 377 continue;
326 378
327 case QPDFTokenizer::tt_word: 379 case QPDFTokenizer::tt_word:
328 - {  
329 - auto const& value = tokenizer.getValue();  
330 - auto size = frame->olist.size();  
331 - if (content_stream) {  
332 - addScalar<QPDF_Operator>(value);  
333 - } else if (  
334 - value == "R" && size >= 2 && frame->olist.back() &&  
335 - frame->olist.back()->getTypeCode() == ::ot_integer &&  
336 - !frame->olist.back()->getObjGen().isIndirect() && frame->olist.at(size - 2) &&  
337 - frame->olist.at(size - 2)->getTypeCode() == ::ot_integer &&  
338 - !frame->olist.at(size - 2)->getObjGen().isIndirect()) {  
339 - if (context == nullptr) {  
340 - QTC::TC("qpdf", "QPDFParser indirect without context");  
341 - throw std::logic_error("QPDFObjectHandle::parse called without context on "  
342 - "an object with indirect references");  
343 - }  
344 - auto ref_og = QPDFObjGen(  
345 - QPDFObjectHandle(frame->olist.at(size - 2)).getIntValueAsInt(),  
346 - QPDFObjectHandle(frame->olist.back()).getIntValueAsInt());  
347 - if (ref_og.isIndirect()) {  
348 - // This action has the desirable side effect of causing dangling references  
349 - // (references to indirect objects that don't appear in the PDF) in any  
350 - // parsed object to appear in the object cache.  
351 - frame->olist.pop_back();  
352 - frame->olist.pop_back();  
353 - add(std::move(context->getObject(ref_og).obj));  
354 - } else {  
355 - QTC::TC("qpdf", "QPDFParser indirect with 0 objid");  
356 - frame->olist.pop_back();  
357 - frame->olist.pop_back();  
358 - addNull();  
359 - }  
360 - } else {  
361 - QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder");  
362 - warn("unknown token while reading object; treating as string");  
363 - if (tooManyBadTokens()) {  
364 - return {QPDF_Null::create()};  
365 - }  
366 - addScalar<QPDF_String>(value); 380 + if (content_stream) {
  381 + addScalar<QPDF_Operator>(tokenizer.getValue());
  382 + } else {
  383 + QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder");
  384 + warn("unknown token while reading object; treating as string");
  385 + if (tooManyBadTokens()) {
  386 + return {QPDF_Null::create()};
367 } 387 }
  388 + addScalar<QPDF_String>(tokenizer.getValue());
368 } 389 }
369 continue; 390 continue;
370 391
@@ -412,6 +433,14 @@ QPDFParser::addNull() @@ -412,6 +433,14 @@ QPDFParser::addNull()
412 ++frame->null_count; 433 ++frame->null_count;
413 } 434 }
414 435
  436 +void
  437 +QPDFParser::addInt(int count)
  438 +{
  439 + auto obj = QPDF_Integer::create(int_buffer[count % 2]);
  440 + obj->setDescription(context, description, last_offset_buffer[count % 2]);
  441 + add(std::move(obj));
  442 +}
  443 +
415 template <typename T, typename... Args> 444 template <typename T, typename... Args>
416 void 445 void
417 QPDFParser::addScalar(Args&&... args) 446 QPDFParser::addScalar(Args&&... args)
libqpdf/qpdf/QPDFParser.hh
@@ -53,6 +53,7 @@ class QPDFParser @@ -53,6 +53,7 @@ class QPDFParser
53 QPDFObjectHandle parseRemainder(bool content_stream); 53 QPDFObjectHandle parseRemainder(bool content_stream);
54 void add(std::shared_ptr<QPDFObject>&& obj); 54 void add(std::shared_ptr<QPDFObject>&& obj);
55 void addNull(); 55 void addNull();
  56 + void addInt(int count);
56 template <typename T, typename... Args> 57 template <typename T, typename... Args>
57 void addScalar(Args&&... args); 58 void addScalar(Args&&... args);
58 bool tooManyBadTokens(); 59 bool tooManyBadTokens();
@@ -78,6 +79,10 @@ class QPDFParser @@ -78,6 +79,10 @@ class QPDFParser
78 int good_count = 0; 79 int good_count = 0;
79 // Start offset including any leading whitespace. 80 // Start offset including any leading whitespace.
80 qpdf_offset_t start; 81 qpdf_offset_t start;
  82 + // Number of successive integer tokens.
  83 + int int_count = 0;
  84 + long long int_buffer[2]{0, 0};
  85 + qpdf_offset_t last_offset_buffer[2]{0, 0};
81 86
82 }; 87 };
83 88
qpdf/qtest/qpdf/parse-object.out
1 [ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ] 1 [ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ]
2 -logic error parsing indirect: QPDFObjectHandle::parse called without context on an object with indirect references 2 +logic error parsing indirect: QPDFParser::parse called without context on an object with indirect references
3 trailing data: parsed object (trailing test): trailing data found parsing object from string 3 trailing data: parsed object (trailing test): trailing data found parsing object from string
4 WARNING: parsed object (offset 9): unknown token while reading object; treating as string 4 WARNING: parsed object (offset 9): unknown token while reading object; treating as string
5 WARNING: parsed object: treating unexpected brace token as null 5 WARNING: parsed object: treating unexpected brace token as null