Commit 91822ae6a12533cc9c73c418776def341582c9ac

Authored by m-holger
1 parent 39df5936

Refactor Xref_table::reconstruct

Split reconstruction into two passes - scanning of input for objects and
insertion of objects into the xref table. This allows insertion to take
place in the usual reverse order and removes the need for a separate
insert_reconstructed method.
libqpdf/QPDF.cc
... ... @@ -582,6 +582,9 @@ QPDF::Xref_table::reconstruct(QPDFExc& e)
582 582 table.erase(iter);
583 583 }
584 584  
  585 + std::vector<std::tuple<int, int, qpdf_offset_t>> objects;
  586 + std::vector<qpdf_offset_t> trailers;
  587 +
585 588 file->seek(0, SEEK_END);
586 589 qpdf_offset_t eof = file->tell();
587 590 file->seek(0, SEEK_SET);
... ... @@ -597,25 +600,37 @@ QPDF::Xref_table::reconstruct(QPDFExc&amp; e)
597 600 int obj = QUtil::string_to_int(t1.getValue().c_str());
598 601 int gen = QUtil::string_to_int(t2.getValue().c_str());
599 602 if (obj <= max_id_) {
600   - insert_reconstructed(obj, token_start, gen);
  603 + objects.emplace_back(obj, gen, token_start);
601 604 } else {
602 605 warn_damaged("ignoring object with impossibly large id " + std::to_string(obj));
603 606 }
604 607 }
605 608 file->seek(pos, SEEK_SET);
606 609 } else if (!trailer_ && t1.isWord("trailer")) {
607   - auto pos = file->tell();
608   - QPDFObjectHandle t = read_trailer();
609   - if (!t.isDictionary()) {
610   - // Oh well. It was worth a try.
611   - } else {
612   - trailer_ = t;
613   - }
614   - file->seek(pos, SEEK_SET);
  610 + trailers.emplace_back(file->tell());
615 611 }
616   - check_warnings();
617 612 file->findAndSkipNextEOL();
618 613 }
  614 +
  615 + for (auto tr: trailers) {
  616 + file->seek(tr, SEEK_SET);
  617 + auto t = read_trailer();
  618 + if (!t.isDictionary()) {
  619 + // Oh well. It was worth a try.
  620 + } else {
  621 + trailer_ = t;
  622 + break;
  623 + }
  624 + check_warnings();
  625 + }
  626 +
  627 + auto rend = objects.rend();
  628 + for (auto it = objects.rbegin(); it != rend; it++) {
  629 + auto [obj, gen, token_start] = *it;
  630 + insert(obj, 1, token_start, gen);
  631 + check_warnings();
  632 + }
  633 +
619 634 deleted_objects.clear();
620 635  
621 636 if (!trailer_) {
... ... @@ -1321,8 +1336,13 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
1321 1336 // If there is already an entry for this object and generation in the table, it means that a
1322 1337 // later xref table has registered this object. Disregard this one.
1323 1338  
1324   - if (obj > max_id_) {
1325   - // ignore impossibly large object ids or object ids > Size.
  1339 + int new_gen = f0 == 2 ? 0 : f2;
  1340 +
  1341 + if (!(obj > 0 && obj <= max_id_ && 0 <= f2 && new_gen < 65535)) {
  1342 + // We are ignoring invalid objgens. Most will arrive here from xref reconstruction. There
  1343 + // is probably no point having another warning but we could count invalid items in order to
  1344 + // decide when to give up.
  1345 + QTC::TC("qpdf", "QPDF xref overwrite invalid objgen");
1326 1346 return;
1327 1347 }
1328 1348  
... ... @@ -1337,7 +1357,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
1337 1357 return;
1338 1358 }
1339 1359  
1340   - auto [iter, created] = table.try_emplace(QPDFObjGen(obj, (f0 == 2 ? 0 : f2)));
  1360 + auto [iter, created] = table.try_emplace(QPDFObjGen(obj, new_gen));
1341 1361 if (!created) {
1342 1362 QTC::TC("qpdf", "QPDF xref reused object");
1343 1363 return;
... ... @@ -1369,25 +1389,6 @@ QPDF::Xref_table::insert_free(QPDFObjGen og)
1369 1389 }
1370 1390 }
1371 1391  
1372   -// Replace uncompressed object. This is used in xref recovery mode, which reads the file from
1373   -// beginning to end.
1374   -void
1375   -QPDF::Xref_table::insert_reconstructed(int obj, qpdf_offset_t f1, int f2)
1376   -{
1377   - if (!(obj > 0 && obj <= max_id_ && 0 <= f2 && f2 < 65535)) {
1378   - QTC::TC("qpdf", "QPDF xref overwrite invalid objgen");
1379   - return;
1380   - }
1381   -
1382   - QPDFObjGen og(obj, f2);
1383   - if (!deleted_objects.count(obj)) {
1384   - // deleted_objects stores the uncompressed objects removed from the xref table at the start
1385   - // of recovery.
1386   - QTC::TC("qpdf", "QPDF xref overwrite object");
1387   - table.insert_or_assign(QPDFObjGen(obj, f2), QPDFXRefEntry(f1));
1388   - }
1389   -}
1390   -
1391 1392 void
1392 1393 QPDF::showXRefTable()
1393 1394 {
... ...
libqpdf/QPDFWriter.cc
... ... @@ -1698,7 +1698,6 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1698 1698 if (obj_to_write.isStream()) {
1699 1699 // This condition occurred in a fuzz input. Ideally we should block it at parse
1700 1700 // time, but it's not clear to me how to construct a case for this.
1701   - QTC::TC("qpdf", "QPDFWriter stream in ostream");
1702 1701 obj_to_write.warnIfPossible("stream found inside object stream; treating as null");
1703 1702 obj_to_write = QPDFObjectHandle::newNull();
1704 1703 }
... ...
qpdf/qpdf.testcov
... ... @@ -105,7 +105,6 @@ QPDFWriter not recompressing /FlateDecode 0
105 105 QPDF_encryption xref stream from encrypted file 0
106 106 QPDFJob unable to filter 0
107 107 QUtil non-trivial UTF-16 0
108   -QPDF xref overwrite object 0
109 108 QPDF xref overwrite invalid objgen 0
110 109 QPDF decoding error warning 0
111 110 qpdf-c called qpdf_init 0
... ... @@ -437,7 +436,6 @@ QPDF xref skipped space 0
437 436 QPDF eof skipping spaces before xref 1
438 437 QPDF_encryption user matches owner V < 5 0
439 438 QPDF_encryption same password 1
440   -QPDFWriter stream in ostream 0
441 439 QPDFParser duplicate dict key 0
442 440 QPDFWriter no encryption sig contents 0
443 441 QPDFPageObjectHelper colorspace lookup 0
... ...
qpdf/qtest/qpdf/fuzz-16214.out
... ... @@ -11,11 +11,9 @@ WARNING: fuzz-16214.pdf (object 1 0, offset 7189): expected n n obj
11 11 WARNING: fuzz-16214.pdf: Attempting to reconstruct cross-reference table
12 12 WARNING: fuzz-16214.pdf (offset 7207): error decoding stream data for object 2 0: stream inflate: inflate: data: invalid code lengths set
13 13 WARNING: fuzz-16214.pdf (offset 7207): getStreamData called on unfilterable stream
14   -WARNING: fuzz-16214.pdf (object 8 0, offset 7207): supposed object stream 5 has wrong type
15   -WARNING: fuzz-16214.pdf (object 8 0, offset 7207): object stream 5 has incorrect keys
  14 +WARNING: fuzz-16214.pdf (object 7 0, offset 7207): supposed object stream 5 has wrong type
  15 +WARNING: fuzz-16214.pdf (object 7 0, offset 7207): object stream 5 has incorrect keys
16 16 WARNING: fuzz-16214.pdf (object 21 0, offset 3639): expected endstream
17 17 WARNING: fuzz-16214.pdf (object 21 0, offset 3112): attempting to recover stream length
18 18 WARNING: fuzz-16214.pdf (object 21 0, offset 3112): recovered stream length: 340
19   -WARNING: fuzz-16214.pdf, stream object 8 0: stream found inside object stream; treating as null
20   -WARNING: fuzz-16214.pdf, stream object 8 0: stream found inside object stream; treating as null
21 19 qpdf: operation succeeded with warnings; resulting file may have some problems
... ...
qpdf/qtest/qpdf/issue-147.out
... ... @@ -2,6 +2,6 @@ WARNING: issue-147.pdf: can&#39;t find PDF header
2 2 WARNING: issue-147.pdf: file is damaged
3 3 WARNING: issue-147.pdf: can't find startxref
4 4 WARNING: issue-147.pdf: Attempting to reconstruct cross-reference table
5   -WARNING: issue-147.pdf (trailer, offset 9): expected dictionary key but found non-name object; inserting key /QPDFFake1
6 5 WARNING: issue-147.pdf: ignoring object with impossibly large id 62
  6 +WARNING: issue-147.pdf (trailer, offset 9): expected dictionary key but found non-name object; inserting key /QPDFFake1
7 7 qpdf: issue-147.pdf: unable to find objects while recovering damaged file
... ...