Commit 91822ae6a12533cc9c73c418776def341582c9ac
1 parent
39df5936
Refactor Xref_table::reconstruct
Split reconstruction into two passes - scanning of input for objects and insertion of objects into the xref table. This allows insertion to take place in the usual reverse order and removes the need for a separate insert_reconstructed method.
Showing
5 changed files
with
36 additions
and
40 deletions
libqpdf/QPDF.cc
| ... | ... | @@ -582,6 +582,9 @@ QPDF::Xref_table::reconstruct(QPDFExc& e) |
| 582 | 582 | table.erase(iter); |
| 583 | 583 | } |
| 584 | 584 | |
| 585 | + std::vector<std::tuple<int, int, qpdf_offset_t>> objects; | |
| 586 | + std::vector<qpdf_offset_t> trailers; | |
| 587 | + | |
| 585 | 588 | file->seek(0, SEEK_END); |
| 586 | 589 | qpdf_offset_t eof = file->tell(); |
| 587 | 590 | file->seek(0, SEEK_SET); |
| ... | ... | @@ -597,25 +600,37 @@ QPDF::Xref_table::reconstruct(QPDFExc& e) |
| 597 | 600 | int obj = QUtil::string_to_int(t1.getValue().c_str()); |
| 598 | 601 | int gen = QUtil::string_to_int(t2.getValue().c_str()); |
| 599 | 602 | if (obj <= max_id_) { |
| 600 | - insert_reconstructed(obj, token_start, gen); | |
| 603 | + objects.emplace_back(obj, gen, token_start); | |
| 601 | 604 | } else { |
| 602 | 605 | warn_damaged("ignoring object with impossibly large id " + std::to_string(obj)); |
| 603 | 606 | } |
| 604 | 607 | } |
| 605 | 608 | file->seek(pos, SEEK_SET); |
| 606 | 609 | } else if (!trailer_ && t1.isWord("trailer")) { |
| 607 | - auto pos = file->tell(); | |
| 608 | - QPDFObjectHandle t = read_trailer(); | |
| 609 | - if (!t.isDictionary()) { | |
| 610 | - // Oh well. It was worth a try. | |
| 611 | - } else { | |
| 612 | - trailer_ = t; | |
| 613 | - } | |
| 614 | - file->seek(pos, SEEK_SET); | |
| 610 | + trailers.emplace_back(file->tell()); | |
| 615 | 611 | } |
| 616 | - check_warnings(); | |
| 617 | 612 | file->findAndSkipNextEOL(); |
| 618 | 613 | } |
| 614 | + | |
| 615 | + for (auto tr: trailers) { | |
| 616 | + file->seek(tr, SEEK_SET); | |
| 617 | + auto t = read_trailer(); | |
| 618 | + if (!t.isDictionary()) { | |
| 619 | + // Oh well. It was worth a try. | |
| 620 | + } else { | |
| 621 | + trailer_ = t; | |
| 622 | + break; | |
| 623 | + } | |
| 624 | + check_warnings(); | |
| 625 | + } | |
| 626 | + | |
| 627 | + auto rend = objects.rend(); | |
| 628 | + for (auto it = objects.rbegin(); it != rend; it++) { | |
| 629 | + auto [obj, gen, token_start] = *it; | |
| 630 | + insert(obj, 1, token_start, gen); | |
| 631 | + check_warnings(); | |
| 632 | + } | |
| 633 | + | |
| 619 | 634 | deleted_objects.clear(); |
| 620 | 635 | |
| 621 | 636 | if (!trailer_) { |
| ... | ... | @@ -1321,8 +1336,13 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2) |
| 1321 | 1336 | // If there is already an entry for this object and generation in the table, it means that a |
| 1322 | 1337 | // later xref table has registered this object. Disregard this one. |
| 1323 | 1338 | |
| 1324 | - if (obj > max_id_) { | |
| 1325 | - // ignore impossibly large object ids or object ids > Size. | |
| 1339 | + int new_gen = f0 == 2 ? 0 : f2; | |
| 1340 | + | |
| 1341 | + if (!(obj > 0 && obj <= max_id_ && 0 <= f2 && new_gen < 65535)) { | |
| 1342 | + // We are ignoring invalid objgens. Most will arrive here from xref reconstruction. There | |
| 1343 | + // is probably no point having another warning but we could count invalid items in order to | |
| 1344 | + // decide when to give up. | |
| 1345 | + QTC::TC("qpdf", "QPDF xref overwrite invalid objgen"); | |
| 1326 | 1346 | return; |
| 1327 | 1347 | } |
| 1328 | 1348 | |
| ... | ... | @@ -1337,7 +1357,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2) |
| 1337 | 1357 | return; |
| 1338 | 1358 | } |
| 1339 | 1359 | |
| 1340 | - auto [iter, created] = table.try_emplace(QPDFObjGen(obj, (f0 == 2 ? 0 : f2))); | |
| 1360 | + auto [iter, created] = table.try_emplace(QPDFObjGen(obj, new_gen)); | |
| 1341 | 1361 | if (!created) { |
| 1342 | 1362 | QTC::TC("qpdf", "QPDF xref reused object"); |
| 1343 | 1363 | return; |
| ... | ... | @@ -1369,25 +1389,6 @@ QPDF::Xref_table::insert_free(QPDFObjGen og) |
| 1369 | 1389 | } |
| 1370 | 1390 | } |
| 1371 | 1391 | |
| 1372 | -// Replace uncompressed object. This is used in xref recovery mode, which reads the file from | |
| 1373 | -// beginning to end. | |
| 1374 | -void | |
| 1375 | -QPDF::Xref_table::insert_reconstructed(int obj, qpdf_offset_t f1, int f2) | |
| 1376 | -{ | |
| 1377 | - if (!(obj > 0 && obj <= max_id_ && 0 <= f2 && f2 < 65535)) { | |
| 1378 | - QTC::TC("qpdf", "QPDF xref overwrite invalid objgen"); | |
| 1379 | - return; | |
| 1380 | - } | |
| 1381 | - | |
| 1382 | - QPDFObjGen og(obj, f2); | |
| 1383 | - if (!deleted_objects.count(obj)) { | |
| 1384 | - // deleted_objects stores the uncompressed objects removed from the xref table at the start | |
| 1385 | - // of recovery. | |
| 1386 | - QTC::TC("qpdf", "QPDF xref overwrite object"); | |
| 1387 | - table.insert_or_assign(QPDFObjGen(obj, f2), QPDFXRefEntry(f1)); | |
| 1388 | - } | |
| 1389 | -} | |
| 1390 | - | |
| 1391 | 1392 | void |
| 1392 | 1393 | QPDF::showXRefTable() |
| 1393 | 1394 | { | ... | ... |
libqpdf/QPDFWriter.cc
| ... | ... | @@ -1698,7 +1698,6 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) |
| 1698 | 1698 | if (obj_to_write.isStream()) { |
| 1699 | 1699 | // This condition occurred in a fuzz input. Ideally we should block it at parse |
| 1700 | 1700 | // time, but it's not clear to me how to construct a case for this. |
| 1701 | - QTC::TC("qpdf", "QPDFWriter stream in ostream"); | |
| 1702 | 1701 | obj_to_write.warnIfPossible("stream found inside object stream; treating as null"); |
| 1703 | 1702 | obj_to_write = QPDFObjectHandle::newNull(); |
| 1704 | 1703 | } | ... | ... |
qpdf/qpdf.testcov
| ... | ... | @@ -105,7 +105,6 @@ QPDFWriter not recompressing /FlateDecode 0 |
| 105 | 105 | QPDF_encryption xref stream from encrypted file 0 |
| 106 | 106 | QPDFJob unable to filter 0 |
| 107 | 107 | QUtil non-trivial UTF-16 0 |
| 108 | -QPDF xref overwrite object 0 | |
| 109 | 108 | QPDF xref overwrite invalid objgen 0 |
| 110 | 109 | QPDF decoding error warning 0 |
| 111 | 110 | qpdf-c called qpdf_init 0 |
| ... | ... | @@ -437,7 +436,6 @@ QPDF xref skipped space 0 |
| 437 | 436 | QPDF eof skipping spaces before xref 1 |
| 438 | 437 | QPDF_encryption user matches owner V < 5 0 |
| 439 | 438 | QPDF_encryption same password 1 |
| 440 | -QPDFWriter stream in ostream 0 | |
| 441 | 439 | QPDFParser duplicate dict key 0 |
| 442 | 440 | QPDFWriter no encryption sig contents 0 |
| 443 | 441 | QPDFPageObjectHelper colorspace lookup 0 | ... | ... |
qpdf/qtest/qpdf/fuzz-16214.out
| ... | ... | @@ -11,11 +11,9 @@ WARNING: fuzz-16214.pdf (object 1 0, offset 7189): expected n n obj |
| 11 | 11 | WARNING: fuzz-16214.pdf: Attempting to reconstruct cross-reference table |
| 12 | 12 | WARNING: fuzz-16214.pdf (offset 7207): error decoding stream data for object 2 0: stream inflate: inflate: data: invalid code lengths set |
| 13 | 13 | WARNING: fuzz-16214.pdf (offset 7207): getStreamData called on unfilterable stream |
| 14 | -WARNING: fuzz-16214.pdf (object 8 0, offset 7207): supposed object stream 5 has wrong type | |
| 15 | -WARNING: fuzz-16214.pdf (object 8 0, offset 7207): object stream 5 has incorrect keys | |
| 14 | +WARNING: fuzz-16214.pdf (object 7 0, offset 7207): supposed object stream 5 has wrong type | |
| 15 | +WARNING: fuzz-16214.pdf (object 7 0, offset 7207): object stream 5 has incorrect keys | |
| 16 | 16 | WARNING: fuzz-16214.pdf (object 21 0, offset 3639): expected endstream |
| 17 | 17 | WARNING: fuzz-16214.pdf (object 21 0, offset 3112): attempting to recover stream length |
| 18 | 18 | WARNING: fuzz-16214.pdf (object 21 0, offset 3112): recovered stream length: 340 |
| 19 | -WARNING: fuzz-16214.pdf, stream object 8 0: stream found inside object stream; treating as null | |
| 20 | -WARNING: fuzz-16214.pdf, stream object 8 0: stream found inside object stream; treating as null | |
| 21 | 19 | qpdf: operation succeeded with warnings; resulting file may have some problems | ... | ... |
qpdf/qtest/qpdf/issue-147.out
| ... | ... | @@ -2,6 +2,6 @@ WARNING: issue-147.pdf: can't find PDF header |
| 2 | 2 | WARNING: issue-147.pdf: file is damaged |
| 3 | 3 | WARNING: issue-147.pdf: can't find startxref |
| 4 | 4 | WARNING: issue-147.pdf: Attempting to reconstruct cross-reference table |
| 5 | -WARNING: issue-147.pdf (trailer, offset 9): expected dictionary key but found non-name object; inserting key /QPDFFake1 | |
| 6 | 5 | WARNING: issue-147.pdf: ignoring object with impossibly large id 62 |
| 6 | +WARNING: issue-147.pdf (trailer, offset 9): expected dictionary key but found non-name object; inserting key /QPDFFake1 | |
| 7 | 7 | qpdf: issue-147.pdf: unable to find objects while recovering damaged file | ... | ... |