From 91822ae6a12533cc9c73c418776def341582c9ac Mon Sep 17 00:00:00 2001 From: m-holger Date: Wed, 14 Aug 2024 16:39:22 +0100 Subject: [PATCH] Refactor Xref_table::reconstruct --- libqpdf/QPDF.cc | 65 +++++++++++++++++++++++++++++++++-------------------------------- libqpdf/QPDFWriter.cc | 1 - qpdf/qpdf.testcov | 2 -- qpdf/qtest/qpdf/fuzz-16214.out | 6 ++---- qpdf/qtest/qpdf/issue-147.out | 2 +- 5 files changed, 36 insertions(+), 40 deletions(-) diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 79a5da1..69cd092 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -582,6 +582,9 @@ QPDF::Xref_table::reconstruct(QPDFExc& e) table.erase(iter); } + std::vector> objects; + std::vector trailers; + file->seek(0, SEEK_END); qpdf_offset_t eof = file->tell(); file->seek(0, SEEK_SET); @@ -597,25 +600,37 @@ QPDF::Xref_table::reconstruct(QPDFExc& e) int obj = QUtil::string_to_int(t1.getValue().c_str()); int gen = QUtil::string_to_int(t2.getValue().c_str()); if (obj <= max_id_) { - insert_reconstructed(obj, token_start, gen); + objects.emplace_back(obj, gen, token_start); } else { warn_damaged("ignoring object with impossibly large id " + std::to_string(obj)); } } file->seek(pos, SEEK_SET); } else if (!trailer_ && t1.isWord("trailer")) { - auto pos = file->tell(); - QPDFObjectHandle t = read_trailer(); - if (!t.isDictionary()) { - // Oh well. It was worth a try. - } else { - trailer_ = t; - } - file->seek(pos, SEEK_SET); + trailers.emplace_back(file->tell()); } - check_warnings(); file->findAndSkipNextEOL(); } + + for (auto tr: trailers) { + file->seek(tr, SEEK_SET); + auto t = read_trailer(); + if (!t.isDictionary()) { + // Oh well. It was worth a try. + } else { + trailer_ = t; + break; + } + check_warnings(); + } + + auto rend = objects.rend(); + for (auto it = objects.rbegin(); it != rend; it++) { + auto [obj, gen, token_start] = *it; + insert(obj, 1, token_start, gen); + check_warnings(); + } + deleted_objects.clear(); if (!trailer_) { @@ -1321,8 +1336,13 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2) // If there is already an entry for this object and generation in the table, it means that a // later xref table has registered this object. Disregard this one. - if (obj > max_id_) { - // ignore impossibly large object ids or object ids > Size. + int new_gen = f0 == 2 ? 0 : f2; + + if (!(obj > 0 && obj <= max_id_ && 0 <= f2 && new_gen < 65535)) { + // We are ignoring invalid objgens. Most will arrive here from xref reconstruction. There + // is probably no point having another warning but we could count invalid items in order to + // decide when to give up. + QTC::TC("qpdf", "QPDF xref overwrite invalid objgen"); return; } @@ -1337,7 +1357,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2) return; } - auto [iter, created] = table.try_emplace(QPDFObjGen(obj, (f0 == 2 ? 0 : f2))); + auto [iter, created] = table.try_emplace(QPDFObjGen(obj, new_gen)); if (!created) { QTC::TC("qpdf", "QPDF xref reused object"); return; @@ -1369,25 +1389,6 @@ QPDF::Xref_table::insert_free(QPDFObjGen og) } } -// Replace uncompressed object. This is used in xref recovery mode, which reads the file from -// beginning to end. -void -QPDF::Xref_table::insert_reconstructed(int obj, qpdf_offset_t f1, int f2) -{ - if (!(obj > 0 && obj <= max_id_ && 0 <= f2 && f2 < 65535)) { - QTC::TC("qpdf", "QPDF xref overwrite invalid objgen"); - return; - } - - QPDFObjGen og(obj, f2); - if (!deleted_objects.count(obj)) { - // deleted_objects stores the uncompressed objects removed from the xref table at the start - // of recovery. - QTC::TC("qpdf", "QPDF xref overwrite object"); - table.insert_or_assign(QPDFObjGen(obj, f2), QPDFXRefEntry(f1)); - } -} - void QPDF::showXRefTable() { diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index 40183f3..b6394c6 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -1698,7 +1698,6 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) if (obj_to_write.isStream()) { // This condition occurred in a fuzz input. Ideally we should block it at parse // time, but it's not clear to me how to construct a case for this. - QTC::TC("qpdf", "QPDFWriter stream in ostream"); obj_to_write.warnIfPossible("stream found inside object stream; treating as null"); obj_to_write = QPDFObjectHandle::newNull(); } diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 733a016..5b27a60 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -105,7 +105,6 @@ QPDFWriter not recompressing /FlateDecode 0 QPDF_encryption xref stream from encrypted file 0 QPDFJob unable to filter 0 QUtil non-trivial UTF-16 0 -QPDF xref overwrite object 0 QPDF xref overwrite invalid objgen 0 QPDF decoding error warning 0 qpdf-c called qpdf_init 0 @@ -437,7 +436,6 @@ QPDF xref skipped space 0 QPDF eof skipping spaces before xref 1 QPDF_encryption user matches owner V < 5 0 QPDF_encryption same password 1 -QPDFWriter stream in ostream 0 QPDFParser duplicate dict key 0 QPDFWriter no encryption sig contents 0 QPDFPageObjectHelper colorspace lookup 0 diff --git a/qpdf/qtest/qpdf/fuzz-16214.out b/qpdf/qtest/qpdf/fuzz-16214.out index a03574b..b00f183 100644 --- a/qpdf/qtest/qpdf/fuzz-16214.out +++ b/qpdf/qtest/qpdf/fuzz-16214.out @@ -11,11 +11,9 @@ WARNING: fuzz-16214.pdf (object 1 0, offset 7189): expected n n obj WARNING: fuzz-16214.pdf: Attempting to reconstruct cross-reference table WARNING: fuzz-16214.pdf (offset 7207): error decoding stream data for object 2 0: stream inflate: inflate: data: invalid code lengths set WARNING: fuzz-16214.pdf (offset 7207): getStreamData called on unfilterable stream -WARNING: fuzz-16214.pdf (object 8 0, offset 7207): supposed object stream 5 has wrong type -WARNING: fuzz-16214.pdf (object 8 0, offset 7207): object stream 5 has incorrect keys +WARNING: fuzz-16214.pdf (object 7 0, offset 7207): supposed object stream 5 has wrong type +WARNING: fuzz-16214.pdf (object 7 0, offset 7207): object stream 5 has incorrect keys WARNING: fuzz-16214.pdf (object 21 0, offset 3639): expected endstream WARNING: fuzz-16214.pdf (object 21 0, offset 3112): attempting to recover stream length WARNING: fuzz-16214.pdf (object 21 0, offset 3112): recovered stream length: 340 -WARNING: fuzz-16214.pdf, stream object 8 0: stream found inside object stream; treating as null -WARNING: fuzz-16214.pdf, stream object 8 0: stream found inside object stream; treating as null qpdf: operation succeeded with warnings; resulting file may have some problems diff --git a/qpdf/qtest/qpdf/issue-147.out b/qpdf/qtest/qpdf/issue-147.out index 9e766df..3d63149 100644 --- a/qpdf/qtest/qpdf/issue-147.out +++ b/qpdf/qtest/qpdf/issue-147.out @@ -2,6 +2,6 @@ WARNING: issue-147.pdf: can't find PDF header WARNING: issue-147.pdf: file is damaged WARNING: issue-147.pdf: can't find startxref WARNING: issue-147.pdf: Attempting to reconstruct cross-reference table -WARNING: issue-147.pdf (trailer, offset 9): expected dictionary key but found non-name object; inserting key /QPDFFake1 WARNING: issue-147.pdf: ignoring object with impossibly large id 62 +WARNING: issue-147.pdf (trailer, offset 9): expected dictionary key but found non-name object; inserting key /QPDFFake1 qpdf: issue-147.pdf: unable to find objects while recovering damaged file -- libgit2 0.21.4