Commit 91822ae6a12533cc9c73c418776def341582c9ac

Authored by m-holger
1 parent 39df5936

Refactor Xref_table::reconstruct

Split reconstruction into two passes - scanning of input for objects and
insertion of objects into the xref table. This allows insertion to take
place in the usual reverse order and removes the need for a separate
insert_reconstructed method.
libqpdf/QPDF.cc
@@ -582,6 +582,9 @@ QPDF::Xref_table::reconstruct(QPDFExc& e) @@ -582,6 +582,9 @@ QPDF::Xref_table::reconstruct(QPDFExc& e)
582 table.erase(iter); 582 table.erase(iter);
583 } 583 }
584 584
  585 + std::vector<std::tuple<int, int, qpdf_offset_t>> objects;
  586 + std::vector<qpdf_offset_t> trailers;
  587 +
585 file->seek(0, SEEK_END); 588 file->seek(0, SEEK_END);
586 qpdf_offset_t eof = file->tell(); 589 qpdf_offset_t eof = file->tell();
587 file->seek(0, SEEK_SET); 590 file->seek(0, SEEK_SET);
@@ -597,25 +600,37 @@ QPDF::Xref_table::reconstruct(QPDFExc&amp; e) @@ -597,25 +600,37 @@ QPDF::Xref_table::reconstruct(QPDFExc&amp; e)
597 int obj = QUtil::string_to_int(t1.getValue().c_str()); 600 int obj = QUtil::string_to_int(t1.getValue().c_str());
598 int gen = QUtil::string_to_int(t2.getValue().c_str()); 601 int gen = QUtil::string_to_int(t2.getValue().c_str());
599 if (obj <= max_id_) { 602 if (obj <= max_id_) {
600 - insert_reconstructed(obj, token_start, gen); 603 + objects.emplace_back(obj, gen, token_start);
601 } else { 604 } else {
602 warn_damaged("ignoring object with impossibly large id " + std::to_string(obj)); 605 warn_damaged("ignoring object with impossibly large id " + std::to_string(obj));
603 } 606 }
604 } 607 }
605 file->seek(pos, SEEK_SET); 608 file->seek(pos, SEEK_SET);
606 } else if (!trailer_ && t1.isWord("trailer")) { 609 } else if (!trailer_ && t1.isWord("trailer")) {
607 - auto pos = file->tell();  
608 - QPDFObjectHandle t = read_trailer();  
609 - if (!t.isDictionary()) {  
610 - // Oh well. It was worth a try.  
611 - } else {  
612 - trailer_ = t;  
613 - }  
614 - file->seek(pos, SEEK_SET); 610 + trailers.emplace_back(file->tell());
615 } 611 }
616 - check_warnings();  
617 file->findAndSkipNextEOL(); 612 file->findAndSkipNextEOL();
618 } 613 }
  614 +
  615 + for (auto tr: trailers) {
  616 + file->seek(tr, SEEK_SET);
  617 + auto t = read_trailer();
  618 + if (!t.isDictionary()) {
  619 + // Oh well. It was worth a try.
  620 + } else {
  621 + trailer_ = t;
  622 + break;
  623 + }
  624 + check_warnings();
  625 + }
  626 +
  627 + auto rend = objects.rend();
  628 + for (auto it = objects.rbegin(); it != rend; it++) {
  629 + auto [obj, gen, token_start] = *it;
  630 + insert(obj, 1, token_start, gen);
  631 + check_warnings();
  632 + }
  633 +
619 deleted_objects.clear(); 634 deleted_objects.clear();
620 635
621 if (!trailer_) { 636 if (!trailer_) {
@@ -1321,8 +1336,13 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2) @@ -1321,8 +1336,13 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
1321 // If there is already an entry for this object and generation in the table, it means that a 1336 // If there is already an entry for this object and generation in the table, it means that a
1322 // later xref table has registered this object. Disregard this one. 1337 // later xref table has registered this object. Disregard this one.
1323 1338
1324 - if (obj > max_id_) {  
1325 - // ignore impossibly large object ids or object ids > Size. 1339 + int new_gen = f0 == 2 ? 0 : f2;
  1340 +
  1341 + if (!(obj > 0 && obj <= max_id_ && 0 <= f2 && new_gen < 65535)) {
  1342 + // We are ignoring invalid objgens. Most will arrive here from xref reconstruction. There
  1343 + // is probably no point having another warning but we could count invalid items in order to
  1344 + // decide when to give up.
  1345 + QTC::TC("qpdf", "QPDF xref overwrite invalid objgen");
1326 return; 1346 return;
1327 } 1347 }
1328 1348
@@ -1337,7 +1357,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2) @@ -1337,7 +1357,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
1337 return; 1357 return;
1338 } 1358 }
1339 1359
1340 - auto [iter, created] = table.try_emplace(QPDFObjGen(obj, (f0 == 2 ? 0 : f2))); 1360 + auto [iter, created] = table.try_emplace(QPDFObjGen(obj, new_gen));
1341 if (!created) { 1361 if (!created) {
1342 QTC::TC("qpdf", "QPDF xref reused object"); 1362 QTC::TC("qpdf", "QPDF xref reused object");
1343 return; 1363 return;
@@ -1369,25 +1389,6 @@ QPDF::Xref_table::insert_free(QPDFObjGen og) @@ -1369,25 +1389,6 @@ QPDF::Xref_table::insert_free(QPDFObjGen og)
1369 } 1389 }
1370 } 1390 }
1371 1391
1372 -// Replace uncompressed object. This is used in xref recovery mode, which reads the file from  
1373 -// beginning to end.  
1374 -void  
1375 -QPDF::Xref_table::insert_reconstructed(int obj, qpdf_offset_t f1, int f2)  
1376 -{  
1377 - if (!(obj > 0 && obj <= max_id_ && 0 <= f2 && f2 < 65535)) {  
1378 - QTC::TC("qpdf", "QPDF xref overwrite invalid objgen");  
1379 - return;  
1380 - }  
1381 -  
1382 - QPDFObjGen og(obj, f2);  
1383 - if (!deleted_objects.count(obj)) {  
1384 - // deleted_objects stores the uncompressed objects removed from the xref table at the start  
1385 - // of recovery.  
1386 - QTC::TC("qpdf", "QPDF xref overwrite object");  
1387 - table.insert_or_assign(QPDFObjGen(obj, f2), QPDFXRefEntry(f1));  
1388 - }  
1389 -}  
1390 -  
1391 void 1392 void
1392 QPDF::showXRefTable() 1393 QPDF::showXRefTable()
1393 { 1394 {
libqpdf/QPDFWriter.cc
@@ -1698,7 +1698,6 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) @@ -1698,7 +1698,6 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1698 if (obj_to_write.isStream()) { 1698 if (obj_to_write.isStream()) {
1699 // This condition occurred in a fuzz input. Ideally we should block it at parse 1699 // This condition occurred in a fuzz input. Ideally we should block it at parse
1700 // time, but it's not clear to me how to construct a case for this. 1700 // time, but it's not clear to me how to construct a case for this.
1701 - QTC::TC("qpdf", "QPDFWriter stream in ostream");  
1702 obj_to_write.warnIfPossible("stream found inside object stream; treating as null"); 1701 obj_to_write.warnIfPossible("stream found inside object stream; treating as null");
1703 obj_to_write = QPDFObjectHandle::newNull(); 1702 obj_to_write = QPDFObjectHandle::newNull();
1704 } 1703 }
qpdf/qpdf.testcov
@@ -105,7 +105,6 @@ QPDFWriter not recompressing /FlateDecode 0 @@ -105,7 +105,6 @@ QPDFWriter not recompressing /FlateDecode 0
105 QPDF_encryption xref stream from encrypted file 0 105 QPDF_encryption xref stream from encrypted file 0
106 QPDFJob unable to filter 0 106 QPDFJob unable to filter 0
107 QUtil non-trivial UTF-16 0 107 QUtil non-trivial UTF-16 0
108 -QPDF xref overwrite object 0  
109 QPDF xref overwrite invalid objgen 0 108 QPDF xref overwrite invalid objgen 0
110 QPDF decoding error warning 0 109 QPDF decoding error warning 0
111 qpdf-c called qpdf_init 0 110 qpdf-c called qpdf_init 0
@@ -437,7 +436,6 @@ QPDF xref skipped space 0 @@ -437,7 +436,6 @@ QPDF xref skipped space 0
437 QPDF eof skipping spaces before xref 1 436 QPDF eof skipping spaces before xref 1
438 QPDF_encryption user matches owner V < 5 0 437 QPDF_encryption user matches owner V < 5 0
439 QPDF_encryption same password 1 438 QPDF_encryption same password 1
440 -QPDFWriter stream in ostream 0  
441 QPDFParser duplicate dict key 0 439 QPDFParser duplicate dict key 0
442 QPDFWriter no encryption sig contents 0 440 QPDFWriter no encryption sig contents 0
443 QPDFPageObjectHelper colorspace lookup 0 441 QPDFPageObjectHelper colorspace lookup 0
qpdf/qtest/qpdf/fuzz-16214.out
@@ -11,11 +11,9 @@ WARNING: fuzz-16214.pdf (object 1 0, offset 7189): expected n n obj @@ -11,11 +11,9 @@ WARNING: fuzz-16214.pdf (object 1 0, offset 7189): expected n n obj
11 WARNING: fuzz-16214.pdf: Attempting to reconstruct cross-reference table 11 WARNING: fuzz-16214.pdf: Attempting to reconstruct cross-reference table
12 WARNING: fuzz-16214.pdf (offset 7207): error decoding stream data for object 2 0: stream inflate: inflate: data: invalid code lengths set 12 WARNING: fuzz-16214.pdf (offset 7207): error decoding stream data for object 2 0: stream inflate: inflate: data: invalid code lengths set
13 WARNING: fuzz-16214.pdf (offset 7207): getStreamData called on unfilterable stream 13 WARNING: fuzz-16214.pdf (offset 7207): getStreamData called on unfilterable stream
14 -WARNING: fuzz-16214.pdf (object 8 0, offset 7207): supposed object stream 5 has wrong type  
15 -WARNING: fuzz-16214.pdf (object 8 0, offset 7207): object stream 5 has incorrect keys 14 +WARNING: fuzz-16214.pdf (object 7 0, offset 7207): supposed object stream 5 has wrong type
  15 +WARNING: fuzz-16214.pdf (object 7 0, offset 7207): object stream 5 has incorrect keys
16 WARNING: fuzz-16214.pdf (object 21 0, offset 3639): expected endstream 16 WARNING: fuzz-16214.pdf (object 21 0, offset 3639): expected endstream
17 WARNING: fuzz-16214.pdf (object 21 0, offset 3112): attempting to recover stream length 17 WARNING: fuzz-16214.pdf (object 21 0, offset 3112): attempting to recover stream length
18 WARNING: fuzz-16214.pdf (object 21 0, offset 3112): recovered stream length: 340 18 WARNING: fuzz-16214.pdf (object 21 0, offset 3112): recovered stream length: 340
19 -WARNING: fuzz-16214.pdf, stream object 8 0: stream found inside object stream; treating as null  
20 -WARNING: fuzz-16214.pdf, stream object 8 0: stream found inside object stream; treating as null  
21 qpdf: operation succeeded with warnings; resulting file may have some problems 19 qpdf: operation succeeded with warnings; resulting file may have some problems
qpdf/qtest/qpdf/issue-147.out
@@ -2,6 +2,6 @@ WARNING: issue-147.pdf: can&#39;t find PDF header @@ -2,6 +2,6 @@ WARNING: issue-147.pdf: can&#39;t find PDF header
2 WARNING: issue-147.pdf: file is damaged 2 WARNING: issue-147.pdf: file is damaged
3 WARNING: issue-147.pdf: can't find startxref 3 WARNING: issue-147.pdf: can't find startxref
4 WARNING: issue-147.pdf: Attempting to reconstruct cross-reference table 4 WARNING: issue-147.pdf: Attempting to reconstruct cross-reference table
5 -WARNING: issue-147.pdf (trailer, offset 9): expected dictionary key but found non-name object; inserting key /QPDFFake1  
6 WARNING: issue-147.pdf: ignoring object with impossibly large id 62 5 WARNING: issue-147.pdf: ignoring object with impossibly large id 62
  6 +WARNING: issue-147.pdf (trailer, offset 9): expected dictionary key but found non-name object; inserting key /QPDFFake1
7 qpdf: issue-147.pdf: unable to find objects while recovering damaged file 7 qpdf: issue-147.pdf: unable to find objects while recovering damaged file