Commit db06e075ea8a7538c682194fee8a818ac09d807a

Authored by m-holger
1 parent a4db9b31

Move xref table initialisation from QPDF::parse to QPDF::Xref_table

libqpdf/QPDF.cc
@@ -445,46 +445,8 @@ QPDF::parse(char const* password) @@ -445,46 +445,8 @@ QPDF::parse(char const* password)
445 m->pdf_version = "1.2"; 445 m->pdf_version = "1.2";
446 } 446 }
447 447
448 - // PDF spec says %%EOF must be found within the last 1024 bytes of the file. We add an extra  
449 - // 30 characters to leave room for the startxref stuff.  
450 - m->file->seek(0, SEEK_END);  
451 - qpdf_offset_t end_offset = m->file->tell();  
452 - m->xref_table.max_offset = end_offset;  
453 - // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic  
454 - // scenarios at least 3 bytes are required.  
455 - if (m->xref_table.max_id > m->xref_table.max_offset / 3) {  
456 - m->xref_table.max_id = static_cast<int>(m->xref_table.max_offset / 3);  
457 - }  
458 - qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0);  
459 - PatternFinder sf(*this, &QPDF::findStartxref);  
460 - qpdf_offset_t xref_offset = 0;  
461 - if (m->file->findLast("startxref", start_offset, 0, sf)) {  
462 - xref_offset = QUtil::string_to_ll(readToken(*m->file).getValue().c_str());  
463 - }  
464 -  
465 - try {  
466 - if (xref_offset == 0) {  
467 - QTC::TC("qpdf", "QPDF can't find startxref");  
468 - throw damagedPDF("", 0, "can't find startxref");  
469 - }  
470 - try {  
471 - m->xref_table.read(xref_offset);  
472 - } catch (QPDFExc&) {  
473 - throw;  
474 - } catch (std::exception& e) {  
475 - throw damagedPDF("", 0, std::string("error reading xref: ") + e.what());  
476 - }  
477 - } catch (QPDFExc& e) {  
478 - if (m->attempt_recovery) {  
479 - m->xref_table.reconstruct(e);  
480 - QTC::TC("qpdf", "QPDF reconstructed xref table");  
481 - } else {  
482 - throw;  
483 - }  
484 - }  
485 - 448 + m->xref_table.initialize();
486 initializeEncryption(); 449 initializeEncryption();
487 - m->xref_table.parsed = true;  
488 if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) { 450 if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) {
489 // QPDFs created from JSON have an empty xref table and no root object yet. 451 // QPDFs created from JSON have an empty xref table and no root object yet.
490 throw damagedPDF("", 0, "unable to find page tree"); 452 throw damagedPDF("", 0, "unable to find page tree");
@@ -526,6 +488,52 @@ QPDF::warn( @@ -526,6 +488,52 @@ QPDF::warn(
526 } 488 }
527 489
528 void 490 void
  491 +QPDF::Xref_table::initialize()
  492 +{
  493 + auto* m = qpdf.m.get();
  494 +
  495 + // PDF spec says %%EOF must be found within the last 1024 bytes of the file. We add an extra
  496 + // 30 characters to leave room for the startxref stuff.
  497 + m->file->seek(0, SEEK_END);
  498 + qpdf_offset_t end_offset = m->file->tell();
  499 + max_offset = end_offset;
  500 + // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic
  501 + // scenarios at least 3 bytes are required.
  502 + if (max_id > max_offset / 3) {
  503 + max_id = static_cast<int>(max_offset / 3);
  504 + }
  505 + qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0);
  506 + PatternFinder sf(qpdf, &QPDF::findStartxref);
  507 + qpdf_offset_t xref_offset = 0;
  508 + if (m->file->findLast("startxref", start_offset, 0, sf)) {
  509 + xref_offset = QUtil::string_to_ll(qpdf.readToken(*m->file).getValue().c_str());
  510 + }
  511 +
  512 + try {
  513 + if (xref_offset == 0) {
  514 + QTC::TC("qpdf", "QPDF can't find startxref");
  515 + throw damaged_pdf("can't find startxref");
  516 + }
  517 + try {
  518 + read(xref_offset);
  519 + } catch (QPDFExc&) {
  520 + throw;
  521 + } catch (std::exception& e) {
  522 + throw damaged_pdf(std::string("error reading xref: ") + e.what());
  523 + }
  524 + } catch (QPDFExc& e) {
  525 + if (attempt_recovery) {
  526 + reconstruct(e);
  527 + QTC::TC("qpdf", "QPDF reconstructed xref table");
  528 + } else {
  529 + throw;
  530 + }
  531 + }
  532 +
  533 + parsed = true;
  534 +}
  535 +
  536 +void
529 QPDF::Xref_table::reconstruct(QPDFExc& e) 537 QPDF::Xref_table::reconstruct(QPDFExc& e)
530 { 538 {
531 if (reconstructed) { 539 if (reconstructed) {
@@ -739,8 +747,9 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset) @@ -739,8 +747,9 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset)
739 } 747 }
740 if ((size < 1) || (size - 1 != max_obj)) { 748 if ((size < 1) || (size - 1 != max_obj)) {
741 QTC::TC("qpdf", "QPDF xref size mismatch"); 749 QTC::TC("qpdf", "QPDF xref size mismatch");
742 - warn_damaged("reported number of objects (" + std::to_string(size) +  
743 - ") is not one plus the highest object number (" + std::to_string(max_obj) + ")"); 750 + warn_damaged(
  751 + "reported number of objects (" + std::to_string(size) +
  752 + ") is not one plus the highest object number (" + std::to_string(max_obj) + ")");
744 } 753 }
745 754
746 // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we 755 // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we
libqpdf/qpdf/QPDF_private.hh
@@ -12,7 +12,7 @@ class QPDF::Xref_table: public std::map<QPDFObjGen, QPDFXRefEntry> @@ -12,7 +12,7 @@ class QPDF::Xref_table: public std::map<QPDFObjGen, QPDFXRefEntry>
12 { 12 {
13 } 13 }
14 14
15 - void read(qpdf_offset_t offset); 15 + void initialize();
16 void reconstruct(QPDFExc& e); 16 void reconstruct(QPDFExc& e);
17 17
18 QPDFObjectHandle trailer; 18 QPDFObjectHandle trailer;
@@ -30,6 +30,8 @@ class QPDF::Xref_table: public std::map<QPDFObjGen, QPDFXRefEntry> @@ -30,6 +30,8 @@ class QPDF::Xref_table: public std::map<QPDFObjGen, QPDFXRefEntry>
30 qpdf_offset_t first_item_offset{0}; // actual value from file 30 qpdf_offset_t first_item_offset{0}; // actual value from file
31 31
32 private: 32 private:
  33 + void read(qpdf_offset_t offset);
  34 +
33 // Methods to parse tables 35 // Methods to parse tables
34 qpdf_offset_t read_table(qpdf_offset_t offset); 36 qpdf_offset_t read_table(qpdf_offset_t offset);
35 bool parse_first(std::string const& line, int& obj, int& num, int& bytes); 37 bool parse_first(std::string const& line, int& obj, int& num, int& bytes);