Commit db06e075ea8a7538c682194fee8a818ac09d807a

Authored by m-holger
1 parent a4db9b31

Move xref table initialisation from QPDF::parse to QPDF::Xref_table

libqpdf/QPDF.cc
... ... @@ -445,46 +445,8 @@ QPDF::parse(char const* password)
445 445 m->pdf_version = "1.2";
446 446 }
447 447  
448   - // PDF spec says %%EOF must be found within the last 1024 bytes of the file. We add an extra
449   - // 30 characters to leave room for the startxref stuff.
450   - m->file->seek(0, SEEK_END);
451   - qpdf_offset_t end_offset = m->file->tell();
452   - m->xref_table.max_offset = end_offset;
453   - // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic
454   - // scenarios at least 3 bytes are required.
455   - if (m->xref_table.max_id > m->xref_table.max_offset / 3) {
456   - m->xref_table.max_id = static_cast<int>(m->xref_table.max_offset / 3);
457   - }
458   - qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0);
459   - PatternFinder sf(*this, &QPDF::findStartxref);
460   - qpdf_offset_t xref_offset = 0;
461   - if (m->file->findLast("startxref", start_offset, 0, sf)) {
462   - xref_offset = QUtil::string_to_ll(readToken(*m->file).getValue().c_str());
463   - }
464   -
465   - try {
466   - if (xref_offset == 0) {
467   - QTC::TC("qpdf", "QPDF can't find startxref");
468   - throw damagedPDF("", 0, "can't find startxref");
469   - }
470   - try {
471   - m->xref_table.read(xref_offset);
472   - } catch (QPDFExc&) {
473   - throw;
474   - } catch (std::exception& e) {
475   - throw damagedPDF("", 0, std::string("error reading xref: ") + e.what());
476   - }
477   - } catch (QPDFExc& e) {
478   - if (m->attempt_recovery) {
479   - m->xref_table.reconstruct(e);
480   - QTC::TC("qpdf", "QPDF reconstructed xref table");
481   - } else {
482   - throw;
483   - }
484   - }
485   -
  448 + m->xref_table.initialize();
486 449 initializeEncryption();
487   - m->xref_table.parsed = true;
488 450 if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) {
489 451 // QPDFs created from JSON have an empty xref table and no root object yet.
490 452 throw damagedPDF("", 0, "unable to find page tree");
... ... @@ -526,6 +488,52 @@ QPDF::warn(
526 488 }
527 489  
528 490 void
  491 +QPDF::Xref_table::initialize()
  492 +{
  493 + auto* m = qpdf.m.get();
  494 +
  495 + // PDF spec says %%EOF must be found within the last 1024 bytes of the file. We add an extra
  496 + // 30 characters to leave room for the startxref stuff.
  497 + m->file->seek(0, SEEK_END);
  498 + qpdf_offset_t end_offset = m->file->tell();
  499 + max_offset = end_offset;
  500 + // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic
  501 + // scenarios at least 3 bytes are required.
  502 + if (max_id > max_offset / 3) {
  503 + max_id = static_cast<int>(max_offset / 3);
  504 + }
  505 + qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0);
  506 + PatternFinder sf(qpdf, &QPDF::findStartxref);
  507 + qpdf_offset_t xref_offset = 0;
  508 + if (m->file->findLast("startxref", start_offset, 0, sf)) {
  509 + xref_offset = QUtil::string_to_ll(qpdf.readToken(*m->file).getValue().c_str());
  510 + }
  511 +
  512 + try {
  513 + if (xref_offset == 0) {
  514 + QTC::TC("qpdf", "QPDF can't find startxref");
  515 + throw damaged_pdf("can't find startxref");
  516 + }
  517 + try {
  518 + read(xref_offset);
  519 + } catch (QPDFExc&) {
  520 + throw;
  521 + } catch (std::exception& e) {
  522 + throw damaged_pdf(std::string("error reading xref: ") + e.what());
  523 + }
  524 + } catch (QPDFExc& e) {
  525 + if (attempt_recovery) {
  526 + reconstruct(e);
  527 + QTC::TC("qpdf", "QPDF reconstructed xref table");
  528 + } else {
  529 + throw;
  530 + }
  531 + }
  532 +
  533 + parsed = true;
  534 +}
  535 +
  536 +void
529 537 QPDF::Xref_table::reconstruct(QPDFExc& e)
530 538 {
531 539 if (reconstructed) {
... ... @@ -739,8 +747,9 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset)
739 747 }
740 748 if ((size < 1) || (size - 1 != max_obj)) {
741 749 QTC::TC("qpdf", "QPDF xref size mismatch");
742   - warn_damaged("reported number of objects (" + std::to_string(size) +
743   - ") is not one plus the highest object number (" + std::to_string(max_obj) + ")");
  750 + warn_damaged(
  751 + "reported number of objects (" + std::to_string(size) +
  752 + ") is not one plus the highest object number (" + std::to_string(max_obj) + ")");
744 753 }
745 754  
746 755 // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we
... ...
libqpdf/qpdf/QPDF_private.hh
... ... @@ -12,7 +12,7 @@ class QPDF::Xref_table: public std::map<QPDFObjGen, QPDFXRefEntry>
12 12 {
13 13 }
14 14  
15   - void read(qpdf_offset_t offset);
  15 + void initialize();
16 16 void reconstruct(QPDFExc& e);
17 17  
18 18 QPDFObjectHandle trailer;
... ... @@ -30,6 +30,8 @@ class QPDF::Xref_table: public std::map<QPDFObjGen, QPDFXRefEntry>
30 30 qpdf_offset_t first_item_offset{0}; // actual value from file
31 31  
32 32 private:
  33 + void read(qpdf_offset_t offset);
  34 +
33 35 // Methods to parse tables
34 36 qpdf_offset_t read_table(qpdf_offset_t offset);
35 37 bool parse_first(std::string const& line, int& obj, int& num, int& bytes);
... ...