Commit db06e075ea8a7538c682194fee8a818ac09d807a
1 parent
a4db9b31
Move xref table initialisation from QPDF::parse to QPDF::Xref_table
Showing 2 changed files with 53 additions and 42 deletions
libqpdf/QPDF.cc
| @@ -445,46 +445,8 @@ QPDF::parse(char const* password) | @@ -445,46 +445,8 @@ QPDF::parse(char const* password) | ||
| 445 | m->pdf_version = "1.2"; | 445 | m->pdf_version = "1.2"; |
| 446 | } | 446 | } |
| 447 | 447 | ||
| 448 | - // PDF spec says %%EOF must be found within the last 1024 bytes of the file. We add an extra | ||
| 449 | - // 30 characters to leave room for the startxref stuff. | ||
| 450 | - m->file->seek(0, SEEK_END); | ||
| 451 | - qpdf_offset_t end_offset = m->file->tell(); | ||
| 452 | - m->xref_table.max_offset = end_offset; | ||
| 453 | - // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic | ||
| 454 | - // scenarios at least 3 bytes are required. | ||
| 455 | - if (m->xref_table.max_id > m->xref_table.max_offset / 3) { | ||
| 456 | - m->xref_table.max_id = static_cast<int>(m->xref_table.max_offset / 3); | ||
| 457 | - } | ||
| 458 | - qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0); | ||
| 459 | - PatternFinder sf(*this, &QPDF::findStartxref); | ||
| 460 | - qpdf_offset_t xref_offset = 0; | ||
| 461 | - if (m->file->findLast("startxref", start_offset, 0, sf)) { | ||
| 462 | - xref_offset = QUtil::string_to_ll(readToken(*m->file).getValue().c_str()); | ||
| 463 | - } | ||
| 464 | - | ||
| 465 | - try { | ||
| 466 | - if (xref_offset == 0) { | ||
| 467 | - QTC::TC("qpdf", "QPDF can't find startxref"); | ||
| 468 | - throw damagedPDF("", 0, "can't find startxref"); | ||
| 469 | - } | ||
| 470 | - try { | ||
| 471 | - m->xref_table.read(xref_offset); | ||
| 472 | - } catch (QPDFExc&) { | ||
| 473 | - throw; | ||
| 474 | - } catch (std::exception& e) { | ||
| 475 | - throw damagedPDF("", 0, std::string("error reading xref: ") + e.what()); | ||
| 476 | - } | ||
| 477 | - } catch (QPDFExc& e) { | ||
| 478 | - if (m->attempt_recovery) { | ||
| 479 | - m->xref_table.reconstruct(e); | ||
| 480 | - QTC::TC("qpdf", "QPDF reconstructed xref table"); | ||
| 481 | - } else { | ||
| 482 | - throw; | ||
| 483 | - } | ||
| 484 | - } | ||
| 485 | - | 448 | + m->xref_table.initialize(); |
| 486 | initializeEncryption(); | 449 | initializeEncryption(); |
| 487 | - m->xref_table.parsed = true; | ||
| 488 | if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) { | 450 | if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) { |
| 489 | // QPDFs created from JSON have an empty xref table and no root object yet. | 451 | // QPDFs created from JSON have an empty xref table and no root object yet. |
| 490 | throw damagedPDF("", 0, "unable to find page tree"); | 452 | throw damagedPDF("", 0, "unable to find page tree"); |
| @@ -526,6 +488,52 @@ QPDF::warn( | @@ -526,6 +488,52 @@ QPDF::warn( | ||
| 526 | } | 488 | } |
| 527 | 489 | ||
| 528 | void | 490 | void |
| 491 | +QPDF::Xref_table::initialize() | ||
| 492 | +{ | ||
| 493 | + auto* m = qpdf.m.get(); | ||
| 494 | + | ||
| 495 | + // PDF spec says %%EOF must be found within the last 1024 bytes of the file. We add an extra | ||
| 496 | + // 30 characters to leave room for the startxref stuff. | ||
| 497 | + m->file->seek(0, SEEK_END); | ||
| 498 | + qpdf_offset_t end_offset = m->file->tell(); | ||
| 499 | + max_offset = end_offset; | ||
| 500 | + // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic | ||
| 501 | + // scenarios at least 3 bytes are required. | ||
| 502 | + if (max_id > max_offset / 3) { | ||
| 503 | + max_id = static_cast<int>(max_offset / 3); | ||
| 504 | + } | ||
| 505 | + qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0); | ||
| 506 | + PatternFinder sf(qpdf, &QPDF::findStartxref); | ||
| 507 | + qpdf_offset_t xref_offset = 0; | ||
| 508 | + if (m->file->findLast("startxref", start_offset, 0, sf)) { | ||
| 509 | + xref_offset = QUtil::string_to_ll(qpdf.readToken(*m->file).getValue().c_str()); | ||
| 510 | + } | ||
| 511 | + | ||
| 512 | + try { | ||
| 513 | + if (xref_offset == 0) { | ||
| 514 | + QTC::TC("qpdf", "QPDF can't find startxref"); | ||
| 515 | + throw damaged_pdf("can't find startxref"); | ||
| 516 | + } | ||
| 517 | + try { | ||
| 518 | + read(xref_offset); | ||
| 519 | + } catch (QPDFExc&) { | ||
| 520 | + throw; | ||
| 521 | + } catch (std::exception& e) { | ||
| 522 | + throw damaged_pdf(std::string("error reading xref: ") + e.what()); | ||
| 523 | + } | ||
| 524 | + } catch (QPDFExc& e) { | ||
| 525 | + if (attempt_recovery) { | ||
| 526 | + reconstruct(e); | ||
| 527 | + QTC::TC("qpdf", "QPDF reconstructed xref table"); | ||
| 528 | + } else { | ||
| 529 | + throw; | ||
| 530 | + } | ||
| 531 | + } | ||
| 532 | + | ||
| 533 | + parsed = true; | ||
| 534 | +} | ||
| 535 | + | ||
| 536 | +void | ||
| 529 | QPDF::Xref_table::reconstruct(QPDFExc& e) | 537 | QPDF::Xref_table::reconstruct(QPDFExc& e) |
| 530 | { | 538 | { |
| 531 | if (reconstructed) { | 539 | if (reconstructed) { |
| @@ -739,8 +747,9 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset) | @@ -739,8 +747,9 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset) | ||
| 739 | } | 747 | } |
| 740 | if ((size < 1) || (size - 1 != max_obj)) { | 748 | if ((size < 1) || (size - 1 != max_obj)) { |
| 741 | QTC::TC("qpdf", "QPDF xref size mismatch"); | 749 | QTC::TC("qpdf", "QPDF xref size mismatch"); |
| 742 | - warn_damaged("reported number of objects (" + std::to_string(size) + | ||
| 743 | - ") is not one plus the highest object number (" + std::to_string(max_obj) + ")"); | 750 | + warn_damaged( |
| 751 | + "reported number of objects (" + std::to_string(size) + | ||
| 752 | + ") is not one plus the highest object number (" + std::to_string(max_obj) + ")"); | ||
| 744 | } | 753 | } |
| 745 | 754 | ||
| 746 | // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we | 755 | // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we |
libqpdf/qpdf/QPDF_private.hh
| @@ -12,7 +12,7 @@ class QPDF::Xref_table: public std::map<QPDFObjGen, QPDFXRefEntry> | @@ -12,7 +12,7 @@ class QPDF::Xref_table: public std::map<QPDFObjGen, QPDFXRefEntry> | ||
| 12 | { | 12 | { |
| 13 | } | 13 | } |
| 14 | 14 | ||
| 15 | - void read(qpdf_offset_t offset); | 15 | + void initialize(); |
| 16 | void reconstruct(QPDFExc& e); | 16 | void reconstruct(QPDFExc& e); |
| 17 | 17 | ||
| 18 | QPDFObjectHandle trailer; | 18 | QPDFObjectHandle trailer; |
| @@ -30,6 +30,8 @@ class QPDF::Xref_table: public std::map<QPDFObjGen, QPDFXRefEntry> | @@ -30,6 +30,8 @@ class QPDF::Xref_table: public std::map<QPDFObjGen, QPDFXRefEntry> | ||
| 30 | qpdf_offset_t first_item_offset{0}; // actual value from file | 30 | qpdf_offset_t first_item_offset{0}; // actual value from file |
| 31 | 31 | ||
| 32 | private: | 32 | private: |
| 33 | + void read(qpdf_offset_t offset); | ||
| 34 | + | ||
| 33 | // Methods to parse tables | 35 | // Methods to parse tables |
| 34 | qpdf_offset_t read_table(qpdf_offset_t offset); | 36 | qpdf_offset_t read_table(qpdf_offset_t offset); |
| 35 | bool parse_first(std::string const& line, int& obj, int& num, int& bytes); | 37 | bool parse_first(std::string const& line, int& obj, int& num, int& bytes); |