Commit db06e075ea8a7538c682194fee8a818ac09d807a
1 parent
a4db9b31
Move xref table initialisation from QPDF::parse to QPDF::Xref_table
Showing
2 changed files
with
53 additions
and
42 deletions
libqpdf/QPDF.cc
| ... | ... | @@ -445,46 +445,8 @@ QPDF::parse(char const* password) |
| 445 | 445 | m->pdf_version = "1.2"; |
| 446 | 446 | } |
| 447 | 447 | |
| 448 | - // PDF spec says %%EOF must be found within the last 1024 bytes of the file. We add an extra | |
| 449 | - // 30 characters to leave room for the startxref stuff. | |
| 450 | - m->file->seek(0, SEEK_END); | |
| 451 | - qpdf_offset_t end_offset = m->file->tell(); | |
| 452 | - m->xref_table.max_offset = end_offset; | |
| 453 | - // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic | |
| 454 | - // scenarios at least 3 bytes are required. | |
| 455 | - if (m->xref_table.max_id > m->xref_table.max_offset / 3) { | |
| 456 | - m->xref_table.max_id = static_cast<int>(m->xref_table.max_offset / 3); | |
| 457 | - } | |
| 458 | - qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0); | |
| 459 | - PatternFinder sf(*this, &QPDF::findStartxref); | |
| 460 | - qpdf_offset_t xref_offset = 0; | |
| 461 | - if (m->file->findLast("startxref", start_offset, 0, sf)) { | |
| 462 | - xref_offset = QUtil::string_to_ll(readToken(*m->file).getValue().c_str()); | |
| 463 | - } | |
| 464 | - | |
| 465 | - try { | |
| 466 | - if (xref_offset == 0) { | |
| 467 | - QTC::TC("qpdf", "QPDF can't find startxref"); | |
| 468 | - throw damagedPDF("", 0, "can't find startxref"); | |
| 469 | - } | |
| 470 | - try { | |
| 471 | - m->xref_table.read(xref_offset); | |
| 472 | - } catch (QPDFExc&) { | |
| 473 | - throw; | |
| 474 | - } catch (std::exception& e) { | |
| 475 | - throw damagedPDF("", 0, std::string("error reading xref: ") + e.what()); | |
| 476 | - } | |
| 477 | - } catch (QPDFExc& e) { | |
| 478 | - if (m->attempt_recovery) { | |
| 479 | - m->xref_table.reconstruct(e); | |
| 480 | - QTC::TC("qpdf", "QPDF reconstructed xref table"); | |
| 481 | - } else { | |
| 482 | - throw; | |
| 483 | - } | |
| 484 | - } | |
| 485 | - | |
| 448 | + m->xref_table.initialize(); | |
| 486 | 449 | initializeEncryption(); |
| 487 | - m->xref_table.parsed = true; | |
| 488 | 450 | if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) { |
| 489 | 451 | // QPDFs created from JSON have an empty xref table and no root object yet. |
| 490 | 452 | throw damagedPDF("", 0, "unable to find page tree"); |
| ... | ... | @@ -526,6 +488,52 @@ QPDF::warn( |
| 526 | 488 | } |
| 527 | 489 | |
| 528 | 490 | void |
| 491 | +QPDF::Xref_table::initialize() | |
| 492 | +{ | |
| 493 | + auto* m = qpdf.m.get(); | |
| 494 | + | |
| 495 | + // PDF spec says %%EOF must be found within the last 1024 bytes of the file. We add an extra | |
| 496 | + // 30 characters to leave room for the startxref stuff. | |
| 497 | + m->file->seek(0, SEEK_END); | |
| 498 | + qpdf_offset_t end_offset = m->file->tell(); | |
| 499 | + max_offset = end_offset; | |
| 500 | + // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic | |
| 501 | + // scenarios at least 3 bytes are required. | |
| 502 | + if (max_id > max_offset / 3) { | |
| 503 | + max_id = static_cast<int>(max_offset / 3); | |
| 504 | + } | |
| 505 | + qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0); | |
| 506 | + PatternFinder sf(qpdf, &QPDF::findStartxref); | |
| 507 | + qpdf_offset_t xref_offset = 0; | |
| 508 | + if (m->file->findLast("startxref", start_offset, 0, sf)) { | |
| 509 | + xref_offset = QUtil::string_to_ll(qpdf.readToken(*m->file).getValue().c_str()); | |
| 510 | + } | |
| 511 | + | |
| 512 | + try { | |
| 513 | + if (xref_offset == 0) { | |
| 514 | + QTC::TC("qpdf", "QPDF can't find startxref"); | |
| 515 | + throw damaged_pdf("can't find startxref"); | |
| 516 | + } | |
| 517 | + try { | |
| 518 | + read(xref_offset); | |
| 519 | + } catch (QPDFExc&) { | |
| 520 | + throw; | |
| 521 | + } catch (std::exception& e) { | |
| 522 | + throw damaged_pdf(std::string("error reading xref: ") + e.what()); | |
| 523 | + } | |
| 524 | + } catch (QPDFExc& e) { | |
| 525 | + if (attempt_recovery) { | |
| 526 | + reconstruct(e); | |
| 527 | + QTC::TC("qpdf", "QPDF reconstructed xref table"); | |
| 528 | + } else { | |
| 529 | + throw; | |
| 530 | + } | |
| 531 | + } | |
| 532 | + | |
| 533 | + parsed = true; | |
| 534 | +} | |
| 535 | + | |
| 536 | +void | |
| 529 | 537 | QPDF::Xref_table::reconstruct(QPDFExc& e) |
| 530 | 538 | { |
| 531 | 539 | if (reconstructed) { |
| ... | ... | @@ -739,8 +747,9 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset) |
| 739 | 747 | } |
| 740 | 748 | if ((size < 1) || (size - 1 != max_obj)) { |
| 741 | 749 | QTC::TC("qpdf", "QPDF xref size mismatch"); |
| 742 | - warn_damaged("reported number of objects (" + std::to_string(size) + | |
| 743 | - ") is not one plus the highest object number (" + std::to_string(max_obj) + ")"); | |
| 750 | + warn_damaged( | |
| 751 | + "reported number of objects (" + std::to_string(size) + | |
| 752 | + ") is not one plus the highest object number (" + std::to_string(max_obj) + ")"); | |
| 744 | 753 | } |
| 745 | 754 | |
| 746 | 755 | // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we | ... | ... |
libqpdf/qpdf/QPDF_private.hh
| ... | ... | @@ -12,7 +12,7 @@ class QPDF::Xref_table: public std::map<QPDFObjGen, QPDFXRefEntry> |
| 12 | 12 | { |
| 13 | 13 | } |
| 14 | 14 | |
| 15 | - void read(qpdf_offset_t offset); | |
| 15 | + void initialize(); | |
| 16 | 16 | void reconstruct(QPDFExc& e); |
| 17 | 17 | |
| 18 | 18 | QPDFObjectHandle trailer; |
| ... | ... | @@ -30,6 +30,8 @@ class QPDF::Xref_table: public std::map<QPDFObjGen, QPDFXRefEntry> |
| 30 | 30 | qpdf_offset_t first_item_offset{0}; // actual value from file |
| 31 | 31 | |
| 32 | 32 | private: |
| 33 | + void read(qpdf_offset_t offset); | |
| 34 | + | |
| 33 | 35 | // Methods to parse tables |
| 34 | 36 | qpdf_offset_t read_table(qpdf_offset_t offset); |
| 35 | 37 | bool parse_first(std::string const& line, int& obj, int& num, int& bytes); | ... | ... |