Commit 5fc257f0f75bc4554c827dc3d273c11e6a2611fa
1 parent d64b1491
Add QPDF::Xref_table methods type, offset, stream_number and stream_index
Showing 3 changed files with 74 additions and 54 deletions
libqpdf/QPDF.cc
| @@ -1769,20 +1769,17 @@ QPDF::readObjectAtOffset( | @@ -1769,20 +1769,17 @@ QPDF::readObjectAtOffset( | ||
| 1769 | if (try_recovery) { | 1769 | if (try_recovery) { |
| 1770 | // Try again after reconstructing xref table | 1770 | // Try again after reconstructing xref table |
| 1771 | m->xref_table.reconstruct(e); | 1771 | m->xref_table.reconstruct(e); |
| 1772 | - if (m->xref_table.count(exp_og) && (m->xref_table[exp_og].getType() == 1)) { | ||
| 1773 | - qpdf_offset_t new_offset = m->xref_table[exp_og].getOffset(); | ||
| 1774 | - QPDFObjectHandle result = | ||
| 1775 | - readObjectAtOffset(false, new_offset, description, exp_og, og, false); | 1772 | + if (m->xref_table.type(exp_og) == 1) { |
| 1776 | QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset"); | 1773 | QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset"); |
| 1777 | - return result; | 1774 | + return readObjectAtOffset( |
| 1775 | + false, m->xref_table.offset(exp_og), description, exp_og, og, false); | ||
| 1778 | } else { | 1776 | } else { |
| 1779 | QTC::TC("qpdf", "QPDF object gone after xref reconstruction"); | 1777 | QTC::TC("qpdf", "QPDF object gone after xref reconstruction"); |
| 1780 | warn(damagedPDF( | 1778 | warn(damagedPDF( |
| 1781 | "", | 1779 | "", |
| 1782 | 0, | 1780 | 0, |
| 1783 | ("object " + exp_og.unparse(' ') + | 1781 | ("object " + exp_og.unparse(' ') + |
| 1784 | - " not found in file after regenerating cross reference " | ||
| 1785 | - "table"))); | 1782 | + " not found in file after regenerating cross reference table"))); |
| 1786 | return QPDFObjectHandle::newNull(); | 1783 | return QPDFObjectHandle::newNull(); |
| 1787 | } | 1784 | } |
| 1788 | } else { | 1785 | } else { |
| @@ -1815,7 +1812,7 @@ QPDF::readObjectAtOffset( | @@ -1815,7 +1812,7 @@ QPDF::readObjectAtOffset( | ||
| 1815 | } | 1812 | } |
| 1816 | } | 1813 | } |
| 1817 | qpdf_offset_t end_after_space = m->file->tell(); | 1814 | qpdf_offset_t end_after_space = m->file->tell(); |
| 1818 | - if (skip_cache_if_in_xref && m->xref_table.count(og)) { | 1815 | + if (skip_cache_if_in_xref && m->xref_table.type(og)) { |
| 1819 | // Ordinarily, an object gets read here when resolved through xref table or stream. In | 1816 | // Ordinarily, an object gets read here when resolved through xref table or stream. In |
| 1820 | // the special case of the xref stream and linearization hint tables, the offset comes | 1817 | // the special case of the xref stream and linearization hint tables, the offset comes |
| 1821 | // from another source. For the specific case of xref streams, the xref stream is read | 1818 | // from another source. For the specific case of xref streams, the xref stream is read |
| @@ -1867,33 +1864,32 @@ QPDF::resolve(QPDFObjGen og) | @@ -1867,33 +1864,32 @@ QPDF::resolve(QPDFObjGen og) | ||
| 1867 | } | 1864 | } |
| 1868 | ResolveRecorder rr(this, og); | 1865 | ResolveRecorder rr(this, og); |
| 1869 | 1866 | ||
| 1870 | - if (m->xref_table.count(og) != 0) { | ||
| 1871 | - QPDFXRefEntry const& entry = m->xref_table[og]; | ||
| 1872 | - try { | ||
| 1873 | - switch (entry.getType()) { | ||
| 1874 | - case 1: | ||
| 1875 | - { | ||
| 1876 | - qpdf_offset_t offset = entry.getOffset(); | ||
| 1877 | - // Object stored in cache by readObjectAtOffset | ||
| 1878 | - QPDFObjGen a_og; | ||
| 1879 | - QPDFObjectHandle oh = readObjectAtOffset(true, offset, "", og, a_og, false); | ||
| 1880 | - } | ||
| 1881 | - break; | 1867 | + try { |
| 1868 | + switch (m->xref_table.type(og)) { | ||
| 1869 | + case 0: | ||
| 1870 | + break; | ||
| 1871 | + case 1: | ||
| 1872 | + { | ||
| 1873 | + // Object stored in cache by readObjectAtOffset | ||
| 1874 | + QPDFObjGen a_og; | ||
| 1875 | + QPDFObjectHandle oh = | ||
| 1876 | + readObjectAtOffset(true, m->xref_table.offset(og), "", og, a_og, false); | ||
| 1877 | + } | ||
| 1878 | + break; | ||
| 1882 | 1879 | ||
| 1883 | - case 2: | ||
| 1884 | - resolveObjectsInStream(entry.getObjStreamNumber()); | ||
| 1885 | - break; | 1880 | + case 2: |
| 1881 | + resolveObjectsInStream(m->xref_table.stream_number(og.getObj())); | ||
| 1882 | + break; | ||
| 1886 | 1883 | ||
| 1887 | - default: | ||
| 1888 | - throw damagedPDF( | ||
| 1889 | - "", 0, ("object " + og.unparse('/') + " has unexpected xref entry type")); | ||
| 1890 | - } | ||
| 1891 | - } catch (QPDFExc& e) { | ||
| 1892 | - warn(e); | ||
| 1893 | - } catch (std::exception& e) { | ||
| 1894 | - warn(damagedPDF( | ||
| 1895 | - "", 0, ("object " + og.unparse('/') + ": error reading object: " + e.what()))); | 1884 | + default: |
| 1885 | + throw damagedPDF( | ||
| 1886 | + "", 0, ("object " + og.unparse('/') + " has unexpected xref entry type")); | ||
| 1896 | } | 1887 | } |
| 1888 | + } catch (QPDFExc& e) { | ||
| 1889 | + warn(e); | ||
| 1890 | + } catch (std::exception& e) { | ||
| 1891 | + warn(damagedPDF( | ||
| 1892 | + "", 0, ("object " + og.unparse('/') + ": error reading object: " + e.what()))); | ||
| 1897 | } | 1893 | } |
| 1898 | 1894 | ||
| 1899 | if (isUnresolved(og)) { | 1895 | if (isUnresolved(og)) { |
| @@ -2107,7 +2103,7 @@ QPDF::getObjectForParser(int id, int gen, bool parse_pdf) | @@ -2107,7 +2103,7 @@ QPDF::getObjectForParser(int id, int gen, bool parse_pdf) | ||
| 2107 | if (auto iter = m->obj_cache.find(og); iter != m->obj_cache.end()) { | 2103 | if (auto iter = m->obj_cache.find(og); iter != m->obj_cache.end()) { |
| 2108 | return iter->second.object; | 2104 | return iter->second.object; |
| 2109 | } | 2105 | } |
| 2110 | - if (m->xref_table.count(og) || !m->xref_table.parsed) { | 2106 | + if (m->xref_table.type(og) || !m->xref_table.parsed) { |
| 2111 | return m->obj_cache.insert({og, QPDF_Unresolved::create(this, og)}).first->second.object; | 2107 | return m->obj_cache.insert({og, QPDF_Unresolved::create(this, og)}).first->second.object; |
| 2112 | } | 2108 | } |
| 2113 | if (parse_pdf) { | 2109 | if (parse_pdf) { |
| @@ -2123,9 +2119,8 @@ QPDF::getObjectForJSON(int id, int gen) | @@ -2123,9 +2119,8 @@ QPDF::getObjectForJSON(int id, int gen) | ||
| 2123 | auto [it, inserted] = m->obj_cache.try_emplace(og); | 2119 | auto [it, inserted] = m->obj_cache.try_emplace(og); |
| 2124 | auto& obj = it->second.object; | 2120 | auto& obj = it->second.object; |
| 2125 | if (inserted) { | 2121 | if (inserted) { |
| 2126 | - obj = (m->xref_table.parsed && !m->xref_table.count(og)) | ||
| 2127 | - ? QPDF_Null::create(this, og) | ||
| 2128 | - : QPDF_Unresolved::create(this, og); | 2122 | + obj = (m->xref_table.parsed && !m->xref_table.type(og)) ? QPDF_Null::create(this, og) |
| 2123 | + : QPDF_Unresolved::create(this, og); | ||
| 2129 | } | 2124 | } |
| 2130 | return obj; | 2125 | return obj; |
| 2131 | } | 2126 | } |
| @@ -2135,7 +2130,7 @@ QPDF::getObject(QPDFObjGen const& og) | @@ -2135,7 +2130,7 @@ QPDF::getObject(QPDFObjGen const& og) | ||
| 2135 | { | 2130 | { |
| 2136 | if (auto it = m->obj_cache.find(og); it != m->obj_cache.end()) { | 2131 | if (auto it = m->obj_cache.find(og); it != m->obj_cache.end()) { |
| 2137 | return {it->second.object}; | 2132 | return {it->second.object}; |
| 2138 | - } else if (m->xref_table.parsed && !m->xref_table.count(og)) { | 2133 | + } else if (m->xref_table.parsed && !m->xref_table.type(og)) { |
| 2139 | return QPDF_Null::create(); | 2134 | return QPDF_Null::create(); |
| 2140 | } else { | 2135 | } else { |
| 2141 | auto result = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og), -1, -1); | 2136 | auto result = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og), -1, -1); |
libqpdf/QPDF_linearization.cc
| @@ -445,7 +445,7 @@ QPDF::checkLinearizationInternal() | @@ -445,7 +445,7 @@ QPDF::checkLinearizationInternal() | ||
| 445 | for (size_t i = 0; i < toS(npages); ++i) { | 445 | for (size_t i = 0; i < toS(npages); ++i) { |
| 446 | QPDFObjectHandle const& page = pages.at(i); | 446 | QPDFObjectHandle const& page = pages.at(i); |
| 447 | QPDFObjGen og(page.getObjGen()); | 447 | QPDFObjGen og(page.getObjGen()); |
| 448 | - if (m->xref_table[og].getType() == 2) { | 448 | + if (m->xref_table.type(og) == 2) { |
| 449 | linearizationWarning( | 449 | linearizationWarning( |
| 450 | "page dictionary for page " + std::to_string(i) + " is compressed"); | 450 | "page dictionary for page " + std::to_string(i) + " is compressed"); |
| 451 | } | 451 | } |
| @@ -556,23 +556,18 @@ QPDF::maxEnd(ObjUser const& ou) | @@ -556,23 +556,18 @@ QPDF::maxEnd(ObjUser const& ou) | ||
| 556 | qpdf_offset_t | 556 | qpdf_offset_t |
| 557 | QPDF::getLinearizationOffset(QPDFObjGen const& og) | 557 | QPDF::getLinearizationOffset(QPDFObjGen const& og) |
| 558 | { | 558 | { |
| 559 | - QPDFXRefEntry entry = m->xref_table[og]; | ||
| 560 | - qpdf_offset_t result = 0; | ||
| 561 | - switch (entry.getType()) { | 559 | + switch (m->xref_table.type(og)) { |
| 562 | case 1: | 560 | case 1: |
| 563 | - result = entry.getOffset(); | ||
| 564 | - break; | 561 | + return m->xref_table.offset(og); |
| 565 | 562 | ||
| 566 | case 2: | 563 | case 2: |
| 567 | // For compressed objects, return the offset of the object stream that contains them. | 564 | // For compressed objects, return the offset of the object stream that contains them. |
| 568 | - result = getLinearizationOffset(QPDFObjGen(entry.getObjStreamNumber(), 0)); | ||
| 569 | - break; | 565 | + return getLinearizationOffset(QPDFObjGen(m->xref_table.stream_number(og.getObj()), 0)); |
| 570 | 566 | ||
| 571 | default: | 567 | default: |
| 572 | stopOnError("getLinearizationOffset called for xref entry not of type 1 or 2"); | 568 | stopOnError("getLinearizationOffset called for xref entry not of type 1 or 2"); |
| 573 | - break; | 569 | + return 0; // unreachable |
| 574 | } | 570 | } |
| 575 | - return result; | ||
| 576 | } | 571 | } |
| 577 | 572 | ||
| 578 | QPDFObjectHandle | 573 | QPDFObjectHandle |
| @@ -603,7 +598,7 @@ QPDF::lengthNextN(int first_object, int n) | @@ -603,7 +598,7 @@ QPDF::lengthNextN(int first_object, int n) | ||
| 603 | int length = 0; | 598 | int length = 0; |
| 604 | for (int i = 0; i < n; ++i) { | 599 | for (int i = 0; i < n; ++i) { |
| 605 | QPDFObjGen og(first_object + i, 0); | 600 | QPDFObjGen og(first_object + i, 0); |
| 606 | - if (m->xref_table.count(og) == 0) { | 601 | + if (m->xref_table.type(og) == 0) { |
| 607 | linearizationWarning( | 602 | linearizationWarning( |
| 608 | "no xref table entry for " + std::to_string(first_object + i) + " 0"); | 603 | "no xref table entry for " + std::to_string(first_object + i) + " 0"); |
| 609 | } else { | 604 | } else { |
| @@ -635,7 +630,7 @@ QPDF::checkHPageOffset( | @@ -635,7 +630,7 @@ QPDF::checkHPageOffset( | ||
| 635 | int npages = toI(pages.size()); | 630 | int npages = toI(pages.size()); |
| 636 | qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset); | 631 | qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset); |
| 637 | QPDFObjGen first_page_og(pages.at(0).getObjGen()); | 632 | QPDFObjGen first_page_og(pages.at(0).getObjGen()); |
| 638 | - if (m->xref_table.count(first_page_og) == 0) { | 633 | + if (m->xref_table.type(first_page_og) == 0) { |
| 639 | stopOnError("supposed first page object is not known"); | 634 | stopOnError("supposed first page object is not known"); |
| 640 | } | 635 | } |
| 641 | qpdf_offset_t offset = getLinearizationOffset(first_page_og); | 636 | qpdf_offset_t offset = getLinearizationOffset(first_page_og); |
| @@ -646,7 +641,7 @@ QPDF::checkHPageOffset( | @@ -646,7 +641,7 @@ QPDF::checkHPageOffset( | ||
| 646 | for (int pageno = 0; pageno < npages; ++pageno) { | 641 | for (int pageno = 0; pageno < npages; ++pageno) { |
| 647 | QPDFObjGen page_og(pages.at(toS(pageno)).getObjGen()); | 642 | QPDFObjGen page_og(pages.at(toS(pageno)).getObjGen()); |
| 648 | int first_object = page_og.getObj(); | 643 | int first_object = page_og.getObj(); |
| 649 | - if (m->xref_table.count(page_og) == 0) { | 644 | + if (m->xref_table.type(page_og) == 0) { |
| 650 | stopOnError("unknown object in page offset hint table"); | 645 | stopOnError("unknown object in page offset hint table"); |
| 651 | } | 646 | } |
| 652 | offset = getLinearizationOffset(page_og); | 647 | offset = getLinearizationOffset(page_og); |
| @@ -768,7 +763,7 @@ QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<in | @@ -768,7 +763,7 @@ QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<in | ||
| 768 | cur_object = so.first_shared_obj; | 763 | cur_object = so.first_shared_obj; |
| 769 | 764 | ||
| 770 | QPDFObjGen og(cur_object, 0); | 765 | QPDFObjGen og(cur_object, 0); |
| 771 | - if (m->xref_table.count(og) == 0) { | 766 | + if (m->xref_table.type(og) == 0) { |
| 772 | stopOnError("unknown object in shared object hint table"); | 767 | stopOnError("unknown object in shared object hint table"); |
| 773 | } | 768 | } |
| 774 | qpdf_offset_t offset = getLinearizationOffset(og); | 769 | qpdf_offset_t offset = getLinearizationOffset(og); |
| @@ -819,7 +814,7 @@ QPDF::checkHOutlines() | @@ -819,7 +814,7 @@ QPDF::checkHOutlines() | ||
| 819 | return; | 814 | return; |
| 820 | } | 815 | } |
| 821 | QPDFObjGen og(outlines.getObjGen()); | 816 | QPDFObjGen og(outlines.getObjGen()); |
| 822 | - if (m->xref_table.count(og) == 0) { | 817 | + if (m->xref_table.type(og) == 0) { |
| 823 | stopOnError("unknown object in outlines hint table"); | 818 | stopOnError("unknown object in outlines hint table"); |
| 824 | } | 819 | } |
| 825 | qpdf_offset_t offset = getLinearizationOffset(og); | 820 | qpdf_offset_t offset = getLinearizationOffset(og); |
| @@ -838,8 +833,7 @@ QPDF::checkHOutlines() | @@ -838,8 +833,7 @@ QPDF::checkHOutlines() | ||
| 838 | std::to_string(table_length) + "; computed = " + std::to_string(length)); | 833 | std::to_string(table_length) + "; computed = " + std::to_string(length)); |
| 839 | } | 834 | } |
| 840 | } else { | 835 | } else { |
| 841 | - linearizationWarning("incorrect first object number in outline " | ||
| 842 | - "hints table."); | 836 | + linearizationWarning("incorrect first object number in outline hints table."); |
| 843 | } | 837 | } |
| 844 | } else { | 838 | } else { |
| 845 | linearizationWarning("incorrect object count in outline hint table"); | 839 | linearizationWarning("incorrect object count in outline hint table"); |
libqpdf/qpdf/QPDF_private.hh
| @@ -19,6 +19,37 @@ class QPDF::Xref_table: public std::map<QPDFObjGen, QPDFXRefEntry> | @@ -19,6 +19,37 @@ class QPDF::Xref_table: public std::map<QPDFObjGen, QPDFXRefEntry> | ||
| 19 | void show(); | 19 | void show(); |
| 20 | bool resolve(); | 20 | bool resolve(); |
| 21 | 21 | ||
| 22 | + // Returns 0 if og is not in table. | ||
| 23 | + int | ||
| 24 | + type(QPDFObjGen og) const | ||
| 25 | + { | ||
| 26 | + auto it = find(og); | ||
| 27 | + return it == end() ? 0 : it->second.getType(); | ||
| 28 | + } | ||
| 29 | + | ||
| 30 | + // Returns 0 if og is not in table. | ||
| 31 | + qpdf_offset_t | ||
| 32 | + offset(QPDFObjGen og) const | ||
| 33 | + { | ||
| 34 | + auto it = find(og); | ||
| 35 | + return it == end() ? 0 : it->second.getOffset(); | ||
| 36 | + } | ||
| 37 | + | ||
| 38 | + // Returns 0 if object id (generation 0) is not in table. | ||
| 39 | + int | ||
| 40 | + stream_number(int id) const | ||
| 41 | + { | ||
| 42 | + auto it = find(QPDFObjGen(id, 0)); | ||
| 43 | + return it == end() ? 0 : it->second.getObjStreamNumber(); | ||
| 44 | + } | ||
| 45 | + | ||
| 46 | + int | ||
| 47 | + stream_index(int id) const | ||
| 48 | + { | ||
| 49 | + auto it = find(QPDFObjGen(id, 0)); | ||
| 50 | + return it == end() ? 0 : it->second.getObjStreamIndex(); | ||
| 51 | + } | ||
| 52 | + | ||
| 22 | QPDFObjectHandle trailer; | 53 | QPDFObjectHandle trailer; |
| 23 | bool reconstructed{false}; | 54 | bool reconstructed{false}; |
| 24 | // Various tables are indexed by object id, with potential size id + 1 | 55 | // Various tables are indexed by object id, with potential size id + 1 |