Commit 5fc257f0f75bc4554c827dc3d273c11e6a2611fa

Authored by m-holger
1 parent d64b1491

Add QPDF::Xref_table methods type, offset, stream_number and stream_index

libqpdf/QPDF.cc
... ... @@ -1769,20 +1769,17 @@ QPDF::readObjectAtOffset(
1769 1769 if (try_recovery) {
1770 1770 // Try again after reconstructing xref table
1771 1771 m->xref_table.reconstruct(e);
1772   - if (m->xref_table.count(exp_og) && (m->xref_table[exp_og].getType() == 1)) {
1773   - qpdf_offset_t new_offset = m->xref_table[exp_og].getOffset();
1774   - QPDFObjectHandle result =
1775   - readObjectAtOffset(false, new_offset, description, exp_og, og, false);
  1772 + if (m->xref_table.type(exp_og) == 1) {
1776 1773 QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset");
1777   - return result;
  1774 + return readObjectAtOffset(
  1775 + false, m->xref_table.offset(exp_og), description, exp_og, og, false);
1778 1776 } else {
1779 1777 QTC::TC("qpdf", "QPDF object gone after xref reconstruction");
1780 1778 warn(damagedPDF(
1781 1779 "",
1782 1780 0,
1783 1781 ("object " + exp_og.unparse(' ') +
1784   - " not found in file after regenerating cross reference "
1785   - "table")));
  1782 + " not found in file after regenerating cross reference table")));
1786 1783 return QPDFObjectHandle::newNull();
1787 1784 }
1788 1785 } else {
... ... @@ -1815,7 +1812,7 @@ QPDF::readObjectAtOffset(
1815 1812 }
1816 1813 }
1817 1814 qpdf_offset_t end_after_space = m->file->tell();
1818   - if (skip_cache_if_in_xref && m->xref_table.count(og)) {
  1815 + if (skip_cache_if_in_xref && m->xref_table.type(og)) {
1819 1816 // Ordinarily, an object gets read here when resolved through xref table or stream. In
1820 1817 // the special case of the xref stream and linearization hint tables, the offset comes
1821 1818 // from another source. For the specific case of xref streams, the xref stream is read
... ... @@ -1867,33 +1864,32 @@ QPDF::resolve(QPDFObjGen og)
1867 1864 }
1868 1865 ResolveRecorder rr(this, og);
1869 1866  
1870   - if (m->xref_table.count(og) != 0) {
1871   - QPDFXRefEntry const& entry = m->xref_table[og];
1872   - try {
1873   - switch (entry.getType()) {
1874   - case 1:
1875   - {
1876   - qpdf_offset_t offset = entry.getOffset();
1877   - // Object stored in cache by readObjectAtOffset
1878   - QPDFObjGen a_og;
1879   - QPDFObjectHandle oh = readObjectAtOffset(true, offset, "", og, a_og, false);
1880   - }
1881   - break;
  1867 + try {
  1868 + switch (m->xref_table.type(og)) {
  1869 + case 0:
  1870 + break;
  1871 + case 1:
  1872 + {
  1873 + // Object stored in cache by readObjectAtOffset
  1874 + QPDFObjGen a_og;
  1875 + QPDFObjectHandle oh =
  1876 + readObjectAtOffset(true, m->xref_table.offset(og), "", og, a_og, false);
  1877 + }
  1878 + break;
1882 1879  
1883   - case 2:
1884   - resolveObjectsInStream(entry.getObjStreamNumber());
1885   - break;
  1880 + case 2:
  1881 + resolveObjectsInStream(m->xref_table.stream_number(og.getObj()));
  1882 + break;
1886 1883  
1887   - default:
1888   - throw damagedPDF(
1889   - "", 0, ("object " + og.unparse('/') + " has unexpected xref entry type"));
1890   - }
1891   - } catch (QPDFExc& e) {
1892   - warn(e);
1893   - } catch (std::exception& e) {
1894   - warn(damagedPDF(
1895   - "", 0, ("object " + og.unparse('/') + ": error reading object: " + e.what())));
  1884 + default:
  1885 + throw damagedPDF(
  1886 + "", 0, ("object " + og.unparse('/') + " has unexpected xref entry type"));
1896 1887 }
  1888 + } catch (QPDFExc& e) {
  1889 + warn(e);
  1890 + } catch (std::exception& e) {
  1891 + warn(damagedPDF(
  1892 + "", 0, ("object " + og.unparse('/') + ": error reading object: " + e.what())));
1897 1893 }
1898 1894  
1899 1895 if (isUnresolved(og)) {
... ... @@ -2107,7 +2103,7 @@ QPDF::getObjectForParser(int id, int gen, bool parse_pdf)
2107 2103 if (auto iter = m->obj_cache.find(og); iter != m->obj_cache.end()) {
2108 2104 return iter->second.object;
2109 2105 }
2110   - if (m->xref_table.count(og) || !m->xref_table.parsed) {
  2106 + if (m->xref_table.type(og) || !m->xref_table.parsed) {
2111 2107 return m->obj_cache.insert({og, QPDF_Unresolved::create(this, og)}).first->second.object;
2112 2108 }
2113 2109 if (parse_pdf) {
... ... @@ -2123,9 +2119,8 @@ QPDF::getObjectForJSON(int id, int gen)
2123 2119 auto [it, inserted] = m->obj_cache.try_emplace(og);
2124 2120 auto& obj = it->second.object;
2125 2121 if (inserted) {
2126   - obj = (m->xref_table.parsed && !m->xref_table.count(og))
2127   - ? QPDF_Null::create(this, og)
2128   - : QPDF_Unresolved::create(this, og);
  2122 + obj = (m->xref_table.parsed && !m->xref_table.type(og)) ? QPDF_Null::create(this, og)
  2123 + : QPDF_Unresolved::create(this, og);
2129 2124 }
2130 2125 return obj;
2131 2126 }
... ... @@ -2135,7 +2130,7 @@ QPDF::getObject(QPDFObjGen const& og)
2135 2130 {
2136 2131 if (auto it = m->obj_cache.find(og); it != m->obj_cache.end()) {
2137 2132 return {it->second.object};
2138   - } else if (m->xref_table.parsed && !m->xref_table.count(og)) {
  2133 + } else if (m->xref_table.parsed && !m->xref_table.type(og)) {
2139 2134 return QPDF_Null::create();
2140 2135 } else {
2141 2136 auto result = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og), -1, -1);
... ...
libqpdf/QPDF_linearization.cc
... ... @@ -445,7 +445,7 @@ QPDF::checkLinearizationInternal()
445 445 for (size_t i = 0; i < toS(npages); ++i) {
446 446 QPDFObjectHandle const& page = pages.at(i);
447 447 QPDFObjGen og(page.getObjGen());
448   - if (m->xref_table[og].getType() == 2) {
  448 + if (m->xref_table.type(og) == 2) {
449 449 linearizationWarning(
450 450 "page dictionary for page " + std::to_string(i) + " is compressed");
451 451 }
... ... @@ -556,23 +556,18 @@ QPDF::maxEnd(ObjUser const& ou)
556 556 qpdf_offset_t
557 557 QPDF::getLinearizationOffset(QPDFObjGen const& og)
558 558 {
559   - QPDFXRefEntry entry = m->xref_table[og];
560   - qpdf_offset_t result = 0;
561   - switch (entry.getType()) {
  559 + switch (m->xref_table.type(og)) {
562 560 case 1:
563   - result = entry.getOffset();
564   - break;
  561 + return m->xref_table.offset(og);
565 562  
566 563 case 2:
567 564 // For compressed objects, return the offset of the object stream that contains them.
568   - result = getLinearizationOffset(QPDFObjGen(entry.getObjStreamNumber(), 0));
569   - break;
  565 + return getLinearizationOffset(QPDFObjGen(m->xref_table.stream_number(og.getObj()), 0));
570 566  
571 567 default:
572 568 stopOnError("getLinearizationOffset called for xref entry not of type 1 or 2");
573   - break;
  569 + return 0; // unreachable
574 570 }
575   - return result;
576 571 }
577 572  
578 573 QPDFObjectHandle
... ... @@ -603,7 +598,7 @@ QPDF::lengthNextN(int first_object, int n)
603 598 int length = 0;
604 599 for (int i = 0; i < n; ++i) {
605 600 QPDFObjGen og(first_object + i, 0);
606   - if (m->xref_table.count(og) == 0) {
  601 + if (m->xref_table.type(og) == 0) {
607 602 linearizationWarning(
608 603 "no xref table entry for " + std::to_string(first_object + i) + " 0");
609 604 } else {
... ... @@ -635,7 +630,7 @@ QPDF::checkHPageOffset(
635 630 int npages = toI(pages.size());
636 631 qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset);
637 632 QPDFObjGen first_page_og(pages.at(0).getObjGen());
638   - if (m->xref_table.count(first_page_og) == 0) {
  633 + if (m->xref_table.type(first_page_og) == 0) {
639 634 stopOnError("supposed first page object is not known");
640 635 }
641 636 qpdf_offset_t offset = getLinearizationOffset(first_page_og);
... ... @@ -646,7 +641,7 @@ QPDF::checkHPageOffset(
646 641 for (int pageno = 0; pageno < npages; ++pageno) {
647 642 QPDFObjGen page_og(pages.at(toS(pageno)).getObjGen());
648 643 int first_object = page_og.getObj();
649   - if (m->xref_table.count(page_og) == 0) {
  644 + if (m->xref_table.type(page_og) == 0) {
650 645 stopOnError("unknown object in page offset hint table");
651 646 }
652 647 offset = getLinearizationOffset(page_og);
... ... @@ -768,7 +763,7 @@ QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<in
768 763 cur_object = so.first_shared_obj;
769 764  
770 765 QPDFObjGen og(cur_object, 0);
771   - if (m->xref_table.count(og) == 0) {
  766 + if (m->xref_table.type(og) == 0) {
772 767 stopOnError("unknown object in shared object hint table");
773 768 }
774 769 qpdf_offset_t offset = getLinearizationOffset(og);
... ... @@ -819,7 +814,7 @@ QPDF::checkHOutlines()
819 814 return;
820 815 }
821 816 QPDFObjGen og(outlines.getObjGen());
822   - if (m->xref_table.count(og) == 0) {
  817 + if (m->xref_table.type(og) == 0) {
823 818 stopOnError("unknown object in outlines hint table");
824 819 }
825 820 qpdf_offset_t offset = getLinearizationOffset(og);
... ... @@ -838,8 +833,7 @@ QPDF::checkHOutlines()
838 833 std::to_string(table_length) + "; computed = " + std::to_string(length));
839 834 }
840 835 } else {
841   - linearizationWarning("incorrect first object number in outline "
842   - "hints table.");
  836 + linearizationWarning("incorrect first object number in outline hints table.");
843 837 }
844 838 } else {
845 839 linearizationWarning("incorrect object count in outline hint table");
... ...
libqpdf/qpdf/QPDF_private.hh
... ... @@ -19,6 +19,37 @@ class QPDF::Xref_table: public std::map<QPDFObjGen, QPDFXRefEntry>
19 19 void show();
20 20 bool resolve();
21 21  
  22 + // Returns 0 if og is not in table.
  23 + int
  24 + type(QPDFObjGen og) const
  25 + {
  26 + auto it = find(og);
  27 + return it == end() ? 0 : it->second.getType();
  28 + }
  29 +
  30 + // Returns 0 if og is not in table.
  31 + qpdf_offset_t
  32 + offset(QPDFObjGen og) const
  33 + {
  34 + auto it = find(og);
  35 + return it == end() ? 0 : it->second.getOffset();
  36 + }
  37 +
  38 + // Returns 0 if og is not in table.
  39 + int
  40 + stream_number(int id) const
  41 + {
  42 + auto it = find(QPDFObjGen(id, 0));
  43 + return it == end() ? 0 : it->second.getObjStreamNumber();
  44 + }
  45 +
  46 + int
  47 + stream_index(int id) const
  48 + {
  49 + auto it = find(QPDFObjGen(id, 0));
  50 + return it == end() ? 0 : it->second.getObjStreamIndex();
  51 + }
  52 +
22 53 QPDFObjectHandle trailer;
23 54 bool reconstructed{false};
24 55 // Various tables are indexed by object id, with potential size id + 1
... ...