Commit 5fc257f0f75bc4554c827dc3d273c11e6a2611fa

Authored by m-holger
1 parent d64b1491

Add QPDF::Xref_table methods type, offset, stream_number and stream_index

libqpdf/QPDF.cc
@@ -1769,20 +1769,17 @@ QPDF::readObjectAtOffset( @@ -1769,20 +1769,17 @@ QPDF::readObjectAtOffset(
1769 if (try_recovery) { 1769 if (try_recovery) {
1770 // Try again after reconstructing xref table 1770 // Try again after reconstructing xref table
1771 m->xref_table.reconstruct(e); 1771 m->xref_table.reconstruct(e);
1772 - if (m->xref_table.count(exp_og) && (m->xref_table[exp_og].getType() == 1)) {  
1773 - qpdf_offset_t new_offset = m->xref_table[exp_og].getOffset();  
1774 - QPDFObjectHandle result =  
1775 - readObjectAtOffset(false, new_offset, description, exp_og, og, false); 1772 + if (m->xref_table.type(exp_og) == 1) {
1776 QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset"); 1773 QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset");
1777 - return result; 1774 + return readObjectAtOffset(
  1775 + false, m->xref_table.offset(exp_og), description, exp_og, og, false);
1778 } else { 1776 } else {
1779 QTC::TC("qpdf", "QPDF object gone after xref reconstruction"); 1777 QTC::TC("qpdf", "QPDF object gone after xref reconstruction");
1780 warn(damagedPDF( 1778 warn(damagedPDF(
1781 "", 1779 "",
1782 0, 1780 0,
1783 ("object " + exp_og.unparse(' ') + 1781 ("object " + exp_og.unparse(' ') +
1784 - " not found in file after regenerating cross reference "  
1785 - "table"))); 1782 + " not found in file after regenerating cross reference table")));
1786 return QPDFObjectHandle::newNull(); 1783 return QPDFObjectHandle::newNull();
1787 } 1784 }
1788 } else { 1785 } else {
@@ -1815,7 +1812,7 @@ QPDF::readObjectAtOffset( @@ -1815,7 +1812,7 @@ QPDF::readObjectAtOffset(
1815 } 1812 }
1816 } 1813 }
1817 qpdf_offset_t end_after_space = m->file->tell(); 1814 qpdf_offset_t end_after_space = m->file->tell();
1818 - if (skip_cache_if_in_xref && m->xref_table.count(og)) { 1815 + if (skip_cache_if_in_xref && m->xref_table.type(og)) {
1819 // Ordinarily, an object gets read here when resolved through xref table or stream. In 1816 // Ordinarily, an object gets read here when resolved through xref table or stream. In
1820 // the special case of the xref stream and linearization hint tables, the offset comes 1817 // the special case of the xref stream and linearization hint tables, the offset comes
1821 // from another source. For the specific case of xref streams, the xref stream is read 1818 // from another source. For the specific case of xref streams, the xref stream is read
@@ -1867,33 +1864,32 @@ QPDF::resolve(QPDFObjGen og) @@ -1867,33 +1864,32 @@ QPDF::resolve(QPDFObjGen og)
1867 } 1864 }
1868 ResolveRecorder rr(this, og); 1865 ResolveRecorder rr(this, og);
1869 1866
1870 - if (m->xref_table.count(og) != 0) {  
1871 - QPDFXRefEntry const& entry = m->xref_table[og];  
1872 - try {  
1873 - switch (entry.getType()) {  
1874 - case 1:  
1875 - {  
1876 - qpdf_offset_t offset = entry.getOffset();  
1877 - // Object stored in cache by readObjectAtOffset  
1878 - QPDFObjGen a_og;  
1879 - QPDFObjectHandle oh = readObjectAtOffset(true, offset, "", og, a_og, false);  
1880 - }  
1881 - break; 1867 + try {
  1868 + switch (m->xref_table.type(og)) {
  1869 + case 0:
  1870 + break;
  1871 + case 1:
  1872 + {
  1873 + // Object stored in cache by readObjectAtOffset
  1874 + QPDFObjGen a_og;
  1875 + QPDFObjectHandle oh =
  1876 + readObjectAtOffset(true, m->xref_table.offset(og), "", og, a_og, false);
  1877 + }
  1878 + break;
1882 1879
1883 - case 2:  
1884 - resolveObjectsInStream(entry.getObjStreamNumber());  
1885 - break; 1880 + case 2:
  1881 + resolveObjectsInStream(m->xref_table.stream_number(og.getObj()));
  1882 + break;
1886 1883
1887 - default:  
1888 - throw damagedPDF(  
1889 - "", 0, ("object " + og.unparse('/') + " has unexpected xref entry type"));  
1890 - }  
1891 - } catch (QPDFExc& e) {  
1892 - warn(e);  
1893 - } catch (std::exception& e) {  
1894 - warn(damagedPDF(  
1895 - "", 0, ("object " + og.unparse('/') + ": error reading object: " + e.what()))); 1884 + default:
  1885 + throw damagedPDF(
  1886 + "", 0, ("object " + og.unparse('/') + " has unexpected xref entry type"));
1896 } 1887 }
  1888 + } catch (QPDFExc& e) {
  1889 + warn(e);
  1890 + } catch (std::exception& e) {
  1891 + warn(damagedPDF(
  1892 + "", 0, ("object " + og.unparse('/') + ": error reading object: " + e.what())));
1897 } 1893 }
1898 1894
1899 if (isUnresolved(og)) { 1895 if (isUnresolved(og)) {
@@ -2107,7 +2103,7 @@ QPDF::getObjectForParser(int id, int gen, bool parse_pdf) @@ -2107,7 +2103,7 @@ QPDF::getObjectForParser(int id, int gen, bool parse_pdf)
2107 if (auto iter = m->obj_cache.find(og); iter != m->obj_cache.end()) { 2103 if (auto iter = m->obj_cache.find(og); iter != m->obj_cache.end()) {
2108 return iter->second.object; 2104 return iter->second.object;
2109 } 2105 }
2110 - if (m->xref_table.count(og) || !m->xref_table.parsed) { 2106 + if (m->xref_table.type(og) || !m->xref_table.parsed) {
2111 return m->obj_cache.insert({og, QPDF_Unresolved::create(this, og)}).first->second.object; 2107 return m->obj_cache.insert({og, QPDF_Unresolved::create(this, og)}).first->second.object;
2112 } 2108 }
2113 if (parse_pdf) { 2109 if (parse_pdf) {
@@ -2123,9 +2119,8 @@ QPDF::getObjectForJSON(int id, int gen) @@ -2123,9 +2119,8 @@ QPDF::getObjectForJSON(int id, int gen)
2123 auto [it, inserted] = m->obj_cache.try_emplace(og); 2119 auto [it, inserted] = m->obj_cache.try_emplace(og);
2124 auto& obj = it->second.object; 2120 auto& obj = it->second.object;
2125 if (inserted) { 2121 if (inserted) {
2126 - obj = (m->xref_table.parsed && !m->xref_table.count(og))  
2127 - ? QPDF_Null::create(this, og)  
2128 - : QPDF_Unresolved::create(this, og); 2122 + obj = (m->xref_table.parsed && !m->xref_table.type(og)) ? QPDF_Null::create(this, og)
  2123 + : QPDF_Unresolved::create(this, og);
2129 } 2124 }
2130 return obj; 2125 return obj;
2131 } 2126 }
@@ -2135,7 +2130,7 @@ QPDF::getObject(QPDFObjGen const& og) @@ -2135,7 +2130,7 @@ QPDF::getObject(QPDFObjGen const& og)
2135 { 2130 {
2136 if (auto it = m->obj_cache.find(og); it != m->obj_cache.end()) { 2131 if (auto it = m->obj_cache.find(og); it != m->obj_cache.end()) {
2137 return {it->second.object}; 2132 return {it->second.object};
2138 - } else if (m->xref_table.parsed && !m->xref_table.count(og)) { 2133 + } else if (m->xref_table.parsed && !m->xref_table.type(og)) {
2139 return QPDF_Null::create(); 2134 return QPDF_Null::create();
2140 } else { 2135 } else {
2141 auto result = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og), -1, -1); 2136 auto result = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og), -1, -1);
libqpdf/QPDF_linearization.cc
@@ -445,7 +445,7 @@ QPDF::checkLinearizationInternal() @@ -445,7 +445,7 @@ QPDF::checkLinearizationInternal()
445 for (size_t i = 0; i < toS(npages); ++i) { 445 for (size_t i = 0; i < toS(npages); ++i) {
446 QPDFObjectHandle const& page = pages.at(i); 446 QPDFObjectHandle const& page = pages.at(i);
447 QPDFObjGen og(page.getObjGen()); 447 QPDFObjGen og(page.getObjGen());
448 - if (m->xref_table[og].getType() == 2) { 448 + if (m->xref_table.type(og) == 2) {
449 linearizationWarning( 449 linearizationWarning(
450 "page dictionary for page " + std::to_string(i) + " is compressed"); 450 "page dictionary for page " + std::to_string(i) + " is compressed");
451 } 451 }
@@ -556,23 +556,18 @@ QPDF::maxEnd(ObjUser const& ou) @@ -556,23 +556,18 @@ QPDF::maxEnd(ObjUser const& ou)
556 qpdf_offset_t 556 qpdf_offset_t
557 QPDF::getLinearizationOffset(QPDFObjGen const& og) 557 QPDF::getLinearizationOffset(QPDFObjGen const& og)
558 { 558 {
559 - QPDFXRefEntry entry = m->xref_table[og];  
560 - qpdf_offset_t result = 0;  
561 - switch (entry.getType()) { 559 + switch (m->xref_table.type(og)) {
562 case 1: 560 case 1:
563 - result = entry.getOffset();  
564 - break; 561 + return m->xref_table.offset(og);
565 562
566 case 2: 563 case 2:
567 // For compressed objects, return the offset of the object stream that contains them. 564 // For compressed objects, return the offset of the object stream that contains them.
568 - result = getLinearizationOffset(QPDFObjGen(entry.getObjStreamNumber(), 0));  
569 - break; 565 + return getLinearizationOffset(QPDFObjGen(m->xref_table.stream_number(og.getObj()), 0));
570 566
571 default: 567 default:
572 stopOnError("getLinearizationOffset called for xref entry not of type 1 or 2"); 568 stopOnError("getLinearizationOffset called for xref entry not of type 1 or 2");
573 - break; 569 + return 0; // unreachable
574 } 570 }
575 - return result;  
576 } 571 }
577 572
578 QPDFObjectHandle 573 QPDFObjectHandle
@@ -603,7 +598,7 @@ QPDF::lengthNextN(int first_object, int n) @@ -603,7 +598,7 @@ QPDF::lengthNextN(int first_object, int n)
603 int length = 0; 598 int length = 0;
604 for (int i = 0; i < n; ++i) { 599 for (int i = 0; i < n; ++i) {
605 QPDFObjGen og(first_object + i, 0); 600 QPDFObjGen og(first_object + i, 0);
606 - if (m->xref_table.count(og) == 0) { 601 + if (m->xref_table.type(og) == 0) {
607 linearizationWarning( 602 linearizationWarning(
608 "no xref table entry for " + std::to_string(first_object + i) + " 0"); 603 "no xref table entry for " + std::to_string(first_object + i) + " 0");
609 } else { 604 } else {
@@ -635,7 +630,7 @@ QPDF::checkHPageOffset( @@ -635,7 +630,7 @@ QPDF::checkHPageOffset(
635 int npages = toI(pages.size()); 630 int npages = toI(pages.size());
636 qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset); 631 qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset);
637 QPDFObjGen first_page_og(pages.at(0).getObjGen()); 632 QPDFObjGen first_page_og(pages.at(0).getObjGen());
638 - if (m->xref_table.count(first_page_og) == 0) { 633 + if (m->xref_table.type(first_page_og) == 0) {
639 stopOnError("supposed first page object is not known"); 634 stopOnError("supposed first page object is not known");
640 } 635 }
641 qpdf_offset_t offset = getLinearizationOffset(first_page_og); 636 qpdf_offset_t offset = getLinearizationOffset(first_page_og);
@@ -646,7 +641,7 @@ QPDF::checkHPageOffset( @@ -646,7 +641,7 @@ QPDF::checkHPageOffset(
646 for (int pageno = 0; pageno < npages; ++pageno) { 641 for (int pageno = 0; pageno < npages; ++pageno) {
647 QPDFObjGen page_og(pages.at(toS(pageno)).getObjGen()); 642 QPDFObjGen page_og(pages.at(toS(pageno)).getObjGen());
648 int first_object = page_og.getObj(); 643 int first_object = page_og.getObj();
649 - if (m->xref_table.count(page_og) == 0) { 644 + if (m->xref_table.type(page_og) == 0) {
650 stopOnError("unknown object in page offset hint table"); 645 stopOnError("unknown object in page offset hint table");
651 } 646 }
652 offset = getLinearizationOffset(page_og); 647 offset = getLinearizationOffset(page_og);
@@ -768,7 +763,7 @@ QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<in @@ -768,7 +763,7 @@ QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<in
768 cur_object = so.first_shared_obj; 763 cur_object = so.first_shared_obj;
769 764
770 QPDFObjGen og(cur_object, 0); 765 QPDFObjGen og(cur_object, 0);
771 - if (m->xref_table.count(og) == 0) { 766 + if (m->xref_table.type(og) == 0) {
772 stopOnError("unknown object in shared object hint table"); 767 stopOnError("unknown object in shared object hint table");
773 } 768 }
774 qpdf_offset_t offset = getLinearizationOffset(og); 769 qpdf_offset_t offset = getLinearizationOffset(og);
@@ -819,7 +814,7 @@ QPDF::checkHOutlines() @@ -819,7 +814,7 @@ QPDF::checkHOutlines()
819 return; 814 return;
820 } 815 }
821 QPDFObjGen og(outlines.getObjGen()); 816 QPDFObjGen og(outlines.getObjGen());
822 - if (m->xref_table.count(og) == 0) { 817 + if (m->xref_table.type(og) == 0) {
823 stopOnError("unknown object in outlines hint table"); 818 stopOnError("unknown object in outlines hint table");
824 } 819 }
825 qpdf_offset_t offset = getLinearizationOffset(og); 820 qpdf_offset_t offset = getLinearizationOffset(og);
@@ -838,8 +833,7 @@ QPDF::checkHOutlines() @@ -838,8 +833,7 @@ QPDF::checkHOutlines()
838 std::to_string(table_length) + "; computed = " + std::to_string(length)); 833 std::to_string(table_length) + "; computed = " + std::to_string(length));
839 } 834 }
840 } else { 835 } else {
841 - linearizationWarning("incorrect first object number in outline "  
842 - "hints table."); 836 + linearizationWarning("incorrect first object number in outline hints table.");
843 } 837 }
844 } else { 838 } else {
845 linearizationWarning("incorrect object count in outline hint table"); 839 linearizationWarning("incorrect object count in outline hint table");
libqpdf/qpdf/QPDF_private.hh
@@ -19,6 +19,37 @@ class QPDF::Xref_table: public std::map<QPDFObjGen, QPDFXRefEntry> @@ -19,6 +19,37 @@ class QPDF::Xref_table: public std::map<QPDFObjGen, QPDFXRefEntry>
19 void show(); 19 void show();
20 bool resolve(); 20 bool resolve();
21 21
  22 + // Returns 0 if og is not in table.
  23 + int
  24 + type(QPDFObjGen og) const
  25 + {
  26 + auto it = find(og);
  27 + return it == end() ? 0 : it->second.getType();
  28 + }
  29 +
  30 + // Returns 0 if og is not in table.
  31 + qpdf_offset_t
  32 + offset(QPDFObjGen og) const
  33 + {
  34 + auto it = find(og);
  35 + return it == end() ? 0 : it->second.getOffset();
  36 + }
  37 +
  38 + // Returns 0 if og is not in table.
  39 + int
  40 + stream_number(int id) const
  41 + {
  42 + auto it = find(QPDFObjGen(id, 0));
  43 + return it == end() ? 0 : it->second.getObjStreamNumber();
  44 + }
  45 +
  46 + int
  47 + stream_index(int id) const
  48 + {
  49 + auto it = find(QPDFObjGen(id, 0));
  50 + return it == end() ? 0 : it->second.getObjStreamIndex();
  51 + }
  52 +
22 QPDFObjectHandle trailer; 53 QPDFObjectHandle trailer;
23 bool reconstructed{false}; 54 bool reconstructed{false};
24 // Various tables are indexed by object id, with potential size id + 1 55 // Various tables are indexed by object id, with potential size id + 1