Commit 837dcf8fc2546a80f205a0c4c53e5a1545c53a84

Authored by Jay Berkenbilt
1 parent a0135918

Don't call assert while checking linearization data (fixes #209, #231)

Instead of calling assert for problems found during checking
linearization data, throw an exception which is later caught and
issued as an error. Ideally we would handle errors more robustly, but
this is still a significant improvement.
ChangeLog
1 2019-01-04 Jay Berkenbilt <ejb@ql.org> 1 2019-01-04 Jay Berkenbilt <ejb@ql.org>
2 2
  3 + * When unexpected errors are found while checking linearization
  4 + data, print an error message instead of calling assert, which
  5 + causes the program to crash. Fixes #209, #231.
  6 +
3 * Detect and recover from dangling references. If a PDF file 7 * Detect and recover from dangling references. If a PDF file
4 contained an indirect reference to a non-existent object (which is 8 contained an indirect reference to a non-existent object (which is
5 valid), when adding a new object to the file, it was possible for 9 valid), when adding a new object to the file, it was possible for
@@ -391,4 +391,9 @@ I find it useful to make reference to them in this list @@ -391,4 +391,9 @@ I find it useful to make reference to them in this list
391 hint stream contents. Consider adding an option to provide a 391 hint stream contents. Consider adding an option to provide a
392 human-readable dump of linearization hint tables. This should 392 human-readable dump of linearization hint tables. This should
393 include improving the 'overflow reading bit stream' message as 393 include improving the 'overflow reading bit stream' message as
394 - reported in issue #2. 394 + reported in issue #2. There are multiple calls to stopOnError in
  395 + the linearization checking code. Ideally, these should not
  396 + terminate checking. It would require re-acquiring an understanding
  397 + of all that code to make the checks more robust. In particular,
  398 + it's hard to look at the code and quickly determine what is a true
  399 + logic error and what could happen because of malformed user input.
include/qpdf/QPDF.hh
@@ -726,6 +726,7 @@ class QPDF @@ -726,6 +726,7 @@ class QPDF
726 PointerHolder<QPDFObject> resolve(int objid, int generation); 726 PointerHolder<QPDFObject> resolve(int objid, int generation);
727 void resolveObjectsInStream(int obj_stream_number); 727 void resolveObjectsInStream(int obj_stream_number);
728 void findAttachmentStreams(); 728 void findAttachmentStreams();
  729 + void stopOnError(std::string const& message);
729 730
730 // Calls finish() on the pipeline when done but does not delete it 731 // Calls finish() on the pipeline when done but does not delete it
731 bool pipeStreamData(int objid, int generation, 732 bool pipeStreamData(int objid, int generation,
libqpdf/QPDF.cc
@@ -2609,3 +2609,13 @@ QPDF::findAttachmentStreams() @@ -2609,3 +2609,13 @@ QPDF::findAttachmentStreams()
2609 } 2609 }
2610 } 2610 }
2611 } 2611 }
  2612 +
  2613 +void
  2614 +QPDF::stopOnError(std::string const& message)
  2615 +{
  2616 + // Throw a generic exception when we lack context for something
  2617 + // more specific. New code should not use this. This method exists
  2618 + // to improve somewhat from calling assert in very old code.
  2619 + throw QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(),
  2620 + "", this->m->file->getLastOffset(), message);
  2621 +}
libqpdf/QPDF_linearization.cc
@@ -675,14 +675,20 @@ QPDF::checkLinearizationInternal() @@ -675,14 +675,20 @@ QPDF::checkLinearizationInternal()
675 qpdf_offset_t 675 qpdf_offset_t
676 QPDF::maxEnd(ObjUser const& ou) 676 QPDF::maxEnd(ObjUser const& ou)
677 { 677 {
678 - assert(this->m->obj_user_to_objects.count(ou) > 0); 678 + if (this->m->obj_user_to_objects.count(ou) == 0)
  679 + {
  680 + stopOnError("no entry in object user table for requested object user");
  681 + }
679 std::set<QPDFObjGen> const& ogs = this->m->obj_user_to_objects[ou]; 682 std::set<QPDFObjGen> const& ogs = this->m->obj_user_to_objects[ou];
680 qpdf_offset_t end = 0; 683 qpdf_offset_t end = 0;
681 for (std::set<QPDFObjGen>::const_iterator iter = ogs.begin(); 684 for (std::set<QPDFObjGen>::const_iterator iter = ogs.begin();
682 iter != ogs.end(); ++iter) 685 iter != ogs.end(); ++iter)
683 { 686 {
684 QPDFObjGen const& og = *iter; 687 QPDFObjGen const& og = *iter;
685 - assert(this->m->obj_cache.count(og) > 0); 688 + if (this->m->obj_cache.count(og) == 0)
  689 + {
  690 + stopOnError("unknown object referenced in object user table");
  691 + }
686 end = std::max(end, this->m->obj_cache[og].end_after_space); 692 end = std::max(end, this->m->obj_cache[og].end_after_space);
687 } 693 }
688 return end; 694 return end;
@@ -745,7 +751,11 @@ QPDF::lengthNextN(int first_object, int n, @@ -745,7 +751,11 @@ QPDF::lengthNextN(int first_object, int n,
745 } 751 }
746 else 752 else
747 { 753 {
748 - assert(this->m->obj_cache.count(og) > 0); 754 + if (this->m->obj_cache.count(og) == 0)
  755 + {
  756 + stopOnError("found unknown object while"
  757 + " calculating length for linearization data");
  758 + }
749 length += this->m->obj_cache[og].end_after_space - 759 length += this->m->obj_cache[og].end_after_space -
750 getLinearizationOffset(og); 760 getLinearizationOffset(og);
751 } 761 }
@@ -780,7 +790,10 @@ QPDF::checkHPageOffset(std::list<std::string>& errors, @@ -780,7 +790,10 @@ QPDF::checkHPageOffset(std::list<std::string>& errors,
780 int table_offset = adjusted_offset( 790 int table_offset = adjusted_offset(
781 this->m->page_offset_hints.first_page_offset); 791 this->m->page_offset_hints.first_page_offset);
782 QPDFObjGen first_page_og(pages.at(0).getObjGen()); 792 QPDFObjGen first_page_og(pages.at(0).getObjGen());
783 - assert(this->m->xref_table.count(first_page_og) > 0); 793 + if (this->m->xref_table.count(first_page_og) == 0)
  794 + {
  795 + stopOnError("supposed first page object is not known");
  796 + }
784 int offset = getLinearizationOffset(first_page_og); 797 int offset = getLinearizationOffset(first_page_og);
785 if (table_offset != offset) 798 if (table_offset != offset)
786 { 799 {
@@ -791,7 +804,10 @@ QPDF::checkHPageOffset(std::list<std::string>& errors, @@ -791,7 +804,10 @@ QPDF::checkHPageOffset(std::list<std::string>& errors,
791 { 804 {
792 QPDFObjGen page_og(pages.at(pageno).getObjGen()); 805 QPDFObjGen page_og(pages.at(pageno).getObjGen());
793 int first_object = page_og.getObj(); 806 int first_object = page_og.getObj();
794 - assert(this->m->xref_table.count(page_og) > 0); 807 + if (this->m->xref_table.count(page_og) == 0)
  808 + {
  809 + stopOnError("unknown object in page offset hint table");
  810 + }
795 offset = getLinearizationOffset(page_og); 811 offset = getLinearizationOffset(page_og);
796 812
797 HPageOffsetEntry& he = this->m->page_offset_hints.entries.at(pageno); 813 HPageOffsetEntry& he = this->m->page_offset_hints.entries.at(pageno);
@@ -955,7 +971,10 @@ QPDF::checkHSharedObject(std::list<std::string>& errors, @@ -955,7 +971,10 @@ QPDF::checkHSharedObject(std::list<std::string>& errors,
955 cur_object = so.first_shared_obj; 971 cur_object = so.first_shared_obj;
956 972
957 QPDFObjGen og(cur_object, 0); 973 QPDFObjGen og(cur_object, 0);
958 - assert(this->m->xref_table.count(og) > 0); 974 + if (this->m->xref_table.count(og) == 0)
  975 + {
  976 + stopOnError("unknown object in shared object hint table");
  977 + }
959 int offset = getLinearizationOffset(og); 978 int offset = getLinearizationOffset(og);
960 int h_offset = adjusted_offset(so.first_shared_offset); 979 int h_offset = adjusted_offset(so.first_shared_offset);
961 if (offset != h_offset) 980 if (offset != h_offset)
@@ -1018,7 +1037,10 @@ QPDF::checkHOutlines(std::list<std::string>& warnings) @@ -1018,7 +1037,10 @@ QPDF::checkHOutlines(std::list<std::string>& warnings)
1018 return; 1037 return;
1019 } 1038 }
1020 QPDFObjGen og(outlines.getObjGen()); 1039 QPDFObjGen og(outlines.getObjGen());
1021 - assert(this->m->xref_table.count(og) > 0); 1040 + if (this->m->xref_table.count(og) == 0)
  1041 + {
  1042 + stopOnError("unknown object in outlines hint table");
  1043 + }
1022 int offset = getLinearizationOffset(og); 1044 int offset = getLinearizationOffset(og);
1023 ObjUser ou(ObjUser::ou_root_key, "/Outlines"); 1045 ObjUser ou(ObjUser::ou_root_key, "/Outlines");
1024 int length = maxEnd(ou) - offset; 1046 int length = maxEnd(ou) - offset;
@@ -1513,7 +1535,11 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) @@ -1513,7 +1535,11 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
1513 1535
1514 // Part 4: open document objects. We don't care about the order. 1536 // Part 4: open document objects. We don't care about the order.
1515 1537
1516 - assert(lc_root.size() == 1); 1538 + if (lc_root.size() != 1)
  1539 + {
  1540 + stopOnError("found other than one root while"
  1541 + " calculating linearization data");
  1542 + }
1517 this->m->part4.push_back(objGenToIndirect(*(lc_root.begin()))); 1543 this->m->part4.push_back(objGenToIndirect(*(lc_root.begin())));
1518 for (std::set<QPDFObjGen>::iterator iter = lc_open_document.begin(); 1544 for (std::set<QPDFObjGen>::iterator iter = lc_open_document.begin();
1519 iter != lc_open_document.end(); ++iter) 1545 iter != lc_open_document.end(); ++iter)
@@ -1594,7 +1620,11 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) @@ -1594,7 +1620,11 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
1594 this->m->c_page_offset_data.entries.at(i).nobjects = 1; 1620 this->m->c_page_offset_data.entries.at(i).nobjects = 1;
1595 1621
1596 ObjUser ou(ObjUser::ou_page, i); 1622 ObjUser ou(ObjUser::ou_page, i);
1597 - assert(this->m->obj_user_to_objects.count(ou) > 0); 1623 + if (this->m->obj_user_to_objects.count(ou) == 0)
  1624 + {
  1625 + stopOnError("found unreferenced page while"
  1626 + " calculating linearization data");
  1627 + }
1598 std::set<QPDFObjGen> ogs = this->m->obj_user_to_objects[ou]; 1628 std::set<QPDFObjGen> ogs = this->m->obj_user_to_objects[ou];
1599 for (std::set<QPDFObjGen>::iterator iter = ogs.begin(); 1629 for (std::set<QPDFObjGen>::iterator iter = ogs.begin();
1600 iter != ogs.end(); ++iter) 1630 iter != ogs.end(); ++iter)
@@ -1638,7 +1668,11 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) @@ -1638,7 +1668,11 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
1638 // Place the pages tree. 1668 // Place the pages tree.
1639 std::set<QPDFObjGen> pages_ogs = 1669 std::set<QPDFObjGen> pages_ogs =
1640 this->m->obj_user_to_objects[ObjUser(ObjUser::ou_root_key, "/Pages")]; 1670 this->m->obj_user_to_objects[ObjUser(ObjUser::ou_root_key, "/Pages")];
1641 - assert(! pages_ogs.empty()); 1671 + if (pages_ogs.empty())
  1672 + {
  1673 + stopOnError("found empty pages tree while"
  1674 + " calculating linearization data");
  1675 + }
1642 for (std::set<QPDFObjGen>::iterator iter = pages_ogs.begin(); 1676 for (std::set<QPDFObjGen>::iterator iter = pages_ogs.begin();
1643 iter != pages_ogs.end(); ++iter) 1677 iter != pages_ogs.end(); ++iter)
1644 { 1678 {
@@ -1790,7 +1824,11 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) @@ -1790,7 +1824,11 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
1790 { 1824 {
1791 CHPageOffsetEntry& pe = this->m->c_page_offset_data.entries.at(i); 1825 CHPageOffsetEntry& pe = this->m->c_page_offset_data.entries.at(i);
1792 ObjUser ou(ObjUser::ou_page, i); 1826 ObjUser ou(ObjUser::ou_page, i);
1793 - assert(this->m->obj_user_to_objects.count(ou) > 0); 1827 + if (this->m->obj_user_to_objects.count(ou) == 0)
  1828 + {
  1829 + stopOnError("found unreferenced page while"
  1830 + " calculating linearization data");
  1831 + }
1794 std::set<QPDFObjGen> const& ogs = this->m->obj_user_to_objects[ou]; 1832 std::set<QPDFObjGen> const& ogs = this->m->obj_user_to_objects[ou];
1795 for (std::set<QPDFObjGen>::const_iterator iter = ogs.begin(); 1833 for (std::set<QPDFObjGen>::const_iterator iter = ogs.begin();
1796 iter != ogs.end(); ++iter) 1834 iter != ogs.end(); ++iter)
@@ -1869,12 +1907,20 @@ QPDF::outputLengthNextN( @@ -1869,12 +1907,20 @@ QPDF::outputLengthNextN(
1869 // the output file starting with whatever object in_object from 1907 // the output file starting with whatever object in_object from
1870 // the input file mapped to. 1908 // the input file mapped to.
1871 1909
1872 - assert(obj_renumber.count(in_object) > 0); 1910 + if (obj_renumber.count(in_object) == 0)
  1911 + {
  1912 + stopOnError("found object that is not renumbered while"
  1913 + " writing linearization data");
  1914 + }
1873 int first = (*(obj_renumber.find(in_object))).second; 1915 int first = (*(obj_renumber.find(in_object))).second;
1874 int length = 0; 1916 int length = 0;
1875 for (int i = 0; i < n; ++i) 1917 for (int i = 0; i < n; ++i)
1876 { 1918 {
1877 - assert(lengths.count(first + i) > 0); 1919 + if (lengths.count(first + i) == 0)
  1920 + {
  1921 + stopOnError("found item with unknown length"
  1922 + " while writing linearization data");
  1923 + }
1878 length += (*(lengths.find(first + i))).second; 1924 length += (*(lengths.find(first + i))).second;
1879 } 1925 }
1880 return length; 1926 return length;
@@ -1958,8 +2004,12 @@ QPDF::calculateHPageOffset( @@ -1958,8 +2004,12 @@ QPDF::calculateHPageOffset(
1958 for (unsigned int i = 0; i < npages; ++i) 2004 for (unsigned int i = 0; i < npages; ++i)
1959 { 2005 {
1960 // Adjust delta entries 2006 // Adjust delta entries
1961 - assert(phe.at(i).delta_nobjects >= min_nobjects);  
1962 - assert(phe.at(i).delta_page_length >= min_length); 2007 + if ((phe.at(i).delta_nobjects < min_nobjects) ||
  2008 + (phe.at(i).delta_page_length < min_length))
  2009 + {
  2010 + stopOnError("found too small delta nobjects or delta page length"
  2011 + " while writing linearization data");
  2012 + }
1963 phe.at(i).delta_nobjects -= min_nobjects; 2013 phe.at(i).delta_nobjects -= min_nobjects;
1964 phe.at(i).delta_page_length -= min_length; 2014 phe.at(i).delta_page_length -= min_length;
1965 phe.at(i).delta_content_length = phe.at(i).delta_page_length; 2015 phe.at(i).delta_content_length = phe.at(i).delta_page_length;
@@ -2019,7 +2069,11 @@ QPDF::calculateHSharedObject( @@ -2019,7 +2069,11 @@ QPDF::calculateHSharedObject(
2019 for (int i = 0; i < cso.nshared_total; ++i) 2069 for (int i = 0; i < cso.nshared_total; ++i)
2020 { 2070 {
2021 // Adjust deltas 2071 // Adjust deltas
2022 - assert(soe.at(i).delta_group_length >= min_length); 2072 + if (soe.at(i).delta_group_length < min_length)
  2073 + {
  2074 + stopOnError("found too small group length while"
  2075 + " writing linearization data");
  2076 + }
2023 soe.at(i).delta_group_length -= min_length; 2077 soe.at(i).delta_group_length -= min_length;
2024 } 2078 }
2025 } 2079 }
@@ -2158,7 +2212,11 @@ QPDF::writeHSharedObject(BitWriter& w) @@ -2158,7 +2212,11 @@ QPDF::writeHSharedObject(BitWriter& w)
2158 for (int i = 0; i < nitems; ++i) 2212 for (int i = 0; i < nitems; ++i)
2159 { 2213 {
2160 // If signature were present, we'd have to write a 128-bit hash. 2214 // If signature were present, we'd have to write a 128-bit hash.
2161 - assert(entries.at(i).signature_present == 0); 2215 + if (entries.at(i).signature_present != 0)
  2216 + {
  2217 + stopOnError("found unexpected signature present"
  2218 + " while writing linearization data");
  2219 + }
2162 } 2220 }
2163 write_vector_int(w, nitems, entries, 2221 write_vector_int(w, nitems, entries,
2164 t.nbits_nobjects, 2222 t.nbits_nobjects,
libqpdf/QPDF_optimization.cc
@@ -163,7 +163,12 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) @@ -163,7 +163,12 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
163 pushInheritedAttributesToPageInternal( 163 pushInheritedAttributesToPageInternal(
164 this->m->trailer.getKey("/Root").getKey("/Pages"), 164 this->m->trailer.getKey("/Root").getKey("/Pages"),
165 key_ancestors, this->m->all_pages, allow_changes, warn_skipped_keys); 165 key_ancestors, this->m->all_pages, allow_changes, warn_skipped_keys);
166 - assert(key_ancestors.empty()); 166 + if (! key_ancestors.empty())
  167 + {
  168 + throw std::logic_error(
  169 + "key_ancestors not empty after"
  170 + " pushing inherited attributes to pages");
  171 + }
167 this->m->pushed_inherited_attributes_to_pages = true; 172 this->m->pushed_inherited_attributes_to_pages = true;
168 } 173 }
169 174