Commit 837dcf8fc2546a80f205a0c4c53e5a1545c53a84

Authored by Jay Berkenbilt
1 parent a0135918

Don't call assert while checking linearization data (fixes #209, #231)

Instead of calling assert for problems found while checking
linearization data, throw an exception that is later caught and
reported as an error. Ideally we would handle errors more robustly, but
this is still a significant improvement.
ChangeLog
1 1 2019-01-04 Jay Berkenbilt <ejb@ql.org>
2 2  
  3 + * When unexpected errors are found while checking linearization
  4 + data, print an error message instead of calling assert, which
  5 + causes the program to crash. Fixes #209, #231.
  6 +
3 7 * Detect and recover from dangling references. If a PDF file
4 8 contained an indirect reference to a non-existent object (which is
5 9 valid), when adding a new object to the file, it was possible for
... ...
... ... @@ -391,4 +391,9 @@ I find it useful to make reference to them in this list
391 391 hint stream contents. Consider adding an option to provide a
392 392 human-readable dump of linearization hint tables. This should
393 393 include improving the 'overflow reading bit stream' message as
394   - reported in issue #2.
  394 + reported in issue #2. There are multiple calls to stopOnError in
  395 + the linearization checking code. Ideally, these should not
  396 + terminate checking. It would require re-acquiring an understanding
  397 + of all that code to make the checks more robust. In particular,
  398 + it's hard to look at the code and quickly determine what is a true
  399 + logic error and what could happen because of malformed user input.
... ...
include/qpdf/QPDF.hh
... ... @@ -726,6 +726,7 @@ class QPDF
726 726 PointerHolder<QPDFObject> resolve(int objid, int generation);
727 727 void resolveObjectsInStream(int obj_stream_number);
728 728 void findAttachmentStreams();
  729 + void stopOnError(std::string const& message);
729 730  
730 731 // Calls finish() on the pipeline when done but does not delete it
731 732 bool pipeStreamData(int objid, int generation,
... ...
libqpdf/QPDF.cc
... ... @@ -2609,3 +2609,13 @@ QPDF::findAttachmentStreams()
2609 2609 }
2610 2610 }
2611 2611 }
  2612 +
  2613 +void
  2614 +QPDF::stopOnError(std::string const& message)
  2615 +{
  2616 + // Throw a generic exception when we lack context for something
  2617 + // more specific. New code should not use this. This method exists
  2618 + // to improve somewhat from calling assert in very old code.
  2619 + throw QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(),
  2620 + "", this->m->file->getLastOffset(), message);
  2621 +}
... ...
libqpdf/QPDF_linearization.cc
... ... @@ -675,14 +675,20 @@ QPDF::checkLinearizationInternal()
675 675 qpdf_offset_t
676 676 QPDF::maxEnd(ObjUser const& ou)
677 677 {
678   - assert(this->m->obj_user_to_objects.count(ou) > 0);
  678 + if (this->m->obj_user_to_objects.count(ou) == 0)
  679 + {
  680 + stopOnError("no entry in object user table for requested object user");
  681 + }
679 682 std::set<QPDFObjGen> const& ogs = this->m->obj_user_to_objects[ou];
680 683 qpdf_offset_t end = 0;
681 684 for (std::set<QPDFObjGen>::const_iterator iter = ogs.begin();
682 685 iter != ogs.end(); ++iter)
683 686 {
684 687 QPDFObjGen const& og = *iter;
685   - assert(this->m->obj_cache.count(og) > 0);
  688 + if (this->m->obj_cache.count(og) == 0)
  689 + {
  690 + stopOnError("unknown object referenced in object user table");
  691 + }
686 692 end = std::max(end, this->m->obj_cache[og].end_after_space);
687 693 }
688 694 return end;
... ... @@ -745,7 +751,11 @@ QPDF::lengthNextN(int first_object, int n,
745 751 }
746 752 else
747 753 {
748   - assert(this->m->obj_cache.count(og) > 0);
  754 + if (this->m->obj_cache.count(og) == 0)
  755 + {
  756 + stopOnError("found unknown object while"
  757 + " calculating length for linearization data");
  758 + }
749 759 length += this->m->obj_cache[og].end_after_space -
750 760 getLinearizationOffset(og);
751 761 }
... ... @@ -780,7 +790,10 @@ QPDF::checkHPageOffset(std::list<std::string>& errors,
780 790 int table_offset = adjusted_offset(
781 791 this->m->page_offset_hints.first_page_offset);
782 792 QPDFObjGen first_page_og(pages.at(0).getObjGen());
783   - assert(this->m->xref_table.count(first_page_og) > 0);
  793 + if (this->m->xref_table.count(first_page_og) == 0)
  794 + {
  795 + stopOnError("supposed first page object is not known");
  796 + }
784 797 int offset = getLinearizationOffset(first_page_og);
785 798 if (table_offset != offset)
786 799 {
... ... @@ -791,7 +804,10 @@ QPDF::checkHPageOffset(std::list<std::string>& errors,
791 804 {
792 805 QPDFObjGen page_og(pages.at(pageno).getObjGen());
793 806 int first_object = page_og.getObj();
794   - assert(this->m->xref_table.count(page_og) > 0);
  807 + if (this->m->xref_table.count(page_og) == 0)
  808 + {
  809 + stopOnError("unknown object in page offset hint table");
  810 + }
795 811 offset = getLinearizationOffset(page_og);
796 812  
797 813 HPageOffsetEntry& he = this->m->page_offset_hints.entries.at(pageno);
... ... @@ -955,7 +971,10 @@ QPDF::checkHSharedObject(std::list<std::string>& errors,
955 971 cur_object = so.first_shared_obj;
956 972  
957 973 QPDFObjGen og(cur_object, 0);
958   - assert(this->m->xref_table.count(og) > 0);
  974 + if (this->m->xref_table.count(og) == 0)
  975 + {
  976 + stopOnError("unknown object in shared object hint table");
  977 + }
959 978 int offset = getLinearizationOffset(og);
960 979 int h_offset = adjusted_offset(so.first_shared_offset);
961 980 if (offset != h_offset)
... ... @@ -1018,7 +1037,10 @@ QPDF::checkHOutlines(std::list<std::string>& warnings)
1018 1037 return;
1019 1038 }
1020 1039 QPDFObjGen og(outlines.getObjGen());
1021   - assert(this->m->xref_table.count(og) > 0);
  1040 + if (this->m->xref_table.count(og) == 0)
  1041 + {
  1042 + stopOnError("unknown object in outlines hint table");
  1043 + }
1022 1044 int offset = getLinearizationOffset(og);
1023 1045 ObjUser ou(ObjUser::ou_root_key, "/Outlines");
1024 1046 int length = maxEnd(ou) - offset;
... ... @@ -1513,7 +1535,11 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
1513 1535  
1514 1536 // Part 4: open document objects. We don't care about the order.
1515 1537  
1516   - assert(lc_root.size() == 1);
  1538 + if (lc_root.size() != 1)
  1539 + {
  1540 + stopOnError("found other than one root while"
  1541 + " calculating linearization data");
  1542 + }
1517 1543 this->m->part4.push_back(objGenToIndirect(*(lc_root.begin())));
1518 1544 for (std::set<QPDFObjGen>::iterator iter = lc_open_document.begin();
1519 1545 iter != lc_open_document.end(); ++iter)
... ... @@ -1594,7 +1620,11 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
1594 1620 this->m->c_page_offset_data.entries.at(i).nobjects = 1;
1595 1621  
1596 1622 ObjUser ou(ObjUser::ou_page, i);
1597   - assert(this->m->obj_user_to_objects.count(ou) > 0);
  1623 + if (this->m->obj_user_to_objects.count(ou) == 0)
  1624 + {
  1625 + stopOnError("found unreferenced page while"
  1626 + " calculating linearization data");
  1627 + }
1598 1628 std::set<QPDFObjGen> ogs = this->m->obj_user_to_objects[ou];
1599 1629 for (std::set<QPDFObjGen>::iterator iter = ogs.begin();
1600 1630 iter != ogs.end(); ++iter)
... ... @@ -1638,7 +1668,11 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
1638 1668 // Place the pages tree.
1639 1669 std::set<QPDFObjGen> pages_ogs =
1640 1670 this->m->obj_user_to_objects[ObjUser(ObjUser::ou_root_key, "/Pages")];
1641   - assert(! pages_ogs.empty());
  1671 + if (pages_ogs.empty())
  1672 + {
  1673 + stopOnError("found empty pages tree while"
  1674 + " calculating linearization data");
  1675 + }
1642 1676 for (std::set<QPDFObjGen>::iterator iter = pages_ogs.begin();
1643 1677 iter != pages_ogs.end(); ++iter)
1644 1678 {
... ... @@ -1790,7 +1824,11 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
1790 1824 {
1791 1825 CHPageOffsetEntry& pe = this->m->c_page_offset_data.entries.at(i);
1792 1826 ObjUser ou(ObjUser::ou_page, i);
1793   - assert(this->m->obj_user_to_objects.count(ou) > 0);
  1827 + if (this->m->obj_user_to_objects.count(ou) == 0)
  1828 + {
  1829 + stopOnError("found unreferenced page while"
  1830 + " calculating linearization data");
  1831 + }
1794 1832 std::set<QPDFObjGen> const& ogs = this->m->obj_user_to_objects[ou];
1795 1833 for (std::set<QPDFObjGen>::const_iterator iter = ogs.begin();
1796 1834 iter != ogs.end(); ++iter)
... ... @@ -1869,12 +1907,20 @@ QPDF::outputLengthNextN(
1869 1907 // the output file starting with whatever object in_object from
1870 1908 // the input file mapped to.
1871 1909  
1872   - assert(obj_renumber.count(in_object) > 0);
  1910 + if (obj_renumber.count(in_object) == 0)
  1911 + {
  1912 + stopOnError("found object that is not renumbered while"
  1913 + " writing linearization data");
  1914 + }
1873 1915 int first = (*(obj_renumber.find(in_object))).second;
1874 1916 int length = 0;
1875 1917 for (int i = 0; i < n; ++i)
1876 1918 {
1877   - assert(lengths.count(first + i) > 0);
  1919 + if (lengths.count(first + i) == 0)
  1920 + {
  1921 + stopOnError("found item with unknown length"
  1922 + " while writing linearization data");
  1923 + }
1878 1924 length += (*(lengths.find(first + i))).second;
1879 1925 }
1880 1926 return length;
... ... @@ -1958,8 +2004,12 @@ QPDF::calculateHPageOffset(
1958 2004 for (unsigned int i = 0; i < npages; ++i)
1959 2005 {
1960 2006 // Adjust delta entries
1961   - assert(phe.at(i).delta_nobjects >= min_nobjects);
1962   - assert(phe.at(i).delta_page_length >= min_length);
  2007 + if ((phe.at(i).delta_nobjects < min_nobjects) ||
  2008 + (phe.at(i).delta_page_length < min_length))
  2009 + {
  2010 + stopOnError("found too small delta nobjects or delta page length"
  2011 + " while writing linearization data");
  2012 + }
1963 2013 phe.at(i).delta_nobjects -= min_nobjects;
1964 2014 phe.at(i).delta_page_length -= min_length;
1965 2015 phe.at(i).delta_content_length = phe.at(i).delta_page_length;
... ... @@ -2019,7 +2069,11 @@ QPDF::calculateHSharedObject(
2019 2069 for (int i = 0; i < cso.nshared_total; ++i)
2020 2070 {
2021 2071 // Adjust deltas
2022   - assert(soe.at(i).delta_group_length >= min_length);
  2072 + if (soe.at(i).delta_group_length < min_length)
  2073 + {
  2074 + stopOnError("found too small group length while"
  2075 + " writing linearization data");
  2076 + }
2023 2077 soe.at(i).delta_group_length -= min_length;
2024 2078 }
2025 2079 }
... ... @@ -2158,7 +2212,11 @@ QPDF::writeHSharedObject(BitWriter& w)
2158 2212 for (int i = 0; i < nitems; ++i)
2159 2213 {
2160 2214 // If signature were present, we'd have to write a 128-bit hash.
2161   - assert(entries.at(i).signature_present == 0);
  2215 + if (entries.at(i).signature_present != 0)
  2216 + {
  2217 + stopOnError("found unexpected signature present"
  2218 + " while writing linearization data");
  2219 + }
2162 2220 }
2163 2221 write_vector_int(w, nitems, entries,
2164 2222 t.nbits_nobjects,
... ...
libqpdf/QPDF_optimization.cc
... ... @@ -163,7 +163,12 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
163 163 pushInheritedAttributesToPageInternal(
164 164 this->m->trailer.getKey("/Root").getKey("/Pages"),
165 165 key_ancestors, this->m->all_pages, allow_changes, warn_skipped_keys);
166   - assert(key_ancestors.empty());
  166 + if (! key_ancestors.empty())
  167 + {
  168 + throw std::logic_error(
  169 + "key_ancestors not empty after"
  170 + " pushing inherited attributes to pages");
  171 + }
167 172 this->m->pushed_inherited_attributes_to_pages = true;
168 173 }
169 174  
... ...