Commit b07ad6794eea175ee7f4b8c505995bdb4f397ce6

Authored by Jay Berkenbilt
1 parent a35d4ce9

Fix bugs found by fuzz tests

* Several assertions in linearization were not always true; change
  them to run time errors
* Handle a few cases of uninitialized objects
* Handle pages with no contents when doing form operations
* Handle invalid page tree nodes when traversing pages
1 -OSS-Fuzz  
2 -========  
3 -  
4 -* Fix open issues from https://oss-fuzz.com  
5 -  
6 Next ABI 1 Next ABI
7 ======== 2 ========
8 3
libqpdf/QPDFFormFieldObjectHelper.cc
@@ -43,6 +43,10 @@ QPDFObjectHandle @@ -43,6 +43,10 @@ QPDFObjectHandle
43 QPDFFormFieldObjectHelper::getInheritableFieldValue(std::string const& name) 43 QPDFFormFieldObjectHelper::getInheritableFieldValue(std::string const& name)
44 { 44 {
45 QPDFObjectHandle node = this->oh; 45 QPDFObjectHandle node = this->oh;
  46 + if (! node.isDictionary())
  47 + {
  48 + return QPDFObjectHandle::newNull();
  49 + }
46 QPDFObjectHandle result(node.getKey(name)); 50 QPDFObjectHandle result(node.getKey(name));
47 std::set<QPDFObjGen> seen; 51 std::set<QPDFObjGen> seen;
48 while (result.isNull() && node.hasKey("/Parent")) 52 while (result.isNull() && node.hasKey("/Parent"))
@@ -896,7 +900,8 @@ QPDFFormFieldObjectHelper::generateTextAppearance( @@ -896,7 +900,8 @@ QPDFFormFieldObjectHelper::generateTextAppearance(
896 QPDFObjectHandle dr = getInheritableFieldValue("/DR"); 900 QPDFObjectHandle dr = getInheritableFieldValue("/DR");
897 font = getFontFromResource(dr, font_name); 901 font = getFontFromResource(dr, font_name);
898 } 902 }
899 - if (font.isDictionary() && 903 + if (font.isInitialized() &&
  904 + font.isDictionary() &&
900 font.getKey("/Encoding").isName()) 905 font.getKey("/Encoding").isName())
901 { 906 {
902 std::string encoding = font.getKey("/Encoding").getName(); 907 std::string encoding = font.getKey("/Encoding").getName();
libqpdf/QPDFObjectHandle.cc
@@ -1407,6 +1407,12 @@ QPDFObjectHandle::coalesceContentStreams() @@ -1407,6 +1407,12 @@ QPDFObjectHandle::coalesceContentStreams()
1407 QTC::TC("qpdf", "QPDFObjectHandle coalesce called on stream"); 1407 QTC::TC("qpdf", "QPDFObjectHandle coalesce called on stream");
1408 return; 1408 return;
1409 } 1409 }
  1410 + else if (! contents.isArray())
  1411 + {
  1412 + // /Contents is optional for pages, and some very damaged
  1413 + // files may have pages that are invalid in other ways.
  1414 + return;
  1415 + }
1410 QPDF* qpdf = getOwningQPDF(); 1416 QPDF* qpdf = getOwningQPDF();
1411 if (qpdf == 0) 1417 if (qpdf == 0)
1412 { 1418 {
libqpdf/QPDFWriter.cc
@@ -851,9 +851,11 @@ QPDFWriter::parseVersion(std::string const&amp; version, @@ -851,9 +851,11 @@ QPDFWriter::parseVersion(std::string const&amp; version,
851 QUtil::int_to_string(minor); 851 QUtil::int_to_string(minor);
852 if (tmp != version) 852 if (tmp != version)
853 { 853 {
854 - throw std::logic_error(  
855 - "INTERNAL ERROR: QPDFWriter::parseVersion"  
856 - " called with invalid version number " + version); 854 + // The version number in the input is probably invalid. This
  855 + // happens with some files that are designed to exercise bugs,
  856 + // such as files in the fuzzer corpus. Unfortunately
  857 + // QPDFWriter doesn't have a way to give a warning, so we just
  858 + // ignore this case.
857 } 859 }
858 } 860 }
859 861
@@ -3013,7 +3015,7 @@ QPDFWriter::discardGeneration(std::map&lt;QPDFObjGen, int&gt; const&amp; in, @@ -3013,7 +3015,7 @@ QPDFWriter::discardGeneration(std::map&lt;QPDFObjGen, int&gt; const&amp; in,
3013 { 3015 {
3014 if (out.count((*iter).first.getObj())) 3016 if (out.count((*iter).first.getObj()))
3015 { 3017 {
3016 - throw std::logic_error( 3018 + throw std::runtime_error(
3017 "QPDF cannot currently linearize files that contain" 3019 "QPDF cannot currently linearize files that contain"
3018 " multiple objects with the same object ID and different" 3020 " multiple objects with the same object ID and different"
3019 " generations. If you see this error message, please file" 3021 " generations. If you see this error message, please file"
@@ -3130,15 +3132,33 @@ QPDFWriter::writeLinearized() @@ -3130,15 +3132,33 @@ QPDFWriter::writeLinearized()
3130 3132
3131 this->m->next_objid = part4_first_obj; 3133 this->m->next_objid = part4_first_obj;
3132 enqueuePart(part4); 3134 enqueuePart(part4);
3133 - assert(this->m->next_objid == after_part4); 3135 + if (this->m->next_objid != after_part4)
  3136 + {
  3137 + // This can happen with very botched files as in the fuzzer
  3138 + // test. There are likely some faulty assumptions in
  3139 + // calculateLinearizationData
  3140 + throw std::runtime_error(
  3141 + "error encountered after"
  3142 + " writing part 4 of linearized data");
  3143 + }
3134 this->m->next_objid = part6_first_obj; 3144 this->m->next_objid = part6_first_obj;
3135 enqueuePart(part6); 3145 enqueuePart(part6);
3136 - assert(this->m->next_objid == after_part6); 3146 + if (this->m->next_objid != after_part6)
  3147 + {
  3148 + throw std::runtime_error(
  3149 + "error encountered after"
  3150 + " writing part 6 of linearized data");
  3151 + }
3137 this->m->next_objid = second_half_first_obj; 3152 this->m->next_objid = second_half_first_obj;
3138 enqueuePart(part7); 3153 enqueuePart(part7);
3139 enqueuePart(part8); 3154 enqueuePart(part8);
3140 enqueuePart(part9); 3155 enqueuePart(part9);
3141 - assert(this->m->next_objid == after_second_half); 3156 + if (this->m->next_objid != after_second_half)
  3157 + {
  3158 + throw std::runtime_error(
  3159 + "error encountered after"
  3160 + " writing part 9 of linearized data");
  3161 + }
3142 3162
3143 qpdf_offset_t hint_length = 0; 3163 qpdf_offset_t hint_length = 0;
3144 PointerHolder<Buffer> hint_buffer; 3164 PointerHolder<Buffer> hint_buffer;
libqpdf/QPDF_linearization.cc
@@ -612,7 +612,7 @@ QPDF::checkLinearizationInternal() @@ -612,7 +612,7 @@ QPDF::checkLinearizationInternal()
612 612
613 if (this->m->part6.empty()) 613 if (this->m->part6.empty())
614 { 614 {
615 - throw std::logic_error("linearization part 6 unexpectedly empty"); 615 + stopOnError("linearization part 6 unexpectedly empty");
616 } 616 }
617 qpdf_offset_t min_E = -1; 617 qpdf_offset_t min_E = -1;
618 qpdf_offset_t max_E = -1; 618 qpdf_offset_t max_E = -1;
@@ -714,7 +714,7 @@ QPDF::getLinearizationOffset(QPDFObjGen const&amp; og) @@ -714,7 +714,7 @@ QPDF::getLinearizationOffset(QPDFObjGen const&amp; og)
714 break; 714 break;
715 715
716 default: 716 default:
717 - throw std::logic_error( 717 + stopOnError(
718 "getLinearizationOffset called for xref entry not of type 1 or 2"); 718 "getLinearizationOffset called for xref entry not of type 1 or 2");
719 break; 719 break;
720 } 720 }
@@ -862,7 +862,7 @@ QPDF::checkHPageOffset(std::list&lt;std::string&gt;&amp; errors, @@ -862,7 +862,7 @@ QPDF::checkHPageOffset(std::list&lt;std::string&gt;&amp; errors,
862 int idx = he.shared_identifiers.at(i); 862 int idx = he.shared_identifiers.at(i);
863 if (shared_idx_to_obj.count(idx) == 0) 863 if (shared_idx_to_obj.count(idx) == 0)
864 { 864 {
865 - throw std::logic_error( 865 + stopOnError(
866 "unable to get object for item in" 866 "unable to get object for item in"
867 " shared objects hint table"); 867 " shared objects hint table");
868 } 868 }
@@ -874,7 +874,7 @@ QPDF::checkHPageOffset(std::list&lt;std::string&gt;&amp; errors, @@ -874,7 +874,7 @@ QPDF::checkHPageOffset(std::list&lt;std::string&gt;&amp; errors,
874 int idx = ce.shared_identifiers.at(i); 874 int idx = ce.shared_identifiers.at(i);
875 if (idx >= this->m->c_shared_object_data.nshared_total) 875 if (idx >= this->m->c_shared_object_data.nshared_total)
876 { 876 {
877 - throw std::logic_error( 877 + stopOnError(
878 "index out of bounds for shared object hint table"); 878 "index out of bounds for shared object hint table");
879 } 879 }
880 int obj = this->m->c_shared_object_data.entries.at(toS(idx)).object; 880 int obj = this->m->c_shared_object_data.entries.at(toS(idx)).object;
@@ -1444,7 +1444,7 @@ QPDF::calculateLinearizationData(std::map&lt;int, int&gt; const&amp; object_stream_data) @@ -1444,7 +1444,7 @@ QPDF::calculateLinearizationData(std::map&lt;int, int&gt; const&amp; object_stream_data)
1444 break; 1444 break;
1445 1445
1446 case ObjUser::ou_bad: 1446 case ObjUser::ou_bad:
1447 - throw std::logic_error( 1447 + stopOnError(
1448 "INTERNAL ERROR: QPDF::calculateLinearizationData: " 1448 "INTERNAL ERROR: QPDF::calculateLinearizationData: "
1449 "invalid user type"); 1449 "invalid user type");
1450 break; 1450 break;
@@ -1558,10 +1558,14 @@ QPDF::calculateLinearizationData(std::map&lt;int, int&gt; const&amp; object_stream_data) @@ -1558,10 +1558,14 @@ QPDF::calculateLinearizationData(std::map&lt;int, int&gt; const&amp; object_stream_data)
1558 // will do the same. 1558 // will do the same.
1559 1559
1560 // First, place the actual first page object itself. 1560 // First, place the actual first page object itself.
  1561 + if (pages.empty())
  1562 + {
  1563 + stopOnError("no pages found while calculating linearization data");
  1564 + }
1561 QPDFObjGen first_page_og(pages.at(0).getObjGen()); 1565 QPDFObjGen first_page_og(pages.at(0).getObjGen());
1562 if (! lc_first_page_private.count(first_page_og)) 1566 if (! lc_first_page_private.count(first_page_og))
1563 { 1567 {
1564 - throw std::logic_error( 1568 + stopOnError(
1565 "INTERNAL ERROR: QPDF::calculateLinearizationData: first page " 1569 "INTERNAL ERROR: QPDF::calculateLinearizationData: first page "
1566 "object not in lc_first_page_private"); 1570 "object not in lc_first_page_private");
1567 } 1571 }
@@ -1611,7 +1615,7 @@ QPDF::calculateLinearizationData(std::map&lt;int, int&gt; const&amp; object_stream_data) @@ -1611,7 +1615,7 @@ QPDF::calculateLinearizationData(std::map&lt;int, int&gt; const&amp; object_stream_data)
1611 QPDFObjGen page_og(pages.at(i).getObjGen()); 1615 QPDFObjGen page_og(pages.at(i).getObjGen());
1612 if (! lc_other_page_private.count(page_og)) 1616 if (! lc_other_page_private.count(page_og))
1613 { 1617 {
1614 - throw std::logic_error( 1618 + stopOnError(
1615 "INTERNAL ERROR: " 1619 "INTERNAL ERROR: "
1616 "QPDF::calculateLinearizationData: page object for page " + 1620 "QPDF::calculateLinearizationData: page object for page " +
1617 QUtil::uint_to_string(i) + " not in lc_other_page_private"); 1621 QUtil::uint_to_string(i) + " not in lc_other_page_private");
@@ -1646,7 +1650,7 @@ QPDF::calculateLinearizationData(std::map&lt;int, int&gt; const&amp; object_stream_data) @@ -1646,7 +1650,7 @@ QPDF::calculateLinearizationData(std::map&lt;int, int&gt; const&amp; object_stream_data)
1646 // That should have covered all part7 objects. 1650 // That should have covered all part7 objects.
1647 if (! lc_other_page_private.empty()) 1651 if (! lc_other_page_private.empty())
1648 { 1652 {
1649 - throw std::logic_error( 1653 + stopOnError(
1650 "INTERNAL ERROR:" 1654 "INTERNAL ERROR:"
1651 " QPDF::calculateLinearizationData: lc_other_page_private is " 1655 " QPDF::calculateLinearizationData: lc_other_page_private is "
1652 "not empty after generation of part7"); 1656 "not empty after generation of part7");
@@ -1731,7 +1735,7 @@ QPDF::calculateLinearizationData(std::map&lt;int, int&gt; const&amp; object_stream_data) @@ -1731,7 +1735,7 @@ QPDF::calculateLinearizationData(std::map&lt;int, int&gt; const&amp; object_stream_data)
1731 } 1735 }
1732 if (! lc_thumbnail_private.empty()) 1736 if (! lc_thumbnail_private.empty())
1733 { 1737 {
1734 - throw std::logic_error( 1738 + stopOnError(
1735 "INTERNAL ERROR: " 1739 "INTERNAL ERROR: "
1736 "QPDF::calculateLinearizationData: lc_thumbnail_private " 1740 "QPDF::calculateLinearizationData: lc_thumbnail_private "
1737 "not empty after placing thumbnails"); 1741 "not empty after placing thumbnails");
@@ -1765,7 +1769,7 @@ QPDF::calculateLinearizationData(std::map&lt;int, int&gt; const&amp; object_stream_data) @@ -1765,7 +1769,7 @@ QPDF::calculateLinearizationData(std::map&lt;int, int&gt; const&amp; object_stream_data)
1765 size_t num_wanted = this->m->object_to_obj_users.size(); 1769 size_t num_wanted = this->m->object_to_obj_users.size();
1766 if (num_placed != num_wanted) 1770 if (num_placed != num_wanted)
1767 { 1771 {
1768 - throw std::logic_error( 1772 + stopOnError(
1769 "INTERNAL ERROR: QPDF::calculateLinearizationData: wrong " 1773 "INTERNAL ERROR: QPDF::calculateLinearizationData: wrong "
1770 "number of objects placed (num_placed = " + 1774 "number of objects placed (num_placed = " +
1771 QUtil::uint_to_string(num_placed) + 1775 QUtil::uint_to_string(num_placed) +
@@ -1820,7 +1824,7 @@ QPDF::calculateLinearizationData(std::map&lt;int, int&gt; const&amp; object_stream_data) @@ -1820,7 +1824,7 @@ QPDF::calculateLinearizationData(std::map&lt;int, int&gt; const&amp; object_stream_data)
1820 if (static_cast<size_t>(this->m->c_shared_object_data.nshared_total) != 1824 if (static_cast<size_t>(this->m->c_shared_object_data.nshared_total) !=
1821 this->m->c_shared_object_data.entries.size()) 1825 this->m->c_shared_object_data.entries.size())
1822 { 1826 {
1823 - throw std::logic_error( 1827 + stopOnError(
1824 "shared object hint table has wrong number of entries"); 1828 "shared object hint table has wrong number of entries");
1825 } 1829 }
1826 1830
@@ -2058,7 +2062,7 @@ QPDF::calculateHSharedObject( @@ -2058,7 +2062,7 @@ QPDF::calculateHSharedObject(
2058 } 2062 }
2059 if (soe.size() != static_cast<size_t>(cso.nshared_total)) 2063 if (soe.size() != static_cast<size_t>(cso.nshared_total))
2060 { 2064 {
2061 - throw std::logic_error("soe has wrong size after initialization"); 2065 + stopOnError("soe has wrong size after initialization");
2062 } 2066 }
2063 2067
2064 so.nshared_total = cso.nshared_total; 2068 so.nshared_total = cso.nshared_total;
libqpdf/QPDF_optimization.cc
@@ -195,6 +195,14 @@ QPDF::pushInheritedAttributesToPageInternal( @@ -195,6 +195,14 @@ QPDF::pushInheritedAttributesToPageInternal(
195 } 195 }
196 visited.insert(this_og); 196 visited.insert(this_og);
197 197
  198 + if (! cur_pages.isDictionary())
  199 + {
  200 + throw QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(),
  201 + this->m->last_object_description,
  202 + this->m->file->getLastOffset(),
  203 + "invalid object in page tree");
  204 + }
  205 +
198 // Extract the underlying dictionary object 206 // Extract the underlying dictionary object
199 std::string type = cur_pages.getKey("/Type").getName(); 207 std::string type = cur_pages.getKey("/Type").getName();
200 208
qpdf/qtest/qpdf/no-contents-coalesce-contents.pdf
@@ -7,21 +7,15 @@ endobj @@ -7,21 +7,15 @@ endobj
7 << /Count 1 /Kids [ 3 0 R ] /Type /Pages >> 7 << /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
8 endobj 8 endobj
9 3 0 obj 9 3 0 obj
10 -<< /Contents 4 0 R /MediaBox [ 0 0 720 720 ] /Parent 2 0 R /Resources << >> /Type /Page >>  
11 -endobj  
12 -4 0 obj  
13 -<< /Length 0 /Filter /FlateDecode >>  
14 -stream  
15 -endstream 10 +<< /MediaBox [ 0 0 720 720 ] /Parent 2 0 R /Resources << >> /Type /Page >>
16 endobj 11 endobj
17 xref 12 xref
18 -0 5 13 +0 4
19 0000000000 65535 f 14 0000000000 65535 f
20 0000000015 00000 n 15 0000000015 00000 n
21 0000000064 00000 n 16 0000000064 00000 n
22 0000000123 00000 n 17 0000000123 00000 n
23 -0000000229 00000 n  
24 -trailer << /Root 1 0 R /Size 5 /ID [<52bba3c78160d0c6e851b59110e5d076><31415926535897932384626433832795>] >> 18 +trailer << /Root 1 0 R /Size 4 /ID [<52bba3c78160d0c6e851b59110e5d076><31415926535897932384626433832795>] >>
25 startxref 19 startxref
26 -298 20 +213
27 %%EOF 21 %%EOF