Commit cf469d789024cdda41684f1ea48b41829b98c242

Authored by Jay Berkenbilt
1 parent cd830968

Give up reading objects with too many consecutive errors

ChangeLog
  1 +2019-06-15 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * When parsing files, while reading an object, if there are too
  4 + many consecutive errors without enough intervening successes, give
  5 + up on the specific object. This reduces cases in which very badly
  6 + damaged files send qpdf into a tailspin reading one character at
  7 + a time and reporting warnings.
  8 +
1 9 2019-06-13 Jay Berkenbilt <ejb@ql.org>
2 10  
3 11 * Perform initial integration of Google's oss-fuzz project by
... ...
libqpdf/QPDFObjectHandle.cc
... ... @@ -1628,8 +1628,11 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
1628 1628 qpdf_offset_t offset = input->tell();
1629 1629 offset_stack.push_back(offset);
1630 1630 bool done = false;
  1631 + int bad_count = 0;
  1632 + int good_count = 0;
1631 1633 while (! done)
1632 1634 {
  1635 + bool bad = false;
1633 1636 std::vector<QPDFObjectHandle>& olist = olist_stack.back();
1634 1637 parser_state_e state = state_stack.back();
1635 1638 offset = offset_stack.back();
... ... @@ -1651,6 +1654,7 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
1651 1654 input->getLastOffset(),
1652 1655 "unexpected EOF"));
1653 1656 }
  1657 + bad = true;
1654 1658 state = st_eof;
1655 1659 break;
1656 1660  
... ... @@ -1661,6 +1665,7 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
1661 1665 object_description,
1662 1666 input->getLastOffset(),
1663 1667 token.getErrorMessage()));
  1668 + bad = true;
1664 1669 object = newNull();
1665 1670 break;
1666 1671  
... ... @@ -1672,6 +1677,7 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
1672 1677 object_description,
1673 1678 input->getLastOffset(),
1674 1679 "treating unexpected brace token as null"));
  1680 + bad = true;
1675 1681 object = newNull();
1676 1682 break;
1677 1683  
... ... @@ -1688,6 +1694,7 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
1688 1694 object_description,
1689 1695 input->getLastOffset(),
1690 1696 "treating unexpected array close token as null"));
  1697 + bad = true;
1691 1698 object = newNull();
1692 1699 }
1693 1700 break;
... ... @@ -1705,6 +1712,7 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
1705 1712 object_description,
1706 1713 input->getLastOffset(),
1707 1714 "unexpected dictionary close token"));
  1715 + bad = true;
1708 1716 object = newNull();
1709 1717 }
1710 1718 break;
... ... @@ -1719,6 +1727,7 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
1719 1727 object_description,
1720 1728 input->getLastOffset(),
1721 1729 "ignoring excessively deeply nested data structure"));
  1730 + bad = true;
1722 1731 object = newNull();
1723 1732 state = st_top;
1724 1733 }
... ... @@ -1800,6 +1809,7 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
1800 1809 input->getLastOffset(),
1801 1810 "unknown token while reading object;"
1802 1811 " treating as string"));
  1812 + bad = true;
1803 1813 object = newString(value);
1804 1814 }
1805 1815 }
... ... @@ -1824,6 +1834,7 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
1824 1834 input->getLastOffset(),
1825 1835 "treating unknown token type as null while "
1826 1836 "reading object"));
  1837 + bad = true;
1827 1838 object = newNull();
1828 1839 break;
1829 1840 }
... ... @@ -1839,6 +1850,32 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
1839 1850 object = newNull();
1840 1851 }
1841 1852  
  1853 + if (bad)
  1854 + {
  1855 + ++bad_count;
  1856 + good_count = 0;
  1857 + }
  1858 + else
  1859 + {
  1860 + ++good_count;
  1861 + if (good_count > 3)
  1862 + {
  1863 + bad_count = 0;
  1864 + }
  1865 + }
  1866 + if (bad_count > 5)
  1867 + {
  1868 + // We had too many consecutive errors without enough
  1869 + // intervening successful objects. Give up.
  1870 + warn(context,
  1871 + QPDFExc(qpdf_e_damaged_pdf, input->getName(),
  1872 + object_description,
  1873 + input->getLastOffset(),
  1874 + "too many errors; giving up on reading object"));
  1875 + state = st_top;
  1876 + object = newNull();
  1877 + }
  1878 +
1842 1879 switch (state)
1843 1880 {
1844 1881 case st_eof:
... ...
qpdf/qtest/qpdf.test
... ... @@ -624,7 +624,7 @@ my @bug_tests = (
624 624 ["99", "object 0", 2],
625 625 ["99b", "object 0", 2],
626 626 ["100", "xref reconstruction loop", 2],
627   - ["101", "resolve for exception text", 3],
  627 + ["101", "resolve for exception text", 2],
628 628 ["117", "other infinite loop", 2],
629 629 ["118", "other infinite loop", 2],
630 630 ["119", "other infinite loop", 3],
... ... @@ -639,7 +639,7 @@ my @bug_tests = (
639 639 ["149", "xref prev pointer loop", 3],
640 640 ["150", "integer overflow", 2],
641 641 ["202", "even more deeply nested dictionary", 2],
642   - ["263", "empty xref stream", 3],
  642 + ["263", "empty xref stream", 2],
643 643 );
644 644 $n_tests += scalar(@bug_tests);
645 645 foreach my $d (@bug_tests)
... ...
qpdf/qtest/qpdf/issue-100.out
... ... @@ -7,10 +7,8 @@ WARNING: issue-100.pdf (object 5 0, offset 289): unknown token while reading obj
7 7 WARNING: issue-100.pdf (object 5 0, offset 294): unknown token while reading object; treating as string
8 8 WARNING: issue-100.pdf (object 5 0, offset 297): unknown token while reading object; treating as string
9 9 WARNING: issue-100.pdf (object 5 0, offset 304): unknown token while reading object; treating as string
10   -WARNING: issue-100.pdf (object 5 0, offset 308): unexpected )
11   -WARNING: issue-100.pdf (object 5 0, offset 316): treating unexpected array close token as null
12   -WARNING: issue-100.pdf (object 5 0, offset 227): expected dictionary key but found non-name object; inserting key /QPDFFake1
13   -WARNING: issue-100.pdf (object 5 0, offset 321): expected endobj
  10 +WARNING: issue-100.pdf (object 5 0, offset 304): too many errors; giving up on reading object
  11 +WARNING: issue-100.pdf (object 5 0, offset 308): expected endobj
14 12 WARNING: issue-100.pdf (object 5 0, offset 418): /Length key in stream dictionary is not an integer
15 13 WARNING: issue-100.pdf (object 5 0, offset 489): attempting to recover stream length
16 14 WARNING: issue-100.pdf (object 5 0, offset 489): recovered stream length: 12
... ...
qpdf/qtest/qpdf/issue-101.out
... ... @@ -38,116 +38,6 @@ WARNING: issue-101.pdf (object 11 0, offset 626): unknown token while reading ob
38 38 WARNING: issue-101.pdf (object 11 0, offset 637): unknown token while reading object; treating as string
39 39 WARNING: issue-101.pdf (object 11 0, offset 639): unknown token while reading object; treating as string
40 40 WARNING: issue-101.pdf (object 11 0, offset 644): unknown token while reading object; treating as string
41   -WARNING: issue-101.pdf (object 11 0, offset 647): unknown token while reading object; treating as string
42   -WARNING: issue-101.pdf (object 11 0, offset 687): unknown token while reading object; treating as string
43   -WARNING: issue-101.pdf (object 11 0, offset 691): unknown token while reading object; treating as string
44   -WARNING: issue-101.pdf (object 11 0, offset 696): unknown token while reading object; treating as string
45   -WARNING: issue-101.pdf (object 11 0, offset 698): unknown token while reading object; treating as string
46   -WARNING: issue-101.pdf (object 11 0, offset 701): unknown token while reading object; treating as string
47   -WARNING: issue-101.pdf (object 11 0, offset 711): unknown token while reading object; treating as string
48   -WARNING: issue-101.pdf (object 11 0, offset 743): unknown token while reading object; treating as string
49   -WARNING: issue-101.pdf (object 11 0, offset 745): unknown token while reading object; treating as string
50   -WARNING: issue-101.pdf (object 11 0, offset 747): unknown token while reading object; treating as string
51   -WARNING: issue-101.pdf (object 11 0, offset 777): unknown token while reading object; treating as string
52   -WARNING: issue-101.pdf (object 11 0, offset 790): unknown token while reading object; treating as string
53   -WARNING: issue-101.pdf (object 11 0, offset 800): treating unexpected brace token as null
54   -WARNING: issue-101.pdf (object 11 0, offset 801): unknown token while reading object; treating as string
55   -WARNING: issue-101.pdf (object 11 0, offset 811): unknown token while reading object; treating as string
56   -WARNING: issue-101.pdf (object 11 0, offset 819): unknown token while reading object; treating as string
57   -WARNING: issue-101.pdf (object 11 0, offset 832): unknown token while reading object; treating as string
58   -WARNING: issue-101.pdf (object 11 0, offset 856): unexpected >
59   -WARNING: issue-101.pdf (object 11 0, offset 857): unknown token while reading object; treating as string
60   -WARNING: issue-101.pdf (object 11 0, offset 868): unknown token while reading object; treating as string
61   -WARNING: issue-101.pdf (object 11 0, offset 887): unknown token while reading object; treating as string
62   -WARNING: issue-101.pdf (object 11 0, offset 897): unexpected )
63   -WARNING: issue-101.pdf (object 11 0, offset 898): unknown token while reading object; treating as string
64   -WARNING: issue-101.pdf (object 11 0, offset 909): invalid character (¤) in hexstring
65   -WARNING: issue-101.pdf (object 11 0, offset 911): unknown token while reading object; treating as string
66   -WARNING: issue-101.pdf (object 11 0, offset 929): unknown token while reading object; treating as string
67   -WARNING: issue-101.pdf (object 11 0, offset 930): invalid character (²) in hexstring
68   -WARNING: issue-101.pdf (object 11 0, offset 932): unknown token while reading object; treating as string
69   -WARNING: issue-101.pdf (object 11 0, offset 944): unknown token while reading object; treating as string
70   -WARNING: issue-101.pdf (object 11 0, offset 947): unknown token while reading object; treating as string
71   -WARNING: issue-101.pdf (object 11 0, offset 970): unknown token while reading object; treating as string
72   -WARNING: issue-101.pdf (object 11 0, offset 1046): unknown token while reading object; treating as string
73   -WARNING: issue-101.pdf (object 11 0, offset 1067): unknown token while reading object; treating as string
74   -WARNING: issue-101.pdf (object 11 0, offset 1075): unknown token while reading object; treating as string
75   -WARNING: issue-101.pdf (object 11 0, offset 1080): unknown token while reading object; treating as string
76   -WARNING: issue-101.pdf (object 11 0, offset 1084): unknown token while reading object; treating as string
77   -WARNING: issue-101.pdf (object 11 0, offset 1102): unknown token while reading object; treating as string
78   -WARNING: issue-101.pdf (object 11 0, offset 1112): unknown token while reading object; treating as string
79   -WARNING: issue-101.pdf (object 11 0, offset 1124): unknown token while reading object; treating as string
80   -WARNING: issue-101.pdf (object 11 0, offset 1133): unknown token while reading object; treating as string
81   -WARNING: issue-101.pdf (object 11 0, offset 1145): unknown token while reading object; treating as string
82   -WARNING: issue-101.pdf (object 11 0, offset 1148): unknown token while reading object; treating as string
83   -WARNING: issue-101.pdf (object 11 0, offset 1150): unknown token while reading object; treating as string
84   -WARNING: issue-101.pdf (object 11 0, offset 1151): unexpected )
85   -WARNING: issue-101.pdf (object 11 0, offset 1153): unexpected dictionary close token
86   -WARNING: issue-101.pdf (object 11 0, offset 1156): unknown token while reading object; treating as string
87   -WARNING: issue-101.pdf (object 11 0, offset 1163): unknown token while reading object; treating as string
88   -WARNING: issue-101.pdf (object 11 0, offset 1168): unexpected >
89   -WARNING: issue-101.pdf (object 11 0, offset 1170): invalid character (I) in hexstring
90   -WARNING: issue-101.pdf (object 11 0, offset 1167): expected dictionary key but found non-name object; inserting key /QPDFFake1
91   -WARNING: issue-101.pdf (object 11 0, offset 1167): expected dictionary key but found non-name object; inserting key /QPDFFake2
92   -WARNING: issue-101.pdf (object 11 0, offset 1167): expected dictionary key but found non-name object; inserting key /QPDFFake3
93   -WARNING: issue-101.pdf (object 11 0, offset 1176): unknown token while reading object; treating as string
94   -WARNING: issue-101.pdf (object 11 0, offset 1180): unknown token while reading object; treating as string
95   -WARNING: issue-101.pdf (object 11 0, offset 1184): unknown token while reading object; treating as string
96   -WARNING: issue-101.pdf (object 11 0, offset 1190): unexpected >
97   -WARNING: issue-101.pdf (object 11 0, offset 1192): unknown token while reading object; treating as string
98   -WARNING: issue-101.pdf (object 11 0, offset 1195): unknown token while reading object; treating as string
99   -WARNING: issue-101.pdf (object 11 0, offset 1205): unknown token while reading object; treating as string
100   -WARNING: issue-101.pdf (object 11 0, offset 1217): unknown token while reading object; treating as string
101   -WARNING: issue-101.pdf (object 11 0, offset 1224): unknown token while reading object; treating as string
102   -WARNING: issue-101.pdf (object 11 0, offset 1236): unknown token while reading object; treating as string
103   -WARNING: issue-101.pdf (object 11 0, offset 1242): expected dictionary key but found non-name object; inserting key /QPDFFake1
104   -WARNING: issue-101.pdf (object 11 0, offset 1242): dictionary ended prematurely; using null as value for last key
105   -WARNING: issue-101.pdf (object 11 0, offset 1275): unknown token while reading object; treating as string
106   -WARNING: issue-101.pdf (object 11 0, offset 1287): unknown token while reading object; treating as string
107   -WARNING: issue-101.pdf (object 11 0, offset 1291): unexpected dictionary close token
108   -WARNING: issue-101.pdf (object 11 0, offset 1294): unknown token while reading object; treating as string
109   -WARNING: issue-101.pdf (object 11 0, offset 1306): unknown token while reading object; treating as string
110   -WARNING: issue-101.pdf (object 11 0, offset 1322): unknown token while reading object; treating as string
111   -WARNING: issue-101.pdf (object 11 0, offset 1325): unknown token while reading object; treating as string
112   -WARNING: issue-101.pdf (object 11 0, offset 1329): unknown token while reading object; treating as string
113   -WARNING: issue-101.pdf (object 11 0, offset 1341): treating unexpected array close token as null
114   -WARNING: issue-101.pdf (object 11 0, offset 1312): expected dictionary key but found non-name object; inserting key /QPDFFake1
115   -WARNING: issue-101.pdf (object 11 0, offset 1312): expected dictionary key but found non-name object; inserting key /QPDFFake2
116   -WARNING: issue-101.pdf (object 11 0, offset 1312): expected dictionary key but found non-name object; inserting key /QPDFFake3
117   -WARNING: issue-101.pdf (object 11 0, offset 1312): expected dictionary key but found non-name object; inserting key /QPDFFake4
118   -WARNING: issue-101.pdf (object 11 0, offset 1312): dictionary ended prematurely; using null as value for last key
119   -WARNING: issue-101.pdf (object 11 0, offset 1349): unknown token while reading object; treating as string
120   -WARNING: issue-101.pdf (object 11 0, offset 1353): unknown token while reading object; treating as string
121   -WARNING: issue-101.pdf (object 11 0, offset 1357): unknown token while reading object; treating as string
122   -WARNING: issue-101.pdf (object 11 0, offset 1359): unknown token while reading object; treating as string
123   -WARNING: issue-101.pdf (object 11 0, offset 1368): unexpected )
124   -WARNING: issue-101.pdf (object 11 0, offset 1373): expected endobj
125   -WARNING: issue-101.pdf (object 2 0, offset 244): unknown token while reading object; treating as string
126   -WARNING: issue-101.pdf (object 7 0, offset 3855): unknown token while reading object; treating as string
127   -WARNING: issue-101.pdf (object 7 0, offset 3863): treating unexpected brace token as null
128   -WARNING: issue-101.pdf (object 7 0, offset 3864): unknown token while reading object; treating as string
129   -WARNING: issue-101.pdf (object 7 0, offset 3866): unknown token while reading object; treating as string
130   -WARNING: issue-101.pdf (object 7 0, offset 3873): unknown token while reading object; treating as string
131   -WARNING: issue-101.pdf (object 7 0, offset 3879): unknown token while reading object; treating as string
132   -WARNING: issue-101.pdf (object 7 0, offset 3888): unknown token while reading object; treating as string
133   -WARNING: issue-101.pdf (object 7 0, offset 3901): unknown token while reading object; treating as string
134   -WARNING: issue-101.pdf (object 7 0, offset 3905): unknown token while reading object; treating as string
135   -WARNING: issue-101.pdf (object 7 0, offset 3913): unknown token while reading object; treating as string
136   -WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake1
137   -WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake2
138   -WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake3
139   -WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake4
140   -WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake5
141   -WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake6
142   -WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake7
143   -WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake8
144   -WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake9
145   -WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake10
146   -WARNING: issue-101.pdf (object 7 0, offset 3844): stream dictionary lacks /Length key
147   -WARNING: issue-101.pdf (object 7 0, offset 3962): attempting to recover stream length
148   -WARNING: issue-101.pdf (object 7 0, offset 3962): recovered stream length: 12
149   -WARNING: issue-101.pdf (object 8 0, offset 4067): invalid character ()) in hexstring
150   -WARNING: issue-101.pdf (object 8 0, offset 4069): expected endobj
151   -WARNING: issue-101.pdf (object 9 0, offset 2832): unknown token while reading object; treating as string
152   -WARNING: issue-101.pdf (object 9 0, offset 2834): expected endobj
153   -qpdf: operation succeeded with warnings; resulting file may have some problems
  41 +WARNING: issue-101.pdf (object 11 0, offset 644): too many errors; giving up on reading object
  42 +WARNING: issue-101.pdf (object 11 0, offset 647): expected endobj
  43 +issue-101.pdf (offset 687): unable to find /Root dictionary
... ...
qpdf/qtest/qpdf/issue-263.out
... ... @@ -8,31 +8,5 @@ WARNING: issue-263.pdf (trailer, offset 79): unknown token while reading object;
8 8 WARNING: issue-263.pdf (trailer, offset 82): unexpected )
9 9 WARNING: issue-263.pdf (trailer, offset 83): unknown token while reading object; treating as string
10 10 WARNING: issue-263.pdf (trailer, offset 87): unexpected >
11   -WARNING: issue-263.pdf (trailer, offset 89): unexpected dictionary close token
12   -WARNING: issue-263.pdf (trailer, offset 92): unexpected >
13   -WARNING: issue-263.pdf (trailer, offset 40): expected dictionary key but found non-name object; inserting key /QPDFFake1
14   -WARNING: issue-263.pdf (trailer, offset 40): expected dictionary key but found non-name object; inserting key /QPDFFake2
15   -WARNING: issue-263.pdf (trailer, offset 40): expected dictionary key but found non-name object; inserting key /QPDFFake3
16   -WARNING: issue-263.pdf (trailer, offset 40): expected dictionary key but found non-name object; inserting key /QPDFFake4
17   -WARNING: issue-263.pdf (trailer, offset 40): expected dictionary key but found non-name object; inserting key /QPDFFake5
18   -WARNING: issue-263.pdf (trailer, offset 40): expected dictionary key but found non-name object; inserting key /QPDFFake6
19   -WARNING: issue-263.pdf (trailer, offset 40): expected dictionary key but found non-name object; inserting key /QPDFFake7
20   -WARNING: issue-263.pdf (trailer, offset 98): unknown token while reading object; treating as string
21   -WARNING: issue-263.pdf (trailer, offset 103): unexpected )
22   -WARNING: issue-263.pdf (trailer, offset 107): unknown token while reading object; treating as string
23   -WARNING: issue-263.pdf (trailer, offset 119): expected dictionary key but found non-name object; inserting key /QPDFFake1
24   -WARNING: issue-263.pdf (trailer, offset 163): unknown token while reading object; treating as string
25   -WARNING: issue-263.pdf (trailer, offset 173): unknown token while reading object; treating as string
26   -WARNING: issue-263.pdf (trailer, offset 113): expected dictionary key but found non-name object; inserting key /QPDFFake1
27   -WARNING: issue-263.pdf (trailer, offset 113): expected dictionary key but found non-name object; inserting key /QPDFFake2
28   -WARNING: issue-263.pdf (trailer, offset 113): dictionary ended prematurely; using null as value for last key
29   -WARNING: issue-263.pdf (trailer, offset 18): expected dictionary key but found non-name object; inserting key /QPDFFake1
30   -WARNING: issue-263.pdf (trailer, offset 18): expected dictionary key but found non-name object; inserting key /QPDFFake2
31   -WARNING: issue-263.pdf (trailer, offset 18): expected dictionary key but found non-name object; inserting key /QPDFFake3
32   -WARNING: issue-263.pdf (trailer, offset 18): expected dictionary key but found non-name object; inserting key /QPDFFake4
33   -WARNING: issue-263.pdf (trailer, offset 18): expected dictionary key but found non-name object; inserting key /QPDFFake5
34   -WARNING: issue-263.pdf (trailer, offset 18): expected dictionary key but found non-name object; inserting key /QPDFFake6
35   -WARNING: issue-263.pdf (trailer, offset 18): expected dictionary key but found non-name object; inserting key /QPDFFake7
36   -WARNING: issue-263.pdf (trailer, offset 18): expected dictionary key but found non-name object; inserting key /QPDFFake8
37   -WARNING: issue-263.pdf (trailer, offset 18): expected dictionary key but found non-name object; inserting key /QPDFFake9
38   -qpdf: operation succeeded with warnings; resulting file may have some problems
  11 +WARNING: issue-263.pdf (trailer, offset 87): too many errors; giving up on reading object
  12 +issue-263.pdf: unable to find trailer dictionary while recovering damaged file
... ...