Commit 07d6f770b2a1c731cdf4f981ddc01466dbbc4ba0

Authored by Jay Berkenbilt
1 parent 39d7307f

Better recovery of bad stream start (fixes #104)

ChangeLog
1 2017-07-29 Jay Berkenbilt <ejb@ql.org> 1 2017-07-29 Jay Berkenbilt <ejb@ql.org>
2 2
  3 + * Improve handling of files where the "stream" keyword is not
  4 + followed by proper line terminators. Fixes #104.
  5 +
3 * Fix content stream parsing to handle cases of structures within 6 * Fix content stream parsing to handle cases of structures within
4 the stream split across stream boundaries. Fixes #73. 7 the stream split across stream boundaries. Fixes #73.
5 8
libqpdf/QPDF.cc
@@ -1117,6 +1117,11 @@ QPDF::readObject(PointerHolder<InputSource> input, @@ -1117,6 +1117,11 @@ QPDF::readObject(PointerHolder<InputSource> input,
1117 else 1117 else
1118 { 1118 {
1119 QTC::TC("qpdf", "QPDF stream without newline"); 1119 QTC::TC("qpdf", "QPDF stream without newline");
  1120 + if (! QUtil::is_space(ch))
  1121 + {
  1122 + QTC::TC("qpdf", "QPDF stream with non-space");
  1123 + input->unreadCh(ch);
  1124 + }
1120 warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), 1125 warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),
1121 this->last_object_description, 1126 this->last_object_description,
1122 input->tell(), 1127 input->tell(),
qpdf/qpdf.testcov
@@ -283,3 +283,4 @@ QPDFWriter precheck stream 0 @@ -283,3 +283,4 @@ QPDFWriter precheck stream 0
283 QPDFWriter preserve unreferenced standard 0 283 QPDFWriter preserve unreferenced standard 0
284 QPDFObjectHandle non-stream in parsecontent 0 284 QPDFObjectHandle non-stream in parsecontent 0
285 QPDFObjectHandle errors in parsecontent 0 285 QPDFObjectHandle errors in parsecontent 0
  286 +QPDF stream with non-space 0
qpdf/qtest/qpdf/stream-line-enders.out
1 -WARNING: stream-line-enders.pdf (object 5 0, file position 378): stream keyword followed by carriage return only  
2 -WARNING: stream-line-enders.pdf (object 6 0, file position 437): stream keyword not followed by proper line terminator 1 +WARNING: stream-line-enders.pdf (object 5 0, file position 384): stream keyword followed by carriage return only
  2 +WARNING: stream-line-enders.pdf (object 6 0, file position 443): stream keyword not followed by proper line terminator
  3 +WARNING: stream-line-enders.pdf (object 7 0, file position 503): stream keyword not followed by proper line terminator
3 qpdf: operation succeeded with warnings; resulting file may have some problems 4 qpdf: operation succeeded with warnings; resulting file may have some problems
qpdf/qtest/qpdf/stream-line-enders.pdf
@@ -7,7 +7,7 @@ endobj @@ -7,7 +7,7 @@ endobj
7 << /Count 1 /Kids [ 3 0 R ] /Type /Pages >> 7 << /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
8 endobj 8 endobj
9 3 0 obj 9 3 0 obj
10 -<< /Contents [ 4 0 R 5 0 R 6 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 7 0 R >> /ProcSet 8 0 R >> /Type /Page >> 10 +<< /Contents [ 4 0 R 5 0 R 6 0 R 7 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 8 0 R >> /ProcSet 9 0 R >> /Type /Page >>
11 endobj 11 endobj
12 4 0 obj 12 4 0 obj
13 << /Length 14 >> 13 << /Length 14 >>
@@ -22,29 +22,35 @@ stream 72 720 Td @@ -22,29 +22,35 @@ stream 72 720 Td
22 endstream 22 endstream
23 endobj 23 endobj
24 6 0 obj 24 6 0 obj
25 -<< /Length 15 >> 25 +<< /Length 12 >>
26 stream (Potato) Tj 26 stream (Potato) Tj
27 -ET  
28 endstream 27 endstream
29 endobj 28 endobj
30 7 0 obj 29 7 0 obj
31 -<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> 30 +<< /Length 11 >>
  31 +stream%comment
  32 +ET
  33 +endstream
32 endobj 34 endobj
33 8 0 obj 35 8 0 obj
  36 +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
  37 +endobj
  38 +9 0 obj
34 [ /PDF /Text ] 39 [ /PDF /Text ]
35 endobj 40 endobj
36 xref 41 xref
37 -0 9 42 +0 10
38 0000000000 65535 f 43 0000000000 65535 f
39 0000000015 00000 n 44 0000000015 00000 n
40 0000000064 00000 n 45 0000000064 00000 n
41 0000000123 00000 n 46 0000000123 00000 n
42 -0000000282 00000 n  
43 -0000000346 00000 n  
44 -0000000405 00000 n  
45 -0000000469 00000 n  
46 -0000000576 00000 n  
47 -trailer << /Root 1 0 R /Size 9 /ID [<08aa98c73f8a7262d77c8328772c3989><7b1f32865e2165debe277f27ee790092>] >> 47 +0000000288 00000 n
  48 +0000000352 00000 n
  49 +0000000411 00000 n
  50 +0000000472 00000 n
  51 +0000000532 00000 n
  52 +0000000639 00000 n
  53 +trailer << /Root 1 0 R /Size 10 /ID [<08aa98c73f8a7262d77c8328772c3989><5a35fde138b2e8251b9e079b560c0253>] >>
48 startxref 54 startxref
49 -606 55 +669
50 %%EOF 56 %%EOF
qpdf/qtest/qpdf/stream-line-enders.qdf
@@ -29,6 +29,7 @@ endobj @@ -29,6 +29,7 @@ endobj
29 4 0 R 29 4 0 R
30 6 0 R 30 6 0 R
31 8 0 R 31 8 0 R
  32 + 10 0 R
32 ] 33 ]
33 /MediaBox [ 34 /MediaBox [
34 0 35 0
@@ -39,9 +40,9 @@ endobj @@ -39,9 +40,9 @@ endobj
39 /Parent 2 0 R 40 /Parent 2 0 R
40 /Resources << 41 /Resources <<
41 /Font << 42 /Font <<
42 - /F1 10 0 R 43 + /F1 12 0 R
43 >> 44 >>
44 - /ProcSet 11 0 R 45 + /ProcSet 13 0 R
45 >> 46 >>
46 /Type /Page 47 /Type /Page
47 >> 48 >>
@@ -86,17 +87,32 @@ endobj @@ -86,17 +87,32 @@ endobj
86 >> 87 >>
87 stream 88 stream
88 (Potato) Tj 89 (Potato) Tj
89 -ET  
90 endstream 90 endstream
91 endobj 91 endobj
92 92
93 9 0 obj 93 9 0 obj
94 -15 94 +12
95 endobj 95 endobj
96 96
  97 +%% Contents for page 1
97 %% Original object ID: 7 0 98 %% Original object ID: 7 0
98 10 0 obj 99 10 0 obj
99 << 100 <<
  101 + /Length 11 0 R
  102 +>>
  103 +stream
  104 +%comment
  105 +ET
  106 +endstream
  107 +endobj
  108 +
  109 +11 0 obj
  110 +12
  111 +endobj
  112 +
  113 +%% Original object ID: 8 0
  114 +12 0 obj
  115 +<<
100 /BaseFont /Helvetica 116 /BaseFont /Helvetica
101 /Encoding /WinAnsiEncoding 117 /Encoding /WinAnsiEncoding
102 /Name /F1 118 /Name /F1
@@ -105,8 +121,8 @@ endobj @@ -105,8 +121,8 @@ endobj
105 >> 121 >>
106 endobj 122 endobj
107 123
108 -%% Original object ID: 8 0  
109 -11 0 obj 124 +%% Original object ID: 9 0
  125 +13 0 obj
110 [ 126 [
111 /PDF 127 /PDF
112 /Text 128 /Text
@@ -114,24 +130,26 @@ endobj @@ -114,24 +130,26 @@ endobj
114 endobj 130 endobj
115 131
116 xref 132 xref
117 -0 12 133 +0 14
118 0000000000 65535 f 134 0000000000 65535 f
119 0000000052 00000 n 135 0000000052 00000 n
120 0000000133 00000 n 136 0000000133 00000 n
121 0000000242 00000 n 137 0000000242 00000 n
122 -0000000516 00000 n  
123 -0000000585 00000 n  
124 -0000000654 00000 n  
125 -0000000719 00000 n  
126 -0000000788 00000 n  
127 -0000000858 00000 n  
128 -0000000904 00000 n  
129 -0000001050 00000 n 138 +0000000527 00000 n
  139 +0000000596 00000 n
  140 +0000000665 00000 n
  141 +0000000730 00000 n
  142 +0000000799 00000 n
  143 +0000000866 00000 n
  144 +0000000935 00000 n
  145 +0000001004 00000 n
  146 +0000001051 00000 n
  147 +0000001197 00000 n
130 trailer << 148 trailer <<
131 /Root 1 0 R 149 /Root 1 0 R
132 - /Size 12 150 + /Size 14
133 /ID [<08aa98c73f8a7262d77c8328772c3989><31415926535897932384626433832795>] 151 /ID [<08aa98c73f8a7262d77c8328772c3989><31415926535897932384626433832795>]
134 >> 152 >>
135 startxref 153 startxref
136 -1086 154 +1233
137 %%EOF 155 %%EOF