Commit 8a9086a6894e83ebc60fbd39cb835b20aa2683b1

Authored by Jay Berkenbilt
1 parent 43f91f58

Accept extraneous space after stream keyword (fixes #329)

ChangeLog
1 2019-08-19 Jay Berkenbilt <ejb@ql.org> 1 2019-08-19 Jay Berkenbilt <ejb@ql.org>
2 2
  3 + * Accept (and warn for) extraneous whitespace between the stream
  4 + keyword and newline. Fixes #329.
  5 +
3 * Properly handle name tokens containing # not preceding two 6 * Properly handle name tokens containing # not preceding two
4 hexadecimal digits. Such names are invalid in PDF >= 1.2 but valid 7 hexadecimal digits. Such names are invalid in PDF >= 1.2 but valid
5 in PDF 1.0 and 1.1. Prior to this fix, qpdf's behavior was to 8 in PDF 1.0 and 1.1. Prior to this fix, qpdf's behavior was to
libqpdf/QPDF.cc
@@ -1477,8 +1477,13 @@ QPDF::readObject(PointerHolder<InputSource> input, @@ -1477,8 +1477,13 @@ QPDF::readObject(PointerHolder<InputSource> input,
1477 // stream data. However, some readers, including 1477 // stream data. However, some readers, including
1478 // Adobe reader, accept a carriage return by itself 1478 // Adobe reader, accept a carriage return by itself
1479 // when followed by a non-newline character, so that's 1479 // when followed by a non-newline character, so that's
1480 - // what we do here. 1480 + // what we do here. We have also seen files that have
  1481 + // extraneous whitespace between the stream keyword and
  1482 + // the newline.
  1483 + bool done = false;
  1484 + while (! done)
1481 { 1485 {
  1486 + done = true;
1482 char ch; 1487 char ch;
1483 if (input->read(&ch, 1) == 0) 1488 if (input->read(&ch, 1) == 0)
1484 { 1489 {
@@ -1519,14 +1524,21 @@ QPDF::readObject(PointerHolder<InputSource> input, @@ -1519,14 +1524,21 @@ QPDF::readObject(PointerHolder<InputSource> input,
1519 } 1524 }
1520 } 1525 }
1521 } 1526 }
  1527 + else if (QUtil::is_space(ch))
  1528 + {
  1529 + warn(QPDFExc(
  1530 + qpdf_e_damaged_pdf,
  1531 + input->getName(),
  1532 + this->m->last_object_description,
  1533 + input->tell(),
  1534 + "stream keyword followed by"
  1535 + " extraneous whitespace"));
  1536 + done = false;
  1537 + }
1522 else 1538 else
1523 { 1539 {
1524 QTC::TC("qpdf", "QPDF stream without newline"); 1540 QTC::TC("qpdf", "QPDF stream without newline");
1525 - if (! QUtil::is_space(ch))  
1526 - {  
1527 - QTC::TC("qpdf", "QPDF stream with non-space");  
1528 - input->unreadCh(ch);  
1529 - } 1541 + input->unreadCh(ch);
1530 warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), 1542 warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),
1531 this->m->last_object_description, 1543 this->m->last_object_description,
1532 input->tell(), 1544 input->tell(),
qpdf/qpdf.testcov
@@ -275,7 +275,6 @@ QPDFObjectHandle no val for last key 0 @@ -275,7 +275,6 @@ QPDFObjectHandle no val for last key 0
275 QPDF resolve failure to null 0 275 QPDF resolve failure to null 0
276 QPDFWriter preserve unreferenced standard 0 276 QPDFWriter preserve unreferenced standard 0
277 QPDFObjectHandle errors in parsecontent 0 277 QPDFObjectHandle errors in parsecontent 0
278 -QPDF stream with non-space 0  
279 qpdf same file error 0 278 qpdf same file error 0
280 qpdf read args from stdin 0 279 qpdf read args from stdin 0
281 qpdf read args from file 0 280 qpdf read args from file 0
qpdf/qtest/qpdf/stream-line-enders.out
1 -WARNING: stream-line-enders.pdf (object 5 0, offset 384): stream keyword followed by carriage return only  
2 -WARNING: stream-line-enders.pdf (object 6 0, offset 443): stream keyword not followed by proper line terminator  
3 -WARNING: stream-line-enders.pdf (object 7 0, offset 503): stream keyword not followed by proper line terminator 1 +WARNING: stream-line-enders.pdf (object 5 0, offset 391): stream keyword followed by carriage return only
  2 +WARNING: stream-line-enders.pdf (object 6 0, offset 450): stream keyword followed by extraneous whitespace
  3 +WARNING: stream-line-enders.pdf (object 6 0, offset 450): stream keyword not followed by proper line terminator
  4 +WARNING: stream-line-enders.pdf (object 7 0, offset 509): stream keyword not followed by proper line terminator
  5 +WARNING: stream-line-enders.pdf (object 8 0, offset 567): stream keyword followed by extraneous whitespace
4 qpdf: operation succeeded with warnings; resulting file may have some problems 6 qpdf: operation succeeded with warnings; resulting file may have some problems
qpdf/qtest/qpdf/stream-line-enders.pdf
@@ -7,7 +7,7 @@ endobj @@ -7,7 +7,7 @@ endobj
7 << /Count 1 /Kids [ 3 0 R ] /Type /Pages >> 7 << /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
8 endobj 8 endobj
9 3 0 obj 9 3 0 obj
10 -<< /Contents [ 4 0 R 5 0 R 6 0 R 7 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 8 0 R >> /ProcSet 9 0 R >> /Type /Page >> 10 +<< /Contents [ 4 0 R 5 0 R 6 0 R 7 0 R 8 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 9 0 R >> /ProcSet 10 0 R >> /Type /Page >>
11 endobj 11 endobj
12 4 0 obj 12 4 0 obj
13 << /Length 14 >> 13 << /Length 14 >>
@@ -27,30 +27,37 @@ stream (Potato) Tj @@ -27,30 +27,37 @@ stream (Potato) Tj
27 endstream 27 endstream
28 endobj 28 endobj
29 7 0 obj 29 7 0 obj
30 -<< /Length 11 >> 30 +<< /Length 9 >>
31 stream%comment 31 stream%comment
32 -ET  
33 endstream 32 endstream
34 endobj 33 endobj
35 8 0 obj 34 8 0 obj
36 -<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> 35 +<< /Length 11 >>
  36 +stream
  37 +%comment
  38 +ET
  39 +endstream
37 endobj 40 endobj
38 9 0 obj 41 9 0 obj
  42 +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
  43 +endobj
  44 +10 0 obj
39 [ /PDF /Text ] 45 [ /PDF /Text ]
40 endobj 46 endobj
41 xref 47 xref
42 -0 10 48 +0 11
43 0000000000 65535 f 49 0000000000 65535 f
44 0000000015 00000 n 50 0000000015 00000 n
45 0000000064 00000 n 51 0000000064 00000 n
46 0000000123 00000 n 52 0000000123 00000 n
47 -0000000288 00000 n  
48 -0000000352 00000 n  
49 -0000000411 00000 n  
50 -0000000472 00000 n  
51 -0000000532 00000 n  
52 -0000000639 00000 n  
53 -trailer << /Root 1 0 R /Size 10 /ID [<08aa98c73f8a7262d77c8328772c3989><5a35fde138b2e8251b9e079b560c0253>] >> 53 +0000000295 00000 n
  54 +0000000359 00000 n
  55 +0000000418 00000 n
  56 +0000000479 00000 n
  57 +0000000535 00000 n
  58 +0000000598 00000 n
  59 +0000000705 00000 n
  60 +trailer << /Root 1 0 R /Size 11 /ID [<08aa98c73f8a7262d77c8328772c3989><5a35fde138b2e8251b9e079b560c0253>] >>
54 startxref 61 startxref
55 -669 62 +736
56 %%EOF 63 %%EOF
qpdf/qtest/qpdf/stream-line-enders.qdf
@@ -30,6 +30,7 @@ endobj @@ -30,6 +30,7 @@ endobj
30 6 0 R 30 6 0 R
31 8 0 R 31 8 0 R
32 10 0 R 32 10 0 R
  33 + 12 0 R
33 ] 34 ]
34 /MediaBox [ 35 /MediaBox [
35 0 36 0
@@ -40,9 +41,9 @@ endobj @@ -40,9 +41,9 @@ endobj
40 /Parent 2 0 R 41 /Parent 2 0 R
41 /Resources << 42 /Resources <<
42 /Font << 43 /Font <<
43 - /F1 12 0 R 44 + /F1 14 0 R
44 >> 45 >>
45 - /ProcSet 13 0 R 46 + /ProcSet 15 0 R
46 >> 47 >>
47 /Type /Page 48 /Type /Page
48 >> 49 >>
@@ -102,18 +103,33 @@ endobj @@ -102,18 +103,33 @@ endobj
102 >> 103 >>
103 stream 104 stream
104 %comment 105 %comment
105 -ET  
106 endstream 106 endstream
107 endobj 107 endobj
108 108
109 -%QDF: ignore_newline  
110 11 0 obj 109 11 0 obj
111 -11 110 +9
112 endobj 111 endobj
113 112
  113 +%% Contents for page 1
114 %% Original object ID: 8 0 114 %% Original object ID: 8 0
115 12 0 obj 115 12 0 obj
116 << 116 <<
  117 + /Length 13 0 R
  118 +>>
  119 +stream
  120 +%comment
  121 +ET
  122 +endstream
  123 +endobj
  124 +
  125 +%QDF: ignore_newline
  126 +13 0 obj
  127 +11
  128 +endobj
  129 +
  130 +%% Original object ID: 9 0
  131 +14 0 obj
  132 +<<
117 /BaseFont /Helvetica 133 /BaseFont /Helvetica
118 /Encoding /WinAnsiEncoding 134 /Encoding /WinAnsiEncoding
119 /Name /F1 135 /Name /F1
@@ -122,8 +138,8 @@ endobj @@ -122,8 +138,8 @@ endobj
122 >> 138 >>
123 endobj 139 endobj
124 140
125 -%% Original object ID: 9 0  
126 -13 0 obj 141 +%% Original object ID: 10 0
  142 +15 0 obj
127 [ 143 [
128 /PDF 144 /PDF
129 /Text 145 /Text
@@ -131,26 +147,28 @@ endobj @@ -131,26 +147,28 @@ endobj
131 endobj 147 endobj
132 148
133 xref 149 xref
134 -0 14 150 +0 16
135 0000000000 65535 f 151 0000000000 65535 f
136 0000000052 00000 n 152 0000000052 00000 n
137 0000000133 00000 n 153 0000000133 00000 n
138 0000000242 00000 n 154 0000000242 00000 n
139 -0000000527 00000 n  
140 -0000000596 00000 n  
141 -0000000665 00000 n  
142 -0000000730 00000 n  
143 -0000000799 00000 n  
144 -0000000866 00000 n  
145 -0000000935 00000 n  
146 -0000001025 00000 n  
147 -0000001072 00000 n 155 +0000000538 00000 n
  156 +0000000607 00000 n
  157 +0000000676 00000 n
  158 +0000000741 00000 n
  159 +0000000810 00000 n
  160 +0000000877 00000 n
  161 +0000000946 00000 n
  162 +0000001012 00000 n
  163 +0000001081 00000 n
  164 +0000001171 00000 n
148 0000001218 00000 n 165 0000001218 00000 n
  166 +0000001365 00000 n
149 trailer << 167 trailer <<
150 /Root 1 0 R 168 /Root 1 0 R
151 - /Size 14 169 + /Size 16
152 /ID [<08aa98c73f8a7262d77c8328772c3989><31415926535897932384626433832795>] 170 /ID [<08aa98c73f8a7262d77c8328772c3989><31415926535897932384626433832795>]
153 >> 171 >>
154 startxref 172 startxref
155 -1254 173 +1401
156 %%EOF 174 %%EOF