Commit 8a9086a6894e83ebc60fbd39cb835b20aa2683b1

Authored by Jay Berkenbilt
1 parent 43f91f58

Accept extraneous space after stream keyword (fixes #329)

ChangeLog
1 1 2019-08-19 Jay Berkenbilt <ejb@ql.org>
2 2  
  3 + * Accept (and warn for) extraneous whitespace between the stream
  4 + keyword and newline. Fixes #329.
  5 +
3 6 * Properly handle name tokens containing # not preceding two
4 7 hexadecimal digits. Such names are invalid in PDF >= 1.2 but valid
5 8 in PDF 1.0 and 1.1. Prior to this fix, qpdf's behavior was to
... ...
libqpdf/QPDF.cc
... ... @@ -1477,8 +1477,13 @@ QPDF::readObject(PointerHolder<InputSource> input,
1477 1477 // stream data. However, some readers, including
1478 1478 // Adobe reader, accept a carriage return by itself
1479 1479 // when followed by a non-newline character, so that's
1480   - // what we do here.
  1480 + // what we do here. We have also seen files that have
  1481 + // extraneous whitespace between the stream keyword and
  1482 + // the newline.
  1483 + bool done = false;
  1484 + while (! done)
1481 1485 {
  1486 + done = true;
1482 1487 char ch;
1483 1488 if (input->read(&ch, 1) == 0)
1484 1489 {
... ... @@ -1519,14 +1524,21 @@ QPDF::readObject(PointerHolder<InputSource> input,
1519 1524 }
1520 1525 }
1521 1526 }
  1527 + else if (QUtil::is_space(ch))
  1528 + {
  1529 + warn(QPDFExc(
  1530 + qpdf_e_damaged_pdf,
  1531 + input->getName(),
  1532 + this->m->last_object_description,
  1533 + input->tell(),
  1534 + "stream keyword followed by"
  1535 + " extraneous whitespace"));
  1536 + done = false;
  1537 + }
1522 1538 else
1523 1539 {
1524 1540 QTC::TC("qpdf", "QPDF stream without newline");
1525   - if (! QUtil::is_space(ch))
1526   - {
1527   - QTC::TC("qpdf", "QPDF stream with non-space");
1528   - input->unreadCh(ch);
1529   - }
  1541 + input->unreadCh(ch);
1530 1542 warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),
1531 1543 this->m->last_object_description,
1532 1544 input->tell(),
... ...
qpdf/qpdf.testcov
... ... @@ -275,7 +275,6 @@ QPDFObjectHandle no val for last key 0
275 275 QPDF resolve failure to null 0
276 276 QPDFWriter preserve unreferenced standard 0
277 277 QPDFObjectHandle errors in parsecontent 0
278   -QPDF stream with non-space 0
279 278 qpdf same file error 0
280 279 qpdf read args from stdin 0
281 280 qpdf read args from file 0
... ...
qpdf/qtest/qpdf/stream-line-enders.out
1   -WARNING: stream-line-enders.pdf (object 5 0, offset 384): stream keyword followed by carriage return only
2   -WARNING: stream-line-enders.pdf (object 6 0, offset 443): stream keyword not followed by proper line terminator
3   -WARNING: stream-line-enders.pdf (object 7 0, offset 503): stream keyword not followed by proper line terminator
  1 +WARNING: stream-line-enders.pdf (object 5 0, offset 391): stream keyword followed by carriage return only
  2 +WARNING: stream-line-enders.pdf (object 6 0, offset 450): stream keyword followed by extraneous whitespace
  3 +WARNING: stream-line-enders.pdf (object 6 0, offset 450): stream keyword not followed by proper line terminator
  4 +WARNING: stream-line-enders.pdf (object 7 0, offset 509): stream keyword not followed by proper line terminator
  5 +WARNING: stream-line-enders.pdf (object 8 0, offset 567): stream keyword followed by extraneous whitespace
4 6 qpdf: operation succeeded with warnings; resulting file may have some problems
... ...
qpdf/qtest/qpdf/stream-line-enders.pdf
... ... @@ -7,7 +7,7 @@ endobj
7 7 << /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
8 8 endobj
9 9 3 0 obj
10   -<< /Contents [ 4 0 R 5 0 R 6 0 R 7 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 8 0 R >> /ProcSet 9 0 R >> /Type /Page >>
  10 +<< /Contents [ 4 0 R 5 0 R 6 0 R 7 0 R 8 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 9 0 R >> /ProcSet 10 0 R >> /Type /Page >>
11 11 endobj
12 12 4 0 obj
13 13 << /Length 14 >>
... ... @@ -27,30 +27,37 @@ stream (Potato) Tj
27 27 endstream
28 28 endobj
29 29 7 0 obj
30   -<< /Length 11 >>
  30 +<< /Length 9 >>
31 31 stream%comment
32   -ET
33 32 endstream
34 33 endobj
35 34 8 0 obj
36   -<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
  35 +<< /Length 11 >>
  36 +stream
  37 +%comment
  38 +ET
  39 +endstream
37 40 endobj
38 41 9 0 obj
  42 +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
  43 +endobj
  44 +10 0 obj
39 45 [ /PDF /Text ]
40 46 endobj
41 47 xref
42   -0 10
  48 +0 11
43 49 0000000000 65535 f
44 50 0000000015 00000 n
45 51 0000000064 00000 n
46 52 0000000123 00000 n
47   -0000000288 00000 n
48   -0000000352 00000 n
49   -0000000411 00000 n
50   -0000000472 00000 n
51   -0000000532 00000 n
52   -0000000639 00000 n
53   -trailer << /Root 1 0 R /Size 10 /ID [<08aa98c73f8a7262d77c8328772c3989><5a35fde138b2e8251b9e079b560c0253>] >>
  53 +0000000295 00000 n
  54 +0000000359 00000 n
  55 +0000000418 00000 n
  56 +0000000479 00000 n
  57 +0000000535 00000 n
  58 +0000000598 00000 n
  59 +0000000705 00000 n
  60 +trailer << /Root 1 0 R /Size 11 /ID [<08aa98c73f8a7262d77c8328772c3989><5a35fde138b2e8251b9e079b560c0253>] >>
54 61 startxref
55   -669
  62 +736
56 63 %%EOF
... ...
qpdf/qtest/qpdf/stream-line-enders.qdf
... ... @@ -30,6 +30,7 @@ endobj
30 30 6 0 R
31 31 8 0 R
32 32 10 0 R
  33 + 12 0 R
33 34 ]
34 35 /MediaBox [
35 36 0
... ... @@ -40,9 +41,9 @@ endobj
40 41 /Parent 2 0 R
41 42 /Resources <<
42 43 /Font <<
43   - /F1 12 0 R
  44 + /F1 14 0 R
44 45 >>
45   - /ProcSet 13 0 R
  46 + /ProcSet 15 0 R
46 47 >>
47 48 /Type /Page
48 49 >>
... ... @@ -102,18 +103,33 @@ endobj
102 103 >>
103 104 stream
104 105 %comment
105   -ET
106 106 endstream
107 107 endobj
108 108  
109   -%QDF: ignore_newline
110 109 11 0 obj
111   -11
  110 +9
112 111 endobj
113 112  
  113 +%% Contents for page 1
114 114 %% Original object ID: 8 0
115 115 12 0 obj
116 116 <<
  117 + /Length 13 0 R
  118 +>>
  119 +stream
  120 +%comment
  121 +ET
  122 +endstream
  123 +endobj
  124 +
  125 +%QDF: ignore_newline
  126 +13 0 obj
  127 +11
  128 +endobj
  129 +
  130 +%% Original object ID: 9 0
  131 +14 0 obj
  132 +<<
117 133 /BaseFont /Helvetica
118 134 /Encoding /WinAnsiEncoding
119 135 /Name /F1
... ... @@ -122,8 +138,8 @@ endobj
122 138 >>
123 139 endobj
124 140  
125   -%% Original object ID: 9 0
126   -13 0 obj
  141 +%% Original object ID: 10 0
  142 +15 0 obj
127 143 [
128 144 /PDF
129 145 /Text
... ... @@ -131,26 +147,28 @@ endobj
131 147 endobj
132 148  
133 149 xref
134   -0 14
  150 +0 16
135 151 0000000000 65535 f
136 152 0000000052 00000 n
137 153 0000000133 00000 n
138 154 0000000242 00000 n
139   -0000000527 00000 n
140   -0000000596 00000 n
141   -0000000665 00000 n
142   -0000000730 00000 n
143   -0000000799 00000 n
144   -0000000866 00000 n
145   -0000000935 00000 n
146   -0000001025 00000 n
147   -0000001072 00000 n
  155 +0000000538 00000 n
  156 +0000000607 00000 n
  157 +0000000676 00000 n
  158 +0000000741 00000 n
  159 +0000000810 00000 n
  160 +0000000877 00000 n
  161 +0000000946 00000 n
  162 +0000001012 00000 n
  163 +0000001081 00000 n
  164 +0000001171 00000 n
148 165 0000001218 00000 n
  166 +0000001365 00000 n
149 167 trailer <<
150 168 /Root 1 0 R
151   - /Size 14
  169 + /Size 16
152 170 /ID [<08aa98c73f8a7262d77c8328772c3989><31415926535897932384626433832795>]
153 171 >>
154 172 startxref
155   -1254
  173 +1401
156 174 %%EOF
... ...