Commit aeb892f99bad9f6c24aef94a2d93d573c6de0382
1 parent
c551b972
accept stream keyword with CR only
git-svn-id: svn+q:///qpdf/trunk@1052 71b93d88-0707-0410-a8cf-f5a4172ac649
Showing
8 changed files
with
272 additions
and
14 deletions
ChangeLog
| ... | ... | @@ -2,6 +2,11 @@ |
| 2 | 2 | |
| 3 | 3 | * 2.2.3: release |
| 4 | 4 | |
| 5 | + * libqpdf/QPDF.cc (readObjectInternal): Accept the case of the | |
| 6 | + stream keyword being followed by carriage return by itself. While | |
| 7 | + this is not permitted by the specification, there are PDF files | |
| 8 | + that do this, and other readers can read them. | |
| 9 | + | |
| 5 | 10 | * libqpdf/Pl_QPDFTokenizer.cc (processChar): When an inline image |
| 6 | 11 | is detected, suspend normalization only up to the end of the |
| 7 | 12 | inline image rather than for the remainder of the content stream. | ... | ... |
libqpdf/QPDF.cc
| ... | ... | @@ -1331,24 +1331,66 @@ QPDF::readObjectInternal(PointerHolder<InputSource> input, |
| 1331 | 1331 | if (readToken(input) == |
| 1332 | 1332 | QPDFTokenizer::Token(QPDFTokenizer::tt_word, "stream")) |
| 1333 | 1333 | { |
| 1334 | - // Kill to next actual newline. Do not use readLine() | |
| 1335 | - // here -- streams are a special case. The next | |
| 1336 | - // single newline character marks the end of the | |
| 1337 | - // stream token. It is incorrect to strip subsequent | |
| 1338 | - // carriage returns or newlines as they may be part of | |
| 1339 | - // the stream. | |
| 1334 | + // The PDF specification states that the word "stream" | |
| 1335 | + // should be followed by either a carriage return and | |
| 1336 | + // a newline or by a newline alone. It specifically | |
| 1337 | + // disallows following it by a carriage return alone | |
| 1338 | + // since, in that case, there would be no way to tell | |
| 1339 | + // whether the NL in a CR NL sequence was part of the | |
| 1340 | + // stream data. However, some readers, including | |
| 1341 | + // Adobe reader, accept a carriage return by itself | |
| 1342 | + // when followed by a non-newline character, so that's | |
| 1343 | + // what we do here. | |
| 1340 | 1344 | { |
| 1341 | 1345 | char ch; |
| 1342 | - do | |
| 1346 | + if (input->read(&ch, 1) == 0) | |
| 1343 | 1347 | { |
| 1344 | - if (input->read(&ch, 1) == 0) | |
| 1348 | + // A premature EOF here will result in some | |
| 1349 | + // other problem that will get reported at | |
| 1350 | + // another time. | |
| 1351 | + } | |
| 1352 | + else if (ch == '\n') | |
| 1353 | + { | |
| 1354 | + // ready to read stream data | |
| 1355 | + QTC::TC("qpdf", "QPDF stream with NL only"); | |
| 1356 | + } | |
| 1357 | + else if (ch == '\r') | |
| 1358 | + { | |
| 1359 | + // Read another character | |
| 1360 | + if (input->read(&ch, 1) != 0) | |
| 1345 | 1361 | { |
| 1346 | - // A premature EOF here will result in | |
| 1347 | - // some other problem that will get | |
| 1348 | - // reported at another time. | |
| 1349 | - ch = '\n'; | |
| 1362 | + if (ch == '\n') | |
| 1363 | + { | |
| 1364 | + // Ready to read stream data | |
| 1365 | + QTC::TC("qpdf", "QPDF stream with CRNL"); | |
| 1366 | + } | |
| 1367 | + else | |
| 1368 | + { | |
| 1369 | + // Treat the \r by itself as the | |
| 1370 | + // whitespace after the stream keyword | |
| 1371 | + // and start reading stream data in | |
| 1372 | + // spite of not having seen a newline. | |
| 1373 | + QTC::TC("qpdf", "QPDF stream with CR only"); | |
| 1374 | + input->unreadCh(ch); | |
| 1375 | + warn(QPDFExc( | |
| 1376 | + qpdf_e_damaged_pdf, | |
| 1377 | + input->getName(), | |
| 1378 | + this->last_object_description, | |
| 1379 | + input->tell(), | |
| 1380 | + "stream keyword followed" | |
| 1381 | + " by carriage return only")); | |
| 1382 | + } | |
| 1350 | 1383 | } |
| 1351 | - } while (ch != '\n'); | |
| 1384 | + } | |
| 1385 | + else | |
| 1386 | + { | |
| 1387 | + QTC::TC("qpdf", "QPDF stream without newline"); | |
| 1388 | + warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 1389 | + this->last_object_description, | |
| 1390 | + input->tell(), | |
| 1391 | + "stream keyword not followed" | |
| 1392 | + " by proper line terminator")); | |
| 1393 | + } | |
| 1352 | 1394 | } |
| 1353 | 1395 | |
| 1354 | 1396 | // Must get offset before accessing any additional | ... | ... |
manual/qpdf-manual.xml
| ... | ... | @@ -2080,6 +2080,12 @@ print "\n"; |
| 2080 | 2080 | <itemizedlist> |
| 2081 | 2081 | <listitem> |
| 2082 | 2082 | <para> |
| 2083 | + Handle some damaged streams with incorrect characters | |
| 2084 | + following the stream keyword. | |
| 2085 | + </para> | |
| 2086 | + </listitem> | |
| 2087 | + <listitem> | |
| 2088 | + <para> | |
| 2083 | 2089 | Improve handling of inline images when normalizing content |
| 2084 | 2090 | streams. |
| 2085 | 2091 | </para> | ... | ... |
qpdf/qpdf.testcov
| ... | ... | @@ -188,3 +188,7 @@ QPDF_Stream getStreamData 0 |
| 188 | 188 | QPDF_Stream expand filter abbreviation 0 |
| 189 | 189 | qpdf-c called qpdf_read_memory 0 |
| 190 | 190 | Pl_QPDFTokenizer found EI 0 |
| 191 | +QPDF stream without newline 0 | |
| 192 | +QPDF stream with CR only 0 | |
| 193 | +QPDF stream with CRNL 0 | |
| 194 | +QPDF stream with NL only 0 | ... | ... |
qpdf/qtest/qpdf.test
| ... | ... | @@ -111,7 +111,7 @@ $td->runtest("new stream", |
| 111 | 111 | show_ntests(); |
| 112 | 112 | # ---------- |
| 113 | 113 | $td->notify("--- Miscellaneous Tests ---"); |
| 114 | -$n_tests += 29; | |
| 114 | +$n_tests += 31; | |
| 115 | 115 | |
| 116 | 116 | $td->runtest("qpdf version", |
| 117 | 117 | {$td->COMMAND => "qpdf --version"}, |
| ... | ... | @@ -265,6 +265,17 @@ $td->runtest("error/output redirection to strings", |
| 265 | 265 | $td->EXIT_STATUS => 0}, |
| 266 | 266 | $td->NORMALIZE_NEWLINES); |
| 267 | 267 | |
| 268 | +$td->runtest("odd terminators for stream keyword", | |
| 269 | + {$td->COMMAND => | |
| 270 | + "qpdf --qdf --static-id" . | |
| 271 | + " stream-line-enders.pdf a.qdf"}, | |
| 272 | + {$td->FILE => "stream-line-enders.out", | |
| 273 | + $td->EXIT_STATUS => 3}, | |
| 274 | + $td->NORMALIZE_NEWLINES); | |
| 275 | +$td->runtest("check output", | |
| 276 | + {$td->FILE => "a.qdf"}, | |
| 277 | + {$td->FILE => "stream-line-enders.qdf"}); | |
| 278 | + | |
| 268 | 279 | show_ntests(); |
| 269 | 280 | # ---------- |
| 270 | 281 | $td->notify("--- Error Condition Tests ---"); | ... | ... |
qpdf/qtest/qpdf/stream-line-enders.out
0 → 100644
| 1 | +WARNING: stream-line-enders.pdf (object 5 0, file position 378): stream keyword followed by carriage return only | |
| 2 | +WARNING: stream-line-enders.pdf (object 6 0, file position 437): stream keyword not followed by proper line terminator | |
| 3 | +qpdf: operation succeeded with warnings; resulting file may have some problems | ... | ... |
qpdf/qtest/qpdf/stream-line-enders.pdf
0 → 100644
| 1 | +%PDF-1.3 | |
| 2 | +%¿÷¢þ | |
| 3 | +1 0 obj | |
| 4 | +<< /Pages 2 0 R /Type /Catalog >> | |
| 5 | +endobj | |
| 6 | +2 0 obj | |
| 7 | +<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >> | |
| 8 | +endobj | |
| 9 | +3 0 obj | |
| 10 | +<< /Contents [ 4 0 R 5 0 R 6 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 7 0 R >> /ProcSet 8 0 R >> /Type /Page >> | |
| 11 | +endobj | |
| 12 | +4 0 obj | |
| 13 | +<< /Length 14 >> | |
| 14 | +stream | |
| 15 | +BT | |
| 16 | + /F1 24 Tf | |
| 17 | +endstream | |
| 18 | +endobj | |
| 19 | +5 0 obj | |
| 20 | +<< /Length 10 >> | |
| 21 | +stream 72 720 Td | |
| 22 | +endstream | |
| 23 | +endobj | |
| 24 | +6 0 obj | |
| 25 | +<< /Length 15 >> | |
| 26 | +stream (Potato) Tj | |
| 27 | +ET | |
| 28 | +endstream | |
| 29 | +endobj | |
| 30 | +7 0 obj | |
| 31 | +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> | |
| 32 | +endobj | |
| 33 | +8 0 obj | |
| 34 | +[ /PDF /Text ] | |
| 35 | +endobj | |
| 36 | +xref | |
| 37 | +0 9 | |
| 38 | +0000000000 65535 f | |
| 39 | +0000000015 00000 n | |
| 40 | +0000000064 00000 n | |
| 41 | +0000000123 00000 n | |
| 42 | +0000000282 00000 n | |
| 43 | +0000000346 00000 n | |
| 44 | +0000000405 00000 n | |
| 45 | +0000000469 00000 n | |
| 46 | +0000000576 00000 n | |
| 47 | +trailer << /Root 1 0 R /Size 9 /ID [<08aa98c73f8a7262d77c8328772c3989><7b1f32865e2165debe277f27ee790092>] >> | |
| 48 | +startxref | |
| 49 | +606 | |
| 50 | +%%EOF | ... | ... |
qpdf/qtest/qpdf/stream-line-enders.qdf
0 → 100644
| 1 | +%PDF-1.3 | |
| 2 | +%¿÷¢þ | |
| 3 | +%QDF-1.0 | |
| 4 | + | |
| 5 | +%% Original object ID: 1 0 | |
| 6 | +1 0 obj | |
| 7 | +<< | |
| 8 | + /Pages 2 0 R | |
| 9 | + /Type /Catalog | |
| 10 | +>> | |
| 11 | +endobj | |
| 12 | + | |
| 13 | +%% Original object ID: 2 0 | |
| 14 | +2 0 obj | |
| 15 | +<< | |
| 16 | + /Count 1 | |
| 17 | + /Kids [ | |
| 18 | + 3 0 R | |
| 19 | + ] | |
| 20 | + /Type /Pages | |
| 21 | +>> | |
| 22 | +endobj | |
| 23 | + | |
| 24 | +%% Page 1 | |
| 25 | +%% Original object ID: 3 0 | |
| 26 | +3 0 obj | |
| 27 | +<< | |
| 28 | + /Contents [ | |
| 29 | + 4 0 R | |
| 30 | + 6 0 R | |
| 31 | + 8 0 R | |
| 32 | + ] | |
| 33 | + /MediaBox [ | |
| 34 | + 0 | |
| 35 | + 0 | |
| 36 | + 612 | |
| 37 | + 792 | |
| 38 | + ] | |
| 39 | + /Parent 2 0 R | |
| 40 | + /Resources << | |
| 41 | + /Font << | |
| 42 | + /F1 10 0 R | |
| 43 | + >> | |
| 44 | + /ProcSet 11 0 R | |
| 45 | + >> | |
| 46 | + /Type /Page | |
| 47 | +>> | |
| 48 | +endobj | |
| 49 | + | |
| 50 | +%% Contents for page 1 | |
| 51 | +%% Original object ID: 4 0 | |
| 52 | +4 0 obj | |
| 53 | +<< | |
| 54 | + /Length 5 0 R | |
| 55 | +>> | |
| 56 | +stream | |
| 57 | +BT | |
| 58 | + /F1 24 Tf | |
| 59 | +endstream | |
| 60 | +endobj | |
| 61 | + | |
| 62 | +5 0 obj | |
| 63 | +14 | |
| 64 | +endobj | |
| 65 | + | |
| 66 | +%% Contents for page 1 | |
| 67 | +%% Original object ID: 5 0 | |
| 68 | +6 0 obj | |
| 69 | +<< | |
| 70 | + /Length 7 0 R | |
| 71 | +>> | |
| 72 | +stream | |
| 73 | +72 720 Td | |
| 74 | +endstream | |
| 75 | +endobj | |
| 76 | + | |
| 77 | +7 0 obj | |
| 78 | +10 | |
| 79 | +endobj | |
| 80 | + | |
| 81 | +%% Contents for page 1 | |
| 82 | +%% Original object ID: 6 0 | |
| 83 | +8 0 obj | |
| 84 | +<< | |
| 85 | + /Length 9 0 R | |
| 86 | +>> | |
| 87 | +stream | |
| 88 | +(Potato) Tj | |
| 89 | +ET | |
| 90 | +endstream | |
| 91 | +endobj | |
| 92 | + | |
| 93 | +9 0 obj | |
| 94 | +15 | |
| 95 | +endobj | |
| 96 | + | |
| 97 | +%% Original object ID: 7 0 | |
| 98 | +10 0 obj | |
| 99 | +<< | |
| 100 | + /BaseFont /Helvetica | |
| 101 | + /Encoding /WinAnsiEncoding | |
| 102 | + /Name /F1 | |
| 103 | + /Subtype /Type1 | |
| 104 | + /Type /Font | |
| 105 | +>> | |
| 106 | +endobj | |
| 107 | + | |
| 108 | +%% Original object ID: 8 0 | |
| 109 | +11 0 obj | |
| 110 | +[ | |
| 111 | ||
| 112 | + /Text | |
| 113 | +] | |
| 114 | +endobj | |
| 115 | + | |
| 116 | +xref | |
| 117 | +0 12 | |
| 118 | +0000000000 65535 f | |
| 119 | +0000000052 00000 n | |
| 120 | +0000000133 00000 n | |
| 121 | +0000000242 00000 n | |
| 122 | +0000000516 00000 n | |
| 123 | +0000000585 00000 n | |
| 124 | +0000000654 00000 n | |
| 125 | +0000000719 00000 n | |
| 126 | +0000000788 00000 n | |
| 127 | +0000000858 00000 n | |
| 128 | +0000000904 00000 n | |
| 129 | +0000001050 00000 n | |
| 130 | +trailer << | |
| 131 | + /Root 1 0 R | |
| 132 | + /Size 12 | |
| 133 | + /ID [<08aa98c73f8a7262d77c8328772c3989><31415926535897932384626433832795>] | |
| 134 | +>> | |
| 135 | +startxref | |
| 136 | +1086 | |
| 137 | +%%EOF | ... | ... |