Commit ae5bd7102da5d4b456f08790a0efc04c1c42b4a5
1 parent
8a9086a6
Accept extraneous space before xref (fixes #341)
Showing
10 changed files
with
228 additions
and
1 deletion
ChangeLog
| 1 | 2019-08-19 Jay Berkenbilt <ejb@ql.org> | 1 | 2019-08-19 Jay Berkenbilt <ejb@ql.org> |
| 2 | 2 | ||
| 3 | + * Accept (and warn for) extraneous whitespace preceding the xref | ||
| 4 | + table. Fixes #341. | ||
| 5 | + | ||
| 3 | * Accept (and warn for) extraneous whitespace between the stream | 6 | * Accept (and warn for) extraneous whitespace between the stream |
| 4 | keyword and newline. Fixes #329. | 7 | keyword and newline. Fixes #329. |
| 5 | 8 |
libqpdf/QPDF.cc
| @@ -570,6 +570,37 @@ QPDF::read_xref(qpdf_offset_t xref_offset) | @@ -570,6 +570,37 @@ QPDF::read_xref(qpdf_offset_t xref_offset) | ||
| 570 | char buf[7]; | 570 | char buf[7]; |
| 571 | memset(buf, 0, sizeof(buf)); | 571 | memset(buf, 0, sizeof(buf)); |
| 572 | this->m->file->seek(xref_offset, SEEK_SET); | 572 | this->m->file->seek(xref_offset, SEEK_SET); |
| 573 | + // Some files miss the mark a little with startxref. We could | ||
| 574 | + // do a better job of searching in the neighborhood for | ||
| 575 | + // something that looks like either an xref table or stream, | ||
| 576 | + // but the simple heuristic of skipping whitespace can help | ||
| 577 | + // with the xref table case and is harmless with the stream | ||
| 578 | + // case. | ||
| 579 | + bool done = false; | ||
| 580 | + bool skipped_space = false; | ||
| 581 | + while (! done) | ||
| 582 | + { | ||
| 583 | + char ch; | ||
| 584 | + if (1 == this->m->file->read(&ch, 1)) | ||
| 585 | + { | ||
| 586 | + if (QUtil::is_space(ch)) | ||
| 587 | + { | ||
| 588 | + skipped_space = true; | ||
| 589 | + } | ||
| 590 | + else | ||
| 591 | + { | ||
| 592 | + this->m->file->unreadCh(ch); | ||
| 593 | + done = true; | ||
| 594 | + } | ||
| 595 | + } | ||
| 596 | + else | ||
| 597 | + { | ||
| 598 | + QTC::TC("qpdf", "QPDF eof skipping spaces before xref", | ||
| 599 | + skipped_space ? 0 : 1); | ||
| 600 | + done = true; | ||
| 601 | + } | ||
| 602 | + } | ||
| 603 | + | ||
| 573 | this->m->file->read(buf, sizeof(buf) - 1); | 604 | this->m->file->read(buf, sizeof(buf) - 1); |
| 574 | // The PDF spec says xref must be followed by a line | 605 | // The PDF spec says xref must be followed by a line |
| 575 | // terminator, but files exist in the wild where it is | 606 | // terminator, but files exist in the wild where it is |
| @@ -577,6 +608,13 @@ QPDF::read_xref(qpdf_offset_t xref_offset) | @@ -577,6 +608,13 @@ QPDF::read_xref(qpdf_offset_t xref_offset) | ||
| 577 | if ((strncmp(buf, "xref", 4) == 0) && | 608 | if ((strncmp(buf, "xref", 4) == 0) && |
| 578 | QUtil::is_space(buf[4])) | 609 | QUtil::is_space(buf[4])) |
| 579 | { | 610 | { |
| 611 | + if (skipped_space) | ||
| 612 | + { | ||
| 613 | + QTC::TC("qpdf", "QPDF xref skipped space"); | ||
| 614 | + warn(QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(), | ||
| 615 | + "", 0, | ||
| 616 | + "extraneous whitespace seen before xref")); | ||
| 617 | + } | ||
| 580 | QTC::TC("qpdf", "QPDF xref space", | 618 | QTC::TC("qpdf", "QPDF xref space", |
| 581 | ((buf[4] == '\n') ? 0 : | 619 | ((buf[4] == '\n') ? 0 : |
| 582 | (buf[4] == '\r') ? 1 : | 620 | (buf[4] == '\r') ? 1 : |
qpdf/qpdf.testcov
| @@ -441,3 +441,5 @@ QPDFObjectHandle int returning INT_MIN 0 | @@ -441,3 +441,5 @@ QPDFObjectHandle int returning INT_MIN 0 | ||
| 441 | QPDFObjectHandle int returning INT_MAX 0 | 441 | QPDFObjectHandle int returning INT_MAX 0 |
| 442 | QPDFObjectHandle uint returning UINT_MAX 0 | 442 | QPDFObjectHandle uint returning UINT_MAX 0 |
| 443 | QPDFObjectHandle uint uint returning 0 0 | 443 | QPDFObjectHandle uint uint returning 0 0 |
| 444 | +QPDF xref skipped space 0 | ||
| 445 | +QPDF eof skipping spaces before xref 1 |
qpdf/qtest/qpdf.test
| @@ -2291,6 +2291,8 @@ my @badfiles = ("not a PDF file", # 1 | @@ -2291,6 +2291,8 @@ my @badfiles = ("not a PDF file", # 1 | ||
| 2291 | "obj/gen in wrong place", # 34 | 2291 | "obj/gen in wrong place", # 34 |
| 2292 | "object stream of wrong type", # 35 | 2292 | "object stream of wrong type", # 35 |
| 2293 | "bad dictionary key", # 36 | 2293 | "bad dictionary key", # 36 |
| 2294 | + "space before xref", # 37 | ||
| 2295 | + "startxref to space then eof", # 38 | ||
| 2294 | ); | 2296 | ); |
| 2295 | 2297 | ||
| 2296 | $n_tests += @badfiles + 6; | 2298 | $n_tests += @badfiles + 6; |
| @@ -2301,7 +2303,7 @@ $n_tests += @badfiles + 6; | @@ -2301,7 +2303,7 @@ $n_tests += @badfiles + 6; | ||
| 2301 | # have error conditions that used to be fatal but are now considered | 2303 | # have error conditions that used to be fatal but are now considered |
| 2302 | # non-fatal. | 2304 | # non-fatal. |
| 2303 | my %badtest_overrides = (); | 2305 | my %badtest_overrides = (); |
| 2304 | -for(6, 12..15, 17, 18..32, 34, 36) | 2306 | +for(6, 12..15, 17, 18..32, 34, 36..37) |
| 2305 | { | 2307 | { |
| 2306 | $badtest_overrides{$_} = 0; | 2308 | $badtest_overrides{$_} = 0; |
| 2307 | } | 2309 | } |
qpdf/qtest/qpdf/bad37-recover.out
0 → 100644
qpdf/qtest/qpdf/bad37.out
0 → 100644
qpdf/qtest/qpdf/bad37.pdf
0 → 100644
| 1 | +%PDF-1.3 | ||
| 2 | +1 0 obj | ||
| 3 | +<< | ||
| 4 | + /Type /Catalog | ||
| 5 | + /Pages 2 0 R | ||
| 6 | +>> | ||
| 7 | +endobj | ||
| 8 | + | ||
| 9 | +2 0 obj | ||
| 10 | +<< | ||
| 11 | + /Type /Pages | ||
| 12 | + /Kids [ | ||
| 13 | + 3 0 R | ||
| 14 | + ] | ||
| 15 | + /Count 1 | ||
| 16 | +>> | ||
| 17 | +endobj | ||
| 18 | + | ||
| 19 | +3 0 obj | ||
| 20 | +<< | ||
| 21 | + /Type /Page | ||
| 22 | + /Parent 2 0 R | ||
| 23 | + /MediaBox [0 0 612 792] | ||
| 24 | + /Contents 4 0 R | ||
| 25 | + /Resources << | ||
| 26 | + /ProcSet 5 0 R | ||
| 27 | + /Font << | ||
| 28 | + /F1 6 0 R | ||
| 29 | + >> | ||
| 30 | + >> | ||
| 31 | +>> | ||
| 32 | +endobj | ||
| 33 | + | ||
| 34 | +4 0 obj | ||
| 35 | +<< | ||
| 36 | + /Length 44 | ||
| 37 | +>> | ||
| 38 | +stream | ||
| 39 | +BT | ||
| 40 | + /F1 24 Tf | ||
| 41 | + 72 720 Td | ||
| 42 | + (Potato) Tj | ||
| 43 | +ET | ||
| 44 | +endstream | ||
| 45 | +endobj | ||
| 46 | + | ||
| 47 | +5 0 obj | ||
| 48 | +[ | ||
| 49 | |||
| 50 | + /Text | ||
| 51 | +] | ||
| 52 | +endobj | ||
| 53 | + | ||
| 54 | +6 0 obj | ||
| 55 | +<< | ||
| 56 | + /Type /Font | ||
| 57 | + /Subtype /Type1 | ||
| 58 | + /Name /F1 | ||
| 59 | + /BaseFont /Helvetica | ||
| 60 | + /Encoding /WinAnsiEncoding | ||
| 61 | +>> | ||
| 62 | +endobj | ||
| 63 | + | ||
| 64 | +xref | ||
| 65 | +0 7 | ||
| 66 | +0000000000 65535 f | ||
| 67 | +0000000009 00000 n | ||
| 68 | +0000000063 00000 n | ||
| 69 | +0000000135 00000 n | ||
| 70 | +0000000307 00000 n | ||
| 71 | +0000000403 00000 n | ||
| 72 | +0000000438 00000 n | ||
| 73 | +trailer << | ||
| 74 | + /Size 7 | ||
| 75 | + /Root 1 0 R | ||
| 76 | + /QTest (potato) | ||
| 77 | +>> | ||
| 78 | +startxref | ||
| 79 | +555 | ||
| 80 | +%%EOF |
qpdf/qtest/qpdf/bad38-recover.out
0 → 100644
| 1 | +WARNING: bad38.pdf: file is damaged | ||
| 2 | +WARNING: bad38.pdf (offset 781): xref not found | ||
| 3 | +WARNING: bad38.pdf: Attempting to reconstruct cross-reference table | ||
| 4 | +/QTest is direct and has type string (6) | ||
| 5 | +/QTest is a string with value potato | ||
| 6 | +unparse: (potato) | ||
| 7 | +unparseResolved: (potato) | ||
| 8 | +test 1 done |
qpdf/qtest/qpdf/bad38.out
0 → 100644
| 1 | +bad38.pdf (offset 781): xref not found |
qpdf/qtest/qpdf/bad38.pdf
0 → 100644
| 1 | +%PDF-1.3 | ||
| 2 | +1 0 obj | ||
| 3 | +<< | ||
| 4 | + /Type /Catalog | ||
| 5 | + /Pages 2 0 R | ||
| 6 | +>> | ||
| 7 | +endobj | ||
| 8 | + | ||
| 9 | +2 0 obj | ||
| 10 | +<< | ||
| 11 | + /Type /Pages | ||
| 12 | + /Kids [ | ||
| 13 | + 3 0 R | ||
| 14 | + ] | ||
| 15 | + /Count 1 | ||
| 16 | +>> | ||
| 17 | +endobj | ||
| 18 | + | ||
| 19 | +3 0 obj | ||
| 20 | +<< | ||
| 21 | + /Type /Page | ||
| 22 | + /Parent 2 0 R | ||
| 23 | + /MediaBox [0 0 612 792] | ||
| 24 | + /Contents 4 0 R | ||
| 25 | + /Resources << | ||
| 26 | + /ProcSet 5 0 R | ||
| 27 | + /Font << | ||
| 28 | + /F1 6 0 R | ||
| 29 | + >> | ||
| 30 | + >> | ||
| 31 | +>> | ||
| 32 | +endobj | ||
| 33 | + | ||
| 34 | +4 0 obj | ||
| 35 | +<< | ||
| 36 | + /Length 44 | ||
| 37 | +>> | ||
| 38 | +stream | ||
| 39 | +BT | ||
| 40 | + /F1 24 Tf | ||
| 41 | + 72 720 Td | ||
| 42 | + (Potato) Tj | ||
| 43 | +ET | ||
| 44 | +endstream | ||
| 45 | +endobj | ||
| 46 | + | ||
| 47 | +5 0 obj | ||
| 48 | +[ | ||
| 49 | |||
| 50 | + /Text | ||
| 51 | +] | ||
| 52 | +endobj | ||
| 53 | + | ||
| 54 | +6 0 obj | ||
| 55 | +<< | ||
| 56 | + /Type /Font | ||
| 57 | + /Subtype /Type1 | ||
| 58 | + /Name /F1 | ||
| 59 | + /BaseFont /Helvetica | ||
| 60 | + /Encoding /WinAnsiEncoding | ||
| 61 | +>> | ||
| 62 | +endobj | ||
| 63 | + | ||
| 64 | +xref | ||
| 65 | +0 7 | ||
| 66 | +0000000000 65535 f | ||
| 67 | +0000000009 00000 n | ||
| 68 | +0000000063 00000 n | ||
| 69 | +0000000135 00000 n | ||
| 70 | +0000000307 00000 n | ||
| 71 | +0000000403 00000 n | ||
| 72 | +0000000438 00000 n | ||
| 73 | +trailer << | ||
| 74 | + /Size 7 | ||
| 75 | + /Root 1 0 R | ||
| 76 | + /QTest (potato) | ||
| 77 | +>> | ||
| 78 | +startxref | ||
| 79 | +781 | ||
| 80 | +%%EOF | ||
| 81 | + |