Commit ae5bd7102da5d4b456f08790a0efc04c1c42b4a5
1 parent
8a9086a6
Accept extraneous space before xref (fixes #341)
Showing
10 changed files
with
228 additions
and
1 deletion
ChangeLog
libqpdf/QPDF.cc
| ... | ... | @@ -570,6 +570,37 @@ QPDF::read_xref(qpdf_offset_t xref_offset) |
| 570 | 570 | char buf[7]; |
| 571 | 571 | memset(buf, 0, sizeof(buf)); |
| 572 | 572 | this->m->file->seek(xref_offset, SEEK_SET); |
| 573 | + // Some files miss the mark a little with startxref. We could | |
| 574 | + // do a better job of searching in the neighborhood for | |
| 575 | + // something that looks like either an xref table or stream, | |
| 576 | + // but the simple heuristic of skipping whitespace can help | |
| 577 | + // with the xref table case and is harmless with the stream | |
| 578 | + // case. | |
| 579 | + bool done = false; | |
| 580 | + bool skipped_space = false; | |
| 581 | + while (! done) | |
| 582 | + { | |
| 583 | + char ch; | |
| 584 | + if (1 == this->m->file->read(&ch, 1)) | |
| 585 | + { | |
| 586 | + if (QUtil::is_space(ch)) | |
| 587 | + { | |
| 588 | + skipped_space = true; | |
| 589 | + } | |
| 590 | + else | |
| 591 | + { | |
| 592 | + this->m->file->unreadCh(ch); | |
| 593 | + done = true; | |
| 594 | + } | |
| 595 | + } | |
| 596 | + else | |
| 597 | + { | |
| 598 | + QTC::TC("qpdf", "QPDF eof skipping spaces before xref", | |
| 599 | + skipped_space ? 0 : 1); | |
| 600 | + done = true; | |
| 601 | + } | |
| 602 | + } | |
| 603 | + | |
| 573 | 604 | this->m->file->read(buf, sizeof(buf) - 1); |
| 574 | 605 | // The PDF spec says xref must be followed by a line |
| 575 | 606 | // terminator, but files exist in the wild where it is |
| ... | ... | @@ -577,6 +608,13 @@ QPDF::read_xref(qpdf_offset_t xref_offset) |
| 577 | 608 | if ((strncmp(buf, "xref", 4) == 0) && |
| 578 | 609 | QUtil::is_space(buf[4])) |
| 579 | 610 | { |
| 611 | + if (skipped_space) | |
| 612 | + { | |
| 613 | + QTC::TC("qpdf", "QPDF xref skipped space"); | |
| 614 | + warn(QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(), | |
| 615 | + "", 0, | |
| 616 | + "extraneous whitespace seen before xref")); | |
| 617 | + } | |
| 580 | 618 | QTC::TC("qpdf", "QPDF xref space", |
| 581 | 619 | ((buf[4] == '\n') ? 0 : |
| 582 | 620 | (buf[4] == '\r') ? 1 : | ... | ... |
qpdf/qpdf.testcov
| ... | ... | @@ -441,3 +441,5 @@ QPDFObjectHandle int returning INT_MIN 0 |
| 441 | 441 | QPDFObjectHandle int returning INT_MAX 0 |
| 442 | 442 | QPDFObjectHandle uint returning UINT_MAX 0 |
| 443 | 443 | QPDFObjectHandle uint uint returning 0 0 |
| 444 | +QPDF xref skipped space 0 | |
| 445 | +QPDF eof skipping spaces before xref 1 | ... | ... |
qpdf/qtest/qpdf.test
| ... | ... | @@ -2291,6 +2291,8 @@ my @badfiles = ("not a PDF file", # 1 |
| 2291 | 2291 | "obj/gen in wrong place", # 34 |
| 2292 | 2292 | "object stream of wrong type", # 35 |
| 2293 | 2293 | "bad dictionary key", # 36 |
| 2294 | + "space before xref", # 37 | |
| 2295 | + "startxref to space then eof", # 38 | |
| 2294 | 2296 | ); |
| 2295 | 2297 | |
| 2296 | 2298 | $n_tests += @badfiles + 6; |
| ... | ... | @@ -2301,7 +2303,7 @@ $n_tests += @badfiles + 6; |
| 2301 | 2303 | # have error conditions that used to be fatal but are now considered |
| 2302 | 2304 | # non-fatal. |
| 2303 | 2305 | my %badtest_overrides = (); |
| 2304 | -for(6, 12..15, 17, 18..32, 34, 36) | |
| 2306 | +for(6, 12..15, 17, 18..32, 34, 36..37) | |
| 2305 | 2307 | { |
| 2306 | 2308 | $badtest_overrides{$_} = 0; |
| 2307 | 2309 | } | ... | ... |
qpdf/qtest/qpdf/bad37-recover.out
0 → 100644
qpdf/qtest/qpdf/bad37.out
0 → 100644
qpdf/qtest/qpdf/bad37.pdf
0 → 100644
| 1 | +%PDF-1.3 | |
| 2 | +1 0 obj | |
| 3 | +<< | |
| 4 | + /Type /Catalog | |
| 5 | + /Pages 2 0 R | |
| 6 | +>> | |
| 7 | +endobj | |
| 8 | + | |
| 9 | +2 0 obj | |
| 10 | +<< | |
| 11 | + /Type /Pages | |
| 12 | + /Kids [ | |
| 13 | + 3 0 R | |
| 14 | + ] | |
| 15 | + /Count 1 | |
| 16 | +>> | |
| 17 | +endobj | |
| 18 | + | |
| 19 | +3 0 obj | |
| 20 | +<< | |
| 21 | + /Type /Page | |
| 22 | + /Parent 2 0 R | |
| 23 | + /MediaBox [0 0 612 792] | |
| 24 | + /Contents 4 0 R | |
| 25 | + /Resources << | |
| 26 | + /ProcSet 5 0 R | |
| 27 | + /Font << | |
| 28 | + /F1 6 0 R | |
| 29 | + >> | |
| 30 | + >> | |
| 31 | +>> | |
| 32 | +endobj | |
| 33 | + | |
| 34 | +4 0 obj | |
| 35 | +<< | |
| 36 | + /Length 44 | |
| 37 | +>> | |
| 38 | +stream | |
| 39 | +BT | |
| 40 | + /F1 24 Tf | |
| 41 | + 72 720 Td | |
| 42 | + (Potato) Tj | |
| 43 | +ET | |
| 44 | +endstream | |
| 45 | +endobj | |
| 46 | + | |
| 47 | +5 0 obj | |
| 48 | +[ | |
| 49 | ||
| 50 | + /Text | |
| 51 | +] | |
| 52 | +endobj | |
| 53 | + | |
| 54 | +6 0 obj | |
| 55 | +<< | |
| 56 | + /Type /Font | |
| 57 | + /Subtype /Type1 | |
| 58 | + /Name /F1 | |
| 59 | + /BaseFont /Helvetica | |
| 60 | + /Encoding /WinAnsiEncoding | |
| 61 | +>> | |
| 62 | +endobj | |
| 63 | + | |
| 64 | +xref | |
| 65 | +0 7 | |
| 66 | +0000000000 65535 f | |
| 67 | +0000000009 00000 n | |
| 68 | +0000000063 00000 n | |
| 69 | +0000000135 00000 n | |
| 70 | +0000000307 00000 n | |
| 71 | +0000000403 00000 n | |
| 72 | +0000000438 00000 n | |
| 73 | +trailer << | |
| 74 | + /Size 7 | |
| 75 | + /Root 1 0 R | |
| 76 | + /QTest (potato) | |
| 77 | +>> | |
| 78 | +startxref | |
| 79 | +555 | |
| 80 | +%%EOF | ... | ... |
qpdf/qtest/qpdf/bad38-recover.out
0 → 100644
| 1 | +WARNING: bad38.pdf: file is damaged | |
| 2 | +WARNING: bad38.pdf (offset 781): xref not found | |
| 3 | +WARNING: bad38.pdf: Attempting to reconstruct cross-reference table | |
| 4 | +/QTest is direct and has type string (6) | |
| 5 | +/QTest is a string with value potato | |
| 6 | +unparse: (potato) | |
| 7 | +unparseResolved: (potato) | |
| 8 | +test 1 done | ... | ... |
qpdf/qtest/qpdf/bad38.out
0 → 100644
| 1 | +bad38.pdf (offset 781): xref not found | ... | ... |
qpdf/qtest/qpdf/bad38.pdf
0 → 100644
| 1 | +%PDF-1.3 | |
| 2 | +1 0 obj | |
| 3 | +<< | |
| 4 | + /Type /Catalog | |
| 5 | + /Pages 2 0 R | |
| 6 | +>> | |
| 7 | +endobj | |
| 8 | + | |
| 9 | +2 0 obj | |
| 10 | +<< | |
| 11 | + /Type /Pages | |
| 12 | + /Kids [ | |
| 13 | + 3 0 R | |
| 14 | + ] | |
| 15 | + /Count 1 | |
| 16 | +>> | |
| 17 | +endobj | |
| 18 | + | |
| 19 | +3 0 obj | |
| 20 | +<< | |
| 21 | + /Type /Page | |
| 22 | + /Parent 2 0 R | |
| 23 | + /MediaBox [0 0 612 792] | |
| 24 | + /Contents 4 0 R | |
| 25 | + /Resources << | |
| 26 | + /ProcSet 5 0 R | |
| 27 | + /Font << | |
| 28 | + /F1 6 0 R | |
| 29 | + >> | |
| 30 | + >> | |
| 31 | +>> | |
| 32 | +endobj | |
| 33 | + | |
| 34 | +4 0 obj | |
| 35 | +<< | |
| 36 | + /Length 44 | |
| 37 | +>> | |
| 38 | +stream | |
| 39 | +BT | |
| 40 | + /F1 24 Tf | |
| 41 | + 72 720 Td | |
| 42 | + (Potato) Tj | |
| 43 | +ET | |
| 44 | +endstream | |
| 45 | +endobj | |
| 46 | + | |
| 47 | +5 0 obj | |
| 48 | +[ | |
| 49 | ||
| 50 | + /Text | |
| 51 | +] | |
| 52 | +endobj | |
| 53 | + | |
| 54 | +6 0 obj | |
| 55 | +<< | |
| 56 | + /Type /Font | |
| 57 | + /Subtype /Type1 | |
| 58 | + /Name /F1 | |
| 59 | + /BaseFont /Helvetica | |
| 60 | + /Encoding /WinAnsiEncoding | |
| 61 | +>> | |
| 62 | +endobj | |
| 63 | + | |
| 64 | +xref | |
| 65 | +0 7 | |
| 66 | +0000000000 65535 f | |
| 67 | +0000000009 00000 n | |
| 68 | +0000000063 00000 n | |
| 69 | +0000000135 00000 n | |
| 70 | +0000000307 00000 n | |
| 71 | +0000000403 00000 n | |
| 72 | +0000000438 00000 n | |
| 73 | +trailer << | |
| 74 | + /Size 7 | |
| 75 | + /Root 1 0 R | |
| 76 | + /QTest (potato) | |
| 77 | +>> | |
| 78 | +startxref | |
| 79 | +781 | |
| 80 | +%%EOF | |
| 81 | + | ... | ... |