Commit ef8ae5449dc30782451beba64fdd0af86e1cb931
1 parent: 8320d16c
Allow QPDFTokenizer::readToken to return bad tokens
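Sometimes a caller wants to ignore a bad token rather than have readToken throw an exception. readToken now takes an optional allow_bad argument (default false); when it is true, a bad token is returned to the caller instead of raising an error. The test-coverage call for this new path is commented out in this commit and is added in a later commit.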
Showing 7 changed files with 114 additions and 10 deletions
include/qpdf/QPDF.hh
| ... | ... | @@ -666,7 +666,8 @@ class QPDF |
| 666 | 666 | size_t recoverStreamLength( |
| 667 | 667 | PointerHolder<InputSource> input, int objid, int generation, |
| 668 | 668 | qpdf_offset_t stream_offset); |
| 669 | - QPDFTokenizer::Token readToken(PointerHolder<InputSource>); | |
| 669 | + QPDFTokenizer::Token readToken(PointerHolder<InputSource>, | |
| 670 | + bool allow_bad = false); | |
| 670 | 671 | |
| 671 | 672 | QPDFObjectHandle readObjectAtOffset( |
| 672 | 673 | bool attempt_recovery, | ... | ... |
include/qpdf/QPDFTokenizer.hh
| ... | ... | @@ -138,7 +138,8 @@ class QPDFTokenizer |
| 138 | 138 | // exception thrown if there is an error. |
| 139 | 139 | QPDF_DLL |
| 140 | 140 | Token readToken(PointerHolder<InputSource> input, |
| 141 | - std::string const& context); | |
| 141 | + std::string const& context, | |
| 142 | + bool allow_bad = false); | |
| 142 | 143 | |
| 143 | 144 | private: |
| 144 | 145 | void reset(); | ... | ... |
libqpdf/QPDF.cc
| ... | ... | @@ -1329,9 +1329,10 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input, |
| 1329 | 1329 | } |
| 1330 | 1330 | |
| 1331 | 1331 | QPDFTokenizer::Token |
| 1332 | -QPDF::readToken(PointerHolder<InputSource> input) | |
| 1332 | +QPDF::readToken(PointerHolder<InputSource> input, bool allow_bad) | |
| 1333 | 1333 | { |
| 1334 | - return this->tokenizer.readToken(input, this->last_object_description); | |
| 1334 | + return this->tokenizer.readToken( | |
| 1335 | + input, this->last_object_description, allow_bad); | |
| 1335 | 1336 | } |
| 1336 | 1337 | |
| 1337 | 1338 | QPDFObjectHandle | ... | ... |
libqpdf/QPDFTokenizer.cc
| ... | ... | @@ -475,7 +475,8 @@ QPDFTokenizer::betweenTokens() |
| 475 | 475 | |
| 476 | 476 | QPDFTokenizer::Token |
| 477 | 477 | QPDFTokenizer::readToken(PointerHolder<InputSource> input, |
| 478 | - std::string const& context) | |
| 478 | + std::string const& context, | |
| 479 | + bool allow_bad) | |
| 479 | 480 | { |
| 480 | 481 | qpdf_offset_t offset = input->tell(); |
| 481 | 482 | Token token; |
| ... | ... | @@ -514,13 +515,20 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input, |
| 514 | 515 | input->unreadCh(char_to_unread); |
| 515 | 516 | } |
| 516 | 517 | |
| 518 | + input->setLastOffset(offset); | |
| 519 | + | |
| 517 | 520 | if (token.getType() == tt_bad) |
| 518 | 521 | { |
| 519 | - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 520 | - context, offset, token.getErrorMessage()); | |
| 522 | + if (allow_bad) | |
| 523 | + { | |
| 524 | +// QTC::TC("qpdf", "QPDFTokenizer allowing bad token"); | |
| 525 | + } | |
| 526 | + else | |
| 527 | + { | |
| 528 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 529 | + context, offset, token.getErrorMessage()); | |
| 530 | + } | |
| 521 | 531 | } |
| 522 | 532 | |
| 523 | - input->setLastOffset(offset); | |
| 524 | - | |
| 525 | 533 | return token; |
| 526 | 534 | } | ... | ... |
qpdf/qtest/qpdf.test
| ... | ... | @@ -232,7 +232,7 @@ foreach my $d (@bug_tests) |
| 232 | 232 | show_ntests(); |
| 233 | 233 | # ---------- |
| 234 | 234 | $td->notify("--- Miscellaneous Tests ---"); |
| 235 | -$n_tests += 85; | |
| 235 | +$n_tests += 86; | |
| 236 | 236 | |
| 237 | 237 | $td->runtest("qpdf version", |
| 238 | 238 | {$td->COMMAND => "qpdf --version"}, |
| ... | ... | @@ -662,6 +662,13 @@ $td->runtest("combine show and --pages", |
| 662 | 662 | $td->EXIT_STATUS => 0}, |
| 663 | 663 | $td->NORMALIZE_NEWLINES); |
| 664 | 664 | |
| 665 | +$td->runtest("ignore bad token", | |
| 666 | + {$td->COMMAND => | |
| 667 | + "qpdf --show-xref bad-token-startxref.pdf"}, | |
| 668 | + {$td->FILE => "bad-token-startxref.out", | |
| 669 | + $td->EXIT_STATUS => 0}, | |
| 670 | + $td->NORMALIZE_NEWLINES); | |
| 671 | + | |
| 665 | 672 | show_ntests(); |
| 666 | 673 | # ---------- |
| 667 | 674 | $td->notify("--- Single Page ---"); | ... | ... |
qpdf/qtest/qpdf/bad-token-startxref.out
0 → 100644
qpdf/qtest/qpdf/bad-token-startxref.pdf
0 → 100644
| 1 | +%PDF-1.3 | |
| 2 | +1 0 obj | |
| 3 | +<< | |
| 4 | + /Type /Catalog | |
| 5 | + /Pages 2 0 R | |
| 6 | +>> | |
| 7 | +endobj | |
| 8 | + | |
| 9 | +2 0 obj | |
| 10 | +<< | |
| 11 | + /Type /Pages | |
| 12 | + /Kids [ | |
| 13 | + 3 0 R | |
| 14 | + ] | |
| 15 | + /Count 1 | |
| 16 | +>> | |
| 17 | +endobj | |
| 18 | + | |
| 19 | +3 0 obj | |
| 20 | +<< | |
| 21 | + /Type /Page | |
| 22 | + /Parent 2 0 R | |
| 23 | + /MediaBox [0 0 612 792] | |
| 24 | + /Contents 4 0 R | |
| 25 | + /Resources << | |
| 26 | + /ProcSet 5 0 R | |
| 27 | + /Font << | |
| 28 | + /F1 6 0 R | |
| 29 | + >> | |
| 30 | + >> | |
| 31 | +>> | |
| 32 | +endobj | |
| 33 | + | |
| 34 | +4 0 obj | |
| 35 | +<< | |
| 36 | + /Length 44 | |
| 37 | +>> | |
| 38 | +stream | |
| 39 | +BT | |
| 40 | + /F1 24 Tf | |
| 41 | + 72 720 Td | |
| 42 | + (Potato) Tj | |
| 43 | +ET | |
| 44 | +endstream | |
| 45 | +endobj | |
| 46 | + | |
| 47 | +5 0 obj | |
| 48 | +[ | |
| 49 | ||
| 50 | + /Text | |
| 51 | +] | |
| 52 | +endobj | |
| 53 | + | |
| 54 | +6 0 obj | |
| 55 | +<< | |
| 56 | + /Type /Font | |
| 57 | + /Subtype /Type1 | |
| 58 | + /Name /F1 | |
| 59 | + /BaseFont /Helvetica | |
| 60 | + /Encoding /WinAnsiEncoding | |
| 61 | +>> | |
| 62 | +endobj | |
| 63 | + | |
| 64 | +xref | |
| 65 | +0 7 | |
| 66 | +0000000000 65535 f | |
| 67 | +0000000009 00000 n | |
| 68 | +0000000063 00000 n | |
| 69 | +0000000135 00000 n | |
| 70 | +0000000307 00000 n | |
| 71 | +0000000403 00000 n | |
| 72 | +0000000438 00000 n | |
| 73 | +trailer << | |
| 74 | + /Size 7 | |
| 75 | + /Root 1 0 R | |
| 76 | +>> | |
| 77 | +startxref( | |
| 78 | +startxref | |
| 79 | +556 | |
| 80 | +%%EOF | ... | ... |