Commit ef8ae5449dc30782451beba64fdd0af86e1cb931
1 parent
8320d16c
Allow QPDFTokenizer::readToken to return bad tokens
Sometimes we want to ignore bad tokens rather than have readToken throw an exception for them. The coverage case for this path is commented out here and is added in a later commit.
Showing 7 changed files with 114 additions and 10 deletions
include/qpdf/QPDF.hh
| @@ -666,7 +666,8 @@ class QPDF | @@ -666,7 +666,8 @@ class QPDF | ||
| 666 | size_t recoverStreamLength( | 666 | size_t recoverStreamLength( |
| 667 | PointerHolder<InputSource> input, int objid, int generation, | 667 | PointerHolder<InputSource> input, int objid, int generation, |
| 668 | qpdf_offset_t stream_offset); | 668 | qpdf_offset_t stream_offset); |
| 669 | - QPDFTokenizer::Token readToken(PointerHolder<InputSource>); | 669 | + QPDFTokenizer::Token readToken(PointerHolder<InputSource>, |
| 670 | + bool allow_bad = false); | ||
| 670 | 671 | ||
| 671 | QPDFObjectHandle readObjectAtOffset( | 672 | QPDFObjectHandle readObjectAtOffset( |
| 672 | bool attempt_recovery, | 673 | bool attempt_recovery, |
include/qpdf/QPDFTokenizer.hh
| @@ -138,7 +138,8 @@ class QPDFTokenizer | @@ -138,7 +138,8 @@ class QPDFTokenizer | ||
| 138 | // exception thrown if there is an error. | 138 | // exception thrown if there is an error. |
| 139 | QPDF_DLL | 139 | QPDF_DLL |
| 140 | Token readToken(PointerHolder<InputSource> input, | 140 | Token readToken(PointerHolder<InputSource> input, |
| 141 | - std::string const& context); | 141 | + std::string const& context, |
| 142 | + bool allow_bad = false); | ||
| 142 | 143 | ||
| 143 | private: | 144 | private: |
| 144 | void reset(); | 145 | void reset(); |
libqpdf/QPDF.cc
| @@ -1329,9 +1329,10 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input, | @@ -1329,9 +1329,10 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input, | ||
| 1329 | } | 1329 | } |
| 1330 | 1330 | ||
| 1331 | QPDFTokenizer::Token | 1331 | QPDFTokenizer::Token |
| 1332 | -QPDF::readToken(PointerHolder<InputSource> input) | 1332 | +QPDF::readToken(PointerHolder<InputSource> input, bool allow_bad) |
| 1333 | { | 1333 | { |
| 1334 | - return this->tokenizer.readToken(input, this->last_object_description); | 1334 | + return this->tokenizer.readToken( |
| 1335 | + input, this->last_object_description, allow_bad); | ||
| 1335 | } | 1336 | } |
| 1336 | 1337 | ||
| 1337 | QPDFObjectHandle | 1338 | QPDFObjectHandle |
libqpdf/QPDFTokenizer.cc
| @@ -475,7 +475,8 @@ QPDFTokenizer::betweenTokens() | @@ -475,7 +475,8 @@ QPDFTokenizer::betweenTokens() | ||
| 475 | 475 | ||
| 476 | QPDFTokenizer::Token | 476 | QPDFTokenizer::Token |
| 477 | QPDFTokenizer::readToken(PointerHolder<InputSource> input, | 477 | QPDFTokenizer::readToken(PointerHolder<InputSource> input, |
| 478 | - std::string const& context) | 478 | + std::string const& context, |
| 479 | + bool allow_bad) | ||
| 479 | { | 480 | { |
| 480 | qpdf_offset_t offset = input->tell(); | 481 | qpdf_offset_t offset = input->tell(); |
| 481 | Token token; | 482 | Token token; |
| @@ -514,13 +515,20 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input, | @@ -514,13 +515,20 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input, | ||
| 514 | input->unreadCh(char_to_unread); | 515 | input->unreadCh(char_to_unread); |
| 515 | } | 516 | } |
| 516 | 517 | ||
| 518 | + input->setLastOffset(offset); | ||
| 519 | + | ||
| 517 | if (token.getType() == tt_bad) | 520 | if (token.getType() == tt_bad) |
| 518 | { | 521 | { |
| 519 | - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 520 | - context, offset, token.getErrorMessage()); | 522 | + if (allow_bad) |
| 523 | + { | ||
| 524 | +// QTC::TC("qpdf", "QPDFTokenizer allowing bad token"); | ||
| 525 | + } | ||
| 526 | + else | ||
| 527 | + { | ||
| 528 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 529 | + context, offset, token.getErrorMessage()); | ||
| 530 | + } | ||
| 521 | } | 531 | } |
| 522 | 532 | ||
| 523 | - input->setLastOffset(offset); | ||
| 524 | - | ||
| 525 | return token; | 533 | return token; |
| 526 | } | 534 | } |
qpdf/qtest/qpdf.test
| @@ -232,7 +232,7 @@ foreach my $d (@bug_tests) | @@ -232,7 +232,7 @@ foreach my $d (@bug_tests) | ||
| 232 | show_ntests(); | 232 | show_ntests(); |
| 233 | # ---------- | 233 | # ---------- |
| 234 | $td->notify("--- Miscellaneous Tests ---"); | 234 | $td->notify("--- Miscellaneous Tests ---"); |
| 235 | -$n_tests += 85; | 235 | +$n_tests += 86; |
| 236 | 236 | ||
| 237 | $td->runtest("qpdf version", | 237 | $td->runtest("qpdf version", |
| 238 | {$td->COMMAND => "qpdf --version"}, | 238 | {$td->COMMAND => "qpdf --version"}, |
| @@ -662,6 +662,13 @@ $td->runtest("combine show and --pages", | @@ -662,6 +662,13 @@ $td->runtest("combine show and --pages", | ||
| 662 | $td->EXIT_STATUS => 0}, | 662 | $td->EXIT_STATUS => 0}, |
| 663 | $td->NORMALIZE_NEWLINES); | 663 | $td->NORMALIZE_NEWLINES); |
| 664 | 664 | ||
| 665 | +$td->runtest("ignore bad token", | ||
| 666 | + {$td->COMMAND => | ||
| 667 | + "qpdf --show-xref bad-token-startxref.pdf"}, | ||
| 668 | + {$td->FILE => "bad-token-startxref.out", | ||
| 669 | + $td->EXIT_STATUS => 0}, | ||
| 670 | + $td->NORMALIZE_NEWLINES); | ||
| 671 | + | ||
| 665 | show_ntests(); | 672 | show_ntests(); |
| 666 | # ---------- | 673 | # ---------- |
| 667 | $td->notify("--- Single Page ---"); | 674 | $td->notify("--- Single Page ---"); |
qpdf/qtest/qpdf/bad-token-startxref.out
0 → 100644
qpdf/qtest/qpdf/bad-token-startxref.pdf
0 → 100644
| 1 | +%PDF-1.3 | ||
| 2 | +1 0 obj | ||
| 3 | +<< | ||
| 4 | + /Type /Catalog | ||
| 5 | + /Pages 2 0 R | ||
| 6 | +>> | ||
| 7 | +endobj | ||
| 8 | + | ||
| 9 | +2 0 obj | ||
| 10 | +<< | ||
| 11 | + /Type /Pages | ||
| 12 | + /Kids [ | ||
| 13 | + 3 0 R | ||
| 14 | + ] | ||
| 15 | + /Count 1 | ||
| 16 | +>> | ||
| 17 | +endobj | ||
| 18 | + | ||
| 19 | +3 0 obj | ||
| 20 | +<< | ||
| 21 | + /Type /Page | ||
| 22 | + /Parent 2 0 R | ||
| 23 | + /MediaBox [0 0 612 792] | ||
| 24 | + /Contents 4 0 R | ||
| 25 | + /Resources << | ||
| 26 | + /ProcSet 5 0 R | ||
| 27 | + /Font << | ||
| 28 | + /F1 6 0 R | ||
| 29 | + >> | ||
| 30 | + >> | ||
| 31 | +>> | ||
| 32 | +endobj | ||
| 33 | + | ||
| 34 | +4 0 obj | ||
| 35 | +<< | ||
| 36 | + /Length 44 | ||
| 37 | +>> | ||
| 38 | +stream | ||
| 39 | +BT | ||
| 40 | + /F1 24 Tf | ||
| 41 | + 72 720 Td | ||
| 42 | + (Potato) Tj | ||
| 43 | +ET | ||
| 44 | +endstream | ||
| 45 | +endobj | ||
| 46 | + | ||
| 47 | +5 0 obj | ||
| 48 | +[ | ||
| 49 | + /PDF | ||
| 50 | + /Text | ||
| 51 | +] | ||
| 52 | +endobj | ||
| 53 | + | ||
| 54 | +6 0 obj | ||
| 55 | +<< | ||
| 56 | + /Type /Font | ||
| 57 | + /Subtype /Type1 | ||
| 58 | + /Name /F1 | ||
| 59 | + /BaseFont /Helvetica | ||
| 60 | + /Encoding /WinAnsiEncoding | ||
| 61 | +>> | ||
| 62 | +endobj | ||
| 63 | + | ||
| 64 | +xref | ||
| 65 | +0 7 | ||
| 66 | +0000000000 65535 f | ||
| 67 | +0000000009 00000 n | ||
| 68 | +0000000063 00000 n | ||
| 69 | +0000000135 00000 n | ||
| 70 | +0000000307 00000 n | ||
| 71 | +0000000403 00000 n | ||
| 72 | +0000000438 00000 n | ||
| 73 | +trailer << | ||
| 74 | + /Size 7 | ||
| 75 | + /Root 1 0 R | ||
| 76 | +>> | ||
| 77 | +startxref( | ||
| 78 | +startxref | ||
| 79 | +556 | ||
| 80 | +%%EOF |