Commit ef8ae5449dc30782451beba64fdd0af86e1cb931

Authored by Jay Berkenbilt
1 parent 8320d16c

Allow QPDFTokenizer::readToken to return bad tokens

Sometimes we want to ignore bad tokens rather than having the tokenizer
throw an exception when it encounters one. The coverage case for this is
commented out here and will be added in a later commit.
include/qpdf/QPDF.hh
@@ -666,7 +666,8 @@ class QPDF @@ -666,7 +666,8 @@ class QPDF
666 size_t recoverStreamLength( 666 size_t recoverStreamLength(
667 PointerHolder<InputSource> input, int objid, int generation, 667 PointerHolder<InputSource> input, int objid, int generation,
668 qpdf_offset_t stream_offset); 668 qpdf_offset_t stream_offset);
669 - QPDFTokenizer::Token readToken(PointerHolder<InputSource>); 669 + QPDFTokenizer::Token readToken(PointerHolder<InputSource>,
  670 + bool allow_bad = false);
670 671
671 QPDFObjectHandle readObjectAtOffset( 672 QPDFObjectHandle readObjectAtOffset(
672 bool attempt_recovery, 673 bool attempt_recovery,
include/qpdf/QPDFTokenizer.hh
@@ -138,7 +138,8 @@ class QPDFTokenizer @@ -138,7 +138,8 @@ class QPDFTokenizer
138 // exception thrown if there is an error. 138 // exception thrown if there is an error.
139 QPDF_DLL 139 QPDF_DLL
140 Token readToken(PointerHolder<InputSource> input, 140 Token readToken(PointerHolder<InputSource> input,
141 - std::string const& context); 141 + std::string const& context,
  142 + bool allow_bad = false);
142 143
143 private: 144 private:
144 void reset(); 145 void reset();
libqpdf/QPDF.cc
@@ -1329,9 +1329,10 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input, @@ -1329,9 +1329,10 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input,
1329 } 1329 }
1330 1330
1331 QPDFTokenizer::Token 1331 QPDFTokenizer::Token
1332 -QPDF::readToken(PointerHolder<InputSource> input) 1332 +QPDF::readToken(PointerHolder<InputSource> input, bool allow_bad)
1333 { 1333 {
1334 - return this->tokenizer.readToken(input, this->last_object_description); 1334 + return this->tokenizer.readToken(
  1335 + input, this->last_object_description, allow_bad);
1335 } 1336 }
1336 1337
1337 QPDFObjectHandle 1338 QPDFObjectHandle
libqpdf/QPDFTokenizer.cc
@@ -475,7 +475,8 @@ QPDFTokenizer::betweenTokens() @@ -475,7 +475,8 @@ QPDFTokenizer::betweenTokens()
475 475
476 QPDFTokenizer::Token 476 QPDFTokenizer::Token
477 QPDFTokenizer::readToken(PointerHolder<InputSource> input, 477 QPDFTokenizer::readToken(PointerHolder<InputSource> input,
478 - std::string const& context) 478 + std::string const& context,
  479 + bool allow_bad)
479 { 480 {
480 qpdf_offset_t offset = input->tell(); 481 qpdf_offset_t offset = input->tell();
481 Token token; 482 Token token;
@@ -514,13 +515,20 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input, @@ -514,13 +515,20 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input,
514 input->unreadCh(char_to_unread); 515 input->unreadCh(char_to_unread);
515 } 516 }
516 517
  518 + input->setLastOffset(offset);
  519 +
517 if (token.getType() == tt_bad) 520 if (token.getType() == tt_bad)
518 { 521 {
519 - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),  
520 - context, offset, token.getErrorMessage()); 522 + if (allow_bad)
  523 + {
  524 +// QTC::TC("qpdf", "QPDFTokenizer allowing bad token");
  525 + }
  526 + else
  527 + {
  528 + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
  529 + context, offset, token.getErrorMessage());
  530 + }
521 } 531 }
522 532
523 - input->setLastOffset(offset);  
524 -  
525 return token; 533 return token;
526 } 534 }
qpdf/qtest/qpdf.test
@@ -232,7 +232,7 @@ foreach my $d (@bug_tests) @@ -232,7 +232,7 @@ foreach my $d (@bug_tests)
232 show_ntests(); 232 show_ntests();
233 # ---------- 233 # ----------
234 $td->notify("--- Miscellaneous Tests ---"); 234 $td->notify("--- Miscellaneous Tests ---");
235 -$n_tests += 85; 235 +$n_tests += 86;
236 236
237 $td->runtest("qpdf version", 237 $td->runtest("qpdf version",
238 {$td->COMMAND => "qpdf --version"}, 238 {$td->COMMAND => "qpdf --version"},
@@ -662,6 +662,13 @@ $td->runtest("combine show and --pages", @@ -662,6 +662,13 @@ $td->runtest("combine show and --pages",
662 $td->EXIT_STATUS => 0}, 662 $td->EXIT_STATUS => 0},
663 $td->NORMALIZE_NEWLINES); 663 $td->NORMALIZE_NEWLINES);
664 664
  665 +$td->runtest("ignore bad token",
  666 + {$td->COMMAND =>
  667 + "qpdf --show-xref bad-token-startxref.pdf"},
  668 + {$td->FILE => "bad-token-startxref.out",
  669 + $td->EXIT_STATUS => 0},
  670 + $td->NORMALIZE_NEWLINES);
  671 +
665 show_ntests(); 672 show_ntests();
666 # ---------- 673 # ----------
667 $td->notify("--- Single Page ---"); 674 $td->notify("--- Single Page ---");
qpdf/qtest/qpdf/bad-token-startxref.out 0 → 100644
  1 +1/0: uncompressed; offset = 9
  2 +2/0: uncompressed; offset = 63
  3 +3/0: uncompressed; offset = 135
  4 +4/0: uncompressed; offset = 307
  5 +5/0: uncompressed; offset = 403
  6 +6/0: uncompressed; offset = 438
qpdf/qtest/qpdf/bad-token-startxref.pdf 0 → 100644
  1 +%PDF-1.3
  2 +1 0 obj
  3 +<<
  4 + /Type /Catalog
  5 + /Pages 2 0 R
  6 +>>
  7 +endobj
  8 +
  9 +2 0 obj
  10 +<<
  11 + /Type /Pages
  12 + /Kids [
  13 + 3 0 R
  14 + ]
  15 + /Count 1
  16 +>>
  17 +endobj
  18 +
  19 +3 0 obj
  20 +<<
  21 + /Type /Page
  22 + /Parent 2 0 R
  23 + /MediaBox [0 0 612 792]
  24 + /Contents 4 0 R
  25 + /Resources <<
  26 + /ProcSet 5 0 R
  27 + /Font <<
  28 + /F1 6 0 R
  29 + >>
  30 + >>
  31 +>>
  32 +endobj
  33 +
  34 +4 0 obj
  35 +<<
  36 + /Length 44
  37 +>>
  38 +stream
  39 +BT
  40 + /F1 24 Tf
  41 + 72 720 Td
  42 + (Potato) Tj
  43 +ET
  44 +endstream
  45 +endobj
  46 +
  47 +5 0 obj
  48 +[
  49 + /PDF
  50 + /Text
  51 +]
  52 +endobj
  53 +
  54 +6 0 obj
  55 +<<
  56 + /Type /Font
  57 + /Subtype /Type1
  58 + /Name /F1
  59 + /BaseFont /Helvetica
  60 + /Encoding /WinAnsiEncoding
  61 +>>
  62 +endobj
  63 +
  64 +xref
  65 +0 7
  66 +0000000000 65535 f
  67 +0000000009 00000 n
  68 +0000000063 00000 n
  69 +0000000135 00000 n
  70 +0000000307 00000 n
  71 +0000000403 00000 n
  72 +0000000438 00000 n
  73 +trailer <<
  74 + /Size 7
  75 + /Root 1 0 R
  76 +>>
  77 +startxref(
  78 +startxref
  79 +556
  80 +%%EOF