Commit b62cbe250885484a42ee161798830b1da9520097

Authored by Jay Berkenbilt
1 parent f0b85a1e

Tolerate some mangled xref tables

If xref table entries lack the spec-required trailing whitespace or
contain a small amount of extra space, handle them anyway.
ChangeLog
  1 +2015-10-31 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * libqpdf/QPDF.cc (read_xrefTable): Be tolerant of some malformed
  4 + xref tables that don't have the required trailing space after each
  5 + line.
  6 +
1 2015-10-29 Jay Berkenbilt <ejb@ql.org> 7 2015-10-29 Jay Berkenbilt <ejb@ql.org>
2 8
3 * Implement QPDFWriter::setDeterministicID and --deterministic-id 9 * Implement QPDFWriter::setDeterministicID and --deterministic-id
libqpdf/QPDF.cc
@@ -488,7 +488,7 @@ qpdf_offset_t @@ -488,7 +488,7 @@ qpdf_offset_t
488 QPDF::read_xrefTable(qpdf_offset_t xref_offset) 488 QPDF::read_xrefTable(qpdf_offset_t xref_offset)
489 { 489 {
490 PCRE xref_first_re("^\\s*(\\d+)\\s+(\\d+)\\s*"); 490 PCRE xref_first_re("^\\s*(\\d+)\\s+(\\d+)\\s*");
491 - PCRE xref_entry_re("(?s:(^\\d{10}) (\\d{5}) ([fn])[ \r\n]{2}$)"); 491 + PCRE xref_entry_re("(?s:(^\\d{10}) (\\d{5}) ([fn])\\s*$)");
492 492
493 std::vector<QPDFObjGen> deleted_items; 493 std::vector<QPDFObjGen> deleted_items;
494 494
@@ -512,8 +512,6 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) @@ -512,8 +512,6 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
512 SEEK_SET); 512 SEEK_SET);
513 int obj = atoi(m1.getMatch(1).c_str()); 513 int obj = atoi(m1.getMatch(1).c_str());
514 int num = atoi(m1.getMatch(2).c_str()); 514 int num = atoi(m1.getMatch(2).c_str());
515 - static int const xref_entry_size = 20;  
516 - char xref_entry[xref_entry_size + 1];  
517 for (int i = obj; i < obj + num; ++i) 515 for (int i = obj; i < obj + num; ++i)
518 { 516 {
519 if (i == 0) 517 if (i == 0)
@@ -521,9 +519,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) @@ -521,9 +519,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
521 // This is needed by checkLinearization() 519 // This is needed by checkLinearization()
522 this->first_xref_item_offset = this->file->tell(); 520 this->first_xref_item_offset = this->file->tell();
523 } 521 }
524 - memset(xref_entry, 0, sizeof(xref_entry));  
525 - this->file->read(xref_entry, xref_entry_size);  
526 - PCRE::Match m2 = xref_entry_re.match(xref_entry); 522 + std::string xref_entry = this->file->readLine(30);
  523 + PCRE::Match m2 = xref_entry_re.match(xref_entry.c_str());
527 if (! m2) 524 if (! m2)
528 { 525 {
529 QTC::TC("qpdf", "QPDF invalid xref entry"); 526 QTC::TC("qpdf", "QPDF invalid xref entry");
qpdf/qtest/qpdf.test
@@ -199,7 +199,7 @@ $td->runtest("remove page we don't have", @@ -199,7 +199,7 @@ $td->runtest("remove page we don't have",
199 show_ntests(); 199 show_ntests();
200 # ---------- 200 # ----------
201 $td->notify("--- Miscellaneous Tests ---"); 201 $td->notify("--- Miscellaneous Tests ---");
202 -$n_tests += 76; 202 +$n_tests += 77;
203 203
204 $td->runtest("qpdf version", 204 $td->runtest("qpdf version",
205 {$td->COMMAND => "qpdf --version"}, 205 {$td->COMMAND => "qpdf --version"},
@@ -570,6 +570,10 @@ $td->runtest("detect loops in pages structure", @@ -570,6 +570,10 @@ $td->runtest("detect loops in pages structure",
570 {$td->COMMAND => "qpdf --check pages-loop.pdf"}, 570 {$td->COMMAND => "qpdf --check pages-loop.pdf"},
571 {$td->FILE => "pages-loop.out", $td->EXIT_STATUS => 2}, 571 {$td->FILE => "pages-loop.out", $td->EXIT_STATUS => 2},
572 $td->NORMALIZE_NEWLINES); 572 $td->NORMALIZE_NEWLINES);
  573 +$td->runtest("no trailing space in xref table",
  574 + {$td->COMMAND => "qpdf --check no-space-in-xref.pdf"},
  575 + {$td->FILE => "no-space-in-xref.out", $td->EXIT_STATUS => 0},
  576 + $td->NORMALIZE_NEWLINES);
573 577
574 show_ntests(); 578 show_ntests();
575 # ---------- 579 # ----------
qpdf/qtest/qpdf/no-space-in-xref.out 0 → 100644
  1 +checking no-space-in-xref.pdf
  2 +PDF Version: 1.3
  3 +File is not encrypted
  4 +File is not linearized
  5 +No syntax or stream encoding errors found; the file may still contain
  6 +errors that qpdf cannot detect
qpdf/qtest/qpdf/no-space-in-xref.pdf 0 → 100644
  1 +%PDF-1.3
  2 +1 0 obj
  3 +<<
  4 + /Type /Catalog
  5 + /Pages 2 0 R
  6 +>>
  7 +endobj
  8 +
  9 +2 0 obj
  10 +<<
  11 + /Type /Pages
  12 + /Kids [
  13 + 3 0 R
  14 + ]
  15 + /Count 1
  16 +>>
  17 +endobj
  18 +
  19 +3 0 obj
  20 +<<
  21 + /Type /Page
  22 + /Parent 2 0 R
  23 + /MediaBox [0 0 612 792]
  24 + /Contents 4 0 R
  25 + /Resources <<
  26 + /ProcSet 5 0 R
  27 + /Font <<
  28 + /F1 6 0 R
  29 + >>
  30 + >>
  31 +>>
  32 +endobj
  33 +
  34 +4 0 obj
  35 +<<
  36 + /Length 44
  37 +>>
  38 +stream
  39 +BT
  40 + /F1 24 Tf
  41 + 72 720 Td
  42 + (Potato) Tj
  43 +ET
  44 +endstream
  45 +endobj
  46 +
  47 +5 0 obj
  48 +[
  49 + /PDF
  50 + /Text
  51 +]
  52 +endobj
  53 +
  54 +6 0 obj
  55 +<<
  56 + /Type /Font
  57 + /Subtype /Type1
  58 + /Name /F1
  59 + /BaseFont /Helvetica
  60 + /Encoding /WinAnsiEncoding
  61 +>>
  62 +endobj
  63 +
  64 +xref
  65 +0 7
  66 +0000000000 65535 f
  67 +0000000009 00000 n
  68 +0000000063 00000 n
  69 +0000000135 00000 n
  70 +0000000307 00000 n
  71 +0000000403 00000 n
  72 +0000000438 00000 n
  73 +trailer <<
  74 + /Size 7
  75 + /Root 1 0 R
  76 +>>
  77 +startxref
  78 +556
  79 +%%EOF