Commit ae5bd7102da5d4b456f08790a0efc04c1c42b4a5

Authored by Jay Berkenbilt
1 parent 8a9086a6

Accept extraneous space before xref (fixes #341)

ChangeLog
1 2019-08-19 Jay Berkenbilt <ejb@ql.org> 1 2019-08-19 Jay Berkenbilt <ejb@ql.org>
2 2
  3 + * Accept (and warn for) extraneous whitespace preceding the xref
  4 + table. Fixes #341.
  5 +
3 * Accept (and warn for) extraneous whitespace between the stream 6 * Accept (and warn for) extraneous whitespace between the stream
4 keyword and newline. Fixes #329. 7 keyword and newline. Fixes #329.
5 8
libqpdf/QPDF.cc
@@ -570,6 +570,37 @@ QPDF::read_xref(qpdf_offset_t xref_offset) @@ -570,6 +570,37 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
570 char buf[7]; 570 char buf[7];
571 memset(buf, 0, sizeof(buf)); 571 memset(buf, 0, sizeof(buf));
572 this->m->file->seek(xref_offset, SEEK_SET); 572 this->m->file->seek(xref_offset, SEEK_SET);
  573 + // Some files miss the mark a little with startxref. We could
  574 + // do a better job of searching in the neighborhood for
  575 + // something that looks like either an xref table or stream,
  576 + // but the simple heuristic of skipping whitespace can help
  577 + // with the xref table case and is harmless with the stream
  578 + // case.
  579 + bool done = false;
  580 + bool skipped_space = false;
  581 + while (! done)
  582 + {
  583 + char ch;
  584 + if (1 == this->m->file->read(&ch, 1))
  585 + {
  586 + if (QUtil::is_space(ch))
  587 + {
  588 + skipped_space = true;
  589 + }
  590 + else
  591 + {
  592 + this->m->file->unreadCh(ch);
  593 + done = true;
  594 + }
  595 + }
  596 + else
  597 + {
  598 + QTC::TC("qpdf", "QPDF eof skipping spaces before xref",
  599 + skipped_space ? 0 : 1);
  600 + done = true;
  601 + }
  602 + }
  603 +
573 this->m->file->read(buf, sizeof(buf) - 1); 604 this->m->file->read(buf, sizeof(buf) - 1);
574 // The PDF spec says xref must be followed by a line 605 // The PDF spec says xref must be followed by a line
575 // terminator, but files exist in the wild where it is 606 // terminator, but files exist in the wild where it is
@@ -577,6 +608,13 @@ QPDF::read_xref(qpdf_offset_t xref_offset) @@ -577,6 +608,13 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
577 if ((strncmp(buf, "xref", 4) == 0) && 608 if ((strncmp(buf, "xref", 4) == 0) &&
578 QUtil::is_space(buf[4])) 609 QUtil::is_space(buf[4]))
579 { 610 {
  611 + if (skipped_space)
  612 + {
  613 + QTC::TC("qpdf", "QPDF xref skipped space");
  614 + warn(QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(),
  615 + "", 0,
  616 + "extraneous whitespace seen before xref"));
  617 + }
580 QTC::TC("qpdf", "QPDF xref space", 618 QTC::TC("qpdf", "QPDF xref space",
581 ((buf[4] == '\n') ? 0 : 619 ((buf[4] == '\n') ? 0 :
582 (buf[4] == '\r') ? 1 : 620 (buf[4] == '\r') ? 1 :
qpdf/qpdf.testcov
@@ -441,3 +441,5 @@ QPDFObjectHandle int returning INT_MIN 0 @@ -441,3 +441,5 @@ QPDFObjectHandle int returning INT_MIN 0
441 QPDFObjectHandle int returning INT_MAX 0 441 QPDFObjectHandle int returning INT_MAX 0
442 QPDFObjectHandle uint returning UINT_MAX 0 442 QPDFObjectHandle uint returning UINT_MAX 0
443 QPDFObjectHandle uint uint returning 0 0 443 QPDFObjectHandle uint uint returning 0 0
  444 +QPDF xref skipped space 0
  445 +QPDF eof skipping spaces before xref 1
qpdf/qtest/qpdf.test
@@ -2291,6 +2291,8 @@ my @badfiles = ("not a PDF file", # 1 @@ -2291,6 +2291,8 @@ my @badfiles = ("not a PDF file", # 1
2291 "obj/gen in wrong place", # 34 2291 "obj/gen in wrong place", # 34
2292 "object stream of wrong type", # 35 2292 "object stream of wrong type", # 35
2293 "bad dictionary key", # 36 2293 "bad dictionary key", # 36
  2294 + "space before xref", # 37
  2295 + "startxref to space then eof", # 38
2294 ); 2296 );
2295 2297
2296 $n_tests += @badfiles + 6; 2298 $n_tests += @badfiles + 6;
@@ -2301,7 +2303,7 @@ $n_tests += @badfiles + 6; @@ -2301,7 +2303,7 @@ $n_tests += @badfiles + 6;
2301 # have error conditions that used to be fatal but are now considered 2303 # have error conditions that used to be fatal but are now considered
2302 # non-fatal. 2304 # non-fatal.
2303 my %badtest_overrides = (); 2305 my %badtest_overrides = ();
2304 -for(6, 12..15, 17, 18..32, 34, 36) 2306 +for(6, 12..15, 17, 18..32, 34, 36..37)
2305 { 2307 {
2306 $badtest_overrides{$_} = 0; 2308 $badtest_overrides{$_} = 0;
2307 } 2309 }
qpdf/qtest/qpdf/bad37-recover.out 0 → 100644
  1 +WARNING: bad37.pdf: extraneous whitespace seen before xref
  2 +/QTest is direct and has type string (6)
  3 +/QTest is a string with value potato
  4 +unparse: (potato)
  5 +unparseResolved: (potato)
  6 +test 1 done
qpdf/qtest/qpdf/bad37.out 0 → 100644
  1 +WARNING: bad37.pdf: extraneous whitespace seen before xref
  2 +/QTest is direct and has type string (6)
  3 +/QTest is a string with value potato
  4 +unparse: (potato)
  5 +unparseResolved: (potato)
  6 +test 0 done
qpdf/qtest/qpdf/bad37.pdf 0 → 100644
  1 +%PDF-1.3
  2 +1 0 obj
  3 +<<
  4 + /Type /Catalog
  5 + /Pages 2 0 R
  6 +>>
  7 +endobj
  8 +
  9 +2 0 obj
  10 +<<
  11 + /Type /Pages
  12 + /Kids [
  13 + 3 0 R
  14 + ]
  15 + /Count 1
  16 +>>
  17 +endobj
  18 +
  19 +3 0 obj
  20 +<<
  21 + /Type /Page
  22 + /Parent 2 0 R
  23 + /MediaBox [0 0 612 792]
  24 + /Contents 4 0 R
  25 + /Resources <<
  26 + /ProcSet 5 0 R
  27 + /Font <<
  28 + /F1 6 0 R
  29 + >>
  30 + >>
  31 +>>
  32 +endobj
  33 +
  34 +4 0 obj
  35 +<<
  36 + /Length 44
  37 +>>
  38 +stream
  39 +BT
  40 + /F1 24 Tf
  41 + 72 720 Td
  42 + (Potato) Tj
  43 +ET
  44 +endstream
  45 +endobj
  46 +
  47 +5 0 obj
  48 +[
  49 + /PDF
  50 + /Text
  51 +]
  52 +endobj
  53 +
  54 +6 0 obj
  55 +<<
  56 + /Type /Font
  57 + /Subtype /Type1
  58 + /Name /F1
  59 + /BaseFont /Helvetica
  60 + /Encoding /WinAnsiEncoding
  61 +>>
  62 +endobj
  63 +
  64 +xref
  65 +0 7
  66 +0000000000 65535 f
  67 +0000000009 00000 n
  68 +0000000063 00000 n
  69 +0000000135 00000 n
  70 +0000000307 00000 n
  71 +0000000403 00000 n
  72 +0000000438 00000 n
  73 +trailer <<
  74 + /Size 7
  75 + /Root 1 0 R
  76 + /QTest (potato)
  77 +>>
  78 +startxref
  79 +555
  80 +%%EOF
qpdf/qtest/qpdf/bad38-recover.out 0 → 100644
  1 +WARNING: bad38.pdf: file is damaged
  2 +WARNING: bad38.pdf (offset 781): xref not found
  3 +WARNING: bad38.pdf: Attempting to reconstruct cross-reference table
  4 +/QTest is direct and has type string (6)
  5 +/QTest is a string with value potato
  6 +unparse: (potato)
  7 +unparseResolved: (potato)
  8 +test 1 done
qpdf/qtest/qpdf/bad38.out 0 → 100644
  1 +bad38.pdf (offset 781): xref not found
qpdf/qtest/qpdf/bad38.pdf 0 → 100644
  1 +%PDF-1.3
  2 +1 0 obj
  3 +<<
  4 + /Type /Catalog
  5 + /Pages 2 0 R
  6 +>>
  7 +endobj
  8 +
  9 +2 0 obj
  10 +<<
  11 + /Type /Pages
  12 + /Kids [
  13 + 3 0 R
  14 + ]
  15 + /Count 1
  16 +>>
  17 +endobj
  18 +
  19 +3 0 obj
  20 +<<
  21 + /Type /Page
  22 + /Parent 2 0 R
  23 + /MediaBox [0 0 612 792]
  24 + /Contents 4 0 R
  25 + /Resources <<
  26 + /ProcSet 5 0 R
  27 + /Font <<
  28 + /F1 6 0 R
  29 + >>
  30 + >>
  31 +>>
  32 +endobj
  33 +
  34 +4 0 obj
  35 +<<
  36 + /Length 44
  37 +>>
  38 +stream
  39 +BT
  40 + /F1 24 Tf
  41 + 72 720 Td
  42 + (Potato) Tj
  43 +ET
  44 +endstream
  45 +endobj
  46 +
  47 +5 0 obj
  48 +[
  49 + /PDF
  50 + /Text
  51 +]
  52 +endobj
  53 +
  54 +6 0 obj
  55 +<<
  56 + /Type /Font
  57 + /Subtype /Type1
  58 + /Name /F1
  59 + /BaseFont /Helvetica
  60 + /Encoding /WinAnsiEncoding
  61 +>>
  62 +endobj
  63 +
  64 +xref
  65 +0 7
  66 +0000000000 65535 f
  67 +0000000009 00000 n
  68 +0000000063 00000 n
  69 +0000000135 00000 n
  70 +0000000307 00000 n
  71 +0000000403 00000 n
  72 +0000000438 00000 n
  73 +trailer <<
  74 + /Size 7
  75 + /Root 1 0 R
  76 + /QTest (potato)
  77 +>>
  78 +startxref
  79 +781
  80 +%%EOF
  81 +