Commit e9a319fb9536347aeab076cdb18e1ff97eb66c07

Authored by Jay Berkenbilt
1 parent 7393a038

Allow arbitrary whitespace, not just newline, after xref

Fixes #27.
ChangeLog
  1 +2013-12-14 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Allow any whitespace rather than just a newline to follow the xref header.
  4 + This allows qpdf to read a wider range of damaged files.
  5 +
1 2013-11-29 Jay Berkenbilt <ejb@ql.org> 6 2013-11-29 Jay Berkenbilt <ejb@ql.org>
2 7
3 * If NO_GET_ENVIRONMENT is #defined, for Windows only, 8 * If NO_GET_ENVIRONMENT is #defined, for Windows only,
libqpdf/QPDF.cc
@@ -487,7 +487,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) @@ -487,7 +487,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
487 qpdf_offset_t 487 qpdf_offset_t
488 QPDF::read_xrefTable(qpdf_offset_t xref_offset) 488 QPDF::read_xrefTable(qpdf_offset_t xref_offset)
489 { 489 {
490 - PCRE xref_first_re("^\\s*(\\d+)\\s+(\\d+)"); 490 + PCRE xref_first_re("^\\s*(\\d+)\\s+(\\d+)\\s*");
491 PCRE xref_entry_re("(?s:(^\\d{10}) (\\d{5}) ([fn])[ \r\n]{2}$)"); 491 PCRE xref_entry_re("(?s:(^\\d{10}) (\\d{5}) ([fn])[ \r\n]{2}$)");
492 492
493 std::vector<QPDFObjGen> deleted_items; 493 std::vector<QPDFObjGen> deleted_items;
@@ -496,7 +496,10 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) @@ -496,7 +496,10 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
496 bool done = false; 496 bool done = false;
497 while (! done) 497 while (! done)
498 { 498 {
499 - std::string line = this->file->readLine(50); 499 + char linebuf[51];
  500 + memset(linebuf, 0, sizeof(linebuf));
  501 + this->file->read(linebuf, sizeof(linebuf) - 1);
  502 + std::string line = linebuf;
500 PCRE::Match m1 = xref_first_re.match(line.c_str()); 503 PCRE::Match m1 = xref_first_re.match(line.c_str());
501 if (! m1) 504 if (! m1)
502 { 505 {
@@ -505,6 +508,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) @@ -505,6 +508,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
505 "xref table", this->file->getLastOffset(), 508 "xref table", this->file->getLastOffset(),
506 "xref syntax invalid"); 509 "xref syntax invalid");
507 } 510 }
  511 + file->seek(this->file->getLastOffset() + m1.getMatch(0).length(),
  512 + SEEK_SET);
508 int obj = atoi(m1.getMatch(1).c_str()); 513 int obj = atoi(m1.getMatch(1).c_str());
509 int num = atoi(m1.getMatch(2).c_str()); 514 int num = atoi(m1.getMatch(2).c_str());
510 static int const xref_entry_size = 20; 515 static int const xref_entry_size = 20;
qpdf/qtest/qpdf.test
@@ -464,6 +464,7 @@ $td->runtest("object with zero offset", @@ -464,6 +464,7 @@ $td->runtest("object with zero offset",
464 {$td->COMMAND => "qpdf --check zero-offset.pdf"}, 464 {$td->COMMAND => "qpdf --check zero-offset.pdf"},
465 {$td->FILE => "zero-offset.out", $td->EXIT_STATUS => 3}, 465 {$td->FILE => "zero-offset.out", $td->EXIT_STATUS => 3},
466 $td->NORMALIZE_NEWLINES); 466 $td->NORMALIZE_NEWLINES);
  467 +# leading-junk also has a space instead of a newline after xref
467 $td->runtest("check file with leading junk", 468 $td->runtest("check file with leading junk",
468 {$td->COMMAND => "qpdf --check leading-junk.pdf"}, 469 {$td->COMMAND => "qpdf --check leading-junk.pdf"},
469 {$td->FILE => "leading-junk.out", $td->EXIT_STATUS => 0}, 470 {$td->FILE => "leading-junk.out", $td->EXIT_STATUS => 0},
qpdf/qtest/qpdf/leading-junk.pdf
No preview for this file type