Commit e9a319fb9536347aeab076cdb18e1ff97eb66c07

Authored by Jay Berkenbilt
1 parent 7393a038

Allow arbitrary whitespace, not just newline, after xref

Fixes #27.
ChangeLog
  1 +2013-12-14 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Allow any whitespace rather than just newline to follow xref header.
  4 + This allows qpdf to read a wider range of damaged files.
  5 +
1 6 2013-11-29 Jay Berkenbilt <ejb@ql.org>
2 7  
3 8 * If NO_GET_ENVIRONMENT is #defined, for Windows only,
... ...
libqpdf/QPDF.cc
... ... @@ -487,7 +487,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
487 487 qpdf_offset_t
488 488 QPDF::read_xrefTable(qpdf_offset_t xref_offset)
489 489 {
490   - PCRE xref_first_re("^\\s*(\\d+)\\s+(\\d+)");
  490 + PCRE xref_first_re("^\\s*(\\d+)\\s+(\\d+)\\s*");
491 491 PCRE xref_entry_re("(?s:(^\\d{10}) (\\d{5}) ([fn])[ \r\n]{2}$)");
492 492  
493 493 std::vector<QPDFObjGen> deleted_items;
... ... @@ -496,7 +496,10 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
496 496 bool done = false;
497 497 while (! done)
498 498 {
499   - std::string line = this->file->readLine(50);
  499 + char linebuf[51];
  500 + memset(linebuf, 0, sizeof(linebuf));
  501 + this->file->read(linebuf, sizeof(linebuf) - 1);
  502 + std::string line = linebuf;
500 503 PCRE::Match m1 = xref_first_re.match(line.c_str());
501 504 if (! m1)
502 505 {
... ... @@ -505,6 +508,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
505 508 "xref table", this->file->getLastOffset(),
506 509 "xref syntax invalid");
507 510 }
  511 + file->seek(this->file->getLastOffset() + m1.getMatch(0).length(),
  512 + SEEK_SET);
508 513 int obj = atoi(m1.getMatch(1).c_str());
509 514 int num = atoi(m1.getMatch(2).c_str());
510 515 static int const xref_entry_size = 20;
... ...
qpdf/qtest/qpdf.test
... ... @@ -464,6 +464,7 @@ $td->runtest("object with zero offset",
464 464 {$td->COMMAND => "qpdf --check zero-offset.pdf"},
465 465 {$td->FILE => "zero-offset.out", $td->EXIT_STATUS => 3},
466 466 $td->NORMALIZE_NEWLINES);
  467 +# leading-junk also has a space instead of a newline after xref
467 468 $td->runtest("check file with leading junk",
468 469 {$td->COMMAND => "qpdf --check leading-junk.pdf"},
469 470 {$td->FILE => "leading-junk.out", $td->EXIT_STATUS => 0},
... ...
qpdf/qtest/qpdf/leading-junk.pdf
No preview for this file type