Commit e9a319fb9536347aeab076cdb18e1ff97eb66c07
1 parent 7393a038
Allow arbitrary whitespace, not just newline, after xref
Fixes #27.
Showing 4 changed files with 13 additions and 2 deletions
ChangeLog
| 1 | +2013-12-14 Jay Berkenbilt <ejb@ql.org> | ||
| 2 | + | ||
| 3 | + * Allow any whitespace rather than just newline to follow xref header. | ||
| 4 | + This allows qpdf to read a wider range of damaged files. | ||
| 5 | + | ||
| 1 | 2013-11-29 Jay Berkenbilt <ejb@ql.org> | 6 | 2013-11-29 Jay Berkenbilt <ejb@ql.org> |
| 2 | 7 | ||
| 3 | * If NO_GET_ENVIRONMENT is #defined, for Windows only, | 8 | * If NO_GET_ENVIRONMENT is #defined, for Windows only, |
libqpdf/QPDF.cc
| @@ -487,7 +487,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) | @@ -487,7 +487,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) | ||
| 487 | qpdf_offset_t | 487 | qpdf_offset_t |
| 488 | QPDF::read_xrefTable(qpdf_offset_t xref_offset) | 488 | QPDF::read_xrefTable(qpdf_offset_t xref_offset) |
| 489 | { | 489 | { |
| 490 | - PCRE xref_first_re("^\\s*(\\d+)\\s+(\\d+)"); | 490 | + PCRE xref_first_re("^\\s*(\\d+)\\s+(\\d+)\\s*"); |
| 491 | PCRE xref_entry_re("(?s:(^\\d{10}) (\\d{5}) ([fn])[ \r\n]{2}$)"); | 491 | PCRE xref_entry_re("(?s:(^\\d{10}) (\\d{5}) ([fn])[ \r\n]{2}$)"); |
| 492 | 492 | ||
| 493 | std::vector<QPDFObjGen> deleted_items; | 493 | std::vector<QPDFObjGen> deleted_items; |
| @@ -496,7 +496,10 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | @@ -496,7 +496,10 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | ||
| 496 | bool done = false; | 496 | bool done = false; |
| 497 | while (! done) | 497 | while (! done) |
| 498 | { | 498 | { |
| 499 | - std::string line = this->file->readLine(50); | 499 | + char linebuf[51]; |
| 500 | + memset(linebuf, 0, sizeof(linebuf)); | ||
| 501 | + this->file->read(linebuf, sizeof(linebuf) - 1); | ||
| 502 | + std::string line = linebuf; | ||
| 500 | PCRE::Match m1 = xref_first_re.match(line.c_str()); | 503 | PCRE::Match m1 = xref_first_re.match(line.c_str()); |
| 501 | if (! m1) | 504 | if (! m1) |
| 502 | { | 505 | { |
| @@ -505,6 +508,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | @@ -505,6 +508,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | ||
| 505 | "xref table", this->file->getLastOffset(), | 508 | "xref table", this->file->getLastOffset(), |
| 506 | "xref syntax invalid"); | 509 | "xref syntax invalid"); |
| 507 | } | 510 | } |
| 511 | + file->seek(this->file->getLastOffset() + m1.getMatch(0).length(), | ||
| 512 | + SEEK_SET); | ||
| 508 | int obj = atoi(m1.getMatch(1).c_str()); | 513 | int obj = atoi(m1.getMatch(1).c_str()); |
| 509 | int num = atoi(m1.getMatch(2).c_str()); | 514 | int num = atoi(m1.getMatch(2).c_str()); |
| 510 | static int const xref_entry_size = 20; | 515 | static int const xref_entry_size = 20; |
qpdf/qtest/qpdf.test
| @@ -464,6 +464,7 @@ $td->runtest("object with zero offset", | @@ -464,6 +464,7 @@ $td->runtest("object with zero offset", | ||
| 464 | {$td->COMMAND => "qpdf --check zero-offset.pdf"}, | 464 | {$td->COMMAND => "qpdf --check zero-offset.pdf"}, |
| 465 | {$td->FILE => "zero-offset.out", $td->EXIT_STATUS => 3}, | 465 | {$td->FILE => "zero-offset.out", $td->EXIT_STATUS => 3}, |
| 466 | $td->NORMALIZE_NEWLINES); | 466 | $td->NORMALIZE_NEWLINES); |
| 467 | +# leading-junk also has a space instead of a newline after xref | ||
| 467 | $td->runtest("check file with leading junk", | 468 | $td->runtest("check file with leading junk", |
| 468 | {$td->COMMAND => "qpdf --check leading-junk.pdf"}, | 469 | {$td->COMMAND => "qpdf --check leading-junk.pdf"}, |
| 469 | {$td->FILE => "leading-junk.out", $td->EXIT_STATUS => 0}, | 470 | {$td->FILE => "leading-junk.out", $td->EXIT_STATUS => 0}, |
qpdf/qtest/qpdf/leading-junk.pdf
No preview for this file type