Commit a85007cb0d9bb8af8f0a32bda3ace19aaff97816
1 parent
a1d5a3e9
Handle more broken files
Handle a space rather than a newline after xref, and a missing /ID in the trailer of an encrypted file. This enables qpdf to handle some files that xpdf can handle. Adobe Reader can't necessarily handle them.
Showing
7 changed files
with
59 additions
and
11 deletions
ChangeLog
| 1 | +2013-06-15 Jay Berkenbilt <ejb@ql.org> | |
| 2 | + | |
| 3 | + * Handle some additional broken files with missing /ID in trailer | |
| 4 | + for encrypted files and with space rather than newline after xref. | |
| 5 | + | |
| 1 | 6 | 2013-06-14 Jay Berkenbilt <ejb@ql.org> |
| 2 | 7 | |
| 3 | 8 | * Detect and correct /Outlines dictionary being a direct object | ... | ... |
libqpdf/QPDF.cc
| ... | ... | @@ -430,11 +430,22 @@ QPDF::read_xref(qpdf_offset_t xref_offset) |
| 430 | 430 | std::map<int, int> free_table; |
| 431 | 431 | while (xref_offset) |
| 432 | 432 | { |
| 433 | + char buf[7]; | |
| 434 | + memset(buf, 0, sizeof(buf)); | |
| 433 | 435 | this->file->seek(xref_offset, SEEK_SET); |
| 434 | - std::string line = this->file->readLine(50); | |
| 435 | - if (line == "xref") | |
| 436 | + this->file->read(buf, sizeof(buf) - 1); | |
| 437 | + // The PDF spec says xref must be followed by a line | |
| 438 | + // terminator, but files exist in the wild where it is | |
| 439 | + // terminated by arbitrary whitespace. | |
| 440 | + PCRE xref_re("^xref\\s+"); | |
| 441 | + PCRE::Match m = xref_re.match(buf); | |
| 442 | + if (m) | |
| 436 | 443 | { |
| 437 | - xref_offset = read_xrefTable(this->file->tell()); | |
| 444 | + QTC::TC("qpdf", "QPDF xref space", | |
| 445 | + ((buf[4] == '\n') ? 0 : | |
| 446 | + (buf[4] == '\r') ? 1 : | |
| 447 | + (buf[4] == ' ') ? 2 : 9999)); | |
| 448 | + xref_offset = read_xrefTable(xref_offset + m.getMatch(0).length()); | |
| 438 | 449 | } |
| 439 | 450 | else |
| 440 | 451 | { | ... | ... |
libqpdf/QPDF_encryption.cc
| ... | ... | @@ -791,17 +791,24 @@ QPDF::initializeEncryption() |
| 791 | 791 | // encryption dictionary. |
| 792 | 792 | this->encrypted = true; |
| 793 | 793 | |
| 794 | + std::string id1; | |
| 794 | 795 | QPDFObjectHandle id_obj = this->trailer.getKey("/ID"); |
| 795 | - if (! (id_obj.isArray() && | |
| 796 | - (id_obj.getArrayNItems() == 2) && | |
| 797 | - id_obj.getArrayItem(0).isString())) | |
| 796 | + if ((id_obj.isArray() && | |
| 797 | + (id_obj.getArrayNItems() == 2) && | |
| 798 | + id_obj.getArrayItem(0).isString())) | |
| 798 | 799 | { |
| 799 | - throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), | |
| 800 | - "trailer", this->file->getLastOffset(), | |
| 801 | - "invalid /ID in trailer dictionary"); | |
| 800 | + id1 = id_obj.getArrayItem(0).getStringValue(); | |
| 801 | + } | |
| 802 | + else | |
| 803 | + { | |
| 804 | + // Treating a missing ID as the empty string enables qpdf to | |
| 805 | + // decrypt some invalid encrypted files with no /ID that | |
| 806 | + // poppler can read but Adobe Reader can't. | |
| 807 | + warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), | |
| 808 | + "trailer", this->file->getLastOffset(), | |
| 809 | + "invalid /ID in trailer dictionary")); | |
| 802 | 810 | } |
| 803 | 811 | |
| 804 | - std::string id1 = id_obj.getArrayItem(0).getStringValue(); | |
| 805 | 812 | QPDFObjectHandle encryption_dict = this->trailer.getKey("/Encrypt"); |
| 806 | 813 | if (! encryption_dict.isDictionary()) |
| 807 | 814 | { | ... | ... |
qpdf/qpdf.testcov
qpdf/qtest/qpdf.test
| ... | ... | @@ -199,7 +199,7 @@ $td->runtest("remove page we don't have", |
| 199 | 199 | show_ntests(); |
| 200 | 200 | # ---------- |
| 201 | 201 | $td->notify("--- Miscellaneous Tests ---"); |
| 202 | -$n_tests += 64; | |
| 202 | +$n_tests += 65; | |
| 203 | 203 | |
| 204 | 204 | $td->runtest("qpdf version", |
| 205 | 205 | {$td->COMMAND => "qpdf --version"}, |
| ... | ... | @@ -509,6 +509,14 @@ $td->runtest("check file", |
| 509 | 509 | {$td->FILE => "a.pdf"}, |
| 510 | 510 | {$td->FILE => "gen1.qdf"}); |
| 511 | 511 | |
| 512 | +# This file, from a user, is missing /ID in its trailer even though it | |
| 513 | +# is encrypted and also has a space instead of a newline after its | |
| 514 | +# xref keyword. xpdf can open it, but Adobe reader can't. | |
| 515 | +$td->runtest("check broken file", | |
| 516 | + {$td->COMMAND => "qpdf --check invalid-id-xref.pdf"}, | |
| 517 | + {$td->FILE => "invalid-id-xref.out", $td->EXIT_STATUS => 3}, | |
| 518 | + $td->NORMALIZE_NEWLINES); | |
| 519 | + | |
| 512 | 520 | show_ntests(); |
| 513 | 521 | # ---------- |
| 514 | 522 | $td->notify("--- Numeric range parsing tests ---"); | ... | ... |
qpdf/qtest/qpdf/invalid-id-xref.out
0 → 100644
| 1 | +WARNING: invalid-id-xref.pdf (trailer, file position 2493795): invalid /ID in trailer dictionary | |
| 2 | +checking invalid-id-xref.pdf | |
| 3 | +PDF Version: 1.1 | |
| 4 | +R = 3 | |
| 5 | +P = -1804 | |
| 6 | +User password = | |
| 7 | +extract for accessibility: not allowed | |
| 8 | +extract for any purpose: allowed | |
| 9 | +print low resolution: allowed | |
| 10 | +print high resolution: allowed | |
| 11 | +modify document assembly: not allowed | |
| 12 | +modify forms: not allowed | |
| 13 | +modify annotations: allowed | |
| 14 | +modify other: not allowed | |
| 15 | +modify anything: not allowed | |
| 16 | +File is not linearized | ... | ... |
qpdf/qtest/qpdf/invalid-id-xref.pdf
0 → 100644
No preview for this file type