Commit a85007cb0d9bb8af8f0a32bda3ace19aaff97816 (1 parent: a1d5a3e9)
Handle more broken files
Handle a space rather than a newline after the xref keyword, and a missing /ID in the trailer of an encrypted file. This enables qpdf to handle some files that xpdf can handle. Adobe Reader can't necessarily handle them.
Showing 7 changed files with 59 additions and 11 deletions
ChangeLog
| 1 | +2013-06-15 Jay Berkenbilt <ejb@ql.org> | ||
| 2 | + | ||
| 3 | + * Handle some additional broken files with missing /ID in trailer | ||
| 4 | + for encrypted files and with space rather than newline after xref. | ||
| 5 | + | ||
| 1 | 2013-06-14 Jay Berkenbilt <ejb@ql.org> | 6 | 2013-06-14 Jay Berkenbilt <ejb@ql.org> |
| 2 | 7 | ||
| 3 | * Detect and correct /Outlines dictionary being a direct object | 8 | * Detect and correct /Outlines dictionary being a direct object |
libqpdf/QPDF.cc
| @@ -430,11 +430,22 @@ QPDF::read_xref(qpdf_offset_t xref_offset) | @@ -430,11 +430,22 @@ QPDF::read_xref(qpdf_offset_t xref_offset) | ||
| 430 | std::map<int, int> free_table; | 430 | std::map<int, int> free_table; |
| 431 | while (xref_offset) | 431 | while (xref_offset) |
| 432 | { | 432 | { |
| 433 | + char buf[7]; | ||
| 434 | + memset(buf, 0, sizeof(buf)); | ||
| 433 | this->file->seek(xref_offset, SEEK_SET); | 435 | this->file->seek(xref_offset, SEEK_SET); |
| 434 | - std::string line = this->file->readLine(50); | ||
| 435 | - if (line == "xref") | 436 | + this->file->read(buf, sizeof(buf) - 1); |
| 437 | + // The PDF spec says xref must be followed by a line | ||
| 438 | + // terminator, but files exist in the wild where it is | ||
| 439 | + // terminated by arbitrary whitespace. | ||
| 440 | + PCRE xref_re("^xref\\s+"); | ||
| 441 | + PCRE::Match m = xref_re.match(buf); | ||
| 442 | + if (m) | ||
| 436 | { | 443 | { |
| 437 | - xref_offset = read_xrefTable(this->file->tell()); | 444 | + QTC::TC("qpdf", "QPDF xref space", |
| 445 | + ((buf[4] == '\n') ? 0 : | ||
| 446 | + (buf[4] == '\r') ? 1 : | ||
| 447 | + (buf[4] == ' ') ? 2 : 9999)); | ||
| 448 | + xref_offset = read_xrefTable(xref_offset + m.getMatch(0).length()); | ||
| 438 | } | 449 | } |
| 439 | else | 450 | else |
| 440 | { | 451 | { |
libqpdf/QPDF_encryption.cc
| @@ -791,17 +791,24 @@ QPDF::initializeEncryption() | @@ -791,17 +791,24 @@ QPDF::initializeEncryption() | ||
| 791 | // encryption dictionary. | 791 | // encryption dictionary. |
| 792 | this->encrypted = true; | 792 | this->encrypted = true; |
| 793 | 793 | ||
| 794 | + std::string id1; | ||
| 794 | QPDFObjectHandle id_obj = this->trailer.getKey("/ID"); | 795 | QPDFObjectHandle id_obj = this->trailer.getKey("/ID"); |
| 795 | - if (! (id_obj.isArray() && | ||
| 796 | - (id_obj.getArrayNItems() == 2) && | ||
| 797 | - id_obj.getArrayItem(0).isString())) | 796 | + if ((id_obj.isArray() && |
| 797 | + (id_obj.getArrayNItems() == 2) && | ||
| 798 | + id_obj.getArrayItem(0).isString())) | ||
| 798 | { | 799 | { |
| 799 | - throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), | ||
| 800 | - "trailer", this->file->getLastOffset(), | ||
| 801 | - "invalid /ID in trailer dictionary"); | 800 | + id1 = id_obj.getArrayItem(0).getStringValue(); |
| 801 | + } | ||
| 802 | + else | ||
| 803 | + { | ||
| 804 | + // Treating a missing ID as the empty string enables qpdf to | ||
| 805 | + // decrypt some invalid encrypted files with no /ID that | ||
| 806 | + // poppler can read but Adobe Reader can't. | ||
| 807 | + warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), | ||
| 808 | + "trailer", this->file->getLastOffset(), | ||
| 809 | + "invalid /ID in trailer dictionary")); | ||
| 802 | } | 810 | } |
| 803 | 811 | ||
| 804 | - std::string id1 = id_obj.getArrayItem(0).getStringValue(); | ||
| 805 | QPDFObjectHandle encryption_dict = this->trailer.getKey("/Encrypt"); | 812 | QPDFObjectHandle encryption_dict = this->trailer.getKey("/Encrypt"); |
| 806 | if (! encryption_dict.isDictionary()) | 813 | if (! encryption_dict.isDictionary()) |
| 807 | { | 814 | { |
qpdf/qpdf.testcov
| @@ -264,3 +264,4 @@ QPDFObjectHandle inline image token 0 | @@ -264,3 +264,4 @@ QPDFObjectHandle inline image token 0 | ||
| 264 | QPDF not caching overridden objstm object 0 | 264 | QPDF not caching overridden objstm object 0 |
| 265 | QPDFWriter original obj non-zero gen 0 | 265 | QPDFWriter original obj non-zero gen 0 |
| 266 | QPDF_optimization indirect outlines 0 | 266 | QPDF_optimization indirect outlines 0 |
| 267 | +QPDF xref space 2 |
qpdf/qtest/qpdf.test
| @@ -199,7 +199,7 @@ $td->runtest("remove page we don't have", | @@ -199,7 +199,7 @@ $td->runtest("remove page we don't have", | ||
| 199 | show_ntests(); | 199 | show_ntests(); |
| 200 | # ---------- | 200 | # ---------- |
| 201 | $td->notify("--- Miscellaneous Tests ---"); | 201 | $td->notify("--- Miscellaneous Tests ---"); |
| 202 | -$n_tests += 64; | 202 | +$n_tests += 65; |
| 203 | 203 | ||
| 204 | $td->runtest("qpdf version", | 204 | $td->runtest("qpdf version", |
| 205 | {$td->COMMAND => "qpdf --version"}, | 205 | {$td->COMMAND => "qpdf --version"}, |
| @@ -509,6 +509,14 @@ $td->runtest("check file", | @@ -509,6 +509,14 @@ $td->runtest("check file", | ||
| 509 | {$td->FILE => "a.pdf"}, | 509 | {$td->FILE => "a.pdf"}, |
| 510 | {$td->FILE => "gen1.qdf"}); | 510 | {$td->FILE => "gen1.qdf"}); |
| 511 | 511 | ||
| 512 | +# This file, from a user, is missing /ID in its trailer even though it | ||
| 513 | +# is encrypted and also has a space instead of a newline after its | ||
| 514 | +# xref keyword. xpdf can open it, but Adobe reader can't. | ||
| 515 | +$td->runtest("check broken file", | ||
| 516 | + {$td->COMMAND => "qpdf --check invalid-id-xref.pdf"}, | ||
| 517 | + {$td->FILE => "invalid-id-xref.out", $td->EXIT_STATUS => 3}, | ||
| 518 | + $td->NORMALIZE_NEWLINES); | ||
| 519 | + | ||
| 512 | show_ntests(); | 520 | show_ntests(); |
| 513 | # ---------- | 521 | # ---------- |
| 514 | $td->notify("--- Numeric range parsing tests ---"); | 522 | $td->notify("--- Numeric range parsing tests ---"); |
qpdf/qtest/qpdf/invalid-id-xref.out
0 → 100644
| 1 | +WARNING: invalid-id-xref.pdf (trailer, file position 2493795): invalid /ID in trailer dictionary | ||
| 2 | +checking invalid-id-xref.pdf | ||
| 3 | +PDF Version: 1.1 | ||
| 4 | +R = 3 | ||
| 5 | +P = -1804 | ||
| 6 | +User password = | ||
| 7 | +extract for accessibility: not allowed | ||
| 8 | +extract for any purpose: allowed | ||
| 9 | +print low resolution: allowed | ||
| 10 | +print high resolution: allowed | ||
| 11 | +modify document assembly: not allowed | ||
| 12 | +modify forms: not allowed | ||
| 13 | +modify annotations: allowed | ||
| 14 | +modify other: not allowed | ||
| 15 | +modify anything: not allowed | ||
| 16 | +File is not linearized |
qpdf/qtest/qpdf/invalid-id-xref.pdf
0 → 100644
No preview for this file type