Commit a85007cb0d9bb8af8f0a32bda3ace19aaff97816

Authored by Jay Berkenbilt
1 parent a1d5a3e9

Handle more broken files

Space rather than newline after xref, missing /ID in trailer for
encrypted file.  This enables qpdf to handle some files that xpdf can
handle.  Adobe reader can't necessarily handle them.
ChangeLog
  1 +2013-06-15 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Handle some additional broken files with missing /ID in trailer
  4 + for encrypted files and with space rather than newline after xref.
  5 +
1 6 2013-06-14 Jay Berkenbilt <ejb@ql.org>
2 7  
3 8 * Detect and correct /Outlines dictionary being a direct object
... ...
libqpdf/QPDF.cc
... ... @@ -430,11 +430,22 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
430 430 std::map<int, int> free_table;
431 431 while (xref_offset)
432 432 {
  433 + char buf[7];
  434 + memset(buf, 0, sizeof(buf));
433 435 this->file->seek(xref_offset, SEEK_SET);
434   - std::string line = this->file->readLine(50);
435   - if (line == "xref")
  436 + this->file->read(buf, sizeof(buf) - 1);
  437 + // The PDF spec says xref must be followed by a line
  438 + // terminator, but files exist in the wild where it is
  439 + // terminated by arbitrary whitespace.
  440 + PCRE xref_re("^xref\\s+");
  441 + PCRE::Match m = xref_re.match(buf);
  442 + if (m)
436 443 {
437   - xref_offset = read_xrefTable(this->file->tell());
  444 + QTC::TC("qpdf", "QPDF xref space",
  445 + ((buf[4] == '\n') ? 0 :
  446 + (buf[4] == '\r') ? 1 :
  447 + (buf[4] == ' ') ? 2 : 9999));
  448 + xref_offset = read_xrefTable(xref_offset + m.getMatch(0).length());
438 449 }
439 450 else
440 451 {
... ...
libqpdf/QPDF_encryption.cc
... ... @@ -791,17 +791,24 @@ QPDF::initializeEncryption()
791 791 // encryption dictionary.
792 792 this->encrypted = true;
793 793  
  794 + std::string id1;
794 795 QPDFObjectHandle id_obj = this->trailer.getKey("/ID");
795   - if (! (id_obj.isArray() &&
796   - (id_obj.getArrayNItems() == 2) &&
797   - id_obj.getArrayItem(0).isString()))
  796 + if ((id_obj.isArray() &&
  797 + (id_obj.getArrayNItems() == 2) &&
  798 + id_obj.getArrayItem(0).isString()))
798 799 {
799   - throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(),
800   - "trailer", this->file->getLastOffset(),
801   - "invalid /ID in trailer dictionary");
  800 + id1 = id_obj.getArrayItem(0).getStringValue();
  801 + }
  802 + else
  803 + {
  804 + // Treating a missing ID as the empty string enables qpdf to
  805 + // decrypt some invalid encrypted files with no /ID that
  806 + // poppler can read but Adobe Reader can't.
  807 + warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(),
  808 + "trailer", this->file->getLastOffset(),
  809 + "invalid /ID in trailer dictionary"));
802 810 }
803 811  
804   - std::string id1 = id_obj.getArrayItem(0).getStringValue();
805 812 QPDFObjectHandle encryption_dict = this->trailer.getKey("/Encrypt");
806 813 if (! encryption_dict.isDictionary())
807 814 {
... ...
qpdf/qpdf.testcov
... ... @@ -264,3 +264,4 @@ QPDFObjectHandle inline image token 0
264 264 QPDF not caching overridden objstm object 0
265 265 QPDFWriter original obj non-zero gen 0
266 266 QPDF_optimization indirect outlines 0
  267 +QPDF xref space 2
... ...
qpdf/qtest/qpdf.test
... ... @@ -199,7 +199,7 @@ $td->runtest("remove page we don't have",
199 199 show_ntests();
200 200 # ----------
201 201 $td->notify("--- Miscellaneous Tests ---");
202   -$n_tests += 64;
  202 +$n_tests += 65;
203 203  
204 204 $td->runtest("qpdf version",
205 205 {$td->COMMAND => "qpdf --version"},
... ... @@ -509,6 +509,14 @@ $td->runtest("check file",
509 509 {$td->FILE => "a.pdf"},
510 510 {$td->FILE => "gen1.qdf"});
511 511  
  512 +# This file, from a user, is missing /ID in its trailer even though it
  513 +# is encrypted and also has a space instead of a newline after its
  514 +# xref keyword. xpdf can open it, but Adobe reader can't.
  515 +$td->runtest("check broken file",
  516 + {$td->COMMAND => "qpdf --check invalid-id-xref.pdf"},
  517 + {$td->FILE => "invalid-id-xref.out", $td->EXIT_STATUS => 3},
  518 + $td->NORMALIZE_NEWLINES);
  519 +
512 520 show_ntests();
513 521 # ----------
514 522 $td->notify("--- Numeric range parsing tests ---");
... ...
qpdf/qtest/qpdf/invalid-id-xref.out 0 → 100644
  1 +WARNING: invalid-id-xref.pdf (trailer, file position 2493795): invalid /ID in trailer dictionary
  2 +checking invalid-id-xref.pdf
  3 +PDF Version: 1.1
  4 +R = 3
  5 +P = -1804
  6 +User password =
  7 +extract for accessibility: not allowed
  8 +extract for any purpose: allowed
  9 +print low resolution: allowed
  10 +print high resolution: allowed
  11 +modify document assembly: not allowed
  12 +modify forms: not allowed
  13 +modify annotations: allowed
  14 +modify other: not allowed
  15 +modify anything: not allowed
  16 +File is not linearized
... ...
qpdf/qtest/qpdf/invalid-id-xref.pdf 0 → 100644
No preview for this file type