Commit 28c13f5492ecc8bb6b8e5419f66ad6e66d5869e7

Authored by m-holger
1 parent ad10fa30

Refactor Xref_table::subsections

Optimistically read subsection headers without reading the individual object
entries, assuming that each entry is exactly 20 bytes long as per the PDF
spec. If any problem is encountered, fall back to calling bad_subsections.
libqpdf/QPDF.cc
@@ -820,20 +820,24 @@ QPDF::Xref_table::subsection(std::string const& line) @@ -820,20 +820,24 @@ QPDF::Xref_table::subsection(std::string const& line)
820 while (QUtil::is_space(*p)) { 820 while (QUtil::is_space(*p)) {
821 ++p; 821 ++p;
822 } 822 }
823 - return {  
824 - QUtil::string_to_int(obj_str.c_str()),  
825 - QUtil::string_to_int(num_str.c_str()),  
826 - file->getLastOffset() + toI(p - start)}; 823 + auto obj = QUtil::string_to_int(obj_str.c_str());
  824 + auto count = QUtil::string_to_int(num_str.c_str());
  825 + if (obj > max_id() || count > max_id() || (obj + count) > max_id()) {
  826 + throw damaged_table("xref table subsection header contains impossibly large entry");
  827 + }
  828 + return {obj, count, file->getLastOffset() + toI(p - start)};
827 } 829 }
828 830
829 std::vector<QPDF::Xref_table::Subsection> 831 std::vector<QPDF::Xref_table::Subsection>
830 -QPDF::Xref_table::bad_subsections(std::string& line) 832 +QPDF::Xref_table::bad_subsections(std::string& line, qpdf_offset_t start)
831 { 833 {
832 std::vector<QPDF::Xref_table::Subsection> result; 834 std::vector<QPDF::Xref_table::Subsection> result;
833 qpdf_offset_t f1 = 0; 835 qpdf_offset_t f1 = 0;
834 int f2 = 0; 836 int f2 = 0;
835 char type = '\0'; 837 char type = '\0';
836 838
  839 + file->seek(start, SEEK_SET);
  840 +
837 while (true) { 841 while (true) {
838 line.assign(50, '\0'); 842 line.assign(50, '\0');
839 file->read(line.data(), line.size()); 843 file->read(line.data(), line.size());
@@ -854,10 +858,36 @@ QPDF::Xref_table::bad_subsections(std::string& line) @@ -854,10 +858,36 @@ QPDF::Xref_table::bad_subsections(std::string& line)
854 } 858 }
855 } 859 }
856 860
  861 +// Optimistically read and parse all subsection headers. If an error is encountered return the
  862 +// result of bad_subsections.
857 std::vector<QPDF::Xref_table::Subsection> 863 std::vector<QPDF::Xref_table::Subsection>
858 QPDF::Xref_table::subsections(std::string& line) 864 QPDF::Xref_table::subsections(std::string& line)
859 { 865 {
860 - return bad_subsections(line); 866 + auto recovery_offset = file->tell();
  867 + try {
  868 + std::vector<QPDF::Xref_table::Subsection> result;
  869 +
  870 + while (true) {
  871 + line.assign(50, '\0');
  872 + file->read(line.data(), line.size());
  873 + auto& sub = result.emplace_back(subsection(line));
  874 + auto count = std::get<1>(sub);
  875 + auto offset = std::get<2>(sub);
  876 + file->seek(offset + 20 * toO(count) - 1, SEEK_SET);
  877 + file->read(line.data(), 1);
  878 + if (!(line[0] == '\n' || line[0] == '\n')) {
  879 + return bad_subsections(line, recovery_offset);
  880 + }
  881 + qpdf_offset_t pos = file->tell();
  882 + if (read_token().isWord("trailer")) {
  883 + return result;
  884 + } else {
  885 + file->seek(pos, SEEK_SET);
  886 + }
  887 + }
  888 + } catch (...) {
  889 + return bad_subsections(line, recovery_offset);
  890 + }
861 } 891 }
862 892
863 bool 893 bool
libqpdf/qpdf/QPDF_private.hh
@@ -231,7 +231,7 @@ class QPDF::Xref_table @@ -231,7 +231,7 @@ class QPDF::Xref_table
231 // Methods to parse tables 231 // Methods to parse tables
232 qpdf_offset_t process_section(qpdf_offset_t offset); 232 qpdf_offset_t process_section(qpdf_offset_t offset);
233 std::vector<Subsection> subsections(std::string& line); 233 std::vector<Subsection> subsections(std::string& line);
234 - std::vector<Subsection> bad_subsections(std::string& line); 234 + std::vector<Subsection> bad_subsections(std::string& line, qpdf_offset_t offset);
235 Subsection subsection(std::string const& line); 235 Subsection subsection(std::string const& line);
236 bool read_entry(qpdf_offset_t& f1, int& f2, char& type); 236 bool read_entry(qpdf_offset_t& f1, int& f2, char& type);
237 bool read_bad_entry(qpdf_offset_t& f1, int& f2, char& type); 237 bool read_bad_entry(qpdf_offset_t& f1, int& f2, char& type);
qpdf/qtest/qpdf/issue-335b.out
1 WARNING: issue-335b.pdf: can't find PDF header 1 WARNING: issue-335b.pdf: can't find PDF header
2 WARNING: issue-335b.pdf: file is damaged 2 WARNING: issue-335b.pdf: file is damaged
3 -WARNING: issue-335b.pdf (xref table, offset 23): invalid xref entry (obj=6) 3 +WARNING: issue-335b.pdf (xref table, offset 11): xref table subsection header contains impossibly large entry
4 WARNING: issue-335b.pdf: Attempting to reconstruct cross-reference table 4 WARNING: issue-335b.pdf: Attempting to reconstruct cross-reference table
5 qpdf: issue-335b.pdf: unable to find trailer dictionary while recovering damaged file 5 qpdf: issue-335b.pdf: unable to find trailer dictionary while recovering damaged file