Commit 28c13f5492ecc8bb6b8e5419f66ad6e66d5869e7

Authored by m-holger
1 parent ad10fa30

Refactor Xref_table::subsections

Optimistically read subsection headers without reading individual object
entries, assuming that they are 20 bytes long as per the PDF spec. If
problems are encountered, fall back to calling bad_subsections.
libqpdf/QPDF.cc
... ... @@ -820,20 +820,24 @@ QPDF::Xref_table::subsection(std::string const& line)
820 820 while (QUtil::is_space(*p)) {
821 821 ++p;
822 822 }
823   - return {
824   - QUtil::string_to_int(obj_str.c_str()),
825   - QUtil::string_to_int(num_str.c_str()),
826   - file->getLastOffset() + toI(p - start)};
  823 + auto obj = QUtil::string_to_int(obj_str.c_str());
  824 + auto count = QUtil::string_to_int(num_str.c_str());
  825 + if (obj > max_id() || count > max_id() || (obj + count) > max_id()) {
  826 + throw damaged_table("xref table subsection header contains impossibly large entry");
  827 + }
  828 + return {obj, count, file->getLastOffset() + toI(p - start)};
827 829 }
828 830  
829 831 std::vector<QPDF::Xref_table::Subsection>
830   -QPDF::Xref_table::bad_subsections(std::string& line)
  832 +QPDF::Xref_table::bad_subsections(std::string& line, qpdf_offset_t start)
831 833 {
832 834 std::vector<QPDF::Xref_table::Subsection> result;
833 835 qpdf_offset_t f1 = 0;
834 836 int f2 = 0;
835 837 char type = '\0';
836 838  
  839 + file->seek(start, SEEK_SET);
  840 +
837 841 while (true) {
838 842 line.assign(50, '\0');
839 843 file->read(line.data(), line.size());
... ... @@ -854,10 +858,36 @@ QPDF::Xref_table::bad_subsections(std::string& line)
854 858 }
855 859 }
856 860  
  861 +// Optimistically read and parse all subsection headers. If an error is encountered return the
  862 +// result of bad_subsections.
857 863 std::vector<QPDF::Xref_table::Subsection>
858 864 QPDF::Xref_table::subsections(std::string& line)
859 865 {
860   - return bad_subsections(line);
  866 + auto recovery_offset = file->tell();
  867 + try {
  868 + std::vector<QPDF::Xref_table::Subsection> result;
  869 +
  870 + while (true) {
  871 + line.assign(50, '\0');
  872 + file->read(line.data(), line.size());
  873 + auto& sub = result.emplace_back(subsection(line));
  874 + auto count = std::get<1>(sub);
  875 + auto offset = std::get<2>(sub);
  876 + file->seek(offset + 20 * toO(count) - 1, SEEK_SET);
  877 + file->read(line.data(), 1);
  878 + if (!(line[0] == '\n' || line[0] == '\n')) {
  879 + return bad_subsections(line, recovery_offset);
  880 + }
  881 + qpdf_offset_t pos = file->tell();
  882 + if (read_token().isWord("trailer")) {
  883 + return result;
  884 + } else {
  885 + file->seek(pos, SEEK_SET);
  886 + }
  887 + }
  888 + } catch (...) {
  889 + return bad_subsections(line, recovery_offset);
  890 + }
861 891 }
862 892  
863 893 bool
... ...
libqpdf/qpdf/QPDF_private.hh
... ... @@ -231,7 +231,7 @@ class QPDF::Xref_table
231 231 // Methods to parse tables
232 232 qpdf_offset_t process_section(qpdf_offset_t offset);
233 233 std::vector<Subsection> subsections(std::string& line);
234   - std::vector<Subsection> bad_subsections(std::string& line);
  234 + std::vector<Subsection> bad_subsections(std::string& line, qpdf_offset_t offset);
235 235 Subsection subsection(std::string const& line);
236 236 bool read_entry(qpdf_offset_t& f1, int& f2, char& type);
237 237 bool read_bad_entry(qpdf_offset_t& f1, int& f2, char& type);
... ...
qpdf/qtest/qpdf/issue-335b.out
1 1 WARNING: issue-335b.pdf: can't find PDF header
2 2 WARNING: issue-335b.pdf: file is damaged
3   -WARNING: issue-335b.pdf (xref table, offset 23): invalid xref entry (obj=6)
  3 +WARNING: issue-335b.pdf (xref table, offset 11): xref table subsection header contains impossibly large entry
4 4 WARNING: issue-335b.pdf: Attempting to reconstruct cross-reference table
5 5 qpdf: issue-335b.pdf: unable to find trailer dictionary while recovering damaged file
... ...