Commit 28c13f5492ecc8bb6b8e5419f66ad6e66d5869e7
1 parent
ad10fa30
Refactor Xref_table::subsections
Optimistically read subsection headers without reading the individual object entries, assuming that each entry is exactly 20 bytes long as per the PDF spec. If any problem is encountered, fall back to calling bad_subsections.
Showing
3 changed files
with
38 additions
and
8 deletions
libqpdf/QPDF.cc
| ... | ... | @@ -820,20 +820,24 @@ QPDF::Xref_table::subsection(std::string const& line) |
| 820 | 820 | while (QUtil::is_space(*p)) { |
| 821 | 821 | ++p; |
| 822 | 822 | } |
| 823 | - return { | |
| 824 | - QUtil::string_to_int(obj_str.c_str()), | |
| 825 | - QUtil::string_to_int(num_str.c_str()), | |
| 826 | - file->getLastOffset() + toI(p - start)}; | |
| 823 | + auto obj = QUtil::string_to_int(obj_str.c_str()); | |
| 824 | + auto count = QUtil::string_to_int(num_str.c_str()); | |
| 825 | + if (obj > max_id() || count > max_id() || (obj + count) > max_id()) { | |
| 826 | + throw damaged_table("xref table subsection header contains impossibly large entry"); | |
| 827 | + } | |
| 828 | + return {obj, count, file->getLastOffset() + toI(p - start)}; | |
| 827 | 829 | } |
| 828 | 830 | |
| 829 | 831 | std::vector<QPDF::Xref_table::Subsection> |
| 830 | -QPDF::Xref_table::bad_subsections(std::string& line) | |
| 832 | +QPDF::Xref_table::bad_subsections(std::string& line, qpdf_offset_t start) | |
| 831 | 833 | { |
| 832 | 834 | std::vector<QPDF::Xref_table::Subsection> result; |
| 833 | 835 | qpdf_offset_t f1 = 0; |
| 834 | 836 | int f2 = 0; |
| 835 | 837 | char type = '\0'; |
| 836 | 838 | |
| 839 | + file->seek(start, SEEK_SET); | |
| 840 | + | |
| 837 | 841 | while (true) { |
| 838 | 842 | line.assign(50, '\0'); |
| 839 | 843 | file->read(line.data(), line.size()); |
| ... | ... | @@ -854,10 +858,36 @@ QPDF::Xref_table::bad_subsections(std::string& line) |
| 854 | 858 | } |
| 855 | 859 | } |
| 856 | 860 | |
| 861 | +// Optimistically read and parse all subsection headers. If an error is encountered return the | |
| 862 | +// result of bad_subsections. | |
| 857 | 863 | std::vector<QPDF::Xref_table::Subsection> |
| 858 | 864 | QPDF::Xref_table::subsections(std::string& line) |
| 859 | 865 | { |
| 860 | - return bad_subsections(line); | |
| 866 | + auto recovery_offset = file->tell(); | |
| 867 | + try { | |
| 868 | + std::vector<QPDF::Xref_table::Subsection> result; | |
| 869 | + | |
| 870 | + while (true) { | |
| 871 | + line.assign(50, '\0'); | |
| 872 | + file->read(line.data(), line.size()); | |
| 873 | + auto& sub = result.emplace_back(subsection(line)); | |
| 874 | + auto count = std::get<1>(sub); | |
| 875 | + auto offset = std::get<2>(sub); | |
| 876 | + file->seek(offset + 20 * toO(count) - 1, SEEK_SET); | |
| 877 | + file->read(line.data(), 1); | |
| 878 | + if (!(line[0] == '\n' || line[0] == '\n')) { | |
| 879 | + return bad_subsections(line, recovery_offset); | |
| 880 | + } | |
| 881 | + qpdf_offset_t pos = file->tell(); | |
| 882 | + if (read_token().isWord("trailer")) { | |
| 883 | + return result; | |
| 884 | + } else { | |
| 885 | + file->seek(pos, SEEK_SET); | |
| 886 | + } | |
| 887 | + } | |
| 888 | + } catch (...) { | |
| 889 | + return bad_subsections(line, recovery_offset); | |
| 890 | + } | |
| 861 | 891 | } |
| 862 | 892 | |
| 863 | 893 | bool | ... | ... |
libqpdf/qpdf/QPDF_private.hh
| ... | ... | @@ -231,7 +231,7 @@ class QPDF::Xref_table |
| 231 | 231 | // Methods to parse tables |
| 232 | 232 | qpdf_offset_t process_section(qpdf_offset_t offset); |
| 233 | 233 | std::vector<Subsection> subsections(std::string& line); |
| 234 | - std::vector<Subsection> bad_subsections(std::string& line); | |
| 234 | + std::vector<Subsection> bad_subsections(std::string& line, qpdf_offset_t offset); | |
| 235 | 235 | Subsection subsection(std::string const& line); |
| 236 | 236 | bool read_entry(qpdf_offset_t& f1, int& f2, char& type); |
| 237 | 237 | bool read_bad_entry(qpdf_offset_t& f1, int& f2, char& type); | ... | ... |
qpdf/qtest/qpdf/issue-335b.out
| 1 | 1 | WARNING: issue-335b.pdf: can't find PDF header |
| 2 | 2 | WARNING: issue-335b.pdf: file is damaged |
| 3 | -WARNING: issue-335b.pdf (xref table, offset 23): invalid xref entry (obj=6) | |
| 3 | +WARNING: issue-335b.pdf (xref table, offset 11): xref table subsection header contains impossibly large entry | |
| 4 | 4 | WARNING: issue-335b.pdf: Attempting to reconstruct cross-reference table |
| 5 | 5 | qpdf: issue-335b.pdf: unable to find trailer dictionary while recovering damaged file | ... | ... |