Commit 72bd486337a35fb1b8bef744d1387edc93738682
1 parent
3b97c9bd
Refactor QPDF::parse_xrefEntry
Move reading of the entry from read_xrefTable to parse_xrefEntry. Split parse_xrefEntry into two new methods read_xrefEntry and read_bad_xrefEntry. read_xrefEntry is optimised for reading correct entries. To handle incorrect entries it calls read_bad_xrefEntry, which is largely unchanged from parse_xrefEntry.
Showing
2 changed files
with
73 additions
and
12 deletions
include/qpdf/QPDF.hh
| @@ -1004,7 +1004,8 @@ class QPDF | @@ -1004,7 +1004,8 @@ class QPDF | ||
| 1004 | bool resolveXRefTable(); | 1004 | bool resolveXRefTable(); |
| 1005 | void reconstruct_xref(QPDFExc& e); | 1005 | void reconstruct_xref(QPDFExc& e); |
| 1006 | bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes); | 1006 | bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes); |
| 1007 | - bool parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& type); | 1007 | + bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); |
| 1008 | + bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); | ||
| 1008 | qpdf_offset_t read_xrefTable(qpdf_offset_t offset); | 1009 | qpdf_offset_t read_xrefTable(qpdf_offset_t offset); |
| 1009 | qpdf_offset_t read_xrefStream(qpdf_offset_t offset); | 1010 | qpdf_offset_t read_xrefStream(qpdf_offset_t offset); |
| 1010 | qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream); | 1011 | qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream); |
libqpdf/QPDF.cc
| @@ -2,6 +2,7 @@ | @@ -2,6 +2,7 @@ | ||
| 2 | 2 | ||
| 3 | #include <qpdf/QPDF.hh> | 3 | #include <qpdf/QPDF.hh> |
| 4 | 4 | ||
| 5 | +#include <array> | ||
| 5 | #include <atomic> | 6 | #include <atomic> |
| 6 | #include <cstring> | 7 | #include <cstring> |
| 7 | #include <limits> | 8 | #include <limits> |
| @@ -767,11 +768,15 @@ QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes) | @@ -767,11 +768,15 @@ QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes) | ||
| 767 | } | 768 | } |
| 768 | 769 | ||
| 769 | bool | 770 | bool |
| 770 | -QPDF::parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& type) | 771 | +QPDF::read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type) |
| 771 | { | 772 | { |
| 773 | + // Reposition after initial read attempt and reread. | ||
| 774 | + m->file->seek(m->file->getLastOffset(), SEEK_SET); | ||
| 775 | + auto line = m->file->readLine(30); | ||
| 776 | + | ||
| 772 | // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated | 777 | // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated |
| 773 | // buffer. | 778 | // buffer. |
| 774 | - char const* p = line.c_str(); | 779 | + char const* p = line.data(); |
| 775 | 780 | ||
| 776 | // Skip zero or more spaces. There aren't supposed to be any. | 781 | // Skip zero or more spaces. There aren't supposed to be any. |
| 777 | bool invalid = false; | 782 | bool invalid = false; |
| @@ -842,18 +847,73 @@ QPDF::parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& | @@ -842,18 +847,73 @@ QPDF::parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& | ||
| 842 | return true; | 847 | return true; |
| 843 | } | 848 | } |
| 844 | 849 | ||
| 850 | +// Optimistically read and parse xref entry. If entry is bad, call read_bad_xrefEntry and return | ||
| 851 | +// result. | ||
| 852 | +bool | ||
| 853 | +QPDF::read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type) | ||
| 854 | +{ | ||
| 855 | + std::array<char, 21> line; | ||
| 856 | + if (m->file->read(line.data(), 20) != 20) { | ||
| 857 | + // C++20: [[unlikely]] | ||
| 858 | + return false; | ||
| 859 | + } | ||
| 860 | + line[20] = '\0'; | ||
| 861 | + char const* p = line.data(); | ||
| 862 | + | ||
| 863 | + int f1_len = 0; | ||
| 864 | + int f2_len = 0; | ||
| 865 | + | ||
| 866 | + // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated | ||
| 867 | + // buffer. | ||
| 868 | + | ||
| 869 | + // Gather f1 digits. NB No risk of overflow as 9'999'999'999 < max long long. | ||
| 870 | + while (*p == '0') { | ||
| 871 | + ++f1_len; | ||
| 872 | + ++p; | ||
| 873 | + } | ||
| 874 | + while (QUtil::is_digit(*p) && f1_len++ < 10) { | ||
| 875 | + f1 *= 10; | ||
| 876 | + f1 += *p++ - '0'; | ||
| 877 | + } | ||
| 878 | + // Require space | ||
| 879 | + if (!QUtil::is_space(*p++)) { | ||
| 880 | + // Entry doesn't start with space or digit. | ||
| 881 | + // C++20: [[unlikely]] | ||
| 882 | + return false; | ||
| 883 | + } | ||
| 884 | + // Gather digits. NB No risk of overflow as 99'999 < max int. | ||
| 885 | + while (*p == '0') { | ||
| 886 | + ++f2_len; | ||
| 887 | + ++p; | ||
| 888 | + } | ||
| 889 | + while (QUtil::is_digit(*p) && f2_len++ < 5) { | ||
| 890 | + f2 *= 10; | ||
| 891 | + f2 += static_cast<int>(*p++ - '0'); | ||
| 892 | + } | ||
| 893 | + if (QUtil::is_space(*p++) && (*p == 'f' || *p == 'n')) { | ||
| 894 | + // C++20: [[likely]] | ||
| 895 | + type = *p; | ||
| 896 | + ++p; | ||
| 897 | + ++p; // No test for valid line[19]. | ||
| 898 | + if ((*p == '\n' || *p == '\r') && f1_len == 10 && f2_len == 5) { | ||
| 899 | + // C++20: [[likely]] | ||
| 900 | + return true; | ||
| 901 | + } | ||
| 902 | + } | ||
| 903 | + return read_bad_xrefEntry(f1, f2, type); | ||
| 904 | +} | ||
| 905 | + | ||
| 906 | +// Read a single cross-reference table section and associated trailer. | ||
| 845 | qpdf_offset_t | 907 | qpdf_offset_t |
| 846 | QPDF::read_xrefTable(qpdf_offset_t xref_offset) | 908 | QPDF::read_xrefTable(qpdf_offset_t xref_offset) |
| 847 | { | 909 | { |
| 848 | std::vector<QPDFObjGen> deleted_items; | 910 | std::vector<QPDFObjGen> deleted_items; |
| 849 | 911 | ||
| 850 | m->file->seek(xref_offset, SEEK_SET); | 912 | m->file->seek(xref_offset, SEEK_SET); |
| 851 | - bool done = false; | ||
| 852 | - while (!done) { | ||
| 853 | - char linebuf[51]; | ||
| 854 | - memset(linebuf, 0, sizeof(linebuf)); | ||
| 855 | - m->file->read(linebuf, sizeof(linebuf) - 1); | ||
| 856 | - std::string line = linebuf; | 913 | + std::string line; |
| 914 | + while (true) { | ||
| 915 | + line.assign(50, '\0'); | ||
| 916 | + m->file->read(line.data(), line.size()); | ||
| 857 | int obj = 0; | 917 | int obj = 0; |
| 858 | int num = 0; | 918 | int num = 0; |
| 859 | int bytes = 0; | 919 | int bytes = 0; |
| @@ -867,12 +927,11 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | @@ -867,12 +927,11 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | ||
| 867 | // This is needed by checkLinearization() | 927 | // This is needed by checkLinearization() |
| 868 | m->first_xref_item_offset = m->file->tell(); | 928 | m->first_xref_item_offset = m->file->tell(); |
| 869 | } | 929 | } |
| 870 | - std::string xref_entry = m->file->readLine(30); | ||
| 871 | // For xref_table, these will always be small enough to be ints | 930 | // For xref_table, these will always be small enough to be ints |
| 872 | qpdf_offset_t f1 = 0; | 931 | qpdf_offset_t f1 = 0; |
| 873 | int f2 = 0; | 932 | int f2 = 0; |
| 874 | char type = '\0'; | 933 | char type = '\0'; |
| 875 | - if (!parse_xrefEntry(xref_entry, f1, f2, type)) { | 934 | + if (!read_xrefEntry(f1, f2, type)) { |
| 876 | QTC::TC("qpdf", "QPDF invalid xref entry"); | 935 | QTC::TC("qpdf", "QPDF invalid xref entry"); |
| 877 | throw damagedPDF( | 936 | throw damagedPDF( |
| 878 | "xref table", "invalid xref entry (obj=" + std::to_string(i) + ")"); | 937 | "xref table", "invalid xref entry (obj=" + std::to_string(i) + ")"); |
| @@ -886,7 +945,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | @@ -886,7 +945,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | ||
| 886 | } | 945 | } |
| 887 | qpdf_offset_t pos = m->file->tell(); | 946 | qpdf_offset_t pos = m->file->tell(); |
| 888 | if (readToken(m->file).isWord("trailer")) { | 947 | if (readToken(m->file).isWord("trailer")) { |
| 889 | - done = true; | 948 | + break; |
| 890 | } else { | 949 | } else { |
| 891 | m->file->seek(pos, SEEK_SET); | 950 | m->file->seek(pos, SEEK_SET); |
| 892 | } | 951 | } |
| @@ -945,6 +1004,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | @@ -945,6 +1004,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | ||
| 945 | return xref_offset; | 1004 | return xref_offset; |
| 946 | } | 1005 | } |
| 947 | 1006 | ||
| 1007 | +// Read a single cross-reference stream. | ||
| 948 | qpdf_offset_t | 1008 | qpdf_offset_t |
| 949 | QPDF::read_xrefStream(qpdf_offset_t xref_offset) | 1009 | QPDF::read_xrefStream(qpdf_offset_t xref_offset) |
| 950 | { | 1010 | { |