Commit 72bd486337a35fb1b8bef744d1387edc93738682
1 parent
3b97c9bd
Refactor QPDF::parse_xrefEntry
Move reading of the entry from read_xrefTable to parse_xrefEntry. Split parse_xrefEntry into two new methods read_xrefEntry and read_bad_xrefEntry. read_xrefEntry is optimised for reading correct entries. To handle incorrect entries it calls read_bad_xrefEntry, which is largely unchanged from parse_xrefEntry.
Showing
2 changed files
with
73 additions
and
12 deletions
include/qpdf/QPDF.hh
| ... | ... | @@ -1004,7 +1004,8 @@ class QPDF |
| 1004 | 1004 | bool resolveXRefTable(); |
| 1005 | 1005 | void reconstruct_xref(QPDFExc& e); |
| 1006 | 1006 | bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes); |
| 1007 | - bool parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& type); | |
| 1007 | + bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); | |
| 1008 | + bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); | |
| 1008 | 1009 | qpdf_offset_t read_xrefTable(qpdf_offset_t offset); |
| 1009 | 1010 | qpdf_offset_t read_xrefStream(qpdf_offset_t offset); |
| 1010 | 1011 | qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream); | ... | ... |
libqpdf/QPDF.cc
| ... | ... | @@ -2,6 +2,7 @@ |
| 2 | 2 | |
| 3 | 3 | #include <qpdf/QPDF.hh> |
| 4 | 4 | |
| 5 | +#include <array> | |
| 5 | 6 | #include <atomic> |
| 6 | 7 | #include <cstring> |
| 7 | 8 | #include <limits> |
| ... | ... | @@ -767,11 +768,15 @@ QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes) |
| 767 | 768 | } |
| 768 | 769 | |
| 769 | 770 | bool |
| 770 | -QPDF::parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& type) | |
| 771 | +QPDF::read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type) | |
| 771 | 772 | { |
| 773 | + // Reposition after initial read attempt and reread. | |
| 774 | + m->file->seek(m->file->getLastOffset(), SEEK_SET); | |
| 775 | + auto line = m->file->readLine(30); | |
| 776 | + | |
| 772 | 777 | // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated |
| 773 | 778 | // buffer. |
| 774 | - char const* p = line.c_str(); | |
| 779 | + char const* p = line.data(); | |
| 775 | 780 | |
| 776 | 781 | // Skip zero or more spaces. There aren't supposed to be any. |
| 777 | 782 | bool invalid = false; |
| ... | ... | @@ -842,18 +847,73 @@ QPDF::parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& |
| 842 | 847 | return true; |
| 843 | 848 | } |
| 844 | 849 | |
| 850 | +// Optimistically read and parse xref entry. If entry is bad, call read_bad_xrefEntry and return | |
| 851 | +// result. | |
| 852 | +bool | |
| 853 | +QPDF::read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type) | |
| 854 | +{ | |
| 855 | + std::array<char, 21> line; | |
| 856 | + if (m->file->read(line.data(), 20) != 20) { | |
| 857 | + // C++20: [[unlikely]] | |
| 858 | + return false; | |
| 859 | + } | |
| 860 | + line[20] = '\0'; | |
| 861 | + char const* p = line.data(); | |
| 862 | + | |
| 863 | + int f1_len = 0; | |
| 864 | + int f2_len = 0; | |
| 865 | + | |
| 866 | + // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated | |
| 867 | + // buffer. | |
| 868 | + | |
| 869 | + // Gather f1 digits. NB No risk of overflow as 9'999'999'999 < max long long. | |
| 870 | + while (*p == '0') { | |
| 871 | + ++f1_len; | |
| 872 | + ++p; | |
| 873 | + } | |
| 874 | + while (QUtil::is_digit(*p) && f1_len++ < 10) { | |
| 875 | + f1 *= 10; | |
| 876 | + f1 += *p++ - '0'; | |
| 877 | + } | |
| 878 | + // Require space | |
| 879 | + if (!QUtil::is_space(*p++)) { | |
| 880 | + // Entry doesn't start with space or digit. | |
| 881 | + // C++20: [[unlikely]] | |
| 882 | + return false; | |
| 883 | + } | |
| 884 | + // Gather digits. NB No risk of overflow as 99'999 < max int. | |
| 885 | + while (*p == '0') { | |
| 886 | + ++f2_len; | |
| 887 | + ++p; | |
| 888 | + } | |
| 889 | + while (QUtil::is_digit(*p) && f2_len++ < 5) { | |
| 890 | + f2 *= 10; | |
| 891 | + f2 += static_cast<int>(*p++ - '0'); | |
| 892 | + } | |
| 893 | + if (QUtil::is_space(*p++) && (*p == 'f' || *p == 'n')) { | |
| 894 | + // C++20: [[likely]] | |
| 895 | + type = *p; | |
| 896 | + ++p; | |
| 897 | + ++p; // No test for valid line[19]. | |
| 898 | + if ((*p == '\n' || *p == '\r') && f1_len == 10 && f2_len == 5) { | |
| 899 | + // C++20: [[likely]] | |
| 900 | + return true; | |
| 901 | + } | |
| 902 | + } | |
| 903 | + return read_bad_xrefEntry(f1, f2, type); | |
| 904 | +} | |
| 905 | + | |
| 906 | +// Read a single cross-reference table section and associated trailer. | |
| 845 | 907 | qpdf_offset_t |
| 846 | 908 | QPDF::read_xrefTable(qpdf_offset_t xref_offset) |
| 847 | 909 | { |
| 848 | 910 | std::vector<QPDFObjGen> deleted_items; |
| 849 | 911 | |
| 850 | 912 | m->file->seek(xref_offset, SEEK_SET); |
| 851 | - bool done = false; | |
| 852 | - while (!done) { | |
| 853 | - char linebuf[51]; | |
| 854 | - memset(linebuf, 0, sizeof(linebuf)); | |
| 855 | - m->file->read(linebuf, sizeof(linebuf) - 1); | |
| 856 | - std::string line = linebuf; | |
| 913 | + std::string line; | |
| 914 | + while (true) { | |
| 915 | + line.assign(50, '\0'); | |
| 916 | + m->file->read(line.data(), line.size()); | |
| 857 | 917 | int obj = 0; |
| 858 | 918 | int num = 0; |
| 859 | 919 | int bytes = 0; |
| ... | ... | @@ -867,12 +927,11 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) |
| 867 | 927 | // This is needed by checkLinearization() |
| 868 | 928 | m->first_xref_item_offset = m->file->tell(); |
| 869 | 929 | } |
| 870 | - std::string xref_entry = m->file->readLine(30); | |
| 871 | 930 | // For xref_table, these will always be small enough to be ints |
| 872 | 931 | qpdf_offset_t f1 = 0; |
| 873 | 932 | int f2 = 0; |
| 874 | 933 | char type = '\0'; |
| 875 | - if (!parse_xrefEntry(xref_entry, f1, f2, type)) { | |
| 934 | + if (!read_xrefEntry(f1, f2, type)) { | |
| 876 | 935 | QTC::TC("qpdf", "QPDF invalid xref entry"); |
| 877 | 936 | throw damagedPDF( |
| 878 | 937 | "xref table", "invalid xref entry (obj=" + std::to_string(i) + ")"); |
| ... | ... | @@ -886,7 +945,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) |
| 886 | 945 | } |
| 887 | 946 | qpdf_offset_t pos = m->file->tell(); |
| 888 | 947 | if (readToken(m->file).isWord("trailer")) { |
| 889 | - done = true; | |
| 948 | + break; | |
| 890 | 949 | } else { |
| 891 | 950 | m->file->seek(pos, SEEK_SET); |
| 892 | 951 | } |
| ... | ... | @@ -945,6 +1004,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) |
| 945 | 1004 | return xref_offset; |
| 946 | 1005 | } |
| 947 | 1006 | |
| 1007 | +// Read a single cross-reference stream. | |
| 948 | 1008 | qpdf_offset_t |
| 949 | 1009 | QPDF::read_xrefStream(qpdf_offset_t xref_offset) |
| 950 | 1010 | { | ... | ... |