Commit 295f62f0413690940918251cbc0462dcff36e252
Committed by
GitHub
Merge pull request #1170 from m-holger/readxref
Refactor QPDF::parse_xrefEntry
Showing
2 changed files
with
72 additions
and
12 deletions
include/qpdf/QPDF.hh
| @@ -1024,7 +1024,8 @@ class QPDF | @@ -1024,7 +1024,8 @@ class QPDF | ||
| 1024 | bool resolveXRefTable(); | 1024 | bool resolveXRefTable(); |
| 1025 | void reconstruct_xref(QPDFExc& e); | 1025 | void reconstruct_xref(QPDFExc& e); |
| 1026 | bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes); | 1026 | bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes); |
| 1027 | - bool parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& type); | 1027 | + bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); |
| 1028 | + bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); | ||
| 1028 | qpdf_offset_t read_xrefTable(qpdf_offset_t offset); | 1029 | qpdf_offset_t read_xrefTable(qpdf_offset_t offset); |
| 1029 | qpdf_offset_t read_xrefStream(qpdf_offset_t offset); | 1030 | qpdf_offset_t read_xrefStream(qpdf_offset_t offset); |
| 1030 | qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream); | 1031 | qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream); |
libqpdf/QPDF.cc
| @@ -768,11 +768,15 @@ QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes) | @@ -768,11 +768,15 @@ QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes) | ||
| 768 | } | 768 | } |
| 769 | 769 | ||
| 770 | bool | 770 | bool |
| 771 | -QPDF::parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& type) | 771 | +QPDF::read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type) |
| 772 | { | 772 | { |
| 773 | + // Reposition after initial read attempt and reread. | ||
| 774 | + m->file->seek(m->file->getLastOffset(), SEEK_SET); | ||
| 775 | + auto line = m->file->readLine(30); | ||
| 776 | + | ||
| 773 | // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated | 777 | // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated |
| 774 | // buffer. | 778 | // buffer. |
| 775 | - char const* p = line.c_str(); | 779 | + char const* p = line.data(); |
| 776 | 780 | ||
| 777 | // Skip zero or more spaces. There aren't supposed to be any. | 781 | // Skip zero or more spaces. There aren't supposed to be any. |
| 778 | bool invalid = false; | 782 | bool invalid = false; |
| @@ -843,18 +847,73 @@ QPDF::parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& | @@ -843,18 +847,73 @@ QPDF::parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& | ||
| 843 | return true; | 847 | return true; |
| 844 | } | 848 | } |
| 845 | 849 | ||
| 850 | +// Optimistically read and parse xref entry. If entry is bad, call read_bad_xrefEntry and return | ||
| 851 | +// result. | ||
| 852 | +bool | ||
| 853 | +QPDF::read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type) | ||
| 854 | +{ | ||
| 855 | + std::array<char, 21> line; | ||
| 856 | + if (m->file->read(line.data(), 20) != 20) { | ||
| 857 | + // C++20: [[unlikely]] | ||
| 858 | + return false; | ||
| 859 | + } | ||
| 860 | + line[20] = '\0'; | ||
| 861 | + char const* p = line.data(); | ||
| 862 | + | ||
| 863 | + int f1_len = 0; | ||
| 864 | + int f2_len = 0; | ||
| 865 | + | ||
| 866 | + // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated | ||
| 867 | + // buffer. | ||
| 868 | + | ||
| 869 | + // Gather f1 digits. NB No risk of overflow as 9'999'999'999 < max long long. | ||
| 870 | + while (*p == '0') { | ||
| 871 | + ++f1_len; | ||
| 872 | + ++p; | ||
| 873 | + } | ||
| 874 | + while (QUtil::is_digit(*p) && f1_len++ < 10) { | ||
| 875 | + f1 *= 10; | ||
| 876 | + f1 += *p++ - '0'; | ||
| 877 | + } | ||
| 878 | + // Require space | ||
| 879 | + if (!QUtil::is_space(*p++)) { | ||
| 880 | + // Entry doesn't start with space or digit. | ||
| 881 | + // C++20: [[unlikely]] | ||
| 882 | + return false; | ||
| 883 | + } | ||
| 884 | + // Gather digits. NB No risk of overflow as 99'999 < max int. | ||
| 885 | + while (*p == '0') { | ||
| 886 | + ++f2_len; | ||
| 887 | + ++p; | ||
| 888 | + } | ||
| 889 | + while (QUtil::is_digit(*p) && f2_len++ < 5) { | ||
| 890 | + f2 *= 10; | ||
| 891 | + f2 += static_cast<int>(*p++ - '0'); | ||
| 892 | + } | ||
| 893 | + if (QUtil::is_space(*p++) && (*p == 'f' || *p == 'n')) { | ||
| 894 | + // C++20: [[likely]] | ||
| 895 | + type = *p; | ||
| 896 | + ++p; | ||
| 897 | + ++p; // No test for valid line[19]. | ||
| 898 | + if ((*p == '\n' || *p == '\r') && f1_len == 10 && f2_len == 5) { | ||
| 899 | + // C++20: [[likely]] | ||
| 900 | + return true; | ||
| 901 | + } | ||
| 902 | + } | ||
| 903 | + return read_bad_xrefEntry(f1, f2, type); | ||
| 904 | +} | ||
| 905 | + | ||
| 906 | +// Read a single cross-reference table section and associated trailer. | ||
| 846 | qpdf_offset_t | 907 | qpdf_offset_t |
| 847 | QPDF::read_xrefTable(qpdf_offset_t xref_offset) | 908 | QPDF::read_xrefTable(qpdf_offset_t xref_offset) |
| 848 | { | 909 | { |
| 849 | std::vector<QPDFObjGen> deleted_items; | 910 | std::vector<QPDFObjGen> deleted_items; |
| 850 | 911 | ||
| 851 | m->file->seek(xref_offset, SEEK_SET); | 912 | m->file->seek(xref_offset, SEEK_SET); |
| 852 | - bool done = false; | ||
| 853 | - while (!done) { | ||
| 854 | - char linebuf[51]; | ||
| 855 | - memset(linebuf, 0, sizeof(linebuf)); | ||
| 856 | - m->file->read(linebuf, sizeof(linebuf) - 1); | ||
| 857 | - std::string line = linebuf; | 913 | + std::string line; |
| 914 | + while (true) { | ||
| 915 | + line.assign(50, '\0'); | ||
| 916 | + m->file->read(line.data(), line.size()); | ||
| 858 | int obj = 0; | 917 | int obj = 0; |
| 859 | int num = 0; | 918 | int num = 0; |
| 860 | int bytes = 0; | 919 | int bytes = 0; |
| @@ -868,12 +927,11 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | @@ -868,12 +927,11 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | ||
| 868 | // This is needed by checkLinearization() | 927 | // This is needed by checkLinearization() |
| 869 | m->first_xref_item_offset = m->file->tell(); | 928 | m->first_xref_item_offset = m->file->tell(); |
| 870 | } | 929 | } |
| 871 | - std::string xref_entry = m->file->readLine(30); | ||
| 872 | // For xref_table, these will always be small enough to be ints | 930 | // For xref_table, these will always be small enough to be ints |
| 873 | qpdf_offset_t f1 = 0; | 931 | qpdf_offset_t f1 = 0; |
| 874 | int f2 = 0; | 932 | int f2 = 0; |
| 875 | char type = '\0'; | 933 | char type = '\0'; |
| 876 | - if (!parse_xrefEntry(xref_entry, f1, f2, type)) { | 934 | + if (!read_xrefEntry(f1, f2, type)) { |
| 877 | QTC::TC("qpdf", "QPDF invalid xref entry"); | 935 | QTC::TC("qpdf", "QPDF invalid xref entry"); |
| 878 | throw damagedPDF( | 936 | throw damagedPDF( |
| 879 | "xref table", "invalid xref entry (obj=" + std::to_string(i) + ")"); | 937 | "xref table", "invalid xref entry (obj=" + std::to_string(i) + ")"); |
| @@ -887,7 +945,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | @@ -887,7 +945,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | ||
| 887 | } | 945 | } |
| 888 | qpdf_offset_t pos = m->file->tell(); | 946 | qpdf_offset_t pos = m->file->tell(); |
| 889 | if (readToken(m->file).isWord("trailer")) { | 947 | if (readToken(m->file).isWord("trailer")) { |
| 890 | - done = true; | 948 | + break; |
| 891 | } else { | 949 | } else { |
| 892 | m->file->seek(pos, SEEK_SET); | 950 | m->file->seek(pos, SEEK_SET); |
| 893 | } | 951 | } |
| @@ -946,6 +1004,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | @@ -946,6 +1004,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | ||
| 946 | return xref_offset; | 1004 | return xref_offset; |
| 947 | } | 1005 | } |
| 948 | 1006 | ||
| 1007 | +// Read a single cross-reference stream. | ||
| 949 | qpdf_offset_t | 1008 | qpdf_offset_t |
| 950 | QPDF::read_xrefStream(qpdf_offset_t xref_offset) | 1009 | QPDF::read_xrefStream(qpdf_offset_t xref_offset) |
| 951 | { | 1010 | { |