Commit 72bd486337a35fb1b8bef744d1387edc93738682

Authored by m-holger
1 parent 3b97c9bd

Refactor QPDF::parse_xrefEntry

Move reading of the entry from read_xrefTable to parse_xrefEntry.

Split parse_xrefEntry into two new methods read_xrefEntry and
read_bad_xrefEntry. read_xrefEntry is optimised for reading
correct entries. To handle incorrect entries it calls read_bad_xrefEntry,
which is largely unchanged from parse_xrefEntry.
include/qpdf/QPDF.hh
@@ -1004,7 +1004,8 @@ class QPDF @@ -1004,7 +1004,8 @@ class QPDF
1004 bool resolveXRefTable(); 1004 bool resolveXRefTable();
1005 void reconstruct_xref(QPDFExc& e); 1005 void reconstruct_xref(QPDFExc& e);
1006 bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes); 1006 bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes);
1007 - bool parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& type); 1007 + bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);
  1008 + bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);
1008 qpdf_offset_t read_xrefTable(qpdf_offset_t offset); 1009 qpdf_offset_t read_xrefTable(qpdf_offset_t offset);
1009 qpdf_offset_t read_xrefStream(qpdf_offset_t offset); 1010 qpdf_offset_t read_xrefStream(qpdf_offset_t offset);
1010 qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream); 1011 qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
libqpdf/QPDF.cc
@@ -2,6 +2,7 @@ @@ -2,6 +2,7 @@
2 2
3 #include <qpdf/QPDF.hh> 3 #include <qpdf/QPDF.hh>
4 4
  5 +#include <array>
5 #include <atomic> 6 #include <atomic>
6 #include <cstring> 7 #include <cstring>
7 #include <limits> 8 #include <limits>
@@ -767,11 +768,15 @@ QPDF::parse_xrefFirst(std::string const&amp; line, int&amp; obj, int&amp; num, int&amp; bytes) @@ -767,11 +768,15 @@ QPDF::parse_xrefFirst(std::string const&amp; line, int&amp; obj, int&amp; num, int&amp; bytes)
767 } 768 }
768 769
769 bool 770 bool
770 -QPDF::parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& type) 771 +QPDF::read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type)
771 { 772 {
  773 + // Reposition after initial read attempt and reread.
  774 + m->file->seek(m->file->getLastOffset(), SEEK_SET);
  775 + auto line = m->file->readLine(30);
  776 +
772 // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated 777 // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
773 // buffer. 778 // buffer.
774 - char const* p = line.c_str(); 779 + char const* p = line.data();
775 780
776 // Skip zero or more spaces. There aren't supposed to be any. 781 // Skip zero or more spaces. There aren't supposed to be any.
777 bool invalid = false; 782 bool invalid = false;
@@ -842,18 +847,73 @@ QPDF::parse_xrefEntry(std::string const&amp; line, qpdf_offset_t&amp; f1, int&amp; f2, char&amp; @@ -842,18 +847,73 @@ QPDF::parse_xrefEntry(std::string const&amp; line, qpdf_offset_t&amp; f1, int&amp; f2, char&amp;
842 return true; 847 return true;
843 } 848 }
844 849
  850 +// Optimistically read and parse xref entry. If entry is bad, call read_bad_xrefEntry and return
  851 +// result.
  852 +bool
  853 +QPDF::read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type)
  854 +{
  855 + std::array<char, 21> line;
  856 + if (m->file->read(line.data(), 20) != 20) {
  857 + // C++20: [[unlikely]]
  858 + return false;
  859 + }
  860 + line[20] = '\0';
  861 + char const* p = line.data();
  862 +
  863 + int f1_len = 0;
  864 + int f2_len = 0;
  865 +
  866 + // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
  867 + // buffer.
  868 +
  869 + // Gather f1 digits. NB No risk of overflow as 9'999'999'999 < max long long.
  870 + while (*p == '0') {
  871 + ++f1_len;
  872 + ++p;
  873 + }
  874 + while (QUtil::is_digit(*p) && f1_len++ < 10) {
  875 + f1 *= 10;
  876 + f1 += *p++ - '0';
  877 + }
  878 + // Require space
  879 + if (!QUtil::is_space(*p++)) {
  880 + // Entry doesn't start with space or digit.
  881 + // C++20: [[unlikely]]
  882 + return false;
  883 + }
  884 + // Gather digits. NB No risk of overflow as 99'999 < max int.
  885 + while (*p == '0') {
  886 + ++f2_len;
  887 + ++p;
  888 + }
  889 + while (QUtil::is_digit(*p) && f2_len++ < 5) {
  890 + f2 *= 10;
  891 + f2 += static_cast<int>(*p++ - '0');
  892 + }
  893 + if (QUtil::is_space(*p++) && (*p == 'f' || *p == 'n')) {
  894 + // C++20: [[likely]]
  895 + type = *p;
  896 + ++p;
  897 + ++p; // No test for valid line[19].
  898 + if ((*p == '\n' || *p == '\r') && f1_len == 10 && f2_len == 5) {
  899 + // C++20: [[likely]]
  900 + return true;
  901 + }
  902 + }
  903 + return read_bad_xrefEntry(f1, f2, type);
  904 +}
  905 +
  906 +// Read a single cross-reference table section and associated trailer.
845 qpdf_offset_t 907 qpdf_offset_t
846 QPDF::read_xrefTable(qpdf_offset_t xref_offset) 908 QPDF::read_xrefTable(qpdf_offset_t xref_offset)
847 { 909 {
848 std::vector<QPDFObjGen> deleted_items; 910 std::vector<QPDFObjGen> deleted_items;
849 911
850 m->file->seek(xref_offset, SEEK_SET); 912 m->file->seek(xref_offset, SEEK_SET);
851 - bool done = false;  
852 - while (!done) {  
853 - char linebuf[51];  
854 - memset(linebuf, 0, sizeof(linebuf));  
855 - m->file->read(linebuf, sizeof(linebuf) - 1);  
856 - std::string line = linebuf; 913 + std::string line;
  914 + while (true) {
  915 + line.assign(50, '\0');
  916 + m->file->read(line.data(), line.size());
857 int obj = 0; 917 int obj = 0;
858 int num = 0; 918 int num = 0;
859 int bytes = 0; 919 int bytes = 0;
@@ -867,12 +927,11 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) @@ -867,12 +927,11 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
867 // This is needed by checkLinearization() 927 // This is needed by checkLinearization()
868 m->first_xref_item_offset = m->file->tell(); 928 m->first_xref_item_offset = m->file->tell();
869 } 929 }
870 - std::string xref_entry = m->file->readLine(30);  
871 // For xref_table, these will always be small enough to be ints 930 // For xref_table, these will always be small enough to be ints
872 qpdf_offset_t f1 = 0; 931 qpdf_offset_t f1 = 0;
873 int f2 = 0; 932 int f2 = 0;
874 char type = '\0'; 933 char type = '\0';
875 - if (!parse_xrefEntry(xref_entry, f1, f2, type)) { 934 + if (!read_xrefEntry(f1, f2, type)) {
876 QTC::TC("qpdf", "QPDF invalid xref entry"); 935 QTC::TC("qpdf", "QPDF invalid xref entry");
877 throw damagedPDF( 936 throw damagedPDF(
878 "xref table", "invalid xref entry (obj=" + std::to_string(i) + ")"); 937 "xref table", "invalid xref entry (obj=" + std::to_string(i) + ")");
@@ -886,7 +945,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) @@ -886,7 +945,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
886 } 945 }
887 qpdf_offset_t pos = m->file->tell(); 946 qpdf_offset_t pos = m->file->tell();
888 if (readToken(m->file).isWord("trailer")) { 947 if (readToken(m->file).isWord("trailer")) {
889 - done = true; 948 + break;
890 } else { 949 } else {
891 m->file->seek(pos, SEEK_SET); 950 m->file->seek(pos, SEEK_SET);
892 } 951 }
@@ -945,6 +1004,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) @@ -945,6 +1004,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
945 return xref_offset; 1004 return xref_offset;
946 } 1005 }
947 1006
  1007 +// Read a single cross-reference stream.
948 qpdf_offset_t 1008 qpdf_offset_t
949 QPDF::read_xrefStream(qpdf_offset_t xref_offset) 1009 QPDF::read_xrefStream(qpdf_offset_t xref_offset)
950 { 1010 {