Commit 295f62f0413690940918251cbc0462dcff36e252

Authored by m-holger
Committed by GitHub
2 parents ce5b864c 72bd4863

Merge pull request #1170 from m-holger/readxref

Refactor QPDF::parse_xrefEntry
include/qpdf/QPDF.hh
... ... @@ -1024,7 +1024,8 @@ class QPDF
1024 1024 bool resolveXRefTable();
1025 1025 void reconstruct_xref(QPDFExc& e);
1026 1026 bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes);
1027   - bool parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& type);
  1027 + bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);
  1028 + bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);
1028 1029 qpdf_offset_t read_xrefTable(qpdf_offset_t offset);
1029 1030 qpdf_offset_t read_xrefStream(qpdf_offset_t offset);
1030 1031 qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
... ...
libqpdf/QPDF.cc
... ... @@ -768,11 +768,15 @@ QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes)
768 768 }
769 769  
770 770 bool
771   -QPDF::parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& type)
  771 +QPDF::read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type)
772 772 {
  773 + // Reposition after initial read attempt and reread.
  774 + m->file->seek(m->file->getLastOffset(), SEEK_SET);
  775 + auto line = m->file->readLine(30);
  776 +
773 777 // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
774 778 // buffer.
775   - char const* p = line.c_str();
  779 + char const* p = line.data();
776 780  
777 781 // Skip zero or more spaces. There aren't supposed to be any.
778 782 bool invalid = false;
... ... @@ -843,18 +847,73 @@ QPDF::parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char&
843 847 return true;
844 848 }
845 849  
  850 +// Optimistically read and parse xref entry. If entry is bad, call read_bad_xrefEntry and return
  851 +// result.
  852 +bool
  853 +QPDF::read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type)
  854 +{
  855 + std::array<char, 21> line;
  856 + if (m->file->read(line.data(), 20) != 20) {
  857 + // C++20: [[unlikely]]
  858 + return false;
  859 + }
  860 + line[20] = '\0';
  861 + char const* p = line.data();
  862 +
  863 + int f1_len = 0;
  864 + int f2_len = 0;
  865 +
  866 + // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
  867 + // buffer.
  868 +
  869 + // Gather f1 digits. NB No risk of overflow as 9'999'999'999 < max long long.
  870 + while (*p == '0') {
  871 + ++f1_len;
  872 + ++p;
  873 + }
  874 + while (QUtil::is_digit(*p) && f1_len++ < 10) {
  875 + f1 *= 10;
  876 + f1 += *p++ - '0';
  877 + }
  878 + // Require space
  879 + if (!QUtil::is_space(*p++)) {
  880 + // Entry doesn't start with space or digit.
  881 + // C++20: [[unlikely]]
  882 + return false;
  883 + }
  884 + // Gather digits. NB No risk of overflow as 99'999 < max int.
  885 + while (*p == '0') {
  886 + ++f2_len;
  887 + ++p;
  888 + }
  889 + while (QUtil::is_digit(*p) && f2_len++ < 5) {
  890 + f2 *= 10;
  891 + f2 += static_cast<int>(*p++ - '0');
  892 + }
  893 + if (QUtil::is_space(*p++) && (*p == 'f' || *p == 'n')) {
  894 + // C++20: [[likely]]
  895 + type = *p;
  896 + ++p;
  897 + ++p; // No test for valid line[19].
  898 + if ((*p == '\n' || *p == '\r') && f1_len == 10 && f2_len == 5) {
  899 + // C++20: [[likely]]
  900 + return true;
  901 + }
  902 + }
  903 + return read_bad_xrefEntry(f1, f2, type);
  904 +}
  905 +
  906 +// Read a single cross-reference table section and associated trailer.
846 907 qpdf_offset_t
847 908 QPDF::read_xrefTable(qpdf_offset_t xref_offset)
848 909 {
849 910 std::vector<QPDFObjGen> deleted_items;
850 911  
851 912 m->file->seek(xref_offset, SEEK_SET);
852   - bool done = false;
853   - while (!done) {
854   - char linebuf[51];
855   - memset(linebuf, 0, sizeof(linebuf));
856   - m->file->read(linebuf, sizeof(linebuf) - 1);
857   - std::string line = linebuf;
  913 + std::string line;
  914 + while (true) {
  915 + line.assign(50, '\0');
  916 + m->file->read(line.data(), line.size());
858 917 int obj = 0;
859 918 int num = 0;
860 919 int bytes = 0;
... ... @@ -868,12 +927,11 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
868 927 // This is needed by checkLinearization()
869 928 m->first_xref_item_offset = m->file->tell();
870 929 }
871   - std::string xref_entry = m->file->readLine(30);
872 930 // For xref_table, these will always be small enough to be ints
873 931 qpdf_offset_t f1 = 0;
874 932 int f2 = 0;
875 933 char type = '\0';
876   - if (!parse_xrefEntry(xref_entry, f1, f2, type)) {
  934 + if (!read_xrefEntry(f1, f2, type)) {
877 935 QTC::TC("qpdf", "QPDF invalid xref entry");
878 936 throw damagedPDF(
879 937 "xref table", "invalid xref entry (obj=" + std::to_string(i) + ")");
... ... @@ -887,7 +945,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
887 945 }
888 946 qpdf_offset_t pos = m->file->tell();
889 947 if (readToken(m->file).isWord("trailer")) {
890   - done = true;
  948 + break;
891 949 } else {
892 950 m->file->seek(pos, SEEK_SET);
893 951 }
... ... @@ -946,6 +1004,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
946 1004 return xref_offset;
947 1005 }
948 1006  
  1007 +// Read a single cross-reference stream.
949 1008 qpdf_offset_t
950 1009 QPDF::read_xrefStream(qpdf_offset_t xref_offset)
951 1010 {
... ...