Commit 2b0c2da720d6c5d60fef046d247fc88981442501

Authored by m-holger
1 parent 7477ea78

Refactor QPDF::processXRefStream

Change the processed Index array to a vector of <first object, number of
entries> pairs.
include/qpdf/QPDF.hh
@@ -1032,7 +1032,7 @@ class QPDF @@ -1032,7 +1032,7 @@ class QPDF
1032 processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged); 1032 processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged);
1033 int processXRefSize( 1033 int processXRefSize(
1034 QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged); 1034 QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged);
1035 - std::pair<int, std::vector<long long>> processXRefIndex( 1035 + std::pair<int, std::vector<std::pair<int, int>>> processXRefIndex(
1036 QPDFObjectHandle& dict, 1036 QPDFObjectHandle& dict,
1037 int max_num_entries, 1037 int max_num_entries,
1038 std::function<QPDFExc(std::string_view)> damaged); 1038 std::function<QPDFExc(std::string_view)> damaged);
libqpdf/QPDF.cc
@@ -2,6 +2,7 @@ @@ -2,6 +2,7 @@
2 2
3 #include <qpdf/QPDF.hh> 3 #include <qpdf/QPDF.hh>
4 4
  5 +#include <array>
5 #include <atomic> 6 #include <atomic>
6 #include <cstring> 7 #include <cstring>
7 #include <limits> 8 #include <limits>
@@ -1023,57 +1024,66 @@ QPDF::processXRefSize( @@ -1023,57 +1024,66 @@ QPDF::processXRefSize(
1023 } 1024 }
1024 1025
1025 // Return the number of entries of the xref stream and the processed Index array. 1026 // Return the number of entries of the xref stream and the processed Index array.
1026 -std::pair<int, std::vector<long long>> 1027 +std::pair<int, std::vector<std::pair<int, int>>>
1027 QPDF::processXRefIndex( 1028 QPDF::processXRefIndex(
1028 QPDFObjectHandle& dict, int max_num_entries, std::function<QPDFExc(std::string_view)> damaged) 1029 QPDFObjectHandle& dict, int max_num_entries, std::function<QPDFExc(std::string_view)> damaged)
1029 { 1030 {
1030 auto size = dict.getKey("/Size").getIntValueAsInt(); 1031 auto size = dict.getKey("/Size").getIntValueAsInt();
1031 auto Index_obj = dict.getKey("/Index"); 1032 auto Index_obj = dict.getKey("/Index");
1032 - if (!(Index_obj.isArray() || Index_obj.isNull())) {  
1033 - throw damaged("Cross-reference stream does not have a proper /Index key");  
1034 - }  
1035 1033
1036 - std::vector<long long> indx;  
1037 if (Index_obj.isArray()) { 1034 if (Index_obj.isArray()) {
1038 - int n_index = Index_obj.getArrayNItems();  
1039 - if ((n_index % 2) || (n_index < 2)) { 1035 + std::vector<std::pair<int, int>> indx;
  1036 + int num_entries = 0;
  1037 + auto index_vec = Index_obj.getArrayAsVector();
  1038 + if ((index_vec.size() % 2) || index_vec.size() < 2) {
1040 throw damaged("Cross-reference stream's /Index has an invalid number of values"); 1039 throw damaged("Cross-reference stream's /Index has an invalid number of values");
1041 } 1040 }
1042 - for (int i = 0; i < n_index; ++i) {  
1043 - if (Index_obj.getArrayItem(i).isInteger()) {  
1044 - indx.push_back(Index_obj.getArrayItem(i).getIntValue()); 1041 +
  1042 + int i = 0;
  1043 + long long first = 0;
  1044 + for (auto& val: index_vec) {
  1045 + if (val.isInteger()) {
  1046 + if (i % 2) {
  1047 + auto count = val.getIntValue();
  1048 + // We are guarding against the possibility of num_entries * entry_size
  1049 + // overflowing. We are not checking that entries are in ascending order as
  1050 + // required by the spec, which probably should generate a warning. We are also
  1051 + // not checking that for each subsection first object number + number of entries
  1052 + // <= /Size. The spec requires us to ignore object number > /Size.
  1053 + if (first > (max_num_entries - count) ||
  1054 + count > (max_num_entries - num_entries)) {
  1055 + throw damaged(
  1056 + "Cross-reference stream claims to contain too many entries: " +
  1057 + std::to_string(first) + " " + std::to_string(max_num_entries) + " " +
  1058 + std::to_string(num_entries));
  1059 + }
  1060 + indx.emplace_back(static_cast<int>(first), static_cast<int>(count));
  1061 + num_entries += static_cast<int>(count);
  1062 + } else {
  1063 + first = val.getIntValue();
  1064 + if (first < 0) {
  1065 + throw damaged(
  1066 + "Cross-reference stream's /Index contains a negative object id");
  1067 + } else if (first > max_num_entries) {
  1068 + throw damaged("Cross-reference stream's /Index contains an impossibly "
  1069 + "large object id");
  1070 + }
  1071 + }
1045 } else { 1072 } else {
1046 throw damaged( 1073 throw damaged(
1047 "Cross-reference stream's /Index's item " + std::to_string(i) + 1074 "Cross-reference stream's /Index's item " + std::to_string(i) +
1048 " is not an integer"); 1075 " is not an integer");
1049 } 1076 }
  1077 + i++;
1050 } 1078 }
1051 - QTC::TC("qpdf", "QPDF xref /Index is array", n_index == 2 ? 0 : 1);  
1052 - } else {  
1053 - // We have already validated the /Index key. 1079 + QTC::TC("qpdf", "QPDF xref /Index is array", index_vec.size() == 2 ? 0 : 1);
  1080 + return {num_entries, indx};
  1081 + } else if (Index_obj.isNull()) {
1054 QTC::TC("qpdf", "QPDF xref /Index is null"); 1082 QTC::TC("qpdf", "QPDF xref /Index is null");
1055 - indx.push_back(0);  
1056 - indx.push_back(size);  
1057 - }  
1058 -  
1059 - int num_entries = 0;  
1060 - for (size_t i = 1; i < indx.size(); i += 2) {  
1061 - // We are guarding against the possibility of num_entries * entry_size overflowing.  
1062 - // We are not checking that entries are in ascending order as required by the spec, which  
1063 - // probably should generate a warning. We are also not checking that for each subsection  
1064 - // first object number + number of entries <= /Size. The spec requires us to ignore object  
1065 - // number > /Size.  
1066 - if (indx.at(i) > (max_num_entries - num_entries)) {  
1067 - throw damaged(  
1068 - "Cross-reference stream claims to contain too many entries: " +  
1069 - std::to_string(indx.at(i)) + " " + std::to_string(max_num_entries) + " " +  
1070 - std::to_string(num_entries));  
1071 - }  
1072 - num_entries += indx.at(i); 1083 + return {size, {{0, size}}};
  1084 + } else {
  1085 + throw damaged("Cross-reference stream does not have a proper /Index key");
1073 } 1086 }
1074 - // entry_size and num_entries have both been validated to ensure that this multiplication does  
1075 - // not cause an overflow.  
1076 - return {num_entries, indx};  
1077 } 1087 }
1078 1088
1079 qpdf_offset_t 1089 qpdf_offset_t
@@ -1109,27 +1119,16 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle&amp; xref_obj) @@ -1109,27 +1119,16 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle&amp; xref_obj)
1109 // Actual size vs. expected size check above ensures that we will not overflow any buffers here. 1119 // Actual size vs. expected size check above ensures that we will not overflow any buffers here.
1110 // We know that entry_size * num_entries is less or equal to the size of the buffer. 1120 // We know that entry_size * num_entries is less or equal to the size of the buffer.
1111 auto p = bp->getBuffer(); 1121 auto p = bp->getBuffer();
1112 - for (auto iter = indx.cbegin(); iter < indx.cend(); ++iter) { 1122 + for (auto [obj, sec_entries]: indx) {
1113 // Process a subsection. 1123 // Process a subsection.
1114 - // Get the object number. The object number is based on /Index.  
1115 - int obj = toI(*iter++);  
1116 - size_t sec_entries = toS(*iter);  
1117 - for (size_t i = 0; i < sec_entries; ++i) {  
1118 - if (obj < 0 || obj >= (std::numeric_limits<int>::max() - 1)) {  
1119 - std::ostringstream msg;  
1120 - msg.imbue(std::locale::classic());  
1121 - msg << "adding 1 to " << obj  
1122 - << " while computing index in xref stream would cause an integer overflow";  
1123 - throw std::range_error(msg.str());  
1124 - } 1124 + for (int i = 0; i < sec_entries; ++i) {
1125 // Read this entry 1125 // Read this entry
1126 - std::array<qpdf_offset_t, 3> fields; 1126 + std::array<qpdf_offset_t, 3> fields{};
  1127 + if (W[0] == 0) {
  1128 + QTC::TC("qpdf", "QPDF default for xref stream field 0");
  1129 + fields[0] = 1;
  1130 + }
1127 for (size_t j = 0; j < 3; ++j) { 1131 for (size_t j = 0; j < 3; ++j) {
1128 - fields[j] = 0;  
1129 - if ((j == 0) && (W[0] == 0)) {  
1130 - QTC::TC("qpdf", "QPDF default for xref stream field 0");  
1131 - fields[0] = 1;  
1132 - }  
1133 for (int k = 0; k < W[j]; ++k) { 1132 for (int k = 0; k < W[j]; ++k) {
1134 fields[j] <<= 8; 1133 fields[j] <<= 8;
1135 fields[j] |= *p++; 1134 fields[j] |= *p++;
@@ -1170,12 +1169,10 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle&amp; xref_obj) @@ -1170,12 +1169,10 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle&amp; xref_obj)
1170 "xref stream", "/Prev key in xref stream dictionary is not an integer"); 1169 "xref stream", "/Prev key in xref stream dictionary is not an integer");
1171 } 1170 }
1172 QTC::TC("qpdf", "QPDF prev key in xref stream dictionary"); 1171 QTC::TC("qpdf", "QPDF prev key in xref stream dictionary");
1173 - xref_offset = dict.getKey("/Prev").getIntValue(); 1172 + return dict.getKey("/Prev").getIntValue();
1174 } else { 1173 } else {
1175 - xref_offset = 0; 1174 + return 0;
1176 } 1175 }
1177 -  
1178 - return xref_offset;  
1179 } 1176 }
1180 1177
1181 void 1178 void