Commit 2b0c2da720d6c5d60fef046d247fc88981442501

Authored by m-holger
1 parent 7477ea78

Refactor QPDF::processXRefStream

Change the processed Index array to a vector of <first object, number of
entries> pairs.
include/qpdf/QPDF.hh
... ... @@ -1032,7 +1032,7 @@ class QPDF
1032 1032 processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged);
1033 1033 int processXRefSize(
1034 1034 QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged);
1035   - std::pair<int, std::vector<long long>> processXRefIndex(
  1035 + std::pair<int, std::vector<std::pair<int, int>>> processXRefIndex(
1036 1036 QPDFObjectHandle& dict,
1037 1037 int max_num_entries,
1038 1038 std::function<QPDFExc(std::string_view)> damaged);
... ...
libqpdf/QPDF.cc
... ... @@ -2,6 +2,7 @@
2 2  
3 3 #include <qpdf/QPDF.hh>
4 4  
  5 +#include <array>
5 6 #include <atomic>
6 7 #include <cstring>
7 8 #include <limits>
... ... @@ -1023,57 +1024,66 @@ QPDF::processXRefSize(
1023 1024 }
1024 1025  
1025 1026 // Return the number of entries of the xref stream and the processed Index array.
1026   -std::pair<int, std::vector<long long>>
  1027 +std::pair<int, std::vector<std::pair<int, int>>>
1027 1028 QPDF::processXRefIndex(
1028 1029 QPDFObjectHandle& dict, int max_num_entries, std::function<QPDFExc(std::string_view)> damaged)
1029 1030 {
1030 1031 auto size = dict.getKey("/Size").getIntValueAsInt();
1031 1032 auto Index_obj = dict.getKey("/Index");
1032   - if (!(Index_obj.isArray() || Index_obj.isNull())) {
1033   - throw damaged("Cross-reference stream does not have a proper /Index key");
1034   - }
1035 1033  
1036   - std::vector<long long> indx;
1037 1034 if (Index_obj.isArray()) {
1038   - int n_index = Index_obj.getArrayNItems();
1039   - if ((n_index % 2) || (n_index < 2)) {
  1035 + std::vector<std::pair<int, int>> indx;
  1036 + int num_entries = 0;
  1037 + auto index_vec = Index_obj.getArrayAsVector();
  1038 + if ((index_vec.size() % 2) || index_vec.size() < 2) {
1040 1039 throw damaged("Cross-reference stream's /Index has an invalid number of values");
1041 1040 }
1042   - for (int i = 0; i < n_index; ++i) {
1043   - if (Index_obj.getArrayItem(i).isInteger()) {
1044   - indx.push_back(Index_obj.getArrayItem(i).getIntValue());
  1041 +
  1042 + int i = 0;
  1043 + long long first = 0;
  1044 + for (auto& val: index_vec) {
  1045 + if (val.isInteger()) {
  1046 + if (i % 2) {
  1047 + auto count = val.getIntValue();
  1048 + // We are guarding against the possibility of num_entries * entry_size
  1049 + // overflowing. We are not checking that entries are in ascending order as
  1050 + // required by the spec, which probably should generate a warning. We are also
  1051 + // not checking that for each subsection first object number + number of entries
  1052 + // <= /Size. The spec requires us to ignore object number > /Size.
  1053 + if (first > (max_num_entries - count) ||
  1054 + count > (max_num_entries - num_entries)) {
  1055 + throw damaged(
  1056 + "Cross-reference stream claims to contain too many entries: " +
  1057 + std::to_string(first) + " " + std::to_string(max_num_entries) + " " +
  1058 + std::to_string(num_entries));
  1059 + }
  1060 + indx.emplace_back(static_cast<int>(first), static_cast<int>(count));
  1061 + num_entries += static_cast<int>(count);
  1062 + } else {
  1063 + first = val.getIntValue();
  1064 + if (first < 0) {
  1065 + throw damaged(
  1066 + "Cross-reference stream's /Index contains a negative object id");
  1067 + } else if (first > max_num_entries) {
  1068 + throw damaged("Cross-reference stream's /Index contains an impossibly "
  1069 + "large object id");
  1070 + }
  1071 + }
1045 1072 } else {
1046 1073 throw damaged(
1047 1074 "Cross-reference stream's /Index's item " + std::to_string(i) +
1048 1075 " is not an integer");
1049 1076 }
  1077 + i++;
1050 1078 }
1051   - QTC::TC("qpdf", "QPDF xref /Index is array", n_index == 2 ? 0 : 1);
1052   - } else {
1053   - // We have already validated the /Index key.
  1079 + QTC::TC("qpdf", "QPDF xref /Index is array", index_vec.size() == 2 ? 0 : 1);
  1080 + return {num_entries, indx};
  1081 + } else if (Index_obj.isNull()) {
1054 1082 QTC::TC("qpdf", "QPDF xref /Index is null");
1055   - indx.push_back(0);
1056   - indx.push_back(size);
1057   - }
1058   -
1059   - int num_entries = 0;
1060   - for (size_t i = 1; i < indx.size(); i += 2) {
1061   - // We are guarding against the possibility of num_entries * entry_size overflowing.
1062   - // We are not checking that entries are in ascending order as required by the spec, which
1063   - // probably should generate a warning. We are also not checking that for each subsection
1064   - // first object number + number of entries <= /Size. The spec requires us to ignore object
1065   - // number > /Size.
1066   - if (indx.at(i) > (max_num_entries - num_entries)) {
1067   - throw damaged(
1068   - "Cross-reference stream claims to contain too many entries: " +
1069   - std::to_string(indx.at(i)) + " " + std::to_string(max_num_entries) + " " +
1070   - std::to_string(num_entries));
1071   - }
1072   - num_entries += indx.at(i);
  1083 + return {size, {{0, size}}};
  1084 + } else {
  1085 + throw damaged("Cross-reference stream does not have a proper /Index key");
1073 1086 }
1074   - // entry_size and num_entries have both been validated to ensure that this multiplication does
1075   - // not cause an overflow.
1076   - return {num_entries, indx};
1077 1087 }
1078 1088  
1079 1089 qpdf_offset_t
... ... @@ -1109,27 +1119,16 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle&amp; xref_obj)
1109 1119 // Actual size vs. expected size check above ensures that we will not overflow any buffers here.
1110 1120 // We know that entry_size * num_entries is less or equal to the size of the buffer.
1111 1121 auto p = bp->getBuffer();
1112   - for (auto iter = indx.cbegin(); iter < indx.cend(); ++iter) {
  1122 + for (auto [obj, sec_entries]: indx) {
1113 1123 // Process a subsection.
1114   - // Get the object number. The object number is based on /Index.
1115   - int obj = toI(*iter++);
1116   - size_t sec_entries = toS(*iter);
1117   - for (size_t i = 0; i < sec_entries; ++i) {
1118   - if (obj < 0 || obj >= (std::numeric_limits<int>::max() - 1)) {
1119   - std::ostringstream msg;
1120   - msg.imbue(std::locale::classic());
1121   - msg << "adding 1 to " << obj
1122   - << " while computing index in xref stream would cause an integer overflow";
1123   - throw std::range_error(msg.str());
1124   - }
  1124 + for (int i = 0; i < sec_entries; ++i) {
1125 1125 // Read this entry
1126   - std::array<qpdf_offset_t, 3> fields;
  1126 + std::array<qpdf_offset_t, 3> fields{};
  1127 + if (W[0] == 0) {
  1128 + QTC::TC("qpdf", "QPDF default for xref stream field 0");
  1129 + fields[0] = 1;
  1130 + }
1127 1131 for (size_t j = 0; j < 3; ++j) {
1128   - fields[j] = 0;
1129   - if ((j == 0) && (W[0] == 0)) {
1130   - QTC::TC("qpdf", "QPDF default for xref stream field 0");
1131   - fields[0] = 1;
1132   - }
1133 1132 for (int k = 0; k < W[j]; ++k) {
1134 1133 fields[j] <<= 8;
1135 1134 fields[j] |= *p++;
... ... @@ -1170,12 +1169,10 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle&amp; xref_obj)
1170 1169 "xref stream", "/Prev key in xref stream dictionary is not an integer");
1171 1170 }
1172 1171 QTC::TC("qpdf", "QPDF prev key in xref stream dictionary");
1173   - xref_offset = dict.getKey("/Prev").getIntValue();
  1172 + return dict.getKey("/Prev").getIntValue();
1174 1173 } else {
1175   - xref_offset = 0;
  1174 + return 0;
1176 1175 }
1177   -
1178   - return xref_offset;
1179 1176 }
1180 1177  
1181 1178 void
... ...