Commit 1737902a5e5958d652d7323276b154986cc48180

Authored by m-holger
1 parent f1c774f1

Refactor QPDF::processXRefStream

Tune processing of subsections.
Showing 1 changed file with 44 additions and 48 deletions
libqpdf/QPDF.cc
@@ -1071,63 +1071,59 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
1071 } 1071 }
1072 } 1072 }
1073 1073
1074 - size_t cur_chunk = 0;  
1075 - int chunk_count = 0;  
1076 -  
1077 bool saw_first_compressed_object = false; 1074 bool saw_first_compressed_object = false;
1078 1075
1079 // Actual size vs. expected size check above ensures that we will not overflow any buffers here. 1076 // Actual size vs. expected size check above ensures that we will not overflow any buffers here.
1080 // We know that entry_size * num_entries is less or equal to the size of the buffer. 1077 // We know that entry_size * num_entries is less or equal to the size of the buffer.
1081 auto p = bp->getBuffer(); 1078 auto p = bp->getBuffer();
1082 - for (size_t i = 0; i < num_entries; ++i) {  
1083 - // Read this entry  
1084 - qpdf_offset_t fields[3];  
1085 - for (int j = 0; j < 3; ++j) {  
1086 - fields[j] = 0;  
1087 - if ((j == 0) && (W[0] == 0)) {  
1088 - QTC::TC("qpdf", "QPDF default for xref stream field 0");  
1089 - fields[0] = 1; 1079 + for (auto iter = indx.cbegin(); iter < indx.cend(); ++iter) {
  1080 + // Process a subsection.
  1081 + // Get the object number. The object number is based on /Index.
  1082 + int obj = toI(*iter++);
  1083 + size_t sec_entries = toS(*iter);
  1084 + for (size_t i = 0; i < sec_entries; ++i) {
  1085 + if (obj < 0 || obj >= (std::numeric_limits<int>::max() - 1)) {
  1086 + std::ostringstream msg;
  1087 + msg.imbue(std::locale::classic());
  1088 + msg << "adding 1 to " << obj
  1089 + << " while computing index in xref stream would cause an integer overflow";
  1090 + throw std::range_error(msg.str());
1090 } 1091 }
1091 - for (int k = 0; k < W[j]; ++k) {  
1092 - fields[j] <<= 8;  
1093 - fields[j] |= *p++; 1092 + // Read this entry
  1093 + qpdf_offset_t fields[3];
  1094 + for (int j = 0; j < 3; ++j) {
  1095 + fields[j] = 0;
  1096 + if ((j == 0) && (W[0] == 0)) {
  1097 + QTC::TC("qpdf", "QPDF default for xref stream field 0");
  1098 + fields[0] = 1;
  1099 + }
  1100 + for (int k = 0; k < W[j]; ++k) {
  1101 + fields[j] <<= 8;
  1102 + fields[j] |= *p++;
  1103 + }
1094 } 1104 }
1095 - }  
1096 -  
1097 - // Get the object and generation number. The object number is based on /Index. The  
1098 - // generation number is 0 unless this is an uncompressed object record, in which case the  
1099 - // generation number appears as the third field.  
1100 - int obj = toI(indx.at(cur_chunk));  
1101 - if ((obj < 0) || ((std::numeric_limits<int>::max() - obj) < chunk_count)) {  
1102 - std::ostringstream msg;  
1103 - msg.imbue(std::locale::classic());  
1104 - msg << "adding " << chunk_count << " to " << obj  
1105 - << " while computing index in xref stream would cause an integer overflow";  
1106 - throw std::range_error(msg.str());  
1107 - }  
1108 - obj += chunk_count;  
1109 - ++chunk_count;  
1110 - if (chunk_count >= indx.at(cur_chunk + 1)) {  
1111 - cur_chunk += 2;  
1112 - chunk_count = 0;  
1113 - }  
1114 1105
1115 - if (saw_first_compressed_object) {  
1116 - if (fields[0] != 2) {  
1117 - m->uncompressed_after_compressed = true; 1106 + // Get the generation number. The generation number is 0 unless this is an uncompressed
  1107 + // object record, in which case the generation number appears as the third field.
  1108 + if (saw_first_compressed_object) {
  1109 + if (fields[0] != 2) {
  1110 + m->uncompressed_after_compressed = true;
  1111 + }
  1112 + } else if (fields[0] == 2) {
  1113 + saw_first_compressed_object = true;
1118 } 1114 }
1119 - } else if (fields[0] == 2) {  
1120 - saw_first_compressed_object = true;  
1121 - }  
1122 - if (obj == 0) {  
1123 - // This is needed by checkLinearization()  
1124 - m->first_xref_item_offset = xref_offset;  
1125 - } else if (fields[0] == 0) {  
1126 - // Ignore fields[2], which we don't care about in this case. This works around the issue  
1127 - // of some PDF files that put invalid values, like -1, here for deleted objects.  
1128 - insertFreeXrefEntry(QPDFObjGen(obj, 0));  
1129 - } else {  
1130 - insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2])); 1115 + if (obj == 0) {
  1116 + // This is needed by checkLinearization()
  1117 + m->first_xref_item_offset = xref_offset;
  1118 + } else if (fields[0] == 0) {
  1119 + // Ignore fields[2], which we don't care about in this case. This works around the
  1120 + // issue of some PDF files that put invalid values, like -1, here for deleted
  1121 + // objects.
  1122 + insertFreeXrefEntry(QPDFObjGen(obj, 0));
  1123 + } else {
  1124 + insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2]));
  1125 + }
  1126 + ++obj;
1131 } 1127 }
1132 } 1128 }
1133 1129