Commit 74162a2d484b7ca6da08a1597e06eb878c0c8e31
1 parent: 6fc982b7
Tune QPDFParser::parse
Replace SparseOHArray with std::vector<QPDFObjectHandle>. Part of #729
Showing 1 changed file with 31 additions and 34 deletions
libqpdf/QPDFParser.cc
| @@ -17,6 +17,8 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -17,6 +17,8 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 17 | // this, it will cause a logic error to be thrown from | 17 | // this, it will cause a logic error to be thrown from |
| 18 | // QPDF::inParse(). | 18 | // QPDF::inParse(). |
| 19 | 19 | ||
| 20 | + using OHVector = std::vector<QPDFObjectHandle>; | ||
| 21 | + | ||
| 20 | QPDF::ParseGuard pg(context); | 22 | QPDF::ParseGuard pg(context); |
| 21 | 23 | ||
| 22 | empty = false; | 24 | empty = false; |
| @@ -24,8 +26,8 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -24,8 +26,8 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 24 | QPDFObjectHandle object; | 26 | QPDFObjectHandle object; |
| 25 | bool set_offset = false; | 27 | bool set_offset = false; |
| 26 | 28 | ||
| 27 | - std::vector<SparseOHArray> olist_stack; | ||
| 28 | - olist_stack.push_back(SparseOHArray()); | 29 | + std::vector<OHVector> olist_stack; |
| 30 | + olist_stack.push_back(OHVector()); | ||
| 29 | std::vector<parser_state_e> state_stack; | 31 | std::vector<parser_state_e> state_stack; |
| 30 | state_stack.push_back(st_top); | 32 | state_stack.push_back(st_top); |
| 31 | std::vector<qpdf_offset_t> offset_stack; | 33 | std::vector<qpdf_offset_t> offset_stack; |
| @@ -41,7 +43,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -41,7 +43,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 41 | contents_offset_stack.push_back(-1); | 43 | contents_offset_stack.push_back(-1); |
| 42 | while (!done) { | 44 | while (!done) { |
| 43 | bool bad = false; | 45 | bool bad = false; |
| 44 | - SparseOHArray& olist = olist_stack.back(); | 46 | + auto& olist = olist_stack.back(); |
| 45 | parser_state_e state = state_stack.back(); | 47 | parser_state_e state = state_stack.back(); |
| 46 | offset = offset_stack.back(); | 48 | offset = offset_stack.back(); |
| 47 | std::string& contents_string = contents_string_stack.back(); | 49 | std::string& contents_string = contents_string_stack.back(); |
| @@ -113,7 +115,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -113,7 +115,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 113 | object = QPDFObjectHandle::newNull(); | 115 | object = QPDFObjectHandle::newNull(); |
| 114 | state = st_top; | 116 | state = st_top; |
| 115 | } else { | 117 | } else { |
| 116 | - olist_stack.push_back(SparseOHArray()); | 118 | + olist_stack.push_back(OHVector()); |
| 117 | state = st_start; | 119 | state = st_start; |
| 118 | offset_stack.push_back(input->tell()); | 120 | offset_stack.push_back(input->tell()); |
| 119 | state_stack.push_back( | 121 | state_stack.push_back( |
| @@ -159,15 +161,15 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -159,15 +161,15 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 159 | case QPDFTokenizer::tt_word: | 161 | case QPDFTokenizer::tt_word: |
| 160 | { | 162 | { |
| 161 | std::string const& value = token.getValue(); | 163 | std::string const& value = token.getValue(); |
| 164 | + auto size = olist.size(); | ||
| 162 | if (content_stream) { | 165 | if (content_stream) { |
| 163 | object = QPDFObjectHandle::newOperator(value); | 166 | object = QPDFObjectHandle::newOperator(value); |
| 164 | } else if ( | 167 | } else if ( |
| 165 | - (value == "R") && (state != st_top) && | ||
| 166 | - (olist.size() >= 2) && | ||
| 167 | - (!olist.at(olist.size() - 1).isIndirect()) && | ||
| 168 | - (olist.at(olist.size() - 1).isInteger()) && | ||
| 169 | - (!olist.at(olist.size() - 2).isIndirect()) && | ||
| 170 | - (olist.at(olist.size() - 2).isInteger())) { | 168 | + (value == "R") && (state != st_top) && (size >= 2) && |
| 169 | + (!olist.back().isIndirect()) && | ||
| 170 | + (olist.back().isInteger()) && | ||
| 171 | + (!olist.at(size - 2).isIndirect()) && | ||
| 172 | + (olist.at(size - 2).isInteger())) { | ||
| 171 | if (context == nullptr) { | 173 | if (context == nullptr) { |
| 172 | QTC::TC("qpdf", "QPDFParser indirect without context"); | 174 | QTC::TC("qpdf", "QPDFParser indirect without context"); |
| 173 | throw std::logic_error( | 175 | throw std::logic_error( |
| @@ -178,10 +180,10 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -178,10 +180,10 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 178 | object = QPDFObjectHandle::newIndirect( | 180 | object = QPDFObjectHandle::newIndirect( |
| 179 | context, | 181 | context, |
| 180 | QPDFObjGen( | 182 | QPDFObjGen( |
| 181 | - olist.at(olist.size() - 2).getIntValueAsInt(), | ||
| 182 | - olist.at(olist.size() - 1).getIntValueAsInt())); | ||
| 183 | - olist.remove_last(); | ||
| 184 | - olist.remove_last(); | 183 | + olist.at(size - 2).getIntValueAsInt(), |
| 184 | + olist.back().getIntValueAsInt())); | ||
| 185 | + olist.pop_back(); | ||
| 186 | + olist.pop_back(); | ||
| 185 | } else if ((value == "endobj") && (state == st_top)) { | 187 | } else if ((value == "endobj") && (state == st_top)) { |
| 186 | // We just saw endobj without having read | 188 | // We just saw endobj without having read |
| 187 | // anything. Treat this as a null and do not move | 189 | // anything. Treat this as a null and do not move |
| @@ -266,7 +268,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -266,7 +268,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 266 | setDescriptionFromInput(object, input->getLastOffset()); | 268 | setDescriptionFromInput(object, input->getLastOffset()); |
| 267 | object.setParsedOffset(input->getLastOffset()); | 269 | object.setParsedOffset(input->getLastOffset()); |
| 268 | set_offset = true; | 270 | set_offset = true; |
| 269 | - olist.append(object); | 271 | + olist.push_back(object); |
| 270 | break; | 272 | break; |
| 271 | 273 | ||
| 272 | case st_top: | 274 | case st_top: |
| @@ -285,22 +287,18 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -285,22 +287,18 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 285 | parser_state_e old_state = state_stack.back(); | 287 | parser_state_e old_state = state_stack.back(); |
| 286 | state_stack.pop_back(); | 288 | state_stack.pop_back(); |
| 287 | if (old_state == st_array) { | 289 | if (old_state == st_array) { |
| 288 | - // There's no newArray(SparseOHArray) since | ||
| 289 | - // SparseOHArray is not part of the public API. | ||
| 290 | - object = QPDFObjectHandle(QPDF_Array::create(olist)); | 290 | + object = QPDFObjectHandle::newArray(olist); |
| 291 | setDescriptionFromInput(object, offset); | 291 | setDescriptionFromInput(object, offset); |
| 292 | - // The `offset` points to the next of "[". Set the | ||
| 293 | - // rewind offset to point to the beginning of "[". | ||
| 294 | - // This has been explicitly tested with whitespace | ||
| 295 | - // surrounding the array start delimiter. | ||
| 296 | - // getLastOffset points to the array end token and | 292 | + // The `offset` points to the next of "[". Set the rewind |
| 293 | + // offset to point to the beginning of "[". This has been | ||
| 294 | + // explicitly tested with whitespace surrounding the array start | ||
| 295 | + // delimiter. getLastOffset points to the array end token and | ||
| 297 | // therefore can't be used here. | 296 | // therefore can't be used here. |
| 298 | object.setParsedOffset(offset - 1); | 297 | object.setParsedOffset(offset - 1); |
| 299 | set_offset = true; | 298 | set_offset = true; |
| 300 | } else if (old_state == st_dictionary) { | 299 | } else if (old_state == st_dictionary) { |
| 301 | - // Convert list to map. Alternating elements are keys. | ||
| 302 | - // Attempt to recover more or less gracefully from | ||
| 303 | - // invalid dictionaries. | 300 | + // Convert list to map. Alternating elements are keys. Attempt |
| 301 | + // to recover more or less gracefully from invalid dictionaries. | ||
| 304 | std::set<std::string> names; | 302 | std::set<std::string> names; |
| 305 | size_t n_elements = olist.size(); | 303 | size_t n_elements = olist.size(); |
| 306 | for (size_t i = 0; i < n_elements; ++i) { | 304 | for (size_t i = 0; i < n_elements; ++i) { |
| @@ -312,7 +310,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -312,7 +310,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 312 | 310 | ||
| 313 | std::map<std::string, QPDFObjectHandle> dict; | 311 | std::map<std::string, QPDFObjectHandle> dict; |
| 314 | int next_fake_key = 1; | 312 | int next_fake_key = 1; |
| 315 | - for (unsigned int i = 0; i < olist.size(); ++i) { | 313 | + for (unsigned int i = 0; i < n_elements; ++i) { |
| 316 | QPDFObjectHandle key_obj = olist.at(i); | 314 | QPDFObjectHandle key_obj = olist.at(i); |
| 317 | QPDFObjectHandle val; | 315 | QPDFObjectHandle val; |
| 318 | if (key_obj.isIndirect() || (!key_obj.isName())) { | 316 | if (key_obj.isIndirect() || (!key_obj.isName())) { |
| @@ -366,12 +364,11 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -366,12 +364,11 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 366 | } | 364 | } |
| 367 | object = QPDFObjectHandle::newDictionary(dict); | 365 | object = QPDFObjectHandle::newDictionary(dict); |
| 368 | setDescriptionFromInput(object, offset); | 366 | setDescriptionFromInput(object, offset); |
| 369 | - // The `offset` points to the next of "<<". Set the | ||
| 370 | - // rewind offset to point to the beginning of "<<". | ||
| 371 | - // This has been explicitly tested with whitespace | ||
| 372 | - // surrounding the dictionary start delimiter. | ||
| 373 | - // getLastOffset points to the dictionary end token | ||
| 374 | - // and therefore can't be used here. | 367 | + // The `offset` points to the next of "<<". Set the rewind |
| 368 | + // offset to point to the beginning of "<<". This has been | ||
| 369 | + // explicitly tested with whitespace surrounding the dictionary | ||
| 370 | + // start delimiter. getLastOffset points to the dictionary end | ||
| 371 | + // token and therefore can't be used here. | ||
| 375 | object.setParsedOffset(offset - 2); | 372 | object.setParsedOffset(offset - 2); |
| 376 | set_offset = true; | 373 | set_offset = true; |
| 377 | } | 374 | } |
| @@ -380,7 +377,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -380,7 +377,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 380 | if (state_stack.back() == st_top) { | 377 | if (state_stack.back() == st_top) { |
| 381 | done = true; | 378 | done = true; |
| 382 | } else { | 379 | } else { |
| 383 | - olist_stack.back().append(object); | 380 | + olist_stack.back().push_back(object); |
| 384 | } | 381 | } |
| 385 | contents_string_stack.pop_back(); | 382 | contents_string_stack.pop_back(); |
| 386 | contents_offset_stack.pop_back(); | 383 | contents_offset_stack.pop_back(); |