Commit 74162a2d484b7ca6da08a1597e06eb878c0c8e31

Authored by m-holger
1 parent 6fc982b7

Tune QPDFParser::parse

Replace SparseOHArray with std::vector<QPDFObjectHandle>.

Part of #729
Showing 1 changed file with 31 additions and 34 deletions
libqpdf/QPDFParser.cc
@@ -17,6 +17,8 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -17,6 +17,8 @@ QPDFParser::parse(bool& empty, bool content_stream)
17 // this, it will cause a logic error to be thrown from 17 // this, it will cause a logic error to be thrown from
18 // QPDF::inParse(). 18 // QPDF::inParse().
19 19
  20 + using OHVector = std::vector<QPDFObjectHandle>;
  21 +
20 QPDF::ParseGuard pg(context); 22 QPDF::ParseGuard pg(context);
21 23
22 empty = false; 24 empty = false;
@@ -24,8 +26,8 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -24,8 +26,8 @@ QPDFParser::parse(bool& empty, bool content_stream)
24 QPDFObjectHandle object; 26 QPDFObjectHandle object;
25 bool set_offset = false; 27 bool set_offset = false;
26 28
27 - std::vector<SparseOHArray> olist_stack;  
28 - olist_stack.push_back(SparseOHArray()); 29 + std::vector<OHVector> olist_stack;
  30 + olist_stack.push_back(OHVector());
29 std::vector<parser_state_e> state_stack; 31 std::vector<parser_state_e> state_stack;
30 state_stack.push_back(st_top); 32 state_stack.push_back(st_top);
31 std::vector<qpdf_offset_t> offset_stack; 33 std::vector<qpdf_offset_t> offset_stack;
@@ -41,7 +43,7 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -41,7 +43,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
41 contents_offset_stack.push_back(-1); 43 contents_offset_stack.push_back(-1);
42 while (!done) { 44 while (!done) {
43 bool bad = false; 45 bool bad = false;
44 - SparseOHArray& olist = olist_stack.back(); 46 + auto& olist = olist_stack.back();
45 parser_state_e state = state_stack.back(); 47 parser_state_e state = state_stack.back();
46 offset = offset_stack.back(); 48 offset = offset_stack.back();
47 std::string& contents_string = contents_string_stack.back(); 49 std::string& contents_string = contents_string_stack.back();
@@ -113,7 +115,7 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -113,7 +115,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
113 object = QPDFObjectHandle::newNull(); 115 object = QPDFObjectHandle::newNull();
114 state = st_top; 116 state = st_top;
115 } else { 117 } else {
116 - olist_stack.push_back(SparseOHArray()); 118 + olist_stack.push_back(OHVector());
117 state = st_start; 119 state = st_start;
118 offset_stack.push_back(input->tell()); 120 offset_stack.push_back(input->tell());
119 state_stack.push_back( 121 state_stack.push_back(
@@ -159,15 +161,15 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -159,15 +161,15 @@ QPDFParser::parse(bool& empty, bool content_stream)
159 case QPDFTokenizer::tt_word: 161 case QPDFTokenizer::tt_word:
160 { 162 {
161 std::string const& value = token.getValue(); 163 std::string const& value = token.getValue();
  164 + auto size = olist.size();
162 if (content_stream) { 165 if (content_stream) {
163 object = QPDFObjectHandle::newOperator(value); 166 object = QPDFObjectHandle::newOperator(value);
164 } else if ( 167 } else if (
165 - (value == "R") && (state != st_top) &&  
166 - (olist.size() >= 2) &&  
167 - (!olist.at(olist.size() - 1).isIndirect()) &&  
168 - (olist.at(olist.size() - 1).isInteger()) &&  
169 - (!olist.at(olist.size() - 2).isIndirect()) &&  
170 - (olist.at(olist.size() - 2).isInteger())) { 168 + (value == "R") && (state != st_top) && (size >= 2) &&
  169 + (!olist.back().isIndirect()) &&
  170 + (olist.back().isInteger()) &&
  171 + (!olist.at(size - 2).isIndirect()) &&
  172 + (olist.at(size - 2).isInteger())) {
171 if (context == nullptr) { 173 if (context == nullptr) {
172 QTC::TC("qpdf", "QPDFParser indirect without context"); 174 QTC::TC("qpdf", "QPDFParser indirect without context");
173 throw std::logic_error( 175 throw std::logic_error(
@@ -178,10 +180,10 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -178,10 +180,10 @@ QPDFParser::parse(bool& empty, bool content_stream)
178 object = QPDFObjectHandle::newIndirect( 180 object = QPDFObjectHandle::newIndirect(
179 context, 181 context,
180 QPDFObjGen( 182 QPDFObjGen(
181 - olist.at(olist.size() - 2).getIntValueAsInt(),  
182 - olist.at(olist.size() - 1).getIntValueAsInt()));  
183 - olist.remove_last();  
184 - olist.remove_last(); 183 + olist.at(size - 2).getIntValueAsInt(),
  184 + olist.back().getIntValueAsInt()));
  185 + olist.pop_back();
  186 + olist.pop_back();
185 } else if ((value == "endobj") && (state == st_top)) { 187 } else if ((value == "endobj") && (state == st_top)) {
186 // We just saw endobj without having read 188 // We just saw endobj without having read
187 // anything. Treat this as a null and do not move 189 // anything. Treat this as a null and do not move
@@ -266,7 +268,7 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -266,7 +268,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
266 setDescriptionFromInput(object, input->getLastOffset()); 268 setDescriptionFromInput(object, input->getLastOffset());
267 object.setParsedOffset(input->getLastOffset()); 269 object.setParsedOffset(input->getLastOffset());
268 set_offset = true; 270 set_offset = true;
269 - olist.append(object); 271 + olist.push_back(object);
270 break; 272 break;
271 273
272 case st_top: 274 case st_top:
@@ -285,22 +287,18 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -285,22 +287,18 @@ QPDFParser::parse(bool& empty, bool content_stream)
285 parser_state_e old_state = state_stack.back(); 287 parser_state_e old_state = state_stack.back();
286 state_stack.pop_back(); 288 state_stack.pop_back();
287 if (old_state == st_array) { 289 if (old_state == st_array) {
288 - // There's no newArray(SparseOHArray) since  
289 - // SparseOHArray is not part of the public API.  
290 - object = QPDFObjectHandle(QPDF_Array::create(olist)); 290 + object = QPDFObjectHandle::newArray(olist);
291 setDescriptionFromInput(object, offset); 291 setDescriptionFromInput(object, offset);
292 - // The `offset` points to the next of "[". Set the  
293 - // rewind offset to point to the beginning of "[".  
294 - // This has been explicitly tested with whitespace  
295 - // surrounding the array start delimiter.  
296 - // getLastOffset points to the array end token and 292 + // The `offset` points to the next of "[". Set the rewind
  293 + // offset to point to the beginning of "[". This has been
  294 + // explicitly tested with whitespace surrounding the array start
  295 + // delimiter. getLastOffset points to the array end token and
297 // therefore can't be used here. 296 // therefore can't be used here.
298 object.setParsedOffset(offset - 1); 297 object.setParsedOffset(offset - 1);
299 set_offset = true; 298 set_offset = true;
300 } else if (old_state == st_dictionary) { 299 } else if (old_state == st_dictionary) {
301 - // Convert list to map. Alternating elements are keys.  
302 - // Attempt to recover more or less gracefully from  
303 - // invalid dictionaries. 300 + // Convert list to map. Alternating elements are keys. Attempt
  301 + // to recover more or less gracefully from invalid dictionaries.
304 std::set<std::string> names; 302 std::set<std::string> names;
305 size_t n_elements = olist.size(); 303 size_t n_elements = olist.size();
306 for (size_t i = 0; i < n_elements; ++i) { 304 for (size_t i = 0; i < n_elements; ++i) {
@@ -312,7 +310,7 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -312,7 +310,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
312 310
313 std::map<std::string, QPDFObjectHandle> dict; 311 std::map<std::string, QPDFObjectHandle> dict;
314 int next_fake_key = 1; 312 int next_fake_key = 1;
315 - for (unsigned int i = 0; i < olist.size(); ++i) { 313 + for (unsigned int i = 0; i < n_elements; ++i) {
316 QPDFObjectHandle key_obj = olist.at(i); 314 QPDFObjectHandle key_obj = olist.at(i);
317 QPDFObjectHandle val; 315 QPDFObjectHandle val;
318 if (key_obj.isIndirect() || (!key_obj.isName())) { 316 if (key_obj.isIndirect() || (!key_obj.isName())) {
@@ -366,12 +364,11 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -366,12 +364,11 @@ QPDFParser::parse(bool& empty, bool content_stream)
366 } 364 }
367 object = QPDFObjectHandle::newDictionary(dict); 365 object = QPDFObjectHandle::newDictionary(dict);
368 setDescriptionFromInput(object, offset); 366 setDescriptionFromInput(object, offset);
369 - // The `offset` points to the next of "<<". Set the  
370 - // rewind offset to point to the beginning of "<<".  
371 - // This has been explicitly tested with whitespace  
372 - // surrounding the dictionary start delimiter.  
373 - // getLastOffset points to the dictionary end token  
374 - // and therefore can't be used here. 367 + // The `offset` points to the next of "<<". Set the rewind
  368 + // offset to point to the beginning of "<<". This has been
  369 + // explicitly tested with whitespace surrounding the dictionary
  370 + // start delimiter. getLastOffset points to the dictionary end
  371 + // token and therefore can't be used here.
375 object.setParsedOffset(offset - 2); 372 object.setParsedOffset(offset - 2);
376 set_offset = true; 373 set_offset = true;
377 } 374 }
@@ -380,7 +377,7 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -380,7 +377,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
380 if (state_stack.back() == st_top) { 377 if (state_stack.back() == st_top) {
381 done = true; 378 done = true;
382 } else { 379 } else {
383 - olist_stack.back().append(object); 380 + olist_stack.back().push_back(object);
384 } 381 }
385 contents_string_stack.pop_back(); 382 contents_string_stack.pop_back();
386 contents_offset_stack.pop_back(); 383 contents_offset_stack.pop_back();