Commit 74162a2d484b7ca6da08a1597e06eb878c0c8e31

Authored by m-holger
1 parent 6fc982b7

Tune QPDFParser::parse

Replace SparseOHArray with std::vector<QPDFObjectHandle>.

Part of #729
Showing 1 changed file with 31 additions and 34 deletions
libqpdf/QPDFParser.cc
... ... @@ -17,6 +17,8 @@ QPDFParser::parse(bool& empty, bool content_stream)
17 17 // this, it will cause a logic error to be thrown from
18 18 // QPDF::inParse().
19 19  
  20 + using OHVector = std::vector<QPDFObjectHandle>;
  21 +
20 22 QPDF::ParseGuard pg(context);
21 23  
22 24 empty = false;
... ... @@ -24,8 +26,8 @@ QPDFParser::parse(bool& empty, bool content_stream)
24 26 QPDFObjectHandle object;
25 27 bool set_offset = false;
26 28  
27   - std::vector<SparseOHArray> olist_stack;
28   - olist_stack.push_back(SparseOHArray());
  29 + std::vector<OHVector> olist_stack;
  30 + olist_stack.push_back(OHVector());
29 31 std::vector<parser_state_e> state_stack;
30 32 state_stack.push_back(st_top);
31 33 std::vector<qpdf_offset_t> offset_stack;
... ... @@ -41,7 +43,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
41 43 contents_offset_stack.push_back(-1);
42 44 while (!done) {
43 45 bool bad = false;
44   - SparseOHArray& olist = olist_stack.back();
  46 + auto& olist = olist_stack.back();
45 47 parser_state_e state = state_stack.back();
46 48 offset = offset_stack.back();
47 49 std::string& contents_string = contents_string_stack.back();
... ... @@ -113,7 +115,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
113 115 object = QPDFObjectHandle::newNull();
114 116 state = st_top;
115 117 } else {
116   - olist_stack.push_back(SparseOHArray());
  118 + olist_stack.push_back(OHVector());
117 119 state = st_start;
118 120 offset_stack.push_back(input->tell());
119 121 state_stack.push_back(
... ... @@ -159,15 +161,15 @@ QPDFParser::parse(bool& empty, bool content_stream)
159 161 case QPDFTokenizer::tt_word:
160 162 {
161 163 std::string const& value = token.getValue();
  164 + auto size = olist.size();
162 165 if (content_stream) {
163 166 object = QPDFObjectHandle::newOperator(value);
164 167 } else if (
165   - (value == "R") && (state != st_top) &&
166   - (olist.size() >= 2) &&
167   - (!olist.at(olist.size() - 1).isIndirect()) &&
168   - (olist.at(olist.size() - 1).isInteger()) &&
169   - (!olist.at(olist.size() - 2).isIndirect()) &&
170   - (olist.at(olist.size() - 2).isInteger())) {
  168 + (value == "R") && (state != st_top) && (size >= 2) &&
  169 + (!olist.back().isIndirect()) &&
  170 + (olist.back().isInteger()) &&
  171 + (!olist.at(size - 2).isIndirect()) &&
  172 + (olist.at(size - 2).isInteger())) {
171 173 if (context == nullptr) {
172 174 QTC::TC("qpdf", "QPDFParser indirect without context");
173 175 throw std::logic_error(
... ... @@ -178,10 +180,10 @@ QPDFParser::parse(bool& empty, bool content_stream)
178 180 object = QPDFObjectHandle::newIndirect(
179 181 context,
180 182 QPDFObjGen(
181   - olist.at(olist.size() - 2).getIntValueAsInt(),
182   - olist.at(olist.size() - 1).getIntValueAsInt()));
183   - olist.remove_last();
184   - olist.remove_last();
  183 + olist.at(size - 2).getIntValueAsInt(),
  184 + olist.back().getIntValueAsInt()));
  185 + olist.pop_back();
  186 + olist.pop_back();
185 187 } else if ((value == "endobj") && (state == st_top)) {
186 188 // We just saw endobj without having read
187 189 // anything. Treat this as a null and do not move
... ... @@ -266,7 +268,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
266 268 setDescriptionFromInput(object, input->getLastOffset());
267 269 object.setParsedOffset(input->getLastOffset());
268 270 set_offset = true;
269   - olist.append(object);
  271 + olist.push_back(object);
270 272 break;
271 273  
272 274 case st_top:
... ... @@ -285,22 +287,18 @@ QPDFParser::parse(bool& empty, bool content_stream)
285 287 parser_state_e old_state = state_stack.back();
286 288 state_stack.pop_back();
287 289 if (old_state == st_array) {
288   - // There's no newArray(SparseOHArray) since
289   - // SparseOHArray is not part of the public API.
290   - object = QPDFObjectHandle(QPDF_Array::create(olist));
  290 + object = QPDFObjectHandle::newArray(olist);
291 291 setDescriptionFromInput(object, offset);
292   - // The `offset` points to the next of "[". Set the
293   - // rewind offset to point to the beginning of "[".
294   - // This has been explicitly tested with whitespace
295   - // surrounding the array start delimiter.
296   - // getLastOffset points to the array end token and
  292 + // The `offset` points to the next of "[". Set the rewind
  293 + // offset to point to the beginning of "[". This has been
  294 + // explicitly tested with whitespace surrounding the array start
  295 + // delimiter. getLastOffset points to the array end token and
297 296 // therefore can't be used here.
298 297 object.setParsedOffset(offset - 1);
299 298 set_offset = true;
300 299 } else if (old_state == st_dictionary) {
301   - // Convert list to map. Alternating elements are keys.
302   - // Attempt to recover more or less gracefully from
303   - // invalid dictionaries.
  300 + // Convert list to map. Alternating elements are keys. Attempt
  301 + // to recover more or less gracefully from invalid dictionaries.
304 302 std::set<std::string> names;
305 303 size_t n_elements = olist.size();
306 304 for (size_t i = 0; i < n_elements; ++i) {
... ... @@ -312,7 +310,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
312 310  
313 311 std::map<std::string, QPDFObjectHandle> dict;
314 312 int next_fake_key = 1;
315   - for (unsigned int i = 0; i < olist.size(); ++i) {
  313 + for (unsigned int i = 0; i < n_elements; ++i) {
316 314 QPDFObjectHandle key_obj = olist.at(i);
317 315 QPDFObjectHandle val;
318 316 if (key_obj.isIndirect() || (!key_obj.isName())) {
... ... @@ -366,12 +364,11 @@ QPDFParser::parse(bool& empty, bool content_stream)
366 364 }
367 365 object = QPDFObjectHandle::newDictionary(dict);
368 366 setDescriptionFromInput(object, offset);
369   - // The `offset` points to the next of "<<". Set the
370   - // rewind offset to point to the beginning of "<<".
371   - // This has been explicitly tested with whitespace
372   - // surrounding the dictionary start delimiter.
373   - // getLastOffset points to the dictionary end token
374   - // and therefore can't be used here.
  367 + // The `offset` points to the next of "<<". Set the rewind
  368 + // offset to point to the beginning of "<<". This has been
  369 + // explicitly tested with whitespace surrounding the dictionary
  370 + // start delimiter. getLastOffset points to the dictionary end
  371 + // token and therefore can't be used here.
375 372 object.setParsedOffset(offset - 2);
376 373 set_offset = true;
377 374 }
... ... @@ -380,7 +377,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
380 377 if (state_stack.back() == st_top) {
381 378 done = true;
382 379 } else {
383   - olist_stack.back().append(object);
  380 + olist_stack.back().push_back(object);
384 381 }
385 382 contents_string_stack.pop_back();
386 383 contents_offset_stack.pop_back();
... ...