Commit 8fdc3f09648ad2c79455363255b9f8fdac9e65f3

Authored by m-holger
1 parent 18c1ffe0

Optimize QPDFParser for non-sparse QPDF_Arrays

Stop using nullptr to represent null objects.
Count null array elements and trigger creation of sparse arrays if null count is
greater than 100.
libqpdf/QPDFParser.cc
... ... @@ -27,16 +27,15 @@ namespace
27 27 struct StackFrame
28 28 {
29 29 StackFrame(std::shared_ptr<InputSource> input) :
30   - offset(input->tell()),
31   - contents_string(""),
32   - contents_offset(-1)
  30 + offset(input->tell())
33 31 {
34 32 }
35 33  
36 34 std::vector<std::shared_ptr<QPDFObject>> olist;
37 35 qpdf_offset_t offset;
38   - std::string contents_string;
39   - qpdf_offset_t contents_offset;
  36 + std::string contents_string{""};
  37 + qpdf_offset_t contents_offset{-1};
  38 + int null_count{0};
40 39 };
41 40 } // namespace
42 41  
... ... @@ -50,6 +49,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
50 49 // this, it will cause a logic error to be thrown from
51 50 // QPDF::inParse().
52 51  
  52 + const static std::shared_ptr<QPDFObject> null_oh = QPDF_Null::create();
53 53 QPDF::ParseGuard pg(context);
54 54  
55 55 empty = false;
... ... @@ -67,7 +67,6 @@ QPDFParser::parse(bool& empty, bool content_stream)
67 67 int good_count = 0;
68 68 bool b_contents = false;
69 69 bool is_null = false;
70   - auto null_oh = QPDF_Null::create();
71 70  
72 71 while (!done) {
73 72 bool bad = false;
... ... @@ -156,6 +155,8 @@ QPDFParser::parse(bool& empty, bool content_stream)
156 155  
157 156 case QPDFTokenizer::tt_null:
158 157 is_null = true;
  158 + ++frame.null_count;
  159 +
159 160 break;
160 161  
161 162 case QPDFTokenizer::tt_integer:
... ... @@ -301,9 +302,11 @@ QPDFParser::parse(bool& empty, bool content_stream)
301 302  
302 303 case st_dictionary:
303 304 case st_array:
304   - if (!indirect_ref && !is_null) {
305   - // No need to set description for direct nulls - they will
306   - // become implicit.
  305 + if (is_null) {
  306 + object = null_oh;
  307 + // No need to set description for direct nulls - they probably
  308 + // will become implicit.
  309 + } else if (!indirect_ref) {
307 310 setDescription(object, input->getLastOffset());
308 311 }
309 312 set_offset = true;
... ... @@ -326,7 +329,8 @@ QPDFParser::parse(bool& empty, bool content_stream)
326 329 parser_state_e old_state = state_stack.back();
327 330 state_stack.pop_back();
328 331 if (old_state == st_array) {
329   - object = QPDF_Array::create(std::move(olist));
  332 + object = QPDF_Array::create(
  333 + std::move(olist), frame.null_count > 100);
330 334 setDescription(object, offset - 1);
331 335 // The `offset` points to the next of "[". Set the rewind
332 336 // offset to point to the beginning of "[". This has been
... ... @@ -381,7 +385,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
381 385 // Calculate value.
382 386 std::shared_ptr<QPDFObject> val;
383 387 if (iter != olist.end()) {
384   - val = *iter ? *iter : QPDF_Null::create();
  388 + val = *iter;
385 389 ++iter;
386 390 } else {
387 391 QTC::TC("qpdf", "QPDFParser no val for last key");
... ...
libqpdf/QPDF_Array.cc
... ... @@ -11,15 +11,19 @@ QPDF_Array::QPDF_Array(std::vector<QPDFObjectHandle> const& v) :
11 11 setFromVector(v);
12 12 }
13 13  
14   -QPDF_Array::QPDF_Array(std::vector<std::shared_ptr<QPDFObject>>&& v) :
15   - QPDFValue(::ot_array, "array")
  14 +QPDF_Array::QPDF_Array(
  15 + std::vector<std::shared_ptr<QPDFObject>>&& v, bool sparse) :
  16 + QPDFValue(::ot_array, "array"),
  17 + sparse(sparse)
16 18 {
17 19 setFromVector(std::move(v));
18 20 }
19 21  
20 22 QPDF_Array::QPDF_Array(SparseOHArray const& items) :
21 23 QPDFValue(::ot_array, "array"),
  24 + sparse(true),
22 25 sp_elements(items)
  26 +
23 27 {
24 28 }
25 29  
... ... @@ -37,9 +41,10 @@ QPDF_Array::create(std::vector<QPDFObjectHandle> const& items)
37 41 }
38 42  
39 43 std::shared_ptr<QPDFObject>
40   -QPDF_Array::create(std::vector<std::shared_ptr<QPDFObject>>&& items)
  44 +QPDF_Array::create(
  45 + std::vector<std::shared_ptr<QPDFObject>>&& items, bool sparse)
41 46 {
42   - return do_create(new QPDF_Array(std::move(items)));
  47 + return do_create(new QPDF_Array(std::move(items), sparse));
43 48 }
44 49  
45 50 std::shared_ptr<QPDFObject>
... ... @@ -196,8 +201,9 @@ QPDF_Array::setFromVector(std::vector<std::shared_ptr<QPDFObject>>&& v)
196 201 if (sparse) {
197 202 sp_elements = SparseOHArray();
198 203 for (auto&& item: v) {
199   - if (item) {
200   - sp_elements.append(item);
  204 + if (item->getTypeCode() != ::ot_null ||
  205 + item->getObjGen().isIndirect()) {
  206 + sp_elements.append(std::move(item));
201 207 } else {
202 208 ++sp_elements.n_elements;
203 209 }
... ...
libqpdf/qpdf/QPDF_Array.hh
... ... @@ -15,7 +15,7 @@ class QPDF_Array: public QPDFValue
15 15 static std::shared_ptr<QPDFObject>
16 16 create(std::vector<QPDFObjectHandle> const& items);
17 17 static std::shared_ptr<QPDFObject>
18   - create(std::vector<std::shared_ptr<QPDFObject>>&& items);
  18 + create(std::vector<std::shared_ptr<QPDFObject>>&& items, bool sparse);
19 19 static std::shared_ptr<QPDFObject> create(SparseOHArray const& items);
20 20 static std::shared_ptr<QPDFObject> create(OHArray const& items);
21 21 virtual std::shared_ptr<QPDFObject> copy(bool shallow = false);
... ... @@ -36,7 +36,7 @@ class QPDF_Array: public QPDFValue
36 36  
37 37 private:
38 38 QPDF_Array(std::vector<QPDFObjectHandle> const& items);
39   - QPDF_Array(std::vector<std::shared_ptr<QPDFObject>>&& items);
  39 + QPDF_Array(std::vector<std::shared_ptr<QPDFObject>>&& items, bool sparse);
40 40 QPDF_Array(SparseOHArray const& items);
41 41 QPDF_Array(OHArray const& items);
42 42 bool sparse{false};
... ...