Commit 8fdc3f09648ad2c79455363255b9f8fdac9e65f3

Authored by m-holger
1 parent 18c1ffe0

Optimize QPDFParser for non-sparse QPDF_Arrays

Stop using nullptr to represent null objects.
Count null array elements and trigger creation of sparse arrays if null count is
greater than 100.
libqpdf/QPDFParser.cc
@@ -27,16 +27,15 @@ namespace @@ -27,16 +27,15 @@ namespace
27 struct StackFrame 27 struct StackFrame
28 { 28 {
29 StackFrame(std::shared_ptr<InputSource> input) : 29 StackFrame(std::shared_ptr<InputSource> input) :
30 - offset(input->tell()),  
31 - contents_string(""),  
32 - contents_offset(-1) 30 + offset(input->tell())
33 { 31 {
34 } 32 }
35 33
36 std::vector<std::shared_ptr<QPDFObject>> olist; 34 std::vector<std::shared_ptr<QPDFObject>> olist;
37 qpdf_offset_t offset; 35 qpdf_offset_t offset;
38 - std::string contents_string;  
39 - qpdf_offset_t contents_offset; 36 + std::string contents_string{""};
  37 + qpdf_offset_t contents_offset{-1};
  38 + int null_count{0};
40 }; 39 };
41 } // namespace 40 } // namespace
42 41
@@ -50,6 +49,7 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -50,6 +49,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
50 // this, it will cause a logic error to be thrown from 49 // this, it will cause a logic error to be thrown from
51 // QPDF::inParse(). 50 // QPDF::inParse().
52 51
  52 + const static std::shared_ptr<QPDFObject> null_oh = QPDF_Null::create();
53 QPDF::ParseGuard pg(context); 53 QPDF::ParseGuard pg(context);
54 54
55 empty = false; 55 empty = false;
@@ -67,7 +67,6 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -67,7 +67,6 @@ QPDFParser::parse(bool& empty, bool content_stream)
67 int good_count = 0; 67 int good_count = 0;
68 bool b_contents = false; 68 bool b_contents = false;
69 bool is_null = false; 69 bool is_null = false;
70 - auto null_oh = QPDF_Null::create();  
71 70
72 while (!done) { 71 while (!done) {
73 bool bad = false; 72 bool bad = false;
@@ -156,6 +155,8 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -156,6 +155,8 @@ QPDFParser::parse(bool& empty, bool content_stream)
156 155
157 case QPDFTokenizer::tt_null: 156 case QPDFTokenizer::tt_null:
158 is_null = true; 157 is_null = true;
  158 + ++frame.null_count;
  159 +
159 break; 160 break;
160 161
161 case QPDFTokenizer::tt_integer: 162 case QPDFTokenizer::tt_integer:
@@ -301,9 +302,11 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -301,9 +302,11 @@ QPDFParser::parse(bool& empty, bool content_stream)
301 302
302 case st_dictionary: 303 case st_dictionary:
303 case st_array: 304 case st_array:
304 - if (!indirect_ref && !is_null) {  
305 - // No need to set description for direct nulls - they will  
306 - // become implicit. 305 + if (is_null) {
  306 + object = null_oh;
  307 + // No need to set description for direct nulls - they probably
  308 + // will become implicit.
  309 + } else if (!indirect_ref) {
307 setDescription(object, input->getLastOffset()); 310 setDescription(object, input->getLastOffset());
308 } 311 }
309 set_offset = true; 312 set_offset = true;
@@ -326,7 +329,8 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -326,7 +329,8 @@ QPDFParser::parse(bool& empty, bool content_stream)
326 parser_state_e old_state = state_stack.back(); 329 parser_state_e old_state = state_stack.back();
327 state_stack.pop_back(); 330 state_stack.pop_back();
328 if (old_state == st_array) { 331 if (old_state == st_array) {
329 - object = QPDF_Array::create(std::move(olist)); 332 + object = QPDF_Array::create(
  333 + std::move(olist), frame.null_count > 100);
330 setDescription(object, offset - 1); 334 setDescription(object, offset - 1);
331 // The `offset` points to the next of "[". Set the rewind 335 // The `offset` points to the next of "[". Set the rewind
332 // offset to point to the beginning of "[". This has been 336 // offset to point to the beginning of "[". This has been
@@ -381,7 +385,7 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -381,7 +385,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
381 // Calculate value. 385 // Calculate value.
382 std::shared_ptr<QPDFObject> val; 386 std::shared_ptr<QPDFObject> val;
383 if (iter != olist.end()) { 387 if (iter != olist.end()) {
384 - val = *iter ? *iter : QPDF_Null::create(); 388 + val = *iter;
385 ++iter; 389 ++iter;
386 } else { 390 } else {
387 QTC::TC("qpdf", "QPDFParser no val for last key"); 391 QTC::TC("qpdf", "QPDFParser no val for last key");
libqpdf/QPDF_Array.cc
@@ -11,15 +11,19 @@ QPDF_Array::QPDF_Array(std::vector<QPDFObjectHandle> const& v) : @@ -11,15 +11,19 @@ QPDF_Array::QPDF_Array(std::vector<QPDFObjectHandle> const& v) :
11 setFromVector(v); 11 setFromVector(v);
12 } 12 }
13 13
14 -QPDF_Array::QPDF_Array(std::vector<std::shared_ptr<QPDFObject>>&& v) :  
15 - QPDFValue(::ot_array, "array") 14 +QPDF_Array::QPDF_Array(
  15 + std::vector<std::shared_ptr<QPDFObject>>&& v, bool sparse) :
  16 + QPDFValue(::ot_array, "array"),
  17 + sparse(sparse)
16 { 18 {
17 setFromVector(std::move(v)); 19 setFromVector(std::move(v));
18 } 20 }
19 21
20 QPDF_Array::QPDF_Array(SparseOHArray const& items) : 22 QPDF_Array::QPDF_Array(SparseOHArray const& items) :
21 QPDFValue(::ot_array, "array"), 23 QPDFValue(::ot_array, "array"),
  24 + sparse(true),
22 sp_elements(items) 25 sp_elements(items)
  26 +
23 { 27 {
24 } 28 }
25 29
@@ -37,9 +41,10 @@ QPDF_Array::create(std::vector<QPDFObjectHandle> const& items) @@ -37,9 +41,10 @@ QPDF_Array::create(std::vector<QPDFObjectHandle> const& items)
37 } 41 }
38 42
39 std::shared_ptr<QPDFObject> 43 std::shared_ptr<QPDFObject>
40 -QPDF_Array::create(std::vector<std::shared_ptr<QPDFObject>>&& items) 44 +QPDF_Array::create(
  45 + std::vector<std::shared_ptr<QPDFObject>>&& items, bool sparse)
41 { 46 {
42 - return do_create(new QPDF_Array(std::move(items))); 47 + return do_create(new QPDF_Array(std::move(items), sparse));
43 } 48 }
44 49
45 std::shared_ptr<QPDFObject> 50 std::shared_ptr<QPDFObject>
@@ -196,8 +201,9 @@ QPDF_Array::setFromVector(std::vector<std::shared_ptr<QPDFObject>>&& v) @@ -196,8 +201,9 @@ QPDF_Array::setFromVector(std::vector<std::shared_ptr<QPDFObject>>&& v)
196 if (sparse) { 201 if (sparse) {
197 sp_elements = SparseOHArray(); 202 sp_elements = SparseOHArray();
198 for (auto&& item: v) { 203 for (auto&& item: v) {
199 - if (item) {  
200 - sp_elements.append(item); 204 + if (item->getTypeCode() != ::ot_null ||
  205 + item->getObjGen().isIndirect()) {
  206 + sp_elements.append(std::move(item));
201 } else { 207 } else {
202 ++sp_elements.n_elements; 208 ++sp_elements.n_elements;
203 } 209 }
libqpdf/qpdf/QPDF_Array.hh
@@ -15,7 +15,7 @@ class QPDF_Array: public QPDFValue @@ -15,7 +15,7 @@ class QPDF_Array: public QPDFValue
15 static std::shared_ptr<QPDFObject> 15 static std::shared_ptr<QPDFObject>
16 create(std::vector<QPDFObjectHandle> const& items); 16 create(std::vector<QPDFObjectHandle> const& items);
17 static std::shared_ptr<QPDFObject> 17 static std::shared_ptr<QPDFObject>
18 - create(std::vector<std::shared_ptr<QPDFObject>>&& items); 18 + create(std::vector<std::shared_ptr<QPDFObject>>&& items, bool sparse);
19 static std::shared_ptr<QPDFObject> create(SparseOHArray const& items); 19 static std::shared_ptr<QPDFObject> create(SparseOHArray const& items);
20 static std::shared_ptr<QPDFObject> create(OHArray const& items); 20 static std::shared_ptr<QPDFObject> create(OHArray const& items);
21 virtual std::shared_ptr<QPDFObject> copy(bool shallow = false); 21 virtual std::shared_ptr<QPDFObject> copy(bool shallow = false);
@@ -36,7 +36,7 @@ class QPDF_Array: public QPDFValue @@ -36,7 +36,7 @@ class QPDF_Array: public QPDFValue
36 36
37 private: 37 private:
38 QPDF_Array(std::vector<QPDFObjectHandle> const& items); 38 QPDF_Array(std::vector<QPDFObjectHandle> const& items);
39 - QPDF_Array(std::vector<std::shared_ptr<QPDFObject>>&& items); 39 + QPDF_Array(std::vector<std::shared_ptr<QPDFObject>>&& items, bool sparse);
40 QPDF_Array(SparseOHArray const& items); 40 QPDF_Array(SparseOHArray const& items);
41 QPDF_Array(OHArray const& items); 41 QPDF_Array(OHArray const& items);
42 bool sparse{false}; 42 bool sparse{false};