Commit db6ab9cbfabe9be32b7386ac92dbc2a3fabd83e5

Authored by m-holger
1 parent d904eab8

In QPDFParser::parse merge state and object stacks

libqpdf/QPDFParser.cc
@@ -21,22 +21,6 @@ @@ -21,22 +21,6 @@
21 21
22 #include <memory> 22 #include <memory>
23 23
24 -namespace  
25 -{  
26 - struct StackFrame  
27 - {  
28 - StackFrame(std::shared_ptr<InputSource> input) :  
29 - offset(input->tell())  
30 - {  
31 - }  
32 -  
33 - std::vector<std::shared_ptr<QPDFObject>> olist;  
34 - qpdf_offset_t offset;  
35 - std::string contents_string{""};  
36 - qpdf_offset_t contents_offset{-1};  
37 - int null_count{0};  
38 - };  
39 -} // namespace  
40 24
41 QPDFObjectHandle 25 QPDFObjectHandle
42 QPDFParser::parse(bool& empty, bool content_stream) 26 QPDFParser::parse(bool& empty, bool content_stream)
@@ -54,23 +38,15 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -54,23 +38,15 @@ QPDFParser::parse(bool& empty, bool content_stream)
54 std::shared_ptr<QPDFObject> object; 38 std::shared_ptr<QPDFObject> object;
55 bool set_offset = false; 39 bool set_offset = false;
56 40
57 - std::vector<StackFrame> stack;  
58 - stack.emplace_back(input);  
59 - std::vector<parser_state_e> state_stack;  
60 - state_stack.push_back(st_top);  
61 - qpdf_offset_t offset; 41 + std::vector<StackFrame> stack{{input, st_top}};
62 bool done = false; 42 bool done = false;
63 bool b_contents = false; 43 bool b_contents = false;
64 bool is_null = false; 44 bool is_null = false;
  45 + auto* frame = &stack.back();
65 46
66 while (!done) { 47 while (!done) {
67 bool indirect_ref = false; 48 bool indirect_ref = false;
68 is_null = false; 49 is_null = false;
69 - auto& frame = stack.back();  
70 - auto& olist = frame.olist;  
71 - parser_state_e state = state_stack.back();  
72 - offset = frame.offset;  
73 -  
74 object = nullptr; 50 object = nullptr;
75 set_offset = false; 51 set_offset = false;
76 52
@@ -81,7 +57,7 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -81,7 +57,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
81 57
82 switch (tokenizer.getType()) { 58 switch (tokenizer.getType()) {
83 case QPDFTokenizer::tt_eof: 59 case QPDFTokenizer::tt_eof:
84 - if (state_stack.size() > 1) { 60 + if (stack.size() > 1) {
85 warn("parse error while reading object"); 61 warn("parse error while reading object");
86 } 62 }
87 if (content_stream) { 63 if (content_stream) {
@@ -111,21 +87,20 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -111,21 +87,20 @@ QPDFParser::parse(bool& empty, bool content_stream)
111 break; 87 break;
112 88
113 case QPDFTokenizer::tt_array_close: 89 case QPDFTokenizer::tt_array_close:
114 - if (state == st_array) {  
115 - if ((state_stack.size() < 2) || (stack.size() < 2)) { 90 + if (frame->state == st_array) {
  91 + if (stack.size() < 2) {
116 throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with " 92 throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with "
117 "insufficient elements in stack"); 93 "insufficient elements in stack");
118 } 94 }
119 - object = QPDF_Array::create(std::move(olist), frame.null_count > 100);  
120 - setDescription(object, offset - 1); 95 + object = QPDF_Array::create(std::move(frame->olist), frame->null_count > 100);
  96 + setDescription(object, frame->offset - 1);
121 // The `offset` points to the next of "[". Set the rewind offset to point to the 97 // The `offset` points to the next of "[". Set the rewind offset to point to the
122 // beginning of "[". This has been explicitly tested with whitespace surrounding the 98 // beginning of "[". This has been explicitly tested with whitespace surrounding the
123 // array start delimiter. getLastOffset points to the array end token and therefore 99 // array start delimiter. getLastOffset points to the array end token and therefore
124 // can't be used here. 100 // can't be used here.
125 set_offset = true; 101 set_offset = true;
126 - state_stack.pop_back();  
127 - state = state_stack.back();  
128 stack.pop_back(); 102 stack.pop_back();
  103 + frame = &stack.back();
129 } else { 104 } else {
130 QTC::TC("qpdf", "QPDFParser bad array close"); 105 QTC::TC("qpdf", "QPDFParser bad array close");
131 warn("treating unexpected array close token as null"); 106 warn("treating unexpected array close token as null");
@@ -137,8 +112,8 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -137,8 +112,8 @@ QPDFParser::parse(bool& empty, bool content_stream)
137 break; 112 break;
138 113
139 case QPDFTokenizer::tt_dict_close: 114 case QPDFTokenizer::tt_dict_close:
140 - if (state == st_dictionary) {  
141 - if ((state_stack.size() < 2) || (stack.size() < 2)) { 115 + if (frame->state == st_dictionary) {
  116 + if (stack.size() < 2) {
142 throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with " 117 throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with "
143 "insufficient elements in stack"); 118 "insufficient elements in stack");
144 } 119 }
@@ -146,7 +121,7 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -146,7 +121,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
146 // Convert list to map. Alternating elements are keys. Attempt to recover more or 121 // Convert list to map. Alternating elements are keys. Attempt to recover more or
147 // less gracefully from invalid dictionaries. 122 // less gracefully from invalid dictionaries.
148 std::set<std::string> names; 123 std::set<std::string> names;
149 - for (auto& obj: olist) { 124 + for (auto& obj: frame->olist) {
150 if (obj) { 125 if (obj) {
151 if (obj->getTypeCode() == ::ot_name) { 126 if (obj->getTypeCode() == ::ot_name) {
152 names.insert(obj->getStringValue()); 127 names.insert(obj->getStringValue());
@@ -156,7 +131,7 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -156,7 +131,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
156 131
157 std::map<std::string, QPDFObjectHandle> dict; 132 std::map<std::string, QPDFObjectHandle> dict;
158 int next_fake_key = 1; 133 int next_fake_key = 1;
159 - for (auto iter = olist.begin(); iter != olist.end();) { 134 + for (auto iter = frame->olist.begin(); iter != frame->olist.end();) {
160 // Calculate key. 135 // Calculate key.
161 std::string key; 136 std::string key;
162 if (*iter && (*iter)->getTypeCode() == ::ot_name) { 137 if (*iter && (*iter)->getTypeCode() == ::ot_name) {
@@ -169,49 +144,48 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -169,49 +144,48 @@ QPDFParser::parse(bool& empty, bool content_stream)
169 QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1)); 144 QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
170 } 145 }
171 warn( 146 warn(
172 - offset, 147 + frame->offset,
173 "expected dictionary key but found non-name object; inserting key " + 148 "expected dictionary key but found non-name object; inserting key " +
174 key); 149 key);
175 } 150 }
176 if (dict.count(key) > 0) { 151 if (dict.count(key) > 0) {
177 QTC::TC("qpdf", "QPDFParser duplicate dict key"); 152 QTC::TC("qpdf", "QPDFParser duplicate dict key");
178 warn( 153 warn(
179 - offset, 154 + frame->offset,
180 "dictionary has duplicated key " + key + 155 "dictionary has duplicated key " + key +
181 "; last occurrence overrides earlier ones"); 156 "; last occurrence overrides earlier ones");
182 } 157 }
183 158
184 // Calculate value. 159 // Calculate value.
185 std::shared_ptr<QPDFObject> val; 160 std::shared_ptr<QPDFObject> val;
186 - if (iter != olist.end()) { 161 + if (iter != frame->olist.end()) {
187 val = *iter; 162 val = *iter;
188 ++iter; 163 ++iter;
189 } else { 164 } else {
190 QTC::TC("qpdf", "QPDFParser no val for last key"); 165 QTC::TC("qpdf", "QPDFParser no val for last key");
191 warn( 166 warn(
192 - offset, 167 + frame->offset,
193 "dictionary ended prematurely; using null as value for last key"); 168 "dictionary ended prematurely; using null as value for last key");
194 val = QPDF_Null::create(); 169 val = QPDF_Null::create();
195 } 170 }
196 171
197 dict[std::move(key)] = std::move(val); 172 dict[std::move(key)] = std::move(val);
198 } 173 }
199 - if (!frame.contents_string.empty() && dict.count("/Type") && 174 + if (!frame->contents_string.empty() && dict.count("/Type") &&
200 dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") && 175 dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") &&
201 dict.count("/Contents") && dict["/Contents"].isString()) { 176 dict.count("/Contents") && dict["/Contents"].isString()) {
202 - dict["/Contents"] = QPDFObjectHandle::newString(frame.contents_string);  
203 - dict["/Contents"].setParsedOffset(frame.contents_offset); 177 + dict["/Contents"] = QPDFObjectHandle::newString(frame->contents_string);
  178 + dict["/Contents"].setParsedOffset(frame->contents_offset);
204 } 179 }
205 object = QPDF_Dictionary::create(std::move(dict)); 180 object = QPDF_Dictionary::create(std::move(dict));
206 - setDescription(object, offset - 2); 181 + setDescription(object, frame->offset - 2);
207 // The `offset` points to the next of "<<". Set the rewind offset to point to the 182 // The `offset` points to the next of "<<". Set the rewind offset to point to the
208 // beginning of "<<". This has been explicitly tested with whitespace surrounding 183 // beginning of "<<". This has been explicitly tested with whitespace surrounding
209 // the dictionary start delimiter. getLastOffset points to the dictionary end token 184 // the dictionary start delimiter. getLastOffset points to the dictionary end token
210 // and therefore can't be used here. 185 // and therefore can't be used here.
211 set_offset = true; 186 set_offset = true;
212 - state_stack.pop_back();  
213 - state = state_stack.back();  
214 stack.pop_back(); 187 stack.pop_back();
  188 + frame = &stack.back();
215 } else { 189 } else {
216 QTC::TC("qpdf", "QPDFParser bad dictionary close"); 190 QTC::TC("qpdf", "QPDFParser bad dictionary close");
217 warn("unexpected dictionary close token"); 191 warn("unexpected dictionary close token");
@@ -229,11 +203,12 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -229,11 +203,12 @@ QPDFParser::parse(bool& empty, bool content_stream)
229 warn("ignoring excessively deeply nested data structure"); 203 warn("ignoring excessively deeply nested data structure");
230 return {QPDF_Null::create()}; 204 return {QPDF_Null::create()};
231 } else { 205 } else {
232 - state_stack.push_back( 206 + b_contents = false;
  207 + stack.emplace_back(
  208 + input,
233 (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array 209 (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array
234 : st_dictionary); 210 : st_dictionary);
235 - b_contents = false;  
236 - stack.emplace_back(input); 211 + frame = &stack.back();
237 continue; 212 continue;
238 } 213 }
239 214
@@ -243,7 +218,7 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -243,7 +218,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
243 218
244 case QPDFTokenizer::tt_null: 219 case QPDFTokenizer::tt_null:
245 is_null = true; 220 is_null = true;
246 - ++frame.null_count; 221 + ++frame->null_count;
247 222
248 break; 223 break;
249 224
@@ -271,23 +246,23 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -271,23 +246,23 @@ QPDFParser::parse(bool& empty, bool content_stream)
271 case QPDFTokenizer::tt_word: 246 case QPDFTokenizer::tt_word:
272 { 247 {
273 auto const& value = tokenizer.getValue(); 248 auto const& value = tokenizer.getValue();
274 - auto size = olist.size(); 249 + auto size = frame->olist.size();
275 if (content_stream) { 250 if (content_stream) {
276 object = QPDF_Operator::create(value); 251 object = QPDF_Operator::create(value);
277 } else if ( 252 } else if (
278 - value == "R" && state != st_top && size >= 2 && olist.back() &&  
279 - olist.back()->getTypeCode() == ::ot_integer &&  
280 - !olist.back()->getObjGen().isIndirect() && olist.at(size - 2) &&  
281 - olist.at(size - 2)->getTypeCode() == ::ot_integer &&  
282 - !olist.at(size - 2)->getObjGen().isIndirect()) { 253 + value == "R" && frame->state != st_top && size >= 2 && frame->olist.back() &&
  254 + frame->olist.back()->getTypeCode() == ::ot_integer &&
  255 + !frame->olist.back()->getObjGen().isIndirect() && frame->olist.at(size - 2) &&
  256 + frame->olist.at(size - 2)->getTypeCode() == ::ot_integer &&
  257 + !frame->olist.at(size - 2)->getObjGen().isIndirect()) {
283 if (context == nullptr) { 258 if (context == nullptr) {
284 QTC::TC("qpdf", "QPDFParser indirect without context"); 259 QTC::TC("qpdf", "QPDFParser indirect without context");
285 throw std::logic_error("QPDFObjectHandle::parse called without context on " 260 throw std::logic_error("QPDFObjectHandle::parse called without context on "
286 "an object with indirect references"); 261 "an object with indirect references");
287 } 262 }
288 auto ref_og = QPDFObjGen( 263 auto ref_og = QPDFObjGen(
289 - QPDFObjectHandle(olist.at(size - 2)).getIntValueAsInt(),  
290 - QPDFObjectHandle(olist.back()).getIntValueAsInt()); 264 + QPDFObjectHandle(frame->olist.at(size - 2)).getIntValueAsInt(),
  265 + QPDFObjectHandle(frame->olist.back()).getIntValueAsInt());
291 if (ref_og.isIndirect()) { 266 if (ref_og.isIndirect()) {
292 // This action has the desirable side effect of causing dangling references 267 // This action has the desirable side effect of causing dangling references
293 // (references to indirect objects that don't appear in the PDF) in any 268 // (references to indirect objects that don't appear in the PDF) in any
@@ -298,9 +273,9 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -298,9 +273,9 @@ QPDFParser::parse(bool& empty, bool content_stream)
298 QTC::TC("qpdf", "QPDFParser indirect with 0 objid"); 273 QTC::TC("qpdf", "QPDFParser indirect with 0 objid");
299 is_null = true; 274 is_null = true;
300 } 275 }
301 - olist.pop_back();  
302 - olist.pop_back();  
303 - } else if ((value == "endobj") && (state == st_top)) { 276 + frame->olist.pop_back();
  277 + frame->olist.pop_back();
  278 + } else if ((value == "endobj") && (frame->state == st_top)) {
304 // We just saw endobj without having read anything. Treat this as a null and do 279 // We just saw endobj without having read anything. Treat this as a null and do
305 // not move the input source's offset. 280 // not move the input source's offset.
306 is_null = true; 281 is_null = true;
@@ -322,8 +297,8 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -322,8 +297,8 @@ QPDFParser::parse(bool& empty, bool content_stream)
322 auto const& val = tokenizer.getValue(); 297 auto const& val = tokenizer.getValue();
323 if (decrypter) { 298 if (decrypter) {
324 if (b_contents) { 299 if (b_contents) {
325 - frame.contents_string = val;  
326 - frame.contents_offset = input->getLastOffset(); 300 + frame->contents_string = val;
  301 + frame->contents_offset = input->getLastOffset();
327 b_contents = false; 302 b_contents = false;
328 } 303 }
329 std::string s{val}; 304 std::string s{val};
@@ -348,7 +323,7 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -348,7 +323,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
348 throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object"); 323 throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object");
349 } 324 }
350 325
351 - switch (state) { 326 + switch (frame->state) {
352 case st_dictionary: 327 case st_dictionary:
353 case st_array: 328 case st_array:
354 if (is_null) { 329 if (is_null) {
@@ -358,7 +333,7 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -358,7 +333,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
358 setDescription(object, input->getLastOffset()); 333 setDescription(object, input->getLastOffset());
359 } 334 }
360 set_offset = true; 335 set_offset = true;
361 - stack.back().olist.push_back(object); 336 + frame->olist.push_back(object);
362 break; 337 break;
363 338
364 case st_top: 339 case st_top:
@@ -371,7 +346,7 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -371,7 +346,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
371 object = QPDF_Null::create(); 346 object = QPDF_Null::create();
372 } 347 }
373 if (!set_offset) { 348 if (!set_offset) {
374 - setDescription(object, offset); 349 + setDescription(object, frame->offset);
375 } 350 }
376 return object; 351 return object;
377 } 352 }
libqpdf/qpdf/QPDFParser.hh
@@ -31,8 +31,25 @@ class QPDFParser @@ -31,8 +31,25 @@ class QPDFParser
31 QPDFObjectHandle parse(bool& empty, bool content_stream); 31 QPDFObjectHandle parse(bool& empty, bool content_stream);
32 32
33 private: 33 private:
  34 + struct StackFrame;
34 enum parser_state_e { st_top, st_dictionary, st_array }; 35 enum parser_state_e { st_top, st_dictionary, st_array };
35 36
  37 + struct StackFrame
  38 + {
  39 + StackFrame(std::shared_ptr<InputSource> const& input, parser_state_e state) :
  40 + state(state),
  41 + offset(input->tell())
  42 + {
  43 + }
  44 +
  45 + std::vector<std::shared_ptr<QPDFObject>> olist;
  46 + parser_state_e state;
  47 + qpdf_offset_t offset;
  48 + std::string contents_string{""};
  49 + qpdf_offset_t contents_offset{-1};
  50 + int null_count{0};
  51 + };
  52 +
36 bool tooManyBadTokens(); 53 bool tooManyBadTokens();
37 void warn(qpdf_offset_t offset, std::string const& msg) const; 54 void warn(qpdf_offset_t offset, std::string const& msg) const;
38 void warn(std::string const& msg) const; 55 void warn(std::string const& msg) const;