Commit db6ab9cbfabe9be32b7386ac92dbc2a3fabd83e5

Authored by m-holger
1 parent d904eab8

In QPDFParser::parse merge state and object stacks

libqpdf/QPDFParser.cc
... ... @@ -21,22 +21,6 @@
21 21  
22 22 #include <memory>
23 23  
24   -namespace
25   -{
26   - struct StackFrame
27   - {
28   - StackFrame(std::shared_ptr<InputSource> input) :
29   - offset(input->tell())
30   - {
31   - }
32   -
33   - std::vector<std::shared_ptr<QPDFObject>> olist;
34   - qpdf_offset_t offset;
35   - std::string contents_string{""};
36   - qpdf_offset_t contents_offset{-1};
37   - int null_count{0};
38   - };
39   -} // namespace
40 24  
41 25 QPDFObjectHandle
42 26 QPDFParser::parse(bool& empty, bool content_stream)
... ... @@ -54,23 +38,15 @@ QPDFParser::parse(bool& empty, bool content_stream)
54 38 std::shared_ptr<QPDFObject> object;
55 39 bool set_offset = false;
56 40  
57   - std::vector<StackFrame> stack;
58   - stack.emplace_back(input);
59   - std::vector<parser_state_e> state_stack;
60   - state_stack.push_back(st_top);
61   - qpdf_offset_t offset;
  41 + std::vector<StackFrame> stack{{input, st_top}};
62 42 bool done = false;
63 43 bool b_contents = false;
64 44 bool is_null = false;
  45 + auto* frame = &stack.back();
65 46  
66 47 while (!done) {
67 48 bool indirect_ref = false;
68 49 is_null = false;
69   - auto& frame = stack.back();
70   - auto& olist = frame.olist;
71   - parser_state_e state = state_stack.back();
72   - offset = frame.offset;
73   -
74 50 object = nullptr;
75 51 set_offset = false;
76 52  
... ... @@ -81,7 +57,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
81 57  
82 58 switch (tokenizer.getType()) {
83 59 case QPDFTokenizer::tt_eof:
84   - if (state_stack.size() > 1) {
  60 + if (stack.size() > 1) {
85 61 warn("parse error while reading object");
86 62 }
87 63 if (content_stream) {
... ... @@ -111,21 +87,20 @@ QPDFParser::parse(bool& empty, bool content_stream)
111 87 break;
112 88  
113 89 case QPDFTokenizer::tt_array_close:
114   - if (state == st_array) {
115   - if ((state_stack.size() < 2) || (stack.size() < 2)) {
  90 + if (frame->state == st_array) {
  91 + if (stack.size() < 2) {
116 92 throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with "
117 93 "insufficient elements in stack");
118 94 }
119   - object = QPDF_Array::create(std::move(olist), frame.null_count > 100);
120   - setDescription(object, offset - 1);
  95 + object = QPDF_Array::create(std::move(frame->olist), frame->null_count > 100);
  96 + setDescription(object, frame->offset - 1);
121 97 // The `offset` points to the next of "[". Set the rewind offset to point to the
122 98 // beginning of "[". This has been explicitly tested with whitespace surrounding the
123 99 // array start delimiter. getLastOffset points to the array end token and therefore
124 100 // can't be used here.
125 101 set_offset = true;
126   - state_stack.pop_back();
127   - state = state_stack.back();
128 102 stack.pop_back();
  103 + frame = &stack.back();
129 104 } else {
130 105 QTC::TC("qpdf", "QPDFParser bad array close");
131 106 warn("treating unexpected array close token as null");
... ... @@ -137,8 +112,8 @@ QPDFParser::parse(bool& empty, bool content_stream)
137 112 break;
138 113  
139 114 case QPDFTokenizer::tt_dict_close:
140   - if (state == st_dictionary) {
141   - if ((state_stack.size() < 2) || (stack.size() < 2)) {
  115 + if (frame->state == st_dictionary) {
  116 + if (stack.size() < 2) {
142 117 throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with "
143 118 "insufficient elements in stack");
144 119 }
... ... @@ -146,7 +121,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
146 121 // Convert list to map. Alternating elements are keys. Attempt to recover more or
147 122 // less gracefully from invalid dictionaries.
148 123 std::set<std::string> names;
149   - for (auto& obj: olist) {
  124 + for (auto& obj: frame->olist) {
150 125 if (obj) {
151 126 if (obj->getTypeCode() == ::ot_name) {
152 127 names.insert(obj->getStringValue());
... ... @@ -156,7 +131,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
156 131  
157 132 std::map<std::string, QPDFObjectHandle> dict;
158 133 int next_fake_key = 1;
159   - for (auto iter = olist.begin(); iter != olist.end();) {
  134 + for (auto iter = frame->olist.begin(); iter != frame->olist.end();) {
160 135 // Calculate key.
161 136 std::string key;
162 137 if (*iter && (*iter)->getTypeCode() == ::ot_name) {
... ... @@ -169,49 +144,48 @@ QPDFParser::parse(bool& empty, bool content_stream)
169 144 QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
170 145 }
171 146 warn(
172   - offset,
  147 + frame->offset,
173 148 "expected dictionary key but found non-name object; inserting key " +
174 149 key);
175 150 }
176 151 if (dict.count(key) > 0) {
177 152 QTC::TC("qpdf", "QPDFParser duplicate dict key");
178 153 warn(
179   - offset,
  154 + frame->offset,
180 155 "dictionary has duplicated key " + key +
181 156 "; last occurrence overrides earlier ones");
182 157 }
183 158  
184 159 // Calculate value.
185 160 std::shared_ptr<QPDFObject> val;
186   - if (iter != olist.end()) {
  161 + if (iter != frame->olist.end()) {
187 162 val = *iter;
188 163 ++iter;
189 164 } else {
190 165 QTC::TC("qpdf", "QPDFParser no val for last key");
191 166 warn(
192   - offset,
  167 + frame->offset,
193 168 "dictionary ended prematurely; using null as value for last key");
194 169 val = QPDF_Null::create();
195 170 }
196 171  
197 172 dict[std::move(key)] = std::move(val);
198 173 }
199   - if (!frame.contents_string.empty() && dict.count("/Type") &&
  174 + if (!frame->contents_string.empty() && dict.count("/Type") &&
200 175 dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") &&
201 176 dict.count("/Contents") && dict["/Contents"].isString()) {
202   - dict["/Contents"] = QPDFObjectHandle::newString(frame.contents_string);
203   - dict["/Contents"].setParsedOffset(frame.contents_offset);
  177 + dict["/Contents"] = QPDFObjectHandle::newString(frame->contents_string);
  178 + dict["/Contents"].setParsedOffset(frame->contents_offset);
204 179 }
205 180 object = QPDF_Dictionary::create(std::move(dict));
206   - setDescription(object, offset - 2);
  181 + setDescription(object, frame->offset - 2);
207 182 // The `offset` points to the next of "<<". Set the rewind offset to point to the
208 183 // beginning of "<<". This has been explicitly tested with whitespace surrounding
209 184 // the dictionary start delimiter. getLastOffset points to the dictionary end token
210 185 // and therefore can't be used here.
211 186 set_offset = true;
212   - state_stack.pop_back();
213   - state = state_stack.back();
214 187 stack.pop_back();
  188 + frame = &stack.back();
215 189 } else {
216 190 QTC::TC("qpdf", "QPDFParser bad dictionary close");
217 191 warn("unexpected dictionary close token");
... ... @@ -229,11 +203,12 @@ QPDFParser::parse(bool& empty, bool content_stream)
229 203 warn("ignoring excessively deeply nested data structure");
230 204 return {QPDF_Null::create()};
231 205 } else {
232   - state_stack.push_back(
  206 + b_contents = false;
  207 + stack.emplace_back(
  208 + input,
233 209 (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array
234 210 : st_dictionary);
235   - b_contents = false;
236   - stack.emplace_back(input);
  211 + frame = &stack.back();
237 212 continue;
238 213 }
239 214  
... ... @@ -243,7 +218,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
243 218  
244 219 case QPDFTokenizer::tt_null:
245 220 is_null = true;
246   - ++frame.null_count;
  221 + ++frame->null_count;
247 222  
248 223 break;
249 224  
... ... @@ -271,23 +246,23 @@ QPDFParser::parse(bool& empty, bool content_stream)
271 246 case QPDFTokenizer::tt_word:
272 247 {
273 248 auto const& value = tokenizer.getValue();
274   - auto size = olist.size();
  249 + auto size = frame->olist.size();
275 250 if (content_stream) {
276 251 object = QPDF_Operator::create(value);
277 252 } else if (
278   - value == "R" && state != st_top && size >= 2 && olist.back() &&
279   - olist.back()->getTypeCode() == ::ot_integer &&
280   - !olist.back()->getObjGen().isIndirect() && olist.at(size - 2) &&
281   - olist.at(size - 2)->getTypeCode() == ::ot_integer &&
282   - !olist.at(size - 2)->getObjGen().isIndirect()) {
  253 + value == "R" && frame->state != st_top && size >= 2 && frame->olist.back() &&
  254 + frame->olist.back()->getTypeCode() == ::ot_integer &&
  255 + !frame->olist.back()->getObjGen().isIndirect() && frame->olist.at(size - 2) &&
  256 + frame->olist.at(size - 2)->getTypeCode() == ::ot_integer &&
  257 + !frame->olist.at(size - 2)->getObjGen().isIndirect()) {
283 258 if (context == nullptr) {
284 259 QTC::TC("qpdf", "QPDFParser indirect without context");
285 260 throw std::logic_error("QPDFObjectHandle::parse called without context on "
286 261 "an object with indirect references");
287 262 }
288 263 auto ref_og = QPDFObjGen(
289   - QPDFObjectHandle(olist.at(size - 2)).getIntValueAsInt(),
290   - QPDFObjectHandle(olist.back()).getIntValueAsInt());
  264 + QPDFObjectHandle(frame->olist.at(size - 2)).getIntValueAsInt(),
  265 + QPDFObjectHandle(frame->olist.back()).getIntValueAsInt());
291 266 if (ref_og.isIndirect()) {
292 267 // This action has the desirable side effect of causing dangling references
293 268 // (references to indirect objects that don't appear in the PDF) in any
... ... @@ -298,9 +273,9 @@ QPDFParser::parse(bool& empty, bool content_stream)
298 273 QTC::TC("qpdf", "QPDFParser indirect with 0 objid");
299 274 is_null = true;
300 275 }
301   - olist.pop_back();
302   - olist.pop_back();
303   - } else if ((value == "endobj") && (state == st_top)) {
  276 + frame->olist.pop_back();
  277 + frame->olist.pop_back();
  278 + } else if ((value == "endobj") && (frame->state == st_top)) {
304 279 // We just saw endobj without having read anything. Treat this as a null and do
305 280 // not move the input source's offset.
306 281 is_null = true;
... ... @@ -322,8 +297,8 @@ QPDFParser::parse(bool& empty, bool content_stream)
322 297 auto const& val = tokenizer.getValue();
323 298 if (decrypter) {
324 299 if (b_contents) {
325   - frame.contents_string = val;
326   - frame.contents_offset = input->getLastOffset();
  300 + frame->contents_string = val;
  301 + frame->contents_offset = input->getLastOffset();
327 302 b_contents = false;
328 303 }
329 304 std::string s{val};
... ... @@ -348,7 +323,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
348 323 throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object");
349 324 }
350 325  
351   - switch (state) {
  326 + switch (frame->state) {
352 327 case st_dictionary:
353 328 case st_array:
354 329 if (is_null) {
... ... @@ -358,7 +333,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
358 333 setDescription(object, input->getLastOffset());
359 334 }
360 335 set_offset = true;
361   - stack.back().olist.push_back(object);
  336 + frame->olist.push_back(object);
362 337 break;
363 338  
364 339 case st_top:
... ... @@ -371,7 +346,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
371 346 object = QPDF_Null::create();
372 347 }
373 348 if (!set_offset) {
374   - setDescription(object, offset);
  349 + setDescription(object, frame->offset);
375 350 }
376 351 return object;
377 352 }
... ...
libqpdf/qpdf/QPDFParser.hh
... ... @@ -31,8 +31,25 @@ class QPDFParser
31 31 QPDFObjectHandle parse(bool& empty, bool content_stream);
32 32  
33 33 private:
  34 + struct StackFrame;
34 35 enum parser_state_e { st_top, st_dictionary, st_array };
35 36  
  37 + struct StackFrame
  38 + {
  39 + StackFrame(std::shared_ptr<InputSource> const& input, parser_state_e state) :
  40 + state(state),
  41 + offset(input->tell())
  42 + {
  43 + }
  44 +
  45 + std::vector<std::shared_ptr<QPDFObject>> olist;
  46 + parser_state_e state;
  47 + qpdf_offset_t offset;
  48 + std::string contents_string{""};
  49 + qpdf_offset_t contents_offset{-1};
  50 + int null_count{0};
  51 + };
  52 +
36 53 bool tooManyBadTokens();
37 54 void warn(qpdf_offset_t offset, std::string const& msg) const;
38 55 void warn(std::string const& msg) const;
... ...