Commit 90829228b814c6fe3ea3192da34db90dc1e36843

Authored by m-holger
1 parent 12837f14

In QPDFParser::parse refactor handling of dict_close tokens

libqpdf/QPDFParser.cc
... ... @@ -138,7 +138,80 @@ QPDFParser::parse(bool& empty, bool content_stream)
138 138  
139 139 case QPDFTokenizer::tt_dict_close:
140 140 if (state == st_dictionary) {
141   - state = st_stop;
  141 + if ((state_stack.size() < 2) || (stack.size() < 2)) {
  142 + throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with "
  143 + "insufficient elements in stack");
  144 + }
  145 +
  146 + // Convert list to map. Alternating elements are keys. Attempt to recover more or
  147 + // less gracefully from invalid dictionaries.
  148 + std::set<std::string> names;
  149 + for (auto& obj: olist) {
  150 + if (obj) {
  151 + if (obj->getTypeCode() == ::ot_name) {
  152 + names.insert(obj->getStringValue());
  153 + }
  154 + }
  155 + }
  156 +
  157 + std::map<std::string, QPDFObjectHandle> dict;
  158 + int next_fake_key = 1;
  159 + for (auto iter = olist.begin(); iter != olist.end();) {
  160 + // Calculate key.
  161 + std::string key;
  162 + if (*iter && (*iter)->getTypeCode() == ::ot_name) {
  163 + key = (*iter)->getStringValue();
  164 + ++iter;
  165 + } else {
  166 + for (bool found_fake = false; !found_fake;) {
  167 + key = "/QPDFFake" + std::to_string(next_fake_key++);
  168 + found_fake = (names.count(key) == 0);
  169 + QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
  170 + }
  171 + warn(
  172 + offset,
  173 + "expected dictionary key but found non-name object; inserting key " +
  174 + key);
  175 + }
  176 + if (dict.count(key) > 0) {
  177 + QTC::TC("qpdf", "QPDFParser duplicate dict key");
  178 + warn(
  179 + offset,
  180 + "dictionary has duplicated key " + key +
  181 + "; last occurrence overrides earlier ones");
  182 + }
  183 +
  184 + // Calculate value.
  185 + std::shared_ptr<QPDFObject> val;
  186 + if (iter != olist.end()) {
  187 + val = *iter;
  188 + ++iter;
  189 + } else {
  190 + QTC::TC("qpdf", "QPDFParser no val for last key");
  191 + warn(
  192 + offset,
  193 + "dictionary ended prematurely; using null as value for last key");
  194 + val = QPDF_Null::create();
  195 + }
  196 +
  197 + dict[std::move(key)] = std::move(val);
  198 + }
  199 + if (!frame.contents_string.empty() && dict.count("/Type") &&
  200 + dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") &&
  201 + dict.count("/Contents") && dict["/Contents"].isString()) {
  202 + dict["/Contents"] = QPDFObjectHandle::newString(frame.contents_string);
  203 + dict["/Contents"].setParsedOffset(frame.contents_offset);
  204 + }
  205 + object = QPDF_Dictionary::create(std::move(dict));
  206 + setDescription(object, offset - 2);
  207 + // The `offset` points to the next of "<<". Set the rewind offset to point to the
  208 + // beginning of "<<". This has been explicitly tested with whitespace surrounding
  209 + // the dictionary start delimiter. getLastOffset points to the dictionary end token
  210 + // and therefore can't be used here.
  211 + set_offset = true;
  212 + state_stack.pop_back();
  213 + state = state_stack.back();
  214 + stack.pop_back();
142 215 } else {
143 216 QTC::TC("qpdf", "QPDFParser bad dictionary close");
144 217 warn("unexpected dictionary close token");
... ... @@ -276,7 +349,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
276 349 break;
277 350 }
278 351  
279   - if (object == nullptr && !is_null && (!(state == st_start || state == st_stop))) {
  352 + if (object == nullptr && !is_null && state != st_start) {
280 353 throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object");
281 354 }
282 355  
... ... @@ -299,88 +372,6 @@ QPDFParser::parse(bool& empty, bool content_stream)
299 372  
300 373 case st_start:
301 374 break;
302   -
303   - case st_stop:
304   - if ((state_stack.size() < 2) || (stack.size() < 2)) {
305   - throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with "
306   - "insufficient elements in stack");
307   - }
308   - parser_state_e old_state = state_stack.back();
309   - state_stack.pop_back();
310   - if (old_state == st_dictionary) {
311   - // Convert list to map. Alternating elements are keys. Attempt to recover more or
312   - // less gracefully from invalid dictionaries.
313   - std::set<std::string> names;
314   - for (auto& obj: olist) {
315   - if (obj) {
316   - if (obj->getTypeCode() == ::ot_name) {
317   - names.insert(obj->getStringValue());
318   - }
319   - }
320   - }
321   -
322   - std::map<std::string, QPDFObjectHandle> dict;
323   - int next_fake_key = 1;
324   - for (auto iter = olist.begin(); iter != olist.end();) {
325   - // Calculate key.
326   - std::string key;
327   - if (*iter && (*iter)->getTypeCode() == ::ot_name) {
328   - key = (*iter)->getStringValue();
329   - ++iter;
330   - } else {
331   - for (bool found_fake = false; !found_fake;) {
332   - key = "/QPDFFake" + std::to_string(next_fake_key++);
333   - found_fake = (names.count(key) == 0);
334   - QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
335   - }
336   - warn(
337   - offset,
338   - "expected dictionary key but found non-name object; inserting key " +
339   - key);
340   - }
341   - if (dict.count(key) > 0) {
342   - QTC::TC("qpdf", "QPDFParser duplicate dict key");
343   - warn(
344   - offset,
345   - "dictionary has duplicated key " + key +
346   - "; last occurrence overrides earlier ones");
347   - }
348   -
349   - // Calculate value.
350   - std::shared_ptr<QPDFObject> val;
351   - if (iter != olist.end()) {
352   - val = *iter;
353   - ++iter;
354   - } else {
355   - QTC::TC("qpdf", "QPDFParser no val for last key");
356   - warn(
357   - offset,
358   - "dictionary ended prematurely; using null as value for last key");
359   - val = QPDF_Null::create();
360   - }
361   -
362   - dict[std::move(key)] = std::move(val);
363   - }
364   - if (!frame.contents_string.empty() && dict.count("/Type") &&
365   - dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") &&
366   - dict.count("/Contents") && dict["/Contents"].isString()) {
367   - dict["/Contents"] = QPDFObjectHandle::newString(frame.contents_string);
368   - dict["/Contents"].setParsedOffset(frame.contents_offset);
369   - }
370   - object = QPDF_Dictionary::create(std::move(dict));
371   - setDescription(object, offset - 2);
372   - // The `offset` points to the next of "<<". Set the rewind offset to point to the
373   - // beginning of "<<". This has been explicitly tested with whitespace surrounding
374   - // the dictionary start delimiter. getLastOffset points to the dictionary end token
375   - // and therefore can't be used here.
376   - set_offset = true;
377   - }
378   - stack.pop_back();
379   - if (state_stack.back() == st_top) {
380   - done = true;
381   - } else {
382   - stack.back().olist.push_back(object);
383   - }
384 375 }
385 376 }
386 377  
... ...
libqpdf/qpdf/QPDFParser.hh
... ... @@ -31,7 +31,7 @@ class QPDFParser
31 31 QPDFObjectHandle parse(bool& empty, bool content_stream);
32 32  
33 33 private:
34   - enum parser_state_e { st_top, st_start, st_stop, st_dictionary, st_array };
  34 + enum parser_state_e { st_top, st_start, st_dictionary, st_array };
35 35  
36 36 bool tooManyBadTokens();
37 37 void warn(qpdf_offset_t offset, std::string const& msg) const;
... ...