Commit 90829228b814c6fe3ea3192da34db90dc1e36843

Authored by m-holger
1 parent 12837f14

In QPDFParser::parse refactor handling of dict_close tokens

libqpdf/QPDFParser.cc
@@ -138,7 +138,80 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -138,7 +138,80 @@ QPDFParser::parse(bool& empty, bool content_stream)
138 138
139 case QPDFTokenizer::tt_dict_close: 139 case QPDFTokenizer::tt_dict_close:
140 if (state == st_dictionary) { 140 if (state == st_dictionary) {
141 - state = st_stop; 141 + if ((state_stack.size() < 2) || (stack.size() < 2)) {
  142 + throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with "
  143 + "insufficient elements in stack");
  144 + }
  145 +
  146 + // Convert list to map. Alternating elements are keys. Attempt to recover more or
  147 + // less gracefully from invalid dictionaries.
  148 + std::set<std::string> names;
  149 + for (auto& obj: olist) {
  150 + if (obj) {
  151 + if (obj->getTypeCode() == ::ot_name) {
  152 + names.insert(obj->getStringValue());
  153 + }
  154 + }
  155 + }
  156 +
  157 + std::map<std::string, QPDFObjectHandle> dict;
  158 + int next_fake_key = 1;
  159 + for (auto iter = olist.begin(); iter != olist.end();) {
  160 + // Calculate key.
  161 + std::string key;
  162 + if (*iter && (*iter)->getTypeCode() == ::ot_name) {
  163 + key = (*iter)->getStringValue();
  164 + ++iter;
  165 + } else {
  166 + for (bool found_fake = false; !found_fake;) {
  167 + key = "/QPDFFake" + std::to_string(next_fake_key++);
  168 + found_fake = (names.count(key) == 0);
  169 + QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
  170 + }
  171 + warn(
  172 + offset,
  173 + "expected dictionary key but found non-name object; inserting key " +
  174 + key);
  175 + }
  176 + if (dict.count(key) > 0) {
  177 + QTC::TC("qpdf", "QPDFParser duplicate dict key");
  178 + warn(
  179 + offset,
  180 + "dictionary has duplicated key " + key +
  181 + "; last occurrence overrides earlier ones");
  182 + }
  183 +
  184 + // Calculate value.
  185 + std::shared_ptr<QPDFObject> val;
  186 + if (iter != olist.end()) {
  187 + val = *iter;
  188 + ++iter;
  189 + } else {
  190 + QTC::TC("qpdf", "QPDFParser no val for last key");
  191 + warn(
  192 + offset,
  193 + "dictionary ended prematurely; using null as value for last key");
  194 + val = QPDF_Null::create();
  195 + }
  196 +
  197 + dict[std::move(key)] = std::move(val);
  198 + }
  199 + if (!frame.contents_string.empty() && dict.count("/Type") &&
  200 + dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") &&
  201 + dict.count("/Contents") && dict["/Contents"].isString()) {
  202 + dict["/Contents"] = QPDFObjectHandle::newString(frame.contents_string);
  203 + dict["/Contents"].setParsedOffset(frame.contents_offset);
  204 + }
  205 + object = QPDF_Dictionary::create(std::move(dict));
  206 + setDescription(object, offset - 2);
  207 + // The `offset` points to the next of "<<". Set the rewind offset to point to the
  208 + // beginning of "<<". This has been explicitly tested with whitespace surrounding
  209 + // the dictionary start delimiter. getLastOffset points to the dictionary end token
  210 + // and therefore can't be used here.
  211 + set_offset = true;
  212 + state_stack.pop_back();
  213 + state = state_stack.back();
  214 + stack.pop_back();
142 } else { 215 } else {
143 QTC::TC("qpdf", "QPDFParser bad dictionary close"); 216 QTC::TC("qpdf", "QPDFParser bad dictionary close");
144 warn("unexpected dictionary close token"); 217 warn("unexpected dictionary close token");
@@ -276,7 +349,7 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -276,7 +349,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
276 break; 349 break;
277 } 350 }
278 351
279 - if (object == nullptr && !is_null && (!(state == st_start || state == st_stop))) { 352 + if (object == nullptr && !is_null && state != st_start) {
280 throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object"); 353 throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object");
281 } 354 }
282 355
@@ -299,88 +372,6 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -299,88 +372,6 @@ QPDFParser::parse(bool& empty, bool content_stream)
299 372
300 case st_start: 373 case st_start:
301 break; 374 break;
302 -  
303 - case st_stop:  
304 - if ((state_stack.size() < 2) || (stack.size() < 2)) {  
305 - throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with "  
306 - "insufficient elements in stack");  
307 - }  
308 - parser_state_e old_state = state_stack.back();  
309 - state_stack.pop_back();  
310 - if (old_state == st_dictionary) {  
311 - // Convert list to map. Alternating elements are keys. Attempt to recover more or  
312 - // less gracefully from invalid dictionaries.  
313 - std::set<std::string> names;  
314 - for (auto& obj: olist) {  
315 - if (obj) {  
316 - if (obj->getTypeCode() == ::ot_name) {  
317 - names.insert(obj->getStringValue());  
318 - }  
319 - }  
320 - }  
321 -  
322 - std::map<std::string, QPDFObjectHandle> dict;  
323 - int next_fake_key = 1;  
324 - for (auto iter = olist.begin(); iter != olist.end();) {  
325 - // Calculate key.  
326 - std::string key;  
327 - if (*iter && (*iter)->getTypeCode() == ::ot_name) {  
328 - key = (*iter)->getStringValue();  
329 - ++iter;  
330 - } else {  
331 - for (bool found_fake = false; !found_fake;) {  
332 - key = "/QPDFFake" + std::to_string(next_fake_key++);  
333 - found_fake = (names.count(key) == 0);  
334 - QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));  
335 - }  
336 - warn(  
337 - offset,  
338 - "expected dictionary key but found non-name object; inserting key " +  
339 - key);  
340 - }  
341 - if (dict.count(key) > 0) {  
342 - QTC::TC("qpdf", "QPDFParser duplicate dict key");  
343 - warn(  
344 - offset,  
345 - "dictionary has duplicated key " + key +  
346 - "; last occurrence overrides earlier ones");  
347 - }  
348 -  
349 - // Calculate value.  
350 - std::shared_ptr<QPDFObject> val;  
351 - if (iter != olist.end()) {  
352 - val = *iter;  
353 - ++iter;  
354 - } else {  
355 - QTC::TC("qpdf", "QPDFParser no val for last key");  
356 - warn(  
357 - offset,  
358 - "dictionary ended prematurely; using null as value for last key");  
359 - val = QPDF_Null::create();  
360 - }  
361 -  
362 - dict[std::move(key)] = std::move(val);  
363 - }  
364 - if (!frame.contents_string.empty() && dict.count("/Type") &&  
365 - dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") &&  
366 - dict.count("/Contents") && dict["/Contents"].isString()) {  
367 - dict["/Contents"] = QPDFObjectHandle::newString(frame.contents_string);  
368 - dict["/Contents"].setParsedOffset(frame.contents_offset);  
369 - }  
370 - object = QPDF_Dictionary::create(std::move(dict));  
371 - setDescription(object, offset - 2);  
372 - // The `offset` points to the next of "<<". Set the rewind offset to point to the  
373 - // beginning of "<<". This has been explicitly tested with whitespace surrounding  
374 - // the dictionary start delimiter. getLastOffset points to the dictionary end token  
375 - // and therefore can't be used here.  
376 - set_offset = true;  
377 - }  
378 - stack.pop_back();  
379 - if (state_stack.back() == st_top) {  
380 - done = true;  
381 - } else {  
382 - stack.back().olist.push_back(object);  
383 - }  
384 } 375 }
385 } 376 }
386 377
libqpdf/qpdf/QPDFParser.hh
@@ -31,7 +31,7 @@ class QPDFParser @@ -31,7 +31,7 @@ class QPDFParser
31 QPDFObjectHandle parse(bool& empty, bool content_stream); 31 QPDFObjectHandle parse(bool& empty, bool content_stream);
32 32
33 private: 33 private:
34 - enum parser_state_e { st_top, st_start, st_stop, st_dictionary, st_array }; 34 + enum parser_state_e { st_top, st_start, st_dictionary, st_array };
35 35
36 bool tooManyBadTokens(); 36 bool tooManyBadTokens();
37 void warn(qpdf_offset_t offset, std::string const& msg) const; 37 void warn(qpdf_offset_t offset, std::string const& msg) const;