Commit 37f7a734885f0d3c9dce64fbdb9a57192170686b

Authored by m-holger
1 parent 29cd8f4f

In QPDFParser::parse refactor handling of bad tokens

libqpdf/QPDFParser.cc
... ... @@ -60,13 +60,10 @@ QPDFParser::parse(bool& empty, bool content_stream)
60 60 state_stack.push_back(st_top);
61 61 qpdf_offset_t offset;
62 62 bool done = false;
63   - int bad_count = 0;
64   - int good_count = 0;
65 63 bool b_contents = false;
66 64 bool is_null = false;
67 65  
68 66 while (!done) {
69   - bool bad = false;
70 67 bool indirect_ref = false;
71 68 is_null = false;
72 69 auto& frame = stack.back();
... ... @@ -80,6 +77,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
80 77 if (!tokenizer.nextToken(*input, object_description)) {
81 78 warn(tokenizer.getErrorMessage());
82 79 }
  80 + ++good_count; // optimistically
83 81  
84 82 switch (tokenizer.getType()) {
85 83 case QPDFTokenizer::tt_eof:
... ... @@ -87,13 +85,14 @@ QPDFParser::parse(bool& empty, bool content_stream)
87 85 QTC::TC("qpdf", "QPDFParser eof in parse");
88 86 warn("unexpected EOF");
89 87 }
90   - bad = true;
91 88 state = st_eof;
92 89 break;
93 90  
94 91 case QPDFTokenizer::tt_bad:
95 92 QTC::TC("qpdf", "QPDFParser bad token in parse");
96   - bad = true;
  93 + if (tooManyBadTokens()) {
  94 + return {QPDF_Null::create()};
  95 + }
97 96 is_null = true;
98 97 break;
99 98  
... ... @@ -101,7 +100,9 @@ QPDFParser::parse(bool& empty, bool content_stream)
101 100 case QPDFTokenizer::tt_brace_close:
102 101 QTC::TC("qpdf", "QPDFParser bad brace");
103 102 warn("treating unexpected brace token as null");
104   - bad = true;
  103 + if (tooManyBadTokens()) {
  104 + return {QPDF_Null::create()};
  105 + }
105 106 is_null = true;
106 107 break;
107 108  
... ... @@ -111,7 +112,9 @@ QPDFParser::parse(bool& empty, bool content_stream)
111 112 } else {
112 113 QTC::TC("qpdf", "QPDFParser bad array close");
113 114 warn("treating unexpected array close token as null");
114   - bad = true;
  115 + if (tooManyBadTokens()) {
  116 + return {QPDF_Null::create()};
  117 + }
115 118 is_null = true;
116 119 }
117 120 break;
... ... @@ -122,7 +125,9 @@ QPDFParser::parse(bool& empty, bool content_stream)
122 125 } else {
123 126 QTC::TC("qpdf", "QPDFParser bad dictionary close");
124 127 warn("unexpected dictionary close token");
125   - bad = true;
  128 + if (tooManyBadTokens()) {
  129 + return {QPDF_Null::create()};
  130 + }
126 131 is_null = true;
127 132 }
128 133 break;
... ... @@ -132,7 +137,9 @@ QPDFParser::parse(bool& empty, bool content_stream)
132 137 if (stack.size() > 500) {
133 138 QTC::TC("qpdf", "QPDFParser too deep");
134 139 warn("ignoring excessively deeply nested data structure");
135   - bad = true;
  140 + if (tooManyBadTokens()) {
  141 + return {QPDF_Null::create()};
  142 + }
136 143 is_null = true;
137 144 state = st_top;
138 145 } else {
... ... @@ -217,7 +224,9 @@ QPDFParser::parse(bool& empty, bool content_stream)
217 224 } else {
218 225 QTC::TC("qpdf", "QPDFParser treat word as string");
219 226 warn("unknown token while reading object; treating as string");
220   - bad = true;
  227 + if (tooManyBadTokens()) {
  228 + return {QPDF_Null::create()};
  229 + }
221 230 object = QPDF_String::create(value);
222 231 }
223 232 }
... ... @@ -239,12 +248,13 @@ QPDFParser::parse(bool& empty, bool content_stream)
239 248 object = QPDF_String::create(val);
240 249 }
241 250 }
242   -
243 251 break;
244 252  
245 253 default:
246 254 warn("treating unknown token type as null while reading object");
247   - bad = true;
  255 + if (tooManyBadTokens()) {
  256 + return {QPDF_Null::create()};
  257 + }
248 258 is_null = true;
249 259 break;
250 260 }
... ... @@ -255,23 +265,6 @@ QPDFParser::parse(bool& empty, bool content_stream)
255 265 is_null = true;
256 266 }
257 267  
258   - if (bad) {
259   - ++bad_count;
260   - good_count = 0;
261   - } else {
262   - ++good_count;
263   - if (good_count > 3) {
264   - bad_count = 0;
265   - }
266   - }
267   - if (bad_count > 5) {
268   - // We had too many consecutive errors without enough intervening successful objects.
269   - // Give up.
270   - warn("too many errors; giving up on reading object");
271   - state = st_top;
272   - is_null = true;
273   - }
274   -
275 268 switch (state) {
276 269 case st_eof:
277 270 if (state_stack.size() > 1) {
... ... @@ -412,6 +405,21 @@ QPDFParser::setDescription(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parse
412 405 }
413 406 }
414 407  
  408 +bool
  409 +QPDFParser::tooManyBadTokens()
  410 +{
  411 + if (good_count <= 4) {
  412 + if (++bad_count > 5) {
  413 + warn("too many errors; giving up on reading object");
  414 + return true;
  415 + }
  416 + } else {
  417 + bad_count = 1;
  418 + }
  419 + good_count = 0;
  420 + return false;
  421 +}
  422 +
415 423 void
416 424 QPDFParser::warn(QPDFExc const& e) const
417 425 {
... ...
libqpdf/qpdf/QPDFParser.hh
... ... @@ -33,6 +33,7 @@ class QPDFParser
33 33 private:
34 34 enum parser_state_e { st_top, st_start, st_stop, st_eof, st_dictionary, st_array };
35 35  
  36 + bool tooManyBadTokens();
36 37 void warn(qpdf_offset_t offset, std::string const& msg) const;
37 38 void warn(std::string const& msg) const;
38 39 void warn(QPDFExc const&) const;
... ... @@ -43,6 +44,10 @@ class QPDFParser
43 44 QPDFObjectHandle::StringDecrypter* decrypter;
44 45 QPDF* context;
45 46 std::shared_ptr<QPDFValue::Description> description;
  47 + // Number of recent bad tokens.
  48 + int bad_count = 0;
  49 + // Number of good tokens since last bad token. Irrelevant if bad_count == 0.
  50 + int good_count = 0;
46 51 };
47 52  
48 53 #endif // QPDFPARSER_HH
... ...