Commit 37f7a734885f0d3c9dce64fbdb9a57192170686b

Authored by m-holger
1 parent 29cd8f4f

In QPDFParser::parse, refactor handling of bad tokens

libqpdf/QPDFParser.cc
@@ -60,13 +60,10 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -60,13 +60,10 @@ QPDFParser::parse(bool& empty, bool content_stream)
60 state_stack.push_back(st_top); 60 state_stack.push_back(st_top);
61 qpdf_offset_t offset; 61 qpdf_offset_t offset;
62 bool done = false; 62 bool done = false;
63 - int bad_count = 0;  
64 - int good_count = 0;  
65 bool b_contents = false; 63 bool b_contents = false;
66 bool is_null = false; 64 bool is_null = false;
67 65
68 while (!done) { 66 while (!done) {
69 - bool bad = false;  
70 bool indirect_ref = false; 67 bool indirect_ref = false;
71 is_null = false; 68 is_null = false;
72 auto& frame = stack.back(); 69 auto& frame = stack.back();
@@ -80,6 +77,7 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -80,6 +77,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
80 if (!tokenizer.nextToken(*input, object_description)) { 77 if (!tokenizer.nextToken(*input, object_description)) {
81 warn(tokenizer.getErrorMessage()); 78 warn(tokenizer.getErrorMessage());
82 } 79 }
  80 + ++good_count; // optimistically
83 81
84 switch (tokenizer.getType()) { 82 switch (tokenizer.getType()) {
85 case QPDFTokenizer::tt_eof: 83 case QPDFTokenizer::tt_eof:
@@ -87,13 +85,14 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -87,13 +85,14 @@ QPDFParser::parse(bool& empty, bool content_stream)
87 QTC::TC("qpdf", "QPDFParser eof in parse"); 85 QTC::TC("qpdf", "QPDFParser eof in parse");
88 warn("unexpected EOF"); 86 warn("unexpected EOF");
89 } 87 }
90 - bad = true;  
91 state = st_eof; 88 state = st_eof;
92 break; 89 break;
93 90
94 case QPDFTokenizer::tt_bad: 91 case QPDFTokenizer::tt_bad:
95 QTC::TC("qpdf", "QPDFParser bad token in parse"); 92 QTC::TC("qpdf", "QPDFParser bad token in parse");
96 - bad = true; 93 + if (tooManyBadTokens()) {
  94 + return {QPDF_Null::create()};
  95 + }
97 is_null = true; 96 is_null = true;
98 break; 97 break;
99 98
@@ -101,7 +100,9 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -101,7 +100,9 @@ QPDFParser::parse(bool& empty, bool content_stream)
101 case QPDFTokenizer::tt_brace_close: 100 case QPDFTokenizer::tt_brace_close:
102 QTC::TC("qpdf", "QPDFParser bad brace"); 101 QTC::TC("qpdf", "QPDFParser bad brace");
103 warn("treating unexpected brace token as null"); 102 warn("treating unexpected brace token as null");
104 - bad = true; 103 + if (tooManyBadTokens()) {
  104 + return {QPDF_Null::create()};
  105 + }
105 is_null = true; 106 is_null = true;
106 break; 107 break;
107 108
@@ -111,7 +112,9 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -111,7 +112,9 @@ QPDFParser::parse(bool& empty, bool content_stream)
111 } else { 112 } else {
112 QTC::TC("qpdf", "QPDFParser bad array close"); 113 QTC::TC("qpdf", "QPDFParser bad array close");
113 warn("treating unexpected array close token as null"); 114 warn("treating unexpected array close token as null");
114 - bad = true; 115 + if (tooManyBadTokens()) {
  116 + return {QPDF_Null::create()};
  117 + }
115 is_null = true; 118 is_null = true;
116 } 119 }
117 break; 120 break;
@@ -122,7 +125,9 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -122,7 +125,9 @@ QPDFParser::parse(bool& empty, bool content_stream)
122 } else { 125 } else {
123 QTC::TC("qpdf", "QPDFParser bad dictionary close"); 126 QTC::TC("qpdf", "QPDFParser bad dictionary close");
124 warn("unexpected dictionary close token"); 127 warn("unexpected dictionary close token");
125 - bad = true; 128 + if (tooManyBadTokens()) {
  129 + return {QPDF_Null::create()};
  130 + }
126 is_null = true; 131 is_null = true;
127 } 132 }
128 break; 133 break;
@@ -132,7 +137,9 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -132,7 +137,9 @@ QPDFParser::parse(bool& empty, bool content_stream)
132 if (stack.size() > 500) { 137 if (stack.size() > 500) {
133 QTC::TC("qpdf", "QPDFParser too deep"); 138 QTC::TC("qpdf", "QPDFParser too deep");
134 warn("ignoring excessively deeply nested data structure"); 139 warn("ignoring excessively deeply nested data structure");
135 - bad = true; 140 + if (tooManyBadTokens()) {
  141 + return {QPDF_Null::create()};
  142 + }
136 is_null = true; 143 is_null = true;
137 state = st_top; 144 state = st_top;
138 } else { 145 } else {
@@ -217,7 +224,9 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -217,7 +224,9 @@ QPDFParser::parse(bool& empty, bool content_stream)
217 } else { 224 } else {
218 QTC::TC("qpdf", "QPDFParser treat word as string"); 225 QTC::TC("qpdf", "QPDFParser treat word as string");
219 warn("unknown token while reading object; treating as string"); 226 warn("unknown token while reading object; treating as string");
220 - bad = true; 227 + if (tooManyBadTokens()) {
  228 + return {QPDF_Null::create()};
  229 + }
221 object = QPDF_String::create(value); 230 object = QPDF_String::create(value);
222 } 231 }
223 } 232 }
@@ -239,12 +248,13 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -239,12 +248,13 @@ QPDFParser::parse(bool& empty, bool content_stream)
239 object = QPDF_String::create(val); 248 object = QPDF_String::create(val);
240 } 249 }
241 } 250 }
242 -  
243 break; 251 break;
244 252
245 default: 253 default:
246 warn("treating unknown token type as null while reading object"); 254 warn("treating unknown token type as null while reading object");
247 - bad = true; 255 + if (tooManyBadTokens()) {
  256 + return {QPDF_Null::create()};
  257 + }
248 is_null = true; 258 is_null = true;
249 break; 259 break;
250 } 260 }
@@ -255,23 +265,6 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -255,23 +265,6 @@ QPDFParser::parse(bool& empty, bool content_stream)
255 is_null = true; 265 is_null = true;
256 } 266 }
257 267
258 - if (bad) {  
259 - ++bad_count;  
260 - good_count = 0;  
261 - } else {  
262 - ++good_count;  
263 - if (good_count > 3) {  
264 - bad_count = 0;  
265 - }  
266 - }  
267 - if (bad_count > 5) {  
268 - // We had too many consecutive errors without enough intervening successful objects.  
269 - // Give up.  
270 - warn("too many errors; giving up on reading object");  
271 - state = st_top;  
272 - is_null = true;  
273 - }  
274 -  
275 switch (state) { 268 switch (state) {
276 case st_eof: 269 case st_eof:
277 if (state_stack.size() > 1) { 270 if (state_stack.size() > 1) {
@@ -412,6 +405,21 @@ QPDFParser::setDescription(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parse @@ -412,6 +405,21 @@ QPDFParser::setDescription(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parse
412 } 405 }
413 } 406 }
414 407
  // Record one bad token and report whether parsing should be abandoned.
  //
  // parse() calls this at each place where it has just classified the current
  // token as bad; when the result is true, parse() returns a null object.
  //
  // parse() increments good_count for every token before classifying it, so
  // on entry good_count already includes the current (bad) token. Hence
  // good_count <= 4 means at most three good tokens preceded this one since
  // the previous bad token.
  //
  // Returns true (after issuing a warning) once more than five bad tokens have
  // accumulated without a long enough run of good tokens in between;
  // otherwise returns false.
  408 +bool
  409 +QPDFParser::tooManyBadTokens()
  410 +{
  411 + if (good_count <= 4) { // too few good tokens to forgive earlier bad ones
  412 + if (++bad_count > 5) { // sixth bad token of the current run: give up
  413 + warn("too many errors; giving up on reading object");
  414 + return true; // note: good_count is deliberately left untouched on this path
  415 + }
  416 + } else {
  417 + bad_count = 1; // enough good tokens seen: start a new run with this bad token
  418 + }
  419 + good_count = 0; // restart the count of tokens since the last bad one
  420 + return false;
  421 +}
  422 +
415 void 423 void
416 QPDFParser::warn(QPDFExc const& e) const 424 QPDFParser::warn(QPDFExc const& e) const
417 { 425 {
libqpdf/qpdf/QPDFParser.hh
@@ -33,6 +33,7 @@ class QPDFParser @@ -33,6 +33,7 @@ class QPDFParser
33 private: 33 private:
34 enum parser_state_e { st_top, st_start, st_stop, st_eof, st_dictionary, st_array }; 34 enum parser_state_e { st_top, st_start, st_stop, st_eof, st_dictionary, st_array };
35 35
  36 + bool tooManyBadTokens();
36 void warn(qpdf_offset_t offset, std::string const& msg) const; 37 void warn(qpdf_offset_t offset, std::string const& msg) const;
37 void warn(std::string const& msg) const; 38 void warn(std::string const& msg) const;
38 void warn(QPDFExc const&) const; 39 void warn(QPDFExc const&) const;
@@ -43,6 +44,10 @@ class QPDFParser @@ -43,6 +44,10 @@ class QPDFParser
43 QPDFObjectHandle::StringDecrypter* decrypter; 44 QPDFObjectHandle::StringDecrypter* decrypter;
44 QPDF* context; 45 QPDF* context;
45 std::shared_ptr<QPDFValue::Description> description; 46 std::shared_ptr<QPDFValue::Description> description;
  47 + // Number of recent bad tokens.
  48 + int bad_count = 0;
  49 + // Number of good tokens since last bad token. Irrelevant if bad_count == 0.
  50 + int good_count = 0;
46 }; 51 };
47 52
48 #endif // QPDFPARSER_HH 53 #endif // QPDFPARSER_HH