Commit 172cc6130583d3c30df3fcea22528afca4b12e5f

Authored by m-holger
1 parent 5a1bf035

Remove redundant code in QPDFParser::parse and parseRemainder

Also, fix test cases.
libqpdf/QPDFParser.cc
@@ -21,7 +21,6 @@ @@ -21,7 +21,6 @@
21 21
22 #include <memory> 22 #include <memory>
23 23
24 -  
25 QPDFObjectHandle 24 QPDFObjectHandle
26 QPDFParser::parse(bool& empty, bool content_stream) 25 QPDFParser::parse(bool& empty, bool content_stream)
27 { 26 {
@@ -30,327 +29,110 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -30,327 +29,110 @@ QPDFParser::parse(bool& empty, bool content_stream)
30 // effect of reading the object and changing the file pointer. If you do this, it will cause a 29 // effect of reading the object and changing the file pointer. If you do this, it will cause a
31 // logic error to be thrown from QPDF::inParse(). 30 // logic error to be thrown from QPDF::inParse().
32 31
33 - const static std::shared_ptr<QPDFObject> null_oh = QPDF_Null::create();  
34 QPDF::ParseGuard pg(context); 32 QPDF::ParseGuard pg(context);
35 -  
36 empty = false; 33 empty = false;
37 34
38 std::shared_ptr<QPDFObject> object; 35 std::shared_ptr<QPDFObject> object;
39 - bool set_offset = false;  
40 -  
41 -// std::vector<StackFrame> stack{{input, st_top}};  
42 - stack.clear(); // NEW  
43 - stack.emplace_back(input, st_top); // NEW  
44 - bool done = false;  
45 - bool b_contents = false;  
46 - bool is_null = false;  
47 - frame = &stack.back(); // CHANGED 36 + stack.clear();
  37 + stack.emplace_back(input, st_top);
  38 + frame = &stack.back();
  39 + object = nullptr;
48 40
49 - while (!done) {  
50 - bool indirect_ref = false;  
51 - is_null = false;  
52 - object = nullptr;  
53 - set_offset = false; 41 + if (!tokenizer.nextToken(*input, object_description)) {
  42 + warn(tokenizer.getErrorMessage());
  43 + }
54 44
55 - if (!tokenizer.nextToken(*input, object_description)) {  
56 - warn(tokenizer.getErrorMessage()); 45 + switch (tokenizer.getType()) {
  46 + case QPDFTokenizer::tt_eof:
  47 + if (content_stream) {
  48 + // In content stream mode, leave object uninitialized to indicate EOF
  49 + return {};
57 } 50 }
58 - ++good_count; // optimistically  
59 -  
60 - switch (tokenizer.getType()) {  
61 - case QPDFTokenizer::tt_eof:  
62 - if (stack.size() > 1) {  
63 - warn("parse error while reading object");  
64 - } 51 + QTC::TC("qpdf", "QPDFParser eof in parse");
  52 + warn("unexpected EOF");
  53 + return {QPDF_Null::create()};
  54 +
  55 + case QPDFTokenizer::tt_bad:
  56 + QTC::TC("qpdf", "QPDFParser bad token in parse");
  57 + return {QPDF_Null::create()};
  58 +
  59 + case QPDFTokenizer::tt_brace_open:
  60 + case QPDFTokenizer::tt_brace_close:
  61 + QTC::TC("qpdf", "QPDFParser bad brace");
  62 + warn("treating unexpected brace token as null");
  63 + return {QPDF_Null::create()};
  64 +
  65 + case QPDFTokenizer::tt_array_close:
  66 + QTC::TC("qpdf", "QPDFParser bad array close");
  67 + warn("treating unexpected array close token as null");
  68 + return {QPDF_Null::create()};
  69 +
  70 + case QPDFTokenizer::tt_dict_close:
  71 + QTC::TC("qpdf", "QPDFParser bad dictionary close");
  72 + warn("unexpected dictionary close token");
  73 + return {QPDF_Null::create()};
  74 +
  75 + case QPDFTokenizer::tt_array_open:
  76 + case QPDFTokenizer::tt_dict_open:
  77 + stack.emplace_back(
  78 + input,
  79 + (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary);
  80 + return parseRemainder(content_stream);
  81 +
  82 + case QPDFTokenizer::tt_bool:
  83 + object = QPDF_Bool::create((tokenizer.getValue() == "true"));
  84 + break;
  85 +
  86 + case QPDFTokenizer::tt_null:
  87 + return {QPDF_Null::create()};
  88 +
  89 + case QPDFTokenizer::tt_integer:
  90 + object = QPDF_Integer::create(QUtil::string_to_ll(tokenizer.getValue().c_str()));
  91 + break;
  92 +
  93 + case QPDFTokenizer::tt_real:
  94 + object = QPDF_Real::create(tokenizer.getValue());
  95 + break;
  96 +
  97 + case QPDFTokenizer::tt_name:
  98 + object = QPDF_Name::create(tokenizer.getValue());
  99 + break;
  100 +
  101 + case QPDFTokenizer::tt_word:
  102 + {
  103 + auto const& value = tokenizer.getValue();
65 if (content_stream) { 104 if (content_stream) {
66 - // In content stream mode, leave object uninitialized to indicate EOF  
67 - return {};  
68 - }  
69 -// QTC::TC("qpdf", "QPDFParser eof in parse");  
70 - warn("unexpected EOF");  
71 - return {QPDF_Null::create()};  
72 -  
73 - case QPDFTokenizer::tt_bad:  
74 -// QTC::TC("qpdf", "QPDFParser bad token in parse");  
75 - if (tooManyBadTokens()) {  
76 - return {QPDF_Null::create()};  
77 - }  
78 - is_null = true;  
79 - break;  
80 -  
81 - case QPDFTokenizer::tt_brace_open:  
82 - case QPDFTokenizer::tt_brace_close:  
83 -// QTC::TC("qpdf", "QPDFParser bad brace");  
84 - warn("treating unexpected brace token as null");  
85 - if (tooManyBadTokens()) {  
86 - return {QPDF_Null::create()};  
87 - }  
88 - is_null = true;  
89 - break;  
90 -  
91 - case QPDFTokenizer::tt_array_close:  
92 - if (frame->state == st_array) {  
93 - if (stack.size() < 2) {  
94 - throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with "  
95 - "insufficient elements in stack");  
96 - }  
97 - object = QPDF_Array::create(std::move(frame->olist), frame->null_count > 100);  
98 - setDescription(object, frame->offset - 1);  
99 - // The `offset` points to the next of "[". Set the rewind offset to point to the  
100 - // beginning of "[". This has been explicitly tested with whitespace surrounding the  
101 - // array start delimiter. getLastOffset points to the array end token and therefore  
102 - // can't be used here.  
103 - set_offset = true;  
104 - stack.pop_back();  
105 - frame = &stack.back();  
106 - } else {  
107 -// QTC::TC("qpdf", "QPDFParser bad array close");  
108 - warn("treating unexpected array close token as null");  
109 - if (tooManyBadTokens()) {  
110 - return {QPDF_Null::create()};  
111 - }  
112 - is_null = true;  
113 - }  
114 - break;  
115 -  
116 - case QPDFTokenizer::tt_dict_close:  
117 - if (frame->state == st_dictionary) {  
118 - if (stack.size() < 2) {  
119 - throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with "  
120 - "insufficient elements in stack");  
121 - }  
122 -  
123 - // Convert list to map. Alternating elements are keys. Attempt to recover more or  
124 - // less gracefully from invalid dictionaries.  
125 - std::set<std::string> names;  
126 - for (auto& obj: frame->olist) {  
127 - if (obj) {  
128 - if (obj->getTypeCode() == ::ot_name) {  
129 - names.insert(obj->getStringValue());  
130 - }  
131 - }  
132 - }  
133 -  
134 - std::map<std::string, QPDFObjectHandle> dict;  
135 - int next_fake_key = 1;  
136 - for (auto iter = frame->olist.begin(); iter != frame->olist.end();) {  
137 - // Calculate key.  
138 - std::string key;  
139 - if (*iter && (*iter)->getTypeCode() == ::ot_name) {  
140 - key = (*iter)->getStringValue();  
141 - ++iter;  
142 - } else {  
143 - for (bool found_fake = false; !found_fake;) {  
144 - key = "/QPDFFake" + std::to_string(next_fake_key++);  
145 - found_fake = (names.count(key) == 0);  
146 -// QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));  
147 - }  
148 - warn(  
149 - frame->offset,  
150 - "expected dictionary key but found non-name object; inserting key " +  
151 - key);  
152 - }  
153 - if (dict.count(key) > 0) {  
154 -// QTC::TC("qpdf", "QPDFParser duplicate dict key");  
155 - warn(  
156 - frame->offset,  
157 - "dictionary has duplicated key " + key +  
158 - "; last occurrence overrides earlier ones");  
159 - }  
160 -  
161 - // Calculate value.  
162 - std::shared_ptr<QPDFObject> val;  
163 - if (iter != frame->olist.end()) {  
164 - val = *iter;  
165 - ++iter;  
166 - } else {  
167 -// QTC::TC("qpdf", "QPDFParser no val for last key");  
168 - warn(  
169 - frame->offset,  
170 - "dictionary ended prematurely; using null as value for last key");  
171 - val = QPDF_Null::create();  
172 - }  
173 -  
174 - dict[std::move(key)] = std::move(val);  
175 - }  
176 - if (!frame->contents_string.empty() && dict.count("/Type") &&  
177 - dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") &&  
178 - dict.count("/Contents") && dict["/Contents"].isString()) {  
179 - dict["/Contents"] = QPDFObjectHandle::newString(frame->contents_string);  
180 - dict["/Contents"].setParsedOffset(frame->contents_offset);  
181 - }  
182 - object = QPDF_Dictionary::create(std::move(dict));  
183 - setDescription(object, frame->offset - 2);  
184 - // The `offset` points to the next of "<<". Set the rewind offset to point to the  
185 - // beginning of "<<". This has been explicitly tested with whitespace surrounding  
186 - // the dictionary start delimiter. getLastOffset points to the dictionary end token  
187 - // and therefore can't be used here.  
188 - set_offset = true;  
189 - stack.pop_back();  
190 - frame = &stack.back();  
191 - } else {  
192 -// QTC::TC("qpdf", "QPDFParser bad dictionary close");  
193 - warn("unexpected dictionary close token");  
194 - if (tooManyBadTokens()) {  
195 - return {QPDF_Null::create()};  
196 - }  
197 - is_null = true;  
198 - }  
199 - break;  
200 -  
201 - case QPDFTokenizer::tt_array_open:  
202 - case QPDFTokenizer::tt_dict_open:  
203 - if (stack.size() > 500) {  
204 -// QTC::TC("qpdf", "QPDFParser too deep");  
205 - warn("ignoring excessively deeply nested data structure"); 105 + object = QPDF_Operator::create(value);
  106 + } else if (value == "endobj") {
  107 + // We just saw endobj without having read anything. Treat this as a null and do
  108 + // not move the input source's offset.
  109 + input->seek(input->getLastOffset(), SEEK_SET);
  110 + empty = true;
206 return {QPDF_Null::create()}; 111 return {QPDF_Null::create()};
207 } else { 112 } else {
208 - b_contents = false;  
209 - stack.emplace_back(  
210 - input,  
211 - (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array  
212 - : st_dictionary);  
213 - frame = &stack.back();  
214 - return parseRemainder(content_stream); // NEW  
215 - continue;  
216 - }  
217 -  
218 - case QPDFTokenizer::tt_bool:  
219 - object = QPDF_Bool::create((tokenizer.getValue() == "true"));  
220 - break;  
221 -  
222 - case QPDFTokenizer::tt_null:  
223 - is_null = true;  
224 - ++frame->null_count;  
225 -  
226 - break;  
227 -  
228 - case QPDFTokenizer::tt_integer:  
229 - object = QPDF_Integer::create(QUtil::string_to_ll(tokenizer.getValue().c_str()));  
230 - break;  
231 -  
232 - case QPDFTokenizer::tt_real:  
233 - object = QPDF_Real::create(tokenizer.getValue());  
234 - break;  
235 -  
236 - case QPDFTokenizer::tt_name:  
237 - {  
238 - auto const& name = tokenizer.getValue();  
239 - object = QPDF_Name::create(name);  
240 -  
241 - if (name == "/Contents") {  
242 - b_contents = true;  
243 - } else {  
244 - b_contents = false;  
245 - }  
246 - }  
247 - break;  
248 -  
249 - case QPDFTokenizer::tt_word:  
250 - {  
251 - auto const& value = tokenizer.getValue();  
252 - auto size = frame->olist.size();  
253 - if (content_stream) {  
254 - object = QPDF_Operator::create(value);  
255 - } else if (  
256 - value == "R" && frame->state != st_top && size >= 2 && frame->olist.back() &&  
257 - frame->olist.back()->getTypeCode() == ::ot_integer &&  
258 - !frame->olist.back()->getObjGen().isIndirect() && frame->olist.at(size - 2) &&  
259 - frame->olist.at(size - 2)->getTypeCode() == ::ot_integer &&  
260 - !frame->olist.at(size - 2)->getObjGen().isIndirect()) {  
261 - if (context == nullptr) {  
262 -// QTC::TC("qpdf", "QPDFParser indirect without context");  
263 - throw std::logic_error("QPDFObjectHandle::parse called without context on "  
264 - "an object with indirect references");  
265 - }  
266 - auto ref_og = QPDFObjGen(  
267 - QPDFObjectHandle(frame->olist.at(size - 2)).getIntValueAsInt(),  
268 - QPDFObjectHandle(frame->olist.back()).getIntValueAsInt());  
269 - if (ref_og.isIndirect()) {  
270 - // This action has the desirable side effect of causing dangling references  
271 - // (references to indirect objects that don't appear in the PDF) in any  
272 - // parsed object to appear in the object cache.  
273 - object = context->getObject(ref_og).obj;  
274 - indirect_ref = true;  
275 - } else {  
276 -// QTC::TC("qpdf", "QPDFParser indirect with 0 objid");  
277 - is_null = true;  
278 - }  
279 - frame->olist.pop_back();  
280 - frame->olist.pop_back();  
281 - } else if ((value == "endobj") && (frame->state == st_top)) {  
282 - // We just saw endobj without having read anything. Treat this as a null and do  
283 - // not move the input source's offset.  
284 - is_null = true;  
285 - input->seek(input->getLastOffset(), SEEK_SET);  
286 - empty = true;  
287 - } else {  
288 -// QTC::TC("qpdf", "QPDFParser treat word as string");  
289 - warn("unknown token while reading object; treating as string");  
290 - if (tooManyBadTokens()) {  
291 - return {QPDF_Null::create()};  
292 - }  
293 - object = QPDF_String::create(value);  
294 - }  
295 - }  
296 - break;  
297 -  
298 - case QPDFTokenizer::tt_string:  
299 - {  
300 - auto const& val = tokenizer.getValue();  
301 - if (decrypter) {  
302 - if (b_contents) {  
303 - frame->contents_string = val;  
304 - frame->contents_offset = input->getLastOffset();  
305 - b_contents = false;  
306 - }  
307 - std::string s{val};  
308 - decrypter->decryptString(s);  
309 - object = QPDF_String::create(s);  
310 - } else {  
311 - object = QPDF_String::create(val);  
312 - } 113 + QTC::TC("qpdf", "QPDFParser treat word as string");
  114 + warn("unknown token while reading object; treating as string");
  115 + object = QPDF_String::create(value);
313 } 116 }
314 - break;  
315 -  
316 - default:  
317 - warn("treating unknown token type as null while reading object");  
318 - if (tooManyBadTokens()) {  
319 - return {QPDF_Null::create()};  
320 - }  
321 - is_null = true;  
322 - break;  
323 } 117 }
324 -  
325 - if (object == nullptr && !is_null) {  
326 - throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object"); 118 + break;
  119 +
  120 + case QPDFTokenizer::tt_string:
  121 + if (decrypter) {
  122 + std::string s{tokenizer.getValue()};
  123 + decrypter->decryptString(s);
  124 + object = QPDF_String::create(s);
  125 + } else {
  126 + object = QPDF_String::create(tokenizer.getValue());
327 } 127 }
  128 + break;
328 129
329 - switch (frame->state) {  
330 - case st_dictionary:  
331 - case st_array:  
332 - if (is_null) {  
333 - object = null_oh;  
334 - // No need to set description for direct nulls - they probably will become implicit.  
335 - } else if (!indirect_ref && !set_offset) {  
336 - setDescription(object, input->getLastOffset());  
337 - }  
338 - set_offset = true;  
339 - frame->olist.push_back(object);  
340 - break;  
341 -  
342 - case st_top:  
343 - done = true;  
344 - break;  
345 - } 130 + default:
  131 + warn("treating unknown token type as null while reading object");
  132 + return {QPDF_Null::create()};
346 } 133 }
347 134
348 - if (is_null) {  
349 - object = QPDF_Null::create();  
350 - }  
351 - if (!set_offset) {  
352 - setDescription(object, frame->offset);  
353 - } 135 + setDescription(object, frame->offset);
354 return object; 136 return object;
355 } 137 }
356 138
@@ -363,18 +145,15 @@ QPDFParser::parseRemainder(bool content_stream) @@ -363,18 +145,15 @@ QPDFParser::parseRemainder(bool content_stream)
363 // logic error to be thrown from QPDF::inParse(). 145 // logic error to be thrown from QPDF::inParse().
364 146
365 const static std::shared_ptr<QPDFObject> null_oh = QPDF_Null::create(); 147 const static std::shared_ptr<QPDFObject> null_oh = QPDF_Null::create();
366 -// QPDF::ParseGuard pg(context);  
367 -  
368 -// empty = false;  
369 148
370 std::shared_ptr<QPDFObject> object; 149 std::shared_ptr<QPDFObject> object;
371 bool set_offset = false; 150 bool set_offset = false;
372 151
373 -// std::vector<StackFrame> stack{{input, st_top},};  
374 bool done = false; 152 bool done = false;
375 bool b_contents = false; 153 bool b_contents = false;
376 bool is_null = false; 154 bool is_null = false;
377 frame = &stack.back(); // CHANGED 155 frame = &stack.back(); // CHANGED
  156 + bad_count = 0;
378 157
379 while (!done) { 158 while (!done) {
380 bool indirect_ref = false; 159 bool indirect_ref = false;
@@ -389,19 +168,17 @@ QPDFParser::parseRemainder(bool content_stream) @@ -389,19 +168,17 @@ QPDFParser::parseRemainder(bool content_stream)
389 168
390 switch (tokenizer.getType()) { 169 switch (tokenizer.getType()) {
391 case QPDFTokenizer::tt_eof: 170 case QPDFTokenizer::tt_eof:
392 - if (stack.size() > 1) {  
393 - warn("parse error while reading object");  
394 - } 171 + warn("parse error while reading object");
395 if (content_stream) { 172 if (content_stream) {
396 // In content stream mode, leave object uninitialized to indicate EOF 173 // In content stream mode, leave object uninitialized to indicate EOF
397 return {}; 174 return {};
398 } 175 }
399 - QTC::TC("qpdf", "QPDFParser eof in parse"); 176 + QTC::TC("qpdf", "QPDFParser eof in parseRemainder");
400 warn("unexpected EOF"); 177 warn("unexpected EOF");
401 return {QPDF_Null::create()}; 178 return {QPDF_Null::create()};
402 179
403 case QPDFTokenizer::tt_bad: 180 case QPDFTokenizer::tt_bad:
404 - QTC::TC("qpdf", "QPDFParser bad token in parse"); 181 + QTC::TC("qpdf", "QPDFParser bad token in parseRemainder");
405 if (tooManyBadTokens()) { 182 if (tooManyBadTokens()) {
406 return {QPDF_Null::create()}; 183 return {QPDF_Null::create()};
407 } 184 }
@@ -410,7 +187,7 @@ QPDFParser::parseRemainder(bool content_stream) @@ -410,7 +187,7 @@ QPDFParser::parseRemainder(bool content_stream)
410 187
411 case QPDFTokenizer::tt_brace_open: 188 case QPDFTokenizer::tt_brace_open:
412 case QPDFTokenizer::tt_brace_close: 189 case QPDFTokenizer::tt_brace_close:
413 - QTC::TC("qpdf", "QPDFParser bad brace"); 190 + QTC::TC("qpdf", "QPDFParser bad brace in parseRemainder");
414 warn("treating unexpected brace token as null"); 191 warn("treating unexpected brace token as null");
415 if (tooManyBadTokens()) { 192 if (tooManyBadTokens()) {
416 return {QPDF_Null::create()}; 193 return {QPDF_Null::create()};
@@ -434,7 +211,7 @@ QPDFParser::parseRemainder(bool content_stream) @@ -434,7 +211,7 @@ QPDFParser::parseRemainder(bool content_stream)
434 stack.pop_back(); 211 stack.pop_back();
435 frame = &stack.back(); 212 frame = &stack.back();
436 } else { 213 } else {
437 - QTC::TC("qpdf", "QPDFParser bad array close"); 214 + QTC::TC("qpdf", "QPDFParser bad array close in parseRemainder");
438 warn("treating unexpected array close token as null"); 215 warn("treating unexpected array close token as null");
439 if (tooManyBadTokens()) { 216 if (tooManyBadTokens()) {
440 return {QPDF_Null::create()}; 217 return {QPDF_Null::create()};
@@ -519,7 +296,7 @@ QPDFParser::parseRemainder(bool content_stream) @@ -519,7 +296,7 @@ QPDFParser::parseRemainder(bool content_stream)
519 stack.pop_back(); 296 stack.pop_back();
520 frame = &stack.back(); 297 frame = &stack.back();
521 } else { 298 } else {
522 - QTC::TC("qpdf", "QPDFParser bad dictionary close"); 299 + QTC::TC("qpdf", "QPDFParser bad dictionary close in parseRemainder");
523 warn("unexpected dictionary close token"); 300 warn("unexpected dictionary close token");
524 if (tooManyBadTokens()) { 301 if (tooManyBadTokens()) {
525 return {QPDF_Null::create()}; 302 return {QPDF_Null::create()};
@@ -582,7 +359,7 @@ QPDFParser::parseRemainder(bool content_stream) @@ -582,7 +359,7 @@ QPDFParser::parseRemainder(bool content_stream)
582 if (content_stream) { 359 if (content_stream) {
583 object = QPDF_Operator::create(value); 360 object = QPDF_Operator::create(value);
584 } else if ( 361 } else if (
585 - value == "R" && frame->state != st_top && size >= 2 && frame->olist.back() && 362 + value == "R" && size >= 2 && frame->olist.back() &&
586 frame->olist.back()->getTypeCode() == ::ot_integer && 363 frame->olist.back()->getTypeCode() == ::ot_integer &&
587 !frame->olist.back()->getObjGen().isIndirect() && frame->olist.at(size - 2) && 364 !frame->olist.back()->getObjGen().isIndirect() && frame->olist.at(size - 2) &&
588 frame->olist.at(size - 2)->getTypeCode() == ::ot_integer && 365 frame->olist.at(size - 2)->getTypeCode() == ::ot_integer &&
@@ -607,14 +384,8 @@ QPDFParser::parseRemainder(bool content_stream) @@ -607,14 +384,8 @@ QPDFParser::parseRemainder(bool content_stream)
607 } 384 }
608 frame->olist.pop_back(); 385 frame->olist.pop_back();
609 frame->olist.pop_back(); 386 frame->olist.pop_back();
610 - } else if ((value == "endobj") && (frame->state == st_top)) {  
611 - // We just saw endobj without having read anything. Treat this as a null and do  
612 - // not move the input source's offset.  
613 - is_null = true;  
614 - input->seek(input->getLastOffset(), SEEK_SET);  
615 -// empty = true;  
616 } else { 387 } else {
617 - QTC::TC("qpdf", "QPDFParser treat word as string"); 388 + QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder");
618 warn("unknown token while reading object; treating as string"); 389 warn("unknown token while reading object; treating as string");
619 if (tooManyBadTokens()) { 390 if (tooManyBadTokens()) {
620 return {QPDF_Null::create()}; 391 return {QPDF_Null::create()};
qpdf/qpdf.testcov
@@ -57,11 +57,14 @@ QPDF trailer lacks size 0 @@ -57,11 +57,14 @@ QPDF trailer lacks size 0
57 QPDF trailer size not integer 0 57 QPDF trailer size not integer 0
58 QPDF trailer prev not integer 0 58 QPDF trailer prev not integer 0
59 QPDFParser bad brace 0 59 QPDFParser bad brace 0
  60 +QPDFParser bad brace in parseRemainder 0
60 QPDFParser bad array close 0 61 QPDFParser bad array close 0
  62 +QPDFParser bad array close in parseRemainder 0
61 QPDF stream without length 0 63 QPDF stream without length 0
62 QPDF stream length not integer 0 64 QPDF stream length not integer 0
63 QPDF missing endstream 0 65 QPDF missing endstream 0
64 QPDFParser bad dictionary close 0 66 QPDFParser bad dictionary close 0
  67 +QPDFParser bad dictionary close in parseRemainder 0
65 QPDF can't find xref 0 68 QPDF can't find xref 0
66 QPDFTokenizer bad ) 0 69 QPDFTokenizer bad ) 0
67 QPDFTokenizer bad > 0 70 QPDFTokenizer bad > 0
@@ -258,6 +261,7 @@ QPDFParser indirect with 0 objid 0 @@ -258,6 +261,7 @@ QPDFParser indirect with 0 objid 0
258 QPDF object id 0 0 261 QPDF object id 0 0
259 QPDF recursion loop in resolve 0 262 QPDF recursion loop in resolve 0
260 QPDFParser treat word as string 0 263 QPDFParser treat word as string 0
  264 +QPDFParser treat word as string in parseRemainder 0
261 QPDFParser found fake 1 265 QPDFParser found fake 1
262 QPDFParser no val for last key 0 266 QPDFParser no val for last key 0
263 QPDF resolve failure to null 0 267 QPDF resolve failure to null 0
@@ -289,7 +293,9 @@ QPDFObjectHandle coalesce called on stream 0 @@ -289,7 +293,9 @@ QPDFObjectHandle coalesce called on stream 0
289 QPDFObjectHandle coalesce provide stream data 0 293 QPDFObjectHandle coalesce provide stream data 0
290 QPDF_Stream bad token at end during normalize 0 294 QPDF_Stream bad token at end during normalize 0
291 QPDFParser bad token in parse 0 295 QPDFParser bad token in parse 0
  296 +QPDFParser bad token in parseRemainder 0
292 QPDFParser eof in parse 0 297 QPDFParser eof in parse 0
  298 +QPDFParser eof in parseRemainder 0
293 QPDFObjectHandle array bounds 0 299 QPDFObjectHandle array bounds 0
294 QPDFObjectHandle boolean returning false 0 300 QPDFObjectHandle boolean returning false 0
295 QPDFObjectHandle integer returning 0 0 301 QPDFObjectHandle integer returning 0 0
qpdf/qtest/parsing.test
@@ -17,7 +17,7 @@ my $td = new TestDriver('parsing'); @@ -17,7 +17,7 @@ my $td = new TestDriver('parsing');
17 my $n_tests = 17; 17 my $n_tests = 17;
18 18
19 $td->runtest("parse objects from string", 19 $td->runtest("parse objects from string",
20 - {$td->COMMAND => "test_driver 31 good1.qdf"}, 20 + {$td->COMMAND => "test_driver 31 bad39.qdf"},
21 {$td->FILE => "parse-object.out", $td->EXIT_STATUS => 0}, 21 {$td->FILE => "parse-object.out", $td->EXIT_STATUS => 0},
22 $td->NORMALIZE_NEWLINES); 22 $td->NORMALIZE_NEWLINES);
23 $td->runtest("EOF terminating literal tokens", 23 $td->runtest("EOF terminating literal tokens",
qpdf/qtest/qpdf/bad39.qdf 0 → 100644
  1 +%PDF-1.3
  2 +%¿÷¢þ
  3 +%QDF-1.0
  4 +
  5 +%% Original object ID: 1 0
  6 +1 0 obj
  7 +<<
  8 + /Pages 2 0 R
  9 + /Type /Catalog
  10 +>>
  11 +endobj
  12 +
  13 +%% Original object ID: 2 0
  14 +2 0 obj
  15 +<<
  16 + /Count 1
  17 + /Kids [
  18 + 3 0 R
  19 + ]
  20 + /Type /Pages
  21 +>>
  22 +endobj
  23 +
  24 +%% Page 1
  25 +%% Original object ID: 3 0
  26 +3 0 obj
  27 +<<
  28 + /Contents 4 0 R
  29 + /MediaBox [
  30 + 0
  31 + 0
  32 + 612
  33 + 792
  34 + ]
  35 + /Parent 2 0 R
  36 + /Resources <<
  37 + /Font <<
  38 + /F1 6 0 R
  39 + >>
  40 + /ProcSet 7 0 R
  41 + >>
  42 + /Type /Page
  43 +>>
  44 +endobj
  45 +
  46 +%% Contents for page 1
  47 +%% Original object ID: 4 0
  48 +4 0 obj
  49 +<<
  50 + /Length 5 0 R
  51 +>>
  52 +stream
  53 +BT
  54 + /F1 24 Tf
  55 + 72 720 Td
  56 + (Potato) Tj
  57 +ET
  58 +endstream
  59 +endobj
  60 +
  61 +5 0 obj
  62 +44
  63 +endobj
  64 +
  65 +%% Original object ID: 6 0
  66 +6 0 obj
  67 +<<
  68 + /BaseFont /Helvetica
  69 + /Encoding /WinAnsiEncoding
  70 + /Name /F1
  71 + /Subtype /Type1
  72 + /Type /Font
  73 +>>
  74 +endobj
  75 +
  76 +%% Original object ID: 5 0
  77 +7 0 obj
  78 +[
  79 + /PDF
  80 + /Text
  81 +]
  82 +endobj
  83 +
  84 +xref
  85 +0 8
  86 +0000000000 65535 f
  87 +0000000052 00000 n
  88 +0000000133 00000 n
  89 +0000000242 00000 n
  90 +0000000484 00000 n
  91 +0000000583 00000 n
  92 +0000000629 00000 n
  93 +0000001113 00000 n
  94 +trailer <<
  95 + /Root 1 0 R
  96 + /Size 8
  97 + /ID [<31415926535897932384626433832795><31415926535897932384626433832795>]
  98 +>>
  99 +startxref
  100 +809
  101 +%%EOF
  102 +7 0 obj
qpdf/qtest/qpdf/parse-object.out
@@ -2,4 +2,10 @@ @@ -2,4 +2,10 @@
2 logic error parsing indirect: QPDFObjectHandle::parse called without context on an object with indirect references 2 logic error parsing indirect: QPDFObjectHandle::parse called without context on an object with indirect references
3 trailing data: parsed object (trailing test): trailing data found parsing object from string 3 trailing data: parsed object (trailing test): trailing data found parsing object from string
4 WARNING: parsed object (offset 9): unknown token while reading object; treating as string 4 WARNING: parsed object (offset 9): unknown token while reading object; treating as string
  5 +WARNING: parsed object: treating unexpected brace token as null
  6 +WARNING: parsed object: treating unexpected brace token as null
  7 +WARNING: parsed object: unexpected dictionary close token
  8 +WARNING: bad39.qdf (object 7 0, offset 1121): unexpected EOF
  9 +WARNING: bad39.qdf (object 7 0, offset 1121): expected endobj
  10 +WARNING: bad39.qdf (object 7 0, offset 1121): EOF after endobj
5 test 31 done 11 test 31 done
qpdf/test_driver.cc
@@ -1195,6 +1195,13 @@ test_31(QPDF& pdf, char const* arg2) @@ -1195,6 +1195,13 @@ test_31(QPDF& pdf, char const* arg2)
1195 // mistakenly parsed as an indirect object. 1195 // mistakenly parsed as an indirect object.
1196 assert(QPDFObjectHandle::parse(&pdf, "[5 0 R 0 R /X]").unparse() == "[ 5 0 R 0 (R) /X ]"); 1196 assert(QPDFObjectHandle::parse(&pdf, "[5 0 R 0 R /X]").unparse() == "[ 5 0 R 0 (R) /X ]");
1197 assert(QPDFObjectHandle::parse(&pdf, "[1 0 R]", "indirect test").unparse() == "[ 1 0 R ]"); 1197 assert(QPDFObjectHandle::parse(&pdf, "[1 0 R]", "indirect test").unparse() == "[ 1 0 R ]");
  1198 + // TC:QPDFParser bad brace
  1199 + assert(QPDFObjectHandle::parse(&pdf, "}").unparse() == "null");
  1200 + assert(QPDFObjectHandle::parse(&pdf, "{").unparse() == "null");
  1201 + // TC:QPDFParser bad dictionary close
  1202 + assert(QPDFObjectHandle::parse(&pdf, ">>").unparse() == "null");
  1203 + // TC:QPDFParser eof in parse
  1204 + assert(QPDFObjectHandle::parse(&pdf, "[7 0 R]").getArrayItem(0).isNull());
1198 } 1205 }
1199 1206
1200 static void 1207 static void