Commit b8fd18ae562ab8bae1d2e67c1ab63ff4ea62124b

Authored by Jay Berkenbilt
2 parents 64c840b1 1285f976

Merge branch 'parse_ref' into work

libqpdf/QPDFParser.cc
@@ -21,22 +21,7 @@ @@ -21,22 +21,7 @@
21 21
22 #include <memory> 22 #include <memory>
23 23
24 -namespace  
25 -{  
26 - struct StackFrame  
27 - {  
28 - StackFrame(std::shared_ptr<InputSource> input) :  
29 - offset(input->tell())  
30 - {  
31 - }  
32 -  
33 - std::vector<std::shared_ptr<QPDFObject>> olist;  
34 - qpdf_offset_t offset;  
35 - std::string contents_string{""};  
36 - qpdf_offset_t contents_offset{-1};  
37 - int null_count{0};  
38 - };  
39 -} // namespace 24 +using ObjectPtr = std::shared_ptr<QPDFObject>;
40 25
41 QPDFObjectHandle 26 QPDFObjectHandle
42 QPDFParser::parse(bool& empty, bool content_stream) 27 QPDFParser::parse(bool& empty, bool content_stream)
@@ -46,371 +31,457 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -46,371 +31,457 @@ QPDFParser::parse(bool& empty, bool content_stream)
46 // effect of reading the object and changing the file pointer. If you do this, it will cause a 31 // effect of reading the object and changing the file pointer. If you do this, it will cause a
47 // logic error to be thrown from QPDF::inParse(). 32 // logic error to be thrown from QPDF::inParse().
48 33
49 - const static std::shared_ptr<QPDFObject> null_oh = QPDF_Null::create();  
50 QPDF::ParseGuard pg(context); 34 QPDF::ParseGuard pg(context);
51 -  
52 empty = false; 35 empty = false;
  36 + start = input->tell();
53 37
54 - std::shared_ptr<QPDFObject> object;  
55 - bool set_offset = false;  
56 -  
57 - std::vector<StackFrame> stack;  
58 - stack.emplace_back(input);  
59 - std::vector<parser_state_e> state_stack;  
60 - state_stack.push_back(st_top);  
61 - qpdf_offset_t offset;  
62 - bool done = false;  
63 - int bad_count = 0;  
64 - int good_count = 0;  
65 - bool b_contents = false;  
66 - bool is_null = false; 38 + if (!tokenizer.nextToken(*input, object_description)) {
  39 + warn(tokenizer.getErrorMessage());
  40 + }
  41 +
  42 + switch (tokenizer.getType()) {
  43 + case QPDFTokenizer::tt_eof:
  44 + if (content_stream) {
  45 + // In content stream mode, leave object uninitialized to indicate EOF
  46 + return {};
  47 + }
  48 + QTC::TC("qpdf", "QPDFParser eof in parse");
  49 + warn("unexpected EOF");
  50 + return {QPDF_Null::create()};
  51 +
  52 + case QPDFTokenizer::tt_bad:
  53 + QTC::TC("qpdf", "QPDFParser bad token in parse");
  54 + return {QPDF_Null::create()};
  55 +
  56 + case QPDFTokenizer::tt_brace_open:
  57 + case QPDFTokenizer::tt_brace_close:
  58 + QTC::TC("qpdf", "QPDFParser bad brace");
  59 + warn("treating unexpected brace token as null");
  60 + return {QPDF_Null::create()};
  61 +
  62 + case QPDFTokenizer::tt_array_close:
  63 + QTC::TC("qpdf", "QPDFParser bad array close");
  64 + warn("treating unexpected array close token as null");
  65 + return {QPDF_Null::create()};
  66 +
  67 + case QPDFTokenizer::tt_dict_close:
  68 + QTC::TC("qpdf", "QPDFParser bad dictionary close");
  69 + warn("unexpected dictionary close token");
  70 + return {QPDF_Null::create()};
  71 +
  72 + case QPDFTokenizer::tt_array_open:
  73 + case QPDFTokenizer::tt_dict_open:
  74 + stack.clear();
  75 + stack.emplace_back(
  76 + input,
  77 + (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key);
  78 + frame = &stack.back();
  79 + return parseRemainder(content_stream);
  80 +
  81 + case QPDFTokenizer::tt_bool:
  82 + return withDescription<QPDF_Bool>(tokenizer.getValue() == "true");
  83 +
  84 + case QPDFTokenizer::tt_null:
  85 + return {QPDF_Null::create()};
  86 +
  87 + case QPDFTokenizer::tt_integer:
  88 + return withDescription<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str()));
  89 +
  90 + case QPDFTokenizer::tt_real:
  91 + return withDescription<QPDF_Real>(tokenizer.getValue());
  92 +
  93 + case QPDFTokenizer::tt_name:
  94 + return withDescription<QPDF_Name>(tokenizer.getValue());
  95 +
  96 + case QPDFTokenizer::tt_word:
  97 + {
  98 + auto const& value = tokenizer.getValue();
  99 + if (content_stream) {
  100 + return withDescription<QPDF_Operator>(value);
  101 + } else if (value == "endobj") {
  102 + // We just saw endobj without having read anything. Treat this as a null and do
  103 + // not move the input source's offset.
  104 + input->seek(input->getLastOffset(), SEEK_SET);
  105 + empty = true;
  106 + return {QPDF_Null::create()};
  107 + } else {
  108 + QTC::TC("qpdf", "QPDFParser treat word as string");
  109 + warn("unknown token while reading object; treating as string");
  110 + return withDescription<QPDF_String>(value);
  111 + }
  112 + }
  113 +
  114 + case QPDFTokenizer::tt_string:
  115 + if (decrypter) {
  116 + std::string s{tokenizer.getValue()};
  117 + decrypter->decryptString(s);
  118 + return withDescription<QPDF_String>(s);
  119 + } else {
  120 + return withDescription<QPDF_String>(tokenizer.getValue());
  121 + }
  122 +
  123 + default:
  124 + warn("treating unknown token type as null while reading object");
  125 + return {QPDF_Null::create()};
  126 + }
  127 +}
67 128
68 - while (!done) {  
69 - bool bad = false;  
70 - bool indirect_ref = false;  
71 - is_null = false;  
72 - auto& frame = stack.back();  
73 - auto& olist = frame.olist;  
74 - parser_state_e state = state_stack.back();  
75 - offset = frame.offset; 129 +QPDFObjectHandle
  130 +QPDFParser::parseRemainder(bool content_stream)
  131 +{
  132 + // This method must take care not to resolve any objects. Don't check the type of any object
  133 + // without first ensuring that it is a direct object. Otherwise, doing so may have the side
  134 + // effect of reading the object and changing the file pointer. If you do this, it will cause a
  135 + // logic error to be thrown from QPDF::inParse().
76 136
77 - object = nullptr;  
78 - set_offset = false; 137 + bad_count = 0;
  138 + bool b_contents = false;
79 139
  140 + while (true) {
80 if (!tokenizer.nextToken(*input, object_description)) { 141 if (!tokenizer.nextToken(*input, object_description)) {
81 warn(tokenizer.getErrorMessage()); 142 warn(tokenizer.getErrorMessage());
82 } 143 }
  144 + ++good_count; // optimistically
  145 +
  146 + if (int_count != 0) {
  147 + // Special handling of indirect references. Treat integer tokens as part of an indirect
  148 + // reference until proven otherwise.
  149 + if (tokenizer.getType() == QPDFTokenizer::tt_integer) {
  150 + if (++int_count > 2) {
  151 + // Process the oldest buffered integer.
  152 + addInt(int_count);
  153 + }
  154 + last_offset_buffer[int_count % 2] = input->getLastOffset();
  155 + int_buffer[int_count % 2] = QUtil::string_to_ll(tokenizer.getValue().c_str());
  156 + continue;
  157 +
  158 + } else if (
  159 + int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word &&
  160 + tokenizer.getValue() == "R") {
  161 + if (context == nullptr) {
  162 + QTC::TC("qpdf", "QPDFParser indirect without context");
  163 + throw std::logic_error("QPDFParser::parse called without context on an object "
  164 + "with indirect references");
  165 + }
  166 + auto ref_og = QPDFObjGen(
  167 + QIntC::to_int(int_buffer[(int_count - 1) % 2]),
  168 + QIntC::to_int(int_buffer[(int_count) % 2]));
  169 + if (ref_og.isIndirect()) {
  170 + // This action has the desirable side effect of causing dangling references
  171 + // (references to indirect objects that don't appear in the PDF) in any parsed
  172 + // object to appear in the object cache.
  173 + add(std::move(context->getObject(ref_og).obj));
  174 + } else {
  175 + QTC::TC("qpdf", "QPDFParser indirect with 0 objid");
  176 + addNull();
  177 + }
  178 + int_count = 0;
  179 + continue;
  180 +
  181 + } else if (int_count > 0) {
  182 + // Process the buffered integers before processing the current token.
  183 + if (int_count > 1) {
  184 + addInt(int_count - 1);
  185 + }
  186 + addInt(int_count);
  187 + int_count = 0;
  188 + }
  189 + }
83 190
84 switch (tokenizer.getType()) { 191 switch (tokenizer.getType()) {
85 case QPDFTokenizer::tt_eof: 192 case QPDFTokenizer::tt_eof:
86 - if (!content_stream) {  
87 - QTC::TC("qpdf", "QPDFParser eof in parse");  
88 - warn("unexpected EOF"); 193 + warn("parse error while reading object");
  194 + if (content_stream) {
  195 + // In content stream mode, leave object uninitialized to indicate EOF
  196 + return {};
89 } 197 }
90 - bad = true;  
91 - state = st_eof;  
92 - break; 198 + QTC::TC("qpdf", "QPDFParser eof in parseRemainder");
  199 + warn("unexpected EOF");
  200 + return {QPDF_Null::create()};
93 201
94 case QPDFTokenizer::tt_bad: 202 case QPDFTokenizer::tt_bad:
95 - QTC::TC("qpdf", "QPDFParser bad token in parse");  
96 - bad = true;  
97 - is_null = true;  
98 - break; 203 + QTC::TC("qpdf", "QPDFParser bad token in parseRemainder");
  204 + if (tooManyBadTokens()) {
  205 + return {QPDF_Null::create()};
  206 + }
  207 + addNull();
  208 + continue;
99 209
100 case QPDFTokenizer::tt_brace_open: 210 case QPDFTokenizer::tt_brace_open:
101 case QPDFTokenizer::tt_brace_close: 211 case QPDFTokenizer::tt_brace_close:
102 - QTC::TC("qpdf", "QPDFParser bad brace"); 212 + QTC::TC("qpdf", "QPDFParser bad brace in parseRemainder");
103 warn("treating unexpected brace token as null"); 213 warn("treating unexpected brace token as null");
104 - bad = true;  
105 - is_null = true;  
106 - break; 214 + if (tooManyBadTokens()) {
  215 + return {QPDF_Null::create()};
  216 + }
  217 + addNull();
  218 + continue;
107 219
108 case QPDFTokenizer::tt_array_close: 220 case QPDFTokenizer::tt_array_close:
109 - if (state == st_array) {  
110 - state = st_stop; 221 + if (frame->state == st_array) {
  222 + auto object = QPDF_Array::create(std::move(frame->olist), frame->null_count > 100);
  223 + setDescription(object, frame->offset - 1);
  224 + // The `offset` points just past the "[". Set the rewind offset to point to the
  225 + // beginning of "[". This has been explicitly tested with whitespace surrounding the
  226 + // array start delimiter. getLastOffset points to the array end token and therefore
  227 + // can't be used here.
  228 + if (stack.size() <= 1) {
  229 + return object;
  230 + }
  231 + stack.pop_back();
  232 + frame = &stack.back();
  233 + add(std::move(object));
111 } else { 234 } else {
112 - QTC::TC("qpdf", "QPDFParser bad array close"); 235 + QTC::TC("qpdf", "QPDFParser bad array close in parseRemainder");
113 warn("treating unexpected array close token as null"); 236 warn("treating unexpected array close token as null");
114 - bad = true;  
115 - is_null = true; 237 + if (tooManyBadTokens()) {
  238 + return {QPDF_Null::create()};
  239 + }
  240 + addNull();
116 } 241 }
117 - break; 242 + continue;
118 243
119 case QPDFTokenizer::tt_dict_close: 244 case QPDFTokenizer::tt_dict_close:
120 - if (state == st_dictionary) {  
121 - state = st_stop; 245 + if (frame->state <= st_dictionary_value) {
  246 + // Attempt to recover more or less gracefully from invalid dictionaries.
  247 + auto& dict = frame->dict;
  248 +
  249 + if (frame->state == st_dictionary_value) {
  250 + QTC::TC("qpdf", "QPDFParser no val for last key");
  251 + warn(
  252 + frame->offset,
  253 + "dictionary ended prematurely; using null as value for last key");
  254 + dict[frame->key] = QPDF_Null::create();
  255 + }
  256 +
  257 + if (!frame->olist.empty())
  258 + fixMissingKeys();
  259 +
  260 + if (!frame->contents_string.empty() && dict.count("/Type") &&
  261 + dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") &&
  262 + dict.count("/Contents") && dict["/Contents"].isString()) {
  263 + dict["/Contents"] = QPDFObjectHandle::newString(frame->contents_string);
  264 + dict["/Contents"].setParsedOffset(frame->contents_offset);
  265 + }
  266 + auto object = QPDF_Dictionary::create(std::move(dict));
  267 + setDescription(object, frame->offset - 2);
  268 + // The `offset` points just past the "<<". Set the rewind offset to point to the
  269 + // beginning of "<<". This has been explicitly tested with whitespace surrounding
  270 + // the dictionary start delimiter. getLastOffset points to the dictionary end token
  271 + // and therefore can't be used here.
  272 + if (stack.size() <= 1) {
  273 + return object;
  274 + }
  275 + stack.pop_back();
  276 + frame = &stack.back();
  277 + add(std::move(object));
122 } else { 278 } else {
123 - QTC::TC("qpdf", "QPDFParser bad dictionary close"); 279 + QTC::TC("qpdf", "QPDFParser bad dictionary close in parseRemainder");
124 warn("unexpected dictionary close token"); 280 warn("unexpected dictionary close token");
125 - bad = true;  
126 - is_null = true; 281 + if (tooManyBadTokens()) {
  282 + return {QPDF_Null::create()};
  283 + }
  284 + addNull();
127 } 285 }
128 - break; 286 + continue;
129 287
130 case QPDFTokenizer::tt_array_open: 288 case QPDFTokenizer::tt_array_open:
131 case QPDFTokenizer::tt_dict_open: 289 case QPDFTokenizer::tt_dict_open:
132 - if (stack.size() > 500) { 290 + if (stack.size() > 499) {
133 QTC::TC("qpdf", "QPDFParser too deep"); 291 QTC::TC("qpdf", "QPDFParser too deep");
134 warn("ignoring excessively deeply nested data structure"); 292 warn("ignoring excessively deeply nested data structure");
135 - bad = true;  
136 - is_null = true;  
137 - state = st_top; 293 + return {QPDF_Null::create()};
138 } else { 294 } else {
139 - state = st_start;  
140 - state_stack.push_back(  
141 - (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array  
142 - : st_dictionary);  
143 b_contents = false; 295 b_contents = false;
144 - stack.emplace_back(input); 296 + stack.emplace_back(
  297 + input,
  298 + (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array
  299 + : st_dictionary_key);
  300 + frame = &stack.back();
  301 + continue;
145 } 302 }
146 - break;  
147 303
148 case QPDFTokenizer::tt_bool: 304 case QPDFTokenizer::tt_bool:
149 - object = QPDF_Bool::create((tokenizer.getValue() == "true"));  
150 - break; 305 + addScalar<QPDF_Bool>(tokenizer.getValue() == "true");
  306 + continue;
151 307
152 case QPDFTokenizer::tt_null: 308 case QPDFTokenizer::tt_null:
153 - is_null = true;  
154 - ++frame.null_count;  
155 -  
156 - break; 309 + addNull();
  310 + continue;
157 311
158 case QPDFTokenizer::tt_integer: 312 case QPDFTokenizer::tt_integer:
159 - object = QPDF_Integer::create(  
160 - QUtil::string_to_ll(std::string(tokenizer.getValue()).c_str()));  
161 - break; 313 + if (!content_stream) {
  314 + // Buffer token in case it is part of an indirect reference.
  315 + last_offset_buffer[1] = input->getLastOffset();
  316 + int_buffer[1] = QUtil::string_to_ll(tokenizer.getValue().c_str());
  317 + int_count = 1;
  318 + } else {
  319 + addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str()));
  320 + }
  321 + continue;
162 322
163 case QPDFTokenizer::tt_real: 323 case QPDFTokenizer::tt_real:
164 - object = QPDF_Real::create(tokenizer.getValue());  
165 - break; 324 + addScalar<QPDF_Real>(tokenizer.getValue());
  325 + continue;
166 326
167 case QPDFTokenizer::tt_name: 327 case QPDFTokenizer::tt_name:
168 - {  
169 - auto name = tokenizer.getValue();  
170 - object = QPDF_Name::create(name);  
171 -  
172 - if (name == "/Contents") {  
173 - b_contents = true;  
174 - } else {  
175 - b_contents = false;  
176 - } 328 + if (frame->state == st_dictionary_key) {
  329 + frame->key = tokenizer.getValue();
  330 + frame->state = st_dictionary_value;
  331 + b_contents = decrypter && frame->key == "/Contents";
  332 + continue;
  333 + } else {
  334 + addScalar<QPDF_Name>(tokenizer.getValue());
177 } 335 }
178 - break; 336 + continue;
179 337
180 case QPDFTokenizer::tt_word: 338 case QPDFTokenizer::tt_word:
181 - {  
182 - auto value = tokenizer.getValue();  
183 - auto size = olist.size();  
184 - if (content_stream) {  
185 - object = QPDF_Operator::create(value);  
186 - } else if (  
187 - value == "R" && state != st_top && size >= 2 && olist.back() &&  
188 - olist.back()->getTypeCode() == ::ot_integer &&  
189 - !olist.back()->getObjGen().isIndirect() && olist.at(size - 2) &&  
190 - olist.at(size - 2)->getTypeCode() == ::ot_integer &&  
191 - !olist.at(size - 2)->getObjGen().isIndirect()) {  
192 - if (context == nullptr) {  
193 - QTC::TC("qpdf", "QPDFParser indirect without context");  
194 - throw std::logic_error("QPDFObjectHandle::parse called without context on "  
195 - "an object with indirect references");  
196 - }  
197 - auto ref_og = QPDFObjGen(  
198 - QPDFObjectHandle(olist.at(size - 2)).getIntValueAsInt(),  
199 - QPDFObjectHandle(olist.back()).getIntValueAsInt());  
200 - if (ref_og.isIndirect()) {  
201 - // This action has the desirable side effect of causing dangling references  
202 - // (references to indirect objects that don't appear in the PDF) in any  
203 - // parsed object to appear in the object cache.  
204 - object = context->getObject(ref_og).obj;  
205 - indirect_ref = true;  
206 - } else {  
207 - QTC::TC("qpdf", "QPDFParser indirect with 0 objid");  
208 - is_null = true;  
209 - }  
210 - olist.pop_back();  
211 - olist.pop_back();  
212 - } else if ((value == "endobj") && (state == st_top)) {  
213 - // We just saw endobj without having read anything. Treat this as a null and do  
214 - // not move the input source's offset.  
215 - is_null = true;  
216 - input->seek(input->getLastOffset(), SEEK_SET);  
217 - empty = true;  
218 - } else {  
219 - QTC::TC("qpdf", "QPDFParser treat word as string");  
220 - warn("unknown token while reading object; treating as string");  
221 - bad = true;  
222 - object = QPDF_String::create(value); 339 + if (content_stream) {
  340 + addScalar<QPDF_Operator>(tokenizer.getValue());
  341 + } else {
  342 + QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder");
  343 + warn("unknown token while reading object; treating as string");
  344 + if (tooManyBadTokens()) {
  345 + return {QPDF_Null::create()};
223 } 346 }
  347 + addScalar<QPDF_String>(tokenizer.getValue());
224 } 348 }
225 - break; 349 + continue;
226 350
227 case QPDFTokenizer::tt_string: 351 case QPDFTokenizer::tt_string:
228 { 352 {
229 - auto val = tokenizer.getValue(); 353 + auto const& val = tokenizer.getValue();
230 if (decrypter) { 354 if (decrypter) {
231 if (b_contents) { 355 if (b_contents) {
232 - frame.contents_string = val;  
233 - frame.contents_offset = input->getLastOffset(); 356 + frame->contents_string = val;
  357 + frame->contents_offset = input->getLastOffset();
234 b_contents = false; 358 b_contents = false;
235 } 359 }
236 std::string s{val}; 360 std::string s{val};
237 decrypter->decryptString(s); 361 decrypter->decryptString(s);
238 - object = QPDF_String::create(s); 362 + addScalar<QPDF_String>(s);
239 } else { 363 } else {
240 - object = QPDF_String::create(val); 364 + addScalar<QPDF_String>(val);
241 } 365 }
242 } 366 }
243 -  
244 - break; 367 + continue;
245 368
246 default: 369 default:
247 warn("treating unknown token type as null while reading object"); 370 warn("treating unknown token type as null while reading object");
248 - bad = true;  
249 - is_null = true;  
250 - break;  
251 - }  
252 -  
253 - if (object == nullptr && !is_null &&  
254 - (!((state == st_start) || (state == st_stop) || (state == st_eof)))) {  
255 - throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object");  
256 - is_null = true;  
257 - }  
258 -  
259 - if (bad) {  
260 - ++bad_count;  
261 - good_count = 0;  
262 - } else {  
263 - ++good_count;  
264 - if (good_count > 3) {  
265 - bad_count = 0; 371 + if (tooManyBadTokens()) {
  372 + return {QPDF_Null::create()};
266 } 373 }
  374 + addNull();
267 } 375 }
268 - if (bad_count > 5) {  
269 - // We had too many consecutive errors without enough intervening successful objects.  
270 - // Give up.  
271 - warn("too many errors; giving up on reading object");  
272 - state = st_top;  
273 - is_null = true;  
274 - } 376 + }
  377 +}
275 378
276 - switch (state) {  
277 - case st_eof:  
278 - if (state_stack.size() > 1) {  
279 - warn("parse error while reading object");  
280 - }  
281 - done = true;  
282 - // In content stream mode, leave object uninitialized to indicate EOF  
283 - if (!content_stream) {  
284 - is_null = true;  
285 - }  
286 - break;  
287 -  
288 - case st_dictionary:  
289 - case st_array:  
290 - if (is_null) {  
291 - object = null_oh;  
292 - // No need to set description for direct nulls - they probably will become implicit.  
293 - } else if (!indirect_ref) {  
294 - setDescription(object, input->getLastOffset());  
295 - }  
296 - set_offset = true;  
297 - olist.push_back(object);  
298 - break; 379 +void
  380 +QPDFParser::add(std::shared_ptr<QPDFObject>&& obj)
  381 +{
  382 + if (frame->state != st_dictionary_value) {
  383 + // If state is st_dictionary_key then there is a missing key. Push onto olist for
  384 + // processing once the tt_dict_close token has been found.
  385 + frame->olist.emplace_back(std::move(obj));
  386 + } else {
  387 + if (auto res = frame->dict.insert_or_assign(frame->key, std::move(obj)); !res.second) {
  388 + warnDuplicateKey();
  389 + }
  390 + frame->state = st_dictionary_key;
  391 + }
  392 +}
299 393
300 - case st_top:  
301 - done = true;  
302 - break; 394 +void
  395 +QPDFParser::addNull()
  396 +{
  397 + const static ObjectPtr null_obj = QPDF_Null::create();
303 398
304 - case st_start:  
305 - break; 399 + if (frame->state != st_dictionary_value) {
  400 + // If state is st_dictionary_key then there is a missing key. Push onto olist for
  401 + // processing once the tt_dict_close token has been found.
  402 + frame->olist.emplace_back(null_obj);
  403 + } else {
  404 + if (auto res = frame->dict.insert_or_assign(frame->key, null_obj); !res.second) {
  405 + warnDuplicateKey();
  406 + }
  407 + frame->state = st_dictionary_key;
  408 + }
  409 + ++frame->null_count;
  410 +}
306 411
307 - case st_stop:  
308 - if ((state_stack.size() < 2) || (stack.size() < 2)) {  
309 - throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with "  
310 - "insufficient elements in stack");  
311 - }  
312 - parser_state_e old_state = state_stack.back();  
313 - state_stack.pop_back();  
314 - if (old_state == st_array) {  
315 - object = QPDF_Array::create(std::move(olist), frame.null_count > 100);  
316 - setDescription(object, offset - 1);  
317 - // The `offset` points to the next of "[". Set the rewind offset to point to the  
318 - // beginning of "[". This has been explicitly tested with whitespace surrounding the  
319 - // array start delimiter. getLastOffset points to the array end token and therefore  
320 - // can't be used here.  
321 - set_offset = true;  
322 - } else if (old_state == st_dictionary) {  
323 - // Convert list to map. Alternating elements are keys. Attempt to recover more or  
324 - // less gracefully from invalid dictionaries.  
325 - std::set<std::string> names;  
326 - for (auto& obj: olist) {  
327 - if (obj) {  
328 - if (obj->getTypeCode() == ::ot_name) {  
329 - names.insert(obj->getStringValue());  
330 - }  
331 - }  
332 - } 412 +void
  413 +QPDFParser::addInt(int count)
  414 +{
  415 + auto obj = QPDF_Integer::create(int_buffer[count % 2]);
  416 + obj->setDescription(context, description, last_offset_buffer[count % 2]);
  417 + add(std::move(obj));
  418 +}
333 419
334 - std::map<std::string, QPDFObjectHandle> dict;  
335 - int next_fake_key = 1;  
336 - for (auto iter = olist.begin(); iter != olist.end();) {  
337 - // Calculate key.  
338 - std::string key;  
339 - if (*iter && (*iter)->getTypeCode() == ::ot_name) {  
340 - key = (*iter)->getStringValue();  
341 - ++iter;  
342 - } else {  
343 - for (bool found_fake = false; !found_fake;) {  
344 - key = "/QPDFFake" + std::to_string(next_fake_key++);  
345 - found_fake = (names.count(key) == 0);  
346 - QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));  
347 - }  
348 - warn(  
349 - offset,  
350 - "expected dictionary key but found non-name object; inserting key " +  
351 - key);  
352 - }  
353 - if (dict.count(key) > 0) {  
354 - QTC::TC("qpdf", "QPDFParser duplicate dict key");  
355 - warn(  
356 - offset,  
357 - "dictionary has duplicated key " + key +  
358 - "; last occurrence overrides earlier ones");  
359 - } 420 +template <typename T, typename... Args>
  421 +void
  422 +QPDFParser::addScalar(Args&&... args)
  423 +{
  424 + auto obj = T::create(args...);
  425 + obj->setDescription(context, description, input->getLastOffset());
  426 + add(std::move(obj));
  427 +}
360 428
361 - // Calculate value.  
362 - std::shared_ptr<QPDFObject> val;  
363 - if (iter != olist.end()) {  
364 - val = *iter;  
365 - ++iter;  
366 - } else {  
367 - QTC::TC("qpdf", "QPDFParser no val for last key");  
368 - warn(  
369 - offset,  
370 - "dictionary ended prematurely; using null as value for last key");  
371 - val = QPDF_Null::create();  
372 - } 429 +template <typename T, typename... Args>
  430 +QPDFObjectHandle
  431 +QPDFParser::withDescription(Args&&... args)
  432 +{
  433 + auto obj = T::create(args...);
  434 + obj->setDescription(context, description, start);
  435 + return {obj};
  436 +}
373 437
374 - dict[std::move(key)] = std::move(val);  
375 - }  
376 - if (!frame.contents_string.empty() && dict.count("/Type") &&  
377 - dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") &&  
378 - dict.count("/Contents") && dict["/Contents"].isString()) {  
379 - dict["/Contents"] = QPDFObjectHandle::newString(frame.contents_string);  
380 - dict["/Contents"].setParsedOffset(frame.contents_offset);  
381 - }  
382 - object = QPDF_Dictionary::create(std::move(dict));  
383 - setDescription(object, offset - 2);  
384 - // The `offset` points to the next of "<<". Set the rewind offset to point to the  
385 - // beginning of "<<". This has been explicitly tested with whitespace surrounding  
386 - // the dictionary start delimiter. getLastOffset points to the dictionary end token  
387 - // and therefore can't be used here.  
388 - set_offset = true;  
389 - }  
390 - stack.pop_back();  
391 - if (state_stack.back() == st_top) {  
392 - done = true;  
393 - } else {  
394 - stack.back().olist.push_back(object);  
395 - }  
396 - } 438 +void
  439 +QPDFParser::setDescription(ObjectPtr& obj, qpdf_offset_t parsed_offset)
  440 +{
  441 + if (obj) {
  442 + obj->setDescription(context, description, parsed_offset);
397 } 443 }
  444 +}
398 445
399 - if (is_null) {  
400 - object = QPDF_Null::create(); 446 +void
  447 +QPDFParser::fixMissingKeys()
  448 +{
  449 + std::set<std::string> names;
  450 + for (auto& obj: frame->olist) {
  451 + if (obj->getTypeCode() == ::ot_name) {
  452 + names.insert(obj->getStringValue());
  453 + }
401 } 454 }
402 - if (!set_offset) {  
403 - setDescription(object, offset); 455 + int next_fake_key = 1;
  456 + for (auto const& item: frame->olist) {
  457 + while (true) {
  458 + const std::string key = "/QPDFFake" + std::to_string(next_fake_key++);
  459 + const bool found_fake = frame->dict.count(key) == 0 && names.count(key) == 0;
  460 + QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
  461 + if (found_fake) {
  462 + warn(
  463 + frame->offset,
  464 + "expected dictionary key but found non-name object; inserting key " + key);
  465 + frame->dict[key] = item;
  466 + break;
  467 + }
  468 + }
404 } 469 }
405 - return object;  
406 } 470 }
407 471
408 -void  
409 -QPDFParser::setDescription(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parsed_offset) 472 +bool
  473 +QPDFParser::tooManyBadTokens()
410 { 474 {
411 - if (obj) {  
412 - obj->setDescription(context, description, parsed_offset); 475 + if (good_count <= 4) {
  476 + if (++bad_count > 5) {
  477 + warn("too many errors; giving up on reading object");
  478 + return true;
  479 + }
  480 + } else {
  481 + bad_count = 1;
413 } 482 }
  483 + good_count = 0;
  484 + return false;
414 } 485 }
415 486
416 void 487 void
@@ -427,6 +498,15 @@ QPDFParser::warn(QPDFExc const& e) const @@ -427,6 +498,15 @@ QPDFParser::warn(QPDFExc const& e) const
427 } 498 }
428 499
429 void 500 void
  501 +QPDFParser::warnDuplicateKey()
  502 +{
  503 + QTC::TC("qpdf", "QPDFParser duplicate dict key");
  504 + warn(
  505 + frame->offset,
  506 + "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones");
  507 +}
  508 +
  509 +void
430 QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const 510 QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const
431 { 511 {
432 warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), object_description, offset, msg)); 512 warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), object_description, offset, msg));
libqpdf/qpdf/QPDFParser.hh
@@ -31,11 +31,44 @@ class QPDFParser @@ -31,11 +31,44 @@ class QPDFParser
31 QPDFObjectHandle parse(bool& empty, bool content_stream); 31 QPDFObjectHandle parse(bool& empty, bool content_stream);
32 32
33 private: 33 private:
34 - enum parser_state_e { st_top, st_start, st_stop, st_eof, st_dictionary, st_array }; 34 + // Parser state. Note:
  35 + // state <= st_dictionary_value == (state == st_dictionary_key || state == st_dictionary_value)
  36 + enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array };
35 37
  38 + struct StackFrame
  39 + {
  40 + StackFrame(std::shared_ptr<InputSource> const& input, parser_state_e state) :
  41 + state(state),
  42 + offset(input->tell())
  43 + {
  44 + }
  45 +
  46 + std::vector<std::shared_ptr<QPDFObject>> olist;
  47 + std::map<std::string, QPDFObjectHandle> dict;
  48 + parser_state_e state;
  49 + std::string key;
  50 + qpdf_offset_t offset;
  51 + std::string contents_string;
  52 + qpdf_offset_t contents_offset{-1};
  53 + int null_count{0};
  54 + };
  55 +
  56 + QPDFObjectHandle parseRemainder(bool content_stream);
  57 + void add(std::shared_ptr<QPDFObject>&& obj);
  58 + void addNull();
  59 + void addInt(int count);
  60 + template <typename T, typename... Args>
  61 + void addScalar(Args&&... args);
  62 + bool tooManyBadTokens();
  63 + void warnDuplicateKey();
  64 + void fixMissingKeys();
36 void warn(qpdf_offset_t offset, std::string const& msg) const; 65 void warn(qpdf_offset_t offset, std::string const& msg) const;
37 void warn(std::string const& msg) const; 66 void warn(std::string const& msg) const;
38 void warn(QPDFExc const&) const; 67 void warn(QPDFExc const&) const;
  68 + template <typename T, typename... Args>
  69 + // Create a new scalar object complete with parsed offset and description.
  70 + // NB the offset includes any leading whitespace.
  71 + QPDFObjectHandle withDescription(Args&&... args);
39 void setDescription(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parsed_offset); 72 void setDescription(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parsed_offset);
40 std::shared_ptr<InputSource> input; 73 std::shared_ptr<InputSource> input;
41 std::string const& object_description; 74 std::string const& object_description;
@@ -43,6 +76,18 @@ class QPDFParser @@ -43,6 +76,18 @@ class QPDFParser
43 QPDFObjectHandle::StringDecrypter* decrypter; 76 QPDFObjectHandle::StringDecrypter* decrypter;
44 QPDF* context; 77 QPDF* context;
45 std::shared_ptr<QPDFValue::Description> description; 78 std::shared_ptr<QPDFValue::Description> description;
  79 + std::vector<StackFrame> stack;
  80 + StackFrame* frame;
  81 + // Number of recent bad tokens.
  82 + int bad_count = 0;
  83 + // Number of good tokens since last bad token. Irrelevant if bad_count == 0.
  84 + int good_count = 0;
  85 + // Start offset including any leading whitespace.
  86 + qpdf_offset_t start;
  87 + // Number of successive integer tokens.
  88 + int int_count = 0;
  89 + long long int_buffer[2]{0, 0};
  90 + qpdf_offset_t last_offset_buffer[2]{0, 0};
46 }; 91 };
47 92
48 #endif // QPDFPARSER_HH 93 #endif // QPDFPARSER_HH
qpdf/qpdf.testcov
@@ -57,11 +57,14 @@ QPDF trailer lacks size 0 @@ -57,11 +57,14 @@ QPDF trailer lacks size 0
57 QPDF trailer size not integer 0 57 QPDF trailer size not integer 0
58 QPDF trailer prev not integer 0 58 QPDF trailer prev not integer 0
59 QPDFParser bad brace 0 59 QPDFParser bad brace 0
  60 +QPDFParser bad brace in parseRemainder 0
60 QPDFParser bad array close 0 61 QPDFParser bad array close 0
  62 +QPDFParser bad array close in parseRemainder 0
61 QPDF stream without length 0 63 QPDF stream without length 0
62 QPDF stream length not integer 0 64 QPDF stream length not integer 0
63 QPDF missing endstream 0 65 QPDF missing endstream 0
64 QPDFParser bad dictionary close 0 66 QPDFParser bad dictionary close 0
  67 +QPDFParser bad dictionary close in parseRemainder 0
65 QPDF can't find xref 0 68 QPDF can't find xref 0
66 QPDFTokenizer bad ) 0 69 QPDFTokenizer bad ) 0
67 QPDFTokenizer bad > 0 70 QPDFTokenizer bad > 0
@@ -258,6 +261,7 @@ QPDFParser indirect with 0 objid 0 @@ -258,6 +261,7 @@ QPDFParser indirect with 0 objid 0
258 QPDF object id 0 0 261 QPDF object id 0 0
259 QPDF recursion loop in resolve 0 262 QPDF recursion loop in resolve 0
260 QPDFParser treat word as string 0 263 QPDFParser treat word as string 0
  264 +QPDFParser treat word as string in parseRemainder 0
261 QPDFParser found fake 1 265 QPDFParser found fake 1
262 QPDFParser no val for last key 0 266 QPDFParser no val for last key 0
263 QPDF resolve failure to null 0 267 QPDF resolve failure to null 0
@@ -289,7 +293,9 @@ QPDFObjectHandle coalesce called on stream 0 @@ -289,7 +293,9 @@ QPDFObjectHandle coalesce called on stream 0
289 QPDFObjectHandle coalesce provide stream data 0 293 QPDFObjectHandle coalesce provide stream data 0
290 QPDF_Stream bad token at end during normalize 0 294 QPDF_Stream bad token at end during normalize 0
291 QPDFParser bad token in parse 0 295 QPDFParser bad token in parse 0
  296 +QPDFParser bad token in parseRemainder 0
292 QPDFParser eof in parse 0 297 QPDFParser eof in parse 0
  298 +QPDFParser eof in parseRemainder 0
293 QPDFObjectHandle array bounds 0 299 QPDFObjectHandle array bounds 0
294 QPDFObjectHandle boolean returning false 0 300 QPDFObjectHandle boolean returning false 0
295 QPDFObjectHandle integer returning 0 0 301 QPDFObjectHandle integer returning 0 0
qpdf/qtest/parsing.test
@@ -17,7 +17,7 @@ my $td = new TestDriver('parsing'); @@ -17,7 +17,7 @@ my $td = new TestDriver('parsing');
17 my $n_tests = 17; 17 my $n_tests = 17;
18 18
19 $td->runtest("parse objects from string", 19 $td->runtest("parse objects from string",
20 - {$td->COMMAND => "test_driver 31 good1.qdf"}, 20 + {$td->COMMAND => "test_driver 31 bad39.qdf"},
21 {$td->FILE => "parse-object.out", $td->EXIT_STATUS => 0}, 21 {$td->FILE => "parse-object.out", $td->EXIT_STATUS => 0},
22 $td->NORMALIZE_NEWLINES); 22 $td->NORMALIZE_NEWLINES);
23 $td->runtest("EOF terminating literal tokens", 23 $td->runtest("EOF terminating literal tokens",
qpdf/qtest/qpdf/bad16-recover.out
1 WARNING: bad16.pdf (trailer, offset 753): unexpected dictionary close token 1 WARNING: bad16.pdf (trailer, offset 753): unexpected dictionary close token
2 WARNING: bad16.pdf (trailer, offset 756): unexpected dictionary close token 2 WARNING: bad16.pdf (trailer, offset 756): unexpected dictionary close token
3 WARNING: bad16.pdf (trailer, offset 759): unknown token while reading object; treating as string 3 WARNING: bad16.pdf (trailer, offset 759): unknown token while reading object; treating as string
4 -WARNING: bad16.pdf (trailer, offset 779): unexpected EOF  
5 WARNING: bad16.pdf (trailer, offset 779): parse error while reading object 4 WARNING: bad16.pdf (trailer, offset 779): parse error while reading object
  5 +WARNING: bad16.pdf (trailer, offset 779): unexpected EOF
6 WARNING: bad16.pdf: file is damaged 6 WARNING: bad16.pdf: file is damaged
7 WARNING: bad16.pdf (offset 712): expected trailer dictionary 7 WARNING: bad16.pdf (offset 712): expected trailer dictionary
8 WARNING: bad16.pdf: Attempting to reconstruct cross-reference table 8 WARNING: bad16.pdf: Attempting to reconstruct cross-reference table
9 WARNING: bad16.pdf (trailer, offset 753): unexpected dictionary close token 9 WARNING: bad16.pdf (trailer, offset 753): unexpected dictionary close token
10 WARNING: bad16.pdf (trailer, offset 756): unexpected dictionary close token 10 WARNING: bad16.pdf (trailer, offset 756): unexpected dictionary close token
11 WARNING: bad16.pdf (trailer, offset 759): unknown token while reading object; treating as string 11 WARNING: bad16.pdf (trailer, offset 759): unknown token while reading object; treating as string
12 -WARNING: bad16.pdf (trailer, offset 779): unexpected EOF  
13 WARNING: bad16.pdf (trailer, offset 779): parse error while reading object 12 WARNING: bad16.pdf (trailer, offset 779): parse error while reading object
  13 +WARNING: bad16.pdf (trailer, offset 779): unexpected EOF
14 bad16.pdf: unable to find trailer dictionary while recovering damaged file 14 bad16.pdf: unable to find trailer dictionary while recovering damaged file
qpdf/qtest/qpdf/bad16.out
1 WARNING: bad16.pdf (trailer, offset 753): unexpected dictionary close token 1 WARNING: bad16.pdf (trailer, offset 753): unexpected dictionary close token
2 WARNING: bad16.pdf (trailer, offset 756): unexpected dictionary close token 2 WARNING: bad16.pdf (trailer, offset 756): unexpected dictionary close token
3 WARNING: bad16.pdf (trailer, offset 759): unknown token while reading object; treating as string 3 WARNING: bad16.pdf (trailer, offset 759): unknown token while reading object; treating as string
4 -WARNING: bad16.pdf (trailer, offset 779): unexpected EOF  
5 WARNING: bad16.pdf (trailer, offset 779): parse error while reading object 4 WARNING: bad16.pdf (trailer, offset 779): parse error while reading object
  5 +WARNING: bad16.pdf (trailer, offset 779): unexpected EOF
6 bad16.pdf (offset 712): expected trailer dictionary 6 bad16.pdf (offset 712): expected trailer dictionary
qpdf/qtest/qpdf/bad36-recover.out
1 WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string 1 WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string
2 -WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2  
3 WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key 2 WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key
  3 +WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2
4 /QTest is implicit 4 /QTest is implicit
5 /QTest is direct and has type null (2) 5 /QTest is direct and has type null (2)
6 /QTest is null 6 /QTest is null
qpdf/qtest/qpdf/bad36.out
1 WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string 1 WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string
2 -WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2  
3 WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key 2 WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key
  3 +WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2
4 /QTest is implicit 4 /QTest is implicit
5 /QTest is direct and has type null (2) 5 /QTest is direct and has type null (2)
6 /QTest is null 6 /QTest is null
qpdf/qtest/qpdf/bad39.qdf 0 → 100644
  1 +%PDF-1.3
  2 +%¿÷¢þ
  3 +%QDF-1.0
  4 +
  5 +%% Original object ID: 1 0
  6 +1 0 obj
  7 +<<
  8 + /Pages 2 0 R
  9 + /Type /Catalog
  10 +>>
  11 +endobj
  12 +
  13 +%% Original object ID: 2 0
  14 +2 0 obj
  15 +<<
  16 + /Count 1
  17 + /Kids [
  18 + 3 0 R
  19 + ]
  20 + /Type /Pages
  21 +>>
  22 +endobj
  23 +
  24 +%% Page 1
  25 +%% Original object ID: 3 0
  26 +3 0 obj
  27 +<<
  28 + /Contents 4 0 R
  29 + /MediaBox [
  30 + 0
  31 + 0
  32 + 612
  33 + 792
  34 + ]
  35 + /Parent 2 0 R
  36 + /Resources <<
  37 + /Font <<
  38 + /F1 6 0 R
  39 + >>
  40 + /ProcSet 7 0 R
  41 + >>
  42 + /Type /Page
  43 +>>
  44 +endobj
  45 +
  46 +%% Contents for page 1
  47 +%% Original object ID: 4 0
  48 +4 0 obj
  49 +<<
  50 + /Length 5 0 R
  51 +>>
  52 +stream
  53 +BT
  54 + /F1 24 Tf
  55 + 72 720 Td
  56 + (Potato) Tj
  57 +ET
  58 +endstream
  59 +endobj
  60 +
  61 +5 0 obj
  62 +44
  63 +endobj
  64 +
  65 +%% Original object ID: 6 0
  66 +6 0 obj
  67 +<<
  68 + /BaseFont /Helvetica
  69 + /Encoding /WinAnsiEncoding
  70 + /Name /F1
  71 + /Subtype /Type1
  72 + /Type /Font
  73 +>>
  74 +endobj
  75 +
  76 +%% Original object ID: 5 0
  77 +7 0 obj
  78 +[
  79 + /PDF
  80 + /Text
  81 +]
  82 +endobj
  83 +
  84 +xref
  85 +0 8
  86 +0000000000 65535 f
  87 +0000000052 00000 n
  88 +0000000133 00000 n
  89 +0000000242 00000 n
  90 +0000000484 00000 n
  91 +0000000583 00000 n
  92 +0000000629 00000 n
  93 +0000001113 00000 n
  94 +trailer <<
  95 + /Root 1 0 R
  96 + /Size 8
  97 + /ID [<31415926535897932384626433832795><31415926535897932384626433832795>]
  98 +>>
  99 +startxref
  100 +809
  101 +%%EOF
  102 +7 0 obj
qpdf/qtest/qpdf/issue-335a.out
@@ -51,6 +51,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected ) @@ -51,6 +51,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected )
51 WARNING: issue-335a.pdf (trailer, offset 596): unexpected ) 51 WARNING: issue-335a.pdf (trailer, offset 596): unexpected )
52 WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2 52 WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2
53 WARNING: issue-335a.pdf (trailer, offset 600): unexpected ) 53 WARNING: issue-335a.pdf (trailer, offset 600): unexpected )
  54 +WARNING: issue-335a.pdf (trailer, offset 134): dictionary has duplicated key /L
54 WARNING: issue-335a.pdf (trailer, offset 601): unexpected ) 55 WARNING: issue-335a.pdf (trailer, offset 601): unexpected )
55 WARNING: issue-335a.pdf (trailer, offset 648): unexpected ) 56 WARNING: issue-335a.pdf (trailer, offset 648): unexpected )
56 WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2 57 WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2
@@ -74,6 +75,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected ) @@ -74,6 +75,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected )
74 WARNING: issue-335a.pdf (trailer, offset 596): unexpected ) 75 WARNING: issue-335a.pdf (trailer, offset 596): unexpected )
75 WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2 76 WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2
76 WARNING: issue-335a.pdf (trailer, offset 600): unexpected ) 77 WARNING: issue-335a.pdf (trailer, offset 600): unexpected )
  78 +WARNING: issue-335a.pdf (trailer, offset 164): dictionary has duplicated key /L
77 WARNING: issue-335a.pdf (trailer, offset 601): unexpected ) 79 WARNING: issue-335a.pdf (trailer, offset 601): unexpected )
78 WARNING: issue-335a.pdf (trailer, offset 648): unexpected ) 80 WARNING: issue-335a.pdf (trailer, offset 648): unexpected )
79 WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2 81 WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2
@@ -97,6 +99,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected ) @@ -97,6 +99,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected )
97 WARNING: issue-335a.pdf (trailer, offset 596): unexpected ) 99 WARNING: issue-335a.pdf (trailer, offset 596): unexpected )
98 WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2 100 WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2
99 WARNING: issue-335a.pdf (trailer, offset 600): unexpected ) 101 WARNING: issue-335a.pdf (trailer, offset 600): unexpected )
  102 +WARNING: issue-335a.pdf (trailer, offset 231): dictionary has duplicated key /L
100 WARNING: issue-335a.pdf (trailer, offset 601): unexpected ) 103 WARNING: issue-335a.pdf (trailer, offset 601): unexpected )
101 WARNING: issue-335a.pdf (trailer, offset 648): unexpected ) 104 WARNING: issue-335a.pdf (trailer, offset 648): unexpected )
102 WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2 105 WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2
@@ -448,6 +451,7 @@ WARNING: issue-335a.pdf (trailer, offset 1168): unexpected ) @@ -448,6 +451,7 @@ WARNING: issue-335a.pdf (trailer, offset 1168): unexpected )
448 WARNING: issue-335a.pdf (trailer, offset 1328): unexpected ) 451 WARNING: issue-335a.pdf (trailer, offset 1328): unexpected )
449 WARNING: issue-335a.pdf (trailer, offset 1329): name with stray # will not work with PDF >= 1.2 452 WARNING: issue-335a.pdf (trailer, offset 1329): name with stray # will not work with PDF >= 1.2
450 WARNING: issue-335a.pdf (trailer, offset 1332): unexpected ) 453 WARNING: issue-335a.pdf (trailer, offset 1332): unexpected )
  454 +WARNING: issue-335a.pdf (trailer, offset 1033): dictionary has duplicated key /L
451 WARNING: issue-335a.pdf (trailer, offset 1333): unexpected ) 455 WARNING: issue-335a.pdf (trailer, offset 1333): unexpected )
452 WARNING: issue-335a.pdf (trailer, offset 1344): unexpected ) 456 WARNING: issue-335a.pdf (trailer, offset 1344): unexpected )
453 WARNING: issue-335a.pdf (trailer, offset 1428): unexpected ) 457 WARNING: issue-335a.pdf (trailer, offset 1428): unexpected )
qpdf/qtest/qpdf/parse-object.out
1 [ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ] 1 [ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ]
2 -logic error parsing indirect: QPDFObjectHandle::parse called without context on an object with indirect references 2 +logic error parsing indirect: QPDFParser::parse called without context on an object with indirect references
3 trailing data: parsed object (trailing test): trailing data found parsing object from string 3 trailing data: parsed object (trailing test): trailing data found parsing object from string
4 WARNING: parsed object (offset 9): unknown token while reading object; treating as string 4 WARNING: parsed object (offset 9): unknown token while reading object; treating as string
  5 +WARNING: parsed object: treating unexpected brace token as null
  6 +WARNING: parsed object: treating unexpected brace token as null
  7 +WARNING: parsed object: unexpected dictionary close token
  8 +WARNING: bad39.qdf (object 7 0, offset 1121): unexpected EOF
  9 +WARNING: bad39.qdf (object 7 0, offset 1121): expected endobj
  10 +WARNING: bad39.qdf (object 7 0, offset 1121): EOF after endobj
5 test 31 done 11 test 31 done
qpdf/test_driver.cc
@@ -1195,6 +1195,13 @@ test_31(QPDF& pdf, char const* arg2) @@ -1195,6 +1195,13 @@ test_31(QPDF& pdf, char const* arg2)
1195 // mistakenly parsed as an indirect object. 1195 // mistakenly parsed as an indirect object.
1196 assert(QPDFObjectHandle::parse(&pdf, "[5 0 R 0 R /X]").unparse() == "[ 5 0 R 0 (R) /X ]"); 1196 assert(QPDFObjectHandle::parse(&pdf, "[5 0 R 0 R /X]").unparse() == "[ 5 0 R 0 (R) /X ]");
1197 assert(QPDFObjectHandle::parse(&pdf, "[1 0 R]", "indirect test").unparse() == "[ 1 0 R ]"); 1197 assert(QPDFObjectHandle::parse(&pdf, "[1 0 R]", "indirect test").unparse() == "[ 1 0 R ]");
  1198 + // TC:QPDFParser bad brace
  1199 + assert(QPDFObjectHandle::parse(&pdf, "}").unparse() == "null");
  1200 + assert(QPDFObjectHandle::parse(&pdf, "{").unparse() == "null");
  1201 + // TC:QPDFParser bad dictionary close
  1202 + assert(QPDFObjectHandle::parse(&pdf, ">>").unparse() == "null");
  1203 + // TC:QPDFParser eof in parse
  1204 + assert(QPDFObjectHandle::parse(&pdf, "[7 0 R]").getArrayItem(0).isNull());
1198 } 1205 }
1199 1206
1200 static void 1207 static void