Commit b8fd18ae562ab8bae1d2e67c1ab63ff4ea62124b
Merge branch 'parse_ref' into work
Showing
12 changed files
with
555 additions
and
305 deletions
libqpdf/QPDFParser.cc
| ... | ... | @@ -21,22 +21,7 @@ |
| 21 | 21 | |
| 22 | 22 | #include <memory> |
| 23 | 23 | |
| 24 | -namespace | |
| 25 | -{ | |
| 26 | - struct StackFrame | |
| 27 | - { | |
| 28 | - StackFrame(std::shared_ptr<InputSource> input) : | |
| 29 | - offset(input->tell()) | |
| 30 | - { | |
| 31 | - } | |
| 32 | - | |
| 33 | - std::vector<std::shared_ptr<QPDFObject>> olist; | |
| 34 | - qpdf_offset_t offset; | |
| 35 | - std::string contents_string{""}; | |
| 36 | - qpdf_offset_t contents_offset{-1}; | |
| 37 | - int null_count{0}; | |
| 38 | - }; | |
| 39 | -} // namespace | |
| 24 | +using ObjectPtr = std::shared_ptr<QPDFObject>; | |
| 40 | 25 | |
| 41 | 26 | QPDFObjectHandle |
| 42 | 27 | QPDFParser::parse(bool& empty, bool content_stream) |
| ... | ... | @@ -46,371 +31,457 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 46 | 31 | // effect of reading the object and changing the file pointer. If you do this, it will cause a |
| 47 | 32 | // logic error to be thrown from QPDF::inParse(). |
| 48 | 33 | |
| 49 | - const static std::shared_ptr<QPDFObject> null_oh = QPDF_Null::create(); | |
| 50 | 34 | QPDF::ParseGuard pg(context); |
| 51 | - | |
| 52 | 35 | empty = false; |
| 36 | + start = input->tell(); | |
| 53 | 37 | |
| 54 | - std::shared_ptr<QPDFObject> object; | |
| 55 | - bool set_offset = false; | |
| 56 | - | |
| 57 | - std::vector<StackFrame> stack; | |
| 58 | - stack.emplace_back(input); | |
| 59 | - std::vector<parser_state_e> state_stack; | |
| 60 | - state_stack.push_back(st_top); | |
| 61 | - qpdf_offset_t offset; | |
| 62 | - bool done = false; | |
| 63 | - int bad_count = 0; | |
| 64 | - int good_count = 0; | |
| 65 | - bool b_contents = false; | |
| 66 | - bool is_null = false; | |
| 38 | + if (!tokenizer.nextToken(*input, object_description)) { | |
| 39 | + warn(tokenizer.getErrorMessage()); | |
| 40 | + } | |
| 41 | + | |
| 42 | + switch (tokenizer.getType()) { | |
| 43 | + case QPDFTokenizer::tt_eof: | |
| 44 | + if (content_stream) { | |
| 45 | + // In content stream mode, leave object uninitialized to indicate EOF | |
| 46 | + return {}; | |
| 47 | + } | |
| 48 | + QTC::TC("qpdf", "QPDFParser eof in parse"); | |
| 49 | + warn("unexpected EOF"); | |
| 50 | + return {QPDF_Null::create()}; | |
| 51 | + | |
| 52 | + case QPDFTokenizer::tt_bad: | |
| 53 | + QTC::TC("qpdf", "QPDFParser bad token in parse"); | |
| 54 | + return {QPDF_Null::create()}; | |
| 55 | + | |
| 56 | + case QPDFTokenizer::tt_brace_open: | |
| 57 | + case QPDFTokenizer::tt_brace_close: | |
| 58 | + QTC::TC("qpdf", "QPDFParser bad brace"); | |
| 59 | + warn("treating unexpected brace token as null"); | |
| 60 | + return {QPDF_Null::create()}; | |
| 61 | + | |
| 62 | + case QPDFTokenizer::tt_array_close: | |
| 63 | + QTC::TC("qpdf", "QPDFParser bad array close"); | |
| 64 | + warn("treating unexpected array close token as null"); | |
| 65 | + return {QPDF_Null::create()}; | |
| 66 | + | |
| 67 | + case QPDFTokenizer::tt_dict_close: | |
| 68 | + QTC::TC("qpdf", "QPDFParser bad dictionary close"); | |
| 69 | + warn("unexpected dictionary close token"); | |
| 70 | + return {QPDF_Null::create()}; | |
| 71 | + | |
| 72 | + case QPDFTokenizer::tt_array_open: | |
| 73 | + case QPDFTokenizer::tt_dict_open: | |
| 74 | + stack.clear(); | |
| 75 | + stack.emplace_back( | |
| 76 | + input, | |
| 77 | + (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key); | |
| 78 | + frame = &stack.back(); | |
| 79 | + return parseRemainder(content_stream); | |
| 80 | + | |
| 81 | + case QPDFTokenizer::tt_bool: | |
| 82 | + return withDescription<QPDF_Bool>(tokenizer.getValue() == "true"); | |
| 83 | + | |
| 84 | + case QPDFTokenizer::tt_null: | |
| 85 | + return {QPDF_Null::create()}; | |
| 86 | + | |
| 87 | + case QPDFTokenizer::tt_integer: | |
| 88 | + return withDescription<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str())); | |
| 89 | + | |
| 90 | + case QPDFTokenizer::tt_real: | |
| 91 | + return withDescription<QPDF_Real>(tokenizer.getValue()); | |
| 92 | + | |
| 93 | + case QPDFTokenizer::tt_name: | |
| 94 | + return withDescription<QPDF_Name>(tokenizer.getValue()); | |
| 95 | + | |
| 96 | + case QPDFTokenizer::tt_word: | |
| 97 | + { | |
| 98 | + auto const& value = tokenizer.getValue(); | |
| 99 | + if (content_stream) { | |
| 100 | + return withDescription<QPDF_Operator>(value); | |
| 101 | + } else if (value == "endobj") { | |
| 102 | + // We just saw endobj without having read anything. Treat this as a null and do | |
| 103 | + // not move the input source's offset. | |
| 104 | + input->seek(input->getLastOffset(), SEEK_SET); | |
| 105 | + empty = true; | |
| 106 | + return {QPDF_Null::create()}; | |
| 107 | + } else { | |
| 108 | + QTC::TC("qpdf", "QPDFParser treat word as string"); | |
| 109 | + warn("unknown token while reading object; treating as string"); | |
| 110 | + return withDescription<QPDF_String>(value); | |
| 111 | + } | |
| 112 | + } | |
| 113 | + | |
| 114 | + case QPDFTokenizer::tt_string: | |
| 115 | + if (decrypter) { | |
| 116 | + std::string s{tokenizer.getValue()}; | |
| 117 | + decrypter->decryptString(s); | |
| 118 | + return withDescription<QPDF_String>(s); | |
| 119 | + } else { | |
| 120 | + return withDescription<QPDF_String>(tokenizer.getValue()); | |
| 121 | + } | |
| 122 | + | |
| 123 | + default: | |
| 124 | + warn("treating unknown token type as null while reading object"); | |
| 125 | + return {QPDF_Null::create()}; | |
| 126 | + } | |
| 127 | +} | |
| 67 | 128 | |
| 68 | - while (!done) { | |
| 69 | - bool bad = false; | |
| 70 | - bool indirect_ref = false; | |
| 71 | - is_null = false; | |
| 72 | - auto& frame = stack.back(); | |
| 73 | - auto& olist = frame.olist; | |
| 74 | - parser_state_e state = state_stack.back(); | |
| 75 | - offset = frame.offset; | |
| 129 | +QPDFObjectHandle | |
| 130 | +QPDFParser::parseRemainder(bool content_stream) | |
| 131 | +{ | |
| 132 | + // This method must take care not to resolve any objects. Don't check the type of any object | |
| 133 | + // without first ensuring that it is a direct object. Otherwise, doing so may have the side | |
| 134 | + // effect of reading the object and changing the file pointer. If you do this, it will cause a | |
| 135 | + // logic error to be thrown from QPDF::inParse(). | |
| 76 | 136 | |
| 77 | - object = nullptr; | |
| 78 | - set_offset = false; | |
| 137 | + bad_count = 0; | |
| 138 | + bool b_contents = false; | |
| 79 | 139 | |
| 140 | + while (true) { | |
| 80 | 141 | if (!tokenizer.nextToken(*input, object_description)) { |
| 81 | 142 | warn(tokenizer.getErrorMessage()); |
| 82 | 143 | } |
| 144 | + ++good_count; // optimistically | |
| 145 | + | |
| 146 | + if (int_count != 0) { | |
| 147 | + // Special handling of indirect references. Treat integer tokens as part of an indirect | |
| 148 | + // reference until proven otherwise. | |
| 149 | + if (tokenizer.getType() == QPDFTokenizer::tt_integer) { | |
| 150 | + if (++int_count > 2) { | |
| 151 | + // Process the oldest buffered integer. | |
| 152 | + addInt(int_count); | |
| 153 | + } | |
| 154 | + last_offset_buffer[int_count % 2] = input->getLastOffset(); | |
| 155 | + int_buffer[int_count % 2] = QUtil::string_to_ll(tokenizer.getValue().c_str()); | |
| 156 | + continue; | |
| 157 | + | |
| 158 | + } else if ( | |
| 159 | + int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word && | |
| 160 | + tokenizer.getValue() == "R") { | |
| 161 | + if (context == nullptr) { | |
| 162 | + QTC::TC("qpdf", "QPDFParser indirect without context"); | |
| 163 | + throw std::logic_error("QPDFParser::parse called without context on an object " | |
| 164 | + "with indirect references"); | |
| 165 | + } | |
| 166 | + auto ref_og = QPDFObjGen( | |
| 167 | + QIntC::to_int(int_buffer[(int_count - 1) % 2]), | |
| 168 | + QIntC::to_int(int_buffer[(int_count) % 2])); | |
| 169 | + if (ref_og.isIndirect()) { | |
| 170 | + // This action has the desirable side effect of causing dangling references | |
| 171 | + // (references to indirect objects that don't appear in the PDF) in any parsed | |
| 172 | + // object to appear in the object cache. | |
| 173 | + add(std::move(context->getObject(ref_og).obj)); | |
| 174 | + } else { | |
| 175 | + QTC::TC("qpdf", "QPDFParser indirect with 0 objid"); | |
| 176 | + addNull(); | |
| 177 | + } | |
| 178 | + int_count = 0; | |
| 179 | + continue; | |
| 180 | + | |
| 181 | + } else if (int_count > 0) { | |
| 182 | + // Process the buffered integers before processing the current token. | |
| 183 | + if (int_count > 1) { | |
| 184 | + addInt(int_count - 1); | |
| 185 | + } | |
| 186 | + addInt(int_count); | |
| 187 | + int_count = 0; | |
| 188 | + } | |
| 189 | + } | |
| 83 | 190 | |
| 84 | 191 | switch (tokenizer.getType()) { |
| 85 | 192 | case QPDFTokenizer::tt_eof: |
| 86 | - if (!content_stream) { | |
| 87 | - QTC::TC("qpdf", "QPDFParser eof in parse"); | |
| 88 | - warn("unexpected EOF"); | |
| 193 | + warn("parse error while reading object"); | |
| 194 | + if (content_stream) { | |
| 195 | + // In content stream mode, leave object uninitialized to indicate EOF | |
| 196 | + return {}; | |
| 89 | 197 | } |
| 90 | - bad = true; | |
| 91 | - state = st_eof; | |
| 92 | - break; | |
| 198 | + QTC::TC("qpdf", "QPDFParser eof in parseRemainder"); | |
| 199 | + warn("unexpected EOF"); | |
| 200 | + return {QPDF_Null::create()}; | |
| 93 | 201 | |
| 94 | 202 | case QPDFTokenizer::tt_bad: |
| 95 | - QTC::TC("qpdf", "QPDFParser bad token in parse"); | |
| 96 | - bad = true; | |
| 97 | - is_null = true; | |
| 98 | - break; | |
| 203 | + QTC::TC("qpdf", "QPDFParser bad token in parseRemainder"); | |
| 204 | + if (tooManyBadTokens()) { | |
| 205 | + return {QPDF_Null::create()}; | |
| 206 | + } | |
| 207 | + addNull(); | |
| 208 | + continue; | |
| 99 | 209 | |
| 100 | 210 | case QPDFTokenizer::tt_brace_open: |
| 101 | 211 | case QPDFTokenizer::tt_brace_close: |
| 102 | - QTC::TC("qpdf", "QPDFParser bad brace"); | |
| 212 | + QTC::TC("qpdf", "QPDFParser bad brace in parseRemainder"); | |
| 103 | 213 | warn("treating unexpected brace token as null"); |
| 104 | - bad = true; | |
| 105 | - is_null = true; | |
| 106 | - break; | |
| 214 | + if (tooManyBadTokens()) { | |
| 215 | + return {QPDF_Null::create()}; | |
| 216 | + } | |
| 217 | + addNull(); | |
| 218 | + continue; | |
| 107 | 219 | |
| 108 | 220 | case QPDFTokenizer::tt_array_close: |
| 109 | - if (state == st_array) { | |
| 110 | - state = st_stop; | |
| 221 | + if (frame->state == st_array) { | |
| 222 | + auto object = QPDF_Array::create(std::move(frame->olist), frame->null_count > 100); | |
| 223 | + setDescription(object, frame->offset - 1); | |
| 224 | + // The `offset` points just past "[". Set the rewind offset to point to the | |
| 225 | + // beginning of "[". This has been explicitly tested with whitespace surrounding the | |
| 226 | + // array start delimiter. getLastOffset points to the array end token and therefore | |
| 227 | + // can't be used here. | |
| 228 | + if (stack.size() <= 1) { | |
| 229 | + return object; | |
| 230 | + } | |
| 231 | + stack.pop_back(); | |
| 232 | + frame = &stack.back(); | |
| 233 | + add(std::move(object)); | |
| 111 | 234 | } else { |
| 112 | - QTC::TC("qpdf", "QPDFParser bad array close"); | |
| 235 | + QTC::TC("qpdf", "QPDFParser bad array close in parseRemainder"); | |
| 113 | 236 | warn("treating unexpected array close token as null"); |
| 114 | - bad = true; | |
| 115 | - is_null = true; | |
| 237 | + if (tooManyBadTokens()) { | |
| 238 | + return {QPDF_Null::create()}; | |
| 239 | + } | |
| 240 | + addNull(); | |
| 116 | 241 | } |
| 117 | - break; | |
| 242 | + continue; | |
| 118 | 243 | |
| 119 | 244 | case QPDFTokenizer::tt_dict_close: |
| 120 | - if (state == st_dictionary) { | |
| 121 | - state = st_stop; | |
| 245 | + if (frame->state <= st_dictionary_value) { | |
| 246 | + // Attempt to recover more or less gracefully from invalid dictionaries. | |
| 247 | + auto& dict = frame->dict; | |
| 248 | + | |
| 249 | + if (frame->state == st_dictionary_value) { | |
| 250 | + QTC::TC("qpdf", "QPDFParser no val for last key"); | |
| 251 | + warn( | |
| 252 | + frame->offset, | |
| 253 | + "dictionary ended prematurely; using null as value for last key"); | |
| 254 | + dict[frame->key] = QPDF_Null::create(); | |
| 255 | + } | |
| 256 | + | |
| 257 | + if (!frame->olist.empty()) | |
| 258 | + fixMissingKeys(); | |
| 259 | + | |
| 260 | + if (!frame->contents_string.empty() && dict.count("/Type") && | |
| 261 | + dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") && | |
| 262 | + dict.count("/Contents") && dict["/Contents"].isString()) { | |
| 263 | + dict["/Contents"] = QPDFObjectHandle::newString(frame->contents_string); | |
| 264 | + dict["/Contents"].setParsedOffset(frame->contents_offset); | |
| 265 | + } | |
| 266 | + auto object = QPDF_Dictionary::create(std::move(dict)); | |
| 267 | + setDescription(object, frame->offset - 2); | |
| 268 | + // The `offset` points just past "<<". Set the rewind offset to point to the | |
| 269 | + // beginning of "<<". This has been explicitly tested with whitespace surrounding | |
| 270 | + // the dictionary start delimiter. getLastOffset points to the dictionary end token | |
| 271 | + // and therefore can't be used here. | |
| 272 | + if (stack.size() <= 1) { | |
| 273 | + return object; | |
| 274 | + } | |
| 275 | + stack.pop_back(); | |
| 276 | + frame = &stack.back(); | |
| 277 | + add(std::move(object)); | |
| 122 | 278 | } else { |
| 123 | - QTC::TC("qpdf", "QPDFParser bad dictionary close"); | |
| 279 | + QTC::TC("qpdf", "QPDFParser bad dictionary close in parseRemainder"); | |
| 124 | 280 | warn("unexpected dictionary close token"); |
| 125 | - bad = true; | |
| 126 | - is_null = true; | |
| 281 | + if (tooManyBadTokens()) { | |
| 282 | + return {QPDF_Null::create()}; | |
| 283 | + } | |
| 284 | + addNull(); | |
| 127 | 285 | } |
| 128 | - break; | |
| 286 | + continue; | |
| 129 | 287 | |
| 130 | 288 | case QPDFTokenizer::tt_array_open: |
| 131 | 289 | case QPDFTokenizer::tt_dict_open: |
| 132 | - if (stack.size() > 500) { | |
| 290 | + if (stack.size() > 499) { | |
| 133 | 291 | QTC::TC("qpdf", "QPDFParser too deep"); |
| 134 | 292 | warn("ignoring excessively deeply nested data structure"); |
| 135 | - bad = true; | |
| 136 | - is_null = true; | |
| 137 | - state = st_top; | |
| 293 | + return {QPDF_Null::create()}; | |
| 138 | 294 | } else { |
| 139 | - state = st_start; | |
| 140 | - state_stack.push_back( | |
| 141 | - (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array | |
| 142 | - : st_dictionary); | |
| 143 | 295 | b_contents = false; |
| 144 | - stack.emplace_back(input); | |
| 296 | + stack.emplace_back( | |
| 297 | + input, | |
| 298 | + (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array | |
| 299 | + : st_dictionary_key); | |
| 300 | + frame = &stack.back(); | |
| 301 | + continue; | |
| 145 | 302 | } |
| 146 | - break; | |
| 147 | 303 | |
| 148 | 304 | case QPDFTokenizer::tt_bool: |
| 149 | - object = QPDF_Bool::create((tokenizer.getValue() == "true")); | |
| 150 | - break; | |
| 305 | + addScalar<QPDF_Bool>(tokenizer.getValue() == "true"); | |
| 306 | + continue; | |
| 151 | 307 | |
| 152 | 308 | case QPDFTokenizer::tt_null: |
| 153 | - is_null = true; | |
| 154 | - ++frame.null_count; | |
| 155 | - | |
| 156 | - break; | |
| 309 | + addNull(); | |
| 310 | + continue; | |
| 157 | 311 | |
| 158 | 312 | case QPDFTokenizer::tt_integer: |
| 159 | - object = QPDF_Integer::create( | |
| 160 | - QUtil::string_to_ll(std::string(tokenizer.getValue()).c_str())); | |
| 161 | - break; | |
| 313 | + if (!content_stream) { | |
| 314 | + // Buffer token in case it is part of an indirect reference. | |
| 315 | + last_offset_buffer[1] = input->getLastOffset(); | |
| 316 | + int_buffer[1] = QUtil::string_to_ll(tokenizer.getValue().c_str()); | |
| 317 | + int_count = 1; | |
| 318 | + } else { | |
| 319 | + addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str())); | |
| 320 | + } | |
| 321 | + continue; | |
| 162 | 322 | |
| 163 | 323 | case QPDFTokenizer::tt_real: |
| 164 | - object = QPDF_Real::create(tokenizer.getValue()); | |
| 165 | - break; | |
| 324 | + addScalar<QPDF_Real>(tokenizer.getValue()); | |
| 325 | + continue; | |
| 166 | 326 | |
| 167 | 327 | case QPDFTokenizer::tt_name: |
| 168 | - { | |
| 169 | - auto name = tokenizer.getValue(); | |
| 170 | - object = QPDF_Name::create(name); | |
| 171 | - | |
| 172 | - if (name == "/Contents") { | |
| 173 | - b_contents = true; | |
| 174 | - } else { | |
| 175 | - b_contents = false; | |
| 176 | - } | |
| 328 | + if (frame->state == st_dictionary_key) { | |
| 329 | + frame->key = tokenizer.getValue(); | |
| 330 | + frame->state = st_dictionary_value; | |
| 331 | + b_contents = decrypter && frame->key == "/Contents"; | |
| 332 | + continue; | |
| 333 | + } else { | |
| 334 | + addScalar<QPDF_Name>(tokenizer.getValue()); | |
| 177 | 335 | } |
| 178 | - break; | |
| 336 | + continue; | |
| 179 | 337 | |
| 180 | 338 | case QPDFTokenizer::tt_word: |
| 181 | - { | |
| 182 | - auto value = tokenizer.getValue(); | |
| 183 | - auto size = olist.size(); | |
| 184 | - if (content_stream) { | |
| 185 | - object = QPDF_Operator::create(value); | |
| 186 | - } else if ( | |
| 187 | - value == "R" && state != st_top && size >= 2 && olist.back() && | |
| 188 | - olist.back()->getTypeCode() == ::ot_integer && | |
| 189 | - !olist.back()->getObjGen().isIndirect() && olist.at(size - 2) && | |
| 190 | - olist.at(size - 2)->getTypeCode() == ::ot_integer && | |
| 191 | - !olist.at(size - 2)->getObjGen().isIndirect()) { | |
| 192 | - if (context == nullptr) { | |
| 193 | - QTC::TC("qpdf", "QPDFParser indirect without context"); | |
| 194 | - throw std::logic_error("QPDFObjectHandle::parse called without context on " | |
| 195 | - "an object with indirect references"); | |
| 196 | - } | |
| 197 | - auto ref_og = QPDFObjGen( | |
| 198 | - QPDFObjectHandle(olist.at(size - 2)).getIntValueAsInt(), | |
| 199 | - QPDFObjectHandle(olist.back()).getIntValueAsInt()); | |
| 200 | - if (ref_og.isIndirect()) { | |
| 201 | - // This action has the desirable side effect of causing dangling references | |
| 202 | - // (references to indirect objects that don't appear in the PDF) in any | |
| 203 | - // parsed object to appear in the object cache. | |
| 204 | - object = context->getObject(ref_og).obj; | |
| 205 | - indirect_ref = true; | |
| 206 | - } else { | |
| 207 | - QTC::TC("qpdf", "QPDFParser indirect with 0 objid"); | |
| 208 | - is_null = true; | |
| 209 | - } | |
| 210 | - olist.pop_back(); | |
| 211 | - olist.pop_back(); | |
| 212 | - } else if ((value == "endobj") && (state == st_top)) { | |
| 213 | - // We just saw endobj without having read anything. Treat this as a null and do | |
| 214 | - // not move the input source's offset. | |
| 215 | - is_null = true; | |
| 216 | - input->seek(input->getLastOffset(), SEEK_SET); | |
| 217 | - empty = true; | |
| 218 | - } else { | |
| 219 | - QTC::TC("qpdf", "QPDFParser treat word as string"); | |
| 220 | - warn("unknown token while reading object; treating as string"); | |
| 221 | - bad = true; | |
| 222 | - object = QPDF_String::create(value); | |
| 339 | + if (content_stream) { | |
| 340 | + addScalar<QPDF_Operator>(tokenizer.getValue()); | |
| 341 | + } else { | |
| 342 | + QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder"); | |
| 343 | + warn("unknown token while reading object; treating as string"); | |
| 344 | + if (tooManyBadTokens()) { | |
| 345 | + return {QPDF_Null::create()}; | |
| 223 | 346 | } |
| 347 | + addScalar<QPDF_String>(tokenizer.getValue()); | |
| 224 | 348 | } |
| 225 | - break; | |
| 349 | + continue; | |
| 226 | 350 | |
| 227 | 351 | case QPDFTokenizer::tt_string: |
| 228 | 352 | { |
| 229 | - auto val = tokenizer.getValue(); | |
| 353 | + auto const& val = tokenizer.getValue(); | |
| 230 | 354 | if (decrypter) { |
| 231 | 355 | if (b_contents) { |
| 232 | - frame.contents_string = val; | |
| 233 | - frame.contents_offset = input->getLastOffset(); | |
| 356 | + frame->contents_string = val; | |
| 357 | + frame->contents_offset = input->getLastOffset(); | |
| 234 | 358 | b_contents = false; |
| 235 | 359 | } |
| 236 | 360 | std::string s{val}; |
| 237 | 361 | decrypter->decryptString(s); |
| 238 | - object = QPDF_String::create(s); | |
| 362 | + addScalar<QPDF_String>(s); | |
| 239 | 363 | } else { |
| 240 | - object = QPDF_String::create(val); | |
| 364 | + addScalar<QPDF_String>(val); | |
| 241 | 365 | } |
| 242 | 366 | } |
| 243 | - | |
| 244 | - break; | |
| 367 | + continue; | |
| 245 | 368 | |
| 246 | 369 | default: |
| 247 | 370 | warn("treating unknown token type as null while reading object"); |
| 248 | - bad = true; | |
| 249 | - is_null = true; | |
| 250 | - break; | |
| 251 | - } | |
| 252 | - | |
| 253 | - if (object == nullptr && !is_null && | |
| 254 | - (!((state == st_start) || (state == st_stop) || (state == st_eof)))) { | |
| 255 | - throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object"); | |
| 256 | - is_null = true; | |
| 257 | - } | |
| 258 | - | |
| 259 | - if (bad) { | |
| 260 | - ++bad_count; | |
| 261 | - good_count = 0; | |
| 262 | - } else { | |
| 263 | - ++good_count; | |
| 264 | - if (good_count > 3) { | |
| 265 | - bad_count = 0; | |
| 371 | + if (tooManyBadTokens()) { | |
| 372 | + return {QPDF_Null::create()}; | |
| 266 | 373 | } |
| 374 | + addNull(); | |
| 267 | 375 | } |
| 268 | - if (bad_count > 5) { | |
| 269 | - // We had too many consecutive errors without enough intervening successful objects. | |
| 270 | - // Give up. | |
| 271 | - warn("too many errors; giving up on reading object"); | |
| 272 | - state = st_top; | |
| 273 | - is_null = true; | |
| 274 | - } | |
| 376 | + } | |
| 377 | +} | |
| 275 | 378 | |
| 276 | - switch (state) { | |
| 277 | - case st_eof: | |
| 278 | - if (state_stack.size() > 1) { | |
| 279 | - warn("parse error while reading object"); | |
| 280 | - } | |
| 281 | - done = true; | |
| 282 | - // In content stream mode, leave object uninitialized to indicate EOF | |
| 283 | - if (!content_stream) { | |
| 284 | - is_null = true; | |
| 285 | - } | |
| 286 | - break; | |
| 287 | - | |
| 288 | - case st_dictionary: | |
| 289 | - case st_array: | |
| 290 | - if (is_null) { | |
| 291 | - object = null_oh; | |
| 292 | - // No need to set description for direct nulls - they probably will become implicit. | |
| 293 | - } else if (!indirect_ref) { | |
| 294 | - setDescription(object, input->getLastOffset()); | |
| 295 | - } | |
| 296 | - set_offset = true; | |
| 297 | - olist.push_back(object); | |
| 298 | - break; | |
| 379 | +void | |
| 380 | +QPDFParser::add(std::shared_ptr<QPDFObject>&& obj) | |
| 381 | +{ | |
| 382 | + if (frame->state != st_dictionary_value) { | |
| 383 | + // If state is st_dictionary_key then there is a missing key. Push onto olist for | |
| 384 | + // processing once the tt_dict_close token has been found. | |
| 385 | + frame->olist.emplace_back(std::move(obj)); | |
| 386 | + } else { | |
| 387 | + if (auto res = frame->dict.insert_or_assign(frame->key, std::move(obj)); !res.second) { | |
| 388 | + warnDuplicateKey(); | |
| 389 | + } | |
| 390 | + frame->state = st_dictionary_key; | |
| 391 | + } | |
| 392 | +} | |
| 299 | 393 | |
| 300 | - case st_top: | |
| 301 | - done = true; | |
| 302 | - break; | |
| 394 | +void | |
| 395 | +QPDFParser::addNull() | |
| 396 | +{ | |
| 397 | + const static ObjectPtr null_obj = QPDF_Null::create(); | |
| 303 | 398 | |
| 304 | - case st_start: | |
| 305 | - break; | |
| 399 | + if (frame->state != st_dictionary_value) { | |
| 400 | + // If state is st_dictionary_key then there is a missing key. Push onto olist for | |
| 401 | + // processing once the tt_dict_close token has been found. | |
| 402 | + frame->olist.emplace_back(null_obj); | |
| 403 | + } else { | |
| 404 | + if (auto res = frame->dict.insert_or_assign(frame->key, null_obj); !res.second) { | |
| 405 | + warnDuplicateKey(); | |
| 406 | + } | |
| 407 | + frame->state = st_dictionary_key; | |
| 408 | + } | |
| 409 | + ++frame->null_count; | |
| 410 | +} | |
| 306 | 411 | |
| 307 | - case st_stop: | |
| 308 | - if ((state_stack.size() < 2) || (stack.size() < 2)) { | |
| 309 | - throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with " | |
| 310 | - "insufficient elements in stack"); | |
| 311 | - } | |
| 312 | - parser_state_e old_state = state_stack.back(); | |
| 313 | - state_stack.pop_back(); | |
| 314 | - if (old_state == st_array) { | |
| 315 | - object = QPDF_Array::create(std::move(olist), frame.null_count > 100); | |
| 316 | - setDescription(object, offset - 1); | |
| 317 | - // The `offset` points to the next of "[". Set the rewind offset to point to the | |
| 318 | - // beginning of "[". This has been explicitly tested with whitespace surrounding the | |
| 319 | - // array start delimiter. getLastOffset points to the array end token and therefore | |
| 320 | - // can't be used here. | |
| 321 | - set_offset = true; | |
| 322 | - } else if (old_state == st_dictionary) { | |
| 323 | - // Convert list to map. Alternating elements are keys. Attempt to recover more or | |
| 324 | - // less gracefully from invalid dictionaries. | |
| 325 | - std::set<std::string> names; | |
| 326 | - for (auto& obj: olist) { | |
| 327 | - if (obj) { | |
| 328 | - if (obj->getTypeCode() == ::ot_name) { | |
| 329 | - names.insert(obj->getStringValue()); | |
| 330 | - } | |
| 331 | - } | |
| 332 | - } | |
| 412 | +void | |
| 413 | +QPDFParser::addInt(int count) | |
| 414 | +{ | |
| 415 | + auto obj = QPDF_Integer::create(int_buffer[count % 2]); | |
| 416 | + obj->setDescription(context, description, last_offset_buffer[count % 2]); | |
| 417 | + add(std::move(obj)); | |
| 418 | +} | |
| 333 | 419 | |
| 334 | - std::map<std::string, QPDFObjectHandle> dict; | |
| 335 | - int next_fake_key = 1; | |
| 336 | - for (auto iter = olist.begin(); iter != olist.end();) { | |
| 337 | - // Calculate key. | |
| 338 | - std::string key; | |
| 339 | - if (*iter && (*iter)->getTypeCode() == ::ot_name) { | |
| 340 | - key = (*iter)->getStringValue(); | |
| 341 | - ++iter; | |
| 342 | - } else { | |
| 343 | - for (bool found_fake = false; !found_fake;) { | |
| 344 | - key = "/QPDFFake" + std::to_string(next_fake_key++); | |
| 345 | - found_fake = (names.count(key) == 0); | |
| 346 | - QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1)); | |
| 347 | - } | |
| 348 | - warn( | |
| 349 | - offset, | |
| 350 | - "expected dictionary key but found non-name object; inserting key " + | |
| 351 | - key); | |
| 352 | - } | |
| 353 | - if (dict.count(key) > 0) { | |
| 354 | - QTC::TC("qpdf", "QPDFParser duplicate dict key"); | |
| 355 | - warn( | |
| 356 | - offset, | |
| 357 | - "dictionary has duplicated key " + key + | |
| 358 | - "; last occurrence overrides earlier ones"); | |
| 359 | - } | |
| 420 | +template <typename T, typename... Args> | |
| 421 | +void | |
| 422 | +QPDFParser::addScalar(Args&&... args) | |
| 423 | +{ | |
| 424 | + auto obj = T::create(args...); | |
| 425 | + obj->setDescription(context, description, input->getLastOffset()); | |
| 426 | + add(std::move(obj)); | |
| 427 | +} | |
| 360 | 428 | |
| 361 | - // Calculate value. | |
| 362 | - std::shared_ptr<QPDFObject> val; | |
| 363 | - if (iter != olist.end()) { | |
| 364 | - val = *iter; | |
| 365 | - ++iter; | |
| 366 | - } else { | |
| 367 | - QTC::TC("qpdf", "QPDFParser no val for last key"); | |
| 368 | - warn( | |
| 369 | - offset, | |
| 370 | - "dictionary ended prematurely; using null as value for last key"); | |
| 371 | - val = QPDF_Null::create(); | |
| 372 | - } | |
| 429 | +template <typename T, typename... Args> | |
| 430 | +QPDFObjectHandle | |
| 431 | +QPDFParser::withDescription(Args&&... args) | |
| 432 | +{ | |
| 433 | + auto obj = T::create(args...); | |
| 434 | + obj->setDescription(context, description, start); | |
| 435 | + return {obj}; | |
| 436 | +} | |
| 373 | 437 | |
| 374 | - dict[std::move(key)] = std::move(val); | |
| 375 | - } | |
| 376 | - if (!frame.contents_string.empty() && dict.count("/Type") && | |
| 377 | - dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") && | |
| 378 | - dict.count("/Contents") && dict["/Contents"].isString()) { | |
| 379 | - dict["/Contents"] = QPDFObjectHandle::newString(frame.contents_string); | |
| 380 | - dict["/Contents"].setParsedOffset(frame.contents_offset); | |
| 381 | - } | |
| 382 | - object = QPDF_Dictionary::create(std::move(dict)); | |
| 383 | - setDescription(object, offset - 2); | |
| 384 | - // The `offset` points to the next of "<<". Set the rewind offset to point to the | |
| 385 | - // beginning of "<<". This has been explicitly tested with whitespace surrounding | |
| 386 | - // the dictionary start delimiter. getLastOffset points to the dictionary end token | |
| 387 | - // and therefore can't be used here. | |
| 388 | - set_offset = true; | |
| 389 | - } | |
| 390 | - stack.pop_back(); | |
| 391 | - if (state_stack.back() == st_top) { | |
| 392 | - done = true; | |
| 393 | - } else { | |
| 394 | - stack.back().olist.push_back(object); | |
| 395 | - } | |
| 396 | - } | |
| 438 | +void | |
| 439 | +QPDFParser::setDescription(ObjectPtr& obj, qpdf_offset_t parsed_offset) | |
| 440 | +{ | |
| 441 | + if (obj) { | |
| 442 | + obj->setDescription(context, description, parsed_offset); | |
| 397 | 443 | } |
| 444 | +} | |
| 398 | 445 | |
| 399 | - if (is_null) { | |
| 400 | - object = QPDF_Null::create(); | |
| 446 | +void | |
| 447 | +QPDFParser::fixMissingKeys() | |
| 448 | +{ | |
| 449 | + std::set<std::string> names; | |
| 450 | + for (auto& obj: frame->olist) { | |
| 451 | + if (obj->getTypeCode() == ::ot_name) { | |
| 452 | + names.insert(obj->getStringValue()); | |
| 453 | + } | |
| 401 | 454 | } |
| 402 | - if (!set_offset) { | |
| 403 | - setDescription(object, offset); | |
| 455 | + int next_fake_key = 1; | |
| 456 | + for (auto const& item: frame->olist) { | |
| 457 | + while (true) { | |
| 458 | + const std::string key = "/QPDFFake" + std::to_string(next_fake_key++); | |
| 459 | + const bool found_fake = frame->dict.count(key) == 0 && names.count(key) == 0; | |
| 460 | + QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1)); | |
| 461 | + if (found_fake) { | |
| 462 | + warn( | |
| 463 | + frame->offset, | |
| 464 | + "expected dictionary key but found non-name object; inserting key " + key); | |
| 465 | + frame->dict[key] = item; | |
| 466 | + break; | |
| 467 | + } | |
| 468 | + } | |
| 404 | 469 | } |
| 405 | - return object; | |
| 406 | 470 | } |
| 407 | 471 | |
| 408 | -void | |
| 409 | -QPDFParser::setDescription(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parsed_offset) | |
| 472 | +bool | |
| 473 | +QPDFParser::tooManyBadTokens() | |
| 410 | 474 | { |
| 411 | - if (obj) { | |
| 412 | - obj->setDescription(context, description, parsed_offset); | |
| 475 | + if (good_count <= 4) { | |
| 476 | + if (++bad_count > 5) { | |
| 477 | + warn("too many errors; giving up on reading object"); | |
| 478 | + return true; | |
| 479 | + } | |
| 480 | + } else { | |
| 481 | + bad_count = 1; | |
| 413 | 482 | } |
| 483 | + good_count = 0; | |
| 484 | + return false; | |
| 414 | 485 | } |
| 415 | 486 | |
| 416 | 487 | void |
| ... | ... | @@ -427,6 +498,15 @@ QPDFParser::warn(QPDFExc const& e) const |
| 427 | 498 | } |
| 428 | 499 | |
| 429 | 500 | void |
| 501 | +QPDFParser::warnDuplicateKey() | |
| 502 | +{ | |
| 503 | + QTC::TC("qpdf", "QPDFParser duplicate dict key"); | |
| 504 | + warn( | |
| 505 | + frame->offset, | |
| 506 | + "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones"); | |
| 507 | +} | |
| 508 | + | |
| 509 | +void | |
| 430 | 510 | QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const |
| 431 | 511 | { |
| 432 | 512 | warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), object_description, offset, msg)); | ... | ... |
libqpdf/qpdf/QPDFParser.hh
| ... | ... | @@ -31,11 +31,44 @@ class QPDFParser |
| 31 | 31 | QPDFObjectHandle parse(bool& empty, bool content_stream); |
| 32 | 32 | |
| 33 | 33 | private: |
| 34 | - enum parser_state_e { st_top, st_start, st_stop, st_eof, st_dictionary, st_array }; | |
| 34 | + // Parser state. Note: | |
| 35 | + // state <= st_dictionary_value == (state == st_dictionary_key || state == st_dictionary_value) | |
| 36 | + enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array }; | |
| 35 | 37 | |
| 38 | + struct StackFrame | |
| 39 | + { | |
| 40 | + StackFrame(std::shared_ptr<InputSource> const& input, parser_state_e state) : | |
| 41 | + state(state), | |
| 42 | + offset(input->tell()) | |
| 43 | + { | |
| 44 | + } | |
| 45 | + | |
| 46 | + std::vector<std::shared_ptr<QPDFObject>> olist; | |
| 47 | + std::map<std::string, QPDFObjectHandle> dict; | |
| 48 | + parser_state_e state; | |
| 49 | + std::string key; | |
| 50 | + qpdf_offset_t offset; | |
| 51 | + std::string contents_string; | |
| 52 | + qpdf_offset_t contents_offset{-1}; | |
| 53 | + int null_count{0}; | |
| 54 | + }; | |
| 55 | + | |
| 56 | + QPDFObjectHandle parseRemainder(bool content_stream); | |
| 57 | + void add(std::shared_ptr<QPDFObject>&& obj); | |
| 58 | + void addNull(); | |
| 59 | + void addInt(int count); | |
| 60 | + template <typename T, typename... Args> | |
| 61 | + void addScalar(Args&&... args); | |
| 62 | + bool tooManyBadTokens(); | |
| 63 | + void warnDuplicateKey(); | |
| 64 | + void fixMissingKeys(); | |
| 36 | 65 | void warn(qpdf_offset_t offset, std::string const& msg) const; |
| 37 | 66 | void warn(std::string const& msg) const; |
| 38 | 67 | void warn(QPDFExc const&) const; |
| 68 | + template <typename T, typename... Args> | |
| 69 | + // Create a new scalar object complete with parsed offset and description. | |
| 70 | + // NB the offset includes any leading whitespace. | |
| 71 | + QPDFObjectHandle withDescription(Args&&... args); | |
| 39 | 72 | void setDescription(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parsed_offset); |
| 40 | 73 | std::shared_ptr<InputSource> input; |
| 41 | 74 | std::string const& object_description; |
| ... | ... | @@ -43,6 +76,18 @@ class QPDFParser |
| 43 | 76 | QPDFObjectHandle::StringDecrypter* decrypter; |
| 44 | 77 | QPDF* context; |
| 45 | 78 | std::shared_ptr<QPDFValue::Description> description; |
| 79 | + std::vector<StackFrame> stack; | |
| 80 | + StackFrame* frame; | |
| 81 | + // Number of recent bad tokens. | |
| 82 | + int bad_count = 0; | |
| 83 | + // Number of good tokens since last bad token. Irrelevant if bad_count == 0. | |
| 84 | + int good_count = 0; | |
| 85 | + // Start offset including any leading whitespace. | |
| 86 | + qpdf_offset_t start; | |
| 87 | + // Number of successive integer tokens. | |
| 88 | + int int_count = 0; | |
| 89 | + long long int_buffer[2]{0, 0}; | |
| 90 | + qpdf_offset_t last_offset_buffer[2]{0, 0}; | |
| 46 | 91 | }; |
| 47 | 92 | |
| 48 | 93 | #endif // QPDFPARSER_HH | ... | ... |
qpdf/qpdf.testcov
| ... | ... | @@ -57,11 +57,14 @@ QPDF trailer lacks size 0 |
| 57 | 57 | QPDF trailer size not integer 0 |
| 58 | 58 | QPDF trailer prev not integer 0 |
| 59 | 59 | QPDFParser bad brace 0 |
| 60 | +QPDFParser bad brace in parseRemainder 0 | |
| 60 | 61 | QPDFParser bad array close 0 |
| 62 | +QPDFParser bad array close in parseRemainder 0 | |
| 61 | 63 | QPDF stream without length 0 |
| 62 | 64 | QPDF stream length not integer 0 |
| 63 | 65 | QPDF missing endstream 0 |
| 64 | 66 | QPDFParser bad dictionary close 0 |
| 67 | +QPDFParser bad dictionary close in parseRemainder 0 | |
| 65 | 68 | QPDF can't find xref 0 |
| 66 | 69 | QPDFTokenizer bad ) 0 |
| 67 | 70 | QPDFTokenizer bad > 0 |
| ... | ... | @@ -258,6 +261,7 @@ QPDFParser indirect with 0 objid 0 |
| 258 | 261 | QPDF object id 0 0 |
| 259 | 262 | QPDF recursion loop in resolve 0 |
| 260 | 263 | QPDFParser treat word as string 0 |
| 264 | +QPDFParser treat word as string in parseRemainder 0 | |
| 261 | 265 | QPDFParser found fake 1 |
| 262 | 266 | QPDFParser no val for last key 0 |
| 263 | 267 | QPDF resolve failure to null 0 |
| ... | ... | @@ -289,7 +293,9 @@ QPDFObjectHandle coalesce called on stream 0 |
| 289 | 293 | QPDFObjectHandle coalesce provide stream data 0 |
| 290 | 294 | QPDF_Stream bad token at end during normalize 0 |
| 291 | 295 | QPDFParser bad token in parse 0 |
| 296 | +QPDFParser bad token in parseRemainder 0 | |
| 292 | 297 | QPDFParser eof in parse 0 |
| 298 | +QPDFParser eof in parseRemainder 0 | |
| 293 | 299 | QPDFObjectHandle array bounds 0 |
| 294 | 300 | QPDFObjectHandle boolean returning false 0 |
| 295 | 301 | QPDFObjectHandle integer returning 0 0 | ... | ... |
qpdf/qtest/parsing.test
| ... | ... | @@ -17,7 +17,7 @@ my $td = new TestDriver('parsing'); |
| 17 | 17 | my $n_tests = 17; |
| 18 | 18 | |
| 19 | 19 | $td->runtest("parse objects from string", |
| 20 | - {$td->COMMAND => "test_driver 31 good1.qdf"}, | |
| 20 | + {$td->COMMAND => "test_driver 31 bad39.qdf"}, | |
| 21 | 21 | {$td->FILE => "parse-object.out", $td->EXIT_STATUS => 0}, |
| 22 | 22 | $td->NORMALIZE_NEWLINES); |
| 23 | 23 | $td->runtest("EOF terminating literal tokens", | ... | ... |
qpdf/qtest/qpdf/bad16-recover.out
| 1 | 1 | WARNING: bad16.pdf (trailer, offset 753): unexpected dictionary close token |
| 2 | 2 | WARNING: bad16.pdf (trailer, offset 756): unexpected dictionary close token |
| 3 | 3 | WARNING: bad16.pdf (trailer, offset 759): unknown token while reading object; treating as string |
| 4 | -WARNING: bad16.pdf (trailer, offset 779): unexpected EOF | |
| 5 | 4 | WARNING: bad16.pdf (trailer, offset 779): parse error while reading object |
| 5 | +WARNING: bad16.pdf (trailer, offset 779): unexpected EOF | |
| 6 | 6 | WARNING: bad16.pdf: file is damaged |
| 7 | 7 | WARNING: bad16.pdf (offset 712): expected trailer dictionary |
| 8 | 8 | WARNING: bad16.pdf: Attempting to reconstruct cross-reference table |
| 9 | 9 | WARNING: bad16.pdf (trailer, offset 753): unexpected dictionary close token |
| 10 | 10 | WARNING: bad16.pdf (trailer, offset 756): unexpected dictionary close token |
| 11 | 11 | WARNING: bad16.pdf (trailer, offset 759): unknown token while reading object; treating as string |
| 12 | -WARNING: bad16.pdf (trailer, offset 779): unexpected EOF | |
| 13 | 12 | WARNING: bad16.pdf (trailer, offset 779): parse error while reading object |
| 13 | +WARNING: bad16.pdf (trailer, offset 779): unexpected EOF | |
| 14 | 14 | bad16.pdf: unable to find trailer dictionary while recovering damaged file | ... | ... |
qpdf/qtest/qpdf/bad16.out
| 1 | 1 | WARNING: bad16.pdf (trailer, offset 753): unexpected dictionary close token |
| 2 | 2 | WARNING: bad16.pdf (trailer, offset 756): unexpected dictionary close token |
| 3 | 3 | WARNING: bad16.pdf (trailer, offset 759): unknown token while reading object; treating as string |
| 4 | -WARNING: bad16.pdf (trailer, offset 779): unexpected EOF | |
| 5 | 4 | WARNING: bad16.pdf (trailer, offset 779): parse error while reading object |
| 5 | +WARNING: bad16.pdf (trailer, offset 779): unexpected EOF | |
| 6 | 6 | bad16.pdf (offset 712): expected trailer dictionary | ... | ... |
qpdf/qtest/qpdf/bad36-recover.out
| 1 | 1 | WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string |
| 2 | -WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2 | |
| 3 | 2 | WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key |
| 3 | +WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2 | |
| 4 | 4 | /QTest is implicit |
| 5 | 5 | /QTest is direct and has type null (2) |
| 6 | 6 | /QTest is null | ... | ... |
qpdf/qtest/qpdf/bad36.out
| 1 | 1 | WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string |
| 2 | -WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2 | |
| 3 | 2 | WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key |
| 3 | +WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2 | |
| 4 | 4 | /QTest is implicit |
| 5 | 5 | /QTest is direct and has type null (2) |
| 6 | 6 | /QTest is null | ... | ... |
qpdf/qtest/qpdf/bad39.qdf
0 → 100644
| 1 | +%PDF-1.3 | |
| 2 | +%¿÷¢þ | |
| 3 | +%QDF-1.0 | |
| 4 | + | |
| 5 | +%% Original object ID: 1 0 | |
| 6 | +1 0 obj | |
| 7 | +<< | |
| 8 | + /Pages 2 0 R | |
| 9 | + /Type /Catalog | |
| 10 | +>> | |
| 11 | +endobj | |
| 12 | + | |
| 13 | +%% Original object ID: 2 0 | |
| 14 | +2 0 obj | |
| 15 | +<< | |
| 16 | + /Count 1 | |
| 17 | + /Kids [ | |
| 18 | + 3 0 R | |
| 19 | + ] | |
| 20 | + /Type /Pages | |
| 21 | +>> | |
| 22 | +endobj | |
| 23 | + | |
| 24 | +%% Page 1 | |
| 25 | +%% Original object ID: 3 0 | |
| 26 | +3 0 obj | |
| 27 | +<< | |
| 28 | + /Contents 4 0 R | |
| 29 | + /MediaBox [ | |
| 30 | + 0 | |
| 31 | + 0 | |
| 32 | + 612 | |
| 33 | + 792 | |
| 34 | + ] | |
| 35 | + /Parent 2 0 R | |
| 36 | + /Resources << | |
| 37 | + /Font << | |
| 38 | + /F1 6 0 R | |
| 39 | + >> | |
| 40 | + /ProcSet 7 0 R | |
| 41 | + >> | |
| 42 | + /Type /Page | |
| 43 | +>> | |
| 44 | +endobj | |
| 45 | + | |
| 46 | +%% Contents for page 1 | |
| 47 | +%% Original object ID: 4 0 | |
| 48 | +4 0 obj | |
| 49 | +<< | |
| 50 | + /Length 5 0 R | |
| 51 | +>> | |
| 52 | +stream | |
| 53 | +BT | |
| 54 | + /F1 24 Tf | |
| 55 | + 72 720 Td | |
| 56 | + (Potato) Tj | |
| 57 | +ET | |
| 58 | +endstream | |
| 59 | +endobj | |
| 60 | + | |
| 61 | +5 0 obj | |
| 62 | +44 | |
| 63 | +endobj | |
| 64 | + | |
| 65 | +%% Original object ID: 6 0 | |
| 66 | +6 0 obj | |
| 67 | +<< | |
| 68 | + /BaseFont /Helvetica | |
| 69 | + /Encoding /WinAnsiEncoding | |
| 70 | + /Name /F1 | |
| 71 | + /Subtype /Type1 | |
| 72 | + /Type /Font | |
| 73 | +>> | |
| 74 | +endobj | |
| 75 | + | |
| 76 | +%% Original object ID: 5 0 | |
| 77 | +7 0 obj | |
| 78 | +[ | |
| 79 | ||
| 80 | + /Text | |
| 81 | +] | |
| 82 | +endobj | |
| 83 | + | |
| 84 | +xref | |
| 85 | +0 8 | |
| 86 | +0000000000 65535 f | |
| 87 | +0000000052 00000 n | |
| 88 | +0000000133 00000 n | |
| 89 | +0000000242 00000 n | |
| 90 | +0000000484 00000 n | |
| 91 | +0000000583 00000 n | |
| 92 | +0000000629 00000 n | |
| 93 | +0000001113 00000 n | |
| 94 | +trailer << | |
| 95 | + /Root 1 0 R | |
| 96 | + /Size 8 | |
| 97 | + /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] | |
| 98 | +>> | |
| 99 | +startxref | |
| 100 | +809 | |
| 101 | +%%EOF | |
| 102 | +7 0 obj | ... | ... |
qpdf/qtest/qpdf/issue-335a.out
| ... | ... | @@ -51,6 +51,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected ) |
| 51 | 51 | WARNING: issue-335a.pdf (trailer, offset 596): unexpected ) |
| 52 | 52 | WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2 |
| 53 | 53 | WARNING: issue-335a.pdf (trailer, offset 600): unexpected ) |
| 54 | +WARNING: issue-335a.pdf (trailer, offset 134): dictionary has duplicated key /L | |
| 54 | 55 | WARNING: issue-335a.pdf (trailer, offset 601): unexpected ) |
| 55 | 56 | WARNING: issue-335a.pdf (trailer, offset 648): unexpected ) |
| 56 | 57 | WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2 |
| ... | ... | @@ -74,6 +75,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected ) |
| 74 | 75 | WARNING: issue-335a.pdf (trailer, offset 596): unexpected ) |
| 75 | 76 | WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2 |
| 76 | 77 | WARNING: issue-335a.pdf (trailer, offset 600): unexpected ) |
| 78 | +WARNING: issue-335a.pdf (trailer, offset 164): dictionary has duplicated key /L | |
| 77 | 79 | WARNING: issue-335a.pdf (trailer, offset 601): unexpected ) |
| 78 | 80 | WARNING: issue-335a.pdf (trailer, offset 648): unexpected ) |
| 79 | 81 | WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2 |
| ... | ... | @@ -97,6 +99,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected ) |
| 97 | 99 | WARNING: issue-335a.pdf (trailer, offset 596): unexpected ) |
| 98 | 100 | WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2 |
| 99 | 101 | WARNING: issue-335a.pdf (trailer, offset 600): unexpected ) |
| 102 | +WARNING: issue-335a.pdf (trailer, offset 231): dictionary has duplicated key /L | |
| 100 | 103 | WARNING: issue-335a.pdf (trailer, offset 601): unexpected ) |
| 101 | 104 | WARNING: issue-335a.pdf (trailer, offset 648): unexpected ) |
| 102 | 105 | WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2 |
| ... | ... | @@ -448,6 +451,7 @@ WARNING: issue-335a.pdf (trailer, offset 1168): unexpected ) |
| 448 | 451 | WARNING: issue-335a.pdf (trailer, offset 1328): unexpected ) |
| 449 | 452 | WARNING: issue-335a.pdf (trailer, offset 1329): name with stray # will not work with PDF >= 1.2 |
| 450 | 453 | WARNING: issue-335a.pdf (trailer, offset 1332): unexpected ) |
| 454 | +WARNING: issue-335a.pdf (trailer, offset 1033): dictionary has duplicated key /L | |
| 451 | 455 | WARNING: issue-335a.pdf (trailer, offset 1333): unexpected ) |
| 452 | 456 | WARNING: issue-335a.pdf (trailer, offset 1344): unexpected ) |
| 453 | 457 | WARNING: issue-335a.pdf (trailer, offset 1428): unexpected ) | ... | ... |
qpdf/qtest/qpdf/parse-object.out
| 1 | 1 | [ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ] |
| 2 | -logic error parsing indirect: QPDFObjectHandle::parse called without context on an object with indirect references | |
| 2 | +logic error parsing indirect: QPDFParser::parse called without context on an object with indirect references | |
| 3 | 3 | trailing data: parsed object (trailing test): trailing data found parsing object from string |
| 4 | 4 | WARNING: parsed object (offset 9): unknown token while reading object; treating as string |
| 5 | +WARNING: parsed object: treating unexpected brace token as null | |
| 6 | +WARNING: parsed object: treating unexpected brace token as null | |
| 7 | +WARNING: parsed object: unexpected dictionary close token | |
| 8 | +WARNING: bad39.qdf (object 7 0, offset 1121): unexpected EOF | |
| 9 | +WARNING: bad39.qdf (object 7 0, offset 1121): expected endobj | |
| 10 | +WARNING: bad39.qdf (object 7 0, offset 1121): EOF after endobj | |
| 5 | 11 | test 31 done | ... | ... |
qpdf/test_driver.cc
| ... | ... | @@ -1195,6 +1195,13 @@ test_31(QPDF& pdf, char const* arg2) |
| 1195 | 1195 | // mistakenly parsed as an indirect object. |
| 1196 | 1196 | assert(QPDFObjectHandle::parse(&pdf, "[5 0 R 0 R /X]").unparse() == "[ 5 0 R 0 (R) /X ]"); |
| 1197 | 1197 | assert(QPDFObjectHandle::parse(&pdf, "[1 0 R]", "indirect test").unparse() == "[ 1 0 R ]"); |
| 1198 | + // TC:QPDFParser bad brace | |
| 1199 | + assert(QPDFObjectHandle::parse(&pdf, "}").unparse() == "null"); | |
| 1200 | + assert(QPDFObjectHandle::parse(&pdf, "{").unparse() == "null"); | |
| 1201 | + // TC:QPDFParser bad dictionary close | |
| 1202 | + assert(QPDFObjectHandle::parse(&pdf, ">>").unparse() == "null"); | |
| 1203 | + // TC:QPDFParser eof in parse | |
| 1204 | + assert(QPDFObjectHandle::parse(&pdf, "[7 0 R]").getArrayItem(0).isNull()); | |
| 1198 | 1205 | } |
| 1199 | 1206 | |
| 1200 | 1207 | static void | ... | ... |