Commit 6bbea4baa0c06b39b1b71f1aa6fc276789296556
1 parent
f3e267fc
Implement QPDFObjectHandle::parse
Move object parsing code from QPDF to QPDFObjectHandle and parameterize the parts of it that are specific to a QPDF object. Provide a version that can't handle indirect objects and that can be called on an arbitrary string. A side effect of this change is that the offset used when reporting invalid stream length has changed, but since the new value seems like a better value than the old one, the test suite has been updated rather than making the code backward compatible. This only affects the offset reported for invalid streams that lack /Length or have an invalid /Length key. Updated some test code and examples to use QPDFObjectHandle::parse. Supporting changes include adding a BufferInputSource constructor that takes a string.
Showing
18 changed files
with
618 additions
and
386 deletions
ChangeLog
| 1 | +2012-07-21 Jay Berkenbilt <ejb@ql.org> | |
| 2 | + | |
| 3 | + * Add new method QPDFObjectHandle::replaceDict to replace a | |
| 4 | + stream's dictionary. Use with caution; see comments in | |
| 5 | + QPDFObjectHandle.hh. | |
| 6 | + | |
| 7 | + * Add new method QPDFObjectHandle::parse for creation of | |
| 8 | + QPDFObjectHandle objects from string representations of the | |
| 9 | + objects. Thanks to Tobias Hoffmann for the idea. | |
| 10 | + | |
| 1 | 11 | 2012-07-15 Jay Berkenbilt <ejb@ql.org> |
| 2 | 12 | |
| 3 | 13 | * add new QPDF::isEncrypted method that returns some additional | ... | ... |
TODO
| ... | ... | @@ -20,16 +20,14 @@ Next |
| 20 | 20 | * Make sure that the release notes call attention to the one API |
| 21 | 21 | breaking change: removal of length from replaceStreamData. |
| 22 | 22 | |
| 23 | - * Add a way to create new QPDFObjectHandles with a string | |
| 24 | - representation of them, such as | |
| 25 | - QPDFObjectHandle::parse("<< /a 1 /b 2 >>"); | |
| 26 | - | |
| 27 | 23 | * Document thread safety: One individual QPDF or QPDFWriter object |
| 28 | 24 | can only be used by one thread at a time, but multiple threads can |
| 29 | 25 | simultaneously use separate objects. |
| 30 | 26 | |
| 31 | 27 | * Write some documentation about the design of copyForeignObject. |
| 32 | 28 | |
| 29 | + * Mention QPDFObjectHandle::parse in the documentation. | |
| 30 | + | |
| 33 | 31 | * copyForeignObject still to do: |
| 34 | 32 | |
| 35 | 33 | - qpdf command | ... | ... |
examples/pdf-create.cc
| ... | ... | @@ -81,24 +81,28 @@ static void create_pdf(char const* filename) |
| 81 | 81 | // Add an indirect object to contain a font descriptor for the |
| 82 | 82 | // built-in Helvetica font. |
| 83 | 83 | QPDFObjectHandle font = pdf.makeIndirectObject( |
| 84 | - QPDFObjectHandle::newDictionary()); | |
| 85 | - font.replaceKey("/Type", newName("/Font")); | |
| 86 | - font.replaceKey("/Subtype", newName("/Type1")); | |
| 87 | - font.replaceKey("/Name", newName("/F1")); | |
| 88 | - font.replaceKey("/BaseFont", newName("/Helvetica")); | |
| 89 | - font.replaceKey("/Encoding", newName("/WinAnsiEncoding")); | |
| 84 | + QPDFObjectHandle::parse( | |
| 85 | + "<<" | |
| 86 | + " /Type /Font" | |
| 87 | + " /Subtype /Type1" | |
| 88 | + " /Name /F1" | |
| 89 | + " /BaseFont /Helvetica" | |
| 90 | + " /Encoding /WinAnsiEncoding" | |
| 91 | + ">>")); | |
| 90 | 92 | |
| 91 | 93 | // Create a stream to encode our image. We don't have to set the |
| 92 | 94 | // length or filters. QPDFWriter will fill in the length and |
| 93 | 95 | // compress the stream data using FlateDecode by default. |
| 94 | 96 | QPDFObjectHandle image = QPDFObjectHandle::newStream(&pdf); |
| 95 | - QPDFObjectHandle image_dict = image.getDict(); | |
| 96 | - image_dict.replaceKey("/Type", newName("/XObject")); | |
| 97 | - image_dict.replaceKey("/Subtype", newName("/Image")); | |
| 98 | - image_dict.replaceKey("/ColorSpace", newName("/DeviceRGB")); | |
| 99 | - image_dict.replaceKey("/BitsPerComponent", newInteger(8)); | |
| 100 | - image_dict.replaceKey("/Width", newInteger(100)); | |
| 101 | - image_dict.replaceKey("/Height", newInteger(100)); | |
| 97 | + image.replaceDict(QPDFObjectHandle::parse( | |
| 98 | + "<<" | |
| 99 | + " /Type /XObject" | |
| 100 | + " /Subtype /Image" | |
| 101 | + " /ColorSpace /DeviceRGB" | |
| 102 | + " /BitsPerComponent 8" | |
| 103 | + " /Width 100" | |
| 104 | + " /Height 100" | |
| 105 | + ">>")); | |
| 102 | 106 | // Provide the stream data. |
| 103 | 107 | ImageProvider* p = new ImageProvider(100, 100); |
| 104 | 108 | PointerHolder<QPDFObjectHandle::StreamDataProvider> provider(p); |
| ... | ... | @@ -107,10 +111,8 @@ static void create_pdf(char const* filename) |
| 107 | 111 | QPDFObjectHandle::newNull()); |
| 108 | 112 | |
| 109 | 113 | // Create direct objects as needed by the page dictionary. |
| 110 | - QPDFObjectHandle procset = QPDFObjectHandle::newArray(); | |
| 111 | - procset.appendItem(newName("/PDF")); | |
| 112 | - procset.appendItem(newName("/Text")); | |
| 113 | - procset.appendItem(newName("/ImageC")); | |
| 114 | + QPDFObjectHandle procset = QPDFObjectHandle::parse( | |
| 115 | + "[/PDF /Text /ImageC]"); | |
| 114 | 116 | |
| 115 | 117 | QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary(); |
| 116 | 118 | rfont.replaceKey("/F1", font); | ... | ... |
include/qpdf/BufferInputSource.hh
| ... | ... | @@ -9,6 +9,8 @@ class BufferInputSource: public InputSource |
| 9 | 9 | public: |
| 10 | 10 | BufferInputSource(std::string const& description, Buffer* buf, |
| 11 | 11 | bool own_memory = false); |
| 12 | + BufferInputSource(std::string const& description, | |
| 13 | + std::string const& contents); | |
| 12 | 14 | virtual ~BufferInputSource(); |
| 13 | 15 | virtual qpdf_offset_t findAndSkipNextEOL(); |
| 14 | 16 | virtual std::string const& getName() const; | ... | ... |
include/qpdf/QPDF.hh
| ... | ... | @@ -531,6 +531,23 @@ class QPDF |
| 531 | 531 | std::map<ObjGen, QPDFObjectHandle> foreign_streams; |
| 532 | 532 | }; |
| 533 | 533 | |
| 534 | + class StringDecrypter: public QPDFObjectHandle::StringDecrypter | |
| 535 | + { | |
| 536 | + friend class QPDF; | |
| 537 | + | |
| 538 | + public: | |
| 539 | + StringDecrypter(QPDF* qpdf, int objid, int gen); | |
| 540 | + virtual ~StringDecrypter() | |
| 541 | + { | |
| 542 | + } | |
| 543 | + virtual void decryptString(std::string& val); | |
| 544 | + | |
| 545 | + private: | |
| 546 | + QPDF* qpdf; | |
| 547 | + int objid; | |
| 548 | + int gen; | |
| 549 | + }; | |
| 550 | + | |
| 534 | 551 | void parse(char const* password); |
| 535 | 552 | void warn(QPDFExc const& e); |
| 536 | 553 | void setTrailer(QPDFObjectHandle obj); |
| ... | ... | @@ -547,10 +564,6 @@ class QPDF |
| 547 | 564 | QPDFObjectHandle readObject( |
| 548 | 565 | PointerHolder<InputSource>, std::string const& description, |
| 549 | 566 | int objid, int generation, bool in_object_stream); |
| 550 | - QPDFObjectHandle readObjectInternal( | |
| 551 | - PointerHolder<InputSource> input, int objid, int generation, | |
| 552 | - bool in_object_stream, | |
| 553 | - bool in_array, bool in_dictionary); | |
| 554 | 567 | size_t recoverStreamLength( |
| 555 | 568 | PointerHolder<InputSource> input, int objid, int generation, |
| 556 | 569 | qpdf_offset_t stream_offset); | ... | ... |
include/qpdf/QPDFObjectHandle.hh
| ... | ... | @@ -18,6 +18,7 @@ |
| 18 | 18 | |
| 19 | 19 | #include <qpdf/PointerHolder.hh> |
| 20 | 20 | #include <qpdf/Buffer.hh> |
| 21 | +#include <qpdf/InputSource.hh> | |
| 21 | 22 | |
| 22 | 23 | #include <qpdf/QPDFObject.hh> |
| 23 | 24 | |
| ... | ... | @@ -25,6 +26,7 @@ class Pipeline; |
| 25 | 26 | class QPDF; |
| 26 | 27 | class QPDF_Dictionary; |
| 27 | 28 | class QPDF_Array; |
| 29 | +class QPDFTokenizer; | |
| 28 | 30 | |
| 29 | 31 | class QPDFObjectHandle |
| 30 | 32 | { |
| ... | ... | @@ -57,6 +59,18 @@ class QPDFObjectHandle |
| 57 | 59 | Pipeline* pipeline) = 0; |
| 58 | 60 | }; |
| 59 | 61 | |
| 62 | + // This class is used by parse to decrypt strings when reading an | |
| 63 | + // object that contains encrypted strings. | |
| 64 | + class StringDecrypter | |
| 65 | + { | |
| 66 | + public: | |
| 67 | + QPDF_DLL | |
| 68 | + virtual ~StringDecrypter() | |
| 69 | + { | |
| 70 | + } | |
| 71 | + virtual void decryptString(std::string& val) = 0; | |
| 72 | + }; | |
| 73 | + | |
| 60 | 74 | QPDF_DLL |
| 61 | 75 | QPDFObjectHandle(); |
| 62 | 76 | QPDF_DLL |
| ... | ... | @@ -95,6 +109,30 @@ class QPDFObjectHandle |
| 95 | 109 | |
| 96 | 110 | // Public factory methods |
| 97 | 111 | |
| 112 | + // Construct an object of any type from a string representation of | |
| 113 | + // the object. Throws QPDFExc with an empty filename and an | |
| 114 | + // offset into the string if there is an error. Any indirect | |
| 115 | + // object syntax (obj gen R) will cause a logic_error exception to | |
| 116 | + // be thrown. If object_description is provided, it will appear | |
| 117 | + // in the message of any QPDFExc exception thrown for invalid | |
| 118 | + // syntax. | |
| 119 | + QPDF_DLL | |
| 120 | + static QPDFObjectHandle parse(std::string const& object_str, | |
| 121 | + std::string const& object_description = ""); | |
| 122 | + | |
| 123 | + // Construct an object as above by reading from the given | |
| 124 | + // InputSource at its current position and using the tokenizer you | |
| 125 | + // supply. Indirect objects and encrypted strings are permitted. | |
| 126 | + // This method is intended to be called by QPDF for parsing | |
| 127 | + // objects that are read from the object's input stream. | |
| 128 | + QPDF_DLL | |
| 129 | + static QPDFObjectHandle parse(PointerHolder<InputSource> input, | |
| 130 | + std::string const& object_description, | |
| 131 | + QPDFTokenizer&, bool& empty, | |
| 132 | + StringDecrypter* decrypter, | |
| 133 | + QPDF* context); | |
| 134 | + | |
| 135 | + // Type-specific factories | |
| 98 | 136 | QPDF_DLL |
| 99 | 137 | static QPDFObjectHandle newNull(); |
| 100 | 138 | QPDF_DLL |
| ... | ... | @@ -124,7 +162,8 @@ class QPDFObjectHandle |
| 124 | 162 | // object. A subsequent call must be made to replaceStreamData() |
| 125 | 163 | // to provide data for the stream. The stream's dictionary may be |
| 126 | 164 | // retrieved by calling getDict(), and the resulting dictionary |
| 127 | - // may be modified. | |
| 165 | + // may be modified. Alternatively, you can create a new | |
| 166 | + // dictionary and call replaceDict to install it. | |
| 128 | 167 | QPDF_DLL |
| 129 | 168 | static QPDFObjectHandle newStream(QPDF* qpdf); |
| 130 | 169 | |
| ... | ... | @@ -303,6 +342,15 @@ class QPDFObjectHandle |
| 303 | 342 | bool pipeStreamData(Pipeline*, bool filter, |
| 304 | 343 | bool normalize, bool compress); |
| 305 | 344 | |
| 345 | + // Replace a stream's dictionary. The new dictionary must be | |
| 346 | + // consistent with the stream's data. This is most appropriately | |
| 347 | + // used when creating streams from scratch that will use a stream | |
| 348 | + // data provider and therefore start with an empty dictionary. It | |
| 349 | + // may be more convenient in this case than calling getDict and | |
| 350 | + // modifying it for each key. The pdf-create example does this. | |
| 351 | + QPDF_DLL | |
| 352 | + void replaceDict(QPDFObjectHandle); | |
| 353 | + | |
| 306 | 354 | // Replace this stream's stream data with the given data buffer, |
| 307 | 355 | // and replace the /Filter and /DecodeParms keys in the stream |
| 308 | 356 | // dictionary with the given values. (If either value is empty, |
| ... | ... | @@ -489,6 +537,12 @@ class QPDFObjectHandle |
| 489 | 537 | void dereference(); |
| 490 | 538 | void makeDirectInternal(std::set<int>& visited); |
| 491 | 539 | void releaseResolved(); |
| 540 | + static QPDFObjectHandle parseInternal( | |
| 541 | + PointerHolder<InputSource> input, | |
| 542 | + std::string const& object_description, | |
| 543 | + QPDFTokenizer& tokenizer, bool& empty, | |
| 544 | + StringDecrypter* decrypter, QPDF* context, | |
| 545 | + bool in_array, bool in_dictionary); | |
| 492 | 546 | |
| 493 | 547 | bool initialized; |
| 494 | 548 | ... | ... |
libqpdf/BufferInputSource.cc
| ... | ... | @@ -11,6 +11,18 @@ BufferInputSource::BufferInputSource(std::string const& description, |
| 11 | 11 | { |
| 12 | 12 | } |
| 13 | 13 | |
| 14 | +BufferInputSource::BufferInputSource(std::string const& description, | |
| 15 | + std::string const& contents) : | |
| 16 | + own_memory(true), | |
| 17 | + description(description), | |
| 18 | + buf(0), | |
| 19 | + cur_offset(0) | |
| 20 | +{ | |
| 21 | + this->buf = new Buffer(contents.length()); | |
| 22 | + unsigned char* bp = buf->getBuffer(); | |
| 23 | + memcpy(bp, (char*)contents.c_str(), contents.length()); | |
| 24 | +} | |
| 25 | + | |
| 14 | 26 | BufferInputSource::~BufferInputSource() |
| 15 | 27 | { |
| 16 | 28 | if (own_memory) | ... | ... |
libqpdf/QPDF.cc
| ... | ... | @@ -68,6 +68,18 @@ QPDF::CopiedStreamDataProvider::registerForeignStream( |
| 68 | 68 | this->foreign_streams[local_og] = foreign_stream; |
| 69 | 69 | } |
| 70 | 70 | |
| 71 | +QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, int objid, int gen) : | |
| 72 | + qpdf(qpdf), | |
| 73 | + objid(objid), | |
| 74 | + gen(gen) | |
| 75 | +{ | |
| 76 | +} | |
| 77 | + | |
| 78 | +void | |
| 79 | +QPDF::StringDecrypter::decryptString(std::string& val) | |
| 80 | +{ | |
| 81 | + qpdf->decryptString(val, objid, gen); | |
| 82 | +} | |
| 71 | 83 | |
| 72 | 84 | std::string const& |
| 73 | 85 | QPDF::QPDFVersion() |
| ... | ... | @@ -940,358 +952,164 @@ QPDF::readObject(PointerHolder<InputSource> input, |
| 940 | 952 | { |
| 941 | 953 | setLastObjectDescription(description, objid, generation); |
| 942 | 954 | qpdf_offset_t offset = input->tell(); |
| 943 | - QPDFObjectHandle object = readObjectInternal( | |
| 944 | - input, objid, generation, in_object_stream, false, false); | |
| 945 | - // Override last_offset so that it points to the beginning of the | |
| 946 | - // object we just read | |
| 947 | - input->setLastOffset(offset); | |
| 948 | - return object; | |
| 949 | -} | |
| 950 | - | |
| 951 | -QPDFObjectHandle | |
| 952 | -QPDF::readObjectInternal(PointerHolder<InputSource> input, | |
| 953 | - int objid, int generation, | |
| 954 | - bool in_object_stream, | |
| 955 | - bool in_array, bool in_dictionary) | |
| 956 | -{ | |
| 957 | - if (in_dictionary && in_array) | |
| 958 | - { | |
| 959 | - // Although dictionaries and arrays arbitrarily nest, these | |
| 960 | - // variables indicate what is at the top of the stack right | |
| 961 | - // now, so they can, by definition, never both be true. | |
| 962 | - throw std::logic_error( | |
| 963 | - "INTERNAL ERROR: readObjectInternal: in_dict && in_array"); | |
| 964 | - } | |
| 965 | - | |
| 966 | - QPDFObjectHandle object; | |
| 967 | 955 | |
| 968 | - qpdf_offset_t offset = input->tell(); | |
| 969 | - std::vector<QPDFObjectHandle> olist; | |
| 970 | - bool done = false; | |
| 971 | - while (! done) | |
| 956 | + bool empty = false; | |
| 957 | + PointerHolder<StringDecrypter> decrypter_ph; | |
| 958 | + StringDecrypter* decrypter = 0; | |
| 959 | + if (this->encrypted && (! in_object_stream)) | |
| 972 | 960 | { |
| 973 | - object = QPDFObjectHandle(); | |
| 974 | - | |
| 975 | - QPDFTokenizer::Token token = readToken(input); | |
| 976 | - | |
| 977 | - switch (token.getType()) | |
| 978 | - { | |
| 979 | - case QPDFTokenizer::tt_brace_open: | |
| 980 | - case QPDFTokenizer::tt_brace_close: | |
| 981 | - // Don't know what to do with these for now | |
| 982 | - QTC::TC("qpdf", "QPDF bad brace"); | |
| 983 | - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 984 | - this->last_object_description, | |
| 985 | - input->getLastOffset(), | |
| 986 | - "unexpected brace token"); | |
| 987 | - break; | |
| 988 | - | |
| 989 | - case QPDFTokenizer::tt_array_close: | |
| 990 | - if (in_array) | |
| 991 | - { | |
| 992 | - done = true; | |
| 993 | - } | |
| 994 | - else | |
| 995 | - { | |
| 996 | - QTC::TC("qpdf", "QPDF bad array close"); | |
| 997 | - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 998 | - this->last_object_description, | |
| 999 | - input->getLastOffset(), | |
| 1000 | - "unexpected array close token"); | |
| 1001 | - } | |
| 1002 | - break; | |
| 1003 | - | |
| 1004 | - case QPDFTokenizer::tt_dict_close: | |
| 1005 | - if (in_dictionary) | |
| 1006 | - { | |
| 1007 | - done = true; | |
| 1008 | - } | |
| 1009 | - else | |
| 1010 | - { | |
| 1011 | - QTC::TC("qpdf", "QPDF bad dictionary close"); | |
| 1012 | - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 1013 | - this->last_object_description, | |
| 1014 | - input->getLastOffset(), | |
| 1015 | - "unexpected dictionary close token"); | |
| 1016 | - } | |
| 1017 | - break; | |
| 1018 | - | |
| 1019 | - case QPDFTokenizer::tt_array_open: | |
| 1020 | - object = readObjectInternal( | |
| 1021 | - input, objid, generation, in_object_stream, true, false); | |
| 1022 | - break; | |
| 1023 | - | |
| 1024 | - case QPDFTokenizer::tt_dict_open: | |
| 1025 | - object = readObjectInternal( | |
| 1026 | - input, objid, generation, in_object_stream, false, true); | |
| 1027 | - break; | |
| 1028 | - | |
| 1029 | - case QPDFTokenizer::tt_bool: | |
| 1030 | - object = QPDFObjectHandle::newBool( | |
| 1031 | - (token.getValue() == "true")); | |
| 1032 | - break; | |
| 1033 | - | |
| 1034 | - case QPDFTokenizer::tt_null: | |
| 1035 | - object = QPDFObjectHandle::newNull(); | |
| 1036 | - break; | |
| 1037 | - | |
| 1038 | - case QPDFTokenizer::tt_integer: | |
| 1039 | - object = QPDFObjectHandle::newInteger( | |
| 1040 | - QUtil::string_to_ll(token.getValue().c_str())); | |
| 1041 | - break; | |
| 1042 | - | |
| 1043 | - case QPDFTokenizer::tt_real: | |
| 1044 | - object = QPDFObjectHandle::newReal(token.getValue()); | |
| 1045 | - break; | |
| 1046 | - | |
| 1047 | - case QPDFTokenizer::tt_name: | |
| 1048 | - object = QPDFObjectHandle::newName(token.getValue()); | |
| 1049 | - break; | |
| 1050 | - | |
| 1051 | - case QPDFTokenizer::tt_word: | |
| 1052 | - { | |
| 1053 | - std::string const& value = token.getValue(); | |
| 1054 | - if ((value == "R") && (in_array || in_dictionary) && | |
| 1055 | - (olist.size() >= 2) && | |
| 1056 | - (olist[olist.size() - 1].isInteger()) && | |
| 1057 | - (olist[olist.size() - 2].isInteger())) | |
| 1058 | - { | |
| 1059 | - // Try to resolve indirect objects | |
| 1060 | - object = QPDFObjectHandle::Factory::newIndirect( | |
| 1061 | - this, | |
| 1062 | - olist[olist.size() - 2].getIntValue(), | |
| 1063 | - olist[olist.size() - 1].getIntValue()); | |
| 1064 | - olist.pop_back(); | |
| 1065 | - olist.pop_back(); | |
| 1066 | - } | |
| 1067 | - else if ((value == "endobj") && | |
| 1068 | - (! (in_array || in_dictionary))) | |
| 1069 | - { | |
| 1070 | - // Nothing in the PDF spec appears to allow empty | |
| 1071 | - // objects, but they have been encountered in | |
| 1072 | - // actual PDF files and Adobe Reader appears to | |
| 1073 | - // ignore them. | |
| 1074 | - warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 1075 | - this->last_object_description, | |
| 1076 | - input->getLastOffset(), | |
| 1077 | - "empty object treated as null")); | |
| 1078 | - object = QPDFObjectHandle::newNull(); | |
| 1079 | - input->seek(input->getLastOffset(), SEEK_SET); | |
| 1080 | - } | |
| 1081 | - else | |
| 1082 | - { | |
| 1083 | - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 1084 | - this->last_object_description, | |
| 1085 | - input->getLastOffset(), | |
| 1086 | - "unknown token while reading object (" + | |
| 1087 | - value + ")"); | |
| 1088 | - } | |
| 1089 | - } | |
| 1090 | - break; | |
| 1091 | - | |
| 1092 | - case QPDFTokenizer::tt_string: | |
| 1093 | - { | |
| 1094 | - std::string val = token.getValue(); | |
| 1095 | - if (this->encrypted && (! in_object_stream)) | |
| 1096 | - { | |
| 1097 | - decryptString(val, objid, generation); | |
| 1098 | - } | |
| 1099 | - object = QPDFObjectHandle::newString(val); | |
| 1100 | - } | |
| 1101 | - break; | |
| 1102 | - | |
| 1103 | - default: | |
| 1104 | - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 1105 | - this->last_object_description, | |
| 1106 | - input->getLastOffset(), | |
| 1107 | - "unknown token type while reading object"); | |
| 1108 | - break; | |
| 1109 | - } | |
| 1110 | - | |
| 1111 | - if (in_dictionary || in_array) | |
| 1112 | - { | |
| 1113 | - if (! done) | |
| 1114 | - { | |
| 1115 | - olist.push_back(object); | |
| 1116 | - } | |
| 1117 | - } | |
| 1118 | - else if (! object.isInitialized()) | |
| 1119 | - { | |
| 1120 | - throw std::logic_error( | |
| 1121 | - "INTERNAL ERROR: uninitialized object (token = " + | |
| 1122 | - QUtil::int_to_string(token.getType()) + | |
| 1123 | - ", " + token.getValue() + ")"); | |
| 1124 | - } | |
| 1125 | - else | |
| 1126 | - { | |
| 1127 | - done = true; | |
| 1128 | - } | |
| 961 | + decrypter_ph = new StringDecrypter(this, objid, generation); | |
| 962 | + decrypter = decrypter_ph.getPointer(); | |
| 1129 | 963 | } |
| 1130 | - | |
| 1131 | - if (in_array) | |
| 964 | + QPDFObjectHandle object = QPDFObjectHandle::parse( | |
| 965 | + input, description, this->tokenizer, empty, decrypter, this); | |
| 966 | + if (empty) | |
| 1132 | 967 | { |
| 1133 | - object = QPDFObjectHandle::newArray(olist); | |
| 968 | + // Nothing in the PDF spec appears to allow empty objects, but | |
| 969 | + // they have been encountered in actual PDF files and Adobe | |
| 970 | + // Reader appears to ignore them. | |
| 971 | + warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 972 | + this->last_object_description, | |
| 973 | + input->getLastOffset(), | |
| 974 | + "empty object treated as null")); | |
| 1134 | 975 | } |
| 1135 | - else if (in_dictionary) | |
| 976 | + else if (object.isDictionary() && (! in_object_stream)) | |
| 1136 | 977 | { |
| 1137 | - // Convert list to map. Alternating elements are keys. | |
| 1138 | - std::map<std::string, QPDFObjectHandle> dict; | |
| 1139 | - if (olist.size() % 2) | |
| 1140 | - { | |
| 1141 | - QTC::TC("qpdf", "QPDF dictionary odd number of elements"); | |
| 1142 | - throw QPDFExc( | |
| 1143 | - qpdf_e_damaged_pdf, input->getName(), | |
| 1144 | - this->last_object_description, input->getLastOffset(), | |
| 1145 | - "dictionary ending here has an odd number of elements"); | |
| 1146 | - } | |
| 1147 | - for (unsigned int i = 0; i < olist.size(); i += 2) | |
| 1148 | - { | |
| 1149 | - QPDFObjectHandle key_obj = olist[i]; | |
| 1150 | - QPDFObjectHandle val = olist[i + 1]; | |
| 1151 | - if (! key_obj.isName()) | |
| 1152 | - { | |
| 1153 | - throw QPDFExc( | |
| 1154 | - qpdf_e_damaged_pdf, | |
| 1155 | - input->getName(), this->last_object_description, offset, | |
| 1156 | - std::string("dictionary key not name (") + | |
| 1157 | - key_obj.unparse() + ")"); | |
| 1158 | - } | |
| 1159 | - dict[key_obj.getName()] = val; | |
| 1160 | - } | |
| 1161 | - object = QPDFObjectHandle::newDictionary(dict); | |
| 978 | + // check for stream | |
| 979 | + qpdf_offset_t cur_offset = input->tell(); | |
| 980 | + if (readToken(input) == | |
| 981 | + QPDFTokenizer::Token(QPDFTokenizer::tt_word, "stream")) | |
| 982 | + { | |
| 983 | + // The PDF specification states that the word "stream" | |
| 984 | + // should be followed by either a carriage return and | |
| 985 | + // a newline or by a newline alone. It specifically | |
| 986 | + // disallowed following it by a carriage return alone | |
| 987 | + // since, in that case, there would be no way to tell | |
| 988 | + // whether the NL in a CR NL sequence was part of the | |
| 989 | + // stream data. However, some readers, including | |
| 990 | + // Adobe reader, accept a carriage return by itself | |
| 991 | + // when followed by a non-newline character, so that's | |
| 992 | + // what we do here. | |
| 993 | + { | |
| 994 | + char ch; | |
| 995 | + if (input->read(&ch, 1) == 0) | |
| 996 | + { | |
| 997 | + // A premature EOF here will result in some | |
| 998 | + // other problem that will get reported at | |
| 999 | + // another time. | |
| 1000 | + } | |
| 1001 | + else if (ch == '\n') | |
| 1002 | + { | |
| 1003 | + // ready to read stream data | |
| 1004 | + QTC::TC("qpdf", "QPDF stream with NL only"); | |
| 1005 | + } | |
| 1006 | + else if (ch == '\r') | |
| 1007 | + { | |
| 1008 | + // Read another character | |
| 1009 | + if (input->read(&ch, 1) != 0) | |
| 1010 | + { | |
| 1011 | + if (ch == '\n') | |
| 1012 | + { | |
| 1013 | + // Ready to read stream data | |
| 1014 | + QTC::TC("qpdf", "QPDF stream with CRNL"); | |
| 1015 | + } | |
| 1016 | + else | |
| 1017 | + { | |
| 1018 | + // Treat the \r by itself as the | |
| 1019 | + // whitespace after stream and | |
| 1020 | + // start reading stream data in spite | |
| 1021 | + // of not having seen a newline. | |
| 1022 | + QTC::TC("qpdf", "QPDF stream with CR only"); | |
| 1023 | + input->unreadCh(ch); | |
| 1024 | + warn(QPDFExc( | |
| 1025 | + qpdf_e_damaged_pdf, | |
| 1026 | + input->getName(), | |
| 1027 | + this->last_object_description, | |
| 1028 | + input->tell(), | |
| 1029 | + "stream keyword followed" | |
| 1030 | + " by carriage return only")); | |
| 1031 | + } | |
| 1032 | + } | |
| 1033 | + } | |
| 1034 | + else | |
| 1035 | + { | |
| 1036 | + QTC::TC("qpdf", "QPDF stream without newline"); | |
| 1037 | + warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 1038 | + this->last_object_description, | |
| 1039 | + input->tell(), | |
| 1040 | + "stream keyword not followed" | |
| 1041 | + " by proper line terminator")); | |
| 1042 | + } | |
| 1043 | + } | |
| 1162 | 1044 | |
| 1163 | - if (! in_object_stream) | |
| 1164 | - { | |
| 1165 | - // check for stream | |
| 1166 | - qpdf_offset_t cur_offset = input->tell(); | |
| 1167 | - if (readToken(input) == | |
| 1168 | - QPDFTokenizer::Token(QPDFTokenizer::tt_word, "stream")) | |
| 1169 | - { | |
| 1170 | - // The PDF specification states that the word "stream" | |
| 1171 | - // should be followed by either a carriage return and | |
| 1172 | - // a newline or by a newline alone. It specifically | |
| 1173 | - // disallowed following it by a carriage return alone | |
| 1174 | - // since, in that case, there would be no way to tell | |
| 1175 | - // whether the NL in a CR NL sequence was part of the | |
| 1176 | - // stream data. However, some readers, including | |
| 1177 | - // Adobe reader, accept a carriage return by itself | |
| 1178 | - // when followed by a non-newline character, so that's | |
| 1179 | - // what we do here. | |
| 1180 | - { | |
| 1181 | - char ch; | |
| 1182 | - if (input->read(&ch, 1) == 0) | |
| 1183 | - { | |
| 1184 | - // A premature EOF here will result in some | |
| 1185 | - // other problem that will get reported at | |
| 1186 | - // another time. | |
| 1187 | - } | |
| 1188 | - else if (ch == '\n') | |
| 1189 | - { | |
| 1190 | - // ready to read stream data | |
| 1191 | - QTC::TC("qpdf", "QPDF stream with NL only"); | |
| 1192 | - } | |
| 1193 | - else if (ch == '\r') | |
| 1194 | - { | |
| 1195 | - // Read another character | |
| 1196 | - if (input->read(&ch, 1) != 0) | |
| 1197 | - { | |
| 1198 | - if (ch == '\n') | |
| 1199 | - { | |
| 1200 | - // Ready to read stream data | |
| 1201 | - QTC::TC("qpdf", "QPDF stream with CRNL"); | |
| 1202 | - } | |
| 1203 | - else | |
| 1204 | - { | |
| 1205 | - // Treat the \r by itself as the | |
| 1206 | - // whitespace after endstream and | |
| 1207 | - // start reading stream data in spite | |
| 1208 | - // of not having seen a newline. | |
| 1209 | - QTC::TC("qpdf", "QPDF stream with CR only"); | |
| 1210 | - input->unreadCh(ch); | |
| 1211 | - warn(QPDFExc( | |
| 1212 | - qpdf_e_damaged_pdf, | |
| 1213 | - input->getName(), | |
| 1214 | - this->last_object_description, | |
| 1215 | - input->tell(), | |
| 1216 | - "stream keyword followed" | |
| 1217 | - " by carriage return only")); | |
| 1218 | - } | |
| 1219 | - } | |
| 1220 | - } | |
| 1221 | - else | |
| 1222 | - { | |
| 1223 | - QTC::TC("qpdf", "QPDF stream without newline"); | |
| 1224 | - warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 1225 | - this->last_object_description, | |
| 1226 | - input->tell(), | |
| 1227 | - "stream keyword not followed" | |
| 1228 | - " by proper line terminator")); | |
| 1229 | - } | |
| 1230 | - } | |
| 1045 | + // Must get offset before accessing any additional | |
| 1046 | + // objects since resolving a previously unresolved | |
| 1047 | + // indirect object will change file position. | |
| 1048 | + qpdf_offset_t stream_offset = input->tell(); | |
| 1049 | + size_t length = 0; | |
| 1231 | 1050 | |
| 1232 | - // Must get offset before accessing any additional | |
| 1233 | - // objects since resolving a previously unresolved | |
| 1234 | - // indirect object will change file position. | |
| 1235 | - qpdf_offset_t stream_offset = input->tell(); | |
| 1236 | - size_t length = 0; | |
| 1051 | + try | |
| 1052 | + { | |
| 1053 | + std::map<std::string, QPDFObjectHandle> dict = | |
| 1054 | + object.getDictAsMap(); | |
| 1237 | 1055 | |
| 1238 | - try | |
| 1239 | - { | |
| 1240 | - if (dict.count("/Length") == 0) | |
| 1241 | - { | |
| 1242 | - QTC::TC("qpdf", "QPDF stream without length"); | |
| 1243 | - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 1244 | - this->last_object_description, offset, | |
| 1245 | - "stream dictionary lacks /Length key"); | |
| 1246 | - } | |
| 1056 | + if (dict.count("/Length") == 0) | |
| 1057 | + { | |
| 1058 | + QTC::TC("qpdf", "QPDF stream without length"); | |
| 1059 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 1060 | + this->last_object_description, offset, | |
| 1061 | + "stream dictionary lacks /Length key"); | |
| 1062 | + } | |
| 1247 | 1063 | |
| 1248 | - QPDFObjectHandle length_obj = dict["/Length"]; | |
| 1249 | - if (! length_obj.isInteger()) | |
| 1250 | - { | |
| 1251 | - QTC::TC("qpdf", "QPDF stream length not integer"); | |
| 1252 | - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 1253 | - this->last_object_description, offset, | |
| 1254 | - "/Length key in stream dictionary is not " | |
| 1255 | - "an integer"); | |
| 1256 | - } | |
| 1064 | + QPDFObjectHandle length_obj = dict["/Length"]; | |
| 1065 | + if (! length_obj.isInteger()) | |
| 1066 | + { | |
| 1067 | + QTC::TC("qpdf", "QPDF stream length not integer"); | |
| 1068 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 1069 | + this->last_object_description, offset, | |
| 1070 | + "/Length key in stream dictionary is not " | |
| 1071 | + "an integer"); | |
| 1072 | + } | |
| 1257 | 1073 | |
| 1258 | - length = length_obj.getIntValue(); | |
| 1259 | - input->seek( | |
| 1260 | - stream_offset + (qpdf_offset_t)length, SEEK_SET); | |
| 1261 | - if (! (readToken(input) == | |
| 1262 | - QPDFTokenizer::Token( | |
| 1263 | - QPDFTokenizer::tt_word, "endstream"))) | |
| 1264 | - { | |
| 1265 | - QTC::TC("qpdf", "QPDF missing endstream"); | |
| 1266 | - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 1267 | - this->last_object_description, | |
| 1268 | - input->getLastOffset(), | |
| 1269 | - "expected endstream"); | |
| 1270 | - } | |
| 1271 | - } | |
| 1272 | - catch (QPDFExc& e) | |
| 1273 | - { | |
| 1274 | - if (this->attempt_recovery) | |
| 1275 | - { | |
| 1276 | - // may throw an exception | |
| 1277 | - length = recoverStreamLength( | |
| 1278 | - input, objid, generation, stream_offset); | |
| 1279 | - } | |
| 1280 | - else | |
| 1281 | - { | |
| 1282 | - throw e; | |
| 1283 | - } | |
| 1284 | - } | |
| 1285 | - object = QPDFObjectHandle::Factory::newStream( | |
| 1286 | - this, objid, generation, object, stream_offset, length); | |
| 1287 | - } | |
| 1288 | - else | |
| 1289 | - { | |
| 1290 | - input->seek(cur_offset, SEEK_SET); | |
| 1291 | - } | |
| 1292 | - } | |
| 1074 | + length = length_obj.getIntValue(); | |
| 1075 | + input->seek( | |
| 1076 | + stream_offset + (qpdf_offset_t)length, SEEK_SET); | |
| 1077 | + if (! (readToken(input) == | |
| 1078 | + QPDFTokenizer::Token( | |
| 1079 | + QPDFTokenizer::tt_word, "endstream"))) | |
| 1080 | + { | |
| 1081 | + QTC::TC("qpdf", "QPDF missing endstream"); | |
| 1082 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 1083 | + this->last_object_description, | |
| 1084 | + input->getLastOffset(), | |
| 1085 | + "expected endstream"); | |
| 1086 | + } | |
| 1087 | + } | |
| 1088 | + catch (QPDFExc& e) | |
| 1089 | + { | |
| 1090 | + if (this->attempt_recovery) | |
| 1091 | + { | |
| 1092 | + // may throw an exception | |
| 1093 | + length = recoverStreamLength( | |
| 1094 | + input, objid, generation, stream_offset); | |
| 1095 | + } | |
| 1096 | + else | |
| 1097 | + { | |
| 1098 | + throw e; | |
| 1099 | + } | |
| 1100 | + } | |
| 1101 | + object = QPDFObjectHandle::Factory::newStream( | |
| 1102 | + this, objid, generation, object, stream_offset, length); | |
| 1103 | + } | |
| 1104 | + else | |
| 1105 | + { | |
| 1106 | + input->seek(cur_offset, SEEK_SET); | |
| 1107 | + } | |
| 1293 | 1108 | } |
| 1294 | 1109 | |
| 1110 | + // Override last_offset so that it points to the beginning of the | |
| 1111 | + // object we just read | |
| 1112 | + input->setLastOffset(offset); | |
| 1295 | 1113 | return object; |
| 1296 | 1114 | } |
| 1297 | 1115 | ... | ... |
libqpdf/QPDFObjectHandle.cc
| ... | ... | @@ -11,12 +11,15 @@ |
| 11 | 11 | #include <qpdf/QPDF_Dictionary.hh> |
| 12 | 12 | #include <qpdf/QPDF_Stream.hh> |
| 13 | 13 | #include <qpdf/QPDF_Reserved.hh> |
| 14 | +#include <qpdf/BufferInputSource.hh> | |
| 15 | +#include <qpdf/QPDFExc.hh> | |
| 14 | 16 | |
| 15 | 17 | #include <qpdf/QTC.hh> |
| 16 | 18 | #include <qpdf/QUtil.hh> |
| 17 | 19 | |
| 18 | 20 | #include <stdexcept> |
| 19 | 21 | #include <stdlib.h> |
| 22 | +#include <ctype.h> | |
| 20 | 23 | |
| 21 | 24 | QPDFObjectHandle::QPDFObjectHandle() : |
| 22 | 25 | initialized(false), |
| ... | ... | @@ -398,6 +401,13 @@ QPDFObjectHandle::getDict() |
| 398 | 401 | return dynamic_cast<QPDF_Stream*>(obj.getPointer())->getDict(); |
| 399 | 402 | } |
| 400 | 403 | |
| 404 | +void | |
| 405 | +QPDFObjectHandle::replaceDict(QPDFObjectHandle new_dict) | |
| 406 | +{ | |
| 407 | + assertStream(); | |
| 408 | + dynamic_cast<QPDF_Stream*>(obj.getPointer())->replaceDict(new_dict); | |
| 409 | +} | |
| 410 | + | |
| 401 | 411 | PointerHolder<Buffer> |
| 402 | 412 | QPDFObjectHandle::getStreamData() |
| 403 | 413 | { |
| ... | ... | @@ -599,6 +609,265 @@ QPDFObjectHandle::unparseResolved() |
| 599 | 609 | } |
| 600 | 610 | |
| 601 | 611 | QPDFObjectHandle |
| 612 | +QPDFObjectHandle::parse(std::string const& object_str, | |
| 613 | + std::string const& object_description) | |
| 614 | +{ | |
| 615 | + PointerHolder<InputSource> input = | |
| 616 | + new BufferInputSource("parsed object", object_str); | |
| 617 | + QPDFTokenizer tokenizer; | |
| 618 | + bool empty = false; | |
| 619 | + QPDFObjectHandle result = | |
| 620 | + parse(input, object_description, tokenizer, empty, 0, 0); | |
| 621 | + size_t offset = (size_t) input->tell(); | |
| 622 | + while (offset < object_str.length()) | |
| 623 | + { | |
| 624 | + if (! isspace(object_str[offset])) | |
| 625 | + { | |
| 626 | + QTC::TC("qpdf", "QPDFObjectHandle trailing data in parse"); | |
| 627 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 628 | + object_description, | |
| 629 | + input->getLastOffset(), | |
| 630 | + "trailing data found parsing object from string"); | |
| 631 | + } | |
| 632 | + ++offset; | |
| 633 | + } | |
| 634 | + return result; | |
| 635 | +} | |
| 636 | + | |
| 637 | +QPDFObjectHandle | |
| 638 | +QPDFObjectHandle::parse(PointerHolder<InputSource> input, | |
| 639 | + std::string const& object_description, | |
| 640 | + QPDFTokenizer& tokenizer, bool& empty, | |
| 641 | + StringDecrypter* decrypter, QPDF* context) | |
| 642 | +{ | |
| 643 | + return parseInternal(input, object_description, tokenizer, empty, | |
| 644 | + decrypter, context, false, false); | |
| 645 | +} | |
| 646 | + | |
| 647 | +QPDFObjectHandle | |
| 648 | +QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input, | |
| 649 | + std::string const& object_description, | |
| 650 | + QPDFTokenizer& tokenizer, bool& empty, | |
| 651 | + StringDecrypter* decrypter, QPDF* context, | |
| 652 | + bool in_array, bool in_dictionary) | |
| 653 | +{ | |
| 654 | + empty = false; | |
| 655 | + if (in_dictionary && in_array) | |
| 656 | + { | |
| 657 | + // Although dictionaries and arrays arbitrarily nest, these | |
| 658 | + // variables indicate what is at the top of the stack right | |
| 659 | + // now, so they can, by definition, never both be true. | |
| 660 | + throw std::logic_error( | |
| 661 | + "INTERNAL ERROR: parseInternal: in_dict && in_array"); | |
| 662 | + } | |
| 663 | + | |
| 664 | + QPDFObjectHandle object; | |
| 665 | + | |
| 666 | + qpdf_offset_t offset = input->tell(); | |
| 667 | + std::vector<QPDFObjectHandle> olist; | |
| 668 | + bool done = false; | |
| 669 | + while (! done) | |
| 670 | + { | |
| 671 | + object = QPDFObjectHandle(); | |
| 672 | + | |
| 673 | + QPDFTokenizer::Token token = | |
| 674 | + tokenizer.readToken(input, object_description); | |
| 675 | + | |
| 676 | + switch (token.getType()) | |
| 677 | + { | |
| 678 | + case QPDFTokenizer::tt_brace_open: | |
| 679 | + case QPDFTokenizer::tt_brace_close: | |
| 680 | + // Don't know what to do with these for now | |
| 681 | + QTC::TC("qpdf", "QPDFObjectHandle bad brace"); | |
| 682 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 683 | + object_description, | |
| 684 | + input->getLastOffset(), | |
| 685 | + "unexpected brace token"); | |
| 686 | + break; | |
| 687 | + | |
| 688 | + case QPDFTokenizer::tt_array_close: | |
| 689 | + if (in_array) | |
| 690 | + { | |
| 691 | + done = true; | |
| 692 | + } | |
| 693 | + else | |
| 694 | + { | |
| 695 | + QTC::TC("qpdf", "QPDFObjectHandle bad array close"); | |
| 696 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 697 | + object_description, | |
| 698 | + input->getLastOffset(), | |
| 699 | + "unexpected array close token"); | |
| 700 | + } | |
| 701 | + break; | |
| 702 | + | |
| 703 | + case QPDFTokenizer::tt_dict_close: | |
| 704 | + if (in_dictionary) | |
| 705 | + { | |
| 706 | + done = true; | |
| 707 | + } | |
| 708 | + else | |
| 709 | + { | |
| 710 | + QTC::TC("qpdf", "QPDFObjectHandle bad dictionary close"); | |
| 711 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 712 | + object_description, | |
| 713 | + input->getLastOffset(), | |
| 714 | + "unexpected dictionary close token"); | |
| 715 | + } | |
| 716 | + break; | |
| 717 | + | |
| 718 | + case QPDFTokenizer::tt_array_open: | |
| 719 | + object = parseInternal( | |
| 720 | + input, object_description, tokenizer, empty, | |
| 721 | + decrypter, context, true, false); | |
| 722 | + break; | |
| 723 | + | |
| 724 | + case QPDFTokenizer::tt_dict_open: | |
| 725 | + object = parseInternal( | |
| 726 | + input, object_description, tokenizer, empty, | |
| 727 | + decrypter, context, false, true); | |
| 728 | + break; | |
| 729 | + | |
| 730 | + case QPDFTokenizer::tt_bool: | |
| 731 | + object = newBool((token.getValue() == "true")); | |
| 732 | + break; | |
| 733 | + | |
| 734 | + case QPDFTokenizer::tt_null: | |
| 735 | + object = newNull(); | |
| 736 | + break; | |
| 737 | + | |
| 738 | + case QPDFTokenizer::tt_integer: | |
| 739 | + object = newInteger(QUtil::string_to_ll(token.getValue().c_str())); | |
| 740 | + break; | |
| 741 | + | |
| 742 | + case QPDFTokenizer::tt_real: | |
| 743 | + object = newReal(token.getValue()); | |
| 744 | + break; | |
| 745 | + | |
| 746 | + case QPDFTokenizer::tt_name: | |
| 747 | + object = newName(token.getValue()); | |
| 748 | + break; | |
| 749 | + | |
| 750 | + case QPDFTokenizer::tt_word: | |
| 751 | + { | |
| 752 | + std::string const& value = token.getValue(); | |
| 753 | + if ((value == "R") && (in_array || in_dictionary) && | |
| 754 | + (olist.size() >= 2) && | |
| 755 | + (olist[olist.size() - 1].isInteger()) && | |
| 756 | + (olist[olist.size() - 2].isInteger())) | |
| 757 | + { | |
| 758 | + if (context == 0) | |
| 759 | + { | |
| 760 | + QTC::TC("qpdf", "QPDFObjectHandle indirect without context"); | |
| 761 | + throw std::logic_error( | |
| 762 | + "QPDFObjectHandle::parse called without context" | |
| 763 | + " on an object with indirect references"); | |
| 764 | + } | |
| 765 | + // Try to resolve indirect objects | |
| 766 | + object = newIndirect( | |
| 767 | + context, | |
| 768 | + olist[olist.size() - 2].getIntValue(), | |
| 769 | + olist[olist.size() - 1].getIntValue()); | |
| 770 | + olist.pop_back(); | |
| 771 | + olist.pop_back(); | |
| 772 | + } | |
| 773 | + else if ((value == "endobj") && | |
| 774 | + (! (in_array || in_dictionary))) | |
| 775 | + { | |
| 776 | + // We just saw endobj without having read | |
| 777 | + // anything. Treat this as a null and do not move | |
| 778 | + // the input source's offset. | |
| 779 | + object = newNull(); | |
| 780 | + input->seek(input->getLastOffset(), SEEK_SET); | |
| 781 | + empty = true; | |
| 782 | + } | |
| 783 | + else | |
| 784 | + { | |
| 785 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 786 | + object_description, | |
| 787 | + input->getLastOffset(), | |
| 788 | + "unknown token while reading object (" + | |
| 789 | + value + ")"); | |
| 790 | + } | |
| 791 | + } | |
| 792 | + break; | |
| 793 | + | |
| 794 | + case QPDFTokenizer::tt_string: | |
| 795 | + { | |
| 796 | + std::string val = token.getValue(); | |
| 797 | + if (decrypter) | |
| 798 | + { | |
| 799 | + decrypter->decryptString(val); | |
| 800 | + } | |
| 801 | + object = QPDFObjectHandle::newString(val); | |
| 802 | + } | |
| 803 | + | |
| 804 | + break; | |
| 805 | + | |
| 806 | + default: | |
| 807 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 808 | + object_description, | |
| 809 | + input->getLastOffset(), | |
| 810 | + "unknown token type while reading object"); | |
| 811 | + break; | |
| 812 | + } | |
| 813 | + | |
| 814 | + if (in_dictionary || in_array) | |
| 815 | + { | |
| 816 | + if (! done) | |
| 817 | + { | |
| 818 | + olist.push_back(object); | |
| 819 | + } | |
| 820 | + } | |
| 821 | + else if (! object.isInitialized()) | |
| 822 | + { | |
| 823 | + throw std::logic_error( | |
| 824 | + "INTERNAL ERROR: uninitialized object (token = " + | |
| 825 | + QUtil::int_to_string(token.getType()) + | |
| 826 | + ", " + token.getValue() + ")"); | |
| 827 | + } | |
| 828 | + else | |
| 829 | + { | |
| 830 | + done = true; | |
| 831 | + } | |
| 832 | + } | |
| 833 | + | |
| 834 | + if (in_array) | |
| 835 | + { | |
| 836 | + object = newArray(olist); | |
| 837 | + } | |
| 838 | + else if (in_dictionary) | |
| 839 | + { | |
| 840 | + // Convert list to map. Alternating elements are keys. | |
| 841 | + std::map<std::string, QPDFObjectHandle> dict; | |
| 842 | + if (olist.size() % 2) | |
| 843 | + { | |
| 844 | + QTC::TC("qpdf", "QPDFObjectHandle dictionary odd number of elements"); | |
| 845 | + throw QPDFExc( | |
| 846 | + qpdf_e_damaged_pdf, input->getName(), | |
| 847 | + object_description, input->getLastOffset(), | |
| 848 | + "dictionary ending here has an odd number of elements"); | |
| 849 | + } | |
| 850 | + for (unsigned int i = 0; i < olist.size(); i += 2) | |
| 851 | + { | |
| 852 | + QPDFObjectHandle key_obj = olist[i]; | |
| 853 | + QPDFObjectHandle val = olist[i + 1]; | |
| 854 | + if (! key_obj.isName()) | |
| 855 | + { | |
| 856 | + throw QPDFExc( | |
| 857 | + qpdf_e_damaged_pdf, | |
| 858 | + input->getName(), object_description, offset, | |
| 859 | + std::string("dictionary key not name (") + | |
| 860 | + key_obj.unparse() + ")"); | |
| 861 | + } | |
| 862 | + dict[key_obj.getName()] = val; | |
| 863 | + } | |
| 864 | + object = newDictionary(dict); | |
| 865 | + } | |
| 866 | + | |
| 867 | + return object; | |
| 868 | +} | |
| 869 | + | |
| 870 | +QPDFObjectHandle | |
| 602 | 871 | QPDFObjectHandle::newIndirect(QPDF* qpdf, int objid, int generation) |
| 603 | 872 | { |
| 604 | 873 | return QPDFObjectHandle(qpdf, objid, generation); | ... | ... |
libqpdf/QPDF_Stream.cc
| ... | ... | @@ -464,3 +464,18 @@ QPDF_Stream::replaceFilterData(QPDFObjectHandle const& filter, |
| 464 | 464 | "/Length", QPDFObjectHandle::newInteger((int)length)); |
| 465 | 465 | } |
| 466 | 466 | } |
| 467 | + | |
| 468 | +void | |
| 469 | +QPDF_Stream::replaceDict(QPDFObjectHandle new_dict) | |
| 470 | +{ | |
| 471 | + this->stream_dict = new_dict; | |
| 472 | + QPDFObjectHandle length_obj = new_dict.getKey("/Length"); | |
| 473 | + if (length_obj.isInteger()) | |
| 474 | + { | |
| 475 | + this->length = length_obj.getIntValue(); | |
| 476 | + } | |
| 477 | + else | |
| 478 | + { | |
| 479 | + this->length = 0; | |
| 480 | + } | |
| 481 | +} | ... | ... |
libqpdf/qpdf/QPDF_Stream.hh
| ... | ... | @@ -32,6 +32,8 @@ class QPDF_Stream: public QPDFObject |
| 32 | 32 | QPDFObjectHandle const& filter, |
| 33 | 33 | QPDFObjectHandle const& decode_parms); |
| 34 | 34 | |
| 35 | + void replaceDict(QPDFObjectHandle new_dict); | |
| 36 | + | |
| 35 | 37 | // Replace object ID and generation. This may only be called if |
| 36 | 38 | // object ID and generation are 0. It is used by QPDFObjectHandle |
| 37 | 39 | // when adding streams to files. | ... | ... |
qpdf/pdf_from_scratch.cc
| ... | ... | @@ -38,25 +38,20 @@ void runtest(int n) |
| 38 | 38 | // Create a minimal PDF from scratch. |
| 39 | 39 | |
| 40 | 40 | QPDFObjectHandle font = pdf.makeIndirectObject( |
| 41 | - QPDFObjectHandle::newDictionary()); | |
| 42 | - font.replaceKey("/Type", newName("/Font")); | |
| 43 | - font.replaceKey("/Subtype", newName("/Type1")); | |
| 44 | - font.replaceKey("/Name", newName("/F1")); | |
| 45 | - font.replaceKey("/BaseFont", newName("/Helvetica")); | |
| 46 | - font.replaceKey("/Encoding", newName("/WinAnsiEncoding")); | |
| 41 | + QPDFObjectHandle::parse("<<" | |
| 42 | + " /Type /Font" | |
| 43 | + " /Subtype /Type1" | |
| 44 | + " /Name /F1" | |
| 45 | + " /BaseFont /Helvetica" | |
| 46 | + " /Encoding /WinAnsiEncoding" | |
| 47 | + ">>")); | |
| 47 | 48 | |
| 48 | 49 | QPDFObjectHandle procset = pdf.makeIndirectObject( |
| 49 | - QPDFObjectHandle::newArray()); | |
| 50 | - procset.appendItem(newName("/PDF")); | |
| 51 | - procset.appendItem(newName("/Text")); | |
| 50 | + QPDFObjectHandle::parse("[/PDF /Text]")); | |
| 52 | 51 | |
| 53 | 52 | QPDFObjectHandle contents = createPageContents(pdf, "First Page"); |
| 54 | 53 | |
| 55 | - QPDFObjectHandle mediabox = QPDFObjectHandle::newArray(); | |
| 56 | - mediabox.appendItem(QPDFObjectHandle::newInteger(0)); | |
| 57 | - mediabox.appendItem(QPDFObjectHandle::newInteger(0)); | |
| 58 | - mediabox.appendItem(QPDFObjectHandle::newInteger(612)); | |
| 59 | - mediabox.appendItem(QPDFObjectHandle::newInteger(792)); | |
| 54 | + QPDFObjectHandle mediabox = QPDFObjectHandle::parse("[0 0 612 792]"); | |
| 60 | 55 | |
| 61 | 56 | QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary(); |
| 62 | 57 | rfont.replaceKey("/F1", font); | ... | ... |
qpdf/qpdf.testcov
| ... | ... | @@ -60,13 +60,13 @@ QPDF missing trailer 0 |
| 60 | 60 | QPDF trailer lacks size 0 |
| 61 | 61 | QPDF trailer size not integer 0 |
| 62 | 62 | QPDF trailer prev not integer 0 |
| 63 | -QPDF bad brace 0 | |
| 64 | -QPDF bad array close 0 | |
| 65 | -QPDF dictionary odd number of elements 0 | |
| 63 | +QPDFObjectHandle bad brace 0 | |
| 64 | +QPDFObjectHandle bad array close 0 | |
| 65 | +QPDFObjectHandle dictionary odd number of elements 0 | |
| 66 | 66 | QPDF stream without length 0 |
| 67 | 67 | QPDF stream length not integer 0 |
| 68 | 68 | QPDF missing endstream 0 |
| 69 | -QPDF bad dictionary close 0 | |
| 69 | +QPDFObjectHandle bad dictionary close 0 | |
| 70 | 70 | QPDF can't find xref 0 |
| 71 | 71 | QPDF_Tokenizer bad ) 0 |
| 72 | 72 | QPDF_Tokenizer bad > 0 |
| ... | ... | @@ -235,3 +235,5 @@ QPDF not copying pages object 0 |
| 235 | 235 | QPDF insert foreign page 0 |
| 236 | 236 | QPDFWriter foreign object 0 |
| 237 | 237 | QPDFWriter copy use_aes 1 |
| 238 | +QPDFObjectHandle indirect without context 0 | |
| 239 | +QPDFObjectHandle trailing data in parse 0 | ... | ... |
qpdf/qtest/qpdf.test
| ... | ... | @@ -149,7 +149,7 @@ $td->runtest("remove page we don't have", |
| 149 | 149 | $td->NORMALIZE_NEWLINES); |
| 150 | 150 | # ---------- |
| 151 | 151 | $td->notify("--- Miscellaneous Tests ---"); |
| 152 | -$n_tests += 44; | |
| 152 | +$n_tests += 45; | |
| 153 | 153 | |
| 154 | 154 | $td->runtest("qpdf version", |
| 155 | 155 | {$td->COMMAND => "qpdf --version"}, |
| ... | ... | @@ -370,6 +370,10 @@ $td->runtest("detect foreign object in write", |
| 370 | 370 | " copy-foreign-objects-in.pdf minimal.pdf"}, |
| 371 | 371 | {$td->FILE => "foreign-in-write.out", $td->EXIT_STATUS => 0}, |
| 372 | 372 | $td->NORMALIZE_NEWLINES); |
| 373 | +$td->runtest("parse objects from string", | |
| 374 | + {$td->COMMAND => "test_driver 31 minimal.pdf"}, # file not used | |
| 375 | + {$td->FILE => "parse-object.out", $td->EXIT_STATUS => 0}, | |
| 376 | + $td->NORMALIZE_NEWLINES); | |
| 373 | 377 | |
| 374 | 378 | show_ntests(); |
| 375 | 379 | # ---------- | ... | ... |
qpdf/qtest/qpdf/bad22.out
qpdf/qtest/qpdf/bad23.out
qpdf/qtest/qpdf/parse-object.out
0 → 100644
| 1 | +[ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ] | |
| 2 | +logic error parsing indirect: QPDFObjectHandle::parse called without context on an object with indirect references | |
| 3 | +trailing data: parsed object (trailing test): trailing data found parsing object from string | |
| 4 | +test 31 done | ... | ... |
qpdf/test_driver.cc
| ... | ... | @@ -1054,6 +1054,38 @@ void runtest(int n, char const* filename1, char const* filename2) |
| 1054 | 1054 | << std::endl; |
| 1055 | 1055 | } |
| 1056 | 1056 | } |
| 1057 | + else if (n == 31) | |
| 1058 | + { | |
| 1059 | + // Test object parsing from a string. The input file is not used. | |
| 1060 | + | |
| 1061 | + QPDFObjectHandle o1 = | |
| 1062 | + QPDFObjectHandle::parse( | |
| 1063 | + "[/name 16059 3.14159 false\n" | |
| 1064 | + " << /key true /other [ (string1) (string2) ] >> null]"); | |
| 1065 | + std::cout << o1.unparse() << std::endl; | |
| 1066 | + QPDFObjectHandle o2 = QPDFObjectHandle::parse(" 12345 \f "); | |
| 1067 | + assert(o2.isInteger() && (o2.getIntValue() == 12345)); | |
| 1068 | + try | |
| 1069 | + { | |
| 1070 | + QPDFObjectHandle::parse("[1 0 R]", "indirect test"); | |
| 1071 | + std::cout << "oops -- didn't throw" << std::endl; | |
| 1072 | + } | |
| 1073 | + catch (std::logic_error e) | |
| 1074 | + { | |
| 1075 | + std::cout << "logic error parsing indirect: " << e.what() | |
| 1076 | + << std::endl; | |
| 1077 | + } | |
| 1078 | + try | |
| 1079 | + { | |
| 1080 | + QPDFObjectHandle::parse("0 trailing", "trailing test"); | |
| 1081 | + std::cout << "oops -- didn't throw" << std::endl; | |
| 1082 | + } | |
| 1083 | + catch (std::runtime_error e) | |
| 1084 | + { | |
| 1085 | + std::cout << "trailing data: " << e.what() | |
| 1086 | + << std::endl; | |
| 1087 | + } | |
| 1088 | + } | |
| 1057 | 1089 | else |
| 1058 | 1090 | { |
| 1059 | 1091 | throw std::runtime_error(std::string("invalid test ") + | ... | ... |