Commit 6bbea4baa0c06b39b1b71f1aa6fc276789296556
1 parent
f3e267fc
Implement QPDFObjectHandle::parse
Move object parsing code from QPDF to QPDFObjectHandle and parameterize the parts of it that are specific to a QPDF object. Provide a version that can't handle indirect objects and that can be called on an arbitrary string. A side effect of this change is that the offset used when reporting invalid stream length has changed, but since the new value seems like a better value than the old one, the test suite has been updated rather than making the code backward compatible. This only affects the offset reported for invalid streams that lack /Length or have an invalid /Length key. Updated some test code and examples to use QPDFObjectHandle::parse. Supporting changes include adding a BufferInputSource constructor that takes a string.
Showing
18 changed files
with
618 additions
and
386 deletions
ChangeLog
| 1 | +2012-07-21 Jay Berkenbilt <ejb@ql.org> | ||
| 2 | + | ||
| 3 | + * Add new method QPDFObjectHandle::replaceDict to replace a | ||
| 4 | + stream's dictionary. Use with caution; see comments in | ||
| 5 | + QPDFObjectHandle.hh. | ||
| 6 | + | ||
| 7 | + * Add new method QPDFObjectHandle::parse for creation of | ||
| 8 | + QPDFObjectHandle objects from string representations of the | ||
| 9 | + objects. Thanks to Tobias Hoffmann for the idea. | ||
| 10 | + | ||
| 1 | 2012-07-15 Jay Berkenbilt <ejb@ql.org> | 11 | 2012-07-15 Jay Berkenbilt <ejb@ql.org> |
| 2 | 12 | ||
| 3 | * add new QPDF::isEncrypted method that returns some additional | 13 | * add new QPDF::isEncrypted method that returns some additional |
TODO
| @@ -20,16 +20,14 @@ Next | @@ -20,16 +20,14 @@ Next | ||
| 20 | * Make sure that the release notes call attention to the one API | 20 | * Make sure that the release notes call attention to the one API |
| 21 | breaking change: removal of length from replaceStreamData. | 21 | breaking change: removal of length from replaceStreamData. |
| 22 | 22 | ||
| 23 | - * Add a way to create new QPDFObjectHandles with a string | ||
| 24 | - representation of them, such as | ||
| 25 | - QPDFObjectHandle::parse("<< /a 1 /b 2 >>"); | ||
| 26 | - | ||
| 27 | * Document thread safety: One individual QPDF or QPDFWriter object | 23 | * Document thread safety: One individual QPDF or QPDFWriter object |
| 28 | can only be used by one thread at a time, but multiple threads can | 24 | can only be used by one thread at a time, but multiple threads can |
| 29 | simultaneously use separate objects. | 25 | simultaneously use separate objects. |
| 30 | 26 | ||
| 31 | * Write some documentation about the design of copyForeignObject. | 27 | * Write some documentation about the design of copyForeignObject. |
| 32 | 28 | ||
| 29 | + * Mention QPDFObjectHandle::parse in the documentation. | ||
| 30 | + | ||
| 33 | * copyForeignObject still to do: | 31 | * copyForeignObject still to do: |
| 34 | 32 | ||
| 35 | - qpdf command | 33 | - qpdf command |
examples/pdf-create.cc
| @@ -81,24 +81,28 @@ static void create_pdf(char const* filename) | @@ -81,24 +81,28 @@ static void create_pdf(char const* filename) | ||
| 81 | // Add an indirect object to contain a font descriptor for the | 81 | // Add an indirect object to contain a font descriptor for the |
| 82 | // built-in Helvetica font. | 82 | // built-in Helvetica font. |
| 83 | QPDFObjectHandle font = pdf.makeIndirectObject( | 83 | QPDFObjectHandle font = pdf.makeIndirectObject( |
| 84 | - QPDFObjectHandle::newDictionary()); | ||
| 85 | - font.replaceKey("/Type", newName("/Font")); | ||
| 86 | - font.replaceKey("/Subtype", newName("/Type1")); | ||
| 87 | - font.replaceKey("/Name", newName("/F1")); | ||
| 88 | - font.replaceKey("/BaseFont", newName("/Helvetica")); | ||
| 89 | - font.replaceKey("/Encoding", newName("/WinAnsiEncoding")); | 84 | + QPDFObjectHandle::parse( |
| 85 | + "<<" | ||
| 86 | + " /Type /Font" | ||
| 87 | + " /Subtype /Type1" | ||
| 88 | + " /Name /F1" | ||
| 89 | + " /BaseFont /Helvetica" | ||
| 90 | + " /Encoding /WinAnsiEncoding" | ||
| 91 | + ">>")); | ||
| 90 | 92 | ||
| 91 | // Create a stream to encode our image. We don't have to set the | 93 | // Create a stream to encode our image. We don't have to set the |
| 92 | // length or filters. QPDFWriter will fill in the length and | 94 | // length or filters. QPDFWriter will fill in the length and |
| 93 | // compress the stream data using FlateDecode by default. | 95 | // compress the stream data using FlateDecode by default. |
| 94 | QPDFObjectHandle image = QPDFObjectHandle::newStream(&pdf); | 96 | QPDFObjectHandle image = QPDFObjectHandle::newStream(&pdf); |
| 95 | - QPDFObjectHandle image_dict = image.getDict(); | ||
| 96 | - image_dict.replaceKey("/Type", newName("/XObject")); | ||
| 97 | - image_dict.replaceKey("/Subtype", newName("/Image")); | ||
| 98 | - image_dict.replaceKey("/ColorSpace", newName("/DeviceRGB")); | ||
| 99 | - image_dict.replaceKey("/BitsPerComponent", newInteger(8)); | ||
| 100 | - image_dict.replaceKey("/Width", newInteger(100)); | ||
| 101 | - image_dict.replaceKey("/Height", newInteger(100)); | 97 | + image.replaceDict(QPDFObjectHandle::parse( |
| 98 | + "<<" | ||
| 99 | + " /Type /XObject" | ||
| 100 | + " /Subtype /Image" | ||
| 101 | + " /ColorSpace /DeviceRGB" | ||
| 102 | + " /BitsPerComponent 8" | ||
| 103 | + " /Width 100" | ||
| 104 | + " /Height 100" | ||
| 105 | + ">>")); | ||
| 102 | // Provide the stream data. | 106 | // Provide the stream data. |
| 103 | ImageProvider* p = new ImageProvider(100, 100); | 107 | ImageProvider* p = new ImageProvider(100, 100); |
| 104 | PointerHolder<QPDFObjectHandle::StreamDataProvider> provider(p); | 108 | PointerHolder<QPDFObjectHandle::StreamDataProvider> provider(p); |
| @@ -107,10 +111,8 @@ static void create_pdf(char const* filename) | @@ -107,10 +111,8 @@ static void create_pdf(char const* filename) | ||
| 107 | QPDFObjectHandle::newNull()); | 111 | QPDFObjectHandle::newNull()); |
| 108 | 112 | ||
| 109 | // Create direct objects as needed by the page dictionary. | 113 | // Create direct objects as needed by the page dictionary. |
| 110 | - QPDFObjectHandle procset = QPDFObjectHandle::newArray(); | ||
| 111 | - procset.appendItem(newName("/PDF")); | ||
| 112 | - procset.appendItem(newName("/Text")); | ||
| 113 | - procset.appendItem(newName("/ImageC")); | 114 | + QPDFObjectHandle procset = QPDFObjectHandle::parse( |
| 115 | + "[/PDF /Text /ImageC]"); | ||
| 114 | 116 | ||
| 115 | QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary(); | 117 | QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary(); |
| 116 | rfont.replaceKey("/F1", font); | 118 | rfont.replaceKey("/F1", font); |
include/qpdf/BufferInputSource.hh
| @@ -9,6 +9,8 @@ class BufferInputSource: public InputSource | @@ -9,6 +9,8 @@ class BufferInputSource: public InputSource | ||
| 9 | public: | 9 | public: |
| 10 | BufferInputSource(std::string const& description, Buffer* buf, | 10 | BufferInputSource(std::string const& description, Buffer* buf, |
| 11 | bool own_memory = false); | 11 | bool own_memory = false); |
| 12 | + BufferInputSource(std::string const& description, | ||
| 13 | + std::string const& contents); | ||
| 12 | virtual ~BufferInputSource(); | 14 | virtual ~BufferInputSource(); |
| 13 | virtual qpdf_offset_t findAndSkipNextEOL(); | 15 | virtual qpdf_offset_t findAndSkipNextEOL(); |
| 14 | virtual std::string const& getName() const; | 16 | virtual std::string const& getName() const; |
include/qpdf/QPDF.hh
| @@ -531,6 +531,23 @@ class QPDF | @@ -531,6 +531,23 @@ class QPDF | ||
| 531 | std::map<ObjGen, QPDFObjectHandle> foreign_streams; | 531 | std::map<ObjGen, QPDFObjectHandle> foreign_streams; |
| 532 | }; | 532 | }; |
| 533 | 533 | ||
| 534 | + class StringDecrypter: public QPDFObjectHandle::StringDecrypter | ||
| 535 | + { | ||
| 536 | + friend class QPDF; | ||
| 537 | + | ||
| 538 | + public: | ||
| 539 | + StringDecrypter(QPDF* qpdf, int objid, int gen); | ||
| 540 | + virtual ~StringDecrypter() | ||
| 541 | + { | ||
| 542 | + } | ||
| 543 | + virtual void decryptString(std::string& val); | ||
| 544 | + | ||
| 545 | + private: | ||
| 546 | + QPDF* qpdf; | ||
| 547 | + int objid; | ||
| 548 | + int gen; | ||
| 549 | + }; | ||
| 550 | + | ||
| 534 | void parse(char const* password); | 551 | void parse(char const* password); |
| 535 | void warn(QPDFExc const& e); | 552 | void warn(QPDFExc const& e); |
| 536 | void setTrailer(QPDFObjectHandle obj); | 553 | void setTrailer(QPDFObjectHandle obj); |
| @@ -547,10 +564,6 @@ class QPDF | @@ -547,10 +564,6 @@ class QPDF | ||
| 547 | QPDFObjectHandle readObject( | 564 | QPDFObjectHandle readObject( |
| 548 | PointerHolder<InputSource>, std::string const& description, | 565 | PointerHolder<InputSource>, std::string const& description, |
| 549 | int objid, int generation, bool in_object_stream); | 566 | int objid, int generation, bool in_object_stream); |
| 550 | - QPDFObjectHandle readObjectInternal( | ||
| 551 | - PointerHolder<InputSource> input, int objid, int generation, | ||
| 552 | - bool in_object_stream, | ||
| 553 | - bool in_array, bool in_dictionary); | ||
| 554 | size_t recoverStreamLength( | 567 | size_t recoverStreamLength( |
| 555 | PointerHolder<InputSource> input, int objid, int generation, | 568 | PointerHolder<InputSource> input, int objid, int generation, |
| 556 | qpdf_offset_t stream_offset); | 569 | qpdf_offset_t stream_offset); |
include/qpdf/QPDFObjectHandle.hh
| @@ -18,6 +18,7 @@ | @@ -18,6 +18,7 @@ | ||
| 18 | 18 | ||
| 19 | #include <qpdf/PointerHolder.hh> | 19 | #include <qpdf/PointerHolder.hh> |
| 20 | #include <qpdf/Buffer.hh> | 20 | #include <qpdf/Buffer.hh> |
| 21 | +#include <qpdf/InputSource.hh> | ||
| 21 | 22 | ||
| 22 | #include <qpdf/QPDFObject.hh> | 23 | #include <qpdf/QPDFObject.hh> |
| 23 | 24 | ||
| @@ -25,6 +26,7 @@ class Pipeline; | @@ -25,6 +26,7 @@ class Pipeline; | ||
| 25 | class QPDF; | 26 | class QPDF; |
| 26 | class QPDF_Dictionary; | 27 | class QPDF_Dictionary; |
| 27 | class QPDF_Array; | 28 | class QPDF_Array; |
| 29 | +class QPDFTokenizer; | ||
| 28 | 30 | ||
| 29 | class QPDFObjectHandle | 31 | class QPDFObjectHandle |
| 30 | { | 32 | { |
| @@ -57,6 +59,18 @@ class QPDFObjectHandle | @@ -57,6 +59,18 @@ class QPDFObjectHandle | ||
| 57 | Pipeline* pipeline) = 0; | 59 | Pipeline* pipeline) = 0; |
| 58 | }; | 60 | }; |
| 59 | 61 | ||
| 62 | + // This class is used by parse to decrypt strings when reading an | ||
| 63 | + // object that contains encrypted strings. | ||
| 64 | + class StringDecrypter | ||
| 65 | + { | ||
| 66 | + public: | ||
| 67 | + QPDF_DLL | ||
| 68 | + virtual ~StringDecrypter() | ||
| 69 | + { | ||
| 70 | + } | ||
| 71 | + virtual void decryptString(std::string& val) = 0; | ||
| 72 | + }; | ||
| 73 | + | ||
| 60 | QPDF_DLL | 74 | QPDF_DLL |
| 61 | QPDFObjectHandle(); | 75 | QPDFObjectHandle(); |
| 62 | QPDF_DLL | 76 | QPDF_DLL |
| @@ -95,6 +109,30 @@ class QPDFObjectHandle | @@ -95,6 +109,30 @@ class QPDFObjectHandle | ||
| 95 | 109 | ||
| 96 | // Public factory methods | 110 | // Public factory methods |
| 97 | 111 | ||
| 112 | + // Construct an object of any type from a string representation of | ||
| 113 | + // the object. Throws QPDFExc with an empty filename and an | ||
| 114 | + // offset into the string if there is an error. Any indirect | ||
| 115 | + // object syntax (obj gen R) will cause a logic_error exception to | ||
| 116 | + // be thrown. If object_description is provided, it will appear | ||
| 117 | + // in the message of any QPDFExc exception thrown for invalid | ||
| 118 | + // syntax. | ||
| 119 | + QPDF_DLL | ||
| 120 | + static QPDFObjectHandle parse(std::string const& object_str, | ||
| 121 | + std::string const& object_description = ""); | ||
| 122 | + | ||
| 123 | + // Construct an object as above by reading from the given | ||
| 124 | + // InputSource at its current position and using the tokenizer you | ||
| 125 | + // supply. Indirect objects and encrypted strings are permitted. | ||
| 126 | + // This method is intended to be called by QPDF for parsing | ||
| 127 | + // objects that are read from the object's input stream. | ||
| 128 | + QPDF_DLL | ||
| 129 | + static QPDFObjectHandle parse(PointerHolder<InputSource> input, | ||
| 130 | + std::string const& object_description, | ||
| 131 | + QPDFTokenizer&, bool& empty, | ||
| 132 | + StringDecrypter* decrypter, | ||
| 133 | + QPDF* context); | ||
| 134 | + | ||
| 135 | + // Type-specific factories | ||
| 98 | QPDF_DLL | 136 | QPDF_DLL |
| 99 | static QPDFObjectHandle newNull(); | 137 | static QPDFObjectHandle newNull(); |
| 100 | QPDF_DLL | 138 | QPDF_DLL |
| @@ -124,7 +162,8 @@ class QPDFObjectHandle | @@ -124,7 +162,8 @@ class QPDFObjectHandle | ||
| 124 | // object. A subsequent call must be made to replaceStreamData() | 162 | // object. A subsequent call must be made to replaceStreamData() |
| 125 | // to provide data for the stream. The stream's dictionary may be | 163 | // to provide data for the stream. The stream's dictionary may be |
| 126 | // retrieved by calling getDict(), and the resulting dictionary | 164 | // retrieved by calling getDict(), and the resulting dictionary |
| 127 | - // may be modified. | 165 | + // may be modified. Alternatively, you can create a new |
| 166 | + // dictionary and call replaceDict to install it. | ||
| 128 | QPDF_DLL | 167 | QPDF_DLL |
| 129 | static QPDFObjectHandle newStream(QPDF* qpdf); | 168 | static QPDFObjectHandle newStream(QPDF* qpdf); |
| 130 | 169 | ||
| @@ -303,6 +342,15 @@ class QPDFObjectHandle | @@ -303,6 +342,15 @@ class QPDFObjectHandle | ||
| 303 | bool pipeStreamData(Pipeline*, bool filter, | 342 | bool pipeStreamData(Pipeline*, bool filter, |
| 304 | bool normalize, bool compress); | 343 | bool normalize, bool compress); |
| 305 | 344 | ||
| 345 | + // Replace a stream's dictionary. The new dictionary must be | ||
| 346 | + // consistent with the stream's data. This is most appropriately | ||
| 347 | + // used when creating streams from scratch that will use a stream | ||
| 348 | + // data provider and therefore start with an empty dictionary. It | ||
| 349 | + // may be more convenient in this case than calling getDict and | ||
| 350 | + // modifying it for each key. The pdf-create example does this. | ||
| 351 | + QPDF_DLL | ||
| 352 | + void replaceDict(QPDFObjectHandle); | ||
| 353 | + | ||
| 306 | // Replace this stream's stream data with the given data buffer, | 354 | // Replace this stream's stream data with the given data buffer, |
| 307 | // and replace the /Filter and /DecodeParms keys in the stream | 355 | // and replace the /Filter and /DecodeParms keys in the stream |
| 308 | // dictionary with the given values. (If either value is empty, | 356 | // dictionary with the given values. (If either value is empty, |
| @@ -489,6 +537,12 @@ class QPDFObjectHandle | @@ -489,6 +537,12 @@ class QPDFObjectHandle | ||
| 489 | void dereference(); | 537 | void dereference(); |
| 490 | void makeDirectInternal(std::set<int>& visited); | 538 | void makeDirectInternal(std::set<int>& visited); |
| 491 | void releaseResolved(); | 539 | void releaseResolved(); |
| 540 | + static QPDFObjectHandle parseInternal( | ||
| 541 | + PointerHolder<InputSource> input, | ||
| 542 | + std::string const& object_description, | ||
| 543 | + QPDFTokenizer& tokenizer, bool& empty, | ||
| 544 | + StringDecrypter* decrypter, QPDF* context, | ||
| 545 | + bool in_array, bool in_dictionary); | ||
| 492 | 546 | ||
| 493 | bool initialized; | 547 | bool initialized; |
| 494 | 548 |
libqpdf/BufferInputSource.cc
| @@ -11,6 +11,18 @@ BufferInputSource::BufferInputSource(std::string const& description, | @@ -11,6 +11,18 @@ BufferInputSource::BufferInputSource(std::string const& description, | ||
| 11 | { | 11 | { |
| 12 | } | 12 | } |
| 13 | 13 | ||
| 14 | +BufferInputSource::BufferInputSource(std::string const& description, | ||
| 15 | + std::string const& contents) : | ||
| 16 | + own_memory(true), | ||
| 17 | + description(description), | ||
| 18 | + buf(0), | ||
| 19 | + cur_offset(0) | ||
| 20 | +{ | ||
| 21 | + this->buf = new Buffer(contents.length()); | ||
| 22 | + unsigned char* bp = buf->getBuffer(); | ||
| 23 | + memcpy(bp, (char*)contents.c_str(), contents.length()); | ||
| 24 | +} | ||
| 25 | + | ||
| 14 | BufferInputSource::~BufferInputSource() | 26 | BufferInputSource::~BufferInputSource() |
| 15 | { | 27 | { |
| 16 | if (own_memory) | 28 | if (own_memory) |
libqpdf/QPDF.cc
| @@ -68,6 +68,18 @@ QPDF::CopiedStreamDataProvider::registerForeignStream( | @@ -68,6 +68,18 @@ QPDF::CopiedStreamDataProvider::registerForeignStream( | ||
| 68 | this->foreign_streams[local_og] = foreign_stream; | 68 | this->foreign_streams[local_og] = foreign_stream; |
| 69 | } | 69 | } |
| 70 | 70 | ||
| 71 | +QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, int objid, int gen) : | ||
| 72 | + qpdf(qpdf), | ||
| 73 | + objid(objid), | ||
| 74 | + gen(gen) | ||
| 75 | +{ | ||
| 76 | +} | ||
| 77 | + | ||
| 78 | +void | ||
| 79 | +QPDF::StringDecrypter::decryptString(std::string& val) | ||
| 80 | +{ | ||
| 81 | + qpdf->decryptString(val, objid, gen); | ||
| 82 | +} | ||
| 71 | 83 | ||
| 72 | std::string const& | 84 | std::string const& |
| 73 | QPDF::QPDFVersion() | 85 | QPDF::QPDFVersion() |
| @@ -940,358 +952,164 @@ QPDF::readObject(PointerHolder<InputSource> input, | @@ -940,358 +952,164 @@ QPDF::readObject(PointerHolder<InputSource> input, | ||
| 940 | { | 952 | { |
| 941 | setLastObjectDescription(description, objid, generation); | 953 | setLastObjectDescription(description, objid, generation); |
| 942 | qpdf_offset_t offset = input->tell(); | 954 | qpdf_offset_t offset = input->tell(); |
| 943 | - QPDFObjectHandle object = readObjectInternal( | ||
| 944 | - input, objid, generation, in_object_stream, false, false); | ||
| 945 | - // Override last_offset so that it points to the beginning of the | ||
| 946 | - // object we just read | ||
| 947 | - input->setLastOffset(offset); | ||
| 948 | - return object; | ||
| 949 | -} | ||
| 950 | - | ||
| 951 | -QPDFObjectHandle | ||
| 952 | -QPDF::readObjectInternal(PointerHolder<InputSource> input, | ||
| 953 | - int objid, int generation, | ||
| 954 | - bool in_object_stream, | ||
| 955 | - bool in_array, bool in_dictionary) | ||
| 956 | -{ | ||
| 957 | - if (in_dictionary && in_array) | ||
| 958 | - { | ||
| 959 | - // Although dictionaries and arrays arbitrarily nest, these | ||
| 960 | - // variables indicate what is at the top of the stack right | ||
| 961 | - // now, so they can, by definition, never both be true. | ||
| 962 | - throw std::logic_error( | ||
| 963 | - "INTERNAL ERROR: readObjectInternal: in_dict && in_array"); | ||
| 964 | - } | ||
| 965 | - | ||
| 966 | - QPDFObjectHandle object; | ||
| 967 | 955 | ||
| 968 | - qpdf_offset_t offset = input->tell(); | ||
| 969 | - std::vector<QPDFObjectHandle> olist; | ||
| 970 | - bool done = false; | ||
| 971 | - while (! done) | 956 | + bool empty = false; |
| 957 | + PointerHolder<StringDecrypter> decrypter_ph; | ||
| 958 | + StringDecrypter* decrypter = 0; | ||
| 959 | + if (this->encrypted && (! in_object_stream)) | ||
| 972 | { | 960 | { |
| 973 | - object = QPDFObjectHandle(); | ||
| 974 | - | ||
| 975 | - QPDFTokenizer::Token token = readToken(input); | ||
| 976 | - | ||
| 977 | - switch (token.getType()) | ||
| 978 | - { | ||
| 979 | - case QPDFTokenizer::tt_brace_open: | ||
| 980 | - case QPDFTokenizer::tt_brace_close: | ||
| 981 | - // Don't know what to do with these for now | ||
| 982 | - QTC::TC("qpdf", "QPDF bad brace"); | ||
| 983 | - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 984 | - this->last_object_description, | ||
| 985 | - input->getLastOffset(), | ||
| 986 | - "unexpected brace token"); | ||
| 987 | - break; | ||
| 988 | - | ||
| 989 | - case QPDFTokenizer::tt_array_close: | ||
| 990 | - if (in_array) | ||
| 991 | - { | ||
| 992 | - done = true; | ||
| 993 | - } | ||
| 994 | - else | ||
| 995 | - { | ||
| 996 | - QTC::TC("qpdf", "QPDF bad array close"); | ||
| 997 | - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 998 | - this->last_object_description, | ||
| 999 | - input->getLastOffset(), | ||
| 1000 | - "unexpected array close token"); | ||
| 1001 | - } | ||
| 1002 | - break; | ||
| 1003 | - | ||
| 1004 | - case QPDFTokenizer::tt_dict_close: | ||
| 1005 | - if (in_dictionary) | ||
| 1006 | - { | ||
| 1007 | - done = true; | ||
| 1008 | - } | ||
| 1009 | - else | ||
| 1010 | - { | ||
| 1011 | - QTC::TC("qpdf", "QPDF bad dictionary close"); | ||
| 1012 | - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 1013 | - this->last_object_description, | ||
| 1014 | - input->getLastOffset(), | ||
| 1015 | - "unexpected dictionary close token"); | ||
| 1016 | - } | ||
| 1017 | - break; | ||
| 1018 | - | ||
| 1019 | - case QPDFTokenizer::tt_array_open: | ||
| 1020 | - object = readObjectInternal( | ||
| 1021 | - input, objid, generation, in_object_stream, true, false); | ||
| 1022 | - break; | ||
| 1023 | - | ||
| 1024 | - case QPDFTokenizer::tt_dict_open: | ||
| 1025 | - object = readObjectInternal( | ||
| 1026 | - input, objid, generation, in_object_stream, false, true); | ||
| 1027 | - break; | ||
| 1028 | - | ||
| 1029 | - case QPDFTokenizer::tt_bool: | ||
| 1030 | - object = QPDFObjectHandle::newBool( | ||
| 1031 | - (token.getValue() == "true")); | ||
| 1032 | - break; | ||
| 1033 | - | ||
| 1034 | - case QPDFTokenizer::tt_null: | ||
| 1035 | - object = QPDFObjectHandle::newNull(); | ||
| 1036 | - break; | ||
| 1037 | - | ||
| 1038 | - case QPDFTokenizer::tt_integer: | ||
| 1039 | - object = QPDFObjectHandle::newInteger( | ||
| 1040 | - QUtil::string_to_ll(token.getValue().c_str())); | ||
| 1041 | - break; | ||
| 1042 | - | ||
| 1043 | - case QPDFTokenizer::tt_real: | ||
| 1044 | - object = QPDFObjectHandle::newReal(token.getValue()); | ||
| 1045 | - break; | ||
| 1046 | - | ||
| 1047 | - case QPDFTokenizer::tt_name: | ||
| 1048 | - object = QPDFObjectHandle::newName(token.getValue()); | ||
| 1049 | - break; | ||
| 1050 | - | ||
| 1051 | - case QPDFTokenizer::tt_word: | ||
| 1052 | - { | ||
| 1053 | - std::string const& value = token.getValue(); | ||
| 1054 | - if ((value == "R") && (in_array || in_dictionary) && | ||
| 1055 | - (olist.size() >= 2) && | ||
| 1056 | - (olist[olist.size() - 1].isInteger()) && | ||
| 1057 | - (olist[olist.size() - 2].isInteger())) | ||
| 1058 | - { | ||
| 1059 | - // Try to resolve indirect objects | ||
| 1060 | - object = QPDFObjectHandle::Factory::newIndirect( | ||
| 1061 | - this, | ||
| 1062 | - olist[olist.size() - 2].getIntValue(), | ||
| 1063 | - olist[olist.size() - 1].getIntValue()); | ||
| 1064 | - olist.pop_back(); | ||
| 1065 | - olist.pop_back(); | ||
| 1066 | - } | ||
| 1067 | - else if ((value == "endobj") && | ||
| 1068 | - (! (in_array || in_dictionary))) | ||
| 1069 | - { | ||
| 1070 | - // Nothing in the PDF spec appears to allow empty | ||
| 1071 | - // objects, but they have been encountered in | ||
| 1072 | - // actual PDF files and Adobe Reader appears to | ||
| 1073 | - // ignore them. | ||
| 1074 | - warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 1075 | - this->last_object_description, | ||
| 1076 | - input->getLastOffset(), | ||
| 1077 | - "empty object treated as null")); | ||
| 1078 | - object = QPDFObjectHandle::newNull(); | ||
| 1079 | - input->seek(input->getLastOffset(), SEEK_SET); | ||
| 1080 | - } | ||
| 1081 | - else | ||
| 1082 | - { | ||
| 1083 | - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 1084 | - this->last_object_description, | ||
| 1085 | - input->getLastOffset(), | ||
| 1086 | - "unknown token while reading object (" + | ||
| 1087 | - value + ")"); | ||
| 1088 | - } | ||
| 1089 | - } | ||
| 1090 | - break; | ||
| 1091 | - | ||
| 1092 | - case QPDFTokenizer::tt_string: | ||
| 1093 | - { | ||
| 1094 | - std::string val = token.getValue(); | ||
| 1095 | - if (this->encrypted && (! in_object_stream)) | ||
| 1096 | - { | ||
| 1097 | - decryptString(val, objid, generation); | ||
| 1098 | - } | ||
| 1099 | - object = QPDFObjectHandle::newString(val); | ||
| 1100 | - } | ||
| 1101 | - break; | ||
| 1102 | - | ||
| 1103 | - default: | ||
| 1104 | - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 1105 | - this->last_object_description, | ||
| 1106 | - input->getLastOffset(), | ||
| 1107 | - "unknown token type while reading object"); | ||
| 1108 | - break; | ||
| 1109 | - } | ||
| 1110 | - | ||
| 1111 | - if (in_dictionary || in_array) | ||
| 1112 | - { | ||
| 1113 | - if (! done) | ||
| 1114 | - { | ||
| 1115 | - olist.push_back(object); | ||
| 1116 | - } | ||
| 1117 | - } | ||
| 1118 | - else if (! object.isInitialized()) | ||
| 1119 | - { | ||
| 1120 | - throw std::logic_error( | ||
| 1121 | - "INTERNAL ERROR: uninitialized object (token = " + | ||
| 1122 | - QUtil::int_to_string(token.getType()) + | ||
| 1123 | - ", " + token.getValue() + ")"); | ||
| 1124 | - } | ||
| 1125 | - else | ||
| 1126 | - { | ||
| 1127 | - done = true; | ||
| 1128 | - } | 961 | + decrypter_ph = new StringDecrypter(this, objid, generation); |
| 962 | + decrypter = decrypter_ph.getPointer(); | ||
| 1129 | } | 963 | } |
| 1130 | - | ||
| 1131 | - if (in_array) | 964 | + QPDFObjectHandle object = QPDFObjectHandle::parse( |
| 965 | + input, description, this->tokenizer, empty, decrypter, this); | ||
| 966 | + if (empty) | ||
| 1132 | { | 967 | { |
| 1133 | - object = QPDFObjectHandle::newArray(olist); | 968 | + // Nothing in the PDF spec appears to allow empty objects, but |
| 969 | + // they have been encountered in actual PDF files and Adobe | ||
| 970 | + // Reader appears to ignore them. | ||
| 971 | + warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 972 | + this->last_object_description, | ||
| 973 | + input->getLastOffset(), | ||
| 974 | + "empty object treated as null")); | ||
| 1134 | } | 975 | } |
| 1135 | - else if (in_dictionary) | 976 | + else if (object.isDictionary() && (! in_object_stream)) |
| 1136 | { | 977 | { |
| 1137 | - // Convert list to map. Alternating elements are keys. | ||
| 1138 | - std::map<std::string, QPDFObjectHandle> dict; | ||
| 1139 | - if (olist.size() % 2) | ||
| 1140 | - { | ||
| 1141 | - QTC::TC("qpdf", "QPDF dictionary odd number of elements"); | ||
| 1142 | - throw QPDFExc( | ||
| 1143 | - qpdf_e_damaged_pdf, input->getName(), | ||
| 1144 | - this->last_object_description, input->getLastOffset(), | ||
| 1145 | - "dictionary ending here has an odd number of elements"); | ||
| 1146 | - } | ||
| 1147 | - for (unsigned int i = 0; i < olist.size(); i += 2) | ||
| 1148 | - { | ||
| 1149 | - QPDFObjectHandle key_obj = olist[i]; | ||
| 1150 | - QPDFObjectHandle val = olist[i + 1]; | ||
| 1151 | - if (! key_obj.isName()) | ||
| 1152 | - { | ||
| 1153 | - throw QPDFExc( | ||
| 1154 | - qpdf_e_damaged_pdf, | ||
| 1155 | - input->getName(), this->last_object_description, offset, | ||
| 1156 | - std::string("dictionary key not name (") + | ||
| 1157 | - key_obj.unparse() + ")"); | ||
| 1158 | - } | ||
| 1159 | - dict[key_obj.getName()] = val; | ||
| 1160 | - } | ||
| 1161 | - object = QPDFObjectHandle::newDictionary(dict); | 978 | + // check for stream |
| 979 | + qpdf_offset_t cur_offset = input->tell(); | ||
| 980 | + if (readToken(input) == | ||
| 981 | + QPDFTokenizer::Token(QPDFTokenizer::tt_word, "stream")) | ||
| 982 | + { | ||
| 983 | + // The PDF specification states that the word "stream" | ||
| 984 | + // should be followed by either a carriage return and | ||
| 985 | + // a newline or by a newline alone. It specifically | ||
| 986 | + // disallowed following it by a carriage return alone | ||
| 987 | + // since, in that case, there would be no way to tell | ||
| 988 | + // whether the NL in a CR NL sequence was part of the | ||
| 989 | + // stream data. However, some readers, including | ||
| 990 | + // Adobe reader, accept a carriage return by itself | ||
| 991 | + // when followed by a non-newline character, so that's | ||
| 992 | + // what we do here. | ||
| 993 | + { | ||
| 994 | + char ch; | ||
| 995 | + if (input->read(&ch, 1) == 0) | ||
| 996 | + { | ||
| 997 | + // A premature EOF here will result in some | ||
| 998 | + // other problem that will get reported at | ||
| 999 | + // another time. | ||
| 1000 | + } | ||
| 1001 | + else if (ch == '\n') | ||
| 1002 | + { | ||
| 1003 | + // ready to read stream data | ||
| 1004 | + QTC::TC("qpdf", "QPDF stream with NL only"); | ||
| 1005 | + } | ||
| 1006 | + else if (ch == '\r') | ||
| 1007 | + { | ||
| 1008 | + // Read another character | ||
| 1009 | + if (input->read(&ch, 1) != 0) | ||
| 1010 | + { | ||
| 1011 | + if (ch == '\n') | ||
| 1012 | + { | ||
| 1013 | + // Ready to read stream data | ||
| 1014 | + QTC::TC("qpdf", "QPDF stream with CRNL"); | ||
| 1015 | + } | ||
| 1016 | + else | ||
| 1017 | + { | ||
| 1018 | + // Treat the \r by itself as the | ||
| 1019 | + // whitespace after endstream and | ||
| 1020 | + // start reading stream data in spite | ||
| 1021 | + // of not having seen a newline. | ||
| 1022 | + QTC::TC("qpdf", "QPDF stream with CR only"); | ||
| 1023 | + input->unreadCh(ch); | ||
| 1024 | + warn(QPDFExc( | ||
| 1025 | + qpdf_e_damaged_pdf, | ||
| 1026 | + input->getName(), | ||
| 1027 | + this->last_object_description, | ||
| 1028 | + input->tell(), | ||
| 1029 | + "stream keyword followed" | ||
| 1030 | + " by carriage return only")); | ||
| 1031 | + } | ||
| 1032 | + } | ||
| 1033 | + } | ||
| 1034 | + else | ||
| 1035 | + { | ||
| 1036 | + QTC::TC("qpdf", "QPDF stream without newline"); | ||
| 1037 | + warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 1038 | + this->last_object_description, | ||
| 1039 | + input->tell(), | ||
| 1040 | + "stream keyword not followed" | ||
| 1041 | + " by proper line terminator")); | ||
| 1042 | + } | ||
| 1043 | + } | ||
| 1162 | 1044 | ||
| 1163 | - if (! in_object_stream) | ||
| 1164 | - { | ||
| 1165 | - // check for stream | ||
| 1166 | - qpdf_offset_t cur_offset = input->tell(); | ||
| 1167 | - if (readToken(input) == | ||
| 1168 | - QPDFTokenizer::Token(QPDFTokenizer::tt_word, "stream")) | ||
| 1169 | - { | ||
| 1170 | - // The PDF specification states that the word "stream" | ||
| 1171 | - // should be followed by either a carriage return and | ||
| 1172 | - // a newline or by a newline alone. It specifically | ||
| 1173 | - // disallowed following it by a carriage return alone | ||
| 1174 | - // since, in that case, there would be no way to tell | ||
| 1175 | - // whether the NL in a CR NL sequence was part of the | ||
| 1176 | - // stream data. However, some readers, including | ||
| 1177 | - // Adobe reader, accept a carriage return by itself | ||
| 1178 | - // when followed by a non-newline character, so that's | ||
| 1179 | - // what we do here. | ||
| 1180 | - { | ||
| 1181 | - char ch; | ||
| 1182 | - if (input->read(&ch, 1) == 0) | ||
| 1183 | - { | ||
| 1184 | - // A premature EOF here will result in some | ||
| 1185 | - // other problem that will get reported at | ||
| 1186 | - // another time. | ||
| 1187 | - } | ||
| 1188 | - else if (ch == '\n') | ||
| 1189 | - { | ||
| 1190 | - // ready to read stream data | ||
| 1191 | - QTC::TC("qpdf", "QPDF stream with NL only"); | ||
| 1192 | - } | ||
| 1193 | - else if (ch == '\r') | ||
| 1194 | - { | ||
| 1195 | - // Read another character | ||
| 1196 | - if (input->read(&ch, 1) != 0) | ||
| 1197 | - { | ||
| 1198 | - if (ch == '\n') | ||
| 1199 | - { | ||
| 1200 | - // Ready to read stream data | ||
| 1201 | - QTC::TC("qpdf", "QPDF stream with CRNL"); | ||
| 1202 | - } | ||
| 1203 | - else | ||
| 1204 | - { | ||
| 1205 | - // Treat the \r by itself as the | ||
| 1206 | - // whitespace after endstream and | ||
| 1207 | - // start reading stream data in spite | ||
| 1208 | - // of not having seen a newline. | ||
| 1209 | - QTC::TC("qpdf", "QPDF stream with CR only"); | ||
| 1210 | - input->unreadCh(ch); | ||
| 1211 | - warn(QPDFExc( | ||
| 1212 | - qpdf_e_damaged_pdf, | ||
| 1213 | - input->getName(), | ||
| 1214 | - this->last_object_description, | ||
| 1215 | - input->tell(), | ||
| 1216 | - "stream keyword followed" | ||
| 1217 | - " by carriage return only")); | ||
| 1218 | - } | ||
| 1219 | - } | ||
| 1220 | - } | ||
| 1221 | - else | ||
| 1222 | - { | ||
| 1223 | - QTC::TC("qpdf", "QPDF stream without newline"); | ||
| 1224 | - warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 1225 | - this->last_object_description, | ||
| 1226 | - input->tell(), | ||
| 1227 | - "stream keyword not followed" | ||
| 1228 | - " by proper line terminator")); | ||
| 1229 | - } | ||
| 1230 | - } | 1045 | + // Must get offset before accessing any additional |
| 1046 | + // objects since resolving a previously unresolved | ||
| 1047 | + // indirect object will change file position. | ||
| 1048 | + qpdf_offset_t stream_offset = input->tell(); | ||
| 1049 | + size_t length = 0; | ||
| 1231 | 1050 | ||
| 1232 | - // Must get offset before accessing any additional | ||
| 1233 | - // objects since resolving a previously unresolved | ||
| 1234 | - // indirect object will change file position. | ||
| 1235 | - qpdf_offset_t stream_offset = input->tell(); | ||
| 1236 | - size_t length = 0; | 1051 | + try |
| 1052 | + { | ||
| 1053 | + std::map<std::string, QPDFObjectHandle> dict = | ||
| 1054 | + object.getDictAsMap(); | ||
| 1237 | 1055 | ||
| 1238 | - try | ||
| 1239 | - { | ||
| 1240 | - if (dict.count("/Length") == 0) | ||
| 1241 | - { | ||
| 1242 | - QTC::TC("qpdf", "QPDF stream without length"); | ||
| 1243 | - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 1244 | - this->last_object_description, offset, | ||
| 1245 | - "stream dictionary lacks /Length key"); | ||
| 1246 | - } | 1056 | + if (dict.count("/Length") == 0) |
| 1057 | + { | ||
| 1058 | + QTC::TC("qpdf", "QPDF stream without length"); | ||
| 1059 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 1060 | + this->last_object_description, offset, | ||
| 1061 | + "stream dictionary lacks /Length key"); | ||
| 1062 | + } | ||
| 1247 | 1063 | ||
| 1248 | - QPDFObjectHandle length_obj = dict["/Length"]; | ||
| 1249 | - if (! length_obj.isInteger()) | ||
| 1250 | - { | ||
| 1251 | - QTC::TC("qpdf", "QPDF stream length not integer"); | ||
| 1252 | - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 1253 | - this->last_object_description, offset, | ||
| 1254 | - "/Length key in stream dictionary is not " | ||
| 1255 | - "an integer"); | ||
| 1256 | - } | 1064 | + QPDFObjectHandle length_obj = dict["/Length"]; |
| 1065 | + if (! length_obj.isInteger()) | ||
| 1066 | + { | ||
| 1067 | + QTC::TC("qpdf", "QPDF stream length not integer"); | ||
| 1068 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 1069 | + this->last_object_description, offset, | ||
| 1070 | + "/Length key in stream dictionary is not " | ||
| 1071 | + "an integer"); | ||
| 1072 | + } | ||
| 1257 | 1073 | ||
| 1258 | - length = length_obj.getIntValue(); | ||
| 1259 | - input->seek( | ||
| 1260 | - stream_offset + (qpdf_offset_t)length, SEEK_SET); | ||
| 1261 | - if (! (readToken(input) == | ||
| 1262 | - QPDFTokenizer::Token( | ||
| 1263 | - QPDFTokenizer::tt_word, "endstream"))) | ||
| 1264 | - { | ||
| 1265 | - QTC::TC("qpdf", "QPDF missing endstream"); | ||
| 1266 | - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 1267 | - this->last_object_description, | ||
| 1268 | - input->getLastOffset(), | ||
| 1269 | - "expected endstream"); | ||
| 1270 | - } | ||
| 1271 | - } | ||
| 1272 | - catch (QPDFExc& e) | ||
| 1273 | - { | ||
| 1274 | - if (this->attempt_recovery) | ||
| 1275 | - { | ||
| 1276 | - // may throw an exception | ||
| 1277 | - length = recoverStreamLength( | ||
| 1278 | - input, objid, generation, stream_offset); | ||
| 1279 | - } | ||
| 1280 | - else | ||
| 1281 | - { | ||
| 1282 | - throw e; | ||
| 1283 | - } | ||
| 1284 | - } | ||
| 1285 | - object = QPDFObjectHandle::Factory::newStream( | ||
| 1286 | - this, objid, generation, object, stream_offset, length); | ||
| 1287 | - } | ||
| 1288 | - else | ||
| 1289 | - { | ||
| 1290 | - input->seek(cur_offset, SEEK_SET); | ||
| 1291 | - } | ||
| 1292 | - } | 1074 | + length = length_obj.getIntValue(); |
| 1075 | + input->seek( | ||
| 1076 | + stream_offset + (qpdf_offset_t)length, SEEK_SET); | ||
| 1077 | + if (! (readToken(input) == | ||
| 1078 | + QPDFTokenizer::Token( | ||
| 1079 | + QPDFTokenizer::tt_word, "endstream"))) | ||
| 1080 | + { | ||
| 1081 | + QTC::TC("qpdf", "QPDF missing endstream"); | ||
| 1082 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 1083 | + this->last_object_description, | ||
| 1084 | + input->getLastOffset(), | ||
| 1085 | + "expected endstream"); | ||
| 1086 | + } | ||
| 1087 | + } | ||
| 1088 | + catch (QPDFExc& e) | ||
| 1089 | + { | ||
| 1090 | + if (this->attempt_recovery) | ||
| 1091 | + { | ||
| 1092 | + // may throw an exception | ||
| 1093 | + length = recoverStreamLength( | ||
| 1094 | + input, objid, generation, stream_offset); | ||
| 1095 | + } | ||
| 1096 | + else | ||
| 1097 | + { | ||
| 1098 | + throw e; | ||
| 1099 | + } | ||
| 1100 | + } | ||
| 1101 | + object = QPDFObjectHandle::Factory::newStream( | ||
| 1102 | + this, objid, generation, object, stream_offset, length); | ||
| 1103 | + } | ||
| 1104 | + else | ||
| 1105 | + { | ||
| 1106 | + input->seek(cur_offset, SEEK_SET); | ||
| 1107 | + } | ||
| 1293 | } | 1108 | } |
| 1294 | 1109 | ||
| 1110 | + // Override last_offset so that it points to the beginning of the | ||
| 1111 | + // object we just read | ||
| 1112 | + input->setLastOffset(offset); | ||
| 1295 | return object; | 1113 | return object; |
| 1296 | } | 1114 | } |
| 1297 | 1115 |
libqpdf/QPDFObjectHandle.cc
| @@ -11,12 +11,15 @@ | @@ -11,12 +11,15 @@ | ||
| 11 | #include <qpdf/QPDF_Dictionary.hh> | 11 | #include <qpdf/QPDF_Dictionary.hh> |
| 12 | #include <qpdf/QPDF_Stream.hh> | 12 | #include <qpdf/QPDF_Stream.hh> |
| 13 | #include <qpdf/QPDF_Reserved.hh> | 13 | #include <qpdf/QPDF_Reserved.hh> |
| 14 | +#include <qpdf/BufferInputSource.hh> | ||
| 15 | +#include <qpdf/QPDFExc.hh> | ||
| 14 | 16 | ||
| 15 | #include <qpdf/QTC.hh> | 17 | #include <qpdf/QTC.hh> |
| 16 | #include <qpdf/QUtil.hh> | 18 | #include <qpdf/QUtil.hh> |
| 17 | 19 | ||
| 18 | #include <stdexcept> | 20 | #include <stdexcept> |
| 19 | #include <stdlib.h> | 21 | #include <stdlib.h> |
| 22 | +#include <ctype.h> | ||
| 20 | 23 | ||
| 21 | QPDFObjectHandle::QPDFObjectHandle() : | 24 | QPDFObjectHandle::QPDFObjectHandle() : |
| 22 | initialized(false), | 25 | initialized(false), |
| @@ -398,6 +401,13 @@ QPDFObjectHandle::getDict() | @@ -398,6 +401,13 @@ QPDFObjectHandle::getDict() | ||
| 398 | return dynamic_cast<QPDF_Stream*>(obj.getPointer())->getDict(); | 401 | return dynamic_cast<QPDF_Stream*>(obj.getPointer())->getDict(); |
| 399 | } | 402 | } |
| 400 | 403 | ||
| 404 | +void | ||
| 405 | +QPDFObjectHandle::replaceDict(QPDFObjectHandle new_dict) | ||
| 406 | +{ | ||
| 407 | + assertStream(); | ||
| 408 | + dynamic_cast<QPDF_Stream*>(obj.getPointer())->replaceDict(new_dict); | ||
| 409 | +} | ||
| 410 | + | ||
| 401 | PointerHolder<Buffer> | 411 | PointerHolder<Buffer> |
| 402 | QPDFObjectHandle::getStreamData() | 412 | QPDFObjectHandle::getStreamData() |
| 403 | { | 413 | { |
| @@ -599,6 +609,265 @@ QPDFObjectHandle::unparseResolved() | @@ -599,6 +609,265 @@ QPDFObjectHandle::unparseResolved() | ||
| 599 | } | 609 | } |
| 600 | 610 | ||
| 601 | QPDFObjectHandle | 611 | QPDFObjectHandle |
| 612 | +QPDFObjectHandle::parse(std::string const& object_str, | ||
| 613 | + std::string const& object_description) | ||
| 614 | +{ | ||
| 615 | + PointerHolder<InputSource> input = | ||
| 616 | + new BufferInputSource("parsed object", object_str); | ||
| 617 | + QPDFTokenizer tokenizer; | ||
| 618 | + bool empty = false; | ||
| 619 | + QPDFObjectHandle result = | ||
| 620 | + parse(input, object_description, tokenizer, empty, 0, 0); | ||
| 621 | + size_t offset = (size_t) input->tell(); | ||
| 622 | + while (offset < object_str.length()) | ||
| 623 | + { | ||
| 624 | + if (! isspace(object_str[offset])) | ||
| 625 | + { | ||
| 626 | + QTC::TC("qpdf", "QPDFObjectHandle trailing data in parse"); | ||
| 627 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 628 | + object_description, | ||
| 629 | + input->getLastOffset(), | ||
| 630 | + "trailing data found parsing object from string"); | ||
| 631 | + } | ||
| 632 | + ++offset; | ||
| 633 | + } | ||
| 634 | + return result; | ||
| 635 | +} | ||
| 636 | + | ||
| 637 | +QPDFObjectHandle | ||
| 638 | +QPDFObjectHandle::parse(PointerHolder<InputSource> input, | ||
| 639 | + std::string const& object_description, | ||
| 640 | + QPDFTokenizer& tokenizer, bool& empty, | ||
| 641 | + StringDecrypter* decrypter, QPDF* context) | ||
| 642 | +{ | ||
| 643 | + return parseInternal(input, object_description, tokenizer, empty, | ||
| 644 | + decrypter, context, false, false); | ||
| 645 | +} | ||
| 646 | + | ||
| 647 | +QPDFObjectHandle | ||
| 648 | +QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input, | ||
| 649 | + std::string const& object_description, | ||
| 650 | + QPDFTokenizer& tokenizer, bool& empty, | ||
| 651 | + StringDecrypter* decrypter, QPDF* context, | ||
| 652 | + bool in_array, bool in_dictionary) | ||
| 653 | +{ | ||
| 654 | + empty = false; | ||
| 655 | + if (in_dictionary && in_array) | ||
| 656 | + { | ||
| 657 | + // Although dictionaries and arrays arbitrarily nest, these | ||
| 658 | + // variables indicate what is at the top of the stack right | ||
| 659 | + // now, so they can, by definition, never both be true. | ||
| 660 | + throw std::logic_error( | ||
| 661 | + "INTERNAL ERROR: parseInternal: in_dict && in_array"); | ||
| 662 | + } | ||
| 663 | + | ||
| 664 | + QPDFObjectHandle object; | ||
| 665 | + | ||
| 666 | + qpdf_offset_t offset = input->tell(); | ||
| 667 | + std::vector<QPDFObjectHandle> olist; | ||
| 668 | + bool done = false; | ||
| 669 | + while (! done) | ||
| 670 | + { | ||
| 671 | + object = QPDFObjectHandle(); | ||
| 672 | + | ||
| 673 | + QPDFTokenizer::Token token = | ||
| 674 | + tokenizer.readToken(input, object_description); | ||
| 675 | + | ||
| 676 | + switch (token.getType()) | ||
| 677 | + { | ||
| 678 | + case QPDFTokenizer::tt_brace_open: | ||
| 679 | + case QPDFTokenizer::tt_brace_close: | ||
| 680 | + // Don't know what to do with these for now | ||
| 681 | + QTC::TC("qpdf", "QPDFObjectHandle bad brace"); | ||
| 682 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 683 | + object_description, | ||
| 684 | + input->getLastOffset(), | ||
| 685 | + "unexpected brace token"); | ||
| 686 | + break; | ||
| 687 | + | ||
| 688 | + case QPDFTokenizer::tt_array_close: | ||
| 689 | + if (in_array) | ||
| 690 | + { | ||
| 691 | + done = true; | ||
| 692 | + } | ||
| 693 | + else | ||
| 694 | + { | ||
| 695 | + QTC::TC("qpdf", "QPDFObjectHandle bad array close"); | ||
| 696 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 697 | + object_description, | ||
| 698 | + input->getLastOffset(), | ||
| 699 | + "unexpected array close token"); | ||
| 700 | + } | ||
| 701 | + break; | ||
| 702 | + | ||
| 703 | + case QPDFTokenizer::tt_dict_close: | ||
| 704 | + if (in_dictionary) | ||
| 705 | + { | ||
| 706 | + done = true; | ||
| 707 | + } | ||
| 708 | + else | ||
| 709 | + { | ||
| 710 | + QTC::TC("qpdf", "QPDFObjectHandle bad dictionary close"); | ||
| 711 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 712 | + object_description, | ||
| 713 | + input->getLastOffset(), | ||
| 714 | + "unexpected dictionary close token"); | ||
| 715 | + } | ||
| 716 | + break; | ||
| 717 | + | ||
| 718 | + case QPDFTokenizer::tt_array_open: | ||
| 719 | + object = parseInternal( | ||
| 720 | + input, object_description, tokenizer, empty, | ||
| 721 | + decrypter, context, true, false); | ||
| 722 | + break; | ||
| 723 | + | ||
| 724 | + case QPDFTokenizer::tt_dict_open: | ||
| 725 | + object = parseInternal( | ||
| 726 | + input, object_description, tokenizer, empty, | ||
| 727 | + decrypter, context, false, true); | ||
| 728 | + break; | ||
| 729 | + | ||
| 730 | + case QPDFTokenizer::tt_bool: | ||
| 731 | + object = newBool((token.getValue() == "true")); | ||
| 732 | + break; | ||
| 733 | + | ||
| 734 | + case QPDFTokenizer::tt_null: | ||
| 735 | + object = newNull(); | ||
| 736 | + break; | ||
| 737 | + | ||
| 738 | + case QPDFTokenizer::tt_integer: | ||
| 739 | + object = newInteger(QUtil::string_to_ll(token.getValue().c_str())); | ||
| 740 | + break; | ||
| 741 | + | ||
| 742 | + case QPDFTokenizer::tt_real: | ||
| 743 | + object = newReal(token.getValue()); | ||
| 744 | + break; | ||
| 745 | + | ||
| 746 | + case QPDFTokenizer::tt_name: | ||
| 747 | + object = newName(token.getValue()); | ||
| 748 | + break; | ||
| 749 | + | ||
| 750 | + case QPDFTokenizer::tt_word: | ||
| 751 | + { | ||
| 752 | + std::string const& value = token.getValue(); | ||
| 753 | + if ((value == "R") && (in_array || in_dictionary) && | ||
| 754 | + (olist.size() >= 2) && | ||
| 755 | + (olist[olist.size() - 1].isInteger()) && | ||
| 756 | + (olist[olist.size() - 2].isInteger())) | ||
| 757 | + { | ||
| 758 | + if (context == 0) | ||
| 759 | + { | ||
| 760 | + QTC::TC("qpdf", "QPDFObjectHandle indirect without context"); | ||
| 761 | + throw std::logic_error( | ||
| 762 | + "QPDFObjectHandle::parse called without context" | ||
| 763 | + " on an object with indirect references"); | ||
| 764 | + } | ||
| 765 | + // Try to resolve indirect objects | ||
| 766 | + object = newIndirect( | ||
| 767 | + context, | ||
| 768 | + olist[olist.size() - 2].getIntValue(), | ||
| 769 | + olist[olist.size() - 1].getIntValue()); | ||
| 770 | + olist.pop_back(); | ||
| 771 | + olist.pop_back(); | ||
| 772 | + } | ||
| 773 | + else if ((value == "endobj") && | ||
| 774 | + (! (in_array || in_dictionary))) | ||
| 775 | + { | ||
| 776 | + // We just saw endobj without having read | ||
| 777 | + // anything. Treat this as a null and do not move | ||
| 778 | + // the input source's offset. | ||
| 779 | + object = newNull(); | ||
| 780 | + input->seek(input->getLastOffset(), SEEK_SET); | ||
| 781 | + empty = true; | ||
| 782 | + } | ||
| 783 | + else | ||
| 784 | + { | ||
| 785 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 786 | + object_description, | ||
| 787 | + input->getLastOffset(), | ||
| 788 | + "unknown token while reading object (" + | ||
| 789 | + value + ")"); | ||
| 790 | + } | ||
| 791 | + } | ||
| 792 | + break; | ||
| 793 | + | ||
| 794 | + case QPDFTokenizer::tt_string: | ||
| 795 | + { | ||
| 796 | + std::string val = token.getValue(); | ||
| 797 | + if (decrypter) | ||
| 798 | + { | ||
| 799 | + decrypter->decryptString(val); | ||
| 800 | + } | ||
| 801 | + object = QPDFObjectHandle::newString(val); | ||
| 802 | + } | ||
| 803 | + | ||
| 804 | + break; | ||
| 805 | + | ||
| 806 | + default: | ||
| 807 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 808 | + object_description, | ||
| 809 | + input->getLastOffset(), | ||
| 810 | + "unknown token type while reading object"); | ||
| 811 | + break; | ||
| 812 | + } | ||
| 813 | + | ||
| 814 | + if (in_dictionary || in_array) | ||
| 815 | + { | ||
| 816 | + if (! done) | ||
| 817 | + { | ||
| 818 | + olist.push_back(object); | ||
| 819 | + } | ||
| 820 | + } | ||
| 821 | + else if (! object.isInitialized()) | ||
| 822 | + { | ||
| 823 | + throw std::logic_error( | ||
| 824 | + "INTERNAL ERROR: uninitialized object (token = " + | ||
| 825 | + QUtil::int_to_string(token.getType()) + | ||
| 826 | + ", " + token.getValue() + ")"); | ||
| 827 | + } | ||
| 828 | + else | ||
| 829 | + { | ||
| 830 | + done = true; | ||
| 831 | + } | ||
| 832 | + } | ||
| 833 | + | ||
| 834 | + if (in_array) | ||
| 835 | + { | ||
| 836 | + object = newArray(olist); | ||
| 837 | + } | ||
| 838 | + else if (in_dictionary) | ||
| 839 | + { | ||
| 840 | + // Convert list to map. Alternating elements are keys. | ||
| 841 | + std::map<std::string, QPDFObjectHandle> dict; | ||
| 842 | + if (olist.size() % 2) | ||
| 843 | + { | ||
| 844 | + QTC::TC("qpdf", "QPDFObjectHandle dictionary odd number of elements"); | ||
| 845 | + throw QPDFExc( | ||
| 846 | + qpdf_e_damaged_pdf, input->getName(), | ||
| 847 | + object_description, input->getLastOffset(), | ||
| 848 | + "dictionary ending here has an odd number of elements"); | ||
| 849 | + } | ||
| 850 | + for (unsigned int i = 0; i < olist.size(); i += 2) | ||
| 851 | + { | ||
| 852 | + QPDFObjectHandle key_obj = olist[i]; | ||
| 853 | + QPDFObjectHandle val = olist[i + 1]; | ||
| 854 | + if (! key_obj.isName()) | ||
| 855 | + { | ||
| 856 | + throw QPDFExc( | ||
| 857 | + qpdf_e_damaged_pdf, | ||
| 858 | + input->getName(), object_description, offset, | ||
| 859 | + std::string("dictionary key not name (") + | ||
| 860 | + key_obj.unparse() + ")"); | ||
| 861 | + } | ||
| 862 | + dict[key_obj.getName()] = val; | ||
| 863 | + } | ||
| 864 | + object = newDictionary(dict); | ||
| 865 | + } | ||
| 866 | + | ||
| 867 | + return object; | ||
| 868 | +} | ||
| 869 | + | ||
| 870 | +QPDFObjectHandle | ||
| 602 | QPDFObjectHandle::newIndirect(QPDF* qpdf, int objid, int generation) | 871 | QPDFObjectHandle::newIndirect(QPDF* qpdf, int objid, int generation) |
| 603 | { | 872 | { |
| 604 | return QPDFObjectHandle(qpdf, objid, generation); | 873 | return QPDFObjectHandle(qpdf, objid, generation); |
libqpdf/QPDF_Stream.cc
| @@ -464,3 +464,18 @@ QPDF_Stream::replaceFilterData(QPDFObjectHandle const& filter, | @@ -464,3 +464,18 @@ QPDF_Stream::replaceFilterData(QPDFObjectHandle const& filter, | ||
| 464 | "/Length", QPDFObjectHandle::newInteger((int)length)); | 464 | "/Length", QPDFObjectHandle::newInteger((int)length)); |
| 465 | } | 465 | } |
| 466 | } | 466 | } |
| 467 | + | ||
| 468 | +void | ||
| 469 | +QPDF_Stream::replaceDict(QPDFObjectHandle new_dict) | ||
| 470 | +{ | ||
| 471 | + this->stream_dict = new_dict; | ||
| 472 | + QPDFObjectHandle length_obj = new_dict.getKey("/Length"); | ||
| 473 | + if (length_obj.isInteger()) | ||
| 474 | + { | ||
| 475 | + this->length = length_obj.getIntValue(); | ||
| 476 | + } | ||
| 477 | + else | ||
| 478 | + { | ||
| 479 | + this->length = 0; | ||
| 480 | + } | ||
| 481 | +} |
libqpdf/qpdf/QPDF_Stream.hh
| @@ -32,6 +32,8 @@ class QPDF_Stream: public QPDFObject | @@ -32,6 +32,8 @@ class QPDF_Stream: public QPDFObject | ||
| 32 | QPDFObjectHandle const& filter, | 32 | QPDFObjectHandle const& filter, |
| 33 | QPDFObjectHandle const& decode_parms); | 33 | QPDFObjectHandle const& decode_parms); |
| 34 | 34 | ||
| 35 | + void replaceDict(QPDFObjectHandle new_dict); | ||
| 36 | + | ||
| 35 | // Replace object ID and generation. This may only be called if | 37 | // Replace object ID and generation. This may only be called if |
| 36 | // object ID and generation are 0. It is used by QPDFObjectHandle | 38 | // object ID and generation are 0. It is used by QPDFObjectHandle |
| 37 | // when adding streams to files. | 39 | // when adding streams to files. |
qpdf/pdf_from_scratch.cc
| @@ -38,25 +38,20 @@ void runtest(int n) | @@ -38,25 +38,20 @@ void runtest(int n) | ||
| 38 | // Create a minimal PDF from scratch. | 38 | // Create a minimal PDF from scratch. |
| 39 | 39 | ||
| 40 | QPDFObjectHandle font = pdf.makeIndirectObject( | 40 | QPDFObjectHandle font = pdf.makeIndirectObject( |
| 41 | - QPDFObjectHandle::newDictionary()); | ||
| 42 | - font.replaceKey("/Type", newName("/Font")); | ||
| 43 | - font.replaceKey("/Subtype", newName("/Type1")); | ||
| 44 | - font.replaceKey("/Name", newName("/F1")); | ||
| 45 | - font.replaceKey("/BaseFont", newName("/Helvetica")); | ||
| 46 | - font.replaceKey("/Encoding", newName("/WinAnsiEncoding")); | 41 | + QPDFObjectHandle::parse("<<" |
| 42 | + " /Type /Font" | ||
| 43 | + " /Subtype /Type1" | ||
| 44 | + " /Name /F1" | ||
| 45 | + " /BaseFont /Helvetica" | ||
| 46 | + " /Encoding /WinAnsiEncoding" | ||
| 47 | + ">>")); | ||
| 47 | 48 | ||
| 48 | QPDFObjectHandle procset = pdf.makeIndirectObject( | 49 | QPDFObjectHandle procset = pdf.makeIndirectObject( |
| 49 | - QPDFObjectHandle::newArray()); | ||
| 50 | - procset.appendItem(newName("/PDF")); | ||
| 51 | - procset.appendItem(newName("/Text")); | 50 | + QPDFObjectHandle::parse("[/PDF /Text]")); |
| 52 | 51 | ||
| 53 | QPDFObjectHandle contents = createPageContents(pdf, "First Page"); | 52 | QPDFObjectHandle contents = createPageContents(pdf, "First Page"); |
| 54 | 53 | ||
| 55 | - QPDFObjectHandle mediabox = QPDFObjectHandle::newArray(); | ||
| 56 | - mediabox.appendItem(QPDFObjectHandle::newInteger(0)); | ||
| 57 | - mediabox.appendItem(QPDFObjectHandle::newInteger(0)); | ||
| 58 | - mediabox.appendItem(QPDFObjectHandle::newInteger(612)); | ||
| 59 | - mediabox.appendItem(QPDFObjectHandle::newInteger(792)); | 54 | + QPDFObjectHandle mediabox = QPDFObjectHandle::parse("[0 0 612 792]"); |
| 60 | 55 | ||
| 61 | QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary(); | 56 | QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary(); |
| 62 | rfont.replaceKey("/F1", font); | 57 | rfont.replaceKey("/F1", font); |
qpdf/qpdf.testcov
| @@ -60,13 +60,13 @@ QPDF missing trailer 0 | @@ -60,13 +60,13 @@ QPDF missing trailer 0 | ||
| 60 | QPDF trailer lacks size 0 | 60 | QPDF trailer lacks size 0 |
| 61 | QPDF trailer size not integer 0 | 61 | QPDF trailer size not integer 0 |
| 62 | QPDF trailer prev not integer 0 | 62 | QPDF trailer prev not integer 0 |
| 63 | -QPDF bad brace 0 | ||
| 64 | -QPDF bad array close 0 | ||
| 65 | -QPDF dictionary odd number of elements 0 | 63 | +QPDFObjectHandle bad brace 0 |
| 64 | +QPDFObjectHandle bad array close 0 | ||
| 65 | +QPDFObjectHandle dictionary odd number of elements 0 | ||
| 66 | QPDF stream without length 0 | 66 | QPDF stream without length 0 |
| 67 | QPDF stream length not integer 0 | 67 | QPDF stream length not integer 0 |
| 68 | QPDF missing endstream 0 | 68 | QPDF missing endstream 0 |
| 69 | -QPDF bad dictionary close 0 | 69 | +QPDFObjectHandle bad dictionary close 0 |
| 70 | QPDF can't find xref 0 | 70 | QPDF can't find xref 0 |
| 71 | QPDF_Tokenizer bad ) 0 | 71 | QPDF_Tokenizer bad ) 0 |
| 72 | QPDF_Tokenizer bad > 0 | 72 | QPDF_Tokenizer bad > 0 |
| @@ -235,3 +235,5 @@ QPDF not copying pages object 0 | @@ -235,3 +235,5 @@ QPDF not copying pages object 0 | ||
| 235 | QPDF insert foreign page 0 | 235 | QPDF insert foreign page 0 |
| 236 | QPDFWriter foreign object 0 | 236 | QPDFWriter foreign object 0 |
| 237 | QPDFWriter copy use_aes 1 | 237 | QPDFWriter copy use_aes 1 |
| 238 | +QPDFObjectHandle indirect without context 0 | ||
| 239 | +QPDFObjectHandle trailing data in parse 0 |
qpdf/qtest/qpdf.test
| @@ -149,7 +149,7 @@ $td->runtest("remove page we don't have", | @@ -149,7 +149,7 @@ $td->runtest("remove page we don't have", | ||
| 149 | $td->NORMALIZE_NEWLINES); | 149 | $td->NORMALIZE_NEWLINES); |
| 150 | # ---------- | 150 | # ---------- |
| 151 | $td->notify("--- Miscellaneous Tests ---"); | 151 | $td->notify("--- Miscellaneous Tests ---"); |
| 152 | -$n_tests += 44; | 152 | +$n_tests += 45; |
| 153 | 153 | ||
| 154 | $td->runtest("qpdf version", | 154 | $td->runtest("qpdf version", |
| 155 | {$td->COMMAND => "qpdf --version"}, | 155 | {$td->COMMAND => "qpdf --version"}, |
| @@ -370,6 +370,10 @@ $td->runtest("detect foreign object in write", | @@ -370,6 +370,10 @@ $td->runtest("detect foreign object in write", | ||
| 370 | " copy-foreign-objects-in.pdf minimal.pdf"}, | 370 | " copy-foreign-objects-in.pdf minimal.pdf"}, |
| 371 | {$td->FILE => "foreign-in-write.out", $td->EXIT_STATUS => 0}, | 371 | {$td->FILE => "foreign-in-write.out", $td->EXIT_STATUS => 0}, |
| 372 | $td->NORMALIZE_NEWLINES); | 372 | $td->NORMALIZE_NEWLINES); |
| 373 | +$td->runtest("parse objects from string", | ||
| 374 | + {$td->COMMAND => "test_driver 31 minimal.pdf"}, # file not used | ||
| 375 | + {$td->FILE => "parse-object.out", $td->EXIT_STATUS => 0}, | ||
| 376 | + $td->NORMALIZE_NEWLINES); | ||
| 373 | 377 | ||
| 374 | show_ntests(); | 378 | show_ntests(); |
| 375 | # ---------- | 379 | # ---------- |
qpdf/qtest/qpdf/bad22.out
qpdf/qtest/qpdf/bad23.out
qpdf/qtest/qpdf/parse-object.out
0 → 100644
| 1 | +[ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ] | ||
| 2 | +logic error parsing indirect: QPDFObjectHandle::parse called without context on an object with indirect references | ||
| 3 | +trailing data: parsed object (trailing test): trailing data found parsing object from string | ||
| 4 | +test 31 done |
qpdf/test_driver.cc
| @@ -1054,6 +1054,38 @@ void runtest(int n, char const* filename1, char const* filename2) | @@ -1054,6 +1054,38 @@ void runtest(int n, char const* filename1, char const* filename2) | ||
| 1054 | << std::endl; | 1054 | << std::endl; |
| 1055 | } | 1055 | } |
| 1056 | } | 1056 | } |
| 1057 | + else if (n == 31) | ||
| 1058 | + { | ||
| 1059 | + // Test object parsing from a string. The input file is not used. | ||
| 1060 | + | ||
| 1061 | + QPDFObjectHandle o1 = | ||
| 1062 | + QPDFObjectHandle::parse( | ||
| 1063 | + "[/name 16059 3.14159 false\n" | ||
| 1064 | + " << /key true /other [ (string1) (string2) ] >> null]"); | ||
| 1065 | + std::cout << o1.unparse() << std::endl; | ||
| 1066 | + QPDFObjectHandle o2 = QPDFObjectHandle::parse(" 12345 \f "); | ||
| 1067 | + assert(o2.isInteger() && (o2.getIntValue() == 12345)); | ||
| 1068 | + try | ||
| 1069 | + { | ||
| 1070 | + QPDFObjectHandle::parse("[1 0 R]", "indirect test"); | ||
| 1071 | + std::cout << "oops -- didn't throw" << std::endl; | ||
| 1072 | + } | ||
| 1073 | + catch (std::logic_error e) | ||
| 1074 | + { | ||
| 1075 | + std::cout << "logic error parsing indirect: " << e.what() | ||
| 1076 | + << std::endl; | ||
| 1077 | + } | ||
| 1078 | + try | ||
| 1079 | + { | ||
| 1080 | + QPDFObjectHandle::parse("0 trailing", "trailing test"); | ||
| 1081 | + std::cout << "oops -- didn't throw" << std::endl; | ||
| 1082 | + } | ||
| 1083 | + catch (std::runtime_error e) | ||
| 1084 | + { | ||
| 1085 | + std::cout << "trailing data: " << e.what() | ||
| 1086 | + << std::endl; | ||
| 1087 | + } | ||
| 1088 | + } | ||
| 1057 | else | 1089 | else |
| 1058 | { | 1090 | { |
| 1059 | throw std::runtime_error(std::string("invalid test ") + | 1091 | throw std::runtime_error(std::string("invalid test ") + |