Commit 6bbea4baa0c06b39b1b71f1aa6fc276789296556

Authored by Jay Berkenbilt
1 parent f3e267fc

Implement QPDFObjectHandle::parse

Move object parsing code from QPDF to QPDFObjectHandle and
parameterize the parts of it that are specific to a QPDF object.
Provide a version that can't handle indirect objects and that can be
called on an arbitrary string.

A side effect of this change is that the offset used when reporting
invalid stream length has changed, but since the new value seems like
a better value than the old one, the test suite has been updated
rather than making the code backward compatible.  This only affects
the offset reported for invalid streams that lack /Length or have an
invalid /Length key.

Updated some test code and examples to use QPDFObjectHandle::parse.

Supporting changes include adding a BufferInputSource constructor that
takes a string.
ChangeLog
  1 +2012-07-21 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Add new method QPDFObjectHandle::replaceDict to replace a
  4 + stream's dictionary. Use with caution; see comments in
  5 + QPDFObjectHandle.hh.
  6 +
  7 + * Add new method QPDFObjectHandle::parse for creation of
  8 + QPDFObjectHandle objects from string representations of the
  9 + objects. Thanks to Tobias Hoffmann for the idea.
  10 +
1 2012-07-15 Jay Berkenbilt <ejb@ql.org> 11 2012-07-15 Jay Berkenbilt <ejb@ql.org>
2 12
3 * add new QPDF::isEncrypted method that returns some additional 13 * add new QPDF::isEncrypted method that returns some additional
@@ -20,16 +20,14 @@ Next @@ -20,16 +20,14 @@ Next
20 * Make sure that the release notes call attention to the one API 20 * Make sure that the release notes call attention to the one API
21 breaking change: removal of length from replaceStreamData. 21 breaking change: removal of length from replaceStreamData.
22 22
23 - * Add a way to create new QPDFObjectHandles with a string  
24 - representation of them, such as  
25 - QPDFObjectHandle::parse("<< /a 1 /b 2 >>");  
26 -  
27 * Document thread safety: One individual QPDF or QPDFWriter object 23 * Document thread safety: One individual QPDF or QPDFWriter object
28 can only be used by one thread at a time, but multiple threads can 24 can only be used by one thread at a time, but multiple threads can
29 simultaneously use separate objects. 25 simultaneously use separate objects.
30 26
31 * Write some documentation about the design of copyForeignObject. 27 * Write some documentation about the design of copyForeignObject.
32 28
  29 + * Mention QPDFObjectHandle::parse in the documentation.
  30 +
33 * copyForeignObject still to do: 31 * copyForeignObject still to do:
34 32
35 - qpdf command 33 - qpdf command
examples/pdf-create.cc
@@ -81,24 +81,28 @@ static void create_pdf(char const* filename) @@ -81,24 +81,28 @@ static void create_pdf(char const* filename)
81 // Add an indirect object to contain a font descriptor for the 81 // Add an indirect object to contain a font descriptor for the
82 // built-in Helvetica font. 82 // built-in Helvetica font.
83 QPDFObjectHandle font = pdf.makeIndirectObject( 83 QPDFObjectHandle font = pdf.makeIndirectObject(
84 - QPDFObjectHandle::newDictionary());  
85 - font.replaceKey("/Type", newName("/Font"));  
86 - font.replaceKey("/Subtype", newName("/Type1"));  
87 - font.replaceKey("/Name", newName("/F1"));  
88 - font.replaceKey("/BaseFont", newName("/Helvetica"));  
89 - font.replaceKey("/Encoding", newName("/WinAnsiEncoding")); 84 + QPDFObjectHandle::parse(
  85 + "<<"
  86 + " /Type /Font"
  87 + " /Subtype /Type1"
  88 + " /Name /F1"
  89 + " /BaseFont /Helvetica"
  90 + " /Encoding /WinAnsiEncoding"
  91 + ">>"));
90 92
91 // Create a stream to encode our image. We don't have to set the 93 // Create a stream to encode our image. We don't have to set the
92 // length or filters. QPDFWriter will fill in the length and 94 // length or filters. QPDFWriter will fill in the length and
93 // compress the stream data using FlateDecode by default. 95 // compress the stream data using FlateDecode by default.
94 QPDFObjectHandle image = QPDFObjectHandle::newStream(&pdf); 96 QPDFObjectHandle image = QPDFObjectHandle::newStream(&pdf);
95 - QPDFObjectHandle image_dict = image.getDict();  
96 - image_dict.replaceKey("/Type", newName("/XObject"));  
97 - image_dict.replaceKey("/Subtype", newName("/Image"));  
98 - image_dict.replaceKey("/ColorSpace", newName("/DeviceRGB"));  
99 - image_dict.replaceKey("/BitsPerComponent", newInteger(8));  
100 - image_dict.replaceKey("/Width", newInteger(100));  
101 - image_dict.replaceKey("/Height", newInteger(100)); 97 + image.replaceDict(QPDFObjectHandle::parse(
  98 + "<<"
  99 + " /Type /XObject"
  100 + " /Subtype /Image"
  101 + " /ColorSpace /DeviceRGB"
  102 + " /BitsPerComponent 8"
  103 + " /Width 100"
  104 + " /Height 100"
  105 + ">>"));
102 // Provide the stream data. 106 // Provide the stream data.
103 ImageProvider* p = new ImageProvider(100, 100); 107 ImageProvider* p = new ImageProvider(100, 100);
104 PointerHolder<QPDFObjectHandle::StreamDataProvider> provider(p); 108 PointerHolder<QPDFObjectHandle::StreamDataProvider> provider(p);
@@ -107,10 +111,8 @@ static void create_pdf(char const* filename) @@ -107,10 +111,8 @@ static void create_pdf(char const* filename)
107 QPDFObjectHandle::newNull()); 111 QPDFObjectHandle::newNull());
108 112
109 // Create direct objects as needed by the page dictionary. 113 // Create direct objects as needed by the page dictionary.
110 - QPDFObjectHandle procset = QPDFObjectHandle::newArray();  
111 - procset.appendItem(newName("/PDF"));  
112 - procset.appendItem(newName("/Text"));  
113 - procset.appendItem(newName("/ImageC")); 114 + QPDFObjectHandle procset = QPDFObjectHandle::parse(
  115 + "[/PDF /Text /ImageC]");
114 116
115 QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary(); 117 QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary();
116 rfont.replaceKey("/F1", font); 118 rfont.replaceKey("/F1", font);
include/qpdf/BufferInputSource.hh
@@ -9,6 +9,8 @@ class BufferInputSource: public InputSource @@ -9,6 +9,8 @@ class BufferInputSource: public InputSource
9 public: 9 public:
10 BufferInputSource(std::string const& description, Buffer* buf, 10 BufferInputSource(std::string const& description, Buffer* buf,
11 bool own_memory = false); 11 bool own_memory = false);
  12 + BufferInputSource(std::string const& description,
  13 + std::string const& contents);
12 virtual ~BufferInputSource(); 14 virtual ~BufferInputSource();
13 virtual qpdf_offset_t findAndSkipNextEOL(); 15 virtual qpdf_offset_t findAndSkipNextEOL();
14 virtual std::string const& getName() const; 16 virtual std::string const& getName() const;
include/qpdf/QPDF.hh
@@ -531,6 +531,23 @@ class QPDF @@ -531,6 +531,23 @@ class QPDF
531 std::map<ObjGen, QPDFObjectHandle> foreign_streams; 531 std::map<ObjGen, QPDFObjectHandle> foreign_streams;
532 }; 532 };
533 533
  534 + class StringDecrypter: public QPDFObjectHandle::StringDecrypter
  535 + {
  536 + friend class QPDF;
  537 +
  538 + public:
  539 + StringDecrypter(QPDF* qpdf, int objid, int gen);
  540 + virtual ~StringDecrypter()
  541 + {
  542 + }
  543 + virtual void decryptString(std::string& val);
  544 +
  545 + private:
  546 + QPDF* qpdf;
  547 + int objid;
  548 + int gen;
  549 + };
  550 +
534 void parse(char const* password); 551 void parse(char const* password);
535 void warn(QPDFExc const& e); 552 void warn(QPDFExc const& e);
536 void setTrailer(QPDFObjectHandle obj); 553 void setTrailer(QPDFObjectHandle obj);
@@ -547,10 +564,6 @@ class QPDF @@ -547,10 +564,6 @@ class QPDF
547 QPDFObjectHandle readObject( 564 QPDFObjectHandle readObject(
548 PointerHolder<InputSource>, std::string const& description, 565 PointerHolder<InputSource>, std::string const& description,
549 int objid, int generation, bool in_object_stream); 566 int objid, int generation, bool in_object_stream);
550 - QPDFObjectHandle readObjectInternal(  
551 - PointerHolder<InputSource> input, int objid, int generation,  
552 - bool in_object_stream,  
553 - bool in_array, bool in_dictionary);  
554 size_t recoverStreamLength( 567 size_t recoverStreamLength(
555 PointerHolder<InputSource> input, int objid, int generation, 568 PointerHolder<InputSource> input, int objid, int generation,
556 qpdf_offset_t stream_offset); 569 qpdf_offset_t stream_offset);
include/qpdf/QPDFObjectHandle.hh
@@ -18,6 +18,7 @@ @@ -18,6 +18,7 @@
18 18
19 #include <qpdf/PointerHolder.hh> 19 #include <qpdf/PointerHolder.hh>
20 #include <qpdf/Buffer.hh> 20 #include <qpdf/Buffer.hh>
  21 +#include <qpdf/InputSource.hh>
21 22
22 #include <qpdf/QPDFObject.hh> 23 #include <qpdf/QPDFObject.hh>
23 24
@@ -25,6 +26,7 @@ class Pipeline; @@ -25,6 +26,7 @@ class Pipeline;
25 class QPDF; 26 class QPDF;
26 class QPDF_Dictionary; 27 class QPDF_Dictionary;
27 class QPDF_Array; 28 class QPDF_Array;
  29 +class QPDFTokenizer;
28 30
29 class QPDFObjectHandle 31 class QPDFObjectHandle
30 { 32 {
@@ -57,6 +59,18 @@ class QPDFObjectHandle @@ -57,6 +59,18 @@ class QPDFObjectHandle
57 Pipeline* pipeline) = 0; 59 Pipeline* pipeline) = 0;
58 }; 60 };
59 61
  62 + // This class is used by parse to decrypt strings when reading an
  63 + // object that contains encrypted strings.
  64 + class StringDecrypter
  65 + {
  66 + public:
  67 + QPDF_DLL
  68 + virtual ~StringDecrypter()
  69 + {
  70 + }
  71 + virtual void decryptString(std::string& val) = 0;
  72 + };
  73 +
60 QPDF_DLL 74 QPDF_DLL
61 QPDFObjectHandle(); 75 QPDFObjectHandle();
62 QPDF_DLL 76 QPDF_DLL
@@ -95,6 +109,30 @@ class QPDFObjectHandle @@ -95,6 +109,30 @@ class QPDFObjectHandle
95 109
96 // Public factory methods 110 // Public factory methods
97 111
  112 + // Construct an object of any type from a string representation of
  113 + // the object. Throws QPDFExc with an empty filename and an
  114 + // offset into the string if there is an error. Any indirect
  115 + // object syntax (obj gen R) will cause a logic_error exception to
  116 + // be thrown. If object_description is provided, it will appear
  117 + // in the message of any QPDFExc exception thrown for invalid
  118 + // syntax.
  119 + QPDF_DLL
  120 + static QPDFObjectHandle parse(std::string const& object_str,
  121 + std::string const& object_description = "");
  122 +
  123 + // Construct an object as above by reading from the given
  124 + // InputSource at its current position and using the tokenizer you
  125 + // supply. Indirect objects and encrypted strings are permitted.
  126 + // This method is intended to be called by QPDF for parsing
  127 + // objects that are ready from the object's input stream.
  128 + QPDF_DLL
  129 + static QPDFObjectHandle parse(PointerHolder<InputSource> input,
  130 + std::string const& object_description,
  131 + QPDFTokenizer&, bool& empty,
  132 + StringDecrypter* decrypter,
  133 + QPDF* context);
  134 +
  135 + // Type-specific factories
98 QPDF_DLL 136 QPDF_DLL
99 static QPDFObjectHandle newNull(); 137 static QPDFObjectHandle newNull();
100 QPDF_DLL 138 QPDF_DLL
@@ -124,7 +162,8 @@ class QPDFObjectHandle @@ -124,7 +162,8 @@ class QPDFObjectHandle
124 // object. A subsequent call must be made to replaceStreamData() 162 // object. A subsequent call must be made to replaceStreamData()
125 // to provide data for the stream. The stream's dictionary may be 163 // to provide data for the stream. The stream's dictionary may be
126 // retrieved by calling getDict(), and the resulting dictionary 164 // retrieved by calling getDict(), and the resulting dictionary
127 - // may be modified. 165 + // may be modified. Alternatively, you can create a new
  166 + // dictionary and call replaceDict to install it.
128 QPDF_DLL 167 QPDF_DLL
129 static QPDFObjectHandle newStream(QPDF* qpdf); 168 static QPDFObjectHandle newStream(QPDF* qpdf);
130 169
@@ -303,6 +342,15 @@ class QPDFObjectHandle @@ -303,6 +342,15 @@ class QPDFObjectHandle
303 bool pipeStreamData(Pipeline*, bool filter, 342 bool pipeStreamData(Pipeline*, bool filter,
304 bool normalize, bool compress); 343 bool normalize, bool compress);
305 344
  345 + // Replace a stream's dictionary. The new dictionary must be
  346 + // consistent with the stream's data. This is most appropriately
  347 + // used when creating streams from scratch that will use a stream
  348 + // data provider and therefore start with an empty dictionary. It
  349 + // may be more convenient in this case than calling getDict and
  350 + // modifying it for each key. The pdf-create example does this.
  351 + QPDF_DLL
  352 + void replaceDict(QPDFObjectHandle);
  353 +
306 // Replace this stream's stream data with the given data buffer, 354 // Replace this stream's stream data with the given data buffer,
307 // and replace the /Filter and /DecodeParms keys in the stream 355 // and replace the /Filter and /DecodeParms keys in the stream
308 // dictionary with the given values. (If either value is empty, 356 // dictionary with the given values. (If either value is empty,
@@ -489,6 +537,12 @@ class QPDFObjectHandle @@ -489,6 +537,12 @@ class QPDFObjectHandle
489 void dereference(); 537 void dereference();
490 void makeDirectInternal(std::set<int>& visited); 538 void makeDirectInternal(std::set<int>& visited);
491 void releaseResolved(); 539 void releaseResolved();
  540 + static QPDFObjectHandle parseInternal(
  541 + PointerHolder<InputSource> input,
  542 + std::string const& object_description,
  543 + QPDFTokenizer& tokenizer, bool& empty,
  544 + StringDecrypter* decrypter, QPDF* context,
  545 + bool in_array, bool in_dictionary);
492 546
493 bool initialized; 547 bool initialized;
494 548
libqpdf/BufferInputSource.cc
@@ -11,6 +11,18 @@ BufferInputSource::BufferInputSource(std::string const& description, @@ -11,6 +11,18 @@ BufferInputSource::BufferInputSource(std::string const& description,
11 { 11 {
12 } 12 }
13 13
  14 +BufferInputSource::BufferInputSource(std::string const& description,
  15 + std::string const& contents) :
  16 + own_memory(true),
  17 + description(description),
  18 + buf(0),
  19 + cur_offset(0)
  20 +{
  21 + this->buf = new Buffer(contents.length());
  22 + unsigned char* bp = buf->getBuffer();
  23 + memcpy(bp, (char*)contents.c_str(), contents.length());
  24 +}
  25 +
14 BufferInputSource::~BufferInputSource() 26 BufferInputSource::~BufferInputSource()
15 { 27 {
16 if (own_memory) 28 if (own_memory)
libqpdf/QPDF.cc
@@ -68,6 +68,18 @@ QPDF::CopiedStreamDataProvider::registerForeignStream( @@ -68,6 +68,18 @@ QPDF::CopiedStreamDataProvider::registerForeignStream(
68 this->foreign_streams[local_og] = foreign_stream; 68 this->foreign_streams[local_og] = foreign_stream;
69 } 69 }
70 70
  71 +QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, int objid, int gen) :
  72 + qpdf(qpdf),
  73 + objid(objid),
  74 + gen(gen)
  75 +{
  76 +}
  77 +
  78 +void
  79 +QPDF::StringDecrypter::decryptString(std::string& val)
  80 +{
  81 + qpdf->decryptString(val, objid, gen);
  82 +}
71 83
72 std::string const& 84 std::string const&
73 QPDF::QPDFVersion() 85 QPDF::QPDFVersion()
@@ -940,358 +952,164 @@ QPDF::readObject(PointerHolder<InputSource> input, @@ -940,358 +952,164 @@ QPDF::readObject(PointerHolder<InputSource> input,
940 { 952 {
941 setLastObjectDescription(description, objid, generation); 953 setLastObjectDescription(description, objid, generation);
942 qpdf_offset_t offset = input->tell(); 954 qpdf_offset_t offset = input->tell();
943 - QPDFObjectHandle object = readObjectInternal(  
944 - input, objid, generation, in_object_stream, false, false);  
945 - // Override last_offset so that it points to the beginning of the  
946 - // object we just read  
947 - input->setLastOffset(offset);  
948 - return object;  
949 -}  
950 -  
951 -QPDFObjectHandle  
952 -QPDF::readObjectInternal(PointerHolder<InputSource> input,  
953 - int objid, int generation,  
954 - bool in_object_stream,  
955 - bool in_array, bool in_dictionary)  
956 -{  
957 - if (in_dictionary && in_array)  
958 - {  
959 - // Although dictionaries and arrays arbitrarily nest, these  
960 - // variables indicate what is at the top of the stack right  
961 - // now, so they can, by definition, never both be true.  
962 - throw std::logic_error(  
963 - "INTERNAL ERROR: readObjectInternal: in_dict && in_array");  
964 - }  
965 -  
966 - QPDFObjectHandle object;  
967 955
968 - qpdf_offset_t offset = input->tell();  
969 - std::vector<QPDFObjectHandle> olist;  
970 - bool done = false;  
971 - while (! done) 956 + bool empty = false;
  957 + PointerHolder<StringDecrypter> decrypter_ph;
  958 + StringDecrypter* decrypter = 0;
  959 + if (this->encrypted && (! in_object_stream))
972 { 960 {
973 - object = QPDFObjectHandle();  
974 -  
975 - QPDFTokenizer::Token token = readToken(input);  
976 -  
977 - switch (token.getType())  
978 - {  
979 - case QPDFTokenizer::tt_brace_open:  
980 - case QPDFTokenizer::tt_brace_close:  
981 - // Don't know what to do with these for now  
982 - QTC::TC("qpdf", "QPDF bad brace");  
983 - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),  
984 - this->last_object_description,  
985 - input->getLastOffset(),  
986 - "unexpected brace token");  
987 - break;  
988 -  
989 - case QPDFTokenizer::tt_array_close:  
990 - if (in_array)  
991 - {  
992 - done = true;  
993 - }  
994 - else  
995 - {  
996 - QTC::TC("qpdf", "QPDF bad array close");  
997 - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),  
998 - this->last_object_description,  
999 - input->getLastOffset(),  
1000 - "unexpected array close token");  
1001 - }  
1002 - break;  
1003 -  
1004 - case QPDFTokenizer::tt_dict_close:  
1005 - if (in_dictionary)  
1006 - {  
1007 - done = true;  
1008 - }  
1009 - else  
1010 - {  
1011 - QTC::TC("qpdf", "QPDF bad dictionary close");  
1012 - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),  
1013 - this->last_object_description,  
1014 - input->getLastOffset(),  
1015 - "unexpected dictionary close token");  
1016 - }  
1017 - break;  
1018 -  
1019 - case QPDFTokenizer::tt_array_open:  
1020 - object = readObjectInternal(  
1021 - input, objid, generation, in_object_stream, true, false);  
1022 - break;  
1023 -  
1024 - case QPDFTokenizer::tt_dict_open:  
1025 - object = readObjectInternal(  
1026 - input, objid, generation, in_object_stream, false, true);  
1027 - break;  
1028 -  
1029 - case QPDFTokenizer::tt_bool:  
1030 - object = QPDFObjectHandle::newBool(  
1031 - (token.getValue() == "true"));  
1032 - break;  
1033 -  
1034 - case QPDFTokenizer::tt_null:  
1035 - object = QPDFObjectHandle::newNull();  
1036 - break;  
1037 -  
1038 - case QPDFTokenizer::tt_integer:  
1039 - object = QPDFObjectHandle::newInteger(  
1040 - QUtil::string_to_ll(token.getValue().c_str()));  
1041 - break;  
1042 -  
1043 - case QPDFTokenizer::tt_real:  
1044 - object = QPDFObjectHandle::newReal(token.getValue());  
1045 - break;  
1046 -  
1047 - case QPDFTokenizer::tt_name:  
1048 - object = QPDFObjectHandle::newName(token.getValue());  
1049 - break;  
1050 -  
1051 - case QPDFTokenizer::tt_word:  
1052 - {  
1053 - std::string const& value = token.getValue();  
1054 - if ((value == "R") && (in_array || in_dictionary) &&  
1055 - (olist.size() >= 2) &&  
1056 - (olist[olist.size() - 1].isInteger()) &&  
1057 - (olist[olist.size() - 2].isInteger()))  
1058 - {  
1059 - // Try to resolve indirect objects  
1060 - object = QPDFObjectHandle::Factory::newIndirect(  
1061 - this,  
1062 - olist[olist.size() - 2].getIntValue(),  
1063 - olist[olist.size() - 1].getIntValue());  
1064 - olist.pop_back();  
1065 - olist.pop_back();  
1066 - }  
1067 - else if ((value == "endobj") &&  
1068 - (! (in_array || in_dictionary)))  
1069 - {  
1070 - // Nothing in the PDF spec appears to allow empty  
1071 - // objects, but they have been encountered in  
1072 - // actual PDF files and Adobe Reader appears to  
1073 - // ignore them.  
1074 - warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),  
1075 - this->last_object_description,  
1076 - input->getLastOffset(),  
1077 - "empty object treated as null"));  
1078 - object = QPDFObjectHandle::newNull();  
1079 - input->seek(input->getLastOffset(), SEEK_SET);  
1080 - }  
1081 - else  
1082 - {  
1083 - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),  
1084 - this->last_object_description,  
1085 - input->getLastOffset(),  
1086 - "unknown token while reading object (" +  
1087 - value + ")");  
1088 - }  
1089 - }  
1090 - break;  
1091 -  
1092 - case QPDFTokenizer::tt_string:  
1093 - {  
1094 - std::string val = token.getValue();  
1095 - if (this->encrypted && (! in_object_stream))  
1096 - {  
1097 - decryptString(val, objid, generation);  
1098 - }  
1099 - object = QPDFObjectHandle::newString(val);  
1100 - }  
1101 - break;  
1102 -  
1103 - default:  
1104 - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),  
1105 - this->last_object_description,  
1106 - input->getLastOffset(),  
1107 - "unknown token type while reading object");  
1108 - break;  
1109 - }  
1110 -  
1111 - if (in_dictionary || in_array)  
1112 - {  
1113 - if (! done)  
1114 - {  
1115 - olist.push_back(object);  
1116 - }  
1117 - }  
1118 - else if (! object.isInitialized())  
1119 - {  
1120 - throw std::logic_error(  
1121 - "INTERNAL ERROR: uninitialized object (token = " +  
1122 - QUtil::int_to_string(token.getType()) +  
1123 - ", " + token.getValue() + ")");  
1124 - }  
1125 - else  
1126 - {  
1127 - done = true;  
1128 - } 961 + decrypter_ph = new StringDecrypter(this, objid, generation);
  962 + decrypter = decrypter_ph.getPointer();
1129 } 963 }
1130 -  
1131 - if (in_array) 964 + QPDFObjectHandle object = QPDFObjectHandle::parse(
  965 + input, description, this->tokenizer, empty, decrypter, this);
  966 + if (empty)
1132 { 967 {
1133 - object = QPDFObjectHandle::newArray(olist); 968 + // Nothing in the PDF spec appears to allow empty objects, but
  969 + // they have been encountered in actual PDF files and Adobe
  970 + // Reader appears to ignore them.
  971 + warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),
  972 + this->last_object_description,
  973 + input->getLastOffset(),
  974 + "empty object treated as null"));
1134 } 975 }
1135 - else if (in_dictionary) 976 + else if (object.isDictionary() && (! in_object_stream))
1136 { 977 {
1137 - // Convert list to map. Alternating elements are keys.  
1138 - std::map<std::string, QPDFObjectHandle> dict;  
1139 - if (olist.size() % 2)  
1140 - {  
1141 - QTC::TC("qpdf", "QPDF dictionary odd number of elements");  
1142 - throw QPDFExc(  
1143 - qpdf_e_damaged_pdf, input->getName(),  
1144 - this->last_object_description, input->getLastOffset(),  
1145 - "dictionary ending here has an odd number of elements");  
1146 - }  
1147 - for (unsigned int i = 0; i < olist.size(); i += 2)  
1148 - {  
1149 - QPDFObjectHandle key_obj = olist[i];  
1150 - QPDFObjectHandle val = olist[i + 1];  
1151 - if (! key_obj.isName())  
1152 - {  
1153 - throw QPDFExc(  
1154 - qpdf_e_damaged_pdf,  
1155 - input->getName(), this->last_object_description, offset,  
1156 - std::string("dictionary key not name (") +  
1157 - key_obj.unparse() + ")");  
1158 - }  
1159 - dict[key_obj.getName()] = val;  
1160 - }  
1161 - object = QPDFObjectHandle::newDictionary(dict); 978 + // check for stream
  979 + qpdf_offset_t cur_offset = input->tell();
  980 + if (readToken(input) ==
  981 + QPDFTokenizer::Token(QPDFTokenizer::tt_word, "stream"))
  982 + {
  983 + // The PDF specification states that the word "stream"
  984 + // should be followed by either a carriage return and
  985 + // a newline or by a newline alone. It specifically
  986 + // disallowed following it by a carriage return alone
  987 + // since, in that case, there would be no way to tell
  988 + // whether the NL in a CR NL sequence was part of the
  989 + // stream data. However, some readers, including
  990 + // Adobe reader, accept a carriage return by itself
  991 + // when followed by a non-newline character, so that's
  992 + // what we do here.
  993 + {
  994 + char ch;
  995 + if (input->read(&ch, 1) == 0)
  996 + {
  997 + // A premature EOF here will result in some
  998 + // other problem that will get reported at
  999 + // another time.
  1000 + }
  1001 + else if (ch == '\n')
  1002 + {
  1003 + // ready to read stream data
  1004 + QTC::TC("qpdf", "QPDF stream with NL only");
  1005 + }
  1006 + else if (ch == '\r')
  1007 + {
  1008 + // Read another character
  1009 + if (input->read(&ch, 1) != 0)
  1010 + {
  1011 + if (ch == '\n')
  1012 + {
  1013 + // Ready to read stream data
  1014 + QTC::TC("qpdf", "QPDF stream with CRNL");
  1015 + }
  1016 + else
  1017 + {
  1018 + // Treat the \r by itself as the
  1019 + // whitespace after endstream and
  1020 + // start reading stream data in spite
  1021 + // of not having seen a newline.
  1022 + QTC::TC("qpdf", "QPDF stream with CR only");
  1023 + input->unreadCh(ch);
  1024 + warn(QPDFExc(
  1025 + qpdf_e_damaged_pdf,
  1026 + input->getName(),
  1027 + this->last_object_description,
  1028 + input->tell(),
  1029 + "stream keyword followed"
  1030 + " by carriage return only"));
  1031 + }
  1032 + }
  1033 + }
  1034 + else
  1035 + {
  1036 + QTC::TC("qpdf", "QPDF stream without newline");
  1037 + warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),
  1038 + this->last_object_description,
  1039 + input->tell(),
  1040 + "stream keyword not followed"
  1041 + " by proper line terminator"));
  1042 + }
  1043 + }
1162 1044
1163 - if (! in_object_stream)  
1164 - {  
1165 - // check for stream  
1166 - qpdf_offset_t cur_offset = input->tell();  
1167 - if (readToken(input) ==  
1168 - QPDFTokenizer::Token(QPDFTokenizer::tt_word, "stream"))  
1169 - {  
1170 - // The PDF specification states that the word "stream"  
1171 - // should be followed by either a carriage return and  
1172 - // a newline or by a newline alone. It specifically  
1173 - // disallowed following it by a carriage return alone  
1174 - // since, in that case, there would be no way to tell  
1175 - // whether the NL in a CR NL sequence was part of the  
1176 - // stream data. However, some readers, including  
1177 - // Adobe reader, accept a carriage return by itself  
1178 - // when followed by a non-newline character, so that's  
1179 - // what we do here.  
1180 - {  
1181 - char ch;  
1182 - if (input->read(&ch, 1) == 0)  
1183 - {  
1184 - // A premature EOF here will result in some  
1185 - // other problem that will get reported at  
1186 - // another time.  
1187 - }  
1188 - else if (ch == '\n')  
1189 - {  
1190 - // ready to read stream data  
1191 - QTC::TC("qpdf", "QPDF stream with NL only");  
1192 - }  
1193 - else if (ch == '\r')  
1194 - {  
1195 - // Read another character  
1196 - if (input->read(&ch, 1) != 0)  
1197 - {  
1198 - if (ch == '\n')  
1199 - {  
1200 - // Ready to read stream data  
1201 - QTC::TC("qpdf", "QPDF stream with CRNL");  
1202 - }  
1203 - else  
1204 - {  
1205 - // Treat the \r by itself as the  
1206 - // whitespace after endstream and  
1207 - // start reading stream data in spite  
1208 - // of not having seen a newline.  
1209 - QTC::TC("qpdf", "QPDF stream with CR only");  
1210 - input->unreadCh(ch);  
1211 - warn(QPDFExc(  
1212 - qpdf_e_damaged_pdf,  
1213 - input->getName(),  
1214 - this->last_object_description,  
1215 - input->tell(),  
1216 - "stream keyword followed"  
1217 - " by carriage return only"));  
1218 - }  
1219 - }  
1220 - }  
1221 - else  
1222 - {  
1223 - QTC::TC("qpdf", "QPDF stream without newline");  
1224 - warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),  
1225 - this->last_object_description,  
1226 - input->tell(),  
1227 - "stream keyword not followed"  
1228 - " by proper line terminator"));  
1229 - }  
1230 - } 1045 + // Must get offset before accessing any additional
  1046 + // objects since resolving a previously unresolved
  1047 + // indirect object will change file position.
  1048 + qpdf_offset_t stream_offset = input->tell();
  1049 + size_t length = 0;
1231 1050
1232 - // Must get offset before accessing any additional  
1233 - // objects since resolving a previously unresolved  
1234 - // indirect object will change file position.  
1235 - qpdf_offset_t stream_offset = input->tell();  
1236 - size_t length = 0; 1051 + try
  1052 + {
  1053 + std::map<std::string, QPDFObjectHandle> dict =
  1054 + object.getDictAsMap();
1237 1055
1238 - try  
1239 - {  
1240 - if (dict.count("/Length") == 0)  
1241 - {  
1242 - QTC::TC("qpdf", "QPDF stream without length");  
1243 - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),  
1244 - this->last_object_description, offset,  
1245 - "stream dictionary lacks /Length key");  
1246 - } 1056 + if (dict.count("/Length") == 0)
  1057 + {
  1058 + QTC::TC("qpdf", "QPDF stream without length");
  1059 + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
  1060 + this->last_object_description, offset,
  1061 + "stream dictionary lacks /Length key");
  1062 + }
1247 1063
1248 - QPDFObjectHandle length_obj = dict["/Length"];  
1249 - if (! length_obj.isInteger())  
1250 - {  
1251 - QTC::TC("qpdf", "QPDF stream length not integer");  
1252 - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),  
1253 - this->last_object_description, offset,  
1254 - "/Length key in stream dictionary is not "  
1255 - "an integer");  
1256 - } 1064 + QPDFObjectHandle length_obj = dict["/Length"];
  1065 + if (! length_obj.isInteger())
  1066 + {
  1067 + QTC::TC("qpdf", "QPDF stream length not integer");
  1068 + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
  1069 + this->last_object_description, offset,
  1070 + "/Length key in stream dictionary is not "
  1071 + "an integer");
  1072 + }
1257 1073
1258 - length = length_obj.getIntValue();  
1259 - input->seek(  
1260 - stream_offset + (qpdf_offset_t)length, SEEK_SET);  
1261 - if (! (readToken(input) ==  
1262 - QPDFTokenizer::Token(  
1263 - QPDFTokenizer::tt_word, "endstream")))  
1264 - {  
1265 - QTC::TC("qpdf", "QPDF missing endstream");  
1266 - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),  
1267 - this->last_object_description,  
1268 - input->getLastOffset(),  
1269 - "expected endstream");  
1270 - }  
1271 - }  
1272 - catch (QPDFExc& e)  
1273 - {  
1274 - if (this->attempt_recovery)  
1275 - {  
1276 - // may throw an exception  
1277 - length = recoverStreamLength(  
1278 - input, objid, generation, stream_offset);  
1279 - }  
1280 - else  
1281 - {  
1282 - throw e;  
1283 - }  
1284 - }  
1285 - object = QPDFObjectHandle::Factory::newStream(  
1286 - this, objid, generation, object, stream_offset, length);  
1287 - }  
1288 - else  
1289 - {  
1290 - input->seek(cur_offset, SEEK_SET);  
1291 - }  
1292 - } 1074 + length = length_obj.getIntValue();
  1075 + input->seek(
  1076 + stream_offset + (qpdf_offset_t)length, SEEK_SET);
  1077 + if (! (readToken(input) ==
  1078 + QPDFTokenizer::Token(
  1079 + QPDFTokenizer::tt_word, "endstream")))
  1080 + {
  1081 + QTC::TC("qpdf", "QPDF missing endstream");
  1082 + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
  1083 + this->last_object_description,
  1084 + input->getLastOffset(),
  1085 + "expected endstream");
  1086 + }
  1087 + }
  1088 + catch (QPDFExc& e)
  1089 + {
  1090 + if (this->attempt_recovery)
  1091 + {
  1092 + // may throw an exception
  1093 + length = recoverStreamLength(
  1094 + input, objid, generation, stream_offset);
  1095 + }
  1096 + else
  1097 + {
  1098 + throw e;
  1099 + }
  1100 + }
  1101 + object = QPDFObjectHandle::Factory::newStream(
  1102 + this, objid, generation, object, stream_offset, length);
  1103 + }
  1104 + else
  1105 + {
  1106 + input->seek(cur_offset, SEEK_SET);
  1107 + }
1293 } 1108 }
1294 1109
  1110 + // Override last_offset so that it points to the beginning of the
  1111 + // object we just read
  1112 + input->setLastOffset(offset);
1295 return object; 1113 return object;
1296 } 1114 }
1297 1115
libqpdf/QPDFObjectHandle.cc
@@ -11,12 +11,15 @@ @@ -11,12 +11,15 @@
11 #include <qpdf/QPDF_Dictionary.hh> 11 #include <qpdf/QPDF_Dictionary.hh>
12 #include <qpdf/QPDF_Stream.hh> 12 #include <qpdf/QPDF_Stream.hh>
13 #include <qpdf/QPDF_Reserved.hh> 13 #include <qpdf/QPDF_Reserved.hh>
  14 +#include <qpdf/BufferInputSource.hh>
  15 +#include <qpdf/QPDFExc.hh>
14 16
15 #include <qpdf/QTC.hh> 17 #include <qpdf/QTC.hh>
16 #include <qpdf/QUtil.hh> 18 #include <qpdf/QUtil.hh>
17 19
18 #include <stdexcept> 20 #include <stdexcept>
19 #include <stdlib.h> 21 #include <stdlib.h>
  22 +#include <ctype.h>
20 23
21 QPDFObjectHandle::QPDFObjectHandle() : 24 QPDFObjectHandle::QPDFObjectHandle() :
22 initialized(false), 25 initialized(false),
@@ -398,6 +401,13 @@ QPDFObjectHandle::getDict() @@ -398,6 +401,13 @@ QPDFObjectHandle::getDict()
398 return dynamic_cast<QPDF_Stream*>(obj.getPointer())->getDict(); 401 return dynamic_cast<QPDF_Stream*>(obj.getPointer())->getDict();
399 } 402 }
400 403
  404 +void
  405 +QPDFObjectHandle::replaceDict(QPDFObjectHandle new_dict)
  406 +{
  407 + assertStream();
  408 + dynamic_cast<QPDF_Stream*>(obj.getPointer())->replaceDict(new_dict);
  409 +}
  410 +
401 PointerHolder<Buffer> 411 PointerHolder<Buffer>
402 QPDFObjectHandle::getStreamData() 412 QPDFObjectHandle::getStreamData()
403 { 413 {
@@ -599,6 +609,265 @@ QPDFObjectHandle::unparseResolved() @@ -599,6 +609,265 @@ QPDFObjectHandle::unparseResolved()
599 } 609 }
600 610
601 QPDFObjectHandle 611 QPDFObjectHandle
  612 +QPDFObjectHandle::parse(std::string const& object_str,
  613 + std::string const& object_description)
  614 +{
  615 + PointerHolder<InputSource> input =
  616 + new BufferInputSource("parsed object", object_str);
  617 + QPDFTokenizer tokenizer;
  618 + bool empty = false;
  619 + QPDFObjectHandle result =
  620 + parse(input, object_description, tokenizer, empty, 0, 0);
  621 + size_t offset = (size_t) input->tell();
  622 + while (offset < object_str.length())
  623 + {
  624 + if (! isspace(object_str[offset]))
  625 + {
  626 + QTC::TC("qpdf", "QPDFObjectHandle trailing data in parse");
  627 + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
  628 + object_description,
  629 + input->getLastOffset(),
  630 + "trailing data found parsing object from string");
  631 + }
  632 + ++offset;
  633 + }
  634 + return result;
  635 +}
  636 +
  637 +QPDFObjectHandle
  638 +QPDFObjectHandle::parse(PointerHolder<InputSource> input,
  639 + std::string const& object_description,
  640 + QPDFTokenizer& tokenizer, bool& empty,
  641 + StringDecrypter* decrypter, QPDF* context)
  642 +{
  643 + return parseInternal(input, object_description, tokenizer, empty,
  644 + decrypter, context, false, false);
  645 +}
  646 +
  647 +QPDFObjectHandle
  648 +QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
  649 + std::string const& object_description,
  650 + QPDFTokenizer& tokenizer, bool& empty,
  651 + StringDecrypter* decrypter, QPDF* context,
  652 + bool in_array, bool in_dictionary)
  653 +{
  654 + empty = false;
  655 + if (in_dictionary && in_array)
  656 + {
  657 + // Although dictionaries and arrays arbitrarily nest, these
  658 + // variables indicate what is at the top of the stack right
  659 + // now, so they can, by definition, never both be true.
  660 + throw std::logic_error(
  661 + "INTERNAL ERROR: parseInternal: in_dict && in_array");
  662 + }
  663 +
  664 + QPDFObjectHandle object;
  665 +
  666 + qpdf_offset_t offset = input->tell();
  667 + std::vector<QPDFObjectHandle> olist;
  668 + bool done = false;
  669 + while (! done)
  670 + {
  671 + object = QPDFObjectHandle();
  672 +
  673 + QPDFTokenizer::Token token =
  674 + tokenizer.readToken(input, object_description);
  675 +
  676 + switch (token.getType())
  677 + {
  678 + case QPDFTokenizer::tt_brace_open:
  679 + case QPDFTokenizer::tt_brace_close:
  680 + // Don't know what to do with these for now
  681 + QTC::TC("qpdf", "QPDFObjectHandle bad brace");
  682 + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
  683 + object_description,
  684 + input->getLastOffset(),
  685 + "unexpected brace token");
  686 + break;
  687 +
  688 + case QPDFTokenizer::tt_array_close:
  689 + if (in_array)
  690 + {
  691 + done = true;
  692 + }
  693 + else
  694 + {
  695 + QTC::TC("qpdf", "QPDFObjectHandle bad array close");
  696 + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
  697 + object_description,
  698 + input->getLastOffset(),
  699 + "unexpected array close token");
  700 + }
  701 + break;
  702 +
  703 + case QPDFTokenizer::tt_dict_close:
  704 + if (in_dictionary)
  705 + {
  706 + done = true;
  707 + }
  708 + else
  709 + {
  710 + QTC::TC("qpdf", "QPDFObjectHandle bad dictionary close");
  711 + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
  712 + object_description,
  713 + input->getLastOffset(),
  714 + "unexpected dictionary close token");
  715 + }
  716 + break;
  717 +
  718 + case QPDFTokenizer::tt_array_open:
  719 + object = parseInternal(
  720 + input, object_description, tokenizer, empty,
  721 + decrypter, context, true, false);
  722 + break;
  723 +
  724 + case QPDFTokenizer::tt_dict_open:
  725 + object = parseInternal(
  726 + input, object_description, tokenizer, empty,
  727 + decrypter, context, false, true);
  728 + break;
  729 +
  730 + case QPDFTokenizer::tt_bool:
  731 + object = newBool((token.getValue() == "true"));
  732 + break;
  733 +
  734 + case QPDFTokenizer::tt_null:
  735 + object = newNull();
  736 + break;
  737 +
  738 + case QPDFTokenizer::tt_integer:
  739 + object = newInteger(QUtil::string_to_ll(token.getValue().c_str()));
  740 + break;
  741 +
  742 + case QPDFTokenizer::tt_real:
  743 + object = newReal(token.getValue());
  744 + break;
  745 +
  746 + case QPDFTokenizer::tt_name:
  747 + object = newName(token.getValue());
  748 + break;
  749 +
  750 + case QPDFTokenizer::tt_word:
  751 + {
  752 + std::string const& value = token.getValue();
  753 + if ((value == "R") && (in_array || in_dictionary) &&
  754 + (olist.size() >= 2) &&
  755 + (olist[olist.size() - 1].isInteger()) &&
  756 + (olist[olist.size() - 2].isInteger()))
  757 + {
  758 + if (context == 0)
  759 + {
  760 + QTC::TC("qpdf", "QPDFObjectHandle indirect without context");
  761 + throw std::logic_error(
  762 + "QPDFObjectHandle::parse called without context"
  763 + " on an object with indirect references");
  764 + }
  765 + // Try to resolve indirect objects
  766 + object = newIndirect(
  767 + context,
  768 + olist[olist.size() - 2].getIntValue(),
  769 + olist[olist.size() - 1].getIntValue());
  770 + olist.pop_back();
  771 + olist.pop_back();
  772 + }
  773 + else if ((value == "endobj") &&
  774 + (! (in_array || in_dictionary)))
  775 + {
  776 + // We just saw endobj without having read
  777 + // anything. Treat this as a null and do not move
  778 + // the input source's offset.
  779 + object = newNull();
  780 + input->seek(input->getLastOffset(), SEEK_SET);
  781 + empty = true;
  782 + }
  783 + else
  784 + {
  785 + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
  786 + object_description,
  787 + input->getLastOffset(),
  788 + "unknown token while reading object (" +
  789 + value + ")");
  790 + }
  791 + }
  792 + break;
  793 +
  794 + case QPDFTokenizer::tt_string:
  795 + {
  796 + std::string val = token.getValue();
  797 + if (decrypter)
  798 + {
  799 + decrypter->decryptString(val);
  800 + }
  801 + object = QPDFObjectHandle::newString(val);
  802 + }
  803 +
  804 + break;
  805 +
  806 + default:
  807 + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
  808 + object_description,
  809 + input->getLastOffset(),
  810 + "unknown token type while reading object");
  811 + break;
  812 + }
  813 +
  814 + if (in_dictionary || in_array)
  815 + {
  816 + if (! done)
  817 + {
  818 + olist.push_back(object);
  819 + }
  820 + }
  821 + else if (! object.isInitialized())
  822 + {
  823 + throw std::logic_error(
  824 + "INTERNAL ERROR: uninitialized object (token = " +
  825 + QUtil::int_to_string(token.getType()) +
  826 + ", " + token.getValue() + ")");
  827 + }
  828 + else
  829 + {
  830 + done = true;
  831 + }
  832 + }
  833 +
  834 + if (in_array)
  835 + {
  836 + object = newArray(olist);
  837 + }
  838 + else if (in_dictionary)
  839 + {
  840 + // Convert list to map. Alternating elements are keys.
  841 + std::map<std::string, QPDFObjectHandle> dict;
  842 + if (olist.size() % 2)
  843 + {
  844 + QTC::TC("qpdf", "QPDFObjectHandle dictionary odd number of elements");
  845 + throw QPDFExc(
  846 + qpdf_e_damaged_pdf, input->getName(),
  847 + object_description, input->getLastOffset(),
  848 + "dictionary ending here has an odd number of elements");
  849 + }
  850 + for (unsigned int i = 0; i < olist.size(); i += 2)
  851 + {
  852 + QPDFObjectHandle key_obj = olist[i];
  853 + QPDFObjectHandle val = olist[i + 1];
  854 + if (! key_obj.isName())
  855 + {
  856 + throw QPDFExc(
  857 + qpdf_e_damaged_pdf,
  858 + input->getName(), object_description, offset,
  859 + std::string("dictionary key not name (") +
  860 + key_obj.unparse() + ")");
  861 + }
  862 + dict[key_obj.getName()] = val;
  863 + }
  864 + object = newDictionary(dict);
  865 + }
  866 +
  867 + return object;
  868 +}
  869 +
  870 +QPDFObjectHandle
602 QPDFObjectHandle::newIndirect(QPDF* qpdf, int objid, int generation) 871 QPDFObjectHandle::newIndirect(QPDF* qpdf, int objid, int generation)
603 { 872 {
604 return QPDFObjectHandle(qpdf, objid, generation); 873 return QPDFObjectHandle(qpdf, objid, generation);
libqpdf/QPDF_Stream.cc
@@ -464,3 +464,18 @@ QPDF_Stream::replaceFilterData(QPDFObjectHandle const&amp; filter, @@ -464,3 +464,18 @@ QPDF_Stream::replaceFilterData(QPDFObjectHandle const&amp; filter,
464 "/Length", QPDFObjectHandle::newInteger((int)length)); 464 "/Length", QPDFObjectHandle::newInteger((int)length));
465 } 465 }
466 } 466 }
  467 +
  468 +void
  469 +QPDF_Stream::replaceDict(QPDFObjectHandle new_dict)
  470 +{
  471 + this->stream_dict = new_dict;
  472 + QPDFObjectHandle length_obj = new_dict.getKey("/Length");
  473 + if (length_obj.isInteger())
  474 + {
  475 + this->length = length_obj.getIntValue();
  476 + }
  477 + else
  478 + {
  479 + this->length = 0;
  480 + }
  481 +}
libqpdf/qpdf/QPDF_Stream.hh
@@ -32,6 +32,8 @@ class QPDF_Stream: public QPDFObject @@ -32,6 +32,8 @@ class QPDF_Stream: public QPDFObject
32 QPDFObjectHandle const& filter, 32 QPDFObjectHandle const& filter,
33 QPDFObjectHandle const& decode_parms); 33 QPDFObjectHandle const& decode_parms);
34 34
  35 + void replaceDict(QPDFObjectHandle new_dict);
  36 +
35 // Replace object ID and generation. This may only be called if 37 // Replace object ID and generation. This may only be called if
36 // object ID and generation are 0. It is used by QPDFObjectHandle 38 // object ID and generation are 0. It is used by QPDFObjectHandle
37 // when adding streams to files. 39 // when adding streams to files.
qpdf/pdf_from_scratch.cc
@@ -38,25 +38,20 @@ void runtest(int n) @@ -38,25 +38,20 @@ void runtest(int n)
38 // Create a minimal PDF from scratch. 38 // Create a minimal PDF from scratch.
39 39
40 QPDFObjectHandle font = pdf.makeIndirectObject( 40 QPDFObjectHandle font = pdf.makeIndirectObject(
41 - QPDFObjectHandle::newDictionary());  
42 - font.replaceKey("/Type", newName("/Font"));  
43 - font.replaceKey("/Subtype", newName("/Type1"));  
44 - font.replaceKey("/Name", newName("/F1"));  
45 - font.replaceKey("/BaseFont", newName("/Helvetica"));  
46 - font.replaceKey("/Encoding", newName("/WinAnsiEncoding")); 41 + QPDFObjectHandle::parse("<<"
  42 + " /Type /Font"
  43 + " /Subtype /Type1"
  44 + " /Name /F1"
  45 + " /BaseFont /Helvetica"
  46 + " /Encoding /WinAnsiEncoding"
  47 + ">>"));
47 48
48 QPDFObjectHandle procset = pdf.makeIndirectObject( 49 QPDFObjectHandle procset = pdf.makeIndirectObject(
49 - QPDFObjectHandle::newArray());  
50 - procset.appendItem(newName("/PDF"));  
51 - procset.appendItem(newName("/Text")); 50 + QPDFObjectHandle::parse("[/PDF /Text]"));
52 51
53 QPDFObjectHandle contents = createPageContents(pdf, "First Page"); 52 QPDFObjectHandle contents = createPageContents(pdf, "First Page");
54 53
55 - QPDFObjectHandle mediabox = QPDFObjectHandle::newArray();  
56 - mediabox.appendItem(QPDFObjectHandle::newInteger(0));  
57 - mediabox.appendItem(QPDFObjectHandle::newInteger(0));  
58 - mediabox.appendItem(QPDFObjectHandle::newInteger(612));  
59 - mediabox.appendItem(QPDFObjectHandle::newInteger(792)); 54 + QPDFObjectHandle mediabox = QPDFObjectHandle::parse("[0 0 612 792]");
60 55
61 QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary(); 56 QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary();
62 rfont.replaceKey("/F1", font); 57 rfont.replaceKey("/F1", font);
qpdf/qpdf.testcov
@@ -60,13 +60,13 @@ QPDF missing trailer 0 @@ -60,13 +60,13 @@ QPDF missing trailer 0
60 QPDF trailer lacks size 0 60 QPDF trailer lacks size 0
61 QPDF trailer size not integer 0 61 QPDF trailer size not integer 0
62 QPDF trailer prev not integer 0 62 QPDF trailer prev not integer 0
63 -QPDF bad brace 0  
64 -QPDF bad array close 0  
65 -QPDF dictionary odd number of elements 0 63 +QPDFObjectHandle bad brace 0
  64 +QPDFObjectHandle bad array close 0
  65 +QPDFObjectHandle dictionary odd number of elements 0
66 QPDF stream without length 0 66 QPDF stream without length 0
67 QPDF stream length not integer 0 67 QPDF stream length not integer 0
68 QPDF missing endstream 0 68 QPDF missing endstream 0
69 -QPDF bad dictionary close 0 69 +QPDFObjectHandle bad dictionary close 0
70 QPDF can't find xref 0 70 QPDF can't find xref 0
71 QPDF_Tokenizer bad ) 0 71 QPDF_Tokenizer bad ) 0
72 QPDF_Tokenizer bad > 0 72 QPDF_Tokenizer bad > 0
@@ -235,3 +235,5 @@ QPDF not copying pages object 0 @@ -235,3 +235,5 @@ QPDF not copying pages object 0
235 QPDF insert foreign page 0 235 QPDF insert foreign page 0
236 QPDFWriter foreign object 0 236 QPDFWriter foreign object 0
237 QPDFWriter copy use_aes 1 237 QPDFWriter copy use_aes 1
  238 +QPDFObjectHandle indirect without context 0
  239 +QPDFObjectHandle trailing data in parse 0
qpdf/qtest/qpdf.test
@@ -149,7 +149,7 @@ $td-&gt;runtest(&quot;remove page we don&#39;t have&quot;, @@ -149,7 +149,7 @@ $td-&gt;runtest(&quot;remove page we don&#39;t have&quot;,
149 $td->NORMALIZE_NEWLINES); 149 $td->NORMALIZE_NEWLINES);
150 # ---------- 150 # ----------
151 $td->notify("--- Miscellaneous Tests ---"); 151 $td->notify("--- Miscellaneous Tests ---");
152 -$n_tests += 44; 152 +$n_tests += 45;
153 153
154 $td->runtest("qpdf version", 154 $td->runtest("qpdf version",
155 {$td->COMMAND => "qpdf --version"}, 155 {$td->COMMAND => "qpdf --version"},
@@ -370,6 +370,10 @@ $td-&gt;runtest(&quot;detect foreign object in write&quot;, @@ -370,6 +370,10 @@ $td-&gt;runtest(&quot;detect foreign object in write&quot;,
370 " copy-foreign-objects-in.pdf minimal.pdf"}, 370 " copy-foreign-objects-in.pdf minimal.pdf"},
371 {$td->FILE => "foreign-in-write.out", $td->EXIT_STATUS => 0}, 371 {$td->FILE => "foreign-in-write.out", $td->EXIT_STATUS => 0},
372 $td->NORMALIZE_NEWLINES); 372 $td->NORMALIZE_NEWLINES);
  373 +$td->runtest("parse objects from string",
  374 + {$td->COMMAND => "test_driver 31 minimal.pdf"}, # file not used
  375 + {$td->FILE => "parse-object.out", $td->EXIT_STATUS => 0},
  376 + $td->NORMALIZE_NEWLINES);
373 377
374 show_ntests(); 378 show_ntests();
375 # ---------- 379 # ----------
qpdf/qtest/qpdf/bad22.out
1 -bad22.pdf (object 4 0, file position 317): stream dictionary lacks /Length key 1 +bad22.pdf (object 4 0, file position 314): stream dictionary lacks /Length key
qpdf/qtest/qpdf/bad23.out
1 -bad23.pdf (object 4 0, file position 317): /Length key in stream dictionary is not an integer 1 +bad23.pdf (object 4 0, file position 314): /Length key in stream dictionary is not an integer
qpdf/qtest/qpdf/parse-object.out 0 → 100644
  1 +[ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ]
  2 +logic error parsing indirect: QPDFObjectHandle::parse called without context on an object with indirect references
  3 +trailing data: parsed object (trailing test): trailing data found parsing object from string
  4 +test 31 done
qpdf/test_driver.cc
@@ -1054,6 +1054,38 @@ void runtest(int n, char const* filename1, char const* filename2) @@ -1054,6 +1054,38 @@ void runtest(int n, char const* filename1, char const* filename2)
1054 << std::endl; 1054 << std::endl;
1055 } 1055 }
1056 } 1056 }
  1057 + else if (n == 31)
  1058 + {
  1059 + // Test object parsing from a string. The input file is not used.
  1060 +
  1061 + QPDFObjectHandle o1 =
  1062 + QPDFObjectHandle::parse(
  1063 + "[/name 16059 3.14159 false\n"
  1064 + " << /key true /other [ (string1) (string2) ] >> null]");
  1065 + std::cout << o1.unparse() << std::endl;
  1066 + QPDFObjectHandle o2 = QPDFObjectHandle::parse(" 12345 \f ");
  1067 + assert(o2.isInteger() && (o2.getIntValue() == 12345));
  1068 + try
  1069 + {
  1070 + QPDFObjectHandle::parse("[1 0 R]", "indirect test");
  1071 + std::cout << "oops -- didn't throw" << std::endl;
  1072 + }
  1073 + catch (std::logic_error e)
  1074 + {
  1075 + std::cout << "logic error parsing indirect: " << e.what()
  1076 + << std::endl;
  1077 + }
  1078 + try
  1079 + {
  1080 + QPDFObjectHandle::parse("0 trailing", "trailing test");
  1081 + std::cout << "oops -- didn't throw" << std::endl;
  1082 + }
  1083 + catch (std::runtime_error e)
  1084 + {
  1085 + std::cout << "trailing data: " << e.what()
  1086 + << std::endl;
  1087 + }
  1088 + }
1057 else 1089 else
1058 { 1090 {
1059 throw std::runtime_error(std::string("invalid test ") + 1091 throw std::runtime_error(std::string("invalid test ") +