Commit 7e7a9c437982b7ede2af9cd0b12b3e47b4bc3a3d

Authored by Jay Berkenbilt
1 parent be0ed6ab

Parse objects; stream data is not yet handled

@@ -54,6 +54,15 @@ Soon: Break ground on "Document-level work" @@ -54,6 +54,15 @@ Soon: Break ground on "Document-level work"
54 Output JSON v2 54 Output JSON v2
55 ============== 55 ==============
56 56
  57 +XXX
  58 +
  59 +* Reread from perspective of update
  60 +* Test all ignore cases with QTC
  61 +* Test case of correct file with dict before data/datafile
  62 +* Have a test case if possible that exercises the object description
  63 + which means we need some kind of semantic error that gets caught
  64 + after creation.
  65 +
57 Try to never flatten pages tree. Make sure we do something reasonable 66 Try to never flatten pages tree. Make sure we do something reasonable
58 with pages tree repair. The problem is that if pages tree repair is 67 with pages tree repair. The problem is that if pages tree repair is
59 done as a side effect of running --json, the qpdf part of the json may 68 done as a side effect of running --json, the qpdf part of the json may
include/qpdf/QPDF.hh
@@ -998,7 +998,7 @@ class QPDF @@ -998,7 +998,7 @@ class QPDF
998 class JSONReactor: public JSON::Reactor 998 class JSONReactor: public JSON::Reactor
999 { 999 {
1000 public: 1000 public:
1001 - JSONReactor(QPDF&, bool must_be_complete); 1001 + JSONReactor(QPDF&, std::string const& filename, bool must_be_complete);
1002 virtual ~JSONReactor() = default; 1002 virtual ~JSONReactor() = default;
1003 virtual void dictionaryStart() override; 1003 virtual void dictionaryStart() override;
1004 virtual void arrayStart() override; 1004 virtual void arrayStart() override;
@@ -1008,31 +1008,51 @@ class QPDF @@ -1008,31 +1008,51 @@ class QPDF
1008 dictionaryItem(std::string const& key, JSON const& value) override; 1008 dictionaryItem(std::string const& key, JSON const& value) override;
1009 virtual bool arrayItem(JSON const& value) override; 1009 virtual bool arrayItem(JSON const& value) override;
1010 1010
  1011 + bool anyErrors() const;
  1012 +
1011 private: 1013 private:
1012 enum state_e { 1014 enum state_e {
1013 st_initial, 1015 st_initial,
1014 st_top, 1016 st_top,
1015 - st_ignore,  
1016 st_qpdf, 1017 st_qpdf,
1017 - st_objects_top,  
1018 - st_trailer_top, 1018 + st_objects,
  1019 + st_trailer,
1019 st_object_top, 1020 st_object_top,
1020 st_stream, 1021 st_stream,
1021 st_object, 1022 st_object,
  1023 + st_ignore,
1022 }; 1024 };
1023 1025
1024 void containerStart(); 1026 void containerStart();
1025 void nestedState(std::string const& key, JSON const& value, state_e); 1027 void nestedState(std::string const& key, JSON const& value, state_e);
  1028 + QPDFObjectHandle makeObject(JSON const& value);
  1029 + void error(size_t offset, std::string const& message);
  1030 + QPDFObjectHandle
  1031 + reserveObject(std::string const& obj, std::string const& gen);
  1032 + void replaceObject(
  1033 + QPDFObjectHandle to_replace, QPDFObjectHandle replacement);
1026 1034
1027 QPDF& pdf; 1035 QPDF& pdf;
  1036 + std::string filename;
1028 bool must_be_complete; 1037 bool must_be_complete;
  1038 + bool errors;
  1039 + bool parse_error;
1029 bool saw_qpdf; 1040 bool saw_qpdf;
  1041 + bool saw_objects;
1030 bool saw_json_version; 1042 bool saw_json_version;
1031 bool saw_pdf_version; 1043 bool saw_pdf_version;
1032 bool saw_trailer; 1044 bool saw_trailer;
1033 state_e state; 1045 state_e state;
1034 state_e next_state; 1046 state_e next_state;
  1047 + std::string cur_object;
  1048 + bool saw_value;
  1049 + bool saw_stream;
  1050 + bool saw_dict;
  1051 + bool saw_data;
  1052 + bool saw_datafile;
1035 std::vector<state_e> state_stack; 1053 std::vector<state_e> state_stack;
  1054 + std::vector<QPDFObjectHandle> object_stack;
  1055 + std::set<QPDFObjGen> reserved;
1036 }; 1056 };
1037 friend class JSONReactor; 1057 friend class JSONReactor;
1038 1058
@@ -1080,6 +1100,7 @@ class QPDF @@ -1080,6 +1100,7 @@ class QPDF
1080 void resolveObjectsInStream(int obj_stream_number); 1100 void resolveObjectsInStream(int obj_stream_number);
1081 void stopOnError(std::string const& message); 1101 void stopOnError(std::string const& message);
1082 QPDFObjectHandle reserveObjectIfNotExists(int objid, int gen); 1102 QPDFObjectHandle reserveObjectIfNotExists(int objid, int gen);
  1103 + QPDFObjectHandle reserveStream(int objid, int gen);
1083 1104
1084 // Calls finish() on the pipeline when done but does not delete it 1105 // Calls finish() on the pipeline when done but does not delete it
1085 bool pipeStreamData( 1106 bool pipeStreamData(
include/qpdf/QPDFObjectHandle.hh
@@ -1431,7 +1431,6 @@ class QPDFObjectHandle @@ -1431,7 +1431,6 @@ class QPDFObjectHandle
1431 { 1431 {
1432 return QPDFObjectHandle::newIndirect(qpdf, objid, generation); 1432 return QPDFObjectHandle::newIndirect(qpdf, objid, generation);
1433 } 1433 }
1434 - // object must be dictionary object  
1435 static QPDFObjectHandle 1434 static QPDFObjectHandle
1436 newStream( 1435 newStream(
1437 QPDF* qpdf, 1436 QPDF* qpdf,
libqpdf/QPDF.cc
@@ -2167,6 +2167,13 @@ QPDF::reserveObjectIfNotExists(int objid, int gen) @@ -2167,6 +2167,13 @@ QPDF::reserveObjectIfNotExists(int objid, int gen)
2167 } 2167 }
2168 2168
2169 QPDFObjectHandle 2169 QPDFObjectHandle
  2170 +QPDF::reserveStream(int objid, int gen)
  2171 +{
  2172 + return QPDFObjectHandle::Factory::newStream(
  2173 + this, objid, gen, QPDFObjectHandle::newDictionary(), 0, 0);
  2174 +}
  2175 +
  2176 +QPDFObjectHandle
2170 QPDF::getObjectByObjGen(QPDFObjGen const& og) 2177 QPDF::getObjectByObjGen(QPDFObjGen const& og)
2171 { 2178 {
2172 return getObjectByID(og.getObj(), og.getGen()); 2179 return getObjectByID(og.getObj(), og.getGen());
libqpdf/QPDF_json.cc
1 #include <qpdf/QPDF.hh> 1 #include <qpdf/QPDF.hh>
2 2
3 #include <qpdf/FileInputSource.hh> 3 #include <qpdf/FileInputSource.hh>
  4 +#include <qpdf/QIntC.hh>
4 #include <qpdf/QTC.hh> 5 #include <qpdf/QTC.hh>
5 #include <qpdf/QUtil.hh> 6 #include <qpdf/QUtil.hh>
6 #include <regex> 7 #include <regex>
7 8
8 -namespace  
9 -{  
10 - class JSONExc: public std::runtime_error  
11 - {  
12 - public:  
13 - JSONExc(JSON const& value, std::string const& msg) :  
14 - std::runtime_error(  
15 - "offset " + QUtil::uint_to_string(value.getStart()) + ": " +  
16 - msg)  
17 - {  
18 - }  
19 - };  
20 -} // namespace 9 +// This chart shows an example of the state transitions that would
  10 +// occur in parsing a minimal file.
  11 +
  12 +// | st_initial
  13 +// { | -> st_top
  14 +// "qpdf": { | -> st_qpdf
  15 +// "objects": { | -> st_objects
  16 +// "obj:1 0 R": { | -> st_object_top
  17 +// "value": { | -> st_object
  18 +// "/Pages": "2 0 R", | ...
  19 +// "/Type": "/Catalog" | ...
  20 +// } | <- st_object_top
  21 +// }, | <- st_objects
  22 +// "obj:2 0 R": { | -> st_object_top
  23 +// "value": 12 | -> st_object
  24 +// } | <- st_object_top
  25 +// }, | <- st_objects
  26 +// "obj:4 0 R": { | -> st_object_top
  27 +// "stream": { | -> st_stream
  28 +// "data": "cG90YXRv", | ...
  29 +// "dict": { | -> st_object
  30 +// "/K": true | ...
  31 +// } | <- st_stream
  32 +// } | <- st_object_top
  33 +// }, | <- st_objects
  34 +// "trailer": { | -> st_trailer
  35 +// "value": { | -> st_object
  36 +// "/Root": "1 0 R", | ...
  37 +// "/Size": 7 | ...
  38 +// } | <- st_trailer
  39 +// } | <- st_objects
  40 +// } | <- st_qpdf
  41 +// } | <- st_top
  42 +// } | <- st_initial
  43 +
  44 +static char const* JSON_PDF = (
  45 + // force line break
  46 + "%PDF-1.3\n"
  47 + "xref\n"
  48 + "0 1\n"
  49 + "0000000000 65535 f \n"
  50 + "trailer << /Size 1 >>\n"
  51 + "startxref\n"
  52 + "9\n"
  53 + "%%EOF\n");
21 54
22 static std::regex PDF_VERSION_RE("^\\d+\\.\\d+$"); 55 static std::regex PDF_VERSION_RE("^\\d+\\.\\d+$");
23 static std::regex OBJ_KEY_RE("^obj:(\\d+) (\\d+) R$"); 56 static std::regex OBJ_KEY_RE("^obj:(\\d+) (\\d+) R$");
  57 +static std::regex INDIRECT_OBJ_RE("^(\\d+) (\\d+) R$");
  58 +static std::regex UNICODE_RE("^u:(.*)$");
  59 +static std::regex BINARY_RE("^b:((?:[0-9a-fA-F]{2})*)$");
  60 +static std::regex NAME_RE("^/.*$");
24 61
25 -QPDF::JSONReactor::JSONReactor(QPDF& pdf, bool must_be_complete) : 62 +QPDF::JSONReactor::JSONReactor(
  63 + QPDF& pdf, std::string const& filename, bool must_be_complete) :
26 pdf(pdf), 64 pdf(pdf),
  65 + filename(filename),
27 must_be_complete(must_be_complete), 66 must_be_complete(must_be_complete),
  67 + errors(false),
  68 + parse_error(false),
28 saw_qpdf(false), 69 saw_qpdf(false),
  70 + saw_objects(false),
29 saw_json_version(false), 71 saw_json_version(false),
30 saw_pdf_version(false), 72 saw_pdf_version(false),
31 saw_trailer(false), 73 saw_trailer(false),
32 state(st_initial), 74 state(st_initial),
33 - next_state(st_top) 75 + next_state(st_top),
  76 + saw_value(false),
  77 + saw_stream(false),
  78 + saw_dict(false),
  79 + saw_data(false),
  80 + saw_datafile(false)
34 { 81 {
35 state_stack.push_back(st_initial); 82 state_stack.push_back(st_initial);
36 } 83 }
37 84
38 void 85 void
  86 +QPDF::JSONReactor::error(size_t offset, std::string const& msg)
  87 +{
  88 + this->errors = true;
  89 + this->pdf.warn(
  90 + qpdf_e_json, this->cur_object, QIntC::to_offset(offset), msg);
  91 +}
  92 +
  93 +bool
  94 +QPDF::JSONReactor::anyErrors() const
  95 +{
  96 + return this->errors;
  97 +}
  98 +
  99 +void
39 QPDF::JSONReactor::containerStart() 100 QPDF::JSONReactor::containerStart()
40 { 101 {
41 state_stack.push_back(state); 102 state_stack.push_back(state);
@@ -46,7 +107,6 @@ void @@ -46,7 +107,6 @@ void
46 QPDF::JSONReactor::dictionaryStart() 107 QPDF::JSONReactor::dictionaryStart()
47 { 108 {
48 containerStart(); 109 containerStart();
49 - // QXXXQ  
50 } 110 }
51 111
52 void 112 void
@@ -57,7 +117,6 @@ QPDF::JSONReactor::arrayStart() @@ -57,7 +117,6 @@ QPDF::JSONReactor::arrayStart()
57 QTC::TC("qpdf", "QPDF_json top-level array"); 117 QTC::TC("qpdf", "QPDF_json top-level array");
58 throw std::runtime_error("QPDF JSON must be a dictionary"); 118 throw std::runtime_error("QPDF JSON must be a dictionary");
59 } 119 }
60 - // QXXXQ  
61 } 120 }
62 121
63 void 122 void
@@ -68,23 +127,102 @@ QPDF::JSONReactor::containerEnd(JSON const& value) @@ -68,23 +127,102 @@ QPDF::JSONReactor::containerEnd(JSON const& value)
68 if (state == st_initial) { 127 if (state == st_initial) {
69 if (!this->saw_qpdf) { 128 if (!this->saw_qpdf) {
70 QTC::TC("qpdf", "QPDF_json missing qpdf"); 129 QTC::TC("qpdf", "QPDF_json missing qpdf");
71 - throw std::runtime_error("\"qpdf\" object was not seen"); 130 + error(0, "\"qpdf\" object was not seen");
  131 + } else {
  132 + if (!this->saw_json_version) {
  133 + QTC::TC("qpdf", "QPDF_json missing json version");
  134 + error(0, "\"qpdf.jsonversion\" was not seen");
  135 + }
  136 + if (must_be_complete && !this->saw_pdf_version) {
  137 + QTC::TC("qpdf", "QPDF_json missing pdf version");
  138 + error(0, "\"qpdf.pdfversion\" was not seen");
  139 + }
  140 + if (!this->saw_objects) {
  141 + QTC::TC("qpdf", "QPDF_json missing objects");
  142 + error(0, "\"qpdf.objects\" was not seen");
  143 + } else {
  144 + if (must_be_complete && !this->saw_trailer) {
  145 + QTC::TC("qpdf", "QPDF_json missing trailer");
  146 + error(0, "\"qpdf.objects.trailer\" was not seen");
  147 + }
  148 + }
72 } 149 }
73 - if (!this->saw_json_version) {  
74 - QTC::TC("qpdf", "QPDF_json missing json version");  
75 - throw std::runtime_error("\"qpdf.jsonversion\" was not seen"); 150 + } else if (state == st_objects) {
  151 + if (parse_error) {
  152 + // ignore
  153 + } else if (cur_object == "trailer") {
  154 + if (!saw_value) {
  155 + QTC::TC("qpdf", "QPDF_json trailer no value");
  156 + error(value.getStart(), "\"trailer\" is missing \"value\"");
  157 + }
  158 + } else if (saw_value == saw_stream) {
  159 + QTC::TC("qpdf", "QPDF_json value stream both or neither");
  160 + error(
  161 + value.getStart(),
  162 + "object must have exactly one of \"value\" or \"stream\"");
76 } 163 }
77 - if (must_be_complete && !this->saw_pdf_version) {  
78 - QTC::TC("qpdf", "QPDF_json missing pdf version");  
79 - throw std::runtime_error("\"qpdf.pdfversion\" was not seen"); 164 + object_stack.clear();
  165 + this->cur_object = "";
  166 + this->saw_dict = false;
  167 + this->saw_data = false;
  168 + this->saw_datafile = false;
  169 + this->saw_value = false;
  170 + this->saw_stream = false;
  171 + } else if (state == st_object_top) {
  172 + if (saw_stream) {
  173 + if (!saw_dict) {
  174 + QTC::TC("qpdf", "QPDF_json stream no dict");
  175 + error(value.getStart(), "\"stream\" is missing \"dict\"");
  176 + }
  177 + if (must_be_complete) {
  178 + if (saw_data == saw_datafile) {
  179 + QTC::TC("qpdf", "QPDF_json data datafile both or neither");
  180 + error(
  181 + value.getStart(),
  182 + "\"stream\" must have exactly one of \"data\" or "
  183 + "\"datafile\"");
  184 + }
  185 + } else if (saw_data && saw_datafile) {
  186 + // QXXXQ
  187 + /// QTC::TC("qpdf", "QPDF_json data and datafile");
  188 + error(
  189 + value.getStart(),
  190 + "\"stream\" may at most one of \"data\" or \"datafile\"");
  191 + }
  192 + }
  193 + } else if ((state == st_stream) || (state == st_object)) {
  194 + if (!parse_error) {
  195 + object_stack.pop_back();
80 } 196 }
81 - if (must_be_complete && !this->saw_trailer) {  
82 - /// QTC::TC("qpdf", "QPDF_json missing trailer");  
83 - throw std::runtime_error("\"qpdf.objects.trailer\" was not seen"); 197 + } else if (state == st_qpdf) {
  198 + for (auto const& og: this->reserved) {
  199 + // QXXXQ
  200 + // QTC::TC("qpdf", "QPDF_json non-trivial null reserved");
  201 + this->pdf.replaceObject(og, QPDFObjectHandle::newNull());
84 } 202 }
  203 + this->reserved.clear();
85 } 204 }
  205 +}
86 206
87 - // QXXXQ 207 +QPDFObjectHandle
  208 +QPDF::JSONReactor::reserveObject(std::string const& obj, std::string const& gen)
  209 +{
  210 + int o = QUtil::string_to_int(obj.c_str());
  211 + int g = QUtil::string_to_int(gen.c_str());
  212 + auto oh = pdf.reserveObjectIfNotExists(o, g);
  213 + if (oh.isReserved()) {
  214 + this->reserved.insert(QPDFObjGen(o, g));
  215 + }
  216 + return oh;
  217 +}
  218 +
  219 +void
  220 +QPDF::JSONReactor::replaceObject(
  221 + QPDFObjectHandle to_replace, QPDFObjectHandle replacement)
  222 +{
  223 + auto og = to_replace.getObjGen();
  224 + this->reserved.erase(og);
  225 + this->pdf.replaceObject(og, replacement);
88 } 226 }
89 227
90 void 228 void
@@ -100,16 +238,20 @@ QPDF::JSONReactor::nestedState( @@ -100,16 +238,20 @@ QPDF::JSONReactor::nestedState(
100 { 238 {
101 // Use this method when the next state is for processing a nested 239 // Use this method when the next state is for processing a nested
102 // dictionary. 240 // dictionary.
103 - if (!value.isDictionary()) {  
104 - throw JSONExc(value, "\"" + key + "\" must be a dictionary"); 241 + if (value.isDictionary()) {
  242 + this->next_state = next;
  243 + } else {
  244 + error(value.getStart(), "\"" + key + "\" must be a dictionary");
  245 + this->next_state = st_ignore;
  246 + this->parse_error = true;
105 } 247 }
106 - this->next_state = next;  
107 } 248 }
108 249
109 bool 250 bool
110 QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) 251 QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
111 { 252 {
112 if (state == st_ignore) { 253 if (state == st_ignore) {
  254 + QTC::TC("qpdf", "QPDF_json ignoring in st_ignore");
113 // ignore 255 // ignore
114 } else if (state == st_top) { 256 } else if (state == st_top) {
115 if (key == "qpdf") { 257 if (key == "qpdf") {
@@ -118,6 +260,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) @@ -118,6 +260,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
118 } else { 260 } else {
119 // Ignore all other fields for forward compatibility. 261 // Ignore all other fields for forward compatibility.
120 // Don't use nestedState since this can be any type. 262 // Don't use nestedState since this can be any type.
  263 + // QXXXQ QTC
121 next_state = st_ignore; 264 next_state = st_ignore;
122 } 265 }
123 } else if (state == st_qpdf) { 266 } else if (state == st_qpdf) {
@@ -126,7 +269,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) @@ -126,7 +269,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
126 std::string v; 269 std::string v;
127 if (!(value.getNumber(v) && (v == "2"))) { 270 if (!(value.getNumber(v) && (v == "2"))) {
128 QTC::TC("qpdf", "QPDF_json bad json version"); 271 QTC::TC("qpdf", "QPDF_json bad json version");
129 - throw JSONExc(value, "only JSON version 2 is supported"); 272 + error(value.getStart(), "only JSON version 2 is supported");
130 } 273 }
131 } else if (key == "pdfversion") { 274 } else if (key == "pdfversion") {
132 this->saw_pdf_version = true; 275 this->saw_pdf_version = true;
@@ -141,81 +284,197 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) @@ -141,81 +284,197 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
141 } 284 }
142 if (!version_okay) { 285 if (!version_okay) {
143 QTC::TC("qpdf", "QPDF_json bad pdf version"); 286 QTC::TC("qpdf", "QPDF_json bad pdf version");
144 - throw JSONExc(value, "invalid PDF version (must be x.y)"); 287 + error(value.getStart(), "invalid PDF version (must be x.y)");
145 } 288 }
146 } else if (key == "objects") { 289 } else if (key == "objects") {
147 - nestedState(key, value, st_objects_top); 290 + this->saw_objects = true;
  291 + nestedState(key, value, st_objects);
148 } else { 292 } else {
149 // ignore unknown keys for forward compatibility 293 // ignore unknown keys for forward compatibility
  294 + // QXXXQ QTC
  295 + next_state = st_ignore;
150 } 296 }
151 - } else if (state == st_objects_top) { 297 + } else if (state == st_objects) {
152 std::smatch m; 298 std::smatch m;
153 if (key == "trailer") { 299 if (key == "trailer") {
154 this->saw_trailer = true; 300 this->saw_trailer = true;
155 - nestedState(key, value, st_trailer_top);  
156 - // QXXXQ 301 + nestedState(key, value, st_trailer);
  302 + this->cur_object = "trailer";
157 } else if (std::regex_match(key, m, OBJ_KEY_RE)) { 303 } else if (std::regex_match(key, m, OBJ_KEY_RE)) {
  304 + // QXXXQ remember to handle null for delete
  305 + object_stack.push_back(reserveObject(m[1].str(), m[2].str()));
158 nestedState(key, value, st_object_top); 306 nestedState(key, value, st_object_top);
159 - // QXXXQ 307 + this->cur_object = key;
160 } else { 308 } else {
161 QTC::TC("qpdf", "QPDF_json bad object key"); 309 QTC::TC("qpdf", "QPDF_json bad object key");
162 - throw JSONExc(  
163 - value, "object key should be \"trailer\" or \"obj:n n R\""); 310 + error(
  311 + value.getStart(),
  312 + "object key should be \"trailer\" or \"obj:n n R\"");
  313 + next_state = st_ignore;
  314 + parse_error = true;
164 } 315 }
165 } else if (state == st_object_top) { 316 } else if (state == st_object_top) {
  317 + if (object_stack.size() == 0) {
  318 + throw std::logic_error("no object on stack in st_object_top");
  319 + }
  320 + auto tos = object_stack.back();
  321 + QPDFObjectHandle replacement;
166 if (key == "value") { 322 if (key == "value") {
167 // Don't use nestedState since this can have any type. 323 // Don't use nestedState since this can have any type.
  324 + this->saw_value = true;
168 next_state = st_object; 325 next_state = st_object;
169 - // QXXXQ 326 + replacement = makeObject(value);
  327 + replaceObject(tos, replacement);
170 } else if (key == "stream") { 328 } else if (key == "stream") {
  329 + this->saw_stream = true;
171 nestedState(key, value, st_stream); 330 nestedState(key, value, st_stream);
172 - // QXXXQ 331 + if (tos.isStream()) {
  332 + // QXXXQ reusing -- need QTC
  333 + } else {
  334 + replacement =
  335 + pdf.reserveStream(tos.getObjectID(), tos.getGeneration());
  336 + replaceObject(tos, replacement);
  337 + replacement.replaceStreamData(
  338 + "", "<<>>"_qpdf, "<<>>"_qpdf); // QXXXQ
  339 + }
173 } else { 340 } else {
174 // Ignore unknown keys for forward compatibility 341 // Ignore unknown keys for forward compatibility
  342 + // QXXXQ QTC
  343 + next_state = st_ignore;
175 } 344 }
176 - } else if (state == st_trailer_top) { 345 + if (replacement.isInitialized()) {
  346 + object_stack.pop_back();
  347 + object_stack.push_back(replacement);
  348 + }
  349 + } else if (state == st_trailer) {
177 if (key == "value") { 350 if (key == "value") {
  351 + this->saw_value = true;
178 // The trailer must be a dictionary, so we can use nestedState. 352 // The trailer must be a dictionary, so we can use nestedState.
179 nestedState("trailer.value", value, st_object); 353 nestedState("trailer.value", value, st_object);
180 - // QXXXQ 354 + this->pdf.m->trailer = makeObject(value);
181 } else if (key == "stream") { 355 } else if (key == "stream") {
  356 + // Don't need to set saw_stream here since there's already
  357 + // an error.
182 QTC::TC("qpdf", "QPDF_json trailer stream"); 358 QTC::TC("qpdf", "QPDF_json trailer stream");
183 - throw JSONExc(value, "the trailer may not be a stream"); 359 + error(value.getStart(), "the trailer may not be a stream");
  360 + next_state = st_ignore;
  361 + parse_error = true;
184 } else { 362 } else {
185 // Ignore unknown keys for forward compatibility 363 // Ignore unknown keys for forward compatibility
  364 + // QXXXQ QTC
  365 + next_state = st_ignore;
186 } 366 }
187 } else if (state == st_stream) { 367 } else if (state == st_stream) {
188 - if (key == "dict") { 368 + if (object_stack.size() == 0) {
  369 + throw std::logic_error("no object on stack in st_stream");
  370 + }
  371 + auto tos = object_stack.back();
  372 + if (!tos.isStream()) {
  373 + // QXXXQ QTC in update mode
  374 + error(value.getStart(), "this object is not a stream");
  375 + parse_error = true;
  376 + } else if (key == "dict") {
  377 + this->saw_dict = true;
189 // Since a stream dictionary must be a dictionary, we can 378 // Since a stream dictionary must be a dictionary, we can
190 // use nestedState to transition to st_value. 379 // use nestedState to transition to st_value.
191 nestedState("stream.dict", value, st_object); 380 nestedState("stream.dict", value, st_object);
192 - // QXXXQ 381 + auto dict = makeObject(value);
  382 + if (dict.isDictionary()) {
  383 + tos.replaceDict(dict);
  384 + } else {
  385 + // An error had already been given by nestedState
  386 + QTC::TC("qpdf", "QPDF_json stream dict not dict");
  387 + parse_error = true;
  388 + }
193 } else if (key == "data") { 389 } else if (key == "data") {
  390 + this->saw_data = true;
194 // QXXXQ 391 // QXXXQ
195 } else if (key == "datafile") { 392 } else if (key == "datafile") {
  393 + this->saw_datafile = true;
196 // QXXXQ 394 // QXXXQ
197 } else { 395 } else {
198 // Ignore unknown keys for forward compatibility. 396 // Ignore unknown keys for forward compatibility.
  397 + // QXXXQ QTC
199 next_state = st_ignore; 398 next_state = st_ignore;
200 } 399 }
201 } else if (state == st_object) { 400 } else if (state == st_object) {
202 - // QXXXQ 401 + if (!parse_error) {
  402 + auto dict = object_stack.back();
  403 + if (dict.isStream()) {
  404 + dict = dict.getDict();
  405 + }
  406 + dict.replaceKey(key, makeObject(value));
  407 + }
203 } else { 408 } else {
204 throw std::logic_error( 409 throw std::logic_error(
205 "QPDF_json: unknown state " + QUtil::int_to_string(state)); 410 "QPDF_json: unknown state " + QUtil::int_to_string(state));
206 } 411 }
207 -  
208 - // QXXXQ  
209 return true; 412 return true;
210 } 413 }
211 414
212 bool 415 bool
213 QPDF::JSONReactor::arrayItem(JSON const& value) 416 QPDF::JSONReactor::arrayItem(JSON const& value)
214 { 417 {
215 - // QXXXQ 418 + if (state == st_object) {
  419 + if (!parse_error) {
  420 + auto tos = object_stack.back();
  421 + tos.appendItem(makeObject(value));
  422 + }
  423 + }
216 return true; 424 return true;
217 } 425 }
218 426
  427 +QPDFObjectHandle
  428 +QPDF::JSONReactor::makeObject(JSON const& value)
  429 +{
  430 + QPDFObjectHandle result;
  431 + std::string str_v;
  432 + bool bool_v = false;
  433 + std::smatch m;
  434 + if (value.isDictionary()) {
  435 + result = QPDFObjectHandle::newDictionary();
  436 + object_stack.push_back(result);
  437 + } else if (value.isArray()) {
  438 + result = QPDFObjectHandle::newArray();
  439 + object_stack.push_back(result);
  440 + } else if (value.isNull()) {
  441 + result = QPDFObjectHandle::newNull();
  442 + } else if (value.getBool(bool_v)) {
  443 + result = QPDFObjectHandle::newBool(bool_v);
  444 + } else if (value.getNumber(str_v)) {
  445 + if (QUtil::is_long_long(str_v.c_str())) {
  446 + result = QPDFObjectHandle::newInteger(
  447 + QUtil::string_to_ll(str_v.c_str()));
  448 + } else {
  449 + result = QPDFObjectHandle::newReal(str_v);
  450 + }
  451 + } else if (value.getString(str_v)) {
  452 + if (std::regex_match(str_v, m, INDIRECT_OBJ_RE)) {
  453 + result = reserveObject(m[1].str(), m[2].str());
  454 + } else if (std::regex_match(str_v, m, UNICODE_RE)) {
  455 + result = QPDFObjectHandle::newUnicodeString(m[1].str());
  456 + } else if (std::regex_match(str_v, m, BINARY_RE)) {
  457 + result = QPDFObjectHandle::newString(QUtil::hex_decode(m[1].str()));
  458 + } else if (std::regex_match(str_v, m, NAME_RE)) {
  459 + result = QPDFObjectHandle::newName(str_v);
  460 + } else {
  461 + QTC::TC("qpdf", "QPDF_json unrecognized string value");
  462 + error(value.getStart(), "unrecognized string value");
  463 + result = QPDFObjectHandle::newNull();
  464 + }
  465 + }
  466 + if (!result.isInitialized()) {
  467 + throw std::logic_error(
  468 + "JSONReactor::makeObject didn't initialize the object");
  469 + }
  470 +
  471 + // QXXXQ include object number in description
  472 + result.setObjectDescription(
  473 + &this->pdf,
  474 + this->filename + " offset " + QUtil::uint_to_string(value.getStart()));
  475 + return result;
  476 +}
  477 +
219 void 478 void
220 QPDF::createFromJSON(std::string const& json_file) 479 QPDF::createFromJSON(std::string const& json_file)
221 { 480 {
@@ -225,6 +484,7 @@ QPDF::createFromJSON(std::string const& json_file) @@ -225,6 +484,7 @@ QPDF::createFromJSON(std::string const& json_file)
225 void 484 void
226 QPDF::createFromJSON(std::shared_ptr<InputSource> is) 485 QPDF::createFromJSON(std::shared_ptr<InputSource> is)
227 { 486 {
  487 + processMemoryFile(is->getName().c_str(), JSON_PDF, strlen(JSON_PDF));
228 importJSON(is, true); 488 importJSON(is, true);
229 } 489 }
230 490
@@ -243,10 +503,19 @@ QPDF::updateFromJSON(std::shared_ptr<InputSource> is) @@ -243,10 +503,19 @@ QPDF::updateFromJSON(std::shared_ptr<InputSource> is)
243 void 503 void
244 QPDF::importJSON(std::shared_ptr<InputSource> is, bool must_be_complete) 504 QPDF::importJSON(std::shared_ptr<InputSource> is, bool must_be_complete)
245 { 505 {
246 - JSONReactor reactor(*this, must_be_complete); 506 + JSONReactor reactor(*this, is->getName(), must_be_complete);
247 try { 507 try {
248 JSON::parse(*is, &reactor); 508 JSON::parse(*is, &reactor);
249 } catch (std::runtime_error& e) { 509 } catch (std::runtime_error& e) {
250 throw std::runtime_error(is->getName() + ": " + e.what()); 510 throw std::runtime_error(is->getName() + ": " + e.what());
251 } 511 }
  512 + if (reactor.anyErrors()) {
  513 + throw std::runtime_error(is->getName() + ": errors found in JSON");
  514 + }
  515 + // QXXXQ
  516 + // std::cout << "trailer:\n" << getTrailer().unparse() << std::endl;
  517 + // for (auto& oh: getAllObjects()) {
  518 + // std::cout << oh.unparse() << ":" << std::endl;
  519 + // std::cout << oh.unparseResolved() << std::endl;
  520 + // }
252 } 521 }
qpdf/qpdf.testcov
@@ -659,3 +659,12 @@ QPDF_json bad pdf version 0 @@ -659,3 +659,12 @@ QPDF_json bad pdf version 0
659 QPDF_json top-level array 0 659 QPDF_json top-level array 0
660 QPDF_json bad object key 0 660 QPDF_json bad object key 0
661 QPDF_json trailer stream 0 661 QPDF_json trailer stream 0
  662 +QPDF_json missing trailer 0
  663 +QPDF_json missing objects 0
  664 +QPDF_json ignoring in st_ignore 0
  665 +QPDF_json stream dict not dict 0
  666 +QPDF_json unrecognized string value 0
  667 +QPDF_json data datafile both or neither 0
  668 +QPDF_json stream no dict 0
  669 +QPDF_json trailer no value 0
  670 +QPDF_json value stream both or neither 0
qpdf/qtest/qpdf-json.test
@@ -33,6 +33,9 @@ my @badfiles = ( @@ -33,6 +33,9 @@ my @badfiles = (
33 'stream-dict-not-dict', 33 'stream-dict-not-dict',
34 'trailer-not-dict', 34 'trailer-not-dict',
35 'trailer-stream', 35 'trailer-stream',
  36 + 'missing-trailer',
  37 + 'missing-objects',
  38 + 'obj-key-errors',
36 ); 39 );
37 40
38 $n_tests += scalar(@badfiles); 41 $n_tests += scalar(@badfiles);
qpdf/qtest/qpdf/qjson-bad-json-version1.out
1 -qpdf: qjson-bad-json-version1.json: offset 98: only JSON version 2 is supported 1 +WARNING: qjson-bad-json-version1.json (offset 98): only JSON version 2 is supported
  2 +qpdf: qjson-bad-json-version1.json: errors found in JSON
qpdf/qtest/qpdf/qjson-bad-json-version2.out
1 -qpdf: qjson-bad-json-version2.json: offset 98: only JSON version 2 is supported 1 +WARNING: qjson-bad-json-version2.json (offset 98): only JSON version 2 is supported
  2 +qpdf: qjson-bad-json-version2.json: errors found in JSON
qpdf/qtest/qpdf/qjson-bad-object-key.out
1 -qpdf: qjson-bad-object-key.json: offset 181: object key should be "trailer" or "obj:n n R" 1 +WARNING: qjson-bad-object-key.json (offset 181): object key should be "trailer" or "obj:n n R"
  2 +qpdf: qjson-bad-object-key.json: errors found in JSON
qpdf/qtest/qpdf/qjson-bad-pdf-version1.out
1 -qpdf: qjson-bad-pdf-version1.json: offset 119: invalid PDF version (must be x.y) 1 +WARNING: qjson-bad-pdf-version1.json (offset 119): invalid PDF version (must be x.y)
  2 +qpdf: qjson-bad-pdf-version1.json: errors found in JSON
qpdf/qtest/qpdf/qjson-bad-pdf-version2.out
1 -qpdf: qjson-bad-pdf-version2.json: offset 119: invalid PDF version (must be x.y) 1 +WARNING: qjson-bad-pdf-version2.json (offset 119): invalid PDF version (must be x.y)
  2 +qpdf: qjson-bad-pdf-version2.json: errors found in JSON
qpdf/qtest/qpdf/qjson-missing-objects.json 0 → 100644
  1 +{
  2 + "version": 2,
  3 + "parameters": {
  4 + "decodelevel": "none"
  5 + },
  6 + "qpdf": {
  7 + "jsonversion": 2,
  8 + "pdfversion": "1.3",
  9 + "maxobjectid": 6
  10 + }
  11 +}
qpdf/qtest/qpdf/qjson-missing-objects.out 0 → 100644
  1 +WARNING: qjson-missing-objects.json: "qpdf.objects" was not seen
  2 +qpdf: qjson-missing-objects.json: errors found in JSON
qpdf/qtest/qpdf/qjson-missing-trailer.json 0 → 100644
  1 +{
  2 + "version": 2,
  3 + "parameters": {
  4 + "decodelevel": "none"
  5 + },
  6 + "qpdf": {
  7 + "jsonversion": 2,
  8 + "pdfversion": "1.3",
  9 + "maxobjectid": 6,
  10 + "objects": {
  11 + "obj:1 0 R": {
  12 + "value": {
  13 + "/Pages": "2 0 R",
  14 + "/Type": "/Catalog"
  15 + }
  16 + },
  17 + "obj:2 0 R": {
  18 + "value": {
  19 + "/Count": 1,
  20 + "/Kids": [
  21 + "3 0 R"
  22 + ],
  23 + "/Type": "/Pages"
  24 + }
  25 + },
  26 + "obj:3 0 R": {
  27 + "value": {
  28 + "/Contents": "4 0 R",
  29 + "/MediaBox": [
  30 + 0,
  31 + 0,
  32 + 612,
  33 + 792
  34 + ],
  35 + "/Parent": "2 0 R",
  36 + "/Resources": {
  37 + "/Font": {
  38 + "/F1": "6 0 R"
  39 + },
  40 + "/ProcSet": "5 0 R"
  41 + },
  42 + "/Type": "/Page"
  43 + }
  44 + },
  45 + "obj:4 0 R": {
  46 + "stream": {
  47 + "data": "QlQKICAvRjEgMjQgVGYKICA3MiA3MjAgVGQKICAoUG90YXRvKSBUagpFVAo=",
  48 + "dict": {}
  49 + }
  50 + },
  51 + "obj:5 0 R": {
  52 + "value": [
  53 + "/PDF",
  54 + "/Text"
  55 + ]
  56 + },
  57 + "obj:6 0 R": {
  58 + "value": {
  59 + "/BaseFont": "/Helvetica",
  60 + "/Encoding": "/WinAnsiEncoding",
  61 + "/Subtype": "/Type1",
  62 + "/Type": "/Font"
  63 + }
  64 + }
  65 + }
  66 + }
  67 +}
qpdf/qtest/qpdf/qjson-missing-trailer.out 0 → 100644
  1 +WARNING: qjson-missing-trailer.json: "qpdf.objects.trailer" was not seen
  2 +qpdf: qjson-missing-trailer.json: errors found in JSON
qpdf/qtest/qpdf/qjson-no-json-version.out
1 -qpdf: qjson-no-json-version.json: "qpdf.jsonversion" was not seen 1 +WARNING: qjson-no-json-version.json: "qpdf.jsonversion" was not seen
  2 +qpdf: qjson-no-json-version.json: errors found in JSON
qpdf/qtest/qpdf/qjson-no-pdf-version.out
1 -qpdf: qjson-no-pdf-version.json: "qpdf.pdfversion" was not seen 1 +WARNING: qjson-no-pdf-version.json: "qpdf.pdfversion" was not seen
  2 +qpdf: qjson-no-pdf-version.json: errors found in JSON
qpdf/qtest/qpdf/qjson-no-qpdf-object.out
1 -qpdf: qjson-no-qpdf-object.json: "qpdf" object was not seen 1 +WARNING: qjson-no-qpdf-object.json: "qpdf" object was not seen
  2 +qpdf: qjson-no-qpdf-object.json: errors found in JSON
qpdf/qtest/qpdf/qjson-obj-key-errors.json 0 → 100644
  1 +{
  2 + "version": 2,
  3 + "parameters": {
  4 + "decodelevel": "none"
  5 + },
  6 + "qpdf": {
  7 + "jsonversion": 2,
  8 + "pdfversion": "1.3",
  9 + "maxobjectid": 6,
  10 + "objects": {
  11 + "obj:1 0 R": {
  12 + "value": {
  13 + "/Pages": "2 0 R",
  14 + "/Type": "/Catalog"
  15 + }
  16 + },
  17 + "obj:2 0 R": {
  18 + "value": {
  19 + "/Count": 1,
  20 + "/Kids": [
  21 + "3 0 R"
  22 + ],
  23 + "/Type": "/Pages"
  24 + },
  25 + "stream": {
  26 + "data": "QlQKICAvRjEgMjQgVGYKICA3MiA3MjAgVGQKICAoUG90YXRvKSBUagpFVAo=",
  27 + "dict": {}
  28 + }
  29 + },
  30 + "obj:3 0 R": {
  31 + "potato": {
  32 + "salad": "ignored-so-no-string-error",
  33 + "nested": [1, 2, {"x": "y"}]
  34 + }
  35 + },
  36 + "obj:4 0 R": {
  37 + "stream": {
  38 + "potato": "u:salad"
  39 + }
  40 + },
  41 + "obj:5 0 R": {
  42 + "stream": {
  43 + "dict": {"/A": "/B"},
  44 + "data": "QlQKICAvRjEgMjQgVGYKICA3MiA3MjAgVGQKICAoUG90YXRvKSBUagpFVAo=",
  45 + "datafile": "abc"
  46 + }
  47 + },
  48 + "obj:6 0 R": {
  49 + "value": {
  50 + "/BaseFont": "/Helvetica",
  51 + "/Encoding": "/WinAnsiEncoding",
  52 + "/Subtype": "/Type1",
  53 + "/Type": "/Font"
  54 + }
  55 + },
  56 + "trailer": {
  57 + "potato": {
  58 + "/Root": "1 0 R",
  59 + "/Size": 7
  60 + }
  61 + }
  62 + }
  63 + }
  64 +}
qpdf/qtest/qpdf/qjson-obj-key-errors.out 0 → 100644
  1 +WARNING: qjson-obj-key-errors.json (obj:2 0 R, offset 302): object must have exactly one of "value" or "stream"
  2 +WARNING: qjson-obj-key-errors.json (obj:3 0 R, offset 600): object must have exactly one of "value" or "stream"
  3 +WARNING: qjson-obj-key-errors.json (obj:4 0 R, offset 768): "stream" is missing "dict"
  4 +WARNING: qjson-obj-key-errors.json (obj:4 0 R, offset 768): "stream" must have exactly one of "data" or "datafile"
  5 +WARNING: qjson-obj-key-errors.json (obj:5 0 R, offset 858): "stream" must have exactly one of "data" or "datafile"
  6 +WARNING: qjson-obj-key-errors.json (trailer, offset 1236): "trailer" is missing "value"
  7 +qpdf: qjson-obj-key-errors.json: errors found in JSON
qpdf/qtest/qpdf/qjson-object-not-dict.out
1 -qpdf: qjson-object-not-dict.json: offset 184: "obj:1 0 R" must be a dictionary 1 +WARNING: qjson-object-not-dict.json (offset 184): "obj:1 0 R" must be a dictionary
  2 +qpdf: qjson-object-not-dict.json: errors found in JSON
qpdf/qtest/qpdf/qjson-objects-not-dict.out
1 -qpdf: qjson-objects-not-dict.json: offset 77: "objects" must be a dictionary 1 +WARNING: qjson-objects-not-dict.json (offset 77): "objects" must be a dictionary
  2 +WARNING: qjson-objects-not-dict.json: "qpdf.objects.trailer" was not seen
  3 +qpdf: qjson-objects-not-dict.json: errors found in JSON
qpdf/qtest/qpdf/qjson-stream-dict-not-dict.out
1 -qpdf: qjson-stream-dict-not-dict.json: offset 137: "stream.dict" must be a dictionary 1 +WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 137): "stream.dict" must be a dictionary
  2 +WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 137): unrecognized string value
  3 +WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 117): "stream" must have exactly one of "data" or "datafile"
  4 +WARNING: qjson-stream-dict-not-dict.json: "qpdf.objects.trailer" was not seen
  5 +qpdf: qjson-stream-dict-not-dict.json: errors found in JSON
qpdf/qtest/qpdf/qjson-stream-not-dict.out
1 -qpdf: qjson-stream-not-dict.json: offset 118: "stream" must be a dictionary 1 +WARNING: qjson-stream-not-dict.json (obj:1 0 R, offset 118): "stream" must be a dictionary
  2 +WARNING: qjson-stream-not-dict.json: "qpdf.objects.trailer" was not seen
  3 +qpdf: qjson-stream-not-dict.json: errors found in JSON
qpdf/qtest/qpdf/qjson-trailer-not-dict.json
@@ -63,7 +63,7 @@ @@ -63,7 +63,7 @@
63 } 63 }
64 }, 64 },
65 "trailer": { 65 "trailer": {
66 - "value": false, 66 + "value": false
67 } 67 }
68 } 68 }
69 } 69 }
qpdf/qtest/qpdf/qjson-trailer-not-dict.out
1 -qpdf: qjson-trailer-not-dict.json: offset 1326: "trailer.value" must be a dictionary 1 +WARNING: qjson-trailer-not-dict.json (trailer, offset 1327): "trailer.value" must be a dictionary
  2 +qpdf: qjson-trailer-not-dict.json: errors found in JSON
qpdf/qtest/qpdf/qjson-trailer-stream.json
@@ -63,7 +63,7 @@ @@ -63,7 +63,7 @@
63 } 63 }
64 }, 64 },
65 "trailer": { 65 "trailer": {
66 - "stream": {}, 66 + "stream": {}
67 } 67 }
68 } 68 }
69 } 69 }
qpdf/qtest/qpdf/qjson-trailer-stream.out
1 -qpdf: qjson-trailer-stream.json: offset 1327: the trailer may not be a stream 1 +WARNING: qjson-trailer-stream.json (trailer, offset 1327): the trailer may not be a stream
  2 +qpdf: qjson-trailer-stream.json: errors found in JSON