Commit 7e7a9c437982b7ede2af9cd0b12b3e47b4bc3a3d

Authored by Jay Berkenbilt
1 parent be0ed6ab

Parse objects; stream data is not yet handled

... ... @@ -54,6 +54,15 @@ Soon: Break ground on "Document-level work"
54 54 Output JSON v2
55 55 ==============
56 56  
  57 +XXX
  58 +
  59 +* Reread from perspective of update
  60 +* Test all ignore cases with QTC
  61 +* Test case of correct file with dict before data/datafile
  62 +* Have a test case if possible that exercises the object description
  63 + which means we need some kind of semantic error that gets caught
  64 + after creation.
  65 +
57 66 Try to never flatten pages tree. Make sure we do something reasonable
58 67 with pages tree repair. The problem is that if pages tree repair is
59 68 done as a side effect of running --json, the qpdf part of the json may
... ...
include/qpdf/QPDF.hh
... ... @@ -998,7 +998,7 @@ class QPDF
998 998 class JSONReactor: public JSON::Reactor
999 999 {
1000 1000 public:
1001   - JSONReactor(QPDF&, bool must_be_complete);
  1001 + JSONReactor(QPDF&, std::string const& filename, bool must_be_complete);
1002 1002 virtual ~JSONReactor() = default;
1003 1003 virtual void dictionaryStart() override;
1004 1004 virtual void arrayStart() override;
... ... @@ -1008,31 +1008,51 @@ class QPDF
1008 1008 dictionaryItem(std::string const& key, JSON const& value) override;
1009 1009 virtual bool arrayItem(JSON const& value) override;
1010 1010  
  1011 + bool anyErrors() const;
  1012 +
1011 1013 private:
1012 1014 enum state_e {
1013 1015 st_initial,
1014 1016 st_top,
1015   - st_ignore,
1016 1017 st_qpdf,
1017   - st_objects_top,
1018   - st_trailer_top,
  1018 + st_objects,
  1019 + st_trailer,
1019 1020 st_object_top,
1020 1021 st_stream,
1021 1022 st_object,
  1023 + st_ignore,
1022 1024 };
1023 1025  
1024 1026 void containerStart();
1025 1027 void nestedState(std::string const& key, JSON const& value, state_e);
  1028 + QPDFObjectHandle makeObject(JSON const& value);
  1029 + void error(size_t offset, std::string const& message);
  1030 + QPDFObjectHandle
  1031 + reserveObject(std::string const& obj, std::string const& gen);
  1032 + void replaceObject(
  1033 + QPDFObjectHandle to_replace, QPDFObjectHandle replacement);
1026 1034  
1027 1035 QPDF& pdf;
  1036 + std::string filename;
1028 1037 bool must_be_complete;
  1038 + bool errors;
  1039 + bool parse_error;
1029 1040 bool saw_qpdf;
  1041 + bool saw_objects;
1030 1042 bool saw_json_version;
1031 1043 bool saw_pdf_version;
1032 1044 bool saw_trailer;
1033 1045 state_e state;
1034 1046 state_e next_state;
  1047 + std::string cur_object;
  1048 + bool saw_value;
  1049 + bool saw_stream;
  1050 + bool saw_dict;
  1051 + bool saw_data;
  1052 + bool saw_datafile;
1035 1053 std::vector<state_e> state_stack;
  1054 + std::vector<QPDFObjectHandle> object_stack;
  1055 + std::set<QPDFObjGen> reserved;
1036 1056 };
1037 1057 friend class JSONReactor;
1038 1058  
... ... @@ -1080,6 +1100,7 @@ class QPDF
1080 1100 void resolveObjectsInStream(int obj_stream_number);
1081 1101 void stopOnError(std::string const& message);
1082 1102 QPDFObjectHandle reserveObjectIfNotExists(int objid, int gen);
  1103 + QPDFObjectHandle reserveStream(int objid, int gen);
1083 1104  
1084 1105 // Calls finish() on the pipeline when done but does not delete it
1085 1106 bool pipeStreamData(
... ...
include/qpdf/QPDFObjectHandle.hh
... ... @@ -1431,7 +1431,6 @@ class QPDFObjectHandle
1431 1431 {
1432 1432 return QPDFObjectHandle::newIndirect(qpdf, objid, generation);
1433 1433 }
1434   - // object must be dictionary object
1435 1434 static QPDFObjectHandle
1436 1435 newStream(
1437 1436 QPDF* qpdf,
... ...
libqpdf/QPDF.cc
... ... @@ -2167,6 +2167,13 @@ QPDF::reserveObjectIfNotExists(int objid, int gen)
2167 2167 }
2168 2168  
2169 2169 QPDFObjectHandle
  2170 +QPDF::reserveStream(int objid, int gen)
  2171 +{
  2172 + return QPDFObjectHandle::Factory::newStream(
  2173 + this, objid, gen, QPDFObjectHandle::newDictionary(), 0, 0);
  2174 +}
  2175 +
  2176 +QPDFObjectHandle
2170 2177 QPDF::getObjectByObjGen(QPDFObjGen const& og)
2171 2178 {
2172 2179 return getObjectByID(og.getObj(), og.getGen());
... ...
libqpdf/QPDF_json.cc
1 1 #include <qpdf/QPDF.hh>
2 2  
3 3 #include <qpdf/FileInputSource.hh>
  4 +#include <qpdf/QIntC.hh>
4 5 #include <qpdf/QTC.hh>
5 6 #include <qpdf/QUtil.hh>
6 7 #include <regex>
7 8  
8   -namespace
9   -{
10   - class JSONExc: public std::runtime_error
11   - {
12   - public:
13   - JSONExc(JSON const& value, std::string const& msg) :
14   - std::runtime_error(
15   - "offset " + QUtil::uint_to_string(value.getStart()) + ": " +
16   - msg)
17   - {
18   - }
19   - };
20   -} // namespace
  9 +// This chart shows an example of the state transitions that would
  10 +// occur in parsing a minimal file.
  11 +
  12 +// | st_initial
  13 +// { | -> st_top
  14 +// "qpdf": { | -> st_qpdf
  15 +// "objects": { | -> st_objects
  16 +// "obj:1 0 R": { | -> st_object_top
  17 +// "value": { | -> st_object
  18 +// "/Pages": "2 0 R", | ...
  19 +// "/Type": "/Catalog" | ...
  20 +// } | <- st_object_top
  21 +// }, | <- st_objects
  22 +// "obj:2 0 R": { | -> st_object_top
  23 +// "value": 12 | -> st_object
  24 +// } | <- st_object_top
  25 +// }, | <- st_objects
  26 +// "obj:4 0 R": { | -> st_object_top
  27 +// "stream": { | -> st_stream
  28 +// "data": "cG90YXRv", | ...
  29 +// "dict": { | -> st_object
  30 +// "/K": true | ...
  31 +// } | <- st_stream
  32 +// } | <- st_object_top
  33 +// }, | <- st_objects
  34 +// "trailer": { | -> st_trailer
  35 +// "value": { | -> st_object
  36 +// "/Root": "1 0 R", | ...
  37 +// "/Size": 7 | ...
  38 +// } | <- st_trailer
  39 +// } | <- st_objects
  40 +// } | <- st_qpdf
  41 +// } | <- st_top
  42 +// } | <- st_initial
  43 +
  44 +static char const* JSON_PDF = (
  45 + // force line break
  46 + "%PDF-1.3\n"
  47 + "xref\n"
  48 + "0 1\n"
  49 + "0000000000 65535 f \n"
  50 + "trailer << /Size 1 >>\n"
  51 + "startxref\n"
  52 + "9\n"
  53 + "%%EOF\n");
21 54  
22 55 static std::regex PDF_VERSION_RE("^\\d+\\.\\d+$");
23 56 static std::regex OBJ_KEY_RE("^obj:(\\d+) (\\d+) R$");
  57 +static std::regex INDIRECT_OBJ_RE("^(\\d+) (\\d+) R$");
  58 +static std::regex UNICODE_RE("^u:(.*)$");
  59 +static std::regex BINARY_RE("^b:((?:[0-9a-fA-F]{2})*)$");
  60 +static std::regex NAME_RE("^/.*$");
24 61  
25   -QPDF::JSONReactor::JSONReactor(QPDF& pdf, bool must_be_complete) :
  62 +QPDF::JSONReactor::JSONReactor(
  63 + QPDF& pdf, std::string const& filename, bool must_be_complete) :
26 64 pdf(pdf),
  65 + filename(filename),
27 66 must_be_complete(must_be_complete),
  67 + errors(false),
  68 + parse_error(false),
28 69 saw_qpdf(false),
  70 + saw_objects(false),
29 71 saw_json_version(false),
30 72 saw_pdf_version(false),
31 73 saw_trailer(false),
32 74 state(st_initial),
33   - next_state(st_top)
  75 + next_state(st_top),
  76 + saw_value(false),
  77 + saw_stream(false),
  78 + saw_dict(false),
  79 + saw_data(false),
  80 + saw_datafile(false)
34 81 {
35 82 state_stack.push_back(st_initial);
36 83 }
37 84  
38 85 void
  86 +QPDF::JSONReactor::error(size_t offset, std::string const& msg)
  87 +{
  88 + this->errors = true;
  89 + this->pdf.warn(
  90 + qpdf_e_json, this->cur_object, QIntC::to_offset(offset), msg);
  91 +}
  92 +
  93 +bool
  94 +QPDF::JSONReactor::anyErrors() const
  95 +{
  96 + return this->errors;
  97 +}
  98 +
  99 +void
39 100 QPDF::JSONReactor::containerStart()
40 101 {
41 102 state_stack.push_back(state);
... ... @@ -46,7 +107,6 @@ void
46 107 QPDF::JSONReactor::dictionaryStart()
47 108 {
48 109 containerStart();
49   - // QXXXQ
50 110 }
51 111  
52 112 void
... ... @@ -57,7 +117,6 @@ QPDF::JSONReactor::arrayStart()
57 117 QTC::TC("qpdf", "QPDF_json top-level array");
58 118 throw std::runtime_error("QPDF JSON must be a dictionary");
59 119 }
60   - // QXXXQ
61 120 }
62 121  
63 122 void
... ... @@ -68,23 +127,102 @@ QPDF::JSONReactor::containerEnd(JSON const& value)
68 127 if (state == st_initial) {
69 128 if (!this->saw_qpdf) {
70 129 QTC::TC("qpdf", "QPDF_json missing qpdf");
71   - throw std::runtime_error("\"qpdf\" object was not seen");
  130 + error(0, "\"qpdf\" object was not seen");
  131 + } else {
  132 + if (!this->saw_json_version) {
  133 + QTC::TC("qpdf", "QPDF_json missing json version");
  134 + error(0, "\"qpdf.jsonversion\" was not seen");
  135 + }
  136 + if (must_be_complete && !this->saw_pdf_version) {
  137 + QTC::TC("qpdf", "QPDF_json missing pdf version");
  138 + error(0, "\"qpdf.pdfversion\" was not seen");
  139 + }
  140 + if (!this->saw_objects) {
  141 + QTC::TC("qpdf", "QPDF_json missing objects");
  142 + error(0, "\"qpdf.objects\" was not seen");
  143 + } else {
  144 + if (must_be_complete && !this->saw_trailer) {
  145 + QTC::TC("qpdf", "QPDF_json missing trailer");
  146 + error(0, "\"qpdf.objects.trailer\" was not seen");
  147 + }
  148 + }
72 149 }
73   - if (!this->saw_json_version) {
74   - QTC::TC("qpdf", "QPDF_json missing json version");
75   - throw std::runtime_error("\"qpdf.jsonversion\" was not seen");
  150 + } else if (state == st_objects) {
  151 + if (parse_error) {
  152 + // ignore
  153 + } else if (cur_object == "trailer") {
  154 + if (!saw_value) {
  155 + QTC::TC("qpdf", "QPDF_json trailer no value");
  156 + error(value.getStart(), "\"trailer\" is missing \"value\"");
  157 + }
  158 + } else if (saw_value == saw_stream) {
  159 + QTC::TC("qpdf", "QPDF_json value stream both or neither");
  160 + error(
  161 + value.getStart(),
  162 + "object must have exactly one of \"value\" or \"stream\"");
76 163 }
77   - if (must_be_complete && !this->saw_pdf_version) {
78   - QTC::TC("qpdf", "QPDF_json missing pdf version");
79   - throw std::runtime_error("\"qpdf.pdfversion\" was not seen");
  164 + object_stack.clear();
  165 + this->cur_object = "";
  166 + this->saw_dict = false;
  167 + this->saw_data = false;
  168 + this->saw_datafile = false;
  169 + this->saw_value = false;
  170 + this->saw_stream = false;
  171 + } else if (state == st_object_top) {
  172 + if (saw_stream) {
  173 + if (!saw_dict) {
  174 + QTC::TC("qpdf", "QPDF_json stream no dict");
  175 + error(value.getStart(), "\"stream\" is missing \"dict\"");
  176 + }
  177 + if (must_be_complete) {
  178 + if (saw_data == saw_datafile) {
  179 + QTC::TC("qpdf", "QPDF_json data datafile both or neither");
  180 + error(
  181 + value.getStart(),
  182 + "\"stream\" must have exactly one of \"data\" or "
  183 + "\"datafile\"");
  184 + }
  185 + } else if (saw_data && saw_datafile) {
  186 + // QXXXQ
  187 + /// QTC::TC("qpdf", "QPDF_json data and datafile");
  188 + error(
  189 + value.getStart(),
  190 + "\"stream\" may at most one of \"data\" or \"datafile\"");
  191 + }
  192 + }
  193 + } else if ((state == st_stream) || (state == st_object)) {
  194 + if (!parse_error) {
  195 + object_stack.pop_back();
80 196 }
81   - if (must_be_complete && !this->saw_trailer) {
82   - /// QTC::TC("qpdf", "QPDF_json missing trailer");
83   - throw std::runtime_error("\"qpdf.objects.trailer\" was not seen");
  197 + } else if (state == st_qpdf) {
  198 + for (auto const& og: this->reserved) {
  199 + // QXXXQ
  200 + // QTC::TC("qpdf", "QPDF_json non-trivial null reserved");
  201 + this->pdf.replaceObject(og, QPDFObjectHandle::newNull());
84 202 }
  203 + this->reserved.clear();
85 204 }
  205 +}
86 206  
87   - // QXXXQ
  207 +QPDFObjectHandle
  208 +QPDF::JSONReactor::reserveObject(std::string const& obj, std::string const& gen)
  209 +{
  210 + int o = QUtil::string_to_int(obj.c_str());
  211 + int g = QUtil::string_to_int(gen.c_str());
  212 + auto oh = pdf.reserveObjectIfNotExists(o, g);
  213 + if (oh.isReserved()) {
  214 + this->reserved.insert(QPDFObjGen(o, g));
  215 + }
  216 + return oh;
  217 +}
  218 +
  219 +void
  220 +QPDF::JSONReactor::replaceObject(
  221 + QPDFObjectHandle to_replace, QPDFObjectHandle replacement)
  222 +{
  223 + auto og = to_replace.getObjGen();
  224 + this->reserved.erase(og);
  225 + this->pdf.replaceObject(og, replacement);
88 226 }
89 227  
90 228 void
... ... @@ -100,16 +238,20 @@ QPDF::JSONReactor::nestedState(
100 238 {
101 239 // Use this method when the next state is for processing a nested
102 240 // dictionary.
103   - if (!value.isDictionary()) {
104   - throw JSONExc(value, "\"" + key + "\" must be a dictionary");
  241 + if (value.isDictionary()) {
  242 + this->next_state = next;
  243 + } else {
  244 + error(value.getStart(), "\"" + key + "\" must be a dictionary");
  245 + this->next_state = st_ignore;
  246 + this->parse_error = true;
105 247 }
106   - this->next_state = next;
107 248 }
108 249  
109 250 bool
110 251 QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
111 252 {
112 253 if (state == st_ignore) {
  254 + QTC::TC("qpdf", "QPDF_json ignoring in st_ignore");
113 255 // ignore
114 256 } else if (state == st_top) {
115 257 if (key == "qpdf") {
... ... @@ -118,6 +260,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
118 260 } else {
119 261 // Ignore all other fields for forward compatibility.
120 262 // Don't use nestedState since this can be any type.
  263 + // QXXXQ QTC
121 264 next_state = st_ignore;
122 265 }
123 266 } else if (state == st_qpdf) {
... ... @@ -126,7 +269,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
126 269 std::string v;
127 270 if (!(value.getNumber(v) && (v == "2"))) {
128 271 QTC::TC("qpdf", "QPDF_json bad json version");
129   - throw JSONExc(value, "only JSON version 2 is supported");
  272 + error(value.getStart(), "only JSON version 2 is supported");
130 273 }
131 274 } else if (key == "pdfversion") {
132 275 this->saw_pdf_version = true;
... ... @@ -141,81 +284,197 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
141 284 }
142 285 if (!version_okay) {
143 286 QTC::TC("qpdf", "QPDF_json bad pdf version");
144   - throw JSONExc(value, "invalid PDF version (must be x.y)");
  287 + error(value.getStart(), "invalid PDF version (must be x.y)");
145 288 }
146 289 } else if (key == "objects") {
147   - nestedState(key, value, st_objects_top);
  290 + this->saw_objects = true;
  291 + nestedState(key, value, st_objects);
148 292 } else {
149 293 // ignore unknown keys for forward compatibility
  294 + // QXXXQ QTC
  295 + next_state = st_ignore;
150 296 }
151   - } else if (state == st_objects_top) {
  297 + } else if (state == st_objects) {
152 298 std::smatch m;
153 299 if (key == "trailer") {
154 300 this->saw_trailer = true;
155   - nestedState(key, value, st_trailer_top);
156   - // QXXXQ
  301 + nestedState(key, value, st_trailer);
  302 + this->cur_object = "trailer";
157 303 } else if (std::regex_match(key, m, OBJ_KEY_RE)) {
  304 + // QXXXQ remember to handle null for delete
  305 + object_stack.push_back(reserveObject(m[1].str(), m[2].str()));
158 306 nestedState(key, value, st_object_top);
159   - // QXXXQ
  307 + this->cur_object = key;
160 308 } else {
161 309 QTC::TC("qpdf", "QPDF_json bad object key");
162   - throw JSONExc(
163   - value, "object key should be \"trailer\" or \"obj:n n R\"");
  310 + error(
  311 + value.getStart(),
  312 + "object key should be \"trailer\" or \"obj:n n R\"");
  313 + next_state = st_ignore;
  314 + parse_error = true;
164 315 }
165 316 } else if (state == st_object_top) {
  317 + if (object_stack.size() == 0) {
  318 + throw std::logic_error("no object on stack in st_object_top");
  319 + }
  320 + auto tos = object_stack.back();
  321 + QPDFObjectHandle replacement;
166 322 if (key == "value") {
167 323 // Don't use nestedState since this can have any type.
  324 + this->saw_value = true;
168 325 next_state = st_object;
169   - // QXXXQ
  326 + replacement = makeObject(value);
  327 + replaceObject(tos, replacement);
170 328 } else if (key == "stream") {
  329 + this->saw_stream = true;
171 330 nestedState(key, value, st_stream);
172   - // QXXXQ
  331 + if (tos.isStream()) {
  332 + // QXXXQ reusing -- need QTC
  333 + } else {
  334 + replacement =
  335 + pdf.reserveStream(tos.getObjectID(), tos.getGeneration());
  336 + replaceObject(tos, replacement);
  337 + replacement.replaceStreamData(
  338 + "", "<<>>"_qpdf, "<<>>"_qpdf); // QXXXQ
  339 + }
173 340 } else {
174 341 // Ignore unknown keys for forward compatibility
  342 + // QXXXQ QTC
  343 + next_state = st_ignore;
175 344 }
176   - } else if (state == st_trailer_top) {
  345 + if (replacement.isInitialized()) {
  346 + object_stack.pop_back();
  347 + object_stack.push_back(replacement);
  348 + }
  349 + } else if (state == st_trailer) {
177 350 if (key == "value") {
  351 + this->saw_value = true;
178 352 // The trailer must be a dictionary, so we can use nestedState.
179 353 nestedState("trailer.value", value, st_object);
180   - // QXXXQ
  354 + this->pdf.m->trailer = makeObject(value);
181 355 } else if (key == "stream") {
  356 + // Don't need to set saw_stream here since there's already
  357 + // an error.
182 358 QTC::TC("qpdf", "QPDF_json trailer stream");
183   - throw JSONExc(value, "the trailer may not be a stream");
  359 + error(value.getStart(), "the trailer may not be a stream");
  360 + next_state = st_ignore;
  361 + parse_error = true;
184 362 } else {
185 363 // Ignore unknown keys for forward compatibility
  364 + // QXXXQ QTC
  365 + next_state = st_ignore;
186 366 }
187 367 } else if (state == st_stream) {
188   - if (key == "dict") {
  368 + if (object_stack.size() == 0) {
  369 + throw std::logic_error("no object on stack in st_stream");
  370 + }
  371 + auto tos = object_stack.back();
  372 + if (!tos.isStream()) {
  373 + // QXXXQ QTC in update mode
  374 + error(value.getStart(), "this object is not a stream");
  375 + parse_error = true;
  376 + } else if (key == "dict") {
  377 + this->saw_dict = true;
189 378 // Since a stream dictionary must be a dictionary, we can
190 379 // use nestedState to transition to st_value.
191 380 nestedState("stream.dict", value, st_object);
192   - // QXXXQ
  381 + auto dict = makeObject(value);
  382 + if (dict.isDictionary()) {
  383 + tos.replaceDict(dict);
  384 + } else {
  385 + // An error had already been given by nestedState
  386 + QTC::TC("qpdf", "QPDF_json stream dict not dict");
  387 + parse_error = true;
  388 + }
193 389 } else if (key == "data") {
  390 + this->saw_data = true;
194 391 // QXXXQ
195 392 } else if (key == "datafile") {
  393 + this->saw_datafile = true;
196 394 // QXXXQ
197 395 } else {
198 396 // Ignore unknown keys for forward compatibility.
  397 + // QXXXQ QTC
199 398 next_state = st_ignore;
200 399 }
201 400 } else if (state == st_object) {
202   - // QXXXQ
  401 + if (!parse_error) {
  402 + auto dict = object_stack.back();
  403 + if (dict.isStream()) {
  404 + dict = dict.getDict();
  405 + }
  406 + dict.replaceKey(key, makeObject(value));
  407 + }
203 408 } else {
204 409 throw std::logic_error(
205 410 "QPDF_json: unknown state " + QUtil::int_to_string(state));
206 411 }
207   -
208   - // QXXXQ
209 412 return true;
210 413 }
211 414  
212 415 bool
213 416 QPDF::JSONReactor::arrayItem(JSON const& value)
214 417 {
215   - // QXXXQ
  418 + if (state == st_object) {
  419 + if (!parse_error) {
  420 + auto tos = object_stack.back();
  421 + tos.appendItem(makeObject(value));
  422 + }
  423 + }
216 424 return true;
217 425 }
218 426  
  427 +QPDFObjectHandle
  428 +QPDF::JSONReactor::makeObject(JSON const& value)
  429 +{
  430 + QPDFObjectHandle result;
  431 + std::string str_v;
  432 + bool bool_v = false;
  433 + std::smatch m;
  434 + if (value.isDictionary()) {
  435 + result = QPDFObjectHandle::newDictionary();
  436 + object_stack.push_back(result);
  437 + } else if (value.isArray()) {
  438 + result = QPDFObjectHandle::newArray();
  439 + object_stack.push_back(result);
  440 + } else if (value.isNull()) {
  441 + result = QPDFObjectHandle::newNull();
  442 + } else if (value.getBool(bool_v)) {
  443 + result = QPDFObjectHandle::newBool(bool_v);
  444 + } else if (value.getNumber(str_v)) {
  445 + if (QUtil::is_long_long(str_v.c_str())) {
  446 + result = QPDFObjectHandle::newInteger(
  447 + QUtil::string_to_ll(str_v.c_str()));
  448 + } else {
  449 + result = QPDFObjectHandle::newReal(str_v);
  450 + }
  451 + } else if (value.getString(str_v)) {
  452 + if (std::regex_match(str_v, m, INDIRECT_OBJ_RE)) {
  453 + result = reserveObject(m[1].str(), m[2].str());
  454 + } else if (std::regex_match(str_v, m, UNICODE_RE)) {
  455 + result = QPDFObjectHandle::newUnicodeString(m[1].str());
  456 + } else if (std::regex_match(str_v, m, BINARY_RE)) {
  457 + result = QPDFObjectHandle::newString(QUtil::hex_decode(m[1].str()));
  458 + } else if (std::regex_match(str_v, m, NAME_RE)) {
  459 + result = QPDFObjectHandle::newName(str_v);
  460 + } else {
  461 + QTC::TC("qpdf", "QPDF_json unrecognized string value");
  462 + error(value.getStart(), "unrecognized string value");
  463 + result = QPDFObjectHandle::newNull();
  464 + }
  465 + }
  466 + if (!result.isInitialized()) {
  467 + throw std::logic_error(
  468 + "JSONReactor::makeObject didn't initialize the object");
  469 + }
  470 +
  471 + // QXXXQ include object number in description
  472 + result.setObjectDescription(
  473 + &this->pdf,
  474 + this->filename + " offset " + QUtil::uint_to_string(value.getStart()));
  475 + return result;
  476 +}
  477 +
219 478 void
220 479 QPDF::createFromJSON(std::string const& json_file)
221 480 {
... ... @@ -225,6 +484,7 @@ QPDF::createFromJSON(std::string const& json_file)
225 484 void
226 485 QPDF::createFromJSON(std::shared_ptr<InputSource> is)
227 486 {
  487 + processMemoryFile(is->getName().c_str(), JSON_PDF, strlen(JSON_PDF));
228 488 importJSON(is, true);
229 489 }
230 490  
... ... @@ -243,10 +503,19 @@ QPDF::updateFromJSON(std::shared_ptr<InputSource> is)
243 503 void
244 504 QPDF::importJSON(std::shared_ptr<InputSource> is, bool must_be_complete)
245 505 {
246   - JSONReactor reactor(*this, must_be_complete);
  506 + JSONReactor reactor(*this, is->getName(), must_be_complete);
247 507 try {
248 508 JSON::parse(*is, &reactor);
249 509 } catch (std::runtime_error& e) {
250 510 throw std::runtime_error(is->getName() + ": " + e.what());
251 511 }
  512 + if (reactor.anyErrors()) {
  513 + throw std::runtime_error(is->getName() + ": errors found in JSON");
  514 + }
  515 + // QXXXQ
  516 + // std::cout << "trailer:\n" << getTrailer().unparse() << std::endl;
  517 + // for (auto& oh: getAllObjects()) {
  518 + // std::cout << oh.unparse() << ":" << std::endl;
  519 + // std::cout << oh.unparseResolved() << std::endl;
  520 + // }
252 521 }
... ...
qpdf/qpdf.testcov
... ... @@ -659,3 +659,12 @@ QPDF_json bad pdf version 0
659 659 QPDF_json top-level array 0
660 660 QPDF_json bad object key 0
661 661 QPDF_json trailer stream 0
  662 +QPDF_json missing trailer 0
  663 +QPDF_json missing objects 0
  664 +QPDF_json ignoring in st_ignore 0
  665 +QPDF_json stream dict not dict 0
  666 +QPDF_json unrecognized string value 0
  667 +QPDF_json data datafile both or neither 0
  668 +QPDF_json stream no dict 0
  669 +QPDF_json trailer no value 0
  670 +QPDF_json value stream both or neither 0
... ...
qpdf/qtest/qpdf-json.test
... ... @@ -33,6 +33,9 @@ my @badfiles = (
33 33 'stream-dict-not-dict',
34 34 'trailer-not-dict',
35 35 'trailer-stream',
  36 + 'missing-trailer',
  37 + 'missing-objects',
  38 + 'obj-key-errors',
36 39 );
37 40  
38 41 $n_tests += scalar(@badfiles);
... ...
qpdf/qtest/qpdf/qjson-bad-json-version1.out
1   -qpdf: qjson-bad-json-version1.json: offset 98: only JSON version 2 is supported
  1 +WARNING: qjson-bad-json-version1.json (offset 98): only JSON version 2 is supported
  2 +qpdf: qjson-bad-json-version1.json: errors found in JSON
... ...
qpdf/qtest/qpdf/qjson-bad-json-version2.out
1   -qpdf: qjson-bad-json-version2.json: offset 98: only JSON version 2 is supported
  1 +WARNING: qjson-bad-json-version2.json (offset 98): only JSON version 2 is supported
  2 +qpdf: qjson-bad-json-version2.json: errors found in JSON
... ...
qpdf/qtest/qpdf/qjson-bad-object-key.out
1   -qpdf: qjson-bad-object-key.json: offset 181: object key should be "trailer" or "obj:n n R"
  1 +WARNING: qjson-bad-object-key.json (offset 181): object key should be "trailer" or "obj:n n R"
  2 +qpdf: qjson-bad-object-key.json: errors found in JSON
... ...
qpdf/qtest/qpdf/qjson-bad-pdf-version1.out
1   -qpdf: qjson-bad-pdf-version1.json: offset 119: invalid PDF version (must be x.y)
  1 +WARNING: qjson-bad-pdf-version1.json (offset 119): invalid PDF version (must be x.y)
  2 +qpdf: qjson-bad-pdf-version1.json: errors found in JSON
... ...
qpdf/qtest/qpdf/qjson-bad-pdf-version2.out
1   -qpdf: qjson-bad-pdf-version2.json: offset 119: invalid PDF version (must be x.y)
  1 +WARNING: qjson-bad-pdf-version2.json (offset 119): invalid PDF version (must be x.y)
  2 +qpdf: qjson-bad-pdf-version2.json: errors found in JSON
... ...
qpdf/qtest/qpdf/qjson-missing-objects.json 0 → 100644
  1 +{
  2 + "version": 2,
  3 + "parameters": {
  4 + "decodelevel": "none"
  5 + },
  6 + "qpdf": {
  7 + "jsonversion": 2,
  8 + "pdfversion": "1.3",
  9 + "maxobjectid": 6
  10 + }
  11 +}
... ...
qpdf/qtest/qpdf/qjson-missing-objects.out 0 → 100644
  1 +WARNING: qjson-missing-objects.json: "qpdf.objects" was not seen
  2 +qpdf: qjson-missing-objects.json: errors found in JSON
... ...
qpdf/qtest/qpdf/qjson-missing-trailer.json 0 → 100644
  1 +{
  2 + "version": 2,
  3 + "parameters": {
  4 + "decodelevel": "none"
  5 + },
  6 + "qpdf": {
  7 + "jsonversion": 2,
  8 + "pdfversion": "1.3",
  9 + "maxobjectid": 6,
  10 + "objects": {
  11 + "obj:1 0 R": {
  12 + "value": {
  13 + "/Pages": "2 0 R",
  14 + "/Type": "/Catalog"
  15 + }
  16 + },
  17 + "obj:2 0 R": {
  18 + "value": {
  19 + "/Count": 1,
  20 + "/Kids": [
  21 + "3 0 R"
  22 + ],
  23 + "/Type": "/Pages"
  24 + }
  25 + },
  26 + "obj:3 0 R": {
  27 + "value": {
  28 + "/Contents": "4 0 R",
  29 + "/MediaBox": [
  30 + 0,
  31 + 0,
  32 + 612,
  33 + 792
  34 + ],
  35 + "/Parent": "2 0 R",
  36 + "/Resources": {
  37 + "/Font": {
  38 + "/F1": "6 0 R"
  39 + },
  40 + "/ProcSet": "5 0 R"
  41 + },
  42 + "/Type": "/Page"
  43 + }
  44 + },
  45 + "obj:4 0 R": {
  46 + "stream": {
  47 + "data": "QlQKICAvRjEgMjQgVGYKICA3MiA3MjAgVGQKICAoUG90YXRvKSBUagpFVAo=",
  48 + "dict": {}
  49 + }
  50 + },
  51 + "obj:5 0 R": {
  52 + "value": [
  53 + "/PDF",
  54 + "/Text"
  55 + ]
  56 + },
  57 + "obj:6 0 R": {
  58 + "value": {
  59 + "/BaseFont": "/Helvetica",
  60 + "/Encoding": "/WinAnsiEncoding",
  61 + "/Subtype": "/Type1",
  62 + "/Type": "/Font"
  63 + }
  64 + }
  65 + }
  66 + }
  67 +}
... ...
qpdf/qtest/qpdf/qjson-missing-trailer.out 0 → 100644
  1 +WARNING: qjson-missing-trailer.json: "qpdf.objects.trailer" was not seen
  2 +qpdf: qjson-missing-trailer.json: errors found in JSON
... ...
qpdf/qtest/qpdf/qjson-no-json-version.out
1   -qpdf: qjson-no-json-version.json: "qpdf.jsonversion" was not seen
  1 +WARNING: qjson-no-json-version.json: "qpdf.jsonversion" was not seen
  2 +qpdf: qjson-no-json-version.json: errors found in JSON
... ...
qpdf/qtest/qpdf/qjson-no-pdf-version.out
1   -qpdf: qjson-no-pdf-version.json: "qpdf.pdfversion" was not seen
  1 +WARNING: qjson-no-pdf-version.json: "qpdf.pdfversion" was not seen
  2 +qpdf: qjson-no-pdf-version.json: errors found in JSON
... ...
qpdf/qtest/qpdf/qjson-no-qpdf-object.out
1   -qpdf: qjson-no-qpdf-object.json: "qpdf" object was not seen
  1 +WARNING: qjson-no-qpdf-object.json: "qpdf" object was not seen
  2 +qpdf: qjson-no-qpdf-object.json: errors found in JSON
... ...
qpdf/qtest/qpdf/qjson-obj-key-errors.json 0 → 100644
  1 +{
  2 + "version": 2,
  3 + "parameters": {
  4 + "decodelevel": "none"
  5 + },
  6 + "qpdf": {
  7 + "jsonversion": 2,
  8 + "pdfversion": "1.3",
  9 + "maxobjectid": 6,
  10 + "objects": {
  11 + "obj:1 0 R": {
  12 + "value": {
  13 + "/Pages": "2 0 R",
  14 + "/Type": "/Catalog"
  15 + }
  16 + },
  17 + "obj:2 0 R": {
  18 + "value": {
  19 + "/Count": 1,
  20 + "/Kids": [
  21 + "3 0 R"
  22 + ],
  23 + "/Type": "/Pages"
  24 + },
  25 + "stream": {
  26 + "data": "QlQKICAvRjEgMjQgVGYKICA3MiA3MjAgVGQKICAoUG90YXRvKSBUagpFVAo=",
  27 + "dict": {}
  28 + }
  29 + },
  30 + "obj:3 0 R": {
  31 + "potato": {
  32 + "salad": "ignored-so-no-string-error",
  33 + "nested": [1, 2, {"x": "y"}]
  34 + }
  35 + },
  36 + "obj:4 0 R": {
  37 + "stream": {
  38 + "potato": "u:salad"
  39 + }
  40 + },
  41 + "obj:5 0 R": {
  42 + "stream": {
  43 + "dict": {"/A": "/B"},
  44 + "data": "QlQKICAvRjEgMjQgVGYKICA3MiA3MjAgVGQKICAoUG90YXRvKSBUagpFVAo=",
  45 + "datafile": "abc"
  46 + }
  47 + },
  48 + "obj:6 0 R": {
  49 + "value": {
  50 + "/BaseFont": "/Helvetica",
  51 + "/Encoding": "/WinAnsiEncoding",
  52 + "/Subtype": "/Type1",
  53 + "/Type": "/Font"
  54 + }
  55 + },
  56 + "trailer": {
  57 + "potato": {
  58 + "/Root": "1 0 R",
  59 + "/Size": 7
  60 + }
  61 + }
  62 + }
  63 + }
  64 +}
... ...
qpdf/qtest/qpdf/qjson-obj-key-errors.out 0 → 100644
  1 +WARNING: qjson-obj-key-errors.json (obj:2 0 R, offset 302): object must have exactly one of "value" or "stream"
  2 +WARNING: qjson-obj-key-errors.json (obj:3 0 R, offset 600): object must have exactly one of "value" or "stream"
  3 +WARNING: qjson-obj-key-errors.json (obj:4 0 R, offset 768): "stream" is missing "dict"
  4 +WARNING: qjson-obj-key-errors.json (obj:4 0 R, offset 768): "stream" must have exactly one of "data" or "datafile"
  5 +WARNING: qjson-obj-key-errors.json (obj:5 0 R, offset 858): "stream" must have exactly one of "data" or "datafile"
  6 +WARNING: qjson-obj-key-errors.json (trailer, offset 1236): "trailer" is missing "value"
  7 +qpdf: qjson-obj-key-errors.json: errors found in JSON
... ...
qpdf/qtest/qpdf/qjson-object-not-dict.out
1   -qpdf: qjson-object-not-dict.json: offset 184: "obj:1 0 R" must be a dictionary
  1 +WARNING: qjson-object-not-dict.json (offset 184): "obj:1 0 R" must be a dictionary
  2 +qpdf: qjson-object-not-dict.json: errors found in JSON
... ...
qpdf/qtest/qpdf/qjson-objects-not-dict.out
1   -qpdf: qjson-objects-not-dict.json: offset 77: "objects" must be a dictionary
  1 +WARNING: qjson-objects-not-dict.json (offset 77): "objects" must be a dictionary
  2 +WARNING: qjson-objects-not-dict.json: "qpdf.objects.trailer" was not seen
  3 +qpdf: qjson-objects-not-dict.json: errors found in JSON
... ...
qpdf/qtest/qpdf/qjson-stream-dict-not-dict.out
1   -qpdf: qjson-stream-dict-not-dict.json: offset 137: "stream.dict" must be a dictionary
  1 +WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 137): "stream.dict" must be a dictionary
  2 +WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 137): unrecognized string value
  3 +WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 117): "stream" must have exactly one of "data" or "datafile"
  4 +WARNING: qjson-stream-dict-not-dict.json: "qpdf.objects.trailer" was not seen
  5 +qpdf: qjson-stream-dict-not-dict.json: errors found in JSON
... ...
qpdf/qtest/qpdf/qjson-stream-not-dict.out
1   -qpdf: qjson-stream-not-dict.json: offset 118: "stream" must be a dictionary
  1 +WARNING: qjson-stream-not-dict.json (obj:1 0 R, offset 118): "stream" must be a dictionary
  2 +WARNING: qjson-stream-not-dict.json: "qpdf.objects.trailer" was not seen
  3 +qpdf: qjson-stream-not-dict.json: errors found in JSON
... ...
qpdf/qtest/qpdf/qjson-trailer-not-dict.json
... ... @@ -63,7 +63,7 @@
63 63 }
64 64 },
65 65 "trailer": {
66   - "value": false,
  66 + "value": false
67 67 }
68 68 }
69 69 }
... ...
qpdf/qtest/qpdf/qjson-trailer-not-dict.out
1   -qpdf: qjson-trailer-not-dict.json: offset 1326: "trailer.value" must be a dictionary
  1 +WARNING: qjson-trailer-not-dict.json (trailer, offset 1327): "trailer.value" must be a dictionary
  2 +qpdf: qjson-trailer-not-dict.json: errors found in JSON
... ...
qpdf/qtest/qpdf/qjson-trailer-stream.json
... ... @@ -63,7 +63,7 @@
63 63 }
64 64 },
65 65 "trailer": {
66   - "stream": {},
  66 + "stream": {}
67 67 }
68 68 }
69 69 }
... ...
qpdf/qtest/qpdf/qjson-trailer-stream.out
1   -qpdf: qjson-trailer-stream.json: offset 1327: the trailer may not be a stream
  1 +WARNING: qjson-trailer-stream.json (trailer, offset 1327): the trailer may not be a stream
  2 +qpdf: qjson-trailer-stream.json: errors found in JSON
... ...