Commit 7e7a9c437982b7ede2af9cd0b12b3e47b4bc3a3d
1 parent
be0ed6ab
Parse objects; stream data is not yet handled
Showing
29 changed files
with
562 additions
and
73 deletions
TODO
| ... | ... | @@ -54,6 +54,15 @@ Soon: Break ground on "Document-level work" |
| 54 | 54 | Output JSON v2 |
| 55 | 55 | ============== |
| 56 | 56 | |
| 57 | +XXX | |
| 58 | + | |
| 59 | +* Reread from perspective of update | |
| 60 | +* Test all ignore cases with QTC | |
| 61 | +* Test case of correct file with dict before data/datafile | |
| 62 | +* Have a test case if possible that exercises the object description | |
| 63 | + which means we need some kind of semantic error that gets caught | |
| 64 | + after creation. | |
| 65 | + | |
| 57 | 66 | Try to never flatten pages tree. Make sure we do something reasonable |
| 58 | 67 | with pages tree repair. The problem is that if pages tree repair is |
| 59 | 68 | done as a side effect of running --json, the qpdf part of the json may | ... | ... |
include/qpdf/QPDF.hh
| ... | ... | @@ -998,7 +998,7 @@ class QPDF |
| 998 | 998 | class JSONReactor: public JSON::Reactor |
| 999 | 999 | { |
| 1000 | 1000 | public: |
| 1001 | - JSONReactor(QPDF&, bool must_be_complete); | |
| 1001 | + JSONReactor(QPDF&, std::string const& filename, bool must_be_complete); | |
| 1002 | 1002 | virtual ~JSONReactor() = default; |
| 1003 | 1003 | virtual void dictionaryStart() override; |
| 1004 | 1004 | virtual void arrayStart() override; |
| ... | ... | @@ -1008,31 +1008,51 @@ class QPDF |
| 1008 | 1008 | dictionaryItem(std::string const& key, JSON const& value) override; |
| 1009 | 1009 | virtual bool arrayItem(JSON const& value) override; |
| 1010 | 1010 | |
| 1011 | + bool anyErrors() const; | |
| 1012 | + | |
| 1011 | 1013 | private: |
| 1012 | 1014 | enum state_e { |
| 1013 | 1015 | st_initial, |
| 1014 | 1016 | st_top, |
| 1015 | - st_ignore, | |
| 1016 | 1017 | st_qpdf, |
| 1017 | - st_objects_top, | |
| 1018 | - st_trailer_top, | |
| 1018 | + st_objects, | |
| 1019 | + st_trailer, | |
| 1019 | 1020 | st_object_top, |
| 1020 | 1021 | st_stream, |
| 1021 | 1022 | st_object, |
| 1023 | + st_ignore, | |
| 1022 | 1024 | }; |
| 1023 | 1025 | |
| 1024 | 1026 | void containerStart(); |
| 1025 | 1027 | void nestedState(std::string const& key, JSON const& value, state_e); |
| 1028 | + QPDFObjectHandle makeObject(JSON const& value); | |
| 1029 | + void error(size_t offset, std::string const& message); | |
| 1030 | + QPDFObjectHandle | |
| 1031 | + reserveObject(std::string const& obj, std::string const& gen); | |
| 1032 | + void replaceObject( | |
| 1033 | + QPDFObjectHandle to_replace, QPDFObjectHandle replacement); | |
| 1026 | 1034 | |
| 1027 | 1035 | QPDF& pdf; |
| 1036 | + std::string filename; | |
| 1028 | 1037 | bool must_be_complete; |
| 1038 | + bool errors; | |
| 1039 | + bool parse_error; | |
| 1029 | 1040 | bool saw_qpdf; |
| 1041 | + bool saw_objects; | |
| 1030 | 1042 | bool saw_json_version; |
| 1031 | 1043 | bool saw_pdf_version; |
| 1032 | 1044 | bool saw_trailer; |
| 1033 | 1045 | state_e state; |
| 1034 | 1046 | state_e next_state; |
| 1047 | + std::string cur_object; | |
| 1048 | + bool saw_value; | |
| 1049 | + bool saw_stream; | |
| 1050 | + bool saw_dict; | |
| 1051 | + bool saw_data; | |
| 1052 | + bool saw_datafile; | |
| 1035 | 1053 | std::vector<state_e> state_stack; |
| 1054 | + std::vector<QPDFObjectHandle> object_stack; | |
| 1055 | + std::set<QPDFObjGen> reserved; | |
| 1036 | 1056 | }; |
| 1037 | 1057 | friend class JSONReactor; |
| 1038 | 1058 | |
| ... | ... | @@ -1080,6 +1100,7 @@ class QPDF |
| 1080 | 1100 | void resolveObjectsInStream(int obj_stream_number); |
| 1081 | 1101 | void stopOnError(std::string const& message); |
| 1082 | 1102 | QPDFObjectHandle reserveObjectIfNotExists(int objid, int gen); |
| 1103 | + QPDFObjectHandle reserveStream(int objid, int gen); | |
| 1083 | 1104 | |
| 1084 | 1105 | // Calls finish() on the pipeline when done but does not delete it |
| 1085 | 1106 | bool pipeStreamData( | ... | ... |
include/qpdf/QPDFObjectHandle.hh
libqpdf/QPDF.cc
| ... | ... | @@ -2167,6 +2167,13 @@ QPDF::reserveObjectIfNotExists(int objid, int gen) |
| 2167 | 2167 | } |
| 2168 | 2168 | |
| 2169 | 2169 | QPDFObjectHandle |
| 2170 | +QPDF::reserveStream(int objid, int gen) | |
| 2171 | +{ | |
| 2172 | + return QPDFObjectHandle::Factory::newStream( | |
| 2173 | + this, objid, gen, QPDFObjectHandle::newDictionary(), 0, 0); | |
| 2174 | +} | |
| 2175 | + | |
| 2176 | +QPDFObjectHandle | |
| 2170 | 2177 | QPDF::getObjectByObjGen(QPDFObjGen const& og) |
| 2171 | 2178 | { |
| 2172 | 2179 | return getObjectByID(og.getObj(), og.getGen()); | ... | ... |
libqpdf/QPDF_json.cc
| 1 | 1 | #include <qpdf/QPDF.hh> |
| 2 | 2 | |
| 3 | 3 | #include <qpdf/FileInputSource.hh> |
| 4 | +#include <qpdf/QIntC.hh> | |
| 4 | 5 | #include <qpdf/QTC.hh> |
| 5 | 6 | #include <qpdf/QUtil.hh> |
| 6 | 7 | #include <regex> |
| 7 | 8 | |
| 8 | -namespace | |
| 9 | -{ | |
| 10 | - class JSONExc: public std::runtime_error | |
| 11 | - { | |
| 12 | - public: | |
| 13 | - JSONExc(JSON const& value, std::string const& msg) : | |
| 14 | - std::runtime_error( | |
| 15 | - "offset " + QUtil::uint_to_string(value.getStart()) + ": " + | |
| 16 | - msg) | |
| 17 | - { | |
| 18 | - } | |
| 19 | - }; | |
| 20 | -} // namespace | |
| 9 | +// This chart shows an example of the state transitions that would | |
| 10 | +// occur in parsing a minimal file. | |
| 11 | + | |
| 12 | +// | st_initial | |
| 13 | +// { | -> st_top | |
| 14 | +// "qpdf": { | -> st_qpdf | |
| 15 | +// "objects": { | -> st_objects | |
| 16 | +// "obj:1 0 R": { | -> st_object_top | |
| 17 | +// "value": { | -> st_object | |
| 18 | +// "/Pages": "2 0 R", | ... | |
| 19 | +// "/Type": "/Catalog" | ... | |
| 20 | +// } | <- st_object_top | |
| 21 | +// }, | <- st_objects | |
| 22 | +// "obj:2 0 R": { | -> st_object_top | |
| 23 | +// "value": 12 | -> st_object | |
| 24 | +// | <- st_object_top | |
| 25 | +// }, | <- st_objects | |
| 26 | +// "obj:4 0 R": { | -> st_object_top | |
| 27 | +// "stream": { | -> st_stream | |
| 28 | +// "data": "cG90YXRv", | ... | |
| 29 | +// "dict": { | -> st_object | |
| 30 | +// "/K": true | ... | |
| 31 | +// } | <- st_stream | |
| 32 | +// } | <- st_object_top | |
| 33 | +// }, | <- st_objects | |
| 34 | +// "trailer": { | -> st_trailer | |
| 35 | +// "value": { | -> st_object | |
| 36 | +// "/Root": "1 0 R", | ... | |
| 37 | +// "/Size": 7 | ... | |
| 38 | +// } | <- st_trailer | |
| 39 | +// } | <- st_objects | |
| 40 | +// } | <- st_qpdf | |
| 41 | +// } | <- st_top | |
| 42 | +// } | <- st_initial | |
| 43 | + | |
| 44 | +static char const* JSON_PDF = ( | |
| 45 | + // force line break | |
| 46 | + "%PDF-1.3\n" | |
| 47 | + "xref\n" | |
| 48 | + "0 1\n" | |
| 49 | + "0000000000 65535 f \n" | |
| 50 | + "trailer << /Size 1 >>\n" | |
| 51 | + "startxref\n" | |
| 52 | + "9\n" | |
| 53 | + "%%EOF\n"); | |
| 21 | 54 | |
| 22 | 55 | static std::regex PDF_VERSION_RE("^\\d+\\.\\d+$"); |
| 23 | 56 | static std::regex OBJ_KEY_RE("^obj:(\\d+) (\\d+) R$"); |
| 57 | +static std::regex INDIRECT_OBJ_RE("^(\\d+) (\\d+) R$"); | |
| 58 | +static std::regex UNICODE_RE("^u:(.*)$"); | |
| 59 | +static std::regex BINARY_RE("^b:((?:[0-9a-fA-F]{2})*)$"); | |
| 60 | +static std::regex NAME_RE("^/.*$"); | |
| 24 | 61 | |
| 25 | -QPDF::JSONReactor::JSONReactor(QPDF& pdf, bool must_be_complete) : | |
| 62 | +QPDF::JSONReactor::JSONReactor( | |
| 63 | + QPDF& pdf, std::string const& filename, bool must_be_complete) : | |
| 26 | 64 | pdf(pdf), |
| 65 | + filename(filename), | |
| 27 | 66 | must_be_complete(must_be_complete), |
| 67 | + errors(false), | |
| 68 | + parse_error(false), | |
| 28 | 69 | saw_qpdf(false), |
| 70 | + saw_objects(false), | |
| 29 | 71 | saw_json_version(false), |
| 30 | 72 | saw_pdf_version(false), |
| 31 | 73 | saw_trailer(false), |
| 32 | 74 | state(st_initial), |
| 33 | - next_state(st_top) | |
| 75 | + next_state(st_top), | |
| 76 | + saw_value(false), | |
| 77 | + saw_stream(false), | |
| 78 | + saw_dict(false), | |
| 79 | + saw_data(false), | |
| 80 | + saw_datafile(false) | |
| 34 | 81 | { |
| 35 | 82 | state_stack.push_back(st_initial); |
| 36 | 83 | } |
| 37 | 84 | |
| 38 | 85 | void |
| 86 | +QPDF::JSONReactor::error(size_t offset, std::string const& msg) | |
| 87 | +{ | |
| 88 | + this->errors = true; | |
| 89 | + this->pdf.warn( | |
| 90 | + qpdf_e_json, this->cur_object, QIntC::to_offset(offset), msg); | |
| 91 | +} | |
| 92 | + | |
| 93 | +bool | |
| 94 | +QPDF::JSONReactor::anyErrors() const | |
| 95 | +{ | |
| 96 | + return this->errors; | |
| 97 | +} | |
| 98 | + | |
| 99 | +void | |
| 39 | 100 | QPDF::JSONReactor::containerStart() |
| 40 | 101 | { |
| 41 | 102 | state_stack.push_back(state); |
| ... | ... | @@ -46,7 +107,6 @@ void |
| 46 | 107 | QPDF::JSONReactor::dictionaryStart() |
| 47 | 108 | { |
| 48 | 109 | containerStart(); |
| 49 | - // QXXXQ | |
| 50 | 110 | } |
| 51 | 111 | |
| 52 | 112 | void |
| ... | ... | @@ -57,7 +117,6 @@ QPDF::JSONReactor::arrayStart() |
| 57 | 117 | QTC::TC("qpdf", "QPDF_json top-level array"); |
| 58 | 118 | throw std::runtime_error("QPDF JSON must be a dictionary"); |
| 59 | 119 | } |
| 60 | - // QXXXQ | |
| 61 | 120 | } |
| 62 | 121 | |
| 63 | 122 | void |
| ... | ... | @@ -68,23 +127,102 @@ QPDF::JSONReactor::containerEnd(JSON const& value) |
| 68 | 127 | if (state == st_initial) { |
| 69 | 128 | if (!this->saw_qpdf) { |
| 70 | 129 | QTC::TC("qpdf", "QPDF_json missing qpdf"); |
| 71 | - throw std::runtime_error("\"qpdf\" object was not seen"); | |
| 130 | + error(0, "\"qpdf\" object was not seen"); | |
| 131 | + } else { | |
| 132 | + if (!this->saw_json_version) { | |
| 133 | + QTC::TC("qpdf", "QPDF_json missing json version"); | |
| 134 | + error(0, "\"qpdf.jsonversion\" was not seen"); | |
| 135 | + } | |
| 136 | + if (must_be_complete && !this->saw_pdf_version) { | |
| 137 | + QTC::TC("qpdf", "QPDF_json missing pdf version"); | |
| 138 | + error(0, "\"qpdf.pdfversion\" was not seen"); | |
| 139 | + } | |
| 140 | + if (!this->saw_objects) { | |
| 141 | + QTC::TC("qpdf", "QPDF_json missing objects"); | |
| 142 | + error(0, "\"qpdf.objects\" was not seen"); | |
| 143 | + } else { | |
| 144 | + if (must_be_complete && !this->saw_trailer) { | |
| 145 | + QTC::TC("qpdf", "QPDF_json missing trailer"); | |
| 146 | + error(0, "\"qpdf.objects.trailer\" was not seen"); | |
| 147 | + } | |
| 148 | + } | |
| 72 | 149 | } |
| 73 | - if (!this->saw_json_version) { | |
| 74 | - QTC::TC("qpdf", "QPDF_json missing json version"); | |
| 75 | - throw std::runtime_error("\"qpdf.jsonversion\" was not seen"); | |
| 150 | + } else if (state == st_objects) { | |
| 151 | + if (parse_error) { | |
| 152 | + // ignore | |
| 153 | + } else if (cur_object == "trailer") { | |
| 154 | + if (!saw_value) { | |
| 155 | + QTC::TC("qpdf", "QPDF_json trailer no value"); | |
| 156 | + error(value.getStart(), "\"trailer\" is missing \"value\""); | |
| 157 | + } | |
| 158 | + } else if (saw_value == saw_stream) { | |
| 159 | + QTC::TC("qpdf", "QPDF_json value stream both or neither"); | |
| 160 | + error( | |
| 161 | + value.getStart(), | |
| 162 | + "object must have exactly one of \"value\" or \"stream\""); | |
| 76 | 163 | } |
| 77 | - if (must_be_complete && !this->saw_pdf_version) { | |
| 78 | - QTC::TC("qpdf", "QPDF_json missing pdf version"); | |
| 79 | - throw std::runtime_error("\"qpdf.pdfversion\" was not seen"); | |
| 164 | + object_stack.clear(); | |
| 165 | + this->cur_object = ""; | |
| 166 | + this->saw_dict = false; | |
| 167 | + this->saw_data = false; | |
| 168 | + this->saw_datafile = false; | |
| 169 | + this->saw_value = false; | |
| 170 | + this->saw_stream = false; | |
| 171 | + } else if (state == st_object_top) { | |
| 172 | + if (saw_stream) { | |
| 173 | + if (!saw_dict) { | |
| 174 | + QTC::TC("qpdf", "QPDF_json stream no dict"); | |
| 175 | + error(value.getStart(), "\"stream\" is missing \"dict\""); | |
| 176 | + } | |
| 177 | + if (must_be_complete) { | |
| 178 | + if (saw_data == saw_datafile) { | |
| 179 | + QTC::TC("qpdf", "QPDF_json data datafile both or neither"); | |
| 180 | + error( | |
| 181 | + value.getStart(), | |
| 182 | + "\"stream\" must have exactly one of \"data\" or " | |
| 183 | + "\"datafile\""); | |
| 184 | + } | |
| 185 | + } else if (saw_data && saw_datafile) { | |
| 186 | + // QXXXQ | |
| 187 | + /// QTC::TC("qpdf", "QPDF_json data and datafile"); | |
| 188 | + error( | |
| 189 | + value.getStart(), | |
| 190 | + "\"stream\" may at most one of \"data\" or \"datafile\""); | |
| 191 | + } | |
| 192 | + } | |
| 193 | + } else if ((state == st_stream) || (state == st_object)) { | |
| 194 | + if (!parse_error) { | |
| 195 | + object_stack.pop_back(); | |
| 80 | 196 | } |
| 81 | - if (must_be_complete && !this->saw_trailer) { | |
| 82 | - /// QTC::TC("qpdf", "QPDF_json missing trailer"); | |
| 83 | - throw std::runtime_error("\"qpdf.objects.trailer\" was not seen"); | |
| 197 | + } else if (state == st_qpdf) { | |
| 198 | + for (auto const& og: this->reserved) { | |
| 199 | + // QXXXQ | |
| 200 | + // QTC::TC("qpdf", "QPDF_json non-trivial null reserved"); | |
| 201 | + this->pdf.replaceObject(og, QPDFObjectHandle::newNull()); | |
| 84 | 202 | } |
| 203 | + this->reserved.clear(); | |
| 85 | 204 | } |
| 205 | +} | |
| 86 | 206 | |
| 87 | - // QXXXQ | |
| 207 | +QPDFObjectHandle | |
| 208 | +QPDF::JSONReactor::reserveObject(std::string const& obj, std::string const& gen) | |
| 209 | +{ | |
| 210 | + int o = QUtil::string_to_int(obj.c_str()); | |
| 211 | + int g = QUtil::string_to_int(gen.c_str()); | |
| 212 | + auto oh = pdf.reserveObjectIfNotExists(o, g); | |
| 213 | + if (oh.isReserved()) { | |
| 214 | + this->reserved.insert(QPDFObjGen(o, g)); | |
| 215 | + } | |
| 216 | + return oh; | |
| 217 | +} | |
| 218 | + | |
| 219 | +void | |
| 220 | +QPDF::JSONReactor::replaceObject( | |
| 221 | + QPDFObjectHandle to_replace, QPDFObjectHandle replacement) | |
| 222 | +{ | |
| 223 | + auto og = to_replace.getObjGen(); | |
| 224 | + this->reserved.erase(og); | |
| 225 | + this->pdf.replaceObject(og, replacement); | |
| 88 | 226 | } |
| 89 | 227 | |
| 90 | 228 | void |
| ... | ... | @@ -100,16 +238,20 @@ QPDF::JSONReactor::nestedState( |
| 100 | 238 | { |
| 101 | 239 | // Use this method when the next state is for processing a nested |
| 102 | 240 | // dictionary. |
| 103 | - if (!value.isDictionary()) { | |
| 104 | - throw JSONExc(value, "\"" + key + "\" must be a dictionary"); | |
| 241 | + if (value.isDictionary()) { | |
| 242 | + this->next_state = next; | |
| 243 | + } else { | |
| 244 | + error(value.getStart(), "\"" + key + "\" must be a dictionary"); | |
| 245 | + this->next_state = st_ignore; | |
| 246 | + this->parse_error = true; | |
| 105 | 247 | } |
| 106 | - this->next_state = next; | |
| 107 | 248 | } |
| 108 | 249 | |
| 109 | 250 | bool |
| 110 | 251 | QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) |
| 111 | 252 | { |
| 112 | 253 | if (state == st_ignore) { |
| 254 | + QTC::TC("qpdf", "QPDF_json ignoring in st_ignore"); | |
| 113 | 255 | // ignore |
| 114 | 256 | } else if (state == st_top) { |
| 115 | 257 | if (key == "qpdf") { |
| ... | ... | @@ -118,6 +260,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) |
| 118 | 260 | } else { |
| 119 | 261 | // Ignore all other fields for forward compatibility. |
| 120 | 262 | // Don't use nestedState since this can be any type. |
| 263 | + // QXXXQ QTC | |
| 121 | 264 | next_state = st_ignore; |
| 122 | 265 | } |
| 123 | 266 | } else if (state == st_qpdf) { |
| ... | ... | @@ -126,7 +269,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) |
| 126 | 269 | std::string v; |
| 127 | 270 | if (!(value.getNumber(v) && (v == "2"))) { |
| 128 | 271 | QTC::TC("qpdf", "QPDF_json bad json version"); |
| 129 | - throw JSONExc(value, "only JSON version 2 is supported"); | |
| 272 | + error(value.getStart(), "only JSON version 2 is supported"); | |
| 130 | 273 | } |
| 131 | 274 | } else if (key == "pdfversion") { |
| 132 | 275 | this->saw_pdf_version = true; |
| ... | ... | @@ -141,81 +284,197 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) |
| 141 | 284 | } |
| 142 | 285 | if (!version_okay) { |
| 143 | 286 | QTC::TC("qpdf", "QPDF_json bad pdf version"); |
| 144 | - throw JSONExc(value, "invalid PDF version (must be x.y)"); | |
| 287 | + error(value.getStart(), "invalid PDF version (must be x.y)"); | |
| 145 | 288 | } |
| 146 | 289 | } else if (key == "objects") { |
| 147 | - nestedState(key, value, st_objects_top); | |
| 290 | + this->saw_objects = true; | |
| 291 | + nestedState(key, value, st_objects); | |
| 148 | 292 | } else { |
| 149 | 293 | // ignore unknown keys for forward compatibility |
| 294 | + // QXXXQ QTC | |
| 295 | + next_state = st_ignore; | |
| 150 | 296 | } |
| 151 | - } else if (state == st_objects_top) { | |
| 297 | + } else if (state == st_objects) { | |
| 152 | 298 | std::smatch m; |
| 153 | 299 | if (key == "trailer") { |
| 154 | 300 | this->saw_trailer = true; |
| 155 | - nestedState(key, value, st_trailer_top); | |
| 156 | - // QXXXQ | |
| 301 | + nestedState(key, value, st_trailer); | |
| 302 | + this->cur_object = "trailer"; | |
| 157 | 303 | } else if (std::regex_match(key, m, OBJ_KEY_RE)) { |
| 304 | + // QXXXQ remember to handle null for delete | |
| 305 | + object_stack.push_back(reserveObject(m[1].str(), m[2].str())); | |
| 158 | 306 | nestedState(key, value, st_object_top); |
| 159 | - // QXXXQ | |
| 307 | + this->cur_object = key; | |
| 160 | 308 | } else { |
| 161 | 309 | QTC::TC("qpdf", "QPDF_json bad object key"); |
| 162 | - throw JSONExc( | |
| 163 | - value, "object key should be \"trailer\" or \"obj:n n R\""); | |
| 310 | + error( | |
| 311 | + value.getStart(), | |
| 312 | + "object key should be \"trailer\" or \"obj:n n R\""); | |
| 313 | + next_state = st_ignore; | |
| 314 | + parse_error = true; | |
| 164 | 315 | } |
| 165 | 316 | } else if (state == st_object_top) { |
| 317 | + if (object_stack.size() == 0) { | |
| 318 | + throw std::logic_error("no object on stack in st_object_top"); | |
| 319 | + } | |
| 320 | + auto tos = object_stack.back(); | |
| 321 | + QPDFObjectHandle replacement; | |
| 166 | 322 | if (key == "value") { |
| 167 | 323 | // Don't use nestedState since this can have any type. |
| 324 | + this->saw_value = true; | |
| 168 | 325 | next_state = st_object; |
| 169 | - // QXXXQ | |
| 326 | + replacement = makeObject(value); | |
| 327 | + replaceObject(tos, replacement); | |
| 170 | 328 | } else if (key == "stream") { |
| 329 | + this->saw_stream = true; | |
| 171 | 330 | nestedState(key, value, st_stream); |
| 172 | - // QXXXQ | |
| 331 | + if (tos.isStream()) { | |
| 332 | + // QXXXQ reusing -- need QTC | |
| 333 | + } else { | |
| 334 | + replacement = | |
| 335 | + pdf.reserveStream(tos.getObjectID(), tos.getGeneration()); | |
| 336 | + replaceObject(tos, replacement); | |
| 337 | + replacement.replaceStreamData( | |
| 338 | + "", "<<>>"_qpdf, "<<>>"_qpdf); // QXXXQ | |
| 339 | + } | |
| 173 | 340 | } else { |
| 174 | 341 | // Ignore unknown keys for forward compatibility |
| 342 | + // QXXXQ QTC | |
| 343 | + next_state = st_ignore; | |
| 175 | 344 | } |
| 176 | - } else if (state == st_trailer_top) { | |
| 345 | + if (replacement.isInitialized()) { | |
| 346 | + object_stack.pop_back(); | |
| 347 | + object_stack.push_back(replacement); | |
| 348 | + } | |
| 349 | + } else if (state == st_trailer) { | |
| 177 | 350 | if (key == "value") { |
| 351 | + this->saw_value = true; | |
| 178 | 352 | // The trailer must be a dictionary, so we can use nestedState. |
| 179 | 353 | nestedState("trailer.value", value, st_object); |
| 180 | - // QXXXQ | |
| 354 | + this->pdf.m->trailer = makeObject(value); | |
| 181 | 355 | } else if (key == "stream") { |
| 356 | + // Don't need to set saw_stream here since there's already | |
| 357 | + // an error. | |
| 182 | 358 | QTC::TC("qpdf", "QPDF_json trailer stream"); |
| 183 | - throw JSONExc(value, "the trailer may not be a stream"); | |
| 359 | + error(value.getStart(), "the trailer may not be a stream"); | |
| 360 | + next_state = st_ignore; | |
| 361 | + parse_error = true; | |
| 184 | 362 | } else { |
| 185 | 363 | // Ignore unknown keys for forward compatibility |
| 364 | + // QXXXQ QTC | |
| 365 | + next_state = st_ignore; | |
| 186 | 366 | } |
| 187 | 367 | } else if (state == st_stream) { |
| 188 | - if (key == "dict") { | |
| 368 | + if (object_stack.size() == 0) { | |
| 369 | + throw std::logic_error("no object on stack in st_stream"); | |
| 370 | + } | |
| 371 | + auto tos = object_stack.back(); | |
| 372 | + if (!tos.isStream()) { | |
| 373 | + // QXXXQ QTC in update mode | |
| 374 | + error(value.getStart(), "this object is not a stream"); | |
| 375 | + parse_error = true; | |
| 376 | + } else if (key == "dict") { | |
| 377 | + this->saw_dict = true; | |
| 189 | 378 | // Since a stream dictionary must be a dictionary, we can |
| 190 | 379 | // use nestedState to transition to st_object. |
| 191 | 380 | nestedState("stream.dict", value, st_object); |
| 192 | - // QXXXQ | |
| 381 | + auto dict = makeObject(value); | |
| 382 | + if (dict.isDictionary()) { | |
| 383 | + tos.replaceDict(dict); | |
| 384 | + } else { | |
| 385 | + // An error has already been reported by nestedState | |
| 386 | + QTC::TC("qpdf", "QPDF_json stream dict not dict"); | |
| 387 | + parse_error = true; | |
| 388 | + } | |
| 193 | 389 | } else if (key == "data") { |
| 390 | + this->saw_data = true; | |
| 194 | 391 | // QXXXQ |
| 195 | 392 | } else if (key == "datafile") { |
| 393 | + this->saw_datafile = true; | |
| 196 | 394 | // QXXXQ |
| 197 | 395 | } else { |
| 198 | 396 | // Ignore unknown keys for forward compatibility. |
| 397 | + // QXXXQ QTC | |
| 199 | 398 | next_state = st_ignore; |
| 200 | 399 | } |
| 201 | 400 | } else if (state == st_object) { |
| 202 | - // QXXXQ | |
| 401 | + if (!parse_error) { | |
| 402 | + auto dict = object_stack.back(); | |
| 403 | + if (dict.isStream()) { | |
| 404 | + dict = dict.getDict(); | |
| 405 | + } | |
| 406 | + dict.replaceKey(key, makeObject(value)); | |
| 407 | + } | |
| 203 | 408 | } else { |
| 204 | 409 | throw std::logic_error( |
| 205 | 410 | "QPDF_json: unknown state " + QUtil::int_to_string(state)); |
| 206 | 411 | } |
| 207 | - | |
| 208 | - // QXXXQ | |
| 209 | 412 | return true; |
| 210 | 413 | } |
| 211 | 414 | |
| 212 | 415 | bool |
| 213 | 416 | QPDF::JSONReactor::arrayItem(JSON const& value) |
| 214 | 417 | { |
| 215 | - // QXXXQ | |
| 418 | + if (state == st_object) { | |
| 419 | + if (!parse_error) { | |
| 420 | + auto tos = object_stack.back(); | |
| 421 | + tos.appendItem(makeObject(value)); | |
| 422 | + } | |
| 423 | + } | |
| 216 | 424 | return true; |
| 217 | 425 | } |
| 218 | 426 | |
| 427 | +QPDFObjectHandle | |
| 428 | +QPDF::JSONReactor::makeObject(JSON const& value) | |
| 429 | +{ | |
| 430 | + QPDFObjectHandle result; | |
| 431 | + std::string str_v; | |
| 432 | + bool bool_v = false; | |
| 433 | + std::smatch m; | |
| 434 | + if (value.isDictionary()) { | |
| 435 | + result = QPDFObjectHandle::newDictionary(); | |
| 436 | + object_stack.push_back(result); | |
| 437 | + } else if (value.isArray()) { | |
| 438 | + result = QPDFObjectHandle::newArray(); | |
| 439 | + object_stack.push_back(result); | |
| 440 | + } else if (value.isNull()) { | |
| 441 | + result = QPDFObjectHandle::newNull(); | |
| 442 | + } else if (value.getBool(bool_v)) { | |
| 443 | + result = QPDFObjectHandle::newBool(bool_v); | |
| 444 | + } else if (value.getNumber(str_v)) { | |
| 445 | + if (QUtil::is_long_long(str_v.c_str())) { | |
| 446 | + result = QPDFObjectHandle::newInteger( | |
| 447 | + QUtil::string_to_ll(str_v.c_str())); | |
| 448 | + } else { | |
| 449 | + result = QPDFObjectHandle::newReal(str_v); | |
| 450 | + } | |
| 451 | + } else if (value.getString(str_v)) { | |
| 452 | + if (std::regex_match(str_v, m, INDIRECT_OBJ_RE)) { | |
| 453 | + result = reserveObject(m[1].str(), m[2].str()); | |
| 454 | + } else if (std::regex_match(str_v, m, UNICODE_RE)) { | |
| 455 | + result = QPDFObjectHandle::newUnicodeString(m[1].str()); | |
| 456 | + } else if (std::regex_match(str_v, m, BINARY_RE)) { | |
| 457 | + result = QPDFObjectHandle::newString(QUtil::hex_decode(m[1].str())); | |
| 458 | + } else if (std::regex_match(str_v, m, NAME_RE)) { | |
| 459 | + result = QPDFObjectHandle::newName(str_v); | |
| 460 | + } else { | |
| 461 | + QTC::TC("qpdf", "QPDF_json unrecognized string value"); | |
| 462 | + error(value.getStart(), "unrecognized string value"); | |
| 463 | + result = QPDFObjectHandle::newNull(); | |
| 464 | + } | |
| 465 | + } | |
| 466 | + if (!result.isInitialized()) { | |
| 467 | + throw std::logic_error( | |
| 468 | + "JSONReactor::makeObject didn't initialize the object"); | |
| 469 | + } | |
| 470 | + | |
| 471 | + // QXXXQ include object number in description | |
| 472 | + result.setObjectDescription( | |
| 473 | + &this->pdf, | |
| 474 | + this->filename + " offset " + QUtil::uint_to_string(value.getStart())); | |
| 475 | + return result; | |
| 476 | +} | |
| 477 | + | |
| 219 | 478 | void |
| 220 | 479 | QPDF::createFromJSON(std::string const& json_file) |
| 221 | 480 | { |
| ... | ... | @@ -225,6 +484,7 @@ QPDF::createFromJSON(std::string const& json_file) |
| 225 | 484 | void |
| 226 | 485 | QPDF::createFromJSON(std::shared_ptr<InputSource> is) |
| 227 | 486 | { |
| 487 | + processMemoryFile(is->getName().c_str(), JSON_PDF, strlen(JSON_PDF)); | |
| 228 | 488 | importJSON(is, true); |
| 229 | 489 | } |
| 230 | 490 | |
| ... | ... | @@ -243,10 +503,19 @@ QPDF::updateFromJSON(std::shared_ptr<InputSource> is) |
| 243 | 503 | void |
| 244 | 504 | QPDF::importJSON(std::shared_ptr<InputSource> is, bool must_be_complete) |
| 245 | 505 | { |
| 246 | - JSONReactor reactor(*this, must_be_complete); | |
| 506 | + JSONReactor reactor(*this, is->getName(), must_be_complete); | |
| 247 | 507 | try { |
| 248 | 508 | JSON::parse(*is, &reactor); |
| 249 | 509 | } catch (std::runtime_error& e) { |
| 250 | 510 | throw std::runtime_error(is->getName() + ": " + e.what()); |
| 251 | 511 | } |
| 512 | + if (reactor.anyErrors()) { | |
| 513 | + throw std::runtime_error(is->getName() + ": errors found in JSON"); | |
| 514 | + } | |
| 515 | + // QXXXQ | |
| 516 | + // std::cout << "trailer:\n" << getTrailer().unparse() << std::endl; | |
| 517 | + // for (auto& oh: getAllObjects()) { | |
| 518 | + // std::cout << oh.unparse() << ":" << std::endl; | |
| 519 | + // std::cout << oh.unparseResolved() << std::endl; | |
| 520 | + // } | |
| 252 | 521 | } | ... | ... |
qpdf/qpdf.testcov
| ... | ... | @@ -659,3 +659,12 @@ QPDF_json bad pdf version 0 |
| 659 | 659 | QPDF_json top-level array 0 |
| 660 | 660 | QPDF_json bad object key 0 |
| 661 | 661 | QPDF_json trailer stream 0 |
| 662 | +QPDF_json missing trailer 0 | |
| 663 | +QPDF_json missing objects 0 | |
| 664 | +QPDF_json ignoring in st_ignore 0 | |
| 665 | +QPDF_json stream dict not dict 0 | |
| 666 | +QPDF_json unrecognized string value 0 | |
| 667 | +QPDF_json data datafile both or neither 0 | |
| 668 | +QPDF_json stream no dict 0 | |
| 669 | +QPDF_json trailer no value 0 | |
| 670 | +QPDF_json value stream both or neither 0 | ... | ... |
qpdf/qtest/qpdf-json.test
qpdf/qtest/qpdf/qjson-bad-json-version1.out
qpdf/qtest/qpdf/qjson-bad-json-version2.out
qpdf/qtest/qpdf/qjson-bad-object-key.out
qpdf/qtest/qpdf/qjson-bad-pdf-version1.out
qpdf/qtest/qpdf/qjson-bad-pdf-version2.out
qpdf/qtest/qpdf/qjson-missing-objects.json
0 → 100644
qpdf/qtest/qpdf/qjson-missing-objects.out
0 → 100644
qpdf/qtest/qpdf/qjson-missing-trailer.json
0 → 100644
| 1 | +{ | |
| 2 | + "version": 2, | |
| 3 | + "parameters": { | |
| 4 | + "decodelevel": "none" | |
| 5 | + }, | |
| 6 | + "qpdf": { | |
| 7 | + "jsonversion": 2, | |
| 8 | + "pdfversion": "1.3", | |
| 9 | + "maxobjectid": 6, | |
| 10 | + "objects": { | |
| 11 | + "obj:1 0 R": { | |
| 12 | + "value": { | |
| 13 | + "/Pages": "2 0 R", | |
| 14 | + "/Type": "/Catalog" | |
| 15 | + } | |
| 16 | + }, | |
| 17 | + "obj:2 0 R": { | |
| 18 | + "value": { | |
| 19 | + "/Count": 1, | |
| 20 | + "/Kids": [ | |
| 21 | + "3 0 R" | |
| 22 | + ], | |
| 23 | + "/Type": "/Pages" | |
| 24 | + } | |
| 25 | + }, | |
| 26 | + "obj:3 0 R": { | |
| 27 | + "value": { | |
| 28 | + "/Contents": "4 0 R", | |
| 29 | + "/MediaBox": [ | |
| 30 | + 0, | |
| 31 | + 0, | |
| 32 | + 612, | |
| 33 | + 792 | |
| 34 | + ], | |
| 35 | + "/Parent": "2 0 R", | |
| 36 | + "/Resources": { | |
| 37 | + "/Font": { | |
| 38 | + "/F1": "6 0 R" | |
| 39 | + }, | |
| 40 | + "/ProcSet": "5 0 R" | |
| 41 | + }, | |
| 42 | + "/Type": "/Page" | |
| 43 | + } | |
| 44 | + }, | |
| 45 | + "obj:4 0 R": { | |
| 46 | + "stream": { | |
| 47 | + "data": "QlQKICAvRjEgMjQgVGYKICA3MiA3MjAgVGQKICAoUG90YXRvKSBUagpFVAo=", | |
| 48 | + "dict": {} | |
| 49 | + } | |
| 50 | + }, | |
| 51 | + "obj:5 0 R": { | |
| 52 | + "value": [ | |
| 53 | + "/PDF", | |
| 54 | + "/Text" | |
| 55 | + ] | |
| 56 | + }, | |
| 57 | + "obj:6 0 R": { | |
| 58 | + "value": { | |
| 59 | + "/BaseFont": "/Helvetica", | |
| 60 | + "/Encoding": "/WinAnsiEncoding", | |
| 61 | + "/Subtype": "/Type1", | |
| 62 | + "/Type": "/Font" | |
| 63 | + } | |
| 64 | + } | |
| 65 | + } | |
| 66 | + } | |
| 67 | +} | ... | ... |
qpdf/qtest/qpdf/qjson-missing-trailer.out
0 → 100644
qpdf/qtest/qpdf/qjson-no-json-version.out
qpdf/qtest/qpdf/qjson-no-pdf-version.out
qpdf/qtest/qpdf/qjson-no-qpdf-object.out
qpdf/qtest/qpdf/qjson-obj-key-errors.json
0 → 100644
| 1 | +{ | |
| 2 | + "version": 2, | |
| 3 | + "parameters": { | |
| 4 | + "decodelevel": "none" | |
| 5 | + }, | |
| 6 | + "qpdf": { | |
| 7 | + "jsonversion": 2, | |
| 8 | + "pdfversion": "1.3", | |
| 9 | + "maxobjectid": 6, | |
| 10 | + "objects": { | |
| 11 | + "obj:1 0 R": { | |
| 12 | + "value": { | |
| 13 | + "/Pages": "2 0 R", | |
| 14 | + "/Type": "/Catalog" | |
| 15 | + } | |
| 16 | + }, | |
| 17 | + "obj:2 0 R": { | |
| 18 | + "value": { | |
| 19 | + "/Count": 1, | |
| 20 | + "/Kids": [ | |
| 21 | + "3 0 R" | |
| 22 | + ], | |
| 23 | + "/Type": "/Pages" | |
| 24 | + }, | |
| 25 | + "stream": { | |
| 26 | + "data": "QlQKICAvRjEgMjQgVGYKICA3MiA3MjAgVGQKICAoUG90YXRvKSBUagpFVAo=", | |
| 27 | + "dict": {} | |
| 28 | + } | |
| 29 | + }, | |
| 30 | + "obj:3 0 R": { | |
| 31 | + "potato": { | |
| 32 | + "salad": "ignored-so-no-string-error", | |
| 33 | + "nested": [1, 2, {"x": "y"}] | |
| 34 | + } | |
| 35 | + }, | |
| 36 | + "obj:4 0 R": { | |
| 37 | + "stream": { | |
| 38 | + "potato": "u:salad" | |
| 39 | + } | |
| 40 | + }, | |
| 41 | + "obj:5 0 R": { | |
| 42 | + "stream": { | |
| 43 | + "dict": {"/A": "/B"}, | |
| 44 | + "data": "QlQKICAvRjEgMjQgVGYKICA3MiA3MjAgVGQKICAoUG90YXRvKSBUagpFVAo=", | |
| 45 | + "datafile": "abc" | |
| 46 | + } | |
| 47 | + }, | |
| 48 | + "obj:6 0 R": { | |
| 49 | + "value": { | |
| 50 | + "/BaseFont": "/Helvetica", | |
| 51 | + "/Encoding": "/WinAnsiEncoding", | |
| 52 | + "/Subtype": "/Type1", | |
| 53 | + "/Type": "/Font" | |
| 54 | + } | |
| 55 | + }, | |
| 56 | + "trailer": { | |
| 57 | + "potato": { | |
| 58 | + "/Root": "1 0 R", | |
| 59 | + "/Size": 7 | |
| 60 | + } | |
| 61 | + } | |
| 62 | + } | |
| 63 | + } | |
| 64 | +} | ... | ... |
qpdf/qtest/qpdf/qjson-obj-key-errors.out
0 → 100644
| 1 | +WARNING: qjson-obj-key-errors.json (obj:2 0 R, offset 302): object must have exactly one of "value" or "stream" | |
| 2 | +WARNING: qjson-obj-key-errors.json (obj:3 0 R, offset 600): object must have exactly one of "value" or "stream" | |
| 3 | +WARNING: qjson-obj-key-errors.json (obj:4 0 R, offset 768): "stream" is missing "dict" | |
| 4 | +WARNING: qjson-obj-key-errors.json (obj:4 0 R, offset 768): "stream" must have exactly one of "data" or "datafile" | |
| 5 | +WARNING: qjson-obj-key-errors.json (obj:5 0 R, offset 858): "stream" must have exactly one of "data" or "datafile" | |
| 6 | +WARNING: qjson-obj-key-errors.json (trailer, offset 1236): "trailer" is missing "value" | |
| 7 | +qpdf: qjson-obj-key-errors.json: errors found in JSON | ... | ... |
qpdf/qtest/qpdf/qjson-object-not-dict.out
qpdf/qtest/qpdf/qjson-objects-not-dict.out
| 1 | -qpdf: qjson-objects-not-dict.json: offset 77: "objects" must be a dictionary | |
| 1 | +WARNING: qjson-objects-not-dict.json (offset 77): "objects" must be a dictionary | |
| 2 | +WARNING: qjson-objects-not-dict.json: "qpdf.objects.trailer" was not seen | |
| 3 | +qpdf: qjson-objects-not-dict.json: errors found in JSON | ... | ... |
qpdf/qtest/qpdf/qjson-stream-dict-not-dict.out
| 1 | -qpdf: qjson-stream-dict-not-dict.json: offset 137: "stream.dict" must be a dictionary | |
| 1 | +WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 137): "stream.dict" must be a dictionary | |
| 2 | +WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 137): unrecognized string value | |
| 3 | +WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 117): "stream" must have exactly one of "data" or "datafile" | |
| 4 | +WARNING: qjson-stream-dict-not-dict.json: "qpdf.objects.trailer" was not seen | |
| 5 | +qpdf: qjson-stream-dict-not-dict.json: errors found in JSON | ... | ... |
qpdf/qtest/qpdf/qjson-stream-not-dict.out
| 1 | -qpdf: qjson-stream-not-dict.json: offset 118: "stream" must be a dictionary | |
| 1 | +WARNING: qjson-stream-not-dict.json (obj:1 0 R, offset 118): "stream" must be a dictionary | |
| 2 | +WARNING: qjson-stream-not-dict.json: "qpdf.objects.trailer" was not seen | |
| 3 | +qpdf: qjson-stream-not-dict.json: errors found in JSON | ... | ... |
qpdf/qtest/qpdf/qjson-trailer-not-dict.json
qpdf/qtest/qpdf/qjson-trailer-not-dict.out
qpdf/qtest/qpdf/qjson-trailer-stream.json
qpdf/qtest/qpdf/qjson-trailer-stream.out