Commit fa3051d977ab21da7bde9d9613dca600c83123a5

Authored by Jay Berkenbilt
1 parent 2008d037

Implement --json-key

Showing 1 changed file with 110 additions and 59 deletions
qpdf/qpdf.cc
@@ -194,6 +194,7 @@ struct Options @@ -194,6 +194,7 @@ struct Options
194 bool show_pages; 194 bool show_pages;
195 bool show_page_images; 195 bool show_page_images;
196 bool json; 196 bool json;
  197 + std::set<std::string> json_keys;
197 bool check; 198 bool check;
198 std::vector<PageSpec> page_specs; 199 std::vector<PageSpec> page_specs;
199 std::map<std::string, RotationSpec> rotations; 200 std::map<std::string, RotationSpec> rotations;
@@ -243,7 +244,7 @@ ProgressReporter::reportProgress(int percentage) @@ -243,7 +244,7 @@ ProgressReporter::reportProgress(int percentage)
243 << percentage << "%" << std::endl; 244 << percentage << "%" << std::endl;
244 } 245 }
245 246
246 -static JSON json_schema() 247 +static JSON json_schema(std::set<std::string>* keys = 0)
247 { 248 {
248 // Style: use all lower-case keys with no dashes or underscores. 249 // Style: use all lower-case keys with no dashes or underscores.
249 // Choose array or dictionary based on indexing. For example, we 250 // Choose array or dictionary based on indexing. For example, we
@@ -270,60 +271,76 @@ static JSON json_schema() @@ -270,60 +271,76 @@ static JSON json_schema()
270 j_params.addDictionaryMember( 271 j_params.addDictionaryMember(
271 "decodelevel", JSON::makeString( 272 "decodelevel", JSON::makeString(
272 "decode level used to determine stream filterability")); 273 "decode level used to determine stream filterability"));
273 - schema.addDictionaryMember(  
274 - "objects", JSON::makeString(  
275 - "dictionary of original objects; keys are 'trailer' or 'n n R'"));  
276 - JSON page = schema.addDictionaryMember("pages", JSON::makeArray()).  
277 - addArrayElement(JSON::makeDictionary());  
278 - page.addDictionaryMember(  
279 - "object",  
280 - JSON::makeString("reference to original page object"));  
281 - JSON image = page.addDictionaryMember("images", JSON::makeArray()).  
282 - addArrayElement(JSON::makeDictionary());  
283 - image.addDictionaryMember(  
284 - "object",  
285 - JSON::makeString("reference to image stream"));  
286 - image.addDictionaryMember(  
287 - "width",  
288 - JSON::makeString("image width"));  
289 - image.addDictionaryMember(  
290 - "height",  
291 - JSON::makeString("image height"));  
292 - image.addDictionaryMember("filter", JSON::makeArray()).  
293 - addArrayElement(  
294 - JSON::makeString("filters applied to image data"));  
295 - image.addDictionaryMember("decodeparms", JSON::makeArray()).  
296 - addArrayElement(  
297 - JSON::makeString("decode parameters for image data"));  
298 - image.addDictionaryMember(  
299 - "filterable",  
300 - JSON::makeString("whether image data can be decoded"  
301 - " using the decode level qpdf was invoked with"));  
302 - page.addDictionaryMember("contents", JSON::makeArray()).  
303 - addArrayElement(  
304 - JSON::makeString("reference to each content stream"));  
305 - page.addDictionaryMember(  
306 - "label",  
307 - JSON::makeString("page label dictionary, or null if none"));  
308 - JSON labels = schema.addDictionaryMember("pagelabels", JSON::makeArray()).  
309 - addArrayElement(JSON::makeDictionary());  
310 - labels.addDictionaryMember(  
311 - "index",  
312 - JSON::makeString("starting page position starting from zero"));  
313 - labels.addDictionaryMember(  
314 - "label",  
315 - JSON::makeString("page label dictionary"));  
316 - JSON outline = page.addDictionaryMember("outlines", JSON::makeArray()).  
317 - addArrayElement(JSON::makeDictionary());  
318 - outline.addDictionaryMember(  
319 - "object",  
320 - JSON::makeString("reference to outline that targets this page"));  
321 - outline.addDictionaryMember(  
322 - "title",  
323 - JSON::makeString("outline title"));  
324 - outline.addDictionaryMember(  
325 - "dest",  
326 - JSON::makeString("outline destination dictionary")); 274 +
  275 + bool all_keys = ((keys == 0) || keys->empty());
  276 +
  277 + // The list of selectable top-level keys is duplicated in three
  278 + // places: json_schema, do_json, and initOptionTable.
  279 + if (all_keys || keys->count("objects"))
  280 + {
  281 + schema.addDictionaryMember(
  282 + "objects", JSON::makeString(
  283 + "dictionary of original objects;"
  284 + " keys are 'trailer' or 'n n R'"));
  285 + }
  286 + if (all_keys || keys->count("pages"))
  287 + {
  288 + JSON page = schema.addDictionaryMember("pages", JSON::makeArray()).
  289 + addArrayElement(JSON::makeDictionary());
  290 + page.addDictionaryMember(
  291 + "object",
  292 + JSON::makeString("reference to original page object"));
  293 + JSON image = page.addDictionaryMember("images", JSON::makeArray()).
  294 + addArrayElement(JSON::makeDictionary());
  295 + image.addDictionaryMember(
  296 + "object",
  297 + JSON::makeString("reference to image stream"));
  298 + image.addDictionaryMember(
  299 + "width",
  300 + JSON::makeString("image width"));
  301 + image.addDictionaryMember(
  302 + "height",
  303 + JSON::makeString("image height"));
  304 + image.addDictionaryMember("filter", JSON::makeArray()).
  305 + addArrayElement(
  306 + JSON::makeString("filters applied to image data"));
  307 + image.addDictionaryMember("decodeparms", JSON::makeArray()).
  308 + addArrayElement(
  309 + JSON::makeString("decode parameters for image data"));
  310 + image.addDictionaryMember(
  311 + "filterable",
  312 + JSON::makeString("whether image data can be decoded"
  313 + " using the decode level qpdf was invoked with"));
  314 + page.addDictionaryMember("contents", JSON::makeArray()).
  315 + addArrayElement(
  316 + JSON::makeString("reference to each content stream"));
  317 + page.addDictionaryMember(
  318 + "label",
  319 + JSON::makeString("page label dictionary, or null if none"));
  320 + JSON outline = page.addDictionaryMember("outlines", JSON::makeArray()).
  321 + addArrayElement(JSON::makeDictionary());
  322 + outline.addDictionaryMember(
  323 + "object",
  324 + JSON::makeString("reference to outline that targets this page"));
  325 + outline.addDictionaryMember(
  326 + "title",
  327 + JSON::makeString("outline title"));
  328 + outline.addDictionaryMember(
  329 + "dest",
  330 + JSON::makeString("outline destination dictionary"));
  331 + }
  332 + if (all_keys || keys->count("pagelabels"))
  333 + {
  334 + JSON labels = schema.addDictionaryMember(
  335 + "pagelabels", JSON::makeArray()).
  336 + addArrayElement(JSON::makeDictionary());
  337 + labels.addDictionaryMember(
  338 + "index",
  339 + JSON::makeString("starting page position starting from zero"));
  340 + labels.addDictionaryMember(
  341 + "label",
  342 + JSON::makeString("page label dictionary"));
  343 + }
327 return schema; 344 return schema;
328 } 345 }
329 346
@@ -419,6 +436,7 @@ class ArgParser @@ -419,6 +436,7 @@ class ArgParser
419 void argShowPages(); 436 void argShowPages();
420 void argWithImages(); 437 void argWithImages();
421 void argJson(); 438 void argJson();
  439 + void argJsonKey(char* parameter);
422 void argCheck(); 440 void argCheck();
423 void arg40Print(char* parameter); 441 void arg40Print(char* parameter);
424 void arg40Modify(char* parameter); 442 void arg40Modify(char* parameter);
@@ -615,6 +633,11 @@ ArgParser::initOptionTable() @@ -615,6 +633,11 @@ ArgParser::initOptionTable()
615 (*t)["show-pages"] = oe_bare(&ArgParser::argShowPages); 633 (*t)["show-pages"] = oe_bare(&ArgParser::argShowPages);
616 (*t)["with-images"] = oe_bare(&ArgParser::argWithImages); 634 (*t)["with-images"] = oe_bare(&ArgParser::argWithImages);
617 (*t)["json"] = oe_bare(&ArgParser::argJson); 635 (*t)["json"] = oe_bare(&ArgParser::argJson);
  636 + // The list of selectable top-level keys is duplicated in three
  637 + // places: json_schema, do_json, and initOptionTable.
  638 + char const* jsonKeyChoices[] = {"objects", "pages", "pagelabels", 0};
  639 + (*t)["json-key"] = oe_requiredChoices(
  640 + &ArgParser::argJsonKey, jsonKeyChoices);
618 (*t)["check"] = oe_bare(&ArgParser::argCheck); 641 (*t)["check"] = oe_bare(&ArgParser::argCheck);
619 642
620 t = &this->encrypt40_option_table; 643 t = &this->encrypt40_option_table;
@@ -1178,6 +1201,12 @@ ArgParser::argJson() @@ -1178,6 +1201,12 @@ ArgParser::argJson()
1178 } 1201 }
1179 1202
1180 void 1203 void
  1204 +ArgParser::argJsonKey(char* parameter)
  1205 +{
  1206 + o.json_keys.insert(parameter);
  1207 +}
  1208 +
  1209 +void
1181 ArgParser::argCheck() 1210 ArgParser::argCheck()
1182 { 1211 {
1183 o.check = true; 1212 o.check = true;
@@ -1659,6 +1688,16 @@ automated test suites for software that uses the qpdf library.\n\ @@ -1659,6 +1688,16 @@ automated test suites for software that uses the qpdf library.\n\
1659 --show-pages shows the object/generation number for each page\n\ 1688 --show-pages shows the object/generation number for each page\n\
1660 --with-images also shows the object IDs for images on each page\n\ 1689 --with-images also shows the object IDs for images on each page\n\
1661 --check check file structure + encryption, linearization\n\ 1690 --check check file structure + encryption, linearization\n\
  1691 +--json generate a json representation of the file\n\
  1692 +--json-help describe the format of the json representation\n\
  1693 +--json-key=key repeatable; prune json structure to include only\n\
  1694 + specified keys\n\
  1695 +\n\
  1696 +The json representation generated by qpdf is designed to facilitate\n\
  1697 +processing of qpdf from other programming languages that have a hard\n\
  1698 +time calling C++ APIs. Run qpdf --json-help for details on the format.\n\
  1699 +The manual has more in-depth information about the json representation\n\
  1700 +and certain compatibility guarantees that qpdf provides.\n\
1662 \n\ 1701 \n\
1663 The --raw-stream-data and --filtered-stream-data options are ignored\n\ 1702 The --raw-stream-data and --filtered-stream-data options are ignored\n\
1664 unless --show-object is given. Either of these options will cause the\n\ 1703 unless --show-object is given. Either of these options will cause the\n\
@@ -2704,13 +2743,25 @@ static void do_json(QPDF& pdf, Options& o) @@ -2704,13 +2743,25 @@ static void do_json(QPDF& pdf, Options& o)
2704 j_params.addDictionaryMember( 2743 j_params.addDictionaryMember(
2705 "decodelevel", JSON::makeString(decode_level_str)); 2744 "decodelevel", JSON::makeString(decode_level_str));
2706 2745
2707 - do_json_objects(pdf, o, j);  
2708 - do_json_pages(pdf, o, j);  
2709 - do_json_page_labels(pdf, o, j); 2746 + bool all_keys = o.json_keys.empty();
  2747 + // The list of selectable top-level keys is duplicated in three
  2748 + // places: json_schema, do_json, and initOptionTable.
  2749 + if (all_keys || o.json_keys.count("objects"))
  2750 + {
  2751 + do_json_objects(pdf, o, j);
  2752 + }
  2753 + if (all_keys || o.json_keys.count("pages"))
  2754 + {
  2755 + do_json_pages(pdf, o, j);
  2756 + }
  2757 + if (all_keys || o.json_keys.count("pagelabels"))
  2758 + {
  2759 + do_json_page_labels(pdf, o, j);
  2760 + }
2710 2761
2711 // Check against schema 2762 // Check against schema
2712 2763
2713 - JSON schema = json_schema(); 2764 + JSON schema = json_schema(&o.json_keys);
2714 std::list<std::string> errors; 2765 std::list<std::string> errors;
2715 if (! j.checkSchema(schema, errors)) 2766 if (! j.checkSchema(schema, errors))
2716 { 2767 {