Commit 1c8d53465ff4d8e732498b39e49595b16d6754af

Authored by Jay Berkenbilt
1 parent b9cd693a

Incorporate job schema generation into generate_auto_job

README-maintainer
... ... @@ -128,7 +128,10 @@ Command-line arguments are closely coupled with QPDFJob. To add a new
128 128 command-line argument, add the option to the appropriate table in
129 129 job.yml. This will automatically declare a method in the private
130 130 ArgParser class in QPDFJob_argv.cc which you have to implement. The
131   -implementation should make calls to methods in QPDFJob.
  131 +implementation should make calls to methods in QPDFJob. Then, either
  132 +list the option in the no-json section of job.yml if it is to be
  133 +excluded from the job json structure, or add it under the json section
  134 +at the place where it should appear in the json structure.
132 135  
133 136 The build will fail until the new option is documented in
134 137 manual/cli.rst. To do that, create documentation for the option by
... ...
generate_auto_job
... ... @@ -5,6 +5,7 @@ import argparse
5 5 import hashlib
6 6 import re
7 7 import yaml
  8 +import json
8 9  
9 10 whoami = os.path.basename(sys.argv[0])
10 11 BANNER = f'''//
... ... @@ -29,6 +30,7 @@ class Main:
29 30 'decl': 'libqpdf/qpdf/auto_job_decl.hh',
30 31 'init': 'libqpdf/qpdf/auto_job_init.hh',
31 32 'help': 'libqpdf/qpdf/auto_job_help.hh',
  33 + 'schema': 'libqpdf/qpdf/auto_job_schema.hh',
32 34 }
33 35 SUMS = 'job.sums'
34 36  
... ... @@ -199,6 +201,9 @@ class Main:
199 201 raise Exception(
200 202 f'help for unknown option {option},'
201 203 f' lineno={lineno}')
  204 + if option not in self.help_options:
  205 + # QXXXQ also need to exclude help table
  206 + self.jdata[option[2:]]['help'] = short_text
202 207 print(f'ap.addOptionHelp("{option}", "{topic}",'
203 208 f' "{short_text}", R"({long_text})");', file=f)
204 209 help_lines += 1
... ... @@ -226,9 +231,12 @@ class Main:
226 231 with open('job.yml', 'r') as f:
227 232 data = yaml.safe_load(f.read())
228 233 self.validate(data)
229   - self.options_without_help = set(
  234 + # Add the built-in help options to tables that we populate as
  235 + # we read job.yml since we won't encounter these in job.yml
  236 + self.help_options = set(
230 237 ['--completion-bash', '--completion-zsh', '--help']
231 238 )
  239 + self.options_without_help = set(self.help_options)
232 240 self.prepare(data)
233 241 with open(self.DESTS['decl'], 'w') as f:
234 242 print(BANNER, file=f)
... ... @@ -242,6 +250,11 @@ class Main:
242 250 with open('manual/cli.rst', 'r') as df:
243 251 print(BANNER, file=f)
244 252 self.generate_doc(df, f)
  253 + self.generate_schema(data)
  254 + with open(self.DESTS['schema'], 'w') as f:
  255 + print('static constexpr char const* JOB_SCHEMA_DATA = R"(' +
  256 + json.dumps(self.schema, indent=2, separators=(',', ': ')) +
  257 + ')";', file=f)
245 258  
246 259 # Update hashes last to ensure that this will be rerun in the
247 260 # event of a failure.
... ... @@ -251,6 +264,24 @@ class Main:
251 264 def prepare(self, data):
252 265 self.decls = []
253 266 self.init = []
  267 + self.jdata = {
  268 + # option: {
  269 + # tables: set(),
  270 + # help: string,
  271 + # QXXXQ something for registering handler
  272 + # }
  273 + }
  274 +
  275 + def add_jdata(flag, table):
  276 + nonlocal self
  277 + if table == 'help':
  278 + self.help_options.add(f'--{flag}')
  279 + elif flag in self.jdata:
  280 + self.jdata[flag]['tables'].add(table)
  281 + else:
  282 + self.jdata[flag] = {
  283 + 'tables': set([table]),
  284 + }
254 285  
255 286 self.init.append('auto b = [this](void (ArgParser::*f)()) {')
256 287 self.init.append(' return QPDFArgParser::bindBare(f, this);')
... ... @@ -275,7 +306,7 @@ class Main:
275 306 self.decls.append('')
276 307 for o in data['options']:
277 308 table = o['table']
278   -
  309 + table_prefix = o.get('prefix', table)
279 310 if table == 'main':
280 311 self.init.append('this->ap.selectMainOptionTable();')
281 312 elif table == 'help':
... ... @@ -296,12 +327,14 @@ class Main:
296 327 self.decls.append(f'void {identifier}();')
297 328 self.init.append(f'this->ap.addBare("{i}", '
298 329 f'b(&ArgParser::{identifier}));')
  330 + add_jdata(i, table_prefix)
299 331 for i in o.get('optional_parameter', []):
300 332 self.options_without_help.add(f'--{i}')
301 333 identifier = self.to_identifier(i, prefix, False)
302 334 self.decls.append(f'void {identifier}(char *);')
303 335 self.init.append(f'this->ap.addOptionalParameter("{i}", '
304 336 f'p(&ArgParser::{identifier}));')
  337 + add_jdata(i, table_prefix)
305 338 for i, v in o.get('required_parameter', {}).items():
306 339 self.options_without_help.add(f'--{i}')
307 340 identifier = self.to_identifier(i, prefix, False)
... ... @@ -309,6 +342,7 @@ class Main:
309 342 self.init.append(f'this->ap.addRequiredParameter("{i}", '
310 343 f'p(&ArgParser::{identifier})'
311 344 f', "{v}");')
  345 + add_jdata(i, table_prefix)
312 346 for i, v in o.get('required_choices', {}).items():
313 347 self.options_without_help.add(f'--{i}')
314 348 identifier = self.to_identifier(i, prefix, False)
... ... @@ -316,6 +350,7 @@ class Main:
316 350 self.init.append(f'this->ap.addChoices("{i}", '
317 351 f'p(&ArgParser::{identifier})'
318 352 f', true, {v}_choices);')
  353 + add_jdata(i, table_prefix)
319 354 for i, v in o.get('optional_choices', {}).items():
320 355 self.options_without_help.add(f'--{i}')
321 356 identifier = self.to_identifier(i, prefix, False)
... ... @@ -323,11 +358,13 @@ class Main:
323 358 self.init.append(f'this->ap.addChoices("{i}", '
324 359 f'p(&ArgParser::{identifier})'
325 360 f', false, {v}_choices);')
  361 + add_jdata(i, table_prefix)
326 362 if table not in ('main', 'help'):
327 363 identifier = self.to_identifier(table, 'argEnd', False)
328 364 self.decls.append(f'void {identifier}();')
329 365 for o in data['options']:
330 366 table = o['table']
  367 + table_prefix = o.get('prefix', table)
331 368 if 'from_table' not in o:
332 369 continue
333 370 if table == 'main':
... ... @@ -341,6 +378,79 @@ class Main:
341 378 for j in ft['options']:
342 379 self.init.append('this->ap.copyFromOtherTable'
343 380 f'("{j}", "{other_table}");')
  381 + add_jdata(j, table_prefix)
  382 +
def generate_schema(self, data):
    """Build ``self.schema`` from the ``json`` section of job.yml.

    Walks ``data['json']`` and verifies it against the options recorded
    in ``self.jdata``: every known command-line option must appear
    either in ``data['json']`` or in ``data['no-json']``.

    Key conventions in ``data['json']``:

    * A key starting with an underscore does not map to a command-line
      option; the underscore is removed in the schema.
    * An option that lives in more than one option table is written as
      ``table.option``; the table prefix is removed in the schema.
    * A ``None`` value is replaced by the option's help text, with each
      ``--option`` mention rewritten to its json key.
    * A list must have exactly one element; a dict element is expanded
      recursively.

    Args:
        data: the parsed job.yml content; ``data['json']`` and
            ``data['no-json']`` are read.

    Raises:
        Exception: if the json section has an unknown key, a list whose
            length is not 1, or a value that cannot be resolved; if a
            ``None`` value names an option with no help text; if an
            option is in neither section; or if ``no-json`` names an
            option that no option table declares.
    """
    # XXX check data['json'] against what we know from jdata.
    # Ultimately be able to generate a schema as well as
    # JSONHandler and registering stuff.

    # Build the set of option keys we expect. An option that appears in
    # a single table is expected under its own name. An option that
    # appears in several tables is expected once per table, as
    # 'table.option'. The 'tables' entry is bookkeeping only, so it is
    # left out of the per-option details.
    expected = {}
    for option, info in self.jdata.items():
        details = {k: v for k, v in info.items() if k != 'tables'}
        tables = info['tables']
        if len(tables) == 1:
            expected[option] = details
        else:
            for table in sorted(tables):
                expected[f'{table}.{option}'] = dict(details)
    options_seen = set(data['no-json'])

    self.schema = {}

    def option_to_json_key(s):
        return self.to_identifier(s, '', False)

    # Walk through the json information building the schema as we
    # go. This verifies consistency between command-line options
    # and the json section of the data and builds up a schema by
    # populating with help information as available.
    def build_schema(j, s):
        for k, v in j.items():
            if not (k in expected or
                    k.startswith('_') or
                    isinstance(v, str)):
                raise Exception(f'json: unknown key {k}')
            if k.startswith('_'):
                schema_key = k[1:]
            else:
                # Drop the 'table.' qualifier.
                schema_key = re.sub(r'[^\.]+\.', '', k)
            schema_key = option_to_json_key(schema_key)
            schema_value = v
            if k in expected:
                options_seen.add(re.sub('^_', '', k))
                if v is None:
                    # Fail with a readable message instead of a bare
                    # KeyError when no help text was recorded.
                    if 'help' not in expected[k]:
                        raise Exception(
                            f'json: no help text known for option {k}')
                    schema_value = re.sub(
                        r'--(\S+)',
                        lambda x: option_to_json_key(x.group(1)),
                        expected[k]['help'])
            if isinstance(v, dict):
                schema_value = {}
                build_schema(v, schema_value)
            elif isinstance(v, list):
                if len(v) != 1:
                    raise Exception('json contains array with length != 1')
                if isinstance(v[0], dict):
                    schema_value = [{}]
                    build_schema(v[0], schema_value[0])
            elif schema_value is None:
                raise Exception(f'unknown schema value for {k}')
            s[schema_key] = schema_value

    build_schema(data['json'], self.schema)

    expected_keys = set(expected)
    missing = expected_keys - options_seen
    if missing:
        raise Exception('missing from json: ' + str(missing))
    # Anything seen but not expected can only come from a stale or
    # misspelled no-json entry; say so rather than reporting an empty
    # "missing" set.
    unknown = options_seen - expected_keys
    if unknown:
        raise Exception('no-json: unknown options: ' + str(sorted(unknown)))
344 454  
345 455 def check_keys(self, what, d, exp):
346 456 if not isinstance(d, dict):
... ... @@ -351,7 +461,8 @@ class Main:
351 461 exit(f'{what}: unknown keys = {extra}')
352 462  
353 463 def validate(self, data):
354   - self.check_keys('top', data, set(['choices', 'options']))
  464 + self.check_keys('top', data, set(
  465 + ['choices', 'options', 'no-json', 'json']))
355 466 for o in data['options']:
356 467 self.check_keys('top', o, set(
357 468 ['table', 'prefix', 'bare', 'positional',
... ... @@ -363,7 +474,10 @@ class Main:
363 474 if const:
364 475 identifier = f'{prefix}_{identifier.upper()}'
365 476 else:
366   - identifier = f'{prefix}_{identifier.lower()}'
  477 + if prefix:
  478 + identifier = f'{prefix}_{identifier.lower()}'
  479 + else:
  480 + identifier = identifier.lower()
367 481 identifier = re.sub(r'_([a-z])',
368 482 lambda x: x.group(1).upper(),
369 483 identifier).replace('_', '')
... ...
job.sums
1 1 # Generated by generate_auto_job
2   -generate_auto_job b70f64314f1ae1f100fa6a11975dee5f7669038e2a619b6c9da1e5230db1dd1b
3   -job.yml 8177cadf41096efdc174f04daadfe5d98c592ad44ad10cb96537521fd79a801a
  2 +generate_auto_job 0758b244fc4e2d3e440883072d2740bc4cdb26c5aa8de938f028afd7d83fad79
  3 +job.yml 2856c2635d42f0a58717d3ffce3125816d8f98ff17245c4b7a0669d70cd68b84
4 4 libqpdf/qpdf/auto_job_decl.hh 97395ecbe590b23ae04d6cce2080dbd0e998917ff5eeaa5c6aafa91041d3cd6a
5 5 libqpdf/qpdf/auto_job_help.hh 2653faaf59415bec81c3a85d426239d52b609ac24faba34ec2d26f00710dd2c6
6 6 libqpdf/qpdf/auto_job_init.hh 465bf46769559ceb77110d1b9d3293ba9b3595850b49848c31aeabd10aadb4ad
  7 +libqpdf/qpdf/auto_job_schema.hh c91a4e182e088797b70dda94af03ca32d360f3564890132da2a8bdc3c4432423
7 8 manual/_ext/qpdf.py 855fe12de5af7a10bb24be6ecc4d5dff4c84ac58cf388a13be6bbb394346a67d
8 9 manual/cli.rst b136c7f33a538c580b081a7e802c27635aad2a4229efa0eb0736466116b7aa90
... ...
... ... @@ -217,3 +217,161 @@ options:
217 217 required_parameter:
218 218 prefix: prefix
219 219 password: password
  220 +no-json:
  221 + - preserve-unreferenced-resources
  222 +json:
  223 + # The structure of this section defines what the json input to
  224 + # QPDFJob looks like. If a key starts with underscore or has a value
  225 + # that is a string, it does not map to a command-line argument. If
  226 + # value is null, its properties and help come from other information
  227 + # known by generate_auto_job. This information is used to construct
  228 + # a "schema" (as in JSON.hh) for the json input to QPDFJob. The
  229 + # leading underscore is removed.
  230 + _input:
  231 + _file:
  232 + _name: "input filename"
  233 + main.password:
  234 + password-file:
  235 + empty:
  236 + _output:
  237 + _file:
  238 + _name: "output filename"
  239 + replace-input:
  240 + split-pages:
  241 + _options:
  242 + qdf:
  243 + preserve-unreferenced:
  244 + newline-before-endstream:
  245 + normalize-content:
  246 + stream-data:
  247 + compress-streams:
  248 + recompress-flate:
  249 + decode-level:
  250 + decrypt:
  251 + static-aes-iv:
  252 + static-id:
  253 + no-original-object-ids:
  254 + copy-encryption:
  255 + encryption-file-password:
  256 + linearize:
  257 + linearize-pass1:
  258 + object-streams:
  259 + min-version:
  260 + force-version:
  261 + progress:
  262 + encrypt:
  263 + user-password: "user password"
  264 + owner-password: "owner password"
  265 + key-length: "key length: 48, 128, 256"
  266 + _40-bit:
  267 + Enc40.annotate:
  268 + Enc40.extract:
  269 + Enc40.modify:
  270 + Enc40.print:
  271 + _128-bit:
  272 + Enc128.accessibility:
  273 + Enc128.annotate:
  274 + Enc128.assemble:
  275 + Enc128.cleartext-metadata:
  276 + Enc128.extract:
  277 + Enc128.form:
  278 + Enc128.modify-other:
  279 + Enc128.modify:
  280 + Enc128.print:
  281 + force-V4:
  282 + use-aes:
  283 + _256-bit:
  284 + Enc256.accessibility:
  285 + Enc256.annotate:
  286 + Enc256.assemble:
  287 + Enc256.cleartext-metadata:
  288 + Enc256.extract:
  289 + Enc256.form:
  290 + Enc256.modify-other:
  291 + Enc256.modify:
  292 + Enc256.print:
  293 + allow-insecure:
  294 + force-R5:
  295 + _options:
  296 + allow-weak-crypto:
  297 + deterministic-id:
  298 + keep-files-open:
  299 + keep-files-open-threshold:
  300 + no-warn:
  301 + verbose:
  302 + warning-exit-0:
  303 + ignore-xref-streams:
  304 + password-is-hex-key:
  305 + password-mode:
  306 + suppress-password-recovery:
  307 + suppress-recovery:
  308 + _inspect:
  309 + check:
  310 + check-linearization:
  311 + filtered-stream-data:
  312 + is-encrypted:
  313 + raw-stream-data:
  314 + requires-password:
  315 + show-encryption:
  316 + show-encryption-key:
  317 + show-linearization:
  318 + show-npages:
  319 + show-object:
  320 + show-pages:
  321 + show-xref:
  322 + with-images:
  323 + list-attachments:
  324 + show-attachment:
  325 + json:
  326 + json-key:
  327 + - null
  328 + json-object:
  329 + - null
  330 + _transform:
  331 + coalesce-contents:
  332 + compression-level:
  333 + externalize-inline-images:
  334 + ii-min-bytes:
  335 + remove-unreferenced-resources:
  336 + _modify:
  337 + add-attachment:
  338 + - file: "attachment to add"
  339 + creationdate:
  340 + description:
  341 + filename:
  342 + key:
  343 + mimetype:
  344 + moddate:
  345 + replace:
  346 + remove-attachment:
  347 + copy-attachments-from:
  348 + - file: "attachment source filename"
  349 + CopyAtt.password:
  350 + prefix:
  351 + collate:
  352 + flatten-annotations:
  353 + flatten-rotation:
  354 + generate-appearances:
  355 + keep-inline-images:
  356 + oi-min-area:
  357 + oi-min-height:
  358 + oi-min-width:
  359 + optimize-images:
  360 + pages:
  361 + - file: "source for for pages"
  362 + Pages.password:
  363 + range: "page range"
  364 + remove-page-labels:
  365 + rotate:
  366 + overlay:
  367 + file: "source file for overlay"
  368 + UO.password:
  369 + from:
  370 + repeat:
  371 + to:
  372 + underlay:
  373 + file: "source file for underlay"
  374 + UO.password:
  375 + from:
  376 + repeat:
  377 + to:
... ...
libqpdf/qpdf/auto_job_schema.hh 0 → 100644
  1 +static constexpr char const* JOB_SCHEMA_DATA = R"({
  2 + "input": {
  3 + "file": {
  4 + "name": "input filename",
  5 + "password": "specify password",
  6 + "passwordFile": "read password from a file"
  7 + },
  8 + "empty": "empty input file"
  9 + },
  10 + "output": {
  11 + "file": {
  12 + "name": "output filename"
  13 + },
  14 + "replaceInput": "replace input with output",
  15 + "splitPages": "write pages to separate files",
  16 + "options": {
  17 + "qdf": "enable viewing PDF code in a text editor",
  18 + "preserveUnreferenced": "preserve unreferenced objects",
  19 + "newlineBeforeEndstream": "force a newline before endstream",
  20 + "normalizeContent": "fix newlines in content streams",
  21 + "streamData": "control stream compression",
  22 + "compressStreams": "compress uncompressed streams",
  23 + "recompressFlate": "uncompress and recompress flate",
  24 + "decodeLevel": "control which streams to uncompress",
  25 + "decrypt": "remove encryption from input file",
  26 + "staticAesIv": "use a fixed AES vector",
  27 + "staticId": "use a fixed document ID",
  28 + "noOriginalObjectIds": "omit original object ID in qdf",
  29 + "copyEncryption": "copy another file's encryption details",
  30 + "encryptionFilePassword": "supply password for copyEncryption",
  31 + "linearize": "linearize (web-optimize) output",
  32 + "linearizePass1": "save pass 1 of linearization",
  33 + "objectStreams": "control use of object streams",
  34 + "minVersion": "set minimum PDF version",
  35 + "forceVersion": "set output PDF version",
  36 + "progress": "show progress when writing",
  37 + "encrypt": {
  38 + "userPassword": "user password",
  39 + "ownerPassword": "owner password",
  40 + "keyLength": "key length: 48, 128, 256",
  41 + "40Bit": {
  42 + "annotate": "restrict document annotation",
  43 + "extract": "restrict text/graphic extraction",
  44 + "modify": "restrict document modification",
  45 + "print": "restrict printing"
  46 + },
  47 + "128Bit": {
  48 + "accessibility": "restrict document accessibility",
  49 + "annotate": "restrict document annotation",
  50 + "assemble": "restrict document assembly",
  51 + "cleartextMetadata": "don't encrypt metadata",
  52 + "extract": "restrict text/graphic extraction",
  53 + "form": "restrict form filling",
  54 + "modifyOther": "restrict other modifications",
  55 + "modify": "restrict document modification",
  56 + "print": "restrict printing",
  57 + "forceV4": "force V=4 in encryption dictionary",
  58 + "useAes": "use AES with 128-bit encryption"
  59 + },
  60 + "256Bit": {
  61 + "accessibility": "restrict document accessibility",
  62 + "annotate": "restrict document annotation",
  63 + "assemble": "restrict document assembly",
  64 + "cleartextMetadata": "don't encrypt metadata",
  65 + "extract": "restrict text/graphic extraction",
  66 + "form": "restrict form filling",
  67 + "modifyOther": "restrict other modifications",
  68 + "modify": "restrict document modification",
  69 + "print": "restrict printing",
  70 + "allowInsecure": "allow empty owner passwords",
  71 + "forceR5": "use unsupported R=5 encryption"
  72 + }
  73 + }
  74 + }
  75 + },
  76 + "options": {
  77 + "allowWeakCrypto": "allow insecure cryptographic algorithms",
  78 + "deterministicId": "generate ID deterministically",
  79 + "keepFilesOpen": "manage keeping multiple files open",
  80 + "keepFilesOpenThreshold": "set threshold for keepFilesOpen",
  81 + "noWarn": "suppress printing warning messages",
  82 + "verbose": "print additional information",
  83 + "warningExit0": "exit 0 even with warnings",
  84 + "ignoreXrefStreams": "use xref tables rather than streams",
  85 + "passwordIsHexKey": "provide hex-encoded encryption key",
  86 + "passwordMode": "tweak how qpdf encodes passwords",
  87 + "suppressPasswordRecovery": "don't try different password encodings",
  88 + "suppressRecovery": "suppress error recovery"
  89 + },
  90 + "inspect": {
  91 + "check": "partially check whether PDF is valid",
  92 + "checkLinearization": "check linearization tables",
  93 + "filteredStreamData": "show filtered stream data",
  94 + "isEncrypted": "silently test whether a file is encrypted",
  95 + "rawStreamData": "show raw stream data",
  96 + "requiresPassword": "silently test a file's password",
  97 + "showEncryption": "information about encrypted files",
  98 + "showEncryptionKey": "show key with showEncryption",
  99 + "showLinearization": "show linearization hint tables",
  100 + "showNpages": "show number of pages",
  101 + "showObject": "show contents of an object",
  102 + "showPages": "display page dictionary information",
  103 + "showXref": "show cross reference data",
  104 + "withImages": "include image details with showPages",
  105 + "listAttachments": "list embedded files",
  106 + "showAttachment": "export an embedded file",
  107 + "json": "show file in json format",
  108 + "jsonKey": [
  109 + null
  110 + ],
  111 + "jsonObject": [
  112 + null
  113 + ]
  114 + },
  115 + "transform": {
  116 + "coalesceContents": "combine content streams",
  117 + "compressionLevel": "set compression level for flate",
  118 + "externalizeInlineImages": "convert inline to regular images",
  119 + "iiMinBytes": "set minimum size for externalizeInlineImages",
  120 + "removeUnreferencedResources": "remove unreferenced page resources"
  121 + },
  122 + "modify": {
  123 + "addAttachment": [
  124 + {
  125 + "file": "attachment to add",
  126 + "creationdate": "set attachment's creation date",
  127 + "description": "set attachment's description",
  128 + "filename": "set attachment's displayed filename",
  129 + "key": "specify attachment key",
  130 + "mimetype": "attachment mime type, e.g. application/pdf",
  131 + "moddate": "set attachment's modification date",
  132 + "replace": "replace attachment with same key"
  133 + }
  134 + ],
  135 + "removeAttachment": "remove an embedded file",
  136 + "copyAttachmentsFrom": [
  137 + {
  138 + "file": "attachment source filename",
  139 + "password": "specify password",
  140 + "prefix": "key prefix for copying attachments"
  141 + }
  142 + ],
  143 + "collate": "collate with pages",
  144 + "flattenAnnotations": "push annotations into content",
  145 + "flattenRotation": "remove rotation from page dictionary",
  146 + "generateAppearances": "generate appearances for form fields",
  147 + "keepInlineImages": "exclude inline images from optimization",
  148 + "oiMinArea": "minimum area for optimizeImages",
  149 + "oiMinHeight": "minimum height for optimizeImages",
  150 + "oiMinWidth": "minimum width for optimizeImages",
  151 + "optimizeImages": "use efficient compression for images",
  152 + "pages": [
  153 + {
  154 + "file": "source for for pages",
  155 + "password": "specify password",
  156 + "range": "page range"
  157 + }
  158 + ],
  159 + "removePageLabels": "remove explicit page numbers",
  160 + "rotate": "rotate pages",
  161 + "overlay": {
  162 + "file": "source file for overlay",
  163 + "password": "specify password",
  164 + "from": "source pages for underlay/overlay",
  165 + "repeat": "overlay/underlay pages to repeat",
  166 + "to": "destination pages for underlay/overlay"
  167 + },
  168 + "underlay": {
  169 + "file": "source file for underlay",
  170 + "password": "specify password",
  171 + "from": "source pages for underlay/overlay",
  172 + "repeat": "overlay/underlay pages to repeat",
  173 + "to": "destination pages for underlay/overlay"
  174 + }
  175 + }
  176 +})";
... ...