# generate_auto_job (14.3 KB)
#!/usr/bin/env python3
import os
import sys
import argparse
import hashlib
import re
import yaml

# Base name of the path this script was invoked with; it is embedded in
# the banner below and in the messages printed by this script.
whoami = os.path.basename(sys.argv[0])

# C++ comment header written at the top of each generated file.
BANNER = '\n'.join([
    '//',
    f'// This file is automatically generated by {whoami}.',
    '// Edits will be automatically overwritten if the build is',
    '// run in maintainer mode.',
    '//',
])


def warn(*args, **kwargs):
    """Print the given values to standard error.

    All positional and keyword arguments are forwarded to print(); the
    destination stream is fixed to sys.stderr.
    """
    stream = sys.stderr
    print(*args, file=stream, **kwargs)


class Main:
    """Check or regenerate the generated QPDFJob source files.

    The file names below are relative paths; the script entry point
    changes to the directory containing this script before running.
    """

    # Input files whose SHA-256 digests are tracked (together with the
    # digests of the DESTS files).
    SOURCES = [
        whoami,
        'manual/_ext/qpdf.py',
        'job.yml',
        'manual/cli.rst',
    ]
    # Generated output files, keyed by the role name used in generate().
    DESTS = {
        'decl': 'libqpdf/qpdf/auto_job_decl.hh',
        'init': 'libqpdf/qpdf/auto_job_init.hh',
        'help': 'libqpdf/qpdf/auto_job_help.hh',
    }
    # File in which the digests are recorded.
    SUMS = 'job.sums'

    def main(self, args=sys.argv[1:], prog=whoami):
        """Parse *args* as a command line and run the selected mode.

        args: list of command-line arguments (without the program name).
        prog: program name shown in usage messages.
        """
        self.top(self.parse_args(args, prog))

    def parse_args(self, args, prog):
        parser = argparse.ArgumentParser(
            prog=prog,
            description='Generate files for QPDFJob',
        )
        mxg = parser.add_mutually_exclusive_group(required=True)
        mxg.add_argument('--check',
                         help='update checksums if files are not up to date',
                         action='store_true', default=False)
        mxg.add_argument('--generate',
                         help='generate files from sources',
                         action='store_true', default=False)
        return parser.parse_args(args)

    def top(self, options):
        """Dispatch to check() or generate() according to *options*.

        options: object with boolean ``check`` and ``generate``
        attributes. If neither is set, exit with an error message.
        """
        if options.check:
            self.check()
            return
        if options.generate:
            self.generate()
            return
        exit(f'{whoami} unknown mode')

    def get_hashes(self):
        hashes = {}
        for i in sorted([*self.SOURCES, *self.DESTS.values()]):
            m = hashlib.sha256()
            try:
                with open(i, 'rb') as f:
                    m.update(f.read())
                hashes[i] = m.hexdigest()
            except FileNotFoundError:
                pass
        return hashes

    def check(self):
        """Exit with an error unless the recorded checksums are current.

        The checksum file is read as lines of the form ``name digest``;
        lines that do not match that form are ignored. Any failure while
        reading or comparing is treated the same as a mismatch.
        """
        current = self.get_hashes()
        entry = re.compile(r'^(\S+) (\S+)\s*$')
        try:
            with open(self.SUMS, 'r') as f:
                found = (entry.match(line) for line in f)
                recorded = {m.group(1): m.group(2) for m in found if m}
            if recorded == current:
                return
        except Exception:
            # Deliberately broad: an unreadable checksum file means
            # "not up to date", not a crash.
            pass
        exit(f'{whoami}: auto job inputs have changed')

    def update_hashes(self):
        """Rewrite the checksum file from the current state of the files.

        The file starts with a ``# Generated by`` comment line followed
        by one ``name digest`` line per file returned by get_hashes().
        """
        # Compute the digests before opening the checksum file for writing.
        entries = self.get_hashes().items()
        lines = [f'# Generated by {whoami}']
        lines.extend(f'{name} {digest}' for name, digest in entries)
        with open(self.SUMS, 'w') as f:
            for line in lines:
                print(line, file=f)

    def generate_doc(self, df, f):
        st_top = 0
        st_topic = 1
        st_option = 2
        st_option_help = 3
        state = st_top

        indent = None
        topic = None
        option = None
        short_text = None
        long_text = None

        # Generate a bunch of short static functions rather than a big
        # member function for help. Some compilers have problems with
        # very large member functions in classes in anonymous
        # namespaces.

        help_files = 0
        help_lines = 0

        self.all_topics = set(self.options_without_help)
        self.referenced_topics = set()

        def set_indent(x):
            nonlocal indent
            indent = ' ' * len(x)

        def append_long_text(line, topic):
            nonlocal indent, long_text
            if line == '\n':
                long_text += '\n'
            elif line.startswith(indent):
                long_text += line[len(indent):]
            else:
                long_text = long_text.strip()
                if long_text == '':
                    raise Exception(f'missing long text for {topic}')
                long_text += '\n'
                for i in re.finditer(r'--help=([^\.\s]+)', long_text):
                    self.referenced_topics.add(i.group(1))
                return True
            return False

        lineno = 0
        for line in df.readlines():
            if help_lines == 0:
                if help_files > 0:
                    print('}', file=f)
                help_files += 1
                help_lines += 1
                print(f'static void add_help_{help_files}(QPDFArgParser& ap)\n'
                      '{', file=f)
            lineno += 1
            if state == st_top:
                m = re.match(r'^(\s*\.\. )help-topic (\S+): (.*)$', line)
                if m:
                    set_indent(m.group(1))
                    topic = m.group(2)
                    short_text = m.group(3)
                    long_text = ''
                    state = st_topic
                    continue
                m = re.match(
                    r'^(\s*\.\. )qpdf:option:: (([^=\s]+)([= ](.+))?)$',
                    line)
                if m:
                    if topic is None:
                        raise Exception('option seen before topic')
                    set_indent(m.group(1))
                    option = m.group(3)
                    synopsis = m.group(2)
                    if synopsis.endswith('`'):
                        raise Exception(
                            f'stray ` at end of option line (line {lineno})')
                    if synopsis != option:
                        long_text = synopsis + '\n'
                    else:
                        long_text = ''
                    state = st_option
                    continue
            elif state == st_topic:
                if append_long_text(line, topic):
                    self.all_topics.add(topic)
                    print(f'ap.addHelpTopic("{topic}", "{short_text}",'
                          f' R"({long_text})");', file=f)
                    help_lines += 1
                    state = st_top
            elif state == st_option:
                if line == '\n' or line.startswith(indent):
                    m = re.match(r'^(\s*\.\. )help: (.*)$', line)
                    if m:
                        set_indent(m.group(1))
                        short_text = m.group(2)
                        state = st_option_help
                else:
                    raise Exception('option without help text')
                    state = st_top
            elif state == st_option_help:
                if append_long_text(line, option):
                    if option in self.options_without_help:
                        self.options_without_help.remove(option)
                    else:
                        raise Exception(
                            f'help for unknown option {option},'
                            f' lineno={lineno}')
                    print(f'ap.addOptionHelp("{option}", "{topic}",'
                          f' "{short_text}", R"({long_text})");', file=f)
                    help_lines += 1
                    state = st_top
            if help_lines == 20:
                help_lines = 0
        print('}', file=f)
        print('static void add_help(QPDFArgParser& ap)\n{', file=f)
        for i in range(help_files):
            print(f'    add_help_{i+1}(ap);', file=f)
        print('ap.addHelpFooter("For detailed help, visit'
              ' the qpdf manual: https://qpdf.readthedocs.io\\n");', file=f)
        print('}\n', file=f)
        for i in self.referenced_topics:
            if i not in self.all_topics:
                raise Exception(f'help text referenced --help={i}')
        for i in self.options_without_help:
            raise Exception(
                'Options without help: ' +
                ', '.join(self.options_without_help))

    def generate(self):
        """Regenerate every DESTS file, then refresh the checksum file.

        Loads and validates job.yml, builds the declaration and
        initialization lines with prepare(), writes each output file
        with the banner at the top, and converts manual/cli.rst into
        the help file with generate_doc().
        """
        warn(f'{whoami}: regenerating auto job files')

        with open('job.yml', 'r') as f:
            job_text = f.read()
        data = yaml.safe_load(job_text)
        self.validate(data)
        # Options registered outside of job.yml that still need help text.
        self.options_without_help = {
            '--completion-bash', '--completion-zsh', '--help',
        }
        self.prepare(data)

        for role, lines in (('decl', self.decls), ('init', self.init)):
            with open(self.DESTS[role], 'w') as f:
                print(BANNER, file=f)
                for text in lines:
                    print(text, file=f)
        with open(self.DESTS['help'], 'w') as f, \
                open('manual/cli.rst', 'r') as df:
            print(BANNER, file=f)
            self.generate_doc(df, f)

        # Update hashes last to ensure that this will be rerun in the
        # event of a failure.
        self.update_hashes()
        # DON'T ADD CODE TO generate AFTER update_hashes

    def prepare(self, data):
        self.decls = []
        self.init = []

        self.init.append('auto b = [this](void (ArgParser::*f)()) {')
        self.init.append('    return QPDFArgParser::bindBare(f, this);')
        self.init.append('};')
        self.init.append('auto p = [this](void (ArgParser::*f)(char *)) {')
        self.init.append('    return QPDFArgParser::bindParam(f, this);')
        self.init.append('};')
        self.init.append('')
        for k, v in data['choices'].items():
            s = f'char const* {k}_choices[] = {{'
            for i in v:
                s += f'"{i}", '
            self.init.append(s + '0};')
        self.init.append('')

        for o in data['options']:
            table = o['table']
            if table in ('main', 'help'):
                continue
            i = self.to_identifier(table, 'O', True)
            self.decls.append(f'static constexpr char const* {i} = "{table}";')
        self.decls.append('')
        for o in data['options']:
            table = o['table']

            if table == 'main':
                self.init.append('this->ap.selectMainOptionTable();')
            elif table == 'help':
                self.init.append('this->ap.selectHelpOptionTable();')
            else:
                identifier = self.to_identifier(table, 'argEnd', False)
                self.init.append(f'this->ap.registerOptionTable("{table}",'
                                 f' b(&ArgParser::{identifier}));')
            prefix = 'arg' + o.get('prefix', '')
            if o.get('positional', False):
                identifier = self.to_identifier(i, prefix, False)
                self.decls.append(f'void {prefix}Positional(char*);')
                self.init.append('this->ap.addPositional('
                                 f'p(&ArgParser::{prefix}Positional));')
            for i in o.get('bare', []):
                self.options_without_help.add(f'--{i}')
                identifier = self.to_identifier(i, prefix, False)
                self.decls.append(f'void {identifier}();')
                self.init.append(f'this->ap.addBare("{i}", '
                                 f'b(&ArgParser::{identifier}));')
            for i in o.get('optional_parameter', []):
                self.options_without_help.add(f'--{i}')
                identifier = self.to_identifier(i, prefix, False)
                self.decls.append(f'void {identifier}(char *);')
                self.init.append(f'this->ap.addOptionalParameter("{i}", '
                                 f'p(&ArgParser::{identifier}));')
            for i, v in o.get('required_parameter', {}).items():
                self.options_without_help.add(f'--{i}')
                identifier = self.to_identifier(i, prefix, False)
                self.decls.append(f'void {identifier}(char *);')
                self.init.append(f'this->ap.addRequiredParameter("{i}", '
                                 f'p(&ArgParser::{identifier})'
                                 f', "{v}");')
            for i, v in o.get('required_choices', {}).items():
                self.options_without_help.add(f'--{i}')
                identifier = self.to_identifier(i, prefix, False)
                self.decls.append(f'void {identifier}(char *);')
                self.init.append(f'this->ap.addChoices("{i}", '
                                 f'p(&ArgParser::{identifier})'
                                 f', true, {v}_choices);')
            for i, v in o.get('optional_choices', {}).items():
                self.options_without_help.add(f'--{i}')
                identifier = self.to_identifier(i, prefix, False)
                self.decls.append(f'void {identifier}(char *);')
                self.init.append(f'this->ap.addChoices("{i}", '
                                 f'p(&ArgParser::{identifier})'
                                 f', false, {v}_choices);')
            if table not in ('main', 'help'):
                identifier = self.to_identifier(table, 'argEnd', False)
                self.decls.append(f'void {identifier}();')
        for o in data['options']:
            table = o['table']
            if 'from_table' not in o:
                continue
            if table == 'main':
                self.init.append('this->ap.selectMainOptionTable();')
            elif table == 'help':
                self.init.append('this->ap.selectHelpOptionTable();')
            else:
                self.init.append(f'this->ap.selectOptionTable("{table}");')
            ft = o['from_table']
            other_table = ft['table']
            for j in ft['options']:
                self.init.append('this->ap.copyFromOtherTable'
                                 f'("{j}", "{other_table}");')

    def check_keys(self, what, d, exp):
        if not isinstance(d, dict):
            exit(f'{what} is not a dictionary')
        actual = set(d.keys())
        extra = actual - exp
        if extra:
            exit(f'{what}: unknown keys = {extra}')

    def validate(self, data):
        self.check_keys('top', data, set(['choices', 'options']))
        for o in data['options']:
            self.check_keys('top', o, set(
                ['table', 'prefix', 'bare', 'positional',
                 'optional_parameter', 'required_parameter',
                 'required_choices', 'optional_choices', 'from_table']))

    def to_identifier(self, label, prefix, const):
        identifier = re.sub(r'[^a-zA-Z0-9]', '_', label)
        if const:
            identifier = f'{prefix}_{identifier.upper()}'
        else:
            identifier = f'{prefix}_{identifier.lower()}'
            identifier = re.sub(r'_([a-z])',
                                lambda x: x.group(1).upper(),
                                identifier).replace('_', '')
        return identifier


if __name__ == '__main__':
    try:
        # The file names used by Main are relative, so run from the
        # directory that contains this script.
        script_path = os.path.realpath(__file__)
        os.chdir(os.path.dirname(script_path))
        Main().main()
    except KeyboardInterrupt:
        # Exit quietly with status 130 when interrupted.
        exit(130)