diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc index 1005b07..a59bec0 100644 --- a/libqpdf/QPDFJob.cc +++ b/libqpdf/QPDFJob.cc @@ -257,7 +257,7 @@ struct QPDFJob::PageNo }; QPDFJob::QPDFJob() : - m(std::make_shared()) + m(std::make_shared(*this)) { } @@ -2342,6 +2342,63 @@ QPDFJob::new_selection( m->selections.emplace_back(filename, password, range); } +void +QPDFJob::Inputs::process(std::string const& filename, QPDFJob::Input& input) +{ + // Open the PDF file and store the QPDF object. Do not canonicalize the file name. Using two + // different paths to refer to the same file is a documented workaround for duplicating a page. + // If you are using this as an example of how to do this with the API, you can just create two + // different QPDF objects to the same underlying file with the same path to achieve the + // same effect. + auto password = input.password; + if (!encryption_file.empty() && password.empty() && filename == encryption_file) { + password = encryption_file_password; + } + job.doIfVerbose([&](Pipeline& v, std::string const& prefix) { + v << prefix << ": processing " << filename << "\n"; + }); + if (!keep_files_open) { + auto cis = std::make_shared(filename.data()); + input.cfis = cis.get(); + input.cfis->stayOpen(true); + job.processInputSource(input.qpdf_p, cis, password.data(), true); + } else { + job.processInputSource( + input.qpdf_p, + std::make_shared(filename.data()), + password.data(), + true); + } + input.qpdf = input.qpdf_p.get(); + input.orig_pages = input.qpdf->getAllPages(); + input.n_pages = QIntC::to_int(input.orig_pages.size()); + if (input.cfis) { + input.cfis->stayOpen(false); + } +} + +void +QPDFJob::Inputs::process_all() +{ + if (!keep_files_open_set) { + // Count the number of distinct files to determine whether we should keep files open or not. + // Rather than trying to code some portable heuristic based on OS limits, just hard-code + // this at a given number and allow users to override. 
+ keep_files_open = files.size() <= keep_files_open_threshold; + QTC::TC("qpdf", "QPDFJob automatically set keep files open", keep_files_open ? 0 : 1); + job.doIfVerbose([&](Pipeline& v, std::string const& prefix) { + v << prefix << ": selecting --keep-open-files=" << (keep_files_open ? "y" : "n") + << "\n"; + }); + } + + for (auto& [filename, input]: files) { + if (!input.qpdf) { + process(filename, input); + } + } +} + // Handle all page specifications. Return true if it succeeded without warnings. bool QPDFJob::handlePageSpecs(QPDF& pdf) @@ -2369,56 +2426,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf) } } - if (!m->keep_files_open_set) { - // Count the number of distinct files to determine whether we should keep files open or not. - // Rather than trying to code some portable heuristic based on OS limits, just hard-code - // this at a given number and allow users to override. - m->keep_files_open = m->inputs.files.size() <= m->keep_files_open_threshold; - QTC::TC("qpdf", "QPDFJob automatically set keep files open", m->keep_files_open ? 0 : 1); - doIfVerbose([&](Pipeline& v, std::string const& prefix) { - v << prefix << ": selecting --keep-open-files=" << (m->keep_files_open ? "y" : "n") - << "\n"; - }); - } - - // Create a QPDF object for each file that we may take pages from. - for (auto& [filename, input]: m->inputs.files) { - if (!input.qpdf) { - // Open the PDF file and store the QPDF object. Throw a std::shared_ptr to the qpdf into - // a heap so that it survives through copying to the output but gets cleaned up - // automatically at the end. Do not canonicalize the file name. Using two different - // paths to refer to the same file is a documented workaround for duplicating a page. If - // you are using this an example of how to do this with the API, you can just create two - // different QPDF objects to the same underlying file with the same path to achieve the - // same effect. 
- auto password = input.password; - if (!m->encryption_file.empty() && password.empty() && filename == m->encryption_file) { - password = m->encryption_file_password; - } - doIfVerbose([&](Pipeline& v, std::string const& prefix) { - v << prefix << ": processing " << filename << "\n"; - }); - if (!m->keep_files_open) { - auto cis = std::make_shared(filename.data()); - cis->stayOpen(true); - processInputSource(input.qpdf_p, cis, password.data(), true); - cis->stayOpen(false); - input.cfis = cis.get(); - } else { - processInputSource( - input.qpdf_p, - std::make_shared(filename.data()), - password.data(), - true); - } - input.qpdf = input.qpdf_p.get(); - input.orig_pages = input.qpdf->getAllPages(); - input.n_pages = QIntC::to_int(input.orig_pages.size()); - if (input.cfis) { - input.cfis->stayOpen(false); - } - } - } + m->inputs.process_all(); std::map> copied_pages; for (auto& selection: m->selections) { @@ -2427,8 +2435,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf) auto const& input = m->inputs.files[selection.filename]; selection.qpdf = input.qpdf; try { - selection.selected_pages = - QUtil::parse_numrange(selection.range.data(), input.n_pages); + selection.selected_pages = QUtil::parse_numrange(selection.range.data(), input.n_pages); } catch (std::runtime_error& e) { throw std::runtime_error( "parsing numeric range for " + selection.filename + ": " + e.what()); @@ -2887,8 +2894,8 @@ QPDFJob::setWriterOptions(QPDFWriter& w) std::unique_ptr encryption_pdf; processFile( encryption_pdf, - m->encryption_file.data(), - m->encryption_file_password.data(), + m->inputs.encryption_file.data(), + m->inputs.encryption_file_password.data(), false, false); w.copyEncryptionParameters(*encryption_pdf); diff --git a/libqpdf/QPDFJob_config.cc b/libqpdf/QPDFJob_config.cc index fd6e3f5..6aac121 100644 --- a/libqpdf/QPDFJob_config.cc +++ b/libqpdf/QPDFJob_config.cc @@ -152,7 +152,7 @@ QPDFJob::Config::copyEncryption(std::string const& parameter) if (o.m->deterministic_id) { usage("the 
deterministic-id option is incompatible with encrypted output files"); } - o.m->encryption_file = parameter; + o.m->inputs.encryption_file = parameter; o.m->copy_encryption = true; o.m->encrypt = false; o.m->decrypt = false; @@ -181,7 +181,7 @@ QPDFJob::Config::deterministicId() QPDFJob::Config* QPDFJob::Config::encryptionFilePassword(std::string const& parameter) { - o.m->encryption_file_password = parameter; + o.m->inputs.encryption_file_password = parameter; return this; } @@ -354,15 +354,15 @@ QPDFJob::Config::testJsonSchema() QPDFJob::Config* QPDFJob::Config::keepFilesOpen(std::string const& parameter) { - o.m->keep_files_open_set = true; - o.m->keep_files_open = (parameter == "y"); + o.m->inputs.keep_files_open_set = true; + o.m->inputs.keep_files_open = (parameter == "y"); return this; } QPDFJob::Config* QPDFJob::Config::keepFilesOpenThreshold(std::string const& parameter) { - o.m->keep_files_open_threshold = QUtil::string_to_uint(parameter.c_str()); + o.m->inputs.keep_files_open_threshold = QUtil::string_to_uint(parameter.c_str()); return this; } diff --git a/libqpdf/qpdf/QPDFJob_private.hh b/libqpdf/qpdf/QPDFJob_private.hh index 42c389c..9efaa39 100644 --- a/libqpdf/qpdf/QPDFJob_private.hh +++ b/libqpdf/qpdf/QPDFJob_private.hh @@ -49,7 +49,25 @@ struct QPDFJob::Input // All PDF input files for a job. struct QPDFJob::Inputs { + // These default values are duplicated in help and docs. 
+ static int constexpr DEFAULT_KEEP_FILES_OPEN_THRESHOLD = 200; + + Inputs(QPDFJob& job) : + job(job) + { + } + void process(std::string const& filename, QPDFJob::Input& file_spec); + void process_all(); + std::string encryption_file; + std::string encryption_file_password; + bool keep_files_open{true}; + bool keep_files_open_set{false}; + size_t keep_files_open_threshold{DEFAULT_KEEP_FILES_OPEN_THRESHOLD}; + std::map files; + + private: + QPDFJob& job; }; struct QPDFJob::RotationSpec @@ -107,8 +125,9 @@ class QPDFJob::Members friend class QPDFJob; public: - Members() : - log(QPDFLogger::defaultLogger()) + Members(QPDFJob& job) : + log(QPDFLogger::defaultLogger()), + inputs(job) { } Members(Members const&) = delete; @@ -116,7 +135,6 @@ class QPDFJob::Members private: // These default values are duplicated in help and docs. - static int constexpr DEFAULT_KEEP_FILES_OPEN_THRESHOLD = 200; static int constexpr DEFAULT_OI_MIN_WIDTH = 128; static int constexpr DEFAULT_OI_MIN_HEIGHT = 128; static int constexpr DEFAULT_OI_MIN_AREA = 16384; @@ -137,8 +155,6 @@ class QPDFJob::Members bool suppress_warnings{false}; bool warnings_exit_zero{false}; bool copy_encryption{false}; - std::string encryption_file; - std::string encryption_file_password; bool encrypt{false}; bool password_is_hex_key{false}; bool suppress_password_recovery{false}; @@ -182,9 +198,6 @@ class QPDFJob::Members bool qdf_mode{false}; bool preserve_unreferenced_objects{false}; remove_unref_e remove_unreferenced_page_resources{re_auto}; - bool keep_files_open{true}; - bool keep_files_open_set{false}; - size_t keep_files_open_threshold{DEFAULT_KEEP_FILES_OPEN_THRESHOLD}; bool newline_before_endstream{false}; std::string linearize_pass1; bool coalesce_contents{false};