From 7fa74c4f03ae285c2449a061b3da1a34b1e68eca Mon Sep 17 00:00:00 2001 From: m-holger Date: Mon, 15 Sep 2025 01:00:01 +0100 Subject: [PATCH] Refactor `QPDFJob::handlePageSpecs`: introduce `Inputs` and `Input` to replace `page_heap` and `page_spec_qpdfs`, simplify file handling, and improve code clarity. --- include/qpdf/QPDFJob.hh | 2 ++ libqpdf/QPDFJob.cc | 37 +++++++++++++++++-------------------- libqpdf/qpdf/QPDFJob_private.hh | 17 +++++++++++++++++ 3 files changed, 36 insertions(+), 20 deletions(-) diff --git a/include/qpdf/QPDFJob.hh b/include/qpdf/QPDFJob.hh index c1e2cf7..9dfe876 100644 --- a/include/qpdf/QPDFJob.hh +++ b/include/qpdf/QPDFJob.hh @@ -417,6 +417,8 @@ class QPDFJob private: struct PageNo; struct Selection; + struct Input; + struct Inputs; struct RotationSpec; struct UnderOverlay; struct PageLabelSpec; diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc index fb7491f..67cc670 100644 --- a/libqpdf/QPDFJob.cc +++ b/libqpdf/QPDFJob.cc @@ -2346,7 +2346,7 @@ QPDFJob::new_selection( bool QPDFJob::handlePageSpecs(QPDF& pdf) { - std::vector> page_heap; + m->inputs.files[m->infilename].qpdf = &pdf; // Parse all page specifications and translate them into lists of actual pages. @@ -2354,6 +2354,9 @@ QPDFJob::handlePageSpecs(QPDF& pdf) for (auto& selection: m->selections) { if (selection.filename == ".") { selection.filename = m->infilename; + } else { + // Force insertion + (void)m->inputs.files[selection.filename]; } if (selection.range.empty()) { selection.range = "1-z"; @@ -2364,11 +2367,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf) // Count the number of distinct files to determine whether we should keep files open or not. // Rather than trying to code some portable heuristic based on OS limits, just hard-code // this at a given number and allow users to override. 
- std::set filenames; - for (auto& selection: m->selections) { - filenames.insert(selection.filename); - } - m->keep_files_open = (filenames.size() <= m->keep_files_open_threshold); + m->keep_files_open = m->inputs.files.size() <= m->keep_files_open_threshold; QTC::TC("qpdf", "QPDFJob automatically set keep files open", m->keep_files_open ? 0 : 1); doIfVerbose([&](Pipeline& v, std::string const& prefix) { v << prefix << ": selecting --keep-open-files=" << (m->keep_files_open ? "y" : "n") @@ -2377,12 +2376,11 @@ QPDFJob::handlePageSpecs(QPDF& pdf) } // Create a QPDF object for each file that we may take pages from. - std::map page_spec_qpdfs; std::map page_spec_cfis; - page_spec_qpdfs[m->infilename] = &pdf; std::map> copied_pages; for (auto& selection: m->selections) { - if (!page_spec_qpdfs.contains(selection.filename)) { + auto& input = m->inputs.files[selection.filename]; + if (!input.qpdf) { // Open the PDF file and store the QPDF object. Throw a std::shared_ptr to the qpdf into // a heap so that it survives through copying to the output but gets cleaned up // automatically at the end. Do not canonicalize the file name. Using two different @@ -2408,10 +2406,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf) FileInputSource* fis = new FileInputSource(selection.filename.data()); is = std::shared_ptr(fis); } - std::unique_ptr qpdf_sp; - processInputSource(qpdf_sp, is, password.data(), true); - page_spec_qpdfs[selection.filename] = qpdf_sp.get(); - page_heap.push_back(std::move(qpdf_sp)); + processInputSource(input.qpdf_p, is, password.data(), true); + input.qpdf = input.qpdf_p.get(); if (cis) { cis->stayOpen(false); page_spec_cfis[selection.filename] = cis; @@ -2420,7 +2416,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf) // Read original pages from the PDF, and parse the page range associated with this // occurrence of the file. 
- selection.qpdf = page_spec_qpdfs[selection.filename]; + selection.qpdf = m->inputs.files[selection.filename].qpdf; selection.orig_pages = selection.qpdf->getAllPages(); try { selection.selected_pages = QUtil::parse_numrange( @@ -2433,14 +2429,13 @@ QPDFJob::handlePageSpecs(QPDF& pdf) std::map remove_unreferenced; if (m->remove_unreferenced_page_resources != QPDFJob::re_no) { - for (auto const& iter: page_spec_qpdfs) { - std::string const& filename = iter.first; + for (auto const& [filename, input]: m->inputs.files) { ClosedFileInputSource* cis = nullptr; if (page_spec_cfis.contains(filename)) { cis = page_spec_cfis[filename]; cis->stayOpen(true); } - QPDF& other(*(iter.second)); + QPDF& other(*input.qpdf); auto other_uuid = other.getUniqueId(); if (!remove_unreferenced.contains(other_uuid)) { remove_unreferenced[other_uuid] = shouldRemoveUnreferencedResources(other); @@ -2618,9 +2613,11 @@ QPDFJob::handlePageSpecs(QPDF& pdf) } } } - for (auto& foreign: page_heap) { - if (foreign->anyWarnings()) { - return false; + for (auto& foreign: m->inputs.files) { + if (foreign.second.qpdf_p) { // exclude main input + if (foreign.second.qpdf->anyWarnings()) { + return false; + } } } return true; diff --git a/libqpdf/qpdf/QPDFJob_private.hh b/libqpdf/qpdf/QPDFJob_private.hh index b23a18a..610babe 100644 --- a/libqpdf/qpdf/QPDFJob_private.hh +++ b/libqpdf/qpdf/QPDFJob_private.hh @@ -34,6 +34,22 @@ struct QPDFJob::Selection std::vector selected_pages; }; +// A single input PDF. +// +// N.B. A single input PDF may be represented by multiple Input instances using variations of the +// filename. This is a documented work-around. +struct QPDFJob::Input +{ + std::unique_ptr<QPDF> qpdf_p; // Owner for files opened by handlePageSpecs; stays empty for the main input. + QPDF* qpdf{nullptr}; // Non-owning; stays null until handlePageSpecs assigns it. +}; + +// All PDF input files for a job. 
+struct QPDFJob::Inputs +{ + std::map<std::string, Input> files; // Keyed by the file name as specified (not canonicalized). +}; + struct QPDFJob::RotationSpec { RotationSpec(int angle = 0, bool relative = false) : @@ -223,6 +239,7 @@ class QPDFJob::Members std::vector underlay; std::vector overlay; UnderOverlay* under_overlay{nullptr}; + Inputs inputs; std::vector selections; std::map rotations; bool require_outfile{true}; -- libgit2 0.21.4