Commit 7fa74c4f03ae285c2449a061b3da1a34b1e68eca
1 parent
980df238
Refactor `QPDFJob::handlePageSpecs`: introduce `Inputs` and `Input` to replace `page_heap` and `page_spec_qpdfs`, simplify file handling, and improve code clarity.
Showing 3 changed files with 36 additions and 20 deletions
include/qpdf/QPDFJob.hh
libqpdf/QPDFJob.cc
| ... | ... | @@ -2346,7 +2346,7 @@ QPDFJob::new_selection( |
| 2346 | 2346 | bool |
| 2347 | 2347 | QPDFJob::handlePageSpecs(QPDF& pdf) |
| 2348 | 2348 | { |
| 2349 | - std::vector<std::unique_ptr<QPDF>> page_heap; | |
| 2349 | + m->inputs.files[m->infilename].qpdf = &pdf; | |
| 2350 | 2350 | |
| 2351 | 2351 | // Parse all page specifications and translate them into lists of actual pages. |
| 2352 | 2352 | |
| ... | ... | @@ -2354,6 +2354,9 @@ QPDFJob::handlePageSpecs(QPDF& pdf) |
| 2354 | 2354 | for (auto& selection: m->selections) { |
| 2355 | 2355 | if (selection.filename == ".") { |
| 2356 | 2356 | selection.filename = m->infilename; |
| 2357 | + } else { | |
| 2358 | + // Force insertion | |
| 2359 | + (void)m->inputs.files[selection.filename]; | |
| 2357 | 2360 | } |
| 2358 | 2361 | if (selection.range.empty()) { |
| 2359 | 2362 | selection.range = "1-z"; |
| ... | ... | @@ -2364,11 +2367,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf) |
| 2364 | 2367 | // Count the number of distinct files to determine whether we should keep files open or not. |
| 2365 | 2368 | // Rather than trying to code some portable heuristic based on OS limits, just hard-code |
| 2366 | 2369 | // this at a given number and allow users to override. |
| 2367 | - std::set<std::string> filenames; | |
| 2368 | - for (auto& selection: m->selections) { | |
| 2369 | - filenames.insert(selection.filename); | |
| 2370 | - } | |
| 2371 | - m->keep_files_open = (filenames.size() <= m->keep_files_open_threshold); | |
| 2370 | + m->keep_files_open = m->inputs.files.size() <= m->keep_files_open_threshold; | |
| 2372 | 2371 | QTC::TC("qpdf", "QPDFJob automatically set keep files open", m->keep_files_open ? 0 : 1); |
| 2373 | 2372 | doIfVerbose([&](Pipeline& v, std::string const& prefix) { |
| 2374 | 2373 | v << prefix << ": selecting --keep-open-files=" << (m->keep_files_open ? "y" : "n") |
| ... | ... | @@ -2377,12 +2376,11 @@ QPDFJob::handlePageSpecs(QPDF& pdf) |
| 2377 | 2376 | } |
| 2378 | 2377 | |
| 2379 | 2378 | // Create a QPDF object for each file that we may take pages from. |
| 2380 | - std::map<std::string, QPDF*> page_spec_qpdfs; | |
| 2381 | 2379 | std::map<std::string, ClosedFileInputSource*> page_spec_cfis; |
| 2382 | - page_spec_qpdfs[m->infilename] = &pdf; | |
| 2383 | 2380 | std::map<unsigned long long, std::set<QPDFObjGen>> copied_pages; |
| 2384 | 2381 | for (auto& selection: m->selections) { |
| 2385 | - if (!page_spec_qpdfs.contains(selection.filename)) { | |
| 2382 | + auto& input = m->inputs.files[selection.filename]; | |
| 2383 | + if (!input.qpdf) { | |
| 2386 | 2384 | // Open the PDF file and store the QPDF object. Throw a std::shared_ptr to the qpdf into |
| 2387 | 2385 | // a heap so that it survives through copying to the output but gets cleaned up |
| 2388 | 2386 | // automatically at the end. Do not canonicalize the file name. Using two different |
| ... | ... | @@ -2408,10 +2406,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf) |
| 2408 | 2406 | FileInputSource* fis = new FileInputSource(selection.filename.data()); |
| 2409 | 2407 | is = std::shared_ptr<InputSource>(fis); |
| 2410 | 2408 | } |
| 2411 | - std::unique_ptr<QPDF> qpdf_sp; | |
| 2412 | - processInputSource(qpdf_sp, is, password.data(), true); | |
| 2413 | - page_spec_qpdfs[selection.filename] = qpdf_sp.get(); | |
| 2414 | - page_heap.push_back(std::move(qpdf_sp)); | |
| 2409 | + processInputSource(input.qpdf_p, is, password.data(), true); | |
| 2410 | + input.qpdf = input.qpdf_p.get(); | |
| 2415 | 2411 | if (cis) { |
| 2416 | 2412 | cis->stayOpen(false); |
| 2417 | 2413 | page_spec_cfis[selection.filename] = cis; |
| ... | ... | @@ -2420,7 +2416,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf) |
| 2420 | 2416 | |
| 2421 | 2417 | // Read original pages from the PDF, and parse the page range associated with this |
| 2422 | 2418 | // occurrence of the file. |
| 2423 | - selection.qpdf = page_spec_qpdfs[selection.filename]; | |
| 2419 | + selection.qpdf = m->inputs.files[selection.filename].qpdf; | |
| 2424 | 2420 | selection.orig_pages = selection.qpdf->getAllPages(); |
| 2425 | 2421 | try { |
| 2426 | 2422 | selection.selected_pages = QUtil::parse_numrange( |
| ... | ... | @@ -2433,14 +2429,13 @@ QPDFJob::handlePageSpecs(QPDF& pdf) |
| 2433 | 2429 | |
| 2434 | 2430 | std::map<unsigned long long, bool> remove_unreferenced; |
| 2435 | 2431 | if (m->remove_unreferenced_page_resources != QPDFJob::re_no) { |
| 2436 | - for (auto const& iter: page_spec_qpdfs) { | |
| 2437 | - std::string const& filename = iter.first; | |
| 2432 | + for (auto const& [filename, input]: m->inputs.files) { | |
| 2438 | 2433 | ClosedFileInputSource* cis = nullptr; |
| 2439 | 2434 | if (page_spec_cfis.contains(filename)) { |
| 2440 | 2435 | cis = page_spec_cfis[filename]; |
| 2441 | 2436 | cis->stayOpen(true); |
| 2442 | 2437 | } |
| 2443 | - QPDF& other(*(iter.second)); | |
| 2438 | + QPDF& other(*input.qpdf); | |
| 2444 | 2439 | auto other_uuid = other.getUniqueId(); |
| 2445 | 2440 | if (!remove_unreferenced.contains(other_uuid)) { |
| 2446 | 2441 | remove_unreferenced[other_uuid] = shouldRemoveUnreferencedResources(other); |
| ... | ... | @@ -2618,9 +2613,11 @@ QPDFJob::handlePageSpecs(QPDF& pdf) |
| 2618 | 2613 | } |
| 2619 | 2614 | } |
| 2620 | 2615 | } |
| 2621 | - for (auto& foreign: page_heap) { | |
| 2622 | - if (foreign->anyWarnings()) { | |
| 2623 | - return false; | |
| 2616 | + for (auto& foreign: m->inputs.files) { | |
| 2617 | + if (foreign.second.qpdf_p) { // exclude main input | |
| 2618 | + if (foreign.second.qpdf->anyWarnings()) { | |
| 2619 | + return false; | |
| 2620 | + } | |
| 2624 | 2621 | } |
| 2625 | 2622 | } |
| 2626 | 2623 | return true; | ... | ... |
libqpdf/qpdf/QPDFJob_private.hh
| ... | ... | @@ -34,6 +34,22 @@ struct QPDFJob::Selection |
| 34 | 34 | std::vector<int> selected_pages; |
| 35 | 35 | }; |
| 36 | 36 | |
| 37 | +// A single input PDF. | |
| 38 | +// | |
| 39 | +// N.B. A single input PDF may be represented by multiple Input instances using variations of the | |
| 40 | +// filename. This is a documented work-around. | |
| 41 | +struct QPDFJob::Input | |
| 42 | +{ | |
| 43 | + std::unique_ptr<QPDF> qpdf_p; | |
| 44 | + QPDF* qpdf; | |
| 45 | +}; | |
| 46 | + | |
| 47 | +// All PDF input files for a job. | |
| 48 | +struct QPDFJob::Inputs | |
| 49 | +{ | |
| 50 | + std::map<std::string, Input> files; | |
| 51 | +}; | |
| 52 | + | |
| 37 | 53 | struct QPDFJob::RotationSpec |
| 38 | 54 | { |
| 39 | 55 | RotationSpec(int angle = 0, bool relative = false) : |
| ... | ... | @@ -223,6 +239,7 @@ class QPDFJob::Members |
| 223 | 239 | std::vector<UnderOverlay> underlay; |
| 224 | 240 | std::vector<UnderOverlay> overlay; |
| 225 | 241 | UnderOverlay* under_overlay{nullptr}; |
| 242 | + Inputs inputs; | |
| 226 | 243 | std::vector<Selection> selections; |
| 227 | 244 | std::map<std::string, RotationSpec> rotations; |
| 228 | 245 | bool require_outfile{true}; | ... | ... |