Commit 7fa74c4f03ae285c2449a061b3da1a34b1e68eca

Authored by m-holger
1 parent 980df238

Refactor `QPDFJob::handlePageSpecs`: introduce `Inputs` and `Input` to replace `page_heap` and `page_spec_qpdfs`, simplify file handling, and improve code clarity.
include/qpdf/QPDFJob.hh
... ... @@ -417,6 +417,8 @@ class QPDFJob
417 417 private:
418 418 struct PageNo;
419 419 struct Selection;
  420 + struct Input;
  421 + struct Inputs;
420 422 struct RotationSpec;
421 423 struct UnderOverlay;
422 424 struct PageLabelSpec;
... ...
libqpdf/QPDFJob.cc
... ... @@ -2346,7 +2346,7 @@ QPDFJob::new_selection(
2346 2346 bool
2347 2347 QPDFJob::handlePageSpecs(QPDF& pdf)
2348 2348 {
2349   - std::vector<std::unique_ptr<QPDF>> page_heap;
  2349 + m->inputs.files[m->infilename].qpdf = &pdf;
2350 2350  
2351 2351 // Parse all page specifications and translate them into lists of actual pages.
2352 2352  
... ... @@ -2354,6 +2354,9 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2354 2354 for (auto& selection: m->selections) {
2355 2355 if (selection.filename == ".") {
2356 2356 selection.filename = m->infilename;
  2357 + } else {
  2358 + // Force insertion
  2359 + (void)m->inputs.files[selection.filename];
2357 2360 }
2358 2361 if (selection.range.empty()) {
2359 2362 selection.range = "1-z";
... ... @@ -2364,11 +2367,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2364 2367 // Count the number of distinct files to determine whether we should keep files open or not.
2365 2368 // Rather than trying to code some portable heuristic based on OS limits, just hard-code
2366 2369 // this at a given number and allow users to override.
2367   - std::set<std::string> filenames;
2368   - for (auto& selection: m->selections) {
2369   - filenames.insert(selection.filename);
2370   - }
2371   - m->keep_files_open = (filenames.size() <= m->keep_files_open_threshold);
  2370 + m->keep_files_open = m->inputs.files.size() <= m->keep_files_open_threshold;
2372 2371 QTC::TC("qpdf", "QPDFJob automatically set keep files open", m->keep_files_open ? 0 : 1);
2373 2372 doIfVerbose([&](Pipeline& v, std::string const& prefix) {
2374 2373 v << prefix << ": selecting --keep-open-files=" << (m->keep_files_open ? "y" : "n")
... ... @@ -2377,12 +2376,11 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2377 2376 }
2378 2377  
2379 2378 // Create a QPDF object for each file that we may take pages from.
2380   - std::map<std::string, QPDF*> page_spec_qpdfs;
2381 2379 std::map<std::string, ClosedFileInputSource*> page_spec_cfis;
2382   - page_spec_qpdfs[m->infilename] = &pdf;
2383 2380 std::map<unsigned long long, std::set<QPDFObjGen>> copied_pages;
2384 2381 for (auto& selection: m->selections) {
2385   - if (!page_spec_qpdfs.contains(selection.filename)) {
  2382 + auto& input = m->inputs.files[selection.filename];
  2383 + if (!input.qpdf) {
2386 2384 // Open the PDF file and store the QPDF object. Throw a std::shared_ptr to the qpdf into
2387 2385 // a heap so that it survives through copying to the output but gets cleaned up
2388 2386 // automatically at the end. Do not canonicalize the file name. Using two different
... ... @@ -2408,10 +2406,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2408 2406 FileInputSource* fis = new FileInputSource(selection.filename.data());
2409 2407 is = std::shared_ptr<InputSource>(fis);
2410 2408 }
2411   - std::unique_ptr<QPDF> qpdf_sp;
2412   - processInputSource(qpdf_sp, is, password.data(), true);
2413   - page_spec_qpdfs[selection.filename] = qpdf_sp.get();
2414   - page_heap.push_back(std::move(qpdf_sp));
  2409 + processInputSource(input.qpdf_p, is, password.data(), true);
  2410 + input.qpdf = input.qpdf_p.get();
2415 2411 if (cis) {
2416 2412 cis->stayOpen(false);
2417 2413 page_spec_cfis[selection.filename] = cis;
... ... @@ -2420,7 +2416,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2420 2416  
2421 2417 // Read original pages from the PDF, and parse the page range associated with this
2422 2418 // occurrence of the file.
2423   - selection.qpdf = page_spec_qpdfs[selection.filename];
  2419 + selection.qpdf = m->inputs.files[selection.filename].qpdf;
2424 2420 selection.orig_pages = selection.qpdf->getAllPages();
2425 2421 try {
2426 2422 selection.selected_pages = QUtil::parse_numrange(
... ... @@ -2433,14 +2429,13 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2433 2429  
2434 2430 std::map<unsigned long long, bool> remove_unreferenced;
2435 2431 if (m->remove_unreferenced_page_resources != QPDFJob::re_no) {
2436   - for (auto const& iter: page_spec_qpdfs) {
2437   - std::string const& filename = iter.first;
  2432 + for (auto const& [filename, input]: m->inputs.files) {
2438 2433 ClosedFileInputSource* cis = nullptr;
2439 2434 if (page_spec_cfis.contains(filename)) {
2440 2435 cis = page_spec_cfis[filename];
2441 2436 cis->stayOpen(true);
2442 2437 }
2443   - QPDF& other(*(iter.second));
  2438 + QPDF& other(*input.qpdf);
2444 2439 auto other_uuid = other.getUniqueId();
2445 2440 if (!remove_unreferenced.contains(other_uuid)) {
2446 2441 remove_unreferenced[other_uuid] = shouldRemoveUnreferencedResources(other);
... ... @@ -2618,9 +2613,11 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2618 2613 }
2619 2614 }
2620 2615 }
2621   - for (auto& foreign: page_heap) {
2622   - if (foreign->anyWarnings()) {
2623   - return false;
  2616 + for (auto& foreign: m->inputs.files) {
  2617 + if (foreign.second.qpdf_p) { // exclude main input
  2618 + if (foreign.second.qpdf->anyWarnings()) {
  2619 + return false;
  2620 + }
2624 2621 }
2625 2622 }
2626 2623 return true;
... ...
libqpdf/qpdf/QPDFJob_private.hh
... ... @@ -34,6 +34,22 @@ struct QPDFJob::Selection
34 34 std::vector<int> selected_pages;
35 35 };
36 36  
  37 +// A single input PDF.
  38 +//
  39 +// N.B. A single input PDF may be represented by multiple Input instances using variations of the
  40 +// filename. This is a documented work-around.
  41 +struct QPDFJob::Input
  42 +{
  43 + std::unique_ptr<QPDF> qpdf_p;
  44 + QPDF* qpdf;
  45 +};
  46 +
  47 +// All PDF input files for a job.
  48 +struct QPDFJob::Inputs
  49 +{
  50 + std::map<std::string, Input> files;
  51 +};
  52 +
37 53 struct QPDFJob::RotationSpec
38 54 {
39 55 RotationSpec(int angle = 0, bool relative = false) :
... ... @@ -223,6 +239,7 @@ class QPDFJob::Members
223 239 std::vector<UnderOverlay> underlay;
224 240 std::vector<UnderOverlay> overlay;
225 241 UnderOverlay* under_overlay{nullptr};
  242 + Inputs inputs;
226 243 std::vector<Selection> selections;
227 244 std::map<std::string, RotationSpec> rotations;
228 245 bool require_outfile{true};
... ...