Commit 7fa74c4f03ae285c2449a061b3da1a34b1e68eca

Authored by m-holger
1 parent 980df238

Refactor `QPDFJob::handlePageSpecs`: introduce `Inputs` and `Input` to replace `page_heap` and `page_spec_qpdfs`, simplify file handling, and improve code clarity.
include/qpdf/QPDFJob.hh
@@ -417,6 +417,8 @@ class QPDFJob @@ -417,6 +417,8 @@ class QPDFJob
417 private: 417 private:
418 struct PageNo; 418 struct PageNo;
419 struct Selection; 419 struct Selection;
  420 + struct Input;
  421 + struct Inputs;
420 struct RotationSpec; 422 struct RotationSpec;
421 struct UnderOverlay; 423 struct UnderOverlay;
422 struct PageLabelSpec; 424 struct PageLabelSpec;
libqpdf/QPDFJob.cc
@@ -2346,7 +2346,7 @@ QPDFJob::new_selection( @@ -2346,7 +2346,7 @@ QPDFJob::new_selection(
2346 bool 2346 bool
2347 QPDFJob::handlePageSpecs(QPDF& pdf) 2347 QPDFJob::handlePageSpecs(QPDF& pdf)
2348 { 2348 {
2349 - std::vector<std::unique_ptr<QPDF>> page_heap; 2349 + m->inputs.files[m->infilename].qpdf = &pdf;
2350 2350
2351 // Parse all page specifications and translate them into lists of actual pages. 2351 // Parse all page specifications and translate them into lists of actual pages.
2352 2352
@@ -2354,6 +2354,9 @@ QPDFJob::handlePageSpecs(QPDF& pdf) @@ -2354,6 +2354,9 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2354 for (auto& selection: m->selections) { 2354 for (auto& selection: m->selections) {
2355 if (selection.filename == ".") { 2355 if (selection.filename == ".") {
2356 selection.filename = m->infilename; 2356 selection.filename = m->infilename;
  2357 + } else {
  2358 + // Force insertion
  2359 + (void)m->inputs.files[selection.filename];
2357 } 2360 }
2358 if (selection.range.empty()) { 2361 if (selection.range.empty()) {
2359 selection.range = "1-z"; 2362 selection.range = "1-z";
@@ -2364,11 +2367,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf) @@ -2364,11 +2367,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2364 // Count the number of distinct files to determine whether we should keep files open or not. 2367 // Count the number of distinct files to determine whether we should keep files open or not.
2365 // Rather than trying to code some portable heuristic based on OS limits, just hard-code 2368 // Rather than trying to code some portable heuristic based on OS limits, just hard-code
2366 // this at a given number and allow users to override. 2369 // this at a given number and allow users to override.
2367 - std::set<std::string> filenames;  
2368 - for (auto& selection: m->selections) {  
2369 - filenames.insert(selection.filename);  
2370 - }  
2371 - m->keep_files_open = (filenames.size() <= m->keep_files_open_threshold); 2370 + m->keep_files_open = m->inputs.files.size() <= m->keep_files_open_threshold;
2372 QTC::TC("qpdf", "QPDFJob automatically set keep files open", m->keep_files_open ? 0 : 1); 2371 QTC::TC("qpdf", "QPDFJob automatically set keep files open", m->keep_files_open ? 0 : 1);
2373 doIfVerbose([&](Pipeline& v, std::string const& prefix) { 2372 doIfVerbose([&](Pipeline& v, std::string const& prefix) {
2374 v << prefix << ": selecting --keep-open-files=" << (m->keep_files_open ? "y" : "n") 2373 v << prefix << ": selecting --keep-open-files=" << (m->keep_files_open ? "y" : "n")
@@ -2377,12 +2376,11 @@ QPDFJob::handlePageSpecs(QPDF& pdf) @@ -2377,12 +2376,11 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2377 } 2376 }
2378 2377
2379 // Create a QPDF object for each file that we may take pages from. 2378 // Create a QPDF object for each file that we may take pages from.
2380 - std::map<std::string, QPDF*> page_spec_qpdfs;  
2381 std::map<std::string, ClosedFileInputSource*> page_spec_cfis; 2379 std::map<std::string, ClosedFileInputSource*> page_spec_cfis;
2382 - page_spec_qpdfs[m->infilename] = &pdf;  
2383 std::map<unsigned long long, std::set<QPDFObjGen>> copied_pages; 2380 std::map<unsigned long long, std::set<QPDFObjGen>> copied_pages;
2384 for (auto& selection: m->selections) { 2381 for (auto& selection: m->selections) {
2385 - if (!page_spec_qpdfs.contains(selection.filename)) { 2382 + auto& input = m->inputs.files[selection.filename];
  2383 + if (!input.qpdf) {
2386 // Open the PDF file and store the QPDF object. Throw a std::shared_ptr to the qpdf into 2384 // Open the PDF file and store the QPDF object. Throw a std::shared_ptr to the qpdf into
2387 // a heap so that it survives through copying to the output but gets cleaned up 2385 // a heap so that it survives through copying to the output but gets cleaned up
2388 // automatically at the end. Do not canonicalize the file name. Using two different 2386 // automatically at the end. Do not canonicalize the file name. Using two different
@@ -2408,10 +2406,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf) @@ -2408,10 +2406,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2408 FileInputSource* fis = new FileInputSource(selection.filename.data()); 2406 FileInputSource* fis = new FileInputSource(selection.filename.data());
2409 is = std::shared_ptr<InputSource>(fis); 2407 is = std::shared_ptr<InputSource>(fis);
2410 } 2408 }
2411 - std::unique_ptr<QPDF> qpdf_sp;  
2412 - processInputSource(qpdf_sp, is, password.data(), true);  
2413 - page_spec_qpdfs[selection.filename] = qpdf_sp.get();  
2414 - page_heap.push_back(std::move(qpdf_sp)); 2409 + processInputSource(input.qpdf_p, is, password.data(), true);
  2410 + input.qpdf = input.qpdf_p.get();
2415 if (cis) { 2411 if (cis) {
2416 cis->stayOpen(false); 2412 cis->stayOpen(false);
2417 page_spec_cfis[selection.filename] = cis; 2413 page_spec_cfis[selection.filename] = cis;
@@ -2420,7 +2416,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf) @@ -2420,7 +2416,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2420 2416
2421 // Read original pages from the PDF, and parse the page range associated with this 2417 // Read original pages from the PDF, and parse the page range associated with this
2422 // occurrence of the file. 2418 // occurrence of the file.
2423 - selection.qpdf = page_spec_qpdfs[selection.filename]; 2419 + selection.qpdf = m->inputs.files[selection.filename].qpdf;
2424 selection.orig_pages = selection.qpdf->getAllPages(); 2420 selection.orig_pages = selection.qpdf->getAllPages();
2425 try { 2421 try {
2426 selection.selected_pages = QUtil::parse_numrange( 2422 selection.selected_pages = QUtil::parse_numrange(
@@ -2433,14 +2429,13 @@ QPDFJob::handlePageSpecs(QPDF& pdf) @@ -2433,14 +2429,13 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2433 2429
2434 std::map<unsigned long long, bool> remove_unreferenced; 2430 std::map<unsigned long long, bool> remove_unreferenced;
2435 if (m->remove_unreferenced_page_resources != QPDFJob::re_no) { 2431 if (m->remove_unreferenced_page_resources != QPDFJob::re_no) {
2436 - for (auto const& iter: page_spec_qpdfs) {  
2437 - std::string const& filename = iter.first; 2432 + for (auto const& [filename, input]: m->inputs.files) {
2438 ClosedFileInputSource* cis = nullptr; 2433 ClosedFileInputSource* cis = nullptr;
2439 if (page_spec_cfis.contains(filename)) { 2434 if (page_spec_cfis.contains(filename)) {
2440 cis = page_spec_cfis[filename]; 2435 cis = page_spec_cfis[filename];
2441 cis->stayOpen(true); 2436 cis->stayOpen(true);
2442 } 2437 }
2443 - QPDF& other(*(iter.second)); 2438 + QPDF& other(*input.qpdf);
2444 auto other_uuid = other.getUniqueId(); 2439 auto other_uuid = other.getUniqueId();
2445 if (!remove_unreferenced.contains(other_uuid)) { 2440 if (!remove_unreferenced.contains(other_uuid)) {
2446 remove_unreferenced[other_uuid] = shouldRemoveUnreferencedResources(other); 2441 remove_unreferenced[other_uuid] = shouldRemoveUnreferencedResources(other);
@@ -2618,9 +2613,11 @@ QPDFJob::handlePageSpecs(QPDF& pdf) @@ -2618,9 +2613,11 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2618 } 2613 }
2619 } 2614 }
2620 } 2615 }
2621 - for (auto& foreign: page_heap) {  
2622 - if (foreign->anyWarnings()) {  
2623 - return false; 2616 + for (auto& foreign: m->inputs.files) {
  2617 + if (foreign.second.qpdf_p) { // exclude main input
  2618 + if (foreign.second.qpdf->anyWarnings()) {
  2619 + return false;
  2620 + }
2624 } 2621 }
2625 } 2622 }
2626 return true; 2623 return true;
libqpdf/qpdf/QPDFJob_private.hh
@@ -34,6 +34,22 @@ struct QPDFJob::Selection @@ -34,6 +34,22 @@ struct QPDFJob::Selection
34 std::vector<int> selected_pages; 34 std::vector<int> selected_pages;
35 }; 35 };
36 36
  37 +// A single input PDF.
  38 +//
  39 +// N.B. A single input PDF may be represented by multiple Input instances using variations of the
  40 +// filename. This is a documented work-around.
  41 +struct QPDFJob::Input
  42 +{
  43 + std::unique_ptr<QPDF> qpdf_p;
  44 + QPDF* qpdf;
  45 +};
  46 +
  47 +// All PDF input files for a job.
  48 +struct QPDFJob::Inputs
  49 +{
  50 + std::map<std::string, Input> files;
  51 +};
  52 +
37 struct QPDFJob::RotationSpec 53 struct QPDFJob::RotationSpec
38 { 54 {
39 RotationSpec(int angle = 0, bool relative = false) : 55 RotationSpec(int angle = 0, bool relative = false) :
@@ -223,6 +239,7 @@ class QPDFJob::Members @@ -223,6 +239,7 @@ class QPDFJob::Members
223 std::vector<UnderOverlay> underlay; 239 std::vector<UnderOverlay> underlay;
224 std::vector<UnderOverlay> overlay; 240 std::vector<UnderOverlay> overlay;
225 UnderOverlay* under_overlay{nullptr}; 241 UnderOverlay* under_overlay{nullptr};
  242 + Inputs inputs;
226 std::vector<Selection> selections; 243 std::vector<Selection> selections;
227 std::map<std::string, RotationSpec> rotations; 244 std::map<std::string, RotationSpec> rotations;
228 bool require_outfile{true}; 245 bool require_outfile{true};