Commit 92b29095f11072dd00493b62c98d5cb82b34128b

Authored by m-holger
1 parent cb5a5f4e

Refactor `QPDFJob::handlePageSpecs`: replace `selected_from_orig` and `copied_pages` sets with vector<bool> `Input::copied_pages`, centralize page tracking logic, adjust page processing, and simplify resource management.
libqpdf/QPDFJob.cc
@@ -2338,7 +2338,8 @@ QPDFJob::Input::initialize(Inputs& in, QPDF* a_qpdf) @@ -2338,7 +2338,8 @@ QPDFJob::Input::initialize(Inputs& in, QPDF* a_qpdf)
2338 qpdf = a_qpdf ? a_qpdf : qpdf_p.get(); 2338 qpdf = a_qpdf ? a_qpdf : qpdf_p.get();
2339 if (qpdf) { 2339 if (qpdf) {
2340 orig_pages = qpdf->getAllPages(); 2340 orig_pages = qpdf->getAllPages();
2341 - n_pages = QIntC::to_int(orig_pages.size()); 2341 + n_pages = static_cast<int>(orig_pages.size());
  2342 + copied_pages = std::vector<bool>(orig_pages.size(), false);
2342 2343
2343 if (in.job.m->remove_unreferenced_page_resources != QPDFJob::re_no) { 2344 if (in.job.m->remove_unreferenced_page_resources != QPDFJob::re_no) {
2344 remove_unreferenced = in.job.shouldRemoveUnreferencedResources(*qpdf); 2345 remove_unreferenced = in.job.shouldRemoveUnreferencedResources(*qpdf);
@@ -2368,7 +2369,6 @@ QPDFJob::Inputs::infile_name(std::string const& name) @@ -2368,7 +2369,6 @@ QPDFJob::Inputs::infile_name(std::string const& name)
2368 } 2369 }
2369 } 2370 }
2370 2371
2371 -  
2372 void 2372 void
2373 QPDFJob::Inputs::process(std::string const& filename, QPDFJob::Input& input) 2373 QPDFJob::Inputs::process(std::string const& filename, QPDFJob::Input& input)
2374 { 2374 {
@@ -2522,8 +2522,6 @@ QPDFJob::handlePageSpecs(QPDF& pdf) @@ -2522,8 +2522,6 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2522 // Parse all section and translate them into lists of actual pages. 2522 // Parse all section and translate them into lists of actual pages.
2523 m->inputs.process_all(); 2523 m->inputs.process_all();
2524 2524
2525 - std::map<unsigned long long, std::set<QPDFObjGen>> copied_pages;  
2526 -  
2527 // Clear all pages out of the primary QPDF's pages tree but leave the objects in place in the 2525 // Clear all pages out of the primary QPDF's pages tree but leave the objects in place in the
2528 // file so they can be re-added without changing their object numbers. This enables other things 2526 // file so they can be re-added without changing their object numbers. This enables other things
2529 // in the original file, such as outlines, to continue to work. 2527 // in the original file, such as outlines, to continue to work.
@@ -2571,7 +2569,6 @@ QPDFJob::handlePageSpecs(QPDF& pdf) @@ -2571,7 +2569,6 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2571 2569
2572 // Add all the pages from all the files in the order specified. Keep track of any pages from the 2570 // Add all the pages from all the files in the order specified. Keep track of any pages from the
2573 // original file that we are selecting. 2571 // original file that we are selecting.
2574 - std::set<int> selected_from_orig;  
2575 std::vector<QPDFObjectHandle> new_labels; 2572 std::vector<QPDFObjectHandle> new_labels;
2576 bool any_page_labels = false; 2573 bool any_page_labels = false;
2577 int out_pageno = 0; 2574 int out_pageno = 0;
@@ -2590,32 +2587,25 @@ QPDFJob::handlePageSpecs(QPDF& pdf) @@ -2590,32 +2587,25 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2590 doIfVerbose([&](Pipeline& v, std::string const& prefix) { 2587 doIfVerbose([&](Pipeline& v, std::string const& prefix) {
2591 v << prefix << ": adding pages from " << selection.filename() << "\n"; 2588 v << prefix << ": adding pages from " << selection.filename() << "\n";
2592 }); 2589 });
2593 - for (auto pageno_iter: selection.selected_pages) { 2590 + for (PageNo page: selection.selected_pages) {
  2591 + const bool this_file = input.qpdf == &pdf;
  2592 + bool first_copy_from_orig = this_file && !main_input.copied_pages[page.idx];
  2593 +
2594 // Pages are specified from 1 but numbered from 0 in the vector 2594 // Pages are specified from 1 but numbered from 0 in the vector
2595 - int pageno = pageno_iter - 1; 2595 + int pageno = page.no - 1;
2596 pldh.getLabelsForPageRange(pageno, pageno, out_pageno++, new_labels); 2596 pldh.getLabelsForPageRange(pageno, pageno, out_pageno++, new_labels);
2597 - QPDFPageObjectHelper to_copy = input.orig_pages.at(QIntC::to_size(pageno));  
2598 - QPDFObjGen to_copy_og = to_copy.getObjectHandle().getObjGen();  
2599 - unsigned long long from_uuid = input.qpdf->getUniqueId();  
2600 - if (copied_pages[from_uuid].contains(to_copy_og)) { 2597 + QPDFPageObjectHelper to_copy = input.orig_pages.at(page.idx);
  2598 + if (input.copied_pages[page.idx]) {
2601 QTC::TC( 2599 QTC::TC(
2602 "qpdf", "QPDFJob copy same page more than once", (input.qpdf == &pdf) ? 0 : 1); 2600 "qpdf", "QPDFJob copy same page more than once", (input.qpdf == &pdf) ? 0 : 1);
2603 to_copy = to_copy.shallowCopyPage(); 2601 to_copy = to_copy.shallowCopyPage();
2604 } else { 2602 } else {
2605 - copied_pages[from_uuid].insert(to_copy_og); 2603 + input.copied_pages[page.idx] = true;
2606 if (input.remove_unreferenced) { 2604 if (input.remove_unreferenced) {
2607 to_copy.removeUnreferencedResources(); 2605 to_copy.removeUnreferencedResources();
2608 } 2606 }
2609 } 2607 }
2610 pdf.addPage(to_copy, false); 2608 pdf.addPage(to_copy, false);
2611 - bool first_copy_from_orig = false;  
2612 - bool this_file = input.qpdf == &pdf;  
2613 - if (this_file) {  
2614 - // This is a page from the original file. Keep track of the fact that we are using  
2615 - // it.  
2616 - first_copy_from_orig = (!selected_from_orig.contains(pageno));  
2617 - selected_from_orig.insert(pageno);  
2618 - }  
2619 auto new_page = added_page(pdf, to_copy); 2609 auto new_page = added_page(pdf, to_copy);
2620 // Try to avoid gratuitously renaming fields. In the case of where we're just extracting 2610 // Try to avoid gratuitously renaming fields. In the case of where we're just extracting
2621 // a bunch of pages from the original file and not copying any page more than once, 2611 // a bunch of pages from the original file and not copying any page more than once,
@@ -2654,9 +2644,9 @@ QPDFJob::handlePageSpecs(QPDF& pdf) @@ -2654,9 +2644,9 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2654 // Delete page objects for unused page in primary. This prevents those objects from being 2644 // Delete page objects for unused page in primary. This prevents those objects from being
2655 // preserved by being referred to from other places, such as the outlines dictionary. Also make 2645 // preserved by being referred to from other places, such as the outlines dictionary. Also make
2656 // sure we keep form fields from pages we preserved. 2646 // sure we keep form fields from pages we preserved.
2657 - int page_idx = 0; 2647 + size_t page_idx = 0;
2658 for (auto const& page: main_input.orig_pages) { 2648 for (auto const& page: main_input.orig_pages) {
2659 - if (selected_from_orig.contains(page_idx)) { 2649 + if (main_input.copied_pages[page_idx]) {
2660 for (auto field: this_afdh.getFormFieldsForPage(page)) { 2650 for (auto field: this_afdh.getFormFieldsForPage(page)) {
2661 referenced_fields.insert(field); 2651 referenced_fields.insert(field);
2662 } 2652 }
libqpdf/qpdf/QPDFJob_private.hh
@@ -43,6 +43,7 @@ struct QPDFJob::Input @@ -43,6 +43,7 @@ struct QPDFJob::Input
43 ClosedFileInputSource* cfis{}; 43 ClosedFileInputSource* cfis{};
44 std::vector<QPDFObjectHandle> orig_pages; 44 std::vector<QPDFObjectHandle> orig_pages;
45 int n_pages; 45 int n_pages;
  46 + std::vector<bool> copied_pages;
46 bool remove_unreferenced{false}; 47 bool remove_unreferenced{false};
47 }; 48 };
48 49