Commit 92b29095f11072dd00493b62c98d5cb82b34128b

Authored by m-holger
1 parent cb5a5f4e

Refactor `QPDFJob::handlePageSpecs`: replace the `selected_from_orig` and `copied_pages` sets with the `std::vector<bool>` member `Input::copied_pages`, centralize page tracking logic, adjust page processing, and simplify resource management.
libqpdf/QPDFJob.cc
... ... @@ -2338,7 +2338,8 @@ QPDFJob::Input::initialize(Inputs& in, QPDF* a_qpdf)
2338 2338 qpdf = a_qpdf ? a_qpdf : qpdf_p.get();
2339 2339 if (qpdf) {
2340 2340 orig_pages = qpdf->getAllPages();
2341   - n_pages = QIntC::to_int(orig_pages.size());
  2341 + n_pages = static_cast<int>(orig_pages.size());
  2342 + copied_pages = std::vector<bool>(orig_pages.size(), false);
2342 2343  
2343 2344 if (in.job.m->remove_unreferenced_page_resources != QPDFJob::re_no) {
2344 2345 remove_unreferenced = in.job.shouldRemoveUnreferencedResources(*qpdf);
... ... @@ -2368,7 +2369,6 @@ QPDFJob::Inputs::infile_name(std::string const& name)
2368 2369 }
2369 2370 }
2370 2371  
2371   -
2372 2372 void
2373 2373 QPDFJob::Inputs::process(std::string const& filename, QPDFJob::Input& input)
2374 2374 {
... ... @@ -2522,8 +2522,6 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2522 2522 // Parse all section and translate them into lists of actual pages.
2523 2523 m->inputs.process_all();
2524 2524  
2525   - std::map<unsigned long long, std::set<QPDFObjGen>> copied_pages;
2526   -
2527 2525 // Clear all pages out of the primary QPDF's pages tree but leave the objects in place in the
2528 2526 // file so they can be re-added without changing their object numbers. This enables other things
2529 2527 // in the original file, such as outlines, to continue to work.
... ... @@ -2571,7 +2569,6 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2571 2569  
2572 2570 // Add all the pages from all the files in the order specified. Keep track of any pages from the
2573 2571 // original file that we are selecting.
2574   - std::set<int> selected_from_orig;
2575 2572 std::vector<QPDFObjectHandle> new_labels;
2576 2573 bool any_page_labels = false;
2577 2574 int out_pageno = 0;
... ... @@ -2590,32 +2587,25 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2590 2587 doIfVerbose([&](Pipeline& v, std::string const& prefix) {
2591 2588 v << prefix << ": adding pages from " << selection.filename() << "\n";
2592 2589 });
2593   - for (auto pageno_iter: selection.selected_pages) {
  2590 + for (PageNo page: selection.selected_pages) {
  2591 + const bool this_file = input.qpdf == &pdf;
  2592 + bool first_copy_from_orig = this_file && !main_input.copied_pages[page.idx];
  2593 +
2594 2594 // Pages are specified from 1 but numbered from 0 in the vector
2595   - int pageno = pageno_iter - 1;
  2595 + int pageno = page.no - 1;
2596 2596 pldh.getLabelsForPageRange(pageno, pageno, out_pageno++, new_labels);
2597   - QPDFPageObjectHelper to_copy = input.orig_pages.at(QIntC::to_size(pageno));
2598   - QPDFObjGen to_copy_og = to_copy.getObjectHandle().getObjGen();
2599   - unsigned long long from_uuid = input.qpdf->getUniqueId();
2600   - if (copied_pages[from_uuid].contains(to_copy_og)) {
  2597 + QPDFPageObjectHelper to_copy = input.orig_pages.at(page.idx);
  2598 + if (input.copied_pages[page.idx]) {
2601 2599 QTC::TC(
2602 2600 "qpdf", "QPDFJob copy same page more than once", (input.qpdf == &pdf) ? 0 : 1);
2603 2601 to_copy = to_copy.shallowCopyPage();
2604 2602 } else {
2605   - copied_pages[from_uuid].insert(to_copy_og);
  2603 + input.copied_pages[page.idx] = true;
2606 2604 if (input.remove_unreferenced) {
2607 2605 to_copy.removeUnreferencedResources();
2608 2606 }
2609 2607 }
2610 2608 pdf.addPage(to_copy, false);
2611   - bool first_copy_from_orig = false;
2612   - bool this_file = input.qpdf == &pdf;
2613   - if (this_file) {
2614   - // This is a page from the original file. Keep track of the fact that we are using
2615   - // it.
2616   - first_copy_from_orig = (!selected_from_orig.contains(pageno));
2617   - selected_from_orig.insert(pageno);
2618   - }
2619 2609 auto new_page = added_page(pdf, to_copy);
2620 2610 // Try to avoid gratuitously renaming fields. In the case of where we're just extracting
2621 2611 // a bunch of pages from the original file and not copying any page more than once,
... ... @@ -2654,9 +2644,9 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2654 2644 // Delete page objects for unused page in primary. This prevents those objects from being
2655 2645 // preserved by being referred to from other places, such as the outlines dictionary. Also make
2656 2646 // sure we keep form fields from pages we preserved.
2657   - int page_idx = 0;
  2647 + size_t page_idx = 0;
2658 2648 for (auto const& page: main_input.orig_pages) {
2659   - if (selected_from_orig.contains(page_idx)) {
  2649 + if (main_input.copied_pages[page_idx]) {
2660 2650 for (auto field: this_afdh.getFormFieldsForPage(page)) {
2661 2651 referenced_fields.insert(field);
2662 2652 }
... ...
libqpdf/qpdf/QPDFJob_private.hh
... ... @@ -43,6 +43,7 @@ struct QPDFJob::Input
43 43 ClosedFileInputSource* cfis{};
44 44 std::vector<QPDFObjectHandle> orig_pages;
45 45 int n_pages;
  46 + std::vector<bool> copied_pages;
46 47 bool remove_unreferenced{false};
47 48 };
48 49  
... ...