Commit 92b29095f11072dd00493b62c98d5cb82b34128b
1 parent
cb5a5f4e
Refactor `QPDFJob::handlePageSpecs`: replace the `selected_from_orig` and `copied_pages` sets with a vector<bool> `Input::copied_pages`, centralize page tracking logic, adjust page processing, and simplify resource management.
Showing 2 changed files with 13 additions and 22 deletions
libqpdf/QPDFJob.cc
| @@ -2338,7 +2338,8 @@ QPDFJob::Input::initialize(Inputs& in, QPDF* a_qpdf) | @@ -2338,7 +2338,8 @@ QPDFJob::Input::initialize(Inputs& in, QPDF* a_qpdf) | ||
| 2338 | qpdf = a_qpdf ? a_qpdf : qpdf_p.get(); | 2338 | qpdf = a_qpdf ? a_qpdf : qpdf_p.get(); |
| 2339 | if (qpdf) { | 2339 | if (qpdf) { |
| 2340 | orig_pages = qpdf->getAllPages(); | 2340 | orig_pages = qpdf->getAllPages(); |
| 2341 | - n_pages = QIntC::to_int(orig_pages.size()); | 2341 | + n_pages = static_cast<int>(orig_pages.size()); |
| 2342 | + copied_pages = std::vector<bool>(orig_pages.size(), false); | ||
| 2342 | 2343 | ||
| 2343 | if (in.job.m->remove_unreferenced_page_resources != QPDFJob::re_no) { | 2344 | if (in.job.m->remove_unreferenced_page_resources != QPDFJob::re_no) { |
| 2344 | remove_unreferenced = in.job.shouldRemoveUnreferencedResources(*qpdf); | 2345 | remove_unreferenced = in.job.shouldRemoveUnreferencedResources(*qpdf); |
| @@ -2368,7 +2369,6 @@ QPDFJob::Inputs::infile_name(std::string const& name) | @@ -2368,7 +2369,6 @@ QPDFJob::Inputs::infile_name(std::string const& name) | ||
| 2368 | } | 2369 | } |
| 2369 | } | 2370 | } |
| 2370 | 2371 | ||
| 2371 | - | ||
| 2372 | void | 2372 | void |
| 2373 | QPDFJob::Inputs::process(std::string const& filename, QPDFJob::Input& input) | 2373 | QPDFJob::Inputs::process(std::string const& filename, QPDFJob::Input& input) |
| 2374 | { | 2374 | { |
| @@ -2522,8 +2522,6 @@ QPDFJob::handlePageSpecs(QPDF& pdf) | @@ -2522,8 +2522,6 @@ QPDFJob::handlePageSpecs(QPDF& pdf) | ||
| 2522 | // Parse all section and translate them into lists of actual pages. | 2522 | // Parse all section and translate them into lists of actual pages. |
| 2523 | m->inputs.process_all(); | 2523 | m->inputs.process_all(); |
| 2524 | 2524 | ||
| 2525 | - std::map<unsigned long long, std::set<QPDFObjGen>> copied_pages; | ||
| 2526 | - | ||
| 2527 | // Clear all pages out of the primary QPDF's pages tree but leave the objects in place in the | 2525 | // Clear all pages out of the primary QPDF's pages tree but leave the objects in place in the |
| 2528 | // file so they can be re-added without changing their object numbers. This enables other things | 2526 | // file so they can be re-added without changing their object numbers. This enables other things |
| 2529 | // in the original file, such as outlines, to continue to work. | 2527 | // in the original file, such as outlines, to continue to work. |
| @@ -2571,7 +2569,6 @@ QPDFJob::handlePageSpecs(QPDF& pdf) | @@ -2571,7 +2569,6 @@ QPDFJob::handlePageSpecs(QPDF& pdf) | ||
| 2571 | 2569 | ||
| 2572 | // Add all the pages from all the files in the order specified. Keep track of any pages from the | 2570 | // Add all the pages from all the files in the order specified. Keep track of any pages from the |
| 2573 | // original file that we are selecting. | 2571 | // original file that we are selecting. |
| 2574 | - std::set<int> selected_from_orig; | ||
| 2575 | std::vector<QPDFObjectHandle> new_labels; | 2572 | std::vector<QPDFObjectHandle> new_labels; |
| 2576 | bool any_page_labels = false; | 2573 | bool any_page_labels = false; |
| 2577 | int out_pageno = 0; | 2574 | int out_pageno = 0; |
| @@ -2590,32 +2587,25 @@ QPDFJob::handlePageSpecs(QPDF& pdf) | @@ -2590,32 +2587,25 @@ QPDFJob::handlePageSpecs(QPDF& pdf) | ||
| 2590 | doIfVerbose([&](Pipeline& v, std::string const& prefix) { | 2587 | doIfVerbose([&](Pipeline& v, std::string const& prefix) { |
| 2591 | v << prefix << ": adding pages from " << selection.filename() << "\n"; | 2588 | v << prefix << ": adding pages from " << selection.filename() << "\n"; |
| 2592 | }); | 2589 | }); |
| 2593 | - for (auto pageno_iter: selection.selected_pages) { | 2590 | + for (PageNo page: selection.selected_pages) { |
| 2591 | + const bool this_file = input.qpdf == &pdf; | ||
| 2592 | + bool first_copy_from_orig = this_file && !main_input.copied_pages[page.idx]; | ||
| 2593 | + | ||
| 2594 | // Pages are specified from 1 but numbered from 0 in the vector | 2594 | // Pages are specified from 1 but numbered from 0 in the vector |
| 2595 | - int pageno = pageno_iter - 1; | 2595 | + int pageno = page.no - 1; |
| 2596 | pldh.getLabelsForPageRange(pageno, pageno, out_pageno++, new_labels); | 2596 | pldh.getLabelsForPageRange(pageno, pageno, out_pageno++, new_labels); |
| 2597 | - QPDFPageObjectHelper to_copy = input.orig_pages.at(QIntC::to_size(pageno)); | ||
| 2598 | - QPDFObjGen to_copy_og = to_copy.getObjectHandle().getObjGen(); | ||
| 2599 | - unsigned long long from_uuid = input.qpdf->getUniqueId(); | ||
| 2600 | - if (copied_pages[from_uuid].contains(to_copy_og)) { | 2597 | + QPDFPageObjectHelper to_copy = input.orig_pages.at(page.idx); |
| 2598 | + if (input.copied_pages[page.idx]) { | ||
| 2601 | QTC::TC( | 2599 | QTC::TC( |
| 2602 | "qpdf", "QPDFJob copy same page more than once", (input.qpdf == &pdf) ? 0 : 1); | 2600 | "qpdf", "QPDFJob copy same page more than once", (input.qpdf == &pdf) ? 0 : 1); |
| 2603 | to_copy = to_copy.shallowCopyPage(); | 2601 | to_copy = to_copy.shallowCopyPage(); |
| 2604 | } else { | 2602 | } else { |
| 2605 | - copied_pages[from_uuid].insert(to_copy_og); | 2603 | + input.copied_pages[page.idx] = true; |
| 2606 | if (input.remove_unreferenced) { | 2604 | if (input.remove_unreferenced) { |
| 2607 | to_copy.removeUnreferencedResources(); | 2605 | to_copy.removeUnreferencedResources(); |
| 2608 | } | 2606 | } |
| 2609 | } | 2607 | } |
| 2610 | pdf.addPage(to_copy, false); | 2608 | pdf.addPage(to_copy, false); |
| 2611 | - bool first_copy_from_orig = false; | ||
| 2612 | - bool this_file = input.qpdf == &pdf; | ||
| 2613 | - if (this_file) { | ||
| 2614 | - // This is a page from the original file. Keep track of the fact that we are using | ||
| 2615 | - // it. | ||
| 2616 | - first_copy_from_orig = (!selected_from_orig.contains(pageno)); | ||
| 2617 | - selected_from_orig.insert(pageno); | ||
| 2618 | - } | ||
| 2619 | auto new_page = added_page(pdf, to_copy); | 2609 | auto new_page = added_page(pdf, to_copy); |
| 2620 | // Try to avoid gratuitously renaming fields. In the case of where we're just extracting | 2610 | // Try to avoid gratuitously renaming fields. In the case of where we're just extracting |
| 2621 | // a bunch of pages from the original file and not copying any page more than once, | 2611 | // a bunch of pages from the original file and not copying any page more than once, |
| @@ -2654,9 +2644,9 @@ QPDFJob::handlePageSpecs(QPDF& pdf) | @@ -2654,9 +2644,9 @@ QPDFJob::handlePageSpecs(QPDF& pdf) | ||
| 2654 | // Delete page objects for unused page in primary. This prevents those objects from being | 2644 | // Delete page objects for unused page in primary. This prevents those objects from being |
| 2655 | // preserved by being referred to from other places, such as the outlines dictionary. Also make | 2645 | // preserved by being referred to from other places, such as the outlines dictionary. Also make |
| 2656 | // sure we keep form fields from pages we preserved. | 2646 | // sure we keep form fields from pages we preserved. |
| 2657 | - int page_idx = 0; | 2647 | + size_t page_idx = 0; |
| 2658 | for (auto const& page: main_input.orig_pages) { | 2648 | for (auto const& page: main_input.orig_pages) { |
| 2659 | - if (selected_from_orig.contains(page_idx)) { | 2649 | + if (main_input.copied_pages[page_idx]) { |
| 2660 | for (auto field: this_afdh.getFormFieldsForPage(page)) { | 2650 | for (auto field: this_afdh.getFormFieldsForPage(page)) { |
| 2661 | referenced_fields.insert(field); | 2651 | referenced_fields.insert(field); |
| 2662 | } | 2652 | } |
libqpdf/qpdf/QPDFJob_private.hh
| @@ -43,6 +43,7 @@ struct QPDFJob::Input | @@ -43,6 +43,7 @@ struct QPDFJob::Input | ||
| 43 | ClosedFileInputSource* cfis{}; | 43 | ClosedFileInputSource* cfis{}; |
| 44 | std::vector<QPDFObjectHandle> orig_pages; | 44 | std::vector<QPDFObjectHandle> orig_pages; |
| 45 | int n_pages; | 45 | int n_pages; |
| 46 | + std::vector<bool> copied_pages; | ||
| 46 | bool remove_unreferenced{false}; | 47 | bool remove_unreferenced{false}; |
| 47 | }; | 48 | }; |
| 48 | 49 |