Commit 980df238438bb616e9ab04eb76b96fc9117a2e04

Authored by m-holger
1 parent a000488d

Refactor `QPDFJob`: consolidate `QPDFPageData` into `Selection`, simplify page r…

…ange handling, and cleanup obsolete logic.
libqpdf/QPDFJob.cc
... ... @@ -58,17 +58,6 @@ namespace
58 58 std::shared_ptr<Pl_DCT::CompressConfig> config;
59 59 };
60 60  
61   - struct QPDFPageData
62   - {
63   - QPDFPageData(std::string const& filename, QPDF* qpdf, std::string const& range);
64   - QPDFPageData(QPDFPageData const& other, int page);
65   -
66   - std::string filename;
67   - QPDF* qpdf;
68   - std::vector<QPDFObjectHandle> orig_pages;
69   - std::vector<int> selected_pages;
70   - };
71   -
72 61 class ProgressReporter final: public QPDFWriter::ProgressReporter
73 62 {
74 63 public:
... ... @@ -267,26 +256,6 @@ struct QPDFJob::PageNo
267 256 int no{1};
268 257 };
269 258  
270   -QPDFPageData::QPDFPageData(std::string const& filename, QPDF* qpdf, std::string const& range) :
271   - filename(filename),
272   - qpdf(qpdf),
273   - orig_pages(qpdf->getAllPages())
274   -{
275   - try {
276   - selected_pages = QUtil::parse_numrange(range.c_str(), QIntC::to_int(orig_pages.size()));
277   - } catch (std::runtime_error& e) {
278   - throw std::runtime_error("parsing numeric range for " + filename + ": " + e.what());
279   - }
280   -}
281   -
282   -QPDFPageData::QPDFPageData(QPDFPageData const& other, int page) :
283   - filename(other.filename),
284   - qpdf(other.qpdf),
285   - orig_pages(other.orig_pages)
286   -{
287   - selected_pages.push_back(page);
288   -}
289   -
290 259 QPDFJob::QPDFJob() :
291 260 m(std::make_shared<Members>())
292 261 {
... ... @@ -2411,7 +2380,6 @@ QPDFJob::handlePageSpecs(QPDF&amp; pdf)
2411 2380 std::map<std::string, QPDF*> page_spec_qpdfs;
2412 2381 std::map<std::string, ClosedFileInputSource*> page_spec_cfis;
2413 2382 page_spec_qpdfs[m->infilename] = &pdf;
2414   - std::vector<QPDFPageData> parsed_specs;
2415 2383 std::map<unsigned long long, std::set<QPDFObjGen>> copied_pages;
2416 2384 for (auto& selection: m->selections) {
2417 2385 if (!page_spec_qpdfs.contains(selection.filename)) {
... ... @@ -2425,7 +2393,6 @@ QPDFJob::handlePageSpecs(QPDF&amp; pdf)
2425 2393 auto password = selection.password;
2426 2394 if (!m->encryption_file.empty() && password.empty() &&
2427 2395 selection.filename == m->encryption_file) {
2428   - QTC::TC("qpdf", "QPDFJob pages encryption password");
2429 2396 password = m->encryption_file_password;
2430 2397 }
2431 2398 doIfVerbose([&](Pipeline& v, std::string const& prefix) {
... ... @@ -2434,13 +2401,11 @@ QPDFJob::handlePageSpecs(QPDF&amp; pdf)
2434 2401 std::shared_ptr<InputSource> is;
2435 2402 ClosedFileInputSource* cis = nullptr;
2436 2403 if (!m->keep_files_open) {
2437   - QTC::TC("qpdf", "QPDFJob keep files open n");
2438   - cis = new ClosedFileInputSource(selection.filename.c_str());
  2404 + cis = new ClosedFileInputSource(selection.filename.data());
2439 2405 is = std::shared_ptr<InputSource>(cis);
2440 2406 cis->stayOpen(true);
2441 2407 } else {
2442   - QTC::TC("qpdf", "QPDFJob keep files open y");
2443   - FileInputSource* fis = new FileInputSource(selection.filename.c_str());
  2408 + FileInputSource* fis = new FileInputSource(selection.filename.data());
2444 2409 is = std::shared_ptr<InputSource>(fis);
2445 2410 }
2446 2411 std::unique_ptr<QPDF> qpdf_sp;
... ... @@ -2455,8 +2420,15 @@ QPDFJob::handlePageSpecs(QPDF&amp; pdf)
2455 2420  
2456 2421 // Read original pages from the PDF, and parse the page range associated with this
2457 2422 // occurrence of the file.
2458   - parsed_specs.emplace_back(
2459   - selection.filename, page_spec_qpdfs[selection.filename], selection.range);
  2423 + selection.qpdf = page_spec_qpdfs[selection.filename];
  2424 + selection.orig_pages = selection.qpdf->getAllPages();
  2425 + try {
  2426 + selection.selected_pages = QUtil::parse_numrange(
  2427 + selection.range.data(), static_cast<int>(selection.orig_pages.size()));
  2428 + } catch (std::runtime_error& e) {
  2429 + throw std::runtime_error(
  2430 + "parsing numeric range for " + selection.filename + ": " + e.what());
  2431 + }
2460 2432 }
2461 2433  
2462 2434 std::map<unsigned long long, bool> remove_unreferenced;
... ... @@ -2491,16 +2463,17 @@ QPDFJob::handlePageSpecs(QPDF&amp; pdf)
2491 2463 }
2492 2464  
2493 2465 auto n_collate = m->collate.size();
2494   - auto n_specs = parsed_specs.size();
  2466 + auto n_specs = m->selections.size();
2495 2467 if (!(n_collate == 0 || n_collate == 1 || n_collate == n_specs)) {
2496 2468 usage(
2497 2469 "--pages: if --collate has more than one value, it must have one value per page "
2498 2470 "specification");
2499 2471 }
  2472 +
  2473 + std::vector<Selection> new_specs;
2500 2474 if (n_collate > 0 && n_specs > 1) {
2501 2475 // Collate the pages by selecting one page from each spec in order. When a spec runs out of
2502 2476 // pages, stop selecting from it.
2503   - std::vector<QPDFPageData> new_parsed_specs;
2504 2477 // Make sure we have a collate value for each spec. We have already checked that a non-empty
2505 2478 // collate has either one value or one value per spec.
2506 2479 for (auto i = n_collate; i < n_specs; ++i) {
... ... @@ -2511,20 +2484,20 @@ QPDFJob::handlePageSpecs(QPDF&amp; pdf)
2511 2484 while (got_pages) {
2512 2485 got_pages = false;
2513 2486 for (size_t i = 0; i < n_specs; ++i) {
2514   - QPDFPageData& page_data = parsed_specs.at(i);
  2487 + auto& page_data = m->selections.at(i);
2515 2488 for (size_t j = 0; j < m->collate.at(i); ++j) {
2516 2489 if (cur_page.at(i) + j < page_data.selected_pages.size()) {
2517 2490 got_pages = true;
2518   - new_parsed_specs.emplace_back(
  2491 + new_specs.emplace_back(
2519 2492 page_data, page_data.selected_pages.at(cur_page.at(i) + j));
2520 2493 }
2521 2494 }
2522 2495 cur_page.at(i) += m->collate.at(i);
2523 2496 }
2524 2497 }
2525   - parsed_specs = new_parsed_specs;
2526 2498 }
2527 2499  
  2500 + std::vector<Selection>& page_specs = new_specs.empty() ? m->selections : new_specs;
2528 2501 // Add all the pages from all the files in the order specified. Keep track of any pages from the
2529 2502 // original file that we are selecting.
2530 2503 std::set<int> selected_from_orig;
... ... @@ -2533,7 +2506,7 @@ QPDFJob::handlePageSpecs(QPDF&amp; pdf)
2533 2506 int out_pageno = 0;
2534 2507 auto& this_afdh = pdf.acroform();
2535 2508 std::set<QPDFObjGen> referenced_fields;
2536   - for (auto& page_data: parsed_specs) {
  2509 + for (auto& page_data: page_specs) {
2537 2510 ClosedFileInputSource* cis = nullptr;
2538 2511 if (page_spec_cfis.contains(page_data.filename)) {
2539 2512 cis = page_spec_cfis[page_data.filename];
... ...
libqpdf/qpdf/QPDFJob_private.hh
... ... @@ -17,9 +17,21 @@ struct QPDFJob::Selection
17 17 {
18 18 }
19 19  
  20 + Selection(QPDFJob::Selection const& other, int page) :
  21 + filename(other.filename),
  22 + // range and password are no longer required when this constructor is called.
  23 + qpdf(other.qpdf),
  24 + orig_pages(other.orig_pages),
  25 + selected_pages({page})
  26 + {
  27 + }
  28 +
20 29 std::string filename;
21 30 std::string password;
22 31 std::string range;
  32 + QPDF* qpdf;
  33 + std::vector<QPDFObjectHandle> orig_pages;
  34 + std::vector<int> selected_pages;
23 35 };
24 36  
25 37 struct QPDFJob::RotationSpec
... ...
qpdf/qpdf.testcov
... ... @@ -188,7 +188,6 @@ QPDF insert foreign page 0
188 188 QPDFWriter copy use_aes 1
189 189 QPDFParser indirect without context 0
190 190 QPDFObjectHandle trailing data in parse 0
191   -QPDFJob pages encryption password 0
192 191 QPDFTokenizer EOF reading token 0
193 192 QPDFTokenizer EOF reading appendable token 0
194 193 QPDFWriter extra header text no newline 0
... ... @@ -286,8 +285,6 @@ QPDFAcroFormDocumentHelper non-dictionary field 0
286 285 QPDFAcroFormDocumentHelper loop 0
287 286 QPDFAcroFormDocumentHelper field found 1
288 287 QPDFAcroFormDocumentHelper annotation found 1
289   -QPDFJob keep files open n 0
290   -QPDFJob keep files open y 0
291 288 QPDFJob automatically set keep files open 1
292 289 QPDFOutlineDocumentHelper string named dest 0
293 290 QPDFObjectHandle merge top type mismatch 0
... ...