Commit a000488d2bd197217c5e3ba1f2c935a64ca89cf3

Authored by m-holger
1 parent 3e39e0dc

Refactor `QPDFJob`: replace `PageSpec` with `Selection`, introduce `new_selection` method, and update logic for handling page specifications.
include/qpdf/QPDFJob.hh
... ... @@ -156,16 +156,6 @@ class QPDFJob
156 156 bool replace{false};
157 157 };
158 158  
159   - struct PageSpec
160   - {
161   - PageSpec(
162   - std::string const& filename, std::string const& password, std::string const& range);
163   -
164   - std::string filename;
165   - std::string password;
166   - std::string range;
167   - };
168   -
169 159 public:
170 160 // CONFIGURATION
171 161  
... ... @@ -426,6 +416,7 @@ class QPDFJob
426 416  
427 417 private:
428 418 struct PageNo;
  419 + struct Selection;
429 420 struct RotationSpec;
430 421 struct UnderOverlay;
431 422 struct PageLabelSpec;
... ... @@ -470,6 +461,8 @@ class QPDFJob
470 461 void setQPDFOptions(QPDF& pdf);
471 462 bool handlePageSpecs(QPDF& pdf);
472 463 bool shouldRemoveUnreferencedResources(QPDF& pdf);
  464 + void new_selection(
  465 + std::string const& filename, std::string const& password, std::string const& range);
473 466 void handleRotations(QPDF& pdf);
474 467 void getUOPagenos(
475 468 std::vector<UnderOverlay>& uo, std::vector<std::map<size_t, std::vector<int>>>& pagenos);
... ...
libqpdf/QPDFJob.cc
... ... @@ -267,14 +267,6 @@ struct QPDFJob::PageNo
267 267 int no{1};
268 268 };
269 269  
270   -QPDFJob::PageSpec::PageSpec(
271   - std::string const& filename, std::string const& password, std::string const& range) :
272   - filename(filename),
273   - password(password.empty() ? "" : password),
274   - range(range)
275   -{
276   -}
277   -
278 270 QPDFPageData::QPDFPageData(std::string const& filename, QPDF* qpdf, std::string const& range) :
279 271 filename(filename),
280 272 qpdf(qpdf),
... ... @@ -464,7 +456,7 @@ QPDFJob::createQPDF()
464 456 pdf.updateFromJSON(m->update_from_json);
465 457 }
466 458  
467   - if (!m->page_specs.empty()) {
  459 + if (!m->selections.empty()) {
468 460 if (!handlePageSpecs(pdf)) {
469 461 m->warnings = true;
470 462 }
... ... @@ -2374,6 +2366,13 @@ added_page(QPDF& pdf, QPDFPageObjectHelper page)
2374 2366 return added_page(pdf, page.getObjectHandle());
2375 2367 }
2376 2368  
  2369 +void
  2370 +QPDFJob::new_selection(
  2371 + std::string const& filename, std::string const& password, std::string const& range)
  2372 +{
  2373 + m->selections.emplace_back(filename, password, range);
  2374 +}
  2375 +
2377 2376 // Handle all page specifications. Return true if it succeeded without warnings.
2378 2377 bool
2379 2378 QPDFJob::handlePageSpecs(QPDF& pdf)
... ... @@ -2383,12 +2382,12 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2383 2382 // Parse all page specifications and translate them into lists of actual pages.
2384 2383  
2385 2384 // Handle "." as a shortcut for the input file
2386   - for (auto& page_spec: m->page_specs) {
2387   - if (page_spec.filename == ".") {
2388   - page_spec.filename = m->infilename;
  2385 + for (auto& selection: m->selections) {
  2386 + if (selection.filename == ".") {
  2387 + selection.filename = m->infilename;
2389 2388 }
2390   - if (page_spec.range.empty()) {
2391   - page_spec.range = "1-z";
  2389 + if (selection.range.empty()) {
  2390 + selection.range = "1-z";
2392 2391 }
2393 2392 }
2394 2393  
... ... @@ -2397,8 +2396,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2397 2396 // Rather than trying to code some portable heuristic based on OS limits, just hard-code
2398 2397 // this at a given number and allow users to override.
2399 2398 std::set<std::string> filenames;
2400   - for (auto& page_spec: m->page_specs) {
2401   - filenames.insert(page_spec.filename);
  2399 + for (auto& selection: m->selections) {
  2400 + filenames.insert(selection.filename);
2402 2401 }
2403 2402 m->keep_files_open = (filenames.size() <= m->keep_files_open_threshold);
2404 2403 QTC::TC("qpdf", "QPDFJob automatically set keep files open", m->keep_files_open ? 0 : 1);
... ... @@ -2414,8 +2413,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2414 2413 page_spec_qpdfs[m->infilename] = &pdf;
2415 2414 std::vector<QPDFPageData> parsed_specs;
2416 2415 std::map<unsigned long long, std::set<QPDFObjGen>> copied_pages;
2417   - for (auto& page_spec: m->page_specs) {
2418   - if (!page_spec_qpdfs.contains(page_spec.filename)) {
  2416 + for (auto& selection: m->selections) {
  2417 + if (!page_spec_qpdfs.contains(selection.filename)) {
2419 2418 // Open the PDF file and store the QPDF object. Throw a std::shared_ptr to the qpdf into
2420 2419 // a heap so that it survives through copying to the output but gets cleaned up
2421 2420 // automatically at the end. Do not canonicalize the file name. Using two different
... ... @@ -2423,41 +2422,41 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2423 2422 // you are using this an example of how to do this with the API, you can just create two
2424 2423 // different QPDF objects to the same underlying file with the same path to achieve the
2425 2424 // same effect.
2426   - auto password = page_spec.password;
  2425 + auto password = selection.password;
2427 2426 if (!m->encryption_file.empty() && password.empty() &&
2428   - page_spec.filename == m->encryption_file) {
  2427 + selection.filename == m->encryption_file) {
2429 2428 QTC::TC("qpdf", "QPDFJob pages encryption password");
2430 2429 password = m->encryption_file_password;
2431 2430 }
2432 2431 doIfVerbose([&](Pipeline& v, std::string const& prefix) {
2433   - v << prefix << ": processing " << page_spec.filename << "\n";
  2432 + v << prefix << ": processing " << selection.filename << "\n";
2434 2433 });
2435 2434 std::shared_ptr<InputSource> is;
2436 2435 ClosedFileInputSource* cis = nullptr;
2437 2436 if (!m->keep_files_open) {
2438 2437 QTC::TC("qpdf", "QPDFJob keep files open n");
2439   - cis = new ClosedFileInputSource(page_spec.filename.c_str());
  2438 + cis = new ClosedFileInputSource(selection.filename.c_str());
2440 2439 is = std::shared_ptr<InputSource>(cis);
2441 2440 cis->stayOpen(true);
2442 2441 } else {
2443 2442 QTC::TC("qpdf", "QPDFJob keep files open y");
2444   - FileInputSource* fis = new FileInputSource(page_spec.filename.c_str());
  2443 + FileInputSource* fis = new FileInputSource(selection.filename.c_str());
2445 2444 is = std::shared_ptr<InputSource>(fis);
2446 2445 }
2447 2446 std::unique_ptr<QPDF> qpdf_sp;
2448 2447 processInputSource(qpdf_sp, is, password.data(), true);
2449   - page_spec_qpdfs[page_spec.filename] = qpdf_sp.get();
  2448 + page_spec_qpdfs[selection.filename] = qpdf_sp.get();
2450 2449 page_heap.push_back(std::move(qpdf_sp));
2451 2450 if (cis) {
2452 2451 cis->stayOpen(false);
2453   - page_spec_cfis[page_spec.filename] = cis;
  2452 + page_spec_cfis[selection.filename] = cis;
2454 2453 }
2455 2454 }
2456 2455  
2457 2456 // Read original pages from the PDF, and parse the page range associated with this
2458 2457 // occurrence of the file.
2459 2458 parsed_specs.emplace_back(
2460   - page_spec.filename, page_spec_qpdfs[page_spec.filename], page_spec.range);
  2459 + selection.filename, page_spec_qpdfs[selection.filename], selection.range);
2461 2460 }
2462 2461  
2463 2462 std::map<unsigned long long, bool> remove_unreferenced;
... ...
libqpdf/QPDFJob_config.cc
... ... @@ -978,7 +978,7 @@ QPDFJob::PagesConfig::PagesConfig(Config* c) :
978 978 std::shared_ptr<QPDFJob::PagesConfig>
979 979 QPDFJob::Config::pages()
980 980 {
981   - if (!o.m->page_specs.empty()) {
  981 + if (!o.m->selections.empty()) {
982 982 usage("--pages may only be specified one time");
983 983 }
984 984 return std::shared_ptr<PagesConfig>(new PagesConfig(this));
... ... @@ -987,7 +987,7 @@ QPDFJob::Config::pages()
987 987 QPDFJob::Config*
988 988 QPDFJob::PagesConfig::endPages()
989 989 {
990   - auto n_specs = config->o.m->page_specs.size();
  990 + auto n_specs = config->o.m->selections.size();
991 991 if (n_specs == 0) {
992 992 usage("--pages: no page specifications given");
993 993 }
... ... @@ -998,27 +998,25 @@ QPDFJob::PagesConfig*
998 998 QPDFJob::PagesConfig::pageSpec(
999 999 std::string const& filename, std::string const& range, char const* password)
1000 1000 {
1001   - config->o.m->page_specs.emplace_back(filename, password, range);
  1001 + config->o.new_selection(filename, {password ? password : ""}, range);
1002 1002 return this;
1003 1003 }
1004 1004  
1005 1005 QPDFJob::PagesConfig*
1006 1006 QPDFJob::PagesConfig::file(std::string const& arg)
1007 1007 {
1008   - config->o.m->page_specs.emplace_back(arg, "", "");
  1008 + config->o.new_selection(arg, {}, {});
1009 1009 return this;
1010 1010 }
1011 1011  
1012 1012 QPDFJob::PagesConfig*
1013 1013 QPDFJob::PagesConfig::range(std::string const& arg)
1014 1014 {
1015   - if (config->o.m->page_specs.empty()) {
1016   - QTC::TC("qpdf", "QPDFJob misplaced page range");
  1015 + if (config->o.m->selections.empty()) {
1017 1016 usage("in --range must follow a file name");
1018 1017 }
1019   - auto& last = config->o.m->page_specs.back();
  1018 + auto& last = config->o.m->selections.back();
1020 1019 if (!last.range.empty()) {
1021   - QTC::TC("qpdf", "QPDFJob duplicated range");
1022 1020 usage("--range already specified for this file");
1023 1021 }
1024 1022 last.range = arg;
... ... @@ -1028,13 +1026,11 @@ QPDFJob::PagesConfig::range(std::string const& arg)
1028 1026 QPDFJob::PagesConfig*
1029 1027 QPDFJob::PagesConfig::password(std::string const& arg)
1030 1028 {
1031   - if (config->o.m->page_specs.empty()) {
1032   - QTC::TC("qpdf", "QPDFJob misplaced pages password");
  1029 + if (config->o.m->selections.empty()) {
1033 1030 usage("in --pages, --password must follow a file name");
1034 1031 }
1035   - auto& last = config->o.m->page_specs.back();
  1032 + auto& last = config->o.m->selections.back();
1036 1033 if (!last.password.empty()) {
1037   - QTC::TC("qpdf", "QPDFJob duplicated pages password");
1038 1034 usage("--password already specified for this file");
1039 1035 }
1040 1036 last.password = arg;
... ...
libqpdf/qpdf/QPDFJob_private.hh
... ... @@ -6,6 +6,22 @@
6 6 #include <qpdf/ClosedFileInputSource.hh>
7 7 #include <qpdf/QPDFLogger.hh>
8 8  
  9 +// A selection of pages from a single input PDF to be included in the output. This corresponds to a
  10 +// single clause in the --pages option.
  11 +struct QPDFJob::Selection
  12 +{
  13 + Selection(std::string const& filename, std::string const& password, std::string const& range) :
  14 + filename(filename),
  15 + password(password),
  16 + range(range)
  17 + {
  18 + }
  19 +
  20 + std::string filename;
  21 + std::string password;
  22 + std::string range;
  23 +};
  24 +
9 25 struct QPDFJob::RotationSpec
10 26 {
11 27 RotationSpec(int angle = 0, bool relative = false) :
... ... @@ -195,7 +211,7 @@ class QPDFJob::Members
195 211 std::vector<UnderOverlay> underlay;
196 212 std::vector<UnderOverlay> overlay;
197 213 UnderOverlay* under_overlay{nullptr};
198   - std::vector<PageSpec> page_specs;
  214 + std::vector<Selection> selections;
199 215 std::map<std::string, RotationSpec> rotations;
200 216 bool require_outfile{true};
201 217 bool replace_input{false};
... ...
qpdf/qpdf.testcov
... ... @@ -491,8 +491,6 @@ qpdf-c called qpdf_oh_get_binary_string_value 0
491 491 qpdf-c called qpdf_oh_get_binary_utf8_value 0
492 492 qpdf-c called qpdf_oh_new_binary_string 0
493 493 qpdf-c called qpdf_oh_new_binary_unicode_string 0
494   -QPDFJob duplicated pages password 0
495   -QPDFJob misplaced pages password 0
496 494 QPDFJob check encrypted encrypted 0
497 495 QPDFJob check encrypted not encrypted 0
498 496 QPDFJob check password password incorrect 0
... ... @@ -545,8 +543,6 @@ QPDFPageObjectHelper used fallback without copying 0
545 543 QPDF skipping cache for known unchecked object 0
546 544 QPDF fix dangling triggered xref reconstruction 0
547 545 QPDF recover xref stream 0
548   -QPDFJob misplaced page range 0
549   -QPDFJob duplicated range 0
550 546 QPDFJob json over/under no file 0
551 547 QPDF_Array copy 1
552 548 QPDF_json stream data not string 0
... ...