Commit 7aab94c82edb4e0a737e29b428e99126c87bb528

Authored by m-holger
1 parent b56890db

Refactor `QPDFJob`: move file-related logic to `Inputs`, consolidate file processing, and simplify page specification handling.
libqpdf/QPDFJob.cc
... ... @@ -257,7 +257,7 @@ struct QPDFJob::PageNo
257 257 };
258 258  
259 259 QPDFJob::QPDFJob() :
260   - m(std::make_shared<Members>())
  260 + m(std::make_shared<Members>(*this))
261 261 {
262 262 }
263 263  
... ... @@ -2342,6 +2342,63 @@ QPDFJob::new_selection(
2342 2342 m->selections.emplace_back(filename, password, range);
2343 2343 }
2344 2344  
  2345 +void
  2346 +QPDFJob::Inputs::process(std::string const& filename, QPDFJob::Input& input)
  2347 +{
  2348 + // Open the PDF file and store the QPDF object. Do not canonicalize the file name. Using two
  2349 + // different paths to refer to the same file is a documented workaround for duplicating a page.
  2350 + // If you are using this as an example of how to do this with the API, you can just create two
  2351 + // different QPDF objects to the same underlying file with the same path to achieve the
  2352 + // same effect.
  2353 + auto password = input.password;
  2354 + if (!encryption_file.empty() && password.empty() && filename == encryption_file) {
  2355 + password = encryption_file_password;
  2356 + }
  2357 + job.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
  2358 + v << prefix << ": processing " << filename << "\n";
  2359 + });
  2360 + if (!keep_files_open) {
  2361 + auto cis = std::make_shared<ClosedFileInputSource>(filename.data());
  2362 + input.cfis = cis.get();
  2363 + input.cfis->stayOpen(true);
  2364 + job.processInputSource(input.qpdf_p, cis, password.data(), true);
  2365 + } else {
  2366 + job.processInputSource(
  2367 + input.qpdf_p,
  2368 + std::make_shared<FileInputSource>(filename.data()),
  2369 + password.data(),
  2370 + true);
  2371 + }
  2372 + input.qpdf = input.qpdf_p.get();
  2373 + input.orig_pages = input.qpdf->getAllPages();
  2374 + input.n_pages = QIntC::to_int(input.orig_pages.size());
  2375 + if (input.cfis) {
  2376 + input.cfis->stayOpen(false);
  2377 + }
  2378 +}
  2379 +
  2380 +void
  2381 +QPDFJob::Inputs::process_all()
  2382 +{
  2383 + if (!keep_files_open_set) {
  2384 + // Count the number of distinct files to determine whether we should keep files open or not.
  2385 + // Rather than trying to code some portable heuristic based on OS limits, just hard-code
  2386 + // this at a given number and allow users to override.
  2387 + keep_files_open = files.size() <= keep_files_open_threshold;
  2388 + QTC::TC("qpdf", "QPDFJob automatically set keep files open", keep_files_open ? 0 : 1);
  2389 + job.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
  2390 + v << prefix << ": selecting --keep-open-files=" << (keep_files_open ? "y" : "n")
  2391 + << "\n";
  2392 + });
  2393 + }
  2394 +
  2395 + for (auto& [filename, input]: files) {
  2396 + if (!input.qpdf) {
  2397 + process(filename, input);
  2398 + }
  2399 + }
  2400 +}
  2401 +
2345 2402 // Handle all page specifications. Return true if it succeeded without warnings.
2346 2403 bool
2347 2404 QPDFJob::handlePageSpecs(QPDF& pdf)
... ... @@ -2369,56 +2426,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2369 2426 }
2370 2427 }
2371 2428  
2372   - if (!m->keep_files_open_set) {
2373   - // Count the number of distinct files to determine whether we should keep files open or not.
2374   - // Rather than trying to code some portable heuristic based on OS limits, just hard-code
2375   - // this at a given number and allow users to override.
2376   - m->keep_files_open = m->inputs.files.size() <= m->keep_files_open_threshold;
2377   - QTC::TC("qpdf", "QPDFJob automatically set keep files open", m->keep_files_open ? 0 : 1);
2378   - doIfVerbose([&](Pipeline& v, std::string const& prefix) {
2379   - v << prefix << ": selecting --keep-open-files=" << (m->keep_files_open ? "y" : "n")
2380   - << "\n";
2381   - });
2382   - }
2383   -
2384   - // Create a QPDF object for each file that we may take pages from.
2385   - for (auto& [filename, input]: m->inputs.files) {
2386   - if (!input.qpdf) {
2387   - // Open the PDF file and store the QPDF object. Throw a std::shared_ptr to the qpdf into
2388   - // a heap so that it survives through copying to the output but gets cleaned up
2389   - // automatically at the end. Do not canonicalize the file name. Using two different
2390   - // paths to refer to the same file is a documented workaround for duplicating a page. If
2391   - // you are using this an example of how to do this with the API, you can just create two
2392   - // different QPDF objects to the same underlying file with the same path to achieve the
2393   - // same effect.
2394   - auto password = input.password;
2395   - if (!m->encryption_file.empty() && password.empty() && filename == m->encryption_file) {
2396   - password = m->encryption_file_password;
2397   - }
2398   - doIfVerbose([&](Pipeline& v, std::string const& prefix) {
2399   - v << prefix << ": processing " << filename << "\n";
2400   - });
2401   - if (!m->keep_files_open) {
2402   - auto cis = std::make_shared<ClosedFileInputSource>(filename.data());
2403   - cis->stayOpen(true);
2404   - processInputSource(input.qpdf_p, cis, password.data(), true);
2405   - cis->stayOpen(false);
2406   - input.cfis = cis.get();
2407   - } else {
2408   - processInputSource(
2409   - input.qpdf_p,
2410   - std::make_shared<FileInputSource>(filename.data()),
2411   - password.data(),
2412   - true);
2413   - }
2414   - input.qpdf = input.qpdf_p.get();
2415   - input.orig_pages = input.qpdf->getAllPages();
2416   - input.n_pages = QIntC::to_int(input.orig_pages.size());
2417   - if (input.cfis) {
2418   - input.cfis->stayOpen(false);
2419   - }
2420   - }
2421   - }
  2429 + m->inputs.process_all();
2422 2430  
2423 2431 std::map<unsigned long long, std::set<QPDFObjGen>> copied_pages;
2424 2432 for (auto& selection: m->selections) {
... ... @@ -2427,8 +2435,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2427 2435 auto const& input = m->inputs.files[selection.filename];
2428 2436 selection.qpdf = input.qpdf;
2429 2437 try {
2430   - selection.selected_pages =
2431   - QUtil::parse_numrange(selection.range.data(), input.n_pages);
  2438 + selection.selected_pages = QUtil::parse_numrange(selection.range.data(), input.n_pages);
2432 2439 } catch (std::runtime_error& e) {
2433 2440 throw std::runtime_error(
2434 2441 "parsing numeric range for " + selection.filename + ": " + e.what());
... ... @@ -2887,8 +2894,8 @@ QPDFJob::setWriterOptions(QPDFWriter& w)
2887 2894 std::unique_ptr<QPDF> encryption_pdf;
2888 2895 processFile(
2889 2896 encryption_pdf,
2890   - m->encryption_file.data(),
2891   - m->encryption_file_password.data(),
  2897 + m->inputs.encryption_file.data(),
  2898 + m->inputs.encryption_file_password.data(),
2892 2899 false,
2893 2900 false);
2894 2901 w.copyEncryptionParameters(*encryption_pdf);
... ...
libqpdf/QPDFJob_config.cc
... ... @@ -152,7 +152,7 @@ QPDFJob::Config::copyEncryption(std::string const& parameter)
152 152 if (o.m->deterministic_id) {
153 153 usage("the deterministic-id option is incompatible with encrypted output files");
154 154 }
155   - o.m->encryption_file = parameter;
  155 + o.m->inputs.encryption_file = parameter;
156 156 o.m->copy_encryption = true;
157 157 o.m->encrypt = false;
158 158 o.m->decrypt = false;
... ... @@ -181,7 +181,7 @@ QPDFJob::Config::deterministicId()
181 181 QPDFJob::Config*
182 182 QPDFJob::Config::encryptionFilePassword(std::string const& parameter)
183 183 {
184   - o.m->encryption_file_password = parameter;
  184 + o.m->inputs.encryption_file_password = parameter;
185 185 return this;
186 186 }
187 187  
... ... @@ -354,15 +354,15 @@ QPDFJob::Config::testJsonSchema()
354 354 QPDFJob::Config*
355 355 QPDFJob::Config::keepFilesOpen(std::string const& parameter)
356 356 {
357   - o.m->keep_files_open_set = true;
358   - o.m->keep_files_open = (parameter == "y");
  357 + o.m->inputs.keep_files_open_set = true;
  358 + o.m->inputs.keep_files_open = (parameter == "y");
359 359 return this;
360 360 }
361 361  
362 362 QPDFJob::Config*
363 363 QPDFJob::Config::keepFilesOpenThreshold(std::string const& parameter)
364 364 {
365   - o.m->keep_files_open_threshold = QUtil::string_to_uint(parameter.c_str());
  365 + o.m->inputs.keep_files_open_threshold = QUtil::string_to_uint(parameter.c_str());
366 366 return this;
367 367 }
368 368  
... ...
libqpdf/qpdf/QPDFJob_private.hh
... ... @@ -49,7 +49,25 @@ struct QPDFJob::Input
49 49 // All PDF input files for a job.
50 50 struct QPDFJob::Inputs
51 51 {
  52 + // These default values are duplicated in help and docs.
  53 + static int constexpr DEFAULT_KEEP_FILES_OPEN_THRESHOLD = 200;
  54 +
  55 + Inputs(QPDFJob& job) :
  56 + job(job)
  57 + {
  58 + }
  59 + void process(std::string const& filename, QPDFJob::Input& file_spec);
  60 + void process_all();
  61 + std::string encryption_file;
  62 + std::string encryption_file_password;
  63 + bool keep_files_open{true};
  64 + bool keep_files_open_set{false};
  65 + size_t keep_files_open_threshold{DEFAULT_KEEP_FILES_OPEN_THRESHOLD};
  66 +
52 67 std::map<std::string, Input> files;
  68 +
  69 + private:
  70 + QPDFJob& job;
53 71 };
54 72  
55 73 struct QPDFJob::RotationSpec
... ... @@ -107,8 +125,9 @@ class QPDFJob::Members
107 125 friend class QPDFJob;
108 126  
109 127 public:
110   - Members() :
111   - log(QPDFLogger::defaultLogger())
  128 + Members(QPDFJob& job) :
  129 + log(QPDFLogger::defaultLogger()),
  130 + inputs(job)
112 131 {
113 132 }
114 133 Members(Members const&) = delete;
... ... @@ -116,7 +135,6 @@ class QPDFJob::Members
116 135  
117 136 private:
118 137 // These default values are duplicated in help and docs.
119   - static int constexpr DEFAULT_KEEP_FILES_OPEN_THRESHOLD = 200;
120 138 static int constexpr DEFAULT_OI_MIN_WIDTH = 128;
121 139 static int constexpr DEFAULT_OI_MIN_HEIGHT = 128;
122 140 static int constexpr DEFAULT_OI_MIN_AREA = 16384;
... ... @@ -137,8 +155,6 @@ class QPDFJob::Members
137 155 bool suppress_warnings{false};
138 156 bool warnings_exit_zero{false};
139 157 bool copy_encryption{false};
140   - std::string encryption_file;
141   - std::string encryption_file_password;
142 158 bool encrypt{false};
143 159 bool password_is_hex_key{false};
144 160 bool suppress_password_recovery{false};
... ... @@ -182,9 +198,6 @@ class QPDFJob::Members
182 198 bool qdf_mode{false};
183 199 bool preserve_unreferenced_objects{false};
184 200 remove_unref_e remove_unreferenced_page_resources{re_auto};
185   - bool keep_files_open{true};
186   - bool keep_files_open_set{false};
187   - size_t keep_files_open_threshold{DEFAULT_KEEP_FILES_OPEN_THRESHOLD};
188 201 bool newline_before_endstream{false};
189 202 std::string linearize_pass1;
190 203 bool coalesce_contents{false};
... ...