Commit 7aab94c82edb4e0a737e29b428e99126c87bb528
1 parent
b56890db
Refactor `QPDFJob`: move file-related logic to `Inputs`, consolidate file processing, and simplify page specification handling.
Showing 3 changed files with 88 additions and 68 deletions
libqpdf/QPDFJob.cc
| ... | ... | @@ -257,7 +257,7 @@ struct QPDFJob::PageNo |
| 257 | 257 | }; |
| 258 | 258 | |
| 259 | 259 | QPDFJob::QPDFJob() : |
| 260 | - m(std::make_shared<Members>()) | |
| 260 | + m(std::make_shared<Members>(*this)) | |
| 261 | 261 | { |
| 262 | 262 | } |
| 263 | 263 | |
| ... | ... | @@ -2342,6 +2342,63 @@ QPDFJob::new_selection( |
| 2342 | 2342 | m->selections.emplace_back(filename, password, range); |
| 2343 | 2343 | } |
| 2344 | 2344 | |
| 2345 | +void | |
| 2346 | +QPDFJob::Inputs::process(std::string const& filename, QPDFJob::Input& input) | |
| 2347 | +{ | |
| 2348 | + // Open the PDF file and store the QPDF object. Do not canonicalize the file name. Using two | |
| 2349 | + // different paths to refer to the same file is a documented workaround for duplicating a page. | |
| 2350 | + // If you are using this as an example of how to do this with the API, you can just create two | |
| 2351 | + // different QPDF objects to the same underlying file with the same path to achieve the | |
| 2352 | + // same effect. | |
| 2353 | + auto password = input.password; | |
| 2354 | + if (!encryption_file.empty() && password.empty() && filename == encryption_file) { | |
| 2355 | + password = encryption_file_password; | |
| 2356 | + } | |
| 2357 | + job.doIfVerbose([&](Pipeline& v, std::string const& prefix) { | |
| 2358 | + v << prefix << ": processing " << filename << "\n"; | |
| 2359 | + }); | |
| 2360 | + if (!keep_files_open) { | |
| 2361 | + auto cis = std::make_shared<ClosedFileInputSource>(filename.data()); | |
| 2362 | + input.cfis = cis.get(); | |
| 2363 | + input.cfis->stayOpen(true); | |
| 2364 | + job.processInputSource(input.qpdf_p, cis, password.data(), true); | |
| 2365 | + } else { | |
| 2366 | + job.processInputSource( | |
| 2367 | + input.qpdf_p, | |
| 2368 | + std::make_shared<FileInputSource>(filename.data()), | |
| 2369 | + password.data(), | |
| 2370 | + true); | |
| 2371 | + } | |
| 2372 | + input.qpdf = input.qpdf_p.get(); | |
| 2373 | + input.orig_pages = input.qpdf->getAllPages(); | |
| 2374 | + input.n_pages = QIntC::to_int(input.orig_pages.size()); | |
| 2375 | + if (input.cfis) { | |
| 2376 | + input.cfis->stayOpen(false); | |
| 2377 | + } | |
| 2378 | +} | |
| 2379 | + | |
| 2380 | +void | |
| 2381 | +QPDFJob::Inputs::process_all() | |
| 2382 | +{ | |
| 2383 | + if (!keep_files_open_set) { | |
| 2384 | + // Count the number of distinct files to determine whether we should keep files open or not. | |
| 2385 | + // Rather than trying to code some portable heuristic based on OS limits, just hard-code | |
| 2386 | + // this at a given number and allow users to override. | |
| 2387 | + keep_files_open = files.size() <= keep_files_open_threshold; | |
| 2388 | + QTC::TC("qpdf", "QPDFJob automatically set keep files open", keep_files_open ? 0 : 1); | |
| 2389 | + job.doIfVerbose([&](Pipeline& v, std::string const& prefix) { | |
| 2390 | + v << prefix << ": selecting --keep-open-files=" << (keep_files_open ? "y" : "n") | |
| 2391 | + << "\n"; | |
| 2392 | + }); | |
| 2393 | + } | |
| 2394 | + | |
| 2395 | + for (auto& [filename, input]: files) { | |
| 2396 | + if (!input.qpdf) { | |
| 2397 | + process(filename, input); | |
| 2398 | + } | |
| 2399 | + } | |
| 2400 | +} | |
| 2401 | + | |
| 2345 | 2402 | // Handle all page specifications. Return true if it succeeded without warnings. |
| 2346 | 2403 | bool |
| 2347 | 2404 | QPDFJob::handlePageSpecs(QPDF& pdf) |
| ... | ... | @@ -2369,56 +2426,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf) |
| 2369 | 2426 | } |
| 2370 | 2427 | } |
| 2371 | 2428 | |
| 2372 | - if (!m->keep_files_open_set) { | |
| 2373 | - // Count the number of distinct files to determine whether we should keep files open or not. | |
| 2374 | - // Rather than trying to code some portable heuristic based on OS limits, just hard-code | |
| 2375 | - // this at a given number and allow users to override. | |
| 2376 | - m->keep_files_open = m->inputs.files.size() <= m->keep_files_open_threshold; | |
| 2377 | - QTC::TC("qpdf", "QPDFJob automatically set keep files open", m->keep_files_open ? 0 : 1); | |
| 2378 | - doIfVerbose([&](Pipeline& v, std::string const& prefix) { | |
| 2379 | - v << prefix << ": selecting --keep-open-files=" << (m->keep_files_open ? "y" : "n") | |
| 2380 | - << "\n"; | |
| 2381 | - }); | |
| 2382 | - } | |
| 2383 | - | |
| 2384 | - // Create a QPDF object for each file that we may take pages from. | |
| 2385 | - for (auto& [filename, input]: m->inputs.files) { | |
| 2386 | - if (!input.qpdf) { | |
| 2387 | - // Open the PDF file and store the QPDF object. Throw a std::shared_ptr to the qpdf into | |
| 2388 | - // a heap so that it survives through copying to the output but gets cleaned up | |
| 2389 | - // automatically at the end. Do not canonicalize the file name. Using two different | |
| 2390 | - // paths to refer to the same file is a documented workaround for duplicating a page. If | |
| 2391 | - // you are using this as an example of how to do this with the API, you can just create two | |
| 2392 | - // different QPDF objects to the same underlying file with the same path to achieve the | |
| 2393 | - // same effect. | |
| 2394 | - auto password = input.password; | |
| 2395 | - if (!m->encryption_file.empty() && password.empty() && filename == m->encryption_file) { | |
| 2396 | - password = m->encryption_file_password; | |
| 2397 | - } | |
| 2398 | - doIfVerbose([&](Pipeline& v, std::string const& prefix) { | |
| 2399 | - v << prefix << ": processing " << filename << "\n"; | |
| 2400 | - }); | |
| 2401 | - if (!m->keep_files_open) { | |
| 2402 | - auto cis = std::make_shared<ClosedFileInputSource>(filename.data()); | |
| 2403 | - cis->stayOpen(true); | |
| 2404 | - processInputSource(input.qpdf_p, cis, password.data(), true); | |
| 2405 | - cis->stayOpen(false); | |
| 2406 | - input.cfis = cis.get(); | |
| 2407 | - } else { | |
| 2408 | - processInputSource( | |
| 2409 | - input.qpdf_p, | |
| 2410 | - std::make_shared<FileInputSource>(filename.data()), | |
| 2411 | - password.data(), | |
| 2412 | - true); | |
| 2413 | - } | |
| 2414 | - input.qpdf = input.qpdf_p.get(); | |
| 2415 | - input.orig_pages = input.qpdf->getAllPages(); | |
| 2416 | - input.n_pages = QIntC::to_int(input.orig_pages.size()); | |
| 2417 | - if (input.cfis) { | |
| 2418 | - input.cfis->stayOpen(false); | |
| 2419 | - } | |
| 2420 | - } | |
| 2421 | - } | |
| 2429 | + m->inputs.process_all(); | |
| 2422 | 2430 | |
| 2423 | 2431 | std::map<unsigned long long, std::set<QPDFObjGen>> copied_pages; |
| 2424 | 2432 | for (auto& selection: m->selections) { |
| ... | ... | @@ -2427,8 +2435,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf) |
| 2427 | 2435 | auto const& input = m->inputs.files[selection.filename]; |
| 2428 | 2436 | selection.qpdf = input.qpdf; |
| 2429 | 2437 | try { |
| 2430 | - selection.selected_pages = | |
| 2431 | - QUtil::parse_numrange(selection.range.data(), input.n_pages); | |
| 2438 | + selection.selected_pages = QUtil::parse_numrange(selection.range.data(), input.n_pages); | |
| 2432 | 2439 | } catch (std::runtime_error& e) { |
| 2433 | 2440 | throw std::runtime_error( |
| 2434 | 2441 | "parsing numeric range for " + selection.filename + ": " + e.what()); |
| ... | ... | @@ -2887,8 +2894,8 @@ QPDFJob::setWriterOptions(QPDFWriter& w) |
| 2887 | 2894 | std::unique_ptr<QPDF> encryption_pdf; |
| 2888 | 2895 | processFile( |
| 2889 | 2896 | encryption_pdf, |
| 2890 | - m->encryption_file.data(), | |
| 2891 | - m->encryption_file_password.data(), | |
| 2897 | + m->inputs.encryption_file.data(), | |
| 2898 | + m->inputs.encryption_file_password.data(), | |
| 2892 | 2899 | false, |
| 2893 | 2900 | false); |
| 2894 | 2901 | w.copyEncryptionParameters(*encryption_pdf); | ... | ... |
libqpdf/QPDFJob_config.cc
| ... | ... | @@ -152,7 +152,7 @@ QPDFJob::Config::copyEncryption(std::string const& parameter) |
| 152 | 152 | if (o.m->deterministic_id) { |
| 153 | 153 | usage("the deterministic-id option is incompatible with encrypted output files"); |
| 154 | 154 | } |
| 155 | - o.m->encryption_file = parameter; | |
| 155 | + o.m->inputs.encryption_file = parameter; | |
| 156 | 156 | o.m->copy_encryption = true; |
| 157 | 157 | o.m->encrypt = false; |
| 158 | 158 | o.m->decrypt = false; |
| ... | ... | @@ -181,7 +181,7 @@ QPDFJob::Config::deterministicId() |
| 181 | 181 | QPDFJob::Config* |
| 182 | 182 | QPDFJob::Config::encryptionFilePassword(std::string const& parameter) |
| 183 | 183 | { |
| 184 | - o.m->encryption_file_password = parameter; | |
| 184 | + o.m->inputs.encryption_file_password = parameter; | |
| 185 | 185 | return this; |
| 186 | 186 | } |
| 187 | 187 | |
| ... | ... | @@ -354,15 +354,15 @@ QPDFJob::Config::testJsonSchema() |
| 354 | 354 | QPDFJob::Config* |
| 355 | 355 | QPDFJob::Config::keepFilesOpen(std::string const& parameter) |
| 356 | 356 | { |
| 357 | - o.m->keep_files_open_set = true; | |
| 358 | - o.m->keep_files_open = (parameter == "y"); | |
| 357 | + o.m->inputs.keep_files_open_set = true; | |
| 358 | + o.m->inputs.keep_files_open = (parameter == "y"); | |
| 359 | 359 | return this; |
| 360 | 360 | } |
| 361 | 361 | |
| 362 | 362 | QPDFJob::Config* |
| 363 | 363 | QPDFJob::Config::keepFilesOpenThreshold(std::string const& parameter) |
| 364 | 364 | { |
| 365 | - o.m->keep_files_open_threshold = QUtil::string_to_uint(parameter.c_str()); | |
| 365 | + o.m->inputs.keep_files_open_threshold = QUtil::string_to_uint(parameter.c_str()); | |
| 366 | 366 | return this; |
| 367 | 367 | } |
| 368 | 368 | ... | ... |
libqpdf/qpdf/QPDFJob_private.hh
| ... | ... | @@ -49,7 +49,25 @@ struct QPDFJob::Input |
| 49 | 49 | // All PDF input files for a job. |
| 50 | 50 | struct QPDFJob::Inputs |
| 51 | 51 | { |
| 52 | + // These default values are duplicated in help and docs. | |
| 53 | + static int constexpr DEFAULT_KEEP_FILES_OPEN_THRESHOLD = 200; | |
| 54 | + | |
| 55 | + Inputs(QPDFJob& job) : | |
| 56 | + job(job) | |
| 57 | + { | |
| 58 | + } | |
| 59 | + void process(std::string const& filename, QPDFJob::Input& file_spec); | |
| 60 | + void process_all(); | |
| 61 | + std::string encryption_file; | |
| 62 | + std::string encryption_file_password; | |
| 63 | + bool keep_files_open{true}; | |
| 64 | + bool keep_files_open_set{false}; | |
| 65 | + size_t keep_files_open_threshold{DEFAULT_KEEP_FILES_OPEN_THRESHOLD}; | |
| 66 | + | |
| 52 | 67 | std::map<std::string, Input> files; |
| 68 | + | |
| 69 | + private: | |
| 70 | + QPDFJob& job; | |
| 53 | 71 | }; |
| 54 | 72 | |
| 55 | 73 | struct QPDFJob::RotationSpec |
| ... | ... | @@ -107,8 +125,9 @@ class QPDFJob::Members |
| 107 | 125 | friend class QPDFJob; |
| 108 | 126 | |
| 109 | 127 | public: |
| 110 | - Members() : | |
| 111 | - log(QPDFLogger::defaultLogger()) | |
| 128 | + Members(QPDFJob& job) : | |
| 129 | + log(QPDFLogger::defaultLogger()), | |
| 130 | + inputs(job) | |
| 112 | 131 | { |
| 113 | 132 | } |
| 114 | 133 | Members(Members const&) = delete; |
| ... | ... | @@ -116,7 +135,6 @@ class QPDFJob::Members |
| 116 | 135 | |
| 117 | 136 | private: |
| 118 | 137 | // These default values are duplicated in help and docs. |
| 119 | - static int constexpr DEFAULT_KEEP_FILES_OPEN_THRESHOLD = 200; | |
| 120 | 138 | static int constexpr DEFAULT_OI_MIN_WIDTH = 128; |
| 121 | 139 | static int constexpr DEFAULT_OI_MIN_HEIGHT = 128; |
| 122 | 140 | static int constexpr DEFAULT_OI_MIN_AREA = 16384; |
| ... | ... | @@ -137,8 +155,6 @@ class QPDFJob::Members |
| 137 | 155 | bool suppress_warnings{false}; |
| 138 | 156 | bool warnings_exit_zero{false}; |
| 139 | 157 | bool copy_encryption{false}; |
| 140 | - std::string encryption_file; | |
| 141 | - std::string encryption_file_password; | |
| 142 | 158 | bool encrypt{false}; |
| 143 | 159 | bool password_is_hex_key{false}; |
| 144 | 160 | bool suppress_password_recovery{false}; |
| ... | ... | @@ -182,9 +198,6 @@ class QPDFJob::Members |
| 182 | 198 | bool qdf_mode{false}; |
| 183 | 199 | bool preserve_unreferenced_objects{false}; |
| 184 | 200 | remove_unref_e remove_unreferenced_page_resources{re_auto}; |
| 185 | - bool keep_files_open{true}; | |
| 186 | - bool keep_files_open_set{false}; | |
| 187 | - size_t keep_files_open_threshold{DEFAULT_KEEP_FILES_OPEN_THRESHOLD}; | |
| 188 | 201 | bool newline_before_endstream{false}; |
| 189 | 202 | std::string linearize_pass1; |
| 190 | 203 | bool coalesce_contents{false}; | ... | ... |