Commit 7aab94c82edb4e0a737e29b428e99126c87bb528

Authored by m-holger
1 parent b56890db

Refactor `QPDFJob`: move file-related logic to `Inputs`, consolidate file processing, and simplify page specification handling.
libqpdf/QPDFJob.cc
@@ -257,7 +257,7 @@ struct QPDFJob::PageNo @@ -257,7 +257,7 @@ struct QPDFJob::PageNo
257 }; 257 };
258 258
259 QPDFJob::QPDFJob() : 259 QPDFJob::QPDFJob() :
260 - m(std::make_shared<Members>()) 260 + m(std::make_shared<Members>(*this))
261 { 261 {
262 } 262 }
263 263
@@ -2342,6 +2342,63 @@ QPDFJob::new_selection( @@ -2342,6 +2342,63 @@ QPDFJob::new_selection(
2342 m->selections.emplace_back(filename, password, range); 2342 m->selections.emplace_back(filename, password, range);
2343 } 2343 }
2344 2344
  2345 +void
  2346 +QPDFJob::Inputs::process(std::string const& filename, QPDFJob::Input& input)
  2347 +{
  2348 + // Open the PDF file and store the QPDF object. Do not canonicalize the file name. Using two
  2349 + // different paths to refer to the same file is a documented workaround for duplicating a page.
  2350 + // If you are using this an example of how to do this with the API, you can just create two
  2351 + // different QPDF objects to the same underlying file with the same path to achieve the
  2352 + // same effect.
  2353 + auto password = input.password;
  2354 + if (!encryption_file.empty() && password.empty() && filename == encryption_file) {
  2355 + password = encryption_file_password;
  2356 + }
  2357 + job.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
  2358 + v << prefix << ": processing " << filename << "\n";
  2359 + });
  2360 + if (!keep_files_open) {
  2361 + auto cis = std::make_shared<ClosedFileInputSource>(filename.data());
  2362 + input.cfis = cis.get();
  2363 + input.cfis->stayOpen(true);
  2364 + job.processInputSource(input.qpdf_p, cis, password.data(), true);
  2365 + } else {
  2366 + job.processInputSource(
  2367 + input.qpdf_p,
  2368 + std::make_shared<FileInputSource>(filename.data()),
  2369 + password.data(),
  2370 + true);
  2371 + }
  2372 + input.qpdf = input.qpdf_p.get();
  2373 + input.orig_pages = input.qpdf->getAllPages();
  2374 + input.n_pages = QIntC::to_int(input.orig_pages.size());
  2375 + if (input.cfis) {
  2376 + input.cfis->stayOpen(false);
  2377 + }
  2378 +}
  2379 +
  2380 +void
  2381 +QPDFJob::Inputs::process_all()
  2382 +{
  2383 + if (!keep_files_open_set) {
  2384 + // Count the number of distinct files to determine whether we should keep files open or not.
  2385 + // Rather than trying to code some portable heuristic based on OS limits, just hard-code
  2386 + // this at a given number and allow users to override.
  2387 + keep_files_open = files.size() <= keep_files_open_threshold;
  2388 + QTC::TC("qpdf", "QPDFJob automatically set keep files open", keep_files_open ? 0 : 1);
  2389 + job.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
  2390 + v << prefix << ": selecting --keep-open-files=" << (keep_files_open ? "y" : "n")
  2391 + << "\n";
  2392 + });
  2393 + }
  2394 +
  2395 + for (auto& [filename, input]: files) {
  2396 + if (!input.qpdf) {
  2397 + process(filename, input);
  2398 + }
  2399 + }
  2400 +}
  2401 +
2345 // Handle all page specifications. Return true if it succeeded without warnings. 2402 // Handle all page specifications. Return true if it succeeded without warnings.
2346 bool 2403 bool
2347 QPDFJob::handlePageSpecs(QPDF& pdf) 2404 QPDFJob::handlePageSpecs(QPDF& pdf)
@@ -2369,56 +2426,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf) @@ -2369,56 +2426,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2369 } 2426 }
2370 } 2427 }
2371 2428
2372 - if (!m->keep_files_open_set) {  
2373 - // Count the number of distinct files to determine whether we should keep files open or not.  
2374 - // Rather than trying to code some portable heuristic based on OS limits, just hard-code  
2375 - // this at a given number and allow users to override.  
2376 - m->keep_files_open = m->inputs.files.size() <= m->keep_files_open_threshold;  
2377 - QTC::TC("qpdf", "QPDFJob automatically set keep files open", m->keep_files_open ? 0 : 1);  
2378 - doIfVerbose([&](Pipeline& v, std::string const& prefix) {  
2379 - v << prefix << ": selecting --keep-open-files=" << (m->keep_files_open ? "y" : "n")  
2380 - << "\n";  
2381 - });  
2382 - }  
2383 -  
2384 - // Create a QPDF object for each file that we may take pages from.  
2385 - for (auto& [filename, input]: m->inputs.files) {  
2386 - if (!input.qpdf) {  
2387 - // Open the PDF file and store the QPDF object. Throw a std::shared_ptr to the qpdf into  
2388 - // a heap so that it survives through copying to the output but gets cleaned up  
2389 - // automatically at the end. Do not canonicalize the file name. Using two different  
2390 - // paths to refer to the same file is a documented workaround for duplicating a page. If  
2391 - // you are using this an example of how to do this with the API, you can just create two  
2392 - // different QPDF objects to the same underlying file with the same path to achieve the  
2393 - // same effect.  
2394 - auto password = input.password;  
2395 - if (!m->encryption_file.empty() && password.empty() && filename == m->encryption_file) {  
2396 - password = m->encryption_file_password;  
2397 - }  
2398 - doIfVerbose([&](Pipeline& v, std::string const& prefix) {  
2399 - v << prefix << ": processing " << filename << "\n";  
2400 - });  
2401 - if (!m->keep_files_open) {  
2402 - auto cis = std::make_shared<ClosedFileInputSource>(filename.data());  
2403 - cis->stayOpen(true);  
2404 - processInputSource(input.qpdf_p, cis, password.data(), true);  
2405 - cis->stayOpen(false);  
2406 - input.cfis = cis.get();  
2407 - } else {  
2408 - processInputSource(  
2409 - input.qpdf_p,  
2410 - std::make_shared<FileInputSource>(filename.data()),  
2411 - password.data(),  
2412 - true);  
2413 - }  
2414 - input.qpdf = input.qpdf_p.get();  
2415 - input.orig_pages = input.qpdf->getAllPages();  
2416 - input.n_pages = QIntC::to_int(input.orig_pages.size());  
2417 - if (input.cfis) {  
2418 - input.cfis->stayOpen(false);  
2419 - }  
2420 - }  
2421 - } 2429 + m->inputs.process_all();
2422 2430
2423 std::map<unsigned long long, std::set<QPDFObjGen>> copied_pages; 2431 std::map<unsigned long long, std::set<QPDFObjGen>> copied_pages;
2424 for (auto& selection: m->selections) { 2432 for (auto& selection: m->selections) {
@@ -2427,8 +2435,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf) @@ -2427,8 +2435,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf)
2427 auto const& input = m->inputs.files[selection.filename]; 2435 auto const& input = m->inputs.files[selection.filename];
2428 selection.qpdf = input.qpdf; 2436 selection.qpdf = input.qpdf;
2429 try { 2437 try {
2430 - selection.selected_pages =  
2431 - QUtil::parse_numrange(selection.range.data(), input.n_pages); 2438 + selection.selected_pages = QUtil::parse_numrange(selection.range.data(), input.n_pages);
2432 } catch (std::runtime_error& e) { 2439 } catch (std::runtime_error& e) {
2433 throw std::runtime_error( 2440 throw std::runtime_error(
2434 "parsing numeric range for " + selection.filename + ": " + e.what()); 2441 "parsing numeric range for " + selection.filename + ": " + e.what());
@@ -2887,8 +2894,8 @@ QPDFJob::setWriterOptions(QPDFWriter& w) @@ -2887,8 +2894,8 @@ QPDFJob::setWriterOptions(QPDFWriter& w)
2887 std::unique_ptr<QPDF> encryption_pdf; 2894 std::unique_ptr<QPDF> encryption_pdf;
2888 processFile( 2895 processFile(
2889 encryption_pdf, 2896 encryption_pdf,
2890 - m->encryption_file.data(),  
2891 - m->encryption_file_password.data(), 2897 + m->inputs.encryption_file.data(),
  2898 + m->inputs.encryption_file_password.data(),
2892 false, 2899 false,
2893 false); 2900 false);
2894 w.copyEncryptionParameters(*encryption_pdf); 2901 w.copyEncryptionParameters(*encryption_pdf);
libqpdf/QPDFJob_config.cc
@@ -152,7 +152,7 @@ QPDFJob::Config::copyEncryption(std::string const& parameter) @@ -152,7 +152,7 @@ QPDFJob::Config::copyEncryption(std::string const& parameter)
152 if (o.m->deterministic_id) { 152 if (o.m->deterministic_id) {
153 usage("the deterministic-id option is incompatible with encrypted output files"); 153 usage("the deterministic-id option is incompatible with encrypted output files");
154 } 154 }
155 - o.m->encryption_file = parameter; 155 + o.m->inputs.encryption_file = parameter;
156 o.m->copy_encryption = true; 156 o.m->copy_encryption = true;
157 o.m->encrypt = false; 157 o.m->encrypt = false;
158 o.m->decrypt = false; 158 o.m->decrypt = false;
@@ -181,7 +181,7 @@ QPDFJob::Config::deterministicId() @@ -181,7 +181,7 @@ QPDFJob::Config::deterministicId()
181 QPDFJob::Config* 181 QPDFJob::Config*
182 QPDFJob::Config::encryptionFilePassword(std::string const& parameter) 182 QPDFJob::Config::encryptionFilePassword(std::string const& parameter)
183 { 183 {
184 - o.m->encryption_file_password = parameter; 184 + o.m->inputs.encryption_file_password = parameter;
185 return this; 185 return this;
186 } 186 }
187 187
@@ -354,15 +354,15 @@ QPDFJob::Config::testJsonSchema() @@ -354,15 +354,15 @@ QPDFJob::Config::testJsonSchema()
354 QPDFJob::Config* 354 QPDFJob::Config*
355 QPDFJob::Config::keepFilesOpen(std::string const& parameter) 355 QPDFJob::Config::keepFilesOpen(std::string const& parameter)
356 { 356 {
357 - o.m->keep_files_open_set = true;  
358 - o.m->keep_files_open = (parameter == "y"); 357 + o.m->inputs.keep_files_open_set = true;
  358 + o.m->inputs.keep_files_open = (parameter == "y");
359 return this; 359 return this;
360 } 360 }
361 361
362 QPDFJob::Config* 362 QPDFJob::Config*
363 QPDFJob::Config::keepFilesOpenThreshold(std::string const& parameter) 363 QPDFJob::Config::keepFilesOpenThreshold(std::string const& parameter)
364 { 364 {
365 - o.m->keep_files_open_threshold = QUtil::string_to_uint(parameter.c_str()); 365 + o.m->inputs.keep_files_open_threshold = QUtil::string_to_uint(parameter.c_str());
366 return this; 366 return this;
367 } 367 }
368 368
libqpdf/qpdf/QPDFJob_private.hh
@@ -49,7 +49,25 @@ struct QPDFJob::Input @@ -49,7 +49,25 @@ struct QPDFJob::Input
49 // All PDF input files for a job. 49 // All PDF input files for a job.
50 struct QPDFJob::Inputs 50 struct QPDFJob::Inputs
51 { 51 {
  52 + // These default values are duplicated in help and docs.
  53 + static int constexpr DEFAULT_KEEP_FILES_OPEN_THRESHOLD = 200;
  54 +
  55 + Inputs(QPDFJob& job) :
  56 + job(job)
  57 + {
  58 + }
  59 + void process(std::string const& filename, QPDFJob::Input& file_spec);
  60 + void process_all();
  61 + std::string encryption_file;
  62 + std::string encryption_file_password;
  63 + bool keep_files_open{true};
  64 + bool keep_files_open_set{false};
  65 + size_t keep_files_open_threshold{DEFAULT_KEEP_FILES_OPEN_THRESHOLD};
  66 +
52 std::map<std::string, Input> files; 67 std::map<std::string, Input> files;
  68 +
  69 + private:
  70 + QPDFJob& job;
53 }; 71 };
54 72
55 struct QPDFJob::RotationSpec 73 struct QPDFJob::RotationSpec
@@ -107,8 +125,9 @@ class QPDFJob::Members @@ -107,8 +125,9 @@ class QPDFJob::Members
107 friend class QPDFJob; 125 friend class QPDFJob;
108 126
109 public: 127 public:
110 - Members() :  
111 - log(QPDFLogger::defaultLogger()) 128 + Members(QPDFJob& job) :
  129 + log(QPDFLogger::defaultLogger()),
  130 + inputs(job)
112 { 131 {
113 } 132 }
114 Members(Members const&) = delete; 133 Members(Members const&) = delete;
@@ -116,7 +135,6 @@ class QPDFJob::Members @@ -116,7 +135,6 @@ class QPDFJob::Members
116 135
117 private: 136 private:
118 // These default values are duplicated in help and docs. 137 // These default values are duplicated in help and docs.
119 - static int constexpr DEFAULT_KEEP_FILES_OPEN_THRESHOLD = 200;  
120 static int constexpr DEFAULT_OI_MIN_WIDTH = 128; 138 static int constexpr DEFAULT_OI_MIN_WIDTH = 128;
121 static int constexpr DEFAULT_OI_MIN_HEIGHT = 128; 139 static int constexpr DEFAULT_OI_MIN_HEIGHT = 128;
122 static int constexpr DEFAULT_OI_MIN_AREA = 16384; 140 static int constexpr DEFAULT_OI_MIN_AREA = 16384;
@@ -137,8 +155,6 @@ class QPDFJob::Members @@ -137,8 +155,6 @@ class QPDFJob::Members
137 bool suppress_warnings{false}; 155 bool suppress_warnings{false};
138 bool warnings_exit_zero{false}; 156 bool warnings_exit_zero{false};
139 bool copy_encryption{false}; 157 bool copy_encryption{false};
140 - std::string encryption_file;  
141 - std::string encryption_file_password;  
142 bool encrypt{false}; 158 bool encrypt{false};
143 bool password_is_hex_key{false}; 159 bool password_is_hex_key{false};
144 bool suppress_password_recovery{false}; 160 bool suppress_password_recovery{false};
@@ -182,9 +198,6 @@ class QPDFJob::Members @@ -182,9 +198,6 @@ class QPDFJob::Members
182 bool qdf_mode{false}; 198 bool qdf_mode{false};
183 bool preserve_unreferenced_objects{false}; 199 bool preserve_unreferenced_objects{false};
184 remove_unref_e remove_unreferenced_page_resources{re_auto}; 200 remove_unref_e remove_unreferenced_page_resources{re_auto};
185 - bool keep_files_open{true};  
186 - bool keep_files_open_set{false};  
187 - size_t keep_files_open_threshold{DEFAULT_KEEP_FILES_OPEN_THRESHOLD};  
188 bool newline_before_endstream{false}; 201 bool newline_before_endstream{false};
189 std::string linearize_pass1; 202 std::string linearize_pass1;
190 bool coalesce_contents{false}; 203 bool coalesce_contents{false};