Commit fe769f2723237596af2730bccd7972dab208a10d

Authored by Jay Berkenbilt
1 parent 4f4c627b

Keep file open while adding its pages during merge (fixes #217)

Showing 2 changed files with 32 additions and 3 deletions
ChangeLog
1 2018-08-04 Jay Berkenbilt <ejb@ql.org> 1 2018-08-04 Jay Berkenbilt <ejb@ql.org>
2 2
  3 + * Performance fix: optimize page merging operation to avoid
  4 + unnecessary open/close calls on files being merged. Fixes #217.
  5 +
3 * Add ClosedFileInputSource::stayOpen method, enabling a 6 * Add ClosedFileInputSource::stayOpen method, enabling a
4 ClosedFileInputSource to stay open during manually indicated 7 ClosedFileInputSource to stay open during manually indicated
5 periods of high activity, thus reducing the overhead of frequent 8 periods of high activity, thus reducing the overhead of frequent
qpdf/qpdf.cc
@@ -2103,6 +2103,7 @@ static void handle_page_specs(QPDF& pdf, Options& o, @@ -2103,6 +2103,7 @@ static void handle_page_specs(QPDF& pdf, Options& o,
2103 2103
2104 // Create a QPDF object for each file that we may take pages from. 2104 // Create a QPDF object for each file that we may take pages from.
2105 std::map<std::string, QPDF*> page_spec_qpdfs; 2105 std::map<std::string, QPDF*> page_spec_qpdfs;
  2106 + std::map<std::string, ClosedFileInputSource*> page_spec_cfis;
2106 page_spec_qpdfs[o.infilename] = &pdf; 2107 page_spec_qpdfs[o.infilename] = &pdf;
2107 std::vector<QPDFPageData> parsed_specs; 2108 std::vector<QPDFPageData> parsed_specs;
2108 for (std::vector<PageSpec>::iterator iter = o.page_specs.begin(); 2109 for (std::vector<PageSpec>::iterator iter = o.page_specs.begin();
@@ -2136,10 +2137,14 @@ static void handle_page_specs(QPDF& pdf, Options& o, @@ -2136,10 +2137,14 @@ static void handle_page_specs(QPDF& pdf, Options& o,
2136 std::cout << whoami << ": processing " 2137 std::cout << whoami << ": processing "
2137 << page_spec.filename << std::endl; 2138 << page_spec.filename << std::endl;
2138 } 2139 }
2139 - qpdf->processInputSource(  
2140 - new ClosedFileInputSource(  
2141 - page_spec.filename.c_str()), password); 2140 + ClosedFileInputSource* cis =
  2141 + new ClosedFileInputSource(page_spec.filename.c_str());
  2142 + PointerHolder<InputSource> is(cis);
  2143 + cis->stayOpen(true);
  2144 + qpdf->processInputSource(is, password);
  2145 + cis->stayOpen(false);
2142 page_spec_qpdfs[page_spec.filename] = qpdf; 2146 page_spec_qpdfs[page_spec.filename] = qpdf;
  2147 + page_spec_cfis[page_spec.filename] = cis;
2143 } 2148 }
2144 2149
2145 // Read original pages from the PDF, and parse the page range 2150 // Read original pages from the PDF, and parse the page range
@@ -2156,9 +2161,20 @@ static void handle_page_specs(QPDF& pdf, Options& o, @@ -2156,9 +2161,20 @@ static void handle_page_specs(QPDF& pdf, Options& o,
2156 page_spec_qpdfs.begin(); 2161 page_spec_qpdfs.begin();
2157 iter != page_spec_qpdfs.end(); ++iter) 2162 iter != page_spec_qpdfs.end(); ++iter)
2158 { 2163 {
  2164 + std::string const& filename = (*iter).first;
  2165 + ClosedFileInputSource* cis = 0;
  2166 + if (page_spec_cfis.count(filename))
  2167 + {
  2168 + cis = page_spec_cfis[filename];
  2169 + cis->stayOpen(true);
  2170 + }
2159 QPDFPageDocumentHelper dh(*((*iter).second)); 2171 QPDFPageDocumentHelper dh(*((*iter).second));
2160 dh.pushInheritedAttributesToPage(); 2172 dh.pushInheritedAttributesToPage();
2161 dh.removeUnreferencedResources(); 2173 dh.removeUnreferencedResources();
  2174 + if (cis)
  2175 + {
  2176 + cis->stayOpen(false);
  2177 + }
2162 } 2178 }
2163 } 2179 }
2164 2180
@@ -2204,7 +2220,17 @@ static void handle_page_specs(QPDF& pdf, Options& o, @@ -2204,7 +2220,17 @@ static void handle_page_specs(QPDF& pdf, Options& o,
2204 // Pages are specified from 1 but numbered from 0 in the 2220 // Pages are specified from 1 but numbered from 0 in the
2205 // vector 2221 // vector
2206 int pageno = *pageno_iter - 1; 2222 int pageno = *pageno_iter - 1;
  2223 + ClosedFileInputSource* cis = 0;
  2224 + if (page_spec_cfis.count(page_data.filename))
  2225 + {
  2226 + cis = page_spec_cfis[page_data.filename];
  2227 + cis->stayOpen(true);
  2228 + }
2207 dh.addPage(page_data.orig_pages.at(pageno), false); 2229 dh.addPage(page_data.orig_pages.at(pageno), false);
  2230 + if (cis)
  2231 + {
  2232 + cis->stayOpen(false);
  2233 + }
2208 if (page_data.qpdf == &pdf) 2234 if (page_data.qpdf == &pdf)
2209 { 2235 {
2210 // This is a page from the original file. Keep track 2236 // This is a page from the original file. Keep track