Commit 1e766dcda26886137cb70fa02de631a1cbb956a0

Authored by Jay Berkenbilt
1 parent 81904623

Add --remove-unreferenced-resources option

ChangeLog
  1 +2020-04-04 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Add new option --remove-unreferenced-resources that takes auto,
  4 + yes, or no as options. This tells qpdf whether to attempt to
  5 + remove unreferenced resources from pages when doing page splitting
  6 + operations. Prior to this change, the default was to attempt to
  7 + remove unreferenced resources, but this operation was very slow,
  8 + especially for large and complex files. The new default is "auto",
  9 + which tells qpdf to analyze the file for shared resources. This is
  10 + a relatively quick test. If no shared resources are found, then we
  11 + don't attempt to remove unreferenced resources, because
  12 + unreferenced resources never occur in files without shared
  13 + resources. To force qpdf to look for and remove unreferenced
  14 + resources, use --remove-unreferenced-resources=yes. The option
  15 + --preserve-unreferenced-resources is now a synonym for
  16 + --remove-unreferenced-resources=no.
  17 +
1 2020-04-03 Jay Berkenbilt <ejb@ql.org> 18 2020-04-03 Jay Berkenbilt <ejb@ql.org>
2 19
3 * Allow qpdf to be built on systems without wchar_t. All "normal" 20 * Allow qpdf to be built on systems without wchar_t. All "normal"
qpdf/qpdf.cc
@@ -94,6 +94,8 @@ struct UnderOverlay @@ -94,6 +94,8 @@ struct UnderOverlay
94 std::vector<int> repeat_pagenos; 94 std::vector<int> repeat_pagenos;
95 }; 95 };
96 96
  97 +enum remove_unref_e { re_auto, re_yes, re_no };
  98 +
97 struct Options 99 struct Options
98 { 100 {
99 Options() : 101 Options() :
@@ -144,7 +146,7 @@ struct Options @@ -144,7 +146,7 @@ struct Options
144 ignore_xref_streams(false), 146 ignore_xref_streams(false),
145 qdf_mode(false), 147 qdf_mode(false),
146 preserve_unreferenced_objects(false), 148 preserve_unreferenced_objects(false),
147 - preserve_unreferenced_page_resources(false), 149 + remove_unreferenced_page_resources(re_auto),
148 keep_files_open(true), 150 keep_files_open(true),
149 keep_files_open_set(false), 151 keep_files_open_set(false),
150 keep_files_open_threshold(200), // default known in help and docs 152 keep_files_open_threshold(200), // default known in help and docs
@@ -243,7 +245,7 @@ struct Options @@ -243,7 +245,7 @@ struct Options
243 bool ignore_xref_streams; 245 bool ignore_xref_streams;
244 bool qdf_mode; 246 bool qdf_mode;
245 bool preserve_unreferenced_objects; 247 bool preserve_unreferenced_objects;
246 - bool preserve_unreferenced_page_resources; 248 + remove_unref_e remove_unreferenced_page_resources;
247 bool keep_files_open; 249 bool keep_files_open;
248 bool keep_files_open_set; 250 bool keep_files_open_set;
249 size_t keep_files_open_threshold; 251 size_t keep_files_open_threshold;
@@ -739,6 +741,7 @@ class ArgParser @@ -739,6 +741,7 @@ class ArgParser
739 void argQdf(); 741 void argQdf();
740 void argPreserveUnreferenced(); 742 void argPreserveUnreferenced();
741 void argPreserveUnreferencedResources(); 743 void argPreserveUnreferencedResources();
  744 + void argRemoveUnreferencedResources(char* parameter);
742 void argKeepFilesOpen(char* parameter); 745 void argKeepFilesOpen(char* parameter);
743 void argKeepFilesOpenThreshold(char* parameter); 746 void argKeepFilesOpenThreshold(char* parameter);
744 void argNewlineBeforeEndstream(); 747 void argNewlineBeforeEndstream();
@@ -970,6 +973,10 @@ ArgParser::initOptionTable() @@ -970,6 +973,10 @@ ArgParser::initOptionTable()
970 &ArgParser::argPreserveUnreferenced); 973 &ArgParser::argPreserveUnreferenced);
971 (*t)["preserve-unreferenced-resources"] = oe_bare( 974 (*t)["preserve-unreferenced-resources"] = oe_bare(
972 &ArgParser::argPreserveUnreferencedResources); 975 &ArgParser::argPreserveUnreferencedResources);
  976 + char const* remove_unref_choices[] = {
  977 + "auto", "yes", "no", 0};
  978 + (*t)["remove-unreferenced-resources"] = oe_requiredChoices(
  979 + &ArgParser::argRemoveUnreferencedResources, remove_unref_choices);
973 (*t)["keep-files-open"] = oe_requiredChoices( 980 (*t)["keep-files-open"] = oe_requiredChoices(
974 &ArgParser::argKeepFilesOpen, yn); 981 &ArgParser::argKeepFilesOpen, yn);
975 (*t)["keep-files-open-threshold"] = oe_requiredParameter( 982 (*t)["keep-files-open-threshold"] = oe_requiredParameter(
@@ -1459,7 +1466,9 @@ ArgParser::argHelp() @@ -1459,7 +1466,9 @@ ArgParser::argHelp()
1459 << "--object-streams=mode controls handling of object streams\n" 1466 << "--object-streams=mode controls handling of object streams\n"
1460 << "--preserve-unreferenced preserve unreferenced objects\n" 1467 << "--preserve-unreferenced preserve unreferenced objects\n"
1461 << "--preserve-unreferenced-resources\n" 1468 << "--preserve-unreferenced-resources\n"
1462 - << " preserve unreferenced page resources\n" 1469 + << " synonym for --remove-unreferenced-resources=no\n"
  1470 + << "--remove-unreferenced-resources={auto,yes,no}\n"
  1471 + << " whether to remove unreferenced page resources\n"
1463 << "--newline-before-endstream always put a newline before endstream\n" 1472 << "--newline-before-endstream always put a newline before endstream\n"
1464 << "--coalesce-contents force all pages' content to be a single stream\n" 1473 << "--coalesce-contents force all pages' content to be a single stream\n"
1465 << "--flatten-annotations=option\n" 1474 << "--flatten-annotations=option\n"
@@ -1973,7 +1982,30 @@ ArgParser::argPreserveUnreferenced() @@ -1973,7 +1982,30 @@ ArgParser::argPreserveUnreferenced()
1973 void 1982 void
1974 ArgParser::argPreserveUnreferencedResources() 1983 ArgParser::argPreserveUnreferencedResources()
1975 { 1984 {
1976 - o.preserve_unreferenced_page_resources = true; 1985 + o.remove_unreferenced_page_resources = re_no;
  1986 +}
  1987 +
  1988 +void
  1989 +ArgParser::argRemoveUnreferencedResources(char* parameter)
  1990 +{
  1991 + if (strcmp(parameter, "auto") == 0)
  1992 + {
  1993 + o.remove_unreferenced_page_resources = re_auto;
  1994 + }
  1995 + else if (strcmp(parameter, "yes") == 0)
  1996 + {
  1997 + o.remove_unreferenced_page_resources = re_yes;
  1998 + }
  1999 + else if (strcmp(parameter, "no") == 0)
  2000 + {
  2001 + o.remove_unreferenced_page_resources = re_no;
  2002 + }
  2003 + else
  2004 + {
  2005 + // If this happens, it means remove_unref_choices in
  2006 + // ArgParser::initOptionTable is wrong.
  2007 + usage("invalid value for --remove-unreferenced-resources");
  2008 + }
1977 } 2009 }
1978 2010
1979 void 2011 void
@@ -4838,7 +4870,7 @@ static void handle_page_specs(QPDF& pdf, Options& o) @@ -4838,7 +4870,7 @@ static void handle_page_specs(QPDF& pdf, Options& o)
4838 page_spec.range)); 4870 page_spec.range));
4839 } 4871 }
4840 4872
4841 - if (! o.preserve_unreferenced_page_resources) 4873 + if (o.remove_unreferenced_page_resources != re_no)
4842 { 4874 {
4843 for (std::map<std::string, QPDF*>::iterator iter = 4875 for (std::map<std::string, QPDF*>::iterator iter =
4844 page_spec_qpdfs.begin(); 4876 page_spec_qpdfs.begin();
@@ -5336,7 +5368,7 @@ static void do_split_pages(QPDF& pdf, Options& o) @@ -5336,7 +5368,7 @@ static void do_split_pages(QPDF& pdf, Options& o)
5336 before = std::string(o.outfilename) + "-"; 5368 before = std::string(o.outfilename) + "-";
5337 } 5369 }
5338 5370
5339 - if (! o.preserve_unreferenced_page_resources) 5371 + if (o.remove_unreferenced_page_resources != re_no)
5340 { 5372 {
5341 QPDFPageDocumentHelper dh(pdf); 5373 QPDFPageDocumentHelper dh(pdf);
5342 dh.removeUnreferencedResources(); 5374 dh.removeUnreferencedResources();
qpdf/qtest/qpdf.test
@@ -2120,6 +2120,7 @@ $td->runtest("check output", @@ -2120,6 +2120,7 @@ $td->runtest("check output",
2120 $td->runtest("split with shared resources", 2120 $td->runtest("split with shared resources",
2121 {$td->COMMAND => 2121 {$td->COMMAND =>
2122 "qpdf --qdf --static-id" . 2122 "qpdf --qdf --static-id" .
  2123 + " --remove-unreferenced-resources=yes" .
2123 " shared-images.pdf --pages . 1,3" . 2124 " shared-images.pdf --pages . 1,3" .
2124 " ./shared-images.pdf 1,2 -- a.pdf"}, 2125 " ./shared-images.pdf 1,2 -- a.pdf"},
2125 {$td->STRING => "", $td->EXIT_STATUS => 0}); 2126 {$td->STRING => "", $td->EXIT_STATUS => 0});
@@ -2130,6 +2131,7 @@ $td->runtest("check output", @@ -2130,6 +2131,7 @@ $td->runtest("check output",
2130 $td->runtest("split with really shared resources", 2131 $td->runtest("split with really shared resources",
2131 {$td->COMMAND => 2132 {$td->COMMAND =>
2132 "qpdf --qdf --static-id" . 2133 "qpdf --qdf --static-id" .
  2134 + " --remove-unreferenced-resources=yes" .
2133 " shared-images.pdf --pages . 1,3" . 2135 " shared-images.pdf --pages . 1,3" .
2134 " . 1,2 -- a.pdf"}, 2136 " . 1,2 -- a.pdf"},
2135 {$td->STRING => "", $td->EXIT_STATUS => 0}); 2137 {$td->STRING => "", $td->EXIT_STATUS => 0});