Commit 1e766dcda26886137cb70fa02de631a1cbb956a0

Authored by Jay Berkenbilt
1 parent 81904623

Add --remove-unreferenced-resources option

ChangeLog
  1 +2020-04-04 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Add new option --remove-unreferenced-resources that takes auto,
  4 + yes, or no as options. This tells qpdf whether to attempt to
  5 + remove unreferenced resources from pages when doing page splitting
  6 + operations. Prior to this change, the default was to attempt to
  7 + remove unreferenced resources, but this operation was very slow,
  8 + especially for large and complex files. The new default is "auto",
  9 + which tells qpdf to analyze the file for shared resources. This is
  10 + a relatively quick test. If no shared resources are found, then we
  11 + don't attempt to remove unreferenced resources, because
  12 + unreferenced resources never occur in files without shared
  13 + resources. To force qpdf to look for and remove unreferenced
  14 + resources, use --remove-unreferenced-resources=yes. The option
  15 + --preserve-unreferenced-resources is now a synonym for
  16 + --remove-unreferenced-resources=no.
  17 +
1 18 2020-04-03 Jay Berkenbilt <ejb@ql.org>
2 19  
3 20 * Allow qpdf to be built on systems without wchar_t. All "normal"
... ...
qpdf/qpdf.cc
... ... @@ -94,6 +94,8 @@ struct UnderOverlay
94 94 std::vector<int> repeat_pagenos;
95 95 };
96 96  
  97 +enum remove_unref_e { re_auto, re_yes, re_no }; // value of --remove-unreferenced-resources: auto, yes, or no
  98 +
97 99 struct Options
98 100 {
99 101 Options() :
... ... @@ -144,7 +146,7 @@ struct Options
144 146 ignore_xref_streams(false),
145 147 qdf_mode(false),
146 148 preserve_unreferenced_objects(false),
147   - preserve_unreferenced_page_resources(false),
  149 + remove_unreferenced_page_resources(re_auto),
148 150 keep_files_open(true),
149 151 keep_files_open_set(false),
150 152 keep_files_open_threshold(200), // default known in help and docs
... ... @@ -243,7 +245,7 @@ struct Options
243 245 bool ignore_xref_streams;
244 246 bool qdf_mode;
245 247 bool preserve_unreferenced_objects;
246   - bool preserve_unreferenced_page_resources;
  248 + remove_unref_e remove_unreferenced_page_resources;
247 249 bool keep_files_open;
248 250 bool keep_files_open_set;
249 251 size_t keep_files_open_threshold;
... ... @@ -739,6 +741,7 @@ class ArgParser
739 741 void argQdf();
740 742 void argPreserveUnreferenced();
741 743 void argPreserveUnreferencedResources();
  744 + void argRemoveUnreferencedResources(char* parameter);
742 745 void argKeepFilesOpen(char* parameter);
743 746 void argKeepFilesOpenThreshold(char* parameter);
744 747 void argNewlineBeforeEndstream();
... ... @@ -970,6 +973,10 @@ ArgParser::initOptionTable()
970 973 &ArgParser::argPreserveUnreferenced);
971 974 (*t)["preserve-unreferenced-resources"] = oe_bare(
972 975 &ArgParser::argPreserveUnreferencedResources);
  976 + char const* remove_unref_choices[] = {
  977 + "auto", "yes", "no", 0};
  978 + (*t)["remove-unreferenced-resources"] = oe_requiredChoices(
  979 + &ArgParser::argRemoveUnreferencedResources, remove_unref_choices);
973 980 (*t)["keep-files-open"] = oe_requiredChoices(
974 981 &ArgParser::argKeepFilesOpen, yn);
975 982 (*t)["keep-files-open-threshold"] = oe_requiredParameter(
... ... @@ -1459,7 +1466,9 @@ ArgParser::argHelp()
1459 1466 << "--object-streams=mode controls handing of object streams\n"
1460 1467 << "--preserve-unreferenced preserve unreferenced objects\n"
1461 1468 << "--preserve-unreferenced-resources\n"
1462   - << " preserve unreferenced page resources\n"
  1469 + << " synonym for --remove-unreferenced-resources=no\n"
  1470 + << "--remove-unreferenced-resources={auto,yes,no}\n"
  1471 + << " whether to remove unreferenced page resources\n"
1463 1472 << "--newline-before-endstream always put a newline before endstream\n"
1464 1473 << "--coalesce-contents force all pages' content to be a single stream\n"
1465 1474 << "--flatten-annotations=option\n"
... ... @@ -1973,7 +1982,30 @@ ArgParser::argPreserveUnreferenced()
1973 1982 void
1974 1983 ArgParser::argPreserveUnreferencedResources()
1975 1984 {
1976   - o.preserve_unreferenced_page_resources = true;
  1985 + o.remove_unreferenced_page_resources = re_no; // same setting as --remove-unreferenced-resources=no
  1986 +}
  1987 +
  1988 +void
  1989 +ArgParser::argRemoveUnreferencedResources(char* parameter)
  1990 +{
  1991 + if (strcmp(parameter, "auto") == 0)
  1992 + {
  1993 + o.remove_unreferenced_page_resources = re_auto;
  1994 + }
  1995 + else if (strcmp(parameter, "yes") == 0)
  1996 + {
  1997 + o.remove_unreferenced_page_resources = re_yes;
  1998 + }
  1999 + else if (strcmp(parameter, "no") == 0)
  2000 + {
  2001 + o.remove_unreferenced_page_resources = re_no;
  2002 + }
  2003 + else
  2004 + {
  2005 + // If this happens, it means remove_unref_choices in
  2006 + // ArgParser::initOptionTable is wrong.
  2007 + usage("invalid value for --remove-unreferenced-page-resources");
  2008 + }
1977 2009 }
1978 2010  
1979 2011 void
... ... @@ -4838,7 +4870,7 @@ static void handle_page_specs(QPDF& pdf, Options& o)
4838 4870 page_spec.range));
4839 4871 }
4840 4872  
4841   - if (! o.preserve_unreferenced_page_resources)
  4873 + if (o.remove_unreferenced_page_resources != re_no)
4842 4874 {
4843 4875 for (std::map<std::string, QPDF*>::iterator iter =
4844 4876 page_spec_qpdfs.begin();
... ... @@ -5336,7 +5368,7 @@ static void do_split_pages(QPDF& pdf, Options& o)
5336 5368 before = std::string(o.outfilename) + "-";
5337 5369 }
5338 5370  
5339   - if (! o.preserve_unreferenced_page_resources)
  5371 + if (o.remove_unreferenced_page_resources != re_no)
5340 5372 {
5341 5373 QPDFPageDocumentHelper dh(pdf);
5342 5374 dh.removeUnreferencedResources();
... ...
qpdf/qtest/qpdf.test
... ... @@ -2120,6 +2120,7 @@ $td->runtest("check output",
2120 2120 $td->runtest("split with shared resources",
2121 2121 {$td->COMMAND =>
2122 2122 "qpdf --qdf --static-id" .
  2123 + " --remove-unreferenced-resources=yes" .
2123 2124 " shared-images.pdf --pages . 1,3" .
2124 2125 " ./shared-images.pdf 1,2 -- a.pdf"},
2125 2126 {$td->STRING => "", $td->EXIT_STATUS => 0});
... ... @@ -2130,6 +2131,7 @@ $td->runtest("check output",
2130 2131 $td->runtest("split with really shared resources",
2131 2132 {$td->COMMAND =>
2132 2133 "qpdf --qdf --static-id" .
  2134 + " --remove-unreferenced-resources=yes" .
2133 2135 " shared-images.pdf --pages . 1,3" .
2134 2136 " . 1,2 -- a.pdf"},
2135 2137 {$td->STRING => "", $td->EXIT_STATUS => 0});
... ...