Commit 1e766dcda26886137cb70fa02de631a1cbb956a0
1 parent
81904623
Add --remove-unreferenced-resources option
Showing 3 changed files with 57 additions and 6 deletions
ChangeLog
| 1 | +2020-04-04 Jay Berkenbilt <ejb@ql.org> | |
| 2 | + | |
| 3 | + * Add new option --remove-unreferenced-resources that takes auto, | |
| 4 | + yes, or no as its value. This tells qpdf whether to attempt to | |
| 5 | + remove unreferenced resources from pages when doing page splitting | |
| 6 | + operations. Prior to this change, the default was to attempt to | |
| 7 | + remove unreferenced resources, but this operation was very slow, | |
| 8 | + especially for large and complex files. The new default is "auto", | |
| 9 | + which tells qpdf to analyze the file for shared resources. This is | |
| 10 | + a relatively quick test. If no shared resources are found, then we | |
| 11 | + don't attempt to remove unreferenced resources, because | |
| 12 | + unreferenced resources never occur in files without shared | |
| 13 | + resources. To force qpdf to look for and remove unreferenced | |
| 14 | + resources, use --remove-unreferenced-resources=yes. The option | |
| 15 | + --preserve-unreferenced-resources is now a synonym for | |
| 16 | + --remove-unreferenced-resources=no. | |
| 17 | + | |
| 1 | 18 | 2020-04-03 Jay Berkenbilt <ejb@ql.org> |
| 2 | 19 | |
| 3 | 20 | * Allow qpdf to be built on systems without wchar_t. All "normal" | ... | ... |
qpdf/qpdf.cc
| ... | ... | @@ -94,6 +94,8 @@ struct UnderOverlay |
| 94 | 94 | std::vector<int> repeat_pagenos; |
| 95 | 95 | }; |
| 96 | 96 | |
| 97 | +enum remove_unref_e { re_auto, re_yes, re_no }; | |
| 98 | + | |
| 97 | 99 | struct Options |
| 98 | 100 | { |
| 99 | 101 | Options() : |
| ... | ... | @@ -144,7 +146,7 @@ struct Options |
| 144 | 146 | ignore_xref_streams(false), |
| 145 | 147 | qdf_mode(false), |
| 146 | 148 | preserve_unreferenced_objects(false), |
| 147 | - preserve_unreferenced_page_resources(false), | |
| 149 | + remove_unreferenced_page_resources(re_auto), | |
| 148 | 150 | keep_files_open(true), |
| 149 | 151 | keep_files_open_set(false), |
| 150 | 152 | keep_files_open_threshold(200), // default known in help and docs |
| ... | ... | @@ -243,7 +245,7 @@ struct Options |
| 243 | 245 | bool ignore_xref_streams; |
| 244 | 246 | bool qdf_mode; |
| 245 | 247 | bool preserve_unreferenced_objects; |
| 246 | - bool preserve_unreferenced_page_resources; | |
| 248 | + remove_unref_e remove_unreferenced_page_resources; | |
| 247 | 249 | bool keep_files_open; |
| 248 | 250 | bool keep_files_open_set; |
| 249 | 251 | size_t keep_files_open_threshold; |
| ... | ... | @@ -739,6 +741,7 @@ class ArgParser |
| 739 | 741 | void argQdf(); |
| 740 | 742 | void argPreserveUnreferenced(); |
| 741 | 743 | void argPreserveUnreferencedResources(); |
| 744 | + void argRemoveUnreferencedResources(char* parameter); | |
| 742 | 745 | void argKeepFilesOpen(char* parameter); |
| 743 | 746 | void argKeepFilesOpenThreshold(char* parameter); |
| 744 | 747 | void argNewlineBeforeEndstream(); |
| ... | ... | @@ -970,6 +973,10 @@ ArgParser::initOptionTable() |
| 970 | 973 | &ArgParser::argPreserveUnreferenced); |
| 971 | 974 | (*t)["preserve-unreferenced-resources"] = oe_bare( |
| 972 | 975 | &ArgParser::argPreserveUnreferencedResources); |
| 976 | + char const* remove_unref_choices[] = { | |
| 977 | + "auto", "yes", "no", 0}; | |
| 978 | + (*t)["remove-unreferenced-resources"] = oe_requiredChoices( | |
| 979 | + &ArgParser::argRemoveUnreferencedResources, remove_unref_choices); | |
| 973 | 980 | (*t)["keep-files-open"] = oe_requiredChoices( |
| 974 | 981 | &ArgParser::argKeepFilesOpen, yn); |
| 975 | 982 | (*t)["keep-files-open-threshold"] = oe_requiredParameter( |
| ... | ... | @@ -1459,7 +1466,9 @@ ArgParser::argHelp() |
| 1459 | 1466 | << "--object-streams=mode controls handling of object streams\n" |
| 1460 | 1467 | << "--preserve-unreferenced preserve unreferenced objects\n" |
| 1461 | 1468 | << "--preserve-unreferenced-resources\n" |
| 1462 | - << " preserve unreferenced page resources\n" | |
| 1469 | + << " synonym for --remove-unreferenced-resources=no\n" | |
| 1470 | + << "--remove-unreferenced-resources={auto,yes,no}\n" | |
| 1471 | + << " whether to remove unreferenced page resources\n" | |
| 1463 | 1472 | << "--newline-before-endstream always put a newline before endstream\n" |
| 1464 | 1473 | << "--coalesce-contents force all pages' content to be a single stream\n" |
| 1465 | 1474 | << "--flatten-annotations=option\n" |
| ... | ... | @@ -1973,7 +1982,30 @@ ArgParser::argPreserveUnreferenced() |
| 1973 | 1982 | void |
| 1974 | 1983 | ArgParser::argPreserveUnreferencedResources() |
| 1975 | 1984 | { |
| 1976 | - o.preserve_unreferenced_page_resources = true; | |
| 1985 | + o.remove_unreferenced_page_resources = re_no; | |
| 1986 | +} | |
| 1987 | + | |
| 1988 | +void | |
| 1989 | +ArgParser::argRemoveUnreferencedResources(char* parameter) | |
| 1990 | +{ | |
| 1991 | + if (strcmp(parameter, "auto") == 0) | |
| 1992 | + { | |
| 1993 | + o.remove_unreferenced_page_resources = re_auto; | |
| 1994 | + } | |
| 1995 | + else if (strcmp(parameter, "yes") == 0) | |
| 1996 | + { | |
| 1997 | + o.remove_unreferenced_page_resources = re_yes; | |
| 1998 | + } | |
| 1999 | + else if (strcmp(parameter, "no") == 0) | |
| 2000 | + { | |
| 2001 | + o.remove_unreferenced_page_resources = re_no; | |
| 2002 | + } | |
| 2003 | + else | |
| 2004 | + { | |
| 2005 | + // If this happens, it means remove_unref_choices in | |
| 2006 | + // ArgParser::initOptionTable is wrong. | |
| 2007 | + usage("invalid value for --remove-unreferenced-resources"); | |
| 2008 | + } | |
| 1977 | 2009 | } |
| 1978 | 2010 | |
| 1979 | 2011 | void |
| ... | ... | @@ -4838,7 +4870,7 @@ static void handle_page_specs(QPDF& pdf, Options& o) |
| 4838 | 4870 | page_spec.range)); |
| 4839 | 4871 | } |
| 4840 | 4872 | |
| 4841 | - if (! o.preserve_unreferenced_page_resources) | |
| 4873 | + if (o.remove_unreferenced_page_resources != re_no) | |
| 4842 | 4874 | { |
| 4843 | 4875 | for (std::map<std::string, QPDF*>::iterator iter = |
| 4844 | 4876 | page_spec_qpdfs.begin(); |
| ... | ... | @@ -5336,7 +5368,7 @@ static void do_split_pages(QPDF& pdf, Options& o) |
| 5336 | 5368 | before = std::string(o.outfilename) + "-"; |
| 5337 | 5369 | } |
| 5338 | 5370 | |
| 5339 | - if (! o.preserve_unreferenced_page_resources) | |
| 5371 | + if (o.remove_unreferenced_page_resources != re_no) | |
| 5340 | 5372 | { |
| 5341 | 5373 | QPDFPageDocumentHelper dh(pdf); |
| 5342 | 5374 | dh.removeUnreferencedResources(); | ... | ... |
qpdf/qtest/qpdf.test
| ... | ... | @@ -2120,6 +2120,7 @@ $td->runtest("check output", |
| 2120 | 2120 | $td->runtest("split with shared resources", |
| 2121 | 2121 | {$td->COMMAND => |
| 2122 | 2122 | "qpdf --qdf --static-id" . |
| 2123 | + " --remove-unreferenced-resources=yes" . | |
| 2123 | 2124 | " shared-images.pdf --pages . 1,3" . |
| 2124 | 2125 | " ./shared-images.pdf 1,2 -- a.pdf"}, |
| 2125 | 2126 | {$td->STRING => "", $td->EXIT_STATUS => 0}); |
| ... | ... | @@ -2130,6 +2131,7 @@ $td->runtest("check output", |
| 2130 | 2131 | $td->runtest("split with really shared resources", |
| 2131 | 2132 | {$td->COMMAND => |
| 2132 | 2133 | "qpdf --qdf --static-id" . |
| 2134 | + " --remove-unreferenced-resources=yes" . | |
| 2133 | 2135 | " shared-images.pdf --pages . 1,3" . |
| 2134 | 2136 | " . 1,2 -- a.pdf"}, |
| 2135 | 2137 | {$td->STRING => "", $td->EXIT_STATUS => 0}); | ... | ... |