Commit d492bb0a90e30c8c57f36434479ddb708d322e79

Authored by Jay Berkenbilt
1 parent babd12c9

Add --replace-input option (fixes #321)

manual/qpdf-manual.xml
@@ -331,10 +331,12 @@ make @@ -331,10 +331,12 @@ make
331 <option>outfilename</option> does not have to be seekable, even 331 <option>outfilename</option> does not have to be seekable, even
332 when generating linearized files. Specifying 332 when generating linearized files. Specifying
333 &ldquo;<option>-</option>&rdquo; as <option>outfilename</option> 333 &ldquo;<option>-</option>&rdquo; as <option>outfilename</option>
334 - means to write to standard output. However, you can't specify the  
335 - same file as both the input and the output because qpdf reads data  
336 - from the input file as it writes to the output file. QPDF attempts  
337 - to detect this case and fail without overwriting the output file. 334 + means to write to standard output. If you want to overwrite the
  335 + input file with the output, use the option
  336 + <option>--replace-input</option> and omit the output file name.
  337 + You can't specify the same file as both the input and the output.
  338 + If you do this, qpdf will tell you about the
  339 + <option>--replace-input</option> option.
338 </para> 340 </para>
339 <para> 341 <para>
340 Most options require an output file, but some testing or 342 Most options require an output file, but some testing or
@@ -450,6 +452,21 @@ make @@ -450,6 +452,21 @@ make
450 </listitem> 452 </listitem>
451 </varlistentry> 453 </varlistentry>
452 <varlistentry> 454 <varlistentry>
  455 + <term><option>--replace-input</option></term>
  456 + <listitem>
  457 + <para>
  458 + If specified, the output file name should be omitted. This
  459 + option tells qpdf to replace the input file with the output.
  460 + It does this by writing to
  461 + <filename>.~qpdf-temp.<replaceable>infilename</replaceable>#</filename>
  462 + and, when done, overwriting the input file with the temporary
  463 + file. If there were any warnings, the original input is saved
  464 + as
  465 + <filename><replaceable>infilename</replaceable>.~qpdf-orig</filename>.
  466 + </para>
  467 + </listitem>
  468 + </varlistentry>
  469 + <varlistentry>
453 <term><option>--copy-encryption=file</option></term> 470 <term><option>--copy-encryption=file</option></term>
454 <listitem> 471 <listitem>
455 <para> 472 <para>
@@ -4421,6 +4438,15 @@ print &quot;\n&quot;; @@ -4421,6 +4438,15 @@ print &quot;\n&quot;;
4421 <itemizedlist> 4438 <itemizedlist>
4422 <listitem> 4439 <listitem>
4423 <para> 4440 <para>
  4441 + The <option>--replace-input</option> option may be given in
  4442 + place of an output file name. This causes qpdf to overwrite
  4443 + the input file with the output. See the description of
  4444 + <option>--replace-input</option> in <xref
  4445 + linkend="ref.basic-options"/> for more details.
  4446 + </para>
  4447 + </listitem>
  4448 + <listitem>
  4449 + <para>
4424 The <option>--recompress-flate</option> instructs 4450 The <option>--recompress-flate</option> instructs
4425 <command>qpdf</command> to recompress streams that are 4451 <command>qpdf</command> to recompress streams that are
4426 already compressed with <literal>/FlateDecode</literal>. 4452 already compressed with <literal>/FlateDecode</literal>.
qpdf/qpdf.cc
@@ -23,6 +23,7 @@ @@ -23,6 +23,7 @@
23 #include <qpdf/QPDFOutlineDocumentHelper.hh> 23 #include <qpdf/QPDFOutlineDocumentHelper.hh>
24 #include <qpdf/QPDFAcroFormDocumentHelper.hh> 24 #include <qpdf/QPDFAcroFormDocumentHelper.hh>
25 #include <qpdf/QPDFExc.hh> 25 #include <qpdf/QPDFExc.hh>
  26 +#include <qpdf/QPDFSystemError.hh>
26 27
27 #include <qpdf/QPDFWriter.hh> 28 #include <qpdf/QPDFWriter.hh>
28 #include <qpdf/QIntC.hh> 29 #include <qpdf/QIntC.hh>
@@ -180,6 +181,7 @@ struct Options @@ -180,6 +181,7 @@ struct Options
180 overlay("overlay"), 181 overlay("overlay"),
181 under_overlay(0), 182 under_overlay(0),
182 require_outfile(true), 183 require_outfile(true),
  184 + replace_input(false),
183 infilename(0), 185 infilename(0),
184 outfilename(0) 186 outfilename(0)
185 { 187 {
@@ -283,6 +285,7 @@ struct Options @@ -283,6 +285,7 @@ struct Options
283 std::vector<PageSpec> page_specs; 285 std::vector<PageSpec> page_specs;
284 std::map<std::string, RotationSpec> rotations; 286 std::map<std::string, RotationSpec> rotations;
285 bool require_outfile; 287 bool require_outfile;
  288 + bool replace_input;
286 char const* infilename; 289 char const* infilename;
287 char const* outfilename; 290 char const* outfilename;
288 }; 291 };
@@ -712,6 +715,7 @@ class ArgParser @@ -712,6 +715,7 @@ class ArgParser
712 void argUOrepeat(char* parameter); 715 void argUOrepeat(char* parameter);
713 void argUOpassword(char* parameter); 716 void argUOpassword(char* parameter);
714 void argEndUnderOverlay(); 717 void argEndUnderOverlay();
  718 + void argReplaceInput();
715 719
716 void usage(std::string const& message); 720 void usage(std::string const& message);
717 void checkCompletion(); 721 void checkCompletion();
@@ -940,6 +944,7 @@ ArgParser::initOptionTable() @@ -940,6 +944,7 @@ ArgParser::initOptionTable()
940 &ArgParser::argIiMinBytes, "minimum-bytes"); 944 &ArgParser::argIiMinBytes, "minimum-bytes");
941 (*t)["overlay"] = oe_bare(&ArgParser::argOverlay); 945 (*t)["overlay"] = oe_bare(&ArgParser::argOverlay);
942 (*t)["underlay"] = oe_bare(&ArgParser::argUnderlay); 946 (*t)["underlay"] = oe_bare(&ArgParser::argUnderlay);
  947 + (*t)["replace-input"] = oe_bare(&ArgParser::argReplaceInput);
943 948
944 t = &this->encrypt40_option_table; 949 t = &this->encrypt40_option_table;
945 (*t)["--"] = oe_bare(&ArgParser::argEndEncrypt); 950 (*t)["--"] = oe_bare(&ArgParser::argEndEncrypt);
@@ -1080,6 +1085,9 @@ ArgParser::argHelp() @@ -1080,6 +1085,9 @@ ArgParser::argHelp()
1080 << "will be interpreted as an argument. No interpolation is done. Line\n" 1085 << "will be interpreted as an argument. No interpolation is done. Line\n"
1081 << "terminators are stripped. @- can be specified to read from standard input.\n" 1086 << "terminators are stripped. @- can be specified to read from standard input.\n"
1082 << "\n" 1087 << "\n"
  1088 + << "The output file can be - to indicate writing to standard output, or it can\n"
  1089 + << "be --replace-input to cause qpdf to replace the input file with the output.\n"
  1090 + << "\n"
1083 << "Note that when contradictory options are provided, whichever options are\n" 1091 << "Note that when contradictory options are provided, whichever options are\n"
1084 << "provided last take precedence.\n" 1092 << "provided last take precedence.\n"
1085 << "\n" 1093 << "\n"
@@ -1097,6 +1105,8 @@ ArgParser::argHelp() @@ -1097,6 +1105,8 @@ ArgParser::argHelp()
1097 << "--progress give progress indicators while writing output\n" 1105 << "--progress give progress indicators while writing output\n"
1098 << "--no-warn suppress warnings\n" 1106 << "--no-warn suppress warnings\n"
1099 << "--linearize generated a linearized (web optimized) file\n" 1107 << "--linearize generated a linearized (web optimized) file\n"
  1108 + << "--replace-input use in place of specifying an output file; qpdf will\n"
  1109 + << " replace the input file with the output\n"
1100 << "--copy-encryption=file copy encryption parameters from specified file\n" 1110 << "--copy-encryption=file copy encryption parameters from specified file\n"
1101 << "--encryption-file-password=password\n" 1111 << "--encryption-file-password=password\n"
1102 << " password used to open the file from which encryption\n" 1112 << " password used to open the file from which encryption\n"
@@ -2317,6 +2327,12 @@ ArgParser::argEndUnderOverlay() @@ -2317,6 +2327,12 @@ ArgParser::argEndUnderOverlay()
2317 } 2327 }
2318 2328
2319 void 2329 void
  2330 +ArgParser::argReplaceInput()
  2331 +{
  2332 + o.replace_input = true;
  2333 +}
  2334 +
  2335 +void
2320 ArgParser::handleArgFileArguments() 2336 ArgParser::handleArgFileArguments()
2321 { 2337 {
2322 // Support reading arguments from files. Create a new argv. Ensure 2338 // Support reading arguments from files. Create a new argv. Ensure
@@ -3048,15 +3064,28 @@ ArgParser::doFinalChecks() @@ -3048,15 +3064,28 @@ ArgParser::doFinalChecks()
3048 { 3064 {
3049 usage("missing -- at end of options"); 3065 usage("missing -- at end of options");
3050 } 3066 }
  3067 + if (o.replace_input)
  3068 + {
  3069 + if (o.outfilename)
  3070 + {
  3071 + usage("--replace-input may not be used when"
  3072 + " an output file is specified");
  3073 + }
  3074 + else if (o.split_pages)
  3075 + {
  3076 + usage("--split-pages may not be used with --replace-input");
  3077 + }
  3078 + }
3051 if (o.infilename == 0) 3079 if (o.infilename == 0)
3052 { 3080 {
3053 usage("an input file name is required"); 3081 usage("an input file name is required");
3054 } 3082 }
3055 - else if (o.require_outfile && (o.outfilename == 0)) 3083 + else if (o.require_outfile && (o.outfilename == 0) && (! o.replace_input))
3056 { 3084 {
3057 usage("an output file name is required; use - for standard output"); 3085 usage("an output file name is required; use - for standard output");
3058 } 3086 }
3059 - else if ((! o.require_outfile) && (o.outfilename != 0)) 3087 + else if ((! o.require_outfile) &&
  3088 + ((o.outfilename != 0) || o.replace_input))
3060 { 3089 {
3061 usage("no output file may be given for this option"); 3090 usage("no output file may be given for this option");
3062 } 3091 }
@@ -3065,7 +3094,8 @@ ArgParser::doFinalChecks() @@ -3065,7 +3094,8 @@ ArgParser::doFinalChecks()
3065 o.externalize_inline_images = true; 3094 o.externalize_inline_images = true;
3066 } 3095 }
3067 3096
3068 - if (o.require_outfile && (strcmp(o.outfilename, "-") == 0)) 3097 + if (o.require_outfile && o.outfilename &&
  3098 + (strcmp(o.outfilename, "-") == 0))
3069 { 3099 {
3070 if (o.split_pages) 3100 if (o.split_pages)
3071 { 3101 {
@@ -3088,7 +3118,7 @@ ArgParser::doFinalChecks() @@ -3088,7 +3118,7 @@ ArgParser::doFinalChecks()
3088 { 3118 {
3089 QTC::TC("qpdf", "qpdf same file error"); 3119 QTC::TC("qpdf", "qpdf same file error");
3090 usage("input file and output file are the same;" 3120 usage("input file and output file are the same;"
3091 - " this would cause input file to be lost"); 3121 + " use --replace-input to intentionally overwrite the input file");
3092 } 3122 }
3093 } 3123 }
3094 3124
@@ -3861,6 +3891,12 @@ static void do_inspection(QPDF&amp; pdf, Options&amp; o) @@ -3861,6 +3891,12 @@ static void do_inspection(QPDF&amp; pdf, Options&amp; o)
3861 { 3891 {
3862 do_show_pages(pdf, o); 3892 do_show_pages(pdf, o);
3863 } 3893 }
  3894 + if ((! pdf.getWarnings().empty()) && (exit_code != EXIT_ERROR))
  3895 + {
  3896 + std::cerr << whoami
  3897 + << ": operation succeeded with warnings" << std::endl;
  3898 + exit_code = EXIT_WARNING;
  3899 + }
3864 if (exit_code) 3900 if (exit_code)
3865 { 3901 {
3866 exit(exit_code); 3902 exit(exit_code);
@@ -5109,18 +5145,80 @@ static void do_split_pages(QPDF&amp; pdf, Options&amp; o) @@ -5109,18 +5145,80 @@ static void do_split_pages(QPDF&amp; pdf, Options&amp; o)
5109 5145
5110 static void write_outfile(QPDF& pdf, Options& o) 5146 static void write_outfile(QPDF& pdf, Options& o)
5111 { 5147 {
5112 - if (strcmp(o.outfilename, "-") == 0) 5148 + std::string temp_out;
  5149 + if (o.replace_input)
  5150 + {
  5151 + // Use a file name that is hidden by default in the OS to
  5152 + // avoid having it become momentarily visible in a
  5153 + // graphical file manager or in case it gets left behind
  5154 + // because of some kind of error.
  5155 + temp_out = ".~qpdf-temp." + std::string(o.infilename) + "#";
  5156 + // o.outfilename will be restored to 0 before temp_out
  5157 + // goes out of scope.
  5158 + o.outfilename = temp_out.c_str();
  5159 + }
  5160 + else if (strcmp(o.outfilename, "-") == 0)
5113 { 5161 {
5114 o.outfilename = 0; 5162 o.outfilename = 0;
5115 } 5163 }
5116 - QPDFWriter w(pdf, o.outfilename);  
5117 - set_writer_options(pdf, o, w);  
5118 - w.write();  
5119 - if (o.verbose) 5164 + {
  5165 + // Private scope so QPDFWriter will close the output file
  5166 + QPDFWriter w(pdf, o.outfilename);
  5167 + set_writer_options(pdf, o, w);
  5168 + w.write();
  5169 + }
  5170 + if (o.verbose && o.outfilename)
5120 { 5171 {
5121 std::cout << whoami << ": wrote file " 5172 std::cout << whoami << ": wrote file "
5122 << o.outfilename << std::endl; 5173 << o.outfilename << std::endl;
5123 } 5174 }
  5175 + if (o.replace_input)
  5176 + {
  5177 + o.outfilename = 0;
  5178 + }
  5179 + if (o.replace_input)
  5180 + {
  5181 + // We must close the input before we can rename files
  5182 + pdf.closeInputSource();
  5183 + std::string backup;
  5184 + bool warnings = pdf.anyWarnings();
  5185 + if (warnings)
  5186 + {
  5187 + // If there are warnings, the user may care about this
  5188 + // file, so give it a non-hidden name that will be
  5189 + // lexically grouped with the original file.
  5190 + backup = std::string(o.infilename) + ".~qpdf-orig";
  5191 + }
  5192 + else
  5193 + {
  5194 + backup = ".~qpdf-orig." + std::string(o.infilename) + "#";
  5195 + }
  5196 + QUtil::rename_file(o.infilename, backup.c_str());
  5197 + QUtil::rename_file(temp_out.c_str(), o.infilename);
  5198 + if (warnings)
  5199 + {
  5200 + std::cerr << whoami
  5201 + << ": there are warnings; original file kept in "
  5202 + << backup << std::endl;
  5203 + }
  5204 + else
  5205 + {
  5206 + try
  5207 + {
  5208 + QUtil::remove_file(backup.c_str());
  5209 + }
  5210 + catch (QPDFSystemError& e)
  5211 + {
  5212 + std::cerr
  5213 + << whoami
  5214 + << ": unable to delete original file ("
  5215 + << e.what() << ");"
  5216 + << " original file left in " << backup
  5217 + << ", but the input was successfully replaced"
  5218 + << std::endl;
  5219 + }
  5220 + }
  5221 + }
5124 } 5222 }
5125 5223
5126 int realmain(int argc, char* argv[]) 5224 int realmain(int argc, char* argv[])
@@ -5156,7 +5254,7 @@ int realmain(int argc, char* argv[]) @@ -5156,7 +5254,7 @@ int realmain(int argc, char* argv[])
5156 handle_under_overlay(pdf, o); 5254 handle_under_overlay(pdf, o);
5157 handle_transformations(pdf, o); 5255 handle_transformations(pdf, o);
5158 5256
5159 - if (o.outfilename == 0) 5257 + if ((o.outfilename == 0) && (! o.replace_input))
5160 { 5258 {
5161 do_inspection(pdf, o); 5259 do_inspection(pdf, o);
5162 } 5260 }
qpdf/qtest/qpdf.test
@@ -191,6 +191,47 @@ foreach my $d ([&#39;auto-ü&#39;, 1], [&#39;auto-öπ&#39;, 2]) @@ -191,6 +191,47 @@ foreach my $d ([&#39;auto-ü&#39;, 1], [&#39;auto-öπ&#39;, 2])
191 191
192 show_ntests(); 192 show_ntests();
193 # ---------- 193 # ----------
  194 +$td->notify("--- Replace Input ---");
  195 +$n_tests += 8;
  196 +
  197 +# Use Unicode file names to test replace input so we can be sure it
  198 +# works for that case.
  199 +$td->runtest("create unicode filenames",
  200 + {$td->COMMAND => "test_unicode_filenames"},
  201 + {$td->STRING => "created Unicode filenames\n",
  202 + $td->EXIT_STATUS => 0},
  203 + $td->NORMALIZE_NEWLINES);
  204 +
  205 +foreach my $d (['auto-ü', 1], ['auto-öπ', 2])
  206 +{
  207 + my ($u, $n) = @$d;
  208 + $td->runtest("replace input $u",
  209 + {$td->COMMAND => "qpdf --deterministic-id" .
  210 + " --object-streams=generate --replace-input $u.pdf"},
  211 + {$td->STRING => "", $td->EXIT_STATUS => 0},
  212 + $td->NORMALIZE_NEWLINES);
  213 + $td->runtest("check output ($u)",
  214 + {$td->FILE => "$u.pdf"},
  215 + {$td->FILE => "replace-input.pdf"},
  216 + $td->NORMALIZE_NEWLINES);
  217 +}
  218 +
  219 +system("cp xref-with-short-size.pdf auto-warn.pdf") == 0 or die;
  220 +$td->runtest("replace input with warnings",
  221 + {$td->COMMAND =>
  222 + "qpdf --deterministic-id --replace-input auto-warn.pdf"},
  223 + {$td->FILE => "replace-warn.out", $td->EXIT_STATUS => 3},
  224 + $td->NORMALIZE_NEWLINES);
  225 +
  226 +$td->runtest("check output",
  227 + {$td->FILE => "auto-warn.pdf"},
  228 + {$td->FILE => "warn-replace.pdf"});
  229 +$td->runtest("check orig output",
  230 + {$td->FILE => "auto-warn.pdf.~qpdf-orig"},
  231 + {$td->FILE => "xref-with-short-size.pdf"});
  232 +
  233 +show_ntests();
  234 +# ----------
194 $td->notify("--- Final Version ---"); 235 $td->notify("--- Final Version ---");
195 $n_tests += 1; 236 $n_tests += 1;
196 237
@@ -4233,5 +4274,5 @@ sub get_md5_checksum @@ -4233,5 +4274,5 @@ sub get_md5_checksum
4233 sub cleanup 4274 sub cleanup
4234 { 4275 {
4235 system("rm -rf *.ps *.pnm ?.pdf ?.qdf *.enc* tif1 tif2 tiff-cache"); 4276 system("rm -rf *.ps *.pnm ?.pdf ?.qdf *.enc* tif1 tif2 tiff-cache");
4236 - system("rm -rf *split-out* ???-kfo.pdf *.tmpout \@file.pdf auto-*.pdf"); 4277 + system("rm -rf *split-out* ???-kfo.pdf *.tmpout \@file.pdf auto-*");
4237 } 4278 }
qpdf/qtest/qpdf/bad-jpeg-show.out
1 WARNING: bad-jpeg.pdf (offset 735): error decoding stream data for object 6 0: Not a JPEG file: starts with 0x77 0x77 1 WARNING: bad-jpeg.pdf (offset 735): error decoding stream data for object 6 0: Not a JPEG file: starts with 0x77 0x77
2 -qpdf: operation succeeded with warnings; resulting file may have some problems 2 +qpdf: operation succeeded with warnings
qpdf/qtest/qpdf/empty-object.out
1 WARNING: empty-object.pdf (object 7 0, offset 575): empty object treated as null 1 WARNING: empty-object.pdf (object 7 0, offset 575): empty object treated as null
2 null 2 null
3 -qpdf: operation succeeded with warnings; resulting file may have some problems 3 +qpdf: operation succeeded with warnings
qpdf/qtest/qpdf/replace-input.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/replace-warn.out 0 → 100644
  1 +WARNING: auto-warn.pdf (xref stream, offset 16227): Cross-reference stream data has the wrong size; expected = 52; actual = 56
  2 +qpdf: there are warnings; original file kept in auto-warn.pdf.~qpdf-orig
  3 +qpdf: operation succeeded with warnings; resulting file may have some problems
qpdf/qtest/qpdf/warn-replace.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/xref-with-short-size.out
@@ -11,4 +11,4 @@ WARNING: xref-with-short-size.pdf (xref stream, offset 16227): Cross-reference s @@ -11,4 +11,4 @@ WARNING: xref-with-short-size.pdf (xref stream, offset 16227): Cross-reference s
11 10/0: compressed; stream = 5, index = 3 11 10/0: compressed; stream = 5, index = 3
12 11/0: compressed; stream = 5, index = 7 12 11/0: compressed; stream = 5, index = 7
13 12/0: compressed; stream = 5, index = 8 13 12/0: compressed; stream = 5, index = 8
14 -qpdf: operation succeeded with warnings; resulting file may have some problems 14 +qpdf: operation succeeded with warnings