Commit ebd5ed63decb90e26ae9129164214f9d7d684621

Authored by Jay Berkenbilt
1 parent a0fd8875

Add option to save pass 1 of linearization

This is useful only for debugging the linearization code.
ChangeLog
1 1 2018-02-04 Jay Berkenbilt <ejb@ql.org>
2 2  
  3 + * Add QPDFWriter::setLinearizationPass1Filename method and
  4 + --linearize-pass1 command line option to allow specification of a
  5 + file into which QPDFWriter will write its intermediate
  6 + linearization pass 1 file. This is useful only for debugging qpdf.
  7 + qpdf creates linearized files by computing the output in two
  8 + passes. Ordinarily the first pass is discarded and not written
  9 + anywhere. This option allows it to be inspected.
  10 +
  11 +2018-02-04 Jay Berkenbilt <ejb@ql.org>
  12 +
3 13 * 7.1.1: release
4 14  
5 15 * Bug fix: properly linearize files whose /ID has a length of
... ...
... ... @@ -177,44 +177,6 @@ I find it useful to make reference to them in this list
177 177 * See if we can avoid preserving unreferenced objects in object
178 178 streams even when preserving the object streams.
179 179  
180   - * For debugging linearization bugs, consider adding an option to save
181   - pass 1 of linearization. This code is sufficient. Change the
182   - interface to allow specification of a pass1 file, which would
183   - change the behavior as in this patch.
184   -
185   -------------------------------
186   -Index: QPDFWriter.cc
187   -===================================================================
188   ---- QPDFWriter.cc (revision 932)
189   -+++ QPDFWriter.cc (working copy)
190   -@@ -1965,11 +1965,15 @@
191   -
192   - // Write file in two passes. Part numbers refer to PDF spec 1.4.
193   -
194   -+ FILE* XXX = 0;
195   - for (int pass = 1; pass <= 2; ++pass)
196   - {
197   - if (pass == 1)
198   - {
199   -- pushDiscardFilter();
200   -+// pushDiscardFilter();
201   -+ XXX = QUtil::safe_fopen("/tmp/pass1.pdf", "w");
202   -+ pushPipeline(new Pl_StdioFile("pass1", XXX));
203   -+ activatePipelineStack();
204   - }
205   -
206   - // Part 1: header
207   -@@ -2204,6 +2208,8 @@
208   -
209   - // Restore hint offset
210   - this->xref[hint_id] = QPDFXRefEntry(1, hint_offset, 0);
211   -+ fclose(XXX);
212   -+ XXX = 0;
213   - }
214   - }
215   - }
216   -------------------------------
217   -
218 180 * Provide APIs for embedded files. See *attachments*.pdf in test
219 181 suite. The private method findAttachmentStreams finds at least
220 182 cases for modern versions of Adobe Reader (>= 1.7, maybe earlier).
... ...
include/qpdf/QPDFWriter.hh
... ... @@ -367,6 +367,15 @@ class QPDFWriter
367 367 QPDF_DLL
368 368 void setLinearization(bool);
369 369  
  370 + // For debugging QPDF: provide the name of a file to write pass1
  371 + // of linearization to. The only reason to use this is to debug
  372 + // QPDF. To linearize, QPDF writes out the file in two passes.
  373 + // Usually the first pass is discarded, but lots of computations
  374 + // are made in pass 1. If a linearized file comes out wrong, it
  375 + // can be helpful to look at the first pass.
  376 + QPDF_DLL
  377 + void setLinearizationPass1Filename(std::string const&);
  378 +
370 379 // Create PCLm output. This is only useful for clients that know
371 380 // how to create PCLm files. If a file is structured exactly as
372 381 // PCLm requires, this call will tell QPDFWriter to write the PCLm
... ... @@ -571,6 +580,7 @@ class QPDFWriter
571 580 std::string deterministic_id_data;
572 581  
573 582 // For linearization only
  583 + std::string lin_pass1_filename;
574 584 std::map<int, int> obj_renumber_no_gen;
575 585 std::map<int, int> object_to_object_stream_no_gen;
576 586 };
... ...
libqpdf/QPDFWriter.cc
... ... @@ -357,6 +357,12 @@ QPDFWriter::setLinearization(bool val)
357 357 }
358 358  
359 359 void
  360 +QPDFWriter::setLinearizationPass1Filename(std::string const& filename)
  361 +{
  362 + this->m->lin_pass1_filename = filename; // debugging aid: when non-empty, writeLinearized() writes pass 1 to this file instead of discarding it
  363 +}
  364 +
  365 +void
360 366 QPDFWriter::setPCLm(bool val)
361 367 {
362 368 this->m->pclm = val;
... ... @@ -2957,11 +2963,24 @@ QPDFWriter::writeLinearized()
2957 2963  
2958 2964 // Write file in two passes. Part numbers refer to PDF spec 1.4.
2959 2965  
  2966 + FILE* lin_pass1_file = 0;
2960 2967 for (int pass = 1; pass <= 2; ++pass)
2961 2968 {
2962 2969 if (pass == 1)
2963 2970 {
2964   - pushDiscardFilter();
  2971 + if (! this->m->lin_pass1_filename.empty())
  2972 + {
  2973 + lin_pass1_file =
  2974 + QUtil::safe_fopen(
  2975 + this->m->lin_pass1_filename.c_str(), "wb");
  2976 + pushPipeline(
  2977 + new Pl_StdioFile("linearization pass1", lin_pass1_file));
  2978 + activatePipelineStack();
  2979 + }
  2980 + else
  2981 + {
  2982 + pushDiscardFilter();
  2983 + }
2965 2984 if (this->m->deterministic_id)
2966 2985 {
2967 2986 pushMD5Pipeline();
... ... @@ -3201,6 +3220,20 @@ QPDFWriter::writeLinearized()
3201 3220  
3202 3221 // Restore hint offset
3203 3222 this->m->xref[hint_id] = QPDFXRefEntry(1, hint_offset, 0);
  3223 + if (lin_pass1_file)
  3224 + {
  3225 + // Write some debugging information
  3226 + fprintf(lin_pass1_file, "%% hint_offset=%s\n",
  3227 + QUtil::int_to_string(hint_offset).c_str());
  3228 + fprintf(lin_pass1_file, "%% hint_length=%s\n",
  3229 + QUtil::int_to_string(hint_length).c_str());
  3230 + fprintf(lin_pass1_file, "%% second_xref_offset=%s\n",
  3231 + QUtil::int_to_string(second_xref_offset).c_str());
  3232 + fprintf(lin_pass1_file, "%% second_xref_end=%s\n",
  3233 + QUtil::int_to_string(second_xref_end).c_str());
  3234 + fclose(lin_pass1_file);
  3235 + lin_pass1_file = 0;
  3236 + }
3204 3237 }
3205 3238 }
3206 3239 }
... ...
qpdf/qpdf.cc
... ... @@ -153,6 +153,7 @@ struct Options
153 153 bool qdf_mode;
154 154 bool preserve_unreferenced_objects;
155 155 bool newline_before_endstream;
  156 + std::string linearize_pass1;
156 157 std::string min_version;
157 158 std::string force_version;
158 159 bool show_npages;
... ... @@ -391,6 +392,8 @@ familiar with the PDF file format or who are PDF developers.\n\
391 392 --preserve-unreferenced preserve unreferenced objects\n\
392 393 --newline-before-endstream always put a newline before endstream\n\
393 394 --qdf turns on \"QDF mode\" (below)\n\
  395 +--linearize-pass1=file write intermediate pass of linearized file\n\
  396 + for debugging\n\
394 397 --min-version=version sets the minimum PDF version of the output file\n\
395 398 --force-version=version forces this to be the PDF version of the output file\n\
396 399 \n\
... ... @@ -1531,6 +1534,15 @@ static void parse_options(int argc, char* argv[], Options&amp; o)
1531 1534 {
1532 1535 o.newline_before_endstream = true;
1533 1536 }
  1537 + else if (strcmp(arg, "linearize-pass1") == 0)
  1538 + {
  1539 + if (parameter == 0)
  1540 + {
  1541 + usage("--linearize-pass1 be given as"
  1542 + "--linearize-pass1=filename");
  1543 + }
  1544 + o.linearize_pass1 = parameter;
  1545 + }
1534 1546 else if (strcmp(arg, "min-version") == 0)
1535 1547 {
1536 1548 if (parameter == 0)
... ... @@ -2214,6 +2226,10 @@ static void set_writer_options(QPDF&amp; pdf, Options&amp; o, QPDFWriter&amp; w)
2214 2226 {
2215 2227 w.setLinearization(true);
2216 2228 }
  2229 + if (! o.linearize_pass1.empty())
  2230 + {
  2231 + w.setLinearizationPass1Filename(o.linearize_pass1);
  2232 + }
2217 2233 if (o.object_stream_set)
2218 2234 {
2219 2235 w.setObjectStreamMode(o.object_stream_mode);
... ...
qpdf/qtest/qpdf.test
... ... @@ -240,7 +240,7 @@ foreach my $d (@bug_tests)
240 240 show_ntests();
241 241 # ----------
242 242 $td->notify("--- Miscellaneous Tests ---");
243   -$n_tests += 93;
  243 +$n_tests += 96;
244 244  
245 245 $td->runtest("qpdf version",
246 246 {$td->COMMAND => "qpdf --version"},
... ... @@ -252,6 +252,17 @@ $td-&gt;runtest(&quot;C API: qpdf version&quot;,
252 252 $td->EXIT_STATUS => 0},
253 253 $td->NORMALIZE_NEWLINES);
254 254  
  255 +$td->runtest("linearize pass 1 file",
  256 + {$td->COMMAND => "qpdf --linearize --static-id" .
  257 + " --linearize-pass1=b.pdf minimal.pdf a.pdf"},
  258 + {$td->STRING => "", $td->EXIT_STATUS => 0});
  259 +$td->runtest("check output",
  260 + {$td->FILE => "a.pdf"},
  261 + {$td->FILE => "minimal-linearized.pdf"});
  262 +$td->runtest("check pass1 file",
  263 + {$td->FILE => "b.pdf"},
  264 + {$td->FILE => "minimal-linearize-pass1.pdf"});
  265 +
255 266 foreach (my $i = 1; $i <= 3; ++$i)
256 267 {
257 268 $td->runtest("misc tests",
... ...
qpdf/qtest/qpdf/minimal-linearize-pass1.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/minimal-linearized.pdf 0 → 100644
No preview for this file type