Commit ebd5ed63decb90e26ae9129164214f9d7d684621

Authored by Jay Berkenbilt
1 parent a0fd8875

Add option to save pass 1 of linearization

This is useful only for debugging the linearization code.
ChangeLog
1 2018-02-04 Jay Berkenbilt <ejb@ql.org> 1 2018-02-04 Jay Berkenbilt <ejb@ql.org>
2 2
  3 + * Add QPDFWriter::setLinearizationPass1Filename method and
  4 + --linearize-pass1 command line option to allow specification of a
  5 + file into which QPDFWriter will write its intermediate
  6 + linearization pass 1 file. This is useful only for debugging qpdf.
  7 + qpdf creates linearized files by computing the output in two
  8 + passes. Ordinarily the first pass is discarded and not written
  9 + anywhere. This option allows it to be inspected.
  10 +
  11 +2018-02-04 Jay Berkenbilt <ejb@ql.org>
  12 +
3 * 7.1.1: release 13 * 7.1.1: release
4 14
5 * Bug fix: properly linearize files whose /ID has a length of 15 * Bug fix: properly linearize files whose /ID has a length of
@@ -177,44 +177,6 @@ I find it useful to make reference to them in this list @@ -177,44 +177,6 @@ I find it useful to make reference to them in this list
177 * See if we can avoid preserving unreferenced objects in object 177 * See if we can avoid preserving unreferenced objects in object
178 streams even when preserving the object streams. 178 streams even when preserving the object streams.
179 179
180 - * For debugging linearization bugs, consider adding an option to save  
181 - pass 1 of linearization. This code is sufficient. Change the  
182 - interface to allow specification of a pass1 file, which would  
183 - change the behavior as in this patch.  
184 -  
185 -------------------------------  
186 -Index: QPDFWriter.cc  
187 -===================================================================  
188 ---- QPDFWriter.cc (revision 932)  
189 -+++ QPDFWriter.cc (working copy)  
190 -@@ -1965,11 +1965,15 @@  
191 -  
192 - // Write file in two passes. Part numbers refer to PDF spec 1.4.  
193 -  
194 -+ FILE* XXX = 0;  
195 - for (int pass = 1; pass <= 2; ++pass)  
196 - {  
197 - if (pass == 1)  
198 - {  
199 -- pushDiscardFilter();  
200 -+// pushDiscardFilter();  
201 -+ XXX = QUtil::safe_fopen("/tmp/pass1.pdf", "w");  
202 -+ pushPipeline(new Pl_StdioFile("pass1", XXX));  
203 -+ activatePipelineStack();  
204 - }  
205 -  
206 - // Part 1: header  
207 -@@ -2204,6 +2208,8 @@  
208 -  
209 - // Restore hint offset  
210 - this->xref[hint_id] = QPDFXRefEntry(1, hint_offset, 0);  
211 -+ fclose(XXX);  
212 -+ XXX = 0;  
213 - }  
214 - }  
215 - }  
216 -------------------------------  
217 -  
218 * Provide APIs for embedded files. See *attachments*.pdf in test 180 * Provide APIs for embedded files. See *attachments*.pdf in test
219 suite. The private method findAttachmentStreams finds at least 181 suite. The private method findAttachmentStreams finds at least
220 cases for modern versions of Adobe Reader (>= 1.7, maybe earlier). 182 cases for modern versions of Adobe Reader (>= 1.7, maybe earlier).
include/qpdf/QPDFWriter.hh
@@ -367,6 +367,15 @@ class QPDFWriter @@ -367,6 +367,15 @@ class QPDFWriter
367 QPDF_DLL 367 QPDF_DLL
368 void setLinearization(bool); 368 void setLinearization(bool);
369 369
  370 + // For debugging QPDF: provide the name of a file to write pass1
  371 + // of linearization to. The only reason to use this is to debug
  372 + // QPDF. To linearize, QPDF writes out the file in two passes.
  373 + // Usually the first pass is discarded, but lots of computations
  374 + // are made in pass 1. If a linearized file comes out wrong, it
  375 + // can be helpful to look at the first pass.
  376 + QPDF_DLL
  377 + void setLinearizationPass1Filename(std::string const&);
  378 +
370 // Create PCLm output. This is only useful for clients that know 379 // Create PCLm output. This is only useful for clients that know
371 // how to create PCLm files. If a file is structured exactly as 380 // how to create PCLm files. If a file is structured exactly as
372 // PCLm requires, this call will tell QPDFWriter to write the PCLm 381 // PCLm requires, this call will tell QPDFWriter to write the PCLm
@@ -571,6 +580,7 @@ class QPDFWriter @@ -571,6 +580,7 @@ class QPDFWriter
571 std::string deterministic_id_data; 580 std::string deterministic_id_data;
572 581
573 // For linearization only 582 // For linearization only
  583 + std::string lin_pass1_filename;
574 std::map<int, int> obj_renumber_no_gen; 584 std::map<int, int> obj_renumber_no_gen;
575 std::map<int, int> object_to_object_stream_no_gen; 585 std::map<int, int> object_to_object_stream_no_gen;
576 }; 586 };
libqpdf/QPDFWriter.cc
@@ -357,6 +357,12 @@ QPDFWriter::setLinearization(bool val) @@ -357,6 +357,12 @@ QPDFWriter::setLinearization(bool val)
357 } 357 }
358 358
359 void 359 void
  360 +QPDFWriter::setLinearizationPass1Filename(std::string const& filename)
  361 +{
  362 + this->m->lin_pass1_filename = filename;
  363 +}
  364 +
  365 +void
360 QPDFWriter::setPCLm(bool val) 366 QPDFWriter::setPCLm(bool val)
361 { 367 {
362 this->m->pclm = val; 368 this->m->pclm = val;
@@ -2957,11 +2963,24 @@ QPDFWriter::writeLinearized() @@ -2957,11 +2963,24 @@ QPDFWriter::writeLinearized()
2957 2963
2958 // Write file in two passes. Part numbers refer to PDF spec 1.4. 2964 // Write file in two passes. Part numbers refer to PDF spec 1.4.
2959 2965
  2966 + FILE* lin_pass1_file = 0;
2960 for (int pass = 1; pass <= 2; ++pass) 2967 for (int pass = 1; pass <= 2; ++pass)
2961 { 2968 {
2962 if (pass == 1) 2969 if (pass == 1)
2963 { 2970 {
2964 - pushDiscardFilter(); 2971 + if (! this->m->lin_pass1_filename.empty())
  2972 + {
  2973 + lin_pass1_file =
  2974 + QUtil::safe_fopen(
  2975 + this->m->lin_pass1_filename.c_str(), "wb");
  2976 + pushPipeline(
  2977 + new Pl_StdioFile("linearization pass1", lin_pass1_file));
  2978 + activatePipelineStack();
  2979 + }
  2980 + else
  2981 + {
  2982 + pushDiscardFilter();
  2983 + }
2965 if (this->m->deterministic_id) 2984 if (this->m->deterministic_id)
2966 { 2985 {
2967 pushMD5Pipeline(); 2986 pushMD5Pipeline();
@@ -3201,6 +3220,20 @@ QPDFWriter::writeLinearized() @@ -3201,6 +3220,20 @@ QPDFWriter::writeLinearized()
3201 3220
3202 // Restore hint offset 3221 // Restore hint offset
3203 this->m->xref[hint_id] = QPDFXRefEntry(1, hint_offset, 0); 3222 this->m->xref[hint_id] = QPDFXRefEntry(1, hint_offset, 0);
  3223 + if (lin_pass1_file)
  3224 + {
  3225 + // Write some debugging information
  3226 + fprintf(lin_pass1_file, "%% hint_offset=%s\n",
  3227 + QUtil::int_to_string(hint_offset).c_str());
  3228 + fprintf(lin_pass1_file, "%% hint_length=%s\n",
  3229 + QUtil::int_to_string(hint_length).c_str());
  3230 + fprintf(lin_pass1_file, "%% second_xref_offset=%s\n",
  3231 + QUtil::int_to_string(second_xref_offset).c_str());
  3232 + fprintf(lin_pass1_file, "%% second_xref_end=%s\n",
  3233 + QUtil::int_to_string(second_xref_end).c_str());
  3234 + fclose(lin_pass1_file);
  3235 + lin_pass1_file = 0;
  3236 + }
3204 } 3237 }
3205 } 3238 }
3206 } 3239 }
qpdf/qpdf.cc
@@ -153,6 +153,7 @@ struct Options @@ -153,6 +153,7 @@ struct Options
153 bool qdf_mode; 153 bool qdf_mode;
154 bool preserve_unreferenced_objects; 154 bool preserve_unreferenced_objects;
155 bool newline_before_endstream; 155 bool newline_before_endstream;
  156 + std::string linearize_pass1;
156 std::string min_version; 157 std::string min_version;
157 std::string force_version; 158 std::string force_version;
158 bool show_npages; 159 bool show_npages;
@@ -391,6 +392,8 @@ familiar with the PDF file format or who are PDF developers.\n\ @@ -391,6 +392,8 @@ familiar with the PDF file format or who are PDF developers.\n\
391 --preserve-unreferenced preserve unreferenced objects\n\ 392 --preserve-unreferenced preserve unreferenced objects\n\
392 --newline-before-endstream always put a newline before endstream\n\ 393 --newline-before-endstream always put a newline before endstream\n\
393 --qdf turns on \"QDF mode\" (below)\n\ 394 --qdf turns on \"QDF mode\" (below)\n\
  395 +--linearize-pass1=file write intermediate pass of linearized file\n\
  396 + for debugging\n\
394 --min-version=version sets the minimum PDF version of the output file\n\ 397 --min-version=version sets the minimum PDF version of the output file\n\
395 --force-version=version forces this to be the PDF version of the output file\n\ 398 --force-version=version forces this to be the PDF version of the output file\n\
396 \n\ 399 \n\
@@ -1531,6 +1534,15 @@ static void parse_options(int argc, char* argv[], Options& o) @@ -1531,6 +1534,15 @@ static void parse_options(int argc, char* argv[], Options& o)
1531 { 1534 {
1532 o.newline_before_endstream = true; 1535 o.newline_before_endstream = true;
1533 } 1536 }
  1537 + else if (strcmp(arg, "linearize-pass1") == 0)
  1538 + {
  1539 + if (parameter == 0)
  1540 + {
  1541 + usage("--linearize-pass1 must be given as "
  1542 + "--linearize-pass1=filename");
  1543 + }
  1544 + o.linearize_pass1 = parameter;
  1545 + }
1534 else if (strcmp(arg, "min-version") == 0) 1546 else if (strcmp(arg, "min-version") == 0)
1535 { 1547 {
1536 if (parameter == 0) 1548 if (parameter == 0)
@@ -2214,6 +2226,10 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w) @@ -2214,6 +2226,10 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w)
2214 { 2226 {
2215 w.setLinearization(true); 2227 w.setLinearization(true);
2216 } 2228 }
  2229 + if (! o.linearize_pass1.empty())
  2230 + {
  2231 + w.setLinearizationPass1Filename(o.linearize_pass1);
  2232 + }
2217 if (o.object_stream_set) 2233 if (o.object_stream_set)
2218 { 2234 {
2219 w.setObjectStreamMode(o.object_stream_mode); 2235 w.setObjectStreamMode(o.object_stream_mode);
qpdf/qtest/qpdf.test
@@ -240,7 +240,7 @@ foreach my $d (@bug_tests) @@ -240,7 +240,7 @@ foreach my $d (@bug_tests)
240 show_ntests(); 240 show_ntests();
241 # ---------- 241 # ----------
242 $td->notify("--- Miscellaneous Tests ---"); 242 $td->notify("--- Miscellaneous Tests ---");
243 -$n_tests += 93; 243 +$n_tests += 96;
244 244
245 $td->runtest("qpdf version", 245 $td->runtest("qpdf version",
246 {$td->COMMAND => "qpdf --version"}, 246 {$td->COMMAND => "qpdf --version"},
@@ -252,6 +252,17 @@ $td->runtest("C API: qpdf version", @@ -252,6 +252,17 @@ $td->runtest("C API: qpdf version",
252 $td->EXIT_STATUS => 0}, 252 $td->EXIT_STATUS => 0},
253 $td->NORMALIZE_NEWLINES); 253 $td->NORMALIZE_NEWLINES);
254 254
  255 +$td->runtest("linearize pass 1 file",
  256 + {$td->COMMAND => "qpdf --linearize --static-id" .
  257 + " --linearize-pass1=b.pdf minimal.pdf a.pdf"},
  258 + {$td->STRING => "", $td->EXIT_STATUS => 0});
  259 +$td->runtest("check output",
  260 + {$td->FILE => "a.pdf"},
  261 + {$td->FILE => "minimal-linearized.pdf"});
  262 +$td->runtest("check pass1 file",
  263 + {$td->FILE => "b.pdf"},
  264 + {$td->FILE => "minimal-linearize-pass1.pdf"});
  265 +
255 foreach (my $i = 1; $i <= 3; ++$i) 266 foreach (my $i = 1; $i <= 3; ++$i)
256 { 267 {
257 $td->runtest("misc tests", 268 $td->runtest("misc tests",
qpdf/qtest/qpdf/minimal-linearize-pass1.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/minimal-linearized.pdf 0 → 100644
No preview for this file type