Commit bedf35d6a55a9092485d56002b50bc9003ce7931

Authored by Jay Berkenbilt
1 parent a139d2b3

Bug fix: avoid extraneous pipeline finish calls with multiple contents

Avoid calling finish() multiple times on the pipeline passed to
pipeContentStreams. This commit also fixes a bug in which qpdf was not
exiting with the proper exit status if warnings were found while splitting
pages; this was exposed by a test case that changed.
@@ -16,23 +16,6 @@ Candidates for upcoming release @@ -16,23 +16,6 @@ Candidates for upcoming release
16 definition of an object without breaking compatibility 16 definition of an object without breaking compatibility
17 * See if the objects part can be sorted by object number 17 * See if the objects part can be sorted by object number
18 18
19 -* QPDFObjectHandle::pipeContentStreams calls finish() after each  
20 - stream. In some code paths, Pl_Concatenate is used, which suppresses  
21 - that, but in other code paths, it's not used, and the library relies  
22 - on the behavior of finish() being called. Then there's the issue of  
23 - nested Pl_Concatenate pipelines -- calling manualFinish() on the top  
24 - one doesn't call manualFinish() on the lower ones, and there are no  
25 - exposed methods that allow us to apply things down the pipeline  
26 - stack, so it's hard to fix this without changing the API (at least  
27 - making Pipeline::getNext() public, which may be undesirable). To see  
28 - this problem in action, stick a Pl_Concatenate in front of the  
29 - pipeline in pipeContentStreams and observe the test failure. One  
30 - solution might be to add an additional argument indicating whether  
31 - or not to delay calling finish() until the end. See comments on  
32 - QPDFPageObjectHelper::filterPageContents,  
33 - QPDFObjectHandle::filterPageContents, and  
34 - QPDFObjectHandle::pipeContentStreams  
35 -  
36 * Remember to check work `qpdf` project for private issues 19 * Remember to check work `qpdf` project for private issues
37 * file with very slow page extraction 20 * file with very slow page extraction
38 * big page even with --remove-unreferenced-resources=yes, even with --empty 21 * big page even with --remove-unreferenced-resources=yes, even with --empty
include/qpdf/QPDFPageObjectHelper.hh
@@ -199,12 +199,7 @@ class QPDFPageObjectHelper: public QPDFObjectHelper @@ -199,12 +199,7 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
199 199
200 // Pipe a page's contents through the given pipeline. This method 200 // Pipe a page's contents through the given pipeline. This method
201 // works whether the contents are a single stream or an array of 201 // works whether the contents are a single stream or an array of
202 - // streams. Call on a page object. Please note that if there is an  
203 - // array of content streams, p->finish() is called after each  
204 - // stream. If you pass a pipeline that doesn't allow write() to be  
205 - // called after finish(), you can wrap it in an instance of  
206 - // Pl_Concatenate and then call manualFinish() on the  
207 - // Pl_Concatenate pipeline at the end. 202 + // streams.
208 QPDF_DLL 203 QPDF_DLL
209 void pipePageContents(Pipeline* p); 204 void pipePageContents(Pipeline* p);
210 205
libqpdf/QPDFObjectHandle.cc
@@ -14,7 +14,6 @@ @@ -14,7 +14,6 @@
14 #include <qpdf/QPDF_Stream.hh> 14 #include <qpdf/QPDF_Stream.hh>
15 #include <qpdf/QPDF_Reserved.hh> 15 #include <qpdf/QPDF_Reserved.hh>
16 #include <qpdf/Pl_Buffer.hh> 16 #include <qpdf/Pl_Buffer.hh>
17 -#include <qpdf/Pl_Concatenate.hh>  
18 #include <qpdf/Pl_QPDFTokenizer.hh> 17 #include <qpdf/Pl_QPDFTokenizer.hh>
19 #include <qpdf/BufferInputSource.hh> 18 #include <qpdf/BufferInputSource.hh>
20 #include <qpdf/QPDFExc.hh> 19 #include <qpdf/QPDFExc.hh>
@@ -90,13 +89,11 @@ void @@ -90,13 +89,11 @@ void
90 CoalesceProvider::provideStreamData(int, int, Pipeline* p) 89 CoalesceProvider::provideStreamData(int, int, Pipeline* p)
91 { 90 {
92 QTC::TC("qpdf", "QPDFObjectHandle coalesce provide stream data"); 91 QTC::TC("qpdf", "QPDFObjectHandle coalesce provide stream data");
93 - Pl_Concatenate concat("concatenate", p);  
94 std::string description = "page object " + 92 std::string description = "page object " +
95 QUtil::int_to_string(containing_page.getObjectID()) + " " + 93 QUtil::int_to_string(containing_page.getObjectID()) + " " +
96 QUtil::int_to_string(containing_page.getGeneration()); 94 QUtil::int_to_string(containing_page.getGeneration());
97 std::string all_description; 95 std::string all_description;
98 - old_contents.pipeContentStreams(&concat, description, all_description);  
99 - concat.manualFinish(); 96 + old_contents.pipeContentStreams(p, description, all_description);
100 } 97 }
101 98
102 void 99 void
@@ -1630,14 +1627,15 @@ QPDFObjectHandle::pipeContentStreams( @@ -1630,14 +1627,15 @@ QPDFObjectHandle::pipeContentStreams(
1630 arrayOrStreamToStreamArray( 1627 arrayOrStreamToStreamArray(
1631 description, all_description); 1628 description, all_description);
1632 bool need_newline = false; 1629 bool need_newline = false;
  1630 + Pl_Buffer buf("concatenated content stream buffer");
1633 for (std::vector<QPDFObjectHandle>::iterator iter = streams.begin(); 1631 for (std::vector<QPDFObjectHandle>::iterator iter = streams.begin();
1634 iter != streams.end(); ++iter) 1632 iter != streams.end(); ++iter)
1635 { 1633 {
1636 if (need_newline) 1634 if (need_newline)
1637 { 1635 {
1638 - p->write(QUtil::unsigned_char_pointer("\n"), 1); 1636 + buf.write(QUtil::unsigned_char_pointer("\n"), 1);
1639 } 1637 }
1640 - LastChar lc(p); 1638 + LastChar lc(&buf);
1641 QPDFObjectHandle stream = *iter; 1639 QPDFObjectHandle stream = *iter;
1642 std::string og = 1640 std::string og =
1643 QUtil::int_to_string(stream.getObjectID()) + " " + 1641 QUtil::int_to_string(stream.getObjectID()) + " " +
@@ -1655,6 +1653,9 @@ QPDFObjectHandle::pipeContentStreams( @@ -1655,6 +1653,9 @@ QPDFObjectHandle::pipeContentStreams(
1655 QTC::TC("qpdf", "QPDFObjectHandle need_newline", 1653 QTC::TC("qpdf", "QPDFObjectHandle need_newline",
1656 need_newline ? 0 : 1); 1654 need_newline ? 0 : 1);
1657 } 1655 }
  1656 + std::unique_ptr<Buffer> b(buf.getBuffer());
  1657 + p->write(b->getBuffer(), b->getSize());
  1658 + p->finish();
1658 } 1659 }
1659 1660
1660 void 1661 void
qpdf/qpdf.cc
@@ -5569,7 +5569,7 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w) @@ -5569,7 +5569,7 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w)
5569 } 5569 }
5570 } 5570 }
5571 5571
5572 -static void do_split_pages(QPDF& pdf, Options& o) 5572 +static void do_split_pages(QPDF& pdf, Options& o, bool& warnings)
5573 { 5573 {
5574 // Generate output file pattern 5574 // Generate output file pattern
5575 std::string before; 5575 std::string before;
@@ -5653,6 +5653,10 @@ static void do_split_pages(QPDF& pdf, Options& o) @@ -5653,6 +5653,10 @@ static void do_split_pages(QPDF& pdf, Options& o)
5653 { 5653 {
5654 std::cout << whoami << ": wrote file " << outfile << std::endl; 5654 std::cout << whoami << ": wrote file " << outfile << std::endl;
5655 } 5655 }
  5656 + if (outpdf.anyWarnings())
  5657 + {
  5658 + warnings = true;
  5659 + }
5656 } 5660 }
5657 } 5661 }
5658 5662
@@ -5794,6 +5798,7 @@ int realmain(int argc, char* argv[]) @@ -5794,6 +5798,7 @@ int realmain(int argc, char* argv[])
5794 } 5798 }
5795 handle_under_overlay(pdf, o); 5799 handle_under_overlay(pdf, o);
5796 handle_transformations(pdf, o); 5800 handle_transformations(pdf, o);
  5801 + bool split_warnings = false;
5797 5802
5798 if ((o.outfilename == 0) && (! o.replace_input)) 5803 if ((o.outfilename == 0) && (! o.replace_input))
5799 { 5804 {
@@ -5801,13 +5806,13 @@ int realmain(int argc, char* argv[]) @@ -5801,13 +5806,13 @@ int realmain(int argc, char* argv[])
5801 } 5806 }
5802 else if (o.split_pages) 5807 else if (o.split_pages)
5803 { 5808 {
5804 - do_split_pages(pdf, o); 5809 + do_split_pages(pdf, o, split_warnings);
5805 } 5810 }
5806 else 5811 else
5807 { 5812 {
5808 write_outfile(pdf, o); 5813 write_outfile(pdf, o);
5809 } 5814 }
5810 - if (! pdf.getWarnings().empty()) 5815 + if ((! pdf.getWarnings().empty()) || split_warnings)
5811 { 5816 {
5812 if (! o.suppress_warnings) 5817 if (! o.suppress_warnings)
5813 { 5818 {
qpdf/qtest/qpdf/normalize-warnings.out
@@ -6,4 +6,7 @@ WARNING: split-tokens.pdf (offset 823): Resulting stream data may be corrupted b @@ -6,4 +6,7 @@ WARNING: split-tokens.pdf (offset 823): Resulting stream data may be corrupted b
6 WARNING: split-tokens.pdf (offset 962): content normalization encountered bad tokens 6 WARNING: split-tokens.pdf (offset 962): content normalization encountered bad tokens
7 WARNING: split-tokens.pdf (offset 962): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents 7 WARNING: split-tokens.pdf (offset 962): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents
8 WARNING: split-tokens.pdf (offset 962): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. 8 WARNING: split-tokens.pdf (offset 962): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual.
  9 +WARNING: split-tokens.pdf (offset 1338): content normalization encountered bad tokens
  10 +WARNING: split-tokens.pdf (offset 1338): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents
  11 +WARNING: split-tokens.pdf (offset 1338): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual.
9 qpdf: operation succeeded with warnings; resulting file may have some problems 12 qpdf: operation succeeded with warnings; resulting file may have some problems
qpdf/qtest/qpdf/split-tokens-split-1-2.pdf
No preview for this file type
qpdf/qtest/qpdf/split-tokens-split.out
@@ -7,4 +7,7 @@ WARNING: empty PDF: Resulting stream data may be corrupted but is may still usef @@ -7,4 +7,7 @@ WARNING: empty PDF: Resulting stream data may be corrupted but is may still usef
7 WARNING: empty PDF: content normalization encountered bad tokens 7 WARNING: empty PDF: content normalization encountered bad tokens
8 WARNING: empty PDF: normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents 8 WARNING: empty PDF: normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents
9 WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. 9 WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual.
  10 +WARNING: empty PDF: content normalization encountered bad tokens
  11 +WARNING: empty PDF: normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents
  12 +WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual.
10 qpdf: operation succeeded with warnings; resulting file may have some problems 13 qpdf: operation succeeded with warnings; resulting file may have some problems
qpdf/qtest/qpdf/split-tokens.pdf
No preview for this file type
qpdf/qtest/qpdf/split-tokens.qdf
No preview for this file type