Commit bedf35d6a55a9092485d56002b50bc9003ce7931
1 parent
a139d2b3
Bug fix: avoid extraneous pipeline finish calls with multiple contents
Avoid calling finish() multiple times on the pipeline passed to pipeContentStreams. This commit also fixes a bug in which qpdf was not exiting with the proper exit status if warnings found while splitting pages; this was exposed by a test case that changed.
Showing
9 changed files
with
52 additions
and
59 deletions
TODO
| ... | ... | @@ -16,23 +16,6 @@ Candidates for upcoming release |
| 16 | 16 | definition of an object without breaking compatibility |
| 17 | 17 | * See if the objects part can be sorted by object number |
| 18 | 18 | |
| 19 | -* QPDFObjectHandle::pipeContentStreams calls finish() after each | |
| 20 | - stream. In some code paths, Pl_Concatenate is used, which suppresses | |
| 21 | - that, but in other code paths, it's not used, and the library relies | |
| 22 | - on the behavior of finish() being called. Then there's the issue of | |
| 23 | - nested Pl_Concatenate pipelines -- calling manualFinish() on the top | |
| 24 | - one doesn't call manualFinish() on the lower ones, and there are no | |
| 25 | - exposed methods that allow us to apply things down the pipeline | |
| 26 | - stack, so it's hard to fix this without changing the API (at least | |
| 27 | - making Pipeline::getNext() public, which may be undesirable). To see | |
| 28 | - this problem in action, stick a Pl_Concatenate in front of the | |
| 29 | - pipeline in pipeContentStreams and observe the test failure. One | |
| 30 | - solution might be to add an additional argument indicating whether | |
| 31 | - or not to delay calling finish() until the end. See comments on | |
| 32 | - QPDFPageObjectHelper::filterPageContents, | |
| 33 | - QPDFObjectHandle::filterPageContents, and | |
| 34 | - QPDFObjectHandle::pipeContentStreams | |
| 35 | - | |
| 36 | 19 | * Remember to check work `qpdf` project for private issues |
| 37 | 20 | * file with very slow page extraction |
| 38 | 21 | * big page even with --remove-unreferenced-resources=yes, even with --empty | ... | ... |
include/qpdf/QPDFPageObjectHelper.hh
| ... | ... | @@ -199,12 +199,7 @@ class QPDFPageObjectHelper: public QPDFObjectHelper |
| 199 | 199 | |
| 200 | 200 | // Pipe a page's contents through the given pipeline. This method |
| 201 | 201 | // works whether the contents are a single stream or an array of |
| 202 | - // streams. Call on a page object. Please note that if there is an | |
| 203 | - // array of content streams, p->finish() is called after each | |
| 204 | - // stream. If you pass a pipeline that doesn't allow write() to be | |
| 205 | - // called after finish(), you can wrap it in an instance of | |
| 206 | - // Pl_Concatenate and then call manualFinish() on the | |
| 207 | - // Pl_Concatenate pipeline at the end. | |
| 202 | + // streams. | |
| 208 | 203 | QPDF_DLL |
| 209 | 204 | void pipePageContents(Pipeline* p); |
| 210 | 205 | ... | ... |
libqpdf/QPDFObjectHandle.cc
| ... | ... | @@ -14,7 +14,6 @@ |
| 14 | 14 | #include <qpdf/QPDF_Stream.hh> |
| 15 | 15 | #include <qpdf/QPDF_Reserved.hh> |
| 16 | 16 | #include <qpdf/Pl_Buffer.hh> |
| 17 | -#include <qpdf/Pl_Concatenate.hh> | |
| 18 | 17 | #include <qpdf/Pl_QPDFTokenizer.hh> |
| 19 | 18 | #include <qpdf/BufferInputSource.hh> |
| 20 | 19 | #include <qpdf/QPDFExc.hh> |
| ... | ... | @@ -90,13 +89,11 @@ void |
| 90 | 89 | CoalesceProvider::provideStreamData(int, int, Pipeline* p) |
| 91 | 90 | { |
| 92 | 91 | QTC::TC("qpdf", "QPDFObjectHandle coalesce provide stream data"); |
| 93 | - Pl_Concatenate concat("concatenate", p); | |
| 94 | 92 | std::string description = "page object " + |
| 95 | 93 | QUtil::int_to_string(containing_page.getObjectID()) + " " + |
| 96 | 94 | QUtil::int_to_string(containing_page.getGeneration()); |
| 97 | 95 | std::string all_description; |
| 98 | - old_contents.pipeContentStreams(&concat, description, all_description); | |
| 99 | - concat.manualFinish(); | |
| 96 | + old_contents.pipeContentStreams(p, description, all_description); | |
| 100 | 97 | } |
| 101 | 98 | |
| 102 | 99 | void |
| ... | ... | @@ -1630,14 +1627,15 @@ QPDFObjectHandle::pipeContentStreams( |
| 1630 | 1627 | arrayOrStreamToStreamArray( |
| 1631 | 1628 | description, all_description); |
| 1632 | 1629 | bool need_newline = false; |
| 1630 | + Pl_Buffer buf("concatenated content stream buffer"); | |
| 1633 | 1631 | for (std::vector<QPDFObjectHandle>::iterator iter = streams.begin(); |
| 1634 | 1632 | iter != streams.end(); ++iter) |
| 1635 | 1633 | { |
| 1636 | 1634 | if (need_newline) |
| 1637 | 1635 | { |
| 1638 | - p->write(QUtil::unsigned_char_pointer("\n"), 1); | |
| 1636 | + buf.write(QUtil::unsigned_char_pointer("\n"), 1); | |
| 1639 | 1637 | } |
| 1640 | - LastChar lc(p); | |
| 1638 | + LastChar lc(&buf); | |
| 1641 | 1639 | QPDFObjectHandle stream = *iter; |
| 1642 | 1640 | std::string og = |
| 1643 | 1641 | QUtil::int_to_string(stream.getObjectID()) + " " + |
| ... | ... | @@ -1655,6 +1653,9 @@ QPDFObjectHandle::pipeContentStreams( |
| 1655 | 1653 | QTC::TC("qpdf", "QPDFObjectHandle need_newline", |
| 1656 | 1654 | need_newline ? 0 : 1); |
| 1657 | 1655 | } |
| 1656 | + std::unique_ptr<Buffer> b(buf.getBuffer()); | |
| 1657 | + p->write(b->getBuffer(), b->getSize()); | |
| 1658 | + p->finish(); | |
| 1658 | 1659 | } |
| 1659 | 1660 | |
| 1660 | 1661 | void | ... | ... |
qpdf/qpdf.cc
| ... | ... | @@ -5569,7 +5569,7 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w) |
| 5569 | 5569 | } |
| 5570 | 5570 | } |
| 5571 | 5571 | |
| 5572 | -static void do_split_pages(QPDF& pdf, Options& o) | |
| 5572 | +static void do_split_pages(QPDF& pdf, Options& o, bool& warnings) | |
| 5573 | 5573 | { |
| 5574 | 5574 | // Generate output file pattern |
| 5575 | 5575 | std::string before; |
| ... | ... | @@ -5653,6 +5653,10 @@ static void do_split_pages(QPDF& pdf, Options& o) |
| 5653 | 5653 | { |
| 5654 | 5654 | std::cout << whoami << ": wrote file " << outfile << std::endl; |
| 5655 | 5655 | } |
| 5656 | + if (outpdf.anyWarnings()) | |
| 5657 | + { | |
| 5658 | + warnings = true; | |
| 5659 | + } | |
| 5656 | 5660 | } |
| 5657 | 5661 | } |
| 5658 | 5662 | |
| ... | ... | @@ -5794,6 +5798,7 @@ int realmain(int argc, char* argv[]) |
| 5794 | 5798 | } |
| 5795 | 5799 | handle_under_overlay(pdf, o); |
| 5796 | 5800 | handle_transformations(pdf, o); |
| 5801 | + bool split_warnings = false; | |
| 5797 | 5802 | |
| 5798 | 5803 | if ((o.outfilename == 0) && (! o.replace_input)) |
| 5799 | 5804 | { |
| ... | ... | @@ -5801,13 +5806,13 @@ int realmain(int argc, char* argv[]) |
| 5801 | 5806 | } |
| 5802 | 5807 | else if (o.split_pages) |
| 5803 | 5808 | { |
| 5804 | - do_split_pages(pdf, o); | |
| 5809 | + do_split_pages(pdf, o, split_warnings); | |
| 5805 | 5810 | } |
| 5806 | 5811 | else |
| 5807 | 5812 | { |
| 5808 | 5813 | write_outfile(pdf, o); |
| 5809 | 5814 | } |
| 5810 | - if (! pdf.getWarnings().empty()) | |
| 5815 | + if ((! pdf.getWarnings().empty()) || split_warnings) | |
| 5811 | 5816 | { |
| 5812 | 5817 | if (! o.suppress_warnings) |
| 5813 | 5818 | { | ... | ... |
qpdf/qtest/qpdf/normalize-warnings.out
| ... | ... | @@ -6,4 +6,7 @@ WARNING: split-tokens.pdf (offset 823): Resulting stream data may be corrupted b |
| 6 | 6 | WARNING: split-tokens.pdf (offset 962): content normalization encountered bad tokens |
| 7 | 7 | WARNING: split-tokens.pdf (offset 962): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents |
| 8 | 8 | WARNING: split-tokens.pdf (offset 962): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. |
| 9 | +WARNING: split-tokens.pdf (offset 1338): content normalization encountered bad tokens | |
| 10 | +WARNING: split-tokens.pdf (offset 1338): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents | |
| 11 | +WARNING: split-tokens.pdf (offset 1338): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. | |
| 9 | 12 | qpdf: operation succeeded with warnings; resulting file may have some problems | ... | ... |
qpdf/qtest/qpdf/split-tokens-split-1-2.pdf
No preview for this file type
qpdf/qtest/qpdf/split-tokens-split.out
| ... | ... | @@ -7,4 +7,7 @@ WARNING: empty PDF: Resulting stream data may be corrupted but is may still usef |
| 7 | 7 | WARNING: empty PDF: content normalization encountered bad tokens |
| 8 | 8 | WARNING: empty PDF: normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents |
| 9 | 9 | WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. |
| 10 | +WARNING: empty PDF: content normalization encountered bad tokens | |
| 11 | +WARNING: empty PDF: normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents | |
| 12 | +WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. | |
| 10 | 13 | qpdf: operation succeeded with warnings; resulting file may have some problems | ... | ... |
qpdf/qtest/qpdf/split-tokens.pdf
No preview for this file type
qpdf/qtest/qpdf/split-tokens.qdf
No preview for this file type