Commit bedf35d6a55a9092485d56002b50bc9003ce7931
1 parent
a139d2b3
Bug fix: avoid extraneous pipeline finish calls with multiple contents
Avoid calling finish() multiple times on the pipeline passed to pipeContentStreams. This commit also fixes a bug in which qpdf was not exiting with the proper exit status if warnings found while splitting pages; this was exposed by a test case that changed.
Showing
9 changed files
with
52 additions
and
59 deletions
TODO
| @@ -16,23 +16,6 @@ Candidates for upcoming release | @@ -16,23 +16,6 @@ Candidates for upcoming release | ||
| 16 | definition of an object without breaking compatibility | 16 | definition of an object without breaking compatibility |
| 17 | * See if the objects part can be sorted by object number | 17 | * See if the objects part can be sorted by object number |
| 18 | 18 | ||
| 19 | -* QPDFObjectHandle::pipeContentStreams calls finish() after each | ||
| 20 | - stream. In some code paths, Pl_Concatenate is used, which suppresses | ||
| 21 | - that, but in other code paths, it's not used, and the library relies | ||
| 22 | - on the behavior of finish() being called. Then there's the issue of | ||
| 23 | - nested Pl_Concatenate pipelines -- calling manualFinish() on the top | ||
| 24 | - one doesn't call manualFinish() on the lower ones, and there are no | ||
| 25 | - exposed methods that allow us to apply things down the pipeline | ||
| 26 | - stack, so it's hard to fix this without changing the API (at least | ||
| 27 | - making Pipeline::getNext() public, which may be undesirable). To see | ||
| 28 | - this problem in action, stick a Pl_Concatenate in front of the | ||
| 29 | - pipeline in pipeContentStreams and observe the test failure. One | ||
| 30 | - solution might be to add an additional argument indicating whether | ||
| 31 | - or not to delay calling finish() until the end. See comments on | ||
| 32 | - QPDFPageObjectHelper::filterPageContents, | ||
| 33 | - QPDFObjectHandle::filterPageContents, and | ||
| 34 | - QPDFObjectHandle::pipeContentStreams | ||
| 35 | - | ||
| 36 | * Remember to check work `qpdf` project for private issues | 19 | * Remember to check work `qpdf` project for private issues |
| 37 | * file with very slow page extraction | 20 | * file with very slow page extraction |
| 38 | * big page even with --remove-unreferenced-resources=yes, even with --empty | 21 | * big page even with --remove-unreferenced-resources=yes, even with --empty |
include/qpdf/QPDFPageObjectHelper.hh
| @@ -199,12 +199,7 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | @@ -199,12 +199,7 @@ class QPDFPageObjectHelper: public QPDFObjectHelper | ||
| 199 | 199 | ||
| 200 | // Pipe a page's contents through the given pipeline. This method | 200 | // Pipe a page's contents through the given pipeline. This method |
| 201 | // works whether the contents are a single stream or an array of | 201 | // works whether the contents are a single stream or an array of |
| 202 | - // streams. Call on a page object. Please note that if there is an | ||
| 203 | - // array of content streams, p->finish() is called after each | ||
| 204 | - // stream. If you pass a pipeline that doesn't allow write() to be | ||
| 205 | - // called after finish(), you can wrap it in an instance of | ||
| 206 | - // Pl_Concatenate and then call manualFinish() on the | ||
| 207 | - // Pl_Concatenate pipeline at the end. | 202 | + // streams. |
| 208 | QPDF_DLL | 203 | QPDF_DLL |
| 209 | void pipePageContents(Pipeline* p); | 204 | void pipePageContents(Pipeline* p); |
| 210 | 205 |
libqpdf/QPDFObjectHandle.cc
| @@ -14,7 +14,6 @@ | @@ -14,7 +14,6 @@ | ||
| 14 | #include <qpdf/QPDF_Stream.hh> | 14 | #include <qpdf/QPDF_Stream.hh> |
| 15 | #include <qpdf/QPDF_Reserved.hh> | 15 | #include <qpdf/QPDF_Reserved.hh> |
| 16 | #include <qpdf/Pl_Buffer.hh> | 16 | #include <qpdf/Pl_Buffer.hh> |
| 17 | -#include <qpdf/Pl_Concatenate.hh> | ||
| 18 | #include <qpdf/Pl_QPDFTokenizer.hh> | 17 | #include <qpdf/Pl_QPDFTokenizer.hh> |
| 19 | #include <qpdf/BufferInputSource.hh> | 18 | #include <qpdf/BufferInputSource.hh> |
| 20 | #include <qpdf/QPDFExc.hh> | 19 | #include <qpdf/QPDFExc.hh> |
| @@ -90,13 +89,11 @@ void | @@ -90,13 +89,11 @@ void | ||
| 90 | CoalesceProvider::provideStreamData(int, int, Pipeline* p) | 89 | CoalesceProvider::provideStreamData(int, int, Pipeline* p) |
| 91 | { | 90 | { |
| 92 | QTC::TC("qpdf", "QPDFObjectHandle coalesce provide stream data"); | 91 | QTC::TC("qpdf", "QPDFObjectHandle coalesce provide stream data"); |
| 93 | - Pl_Concatenate concat("concatenate", p); | ||
| 94 | std::string description = "page object " + | 92 | std::string description = "page object " + |
| 95 | QUtil::int_to_string(containing_page.getObjectID()) + " " + | 93 | QUtil::int_to_string(containing_page.getObjectID()) + " " + |
| 96 | QUtil::int_to_string(containing_page.getGeneration()); | 94 | QUtil::int_to_string(containing_page.getGeneration()); |
| 97 | std::string all_description; | 95 | std::string all_description; |
| 98 | - old_contents.pipeContentStreams(&concat, description, all_description); | ||
| 99 | - concat.manualFinish(); | 96 | + old_contents.pipeContentStreams(p, description, all_description); |
| 100 | } | 97 | } |
| 101 | 98 | ||
| 102 | void | 99 | void |
| @@ -1630,14 +1627,15 @@ QPDFObjectHandle::pipeContentStreams( | @@ -1630,14 +1627,15 @@ QPDFObjectHandle::pipeContentStreams( | ||
| 1630 | arrayOrStreamToStreamArray( | 1627 | arrayOrStreamToStreamArray( |
| 1631 | description, all_description); | 1628 | description, all_description); |
| 1632 | bool need_newline = false; | 1629 | bool need_newline = false; |
| 1630 | + Pl_Buffer buf("concatenated content stream buffer"); | ||
| 1633 | for (std::vector<QPDFObjectHandle>::iterator iter = streams.begin(); | 1631 | for (std::vector<QPDFObjectHandle>::iterator iter = streams.begin(); |
| 1634 | iter != streams.end(); ++iter) | 1632 | iter != streams.end(); ++iter) |
| 1635 | { | 1633 | { |
| 1636 | if (need_newline) | 1634 | if (need_newline) |
| 1637 | { | 1635 | { |
| 1638 | - p->write(QUtil::unsigned_char_pointer("\n"), 1); | 1636 | + buf.write(QUtil::unsigned_char_pointer("\n"), 1); |
| 1639 | } | 1637 | } |
| 1640 | - LastChar lc(p); | 1638 | + LastChar lc(&buf); |
| 1641 | QPDFObjectHandle stream = *iter; | 1639 | QPDFObjectHandle stream = *iter; |
| 1642 | std::string og = | 1640 | std::string og = |
| 1643 | QUtil::int_to_string(stream.getObjectID()) + " " + | 1641 | QUtil::int_to_string(stream.getObjectID()) + " " + |
| @@ -1655,6 +1653,9 @@ QPDFObjectHandle::pipeContentStreams( | @@ -1655,6 +1653,9 @@ QPDFObjectHandle::pipeContentStreams( | ||
| 1655 | QTC::TC("qpdf", "QPDFObjectHandle need_newline", | 1653 | QTC::TC("qpdf", "QPDFObjectHandle need_newline", |
| 1656 | need_newline ? 0 : 1); | 1654 | need_newline ? 0 : 1); |
| 1657 | } | 1655 | } |
| 1656 | + std::unique_ptr<Buffer> b(buf.getBuffer()); | ||
| 1657 | + p->write(b->getBuffer(), b->getSize()); | ||
| 1658 | + p->finish(); | ||
| 1658 | } | 1659 | } |
| 1659 | 1660 | ||
| 1660 | void | 1661 | void |
qpdf/qpdf.cc
| @@ -5569,7 +5569,7 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w) | @@ -5569,7 +5569,7 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w) | ||
| 5569 | } | 5569 | } |
| 5570 | } | 5570 | } |
| 5571 | 5571 | ||
| 5572 | -static void do_split_pages(QPDF& pdf, Options& o) | 5572 | +static void do_split_pages(QPDF& pdf, Options& o, bool& warnings) |
| 5573 | { | 5573 | { |
| 5574 | // Generate output file pattern | 5574 | // Generate output file pattern |
| 5575 | std::string before; | 5575 | std::string before; |
| @@ -5653,6 +5653,10 @@ static void do_split_pages(QPDF& pdf, Options& o) | @@ -5653,6 +5653,10 @@ static void do_split_pages(QPDF& pdf, Options& o) | ||
| 5653 | { | 5653 | { |
| 5654 | std::cout << whoami << ": wrote file " << outfile << std::endl; | 5654 | std::cout << whoami << ": wrote file " << outfile << std::endl; |
| 5655 | } | 5655 | } |
| 5656 | + if (outpdf.anyWarnings()) | ||
| 5657 | + { | ||
| 5658 | + warnings = true; | ||
| 5659 | + } | ||
| 5656 | } | 5660 | } |
| 5657 | } | 5661 | } |
| 5658 | 5662 | ||
| @@ -5794,6 +5798,7 @@ int realmain(int argc, char* argv[]) | @@ -5794,6 +5798,7 @@ int realmain(int argc, char* argv[]) | ||
| 5794 | } | 5798 | } |
| 5795 | handle_under_overlay(pdf, o); | 5799 | handle_under_overlay(pdf, o); |
| 5796 | handle_transformations(pdf, o); | 5800 | handle_transformations(pdf, o); |
| 5801 | + bool split_warnings = false; | ||
| 5797 | 5802 | ||
| 5798 | if ((o.outfilename == 0) && (! o.replace_input)) | 5803 | if ((o.outfilename == 0) && (! o.replace_input)) |
| 5799 | { | 5804 | { |
| @@ -5801,13 +5806,13 @@ int realmain(int argc, char* argv[]) | @@ -5801,13 +5806,13 @@ int realmain(int argc, char* argv[]) | ||
| 5801 | } | 5806 | } |
| 5802 | else if (o.split_pages) | 5807 | else if (o.split_pages) |
| 5803 | { | 5808 | { |
| 5804 | - do_split_pages(pdf, o); | 5809 | + do_split_pages(pdf, o, split_warnings); |
| 5805 | } | 5810 | } |
| 5806 | else | 5811 | else |
| 5807 | { | 5812 | { |
| 5808 | write_outfile(pdf, o); | 5813 | write_outfile(pdf, o); |
| 5809 | } | 5814 | } |
| 5810 | - if (! pdf.getWarnings().empty()) | 5815 | + if ((! pdf.getWarnings().empty()) || split_warnings) |
| 5811 | { | 5816 | { |
| 5812 | if (! o.suppress_warnings) | 5817 | if (! o.suppress_warnings) |
| 5813 | { | 5818 | { |
qpdf/qtest/qpdf/normalize-warnings.out
| @@ -6,4 +6,7 @@ WARNING: split-tokens.pdf (offset 823): Resulting stream data may be corrupted b | @@ -6,4 +6,7 @@ WARNING: split-tokens.pdf (offset 823): Resulting stream data may be corrupted b | ||
| 6 | WARNING: split-tokens.pdf (offset 962): content normalization encountered bad tokens | 6 | WARNING: split-tokens.pdf (offset 962): content normalization encountered bad tokens |
| 7 | WARNING: split-tokens.pdf (offset 962): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents | 7 | WARNING: split-tokens.pdf (offset 962): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents |
| 8 | WARNING: split-tokens.pdf (offset 962): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. | 8 | WARNING: split-tokens.pdf (offset 962): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. |
| 9 | +WARNING: split-tokens.pdf (offset 1338): content normalization encountered bad tokens | ||
| 10 | +WARNING: split-tokens.pdf (offset 1338): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents | ||
| 11 | +WARNING: split-tokens.pdf (offset 1338): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. | ||
| 9 | qpdf: operation succeeded with warnings; resulting file may have some problems | 12 | qpdf: operation succeeded with warnings; resulting file may have some problems |
qpdf/qtest/qpdf/split-tokens-split-1-2.pdf
No preview for this file type
qpdf/qtest/qpdf/split-tokens-split.out
| @@ -7,4 +7,7 @@ WARNING: empty PDF: Resulting stream data may be corrupted but is may still usef | @@ -7,4 +7,7 @@ WARNING: empty PDF: Resulting stream data may be corrupted but is may still usef | ||
| 7 | WARNING: empty PDF: content normalization encountered bad tokens | 7 | WARNING: empty PDF: content normalization encountered bad tokens |
| 8 | WARNING: empty PDF: normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents | 8 | WARNING: empty PDF: normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents |
| 9 | WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. | 9 | WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. |
| 10 | +WARNING: empty PDF: content normalization encountered bad tokens | ||
| 11 | +WARNING: empty PDF: normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents | ||
| 12 | +WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. | ||
| 10 | qpdf: operation succeeded with warnings; resulting file may have some problems | 13 | qpdf: operation succeeded with warnings; resulting file may have some problems |
qpdf/qtest/qpdf/split-tokens.pdf
No preview for this file type
qpdf/qtest/qpdf/split-tokens.qdf
No preview for this file type