Commit 11df7809af7131af139be2e76f2db87128700939

Authored by Jay Berkenbilt
1 parent 98765c3b

add pipeline-based stream data replacement function

git-svn-id: svn+q:///qpdf/trunk@990 71b93d88-0707-0410-a8cf-f5a4172ac649
@@ -12,22 +12,6 @@ Next @@ -12,22 +12,6 @@ Next
12 Stefan Heinsen <stefan.heinsen@gmx.de> in August, 2009. He seems 12 Stefan Heinsen <stefan.heinsen@gmx.de> in August, 2009. He seems
13 to like to send encrypted mail. (key 01FCC336) 13 to like to send encrypted mail. (key 01FCC336)
14 14
15 - It appears that the only thing in the code that actually has to  
16 - change is the QPDF_Stream object. When replacing stream data, we  
17 - have to mutate the stream's dictionary to adjust /Filter,  
18 - /DecodeParms, and /Length. We should probably just provide a  
19 - method to replace the stream data, /Filter, and /DecodeParms all at  
20 - once. If new values are provided, then pipeStreamData can use the  
21 - new values, and we essentially then lose the original values. The  
22 - code for replacing stream data would be to use getStreamData to get  
23 - the old data and then to replace it all before any calls that would  
24 - cause QPDFWriter to write new stream data. Will have to go through  
25 - QPDF_Stream.cc carefully line by line to make sure everything is  
26 - adjusted properly.  
27 -  
28 - Don't forget to provide a method that provides a pipeline through  
29 - which the stream data is to be piped.  
30 -  
31 * Add helper routines for manipulating page content streams. 15 * Add helper routines for manipulating page content streams.
32 Operations should include ability to convert page contents from a 16 Operations should include ability to convert page contents from a
33 stream to an array of streams and to append or prepend to the page 17 stream to an array of streams and to append or prepend to the page
include/qpdf/QPDFObjectHandle.hh
@@ -195,8 +195,42 @@ class QPDFObjectHandle @@ -195,8 +195,42 @@ class QPDFObjectHandle
195 // decryption filters have been applied, is as presented. 195 // decryption filters have been applied, is as presented.
196 QPDF_DLL 196 QPDF_DLL
197 void replaceStreamData(PointerHolder<Buffer> data, 197 void replaceStreamData(PointerHolder<Buffer> data,
198 - QPDFObjectHandle filter,  
199 - QPDFObjectHandle decode_parms); 198 + QPDFObjectHandle const& filter,
  199 + QPDFObjectHandle const& decode_parms);
  200 +
  201 + class StreamDataProvider
  202 + {
  203 + public:
  204 + QPDF_DLL
  205 + virtual ~StreamDataProvider()
  206 + {
  207 + }
  208 + // See replaceStreamData below for details on how to override
  209 + // this method.
  210 + virtual void provideStreamData(int objid, int generation,
  211 + Pipeline* pipeline) = 0;
  212 + };
  213 + // As above, replace this stream's stream data. Instead of
  214 + // directly providing a buffer with the stream data, call the
  215 + // given provider's provideStreamData method. The method is to
  216 + // write the unencrypted, raw stream data to the provided
  217 + // pipeline. The stream's /Length key will be set to the length
  218 + // as provided. This must match the number of bytes written to
  219 + // the pipeline. The provider must write exactly the same data to
  220 + // the pipeline every time it is called. The method is invoked
  221 + // with the object ID and generation number, which are just there
  222 + // to be available to the handler in case it is useful for
  223 + // indexing purposes. This makes it easier to reuse the same
  224 + // StreamDataProvider object for multiple streams. Although it is
  225 + // more complex to use this form of replaceStreamData, it makes it
  226 + // possible to avoid allocating memory for the stream data.
  227 + // Example programs are provided that use both forms of
  228 + // replaceStreamData.
  229 + QPDF_DLL
  230 + void replaceStreamData(PointerHolder<StreamDataProvider> provider,
  231 + QPDFObjectHandle const& filter,
  232 + QPDFObjectHandle const& decode_parms,
  233 + size_t length);
200 234
201 // return 0 for direct objects 235 // return 0 for direct objects
202 QPDF_DLL 236 QPDF_DLL
libqpdf/QPDFObjectHandle.cc
@@ -354,14 +354,25 @@ QPDFObjectHandle::pipeStreamData(Pipeline* p, bool filter, @@ -354,14 +354,25 @@ QPDFObjectHandle::pipeStreamData(Pipeline* p, bool filter,
354 354
355 void 355 void
356 QPDFObjectHandle::replaceStreamData(PointerHolder<Buffer> data, 356 QPDFObjectHandle::replaceStreamData(PointerHolder<Buffer> data,
357 - QPDFObjectHandle filter,  
358 - QPDFObjectHandle decode_parms) 357 + QPDFObjectHandle const& filter,
  358 + QPDFObjectHandle const& decode_parms)
359 { 359 {
360 assertType("Stream", isStream()); 360 assertType("Stream", isStream());
361 dynamic_cast<QPDF_Stream*>(obj.getPointer())->replaceStreamData( 361 dynamic_cast<QPDF_Stream*>(obj.getPointer())->replaceStreamData(
362 data, filter, decode_parms); 362 data, filter, decode_parms);
363 } 363 }
364 364
  365 +void
  366 +QPDFObjectHandle::replaceStreamData(PointerHolder<StreamDataProvider> provider,
  367 + QPDFObjectHandle const& filter,
  368 + QPDFObjectHandle const& decode_parms,
  369 + size_t length)
  370 +{
  371 + assertType("Stream", isStream());
  372 + dynamic_cast<QPDF_Stream*>(obj.getPointer())->replaceStreamData(
  373 + provider, filter, decode_parms, length);
  374 +}
  375 +
365 int 376 int
366 QPDFObjectHandle::getObjectID() const 377 QPDFObjectHandle::getObjectID() const
367 { 378 {
libqpdf/QPDF_Stream.cc
@@ -9,6 +9,7 @@ @@ -9,6 +9,7 @@
9 #include <qpdf/Pl_ASCII85Decoder.hh> 9 #include <qpdf/Pl_ASCII85Decoder.hh>
10 #include <qpdf/Pl_ASCIIHexDecoder.hh> 10 #include <qpdf/Pl_ASCIIHexDecoder.hh>
11 #include <qpdf/Pl_LZWDecoder.hh> 11 #include <qpdf/Pl_LZWDecoder.hh>
  12 +#include <qpdf/Pl_Count.hh>
12 13
13 #include <qpdf/QTC.hh> 14 #include <qpdf/QTC.hh>
14 #include <qpdf/QPDF.hh> 15 #include <qpdf/QPDF.hh>
@@ -326,6 +327,32 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool filter, @@ -326,6 +327,32 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool filter,
326 pipeline->write(b.getBuffer(), b.getSize()); 327 pipeline->write(b.getBuffer(), b.getSize());
327 pipeline->finish(); 328 pipeline->finish();
328 } 329 }
  330 + else if (this->stream_provider.getPointer())
  331 + {
  332 + QPDFObjectHandle::StreamDataProvider& p =
  333 + (*this->stream_provider.getPointer());
  334 + Pl_Count count("stream provider count", pipeline);
  335 + p.provideStreamData(this->objid, this->generation, &count);
  336 + size_t actual_length = count.getCount();
  337 + size_t desired_length =
  338 + this->stream_dict.getKey("/Length").getIntValue();
  339 + if (actual_length == desired_length)
  340 + {
  341 + QTC::TC("qpdf", "QPDF_Stream pipe use stream provider");
  342 + }
  343 + else
  344 + {
  345 + QTC::TC("qpdf", "QPDF_Stream provider length mismatch");
  346 + throw std::logic_error(
  347 + "stream data provider for " +
  348 + QUtil::int_to_string(this->objid) + " " +
  349 + QUtil::int_to_string(this->generation) +
  350 + " provided " +
  351 + QUtil::int_to_string(actual_length) +
  352 + " bytes instead of expected " +
  353 + QUtil::int_to_string(desired_length) + " bytes");
  354 + }
  355 + }
329 else 356 else
330 { 357 {
331 QTC::TC("qpdf", "QPDF_Stream pipe original stream data"); 358 QTC::TC("qpdf", "QPDF_Stream pipe original stream data");
@@ -339,13 +366,33 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool filter, @@ -339,13 +366,33 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool filter,
339 366
340 void 367 void
341 QPDF_Stream::replaceStreamData(PointerHolder<Buffer> data, 368 QPDF_Stream::replaceStreamData(PointerHolder<Buffer> data,
342 - QPDFObjectHandle filter,  
343 - QPDFObjectHandle decode_parms) 369 + QPDFObjectHandle const& filter,
  370 + QPDFObjectHandle const& decode_parms)
344 { 371 {
345 this->stream_data = data; 372 this->stream_data = data;
  373 + this->stream_provider = 0;
  374 + replaceFilterData(filter, decode_parms, data.getPointer()->getSize());
  375 +}
  376 +
  377 +void
  378 +QPDF_Stream::replaceStreamData(
  379 + PointerHolder<QPDFObjectHandle::StreamDataProvider> provider,
  380 + QPDFObjectHandle const& filter,
  381 + QPDFObjectHandle const& decode_parms,
  382 + size_t length)
  383 +{
  384 + this->stream_provider = provider;
  385 + this->stream_data = 0;
  386 + replaceFilterData(filter, decode_parms, length);
  387 +}
  388 +
  389 +void
  390 +QPDF_Stream::replaceFilterData(QPDFObjectHandle const& filter,
  391 + QPDFObjectHandle const& decode_parms,
  392 + size_t length)
  393 +{
346 this->stream_dict.replaceOrRemoveKey("/Filter", filter); 394 this->stream_dict.replaceOrRemoveKey("/Filter", filter);
347 this->stream_dict.replaceOrRemoveKey("/DecodeParms", decode_parms); 395 this->stream_dict.replaceOrRemoveKey("/DecodeParms", decode_parms);
348 this->stream_dict.replaceKey("/Length", 396 this->stream_dict.replaceKey("/Length",
349 - QPDFObjectHandle::newInteger(  
350 - data.getPointer()->getSize())); 397 + QPDFObjectHandle::newInteger(length));
351 } 398 }
libqpdf/qpdf/QPDF_Stream.hh
@@ -23,10 +23,18 @@ class QPDF_Stream: public QPDFObject @@ -23,10 +23,18 @@ class QPDF_Stream: public QPDFObject
23 bool normalize, bool compress); 23 bool normalize, bool compress);
24 PointerHolder<Buffer> getStreamData(); 24 PointerHolder<Buffer> getStreamData();
25 void replaceStreamData(PointerHolder<Buffer> data, 25 void replaceStreamData(PointerHolder<Buffer> data,
26 - QPDFObjectHandle filter,  
27 - QPDFObjectHandle decode_parms); 26 + QPDFObjectHandle const& filter,
  27 + QPDFObjectHandle const& decode_parms);
  28 + void replaceStreamData(
  29 + PointerHolder<QPDFObjectHandle::StreamDataProvider> provider,
  30 + QPDFObjectHandle const& filter,
  31 + QPDFObjectHandle const& decode_parms,
  32 + size_t length);
28 33
29 private: 34 private:
  35 + void replaceFilterData(QPDFObjectHandle const& filter,
  36 + QPDFObjectHandle const& decode_parms,
  37 + size_t length);
30 bool filterable(std::vector<std::string>& filters, 38 bool filterable(std::vector<std::string>& filters,
31 int& predictor, int& columns, bool& early_code_change); 39 int& predictor, int& columns, bool& early_code_change);
32 40
@@ -37,6 +45,7 @@ class QPDF_Stream: public QPDFObject @@ -37,6 +45,7 @@ class QPDF_Stream: public QPDFObject
37 off_t offset; 45 off_t offset;
38 int length; 46 int length;
39 PointerHolder<Buffer> stream_data; 47 PointerHolder<Buffer> stream_data;
  48 + PointerHolder<QPDFObjectHandle::StreamDataProvider> stream_provider;
40 }; 49 };
41 50
42 #endif // __QPDF_STREAM_HH__ 51 #endif // __QPDF_STREAM_HH__
qpdf/qpdf.testcov
@@ -176,3 +176,5 @@ qpdf-c called qpdf_has_error 0 @@ -176,3 +176,5 @@ qpdf-c called qpdf_has_error 0
176 qpdf-c called qpdf_get_qpdf_version 0 176 qpdf-c called qpdf_get_qpdf_version 0
177 QPDF_Stream pipe original stream data 0 177 QPDF_Stream pipe original stream data 0
178 QPDF_Stream pipe replaced stream data 0 178 QPDF_Stream pipe replaced stream data 0
  179 +QPDF_Stream pipe use stream provider 0
  180 +QPDF_Stream provider length mismatch 0
qpdf/qtest/qpdf.test
@@ -107,7 +107,7 @@ $td->runtest("check output", @@ -107,7 +107,7 @@ $td->runtest("check output",
107 107
108 $td->runtest("replace stream data compressed", 108 $td->runtest("replace stream data compressed",
109 {$td->COMMAND => "test_driver 8 qstream.pdf"}, 109 {$td->COMMAND => "test_driver 8 qstream.pdf"},
110 - {$td->STRING => "test 8 done\n", $td->EXIT_STATUS => 0}, 110 + {$td->FILE => "test8.out", $td->EXIT_STATUS => 0},
111 $td->NORMALIZE_NEWLINES); 111 $td->NORMALIZE_NEWLINES);
112 $td->runtest("check output", 112 $td->runtest("check output",
113 {$td->FILE => "a.pdf"}, 113 {$td->FILE => "a.pdf"},
qpdf/qtest/qpdf/test8.out 0 → 100644
  1 +exception: stream data provider for 7 0 provided 29 bytes instead of expected 28 bytes
  2 +test 8 done
qpdf/test_driver.cc
@@ -23,6 +23,39 @@ void usage() @@ -23,6 +23,39 @@ void usage()
23 exit(2); 23 exit(2);
24 } 24 }
25 25
  26 +class Provider: public QPDFObjectHandle::StreamDataProvider
  27 +{
  28 + public:
  29 + Provider(PointerHolder<Buffer> b) :
  30 + b(b),
  31 + bad_length(false)
  32 + {
  33 + }
  34 + virtual ~Provider()
  35 + {
  36 + }
  37 + virtual void provideStreamData(int objid, int generation,
  38 + Pipeline* p)
  39 + {
  40 + p->write(b.getPointer()->getBuffer(),
  41 + b.getPointer()->getSize());
  42 + if (this->bad_length)
  43 + {
  44 + unsigned char ch = ' ';
  45 + p->write(&ch, 1);
  46 + }
  47 + p->finish();
  48 + }
  49 + void badLength(bool v)
  50 + {
  51 + this->bad_length = v;
  52 + }
  53 +
  54 + private:
  55 + PointerHolder<Buffer> b;
  56 + bool bad_length;
  57 +};
  58 +
26 void runtest(int n, char const* filename) 59 void runtest(int n, char const* filename)
27 { 60 {
28 QPDF pdf; 61 QPDF pdf;
@@ -341,9 +374,25 @@ void runtest(int n, char const* filename) @@ -341,9 +374,25 @@ void runtest(int n, char const* filename)
341 p2.write((unsigned char*)"new data for stream\n", 20); // no null! 374 p2.write((unsigned char*)"new data for stream\n", 20); // no null!
342 p2.finish(); 375 p2.finish();
343 PointerHolder<Buffer> b = p1.getBuffer(); 376 PointerHolder<Buffer> b = p1.getBuffer();
  377 + // This is a bogus way to use StreamDataProvider, but it does
  378 + // adequately test its functionality.
  379 + Provider* provider = new Provider(b);
  380 + PointerHolder<QPDFObjectHandle::StreamDataProvider> p = provider;
344 qstream.replaceStreamData( 381 qstream.replaceStreamData(
345 - b, QPDFObjectHandle::newName("/FlateDecode"),  
346 - QPDFObjectHandle::newNull()); 382 + p, QPDFObjectHandle::newName("/FlateDecode"),
  383 + QPDFObjectHandle::newNull(),
  384 + b.getPointer()->getSize());
  385 + provider->badLength(true);
  386 + try
  387 + {
  388 + qstream.getStreamData();
  389 + std::cout << "oops -- getStreamData didn't throw" << std::endl;
  390 + }
  391 + catch (std::logic_error const& e)
  392 + {
  393 + std::cout << "exception: " << e.what() << std::endl;
  394 + }
  395 + provider->badLength(false);
347 QPDFWriter w(pdf, "a.pdf"); 396 QPDFWriter w(pdf, "a.pdf");
348 w.setStaticID(true); 397 w.setStaticID(true);
349 w.setStreamDataMode(qpdf_s_preserve); 398 w.setStreamDataMode(qpdf_s_preserve);