Commit 9744414c66e3f85700ebc8b32d90f45ff97221bd

Authored by Jay Berkenbilt
1 parent ae90d2c4

Enable finer grained control of stream decoding

This commit adds several API methods that enable control over which
types of filters QPDF will attempt to decode. It also adds support for
/RunLengthDecode and /DCTDecode filters for both encoding and
decoding.
ChangeLog
  1 +2017-08-19 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Remove --precheck-streams. This is enabled by default now
  4 + without any efficiency cost. This feature was never released.
  5 +
  6 + * Update pdf-create example to illustrate use of additional image
  7 + compression filters.
  8 +
  9 + * Add support for /RunLengthDecode and /DCTDecode:
  10 + - New pipeline types Pl_RunLength and Pl_DCT
  11 + - New command-line flags --compress-streams and --decode-level
  12 + to replace/enhance --stream-data
  13 + - New QPDFWriter::setCompressStreams and
  14 + QPDFWriter::setDecodeLevel methods
  15 + Please see documentation, header files, and help messages for
  16 + details on these new features.
  17 +
1 18 2017-08-12 Jay Berkenbilt <ejb@ql.org>
2 19  
3 20 * Add QPDFObjectHandle::rotatePage to apply rotation to a page
... ...
examples/pdf-invert-images.cc
... ... @@ -121,7 +121,8 @@ int main(int argc, char* argv[])
121 121 // pipeStreamData with a null pipeline to determine
122 122 // whether the image is filterable. Directly inspect
123 123 // keys to determine the image type.
124   - if (image.pipeStreamData(0, true, false, false) &&
  124 + if (image.pipeStreamData(0, qpdf_ef_compress,
  125 + qpdf_dl_generalized) &&
125 126 color_space.isName() &&
126 127 bits_per_component.isInteger() &&
127 128 (color_space.getName() == "/DeviceGray") &&
... ...
include/qpdf/Constants.h
... ... @@ -26,7 +26,7 @@ enum qpdf_error_code_e
26 26 qpdf_e_pages, /* erroneous or unsupported pages structure */
27 27 };
28 28  
29   -/* Write Parameters */
  29 +/* Write Parameters. See QPDFWriter.hh for details. */
30 30  
31 31 enum qpdf_object_stream_e
32 32 {
... ... @@ -41,6 +41,23 @@ enum qpdf_stream_data_e
41 41 qpdf_s_compress /* compress stream data */
42 42 };
43 43  
  44 +/* Stream data flags */
  45 +
  46 +/* See pipeStreamData in QPDFObjectHandle.hh for details on these flags. */
  47 +enum qpdf_stream_encode_flags_e
  48 +{
  49 + qpdf_ef_compress = 1 << 0, /* compress uncompressed streams */
  50 + qpdf_ef_normalize = 1 << 1, /* normalize content stream */
  51 +};
  52 +enum qpdf_stream_decode_level_e
  53 +{
  54 + /* These must be in order from less to more decoding. */
  55 + qpdf_dl_none = 0, /* preserve all stream filters */
  56 + qpdf_dl_generalized, /* decode general-purpose filters */
  57 + qpdf_dl_specialized, /* also decode other non-lossy filters */
  58 + qpdf_dl_all /* also decode lossy filters */
  59 +};
  60 +
44 61 /* R3 Encryption Parameters */
45 62  
46 63 enum qpdf_r3_print_e
... ...
include/qpdf/QPDFObjectHandle.hh
... ... @@ -10,6 +10,7 @@
10 10  
11 11 #include <qpdf/DLL.h>
12 12 #include <qpdf/Types.h>
  13 +#include <qpdf/Constants.h>
13 14  
14 15 #include <string>
15 16 #include <vector>
... ... @@ -44,19 +45,19 @@ class QPDFObjectHandle
44 45 virtual ~StreamDataProvider()
45 46 {
46 47 }
47   - // The implementation of this function must write the
48   - // unencrypted, raw stream data to the given pipeline. Every
49   - // call to provideStreamData for a given stream must write the
50   - // same data. The number of bytes written must agree with the
51   - // length provided at the time the StreamDataProvider object
52   - // was associated with the stream. The object ID and
53   - // generation passed to this method are those that belong to
54   - // the stream on behalf of which the provider is called. They
55   - // may be ignored or used by the implementation for indexing
56   - // or other purposes. This information is made available just
57   - // to make it more convenient to use a single
58   - // StreamDataProvider object to provide data for multiple
59   - // streams.
  48 + // The implementation of this function must write stream data
  49 + // to the given pipeline. The stream data must conform to
  50 + // whatever filters are explicitly associated with the stream.
  51 + // QPDFWriter may, in some cases, add compression, but if it
  52 + // does, it will update the filters as needed. Every call to
  53 + // provideStreamData for a given stream must write the same
  54 + // data. The object ID and generation passed to this method are
  55 + // those that belong to the stream on behalf of which the
  56 + // provider is called. They may be ignored or used by the
  57 + // implementation for indexing or other purposes. This
  58 + // information is made available just to make it more
  59 + // convenient to use a single StreamDataProvider object to
  60 + // provide data for multiple streams.
60 61 virtual void provideStreamData(int objid, int generation,
61 62 Pipeline* pipeline) = 0;
62 63 };
... ... @@ -370,32 +371,71 @@ class QPDFObjectHandle
370 371 // Returns filtered (uncompressed) stream data. Throws an
371 372 // exception if the stream is filtered and we can't decode it.
372 373 QPDF_DLL
373   - PointerHolder<Buffer> getStreamData();
  374 + PointerHolder<Buffer> getStreamData(
  375 + qpdf_stream_decode_level_e level = qpdf_dl_generalized);
  376 +
374 377 // Returns unfiltered (raw) stream data.
375 378 QPDF_DLL
376 379 PointerHolder<Buffer> getRawStreamData();
377 380  
378   - // Write stream data through the given pipeline. A null pipeline
  381 + // Write stream data through the given pipeline. A null pipeline
379 382 // value may be used if all you want to do is determine whether a
380   - // stream is filterable. If filter is false, write raw stream
381   - // data and return false. If filter is true, then attempt to
382   - // apply all the decoding filters to the stream data. If we are
383   - // successful, return true. Otherwise, return false and write raw
384   - // data. If filtering is requested and successfully performed,
385   - // then the normalize and compress flags are used to determine
386   - // whether stream data should be normalized and compressed. In
387   - // all cases, if this function returns false, raw data has been
388   - // written. If it returns true, then any requested filtering has
389   - // been performed. Note that if the original stream data has no
390   - // filters applied to it, the return value will be equal to the
391   - // value of the filter parameter. Callers may use the return
392   - // value of this function to determine whether or not the /Filter
393   - // and /DecodeParms keys in the stream dictionary should be
394   - // replaced if writing a new stream object.
  383 + // stream is filterable and would be filtered based on the
  384 + // provided flags. If flags is 0, write raw stream data and return
  385 + // false. Otherwise, the flags alter the behavior in the following
  386 + // way:
  387 + //
  388 + // encode_flags:
  389 + //
  390 + // qpdf_ef_compress -- compress data with /FlateDecode if no other
  391 + // compression filters are applied.
  392 + //
  393 + // qpdf_ef_normalize -- tokenize as content stream and normalize tokens
  394 + //
  395 + // decode_level:
  396 + //
  397 + // qpdf_dl_none -- do not decode any streams.
  398 + //
  399 + // qpdf_dl_generalized -- decode supported general-purpose
  400 + // filters. This includes /ASCIIHexDecode, /ASCII85Decode,
  401 + // /LZWDecode, and /FlateDecode.
  402 + //
  403 + // qpdf_dl_specialized -- in addition to generalized filters, also
  404 + // decode supported non-lossy specialized filters. This includes
  405 + // /RunLengthDecode.
  406 + //
  407 + // qpdf_dl_all -- in addition to generalized and non-lossy
  408 + // specialized filters, decode supported lossy filters. This
  409 + // includes /DCTDecode.
  410 + //
  411 + // If, based on the flags and the filters and decode parameters,
  412 + // we determine that we know how to apply all requested filters,
  413 + // do so and return true if we are successful.
  414 + //
  415 + // In all cases, a return value of true means that filtered data
  416 + // has been written successfully. If filtering is requested but
  417 + // this method returns false, it means there was some error in the
  418 + // filtering, in which case the resulting data is likely partially
  419 + // filtered and/or incomplete and may not be consistent with the
  420 + // configured filters. QPDFWriter handles this by attempting to
  421 + // get the stream data without filtering, but callers should
  422 + // consider a false return value when decode_level is not
  423 + // qpdf_dl_none to be a potential loss of data.
  424 + QPDF_DLL
  425 + bool pipeStreamData(Pipeline*,
  426 + unsigned long encode_flags,
  427 + qpdf_stream_decode_level_e decode_level,
  428 + bool suppress_warnings = false);
  429 +
  430 + // Legacy pipeStreamData. This maps to the flags-based
  431 + // pipeStreamData as follows:
  432 + // filter = false -> encode_flags = 0
  433 + // filter = true -> decode_level = qpdf_dl_generalized
  434 + // normalize = true -> encode_flags |= qpdf_ef_normalize
  435 + // compress = true -> encode_flags |= qpdf_ef_compress
395 436 QPDF_DLL
396 437 bool pipeStreamData(Pipeline*, bool filter,
397   - bool normalize, bool compress,
398   - bool suppress_warnings = false);
  438 + bool normalize, bool compress);
399 439  
400 440 // Replace a stream's dictionary. The new dictionary must be
401 441 // consistent with the stream's data. This is most appropriately
... ...
include/qpdf/QPDFWriter.hh
... ... @@ -118,14 +118,70 @@ class QPDFWriter
118 118 QPDF_DLL
119 119 void setObjectStreamMode(qpdf_object_stream_e);
120 120  
121   - // Set value of stream data mode. In uncompress mode, we attempt
122   - // to uncompress any stream that we can. In preserve mode, we
123   - // preserve any filtering applied to streams. In compress mode,
124   - // if we can apply all filters and the stream is not already
125   - // optimally compressed, recompress the stream.
  121 + // Set value of stream data mode. This is an older interface.
  122 + // Instead of using this, prefer setCompressStreams() and
  123 + // setDecodeLevel(). This method is retained for compatibility,
  124 + // but it does not cover the full range of available
  125 + // configurations. The mapping between this and the new methods is
  126 + // as follows:
  127 + //
  128 + // qpdf_s_uncompress:
  129 + // setCompressStreams(false)
  130 + // setDecodeLevel(qpdf_dl_generalized)
  131 + // qpdf_s_preserve:
  132 + // setCompressStreams(false)
  133 + // setDecodeLevel(qpdf_dl_none)
  134 + // qpdf_s_compress:
  135 + // setCompressStreams(true)
  136 + // setDecodeLevel(qpdf_dl_generalized)
  137 + //
  138 + // The default is qpdf_s_compress.
126 139 QPDF_DLL
127 140 void setStreamDataMode(qpdf_stream_data_e);
128 141  
  142 + // If true, compress any uncompressed streams when writing them.
  143 + // Metadata streams are a special case and are not compressed even
  144 + // if this is true. This is true by default for QPDFWriter. If you
  145 + // want QPDFWriter to leave uncompressed streams uncompressed,
  146 + // pass false to this method.
  147 + QPDF_DLL
  148 + void setCompressStreams(bool);
  149 +
  150 + // When QPDFWriter encounters streams, this parameter controls the
  151 + // behavior with respect to attempting to apply any filters to the
  152 + // streams when copying to the output. The decode levels are as
  153 + // follows:
  154 + //
  155 + // qpdf_dl_none: Do not attempt to apply any filters. Streams
  156 + // remain as they appear in the original file. Note that
  157 + // uncompressed streams may still be compressed on output. You can
  158 + // disable that by calling setCompressStreams(false).
  159 + //
  160 + // qpdf_dl_generalized: This is the default. QPDFWriter will apply
  161 + // LZWDecode, ASCII85Decode, ASCIIHexDecode, and FlateDecode
  162 + // filters on the input. When combined with
  163 + // setCompressStreams(true), which the default, the effect of this
  164 + // is that streams filtered with these older and less efficient
  165 + // filters will be recompressed with the Flate filter. As a
  166 + // special case, if a stream is already compressed with
  167 + // FlateDecode and setCompressStreams is enabled, the original
  168 + // compressed data will be preserved.
  169 + //
  170 + // qpdf_dl_specialized: In addition to uncompressing the
  171 + // generalized compression formats, supported non-lossy
  172 + // compression will also be decoded. At present, this includes
  173 + // the RunLengthDecode filter.
  174 + //
  175 + // qpdf_dl_all: In addition to generalized and non-lossy
  176 + // specialized filters, supported lossy compression filters will
  177 + // be applied. At present, this includes DCTDecode (JPEG)
  178 + // compression. Note that compressing the resulting data with
  179 + // DCTDecode again will accumulate loss, so avoid multiple
  180 + // compression and decompression cycles. This is mostly useful for
  181 + // retrieving image data.
  182 + QPDF_DLL
  183 + void setDecodeLevel(qpdf_stream_decode_level_e);
  184 +
129 185 // Set value of content stream normalization. The default is
130 186 // "false". If true, we attempt to normalize newlines inside of
131 187 // content streams. Some constructs such as inline images may
... ... @@ -434,8 +490,10 @@ class QPDFWriter
434 490 Buffer* output_buffer;
435 491 bool normalize_content_set;
436 492 bool normalize_content;
437   - bool stream_data_mode_set;
438   - qpdf_stream_data_e stream_data_mode;
  493 + bool compress_streams;
  494 + bool compress_streams_set;
  495 + qpdf_stream_decode_level_e stream_decode_level;
  496 + bool stream_decode_level_set;
439 497 bool qdf_mode;
440 498 bool precheck_streams;
441 499 bool preserve_unreferenced_objects;
... ...
libqpdf/QPDF.cc
... ... @@ -45,7 +45,7 @@ QPDF::CopiedStreamDataProvider::provideStreamData(
45 45 {
46 46 QPDFObjectHandle foreign_stream =
47 47 this->foreign_streams[QPDFObjGen(objid, generation)];
48   - foreign_stream.pipeStreamData(pipeline, false, false, false);
  48 + foreign_stream.pipeStreamData(pipeline, 0, qpdf_dl_none);
49 49 }
50 50  
51 51 void
... ... @@ -2377,6 +2377,7 @@ QPDF::pipeStreamData(int objid, int generation,
2377 2377 length -= len;
2378 2378 pipeline->write(QUtil::unsigned_char_pointer(buf), len);
2379 2379 }
  2380 + pipeline->finish();
2380 2381 success = true;
2381 2382 }
2382 2383 catch (QPDFExc& e)
... ... @@ -2398,13 +2399,16 @@ QPDF::pipeStreamData(int objid, int generation,
2398 2399 QUtil::int_to_string(generation) + ": " + e.what()));
2399 2400 }
2400 2401 }
2401   - try
2402   - {
2403   - pipeline->finish();
2404   - }
2405   - catch (std::exception&)
  2402 + if (! success)
2406 2403 {
2407   - // ignore
  2404 + try
  2405 + {
  2406 + pipeline->finish();
  2407 + }
  2408 + catch (std::exception&)
  2409 + {
  2410 + // ignore
  2411 + }
2408 2412 }
2409 2413 return success;
2410 2414 }
... ...
libqpdf/QPDFObjectHandle.cc
... ... @@ -482,10 +482,10 @@ QPDFObjectHandle::replaceDict(QPDFObjectHandle new_dict)
482 482 }
483 483  
484 484 PointerHolder<Buffer>
485   -QPDFObjectHandle::getStreamData()
  485 +QPDFObjectHandle::getStreamData(qpdf_stream_decode_level_e level)
486 486 {
487 487 assertStream();
488   - return dynamic_cast<QPDF_Stream*>(obj.getPointer())->getStreamData();
  488 + return dynamic_cast<QPDF_Stream*>(obj.getPointer())->getStreamData(level);
489 489 }
490 490  
491 491 PointerHolder<Buffer>
... ... @@ -496,13 +496,35 @@ QPDFObjectHandle::getRawStreamData()
496 496 }
497 497  
498 498 bool
499   -QPDFObjectHandle::pipeStreamData(Pipeline* p, bool filter,
500   - bool normalize, bool compress,
  499 +QPDFObjectHandle::pipeStreamData(Pipeline* p,
  500 + unsigned long encode_flags,
  501 + qpdf_stream_decode_level_e decode_level,
501 502 bool suppress_warnings)
502 503 {
503 504 assertStream();
504 505 return dynamic_cast<QPDF_Stream*>(obj.getPointer())->pipeStreamData(
505   - p, filter, normalize, compress, suppress_warnings);
  506 + p, encode_flags, decode_level, suppress_warnings);
  507 +}
  508 +
  509 +bool
  510 +QPDFObjectHandle::pipeStreamData(Pipeline* p, bool filter,
  511 + bool normalize, bool compress)
  512 +{
  513 + unsigned long encode_flags = 0;
  514 + qpdf_stream_decode_level_e decode_level = qpdf_dl_none;
  515 + if (filter)
  516 + {
  517 + decode_level = qpdf_dl_generalized;
  518 + if (normalize)
  519 + {
  520 + encode_flags |= qpdf_ef_normalize;
  521 + }
  522 + if (compress)
  523 + {
  524 + encode_flags |= qpdf_ef_compress;
  525 + }
  526 + }
  527 + return pipeStreamData(p, encode_flags, decode_level, false);
506 528 }
507 529  
508 530 void
... ... @@ -825,7 +847,7 @@ QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array,
825 847 all_description += ",";
826 848 }
827 849 all_description += " " + og;
828   - if (! stream.pipeStreamData(&buf, true, false, false, false))
  850 + if (! stream.pipeStreamData(&buf, 0, qpdf_dl_specialized))
829 851 {
830 852 QTC::TC("qpdf", "QPDFObjectHandle errors in parsecontent");
831 853 warn(stream.getOwningQPDF(),
... ...
libqpdf/QPDFWriter.cc
... ... @@ -54,8 +54,10 @@ QPDFWriter::init()
54 54 output_buffer = 0;
55 55 normalize_content_set = false;
56 56 normalize_content = false;
57   - stream_data_mode_set = false;
58   - stream_data_mode = qpdf_s_compress;
  57 + compress_streams = true;
  58 + compress_streams_set = false;
  59 + stream_decode_level = qpdf_dl_none;
  60 + stream_decode_level_set = false;
59 61 qdf_mode = false;
60 62 precheck_streams = false;
61 63 preserve_unreferenced_objects = false;
... ... @@ -162,8 +164,42 @@ QPDFWriter::setObjectStreamMode(qpdf_object_stream_e mode)
162 164 void
163 165 QPDFWriter::setStreamDataMode(qpdf_stream_data_e mode)
164 166 {
165   - this->stream_data_mode_set = true;
166   - this->stream_data_mode = mode;
  167 + switch (mode)
  168 + {
  169 + case qpdf_s_uncompress:
  170 + this->stream_decode_level =
  171 + std::max(qpdf_dl_generalized, this->stream_decode_level);
  172 + this->compress_streams = false;
  173 + break;
  174 +
  175 + case qpdf_s_preserve:
  176 + this->stream_decode_level = qpdf_dl_none;
  177 + this->compress_streams = false;
  178 + break;
  179 +
  180 + case qpdf_s_compress:
  181 + this->stream_decode_level =
  182 + std::max(qpdf_dl_generalized, this->stream_decode_level);
  183 + this->compress_streams = true;
  184 + break;
  185 + }
  186 + this->stream_decode_level_set = true;
  187 + this->compress_streams_set = true;
  188 +}
  189 +
  190 +
  191 +void
  192 +QPDFWriter::setCompressStreams(bool val)
  193 +{
  194 + this->compress_streams = val;
  195 + this->compress_streams_set = true;
  196 +}
  197 +
  198 +void
  199 +QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val)
  200 +{
  201 + this->stream_decode_level = val;
  202 + this->stream_decode_level_set = true;
167 203 }
168 204  
169 205 void
... ... @@ -1512,8 +1548,8 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
1512 1548 {
1513 1549 is_metadata = true;
1514 1550 }
1515   - bool filter = (this->stream_data_mode != qpdf_s_preserve);
1516   - if (this->stream_data_mode == qpdf_s_compress)
  1551 + bool filter = (this->compress_streams || this->stream_decode_level);
  1552 + if (this->compress_streams)
1517 1553 {
1518 1554 // Don't filter if the stream is already compressed with
1519 1555 // FlateDecode. We don't want to make it worse by getting
... ... @@ -1532,19 +1568,21 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
1532 1568 }
1533 1569 bool normalize = false;
1534 1570 bool compress = false;
  1571 + bool uncompress = false;
1535 1572 if (is_metadata &&
1536 1573 ((! this->encrypted) || (this->encrypt_metadata == false)))
1537 1574 {
1538 1575 QTC::TC("qpdf", "QPDFWriter not compressing metadata");
1539 1576 filter = true;
1540 1577 compress = false;
  1578 + uncompress = true;
1541 1579 }
1542 1580 else if (this->normalize_content && normalized_streams.count(old_og))
1543 1581 {
1544 1582 normalize = true;
1545 1583 filter = true;
1546 1584 }
1547   - else if (filter && (this->stream_data_mode == qpdf_s_compress))
  1585 + else if (filter && this->compress_streams)
1548 1586 {
1549 1587 compress = true;
1550 1588 QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream");
... ... @@ -1559,7 +1597,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
1559 1597 QTC::TC("qpdf", "QPDFWriter precheck stream");
1560 1598 Pl_Discard discard;
1561 1599 filter = object.pipeStreamData(
1562   - &discard, true, false, false, true);
  1600 + &discard, 0, qpdf_dl_all, true);
1563 1601 }
1564 1602 catch (std::exception&)
1565 1603 {
... ... @@ -1569,8 +1607,15 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
1569 1607  
1570 1608 pushPipeline(new Pl_Buffer("stream data"));
1571 1609 activatePipelineStack();
  1610 +
1572 1611 bool filtered =
1573   - object.pipeStreamData(this->pipeline, filter, normalize, compress);
  1612 + object.pipeStreamData(
  1613 + this->pipeline,
  1614 + (((filter && normalize) ? qpdf_ef_normalize : 0) |
  1615 + ((filter && compress) ? qpdf_ef_compress : 0)),
  1616 + (filter
  1617 + ? (uncompress ? qpdf_dl_all : this->stream_decode_level)
  1618 + : qpdf_dl_none));
1574 1619 PointerHolder<Buffer> stream_data;
1575 1620 popPipelineStack(&stream_data);
1576 1621 if (filtered)
... ... @@ -1717,8 +1762,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1717 1762  
1718 1763 // Set up a stream to write the stream data into a buffer.
1719 1764 Pipeline* next = pushPipeline(new Pl_Buffer("object stream"));
1720   - if (! ((this->stream_data_mode == qpdf_s_uncompress) ||
1721   - this->qdf_mode))
  1765 + if (! (this->stream_decode_level || this->qdf_mode))
1722 1766 {
1723 1767 compressed = true;
1724 1768 next = pushPipeline(
... ... @@ -2180,7 +2224,8 @@ QPDFWriter::prepareFileForWrite()
2180 2224 is_stream = true;
2181 2225 dict = node.getDict();
2182 2226 // See whether we are able to filter this stream.
2183   - filterable = node.pipeStreamData(0, true, false, false);
  2227 + filterable = node.pipeStreamData(
  2228 + 0, 0, this->stream_decode_level, true);
2184 2229 }
2185 2230 else if (pdf.getRoot().getObjectID() == node.getObjectID())
2186 2231 {
... ... @@ -2260,10 +2305,14 @@ QPDFWriter::write()
2260 2305 {
2261 2306 this->normalize_content = true;
2262 2307 }
2263   - if (! this->stream_data_mode_set)
  2308 + if (! this->compress_streams_set)
2264 2309 {
2265   - this->stream_data_mode = qpdf_s_uncompress;
  2310 + this->compress_streams = false;
2266 2311 }
  2312 + if (! this->stream_decode_level_set)
  2313 + {
  2314 + this->stream_decode_level = qpdf_dl_generalized;
  2315 + }
2267 2316 }
2268 2317  
2269 2318 if (this->encrypted)
... ... @@ -2272,7 +2321,7 @@ QPDFWriter::write()
2272 2321 this->preserve_encryption = false;
2273 2322 }
2274 2323 else if (this->normalize_content ||
2275   - (this->stream_data_mode == qpdf_s_uncompress) ||
  2324 + this->stream_decode_level ||
2276 2325 this->qdf_mode)
2277 2326 {
2278 2327 // Encryption makes looking at contents pretty useless. If
... ... @@ -2300,7 +2349,7 @@ QPDFWriter::write()
2300 2349 }
2301 2350  
2302 2351 if (this->qdf_mode || this->normalize_content ||
2303   - (this->stream_data_mode == qpdf_s_uncompress))
  2352 + this->stream_decode_level)
2304 2353 {
2305 2354 initializeSpecialStreams();
2306 2355 }
... ... @@ -2586,7 +2635,7 @@ QPDFWriter::writeXRefStream(int xref_id, int max_id, qpdf_offset_t max_offset,
2586 2635  
2587 2636 Pipeline* p = pushPipeline(new Pl_Buffer("xref stream"));
2588 2637 bool compressed = false;
2589   - if (! ((this->stream_data_mode == qpdf_s_uncompress) || this->qdf_mode))
  2638 + if (! (this->stream_decode_level || this->qdf_mode))
2590 2639 {
2591 2640 compressed = true;
2592 2641 if (! skip_compression)
... ...
libqpdf/QPDF_Stream.cc
... ... @@ -9,6 +9,8 @@
9 9 #include <qpdf/Pl_ASCII85Decoder.hh>
10 10 #include <qpdf/Pl_ASCIIHexDecoder.hh>
11 11 #include <qpdf/Pl_LZWDecoder.hh>
  12 +#include <qpdf/Pl_RunLength.hh>
  13 +#include <qpdf/Pl_DCT.hh>
12 14 #include <qpdf/Pl_Count.hh>
13 15  
14 16 #include <qpdf/QTC.hh>
... ... @@ -82,10 +84,10 @@ QPDF_Stream::getDict() const
82 84 }
83 85  
84 86 PointerHolder<Buffer>
85   -QPDF_Stream::getStreamData()
  87 +QPDF_Stream::getStreamData(qpdf_stream_decode_level_e decode_level)
86 88 {
87 89 Pl_Buffer buf("stream data buffer");
88   - if (! pipeStreamData(&buf, true, false, false, false))
  90 + if (! pipeStreamData(&buf, 0, decode_level, false))
89 91 {
90 92 throw std::logic_error("getStreamData called on unfilterable stream");
91 93 }
... ... @@ -97,7 +99,7 @@ PointerHolder<Buffer>
97 99 QPDF_Stream::getRawStreamData()
98 100 {
99 101 Pl_Buffer buf("stream data buffer");
100   - pipeStreamData(&buf, false, false, false, false);
  102 + pipeStreamData(&buf, 0, qpdf_dl_none, false);
101 103 QTC::TC("qpdf", "QPDF_Stream getRawStreamData");
102 104 return buf.getBuffer();
103 105 }
... ... @@ -178,6 +180,8 @@ QPDF_Stream::understandDecodeParams(
178 180  
179 181 bool
180 182 QPDF_Stream::filterable(std::vector<std::string>& filters,
  183 + bool& specialized_compression,
  184 + bool& lossy_compression,
181 185 int& predictor, int& columns,
182 186 bool& early_code_change)
183 187 {
... ... @@ -254,11 +258,20 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
254 258 filter = filter_abbreviations[filter];
255 259 }
256 260  
257   - if (! ((filter == "/Crypt") ||
258   - (filter == "/FlateDecode") ||
259   - (filter == "/LZWDecode") ||
260   - (filter == "/ASCII85Decode") ||
261   - (filter == "/ASCIIHexDecode")))
  261 + if (filter == "/RunLengthDecode")
  262 + {
  263 + specialized_compression = true;
  264 + }
  265 + else if (filter == "/DCTDecode")
  266 + {
  267 + specialized_compression = true;
  268 + lossy_compression = true;
  269 + }
  270 + else if (! ((filter == "/Crypt") ||
  271 + (filter == "/FlateDecode") ||
  272 + (filter == "/LZWDecode") ||
  273 + (filter == "/ASCII85Decode") ||
  274 + (filter == "/ASCIIHexDecode")))
262 275 {
263 276 filterable = false;
264 277 }
... ... @@ -350,17 +363,35 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
350 363 }
351 364  
352 365 bool
353   -QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool filter,
354   - bool normalize, bool compress,
  366 +QPDF_Stream::pipeStreamData(Pipeline* pipeline,
  367 + unsigned long encode_flags,
  368 + qpdf_stream_decode_level_e decode_level,
355 369 bool suppress_warnings)
356 370 {
357 371 std::vector<std::string> filters;
358 372 int predictor = 1;
359 373 int columns = 0;
360 374 bool early_code_change = true;
  375 + bool specialized_compression = false;
  376 + bool lossy_compression = false;
  377 + bool filter = (! ((encode_flags == 0) && (decode_level == qpdf_dl_none)));
361 378 if (filter)
362 379 {
363   - filter = filterable(filters, predictor, columns, early_code_change);
  380 + filter = filterable(filters, specialized_compression, lossy_compression,
  381 + predictor, columns, early_code_change);
  382 + if ((decode_level < qpdf_dl_all) && lossy_compression)
  383 + {
  384 + filter = false;
  385 + }
  386 + if ((decode_level < qpdf_dl_specialized) && specialized_compression)
  387 + {
  388 + filter = false;
  389 + }
  390 + QTC::TC("qpdf", "QPDF_Stream special filters",
  391 + (! filter) ? 0 :
  392 + lossy_compression ? 1 :
  393 + specialized_compression ? 2 :
  394 + 3);
364 395 }
365 396  
366 397 if (pipeline == 0)
... ... @@ -375,14 +406,14 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool filter,
375 406  
376 407 if (filter)
377 408 {
378   - if (compress)
  409 + if (encode_flags & qpdf_ef_compress)
379 410 {
380 411 pipeline = new Pl_Flate("compress object stream", pipeline,
381 412 Pl_Flate::a_deflate);
382 413 to_delete.push_back(pipeline);
383 414 }
384 415  
385   - if (normalize)
  416 + if (encode_flags & qpdf_ef_normalize)
386 417 {
387 418 pipeline = new Pl_QPDFTokenizer("normalizer", pipeline);
388 419 to_delete.push_back(pipeline);
... ... @@ -427,6 +458,17 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool filter,
427 458 early_code_change);
428 459 to_delete.push_back(pipeline);
429 460 }
  461 + else if (filter == "/RunLengthDecode")
  462 + {
  463 + pipeline = new Pl_RunLength("runlength decode", pipeline,
  464 + Pl_RunLength::a_decode);
  465 + to_delete.push_back(pipeline);
  466 + }
  467 + else if (filter == "/DCTDecode")
  468 + {
  469 + pipeline = new Pl_DCT("DCT decode", pipeline);
  470 + to_delete.push_back(pipeline);
  471 + }
430 472 else
431 473 {
432 474 throw std::logic_error(
... ...
libqpdf/QPDF_linearization.cc
... ... @@ -393,7 +393,7 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length)
393 393 this->file->getLastOffset(),
394 394 "hint table length mismatch");
395 395 }
396   - H.pipeStreamData(&pl, true, false, false);
  396 + H.pipeStreamData(&pl, 0, qpdf_dl_specialized);
397 397 return Hdict;
398 398 }
399 399  
... ...
libqpdf/qpdf/QPDF_Stream.hh
... ... @@ -22,10 +22,11 @@ class QPDF_Stream: public QPDFObject
22 22 QPDFObjectHandle getDict() const;
23 23  
24 24 // See comments in QPDFObjectHandle.hh for these methods.
25   - bool pipeStreamData(Pipeline*, bool filter,
26   - bool normalize, bool compress,
  25 + bool pipeStreamData(Pipeline*,
  26 + unsigned long encode_flags,
  27 + qpdf_stream_decode_level_e decode_level,
27 28 bool suppress_warnings);
28   - PointerHolder<Buffer> getStreamData();
  29 + PointerHolder<Buffer> getStreamData(qpdf_stream_decode_level_e);
29 30 PointerHolder<Buffer> getRawStreamData();
30 31 void replaceStreamData(PointerHolder<Buffer> data,
31 32 QPDFObjectHandle const& filter,
... ... @@ -52,6 +53,7 @@ class QPDF_Stream: public QPDFObject
52 53 std::string const& filter, QPDFObjectHandle decode_params,
53 54 int& predictor, int& columns, bool& early_code_change);
54 55 bool filterable(std::vector<std::string>& filters,
  56 + bool& specialized_compression, bool& lossy_compression,
55 57 int& predictor, int& columns, bool& early_code_change);
56 58 void warn(QPDFExc const& e);
57 59  
... ...
manual/qpdf-manual.xml
... ... @@ -853,28 +853,90 @@ outfile.pdf</option>
853 853 developers. The following options are available:
854 854 <variablelist>
855 855 <varlistentry>
  856 + <term><option>--compress-streams=<replaceable>[yn]</replaceable></option></term>
  857 + <listitem>
  858 + <para>
  859 + By default, or with <option>--compress-streams=y</option>,
  860 + qpdf will compress any stream with no other filters applied to
  861 + it with the <literal>/FlateDecode</literal> filter when it
  862 + writes it. To suppress this behavior and preserve uncompressed
  863 + streams as uncompressed, use
  864 + <option>--compress-streams=n</option>.
  865 + </para>
  866 + </listitem>
  867 + </varlistentry>
  868 + <varlistentry>
  869 + <term><option>--decode-level=<replaceable>option</replaceable></option></term>
  870 + <listitem>
  871 + <para>
  872 + Controls which streams qpdf tries to decode. The default is
  873 + <option>generalized</option>. The following options are
  874 + available:
  875 + <itemizedlist>
  876 + <listitem>
  877 + <para>
  878 + <option>none</option>: do not attempt to decode any streams
  879 + </para>
  880 + </listitem>
  881 + <listitem>
  882 + <para>
  883 + <option>generalized</option>: decode streams filtered with
  884 + supported generalized filters: <option>/LZWDecode</option>,
  885 + <option>/FlateDecode</option>,
  886 + <option>/ASCII85Decode</option>, and
  887 + <option>/ASCIIHexDecode</option>
  888 + </para>
  889 + </listitem>
  890 + <listitem>
  891 + <para>
  892 + <option>specialized</option>: in addition to generalized,
  893 + decode streams with supported non-lossy specialized
  894 + filters; currently this is just <option>/RunLengthDecode</option>
  895 + </para>
  896 + </listitem>
  897 + <listitem>
  898 + <para>
  899 + <option>all</option>: in addition to generalized and
  900 + specialized, decode streams with supported lossy filters;
  901 + currently this is just <option>/DCTDecode</option> (JPEG)
  902 + </para>
  903 + </listitem>
  904 + </itemizedlist>
  905 + </para>
  906 + </listitem>
  907 + </varlistentry>
  908 + <varlistentry>
856 909 <term><option>--stream-data=<replaceable>option</replaceable></option></term>
857 910 <listitem>
858 911 <para>
859   - Controls transformation of stream data. The value of
860   - <option><replaceable>option</replaceable></option> may be one
861   - of the following:
  912 + Controls transformation of stream data. This option predates
  913 + the <option>--compress-streams</option> and
  914 + <option>--decode-level</option> options. Those options can be
  915 + used to achieve the same effect with more control. The value
  916 + of <option><replaceable>option</replaceable></option> may be
  917 + one of the following:
862 918 <itemizedlist>
863 919 <listitem>
864 920 <para>
865 921 <option>compress</option>: recompress stream data when
866   - possible (default)
  922 + possible (default); equivalent to
  923 + <option>--compress-streams=y</option>
  924 + <option>--decode-level=generalized</option>
867 925 </para>
868 926 </listitem>
869 927 <listitem>
870 928 <para>
871   - <option>preserve</option>: leave all stream data as is
  929 + <option>preserve</option>: leave all stream data as is;
  930 + equivalent to <option>--compress-streams=n</option>
  931 + <option>--decode-level=none</option>
872 932 </para>
873 933 </listitem>
874 934 <listitem>
875 935 <para>
876 936 <option>uncompress</option>: uncompress stream data when
877   - possible
  937 + possible; equivalent to
  938 + <option>--compress-streams=n</option>
  939 + <option>--decode-level=generalized</option>
878 940 </para>
879 941 </listitem>
880 942 </itemizedlist>
... ...
qpdf/qpdf.cc
... ... @@ -76,6 +76,10 @@ struct Options
76 76 use_aes(false),
77 77 stream_data_set(false),
78 78 stream_data_mode(qpdf_s_compress),
  79 + compress_streams(true),
  80 + compress_streams_set(false),
  81 + decode_level(qpdf_dl_generalized),
  82 + decode_level_set(false),
79 83 normalize_set(false),
80 84 normalize(false),
81 85 suppress_recovery(false),
... ... @@ -134,6 +138,10 @@ struct Options
134 138 bool use_aes;
135 139 bool stream_data_set;
136 140 qpdf_stream_data_e stream_data_mode;
  141 + bool compress_streams;
  142 + bool compress_streams_set;
  143 + qpdf_stream_decode_level_e decode_level;
  144 + bool decode_level_set;
137 145 bool normalize_set;
138 146 bool normalize;
139 147 bool suppress_recovery;
... ... @@ -357,6 +365,8 @@ the output file. Mostly these are of use only to people who are very\n\
357 365 familiar with the PDF file format or who are PDF developers.\n\
358 366 \n\
359 367 --stream-data=option controls transformation of stream data (below)\n\
  368 +--compress-streams=[yn] controls whether to compress streams on output\n\
  369 +--decode-level=option controls how to filter streams from the input\n\
360 370 --normalize-content=[yn] enables or disables normalization of content streams\n\
361 371 --suppress-recovery prevents qpdf from attempting to recover damaged files\n\
362 372 --object-streams=mode controls handing of object streams\n\
... ... @@ -383,6 +393,19 @@ Values for object stream mode:\n\
383 393 disable don't write any object streams\n\
384 394 generate use object streams wherever possible\n\
385 395 \n\
  396 +When --compress-streams=n is specified, this overrides the default behavior\n\
  397 +of qpdf, which is to attempt compress uncompressed streams. Setting\n\
  398 +stream data mode to uncompress or preserve has the same effect.\n\
  399 +\n\
  400 +The --decode-level parameter may be set to one of the following values:\n\
  401 + none do not decode streams\n\
  402 + generalized decode streams compressed with generalized filters\n\
  403 + including LZW, Flate, and the ASCII encoding filters.\n\
  404 + specialized additionally decode streams with non-lossy specialized\n\
  405 + filters including RunLength\n\
  406 + all additionally decode streams with lossy filters\n\
  407 + including DCT (JPEG)\n\
  408 +\n\
386 409 In qdf mode, by default, content normalization is turned on, and the\n\
387 410 stream data mode is set to uncompress.\n\
388 411 \n\
... ... @@ -1344,15 +1367,68 @@ static void parse_options(int argc, char* argv[], Options&amp; o)
1344 1367 usage("invalid stream-data option");
1345 1368 }
1346 1369 }
  1370 + else if (strcmp(arg, "compress-streams") == 0)
  1371 + {
  1372 + o.compress_streams_set = true;
  1373 + if (parameter && (strcmp(parameter, "y") == 0))
  1374 + {
  1375 + o.compress_streams = true;
  1376 + }
  1377 + else if (parameter && (strcmp(parameter, "n") == 0))
  1378 + {
  1379 + o.compress_streams = false;
  1380 + }
  1381 + else
  1382 + {
  1383 + usage("--compress-streams must be given as"
  1384 + " --compress-streams=[yn]");
  1385 + }
  1386 + }
  1387 + else if (strcmp(arg, "decode-level") == 0)
  1388 + {
  1389 + if (parameter == 0)
  1390 + {
  1391 + usage("--decode-level must be given as"
  1392 + "--decode-level=option");
  1393 + }
  1394 + o.decode_level_set = true;
  1395 + if (strcmp(parameter, "none") == 0)
  1396 + {
  1397 + o.decode_level = qpdf_dl_none;
  1398 + }
  1399 + else if (strcmp(parameter, "generalized") == 0)
  1400 + {
  1401 + o.decode_level = qpdf_dl_generalized;
  1402 + }
  1403 + else if (strcmp(parameter, "specialized") == 0)
  1404 + {
  1405 + o.decode_level = qpdf_dl_specialized;
  1406 + }
  1407 + else if (strcmp(parameter, "all") == 0)
  1408 + {
  1409 + o.decode_level = qpdf_dl_all;
  1410 + }
  1411 + else
  1412 + {
  1413 + usage("invalid stream-data option");
  1414 + }
  1415 + }
1347 1416 else if (strcmp(arg, "normalize-content") == 0)
1348 1417 {
1349   - if ((parameter == 0) || (*parameter == '\0'))
  1418 + o.normalize_set = true;
  1419 + if (parameter && (strcmp(parameter, "y") == 0))
  1420 + {
  1421 + o.normalize = true;
  1422 + }
  1423 + else if (parameter && (strcmp(parameter, "n") == 0))
  1424 + {
  1425 + o.normalize = false;
  1426 + }
  1427 + else
1350 1428 {
1351 1429 usage("--normalize-content must be given as"
1352 1430 " --normalize-content=[yn]");
1353 1431 }
1354   - o.normalize_set = true;
1355   - o.normalize = (parameter[0] == 'y');
1356 1432 }
1357 1433 else if (strcmp(arg, "suppress-recovery") == 0)
1358 1434 {
... ... @@ -1606,7 +1682,7 @@ static void do_check(QPDF&amp; pdf, Options&amp; o, int&amp; exit_code)
1606 1682 QPDFWriter w(pdf);
1607 1683 Pl_Discard discard;
1608 1684 w.setOutputPipeline(&discard);
1609   - w.setStreamDataMode(qpdf_s_uncompress);
  1685 + w.setDecodeLevel(qpdf_dl_all);
1610 1686 w.write();
1611 1687  
1612 1688 // Parse all content streams
... ... @@ -1667,7 +1743,7 @@ static void do_show_obj(QPDF&amp; pdf, Options&amp; o, int&amp; exit_code)
1667 1743 {
1668 1744 bool filter = o.show_filtered_stream_data;
1669 1745 if (filter &&
1670   - (! obj.pipeStreamData(0, true, false, false)))
  1746 + (! obj.pipeStreamData(0, 0, qpdf_dl_all)))
1671 1747 {
1672 1748 QTC::TC("qpdf", "qpdf unable to filter");
1673 1749 std::cerr << "Unable to filter stream data."
... ... @@ -1678,7 +1754,10 @@ static void do_show_obj(QPDF&amp; pdf, Options&amp; o, int&amp; exit_code)
1678 1754 {
1679 1755 QUtil::binary_stdout();
1680 1756 Pl_StdioFile out("stdout", stdout);
1681   - obj.pipeStreamData(&out, filter, o.normalize, false);
  1757 + obj.pipeStreamData(
  1758 + &out,
  1759 + (filter && o.normalize) ? qpdf_ef_normalize : 0,
  1760 + filter ? qpdf_dl_all : qpdf_dl_none);
1682 1761 }
1683 1762 }
1684 1763 else
... ... @@ -2035,6 +2114,14 @@ static void set_writer_options(QPDF&amp; pdf, Options&amp; o, QPDFWriter&amp; w)
2035 2114 {
2036 2115 w.setStreamDataMode(o.stream_data_mode);
2037 2116 }
  2117 + if (o.compress_streams_set)
  2118 + {
  2119 + w.setCompressStreams(o.compress_streams);
  2120 + }
  2121 + if (o.decode_level_set)
  2122 + {
  2123 + w.setDecodeLevel(o.decode_level);
  2124 + }
2038 2125 if (o.decrypt)
2039 2126 {
2040 2127 w.setPreserveEncryption(false);
... ...
qpdf/qpdf.testcov
... ... @@ -296,3 +296,4 @@ QPDF ignore length error xref entry 0
296 296 QPDF_encryption pad short parameter 0
297 297 QPDFWriter ignore self-referential object stream 0
298 298 QPDFObjectHandle found old angle 1
  299 +QPDF_Stream special filters 3
... ...
qpdf/qtest/qpdf.test
... ... @@ -937,6 +937,39 @@ $td-&gt;runtest(&quot;check output&quot;,
937 937 {$td->FILE => "bad-data-precheck.pdf"});
938 938 show_ntests();
939 939 # ----------
  940 +$td->notify("--- Decode levels ---");
  941 +$n_tests += 10;
  942 +
  943 +# image-streams.pdf is the output of examples/pdf-create.
  944 +# examples/pdf-create validates the actual image data.
  945 +foreach my $l (qw(none generalized specialized all))
  946 +{
  947 + $td->runtest("image-streams: $l",
  948 + {$td->COMMAND =>
  949 + "qpdf image-streams.pdf --compress-streams=n" .
  950 + " --decode-level=$l a.pdf"},
  951 + {$td->STRING => "", $td->EXIT_STATUS => 0},
  952 + $td->NORMALIZE_NEWLINES);
  953 + $td->runtest("check image-streams: $l",
  954 + {$td->COMMAND => "test_driver 39 a.pdf"},
  955 + {$td->FILE => "image-streams-$l.out", $td->EXIT_STATUS => 0},
  956 + $td->NORMALIZE_NEWLINES);
  957 +}
  958 +
  959 +# Bad JPEG data
  960 +$td->runtest("check finds bad jpeg data",
  961 + {$td->COMMAND => "qpdf --check bad-jpeg.pdf"},
  962 + {$td->FILE => "bad-jpeg-check.out",
  963 + $td->EXIT_STATUS => 3},
  964 + $td->NORMALIZE_NEWLINES);
  965 +$td->runtest("get data",
  966 + {$td->COMMAND => "qpdf --show-object=6" .
  967 + " --filtered-stream-data bad-jpeg.pdf"},
  968 + {$td->FILE => "bad-jpeg-show.out", $td->EXIT_STATUS => 3},
  969 + $td->NORMALIZE_NEWLINES);
  970 +
  971 +show_ntests();
  972 +# ----------
940 973 $td->notify("--- Preserve unreferenced objects ---");
941 974 $n_tests += 4;
942 975  
... ... @@ -1429,8 +1462,8 @@ $td-&gt;runtest(&quot;show-page-1-image&quot;,
1429 1462 $td->EXIT_STATUS => 0});
1430 1463  
1431 1464 $td->runtest("unfilterable stream data",
1432   - {$td->COMMAND => "qpdf encrypted-with-images.pdf" .
1433   - " --show-object=8 --filtered-stream-data"},
  1465 + {$td->COMMAND => "qpdf unfilterable.pdf" .
  1466 + " --show-object=4 --filtered-stream-data"},
1434 1467 {$td->FILE => "show-unfilterable.out",
1435 1468 $td->EXIT_STATUS => 2},
1436 1469 $td->NORMALIZE_NEWLINES);
... ... @@ -1461,7 +1494,7 @@ foreach my $f (qw(compressed-metadata.pdf enc-base.pdf))
1461 1494 {
1462 1495 foreach my $w (qw(compress preserve))
1463 1496 {
1464   - $td->runtest("$w streams",
  1497 + $td->runtest("$w streams ($f)",
1465 1498 {$td->COMMAND => "qpdf --stream-data=$w $f a.pdf"},
1466 1499 {$td->STRING => "", $td->EXIT_STATUS => 0});
1467 1500 check_metadata("a.pdf", 0, 1);
... ...
qpdf/qtest/qpdf/bad-jpeg-check.out 0 → 100644
  1 +checking bad-jpeg.pdf
  2 +PDF Version: 1.3
  3 +File is not encrypted
  4 +File is not linearized
  5 +WARNING: bad-jpeg.pdf (file position 735): error decoding stream data for object 6 0: Not a JPEG file: starts with 0x77 0x77
... ...
qpdf/qtest/qpdf/bad-jpeg-out.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/bad-jpeg-show.out 0 → 100644
  1 +WARNING: bad-jpeg.pdf (file position 735): error decoding stream data for object 6 0: Not a JPEG file: starts with 0x77 0x77
  2 +qpdf: operation succeeded with warnings; resulting file may have some problems
... ...
qpdf/qtest/qpdf/bad-jpeg.out 0 → 100644
  1 +WARNING: bad-jpeg.pdf (file position 735): error decoding stream data for object 6 0: Not a JPEG file: starts with 0x77 0x77
  2 +qpdf: operation succeeded with warnings; resulting file may have some problems
... ...
qpdf/qtest/qpdf/bad-jpeg.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/c-write-warnings.out
... ... @@ -18,8 +18,3 @@ warning: bad33.pdf (file position 629): stream filter type is not name or array
18 18 file: bad33.pdf
19 19 pos : 629
20 20 text: stream filter type is not name or array
21   -warning: bad33.pdf (file position 629): stream filter type is not name or array
22   - code: 5
23   - file: bad33.pdf
24   - pos : 629
25   - text: stream filter type is not name or array
... ...
qpdf/qtest/qpdf/image-streams-all.out 0 → 100644
  1 +page 1
  2 +filter: null, color space: /DeviceCMYK
  3 +page 2
  4 +filter: null, color space: /DeviceCMYK
  5 +page 3
  6 +filter: null, color space: /DeviceCMYK
  7 +page 4
  8 +filter: null, color space: /DeviceRGB
  9 +page 5
  10 +filter: null, color space: /DeviceRGB
  11 +page 6
  12 +filter: null, color space: /DeviceRGB
  13 +page 7
  14 +filter: null, color space: /DeviceGray
  15 +page 8
  16 +filter: null, color space: /DeviceGray
  17 +page 9
  18 +filter: null, color space: /DeviceGray
  19 +test 39 done
... ...
qpdf/qtest/qpdf/image-streams-generalized.out 0 → 100644
  1 +page 1
  2 +filter: null, color space: /DeviceCMYK
  3 +page 2
  4 +filter: /DCTDecode, color space: /DeviceCMYK
  5 +page 3
  6 +filter: /RunLengthDecode, color space: /DeviceCMYK
  7 +page 4
  8 +filter: null, color space: /DeviceRGB
  9 +page 5
  10 +filter: /DCTDecode, color space: /DeviceRGB
  11 +page 6
  12 +filter: /RunLengthDecode, color space: /DeviceRGB
  13 +page 7
  14 +filter: null, color space: /DeviceGray
  15 +page 8
  16 +filter: /DCTDecode, color space: /DeviceGray
  17 +page 9
  18 +filter: /RunLengthDecode, color space: /DeviceGray
  19 +test 39 done
... ...
qpdf/qtest/qpdf/image-streams-none.out 0 → 100644
  1 +page 1
  2 +filter: /FlateDecode, color space: /DeviceCMYK
  3 +page 2
  4 +filter: /DCTDecode, color space: /DeviceCMYK
  5 +page 3
  6 +filter: /RunLengthDecode, color space: /DeviceCMYK
  7 +page 4
  8 +filter: /FlateDecode, color space: /DeviceRGB
  9 +page 5
  10 +filter: /DCTDecode, color space: /DeviceRGB
  11 +page 6
  12 +filter: /RunLengthDecode, color space: /DeviceRGB
  13 +page 7
  14 +filter: /FlateDecode, color space: /DeviceGray
  15 +page 8
  16 +filter: /DCTDecode, color space: /DeviceGray
  17 +page 9
  18 +filter: /RunLengthDecode, color space: /DeviceGray
  19 +test 39 done
... ...
qpdf/qtest/qpdf/image-streams-specialized.out 0 → 100644
  1 +page 1
  2 +filter: null, color space: /DeviceCMYK
  3 +page 2
  4 +filter: /DCTDecode, color space: /DeviceCMYK
  5 +page 3
  6 +filter: null, color space: /DeviceCMYK
  7 +page 4
  8 +filter: null, color space: /DeviceRGB
  9 +page 5
  10 +filter: /DCTDecode, color space: /DeviceRGB
  11 +page 6
  12 +filter: null, color space: /DeviceRGB
  13 +page 7
  14 +filter: null, color space: /DeviceGray
  15 +page 8
  16 +filter: /DCTDecode, color space: /DeviceGray
  17 +page 9
  18 +filter: null, color space: /DeviceGray
  19 +test 39 done
... ...
qpdf/qtest/qpdf/image-streams.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/unfilterable.pdf 0 → 100644
No preview for this file type
qpdf/test_driver.cc
... ... @@ -314,15 +314,15 @@ void runtest(int n, char const* filename1, char const* arg2)
314 314 std::cout.flush();
315 315 QUtil::binary_stdout();
316 316 PointerHolder<Pl_StdioFile> out = new Pl_StdioFile("raw", stdout);
317   - qtest.pipeStreamData(out.getPointer(), false, false, false);
  317 + qtest.pipeStreamData(out.getPointer(), 0, qpdf_dl_none);
318 318  
319 319 std::cout << std::endl << "Uncompressed stream data:" << std::endl;
320   - if (qtest.pipeStreamData(0, true, false, false))
  320 + if (qtest.pipeStreamData(0, 0, qpdf_dl_all))
321 321 {
322 322 std::cout.flush();
323 323 QUtil::binary_stdout();
324 324 out = new Pl_StdioFile("filtered", stdout);
325   - qtest.pipeStreamData(out.getPointer(), true, false, false);
  325 + qtest.pipeStreamData(out.getPointer(), 0, qpdf_dl_all);
326 326 std::cout << std::endl << "End of stream data" << std::endl;
327 327 }
328 328 else
... ... @@ -362,7 +362,7 @@ void runtest(int n, char const* filename1, char const* arg2)
362 362 QPDFObjectHandle contents = page.getKey("/Contents");
363 363 QUtil::binary_stdout();
364 364 PointerHolder<Pl_StdioFile> out = new Pl_StdioFile("filtered", stdout);
365   - contents.pipeStreamData(out.getPointer(), true, false, false);
  365 + contents.pipeStreamData(out.getPointer(), 0, qpdf_dl_generalized);
366 366 }
367 367 else if (n == 3)
368 368 {
... ... @@ -375,7 +375,8 @@ void runtest(int n, char const* filename1, char const* arg2)
375 375 QUtil::binary_stdout();
376 376 PointerHolder<Pl_StdioFile> out =
377 377 new Pl_StdioFile("tokenized stream", stdout);
378   - stream.pipeStreamData(out.getPointer(), true, true, false);
  378 + stream.pipeStreamData(out.getPointer(),
  379 + qpdf_ef_normalize, qpdf_dl_generalized);
379 380 }
380 381 }
381 382 else if (n == 4)
... ... @@ -497,7 +498,7 @@ void runtest(int n, char const* filename1, char const* arg2)
497 498 throw std::logic_error("test 6 run on file with no metadata");
498 499 }
499 500 Pl_Buffer bufpl("buffer");
500   - metadata.pipeStreamData(&bufpl, false, false, false);
  501 + metadata.pipeStreamData(&bufpl, 0, qpdf_dl_none);
501 502 Buffer* buf = bufpl.getBuffer();
502 503 unsigned char const* data = buf->getBuffer();
503 504 bool cleartext = false;
... ... @@ -1277,7 +1278,7 @@ void runtest(int n, char const* filename1, char const* arg2)
1277 1278 QPDFObjectHandle stream = item.getKey("/EF").getKey("/F");
1278 1279 Pl_Buffer p1("buffer");
1279 1280 Pl_Flate p2("compress", &p1, Pl_Flate::a_inflate);
1280   - stream.pipeStreamData(&p2, false, false, false);
  1281 + stream.pipeStreamData(&p2, 0, qpdf_dl_none);
1281 1282 PointerHolder<Buffer> buf = p1.getBuffer();
1282 1283 std::string data = std::string(
1283 1284 reinterpret_cast<char const*>(buf->getBuffer()),
... ... @@ -1309,6 +1310,30 @@ void runtest(int n, char const* filename1, char const* arg2)
1309 1310 std::cout << qtest.getArrayItem(i).unparseResolved() << std::endl;
1310 1311 }
1311 1312 }
  1313 + else if (n == 39)
  1314 + {
  1315 + // Display image filter and color space for each image on each page
  1316 + std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
  1317 + int pageno = 0;
  1318 + for (std::vector<QPDFObjectHandle>::iterator p_iter =
  1319 + pages.begin();
  1320 + p_iter != pages.end(); ++p_iter)
  1321 + {
  1322 + std::cout << "page " << ++pageno << std::endl;
  1323 + std::map<std::string, QPDFObjectHandle> images =
  1324 + (*p_iter).getPageImages();
  1325 + for (std::map<std::string, QPDFObjectHandle>::iterator i_iter =
  1326 + images.begin(); i_iter != images.end(); ++i_iter)
  1327 + {
  1328 + QPDFObjectHandle image_dict = (*i_iter).second.getDict();
  1329 + std::cout << "filter: "
  1330 + << image_dict.getKey("/Filter").unparseResolved()
  1331 + << ", color space: "
  1332 + << image_dict.getKey("/ColorSpace").unparseResolved()
  1333 + << std::endl;
  1334 + }
  1335 + }
  1336 + }
1312 1337 else
1313 1338 {
1314 1339 throw std::runtime_error(std::string("invalid test ") +
... ...
qpdf/test_large_file.cc
... ... @@ -273,7 +273,7 @@ static void check_image(int pageno, QPDFObjectHandle page)
273 273 QPDFObjectHandle image =
274 274 page.getKey("/Resources").getKey("/XObject").getKey("/Im1");
275 275 ImageChecker ic(pageno);
276   - image.pipeStreamData(&ic, true, false, false);
  276 + image.pipeStreamData(&ic, 0, qpdf_dl_specialized);
277 277 }
278 278  
279 279 static void check_pdf(char const* filename)
... ...