Commit d31a7b76e7fa6fc6fa31d94394b59f990920d09c

Authored by Jay Berkenbilt
1 parent eaacf940

Improve message for stream decoding error

Tweak the message so that we inform the user that we are mitigating
data loss.
ChangeLog
1 2017-09-12 Jay Berkenbilt <ejb@ql.org> 1 2017-09-12 Jay Berkenbilt <ejb@ql.org>
2 2
  3 + * Improve the error message that is issued when QPDFWriter
  4 + encounters a stream that can't be decoded. In particular, mention
  5 + that the stream will be copied without filtering to avoid data
  6 + loss.
  7 +
3 * Add new methods to the C API to correspond to new additions to 8 * Add new methods to the C API to correspond to new additions to
4 QPDFWriter: 9 QPDFWriter:
5 - qpdf_set_compress_streams 10 - qpdf_set_compress_streams
@@ -3,10 +3,6 @@ Before final 7.0.0 @@ -3,10 +3,6 @@ Before final 7.0.0
3 3
4 * Create release notes 4 * Create release notes
5 5
6 - * See if the error message that gets generated when retrying a stream  
7 - without filtering after error detection can be less scary.  
8 - Communicate that the original stream data is being preserved.  
9 -  
10 Soon 6 Soon
11 ==== 7 ====
12 8
include/qpdf/QPDF.hh
@@ -554,11 +554,13 @@ class QPDF @@ -554,11 +554,13 @@ class QPDF
554 static bool pipeStreamData(QPDF* qpdf, int objid, int generation, 554 static bool pipeStreamData(QPDF* qpdf, int objid, int generation,
555 qpdf_offset_t offset, size_t length, 555 qpdf_offset_t offset, size_t length,
556 QPDFObjectHandle dict, 556 QPDFObjectHandle dict,
557 - Pipeline* pipeline, bool suppress_warnings) 557 + Pipeline* pipeline,
  558 + bool suppress_warnings,
  559 + bool will_retry)
558 { 560 {
559 return qpdf->pipeStreamData( 561 return qpdf->pipeStreamData(
560 objid, generation, offset, length, dict, pipeline, 562 objid, generation, offset, length, dict, pipeline,
561 - suppress_warnings); 563 + suppress_warnings, will_retry);
562 } 564 }
563 }; 565 };
564 friend class Pipe; 566 friend class Pipe;
@@ -688,7 +690,8 @@ class QPDF @@ -688,7 +690,8 @@ class QPDF
688 qpdf_offset_t offset, size_t length, 690 qpdf_offset_t offset, size_t length,
689 QPDFObjectHandle dict, 691 QPDFObjectHandle dict,
690 Pipeline* pipeline, 692 Pipeline* pipeline,
691 - bool suppress_warnings); 693 + bool suppress_warnings,
  694 + bool will_retry);
692 695
693 // For QPDFWriter: 696 // For QPDFWriter:
694 697
include/qpdf/QPDFObjectHandle.hh
@@ -420,12 +420,21 @@ class QPDFObjectHandle @@ -420,12 +420,21 @@ class QPDFObjectHandle
420 // configured filters. QPDFWriter handles this by attempting to 420 // configured filters. QPDFWriter handles this by attempting to
421 // get the stream data without filtering, but callers should 421 // get the stream data without filtering, but callers should
422 // consider a false return value when decode_level is not 422 // consider a false return value when decode_level is not
423 - // qpdf_dl_none to be a potential loss of data. 423 + // qpdf_dl_none to be a potential loss of data. If you intend to
  424 + // retry in that case, pass true as the value of will_retry. This
  425 + // changes the warning issued by the library to indicate that the
  426 + // operation will be retried without filtering to avoid data loss.
424 QPDF_DLL 427 QPDF_DLL
425 bool pipeStreamData(Pipeline*, 428 bool pipeStreamData(Pipeline*,
426 unsigned long encode_flags, 429 unsigned long encode_flags,
427 qpdf_stream_decode_level_e decode_level, 430 qpdf_stream_decode_level_e decode_level,
428 bool suppress_warnings = false); 431 bool suppress_warnings = false);
  432 + QPDF_DLL
  433 + bool pipeStreamData(Pipeline*,
  434 + unsigned long encode_flags,
  435 + qpdf_stream_decode_level_e decode_level,
  436 + bool suppress_warnings,
  437 + bool will_retry);
429 438
430 // Legacy pipeStreamData. This maps to the flags-based 439 // Legacy pipeStreamData. This maps to the flags-based
431 // pipeStreamData as follows: 440 // pipeStreamData as follows:
libqpdf/QPDF.cc
@@ -2382,7 +2382,8 @@ QPDF::pipeStreamData(int objid, int generation, @@ -2382,7 +2382,8 @@ QPDF::pipeStreamData(int objid, int generation,
2382 qpdf_offset_t offset, size_t length, 2382 qpdf_offset_t offset, size_t length,
2383 QPDFObjectHandle stream_dict, 2383 QPDFObjectHandle stream_dict,
2384 Pipeline* pipeline, 2384 Pipeline* pipeline,
2385 - bool suppress_warnings) 2385 + bool suppress_warnings,
  2386 + bool will_retry)
2386 { 2387 {
2387 bool success = false; 2388 bool success = false;
2388 std::vector<PointerHolder<Pipeline> > to_delete; 2389 std::vector<PointerHolder<Pipeline> > to_delete;
@@ -2430,6 +2431,13 @@ QPDF::pipeStreamData(int objid, int generation, @@ -2430,6 +2431,13 @@ QPDF::pipeStreamData(int objid, int generation,
2430 "error decoding stream data for object " + 2431 "error decoding stream data for object " +
2431 QUtil::int_to_string(objid) + " " + 2432 QUtil::int_to_string(objid) + " " +
2432 QUtil::int_to_string(generation) + ": " + e.what())); 2433 QUtil::int_to_string(generation) + ": " + e.what()));
  2434 + if (will_retry)
  2435 + {
  2436 + warn(QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(),
  2437 + "", this->m->file->getLastOffset(),
  2438 + "stream will be re-processed without"
  2439 + " filtering to avoid data loss"));
  2440 + }
2433 } 2441 }
2434 } 2442 }
2435 if (! success) 2443 if (! success)
libqpdf/QPDFObjectHandle.cc
@@ -501,9 +501,19 @@ QPDFObjectHandle::pipeStreamData(Pipeline* p, @@ -501,9 +501,19 @@ QPDFObjectHandle::pipeStreamData(Pipeline* p,
501 qpdf_stream_decode_level_e decode_level, 501 qpdf_stream_decode_level_e decode_level,
502 bool suppress_warnings) 502 bool suppress_warnings)
503 { 503 {
  504 + return pipeStreamData(
  505 + p, encode_flags, decode_level, suppress_warnings, false);
  506 +}
  507 +
  508 +bool
  509 +QPDFObjectHandle::pipeStreamData(Pipeline* p,
  510 + unsigned long encode_flags,
  511 + qpdf_stream_decode_level_e decode_level,
  512 + bool suppress_warnings, bool will_retry)
  513 +{
504 assertStream(); 514 assertStream();
505 return dynamic_cast<QPDF_Stream*>(obj.getPointer())->pipeStreamData( 515 return dynamic_cast<QPDF_Stream*>(obj.getPointer())->pipeStreamData(
506 - p, encode_flags, decode_level, suppress_warnings); 516 + p, encode_flags, decode_level, suppress_warnings, will_retry);
507 } 517 }
508 518
509 bool 519 bool
libqpdf/QPDFWriter.cc
@@ -1623,7 +1623,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, @@ -1623,7 +1623,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
1623 ((filter && compress) ? qpdf_ef_compress : 0)), 1623 ((filter && compress) ? qpdf_ef_compress : 0)),
1624 (filter 1624 (filter
1625 ? (uncompress ? qpdf_dl_all : this->m->stream_decode_level) 1625 ? (uncompress ? qpdf_dl_all : this->m->stream_decode_level)
1626 - : qpdf_dl_none)); 1626 + : qpdf_dl_none), false, (attempt == 1));
1627 popPipelineStack(&stream_data); 1627 popPipelineStack(&stream_data);
1628 if (filter && (! filtered)) 1628 if (filter && (! filtered))
1629 { 1629 {
libqpdf/QPDF_Stream.cc
@@ -94,7 +94,7 @@ PointerHolder<Buffer> @@ -94,7 +94,7 @@ PointerHolder<Buffer>
94 QPDF_Stream::getStreamData(qpdf_stream_decode_level_e decode_level) 94 QPDF_Stream::getStreamData(qpdf_stream_decode_level_e decode_level)
95 { 95 {
96 Pl_Buffer buf("stream data buffer"); 96 Pl_Buffer buf("stream data buffer");
97 - if (! pipeStreamData(&buf, 0, decode_level, false)) 97 + if (! pipeStreamData(&buf, 0, decode_level, false, false))
98 { 98 {
99 throw std::logic_error("getStreamData called on unfilterable stream"); 99 throw std::logic_error("getStreamData called on unfilterable stream");
100 } 100 }
@@ -106,7 +106,7 @@ PointerHolder<Buffer> @@ -106,7 +106,7 @@ PointerHolder<Buffer>
106 QPDF_Stream::getRawStreamData() 106 QPDF_Stream::getRawStreamData()
107 { 107 {
108 Pl_Buffer buf("stream data buffer"); 108 Pl_Buffer buf("stream data buffer");
109 - pipeStreamData(&buf, 0, qpdf_dl_none, false); 109 + pipeStreamData(&buf, 0, qpdf_dl_none, false, false);
110 QTC::TC("qpdf", "QPDF_Stream getRawStreamData"); 110 QTC::TC("qpdf", "QPDF_Stream getRawStreamData");
111 return buf.getBuffer(); 111 return buf.getBuffer();
112 } 112 }
@@ -373,7 +373,7 @@ bool @@ -373,7 +373,7 @@ bool
373 QPDF_Stream::pipeStreamData(Pipeline* pipeline, 373 QPDF_Stream::pipeStreamData(Pipeline* pipeline,
374 unsigned long encode_flags, 374 unsigned long encode_flags,
375 qpdf_stream_decode_level_e decode_level, 375 qpdf_stream_decode_level_e decode_level,
376 - bool suppress_warnings) 376 + bool suppress_warnings, bool will_retry)
377 { 377 {
378 std::vector<std::string> filters; 378 std::vector<std::string> filters;
379 int predictor = 1; 379 int predictor = 1;
@@ -540,7 +540,8 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, @@ -540,7 +540,8 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline,
540 if (! QPDF::Pipe::pipeStreamData(this->qpdf, this->objid, this->generation, 540 if (! QPDF::Pipe::pipeStreamData(this->qpdf, this->objid, this->generation,
541 this->offset, this->length, 541 this->offset, this->length,
542 this->stream_dict, pipeline, 542 this->stream_dict, pipeline,
543 - suppress_warnings)) 543 + suppress_warnings,
  544 + will_retry))
544 { 545 {
545 filter = false; 546 filter = false;
546 } 547 }
libqpdf/qpdf/QPDF_Stream.hh
@@ -25,7 +25,7 @@ class QPDF_Stream: public QPDFObject @@ -25,7 +25,7 @@ class QPDF_Stream: public QPDFObject
25 bool pipeStreamData(Pipeline*, 25 bool pipeStreamData(Pipeline*,
26 unsigned long encode_flags, 26 unsigned long encode_flags,
27 qpdf_stream_decode_level_e decode_level, 27 qpdf_stream_decode_level_e decode_level,
28 - bool suppress_warnings); 28 + bool suppress_warnings, bool will_retry);
29 PointerHolder<Buffer> getStreamData(qpdf_stream_decode_level_e); 29 PointerHolder<Buffer> getStreamData(qpdf_stream_decode_level_e);
30 PointerHolder<Buffer> getRawStreamData(); 30 PointerHolder<Buffer> getRawStreamData();
31 void replaceStreamData(PointerHolder<Buffer> data, 31 void replaceStreamData(PointerHolder<Buffer> data,
qpdf/qtest/qpdf/bad-data.out
1 WARNING: bad-data.pdf (file position 319): error decoding stream data for object 4 0: LZWDecoder: bad code received 1 WARNING: bad-data.pdf (file position 319): error decoding stream data for object 4 0: LZWDecoder: bad code received
  2 +WARNING: bad-data.pdf (file position 319): stream will be re-processed without filtering to avoid data loss
2 qpdf: operation succeeded with warnings; resulting file may have some problems 3 qpdf: operation succeeded with warnings; resulting file may have some problems
qpdf/qtest/qpdf/bad-jpeg-check.out
@@ -3,3 +3,4 @@ PDF Version: 1.3 @@ -3,3 +3,4 @@ PDF Version: 1.3
3 File is not encrypted 3 File is not encrypted
4 File is not linearized 4 File is not linearized
5 WARNING: bad-jpeg.pdf (file position 735): error decoding stream data for object 6 0: Not a JPEG file: starts with 0x77 0x77 5 WARNING: bad-jpeg.pdf (file position 735): error decoding stream data for object 6 0: Not a JPEG file: starts with 0x77 0x77
  6 +WARNING: bad-jpeg.pdf (file position 735): stream will be re-processed without filtering to avoid data loss
qpdf/qtest/qpdf/bad-jpeg.out
1 WARNING: bad-jpeg.pdf (file position 735): error decoding stream data for object 6 0: Not a JPEG file: starts with 0x77 0x77 1 WARNING: bad-jpeg.pdf (file position 735): error decoding stream data for object 6 0: Not a JPEG file: starts with 0x77 0x77
  2 +WARNING: bad-jpeg.pdf (file position 735): stream will be re-processed without filtering to avoid data loss
2 qpdf: operation succeeded with warnings; resulting file may have some problems 3 qpdf: operation succeeded with warnings; resulting file may have some problems
qpdf/qtest/qpdf/damaged-stream-c-check.out
@@ -3,3 +3,8 @@ warning: damaged-stream.pdf (file position 426): error decoding stream data for @@ -3,3 +3,8 @@ warning: damaged-stream.pdf (file position 426): error decoding stream data for
3 file: damaged-stream.pdf 3 file: damaged-stream.pdf
4 pos : 426 4 pos : 426
5 text: error decoding stream data for object 5 0: LZWDecoder: bad code received 5 text: error decoding stream data for object 5 0: LZWDecoder: bad code received
  6 +warning: damaged-stream.pdf (file position 426): stream will be re-processed without filtering to avoid data loss
  7 + code: 5
  8 + file: damaged-stream.pdf
  9 + pos : 426
  10 + text: stream will be re-processed without filtering to avoid data loss
qpdf/qtest/qpdf/damaged-stream.out
@@ -3,3 +3,4 @@ PDF Version: 1.3 @@ -3,3 +3,4 @@ PDF Version: 1.3
3 File is not encrypted 3 File is not encrypted
4 File is not linearized 4 File is not linearized
5 WARNING: damaged-stream.pdf (file position 426): error decoding stream data for object 5 0: LZWDecoder: bad code received 5 WARNING: damaged-stream.pdf (file position 426): error decoding stream data for object 5 0: LZWDecoder: bad code received
  6 +WARNING: damaged-stream.pdf (file position 426): stream will be re-processed without filtering to avoid data loss
qpdf/qtest/qpdf/issue-106.out
@@ -2,4 +2,5 @@ WARNING: issue-106.pdf: file is damaged @@ -2,4 +2,5 @@ WARNING: issue-106.pdf: file is damaged
2 WARNING: issue-106.pdf (file position 809): xref not found 2 WARNING: issue-106.pdf (file position 809): xref not found
3 WARNING: issue-106.pdf: Attempting to reconstruct cross-reference table 3 WARNING: issue-106.pdf: Attempting to reconstruct cross-reference table
4 WARNING: issue-106.pdf (file position 965): error decoding stream data for object 8 0: stream inflate: inflate: data: incorrect data check 4 WARNING: issue-106.pdf (file position 965): error decoding stream data for object 8 0: stream inflate: inflate: data: incorrect data check
  5 +WARNING: issue-106.pdf (file position 965): stream will be re-processed without filtering to avoid data loss
5 qpdf: operation succeeded with warnings; resulting file may have some problems 6 qpdf: operation succeeded with warnings; resulting file may have some problems
qpdf/qtest/qpdf/split-content-stream-errors.out
@@ -3,6 +3,7 @@ PDF Version: 1.3 @@ -3,6 +3,7 @@ PDF Version: 1.3
3 File is not encrypted 3 File is not encrypted
4 File is not linearized 4 File is not linearized
5 WARNING: split-content-stream-errors.pdf (file position 557): error decoding stream data for object 6 0: LZWDecoder: bad code received 5 WARNING: split-content-stream-errors.pdf (file position 557): error decoding stream data for object 6 0: LZWDecoder: bad code received
  6 +WARNING: split-content-stream-errors.pdf (file position 557): stream will be re-processed without filtering to avoid data loss
6 WARNING: content stream: ignoring non-stream while parsing content streams 7 WARNING: content stream: ignoring non-stream while parsing content streams
7 WARNING: split-content-stream-errors.pdf (file position 557): error decoding stream data for object 6 0: LZWDecoder: bad code received 8 WARNING: split-content-stream-errors.pdf (file position 557): error decoding stream data for object 6 0: LZWDecoder: bad code received
8 WARNING: content stream (content stream object 6 0): errors while decoding content stream 9 WARNING: content stream (content stream object 6 0): errors while decoding content stream