Commit 532a4f3d60f6981b22beb32e6ff688ec41f87e26

Authored by Jay Berkenbilt
1 parent c491d9f6

Detect recoverable but invalid zlib data streams (fixes #562)

ChangeLog
1 2021-11-02 Jay Berkenbilt <ejb@ql.org> 1 2021-11-02 Jay Berkenbilt <ejb@ql.org>
2 2
  3 + * zlib-flate: warn and exit with code 3 when there is corrupted
  4 + input data even when decompression is possible. We do this in the
  5 + zlib-flate CLI so that it can be more reliably used to test the
  6 + validity of zlib streams, but we don't warn by default in qpdf
  7 + itself because PDF files in the wild exist with this problem and
  8 + other readers appear to tolerate it. There is a PDF in the qpdf
  9 + test suite (form-filled-by-acrobat.pdf) that was written by a
  10 + version of Adobe Acrobat that exhibits this problem. Fixes #562.
  11 +
  12 + * Add Pl_Flate::setWarnCallback to make it possible to be notified
  13 + of data errors that are recoverable but still indicate invalid
  14 + data.
  15 +
3 * Improve error reporting when someone forgets the -- after 16 * Improve error reporting when someone forgets the -- after
4 --pages. Fixes #555. 17 --pages. Fixes #555.
5 18
include/qpdf/Pl_Flate.hh
@@ -23,6 +23,7 @@ @@ -23,6 +23,7 @@
23 #define PL_FLATE_HH 23 #define PL_FLATE_HH
24 24
25 #include <qpdf/Pipeline.hh> 25 #include <qpdf/Pipeline.hh>
  26 +#include <functional>
26 27
27 class Pl_Flate: public Pipeline 28 class Pl_Flate: public Pipeline
28 { 29 {
@@ -52,9 +53,13 @@ class Pl_Flate: public Pipeline @@ -52,9 +53,13 @@ class Pl_Flate: public Pipeline
52 QPDF_DLL 53 QPDF_DLL
53 static void setCompressionLevel(int); 54 static void setCompressionLevel(int);
54 55
  56 + QPDF_DLL
  57 + void setWarnCallback(std::function<void(char const*, int)> callback);
  58 +
55 private: 59 private:
56 void handleData(unsigned char* data, size_t len, int flush); 60 void handleData(unsigned char* data, size_t len, int flush);
57 void checkError(char const* prefix, int error_code); 61 void checkError(char const* prefix, int error_code);
  62 + void warn(char const*, int error_code);
58 63
59 class Members 64 class Members
60 { 65 {
@@ -73,6 +78,7 @@ class Pl_Flate: public Pipeline @@ -73,6 +78,7 @@ class Pl_Flate: public Pipeline
73 action_e action; 78 action_e action;
74 bool initialized; 79 bool initialized;
75 void* zdata; 80 void* zdata;
  81 + std::function<void(char const*, int)> callback;
76 }; 82 };
77 83
78 PointerHolder<Members> m; 84 PointerHolder<Members> m;
libqpdf/Pl_Flate.cc
@@ -72,6 +72,21 @@ Pl_Flate::~Pl_Flate() @@ -72,6 +72,21 @@ Pl_Flate::~Pl_Flate()
72 } 72 }
73 73
74 void 74 void
  75 +Pl_Flate::setWarnCallback(std::function<void(char const*, int)> callback)
  76 +{
  77 + this->m->callback = callback;
  78 +}
  79 +
  80 +void
  81 +Pl_Flate::warn(char const* msg, int code)
  82 +{
  83 + if (this->m->callback != nullptr)
  84 + {
  85 + this->m->callback(msg, code);
  86 + }
  87 +}
  88 +
  89 +void
75 Pl_Flate::write(unsigned char* data, size_t len) 90 Pl_Flate::write(unsigned char* data, size_t len)
76 { 91 {
77 if (this->m->outbuf.getPointer() == 0) 92 if (this->m->outbuf.getPointer() == 0)
@@ -164,7 +179,14 @@ Pl_Flate::handleData(unsigned char* data, size_t len, int flush) @@ -164,7 +179,14 @@ Pl_Flate::handleData(unsigned char* data, size_t len, int flush)
164 // Probably shouldn't be able to happen, but possible as a 179 // Probably shouldn't be able to happen, but possible as a
165 // boundary condition: if the last call to inflate exactly 180 // boundary condition: if the last call to inflate exactly
166 // filled the output buffer, it's possible that the next 181 // filled the output buffer, it's possible that the next
167 - // call to inflate could have nothing to do. 182 + // call to inflate could have nothing to do. There are PDF
  183 + // files in the wild that have this error (including at
  184 + // least one in qpdf's test suite). In some cases, we want
  185 + // to know about this, because it indicates incorrect
  186 + // compression, so call a callback if provided.
  187 + this->warn(
  188 + "input stream is complete but output may still be valid",
  189 + err);
168 done = true; 190 done = true;
169 break; 191 break;
170 192
@@ -231,8 +253,15 @@ Pl_Flate::finish() @@ -231,8 +253,15 @@ Pl_Flate::finish()
231 } 253 }
232 catch (std::exception& e) 254 catch (std::exception& e)
233 { 255 {
234 - this->getNext()->finish();  
235 - throw e; 256 + try
  257 + {
  258 + this->getNext()->finish();
  259 + }
  260 + catch (...)
  261 + {
  262 + // ignore secondary exception
  263 + }
  264 + throw std::runtime_error(e.what());
236 } 265 }
237 this->getNext()->finish(); 266 this->getNext()->finish();
238 } 267 }
libqpdf/QPDF_Stream.cc
@@ -503,6 +503,14 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp, @@ -503,6 +503,14 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp,
503 { 503 {
504 pipeline = decode_pipeline; 504 pipeline = decode_pipeline;
505 } 505 }
  506 + Pl_Flate* flate = dynamic_cast<Pl_Flate*>(pipeline);
  507 + if (flate != nullptr)
  508 + {
  509 + flate->setWarnCallback([this](char const* msg, int code) {
  510 + warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
  511 + "", this->offset, msg));
  512 + });
  513 + }
506 } 514 }
507 } 515 }
508 516
qpdf/qtest/qpdf/form-filled-by-acrobat-warn.out 0 → 100644
  1 +WARNING: form-filled-by-acrobat.pdf (offset 56091): input stream is complete but output may still be valid
  2 +WARNING: form-filled-by-acrobat.pdf (offset 56420): input stream is complete but output may still be valid
  3 +WARNING: form-filled-by-acrobat.pdf (offset 56657): input stream is complete but output may still be valid
  4 +qpdf: operation succeeded with warnings; resulting file may have some problems
zlib-flate/qtest/missing-z-finish.in 0 → 100644
No preview for this file type
zlib-flate/qtest/zf.test
@@ -29,4 +29,11 @@ $td-&gt;runtest(&quot;error&quot;, @@ -29,4 +29,11 @@ $td-&gt;runtest(&quot;error&quot;,
29 $td->EXIT_STATUS => 2}, 29 $td->EXIT_STATUS => 2},
30 $td->NORMALIZE_NEWLINES); 30 $td->NORMALIZE_NEWLINES);
31 31
32 -$td->report(7); 32 +$td->runtest("corrupted input",
  33 + {$td->COMMAND => "zlib-flate -uncompress < missing-z-finish.in"},
  34 + {$td->REGEXP =>
  35 + "input stream is complete but output may still be valid",
  36 + $td->EXIT_STATUS => 3},
  37 + $td->NORMALIZE_NEWLINES);
  38 +
  39 +$td->report(8);
zlib-flate/zlib-flate.cc
@@ -76,6 +76,12 @@ int main(int argc, char* argv[]) @@ -76,6 +76,12 @@ int main(int argc, char* argv[])
76 PointerHolder<Pl_StdioFile> out = new Pl_StdioFile("stdout", stdout); 76 PointerHolder<Pl_StdioFile> out = new Pl_StdioFile("stdout", stdout);
77 PointerHolder<Pl_Flate> flate = 77 PointerHolder<Pl_Flate> flate =
78 new Pl_Flate("flate", out.getPointer(), action); 78 new Pl_Flate("flate", out.getPointer(), action);
  79 + bool warn = false;
  80 + flate->setWarnCallback([&warn](char const* msg, int code) {
  81 + warn = true;
  82 + std::cerr << whoami << ": WARNING: zlib code " << code
  83 + << ", msg = " << msg << std::endl;
  84 + });
79 85
80 try 86 try
81 { 87 {
@@ -97,9 +103,13 @@ int main(int argc, char* argv[]) @@ -97,9 +103,13 @@ int main(int argc, char* argv[])
97 } 103 }
98 catch (std::exception& e) 104 catch (std::exception& e)
99 { 105 {
100 - std::cerr << e.what() << std::endl; 106 + std::cerr << whoami << ": " << e.what() << std::endl;
101 exit(2); 107 exit(2);
102 } 108 }
103 109
  110 + if (warn)
  111 + {
  112 + exit(3);
  113 + }
104 return 0; 114 return 0;
105 } 115 }