Commit 532a4f3d60f6981b22beb32e6ff688ec41f87e26

Authored by Jay Berkenbilt
1 parent c491d9f6

Detect recoverable but invalid zlib data streams (fixes #562)

ChangeLog
1 1 2021-11-02 Jay Berkenbilt <ejb@ql.org>
2 2  
  3 + * zlib-flate: warn and exit with code 3 when there is corrupted
  4 + input data even when decompression is possible. We do this in the
  5 + zlib-flate CLI so that it can be more reliably used to test the
  6 + validity of zlib streams, but we don't warn by default in qpdf
  7 + itself because PDF files in the wild exist with this problem and
  8 + other readers appear to tolerate it. There is a PDF in the qpdf
  9 + test suite (form-filled-by-acrobat.pdf) that was written by a
  10 + version of Adobe Acrobat that exhibits this problem. Fixes #562.
  11 +
  12 + * Add Pl_Flate::setWarnCallback to make it possible to be notified
  13 + of data errors that are recoverable but still indicate invalid
  14 + data.
  15 +
3 16 * Improve error reporting when someone forgets the -- after
4 17 --pages. Fixes #555.
5 18  
... ...
include/qpdf/Pl_Flate.hh
... ... @@ -23,6 +23,7 @@
23 23 #define PL_FLATE_HH
24 24  
25 25 #include <qpdf/Pipeline.hh>
  26 +#include <functional>
26 27  
27 28 class Pl_Flate: public Pipeline
28 29 {
... ... @@ -52,9 +53,13 @@ class Pl_Flate: public Pipeline
52 53 QPDF_DLL
53 54 static void setCompressionLevel(int);
54 55  
  56 + QPDF_DLL
  57 + void setWarnCallback(std::function<void(char const*, int)> callback);
  58 +
55 59 private:
56 60 void handleData(unsigned char* data, size_t len, int flush);
57 61 void checkError(char const* prefix, int error_code);
  62 + void warn(char const*, int error_code);
58 63  
59 64 class Members
60 65 {
... ... @@ -73,6 +78,7 @@ class Pl_Flate: public Pipeline
73 78 action_e action;
74 79 bool initialized;
75 80 void* zdata;
  81 + std::function<void(char const*, int)> callback;
76 82 };
77 83  
78 84 PointerHolder<Members> m;
... ...
libqpdf/Pl_Flate.cc
... ... @@ -72,6 +72,21 @@ Pl_Flate::~Pl_Flate()
72 72 }
73 73  
74 74 void
  75 +Pl_Flate::setWarnCallback(std::function<void(char const*, int)> callback)
  76 +{
  77 + this->m->callback = callback;
  78 +}
  79 +
  80 +void
  81 +Pl_Flate::warn(char const* msg, int code)
  82 +{
  83 + if (this->m->callback != nullptr)
  84 + {
  85 + this->m->callback(msg, code);
  86 + }
  87 +}
  88 +
  89 +void
75 90 Pl_Flate::write(unsigned char* data, size_t len)
76 91 {
77 92 if (this->m->outbuf.getPointer() == 0)
... ... @@ -164,7 +179,14 @@ Pl_Flate::handleData(unsigned char* data, size_t len, int flush)
164 179 // Probably shouldn't be able to happen, but possible as a
165 180 // boundary condition: if the last call to inflate exactly
166 181 // filled the output buffer, it's possible that the next
167   - // call to inflate could have nothing to do.
  182 + // call to inflate could have nothing to do. There are PDF
  183 + // files in the wild that have this error (including at
  184 + // least one in qpdf's test suite). In some cases, we want
  185 + // to know about this, because it indicates incorrect
  186 + // compression, so call a callback if provided.
  187 + this->warn(
  188 + "input stream is complete but output may still be valid",
  189 + err);
168 190 done = true;
169 191 break;
170 192  
... ... @@ -231,8 +253,15 @@ Pl_Flate::finish()
231 253 }
232 254 catch (std::exception& e)
233 255 {
234   - this->getNext()->finish();
235   - throw e;
  256 + try
  257 + {
  258 + this->getNext()->finish();
  259 + }
  260 + catch (...)
  261 + {
  262 + // ignore secondary exception
  263 + }
  264 + throw std::runtime_error(e.what());
236 265 }
237 266 this->getNext()->finish();
238 267 }
... ...
libqpdf/QPDF_Stream.cc
... ... @@ -503,6 +503,14 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp,
503 503 {
504 504 pipeline = decode_pipeline;
505 505 }
  506 + Pl_Flate* flate = dynamic_cast<Pl_Flate*>(pipeline);
  507 + if (flate != nullptr)
  508 + {
  509 + flate->setWarnCallback([this](char const* msg, int code) {
  510 + warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
  511 + "", this->offset, msg));
  512 + });
  513 + }
506 514 }
507 515 }
508 516  
... ...
qpdf/qtest/qpdf/form-filled-by-acrobat-warn.out 0 → 100644
  1 +WARNING: form-filled-by-acrobat.pdf (offset 56091): input stream is complete but output may still be valid
  2 +WARNING: form-filled-by-acrobat.pdf (offset 56420): input stream is complete but output may still be valid
  3 +WARNING: form-filled-by-acrobat.pdf (offset 56657): input stream is complete but output may still be valid
  4 +qpdf: operation succeeded with warnings; resulting file may have some problems
... ...
zlib-flate/qtest/missing-z-finish.in 0 → 100644
No preview for this file type
zlib-flate/qtest/zf.test
... ... @@ -29,4 +29,11 @@ $td-&gt;runtest(&quot;error&quot;,
29 29 $td->EXIT_STATUS => 2},
30 30 $td->NORMALIZE_NEWLINES);
31 31  
32   -$td->report(7);
  32 +$td->runtest("corrupted input",
  33 + {$td->COMMAND => "zlib-flate -uncompress < missing-z-finish.in"},
  34 + {$td->REGEXP =>
  35 + "input stream is complete but output may still be valid",
  36 + $td->EXIT_STATUS => 3},
  37 + $td->NORMALIZE_NEWLINES);
  38 +
  39 +$td->report(8);
... ...
zlib-flate/zlib-flate.cc
... ... @@ -76,6 +76,12 @@ int main(int argc, char* argv[])
76 76 PointerHolder<Pl_StdioFile> out = new Pl_StdioFile("stdout", stdout);
77 77 PointerHolder<Pl_Flate> flate =
78 78 new Pl_Flate("flate", out.getPointer(), action);
  79 + bool warn = false;
  80 + flate->setWarnCallback([&warn](char const* msg, int code) {
  81 + warn = true;
  82 + std::cerr << whoami << ": WARNING: zlib code " << code
  83 + << ", msg = " << msg << std::endl;
  84 + });
79 85  
80 86 try
81 87 {
... ... @@ -97,9 +103,13 @@ int main(int argc, char* argv[])
97 103 }
98 104 catch (std::exception& e)
99 105 {
100   - std::cerr << e.what() << std::endl;
  106 + std::cerr << whoami << ": " << e.what() << std::endl;
101 107 exit(2);
102 108 }
103 109  
  110 + if (warn)
  111 + {
  112 + exit(3);
  113 + }
104 114 return 0;
105 115 }
... ...