Commit 532a4f3d60f6981b22beb32e6ff688ec41f87e26
1 parent
c491d9f6
Detect recoverable but invalid zlib data streams (fixes #562)
Showing
8 changed files
with
84 additions
and
5 deletions
ChangeLog
| 1 | 2021-11-02 Jay Berkenbilt <ejb@ql.org> | 1 | 2021-11-02 Jay Berkenbilt <ejb@ql.org> |
| 2 | 2 | ||
| 3 | + * zlib-flate: warn and exit with code 3 when there is corrupted | ||
| 4 | + input data even when decompression is possible. We do this in the | ||
| 5 | + zlib-flate CLI so that it can be more reliably used to test the | ||
| 6 | + validity of zlib streams, but we don't warn by default in qpdf | ||
| 7 | + itself because PDF files in the wild exist with this problem and | ||
| 8 | + other readers appear to tolerate it. There is a PDF in the qpdf | ||
| 9 | + test suite (form-filled-by-acrobat.pdf) that was written by a | ||
| 10 | + version of Adobe Acrobat that exhibits this problem. Fixes #562. | ||
| 11 | + | ||
| 12 | + * Add Pl_Flate::setWarnCallback to make it possible to be notified | ||
| 13 | + of data errors that are recoverable but still indicate invalid | ||
| 14 | + data. | ||
| 15 | + | ||
| 3 | * Improve error reporting when someone forgets the -- after | 16 | * Improve error reporting when someone forgets the -- after |
| 4 | --pages. Fixes #555. | 17 | --pages. Fixes #555. |
| 5 | 18 |
include/qpdf/Pl_Flate.hh
| @@ -23,6 +23,7 @@ | @@ -23,6 +23,7 @@ | ||
| 23 | #define PL_FLATE_HH | 23 | #define PL_FLATE_HH |
| 24 | 24 | ||
| 25 | #include <qpdf/Pipeline.hh> | 25 | #include <qpdf/Pipeline.hh> |
| 26 | +#include <functional> | ||
| 26 | 27 | ||
| 27 | class Pl_Flate: public Pipeline | 28 | class Pl_Flate: public Pipeline |
| 28 | { | 29 | { |
| @@ -52,9 +53,13 @@ class Pl_Flate: public Pipeline | @@ -52,9 +53,13 @@ class Pl_Flate: public Pipeline | ||
| 52 | QPDF_DLL | 53 | QPDF_DLL |
| 53 | static void setCompressionLevel(int); | 54 | static void setCompressionLevel(int); |
| 54 | 55 | ||
| 56 | + QPDF_DLL | ||
| 57 | + void setWarnCallback(std::function<void(char const*, int)> callback); | ||
| 58 | + | ||
| 55 | private: | 59 | private: |
| 56 | void handleData(unsigned char* data, size_t len, int flush); | 60 | void handleData(unsigned char* data, size_t len, int flush); |
| 57 | void checkError(char const* prefix, int error_code); | 61 | void checkError(char const* prefix, int error_code); |
| 62 | + void warn(char const*, int error_code); | ||
| 58 | 63 | ||
| 59 | class Members | 64 | class Members |
| 60 | { | 65 | { |
| @@ -73,6 +78,7 @@ class Pl_Flate: public Pipeline | @@ -73,6 +78,7 @@ class Pl_Flate: public Pipeline | ||
| 73 | action_e action; | 78 | action_e action; |
| 74 | bool initialized; | 79 | bool initialized; |
| 75 | void* zdata; | 80 | void* zdata; |
| 81 | + std::function<void(char const*, int)> callback; | ||
| 76 | }; | 82 | }; |
| 77 | 83 | ||
| 78 | PointerHolder<Members> m; | 84 | PointerHolder<Members> m; |
libqpdf/Pl_Flate.cc
| @@ -72,6 +72,21 @@ Pl_Flate::~Pl_Flate() | @@ -72,6 +72,21 @@ Pl_Flate::~Pl_Flate() | ||
| 72 | } | 72 | } |
| 73 | 73 | ||
| 74 | void | 74 | void |
| 75 | +Pl_Flate::setWarnCallback(std::function<void(char const*, int)> callback) | ||
| 76 | +{ | ||
| 77 | + this->m->callback = callback; | ||
| 78 | +} | ||
| 79 | + | ||
| 80 | +void | ||
| 81 | +Pl_Flate::warn(char const* msg, int code) | ||
| 82 | +{ | ||
| 83 | + if (this->m->callback != nullptr) | ||
| 84 | + { | ||
| 85 | + this->m->callback(msg, code); | ||
| 86 | + } | ||
| 87 | +} | ||
| 88 | + | ||
| 89 | +void | ||
| 75 | Pl_Flate::write(unsigned char* data, size_t len) | 90 | Pl_Flate::write(unsigned char* data, size_t len) |
| 76 | { | 91 | { |
| 77 | if (this->m->outbuf.getPointer() == 0) | 92 | if (this->m->outbuf.getPointer() == 0) |
| @@ -164,7 +179,14 @@ Pl_Flate::handleData(unsigned char* data, size_t len, int flush) | @@ -164,7 +179,14 @@ Pl_Flate::handleData(unsigned char* data, size_t len, int flush) | ||
| 164 | // Probably shouldn't be able to happen, but possible as a | 179 | // Probably shouldn't be able to happen, but possible as a |
| 165 | // boundary condition: if the last call to inflate exactly | 180 | // boundary condition: if the last call to inflate exactly |
| 166 | // filled the output buffer, it's possible that the next | 181 | // filled the output buffer, it's possible that the next |
| 167 | - // call to inflate could have nothing to do. | 182 | + // call to inflate could have nothing to do. There are PDF |
| 183 | + // files in the wild that have this error (including at | ||
| 184 | + // least one in qpdf's test suite). In some cases, we want | ||
| 185 | + // to know about this, because it indicates incorrect | ||
| 186 | + // compression, so call a callback if provided. | ||
| 187 | + this->warn( | ||
| 188 | + "input stream is complete but output may still be valid", | ||
| 189 | + err); | ||
| 168 | done = true; | 190 | done = true; |
| 169 | break; | 191 | break; |
| 170 | 192 | ||
| @@ -231,8 +253,15 @@ Pl_Flate::finish() | @@ -231,8 +253,15 @@ Pl_Flate::finish() | ||
| 231 | } | 253 | } |
| 232 | catch (std::exception& e) | 254 | catch (std::exception& e) |
| 233 | { | 255 | { |
| 234 | - this->getNext()->finish(); | ||
| 235 | - throw e; | 256 | + try |
| 257 | + { | ||
| 258 | + this->getNext()->finish(); | ||
| 259 | + } | ||
| 260 | + catch (...) | ||
| 261 | + { | ||
| 262 | + // ignore secondary exception | ||
| 263 | + } | ||
| 264 | + throw std::runtime_error(e.what()); | ||
| 236 | } | 265 | } |
| 237 | this->getNext()->finish(); | 266 | this->getNext()->finish(); |
| 238 | } | 267 | } |
libqpdf/QPDF_Stream.cc
| @@ -503,6 +503,14 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp, | @@ -503,6 +503,14 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp, | ||
| 503 | { | 503 | { |
| 504 | pipeline = decode_pipeline; | 504 | pipeline = decode_pipeline; |
| 505 | } | 505 | } |
| 506 | + Pl_Flate* flate = dynamic_cast<Pl_Flate*>(pipeline); | ||
| 507 | + if (flate != nullptr) | ||
| 508 | + { | ||
| 509 | + flate->setWarnCallback([this](char const* msg, int code) { | ||
| 510 | + warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(), | ||
| 511 | + "", this->offset, msg)); | ||
| 512 | + }); | ||
| 513 | + } | ||
| 506 | } | 514 | } |
| 507 | } | 515 | } |
| 508 | 516 |
qpdf/qtest/qpdf/form-filled-by-acrobat-warn.out
0 → 100644
| 1 | +WARNING: form-filled-by-acrobat.pdf (offset 56091): input stream is complete but output may still be valid | ||
| 2 | +WARNING: form-filled-by-acrobat.pdf (offset 56420): input stream is complete but output may still be valid | ||
| 3 | +WARNING: form-filled-by-acrobat.pdf (offset 56657): input stream is complete but output may still be valid | ||
| 4 | +qpdf: operation succeeded with warnings; resulting file may have some problems |
zlib-flate/qtest/missing-z-finish.in
0 → 100644
No preview for this file type
zlib-flate/qtest/zf.test
| @@ -29,4 +29,11 @@ $td->runtest("error", | @@ -29,4 +29,11 @@ $td->runtest("error", | ||
| 29 | $td->EXIT_STATUS => 2}, | 29 | $td->EXIT_STATUS => 2}, |
| 30 | $td->NORMALIZE_NEWLINES); | 30 | $td->NORMALIZE_NEWLINES); |
| 31 | 31 | ||
| 32 | -$td->report(7); | 32 | +$td->runtest("corrupted input", |
| 33 | + {$td->COMMAND => "zlib-flate -uncompress < missing-z-finish.in"}, | ||
| 34 | + {$td->REGEXP => | ||
| 35 | + "input stream is complete but output may still be valid", | ||
| 36 | + $td->EXIT_STATUS => 3}, | ||
| 37 | + $td->NORMALIZE_NEWLINES); | ||
| 38 | + | ||
| 39 | +$td->report(8); |
zlib-flate/zlib-flate.cc
| @@ -76,6 +76,12 @@ int main(int argc, char* argv[]) | @@ -76,6 +76,12 @@ int main(int argc, char* argv[]) | ||
| 76 | PointerHolder<Pl_StdioFile> out = new Pl_StdioFile("stdout", stdout); | 76 | PointerHolder<Pl_StdioFile> out = new Pl_StdioFile("stdout", stdout); |
| 77 | PointerHolder<Pl_Flate> flate = | 77 | PointerHolder<Pl_Flate> flate = |
| 78 | new Pl_Flate("flate", out.getPointer(), action); | 78 | new Pl_Flate("flate", out.getPointer(), action); |
| 79 | + bool warn = false; | ||
| 80 | + flate->setWarnCallback([&warn](char const* msg, int code) { | ||
| 81 | + warn = true; | ||
| 82 | + std::cerr << whoami << ": WARNING: zlib code " << code | ||
| 83 | + << ", msg = " << msg << std::endl; | ||
| 84 | + }); | ||
| 79 | 85 | ||
| 80 | try | 86 | try |
| 81 | { | 87 | { |
| @@ -97,9 +103,13 @@ int main(int argc, char* argv[]) | @@ -97,9 +103,13 @@ int main(int argc, char* argv[]) | ||
| 97 | } | 103 | } |
| 98 | catch (std::exception& e) | 104 | catch (std::exception& e) |
| 99 | { | 105 | { |
| 100 | - std::cerr << e.what() << std::endl; | 106 | + std::cerr << whoami << ": " << e.what() << std::endl; |
| 101 | exit(2); | 107 | exit(2); |
| 102 | } | 108 | } |
| 103 | 109 | ||
| 110 | + if (warn) | ||
| 111 | + { | ||
| 112 | + exit(3); | ||
| 113 | + } | ||
| 104 | return 0; | 114 | return 0; |
| 105 | } | 115 | } |