Commit 532a4f3d60f6981b22beb32e6ff688ec41f87e26
1 parent
c491d9f6
Detect recoverable but invalid zlib data streams (fixes #562)
Showing
8 changed files
with
84 additions
and
5 deletions
ChangeLog
| 1 | 1 | 2021-11-02 Jay Berkenbilt <ejb@ql.org> |
| 2 | 2 | |
| 3 | + * zlib-flate: warn and exit with code 3 when there is corrupted | |
| 4 | + input data even when decompression is possible. We do this in the | |
| 5 | + zlib-flate CLI so that it can be more reliably used to test the | |
| 6 | + validity of zlib streams, but we don't warn by default in qpdf | |
| 7 | + itself because PDF files in the wild exist with this problem and | |
| 8 | + other readers appear to tolerate it. There is a PDF in the qpdf | |
| 9 | + test suite (form-filled-by-acrobat.pdf) that was written by a | |
| 10 | + version of Adobe Acrobat that exhibits this problem. Fixes #562. | |
| 11 | + | |
| 12 | + * Add Pl_Flate::setWarnCallback to make it possible to be notified | |
| 13 | + of data errors that are recoverable but still indicate invalid | |
| 14 | + data. | |
| 15 | + | |
| 3 | 16 | * Improve error reporting when someone forgets the -- after |
| 4 | 17 | --pages. Fixes #555. |
| 5 | 18 | ... | ... |
include/qpdf/Pl_Flate.hh
| ... | ... | @@ -23,6 +23,7 @@ |
| 23 | 23 | #define PL_FLATE_HH |
| 24 | 24 | |
| 25 | 25 | #include <qpdf/Pipeline.hh> |
| 26 | +#include <functional> | |
| 26 | 27 | |
| 27 | 28 | class Pl_Flate: public Pipeline |
| 28 | 29 | { |
| ... | ... | @@ -52,9 +53,13 @@ class Pl_Flate: public Pipeline |
| 52 | 53 | QPDF_DLL |
| 53 | 54 | static void setCompressionLevel(int); |
| 54 | 55 | |
| 56 | + QPDF_DLL | |
| 57 | + void setWarnCallback(std::function<void(char const*, int)> callback); | |
| 58 | + | |
| 55 | 59 | private: |
| 56 | 60 | void handleData(unsigned char* data, size_t len, int flush); |
| 57 | 61 | void checkError(char const* prefix, int error_code); |
| 62 | + void warn(char const*, int error_code); | |
| 58 | 63 | |
| 59 | 64 | class Members |
| 60 | 65 | { |
| ... | ... | @@ -73,6 +78,7 @@ class Pl_Flate: public Pipeline |
| 73 | 78 | action_e action; |
| 74 | 79 | bool initialized; |
| 75 | 80 | void* zdata; |
| 81 | + std::function<void(char const*, int)> callback; | |
| 76 | 82 | }; |
| 77 | 83 | |
| 78 | 84 | PointerHolder<Members> m; | ... | ... |
libqpdf/Pl_Flate.cc
| ... | ... | @@ -72,6 +72,21 @@ Pl_Flate::~Pl_Flate() |
| 72 | 72 | } |
| 73 | 73 | |
| 74 | 74 | void |
| 75 | +Pl_Flate::setWarnCallback(std::function<void(char const*, int)> callback) | |
| 76 | +{ | |
| 77 | + this->m->callback = callback; | |
| 78 | +} | |
| 79 | + | |
| 80 | +void | |
| 81 | +Pl_Flate::warn(char const* msg, int code) | |
| 82 | +{ | |
| 83 | + if (this->m->callback != nullptr) | |
| 84 | + { | |
| 85 | + this->m->callback(msg, code); | |
| 86 | + } | |
| 87 | +} | |
| 88 | + | |
| 89 | +void | |
| 75 | 90 | Pl_Flate::write(unsigned char* data, size_t len) |
| 76 | 91 | { |
| 77 | 92 | if (this->m->outbuf.getPointer() == 0) |
| ... | ... | @@ -164,7 +179,14 @@ Pl_Flate::handleData(unsigned char* data, size_t len, int flush) |
| 164 | 179 | // Probably shouldn't be able to happen, but possible as a |
| 165 | 180 | // boundary condition: if the last call to inflate exactly |
| 166 | 181 | // filled the output buffer, it's possible that the next |
| 167 | - // call to inflate could have nothing to do. | |
| 182 | + // call to inflate could have nothing to do. There are PDF | |
| 183 | + // files in the wild that have this error (including at | |
| 184 | + // least one in qpdf's test suite). In some cases, we want | |
| 185 | + // to know about this, because it indicates incorrect | |
| 186 | + // compression, so call a callback if provided. | |
| 187 | + this->warn( | |
| 188 | + "input stream is complete but output may still be valid", | |
| 189 | + err); | |
| 168 | 190 | done = true; |
| 169 | 191 | break; |
| 170 | 192 | |
| ... | ... | @@ -231,8 +253,15 @@ Pl_Flate::finish() |
| 231 | 253 | } |
| 232 | 254 | catch (std::exception& e) |
| 233 | 255 | { |
| 234 | - this->getNext()->finish(); | |
| 235 | - throw e; | |
| 256 | + try | |
| 257 | + { | |
| 258 | + this->getNext()->finish(); | |
| 259 | + } | |
| 260 | + catch (...) | |
| 261 | + { | |
| 262 | + // ignore secondary exception | |
| 263 | + } | |
| 264 | + throw std::runtime_error(e.what()); | |
| 236 | 265 | } |
| 237 | 266 | this->getNext()->finish(); |
| 238 | 267 | } | ... | ... |
libqpdf/QPDF_Stream.cc
| ... | ... | @@ -503,6 +503,14 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp, |
| 503 | 503 | { |
| 504 | 504 | pipeline = decode_pipeline; |
| 505 | 505 | } |
| 506 | + Pl_Flate* flate = dynamic_cast<Pl_Flate*>(pipeline); | |
| 507 | + if (flate != nullptr) | |
| 508 | + { | |
| 509 | + flate->setWarnCallback([this](char const* msg, int code) { | |
| 510 | + warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(), | |
| 511 | + "", this->offset, msg)); | |
| 512 | + }); | |
| 513 | + } | |
| 506 | 514 | } |
| 507 | 515 | } |
| 508 | 516 | ... | ... |
qpdf/qtest/qpdf/form-filled-by-acrobat-warn.out
0 → 100644
| 1 | +WARNING: form-filled-by-acrobat.pdf (offset 56091): input stream is complete but output may still be valid | |
| 2 | +WARNING: form-filled-by-acrobat.pdf (offset 56420): input stream is complete but output may still be valid | |
| 3 | +WARNING: form-filled-by-acrobat.pdf (offset 56657): input stream is complete but output may still be valid | |
| 4 | +qpdf: operation succeeded with warnings; resulting file may have some problems | ... | ... |
zlib-flate/qtest/missing-z-finish.in
0 → 100644
No preview for this file type
zlib-flate/qtest/zf.test
| ... | ... | @@ -29,4 +29,11 @@ $td->runtest("error", |
| 29 | 29 | $td->EXIT_STATUS => 2}, |
| 30 | 30 | $td->NORMALIZE_NEWLINES); |
| 31 | 31 | |
| 32 | -$td->report(7); | |
| 32 | +$td->runtest("corrupted input", | |
| 33 | + {$td->COMMAND => "zlib-flate -uncompress < missing-z-finish.in"}, | |
| 34 | + {$td->REGEXP => | |
| 35 | + "input stream is complete but output may still be valid", | |
| 36 | + $td->EXIT_STATUS => 3}, | |
| 37 | + $td->NORMALIZE_NEWLINES); | |
| 38 | + | |
| 39 | +$td->report(8); | ... | ... |
zlib-flate/zlib-flate.cc
| ... | ... | @@ -76,6 +76,12 @@ int main(int argc, char* argv[]) |
| 76 | 76 | PointerHolder<Pl_StdioFile> out = new Pl_StdioFile("stdout", stdout); |
| 77 | 77 | PointerHolder<Pl_Flate> flate = |
| 78 | 78 | new Pl_Flate("flate", out.getPointer(), action); |
| 79 | + bool warn = false; | |
| 80 | + flate->setWarnCallback([&warn](char const* msg, int code) { | |
| 81 | + warn = true; | |
| 82 | + std::cerr << whoami << ": WARNING: zlib code " << code | |
| 83 | + << ", msg = " << msg << std::endl; | |
| 84 | + }); | |
| 79 | 85 | |
| 80 | 86 | try |
| 81 | 87 | { |
| ... | ... | @@ -97,9 +103,13 @@ int main(int argc, char* argv[]) |
| 97 | 103 | } |
| 98 | 104 | catch (std::exception& e) |
| 99 | 105 | { |
| 100 | - std::cerr << e.what() << std::endl; | |
| 106 | + std::cerr << whoami << ": " << e.what() << std::endl; | |
| 101 | 107 | exit(2); |
| 102 | 108 | } |
| 103 | 109 | |
| 110 | + if (warn) | |
| 111 | + { | |
| 112 | + exit(3); | |
| 113 | + } | |
| 104 | 114 | return 0; |
| 105 | 115 | } | ... | ... |