diff --git a/libqpdf/ContentNormalizer.cc b/libqpdf/ContentNormalizer.cc index a092f02..6ec6cf9 100644 --- a/libqpdf/ContentNormalizer.cc +++ b/libqpdf/ContentNormalizer.cc @@ -71,15 +71,3 @@ ContentNormalizer::handleToken(QPDFTokenizer::Token const& token) write("\n"); } } - -bool -ContentNormalizer::anyBadTokens() const -{ - return this->any_bad_tokens; -} - -bool -ContentNormalizer::lastTokenWasBad() const -{ - return this->last_token_was_bad; -} diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc index 105973f..ca718c5 100644 --- a/libqpdf/QPDF_Stream.cc +++ b/libqpdf/QPDF_Stream.cc @@ -27,33 +27,37 @@ using namespace qpdf; namespace { - class SF_Crypt: public QPDFStreamFilter + class SF_Crypt final: public QPDFStreamFilter { public: SF_Crypt() = default; - ~SF_Crypt() override = default; + ~SF_Crypt() final = default; bool - setDecodeParms(QPDFObjectHandle decode_parms) override + setDecodeParms(QPDFObjectHandle decode_parms) final { - if (decode_parms.isNull()) { - return true; - } - bool filterable = true; - for (auto const& key: decode_parms.getKeys()) { - if (((key == "/Type") || (key == "/Name")) && - ((!decode_parms.hasKey("/Type")) || - decode_parms.isDictionaryOfType("/CryptFilterDecodeParms"))) { - // we handle this in decryptStream - } else { - filterable = false; + // we only validate here - processing happens in decryptStream + if (auto dict = decode_parms.as_dictionary(optional)) { + for (auto const& [key, value]: dict) { + if (key == "/Type" && + (value.null() || + (value.isName() && value.getName() == "/CryptFilterDecodeParms"))) { + continue; + } + if (key == "/Name") { + continue; + } + if (!value.null()) { + return false; + } } + return true; } - return filterable; + return false; } Pipeline* - getDecodePipeline(Pipeline*) override + getDecodePipeline(Pipeline*) final { // Not used -- handled by pipeStreamData return nullptr; @@ -78,31 +82,67 @@ namespace Stream stream; qpdf_stream_decode_level_e decode_level; }; + + /// User defined streamfilter factories + std::map()>> filter_factories; } // namespace -std::map Stream::filter_abbreviations = { +std::function()> +QPDF_Stream::Members::filter_factory(std::string const& name) const +{ + if (name == "/FlateDecode") { + return SF_FlateLzwDecode::flate_factory; + } + if (name == "/Crypt") { + return []() { return std::make_shared(); }; + } + if (name == "/LZWDecode") { + return SF_FlateLzwDecode::lzw_factory; + } + if (name == "/RunLengthDecode") { + return SF_RunLengthDecode::factory; + } + if (name == "/DCTDecode") { + return SF_DCTDecode::factory; + } + if (name == "/ASCII85Decode") { + return SF_ASCII85Decode::factory; + } + if (name == "/ASCIIHexDecode") { + return SF_ASCIIHexDecode::factory; + } // The PDF specification provides these filter abbreviations for use in inline images, but // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader also // accepts them for stream filters. - {"/AHx", "/ASCIIHexDecode"}, - {"/A85", "/ASCII85Decode"}, - {"/LZW", "/LZWDecode"}, - {"/Fl", "/FlateDecode"}, - {"/RL", "/RunLengthDecode"}, - {"/CCF", "/CCITTFaxDecode"}, - {"/DCT", "/DCTDecode"}, -}; - -std::map()>> Stream::filter_factories = - { - {"/Crypt", []() { return std::make_shared(); }}, - {"/FlateDecode", SF_FlateLzwDecode::flate_factory}, - {"/LZWDecode", SF_FlateLzwDecode::lzw_factory}, - {"/RunLengthDecode", SF_RunLengthDecode::factory}, - {"/DCTDecode", SF_DCTDecode::factory}, - {"/ASCII85Decode", SF_ASCII85Decode::factory}, - {"/ASCIIHexDecode", SF_ASCIIHexDecode::factory}, -}; + + if (name == "/Fl") { + return SF_FlateLzwDecode::flate_factory; + } + if (name == "/AHx") { + return SF_ASCIIHexDecode::factory; + } + if (name == "/A85") { + return SF_ASCII85Decode::factory; + } + if (name == "/LZW") { + return SF_FlateLzwDecode::lzw_factory; + } + if (name == "/RL") { + return SF_RunLengthDecode::factory; + } + if (name == "/DCT") { + return SF_DCTDecode::factory; + } + if (filter_factories.empty()) { + return nullptr; + } + auto ff = + name == "/CCF" ? filter_factories.find("/CCITTFaxDecode") : filter_factories.find(name); + if (ff == filter_factories.end()) { + return nullptr; + } + return ff->second; +} Stream::Stream( QPDF& qpdf, QPDFObjGen og, QPDFObjectHandle stream_dict, qpdf_offset_t offset, size_t length) : @@ -292,112 +332,88 @@ Stream::isRootMetadata() const bool Stream::filterable( - std::vector>& filters, - bool& specialized_compression, - bool& lossy_compression) + qpdf_stream_decode_level_e decode_level, + std::vector>& filters) { auto s = stream(); // Check filters - QPDFObjectHandle filter_obj = s->stream_dict.getKey("/Filter"); - bool filters_okay = true; - - std::vector filter_names; + auto filter_obj = s->stream_dict.getKey("/Filter"); if (filter_obj.isNull()) { // No filters - } else if (filter_obj.isName()) { + return true; + } + if (filter_obj.isName()) { // One filter - filter_names.push_back(filter_obj.getName()); - } else if (filter_obj.isArray()) { + auto ff = s->filter_factory(filter_obj.getName()); + if (!ff) { + return false; + } + filters.emplace_back(ff()); + } else if (auto array = filter_obj.as_array(strict)) { // Potentially multiple filters - int n = filter_obj.getArrayNItems(); - for (int i = 0; i < n; ++i) { - QPDFObjectHandle item = filter_obj.getArrayItem(i); - if (item.isName()) { - filter_names.push_back(item.getName()); - } else { - filters_okay = false; + for (auto const& item: array) { + if (!item.isName()) { + warn("stream filter type is not name or array"); + return false; } + auto ff = s->filter_factory(item.getName()); + if (!ff) { + filters.clear(); + return false; + } + filters.emplace_back(ff()); } } else { - filters_okay = false; - } - - if (!filters_okay) { - QTC::TC("qpdf", "QPDF_Stream invalid filter"); warn("stream filter type is not name or array"); return false; } - bool filterable = true; + // filters now contains a list of filters to be applied in order. See which ones we can support. + // See if we can support any decode parameters that are specified. - for (auto& filter_name: filter_names) { - if (filter_abbreviations.count(filter_name)) { - QTC::TC("qpdf", "QPDF_Stream expand filter abbreviation"); - filter_name = filter_abbreviations[filter_name]; - } + auto decode_obj = s->stream_dict.getKey("/DecodeParms"); - auto ff = filter_factories.find(filter_name); - if (ff == filter_factories.end()) { - filterable = false; - } else { - filters.push_back((ff->second)()); + auto can_filter = // linebreak + [](auto d_level, auto& filter, auto& d_obj) -> bool { + if (!filter.setDecodeParms(d_obj) || + (d_level < qpdf_dl_all && filter.isLossyCompression()) || + (d_level < qpdf_dl_specialized && filter.isSpecializedCompression())) { + return false; } - } - - if (!filterable) { - return false; - } - - // filters now contains a list of filters to be applied in order. See which ones we can support. + return true; + }; - // See if we can support any decode parameters that are specified. + auto decode_array = decode_obj.as_array(strict); + if (!decode_array || decode_array.size() == 0) { + if (decode_array) { + decode_obj = QPDFObjectHandle::newNull(); + } - QPDFObjectHandle decode_obj = s->stream_dict.getKey("/DecodeParms"); - std::vector decode_parms; - if (decode_obj.isArray() && (decode_obj.getArrayNItems() == 0)) { - decode_obj = QPDFObjectHandle::newNull(); - } - if (decode_obj.isArray()) { - for (int i = 0; i < decode_obj.getArrayNItems(); ++i) { - decode_parms.push_back(decode_obj.getArrayItem(i)); + for (auto& filter: filters) { + if (!can_filter(decode_level, *filter, decode_obj)) { + return false; + } } } else { - for (unsigned int i = 0; i < filter_names.size(); ++i) { - decode_parms.push_back(decode_obj); + // Ignore /DecodeParms entirely if /Filters is empty. At least one case of a file whose + // /DecodeParms was [ << >> ] when /Filters was empty has been seen in the wild. + if (!filters.empty() && QIntC::to_size(decode_array.size()) != filters.size()) { + warn("stream /DecodeParms length is inconsistent with filters"); + return false; } - } - - // Ignore /DecodeParms entirely if /Filters is empty. At least one case of a file whose - // /DecodeParms was [ << >> ] when /Filters was empty has been seen in the wild. - if ((filters.size() != 0) && (decode_parms.size() != filters.size())) { - warn("stream /DecodeParms length is inconsistent with filters"); - filterable = false; - } - - if (!filterable) { - return false; - } - for (size_t i = 0; i < filters.size(); ++i) { - auto filter = filters.at(i); - auto decode_item = decode_parms.at(i); - - if (filter->setDecodeParms(decode_item)) { - if (filter->isSpecializedCompression()) { - specialized_compression = true; - } - if (filter->isLossyCompression()) { - specialized_compression = true; - lossy_compression = true; + int i = -1; + for (auto& filter: filters) { + auto d_obj = decode_array.at(++i).second; + if (!can_filter(decode_level, *filter, d_obj)) { + return false; } - } else { - filterable = false; } } - return filterable; + return true; } bool @@ -411,33 +427,17 @@ Stream::pipeStreamData( { auto s = stream(); std::vector> filters; - bool specialized_compression = false; - bool lossy_compression = false; bool ignored; - if (filterp == nullptr) { + if (!filterp) { filterp = &ignored; } bool& filter = *filterp; - filter = (!((encode_flags == 0) && (decode_level == qpdf_dl_none))); - bool success = true; + filter = encode_flags || decode_level != qpdf_dl_none; if (filter) { - filter = filterable(filters, specialized_compression, lossy_compression); - if ((decode_level < qpdf_dl_all) && lossy_compression) { - filter = false; - } - if ((decode_level < qpdf_dl_specialized) && specialized_compression) { - filter = false; - } - QTC::TC( - "qpdf", - "QPDF_Stream special filters", - (!filter) ? 0 - : lossy_compression ? 1 - : specialized_compression ? 2 - : 3); + filter = filterable(decode_level, filters); } - if (pipeline == nullptr) { + if (!pipeline) { QTC::TC("qpdf", "QPDF_Stream pipeStreamData with null pipeline"); // Return value is whether we can filter in this case. return filter; @@ -446,40 +446,37 @@ Stream::pipeStreamData( // Construct the pipeline in reverse order. Force pipelines we create to be deleted when this // function finishes. Pipelines created by QPDFStreamFilter objects will be deleted by those // objects. - std::vector> to_delete; + std::vector> to_delete; - std::shared_ptr normalizer; - std::shared_ptr new_pipeline; + ContentNormalizer normalizer; if (filter) { if (encode_flags & qpdf_ef_compress) { - new_pipeline = - std::make_shared("compress stream", pipeline, Pl_Flate::a_deflate); - to_delete.push_back(new_pipeline); + auto new_pipeline = + std::make_unique("compress stream", pipeline, Pl_Flate::a_deflate); pipeline = new_pipeline.get(); + to_delete.push_back(std::move(new_pipeline)); } if (encode_flags & qpdf_ef_normalize) { - normalizer = std::make_shared(); - new_pipeline = - std::make_shared("normalizer", normalizer.get(), pipeline); - to_delete.push_back(new_pipeline); + auto new_pipeline = + std::make_unique("normalizer", &normalizer, pipeline); pipeline = new_pipeline.get(); + to_delete.push_back(std::move(new_pipeline)); } for (auto iter = s->token_filters.rbegin(); iter != s->token_filters.rend(); ++iter) { - new_pipeline = - std::make_shared("token filter", (*iter).get(), pipeline); - to_delete.push_back(new_pipeline); + auto new_pipeline = + std::make_unique("token filter", (*iter).get(), pipeline); pipeline = new_pipeline.get(); + to_delete.push_back(std::move(new_pipeline)); } for (auto f_iter = filters.rbegin(); f_iter != filters.rend(); ++f_iter) { - auto decode_pipeline = (*f_iter)->getDecodePipeline(pipeline); - if (decode_pipeline) { + if (auto decode_pipeline = (*f_iter)->getDecodePipeline(pipeline)) { pipeline = decode_pipeline; } auto* flate = dynamic_cast(pipeline); - if (flate != nullptr) { + if (flate) { flate->setWarnCallback([this](char const* msg, int code) { warn(msg); }); } } @@ -495,18 +492,15 @@ Stream::pipeStreamData( if (!s->stream_provider->provideStreamData( obj->getObjGen(), &count, suppress_warnings, will_retry)) { filter = false; - success = false; + return false; } } else { s->stream_provider->provideStreamData(obj->getObjGen(), &count); } qpdf_offset_t actual_length = count.getCount(); - qpdf_offset_t desired_length = 0; - if (success && s->stream_dict.hasKey("/Length")) { - desired_length = s->stream_dict.getKey("/Length").getIntValue(); - if (actual_length == desired_length) { - QTC::TC("qpdf", "QPDF_Stream pipe use stream provider"); - } else { + if (s->stream_dict.hasKey("/Length")) { + auto desired_length = s->stream_dict.getKey("/Length").getIntValue(); + if (actual_length != desired_length) { QTC::TC("qpdf", "QPDF_Stream provider length mismatch"); // This would be caused by programmer error on the part of a library user, not by // invalid input data. @@ -515,14 +509,15 @@ Stream::pipeStreamData( std::to_string(actual_length) + " bytes instead of expected " + std::to_string(desired_length) + " bytes"); } - } else if (success) { + } else { QTC::TC("qpdf", "QPDF_Stream provider length not provided"); s->stream_dict.replaceKey("/Length", QPDFObjectHandle::newInteger(actual_length)); } - } else if (obj->getParsedOffset() == 0) { - QTC::TC("qpdf", "QPDF_Stream pipe no stream data"); - throw std::logic_error("pipeStreamData called for stream with no data"); } else { + if (obj->getParsedOffset() == 0) { + QTC::TC("qpdf", "QPDF_Stream pipe no stream data"); + throw std::logic_error("pipeStreamData called for stream with no data"); + } QTC::TC("qpdf", "QPDF_Stream pipe original stream data"); if (!QPDF::Pipe::pipeStreamData( obj->getQPDF(), @@ -535,13 +530,13 @@ Stream::pipeStreamData( suppress_warnings, will_retry)) { filter = false; - success = false; + return false; } } - if (filter && (!suppress_warnings) && normalizer.get() && normalizer->anyBadTokens()) { + if (filter && !suppress_warnings && normalizer.anyBadTokens()) { warn("content normalization encountered bad tokens"); - if (normalizer->lastTokenWasBad()) { + if (normalizer.lastTokenWasBad()) { QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize"); warn( "normalized content ended with a bad token; you may be able to resolve this by " @@ -554,7 +549,7 @@ Stream::pipeStreamData( "in the manual."); } - return success; + return true; } void diff --git a/libqpdf/SF_FlateLzwDecode.cc b/libqpdf/SF_FlateLzwDecode.cc index 433d585..bd5454d 100644 --- a/libqpdf/SF_FlateLzwDecode.cc +++ b/libqpdf/SF_FlateLzwDecode.cc @@ -69,48 +69,37 @@ SF_FlateLzwDecode::setDecodeParms(QPDFObjectHandle decode_parms) Pipeline* SF_FlateLzwDecode::getDecodePipeline(Pipeline* next) { - std::shared_ptr pipeline; + std::unique_ptr pipeline; if (predictor >= 10 && predictor <= 15) { QTC::TC("qpdf", "SF_FlateLzwDecode PNG filter"); - pipeline = std::make_shared( + pipeline = std::make_unique( "png decode", next, Pl_PNGFilter::a_decode, QIntC::to_uint(columns), QIntC::to_uint(colors), QIntC::to_uint(bits_per_component)); - pipelines.push_back(pipeline); next = pipeline.get(); + pipelines.push_back(std::move(pipeline)); } else if (predictor == 2) { QTC::TC("qpdf", "SF_FlateLzwDecode TIFF predictor"); - pipeline = std::make_shared( + pipeline = std::make_unique( "tiff decode", next, Pl_TIFFPredictor::a_decode, QIntC::to_uint(columns), QIntC::to_uint(colors), QIntC::to_uint(bits_per_component)); - pipelines.push_back(pipeline); next = pipeline.get(); + pipelines.push_back(std::move(pipeline)); } if (lzw) { - pipeline = std::make_shared("lzw decode", next, early_code_change); + pipeline = std::make_unique("lzw decode", next, early_code_change); } else { - pipeline = std::make_shared("stream inflate", next, Pl_Flate::a_inflate); + pipeline = std::make_unique("stream inflate", next, Pl_Flate::a_inflate); } - pipelines.push_back(pipeline); - return pipeline.get(); -} - -std::shared_ptr -SF_FlateLzwDecode::flate_factory() -{ - return std::make_shared(false); -} - -std::shared_ptr -SF_FlateLzwDecode::lzw_factory() -{ - return std::make_shared(true); + next = pipeline.get(); + pipelines.push_back(std::move(pipeline)); + return next; } diff --git a/libqpdf/qpdf/ContentNormalizer.hh b/libqpdf/qpdf/ContentNormalizer.hh index d423aa4..930cf14 100644 --- a/libqpdf/qpdf/ContentNormalizer.hh +++ b/libqpdf/qpdf/ContentNormalizer.hh @@ -3,15 +3,23 @@ #include -class ContentNormalizer: public QPDFObjectHandle::TokenFilter +class ContentNormalizer final: public QPDFObjectHandle::TokenFilter { public: ContentNormalizer(); - ~ContentNormalizer() override = default; - void handleToken(QPDFTokenizer::Token const&) override; + ~ContentNormalizer() final = default; + void handleToken(QPDFTokenizer::Token const&) final; - bool anyBadTokens() const; - bool lastTokenWasBad() const; + bool + anyBadTokens() const + { + return any_bad_tokens; + } + bool + lastTokenWasBad() const + { + return last_token_was_bad; + } private: bool any_bad_tokens; diff --git a/libqpdf/qpdf/QPDFObjectHandle_private.hh b/libqpdf/qpdf/QPDFObjectHandle_private.hh index a26d54f..ddba62f 100644 --- a/libqpdf/qpdf/QPDFObjectHandle_private.hh +++ b/libqpdf/qpdf/QPDFObjectHandle_private.hh @@ -312,17 +312,14 @@ namespace qpdf return nullptr; // unreachable } bool filterable( - std::vector>& filters, - bool& specialized_compression, - bool& lossy_compression); + qpdf_stream_decode_level_e decode_level, + std::vector>& filters); void replaceFilterData( QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms, size_t length); void warn(std::string const& message); static std::map filter_abbreviations; - static std::map()>> - filter_factories; }; template diff --git a/libqpdf/qpdf/QPDFObject_private.hh b/libqpdf/qpdf/QPDFObject_private.hh index 61b8d60..ce68f1f 100644 --- a/libqpdf/qpdf/QPDFObject_private.hh +++ b/libqpdf/qpdf/QPDFObject_private.hh @@ -226,6 +226,8 @@ class QPDF_Stream final std::shared_ptr stream_data; std::shared_ptr stream_provider; std::vector> token_filters; + std::function()> + filter_factory(std::string const& name) const; }; friend class QPDFObject; diff --git a/libqpdf/qpdf/SF_FlateLzwDecode.hh b/libqpdf/qpdf/SF_FlateLzwDecode.hh index 4b70b50..9244adc 100644 --- a/libqpdf/qpdf/SF_FlateLzwDecode.hh +++ b/libqpdf/qpdf/SF_FlateLzwDecode.hh @@ -17,8 +17,16 @@ class SF_FlateLzwDecode final: public QPDFStreamFilter bool setDecodeParms(QPDFObjectHandle decode_parms) final; Pipeline* getDecodePipeline(Pipeline* next) final; - static std::shared_ptr flate_factory(); - static std::shared_ptr lzw_factory(); + static std::shared_ptr + flate_factory() + { + return std::make_shared(false); + } + static std::shared_ptr + lzw_factory() + { + return std::make_shared(true); + } private: bool lzw{}; @@ -28,7 +36,7 @@ class SF_FlateLzwDecode final: public QPDFStreamFilter int colors{1}; int bits_per_component{8}; bool early_code_change{true}; - std::vector> pipelines; + std::vector> pipelines; }; #endif // SF_FLATELZWDECODE_HH diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index b4d2eab..4e90b10 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -77,7 +77,6 @@ QPDFTokenizer bad hexstring 2nd character 0 QPDFTokenizer null in name 0 QPDFTokenizer bad name 1 0 QPDFTokenizer bad name 2 0 -QPDF_Stream invalid filter 0 QPDF UseOutlines but no Outlines 0 QPDFObjectHandle makeDirect loop 0 QPDFObjectHandle copy stream 1 @@ -168,7 +167,6 @@ qpdf-c called qpdf_has_error 0 qpdf-c called qpdf_get_qpdf_version 0 QPDF_Stream pipe original stream data 0 QPDF_Stream pipe replaced stream data 0 -QPDF_Stream pipe use stream provider 0 QPDF_Stream provider length mismatch 0 QPDFObjectHandle newStream 0 QPDFObjectHandle newStream with data 0 @@ -177,7 +175,6 @@ QPDFObjectHandle prepend page contents 0 QPDFObjectHandle append page contents 0 QPDF_Stream getRawStreamData 0 QPDF_Stream getStreamData 0 -QPDF_Stream expand filter abbreviation 0 qpdf-c called qpdf_read_memory 0 QPDF stream without newline 0 QPDF stream with CR only 0 @@ -281,7 +278,6 @@ QPDF ignore second extra space in xref entry 0 QPDF ignore length error xref entry 0 QPDF_encryption pad short parameter 0 QPDFObjectHandle found old angle 1 -QPDF_Stream special filters 3 QPDFTokenizer block long token 0 qpdf-c called qpdf_set_decode_level 0 qpdf-c called qpdf_set_compress_streams 0