From 8720065c61799978103f2441987f2ac2522c86c6 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 5 Apr 2025 18:02:54 -0400 Subject: [PATCH] Fix logic around cleartext metadata (fixes #1368) --- include/qpdf/QPDF.hh | 3 +++ include/qpdf/QPDFObjectHandle.hh | 4 ++++ libqpdf/QPDF.cc | 16 ++++++++++++---- libqpdf/QPDFWriter.cc | 12 ++++++------ libqpdf/QPDF_Stream.cc | 17 +++++++++++++++++ libqpdf/QPDF_encryption.cc | 3 ++- libqpdf/qpdf/QPDFObjectHandle_private.hh | 1 + libqpdf/qpdf/QPDF_private.hh | 7 +++++-- manual/release-notes.rst | 6 ++++++ qpdf/qtest/cleartext-metadata.test | 13 ++++++++++++- qpdf/qtest/qpdf/compressed-metadata-out-normal.pdf | Bin 0 -> 14895 bytes qpdf/qtest/qpdf/compressed-metadata.pdf | Bin 14007 -> 0 bytes qpdf/test_driver.cc | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------- 13 files changed, 164 insertions(+), 31 deletions(-) create mode 100644 qpdf/qtest/qpdf/compressed-metadata-out-normal.pdf diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 6d6e2fd..96276ca 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -835,6 +835,7 @@ class QPDF qpdf_offset_t offset, size_t length, QPDFObjectHandle dict, + bool is_root_metadata, Pipeline* pipeline, bool suppress_warnings, bool will_retry); @@ -848,6 +849,7 @@ class QPDF qpdf_offset_t offset, size_t length, QPDFObjectHandle dict, + bool is_root_metadata, Pipeline* pipeline, bool suppress_warnings, bool will_retry); @@ -920,6 +922,7 @@ class QPDF Pipeline*& pipeline, QPDFObjGen og, QPDFObjectHandle& stream_dict, + bool is_root_metadata, std::unique_ptr& heap); // Methods to support object copying diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index c7d8d30..4a9aa57 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -1063,6 +1063,10 @@ class QPDFObjectHandle: public qpdf::BaseHandle QPDF_DLL void replaceDict(QPDFObjectHandle const&); + // Test whether a stream is the root XMP /Metadata object of its owning QPDF. + QPDF_DLL + bool isRootMetadata() const; + // REPLACING STREAM DATA // Note about all replaceStreamData methods: whatever values are passed as filter and diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index f6adbc8..c048f75 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -115,13 +115,15 @@ QPDF::ForeignStreamData::ForeignStreamData( QPDFObjGen foreign_og, qpdf_offset_t offset, size_t length, - QPDFObjectHandle local_dict) : + QPDFObjectHandle local_dict, + bool is_root_metadata) : encp(encp), file(file), foreign_og(foreign_og), offset(offset), length(length), - local_dict(local_dict) + local_dict(local_dict), + is_root_metadata(is_root_metadata) { } @@ -740,7 +742,8 @@ QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign) foreign, foreign.getParsedOffset(), stream.getLength(), - dict); + dict, + stream.isRootMetadata()); m->copied_stream_data_provider->registerForeignStream(local_og, foreign_stream_data); result.replaceStreamData( m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms")); @@ -849,13 +852,15 @@ QPDF::pipeStreamData( qpdf_offset_t offset, size_t length, QPDFObjectHandle stream_dict, + bool is_root_metadata, Pipeline* pipeline, bool suppress_warnings, bool will_retry) { std::unique_ptr to_delete; if (encp->encrypted) { - decryptStream(encp, file, qpdf_for_warning, pipeline, og, stream_dict, to_delete); + decryptStream( + encp, file, qpdf_for_warning, pipeline, og, stream_dict, is_root_metadata, to_delete); } bool attempted_finish = false; @@ -911,6 +916,7 @@ QPDF::pipeStreamData( qpdf_offset_t offset, size_t length, QPDFObjectHandle stream_dict, + bool is_root_metadata, Pipeline* pipeline, bool suppress_warnings, bool will_retry) @@ -923,6 +929,7 @@ QPDF::pipeStreamData( offset, length, stream_dict, + is_root_metadata, pipeline, suppress_warnings, will_retry); @@ -946,6 +953,7 @@ QPDF::pipeForeignStreamData( foreign->offset, foreign->length, foreign->local_dict, + foreign->is_root_metadata, pipeline, suppress_warnings, will_retry); diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index c240a59..cf97ea3 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -1247,18 +1247,18 @@ QPDFWriter::writeTrailer( bool QPDFWriter::willFilterStream( QPDFObjectHandle stream, - bool& compress_stream, // out only - bool& is_metadata, // out only + bool& compress_stream, // out only + bool& is_root_metadata, // out only std::string* stream_data) { compress_stream = false; - is_metadata = false; + is_root_metadata = false; QPDFObjGen old_og = stream.getObjGen(); QPDFObjectHandle stream_dict = stream.getDict(); - if (stream_dict.isDictionaryOfType("/Metadata")) { - is_metadata = true; + if (stream.isRootMetadata()) { + is_root_metadata = true; } bool filter = stream.isDataModified() || m->compress_streams || m->stream_decode_level; bool filter_on_write = stream.getFilterOnWrite(); @@ -1280,7 +1280,7 @@ QPDFWriter::willFilterStream( } bool normalize = false; bool uncompress = false; - if (filter_on_write && is_metadata && (!m->encrypted || !m->encrypt_metadata)) { + if (filter_on_write && is_root_metadata && (!m->encrypted || !m->encrypt_metadata)) { QTC::TC("qpdf", "QPDFWriter not compressing metadata"); filter = true; compress_stream = false; diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc index 29e735b..105973f 100644 --- a/libqpdf/QPDF_Stream.cc +++ b/libqpdf/QPDF_Stream.cc @@ -281,6 +281,16 @@ Stream::getRawStreamData() } bool +Stream::isRootMetadata() const +{ + if (!getDict().isDictionaryOfType("/Metadata", "/XML")) { + return false; + } + auto root_metadata = qpdf()->getRoot().getKey("/Metadata"); + return root_metadata.isSameObjectAs(obj); +} + +bool Stream::filterable( std::vector>& filters, bool& specialized_compression, @@ -520,6 +530,7 @@ Stream::pipeStreamData( obj->getParsedOffset(), s->length, s->stream_dict, + isRootMetadata(), pipeline, suppress_warnings, will_retry)) { @@ -626,6 +637,12 @@ QPDFObjectHandle::replaceDict(QPDFObjectHandle const& new_dict) as_stream(error).replaceDict(new_dict); } +bool +QPDFObjectHandle::isRootMetadata() const +{ + return as_stream(error).isRootMetadata(); +} + std::shared_ptr QPDFObjectHandle::getStreamData(qpdf_stream_decode_level_e level) { diff --git a/libqpdf/QPDF_encryption.cc b/libqpdf/QPDF_encryption.cc index 3abfc64..f008dfe 100644 --- a/libqpdf/QPDF_encryption.cc +++ b/libqpdf/QPDF_encryption.cc @@ -1049,6 +1049,7 @@ QPDF::decryptStream( Pipeline*& pipeline, QPDFObjGen og, QPDFObjectHandle& stream_dict, + bool is_root_metadata, std::unique_ptr& decrypt_pipeline) { std::string type; @@ -1094,7 +1095,7 @@ QPDF::decryptStream( } if (method == e_unknown) { - if ((!encp->encrypt_metadata) && (type == "/Metadata")) { + if ((!encp->encrypt_metadata) && is_root_metadata) { QTC::TC("qpdf", "QPDF_encryption cleartext metadata"); method = e_none; } else { diff --git a/libqpdf/qpdf/QPDFObjectHandle_private.hh b/libqpdf/qpdf/QPDFObjectHandle_private.hh index 2473c2b..a26d54f 100644 --- a/libqpdf/qpdf/QPDFObjectHandle_private.hh +++ b/libqpdf/qpdf/QPDFObjectHandle_private.hh @@ -290,6 +290,7 @@ namespace qpdf s->stream_dict = new_dict; setDictDescription(); } + bool isRootMetadata() const; void setDictDescription(); diff --git a/libqpdf/qpdf/QPDF_private.hh b/libqpdf/qpdf/QPDF_private.hh index 54ad4f7..eae4199 100644 --- a/libqpdf/qpdf/QPDF_private.hh +++ b/libqpdf/qpdf/QPDF_private.hh @@ -143,12 +143,13 @@ class QPDF::Pipe qpdf_offset_t offset, size_t length, QPDFObjectHandle dict, + bool is_root_metadata, Pipeline* pipeline, bool suppress_warnings, bool will_retry) { return qpdf->pipeStreamData( - og, offset, length, dict, pipeline, suppress_warnings, will_retry); + og, offset, length, dict, is_root_metadata, pipeline, suppress_warnings, will_retry); } }; @@ -216,7 +217,8 @@ class QPDF::ForeignStreamData QPDFObjGen foreign_og, qpdf_offset_t offset, size_t length, - QPDFObjectHandle local_dict); + QPDFObjectHandle local_dict, + bool is_root_metadata); private: std::shared_ptr encp; @@ -225,6 +227,7 @@ class QPDF::ForeignStreamData qpdf_offset_t offset; size_t length; QPDFObjectHandle local_dict; + bool is_root_metadata{false}; }; class QPDF::CopiedStreamDataProvider: public QPDFObjectHandle::StreamDataProvider diff --git a/manual/release-notes.rst b/manual/release-notes.rst index 2b30b64..168d40f 100644 --- a/manual/release-notes.rst +++ b/manual/release-notes.rst @@ -32,6 +32,12 @@ more detail. - Accept an array for ``rotate`` in qpdf job JSON since it is a repeatable option. + - When reading an encrypted PDF with cleartext metadata, only + expect top-level /Metadata to be clear-text. When writing an + encrypted PDF with cleartext metadata, only leave top-level + unencrypted. qpdf has always incorrectly handled all + ``/Metadata`` streams as special with cleartext metadata. + - CLI Enhancements - New :qpdf:ref:`--remove-structure` option to exclude the document diff --git a/qpdf/qtest/cleartext-metadata.test b/qpdf/qtest/cleartext-metadata.test index 10d1f6a..16b26e3 100644 --- a/qpdf/qtest/cleartext-metadata.test +++ b/qpdf/qtest/cleartext-metadata.test @@ -14,7 +14,7 @@ cleanup(); my $td = new TestDriver('cleartext-metadata'); -my $n_tests = 58; +my $n_tests = 61; # args: file, exp_encrypted, exp_cleartext check_metadata($td, "compressed-metadata.pdf", 0, 0); @@ -68,5 +68,16 @@ foreach my $f (qw(compressed-metadata.pdf enc-base.pdf)) } } +$td->runtest("only root metadata is compressed", + {$td->COMMAND => "qpdf --deterministic-id compressed-metadata.pdf a.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); +$td->runtest("check file", + {$td->COMMAND => "qpdf-test-compare a.pdf compressed-metadata-out-normal.pdf"}, + {$td->FILE => "compressed-metadata-out-normal.pdf", $td->EXIT_STATUS => 0}); +$td->runtest("additional cleartext metadata tests", + {$td->COMMAND => "test_driver 100 compressed-metadata.pdf"}, + {$td->STRING => "test 100 done\n", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + cleanup(); $td->report($n_tests); diff --git a/qpdf/qtest/qpdf/compressed-metadata-out-normal.pdf b/qpdf/qtest/qpdf/compressed-metadata-out-normal.pdf new file mode 100644 index 0000000..77c50b6 Binary files /dev/null and b/qpdf/qtest/qpdf/compressed-metadata-out-normal.pdf differ diff --git a/qpdf/qtest/qpdf/compressed-metadata.pdf b/qpdf/qtest/qpdf/compressed-metadata.pdf index 1d93bb3..1538eef 100644 Binary files a/qpdf/qtest/qpdf/compressed-metadata.pdf and b/qpdf/qtest/qpdf/compressed-metadata.pdf differ diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc index 843d576..4cb8882 100644 --- a/qpdf/test_driver.cc +++ b/qpdf/test_driver.cc @@ -3482,6 +3482,85 @@ test_99(QPDF& pdf, char const* arg2) } } +static void +test_100(QPDF& pdf, char const* arg2) +{ + // Designed for compressed-metadata.pdf + auto is_cleartext = [](QPDFObjectHandle& metadata, bool raw) { + std::string buf; + Pl_String bufpl("buffer", nullptr, buf); + metadata.pipeStreamData(&bufpl, 0, raw ? qpdf_dl_none : qpdf_dl_generalized); + return buf.substr(0, 9) == " bool { + auto buf = metadata.getRawStreamData(); + auto offset = metadata.getParsedOffset(); + auto from_file = raw_bytes.substr(QIntC::to_size(offset), 10); + assert(buf->getSize() > 10); + auto from_buf = std::string(reinterpret_cast(buf->getBuffer()), from_file.size()); + return from_buf == from_file; + }; + + { + QPDFWriter w(pdf, "a.pdf"); + w.setR6EncryptionParameters( + "", "", true, true, true, true, true, true, qpdf_r3p_full, false); + w.write(); + } + { + QPDF encrypted; + encrypted.processFile("a.pdf"); + auto raw_bytes = QUtil::read_file_into_string("a.pdf"); + auto root = encrypted.getRoot(); + auto root_metadata = root.getKey("/Metadata"); + if (!root_metadata.isStream()) { + throw std::logic_error("test 100 run on file with no metadata"); + } + assert(is_cleartext_in_file(raw_bytes, root_metadata)); + assert(is_cleartext(root_metadata, true)); + auto n_pages = QIntC::to_size(root.getKey("/Pages").getKey("/Count").getIntValue()); + auto page = encrypted.getAllPages().at(n_pages - 1); + auto page_metadata = page.getKey("/Metadata"); + if (!page_metadata.isStream()) { + throw std::logic_error("test 100 run on file with no metadata on last page"); + } + // It's encrypted in the file, but you can recover the data, so it is properly decrypted. + assert(!is_cleartext_in_file(raw_bytes, page_metadata)); + assert(!is_cleartext(page_metadata, true)); + assert(is_cleartext(page_metadata, false)); + + // Now copy these metadata objects to the file and write it out again. + auto copied_root_metadata = pdf.copyForeignObject(root_metadata); + auto copied_page_metadata = pdf.copyForeignObject(page_metadata); + pdf.getRoot().replaceKey("/CopiedRootMetadata", copied_root_metadata); + pdf.getRoot().replaceKey("/CopiedPageMetadata", copied_root_metadata); + QPDFWriter w(pdf, "b.pdf"); + w.setR6EncryptionParameters( + "", "", true, true, true, true, true, true, qpdf_r3p_full, false); + w.write(); + } + auto raw_bytes = QUtil::read_file_into_string("b.pdf"); + QPDF updated; + updated.processFile("b.pdf"); + auto root = updated.getRoot(); + auto root_metadata = root.getKey("/Metadata"); + auto n_pages = QIntC::to_size(root.getKey("/Pages").getKey("/Count").getIntValue()); + auto page = updated.getAllPages().at(n_pages - 1); + auto page_metadata = page.getKey("/Metadata"); + auto copied_root = root.getKey("/CopiedRootMetadata"); + auto copied_page = root.getKey("/CopiedPageMetadata"); + // The ultimate root is still clear-text in file + assert(is_cleartext_in_file(raw_bytes, root_metadata)); + assert(is_cleartext(root_metadata, true)); + // Everything else is compressed and encrypted in the file (not handled as special case). + for (auto o: {page_metadata, copied_root, copied_page}) { + assert(!is_cleartext_in_file(raw_bytes, o)); + assert(!is_cleartext(o, true)); + assert(is_cleartext(o, false)); + } +} + void runtest(int n, char const* filename1, char const* arg2) { @@ -3563,23 +3642,23 @@ runtest(int n, char const* filename1, char const* arg2) } std::map test_functions = { - {0, test_0_1}, {1, test_0_1}, {2, test_2}, {3, test_3}, {4, test_4}, {5, test_5}, - {6, test_6}, {7, test_7}, {8, test_8}, {9, test_9}, {10, test_10}, {11, test_11}, - {12, test_12}, {13, test_13}, {14, test_14}, {15, test_15}, {16, test_16}, {17, test_17}, - {18, test_18}, {19, test_19}, {20, test_20}, {21, test_21}, {22, test_22}, {23, test_23}, - {24, test_24}, {25, test_25}, {26, test_26}, {27, test_27}, {28, test_28}, {29, test_29}, - {30, test_30}, {31, test_31}, {32, test_32}, {33, test_33}, {34, test_34}, {35, test_35}, - {36, test_36}, {37, test_37}, {38, test_38}, {39, test_39}, {40, test_40}, {41, test_41}, - {42, test_42}, {43, test_43}, {44, test_44}, {45, test_45}, {46, test_46}, {47, test_47}, - {48, test_48}, {49, test_49}, {50, test_50}, {51, test_51}, {52, test_52}, {53, test_53}, - {54, test_54}, {55, test_55}, {56, test_56}, {57, test_57}, {58, test_58}, {59, test_59}, - {60, test_60}, {61, test_61}, {62, test_62}, {63, test_63}, {64, test_64}, {65, test_65}, - {66, test_66}, {67, test_67}, {68, test_68}, {69, test_69}, {70, test_70}, {71, test_71}, - {72, test_72}, {73, test_73}, {74, test_74}, {75, test_75}, {76, test_76}, {77, test_77}, - {78, test_78}, {79, test_79}, {80, test_80}, {81, test_81}, {82, test_82}, {83, test_83}, - {84, test_84}, {85, test_85}, {86, test_86}, {87, test_87}, {88, test_88}, {89, test_89}, - {90, test_90}, {91, test_91}, {92, test_92}, {93, test_93}, {94, test_94}, {95, test_95}, - {96, test_96}, {97, test_97}, {98, test_98}, {99, test_99}}; + {0, test_0_1}, {1, test_0_1}, {2, test_2}, {3, test_3}, {4, test_4}, {5, test_5}, + {6, test_6}, {7, test_7}, {8, test_8}, {9, test_9}, {10, test_10}, {11, test_11}, + {12, test_12}, {13, test_13}, {14, test_14}, {15, test_15}, {16, test_16}, {17, test_17}, + {18, test_18}, {19, test_19}, {20, test_20}, {21, test_21}, {22, test_22}, {23, test_23}, + {24, test_24}, {25, test_25}, {26, test_26}, {27, test_27}, {28, test_28}, {29, test_29}, + {30, test_30}, {31, test_31}, {32, test_32}, {33, test_33}, {34, test_34}, {35, test_35}, + {36, test_36}, {37, test_37}, {38, test_38}, {39, test_39}, {40, test_40}, {41, test_41}, + {42, test_42}, {43, test_43}, {44, test_44}, {45, test_45}, {46, test_46}, {47, test_47}, + {48, test_48}, {49, test_49}, {50, test_50}, {51, test_51}, {52, test_52}, {53, test_53}, + {54, test_54}, {55, test_55}, {56, test_56}, {57, test_57}, {58, test_58}, {59, test_59}, + {60, test_60}, {61, test_61}, {62, test_62}, {63, test_63}, {64, test_64}, {65, test_65}, + {66, test_66}, {67, test_67}, {68, test_68}, {69, test_69}, {70, test_70}, {71, test_71}, + {72, test_72}, {73, test_73}, {74, test_74}, {75, test_75}, {76, test_76}, {77, test_77}, + {78, test_78}, {79, test_79}, {80, test_80}, {81, test_81}, {82, test_82}, {83, test_83}, + {84, test_84}, {85, test_85}, {86, test_86}, {87, test_87}, {88, test_88}, {89, test_89}, + {90, test_90}, {91, test_91}, {92, test_92}, {93, test_93}, {94, test_94}, {95, test_95}, + {96, test_96}, {97, test_97}, {98, test_98}, {99, test_99}, {100, test_100}}; auto fn = test_functions.find(n); if (fn == test_functions.end()) { -- libgit2 0.21.4