Commit 8720065c61799978103f2441987f2ac2522c86c6

Authored by Jay Berkenbilt
1 parent 4927e049

Fix logic around cleartext metadata (fixes #1368)

Only top-level XMP metadata is supposed to be left unencrypted. All
other metadata is not treated specially.
include/qpdf/QPDF.hh
... ... @@ -835,6 +835,7 @@ class QPDF
835 835 qpdf_offset_t offset,
836 836 size_t length,
837 837 QPDFObjectHandle dict,
  838 + bool is_root_metadata,
838 839 Pipeline* pipeline,
839 840 bool suppress_warnings,
840 841 bool will_retry);
... ... @@ -848,6 +849,7 @@ class QPDF
848 849 qpdf_offset_t offset,
849 850 size_t length,
850 851 QPDFObjectHandle dict,
  852 + bool is_root_metadata,
851 853 Pipeline* pipeline,
852 854 bool suppress_warnings,
853 855 bool will_retry);
... ... @@ -920,6 +922,7 @@ class QPDF
920 922 Pipeline*& pipeline,
921 923 QPDFObjGen og,
922 924 QPDFObjectHandle& stream_dict,
  925 + bool is_root_metadata,
923 926 std::unique_ptr<Pipeline>& heap);
924 927  
925 928 // Methods to support object copying
... ...
include/qpdf/QPDFObjectHandle.hh
... ... @@ -1063,6 +1063,10 @@ class QPDFObjectHandle: public qpdf::BaseHandle
1063 1063 QPDF_DLL
1064 1064 void replaceDict(QPDFObjectHandle const&);
1065 1065  
  1066 + // Test whether a stream is the root XMP /Metadata object of its owning QPDF.
  1067 + QPDF_DLL
  1068 + bool isRootMetadata() const;
  1069 +
1066 1070 // REPLACING STREAM DATA
1067 1071  
1068 1072 // Note about all replaceStreamData methods: whatever values are passed as filter and
... ...
libqpdf/QPDF.cc
... ... @@ -115,13 +115,15 @@ QPDF::ForeignStreamData::ForeignStreamData(
115 115 QPDFObjGen foreign_og,
116 116 qpdf_offset_t offset,
117 117 size_t length,
118   - QPDFObjectHandle local_dict) :
  118 + QPDFObjectHandle local_dict,
  119 + bool is_root_metadata) :
119 120 encp(encp),
120 121 file(file),
121 122 foreign_og(foreign_og),
122 123 offset(offset),
123 124 length(length),
124   - local_dict(local_dict)
  125 + local_dict(local_dict),
  126 + is_root_metadata(is_root_metadata)
125 127 {
126 128 }
127 129  
... ... @@ -740,7 +742,8 @@ QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign)
740 742 foreign,
741 743 foreign.getParsedOffset(),
742 744 stream.getLength(),
743   - dict);
  745 + dict,
  746 + stream.isRootMetadata());
744 747 m->copied_stream_data_provider->registerForeignStream(local_og, foreign_stream_data);
745 748 result.replaceStreamData(
746 749 m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
... ... @@ -849,13 +852,15 @@ QPDF::pipeStreamData(
849 852 qpdf_offset_t offset,
850 853 size_t length,
851 854 QPDFObjectHandle stream_dict,
  855 + bool is_root_metadata,
852 856 Pipeline* pipeline,
853 857 bool suppress_warnings,
854 858 bool will_retry)
855 859 {
856 860 std::unique_ptr<Pipeline> to_delete;
857 861 if (encp->encrypted) {
858   - decryptStream(encp, file, qpdf_for_warning, pipeline, og, stream_dict, to_delete);
  862 + decryptStream(
  863 + encp, file, qpdf_for_warning, pipeline, og, stream_dict, is_root_metadata, to_delete);
859 864 }
860 865  
861 866 bool attempted_finish = false;
... ... @@ -911,6 +916,7 @@ QPDF::pipeStreamData(
911 916 qpdf_offset_t offset,
912 917 size_t length,
913 918 QPDFObjectHandle stream_dict,
  919 + bool is_root_metadata,
914 920 Pipeline* pipeline,
915 921 bool suppress_warnings,
916 922 bool will_retry)
... ... @@ -923,6 +929,7 @@ QPDF::pipeStreamData(
923 929 offset,
924 930 length,
925 931 stream_dict,
  932 + is_root_metadata,
926 933 pipeline,
927 934 suppress_warnings,
928 935 will_retry);
... ... @@ -946,6 +953,7 @@ QPDF::pipeForeignStreamData(
946 953 foreign->offset,
947 954 foreign->length,
948 955 foreign->local_dict,
  956 + foreign->is_root_metadata,
949 957 pipeline,
950 958 suppress_warnings,
951 959 will_retry);
... ...
libqpdf/QPDFWriter.cc
... ... @@ -1247,18 +1247,18 @@ QPDFWriter::writeTrailer(
1247 1247 bool
1248 1248 QPDFWriter::willFilterStream(
1249 1249 QPDFObjectHandle stream,
1250   - bool& compress_stream, // out only
1251   - bool& is_metadata, // out only
  1250 + bool& compress_stream, // out only
  1251 + bool& is_root_metadata, // out only
1252 1252 std::string* stream_data)
1253 1253 {
1254 1254 compress_stream = false;
1255   - is_metadata = false;
  1255 + is_root_metadata = false;
1256 1256  
1257 1257 QPDFObjGen old_og = stream.getObjGen();
1258 1258 QPDFObjectHandle stream_dict = stream.getDict();
1259 1259  
1260   - if (stream_dict.isDictionaryOfType("/Metadata")) {
1261   - is_metadata = true;
  1260 + if (stream.isRootMetadata()) {
  1261 + is_root_metadata = true;
1262 1262 }
1263 1263 bool filter = stream.isDataModified() || m->compress_streams || m->stream_decode_level;
1264 1264 bool filter_on_write = stream.getFilterOnWrite();
... ... @@ -1280,7 +1280,7 @@ QPDFWriter::willFilterStream(
1280 1280 }
1281 1281 bool normalize = false;
1282 1282 bool uncompress = false;
1283   - if (filter_on_write && is_metadata && (!m->encrypted || !m->encrypt_metadata)) {
  1283 + if (filter_on_write && is_root_metadata && (!m->encrypted || !m->encrypt_metadata)) {
1284 1284 QTC::TC("qpdf", "QPDFWriter not compressing metadata");
1285 1285 filter = true;
1286 1286 compress_stream = false;
... ...
libqpdf/QPDF_Stream.cc
... ... @@ -281,6 +281,16 @@ Stream::getRawStreamData()
281 281 }
282 282  
283 283 bool
  284 +Stream::isRootMetadata() const
  285 +{
  286 + if (!getDict().isDictionaryOfType("/Metadata", "/XML")) {
  287 + return false;
  288 + }
  289 + auto root_metadata = qpdf()->getRoot().getKey("/Metadata");
  290 + return root_metadata.isSameObjectAs(obj);
  291 +}
  292 +
  293 +bool
284 294 Stream::filterable(
285 295 std::vector<std::shared_ptr<QPDFStreamFilter>>& filters,
286 296 bool& specialized_compression,
... ... @@ -520,6 +530,7 @@ Stream::pipeStreamData(
520 530 obj->getParsedOffset(),
521 531 s->length,
522 532 s->stream_dict,
  533 + isRootMetadata(),
523 534 pipeline,
524 535 suppress_warnings,
525 536 will_retry)) {
... ... @@ -626,6 +637,12 @@ QPDFObjectHandle::replaceDict(QPDFObjectHandle const&amp; new_dict)
626 637 as_stream(error).replaceDict(new_dict);
627 638 }
628 639  
  640 +bool
  641 +QPDFObjectHandle::isRootMetadata() const
  642 +{
  643 + return as_stream(error).isRootMetadata();
  644 +}
  645 +
629 646 std::shared_ptr<Buffer>
630 647 QPDFObjectHandle::getStreamData(qpdf_stream_decode_level_e level)
631 648 {
... ...
libqpdf/QPDF_encryption.cc
... ... @@ -1049,6 +1049,7 @@ QPDF::decryptStream(
1049 1049 Pipeline*& pipeline,
1050 1050 QPDFObjGen og,
1051 1051 QPDFObjectHandle& stream_dict,
  1052 + bool is_root_metadata,
1052 1053 std::unique_ptr<Pipeline>& decrypt_pipeline)
1053 1054 {
1054 1055 std::string type;
... ... @@ -1094,7 +1095,7 @@ QPDF::decryptStream(
1094 1095 }
1095 1096  
1096 1097 if (method == e_unknown) {
1097   - if ((!encp->encrypt_metadata) && (type == "/Metadata")) {
  1098 + if ((!encp->encrypt_metadata) && is_root_metadata) {
1098 1099 QTC::TC("qpdf", "QPDF_encryption cleartext metadata");
1099 1100 method = e_none;
1100 1101 } else {
... ...
libqpdf/qpdf/QPDFObjectHandle_private.hh
... ... @@ -290,6 +290,7 @@ namespace qpdf
290 290 s->stream_dict = new_dict;
291 291 setDictDescription();
292 292 }
  293 + bool isRootMetadata() const;
293 294  
294 295 void setDictDescription();
295 296  
... ...
libqpdf/qpdf/QPDF_private.hh
... ... @@ -143,12 +143,13 @@ class QPDF::Pipe
143 143 qpdf_offset_t offset,
144 144 size_t length,
145 145 QPDFObjectHandle dict,
  146 + bool is_root_metadata,
146 147 Pipeline* pipeline,
147 148 bool suppress_warnings,
148 149 bool will_retry)
149 150 {
150 151 return qpdf->pipeStreamData(
151   - og, offset, length, dict, pipeline, suppress_warnings, will_retry);
  152 + og, offset, length, dict, is_root_metadata, pipeline, suppress_warnings, will_retry);
152 153 }
153 154 };
154 155  
... ... @@ -216,7 +217,8 @@ class QPDF::ForeignStreamData
216 217 QPDFObjGen foreign_og,
217 218 qpdf_offset_t offset,
218 219 size_t length,
219   - QPDFObjectHandle local_dict);
  220 + QPDFObjectHandle local_dict,
  221 + bool is_root_metadata);
220 222  
221 223 private:
222 224 std::shared_ptr<EncryptionParameters> encp;
... ... @@ -225,6 +227,7 @@ class QPDF::ForeignStreamData
225 227 qpdf_offset_t offset;
226 228 size_t length;
227 229 QPDFObjectHandle local_dict;
  230 + bool is_root_metadata{false};
228 231 };
229 232  
230 233 class QPDF::CopiedStreamDataProvider: public QPDFObjectHandle::StreamDataProvider
... ...
manual/release-notes.rst
... ... @@ -32,6 +32,12 @@ more detail.
32 32 - Accept an array for ``rotate`` in qpdf job JSON since it is a
33 33 repeatable option.
34 34  
  35 + - When reading an encrypted PDF with cleartext metadata, only
  36 + expect top-level /Metadata to be clear-text. When writing an
  37 + encrypted PDF with cleartext metadata, only leave top-level
  38 + unencrypted. qpdf has always incorrectly handled all
  39 + ``/Metadata`` streams as special with cleartext metadata.
  40 +
35 41 - CLI Enhancements
36 42  
37 43 - New :qpdf:ref:`--remove-structure` option to exclude the document
... ...
qpdf/qtest/cleartext-metadata.test
... ... @@ -14,7 +14,7 @@ cleanup();
14 14  
15 15 my $td = new TestDriver('cleartext-metadata');
16 16  
17   -my $n_tests = 58;
  17 +my $n_tests = 61;
18 18  
19 19 # args: file, exp_encrypted, exp_cleartext
20 20 check_metadata($td, "compressed-metadata.pdf", 0, 0);
... ... @@ -68,5 +68,16 @@ foreach my $f (qw(compressed-metadata.pdf enc-base.pdf))
68 68 }
69 69 }
70 70  
  71 +$td->runtest("only root metadata is compressed",
  72 + {$td->COMMAND => "qpdf --deterministic-id compressed-metadata.pdf a.pdf"},
  73 + {$td->STRING => "", $td->EXIT_STATUS => 0});
  74 +$td->runtest("check file",
  75 + {$td->COMMAND => "qpdf-test-compare a.pdf compressed-metadata-out-normal.pdf"},
  76 + {$td->FILE => "compressed-metadata-out-normal.pdf", $td->EXIT_STATUS => 0});
  77 +$td->runtest("additional cleartext metadata tests",
  78 + {$td->COMMAND => "test_driver 100 compressed-metadata.pdf"},
  79 + {$td->STRING => "test 100 done\n", $td->EXIT_STATUS => 0},
  80 + $td->NORMALIZE_NEWLINES);
  81 +
71 82 cleanup();
72 83 $td->report($n_tests);
... ...
qpdf/qtest/qpdf/compressed-metadata-out-normal.pdf 0 โ†’ 100644
No preview for this file type
qpdf/qtest/qpdf/compressed-metadata.pdf
No preview for this file type
qpdf/test_driver.cc
... ... @@ -3482,6 +3482,85 @@ test_99(QPDF&amp; pdf, char const* arg2)
3482 3482 }
3483 3483 }
3484 3484  
  3485 +static void
  3486 +test_100(QPDF& pdf, char const* arg2)
  3487 +{
  3488 + // Designed for compressed-metadata.pdf
  3489 + auto is_cleartext = [](QPDFObjectHandle& metadata, bool raw) {
  3490 + std::string buf;
  3491 + Pl_String bufpl("buffer", nullptr, buf);
  3492 + metadata.pipeStreamData(&bufpl, 0, raw ? qpdf_dl_none : qpdf_dl_generalized);
  3493 + return buf.substr(0, 9) == "<?xpacket";
  3494 + };
  3495 + auto is_cleartext_in_file = [](std::string const& raw_bytes,
  3496 + QPDFObjectHandle& metadata) -> bool {
  3497 + auto buf = metadata.getRawStreamData();
  3498 + auto offset = metadata.getParsedOffset();
  3499 + auto from_file = raw_bytes.substr(QIntC::to_size(offset), 10);
  3500 + assert(buf->getSize() > 10);
  3501 + auto from_buf = std::string(reinterpret_cast<char*>(buf->getBuffer()), from_file.size());
  3502 + return from_buf == from_file;
  3503 + };
  3504 +
  3505 + {
  3506 + QPDFWriter w(pdf, "a.pdf");
  3507 + w.setR6EncryptionParameters(
  3508 + "", "", true, true, true, true, true, true, qpdf_r3p_full, false);
  3509 + w.write();
  3510 + }
  3511 + {
  3512 + QPDF encrypted;
  3513 + encrypted.processFile("a.pdf");
  3514 + auto raw_bytes = QUtil::read_file_into_string("a.pdf");
  3515 + auto root = encrypted.getRoot();
  3516 + auto root_metadata = root.getKey("/Metadata");
  3517 + if (!root_metadata.isStream()) {
  3518 + throw std::logic_error("test 100 run on file with no metadata");
  3519 + }
  3520 + assert(is_cleartext_in_file(raw_bytes, root_metadata));
  3521 + assert(is_cleartext(root_metadata, true));
  3522 + auto n_pages = QIntC::to_size(root.getKey("/Pages").getKey("/Count").getIntValue());
  3523 + auto page = encrypted.getAllPages().at(n_pages - 1);
  3524 + auto page_metadata = page.getKey("/Metadata");
  3525 + if (!page_metadata.isStream()) {
  3526 + throw std::logic_error("test 100 run on file with no metadata on last page");
  3527 + }
  3528 + // It's encrypted in the file, but you can recover the data, so it is properly decrypted.
  3529 + assert(!is_cleartext_in_file(raw_bytes, page_metadata));
  3530 + assert(!is_cleartext(page_metadata, true));
  3531 + assert(is_cleartext(page_metadata, false));
  3532 +
  3533 + // Now copy these metadata objects to the file and write it out again.
  3534 + auto copied_root_metadata = pdf.copyForeignObject(root_metadata);
  3535 + auto copied_page_metadata = pdf.copyForeignObject(page_metadata);
  3536 + pdf.getRoot().replaceKey("/CopiedRootMetadata", copied_root_metadata);
  3537 + pdf.getRoot().replaceKey("/CopiedPageMetadata", copied_root_metadata);
  3538 + QPDFWriter w(pdf, "b.pdf");
  3539 + w.setR6EncryptionParameters(
  3540 + "", "", true, true, true, true, true, true, qpdf_r3p_full, false);
  3541 + w.write();
  3542 + }
  3543 + auto raw_bytes = QUtil::read_file_into_string("b.pdf");
  3544 + QPDF updated;
  3545 + updated.processFile("b.pdf");
  3546 + auto root = updated.getRoot();
  3547 + auto root_metadata = root.getKey("/Metadata");
  3548 + auto n_pages = QIntC::to_size(root.getKey("/Pages").getKey("/Count").getIntValue());
  3549 + auto page = updated.getAllPages().at(n_pages - 1);
  3550 + auto page_metadata = page.getKey("/Metadata");
  3551 + auto copied_root = root.getKey("/CopiedRootMetadata");
  3552 + auto copied_page = root.getKey("/CopiedPageMetadata");
  3553 + // The ultimate root is still clear-text in file
  3554 + assert(is_cleartext_in_file(raw_bytes, root_metadata));
  3555 + assert(is_cleartext(root_metadata, true));
  3556 + // Everything else is compressed and encrypted in the file (not handled as special case).
  3557 + for (auto o: {page_metadata, copied_root, copied_page}) {
  3558 + assert(!is_cleartext_in_file(raw_bytes, o));
  3559 + assert(!is_cleartext(o, true));
  3560 + assert(is_cleartext(o, false));
  3561 + }
  3562 +}
  3563 +
3485 3564 void
3486 3565 runtest(int n, char const* filename1, char const* arg2)
3487 3566 {
... ... @@ -3563,23 +3642,23 @@ runtest(int n, char const* filename1, char const* arg2)
3563 3642 }
3564 3643  
3565 3644 std::map<int, void (*)(QPDF&, char const*)> test_functions = {
3566   - {0, test_0_1}, {1, test_0_1}, {2, test_2}, {3, test_3}, {4, test_4}, {5, test_5},
3567   - {6, test_6}, {7, test_7}, {8, test_8}, {9, test_9}, {10, test_10}, {11, test_11},
3568   - {12, test_12}, {13, test_13}, {14, test_14}, {15, test_15}, {16, test_16}, {17, test_17},
3569   - {18, test_18}, {19, test_19}, {20, test_20}, {21, test_21}, {22, test_22}, {23, test_23},
3570   - {24, test_24}, {25, test_25}, {26, test_26}, {27, test_27}, {28, test_28}, {29, test_29},
3571   - {30, test_30}, {31, test_31}, {32, test_32}, {33, test_33}, {34, test_34}, {35, test_35},
3572   - {36, test_36}, {37, test_37}, {38, test_38}, {39, test_39}, {40, test_40}, {41, test_41},
3573   - {42, test_42}, {43, test_43}, {44, test_44}, {45, test_45}, {46, test_46}, {47, test_47},
3574   - {48, test_48}, {49, test_49}, {50, test_50}, {51, test_51}, {52, test_52}, {53, test_53},
3575   - {54, test_54}, {55, test_55}, {56, test_56}, {57, test_57}, {58, test_58}, {59, test_59},
3576   - {60, test_60}, {61, test_61}, {62, test_62}, {63, test_63}, {64, test_64}, {65, test_65},
3577   - {66, test_66}, {67, test_67}, {68, test_68}, {69, test_69}, {70, test_70}, {71, test_71},
3578   - {72, test_72}, {73, test_73}, {74, test_74}, {75, test_75}, {76, test_76}, {77, test_77},
3579   - {78, test_78}, {79, test_79}, {80, test_80}, {81, test_81}, {82, test_82}, {83, test_83},
3580   - {84, test_84}, {85, test_85}, {86, test_86}, {87, test_87}, {88, test_88}, {89, test_89},
3581   - {90, test_90}, {91, test_91}, {92, test_92}, {93, test_93}, {94, test_94}, {95, test_95},
3582   - {96, test_96}, {97, test_97}, {98, test_98}, {99, test_99}};
  3645 + {0, test_0_1}, {1, test_0_1}, {2, test_2}, {3, test_3}, {4, test_4}, {5, test_5},
  3646 + {6, test_6}, {7, test_7}, {8, test_8}, {9, test_9}, {10, test_10}, {11, test_11},
  3647 + {12, test_12}, {13, test_13}, {14, test_14}, {15, test_15}, {16, test_16}, {17, test_17},
  3648 + {18, test_18}, {19, test_19}, {20, test_20}, {21, test_21}, {22, test_22}, {23, test_23},
  3649 + {24, test_24}, {25, test_25}, {26, test_26}, {27, test_27}, {28, test_28}, {29, test_29},
  3650 + {30, test_30}, {31, test_31}, {32, test_32}, {33, test_33}, {34, test_34}, {35, test_35},
  3651 + {36, test_36}, {37, test_37}, {38, test_38}, {39, test_39}, {40, test_40}, {41, test_41},
  3652 + {42, test_42}, {43, test_43}, {44, test_44}, {45, test_45}, {46, test_46}, {47, test_47},
  3653 + {48, test_48}, {49, test_49}, {50, test_50}, {51, test_51}, {52, test_52}, {53, test_53},
  3654 + {54, test_54}, {55, test_55}, {56, test_56}, {57, test_57}, {58, test_58}, {59, test_59},
  3655 + {60, test_60}, {61, test_61}, {62, test_62}, {63, test_63}, {64, test_64}, {65, test_65},
  3656 + {66, test_66}, {67, test_67}, {68, test_68}, {69, test_69}, {70, test_70}, {71, test_71},
  3657 + {72, test_72}, {73, test_73}, {74, test_74}, {75, test_75}, {76, test_76}, {77, test_77},
  3658 + {78, test_78}, {79, test_79}, {80, test_80}, {81, test_81}, {82, test_82}, {83, test_83},
  3659 + {84, test_84}, {85, test_85}, {86, test_86}, {87, test_87}, {88, test_88}, {89, test_89},
  3660 + {90, test_90}, {91, test_91}, {92, test_92}, {93, test_93}, {94, test_94}, {95, test_95},
  3661 + {96, test_96}, {97, test_97}, {98, test_98}, {99, test_99}, {100, test_100}};
3583 3662  
3584 3663 auto fn = test_functions.find(n);
3585 3664 if (fn == test_functions.end()) {
... ...