Commit 94131116a90a076c49e799aa5e4c63ce0ecb0391
1 parent
3356b670
more notes, testing of cleartext metadata, some crypt filter fixes
git-svn-id: svn+q:///qpdf/trunk@823 71b93d88-0707-0410-a8cf-f5a4172ac649
Showing
7 changed files
with
149 additions
and
33 deletions
TODO
| ... | ... | @@ -43,15 +43,6 @@ |
| 43 | 43 | (http://delphi.about.com). .. use at your own risk and for whatever |
| 44 | 44 | the purpose you want .. no support provided. Sample code provided." |
| 45 | 45 | |
| 46 | - * Test cases for metadata: make sure we get uncompressed metadata for | |
| 47 | - all --stream-data modes unless encrypted. Have check_metadata | |
| 48 | - function in the test suite that should report whether the metadata | |
| 49 | - is compressed (by looking at the /Filter key in the stream | |
| 50 | - dictionary) and tries to extract it filtered to make sure | |
| 51 | - encryption/decryption works. We should also grep for some string | |
| 52 | - for encrypted files where it's not supposed to be encrypted to make | |
| 53 | - sure it's also not compressed. | |
| 54 | - | |
| 55 | 46 | * R = 4, V = 4 encryption. |
| 56 | 47 | |
| 57 | 48 | - Update C API for R4 encryption |
| ... | ... | @@ -64,7 +55,7 @@ |
| 64 | 55 | |
| 65 | 56 | - figure out a way to test crypt filters defined on a stream |
| 66 | 57 | |
| 67 | - - test extraction of metadata with and without encrypted metadata | |
| 58 | + - test combinations of linearization and v4 encryption | |
| 68 | 59 | |
| 69 | 60 | - would be nice to test strings and streams with different |
| 70 | 61 | encryption types, but without sample data, we'd have to write |
| ... | ... | @@ -115,6 +106,29 @@ |
| 115 | 106 | General |
| 116 | 107 | ======= |
| 117 | 108 | |
| 109 | + * Handle embedded files. PDF Reference 1.7 section 3.10, "File | |
| 110 | + Specifications", discusses this. Once we can definitely recognize | |
| 111 | + all embedded files in a document, we can update the encryption | |
| 112 | + code to handle it properly. In QPDF_encryption.cc, search for | |
| 113 | + cf_file. Remove exception thrown if cf_file is different from | |
| 114 | + cf_stream, and write code in the stream decryption section to use | |
| 115 | + cf_file instead of cf_stream. In general, add interfaces to | |
| 116 | + get the list of embedded files and to extract them. To handle | |
| 117 | + general embedded files associated with the whole document, follow | |
| 118 | + root -> /Names -> /EmbeddedFiles -> /Names to get to the file | |
| 119 | + specification dictionaries. Then, in each file specification | |
| 120 | + dictionary, follow /EF -> /F to the actual stream. | |
| 121 | + | |
| 122 | + * The description of Crypt filters is unclear with respect to how to | |
| 123 | + use them to override /StmF for specific streams. I'm not sure | |
| 124 | + whether qpdf will do the right thing for any specific individual | |
| 125 | + streams that might have crypt filters. The specification seems to | |
| 126 | + imply that only embedded file streams and metadata streams can have | |
| 127 | + crypt filters, and there are already special cases in the code to | |
| 128 | + handle those. Most likely, it won't be a problem, but someday | |
| 129 | + someone may find a file that qpdf doesn't work on because of crypt | |
| 130 | + filters. | |
| 131 | + | |
| 118 | 132 | * The second xref stream for linearized files has to be padded only |
| 119 | 133 | because we need file_size as computed in pass 1 to be accurate. If |
| 120 | 134 | we were not allowing writing to a pipe, we could seek back to the |
| ... | ... | @@ -150,10 +164,6 @@ General |
| 150 | 164 | of doing this seems very low since no viewer seems to care, so it's |
| 151 | 165 | probably not worth it. |
| 152 | 166 | |
| 153 | - * Embedded file streams: figure out why running qpdf over the pdf 1.7 | |
| 154 | - spec results in a file that crashes acrobat reader when you try to | |
| 155 | - save nested documents. | |
| 156 | - | |
| 157 | 167 | * QPDFObjectHandle::getPageImages() doesn't notice images in |
| 158 | 168 | inherited resource dictionaries. See comments in that function. |
| 159 | 169 | ... | ... |
libqpdf/QPDFWriter.cc
| ... | ... | @@ -791,14 +791,15 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, |
| 791 | 791 | } |
| 792 | 792 | else if (object.isDictionary()) |
| 793 | 793 | { |
| 794 | - // XXX Must not preserve Crypt filters from original stream | |
| 795 | - // dictionary | |
| 796 | 794 | writeString("<<"); |
| 797 | 795 | writeStringQDF("\n"); |
| 798 | 796 | std::set<std::string> keys = object.getKeys(); |
| 799 | 797 | for (std::set<std::string>::iterator iter = keys.begin(); |
| 800 | 798 | iter != keys.end(); ++iter) |
| 801 | 799 | { |
| 800 | + // I'm not fully clear on /Crypt keys in /DecodeParms. If | |
| 801 | + // one is found, we refuse to filter, so we should be | |
| 802 | + // safe. | |
| 802 | 803 | std::string const& key = *iter; |
| 803 | 804 | if ((flags & f_filtered) && |
| 804 | 805 | ((key == "/Filter") || | ... | ... |
libqpdf/QPDF_encryption.cc
| ... | ... | @@ -292,6 +292,10 @@ QPDF::interpretCF(QPDFObjectHandle cf) |
| 292 | 292 | { |
| 293 | 293 | return this->crypt_filters[filter]; |
| 294 | 294 | } |
| 295 | + else if (filter == "/Identity") | |
| 296 | + { | |
| 297 | + return e_none; | |
| 298 | + } | |
| 295 | 299 | else |
| 296 | 300 | { |
| 297 | 301 | return e_unknown; |
| ... | ... | @@ -299,6 +303,7 @@ QPDF::interpretCF(QPDFObjectHandle cf) |
| 299 | 303 | } |
| 300 | 304 | else |
| 301 | 305 | { |
| 306 | + // Default: /Identity | |
| 302 | 307 | return e_none; |
| 303 | 308 | } |
| 304 | 309 | } |
| ... | ... | @@ -432,12 +437,12 @@ QPDF::initializeEncryption() |
| 432 | 437 | std::string method_name = cdict.getKey("/CFM").getName(); |
| 433 | 438 | if (method_name == "/V2") |
| 434 | 439 | { |
| 435 | - // XXX coverage | |
| 440 | + QTC::TC("qpdf", "QPDF_encryption CFM V2"); | |
| 436 | 441 | method = e_rc4; |
| 437 | 442 | } |
| 438 | 443 | else if (method_name == "/AESV2") |
| 439 | 444 | { |
| 440 | - // XXX coverage | |
| 445 | + QTC::TC("qpdf", "QPDF_encryption CFM AESV2"); | |
| 441 | 446 | method = e_aes; |
| 442 | 447 | } |
| 443 | 448 | else |
| ... | ... | @@ -464,6 +469,15 @@ QPDF::initializeEncryption() |
| 464 | 469 | { |
| 465 | 470 | this->cf_file = this->cf_stream; |
| 466 | 471 | } |
| 472 | + if (this->cf_file != this->cf_stream) | |
| 473 | + { | |
| 474 | + throw QPDFExc(this->file.getName(), this->file.getLastOffset(), | |
| 475 | + "This document has embedded files that are" | |
| 476 | + " encrypted differently from the rest of the file." | |
| 477 | + " qpdf does not presently support this due to" | |
| 478 | + " lack of test data; if possible, please submit" | |
| 479 | + " a bug report that includes this file."); | |
| 480 | + } | |
| 467 | 481 | } |
| 468 | 482 | EncryptionData data(V, R, Length / 8, P, O, U, id1, this->encrypt_metadata); |
| 469 | 483 | if (check_owner_password( |
| ... | ... | @@ -542,7 +556,7 @@ QPDF::decryptString(std::string& str, int objid, int generation) |
| 542 | 556 | std::string key = getKeyForObject(objid, generation, use_aes); |
| 543 | 557 | if (use_aes) |
| 544 | 558 | { |
| 545 | - // XXX coverage | |
| 559 | + QTC::TC("qpdf", "QPDF_encryption aes decode string"); | |
| 546 | 560 | assert(key.length() == Pl_AES_PDF::key_size); |
| 547 | 561 | Pl_Buffer bufpl("decrypted string"); |
| 548 | 562 | Pl_AES_PDF pl("aes decrypt string", &bufpl, false, |
| ... | ... | @@ -586,30 +600,33 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation, |
| 586 | 600 | encryption_method_e method = e_unknown; |
| 587 | 601 | std::string method_source = "/StmF from /Encrypt dictionary"; |
| 588 | 602 | |
| 589 | - if (stream_dict.getKey("/DecodeParms").isDictionary()) | |
| 590 | - { | |
| 591 | - QPDFObjectHandle decode_parms = stream_dict.getKey("/DecodeParms"); | |
| 592 | - if (decode_parms.getKey("/Crypt").isDictionary()) | |
| 593 | - { | |
| 594 | - // XXX coverage | |
| 595 | - QPDFObjectHandle crypt = decode_parms.getKey("/Crypt"); | |
| 596 | - method = interpretCF(crypt.getKey("/Name")); | |
| 597 | - method_source = "stream's Crypt decode parameters"; | |
| 598 | - } | |
| 599 | - } | |
| 603 | + // NOTE: the section in the PDF specification on crypt filters | |
| 604 | + // seems to suggest that there might be a /Crypt key in | |
| 605 | + // /DecodeParms whose value is a crypt filter (e.g., << /Name | |
| 606 | + // /StdCF >>), but implementation notes suggest this can only | |
| 607 | + // happen for metadata streams, and empirical observation | |
| 608 | + // suggests that they are otherwise ignored. Not having been | |
| 609 | + // able to find a sample file that uses crypt filters in any | |
| 610 | + // way other than /StrF and /StmF, I'm not really sure what to | |
| 611 | + // do about this. If we were to override the encryption on a | |
| 612 | + // per-stream basis using crypt filters, set method_source to | |
| 613 | + // something useful in the error message for unknown | |
| 614 | + // encryption methods (search for method_source). | |
| 600 | 615 | |
| 601 | 616 | if (method == e_unknown) |
| 602 | 617 | { |
| 603 | 618 | if ((! this->encrypt_metadata) && (type == "/Metadata")) |
| 604 | 619 | { |
| 605 | - // XXX coverage | |
| 620 | + QTC::TC("qpdf", "QPDF_encryption cleartext metadata"); | |
| 606 | 621 | method = e_none; |
| 607 | 622 | } |
| 608 | 623 | else |
| 609 | 624 | { |
| 625 | + // NOTE: We should use cf_file if this is an | |
| 626 | + // embedded file, but we can't yet detect embedded | |
| 627 | + // file streams as such. | |
| 610 | 628 | method = this->cf_stream; |
| 611 | 629 | } |
| 612 | - // XXX What about embedded file streams? | |
| 613 | 630 | } |
| 614 | 631 | use_aes = false; |
| 615 | 632 | switch (method) |
| ... | ... | @@ -640,7 +657,7 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation, |
| 640 | 657 | std::string key = getKeyForObject(objid, generation, use_aes); |
| 641 | 658 | if (use_aes) |
| 642 | 659 | { |
| 643 | - // XXX coverage | |
| 660 | + QTC::TC("qpdf", "QPDF_encryption aes decode stream"); | |
| 644 | 661 | assert(key.length() == Pl_AES_PDF::key_size); |
| 645 | 662 | pipeline = new Pl_AES_PDF("AES stream decryption", pipeline, |
| 646 | 663 | false, (unsigned char*) key.c_str()); | ... | ... |
qpdf/qpdf.testcov
| ... | ... | @@ -161,3 +161,9 @@ qpdf-c called qpdf_init_write multiple times 0 |
| 161 | 161 | QPDF_encryption rc4 decode string 0 |
| 162 | 162 | QPDF_encryption rc4 decode stream 0 |
| 163 | 163 | QPDFWriter not compressing metadata 0 |
| 164 | +QPDF_encryption CFM V2 0 | |
| 165 | +QPDF_encryption CFM AESV2 0 | |
| 166 | +QPDF_encryption aes decode string 0 | |
| 167 | +QPDF_encryption cleartext metadata 0 | |
| 168 | +QPDF_encryption aes decode stream 0 | |
| 169 | +QPDF_encryption stream crypt filter 0 | ... | ... |
qpdf/qtest/qpdf.test
| ... | ... | @@ -620,6 +620,51 @@ $td->runtest("show-xref-by-id-filtered", |
| 620 | 620 | |
| 621 | 621 | show_ntests(); |
| 622 | 622 | # ---------- |
| 623 | +$td->notify("--- Clear-text Metadata Tests ---"); | |
| 624 | +$n_tests += 42; | |
| 625 | + | |
| 626 | +# args: file, exp_encrypted, exp_cleartext | |
| 627 | +check_metadata("compressed-metadata.pdf", 0, 0); | |
| 628 | +check_metadata("enc-base.pdf", 0, 1); | |
| 629 | + | |
| 630 | +foreach my $f (qw(compressed-metadata.pdf enc-base.pdf)) | |
| 631 | +{ | |
| 632 | + foreach my $w (qw(compress preserve)) | |
| 633 | + { | |
| 634 | + $td->runtest("$w streams", | |
| 635 | + {$td->COMMAND => "qpdf --stream-data=$w $f a.pdf"}, | |
| 636 | + {$td->STRING => "", $td->EXIT_STATUS => 0}); | |
| 637 | + check_metadata("a.pdf", 0, 1); | |
| 638 | + $td->runtest("encrypt normally", | |
| 639 | + {$td->COMMAND => | |
| 640 | + "qpdf --encrypt '' '' 128 -- a.pdf b.pdf"}, | |
| 641 | + {$td->STRING => "", $td->EXIT_STATUS => 0}); | |
| 642 | + check_metadata("b.pdf", 1, 0); | |
| 643 | + unlink "b.pdf"; | |
| 644 | + $td->runtest("encrypt V4", | |
| 645 | + {$td->COMMAND => | |
| 646 | + "qpdf --encrypt '' '' 128 --force-V4 -- a.pdf b.pdf"}, | |
| 647 | + {$td->STRING => "", $td->EXIT_STATUS => 0}); | |
| 648 | + check_metadata("b.pdf", 1, 0); | |
| 649 | + unlink "b.pdf"; | |
| 650 | + $td->runtest("encrypt with cleartext metadata", | |
| 651 | + {$td->COMMAND => | |
| 652 | + "qpdf --encrypt '' '' 128 --cleartext-metadata --" . | |
| 653 | + " a.pdf b.pdf"}, | |
| 654 | + {$td->STRING => "", $td->EXIT_STATUS => 0}); | |
| 655 | + check_metadata("b.pdf", 1, 1); | |
| 656 | + unlink "b.pdf"; | |
| 657 | + $td->runtest("encrypt with aes and cleartext metadata", | |
| 658 | + {$td->COMMAND => | |
| 659 | + "qpdf --encrypt '' '' 128" . | |
| 660 | + " --cleartext-metadata --use-aes=y -- a.pdf b.pdf"}, | |
| 661 | + {$td->STRING => "", $td->EXIT_STATUS => 0}); | |
| 662 | + check_metadata("b.pdf", 1, 1); | |
| 663 | + } | |
| 664 | +} | |
| 665 | + | |
| 666 | +show_ntests(); | |
| 667 | +# ---------- | |
| 623 | 668 | $td->notify("--- Linearization Tests ---"); |
| 624 | 669 | # $n_tests incremented after initialization of @linearized_files and |
| 625 | 670 | # @to_linearize. |
| ... | ... | @@ -1192,6 +1237,17 @@ sub compare_pdfs |
| 1192 | 1237 | system("rm -rf tif1 tif2"); |
| 1193 | 1238 | } |
| 1194 | 1239 | |
| 1240 | +sub check_metadata | |
| 1241 | +{ | |
| 1242 | + my ($file, $exp_encrypted, $exp_cleartext) = @_; | |
| 1243 | + my $out = "encrypted=$exp_encrypted; cleartext=$exp_cleartext\n" . | |
| 1244 | + "test 6 done\n"; | |
| 1245 | + $td->runtest("check metadata: $file", | |
| 1246 | + {$td->COMMAND => "test_driver 6 $file"}, | |
| 1247 | + {$td->STRING => $out, $td->EXIT_STATUS => 0}, | |
| 1248 | + $td->NORMALIZE_NEWLINES); | |
| 1249 | +} | |
| 1250 | + | |
| 1195 | 1251 | sub get_md5_checksum |
| 1196 | 1252 | { |
| 1197 | 1253 | my $file = shift; | ... | ... |
qpdf/qtest/qpdf/compressed-metadata.pdf
0 → 100644
No preview for this file type
qpdf/test_driver.cc
| ... | ... | @@ -6,6 +6,7 @@ |
| 6 | 6 | #include <qpdf/QUtil.hh> |
| 7 | 7 | #include <qpdf/QTC.hh> |
| 8 | 8 | #include <qpdf/Pl_StdioFile.hh> |
| 9 | +#include <qpdf/Pl_Buffer.hh> | |
| 9 | 10 | #include <qpdf/QPDFWriter.hh> |
| 10 | 11 | #include <iostream> |
| 11 | 12 | #include <string.h> |
| ... | ... | @@ -282,6 +283,31 @@ void runtest(int n, char const* filename) |
| 282 | 283 | } |
| 283 | 284 | } |
| 284 | 285 | } |
| 286 | + else if (n == 6) | |
| 287 | + { | |
| 288 | + QPDFObjectHandle root = pdf.getRoot(); | |
| 289 | + QPDFObjectHandle metadata = root.getKey("/Metadata"); | |
| 290 | + if (! metadata.isStream()) | |
| 291 | + { | |
| 292 | + throw std::logic_error("test 6 run on file with no metadata"); | |
| 293 | + } | |
| 294 | + Pl_Buffer bufpl("buffer"); | |
| 295 | + metadata.pipeStreamData(&bufpl, false, false, false); | |
| 296 | + Buffer* buf = bufpl.getBuffer(); | |
| 297 | + unsigned char const* data = buf->getBuffer(); | |
| 298 | + bool cleartext = false; | |
| 299 | + if ((buf->getSize() > 9) && | |
| 300 | + (strncmp((char const*)data, "<?xpacket", 9) == 0)) | |
| 301 | + { | |
| 302 | + cleartext = true; | |
| 303 | + } | |
| 304 | + delete buf; | |
| 305 | + std::cout << "encrypted=" | |
| 306 | + << (pdf.isEncrypted() ? 1 : 0) | |
| 307 | + << "; cleartext=" | |
| 308 | + << (cleartext ? 1 : 0) | |
| 309 | + << std::endl; | |
| 310 | + } | |
| 285 | 311 | else |
| 286 | 312 | { |
| 287 | 313 | throw std::runtime_error(std::string("invalid test ") + | ... | ... |