Commit 94131116a90a076c49e799aa5e4c63ce0ecb0391
1 parent
3356b670
more notes, testing of cleartext metadata, some crypt filter fixes
git-svn-id: svn+q:///qpdf/trunk@823 71b93d88-0707-0410-a8cf-f5a4172ac649
Showing
7 changed files
with
149 additions
and
33 deletions
TODO
| @@ -43,15 +43,6 @@ | @@ -43,15 +43,6 @@ | ||
| 43 | (http://delphi.about.com). .. use at your own risk and for whatever | 43 | (http://delphi.about.com). .. use at your own risk and for whatever |
| 44 | the purpose you want .. no support provided. Sample code provided." | 44 | the purpose you want .. no support provided. Sample code provided." |
| 45 | 45 | ||
| 46 | - * Test cases for metadata: make sure we get uncompressed metadata for | ||
| 47 | - all --stream-data modes unless encrypted. Have check_metadata | ||
| 48 | - function in the test suite that should report whether the metadata | ||
| 49 | - is compressed (by looking at the /Filter key in the stream | ||
| 50 | - dictionary) and tries to extract it filtered to make sure | ||
| 51 | - encryption/decryption works. We should also grep for some string | ||
| 52 | - for encrypted files where it's not supposed to be encrypted to make | ||
| 53 | - sure it's also not compressed. | ||
| 54 | - | ||
| 55 | * R = 4, V = 4 encryption. | 46 | * R = 4, V = 4 encryption. |
| 56 | 47 | ||
| 57 | - Update C API for R4 encryption | 48 | - Update C API for R4 encryption |
| @@ -64,7 +55,7 @@ | @@ -64,7 +55,7 @@ | ||
| 64 | 55 | ||
| 65 | - figure out a way to test crypt filters defined on a stream | 56 | - figure out a way to test crypt filters defined on a stream |
| 66 | 57 | ||
| 67 | - - test extraction of metadata with and without encrypted metadata | 58 | + - test combinations of linearization and v4 encryption |
| 68 | 59 | ||
| 69 | - would be nice to test strings and streams with different | 60 | - would be nice to test strings and streams with different |
| 70 | encryption types, but without sample data, we'd have to write | 61 | encryption types, but without sample data, we'd have to write |
| @@ -115,6 +106,29 @@ | @@ -115,6 +106,29 @@ | ||
| 115 | General | 106 | General |
| 116 | ======= | 107 | ======= |
| 117 | 108 | ||
| 109 | + * Handle embedded files. PDF Reference 1.7 section 3.10, "File | ||
| 110 | + Specifications", discusses this. Once we can definitely recognize | ||
| 111 | + all embedded files in a document, we can update the encryption | ||
| 112 | + code to handle it properly. In QPDF_encryption.cc, search for | ||
| 113 | + cf_file. Remove exception thrown if cf_file is different from | ||
| 114 | + cf_stream, and write code in the stream decryption section to use | ||
| 115 | + cf_file instead of cf_stream. In general, add interfaces to | ||
| 116 | + get the list of embedded files and to extract them. To handle | ||
| 117 | + general embedded files associated with the whole document, follow | ||
| 118 | + root -> /Names -> /EmbeddedFiles -> /Names to get to the file | ||
| 119 | + specification dictionaries. Then, in each file specification | ||
| 120 | + dictionary, follow /EF -> /F to the actual stream. | ||
| 121 | + | ||
| 122 | + * The description of Crypt filters is unclear with respect to how to | ||
| 123 | + use them to override /StmF for specific streams. I'm not sure | ||
| 124 | + whether qpdf will do the right thing for any specific individual | ||
| 125 | + streams that might have crypt filters. The specification seems to | ||
| 126 | + imply that only embedded file streams and metadata streams can have | ||
| 127 | + crypt filters, and there are already special cases in the code to | ||
| 128 | + handle those. Most likely, it won't be a problem, but someday | ||
| 129 | + someone may find a file that qpdf doesn't work on because of crypt | ||
| 130 | + filters. | ||
| 131 | + | ||
| 118 | * The second xref stream for linearized files has to be padded only | 132 | * The second xref stream for linearized files has to be padded only |
| 119 | because we need file_size as computed in pass 1 to be accurate. If | 133 | because we need file_size as computed in pass 1 to be accurate. If |
| 120 | we were not allowing writing to a pipe, we could seek back to the | 134 | we were not allowing writing to a pipe, we could seek back to the |
| @@ -150,10 +164,6 @@ General | @@ -150,10 +164,6 @@ General | ||
| 150 | of doing this seems very low since no viewer seems to care, so it's | 164 | of doing this seems very low since no viewer seems to care, so it's |
| 151 | probably not worth it. | 165 | probably not worth it. |
| 152 | 166 | ||
| 153 | - * Embedded file streams: figure out why running qpdf over the pdf 1.7 | ||
| 154 | - spec results in a file that crashes acrobat reader when you try to | ||
| 155 | - save nested documents. | ||
| 156 | - | ||
| 157 | * QPDFObjectHandle::getPageImages() doesn't notice images in | 167 | * QPDFObjectHandle::getPageImages() doesn't notice images in |
| 158 | inherited resource dictionaries. See comments in that function. | 168 | inherited resource dictionaries. See comments in that function. |
| 159 | 169 |
libqpdf/QPDFWriter.cc
| @@ -791,14 +791,15 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, | @@ -791,14 +791,15 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, | ||
| 791 | } | 791 | } |
| 792 | else if (object.isDictionary()) | 792 | else if (object.isDictionary()) |
| 793 | { | 793 | { |
| 794 | - // XXX Must not preserve Crypt filters from original stream | ||
| 795 | - // dictionary | ||
| 796 | writeString("<<"); | 794 | writeString("<<"); |
| 797 | writeStringQDF("\n"); | 795 | writeStringQDF("\n"); |
| 798 | std::set<std::string> keys = object.getKeys(); | 796 | std::set<std::string> keys = object.getKeys(); |
| 799 | for (std::set<std::string>::iterator iter = keys.begin(); | 797 | for (std::set<std::string>::iterator iter = keys.begin(); |
| 800 | iter != keys.end(); ++iter) | 798 | iter != keys.end(); ++iter) |
| 801 | { | 799 | { |
| 800 | + // I'm not fully clear on /Crypt keys in /DecodeParms. If | ||
| 801 | + // one is found, we refuse to filter, so we should be | ||
| 802 | + // safe. | ||
| 802 | std::string const& key = *iter; | 803 | std::string const& key = *iter; |
| 803 | if ((flags & f_filtered) && | 804 | if ((flags & f_filtered) && |
| 804 | ((key == "/Filter") || | 805 | ((key == "/Filter") || |
libqpdf/QPDF_encryption.cc
| @@ -292,6 +292,10 @@ QPDF::interpretCF(QPDFObjectHandle cf) | @@ -292,6 +292,10 @@ QPDF::interpretCF(QPDFObjectHandle cf) | ||
| 292 | { | 292 | { |
| 293 | return this->crypt_filters[filter]; | 293 | return this->crypt_filters[filter]; |
| 294 | } | 294 | } |
| 295 | + else if (filter == "/Identity") | ||
| 296 | + { | ||
| 297 | + return e_none; | ||
| 298 | + } | ||
| 295 | else | 299 | else |
| 296 | { | 300 | { |
| 297 | return e_unknown; | 301 | return e_unknown; |
| @@ -299,6 +303,7 @@ QPDF::interpretCF(QPDFObjectHandle cf) | @@ -299,6 +303,7 @@ QPDF::interpretCF(QPDFObjectHandle cf) | ||
| 299 | } | 303 | } |
| 300 | else | 304 | else |
| 301 | { | 305 | { |
| 306 | + // Default: /Identity | ||
| 302 | return e_none; | 307 | return e_none; |
| 303 | } | 308 | } |
| 304 | } | 309 | } |
| @@ -432,12 +437,12 @@ QPDF::initializeEncryption() | @@ -432,12 +437,12 @@ QPDF::initializeEncryption() | ||
| 432 | std::string method_name = cdict.getKey("/CFM").getName(); | 437 | std::string method_name = cdict.getKey("/CFM").getName(); |
| 433 | if (method_name == "/V2") | 438 | if (method_name == "/V2") |
| 434 | { | 439 | { |
| 435 | - // XXX coverage | 440 | + QTC::TC("qpdf", "QPDF_encryption CFM V2"); |
| 436 | method = e_rc4; | 441 | method = e_rc4; |
| 437 | } | 442 | } |
| 438 | else if (method_name == "/AESV2") | 443 | else if (method_name == "/AESV2") |
| 439 | { | 444 | { |
| 440 | - // XXX coverage | 445 | + QTC::TC("qpdf", "QPDF_encryption CFM AESV2"); |
| 441 | method = e_aes; | 446 | method = e_aes; |
| 442 | } | 447 | } |
| 443 | else | 448 | else |
| @@ -464,6 +469,15 @@ QPDF::initializeEncryption() | @@ -464,6 +469,15 @@ QPDF::initializeEncryption() | ||
| 464 | { | 469 | { |
| 465 | this->cf_file = this->cf_stream; | 470 | this->cf_file = this->cf_stream; |
| 466 | } | 471 | } |
| 472 | + if (this->cf_file != this->cf_stream) | ||
| 473 | + { | ||
| 474 | + throw QPDFExc(this->file.getName(), this->file.getLastOffset(), | ||
| 475 | + "This document has embedded files that are" | ||
| 476 | + " encrypted differently from the rest of the file." | ||
| 477 | + " qpdf does not presently support this due to" | ||
| 478 | + " lack of test data; if possible, please submit" | ||
| 479 | + " a bug report that includes this file."); | ||
| 480 | + } | ||
| 467 | } | 481 | } |
| 468 | EncryptionData data(V, R, Length / 8, P, O, U, id1, this->encrypt_metadata); | 482 | EncryptionData data(V, R, Length / 8, P, O, U, id1, this->encrypt_metadata); |
| 469 | if (check_owner_password( | 483 | if (check_owner_password( |
| @@ -542,7 +556,7 @@ QPDF::decryptString(std::string& str, int objid, int generation) | @@ -542,7 +556,7 @@ QPDF::decryptString(std::string& str, int objid, int generation) | ||
| 542 | std::string key = getKeyForObject(objid, generation, use_aes); | 556 | std::string key = getKeyForObject(objid, generation, use_aes); |
| 543 | if (use_aes) | 557 | if (use_aes) |
| 544 | { | 558 | { |
| 545 | - // XXX coverage | 559 | + QTC::TC("qpdf", "QPDF_encryption aes decode string"); |
| 546 | assert(key.length() == Pl_AES_PDF::key_size); | 560 | assert(key.length() == Pl_AES_PDF::key_size); |
| 547 | Pl_Buffer bufpl("decrypted string"); | 561 | Pl_Buffer bufpl("decrypted string"); |
| 548 | Pl_AES_PDF pl("aes decrypt string", &bufpl, false, | 562 | Pl_AES_PDF pl("aes decrypt string", &bufpl, false, |
| @@ -586,30 +600,33 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation, | @@ -586,30 +600,33 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation, | ||
| 586 | encryption_method_e method = e_unknown; | 600 | encryption_method_e method = e_unknown; |
| 587 | std::string method_source = "/StmF from /Encrypt dictionary"; | 601 | std::string method_source = "/StmF from /Encrypt dictionary"; |
| 588 | 602 | ||
| 589 | - if (stream_dict.getKey("/DecodeParms").isDictionary()) | ||
| 590 | - { | ||
| 591 | - QPDFObjectHandle decode_parms = stream_dict.getKey("/DecodeParms"); | ||
| 592 | - if (decode_parms.getKey("/Crypt").isDictionary()) | ||
| 593 | - { | ||
| 594 | - // XXX coverage | ||
| 595 | - QPDFObjectHandle crypt = decode_parms.getKey("/Crypt"); | ||
| 596 | - method = interpretCF(crypt.getKey("/Name")); | ||
| 597 | - method_source = "stream's Crypt decode parameters"; | ||
| 598 | - } | ||
| 599 | - } | 603 | + // NOTE: the section in the PDF specification on crypt filters |
| 604 | + // seems to suggest that there might be a /Crypt key in | ||
| 605 | + // /DecodeParms whose value is a crypt filter (e.g., << /Name | ||
| 606 | + // /StdCF >>), but implementation notes suggest this can only | ||
| 607 | + // happen for metadata streams, and empirical observation | ||
| 608 | + // suggests that they are otherwise ignored. Not having been | ||
| 609 | + // able to find a sample file that uses crypt filters in any | ||
| 610 | + // way other than /StrF and /StmF, I'm not really sure what to | ||
| 611 | + // do about this. If we were to override the encryption on a | ||
| 612 | + // per-stream basis using crypt filters, set method_source to | ||
| 613 | + // something useful in the error message for unknown | ||
| 614 | + // encryption methods (search for method_source). | ||
| 600 | 615 | ||
| 601 | if (method == e_unknown) | 616 | if (method == e_unknown) |
| 602 | { | 617 | { |
| 603 | if ((! this->encrypt_metadata) && (type == "/Metadata")) | 618 | if ((! this->encrypt_metadata) && (type == "/Metadata")) |
| 604 | { | 619 | { |
| 605 | - // XXX coverage | 620 | + QTC::TC("qpdf", "QPDF_encryption cleartext metadata"); |
| 606 | method = e_none; | 621 | method = e_none; |
| 607 | } | 622 | } |
| 608 | else | 623 | else |
| 609 | { | 624 | { |
| 625 | + // NOTE: We should use cf_file if this is an | ||
| 626 | + // embedded file, but we can't yet detect embedded | ||
| 627 | + // file streams as such. | ||
| 610 | method = this->cf_stream; | 628 | method = this->cf_stream; |
| 611 | } | 629 | } |
| 612 | - // XXX What about embedded file streams? | ||
| 613 | } | 630 | } |
| 614 | use_aes = false; | 631 | use_aes = false; |
| 615 | switch (method) | 632 | switch (method) |
| @@ -640,7 +657,7 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation, | @@ -640,7 +657,7 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation, | ||
| 640 | std::string key = getKeyForObject(objid, generation, use_aes); | 657 | std::string key = getKeyForObject(objid, generation, use_aes); |
| 641 | if (use_aes) | 658 | if (use_aes) |
| 642 | { | 659 | { |
| 643 | - // XXX coverage | 660 | + QTC::TC("qpdf", "QPDF_encryption aes decode stream"); |
| 644 | assert(key.length() == Pl_AES_PDF::key_size); | 661 | assert(key.length() == Pl_AES_PDF::key_size); |
| 645 | pipeline = new Pl_AES_PDF("AES stream decryption", pipeline, | 662 | pipeline = new Pl_AES_PDF("AES stream decryption", pipeline, |
| 646 | false, (unsigned char*) key.c_str()); | 663 | false, (unsigned char*) key.c_str()); |
qpdf/qpdf.testcov
| @@ -161,3 +161,9 @@ qpdf-c called qpdf_init_write multiple times 0 | @@ -161,3 +161,9 @@ qpdf-c called qpdf_init_write multiple times 0 | ||
| 161 | QPDF_encryption rc4 decode string 0 | 161 | QPDF_encryption rc4 decode string 0 |
| 162 | QPDF_encryption rc4 decode stream 0 | 162 | QPDF_encryption rc4 decode stream 0 |
| 163 | QPDFWriter not compressing metadata 0 | 163 | QPDFWriter not compressing metadata 0 |
| 164 | +QPDF_encryption CFM V2 0 | ||
| 165 | +QPDF_encryption CFM AESV2 0 | ||
| 166 | +QPDF_encryption aes decode string 0 | ||
| 167 | +QPDF_encryption cleartext metadata 0 | ||
| 168 | +QPDF_encryption aes decode stream 0 | ||
| 169 | +QPDF_encryption stream crypt filter 0 |
qpdf/qtest/qpdf.test
| @@ -620,6 +620,51 @@ $td->runtest("show-xref-by-id-filtered", | @@ -620,6 +620,51 @@ $td->runtest("show-xref-by-id-filtered", | ||
| 620 | 620 | ||
| 621 | show_ntests(); | 621 | show_ntests(); |
| 622 | # ---------- | 622 | # ---------- |
| 623 | +$td->notify("--- Clear-text Metadata Tests ---"); | ||
| 624 | +$n_tests += 42; | ||
| 625 | + | ||
| 626 | +# args: file, exp_encrypted, exp_cleartext | ||
| 627 | +check_metadata("compressed-metadata.pdf", 0, 0); | ||
| 628 | +check_metadata("enc-base.pdf", 0, 1); | ||
| 629 | + | ||
| 630 | +foreach my $f (qw(compressed-metadata.pdf enc-base.pdf)) | ||
| 631 | +{ | ||
| 632 | + foreach my $w (qw(compress preserve)) | ||
| 633 | + { | ||
| 634 | + $td->runtest("$w streams", | ||
| 635 | + {$td->COMMAND => "qpdf --stream-data=$w $f a.pdf"}, | ||
| 636 | + {$td->STRING => "", $td->EXIT_STATUS => 0}); | ||
| 637 | + check_metadata("a.pdf", 0, 1); | ||
| 638 | + $td->runtest("encrypt normally", | ||
| 639 | + {$td->COMMAND => | ||
| 640 | + "qpdf --encrypt '' '' 128 -- a.pdf b.pdf"}, | ||
| 641 | + {$td->STRING => "", $td->EXIT_STATUS => 0}); | ||
| 642 | + check_metadata("b.pdf", 1, 0); | ||
| 643 | + unlink "b.pdf"; | ||
| 644 | + $td->runtest("encrypt V4", | ||
| 645 | + {$td->COMMAND => | ||
| 646 | + "qpdf --encrypt '' '' 128 --force-V4 -- a.pdf b.pdf"}, | ||
| 647 | + {$td->STRING => "", $td->EXIT_STATUS => 0}); | ||
| 648 | + check_metadata("b.pdf", 1, 0); | ||
| 649 | + unlink "b.pdf"; | ||
| 650 | + $td->runtest("encrypt with cleartext metadata", | ||
| 651 | + {$td->COMMAND => | ||
| 652 | + "qpdf --encrypt '' '' 128 --cleartext-metadata --" . | ||
| 653 | + " a.pdf b.pdf"}, | ||
| 654 | + {$td->STRING => "", $td->EXIT_STATUS => 0}); | ||
| 655 | + check_metadata("b.pdf", 1, 1); | ||
| 656 | + unlink "b.pdf"; | ||
| 657 | + $td->runtest("encrypt with aes and cleartext metadata", | ||
| 658 | + {$td->COMMAND => | ||
| 659 | + "qpdf --encrypt '' '' 128" . | ||
| 660 | + " --cleartext-metadata --use-aes=y -- a.pdf b.pdf"}, | ||
| 661 | + {$td->STRING => "", $td->EXIT_STATUS => 0}); | ||
| 662 | + check_metadata("b.pdf", 1, 1); | ||
| 663 | + } | ||
| 664 | +} | ||
| 665 | + | ||
| 666 | +show_ntests(); | ||
| 667 | +# ---------- | ||
| 623 | $td->notify("--- Linearization Tests ---"); | 668 | $td->notify("--- Linearization Tests ---"); |
| 624 | # $n_tests incremented after initialization of @linearized_files and | 669 | # $n_tests incremented after initialization of @linearized_files and |
| 625 | # @to_linearize. | 670 | # @to_linearize. |
| @@ -1192,6 +1237,17 @@ sub compare_pdfs | @@ -1192,6 +1237,17 @@ sub compare_pdfs | ||
| 1192 | system("rm -rf tif1 tif2"); | 1237 | system("rm -rf tif1 tif2"); |
| 1193 | } | 1238 | } |
| 1194 | 1239 | ||
| 1240 | +sub check_metadata | ||
| 1241 | +{ | ||
| 1242 | + my ($file, $exp_encrypted, $exp_cleartext) = @_; | ||
| 1243 | + my $out = "encrypted=$exp_encrypted; cleartext=$exp_cleartext\n" . | ||
| 1244 | + "test 6 done\n"; | ||
| 1245 | + $td->runtest("check metadata: $file", | ||
| 1246 | + {$td->COMMAND => "test_driver 6 $file"}, | ||
| 1247 | + {$td->STRING => $out, $td->EXIT_STATUS => 0}, | ||
| 1248 | + $td->NORMALIZE_NEWLINES); | ||
| 1249 | +} | ||
| 1250 | + | ||
| 1195 | sub get_md5_checksum | 1251 | sub get_md5_checksum |
| 1196 | { | 1252 | { |
| 1197 | my $file = shift; | 1253 | my $file = shift; |
qpdf/qtest/qpdf/compressed-metadata.pdf
0 → 100644
No preview for this file type
qpdf/test_driver.cc
| @@ -6,6 +6,7 @@ | @@ -6,6 +6,7 @@ | ||
| 6 | #include <qpdf/QUtil.hh> | 6 | #include <qpdf/QUtil.hh> |
| 7 | #include <qpdf/QTC.hh> | 7 | #include <qpdf/QTC.hh> |
| 8 | #include <qpdf/Pl_StdioFile.hh> | 8 | #include <qpdf/Pl_StdioFile.hh> |
| 9 | +#include <qpdf/Pl_Buffer.hh> | ||
| 9 | #include <qpdf/QPDFWriter.hh> | 10 | #include <qpdf/QPDFWriter.hh> |
| 10 | #include <iostream> | 11 | #include <iostream> |
| 11 | #include <string.h> | 12 | #include <string.h> |
| @@ -282,6 +283,31 @@ void runtest(int n, char const* filename) | @@ -282,6 +283,31 @@ void runtest(int n, char const* filename) | ||
| 282 | } | 283 | } |
| 283 | } | 284 | } |
| 284 | } | 285 | } |
| 286 | + else if (n == 6) | ||
| 287 | + { | ||
| 288 | + QPDFObjectHandle root = pdf.getRoot(); | ||
| 289 | + QPDFObjectHandle metadata = root.getKey("/Metadata"); | ||
| 290 | + if (! metadata.isStream()) | ||
| 291 | + { | ||
| 292 | + throw std::logic_error("test 6 run on file with no metadata"); | ||
| 293 | + } | ||
| 294 | + Pl_Buffer bufpl("buffer"); | ||
| 295 | + metadata.pipeStreamData(&bufpl, false, false, false); | ||
| 296 | + Buffer* buf = bufpl.getBuffer(); | ||
| 297 | + unsigned char const* data = buf->getBuffer(); | ||
| 298 | + bool cleartext = false; | ||
| 299 | + if ((buf->getSize() > 9) && | ||
| 300 | + (strncmp((char const*)data, "<?xpacket", 9) == 0)) | ||
| 301 | + { | ||
| 302 | + cleartext = true; | ||
| 303 | + } | ||
| 304 | + delete buf; | ||
| 305 | + std::cout << "encrypted=" | ||
| 306 | + << (pdf.isEncrypted() ? 1 : 0) | ||
| 307 | + << "; cleartext=" | ||
| 308 | + << (cleartext ? 1 : 0) | ||
| 309 | + << std::endl; | ||
| 310 | + } | ||
| 285 | else | 311 | else |
| 286 | { | 312 | { |
| 287 | throw std::runtime_error(std::string("invalid test ") + | 313 | throw std::runtime_error(std::string("invalid test ") + |