Commit 94131116a90a076c49e799aa5e4c63ce0ecb0391

Authored by Jay Berkenbilt
1 parent 3356b670

more notes, testing of cleartext metadata, some crypt filter fixes

git-svn-id: svn+q:///qpdf/trunk@823 71b93d88-0707-0410-a8cf-f5a4172ac649
... ... @@ -43,15 +43,6 @@
43 43 (http://delphi.about.com). .. use at your own risk and for whatever
44 44 the purpose you want .. no support provided. Sample code provided."
45 45  
46   - * Test cases for metadata: make sure we get uncompressed metadata for
47   - all --stream-data modes unless encrypted. Have check_metadata
48   - function in the test suite that should report whether the metadata
49   - is compressed (by looking at the /Filter key in the stream
50   - dictionary) and tries to extract it filtered to make sure
51   - encryption/decryption works. We should also grep for some string
52   - for encrypted files where it's not supposed to be encrypted to make
53   - sure it's also not compressed.
54   -
55 46 * R = 4, V = 4 encryption.
56 47  
57 48 - Update C API for R4 encryption
... ... @@ -64,7 +55,7 @@
64 55  
65 56 - figure out a way to test crypt filters defined on a stream
66 57  
67   - - test extraction of metadata with and without encrypted metadata
  58 + - test combinations of linearization and v4 encryption
68 59  
69 60 - would be nice to test strings and streams with different
70 61 encryption types, but without sample data, we'd have to write
... ... @@ -115,6 +106,29 @@
115 106 General
116 107 =======
117 108  
  109 + * Handle embedded files. PDF Reference 1.7 section 3.10, "File
  110 + Specifications", discusses this. Once we can definitely recognize
  111 + all embedded files in a document, we can update the encryption
  112 + code to handle it properly. In QPDF_encryption.cc, search for
  113 + cf_file. Remove exception thrown if cf_file is different from
  114 + cf_stream, and write code in the stream decryption section to use
  115 + cf_file instead of cf_stream. In general, add interfaces to
  116 + get the list of embedded files and to extract them. To handle
  117 + general embedded files associated with the whole document, follow
  118 + root -> /Names -> /EmbeddedFiles -> /Names to get to the file
  119 + specification dictionaries. Then, in each file specification
  120 + dictionary, follow /EF -> /F to the actual stream.
  121 +
  122 + * The description of Crypt filters is unclear with respect to how to
  123 + use them to override /StmF for specific streams. I'm not sure
  124 + whether qpdf will do the right thing for any specific individual
  125 + streams that might have crypt filters. The specification seems to
  126 + imply that only embedded file streams and metadata streams can have
  127 + crypt filters, and there are already special cases in the code to
  128 + handle those. Most likely, it won't be a problem, but someday
  129 + someone may find a file that qpdf doesn't work on because of crypt
  130 + filters.
  131 +
118 132 * The second xref stream for linearized files has to be padded only
119 133 because we need file_size as computed in pass 1 to be accurate. If
120 134 we were not allowing writing to a pipe, we could seek back to the
... ... @@ -150,10 +164,6 @@ General
150 164 of doing this seems very low since no viewer seems to care, so it's
151 165 probably not worth it.
152 166  
153   - * Embedded file streams: figure out why running qpdf over the pdf 1.7
154   - spec results in a file that crashes acrobat reader when you try to
155   - save nested documents.
156   -
157 167 * QPDFObjectHandle::getPageImages() doesn't notice images in
158 168 inherited resource dictionaries. See comments in that function.
159 169  
... ...
libqpdf/QPDFWriter.cc
... ... @@ -791,14 +791,15 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
791 791 }
792 792 else if (object.isDictionary())
793 793 {
794   - // XXX Must not preserve Crypt filters from original stream
795   - // dictionary
796 794 writeString("<<");
797 795 writeStringQDF("\n");
798 796 std::set<std::string> keys = object.getKeys();
799 797 for (std::set<std::string>::iterator iter = keys.begin();
800 798 iter != keys.end(); ++iter)
801 799 {
  800 + // I'm not fully clear on /Crypt keys in /DecodeParms. If
  801 + // one is found, we refuse to filter, so we should be
  802 + // safe.
802 803 std::string const& key = *iter;
803 804 if ((flags & f_filtered) &&
804 805 ((key == "/Filter") ||
... ...
libqpdf/QPDF_encryption.cc
... ... @@ -292,6 +292,10 @@ QPDF::interpretCF(QPDFObjectHandle cf)
292 292 {
293 293 return this->crypt_filters[filter];
294 294 }
  295 + else if (filter == "/Identity")
  296 + {
  297 + return e_none;
  298 + }
295 299 else
296 300 {
297 301 return e_unknown;
... ... @@ -299,6 +303,7 @@ QPDF::interpretCF(QPDFObjectHandle cf)
299 303 }
300 304 else
301 305 {
  306 + // Default: /Identity
302 307 return e_none;
303 308 }
304 309 }
... ... @@ -432,12 +437,12 @@ QPDF::initializeEncryption()
432 437 std::string method_name = cdict.getKey("/CFM").getName();
433 438 if (method_name == "/V2")
434 439 {
435   - // XXX coverage
  440 + QTC::TC("qpdf", "QPDF_encryption CFM V2");
436 441 method = e_rc4;
437 442 }
438 443 else if (method_name == "/AESV2")
439 444 {
440   - // XXX coverage
  445 + QTC::TC("qpdf", "QPDF_encryption CFM AESV2");
441 446 method = e_aes;
442 447 }
443 448 else
... ... @@ -464,6 +469,15 @@ QPDF::initializeEncryption()
464 469 {
465 470 this->cf_file = this->cf_stream;
466 471 }
  472 + if (this->cf_file != this->cf_stream)
  473 + {
  474 + throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
  475 + "This document has embedded files that are"
  476 + " encrypted differently from the rest of the file."
  477 + " qpdf does not presently support this due to"
  478 + " lack of test data; if possible, please submit"
  479 + " a bug report that includes this file.");
  480 + }
467 481 }
468 482 EncryptionData data(V, R, Length / 8, P, O, U, id1, this->encrypt_metadata);
469 483 if (check_owner_password(
... ... @@ -542,7 +556,7 @@ QPDF::decryptString(std::string& str, int objid, int generation)
542 556 std::string key = getKeyForObject(objid, generation, use_aes);
543 557 if (use_aes)
544 558 {
545   - // XXX coverage
  559 + QTC::TC("qpdf", "QPDF_encryption aes decode string");
546 560 assert(key.length() == Pl_AES_PDF::key_size);
547 561 Pl_Buffer bufpl("decrypted string");
548 562 Pl_AES_PDF pl("aes decrypt string", &bufpl, false,
... ... @@ -586,30 +600,33 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation,
586 600 encryption_method_e method = e_unknown;
587 601 std::string method_source = "/StmF from /Encrypt dictionary";
588 602  
589   - if (stream_dict.getKey("/DecodeParms").isDictionary())
590   - {
591   - QPDFObjectHandle decode_parms = stream_dict.getKey("/DecodeParms");
592   - if (decode_parms.getKey("/Crypt").isDictionary())
593   - {
594   - // XXX coverage
595   - QPDFObjectHandle crypt = decode_parms.getKey("/Crypt");
596   - method = interpretCF(crypt.getKey("/Name"));
597   - method_source = "stream's Crypt decode parameters";
598   - }
599   - }
  603 + // NOTE: the section in the PDF specification on crypt filters
  604 + // seems to suggest that there might be a /Crypt key in
  605 + // /DecodeParms whose value is a crypt filter (e.g., << /Name
  606 + // /StdCF >>), but implementation notes suggest this can only
  607 + // happen for metadata streams, and empirical observation
  608 + // suggests that they are otherwise ignored. Not having been
  609 + // able to find a sample file that uses crypt filters in any
  610 + // way other than /StrF and /StmF, I'm not really sure what to
  611 + // do about this. If we were to override the encryption on a
  612 + // per-stream basis using crypt filters, set method_source to
  613 + // something useful in the error message for unknown
  614 + // encryption methods (search for method_source).
600 615  
601 616 if (method == e_unknown)
602 617 {
603 618 if ((! this->encrypt_metadata) && (type == "/Metadata"))
604 619 {
605   - // XXX coverage
  620 + QTC::TC("qpdf", "QPDF_encryption cleartext metadata");
606 621 method = e_none;
607 622 }
608 623 else
609 624 {
  625 + // NOTE: We should use cf_file if this is an
  626 + // embedded file, but we can't yet detect embedded
  627 + // file streams as such.
610 628 method = this->cf_stream;
611 629 }
612   - // XXX What about embedded file streams?
613 630 }
614 631 use_aes = false;
615 632 switch (method)
... ... @@ -640,7 +657,7 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation,
640 657 std::string key = getKeyForObject(objid, generation, use_aes);
641 658 if (use_aes)
642 659 {
643   - // XXX coverage
  660 + QTC::TC("qpdf", "QPDF_encryption aes decode stream");
644 661 assert(key.length() == Pl_AES_PDF::key_size);
645 662 pipeline = new Pl_AES_PDF("AES stream decryption", pipeline,
646 663 false, (unsigned char*) key.c_str());
... ...
qpdf/qpdf.testcov
... ... @@ -161,3 +161,9 @@ qpdf-c called qpdf_init_write multiple times 0
161 161 QPDF_encryption rc4 decode string 0
162 162 QPDF_encryption rc4 decode stream 0
163 163 QPDFWriter not compressing metadata 0
  164 +QPDF_encryption CFM V2 0
  165 +QPDF_encryption CFM AESV2 0
  166 +QPDF_encryption aes decode string 0
  167 +QPDF_encryption cleartext metadata 0
  168 +QPDF_encryption aes decode stream 0
  169 +QPDF_encryption stream crypt filter 0
... ...
qpdf/qtest/qpdf.test
... ... @@ -620,6 +620,51 @@ $td->runtest("show-xref-by-id-filtered",
620 620  
621 621 show_ntests();
622 622 # ----------
  623 +$td->notify("--- Clear-text Metadata Tests ---");
  624 +$n_tests += 42;
  625 +
  626 +# args: file, exp_encrypted, exp_cleartext
  627 +check_metadata("compressed-metadata.pdf", 0, 0);
  628 +check_metadata("enc-base.pdf", 0, 1);
  629 +
  630 +foreach my $f (qw(compressed-metadata.pdf enc-base.pdf))
  631 +{
  632 + foreach my $w (qw(compress preserve))
  633 + {
  634 + $td->runtest("$w streams",
  635 + {$td->COMMAND => "qpdf --stream-data=$w $f a.pdf"},
  636 + {$td->STRING => "", $td->EXIT_STATUS => 0});
  637 + check_metadata("a.pdf", 0, 1);
  638 + $td->runtest("encrypt normally",
  639 + {$td->COMMAND =>
  640 + "qpdf --encrypt '' '' 128 -- a.pdf b.pdf"},
  641 + {$td->STRING => "", $td->EXIT_STATUS => 0});
  642 + check_metadata("b.pdf", 1, 0);
  643 + unlink "b.pdf";
  644 + $td->runtest("encrypt V4",
  645 + {$td->COMMAND =>
  646 + "qpdf --encrypt '' '' 128 --force-V4 -- a.pdf b.pdf"},
  647 + {$td->STRING => "", $td->EXIT_STATUS => 0});
  648 + check_metadata("b.pdf", 1, 0);
  649 + unlink "b.pdf";
  650 + $td->runtest("encrypt with cleartext metadata",
  651 + {$td->COMMAND =>
  652 + "qpdf --encrypt '' '' 128 --cleartext-metadata --" .
  653 + " a.pdf b.pdf"},
  654 + {$td->STRING => "", $td->EXIT_STATUS => 0});
  655 + check_metadata("b.pdf", 1, 1);
  656 + unlink "b.pdf";
  657 + $td->runtest("encrypt with aes and cleartext metadata",
  658 + {$td->COMMAND =>
  659 + "qpdf --encrypt '' '' 128" .
  660 + " --cleartext-metadata --use-aes=y -- a.pdf b.pdf"},
  661 + {$td->STRING => "", $td->EXIT_STATUS => 0});
  662 + check_metadata("b.pdf", 1, 1);
  663 + }
  664 +}
  665 +
  666 +show_ntests();
  667 +# ----------
623 668 $td->notify("--- Linearization Tests ---");
624 669 # $n_tests incremented after initialization of @linearized_files and
625 670 # @to_linearize.
... ... @@ -1192,6 +1237,17 @@ sub compare_pdfs
1192 1237 system("rm -rf tif1 tif2");
1193 1238 }
1194 1239  
  1240 +sub check_metadata
  1241 +{
  1242 + my ($file, $exp_encrypted, $exp_cleartext) = @_;
  1243 + my $out = "encrypted=$exp_encrypted; cleartext=$exp_cleartext\n" .
  1244 + "test 6 done\n";
  1245 + $td->runtest("check metadata: $file",
  1246 + {$td->COMMAND => "test_driver 6 $file"},
  1247 + {$td->STRING => $out, $td->EXIT_STATUS => 0},
  1248 + $td->NORMALIZE_NEWLINES);
  1249 +}
  1250 +
1195 1251 sub get_md5_checksum
1196 1252 {
1197 1253 my $file = shift;
... ...
qpdf/qtest/qpdf/compressed-metadata.pdf 0 → 100644
No preview for this file type
qpdf/test_driver.cc
... ... @@ -6,6 +6,7 @@
6 6 #include <qpdf/QUtil.hh>
7 7 #include <qpdf/QTC.hh>
8 8 #include <qpdf/Pl_StdioFile.hh>
  9 +#include <qpdf/Pl_Buffer.hh>
9 10 #include <qpdf/QPDFWriter.hh>
10 11 #include <iostream>
11 12 #include <string.h>
... ... @@ -282,6 +283,31 @@ void runtest(int n, char const* filename)
282 283 }
283 284 }
284 285 }
  286 + else if (n == 6)
  287 + {
  288 + QPDFObjectHandle root = pdf.getRoot();
  289 + QPDFObjectHandle metadata = root.getKey("/Metadata");
  290 + if (! metadata.isStream())
  291 + {
  292 + throw std::logic_error("test 6 run on file with no metadata");
  293 + }
  294 + Pl_Buffer bufpl("buffer");
  295 + metadata.pipeStreamData(&bufpl, false, false, false);
  296 + Buffer* buf = bufpl.getBuffer();
  297 + unsigned char const* data = buf->getBuffer();
  298 + bool cleartext = false;
  299 + if ((buf->getSize() > 9) &&
  300 + (strncmp((char const*)data, "<?xpacket", 9) == 0))
  301 + {
  302 + cleartext = true;
  303 + }
  304 + delete buf;
  305 + std::cout << "encrypted="
  306 + << (pdf.isEncrypted() ? 1 : 0)
  307 + << "; cleartext="
  308 + << (cleartext ? 1 : 0)
  309 + << std::endl;
  310 + }
285 311 else
286 312 {
287 313 throw std::runtime_error(std::string("invalid test ") +
... ...