Commit 93ac1695a4b79f3d5b71e2d57ed876c28866d2c9

Authored by Jay Berkenbilt
1 parent eff2c9a6

Support files with only attachments encrypted

Test cases added in a future commit since they depend on /R=6 support.
... ... @@ -89,20 +89,11 @@ Index: QPDFWriter.cc
89 89 }
90 90 ------------------------------
91 91  
92   - * Handle embedded files. PDF Reference 1.7 section 3.10, "File
93   - Specifications", discusses this. Once we can definitely recognize
94   - all embedded files in a document, we can update the encryption
95   - code to handle it properly. In QPDF_encryption.cc, search for
96   - cf_file. Remove exception thrown if cf_file is different from
97   - cf_stream, and write code in the stream decryption section to use
98   - cf_file instead of cf_stream. In general, add interfaces to get
99   - the list of embedded files and to extract them. To handle general
100   - embedded files associated with the whole document, follow root ->
101   - /Names -> /EmbeddedFiles -> /Names to get to the file specification
102   - dictionaries. Then, in each file specification dictionary, follow
103   - /EF -> /F to the actual stream. There may be other places file
104   - specification dictionaries may appear, and there are also /RF keys
105   - with related files, so reread section 3.10 carefully.
  92 + * Provide APIs for embedded files. See *attachments*.pdf in test
  93 + suite. The private method findAttachmentStreams finds at least
  94 + cases for modern versions of Adobe Reader (>= 1.7, maybe earlier).
  95 + PDF Reference 1.7 section 3.10, "File Specifications", discusses
  96 + this.
106 97  
107 98 A sourceforge user asks if qpdf can handle extracting embedded
108 99 resources and references these tools, which may be useful as a
... ...
include/qpdf/QPDF.hh
... ... @@ -604,6 +604,7 @@ class QPDF
604 604 int& act_objid, int& act_generation);
605 605 PointerHolder<QPDFObject> resolve(int objid, int generation);
606 606 void resolveObjectsInStream(int obj_stream_number);
  607 + void findAttachmentStreams();
607 608  
608 609 // Calls finish() on the pipeline when done but does not delete it
609 610 void pipeStreamData(int objid, int generation,
... ... @@ -1004,6 +1005,7 @@ class QPDF
1004 1005 PointerHolder<QPDFObjectHandle::StreamDataProvider> copied_streams;
1005 1006 // copied_stream_data_provider is owned by copied_streams
1006 1007 CopiedStreamDataProvider* copied_stream_data_provider;
  1008 + std::set<ObjGen> attachment_streams;
1007 1009  
1008 1010 // Linearization data
1009 1011 qpdf_offset_t first_xref_item_offset; // actual value from file
... ...
libqpdf/QPDF.cc
... ... @@ -314,6 +314,7 @@ QPDF::parse(char const* password)
314 314 }
315 315  
316 316 initializeEncryption();
  317 + findAttachmentStreams();
317 318 }
318 319  
319 320 void
... ... @@ -2069,3 +2070,38 @@ QPDF::pipeStreamData(int objid, int generation,
2069 2070 }
2070 2071 pipeline->finish();
2071 2072 }
  2073 +
  2074 +void
  2075 +QPDF::findAttachmentStreams()
  2076 +{
  2077 + QPDFObjectHandle root = getRoot();
  2078 + QPDFObjectHandle names = root.getKey("/Names");
  2079 + if (! names.isDictionary())
  2080 + {
  2081 + return;
  2082 + }
  2083 + QPDFObjectHandle embeddedFiles = names.getKey("/EmbeddedFiles");
  2084 + if (! embeddedFiles.isDictionary())
  2085 + {
  2086 + return;
  2087 + }
  2088 + names = embeddedFiles.getKey("/Names");
  2089 + if (! names.isArray())
  2090 + {
  2091 + return;
  2092 + }
  2093 + for (int i = 0; i < names.getArrayNItems(); ++i)
  2094 + {
  2095 + QPDFObjectHandle item = names.getArrayItem(i);
  2096 + if (item.isDictionary() &&
  2097 + item.getKey("/Type").isName() &&
  2098 + (item.getKey("/Type").getName() == "/Filespec") &&
  2099 + item.getKey("/EF").isDictionary() &&
  2100 + item.getKey("/EF").getKey("/F").isStream())
  2101 + {
  2102 + QPDFObjectHandle stream = item.getKey("/EF").getKey("/F");
  2103 + this->attachment_streams.insert(
  2104 + ObjGen(stream.getObjectID(), stream.getGeneration()));
  2105 + }
  2106 + }
  2107 +}
... ...
libqpdf/QPDFWriter.cc
... ... @@ -470,27 +470,13 @@ QPDFWriter::copyEncryptionParameters(QPDF& qpdf)
470 470 }
471 471 if (V >= 4)
472 472 {
473   - if (encrypt.hasKey("/CF") &&
474   - encrypt.getKey("/CF").isDictionary() &&
475   - encrypt.hasKey("/StmF") &&
476   - encrypt.getKey("/StmF").isName())
477   - {
478   - // Determine whether to use AES from StmF. QPDFWriter
479   - // can't write files with different StrF and StmF.
480   - QPDFObjectHandle CF = encrypt.getKey("/CF");
481   - QPDFObjectHandle StmF = encrypt.getKey("/StmF");
482   - if (CF.hasKey(StmF.getName()) &&
483   - CF.getKey(StmF.getName()).isDictionary())
484   - {
485   - QPDFObjectHandle StmF_data = CF.getKey(StmF.getName());
486   - if (StmF_data.hasKey("/CFM") &&
487   - StmF_data.getKey("/CFM").isName() &&
488   - StmF_data.getKey("/CFM").getName() == "/AESV2")
489   - {
490   - this->encrypt_use_aes = true;
491   - }
492   - }
493   - }
  473 + // When copying encryption parameters, use AES even if the
  474 + // original file did not. Acrobat doesn't create files
  475 + // with V >= 4 that don't use AES, and the logic of
  476 + // figuring out whether AES is used or not is complicated
  477 + // with /StmF, /StrF, and /EFF all potentially having
  478 + // different values.
  479 + this->encrypt_use_aes = true;
494 480 }
495 481 QTC::TC("qpdf", "QPDFWriter copy encrypt metadata",
496 482 this->encrypt_metadata ? 0 : 1);
... ...
libqpdf/QPDF_Stream.cc
... ... @@ -91,6 +91,80 @@ QPDF_Stream::getRawStreamData()
91 91 }
92 92  
93 93 bool
  94 +QPDF_Stream::understandDecodeParams(
  95 + std::string const& filter, QPDFObjectHandle decode_obj,
  96 + int& predictor, int& columns, bool& early_code_change)
  97 +{
  98 + bool filterable = true;
  99 + std::set<std::string> keys = decode_obj.getKeys();
  100 + for (std::set<std::string>::iterator iter = keys.begin();
  101 + iter != keys.end(); ++iter)
  102 + {
  103 + std::string const& key = *iter;
  104 + if ((filter == "/FlateDecode") && (key == "/Predictor"))
  105 + {
  106 + QPDFObjectHandle predictor_obj = decode_obj.getKey(key);
  107 + if (predictor_obj.isInteger())
  108 + {
  109 + predictor = predictor_obj.getIntValue();
  110 + if (! ((predictor == 1) || (predictor == 12)))
  111 + {
  112 + filterable = false;
  113 + }
  114 + }
  115 + else
  116 + {
  117 + filterable = false;
  118 + }
  119 + }
  120 + else if ((filter == "/LZWDecode") && (key == "/EarlyChange"))
  121 + {
  122 + QPDFObjectHandle earlychange_obj = decode_obj.getKey(key);
  123 + if (earlychange_obj.isInteger())
  124 + {
  125 + int earlychange = earlychange_obj.getIntValue();
  126 + early_code_change = (earlychange == 1);
  127 + if (! ((earlychange == 0) || (earlychange == 1)))
  128 + {
  129 + filterable = false;
  130 + }
  131 + }
  132 + else
  133 + {
  134 + filterable = false;
  135 + }
  136 + }
  137 + else if (key == "/Columns")
  138 + {
  139 + QPDFObjectHandle columns_obj = decode_obj.getKey(key);
  140 + if (columns_obj.isInteger())
  141 + {
  142 + columns = columns_obj.getIntValue();
  143 + }
  144 + else
  145 + {
  146 + filterable = false;
  147 + }
  148 + }
  149 + else if ((filter == "/Crypt") &&
  150 + (((key == "/Type") || (key == "/Name")) &&
  151 + (decode_obj.getKey("/Type").isNull() ||
  152 + (decode_obj.getKey("/Type").isName() &&
  153 + (decode_obj.getKey("/Type").getName() ==
  154 + "/CryptFilterDecodeParms")))))
  155 + {
  156 + // we handle this in decryptStream
  157 + }
  158 + else
  159 + {
  160 + filterable = false;
  161 + }
  162 + }
  163 +
  164 + return filterable;
  165 +}
  166 +
  167 +bool
94 168 QPDF_Stream::filterable(std::vector<std::string>& filters,
95 169 int& predictor, int& columns,
96 170 bool& early_code_change)
... ... @@ -110,106 +184,6 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
110 184 filter_abbreviations["/DCT"] = "/DCTDecode";
111 185 }
112 186  
113   - // Initialize values to their defaults as per the PDF spec
114   - predictor = 1;
115   - columns = 0;
116   - early_code_change = true;
117   -
118   - bool filterable = true;
119   -
120   - // See if we can support any decode parameters that are specified.
121   -
122   - QPDFObjectHandle decode_obj =
123   - this->stream_dict.getKey("/DecodeParms");
124   - if (decode_obj.isNull())
125   - {
126   - // no problem
127   - }
128   - else if (decode_obj.isDictionary())
129   - {
130   - std::set<std::string> keys = decode_obj.getKeys();
131   - for (std::set<std::string>::iterator iter = keys.begin();
132   - iter != keys.end(); ++iter)
133   - {
134   - std::string const& key = *iter;
135   - if (key == "/Predictor")
136   - {
137   - QPDFObjectHandle predictor_obj = decode_obj.getKey(key);
138   - if (predictor_obj.isInteger())
139   - {
140   - predictor = predictor_obj.getIntValue();
141   - if (! ((predictor == 1) || (predictor == 12)))
142   - {
143   - filterable = false;
144   - }
145   - }
146   - else
147   - {
148   - filterable = false;
149   - }
150   - }
151   - else if (key == "/EarlyChange")
152   - {
153   - QPDFObjectHandle earlychange_obj = decode_obj.getKey(key);
154   - if (earlychange_obj.isInteger())
155   - {
156   - int earlychange = earlychange_obj.getIntValue();
157   - early_code_change = (earlychange == 1);
158   - if (! ((earlychange == 0) || (earlychange == 1)))
159   - {
160   - filterable = false;
161   - }
162   - }
163   - else
164   - {
165   - filterable = false;
166   - }
167   - }
168   - else if (key == "/Columns")
169   - {
170   - QPDFObjectHandle columns_obj = decode_obj.getKey(key);
171   - if (columns_obj.isInteger())
172   - {
173   - columns = columns_obj.getIntValue();
174   - }
175   - else
176   - {
177   - filterable = false;
178   - }
179   - }
180   - else if (((key == "/Type") || (key == "/Name")) &&
181   - decode_obj.getKey("/Type").isName() &&
182   - (decode_obj.getKey("/Type").getName() ==
183   - "/CryptFilterDecodeParms"))
184   - {
185   - // we handle this in decryptStream
186   - }
187   - else
188   - {
189   - filterable = false;
190   - }
191   - }
192   - }
193   - else
194   - {
195   - // Ignore for now -- some filter types, like CCITTFaxDecode,
196   - // use types other than dictionary for this.
197   - QTC::TC("qpdf", "QPDF_Stream ignore non-dictionary DecodeParms");
198   -
199   - filterable = false;
200   - }
201   -
202   - if ((predictor > 1) && (columns == 0))
203   - {
204   - // invalid
205   - filterable = false;
206   - }
207   -
208   - if (! filterable)
209   - {
210   - return false;
211   - }
212   -
213 187 // Check filters
214 188  
215 189 QPDFObjectHandle filter_obj = this->stream_dict.getKey("/Filter");
... ... @@ -254,8 +228,7 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
254 228 "stream filter type is not name or array");
255 229 }
256 230  
257   - // `filters' now contains a list of filters to be applied in
258   - // order. See which ones we can support.
  231 + bool filterable = true;
259 232  
260 233 for (std::vector<std::string>::iterator iter = filters.begin();
261 234 iter != filters.end(); ++iter)
... ... @@ -278,6 +251,79 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
278 251 }
279 252 }
280 253  
  254 + if (! filterable)
  255 + {
  256 + return false;
  257 + }
  258 +
  259 + // `filters' now contains a list of filters to be applied in
  260 + // order. See which ones we can support.
  261 +
  262 + // Initialize values to their defaults as per the PDF spec
  263 + predictor = 1;
  264 + columns = 0;
  265 + early_code_change = true;
  266 +
  267 + // See if we can support any decode parameters that are specified.
  268 +
  269 + QPDFObjectHandle decode_obj = this->stream_dict.getKey("/DecodeParms");
  270 + std::vector<QPDFObjectHandle> decode_parms;
  271 + if (decode_obj.isArray())
  272 + {
  273 + for (int i = 0; i < decode_obj.getArrayNItems(); ++i)
  274 + {
  275 + decode_parms.push_back(decode_obj.getArrayItem(i));
  276 + }
  277 + }
  278 + else
  279 + {
  280 + for (unsigned int i = 0; i < filters.size(); ++i)
  281 + {
  282 + decode_parms.push_back(decode_obj);
  283 + }
  284 + }
  285 +
  286 + if (decode_parms.size() != filters.size())
  287 + {
  288 + throw QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
  289 + "", this->offset,
  290 + "stream /DecodeParms length is"
  291 + " inconsistent with filters");
  292 + }
  293 +
  294 + for (unsigned int i = 0; i < filters.size(); ++i)
  295 + {
  296 + QPDFObjectHandle decode_item = decode_parms[i];
  297 + if (decode_item.isNull())
  298 + {
  299 + // okay
  300 + }
  301 + else if (decode_item.isDictionary())
  302 + {
  303 + if (! understandDecodeParams(
  304 + filters[i], decode_item,
  305 + predictor, columns, early_code_change))
  306 + {
  307 + filterable = false;
  308 + }
  309 + }
  310 + else
  311 + {
  312 + filterable = false;
  313 + }
  314 + }
  315 +
  316 + if ((predictor > 1) && (columns == 0))
  317 + {
  318 + // invalid
  319 + filterable = false;
  320 + }
  321 +
  322 + if (! filterable)
  323 + {
  324 + return false;
  325 + }
  326 +
281 327 return filterable;
282 328 }
283 329  
... ...
libqpdf/QPDF_encryption.cc
... ... @@ -573,28 +573,6 @@ QPDF::initializeEncryption()
573 573 {
574 574 this->cf_file = this->cf_stream;
575 575 }
576   - if (this->cf_file != this->cf_stream)
577   - {
578   - // The issue for qpdf is that it can't tell the difference
579   - // between an embedded file stream and a regular stream.
580   - // Search for a comment containing cf_file. To fix this,
581   - // we need files with encrypted embedded files and
582   - // non-encrypted native streams and vice versa. Also if
583   - // it is possible for them to be encrypted in different
584   - // ways, we should have some of those too. In cases where
585   - // we can detect whether a stream is encrypted or not, we
586   - // might want to try to detecet that automatically in
587   - // defense of possible logic errors surrounding detection
588   - // of embedded file streams, unless that's really clear
589   - // from the specification.
590   - throw QPDFExc(qpdf_e_unsupported, this->file->getName(),
591   - "encryption dictionary", this->file->getLastOffset(),
592   - "This document has embedded files that are"
593   - " encrypted differently from the rest of the file."
594   - " qpdf does not presently support this due to"
595   - " lack of test data; if possible, please submit"
596   - " a bug report that includes this file.");
597   - }
598 576 }
599 577 EncryptionData data(V, R, Length / 8, P, O, U, "", "", "",
600 578 id1, this->encrypt_metadata);
... ... @@ -737,18 +715,48 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation,
737 715 encryption_method_e method = e_unknown;
738 716 std::string method_source = "/StmF from /Encrypt dictionary";
739 717  
740   - if (stream_dict.getKey("/Filter").isOrHasName("/Crypt") &&
741   - stream_dict.getKey("/DecodeParms").isDictionary())
742   - {
743   - QPDFObjectHandle decode_parms = stream_dict.getKey("/DecodeParms");
744   - if (decode_parms.getKey("/Type").isName() &&
745   - (decode_parms.getKey("/Type").getName() ==
746   - "/CryptFilterDecodeParms"))
747   - {
748   - QTC::TC("qpdf", "QPDF_encryption stream crypt filter");
749   - method = interpretCF(decode_parms.getKey("/Name"));
750   - method_source = "stream's Crypt decode parameters";
751   - }
  718 + if (stream_dict.getKey("/Filter").isOrHasName("/Crypt"))
  719 + {
  720 + if (stream_dict.getKey("/DecodeParms").isDictionary())
  721 + {
  722 + QPDFObjectHandle decode_parms =
  723 + stream_dict.getKey("/DecodeParms");
  724 + if (decode_parms.getKey("/Type").isName() &&
  725 + (decode_parms.getKey("/Type").getName() ==
  726 + "/CryptFilterDecodeParms"))
  727 + {
  728 + QTC::TC("qpdf", "QPDF_encryption stream crypt filter");
  729 + method = interpretCF(decode_parms.getKey("/Name"));
  730 + method_source = "stream's Crypt decode parameters";
  731 + }
  732 + }
  733 + else if (stream_dict.getKey("/DecodeParms").isArray() &&
  734 + stream_dict.getKey("/Filter").isArray())
  735 + {
  736 + QPDFObjectHandle filter = stream_dict.getKey("/Filter");
  737 + QPDFObjectHandle decode = stream_dict.getKey("/DecodeParms");
  738 + if (filter.getArrayNItems() == decode.getArrayNItems())
  739 + {
  740 + for (int i = 0; i < filter.getArrayNItems(); ++i)
  741 + {
  742 + if (filter.getArrayItem(i).isName() &&
  743 + (filter.getArrayItem(i).getName() == "/Crypt"))
  744 + {
  745 + QPDFObjectHandle crypt_params =
  746 + decode.getArrayItem(i);
  747 + if (crypt_params.isDictionary() &&
  748 + crypt_params.getKey("/Name").isName())
  749 + {
  750 +// XXX QTC::TC("qpdf", "QPDF_encrypt crypt array");
  751 + method = interpretCF(
  752 + crypt_params.getKey("/Name"));
  753 + method_source = "stream's Crypt "
  754 + "decode parameters (array)";
  755 + }
  756 + }
  757 + }
  758 + }
  759 + }
752 760 }
753 761  
754 762 if (method == e_unknown)
... ... @@ -760,12 +768,15 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation,
760 768 }
761 769 else
762 770 {
763   - // NOTE: We should should use cf_file if this is an
764   - // embedded file, but we can't yet detect embedded
765   - // file streams as such. When fixing, search for all
766   - // occurrences of cf_file to find a reference to this
767   - // comment.
768   - method = this->cf_stream;
  771 + if (this->attachment_streams.count(
  772 + ObjGen(objid, generation)) > 0)
  773 + {
  774 + method = this->cf_file;
  775 + }
  776 + else
  777 + {
  778 + method = this->cf_stream;
  779 + }
769 780 }
770 781 }
771 782 use_aes = false;
... ...
libqpdf/qpdf/QPDF_Stream.hh
... ... @@ -45,6 +45,9 @@ class QPDF_Stream: public QPDFObject
45 45 void replaceFilterData(QPDFObjectHandle const& filter,
46 46 QPDFObjectHandle const& decode_parms,
47 47 size_t length);
  48 + bool understandDecodeParams(
  49 + std::string const& filter, QPDFObjectHandle decode_params,
  50 + int& predictor, int& columns, bool& early_code_change);
48 51 bool filterable(std::vector<std::string>& filters,
49 52 int& predictor, int& columns, bool& early_code_change);
50 53  
... ...
qpdf/qpdf.testcov
... ... @@ -116,7 +116,6 @@ qpdf unable to filter 0
116 116 QPDF_String non-trivial UTF-16 0
117 117 QPDF xref overwrite object 0
118 118 QPDF decoding error warning 0
119   -QPDF_Stream ignore non-dictionary DecodeParms 0
120 119 qpdf-c called qpdf_init 0
121 120 qpdf-c called qpdf_cleanup 0
122 121 qpdf-c called qpdf_more_warnings 0
... ...
qpdf/qtest/qpdf/obj0-check.out
1   -checking obj0.pdf
2 1 WARNING: obj0.pdf: file is damaged
3 2 WARNING: obj0.pdf (object 1 0, file position 77): expected n n obj
4 3 WARNING: obj0.pdf: Attempting to reconstruct cross-reference table
  4 +checking obj0.pdf
5 5 PDF Version: 1.3
6 6 File is not encrypted
7 7 File is not linearized
... ...