Commit b8bdef0ad12883d72ced5eb443e6e34a93bbbb91

Authored by Jay Berkenbilt
1 parent 607c3921

Implement deterministic ID

For non-encrypted files, deterministic ID generation uses file contents
instead of timestamp and file name. At a small runtime cost, this
enables generation of the same /ID if the same inputs are converted in
the same way multiple times.
ChangeLog
  1 +2015-10-29 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Implement QPDFWriter::setDeterministicID and --deterministic-id
  4 + command-line flag to qpdf to request generation of a deterministic
  5 + /ID for non-encrypted files.
  6 +
1 2015-05-24 Jay Berkenbilt <ejb@ql.org> 7 2015-05-24 Jay Berkenbilt <ejb@ql.org>
2 8
3 * 5.1.3: release 9 * 5.1.3: release
@@ -46,6 +46,14 @@ Small, command-line tool only enhancements to do soon @@ -46,6 +46,14 @@ Small, command-line tool only enhancements to do soon
46 (libtool). 46 (libtool).
47 47
48 48
  49 +Next ABI change
  50 +===============
  51 +
  52 +Remove private methods that are there only for ABI compatibility
  53 +including extra QPDFWriter writeTrailer, writeXRefTable,
  54 +writeXRefStream.
  55 +
  56 +
49 5.2.0 57 5.2.0
50 ===== 58 =====
51 59
include/qpdf/QPDFWriter.hh
@@ -35,6 +35,7 @@ @@ -35,6 +35,7 @@
35 class QPDF; 35 class QPDF;
36 class QPDFObjectHandle; 36 class QPDFObjectHandle;
37 class Pl_Count; 37 class Pl_Count;
  38 +class Pl_MD5;
38 39
39 class QPDFWriter 40 class QPDFWriter
40 { 41 {
@@ -189,8 +190,22 @@ class QPDFWriter @@ -189,8 +190,22 @@ class QPDFWriter
189 QPDF_DLL 190 QPDF_DLL
190 void setExtraHeaderText(std::string const&); 191 void setExtraHeaderText(std::string const&);
191 192
  193 + // Causes a deterministic /ID value to be generated. When this is
  194 + // set, the current time and output file name are not used as part
  195 + // of /ID generation. Instead, a digest of all significant parts
  196 + // of the output file's contents is included in the /ID
  197 + // calculation. Use of a deterministic /ID can be handy when it is
  198 + // desirable for a repeat of the same qpdf operation on the same
  199 + // inputs being written to the same outputs with the same
  200 + // parameters to generate exactly the same results. This feature
  201 + // is incompatible with encrypted files because, for encrypted
  202 + // files, the /ID is generated before any part of the file is
  203 + // written since it is an input to the encryption process.
  204 + QPDF_DLL
  205 + void setDeterministicID(bool);
  206 +
192 // Cause a static /ID value to be generated. Use only in test 207 // Cause a static /ID value to be generated. Use only in test
193 - // suites. 208 + // suites. See also setDeterministicID.
194 QPDF_DLL 209 QPDF_DLL
195 void setStaticID(bool); 210 void setStaticID(bool);
196 211
@@ -298,6 +313,9 @@ class QPDFWriter @@ -298,6 +313,9 @@ class QPDFWriter
298 void writeObject(QPDFObjectHandle object, int object_stream_index = -1); 313 void writeObject(QPDFObjectHandle object, int object_stream_index = -1);
299 void writeTrailer(trailer_e which, int size, 314 void writeTrailer(trailer_e which, int size,
300 bool xref_stream, qpdf_offset_t prev = 0); 315 bool xref_stream, qpdf_offset_t prev = 0);
  316 + void writeTrailer(trailer_e which, int size,
  317 + bool xref_stream, qpdf_offset_t prev,
  318 + int linearization_pass);
301 void unparseObject(QPDFObjectHandle object, int level, 319 void unparseObject(QPDFObjectHandle object, int level,
302 unsigned int flags); 320 unsigned int flags);
303 void unparseObject(QPDFObjectHandle object, int level, 321 void unparseObject(QPDFObjectHandle object, int level,
@@ -348,6 +366,15 @@ class QPDFWriter @@ -348,6 +366,15 @@ class QPDFWriter
348 int hint_id, 366 int hint_id,
349 qpdf_offset_t hint_offset, 367 qpdf_offset_t hint_offset,
350 qpdf_offset_t hint_length); 368 qpdf_offset_t hint_length);
  369 + qpdf_offset_t writeXRefTable(
  370 + trailer_e which, int first, int last, int size,
  371 + // for linearization
  372 + qpdf_offset_t prev,
  373 + bool suppress_offsets,
  374 + int hint_id,
  375 + qpdf_offset_t hint_offset,
  376 + qpdf_offset_t hint_length,
  377 + int linearization_pass);
351 qpdf_offset_t writeXRefStream( 378 qpdf_offset_t writeXRefStream(
352 int objid, int max_id, qpdf_offset_t max_offset, 379 int objid, int max_id, qpdf_offset_t max_offset,
353 trailer_e which, int first, int last, int size); 380 trailer_e which, int first, int last, int size);
@@ -360,6 +387,16 @@ class QPDFWriter @@ -360,6 +387,16 @@ class QPDFWriter
360 qpdf_offset_t hint_offset, 387 qpdf_offset_t hint_offset,
361 qpdf_offset_t hint_length, 388 qpdf_offset_t hint_length,
362 bool skip_compression); 389 bool skip_compression);
  390 + qpdf_offset_t writeXRefStream(
  391 + int objid, int max_id, qpdf_offset_t max_offset,
  392 + trailer_e which, int first, int last, int size,
  393 + // for linearization
  394 + qpdf_offset_t prev,
  395 + int hint_id,
  396 + qpdf_offset_t hint_offset,
  397 + qpdf_offset_t hint_length,
  398 + bool skip_compression,
  399 + int linearization_pass);
363 int calculateXrefStreamPadding(int xref_bytes); 400 int calculateXrefStreamPadding(int xref_bytes);
364 401
365 // When filtering subsections, push additional pipelines to the 402 // When filtering subsections, push additional pipelines to the
@@ -380,6 +417,8 @@ class QPDFWriter @@ -380,6 +417,8 @@ class QPDFWriter
380 void adjustAESStreamLength(size_t& length); 417 void adjustAESStreamLength(size_t& length);
381 void pushEncryptionFilter(); 418 void pushEncryptionFilter();
382 void pushDiscardFilter(); 419 void pushDiscardFilter();
  420 + void pushMD5Pipeline();
  421 + void computeDeterministicIDData();
383 422
384 void discardGeneration(std::map<QPDFObjGen, int> const& in, 423 void discardGeneration(std::map<QPDFObjGen, int> const& in,
385 std::map<int, int>& out); 424 std::map<int, int>& out);
@@ -437,6 +476,9 @@ class QPDFWriter @@ -437,6 +476,9 @@ class QPDFWriter
437 std::map<QPDFObjGen, int> object_to_object_stream; 476 std::map<QPDFObjGen, int> object_to_object_stream;
438 std::map<int, std::set<QPDFObjGen> > object_stream_to_objects; 477 std::map<int, std::set<QPDFObjGen> > object_stream_to_objects;
439 std::list<Pipeline*> pipeline_stack; 478 std::list<Pipeline*> pipeline_stack;
  479 + bool deterministic_id;
  480 + Pl_MD5* md5_pipeline;
  481 + std::string deterministic_id_data;
440 482
441 // For linearization only 483 // For linearization only
442 std::map<int, int> obj_renumber_no_gen; 484 std::map<int, int> obj_renumber_no_gen;
include/qpdf/qpdf-c.h
@@ -324,8 +324,11 @@ extern "C" { @@ -324,8 +324,11 @@ extern "C" {
324 QPDF_DLL 324 QPDF_DLL
325 void qpdf_set_qdf_mode(qpdf_data qpdf, QPDF_BOOL value); 325 void qpdf_set_qdf_mode(qpdf_data qpdf, QPDF_BOOL value);
326 326
  327 + QPDF_DLL
  328 + void qpdf_set_deterministic_ID(qpdf_data qpdf, QPDF_BOOL value);
  329 +
327 /* Never use qpdf_set_static_ID except in test suites to suppress 330 /* Never use qpdf_set_static_ID except in test suites to suppress
328 - * generation of a random /ID. 331 + * generation of a random /ID. See also qpdf_set_deterministic_ID.
329 */ 332 */
330 QPDF_DLL 333 QPDF_DLL
331 void qpdf_set_static_ID(qpdf_data qpdf, QPDF_BOOL value); 334 void qpdf_set_static_ID(qpdf_data qpdf, QPDF_BOOL value);
libqpdf/Pl_MD5.cc
@@ -3,7 +3,9 @@ @@ -3,7 +3,9 @@
3 3
4 Pl_MD5::Pl_MD5(char const* identifier, Pipeline* next) : 4 Pl_MD5::Pl_MD5(char const* identifier, Pipeline* next) :
5 Pipeline(identifier, next), 5 Pipeline(identifier, next),
6 - in_progress(false) 6 + in_progress(false),
  7 + enabled(true),
  8 + persist_across_finish(false)
7 { 9 {
8 } 10 }
9 11
@@ -14,24 +16,27 @@ Pl_MD5::~Pl_MD5() @@ -14,24 +16,27 @@ Pl_MD5::~Pl_MD5()
14 void 16 void
15 Pl_MD5::write(unsigned char* buf, size_t len) 17 Pl_MD5::write(unsigned char* buf, size_t len)
16 { 18 {
17 - if (! this->in_progress) 19 + if (this->enabled)
18 { 20 {
19 - this->md5.reset();  
20 - this->in_progress = true;  
21 - } 21 + if (! this->in_progress)
  22 + {
  23 + this->md5.reset();
  24 + this->in_progress = true;
  25 + }
22 26
23 - // Write in chunks in case len is too big to fit in an int.  
24 - // Assume int is at least 32 bits.  
25 - static size_t const max_bytes = 1 << 30;  
26 - size_t bytes_left = len;  
27 - unsigned char* data = buf;  
28 - while (bytes_left > 0)  
29 - {  
30 - size_t bytes = (bytes_left >= max_bytes ? max_bytes : bytes_left);  
31 - this->md5.encodeDataIncrementally(  
32 - reinterpret_cast<char*>(data), bytes);  
33 - bytes_left -= bytes;  
34 - data += bytes; 27 + // Write in chunks in case len is too big to fit in an int.
  28 + // Assume int is at least 32 bits.
  29 + static size_t const max_bytes = 1 << 30;
  30 + size_t bytes_left = len;
  31 + unsigned char* data = buf;
  32 + while (bytes_left > 0)
  33 + {
  34 + size_t bytes = (bytes_left >= max_bytes ? max_bytes : bytes_left);
  35 + this->md5.encodeDataIncrementally(
  36 + reinterpret_cast<char*>(data), bytes);
  37 + bytes_left -= bytes;
  38 + data += bytes;
  39 + }
35 } 40 }
36 41
37 this->getNext()->write(buf, len); 42 this->getNext()->write(buf, len);
@@ -41,16 +46,32 @@ void @@ -41,16 +46,32 @@ void
41 Pl_MD5::finish() 46 Pl_MD5::finish()
42 { 47 {
43 this->getNext()->finish(); 48 this->getNext()->finish();
44 - this->in_progress = false; 49 + if (! this->persist_across_finish)
  50 + {
  51 + this->in_progress = false;
  52 + }
  53 +}
  54 +
  55 +void
  56 +Pl_MD5::enable(bool enabled)
  57 +{
  58 + this->enabled = enabled;
  59 +}
  60 +
  61 +void
  62 +Pl_MD5::persistAcrossFinish(bool persist)
  63 +{
  64 + this->persist_across_finish = persist;
45 } 65 }
46 66
47 std::string 67 std::string
48 Pl_MD5::getHexDigest() 68 Pl_MD5::getHexDigest()
49 { 69 {
50 - if (this->in_progress) 70 + if (! this->enabled)
51 { 71 {
52 throw std::logic_error( 72 throw std::logic_error(
53 - "digest requested for in-progress MD5 Pipeline"); 73 + "digest requested for a disabled MD5 Pipeline");
54 } 74 }
  75 + this->in_progress = false;
55 return this->md5.unparse(); 76 return this->md5.unparse();
56 } 77 }
libqpdf/QPDFWriter.cc
@@ -9,6 +9,7 @@ @@ -9,6 +9,7 @@
9 #include <qpdf/Pl_AES_PDF.hh> 9 #include <qpdf/Pl_AES_PDF.hh>
10 #include <qpdf/Pl_Flate.hh> 10 #include <qpdf/Pl_Flate.hh>
11 #include <qpdf/Pl_PNGFilter.hh> 11 #include <qpdf/Pl_PNGFilter.hh>
  12 +#include <qpdf/Pl_MD5.hh>
12 #include <qpdf/QUtil.hh> 13 #include <qpdf/QUtil.hh>
13 #include <qpdf/MD5.hh> 14 #include <qpdf/MD5.hh>
14 #include <qpdf/RC4.hh> 15 #include <qpdf/RC4.hh>
@@ -77,6 +78,8 @@ QPDFWriter::init() @@ -77,6 +78,8 @@ QPDFWriter::init()
77 cur_stream_length = 0; 78 cur_stream_length = 0;
78 added_newline = false; 79 added_newline = false;
79 max_ostream_index = 0; 80 max_ostream_index = 0;
  81 + deterministic_id = false;
  82 + md5_pipeline = 0;
80 } 83 }
81 84
82 QPDFWriter::~QPDFWriter() 85 QPDFWriter::~QPDFWriter()
@@ -264,6 +267,12 @@ QPDFWriter::setStaticID(bool val) @@ -264,6 +267,12 @@ QPDFWriter::setStaticID(bool val)
264 } 267 }
265 268
266 void 269 void
  270 +QPDFWriter::setDeterministicID(bool val)
  271 +{
  272 + this->deterministic_id = val;
  273 +}
  274 +
  275 +void
267 QPDFWriter::setStaticAesIV(bool val) 276 QPDFWriter::setStaticAesIV(bool val)
268 { 277 {
269 if (val) 278 if (val)
@@ -507,10 +516,10 @@ void @@ -507,10 +516,10 @@ void
507 QPDFWriter::copyEncryptionParameters(QPDF& qpdf) 516 QPDFWriter::copyEncryptionParameters(QPDF& qpdf)
508 { 517 {
509 this->preserve_encryption = false; 518 this->preserve_encryption = false;
510 - generateID();  
511 QPDFObjectHandle trailer = qpdf.getTrailer(); 519 QPDFObjectHandle trailer = qpdf.getTrailer();
512 if (trailer.hasKey("/Encrypt")) 520 if (trailer.hasKey("/Encrypt"))
513 { 521 {
  522 + generateID();
514 this->id1 = 523 this->id1 =
515 trailer.getKey("/ID").getArrayItem(0).getStringValue(); 524 trailer.getKey("/ID").getArrayItem(0).getStringValue();
516 QPDFObjectHandle encrypt = trailer.getKey("/Encrypt"); 525 QPDFObjectHandle encrypt = trailer.getKey("/Encrypt");
@@ -864,6 +873,10 @@ QPDFWriter::popPipelineStack(PointerHolder<Buffer>* bp) @@ -864,6 +873,10 @@ QPDFWriter::popPipelineStack(PointerHolder<Buffer>* bp)
864 while (dynamic_cast<Pl_Count*>(this->pipeline_stack.back()) == 0) 873 while (dynamic_cast<Pl_Count*>(this->pipeline_stack.back()) == 0)
865 { 874 {
866 Pipeline* p = this->pipeline_stack.back(); 875 Pipeline* p = this->pipeline_stack.back();
  876 + if (dynamic_cast<Pl_MD5*>(p) == this->md5_pipeline)
  877 + {
  878 + this->md5_pipeline = 0;
  879 + }
867 this->pipeline_stack.pop_back(); 880 this->pipeline_stack.pop_back();
868 Pl_Buffer* buf = dynamic_cast<Pl_Buffer*>(p); 881 Pl_Buffer* buf = dynamic_cast<Pl_Buffer*>(p);
869 if (bp && buf) 882 if (bp && buf)
@@ -921,6 +934,36 @@ QPDFWriter::pushDiscardFilter() @@ -921,6 +934,36 @@ QPDFWriter::pushDiscardFilter()
921 activatePipelineStack(); 934 activatePipelineStack();
922 } 935 }
923 936
  937 +void
  938 +QPDFWriter::pushMD5Pipeline()
  939 +{
  940 + if (! this->id2.empty())
  941 + {
  942 + // Can't happen in the code
  943 + throw std::logic_error(
  944 + "Deterministic ID computation enabled after ID"
  945 + " generation has already occurred.");
  946 + }
  947 + assert(this->deterministic_id);
  948 + assert(this->md5_pipeline == 0);
  949 + assert(this->pipeline->getCount() == 0);
  950 + this->md5_pipeline = new Pl_MD5("qpdf md5", this->pipeline);
  951 + this->md5_pipeline->persistAcrossFinish(true);
  952 + // Special case code in popPipelineStack clears this->md5_pipeline
  953 + // upon deletion.
  954 + pushPipeline(this->md5_pipeline);
  955 + activatePipelineStack();
  956 +}
  957 +
  958 +void
  959 +QPDFWriter::computeDeterministicIDData()
  960 +{
  961 + assert(this->md5_pipeline != 0);
  962 + assert(this->deterministic_id_data.empty());
  963 + this->deterministic_id_data = this->md5_pipeline->getHexDigest();
  964 + this->md5_pipeline->enable(false);
  965 +}
  966 +
924 int 967 int
925 QPDFWriter::openObject(int objid) 968 QPDFWriter::openObject(int objid)
926 { 969 {
@@ -1069,6 +1112,13 @@ void @@ -1069,6 +1112,13 @@ void
1069 QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream, 1112 QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream,
1070 qpdf_offset_t prev) 1113 qpdf_offset_t prev)
1071 { 1114 {
  1115 + writeTrailer(which, size, xref_stream, prev, 0);
  1116 +}
  1117 +
  1118 +void
  1119 +QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream,
  1120 + qpdf_offset_t prev, int linearization_pass)
  1121 +{
1072 QPDFObjectHandle trailer = getTrimmedTrailer(); 1122 QPDFObjectHandle trailer = getTrimmedTrailer();
1073 if (! xref_stream) 1123 if (! xref_stream)
1074 { 1124 {
@@ -1119,8 +1169,21 @@ QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream, @@ -1119,8 +1169,21 @@ QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream,
1119 // Write ID 1169 // Write ID
1120 writeStringQDF(" "); 1170 writeStringQDF(" ");
1121 writeString(" /ID ["); 1171 writeString(" /ID [");
1122 - writeString(QPDF_String(this->id1).unparse(true));  
1123 - writeString(QPDF_String(this->id2).unparse(true)); 1172 + if (linearization_pass == 1)
  1173 + {
  1174 + writeString("<00000000000000000000000000000000>"
  1175 + "<00000000000000000000000000000000>");
  1176 + }
  1177 + else
  1178 + {
  1179 + if ((linearization_pass == 0) && (this->deterministic_id))
  1180 + {
  1181 + computeDeterministicIDData();
  1182 + }
  1183 + generateID();
  1184 + writeString(QPDF_String(this->id1).unparse(true));
  1185 + writeString(QPDF_String(this->id2).unparse(true));
  1186 + }
1124 writeString("]"); 1187 writeString("]");
1125 1188
1126 if (which != t_lin_second) 1189 if (which != t_lin_second)
@@ -1794,12 +1857,8 @@ QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index) @@ -1794,12 +1857,8 @@ QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
1794 void 1857 void
1795 QPDFWriter::generateID() 1858 QPDFWriter::generateID()
1796 { 1859 {
1797 - // Note: we can't call generateID() at the time of construction  
1798 - // since the caller hasn't yet had a chance to call setStaticID(),  
1799 - // but we need to generate it before computing encryption  
1800 - // dictionary parameters. This is why we call this function both  
1801 - // from setEncryptionParameters() and from write() and return  
1802 - // immediately if the ID has already been generated. 1860 + // Generate the ID lazily so that we can handle the user's
  1861 + // preference to use static or deterministic ID generation.
1803 1862
1804 if (! this->id2.empty()) 1863 if (! this->id2.empty())
1805 { 1864 {
@@ -1822,17 +1881,40 @@ QPDFWriter::generateID() @@ -1822,17 +1881,40 @@ QPDFWriter::generateID()
1822 } 1881 }
1823 else 1882 else
1824 { 1883 {
1825 - // The PDF specification has guidelines for creating IDs, but it  
1826 - // states clearly that the only thing that's really important is  
1827 - // that it is very likely to be unique. We can't really follow  
1828 - // the guidelines in the spec exactly because we haven't written  
1829 - // the file yet. This scheme should be fine though. 1884 + // The PDF specification has guidelines for creating IDs, but
  1885 + // it states clearly that the only thing that's really
  1886 + // important is that it is very likely to be unique. We can't
  1887 + // really follow the guidelines in the spec exactly because we
  1888 + // haven't written the file yet. This scheme should be fine
  1889 + // though. The deterministic ID case uses a digest of a
  1890 + // sufficient portion of the file's contents such that no two
  1891 + // non-matching files would match in the subsets used for this
  1892 + // computation. Note that we explicitly omit the filename from
  1893 + // the digest calculation for deterministic ID so that the same
  1894 + // file converted with qpdf, in that case, would have the same
  1895 + // ID regardless of the output file's name.
1830 1896
1831 std::string seed; 1897 std::string seed;
1832 - seed += QUtil::int_to_string(QUtil::get_current_time()); 1898 + if (this->deterministic_id)
  1899 + {
  1900 + if (this->deterministic_id_data.empty())
  1901 + {
  1902 + QTC::TC("qpdf", "QPDFWriter deterministic with no data");
  1903 + throw std::logic_error(
  1904 + "INTERNAL ERROR: QPDFWriter::generateID has no"
  1905 + " data for deterministic ID. This may happen if"
  1906 + " deterministic ID and file encryption are requested"
  1907 + " together.");
  1908 + }
  1909 + seed += this->deterministic_id_data;
  1910 + }
  1911 + else
  1912 + {
  1913 + seed += QUtil::int_to_string(QUtil::get_current_time());
  1914 + seed += this->filename;
  1915 + seed += " ";
  1916 + }
1833 seed += " QPDF "; 1917 seed += " QPDF ";
1834 - seed += this->filename;  
1835 - seed += " ";  
1836 if (trailer.hasKey("/Info")) 1918 if (trailer.hasKey("/Info"))
1837 { 1919 {
1838 QPDFObjectHandle info = trailer.getKey("/Info"); 1920 QPDFObjectHandle info = trailer.getKey("/Info");
@@ -2260,8 +2342,6 @@ QPDFWriter::write() @@ -2260,8 +2342,6 @@ QPDFWriter::write()
2260 setMinimumPDFVersion("1.5"); 2342 setMinimumPDFVersion("1.5");
2261 } 2343 }
2262 2344
2263 - generateID();  
2264 -  
2265 prepareFileForWrite(); 2345 prepareFileForWrite();
2266 2346
2267 if (this->linearized) 2347 if (this->linearized)
@@ -2397,6 +2477,17 @@ QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size, @@ -2397,6 +2477,17 @@ QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
2397 int hint_id, qpdf_offset_t hint_offset, 2477 int hint_id, qpdf_offset_t hint_offset,
2398 qpdf_offset_t hint_length) 2478 qpdf_offset_t hint_length)
2399 { 2479 {
  2480 + // ABI compatibility
  2481 + return writeXRefTable(which, first, last, size, prev, suppress_offsets,
  2482 + hint_id, hint_offset, hint_length, 0);
  2483 +}
  2484 +
  2485 +qpdf_offset_t
  2486 +QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
  2487 + qpdf_offset_t prev, bool suppress_offsets,
  2488 + int hint_id, qpdf_offset_t hint_offset,
  2489 + qpdf_offset_t hint_length, int linearization_pass)
  2490 +{
2400 writeString("xref\n"); 2491 writeString("xref\n");
2401 writeString(QUtil::int_to_string(first)); 2492 writeString(QUtil::int_to_string(first));
2402 writeString(" "); 2493 writeString(" ");
@@ -2426,7 +2517,7 @@ QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size, @@ -2426,7 +2517,7 @@ QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
2426 writeString(" 00000 n \n"); 2517 writeString(" 00000 n \n");
2427 } 2518 }
2428 } 2519 }
2429 - writeTrailer(which, size, false, prev); 2520 + writeTrailer(which, size, false, prev, linearization_pass);
2430 writeString("\n"); 2521 writeString("\n");
2431 return space_before_zero; 2522 return space_before_zero;
2432 } 2523 }
@@ -2435,8 +2526,9 @@ qpdf_offset_t @@ -2435,8 +2526,9 @@ qpdf_offset_t
2435 QPDFWriter::writeXRefStream(int objid, int max_id, qpdf_offset_t max_offset, 2526 QPDFWriter::writeXRefStream(int objid, int max_id, qpdf_offset_t max_offset,
2436 trailer_e which, int first, int last, int size) 2527 trailer_e which, int first, int last, int size)
2437 { 2528 {
  2529 + // ABI compatibility
2438 return writeXRefStream(objid, max_id, max_offset, 2530 return writeXRefStream(objid, max_id, max_offset,
2439 - which, first, last, size, 0, 0, 0, 0, false); 2531 + which, first, last, size, 0, 0, 0, 0, false, 0);
2440 } 2532 }
2441 2533
2442 qpdf_offset_t 2534 qpdf_offset_t
@@ -2445,7 +2537,8 @@ QPDFWriter::writeXRefStream(int xref_id, int max_id, qpdf_offset_t max_offset, @@ -2445,7 +2537,8 @@ QPDFWriter::writeXRefStream(int xref_id, int max_id, qpdf_offset_t max_offset,
2445 qpdf_offset_t prev, int hint_id, 2537 qpdf_offset_t prev, int hint_id,
2446 qpdf_offset_t hint_offset, 2538 qpdf_offset_t hint_offset,
2447 qpdf_offset_t hint_length, 2539 qpdf_offset_t hint_length,
2448 - bool skip_compression) 2540 + bool skip_compression,
  2541 + int linearization_pass)
2449 { 2542 {
2450 qpdf_offset_t xref_offset = this->pipeline->getCount(); 2543 qpdf_offset_t xref_offset = this->pipeline->getCount();
2451 qpdf_offset_t space_before_zero = xref_offset - 1; 2544 qpdf_offset_t space_before_zero = xref_offset - 1;
@@ -2545,7 +2638,7 @@ QPDFWriter::writeXRefStream(int xref_id, int max_id, qpdf_offset_t max_offset, @@ -2545,7 +2638,7 @@ QPDFWriter::writeXRefStream(int xref_id, int max_id, qpdf_offset_t max_offset,
2545 QUtil::int_to_string(first) + " " + 2638 QUtil::int_to_string(first) + " " +
2546 QUtil::int_to_string(last - first + 1) + " ]"); 2639 QUtil::int_to_string(last - first + 1) + " ]");
2547 } 2640 }
2548 - writeTrailer(which, size, true, prev); 2641 + writeTrailer(which, size, true, prev, linearization_pass);
2549 writeString("\nstream\n"); 2642 writeString("\nstream\n");
2550 writeBuffer(xref_data); 2643 writeBuffer(xref_data);
2551 writeString("\nendstream"); 2644 writeString("\nendstream");
@@ -2725,6 +2818,10 @@ QPDFWriter::writeLinearized() @@ -2725,6 +2818,10 @@ QPDFWriter::writeLinearized()
2725 if (pass == 1) 2818 if (pass == 1)
2726 { 2819 {
2727 pushDiscardFilter(); 2820 pushDiscardFilter();
  2821 + if (this->deterministic_id)
  2822 + {
  2823 + pushMD5Pipeline();
  2824 + }
2728 } 2825 }
2729 2826
2730 // Part 1: header 2827 // Part 1: header
@@ -2807,7 +2904,7 @@ QPDFWriter::writeLinearized() @@ -2807,7 +2904,7 @@ QPDFWriter::writeLinearized()
2807 first_trailer_size, 2904 first_trailer_size,
2808 hint_length + second_xref_offset, 2905 hint_length + second_xref_offset,
2809 hint_id, hint_offset, hint_length, 2906 hint_id, hint_offset, hint_length,
2810 - (pass == 1)); 2907 + (pass == 1), pass);
2811 qpdf_offset_t endpos = this->pipeline->getCount(); 2908 qpdf_offset_t endpos = this->pipeline->getCount();
2812 if (pass == 1) 2909 if (pass == 1)
2813 { 2910 {
@@ -2834,7 +2931,8 @@ QPDFWriter::writeLinearized() @@ -2834,7 +2931,8 @@ QPDFWriter::writeLinearized()
2834 { 2931 {
2835 writeXRefTable(t_lin_first, first_half_start, first_half_end, 2932 writeXRefTable(t_lin_first, first_half_start, first_half_end,
2836 first_trailer_size, hint_length + second_xref_offset, 2933 first_trailer_size, hint_length + second_xref_offset,
2837 - (pass == 1), hint_id, hint_offset, hint_length); 2934 + (pass == 1), hint_id, hint_offset, hint_length,
  2935 + pass);
2838 writeString("startxref\n0\n%%EOF\n"); 2936 writeString("startxref\n0\n%%EOF\n");
2839 } 2937 }
2840 2938
@@ -2886,7 +2984,7 @@ QPDFWriter::writeLinearized() @@ -2886,7 +2984,7 @@ QPDFWriter::writeLinearized()
2886 second_half_end, second_xref_offset, 2984 second_half_end, second_xref_offset,
2887 t_lin_second, 0, second_half_end, 2985 t_lin_second, 0, second_half_end,
2888 second_trailer_size, 2986 second_trailer_size,
2889 - 0, 0, 0, 0, (pass == 1)); 2987 + 0, 0, 0, 0, (pass == 1), pass);
2890 qpdf_offset_t endpos = this->pipeline->getCount(); 2988 qpdf_offset_t endpos = this->pipeline->getCount();
2891 2989
2892 if (pass == 1) 2990 if (pass == 1)
@@ -2920,7 +3018,7 @@ QPDFWriter::writeLinearized() @@ -2920,7 +3018,7 @@ QPDFWriter::writeLinearized()
2920 { 3018 {
2921 space_before_zero = 3019 space_before_zero =
2922 writeXRefTable(t_lin_second, 0, second_half_end, 3020 writeXRefTable(t_lin_second, 0, second_half_end,
2923 - second_trailer_size); 3021 + second_trailer_size, 0, false, 0, 0, 0, pass);
2924 } 3022 }
2925 writeString("startxref\n"); 3023 writeString("startxref\n");
2926 writeString(QUtil::int_to_string(first_xref_offset)); 3024 writeString(QUtil::int_to_string(first_xref_offset));
@@ -2930,6 +3028,15 @@ QPDFWriter::writeLinearized() @@ -2930,6 +3028,15 @@ QPDFWriter::writeLinearized()
2930 3028
2931 if (pass == 1) 3029 if (pass == 1)
2932 { 3030 {
  3031 + if (this->deterministic_id)
  3032 + {
  3033 + QTC::TC("qpdf", "QPDFWriter linearized deterministic ID",
  3034 + need_xref_stream ? 0 : 1);
  3035 + computeDeterministicIDData();
  3036 + popPipelineStack();
  3037 + assert(this->md5_pipeline == 0);
  3038 + }
  3039 +
2933 // Close first pass pipeline 3040 // Close first pass pipeline
2934 file_size = this->pipeline->getCount(); 3041 file_size = this->pipeline->getCount();
2935 popPipelineStack(); 3042 popPipelineStack();
@@ -2954,6 +3061,11 @@ QPDFWriter::writeLinearized() @@ -2954,6 +3061,11 @@ QPDFWriter::writeLinearized()
2954 void 3061 void
2955 QPDFWriter::writeStandard() 3062 QPDFWriter::writeStandard()
2956 { 3063 {
  3064 + if (this->deterministic_id)
  3065 + {
  3066 + pushMD5Pipeline();
  3067 + }
  3068 +
2957 // Start writing 3069 // Start writing
2958 3070
2959 writeHeader(); 3071 writeHeader();
@@ -3005,4 +3117,12 @@ QPDFWriter::writeStandard() @@ -3005,4 +3117,12 @@ QPDFWriter::writeStandard()
3005 writeString("startxref\n"); 3117 writeString("startxref\n");
3006 writeString(QUtil::int_to_string(xref_offset)); 3118 writeString(QUtil::int_to_string(xref_offset));
3007 writeString("\n%%EOF\n"); 3119 writeString("\n%%EOF\n");
  3120 +
  3121 + if (this->deterministic_id)
  3122 + {
  3123 + QTC::TC("qpdf", "QPDFWriter standard deterministic ID",
  3124 + this->object_stream_to_objects.empty() ? 0 : 1);
  3125 + popPipelineStack();
  3126 + assert(this->md5_pipeline == 0);
  3127 + }
3008 } 3128 }
libqpdf/qpdf-c.cc
@@ -512,6 +512,12 @@ void qpdf_set_qdf_mode(qpdf_data qpdf, QPDF_BOOL value) @@ -512,6 +512,12 @@ void qpdf_set_qdf_mode(qpdf_data qpdf, QPDF_BOOL value)
512 qpdf->qpdf_writer->setQDFMode(value); 512 qpdf->qpdf_writer->setQDFMode(value);
513 } 513 }
514 514
  515 +void qpdf_set_deterministic_ID(qpdf_data qpdf, QPDF_BOOL value)
  516 +{
  517 + QTC::TC("qpdf", "qpdf-c called qpdf_set_deterministic_ID");
  518 + qpdf->qpdf_writer->setDeterministicID(value);
  519 +}
  520 +
515 void qpdf_set_static_ID(qpdf_data qpdf, QPDF_BOOL value) 521 void qpdf_set_static_ID(qpdf_data qpdf, QPDF_BOOL value)
516 { 522 {
517 QTC::TC("qpdf", "qpdf-c called qpdf_set_static_ID"); 523 QTC::TC("qpdf", "qpdf-c called qpdf_set_static_ID");
libqpdf/qpdf/Pl_MD5.hh
@@ -25,10 +25,24 @@ class Pl_MD5: public Pipeline @@ -25,10 +25,24 @@ class Pl_MD5: public Pipeline
25 virtual void finish(); 25 virtual void finish();
26 QPDF_DLL 26 QPDF_DLL
27 std::string getHexDigest(); 27 std::string getHexDigest();
  28 + // Enable/disable. Disabling the pipeline causes it to become a
  29 + // pass-through. This makes it possible to stick an MD5 pipeline
  30 + // in a pipeline when it may or may not be required. Disabling it
  31 + // avoids incurring the runtime overhead of doing needless
  32 + // digest computation.
  33 + QPDF_DLL
  34 + void enable(bool enabled);
  35 + // If persistAcrossFinish is called, calls to finish do not
  36 + // finalize the underlying md5 object. In this case, the object is
  37 + // not finalized until getHexDigest() is called.
  38 + QPDF_DLL
  39 + void persistAcrossFinish(bool);
28 40
29 private: 41 private:
30 bool in_progress; 42 bool in_progress;
31 MD5 md5; 43 MD5 md5;
  44 + bool enabled;
  45 + bool persist_across_finish;
32 }; 46 };
33 47
34 #endif // __PL_MD5_HH__ 48 #endif // __PL_MD5_HH__
libtests/md5.cc
@@ -45,6 +45,13 @@ int main(int, char*[]) @@ -45,6 +45,13 @@ int main(int, char*[])
45 45
46 Pl_Discard d; 46 Pl_Discard d;
47 Pl_MD5 p("MD5", &d); 47 Pl_MD5 p("MD5", &d);
  48 + // Create a second pipeline, protect against finish, and call
  49 + // getHexDigest only once at the end of both passes. Make sure the
  50 + // checksum is that of the input file concatenated to itself. This
  51 + // will require changes to Pl_MD5.cc to prevent finish from
  52 + // calling finalize.
  53 + Pl_MD5 p2("MD5", &d);
  54 + p2.persistAcrossFinish(true);
48 for (int i = 0; i < 2; ++i) 55 for (int i = 0; i < 2; ++i)
49 { 56 {
50 FILE* f = QUtil::safe_fopen("md5.in", "rb"); 57 FILE* f = QUtil::safe_fopen("md5.in", "rb");
@@ -61,12 +68,23 @@ int main(int, char*[]) @@ -61,12 +68,23 @@ int main(int, char*[])
61 else 68 else
62 { 69 {
63 p.write(buf, len); 70 p.write(buf, len);
  71 + p2.write(buf, len);
  72 + if (i == 1)
  73 + {
  74 + // Partial digest -- resets after each call to write
  75 + std::cout << p.getHexDigest() << std::endl;
  76 + }
64 } 77 }
65 } 78 }
66 fclose(f); 79 fclose(f);
67 p.finish(); 80 p.finish();
  81 + p2.finish();
  82 + // Make sure calling getHexDigest twice with no intervening
  83 + // writes results in the same result each time.
  84 + std::cout << p.getHexDigest() << std::endl;
68 std::cout << p.getHexDigest() << std::endl; 85 std::cout << p.getHexDigest() << std::endl;
69 } 86 }
  87 + std::cout << p2.getHexDigest() << std::endl;
70 88
71 return 0; 89 return 0;
72 } 90 }
libtests/qtest/md5/md5.out
@@ -14,3 +14,11 @@ d174ab98d277d9f5a5611c2c9f419d9f @@ -14,3 +14,11 @@ d174ab98d277d9f5a5611c2c9f419d9f
14 0 14 0
15 5f4b4321873433daae578f85c72f9e74 15 5f4b4321873433daae578f85c72f9e74
16 5f4b4321873433daae578f85c72f9e74 16 5f4b4321873433daae578f85c72f9e74
  17 +41f977636f79cf1bad1b439caa7d627c
  18 +c30e03b5536e37306df25489622e13e3
  19 +9dabbd135cc47bb603a94989df37c926
  20 +ce80591b269b749f65c53b71d0be5212
  21 +db5448be0a1e931cbd84654e82063483
  22 +db5448be0a1e931cbd84654e82063483
  23 +db5448be0a1e931cbd84654e82063483
  24 +9833b12b21147bebb2f33d35807049af
manual/qpdf-manual.xml
@@ -991,11 +991,30 @@ outfile.pdf&lt;/option&gt; @@ -991,11 +991,30 @@ outfile.pdf&lt;/option&gt;
991 file should be given. The following options are available: 991 file should be given. The following options are available:
992 <variablelist> 992 <variablelist>
993 <varlistentry> 993 <varlistentry>
  994 + <term><option>--deterministic-id</option></term>
  995 + <listitem>
  996 + <para>
  997 + Causes generation of a deterministic value for /ID. This
  998 + prevents use of timestamp and output file name information in
  999 + the /ID generation. Instead, at some slight additional runtime
  1000 + cost, the /ID field is generated to include a digest of the
  1001 + significant parts of the content of the output PDF file. This
  1002 + means that a given qpdf operation should generate the same /ID
  1003 + each time it is run, which can be useful when caching results
  1004 + or for generation of some test data. Use of this flag is not
  1005 + compatible with creation of encrypted files.
  1006 + </para>
  1007 + </listitem>
  1008 + </varlistentry>
  1009 + <varlistentry>
994 <term><option>--static-id</option></term> 1010 <term><option>--static-id</option></term>
995 <listitem> 1011 <listitem>
996 <para> 1012 <para>
997 - Causes generation of a fixed value for /ID. This is intended  
998 - for testing only. Never use it for production files. 1013 + Causes generation of a fixed value for /ID. This is intended
  1014 + for testing only. Never use it for production files. If you
  1015 + are trying to get the same /ID each time for a given file and
  1016 + you are not generating encrypted files, consider using the
  1017 + <option>--deterministic-id</option> option.
999 </para> 1018 </para>
1000 </listitem> 1019 </listitem>
1001 </varlistentry> 1020 </varlistentry>
qpdf/qpdf-ctest.c
@@ -427,6 +427,18 @@ static void test18(char const* infile, @@ -427,6 +427,18 @@ static void test18(char const* infile,
427 report_errors(); 427 report_errors();
428 } 428 }
429 429
  430 +static void test19(char const* infile,
  431 + char const* password,
  432 + char const* outfile,
  433 + char const* outfile2)
  434 +{
  435 + qpdf_read(qpdf, infile, password);
  436 + qpdf_init_write(qpdf, outfile);
  437 + qpdf_set_deterministic_ID(qpdf, QPDF_TRUE);
  438 + qpdf_write(qpdf);
  439 + report_errors();
  440 +}
  441 +
430 int main(int argc, char* argv[]) 442 int main(int argc, char* argv[])
431 { 443 {
432 char* p = 0; 444 char* p = 0;
@@ -485,6 +497,7 @@ int main(int argc, char* argv[]) @@ -485,6 +497,7 @@ int main(int argc, char* argv[])
485 (n == 16) ? test16 : 497 (n == 16) ? test16 :
486 (n == 17) ? test17 : 498 (n == 17) ? test17 :
487 (n == 18) ? test18 : 499 (n == 18) ? test18 :
  500 + (n == 19) ? test19 :
488 0); 501 0);
489 502
490 if (fn == 0) 503 if (fn == 0)
qpdf/qpdf.cc
@@ -237,6 +237,7 @@ Testing, Inspection, and Debugging Options\n\ @@ -237,6 +237,7 @@ Testing, Inspection, and Debugging Options\n\
237 These options can be useful for digging into PDF files or for use in\n\ 237 These options can be useful for digging into PDF files or for use in\n\
238 automated test suites for software that uses the qpdf library.\n\ 238 automated test suites for software that uses the qpdf library.\n\
239 \n\ 239 \n\
  240 +--deterministic-id generate deterministic /ID\n\
240 --static-id generate static /ID: FOR TESTING ONLY!\n\ 241 --static-id generate static /ID: FOR TESTING ONLY!\n\
241 --static-aes-iv use a static initialization vector for AES-CBC\n\ 242 --static-aes-iv use a static initialization vector for AES-CBC\n\
242 This is option is not secure! FOR TESTING ONLY!\n\ 243 This is option is not secure! FOR TESTING ONLY!\n\
@@ -1031,6 +1032,7 @@ int main(int argc, char* argv[]) @@ -1031,6 +1032,7 @@ int main(int argc, char* argv[])
1031 std::string force_version; 1032 std::string force_version;
1032 1033
1033 bool show_npages = false; 1034 bool show_npages = false;
  1035 + bool deterministic_id = false;
1034 bool static_id = false; 1036 bool static_id = false;
1035 bool static_aes_iv = false; 1037 bool static_aes_iv = false;
1036 bool suppress_original_object_id = false; 1038 bool suppress_original_object_id = false;
@@ -1229,6 +1231,10 @@ int main(int argc, char* argv[]) @@ -1229,6 +1231,10 @@ int main(int argc, char* argv[])
1229 } 1231 }
1230 force_version = parameter; 1232 force_version = parameter;
1231 } 1233 }
  1234 + else if (strcmp(arg, "deterministic-id") == 0)
  1235 + {
  1236 + deterministic_id = true;
  1237 + }
1232 else if (strcmp(arg, "static-id") == 0) 1238 else if (strcmp(arg, "static-id") == 0)
1233 { 1239 {
1234 static_id = true; 1240 static_id = true;
@@ -1710,6 +1716,10 @@ int main(int argc, char* argv[]) @@ -1710,6 +1716,10 @@ int main(int argc, char* argv[])
1710 { 1716 {
1711 w.setPreserveEncryption(false); 1717 w.setPreserveEncryption(false);
1712 } 1718 }
  1719 + if (deterministic_id)
  1720 + {
  1721 + w.setDeterministicID(true);
  1722 + }
1713 if (static_id) 1723 if (static_id)
1714 { 1724 {
1715 w.setStaticID(true); 1725 w.setStaticID(true);
qpdf/qpdf.testcov
@@ -269,3 +269,7 @@ qpdf pages range omitted at end 0 @@ -269,3 +269,7 @@ qpdf pages range omitted at end 0
269 qpdf pages range omitted in middle 0 269 qpdf pages range omitted in middle 0
270 qpdf npages 0 270 qpdf npages 0
271 QPDF already reserved object 0 271 QPDF already reserved object 0
  272 +QPDFWriter standard deterministic ID 1
  273 +QPDFWriter linearized deterministic ID 1
  274 +QPDFWriter deterministic with no data 0
  275 +qpdf-c called qpdf_set_deterministic_ID 0
qpdf/qtest/qpdf.test
@@ -990,6 +990,43 @@ $td-&gt;runtest(&quot;write damaged&quot;, @@ -990,6 +990,43 @@ $td-&gt;runtest(&quot;write damaged&quot;,
990 990
991 show_ntests(); 991 show_ntests();
992 # ---------- 992 # ----------
  993 +$td->notify("--- Deterministic ID Tests ---");
  994 +$n_tests += 11;
  995 +foreach my $d ('nn', 'ny', 'yn', 'yy')
  996 +{
  997 + my $linearize = ($d =~ m/^y/);
  998 + my $ostream = ($d =~ m/y$/);
  999 + $td->runtest("deterministic ID: linearize/ostream=$d",
  1000 + {$td->COMMAND =>
  1001 + "qpdf -deterministic-id" .
  1002 + ($linearize ? " -linearize" : "") .
  1003 + " -object-streams=" . ($ostream ? "generate" : "disable") .
  1004 + " deterministic-id-in.pdf a.pdf"},
  1005 + {$td->STRING => "",
  1006 + $td->EXIT_STATUS => 0});
  1007 + $td->runtest("compare files",
  1008 + {$td->FILE => "a.pdf"},
  1009 + {$td->FILE => "deterministic-id-$d.pdf"});
  1010 +}
  1011 +
  1012 +$td->runtest("deterministic ID with encryption",
  1013 + {$td->COMMAND => "qpdf -deterministic-id encrypted-with-images.pdf a.pdf"},
  1014 + {$td->STRING => "INTERNAL ERROR: QPDFWriter::generateID" .
  1015 + " has no data for deterministic ID." .
  1016 + " This may happen if deterministic ID and" .
  1017 + " file encryption are requested together.\n",
  1018 + $td->EXIT_STATUS => 2},
  1019 + $td->NORMALIZE_NEWLINES);
  1020 +$td->runtest("deterministic ID (C API)",
  1021 + {$td->COMMAND =>
  1022 + "qpdf-ctest 19 deterministic-id-in.pdf '' a.pdf"},
  1023 + {$td->STRING => "",
  1024 + $td->EXIT_STATUS => 0});
  1025 +$td->runtest("compare files",
  1026 + {$td->FILE => "a.pdf"},
  1027 + {$td->FILE => "deterministic-id-nn.pdf"});
  1028 +
  1029 +# ----------
993 $td->notify("--- Object Stream Tests ---"); 1030 $td->notify("--- Object Stream Tests ---");
994 $n_tests += (36 * 4) + (12 * 2); 1031 $n_tests += (36 * 4) + (12 * 2);
995 $n_compare_pdfs += 36; 1032 $n_compare_pdfs += 36;
qpdf/qtest/qpdf/deterministic-id-in.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/deterministic-id-nn.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/deterministic-id-ny.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/deterministic-id-yn.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/deterministic-id-yy.pdf 0 → 100644
No preview for this file type