Commit b8bdef0ad12883d72ced5eb443e6e34a93bbbb91

Authored by Jay Berkenbilt
1 parent 607c3921

Implement deterministic ID

For non-encrypted files, deterministic ID generation uses file contents
instead of timestamp and file name. At a small runtime cost, this
enables generation of the same /ID if the same inputs are converted in
the same way multiple times.
ChangeLog
  1 +2015-10-29 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Implement QPDFWriter::setDeterministicID and --deterministic-id
  4 + commandline-flag to qpdf to request generation of a deterministic
  5 + /ID for non-encrypted files.
  6 +
1 7 2015-05-24 Jay Berkenbilt <ejb@ql.org>
2 8  
3 9 * 5.1.3: release
... ...
... ... @@ -46,6 +46,14 @@ Small, command-line tool only enhancements to do soon
46 46 (libtool).
47 47  
48 48  
  49 +Next ABI change
  50 +===============
  51 +
  52 +Remove private methods that are there only for ABI compatibility
  53 +including extra QPDFWriter writeTrailer, writeXRefTable,
  54 +writeXRefStream.
  55 +
  56 +
49 57 5.2.0
50 58 =====
51 59  
... ...
include/qpdf/QPDFWriter.hh
... ... @@ -35,6 +35,7 @@
35 35 class QPDF;
36 36 class QPDFObjectHandle;
37 37 class Pl_Count;
  38 +class Pl_MD5;
38 39  
39 40 class QPDFWriter
40 41 {
... ... @@ -189,8 +190,22 @@ class QPDFWriter
189 190 QPDF_DLL
190 191 void setExtraHeaderText(std::string const&);
191 192  
  193 + // Causes a deterministic /ID value to be generated. When this is
  194 + // set, the current time and output file name are not used as part
  195 + // of /ID generation. Instead, a digest of all significant parts
  196 + // of the output file's contents is included in the /ID
  197 + // calculation. Use of a deterministic /ID can be handy when it is
  198 + // desirable for a repeat of the same qpdf operation on the same
  199 + // inputs being written to the same outputs with the same
  200 + // parameters to generate exactly the same results. This feature
  201 + // is incompatible with encrypted files because, for encrypted
  202 + // files, the /ID is generated before any part of the file is
  203 + // written since it is an input to the encryption process.
  204 + QPDF_DLL
  205 + void setDeterministicID(bool);
  206 +
192 207 // Cause a static /ID value to be generated. Use only in test
193   - // suites.
  208 + // suites. See also setDeterministicID.
194 209 QPDF_DLL
195 210 void setStaticID(bool);
196 211  
... ... @@ -298,6 +313,9 @@ class QPDFWriter
298 313 void writeObject(QPDFObjectHandle object, int object_stream_index = -1);
299 314 void writeTrailer(trailer_e which, int size,
300 315 bool xref_stream, qpdf_offset_t prev = 0);
  316 + void writeTrailer(trailer_e which, int size,
  317 + bool xref_stream, qpdf_offset_t prev,
  318 + int linearization_pass);
301 319 void unparseObject(QPDFObjectHandle object, int level,
302 320 unsigned int flags);
303 321 void unparseObject(QPDFObjectHandle object, int level,
... ... @@ -348,6 +366,15 @@ class QPDFWriter
348 366 int hint_id,
349 367 qpdf_offset_t hint_offset,
350 368 qpdf_offset_t hint_length);
  369 + qpdf_offset_t writeXRefTable(
  370 + trailer_e which, int first, int last, int size,
  371 + // for linearization
  372 + qpdf_offset_t prev,
  373 + bool suppress_offsets,
  374 + int hint_id,
  375 + qpdf_offset_t hint_offset,
  376 + qpdf_offset_t hint_length,
  377 + int linearization_pass);
351 378 qpdf_offset_t writeXRefStream(
352 379 int objid, int max_id, qpdf_offset_t max_offset,
353 380 trailer_e which, int first, int last, int size);
... ... @@ -360,6 +387,16 @@ class QPDFWriter
360 387 qpdf_offset_t hint_offset,
361 388 qpdf_offset_t hint_length,
362 389 bool skip_compression);
  390 + qpdf_offset_t writeXRefStream(
  391 + int objid, int max_id, qpdf_offset_t max_offset,
  392 + trailer_e which, int first, int last, int size,
  393 + // for linearization
  394 + qpdf_offset_t prev,
  395 + int hint_id,
  396 + qpdf_offset_t hint_offset,
  397 + qpdf_offset_t hint_length,
  398 + bool skip_compression,
  399 + int linearization_pass);
363 400 int calculateXrefStreamPadding(int xref_bytes);
364 401  
365 402 // When filtering subsections, push additional pipelines to the
... ... @@ -380,6 +417,8 @@ class QPDFWriter
380 417 void adjustAESStreamLength(size_t& length);
381 418 void pushEncryptionFilter();
382 419 void pushDiscardFilter();
  420 + void pushMD5Pipeline();
  421 + void computeDeterministicIDData();
383 422  
384 423 void discardGeneration(std::map<QPDFObjGen, int> const& in,
385 424 std::map<int, int>& out);
... ... @@ -437,6 +476,9 @@ class QPDFWriter
437 476 std::map<QPDFObjGen, int> object_to_object_stream;
438 477 std::map<int, std::set<QPDFObjGen> > object_stream_to_objects;
439 478 std::list<Pipeline*> pipeline_stack;
  479 + bool deterministic_id;
  480 + Pl_MD5* md5_pipeline;
  481 + std::string deterministic_id_data;
440 482  
441 483 // For linearization only
442 484 std::map<int, int> obj_renumber_no_gen;
... ...
include/qpdf/qpdf-c.h
... ... @@ -324,8 +324,11 @@ extern "C" {
324 324 QPDF_DLL
325 325 void qpdf_set_qdf_mode(qpdf_data qpdf, QPDF_BOOL value);
326 326  
  327 + QPDF_DLL
  328 + void qpdf_set_deterministic_ID(qpdf_data qpdf, QPDF_BOOL value);
  329 +
327 330 /* Never use qpdf_set_static_ID except in test suites to suppress
328   - * generation of a random /ID.
  331 + * generation of a random /ID. See also qpdf_set_deterministic_ID.
329 332 */
330 333 QPDF_DLL
331 334 void qpdf_set_static_ID(qpdf_data qpdf, QPDF_BOOL value);
... ...
libqpdf/Pl_MD5.cc
... ... @@ -3,7 +3,9 @@
3 3  
4 4 Pl_MD5::Pl_MD5(char const* identifier, Pipeline* next) :
5 5 Pipeline(identifier, next),
6   - in_progress(false)
  6 + in_progress(false),
  7 + enabled(true),
  8 + persist_across_finish(false)
7 9 {
8 10 }
9 11  
... ... @@ -14,24 +16,27 @@ Pl_MD5::~Pl_MD5()
14 16 void
15 17 Pl_MD5::write(unsigned char* buf, size_t len)
16 18 {
17   - if (! this->in_progress)
  19 + if (this->enabled)
18 20 {
19   - this->md5.reset();
20   - this->in_progress = true;
21   - }
  21 + if (! this->in_progress)
  22 + {
  23 + this->md5.reset();
  24 + this->in_progress = true;
  25 + }
22 26  
23   - // Write in chunks in case len is too big to fit in an int.
24   - // Assume int is at least 32 bits.
25   - static size_t const max_bytes = 1 << 30;
26   - size_t bytes_left = len;
27   - unsigned char* data = buf;
28   - while (bytes_left > 0)
29   - {
30   - size_t bytes = (bytes_left >= max_bytes ? max_bytes : bytes_left);
31   - this->md5.encodeDataIncrementally(
32   - reinterpret_cast<char*>(data), bytes);
33   - bytes_left -= bytes;
34   - data += bytes;
  27 + // Write in chunks in case len is too big to fit in an int.
  28 + // Assume int is at least 32 bits.
  29 + static size_t const max_bytes = 1 << 30;
  30 + size_t bytes_left = len;
  31 + unsigned char* data = buf;
  32 + while (bytes_left > 0)
  33 + {
  34 + size_t bytes = (bytes_left >= max_bytes ? max_bytes : bytes_left);
  35 + this->md5.encodeDataIncrementally(
  36 + reinterpret_cast<char*>(data), bytes);
  37 + bytes_left -= bytes;
  38 + data += bytes;
  39 + }
35 40 }
36 41  
37 42 this->getNext()->write(buf, len);
... ... @@ -41,16 +46,32 @@ void
41 46 Pl_MD5::finish()
42 47 {
43 48 this->getNext()->finish();
44   - this->in_progress = false;
  49 + if (! this->persist_across_finish)
  50 + {
  51 + this->in_progress = false;
  52 + }
  53 +}
  54 +
  55 +void
  56 +Pl_MD5::enable(bool enabled)
  57 +{
  58 + this->enabled = enabled;
  59 +}
  60 +
  61 +void
  62 +Pl_MD5::persistAcrossFinish(bool persist)
  63 +{
  64 + this->persist_across_finish = persist;
45 65 }
46 66  
47 67 std::string
48 68 Pl_MD5::getHexDigest()
49 69 {
50   - if (this->in_progress)
  70 + if (! this->enabled)
51 71 {
52 72 throw std::logic_error(
53   - "digest requested for in-progress MD5 Pipeline");
  73 + "digest requested for a disabled MD5 Pipeline");
54 74 }
  75 + this->in_progress = false;
55 76 return this->md5.unparse();
56 77 }
... ...
libqpdf/QPDFWriter.cc
... ... @@ -9,6 +9,7 @@
9 9 #include <qpdf/Pl_AES_PDF.hh>
10 10 #include <qpdf/Pl_Flate.hh>
11 11 #include <qpdf/Pl_PNGFilter.hh>
  12 +#include <qpdf/Pl_MD5.hh>
12 13 #include <qpdf/QUtil.hh>
13 14 #include <qpdf/MD5.hh>
14 15 #include <qpdf/RC4.hh>
... ... @@ -77,6 +78,8 @@ QPDFWriter::init()
77 78 cur_stream_length = 0;
78 79 added_newline = false;
79 80 max_ostream_index = 0;
  81 + deterministic_id = false;
  82 + md5_pipeline = 0;
80 83 }
81 84  
82 85 QPDFWriter::~QPDFWriter()
... ... @@ -264,6 +267,12 @@ QPDFWriter::setStaticID(bool val)
264 267 }
265 268  
266 269 void
  270 +QPDFWriter::setDeterministicID(bool val)
  271 +{
  272 + this->deterministic_id = val;
  273 +}
  274 +
  275 +void
267 276 QPDFWriter::setStaticAesIV(bool val)
268 277 {
269 278 if (val)
... ... @@ -507,10 +516,10 @@ void
507 516 QPDFWriter::copyEncryptionParameters(QPDF& qpdf)
508 517 {
509 518 this->preserve_encryption = false;
510   - generateID();
511 519 QPDFObjectHandle trailer = qpdf.getTrailer();
512 520 if (trailer.hasKey("/Encrypt"))
513 521 {
  522 + generateID();
514 523 this->id1 =
515 524 trailer.getKey("/ID").getArrayItem(0).getStringValue();
516 525 QPDFObjectHandle encrypt = trailer.getKey("/Encrypt");
... ... @@ -864,6 +873,10 @@ QPDFWriter::popPipelineStack(PointerHolder<Buffer>* bp)
864 873 while (dynamic_cast<Pl_Count*>(this->pipeline_stack.back()) == 0)
865 874 {
866 875 Pipeline* p = this->pipeline_stack.back();
  876 + if (dynamic_cast<Pl_MD5*>(p) == this->md5_pipeline)
  877 + {
  878 + this->md5_pipeline = 0;
  879 + }
867 880 this->pipeline_stack.pop_back();
868 881 Pl_Buffer* buf = dynamic_cast<Pl_Buffer*>(p);
869 882 if (bp && buf)
... ... @@ -921,6 +934,36 @@ QPDFWriter::pushDiscardFilter()
921 934 activatePipelineStack();
922 935 }
923 936  
  937 +void
  938 +QPDFWriter::pushMD5Pipeline()
  939 +{
  940 + if (! this->id2.empty())
  941 + {
  942 + // Can't happen in the code
  943 + throw std::logic_error(
  944 + "Deterministic ID computation enabled after ID"
  945 + " generation has already occurred.");
  946 + }
  947 + assert(this->deterministic_id);
  948 + assert(this->md5_pipeline == 0);
  949 + assert(this->pipeline->getCount() == 0);
  950 + this->md5_pipeline = new Pl_MD5("qpdf md5", this->pipeline);
  951 + this->md5_pipeline->persistAcrossFinish(true);
  952 + // Special case code in popPipelineStack clears this->md5_pipeline
  953 + // upon deletion.
  954 + pushPipeline(this->md5_pipeline);
  955 + activatePipelineStack();
  956 +}
  957 +
  958 +void
  959 +QPDFWriter::computeDeterministicIDData()
  960 +{
  961 + assert(this->md5_pipeline != 0);
  962 + assert(this->deterministic_id_data.empty());
  963 + this->deterministic_id_data = this->md5_pipeline->getHexDigest();
  964 + this->md5_pipeline->enable(false);
  965 +}
  966 +
924 967 int
925 968 QPDFWriter::openObject(int objid)
926 969 {
... ... @@ -1069,6 +1112,13 @@ void
1069 1112 QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream,
1070 1113 qpdf_offset_t prev)
1071 1114 {
  1115 + writeTrailer(which, size, xref_stream, prev, 0);
  1116 +}
  1117 +
  1118 +void
  1119 +QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream,
  1120 + qpdf_offset_t prev, int linearization_pass)
  1121 +{
1072 1122 QPDFObjectHandle trailer = getTrimmedTrailer();
1073 1123 if (! xref_stream)
1074 1124 {
... ... @@ -1119,8 +1169,21 @@ QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream,
1119 1169 // Write ID
1120 1170 writeStringQDF(" ");
1121 1171 writeString(" /ID [");
1122   - writeString(QPDF_String(this->id1).unparse(true));
1123   - writeString(QPDF_String(this->id2).unparse(true));
  1172 + if (linearization_pass == 1)
  1173 + {
  1174 + writeString("<00000000000000000000000000000000>"
  1175 + "<00000000000000000000000000000000>");
  1176 + }
  1177 + else
  1178 + {
  1179 + if ((linearization_pass == 0) && (this->deterministic_id))
  1180 + {
  1181 + computeDeterministicIDData();
  1182 + }
  1183 + generateID();
  1184 + writeString(QPDF_String(this->id1).unparse(true));
  1185 + writeString(QPDF_String(this->id2).unparse(true));
  1186 + }
1124 1187 writeString("]");
1125 1188  
1126 1189 if (which != t_lin_second)
... ... @@ -1794,12 +1857,8 @@ QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
1794 1857 void
1795 1858 QPDFWriter::generateID()
1796 1859 {
1797   - // Note: we can't call generateID() at the time of construction
1798   - // since the caller hasn't yet had a chance to call setStaticID(),
1799   - // but we need to generate it before computing encryption
1800   - // dictionary parameters. This is why we call this function both
1801   - // from setEncryptionParameters() and from write() and return
1802   - // immediately if the ID has already been generated.
  1860 + // Generate the ID lazily so that we can handle the user's
  1861 + // preference to use static or deterministic ID generation.
1803 1862  
1804 1863 if (! this->id2.empty())
1805 1864 {
... ... @@ -1822,17 +1881,40 @@ QPDFWriter::generateID()
1822 1881 }
1823 1882 else
1824 1883 {
1825   - // The PDF specification has guidelines for creating IDs, but it
1826   - // states clearly that the only thing that's really important is
1827   - // that it is very likely to be unique. We can't really follow
1828   - // the guidelines in the spec exactly because we haven't written
1829   - // the file yet. This scheme should be fine though.
  1884 + // The PDF specification has guidelines for creating IDs, but
  1885 + // it states clearly that the only thing that's really
  1886 + // important is that it is very likely to be unique. We can't
  1887 + // really follow the guidelines in the spec exactly because we
  1888 + // haven't written the file yet. This scheme should be fine
  1889 + // though. The deterministic ID case uses a digest of a
  1890 + // sufficient portion of the file's contents such that no two
  1891 + // non-matching files would match in the subsets used for this
  1892 + // computation. Note that we explicitly omit the filename from
  1893 + // the digest calculation for deterministic ID so that the same
  1894 + // file converted with qpdf, in that case, would have the same
  1895 + // ID regardless of the output file's name.
1830 1896  
1831 1897 std::string seed;
1832   - seed += QUtil::int_to_string(QUtil::get_current_time());
  1898 + if (this->deterministic_id)
  1899 + {
  1900 + if (this->deterministic_id_data.empty())
  1901 + {
  1902 + QTC::TC("qpdf", "QPDFWriter deterministic with no data");
  1903 + throw std::logic_error(
  1904 + "INTERNAL ERROR: QPDFWriter::generateID has no"
  1905 + " data for deterministic ID. This may happen if"
  1906 + " deterministic ID and file encryption are requested"
  1907 + " together.");
  1908 + }
  1909 + seed += this->deterministic_id_data;
  1910 + }
  1911 + else
  1912 + {
  1913 + seed += QUtil::int_to_string(QUtil::get_current_time());
  1914 + seed += this->filename;
  1915 + seed += " ";
  1916 + }
1833 1917 seed += " QPDF ";
1834   - seed += this->filename;
1835   - seed += " ";
1836 1918 if (trailer.hasKey("/Info"))
1837 1919 {
1838 1920 QPDFObjectHandle info = trailer.getKey("/Info");
... ... @@ -2260,8 +2342,6 @@ QPDFWriter::write()
2260 2342 setMinimumPDFVersion("1.5");
2261 2343 }
2262 2344  
2263   - generateID();
2264   -
2265 2345 prepareFileForWrite();
2266 2346  
2267 2347 if (this->linearized)
... ... @@ -2397,6 +2477,17 @@ QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
2397 2477 int hint_id, qpdf_offset_t hint_offset,
2398 2478 qpdf_offset_t hint_length)
2399 2479 {
  2480 + // ABI compatibility
  2481 + return writeXRefTable(which, first, last, size, prev, suppress_offsets,
  2482 + hint_id, hint_offset, hint_length, 0);
  2483 +}
  2484 +
  2485 +qpdf_offset_t
  2486 +QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
  2487 + qpdf_offset_t prev, bool suppress_offsets,
  2488 + int hint_id, qpdf_offset_t hint_offset,
  2489 + qpdf_offset_t hint_length, int linearization_pass)
  2490 +{
2400 2491 writeString("xref\n");
2401 2492 writeString(QUtil::int_to_string(first));
2402 2493 writeString(" ");
... ... @@ -2426,7 +2517,7 @@ QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
2426 2517 writeString(" 00000 n \n");
2427 2518 }
2428 2519 }
2429   - writeTrailer(which, size, false, prev);
  2520 + writeTrailer(which, size, false, prev, linearization_pass);
2430 2521 writeString("\n");
2431 2522 return space_before_zero;
2432 2523 }
... ... @@ -2435,8 +2526,9 @@ qpdf_offset_t
2435 2526 QPDFWriter::writeXRefStream(int objid, int max_id, qpdf_offset_t max_offset,
2436 2527 trailer_e which, int first, int last, int size)
2437 2528 {
  2529 + // ABI compatibility
2438 2530 return writeXRefStream(objid, max_id, max_offset,
2439   - which, first, last, size, 0, 0, 0, 0, false);
  2531 + which, first, last, size, 0, 0, 0, 0, false, 0);
2440 2532 }
2441 2533  
2442 2534 qpdf_offset_t
... ... @@ -2445,7 +2537,8 @@ QPDFWriter::writeXRefStream(int xref_id, int max_id, qpdf_offset_t max_offset,
2445 2537 qpdf_offset_t prev, int hint_id,
2446 2538 qpdf_offset_t hint_offset,
2447 2539 qpdf_offset_t hint_length,
2448   - bool skip_compression)
  2540 + bool skip_compression,
  2541 + int linearization_pass)
2449 2542 {
2450 2543 qpdf_offset_t xref_offset = this->pipeline->getCount();
2451 2544 qpdf_offset_t space_before_zero = xref_offset - 1;
... ... @@ -2545,7 +2638,7 @@ QPDFWriter::writeXRefStream(int xref_id, int max_id, qpdf_offset_t max_offset,
2545 2638 QUtil::int_to_string(first) + " " +
2546 2639 QUtil::int_to_string(last - first + 1) + " ]");
2547 2640 }
2548   - writeTrailer(which, size, true, prev);
  2641 + writeTrailer(which, size, true, prev, linearization_pass);
2549 2642 writeString("\nstream\n");
2550 2643 writeBuffer(xref_data);
2551 2644 writeString("\nendstream");
... ... @@ -2725,6 +2818,10 @@ QPDFWriter::writeLinearized()
2725 2818 if (pass == 1)
2726 2819 {
2727 2820 pushDiscardFilter();
  2821 + if (this->deterministic_id)
  2822 + {
  2823 + pushMD5Pipeline();
  2824 + }
2728 2825 }
2729 2826  
2730 2827 // Part 1: header
... ... @@ -2807,7 +2904,7 @@ QPDFWriter::writeLinearized()
2807 2904 first_trailer_size,
2808 2905 hint_length + second_xref_offset,
2809 2906 hint_id, hint_offset, hint_length,
2810   - (pass == 1));
  2907 + (pass == 1), pass);
2811 2908 qpdf_offset_t endpos = this->pipeline->getCount();
2812 2909 if (pass == 1)
2813 2910 {
... ... @@ -2834,7 +2931,8 @@ QPDFWriter::writeLinearized()
2834 2931 {
2835 2932 writeXRefTable(t_lin_first, first_half_start, first_half_end,
2836 2933 first_trailer_size, hint_length + second_xref_offset,
2837   - (pass == 1), hint_id, hint_offset, hint_length);
  2934 + (pass == 1), hint_id, hint_offset, hint_length,
  2935 + pass);
2838 2936 writeString("startxref\n0\n%%EOF\n");
2839 2937 }
2840 2938  
... ... @@ -2886,7 +2984,7 @@ QPDFWriter::writeLinearized()
2886 2984 second_half_end, second_xref_offset,
2887 2985 t_lin_second, 0, second_half_end,
2888 2986 second_trailer_size,
2889   - 0, 0, 0, 0, (pass == 1));
  2987 + 0, 0, 0, 0, (pass == 1), pass);
2890 2988 qpdf_offset_t endpos = this->pipeline->getCount();
2891 2989  
2892 2990 if (pass == 1)
... ... @@ -2920,7 +3018,7 @@ QPDFWriter::writeLinearized()
2920 3018 {
2921 3019 space_before_zero =
2922 3020 writeXRefTable(t_lin_second, 0, second_half_end,
2923   - second_trailer_size);
  3021 + second_trailer_size, 0, false, 0, 0, 0, pass);
2924 3022 }
2925 3023 writeString("startxref\n");
2926 3024 writeString(QUtil::int_to_string(first_xref_offset));
... ... @@ -2930,6 +3028,15 @@ QPDFWriter::writeLinearized()
2930 3028  
2931 3029 if (pass == 1)
2932 3030 {
  3031 + if (this->deterministic_id)
  3032 + {
  3033 + QTC::TC("qpdf", "QPDFWriter linearized deterministic ID",
  3034 + need_xref_stream ? 0 : 1);
  3035 + computeDeterministicIDData();
  3036 + popPipelineStack();
  3037 + assert(this->md5_pipeline == 0);
  3038 + }
  3039 +
2933 3040 // Close first pass pipeline
2934 3041 file_size = this->pipeline->getCount();
2935 3042 popPipelineStack();
... ... @@ -2954,6 +3061,11 @@ QPDFWriter::writeLinearized()
2954 3061 void
2955 3062 QPDFWriter::writeStandard()
2956 3063 {
  3064 + if (this->deterministic_id)
  3065 + {
  3066 + pushMD5Pipeline();
  3067 + }
  3068 +
2957 3069 // Start writing
2958 3070  
2959 3071 writeHeader();
... ... @@ -3005,4 +3117,12 @@ QPDFWriter::writeStandard()
3005 3117 writeString("startxref\n");
3006 3118 writeString(QUtil::int_to_string(xref_offset));
3007 3119 writeString("\n%%EOF\n");
  3120 +
  3121 + if (this->deterministic_id)
  3122 + {
  3123 + QTC::TC("qpdf", "QPDFWriter standard deterministic ID",
  3124 + this->object_stream_to_objects.empty() ? 0 : 1);
  3125 + popPipelineStack();
  3126 + assert(this->md5_pipeline == 0);
  3127 + }
3008 3128 }
... ...
libqpdf/qpdf-c.cc
... ... @@ -512,6 +512,12 @@ void qpdf_set_qdf_mode(qpdf_data qpdf, QPDF_BOOL value)
512 512 qpdf->qpdf_writer->setQDFMode(value);
513 513 }
514 514  
  515 +void qpdf_set_deterministic_ID(qpdf_data qpdf, QPDF_BOOL value)
  516 +{
  517 + QTC::TC("qpdf", "qpdf-c called qpdf_set_deterministic_ID");
  518 + qpdf->qpdf_writer->setDeterministicID(value);
  519 +}
  520 +
515 521 void qpdf_set_static_ID(qpdf_data qpdf, QPDF_BOOL value)
516 522 {
517 523 QTC::TC("qpdf", "qpdf-c called qpdf_set_static_ID");
... ...
libqpdf/qpdf/Pl_MD5.hh
... ... @@ -25,10 +25,24 @@ class Pl_MD5: public Pipeline
25 25 virtual void finish();
26 26 QPDF_DLL
27 27 std::string getHexDigest();
  28 + // Enable/disable. Disabling the pipeline causes it to become a
  29 + // pass-through. This makes it possible to stick an MD5 pipeline
  30 + // in a pipeline when it may or may not be required. Disabling it
  31 + // avoids incurring the runtime overhead of doing needless
  32 + // digest computation.
  33 + QPDF_DLL
  34 + void enable(bool enabled);
  35 + // If persistAcrossFinish is called, calls to finish do not
  36 + // finalize the underlying md5 object. In this case, the object is
  37 + // not finalized until getHexDigest() is called.
  38 + QPDF_DLL
  39 + void persistAcrossFinish(bool);
28 40  
29 41 private:
30 42 bool in_progress;
31 43 MD5 md5;
  44 + bool enabled;
  45 + bool persist_across_finish;
32 46 };
33 47  
34 48 #endif // __PL_MD5_HH__
... ...
libtests/md5.cc
... ... @@ -45,6 +45,13 @@ int main(int, char*[])
45 45  
46 46 Pl_Discard d;
47 47 Pl_MD5 p("MD5", &d);
  48 + // Create a second pipeline, protect against finish, and call
  49 + // getHexDigest only once at the end of both passes. Make sure the
  50 + // checksum is that of the input file concatenated to itself. This
  51 + // will require changes to Pl_MD5.cc to prevent finish from
  52 + // calling finalize.
  53 + Pl_MD5 p2("MD5", &d);
  54 + p2.persistAcrossFinish(true);
48 55 for (int i = 0; i < 2; ++i)
49 56 {
50 57 FILE* f = QUtil::safe_fopen("md5.in", "rb");
... ... @@ -61,12 +68,23 @@ int main(int, char*[])
61 68 else
62 69 {
63 70 p.write(buf, len);
  71 + p2.write(buf, len);
  72 + if (i == 1)
  73 + {
  74 + // Partial digest -- resets after each call to write
  75 + std::cout << p.getHexDigest() << std::endl;
  76 + }
64 77 }
65 78 }
66 79 fclose(f);
67 80 p.finish();
  81 + p2.finish();
  82 + // Make sure calling getHexDigest twice with no intervening
  83 + // writes results in the same result each time.
  84 + std::cout << p.getHexDigest() << std::endl;
68 85 std::cout << p.getHexDigest() << std::endl;
69 86 }
  87 + std::cout << p2.getHexDigest() << std::endl;
70 88  
71 89 return 0;
72 90 }
... ...
libtests/qtest/md5/md5.out
... ... @@ -14,3 +14,11 @@ d174ab98d277d9f5a5611c2c9f419d9f
14 14 0
15 15 5f4b4321873433daae578f85c72f9e74
16 16 5f4b4321873433daae578f85c72f9e74
  17 +41f977636f79cf1bad1b439caa7d627c
  18 +c30e03b5536e37306df25489622e13e3
  19 +9dabbd135cc47bb603a94989df37c926
  20 +ce80591b269b749f65c53b71d0be5212
  21 +db5448be0a1e931cbd84654e82063483
  22 +db5448be0a1e931cbd84654e82063483
  23 +db5448be0a1e931cbd84654e82063483
  24 +9833b12b21147bebb2f33d35807049af
... ...
manual/qpdf-manual.xml
... ... @@ -991,11 +991,30 @@ outfile.pdf</option>
991 991 file should be given. The following options are available:
992 992 <variablelist>
993 993 <varlistentry>
  994 + <term><option>--deterministic-id</option></term>
  995 + <listitem>
  996 + <para>
  997 + Causes generation of a deterministic value for /ID. This
  998 + prevents use of timestamp and output file name information in
  999 + the /ID generation. Instead, at some slight additional runtime
  1000 + cost, the /ID field is generated to include a digest of the
  1001 + significant parts of the content of the output PDF file. This
  1002 + means that a given qpdf operation should generate the same /ID
  1003 + each time it is run, which can be useful when caching results
  1004 + or for generation of some test data. Use of this flag is not
  1005 + compatible with creation of encrypted files.
  1006 + </para>
  1007 + </listitem>
  1008 + </varlistentry>
  1009 + <varlistentry>
994 1010 <term><option>--static-id</option></term>
995 1011 <listitem>
996 1012 <para>
997   - Causes generation of a fixed value for /ID. This is intended
998   - for testing only. Never use it for production files.
  1013 + Causes generation of a fixed value for /ID. This is intended
  1014 + for testing only. Never use it for production files. If you
  1015 + are trying to get the same /ID each time for a given file and
  1016 + you are not generating encrypted files, consider using the
  1017 + <option>--deterministic-id</option> option.
999 1018 </para>
1000 1019 </listitem>
1001 1020 </varlistentry>
... ...
qpdf/qpdf-ctest.c
... ... @@ -427,6 +427,18 @@ static void test18(char const* infile,
427 427 report_errors();
428 428 }
429 429  
  430 +static void test19(char const* infile,
  431 + char const* password,
  432 + char const* outfile,
  433 + char const* outfile2)
  434 +{
  435 + qpdf_read(qpdf, infile, password);
  436 + qpdf_init_write(qpdf, outfile);
  437 + qpdf_set_deterministic_ID(qpdf, QPDF_TRUE);
  438 + qpdf_write(qpdf);
  439 + report_errors();
  440 +}
  441 +
430 442 int main(int argc, char* argv[])
431 443 {
432 444 char* p = 0;
... ... @@ -485,6 +497,7 @@ int main(int argc, char* argv[])
485 497 (n == 16) ? test16 :
486 498 (n == 17) ? test17 :
487 499 (n == 18) ? test18 :
  500 + (n == 19) ? test19 :
488 501 0);
489 502  
490 503 if (fn == 0)
... ...
qpdf/qpdf.cc
... ... @@ -237,6 +237,7 @@ Testing, Inspection, and Debugging Options\n\
237 237 These options can be useful for digging into PDF files or for use in\n\
238 238 automated test suites for software that uses the qpdf library.\n\
239 239 \n\
  240 +--deterministic-id generate deterministic /ID\n\
240 241 --static-id generate static /ID: FOR TESTING ONLY!\n\
241 242 --static-aes-iv use a static initialization vector for AES-CBC\n\
242 243 This is option is not secure! FOR TESTING ONLY!\n\
... ... @@ -1031,6 +1032,7 @@ int main(int argc, char* argv[])
1031 1032 std::string force_version;
1032 1033  
1033 1034 bool show_npages = false;
  1035 + bool deterministic_id = false;
1034 1036 bool static_id = false;
1035 1037 bool static_aes_iv = false;
1036 1038 bool suppress_original_object_id = false;
... ... @@ -1229,6 +1231,10 @@ int main(int argc, char* argv[])
1229 1231 }
1230 1232 force_version = parameter;
1231 1233 }
  1234 + else if (strcmp(arg, "deterministic-id") == 0)
  1235 + {
  1236 + deterministic_id = true;
  1237 + }
1232 1238 else if (strcmp(arg, "static-id") == 0)
1233 1239 {
1234 1240 static_id = true;
... ... @@ -1710,6 +1716,10 @@ int main(int argc, char* argv[])
1710 1716 {
1711 1717 w.setPreserveEncryption(false);
1712 1718 }
  1719 + if (deterministic_id)
  1720 + {
  1721 + w.setDeterministicID(true);
  1722 + }
1713 1723 if (static_id)
1714 1724 {
1715 1725 w.setStaticID(true);
... ...
qpdf/qpdf.testcov
... ... @@ -269,3 +269,7 @@ qpdf pages range omitted at end 0
269 269 qpdf pages range omitted in middle 0
270 270 qpdf npages 0
271 271 QPDF already reserved object 0
  272 +QPDFWriter standard deterministic ID 1
  273 +QPDFWriter linearized deterministic ID 1
  274 +QPDFWriter deterministic with no data 0
  275 +qpdf-c called qpdf_set_deterministic_ID 0
... ...
qpdf/qtest/qpdf.test
... ... @@ -990,6 +990,43 @@ $td->runtest("write damaged",
990 990  
991 991 show_ntests();
992 992 # ----------
  993 +$td->notify("--- Deterministic ID Tests ---");
  994 +$n_tests += 11;
  995 +foreach my $d ('nn', 'ny', 'yn', 'yy')
  996 +{
  997 + my $linearize = ($d =~ m/^y/);
  998 + my $ostream = ($d =~ m/y$/);
  999 + $td->runtest("deterministic ID: linearize/ostream=$d",
  1000 + {$td->COMMAND =>
  1001 + "qpdf -deterministic-id" .
  1002 + ($linearize ? " -linearize" : "") .
  1003 + " -object-streams=" . ($ostream ? "generate" : "disable") .
  1004 + " deterministic-id-in.pdf a.pdf"},
  1005 + {$td->STRING => "",
  1006 + $td->EXIT_STATUS => 0});
  1007 + $td->runtest("compare files",
  1008 + {$td->FILE => "a.pdf"},
  1009 + {$td->FILE => "deterministic-id-$d.pdf"});
  1010 +}
  1011 +
  1012 +$td->runtest("deterministic ID with encryption",
  1013 + {$td->COMMAND => "qpdf -deterministic-id encrypted-with-images.pdf a.pdf"},
  1014 + {$td->STRING => "INTERNAL ERROR: QPDFWriter::generateID" .
  1015 + " has no data for deterministic ID." .
  1016 + " This may happen if deterministic ID and" .
  1017 + " file encryption are requested together.\n",
  1018 + $td->EXIT_STATUS => 2},
  1019 + $td->NORMALIZE_NEWLINES);
  1020 +$td->runtest("deterministic ID (C API)",
  1021 + {$td->COMMAND =>
  1022 + "qpdf-ctest 19 deterministic-id-in.pdf '' a.pdf"},
  1023 + {$td->STRING => "",
  1024 + $td->EXIT_STATUS => 0});
  1025 +$td->runtest("compare files",
  1026 + {$td->FILE => "a.pdf"},
  1027 + {$td->FILE => "deterministic-id-nn.pdf"});
  1028 +
  1029 +# ----------
993 1030 $td->notify("--- Object Stream Tests ---");
994 1031 $n_tests += (36 * 4) + (12 * 2);
995 1032 $n_compare_pdfs += 36;
... ...
qpdf/qtest/qpdf/deterministic-id-in.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/deterministic-id-nn.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/deterministic-id-ny.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/deterministic-id-yn.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/deterministic-id-yy.pdf 0 → 100644
No preview for this file type