Commit b8bdef0ad12883d72ced5eb443e6e34a93bbbb91
1 parent
607c3921
Implement deterministic ID
For non-encrypted files, deterministic ID generation uses file contents instead of timestamp and file name. At a small runtime cost, this enables generation of the same /ID if the same inputs are converted in the same way multiple times.
Showing
20 changed files
with
2232 additions
and
51 deletions
ChangeLog
| 1 | +2015-10-29 Jay Berkenbilt <ejb@ql.org> | ||
| 2 | + | ||
| 3 | + * Implement QPDFWriter::setDeterministicID and --deterministic-id | ||
| 4 | + command-line flag to qpdf to request generation of a deterministic | ||
| 5 | + /ID for non-encrypted files. | ||
| 6 | + | ||
| 1 | 2015-05-24 Jay Berkenbilt <ejb@ql.org> | 7 | 2015-05-24 Jay Berkenbilt <ejb@ql.org> |
| 2 | 8 | ||
| 3 | * 5.1.3: release | 9 | * 5.1.3: release |
TODO
| @@ -46,6 +46,14 @@ Small, command-line tool only enhancements to do soon | @@ -46,6 +46,14 @@ Small, command-line tool only enhancements to do soon | ||
| 46 | (libtool). | 46 | (libtool). |
| 47 | 47 | ||
| 48 | 48 | ||
| 49 | +Next ABI change | ||
| 50 | +=============== | ||
| 51 | + | ||
| 52 | +Remove private methods that are there only for ABI compatibility | ||
| 53 | +including extra QPDFWriter writeTrailer, writeXRefTable, | ||
| 54 | +writeXRefStream. | ||
| 55 | + | ||
| 56 | + | ||
| 49 | 5.2.0 | 57 | 5.2.0 |
| 50 | ===== | 58 | ===== |
| 51 | 59 |
include/qpdf/QPDFWriter.hh
| @@ -35,6 +35,7 @@ | @@ -35,6 +35,7 @@ | ||
| 35 | class QPDF; | 35 | class QPDF; |
| 36 | class QPDFObjectHandle; | 36 | class QPDFObjectHandle; |
| 37 | class Pl_Count; | 37 | class Pl_Count; |
| 38 | +class Pl_MD5; | ||
| 38 | 39 | ||
| 39 | class QPDFWriter | 40 | class QPDFWriter |
| 40 | { | 41 | { |
| @@ -189,8 +190,22 @@ class QPDFWriter | @@ -189,8 +190,22 @@ class QPDFWriter | ||
| 189 | QPDF_DLL | 190 | QPDF_DLL |
| 190 | void setExtraHeaderText(std::string const&); | 191 | void setExtraHeaderText(std::string const&); |
| 191 | 192 | ||
| 193 | + // Causes a deterministic /ID value to be generated. When this is | ||
| 194 | + // set, the current time and output file name are not used as part | ||
| 195 | + // of /ID generation. Instead, a digest of all significant parts | ||
| 196 | + // of the output file's contents is included in the /ID | ||
| 197 | + // calculation. Use of a deterministic /ID can be handy when it is | ||
| 198 | + // desirable for a repeat of the same qpdf operation on the same | ||
| 199 | + // inputs being written to the same outputs with the same | ||
| 200 | + // parameters to generate exactly the same results. This feature | ||
| 201 | + // is incompatible with encrypted files because, for encrypted | ||
| 202 | + // files, the /ID is generated before any part of the file is | ||
| 203 | + // written since it is an input to the encryption process. | ||
| 204 | + QPDF_DLL | ||
| 205 | + void setDeterministicID(bool); | ||
| 206 | + | ||
| 192 | // Cause a static /ID value to be generated. Use only in test | 207 | // Cause a static /ID value to be generated. Use only in test |
| 193 | - // suites. | 208 | + // suites. See also setDeterministicID. |
| 194 | QPDF_DLL | 209 | QPDF_DLL |
| 195 | void setStaticID(bool); | 210 | void setStaticID(bool); |
| 196 | 211 | ||
| @@ -298,6 +313,9 @@ class QPDFWriter | @@ -298,6 +313,9 @@ class QPDFWriter | ||
| 298 | void writeObject(QPDFObjectHandle object, int object_stream_index = -1); | 313 | void writeObject(QPDFObjectHandle object, int object_stream_index = -1); |
| 299 | void writeTrailer(trailer_e which, int size, | 314 | void writeTrailer(trailer_e which, int size, |
| 300 | bool xref_stream, qpdf_offset_t prev = 0); | 315 | bool xref_stream, qpdf_offset_t prev = 0); |
| 316 | + void writeTrailer(trailer_e which, int size, | ||
| 317 | + bool xref_stream, qpdf_offset_t prev, | ||
| 318 | + int linearization_pass); | ||
| 301 | void unparseObject(QPDFObjectHandle object, int level, | 319 | void unparseObject(QPDFObjectHandle object, int level, |
| 302 | unsigned int flags); | 320 | unsigned int flags); |
| 303 | void unparseObject(QPDFObjectHandle object, int level, | 321 | void unparseObject(QPDFObjectHandle object, int level, |
| @@ -348,6 +366,15 @@ class QPDFWriter | @@ -348,6 +366,15 @@ class QPDFWriter | ||
| 348 | int hint_id, | 366 | int hint_id, |
| 349 | qpdf_offset_t hint_offset, | 367 | qpdf_offset_t hint_offset, |
| 350 | qpdf_offset_t hint_length); | 368 | qpdf_offset_t hint_length); |
| 369 | + qpdf_offset_t writeXRefTable( | ||
| 370 | + trailer_e which, int first, int last, int size, | ||
| 371 | + // for linearization | ||
| 372 | + qpdf_offset_t prev, | ||
| 373 | + bool suppress_offsets, | ||
| 374 | + int hint_id, | ||
| 375 | + qpdf_offset_t hint_offset, | ||
| 376 | + qpdf_offset_t hint_length, | ||
| 377 | + int linearization_pass); | ||
| 351 | qpdf_offset_t writeXRefStream( | 378 | qpdf_offset_t writeXRefStream( |
| 352 | int objid, int max_id, qpdf_offset_t max_offset, | 379 | int objid, int max_id, qpdf_offset_t max_offset, |
| 353 | trailer_e which, int first, int last, int size); | 380 | trailer_e which, int first, int last, int size); |
| @@ -360,6 +387,16 @@ class QPDFWriter | @@ -360,6 +387,16 @@ class QPDFWriter | ||
| 360 | qpdf_offset_t hint_offset, | 387 | qpdf_offset_t hint_offset, |
| 361 | qpdf_offset_t hint_length, | 388 | qpdf_offset_t hint_length, |
| 362 | bool skip_compression); | 389 | bool skip_compression); |
| 390 | + qpdf_offset_t writeXRefStream( | ||
| 391 | + int objid, int max_id, qpdf_offset_t max_offset, | ||
| 392 | + trailer_e which, int first, int last, int size, | ||
| 393 | + // for linearization | ||
| 394 | + qpdf_offset_t prev, | ||
| 395 | + int hint_id, | ||
| 396 | + qpdf_offset_t hint_offset, | ||
| 397 | + qpdf_offset_t hint_length, | ||
| 398 | + bool skip_compression, | ||
| 399 | + int linearization_pass); | ||
| 363 | int calculateXrefStreamPadding(int xref_bytes); | 400 | int calculateXrefStreamPadding(int xref_bytes); |
| 364 | 401 | ||
| 365 | // When filtering subsections, push additional pipelines to the | 402 | // When filtering subsections, push additional pipelines to the |
| @@ -380,6 +417,8 @@ class QPDFWriter | @@ -380,6 +417,8 @@ class QPDFWriter | ||
| 380 | void adjustAESStreamLength(size_t& length); | 417 | void adjustAESStreamLength(size_t& length); |
| 381 | void pushEncryptionFilter(); | 418 | void pushEncryptionFilter(); |
| 382 | void pushDiscardFilter(); | 419 | void pushDiscardFilter(); |
| 420 | + void pushMD5Pipeline(); | ||
| 421 | + void computeDeterministicIDData(); | ||
| 383 | 422 | ||
| 384 | void discardGeneration(std::map<QPDFObjGen, int> const& in, | 423 | void discardGeneration(std::map<QPDFObjGen, int> const& in, |
| 385 | std::map<int, int>& out); | 424 | std::map<int, int>& out); |
| @@ -437,6 +476,9 @@ class QPDFWriter | @@ -437,6 +476,9 @@ class QPDFWriter | ||
| 437 | std::map<QPDFObjGen, int> object_to_object_stream; | 476 | std::map<QPDFObjGen, int> object_to_object_stream; |
| 438 | std::map<int, std::set<QPDFObjGen> > object_stream_to_objects; | 477 | std::map<int, std::set<QPDFObjGen> > object_stream_to_objects; |
| 439 | std::list<Pipeline*> pipeline_stack; | 478 | std::list<Pipeline*> pipeline_stack; |
| 479 | + bool deterministic_id; | ||
| 480 | + Pl_MD5* md5_pipeline; | ||
| 481 | + std::string deterministic_id_data; | ||
| 440 | 482 | ||
| 441 | // For linearization only | 483 | // For linearization only |
| 442 | std::map<int, int> obj_renumber_no_gen; | 484 | std::map<int, int> obj_renumber_no_gen; |
include/qpdf/qpdf-c.h
| @@ -324,8 +324,11 @@ extern "C" { | @@ -324,8 +324,11 @@ extern "C" { | ||
| 324 | QPDF_DLL | 324 | QPDF_DLL |
| 325 | void qpdf_set_qdf_mode(qpdf_data qpdf, QPDF_BOOL value); | 325 | void qpdf_set_qdf_mode(qpdf_data qpdf, QPDF_BOOL value); |
| 326 | 326 | ||
| 327 | + QPDF_DLL | ||
| 328 | + void qpdf_set_deterministic_ID(qpdf_data qpdf, QPDF_BOOL value); | ||
| 329 | + | ||
| 327 | /* Never use qpdf_set_static_ID except in test suites to suppress | 330 | /* Never use qpdf_set_static_ID except in test suites to suppress |
| 328 | - * generation of a random /ID. | 331 | + * generation of a random /ID. See also qpdf_set_deterministic_ID. |
| 329 | */ | 332 | */ |
| 330 | QPDF_DLL | 333 | QPDF_DLL |
| 331 | void qpdf_set_static_ID(qpdf_data qpdf, QPDF_BOOL value); | 334 | void qpdf_set_static_ID(qpdf_data qpdf, QPDF_BOOL value); |
libqpdf/Pl_MD5.cc
| @@ -3,7 +3,9 @@ | @@ -3,7 +3,9 @@ | ||
| 3 | 3 | ||
| 4 | Pl_MD5::Pl_MD5(char const* identifier, Pipeline* next) : | 4 | Pl_MD5::Pl_MD5(char const* identifier, Pipeline* next) : |
| 5 | Pipeline(identifier, next), | 5 | Pipeline(identifier, next), |
| 6 | - in_progress(false) | 6 | + in_progress(false), |
| 7 | + enabled(true), | ||
| 8 | + persist_across_finish(false) | ||
| 7 | { | 9 | { |
| 8 | } | 10 | } |
| 9 | 11 | ||
| @@ -14,24 +16,27 @@ Pl_MD5::~Pl_MD5() | @@ -14,24 +16,27 @@ Pl_MD5::~Pl_MD5() | ||
| 14 | void | 16 | void |
| 15 | Pl_MD5::write(unsigned char* buf, size_t len) | 17 | Pl_MD5::write(unsigned char* buf, size_t len) |
| 16 | { | 18 | { |
| 17 | - if (! this->in_progress) | 19 | + if (this->enabled) |
| 18 | { | 20 | { |
| 19 | - this->md5.reset(); | ||
| 20 | - this->in_progress = true; | ||
| 21 | - } | 21 | + if (! this->in_progress) |
| 22 | + { | ||
| 23 | + this->md5.reset(); | ||
| 24 | + this->in_progress = true; | ||
| 25 | + } | ||
| 22 | 26 | ||
| 23 | - // Write in chunks in case len is too big to fit in an int. | ||
| 24 | - // Assume int is at least 32 bits. | ||
| 25 | - static size_t const max_bytes = 1 << 30; | ||
| 26 | - size_t bytes_left = len; | ||
| 27 | - unsigned char* data = buf; | ||
| 28 | - while (bytes_left > 0) | ||
| 29 | - { | ||
| 30 | - size_t bytes = (bytes_left >= max_bytes ? max_bytes : bytes_left); | ||
| 31 | - this->md5.encodeDataIncrementally( | ||
| 32 | - reinterpret_cast<char*>(data), bytes); | ||
| 33 | - bytes_left -= bytes; | ||
| 34 | - data += bytes; | 27 | + // Write in chunks in case len is too big to fit in an int. |
| 28 | + // Assume int is at least 32 bits. | ||
| 29 | + static size_t const max_bytes = 1 << 30; | ||
| 30 | + size_t bytes_left = len; | ||
| 31 | + unsigned char* data = buf; | ||
| 32 | + while (bytes_left > 0) | ||
| 33 | + { | ||
| 34 | + size_t bytes = (bytes_left >= max_bytes ? max_bytes : bytes_left); | ||
| 35 | + this->md5.encodeDataIncrementally( | ||
| 36 | + reinterpret_cast<char*>(data), bytes); | ||
| 37 | + bytes_left -= bytes; | ||
| 38 | + data += bytes; | ||
| 39 | + } | ||
| 35 | } | 40 | } |
| 36 | 41 | ||
| 37 | this->getNext()->write(buf, len); | 42 | this->getNext()->write(buf, len); |
| @@ -41,16 +46,32 @@ void | @@ -41,16 +46,32 @@ void | ||
| 41 | Pl_MD5::finish() | 46 | Pl_MD5::finish() |
| 42 | { | 47 | { |
| 43 | this->getNext()->finish(); | 48 | this->getNext()->finish(); |
| 44 | - this->in_progress = false; | 49 | + if (! this->persist_across_finish) |
| 50 | + { | ||
| 51 | + this->in_progress = false; | ||
| 52 | + } | ||
| 53 | +} | ||
| 54 | + | ||
| 55 | +void | ||
| 56 | +Pl_MD5::enable(bool enabled) | ||
| 57 | +{ | ||
| 58 | + this->enabled = enabled; | ||
| 59 | +} | ||
| 60 | + | ||
| 61 | +void | ||
| 62 | +Pl_MD5::persistAcrossFinish(bool persist) | ||
| 63 | +{ | ||
| 64 | + this->persist_across_finish = persist; | ||
| 45 | } | 65 | } |
| 46 | 66 | ||
| 47 | std::string | 67 | std::string |
| 48 | Pl_MD5::getHexDigest() | 68 | Pl_MD5::getHexDigest() |
| 49 | { | 69 | { |
| 50 | - if (this->in_progress) | 70 | + if (! this->enabled) |
| 51 | { | 71 | { |
| 52 | throw std::logic_error( | 72 | throw std::logic_error( |
| 53 | - "digest requested for in-progress MD5 Pipeline"); | 73 | + "digest requested for a disabled MD5 Pipeline"); |
| 54 | } | 74 | } |
| 75 | + this->in_progress = false; | ||
| 55 | return this->md5.unparse(); | 76 | return this->md5.unparse(); |
| 56 | } | 77 | } |
libqpdf/QPDFWriter.cc
| @@ -9,6 +9,7 @@ | @@ -9,6 +9,7 @@ | ||
| 9 | #include <qpdf/Pl_AES_PDF.hh> | 9 | #include <qpdf/Pl_AES_PDF.hh> |
| 10 | #include <qpdf/Pl_Flate.hh> | 10 | #include <qpdf/Pl_Flate.hh> |
| 11 | #include <qpdf/Pl_PNGFilter.hh> | 11 | #include <qpdf/Pl_PNGFilter.hh> |
| 12 | +#include <qpdf/Pl_MD5.hh> | ||
| 12 | #include <qpdf/QUtil.hh> | 13 | #include <qpdf/QUtil.hh> |
| 13 | #include <qpdf/MD5.hh> | 14 | #include <qpdf/MD5.hh> |
| 14 | #include <qpdf/RC4.hh> | 15 | #include <qpdf/RC4.hh> |
| @@ -77,6 +78,8 @@ QPDFWriter::init() | @@ -77,6 +78,8 @@ QPDFWriter::init() | ||
| 77 | cur_stream_length = 0; | 78 | cur_stream_length = 0; |
| 78 | added_newline = false; | 79 | added_newline = false; |
| 79 | max_ostream_index = 0; | 80 | max_ostream_index = 0; |
| 81 | + deterministic_id = false; | ||
| 82 | + md5_pipeline = 0; | ||
| 80 | } | 83 | } |
| 81 | 84 | ||
| 82 | QPDFWriter::~QPDFWriter() | 85 | QPDFWriter::~QPDFWriter() |
| @@ -264,6 +267,12 @@ QPDFWriter::setStaticID(bool val) | @@ -264,6 +267,12 @@ QPDFWriter::setStaticID(bool val) | ||
| 264 | } | 267 | } |
| 265 | 268 | ||
| 266 | void | 269 | void |
| 270 | +QPDFWriter::setDeterministicID(bool val) | ||
| 271 | +{ | ||
| 272 | + this->deterministic_id = val; | ||
| 273 | +} | ||
| 274 | + | ||
| 275 | +void | ||
| 267 | QPDFWriter::setStaticAesIV(bool val) | 276 | QPDFWriter::setStaticAesIV(bool val) |
| 268 | { | 277 | { |
| 269 | if (val) | 278 | if (val) |
| @@ -507,10 +516,10 @@ void | @@ -507,10 +516,10 @@ void | ||
| 507 | QPDFWriter::copyEncryptionParameters(QPDF& qpdf) | 516 | QPDFWriter::copyEncryptionParameters(QPDF& qpdf) |
| 508 | { | 517 | { |
| 509 | this->preserve_encryption = false; | 518 | this->preserve_encryption = false; |
| 510 | - generateID(); | ||
| 511 | QPDFObjectHandle trailer = qpdf.getTrailer(); | 519 | QPDFObjectHandle trailer = qpdf.getTrailer(); |
| 512 | if (trailer.hasKey("/Encrypt")) | 520 | if (trailer.hasKey("/Encrypt")) |
| 513 | { | 521 | { |
| 522 | + generateID(); | ||
| 514 | this->id1 = | 523 | this->id1 = |
| 515 | trailer.getKey("/ID").getArrayItem(0).getStringValue(); | 524 | trailer.getKey("/ID").getArrayItem(0).getStringValue(); |
| 516 | QPDFObjectHandle encrypt = trailer.getKey("/Encrypt"); | 525 | QPDFObjectHandle encrypt = trailer.getKey("/Encrypt"); |
| @@ -864,6 +873,10 @@ QPDFWriter::popPipelineStack(PointerHolder<Buffer>* bp) | @@ -864,6 +873,10 @@ QPDFWriter::popPipelineStack(PointerHolder<Buffer>* bp) | ||
| 864 | while (dynamic_cast<Pl_Count*>(this->pipeline_stack.back()) == 0) | 873 | while (dynamic_cast<Pl_Count*>(this->pipeline_stack.back()) == 0) |
| 865 | { | 874 | { |
| 866 | Pipeline* p = this->pipeline_stack.back(); | 875 | Pipeline* p = this->pipeline_stack.back(); |
| 876 | + if (dynamic_cast<Pl_MD5*>(p) == this->md5_pipeline) | ||
| 877 | + { | ||
| 878 | + this->md5_pipeline = 0; | ||
| 879 | + } | ||
| 867 | this->pipeline_stack.pop_back(); | 880 | this->pipeline_stack.pop_back(); |
| 868 | Pl_Buffer* buf = dynamic_cast<Pl_Buffer*>(p); | 881 | Pl_Buffer* buf = dynamic_cast<Pl_Buffer*>(p); |
| 869 | if (bp && buf) | 882 | if (bp && buf) |
| @@ -921,6 +934,36 @@ QPDFWriter::pushDiscardFilter() | @@ -921,6 +934,36 @@ QPDFWriter::pushDiscardFilter() | ||
| 921 | activatePipelineStack(); | 934 | activatePipelineStack(); |
| 922 | } | 935 | } |
| 923 | 936 | ||
| 937 | +void | ||
| 938 | +QPDFWriter::pushMD5Pipeline() | ||
| 939 | +{ | ||
| 940 | + if (! this->id2.empty()) | ||
| 941 | + { | ||
| 942 | + // Can't happen in the code | ||
| 943 | + throw std::logic_error( | ||
| 944 | + "Deterministic ID computation enabled after ID" | ||
| 945 | + " generation has already occurred."); | ||
| 946 | + } | ||
| 947 | + assert(this->deterministic_id); | ||
| 948 | + assert(this->md5_pipeline == 0); | ||
| 949 | + assert(this->pipeline->getCount() == 0); | ||
| 950 | + this->md5_pipeline = new Pl_MD5("qpdf md5", this->pipeline); | ||
| 951 | + this->md5_pipeline->persistAcrossFinish(true); | ||
| 952 | + // Special case code in popPipelineStack clears this->md5_pipeline | ||
| 953 | + // upon deletion. | ||
| 954 | + pushPipeline(this->md5_pipeline); | ||
| 955 | + activatePipelineStack(); | ||
| 956 | +} | ||
| 957 | + | ||
| 958 | +void | ||
| 959 | +QPDFWriter::computeDeterministicIDData() | ||
| 960 | +{ | ||
| 961 | + assert(this->md5_pipeline != 0); | ||
| 962 | + assert(this->deterministic_id_data.empty()); | ||
| 963 | + this->deterministic_id_data = this->md5_pipeline->getHexDigest(); | ||
| 964 | + this->md5_pipeline->enable(false); | ||
| 965 | +} | ||
| 966 | + | ||
| 924 | int | 967 | int |
| 925 | QPDFWriter::openObject(int objid) | 968 | QPDFWriter::openObject(int objid) |
| 926 | { | 969 | { |
| @@ -1069,6 +1112,13 @@ void | @@ -1069,6 +1112,13 @@ void | ||
| 1069 | QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream, | 1112 | QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream, |
| 1070 | qpdf_offset_t prev) | 1113 | qpdf_offset_t prev) |
| 1071 | { | 1114 | { |
| 1115 | + writeTrailer(which, size, xref_stream, prev, 0); | ||
| 1116 | +} | ||
| 1117 | + | ||
| 1118 | +void | ||
| 1119 | +QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream, | ||
| 1120 | + qpdf_offset_t prev, int linearization_pass) | ||
| 1121 | +{ | ||
| 1072 | QPDFObjectHandle trailer = getTrimmedTrailer(); | 1122 | QPDFObjectHandle trailer = getTrimmedTrailer(); |
| 1073 | if (! xref_stream) | 1123 | if (! xref_stream) |
| 1074 | { | 1124 | { |
| @@ -1119,8 +1169,21 @@ QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream, | @@ -1119,8 +1169,21 @@ QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream, | ||
| 1119 | // Write ID | 1169 | // Write ID |
| 1120 | writeStringQDF(" "); | 1170 | writeStringQDF(" "); |
| 1121 | writeString(" /ID ["); | 1171 | writeString(" /ID ["); |
| 1122 | - writeString(QPDF_String(this->id1).unparse(true)); | ||
| 1123 | - writeString(QPDF_String(this->id2).unparse(true)); | 1172 | + if (linearization_pass == 1) |
| 1173 | + { | ||
| 1174 | + writeString("<00000000000000000000000000000000>" | ||
| 1175 | + "<00000000000000000000000000000000>"); | ||
| 1176 | + } | ||
| 1177 | + else | ||
| 1178 | + { | ||
| 1179 | + if ((linearization_pass == 0) && (this->deterministic_id)) | ||
| 1180 | + { | ||
| 1181 | + computeDeterministicIDData(); | ||
| 1182 | + } | ||
| 1183 | + generateID(); | ||
| 1184 | + writeString(QPDF_String(this->id1).unparse(true)); | ||
| 1185 | + writeString(QPDF_String(this->id2).unparse(true)); | ||
| 1186 | + } | ||
| 1124 | writeString("]"); | 1187 | writeString("]"); |
| 1125 | 1188 | ||
| 1126 | if (which != t_lin_second) | 1189 | if (which != t_lin_second) |
| @@ -1794,12 +1857,8 @@ QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index) | @@ -1794,12 +1857,8 @@ QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index) | ||
| 1794 | void | 1857 | void |
| 1795 | QPDFWriter::generateID() | 1858 | QPDFWriter::generateID() |
| 1796 | { | 1859 | { |
| 1797 | - // Note: we can't call generateID() at the time of construction | ||
| 1798 | - // since the caller hasn't yet had a chance to call setStaticID(), | ||
| 1799 | - // but we need to generate it before computing encryption | ||
| 1800 | - // dictionary parameters. This is why we call this function both | ||
| 1801 | - // from setEncryptionParameters() and from write() and return | ||
| 1802 | - // immediately if the ID has already been generated. | 1860 | + // Generate the ID lazily so that we can handle the user's |
| 1861 | + // preference to use static or deterministic ID generation. | ||
| 1803 | 1862 | ||
| 1804 | if (! this->id2.empty()) | 1863 | if (! this->id2.empty()) |
| 1805 | { | 1864 | { |
| @@ -1822,17 +1881,40 @@ QPDFWriter::generateID() | @@ -1822,17 +1881,40 @@ QPDFWriter::generateID() | ||
| 1822 | } | 1881 | } |
| 1823 | else | 1882 | else |
| 1824 | { | 1883 | { |
| 1825 | - // The PDF specification has guidelines for creating IDs, but it | ||
| 1826 | - // states clearly that the only thing that's really important is | ||
| 1827 | - // that it is very likely to be unique. We can't really follow | ||
| 1828 | - // the guidelines in the spec exactly because we haven't written | ||
| 1829 | - // the file yet. This scheme should be fine though. | 1884 | + // The PDF specification has guidelines for creating IDs, but |
| 1885 | + // it states clearly that the only thing that's really | ||
| 1886 | + // important is that it is very likely to be unique. We can't | ||
| 1887 | + // really follow the guidelines in the spec exactly because we | ||
| 1888 | + // haven't written the file yet. This scheme should be fine | ||
| 1889 | + // though. The deterministic ID case uses a digest of a | ||
| 1890 | + // sufficient portion of the file's contents such that no two | ||
| 1891 | + // non-matching files would match in the subsets used for this | ||
| 1892 | + // computation. Note that we explicitly omit the filename from | ||
| 1893 | + // the digest calculation for deterministic ID so that the same | ||
| 1894 | + // file converted with qpdf, in that case, would have the same | ||
| 1895 | + // ID regardless of the output file's name. | ||
| 1830 | 1896 | ||
| 1831 | std::string seed; | 1897 | std::string seed; |
| 1832 | - seed += QUtil::int_to_string(QUtil::get_current_time()); | 1898 | + if (this->deterministic_id) |
| 1899 | + { | ||
| 1900 | + if (this->deterministic_id_data.empty()) | ||
| 1901 | + { | ||
| 1902 | + QTC::TC("qpdf", "QPDFWriter deterministic with no data"); | ||
| 1903 | + throw std::logic_error( | ||
| 1904 | + "INTERNAL ERROR: QPDFWriter::generateID has no" | ||
| 1905 | + " data for deterministic ID. This may happen if" | ||
| 1906 | + " deterministic ID and file encryption are requested" | ||
| 1907 | + " together."); | ||
| 1908 | + } | ||
| 1909 | + seed += this->deterministic_id_data; | ||
| 1910 | + } | ||
| 1911 | + else | ||
| 1912 | + { | ||
| 1913 | + seed += QUtil::int_to_string(QUtil::get_current_time()); | ||
| 1914 | + seed += this->filename; | ||
| 1915 | + seed += " "; | ||
| 1916 | + } | ||
| 1833 | seed += " QPDF "; | 1917 | seed += " QPDF "; |
| 1834 | - seed += this->filename; | ||
| 1835 | - seed += " "; | ||
| 1836 | if (trailer.hasKey("/Info")) | 1918 | if (trailer.hasKey("/Info")) |
| 1837 | { | 1919 | { |
| 1838 | QPDFObjectHandle info = trailer.getKey("/Info"); | 1920 | QPDFObjectHandle info = trailer.getKey("/Info"); |
| @@ -2260,8 +2342,6 @@ QPDFWriter::write() | @@ -2260,8 +2342,6 @@ QPDFWriter::write() | ||
| 2260 | setMinimumPDFVersion("1.5"); | 2342 | setMinimumPDFVersion("1.5"); |
| 2261 | } | 2343 | } |
| 2262 | 2344 | ||
| 2263 | - generateID(); | ||
| 2264 | - | ||
| 2265 | prepareFileForWrite(); | 2345 | prepareFileForWrite(); |
| 2266 | 2346 | ||
| 2267 | if (this->linearized) | 2347 | if (this->linearized) |
| @@ -2397,6 +2477,17 @@ QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size, | @@ -2397,6 +2477,17 @@ QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size, | ||
| 2397 | int hint_id, qpdf_offset_t hint_offset, | 2477 | int hint_id, qpdf_offset_t hint_offset, |
| 2398 | qpdf_offset_t hint_length) | 2478 | qpdf_offset_t hint_length) |
| 2399 | { | 2479 | { |
| 2480 | + // ABI compatibility | ||
| 2481 | + return writeXRefTable(which, first, last, size, prev, suppress_offsets, | ||
| 2482 | + hint_id, hint_offset, hint_length, 0); | ||
| 2483 | +} | ||
| 2484 | + | ||
| 2485 | +qpdf_offset_t | ||
| 2486 | +QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size, | ||
| 2487 | + qpdf_offset_t prev, bool suppress_offsets, | ||
| 2488 | + int hint_id, qpdf_offset_t hint_offset, | ||
| 2489 | + qpdf_offset_t hint_length, int linearization_pass) | ||
| 2490 | +{ | ||
| 2400 | writeString("xref\n"); | 2491 | writeString("xref\n"); |
| 2401 | writeString(QUtil::int_to_string(first)); | 2492 | writeString(QUtil::int_to_string(first)); |
| 2402 | writeString(" "); | 2493 | writeString(" "); |
| @@ -2426,7 +2517,7 @@ QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size, | @@ -2426,7 +2517,7 @@ QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size, | ||
| 2426 | writeString(" 00000 n \n"); | 2517 | writeString(" 00000 n \n"); |
| 2427 | } | 2518 | } |
| 2428 | } | 2519 | } |
| 2429 | - writeTrailer(which, size, false, prev); | 2520 | + writeTrailer(which, size, false, prev, linearization_pass); |
| 2430 | writeString("\n"); | 2521 | writeString("\n"); |
| 2431 | return space_before_zero; | 2522 | return space_before_zero; |
| 2432 | } | 2523 | } |
| @@ -2435,8 +2526,9 @@ qpdf_offset_t | @@ -2435,8 +2526,9 @@ qpdf_offset_t | ||
| 2435 | QPDFWriter::writeXRefStream(int objid, int max_id, qpdf_offset_t max_offset, | 2526 | QPDFWriter::writeXRefStream(int objid, int max_id, qpdf_offset_t max_offset, |
| 2436 | trailer_e which, int first, int last, int size) | 2527 | trailer_e which, int first, int last, int size) |
| 2437 | { | 2528 | { |
| 2529 | + // ABI compatibility | ||
| 2438 | return writeXRefStream(objid, max_id, max_offset, | 2530 | return writeXRefStream(objid, max_id, max_offset, |
| 2439 | - which, first, last, size, 0, 0, 0, 0, false); | 2531 | + which, first, last, size, 0, 0, 0, 0, false, 0); |
| 2440 | } | 2532 | } |
| 2441 | 2533 | ||
| 2442 | qpdf_offset_t | 2534 | qpdf_offset_t |
| @@ -2445,7 +2537,8 @@ QPDFWriter::writeXRefStream(int xref_id, int max_id, qpdf_offset_t max_offset, | @@ -2445,7 +2537,8 @@ QPDFWriter::writeXRefStream(int xref_id, int max_id, qpdf_offset_t max_offset, | ||
| 2445 | qpdf_offset_t prev, int hint_id, | 2537 | qpdf_offset_t prev, int hint_id, |
| 2446 | qpdf_offset_t hint_offset, | 2538 | qpdf_offset_t hint_offset, |
| 2447 | qpdf_offset_t hint_length, | 2539 | qpdf_offset_t hint_length, |
| 2448 | - bool skip_compression) | 2540 | + bool skip_compression, |
| 2541 | + int linearization_pass) | ||
| 2449 | { | 2542 | { |
| 2450 | qpdf_offset_t xref_offset = this->pipeline->getCount(); | 2543 | qpdf_offset_t xref_offset = this->pipeline->getCount(); |
| 2451 | qpdf_offset_t space_before_zero = xref_offset - 1; | 2544 | qpdf_offset_t space_before_zero = xref_offset - 1; |
| @@ -2545,7 +2638,7 @@ QPDFWriter::writeXRefStream(int xref_id, int max_id, qpdf_offset_t max_offset, | @@ -2545,7 +2638,7 @@ QPDFWriter::writeXRefStream(int xref_id, int max_id, qpdf_offset_t max_offset, | ||
| 2545 | QUtil::int_to_string(first) + " " + | 2638 | QUtil::int_to_string(first) + " " + |
| 2546 | QUtil::int_to_string(last - first + 1) + " ]"); | 2639 | QUtil::int_to_string(last - first + 1) + " ]"); |
| 2547 | } | 2640 | } |
| 2548 | - writeTrailer(which, size, true, prev); | 2641 | + writeTrailer(which, size, true, prev, linearization_pass); |
| 2549 | writeString("\nstream\n"); | 2642 | writeString("\nstream\n"); |
| 2550 | writeBuffer(xref_data); | 2643 | writeBuffer(xref_data); |
| 2551 | writeString("\nendstream"); | 2644 | writeString("\nendstream"); |
| @@ -2725,6 +2818,10 @@ QPDFWriter::writeLinearized() | @@ -2725,6 +2818,10 @@ QPDFWriter::writeLinearized() | ||
| 2725 | if (pass == 1) | 2818 | if (pass == 1) |
| 2726 | { | 2819 | { |
| 2727 | pushDiscardFilter(); | 2820 | pushDiscardFilter(); |
| 2821 | + if (this->deterministic_id) | ||
| 2822 | + { | ||
| 2823 | + pushMD5Pipeline(); | ||
| 2824 | + } | ||
| 2728 | } | 2825 | } |
| 2729 | 2826 | ||
| 2730 | // Part 1: header | 2827 | // Part 1: header |
| @@ -2807,7 +2904,7 @@ QPDFWriter::writeLinearized() | @@ -2807,7 +2904,7 @@ QPDFWriter::writeLinearized() | ||
| 2807 | first_trailer_size, | 2904 | first_trailer_size, |
| 2808 | hint_length + second_xref_offset, | 2905 | hint_length + second_xref_offset, |
| 2809 | hint_id, hint_offset, hint_length, | 2906 | hint_id, hint_offset, hint_length, |
| 2810 | - (pass == 1)); | 2907 | + (pass == 1), pass); |
| 2811 | qpdf_offset_t endpos = this->pipeline->getCount(); | 2908 | qpdf_offset_t endpos = this->pipeline->getCount(); |
| 2812 | if (pass == 1) | 2909 | if (pass == 1) |
| 2813 | { | 2910 | { |
| @@ -2834,7 +2931,8 @@ QPDFWriter::writeLinearized() | @@ -2834,7 +2931,8 @@ QPDFWriter::writeLinearized() | ||
| 2834 | { | 2931 | { |
| 2835 | writeXRefTable(t_lin_first, first_half_start, first_half_end, | 2932 | writeXRefTable(t_lin_first, first_half_start, first_half_end, |
| 2836 | first_trailer_size, hint_length + second_xref_offset, | 2933 | first_trailer_size, hint_length + second_xref_offset, |
| 2837 | - (pass == 1), hint_id, hint_offset, hint_length); | 2934 | + (pass == 1), hint_id, hint_offset, hint_length, |
| 2935 | + pass); | ||
| 2838 | writeString("startxref\n0\n%%EOF\n"); | 2936 | writeString("startxref\n0\n%%EOF\n"); |
| 2839 | } | 2937 | } |
| 2840 | 2938 | ||
| @@ -2886,7 +2984,7 @@ QPDFWriter::writeLinearized() | @@ -2886,7 +2984,7 @@ QPDFWriter::writeLinearized() | ||
| 2886 | second_half_end, second_xref_offset, | 2984 | second_half_end, second_xref_offset, |
| 2887 | t_lin_second, 0, second_half_end, | 2985 | t_lin_second, 0, second_half_end, |
| 2888 | second_trailer_size, | 2986 | second_trailer_size, |
| 2889 | - 0, 0, 0, 0, (pass == 1)); | 2987 | + 0, 0, 0, 0, (pass == 1), pass); |
| 2890 | qpdf_offset_t endpos = this->pipeline->getCount(); | 2988 | qpdf_offset_t endpos = this->pipeline->getCount(); |
| 2891 | 2989 | ||
| 2892 | if (pass == 1) | 2990 | if (pass == 1) |
| @@ -2920,7 +3018,7 @@ QPDFWriter::writeLinearized() | @@ -2920,7 +3018,7 @@ QPDFWriter::writeLinearized() | ||
| 2920 | { | 3018 | { |
| 2921 | space_before_zero = | 3019 | space_before_zero = |
| 2922 | writeXRefTable(t_lin_second, 0, second_half_end, | 3020 | writeXRefTable(t_lin_second, 0, second_half_end, |
| 2923 | - second_trailer_size); | 3021 | + second_trailer_size, 0, false, 0, 0, 0, pass); |
| 2924 | } | 3022 | } |
| 2925 | writeString("startxref\n"); | 3023 | writeString("startxref\n"); |
| 2926 | writeString(QUtil::int_to_string(first_xref_offset)); | 3024 | writeString(QUtil::int_to_string(first_xref_offset)); |
| @@ -2930,6 +3028,15 @@ QPDFWriter::writeLinearized() | @@ -2930,6 +3028,15 @@ QPDFWriter::writeLinearized() | ||
| 2930 | 3028 | ||
| 2931 | if (pass == 1) | 3029 | if (pass == 1) |
| 2932 | { | 3030 | { |
| 3031 | + if (this->deterministic_id) | ||
| 3032 | + { | ||
| 3033 | + QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", | ||
| 3034 | + need_xref_stream ? 0 : 1); | ||
| 3035 | + computeDeterministicIDData(); | ||
| 3036 | + popPipelineStack(); | ||
| 3037 | + assert(this->md5_pipeline == 0); | ||
| 3038 | + } | ||
| 3039 | + | ||
| 2933 | // Close first pass pipeline | 3040 | // Close first pass pipeline |
| 2934 | file_size = this->pipeline->getCount(); | 3041 | file_size = this->pipeline->getCount(); |
| 2935 | popPipelineStack(); | 3042 | popPipelineStack(); |
| @@ -2954,6 +3061,11 @@ QPDFWriter::writeLinearized() | @@ -2954,6 +3061,11 @@ QPDFWriter::writeLinearized() | ||
| 2954 | void | 3061 | void |
| 2955 | QPDFWriter::writeStandard() | 3062 | QPDFWriter::writeStandard() |
| 2956 | { | 3063 | { |
| 3064 | + if (this->deterministic_id) | ||
| 3065 | + { | ||
| 3066 | + pushMD5Pipeline(); | ||
| 3067 | + } | ||
| 3068 | + | ||
| 2957 | // Start writing | 3069 | // Start writing |
| 2958 | 3070 | ||
| 2959 | writeHeader(); | 3071 | writeHeader(); |
| @@ -3005,4 +3117,12 @@ QPDFWriter::writeStandard() | @@ -3005,4 +3117,12 @@ QPDFWriter::writeStandard() | ||
| 3005 | writeString("startxref\n"); | 3117 | writeString("startxref\n"); |
| 3006 | writeString(QUtil::int_to_string(xref_offset)); | 3118 | writeString(QUtil::int_to_string(xref_offset)); |
| 3007 | writeString("\n%%EOF\n"); | 3119 | writeString("\n%%EOF\n"); |
| 3120 | + | ||
| 3121 | + if (this->deterministic_id) | ||
| 3122 | + { | ||
| 3123 | + QTC::TC("qpdf", "QPDFWriter standard deterministic ID", | ||
| 3124 | + this->object_stream_to_objects.empty() ? 0 : 1); | ||
| 3125 | + popPipelineStack(); | ||
| 3126 | + assert(this->md5_pipeline == 0); | ||
| 3127 | + } | ||
| 3008 | } | 3128 | } |
libqpdf/qpdf-c.cc
| @@ -512,6 +512,12 @@ void qpdf_set_qdf_mode(qpdf_data qpdf, QPDF_BOOL value) | @@ -512,6 +512,12 @@ void qpdf_set_qdf_mode(qpdf_data qpdf, QPDF_BOOL value) | ||
| 512 | qpdf->qpdf_writer->setQDFMode(value); | 512 | qpdf->qpdf_writer->setQDFMode(value); |
| 513 | } | 513 | } |
| 514 | 514 | ||
| 515 | +void qpdf_set_deterministic_ID(qpdf_data qpdf, QPDF_BOOL value) | ||
| 516 | +{ | ||
| 517 | + QTC::TC("qpdf", "qpdf-c called qpdf_set_deterministic_ID"); | ||
| 518 | + qpdf->qpdf_writer->setDeterministicID(value); | ||
| 519 | +} | ||
| 520 | + | ||
| 515 | void qpdf_set_static_ID(qpdf_data qpdf, QPDF_BOOL value) | 521 | void qpdf_set_static_ID(qpdf_data qpdf, QPDF_BOOL value) |
| 516 | { | 522 | { |
| 517 | QTC::TC("qpdf", "qpdf-c called qpdf_set_static_ID"); | 523 | QTC::TC("qpdf", "qpdf-c called qpdf_set_static_ID"); |
libqpdf/qpdf/Pl_MD5.hh
| @@ -25,10 +25,24 @@ class Pl_MD5: public Pipeline | @@ -25,10 +25,24 @@ class Pl_MD5: public Pipeline | ||
| 25 | virtual void finish(); | 25 | virtual void finish(); |
| 26 | QPDF_DLL | 26 | QPDF_DLL |
| 27 | std::string getHexDigest(); | 27 | std::string getHexDigest(); |
| 28 | + // Enable/disable. Disabling the pipeline causes it to become a | ||
| 29 | + // pass-through. This makes it possible to stick an MD5 pipeline | ||
| 30 | + // in a pipeline when it may or may not be required. Disabling it | ||
| 31 | + // avoids incurring the runtime overhead of doing needless | ||
| 32 | + // digest computation. | ||
| 33 | + QPDF_DLL | ||
| 34 | + void enable(bool enabled); | ||
| 35 | + // If persistAcrossFinish is called, calls to finish do not | ||
| 36 | + // finalize the underlying md5 object. In this case, the object is | ||
| 37 | + // not finalized until getHexDigest() is called. | ||
| 38 | + QPDF_DLL | ||
| 39 | + void persistAcrossFinish(bool); | ||
| 28 | 40 | ||
| 29 | private: | 41 | private: |
| 30 | bool in_progress; | 42 | bool in_progress; |
| 31 | MD5 md5; | 43 | MD5 md5; |
| 44 | + bool enabled; | ||
| 45 | + bool persist_across_finish; | ||
| 32 | }; | 46 | }; |
| 33 | 47 | ||
| 34 | #endif // __PL_MD5_HH__ | 48 | #endif // __PL_MD5_HH__ |
libtests/md5.cc
| @@ -45,6 +45,13 @@ int main(int, char*[]) | @@ -45,6 +45,13 @@ int main(int, char*[]) | ||
| 45 | 45 | ||
| 46 | Pl_Discard d; | 46 | Pl_Discard d; |
| 47 | Pl_MD5 p("MD5", &d); | 47 | Pl_MD5 p("MD5", &d); |
| 48 | + // Create a second pipeline, protect against finish, and call | ||
| 49 | + // getHexDigest only once at the end of both passes. Make sure the | ||
| 50 | + // checksum is that of the input file concatenated to itself. This | ||
| 51 | + // will require changes to Pl_MD5.cc to prevent finish from | ||
| 52 | + // calling finalize. | ||
| 53 | + Pl_MD5 p2("MD5", &d); | ||
| 54 | + p2.persistAcrossFinish(true); | ||
| 48 | for (int i = 0; i < 2; ++i) | 55 | for (int i = 0; i < 2; ++i) |
| 49 | { | 56 | { |
| 50 | FILE* f = QUtil::safe_fopen("md5.in", "rb"); | 57 | FILE* f = QUtil::safe_fopen("md5.in", "rb"); |
| @@ -61,12 +68,23 @@ int main(int, char*[]) | @@ -61,12 +68,23 @@ int main(int, char*[]) | ||
| 61 | else | 68 | else |
| 62 | { | 69 | { |
| 63 | p.write(buf, len); | 70 | p.write(buf, len); |
| 71 | + p2.write(buf, len); | ||
| 72 | + if (i == 1) | ||
| 73 | + { | ||
| 74 | + // Partial digest -- resets after each call to write | ||
| 75 | + std::cout << p.getHexDigest() << std::endl; | ||
| 76 | + } | ||
| 64 | } | 77 | } |
| 65 | } | 78 | } |
| 66 | fclose(f); | 79 | fclose(f); |
| 67 | p.finish(); | 80 | p.finish(); |
| 81 | + p2.finish(); | ||
| 82 | + // Make sure calling getHexDigest twice with no intervening | ||
| 83 | + // writes results in the same result each time. | ||
| 84 | + std::cout << p.getHexDigest() << std::endl; | ||
| 68 | std::cout << p.getHexDigest() << std::endl; | 85 | std::cout << p.getHexDigest() << std::endl; |
| 69 | } | 86 | } |
| 87 | + std::cout << p2.getHexDigest() << std::endl; | ||
| 70 | 88 | ||
| 71 | return 0; | 89 | return 0; |
| 72 | } | 90 | } |
libtests/qtest/md5/md5.out
| @@ -14,3 +14,11 @@ d174ab98d277d9f5a5611c2c9f419d9f | @@ -14,3 +14,11 @@ d174ab98d277d9f5a5611c2c9f419d9f | ||
| 14 | 0 | 14 | 0 |
| 15 | 5f4b4321873433daae578f85c72f9e74 | 15 | 5f4b4321873433daae578f85c72f9e74 |
| 16 | 5f4b4321873433daae578f85c72f9e74 | 16 | 5f4b4321873433daae578f85c72f9e74 |
| 17 | +41f977636f79cf1bad1b439caa7d627c | ||
| 18 | +c30e03b5536e37306df25489622e13e3 | ||
| 19 | +9dabbd135cc47bb603a94989df37c926 | ||
| 20 | +ce80591b269b749f65c53b71d0be5212 | ||
| 21 | +db5448be0a1e931cbd84654e82063483 | ||
| 22 | +db5448be0a1e931cbd84654e82063483 | ||
| 23 | +db5448be0a1e931cbd84654e82063483 | ||
| 24 | +9833b12b21147bebb2f33d35807049af |
manual/qpdf-manual.xml
| @@ -991,11 +991,30 @@ outfile.pdf</option> | @@ -991,11 +991,30 @@ outfile.pdf</option> | ||
| 991 | file should be given. The following options are available: | 991 | file should be given. The following options are available: |
| 992 | <variablelist> | 992 | <variablelist> |
| 993 | <varlistentry> | 993 | <varlistentry> |
| 994 | + <term><option>--deterministic-id</option></term> | ||
| 995 | + <listitem> | ||
| 996 | + <para> | ||
| 997 | + Causes generation of a deterministic value for /ID. This | ||
| 998 | + prevents use of timestamp and output file name information in | ||
| 999 | + the /ID generation. Instead, at some slight additional runtime | ||
| 1000 | + cost, the /ID field is generated to include a digest of the | ||
| 1001 | + significant parts of the content of the output PDF file. This | ||
| 1002 | + means that a given qpdf operation should generate the same /ID | ||
| 1003 | + each time it is run, which can be useful when caching results | ||
| 1004 | + or for generation of some test data. Use of this flag is not | ||
| 1005 | + compatible with creation of encrypted files. | ||
| 1006 | + </para> | ||
| 1007 | + </listitem> | ||
| 1008 | + </varlistentry> | ||
| 1009 | + <varlistentry> | ||
| 994 | <term><option>--static-id</option></term> | 1010 | <term><option>--static-id</option></term> |
| 995 | <listitem> | 1011 | <listitem> |
| 996 | <para> | 1012 | <para> |
| 997 | - Causes generation of a fixed value for /ID. This is intended | ||
| 998 | - for testing only. Never use it for production files. | 1013 | + Causes generation of a fixed value for /ID. This is intended |
| 1014 | + for testing only. Never use it for production files. If you | ||
| 1015 | + are trying to get the same /ID each time for a given file and | ||
| 1016 | + you are not generating encrypted files, consider using the | ||
| 1017 | + <option>--deterministic-id</option> option. | ||
| 999 | </para> | 1018 | </para> |
| 1000 | </listitem> | 1019 | </listitem> |
| 1001 | </varlistentry> | 1020 | </varlistentry> |
qpdf/qpdf-ctest.c
| @@ -427,6 +427,18 @@ static void test18(char const* infile, | @@ -427,6 +427,18 @@ static void test18(char const* infile, | ||
| 427 | report_errors(); | 427 | report_errors(); |
| 428 | } | 428 | } |
| 429 | 429 | ||
| 430 | +static void test19(char const* infile, | ||
| 431 | + char const* password, | ||
| 432 | + char const* outfile, | ||
| 433 | + char const* outfile2) | ||
| 434 | +{ | ||
| 435 | + qpdf_read(qpdf, infile, password); | ||
| 436 | + qpdf_init_write(qpdf, outfile); | ||
| 437 | + qpdf_set_deterministic_ID(qpdf, QPDF_TRUE); | ||
| 438 | + qpdf_write(qpdf); | ||
| 439 | + report_errors(); | ||
| 440 | +} | ||
| 441 | + | ||
| 430 | int main(int argc, char* argv[]) | 442 | int main(int argc, char* argv[]) |
| 431 | { | 443 | { |
| 432 | char* p = 0; | 444 | char* p = 0; |
| @@ -485,6 +497,7 @@ int main(int argc, char* argv[]) | @@ -485,6 +497,7 @@ int main(int argc, char* argv[]) | ||
| 485 | (n == 16) ? test16 : | 497 | (n == 16) ? test16 : |
| 486 | (n == 17) ? test17 : | 498 | (n == 17) ? test17 : |
| 487 | (n == 18) ? test18 : | 499 | (n == 18) ? test18 : |
| 500 | + (n == 19) ? test19 : | ||
| 488 | 0); | 501 | 0); |
| 489 | 502 | ||
| 490 | if (fn == 0) | 503 | if (fn == 0) |
qpdf/qpdf.cc
| @@ -237,6 +237,7 @@ Testing, Inspection, and Debugging Options\n\ | @@ -237,6 +237,7 @@ Testing, Inspection, and Debugging Options\n\ | ||
| 237 | These options can be useful for digging into PDF files or for use in\n\ | 237 | These options can be useful for digging into PDF files or for use in\n\ |
| 238 | automated test suites for software that uses the qpdf library.\n\ | 238 | automated test suites for software that uses the qpdf library.\n\ |
| 239 | \n\ | 239 | \n\ |
| 240 | +--deterministic-id generate deterministic /ID\n\ | ||
| 240 | --static-id generate static /ID: FOR TESTING ONLY!\n\ | 241 | --static-id generate static /ID: FOR TESTING ONLY!\n\ |
| 241 | --static-aes-iv use a static initialization vector for AES-CBC\n\ | 242 | --static-aes-iv use a static initialization vector for AES-CBC\n\ |
| 242 | This is option is not secure! FOR TESTING ONLY!\n\ | 243 | This is option is not secure! FOR TESTING ONLY!\n\ |
| @@ -1031,6 +1032,7 @@ int main(int argc, char* argv[]) | @@ -1031,6 +1032,7 @@ int main(int argc, char* argv[]) | ||
| 1031 | std::string force_version; | 1032 | std::string force_version; |
| 1032 | 1033 | ||
| 1033 | bool show_npages = false; | 1034 | bool show_npages = false; |
| 1035 | + bool deterministic_id = false; | ||
| 1034 | bool static_id = false; | 1036 | bool static_id = false; |
| 1035 | bool static_aes_iv = false; | 1037 | bool static_aes_iv = false; |
| 1036 | bool suppress_original_object_id = false; | 1038 | bool suppress_original_object_id = false; |
| @@ -1229,6 +1231,10 @@ int main(int argc, char* argv[]) | @@ -1229,6 +1231,10 @@ int main(int argc, char* argv[]) | ||
| 1229 | } | 1231 | } |
| 1230 | force_version = parameter; | 1232 | force_version = parameter; |
| 1231 | } | 1233 | } |
| 1234 | + else if (strcmp(arg, "deterministic-id") == 0) | ||
| 1235 | + { | ||
| 1236 | + deterministic_id = true; | ||
| 1237 | + } | ||
| 1232 | else if (strcmp(arg, "static-id") == 0) | 1238 | else if (strcmp(arg, "static-id") == 0) |
| 1233 | { | 1239 | { |
| 1234 | static_id = true; | 1240 | static_id = true; |
| @@ -1710,6 +1716,10 @@ int main(int argc, char* argv[]) | @@ -1710,6 +1716,10 @@ int main(int argc, char* argv[]) | ||
| 1710 | { | 1716 | { |
| 1711 | w.setPreserveEncryption(false); | 1717 | w.setPreserveEncryption(false); |
| 1712 | } | 1718 | } |
| 1719 | + if (deterministic_id) | ||
| 1720 | + { | ||
| 1721 | + w.setDeterministicID(true); | ||
| 1722 | + } | ||
| 1713 | if (static_id) | 1723 | if (static_id) |
| 1714 | { | 1724 | { |
| 1715 | w.setStaticID(true); | 1725 | w.setStaticID(true); |
qpdf/qpdf.testcov
| @@ -269,3 +269,7 @@ qpdf pages range omitted at end 0 | @@ -269,3 +269,7 @@ qpdf pages range omitted at end 0 | ||
| 269 | qpdf pages range omitted in middle 0 | 269 | qpdf pages range omitted in middle 0 |
| 270 | qpdf npages 0 | 270 | qpdf npages 0 |
| 271 | QPDF already reserved object 0 | 271 | QPDF already reserved object 0 |
| 272 | +QPDFWriter standard deterministic ID 1 | ||
| 273 | +QPDFWriter linearized deterministic ID 1 | ||
| 274 | +QPDFWriter deterministic with no data 0 | ||
| 275 | +qpdf-c called qpdf_set_deterministic_ID 0 |
qpdf/qtest/qpdf.test
| @@ -990,6 +990,43 @@ $td->runtest("write damaged", | @@ -990,6 +990,43 @@ $td->runtest("write damaged", | ||
| 990 | 990 | ||
| 991 | show_ntests(); | 991 | show_ntests(); |
| 992 | # ---------- | 992 | # ---------- |
| 993 | +$td->notify("--- Deterministic ID Tests ---"); | ||
| 994 | +$n_tests += 11; | ||
| 995 | +foreach my $d ('nn', 'ny', 'yn', 'yy') | ||
| 996 | +{ | ||
| 997 | + my $linearize = ($d =~ m/^y/); | ||
| 998 | + my $ostream = ($d =~ m/y$/); | ||
| 999 | + $td->runtest("deterministic ID: linearize/ostream=$d", | ||
| 1000 | + {$td->COMMAND => | ||
| 1001 | + "qpdf -deterministic-id" . | ||
| 1002 | + ($linearize ? " -linearize" : "") . | ||
| 1003 | + " -object-streams=" . ($ostream ? "generate" : "disable") . | ||
| 1004 | + " deterministic-id-in.pdf a.pdf"}, | ||
| 1005 | + {$td->STRING => "", | ||
| 1006 | + $td->EXIT_STATUS => 0}); | ||
| 1007 | + $td->runtest("compare files", | ||
| 1008 | + {$td->FILE => "a.pdf"}, | ||
| 1009 | + {$td->FILE => "deterministic-id-$d.pdf"}); | ||
| 1010 | +} | ||
| 1011 | + | ||
| 1012 | +$td->runtest("deterministic ID with encryption", | ||
| 1013 | + {$td->COMMAND => "qpdf -deterministic-id encrypted-with-images.pdf a.pdf"}, | ||
| 1014 | + {$td->STRING => "INTERNAL ERROR: QPDFWriter::generateID" . | ||
| 1015 | + " has no data for deterministic ID." . | ||
| 1016 | + " This may happen if deterministic ID and" . | ||
| 1017 | + " file encryption are requested together.\n", | ||
| 1018 | + $td->EXIT_STATUS => 2}, | ||
| 1019 | + $td->NORMALIZE_NEWLINES); | ||
| 1020 | +$td->runtest("deterministic ID (C API)", | ||
| 1021 | + {$td->COMMAND => | ||
| 1022 | + "qpdf-ctest 19 deterministic-id-in.pdf '' a.pdf"}, | ||
| 1023 | + {$td->STRING => "", | ||
| 1024 | + $td->EXIT_STATUS => 0}); | ||
| 1025 | +$td->runtest("compare files", | ||
| 1026 | + {$td->FILE => "a.pdf"}, | ||
| 1027 | + {$td->FILE => "deterministic-id-nn.pdf"}); | ||
| 1028 | + | ||
| 1029 | +# ---------- | ||
| 993 | $td->notify("--- Object Stream Tests ---"); | 1030 | $td->notify("--- Object Stream Tests ---"); |
| 994 | $n_tests += (36 * 4) + (12 * 2); | 1031 | $n_tests += (36 * 4) + (12 * 2); |
| 995 | $n_compare_pdfs += 36; | 1032 | $n_compare_pdfs += 36; |
qpdf/qtest/qpdf/deterministic-id-in.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/deterministic-id-nn.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/deterministic-id-ny.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/deterministic-id-yn.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/deterministic-id-yy.pdf
0 → 100644
No preview for this file type