Commit a3576a73593987b26cd3eff346f8f7c11f713cbd
1 parent 96eb9651
Bug fix: handle generation > 0 when generating object streams
Rework QPDFWriter to always track old object IDs as QPDFObjGen instead of int, thus not discarding the generation number. Switch to QPDF::getCompressibleObjGens() to properly handle the case of an old object eligible for compression that has a generation other than zero.
Showing 9 changed files with 285 additions and 85 deletions
ChangeLog
| 1 | 1 | 2013-06-14 Jay Berkenbilt <ejb@ql.org> |
| 2 | 2 | |
| 3 | + * Bug fix: properly handle object stream generation when the | |
| 4 | + original file has some compressible objects with generation != 0. | |
| 5 | + | |
| 6 | + * Add QPDF::getCompressibleObjGens() and deprecate | |
| 7 | + QPDF::getCompressibleObjects(), which had a flaw in its logic. | |
| 8 | + | |
| 3 | 9 | * Add new QPDFObjectHandle::getObjGen() method and indicate in |
| 4 | 10 | comments that its use is favored over getObjectID() and |
| 5 | 11 | getGeneration() for most cases. | ... | ... |
include/qpdf/QPDF.hh
| ... | ... | @@ -434,8 +434,19 @@ class QPDF |
| 434 | 434 | // Map object to object stream that contains it |
| 435 | 435 | QPDF_DLL |
| 436 | 436 | void getObjectStreamData(std::map<int, int>&); |
| 437 | + | |
| 437 | 438 | // Get a list of objects that would be permitted in an object |
| 438 | - // stream | |
| 439 | + // stream. | |
| 440 | + QPDF_DLL | |
| 441 | + std::vector<QPDFObjGen> getCompressibleObjGens(); | |
| 442 | + | |
| 443 | + // Deprecated: get a list of objects that would be permitted in an | |
| 444 | + // object stream. This method is deprecated and will be removed. | |
| 445 | + // It's incorrect because it disregards the generations of the | |
| 446 | + // compressible objects, which can lead (and has led) to bugs. | |
| 447 | + // This method will throw an exception if any of the objects | |
| 448 | + // returned have a generation of other than zero. Use | |
| 449 | + // getCompressibleObjGens() instead. | |
| 439 | 450 | QPDF_DLL |
| 440 | 451 | std::vector<int> getCompressibleObjects(); |
| 441 | 452 | ... | ... |
include/qpdf/QPDFWriter.hh
| ... | ... | @@ -24,6 +24,7 @@ |
| 24 | 24 | |
| 25 | 25 | #include <qpdf/Constants.h> |
| 26 | 26 | |
| 27 | +#include <qpdf/QPDFObjGen.hh> | |
| 27 | 28 | #include <qpdf/QPDFXRefEntry.hh> |
| 28 | 29 | |
| 29 | 30 | #include <qpdf/Pl_Buffer.hh> |
| ... | ... | @@ -289,7 +290,7 @@ class QPDFWriter |
| 289 | 290 | void writeStringQDF(std::string const& str); |
| 290 | 291 | void writeStringNoQDF(std::string const& str); |
| 291 | 292 | void writePad(int nspaces); |
| 292 | - void assignCompressedObjectNumbers(int objid); | |
| 293 | + void assignCompressedObjectNumbers(QPDFObjGen const& og); | |
| 293 | 294 | void enqueueObject(QPDFObjectHandle object); |
| 294 | 295 | void writeObjectStreamOffsets( |
| 295 | 296 | std::vector<qpdf_offset_t>& offsets, int first_obj); |
| ... | ... | @@ -380,6 +381,9 @@ class QPDFWriter |
| 380 | 381 | void pushEncryptionFilter(); |
| 381 | 382 | void pushDiscardFilter(); |
| 382 | 383 | |
| 384 | + void discardGeneration(std::map<QPDFObjGen, int> const& in, | |
| 385 | + std::map<int, int>& out); | |
| 386 | + | |
| 383 | 387 | QPDF& pdf; |
| 384 | 388 | char const* filename; |
| 385 | 389 | FILE* file; |
| ... | ... | @@ -419,7 +423,7 @@ class QPDFWriter |
| 419 | 423 | std::list<PointerHolder<Pipeline> > to_delete; |
| 420 | 424 | Pl_Count* pipeline; |
| 421 | 425 | std::list<QPDFObjectHandle> object_queue; |
| 422 | - std::map<int, int> obj_renumber; | |
| 426 | + std::map<QPDFObjGen, int> obj_renumber; | |
| 423 | 427 | std::map<int, QPDFXRefEntry> xref; |
| 424 | 428 | std::map<int, qpdf_offset_t> lengths; |
| 425 | 429 | int next_objid; |
| ... | ... | @@ -427,12 +431,16 @@ class QPDFWriter |
| 427 | 431 | size_t cur_stream_length; |
| 428 | 432 | bool added_newline; |
| 429 | 433 | int max_ostream_index; |
| 430 | - std::set<int> normalized_streams; | |
| 431 | - std::map<int, int> page_object_to_seq; | |
| 432 | - std::map<int, int> contents_to_page_seq; | |
| 433 | - std::map<int, int> object_to_object_stream; | |
| 434 | - std::map<int, std::set<int> > object_stream_to_objects; | |
| 434 | + std::set<QPDFObjGen> normalized_streams; | |
| 435 | + std::map<QPDFObjGen, int> page_object_to_seq; | |
| 436 | + std::map<QPDFObjGen, int> contents_to_page_seq; | |
| 437 | + std::map<QPDFObjGen, int> object_to_object_stream; | |
| 438 | + std::map<int, std::set<QPDFObjGen> > object_stream_to_objects; | |
| 435 | 439 | std::list<Pipeline*> pipeline_stack; |
| 440 | + | |
| 441 | + // For linearization only | |
| 442 | + std::map<int, int> obj_renumber_no_gen; | |
| 443 | + std::map<int, int> object_to_object_stream_no_gen; | |
| 436 | 444 | }; |
| 437 | 445 | |
| 438 | 446 | #endif // __QPDFWRITER_HH__ | ... | ... |
libqpdf/QPDF.cc
| ... | ... | @@ -1944,55 +1944,68 @@ QPDF::getObjectStreamData(std::map<int, int>& omap) |
| 1944 | 1944 | std::vector<int> |
| 1945 | 1945 | QPDF::getCompressibleObjects() |
| 1946 | 1946 | { |
| 1947 | - // Return a set of object numbers of objects that are allowed to | |
| 1948 | - // be in object streams. We disregard generation numbers here | |
| 1949 | - // since this is a helper function for QPDFWriter which is going | |
| 1950 | - // to renumber objects anyway. This code will do weird things if | |
| 1951 | - // we have two objects with the same object number and different | |
| 1952 | - // generations, but so do virtually all PDF consumers, | |
| 1953 | - // particularly since this is not a permitted condition. | |
| 1954 | - | |
| 1955 | - // We walk through the objects by traversing the document from the | |
| 1956 | - // root, including a traversal of the pages tree. This makes that | |
| 1957 | - // objects that are on the same page are more likely to be in the | |
| 1958 | - // same object stream, which is slightly more efficient, | |
| 1947 | + std::vector<QPDFObjGen> objects = getCompressibleObjGens(); | |
| 1948 | + std::vector<int> result; | |
| 1949 | + for (std::vector<QPDFObjGen>::iterator iter = objects.begin(); | |
| 1950 | + iter != objects.end(); ++iter) | |
| 1951 | + { | |
| 1952 | + if ((*iter).getGen() != 0) | |
| 1953 | + { | |
| 1954 | + throw std::logic_error( | |
| 1955 | + "QPDF::getCompressibleObjects() would return an object ID" | |
| 1956 | + " for an object with generation != 0. Use" | |
| 1957 | + " QPDF::getCompressibleObjGens() instead." | |
| 1958 | + " See comments in QPDF.hh."); | |
| 1959 | + } | |
| 1960 | + else | |
| 1961 | + { | |
| 1962 | + result.push_back((*iter).getObj()); | |
| 1963 | + } | |
| 1964 | + } | |
| 1965 | + return result; | |
| 1966 | +} | |
| 1967 | + | |
| 1968 | +std::vector<QPDFObjGen> | |
| 1969 | +QPDF::getCompressibleObjGens() | |
| 1970 | +{ | |
| 1971 | + // Return a list of objects that are allowed to be in object | |
| 1972 | + // streams. Walk through the objects by traversing the document | |
| 1973 | + // from the root, including a traversal of the pages tree. This | |
| 1974 | + // makes objects that are on the same page more likely to | |
| 1975 | + // be in the same object stream, which is slightly more efficient, | |
| 1959 | 1976 | // particularly with linearized files. This is better than |
| 1960 | 1977 | // iterating through the xref table since it avoids preserving |
| 1961 | 1978 | // orphaned items. |
| 1962 | 1979 | |
| 1963 | 1980 | // Exclude encryption dictionary, if any |
| 1964 | - int encryption_dict_id = 0; | |
| 1965 | 1981 | QPDFObjectHandle encryption_dict = trailer.getKey("/Encrypt"); |
| 1966 | - if (encryption_dict.isIndirect()) | |
| 1967 | - { | |
| 1968 | - encryption_dict_id = encryption_dict.getObjectID(); | |
| 1969 | - } | |
| 1982 | + QPDFObjGen encryption_dict_og = encryption_dict.getObjGen(); | |
| 1970 | 1983 | |
| 1971 | - std::set<int> visited; | |
| 1984 | + std::set<QPDFObjGen> visited; | |
| 1972 | 1985 | std::list<QPDFObjectHandle> queue; |
| 1973 | 1986 | queue.push_front(this->trailer); |
| 1974 | - std::vector<int> result; | |
| 1987 | + std::vector<QPDFObjGen> result; | |
| 1975 | 1988 | while (! queue.empty()) |
| 1976 | 1989 | { |
| 1977 | 1990 | QPDFObjectHandle obj = queue.front(); |
| 1978 | 1991 | queue.pop_front(); |
| 1979 | 1992 | if (obj.isIndirect()) |
| 1980 | 1993 | { |
| 1981 | - int objid = obj.getObjectID(); | |
| 1982 | - if (visited.count(objid)) | |
| 1994 | + QPDFObjGen og = obj.getObjGen(); | |
| 1995 | + if (visited.count(og)) | |
| 1983 | 1996 | { |
| 1984 | 1997 | QTC::TC("qpdf", "QPDF loop detected traversing objects"); |
| 1985 | 1998 | continue; |
| 1986 | 1999 | } |
| 1987 | - if (objid == encryption_dict_id) | |
| 2000 | + if (og == encryption_dict_og) | |
| 1988 | 2001 | { |
| 1989 | 2002 | QTC::TC("qpdf", "QPDF exclude encryption dictionary"); |
| 1990 | 2003 | } |
| 1991 | 2004 | else if (! obj.isStream()) |
| 1992 | 2005 | { |
| 1993 | - result.push_back(objid); | |
| 2006 | + result.push_back(og); | |
| 1994 | 2007 | } |
| 1995 | - visited.insert(objid); | |
| 2008 | + visited.insert(og); | |
| 1996 | 2009 | } |
| 1997 | 2010 | if (obj.isStream()) |
| 1998 | 2011 | { | ... | ... |
libqpdf/QPDFWriter.cc
| ... | ... | @@ -933,16 +933,19 @@ QPDFWriter::closeObject(int objid) |
| 933 | 933 | } |
| 934 | 934 | |
| 935 | 935 | void |
| 936 | -QPDFWriter::assignCompressedObjectNumbers(int objid) | |
| 936 | +QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen const& og) | |
| 937 | 937 | { |
| 938 | - if (this->object_stream_to_objects.count(objid) == 0) | |
| 938 | + int objid = og.getObj(); | |
| 939 | + if ((og.getGen() != 0) || | |
| 940 | + (this->object_stream_to_objects.count(objid) == 0)) | |
| 939 | 941 | { |
| 942 | + // This is not an object stream. | |
| 940 | 943 | return; |
| 941 | 944 | } |
| 942 | 945 | |
| 943 | 946 | // Reserve numbers for the objects that belong to this object |
| 944 | 947 | // stream. |
| 945 | - for (std::set<int>::iterator iter = | |
| 948 | + for (std::set<QPDFObjGen>::iterator iter = | |
| 946 | 949 | this->object_stream_to_objects[objid].begin(); |
| 947 | 950 | iter != this->object_stream_to_objects[objid].end(); |
| 948 | 951 | ++iter) |
| ... | ... | @@ -969,30 +972,32 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object) |
| 969 | 972 | { |
| 970 | 973 | // This is a place-holder object for an object stream |
| 971 | 974 | } |
| 972 | - int objid = object.getObjectID(); | |
| 975 | + QPDFObjGen og = object.getObjGen(); | |
| 973 | 976 | |
| 974 | - if (obj_renumber.count(objid) == 0) | |
| 977 | + if (obj_renumber.count(og) == 0) | |
| 975 | 978 | { |
| 976 | - if (this->object_to_object_stream.count(objid)) | |
| 979 | + if (this->object_to_object_stream.count(og)) | |
| 977 | 980 | { |
| 978 | 981 | // This is in an object stream. Don't process it |
| 979 | - // here. Instead, enqueue the object stream. | |
| 980 | - int stream_id = this->object_to_object_stream[objid]; | |
| 982 | + // here. Instead, enqueue the object stream. Object | |
| 983 | + // streams always have generation 0. | |
| 984 | + int stream_id = this->object_to_object_stream[og]; | |
| 981 | 985 | enqueueObject(this->pdf.getObjectByID(stream_id, 0)); |
| 982 | 986 | } |
| 983 | 987 | else |
| 984 | 988 | { |
| 985 | 989 | object_queue.push_back(object); |
| 986 | - obj_renumber[objid] = next_objid++; | |
| 990 | + obj_renumber[og] = next_objid++; | |
| 987 | 991 | |
| 988 | - if (this->object_stream_to_objects.count(objid)) | |
| 992 | + if ((og.getGen() == 0) && | |
| 993 | + this->object_stream_to_objects.count(og.getObj())) | |
| 989 | 994 | { |
| 990 | 995 | // For linearized files, uncompressed objects go |
| 991 | 996 | // at end, and we take care of assigning numbers |
| 992 | 997 | // to them elsewhere. |
| 993 | 998 | if (! this->linearized) |
| 994 | 999 | { |
| 995 | - assignCompressedObjectNumbers(objid); | |
| 1000 | + assignCompressedObjectNumbers(og); | |
| 996 | 1001 | } |
| 997 | 1002 | } |
| 998 | 1003 | else if ((! this->direct_stream_lengths) && object.isStream()) |
| ... | ... | @@ -1041,8 +1046,8 @@ QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags) |
| 1041 | 1046 | } |
| 1042 | 1047 | if (child.isIndirect()) |
| 1043 | 1048 | { |
| 1044 | - int old_id = child.getObjectID(); | |
| 1045 | - int new_id = obj_renumber[old_id]; | |
| 1049 | + QPDFObjGen old_og = child.getObjGen(); | |
| 1050 | + int new_id = obj_renumber[old_og]; | |
| 1046 | 1051 | writeString(QUtil::int_to_string(new_id)); |
| 1047 | 1052 | writeString(" 0 R"); |
| 1048 | 1053 | } |
| ... | ... | @@ -1134,7 +1139,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, |
| 1134 | 1139 | unsigned int flags, size_t stream_length, |
| 1135 | 1140 | bool compress) |
| 1136 | 1141 | { |
| 1137 | - int old_id = object.getObjectID(); | |
| 1142 | + QPDFObjGen old_og = object.getObjGen(); | |
| 1138 | 1143 | unsigned int child_flags = flags & ~f_stream; |
| 1139 | 1144 | |
| 1140 | 1145 | std::string indent; |
| ... | ... | @@ -1201,7 +1206,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, |
| 1201 | 1206 | bool have_extensions_adbe = false; |
| 1202 | 1207 | |
| 1203 | 1208 | QPDFObjectHandle extensions; |
| 1204 | - if (old_id == pdf.getRoot().getObjectID()) | |
| 1209 | + if (old_og == pdf.getRoot().getObjGen()) | |
| 1205 | 1210 | { |
| 1206 | 1211 | is_root = true; |
| 1207 | 1212 | if (object.hasKey("/Extensions") && |
| ... | ... | @@ -1396,7 +1401,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, |
| 1396 | 1401 | else if (object.isStream()) |
| 1397 | 1402 | { |
| 1398 | 1403 | // Write stream data to a buffer. |
| 1399 | - int new_id = obj_renumber[old_id]; | |
| 1404 | + int new_id = obj_renumber[old_og]; | |
| 1400 | 1405 | if (! this->direct_stream_lengths) |
| 1401 | 1406 | { |
| 1402 | 1407 | this->cur_stream_length_id = new_id + 1; |
| ... | ... | @@ -1436,7 +1441,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, |
| 1436 | 1441 | filter = true; |
| 1437 | 1442 | compress = false; |
| 1438 | 1443 | } |
| 1439 | - else if (this->normalize_content && normalized_streams.count(old_id)) | |
| 1444 | + else if (this->normalize_content && normalized_streams.count(old_og)) | |
| 1440 | 1445 | { |
| 1441 | 1446 | normalize = true; |
| 1442 | 1447 | filter = true; |
| ... | ... | @@ -1562,8 +1567,10 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) |
| 1562 | 1567 | // Note: object might be null if this is a place-holder for an |
| 1563 | 1568 | // object stream that we are generating from scratch. |
| 1564 | 1569 | |
| 1565 | - int old_id = object.getObjectID(); | |
| 1566 | - int new_id = obj_renumber[old_id]; | |
| 1570 | + QPDFObjGen old_og = object.getObjGen(); | |
| 1571 | + assert(old_og.getGen() == 0); | |
| 1572 | + int old_id = old_og.getObj(); | |
| 1573 | + int new_id = obj_renumber[old_og]; | |
| 1567 | 1574 | |
| 1568 | 1575 | std::vector<qpdf_offset_t> offsets; |
| 1569 | 1576 | qpdf_offset_t first = 0; |
| ... | ... | @@ -1612,12 +1619,12 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) |
| 1612 | 1619 | } |
| 1613 | 1620 | |
| 1614 | 1621 | int count = 0; |
| 1615 | - for (std::set<int>::iterator iter = | |
| 1622 | + for (std::set<QPDFObjGen>::iterator iter = | |
| 1616 | 1623 | this->object_stream_to_objects[old_id].begin(); |
| 1617 | 1624 | iter != this->object_stream_to_objects[old_id].end(); |
| 1618 | 1625 | ++iter, ++count) |
| 1619 | 1626 | { |
| 1620 | - int obj = *iter; | |
| 1627 | + QPDFObjGen obj = *iter; | |
| 1621 | 1628 | int new_obj = this->obj_renumber[obj]; |
| 1622 | 1629 | if (first_obj == -1) |
| 1623 | 1630 | { |
| ... | ... | @@ -1631,7 +1638,17 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) |
| 1631 | 1638 | if (! this->suppress_original_object_ids) |
| 1632 | 1639 | { |
| 1633 | 1640 | writeString("; original object ID: " + |
| 1634 | - QUtil::int_to_string(obj)); | |
| 1641 | + QUtil::int_to_string(obj.getObj())); | |
| 1642 | + // For compatibility, only write the generation if | |
| 1643 | + // non-zero. While object streams only allow | |
| 1644 | + // objects with generation 0, if we are generating | |
| 1645 | + // object streams, the old object could have a | |
| 1646 | + // non-zero generation. | |
| 1647 | + if (obj.getGen() != 0) | |
| 1648 | + { | |
| 1649 | + QTC::TC("qpdf", "QPDFWriter original obj non-zero gen"); | |
| 1650 | + writeString(" " + QUtil::int_to_string(obj.getGen())); | |
| 1651 | + } | |
| 1635 | 1652 | } |
| 1636 | 1653 | writeString("\n"); |
| 1637 | 1654 | } |
| ... | ... | @@ -1639,7 +1656,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) |
| 1639 | 1656 | { |
| 1640 | 1657 | offsets.push_back(this->pipeline->getCount()); |
| 1641 | 1658 | } |
| 1642 | - writeObject(this->pdf.getObjectByID(obj, 0), count); | |
| 1659 | + writeObject(this->pdf.getObjectByObjGen(obj), count); | |
| 1643 | 1660 | |
| 1644 | 1661 | this->xref[new_obj] = QPDFXRefEntry(2, new_id, count); |
| 1645 | 1662 | } |
| ... | ... | @@ -1697,32 +1714,33 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) |
| 1697 | 1714 | void |
| 1698 | 1715 | QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index) |
| 1699 | 1716 | { |
| 1700 | - int old_id = object.getObjectID(); | |
| 1717 | + QPDFObjGen old_og = object.getObjGen(); | |
| 1701 | 1718 | |
| 1702 | 1719 | if ((object_stream_index == -1) && |
| 1703 | - (this->object_stream_to_objects.count(old_id))) | |
| 1720 | + (old_og.getGen() == 0) && | |
| 1721 | + (this->object_stream_to_objects.count(old_og.getObj()))) | |
| 1704 | 1722 | { |
| 1705 | 1723 | writeObjectStream(object); |
| 1706 | 1724 | return; |
| 1707 | 1725 | } |
| 1708 | 1726 | |
| 1709 | - int new_id = obj_renumber[old_id]; | |
| 1727 | + int new_id = obj_renumber[old_og]; | |
| 1710 | 1728 | if (this->qdf_mode) |
| 1711 | 1729 | { |
| 1712 | - if (this->page_object_to_seq.count(old_id)) | |
| 1730 | + if (this->page_object_to_seq.count(old_og)) | |
| 1713 | 1731 | { |
| 1714 | 1732 | writeString("%% Page "); |
| 1715 | 1733 | writeString( |
| 1716 | 1734 | QUtil::int_to_string( |
| 1717 | - this->page_object_to_seq[old_id])); | |
| 1735 | + this->page_object_to_seq[old_og])); | |
| 1718 | 1736 | writeString("\n"); |
| 1719 | 1737 | } |
| 1720 | - if (this->contents_to_page_seq.count(old_id)) | |
| 1738 | + if (this->contents_to_page_seq.count(old_og)) | |
| 1721 | 1739 | { |
| 1722 | 1740 | writeString("%% Contents for page "); |
| 1723 | 1741 | writeString( |
| 1724 | 1742 | QUtil::int_to_string( |
| 1725 | - this->contents_to_page_seq[old_id])); | |
| 1743 | + this->contents_to_page_seq[old_og])); | |
| 1726 | 1744 | writeString("\n"); |
| 1727 | 1745 | } |
| 1728 | 1746 | } |
| ... | ... | @@ -1854,24 +1872,24 @@ QPDFWriter::initializeSpecialStreams() |
| 1854 | 1872 | iter != pages.end(); ++iter) |
| 1855 | 1873 | { |
| 1856 | 1874 | QPDFObjectHandle& page = *iter; |
| 1857 | - this->page_object_to_seq[page.getObjectID()] = ++num; | |
| 1875 | + this->page_object_to_seq[page.getObjGen()] = ++num; | |
| 1858 | 1876 | QPDFObjectHandle contents = page.getKey("/Contents"); |
| 1859 | - std::vector<int> contents_objects; | |
| 1877 | + std::vector<QPDFObjGen> contents_objects; | |
| 1860 | 1878 | if (contents.isArray()) |
| 1861 | 1879 | { |
| 1862 | 1880 | int n = contents.getArrayNItems(); |
| 1863 | 1881 | for (int i = 0; i < n; ++i) |
| 1864 | 1882 | { |
| 1865 | 1883 | contents_objects.push_back( |
| 1866 | - contents.getArrayItem(i).getObjectID()); | |
| 1884 | + contents.getArrayItem(i).getObjGen()); | |
| 1867 | 1885 | } |
| 1868 | 1886 | } |
| 1869 | 1887 | else if (contents.isStream()) |
| 1870 | 1888 | { |
| 1871 | - contents_objects.push_back(contents.getObjectID()); | |
| 1889 | + contents_objects.push_back(contents.getObjGen()); | |
| 1872 | 1890 | } |
| 1873 | 1891 | |
| 1874 | - for (std::vector<int>::iterator iter = contents_objects.begin(); | |
| 1892 | + for (std::vector<QPDFObjGen>::iterator iter = contents_objects.begin(); | |
| 1875 | 1893 | iter != contents_objects.end(); ++iter) |
| 1876 | 1894 | { |
| 1877 | 1895 | this->contents_to_page_seq[*iter] = num; |
| ... | ... | @@ -1883,7 +1901,20 @@ QPDFWriter::initializeSpecialStreams() |
| 1883 | 1901 | void |
| 1884 | 1902 | QPDFWriter::preserveObjectStreams() |
| 1885 | 1903 | { |
| 1886 | - this->pdf.getObjectStreamData(this->object_to_object_stream); | |
| 1904 | + // Our object_to_object_stream map has to be keyed by ObjGen | |
| 1905 | + // since we may be generating object streams out of old objects | |
| 1906 | + // that have generation numbers greater than zero. However in an | |
| 1907 | + // existing PDF, all object stream objects and all objects in them | |
| 1908 | + // must have generation 0 because the PDF spec does not provide | |
| 1909 | + // any way to do otherwise. | |
| 1910 | + std::map<int, int> omap; | |
| 1911 | + this->pdf.getObjectStreamData(omap); | |
| 1912 | + for (std::map<int, int>::iterator iter = omap.begin(); | |
| 1913 | + iter != omap.end(); ++iter) | |
| 1914 | + { | |
| 1915 | + this->object_to_object_stream[QPDFObjGen((*iter).first, 0)] = | |
| 1916 | + (*iter).second; | |
| 1917 | + } | |
| 1887 | 1918 | } |
| 1888 | 1919 | |
| 1889 | 1920 | void |
| ... | ... | @@ -1899,7 +1930,8 @@ QPDFWriter::generateObjectStreams() |
| 1899 | 1930 | |
| 1900 | 1931 | // This code doesn't do anything with /Extends. |
| 1901 | 1932 | |
| 1902 | - std::vector<int> const& eligible = this->pdf.getCompressibleObjects(); | |
| 1933 | + std::vector<QPDFObjGen> const& eligible = | |
| 1934 | + this->pdf.getCompressibleObjGens(); | |
| 1903 | 1935 | unsigned int n_object_streams = (eligible.size() + 99) / 100; |
| 1904 | 1936 | unsigned int n_per = eligible.size() / n_object_streams; |
| 1905 | 1937 | if (n_per * n_object_streams < eligible.size()) |
| ... | ... | @@ -1908,7 +1940,7 @@ QPDFWriter::generateObjectStreams() |
| 1908 | 1940 | } |
| 1909 | 1941 | unsigned int n = 0; |
| 1910 | 1942 | int cur_ostream = 0; |
| 1911 | - for (std::vector<int>::const_iterator iter = eligible.begin(); | |
| 1943 | + for (std::vector<QPDFObjGen>::const_iterator iter = eligible.begin(); | |
| 1912 | 1944 | iter != eligible.end(); ++iter) |
| 1913 | 1945 | { |
| 1914 | 1946 | if ((n % n_per) == 0) |
| ... | ... | @@ -2172,11 +2204,11 @@ QPDFWriter::write() |
| 2172 | 2204 | iter != pages.end(); ++iter) |
| 2173 | 2205 | { |
| 2174 | 2206 | QPDFObjectHandle& page = *iter; |
| 2175 | - int objid = page.getObjectID(); | |
| 2176 | - if (this->object_to_object_stream.count(objid)) | |
| 2207 | + QPDFObjGen og = page.getObjGen(); | |
| 2208 | + if (this->object_to_object_stream.count(og)) | |
| 2177 | 2209 | { |
| 2178 | 2210 | QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary"); |
| 2179 | - this->object_to_object_stream.erase(objid); | |
| 2211 | + this->object_to_object_stream.erase(og); | |
| 2180 | 2212 | } |
| 2181 | 2213 | } |
| 2182 | 2214 | } |
| ... | ... | @@ -2188,20 +2220,20 @@ QPDFWriter::write() |
| 2188 | 2220 | // 8.0.0 has a bug that prevents it from being able to handle |
| 2189 | 2221 | // encrypted files with compressed document catalogs, so we |
| 2190 | 2222 | // disable them in that case as well. |
| 2191 | - int objid = pdf.getRoot().getObjectID(); | |
| 2192 | - if (this->object_to_object_stream.count(objid)) | |
| 2223 | + QPDFObjGen og = pdf.getRoot().getObjGen(); | |
| 2224 | + if (this->object_to_object_stream.count(og)) | |
| 2193 | 2225 | { |
| 2194 | 2226 | QTC::TC("qpdf", "QPDFWriter uncompressing root"); |
| 2195 | - this->object_to_object_stream.erase(objid); | |
| 2227 | + this->object_to_object_stream.erase(og); | |
| 2196 | 2228 | } |
| 2197 | 2229 | } |
| 2198 | 2230 | |
| 2199 | 2231 | // Generate reverse mapping from object stream to objects |
| 2200 | - for (std::map<int, int>::iterator iter = | |
| 2232 | + for (std::map<QPDFObjGen, int>::iterator iter = | |
| 2201 | 2233 | this->object_to_object_stream.begin(); |
| 2202 | 2234 | iter != this->object_to_object_stream.end(); ++iter) |
| 2203 | 2235 | { |
| 2204 | - int obj = (*iter).first; | |
| 2236 | + QPDFObjGen obj = (*iter).first; | |
| 2205 | 2237 | int stream = (*iter).second; |
| 2206 | 2238 | this->object_stream_to_objects[stream].insert(obj); |
| 2207 | 2239 | this->max_ostream_index = |
| ... | ... | @@ -2303,7 +2335,8 @@ QPDFWriter::writeHintStream(int hint_id) |
| 2303 | 2335 | int S = 0; |
| 2304 | 2336 | int O = 0; |
| 2305 | 2337 | pdf.generateHintStream( |
| 2306 | - this->xref, this->lengths, this->obj_renumber, hint_buffer, S, O); | |
| 2338 | + this->xref, this->lengths, this->obj_renumber_no_gen, | |
| 2339 | + hint_buffer, S, O); | |
| 2307 | 2340 | |
| 2308 | 2341 | openObject(hint_id); |
| 2309 | 2342 | setDataKey(hint_id); |
| ... | ... | @@ -2522,19 +2555,57 @@ QPDFWriter::calculateXrefStreamPadding(int xref_bytes) |
| 2522 | 2555 | } |
| 2523 | 2556 | |
| 2524 | 2557 | void |
| 2558 | +QPDFWriter::discardGeneration(std::map<QPDFObjGen, int> const& in, | |
| 2559 | + std::map<int, int>& out) | |
| 2560 | +{ | |
| 2561 | + // There are deep assumptions in the linearization code in QPDF | |
| 2562 | + // that there is only one object with each object number; i.e., | |
| 2563 | + // you can't have two objects with the same object number and | |
| 2564 | + // different generations. This is a pretty safe assumption | |
| 2565 | + // because Adobe Reader and Acrobat can't actually handle this | |
| 2566 | + // case. There is not much if any code in QPDF outside | |
| 2567 | + // linearization that assumes this, but the linearization code as | |
| 2568 | + // currently implemented would do weird things if we found such a | |
| 2569 | + // case. In order to avoid breaking ABI changes in QPDF, we will | |
| 2570 | + // first assert that this condition holds. Then we can create new | |
| 2571 | + // maps for QPDF that throw away generation numbers. | |
| 2572 | + | |
| 2573 | + out.clear(); | |
| 2574 | + for (std::map<QPDFObjGen, int>::const_iterator iter = in.begin(); | |
| 2575 | + iter != in.end(); ++iter) | |
| 2576 | + { | |
| 2577 | + if (out.count((*iter).first.getObj())) | |
| 2578 | + { | |
| 2579 | + throw std::logic_error( | |
| 2580 | + "QPDF cannot currently linearize files that contain" | |
| 2581 | + " multiple objects with the same object ID and different" | |
| 2582 | + " generations. If you see this error message, please file" | |
| 2583 | + " a bug report and attach the file if possible. As a" | |
| 2584 | + " workaround, first convert the file with qpdf without" | |
| 2585 | + " linearizing, and then linearize the result of that" | |
| 2586 | + " conversion."); | |
| 2587 | + } | |
| 2588 | + out[(*iter).first.getObj()] = (*iter).second; | |
| 2589 | + } | |
| 2590 | +} | |
| 2591 | + | |
| 2592 | +void | |
| 2525 | 2593 | QPDFWriter::writeLinearized() |
| 2526 | 2594 | { |
| 2527 | 2595 | // Optimize file and enqueue objects in order |
| 2528 | 2596 | |
| 2597 | + discardGeneration(this->object_to_object_stream, | |
| 2598 | + this->object_to_object_stream_no_gen); | |
| 2599 | + | |
| 2529 | 2600 | bool need_xref_stream = (! this->object_to_object_stream.empty()); |
| 2530 | - pdf.optimize(this->object_to_object_stream); | |
| 2601 | + pdf.optimize(this->object_to_object_stream_no_gen); | |
| 2531 | 2602 | |
| 2532 | 2603 | std::vector<QPDFObjectHandle> part4; |
| 2533 | 2604 | std::vector<QPDFObjectHandle> part6; |
| 2534 | 2605 | std::vector<QPDFObjectHandle> part7; |
| 2535 | 2606 | std::vector<QPDFObjectHandle> part8; |
| 2536 | 2607 | std::vector<QPDFObjectHandle> part9; |
| 2537 | - pdf.getLinearizedParts(this->object_to_object_stream, | |
| 2608 | + pdf.getLinearizedParts(this->object_to_object_stream_no_gen, | |
| 2538 | 2609 | part4, part6, part7, part8, part9); |
| 2539 | 2610 | |
| 2540 | 2611 | // Object number sequence: |
| ... | ... | @@ -2570,7 +2641,7 @@ QPDFWriter::writeLinearized() |
| 2570 | 2641 | for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs2[i]).begin(); |
| 2571 | 2642 | iter != (*vecs2[i]).end(); ++iter) |
| 2572 | 2643 | { |
| 2573 | - assignCompressedObjectNumbers((*iter).getObjectID()); | |
| 2644 | + assignCompressedObjectNumbers((*iter).getObjGen()); | |
| 2574 | 2645 | } |
| 2575 | 2646 | } |
| 2576 | 2647 | int second_half_end = this->next_objid - 1; |
| ... | ... | @@ -2602,7 +2673,7 @@ QPDFWriter::writeLinearized() |
| 2602 | 2673 | for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs1[i]).begin(); |
| 2603 | 2674 | iter != (*vecs1[i]).end(); ++iter) |
| 2604 | 2675 | { |
| 2605 | - assignCompressedObjectNumbers((*iter).getObjectID()); | |
| 2676 | + assignCompressedObjectNumbers((*iter).getObjGen()); | |
| 2606 | 2677 | } |
| 2607 | 2678 | } |
| 2608 | 2679 | int first_half_end = this->next_objid - 1; |
| ... | ... | @@ -2660,7 +2731,7 @@ QPDFWriter::writeLinearized() |
| 2660 | 2731 | if (pass == 2) |
| 2661 | 2732 | { |
| 2662 | 2733 | std::vector<QPDFObjectHandle> const& pages = pdf.getAllPages(); |
| 2663 | - int first_page_object = obj_renumber[pages[0].getObjectID()]; | |
| 2734 | + int first_page_object = obj_renumber[pages[0].getObjGen()]; | |
| 2664 | 2735 | int npages = pages.size(); |
| 2665 | 2736 | |
| 2666 | 2737 | writeString(" /Linearized 1 /L "); |
| ... | ... | @@ -2834,6 +2905,8 @@ QPDFWriter::writeLinearized() |
| 2834 | 2905 | writeString(QUtil::int_to_string(first_xref_offset)); |
| 2835 | 2906 | writeString("\n%%EOF\n"); |
| 2836 | 2907 | |
| 2908 | + discardGeneration(this->obj_renumber, this->obj_renumber_no_gen); | |
| 2909 | + | |
| 2837 | 2910 | if (pass == 1) |
| 2838 | 2911 | { |
| 2839 | 2912 | // Close first pass pipeline | ... | ... |
qpdf/qpdf.testcov
qpdf/qtest/qpdf.test
| ... | ... | @@ -199,7 +199,7 @@ $td->runtest("remove page we don't have", |
| 199 | 199 | show_ntests(); |
| 200 | 200 | # ---------- |
| 201 | 201 | $td->notify("--- Miscellaneous Tests ---"); |
| 202 | -$n_tests += 62; | |
| 202 | +$n_tests += 64; | |
| 203 | 203 | |
| 204 | 204 | $td->runtest("qpdf version", |
| 205 | 205 | {$td->COMMAND => "qpdf --version"}, |
| ... | ... | @@ -501,6 +501,14 @@ $td->runtest("overridden compressed objects", |
| 501 | 501 | $td->EXIT_STATUS => 0}, |
| 502 | 502 | $td->NORMALIZE_NEWLINES); |
| 503 | 503 | |
| 504 | +$td->runtest("generate object streams for gen > 0", | |
| 505 | + {$td->COMMAND => "qpdf --qdf --static-id" . | |
| 506 | + " --object-streams=generate gen1.pdf a.pdf"}, | |
| 507 | + {$td->STRING => "", $td->EXIT_STATUS => 0}); | |
| 508 | +$td->runtest("check file", | |
| 509 | + {$td->FILE => "a.pdf"}, | |
| 510 | + {$td->FILE => "gen1.qdf"}); | |
| 511 | + | |
| 504 | 512 | show_ntests(); |
| 505 | 513 | # ---------- |
| 506 | 514 | $td->notify("--- Numeric range parsing tests ---"); |
| ... | ... | @@ -1183,6 +1191,7 @@ my @to_linearize = |
| 1183 | 1191 | 'lin-delete-and-reuse', # linearized, then delete and reuse |
| 1184 | 1192 | 'object-stream', # contains object streams |
| 1185 | 1193 | 'hybrid-xref', # contains both xref tables and streams |
| 1194 | + 'gen1', # has objects with generation > 0 | |
| 1186 | 1195 | @linearized_files, # we should be able to relinearize |
| 1187 | 1196 | ); |
| 1188 | 1197 | ... | ... |
qpdf/qtest/qpdf/gen1.pdf
0 → 100644
| 1 | +%PDF-1.3 | |
| 2 | +1 1 obj | |
| 3 | +<< | |
| 4 | + /Type /Catalog | |
| 5 | + /Pages 2 1 R | |
| 6 | +>> | |
| 7 | +endobj | |
| 8 | + | |
| 9 | +2 1 obj | |
| 10 | +<< | |
| 11 | + /Type /Pages | |
| 12 | + /Kids [ | |
| 13 | + 3 1 R | |
| 14 | + ] | |
| 15 | + /Count 1 | |
| 16 | +>> | |
| 17 | +endobj | |
| 18 | + | |
| 19 | +3 1 obj | |
| 20 | +<< | |
| 21 | + /Type /Page | |
| 22 | + /Parent 2 1 R | |
| 23 | + /MediaBox [0 0 612 792] | |
| 24 | + /Contents 4 1 R | |
| 25 | + /Resources << | |
| 26 | + /ProcSet 5 1 R | |
| 27 | + /Font << | |
| 28 | + /F1 6 1 R | |
| 29 | + >> | |
| 30 | + >> | |
| 31 | +>> | |
| 32 | +endobj | |
| 33 | + | |
| 34 | +4 1 obj | |
| 35 | +<< | |
| 36 | + /Length 44 | |
| 37 | +>> | |
| 38 | +stream | |
| 39 | +BT | |
| 40 | + /F1 24 Tf | |
| 41 | + 72 720 Td | |
| 42 | + (Potato) Tj | |
| 43 | +ET | |
| 44 | +endstream | |
| 45 | +endobj | |
| 46 | + | |
| 47 | +5 1 obj | |
| 48 | +[ | |
| 49 | ||
| 50 | + /Text | |
| 51 | +] | |
| 52 | +endobj | |
| 53 | + | |
| 54 | +6 1 obj | |
| 55 | +<< | |
| 56 | + /Type /Font | |
| 57 | + /Subtype /Type1 | |
| 58 | + /Name /F1 | |
| 59 | + /BaseFont /Helvetica | |
| 60 | + /Encoding /WinAnsiEncoding | |
| 61 | +>> | |
| 62 | +endobj | |
| 63 | + | |
| 64 | +xref | |
| 65 | +0 7 | |
| 66 | +0000000000 65535 f | |
| 67 | +0000000009 00001 n | |
| 68 | +0000000063 00001 n | |
| 69 | +0000000135 00001 n | |
| 70 | +0000000307 00001 n | |
| 71 | +0000000403 00001 n | |
| 72 | +0000000438 00001 n | |
| 73 | +trailer << | |
| 74 | + /Size 7 | |
| 75 | + /Root 1 1 R | |
| 76 | +>> | |
| 77 | +startxref | |
| 78 | +556 | |
| 79 | +%%EOF | ... | ... |
qpdf/qtest/qpdf/gen1.qdf
0 → 100644
No preview for this file type