Commit a3576a73593987b26cd3eff346f8f7c11f713cbd

Authored by Jay Berkenbilt
1 parent 96eb9651

Bug fix: handle generation > 0 when generating object streams

Rework QPDFWriter to always track old object IDs as QPDFObjGen
instead of int, thus not discarding the generation number.  Switch to
QPDF::getCompressibleObjGens() to properly handle the case of an old
object eligible for compression that has a generation other than
zero.
ChangeLog
1 2013-06-14 Jay Berkenbilt <ejb@ql.org> 1 2013-06-14 Jay Berkenbilt <ejb@ql.org>
2 2
  3 + * Bug fix: properly handle object stream generation when the
  4 + original file has some compressible objects with generation != 0.
  5 +
  6 + * Add QPDF::getCompressibleObjGens() and deprecate
  7 + QPDF::getCompressibleObjects(), which had a flaw in its logic.
  8 +
3 * Add new QPDFObjectHandle::getObjGen() method and indicate in 9 * Add new QPDFObjectHandle::getObjGen() method and indicate in
4 comments that its use is favored over getObjectID() and 10 comments that its use is favored over getObjectID() and
5 getGeneration() for most cases. 11 getGeneration() for most cases.
include/qpdf/QPDF.hh
@@ -434,8 +434,19 @@ class QPDF @@ -434,8 +434,19 @@ class QPDF
434 // Map object to object stream that contains it 434 // Map object to object stream that contains it
435 QPDF_DLL 435 QPDF_DLL
436 void getObjectStreamData(std::map<int, int>&); 436 void getObjectStreamData(std::map<int, int>&);
  437 +
437 // Get a list of objects that would be permitted in an object 438 // Get a list of objects that would be permitted in an object
438 - // stream 439 + // stream.
  440 + QPDF_DLL
  441 + std::vector<QPDFObjGen> getCompressibleObjGens();
  442 +
  443 + // Deprecated: get a list of objects that would be permitted in an
  444 + // object stream. This method is deprecated and will be removed.
  445 + // It's incorrect because it disregards the generations of the
  446 + // compressible objects, which can lead (and has led) to bugs.
  447 + // This method will throw an exception if any of the objects
  448 + // returned have a generation of other than zero. Use
  449 + // getCompressibleObjGens() instead.
439 QPDF_DLL 450 QPDF_DLL
440 std::vector<int> getCompressibleObjects(); 451 std::vector<int> getCompressibleObjects();
441 452
include/qpdf/QPDFWriter.hh
@@ -24,6 +24,7 @@ @@ -24,6 +24,7 @@
24 24
25 #include <qpdf/Constants.h> 25 #include <qpdf/Constants.h>
26 26
  27 +#include <qpdf/QPDFObjGen.hh>
27 #include <qpdf/QPDFXRefEntry.hh> 28 #include <qpdf/QPDFXRefEntry.hh>
28 29
29 #include <qpdf/Pl_Buffer.hh> 30 #include <qpdf/Pl_Buffer.hh>
@@ -289,7 +290,7 @@ class QPDFWriter @@ -289,7 +290,7 @@ class QPDFWriter
289 void writeStringQDF(std::string const& str); 290 void writeStringQDF(std::string const& str);
290 void writeStringNoQDF(std::string const& str); 291 void writeStringNoQDF(std::string const& str);
291 void writePad(int nspaces); 292 void writePad(int nspaces);
292 - void assignCompressedObjectNumbers(int objid); 293 + void assignCompressedObjectNumbers(QPDFObjGen const& og);
293 void enqueueObject(QPDFObjectHandle object); 294 void enqueueObject(QPDFObjectHandle object);
294 void writeObjectStreamOffsets( 295 void writeObjectStreamOffsets(
295 std::vector<qpdf_offset_t>& offsets, int first_obj); 296 std::vector<qpdf_offset_t>& offsets, int first_obj);
@@ -380,6 +381,9 @@ class QPDFWriter @@ -380,6 +381,9 @@ class QPDFWriter
380 void pushEncryptionFilter(); 381 void pushEncryptionFilter();
381 void pushDiscardFilter(); 382 void pushDiscardFilter();
382 383
  384 + void discardGeneration(std::map<QPDFObjGen, int> const& in,
  385 + std::map<int, int>& out);
  386 +
383 QPDF& pdf; 387 QPDF& pdf;
384 char const* filename; 388 char const* filename;
385 FILE* file; 389 FILE* file;
@@ -419,7 +423,7 @@ class QPDFWriter @@ -419,7 +423,7 @@ class QPDFWriter
419 std::list<PointerHolder<Pipeline> > to_delete; 423 std::list<PointerHolder<Pipeline> > to_delete;
420 Pl_Count* pipeline; 424 Pl_Count* pipeline;
421 std::list<QPDFObjectHandle> object_queue; 425 std::list<QPDFObjectHandle> object_queue;
422 - std::map<int, int> obj_renumber; 426 + std::map<QPDFObjGen, int> obj_renumber;
423 std::map<int, QPDFXRefEntry> xref; 427 std::map<int, QPDFXRefEntry> xref;
424 std::map<int, qpdf_offset_t> lengths; 428 std::map<int, qpdf_offset_t> lengths;
425 int next_objid; 429 int next_objid;
@@ -427,12 +431,16 @@ class QPDFWriter @@ -427,12 +431,16 @@ class QPDFWriter
427 size_t cur_stream_length; 431 size_t cur_stream_length;
428 bool added_newline; 432 bool added_newline;
429 int max_ostream_index; 433 int max_ostream_index;
430 - std::set<int> normalized_streams;  
431 - std::map<int, int> page_object_to_seq;  
432 - std::map<int, int> contents_to_page_seq;  
433 - std::map<int, int> object_to_object_stream;  
434 - std::map<int, std::set<int> > object_stream_to_objects; 434 + std::set<QPDFObjGen> normalized_streams;
  435 + std::map<QPDFObjGen, int> page_object_to_seq;
  436 + std::map<QPDFObjGen, int> contents_to_page_seq;
  437 + std::map<QPDFObjGen, int> object_to_object_stream;
  438 + std::map<int, std::set<QPDFObjGen> > object_stream_to_objects;
435 std::list<Pipeline*> pipeline_stack; 439 std::list<Pipeline*> pipeline_stack;
  440 +
  441 + // For linearization only
  442 + std::map<int, int> obj_renumber_no_gen;
  443 + std::map<int, int> object_to_object_stream_no_gen;
436 }; 444 };
437 445
438 #endif // __QPDFWRITER_HH__ 446 #endif // __QPDFWRITER_HH__
libqpdf/QPDF.cc
@@ -1944,55 +1944,68 @@ QPDF::getObjectStreamData(std::map&lt;int, int&gt;&amp; omap) @@ -1944,55 +1944,68 @@ QPDF::getObjectStreamData(std::map&lt;int, int&gt;&amp; omap)
1944 std::vector<int> 1944 std::vector<int>
1945 QPDF::getCompressibleObjects() 1945 QPDF::getCompressibleObjects()
1946 { 1946 {
1947 - // Return a set of object numbers of objects that are allowed to  
1948 - // be in object streams. We disregard generation numbers here  
1949 - // since this is a helper function for QPDFWriter which is going  
1950 - // to renumber objects anyway. This code will do weird things if  
1951 - // we have two objects with the same object number and different  
1952 - // generations, but so do virtually all PDF consumers,  
1953 - // particularly since this is not a permitted condition.  
1954 -  
1955 - // We walk through the objects by traversing the document from the  
1956 - // root, including a traversal of the pages tree. This makes that  
1957 - // objects that are on the same page are more likely to be in the  
1958 - // same object stream, which is slightly more efficient, 1947 + std::vector<QPDFObjGen> objects = getCompressibleObjGens();
  1948 + std::vector<int> result;
  1949 + for (std::vector<QPDFObjGen>::iterator iter = objects.begin();
  1950 + iter != objects.end(); ++iter)
  1951 + {
  1952 + if ((*iter).getGen() != 0)
  1953 + {
  1954 + throw std::logic_error(
  1955 + "QPDF::getCompressibleObjects() would return an object ID"
  1956 + " for an object with generation != 0. Use"
  1957 + " QPDF::getCompressibleObjGens() instead."
  1958 + " See comments in QPDF.hh.");
  1959 + }
  1960 + else
  1961 + {
  1962 + result.push_back((*iter).getObj());
  1963 + }
  1964 + }
  1965 + return result;
  1966 +}
  1967 +
  1968 +std::vector<QPDFObjGen>
  1969 +QPDF::getCompressibleObjGens()
  1970 +{
  1971 + // Return a list of objects that are allowed to be in object
  1972 + // streams. Walk through the objects by traversing the document
  1973 + // from the root, including a traversal of the pages tree. This
  1974 + // means that objects that are on the same page are more likely to
  1975 + // be in the same object stream, which is slightly more efficient,
1959 // particularly with linearized files. This is better than 1976 // particularly with linearized files. This is better than
1960 // iterating through the xref table since it avoids preserving 1977 // iterating through the xref table since it avoids preserving
1961 // orphaned items. 1978 // orphaned items.
1962 1979
1963 // Exclude encryption dictionary, if any 1980 // Exclude encryption dictionary, if any
1964 - int encryption_dict_id = 0;  
1965 QPDFObjectHandle encryption_dict = trailer.getKey("/Encrypt"); 1981 QPDFObjectHandle encryption_dict = trailer.getKey("/Encrypt");
1966 - if (encryption_dict.isIndirect())  
1967 - {  
1968 - encryption_dict_id = encryption_dict.getObjectID();  
1969 - } 1982 + QPDFObjGen encryption_dict_og = encryption_dict.getObjGen();
1970 1983
1971 - std::set<int> visited; 1984 + std::set<QPDFObjGen> visited;
1972 std::list<QPDFObjectHandle> queue; 1985 std::list<QPDFObjectHandle> queue;
1973 queue.push_front(this->trailer); 1986 queue.push_front(this->trailer);
1974 - std::vector<int> result; 1987 + std::vector<QPDFObjGen> result;
1975 while (! queue.empty()) 1988 while (! queue.empty())
1976 { 1989 {
1977 QPDFObjectHandle obj = queue.front(); 1990 QPDFObjectHandle obj = queue.front();
1978 queue.pop_front(); 1991 queue.pop_front();
1979 if (obj.isIndirect()) 1992 if (obj.isIndirect())
1980 { 1993 {
1981 - int objid = obj.getObjectID();  
1982 - if (visited.count(objid)) 1994 + QPDFObjGen og = obj.getObjGen();
  1995 + if (visited.count(og))
1983 { 1996 {
1984 QTC::TC("qpdf", "QPDF loop detected traversing objects"); 1997 QTC::TC("qpdf", "QPDF loop detected traversing objects");
1985 continue; 1998 continue;
1986 } 1999 }
1987 - if (objid == encryption_dict_id) 2000 + if (og == encryption_dict_og)
1988 { 2001 {
1989 QTC::TC("qpdf", "QPDF exclude encryption dictionary"); 2002 QTC::TC("qpdf", "QPDF exclude encryption dictionary");
1990 } 2003 }
1991 else if (! obj.isStream()) 2004 else if (! obj.isStream())
1992 { 2005 {
1993 - result.push_back(objid); 2006 + result.push_back(og);
1994 } 2007 }
1995 - visited.insert(objid); 2008 + visited.insert(og);
1996 } 2009 }
1997 if (obj.isStream()) 2010 if (obj.isStream())
1998 { 2011 {
libqpdf/QPDFWriter.cc
@@ -933,16 +933,19 @@ QPDFWriter::closeObject(int objid) @@ -933,16 +933,19 @@ QPDFWriter::closeObject(int objid)
933 } 933 }
934 934
935 void 935 void
936 -QPDFWriter::assignCompressedObjectNumbers(int objid) 936 +QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen const& og)
937 { 937 {
938 - if (this->object_stream_to_objects.count(objid) == 0) 938 + int objid = og.getObj();
  939 + if ((og.getGen() != 0) ||
  940 + (this->object_stream_to_objects.count(objid) == 0))
939 { 941 {
  942 + // This is not an object stream.
940 return; 943 return;
941 } 944 }
942 945
943 // Reserve numbers for the objects that belong to this object 946 // Reserve numbers for the objects that belong to this object
944 // stream. 947 // stream.
945 - for (std::set<int>::iterator iter = 948 + for (std::set<QPDFObjGen>::iterator iter =
946 this->object_stream_to_objects[objid].begin(); 949 this->object_stream_to_objects[objid].begin();
947 iter != this->object_stream_to_objects[objid].end(); 950 iter != this->object_stream_to_objects[objid].end();
948 ++iter) 951 ++iter)
@@ -969,30 +972,32 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object) @@ -969,30 +972,32 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object)
969 { 972 {
970 // This is a place-holder object for an object stream 973 // This is a place-holder object for an object stream
971 } 974 }
972 - int objid = object.getObjectID(); 975 + QPDFObjGen og = object.getObjGen();
973 976
974 - if (obj_renumber.count(objid) == 0) 977 + if (obj_renumber.count(og) == 0)
975 { 978 {
976 - if (this->object_to_object_stream.count(objid)) 979 + if (this->object_to_object_stream.count(og))
977 { 980 {
978 // This is in an object stream. Don't process it 981 // This is in an object stream. Don't process it
979 - // here. Instead, enqueue the object stream.  
980 - int stream_id = this->object_to_object_stream[objid]; 982 + // here. Instead, enqueue the object stream. Object
  983 + // streams always have generation 0.
  984 + int stream_id = this->object_to_object_stream[og];
981 enqueueObject(this->pdf.getObjectByID(stream_id, 0)); 985 enqueueObject(this->pdf.getObjectByID(stream_id, 0));
982 } 986 }
983 else 987 else
984 { 988 {
985 object_queue.push_back(object); 989 object_queue.push_back(object);
986 - obj_renumber[objid] = next_objid++; 990 + obj_renumber[og] = next_objid++;
987 991
988 - if (this->object_stream_to_objects.count(objid)) 992 + if ((og.getGen() == 0) &&
  993 + this->object_stream_to_objects.count(og.getObj()))
989 { 994 {
990 // For linearized files, uncompressed objects go 995 // For linearized files, uncompressed objects go
991 // at end, and we take care of assigning numbers 996 // at end, and we take care of assigning numbers
992 // to them elsewhere. 997 // to them elsewhere.
993 if (! this->linearized) 998 if (! this->linearized)
994 { 999 {
995 - assignCompressedObjectNumbers(objid); 1000 + assignCompressedObjectNumbers(og);
996 } 1001 }
997 } 1002 }
998 else if ((! this->direct_stream_lengths) && object.isStream()) 1003 else if ((! this->direct_stream_lengths) && object.isStream())
@@ -1041,8 +1046,8 @@ QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags) @@ -1041,8 +1046,8 @@ QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags)
1041 } 1046 }
1042 if (child.isIndirect()) 1047 if (child.isIndirect())
1043 { 1048 {
1044 - int old_id = child.getObjectID();  
1045 - int new_id = obj_renumber[old_id]; 1049 + QPDFObjGen old_og = child.getObjGen();
  1050 + int new_id = obj_renumber[old_og];
1046 writeString(QUtil::int_to_string(new_id)); 1051 writeString(QUtil::int_to_string(new_id));
1047 writeString(" 0 R"); 1052 writeString(" 0 R");
1048 } 1053 }
@@ -1134,7 +1139,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, @@ -1134,7 +1139,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
1134 unsigned int flags, size_t stream_length, 1139 unsigned int flags, size_t stream_length,
1135 bool compress) 1140 bool compress)
1136 { 1141 {
1137 - int old_id = object.getObjectID(); 1142 + QPDFObjGen old_og = object.getObjGen();
1138 unsigned int child_flags = flags & ~f_stream; 1143 unsigned int child_flags = flags & ~f_stream;
1139 1144
1140 std::string indent; 1145 std::string indent;
@@ -1201,7 +1206,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, @@ -1201,7 +1206,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
1201 bool have_extensions_adbe = false; 1206 bool have_extensions_adbe = false;
1202 1207
1203 QPDFObjectHandle extensions; 1208 QPDFObjectHandle extensions;
1204 - if (old_id == pdf.getRoot().getObjectID()) 1209 + if (old_og == pdf.getRoot().getObjGen())
1205 { 1210 {
1206 is_root = true; 1211 is_root = true;
1207 if (object.hasKey("/Extensions") && 1212 if (object.hasKey("/Extensions") &&
@@ -1396,7 +1401,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, @@ -1396,7 +1401,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
1396 else if (object.isStream()) 1401 else if (object.isStream())
1397 { 1402 {
1398 // Write stream data to a buffer. 1403 // Write stream data to a buffer.
1399 - int new_id = obj_renumber[old_id]; 1404 + int new_id = obj_renumber[old_og];
1400 if (! this->direct_stream_lengths) 1405 if (! this->direct_stream_lengths)
1401 { 1406 {
1402 this->cur_stream_length_id = new_id + 1; 1407 this->cur_stream_length_id = new_id + 1;
@@ -1436,7 +1441,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, @@ -1436,7 +1441,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
1436 filter = true; 1441 filter = true;
1437 compress = false; 1442 compress = false;
1438 } 1443 }
1439 - else if (this->normalize_content && normalized_streams.count(old_id)) 1444 + else if (this->normalize_content && normalized_streams.count(old_og))
1440 { 1445 {
1441 normalize = true; 1446 normalize = true;
1442 filter = true; 1447 filter = true;
@@ -1562,8 +1567,10 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) @@ -1562,8 +1567,10 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1562 // Note: object might be null if this is a place-holder for an 1567 // Note: object might be null if this is a place-holder for an
1563 // object stream that we are generating from scratch. 1568 // object stream that we are generating from scratch.
1564 1569
1565 - int old_id = object.getObjectID();  
1566 - int new_id = obj_renumber[old_id]; 1570 + QPDFObjGen old_og = object.getObjGen();
  1571 + assert(old_og.getGen() == 0);
  1572 + int old_id = old_og.getObj();
  1573 + int new_id = obj_renumber[old_og];
1567 1574
1568 std::vector<qpdf_offset_t> offsets; 1575 std::vector<qpdf_offset_t> offsets;
1569 qpdf_offset_t first = 0; 1576 qpdf_offset_t first = 0;
@@ -1612,12 +1619,12 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) @@ -1612,12 +1619,12 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1612 } 1619 }
1613 1620
1614 int count = 0; 1621 int count = 0;
1615 - for (std::set<int>::iterator iter = 1622 + for (std::set<QPDFObjGen>::iterator iter =
1616 this->object_stream_to_objects[old_id].begin(); 1623 this->object_stream_to_objects[old_id].begin();
1617 iter != this->object_stream_to_objects[old_id].end(); 1624 iter != this->object_stream_to_objects[old_id].end();
1618 ++iter, ++count) 1625 ++iter, ++count)
1619 { 1626 {
1620 - int obj = *iter; 1627 + QPDFObjGen obj = *iter;
1621 int new_obj = this->obj_renumber[obj]; 1628 int new_obj = this->obj_renumber[obj];
1622 if (first_obj == -1) 1629 if (first_obj == -1)
1623 { 1630 {
@@ -1631,7 +1638,17 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) @@ -1631,7 +1638,17 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1631 if (! this->suppress_original_object_ids) 1638 if (! this->suppress_original_object_ids)
1632 { 1639 {
1633 writeString("; original object ID: " + 1640 writeString("; original object ID: " +
1634 - QUtil::int_to_string(obj)); 1641 + QUtil::int_to_string(obj.getObj()));
  1642 + // For compatibility, only write the generation if
  1643 + // non-zero. While object streams only allow
  1644 + // objects with generation 0, if we are generating
  1645 + // object streams, the old object could have a
  1646 + // non-zero generation.
  1647 + if (obj.getGen() != 0)
  1648 + {
  1649 + QTC::TC("qpdf", "QPDFWriter original obj non-zero gen");
  1650 + writeString(" " + QUtil::int_to_string(obj.getGen()));
  1651 + }
1635 } 1652 }
1636 writeString("\n"); 1653 writeString("\n");
1637 } 1654 }
@@ -1639,7 +1656,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) @@ -1639,7 +1656,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1639 { 1656 {
1640 offsets.push_back(this->pipeline->getCount()); 1657 offsets.push_back(this->pipeline->getCount());
1641 } 1658 }
1642 - writeObject(this->pdf.getObjectByID(obj, 0), count); 1659 + writeObject(this->pdf.getObjectByObjGen(obj), count);
1643 1660
1644 this->xref[new_obj] = QPDFXRefEntry(2, new_id, count); 1661 this->xref[new_obj] = QPDFXRefEntry(2, new_id, count);
1645 } 1662 }
@@ -1697,32 +1714,33 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) @@ -1697,32 +1714,33 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1697 void 1714 void
1698 QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index) 1715 QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
1699 { 1716 {
1700 - int old_id = object.getObjectID(); 1717 + QPDFObjGen old_og = object.getObjGen();
1701 1718
1702 if ((object_stream_index == -1) && 1719 if ((object_stream_index == -1) &&
1703 - (this->object_stream_to_objects.count(old_id))) 1720 + (old_og.getGen() == 0) &&
  1721 + (this->object_stream_to_objects.count(old_og.getObj())))
1704 { 1722 {
1705 writeObjectStream(object); 1723 writeObjectStream(object);
1706 return; 1724 return;
1707 } 1725 }
1708 1726
1709 - int new_id = obj_renumber[old_id]; 1727 + int new_id = obj_renumber[old_og];
1710 if (this->qdf_mode) 1728 if (this->qdf_mode)
1711 { 1729 {
1712 - if (this->page_object_to_seq.count(old_id)) 1730 + if (this->page_object_to_seq.count(old_og))
1713 { 1731 {
1714 writeString("%% Page "); 1732 writeString("%% Page ");
1715 writeString( 1733 writeString(
1716 QUtil::int_to_string( 1734 QUtil::int_to_string(
1717 - this->page_object_to_seq[old_id])); 1735 + this->page_object_to_seq[old_og]));
1718 writeString("\n"); 1736 writeString("\n");
1719 } 1737 }
1720 - if (this->contents_to_page_seq.count(old_id)) 1738 + if (this->contents_to_page_seq.count(old_og))
1721 { 1739 {
1722 writeString("%% Contents for page "); 1740 writeString("%% Contents for page ");
1723 writeString( 1741 writeString(
1724 QUtil::int_to_string( 1742 QUtil::int_to_string(
1725 - this->contents_to_page_seq[old_id])); 1743 + this->contents_to_page_seq[old_og]));
1726 writeString("\n"); 1744 writeString("\n");
1727 } 1745 }
1728 } 1746 }
@@ -1854,24 +1872,24 @@ QPDFWriter::initializeSpecialStreams() @@ -1854,24 +1872,24 @@ QPDFWriter::initializeSpecialStreams()
1854 iter != pages.end(); ++iter) 1872 iter != pages.end(); ++iter)
1855 { 1873 {
1856 QPDFObjectHandle& page = *iter; 1874 QPDFObjectHandle& page = *iter;
1857 - this->page_object_to_seq[page.getObjectID()] = ++num; 1875 + this->page_object_to_seq[page.getObjGen()] = ++num;
1858 QPDFObjectHandle contents = page.getKey("/Contents"); 1876 QPDFObjectHandle contents = page.getKey("/Contents");
1859 - std::vector<int> contents_objects; 1877 + std::vector<QPDFObjGen> contents_objects;
1860 if (contents.isArray()) 1878 if (contents.isArray())
1861 { 1879 {
1862 int n = contents.getArrayNItems(); 1880 int n = contents.getArrayNItems();
1863 for (int i = 0; i < n; ++i) 1881 for (int i = 0; i < n; ++i)
1864 { 1882 {
1865 contents_objects.push_back( 1883 contents_objects.push_back(
1866 - contents.getArrayItem(i).getObjectID()); 1884 + contents.getArrayItem(i).getObjGen());
1867 } 1885 }
1868 } 1886 }
1869 else if (contents.isStream()) 1887 else if (contents.isStream())
1870 { 1888 {
1871 - contents_objects.push_back(contents.getObjectID()); 1889 + contents_objects.push_back(contents.getObjGen());
1872 } 1890 }
1873 1891
1874 - for (std::vector<int>::iterator iter = contents_objects.begin(); 1892 + for (std::vector<QPDFObjGen>::iterator iter = contents_objects.begin();
1875 iter != contents_objects.end(); ++iter) 1893 iter != contents_objects.end(); ++iter)
1876 { 1894 {
1877 this->contents_to_page_seq[*iter] = num; 1895 this->contents_to_page_seq[*iter] = num;
@@ -1883,7 +1901,20 @@ QPDFWriter::initializeSpecialStreams() @@ -1883,7 +1901,20 @@ QPDFWriter::initializeSpecialStreams()
1883 void 1901 void
1884 QPDFWriter::preserveObjectStreams() 1902 QPDFWriter::preserveObjectStreams()
1885 { 1903 {
1886 - this->pdf.getObjectStreamData(this->object_to_object_stream); 1904 + // Our object_to_object_stream map has to be keyed by ObjGen
  1905 + // since we may be generating object streams out of old objects
  1906 + // that have generation numbers greater than zero. However in an
  1907 + // existing PDF, all object stream objects and all objects in them
  1908 + // must have generation 0 because the PDF spec does not provide
  1909 + // any way to do otherwise.
  1910 + std::map<int, int> omap;
  1911 + this->pdf.getObjectStreamData(omap);
  1912 + for (std::map<int, int>::iterator iter = omap.begin();
  1913 + iter != omap.end(); ++iter)
  1914 + {
  1915 + this->object_to_object_stream[QPDFObjGen((*iter).first, 0)] =
  1916 + (*iter).second;
  1917 + }
1887 } 1918 }
1888 1919
1889 void 1920 void
@@ -1899,7 +1930,8 @@ QPDFWriter::generateObjectStreams() @@ -1899,7 +1930,8 @@ QPDFWriter::generateObjectStreams()
1899 1930
1900 // This code doesn't do anything with /Extends. 1931 // This code doesn't do anything with /Extends.
1901 1932
1902 - std::vector<int> const& eligible = this->pdf.getCompressibleObjects(); 1933 + std::vector<QPDFObjGen> const& eligible =
  1934 + this->pdf.getCompressibleObjGens();
1903 unsigned int n_object_streams = (eligible.size() + 99) / 100; 1935 unsigned int n_object_streams = (eligible.size() + 99) / 100;
1904 unsigned int n_per = eligible.size() / n_object_streams; 1936 unsigned int n_per = eligible.size() / n_object_streams;
1905 if (n_per * n_object_streams < eligible.size()) 1937 if (n_per * n_object_streams < eligible.size())
@@ -1908,7 +1940,7 @@ QPDFWriter::generateObjectStreams() @@ -1908,7 +1940,7 @@ QPDFWriter::generateObjectStreams()
1908 } 1940 }
1909 unsigned int n = 0; 1941 unsigned int n = 0;
1910 int cur_ostream = 0; 1942 int cur_ostream = 0;
1911 - for (std::vector<int>::const_iterator iter = eligible.begin(); 1943 + for (std::vector<QPDFObjGen>::const_iterator iter = eligible.begin();
1912 iter != eligible.end(); ++iter) 1944 iter != eligible.end(); ++iter)
1913 { 1945 {
1914 if ((n % n_per) == 0) 1946 if ((n % n_per) == 0)
@@ -2172,11 +2204,11 @@ QPDFWriter::write() @@ -2172,11 +2204,11 @@ QPDFWriter::write()
2172 iter != pages.end(); ++iter) 2204 iter != pages.end(); ++iter)
2173 { 2205 {
2174 QPDFObjectHandle& page = *iter; 2206 QPDFObjectHandle& page = *iter;
2175 - int objid = page.getObjectID();  
2176 - if (this->object_to_object_stream.count(objid)) 2207 + QPDFObjGen og = page.getObjGen();
  2208 + if (this->object_to_object_stream.count(og))
2177 { 2209 {
2178 QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary"); 2210 QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
2179 - this->object_to_object_stream.erase(objid); 2211 + this->object_to_object_stream.erase(og);
2180 } 2212 }
2181 } 2213 }
2182 } 2214 }
@@ -2188,20 +2220,20 @@ QPDFWriter::write() @@ -2188,20 +2220,20 @@ QPDFWriter::write()
2188 // 8.0.0 has a bug that prevents it from being able to handle 2220 // 8.0.0 has a bug that prevents it from being able to handle
2189 // encrypted files with compressed document catalogs, so we 2221 // encrypted files with compressed document catalogs, so we
2190 // disable them in that case as well. 2222 // disable them in that case as well.
2191 - int objid = pdf.getRoot().getObjectID();  
2192 - if (this->object_to_object_stream.count(objid)) 2223 + QPDFObjGen og = pdf.getRoot().getObjGen();
  2224 + if (this->object_to_object_stream.count(og))
2193 { 2225 {
2194 QTC::TC("qpdf", "QPDFWriter uncompressing root"); 2226 QTC::TC("qpdf", "QPDFWriter uncompressing root");
2195 - this->object_to_object_stream.erase(objid); 2227 + this->object_to_object_stream.erase(og);
2196 } 2228 }
2197 } 2229 }
2198 2230
2199 // Generate reverse mapping from object stream to objects 2231 // Generate reverse mapping from object stream to objects
2200 - for (std::map<int, int>::iterator iter = 2232 + for (std::map<QPDFObjGen, int>::iterator iter =
2201 this->object_to_object_stream.begin(); 2233 this->object_to_object_stream.begin();
2202 iter != this->object_to_object_stream.end(); ++iter) 2234 iter != this->object_to_object_stream.end(); ++iter)
2203 { 2235 {
2204 - int obj = (*iter).first; 2236 + QPDFObjGen obj = (*iter).first;
2205 int stream = (*iter).second; 2237 int stream = (*iter).second;
2206 this->object_stream_to_objects[stream].insert(obj); 2238 this->object_stream_to_objects[stream].insert(obj);
2207 this->max_ostream_index = 2239 this->max_ostream_index =
@@ -2303,7 +2335,8 @@ QPDFWriter::writeHintStream(int hint_id) @@ -2303,7 +2335,8 @@ QPDFWriter::writeHintStream(int hint_id)
2303 int S = 0; 2335 int S = 0;
2304 int O = 0; 2336 int O = 0;
2305 pdf.generateHintStream( 2337 pdf.generateHintStream(
2306 - this->xref, this->lengths, this->obj_renumber, hint_buffer, S, O); 2338 + this->xref, this->lengths, this->obj_renumber_no_gen,
  2339 + hint_buffer, S, O);
2307 2340
2308 openObject(hint_id); 2341 openObject(hint_id);
2309 setDataKey(hint_id); 2342 setDataKey(hint_id);
@@ -2522,19 +2555,57 @@ QPDFWriter::calculateXrefStreamPadding(int xref_bytes) @@ -2522,19 +2555,57 @@ QPDFWriter::calculateXrefStreamPadding(int xref_bytes)
2522 } 2555 }
2523 2556
2524 void 2557 void
  2558 +QPDFWriter::discardGeneration(std::map<QPDFObjGen, int> const& in,
  2559 + std::map<int, int>& out)
  2560 +{
  2561 + // There are deep assumptions in the linearization code in QPDF
  2562 + // that there is only one object with each object number; i.e.,
  2563 + // you can't have two objects with the same object number and
  2564 + // different generations. This is a pretty safe assumption
  2565 + // because Adobe Reader and Acrobat can't actually handle this
  2566 + // case. There is not much if any code in QPDF outside
  2567 + // linearization that assumes this, but the linearization code as
  2568 + // currently implemented would do weird things if we found such a
  2569 + // case. In order to avoid breaking ABI changes in QPDF, we will
  2570 + // first assert that this condition holds. Then we can create new
  2571 + // maps for QPDF that throw away generation numbers.
  2572 +
  2573 + out.clear();
  2574 + for (std::map<QPDFObjGen, int>::const_iterator iter = in.begin();
  2575 + iter != in.end(); ++iter)
  2576 + {
  2577 + if (out.count((*iter).first.getObj()))
  2578 + {
  2579 + throw std::logic_error(
  2580 + "QPDF cannot currently linearize files that contain"
  2581 + " multiple objects with the same object ID and different"
  2582 + " generations. If you see this error message, please file"
  2583 + " a bug report and attach the file if possible. As a"
  2584 + " workaround, first convert the file with qpdf without"
  2585 + " linearizing, and then linearize the result of that"
  2586 + " conversion.");
  2587 + }
  2588 + out[(*iter).first.getObj()] = (*iter).second;
  2589 + }
  2590 +}
  2591 +
  2592 +void
2525 QPDFWriter::writeLinearized() 2593 QPDFWriter::writeLinearized()
2526 { 2594 {
2527 // Optimize file and enqueue objects in order 2595 // Optimize file and enqueue objects in order
2528 2596
  2597 + discardGeneration(this->object_to_object_stream,
  2598 + this->object_to_object_stream_no_gen);
  2599 +
2529 bool need_xref_stream = (! this->object_to_object_stream.empty()); 2600 bool need_xref_stream = (! this->object_to_object_stream.empty());
2530 - pdf.optimize(this->object_to_object_stream); 2601 + pdf.optimize(this->object_to_object_stream_no_gen);
2531 2602
2532 std::vector<QPDFObjectHandle> part4; 2603 std::vector<QPDFObjectHandle> part4;
2533 std::vector<QPDFObjectHandle> part6; 2604 std::vector<QPDFObjectHandle> part6;
2534 std::vector<QPDFObjectHandle> part7; 2605 std::vector<QPDFObjectHandle> part7;
2535 std::vector<QPDFObjectHandle> part8; 2606 std::vector<QPDFObjectHandle> part8;
2536 std::vector<QPDFObjectHandle> part9; 2607 std::vector<QPDFObjectHandle> part9;
2537 - pdf.getLinearizedParts(this->object_to_object_stream, 2608 + pdf.getLinearizedParts(this->object_to_object_stream_no_gen,
2538 part4, part6, part7, part8, part9); 2609 part4, part6, part7, part8, part9);
2539 2610
2540 // Object number sequence: 2611 // Object number sequence:
@@ -2570,7 +2641,7 @@ QPDFWriter::writeLinearized() @@ -2570,7 +2641,7 @@ QPDFWriter::writeLinearized()
2570 for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs2[i]).begin(); 2641 for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs2[i]).begin();
2571 iter != (*vecs2[i]).end(); ++iter) 2642 iter != (*vecs2[i]).end(); ++iter)
2572 { 2643 {
2573 - assignCompressedObjectNumbers((*iter).getObjectID()); 2644 + assignCompressedObjectNumbers((*iter).getObjGen());
2574 } 2645 }
2575 } 2646 }
2576 int second_half_end = this->next_objid - 1; 2647 int second_half_end = this->next_objid - 1;
@@ -2602,7 +2673,7 @@ QPDFWriter::writeLinearized() @@ -2602,7 +2673,7 @@ QPDFWriter::writeLinearized()
2602 for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs1[i]).begin(); 2673 for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs1[i]).begin();
2603 iter != (*vecs1[i]).end(); ++iter) 2674 iter != (*vecs1[i]).end(); ++iter)
2604 { 2675 {
2605 - assignCompressedObjectNumbers((*iter).getObjectID()); 2676 + assignCompressedObjectNumbers((*iter).getObjGen());
2606 } 2677 }
2607 } 2678 }
2608 int first_half_end = this->next_objid - 1; 2679 int first_half_end = this->next_objid - 1;
@@ -2660,7 +2731,7 @@ QPDFWriter::writeLinearized() @@ -2660,7 +2731,7 @@ QPDFWriter::writeLinearized()
2660 if (pass == 2) 2731 if (pass == 2)
2661 { 2732 {
2662 std::vector<QPDFObjectHandle> const& pages = pdf.getAllPages(); 2733 std::vector<QPDFObjectHandle> const& pages = pdf.getAllPages();
2663 - int first_page_object = obj_renumber[pages[0].getObjectID()]; 2734 + int first_page_object = obj_renumber[pages[0].getObjGen()];
2664 int npages = pages.size(); 2735 int npages = pages.size();
2665 2736
2666 writeString(" /Linearized 1 /L "); 2737 writeString(" /Linearized 1 /L ");
@@ -2834,6 +2905,8 @@ QPDFWriter::writeLinearized() @@ -2834,6 +2905,8 @@ QPDFWriter::writeLinearized()
2834 writeString(QUtil::int_to_string(first_xref_offset)); 2905 writeString(QUtil::int_to_string(first_xref_offset));
2835 writeString("\n%%EOF\n"); 2906 writeString("\n%%EOF\n");
2836 2907
  2908 + discardGeneration(this->obj_renumber, this->obj_renumber_no_gen);
  2909 +
2837 if (pass == 1) 2910 if (pass == 1)
2838 { 2911 {
2839 // Close first pass pipeline 2912 // Close first pass pipeline
qpdf/qpdf.testcov
@@ -262,3 +262,4 @@ qpdf-c called qpdf_set_r6_encryption_parameters 0 @@ -262,3 +262,4 @@ qpdf-c called qpdf_set_r6_encryption_parameters 0
262 QPDFObjectHandle EOF in inline image 0 262 QPDFObjectHandle EOF in inline image 0
263 QPDFObjectHandle inline image token 0 263 QPDFObjectHandle inline image token 0
264 QPDF not caching overridden objstm object 0 264 QPDF not caching overridden objstm object 0
  265 +QPDFWriter original obj non-zero gen 0
qpdf/qtest/qpdf.test
@@ -199,7 +199,7 @@ $td->runtest("remove page we don't have", @@ -199,7 +199,7 @@ $td->runtest("remove page we don't have",
199 show_ntests(); 199 show_ntests();
200 # ---------- 200 # ----------
201 $td->notify("--- Miscellaneous Tests ---"); 201 $td->notify("--- Miscellaneous Tests ---");
202 -$n_tests += 62; 202 +$n_tests += 64;
203 203
204 $td->runtest("qpdf version", 204 $td->runtest("qpdf version",
205 {$td->COMMAND => "qpdf --version"}, 205 {$td->COMMAND => "qpdf --version"},
@@ -501,6 +501,14 @@ $td->runtest("overridden compressed objects", @@ -501,6 +501,14 @@ $td->runtest("overridden compressed objects",
501 $td->EXIT_STATUS => 0}, 501 $td->EXIT_STATUS => 0},
502 $td->NORMALIZE_NEWLINES); 502 $td->NORMALIZE_NEWLINES);
503 503
  504 +$td->runtest("generate object streams for gen > 0",
  505 + {$td->COMMAND => "qpdf --qdf --static-id" .
  506 + " --object-streams=generate gen1.pdf a.pdf"},
  507 + {$td->STRING => "", $td->EXIT_STATUS => 0});
  508 +$td->runtest("check file",
  509 + {$td->FILE => "a.pdf"},
  510 + {$td->FILE => "gen1.qdf"});
  511 +
504 show_ntests(); 512 show_ntests();
505 # ---------- 513 # ----------
506 $td->notify("--- Numeric range parsing tests ---"); 514 $td->notify("--- Numeric range parsing tests ---");
@@ -1183,6 +1191,7 @@ my @to_linearize = @@ -1183,6 +1191,7 @@ my @to_linearize =
1183 'lin-delete-and-reuse', # linearized, then delete and reuse 1191 'lin-delete-and-reuse', # linearized, then delete and reuse
1184 'object-stream', # contains object streams 1192 'object-stream', # contains object streams
1185 'hybrid-xref', # contains both xref tables and streams 1193 'hybrid-xref', # contains both xref tables and streams
  1194 + 'gen1', # has objects with generation > 0
1186 @linearized_files, # we should be able to relinearize 1195 @linearized_files, # we should be able to relinearize
1187 ); 1196 );
1188 1197
qpdf/qtest/qpdf/gen1.pdf 0 → 100644
  1 +%PDF-1.3
  2 +1 1 obj
  3 +<<
  4 + /Type /Catalog
  5 + /Pages 2 1 R
  6 +>>
  7 +endobj
  8 +
  9 +2 1 obj
  10 +<<
  11 + /Type /Pages
  12 + /Kids [
  13 + 3 1 R
  14 + ]
  15 + /Count 1
  16 +>>
  17 +endobj
  18 +
  19 +3 1 obj
  20 +<<
  21 + /Type /Page
  22 + /Parent 2 1 R
  23 + /MediaBox [0 0 612 792]
  24 + /Contents 4 1 R
  25 + /Resources <<
  26 + /ProcSet 5 1 R
  27 + /Font <<
  28 + /F1 6 1 R
  29 + >>
  30 + >>
  31 +>>
  32 +endobj
  33 +
  34 +4 1 obj
  35 +<<
  36 + /Length 44
  37 +>>
  38 +stream
  39 +BT
  40 + /F1 24 Tf
  41 + 72 720 Td
  42 + (Potato) Tj
  43 +ET
  44 +endstream
  45 +endobj
  46 +
  47 +5 1 obj
  48 +[
  49 + /PDF
  50 + /Text
  51 +]
  52 +endobj
  53 +
  54 +6 1 obj
  55 +<<
  56 + /Type /Font
  57 + /Subtype /Type1
  58 + /Name /F1
  59 + /BaseFont /Helvetica
  60 + /Encoding /WinAnsiEncoding
  61 +>>
  62 +endobj
  63 +
  64 +xref
  65 +0 7
  66 +0000000000 65535 f
  67 +0000000009 00001 n
  68 +0000000063 00001 n
  69 +0000000135 00001 n
  70 +0000000307 00001 n
  71 +0000000403 00001 n
  72 +0000000438 00001 n
  73 +trailer <<
  74 + /Size 7
  75 + /Root 1 1 R
  76 +>>
  77 +startxref
  78 +556
  79 +%%EOF
qpdf/qtest/qpdf/gen1.qdf 0 → 100644
No preview for this file type