Commit a3576a73593987b26cd3eff346f8f7c11f713cbd

Authored by Jay Berkenbilt
1 parent 96eb9651

Bug fix: handle generation > 0 when generating object streams

Rework QPDFWriter to always track old object IDs as QPDFObjGen
instead of int, thus not discarding the generation number.  Switch to
QPDF::getCompressibleObjGens() to properly handle the case of an old
object eligible for compression that has a generation other than
zero.
ChangeLog
1 1 2013-06-14 Jay Berkenbilt <ejb@ql.org>
2 2  
  3 + * Bug fix: properly handle object stream generation when the
  4 + original file has some compressible objects with generation != 0.
  5 +
  6 + * Add QPDF::getCompressibleObjGens() and deprecate
  7 + QPDF::getCompressibleObjects(), which had a flaw in its logic.
  8 +
3 9 * Add new QPDFObjectHandle::getObjGen() method and indicate in
4 10 comments that its use is favored over getObjectID() and
5 11 getGeneration() for most cases.
... ...
include/qpdf/QPDF.hh
... ... @@ -434,8 +434,19 @@ class QPDF
434 434 // Map object to object stream that contains it
435 435 QPDF_DLL
436 436 void getObjectStreamData(std::map<int, int>&);
  437 +
437 438 // Get a list of objects that would be permitted in an object
438   - // stream
  439 + // stream.
  440 + QPDF_DLL
  441 + std::vector<QPDFObjGen> getCompressibleObjGens();
  442 +
  443 + // Deprecated: get a list of objects that would be permitted in an
  444 + // object stream. This method is deprecated and will be removed.
  445 + // It's incorrect because it disregards the generations of the
  446 + // compressible objects, which can lead (and has led) to bugs.
  447 + // This method will throw an exception if any of the objects
  448 + // returned have a generation of other than zero. Use
  449 + // getCompressibleObjGens() instead.
439 450 QPDF_DLL
440 451 std::vector<int> getCompressibleObjects();
441 452  
... ...
include/qpdf/QPDFWriter.hh
... ... @@ -24,6 +24,7 @@
24 24  
25 25 #include <qpdf/Constants.h>
26 26  
  27 +#include <qpdf/QPDFObjGen.hh>
27 28 #include <qpdf/QPDFXRefEntry.hh>
28 29  
29 30 #include <qpdf/Pl_Buffer.hh>
... ... @@ -289,7 +290,7 @@ class QPDFWriter
289 290 void writeStringQDF(std::string const& str);
290 291 void writeStringNoQDF(std::string const& str);
291 292 void writePad(int nspaces);
292   - void assignCompressedObjectNumbers(int objid);
  293 + void assignCompressedObjectNumbers(QPDFObjGen const& og);
293 294 void enqueueObject(QPDFObjectHandle object);
294 295 void writeObjectStreamOffsets(
295 296 std::vector<qpdf_offset_t>& offsets, int first_obj);
... ... @@ -380,6 +381,9 @@ class QPDFWriter
380 381 void pushEncryptionFilter();
381 382 void pushDiscardFilter();
382 383  
  384 + void discardGeneration(std::map<QPDFObjGen, int> const& in,
  385 + std::map<int, int>& out);
  386 +
383 387 QPDF& pdf;
384 388 char const* filename;
385 389 FILE* file;
... ... @@ -419,7 +423,7 @@ class QPDFWriter
419 423 std::list<PointerHolder<Pipeline> > to_delete;
420 424 Pl_Count* pipeline;
421 425 std::list<QPDFObjectHandle> object_queue;
422   - std::map<int, int> obj_renumber;
  426 + std::map<QPDFObjGen, int> obj_renumber;
423 427 std::map<int, QPDFXRefEntry> xref;
424 428 std::map<int, qpdf_offset_t> lengths;
425 429 int next_objid;
... ... @@ -427,12 +431,16 @@ class QPDFWriter
427 431 size_t cur_stream_length;
428 432 bool added_newline;
429 433 int max_ostream_index;
430   - std::set<int> normalized_streams;
431   - std::map<int, int> page_object_to_seq;
432   - std::map<int, int> contents_to_page_seq;
433   - std::map<int, int> object_to_object_stream;
434   - std::map<int, std::set<int> > object_stream_to_objects;
  434 + std::set<QPDFObjGen> normalized_streams;
  435 + std::map<QPDFObjGen, int> page_object_to_seq;
  436 + std::map<QPDFObjGen, int> contents_to_page_seq;
  437 + std::map<QPDFObjGen, int> object_to_object_stream;
  438 + std::map<int, std::set<QPDFObjGen> > object_stream_to_objects;
435 439 std::list<Pipeline*> pipeline_stack;
  440 +
  441 + // For linearization only
  442 + std::map<int, int> obj_renumber_no_gen;
  443 + std::map<int, int> object_to_object_stream_no_gen;
436 444 };
437 445  
438 446 #endif // __QPDFWRITER_HH__
... ...
libqpdf/QPDF.cc
... ... @@ -1944,55 +1944,68 @@ QPDF::getObjectStreamData(std::map<int, int>& omap)
1944 1944 std::vector<int>
1945 1945 QPDF::getCompressibleObjects()
1946 1946 {
1947   - // Return a set of object numbers of objects that are allowed to
1948   - // be in object streams. We disregard generation numbers here
1949   - // since this is a helper function for QPDFWriter which is going
1950   - // to renumber objects anyway. This code will do weird things if
1951   - // we have two objects with the same object number and different
1952   - // generations, but so do virtually all PDF consumers,
1953   - // particularly since this is not a permitted condition.
1954   -
1955   - // We walk through the objects by traversing the document from the
1956   - // root, including a traversal of the pages tree. This makes that
1957   - // objects that are on the same page are more likely to be in the
1958   - // same object stream, which is slightly more efficient,
  1947 + std::vector<QPDFObjGen> objects = getCompressibleObjGens();
  1948 + std::vector<int> result;
  1949 + for (std::vector<QPDFObjGen>::iterator iter = objects.begin();
  1950 + iter != objects.end(); ++iter)
  1951 + {
  1952 + if ((*iter).getGen() != 0)
  1953 + {
  1954 + throw std::logic_error(
  1955 + "QPDF::getCompressibleObjects() would return an object ID"
  1956 + " for an object with generation != 0. Use"
  1957 + " QPDF::getCompressibleObjGens() instead."
  1958 + " See comments in QPDF.hh.");
  1959 + }
  1960 + else
  1961 + {
  1962 + result.push_back((*iter).getObj());
  1963 + }
  1964 + }
  1965 + return result;
  1966 +}
  1967 +
  1968 +std::vector<QPDFObjGen>
  1969 +QPDF::getCompressibleObjGens()
  1970 +{
  1971 + // Return a list of objects that are allowed to be in object
  1972 + // streams. Walk through the objects by traversing the document
  1973 + // from the root, including a traversal of the pages tree. This
  1974 + // makes objects that are on the same page more likely to
  1975 + // be in the same object stream, which is slightly more efficient,
1959 1976 // particularly with linearized files. This is better than
1960 1977 // iterating through the xref table since it avoids preserving
1961 1978 // orphaned items.
1962 1979  
1963 1980 // Exclude encryption dictionary, if any
1964   - int encryption_dict_id = 0;
1965 1981 QPDFObjectHandle encryption_dict = trailer.getKey("/Encrypt");
1966   - if (encryption_dict.isIndirect())
1967   - {
1968   - encryption_dict_id = encryption_dict.getObjectID();
1969   - }
  1982 + QPDFObjGen encryption_dict_og = encryption_dict.getObjGen();
1970 1983  
1971   - std::set<int> visited;
  1984 + std::set<QPDFObjGen> visited;
1972 1985 std::list<QPDFObjectHandle> queue;
1973 1986 queue.push_front(this->trailer);
1974   - std::vector<int> result;
  1987 + std::vector<QPDFObjGen> result;
1975 1988 while (! queue.empty())
1976 1989 {
1977 1990 QPDFObjectHandle obj = queue.front();
1978 1991 queue.pop_front();
1979 1992 if (obj.isIndirect())
1980 1993 {
1981   - int objid = obj.getObjectID();
1982   - if (visited.count(objid))
  1994 + QPDFObjGen og = obj.getObjGen();
  1995 + if (visited.count(og))
1983 1996 {
1984 1997 QTC::TC("qpdf", "QPDF loop detected traversing objects");
1985 1998 continue;
1986 1999 }
1987   - if (objid == encryption_dict_id)
  2000 + if (og == encryption_dict_og)
1988 2001 {
1989 2002 QTC::TC("qpdf", "QPDF exclude encryption dictionary");
1990 2003 }
1991 2004 else if (! obj.isStream())
1992 2005 {
1993   - result.push_back(objid);
  2006 + result.push_back(og);
1994 2007 }
1995   - visited.insert(objid);
  2008 + visited.insert(og);
1996 2009 }
1997 2010 if (obj.isStream())
1998 2011 {
... ...
libqpdf/QPDFWriter.cc
... ... @@ -933,16 +933,19 @@ QPDFWriter::closeObject(int objid)
933 933 }
934 934  
935 935 void
936   -QPDFWriter::assignCompressedObjectNumbers(int objid)
  936 +QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen const& og)
937 937 {
938   - if (this->object_stream_to_objects.count(objid) == 0)
  938 + int objid = og.getObj();
  939 + if ((og.getGen() != 0) ||
  940 + (this->object_stream_to_objects.count(objid) == 0))
939 941 {
  942 + // This is not an object stream.
940 943 return;
941 944 }
942 945  
943 946 // Reserve numbers for the objects that belong to this object
944 947 // stream.
945   - for (std::set<int>::iterator iter =
  948 + for (std::set<QPDFObjGen>::iterator iter =
946 949 this->object_stream_to_objects[objid].begin();
947 950 iter != this->object_stream_to_objects[objid].end();
948 951 ++iter)
... ... @@ -969,30 +972,32 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object)
969 972 {
970 973 // This is a place-holder object for an object stream
971 974 }
972   - int objid = object.getObjectID();
  975 + QPDFObjGen og = object.getObjGen();
973 976  
974   - if (obj_renumber.count(objid) == 0)
  977 + if (obj_renumber.count(og) == 0)
975 978 {
976   - if (this->object_to_object_stream.count(objid))
  979 + if (this->object_to_object_stream.count(og))
977 980 {
978 981 // This is in an object stream. Don't process it
979   - // here. Instead, enqueue the object stream.
980   - int stream_id = this->object_to_object_stream[objid];
  982 + // here. Instead, enqueue the object stream. Object
  983 + // streams always have generation 0.
  984 + int stream_id = this->object_to_object_stream[og];
981 985 enqueueObject(this->pdf.getObjectByID(stream_id, 0));
982 986 }
983 987 else
984 988 {
985 989 object_queue.push_back(object);
986   - obj_renumber[objid] = next_objid++;
  990 + obj_renumber[og] = next_objid++;
987 991  
988   - if (this->object_stream_to_objects.count(objid))
  992 + if ((og.getGen() == 0) &&
  993 + this->object_stream_to_objects.count(og.getObj()))
989 994 {
990 995 // For linearized files, uncompressed objects go
991 996 // at end, and we take care of assigning numbers
992 997 // to them elsewhere.
993 998 if (! this->linearized)
994 999 {
995   - assignCompressedObjectNumbers(objid);
  1000 + assignCompressedObjectNumbers(og);
996 1001 }
997 1002 }
998 1003 else if ((! this->direct_stream_lengths) && object.isStream())
... ... @@ -1041,8 +1046,8 @@ QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags)
1041 1046 }
1042 1047 if (child.isIndirect())
1043 1048 {
1044   - int old_id = child.getObjectID();
1045   - int new_id = obj_renumber[old_id];
  1049 + QPDFObjGen old_og = child.getObjGen();
  1050 + int new_id = obj_renumber[old_og];
1046 1051 writeString(QUtil::int_to_string(new_id));
1047 1052 writeString(" 0 R");
1048 1053 }
... ... @@ -1134,7 +1139,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
1134 1139 unsigned int flags, size_t stream_length,
1135 1140 bool compress)
1136 1141 {
1137   - int old_id = object.getObjectID();
  1142 + QPDFObjGen old_og = object.getObjGen();
1138 1143 unsigned int child_flags = flags & ~f_stream;
1139 1144  
1140 1145 std::string indent;
... ... @@ -1201,7 +1206,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
1201 1206 bool have_extensions_adbe = false;
1202 1207  
1203 1208 QPDFObjectHandle extensions;
1204   - if (old_id == pdf.getRoot().getObjectID())
  1209 + if (old_og == pdf.getRoot().getObjGen())
1205 1210 {
1206 1211 is_root = true;
1207 1212 if (object.hasKey("/Extensions") &&
... ... @@ -1396,7 +1401,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
1396 1401 else if (object.isStream())
1397 1402 {
1398 1403 // Write stream data to a buffer.
1399   - int new_id = obj_renumber[old_id];
  1404 + int new_id = obj_renumber[old_og];
1400 1405 if (! this->direct_stream_lengths)
1401 1406 {
1402 1407 this->cur_stream_length_id = new_id + 1;
... ... @@ -1436,7 +1441,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
1436 1441 filter = true;
1437 1442 compress = false;
1438 1443 }
1439   - else if (this->normalize_content && normalized_streams.count(old_id))
  1444 + else if (this->normalize_content && normalized_streams.count(old_og))
1440 1445 {
1441 1446 normalize = true;
1442 1447 filter = true;
... ... @@ -1562,8 +1567,10 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1562 1567 // Note: object might be null if this is a place-holder for an
1563 1568 // object stream that we are generating from scratch.
1564 1569  
1565   - int old_id = object.getObjectID();
1566   - int new_id = obj_renumber[old_id];
  1570 + QPDFObjGen old_og = object.getObjGen();
  1571 + assert(old_og.getGen() == 0);
  1572 + int old_id = old_og.getObj();
  1573 + int new_id = obj_renumber[old_og];
1567 1574  
1568 1575 std::vector<qpdf_offset_t> offsets;
1569 1576 qpdf_offset_t first = 0;
... ... @@ -1612,12 +1619,12 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1612 1619 }
1613 1620  
1614 1621 int count = 0;
1615   - for (std::set<int>::iterator iter =
  1622 + for (std::set<QPDFObjGen>::iterator iter =
1616 1623 this->object_stream_to_objects[old_id].begin();
1617 1624 iter != this->object_stream_to_objects[old_id].end();
1618 1625 ++iter, ++count)
1619 1626 {
1620   - int obj = *iter;
  1627 + QPDFObjGen obj = *iter;
1621 1628 int new_obj = this->obj_renumber[obj];
1622 1629 if (first_obj == -1)
1623 1630 {
... ... @@ -1631,7 +1638,17 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1631 1638 if (! this->suppress_original_object_ids)
1632 1639 {
1633 1640 writeString("; original object ID: " +
1634   - QUtil::int_to_string(obj));
  1641 + QUtil::int_to_string(obj.getObj()));
  1642 + // For compatibility, only write the generation if
  1643 + // non-zero. While object streams only allow
  1644 + // objects with generation 0, if we are generating
  1645 + // object streams, the old object could have a
  1646 + // non-zero generation.
  1647 + if (obj.getGen() != 0)
  1648 + {
  1649 + QTC::TC("qpdf", "QPDFWriter original obj non-zero gen");
  1650 + writeString(" " + QUtil::int_to_string(obj.getGen()));
  1651 + }
1635 1652 }
1636 1653 writeString("\n");
1637 1654 }
... ... @@ -1639,7 +1656,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1639 1656 {
1640 1657 offsets.push_back(this->pipeline->getCount());
1641 1658 }
1642   - writeObject(this->pdf.getObjectByID(obj, 0), count);
  1659 + writeObject(this->pdf.getObjectByObjGen(obj), count);
1643 1660  
1644 1661 this->xref[new_obj] = QPDFXRefEntry(2, new_id, count);
1645 1662 }
... ... @@ -1697,32 +1714,33 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1697 1714 void
1698 1715 QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
1699 1716 {
1700   - int old_id = object.getObjectID();
  1717 + QPDFObjGen old_og = object.getObjGen();
1701 1718  
1702 1719 if ((object_stream_index == -1) &&
1703   - (this->object_stream_to_objects.count(old_id)))
  1720 + (old_og.getGen() == 0) &&
  1721 + (this->object_stream_to_objects.count(old_og.getObj())))
1704 1722 {
1705 1723 writeObjectStream(object);
1706 1724 return;
1707 1725 }
1708 1726  
1709   - int new_id = obj_renumber[old_id];
  1727 + int new_id = obj_renumber[old_og];
1710 1728 if (this->qdf_mode)
1711 1729 {
1712   - if (this->page_object_to_seq.count(old_id))
  1730 + if (this->page_object_to_seq.count(old_og))
1713 1731 {
1714 1732 writeString("%% Page ");
1715 1733 writeString(
1716 1734 QUtil::int_to_string(
1717   - this->page_object_to_seq[old_id]));
  1735 + this->page_object_to_seq[old_og]));
1718 1736 writeString("\n");
1719 1737 }
1720   - if (this->contents_to_page_seq.count(old_id))
  1738 + if (this->contents_to_page_seq.count(old_og))
1721 1739 {
1722 1740 writeString("%% Contents for page ");
1723 1741 writeString(
1724 1742 QUtil::int_to_string(
1725   - this->contents_to_page_seq[old_id]));
  1743 + this->contents_to_page_seq[old_og]));
1726 1744 writeString("\n");
1727 1745 }
1728 1746 }
... ... @@ -1854,24 +1872,24 @@ QPDFWriter::initializeSpecialStreams()
1854 1872 iter != pages.end(); ++iter)
1855 1873 {
1856 1874 QPDFObjectHandle& page = *iter;
1857   - this->page_object_to_seq[page.getObjectID()] = ++num;
  1875 + this->page_object_to_seq[page.getObjGen()] = ++num;
1858 1876 QPDFObjectHandle contents = page.getKey("/Contents");
1859   - std::vector<int> contents_objects;
  1877 + std::vector<QPDFObjGen> contents_objects;
1860 1878 if (contents.isArray())
1861 1879 {
1862 1880 int n = contents.getArrayNItems();
1863 1881 for (int i = 0; i < n; ++i)
1864 1882 {
1865 1883 contents_objects.push_back(
1866   - contents.getArrayItem(i).getObjectID());
  1884 + contents.getArrayItem(i).getObjGen());
1867 1885 }
1868 1886 }
1869 1887 else if (contents.isStream())
1870 1888 {
1871   - contents_objects.push_back(contents.getObjectID());
  1889 + contents_objects.push_back(contents.getObjGen());
1872 1890 }
1873 1891  
1874   - for (std::vector<int>::iterator iter = contents_objects.begin();
  1892 + for (std::vector<QPDFObjGen>::iterator iter = contents_objects.begin();
1875 1893 iter != contents_objects.end(); ++iter)
1876 1894 {
1877 1895 this->contents_to_page_seq[*iter] = num;
... ... @@ -1883,7 +1901,20 @@ QPDFWriter::initializeSpecialStreams()
1883 1901 void
1884 1902 QPDFWriter::preserveObjectStreams()
1885 1903 {
1886   - this->pdf.getObjectStreamData(this->object_to_object_stream);
  1904 + // Our object_to_object_stream map has to be keyed by ObjGen
  1905 + // since we may be generating object streams out of old objects
  1906 + // that have generation numbers greater than zero. However in an
  1907 + // existing PDF, all object stream objects and all objects in them
  1908 + // must have generation 0 because the PDF spec does not provide
  1909 + // any way to do otherwise.
  1910 + std::map<int, int> omap;
  1911 + this->pdf.getObjectStreamData(omap);
  1912 + for (std::map<int, int>::iterator iter = omap.begin();
  1913 + iter != omap.end(); ++iter)
  1914 + {
  1915 + this->object_to_object_stream[QPDFObjGen((*iter).first, 0)] =
  1916 + (*iter).second;
  1917 + }
1887 1918 }
1888 1919  
1889 1920 void
... ... @@ -1899,7 +1930,8 @@ QPDFWriter::generateObjectStreams()
1899 1930  
1900 1931 // This code doesn't do anything with /Extends.
1901 1932  
1902   - std::vector<int> const& eligible = this->pdf.getCompressibleObjects();
  1933 + std::vector<QPDFObjGen> const& eligible =
  1934 + this->pdf.getCompressibleObjGens();
1903 1935 unsigned int n_object_streams = (eligible.size() + 99) / 100;
1904 1936 unsigned int n_per = eligible.size() / n_object_streams;
1905 1937 if (n_per * n_object_streams < eligible.size())
... ... @@ -1908,7 +1940,7 @@ QPDFWriter::generateObjectStreams()
1908 1940 }
1909 1941 unsigned int n = 0;
1910 1942 int cur_ostream = 0;
1911   - for (std::vector<int>::const_iterator iter = eligible.begin();
  1943 + for (std::vector<QPDFObjGen>::const_iterator iter = eligible.begin();
1912 1944 iter != eligible.end(); ++iter)
1913 1945 {
1914 1946 if ((n % n_per) == 0)
... ... @@ -2172,11 +2204,11 @@ QPDFWriter::write()
2172 2204 iter != pages.end(); ++iter)
2173 2205 {
2174 2206 QPDFObjectHandle& page = *iter;
2175   - int objid = page.getObjectID();
2176   - if (this->object_to_object_stream.count(objid))
  2207 + QPDFObjGen og = page.getObjGen();
  2208 + if (this->object_to_object_stream.count(og))
2177 2209 {
2178 2210 QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
2179   - this->object_to_object_stream.erase(objid);
  2211 + this->object_to_object_stream.erase(og);
2180 2212 }
2181 2213 }
2182 2214 }
... ... @@ -2188,20 +2220,20 @@ QPDFWriter::write()
2188 2220 // 8.0.0 has a bug that prevents it from being able to handle
2189 2221 // encrypted files with compressed document catalogs, so we
2190 2222 // disable them in that case as well.
2191   - int objid = pdf.getRoot().getObjectID();
2192   - if (this->object_to_object_stream.count(objid))
  2223 + QPDFObjGen og = pdf.getRoot().getObjGen();
  2224 + if (this->object_to_object_stream.count(og))
2193 2225 {
2194 2226 QTC::TC("qpdf", "QPDFWriter uncompressing root");
2195   - this->object_to_object_stream.erase(objid);
  2227 + this->object_to_object_stream.erase(og);
2196 2228 }
2197 2229 }
2198 2230  
2199 2231 // Generate reverse mapping from object stream to objects
2200   - for (std::map<int, int>::iterator iter =
  2232 + for (std::map<QPDFObjGen, int>::iterator iter =
2201 2233 this->object_to_object_stream.begin();
2202 2234 iter != this->object_to_object_stream.end(); ++iter)
2203 2235 {
2204   - int obj = (*iter).first;
  2236 + QPDFObjGen obj = (*iter).first;
2205 2237 int stream = (*iter).second;
2206 2238 this->object_stream_to_objects[stream].insert(obj);
2207 2239 this->max_ostream_index =
... ... @@ -2303,7 +2335,8 @@ QPDFWriter::writeHintStream(int hint_id)
2303 2335 int S = 0;
2304 2336 int O = 0;
2305 2337 pdf.generateHintStream(
2306   - this->xref, this->lengths, this->obj_renumber, hint_buffer, S, O);
  2338 + this->xref, this->lengths, this->obj_renumber_no_gen,
  2339 + hint_buffer, S, O);
2307 2340  
2308 2341 openObject(hint_id);
2309 2342 setDataKey(hint_id);
... ... @@ -2522,19 +2555,57 @@ QPDFWriter::calculateXrefStreamPadding(int xref_bytes)
2522 2555 }
2523 2556  
2524 2557 void
  2558 +QPDFWriter::discardGeneration(std::map<QPDFObjGen, int> const& in,
  2559 + std::map<int, int>& out)
  2560 +{
  2561 + // There are deep assumptions in the linearization code in QPDF
  2562 + // that there is only one object with each object number; i.e.,
  2563 + // you can't have two objects with the same object number and
  2564 + // different generations. This is a pretty safe assumption
  2565 + // because Adobe Reader and Acrobat can't actually handle this
  2566 + // case. There is not much if any code in QPDF outside
  2567 + // linearization that assumes this, but the linearization code as
  2568 + // currently implemented would do weird things if we found such a
  2569 + // case. In order to avoid breaking ABI changes in QPDF, we will
  2570 + // first assert that this condition holds. Then we can create new
  2571 + // maps for QPDF that throw away generation numbers.
  2572 +
  2573 + out.clear();
  2574 + for (std::map<QPDFObjGen, int>::const_iterator iter = in.begin();
  2575 + iter != in.end(); ++iter)
  2576 + {
  2577 + if (out.count((*iter).first.getObj()))
  2578 + {
  2579 + throw std::logic_error(
  2580 + "QPDF cannot currently linearize files that contain"
  2581 + " multiple objects with the same object ID and different"
  2582 + " generations. If you see this error message, please file"
  2583 + " a bug report and attach the file if possible. As a"
  2584 + " workaround, first convert the file with qpdf without"
  2585 + " linearizing, and then linearize the result of that"
  2586 + " conversion.");
  2587 + }
  2588 + out[(*iter).first.getObj()] = (*iter).second;
  2589 + }
  2590 +}
  2591 +
  2592 +void
2525 2593 QPDFWriter::writeLinearized()
2526 2594 {
2527 2595 // Optimize file and enqueue objects in order
2528 2596  
  2597 + discardGeneration(this->object_to_object_stream,
  2598 + this->object_to_object_stream_no_gen);
  2599 +
2529 2600 bool need_xref_stream = (! this->object_to_object_stream.empty());
2530   - pdf.optimize(this->object_to_object_stream);
  2601 + pdf.optimize(this->object_to_object_stream_no_gen);
2531 2602  
2532 2603 std::vector<QPDFObjectHandle> part4;
2533 2604 std::vector<QPDFObjectHandle> part6;
2534 2605 std::vector<QPDFObjectHandle> part7;
2535 2606 std::vector<QPDFObjectHandle> part8;
2536 2607 std::vector<QPDFObjectHandle> part9;
2537   - pdf.getLinearizedParts(this->object_to_object_stream,
  2608 + pdf.getLinearizedParts(this->object_to_object_stream_no_gen,
2538 2609 part4, part6, part7, part8, part9);
2539 2610  
2540 2611 // Object number sequence:
... ... @@ -2570,7 +2641,7 @@ QPDFWriter::writeLinearized()
2570 2641 for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs2[i]).begin();
2571 2642 iter != (*vecs2[i]).end(); ++iter)
2572 2643 {
2573   - assignCompressedObjectNumbers((*iter).getObjectID());
  2644 + assignCompressedObjectNumbers((*iter).getObjGen());
2574 2645 }
2575 2646 }
2576 2647 int second_half_end = this->next_objid - 1;
... ... @@ -2602,7 +2673,7 @@ QPDFWriter::writeLinearized()
2602 2673 for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs1[i]).begin();
2603 2674 iter != (*vecs1[i]).end(); ++iter)
2604 2675 {
2605   - assignCompressedObjectNumbers((*iter).getObjectID());
  2676 + assignCompressedObjectNumbers((*iter).getObjGen());
2606 2677 }
2607 2678 }
2608 2679 int first_half_end = this->next_objid - 1;
... ... @@ -2660,7 +2731,7 @@ QPDFWriter::writeLinearized()
2660 2731 if (pass == 2)
2661 2732 {
2662 2733 std::vector<QPDFObjectHandle> const& pages = pdf.getAllPages();
2663   - int first_page_object = obj_renumber[pages[0].getObjectID()];
  2734 + int first_page_object = obj_renumber[pages[0].getObjGen()];
2664 2735 int npages = pages.size();
2665 2736  
2666 2737 writeString(" /Linearized 1 /L ");
... ... @@ -2834,6 +2905,8 @@ QPDFWriter::writeLinearized()
2834 2905 writeString(QUtil::int_to_string(first_xref_offset));
2835 2906 writeString("\n%%EOF\n");
2836 2907  
  2908 + discardGeneration(this->obj_renumber, this->obj_renumber_no_gen);
  2909 +
2837 2910 if (pass == 1)
2838 2911 {
2839 2912 // Close first pass pipeline
... ...
qpdf/qpdf.testcov
... ... @@ -262,3 +262,4 @@ qpdf-c called qpdf_set_r6_encryption_parameters 0
262 262 QPDFObjectHandle EOF in inline image 0
263 263 QPDFObjectHandle inline image token 0
264 264 QPDF not caching overridden objstm object 0
  265 +QPDFWriter original obj non-zero gen 0
... ...
qpdf/qtest/qpdf.test
... ... @@ -199,7 +199,7 @@ $td->runtest("remove page we don't have",
199 199 show_ntests();
200 200 # ----------
201 201 $td->notify("--- Miscellaneous Tests ---");
202   -$n_tests += 62;
  202 +$n_tests += 64;
203 203  
204 204 $td->runtest("qpdf version",
205 205 {$td->COMMAND => "qpdf --version"},
... ... @@ -501,6 +501,14 @@ $td->runtest("overridden compressed objects",
501 501 $td->EXIT_STATUS => 0},
502 502 $td->NORMALIZE_NEWLINES);
503 503  
  504 +$td->runtest("generate object streams for gen > 0",
  505 + {$td->COMMAND => "qpdf --qdf --static-id" .
  506 + " --object-streams=generate gen1.pdf a.pdf"},
  507 + {$td->STRING => "", $td->EXIT_STATUS => 0});
  508 +$td->runtest("check file",
  509 + {$td->FILE => "a.pdf"},
  510 + {$td->FILE => "gen1.qdf"});
  511 +
504 512 show_ntests();
505 513 # ----------
506 514 $td->notify("--- Numeric range parsing tests ---");
... ... @@ -1183,6 +1191,7 @@ my @to_linearize =
1183 1191 'lin-delete-and-reuse', # linearized, then delete and reuse
1184 1192 'object-stream', # contains object streams
1185 1193 'hybrid-xref', # contains both xref tables and streams
  1194 + 'gen1', # has objects with generation > 0
1186 1195 @linearized_files, # we should be able to relinearize
1187 1196 );
1188 1197  
... ...
qpdf/qtest/qpdf/gen1.pdf 0 → 100644
  1 +%PDF-1.3
  2 +1 1 obj
  3 +<<
  4 + /Type /Catalog
  5 + /Pages 2 1 R
  6 +>>
  7 +endobj
  8 +
  9 +2 1 obj
  10 +<<
  11 + /Type /Pages
  12 + /Kids [
  13 + 3 1 R
  14 + ]
  15 + /Count 1
  16 +>>
  17 +endobj
  18 +
  19 +3 1 obj
  20 +<<
  21 + /Type /Page
  22 + /Parent 2 1 R
  23 + /MediaBox [0 0 612 792]
  24 + /Contents 4 1 R
  25 + /Resources <<
  26 + /ProcSet 5 1 R
  27 + /Font <<
  28 + /F1 6 1 R
  29 + >>
  30 + >>
  31 +>>
  32 +endobj
  33 +
  34 +4 1 obj
  35 +<<
  36 + /Length 44
  37 +>>
  38 +stream
  39 +BT
  40 + /F1 24 Tf
  41 + 72 720 Td
  42 + (Potato) Tj
  43 +ET
  44 +endstream
  45 +endobj
  46 +
  47 +5 1 obj
  48 +[
  49 + /PDF
  50 + /Text
  51 +]
  52 +endobj
  53 +
  54 +6 1 obj
  55 +<<
  56 + /Type /Font
  57 + /Subtype /Type1
  58 + /Name /F1
  59 + /BaseFont /Helvetica
  60 + /Encoding /WinAnsiEncoding
  61 +>>
  62 +endobj
  63 +
  64 +xref
  65 +0 7
  66 +0000000000 65535 f
  67 +0000000009 00001 n
  68 +0000000063 00001 n
  69 +0000000135 00001 n
  70 +0000000307 00001 n
  71 +0000000403 00001 n
  72 +0000000438 00001 n
  73 +trailer <<
  74 + /Size 7
  75 + /Root 1 1 R
  76 +>>
  77 +startxref
  78 +556
  79 +%%EOF
... ...
qpdf/qtest/qpdf/gen1.qdf 0 → 100644
No preview for this file type