Commit 05a6d9669d1ac20d3a86eb958506e247a5d91aa9

Authored by Jay Berkenbilt
1 parent 3416c60f

redo padding calculation for first half xref stream; old calculation

failed to consider the effect of compressing the stream


git-svn-id: svn+q:///qpdf/trunk@932 71b93d88-0707-0410-a8cf-f5a4172ac649
Showing 1 changed file with 35 additions and 18 deletions
libqpdf/QPDFWriter.cc
... ... @@ -1734,14 +1734,15 @@ QPDFWriter::writeXRefStream(int objid, int max_id, int max_offset,
1734 1734 trailer_e which, int first, int last, int size)
1735 1735 {
1736 1736 return writeXRefStream(objid, max_id, max_offset,
1737   - which, first, last, size, 0, 0, 0, 0);
  1737 + which, first, last, size, 0, 0, 0, 0, false);
1738 1738 }
1739 1739  
1740 1740 int
1741 1741 QPDFWriter::writeXRefStream(int xref_id, int max_id, int max_offset,
1742 1742 trailer_e which, int first, int last, int size,
1743 1743 int prev, int hint_id,
1744   - int hint_offset, int hint_length)
  1744 + int hint_offset, int hint_length,
  1745 + bool skip_compression)
1745 1746 {
1746 1747 int xref_offset = this->pipeline->getCount();
1747 1748 int space_before_zero = xref_offset - 1;
... ... @@ -1764,8 +1765,14 @@ QPDFWriter::writeXRefStream(int xref_id, int max_id, int max_offset,
1764 1765 if (! ((this->stream_data_mode == qpdf_s_uncompress) || this->qdf_mode))
1765 1766 {
1766 1767 compressed = true;
1767   - p = pushPipeline(
1768   - new Pl_Flate("compress xref", p, Pl_Flate::a_deflate));
  1768 + if (! skip_compression)
  1769 + {
  1770 + // If skip_compression is set, write the stream dictionary
  1771 + // for compression but don't actually compress. This helps
  1772 + // with computation of padding for pass 1 of linearization.
  1773 + p = pushPipeline(
  1774 + new Pl_Flate("compress xref", p, Pl_Flate::a_deflate));
  1775 + }
1769 1776 p = pushPipeline(
1770 1777 new Pl_PNGFilter(
1771 1778 "pngify xref", p, Pl_PNGFilter::a_encode, esize, 0));
... ... @@ -2024,12 +2031,15 @@ QPDFWriter::writeLinearized()
2024 2031 // Must pad here too.
2025 2032 if (pass == 1)
2026 2033 {
2027   - // first_half_max_obj_offset is very likely to fall
2028   - // within the first 64K of the document (thus
2029   - // requiring two bytes for offsets) since it is the
2030   - // offset of the last uncompressed object in page 1.
2031   - // We allow for it to do otherwise though.
2032   - first_half_max_obj_offset = 65535;
  2034 + // Set first_half_max_obj_offset to a value large
  2035 + // enough to force four bytes to be reserved for each
  2036 + // file offset. This would provide adequate space for
  2037 + // the xref stream as long as the last object in page
  2038 + // 1 starts within the first 4 GB of the file, which
  2039 + // is extremely likely. In the second pass, we will
  2040 + // know the actual value for this, but it's okay if
  2041 + // it's smaller.
  2042 + first_half_max_obj_offset = 1 << 25;
2033 2043 }
2034 2044 pos = this->pipeline->getCount();
2035 2045 writeXRefStream(first_half_xref, first_half_end,
... ... @@ -2037,19 +2047,24 @@ QPDFWriter::writeLinearized()
2037 2047 t_lin_first, first_half_start, first_half_end,
2038 2048 first_trailer_size,
2039 2049 hint_length + second_xref_offset,
2040   - hint_id, hint_offset, hint_length);
  2050 + hint_id, hint_offset, hint_length,
  2051 + (pass == 1));
2041 2052 int endpos = this->pipeline->getCount();
2042 2053 if (pass == 1)
2043 2054 {
2044 2055 // Pad so we have enough room for the real xref
2045   - // stream. In an extremely unlikely worst case,
2046   - // first_half_max_obj_offset could be enough larger to
2047   - // require two extra bytes beyond what we calculated
2048   - // in pass 1. This means we need to save two extra
2049   - // bytes for each xref entry. To that, we'll add 10
2050   - // extra bytes for number length increases.
  2056 + // stream. We've written the stream without
  2057 + // compression (but with all the stream dictionary
  2058 + // parameters to enable it) and assuming a very
  2059 + // generous allowance for writing file offsets. We
  2060 + // need a little extra padding to allow for zlib's
  2061 + // output to be larger than its input (6 bytes plus 5
  2062 + // bytes per 16K), and then we'll add 10 extra bytes
  2063 + // for number length increases.
  2064 +
  2065 + unsigned int xref_bytes = endpos - pos;
2051 2066 int possible_extra =
2052   - 10 + (2 * (first_half_end - first_half_start + 1));
  2067 + 16 + (5 * ((xref_bytes + 16383) / 16384));
2053 2068 for (int i = 0; i < possible_extra; ++i)
2054 2069 {
2055 2070 writeString(" ");
... ... @@ -2064,6 +2079,8 @@ QPDFWriter::writeLinearized()
2064 2079 {
2065 2080 writeString(" ");
2066 2081 }
  2082 + // A failure of this insertion means we didn't allow
  2083 + // enough padding for the first pass xref stream.
2067 2084 assert(this->pipeline->getCount() == first_xref_end);
2068 2085 }
2069 2086 writeString("\n");
... ...