Commit 05a6d9669d1ac20d3a86eb958506e247a5d91aa9
1 parent
3416c60f
redo padding calculation for first half xref stream; old calculation
failed to consider the effect of compressing the stream
git-svn-id: svn+q:///qpdf/trunk@932 71b93d88-0707-0410-a8cf-f5a4172ac649
Showing
1 changed file
with
35 additions
and
18 deletions
libqpdf/QPDFWriter.cc
| ... | ... | @@ -1734,14 +1734,15 @@ QPDFWriter::writeXRefStream(int objid, int max_id, int max_offset, |
| 1734 | 1734 | trailer_e which, int first, int last, int size) |
| 1735 | 1735 | { |
| 1736 | 1736 | return writeXRefStream(objid, max_id, max_offset, |
| 1737 | - which, first, last, size, 0, 0, 0, 0); | |
| 1737 | + which, first, last, size, 0, 0, 0, 0, false); | |
| 1738 | 1738 | } |
| 1739 | 1739 | |
| 1740 | 1740 | int |
| 1741 | 1741 | QPDFWriter::writeXRefStream(int xref_id, int max_id, int max_offset, |
| 1742 | 1742 | trailer_e which, int first, int last, int size, |
| 1743 | 1743 | int prev, int hint_id, |
| 1744 | - int hint_offset, int hint_length) | |
| 1744 | + int hint_offset, int hint_length, | |
| 1745 | + bool skip_compression) | |
| 1745 | 1746 | { |
| 1746 | 1747 | int xref_offset = this->pipeline->getCount(); |
| 1747 | 1748 | int space_before_zero = xref_offset - 1; |
| ... | ... | @@ -1764,8 +1765,14 @@ QPDFWriter::writeXRefStream(int xref_id, int max_id, int max_offset, |
| 1764 | 1765 | if (! ((this->stream_data_mode == qpdf_s_uncompress) || this->qdf_mode)) |
| 1765 | 1766 | { |
| 1766 | 1767 | compressed = true; |
| 1767 | - p = pushPipeline( | |
| 1768 | - new Pl_Flate("compress xref", p, Pl_Flate::a_deflate)); | |
| 1768 | + if (! skip_compression) | |
| 1769 | + { | |
| 1770 | + // When skip_compression is set, we still write the stream | |
| 1771 | + // dictionary for compression but don't actually compress. | |
| 1772 | + // This helps us with computation of padding for pass 1 of | |
| 1772 | + // linearization. Otherwise, compress as usual. | |
| 1773 | + p = pushPipeline( | |
| 1774 | + new Pl_Flate("compress xref", p, Pl_Flate::a_deflate)); | |
| 1775 | + } | |
| 1769 | 1776 | p = pushPipeline( |
| 1770 | 1777 | new Pl_PNGFilter( |
| 1771 | 1778 | "pngify xref", p, Pl_PNGFilter::a_encode, esize, 0)); |
| ... | ... | @@ -2024,12 +2031,15 @@ QPDFWriter::writeLinearized() |
| 2024 | 2031 | // Must pad here too. |
| 2025 | 2032 | if (pass == 1) |
| 2026 | 2033 | { |
| 2027 | - // first_half_max_obj_offset is very likely to fall | |
| 2028 | - // within the first 64K of the document (thus | |
| 2029 | - // requiring two bytes for offsets) since it is the | |
| 2030 | - // offset of the last uncompressed object in page 1. | |
| 2031 | - // We allow for it to do otherwise though. | |
| 2032 | - first_half_max_obj_offset = 65535; | |
| 2034 | + // Set first_half_max_obj_offset to a value large | |
| 2035 | + // enough to force four bytes to be reserved for each | |
| 2036 | + // file offset. This would provide adequate space for | |
| 2037 | + // the xref stream as long as the last object in page | |
| 2038 | + // 1 starts within the first 4 GB of the file, which | |
| 2039 | + // is extremely likely. In the second pass, we will | |
| 2040 | + // know the actual value for this, but it's okay if | |
| 2041 | + // it's smaller. | |
| 2042 | + first_half_max_obj_offset = 1 << 25; | |
| 2033 | 2043 | } |
| 2034 | 2044 | pos = this->pipeline->getCount(); |
| 2035 | 2045 | writeXRefStream(first_half_xref, first_half_end, |
| ... | ... | @@ -2037,19 +2047,24 @@ QPDFWriter::writeLinearized() |
| 2037 | 2047 | t_lin_first, first_half_start, first_half_end, |
| 2038 | 2048 | first_trailer_size, |
| 2039 | 2049 | hint_length + second_xref_offset, |
| 2040 | - hint_id, hint_offset, hint_length); | |
| 2050 | + hint_id, hint_offset, hint_length, | |
| 2051 | + (pass == 1)); | |
| 2041 | 2052 | int endpos = this->pipeline->getCount(); |
| 2042 | 2053 | if (pass == 1) |
| 2043 | 2054 | { |
| 2044 | 2055 | // Pad so we have enough room for the real xref |
| 2045 | - // stream. In an extremely unlikely worst case, | |
| 2046 | - // first_half_max_obj_offset could be enough larger to | |
| 2047 | - // require two extra bytes beyond what we calculated | |
| 2048 | - // in pass 1. This means we need to save two extra | |
| 2049 | - // bytes for each xref entry. To that, we'll add 10 | |
| 2050 | - // extra bytes for number length increases. | |
| 2056 | + // stream. We've written the stream without | |
| 2057 | + // compression (but with all the stream dictionary | |
| 2058 | + // parameters to enable it) and assuming a very | |
| 2059 | + // generous allowance for writing file offsets. We | |
| 2060 | + // need a little extra padding to allow for zlib's | |
| 2061 | + // output to be larger than its input (6 bytes plus 5 | |
| 2062 | + // bytes per 16K), and then we'll add 10 extra bytes | |
| 2063 | + // for number length increases. | |
| 2064 | + | |
| 2065 | + unsigned int xref_bytes = endpos - pos; | |
| 2051 | 2066 | int possible_extra = |
| 2052 | - 10 + (2 * (first_half_end - first_half_start + 1)); | |
| 2067 | + 16 + (5 * ((xref_bytes + 16383) / 16384)); | |
| 2053 | 2068 | for (int i = 0; i < possible_extra; ++i) |
| 2054 | 2069 | { |
| 2055 | 2070 | writeString(" "); |
| ... | ... | @@ -2064,6 +2079,8 @@ QPDFWriter::writeLinearized() |
| 2064 | 2079 | { |
| 2065 | 2080 | writeString(" "); |
| 2066 | 2081 | } |
| 2082 | + // A failure of this assertion means we didn't allow | |
| 2083 | + // enough padding for the first pass xref stream. | |
| 2067 | 2084 | assert(this->pipeline->getCount() == first_xref_end); |
| 2068 | 2085 | } |
| 2069 | 2086 | writeString("\n"); | ... | ... |