Commit 09027344b9a6265a597da7c3c92e0fdd3ccb71fb
1 parent
4cbe2abc
Refactor: separate code that determines whether to filter a stream
Showing
2 changed files
with
104 additions
and
89 deletions
include/qpdf/QPDFWriter.hh
| @@ -529,6 +529,9 @@ class QPDFWriter | @@ -529,6 +529,9 @@ class QPDFWriter | ||
| 529 | void writeTrailer(trailer_e which, int size, | 529 | void writeTrailer(trailer_e which, int size, |
| 530 | bool xref_stream, qpdf_offset_t prev, | 530 | bool xref_stream, qpdf_offset_t prev, |
| 531 | int linearization_pass); | 531 | int linearization_pass); |
| 532 | + bool willFilterStream(QPDFObjectHandle stream, | ||
| 533 | + bool& compress_stream, bool& is_metadata, | ||
| 534 | + PointerHolder<Buffer>* stream_data); | ||
| 532 | void unparseObject(QPDFObjectHandle object, int level, int flags, | 535 | void unparseObject(QPDFObjectHandle object, int level, int flags, |
| 533 | // for stream dictionaries | 536 | // for stream dictionaries |
| 534 | size_t stream_length = 0, bool compress = false); | 537 | size_t stream_length = 0, bool compress = false); |
libqpdf/QPDFWriter.cc
| @@ -1463,6 +1463,96 @@ QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream, | @@ -1463,6 +1463,96 @@ QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream, | ||
| 1463 | writeString(">>"); | 1463 | writeString(">>"); |
| 1464 | } | 1464 | } |
| 1465 | 1465 | ||
| 1466 | +bool | ||
| 1467 | +QPDFWriter::willFilterStream(QPDFObjectHandle stream, | ||
| 1468 | + bool& compress_stream, bool& is_metadata, | ||
| 1469 | + PointerHolder<Buffer>* stream_data) | ||
| 1470 | +{ | ||
| 1471 | + compress_stream = false; | ||
| 1472 | + is_metadata = false; | ||
| 1473 | + QPDFObjGen old_og = stream.getObjGen(); | ||
| 1474 | + QPDFObjectHandle stream_dict = stream.getDict(); | ||
| 1475 | + | ||
| 1476 | + if (stream_dict.getKey("/Type").isName() && | ||
| 1477 | + (stream_dict.getKey("/Type").getName() == "/Metadata")) | ||
| 1478 | + { | ||
| 1479 | + is_metadata = true; | ||
| 1480 | + } | ||
| 1481 | + bool filter = (stream.isDataModified() || | ||
| 1482 | + this->m->compress_streams || | ||
| 1483 | + this->m->stream_decode_level); | ||
| 1484 | + if (this->m->compress_streams) | ||
| 1485 | + { | ||
| 1486 | + // Don't filter if the stream is already compressed with | ||
| 1487 | + // FlateDecode. This way we don't make it worse if the | ||
| 1488 | + // original file used a better Flate algorithm, and we | ||
| 1489 | + // don't spend time and CPU cycles uncompressing and | ||
| 1490 | + // recompressing stuff. This can be overridden with | ||
| 1491 | + // setRecompressFlate(true). | ||
| 1492 | + QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter"); | ||
| 1493 | + if ((! this->m->recompress_flate) && | ||
| 1494 | + (! stream.isDataModified()) && | ||
| 1495 | + filter_obj.isName() && | ||
| 1496 | + ((filter_obj.getName() == "/FlateDecode") || | ||
| 1497 | + (filter_obj.getName() == "/Fl"))) | ||
| 1498 | + { | ||
| 1499 | + QTC::TC("qpdf", "QPDFWriter not recompressing /FlateDecode"); | ||
| 1500 | + filter = false; | ||
| 1501 | + } | ||
| 1502 | + } | ||
| 1503 | + bool normalize = false; | ||
| 1504 | + bool uncompress = false; | ||
| 1505 | + if (is_metadata && | ||
| 1506 | + ((! this->m->encrypted) || (this->m->encrypt_metadata == false))) | ||
| 1507 | + { | ||
| 1508 | + QTC::TC("qpdf", "QPDFWriter not compressing metadata"); | ||
| 1509 | + filter = true; | ||
| 1510 | + compress_stream = false; | ||
| 1511 | + uncompress = true; | ||
| 1512 | + } | ||
| 1513 | + else if (this->m->normalize_content && | ||
| 1514 | + this->m->normalized_streams.count(old_og)) | ||
| 1515 | + { | ||
| 1516 | + normalize = true; | ||
| 1517 | + filter = true; | ||
| 1518 | + } | ||
| 1519 | + else if (filter && this->m->compress_streams) | ||
| 1520 | + { | ||
| 1521 | + compress_stream = true; | ||
| 1522 | + QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream"); | ||
| 1523 | + } | ||
| 1524 | + | ||
| 1525 | + bool filtered = false; | ||
| 1526 | + for (int attempt = 1; attempt <= 2; ++attempt) | ||
| 1527 | + { | ||
| 1528 | + pushPipeline(new Pl_Buffer("stream data")); | ||
| 1529 | + PipelinePopper pp_stream_data(this, stream_data); | ||
| 1530 | + activatePipelineStack(pp_stream_data); | ||
| 1531 | + filtered = | ||
| 1532 | + stream.pipeStreamData( | ||
| 1533 | + this->m->pipeline, | ||
| 1534 | + (((filter && normalize) ? qpdf_ef_normalize : 0) | | ||
| 1535 | + ((filter && compress_stream) ? qpdf_ef_compress : 0)), | ||
| 1536 | + (filter | ||
| 1537 | + ? (uncompress ? qpdf_dl_all : this->m->stream_decode_level) | ||
| 1538 | + : qpdf_dl_none), false, (attempt == 1)); | ||
| 1539 | + if (filter && (! filtered)) | ||
| 1540 | + { | ||
| 1541 | + // Try again | ||
| 1542 | + filter = false; | ||
| 1543 | + } | ||
| 1544 | + else | ||
| 1545 | + { | ||
| 1546 | + break; | ||
| 1547 | + } | ||
| 1548 | + } | ||
| 1549 | + if (! filtered) | ||
| 1550 | + { | ||
| 1551 | + compress_stream = false; | ||
| 1552 | + } | ||
| 1553 | + return filtered; | ||
| 1554 | +} | ||
| 1555 | + | ||
| 1466 | void | 1556 | void |
| 1467 | QPDFWriter::unparseObject(QPDFObjectHandle object, int level, | 1557 | QPDFWriter::unparseObject(QPDFObjectHandle object, int level, |
| 1468 | int flags, size_t stream_length, | 1558 | int flags, size_t stream_length, |
| @@ -1502,13 +1592,12 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, | @@ -1502,13 +1592,12 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, | ||
| 1502 | else if (object.isDictionary()) | 1592 | else if (object.isDictionary()) |
| 1503 | { | 1593 | { |
| 1504 | // Make a shallow copy of this object so we can modify it | 1594 | // Make a shallow copy of this object so we can modify it |
| 1505 | - // safely without affecting the original. This code makes | ||
| 1506 | - // assumptions about things that are made true in | ||
| 1507 | - // prepareFileForWrite, such as that certain things are direct | ||
| 1508 | - // objects so that replacing them doesn't leave unreferenced | ||
| 1509 | - // objects in the output. We can use unsafeShallowCopy here | ||
| 1510 | - // because we are all we are doing is removing or replacing | ||
| 1511 | - // top-level keys. | 1595 | + // safely without affecting the original. This code has logic |
| 1596 | + // to skip certain keys in agreement with prepareFileForWrite | ||
| 1597 | + // and with skip_stream_parameters so that replacing them | ||
| 1598 | + // doesn't leave unreferenced objects in the output. We can | ||
| 1599 | + // use unsafeShallowCopy here because all we are doing is | ||
| 1600 | + // removing or replacing top-level keys. | ||
| 1512 | object = object.unsafeShallowCopy(); | 1601 | object = object.unsafeShallowCopy(); |
| 1513 | 1602 | ||
| 1514 | // Handle special cases for specific dictionaries. | 1603 | // Handle special cases for specific dictionaries. |
| @@ -1760,94 +1849,17 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, | @@ -1760,94 +1849,17 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, | ||
| 1760 | { | 1849 | { |
| 1761 | this->m->cur_stream_length_id = new_id + 1; | 1850 | this->m->cur_stream_length_id = new_id + 1; |
| 1762 | } | 1851 | } |
| 1763 | - QPDFObjectHandle stream_dict = object.getDict(); | ||
| 1764 | - | ||
| 1765 | - bool is_metadata = false; | ||
| 1766 | - if (stream_dict.getKey("/Type").isName() && | ||
| 1767 | - (stream_dict.getKey("/Type").getName() == "/Metadata")) | ||
| 1768 | - { | ||
| 1769 | - is_metadata = true; | ||
| 1770 | - } | ||
| 1771 | - bool filter = (object.isDataModified() || | ||
| 1772 | - this->m->compress_streams || | ||
| 1773 | - this->m->stream_decode_level); | ||
| 1774 | - if (this->m->compress_streams) | ||
| 1775 | - { | ||
| 1776 | - // Don't filter if the stream is already compressed with | ||
| 1777 | - // FlateDecode. This way we don't make it worse if the | ||
| 1778 | - // original file used a better Flate algorithm, and we | ||
| 1779 | - // don't spend time and CPU cycles uncompressing and | ||
| 1780 | - // recompressing stuff. This can be overridden with | ||
| 1781 | - // setRecompressFlate(true). | ||
| 1782 | - QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter"); | ||
| 1783 | - if ((! this->m->recompress_flate) && | ||
| 1784 | - (! object.isDataModified()) && | ||
| 1785 | - filter_obj.isName() && | ||
| 1786 | - ((filter_obj.getName() == "/FlateDecode") || | ||
| 1787 | - (filter_obj.getName() == "/Fl"))) | ||
| 1788 | - { | ||
| 1789 | - QTC::TC("qpdf", "QPDFWriter not recompressing /FlateDecode"); | ||
| 1790 | - filter = false; | ||
| 1791 | - } | ||
| 1792 | - } | ||
| 1793 | - bool normalize = false; | ||
| 1794 | - bool compress_stream = false; | ||
| 1795 | - bool uncompress = false; | ||
| 1796 | - if (is_metadata && | ||
| 1797 | - ((! this->m->encrypted) || (this->m->encrypt_metadata == false))) | ||
| 1798 | - { | ||
| 1799 | - QTC::TC("qpdf", "QPDFWriter not compressing metadata"); | ||
| 1800 | - filter = true; | ||
| 1801 | - compress_stream = false; | ||
| 1802 | - uncompress = true; | ||
| 1803 | - } | ||
| 1804 | - else if (this->m->normalize_content && | ||
| 1805 | - this->m->normalized_streams.count(old_og)) | ||
| 1806 | - { | ||
| 1807 | - normalize = true; | ||
| 1808 | - filter = true; | ||
| 1809 | - } | ||
| 1810 | - else if (filter && this->m->compress_streams) | ||
| 1811 | - { | ||
| 1812 | - compress_stream = true; | ||
| 1813 | - QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream"); | ||
| 1814 | - } | ||
| 1815 | 1852 | ||
| 1816 | flags |= f_stream; | 1853 | flags |= f_stream; |
| 1817 | - | 1854 | + bool compress_stream = false; |
| 1855 | + bool is_metadata = false; | ||
| 1818 | PointerHolder<Buffer> stream_data; | 1856 | PointerHolder<Buffer> stream_data; |
| 1819 | - bool filtered = false; | ||
| 1820 | - for (int attempt = 1; attempt <= 2; ++attempt) | ||
| 1821 | - { | ||
| 1822 | - pushPipeline(new Pl_Buffer("stream data")); | ||
| 1823 | - PipelinePopper pp_stream_data(this, &stream_data); | ||
| 1824 | - activatePipelineStack(pp_stream_data); | ||
| 1825 | - filtered = | ||
| 1826 | - object.pipeStreamData( | ||
| 1827 | - this->m->pipeline, | ||
| 1828 | - (((filter && normalize) ? qpdf_ef_normalize : 0) | | ||
| 1829 | - ((filter && compress_stream) ? qpdf_ef_compress : 0)), | ||
| 1830 | - (filter | ||
| 1831 | - ? (uncompress ? qpdf_dl_all : this->m->stream_decode_level) | ||
| 1832 | - : qpdf_dl_none), false, (attempt == 1)); | ||
| 1833 | - if (filter && (! filtered)) | ||
| 1834 | - { | ||
| 1835 | - // Try again | ||
| 1836 | - filter = false; | ||
| 1837 | - } | ||
| 1838 | - else | ||
| 1839 | - { | ||
| 1840 | - break; | ||
| 1841 | - } | ||
| 1842 | - } | ||
| 1843 | - if (filtered) | 1857 | + if (willFilterStream(object, compress_stream, |
| 1858 | + is_metadata, &stream_data)) | ||
| 1844 | { | 1859 | { |
| 1845 | flags |= f_filtered; | 1860 | flags |= f_filtered; |
| 1846 | } | 1861 | } |
| 1847 | - else | ||
| 1848 | - { | ||
| 1849 | - compress_stream = false; | ||
| 1850 | - } | 1862 | + QPDFObjectHandle stream_dict = object.getDict(); |
| 1851 | 1863 | ||
| 1852 | this->m->cur_stream_length = stream_data->getSize(); | 1864 | this->m->cur_stream_length = stream_data->getSize(); |
| 1853 | if (is_metadata && this->m->encrypted && (! this->m->encrypt_metadata)) | 1865 | if (is_metadata && this->m->encrypted && (! this->m->encrypt_metadata)) |