Commit 09027344b9a6265a597da7c3c92e0fdd3ccb71fb
1 parent
4cbe2abc
Refactor: separate code that determines whether to filter a stream
Showing 2 changed files with 104 additions and 89 deletions
include/qpdf/QPDFWriter.hh
| ... | ... | @@ -529,6 +529,9 @@ class QPDFWriter |
| 529 | 529 | void writeTrailer(trailer_e which, int size, |
| 530 | 530 | bool xref_stream, qpdf_offset_t prev, |
| 531 | 531 | int linearization_pass); |
| 532 | + bool willFilterStream(QPDFObjectHandle stream, | |
| 533 | + bool& compress_stream, bool& is_metadata, | |
| 534 | + PointerHolder<Buffer>* stream_data); | |
| 532 | 535 | void unparseObject(QPDFObjectHandle object, int level, int flags, |
| 533 | 536 | // for stream dictionaries |
| 534 | 537 | size_t stream_length = 0, bool compress = false); | ... | ... |
libqpdf/QPDFWriter.cc
| ... | ... | @@ -1463,6 +1463,96 @@ QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream, |
| 1463 | 1463 | writeString(">>"); |
| 1464 | 1464 | } |
| 1465 | 1465 | |
| 1466 | +bool | |
| 1467 | +QPDFWriter::willFilterStream(QPDFObjectHandle stream, | |
| 1468 | + bool& compress_stream, bool& is_metadata, | |
| 1469 | + PointerHolder<Buffer>* stream_data) | |
| 1470 | +{ | |
| 1471 | + compress_stream = false; | |
| 1472 | + is_metadata = false; | |
| 1473 | + QPDFObjGen old_og = stream.getObjGen(); | |
| 1474 | + QPDFObjectHandle stream_dict = stream.getDict(); | |
| 1475 | + | |
| 1476 | + if (stream_dict.getKey("/Type").isName() && | |
| 1477 | + (stream_dict.getKey("/Type").getName() == "/Metadata")) | |
| 1478 | + { | |
| 1479 | + is_metadata = true; | |
| 1480 | + } | |
| 1481 | + bool filter = (stream.isDataModified() || | |
| 1482 | + this->m->compress_streams || | |
| 1483 | + this->m->stream_decode_level); | |
| 1484 | + if (this->m->compress_streams) | |
| 1485 | + { | |
| 1486 | + // Don't filter if the stream is already compressed with | |
| 1487 | + // FlateDecode. This way we don't make it worse if the | |
| 1488 | + // original file used a better Flate algorithm, and we | |
| 1489 | + // don't spend time and CPU cycles uncompressing and | |
| 1490 | + // recompressing stuff. This can be overridden with | |
| 1491 | + // setRecompressFlate(true). | |
| 1492 | + QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter"); | |
| 1493 | + if ((! this->m->recompress_flate) && | |
| 1494 | + (! stream.isDataModified()) && | |
| 1495 | + filter_obj.isName() && | |
| 1496 | + ((filter_obj.getName() == "/FlateDecode") || | |
| 1497 | + (filter_obj.getName() == "/Fl"))) | |
| 1498 | + { | |
| 1499 | + QTC::TC("qpdf", "QPDFWriter not recompressing /FlateDecode"); | |
| 1500 | + filter = false; | |
| 1501 | + } | |
| 1502 | + } | |
| 1503 | + bool normalize = false; | |
| 1504 | + bool uncompress = false; | |
| 1505 | + if (is_metadata && | |
| 1506 | + ((! this->m->encrypted) || (this->m->encrypt_metadata == false))) | |
| 1507 | + { | |
| 1508 | + QTC::TC("qpdf", "QPDFWriter not compressing metadata"); | |
| 1509 | + filter = true; | |
| 1510 | + compress_stream = false; | |
| 1511 | + uncompress = true; | |
| 1512 | + } | |
| 1513 | + else if (this->m->normalize_content && | |
| 1514 | + this->m->normalized_streams.count(old_og)) | |
| 1515 | + { | |
| 1516 | + normalize = true; | |
| 1517 | + filter = true; | |
| 1518 | + } | |
| 1519 | + else if (filter && this->m->compress_streams) | |
| 1520 | + { | |
| 1521 | + compress_stream = true; | |
| 1522 | + QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream"); | |
| 1523 | + } | |
| 1524 | + | |
| 1525 | + bool filtered = false; | |
| 1526 | + for (int attempt = 1; attempt <= 2; ++attempt) | |
| 1527 | + { | |
| 1528 | + pushPipeline(new Pl_Buffer("stream data")); | |
| 1529 | + PipelinePopper pp_stream_data(this, stream_data); | |
| 1530 | + activatePipelineStack(pp_stream_data); | |
| 1531 | + filtered = | |
| 1532 | + stream.pipeStreamData( | |
| 1533 | + this->m->pipeline, | |
| 1534 | + (((filter && normalize) ? qpdf_ef_normalize : 0) | | |
| 1535 | + ((filter && compress_stream) ? qpdf_ef_compress : 0)), | |
| 1536 | + (filter | |
| 1537 | + ? (uncompress ? qpdf_dl_all : this->m->stream_decode_level) | |
| 1538 | + : qpdf_dl_none), false, (attempt == 1)); | |
| 1539 | + if (filter && (! filtered)) | |
| 1540 | + { | |
| 1541 | + // Try again | |
| 1542 | + filter = false; | |
| 1543 | + } | |
| 1544 | + else | |
| 1545 | + { | |
| 1546 | + break; | |
| 1547 | + } | |
| 1548 | + } | |
| 1549 | + if (! filtered) | |
| 1550 | + { | |
| 1551 | + compress_stream = false; | |
| 1552 | + } | |
| 1553 | + return filtered; | |
| 1554 | +} | |
| 1555 | + | |
| 1466 | 1556 | void |
| 1467 | 1557 | QPDFWriter::unparseObject(QPDFObjectHandle object, int level, |
| 1468 | 1558 | int flags, size_t stream_length, |
| ... | ... | @@ -1502,13 +1592,12 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, |
| 1502 | 1592 | else if (object.isDictionary()) |
| 1503 | 1593 | { |
| 1504 | 1594 | // Make a shallow copy of this object so we can modify it |
| 1505 | - // safely without affecting the original. This code makes | |
| 1506 | - // assumptions about things that are made true in | |
| 1507 | - // prepareFileForWrite, such as that certain things are direct | |
| 1508 | - // objects so that replacing them doesn't leave unreferenced | |
| 1509 | - // objects in the output. We can use unsafeShallowCopy here | |
| 1510 | - // because we are all we are doing is removing or replacing | |
| 1511 | - // top-level keys. | |
| 1595 | + // safely without affecting the original. This code has logic | |
| 1596 | + // to skip certain keys in agreement with prepareFileForWrite | |
| 1597 | + // and with skip_stream_parameters so that replacing them | |
| 1598 | + // doesn't leave unreferenced objects in the output. We can | |
| 1599 | + // use unsafeShallowCopy here because all we are doing | |
| 1600 | + // is removing or replacing top-level keys. | |
| 1512 | 1601 | object = object.unsafeShallowCopy(); |
| 1513 | 1602 | |
| 1514 | 1603 | // Handle special cases for specific dictionaries. |
| ... | ... | @@ -1760,94 +1849,17 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, |
| 1760 | 1849 | { |
| 1761 | 1850 | this->m->cur_stream_length_id = new_id + 1; |
| 1762 | 1851 | } |
| 1763 | - QPDFObjectHandle stream_dict = object.getDict(); | |
| 1764 | - | |
| 1765 | - bool is_metadata = false; | |
| 1766 | - if (stream_dict.getKey("/Type").isName() && | |
| 1767 | - (stream_dict.getKey("/Type").getName() == "/Metadata")) | |
| 1768 | - { | |
| 1769 | - is_metadata = true; | |
| 1770 | - } | |
| 1771 | - bool filter = (object.isDataModified() || | |
| 1772 | - this->m->compress_streams || | |
| 1773 | - this->m->stream_decode_level); | |
| 1774 | - if (this->m->compress_streams) | |
| 1775 | - { | |
| 1776 | - // Don't filter if the stream is already compressed with | |
| 1777 | - // FlateDecode. This way we don't make it worse if the | |
| 1778 | - // original file used a better Flate algorithm, and we | |
| 1779 | - // don't spend time and CPU cycles uncompressing and | |
| 1780 | - // recompressing stuff. This can be overridden with | |
| 1781 | - // setRecompressFlate(true). | |
| 1782 | - QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter"); | |
| 1783 | - if ((! this->m->recompress_flate) && | |
| 1784 | - (! object.isDataModified()) && | |
| 1785 | - filter_obj.isName() && | |
| 1786 | - ((filter_obj.getName() == "/FlateDecode") || | |
| 1787 | - (filter_obj.getName() == "/Fl"))) | |
| 1788 | - { | |
| 1789 | - QTC::TC("qpdf", "QPDFWriter not recompressing /FlateDecode"); | |
| 1790 | - filter = false; | |
| 1791 | - } | |
| 1792 | - } | |
| 1793 | - bool normalize = false; | |
| 1794 | - bool compress_stream = false; | |
| 1795 | - bool uncompress = false; | |
| 1796 | - if (is_metadata && | |
| 1797 | - ((! this->m->encrypted) || (this->m->encrypt_metadata == false))) | |
| 1798 | - { | |
| 1799 | - QTC::TC("qpdf", "QPDFWriter not compressing metadata"); | |
| 1800 | - filter = true; | |
| 1801 | - compress_stream = false; | |
| 1802 | - uncompress = true; | |
| 1803 | - } | |
| 1804 | - else if (this->m->normalize_content && | |
| 1805 | - this->m->normalized_streams.count(old_og)) | |
| 1806 | - { | |
| 1807 | - normalize = true; | |
| 1808 | - filter = true; | |
| 1809 | - } | |
| 1810 | - else if (filter && this->m->compress_streams) | |
| 1811 | - { | |
| 1812 | - compress_stream = true; | |
| 1813 | - QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream"); | |
| 1814 | - } | |
| 1815 | 1852 | |
| 1816 | 1853 | flags |= f_stream; |
| 1817 | - | |
| 1854 | + bool compress_stream = false; | |
| 1855 | + bool is_metadata = false; | |
| 1818 | 1856 | PointerHolder<Buffer> stream_data; |
| 1819 | - bool filtered = false; | |
| 1820 | - for (int attempt = 1; attempt <= 2; ++attempt) | |
| 1821 | - { | |
| 1822 | - pushPipeline(new Pl_Buffer("stream data")); | |
| 1823 | - PipelinePopper pp_stream_data(this, &stream_data); | |
| 1824 | - activatePipelineStack(pp_stream_data); | |
| 1825 | - filtered = | |
| 1826 | - object.pipeStreamData( | |
| 1827 | - this->m->pipeline, | |
| 1828 | - (((filter && normalize) ? qpdf_ef_normalize : 0) | | |
| 1829 | - ((filter && compress_stream) ? qpdf_ef_compress : 0)), | |
| 1830 | - (filter | |
| 1831 | - ? (uncompress ? qpdf_dl_all : this->m->stream_decode_level) | |
| 1832 | - : qpdf_dl_none), false, (attempt == 1)); | |
| 1833 | - if (filter && (! filtered)) | |
| 1834 | - { | |
| 1835 | - // Try again | |
| 1836 | - filter = false; | |
| 1837 | - } | |
| 1838 | - else | |
| 1839 | - { | |
| 1840 | - break; | |
| 1841 | - } | |
| 1842 | - } | |
| 1843 | - if (filtered) | |
| 1857 | + if (willFilterStream(object, compress_stream, | |
| 1858 | + is_metadata, &stream_data)) | |
| 1844 | 1859 | { |
| 1845 | 1860 | flags |= f_filtered; |
| 1846 | 1861 | } |
| 1847 | - else | |
| 1848 | - { | |
| 1849 | - compress_stream = false; | |
| 1850 | - } | |
| 1862 | + QPDFObjectHandle stream_dict = object.getDict(); | |
| 1851 | 1863 | |
| 1852 | 1864 | this->m->cur_stream_length = stream_data->getSize(); |
| 1853 | 1865 | if (is_metadata && this->m->encrypted && (! this->m->encrypt_metadata)) | ... | ... |