Commit 09027344b9a6265a597da7c3c92e0fdd3ccb71fb

Authored by Jay Berkenbilt
1 parent 4cbe2abc

Refactor: separate code that determines whether to filter a stream

include/qpdf/QPDFWriter.hh
@@ -529,6 +529,9 @@ class QPDFWriter @@ -529,6 +529,9 @@ class QPDFWriter
529 void writeTrailer(trailer_e which, int size, 529 void writeTrailer(trailer_e which, int size,
530 bool xref_stream, qpdf_offset_t prev, 530 bool xref_stream, qpdf_offset_t prev,
531 int linearization_pass); 531 int linearization_pass);
  532 + bool willFilterStream(QPDFObjectHandle stream,
  533 + bool& compress_stream, bool& is_metadata,
  534 + PointerHolder<Buffer>* stream_data);
532 void unparseObject(QPDFObjectHandle object, int level, int flags, 535 void unparseObject(QPDFObjectHandle object, int level, int flags,
533 // for stream dictionaries 536 // for stream dictionaries
534 size_t stream_length = 0, bool compress = false); 537 size_t stream_length = 0, bool compress = false);
libqpdf/QPDFWriter.cc
@@ -1463,6 +1463,96 @@ QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream, @@ -1463,6 +1463,96 @@ QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream,
1463 writeString(">>"); 1463 writeString(">>");
1464 } 1464 }
1465 1465
  1466 +bool
  1467 +QPDFWriter::willFilterStream(QPDFObjectHandle stream,
  1468 + bool& compress_stream, bool& is_metadata,
  1469 + PointerHolder<Buffer>* stream_data)
  1470 +{
  1471 + compress_stream = false;
  1472 + is_metadata = false;
  1473 + QPDFObjGen old_og = stream.getObjGen();
  1474 + QPDFObjectHandle stream_dict = stream.getDict();
  1475 +
  1476 + if (stream_dict.getKey("/Type").isName() &&
  1477 + (stream_dict.getKey("/Type").getName() == "/Metadata"))
  1478 + {
  1479 + is_metadata = true;
  1480 + }
  1481 + bool filter = (stream.isDataModified() ||
  1482 + this->m->compress_streams ||
  1483 + this->m->stream_decode_level);
  1484 + if (this->m->compress_streams)
  1485 + {
  1486 + // Don't filter if the stream is already compressed with
  1487 + // FlateDecode. This way we don't make it worse if the
  1488 + // original file used a better Flate algorithm, and we
  1489 + // don't spend time and CPU cycles uncompressing and
  1490 + // recompressing stuff. This can be overridden with
  1491 + // setRecompressFlate(true).
  1492 + QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter");
  1493 + if ((! this->m->recompress_flate) &&
  1494 + (! stream.isDataModified()) &&
  1495 + filter_obj.isName() &&
  1496 + ((filter_obj.getName() == "/FlateDecode") ||
  1497 + (filter_obj.getName() == "/Fl")))
  1498 + {
  1499 + QTC::TC("qpdf", "QPDFWriter not recompressing /FlateDecode");
  1500 + filter = false;
  1501 + }
  1502 + }
  1503 + bool normalize = false;
  1504 + bool uncompress = false;
  1505 + if (is_metadata &&
  1506 + ((! this->m->encrypted) || (this->m->encrypt_metadata == false)))
  1507 + {
  1508 + QTC::TC("qpdf", "QPDFWriter not compressing metadata");
  1509 + filter = true;
  1510 + compress_stream = false;
  1511 + uncompress = true;
  1512 + }
  1513 + else if (this->m->normalize_content &&
  1514 + this->m->normalized_streams.count(old_og))
  1515 + {
  1516 + normalize = true;
  1517 + filter = true;
  1518 + }
  1519 + else if (filter && this->m->compress_streams)
  1520 + {
  1521 + compress_stream = true;
  1522 + QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream");
  1523 + }
  1524 +
  1525 + bool filtered = false;
  1526 + for (int attempt = 1; attempt <= 2; ++attempt)
  1527 + {
  1528 + pushPipeline(new Pl_Buffer("stream data"));
  1529 + PipelinePopper pp_stream_data(this, stream_data);
  1530 + activatePipelineStack(pp_stream_data);
  1531 + filtered =
  1532 + stream.pipeStreamData(
  1533 + this->m->pipeline,
  1534 + (((filter && normalize) ? qpdf_ef_normalize : 0) |
  1535 + ((filter && compress_stream) ? qpdf_ef_compress : 0)),
  1536 + (filter
  1537 + ? (uncompress ? qpdf_dl_all : this->m->stream_decode_level)
  1538 + : qpdf_dl_none), false, (attempt == 1));
  1539 + if (filter && (! filtered))
  1540 + {
  1541 + // Try again
  1542 + filter = false;
  1543 + }
  1544 + else
  1545 + {
  1546 + break;
  1547 + }
  1548 + }
  1549 + if (! filtered)
  1550 + {
  1551 + compress_stream = false;
  1552 + }
  1553 + return filtered;
  1554 +}
  1555 +
1466 void 1556 void
1467 QPDFWriter::unparseObject(QPDFObjectHandle object, int level, 1557 QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
1468 int flags, size_t stream_length, 1558 int flags, size_t stream_length,
@@ -1502,13 +1592,12 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, @@ -1502,13 +1592,12 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
1502 else if (object.isDictionary()) 1592 else if (object.isDictionary())
1503 { 1593 {
1504 // Make a shallow copy of this object so we can modify it 1594 // Make a shallow copy of this object so we can modify it
1505 - // safely without affecting the original. This code makes  
1506 - // assumptions about things that are made true in  
1507 - // prepareFileForWrite, such as that certain things are direct  
1508 - // objects so that replacing them doesn't leave unreferenced  
1509 - // objects in the output. We can use unsafeShallowCopy here  
1510 - // because we are all we are doing is removing or replacing  
1511 - // top-level keys. 1595 + // safely without affecting the original. This code has logic
  1596 + // to skip certain keys in agreement with prepareFileForWrite
  1597 + // and with skip_stream_parameters so that replacing them
  1598 + // doesn't leave unreferenced objects in the output. We can
  1599 + // use unsafeShallowCopy here because all we are doing
  1600 + // is removing or replacing top-level keys.
1512 object = object.unsafeShallowCopy(); 1601 object = object.unsafeShallowCopy();
1513 1602
1514 // Handle special cases for specific dictionaries. 1603 // Handle special cases for specific dictionaries.
@@ -1760,94 +1849,17 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, @@ -1760,94 +1849,17 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
1760 { 1849 {
1761 this->m->cur_stream_length_id = new_id + 1; 1850 this->m->cur_stream_length_id = new_id + 1;
1762 } 1851 }
1763 - QPDFObjectHandle stream_dict = object.getDict();  
1764 -  
1765 - bool is_metadata = false;  
1766 - if (stream_dict.getKey("/Type").isName() &&  
1767 - (stream_dict.getKey("/Type").getName() == "/Metadata"))  
1768 - {  
1769 - is_metadata = true;  
1770 - }  
1771 - bool filter = (object.isDataModified() ||  
1772 - this->m->compress_streams ||  
1773 - this->m->stream_decode_level);  
1774 - if (this->m->compress_streams)  
1775 - {  
1776 - // Don't filter if the stream is already compressed with  
1777 - // FlateDecode. This way we don't make it worse if the  
1778 - // original file used a better Flate algorithm, and we  
1779 - // don't spend time and CPU cycles uncompressing and  
1780 - // recompressing stuff. This can be overridden with  
1781 - // setRecompressFlate(true).  
1782 - QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter");  
1783 - if ((! this->m->recompress_flate) &&  
1784 - (! object.isDataModified()) &&  
1785 - filter_obj.isName() &&  
1786 - ((filter_obj.getName() == "/FlateDecode") ||  
1787 - (filter_obj.getName() == "/Fl")))  
1788 - {  
1789 - QTC::TC("qpdf", "QPDFWriter not recompressing /FlateDecode");  
1790 - filter = false;  
1791 - }  
1792 - }  
1793 - bool normalize = false;  
1794 - bool compress_stream = false;  
1795 - bool uncompress = false;  
1796 - if (is_metadata &&  
1797 - ((! this->m->encrypted) || (this->m->encrypt_metadata == false)))  
1798 - {  
1799 - QTC::TC("qpdf", "QPDFWriter not compressing metadata");  
1800 - filter = true;  
1801 - compress_stream = false;  
1802 - uncompress = true;  
1803 - }  
1804 - else if (this->m->normalize_content &&  
1805 - this->m->normalized_streams.count(old_og))  
1806 - {  
1807 - normalize = true;  
1808 - filter = true;  
1809 - }  
1810 - else if (filter && this->m->compress_streams)  
1811 - {  
1812 - compress_stream = true;  
1813 - QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream");  
1814 - }  
1815 1852
1816 flags |= f_stream; 1853 flags |= f_stream;
1817 - 1854 + bool compress_stream = false;
  1855 + bool is_metadata = false;
1818 PointerHolder<Buffer> stream_data; 1856 PointerHolder<Buffer> stream_data;
1819 - bool filtered = false;  
1820 - for (int attempt = 1; attempt <= 2; ++attempt)  
1821 - {  
1822 - pushPipeline(new Pl_Buffer("stream data"));  
1823 - PipelinePopper pp_stream_data(this, &stream_data);  
1824 - activatePipelineStack(pp_stream_data);  
1825 - filtered =  
1826 - object.pipeStreamData(  
1827 - this->m->pipeline,  
1828 - (((filter && normalize) ? qpdf_ef_normalize : 0) |  
1829 - ((filter && compress_stream) ? qpdf_ef_compress : 0)),  
1830 - (filter  
1831 - ? (uncompress ? qpdf_dl_all : this->m->stream_decode_level)  
1832 - : qpdf_dl_none), false, (attempt == 1));  
1833 - if (filter && (! filtered))  
1834 - {  
1835 - // Try again  
1836 - filter = false;  
1837 - }  
1838 - else  
1839 - {  
1840 - break;  
1841 - }  
1842 - }  
1843 - if (filtered) 1857 + if (willFilterStream(object, compress_stream,
  1858 + is_metadata, &stream_data))
1844 { 1859 {
1845 flags |= f_filtered; 1860 flags |= f_filtered;
1846 } 1861 }
1847 - else  
1848 - {  
1849 - compress_stream = false;  
1850 - } 1862 + QPDFObjectHandle stream_dict = object.getDict();
1851 1863
1852 this->m->cur_stream_length = stream_data->getSize(); 1864 this->m->cur_stream_length = stream_data->getSize();
1853 if (is_metadata && this->m->encrypted && (! this->m->encrypt_metadata)) 1865 if (is_metadata && this->m->encrypted && (! this->m->encrypt_metadata))