Commit 6cadafda1b194f0047c1e8c3df61636764659110

Authored by m-holger
1 parent ae9e65f3

Remove for loop in QPDFWriter::writeObjectStream

libqpdf/QPDFWriter.cc
@@ -1646,81 +1646,79 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) @@ -1646,81 +1646,79 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1646 std::shared_ptr<Buffer> stream_buffer; 1646 std::shared_ptr<Buffer> stream_buffer;
1647 int first_obj = -1; 1647 int first_obj = -1;
1648 bool compressed = false; 1648 bool compressed = false;
1649 - for (int pass: {1, 2}) {  
1650 - PipelinePopper pp_ostream(this, &stream_buffer);  
1651 - if (pass == 1) {  
1652 - pushPipeline(new Pl_Buffer("object stream"));  
1653 - activatePipelineStack(pp_ostream);  
1654 - } else {  
1655 - // Adjust offsets to skip over comment before first object  
1656 - first = offsets.at(0);  
1657 - for (auto& iter: offsets) {  
1658 - iter -= first; 1649 + {
  1650 + // Pass 1
  1651 + PipelinePopper pp_ostream_pass1(this, &stream_buffer);
  1652 +
  1653 + pushPipeline(new Pl_Buffer("object stream"));
  1654 + activatePipelineStack(pp_ostream_pass1);
  1655 +
  1656 + int count = -1;
  1657 + for (auto const& obj: m->object_stream_to_objects[old_id]) {
  1658 + ++count;
  1659 + int new_obj = m->obj[obj].renumber;
  1660 + if (first_obj == -1) {
  1661 + first_obj = new_obj;
1659 } 1662 }
1660 -  
1661 - // Take one pass at writing pairs of numbers so we can get their size information  
1662 - {  
1663 - PipelinePopper pp_discard(this);  
1664 - pushDiscardFilter(pp_discard);  
1665 - writeObjectStreamOffsets(offsets, first_obj);  
1666 - first += m->pipeline->getCount(); 1663 + if (m->qdf_mode) {
  1664 + writeString(
  1665 + "%% Object stream: object " + std::to_string(new_obj) + ", index " +
  1666 + std::to_string(count));
  1667 + if (!m->suppress_original_object_ids) {
  1668 + writeString("; original object ID: " + std::to_string(obj.getObj()));
  1669 + // For compatibility, only write the generation if non-zero. While object
  1670 + // streams only allow objects with generation 0, if we are generating object
  1671 + // streams, the old object could have a non-zero generation.
  1672 + if (obj.getGen() != 0) {
  1673 + QTC::TC("qpdf", "QPDFWriter original obj non-zero gen");
  1674 + writeString(" " + std::to_string(obj.getGen()));
  1675 + }
  1676 + }
  1677 + writeString("\n");
1667 } 1678 }
1668 1679
1669 - // Set up a stream to write the stream data into a buffer.  
1670 - Pipeline* next = pushPipeline(new Pl_Buffer("object stream"));  
1671 - if (m->compress_streams && !m->qdf_mode) {  
1672 - compressed = true;  
1673 - next =  
1674 - pushPipeline(new Pl_Flate("compress object stream", next, Pl_Flate::a_deflate)); 1680 + offsets.push_back(m->pipeline->getCount());
  1681 + // To avoid double-counting objects being written in object streams for progress
  1682 + // reporting, decrement in pass 1.
  1683 + indicateProgress(true, false);
  1684 +
  1685 + QPDFObjectHandle obj_to_write = m->pdf.getObject(obj);
  1686 + if (obj_to_write.isStream()) {
  1687 + // This condition occurred in a fuzz input. Ideally we should block it at parse
  1688 + // time, but it's not clear to me how to construct a case for this.
  1689 + obj_to_write.warnIfPossible("stream found inside object stream; treating as null");
  1690 + obj_to_write = QPDFObjectHandle::newNull();
1675 } 1691 }
1676 - activatePipelineStack(pp_ostream);  
1677 - writeObjectStreamOffsets(offsets, first_obj);  
1678 - writeBuffer(stream_buffer); 1692 + writeObject(obj_to_write, count);
  1693 +
  1694 + m->new_obj[new_obj].xref = QPDFXRefEntry(new_stream_id, count);
  1695 + }
  1696 + }
  1697 + {
  1698 + PipelinePopper pp_ostream(this, &stream_buffer);
  1699 + // Adjust offsets to skip over comment before first object
  1700 + first = offsets.at(0);
  1701 + for (auto& iter: offsets) {
  1702 + iter -= first;
1679 } 1703 }
1680 1704
1681 - if (pass == 1) {  
1682 - int count = -1;  
1683 - for (auto const& obj: m->object_stream_to_objects[old_id]) {  
1684 - ++count;  
1685 - int new_obj = m->obj[obj].renumber;  
1686 - if (first_obj == -1) {  
1687 - first_obj = new_obj;  
1688 - }  
1689 - if (m->qdf_mode) {  
1690 - writeString(  
1691 - "%% Object stream: object " + std::to_string(new_obj) + ", index " +  
1692 - std::to_string(count));  
1693 - if (!m->suppress_original_object_ids) {  
1694 - writeString("; original object ID: " + std::to_string(obj.getObj()));  
1695 - // For compatibility, only write the generation if non-zero. While object  
1696 - // streams only allow objects with generation 0, if we are generating object  
1697 - // streams, the old object could have a non-zero generation.  
1698 - if (obj.getGen() != 0) {  
1699 - QTC::TC("qpdf", "QPDFWriter original obj non-zero gen");  
1700 - writeString(" " + std::to_string(obj.getGen()));  
1701 - }  
1702 - }  
1703 - writeString("\n");  
1704 - }  
1705 - if (pass == 1) {  
1706 - offsets.push_back(m->pipeline->getCount());  
1707 - // To avoid double-counting objects being written in object streams for progress  
1708 - // reporting, decrement in pass 1.  
1709 - indicateProgress(true, false);  
1710 - }  
1711 - QPDFObjectHandle obj_to_write = m->pdf.getObject(obj);  
1712 - if (obj_to_write.isStream()) {  
1713 - // This condition occurred in a fuzz input. Ideally we should block it at parse  
1714 - // time, but it's not clear to me how to construct a case for this.  
1715 - obj_to_write.warnIfPossible(  
1716 - "stream found inside object stream; treating as null");  
1717 - obj_to_write = QPDFObjectHandle::newNull();  
1718 - }  
1719 - writeObject(obj_to_write, count); 1705 + // Take one pass at writing pairs of numbers so we can get their size information
  1706 + {
  1707 + PipelinePopper pp_discard(this);
  1708 + pushDiscardFilter(pp_discard);
  1709 + writeObjectStreamOffsets(offsets, first_obj);
  1710 + first += m->pipeline->getCount();
  1711 + }
1720 1712
1721 - m->new_obj[new_obj].xref = QPDFXRefEntry(new_stream_id, count);  
1722 - } 1713 + // Set up a stream to write the stream data into a buffer.
  1714 + Pipeline* next = pushPipeline(new Pl_Buffer("object stream"));
  1715 + if (m->compress_streams && !m->qdf_mode) {
  1716 + compressed = true;
  1717 + next = pushPipeline(new Pl_Flate("compress object stream", next, Pl_Flate::a_deflate));
1723 } 1718 }
  1719 + activatePipelineStack(pp_ostream);
  1720 + writeObjectStreamOffsets(offsets, first_obj);
  1721 + writeBuffer(stream_buffer);
1724 } 1722 }
1725 1723
1726 // Write the object 1724 // Write the object
manual/release-notes.rst
@@ -26,6 +26,9 @@ more detail. @@ -26,6 +26,9 @@ more detail.
26 - There have been further enhancements to how files with damaged xref 26 - There have been further enhancements to how files with damaged xref
27 tables are recovered. 27 tables are recovered.
28 28
  29 + - The code that writes object streams has been refactored, resulting
  30 + in some performance improvement.
  31 +
29 .. cSpell:ignore substract 32 .. cSpell:ignore substract
30 33
31 .. _r12-0-0: 34 .. _r12-0-0: