Commit be25fc30d471b55132eab7f590f3db7a51c7fbd2
Committed by GitHub
Merge pull request #1397 from m-holger/ostream
Refactor QPDF::resolveObjectsInStream
Showing 9 changed files with 125 additions and 70 deletions
include/qpdf/QPDF.hh
| @@ -48,6 +48,7 @@ | @@ -48,6 +48,7 @@ | ||
| 48 | class QPDF_Stream; | 48 | class QPDF_Stream; |
| 49 | class BitStream; | 49 | class BitStream; |
| 50 | class BitWriter; | 50 | class BitWriter; |
| 51 | +class BufferInputSource; | ||
| 51 | class QPDFLogger; | 52 | class QPDFLogger; |
| 52 | class QPDFParser; | 53 | class QPDFParser; |
| 53 | 54 | ||
| @@ -784,7 +785,7 @@ class QPDF | @@ -784,7 +785,7 @@ class QPDF | ||
| 784 | QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og); | 785 | QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og); |
| 785 | void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); | 786 | void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); |
| 786 | void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); | 787 | void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); |
| 787 | - QPDFObjectHandle readObjectInStream(std::shared_ptr<InputSource>& input, int obj); | 788 | + QPDFObjectHandle readObjectInStream(BufferInputSource& input, int stream_id, int obj_id); |
| 788 | size_t recoverStreamLength( | 789 | size_t recoverStreamLength( |
| 789 | std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset); | 790 | std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset); |
| 790 | QPDFTokenizer::Token readToken(InputSource&, size_t max_len = 0); | 791 | QPDFTokenizer::Token readToken(InputSource&, size_t max_len = 0); |
libqpdf/QPDFObject.cc
| @@ -3,6 +3,10 @@ | @@ -3,6 +3,10 @@ | ||
| 3 | std::string | 3 | std::string |
| 4 | QPDFObject::getDescription() | 4 | QPDFObject::getDescription() |
| 5 | { | 5 | { |
| 6 | + qpdf_offset_t shift = (getTypeCode() == ::ot_dictionary) ? 2 | ||
| 7 | + : (getTypeCode() == ::ot_array) ? 1 | ||
| 8 | + : 0; | ||
| 9 | + | ||
| 6 | if (object_description) { | 10 | if (object_description) { |
| 7 | switch (object_description->index()) { | 11 | switch (object_description->index()) { |
| 8 | case 0: | 12 | case 0: |
| @@ -14,10 +18,6 @@ QPDFObject::getDescription() | @@ -14,10 +18,6 @@ QPDFObject::getDescription() | ||
| 14 | description.replace(pos, 3, og.unparse(' ')); | 18 | description.replace(pos, 3, og.unparse(' ')); |
| 15 | } | 19 | } |
| 16 | if (auto pos = description.find("$PO"); pos != std::string::npos) { | 20 | if (auto pos = description.find("$PO"); pos != std::string::npos) { |
| 17 | - qpdf_offset_t shift = (getTypeCode() == ::ot_dictionary) ? 2 | ||
| 18 | - : (getTypeCode() == ::ot_array) ? 1 | ||
| 19 | - : 0; | ||
| 20 | - | ||
| 21 | description.replace(pos, 3, std::to_string(parsed_offset + shift)); | 21 | description.replace(pos, 3, std::to_string(parsed_offset + shift)); |
| 22 | } | 22 | } |
| 23 | return description; | 23 | return description; |
| @@ -44,7 +44,14 @@ QPDFObject::getDescription() | @@ -44,7 +44,14 @@ QPDFObject::getDescription() | ||
| 44 | } | 44 | } |
| 45 | return result; | 45 | return result; |
| 46 | } | 46 | } |
| 47 | + case 3: | ||
| 48 | + auto [stream_id, obj_id] = std::get<3>(*object_description); | ||
| 49 | + std::string result = qpdf ? qpdf->getFilename() : ""; | ||
| 50 | + result += " object stream " + std::to_string(stream_id) + ", object " + | ||
| 51 | + std::to_string(obj_id) + " 0 at offset " + std::to_string(parsed_offset + shift); | ||
| 52 | + return result; | ||
| 47 | } | 53 | } |
| 54 | + | ||
| 48 | } else if (og.isIndirect()) { | 55 | } else if (og.isIndirect()) { |
| 49 | return "object " + og.unparse(' '); | 56 | return "object " + og.unparse(' '); |
| 50 | } | 57 | } |
libqpdf/QPDFParser.cc
| @@ -10,6 +10,8 @@ | @@ -10,6 +10,8 @@ | ||
| 10 | 10 | ||
| 11 | #include <memory> | 11 | #include <memory> |
| 12 | 12 | ||
| 13 | +using namespace std::literals; | ||
| 14 | + | ||
| 13 | using ObjectPtr = std::shared_ptr<QPDFObject>; | 15 | using ObjectPtr = std::shared_ptr<QPDFObject>; |
| 14 | 16 | ||
| 15 | QPDFObjectHandle | 17 | QPDFObjectHandle |
| @@ -524,7 +526,13 @@ QPDFParser::warnDuplicateKey() | @@ -524,7 +526,13 @@ QPDFParser::warnDuplicateKey() | ||
| 524 | void | 526 | void |
| 525 | QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const | 527 | QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const |
| 526 | { | 528 | { |
| 527 | - warn(QPDFExc(qpdf_e_damaged_pdf, input.getName(), object_description, offset, msg)); | 529 | + if (stream_id) { |
| 530 | + std::string descr = "object "s + std::to_string(obj_id) + " 0"; | ||
| 531 | + std::string name = context->getFilename() + " object stream " + std::to_string(stream_id); | ||
| 532 | + warn(QPDFExc(qpdf_e_damaged_pdf, name, descr, offset, msg)); | ||
| 533 | + } else { | ||
| 534 | + warn(QPDFExc(qpdf_e_damaged_pdf, input.getName(), object_description, offset, msg)); | ||
| 535 | + } | ||
| 528 | } | 536 | } |
| 529 | 537 | ||
| 530 | void | 538 | void |
libqpdf/QPDF_objects.cc
| @@ -1292,19 +1292,22 @@ QPDF::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset | @@ -1292,19 +1292,22 @@ QPDF::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset | ||
| 1292 | } | 1292 | } |
| 1293 | 1293 | ||
| 1294 | QPDFObjectHandle | 1294 | QPDFObjectHandle |
| 1295 | -QPDF::readObjectInStream(std::shared_ptr<InputSource>& input, int obj) | 1295 | +QPDF::readObjectInStream(BufferInputSource& input, int stream_id, int obj_id) |
| 1296 | { | 1296 | { |
| 1297 | - m->last_object_description.erase(7); // last_object_description starts with "object " | ||
| 1298 | - m->last_object_description += std::to_string(obj); | ||
| 1299 | - m->last_object_description += " 0"; | ||
| 1300 | - | ||
| 1301 | bool empty = false; | 1297 | bool empty = false; |
| 1302 | - auto object = QPDFParser(*input, m->last_object_description, m->tokenizer, nullptr, this, true) | ||
| 1303 | - .parse(empty, false); | 1298 | + auto object = |
| 1299 | + QPDFParser(input, stream_id, obj_id, m->last_object_description, m->tokenizer, this) | ||
| 1300 | + .parse(empty, false); | ||
| 1304 | if (empty) { | 1301 | if (empty) { |
| 1305 | // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in | 1302 | // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in |
| 1306 | // actual PDF files and Adobe Reader appears to ignore them. | 1303 | // actual PDF files and Adobe Reader appears to ignore them. |
| 1307 | - warn(damagedPDF(*input, input->getLastOffset(), "empty object treated as null")); | 1304 | + warn(QPDFExc( |
| 1305 | + qpdf_e_damaged_pdf, | ||
| 1306 | + m->file->getName() + " object stream " + std::to_string(stream_id), | ||
| 1307 | + +"object " + std::to_string(obj_id) + " 0, offset " + | ||
| 1308 | + std::to_string(input.getLastOffset()), | ||
| 1309 | + 0, | ||
| 1310 | + "empty object treated as null")); | ||
| 1308 | } | 1311 | } |
| 1309 | return object; | 1312 | return object; |
| 1310 | } | 1313 | } |
| @@ -1605,13 +1608,23 @@ QPDF::resolve(QPDFObjGen og) | @@ -1605,13 +1608,23 @@ QPDF::resolve(QPDFObjGen og) | ||
| 1605 | void | 1608 | void |
| 1606 | QPDF::resolveObjectsInStream(int obj_stream_number) | 1609 | QPDF::resolveObjectsInStream(int obj_stream_number) |
| 1607 | { | 1610 | { |
| 1611 | + auto damaged = | ||
| 1612 | + [this, obj_stream_number](int id, qpdf_offset_t offset, std::string const& msg) -> QPDFExc { | ||
| 1613 | + return { | ||
| 1614 | + qpdf_e_damaged_pdf, | ||
| 1615 | + m->file->getName() + " object stream " + std::to_string(obj_stream_number), | ||
| 1616 | + +"object " + std::to_string(id) + " 0", | ||
| 1617 | + offset, | ||
| 1618 | + msg}; | ||
| 1619 | + }; | ||
| 1620 | + | ||
| 1608 | if (m->resolved_object_streams.count(obj_stream_number)) { | 1621 | if (m->resolved_object_streams.count(obj_stream_number)) { |
| 1609 | return; | 1622 | return; |
| 1610 | } | 1623 | } |
| 1611 | m->resolved_object_streams.insert(obj_stream_number); | 1624 | m->resolved_object_streams.insert(obj_stream_number); |
| 1612 | // Force resolution of object stream | 1625 | // Force resolution of object stream |
| 1613 | - QPDFObjectHandle obj_stream = getObjectByID(obj_stream_number, 0); | ||
| 1614 | - if (!obj_stream.isStream()) { | 1626 | + auto obj_stream = getObject(obj_stream_number, 0).as_stream(); |
| 1627 | + if (!obj_stream) { | ||
| 1615 | throw damagedPDF( | 1628 | throw damagedPDF( |
| 1616 | "object " + std::to_string(obj_stream_number) + " 0", | 1629 | "object " + std::to_string(obj_stream_number) + " 0", |
| 1617 | "supposed object stream " + std::to_string(obj_stream_number) + " is not a stream"); | 1630 | "supposed object stream " + std::to_string(obj_stream_number) + " is not a stream"); |
| @@ -1631,34 +1644,25 @@ QPDF::resolveObjectsInStream(int obj_stream_number) | @@ -1631,34 +1644,25 @@ QPDF::resolveObjectsInStream(int obj_stream_number) | ||
| 1631 | "supposed object stream " + std::to_string(obj_stream_number) + " has wrong type")); | 1644 | "supposed object stream " + std::to_string(obj_stream_number) + " has wrong type")); |
| 1632 | } | 1645 | } |
| 1633 | 1646 | ||
| 1634 | - if (!(dict.getKey("/N").isInteger() && dict.getKey("/First").isInteger())) { | 1647 | + unsigned int n{0}; |
| 1648 | + int first{0}; | ||
| 1649 | + if (!(dict.getKey("/N").getValueAsUInt(n) && dict.getKey("/First").getValueAsInt(first))) { | ||
| 1635 | throw damagedPDF( | 1650 | throw damagedPDF( |
| 1636 | "object " + std::to_string(obj_stream_number) + " 0", | 1651 | "object " + std::to_string(obj_stream_number) + " 0", |
| 1637 | "object stream " + std::to_string(obj_stream_number) + " has incorrect keys"); | 1652 | "object stream " + std::to_string(obj_stream_number) + " has incorrect keys"); |
| 1638 | } | 1653 | } |
| 1639 | 1654 | ||
| 1640 | - int n = dict.getKey("/N").getIntValueAsInt(); | ||
| 1641 | - int first = dict.getKey("/First").getIntValueAsInt(); | 1655 | + std::vector<std::pair<int, long long>> offsets; |
| 1642 | 1656 | ||
| 1643 | - std::map<int, int> offsets; | ||
| 1644 | - | ||
| 1645 | - std::shared_ptr<Buffer> bp = obj_stream.getStreamData(qpdf_dl_specialized); | ||
| 1646 | - auto input = std::shared_ptr<InputSource>( | ||
| 1647 | - // line-break | ||
| 1648 | - new BufferInputSource( | ||
| 1649 | - (m->file->getName() + " object stream " + std::to_string(obj_stream_number)), | ||
| 1650 | - bp.get())); | 1657 | + auto bp = obj_stream.getStreamData(qpdf_dl_specialized); |
| 1658 | + BufferInputSource input("", bp.get()); | ||
| 1651 | 1659 | ||
| 1652 | long long last_offset = -1; | 1660 | long long last_offset = -1; |
| 1653 | - for (int i = 0; i < n; ++i) { | ||
| 1654 | - QPDFTokenizer::Token tnum = readToken(*input); | ||
| 1655 | - QPDFTokenizer::Token toffset = readToken(*input); | 1661 | + for (unsigned int i = 0; i < n; ++i) { |
| 1662 | + auto tnum = readToken(input); | ||
| 1663 | + auto toffset = readToken(input); | ||
| 1656 | if (!(tnum.isInteger() && toffset.isInteger())) { | 1664 | if (!(tnum.isInteger() && toffset.isInteger())) { |
| 1657 | - throw damagedPDF( | ||
| 1658 | - *input, | ||
| 1659 | - "object " + std::to_string(obj_stream_number) + " 0", | ||
| 1660 | - input->getLastOffset(), | ||
| 1661 | - "expected integer in object stream header"); | 1665 | + throw damaged(0, input.getLastOffset(), "expected integer in object stream header"); |
| 1662 | } | 1666 | } |
| 1663 | 1667 | ||
| 1664 | int num = QUtil::string_to_int(tnum.getValue().c_str()); | 1668 | int num = QUtil::string_to_int(tnum.getValue().c_str()); |
| @@ -1666,29 +1670,20 @@ QPDF::resolveObjectsInStream(int obj_stream_number) | @@ -1666,29 +1670,20 @@ QPDF::resolveObjectsInStream(int obj_stream_number) | ||
| 1666 | 1670 | ||
| 1667 | if (num == obj_stream_number) { | 1671 | if (num == obj_stream_number) { |
| 1668 | QTC::TC("qpdf", "QPDF ignore self-referential object stream"); | 1672 | QTC::TC("qpdf", "QPDF ignore self-referential object stream"); |
| 1669 | - warn(damagedPDF( | ||
| 1670 | - *input, | ||
| 1671 | - "object " + std::to_string(obj_stream_number) + " 0", | ||
| 1672 | - input->getLastOffset(), | ||
| 1673 | - "object stream claims to contain itself")); | 1673 | + warn(damaged(num, input.getLastOffset(), "object stream claims to contain itself")); |
| 1674 | continue; | 1674 | continue; |
| 1675 | } | 1675 | } |
| 1676 | 1676 | ||
| 1677 | if (num < 1) { | 1677 | if (num < 1) { |
| 1678 | QTC::TC("qpdf", "QPDF object stream contains id < 1"); | 1678 | QTC::TC("qpdf", "QPDF object stream contains id < 1"); |
| 1679 | - warn(damagedPDF( | ||
| 1680 | - *input, | ||
| 1681 | - "object " + std::to_string(num) + " 0", | ||
| 1682 | - input->getLastOffset(), | ||
| 1683 | - "object id is invalid"s)); | 1679 | + warn(damaged(num, input.getLastOffset(), "object id is invalid"s)); |
| 1684 | continue; | 1680 | continue; |
| 1685 | } | 1681 | } |
| 1686 | 1682 | ||
| 1687 | if (offset <= last_offset) { | 1683 | if (offset <= last_offset) { |
| 1688 | QTC::TC("qpdf", "QPDF object stream offsets not increasing"); | 1684 | QTC::TC("qpdf", "QPDF object stream offsets not increasing"); |
| 1689 | - warn(damagedPDF( | ||
| 1690 | - *input, | ||
| 1691 | - "object " + std::to_string(num) + " 0", | 1685 | + warn(damaged( |
| 1686 | + num, | ||
| 1692 | offset, | 1687 | offset, |
| 1693 | "offset is invalid (must be larger than previous offset " + | 1688 | "offset is invalid (must be larger than previous offset " + |
| 1694 | std::to_string(last_offset) + ")")); | 1689 | std::to_string(last_offset) + ")")); |
| @@ -1700,23 +1695,20 @@ QPDF::resolveObjectsInStream(int obj_stream_number) | @@ -1700,23 +1695,20 @@ QPDF::resolveObjectsInStream(int obj_stream_number) | ||
| 1700 | continue; | 1695 | continue; |
| 1701 | } | 1696 | } |
| 1702 | 1697 | ||
| 1703 | - offsets[num] = toI(offset + first); | 1698 | + offsets.emplace_back(num, offset + first); |
| 1704 | } | 1699 | } |
| 1705 | 1700 | ||
| 1706 | // To avoid having to read the object stream multiple times, store all objects that would be | 1701 | // To avoid having to read the object stream multiple times, store all objects that would be |
| 1707 | // found here in the cache. Remember that some objects stored here might have been overridden | 1702 | // found here in the cache. Remember that some objects stored here might have been overridden |
| 1708 | // by new objects appended to the file, so it is necessary to recheck the xref table and only | 1703 | // by new objects appended to the file, so it is necessary to recheck the xref table and only |
| 1709 | // cache what would actually be resolved here. | 1704 | // cache what would actually be resolved here. |
| 1710 | - m->last_object_description.clear(); | ||
| 1711 | - m->last_object_description += "object "; | ||
| 1712 | - for (auto const& iter: offsets) { | ||
| 1713 | - QPDFObjGen og(iter.first, 0); | 1705 | + for (auto const& [id, offset]: offsets) { |
| 1706 | + QPDFObjGen og(id, 0); | ||
| 1714 | auto entry = m->xref_table.find(og); | 1707 | auto entry = m->xref_table.find(og); |
| 1715 | if (entry != m->xref_table.end() && entry->second.getType() == 2 && | 1708 | if (entry != m->xref_table.end() && entry->second.getType() == 2 && |
| 1716 | entry->second.getObjStreamNumber() == obj_stream_number) { | 1709 | entry->second.getObjStreamNumber() == obj_stream_number) { |
| 1717 | - int offset = iter.second; | ||
| 1718 | - input->seek(offset, SEEK_SET); | ||
| 1719 | - QPDFObjectHandle oh = readObjectInStream(input, iter.first); | 1710 | + input.seek(offset, SEEK_SET); |
| 1711 | + QPDFObjectHandle oh = readObjectInStream(input, obj_stream_number, id); | ||
| 1720 | updateCache(og, oh.getObj(), end_before_space, end_after_space); | 1712 | updateCache(og, oh.getObj(), end_before_space, end_after_space); |
| 1721 | } else { | 1713 | } else { |
| 1722 | QTC::TC("qpdf", "QPDF not caching overridden objstm object"); | 1714 | QTC::TC("qpdf", "QPDF not caching overridden objstm object"); |
libqpdf/qpdf/QPDFObjectHandle_private.hh
| @@ -4,6 +4,7 @@ | @@ -4,6 +4,7 @@ | ||
| 4 | #include <qpdf/QPDFObjectHandle.hh> | 4 | #include <qpdf/QPDFObjectHandle.hh> |
| 5 | 5 | ||
| 6 | #include <qpdf/QPDFObject_private.hh> | 6 | #include <qpdf/QPDFObject_private.hh> |
| 7 | +#include <qpdf/QPDF_private.hh> | ||
| 7 | #include <qpdf/QUtil.hh> | 8 | #include <qpdf/QUtil.hh> |
| 8 | 9 | ||
| 9 | namespace qpdf | 10 | namespace qpdf |
| @@ -428,6 +429,18 @@ QPDFObject::create(Args&&... args) | @@ -428,6 +429,18 @@ QPDFObject::create(Args&&... args) | ||
| 428 | return std::make_shared<QPDFObject>(std::forward<T>(T(std::forward<Args>(args)...))); | 429 | return std::make_shared<QPDFObject>(std::forward<T>(T(std::forward<Args>(args)...))); |
| 429 | } | 430 | } |
| 430 | 431 | ||
| 432 | +inline qpdf_object_type_e | ||
| 433 | +QPDFObject::getResolvedTypeCode() const | ||
| 434 | +{ | ||
| 435 | + if (getTypeCode() == ::ot_unresolved) { | ||
| 436 | + return QPDF::Resolver::resolved(qpdf, og)->getTypeCode(); | ||
| 437 | + } | ||
| 438 | + if (getTypeCode() == ::ot_reference) { | ||
| 439 | + return std::get<QPDF_Reference>(value).obj->getTypeCode(); | ||
| 440 | + } | ||
| 441 | + return getTypeCode(); | ||
| 442 | +} | ||
| 443 | + | ||
| 431 | inline qpdf::Array | 444 | inline qpdf::Array |
| 432 | QPDFObjectHandle::as_array(qpdf::typed options) const | 445 | QPDFObjectHandle::as_array(qpdf::typed options) const |
| 433 | { | 446 | { |
libqpdf/qpdf/QPDFObject_private.hh
| @@ -7,8 +7,8 @@ | @@ -7,8 +7,8 @@ | ||
| 7 | #include <qpdf/Constants.h> | 7 | #include <qpdf/Constants.h> |
| 8 | #include <qpdf/JSON.hh> | 8 | #include <qpdf/JSON.hh> |
| 9 | #include <qpdf/JSON_writer.hh> | 9 | #include <qpdf/JSON_writer.hh> |
| 10 | +#include <qpdf/QPDF.hh> | ||
| 10 | #include <qpdf/QPDFObjGen.hh> | 11 | #include <qpdf/QPDFObjGen.hh> |
| 11 | -#include <qpdf/QPDF_private.hh> | ||
| 12 | #include <qpdf/Types.h> | 12 | #include <qpdf/Types.h> |
| 13 | 13 | ||
| 14 | #include <map> | 14 | #include <map> |
| @@ -301,17 +301,8 @@ class QPDFObject | @@ -301,17 +301,8 @@ class QPDFObject | ||
| 301 | std::string getStringValue() const; | 301 | std::string getStringValue() const; |
| 302 | 302 | ||
| 303 | // Return a unique type code for the resolved object | 303 | // Return a unique type code for the resolved object |
| 304 | - qpdf_object_type_e | ||
| 305 | - getResolvedTypeCode() const | ||
| 306 | - { | ||
| 307 | - if (getTypeCode() == ::ot_unresolved) { | ||
| 308 | - return QPDF::Resolver::resolved(qpdf, og)->getTypeCode(); | ||
| 309 | - } | ||
| 310 | - if (getTypeCode() == ::ot_reference) { | ||
| 311 | - return std::get<QPDF_Reference>(value).obj->getTypeCode(); | ||
| 312 | - } | ||
| 313 | - return getTypeCode(); | ||
| 314 | - } | 304 | + inline qpdf_object_type_e getResolvedTypeCode() const; |
| 305 | + | ||
| 315 | // Return a unique type code for the object | 306 | // Return a unique type code for the object |
| 316 | qpdf_object_type_e | 307 | qpdf_object_type_e |
| 317 | getTypeCode() const | 308 | getTypeCode() const |
| @@ -390,7 +381,17 @@ class QPDFObject | @@ -390,7 +381,17 @@ class QPDFObject | ||
| 390 | std::string var_descr; | 381 | std::string var_descr; |
| 391 | }; | 382 | }; |
| 392 | 383 | ||
| 393 | - using Description = std::variant<std::string, JSON_Descr, ChildDescr>; | 384 | + struct ObjStreamDescr |
| 385 | + { | ||
| 386 | + ObjStreamDescr(int stream_id, int obj_id) : | ||
| 387 | + stream_id(stream_id), | ||
| 388 | + obj_id(obj_id) {}; | ||
| 389 | + | ||
| 390 | + int stream_id; | ||
| 391 | + int obj_id; | ||
| 392 | + }; | ||
| 393 | + | ||
| 394 | + using Description = std::variant<std::string, JSON_Descr, ChildDescr, ObjStreamDescr>; | ||
| 394 | 395 | ||
| 395 | void | 396 | void |
| 396 | setDescription( | 397 | setDescription( |
libqpdf/qpdf/QPDFParser.hh
| @@ -62,9 +62,32 @@ class QPDFParser | @@ -62,9 +62,32 @@ class QPDFParser | ||
| 62 | decrypter(nullptr), | 62 | decrypter(nullptr), |
| 63 | context(context), | 63 | context(context), |
| 64 | description(std::move(sp_description)), | 64 | description(std::move(sp_description)), |
| 65 | - parse_pdf(false) | 65 | + parse_pdf(true) |
| 66 | { | 66 | { |
| 67 | } | 67 | } |
| 68 | + | ||
| 69 | + // Used by readObjectInStream only | ||
| 70 | + QPDFParser( | ||
| 71 | + InputSource& input, | ||
| 72 | + int stream_id, | ||
| 73 | + int obj_id, | ||
| 74 | + std::string const& object_description, | ||
| 75 | + qpdf::Tokenizer& tokenizer, | ||
| 76 | + QPDF* context) : | ||
| 77 | + input(input), | ||
| 78 | + object_description(object_description), | ||
| 79 | + tokenizer(tokenizer), | ||
| 80 | + decrypter(nullptr), | ||
| 81 | + context(context), | ||
| 82 | + description( | ||
| 83 | + std::make_shared<QPDFObject::Description>( | ||
| 84 | + QPDFObject::ObjStreamDescr(stream_id, obj_id))), | ||
| 85 | + parse_pdf(true), | ||
| 86 | + stream_id(stream_id), | ||
| 87 | + obj_id(obj_id) | ||
| 88 | + { | ||
| 89 | + } | ||
| 90 | + | ||
| 68 | ~QPDFParser() = default; | 91 | ~QPDFParser() = default; |
| 69 | 92 | ||
| 70 | QPDFObjectHandle parse(bool& empty, bool content_stream); | 93 | QPDFObjectHandle parse(bool& empty, bool content_stream); |
| @@ -124,6 +147,8 @@ class QPDFParser | @@ -124,6 +147,8 @@ class QPDFParser | ||
| 124 | QPDF* context; | 147 | QPDF* context; |
| 125 | std::shared_ptr<QPDFObject::Description> description; | 148 | std::shared_ptr<QPDFObject::Description> description; |
| 126 | bool parse_pdf; | 149 | bool parse_pdf; |
| 150 | + int stream_id{0}; | ||
| 151 | + int obj_id{0}; | ||
| 127 | 152 | ||
| 128 | std::vector<StackFrame> stack; | 153 | std::vector<StackFrame> stack; |
| 129 | StackFrame* frame{nullptr}; | 154 | StackFrame* frame{nullptr}; |
libqpdf/qpdf/QPDF_private.hh
| @@ -3,6 +3,7 @@ | @@ -3,6 +3,7 @@ | ||
| 3 | 3 | ||
| 4 | #include <qpdf/QPDF.hh> | 4 | #include <qpdf/QPDF.hh> |
| 5 | 5 | ||
| 6 | +#include <qpdf/QPDFObject_private.hh> | ||
| 6 | #include <qpdf/QPDFTokenizer_private.hh> | 7 | #include <qpdf/QPDFTokenizer_private.hh> |
| 7 | 8 | ||
| 8 | // Writer class is restricted to QPDFWriter so that only it can call certain methods. | 9 | // Writer class is restricted to QPDFWriter so that only it can call certain methods. |
| @@ -457,6 +458,7 @@ class QPDF::Members | @@ -457,6 +458,7 @@ class QPDF::Members | ||
| 457 | qpdf::Tokenizer tokenizer; | 458 | qpdf::Tokenizer tokenizer; |
| 458 | std::shared_ptr<InputSource> file; | 459 | std::shared_ptr<InputSource> file; |
| 459 | std::string last_object_description; | 460 | std::string last_object_description; |
| 461 | + std::shared_ptr<QPDFObject::Description> last_ostream_description; | ||
| 460 | bool provided_password_is_hex_key{false}; | 462 | bool provided_password_is_hex_key{false}; |
| 461 | bool ignore_xref_streams{false}; | 463 | bool ignore_xref_streams{false}; |
| 462 | bool suppress_warnings{false}; | 464 | bool suppress_warnings{false}; |
manual/release-notes.rst
| @@ -29,6 +29,12 @@ more detail. | @@ -29,6 +29,12 @@ more detail. | ||
| 29 | - There have been further enhancements to how files with damaged xref | 29 | - There have been further enhancements to how files with damaged xref |
| 30 | tables are recovered. | 30 | tables are recovered. |
| 31 | 31 | ||
| 32 | + - Other changes | ||
| 33 | + | ||
| 34 | + - The parsing of object streams including the creation of error/warning | ||
| 35 | + messages and object descriptions has been refactored with some | ||
| 36 | + improvement both in runtime and memory usage. | ||
| 37 | + | ||
| 32 | - There has been some refactoring of how object streams are written with | 38 | - There has been some refactoring of how object streams are written with |
| 33 | some performance improvement. | 39 | some performance improvement. |
| 34 | 40 |