diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 23eef96..dd4bfe4 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -734,7 +734,6 @@ class QPDF class ParseGuard; class Pipe; class JobSetter; - class Xref_table; // For testing only -- do not add to DLL static bool test_json_validators(); @@ -811,7 +810,7 @@ class QPDF void optimize( QPDFWriter::ObjTable const& obj, std::function skip_stream_parameters); - void optimize(Xref_table const& obj); + void optimize(Objects const& obj); // Get lists of all objects in order according to the part of a linearized file that they belong // to. @@ -904,7 +903,7 @@ class QPDF QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, std::map const& object_stream_data); QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj); - QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, Xref_table const& obj); + QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, Objects const& obj); int lengthNextN(int first_object, int n); void checkHPageOffset(std::vector const& pages, std::map& idx_to_obj); @@ -950,7 +949,7 @@ class QPDF std::function skip_stream_parameters); void filterCompressedObjects(std::map const& object_stream_data); void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data); - void filterCompressedObjects(Xref_table const& object_stream_data); + void filterCompressedObjects(Objects const& object_stream_data); // JSON import void importJSON(std::shared_ptr, bool must_be_complete); diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 1973592..ff57098 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -185,8 +185,7 @@ QPDF::Members::Members(QPDF& qpdf) : file_sp(new InvalidInputSource(no_input_name)), file(file_sp.get()), encp(new EncryptionParameters), - objects(qpdf, this), - xref_table(qpdf, objects, file) + objects(qpdf, this, file) { } @@ -279,7 +278,7 @@ QPDF::emptyPDF() { m->pdf_version = "1.3"; m->no_input_name = "empty PDF"; - m->xref_table.initialize_empty(); + m->objects.xref_table().initialize_empty(); } void @@ -292,7 +291,7 @@ QPDF::registerStreamFilter( void QPDF::setIgnoreXRefStreams(bool val) { - m->xref_table.ignore_streams(val); + m->objects.xref_table().ignore_streams(val); } std::shared_ptr @@ -330,7 +329,7 @@ void QPDF::setAttemptRecovery(bool val) { m->attempt_recovery = val; - m->xref_table.attempt_recovery(val); + m->objects.xref_table().attempt_recovery(val); } void @@ -424,9 +423,9 @@ QPDF::parse(char const* password) m->pdf_version = "1.2"; } - m->xref_table.initialize(); + m->objects.xref_table().initialize(); initializeEncryption(); - if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) { + if (m->objects.xref_table().size() > 0 && !getRoot().getKey("/Pages").isDictionary()) { // QPDFs created from JSON have an empty xref table and no root object yet. throw damagedPDF("", 0, "unable to find page tree"); } @@ -469,7 +468,7 @@ QPDF::warn( void QPDF::showXRefTable() { - m->xref_table.show(); + m->objects.xref_table().show(); } // Ensure all objects in the pdf file, including those in indirect references, appear in the object @@ -480,9 +479,9 @@ QPDF::fixDanglingReferences(bool force) if (m->fixed_dangling_refs) { return; } - if (!m->xref_table.resolve()) { + if (!m->objects.xref_table().resolve()) { QTC::TC("qpdf", "QPDF fix dangling triggered xref reconstruction"); - m->xref_table.resolve(); + m->objects.xref_table().resolve(); } m->fixed_dangling_refs = true; } @@ -578,7 +577,7 @@ QPDF::getObject(QPDFObjGen const& og) { if (auto it = m->objects.obj_cache.find(og); it != m->objects.obj_cache.end()) { return {it->second.object}; - } else if (m->xref_table.initialized() && !m->xref_table.type(og)) { + } else if (m->objects.xref_table().initialized() && !m->objects.xref_table().type(og)) { return QPDF_Null::create(); } else { auto result = m->objects.obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og)); @@ -945,13 +944,13 @@ QPDF::getExtensionLevel() QPDFObjectHandle QPDF::getTrailer() { - return m->xref_table.trailer(); + return m->objects.trailer(); } QPDFObjectHandle QPDF::getRoot() { - QPDFObjectHandle root = m->xref_table.trailer().getKey("/Root"); + auto root = m->objects.trailer().getKey("/Root"); if (!root.isDictionary()) { throw damagedPDF("", 0, "unable to find /Root dictionary"); } else if ( @@ -967,10 +966,10 @@ QPDF::getRoot() std::map QPDF::getXRefTable() { - if (!m->xref_table.initialized()) { + if (!m->objects.xref_table().initialized()) { throw std::logic_error("QPDF::getXRefTable called before parsing."); } - return m->xref_table.as_map(); + return m->objects.xref_table().as_map(); } bool diff --git a/libqpdf/QPDF_encryption.cc b/libqpdf/QPDF_encryption.cc index 436111b..eacbd6c 100644 --- a/libqpdf/QPDF_encryption.cc +++ b/libqpdf/QPDF_encryption.cc @@ -727,7 +727,7 @@ QPDF::initializeEncryption() // at /Encrypt again. Otherwise, things could go wrong if someone mutates the encryption // dictionary. - if (!m->xref_table.trailer().hasKey("/Encrypt")) { + if (!m->objects.trailer().hasKey("/Encrypt")) { return; } @@ -736,7 +736,7 @@ QPDF::initializeEncryption() m->encp->encrypted = true; std::string id1; - QPDFObjectHandle id_obj = m->xref_table.trailer().getKey("/ID"); + QPDFObjectHandle id_obj = m->objects.trailer().getKey("/ID"); if ((id_obj.isArray() && (id_obj.getArrayNItems() == 2) && id_obj.getArrayItem(0).isString())) { id1 = id_obj.getArrayItem(0).getStringValue(); } else { @@ -745,7 +745,7 @@ QPDF::initializeEncryption() warn(damagedPDF("trailer", "invalid /ID in trailer dictionary")); } - QPDFObjectHandle encryption_dict = m->xref_table.trailer().getKey("/Encrypt"); + QPDFObjectHandle encryption_dict = m->objects.trailer().getKey("/Encrypt"); if (!encryption_dict.isDictionary()) { throw damagedPDF("/Encrypt in trailer dictionary is not a dictionary"); } diff --git a/libqpdf/QPDF_json.cc b/libqpdf/QPDF_json.cc index 5cb2d59..13276cc 100644 --- a/libqpdf/QPDF_json.cc +++ b/libqpdf/QPDF_json.cc @@ -582,7 +582,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) this->saw_value = true; // The trailer must be a dictionary, so we can use setNextStateIfDictionary. if (setNextStateIfDictionary("trailer.value", value, st_object)) { - pdf.m->xref_table.trailer(makeObject(value)); + pdf.m->objects.xref_table().trailer(makeObject(value)); } } else if (key == "stream") { // Don't need to set saw_stream here since there's already an error. @@ -776,7 +776,7 @@ QPDF::createFromJSON(std::shared_ptr is) { m->pdf_version = "1.3"; m->no_input_name = is->getName(); - m->xref_table.initialize_json(); + m->objects.xref_table().initialize_json(); importJSON(is, true); } diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc index 13bca55..621aaf1 100644 --- a/libqpdf/QPDF_linearization.cc +++ b/libqpdf/QPDF_linearization.cc @@ -288,8 +288,8 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) QPDFObjGen og; QPDFObjectHandle H = objects().read(false, offset, "linearization hint stream", QPDFObjGen(0, 0), og, false); - qpdf_offset_t min_end_offset = m->xref_table.end_before_space(og); - qpdf_offset_t max_end_offset = m->xref_table.end_after_space(og); + qpdf_offset_t min_end_offset = m->objects.xref_table().end_before_space(og); + qpdf_offset_t max_end_offset = m->objects.xref_table().end_after_space(og); if (!H.isStream()) { throw damagedPDF("linearization dictionary", "hint table is not a stream"); } @@ -303,8 +303,8 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) auto length_og = Hdict.getKey("/Length").getObjGen(); if (length_og.isIndirect()) { QTC::TC("qpdf", "QPDF hint table length indirect"); - min_end_offset = m->xref_table.end_before_space(length_og); - max_end_offset = m->xref_table.end_after_space(length_og); + min_end_offset = m->objects.xref_table().end_before_space(length_og); + max_end_offset = m->objects.xref_table().end_after_space(length_og); } else { QTC::TC("qpdf", "QPDF hint table length direct"); } @@ -441,7 +441,7 @@ QPDF::checkLinearizationInternal() for (size_t i = 0; i < toS(npages); ++i) { QPDFObjectHandle const& page = pages.at(i); QPDFObjGen og(page.getObjGen()); - if (m->xref_table.type(og) == 2) { + if (m->objects.xref_table().type(og) == 2) { linearizationWarning( "page dictionary for page " + std::to_string(i) + " is compressed"); } @@ -457,11 +457,11 @@ QPDF::checkLinearizationInternal() break; } } - if (m->file->tell() != m->xref_table.first_item_offset()) { + if (m->file->tell() != m->objects.xref_table().first_item_offset()) { QTC::TC("qpdf", "QPDF err /T mismatch"); linearizationWarning( "space before first xref item (/T) mismatch (computed = " + - std::to_string(m->xref_table.first_item_offset()) + + std::to_string(m->objects.xref_table().first_item_offset()) + "; file = " + std::to_string(m->file->tell())); } @@ -472,7 +472,7 @@ QPDF::checkLinearizationInternal() // compressed objects are supposed to be at the end of the containing xref section if any object // streams are in use. - if (m->xref_table.uncompressed_after_compressed()) { + if (m->objects.xref_table().uncompressed_after_compressed()) { linearizationWarning("linearized file contains an uncompressed object after a compressed " "one in a cross-reference stream"); } @@ -481,8 +481,8 @@ QPDF::checkLinearizationInternal() // make changes. If it has to, then the file is not properly linearized. We use the xref table // to figure out which objects are compressed and which are uncompressed. - optimize(m->xref_table); - calculateLinearizationData(m->xref_table); + optimize(m->objects); + calculateLinearizationData(m->objects); // E: offset of end of first page -- Implementation note 123 says Acrobat includes on extra // object here by mistake. pdlin fails to place thumbnail images in section 9, so when @@ -499,8 +499,8 @@ QPDF::checkLinearizationInternal() qpdf_offset_t max_E = -1; for (auto const& oh: m->part6) { QPDFObjGen og(oh.getObjGen()); - auto before = m->xref_table.end_before_space(og); - auto after = m->xref_table.end_after_space(og); + auto before = m->objects.xref_table().end_before_space(og); + auto after = m->objects.xref_table().end_after_space(og); if (before <= 0) { // All objects have to have been dereferenced to be classified. throw std::logic_error("linearization part6 object not in cache"); @@ -533,7 +533,7 @@ QPDF::maxEnd(ObjUser const& ou) } qpdf_offset_t end = 0; for (auto const& og: m->obj_user_to_objects[ou]) { - auto e = m->xref_table.end_after_space(og); + auto e = m->objects.xref_table().end_after_space(og); if (e <= 0) { stopOnError("unknown object referenced in object user table"); } @@ -545,13 +545,14 @@ QPDF::maxEnd(ObjUser const& ou) qpdf_offset_t QPDF::getLinearizationOffset(QPDFObjGen const& og) { - switch (m->xref_table.type(og)) { + switch (m->objects.xref_table().type(og)) { case 1: - return m->xref_table.offset(og); + return m->objects.xref_table().offset(og); case 2: // For compressed objects, return the offset of the object stream that contains them. - return getLinearizationOffset(QPDFObjGen(m->xref_table.stream_number(og.getObj()), 0)); + return getLinearizationOffset( + QPDFObjGen(m->objects.xref_table().stream_number(og.getObj()), 0)); default: stopOnError("getLinearizationOffset called for xref entry not of type 1 or 2"); @@ -571,13 +572,13 @@ QPDF::getUncompressedObject(QPDFObjectHandle& obj, std::map const& obj } QPDFObjectHandle -QPDF::getUncompressedObject(QPDFObjectHandle& obj, Xref_table const& xref) +QPDF::getUncompressedObject(QPDFObjectHandle& obj, Objects const& objects) { auto og = obj.getObjGen(); - if (obj.isNull() || xref.type(og) != 2) { + if (obj.isNull() || objects.xref_table().type(og) != 2) { return obj; } - return getObject(xref.stream_number(og.getObj()), 0); + return getObject(objects.xref_table().stream_number(og.getObj()), 0); } QPDFObjectHandle @@ -597,7 +598,7 @@ QPDF::lengthNextN(int first_object, int n) int length = 0; for (int i = 0; i < n; ++i) { QPDFObjGen og(first_object + i, 0); - auto end = m->xref_table.end_after_space(og); + auto end = m->objects.xref_table().end_after_space(og); if (end <= 0) { linearizationWarning( "no xref table entry for " + std::to_string(first_object + i) + " 0"); @@ -627,7 +628,7 @@ QPDF::checkHPageOffset( int npages = toI(pages.size()); qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset); QPDFObjGen first_page_og(pages.at(0).getObjGen()); - if (m->xref_table.type(first_page_og) == 0) { + if (m->objects.xref_table().type(first_page_og) == 0) { stopOnError("supposed first page object is not known"); } qpdf_offset_t offset = getLinearizationOffset(first_page_og); @@ -638,7 +639,7 @@ QPDF::checkHPageOffset( for (int pageno = 0; pageno < npages; ++pageno) { QPDFObjGen page_og(pages.at(toS(pageno)).getObjGen()); int first_object = page_og.getObj(); - if (m->xref_table.type(page_og) == 0) { + if (m->objects.xref_table().type(page_og) == 0) { stopOnError("unknown object in page offset hint table"); } offset = getLinearizationOffset(page_og); @@ -760,7 +761,7 @@ QPDF::checkHSharedObject(std::vector const& pages, std::mapxref_table.type(og) == 0) { + if (m->objects.xref_table().type(og) == 0) { stopOnError("unknown object in shared object hint table"); } qpdf_offset_t offset = getLinearizationOffset(og); @@ -811,7 +812,7 @@ QPDF::checkHOutlines() return; } QPDFObjGen og(outlines.getObjGen()); - if (m->xref_table.type(og) == 0) { + if (m->objects.xref_table().type(og) == 0) { stopOnError("unknown object in outlines hint table"); } qpdf_offset_t offset = getLinearizationOffset(og); @@ -1158,7 +1159,7 @@ QPDF::calculateLinearizationData(T const& object_stream_data) // Map all page objects to the containing object stream. This should be a no-op in a // properly linearized file. for (auto oh: getAllPages()) { - pages.push_back(getUncompressedObject(oh, object_stream_data)); + pages.emplace_back(getUncompressedObject(oh, object_stream_data)); } } int npages = toI(pages.size()); diff --git a/libqpdf/QPDF_objects.cc b/libqpdf/QPDF_objects.cc index e28d936..b7d0aaf 100644 --- a/libqpdf/QPDF_objects.cc +++ b/libqpdf/QPDF_objects.cc @@ -24,6 +24,9 @@ #include #include +using Objects = QPDF::Objects; +using Xref_table = Objects::Xref_table; + namespace { class InvalidInputSource final: public InputSource @@ -98,7 +101,7 @@ QPDF::findStartxref() } void -QPDF::Xref_table::initialize_empty() +Xref_table::initialize_empty() { initialized_ = true; trailer_ = QPDFObjectHandle::newDictionary(); @@ -114,7 +117,7 @@ QPDF::Xref_table::initialize_empty() } void -QPDF::Xref_table::initialize_json() +Xref_table::initialize_json() { initialized_ = true; table.resize(1); @@ -123,7 +126,7 @@ QPDF::Xref_table::initialize_json() } void -QPDF::Xref_table::initialize() +Xref_table::initialize() { // PDF spec says %%EOF must be found within the last 1024 bytes of/ the file. We add an extra // 30 characters to leave room for the startxref stuff. @@ -166,7 +169,7 @@ QPDF::Xref_table::initialize() } void -QPDF::Xref_table::reconstruct(QPDFExc& e) +Xref_table::reconstruct(QPDFExc& e) { if (reconstructed_) { // Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because @@ -318,7 +321,7 @@ QPDF::Xref_table::reconstruct(QPDFExc& e) } void -QPDF::Xref_table::read(qpdf_offset_t xref_offset) +Xref_table::read(qpdf_offset_t xref_offset) { std::map free_table; std::set visited; @@ -392,8 +395,8 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset) // entries, including missing entries before the last actual entry. } -QPDF::Xref_table::Subsection -QPDF::Xref_table::subsection(std::string const& line) +Xref_table::Subsection +Xref_table::subsection(std::string const& line) { auto terminate = [this]() -> void { QTC::TC("qpdf", "QPDF invalid xref"); @@ -447,10 +450,10 @@ QPDF::Xref_table::subsection(std::string const& line) return {obj, count, file->getLastOffset() + toI(p - start)}; } -std::vector -QPDF::Xref_table::bad_subsections(std::string& line, qpdf_offset_t start) +std::vector +Xref_table::bad_subsections(std::string& line, qpdf_offset_t start) { - std::vector result; + std::vector result; file->seek(start, SEEK_SET); while (true) { @@ -475,12 +478,12 @@ QPDF::Xref_table::bad_subsections(std::string& line, qpdf_offset_t start) // Optimistically read and parse all subsection headers. If an error is encountered return the // result of bad_subsections. -std::vector -QPDF::Xref_table::subsections(std::string& line) +std::vector +Xref_table::subsections(std::string& line) { auto recovery_offset = file->tell(); try { - std::vector result; + std::vector result; while (true) { line.assign(50, '\0'); @@ -507,7 +510,7 @@ QPDF::Xref_table::subsections(std::string& line) // Returns (success, f1, f2, type). std::tuple -QPDF::Xref_table::read_bad_entry() +Xref_table::read_bad_entry() { qpdf_offset_t f1{0}; int f2{0}; @@ -592,7 +595,7 @@ QPDF::Xref_table::read_bad_entry() // Optimistically read and parse xref entry. If entry is bad, call read_bad_xrefEntry and return // result. Returns (success, f1, f2, type). std::tuple -QPDF::Xref_table::read_entry() +Xref_table::read_entry() { qpdf_offset_t f1{0}; int f2{0}; @@ -651,7 +654,7 @@ QPDF::Xref_table::read_entry() // Read a single cross-reference table section and associated trailer. qpdf_offset_t -QPDF::Xref_table::process_section(qpdf_offset_t xref_offset) +Xref_table::process_section(qpdf_offset_t xref_offset) { file->seek(xref_offset, SEEK_SET); std::string line; @@ -738,7 +741,7 @@ QPDF::Xref_table::process_section(qpdf_offset_t xref_offset) // Read a single cross-reference stream. qpdf_offset_t -QPDF::Xref_table::read_stream(qpdf_offset_t xref_offset) +Xref_table::read_stream(qpdf_offset_t xref_offset) { if (!ignore_streams_) { QPDFObjGen x_og; @@ -762,8 +765,7 @@ QPDF::Xref_table::read_stream(qpdf_offset_t xref_offset) // Return the entry size of the xref stream and the processed W array. std::pair> -QPDF::Xref_table::process_W( - QPDFObjectHandle& dict, std::function damaged) +Xref_table::process_W(QPDFObjectHandle& dict, std::function damaged) { auto W_obj = dict.getKey("/W"); if (!(W_obj.isArray() && W_obj.getArrayNItems() >= 3 && W_obj.getArrayItem(0).isInteger() && @@ -794,7 +796,7 @@ QPDF::Xref_table::process_W( // Validate Size entry and return the maximum number of entries that the xref stream can contain and // the value of the Size entry. std::pair -QPDF::Xref_table::process_Size( +Xref_table::process_Size( QPDFObjectHandle& dict, int entry_size, std::function damaged) { // Number of entries is limited by the highest possible object id and stream size. @@ -818,7 +820,7 @@ QPDF::Xref_table::process_Size( // Return the number of entries of the xref stream and the processed Index array. std::pair>> -QPDF::Xref_table::process_Index( +Xref_table::process_Index( QPDFObjectHandle& dict, int max_num_entries, std::function damaged) { auto size = dict.getKey("/Size").getIntValueAsInt(); @@ -885,7 +887,7 @@ QPDF::Xref_table::process_Index( } qpdf_offset_t -QPDF::Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) +Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) { auto damaged = [this, xref_offset](std::string_view msg) -> QPDFExc { return qpdf.damagedPDF("xref stream", xref_offset, msg.data()); @@ -978,7 +980,7 @@ QPDF::Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xr } void -QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2) +Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2) { // Populate the xref table in such a way that the first reference to an object that we see, // which is the one in the latest xref table in which it appears, is the one that gets stored. @@ -1040,7 +1042,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2) } void -QPDF::Xref_table::insert_free(QPDFObjGen og) +Xref_table::insert_free(QPDFObjGen og) { // At the moment we are processing the updates last to first and therefore the gen doesn't // matter as long as it > 0 to distinguish it from an uninitialized entry. This will need to be @@ -1055,7 +1057,7 @@ QPDF::Xref_table::insert_free(QPDFObjGen og) } QPDFObjGen -QPDF::Xref_table::at_offset(qpdf_offset_t offset) const noexcept +Xref_table::at_offset(qpdf_offset_t offset) const noexcept { int id = 0; int gen = 0; @@ -1075,7 +1077,7 @@ QPDF::Xref_table::at_offset(qpdf_offset_t offset) const noexcept } std::map -QPDF::Xref_table::as_map() const +Xref_table::as_map() const { std::map result; int i{0}; @@ -1099,7 +1101,7 @@ QPDF::Xref_table::as_map() const } void -QPDF::Xref_table::show() +Xref_table::show() { auto& cout = *qpdf.m->log->getInfo(); int i = -1; @@ -1128,7 +1130,7 @@ QPDF::Xref_table::show() // Resolve all objects in the xref table. If this triggers a xref table reconstruction abort and // return false. Otherwise return true. bool -QPDF::Xref_table::resolve() +Xref_table::resolve() { bool may_change = !reconstructed_; int i = -1; @@ -1159,7 +1161,7 @@ QPDF::getAllObjects() } QPDFObjectHandle -QPDF::Xref_table::read_trailer() +Xref_table::read_trailer() { qpdf_offset_t offset = file->tell(); bool empty = false; @@ -1177,7 +1179,7 @@ QPDF::Xref_table::read_trailer() } QPDFObjectHandle -QPDF::Objects::read_object(std::string const& description, QPDFObjGen og) +Objects::read_object(std::string const& description, QPDFObjGen og) { qpdf.setLastObjectDescription(description, og); qpdf_offset_t offset = m->file->tell(); @@ -1209,7 +1211,7 @@ QPDF::Objects::read_object(std::string const& description, QPDFObjGen og) // After reading stream dictionary and stream keyword, read rest of stream. void -QPDF::Objects::read_stream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset) +Objects::read_stream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset) { validate_stream_line_end(object, og, offset); @@ -1250,8 +1252,7 @@ QPDF::Objects::read_stream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_ } void -QPDF::Objects::validate_stream_line_end( - QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset) +Objects::validate_stream_line_end(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset) { // The PDF specification states that the word "stream" should be followed by either a carriage // return and a newline or by a newline alone. It specifically disallowed following it by a @@ -1302,7 +1303,7 @@ QPDF::Objects::validate_stream_line_end( } QPDFObjectHandle -QPDF::Objects::readObjectInStream(std::shared_ptr& input, int obj) +Objects::readObjectInStream(std::shared_ptr& input, int obj) { m->last_object_description.erase(7); // last_object_description starts with "object " m->last_object_description += std::to_string(obj); @@ -1332,7 +1333,7 @@ QPDF::findEndstream() } size_t -QPDF::Objects::recover_stream_length( +Objects::recover_stream_length( std::shared_ptr input, QPDFObjGen og, qpdf_offset_t stream_offset) { // Try to reconstruct stream length by looking for endstream or endobj @@ -1351,7 +1352,7 @@ QPDF::Objects::recover_stream_length( if (length) { // Make sure this is inside this object - auto found = m->xref_table.at_offset(stream_offset + toO(length)); + auto found = xref.at_offset(stream_offset + toO(length)); if (found == QPDFObjGen() || found == og) { // If we are trying to recover an XRef stream the xref table will not contain and // won't contain any entries, therefore we cannot check the found length. Otherwise we @@ -1376,7 +1377,7 @@ QPDF::Objects::recover_stream_length( } QPDFObjectHandle -QPDF::Objects::read( +Objects::read( bool try_recovery, qpdf_offset_t offset, std::string const& description, @@ -1455,10 +1456,10 @@ QPDF::Objects::read( } catch (QPDFExc& e) { if (try_recovery) { // Try again after reconstructing xref table - m->xref_table.reconstruct(e); - if (m->xref_table.type(exp_og) == 1) { + xref.reconstruct(e); + if (xref.type(exp_og) == 1) { QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset"); - return read(false, m->xref_table.offset(exp_og), description, exp_og, og, false); + return read(false, xref.offset(exp_og), description, exp_og, og, false); } else { QTC::TC("qpdf", "QPDF object gone after xref reconstruction"); qpdf.warn(qpdf.damagedPDF( @@ -1498,7 +1499,7 @@ QPDF::Objects::read( } } qpdf_offset_t end_after_space = m->file->tell(); - if (skip_cache_if_in_xref && m->xref_table.type(og)) { + if (skip_cache_if_in_xref && xref.type(og)) { // Ordinarily, an object gets read here when resolved through xref table or stream. In // the special case of the xref stream and linearization hint tables, the offset comes // from another source. For the specific case of xref streams, the xref stream is read @@ -1526,8 +1527,7 @@ QPDF::Objects::read( // could use !check_og in place of skip_cache_if_in_xref. QTC::TC("qpdf", "QPDF skipping cache for known unchecked object"); } else { - m->xref_table.linearization_offsets( - toS(og.getObj()), end_before_space, end_after_space); + xref.linearization_offsets(toS(og.getObj()), end_before_space, end_after_space); update_table(og, oh.getObj()); } } @@ -1536,7 +1536,7 @@ QPDF::Objects::read( } QPDFObject* -QPDF::Objects::resolve(QPDFObjGen og) +Objects::resolve(QPDFObjGen og) { if (!unresolved(og)) { return obj_cache[og].object.get(); @@ -1553,19 +1553,19 @@ QPDF::Objects::resolve(QPDFObjGen og) ResolveRecorder rr(&qpdf, og); try { - switch (m->xref_table.type(og)) { + switch (xref.type(og)) { case 0: break; case 1: { // Object stored in cache by readObjectAtOffset QPDFObjGen a_og; - QPDFObjectHandle oh = read(true, m->xref_table.offset(og), "", og, a_og, false); + QPDFObjectHandle oh = read(true, xref.offset(og), "", og, a_og, false); } break; case 2: - resolveObjectsInStream(m->xref_table.stream_number(og.getObj())); + resolveObjectsInStream(xref.stream_number(og.getObj())); break; default: @@ -1591,7 +1591,7 @@ QPDF::Objects::resolve(QPDFObjGen og) } void -QPDF::Objects::resolveObjectsInStream(int obj_stream_number) +Objects::resolveObjectsInStream(int obj_stream_number) { if (m->resolved_object_streams.count(obj_stream_number)) { return; @@ -1642,7 +1642,7 @@ QPDF::Objects::resolveObjectsInStream(int obj_stream_number) int num = QUtil::string_to_int(tnum.getValue().c_str()); long long offset = QUtil::string_to_int(toffset.getValue().c_str()); - if (num > m->xref_table.max_id()) { + if (num > xref.max_id()) { continue; } if (num == obj_stream_number) { @@ -1674,8 +1674,7 @@ QPDF::Objects::resolveObjectsInStream(int obj_stream_number) m->last_object_description += "object "; for (auto const& iter: offsets) { QPDFObjGen og(iter.first, 0); - if (m->xref_table.type(og) == 2 && - m->xref_table.stream_number(og.getObj()) == obj_stream_number) { + if (xref.type(og) == 2 && xref.stream_number(og.getObj()) == obj_stream_number) { int offset = iter.second; input->seek(offset, SEEK_SET); QPDFObjectHandle oh = readObjectInStream(input, iter.first); @@ -1687,7 +1686,7 @@ QPDF::Objects::resolveObjectsInStream(int obj_stream_number) } void -QPDF::Objects::update_table(QPDFObjGen og, const std::shared_ptr& object) +Objects::update_table(QPDFObjGen og, const std::shared_ptr& object) { object->setObjGen(&qpdf, og); if (cached(og)) { @@ -1699,19 +1698,19 @@ QPDF::Objects::update_table(QPDFObjGen og, const std::shared_ptr& ob } bool -QPDF::Objects::cached(QPDFObjGen og) +Objects::cached(QPDFObjGen og) { return obj_cache.count(og) != 0; } bool -QPDF::Objects::unresolved(QPDFObjGen og) +Objects::unresolved(QPDFObjGen og) { return !cached(og) || obj_cache[og].object->isUnresolved(); } QPDFObjGen -QPDF::Objects::next_id() +Objects::next_id() { int max_objid = toI(qpdf.getObjectCount()); if (max_objid == std::numeric_limits::max()) { @@ -1721,7 +1720,7 @@ QPDF::Objects::next_id() } QPDFObjectHandle -QPDF::Objects::make_indirect(std::shared_ptr const& obj) +Objects::make_indirect(std::shared_ptr const& obj) { QPDFObjGen next{next_id()}; obj_cache[next] = ObjCache(obj); @@ -1729,14 +1728,14 @@ QPDF::Objects::make_indirect(std::shared_ptr const& obj) } std::shared_ptr -QPDF::Objects::get_for_parser(int id, int gen, bool parse_pdf) +Objects::get_for_parser(int id, int gen, bool parse_pdf) { // This method is called by the parser and therefore must not resolve any objects. auto og = QPDFObjGen(id, gen); if (auto iter = obj_cache.find(og); iter != obj_cache.end()) { return iter->second.object; } - if (m->xref_table.type(og) || !m->xref_table.initialized()) { + if (xref.type(og) || !xref.initialized()) { return obj_cache.insert({og, QPDF_Unresolved::create(&qpdf, og)}).first->second.object; } if (parse_pdf) { @@ -1746,15 +1745,14 @@ QPDF::Objects::get_for_parser(int id, int gen, bool parse_pdf) } std::shared_ptr -QPDF::Objects::get_for_json(int id, int gen) +Objects::get_for_json(int id, int gen) { auto og = QPDFObjGen(id, gen); auto [it, inserted] = obj_cache.try_emplace(og); auto& obj = it->second.object; if (inserted) { - obj = (m->xref_table.initialized() && !m->xref_table.type(og)) - ? QPDF_Null::create(&qpdf, og) - : QPDF_Unresolved::create(&qpdf, og); + obj = (xref.initialized() && !xref.type(og)) ? QPDF_Null::create(&qpdf, og) + : QPDF_Unresolved::create(&qpdf, og); } return obj; } @@ -1770,7 +1768,7 @@ QPDF::replaceObject(QPDFObjGen const& og, QPDFObjectHandle oh) } void -QPDF::Objects::erase(QPDFObjGen og) +Objects::erase(QPDFObjGen og) { if (auto cached = obj_cache.find(og); cached != obj_cache.end()) { // Take care of any object handles that may be floating around. @@ -1790,11 +1788,11 @@ QPDF::swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2) } size_t -QPDF::Objects::table_size() +Objects::table_size() { // If obj_cache is dense, accommodate all object in tables,else accommodate only original // objects. - auto max_xref = toI(m->xref_table.size()); + auto max_xref = toI(xref.size()); if (max_xref > 0) { --max_xref; } @@ -1813,20 +1811,20 @@ QPDF::Objects::table_size() } std::vector -QPDF::Objects::compressible_vector() +Objects::compressible_vector() { return compressible(); } std::vector -QPDF::Objects::compressible_set() +Objects::compressible_set() { return compressible(); } template std::vector -QPDF::Objects::compressible() +Objects::compressible() { // Return a list of objects that are allowed to be in object streams. Walk through the objects // by traversing the document from the root, including a traversal of the pages tree. This @@ -1835,14 +1833,14 @@ QPDF::Objects::compressible() // iterating through the xref table since it avoids preserving orphaned items. // Exclude encryption dictionary, if any - QPDFObjectHandle encryption_dict = m->xref_table.trailer().getKey("/Encrypt"); + QPDFObjectHandle encryption_dict = trailer().getKey("/Encrypt"); QPDFObjGen encryption_dict_og = encryption_dict.getObjGen(); const size_t max_obj = qpdf.getObjectCount(); std::vector visited(max_obj, false); std::vector queue; queue.reserve(512); - queue.push_back(m->xref_table.trailer()); + queue.emplace_back(trailer()); std::vector result; if constexpr (std::is_same_v) { result.reserve(obj_cache.size()); diff --git a/libqpdf/QPDF_optimization.cc b/libqpdf/QPDF_optimization.cc index 3dcaa36..585b707 100644 --- a/libqpdf/QPDF_optimization.cc +++ b/libqpdf/QPDF_optimization.cc @@ -79,9 +79,9 @@ QPDF::optimize( } void -QPDF::optimize(QPDF::Xref_table const& xref) +QPDF::optimize(QPDF::Objects const& objects) { - optimize_internal(xref, false, nullptr); + optimize_internal(objects, false, nullptr); } template @@ -121,13 +121,13 @@ QPDF::optimize_internal( } // Traverse document-level items - for (auto const& key: m->xref_table.trailer().getKeys()) { + for (auto const& key: m->objects.trailer().getKeys()) { if (key == "/Root") { // handled separately } else { updateObjectMaps( ObjUser(ObjUser::ou_trailer_key, key), - m->xref_table.trailer().getKey(key), + m->objects.trailer().getKey(key), skip_stream_parameters); } } @@ -175,7 +175,7 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) // values for them. std::map> key_ancestors; pushInheritedAttributesToPageInternal( - m->xref_table.trailer().getKey("/Root").getKey("/Pages"), + m->objects.trailer().getKey("/Root").getKey("/Pages"), key_ancestors, allow_changes, warn_skipped_keys); @@ -450,8 +450,9 @@ QPDF::filterCompressedObjects(QPDFWriter::ObjTable const& obj) } void -QPDF::filterCompressedObjects(QPDF::Xref_table const& xref) +QPDF::filterCompressedObjects(QPDF::Objects const& objects) { + auto const& xref = objects.xref_table(); if (!xref.object_streams()) { return; } diff --git a/libqpdf/qpdf/QPDF_objects.hh b/libqpdf/qpdf/QPDF_objects.hh index 2d2cadb..9cf74fc 100644 --- a/libqpdf/qpdf/QPDF_objects.hh +++ b/libqpdf/qpdf/QPDF_objects.hh @@ -3,17 +3,410 @@ #include +#include + // The Objects class is responsible for keeping track of all objects belonging to a QPDF instance, // including loading it from an input source when required. class QPDF::Objects { public: - Objects(QPDF& qpdf, QPDF::Members* m) : + // Xref_table encapsulates the pdf's xref table and trailer. + class Xref_table + { + public: + Xref_table(Objects& objects) : + qpdf(objects.qpdf), + objects(objects), + file(objects.file) + { + tokenizer.allowEOF(); + } + + void initialize(); + void initialize_empty(); + void initialize_json(); + void reconstruct(QPDFExc& e); + void show(); + bool resolve(); + + QPDFObjectHandle + trailer() noexcept + { + return trailer_; + } + + QPDFObjectHandle const& + trailer() const noexcept + { + return trailer_; + } + + void + trailer(QPDFObjectHandle&& oh) + { + trailer_ = std::move(oh); + } + + // Returns 0 if og is not in table. + size_t + type(QPDFObjGen og) const + { + int id = og.getObj(); + if (id < 1 || static_cast(id) >= table.size()) { + return 0; + } + auto& e = table[static_cast(id)]; + return e.gen() == og.getGen() ? e.type() : 0; + } + + // Returns 0 if og is not in table. + size_t + type(size_t id) const noexcept + { + if (id >= table.size()) { + return 0; + } + return table[id].type(); + } + + // Returns 0 if og is not in table. + qpdf_offset_t + offset(QPDFObjGen og) const noexcept + { + int id = og.getObj(); + if (id < 1 || static_cast(id) >= table.size()) { + return 0; + } + return table[static_cast(id)].offset(); + } + + // Returns 0 if id is not in table. + int + stream_number(int id) const noexcept + { + if (id < 1 || static_cast(id) >= table.size()) { + return 0; + } + return table[static_cast(id)].stream_number(); + } + + int + stream_index(int id) const noexcept + { + if (id < 1 || static_cast(id) >= table.size()) { + return 0; + } + return table[static_cast(id)].stream_index(); + } + + QPDFObjGen at_offset(qpdf_offset_t offset) const noexcept; + + std::map as_map() const; + + bool + object_streams() const noexcept + { + return object_streams_; + } + + // Return a vector of object id and stream number for each compressed object. + std::vector> + compressed_objects() const + { + if (!initialized()) { + throw std::logic_error("Xref_table::compressed_objects called before parsing."); + } + + std::vector> result; + result.reserve(table.size()); + + unsigned int i{0}; + for (auto const& item: table) { + if (item.type() == 2) { + result.emplace_back(i, item.stream_number()); + } + ++i; + } + return result; + } + + // Temporary access to underlying table size + size_t + size() const noexcept + { + return table.size(); + } + + void + ignore_streams(bool val) noexcept + { + ignore_streams_ = val; + } + + bool + initialized() const noexcept + { + return initialized_; + } + + void + attempt_recovery(bool val) noexcept + { + attempt_recovery_ = val; + } + + int + max_id() const noexcept + { + return max_id_; + } + + // For Linearization + + qpdf_offset_t + end_after_space(QPDFObjGen og) + { + auto& e = entry(toS(og.getObj())); + switch (e.type()) { + case 1: + return e.end_after_space_; + case 2: + { + auto es = entry(toS(e.stream_number())); + return es.type() == 1 ? es.end_after_space_ : 0; + } + default: + return 0; + } + } + + qpdf_offset_t + end_before_space(QPDFObjGen og) + { + auto& e = entry(toS(og.getObj())); + switch (e.type()) { + case 1: + return e.end_before_space_; + case 2: + { + auto es = entry(toS(e.stream_number())); + return es.type() == 1 ? es.end_before_space_ : 0; + } + default: + return 0; + } + } + + void + linearization_offsets(size_t id, qpdf_offset_t before, qpdf_offset_t after) + { + if (type(id)) { + table[id].end_before_space_ = before; + table[id].end_after_space_ = after; + } + } + + bool + uncompressed_after_compressed() const noexcept + { + return uncompressed_after_compressed_; + } + + // Actual value from file + qpdf_offset_t + first_item_offset() const noexcept + { + return first_item_offset_; + } + + private: + // Object, count, offset of first entry + typedef std::tuple Subsection; + + struct Uncompressed + { + Uncompressed(qpdf_offset_t offset) : + offset(offset) + { + } + qpdf_offset_t offset; + }; + + struct Compressed + { + Compressed(int stream_number, int stream_index) : + stream_number(stream_number), + stream_index(stream_index) + { + } + int stream_number{0}; + int stream_index{0}; + }; + + typedef std::variant Xref; + + struct Entry + { + Entry() = default; + + Entry(int gen, Xref entry) : + gen_(gen), + entry(entry) + { + } + + int + gen() const noexcept + { + return gen_; + } + + size_t + type() const noexcept + { + return entry.index(); + } + + qpdf_offset_t + offset() const noexcept + { + return type() == 1 ? std::get<1>(entry).offset : 0; + } + + int + stream_number() const noexcept + { + return type() == 2 ? std::get<2>(entry).stream_number : 0; + } + + int + stream_index() const noexcept + { + return type() == 2 ? std::get<2>(entry).stream_index : 0; + } + + int gen_{0}; + Xref entry; + qpdf_offset_t end_before_space_{0}; + qpdf_offset_t end_after_space_{0}; + }; + + Entry& + entry(size_t id) + { + return id < table.size() ? table[id] : table[0]; + } + + void read(qpdf_offset_t offset); + + // Methods to parse tables + qpdf_offset_t process_section(qpdf_offset_t offset); + std::vector subsections(std::string& line); + std::vector bad_subsections(std::string& line, qpdf_offset_t offset); + Subsection subsection(std::string const& line); + std::tuple read_entry(); + std::tuple read_bad_entry(); + + // Methods to parse streams + qpdf_offset_t read_stream(qpdf_offset_t offset); + qpdf_offset_t process_stream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream); + std::pair> + process_W(QPDFObjectHandle& dict, std::function damaged); + std::pair process_Size( + QPDFObjectHandle& dict, + int entry_size, + std::function damaged); + std::pair>> process_Index( + QPDFObjectHandle& dict, + int max_num_entries, + std::function damaged); + + QPDFObjectHandle read_trailer(); + + QPDFTokenizer::Token + read_token(size_t max_len = 0) + { + return tokenizer.readToken(*file, "", true, max_len); + } + + // Methods to insert table entries + void insert(int obj, int f0, qpdf_offset_t f1, int f2); + void insert_free(QPDFObjGen); + + QPDFExc + damaged_pdf(std::string const& msg) + { + return qpdf.damagedPDF("", 0, msg); + } + + QPDFExc + damaged_table(std::string const& msg) + { + return qpdf.damagedPDF("xref table", msg); + } + + void + warn_damaged(std::string const& msg) + { + qpdf.warn(damaged_pdf(msg)); + } + + QPDF& qpdf; + QPDF::Objects& objects; + InputSource* const& file; + QPDFTokenizer tokenizer; + + std::vector table; + QPDFObjectHandle trailer_; + + bool attempt_recovery_{true}; + bool initialized_{false}; + bool ignore_streams_{false}; + bool reconstructed_{false}; + bool object_streams_{false}; + // Before the xref table is initialized, max_id_ is an upper bound on the possible object + // ids that could be present in the PDF file. Once the trailer has been read, max_id_ is set + // to the value of /Size. If the file is damaged, max_id_ becomes the maximum object id in + // the xref table after reconstruction. + int max_id_{std::numeric_limits::max() - 1}; + + // Linearization data + bool uncompressed_after_compressed_{false}; + qpdf_offset_t first_item_offset_{0}; // actual value from file + }; // Xref_table; + + Objects(QPDF& qpdf, QPDF::Members* m, InputSource* const& file) : qpdf(qpdf), - m(m) + file(file), + m(m), + xref(*this) { } + Xref_table& + xref_table() noexcept + { + return xref; + } + + Xref_table const& + xref_table() const noexcept + { + return xref; + } + + QPDFObjectHandle + trailer() noexcept + { + return xref.trailer(); + } + + QPDFObjectHandle const& + trailer() const noexcept + { + return xref.trailer(); + } + std::map obj_cache; QPDFObjectHandle readObjectInStream(std::shared_ptr& input, int obj); @@ -42,8 +435,6 @@ class QPDF::Objects size_t table_size(); private: - friend class QPDF::Xref_table; - void erase(QPDFObjGen og); bool cached(QPDFObjGen og); bool unresolved(QPDFObjGen og); @@ -55,7 +446,9 @@ class QPDF::Objects std::shared_ptr input, QPDFObjGen og, qpdf_offset_t stream_offset); QPDF& qpdf; + InputSource* const& file; QPDF::Members* m; + Xref_table xref; }; // Objects #endif // QPDF_OBJECTS_HH diff --git a/libqpdf/qpdf/QPDF_private.hh b/libqpdf/qpdf/QPDF_private.hh index 89bf8a5..405d058 100644 --- a/libqpdf/qpdf/QPDF_private.hh +++ b/libqpdf/qpdf/QPDF_private.hh @@ -7,363 +7,6 @@ #include -// Xref_table encapsulates the pdf's xref table and trailer. -class QPDF::Xref_table -{ - public: - Xref_table(QPDF& qpdf, QPDF::Objects& objects, InputSource* const& file) : - qpdf(qpdf), - objects(objects), - file(file) - { - tokenizer.allowEOF(); - } - - void initialize(); - void initialize_empty(); - void initialize_json(); - void reconstruct(QPDFExc& e); - void show(); - bool resolve(); - - QPDFObjectHandle - trailer() const - { - return trailer_; - } - - void - trailer(QPDFObjectHandle&& oh) - { - trailer_ = std::move(oh); - } - - // Returns 0 if og is not in table. - size_t - type(QPDFObjGen og) const - { - int id = og.getObj(); - if (id < 1 || static_cast(id) >= table.size()) { - return 0; - } - auto& e = table[static_cast(id)]; - return e.gen() == og.getGen() ? e.type() : 0; - } - - // Returns 0 if og is not in table. - size_t - type(size_t id) const noexcept - { - if (id >= table.size()) { - return 0; - } - return table[id].type(); - } - - // Returns 0 if og is not in table. - qpdf_offset_t - offset(QPDFObjGen og) const noexcept - { - int id = og.getObj(); - if (id < 1 || static_cast(id) >= table.size()) { - return 0; - } - return table[static_cast(id)].offset(); - } - - // Returns 0 if id is not in table. - int - stream_number(int id) const noexcept - { - if (id < 1 || static_cast(id) >= table.size()) { - return 0; - } - return table[static_cast(id)].stream_number(); - } - - int - stream_index(int id) const noexcept - { - if (id < 1 || static_cast(id) >= table.size()) { - return 0; - } - return table[static_cast(id)].stream_index(); - } - - QPDFObjGen at_offset(qpdf_offset_t offset) const noexcept; - - std::map as_map() const; - - bool - object_streams() const noexcept - { - return object_streams_; - } - - // Return a vector of object id and stream number for each compressed object. - std::vector> - compressed_objects() const - { - if (!initialized()) { - throw std::logic_error("Xref_table::compressed_objects called before parsing."); - } - - std::vector> result; - result.reserve(table.size()); - - unsigned int i{0}; - for (auto const& item: table) { - if (item.type() == 2) { - result.emplace_back(i, item.stream_number()); - } - ++i; - } - return result; - } - - // Temporary access to underlying table size - size_t - size() const noexcept - { - return table.size(); - } - - void - ignore_streams(bool val) noexcept - { - ignore_streams_ = val; - } - - bool - initialized() const noexcept - { - return initialized_; - } - - void - attempt_recovery(bool val) noexcept - { - attempt_recovery_ = val; - } - - int - max_id() const noexcept - { - return max_id_; - } - - // For Linearization - - qpdf_offset_t - end_after_space(QPDFObjGen og) - { - auto& e = entry(toS(og.getObj())); - switch (e.type()) { - case 1: - return e.end_after_space_; - case 2: - { - auto es = entry(toS(e.stream_number())); - return es.type() == 1 ? es.end_after_space_ : 0; - } - default: - return 0; - } - } - - qpdf_offset_t - end_before_space(QPDFObjGen og) - { - auto& e = entry(toS(og.getObj())); - switch (e.type()) { - case 1: - return e.end_before_space_; - case 2: - { - auto es = entry(toS(e.stream_number())); - return es.type() == 1 ? es.end_before_space_ : 0; - } - default: - return 0; - } - } - - void - linearization_offsets(size_t id, qpdf_offset_t before, qpdf_offset_t after) - { - if (type(id)) { - table[id].end_before_space_ = before; - table[id].end_after_space_ = after; - } - } - - bool - uncompressed_after_compressed() const noexcept - { - return uncompressed_after_compressed_; - } - - // Actual value from file - qpdf_offset_t - first_item_offset() const noexcept - { - return first_item_offset_; - } - - private: - // Object, count, offset of first entry - typedef std::tuple Subsection; - - struct Uncompressed - { - Uncompressed(qpdf_offset_t offset) : - offset(offset) - { - } - qpdf_offset_t offset; - }; - - struct Compressed - { - Compressed(int stream_number, int stream_index) : - stream_number(stream_number), - stream_index(stream_index) - { - } - int stream_number{0}; - int stream_index{0}; - }; - - typedef std::variant Xref; - - struct Entry - { - Entry() = default; - - Entry(int gen, Xref entry) : - gen_(gen), - entry(entry) - { - } - - int - gen() const noexcept - { - return gen_; - } - - size_t - type() const noexcept - { - return entry.index(); - } - - qpdf_offset_t - offset() const noexcept - { - return type() == 1 ? std::get<1>(entry).offset : 0; - } - - int - stream_number() const noexcept - { - return type() == 2 ? std::get<2>(entry).stream_number : 0; - } - - int - stream_index() const noexcept - { - return type() == 2 ? std::get<2>(entry).stream_index : 0; - } - - int gen_{0}; - Xref entry; - qpdf_offset_t end_before_space_{0}; - qpdf_offset_t end_after_space_{0}; - }; - - Entry& - entry(size_t id) - { - return id < table.size() ? table[id] : table[0]; - } - - void read(qpdf_offset_t offset); - - // Methods to parse tables - qpdf_offset_t process_section(qpdf_offset_t offset); - std::vector subsections(std::string& line); - std::vector bad_subsections(std::string& line, qpdf_offset_t offset); - Subsection subsection(std::string const& line); - std::tuple read_entry(); - std::tuple read_bad_entry(); - - // Methods to parse streams - qpdf_offset_t read_stream(qpdf_offset_t offset); - qpdf_offset_t process_stream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream); - std::pair> - process_W(QPDFObjectHandle& dict, std::function damaged); - std::pair process_Size( - QPDFObjectHandle& dict, int entry_size, std::function damaged); - std::pair>> process_Index( - QPDFObjectHandle& dict, - int max_num_entries, - std::function damaged); - - QPDFObjectHandle read_trailer(); - - QPDFTokenizer::Token - read_token(size_t max_len = 0) - { - return tokenizer.readToken(*file, "", true, max_len); - } - - // Methods to insert table entries - void insert(int obj, int f0, qpdf_offset_t f1, int f2); - void insert_free(QPDFObjGen); - - QPDFExc - damaged_pdf(std::string const& msg) - { - return qpdf.damagedPDF("", 0, msg); - } - - QPDFExc - damaged_table(std::string const& msg) - { - return qpdf.damagedPDF("xref table", msg); - } - - void - warn_damaged(std::string const& msg) - { - qpdf.warn(damaged_pdf(msg)); - } - - QPDF& qpdf; - QPDF::Objects& objects; - InputSource* const& file; - QPDFTokenizer tokenizer; - - std::vector table; - QPDFObjectHandle trailer_; - - bool attempt_recovery_{true}; - bool initialized_{false}; - bool ignore_streams_{false}; - bool reconstructed_{false}; - bool object_streams_{false}; - // Before the xref table is initialized, max_id_ is an upper bound on the possible object ids - // that could be present in the PDF file. Once the trailer has been read, max_id_ is set to the - // value of /Size. If the file is damaged, max_id_ becomes the maximum object id in the xref - // table after reconstruction. - int max_id_{std::numeric_limits::max() - 1}; - - // Linearization data - bool uncompressed_after_compressed_{false}; - qpdf_offset_t first_item_offset_{0}; // actual value from file -}; - // StreamCopier class is restricted to QPDFObjectHandle so it can copy stream data. class QPDF::StreamCopier { @@ -740,7 +383,6 @@ class QPDF::Members std::shared_ptr encp; std::string pdf_version; Objects objects; - Xref_table xref_table; std::set resolving; std::vector all_pages; bool invalid_page_found{false}; @@ -901,10 +543,10 @@ class QPDF::Writer return qpdf.objects().compressible_set(); } - static Xref_table const& + static Objects::Xref_table const& getXRefTable(QPDF& qpdf) { - return qpdf.m->xref_table; + return qpdf.objects().xref_table(); } static size_t