diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 05c801e..64cce84 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -827,6 +827,12 @@ class QPDF int& O, bool compressed); + // Get a list of objects that would be permitted in an object stream. + template + std::vector getCompressibleObjGens(); + std::vector getCompressibleObjVector(); + std::vector getCompressibleObjSet(); + // methods to support page handling void getAllPagesInternal( @@ -927,12 +933,6 @@ class QPDF // Methods to support optimization - void pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys); - void pushInheritedAttributesToPageInternal( - QPDFObjectHandle, - std::map>&, - bool allow_changes, - bool warn_skipped_keys); void updateObjectMaps( ObjUser const& ou, QPDFObjectHandle oh, diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index eceb702..69c6a69 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -181,6 +181,7 @@ QPDF::QPDFVersion() QPDF::Members::Members(QPDF& qpdf) : doc(qpdf, *this), objects(doc.objects()), + pages(doc.pages()), log(QPDFLogger::defaultLogger()), file(new InvalidInputSource()), encp(new EncryptionParameters) diff --git a/libqpdf/QPDF_optimization.cc b/libqpdf/QPDF_optimization.cc index 6606e81..cd7a2c2 100644 --- a/libqpdf/QPDF_optimization.cc +++ b/libqpdf/QPDF_optimization.cc @@ -7,6 +7,8 @@ #include #include +using Pages = QPDF::Doc::Pages; + QPDF::ObjUser::ObjUser(user_e type) : ou_type(type) { @@ -86,14 +88,13 @@ QPDF::optimize_internal( if (root.getKey("/Outlines").isDictionary()) { QPDFObjectHandle outlines = root.getKey("/Outlines"); if (!outlines.isIndirect()) { - QTC::TC("qpdf", "QPDF_optimization indirect outlines"); root.replaceKey("/Outlines", makeIndirectObject(outlines)); } } // Traverse pages tree pushing all inherited resources down to the page level. This also // initializes m->all_pages. - pushInheritedAttributesToPage(allow_changes, false); + m->pages.pushInheritedAttributesToPage(allow_changes, false); // Traverse pages size_t n = m->all_pages.size(); @@ -136,11 +137,11 @@ void QPDF::pushInheritedAttributesToPage() { // Public API should not have access to allow_changes. - pushInheritedAttributesToPage(true, false); + m->pages.pushInheritedAttributesToPage(true, false); } void -QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) +Pages::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) { // Traverse pages tree pushing all inherited resources down to the page level. @@ -152,7 +153,7 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) // Calling getAllPages() resolves any duplicated page objects, repairs broken nodes, and detects // loops, so we don't have to do those activities here. - getAllPages(); + qpdf.getAllPages(); // key_ancestors is a mapping of page attribute keys to a stack of Pages nodes that contain // values for them. @@ -171,7 +172,7 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) } void -QPDF::pushInheritedAttributesToPageInternal( +Pages ::pushInheritedAttributesToPageInternal( QPDFObjectHandle cur_pages, std::map>& key_ancestors, bool allow_changes, @@ -183,8 +184,7 @@ QPDF::pushInheritedAttributesToPageInternal( std::set inheritable_keys; for (auto const& key: cur_pages.getKeys()) { - if ((key == "/MediaBox") || (key == "/CropBox") || (key == "/Resources") || - (key == "/Rotate")) { + if (key == "/MediaBox" || key == "/CropBox" || key == "/Resources" || key == "/Rotate") { if (!allow_changes) { throw QPDFExc( qpdf_e_internal, @@ -197,21 +197,19 @@ QPDF::pushInheritedAttributesToPageInternal( // This is an inheritable resource inheritable_keys.insert(key); QPDFObjectHandle oh = cur_pages.getKey(key); - QTC::TC("qpdf", "QPDF opt direct pages resource", oh.isIndirect() ? 0 : 1); - if (!oh.isIndirect()) { + QTC::TC("qpdf", "QPDF opt direct pages resource", oh.indirect() ? 0 : 1); + if (!oh.indirect()) { if (!oh.isScalar()) { // Replace shared direct object non-scalar resources with indirect objects to // avoid copying large structures around. - cur_pages.replaceKey(key, makeIndirectObject(oh)); + cur_pages.replaceKey(key, qpdf.makeIndirectObject(oh)); oh = cur_pages.getKey(key); } else { // It's okay to copy scalars. - QTC::TC("qpdf", "QPDF opt inherited scalar"); } } key_ancestors[key].push_back(oh); if (key_ancestors[key].size() > 1) { - QTC::TC("qpdf", "QPDF opt key ancestors depth > 1"); } // Remove this resource from this node. It will be reattached at the page level. cur_pages.removeKey(key); @@ -219,7 +217,7 @@ QPDF::pushInheritedAttributesToPageInternal( // Warn when flattening, but not if the key is at the top level (i.e. "/Parent" not // set), as we don't change these; but flattening removes intermediate /Pages nodes. if (warn_skipped_keys && cur_pages.hasKey("/Parent")) { - warn( + qpdf.warn( qpdf_e_pages, "Pages object: object " + cur_pages.id_gen().unparse(' '), 0, @@ -242,7 +240,6 @@ QPDF::pushInheritedAttributesToPageInternal( for (auto const& iter: key_ancestors) { std::string const& key = iter.first; if (!kid.hasKey(key)) { - QTC::TC("qpdf", "QPDF opt resource inherited"); kid.replaceKey(key, iter.second.back()); } else { QTC::TC("qpdf", "QPDF opt page resource hides ancestor"); @@ -256,11 +253,9 @@ QPDF::pushInheritedAttributesToPageInternal( // which inheritable attributes are available. if (!inheritable_keys.empty()) { - QTC::TC("qpdf", "QPDF opt inheritable keys"); for (auto const& key: inheritable_keys) { key_ancestors[key].pop_back(); if (key_ancestors[key].empty()) { - QTC::TC("qpdf", "QPDF opt erase empty key ancestor"); key_ancestors.erase(key); } } diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc index 604387d..0239314 100644 --- a/libqpdf/QPDF_pages.cc +++ b/libqpdf/QPDF_pages.cc @@ -37,6 +37,8 @@ // insertPage, and removePage, along with methods they call, are concerned with it. Everything else // goes through one of those methods. +using Pages = QPDF::Doc::Pages; + std::vector const& QPDF::getAllPages() { @@ -75,14 +77,14 @@ QPDF::getAllPages() qpdf_e_pages, m->file->getName(), "", 0, "root of pages tree has no /Kids array"); } try { - getAllPagesInternal(pages, visited, seen, false, false); + m->pages.getAllPagesInternal(pages, visited, seen, false, false); } catch (...) { m->all_pages.clear(); m->invalid_page_found = false; throw; } if (m->invalid_page_found) { - flattenPagesTree(); + m->pages.flattenPagesTree(); m->invalid_page_found = false; } } @@ -90,7 +92,7 @@ QPDF::getAllPages() } void -QPDF::getAllPagesInternal( +Pages::getAllPagesInternal( QPDFObjectHandle cur_node, QPDFObjGen::set& visited, QPDFObjGen::set& seen, @@ -139,17 +141,15 @@ QPDF::getAllPagesInternal( continue; } if (!kid.isIndirect()) { - QTC::TC("qpdf", "QPDF handle direct page object"); cur_node.warn( "kid " + std::to_string(i) + " (from 0) is direct; converting to indirect"); - kid = makeIndirectObject(kid); + kid = qpdf.makeIndirectObject(kid); ++errors; } if (kid.hasKey("/Kids")) { getAllPagesInternal(kid, visited, seen, media_box, resources); } else { if (!media_box && !kid.getKey("/MediaBox").isRectangle()) { - QTC::TC("qpdf", "QPDF missing mediabox"); kid.warn( "kid " + std::to_string(i) + " (from 0) MediaBox is undefined; setting to letter / ANSI A"); @@ -193,7 +193,6 @@ QPDF::getAllPagesInternal( if (!seen.add(kid)) { // Make a copy of the page. This does the same as shallowCopyPage in // QPDFPageObjectHelper. - QTC::TC("qpdf", "QPDF resolve duplicated page object"); if (!m->reconstructed_xref) { cur_node.warn( "kid " + std::to_string(i) + @@ -201,7 +200,7 @@ QPDF::getAllPagesInternal( " creating a new page object as a copy"); // This needs to be fixed. shallowCopy does not necessarily produce a valid // page. - kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy()); + kid = qpdf.makeIndirectObject(QPDFObjectHandle(kid).shallowCopy()); seen.add(kid); } else { cur_node.warn( @@ -239,7 +238,6 @@ QPDF::updateAllPagesCache() // Force regeneration of the pages cache. We force immediate recalculation of all_pages since // users may have references to it that they got from calls to getAllPages(). We can defer // recalculation of pageobj_to_pages_pos until needed. - QTC::TC("qpdf", "QPDF updateAllPagesCache"); m->all_pages.clear(); m->pageobj_to_pages_pos.clear(); m->pushed_inherited_attributes_to_pages = false; @@ -247,7 +245,7 @@ QPDF::updateAllPagesCache() } void -QPDF::flattenPagesTree() +Pages::flattenPagesTree() { // If not already done, flatten the /Pages structure and initialize pageobj_to_pages_pos. @@ -259,7 +257,7 @@ QPDF::flattenPagesTree() // generated. pushInheritedAttributesToPage(true, true); - QPDFObjectHandle pages = getRoot().getKey("/Pages"); + QPDFObjectHandle pages = qpdf.getRoot().getKey("/Pages"); size_t const len = m->all_pages.size(); for (size_t pos = 0; pos < len; ++pos) { @@ -282,7 +280,7 @@ QPDF::flattenPagesTree() } void -QPDF::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate) +Pages::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate) { QPDFObjGen og(obj.getObjGen()); if (check_duplicate) { @@ -301,24 +299,22 @@ QPDF::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_dupli } void -QPDF::insertPage(QPDFObjectHandle newpage, int pos) +Pages::insertPage(QPDFObjectHandle newpage, int pos) { // pos is numbered from 0, so pos = 0 inserts at the beginning and pos = npages adds to the end. flattenPagesTree(); if (!newpage.isIndirect()) { - QTC::TC("qpdf", "QPDF insert non-indirect page"); - newpage = makeIndirectObject(newpage); - } else if (newpage.getOwningQPDF() != this) { - QTC::TC("qpdf", "QPDF insert foreign page"); + newpage = qpdf.makeIndirectObject(newpage); + } else if (newpage.getOwningQPDF() != &qpdf) { newpage.getQPDF().pushInheritedAttributesToPage(); - newpage = copyForeignObject(newpage); + newpage = qpdf.copyForeignObject(newpage); } else { QTC::TC("qpdf", "QPDF insert indirect page"); } - if ((pos < 0) || (toS(pos) > m->all_pages.size())) { + if (pos < 0 || toS(pos) > m->all_pages.size()) { throw std::runtime_error("QPDF::insertPage called with pos out of range"); } @@ -331,11 +327,10 @@ QPDF::insertPage(QPDFObjectHandle newpage, int pos) auto og = newpage.getObjGen(); if (m->pageobj_to_pages_pos.contains(og)) { - QTC::TC("qpdf", "QPDF resolve duplicated page in insert"); - newpage = makeIndirectObject(QPDFObjectHandle(newpage).shallowCopy()); + newpage = qpdf.makeIndirectObject(QPDFObjectHandle(newpage).shallowCopy()); } - QPDFObjectHandle pages = getRoot().getKey("/Pages"); + QPDFObjectHandle pages = qpdf.getRoot().getKey("/Pages"); QPDFObjectHandle kids = pages.getKey("/Kids"); newpage.replaceKey("/Parent", pages); @@ -369,7 +364,7 @@ QPDF::removePage(QPDFObjectHandle page) m->all_pages.erase(m->all_pages.begin() + pos); m->pageobj_to_pages_pos.erase(page.getObjGen()); for (int i = pos; i < npages; ++i) { - insertPageobjToPage(m->all_pages.at(toS(i)), i, false); + m->pages.insertPageobjToPage(m->all_pages.at(toS(i)), i, false); } } @@ -380,16 +375,17 @@ QPDF::addPageAt(QPDFObjectHandle newpage, bool before, QPDFObjectHandle refpage) if (!before) { ++refpos; } - insertPage(newpage, refpos); + m->pages.insertPage(newpage, refpos); } void QPDF::addPage(QPDFObjectHandle newpage, bool first) { if (first) { - insertPage(newpage, 0); + m->pages.insertPage(newpage, 0); } else { - insertPage(newpage, getRoot().getKey("/Pages").getKey("/Count").getIntValueAsInt()); + m->pages.insertPage( + newpage, getRoot().getKey("/Pages").getKey("/Count").getIntValueAsInt()); } } @@ -402,7 +398,7 @@ QPDF::findPage(QPDFObjectHandle& page) int QPDF::findPage(QPDFObjGen og) { - flattenPagesTree(); + m->pages.flattenPagesTree(); auto it = m->pageobj_to_pages_pos.find(og); if (it == m->pageobj_to_pages_pos.end()) { throw QPDFExc( diff --git a/libqpdf/qpdf/QPDF_private.hh b/libqpdf/qpdf/QPDF_private.hh index ab103bc..97daa71 100644 --- a/libqpdf/qpdf/QPDF_private.hh +++ b/libqpdf/qpdf/QPDF_private.hh @@ -550,7 +550,45 @@ class QPDF::Doc private: QPDF& qpdf; QPDF::Members* m; - }; // class Objects + }; // class QPDF::Doc::Objects + + // This class is used to represent a PDF Pages tree. + class Pages + { + public: + Pages() = delete; + Pages(Pages const&) = delete; + Pages(Pages&&) = delete; + Pages& operator=(Pages const&) = delete; + Pages& operator=(Pages&&) = delete; + ~Pages() = default; + + Pages(QPDF& qpdf, QPDF::Members* m) : + qpdf(qpdf), + m(m) + { + } + + void getAllPagesInternal( + QPDFObjectHandle cur_pages, + QPDFObjGen::set& visited, + QPDFObjGen::set& seen, + bool media_box, + bool resources); + void insertPage(QPDFObjectHandle newpage, int pos); + void flattenPagesTree(); + void insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate); + void pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys); + void pushInheritedAttributesToPageInternal( + QPDFObjectHandle, + std::map>&, + bool allow_changes, + bool warn_skipped_keys); + + private: + QPDF& qpdf; + QPDF::Members* m; + }; // class QPDF::Doc::Pages // StreamCopier class is restricted to QPDFObjectHandle so it can copy stream data. class StreamCopier @@ -575,7 +613,8 @@ class QPDF::Doc Doc(QPDF& qpdf, QPDF::Members& m) : qpdf(qpdf), m(m), - objects_(qpdf, &m) + objects_(qpdf, &m), + pages_(qpdf, &m) { } @@ -585,6 +624,12 @@ class QPDF::Doc return objects_; }; + Pages& + pages() + { + return pages_; + } + bool reconstructed_xref() const; QPDFAcroFormDocumentHelper& @@ -637,6 +682,7 @@ class QPDF::Doc QPDF::Members& m; Objects objects_; + Pages pages_; // Document Helpers; std::unique_ptr acroform_; @@ -659,6 +705,7 @@ class QPDF::Members private: Doc doc; Doc::Objects& objects; + Doc::Pages& pages; std::shared_ptr log; unsigned long long unique_id{0}; qpdf::Tokenizer tokenizer; diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index d8d8329..a03c408 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -2,12 +2,8 @@ ignored-scope: libtests QPDF hint table length direct 0 QPDF P absent in lindict 1 QPDF opt direct pages resource 1 -QPDF opt inheritable keys 0 QPDF opt no inheritable keys 0 -QPDF opt erase empty key ancestor 0 -QPDF opt resource inherited 0 QPDF opt page resource hides ancestor 0 -QPDF opt key ancestors depth > 1 0 QPDF opt loop detected 0 QPDF categorize pagemode present 1 QPDF categorize pagemode outlines 1 @@ -30,7 +26,6 @@ main QTest dictionary indirect 1 main QTest stream 0 QPDF lin write nshared_total > nshared_first_page 1 QPDFWriter encrypted hint stream 0 -QPDF opt inherited scalar 0 QPDF xref gen > 0 1 QPDF startxref more than 1024 before end 0 QPDFParser bad brace 0 @@ -127,8 +122,6 @@ exercise processFile(FILE*) 0 exercise processMemoryFile 0 QPDF remove page 2 QPDF insert page 2 -QPDF updateAllPagesCache 0 -QPDF insert non-indirect page 0 QPDF insert indirect page 0 QPDF_Stream ERR shallow copy stream 0 QPDFObjectHandle newStream with string 0 @@ -142,7 +135,6 @@ QPDF replace array 0 QPDF replace dictionary 0 QPDF replace stream 0 QPDF replace foreign indirect with null 0 -QPDF insert foreign page 0 QPDFWriter copy use_aes 1 QPDFParser indirect without context 0 QPDFObjectHandle trailing data in parse 0 @@ -162,7 +154,6 @@ qpdf-c called qpdf_set_r6_encryption_parameters 0 QPDFObjectHandle EOF in inline image 0 QPDFObjectHandle inline image token 0 QPDF not caching overridden objstm object 0 -QPDF_optimization indirect outlines 0 QPDF xref space 2 QPDFJob pages range omitted in middle 0 QPDFWriter standard deterministic ID 1 @@ -282,9 +273,6 @@ QPDFPageDocumentHelper ignore annotation with no appearance 0 QPDFFormFieldObjectHelper replaced BMC at EOF 0 QPDFFormFieldObjectHelper fallback Tf 0 QPDFPageObjectHelper copy shared attribute 1 -QPDF resolve duplicated page object 0 -QPDF handle direct page object 0 -QPDF missing mediabox 0 QPDF inherit mediabox 1 QPDFTokenizer finder found wrong word 0 QPDFTokenizer found EI by byte count 0 @@ -402,7 +390,6 @@ QPDFAcroFormDocumentHelper /DA parse error 0 QPDFAcroFormDocumentHelper AP parse error 1 QPDFJob copy fields not this file 0 QPDFJob copy fields non-first from orig 0 -QPDF resolve duplicated page in insert 0 QPDFWriter exclude from object stream 0 QPDFJob weak crypto error 0 qpdf-c called qpdf_oh_is_initialized 0