diff --git a/examples/examples.testcov b/examples/examples.testcov index 5a19b7c..e0811a1 100644 --- a/examples/examples.testcov +++ b/examples/examples.testcov @@ -1,12 +1,3 @@ -pdf-bookmarks lines 0 -pdf-bookmarks numbers 0 -pdf-bookmarks none 0 -pdf-bookmarks has count 0 -pdf-bookmarks no count 0 -pdf-bookmarks open 0 -pdf-bookmarks closed 0 -pdf-bookmarks dest 0 -pdf-bookmarks targets 0 pdf-mod-info --dump 0 pdf-mod-info no in file 0 pdf-mod-info in-place 0 diff --git a/examples/pdf-attach-file.cc b/examples/pdf-attach-file.cc index 6f3659b..ded5ce2 100644 --- a/examples/pdf-attach-file.cc +++ b/examples/pdf-attach-file.cc @@ -79,7 +79,7 @@ process( } // Add the embedded file at the document level as an attachment. - auto efdh = QPDFEmbeddedFileDocumentHelper(q); + auto& efdh = QPDFEmbeddedFileDocumentHelper::get(q); efdh.replaceEmbeddedFile(key, fs); // Create a file attachment annotation. diff --git a/examples/pdf-bookmarks.cc b/examples/pdf-bookmarks.cc index 665ac0f..0aaf564 100644 --- a/examples/pdf-bookmarks.cc +++ b/examples/pdf-bookmarks.cc @@ -47,7 +47,7 @@ print_lines(std::vector& numbers) void generate_page_map(QPDF& qpdf) { - QPDFPageDocumentHelper dh(qpdf); + auto& dh = QPDFPageDocumentHelper::get(qpdf); int n = 0; for (auto const& page: dh.getAllPages()) { page_map[page] = ++n; @@ -60,11 +60,9 @@ show_bookmark_details(QPDFOutlineObjectHelper outline, std::vector numbers) // No default so gcc will warn on missing tag switch (style) { case st_none: - QTC::TC("examples", "pdf-bookmarks none"); break; case st_numbers: - QTC::TC("examples", "pdf-bookmarks numbers"); for (auto const& number: numbers) { std::cout << number << "."; } @@ -72,7 +70,6 @@ show_bookmark_details(QPDFOutlineObjectHelper outline, std::vector numbers) break; case st_lines: - QTC::TC("examples", "pdf-bookmarks lines"); print_lines(numbers); std::cout << "|\n"; print_lines(numbers); @@ -83,27 +80,21 @@ show_bookmark_details(QPDFOutlineObjectHelper outline, std::vector numbers) if 
(show_open) { int count = outline.getCount(); if (count) { - QTC::TC("examples", "pdf-bookmarks has count"); if (count > 0) { // hierarchy is open at this point - QTC::TC("examples", "pdf-bookmarks open"); std::cout << "(v) "; } else { - QTC::TC("examples", "pdf-bookmarks closed"); std::cout << "(>) "; } } else { - QTC::TC("examples", "pdf-bookmarks no count"); std::cout << "( ) "; } } if (show_targets) { - QTC::TC("examples", "pdf-bookmarks targets"); std::string target = "unknown"; QPDFObjectHandle dest_page = outline.getDestPage(); if (!dest_page.isNull()) { - QTC::TC("examples", "pdf-bookmarks dest"); if (page_map.contains(dest_page)) { target = std::to_string(page_map[dest_page]); } @@ -177,7 +168,7 @@ main(int argc, char* argv[]) QPDF qpdf; qpdf.processFile(filename, password); - QPDFOutlineDocumentHelper odh(qpdf); + auto& odh = QPDFOutlineDocumentHelper::get(qpdf); if (odh.hasOutlines()) { std::vector numbers; if (show_targets) { diff --git a/examples/pdf-count-strings.cc b/examples/pdf-count-strings.cc index c262459..1a6ad7e 100644 --- a/examples/pdf-count-strings.cc +++ b/examples/pdf-count-strings.cc @@ -76,7 +76,7 @@ main(int argc, char* argv[]) QPDF pdf; pdf.processFile(infilename); int pageno = 0; - for (auto& page: QPDFPageDocumentHelper(pdf).getAllPages()) { + for (auto& page: QPDFPageDocumentHelper::get(pdf).getAllPages()) { ++pageno; // Pass the contents of a page through our string counter. If it's an even page, capture // the output. 
This illustrates that you may capture any output generated by the filter, diff --git a/examples/pdf-create.cc b/examples/pdf-create.cc index f09d8c4..070bc5f 100644 --- a/examples/pdf-create.cc +++ b/examples/pdf-create.cc @@ -229,7 +229,7 @@ check( QPDF pdf; pdf.processFile(filename); - auto pages = QPDFPageDocumentHelper(pdf).getAllPages(); + auto pages = QPDFPageDocumentHelper::get(pdf).getAllPages(); if (n_color_spaces * n_filters != pages.size()) { throw std::logic_error("incorrect number of pages"); } diff --git a/examples/pdf-overlay-page.cc b/examples/pdf-overlay-page.cc index d4da647..fc9dd79 100644 --- a/examples/pdf-overlay-page.cc +++ b/examples/pdf-overlay-page.cc @@ -29,14 +29,14 @@ stamp_page(char const* infile, char const* stampfile, char const* outfile) stamppdf.processFile(stampfile); // Get first page from other file - QPDFPageObjectHelper stamp_page_1 = QPDFPageDocumentHelper(stamppdf).getAllPages().at(0); + QPDFPageObjectHelper stamp_page_1 = QPDFPageDocumentHelper::get(stamppdf).getAllPages().at(0); // Convert page to a form XObject QPDFObjectHandle foreign_fo = stamp_page_1.getFormXObjectForPage(); // Copy form XObject to the input file QPDFObjectHandle stamp_fo = inpdf.copyForeignObject(foreign_fo); // For each page... 
- for (auto& ph: QPDFPageDocumentHelper(inpdf).getAllPages()) { + for (auto& ph: QPDFPageDocumentHelper::get(inpdf).getAllPages()) { // Find a unique resource name for the new form XObject QPDFObjectHandle resources = ph.getAttribute("/Resources", true); int min_suffix = 1; diff --git a/fuzz/qpdf_outlines_fuzzer.cc b/fuzz/qpdf_outlines_fuzzer.cc index 8efca69..82d0d8c 100644 --- a/fuzz/qpdf_outlines_fuzzer.cc +++ b/fuzz/qpdf_outlines_fuzzer.cc @@ -49,7 +49,7 @@ FuzzHelper::testOutlines() { std::shared_ptr q = getQpdf(); std::list> queue; - QPDFOutlineDocumentHelper odh(*q); + auto& odh = QPDFOutlineDocumentHelper::get(*q); queue.push_back(odh.getTopLevelOutlines()); while (!queue.empty()) { for (auto& ol: *(queue.begin())) { diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index fe0ce8f..23f910e 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -63,6 +63,10 @@ class BufferInputSource; class QPDFLogger; class QPDFParser; class QPDFAcroFormDocumentHelper; +class QPDFEmbeddedFileDocumentHelper; +class QPDFOutlineDocumentHelper; +class QPDFPageDocumentHelper; +class QPDFPageLabelDocumentHelper; class QPDF { @@ -799,6 +803,10 @@ class QPDF inline bool reconstructed_xref() const; inline QPDFAcroFormDocumentHelper& acroform(); + inline QPDFEmbeddedFileDocumentHelper& embedded_files(); + inline QPDFOutlineDocumentHelper& outlines(); + inline QPDFPageDocumentHelper& pages(); + inline QPDFPageLabelDocumentHelper& page_labels(); // For testing only -- do not add to DLL static bool test_json_validators(); diff --git a/include/qpdf/QPDFEmbeddedFileDocumentHelper.hh b/include/qpdf/QPDFEmbeddedFileDocumentHelper.hh index 1a0f606..9db3674 100644 --- a/include/qpdf/QPDFEmbeddedFileDocumentHelper.hh +++ b/include/qpdf/QPDFEmbeddedFileDocumentHelper.hh @@ -36,6 +36,21 @@ class QPDFEmbeddedFileDocumentHelper: public QPDFDocumentHelper { public: + // Get a shared document helper for a given QPDF object. 
+ // + // Retrieving a document helper for a QPDF object rather than creating a new one avoids repeated + // validation of the EmbeddedFiles structure, which can be expensive. + QPDF_DLL + static QPDFEmbeddedFileDocumentHelper& get(QPDF& qpdf); + + // Re-validate the EmbeddedFiles structure. This is useful if you have modified the structure of + // the EmbeddedFiles dictionary in a way that would invalidate the cache. + // + // If repair is true, the document will be repaired if possible if the validation encounters + // errors. + QPDF_DLL + void validate(bool repair = true); + QPDF_DLL QPDFEmbeddedFileDocumentHelper(QPDF&); diff --git a/include/qpdf/QPDFOutlineDocumentHelper.hh b/include/qpdf/QPDFOutlineDocumentHelper.hh index 5dc639a..d6f3ecc 100644 --- a/include/qpdf/QPDFOutlineDocumentHelper.hh +++ b/include/qpdf/QPDFOutlineDocumentHelper.hh @@ -38,6 +38,21 @@ class QPDFOutlineDocumentHelper: public QPDFDocumentHelper { public: + // Get a shared document helper for a given QPDF object. + // + // Retrieving a document helper for a QPDF object rather than creating a new one avoids repeated + // validation of the Outlines structure, which can be expensive. + QPDF_DLL + static QPDFOutlineDocumentHelper& get(QPDF& qpdf); + + // Re-validate the Outlines structure. This is useful if you have modified the structure of the + // Outlines dictionary in a way that would invalidate the cache. + // + // If repair is true, the document will be repaired if possible if the validation encounters + // errors. 
+ QPDF_DLL + void validate(bool repair = true); + QPDF_DLL QPDFOutlineDocumentHelper(QPDF&); diff --git a/include/qpdf/QPDFPageDocumentHelper.hh b/include/qpdf/QPDFPageDocumentHelper.hh index bad7e68..3714258 100644 --- a/include/qpdf/QPDFPageDocumentHelper.hh +++ b/include/qpdf/QPDFPageDocumentHelper.hh @@ -35,6 +35,21 @@ class QPDFAcroFormDocumentHelper; class QPDFPageDocumentHelper: public QPDFDocumentHelper { public: + // Get a shared document helper for a given QPDF object. + // + // Retrieving a document helper for a QPDF object rather than creating a new one avoids repeated + // validation of the Pages structure, which can be expensive. + QPDF_DLL + static QPDFPageDocumentHelper& get(QPDF& qpdf); + + // Re-validate the Pages structure. This is useful if you have modified the Pages structure in + // a way that would invalidate the cache. + // + // If repair is true, the document will be repaired if possible if the validation encounters + // errors. + QPDF_DLL + void validate(bool repair = true); + QPDF_DLL QPDFPageDocumentHelper(QPDF&); @@ -112,17 +127,7 @@ class QPDFPageDocumentHelper: public QPDFDocumentHelper int required_flags, int forbidden_flags); - class Members - { - friend class QPDFPageDocumentHelper; - - public: - ~Members() = default; - - private: - Members() = default; - Members(Members const&) = delete; - }; + class Members; std::shared_ptr m; }; diff --git a/include/qpdf/QPDFPageLabelDocumentHelper.hh b/include/qpdf/QPDFPageLabelDocumentHelper.hh index c5a9c3f..d04c6ce 100644 --- a/include/qpdf/QPDFPageLabelDocumentHelper.hh +++ b/include/qpdf/QPDFPageLabelDocumentHelper.hh @@ -22,11 +22,10 @@ #include +#include #include -#include -#include -#include +#include // Page labels are discussed in the PDF spec (ISO-32000) in section 12.4.2. // @@ -42,6 +41,21 @@ class QPDFPageLabelDocumentHelper: public QPDFDocumentHelper { public: + // Get a shared document helper for a given QPDF object. 
+ // + // Retrieving a document helper for a QPDF object rather than creating a new one avoids repeated + // validation of the PageLabels structure, which can be expensive. + QPDF_DLL + static QPDFPageLabelDocumentHelper& get(QPDF& qpdf); + + // Re-validate the PageLabels structure. This is useful if you have modified the structure of + // the PageLabels dictionary in a way that could have invalidated the structure. + // + // If repair is true, the document will be repaired if possible if the validation encounters + // errors. + QPDF_DLL + void validate(bool repair = true); + QPDF_DLL QPDFPageLabelDocumentHelper(QPDF&); diff --git a/libqpdf/QPDFEmbeddedFileDocumentHelper.cc b/libqpdf/QPDFEmbeddedFileDocumentHelper.cc index 552a389..3b95279 100644 --- a/libqpdf/QPDFEmbeddedFileDocumentHelper.cc +++ b/libqpdf/QPDFEmbeddedFileDocumentHelper.cc @@ -1,5 +1,8 @@ #include +#include +#include + // File attachments are stored in the /EmbeddedFiles (name tree) key of the /Names dictionary from // the document catalog. Each entry points to a /FileSpec, which in turn points to one more Embedded // File Streams. 
Note that file specs can appear in other places as well, such as file attachment @@ -44,6 +47,19 @@ QPDFEmbeddedFileDocumentHelper::QPDFEmbeddedFileDocumentHelper(QPDF& qpdf) : QPDFDocumentHelper(qpdf), m(std::make_shared()) { + validate(); +} + +QPDFEmbeddedFileDocumentHelper& +QPDFEmbeddedFileDocumentHelper::get(QPDF& qpdf) +{ + return qpdf.embedded_files(); +} + +void +QPDFEmbeddedFileDocumentHelper::validate(bool repair) +{ + m->embedded_files.reset(); auto names = qpdf.getRoot().getKey("/Names"); if (names.isDictionary()) { auto embedded_files = names.getKey("/EmbeddedFiles"); @@ -53,7 +69,7 @@ QPDFEmbeddedFileDocumentHelper::QPDFEmbeddedFileDocumentHelper(QPDF& qpdf) : qpdf, [](QPDFObjectHandle const& o) -> bool { return o.isDictionary(); }, true); - m->embedded_files->validate(); + m->embedded_files->validate(repair); } } } diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc index c9e91a1..560bead 100644 --- a/libqpdf/QPDFJob.cc +++ b/libqpdf/QPDFJob.cc @@ -13,15 +13,10 @@ #include #include #include -#include #include -#include #include #include #include -#include -#include -#include #include #include #include @@ -784,6 +779,14 @@ QPDFJob::doCheck(QPDF& pdf) cout << "File is not linearized\n"; } + // Create all document helpers to trigger any validations they carry out. + auto& pages = pdf.pages(); + (void)pdf.acroform(); + (void)pdf.embedded_files(); + (void)pdf.page_labels(); + (void)pdf.outlines().resolveNamedDest(QPDFObjectHandle::newString("dummy")); + (void)pdf.outlines().getOutlinesForPage(pages.getAllPages().at(0)); + // Write the file to nowhere, uncompressing streams. This causes full file traversal and // decoding of all streams we can decode. 
QPDFWriter w(pdf); @@ -794,7 +797,7 @@ QPDFJob::doCheck(QPDF& pdf) // Parse all content streams int pageno = 0; - for (auto& page: QPDFPageDocumentHelper(pdf).getAllPages()) { + for (auto& page: pages.getAllPages()) { ++pageno; try { page.parseContents(nullptr); @@ -862,7 +865,7 @@ QPDFJob::doShowPages(QPDF& pdf) { int pageno = 0; auto& cout = *m->log->getInfo(); - for (auto& ph: QPDFPageDocumentHelper(pdf).getAllPages()) { + for (auto& ph: pdf.pages().getAllPages()) { QPDFObjectHandle page = ph.getObjectHandle(); ++pageno; @@ -894,7 +897,7 @@ QPDFJob::doShowPages(QPDF& pdf) void QPDFJob::doListAttachments(QPDF& pdf) { - QPDFEmbeddedFileDocumentHelper efdh(pdf); + auto& efdh = pdf.embedded_files(); if (efdh.hasEmbeddedFiles()) { for (auto const& i: efdh.getEmbeddedFiles()) { std::string const& key = i.first; @@ -934,7 +937,7 @@ QPDFJob::doListAttachments(QPDF& pdf) void QPDFJob::doShowAttachment(QPDF& pdf) { - QPDFEmbeddedFileDocumentHelper efdh(pdf); + auto& efdh = pdf.embedded_files(); auto fs = efdh.getEmbeddedFile(m->attachment_to_show); if (!fs) { throw std::runtime_error("attachment " + m->attachment_to_show + " not found"); @@ -1053,10 +1056,10 @@ QPDFJob::doJSONPages(Pipeline* p, bool& first, QPDF& pdf) JSON::writeDictionaryKey(p, first, "pages", 1); bool first_page = true; JSON::writeArrayOpen(p, first_page, 2); - QPDFPageLabelDocumentHelper pldh(pdf); - QPDFOutlineDocumentHelper odh(pdf); + auto& pldh = pdf.page_labels(); + auto& odh = pdf.outlines(); int pageno = -1; - for (auto& ph: QPDFPageDocumentHelper(pdf).getAllPages()) { + for (auto& ph: pdf.pages().getAllPages()) { ++pageno; JSON j_page = JSON::makeDictionary(); QPDFObjectHandle page = ph.getObjectHandle(); @@ -1116,8 +1119,8 @@ void QPDFJob::doJSONPageLabels(Pipeline* p, bool& first, QPDF& pdf) { JSON j_labels = JSON::makeArray(); - QPDFPageLabelDocumentHelper pldh(pdf); - long long npages = QIntC::to_longlong(QPDFPageDocumentHelper(pdf).getAllPages().size()); + auto& pldh = pdf.page_labels(); + 
long long npages = QIntC::to_longlong(pdf.pages().getAllPages().size()); if (pldh.hasPageLabels()) { std::vector labels; pldh.getLabelsForPageRange(0, npages - 1, 0, labels); @@ -1165,14 +1168,13 @@ QPDFJob::doJSONOutlines(Pipeline* p, bool& first, QPDF& pdf) { std::map page_numbers; int n = 0; - for (auto const& ph: QPDFPageDocumentHelper(pdf).getAllPages()) { + for (auto const& ph: pdf.pages().getAllPages()) { QPDFObjectHandle oh = ph.getObjectHandle(); page_numbers[oh.getObjGen()] = ++n; } JSON j_outlines = JSON::makeArray(); - QPDFOutlineDocumentHelper odh(pdf); - addOutlinesToJson(odh.getTopLevelOutlines(), j_outlines, page_numbers); + addOutlinesToJson(pdf.outlines().getTopLevelOutlines(), j_outlines, page_numbers); JSON::writeDictionaryItem(p, first, "outlines", j_outlines, 1); } @@ -1185,7 +1187,7 @@ QPDFJob::doJSONAcroform(Pipeline* p, bool& first, QPDF& pdf) j_acroform.addDictionaryMember("needappearances", JSON::makeBool(afdh.getNeedAppearances())); JSON j_fields = j_acroform.addDictionaryMember("fields", JSON::makeArray()); int pagepos1 = 0; - for (auto const& page: QPDFPageDocumentHelper(pdf).getAllPages()) { + for (auto const& page: pdf.pages().getAllPages()) { ++pagepos1; for (auto& aoh: afdh.getWidgetAnnotationsForPage(page)) { QPDFFormFieldObjectHelper ffh = afdh.getFieldForAnnotation(aoh); @@ -1321,7 +1323,7 @@ QPDFJob::doJSONAttachments(Pipeline* p, bool& first, QPDF& pdf) }; JSON j_attachments = JSON::makeDictionary(); - QPDFEmbeddedFileDocumentHelper efdh(pdf); + auto& efdh = pdf.embedded_files(); for (auto const& iter: efdh.getEmbeddedFiles()) { std::string const& key = iter.first; auto fsoh = iter.second; @@ -1862,7 +1864,7 @@ QPDFJob::processInputSource( void QPDFJob::validateUnderOverlay(QPDF& pdf, UnderOverlay* uo) { - QPDFPageDocumentHelper main_pdh(pdf); + auto& main_pdh = pdf.pages(); int main_npages = QIntC::to_int(main_pdh.getAllPages().size()); processFile(uo->pdf, uo->filename.data(), uo->password.data(), true, false); 
QPDFPageDocumentHelper uo_pdh(*(uo->pdf)); @@ -2073,7 +2075,7 @@ void QPDFJob::addAttachments(QPDF& pdf) { maybe_set_pagemode(pdf, "/UseAttachments"); - QPDFEmbeddedFileDocumentHelper efdh(pdf); + auto& efdh = pdf.embedded_files(); std::vector duplicated_keys; for (auto const& to_add: m->attachments_to_add) { if ((!to_add.replace) && efdh.getEmbeddedFile(to_add.key)) { @@ -2117,7 +2119,7 @@ void QPDFJob::copyAttachments(QPDF& pdf) { maybe_set_pagemode(pdf, "/UseAttachments"); - QPDFEmbeddedFileDocumentHelper efdh(pdf); + auto& efdh = pdf.embedded_files(); std::vector duplicates; for (auto const& to_copy: m->attachments_to_copy) { doIfVerbose([&](Pipeline& v, std::string const& prefix) { @@ -2125,7 +2127,7 @@ QPDFJob::copyAttachments(QPDF& pdf) }); std::unique_ptr other; processFile(other, to_copy.path.c_str(), to_copy.password.c_str(), false, false); - QPDFEmbeddedFileDocumentHelper other_efdh(*other); + auto& other_efdh = other->embedded_files(); auto other_attachments = other_efdh.getEmbeddedFiles(); for (auto const& iter: other_attachments) { std::string new_key = to_copy.prefix + iter.first; @@ -2259,7 +2261,7 @@ QPDFJob::handleTransformations(QPDF& pdf) pdf.getRoot().replaceKey("/PageLabels", page_labels); } if (!m->attachments_to_remove.empty()) { - QPDFEmbeddedFileDocumentHelper efdh(pdf); + auto& efdh = pdf.embedded_files(); for (auto const& key: m->attachments_to_remove) { if (efdh.removeEmbeddedFile(key)) { doIfVerbose([&](Pipeline& v, std::string const& prefix) { @@ -2548,7 +2550,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector>& page_hea cis = page_spec_cfis[page_data.filename]; cis->stayOpen(true); } - QPDFPageLabelDocumentHelper pldh(*page_data.qpdf); + auto& pldh = page_data.qpdf->page_labels(); auto& other_afdh = page_data.qpdf->acroform(); if (pldh.hasPageLabels()) { any_page_labels = true; @@ -2992,7 +2994,7 @@ QPDFJob::doSplitPages(QPDF& pdf) QPDFPageDocumentHelper dh(pdf); dh.removeUnreferencedResources(); } - QPDFPageLabelDocumentHelper 
pldh(pdf); + auto& pldh = pdf.page_labels(); auto& afdh = pdf.acroform(); std::vector const& pages = pdf.getAllPages(); size_t pageno_len = std::to_string(pages.size()).length(); diff --git a/libqpdf/QPDFOutlineDocumentHelper.cc b/libqpdf/QPDFOutlineDocumentHelper.cc index bcba309..5403f64 100644 --- a/libqpdf/QPDFOutlineDocumentHelper.cc +++ b/libqpdf/QPDFOutlineDocumentHelper.cc @@ -1,6 +1,7 @@ #include #include +#include #include class QPDFOutlineDocumentHelper::Members @@ -27,6 +28,21 @@ QPDFOutlineDocumentHelper::QPDFOutlineDocumentHelper(QPDF& qpdf) : QPDFDocumentHelper(qpdf), m(std::make_shared()) { + validate(); +} + +QPDFOutlineDocumentHelper& +QPDFOutlineDocumentHelper::get(QPDF& qpdf) +{ + return qpdf.outlines(); +} + +void +QPDFOutlineDocumentHelper::validate(bool repair) +{ + m->outlines.clear(); + m->names_dest = nullptr; + QPDFObjectHandle root = qpdf.getRoot(); if (!root.hasKey("/Outlines")) { return; @@ -37,7 +53,11 @@ QPDFOutlineDocumentHelper::QPDFOutlineDocumentHelper(QPDF& qpdf) : } QPDFObjectHandle cur = outlines.getKey("/First"); QPDFObjGen::set seen; - while (!cur.null() && seen.add(cur)) { + while (!cur.null()) { + if (!seen.add(cur)) { + cur.warn("Loop detected loop in /Outlines tree"); + return; + } m->outlines.emplace_back(QPDFOutlineObjectHelper::Accessor::create(cur, *this, 1)); cur = cur.getKey("/Next"); } diff --git a/libqpdf/QPDFOutlineObjectHelper.cc b/libqpdf/QPDFOutlineObjectHelper.cc index 0ae818e..3a163b6 100644 --- a/libqpdf/QPDFOutlineObjectHelper.cc +++ b/libqpdf/QPDFOutlineObjectHelper.cc @@ -20,15 +20,20 @@ QPDFOutlineObjectHelper::QPDFOutlineObjectHelper( return; } if (QPDFOutlineDocumentHelper::Accessor::checkSeen(m->dh, a_oh.getObjGen())) { + a_oh.warn("Loop detected loop in /Outlines tree"); return; } QPDFObjGen::set children; QPDFObjectHandle cur = a_oh.getKey("/First"); - while (!cur.null() && cur.isIndirect() && children.add(cur)) { + while (!cur.null() && cur.isIndirect()) { + if (!children.add(cur)) { + 
cur.warn("Loop detected loop in /Outlines tree"); + break; + } QPDFOutlineObjectHelper new_ooh(cur, dh, 1 + depth); new_ooh.m->parent = std::make_shared(*this); - m->kids.push_back(new_ooh); + m->kids.emplace_back(new_ooh); cur = cur.getKey("/Next"); } } diff --git a/libqpdf/QPDFPageDocumentHelper.cc b/libqpdf/QPDFPageDocumentHelper.cc index 154443e..98ef166 100644 --- a/libqpdf/QPDFPageDocumentHelper.cc +++ b/libqpdf/QPDFPageDocumentHelper.cc @@ -6,11 +6,26 @@ #include #include +class QPDFPageDocumentHelper::Members +{ +}; + QPDFPageDocumentHelper::QPDFPageDocumentHelper(QPDF& qpdf) : QPDFDocumentHelper(qpdf) { } +QPDFPageDocumentHelper& +QPDFPageDocumentHelper::get(QPDF& qpdf) +{ + return qpdf.pages(); +} + +void +QPDFPageDocumentHelper::validate(bool repair) +{ +} + std::vector QPDFPageDocumentHelper::getAllPages() { diff --git a/libqpdf/QPDFPageLabelDocumentHelper.cc b/libqpdf/QPDFPageLabelDocumentHelper.cc index 878c54e..0f32a8b 100644 --- a/libqpdf/QPDFPageLabelDocumentHelper.cc +++ b/libqpdf/QPDFPageLabelDocumentHelper.cc @@ -1,6 +1,10 @@ #include +#include #include +#include + +using namespace qpdf; class QPDFPageLabelDocumentHelper::Members { @@ -16,14 +20,23 @@ QPDFPageLabelDocumentHelper::QPDFPageLabelDocumentHelper(QPDF& qpdf) : QPDFDocumentHelper(qpdf), m(std::make_shared()) { - QPDFObjectHandle root = qpdf.getRoot(); - if (root.hasKey("/PageLabels")) { + validate(); +} + +QPDFPageLabelDocumentHelper& +QPDFPageLabelDocumentHelper::get(QPDF& qpdf) +{ + return qpdf.page_labels(); +} + +void +QPDFPageLabelDocumentHelper::validate(bool repair) +{ + m->labels = nullptr; + if (Dictionary labels = qpdf.getRoot()["/PageLabels"]) { m->labels = std::make_unique( - root.getKey("/PageLabels"), - this->qpdf, - [](QPDFObjectHandle const& o) -> bool { return o.isDictionary(); }, - true); - m->labels->validate(); + labels, qpdf, [](QPDFObjectHandle const& o) -> bool { return o.isDictionary(); }, true); + m->labels->validate(repair); } } diff --git 
a/libqpdf/qpdf/QPDF_private.hh b/libqpdf/qpdf/QPDF_private.hh index 19d9e3c..e0fc568 100644 --- a/libqpdf/qpdf/QPDF_private.hh +++ b/libqpdf/qpdf/QPDF_private.hh @@ -4,7 +4,11 @@ #include #include +#include #include +#include +#include +#include #include using namespace qpdf; @@ -553,6 +557,10 @@ class QPDF::Members // Document Helpers; std::unique_ptr acroform; + std::unique_ptr embedded_files; + std::unique_ptr outlines; + std::unique_ptr pages; + std::unique_ptr page_labels; }; // JobSetter class is restricted to QPDFJob. @@ -584,4 +592,40 @@ QPDF::acroform() return *m->acroform; } +inline QPDFEmbeddedFileDocumentHelper& +QPDF::embedded_files() +{ + if (!m->embedded_files) { + m->embedded_files = std::make_unique(*this); + } + return *m->embedded_files; +} + +inline QPDFOutlineDocumentHelper& +QPDF::outlines() +{ + if (!m->outlines) { + m->outlines = std::make_unique(*this); + } + return *m->outlines; +} + +inline QPDFPageDocumentHelper& +QPDF::pages() +{ + if (!m->pages) { + m->pages = std::make_unique(*this); + } + return *m->pages; +} + +inline QPDFPageLabelDocumentHelper& +QPDF::page_labels() +{ + if (!m->page_labels) { + m->page_labels = std::make_unique(*this); + } + return *m->page_labels; +} + #endif // QPDF_PRIVATE_HH diff --git a/manual/release-notes.rst b/manual/release-notes.rst index 82c43be..f8860b0 100644 --- a/manual/release-notes.rst +++ b/manual/release-notes.rst @@ -26,19 +26,29 @@ more detail. - Library Enhancements - Add ``QPDFNameTreeObjectHelper`` and ``QPDFNumberTreeObjectHelper`` - constructor overloads that allow a function to ne passed to + constructor overloads that allow a function to be passed to validate the values in the tree. - Add new ``QPDFNameTreeObjectHelper`` and ``QPDFNumberTreeObjectHelper`` ``validate`` method to validate and optionally repair the name/number tree. + - Add new ``get`` and ``validate`` methods to all DocumentHelper classes. 
+ The ``get`` method retrieves a shared DocumentHelper, avoiding the + overhead of repeatedly validating the underlying document structure + and/or building internal caches. If the underlying document structure + is directly modified (without the use of DocumentHelpers), the + ``validate`` method revalidates the structure and resynchronizes any + internal caches. + - CLI Enhancements - Disallow option :qpdf:ref:`--deterministic-id` to be used together with the incompatible options :qpdf:ref:`--encrypt` or :qpdf:ref:`--copy-encryption`. + - Option :qpdf:ref:`--check` now includes additional basic checks of the + AcroForm, Dests, Outlines, and PageLabels structures. - Other enhancements diff --git a/qpdf/qtest/outlines.test b/qpdf/qtest/outlines.test index 9444b4e..0e286da 100644 --- a/qpdf/qtest/outlines.test +++ b/qpdf/qtest/outlines.test @@ -21,7 +21,7 @@ my @outline_files = ( 'outlines-with-old-root-dests-dict', 'outlines-with-loop', ); -my $n_tests = scalar(@outline_files); +my $n_tests = scalar(@outline_files) + 1; foreach my $f (@outline_files) { $td->runtest("outlines: $f", @@ -30,5 +30,10 @@ foreach my $f (@outline_files) $td->NORMALIZE_NEWLINES); } +$td->runtest("outlines: outlines-with-loop --check", + {$td->COMMAND => "qpdf --check outlines-with-loop.pdf"}, + {$td->FILE => "outlines-with-loop-check.out", $td->EXIT_STATUS => 3}, + $td->NORMALIZE_NEWLINES); + cleanup(); $td->report($n_tests); diff --git a/qpdf/qtest/page-labels.test b/qpdf/qtest/page-labels.test index 37f2a61..9f098a3 100644 --- a/qpdf/qtest/page-labels.test +++ b/qpdf/qtest/page-labels.test @@ -14,7 +14,7 @@ cleanup(); my $td = new TestDriver('page-labels'); -my $n_tests = 4; +my $n_tests = 5; $td->runtest("complex page labels", {$td->COMMAND => "test_driver 47 page-labels-num-tree.pdf"}, @@ -38,6 +38,11 @@ $td->runtest("damaged page labels", {$td->FILE => "page-labels-num-tree-damaged.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); +$td->runtest("damaged page labels --check", 
+ {$td->COMMAND => "qpdf --check page-labels-num-tree-damaged.pdf"}, + {$td->FILE => "page-labels-num-tree-damaged-check.out", $td->EXIT_STATUS => 3}, + $td->NORMALIZE_NEWLINES); + # --set-page-labels my @errors = ( ["quack", ".*page label spec must be.*"], diff --git a/qpdf/qtest/qpdf/outlines-with-loop-check.out b/qpdf/qtest/qpdf/outlines-with-loop-check.out new file mode 100644 index 0000000..d7648a2 --- /dev/null +++ b/qpdf/qtest/qpdf/outlines-with-loop-check.out @@ -0,0 +1,7 @@ +checking outlines-with-loop.pdf +PDF Version: 1.3 +File is not encrypted +File is not linearized +WARNING: outlines-with-loop.pdf, object 4 0 at offset 637: Loop detected loop in /Outlines tree +WARNING: outlines-with-loop.pdf, object 5 0 at offset 855: Loop detected loop in /Outlines tree +qpdf: operation succeeded with warnings diff --git a/qpdf/qtest/qpdf/outlines-with-loop.out b/qpdf/qtest/qpdf/outlines-with-loop.out index 9ca99dd..e4f66f8 100644 --- a/qpdf/qtest/qpdf/outlines-with-loop.out +++ b/qpdf/qtest/qpdf/outlines-with-loop.out @@ -1,3 +1,5 @@ +WARNING: outlines-with-loop.pdf, object 4 0 at offset 637: Loop detected loop in /Outlines tree +WARNING: outlines-with-loop.pdf, object 5 0 at offset 855: Loop detected loop in /Outlines tree page 5: Potato 1 -> 5: /XYZ null null null -> [ 11 0 R /XYZ null null null ] page 5: Potato 1 -> 5: /XYZ null null null -> [ 11 0 R /XYZ null null null ] page 11: Mern 1.1 -> 11: /Fit -> [ 17 0 R /Fit ] diff --git a/qpdf/qtest/qpdf/page-labels-num-tree-damaged-check.out b/qpdf/qtest/qpdf/page-labels-num-tree-damaged-check.out new file mode 100644 index 0000000..b7b8295 --- /dev/null +++ b/qpdf/qtest/qpdf/page-labels-num-tree-damaged-check.out @@ -0,0 +1,7 @@ +checking page-labels-num-tree-damaged.pdf +PDF Version: 1.3 +File is not encrypted +File is not linearized +WARNING: page-labels-num-tree-damaged.pdf (Name/Number tree node (object 2)): attempting to repair after error: page-labels-num-tree-damaged.pdf (Name/Number tree node (object 2)): 
keys are not sorted in validate +WARNING: page-labels-num-tree-damaged.pdf (Name/Number tree node (object 37)): item 1 is invalid +qpdf: operation succeeded with warnings diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc index b2cb985..09dcd7a 100644 --- a/qpdf/test_driver.cc +++ b/qpdf/test_driver.cc @@ -1802,7 +1802,7 @@ static void test_47(QPDF& pdf, char const* arg2) { // Test page labels. - QPDFPageLabelDocumentHelper pldh(pdf); + auto& pldh = QPDFPageLabelDocumentHelper::get(pdf); long long npages = pdf.getRoot().getKey("/Pages").getKey("/Count").getIntValue(); std::vector labels; pldh.getLabelsForPageRange(0, npages - 1, 1, labels); @@ -2624,7 +2624,7 @@ test_76(QPDF& pdf, char const* arg2) { // Embedded files. arg2 is a file to attach. Hard-code the // mime type and file name for test purposes. - QPDFEmbeddedFileDocumentHelper efdh(pdf); + auto &efdh = QPDFEmbeddedFileDocumentHelper::get(pdf); auto fs1 = QPDFFileSpecObjectHelper::createFileSpec(pdf, "att1.txt", arg2); fs1.setDescription("some text"); auto efs1 = QPDFEFStreamObjectHelper(fs1.getEmbeddedFileStream());