Commit 6ccbc12148274e58c110873aaba629dbb6d5dc01

Authored by m-holger
Committed by GitHub
2 parents 551a9534 3dd27a1d

Merge pull request #1541 from m-holger/dh

Cache DocumentHelpers
examples/examples.testcov
1   -pdf-bookmarks lines 0
2   -pdf-bookmarks numbers 0
3   -pdf-bookmarks none 0
4   -pdf-bookmarks has count 0
5   -pdf-bookmarks no count 0
6   -pdf-bookmarks open 0
7   -pdf-bookmarks closed 0
8   -pdf-bookmarks dest 0
9   -pdf-bookmarks targets 0
10 1 pdf-mod-info --dump 0
11 2 pdf-mod-info no in file 0
12 3 pdf-mod-info in-place 0
... ...
examples/pdf-attach-file.cc
... ... @@ -79,7 +79,7 @@ process(
79 79 }
80 80  
81 81 // Add the embedded file at the document level as an attachment.
82   - auto efdh = QPDFEmbeddedFileDocumentHelper(q);
  82 + auto& efdh = QPDFEmbeddedFileDocumentHelper::get(q);
83 83 efdh.replaceEmbeddedFile(key, fs);
84 84  
85 85 // Create a file attachment annotation.
... ...
examples/pdf-bookmarks.cc
... ... @@ -47,7 +47,7 @@ print_lines(std::vector<int>& numbers)
47 47 void
48 48 generate_page_map(QPDF& qpdf)
49 49 {
50   - QPDFPageDocumentHelper dh(qpdf);
  50 + auto& dh = QPDFPageDocumentHelper::get(qpdf);
51 51 int n = 0;
52 52 for (auto const& page: dh.getAllPages()) {
53 53 page_map[page] = ++n;
... ... @@ -60,11 +60,9 @@ show_bookmark_details(QPDFOutlineObjectHelper outline, std::vector<int> numbers)
60 60 // No default so gcc will warn on missing tag
61 61 switch (style) {
62 62 case st_none:
63   - QTC::TC("examples", "pdf-bookmarks none");
64 63 break;
65 64  
66 65 case st_numbers:
67   - QTC::TC("examples", "pdf-bookmarks numbers");
68 66 for (auto const& number: numbers) {
69 67 std::cout << number << ".";
70 68 }
... ... @@ -72,7 +70,6 @@ show_bookmark_details(QPDFOutlineObjectHelper outline, std::vector<int> numbers)
72 70 break;
73 71  
74 72 case st_lines:
75   - QTC::TC("examples", "pdf-bookmarks lines");
76 73 print_lines(numbers);
77 74 std::cout << "|\n";
78 75 print_lines(numbers);
... ... @@ -83,27 +80,21 @@ show_bookmark_details(QPDFOutlineObjectHelper outline, std::vector<int> numbers)
83 80 if (show_open) {
84 81 int count = outline.getCount();
85 82 if (count) {
86   - QTC::TC("examples", "pdf-bookmarks has count");
87 83 if (count > 0) {
88 84 // hierarchy is open at this point
89   - QTC::TC("examples", "pdf-bookmarks open");
90 85 std::cout << "(v) ";
91 86 } else {
92   - QTC::TC("examples", "pdf-bookmarks closed");
93 87 std::cout << "(>) ";
94 88 }
95 89 } else {
96   - QTC::TC("examples", "pdf-bookmarks no count");
97 90 std::cout << "( ) ";
98 91 }
99 92 }
100 93  
101 94 if (show_targets) {
102   - QTC::TC("examples", "pdf-bookmarks targets");
103 95 std::string target = "unknown";
104 96 QPDFObjectHandle dest_page = outline.getDestPage();
105 97 if (!dest_page.isNull()) {
106   - QTC::TC("examples", "pdf-bookmarks dest");
107 98 if (page_map.contains(dest_page)) {
108 99 target = std::to_string(page_map[dest_page]);
109 100 }
... ... @@ -177,7 +168,7 @@ main(int argc, char* argv[])
177 168 QPDF qpdf;
178 169 qpdf.processFile(filename, password);
179 170  
180   - QPDFOutlineDocumentHelper odh(qpdf);
  171 + auto& odh = QPDFOutlineDocumentHelper::get(qpdf);
181 172 if (odh.hasOutlines()) {
182 173 std::vector<int> numbers;
183 174 if (show_targets) {
... ...
examples/pdf-count-strings.cc
... ... @@ -76,7 +76,7 @@ main(int argc, char* argv[])
76 76 QPDF pdf;
77 77 pdf.processFile(infilename);
78 78 int pageno = 0;
79   - for (auto& page: QPDFPageDocumentHelper(pdf).getAllPages()) {
  79 + for (auto& page: QPDFPageDocumentHelper::get(pdf).getAllPages()) {
80 80 ++pageno;
81 81 // Pass the contents of a page through our string counter. If it's an even page, capture
82 82 // the output. This illustrates that you may capture any output generated by the filter,
... ...
examples/pdf-create.cc
... ... @@ -229,7 +229,7 @@ check(
229 229  
230 230 QPDF pdf;
231 231 pdf.processFile(filename);
232   - auto pages = QPDFPageDocumentHelper(pdf).getAllPages();
  232 + auto pages = QPDFPageDocumentHelper::get(pdf).getAllPages();
233 233 if (n_color_spaces * n_filters != pages.size()) {
234 234 throw std::logic_error("incorrect number of pages");
235 235 }
... ...
examples/pdf-overlay-page.cc
... ... @@ -29,14 +29,14 @@ stamp_page(char const* infile, char const* stampfile, char const* outfile)
29 29 stamppdf.processFile(stampfile);
30 30  
31 31 // Get first page from other file
32   - QPDFPageObjectHelper stamp_page_1 = QPDFPageDocumentHelper(stamppdf).getAllPages().at(0);
  32 + QPDFPageObjectHelper stamp_page_1 = QPDFPageDocumentHelper::get(stamppdf).getAllPages().at(0);
33 33 // Convert page to a form XObject
34 34 QPDFObjectHandle foreign_fo = stamp_page_1.getFormXObjectForPage();
35 35 // Copy form XObject to the input file
36 36 QPDFObjectHandle stamp_fo = inpdf.copyForeignObject(foreign_fo);
37 37  
38 38 // For each page...
39   - for (auto& ph: QPDFPageDocumentHelper(inpdf).getAllPages()) {
  39 + for (auto& ph: QPDFPageDocumentHelper::get(inpdf).getAllPages()) {
40 40 // Find a unique resource name for the new form XObject
41 41 QPDFObjectHandle resources = ph.getAttribute("/Resources", true);
42 42 int min_suffix = 1;
... ...
fuzz/qpdf_outlines_fuzzer.cc
... ... @@ -49,7 +49,7 @@ FuzzHelper::testOutlines()
49 49 {
50 50 std::shared_ptr<QPDF> q = getQpdf();
51 51 std::list<std::vector<QPDFOutlineObjectHelper>> queue;
52   - QPDFOutlineDocumentHelper odh(*q);
  52 + auto& odh = QPDFOutlineDocumentHelper::get(*q);
53 53 queue.push_back(odh.getTopLevelOutlines());
54 54 while (!queue.empty()) {
55 55 for (auto& ol: *(queue.begin())) {
... ...
include/qpdf/QPDF.hh
... ... @@ -63,6 +63,10 @@ class BufferInputSource;
63 63 class QPDFLogger;
64 64 class QPDFParser;
65 65 class QPDFAcroFormDocumentHelper;
  66 +class QPDFEmbeddedFileDocumentHelper;
  67 +class QPDFOutlineDocumentHelper;
  68 +class QPDFPageDocumentHelper;
  69 +class QPDFPageLabelDocumentHelper;
66 70  
67 71 class QPDF
68 72 {
... ... @@ -799,6 +803,10 @@ class QPDF
799 803  
800 804 inline bool reconstructed_xref() const;
801 805 inline QPDFAcroFormDocumentHelper& acroform();
  806 + inline QPDFEmbeddedFileDocumentHelper& embedded_files();
  807 + inline QPDFOutlineDocumentHelper& outlines();
  808 + inline QPDFPageDocumentHelper& pages();
  809 + inline QPDFPageLabelDocumentHelper& page_labels();
802 810  
803 811 // For testing only -- do not add to DLL
804 812 static bool test_json_validators();
... ...
include/qpdf/QPDFEmbeddedFileDocumentHelper.hh
... ... @@ -36,6 +36,21 @@
36 36 class QPDFEmbeddedFileDocumentHelper: public QPDFDocumentHelper
37 37 {
38 38 public:
  39 + // Get a shared document helper for a given QPDF object.
  40 + //
  41 + // Retrieving a document helper for a QPDF object rather than creating a new one avoids repeated
  42 + // validation of the EmbeddedFiles structure, which can be expensive.
  43 + QPDF_DLL
  44 + static QPDFEmbeddedFileDocumentHelper& get(QPDF& qpdf);
  45 +
  46 + // Re-validate the EmbeddedFiles structure. This is useful if you have modified the structure of
  47 + // the EmbeddedFiles dictionary in a way that would invalidate the cache.
  48 + //
  49 + // If repair is true, the document will be repaired if possible if the validation encounters
  50 + // errors.
  51 + QPDF_DLL
  52 + void validate(bool repair = true);
  53 +
39 54 QPDF_DLL
40 55 QPDFEmbeddedFileDocumentHelper(QPDF&);
41 56  
... ...
include/qpdf/QPDFOutlineDocumentHelper.hh
... ... @@ -38,6 +38,21 @@
38 38 class QPDFOutlineDocumentHelper: public QPDFDocumentHelper
39 39 {
40 40 public:
  41 + // Get a shared document helper for a given QPDF object.
  42 + //
  43 + // Retrieving a document helper for a QPDF object rather than creating a new one avoids repeated
  44 + // validation of the Outlines structure, which can be expensive.
  45 + QPDF_DLL
  46 + static QPDFOutlineDocumentHelper& get(QPDF& qpdf);
  47 +
  48 + // Re-validate the Outlines structure. This is useful if you have modified the structure of the
  49 + // Outlines dictionary in a way that would invalidate the cache.
  50 + //
  51 + // If repair is true, the document will be repaired if possible if the validation encounters
  52 + // errors.
  53 + QPDF_DLL
  54 + void validate(bool repair = true);
  55 +
41 56 QPDF_DLL
42 57 QPDFOutlineDocumentHelper(QPDF&);
43 58  
... ...
include/qpdf/QPDFPageDocumentHelper.hh
... ... @@ -35,6 +35,21 @@ class QPDFAcroFormDocumentHelper;
35 35 class QPDFPageDocumentHelper: public QPDFDocumentHelper
36 36 {
37 37 public:
  38 + // Get a shared document helper for a given QPDF object.
  39 + //
  40 + // Retrieving a document helper for a QPDF object rather than creating a new one avoids repeated
  41 + // validation of the Pages structure, which can be expensive.
  42 + QPDF_DLL
  43 + static QPDFPageDocumentHelper& get(QPDF& qpdf);
  44 +
  45 + // Re-validate the Pages structure. This is useful if you have modified the Pages structure in
  46 + // a way that would invalidate the cache.
  47 + //
  48 + // If repair is true, the document will be repaired if possible if the validation encounters
  49 + // errors.
  50 + QPDF_DLL
  51 + void validate(bool repair = true);
  52 +
38 53 QPDF_DLL
39 54 QPDFPageDocumentHelper(QPDF&);
40 55  
... ... @@ -112,17 +127,7 @@ class QPDFPageDocumentHelper: public QPDFDocumentHelper
112 127 int required_flags,
113 128 int forbidden_flags);
114 129  
115   - class Members
116   - {
117   - friend class QPDFPageDocumentHelper;
118   -
119   - public:
120   - ~Members() = default;
121   -
122   - private:
123   - Members() = default;
124   - Members(Members const&) = delete;
125   - };
  130 + class Members;
126 131  
127 132 std::shared_ptr<Members> m;
128 133 };
... ...
include/qpdf/QPDFPageLabelDocumentHelper.hh
... ... @@ -22,11 +22,10 @@
22 22  
23 23 #include <qpdf/QPDFDocumentHelper.hh>
24 24  
  25 +#include <qpdf/DLL.h>
25 26 #include <qpdf/QPDF.hh>
26   -#include <qpdf/QPDFNumberTreeObjectHelper.hh>
27   -#include <vector>
28 27  
29   -#include <qpdf/DLL.h>
  28 +#include <vector>
30 29  
31 30 // Page labels are discussed in the PDF spec (ISO-32000) in section 12.4.2.
32 31 //
... ... @@ -42,6 +41,21 @@
42 41 class QPDFPageLabelDocumentHelper: public QPDFDocumentHelper
43 42 {
44 43 public:
  44 + // Get a shared document helper for a given QPDF object.
  45 + //
  46 + // Retrieving a document helper for a QPDF object rather than creating a new one avoids repeated
  47 + // validation of the PageLabels structure, which can be expensive.
  48 + QPDF_DLL
  49 + static QPDFPageLabelDocumentHelper& get(QPDF& qpdf);
  50 +
  51 + // Re-validate the PageLabels structure. This is useful if you have modified the structure of
  52 + // the PageLabels dictionary in a way that could have invalidated the structure.
  53 + //
  54 + // If repair is true, the document will be repaired if possible if the validation encounters
  55 + // errors.
  56 + QPDF_DLL
  57 + void validate(bool repair = true);
  58 +
45 59 QPDF_DLL
46 60 QPDFPageLabelDocumentHelper(QPDF&);
47 61  
... ...
libqpdf/QPDFEmbeddedFileDocumentHelper.cc
1 1 #include <qpdf/QPDFEmbeddedFileDocumentHelper.hh>
2 2  
  3 +#include <qpdf/QPDFNameTreeObjectHelper.hh>
  4 +#include <qpdf/QPDF_private.hh>
  5 +
3 6 // File attachments are stored in the /EmbeddedFiles (name tree) key of the /Names dictionary from
4 7 // the document catalog. Each entry points to a /FileSpec, which in turn points to one or more Embedded
5 8 // File Streams. Note that file specs can appear in other places as well, such as file attachment
... ... @@ -44,6 +47,19 @@ QPDFEmbeddedFileDocumentHelper::QPDFEmbeddedFileDocumentHelper(QPDF& qpdf) :
44 47 QPDFDocumentHelper(qpdf),
45 48 m(std::make_shared<Members>())
46 49 {
  50 + validate();
  51 +}
  52 +
  53 +QPDFEmbeddedFileDocumentHelper&
  54 +QPDFEmbeddedFileDocumentHelper::get(QPDF& qpdf)
  55 +{
  56 + return qpdf.embedded_files();
  57 +}
  58 +
  59 +void
  60 +QPDFEmbeddedFileDocumentHelper::validate(bool repair)
  61 +{
  62 + m->embedded_files.reset();
47 63 auto names = qpdf.getRoot().getKey("/Names");
48 64 if (names.isDictionary()) {
49 65 auto embedded_files = names.getKey("/EmbeddedFiles");
... ... @@ -53,7 +69,7 @@ QPDFEmbeddedFileDocumentHelper::QPDFEmbeddedFileDocumentHelper(QPDF& qpdf) :
53 69 qpdf,
54 70 [](QPDFObjectHandle const& o) -> bool { return o.isDictionary(); },
55 71 true);
56   - m->embedded_files->validate();
  72 + m->embedded_files->validate(repair);
57 73 }
58 74 }
59 75 }
... ...
libqpdf/QPDFJob.cc
... ... @@ -13,15 +13,10 @@
13 13 #include <qpdf/Pl_StdioFile.hh>
14 14 #include <qpdf/Pl_String.hh>
15 15 #include <qpdf/QIntC.hh>
16   -#include <qpdf/QPDFAcroFormDocumentHelper.hh>
17 16 #include <qpdf/QPDFCryptoProvider.hh>
18   -#include <qpdf/QPDFEmbeddedFileDocumentHelper.hh>
19 17 #include <qpdf/QPDFExc.hh>
20 18 #include <qpdf/QPDFLogger.hh>
21 19 #include <qpdf/QPDFObjectHandle_private.hh>
22   -#include <qpdf/QPDFOutlineDocumentHelper.hh>
23   -#include <qpdf/QPDFPageDocumentHelper.hh>
24   -#include <qpdf/QPDFPageLabelDocumentHelper.hh>
25 20 #include <qpdf/QPDFPageObjectHelper.hh>
26 21 #include <qpdf/QPDFSystemError.hh>
27 22 #include <qpdf/QPDFUsage.hh>
... ... @@ -784,6 +779,14 @@ QPDFJob::doCheck(QPDF& pdf)
784 779 cout << "File is not linearized\n";
785 780 }
786 781  
  782 + // Create all document helpers to trigger any validations they carry out.
  783 + auto& pages = pdf.pages();
  784 + (void)pdf.acroform();
  785 + (void)pdf.embedded_files();
  786 + (void)pdf.page_labels();
  787 + (void)pdf.outlines().resolveNamedDest(QPDFObjectHandle::newString("dummy"));
  788 + (void)pdf.outlines().getOutlinesForPage(pages.getAllPages().at(0));
  789 +
787 790 // Write the file to nowhere, uncompressing streams. This causes full file traversal and
788 791 // decoding of all streams we can decode.
789 792 QPDFWriter w(pdf);
... ... @@ -794,7 +797,7 @@ QPDFJob::doCheck(QPDF& pdf)
794 797  
795 798 // Parse all content streams
796 799 int pageno = 0;
797   - for (auto& page: QPDFPageDocumentHelper(pdf).getAllPages()) {
  800 + for (auto& page: pages.getAllPages()) {
798 801 ++pageno;
799 802 try {
800 803 page.parseContents(nullptr);
... ... @@ -862,7 +865,7 @@ QPDFJob::doShowPages(QPDF& pdf)
862 865 {
863 866 int pageno = 0;
864 867 auto& cout = *m->log->getInfo();
865   - for (auto& ph: QPDFPageDocumentHelper(pdf).getAllPages()) {
  868 + for (auto& ph: pdf.pages().getAllPages()) {
866 869 QPDFObjectHandle page = ph.getObjectHandle();
867 870 ++pageno;
868 871  
... ... @@ -894,7 +897,7 @@ QPDFJob::doShowPages(QPDF& pdf)
894 897 void
895 898 QPDFJob::doListAttachments(QPDF& pdf)
896 899 {
897   - QPDFEmbeddedFileDocumentHelper efdh(pdf);
  900 + auto& efdh = pdf.embedded_files();
898 901 if (efdh.hasEmbeddedFiles()) {
899 902 for (auto const& i: efdh.getEmbeddedFiles()) {
900 903 std::string const& key = i.first;
... ... @@ -934,7 +937,7 @@ QPDFJob::doListAttachments(QPDF& pdf)
934 937 void
935 938 QPDFJob::doShowAttachment(QPDF& pdf)
936 939 {
937   - QPDFEmbeddedFileDocumentHelper efdh(pdf);
  940 + auto& efdh = pdf.embedded_files();
938 941 auto fs = efdh.getEmbeddedFile(m->attachment_to_show);
939 942 if (!fs) {
940 943 throw std::runtime_error("attachment " + m->attachment_to_show + " not found");
... ... @@ -1053,10 +1056,10 @@ QPDFJob::doJSONPages(Pipeline* p, bool& first, QPDF& pdf)
1053 1056 JSON::writeDictionaryKey(p, first, "pages", 1);
1054 1057 bool first_page = true;
1055 1058 JSON::writeArrayOpen(p, first_page, 2);
1056   - QPDFPageLabelDocumentHelper pldh(pdf);
1057   - QPDFOutlineDocumentHelper odh(pdf);
  1059 + auto& pldh = pdf.page_labels();
  1060 + auto& odh = pdf.outlines();
1058 1061 int pageno = -1;
1059   - for (auto& ph: QPDFPageDocumentHelper(pdf).getAllPages()) {
  1062 + for (auto& ph: pdf.pages().getAllPages()) {
1060 1063 ++pageno;
1061 1064 JSON j_page = JSON::makeDictionary();
1062 1065 QPDFObjectHandle page = ph.getObjectHandle();
... ... @@ -1116,8 +1119,8 @@ void
1116 1119 QPDFJob::doJSONPageLabels(Pipeline* p, bool& first, QPDF& pdf)
1117 1120 {
1118 1121 JSON j_labels = JSON::makeArray();
1119   - QPDFPageLabelDocumentHelper pldh(pdf);
1120   - long long npages = QIntC::to_longlong(QPDFPageDocumentHelper(pdf).getAllPages().size());
  1122 + auto& pldh = pdf.page_labels();
  1123 + long long npages = QIntC::to_longlong(pdf.pages().getAllPages().size());
1121 1124 if (pldh.hasPageLabels()) {
1122 1125 std::vector<QPDFObjectHandle> labels;
1123 1126 pldh.getLabelsForPageRange(0, npages - 1, 0, labels);
... ... @@ -1165,14 +1168,13 @@ QPDFJob::doJSONOutlines(Pipeline* p, bool& first, QPDF& pdf)
1165 1168 {
1166 1169 std::map<QPDFObjGen, int> page_numbers;
1167 1170 int n = 0;
1168   - for (auto const& ph: QPDFPageDocumentHelper(pdf).getAllPages()) {
  1171 + for (auto const& ph: pdf.pages().getAllPages()) {
1169 1172 QPDFObjectHandle oh = ph.getObjectHandle();
1170 1173 page_numbers[oh.getObjGen()] = ++n;
1171 1174 }
1172 1175  
1173 1176 JSON j_outlines = JSON::makeArray();
1174   - QPDFOutlineDocumentHelper odh(pdf);
1175   - addOutlinesToJson(odh.getTopLevelOutlines(), j_outlines, page_numbers);
  1177 + addOutlinesToJson(pdf.outlines().getTopLevelOutlines(), j_outlines, page_numbers);
1176 1178 JSON::writeDictionaryItem(p, first, "outlines", j_outlines, 1);
1177 1179 }
1178 1180  
... ... @@ -1185,7 +1187,7 @@ QPDFJob::doJSONAcroform(Pipeline* p, bool& first, QPDF& pdf)
1185 1187 j_acroform.addDictionaryMember("needappearances", JSON::makeBool(afdh.getNeedAppearances()));
1186 1188 JSON j_fields = j_acroform.addDictionaryMember("fields", JSON::makeArray());
1187 1189 int pagepos1 = 0;
1188   - for (auto const& page: QPDFPageDocumentHelper(pdf).getAllPages()) {
  1190 + for (auto const& page: pdf.pages().getAllPages()) {
1189 1191 ++pagepos1;
1190 1192 for (auto& aoh: afdh.getWidgetAnnotationsForPage(page)) {
1191 1193 QPDFFormFieldObjectHelper ffh = afdh.getFieldForAnnotation(aoh);
... ... @@ -1321,7 +1323,7 @@ QPDFJob::doJSONAttachments(Pipeline* p, bool& first, QPDF& pdf)
1321 1323 };
1322 1324  
1323 1325 JSON j_attachments = JSON::makeDictionary();
1324   - QPDFEmbeddedFileDocumentHelper efdh(pdf);
  1326 + auto& efdh = pdf.embedded_files();
1325 1327 for (auto const& iter: efdh.getEmbeddedFiles()) {
1326 1328 std::string const& key = iter.first;
1327 1329 auto fsoh = iter.second;
... ... @@ -1862,7 +1864,7 @@ QPDFJob::processInputSource(
1862 1864 void
1863 1865 QPDFJob::validateUnderOverlay(QPDF& pdf, UnderOverlay* uo)
1864 1866 {
1865   - QPDFPageDocumentHelper main_pdh(pdf);
  1867 + auto& main_pdh = pdf.pages();
1866 1868 int main_npages = QIntC::to_int(main_pdh.getAllPages().size());
1867 1869 processFile(uo->pdf, uo->filename.data(), uo->password.data(), true, false);
1868 1870 QPDFPageDocumentHelper uo_pdh(*(uo->pdf));
... ... @@ -2073,7 +2075,7 @@ void
2073 2075 QPDFJob::addAttachments(QPDF& pdf)
2074 2076 {
2075 2077 maybe_set_pagemode(pdf, "/UseAttachments");
2076   - QPDFEmbeddedFileDocumentHelper efdh(pdf);
  2078 + auto& efdh = pdf.embedded_files();
2077 2079 std::vector<std::string> duplicated_keys;
2078 2080 for (auto const& to_add: m->attachments_to_add) {
2079 2081 if ((!to_add.replace) && efdh.getEmbeddedFile(to_add.key)) {
... ... @@ -2117,7 +2119,7 @@ void
2117 2119 QPDFJob::copyAttachments(QPDF& pdf)
2118 2120 {
2119 2121 maybe_set_pagemode(pdf, "/UseAttachments");
2120   - QPDFEmbeddedFileDocumentHelper efdh(pdf);
  2122 + auto& efdh = pdf.embedded_files();
2121 2123 std::vector<std::string> duplicates;
2122 2124 for (auto const& to_copy: m->attachments_to_copy) {
2123 2125 doIfVerbose([&](Pipeline& v, std::string const& prefix) {
... ... @@ -2125,7 +2127,7 @@ QPDFJob::copyAttachments(QPDF& pdf)
2125 2127 });
2126 2128 std::unique_ptr<QPDF> other;
2127 2129 processFile(other, to_copy.path.c_str(), to_copy.password.c_str(), false, false);
2128   - QPDFEmbeddedFileDocumentHelper other_efdh(*other);
  2130 + auto& other_efdh = other->embedded_files();
2129 2131 auto other_attachments = other_efdh.getEmbeddedFiles();
2130 2132 for (auto const& iter: other_attachments) {
2131 2133 std::string new_key = to_copy.prefix + iter.first;
... ... @@ -2259,7 +2261,7 @@ QPDFJob::handleTransformations(QPDF& pdf)
2259 2261 pdf.getRoot().replaceKey("/PageLabels", page_labels);
2260 2262 }
2261 2263 if (!m->attachments_to_remove.empty()) {
2262   - QPDFEmbeddedFileDocumentHelper efdh(pdf);
  2264 + auto& efdh = pdf.embedded_files();
2263 2265 for (auto const& key: m->attachments_to_remove) {
2264 2266 if (efdh.removeEmbeddedFile(key)) {
2265 2267 doIfVerbose([&](Pipeline& v, std::string const& prefix) {
... ... @@ -2548,7 +2550,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
2548 2550 cis = page_spec_cfis[page_data.filename];
2549 2551 cis->stayOpen(true);
2550 2552 }
2551   - QPDFPageLabelDocumentHelper pldh(*page_data.qpdf);
  2553 + auto& pldh = page_data.qpdf->page_labels();
2552 2554 auto& other_afdh = page_data.qpdf->acroform();
2553 2555 if (pldh.hasPageLabels()) {
2554 2556 any_page_labels = true;
... ... @@ -2992,7 +2994,7 @@ QPDFJob::doSplitPages(QPDF& pdf)
2992 2994 QPDFPageDocumentHelper dh(pdf);
2993 2995 dh.removeUnreferencedResources();
2994 2996 }
2995   - QPDFPageLabelDocumentHelper pldh(pdf);
  2997 + auto& pldh = pdf.page_labels();
2996 2998 auto& afdh = pdf.acroform();
2997 2999 std::vector<QPDFObjectHandle> const& pages = pdf.getAllPages();
2998 3000 size_t pageno_len = std::to_string(pages.size()).length();
... ...
libqpdf/QPDFOutlineDocumentHelper.cc
1 1 #include <qpdf/QPDFOutlineDocumentHelper.hh>
2 2  
3 3 #include <qpdf/QPDFObjectHandle_private.hh>
  4 +#include <qpdf/QPDF_private.hh>
4 5 #include <qpdf/QTC.hh>
5 6  
6 7 class QPDFOutlineDocumentHelper::Members
... ... @@ -27,6 +28,21 @@ QPDFOutlineDocumentHelper::QPDFOutlineDocumentHelper(QPDF& qpdf) :
27 28 QPDFDocumentHelper(qpdf),
28 29 m(std::make_shared<Members>())
29 30 {
  31 + validate();
  32 +}
  33 +
  34 +QPDFOutlineDocumentHelper&
  35 +QPDFOutlineDocumentHelper::get(QPDF& qpdf)
  36 +{
  37 + return qpdf.outlines();
  38 +}
  39 +
  40 +void
  41 +QPDFOutlineDocumentHelper::validate(bool repair)
  42 +{
  43 + m->outlines.clear();
  44 + m->names_dest = nullptr;
  45 +
30 46 QPDFObjectHandle root = qpdf.getRoot();
31 47 if (!root.hasKey("/Outlines")) {
32 48 return;
... ... @@ -37,7 +53,11 @@ QPDFOutlineDocumentHelper::QPDFOutlineDocumentHelper(QPDF& qpdf) :
37 53 }
38 54 QPDFObjectHandle cur = outlines.getKey("/First");
39 55 QPDFObjGen::set seen;
40   - while (!cur.null() && seen.add(cur)) {
  56 + while (!cur.null()) {
  57 + if (!seen.add(cur)) {
  58 + cur.warn("Loop detected loop in /Outlines tree");
  59 + return;
  60 + }
41 61 m->outlines.emplace_back(QPDFOutlineObjectHelper::Accessor::create(cur, *this, 1));
42 62 cur = cur.getKey("/Next");
43 63 }
... ...
libqpdf/QPDFOutlineObjectHelper.cc
... ... @@ -20,15 +20,20 @@ QPDFOutlineObjectHelper::QPDFOutlineObjectHelper(
20 20 return;
21 21 }
22 22 if (QPDFOutlineDocumentHelper::Accessor::checkSeen(m->dh, a_oh.getObjGen())) {
  23 + a_oh.warn("Loop detected loop in /Outlines tree");
23 24 return;
24 25 }
25 26  
26 27 QPDFObjGen::set children;
27 28 QPDFObjectHandle cur = a_oh.getKey("/First");
28   - while (!cur.null() && cur.isIndirect() && children.add(cur)) {
  29 + while (!cur.null() && cur.isIndirect()) {
  30 + if (!children.add(cur)) {
  31 + cur.warn("Loop detected loop in /Outlines tree");
  32 + break;
  33 + }
29 34 QPDFOutlineObjectHelper new_ooh(cur, dh, 1 + depth);
30 35 new_ooh.m->parent = std::make_shared<QPDFOutlineObjectHelper>(*this);
31   - m->kids.push_back(new_ooh);
  36 + m->kids.emplace_back(new_ooh);
32 37 cur = cur.getKey("/Next");
33 38 }
34 39 }
... ...
libqpdf/QPDFPageDocumentHelper.cc
... ... @@ -6,11 +6,26 @@
6 6 #include <qpdf/QTC.hh>
7 7 #include <qpdf/QUtil.hh>
8 8  
  9 +class QPDFPageDocumentHelper::Members
  10 +{
  11 +};
  12 +
9 13 QPDFPageDocumentHelper::QPDFPageDocumentHelper(QPDF& qpdf) :
10 14 QPDFDocumentHelper(qpdf)
11 15 {
12 16 }
13 17  
  18 +QPDFPageDocumentHelper&
  19 +QPDFPageDocumentHelper::get(QPDF& qpdf)
  20 +{
  21 + return qpdf.pages();
  22 +}
  23 +
  24 +void
  25 +QPDFPageDocumentHelper::validate(bool repair)
  26 +{
  27 +}
  28 +
14 29 std::vector<QPDFPageObjectHelper>
15 30 QPDFPageDocumentHelper::getAllPages()
16 31 {
... ...
libqpdf/QPDFPageLabelDocumentHelper.cc
1 1 #include <qpdf/QPDFPageLabelDocumentHelper.hh>
2 2  
  3 +#include <qpdf/QPDFNumberTreeObjectHelper.hh>
3 4 #include <qpdf/QPDFObjectHandle_private.hh>
  5 +#include <qpdf/QPDF_private.hh>
  6 +
  7 +using namespace qpdf;
4 8  
5 9 class QPDFPageLabelDocumentHelper::Members
6 10 {
... ... @@ -16,14 +20,23 @@ QPDFPageLabelDocumentHelper::QPDFPageLabelDocumentHelper(QPDF& qpdf) :
16 20 QPDFDocumentHelper(qpdf),
17 21 m(std::make_shared<Members>())
18 22 {
19   - QPDFObjectHandle root = qpdf.getRoot();
20   - if (root.hasKey("/PageLabels")) {
  23 + validate();
  24 +}
  25 +
  26 +QPDFPageLabelDocumentHelper&
  27 +QPDFPageLabelDocumentHelper::get(QPDF& qpdf)
  28 +{
  29 + return qpdf.page_labels();
  30 +}
  31 +
  32 +void
  33 +QPDFPageLabelDocumentHelper::validate(bool repair)
  34 +{
  35 + m->labels = nullptr;
  36 + if (Dictionary labels = qpdf.getRoot()["/PageLabels"]) {
21 37 m->labels = std::make_unique<QPDFNumberTreeObjectHelper>(
22   - root.getKey("/PageLabels"),
23   - this->qpdf,
24   - [](QPDFObjectHandle const& o) -> bool { return o.isDictionary(); },
25   - true);
26   - m->labels->validate();
  38 + labels, qpdf, [](QPDFObjectHandle const& o) -> bool { return o.isDictionary(); }, true);
  39 + m->labels->validate(repair);
27 40 }
28 41 }
29 42  
... ...
libqpdf/qpdf/QPDF_private.hh
... ... @@ -4,7 +4,11 @@
4 4 #include <qpdf/QPDF.hh>
5 5  
6 6 #include <qpdf/QPDFAcroFormDocumentHelper.hh>
  7 +#include <qpdf/QPDFEmbeddedFileDocumentHelper.hh>
7 8 #include <qpdf/QPDFObject_private.hh>
  9 +#include <qpdf/QPDFOutlineDocumentHelper.hh>
  10 +#include <qpdf/QPDFPageDocumentHelper.hh>
  11 +#include <qpdf/QPDFPageLabelDocumentHelper.hh>
8 12 #include <qpdf/QPDFTokenizer_private.hh>
9 13  
10 14 using namespace qpdf;
... ... @@ -553,6 +557,10 @@ class QPDF::Members
553 557  
554 558 // Document Helpers;
555 559 std::unique_ptr<QPDFAcroFormDocumentHelper> acroform;
  560 + std::unique_ptr<QPDFEmbeddedFileDocumentHelper> embedded_files;
  561 + std::unique_ptr<QPDFOutlineDocumentHelper> outlines;
  562 + std::unique_ptr<QPDFPageDocumentHelper> pages;
  563 + std::unique_ptr<QPDFPageLabelDocumentHelper> page_labels;
556 564 };
557 565  
558 566 // JobSetter class is restricted to QPDFJob.
... ... @@ -584,4 +592,40 @@ QPDF::acroform()
584 592 return *m->acroform;
585 593 }
586 594  
  595 +inline QPDFEmbeddedFileDocumentHelper&
  596 +QPDF::embedded_files()
  597 +{
  598 + if (!m->embedded_files) {
  599 + m->embedded_files = std::make_unique<QPDFEmbeddedFileDocumentHelper>(*this);
  600 + }
  601 + return *m->embedded_files;
  602 +}
  603 +
  604 +inline QPDFOutlineDocumentHelper&
  605 +QPDF::outlines()
  606 +{
  607 + if (!m->outlines) {
  608 + m->outlines = std::make_unique<QPDFOutlineDocumentHelper>(*this);
  609 + }
  610 + return *m->outlines;
  611 +}
  612 +
  613 +inline QPDFPageDocumentHelper&
  614 +QPDF::pages()
  615 +{
  616 + if (!m->pages) {
  617 + m->pages = std::make_unique<QPDFPageDocumentHelper>(*this);
  618 + }
  619 + return *m->pages;
  620 +}
  621 +
  622 +inline QPDFPageLabelDocumentHelper&
  623 +QPDF::page_labels()
  624 +{
  625 + if (!m->page_labels) {
  626 + m->page_labels = std::make_unique<QPDFPageLabelDocumentHelper>(*this);
  627 + }
  628 + return *m->page_labels;
  629 +}
  630 +
587 631 #endif // QPDF_PRIVATE_HH
... ...
manual/release-notes.rst
... ... @@ -26,19 +26,29 @@ more detail.
26 26 - Library Enhancements
27 27  
28 28 - Add ``QPDFNameTreeObjectHelper`` and ``QPDFNumberTreeObjectHelper``
29   - constructor overloads that allow a function to ne passed to
  29 + constructor overloads that allow a function to be passed to
30 30 validate the values in the tree.
31 31  
32 32 - Add new ``QPDFNameTreeObjectHelper`` and ``QPDFNumberTreeObjectHelper``
33 33 ``validate`` method to validate and optionally repair the name/number
34 34 tree.
35 35  
  36 + - Add new ``get`` and ``validate`` methods to all DocumentHelper classes.
  37 + The ``get`` method retrieves a shared DocumentHelper, avoiding the
  38 + overhead of repeatedly validating the underlying document structure
  39 + and/or building internal caches. If the underlying document structure
  40 + is directly modified (without the use of DocumentHelpers), the
  41 + ``validate`` method revalidates the structure and resynchronizes any
  42 + internal caches.
  43 +
36 44 - CLI Enhancements
37 45  
38 46 - Disallow option :qpdf:ref:`--deterministic-id` to be used together
39 47 with the incompatible options :qpdf:ref:`--encrypt` or
40 48 :qpdf:ref:`--copy-encryption`.
41 49  
  50 + - Option :qpdf:ref:`--check` now includes additional basic checks of the
  51 + AcroForm, Dests, Outlines, and PageLabels structures.
42 52  
43 53 - Other enhancements
44 54  
... ...
qpdf/qtest/outlines.test
... ... @@ -21,7 +21,7 @@ my @outline_files = (
21 21 'outlines-with-old-root-dests-dict',
22 22 'outlines-with-loop',
23 23 );
24   -my $n_tests = scalar(@outline_files);
  24 +my $n_tests = scalar(@outline_files) + 1;
25 25 foreach my $f (@outline_files)
26 26 {
27 27 $td->runtest("outlines: $f",
... ... @@ -30,5 +30,10 @@ foreach my $f (@outline_files)
30 30 $td->NORMALIZE_NEWLINES);
31 31 }
32 32  
  33 +$td->runtest("outlines: outlines-with-loop --check",
  34 + {$td->COMMAND => "qpdf --check outlines-with-loop.pdf"},
  35 + {$td->FILE => "outlines-with-loop-check.out", $td->EXIT_STATUS => 3},
  36 + $td->NORMALIZE_NEWLINES);
  37 +
33 38 cleanup();
34 39 $td->report($n_tests);
... ...
qpdf/qtest/page-labels.test
... ... @@ -14,7 +14,7 @@ cleanup();
14 14  
15 15 my $td = new TestDriver('page-labels');
16 16  
17   -my $n_tests = 4;
  17 +my $n_tests = 5;
18 18  
19 19 $td->runtest("complex page labels",
20 20 {$td->COMMAND => "test_driver 47 page-labels-num-tree.pdf"},
... ... @@ -38,6 +38,11 @@ $td->runtest("damaged page labels",
38 38 {$td->FILE => "page-labels-num-tree-damaged.out", $td->EXIT_STATUS => 0},
39 39 $td->NORMALIZE_NEWLINES);
40 40  
  41 +$td->runtest("damaged page labels --check",
  42 + {$td->COMMAND => "qpdf --check page-labels-num-tree-damaged.pdf"},
  43 + {$td->FILE => "page-labels-num-tree-damaged-check.out", $td->EXIT_STATUS => 3},
  44 + $td->NORMALIZE_NEWLINES);
  45 +
41 46 # --set-page-labels
42 47 my @errors = (
43 48 ["quack", ".*page label spec must be.*"],
... ...
qpdf/qtest/qpdf/outlines-with-loop-check.out 0 → 100644
  1 +checking outlines-with-loop.pdf
  2 +PDF Version: 1.3
  3 +File is not encrypted
  4 +File is not linearized
  5 +WARNING: outlines-with-loop.pdf, object 4 0 at offset 637: Loop detected loop in /Outlines tree
  6 +WARNING: outlines-with-loop.pdf, object 5 0 at offset 855: Loop detected loop in /Outlines tree
  7 +qpdf: operation succeeded with warnings
... ...
qpdf/qtest/qpdf/outlines-with-loop.out
  1 +WARNING: outlines-with-loop.pdf, object 4 0 at offset 637: Loop detected loop in /Outlines tree
  2 +WARNING: outlines-with-loop.pdf, object 5 0 at offset 855: Loop detected loop in /Outlines tree
1 3 page 5: Potato 1 -> 5: /XYZ null null null -> [ 11 0 R /XYZ null null null ]
2 4 page 5: Potato 1 -> 5: /XYZ null null null -> [ 11 0 R /XYZ null null null ]
3 5 page 11: Mern 1.1 -> 11: /Fit -> [ 17 0 R /Fit ]
... ...
qpdf/qtest/qpdf/page-labels-num-tree-damaged-check.out 0 → 100644
  1 +checking page-labels-num-tree-damaged.pdf
  2 +PDF Version: 1.3
  3 +File is not encrypted
  4 +File is not linearized
  5 +WARNING: page-labels-num-tree-damaged.pdf (Name/Number tree node (object 2)): attempting to repair after error: page-labels-num-tree-damaged.pdf (Name/Number tree node (object 2)): keys are not sorted in validate
  6 +WARNING: page-labels-num-tree-damaged.pdf (Name/Number tree node (object 37)): item 1 is invalid
  7 +qpdf: operation succeeded with warnings
... ...
qpdf/test_driver.cc
... ... @@ -1802,7 +1802,7 @@ static void
1802 1802 test_47(QPDF& pdf, char const* arg2)
1803 1803 {
1804 1804 // Test page labels.
1805   - QPDFPageLabelDocumentHelper pldh(pdf);
  1805 + auto& pldh = QPDFPageLabelDocumentHelper::get(pdf);
1806 1806 long long npages = pdf.getRoot().getKey("/Pages").getKey("/Count").getIntValue();
1807 1807 std::vector<QPDFObjectHandle> labels;
1808 1808 pldh.getLabelsForPageRange(0, npages - 1, 1, labels);
... ... @@ -2624,7 +2624,7 @@ test_76(QPDF& pdf, char const* arg2)
2624 2624 {
2625 2625 // Embedded files. arg2 is a file to attach. Hard-code the
2626 2626 // mime type and file name for test purposes.
2627   - QPDFEmbeddedFileDocumentHelper efdh(pdf);
  2627 + auto &efdh = QPDFEmbeddedFileDocumentHelper::get(pdf);
2628 2628 auto fs1 = QPDFFileSpecObjectHelper::createFileSpec(pdf, "att1.txt", arg2);
2629 2629 fs1.setDescription("some text");
2630 2630 auto efs1 = QPDFEFStreamObjectHelper(fs1.getEmbeddedFileStream());
... ...