Commit f38df27aa3eae905e3ee90365099335e317173d8
1 parent
077d3d45
Add QPDFNumberTreeObjectHelper
Showing
8 changed files
with
464 additions
and
0 deletions
ChangeLog
| 1 | 1 | 2018-12-18 Jay Berkenbilt <ejb@ql.org> |
| 2 | 2 | |
| 3 | + * Add QPDFNumberTreeObjectHelper class. This class provides useful | |
| 4 | + methods for dealing with number trees, which are discussed in | |
| 5 | + section 7.9.7 of the PDF spec (ISO-32000). Page label dictionaries | |
| 6 | + are represented as number trees. | |
| 7 | + | |
| 3 | 8 | * New method QPDFObjectHandle::wrapInArray returns the object |
| 4 | 9 | itself if it is an array. Otherwise, it returns an array |
| 5 | 10 | containing the object. This is useful for dealing with PDF data | ... | ... |
include/qpdf/QPDFNumberTreeObjectHelper.hh
0 → 100644
| 1 | +// Copyright (c) 2005-2018 Jay Berkenbilt | |
| 2 | +// | |
| 3 | +// This file is part of qpdf. | |
| 4 | +// | |
| 5 | +// Licensed under the Apache License, Version 2.0 (the "License"); | |
| 6 | +// you may not use this file except in compliance with the License. | |
| 7 | +// You may obtain a copy of the License at | |
| 8 | +// | |
| 9 | +// http://www.apache.org/licenses/LICENSE-2.0 | |
| 10 | +// | |
| 11 | +// Unless required by applicable law or agreed to in writing, software | |
| 12 | +// distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 | +// See the License for the specific language governing permissions and | |
| 15 | +// limitations under the License. | |
| 16 | +// | |
| 17 | +// Versions of qpdf prior to version 7 were released under the terms | |
| 18 | +// of version 2.0 of the Artistic License. At your option, you may | |
| 19 | +// continue to consider qpdf to be licensed under those terms. Please | |
| 20 | +// see the manual for additional information. | |
| 21 | + | |
| 22 | +#ifndef QPDFNUMBERTREEOBJECTHELPER_HH | |
| 23 | +#define QPDFNUMBERTREEOBJECTHELPER_HH | |
| 24 | + | |
| 25 | +#include <qpdf/QPDFObjectHelper.hh> | |
| 26 | +#include <qpdf/QPDFObjGen.hh> | |
| 27 | +#include <functional> | |
| 28 | +#include <map> | |
| 29 | + | |
| 30 | +#include <qpdf/DLL.h> | |
| 31 | + | |
| 32 | +// This is an object helper for number trees. See section 7.9.7 in the | |
| 33 | +// PDF spec (ISO 32000) for a description of number trees. This | |
| 34 | +// implementation disregards stated limits and sequencing and simply | |
| 35 | +// builds a map from numerical index to object. If the array of | |
| 36 | +// numbers does not contain a numerical value where expected, this | |
| 37 | +// implementation silently skips forward until it finds a number. | |
| 38 | + | |
| 39 | +class QPDFNumberTreeObjectHelper: public QPDFObjectHelper | |
| 40 | +{ | |
| 41 | + public: | |
| 42 | + QPDF_DLL | |
| 43 | + QPDFNumberTreeObjectHelper(QPDFObjectHandle); | |
| 44 | + | |
| 45 | + typedef long long int numtree_number; | |
| 46 | + | |
| 47 | + // Return overall minimum and maximum indices | |
| 48 | + QPDF_DLL | |
| 49 | + numtree_number getMin(); | |
| 50 | + QPDF_DLL | |
| 51 | + numtree_number getMax(); | |
| 52 | + | |
| 53 | + // Return whether the number tree has an explicit entry for this | |
| 54 | + // number. | |
| 55 | + QPDF_DLL | |
| 56 | + bool hasIndex(numtree_number idx); | |
| 57 | + | |
| 58 | + // Find an object with a specific index. If found, returns true | |
| 59 | + // and initializes oh. | |
| 60 | + QPDF_DLL | |
| 61 | + bool findObject(numtree_number idx, QPDFObjectHandle& oh); | |
| 62 | + // Find the object at the index or, if not found, the object whose | |
| 63 | + // index is the highest index less than the requested index. If | |
| 64 | + // the requested index is less than the minimum, return false. | |
| 65 | + // Otherwise, return true, initialize oh to the object, and set | |
| 66 | + // offset to the difference between the requested index and the | |
| 67 | + // actual index. For example, if a number tree has values for 3 | |
| 68 | + // and 6 and idx is 5, this method would return true, initialize | |
| 69 | + // oh to the value with index 3, and set offset to 2 (5 - 3). | |
| 70 | + QPDF_DLL | |
| 71 | + bool findObjectAtOrBelow(numtree_number idx, QPDFObjectHandle& oh, | |
| 72 | + numtree_number& offset); | |
| 73 | + | |
| 74 | + typedef std::map<numtree_number, QPDFObjectHandle> idx_map; | |
| 75 | + QPDF_DLL | |
| 76 | + idx_map getAsMap() const; | |
| 77 | + | |
| 78 | + private: | |
| 79 | + class Members | |
| 80 | + { | |
| 81 | + friend class QPDFNumberTreeObjectHelper; | |
| 82 | + typedef QPDFNumberTreeObjectHelper::numtree_number numtree_number; | |
| 83 | + | |
| 84 | + public: | |
| 85 | + QPDF_DLL | |
| 86 | + ~Members(); | |
| 87 | + | |
| 88 | + private: | |
| 89 | + Members(); | |
| 90 | + Members(Members const&); | |
| 91 | + | |
| 92 | + // Use a reverse sorted map so we can use the lower_bound | |
| 93 | + // method for searching. lower_bound returns smallest entry | |
| 94 | + // not before the searched entry, meaning that the searched | |
| 95 | + // entry is the lower bound. There's also an upper_bound | |
| 96 | + // method, but it does not do what you'd think it should. | |
| 97 | + // lower_bound implements >=, and upper_bound implements >. | |
| 98 | + typedef std::map<numtree_number, | |
| 99 | + QPDFObjectHandle, | |
| 100 | + std::greater<numtree_number> > idx_map; | |
| 101 | + idx_map entries; | |
| 102 | + std::set<QPDFObjGen> seen; | |
| 103 | + }; | |
| 104 | + | |
| 105 | + void updateMap(QPDFObjectHandle oh); | |
| 106 | + | |
| 107 | + PointerHolder<Members> m; | |
| 108 | +}; | |
| 109 | + | |
| 110 | +#endif // QPDFNUMBERTREEOBJECTHELPER_HH | ... | ... |
libqpdf/QPDFNumberTreeObjectHelper.cc
0 → 100644
| 1 | +#include <qpdf/QPDFNumberTreeObjectHelper.hh> | |
| 2 | +#include <qpdf/QTC.hh> | |
| 3 | + | |
| 4 | +QPDFNumberTreeObjectHelper::Members::~Members() | |
| 5 | +{ | |
| 6 | +} | |
| 7 | + | |
| 8 | +QPDFNumberTreeObjectHelper::Members::Members() | |
| 9 | +{ | |
| 10 | +} | |
| 11 | + | |
| 12 | +QPDFNumberTreeObjectHelper::QPDFNumberTreeObjectHelper(QPDFObjectHandle oh) : | |
| 13 | + QPDFObjectHelper(oh), | |
| 14 | + m(new Members()) | |
| 15 | +{ | |
| 16 | + updateMap(oh); | |
| 17 | +} | |
| 18 | + | |
| 19 | +void | |
| 20 | +QPDFNumberTreeObjectHelper::updateMap(QPDFObjectHandle oh) | |
| 21 | +{ | |
| 22 | + if (this->m->seen.count(oh.getObjGen())) | |
| 23 | + { | |
| 24 | + return; | |
| 25 | + } | |
| 26 | + this->m->seen.insert(oh.getObjGen()); | |
| 27 | + QPDFObjectHandle nums = oh.getKey("/Nums"); | |
| 28 | + if (nums.isArray()) | |
| 29 | + { | |
| 30 | + size_t nitems = nums.getArrayNItems(); | |
| 31 | + size_t i = 0; | |
| 32 | + while (i < nitems - 1) | |
| 33 | + { | |
| 34 | + QPDFObjectHandle num = nums.getArrayItem(i); | |
| 35 | + if (num.isInteger()) | |
| 36 | + { | |
| 37 | + ++i; | |
| 38 | + QPDFObjectHandle obj = nums.getArrayItem(i); | |
| 39 | + this->m->entries[num.getIntValue()] = obj; | |
| 40 | + } | |
| 41 | + ++i; | |
| 42 | + } | |
| 43 | + } | |
| 44 | + QPDFObjectHandle kids = oh.getKey("/Kids"); | |
| 45 | + if (kids.isArray()) | |
| 46 | + { | |
| 47 | + size_t nitems = kids.getArrayNItems(); | |
| 48 | + for (size_t i = 0; i < nitems; ++i) | |
| 49 | + { | |
| 50 | + updateMap(kids.getArrayItem(i)); | |
| 51 | + } | |
| 52 | + } | |
| 53 | +} | |
| 54 | + | |
| 55 | + | |
| 56 | +QPDFNumberTreeObjectHelper::numtree_number | |
| 57 | +QPDFNumberTreeObjectHelper::getMin() | |
| 58 | +{ | |
| 59 | + if (this->m->entries.empty()) | |
| 60 | + { | |
| 61 | + return 0; | |
| 62 | + } | |
| 63 | + // Our map is sorted in reverse. | |
| 64 | + return this->m->entries.rbegin()->first; | |
| 65 | +} | |
| 66 | + | |
| 67 | +QPDFNumberTreeObjectHelper::numtree_number | |
| 68 | +QPDFNumberTreeObjectHelper::getMax() | |
| 69 | +{ | |
| 70 | + if (this->m->entries.empty()) | |
| 71 | + { | |
| 72 | + return 0; | |
| 73 | + } | |
| 74 | + // Our map is sorted in reverse. | |
| 75 | + return this->m->entries.begin()->first; | |
| 76 | +} | |
| 77 | + | |
| 78 | +bool | |
| 79 | +QPDFNumberTreeObjectHelper::hasIndex(numtree_number idx) | |
| 80 | +{ | |
| 81 | + return this->m->entries.count(idx) != 0; | |
| 82 | +} | |
| 83 | + | |
| 84 | +bool | |
| 85 | +QPDFNumberTreeObjectHelper::findObject( | |
| 86 | + numtree_number idx, QPDFObjectHandle& oh) | |
| 87 | +{ | |
| 88 | + Members::idx_map::iterator i = this->m->entries.find(idx); | |
| 89 | + if (i == this->m->entries.end()) | |
| 90 | + { | |
| 91 | + return false; | |
| 92 | + } | |
| 93 | + oh = (*i).second; | |
| 94 | + return true; | |
| 95 | +} | |
| 96 | + | |
| 97 | +bool | |
| 98 | +QPDFNumberTreeObjectHelper::findObjectAtOrBelow( | |
| 99 | + numtree_number idx, QPDFObjectHandle& oh, | |
| 100 | + numtree_number& offset) | |
| 101 | +{ | |
| 102 | + Members::idx_map::iterator i = this->m->entries.lower_bound(idx); | |
| 103 | + if (i == this->m->entries.end()) | |
| 104 | + { | |
| 105 | + return false; | |
| 106 | + } | |
| 107 | + oh = (*i).second; | |
| 108 | + offset = idx - (*i).first; | |
| 109 | + return true; | |
| 110 | +} | |
| 111 | + | |
| 112 | +std::map<QPDFNumberTreeObjectHelper::numtree_number, QPDFObjectHandle> | |
| 113 | +QPDFNumberTreeObjectHelper::getAsMap() const | |
| 114 | +{ | |
| 115 | + std::map<numtree_number, QPDFObjectHandle> result; | |
| 116 | + for (Members::idx_map::const_iterator iter = this->m->entries.begin(); | |
| 117 | + iter != this->m->entries.end(); ++iter) | |
| 118 | + { | |
| 119 | + result[(*iter).first] = (*iter).second; | |
| 120 | + } | |
| 121 | + return result; | |
| 122 | +} | ... | ... |
libqpdf/build.mk
| ... | ... | @@ -40,6 +40,7 @@ SRCS_libqpdf = \ |
| 40 | 40 | libqpdf/QPDFAnnotationObjectHelper.cc \ |
| 41 | 41 | libqpdf/QPDFExc.cc \ |
| 42 | 42 | libqpdf/QPDFFormFieldObjectHelper.cc \ |
| 43 | + libqpdf/QPDFNumberTreeObjectHelper.cc \ | |
| 43 | 44 | libqpdf/QPDFObjGen.cc \ |
| 44 | 45 | libqpdf/QPDFObject.cc \ |
| 45 | 46 | libqpdf/QPDFObjectHandle.cc \ | ... | ... |
qpdf/qtest/qpdf.test
| ... | ... | @@ -226,6 +226,16 @@ foreach my $input (@ext_inputs) |
| 226 | 226 | } |
| 227 | 227 | show_ntests(); |
| 228 | 228 | # ---------- |
| 229 | +$td->notify("--- Number Trees ---"); | |
| 230 | +$n_tests += 1; | |
| 231 | + | |
| 232 | +$td->runtest("number trees", | |
| 233 | + {$td->COMMAND => "test_driver 46 number-tree.pdf"}, | |
| 234 | + {$td->FILE => "number-tree.out", $td->EXIT_STATUS => 0}, | |
| 235 | + $td->NORMALIZE_NEWLINES); | |
| 236 | + | |
| 237 | +show_ntests(); | |
| 238 | +# ---------- | |
| 229 | 239 | $td->notify("--- Page API Tests ---"); |
| 230 | 240 | $n_tests += 9; |
| 231 | 241 | ... | ... |
qpdf/qtest/qpdf/number-tree.out
0 → 100644
qpdf/qtest/qpdf/number-tree.pdf
0 → 100644
| 1 | +%PDF-1.3 | |
| 2 | +%¿÷¢þ | |
| 3 | +%QDF-1.0 | |
| 4 | + | |
| 5 | +1 0 obj | |
| 6 | +<< | |
| 7 | + /Pages 2 0 R | |
| 8 | + /Type /Catalog | |
| 9 | +>> | |
| 10 | +endobj | |
| 11 | + | |
| 12 | +2 0 obj | |
| 13 | +<< | |
| 14 | + /Count 1 | |
| 15 | + /Kids [ | |
| 16 | + 3 0 R | |
| 17 | + ] | |
| 18 | + /Type /Pages | |
| 19 | +>> | |
| 20 | +endobj | |
| 21 | + | |
| 22 | +%% Page 1 | |
| 23 | +3 0 obj | |
| 24 | +<< | |
| 25 | + /Contents 4 0 R | |
| 26 | + /MediaBox [ | |
| 27 | + 0 | |
| 28 | + 0 | |
| 29 | + 612 | |
| 30 | + 792 | |
| 31 | + ] | |
| 32 | + /Parent 2 0 R | |
| 33 | + /Resources << | |
| 34 | + /Font << | |
| 35 | + /F1 6 0 R | |
| 36 | + >> | |
| 37 | + /ProcSet 7 0 R | |
| 38 | + >> | |
| 39 | + /Type /Page | |
| 40 | +>> | |
| 41 | +endobj | |
| 42 | + | |
| 43 | +%% Contents for page 1 | |
| 44 | +4 0 obj | |
| 45 | +<< | |
| 46 | + /Length 5 0 R | |
| 47 | +>> | |
| 48 | +stream | |
| 49 | +BT | |
| 50 | + /F1 24 Tf | |
| 51 | + 72 720 Td | |
| 52 | + (Potato) Tj | |
| 53 | +ET | |
| 54 | +endstream | |
| 55 | +endobj | |
| 56 | + | |
| 57 | +5 0 obj | |
| 58 | +44 | |
| 59 | +endobj | |
| 60 | + | |
| 61 | +6 0 obj | |
| 62 | +<< | |
| 63 | + /BaseFont /Helvetica | |
| 64 | + /Encoding /WinAnsiEncoding | |
| 65 | + /Name /F1 | |
| 66 | + /Subtype /Type1 | |
| 67 | + /Type /Font | |
| 68 | +>> | |
| 69 | +endobj | |
| 70 | + | |
| 71 | +7 0 obj | |
| 72 | +[ | |
| 73 | ||
| 74 | + /Text | |
| 75 | +] | |
| 76 | +endobj | |
| 77 | + | |
| 78 | +8 0 obj | |
| 79 | +<< | |
| 80 | + /Kids [ | |
| 81 | + 9 0 R | |
| 82 | + 10 0 R | |
| 83 | + ] | |
| 84 | +>> | |
| 85 | +endobj | |
| 86 | + | |
| 87 | +9 0 obj | |
| 88 | +<< | |
| 89 | + /Kids [ | |
| 90 | + 11 0 R | |
| 91 | + 12 0 R | |
| 92 | + ] | |
| 93 | + /Limits [ | |
| 94 | + 0 | |
| 95 | + 19 | |
| 96 | + ] | |
| 97 | +>> | |
| 98 | +endobj | |
| 99 | + | |
| 100 | +10 0 obj | |
| 101 | +<< | |
| 102 | + /Limits [ | |
| 103 | + 20 | |
| 104 | + 29 | |
| 105 | + ] | |
| 106 | + /Nums [ | |
| 107 | + 20 (twenty) | |
| 108 | + 22 (twenty-two) | |
| 109 | + 23 (twenty-three) | |
| 110 | + 29 (twenty-nine) | |
| 111 | + ] | |
| 112 | +>> | |
| 113 | +endobj | |
| 114 | + | |
| 115 | +11 0 obj | |
| 116 | +<< | |
| 117 | + /Limits [ | |
| 118 | + 0 | |
| 119 | + 9 | |
| 120 | + ] | |
| 121 | + /Nums [ | |
| 122 | + 1 (one) | |
| 123 | + 2 (two) | |
| 124 | + 3 (three) | |
| 125 | + 5 (five) | |
| 126 | + 6 (six) | |
| 127 | + 9 (nine) | |
| 128 | + ] | |
| 129 | +>> | |
| 130 | +endobj | |
| 131 | + | |
| 132 | +12 0 obj | |
| 133 | +<< | |
| 134 | + /Limits [ | |
| 135 | + 11 | |
| 136 | + 19 | |
| 137 | + ] | |
| 138 | + /Nums [ | |
| 139 | + 11 (elephant) | |
| 140 | + 12 (twelve) | |
| 141 | + 15 (fifteen) | |
| 142 | + 19 (nineteen) | |
| 143 | + ] | |
| 144 | +>> | |
| 145 | +endobj | |
| 146 | + | |
| 147 | + | |
| 148 | +xref | |
| 149 | +0 13 | |
| 150 | +0000000000 65535 f | |
| 151 | +0000000025 00000 n | |
| 152 | +0000000079 00000 n | |
| 153 | +0000000161 00000 n | |
| 154 | +0000000376 00000 n | |
| 155 | +0000000475 00000 n | |
| 156 | +0000000494 00000 n | |
| 157 | +0000000612 00000 n | |
| 158 | +0000000647 00000 n | |
| 159 | +0000000704 00000 n | |
| 160 | +0000000791 00000 n | |
| 161 | +0000000937 00000 n | |
| 162 | +0000001078 00000 n | |
| 163 | +trailer << | |
| 164 | + /Root 1 0 R | |
| 165 | + /QTest 8 0 R | |
| 166 | + /Size 13 | |
| 167 | + /ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><2c3b7a6ec7fc61db8a5db4eebf57f540>] | |
| 168 | +>> | |
| 169 | +startxref | |
| 170 | +1215 | |
| 171 | +%%EOF | ... | ... |
qpdf/test_driver.cc
| ... | ... | @@ -6,6 +6,7 @@ |
| 6 | 6 | #include <qpdf/QPDFPageDocumentHelper.hh> |
| 7 | 7 | #include <qpdf/QPDFPageObjectHelper.hh> |
| 8 | 8 | #include <qpdf/QPDFAcroFormDocumentHelper.hh> |
| 9 | +#include <qpdf/QPDFNumberTreeObjectHelper.hh> | |
| 9 | 10 | #include <qpdf/QUtil.hh> |
| 10 | 11 | #include <qpdf/QTC.hh> |
| 11 | 12 | #include <qpdf/Pl_StdioFile.hh> |
| ... | ... | @@ -1660,6 +1661,35 @@ void runtest(int n, char const* filename1, char const* arg2) |
| 1660 | 1661 | exit(3); |
| 1661 | 1662 | } |
| 1662 | 1663 | } |
| 1664 | + else if (n == 46) | |
| 1665 | + { | |
| 1666 | + // Test number tree. This test is crafted to work with | |
| 1667 | + // number-tree.pdf | |
| 1668 | + QPDFObjectHandle qtest = pdf.getTrailer().getKey("/QTest"); | |
| 1669 | + QPDFNumberTreeObjectHelper ntoh(qtest); | |
| 1670 | + QPDFNumberTreeObjectHelper::idx_map ntoh_map = ntoh.getAsMap(); | |
| 1671 | + for (QPDFNumberTreeObjectHelper::idx_map::iterator iter = | |
| 1672 | + ntoh_map.begin(); | |
| 1673 | + iter != ntoh_map.end(); ++iter) | |
| 1674 | + { | |
| 1675 | + std::cout << (*iter).first << " " | |
| 1676 | + << (*iter).second.getStringValue() | |
| 1677 | + << std::endl; | |
| 1678 | + } | |
| 1679 | + assert(1 == ntoh.getMin()); | |
| 1680 | + assert(29 == ntoh.getMax()); | |
| 1681 | + assert(ntoh.hasIndex(6)); | |
| 1682 | + assert(! ntoh.hasIndex(500)); | |
| 1683 | + QPDFObjectHandle oh; | |
| 1684 | + assert(! ntoh.findObject(4, oh)); | |
| 1685 | + assert(ntoh.findObject(3, oh)); | |
| 1686 | + assert("three" == oh.getStringValue()); | |
| 1687 | + QPDFNumberTreeObjectHelper::numtree_number offset = 0; | |
| 1688 | + assert(! ntoh.findObjectAtOrBelow(0, oh, offset)); | |
| 1689 | + assert(ntoh.findObjectAtOrBelow(8, oh, offset)); | |
| 1690 | + assert("six" == oh.getStringValue()); | |
| 1691 | + assert(2 == offset); | |
| 1692 | + } | |
| 1663 | 1693 | else |
| 1664 | 1694 | { |
| 1665 | 1695 | throw std::runtime_error(std::string("invalid test ") + | ... | ... |