Commit 4a1cce0a470e6deed6dbeb6093e4e5c16f53439d

Authored by Jay Berkenbilt
1 parent 9ad6cfd4

Reimplement name and number tree object helpers

Create a computationally and memory efficient implementation of name
and number trees that does binary searches as intended by the data
structure rather than loading into a map, which can use a great deal
of memory and can be very slow.
include/qpdf/QPDFNameTreeObjectHelper.hh
@@ -25,17 +25,16 @@ @@ -25,17 +25,16 @@
25 #include <qpdf/QPDFObjectHelper.hh> 25 #include <qpdf/QPDFObjectHelper.hh>
26 #include <qpdf/QPDFObjGen.hh> 26 #include <qpdf/QPDFObjGen.hh>
27 #include <map> 27 #include <map>
  28 +#include <memory>
28 29
29 #include <qpdf/DLL.h> 30 #include <qpdf/DLL.h>
30 31
31 // This is an object helper for name trees. See section 7.9.6 in the 32 // This is an object helper for name trees. See section 7.9.6 in the
32 -// PDF spec (ISO 32000) for a description of name trees. This  
33 -// implementation disregards stated limits and sequencing and simply  
34 -// builds a map from string object. If the array of values does not  
35 -// contain a string where expected, this implementation silently skips  
36 -// forward until it finds a string. When looking up items in the name  
37 -// tree, use UTF-8 strings. All names are normalized for lookup  
38 -// purposes. 33 +// PDF spec (ISO 32000) for a description of name trees. When looking
  34 +// up items in the name tree, use UTF-8 strings. All names are
  35 +// normalized for lookup purposes.
  36 +
  37 +class NNTreeImpl;
39 38
40 class QPDFNameTreeObjectHelper: public QPDFObjectHelper 39 class QPDFNameTreeObjectHelper: public QPDFObjectHelper
41 { 40 {
@@ -55,6 +54,9 @@ class QPDFNameTreeObjectHelper: public QPDFObjectHelper @@ -55,6 +54,9 @@ class QPDFNameTreeObjectHelper: public QPDFObjectHelper
55 QPDF_DLL 54 QPDF_DLL
56 bool findObject(std::string const& utf8, QPDFObjectHandle& oh); 55 bool findObject(std::string const& utf8, QPDFObjectHandle& oh);
57 56
  57 + // Return the contents of the name tree as a map. Note that name
  58 + // trees may be very large, so this may use a lot of RAM. It is
  59 + // more efficient to use QPDFNameTreeObjectHelper's iterator.
58 QPDF_DLL 60 QPDF_DLL
59 std::map<std::string, QPDFObjectHandle> getAsMap() const; 61 std::map<std::string, QPDFObjectHandle> getAsMap() const;
60 62
@@ -68,15 +70,12 @@ class QPDFNameTreeObjectHelper: public QPDFObjectHelper @@ -68,15 +70,12 @@ class QPDFNameTreeObjectHelper: public QPDFObjectHelper
68 ~Members(); 70 ~Members();
69 71
70 private: 72 private:
71 - Members();  
72 - Members(Members const&); 73 + Members(QPDFObjectHandle& oh);
  74 + Members(Members const&) = delete;
73 75
74 - std::map<std::string, QPDFObjectHandle> entries;  
75 - std::set<QPDFObjGen> seen; 76 + std::shared_ptr<NNTreeImpl> impl;
76 }; 77 };
77 78
78 - void updateMap(QPDFObjectHandle oh);  
79 -  
80 PointerHolder<Members> m; 79 PointerHolder<Members> m;
81 }; 80 };
82 81
include/qpdf/QPDFNumberTreeObjectHelper.hh
@@ -24,17 +24,15 @@ @@ -24,17 +24,15 @@
24 24
25 #include <qpdf/QPDFObjectHelper.hh> 25 #include <qpdf/QPDFObjectHelper.hh>
26 #include <qpdf/QPDFObjGen.hh> 26 #include <qpdf/QPDFObjGen.hh>
27 -#include <functional>  
28 #include <map> 27 #include <map>
  28 +#include <memory>
29 29
30 #include <qpdf/DLL.h> 30 #include <qpdf/DLL.h>
31 31
32 // This is an object helper for number trees. See section 7.9.7 in the 32 // This is an object helper for number trees. See section 7.9.7 in the
33 -// PDF spec (ISO 32000) for a description of number trees. This  
34 -// implementation disregards stated limits and sequencing and simply  
35 -// builds a map from numerical index to object. If the array of  
36 -// numbers does not contain a numerical value where expected, this  
37 -// implementation silently skips forward until it finds a number. 33 +// PDF spec (ISO 32000) for a description of number trees.
  34 +
  35 +class NNTreeImpl;
38 36
39 class QPDFNumberTreeObjectHelper: public QPDFObjectHelper 37 class QPDFNumberTreeObjectHelper: public QPDFObjectHelper
40 { 38 {
@@ -75,6 +73,10 @@ class QPDFNumberTreeObjectHelper: public QPDFObjectHelper @@ -75,6 +73,10 @@ class QPDFNumberTreeObjectHelper: public QPDFObjectHelper
75 bool findObjectAtOrBelow(numtree_number idx, QPDFObjectHandle& oh, 73 bool findObjectAtOrBelow(numtree_number idx, QPDFObjectHandle& oh,
76 numtree_number& offset); 74 numtree_number& offset);
77 75
  76 + // Return the contents of the number tree as a map. Note that
  77 + // number trees may be very large, so this may use a lot of RAM.
  78 + // It is more efficient to use QPDFNumberTreeObjectHelper's
  79 + // iterator.
78 typedef std::map<numtree_number, QPDFObjectHandle> idx_map; 80 typedef std::map<numtree_number, QPDFObjectHandle> idx_map;
79 QPDF_DLL 81 QPDF_DLL
80 idx_map getAsMap() const; 82 idx_map getAsMap() const;
@@ -90,23 +92,11 @@ class QPDFNumberTreeObjectHelper: public QPDFObjectHelper @@ -90,23 +92,11 @@ class QPDFNumberTreeObjectHelper: public QPDFObjectHelper
90 ~Members(); 92 ~Members();
91 93
92 private: 94 private:
93 - Members();  
94 - Members(Members const&);  
95 -  
96 - // Use a reverse sorted map so we can use the lower_bound  
97 - // method for searching. lower_bound returns smallest entry  
98 - // not before the searched entry, meaning that the searched  
99 - // entry is the lower bound. There's also an upper_bound  
100 - // method, but it does not do what you'd think it should.  
101 - // lower_bound implements >=, and upper_bound implements >.  
102 - typedef std::map<numtree_number,  
103 - QPDFObjectHandle,  
104 - std::greater<numtree_number> > idx_map;  
105 - idx_map entries;  
106 - std::set<QPDFObjGen> seen;  
107 - }; 95 + Members(QPDFObjectHandle& oh);
  96 + Members(Members const&) = delete;
108 97
109 - void updateMap(QPDFObjectHandle oh); 98 + std::shared_ptr<NNTreeImpl> impl;
  99 + };
110 100
111 PointerHolder<Members> m; 101 PointerHolder<Members> m;
112 }; 102 };
libqpdf/NNTree.cc 0 → 100644
  1 +#include <qpdf/NNTree.hh>
  2 +#include <qpdf/QUtil.hh>
  3 +
  4 +#include <exception>
  5 +
  6 +NNTreeIterator::PathElement::PathElement(
  7 + QPDFObjectHandle const& node, int kid_number) :
  8 + node(node),
  9 + kid_number(kid_number)
  10 +{
  11 +}
  12 +
  13 +QPDFObjectHandle
  14 +NNTreeIterator::PathElement::getNextKid(bool backward)
  15 +{
  16 + kid_number += backward ? -1 : 1;
  17 + auto kids = node.getKey("/Kids");
  18 + QPDFObjectHandle result;
  19 + if ((kid_number >= 0) && (kid_number < kids.getArrayNItems()))
  20 + {
  21 + result = kids.getArrayItem(kid_number);
  22 + }
  23 + else
  24 + {
  25 + result = QPDFObjectHandle::newNull();
  26 + }
  27 + return result;
  28 +}
  29 +
  30 +bool
  31 +NNTreeIterator::valid() const
  32 +{
  33 + return this->item_number >= 0;
  34 +}
  35 +
  36 +void
  37 +NNTreeIterator::increment(bool backward)
  38 +{
  39 + if (this->item_number < 0)
  40 + {
  41 + throw std::logic_error(
  42 + "attempt made to increment or decrement an invalid"
  43 + " name/number tree iterator");
  44 + }
  45 + this->item_number += backward ? -2 : 2;
  46 + auto items = this->node.getKey(details.itemsKey());
  47 + if ((this->item_number < 0) ||
  48 + (this->item_number >= items.getArrayNItems()))
  49 + {
  50 + bool found = false;
  51 + setItemNumber(QPDFObjectHandle(), -1);
  52 + while (! (found || this->path.empty()))
  53 + {
  54 + auto& element = this->path.back();
  55 + auto node = element.getNextKid(backward);
  56 + if (node.isNull())
  57 + {
  58 + this->path.pop_back();
  59 + }
  60 + else
  61 + {
  62 + deepen(node, ! backward);
  63 + found = true;
  64 + }
  65 + }
  66 + }
  67 +}
  68 +
  69 +NNTreeIterator&
  70 +NNTreeIterator::operator++()
  71 +{
  72 + increment(false);
  73 + return *this;
  74 +}
  75 +
  76 +NNTreeIterator&
  77 +NNTreeIterator::operator--()
  78 +{
  79 + increment(true);
  80 + return *this;
  81 +}
  82 +
  83 +NNTreeIterator::reference
  84 +NNTreeIterator::operator*()
  85 +{
  86 + if (this->item_number < 0)
  87 + {
  88 + throw std::logic_error(
  89 + "attempt made to dereference an invalid"
  90 + " name/number tree iterator");
  91 + }
  92 + auto items = this->node.getKey(details.itemsKey());
  93 + return std::make_pair(items.getArrayItem(this->item_number),
  94 + items.getArrayItem(1+this->item_number));
  95 +}
  96 +
  97 +bool
  98 +NNTreeIterator::operator==(NNTreeIterator const& other) const
  99 +{
  100 + if ((this->item_number == -1) && (other.item_number == -1))
  101 + {
  102 + return true;
  103 + }
  104 + if (this->path.size() != other.path.size())
  105 + {
  106 + return false;
  107 + }
  108 + auto tpi = this->path.begin();
  109 + auto opi = other.path.begin();
  110 + while (tpi != this->path.end())
  111 + {
  112 + if ((*tpi).kid_number != (*opi).kid_number)
  113 + {
  114 + return false;
  115 + }
  116 + ++tpi;
  117 + ++opi;
  118 + }
  119 + if (this->item_number != other.item_number)
  120 + {
  121 + return false;
  122 + }
  123 + return true;
  124 +}
  125 +
  126 +void
  127 +NNTreeIterator::setItemNumber(QPDFObjectHandle const& node, int n)
  128 +{
  129 + this->node = node;
  130 + this->item_number = n;
  131 +}
  132 +
  133 +void
  134 +NNTreeIterator::addPathElement(QPDFObjectHandle const& node,
  135 + int kid_number)
  136 +{
  137 + this->path.push_back(PathElement(node, kid_number));
  138 +}
  139 +
  140 +void
  141 +NNTreeIterator::deepen(QPDFObjectHandle node, bool first)
  142 +{
  143 + std::set<QPDFObjGen> seen;
  144 + while (true)
  145 + {
  146 + if (node.isIndirect())
  147 + {
  148 + auto og = node.getObjGen();
  149 + if (seen.count(og))
  150 + {
  151 + throw std::runtime_error("loop detected");
  152 + }
  153 + seen.insert(og);
  154 + }
  155 + auto kids = node.getKey("/Kids");
  156 + int nkids = kids.isArray() ? kids.getArrayNItems() : 0;
  157 + auto items = node.getKey(details.itemsKey());
  158 + int nitems = items.isArray() ? items.getArrayNItems() : 0;
  159 + if (nitems > 0)
  160 + {
  161 + setItemNumber(node, first ? 0 : nitems - 2);
  162 + break;
  163 + }
  164 + else if (nkids > 0)
  165 + {
  166 + int kid_number = first ? 0 : nkids - 1;
  167 + addPathElement(node, kid_number);
  168 + node = kids.getArrayItem(kid_number);
  169 + }
  170 + else
  171 + {
  172 + throw std::runtime_error("node has neither /Kids nor /Names");
  173 + }
  174 + }
  175 +}
  176 +
  177 +NNTreeImpl::NNTreeImpl(NNTreeDetails const& details,
  178 + QPDF* qpdf,
  179 + QPDFObjectHandle& oh,
  180 + bool auto_repair) :
  181 + details(details),
  182 + oh(oh)
  183 +{
  184 +}
  185 +
  186 +NNTreeImpl::iterator
  187 +NNTreeImpl::begin()
  188 +{
  189 + iterator result(details);
  190 + result.deepen(this->oh, true);
  191 + return result;
  192 +}
  193 +
  194 +NNTreeImpl::iterator
  195 +NNTreeImpl::end()
  196 +{
  197 + return iterator(details);
  198 +}
  199 +
  200 +NNTreeImpl::iterator
  201 +NNTreeImpl::last()
  202 +{
  203 + iterator result(details);
  204 + result.deepen(this->oh, false);
  205 + return result;
  206 +}
  207 +
  208 +int
  209 +NNTreeImpl::withinLimits(QPDFObjectHandle key, QPDFObjectHandle node)
  210 +{
  211 + int result = 0;
  212 + auto limits = node.getKey("/Limits");
  213 + if (limits.isArray() && (limits.getArrayNItems() >= 2) &&
  214 + details.keyValid(limits.getArrayItem(0)) &&
  215 + details.keyValid(limits.getArrayItem(1)))
  216 + {
  217 + if (details.compareKeys(key, limits.getArrayItem(0)) < 0)
  218 + {
  219 + result = -1;
  220 + }
  221 + else if (details.compareKeys(key, limits.getArrayItem(1)) > 0)
  222 + {
  223 + result = 1;
  224 + }
  225 + }
  226 + else
  227 + {
  228 + // The root node has no limits, so consider the item to be in
  229 + // here if there are no limits. This will cause checking lower
  230 + // items.
  231 + }
  232 + return result;
  233 +}
  234 +
  235 +int
  236 +NNTreeImpl::binarySearch(
  237 + QPDFObjectHandle key, QPDFObjectHandle items,
  238 + int num_items, bool return_prev_if_not_found,
  239 + int (NNTreeImpl::*compare)(QPDFObjectHandle& key,
  240 + QPDFObjectHandle& node,
  241 + int item))
  242 +{
  243 + int max_idx = 1;
  244 + while (max_idx < num_items)
  245 + {
  246 + max_idx <<= 1;
  247 + }
  248 +
  249 + int step = max_idx / 2;
  250 + int checks = max_idx;
  251 + int idx = step;
  252 + int found_idx = -1;
  253 + bool found = false;
  254 + bool found_leq = false;
  255 + int status = 0;
  256 +
  257 + while ((! found) && (checks > 0))
  258 + {
  259 + if (idx < num_items)
  260 + {
  261 + status = (this->*compare)(key, items, idx);
  262 + if (status >= 0)
  263 + {
  264 + found_leq = true;
  265 + found_idx = idx;
  266 + }
  267 + }
  268 + else
  269 + {
  270 + // consider item to be below anything after the top
  271 + status = -1;
  272 + }
  273 +
  274 + if (status == 0)
  275 + {
  276 + found = true;
  277 + }
  278 + else
  279 + {
  280 + checks >>= 1;
  281 + if (checks > 0)
  282 + {
  283 + step >>= 1;
  284 + if (step == 0)
  285 + {
  286 + step = 1;
  287 + }
  288 +
  289 + if (status < 0)
  290 + {
  291 + idx -= step;
  292 + }
  293 + else
  294 + {
  295 + idx += step;
  296 + }
  297 + }
  298 + }
  299 + }
  300 +
  301 + if (found || (found_leq && return_prev_if_not_found))
  302 + {
  303 + return found_idx;
  304 + }
  305 + else
  306 + {
  307 + return -1;
  308 + }
  309 +}
  310 +
  311 +int
  312 +NNTreeImpl::compareKeyItem(
  313 + QPDFObjectHandle& key, QPDFObjectHandle& items, int idx)
  314 +{
  315 + if (! ((items.isArray() && (items.getArrayNItems() > (2 * idx)) &&
  316 + details.keyValid(items.getArrayItem(2 * idx)))))
  317 + {
  318 + throw std::runtime_error("item at index " +
  319 + QUtil::int_to_string(2 * idx) +
  320 + " is not the right type");
  321 + }
  322 + return details.compareKeys(key, items.getArrayItem(2 * idx));
  323 +}
  324 +
  325 +int
  326 +NNTreeImpl::compareKeyKid(QPDFObjectHandle& key, QPDFObjectHandle& kids, int idx)
  327 +{
  328 + if (! (kids.isArray() && (idx < kids.getArrayNItems()) &&
  329 + kids.getArrayItem(idx).isDictionary()))
  330 + {
  331 + throw std::runtime_error("invalid kid at index " +
  332 + QUtil::int_to_string(idx));
  333 + }
  334 + return withinLimits(key, kids.getArrayItem(idx));
  335 +}
  336 +
  337 +
  338 +NNTreeImpl::iterator
  339 +NNTreeImpl::find(QPDFObjectHandle key, bool return_prev_if_not_found)
  340 +{
  341 + auto first_item = begin();
  342 + auto last_item = end();
  343 + if (first_item.valid() &&
  344 + details.keyValid((*first_item).first) &&
  345 + details.compareKeys(key, (*first_item).first) < 0)
  346 + {
  347 + // Before the first key
  348 + return end();
  349 + }
  350 + else if (last_item.valid() &&
  351 + details.keyValid((*last_item).first) &&
  352 + details.compareKeys(key, (*last_item).first) > 0)
  353 + {
  354 + // After the last key
  355 + if (return_prev_if_not_found)
  356 + {
  357 + return last_item;
  358 + }
  359 + else
  360 + {
  361 + return end();
  362 + }
  363 + }
  364 +
  365 + std::set<QPDFObjGen> seen;
  366 + auto node = this->oh;
  367 + iterator result(details);
  368 +
  369 + while (true)
  370 + {
  371 + auto og = node.getObjGen();
  372 + if (seen.count(og))
  373 + {
  374 + throw std::runtime_error("loop detected in find");
  375 + }
  376 + seen.insert(og);
  377 +
  378 + auto kids = node.getKey("/Kids");
  379 + int nkids = kids.isArray() ? kids.getArrayNItems() : 0;
  380 + auto items = node.getKey(details.itemsKey());
  381 + int nitems = items.isArray() ? items.getArrayNItems() : 0;
  382 + if (nitems > 0)
  383 + {
  384 + int idx = binarySearch(
  385 + key, items, nitems / 2, return_prev_if_not_found,
  386 + &NNTreeImpl::compareKeyItem);
  387 + if (idx >= 0)
  388 + {
  389 + result.setItemNumber(node, 2 * idx);
  390 + }
  391 + break;
  392 + }
  393 + else if (nkids > 0)
  394 + {
  395 + int idx = binarySearch(
  396 + key, kids, nkids, true,
  397 + &NNTreeImpl::compareKeyKid);
  398 + if (idx == -1)
  399 + {
  400 + throw std::runtime_error(
  401 + "unexpected -1 from binary search of kids;"
  402 + " tree may not be sorted");
  403 + }
  404 + result.addPathElement(node, idx);
  405 + node = kids.getArrayItem(idx);
  406 + }
  407 + else
  408 + {
  409 + throw std::runtime_error("bad node during find");
  410 + }
  411 + }
  412 +
  413 + return result;
  414 +}
libqpdf/QPDFNameTreeObjectHelper.cc
1 #include <qpdf/QPDFNameTreeObjectHelper.hh> 1 #include <qpdf/QPDFNameTreeObjectHelper.hh>
  2 +#include <qpdf/NNTree.hh>
  3 +
  4 +class NameTreeDetails: public NNTreeDetails
  5 +{
  6 + public:
  7 + virtual std::string const& itemsKey() const override
  8 + {
  9 + static std::string k("/Names");
  10 + return k;
  11 + }
  12 + virtual bool keyValid(QPDFObjectHandle oh) const override
  13 + {
  14 + return oh.isString();
  15 + }
  16 + virtual int compareKeys(
  17 + QPDFObjectHandle a, QPDFObjectHandle b) const override
  18 + {
  19 + if (! (keyValid(a) && keyValid(b)))
  20 + {
  21 + // We don't call this without calling keyValid first
  22 + throw std::logic_error("comparing invalid keys");
  23 + }
  24 + auto as = a.getUTF8Value();
  25 + auto bs = b.getUTF8Value();
  26 + return ((as < bs) ? -1 : (as > bs) ? 1 : 0);
  27 + }
  28 +};
  29 +
  30 +static NameTreeDetails name_tree_details;
2 31
3 QPDFNameTreeObjectHelper::Members::~Members() 32 QPDFNameTreeObjectHelper::Members::~Members()
4 { 33 {
5 } 34 }
6 35
7 -QPDFNameTreeObjectHelper::Members::Members() 36 +QPDFNameTreeObjectHelper::Members::Members(QPDFObjectHandle& oh) :
  37 + impl(std::make_shared<NNTreeImpl>(name_tree_details, nullptr, oh, false))
8 { 38 {
9 } 39 }
10 40
11 QPDFNameTreeObjectHelper::QPDFNameTreeObjectHelper(QPDFObjectHandle oh) : 41 QPDFNameTreeObjectHelper::QPDFNameTreeObjectHelper(QPDFObjectHandle oh) :
12 QPDFObjectHelper(oh), 42 QPDFObjectHelper(oh),
13 - m(new Members()) 43 + m(new Members(oh))
14 { 44 {
15 - updateMap(oh);  
16 } 45 }
17 46
18 QPDFNameTreeObjectHelper::~QPDFNameTreeObjectHelper() 47 QPDFNameTreeObjectHelper::~QPDFNameTreeObjectHelper()
19 { 48 {
20 } 49 }
21 50
22 -void  
23 -QPDFNameTreeObjectHelper::updateMap(QPDFObjectHandle oh)  
24 -{  
25 - if (this->m->seen.count(oh.getObjGen()))  
26 - {  
27 - return;  
28 - }  
29 - this->m->seen.insert(oh.getObjGen());  
30 - QPDFObjectHandle names = oh.getKey("/Names");  
31 - if (names.isArray())  
32 - {  
33 - int nitems = names.getArrayNItems();  
34 - int i = 0;  
35 - while (i < nitems - 1)  
36 - {  
37 - QPDFObjectHandle name = names.getArrayItem(i);  
38 - if (name.isString())  
39 - {  
40 - ++i;  
41 - QPDFObjectHandle obj = names.getArrayItem(i);  
42 - this->m->entries[name.getUTF8Value()] = obj;  
43 - }  
44 - ++i;  
45 - }  
46 - }  
47 - QPDFObjectHandle kids = oh.getKey("/Kids");  
48 - if (kids.isArray())  
49 - {  
50 - int nitems = kids.getArrayNItems();  
51 - for (int i = 0; i < nitems; ++i)  
52 - {  
53 - updateMap(kids.getArrayItem(i));  
54 - }  
55 - }  
56 -}  
57 -  
58 bool 51 bool
59 QPDFNameTreeObjectHelper::hasName(std::string const& name) 52 QPDFNameTreeObjectHelper::hasName(std::string const& name)
60 { 53 {
61 - return this->m->entries.count(name) != 0; 54 + auto i = this->m->impl->find(QPDFObjectHandle::newUnicodeString(name));
  55 + return (i != this->m->impl->end());
62 } 56 }
63 57
64 bool 58 bool
65 QPDFNameTreeObjectHelper::findObject( 59 QPDFNameTreeObjectHelper::findObject(
66 std::string const& name, QPDFObjectHandle& oh) 60 std::string const& name, QPDFObjectHandle& oh)
67 { 61 {
68 - std::map<std::string, QPDFObjectHandle>::iterator i =  
69 - this->m->entries.find(name);  
70 - if (i == this->m->entries.end()) 62 + auto i = this->m->impl->find(QPDFObjectHandle::newUnicodeString(name));
  63 + if (i == this->m->impl->end())
71 { 64 {
72 return false; 65 return false;
73 } 66 }
@@ -78,5 +71,12 @@ QPDFNameTreeObjectHelper::findObject( @@ -78,5 +71,12 @@ QPDFNameTreeObjectHelper::findObject(
78 std::map<std::string, QPDFObjectHandle> 71 std::map<std::string, QPDFObjectHandle>
79 QPDFNameTreeObjectHelper::getAsMap() const 72 QPDFNameTreeObjectHelper::getAsMap() const
80 { 73 {
81 - return this->m->entries; 74 + std::map<std::string, QPDFObjectHandle> result;
  75 + for (auto i: *(this->m->impl))
  76 + {
  77 + result.insert(
  78 + std::make_pair(i.first.getUTF8Value(),
  79 + i.second));
  80 + }
  81 + return result;
82 } 82 }
libqpdf/QPDFNumberTreeObjectHelper.cc
1 #include <qpdf/QPDFNumberTreeObjectHelper.hh> 1 #include <qpdf/QPDFNumberTreeObjectHelper.hh>
  2 +#include <qpdf/NNTree.hh>
2 3
3 -QPDFNumberTreeObjectHelper::Members::~Members()  
4 -{  
5 -}  
6 -  
7 -QPDFNumberTreeObjectHelper::Members::Members() 4 +class NumberTreeDetails: public NNTreeDetails
8 { 5 {
9 -}  
10 -  
11 -QPDFNumberTreeObjectHelper::QPDFNumberTreeObjectHelper(QPDFObjectHandle oh) :  
12 - QPDFObjectHelper(oh),  
13 - m(new Members())  
14 -{  
15 - updateMap(oh);  
16 -}  
17 -  
18 -void  
19 -QPDFNumberTreeObjectHelper::updateMap(QPDFObjectHandle oh)  
20 -{  
21 - if (this->m->seen.count(oh.getObjGen())) 6 + public:
  7 + virtual std::string const& itemsKey() const override
22 { 8 {
23 - return; 9 + static std::string k("/Nums");
  10 + return k;
24 } 11 }
25 - this->m->seen.insert(oh.getObjGen());  
26 - QPDFObjectHandle nums = oh.getKey("/Nums");  
27 - if (nums.isArray()) 12 + virtual bool keyValid(QPDFObjectHandle oh) const override
28 { 13 {
29 - int nitems = nums.getArrayNItems();  
30 - int i = 0;  
31 - while (i < nitems - 1)  
32 - {  
33 - QPDFObjectHandle num = nums.getArrayItem(i);  
34 - if (num.isInteger())  
35 - {  
36 - ++i;  
37 - QPDFObjectHandle obj = nums.getArrayItem(i);  
38 - this->m->entries[num.getIntValue()] = obj;  
39 - }  
40 - ++i;  
41 - } 14 + return oh.isInteger();
42 } 15 }
43 - QPDFObjectHandle kids = oh.getKey("/Kids");  
44 - if (kids.isArray()) 16 + virtual int compareKeys(
  17 + QPDFObjectHandle a, QPDFObjectHandle b) const override
45 { 18 {
46 - int nitems = kids.getArrayNItems();  
47 - for (int i = 0; i < nitems; ++i) 19 + if (! (keyValid(a) && keyValid(b)))
48 { 20 {
49 - updateMap(kids.getArrayItem(i)); 21 + // We don't call this without calling keyValid first
  22 + throw std::logic_error("comparing invalid keys");
50 } 23 }
  24 + auto as = a.getIntValue();
  25 + auto bs = b.getIntValue();
  26 + return ((as < bs) ? -1 : (as > bs) ? 1 : 0);
51 } 27 }
  28 +};
  29 +
  30 +static NumberTreeDetails number_tree_details;
  31 +
  32 +QPDFNumberTreeObjectHelper::Members::~Members()
  33 +{
  34 +}
  35 +
  36 +QPDFNumberTreeObjectHelper::Members::Members(QPDFObjectHandle& oh) :
  37 + impl(std::make_shared<NNTreeImpl>(number_tree_details, nullptr, oh, false))
  38 +{
52 } 39 }
53 40
  41 +QPDFNumberTreeObjectHelper::QPDFNumberTreeObjectHelper(QPDFObjectHandle oh) :
  42 + QPDFObjectHelper(oh),
  43 + m(new Members(oh))
  44 +{
  45 +}
54 46
55 QPDFNumberTreeObjectHelper::numtree_number 47 QPDFNumberTreeObjectHelper::numtree_number
56 QPDFNumberTreeObjectHelper::getMin() 48 QPDFNumberTreeObjectHelper::getMin()
57 { 49 {
58 - if (this->m->entries.empty()) 50 + auto i = this->m->impl->begin();
  51 + if (i == this->m->impl->end())
59 { 52 {
60 return 0; 53 return 0;
61 } 54 }
62 - // Our map is sorted in reverse.  
63 - return this->m->entries.rbegin()->first; 55 + return (*i).first.getIntValue();
64 } 56 }
65 57
66 QPDFNumberTreeObjectHelper::numtree_number 58 QPDFNumberTreeObjectHelper::numtree_number
67 QPDFNumberTreeObjectHelper::getMax() 59 QPDFNumberTreeObjectHelper::getMax()
68 { 60 {
69 - if (this->m->entries.empty()) 61 + auto i = this->m->impl->last();
  62 + if (i == this->m->impl->end())
70 { 63 {
71 return 0; 64 return 0;
72 } 65 }
73 - // Our map is sorted in reverse.  
74 - return this->m->entries.begin()->first; 66 + return (*i).first.getIntValue();
75 } 67 }
76 68
77 bool 69 bool
78 QPDFNumberTreeObjectHelper::hasIndex(numtree_number idx) 70 QPDFNumberTreeObjectHelper::hasIndex(numtree_number idx)
79 { 71 {
80 - return this->m->entries.count(idx) != 0; 72 + auto i = this->m->impl->find(QPDFObjectHandle::newInteger(idx));
  73 + return (i != this->m->impl->end());
81 } 74 }
82 75
83 bool 76 bool
84 QPDFNumberTreeObjectHelper::findObject( 77 QPDFNumberTreeObjectHelper::findObject(
85 numtree_number idx, QPDFObjectHandle& oh) 78 numtree_number idx, QPDFObjectHandle& oh)
86 { 79 {
87 - Members::idx_map::iterator i = this->m->entries.find(idx);  
88 - if (i == this->m->entries.end()) 80 + auto i = this->m->impl->find(QPDFObjectHandle::newInteger(idx));
  81 + if (i == this->m->impl->end())
89 { 82 {
90 return false; 83 return false;
91 } 84 }
@@ -98,13 +91,13 @@ QPDFNumberTreeObjectHelper::findObjectAtOrBelow( @@ -98,13 +91,13 @@ QPDFNumberTreeObjectHelper::findObjectAtOrBelow(
98 numtree_number idx, QPDFObjectHandle& oh, 91 numtree_number idx, QPDFObjectHandle& oh,
99 numtree_number& offset) 92 numtree_number& offset)
100 { 93 {
101 - Members::idx_map::iterator i = this->m->entries.lower_bound(idx);  
102 - if (i == this->m->entries.end()) 94 + auto i = this->m->impl->find(QPDFObjectHandle::newInteger(idx), true);
  95 + if (i == this->m->impl->end())
103 { 96 {
104 return false; 97 return false;
105 } 98 }
106 oh = (*i).second; 99 oh = (*i).second;
107 - offset = idx - (*i).first; 100 + offset = idx - (*i).first.getIntValue();
108 return true; 101 return true;
109 } 102 }
110 103
@@ -112,10 +105,11 @@ std::map&lt;QPDFNumberTreeObjectHelper::numtree_number, QPDFObjectHandle&gt; @@ -112,10 +105,11 @@ std::map&lt;QPDFNumberTreeObjectHelper::numtree_number, QPDFObjectHandle&gt;
112 QPDFNumberTreeObjectHelper::getAsMap() const 105 QPDFNumberTreeObjectHelper::getAsMap() const
113 { 106 {
114 std::map<numtree_number, QPDFObjectHandle> result; 107 std::map<numtree_number, QPDFObjectHandle> result;
115 - for (Members::idx_map::const_iterator iter = this->m->entries.begin();  
116 - iter != this->m->entries.end(); ++iter) 108 + for (auto i: *(this->m->impl))
117 { 109 {
118 - result[(*iter).first] = (*iter).second; 110 + result.insert(
  111 + std::make_pair(i.first.getIntValue(),
  112 + i.second));
119 } 113 }
120 return result; 114 return result;
121 } 115 }
libqpdf/build.mk
@@ -33,6 +33,7 @@ SRCS_libqpdf = \ @@ -33,6 +33,7 @@ SRCS_libqpdf = \
33 libqpdf/InsecureRandomDataProvider.cc \ 33 libqpdf/InsecureRandomDataProvider.cc \
34 libqpdf/JSON.cc \ 34 libqpdf/JSON.cc \
35 libqpdf/MD5.cc \ 35 libqpdf/MD5.cc \
  36 + libqpdf/NNTree.cc \
36 libqpdf/OffsetInputSource.cc \ 37 libqpdf/OffsetInputSource.cc \
37 libqpdf/Pipeline.cc \ 38 libqpdf/Pipeline.cc \
38 libqpdf/Pl_AES_PDF.cc \ 39 libqpdf/Pl_AES_PDF.cc \
libqpdf/qpdf/NNTree.hh 0 → 100644
  1 +#ifndef NNTREE_HH
  2 +#define NNTREE_HH
  3 +
  4 +#include <qpdf/QPDF.hh>
  5 +#include <qpdf/QPDFObjectHandle.hh>
  6 +
  7 +#include <iterator>
  8 +#include <list>
  9 +
  10 +class NNTreeDetails
  11 +{
  12 + public:
  13 + virtual std::string const& itemsKey() const = 0;
  14 + virtual bool keyValid(QPDFObjectHandle) const = 0;
  15 + virtual int compareKeys(QPDFObjectHandle, QPDFObjectHandle) const = 0;
  16 +};
  17 +
  18 +class NNTreeIterator: public std::iterator<
  19 + std::bidirectional_iterator_tag,
  20 + std::pair<QPDFObjectHandle, QPDFObjectHandle>,
  21 + void,
  22 + std::pair<QPDFObjectHandle, QPDFObjectHandle>*,
  23 + std::pair<QPDFObjectHandle, QPDFObjectHandle>>
  24 +{
  25 + friend class NNTreeImpl;
  26 + public:
  27 + bool valid() const;
  28 + NNTreeIterator& operator++();
  29 + NNTreeIterator operator++(int)
  30 + {
  31 + NNTreeIterator t = *this;
  32 + ++(*this);
  33 + return t;
  34 + }
  35 + NNTreeIterator& operator--();
  36 + NNTreeIterator operator--(int)
  37 + {
  38 + NNTreeIterator t = *this;
  39 + --(*this);
  40 + return t;
  41 + }
  42 + reference operator*();
  43 + bool operator==(NNTreeIterator const& other) const;
  44 + bool operator!=(NNTreeIterator const& other) const
  45 + {
  46 + return ! operator==(other);
  47 + }
  48 +
  49 + private:
  50 + class PathElement
  51 + {
  52 + public:
  53 + PathElement(QPDFObjectHandle const& node, int kid_number);
  54 + QPDFObjectHandle getNextKid(bool backward);
  55 +
  56 + QPDFObjectHandle node;
  57 + int kid_number;
  58 + };
  59 +
  60 + NNTreeIterator(NNTreeDetails const& details) :
  61 + details(details),
  62 + item_number(-1)
  63 + {
  64 + }
  65 + void deepen(QPDFObjectHandle node, bool first);
  66 + void setItemNumber(QPDFObjectHandle const& node, int);
  67 + void addPathElement(QPDFObjectHandle const& node, int kid_number);
  68 + void increment(bool backward);
  69 +
  70 + NNTreeDetails const& details;
  71 + std::list<PathElement> path;
  72 + QPDFObjectHandle node;
  73 + int item_number;
  74 +};
  75 +
  76 +class NNTreeImpl
  77 +{
  78 + public:
  79 + typedef NNTreeIterator iterator;
  80 +
  81 + NNTreeImpl(NNTreeDetails const&, QPDF*, QPDFObjectHandle&,
  82 + bool auto_repair = true);
  83 + iterator begin();
  84 + iterator end();
  85 + iterator last();
  86 + iterator find(QPDFObjectHandle key, bool return_prev_if_not_found = false);
  87 +
  88 + private:
  89 + int withinLimits(QPDFObjectHandle key, QPDFObjectHandle node);
  90 + int binarySearch(
  91 + QPDFObjectHandle key, QPDFObjectHandle items,
  92 + int num_items, bool return_prev_if_not_found,
  93 + int (NNTreeImpl::*compare)(QPDFObjectHandle& key,
  94 + QPDFObjectHandle& node,
  95 + int item));
  96 + int compareKeyItem(
  97 + QPDFObjectHandle& key, QPDFObjectHandle& items, int idx);
  98 + int compareKeyKid(
  99 + QPDFObjectHandle& key, QPDFObjectHandle& items, int idx);
  100 +
  101 + NNTreeDetails const& details;
  102 + QPDFObjectHandle oh;
  103 +};
  104 +
  105 +#endif // NNTREE_HH
libtests/build.mk
@@ -16,6 +16,7 @@ BINS_libtests = \ @@ -16,6 +16,7 @@ BINS_libtests = \
16 main_from_wmain \ 16 main_from_wmain \
17 matrix \ 17 matrix \
18 md5 \ 18 md5 \
  19 + nntree \
19 numrange \ 20 numrange \
20 pointer_holder \ 21 pointer_holder \
21 predictors \ 22 predictors \
libtests/nntree.cc 0 → 100644
  1 +#include <qpdf/QPDFNumberTreeObjectHelper.hh>
  2 +#include <qpdf/QPDF.hh>
  3 +#include <qpdf/QUtil.hh>
  4 +#include <iostream>
  5 +
  6 +bool report(QPDFObjectHandle oh, long long item, long long exp_item)
  7 +{
  8 + QPDFNumberTreeObjectHelper nh(oh);
  9 + QPDFObjectHandle o1;
  10 + long long offset = 0;
  11 + bool f1 = nh.findObjectAtOrBelow(item, o1, offset);
  12 + QPDFObjectHandle o2;
  13 + bool f2 = nh.findObject(item, o2);
  14 +
  15 + bool failed = false;
  16 + auto show = [&failed, &oh, &item] () {
  17 + if (! failed)
  18 + {
  19 + failed = true;
  20 + std::cout << "key = " << item << ", oh = "
  21 + << oh.unparseResolved() << std::endl;
  22 + }
  23 + };
  24 +
  25 + auto mk_wanted = [](long long i) {
  26 + return ((i == -1)
  27 + ? "end"
  28 + : (QUtil::int_to_string(i) +
  29 + "/(-" + QUtil::int_to_string(i) + "-)"));
  30 + };
  31 + std::string i1_wanted = mk_wanted(exp_item);
  32 + std::string i2_wanted = mk_wanted(item == exp_item ? item : -1);
  33 + auto mk_actual = [](bool found, long long v, QPDFObjectHandle& o) {
  34 + return (found
  35 + ? QUtil::int_to_string(v) + "/" + o.unparse()
  36 + : "end");
  37 + };
  38 + std::string i1_actual = mk_actual(f1, item - offset, o1);
  39 + std::string i2_actual = mk_actual(f2, item, o2);
  40 +
  41 + if (i1_wanted != i1_actual)
  42 + {
  43 + show();
  44 + std::cout << "i1: wanted " << i1_wanted
  45 + << ", got " << i1_actual
  46 + << std::endl;
  47 + }
  48 + if (i2_wanted != i2_actual)
  49 + {
  50 + show();
  51 + std::cout << "i2: wanted " << i2_wanted
  52 + << ", got " << i2_actual
  53 + << std::endl;
  54 + }
  55 +
  56 + return failed;
  57 +}
  58 +
  59 +int main()
  60 +{
  61 + QPDF q;
  62 + q.emptyPDF();
  63 +
  64 + auto mk = [&q] (std::vector<int> const& v) {
  65 + auto nums = QPDFObjectHandle::newArray();
  66 + for (auto i: v)
  67 + {
  68 + nums.appendItem(QPDFObjectHandle::newInteger(i));
  69 + nums.appendItem(QPDFObjectHandle::newString(
  70 + "-" + QUtil::int_to_string(i) + "-"));
  71 + }
  72 + auto limits = QPDFObjectHandle::newArray();
  73 + limits.appendItem(QPDFObjectHandle::newInteger(v.at(0)));
  74 + limits.appendItem(QPDFObjectHandle::newInteger(v.at(v.size() - 1)));
  75 + auto node = q.makeIndirectObject(QPDFObjectHandle::newDictionary());
  76 + node.replaceKey("/Nums", nums);
  77 + node.replaceKey("/Limits", limits);
  78 + return node;
  79 + };
  80 +
  81 + bool any_failures = false;
  82 + auto r = [&any_failures](QPDFObjectHandle& oh, int item, int exp) {
  83 + if (report(oh, item, exp))
  84 + {
  85 + any_failures = true;
  86 + }
  87 + };
  88 +
  89 + auto a = mk({2, 3, 5, 9, 11, 12, 14, 18});
  90 + r(a, 1, -1);
  91 + r(a, 2, 2);
  92 + r(a, 9, 9);
  93 + r(a, 11, 11);
  94 + r(a, 10, 9);
  95 + r(a, 7, 5);
  96 + r(a, 18, 18);
  97 + r(a, 19, 18);
  98 +
  99 + auto b = mk({2, 4});
  100 + r(b, 1, -1);
  101 + r(b, 2, 2);
  102 + r(b, 3, 2);
  103 + r(b, 4, 4);
  104 + r(b, 5, 4);
  105 +
  106 + auto c = mk({3});
  107 + r(c, 1, -1);
  108 + r(c, 3, 3);
  109 + r(c, 5, 3);
  110 +
  111 + auto d = mk({2, 3, 5, 9, 10, 12, 14, 18, 19, 20});
  112 + r(d, 1, -1);
  113 + r(d, 2, 2);
  114 + r(d, 18, 18);
  115 + r(d, 14, 14);
  116 + r(d, 19, 19);
  117 + r(d, 20, 20);
  118 + r(d, 25, 20);
  119 +
  120 + if (! any_failures)
  121 + {
  122 + std::cout << "all tests passed" << std::endl;
  123 + }
  124 +}
libtests/qtest/nntree.test 0 → 100644
#!/usr/bin/env perl
require 5.008;
use warnings;
use strict;

require TestDriver;

# Run the nntree test program and require that it prints exactly
# "all tests passed" and exits with status 0.
my $td = TestDriver->new('nntree');

my $command = {$td->COMMAND => "nntree"};
my $expected = {$td->STRING => "all tests passed\n",
                $td->EXIT_STATUS => 0};
$td->runtest("nntree", $command, $expected, $td->NORMALIZE_NEWLINES);

$td->report(1);
qpdf/qtest/qpdf/name-tree.pdf
@@ -91,8 +91,8 @@ endobj @@ -91,8 +91,8 @@ endobj
91 12 0 R 91 12 0 R
92 ] 92 ]
93 /Limits [ 93 /Limits [
94 - 0  
95 - 19 94 + (01 one)
  95 + (15 fifteen)
96 ] 96 ]
97 >> 97 >>
98 endobj 98 endobj
@@ -100,8 +100,8 @@ endobj @@ -100,8 +100,8 @@ endobj
100 10 0 obj 100 10 0 obj
101 << 101 <<
102 /Limits [ 102 /Limits [
103 - 20  
104 - 29 103 + (20 twenty)
  104 + (29 twenty-nine)
105 ] 105 ]
106 /Names [ 106 /Names [
107 (20 twenty) (twenty.) 107 (20 twenty) (twenty.)
@@ -152,9 +152,9 @@ xref @@ -152,9 +152,9 @@ xref
152 0000000612 00000 n 152 0000000612 00000 n
153 0000000647 00000 n 153 0000000647 00000 n
154 0000000704 00000 n 154 0000000704 00000 n
155 -0000000791 00000 n  
156 -0000000955 00000 n  
157 -0000001151 00000 n 155 +0000000808 00000 n
  156 +0000000995 00000 n
  157 +0000001191 00000 n
158 trailer << 158 trailer <<
159 /Root 1 0 R 159 /Root 1 0 R
160 /QTest 8 0 R 160 /QTest 8 0 R
@@ -162,5 +162,5 @@ trailer &lt;&lt; @@ -162,5 +162,5 @@ trailer &lt;&lt;
162 /ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><2c3b7a6ec7fc61db8a5db4eebf57f540>] 162 /ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><2c3b7a6ec7fc61db8a5db4eebf57f540>]
163 >> 163 >>
164 startxref 164 startxref
165 -1325 165 +1365
166 %%EOF 166 %%EOF