Commit 4a1cce0a470e6deed6dbeb6093e4e5c16f53439d

Authored by Jay Berkenbilt
1 parent 9ad6cfd4

Reimplement name and number tree object helpers

Create a computationally and memory efficient implementation of name
and number trees that does binary searches as intended by the data
structure rather than loading into a map, which can use a great deal
of memory and can be very slow.
include/qpdf/QPDFNameTreeObjectHelper.hh
... ... @@ -25,17 +25,16 @@
25 25 #include <qpdf/QPDFObjectHelper.hh>
26 26 #include <qpdf/QPDFObjGen.hh>
27 27 #include <map>
  28 +#include <memory>
28 29  
29 30 #include <qpdf/DLL.h>
30 31  
31 32 // This is an object helper for name trees. See section 7.9.6 in the
32   -// PDF spec (ISO 32000) for a description of name trees. This
33   -// implementation disregards stated limits and sequencing and simply
34   -// builds a map from string object. If the array of values does not
35   -// contain a string where expected, this implementation silently skips
36   -// forward until it finds a string. When looking up items in the name
37   -// tree, use UTF-8 strings. All names are normalized for lookup
38   -// purposes.
  33 +// PDF spec (ISO 32000) for a description of name trees. When looking
  34 +// up items in the name tree, use UTF-8 strings. All names are
  35 +// normalized for lookup purposes.
  36 +
  37 +class NNTreeImpl;
39 38  
40 39 class QPDFNameTreeObjectHelper: public QPDFObjectHelper
41 40 {
... ... @@ -55,6 +54,9 @@ class QPDFNameTreeObjectHelper: public QPDFObjectHelper
55 54 QPDF_DLL
56 55 bool findObject(std::string const& utf8, QPDFObjectHandle& oh);
57 56  
  57 + // Return the contents of the name tree as a map. Note that name
  58 + // trees may be very large, so this may use a lot of RAM. It is
  59 + // more efficient to use QPDFNameTreeObjectHelper's iterator.
58 60 QPDF_DLL
59 61 std::map<std::string, QPDFObjectHandle> getAsMap() const;
60 62  
... ... @@ -68,15 +70,12 @@ class QPDFNameTreeObjectHelper: public QPDFObjectHelper
68 70 ~Members();
69 71  
70 72 private:
71   - Members();
72   - Members(Members const&);
  73 + Members(QPDFObjectHandle& oh);
  74 + Members(Members const&) = delete;
73 75  
74   - std::map<std::string, QPDFObjectHandle> entries;
75   - std::set<QPDFObjGen> seen;
  76 + std::shared_ptr<NNTreeImpl> impl;
76 77 };
77 78  
78   - void updateMap(QPDFObjectHandle oh);
79   -
80 79 PointerHolder<Members> m;
81 80 };
82 81  
... ...
include/qpdf/QPDFNumberTreeObjectHelper.hh
... ... @@ -24,17 +24,15 @@
24 24  
25 25 #include <qpdf/QPDFObjectHelper.hh>
26 26 #include <qpdf/QPDFObjGen.hh>
27   -#include <functional>
28 27 #include <map>
  28 +#include <memory>
29 29  
30 30 #include <qpdf/DLL.h>
31 31  
32 32 // This is an object helper for number trees. See section 7.9.7 in the
33   -// PDF spec (ISO 32000) for a description of number trees. This
34   -// implementation disregards stated limits and sequencing and simply
35   -// builds a map from numerical index to object. If the array of
36   -// numbers does not contain a numerical value where expected, this
37   -// implementation silently skips forward until it finds a number.
  33 +// PDF spec (ISO 32000) for a description of number trees.
  34 +
  35 +class NNTreeImpl;
38 36  
39 37 class QPDFNumberTreeObjectHelper: public QPDFObjectHelper
40 38 {
... ... @@ -75,6 +73,10 @@ class QPDFNumberTreeObjectHelper: public QPDFObjectHelper
75 73 bool findObjectAtOrBelow(numtree_number idx, QPDFObjectHandle& oh,
76 74 numtree_number& offset);
77 75  
  76 + // Return the contents of the number tree as a map. Note that
  77 + // number trees may be very large, so this may use a lot of RAM.
  78 + // It is more efficient to use QPDFNumberTreeObjectHelper's
  79 + // iterator.
78 80 typedef std::map<numtree_number, QPDFObjectHandle> idx_map;
79 81 QPDF_DLL
80 82 idx_map getAsMap() const;
... ... @@ -90,23 +92,11 @@ class QPDFNumberTreeObjectHelper: public QPDFObjectHelper
90 92 ~Members();
91 93  
92 94 private:
93   - Members();
94   - Members(Members const&);
95   -
96   - // Use a reverse sorted map so we can use the lower_bound
97   - // method for searching. lower_bound returns smallest entry
98   - // not before the searched entry, meaning that the searched
99   - // entry is the lower bound. There's also an upper_bound
100   - // method, but it does not do what you'd think it should.
101   - // lower_bound implements >=, and upper_bound implements >.
102   - typedef std::map<numtree_number,
103   - QPDFObjectHandle,
104   - std::greater<numtree_number> > idx_map;
105   - idx_map entries;
106   - std::set<QPDFObjGen> seen;
107   - };
  95 + Members(QPDFObjectHandle& oh);
  96 + Members(Members const&) = delete;
108 97  
109   - void updateMap(QPDFObjectHandle oh);
  98 + std::shared_ptr<NNTreeImpl> impl;
  99 + };
110 100  
111 101 PointerHolder<Members> m;
112 102 };
... ...
libqpdf/NNTree.cc 0 → 100644
  1 +#include <qpdf/NNTree.hh>
  2 +#include <qpdf/QUtil.hh>
  3 +
  4 +#include <exception>
  5 +
  6 +NNTreeIterator::PathElement::PathElement(
  7 + QPDFObjectHandle const& node, int kid_number) :
  8 + node(node),
  9 + kid_number(kid_number)
  10 +{
  11 +}
  12 +
  13 +QPDFObjectHandle
  14 +NNTreeIterator::PathElement::getNextKid(bool backward)
  15 +{
  16 + kid_number += backward ? -1 : 1;
  17 + auto kids = node.getKey("/Kids");
  18 + QPDFObjectHandle result;
  19 + if ((kid_number >= 0) && (kid_number < kids.getArrayNItems()))
  20 + {
  21 + result = kids.getArrayItem(kid_number);
  22 + }
  23 + else
  24 + {
  25 + result = QPDFObjectHandle::newNull();
  26 + }
  27 + return result;
  28 +}
  29 +
  30 +bool
  31 +NNTreeIterator::valid() const
  32 +{
  33 + return this->item_number >= 0;
  34 +}
  35 +
  36 +void
  37 +NNTreeIterator::increment(bool backward)
  38 +{
  39 + if (this->item_number < 0)
  40 + {
  41 + throw std::logic_error(
  42 + "attempt made to increment or decrement an invalid"
  43 + " name/number tree iterator");
  44 + }
  45 + this->item_number += backward ? -2 : 2;
  46 + auto items = this->node.getKey(details.itemsKey());
  47 + if ((this->item_number < 0) ||
  48 + (this->item_number >= items.getArrayNItems()))
  49 + {
  50 + bool found = false;
  51 + setItemNumber(QPDFObjectHandle(), -1);
  52 + while (! (found || this->path.empty()))
  53 + {
  54 + auto& element = this->path.back();
  55 + auto node = element.getNextKid(backward);
  56 + if (node.isNull())
  57 + {
  58 + this->path.pop_back();
  59 + }
  60 + else
  61 + {
  62 + deepen(node, ! backward);
  63 + found = true;
  64 + }
  65 + }
  66 + }
  67 +}
  68 +
  69 +NNTreeIterator&
  70 +NNTreeIterator::operator++()
  71 +{
  72 + increment(false);
  73 + return *this;
  74 +}
  75 +
  76 +NNTreeIterator&
  77 +NNTreeIterator::operator--()
  78 +{
  79 + increment(true);
  80 + return *this;
  81 +}
  82 +
  83 +NNTreeIterator::reference
  84 +NNTreeIterator::operator*()
  85 +{
  86 + if (this->item_number < 0)
  87 + {
  88 + throw std::logic_error(
  89 + "attempt made to dereference an invalid"
  90 + " name/number tree iterator");
  91 + }
  92 + auto items = this->node.getKey(details.itemsKey());
  93 + return std::make_pair(items.getArrayItem(this->item_number),
  94 + items.getArrayItem(1+this->item_number));
  95 +}
  96 +
  97 +bool
  98 +NNTreeIterator::operator==(NNTreeIterator const& other) const
  99 +{
  100 + if ((this->item_number == -1) && (other.item_number == -1))
  101 + {
  102 + return true;
  103 + }
  104 + if (this->path.size() != other.path.size())
  105 + {
  106 + return false;
  107 + }
  108 + auto tpi = this->path.begin();
  109 + auto opi = other.path.begin();
  110 + while (tpi != this->path.end())
  111 + {
  112 + if ((*tpi).kid_number != (*opi).kid_number)
  113 + {
  114 + return false;
  115 + }
  116 + ++tpi;
  117 + ++opi;
  118 + }
  119 + if (this->item_number != other.item_number)
  120 + {
  121 + return false;
  122 + }
  123 + return true;
  124 +}
  125 +
  126 +void
  127 +NNTreeIterator::setItemNumber(QPDFObjectHandle const& node, int n)
  128 +{
  129 + this->node = node;
  130 + this->item_number = n;
  131 +}
  132 +
  133 +void
  134 +NNTreeIterator::addPathElement(QPDFObjectHandle const& node,
  135 + int kid_number)
  136 +{
  137 + this->path.push_back(PathElement(node, kid_number));
  138 +}
  139 +
  140 +void
  141 +NNTreeIterator::deepen(QPDFObjectHandle node, bool first)
  142 +{
  143 + std::set<QPDFObjGen> seen;
  144 + while (true)
  145 + {
  146 + if (node.isIndirect())
  147 + {
  148 + auto og = node.getObjGen();
  149 + if (seen.count(og))
  150 + {
  151 + throw std::runtime_error("loop detected");
  152 + }
  153 + seen.insert(og);
  154 + }
  155 + auto kids = node.getKey("/Kids");
  156 + int nkids = kids.isArray() ? kids.getArrayNItems() : 0;
  157 + auto items = node.getKey(details.itemsKey());
  158 + int nitems = items.isArray() ? items.getArrayNItems() : 0;
  159 + if (nitems > 0)
  160 + {
  161 + setItemNumber(node, first ? 0 : nitems - 2);
  162 + break;
  163 + }
  164 + else if (nkids > 0)
  165 + {
  166 + int kid_number = first ? 0 : nkids - 1;
  167 + addPathElement(node, kid_number);
  168 + node = kids.getArrayItem(kid_number);
  169 + }
  170 + else
  171 + {
  172 + throw std::runtime_error("node has neither /Kids nor /Names");
  173 + }
  174 + }
  175 +}
  176 +
  177 +NNTreeImpl::NNTreeImpl(NNTreeDetails const& details,
  178 + QPDF* qpdf,
  179 + QPDFObjectHandle& oh,
  180 + bool auto_repair) :
  181 + details(details),
  182 + oh(oh)
  183 +{
  184 +}
  185 +
  186 +NNTreeImpl::iterator
  187 +NNTreeImpl::begin()
  188 +{
  189 + iterator result(details);
  190 + result.deepen(this->oh, true);
  191 + return result;
  192 +}
  193 +
  194 +NNTreeImpl::iterator
  195 +NNTreeImpl::end()
  196 +{
  197 + return iterator(details);
  198 +}
  199 +
  200 +NNTreeImpl::iterator
  201 +NNTreeImpl::last()
  202 +{
  203 + iterator result(details);
  204 + result.deepen(this->oh, false);
  205 + return result;
  206 +}
  207 +
  208 +int
  209 +NNTreeImpl::withinLimits(QPDFObjectHandle key, QPDFObjectHandle node)
  210 +{
  211 + int result = 0;
  212 + auto limits = node.getKey("/Limits");
  213 + if (limits.isArray() && (limits.getArrayNItems() >= 2) &&
  214 + details.keyValid(limits.getArrayItem(0)) &&
  215 + details.keyValid(limits.getArrayItem(1)))
  216 + {
  217 + if (details.compareKeys(key, limits.getArrayItem(0)) < 0)
  218 + {
  219 + result = -1;
  220 + }
  221 + else if (details.compareKeys(key, limits.getArrayItem(1)) > 0)
  222 + {
  223 + result = 1;
  224 + }
  225 + }
  226 + else
  227 + {
  228 + // The root node has no limits, so consider the item to be in
  229 + // here if there are no limits. This will cause checking lower
  230 + // items.
  231 + }
  232 + return result;
  233 +}
  234 +
  235 +int
  236 +NNTreeImpl::binarySearch(
  237 + QPDFObjectHandle key, QPDFObjectHandle items,
  238 + int num_items, bool return_prev_if_not_found,
  239 + int (NNTreeImpl::*compare)(QPDFObjectHandle& key,
  240 + QPDFObjectHandle& node,
  241 + int item))
  242 +{
  243 + int max_idx = 1;
  244 + while (max_idx < num_items)
  245 + {
  246 + max_idx <<= 1;
  247 + }
  248 +
  249 + int step = max_idx / 2;
  250 + int checks = max_idx;
  251 + int idx = step;
  252 + int found_idx = -1;
  253 + bool found = false;
  254 + bool found_leq = false;
  255 + int status = 0;
  256 +
  257 + while ((! found) && (checks > 0))
  258 + {
  259 + if (idx < num_items)
  260 + {
  261 + status = (this->*compare)(key, items, idx);
  262 + if (status >= 0)
  263 + {
  264 + found_leq = true;
  265 + found_idx = idx;
  266 + }
  267 + }
  268 + else
  269 + {
  270 + // consider item to be below anything after the top
  271 + status = -1;
  272 + }
  273 +
  274 + if (status == 0)
  275 + {
  276 + found = true;
  277 + }
  278 + else
  279 + {
  280 + checks >>= 1;
  281 + if (checks > 0)
  282 + {
  283 + step >>= 1;
  284 + if (step == 0)
  285 + {
  286 + step = 1;
  287 + }
  288 +
  289 + if (status < 0)
  290 + {
  291 + idx -= step;
  292 + }
  293 + else
  294 + {
  295 + idx += step;
  296 + }
  297 + }
  298 + }
  299 + }
  300 +
  301 + if (found || (found_leq && return_prev_if_not_found))
  302 + {
  303 + return found_idx;
  304 + }
  305 + else
  306 + {
  307 + return -1;
  308 + }
  309 +}
  310 +
  311 +int
  312 +NNTreeImpl::compareKeyItem(
  313 + QPDFObjectHandle& key, QPDFObjectHandle& items, int idx)
  314 +{
  315 + if (! ((items.isArray() && (items.getArrayNItems() > (2 * idx)) &&
  316 + details.keyValid(items.getArrayItem(2 * idx)))))
  317 + {
  318 + throw std::runtime_error("item at index " +
  319 + QUtil::int_to_string(2 * idx) +
  320 + " is not the right type");
  321 + }
  322 + return details.compareKeys(key, items.getArrayItem(2 * idx));
  323 +}
  324 +
  325 +int
  326 +NNTreeImpl::compareKeyKid(QPDFObjectHandle& key, QPDFObjectHandle& kids, int idx)
  327 +{
  328 + if (! (kids.isArray() && (idx < kids.getArrayNItems()) &&
  329 + kids.getArrayItem(idx).isDictionary()))
  330 + {
  331 + throw std::runtime_error("invalid kid at index " +
  332 + QUtil::int_to_string(idx));
  333 + }
  334 + return withinLimits(key, kids.getArrayItem(idx));
  335 +}
  336 +
  337 +
  338 +NNTreeImpl::iterator
  339 +NNTreeImpl::find(QPDFObjectHandle key, bool return_prev_if_not_found)
  340 +{
  341 + auto first_item = begin();
  342 + auto last_item = end();
  343 + if (first_item.valid() &&
  344 + details.keyValid((*first_item).first) &&
  345 + details.compareKeys(key, (*first_item).first) < 0)
  346 + {
  347 + // Before the first key
  348 + return end();
  349 + }
  350 + else if (last_item.valid() &&
  351 + details.keyValid((*last_item).first) &&
  352 + details.compareKeys(key, (*last_item).first) > 0)
  353 + {
  354 + // After the last key
  355 + if (return_prev_if_not_found)
  356 + {
  357 + return last_item;
  358 + }
  359 + else
  360 + {
  361 + return end();
  362 + }
  363 + }
  364 +
  365 + std::set<QPDFObjGen> seen;
  366 + auto node = this->oh;
  367 + iterator result(details);
  368 +
  369 + while (true)
  370 + {
  371 + auto og = node.getObjGen();
  372 + if (seen.count(og))
  373 + {
  374 + throw std::runtime_error("loop detected in find");
  375 + }
  376 + seen.insert(og);
  377 +
  378 + auto kids = node.getKey("/Kids");
  379 + int nkids = kids.isArray() ? kids.getArrayNItems() : 0;
  380 + auto items = node.getKey(details.itemsKey());
  381 + int nitems = items.isArray() ? items.getArrayNItems() : 0;
  382 + if (nitems > 0)
  383 + {
  384 + int idx = binarySearch(
  385 + key, items, nitems / 2, return_prev_if_not_found,
  386 + &NNTreeImpl::compareKeyItem);
  387 + if (idx >= 0)
  388 + {
  389 + result.setItemNumber(node, 2 * idx);
  390 + }
  391 + break;
  392 + }
  393 + else if (nkids > 0)
  394 + {
  395 + int idx = binarySearch(
  396 + key, kids, nkids, true,
  397 + &NNTreeImpl::compareKeyKid);
  398 + if (idx == -1)
  399 + {
  400 + throw std::runtime_error(
  401 + "unexpected -1 from binary search of kids;"
  402 + " tree may not be sorted");
  403 + }
  404 + result.addPathElement(node, idx);
  405 + node = kids.getArrayItem(idx);
  406 + }
  407 + else
  408 + {
  409 + throw std::runtime_error("bad node during find");
  410 + }
  411 + }
  412 +
  413 + return result;
  414 +}
... ...
libqpdf/QPDFNameTreeObjectHelper.cc
1 1 #include <qpdf/QPDFNameTreeObjectHelper.hh>
  2 +#include <qpdf/NNTree.hh>
  3 +
  4 +class NameTreeDetails: public NNTreeDetails
  5 +{
  6 + public:
  7 + virtual std::string const& itemsKey() const override
  8 + {
  9 + static std::string k("/Names");
  10 + return k;
  11 + }
  12 + virtual bool keyValid(QPDFObjectHandle oh) const override
  13 + {
  14 + return oh.isString();
  15 + }
  16 + virtual int compareKeys(
  17 + QPDFObjectHandle a, QPDFObjectHandle b) const override
  18 + {
  19 + if (! (keyValid(a) && keyValid(b)))
  20 + {
  21 + // We don't call this without calling keyValid first
  22 + throw std::logic_error("comparing invalid keys");
  23 + }
  24 + auto as = a.getUTF8Value();
  25 + auto bs = b.getUTF8Value();
  26 + return ((as < bs) ? -1 : (as > bs) ? 1 : 0);
  27 + }
  28 +};
  29 +
  30 +static NameTreeDetails name_tree_details;
2 31  
3 32 QPDFNameTreeObjectHelper::Members::~Members()
4 33 {
5 34 }
6 35  
7   -QPDFNameTreeObjectHelper::Members::Members()
  36 +QPDFNameTreeObjectHelper::Members::Members(QPDFObjectHandle& oh) :
  37 + impl(std::make_shared<NNTreeImpl>(name_tree_details, nullptr, oh, false))
8 38 {
9 39 }
10 40  
11 41 QPDFNameTreeObjectHelper::QPDFNameTreeObjectHelper(QPDFObjectHandle oh) :
12 42 QPDFObjectHelper(oh),
13   - m(new Members())
  43 + m(new Members(oh))
14 44 {
15   - updateMap(oh);
16 45 }
17 46  
18 47 QPDFNameTreeObjectHelper::~QPDFNameTreeObjectHelper()
19 48 {
20 49 }
21 50  
22   -void
23   -QPDFNameTreeObjectHelper::updateMap(QPDFObjectHandle oh)
24   -{
25   - if (this->m->seen.count(oh.getObjGen()))
26   - {
27   - return;
28   - }
29   - this->m->seen.insert(oh.getObjGen());
30   - QPDFObjectHandle names = oh.getKey("/Names");
31   - if (names.isArray())
32   - {
33   - int nitems = names.getArrayNItems();
34   - int i = 0;
35   - while (i < nitems - 1)
36   - {
37   - QPDFObjectHandle name = names.getArrayItem(i);
38   - if (name.isString())
39   - {
40   - ++i;
41   - QPDFObjectHandle obj = names.getArrayItem(i);
42   - this->m->entries[name.getUTF8Value()] = obj;
43   - }
44   - ++i;
45   - }
46   - }
47   - QPDFObjectHandle kids = oh.getKey("/Kids");
48   - if (kids.isArray())
49   - {
50   - int nitems = kids.getArrayNItems();
51   - for (int i = 0; i < nitems; ++i)
52   - {
53   - updateMap(kids.getArrayItem(i));
54   - }
55   - }
56   -}
57   -
58 51 bool
59 52 QPDFNameTreeObjectHelper::hasName(std::string const& name)
60 53 {
61   - return this->m->entries.count(name) != 0;
  54 + auto i = this->m->impl->find(QPDFObjectHandle::newUnicodeString(name));
  55 + return (i != this->m->impl->end());
62 56 }
63 57  
64 58 bool
65 59 QPDFNameTreeObjectHelper::findObject(
66 60 std::string const& name, QPDFObjectHandle& oh)
67 61 {
68   - std::map<std::string, QPDFObjectHandle>::iterator i =
69   - this->m->entries.find(name);
70   - if (i == this->m->entries.end())
  62 + auto i = this->m->impl->find(QPDFObjectHandle::newUnicodeString(name));
  63 + if (i == this->m->impl->end())
71 64 {
72 65 return false;
73 66 }
... ... @@ -78,5 +71,12 @@ QPDFNameTreeObjectHelper::findObject(
78 71 std::map<std::string, QPDFObjectHandle>
79 72 QPDFNameTreeObjectHelper::getAsMap() const
80 73 {
81   - return this->m->entries;
  74 + std::map<std::string, QPDFObjectHandle> result;
  75 + for (auto i: *(this->m->impl))
  76 + {
  77 + result.insert(
  78 + std::make_pair(i.first.getUTF8Value(),
  79 + i.second));
  80 + }
  81 + return result;
82 82 }
... ...
libqpdf/QPDFNumberTreeObjectHelper.cc
1 1 #include <qpdf/QPDFNumberTreeObjectHelper.hh>
  2 +#include <qpdf/NNTree.hh>
2 3  
3   -QPDFNumberTreeObjectHelper::Members::~Members()
4   -{
5   -}
6   -
7   -QPDFNumberTreeObjectHelper::Members::Members()
  4 +class NumberTreeDetails: public NNTreeDetails
8 5 {
9   -}
10   -
11   -QPDFNumberTreeObjectHelper::QPDFNumberTreeObjectHelper(QPDFObjectHandle oh) :
12   - QPDFObjectHelper(oh),
13   - m(new Members())
14   -{
15   - updateMap(oh);
16   -}
17   -
18   -void
19   -QPDFNumberTreeObjectHelper::updateMap(QPDFObjectHandle oh)
20   -{
21   - if (this->m->seen.count(oh.getObjGen()))
  6 + public:
  7 + virtual std::string const& itemsKey() const override
22 8 {
23   - return;
  9 + static std::string k("/Nums");
  10 + return k;
24 11 }
25   - this->m->seen.insert(oh.getObjGen());
26   - QPDFObjectHandle nums = oh.getKey("/Nums");
27   - if (nums.isArray())
  12 + virtual bool keyValid(QPDFObjectHandle oh) const override
28 13 {
29   - int nitems = nums.getArrayNItems();
30   - int i = 0;
31   - while (i < nitems - 1)
32   - {
33   - QPDFObjectHandle num = nums.getArrayItem(i);
34   - if (num.isInteger())
35   - {
36   - ++i;
37   - QPDFObjectHandle obj = nums.getArrayItem(i);
38   - this->m->entries[num.getIntValue()] = obj;
39   - }
40   - ++i;
41   - }
  14 + return oh.isInteger();
42 15 }
43   - QPDFObjectHandle kids = oh.getKey("/Kids");
44   - if (kids.isArray())
  16 + virtual int compareKeys(
  17 + QPDFObjectHandle a, QPDFObjectHandle b) const override
45 18 {
46   - int nitems = kids.getArrayNItems();
47   - for (int i = 0; i < nitems; ++i)
  19 + if (! (keyValid(a) && keyValid(b)))
48 20 {
49   - updateMap(kids.getArrayItem(i));
  21 + // We don't call this without calling keyValid first
  22 + throw std::logic_error("comparing invalid keys");
50 23 }
  24 + auto as = a.getIntValue();
  25 + auto bs = b.getIntValue();
  26 + return ((as < bs) ? -1 : (as > bs) ? 1 : 0);
51 27 }
  28 +};
  29 +
  30 +static NumberTreeDetails number_tree_details;
  31 +
  32 +QPDFNumberTreeObjectHelper::Members::~Members()
  33 +{
  34 +}
  35 +
  36 +QPDFNumberTreeObjectHelper::Members::Members(QPDFObjectHandle& oh) :
  37 + impl(std::make_shared<NNTreeImpl>(number_tree_details, nullptr, oh, false))
  38 +{
52 39 }
53 40  
  41 +QPDFNumberTreeObjectHelper::QPDFNumberTreeObjectHelper(QPDFObjectHandle oh) :
  42 + QPDFObjectHelper(oh),
  43 + m(new Members(oh))
  44 +{
  45 +}
54 46  
55 47 QPDFNumberTreeObjectHelper::numtree_number
56 48 QPDFNumberTreeObjectHelper::getMin()
57 49 {
58   - if (this->m->entries.empty())
  50 + auto i = this->m->impl->begin();
  51 + if (i == this->m->impl->end())
59 52 {
60 53 return 0;
61 54 }
62   - // Our map is sorted in reverse.
63   - return this->m->entries.rbegin()->first;
  55 + return (*i).first.getIntValue();
64 56 }
65 57  
66 58 QPDFNumberTreeObjectHelper::numtree_number
67 59 QPDFNumberTreeObjectHelper::getMax()
68 60 {
69   - if (this->m->entries.empty())
  61 + auto i = this->m->impl->last();
  62 + if (i == this->m->impl->end())
70 63 {
71 64 return 0;
72 65 }
73   - // Our map is sorted in reverse.
74   - return this->m->entries.begin()->first;
  66 + return (*i).first.getIntValue();
75 67 }
76 68  
77 69 bool
78 70 QPDFNumberTreeObjectHelper::hasIndex(numtree_number idx)
79 71 {
80   - return this->m->entries.count(idx) != 0;
  72 + auto i = this->m->impl->find(QPDFObjectHandle::newInteger(idx));
  73 + return (i != this->m->impl->end());
81 74 }
82 75  
83 76 bool
84 77 QPDFNumberTreeObjectHelper::findObject(
85 78 numtree_number idx, QPDFObjectHandle& oh)
86 79 {
87   - Members::idx_map::iterator i = this->m->entries.find(idx);
88   - if (i == this->m->entries.end())
  80 + auto i = this->m->impl->find(QPDFObjectHandle::newInteger(idx));
  81 + if (i == this->m->impl->end())
89 82 {
90 83 return false;
91 84 }
... ... @@ -98,13 +91,13 @@ QPDFNumberTreeObjectHelper::findObjectAtOrBelow(
98 91 numtree_number idx, QPDFObjectHandle& oh,
99 92 numtree_number& offset)
100 93 {
101   - Members::idx_map::iterator i = this->m->entries.lower_bound(idx);
102   - if (i == this->m->entries.end())
  94 + auto i = this->m->impl->find(QPDFObjectHandle::newInteger(idx), true);
  95 + if (i == this->m->impl->end())
103 96 {
104 97 return false;
105 98 }
106 99 oh = (*i).second;
107   - offset = idx - (*i).first;
  100 + offset = idx - (*i).first.getIntValue();
108 101 return true;
109 102 }
110 103  
... ... @@ -112,10 +105,11 @@ std::map&lt;QPDFNumberTreeObjectHelper::numtree_number, QPDFObjectHandle&gt;
112 105 QPDFNumberTreeObjectHelper::getAsMap() const
113 106 {
114 107 std::map<numtree_number, QPDFObjectHandle> result;
115   - for (Members::idx_map::const_iterator iter = this->m->entries.begin();
116   - iter != this->m->entries.end(); ++iter)
  108 + for (auto i: *(this->m->impl))
117 109 {
118   - result[(*iter).first] = (*iter).second;
  110 + result.insert(
  111 + std::make_pair(i.first.getIntValue(),
  112 + i.second));
119 113 }
120 114 return result;
121 115 }
... ...
libqpdf/build.mk
... ... @@ -33,6 +33,7 @@ SRCS_libqpdf = \
33 33 libqpdf/InsecureRandomDataProvider.cc \
34 34 libqpdf/JSON.cc \
35 35 libqpdf/MD5.cc \
  36 + libqpdf/NNTree.cc \
36 37 libqpdf/OffsetInputSource.cc \
37 38 libqpdf/Pipeline.cc \
38 39 libqpdf/Pl_AES_PDF.cc \
... ...
libqpdf/qpdf/NNTree.hh 0 → 100644
  1 +#ifndef NNTREE_HH
  2 +#define NNTREE_HH
  3 +
  4 +#include <qpdf/QPDF.hh>
  5 +#include <qpdf/QPDFObjectHandle.hh>
  6 +
  7 +#include <iterator>
  8 +#include <list>
  9 +
  10 +class NNTreeDetails
  11 +{
  12 + public:
  13 + virtual std::string const& itemsKey() const = 0;
  14 + virtual bool keyValid(QPDFObjectHandle) const = 0;
  15 + virtual int compareKeys(QPDFObjectHandle, QPDFObjectHandle) const = 0;
  16 +};
  17 +
  18 +class NNTreeIterator: public std::iterator<
  19 + std::bidirectional_iterator_tag,
  20 + std::pair<QPDFObjectHandle, QPDFObjectHandle>,
  21 + void,
  22 + std::pair<QPDFObjectHandle, QPDFObjectHandle>*,
  23 + std::pair<QPDFObjectHandle, QPDFObjectHandle>>
  24 +{
  25 + friend class NNTreeImpl;
  26 + public:
  27 + bool valid() const;
  28 + NNTreeIterator& operator++();
  29 + NNTreeIterator operator++(int)
  30 + {
  31 + NNTreeIterator t = *this;
  32 + ++(*this);
  33 + return t;
  34 + }
  35 + NNTreeIterator& operator--();
  36 + NNTreeIterator operator--(int)
  37 + {
  38 + NNTreeIterator t = *this;
  39 + --(*this);
  40 + return t;
  41 + }
  42 + reference operator*();
  43 + bool operator==(NNTreeIterator const& other) const;
  44 + bool operator!=(NNTreeIterator const& other) const
  45 + {
  46 + return ! operator==(other);
  47 + }
  48 +
  49 + private:
  50 + class PathElement
  51 + {
  52 + public:
  53 + PathElement(QPDFObjectHandle const& node, int kid_number);
  54 + QPDFObjectHandle getNextKid(bool backward);
  55 +
  56 + QPDFObjectHandle node;
  57 + int kid_number;
  58 + };
  59 +
  60 + NNTreeIterator(NNTreeDetails const& details) :
  61 + details(details),
  62 + item_number(-1)
  63 + {
  64 + }
  65 + void deepen(QPDFObjectHandle node, bool first);
  66 + void setItemNumber(QPDFObjectHandle const& node, int);
  67 + void addPathElement(QPDFObjectHandle const& node, int kid_number);
  68 + void increment(bool backward);
  69 +
  70 + NNTreeDetails const& details;
  71 + std::list<PathElement> path;
  72 + QPDFObjectHandle node;
  73 + int item_number;
  74 +};
  75 +
  76 +class NNTreeImpl
  77 +{
  78 + public:
  79 + typedef NNTreeIterator iterator;
  80 +
  81 + NNTreeImpl(NNTreeDetails const&, QPDF*, QPDFObjectHandle&,
  82 + bool auto_repair = true);
  83 + iterator begin();
  84 + iterator end();
  85 + iterator last();
  86 + iterator find(QPDFObjectHandle key, bool return_prev_if_not_found = false);
  87 +
  88 + private:
  89 + int withinLimits(QPDFObjectHandle key, QPDFObjectHandle node);
  90 + int binarySearch(
  91 + QPDFObjectHandle key, QPDFObjectHandle items,
  92 + int num_items, bool return_prev_if_not_found,
  93 + int (NNTreeImpl::*compare)(QPDFObjectHandle& key,
  94 + QPDFObjectHandle& node,
  95 + int item));
  96 + int compareKeyItem(
  97 + QPDFObjectHandle& key, QPDFObjectHandle& items, int idx);
  98 + int compareKeyKid(
  99 + QPDFObjectHandle& key, QPDFObjectHandle& items, int idx);
  100 +
  101 + NNTreeDetails const& details;
  102 + QPDFObjectHandle oh;
  103 +};
  104 +
  105 +#endif // NNTREE_HH
... ...
libtests/build.mk
... ... @@ -16,6 +16,7 @@ BINS_libtests = \
16 16 main_from_wmain \
17 17 matrix \
18 18 md5 \
  19 + nntree \
19 20 numrange \
20 21 pointer_holder \
21 22 predictors \
... ...
libtests/nntree.cc 0 → 100644
  1 +#include <qpdf/QPDFNumberTreeObjectHelper.hh>
  2 +#include <qpdf/QPDF.hh>
  3 +#include <qpdf/QUtil.hh>
  4 +#include <iostream>
  5 +
  6 +bool report(QPDFObjectHandle oh, long long item, long long exp_item)
  7 +{
  8 + QPDFNumberTreeObjectHelper nh(oh);
  9 + QPDFObjectHandle o1;
  10 + long long offset = 0;
  11 + bool f1 = nh.findObjectAtOrBelow(item, o1, offset);
  12 + QPDFObjectHandle o2;
  13 + bool f2 = nh.findObject(item, o2);
  14 +
  15 + bool failed = false;
  16 + auto show = [&failed, &oh, &item] () {
  17 + if (! failed)
  18 + {
  19 + failed = true;
  20 + std::cout << "key = " << item << ", oh = "
  21 + << oh.unparseResolved() << std::endl;
  22 + }
  23 + };
  24 +
  25 + auto mk_wanted = [](long long i) {
  26 + return ((i == -1)
  27 + ? "end"
  28 + : (QUtil::int_to_string(i) +
  29 + "/(-" + QUtil::int_to_string(i) + "-)"));
  30 + };
  31 + std::string i1_wanted = mk_wanted(exp_item);
  32 + std::string i2_wanted = mk_wanted(item == exp_item ? item : -1);
  33 + auto mk_actual = [](bool found, long long v, QPDFObjectHandle& o) {
  34 + return (found
  35 + ? QUtil::int_to_string(v) + "/" + o.unparse()
  36 + : "end");
  37 + };
  38 + std::string i1_actual = mk_actual(f1, item - offset, o1);
  39 + std::string i2_actual = mk_actual(f2, item, o2);
  40 +
  41 + if (i1_wanted != i1_actual)
  42 + {
  43 + show();
  44 + std::cout << "i1: wanted " << i1_wanted
  45 + << ", got " << i1_actual
  46 + << std::endl;
  47 + }
  48 + if (i2_wanted != i2_actual)
  49 + {
  50 + show();
  51 + std::cout << "i2: wanted " << i2_wanted
  52 + << ", got " << i2_actual
  53 + << std::endl;
  54 + }
  55 +
  56 + return failed;
  57 +}
  58 +
  59 +int main()
  60 +{
  61 + QPDF q;
  62 + q.emptyPDF();
  63 +
  64 + auto mk = [&q] (std::vector<int> const& v) {
  65 + auto nums = QPDFObjectHandle::newArray();
  66 + for (auto i: v)
  67 + {
  68 + nums.appendItem(QPDFObjectHandle::newInteger(i));
  69 + nums.appendItem(QPDFObjectHandle::newString(
  70 + "-" + QUtil::int_to_string(i) + "-"));
  71 + }
  72 + auto limits = QPDFObjectHandle::newArray();
  73 + limits.appendItem(QPDFObjectHandle::newInteger(v.at(0)));
  74 + limits.appendItem(QPDFObjectHandle::newInteger(v.at(v.size() - 1)));
  75 + auto node = q.makeIndirectObject(QPDFObjectHandle::newDictionary());
  76 + node.replaceKey("/Nums", nums);
  77 + node.replaceKey("/Limits", limits);
  78 + return node;
  79 + };
  80 +
  81 + bool any_failures = false;
  82 + auto r = [&any_failures](QPDFObjectHandle& oh, int item, int exp) {
  83 + if (report(oh, item, exp))
  84 + {
  85 + any_failures = true;
  86 + }
  87 + };
  88 +
  89 + auto a = mk({2, 3, 5, 9, 11, 12, 14, 18});
  90 + r(a, 1, -1);
  91 + r(a, 2, 2);
  92 + r(a, 9, 9);
  93 + r(a, 11, 11);
  94 + r(a, 10, 9);
  95 + r(a, 7, 5);
  96 + r(a, 18, 18);
  97 + r(a, 19, 18);
  98 +
  99 + auto b = mk({2, 4});
  100 + r(b, 1, -1);
  101 + r(b, 2, 2);
  102 + r(b, 3, 2);
  103 + r(b, 4, 4);
  104 + r(b, 5, 4);
  105 +
  106 + auto c = mk({3});
  107 + r(c, 1, -1);
  108 + r(c, 3, 3);
  109 + r(c, 5, 3);
  110 +
  111 + auto d = mk({2, 3, 5, 9, 10, 12, 14, 18, 19, 20});
  112 + r(d, 1, -1);
  113 + r(d, 2, 2);
  114 + r(d, 18, 18);
  115 + r(d, 14, 14);
  116 + r(d, 19, 19);
  117 + r(d, 20, 20);
  118 + r(d, 25, 20);
  119 +
  120 + if (! any_failures)
  121 + {
  122 + std::cout << "all tests passed" << std::endl;
  123 + }
  124 +}
... ...
libtests/qtest/nntree.test 0 → 100644
#!/usr/bin/env perl
# Run the nntree test program and require that it reports success.
require 5.008;
use warnings;
use strict;

require TestDriver;

my $td = TestDriver->new('nntree');

# The program prints "all tests passed" only when no check failed.
$td->runtest(
    "nntree",
    {$td->COMMAND => "nntree"},
    {$td->STRING => "all tests passed\n",
     $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);

# NOTE(review): the argument is presumably the expected number of
# test cases -- confirm against the TestDriver documentation.
$td->report(1);
... ...
qpdf/qtest/qpdf/name-tree.pdf
... ... @@ -91,8 +91,8 @@ endobj
91 91 12 0 R
92 92 ]
93 93 /Limits [
94   - 0
95   - 19
  94 + (01 one)
  95 + (15 fifteen)
96 96 ]
97 97 >>
98 98 endobj
... ... @@ -100,8 +100,8 @@ endobj
100 100 10 0 obj
101 101 <<
102 102 /Limits [
103   - 20
104   - 29
  103 + (20 twenty)
  104 + (29 twenty-nine)
105 105 ]
106 106 /Names [
107 107 (20 twenty) (twenty.)
... ... @@ -152,9 +152,9 @@ xref
152 152 0000000612 00000 n
153 153 0000000647 00000 n
154 154 0000000704 00000 n
155   -0000000791 00000 n
156   -0000000955 00000 n
157   -0000001151 00000 n
  155 +0000000808 00000 n
  156 +0000000995 00000 n
  157 +0000001191 00000 n
158 158 trailer <<
159 159 /Root 1 0 R
160 160 /QTest 8 0 R
... ... @@ -162,5 +162,5 @@ trailer &lt;&lt;
162 162 /ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><2c3b7a6ec7fc61db8a5db4eebf57f540>]
163 163 >>
164 164 startxref
165   -1325
  165 +1365
166 166 %%EOF
... ...