Commit f38df27aa3eae905e3ee90365099335e317173d8

Authored by Jay Berkenbilt
1 parent 077d3d45

Add QPDFNumberTreeObjectHelper

ChangeLog
1 1 2018-12-18 Jay Berkenbilt <ejb@ql.org>
2 2  
  3 + * Add QPDFNumberTreeObjectHelper class. This class provides useful
  4 + methods for dealing with number trees, which are discussed in
  5 + section 7.9.7 of the PDF spec (ISO-32000). Page label dictionaries
  6 + are represented as number trees.
  7 +
3 8 * New method QPDFObjectHandle::wrapInArray returns the object
4 9 itself if it is an array. Otherwise, it returns an array
5 10 containing the object. This is useful for dealing with PDF data
... ...
include/qpdf/QPDFNumberTreeObjectHelper.hh 0 → 100644
  1 +// Copyright (c) 2005-2018 Jay Berkenbilt
  2 +//
  3 +// This file is part of qpdf.
  4 +//
  5 +// Licensed under the Apache License, Version 2.0 (the "License");
  6 +// you may not use this file except in compliance with the License.
  7 +// You may obtain a copy of the License at
  8 +//
  9 +// http://www.apache.org/licenses/LICENSE-2.0
  10 +//
  11 +// Unless required by applicable law or agreed to in writing, software
  12 +// distributed under the License is distributed on an "AS IS" BASIS,
  13 +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 +// See the License for the specific language governing permissions and
  15 +// limitations under the License.
  16 +//
  17 +// Versions of qpdf prior to version 7 were released under the terms
  18 +// of version 2.0 of the Artistic License. At your option, you may
  19 +// continue to consider qpdf to be licensed under those terms. Please
  20 +// see the manual for additional information.
  21 +
  22 +#ifndef QPDFNUMBERTREEOBJECTHELPER_HH
  23 +#define QPDFNUMBERTREEOBJECTHELPER_HH
  24 +
  #include <qpdf/QPDFObjectHelper.hh>
  #include <qpdf/QPDFObjGen.hh>
  #include <functional>
  #include <map>
  #include <set>

  #include <qpdf/DLL.h>
  31 +
  32 +// This is an object helper for number trees. See section 7.9.7 in the
  33 +// PDF spec (ISO 32000) for a description of number trees. This
  34 +// implementation disregards stated limits and sequencing and simply
  35 +// builds a map from numerical index to object. If the array of
  36 +// numbers does not contain a numerical value where expected, this
  37 +// implementation silently skips forward until it finds a number.
  38 +
  39 +class QPDFNumberTreeObjectHelper: public QPDFObjectHelper
  40 +{
  41 + public:
  42 + QPDF_DLL
  43 + QPDFNumberTreeObjectHelper(QPDFObjectHandle);
  44 +
  45 + typedef long long int numtree_number;
  46 +
  47 + // Return overall minimum and maximum indices
  48 + QPDF_DLL
  49 + numtree_number getMin();
  50 + QPDF_DLL
  51 + numtree_number getMax();
  52 +
  53 + // Return whether the number tree has an explicit entry for this
  54 + // number.
  55 + QPDF_DLL
  56 + bool hasIndex(numtree_number idx);
  57 +
  58 + // Find an object with a specific index. If found, returns true
  59 + // and initializes oh.
  60 + QPDF_DLL
  61 + bool findObject(numtree_number idx, QPDFObjectHandle& oh);
  62 + // Find the object at the index or, if not found, the object whose
  63 + // index is the highest index less than the requested index. If
  64 + // the requested index is less than the minimum, return false.
  65 + // Otherwise, return true, initialize oh to the object, and set
  66 + // offset to the difference between the requested index and the
  67 + // actual index. For example, if a number tree has values for 3
  68 + // and 6 and idx is 5, this method would return true, initialize
  69 + // oh to the value with index 3, and set offset to 2 (5 - 3).
  70 + QPDF_DLL
  71 + bool findObjectAtOrBelow(numtree_number idx, QPDFObjectHandle& oh,
  72 + numtree_number& offset);
  73 +
  74 + typedef std::map<numtree_number, QPDFObjectHandle> idx_map;
  75 + QPDF_DLL
  76 + idx_map getAsMap() const;
  77 +
  78 + private:
  79 + class Members
  80 + {
  81 + friend class QPDFNumberTreeObjectHelper;
  82 + typedef QPDFNumberTreeObjectHelper::numtree_number numtree_number;
  83 +
  84 + public:
  85 + QPDF_DLL
  86 + ~Members();
  87 +
  88 + private:
  89 + Members();
  90 + Members(Members const&);
  91 +
  92 + // Use a reverse sorted map so we can use the lower_bound
  93 + // method for searching. lower_bound returns smallest entry
  94 + // not before the searched entry, meaning that the searched
  95 + // entry is the lower bound. There's also an upper_bound
  96 + // method, but it does not do what you'd think it should.
  97 + // lower_bound implements >=, and upper_bound implements >.
  98 + typedef std::map<numtree_number,
  99 + QPDFObjectHandle,
  100 + std::greater<numtree_number> > idx_map;
  101 + idx_map entries;
  102 + std::set<QPDFObjGen> seen;
  103 + };
  104 +
  105 + void updateMap(QPDFObjectHandle oh);
  106 +
  107 + PointerHolder<Members> m;
  108 +};
  109 +
  110 +#endif // QPDFNUMBERTREEOBJECTHELPER_HH
... ...
libqpdf/QPDFNumberTreeObjectHelper.cc 0 → 100644
  1 +#include <qpdf/QPDFNumberTreeObjectHelper.hh>
  2 +#include <qpdf/QTC.hh>
  3 +
  4 +QPDFNumberTreeObjectHelper::Members::~Members()
  5 +{
  6 +}
  7 +
  8 +QPDFNumberTreeObjectHelper::Members::Members()
  9 +{
  10 +}
  11 +
  12 +QPDFNumberTreeObjectHelper::QPDFNumberTreeObjectHelper(QPDFObjectHandle oh) :
  13 + QPDFObjectHelper(oh),
  14 + m(new Members())
  15 +{
  16 + updateMap(oh);
  17 +}
  18 +
  19 +void
  20 +QPDFNumberTreeObjectHelper::updateMap(QPDFObjectHandle oh)
  21 +{
  22 + if (this->m->seen.count(oh.getObjGen()))
  23 + {
  24 + return;
  25 + }
  26 + this->m->seen.insert(oh.getObjGen());
  27 + QPDFObjectHandle nums = oh.getKey("/Nums");
  28 + if (nums.isArray())
  29 + {
  30 + size_t nitems = nums.getArrayNItems();
  31 + size_t i = 0;
  32 + while (i < nitems - 1)
  33 + {
  34 + QPDFObjectHandle num = nums.getArrayItem(i);
  35 + if (num.isInteger())
  36 + {
  37 + ++i;
  38 + QPDFObjectHandle obj = nums.getArrayItem(i);
  39 + this->m->entries[num.getIntValue()] = obj;
  40 + }
  41 + ++i;
  42 + }
  43 + }
  44 + QPDFObjectHandle kids = oh.getKey("/Kids");
  45 + if (kids.isArray())
  46 + {
  47 + size_t nitems = kids.getArrayNItems();
  48 + for (size_t i = 0; i < nitems; ++i)
  49 + {
  50 + updateMap(kids.getArrayItem(i));
  51 + }
  52 + }
  53 +}
  54 +
  55 +
  56 +QPDFNumberTreeObjectHelper::numtree_number
  57 +QPDFNumberTreeObjectHelper::getMin()
  58 +{
  59 + if (this->m->entries.empty())
  60 + {
  61 + return 0;
  62 + }
  63 + // Our map is sorted in reverse.
  64 + return this->m->entries.rbegin()->first;
  65 +}
  66 +
  67 +QPDFNumberTreeObjectHelper::numtree_number
  68 +QPDFNumberTreeObjectHelper::getMax()
  69 +{
  70 + if (this->m->entries.empty())
  71 + {
  72 + return 0;
  73 + }
  74 + // Our map is sorted in reverse.
  75 + return this->m->entries.begin()->first;
  76 +}
  77 +
  78 +bool
  79 +QPDFNumberTreeObjectHelper::hasIndex(numtree_number idx)
  80 +{
  81 + return this->m->entries.count(idx) != 0;
  82 +}
  83 +
  84 +bool
  85 +QPDFNumberTreeObjectHelper::findObject(
  86 + numtree_number idx, QPDFObjectHandle& oh)
  87 +{
  88 + Members::idx_map::iterator i = this->m->entries.find(idx);
  89 + if (i == this->m->entries.end())
  90 + {
  91 + return false;
  92 + }
  93 + oh = (*i).second;
  94 + return true;
  95 +}
  96 +
  97 +bool
  98 +QPDFNumberTreeObjectHelper::findObjectAtOrBelow(
  99 + numtree_number idx, QPDFObjectHandle& oh,
  100 + numtree_number& offset)
  101 +{
  102 + Members::idx_map::iterator i = this->m->entries.lower_bound(idx);
  103 + if (i == this->m->entries.end())
  104 + {
  105 + return false;
  106 + }
  107 + oh = (*i).second;
  108 + offset = idx - (*i).first;
  109 + return true;
  110 +}
  111 +
  112 +std::map<QPDFNumberTreeObjectHelper::numtree_number, QPDFObjectHandle>
  113 +QPDFNumberTreeObjectHelper::getAsMap() const
  114 +{
  115 + std::map<numtree_number, QPDFObjectHandle> result;
  116 + for (Members::idx_map::const_iterator iter = this->m->entries.begin();
  117 + iter != this->m->entries.end(); ++iter)
  118 + {
  119 + result[(*iter).first] = (*iter).second;
  120 + }
  121 + return result;
  122 +}
... ...
libqpdf/build.mk
... ... @@ -40,6 +40,7 @@ SRCS_libqpdf = \
40 40 libqpdf/QPDFAnnotationObjectHelper.cc \
41 41 libqpdf/QPDFExc.cc \
42 42 libqpdf/QPDFFormFieldObjectHelper.cc \
  43 + libqpdf/QPDFNumberTreeObjectHelper.cc \
43 44 libqpdf/QPDFObjGen.cc \
44 45 libqpdf/QPDFObject.cc \
45 46 libqpdf/QPDFObjectHandle.cc \
... ...
qpdf/qtest/qpdf.test
... ... @@ -226,6 +226,16 @@ foreach my $input (@ext_inputs)
226 226 }
227 227 show_ntests();
228 228 # ----------
  229 +$td->notify("--- Number Trees ---");
  230 +$n_tests += 1;
  231 +
  232 +$td->runtest("number trees",
  233 + {$td->COMMAND => "test_driver 46 number-tree.pdf"},
  234 + {$td->FILE => "number-tree.out", $td->EXIT_STATUS => 0},
  235 + $td->NORMALIZE_NEWLINES);
  236 +
  237 +show_ntests();
  238 +# ----------
229 239 $td->notify("--- Page API Tests ---");
230 240 $n_tests += 9;
231 241  
... ...
qpdf/qtest/qpdf/number-tree.out 0 → 100644
  1 +1 one
  2 +2 two
  3 +3 three
  4 +5 five
  5 +6 six
  6 +9 nine
  7 +11 elephant
  8 +12 twelve
  9 +15 fifteen
  10 +19 nineteen
  11 +20 twenty
  12 +22 twenty-two
  13 +23 twenty-three
  14 +29 twenty-nine
  15 +test 46 done
... ...
qpdf/qtest/qpdf/number-tree.pdf 0 → 100644
  1 +%PDF-1.3
  2 +%¿÷¢þ
  3 +%QDF-1.0
  4 +
  5 +1 0 obj
  6 +<<
  7 + /Pages 2 0 R
  8 + /Type /Catalog
  9 +>>
  10 +endobj
  11 +
  12 +2 0 obj
  13 +<<
  14 + /Count 1
  15 + /Kids [
  16 + 3 0 R
  17 + ]
  18 + /Type /Pages
  19 +>>
  20 +endobj
  21 +
  22 +%% Page 1
  23 +3 0 obj
  24 +<<
  25 + /Contents 4 0 R
  26 + /MediaBox [
  27 + 0
  28 + 0
  29 + 612
  30 + 792
  31 + ]
  32 + /Parent 2 0 R
  33 + /Resources <<
  34 + /Font <<
  35 + /F1 6 0 R
  36 + >>
  37 + /ProcSet 7 0 R
  38 + >>
  39 + /Type /Page
  40 +>>
  41 +endobj
  42 +
  43 +%% Contents for page 1
  44 +4 0 obj
  45 +<<
  46 + /Length 5 0 R
  47 +>>
  48 +stream
  49 +BT
  50 + /F1 24 Tf
  51 + 72 720 Td
  52 + (Potato) Tj
  53 +ET
  54 +endstream
  55 +endobj
  56 +
  57 +5 0 obj
  58 +44
  59 +endobj
  60 +
  61 +6 0 obj
  62 +<<
  63 + /BaseFont /Helvetica
  64 + /Encoding /WinAnsiEncoding
  65 + /Name /F1
  66 + /Subtype /Type1
  67 + /Type /Font
  68 +>>
  69 +endobj
  70 +
  71 +7 0 obj
  72 +[
  73 + /PDF
  74 + /Text
  75 +]
  76 +endobj
  77 +
  78 +8 0 obj
  79 +<<
  80 + /Kids [
  81 + 9 0 R
  82 + 10 0 R
  83 + ]
  84 +>>
  85 +endobj
  86 +
  87 +9 0 obj
  88 +<<
  89 + /Kids [
  90 + 11 0 R
  91 + 12 0 R
  92 + ]
  93 + /Limits [
  94 + 0
  95 + 19
  96 + ]
  97 +>>
  98 +endobj
  99 +
  100 +10 0 obj
  101 +<<
  102 + /Limits [
  103 + 20
  104 + 29
  105 + ]
  106 + /Nums [
  107 + 20 (twenty)
  108 + 22 (twenty-two)
  109 + 23 (twenty-three)
  110 + 29 (twenty-nine)
  111 + ]
  112 +>>
  113 +endobj
  114 +
  115 +11 0 obj
  116 +<<
  117 + /Limits [
  118 + 0
  119 + 9
  120 + ]
  121 + /Nums [
  122 + 1 (one)
  123 + 2 (two)
  124 + 3 (three)
  125 + 5 (five)
  126 + 6 (six)
  127 + 9 (nine)
  128 + ]
  129 +>>
  130 +endobj
  131 +
  132 +12 0 obj
  133 +<<
  134 + /Limits [
  135 + 11
  136 + 19
  137 + ]
  138 + /Nums [
  139 + 11 (elephant)
  140 + 12 (twelve)
  141 + 15 (fifteen)
  142 + 19 (nineteen)
  143 + ]
  144 +>>
  145 +endobj
  146 +
  147 +
  148 +xref
  149 +0 13
  150 +0000000000 65535 f
  151 +0000000025 00000 n
  152 +0000000079 00000 n
  153 +0000000161 00000 n
  154 +0000000376 00000 n
  155 +0000000475 00000 n
  156 +0000000494 00000 n
  157 +0000000612 00000 n
  158 +0000000647 00000 n
  159 +0000000704 00000 n
  160 +0000000791 00000 n
  161 +0000000937 00000 n
  162 +0000001078 00000 n
  163 +trailer <<
  164 + /Root 1 0 R
  165 + /QTest 8 0 R
  166 + /Size 13
  167 + /ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><2c3b7a6ec7fc61db8a5db4eebf57f540>]
  168 +>>
  169 +startxref
  170 +1215
  171 +%%EOF
... ...
qpdf/test_driver.cc
... ... @@ -6,6 +6,7 @@
6 6 #include <qpdf/QPDFPageDocumentHelper.hh>
7 7 #include <qpdf/QPDFPageObjectHelper.hh>
8 8 #include <qpdf/QPDFAcroFormDocumentHelper.hh>
  9 +#include <qpdf/QPDFNumberTreeObjectHelper.hh>
9 10 #include <qpdf/QUtil.hh>
10 11 #include <qpdf/QTC.hh>
11 12 #include <qpdf/Pl_StdioFile.hh>
... ... @@ -1660,6 +1661,35 @@ void runtest(int n, char const* filename1, char const* arg2)
1660 1661 exit(3);
1661 1662 }
1662 1663 }
  1664 + else if (n == 46)
  1665 + {
  1666 + // Test number tree. This test is crafted to work with
  1667 + // number-tree.pdf
  1668 + QPDFObjectHandle qtest = pdf.getTrailer().getKey("/QTest");
  1669 + QPDFNumberTreeObjectHelper ntoh(qtest);
  1670 + QPDFNumberTreeObjectHelper::idx_map ntoh_map = ntoh.getAsMap();
  1671 + for (QPDFNumberTreeObjectHelper::idx_map::iterator iter =
  1672 + ntoh_map.begin();
  1673 + iter != ntoh_map.end(); ++iter)
  1674 + {
  1675 + std::cout << (*iter).first << " "
  1676 + << (*iter).second.getStringValue()
  1677 + << std::endl;
  1678 + }
  1679 + assert(1 == ntoh.getMin());
  1680 + assert(29 == ntoh.getMax());
  1681 + assert(ntoh.hasIndex(6));
  1682 + assert(! ntoh.hasIndex(500));
  1683 + QPDFObjectHandle oh;
  1684 + assert(! ntoh.findObject(4, oh));
  1685 + assert(ntoh.findObject(3, oh));
  1686 + assert("three" == oh.getStringValue());
  1687 + QPDFNumberTreeObjectHelper::numtree_number offset = 0;
  1688 + assert(! ntoh.findObjectAtOrBelow(0, oh, offset));
  1689 + assert(ntoh.findObjectAtOrBelow(8, oh, offset));
  1690 + assert("six" == oh.getStringValue());
  1691 + assert(2 == offset);
  1692 + }
1663 1693 else
1664 1694 {
1665 1695 throw std::runtime_error(std::string("invalid test ") +
... ...