Commit 0776c00129fac282e2e758bf1f32474af85db50e

Authored by Jay Berkenbilt
1 parent d2f39759

Add QPDFNameTreeObjectHelper

ChangeLog
1 2018-12-18 Jay Berkenbilt <ejb@ql.org> 1 2018-12-18 Jay Berkenbilt <ejb@ql.org>
2 2
  3 + * Add QPDFNameTreeObjectHelper class. This class provides useful
  4 + methods for dealing with name trees, which are discussed in
  5 + section 7.9.6 of the PDF spec (ISO-32000).
  6 +
3 * Preserve page labels when merging and splitting files. Prior 7 * Preserve page labels when merging and splitting files. Prior
4 versions of qpdf simply preserved the page label information from 8 versions of qpdf simply preserved the page label information from
5 the first file, which usually wouldn't make any sense in the 9 the first file, which usually wouldn't make any sense in the
include/qpdf/QPDFNameTreeObjectHelper.hh 0 → 100644
  1 +// Copyright (c) 2005-2018 Jay Berkenbilt
  2 +//
  3 +// This file is part of qpdf.
  4 +//
  5 +// Licensed under the Apache License, Version 2.0 (the "License");
  6 +// you may not use this file except in compliance with the License.
  7 +// You may obtain a copy of the License at
  8 +//
  9 +// http://www.apache.org/licenses/LICENSE-2.0
  10 +//
  11 +// Unless required by applicable law or agreed to in writing, software
  12 +// distributed under the License is distributed on an "AS IS" BASIS,
  13 +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 +// See the License for the specific language governing permissions and
  15 +// limitations under the License.
  16 +//
  17 +// Versions of qpdf prior to version 7 were released under the terms
  18 +// of version 2.0 of the Artistic License. At your option, you may
  19 +// continue to consider qpdf to be licensed under those terms. Please
  20 +// see the manual for additional information.
  21 +
  22 +#ifndef QPDFNAMETREEOBJECTHELPER_HH
  23 +#define QPDFNAMETREEOBJECTHELPER_HH
  24 +
  25 +#include <qpdf/QPDFObjectHelper.hh>
  26 +#include <qpdf/QPDFObjGen.hh>
  27 +#include <map>
  28 +
  29 +#include <qpdf/DLL.h>
  30 +
  31 +// This is an object helper for name trees. See section 7.9.6 in the
  32 +// PDF spec (ISO 32000) for a description of name trees. This
  33 +// implementation disregards stated limits and sequencing and simply
  34 +// builds a map from string to object. If the array of values does not
  35 +// contain a string where expected, this implementation silently skips
  36 +// forward until it finds a string. When looking up items in the name
  37 +// tree, use UTF-8 strings. All names are normalized for lookup
  38 +// purposes.
  39 +
  40 +class QPDFNameTreeObjectHelper: public QPDFObjectHelper
  41 +{
  42 + public:
  43 + QPDF_DLL
  44 + QPDFNameTreeObjectHelper(QPDFObjectHandle);
  45 + QPDF_DLL
  46 + virtual ~QPDFNameTreeObjectHelper();
  47 +
  48 + // Return whether the number tree has an explicit entry for this
  49 + // number.
  50 + QPDF_DLL
  51 + bool hasName(std::string const& utf8);
  52 +
  53 + // Find an object by name. If found, returns true and initializes
  54 + // oh.
  55 + QPDF_DLL
  56 + bool findObject(std::string const& utf8, QPDFObjectHandle& oh);
  57 +
  58 + QPDF_DLL
  59 + std::map<std::string, QPDFObjectHandle> getAsMap() const;
  60 +
  61 + private:
  62 + class Members
  63 + {
  64 + friend class QPDFNameTreeObjectHelper;
  65 +
  66 + public:
  67 + QPDF_DLL
  68 + ~Members();
  69 +
  70 + private:
  71 + Members();
  72 + Members(Members const&);
  73 +
  74 + std::map<std::string, QPDFObjectHandle> entries;
  75 + std::set<QPDFObjGen> seen;
  76 + };
  77 +
  78 + void updateMap(QPDFObjectHandle oh);
  79 +
  80 + PointerHolder<Members> m;
  81 +};
  82 +
  83 +#endif // QPDFNAMETREEOBJECTHELPER_HH
libqpdf/QPDFNameTreeObjectHelper.cc 0 → 100644
  1 +#include <qpdf/QPDFNameTreeObjectHelper.hh>
  2 +
  3 +QPDFNameTreeObjectHelper::Members::~Members()
  4 +{
  5 +}
  6 +
  7 +QPDFNameTreeObjectHelper::Members::Members()
  8 +{
  9 +}
  10 +
  11 +QPDFNameTreeObjectHelper::QPDFNameTreeObjectHelper(QPDFObjectHandle oh) :
  12 + QPDFObjectHelper(oh),
  13 + m(new Members())
  14 +{
  15 + updateMap(oh);
  16 +}
  17 +
  18 +QPDFNameTreeObjectHelper::~QPDFNameTreeObjectHelper()
  19 +{
  20 +}
  21 +
  22 +void
  23 +QPDFNameTreeObjectHelper::updateMap(QPDFObjectHandle oh)
  24 +{
  25 + if (this->m->seen.count(oh.getObjGen()))
  26 + {
  27 + return;
  28 + }
  29 + this->m->seen.insert(oh.getObjGen());
  30 + QPDFObjectHandle names = oh.getKey("/Names");
  31 + if (names.isArray())
  32 + {
  33 + size_t nitems = names.getArrayNItems();
  34 + size_t i = 0;
  35 + while (i < nitems - 1)
  36 + {
  37 + QPDFObjectHandle name = names.getArrayItem(i);
  38 + if (name.isString())
  39 + {
  40 + ++i;
  41 + QPDFObjectHandle obj = names.getArrayItem(i);
  42 + this->m->entries[name.getUTF8Value()] = obj;
  43 + }
  44 + ++i;
  45 + }
  46 + }
  47 + QPDFObjectHandle kids = oh.getKey("/Kids");
  48 + if (kids.isArray())
  49 + {
  50 + size_t nitems = kids.getArrayNItems();
  51 + for (size_t i = 0; i < nitems; ++i)
  52 + {
  53 + updateMap(kids.getArrayItem(i));
  54 + }
  55 + }
  56 +}
  57 +
  58 +bool
  59 +QPDFNameTreeObjectHelper::hasName(std::string const& name)
  60 +{
  61 + return this->m->entries.count(name) != 0;
  62 +}
  63 +
  64 +bool
  65 +QPDFNameTreeObjectHelper::findObject(
  66 + std::string const& name, QPDFObjectHandle& oh)
  67 +{
  68 + std::map<std::string, QPDFObjectHandle>::iterator i =
  69 + this->m->entries.find(name);
  70 + if (i == this->m->entries.end())
  71 + {
  72 + return false;
  73 + }
  74 + oh = (*i).second;
  75 + return true;
  76 +}
  77 +
  78 +std::map<std::string, QPDFObjectHandle>
  79 +QPDFNameTreeObjectHelper::getAsMap() const
  80 +{
  81 + return this->m->entries;
  82 +}
libqpdf/build.mk
@@ -40,6 +40,7 @@ SRCS_libqpdf = \ @@ -40,6 +40,7 @@ SRCS_libqpdf = \
40 libqpdf/QPDFAnnotationObjectHelper.cc \ 40 libqpdf/QPDFAnnotationObjectHelper.cc \
41 libqpdf/QPDFExc.cc \ 41 libqpdf/QPDFExc.cc \
42 libqpdf/QPDFFormFieldObjectHelper.cc \ 42 libqpdf/QPDFFormFieldObjectHelper.cc \
  43 + libqpdf/QPDFNameTreeObjectHelper.cc \
43 libqpdf/QPDFNumberTreeObjectHelper.cc \ 44 libqpdf/QPDFNumberTreeObjectHelper.cc \
44 libqpdf/QPDFObjGen.cc \ 45 libqpdf/QPDFObjGen.cc \
45 libqpdf/QPDFObject.cc \ 46 libqpdf/QPDFObject.cc \
qpdf/qtest/qpdf.test
@@ -226,13 +226,17 @@ foreach my $input (@ext_inputs) @@ -226,13 +226,17 @@ foreach my $input (@ext_inputs)
226 } 226 }
227 show_ntests(); 227 show_ntests();
228 # ---------- 228 # ----------
229 -$td->notify("--- Number Trees ---");  
230 -$n_tests += 1; 229 +$td->notify("--- Number and Name Trees ---");
  230 +$n_tests += 2;
231 231
232 $td->runtest("number trees", 232 $td->runtest("number trees",
233 {$td->COMMAND => "test_driver 46 number-tree.pdf"}, 233 {$td->COMMAND => "test_driver 46 number-tree.pdf"},
234 {$td->FILE => "number-tree.out", $td->EXIT_STATUS => 0}, 234 {$td->FILE => "number-tree.out", $td->EXIT_STATUS => 0},
235 $td->NORMALIZE_NEWLINES); 235 $td->NORMALIZE_NEWLINES);
  236 +$td->runtest("name trees",
  237 + {$td->COMMAND => "test_driver 48 name-tree.pdf"},
  238 + {$td->FILE => "name-tree.out", $td->EXIT_STATUS => 0},
  239 + $td->NORMALIZE_NEWLINES);
236 240
237 show_ntests(); 241 show_ntests();
238 # ---------- 242 # ----------
qpdf/qtest/qpdf/name-tree.out 0 → 100644
  1 +01 one -> one!
  2 +06 σιχ -> six!
  3 +07 sev•n -> seven!
  4 +11 elephant -> elephant?
  5 +12 twelve -> twelve!
  6 +15 fifteen -> fifteen!
  7 +20 twenty -> twenty.
  8 +22 twenty-two -> twenty-two!
  9 +29 twenty-nine -> twenty-nine!
  10 +test 48 done
qpdf/qtest/qpdf/name-tree.pdf 0 → 100644
  1 +%PDF-1.3
  2 +%¿÷¢þ
  3 +%QDF-1.0
  4 +
  5 +1 0 obj
  6 +<<
  7 + /Pages 2 0 R
  8 + /Type /Catalog
  9 +>>
  10 +endobj
  11 +
  12 +2 0 obj
  13 +<<
  14 + /Count 1
  15 + /Kids [
  16 + 3 0 R
  17 + ]
  18 + /Type /Pages
  19 +>>
  20 +endobj
  21 +
  22 +%% Page 1
  23 +3 0 obj
  24 +<<
  25 + /Contents 4 0 R
  26 + /MediaBox [
  27 + 0
  28 + 0
  29 + 612
  30 + 792
  31 + ]
  32 + /Parent 2 0 R
  33 + /Resources <<
  34 + /Font <<
  35 + /F1 6 0 R
  36 + >>
  37 + /ProcSet 7 0 R
  38 + >>
  39 + /Type /Page
  40 +>>
  41 +endobj
  42 +
  43 +%% Contents for page 1
  44 +4 0 obj
  45 +<<
  46 + /Length 5 0 R
  47 +>>
  48 +stream
  49 +BT
  50 + /F1 24 Tf
  51 + 72 720 Td
  52 + (Potato) Tj
  53 +ET
  54 +endstream
  55 +endobj
  56 +
  57 +5 0 obj
  58 +44
  59 +endobj
  60 +
  61 +6 0 obj
  62 +<<
  63 + /BaseFont /Helvetica
  64 + /Encoding /WinAnsiEncoding
  65 + /Name /F1
  66 + /Subtype /Type1
  67 + /Type /Font
  68 +>>
  69 +endobj
  70 +
  71 +7 0 obj
  72 +[
  73 + /PDF
  74 + /Text
  75 +]
  76 +endobj
  77 +
  78 +8 0 obj
  79 +<<
  80 + /Kids [
  81 + 9 0 R
  82 + 10 0 R
  83 + ]
  84 +>>
  85 +endobj
  86 +
  87 +9 0 obj
  88 +<<
  89 + /Kids [
  90 + 11 0 R
  91 + 12 0 R
  92 + ]
  93 + /Limits [
  94 + 0
  95 + 19
  96 + ]
  97 +>>
  98 +endobj
  99 +
  100 +10 0 obj
  101 +<<
  102 + /Limits [
  103 + 20
  104 + 29
  105 + ]
  106 + /Names [
  107 + (20 twenty) (twenty.)
  108 + (22 twenty-two) (twenty-two!)
  109 + (29 twenty-nine) (twenty-nine!)
  110 + ]
  111 +>>
  112 +endobj
  113 +
  114 +11 0 obj
  115 +<<
  116 + /Limits [
  117 + (01 one)
  118 + <feff0030003700200073006500762022006e>
  119 + ]
  120 + /Names [
  121 + (01 one) (one!)
  122 + <feff00300036002003C303B903C7> (six!)
  123 + (07 sev€n) (seven!)
  124 + ]
  125 +>>
  126 +endobj
  127 +
  128 +12 0 obj
  129 +<<
  130 + /Limits [
  131 + (11 elephant)
  132 + (15 fifteen)
  133 + ]
  134 + /Names [
  135 + (11 elephant) (elephant?)
  136 + (12 twelve) (twelve!)
  137 + (15 fifteen) (fifteen!)
  138 + ]
  139 +>>
  140 +endobj
  141 +
  142 +
  143 +xref
  144 +0 13
  145 +0000000000 65535 f
  146 +0000000025 00000 n
  147 +0000000079 00000 n
  148 +0000000161 00000 n
  149 +0000000376 00000 n
  150 +0000000475 00000 n
  151 +0000000494 00000 n
  152 +0000000612 00000 n
  153 +0000000647 00000 n
  154 +0000000704 00000 n
  155 +0000000791 00000 n
  156 +0000000955 00000 n
  157 +0000001151 00000 n
  158 +trailer <<
  159 + /Root 1 0 R
  160 + /QTest 8 0 R
  161 + /Size 13
  162 + /ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><2c3b7a6ec7fc61db8a5db4eebf57f540>]
  163 +>>
  164 +startxref
  165 +1325
  166 +%%EOF
qpdf/test_driver.cc
@@ -7,6 +7,7 @@ @@ -7,6 +7,7 @@
7 #include <qpdf/QPDFPageObjectHelper.hh> 7 #include <qpdf/QPDFPageObjectHelper.hh>
8 #include <qpdf/QPDFAcroFormDocumentHelper.hh> 8 #include <qpdf/QPDFAcroFormDocumentHelper.hh>
9 #include <qpdf/QPDFNumberTreeObjectHelper.hh> 9 #include <qpdf/QPDFNumberTreeObjectHelper.hh>
  10 +#include <qpdf/QPDFNameTreeObjectHelper.hh>
10 #include <qpdf/QPDFPageLabelDocumentHelper.hh> 11 #include <qpdf/QPDFPageLabelDocumentHelper.hh>
11 #include <qpdf/QUtil.hh> 12 #include <qpdf/QUtil.hh>
12 #include <qpdf/QTC.hh> 13 #include <qpdf/QTC.hh>
@@ -1706,6 +1707,29 @@ void runtest(int n, char const* filename1, char const* arg2) @@ -1706,6 +1707,29 @@ void runtest(int n, char const* filename1, char const* arg2)
1706 << labels.at(i+1).unparse() << std::endl; 1707 << labels.at(i+1).unparse() << std::endl;
1707 } 1708 }
1708 } 1709 }
  1710 + else if (n == 48)
  1711 + {
  1712 + // Test name tree. This test is crafted to work with
  1713 + // name-tree.pdf
  1714 + QPDFObjectHandle qtest = pdf.getTrailer().getKey("/QTest");
  1715 + QPDFNameTreeObjectHelper ntoh(qtest);
  1716 + std::map<std::string, QPDFObjectHandle> ntoh_map = ntoh.getAsMap();
  1717 + for (std::map<std::string, QPDFObjectHandle>::iterator iter =
  1718 + ntoh_map.begin();
  1719 + iter != ntoh_map.end(); ++iter)
  1720 + {
  1721 + std::cout << (*iter).first << " -> "
  1722 + << (*iter).second.getStringValue()
  1723 + << std::endl;
  1724 + }
  1725 + assert(ntoh.hasName("11 elephant"));
  1726 + assert(ntoh.hasName("07 sev\xe2\x80\xa2n"));
  1727 + assert(! ntoh.hasName("potato"));
  1728 + QPDFObjectHandle oh;
  1729 + assert(! ntoh.findObject("potato", oh));
  1730 + assert(ntoh.findObject("07 sev\xe2\x80\xa2n", oh));
  1731 + assert("seven!" == oh.getStringValue());
  1732 + }
1709 else 1733 else
1710 { 1734 {
1711 throw std::runtime_error(std::string("invalid test ") + 1735 throw std::runtime_error(std::string("invalid test ") +