Commit 3f1ab640669ac493f1b2985b70322dba7c037ac9

Authored by Jay Berkenbilt
1 parent 4b2e72c4

Pass offset and length to ParserCallbacks::handleObject

ChangeLog
1 1 2019-08-22 Jay Berkenbilt <ejb@ql.org>
2 2  
  3 + * In QPDFObjectHandle::ParserCallbacks, in addition to
  4 + handleObject(QPDFObjectHandle), allow developers to override
  5 + handleObject(QPDFObjectHandle, size_t offset, size_t length). If
  6 + this method is overridden instead, it is called with the offset of the
  7 + object in the content stream (which may be concatenated from an
  8 + array of streams) and the length of the object. Intervening
  9 + whitespace and comments are not included in offset and length.
  10 +
  11 + * Add method
  12 + QPDFObjectHandle::ParserCallbacks::contentSize(size_t). If
  13 + defined, it is called by the content stream parser before the
  14 + first call to handleObject, and the argument is the total size in
  15 + bytes of the content streams.
  16 +
3 17 * Add QPDFObjectHandle::isDirectNull() -- a const method that
4 18 allows determining whether an object is a literal null without
5 19 attempting to resolve it.
... ...
examples/pdf-parse-content.cc
... ... @@ -26,14 +26,23 @@ class ParserCallbacks: public QPDFObjectHandle::ParserCallbacks
26 26 {
27 27 }
28 28  
29   - virtual void handleObject(QPDFObjectHandle);
  29 + virtual void contentSize(size_t);
  30 + virtual void handleObject(QPDFObjectHandle, size_t offset, size_t length);
30 31 virtual void handleEOF();
31 32 };
32 33  
33 34 void
34   -ParserCallbacks::handleObject(QPDFObjectHandle obj)
  35 +ParserCallbacks::contentSize(size_t size)
35 36 {
36   - std::cout << obj.getTypeName() << ": ";
  37 + std::cout << "content size: " << size << std::endl;
  38 +}
  39 +
  40 +void
  41 +ParserCallbacks::handleObject(QPDFObjectHandle obj,
  42 + size_t offset, size_t length)
  43 +{
  44 + std::cout << obj.getTypeName() << ", offset=" << offset
  45 + << ", length=" << length << ": ";
37 46 if (obj.isInlineImage())
38 47 {
39 48 std::cout << QUtil::hex_encode(obj.getInlineImageValue()) << std::endl;
... ...
examples/qtest/parse-content/content.out
1   -operator: BT
2   -name: /F1
3   -integer: 24
4   -operator: Tf
5   -integer: 72
6   -integer: 720
7   -operator: Td
8   -string: (Potato)
9   -operator: Tj
10   -operator: ET
  1 +content size: 44
  2 +operator, offset=0, length=2: BT
  3 +name, offset=5, length=3: /F1
  4 +integer, offset=9, length=2: 24
  5 +operator, offset=12, length=2: Tf
  6 +integer, offset=17, length=2: 72
  7 +integer, offset=20, length=3: 720
  8 +operator, offset=24, length=2: Td
  9 +string, offset=29, length=8: (Potato)
  10 +operator, offset=38, length=2: Tj
  11 +operator, offset=41, length=2: ET
11 12 -EOF-
... ...
include/qpdf/QPDFObjectHandle.hh
... ... @@ -159,16 +159,28 @@ class QPDFObjectHandle
159 159 // This class is used by parsePageContents. Callers must
160 160 // instantiate a subclass of this with handlers defined to accept
161 161 // QPDFObjectHandles that are parsed from the stream.
162   - class ParserCallbacks
  162 + class QPDF_DLL_CLASS ParserCallbacks
163 163 {
164 164 public:
165 165 QPDF_DLL
166 166 virtual ~ParserCallbacks()
167 167 {
168 168 }
169   - virtual void handleObject(QPDFObjectHandle) = 0;
  169 + // One of the handleObject methods must be overridden.
  170 + QPDF_DLL
  171 + virtual void handleObject(QPDFObjectHandle);
  172 + QPDF_DLL
  173 + virtual void handleObject(
  174 + QPDFObjectHandle, size_t offset, size_t length);
  175 +
170 176 virtual void handleEOF() = 0;
171 177  
  178 + // Override this if you want to know the full size of the
  179 + // contents, possibly after concatenation of multiple streams.
  180 + // This is called before the first call to handleObject.
  181 + QPDF_DLL
  182 + virtual void contentSize(size_t);
  183 +
172 184 protected:
173 185 // Implementors may call this method during parsing to
174 186 // terminate parsing early. This method throws an exception
... ...
libqpdf/QPDFObjectHandle.cc
... ... @@ -106,6 +106,29 @@ QPDFObjectHandle::TokenFilter::writeToken(QPDFTokenizer::Token const& token)
106 106 }
107 107  
108 108 void
  109 +QPDFObjectHandle::ParserCallbacks::handleObject(QPDFObjectHandle)
  110 +{
  111 + throw std::logic_error("You must override one of the"
  112 + " handleObject methods in ParserCallbacks");
  113 +}
  114 +
  115 +void
  116 +QPDFObjectHandle::ParserCallbacks::handleObject(
  117 + QPDFObjectHandle oh, size_t, size_t)
  118 +{
  119 + // This version of handleObject was added in qpdf 9. If the
  120 + // developer did not override it, fall back to the older
  121 + // interface.
  122 + handleObject(oh);
  123 +}
  124 +
  125 +void
  126 +QPDFObjectHandle::ParserCallbacks::contentSize(size_t)
  127 +{
  128 + // Ignore by default; overriding this is optional.
  129 +}
  130 +
  131 +void
109 132 QPDFObjectHandle::ParserCallbacks::terminateParsing()
110 133 {
111 134 throw TerminateParsing();
... ... @@ -1615,6 +1638,7 @@ QPDFObjectHandle::parseContentStream_internal(
1615 1638 std::string all_description;
1616 1639 pipeContentStreams(&buf, description, all_description);
1617 1640 PointerHolder<Buffer> stream_data = buf.getBuffer();
  1641 + callbacks->contentSize(stream_data->getSize());
1618 1642 try
1619 1643 {
1620 1644 parseContentStream_data(stream_data, all_description,
... ... @@ -1642,6 +1666,13 @@ QPDFObjectHandle::parseContentStream_data(
1642 1666 bool empty = false;
1643 1667 while (QIntC::to_size(input->tell()) < length)
1644 1668 {
  1669 + // Read a token and seek to the beginning. The offset we get
  1670 + // from this process is the beginning of the next
  1671 + // non-ignorable (space, comment) token. This way, the offset
  1672 + // and length don't include ignorable content.
  1673 + tokenizer.readToken(input, "content", true);
  1674 + qpdf_offset_t offset = input->getLastOffset();
  1675 + input->seek(offset, SEEK_SET);
1645 1676 QPDFObjectHandle obj =
1646 1677 parseInternal(input, "content", tokenizer,
1647 1678 empty, 0, context, true);
... ... @@ -1650,8 +1681,9 @@ QPDFObjectHandle::parseContentStream_data(
1650 1681 // EOF
1651 1682 break;
1652 1683 }
  1684 + size_t length = QIntC::to_size(input->tell() - offset);
1653 1685  
1654   - callbacks->handleObject(obj);
  1686 + callbacks->handleObject(obj, QIntC::to_size(offset), length);
1655 1687 if (obj.isOperator() && (obj.getOperatorValue() == "ID"))
1656 1688 {
1657 1689 // Discard next character; it is the space after ID that
... ... @@ -1661,6 +1693,8 @@ QPDFObjectHandle::parseContentStream_data(
1661 1693 tokenizer.expectInlineImage(input);
1662 1694 QPDFTokenizer::Token t =
1663 1695 tokenizer.readToken(input, description, true);
  1696 + offset = input->getLastOffset();
  1697 + length = QIntC::to_size(input->tell() - offset);
1664 1698 if (t.getType() == QPDFTokenizer::tt_bad)
1665 1699 {
1666 1700 QTC::TC("qpdf", "QPDFObjectHandle EOF in inline image");
... ... @@ -1674,7 +1708,8 @@ QPDFObjectHandle::parseContentStream_data(
1674 1708 std::string inline_image = t.getValue();
1675 1709 QTC::TC("qpdf", "QPDFObjectHandle inline image token");
1676 1710 callbacks->handleObject(
1677   - QPDFObjectHandle::newInlineImage(inline_image));
  1711 + QPDFObjectHandle::newInlineImage(inline_image),
  1712 + QIntC::to_size(offset), length);
1678 1713 }
1679 1714 }
1680 1715 }
... ...
manual/qpdf-manual.xml
... ... @@ -4491,6 +4491,25 @@ print &quot;\n&quot;;
4491 4491 </listitem>
4492 4492 <listitem>
4493 4493 <para>
  4494 + When parsing content streams with
  4495 + <classname>QPDFObjectHandle::ParserCallbacks</classname>, in
  4496 + place of the method
  4497 + <function>handleObject(QPDFObjectHandle)</function>, the
  4498 + developer may override
  4499 + <function>handleObject(QPDFObjectHandle, size_t offset,
  4500 + size_t length)</function>. If this method is defined, it
  4501 + will be invoked with the object along with its offset and
  4502 + length within the overall contents being parsed. Intervening
  4503 + spaces and comments are not included in offset and length.
  4504 + Additionally, a new method
  4505 + <function>contentSize(size_t)</function> may be implemented.
  4506 + If present, it will be called prior to the first call to
  4507 + <function>handleObject</function> with the total size in
  4508 + bytes of the combined contents.
  4509 + </para>
  4510 + </listitem>
  4511 + <listitem>
  4512 + <para>
4494 4513 The underlying implementation of QPDF arrays has been
4495 4514 enhanced to be much more memory efficient when dealing with
4496 4515 arrays with lots of nulls. This enables qpdf to use
... ...
qpdf/qtest/qpdf/eof-in-inline-image.out
1   -operator: BT
2   -name: /F1
3   -integer: 24
4   -operator: Tf
5   -integer: 72
6   -integer: 720
7   -operator: Td
8   -string: (Potato)
9   -operator: Tj
10   -operator: ET
11   -operator: BI
12   -name: /CS
13   -name: /G
14   -name: /W
15   -integer: 1
16   -name: /H
17   -integer: 1
18   -name: /BPC
19   -integer: 8
20   -name: /F
21   -name: /Fl
22   -name: /DP
23   -dictionary: << /Columns 1 /Predictor 15 >>
24   -operator: ID
  1 +content size: 139
  2 +operator, offset=0, length=2: BT
  3 +name, offset=5, length=3: /F1
  4 +integer, offset=9, length=2: 24
  5 +operator, offset=12, length=2: Tf
  6 +integer, offset=17, length=2: 72
  7 +integer, offset=20, length=3: 720
  8 +operator, offset=24, length=2: Td
  9 +string, offset=29, length=8: (Potato)
  10 +operator, offset=38, length=2: Tj
  11 +operator, offset=41, length=2: ET
  12 +operator, offset=66, length=2: BI
  13 +name, offset=69, length=3: /CS
  14 +name, offset=73, length=2: /G
  15 +name, offset=75, length=2: /W
  16 +integer, offset=78, length=1: 1
  17 +name, offset=79, length=2: /H
  18 +integer, offset=82, length=1: 1
  19 +name, offset=83, length=4: /BPC
  20 +integer, offset=88, length=1: 8
  21 +name, offset=89, length=2: /F
  22 +name, offset=91, length=3: /Fl
  23 +name, offset=94, length=3: /DP
  24 +dictionary, offset=97, length=27: << /Columns 1 /Predictor 15 >>
  25 +operator, offset=125, length=2: ID
25 26 WARNING: page object 3 0 stream 4 0 (stream data, offset 139): EOF found while reading inline image
26 27 -EOF-
27 28 test 37 done
... ...
qpdf/qtest/qpdf/terminate-parsing.out
1   -name: /potato
  1 +content size: 44
  2 +name, offset=0, length=7: /potato
2 3 test suite: terminating parsing
3   -real: 0.1
4   -integer: 0
5   -integer: 0
6   -real: 0.1
7   -integer: 0
8   -integer: 0
9   -operator: cm
10   -operator: q
11   -integer: 0
12   -real: 1.1999
13   -real: -1.1999
14   -integer: 0
15   -real: 121.19
16   -real: 150.009
17   -operator: cm
18   -operator: BI
19   -name: /CS
20   -name: /G
21   -name: /W
22   -integer: 1
23   -name: /H
24   -integer: 1
25   -name: /BPC
26   -integer: 8
27   -name: /F
28   -name: /Fl
29   -name: /DP
30   -dictionary: << /Columns 1 /Predictor 15 >>
31   -operator: ID
32   -inline-image: 789c63fc0f00010301010a
33   -operator: EI
34   -operator: Q
35   -operator: q
36   -integer: 0
37   -real: 35.997
38   -real: -128.389
39   -integer: 0
40   -real: 431.964
41   -real: 7269.02
42   -operator: cm
43   -operator: BI
44   -name: /CS
45   -name: /G
46   -name: /W
47   -integer: 30
48   -name: /H
49   -integer: 107
50   -name: /BPC
51   -integer: 8
52   -name: /F
53   -name: /Fl
54   -name: /DP
55   -dictionary: << /Columns 30 /Predictor 15 >>
56   -operator: ID
57   -inline-image: 789cedd1a11100300800b1b2ffd06503148283bc8dfcf8af2a306ee352eff2e06318638c31c63b3801627b620a0a
58   -operator: EI
59   -operator: Q
60   -operator: q
61   -integer: 0
62   -real: 38.3968
63   -real: -93.5922
64   -integer: 0
65   -real: 431.964
66   -real: 7567.79
67   -operator: cm
68   -operator: BI
69   -name: /CS
70   -name: /G
71   -name: /W
72   -integer: 32
73   -name: /H
74   -integer: 78
75   -name: /BPC
76   -integer: 8
77   -name: /F
78   -name: /Fl
79   -name: /DP
80   -dictionary: << /Columns 32 /Predictor 15 >>
81   -operator: ID
82   -inline-image: 789c63fccf801f308e2a185530aa60882a20203faa605401890a0643aa1e5530aa6054010d140000bdd03c130a
83   -operator: EI
84   -operator: Q
  4 +content size: 454
  5 +real, offset=0, length=3: 0.1
  6 +integer, offset=4, length=1: 0
  7 +integer, offset=6, length=1: 0
  8 +real, offset=8, length=3: 0.1
  9 +integer, offset=12, length=1: 0
  10 +integer, offset=14, length=1: 0
  11 +operator, offset=16, length=2: cm
  12 +operator, offset=19, length=1: q
  13 +integer, offset=21, length=1: 0
  14 +real, offset=23, length=6: 1.1999
  15 +real, offset=30, length=7: -1.1999
  16 +integer, offset=38, length=2: 0
  17 +real, offset=41, length=6: 121.19
  18 +real, offset=48, length=7: 150.009
  19 +operator, offset=56, length=2: cm
  20 +operator, offset=59, length=2: BI
  21 +name, offset=62, length=3: /CS
  22 +name, offset=66, length=2: /G
  23 +name, offset=68, length=2: /W
  24 +integer, offset=71, length=1: 1
  25 +name, offset=72, length=2: /H
  26 +integer, offset=75, length=1: 1
  27 +name, offset=76, length=4: /BPC
  28 +integer, offset=81, length=1: 8
  29 +name, offset=82, length=2: /F
  30 +name, offset=84, length=3: /Fl
  31 +name, offset=87, length=3: /DP
  32 +dictionary, offset=90, length=27: << /Columns 1 /Predictor 15 >>
  33 +operator, offset=118, length=2: ID
  34 +inline-image, offset=121, length=11: 789c63fc0f00010301010a
  35 +operator, offset=132, length=2: EI
  36 +operator, offset=135, length=1: Q
  37 +operator, offset=137, length=1: q
  38 +integer, offset=139, length=1: 0
  39 +real, offset=141, length=6: 35.997
  40 +real, offset=148, length=8: -128.389
  41 +integer, offset=157, length=2: 0
  42 +real, offset=160, length=7: 431.964
  43 +real, offset=168, length=7: 7269.02
  44 +operator, offset=176, length=2: cm
  45 +operator, offset=179, length=2: BI
  46 +name, offset=182, length=3: /CS
  47 +name, offset=186, length=2: /G
  48 +name, offset=188, length=2: /W
  49 +integer, offset=191, length=2: 30
  50 +name, offset=193, length=2: /H
  51 +integer, offset=196, length=3: 107
  52 +name, offset=199, length=4: /BPC
  53 +integer, offset=204, length=1: 8
  54 +name, offset=205, length=2: /F
  55 +name, offset=207, length=3: /Fl
  56 +name, offset=210, length=3: /DP
  57 +dictionary, offset=213, length=28: << /Columns 30 /Predictor 15 >>
  58 +operator, offset=242, length=2: ID
  59 +inline-image, offset=245, length=46: 789cedd1a11100300800b1b2ffd06503148283bc8dfcf8af2a306ee352eff2e06318638c31c63b3801627b620a0a
  60 +operator, offset=291, length=2: EI
  61 +operator, offset=294, length=1: Q
  62 +operator, offset=296, length=1: q
  63 +integer, offset=298, length=1: 0
  64 +real, offset=300, length=7: 38.3968
  65 +real, offset=308, length=8: -93.5922
  66 +integer, offset=317, length=2: 0
  67 +real, offset=320, length=7: 431.964
  68 +real, offset=328, length=7: 7567.79
  69 +operator, offset=336, length=2: cm
  70 +operator, offset=339, length=2: BI
  71 +name, offset=342, length=3: /CS
  72 +name, offset=346, length=2: /G
  73 +name, offset=348, length=2: /W
  74 +integer, offset=351, length=2: 32
  75 +name, offset=353, length=2: /H
  76 +integer, offset=356, length=2: 78
  77 +name, offset=358, length=4: /BPC
  78 +integer, offset=363, length=1: 8
  79 +name, offset=364, length=2: /F
  80 +name, offset=366, length=3: /Fl
  81 +name, offset=369, length=3: /DP
  82 +dictionary, offset=372, length=28: << /Columns 32 /Predictor 15 >>
  83 +operator, offset=401, length=2: ID
  84 +inline-image, offset=404, length=45: 789c63fccf801f308e2a185530aa60882a20203faa605401890a0643aa1e5530aa6054010d140000bdd03c130a
  85 +operator, offset=449, length=2: EI
  86 +operator, offset=452, length=1: Q
85 87 -EOF-
86 88 test 37 done
... ...
qpdf/qtest/qpdf/tokenize-content-streams.out
1   -operator: BT
2   -name: /F1
3   -integer: 24
4   -operator: Tf
5   -integer: 72
6   -integer: 720
7   -operator: Td
8   -string: (Potato)
9   -operator: Tj
10   -operator: ET
  1 +content size: 44
  2 +operator, offset=0, length=2: BT
  3 +name, offset=5, length=3: /F1
  4 +integer, offset=9, length=2: 24
  5 +operator, offset=12, length=2: Tf
  6 +integer, offset=17, length=2: 72
  7 +integer, offset=20, length=3: 720
  8 +operator, offset=24, length=2: Td
  9 +string, offset=29, length=8: (Potato)
  10 +operator, offset=38, length=2: Tj
  11 +operator, offset=41, length=2: ET
11 12 -EOF-
12   -real: 0.1
13   -integer: 0
14   -integer: 0
15   -real: 0.1
16   -integer: 0
17   -integer: 0
18   -operator: cm
19   -operator: q
20   -integer: 0
21   -real: 1.1999
22   -real: -1.1999
23   -integer: 0
24   -real: 121.19
25   -real: 150.009
26   -operator: cm
27   -operator: BI
28   -name: /CS
29   -name: /G
30   -name: /W
31   -integer: 1
32   -name: /H
33   -integer: 1
34   -name: /BPC
35   -integer: 8
36   -name: /F
37   -name: /Fl
38   -name: /DP
39   -dictionary: << /Columns 1 /Predictor 15 >>
40   -operator: ID
41   -inline-image: 789c63fc0f00010301010a
42   -operator: EI
43   -operator: Q
44   -operator: q
45   -integer: 0
46   -real: 35.997
47   -real: -128.389
48   -integer: 0
49   -real: 431.964
50   -real: 7269.02
51   -operator: cm
52   -operator: BI
53   -name: /CS
54   -name: /G
55   -name: /W
56   -integer: 30
57   -name: /H
58   -integer: 107
59   -name: /BPC
60   -integer: 8
61   -name: /F
62   -name: /Fl
63   -name: /DP
64   -dictionary: << /Columns 30 /Predictor 15 >>
65   -operator: ID
66   -inline-image: 789cedd1a11100300800b1b2ffd06503148283bc8dfcf8af2a306ee352eff2e06318638c31c63b3801627b620a0a
67   -operator: EI
68   -operator: Q
69   -operator: q
70   -integer: 0
71   -real: 38.3968
72   -real: -93.5922
73   -integer: 0
74   -real: 431.964
75   -real: 7567.79
76   -operator: cm
77   -operator: BI
78   -name: /CS
79   -name: /G
80   -name: /W
81   -integer: 32
82   -name: /H
83   -integer: 78
84   -name: /BPC
85   -integer: 8
86   -name: /F
87   -name: /Fl
88   -name: /DP
89   -dictionary: << /Columns 32 /Predictor 15 >>
90   -operator: ID
91   -inline-image: 789c63fccf801f308e2a185530aa60882a20203faa605401890a0643aa1e5530aa6054010d140000bdd03c130a
92   -operator: EI
93   -operator: Q
  13 +content size: 490
  14 +real, offset=0, length=3: 0.1
  15 +integer, offset=4, length=1: 0
  16 +integer, offset=6, length=1: 0
  17 +real, offset=8, length=3: 0.1
  18 +integer, offset=12, length=1: 0
  19 +integer, offset=14, length=1: 0
  20 +operator, offset=16, length=2: cm
  21 +operator, offset=19, length=1: q
  22 +integer, offset=21, length=1: 0
  23 +real, offset=23, length=6: 1.1999
  24 +real, offset=30, length=7: -1.1999
  25 +integer, offset=38, length=2: 0
  26 +real, offset=41, length=6: 121.19
  27 +real, offset=48, length=7: 150.009
  28 +operator, offset=56, length=2: cm
  29 +operator, offset=59, length=2: BI
  30 +name, offset=62, length=3: /CS
  31 +name, offset=66, length=2: /G
  32 +name, offset=68, length=2: /W
  33 +integer, offset=71, length=1: 1
  34 +name, offset=72, length=2: /H
  35 +integer, offset=75, length=1: 1
  36 +name, offset=76, length=4: /BPC
  37 +integer, offset=81, length=1: 8
  38 +name, offset=82, length=2: /F
  39 +name, offset=84, length=3: /Fl
  40 +name, offset=87, length=3: /DP
  41 +dictionary, offset=90, length=27: << /Columns 1 /Predictor 15 >>
  42 +operator, offset=118, length=2: ID
  43 +inline-image, offset=121, length=11: 789c63fc0f00010301010a
  44 +operator, offset=132, length=2: EI
  45 +operator, offset=135, length=1: Q
  46 +operator, offset=137, length=1: q
  47 +integer, offset=139, length=1: 0
  48 +real, offset=141, length=6: 35.997
  49 +real, offset=148, length=8: -128.389
  50 +integer, offset=157, length=2: 0
  51 +real, offset=160, length=7: 431.964
  52 +real, offset=168, length=7: 7269.02
  53 +operator, offset=176, length=2: cm
  54 +operator, offset=179, length=2: BI
  55 +name, offset=182, length=3: /CS
  56 +name, offset=186, length=2: /G
  57 +name, offset=188, length=2: /W
  58 +integer, offset=191, length=2: 30
  59 +name, offset=193, length=2: /H
  60 +integer, offset=196, length=3: 107
  61 +name, offset=199, length=4: /BPC
  62 +integer, offset=204, length=1: 8
  63 +name, offset=205, length=2: /F
  64 +name, offset=207, length=3: /Fl
  65 +name, offset=210, length=3: /DP
  66 +dictionary, offset=214, length=28: << /Columns 30 /Predictor 15 >>
  67 +operator, offset=243, length=2: ID
  68 +inline-image, offset=246, length=46: 789cedd1a11100300800b1b2ffd06503148283bc8dfcf8af2a306ee352eff2e06318638c31c63b3801627b620a0a
  69 +operator, offset=292, length=2: EI
  70 +operator, offset=295, length=1: Q
  71 +operator, offset=297, length=1: q
  72 +array, offset=299, length=30: [ 1 /two (three) << /four 5 >> ]
  73 +operator, offset=330, length=1: Q
  74 +operator, offset=332, length=1: q
  75 +integer, offset=334, length=1: 0
  76 +real, offset=336, length=7: 38.3968
  77 +real, offset=344, length=8: -93.5922
  78 +integer, offset=353, length=2: 0
  79 +real, offset=356, length=7: 431.964
  80 +real, offset=364, length=7: 7567.79
  81 +operator, offset=372, length=2: cm
  82 +operator, offset=375, length=2: BI
  83 +name, offset=378, length=3: /CS
  84 +name, offset=382, length=2: /G
  85 +name, offset=384, length=2: /W
  86 +integer, offset=387, length=2: 32
  87 +name, offset=389, length=2: /H
  88 +integer, offset=392, length=2: 78
  89 +name, offset=394, length=4: /BPC
  90 +integer, offset=399, length=1: 8
  91 +name, offset=400, length=2: /F
  92 +name, offset=402, length=3: /Fl
  93 +name, offset=405, length=3: /DP
  94 +dictionary, offset=408, length=28: << /Columns 32 /Predictor 15 >>
  95 +operator, offset=437, length=2: ID
  96 +inline-image, offset=440, length=45: 789c63fccf801f308e2a185530aa60882a20203faa605401890a0643aa1e5530aa6054010d140000bdd03c130a
  97 +operator, offset=485, length=2: EI
  98 +operator, offset=488, length=1: Q
94 99 -EOF-
95 100 test 37 done
... ...
qpdf/qtest/qpdf/tokenize-content-streams.pdf
No preview for this file type
qpdf/test_driver.cc
... ... @@ -76,19 +76,28 @@ class ParserCallbacks: public QPDFObjectHandle::ParserCallbacks
76 76 {
77 77 }
78 78  
79   - virtual void handleObject(QPDFObjectHandle);
  79 + virtual void contentSize(size_t size);
  80 + virtual void handleObject(QPDFObjectHandle, size_t, size_t);
80 81 virtual void handleEOF();
81 82 };
82 83  
83 84 void
84   -ParserCallbacks::handleObject(QPDFObjectHandle obj)
  85 +ParserCallbacks::contentSize(size_t size)
  86 +{
  87 + std::cout << "content size: " << size << std::endl;
  88 +}
  89 +
  90 +void
  91 +ParserCallbacks::handleObject(QPDFObjectHandle obj,
  92 + size_t offset, size_t length)
85 93 {
86 94 if (obj.isName() && (obj.getName() == "/Abort"))
87 95 {
88 96 std::cout << "test suite: terminating parsing" << std::endl;
89 97 terminateParsing();
90 98 }
91   - std::cout << obj.getTypeName() << ": ";
  99 + std::cout << obj.getTypeName() << ", offset=" << offset
  100 + << ", length=" << length << ": ";
92 101 if (obj.isInlineImage())
93 102 {
94 103 // Exercise getTypeCode
... ...