Commit 3f1ab640669ac493f1b2985b70322dba7c037ac9

Authored by Jay Berkenbilt
1 parent 4b2e72c4

Pass offset and length to ParserCallbacks::handleObject

ChangeLog
1 2019-08-22 Jay Berkenbilt <ejb@ql.org> 1 2019-08-22 Jay Berkenbilt <ejb@ql.org>
2 2
  3 + * In QPDFObjectHandle::ParserCallbacks, in addition to
  4 + handleObject(QPDFObjectHandle), allow developers to override
  5 + handleObject(QPDFObjectHandle, size_t offset, size_t length). If
  6 + this method appears instead, it is called with the offset of the
  7 + object in the content stream (which may be concatenated from an
  8 + array of streams) and the length of the object. Intervening
  9 + whitespace and comments are not included in offset and length.
  10 +
  11 + * Add method
  12 + QPDFObjectHandle::ParserCallbacks::contentSize(size_t). If
  13 + defined, it is called by the content stream parser before the
  14 + first call to handleObject, and the argument is the total size in
  15 + bytes of the content streams.
  16 +
3 * Add QPDFObjectHandle::isDirectNull() -- a const method that 17 * Add QPDFObjectHandle::isDirectNull() -- a const method that
4 allows determining whether an object is a literal null without 18 allows determining whether an object is a literal null without
5 attempting to resolve it. 19 attempting to resolve it.
examples/pdf-parse-content.cc
@@ -26,14 +26,23 @@ class ParserCallbacks: public QPDFObjectHandle::ParserCallbacks @@ -26,14 +26,23 @@ class ParserCallbacks: public QPDFObjectHandle::ParserCallbacks
26 { 26 {
27 } 27 }
28 28
29 - virtual void handleObject(QPDFObjectHandle); 29 + virtual void contentSize(size_t);
  30 + virtual void handleObject(QPDFObjectHandle, size_t offset, size_t length);
30 virtual void handleEOF(); 31 virtual void handleEOF();
31 }; 32 };
32 33
33 void 34 void
34 -ParserCallbacks::handleObject(QPDFObjectHandle obj) 35 +ParserCallbacks::contentSize(size_t size)
35 { 36 {
36 - std::cout << obj.getTypeName() << ": "; 37 + std::cout << "content size: " << size << std::endl;
  38 +}
  39 +
  40 +void
  41 +ParserCallbacks::handleObject(QPDFObjectHandle obj,
  42 + size_t offset, size_t length)
  43 +{
  44 + std::cout << obj.getTypeName() << ", offset=" << offset
  45 + << ", length=" << length << ": ";
37 if (obj.isInlineImage()) 46 if (obj.isInlineImage())
38 { 47 {
39 std::cout << QUtil::hex_encode(obj.getInlineImageValue()) << std::endl; 48 std::cout << QUtil::hex_encode(obj.getInlineImageValue()) << std::endl;
examples/qtest/parse-content/content.out
1 -operator: BT  
2 -name: /F1  
3 -integer: 24  
4 -operator: Tf  
5 -integer: 72  
6 -integer: 720  
7 -operator: Td  
8 -string: (Potato)  
9 -operator: Tj  
10 -operator: ET 1 +content size: 44
  2 +operator, offset=0, length=2: BT
  3 +name, offset=5, length=3: /F1
  4 +integer, offset=9, length=2: 24
  5 +operator, offset=12, length=2: Tf
  6 +integer, offset=17, length=2: 72
  7 +integer, offset=20, length=3: 720
  8 +operator, offset=24, length=2: Td
  9 +string, offset=29, length=8: (Potato)
  10 +operator, offset=38, length=2: Tj
  11 +operator, offset=41, length=2: ET
11 -EOF- 12 -EOF-
include/qpdf/QPDFObjectHandle.hh
@@ -159,16 +159,28 @@ class QPDFObjectHandle @@ -159,16 +159,28 @@ class QPDFObjectHandle
159 // This class is used by parsePageContents. Callers must 159 // This class is used by parsePageContents. Callers must
160 // instantiate a subclass of this with handlers defined to accept 160 // instantiate a subclass of this with handlers defined to accept
161 // QPDFObjectHandles that are parsed from the stream. 161 // QPDFObjectHandles that are parsed from the stream.
162 - class ParserCallbacks 162 + class QPDF_DLL_CLASS ParserCallbacks
163 { 163 {
164 public: 164 public:
165 QPDF_DLL 165 QPDF_DLL
166 virtual ~ParserCallbacks() 166 virtual ~ParserCallbacks()
167 { 167 {
168 } 168 }
169 - virtual void handleObject(QPDFObjectHandle) = 0; 169 + // One of the handleObject methods must be overridden.
  170 + QPDF_DLL
  171 + virtual void handleObject(QPDFObjectHandle);
  172 + QPDF_DLL
  173 + virtual void handleObject(
  174 + QPDFObjectHandle, size_t offset, size_t length);
  175 +
170 virtual void handleEOF() = 0; 176 virtual void handleEOF() = 0;
171 177
  178 + // Override this if you want to know the full size of the
  179 + // contents, possibly after concatenation of multiple streams.
  180 + // This is called before the first call to handleObject.
  181 + QPDF_DLL
  182 + virtual void contentSize(size_t);
  183 +
172 protected: 184 protected:
173 // Implementors may call this method during parsing to 185 // Implementors may call this method during parsing to
174 // terminate parsing early. This method throws an exception 186 // terminate parsing early. This method throws an exception
libqpdf/QPDFObjectHandle.cc
@@ -106,6 +106,29 @@ QPDFObjectHandle::TokenFilter::writeToken(QPDFTokenizer::Token const& token) @@ -106,6 +106,29 @@ QPDFObjectHandle::TokenFilter::writeToken(QPDFTokenizer::Token const& token)
106 } 106 }
107 107
108 void 108 void
  109 +QPDFObjectHandle::ParserCallbacks::handleObject(QPDFObjectHandle)
  110 +{
  111 + throw std::logic_error("You must override one of the"
  112 + " handleObject methods in ParserCallbacks");
  113 +}
  114 +
  115 +void
  116 +QPDFObjectHandle::ParserCallbacks::handleObject(
  117 + QPDFObjectHandle oh, size_t, size_t)
  118 +{
  119 + // This version of handleObject was added in qpdf 9. If the
  120 + // developer did not override it, fall back to the older
  121 + // interface.
  122 + handleObject(oh);
  123 +}
  124 +
  125 +void
  126 +QPDFObjectHandle::ParserCallbacks::contentSize(size_t)
  127 +{
  128 + // Ignore by default; overriding this is optional.
  129 +}
  130 +
  131 +void
109 QPDFObjectHandle::ParserCallbacks::terminateParsing() 132 QPDFObjectHandle::ParserCallbacks::terminateParsing()
110 { 133 {
111 throw TerminateParsing(); 134 throw TerminateParsing();
@@ -1615,6 +1638,7 @@ QPDFObjectHandle::parseContentStream_internal( @@ -1615,6 +1638,7 @@ QPDFObjectHandle::parseContentStream_internal(
1615 std::string all_description; 1638 std::string all_description;
1616 pipeContentStreams(&buf, description, all_description); 1639 pipeContentStreams(&buf, description, all_description);
1617 PointerHolder<Buffer> stream_data = buf.getBuffer(); 1640 PointerHolder<Buffer> stream_data = buf.getBuffer();
  1641 + callbacks->contentSize(stream_data->getSize());
1618 try 1642 try
1619 { 1643 {
1620 parseContentStream_data(stream_data, all_description, 1644 parseContentStream_data(stream_data, all_description,
@@ -1642,6 +1666,13 @@ QPDFObjectHandle::parseContentStream_data( @@ -1642,6 +1666,13 @@ QPDFObjectHandle::parseContentStream_data(
1642 bool empty = false; 1666 bool empty = false;
1643 while (QIntC::to_size(input->tell()) < length) 1667 while (QIntC::to_size(input->tell()) < length)
1644 { 1668 {
  1669 + // Read a token and seek to the beginning. The offset we get
  1670 + // from this process is the beginning of the next
  1671 + // non-ignorable (space, comment) token. This way, the offset
  1672 + // and length don't include ignorable content.
  1673 + tokenizer.readToken(input, "content", true);
  1674 + qpdf_offset_t offset = input->getLastOffset();
  1675 + input->seek(offset, SEEK_SET);
1645 QPDFObjectHandle obj = 1676 QPDFObjectHandle obj =
1646 parseInternal(input, "content", tokenizer, 1677 parseInternal(input, "content", tokenizer,
1647 empty, 0, context, true); 1678 empty, 0, context, true);
@@ -1650,8 +1681,9 @@ QPDFObjectHandle::parseContentStream_data( @@ -1650,8 +1681,9 @@ QPDFObjectHandle::parseContentStream_data(
1650 // EOF 1681 // EOF
1651 break; 1682 break;
1652 } 1683 }
  1684 + size_t length = QIntC::to_size(input->tell() - offset);
1653 1685
1654 - callbacks->handleObject(obj); 1686 + callbacks->handleObject(obj, QIntC::to_size(offset), length);
1655 if (obj.isOperator() && (obj.getOperatorValue() == "ID")) 1687 if (obj.isOperator() && (obj.getOperatorValue() == "ID"))
1656 { 1688 {
1657 // Discard next character; it is the space after ID that 1689 // Discard next character; it is the space after ID that
@@ -1661,6 +1693,8 @@ QPDFObjectHandle::parseContentStream_data( @@ -1661,6 +1693,8 @@ QPDFObjectHandle::parseContentStream_data(
1661 tokenizer.expectInlineImage(input); 1693 tokenizer.expectInlineImage(input);
1662 QPDFTokenizer::Token t = 1694 QPDFTokenizer::Token t =
1663 tokenizer.readToken(input, description, true); 1695 tokenizer.readToken(input, description, true);
  1696 + offset = input->getLastOffset();
  1697 + length = QIntC::to_size(input->tell() - offset);
1664 if (t.getType() == QPDFTokenizer::tt_bad) 1698 if (t.getType() == QPDFTokenizer::tt_bad)
1665 { 1699 {
1666 QTC::TC("qpdf", "QPDFObjectHandle EOF in inline image"); 1700 QTC::TC("qpdf", "QPDFObjectHandle EOF in inline image");
@@ -1674,7 +1708,8 @@ QPDFObjectHandle::parseContentStream_data( @@ -1674,7 +1708,8 @@ QPDFObjectHandle::parseContentStream_data(
1674 std::string inline_image = t.getValue(); 1708 std::string inline_image = t.getValue();
1675 QTC::TC("qpdf", "QPDFObjectHandle inline image token"); 1709 QTC::TC("qpdf", "QPDFObjectHandle inline image token");
1676 callbacks->handleObject( 1710 callbacks->handleObject(
1677 - QPDFObjectHandle::newInlineImage(inline_image)); 1711 + QPDFObjectHandle::newInlineImage(inline_image),
  1712 + QIntC::to_size(offset), length);
1678 } 1713 }
1679 } 1714 }
1680 } 1715 }
manual/qpdf-manual.xml
@@ -4491,6 +4491,25 @@ print "\n"; @@ -4491,6 +4491,25 @@ print "\n";
4491 </listitem> 4491 </listitem>
4492 <listitem> 4492 <listitem>
4493 <para> 4493 <para>
  4494 + When parsing content streams with
  4495 + <classname>QPDFObjectHandle::ParserCallbacks</classname>, in
  4496 + place of the method
  4497 + <function>handleObject(QPDFObjectHandle)</function>, the
  4498 + developer may override
  4499 + <function>handleObject(QPDFObjectHandle, size_t offset,
  4500 + size_t length)</function>. If this method is defined, it
  4501 + will be invoked with the object along with its offset and
  4502 + length within the overall contents being parsed. Intervening
  4503 + spaces and comments are not included in offset and length.
  4504 + Additionally, a new method
  4505 + <function>contentSize(size_t)</function> may be implemented.
  4506 + If present, it will be called prior to the first call to
  4507 + <function>handleObject</function> with the total size in
  4508 + bytes of the combined contents.
  4509 + </para>
  4510 + </listitem>
  4511 + <listitem>
  4512 + <para>
4494 The underlying implementation of QPDF arrays has been 4513 The underlying implementation of QPDF arrays has been
4495 enhanced to be much more memory efficient when dealing with 4514 enhanced to be much more memory efficient when dealing with
4496 arrays with lots of nulls. This enables qpdf to use 4515 arrays with lots of nulls. This enables qpdf to use
qpdf/qtest/qpdf/eof-in-inline-image.out
1 -operator: BT  
2 -name: /F1  
3 -integer: 24  
4 -operator: Tf  
5 -integer: 72  
6 -integer: 720  
7 -operator: Td  
8 -string: (Potato)  
9 -operator: Tj  
10 -operator: ET  
11 -operator: BI  
12 -name: /CS  
13 -name: /G  
14 -name: /W  
15 -integer: 1  
16 -name: /H  
17 -integer: 1  
18 -name: /BPC  
19 -integer: 8  
20 -name: /F  
21 -name: /Fl  
22 -name: /DP  
23 -dictionary: << /Columns 1 /Predictor 15 >>  
24 -operator: ID 1 +content size: 139
  2 +operator, offset=0, length=2: BT
  3 +name, offset=5, length=3: /F1
  4 +integer, offset=9, length=2: 24
  5 +operator, offset=12, length=2: Tf
  6 +integer, offset=17, length=2: 72
  7 +integer, offset=20, length=3: 720
  8 +operator, offset=24, length=2: Td
  9 +string, offset=29, length=8: (Potato)
  10 +operator, offset=38, length=2: Tj
  11 +operator, offset=41, length=2: ET
  12 +operator, offset=66, length=2: BI
  13 +name, offset=69, length=3: /CS
  14 +name, offset=73, length=2: /G
  15 +name, offset=75, length=2: /W
  16 +integer, offset=78, length=1: 1
  17 +name, offset=79, length=2: /H
  18 +integer, offset=82, length=1: 1
  19 +name, offset=83, length=4: /BPC
  20 +integer, offset=88, length=1: 8
  21 +name, offset=89, length=2: /F
  22 +name, offset=91, length=3: /Fl
  23 +name, offset=94, length=3: /DP
  24 +dictionary, offset=97, length=27: << /Columns 1 /Predictor 15 >>
  25 +operator, offset=125, length=2: ID
25 WARNING: page object 3 0 stream 4 0 (stream data, offset 139): EOF found while reading inline image 26 WARNING: page object 3 0 stream 4 0 (stream data, offset 139): EOF found while reading inline image
26 -EOF- 27 -EOF-
27 test 37 done 28 test 37 done
qpdf/qtest/qpdf/terminate-parsing.out
1 -name: /potato 1 +content size: 44
  2 +name, offset=0, length=7: /potato
2 test suite: terminating parsing 3 test suite: terminating parsing
3 -real: 0.1  
4 -integer: 0  
5 -integer: 0  
6 -real: 0.1  
7 -integer: 0  
8 -integer: 0  
9 -operator: cm  
10 -operator: q  
11 -integer: 0  
12 -real: 1.1999  
13 -real: -1.1999  
14 -integer: 0  
15 -real: 121.19  
16 -real: 150.009  
17 -operator: cm  
18 -operator: BI  
19 -name: /CS  
20 -name: /G  
21 -name: /W  
22 -integer: 1  
23 -name: /H  
24 -integer: 1  
25 -name: /BPC  
26 -integer: 8  
27 -name: /F  
28 -name: /Fl  
29 -name: /DP  
30 -dictionary: << /Columns 1 /Predictor 15 >>  
31 -operator: ID  
32 -inline-image: 789c63fc0f00010301010a  
33 -operator: EI  
34 -operator: Q  
35 -operator: q  
36 -integer: 0  
37 -real: 35.997  
38 -real: -128.389  
39 -integer: 0  
40 -real: 431.964  
41 -real: 7269.02  
42 -operator: cm  
43 -operator: BI  
44 -name: /CS  
45 -name: /G  
46 -name: /W  
47 -integer: 30  
48 -name: /H  
49 -integer: 107  
50 -name: /BPC  
51 -integer: 8  
52 -name: /F  
53 -name: /Fl  
54 -name: /DP  
55 -dictionary: << /Columns 30 /Predictor 15 >>  
56 -operator: ID  
57 -inline-image: 789cedd1a11100300800b1b2ffd06503148283bc8dfcf8af2a306ee352eff2e06318638c31c63b3801627b620a0a  
58 -operator: EI  
59 -operator: Q  
60 -operator: q  
61 -integer: 0  
62 -real: 38.3968  
63 -real: -93.5922  
64 -integer: 0  
65 -real: 431.964  
66 -real: 7567.79  
67 -operator: cm  
68 -operator: BI  
69 -name: /CS  
70 -name: /G  
71 -name: /W  
72 -integer: 32  
73 -name: /H  
74 -integer: 78  
75 -name: /BPC  
76 -integer: 8  
77 -name: /F  
78 -name: /Fl  
79 -name: /DP  
80 -dictionary: << /Columns 32 /Predictor 15 >>  
81 -operator: ID  
82 -inline-image: 789c63fccf801f308e2a185530aa60882a20203faa605401890a0643aa1e5530aa6054010d140000bdd03c130a  
83 -operator: EI  
84 -operator: Q 4 +content size: 454
  5 +real, offset=0, length=3: 0.1
  6 +integer, offset=4, length=1: 0
  7 +integer, offset=6, length=1: 0
  8 +real, offset=8, length=3: 0.1
  9 +integer, offset=12, length=1: 0
  10 +integer, offset=14, length=1: 0
  11 +operator, offset=16, length=2: cm
  12 +operator, offset=19, length=1: q
  13 +integer, offset=21, length=1: 0
  14 +real, offset=23, length=6: 1.1999
  15 +real, offset=30, length=7: -1.1999
  16 +integer, offset=38, length=2: 0
  17 +real, offset=41, length=6: 121.19
  18 +real, offset=48, length=7: 150.009
  19 +operator, offset=56, length=2: cm
  20 +operator, offset=59, length=2: BI
  21 +name, offset=62, length=3: /CS
  22 +name, offset=66, length=2: /G
  23 +name, offset=68, length=2: /W
  24 +integer, offset=71, length=1: 1
  25 +name, offset=72, length=2: /H
  26 +integer, offset=75, length=1: 1
  27 +name, offset=76, length=4: /BPC
  28 +integer, offset=81, length=1: 8
  29 +name, offset=82, length=2: /F
  30 +name, offset=84, length=3: /Fl
  31 +name, offset=87, length=3: /DP
  32 +dictionary, offset=90, length=27: << /Columns 1 /Predictor 15 >>
  33 +operator, offset=118, length=2: ID
  34 +inline-image, offset=121, length=11: 789c63fc0f00010301010a
  35 +operator, offset=132, length=2: EI
  36 +operator, offset=135, length=1: Q
  37 +operator, offset=137, length=1: q
  38 +integer, offset=139, length=1: 0
  39 +real, offset=141, length=6: 35.997
  40 +real, offset=148, length=8: -128.389
  41 +integer, offset=157, length=2: 0
  42 +real, offset=160, length=7: 431.964
  43 +real, offset=168, length=7: 7269.02
  44 +operator, offset=176, length=2: cm
  45 +operator, offset=179, length=2: BI
  46 +name, offset=182, length=3: /CS
  47 +name, offset=186, length=2: /G
  48 +name, offset=188, length=2: /W
  49 +integer, offset=191, length=2: 30
  50 +name, offset=193, length=2: /H
  51 +integer, offset=196, length=3: 107
  52 +name, offset=199, length=4: /BPC
  53 +integer, offset=204, length=1: 8
  54 +name, offset=205, length=2: /F
  55 +name, offset=207, length=3: /Fl
  56 +name, offset=210, length=3: /DP
  57 +dictionary, offset=213, length=28: << /Columns 30 /Predictor 15 >>
  58 +operator, offset=242, length=2: ID
  59 +inline-image, offset=245, length=46: 789cedd1a11100300800b1b2ffd06503148283bc8dfcf8af2a306ee352eff2e06318638c31c63b3801627b620a0a
  60 +operator, offset=291, length=2: EI
  61 +operator, offset=294, length=1: Q
  62 +operator, offset=296, length=1: q
  63 +integer, offset=298, length=1: 0
  64 +real, offset=300, length=7: 38.3968
  65 +real, offset=308, length=8: -93.5922
  66 +integer, offset=317, length=2: 0
  67 +real, offset=320, length=7: 431.964
  68 +real, offset=328, length=7: 7567.79
  69 +operator, offset=336, length=2: cm
  70 +operator, offset=339, length=2: BI
  71 +name, offset=342, length=3: /CS
  72 +name, offset=346, length=2: /G
  73 +name, offset=348, length=2: /W
  74 +integer, offset=351, length=2: 32
  75 +name, offset=353, length=2: /H
  76 +integer, offset=356, length=2: 78
  77 +name, offset=358, length=4: /BPC
  78 +integer, offset=363, length=1: 8
  79 +name, offset=364, length=2: /F
  80 +name, offset=366, length=3: /Fl
  81 +name, offset=369, length=3: /DP
  82 +dictionary, offset=372, length=28: << /Columns 32 /Predictor 15 >>
  83 +operator, offset=401, length=2: ID
  84 +inline-image, offset=404, length=45: 789c63fccf801f308e2a185530aa60882a20203faa605401890a0643aa1e5530aa6054010d140000bdd03c130a
  85 +operator, offset=449, length=2: EI
  86 +operator, offset=452, length=1: Q
85 -EOF- 87 -EOF-
86 test 37 done 88 test 37 done
qpdf/qtest/qpdf/tokenize-content-streams.out
1 -operator: BT  
2 -name: /F1  
3 -integer: 24  
4 -operator: Tf  
5 -integer: 72  
6 -integer: 720  
7 -operator: Td  
8 -string: (Potato)  
9 -operator: Tj  
10 -operator: ET 1 +content size: 44
  2 +operator, offset=0, length=2: BT
  3 +name, offset=5, length=3: /F1
  4 +integer, offset=9, length=2: 24
  5 +operator, offset=12, length=2: Tf
  6 +integer, offset=17, length=2: 72
  7 +integer, offset=20, length=3: 720
  8 +operator, offset=24, length=2: Td
  9 +string, offset=29, length=8: (Potato)
  10 +operator, offset=38, length=2: Tj
  11 +operator, offset=41, length=2: ET
11 -EOF- 12 -EOF-
12 -real: 0.1  
13 -integer: 0  
14 -integer: 0  
15 -real: 0.1  
16 -integer: 0  
17 -integer: 0  
18 -operator: cm  
19 -operator: q  
20 -integer: 0  
21 -real: 1.1999  
22 -real: -1.1999  
23 -integer: 0  
24 -real: 121.19  
25 -real: 150.009  
26 -operator: cm  
27 -operator: BI  
28 -name: /CS  
29 -name: /G  
30 -name: /W  
31 -integer: 1  
32 -name: /H  
33 -integer: 1  
34 -name: /BPC  
35 -integer: 8  
36 -name: /F  
37 -name: /Fl  
38 -name: /DP  
39 -dictionary: << /Columns 1 /Predictor 15 >>  
40 -operator: ID  
41 -inline-image: 789c63fc0f00010301010a  
42 -operator: EI  
43 -operator: Q  
44 -operator: q  
45 -integer: 0  
46 -real: 35.997  
47 -real: -128.389  
48 -integer: 0  
49 -real: 431.964  
50 -real: 7269.02  
51 -operator: cm  
52 -operator: BI  
53 -name: /CS  
54 -name: /G  
55 -name: /W  
56 -integer: 30  
57 -name: /H  
58 -integer: 107  
59 -name: /BPC  
60 -integer: 8  
61 -name: /F  
62 -name: /Fl  
63 -name: /DP  
64 -dictionary: << /Columns 30 /Predictor 15 >>  
65 -operator: ID  
66 -inline-image: 789cedd1a11100300800b1b2ffd06503148283bc8dfcf8af2a306ee352eff2e06318638c31c63b3801627b620a0a  
67 -operator: EI  
68 -operator: Q  
69 -operator: q  
70 -integer: 0  
71 -real: 38.3968  
72 -real: -93.5922  
73 -integer: 0  
74 -real: 431.964  
75 -real: 7567.79  
76 -operator: cm  
77 -operator: BI  
78 -name: /CS  
79 -name: /G  
80 -name: /W  
81 -integer: 32  
82 -name: /H  
83 -integer: 78  
84 -name: /BPC  
85 -integer: 8  
86 -name: /F  
87 -name: /Fl  
88 -name: /DP  
89 -dictionary: << /Columns 32 /Predictor 15 >>  
90 -operator: ID  
91 -inline-image: 789c63fccf801f308e2a185530aa60882a20203faa605401890a0643aa1e5530aa6054010d140000bdd03c130a  
92 -operator: EI  
93 -operator: Q 13 +content size: 490
  14 +real, offset=0, length=3: 0.1
  15 +integer, offset=4, length=1: 0
  16 +integer, offset=6, length=1: 0
  17 +real, offset=8, length=3: 0.1
  18 +integer, offset=12, length=1: 0
  19 +integer, offset=14, length=1: 0
  20 +operator, offset=16, length=2: cm
  21 +operator, offset=19, length=1: q
  22 +integer, offset=21, length=1: 0
  23 +real, offset=23, length=6: 1.1999
  24 +real, offset=30, length=7: -1.1999
  25 +integer, offset=38, length=2: 0
  26 +real, offset=41, length=6: 121.19
  27 +real, offset=48, length=7: 150.009
  28 +operator, offset=56, length=2: cm
  29 +operator, offset=59, length=2: BI
  30 +name, offset=62, length=3: /CS
  31 +name, offset=66, length=2: /G
  32 +name, offset=68, length=2: /W
  33 +integer, offset=71, length=1: 1
  34 +name, offset=72, length=2: /H
  35 +integer, offset=75, length=1: 1
  36 +name, offset=76, length=4: /BPC
  37 +integer, offset=81, length=1: 8
  38 +name, offset=82, length=2: /F
  39 +name, offset=84, length=3: /Fl
  40 +name, offset=87, length=3: /DP
  41 +dictionary, offset=90, length=27: << /Columns 1 /Predictor 15 >>
  42 +operator, offset=118, length=2: ID
  43 +inline-image, offset=121, length=11: 789c63fc0f00010301010a
  44 +operator, offset=132, length=2: EI
  45 +operator, offset=135, length=1: Q
  46 +operator, offset=137, length=1: q
  47 +integer, offset=139, length=1: 0
  48 +real, offset=141, length=6: 35.997
  49 +real, offset=148, length=8: -128.389
  50 +integer, offset=157, length=2: 0
  51 +real, offset=160, length=7: 431.964
  52 +real, offset=168, length=7: 7269.02
  53 +operator, offset=176, length=2: cm
  54 +operator, offset=179, length=2: BI
  55 +name, offset=182, length=3: /CS
  56 +name, offset=186, length=2: /G
  57 +name, offset=188, length=2: /W
  58 +integer, offset=191, length=2: 30
  59 +name, offset=193, length=2: /H
  60 +integer, offset=196, length=3: 107
  61 +name, offset=199, length=4: /BPC
  62 +integer, offset=204, length=1: 8
  63 +name, offset=205, length=2: /F
  64 +name, offset=207, length=3: /Fl
  65 +name, offset=210, length=3: /DP
  66 +dictionary, offset=214, length=28: << /Columns 30 /Predictor 15 >>
  67 +operator, offset=243, length=2: ID
  68 +inline-image, offset=246, length=46: 789cedd1a11100300800b1b2ffd06503148283bc8dfcf8af2a306ee352eff2e06318638c31c63b3801627b620a0a
  69 +operator, offset=292, length=2: EI
  70 +operator, offset=295, length=1: Q
  71 +operator, offset=297, length=1: q
  72 +array, offset=299, length=30: [ 1 /two (three) << /four 5 >> ]
  73 +operator, offset=330, length=1: Q
  74 +operator, offset=332, length=1: q
  75 +integer, offset=334, length=1: 0
  76 +real, offset=336, length=7: 38.3968
  77 +real, offset=344, length=8: -93.5922
  78 +integer, offset=353, length=2: 0
  79 +real, offset=356, length=7: 431.964
  80 +real, offset=364, length=7: 7567.79
  81 +operator, offset=372, length=2: cm
  82 +operator, offset=375, length=2: BI
  83 +name, offset=378, length=3: /CS
  84 +name, offset=382, length=2: /G
  85 +name, offset=384, length=2: /W
  86 +integer, offset=387, length=2: 32
  87 +name, offset=389, length=2: /H
  88 +integer, offset=392, length=2: 78
  89 +name, offset=394, length=4: /BPC
  90 +integer, offset=399, length=1: 8
  91 +name, offset=400, length=2: /F
  92 +name, offset=402, length=3: /Fl
  93 +name, offset=405, length=3: /DP
  94 +dictionary, offset=408, length=28: << /Columns 32 /Predictor 15 >>
  95 +operator, offset=437, length=2: ID
  96 +inline-image, offset=440, length=45: 789c63fccf801f308e2a185530aa60882a20203faa605401890a0643aa1e5530aa6054010d140000bdd03c130a
  97 +operator, offset=485, length=2: EI
  98 +operator, offset=488, length=1: Q
94 -EOF- 99 -EOF-
95 test 37 done 100 test 37 done
qpdf/qtest/qpdf/tokenize-content-streams.pdf
No preview for this file type
qpdf/test_driver.cc
@@ -76,19 +76,28 @@ class ParserCallbacks: public QPDFObjectHandle::ParserCallbacks @@ -76,19 +76,28 @@ class ParserCallbacks: public QPDFObjectHandle::ParserCallbacks
76 { 76 {
77 } 77 }
78 78
79 - virtual void handleObject(QPDFObjectHandle); 79 + virtual void contentSize(size_t size);
  80 + virtual void handleObject(QPDFObjectHandle, size_t, size_t);
80 virtual void handleEOF(); 81 virtual void handleEOF();
81 }; 82 };
82 83
83 void 84 void
84 -ParserCallbacks::handleObject(QPDFObjectHandle obj) 85 +ParserCallbacks::contentSize(size_t size)
  86 +{
  87 + std::cout << "content size: " << size << std::endl;
  88 +}
  89 +
  90 +void
  91 +ParserCallbacks::handleObject(QPDFObjectHandle obj,
  92 + size_t offset, size_t length)
85 { 93 {
86 if (obj.isName() && (obj.getName() == "/Abort")) 94 if (obj.isName() && (obj.getName() == "/Abort"))
87 { 95 {
88 std::cout << "test suite: terminating parsing" << std::endl; 96 std::cout << "test suite: terminating parsing" << std::endl;
89 terminateParsing(); 97 terminateParsing();
90 } 98 }
91 - std::cout << obj.getTypeName() << ": "; 99 + std::cout << obj.getTypeName() << ", offset=" << offset
  100 + << ", length=" << length << ": ";
92 if (obj.isInlineImage()) 101 if (obj.isInlineImage())
93 { 102 {
94 // Exercise getTypeCode 103 // Exercise getTypeCode