Commit fefe25030eaffdaf06a9e957b3255304682c71cf

Authored by Jay Berkenbilt
1 parent 2699ecf1

Inline image token type

ChangeLog
1 2018-02-04 Jay Berkenbilt <ejb@ql.org> 1 2018-02-04 Jay Berkenbilt <ejb@ql.org>
2 2
3 * Significant lexer (tokenizer) enhancements. These are changes to 3 * Significant lexer (tokenizer) enhancements. These are changes to
4 - the QPDFTokenizer class. These changes are of concern only to  
5 - people who are operating with PDF files at the lexical layer  
6 - using qpdf. They have little or no impact on most high-level  
7 - interfaces or the command-line tool.  
8 - * New token types tt_space and tt_comment to recognize  
9 - whitespace and comments. this makes it possible to tokenize a  
10 - PDF file or stream and preserve everything about it.  
11 - * For backward compatibility, space and comment tokens are not  
12 - returned by the tokenizer unless  
13 - QPDFTokenizer.includeIgnorable() is called.  
14 - * Better handling of null bytes. These are now included in space  
15 - tokens rather than being their own "tt_word" tokens. This  
16 - should have no impact on any correct PDF file and has no  
17 - impact on output, but it may change offsets in some error  
18 - messages when trying to parse contents of bad files. Under  
19 - default operation, qpdf does not attempt to parse content  
20 - streams, so this change is mostly invisible.  
21 - * Bug fix to handling of bad tokens at ends of streams. Now,  
22 - when allowEOF() has been called, these are treated as bad tokens  
23 - (tt_bad or an exception, depending on invocation), and a  
24 - separate tt_eof token is returned. Before the bad token  
25 - contents were returned as the value of a tt_eof token. tt_eof  
26 - tokens are always empty now.  
27 - * Fix a bug that would, on rare occasions, report the offset in an  
28 - error message in the wrong space because of spaces or comments  
29 - adjacent to a bad token.  
30 - * Clarify in comments exactly where the input source is  
31 - positioned surrounding calls to readToken and getToken. 4 + the QPDFTokenizer class. These changes are of concern only to
  5 + people who are operating with PDF files at the lexical layer using
  6 + qpdf. They have little or no impact on most high-level interfaces
  7 + or the command-line tool.
  8 +
  9 + New token types tt_space and tt_comment to recognize whitespace
  10 + and comments. This makes it possible to tokenize a PDF file or
  11 + stream and preserve everything about it.
  12 +
  13 + For backward compatibility, space and comment tokens are not
  14 + returned by the tokenizer unless QPDFTokenizer.includeIgnorable()
  15 + is called.
  16 +
  17 + Better handling of null bytes. These are now included in space
  18 + tokens rather than being their own "tt_word" tokens. This should
  19 + have no impact on any correct PDF file and has no impact on
  20 + output, but it may change offsets in some error messages when
  21 + trying to parse contents of bad files. Under default operation,
  22 + qpdf does not attempt to parse content streams, so this change is
  23 + mostly invisible.
  24 +
  25 + Bug fix to handling of bad tokens at ends of streams. Now, when
  26 + allowEOF() has been called, these are treated as bad tokens
  27 + (tt_bad or an exception, depending on invocation), and a
  28 + separate tt_eof token is returned. Before, the bad token
  29 + contents were returned as the value of a tt_eof token. tt_eof
  30 + tokens are always empty now.
  31 +
  32 + Fix a bug that would, on rare occasions, report the offset in an
  33 + error message in the wrong space because of spaces or comments
  34 + adjacent to a bad token.
  35 +
  36 + Clarify in comments exactly where the input source is positioned
  37 + surrounding calls to readToken and getToken.
  38 +
  39 + * Add a new token type for inline images. This token type is only
  40 + returned by QPDFTokenizer immediately following a call to
  41 + expectInlineImage(). This change includes internal refactoring of
  42 + a handful of places that all separately handled inline images. The
  43 + logic of detecting inline images in content streams is now handled
  44 + in one place in the code. Also we are more flexible about what
  45 + characters may surround the EI operator that marks the end of an
  46 + inline image.
32 47
33 2018-02-04 Jay Berkenbilt <ejb@ql.org> 48 2018-02-04 Jay Berkenbilt <ejb@ql.org>
34 49
include/qpdf/QPDFTokenizer.hh
@@ -34,7 +34,8 @@ class QPDFTokenizer @@ -34,7 +34,8 @@ class QPDFTokenizer
34 public: 34 public:
35 // Token type tt_eof is only returned if allowEOF() is called on 35 // Token type tt_eof is only returned if allowEOF() is called on
36 // the tokenizer. tt_eof was introduced in QPDF version 4.1. 36 // the tokenizer. tt_eof was introduced in QPDF version 4.1.
37 - // tt_space and tt_comment were added in QPDF version 8. 37 + // tt_space, tt_comment, and tt_inline_image were added in QPDF
  38 + // version 8.
38 enum token_type_e 39 enum token_type_e
39 { 40 {
40 tt_bad, 41 tt_bad,
@@ -54,6 +55,7 @@ class QPDFTokenizer @@ -54,6 +55,7 @@ class QPDFTokenizer
54 tt_eof, 55 tt_eof,
55 tt_space, 56 tt_space,
56 tt_comment, 57 tt_comment,
  58 + tt_inline_image,
57 }; 59 };
58 60
59 class Token 61 class Token
@@ -128,11 +130,17 @@ class QPDFTokenizer @@ -128,11 +130,17 @@ class QPDFTokenizer
128 QPDF_DLL 130 QPDF_DLL
129 void includeIgnorable(); 131 void includeIgnorable();
130 132
131 - // Mode of operation: 133 + // There are two modes of operation: push and pull. The pull
  134 + // method is easier but requires an input source. The push method
  135 + // is more complicated but can be used to tokenize a stream of
  136 + // incoming characters in a pipeline.
132 137
133 - // Keep presenting characters and calling getToken() until  
134 - // getToken() returns true. When it does, be sure to check  
135 - // unread_ch and to unread ch if it is true. 138 + // Push mode:
  139 +
  140 + // Keep presenting characters with presentCharacter() and
  141 + // presentEOF() and calling getToken() until getToken() returns
  142 + // true. When it does, be sure to check unread_ch and to unread ch
  143 + // if it is true.
136 144
137 // If these are called when a token is available, an exception 145 // If these are called when a token is available, an exception
138 // will be thrown. 146 // will be thrown.
@@ -155,15 +163,30 @@ class QPDFTokenizer @@ -155,15 +163,30 @@ class QPDFTokenizer
155 QPDF_DLL 163 QPDF_DLL
156 bool betweenTokens(); 164 bool betweenTokens();
157 165
158 - // Read a token from an input source. Context describes the 166 + // Pull mode:
  167 +
  168 + // Read a token from an input source. Context describes the
159 // context in which the token is being read and is used in the 169 // context in which the token is being read and is used in the
160 - // exception thrown if there is an error. 170 + // exception thrown if there is an error. After a token is read,
  171 + // the position of the input source returned by input->tell()
  172 + // points to just after the token, and the input source's "last
  173 + // offset" as returned by input->getLastOffset() points to the
  174 + // beginning of the token.
161 QPDF_DLL 175 QPDF_DLL
162 Token readToken(PointerHolder<InputSource> input, 176 Token readToken(PointerHolder<InputSource> input,
163 std::string const& context, 177 std::string const& context,
164 bool allow_bad = false, 178 bool allow_bad = false,
165 size_t max_len = 0); 179 size_t max_len = 0);
166 180
  181 + // Calling this method puts the tokenizer in a state for reading
  182 + // inline images. In that state, it will return all data up to and
  183 + // including the next EI token. After you call this method, the
  184 + // next call to readToken (or the token created next time getToken
  185 + // returns true) will either be tt_inline_image or tt_bad. This is
  186 + // the only way readToken returns a tt_inline_image token.
  187 + QPDF_DLL
  188 + void expectInlineImage();
  189 +
167 private: 190 private:
168 // Do not implement copy or assignment 191 // Do not implement copy or assignment
169 QPDFTokenizer(QPDFTokenizer const&); 192 QPDFTokenizer(QPDFTokenizer const&);
@@ -171,10 +194,11 @@ class QPDFTokenizer @@ -171,10 +194,11 @@ class QPDFTokenizer
171 194
172 void resolveLiteral(); 195 void resolveLiteral();
173 bool isSpace(char); 196 bool isSpace(char);
  197 + bool isDelimiter(char);
174 198
175 enum state_e { 199 enum state_e {
176 st_top, st_in_space, st_in_comment, st_in_string, st_lt, st_gt, 200 st_top, st_in_space, st_in_comment, st_in_string, st_lt, st_gt,
177 - st_literal, st_in_hexstring, st_token_ready 201 + st_literal, st_in_hexstring, st_inline_image, st_token_ready
178 }; 202 };
179 203
180 class Members 204 class Members
libqpdf/QPDFTokenizer.cc
@@ -69,6 +69,12 @@ QPDFTokenizer::isSpace(char ch) @@ -69,6 +69,12 @@ QPDFTokenizer::isSpace(char ch)
69 return ((ch == '\0') || QUtil::is_space(ch)); 69 return ((ch == '\0') || QUtil::is_space(ch));
70 } 70 }
71 71
  72 +bool
  73 +QPDFTokenizer::isDelimiter(char ch)
  74 +{
  75 + return (strchr(" \t\n\v\f\r()<>[]{}/%", ch) != 0);
  76 +}
  77 +
72 void 78 void
73 QPDFTokenizer::resolveLiteral() 79 QPDFTokenizer::resolveLiteral()
74 { 80 {
@@ -95,7 +101,7 @@ QPDFTokenizer::resolveLiteral() @@ -95,7 +101,7 @@ QPDFTokenizer::resolveLiteral()
95 if (ch == '\0') 101 if (ch == '\0')
96 { 102 {
97 this->m->type = tt_bad; 103 this->m->type = tt_bad;
98 - QTC::TC("qpdf", "QPDF_Tokenizer null in name"); 104 + QTC::TC("qpdf", "QPDFTokenizer null in name");
99 this->m->error_message = 105 this->m->error_message =
100 "null character not allowed in name token"; 106 "null character not allowed in name token";
101 nval += "#00"; 107 nval += "#00";
@@ -108,7 +114,7 @@ QPDFTokenizer::resolveLiteral() @@ -108,7 +114,7 @@ QPDFTokenizer::resolveLiteral()
108 } 114 }
109 else 115 else
110 { 116 {
111 - QTC::TC("qpdf", "QPDF_Tokenizer bad name"); 117 + QTC::TC("qpdf", "QPDFTokenizer bad name");
112 this->m->type = tt_bad; 118 this->m->type = tt_bad;
113 this->m->error_message = "invalid name token"; 119 this->m->error_message = "invalid name token";
114 nval += *p; 120 nval += *p;
@@ -209,7 +215,7 @@ QPDFTokenizer::presentCharacter(char ch) @@ -209,7 +215,7 @@ QPDFTokenizer::presentCharacter(char ch)
209 if (ch == ')') 215 if (ch == ')')
210 { 216 {
211 this->m->type = tt_bad; 217 this->m->type = tt_bad;
212 - QTC::TC("qpdf", "QPDF_Tokenizer bad )"); 218 + QTC::TC("qpdf", "QPDFTokenizer bad )");
213 this->m->error_message = "unexpected )"; 219 this->m->error_message = "unexpected )";
214 this->m->state = st_token_ready; 220 this->m->state = st_token_ready;
215 } 221 }
@@ -301,7 +307,7 @@ QPDFTokenizer::presentCharacter(char ch) @@ -301,7 +307,7 @@ QPDFTokenizer::presentCharacter(char ch)
301 { 307 {
302 this->m->val = ">"; 308 this->m->val = ">";
303 this->m->type = tt_bad; 309 this->m->type = tt_bad;
304 - QTC::TC("qpdf", "QPDF_Tokenizer bad >"); 310 + QTC::TC("qpdf", "QPDFTokenizer bad >");
305 this->m->error_message = "unexpected >"; 311 this->m->error_message = "unexpected >";
306 this->m->unread_char = true; 312 this->m->unread_char = true;
307 this->m->char_to_unread = ch; 313 this->m->char_to_unread = ch;
@@ -403,7 +409,7 @@ QPDFTokenizer::presentCharacter(char ch) @@ -403,7 +409,7 @@ QPDFTokenizer::presentCharacter(char ch)
403 } 409 }
404 else if (this->m->state == st_literal) 410 else if (this->m->state == st_literal)
405 { 411 {
406 - if (strchr(" \t\n\v\f\r()<>[]{}/%", ch) != 0) 412 + if (isDelimiter(ch))
407 { 413 {
408 // A C-locale whitespace character or delimiter terminates 414 // A C-locale whitespace character or delimiter terminates
409 // token. It is important to unread the whitespace 415 // token. It is important to unread the whitespace
@@ -423,6 +429,25 @@ QPDFTokenizer::presentCharacter(char ch) @@ -423,6 +429,25 @@ QPDFTokenizer::presentCharacter(char ch)
423 this->m->val += ch; 429 this->m->val += ch;
424 } 430 }
425 } 431 }
  432 + else if (this->m->state == st_inline_image)
  433 + {
  434 + size_t len = this->m->val.length();
  435 + if ((len >= 4) &&
  436 + isDelimiter(this->m->val.at(len-4)) &&
  437 + (this->m->val.at(len-3) == 'E') &&
  438 + (this->m->val.at(len-2) == 'I') &&
  439 + isDelimiter(this->m->val.at(len-1)))
  440 + {
  441 + this->m->type = tt_inline_image;
  442 + this->m->unread_char = true;
  443 + this->m->char_to_unread = ch;
  444 + this->m->state = st_token_ready;
  445 + }
  446 + else
  447 + {
  448 + this->m->val += ch;
  449 + }
  450 + }
426 else 451 else
427 { 452 {
428 handled = false; 453 handled = false;
@@ -468,7 +493,7 @@ QPDFTokenizer::presentCharacter(char ch) @@ -468,7 +493,7 @@ QPDFTokenizer::presentCharacter(char ch)
468 else 493 else
469 { 494 {
470 this->m->type = tt_bad; 495 this->m->type = tt_bad;
471 - QTC::TC("qpdf", "QPDF_Tokenizer bad hexstring character"); 496 + QTC::TC("qpdf", "QPDFTokenizer bad hexstring character");
472 this->m->error_message = std::string("invalid character (") + 497 this->m->error_message = std::string("invalid character (") +
473 ch + ") in hexstring"; 498 ch + ") in hexstring";
474 this->m->state = st_token_ready; 499 this->m->state = st_token_ready;
@@ -495,9 +520,23 @@ QPDFTokenizer::presentCharacter(char ch) @@ -495,9 +520,23 @@ QPDFTokenizer::presentCharacter(char ch)
495 void 520 void
496 QPDFTokenizer::presentEOF() 521 QPDFTokenizer::presentEOF()
497 { 522 {
  523 + if (this->m->state == st_inline_image)
  524 + {
  525 + size_t len = this->m->val.length();
  526 + if ((len >= 3) &&
  527 + isDelimiter(this->m->val.at(len-3)) &&
  528 + (this->m->val.at(len-2) == 'E') &&
  529 + (this->m->val.at(len-1) == 'I'))
  530 + {
  531 + QTC::TC("qpdf", "QPDFTokenizer inline image at EOF");
  532 + this->m->type = tt_inline_image;
  533 + this->m->state = st_token_ready;
  534 + }
  535 + }
  536 +
498 if (this->m->state == st_literal) 537 if (this->m->state == st_literal)
499 { 538 {
500 - QTC::TC("qpdf", "QPDF_Tokenizer EOF reading appendable token"); 539 + QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token");
501 resolveLiteral(); 540 resolveLiteral();
502 } 541 }
503 else if ((this->m->include_ignorable) && (this->m->state == st_in_space)) 542 else if ((this->m->include_ignorable) && (this->m->state == st_in_space))
@@ -514,7 +553,7 @@ QPDFTokenizer::presentEOF() @@ -514,7 +553,7 @@ QPDFTokenizer::presentEOF()
514 } 553 }
515 else if (this->m->state != st_token_ready) 554 else if (this->m->state != st_token_ready)
516 { 555 {
517 - QTC::TC("qpdf", "QPDF_Tokenizer EOF reading token"); 556 + QTC::TC("qpdf", "QPDFTokenizer EOF reading token");
518 this->m->type = tt_bad; 557 this->m->type = tt_bad;
519 this->m->error_message = "EOF while reading token"; 558 this->m->error_message = "EOF while reading token";
520 } 559 }
@@ -522,6 +561,17 @@ QPDFTokenizer::presentEOF() @@ -522,6 +561,17 @@ QPDFTokenizer::presentEOF()
522 this->m->state = st_token_ready; 561 this->m->state = st_token_ready;
523 } 562 }
524 563
  564 +void
  565 +QPDFTokenizer::expectInlineImage()
  566 +{
  567 + if (this->m->state != st_top)
  568 + {
  569 + throw std::logic_error("QPDFTokenizer::expectInlineImage called"
  570 + " when tokenizer is in improper state");
  571 + }
  572 + this->m->state = st_inline_image;
  573 +}
  574 +
525 bool 575 bool
526 QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch) 576 QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch)
527 { 577 {
@@ -572,7 +622,7 @@ QPDFTokenizer::readToken(PointerHolder&lt;InputSource&gt; input, @@ -572,7 +622,7 @@ QPDFTokenizer::readToken(PointerHolder&lt;InputSource&gt; input,
572 presented_eof = true; 622 presented_eof = true;
573 if ((this->m->type == tt_eof) && (! this->m->allow_eof)) 623 if ((this->m->type == tt_eof) && (! this->m->allow_eof))
574 { 624 {
575 - QTC::TC("qpdf", "QPDF_Tokenizer EOF when not allowed"); 625 + QTC::TC("qpdf", "QPDFTokenizer EOF when not allowed");
576 this->m->type = tt_bad; 626 this->m->type = tt_bad;
577 this->m->error_message = "unexpected EOF"; 627 this->m->error_message = "unexpected EOF";
578 offset = input->getLastOffset(); 628 offset = input->getLastOffset();
qpdf/qpdf.testcov
@@ -64,11 +64,11 @@ QPDF stream length not integer 0 @@ -64,11 +64,11 @@ QPDF stream length not integer 0
64 QPDF missing endstream 0 64 QPDF missing endstream 0
65 QPDFObjectHandle bad dictionary close 0 65 QPDFObjectHandle bad dictionary close 0
66 QPDF can't find xref 0 66 QPDF can't find xref 0
67 -QPDF_Tokenizer bad ) 0  
68 -QPDF_Tokenizer bad > 0  
69 -QPDF_Tokenizer bad hexstring character 0  
70 -QPDF_Tokenizer null in name 0  
71 -QPDF_Tokenizer bad name 0 67 +QPDFTokenizer bad ) 0
  68 +QPDFTokenizer bad > 0
  69 +QPDFTokenizer bad hexstring character 0
  70 +QPDFTokenizer null in name 0
  71 +QPDFTokenizer bad name 0
72 QPDF_Stream invalid filter 0 72 QPDF_Stream invalid filter 0
73 QPDF UseOutlines but no Outlines 0 73 QPDF UseOutlines but no Outlines 0
74 QPDFObjectHandle clone bool 0 74 QPDFObjectHandle clone bool 0
@@ -233,8 +233,8 @@ QPDFWriter copy use_aes 1 @@ -233,8 +233,8 @@ QPDFWriter copy use_aes 1
233 QPDFObjectHandle indirect without context 0 233 QPDFObjectHandle indirect without context 0
234 QPDFObjectHandle trailing data in parse 0 234 QPDFObjectHandle trailing data in parse 0
235 qpdf pages encryption password 0 235 qpdf pages encryption password 0
236 -QPDF_Tokenizer EOF reading token 0  
237 -QPDF_Tokenizer EOF reading appendable token 0 236 +QPDFTokenizer EOF reading token 0
  237 +QPDFTokenizer EOF reading appendable token 0
238 QPDFWriter extra header text no newline 0 238 QPDFWriter extra header text no newline 0
239 QPDFWriter extra header text add newline 0 239 QPDFWriter extra header text add newline 0
240 QPDF bogus 0 offset 0 240 QPDF bogus 0 offset 0
@@ -302,4 +302,5 @@ qpdf-c called qpdf_set_compress_streams 0 @@ -302,4 +302,5 @@ qpdf-c called qpdf_set_compress_streams 0
302 qpdf-c called qpdf_set_preserve_unreferenced_objects 0 302 qpdf-c called qpdf_set_preserve_unreferenced_objects 0
303 qpdf-c called qpdf_set_newline_before_endstream 0 303 qpdf-c called qpdf_set_newline_before_endstream 0
304 QPDF_Stream TIFF predictor 0 304 QPDF_Stream TIFF predictor 0
305 -QPDF_Tokenizer EOF when not allowed 0 305 +QPDFTokenizer EOF when not allowed 0
  306 +QPDFTokenizer inline image at EOF 0
qpdf/qtest/qpdf/tokens-maxlen.out
@@ -222,307 +222,311 @@ skipping to endstream @@ -222,307 +222,311 @@ skipping to endstream
222 7601: word: endstream 222 7601: word: endstream
223 7610: space: \x0a 223 7610: space: \x0a
224 7611: word: endobj 224 7611: word: endobj
225 -7617: space: \x0a\x0a  
226 -7619: integer: 46  
227 -7621: space:  
228 -7622: integer: 0  
229 -7623: space:  
230 -7624: word: obj  
231 -7627: space: \x0a  
232 -7628: integer: 68  
233 -7630: space: \x0a  
234 -7631: word: endobj  
235 -7637: space: \x0a\x0a  
236 -7639: comment: %% Contents for page 6  
237 -7661: space: \x0a  
238 -7662: comment: %% Original object ID: 42 0  
239 -7689: space: \x0a  
240 -7690: integer: 47  
241 -7692: space:  
242 -7693: integer: 0  
243 -7694: space:  
244 -7695: word: obj  
245 -7698: space: \x0a  
246 -7699: dict_open: <<  
247 -7701: space: \x0a  
248 -7704: name: /Length  
249 -7711: space:  
250 -7712: integer: 48  
251 -7714: space:  
252 -7715: integer: 0  
253 -7716: space:  
254 -7717: word: R  
255 -7718: space: \x0a  
256 -7719: dict_close: >>  
257 -7721: space: \x0a  
258 -7722: word: stream 225 +7617: space: \x0a
  226 +7618: comment: %QDF: ignore_newline
  227 +7638: space: \x0a\x0a
  228 +7640: integer: 46
  229 +7642: space:
  230 +7643: integer: 0
  231 +7644: space:
  232 +7645: word: obj
  233 +7648: space: \x0a
  234 +7649: integer: 67
  235 +7651: space: \x0a
  236 +7652: word: endobj
  237 +7658: space: \x0a\x0a
  238 +7660: comment: %% Contents for page 6
  239 +7682: space: \x0a
  240 +7683: comment: %% Original object ID: 42 0
  241 +7710: space: \x0a
  242 +7711: integer: 47
  243 +7713: space:
  244 +7714: integer: 0
  245 +7715: space:
  246 +7716: word: obj
  247 +7719: space: \x0a
  248 +7720: dict_open: <<
  249 +7722: space: \x0a
  250 +7725: name: /Length
  251 +7732: space:
  252 +7733: integer: 48
  253 +7735: space:
  254 +7736: integer: 0
  255 +7737: space:
  256 +7738: word: R
  257 +7739: space: \x0a
  258 +7740: dict_close: >>
  259 +7742: space: \x0a
  260 +7743: word: stream
259 skipping to endstream 261 skipping to endstream
260 -7773: word: endstream  
261 -7782: space: \x0a  
262 -7783: word: endobj  
263 -7789: space: \x0a\x0a  
264 -7791: integer: 48  
265 -7793: space:  
266 -7794: integer: 0  
267 -7795: space:  
268 -7796: word: obj  
269 -7799: space: \x0a  
270 -7800: integer: 44  
271 -7802: space: \x0a  
272 -7803: word: endobj  
273 -7809: space: \x0a\x0a  
274 -7811: comment: %% Contents for page 7  
275 -7833: space: \x0a  
276 -7834: comment: %% Original object ID: 43 0  
277 -7861: space: \x0a  
278 -7862: integer: 49  
279 -7864: space:  
280 -7865: integer: 0  
281 -7866: space:  
282 -7867: word: obj  
283 -7870: space: \x0a  
284 -7871: dict_open: <<  
285 -7873: space: \x0a  
286 -7876: name: /Length  
287 -7883: space:  
288 -7884: integer: 50  
289 -7886: space:  
290 -7887: integer: 0  
291 -7888: space:  
292 -7889: word: R  
293 -7890: space: \x0a  
294 -7891: dict_close: >>  
295 -7893: space: \x0a  
296 -7894: word: stream 262 +7794: word: endstream
  263 +7803: space: \x0a
  264 +7804: word: endobj
  265 +7810: space: \x0a\x0a
  266 +7812: integer: 48
  267 +7814: space:
  268 +7815: integer: 0
  269 +7816: space:
  270 +7817: word: obj
  271 +7820: space: \x0a
  272 +7821: integer: 44
  273 +7823: space: \x0a
  274 +7824: word: endobj
  275 +7830: space: \x0a\x0a
  276 +7832: comment: %% Contents for page 7
  277 +7854: space: \x0a
  278 +7855: comment: %% Original object ID: 43 0
  279 +7882: space: \x0a
  280 +7883: integer: 49
  281 +7885: space:
  282 +7886: integer: 0
  283 +7887: space:
  284 +7888: word: obj
  285 +7891: space: \x0a
  286 +7892: dict_open: <<
  287 +7894: space: \x0a
  288 +7897: name: /Length
  289 +7904: space:
  290 +7905: integer: 50
  291 +7907: space:
  292 +7908: integer: 0
  293 +7909: space:
  294 +7910: word: R
  295 +7911: space: \x0a
  296 +7912: dict_close: >>
  297 +7914: space: \x0a
  298 +7915: word: stream
297 skipping to endstream 299 skipping to endstream
298 -7945: word: endstream  
299 -7954: space: \x0a  
300 -7955: word: endobj  
301 -7961: space: \x0a\x0a  
302 -7963: integer: 50  
303 -7965: space:  
304 -7966: integer: 0  
305 -7967: space:  
306 -7968: word: obj  
307 -7971: space: \x0a  
308 -7972: integer: 44  
309 -7974: space: \x0a  
310 -7975: word: endobj  
311 -7981: space: \x0a\x0a  
312 -7983: comment: %% Contents for page 8  
313 -8005: space: \x0a  
314 -8006: comment: %% Original object ID: 44 0  
315 -8033: space: \x0a  
316 -8034: integer: 51  
317 -8036: space:  
318 -8037: integer: 0  
319 -8038: space:  
320 -8039: word: obj  
321 -8042: space: \x0a  
322 -8043: dict_open: <<  
323 -8045: space: \x0a  
324 -8048: name: /Length  
325 -8055: space:  
326 -8056: integer: 52  
327 -8058: space:  
328 -8059: integer: 0  
329 -8060: space:  
330 -8061: word: R  
331 -8062: space: \x0a  
332 -8063: dict_close: >>  
333 -8065: space: \x0a  
334 -8066: word: stream 300 +8241: word: endstream
  301 +8250: space: \x0a
  302 +8251: word: endobj
  303 +8257: space: \x0a
  304 +8258: comment: %QDF: ignore_newline
  305 +8278: space: \x0a\x0a
  306 +8280: integer: 50
  307 +8282: space:
  308 +8283: integer: 0
  309 +8284: space:
  310 +8285: word: obj
  311 +8288: space: \x0a
  312 +8289: integer: 318
  313 +8292: space: \x0a
  314 +8293: word: endobj
  315 +8299: space: \x0a\x0a
  316 +8301: comment: %% Contents for page 8
  317 +8323: space: \x0a
  318 +8324: comment: %% Original object ID: 44 0
  319 +8351: space: \x0a
  320 +8352: integer: 51
  321 +8354: space:
  322 +8355: integer: 0
  323 +8356: space:
  324 +8357: word: obj
  325 +8360: space: \x0a
  326 +8361: dict_open: <<
  327 +8363: space: \x0a
  328 +8366: name: /Length
  329 +8373: space:
  330 +8374: integer: 52
  331 +8376: space:
  332 +8377: integer: 0
  333 +8378: space:
  334 +8379: word: R
  335 +8380: space: \x0a
  336 +8381: dict_close: >>
  337 +8383: space: \x0a
  338 +8384: word: stream
335 skipping to endstream 339 skipping to endstream
336 -8117: word: endstream  
337 -8126: space: \x0a  
338 -8127: word: endobj  
339 -8133: space: \x0a\x0a  
340 -8135: integer: 52  
341 -8137: space:  
342 -8138: integer: 0  
343 -8139: space:  
344 -8140: word: obj  
345 -8143: space: \x0a  
346 -8144: integer: 44  
347 -8146: space: \x0a  
348 -8147: word: endobj  
349 -8153: space: \x0a\x0a  
350 -8155: comment: %% Contents for page 9  
351 -8177: space: \x0a  
352 -8178: comment: %% Original object ID: 45 0  
353 -8205: space: \x0a  
354 -8206: integer: 53  
355 -8208: space:  
356 -8209: integer: 0  
357 -8210: space:  
358 -8211: word: obj  
359 -8214: space: \x0a  
360 -8215: dict_open: <<  
361 -8217: space: \x0a  
362 -8220: name: /Length  
363 -8227: space:  
364 -8228: integer: 54  
365 -8230: space:  
366 -8231: integer: 0  
367 -8232: space:  
368 -8233: word: R  
369 -8234: space: \x0a  
370 -8235: dict_close: >>  
371 -8237: space: \x0a  
372 -8238: word: stream  
373 -skipping to endstream  
374 -8289: word: endstream  
375 -8298: space: \x0a  
376 -8299: word: endobj  
377 -8305: space: \x0a\x0a  
378 -8307: integer: 54  
379 -8309: space:  
380 -8310: integer: 0  
381 -8311: space:  
382 -8312: word: obj  
383 -8315: space: \x0a  
384 -8316: integer: 44  
385 -8318: space: \x0a  
386 -8319: word: endobj  
387 -8325: space: \x0a\x0a  
388 -8327: comment: %% Contents for page 10  
389 -8350: space: \x0a  
390 -8351: comment: %% Original object ID: 46 0  
391 -8378: space: \x0a  
392 -8379: integer: 55  
393 -8381: space:  
394 -8382: integer: 0  
395 -8383: space:  
396 -8384: word: obj  
397 -8387: space: \x0a  
398 -8388: dict_open: <<  
399 -8390: space: \x0a  
400 -8393: name: /Length  
401 -8400: space:  
402 -8401: integer: 56  
403 -8403: space:  
404 -8404: integer: 0  
405 -8405: space:  
406 -8406: word: R  
407 -8407: space: \x0a  
408 -8408: dict_close: >>  
409 -8410: space: \x0a  
410 -8411: word: stream  
411 -skipping to endstream  
412 -8462: word: endstream  
413 -8471: space: \x0a  
414 -8472: word: endobj  
415 -8478: space: \x0a\x0a  
416 -8480: integer: 56  
417 -8482: space:  
418 -8483: integer: 0  
419 -8484: space:  
420 -8485: word: obj  
421 -8488: space: \x0a  
422 -8489: integer: 44  
423 -8491: space: \x0a  
424 -8492: word: endobj  
425 -8498: space: \x0a\x0a  
426 -8500: comment: %% Contents for page 11 340 +8435: word: endstream
  341 +8444: space: \x0a
  342 +8445: word: endobj
  343 +8451: space: \x0a\x0a
  344 +8453: integer: 52
  345 +8455: space:
  346 +8456: integer: 0
  347 +8457: space:
  348 +8458: word: obj
  349 +8461: space: \x0a
  350 +8462: integer: 44
  351 +8464: space: \x0a
  352 +8465: word: endobj
  353 +8471: space: \x0a\x0a
  354 +8473: comment: %% Contents for page 9
  355 +8495: space: \x0a
  356 +8496: comment: %% Original object ID: 45 0
427 8523: space: \x0a 357 8523: space: \x0a
428 -8524: comment: %% Original object ID: 47 0  
429 -8551: space: \x0a  
430 -8552: integer: 57  
431 -8554: space:  
432 -8555: integer: 0  
433 -8556: space:  
434 -8557: word: obj  
435 -8560: space: \x0a  
436 -8561: dict_open: <<  
437 -8563: space: \x0a  
438 -8566: name: /Length  
439 -8573: space:  
440 -8574: integer: 58  
441 -8576: space:  
442 -8577: integer: 0  
443 -8578: space:  
444 -8579: word: R  
445 -8580: space: \x0a  
446 -8581: dict_close: >>  
447 -8583: space: \x0a  
448 -8584: word: stream 358 +8524: integer: 53
  359 +8526: space:
  360 +8527: integer: 0
  361 +8528: space:
  362 +8529: word: obj
  363 +8532: space: \x0a
  364 +8533: dict_open: <<
  365 +8535: space: \x0a
  366 +8538: name: /Length
  367 +8545: space:
  368 +8546: integer: 54
  369 +8548: space:
  370 +8549: integer: 0
  371 +8550: space:
  372 +8551: word: R
  373 +8552: space: \x0a
  374 +8553: dict_close: >>
  375 +8555: space: \x0a
  376 +8556: word: stream
449 skipping to endstream 377 skipping to endstream
450 -8635: word: endstream  
451 -8644: space: \x0a  
452 -8645: word: endobj  
453 -8651: space: \x0a\x0a  
454 -8653: integer: 58  
455 -8655: space:  
456 -8656: integer: 0  
457 -8657: space:  
458 -8658: word: obj  
459 -8661: space: \x0a  
460 -8662: integer: 44  
461 -8664: space: \x0a  
462 -8665: word: endobj  
463 -8671: space: \x0a\x0a  
464 -8673: integer: 59  
465 -8675: space:  
466 -8676: integer: 0  
467 -8677: space:  
468 -8678: word: obj  
469 -8681: space: \x0a  
470 -8682: dict_open: <<  
471 -8684: space: \x0a  
472 -8687: name: /Type  
473 -8692: space:  
474 -8693: name: /XRef  
475 -8698: space: \x0a  
476 -8701: name: /Length  
477 -8708: space:  
478 -8709: integer: 240  
479 -8712: space: \x0a  
480 -8715: name: /W  
481 -8717: space:  
482 -8718: array_open: [  
483 -8719: space:  
484 -8720: integer: 1 378 +8607: word: endstream
  379 +8616: space: \x0a
  380 +8617: word: endobj
  381 +8623: space: \x0a\x0a
  382 +8625: integer: 54
  383 +8627: space:
  384 +8628: integer: 0
  385 +8629: space:
  386 +8630: word: obj
  387 +8633: space: \x0a
  388 +8634: integer: 44
  389 +8636: space: \x0a
  390 +8637: word: endobj
  391 +8643: space: \x0a\x0a
  392 +8645: comment: %% Contents for page 10
  393 +8668: space: \x0a
  394 +8669: comment: %% Original object ID: 46 0
  395 +8696: space: \x0a
  396 +8697: integer: 55
  397 +8699: space:
  398 +8700: integer: 0
  399 +8701: space:
  400 +8702: word: obj
  401 +8705: space: \x0a
  402 +8706: dict_open: <<
  403 +8708: space: \x0a
  404 +8711: name: /Length
  405 +8718: space:
  406 +8719: integer: 56
485 8721: space: 407 8721: space:
486 -8722: integer: 2 408 +8722: integer: 0
487 8723: space: 409 8723: space:
488 -8724: integer: 1  
489 -8725: space:  
490 -8726: array_close: ]  
491 -8727: space: \x0a  
492 -8730: name: /Root  
493 -8735: space:  
494 -8736: integer: 2  
495 -8737: space:  
496 -8738: integer: 0  
497 -8739: space:  
498 -8740: word: R  
499 -8741: space: \x0a  
500 -8744: name: /Size  
501 -8749: space:  
502 -8750: integer: 60  
503 -8752: space: \x0a  
504 -8755: name: /ID  
505 -8758: space:  
506 -8759: array_open: [  
507 -8760: string: \x88\x04\x8e\x17\xc9a\xe0\x94\xff\xec\xe9\x8c\xb8\x8cF\xd0 (raw: <88048e17c961e094ffece98cb88c46d0>)  
508 -8794: string: \xed\xd6\x0f\xe8\xee\x87\xf8\x871\xa8o\x81\x9f\xe6Q\x99 (raw: <edd60fe8ee87f88731a86f819fe65199>)  
509 -8828: array_close: ]  
510 -8829: space: \x0a  
511 -8830: dict_close: >>  
512 -8832: space: \x0a  
513 -8833: word: stream 410 +8724: word: R
  411 +8725: space: \x0a
  412 +8726: dict_close: >>
  413 +8728: space: \x0a
  414 +8729: word: stream
  415 +skipping to endstream
  416 +8780: word: endstream
  417 +8789: space: \x0a
  418 +8790: word: endobj
  419 +8796: space: \x0a\x0a
  420 +8798: integer: 56
  421 +8800: space:
  422 +8801: integer: 0
  423 +8802: space:
  424 +8803: word: obj
  425 +8806: space: \x0a
  426 +8807: integer: 44
  427 +8809: space: \x0a
  428 +8810: word: endobj
  429 +8816: space: \x0a\x0a
  430 +8818: comment: %% Contents for page 11
  431 +8841: space: \x0a
  432 +8842: comment: %% Original object ID: 47 0
  433 +8869: space: \x0a
  434 +8870: integer: 57
  435 +8872: space:
  436 +8873: integer: 0
  437 +8874: space:
  438 +8875: word: obj
  439 +8878: space: \x0a
  440 +8879: dict_open: <<
  441 +8881: space: \x0a
  442 +8884: name: /Length
  443 +8891: space:
  444 +8892: integer: 58
  445 +8894: space:
  446 +8895: integer: 0
  447 +8896: space:
  448 +8897: word: R
  449 +8898: space: \x0a
  450 +8899: dict_close: >>
  451 +8901: space: \x0a
  452 +8902: word: stream
  453 +skipping to endstream
  454 +8953: word: endstream
  455 +8962: space: \x0a
  456 +8963: word: endobj
  457 +8969: space: \x0a\x0a
  458 +8971: integer: 58
  459 +8973: space:
  460 +8974: integer: 0
  461 +8975: space:
  462 +8976: word: obj
  463 +8979: space: \x0a
  464 +8980: integer: 44
  465 +8982: space: \x0a
  466 +8983: word: endobj
  467 +8989: space: \x0a\x0a
  468 +8991: integer: 59
  469 +8993: space:
  470 +8994: integer: 0
  471 +8995: space:
  472 +8996: word: obj
  473 +8999: space: \x0a
  474 +9000: dict_open: <<
  475 +9002: space: \x0a
  476 +9005: name: /Type
  477 +9010: space:
  478 +9011: name: /XRef
  479 +9016: space: \x0a
  480 +9019: name: /Length
  481 +9026: space:
  482 +9027: integer: 240
  483 +9030: space: \x0a
  484 +9033: name: /W
  485 +9035: space:
  486 +9036: array_open: [
  487 +9037: space:
  488 +9038: integer: 1
  489 +9039: space:
  490 +9040: integer: 2
  491 +9041: space:
  492 +9042: integer: 1
  493 +9043: space:
  494 +9044: array_close: ]
  495 +9045: space: \x0a
  496 +9048: name: /Root
  497 +9053: space:
  498 +9054: integer: 2
  499 +9055: space:
  500 +9056: integer: 0
  501 +9057: space:
  502 +9058: word: R
  503 +9059: space: \x0a
  504 +9062: name: /Size
  505 +9067: space:
  506 +9068: integer: 60
  507 +9070: space: \x0a
  508 +9073: name: /ID
  509 +9076: space:
  510 +9077: array_open: [
  511 +9078: string: \x88\x04\x8e\x17\xc9a\xe0\x94\xff\xec\xe9\x8c\xb8\x8cF\xd0 (raw: <88048e17c961e094ffece98cb88c46d0>)
  512 +9112: string: \xed\xd6\x0f\xe8\xee\x87\xf8\x871\xa8o\x81\x9f\xe6Q\x99 (raw: <edd60fe8ee87f88731a86f819fe65199>)
  513 +9146: array_close: ]
  514 +9147: space: \x0a
  515 +9148: dict_close: >>
  516 +9150: space: \x0a
  517 +9151: word: stream
514 skipping to endstream 518 skipping to endstream
515 -9081: word: endstream  
516 -9090: space: \x0a  
517 -9091: word: endobj  
518 -9097: space: \x0a\x0a  
519 -9099: word: startxref  
520 -9108: space: \x0a  
521 -9109: integer: 8673  
522 -9113: space: \x0a  
523 -9114: comment: %%EOF  
524 -9119: space: \x0a  
525 -9120: eof 519 +9399: word: endstream
  520 +9408: space: \x0a
  521 +9409: word: endobj
  522 +9415: space: \x0a\x0a
  523 +9417: word: startxref
  524 +9426: space: \x0a
  525 +9427: integer: 8991
  526 +9431: space: \x0a
  527 +9432: comment: %%EOF
  528 +9437: space: \x0a
  529 +9438: eof
526 --- END FILE --- 530 --- END FILE ---
527 --- BEGIN PAGE 1 --- 531 --- BEGIN PAGE 1 ---
528 0: word: BT 532 0: word: BT
@@ -595,9 +599,7 @@ skipping to endstream @@ -595,9 +599,7 @@ skipping to endstream
595 103: dict_close: >> 599 103: dict_close: >>
596 105: space: \x0a 600 105: space: \x0a
597 106: word: ID 601 106: word: ID
598 -skipping to EI  
599 -352: word: EI  
600 -354: space: \x0a 602 +108: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI\x0a
601 355: word: BT 603 355: word: BT
602 357: space: \x0a 604 357: space: \x0a
603 360: name: /F1 605 360: name: /F1
@@ -743,13 +745,11 @@ skipping to EI @@ -743,13 +745,11 @@ skipping to EI
743 47: word: ET 745 47: word: ET
744 49: space: \x0a\x00\x0a 746 49: space: \x0a\x00\x0a
745 52: name: /ThisMustBeLast 747 52: name: /ThisMustBeLast
746 -67: space: \x0a  
747 -68: eof 748 +67: eof
748 --- END PAGE 5 --- 749 --- END PAGE 5 ---
749 --- BEGIN PAGE 6 --- 750 --- BEGIN PAGE 6 ---
750 0: word: ID 751 0: word: ID
751 -skipping to EI  
752 -EI not found 752 +EI not found; resuming normal scanning
753 2: space: \x0a 753 2: space: \x0a
754 5: name: /F1 754 5: name: /F1
755 8: space: 755 8: space:
@@ -772,27 +772,37 @@ EI not found @@ -772,27 +772,37 @@ EI not found
772 44: eof 772 44: eof
773 --- END PAGE 6 --- 773 --- END PAGE 6 ---
774 --- BEGIN PAGE 7 --- 774 --- BEGIN PAGE 7 ---
775 -0: word: BT  
776 -2: space: \x0a  
777 -5: name: /F1  
778 -8: space:  
779 -9: integer: 24  
780 -11: space:  
781 -12: word: Tf  
782 -14: space: \x0a  
783 -17: integer: 72 775 +0: name: /potato
  776 +7: space: \x0a
  777 +8: word: BI
  778 +10: space: \x0a
  779 +11: name: /CS
  780 +14: space:
  781 +15: name: /G
  782 +17: name: /W
784 19: space: 783 19: space:
785 -20: integer: 720  
786 -23: space:  
787 -24: word: Td  
788 -26: space: \x0a  
789 -29: string: Potato (raw: (Potato))  
790 -37: space:  
791 -38: word: Tj  
792 -40: space: \x0a  
793 -41: word: ET  
794 -43: space: \x0a  
795 -44: eof 784 +20: integer: 66
  785 +22: name: /H
  786 +24: space:
  787 +25: integer: 47
  788 +27: name: /BPC
  789 +31: space:
  790 +32: integer: 8
  791 +33: name: /F
  792 +35: name: /Fl
  793 +38: name: /DP
  794 +41: dict_open: <<
  795 +43: name: /Predictor
  796 +53: space:
  797 +54: integer: 15
  798 +56: name: /Columns
  799 +64: space:
  800 +65: integer: 66
  801 +67: dict_close: >>
  802 +69: space: \x0a
  803 +70: word: ID
  804 +72: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI
  805 +318: eof
796 --- END PAGE 7 --- 806 --- END PAGE 7 ---
797 --- BEGIN PAGE 8 --- 807 --- BEGIN PAGE 8 ---
798 0: word: BT 808 0: word: BT
qpdf/qtest/qpdf/tokens-no-ignorable.out
@@ -101,152 +101,152 @@ skipping to endstream @@ -101,152 +101,152 @@ skipping to endstream
101 skipping to endstream 101 skipping to endstream
102 7601: word: endstream 102 7601: word: endstream
103 7611: word: endobj 103 7611: word: endobj
104 -7619: integer: 46  
105 -7622: integer: 0  
106 -7624: word: obj  
107 -7628: integer: 68  
108 -7631: word: endobj  
109 -7690: integer: 47  
110 -7693: integer: 0  
111 -7695: word: obj  
112 -7699: dict_open: <<  
113 -7704: name: /Length  
114 -7712: integer: 48  
115 -7715: integer: 0  
116 -7717: word: R  
117 -7719: dict_close: >>  
118 -7722: word: stream 104 +7640: integer: 46
  105 +7643: integer: 0
  106 +7645: word: obj
  107 +7649: integer: 67
  108 +7652: word: endobj
  109 +7711: integer: 47
  110 +7714: integer: 0
  111 +7716: word: obj
  112 +7720: dict_open: <<
  113 +7725: name: /Length
  114 +7733: integer: 48
  115 +7736: integer: 0
  116 +7738: word: R
  117 +7740: dict_close: >>
  118 +7743: word: stream
119 skipping to endstream 119 skipping to endstream
120 -7773: word: endstream  
121 -7783: word: endobj  
122 -7791: integer: 48  
123 -7794: integer: 0  
124 -7796: word: obj  
125 -7800: integer: 44  
126 -7803: word: endobj  
127 -7862: integer: 49  
128 -7865: integer: 0  
129 -7867: word: obj  
130 -7871: dict_open: <<  
131 -7876: name: /Length  
132 -7884: integer: 50  
133 -7887: integer: 0  
134 -7889: word: R  
135 -7891: dict_close: >>  
136 -7894: word: stream 120 +7794: word: endstream
  121 +7804: word: endobj
  122 +7812: integer: 48
  123 +7815: integer: 0
  124 +7817: word: obj
  125 +7821: integer: 44
  126 +7824: word: endobj
  127 +7883: integer: 49
  128 +7886: integer: 0
  129 +7888: word: obj
  130 +7892: dict_open: <<
  131 +7897: name: /Length
  132 +7905: integer: 50
  133 +7908: integer: 0
  134 +7910: word: R
  135 +7912: dict_close: >>
  136 +7915: word: stream
137 skipping to endstream 137 skipping to endstream
138 -7945: word: endstream  
139 -7955: word: endobj  
140 -7963: integer: 50  
141 -7966: integer: 0  
142 -7968: word: obj  
143 -7972: integer: 44  
144 -7975: word: endobj  
145 -8034: integer: 51  
146 -8037: integer: 0  
147 -8039: word: obj  
148 -8043: dict_open: <<  
149 -8048: name: /Length  
150 -8056: integer: 52  
151 -8059: integer: 0  
152 -8061: word: R  
153 -8063: dict_close: >>  
154 -8066: word: stream 138 +8241: word: endstream
  139 +8251: word: endobj
  140 +8280: integer: 50
  141 +8283: integer: 0
  142 +8285: word: obj
  143 +8289: integer: 318
  144 +8293: word: endobj
  145 +8352: integer: 51
  146 +8355: integer: 0
  147 +8357: word: obj
  148 +8361: dict_open: <<
  149 +8366: name: /Length
  150 +8374: integer: 52
  151 +8377: integer: 0
  152 +8379: word: R
  153 +8381: dict_close: >>
  154 +8384: word: stream
155 skipping to endstream 155 skipping to endstream
156 -8117: word: endstream  
157 -8127: word: endobj  
158 -8135: integer: 52  
159 -8138: integer: 0  
160 -8140: word: obj  
161 -8144: integer: 44  
162 -8147: word: endobj  
163 -8206: integer: 53  
164 -8209: integer: 0  
165 -8211: word: obj  
166 -8215: dict_open: <<  
167 -8220: name: /Length  
168 -8228: integer: 54  
169 -8231: integer: 0  
170 -8233: word: R  
171 -8235: dict_close: >>  
172 -8238: word: stream 156 +8435: word: endstream
  157 +8445: word: endobj
  158 +8453: integer: 52
  159 +8456: integer: 0
  160 +8458: word: obj
  161 +8462: integer: 44
  162 +8465: word: endobj
  163 +8524: integer: 53
  164 +8527: integer: 0
  165 +8529: word: obj
  166 +8533: dict_open: <<
  167 +8538: name: /Length
  168 +8546: integer: 54
  169 +8549: integer: 0
  170 +8551: word: R
  171 +8553: dict_close: >>
  172 +8556: word: stream
173 skipping to endstream 173 skipping to endstream
174 -8289: word: endstream  
175 -8299: word: endobj  
176 -8307: integer: 54  
177 -8310: integer: 0  
178 -8312: word: obj  
179 -8316: integer: 44  
180 -8319: word: endobj  
181 -8379: integer: 55  
182 -8382: integer: 0  
183 -8384: word: obj  
184 -8388: dict_open: <<  
185 -8393: name: /Length  
186 -8401: integer: 56  
187 -8404: integer: 0  
188 -8406: word: R  
189 -8408: dict_close: >>  
190 -8411: word: stream 174 +8607: word: endstream
  175 +8617: word: endobj
  176 +8625: integer: 54
  177 +8628: integer: 0
  178 +8630: word: obj
  179 +8634: integer: 44
  180 +8637: word: endobj
  181 +8697: integer: 55
  182 +8700: integer: 0
  183 +8702: word: obj
  184 +8706: dict_open: <<
  185 +8711: name: /Length
  186 +8719: integer: 56
  187 +8722: integer: 0
  188 +8724: word: R
  189 +8726: dict_close: >>
  190 +8729: word: stream
191 skipping to endstream 191 skipping to endstream
192 -8462: word: endstream  
193 -8472: word: endobj  
194 -8480: integer: 56  
195 -8483: integer: 0  
196 -8485: word: obj  
197 -8489: integer: 44  
198 -8492: word: endobj  
199 -8552: integer: 57  
200 -8555: integer: 0  
201 -8557: word: obj  
202 -8561: dict_open: <<  
203 -8566: name: /Length  
204 -8574: integer: 58  
205 -8577: integer: 0  
206 -8579: word: R  
207 -8581: dict_close: >>  
208 -8584: word: stream 192 +8780: word: endstream
  193 +8790: word: endobj
  194 +8798: integer: 56
  195 +8801: integer: 0
  196 +8803: word: obj
  197 +8807: integer: 44
  198 +8810: word: endobj
  199 +8870: integer: 57
  200 +8873: integer: 0
  201 +8875: word: obj
  202 +8879: dict_open: <<
  203 +8884: name: /Length
  204 +8892: integer: 58
  205 +8895: integer: 0
  206 +8897: word: R
  207 +8899: dict_close: >>
  208 +8902: word: stream
209 skipping to endstream 209 skipping to endstream
210 -8635: word: endstream  
211 -8645: word: endobj  
212 -8653: integer: 58  
213 -8656: integer: 0  
214 -8658: word: obj  
215 -8662: integer: 44  
216 -8665: word: endobj  
217 -8673: integer: 59  
218 -8676: integer: 0  
219 -8678: word: obj  
220 -8682: dict_open: <<  
221 -8687: name: /Type  
222 -8693: name: /XRef  
223 -8701: name: /Length  
224 -8709: integer: 240  
225 -8715: name: /W  
226 -8718: array_open: [  
227 -8720: integer: 1  
228 -8722: integer: 2  
229 -8724: integer: 1  
230 -8726: array_close: ]  
231 -8730: name: /Root  
232 -8736: integer: 2  
233 -8738: integer: 0  
234 -8740: word: R  
235 -8744: name: /Size  
236 -8750: integer: 60  
237 -8755: name: /ID  
238 -8759: array_open: [  
239 -8760: string: \x88\x04\x8e\x17\xc9a\xe0\x94\xff\xec\xe9\x8c\xb8\x8cF\xd0 (raw: <88048e17c961e094ffece98cb88c46d0>)  
240 -8794: string: \xed\xd6\x0f\xe8\xee\x87\xf8\x871\xa8o\x81\x9f\xe6Q\x99 (raw: <edd60fe8ee87f88731a86f819fe65199>)  
241 -8828: array_close: ]  
242 -8830: dict_close: >>  
243 -8833: word: stream 210 +8953: word: endstream
  211 +8963: word: endobj
  212 +8971: integer: 58
  213 +8974: integer: 0
  214 +8976: word: obj
  215 +8980: integer: 44
  216 +8983: word: endobj
  217 +8991: integer: 59
  218 +8994: integer: 0
  219 +8996: word: obj
  220 +9000: dict_open: <<
  221 +9005: name: /Type
  222 +9011: name: /XRef
  223 +9019: name: /Length
  224 +9027: integer: 240
  225 +9033: name: /W
  226 +9036: array_open: [
  227 +9038: integer: 1
  228 +9040: integer: 2
  229 +9042: integer: 1
  230 +9044: array_close: ]
  231 +9048: name: /Root
  232 +9054: integer: 2
  233 +9056: integer: 0
  234 +9058: word: R
  235 +9062: name: /Size
  236 +9068: integer: 60
  237 +9073: name: /ID
  238 +9077: array_open: [
  239 +9078: string: \x88\x04\x8e\x17\xc9a\xe0\x94\xff\xec\xe9\x8c\xb8\x8cF\xd0 (raw: <88048e17c961e094ffece98cb88c46d0>)
  240 +9112: string: \xed\xd6\x0f\xe8\xee\x87\xf8\x871\xa8o\x81\x9f\xe6Q\x99 (raw: <edd60fe8ee87f88731a86f819fe65199>)
  241 +9146: array_close: ]
  242 +9148: dict_close: >>
  243 +9151: word: stream
244 skipping to endstream 244 skipping to endstream
245 -9081: word: endstream  
246 -9091: word: endobj  
247 -9099: word: startxref  
248 -9109: integer: 8673  
249 -9120: eof 245 +9399: word: endstream
  246 +9409: word: endobj
  247 +9417: word: startxref
  248 +9427: integer: 8991
  249 +9438: eof
250 --- END FILE --- 250 --- END FILE ---
251 --- BEGIN PAGE 1 --- 251 --- BEGIN PAGE 1 ---
252 0: word: BT 252 0: word: BT
@@ -291,8 +291,7 @@ skipping to endstream @@ -291,8 +291,7 @@ skipping to endstream
291 101: integer: 66 291 101: integer: 66
292 103: dict_close: >> 292 103: dict_close: >>
293 106: word: ID 293 106: word: ID
294 -skipping to EI  
295 -352: word: EI 294 +108: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI\x0a
296 355: word: BT 295 355: word: BT
297 360: name: /F1 296 360: name: /F1
298 364: integer: 24 297 364: integer: 24
@@ -374,12 +373,11 @@ skipping to EI @@ -374,12 +373,11 @@ skipping to EI
374 44: word: Tj 373 44: word: Tj
375 47: word: ET 374 47: word: ET
376 52: name: /ThisMustBeLast 375 52: name: /ThisMustBeLast
377 -68: eof 376 +67: eof
378 --- END PAGE 5 --- 377 --- END PAGE 5 ---
379 --- BEGIN PAGE 6 --- 378 --- BEGIN PAGE 6 ---
380 0: word: ID 379 0: word: ID
381 -skipping to EI  
382 -EI not found 380 +EI not found; resuming normal scanning
383 5: name: /F1 381 5: name: /F1
384 9: integer: 24 382 9: integer: 24
385 12: word: Tf 383 12: word: Tf
@@ -392,17 +390,28 @@ EI not found @@ -392,17 +390,28 @@ EI not found
392 44: eof 390 44: eof
393 --- END PAGE 6 --- 391 --- END PAGE 6 ---
394 --- BEGIN PAGE 7 --- 392 --- BEGIN PAGE 7 ---
395 -0: word: BT  
396 -5: name: /F1  
397 -9: integer: 24  
398 -12: word: Tf  
399 -17: integer: 72  
400 -20: integer: 720  
401 -24: word: Td  
402 -29: string: Potato (raw: (Potato))  
403 -38: word: Tj  
404 -41: word: ET  
405 -44: eof 393 +0: name: /potato
  394 +8: word: BI
  395 +11: name: /CS
  396 +15: name: /G
  397 +17: name: /W
  398 +20: integer: 66
  399 +22: name: /H
  400 +25: integer: 47
  401 +27: name: /BPC
  402 +32: integer: 8
  403 +33: name: /F
  404 +35: name: /Fl
  405 +38: name: /DP
  406 +41: dict_open: <<
  407 +43: name: /Predictor
  408 +54: integer: 15
  409 +56: name: /Columns
  410 +65: integer: 66
  411 +67: dict_close: >>
  412 +70: word: ID
  413 +72: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI
  414 +318: eof
406 --- END PAGE 7 --- 415 --- END PAGE 7 ---
407 --- BEGIN PAGE 8 --- 416 --- BEGIN PAGE 8 ---
408 0: word: BT 417 0: word: BT
qpdf/qtest/qpdf/tokens.out
@@ -222,307 +222,311 @@ skipping to endstream @@ -222,307 +222,311 @@ skipping to endstream
222 7601: word: endstream 222 7601: word: endstream
223 7610: space: \x0a 223 7610: space: \x0a
224 7611: word: endobj 224 7611: word: endobj
225 -7617: space: \x0a\x0a  
226 -7619: integer: 46  
227 -7621: space:  
228 -7622: integer: 0  
229 -7623: space:  
230 -7624: word: obj  
231 -7627: space: \x0a  
232 -7628: integer: 68  
233 -7630: space: \x0a  
234 -7631: word: endobj  
235 -7637: space: \x0a\x0a  
236 -7639: comment: %% Contents for page 6  
237 -7661: space: \x0a  
238 -7662: comment: %% Original object ID: 42 0  
239 -7689: space: \x0a  
240 -7690: integer: 47  
241 -7692: space:  
242 -7693: integer: 0  
243 -7694: space:  
244 -7695: word: obj  
245 -7698: space: \x0a  
246 -7699: dict_open: <<  
247 -7701: space: \x0a  
248 -7704: name: /Length  
249 -7711: space:  
250 -7712: integer: 48  
251 -7714: space:  
252 -7715: integer: 0  
253 -7716: space:  
254 -7717: word: R  
255 -7718: space: \x0a  
256 -7719: dict_close: >>  
257 -7721: space: \x0a  
258 -7722: word: stream 225 +7617: space: \x0a
  226 +7618: comment: %QDF: ignore_newline
  227 +7638: space: \x0a\x0a
  228 +7640: integer: 46
  229 +7642: space:
  230 +7643: integer: 0
  231 +7644: space:
  232 +7645: word: obj
  233 +7648: space: \x0a
  234 +7649: integer: 67
  235 +7651: space: \x0a
  236 +7652: word: endobj
  237 +7658: space: \x0a\x0a
  238 +7660: comment: %% Contents for page 6
  239 +7682: space: \x0a
  240 +7683: comment: %% Original object ID: 42 0
  241 +7710: space: \x0a
  242 +7711: integer: 47
  243 +7713: space:
  244 +7714: integer: 0
  245 +7715: space:
  246 +7716: word: obj
  247 +7719: space: \x0a
  248 +7720: dict_open: <<
  249 +7722: space: \x0a
  250 +7725: name: /Length
  251 +7732: space:
  252 +7733: integer: 48
  253 +7735: space:
  254 +7736: integer: 0
  255 +7737: space:
  256 +7738: word: R
  257 +7739: space: \x0a
  258 +7740: dict_close: >>
  259 +7742: space: \x0a
  260 +7743: word: stream
259 skipping to endstream 261 skipping to endstream
260 -7773: word: endstream  
261 -7782: space: \x0a  
262 -7783: word: endobj  
263 -7789: space: \x0a\x0a  
264 -7791: integer: 48  
265 -7793: space:  
266 -7794: integer: 0  
267 -7795: space:  
268 -7796: word: obj  
269 -7799: space: \x0a  
270 -7800: integer: 44  
271 -7802: space: \x0a  
272 -7803: word: endobj  
273 -7809: space: \x0a\x0a  
274 -7811: comment: %% Contents for page 7  
275 -7833: space: \x0a  
276 -7834: comment: %% Original object ID: 43 0  
277 -7861: space: \x0a  
278 -7862: integer: 49  
279 -7864: space:  
280 -7865: integer: 0  
281 -7866: space:  
282 -7867: word: obj  
283 -7870: space: \x0a  
284 -7871: dict_open: <<  
285 -7873: space: \x0a  
286 -7876: name: /Length  
287 -7883: space:  
288 -7884: integer: 50  
289 -7886: space:  
290 -7887: integer: 0  
291 -7888: space:  
292 -7889: word: R  
293 -7890: space: \x0a  
294 -7891: dict_close: >>  
295 -7893: space: \x0a  
296 -7894: word: stream 262 +7794: word: endstream
  263 +7803: space: \x0a
  264 +7804: word: endobj
  265 +7810: space: \x0a\x0a
  266 +7812: integer: 48
  267 +7814: space:
  268 +7815: integer: 0
  269 +7816: space:
  270 +7817: word: obj
  271 +7820: space: \x0a
  272 +7821: integer: 44
  273 +7823: space: \x0a
  274 +7824: word: endobj
  275 +7830: space: \x0a\x0a
  276 +7832: comment: %% Contents for page 7
  277 +7854: space: \x0a
  278 +7855: comment: %% Original object ID: 43 0
  279 +7882: space: \x0a
  280 +7883: integer: 49
  281 +7885: space:
  282 +7886: integer: 0
  283 +7887: space:
  284 +7888: word: obj
  285 +7891: space: \x0a
  286 +7892: dict_open: <<
  287 +7894: space: \x0a
  288 +7897: name: /Length
  289 +7904: space:
  290 +7905: integer: 50
  291 +7907: space:
  292 +7908: integer: 0
  293 +7909: space:
  294 +7910: word: R
  295 +7911: space: \x0a
  296 +7912: dict_close: >>
  297 +7914: space: \x0a
  298 +7915: word: stream
297 skipping to endstream 299 skipping to endstream
298 -7945: word: endstream  
299 -7954: space: \x0a  
300 -7955: word: endobj  
301 -7961: space: \x0a\x0a  
302 -7963: integer: 50  
303 -7965: space:  
304 -7966: integer: 0  
305 -7967: space:  
306 -7968: word: obj  
307 -7971: space: \x0a  
308 -7972: integer: 44  
309 -7974: space: \x0a  
310 -7975: word: endobj  
311 -7981: space: \x0a\x0a  
312 -7983: comment: %% Contents for page 8  
313 -8005: space: \x0a  
314 -8006: comment: %% Original object ID: 44 0  
315 -8033: space: \x0a  
316 -8034: integer: 51  
317 -8036: space:  
318 -8037: integer: 0  
319 -8038: space:  
320 -8039: word: obj  
321 -8042: space: \x0a  
322 -8043: dict_open: <<  
323 -8045: space: \x0a  
324 -8048: name: /Length  
325 -8055: space:  
326 -8056: integer: 52  
327 -8058: space:  
328 -8059: integer: 0  
329 -8060: space:  
330 -8061: word: R  
331 -8062: space: \x0a  
332 -8063: dict_close: >>  
333 -8065: space: \x0a  
334 -8066: word: stream 300 +8241: word: endstream
  301 +8250: space: \x0a
  302 +8251: word: endobj
  303 +8257: space: \x0a
  304 +8258: comment: %QDF: ignore_newline
  305 +8278: space: \x0a\x0a
  306 +8280: integer: 50
  307 +8282: space:
  308 +8283: integer: 0
  309 +8284: space:
  310 +8285: word: obj
  311 +8288: space: \x0a
  312 +8289: integer: 318
  313 +8292: space: \x0a
  314 +8293: word: endobj
  315 +8299: space: \x0a\x0a
  316 +8301: comment: %% Contents for page 8
  317 +8323: space: \x0a
  318 +8324: comment: %% Original object ID: 44 0
  319 +8351: space: \x0a
  320 +8352: integer: 51
  321 +8354: space:
  322 +8355: integer: 0
  323 +8356: space:
  324 +8357: word: obj
  325 +8360: space: \x0a
  326 +8361: dict_open: <<
  327 +8363: space: \x0a
  328 +8366: name: /Length
  329 +8373: space:
  330 +8374: integer: 52
  331 +8376: space:
  332 +8377: integer: 0
  333 +8378: space:
  334 +8379: word: R
  335 +8380: space: \x0a
  336 +8381: dict_close: >>
  337 +8383: space: \x0a
  338 +8384: word: stream
335 skipping to endstream 339 skipping to endstream
336 -8117: word: endstream  
337 -8126: space: \x0a  
338 -8127: word: endobj  
339 -8133: space: \x0a\x0a  
340 -8135: integer: 52  
341 -8137: space:  
342 -8138: integer: 0  
343 -8139: space:  
344 -8140: word: obj  
345 -8143: space: \x0a  
346 -8144: integer: 44  
347 -8146: space: \x0a  
348 -8147: word: endobj  
349 -8153: space: \x0a\x0a  
350 -8155: comment: %% Contents for page 9  
351 -8177: space: \x0a  
352 -8178: comment: %% Original object ID: 45 0  
353 -8205: space: \x0a  
354 -8206: integer: 53  
355 -8208: space:  
356 -8209: integer: 0  
357 -8210: space:  
358 -8211: word: obj  
359 -8214: space: \x0a  
360 -8215: dict_open: <<  
361 -8217: space: \x0a  
362 -8220: name: /Length  
363 -8227: space:  
364 -8228: integer: 54  
365 -8230: space:  
366 -8231: integer: 0  
367 -8232: space:  
368 -8233: word: R  
369 -8234: space: \x0a  
370 -8235: dict_close: >>  
371 -8237: space: \x0a  
372 -8238: word: stream  
373 -skipping to endstream  
374 -8289: word: endstream  
375 -8298: space: \x0a  
376 -8299: word: endobj  
377 -8305: space: \x0a\x0a  
378 -8307: integer: 54  
379 -8309: space:  
380 -8310: integer: 0  
381 -8311: space:  
382 -8312: word: obj  
383 -8315: space: \x0a  
384 -8316: integer: 44  
385 -8318: space: \x0a  
386 -8319: word: endobj  
387 -8325: space: \x0a\x0a  
388 -8327: comment: %% Contents for page 10  
389 -8350: space: \x0a  
390 -8351: comment: %% Original object ID: 46 0  
391 -8378: space: \x0a  
392 -8379: integer: 55  
393 -8381: space:  
394 -8382: integer: 0  
395 -8383: space:  
396 -8384: word: obj  
397 -8387: space: \x0a  
398 -8388: dict_open: <<  
399 -8390: space: \x0a  
400 -8393: name: /Length  
401 -8400: space:  
402 -8401: integer: 56  
403 -8403: space:  
404 -8404: integer: 0  
405 -8405: space:  
406 -8406: word: R  
407 -8407: space: \x0a  
408 -8408: dict_close: >>  
409 -8410: space: \x0a  
410 -8411: word: stream  
411 -skipping to endstream  
412 -8462: word: endstream  
413 -8471: space: \x0a  
414 -8472: word: endobj  
415 -8478: space: \x0a\x0a  
416 -8480: integer: 56  
417 -8482: space:  
418 -8483: integer: 0  
419 -8484: space:  
420 -8485: word: obj  
421 -8488: space: \x0a  
422 -8489: integer: 44  
423 -8491: space: \x0a  
424 -8492: word: endobj  
425 -8498: space: \x0a\x0a  
426 -8500: comment: %% Contents for page 11 340 +8435: word: endstream
  341 +8444: space: \x0a
  342 +8445: word: endobj
  343 +8451: space: \x0a\x0a
  344 +8453: integer: 52
  345 +8455: space:
  346 +8456: integer: 0
  347 +8457: space:
  348 +8458: word: obj
  349 +8461: space: \x0a
  350 +8462: integer: 44
  351 +8464: space: \x0a
  352 +8465: word: endobj
  353 +8471: space: \x0a\x0a
  354 +8473: comment: %% Contents for page 9
  355 +8495: space: \x0a
  356 +8496: comment: %% Original object ID: 45 0
427 8523: space: \x0a 357 8523: space: \x0a
428 -8524: comment: %% Original object ID: 47 0  
429 -8551: space: \x0a  
430 -8552: integer: 57  
431 -8554: space:  
432 -8555: integer: 0  
433 -8556: space:  
434 -8557: word: obj  
435 -8560: space: \x0a  
436 -8561: dict_open: <<  
437 -8563: space: \x0a  
438 -8566: name: /Length  
439 -8573: space:  
440 -8574: integer: 58  
441 -8576: space:  
442 -8577: integer: 0  
443 -8578: space:  
444 -8579: word: R  
445 -8580: space: \x0a  
446 -8581: dict_close: >>  
447 -8583: space: \x0a  
448 -8584: word: stream 358 +8524: integer: 53
  359 +8526: space:
  360 +8527: integer: 0
  361 +8528: space:
  362 +8529: word: obj
  363 +8532: space: \x0a
  364 +8533: dict_open: <<
  365 +8535: space: \x0a
  366 +8538: name: /Length
  367 +8545: space:
  368 +8546: integer: 54
  369 +8548: space:
  370 +8549: integer: 0
  371 +8550: space:
  372 +8551: word: R
  373 +8552: space: \x0a
  374 +8553: dict_close: >>
  375 +8555: space: \x0a
  376 +8556: word: stream
449 skipping to endstream 377 skipping to endstream
450 -8635: word: endstream  
451 -8644: space: \x0a  
452 -8645: word: endobj  
453 -8651: space: \x0a\x0a  
454 -8653: integer: 58  
455 -8655: space:  
456 -8656: integer: 0  
457 -8657: space:  
458 -8658: word: obj  
459 -8661: space: \x0a  
460 -8662: integer: 44  
461 -8664: space: \x0a  
462 -8665: word: endobj  
463 -8671: space: \x0a\x0a  
464 -8673: integer: 59  
465 -8675: space:  
466 -8676: integer: 0  
467 -8677: space:  
468 -8678: word: obj  
469 -8681: space: \x0a  
470 -8682: dict_open: <<  
471 -8684: space: \x0a  
472 -8687: name: /Type  
473 -8692: space:  
474 -8693: name: /XRef  
475 -8698: space: \x0a  
476 -8701: name: /Length  
477 -8708: space:  
478 -8709: integer: 240  
479 -8712: space: \x0a  
480 -8715: name: /W  
481 -8717: space:  
482 -8718: array_open: [  
483 -8719: space:  
484 -8720: integer: 1 378 +8607: word: endstream
  379 +8616: space: \x0a
  380 +8617: word: endobj
  381 +8623: space: \x0a\x0a
  382 +8625: integer: 54
  383 +8627: space:
  384 +8628: integer: 0
  385 +8629: space:
  386 +8630: word: obj
  387 +8633: space: \x0a
  388 +8634: integer: 44
  389 +8636: space: \x0a
  390 +8637: word: endobj
  391 +8643: space: \x0a\x0a
  392 +8645: comment: %% Contents for page 10
  393 +8668: space: \x0a
  394 +8669: comment: %% Original object ID: 46 0
  395 +8696: space: \x0a
  396 +8697: integer: 55
  397 +8699: space:
  398 +8700: integer: 0
  399 +8701: space:
  400 +8702: word: obj
  401 +8705: space: \x0a
  402 +8706: dict_open: <<
  403 +8708: space: \x0a
  404 +8711: name: /Length
  405 +8718: space:
  406 +8719: integer: 56
485 8721: space: 407 8721: space:
486 -8722: integer: 2 408 +8722: integer: 0
487 8723: space: 409 8723: space:
488 -8724: integer: 1  
489 -8725: space:  
490 -8726: array_close: ]  
491 -8727: space: \x0a  
492 -8730: name: /Root  
493 -8735: space:  
494 -8736: integer: 2  
495 -8737: space:  
496 -8738: integer: 0  
497 -8739: space:  
498 -8740: word: R  
499 -8741: space: \x0a  
500 -8744: name: /Size  
501 -8749: space:  
502 -8750: integer: 60  
503 -8752: space: \x0a  
504 -8755: name: /ID  
505 -8758: space:  
506 -8759: array_open: [  
507 -8760: string: \x88\x04\x8e\x17\xc9a\xe0\x94\xff\xec\xe9\x8c\xb8\x8cF\xd0 (raw: <88048e17c961e094ffece98cb88c46d0>)  
508 -8794: string: \xed\xd6\x0f\xe8\xee\x87\xf8\x871\xa8o\x81\x9f\xe6Q\x99 (raw: <edd60fe8ee87f88731a86f819fe65199>)  
509 -8828: array_close: ]  
510 -8829: space: \x0a  
511 -8830: dict_close: >>  
512 -8832: space: \x0a  
513 -8833: word: stream 410 +8724: word: R
  411 +8725: space: \x0a
  412 +8726: dict_close: >>
  413 +8728: space: \x0a
  414 +8729: word: stream
  415 +skipping to endstream
  416 +8780: word: endstream
  417 +8789: space: \x0a
  418 +8790: word: endobj
  419 +8796: space: \x0a\x0a
  420 +8798: integer: 56
  421 +8800: space:
  422 +8801: integer: 0
  423 +8802: space:
  424 +8803: word: obj
  425 +8806: space: \x0a
  426 +8807: integer: 44
  427 +8809: space: \x0a
  428 +8810: word: endobj
  429 +8816: space: \x0a\x0a
  430 +8818: comment: %% Contents for page 11
  431 +8841: space: \x0a
  432 +8842: comment: %% Original object ID: 47 0
  433 +8869: space: \x0a
  434 +8870: integer: 57
  435 +8872: space:
  436 +8873: integer: 0
  437 +8874: space:
  438 +8875: word: obj
  439 +8878: space: \x0a
  440 +8879: dict_open: <<
  441 +8881: space: \x0a
  442 +8884: name: /Length
  443 +8891: space:
  444 +8892: integer: 58
  445 +8894: space:
  446 +8895: integer: 0
  447 +8896: space:
  448 +8897: word: R
  449 +8898: space: \x0a
  450 +8899: dict_close: >>
  451 +8901: space: \x0a
  452 +8902: word: stream
  453 +skipping to endstream
  454 +8953: word: endstream
  455 +8962: space: \x0a
  456 +8963: word: endobj
  457 +8969: space: \x0a\x0a
  458 +8971: integer: 58
  459 +8973: space:
  460 +8974: integer: 0
  461 +8975: space:
  462 +8976: word: obj
  463 +8979: space: \x0a
  464 +8980: integer: 44
  465 +8982: space: \x0a
  466 +8983: word: endobj
  467 +8989: space: \x0a\x0a
  468 +8991: integer: 59
  469 +8993: space:
  470 +8994: integer: 0
  471 +8995: space:
  472 +8996: word: obj
  473 +8999: space: \x0a
  474 +9000: dict_open: <<
  475 +9002: space: \x0a
  476 +9005: name: /Type
  477 +9010: space:
  478 +9011: name: /XRef
  479 +9016: space: \x0a
  480 +9019: name: /Length
  481 +9026: space:
  482 +9027: integer: 240
  483 +9030: space: \x0a
  484 +9033: name: /W
  485 +9035: space:
  486 +9036: array_open: [
  487 +9037: space:
  488 +9038: integer: 1
  489 +9039: space:
  490 +9040: integer: 2
  491 +9041: space:
  492 +9042: integer: 1
  493 +9043: space:
  494 +9044: array_close: ]
  495 +9045: space: \x0a
  496 +9048: name: /Root
  497 +9053: space:
  498 +9054: integer: 2
  499 +9055: space:
  500 +9056: integer: 0
  501 +9057: space:
  502 +9058: word: R
  503 +9059: space: \x0a
  504 +9062: name: /Size
  505 +9067: space:
  506 +9068: integer: 60
  507 +9070: space: \x0a
  508 +9073: name: /ID
  509 +9076: space:
  510 +9077: array_open: [
  511 +9078: string: \x88\x04\x8e\x17\xc9a\xe0\x94\xff\xec\xe9\x8c\xb8\x8cF\xd0 (raw: <88048e17c961e094ffece98cb88c46d0>)
  512 +9112: string: \xed\xd6\x0f\xe8\xee\x87\xf8\x871\xa8o\x81\x9f\xe6Q\x99 (raw: <edd60fe8ee87f88731a86f819fe65199>)
  513 +9146: array_close: ]
  514 +9147: space: \x0a
  515 +9148: dict_close: >>
  516 +9150: space: \x0a
  517 +9151: word: stream
514 skipping to endstream 518 skipping to endstream
515 -9081: word: endstream  
516 -9090: space: \x0a  
517 -9091: word: endobj  
518 -9097: space: \x0a\x0a  
519 -9099: word: startxref  
520 -9108: space: \x0a  
521 -9109: integer: 8673  
522 -9113: space: \x0a  
523 -9114: comment: %%EOF  
524 -9119: space: \x0a  
525 -9120: eof 519 +9399: word: endstream
  520 +9408: space: \x0a
  521 +9409: word: endobj
  522 +9415: space: \x0a\x0a
  523 +9417: word: startxref
  524 +9426: space: \x0a
  525 +9427: integer: 8991
  526 +9431: space: \x0a
  527 +9432: comment: %%EOF
  528 +9437: space: \x0a
  529 +9438: eof
526 --- END FILE --- 530 --- END FILE ---
527 --- BEGIN PAGE 1 --- 531 --- BEGIN PAGE 1 ---
528 0: word: BT 532 0: word: BT
@@ -595,9 +599,7 @@ skipping to endstream @@ -595,9 +599,7 @@ skipping to endstream
595 103: dict_close: >> 599 103: dict_close: >>
596 105: space: \x0a 600 105: space: \x0a
597 106: word: ID 601 106: word: ID
598 -skipping to EI  
599 -352: word: EI  
600 -354: space: \x0a 602 +108: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI\x0a
601 355: word: BT 603 355: word: BT
602 357: space: \x0a 604 357: space: \x0a
603 360: name: /F1 605 360: name: /F1
@@ -743,13 +745,11 @@ skipping to EI @@ -743,13 +745,11 @@ skipping to EI
743 47: word: ET 745 47: word: ET
744 49: space: \x0a\x00\x0a 746 49: space: \x0a\x00\x0a
745 52: name: /ThisMustBeLast 747 52: name: /ThisMustBeLast
746 -67: space: \x0a  
747 -68: eof 748 +67: eof
748 --- END PAGE 5 --- 749 --- END PAGE 5 ---
749 --- BEGIN PAGE 6 --- 750 --- BEGIN PAGE 6 ---
750 0: word: ID 751 0: word: ID
751 -skipping to EI  
752 -EI not found 752 +EI not found; resuming normal scanning
753 2: space: \x0a 753 2: space: \x0a
754 5: name: /F1 754 5: name: /F1
755 8: space: 755 8: space:
@@ -772,27 +772,37 @@ EI not found @@ -772,27 +772,37 @@ EI not found
772 44: eof 772 44: eof
773 --- END PAGE 6 --- 773 --- END PAGE 6 ---
774 --- BEGIN PAGE 7 --- 774 --- BEGIN PAGE 7 ---
775 -0: word: BT  
776 -2: space: \x0a  
777 -5: name: /F1  
778 -8: space:  
779 -9: integer: 24  
780 -11: space:  
781 -12: word: Tf  
782 -14: space: \x0a  
783 -17: integer: 72 775 +0: name: /potato
  776 +7: space: \x0a
  777 +8: word: BI
  778 +10: space: \x0a
  779 +11: name: /CS
  780 +14: space:
  781 +15: name: /G
  782 +17: name: /W
784 19: space: 783 19: space:
785 -20: integer: 720  
786 -23: space:  
787 -24: word: Td  
788 -26: space: \x0a  
789 -29: string: Potato (raw: (Potato))  
790 -37: space:  
791 -38: word: Tj  
792 -40: space: \x0a  
793 -41: word: ET  
794 -43: space: \x0a  
795 -44: eof 784 +20: integer: 66
  785 +22: name: /H
  786 +24: space:
  787 +25: integer: 47
  788 +27: name: /BPC
  789 +31: space:
  790 +32: integer: 8
  791 +33: name: /F
  792 +35: name: /Fl
  793 +38: name: /DP
  794 +41: dict_open: <<
  795 +43: name: /Predictor
  796 +53: space:
  797 +54: integer: 15
  798 +56: name: /Columns
  799 +64: space:
  800 +65: integer: 66
  801 +67: dict_close: >>
  802 +69: space: \x0a
  803 +70: word: ID
  804 +72: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI
  805 +318: eof
796 --- END PAGE 7 --- 806 --- END PAGE 7 ---
797 --- BEGIN PAGE 8 --- 807 --- BEGIN PAGE 8 ---
798 0: word: BT 808 0: word: BT
qpdf/qtest/qpdf/tokens.pdf
No preview for this file type
qpdf/test_tokenizer.cc
@@ -88,6 +88,8 @@ static char const* tokenTypeName(QPDFTokenizer::token_type_e ttype) @@ -88,6 +88,8 @@ static char const* tokenTypeName(QPDFTokenizer::token_type_e ttype)
88 return "space"; 88 return "space";
89 case QPDFTokenizer::tt_comment: 89 case QPDFTokenizer::tt_comment:
90 return "comment"; 90 return "comment";
  91 + case QPDFTokenizer::tt_inline_image:
  92 + return "inline-image";
91 } 93 }
92 return 0; 94 return 0;
93 } 95 }
@@ -131,7 +133,6 @@ dump_tokens(PointerHolder<InputSource> is, std::string const& label, @@ -131,7 +133,6 @@ dump_tokens(PointerHolder<InputSource> is, std::string const& label,
131 bool skip_streams, bool skip_inline_images) 133 bool skip_streams, bool skip_inline_images)
132 { 134 {
133 Finder f1(is, "endstream"); 135 Finder f1(is, "endstream");
134 - Finder f2(is, "EI");  
135 std::cout << "--- BEGIN " << label << " ---" << std::endl; 136 std::cout << "--- BEGIN " << label << " ---" << std::endl;
136 bool done = false; 137 bool done = false;
137 QPDFTokenizer tokenizer; 138 QPDFTokenizer tokenizer;
@@ -140,10 +141,20 @@ dump_tokens(PointerHolder<InputSource> is, std::string const& label, @@ -140,10 +141,20 @@ dump_tokens(PointerHolder<InputSource> is, std::string const& label,
140 { 141 {
141 tokenizer.includeIgnorable(); 142 tokenizer.includeIgnorable();
142 } 143 }
  144 + qpdf_offset_t inline_image_offset = 0;
143 while (! done) 145 while (! done)
144 { 146 {
145 QPDFTokenizer::Token token = 147 QPDFTokenizer::Token token =
146 - tokenizer.readToken(is, "test", true, max_len); 148 + tokenizer.readToken(is, "test", true,
  149 + inline_image_offset ? 0 : max_len);
  150 + if (inline_image_offset && (token.getType() == QPDFTokenizer::tt_bad))
  151 + {
  152 + std::cout << "EI not found; resuming normal scanning" << std::endl;
  153 + is->seek(inline_image_offset, SEEK_SET);
  154 + inline_image_offset = 0;
  155 + continue;
  156 + }
  157 + inline_image_offset = 0;
147 158
148 qpdf_offset_t offset = is->getLastOffset(); 159 qpdf_offset_t offset = is->getLastOffset();
149 std::cout << offset << ": " 160 std::cout << offset << ": "
@@ -170,7 +181,8 @@ dump_tokens(PointerHolder<InputSource> is, std::string const& label, @@ -170,7 +181,8 @@ dump_tokens(PointerHolder<InputSource> is, std::string const& label,
170 else if (skip_inline_images && 181 else if (skip_inline_images &&
171 (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "ID"))) 182 (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "ID")))
172 { 183 {
173 - try_skipping(tokenizer, is, max_len, "EI", f2); 184 + tokenizer.expectInlineImage();
  185 + inline_image_offset = is->tell();
174 } 186 }
175 else if (token.getType() == QPDFTokenizer::tt_eof) 187 else if (token.getType() == QPDFTokenizer::tt_eof)
176 { 188 {