Commit fefe25030eaffdaf06a9e957b3255304682c71cf

Authored by Jay Berkenbilt
1 parent 2699ecf1

Inline image token type

ChangeLog
1 1 2018-02-04 Jay Berkenbilt <ejb@ql.org>
2 2  
3 3 * Significant lexer (tokenizer) enhancements. These are changes to
4   - the QPDFTokenizer class. These changes are of concern only to
5   - people who are operating with PDF files at the lexical layer
6   - using qpdf. They have little or no impact on most high-level
7   - interfaces or the command-line tool.
8   - * New token types tt_space and tt_comment to recognize
9   - whitespace and comments. this makes it possible to tokenize a
10   - PDF file or stream and preserve everything about it.
11   - * For backward compatibility, space and comment tokens are not
12   - returned by the tokenizer unless
13   - QPDFTokenizer.includeIgnorable() is called.
14   - * Better handling of null bytes. These are now included in space
15   - tokens rather than being their own "tt_word" tokens. This
16   - should have no impact on any correct PDF file and has no
17   - impact on output, but it may change offsets in some error
18   - messages when trying to parse contents of bad files. Under
19   - default operation, qpdf does not attempt to parse content
20   - streams, so this change is mostly invisible.
21   - * Bug fix to handling of bad tokens at ends of streams. Now,
22   - when allowEOF() has been called, these are treated as bad tokens
23   - (tt_bad or an exception, depending on invocation), and a
24   - separate tt_eof token is returned. Before the bad token
25   - contents were returned as the value of a tt_eof token. tt_eof
26   - tokens are always empty now.
27   - * Fix a bug that would, on rare occasions, report the offset in an
28   - error message in the wrong space because of spaces or comments
29   - adjacent to a bad token.
30   - * Clarify in comments exactly where the input source is
31   - positioned surrounding calls to readToken and getToken.
  4 + the QPDFTokenizer class. These changes are of concern only to
  5 + people who are operating with PDF files at the lexical layer using
  6 + qpdf. They have little or no impact on most high-level interfaces
  7 + or the command-line tool.
  8 +
  9 + New token types tt_space and tt_comment to recognize whitespace
  10 + and comments. This makes it possible to tokenize a PDF file or
  11 + stream and preserve everything about it.
  12 +
  13 + For backward compatibility, space and comment tokens are not
  14 + returned by the tokenizer unless QPDFTokenizer.includeIgnorable()
  15 + is called.
  16 +
  17 + Better handling of null bytes. These are now included in space
  18 + tokens rather than being their own "tt_word" tokens. This should
  19 + have no impact on any correct PDF file and has no impact on
  20 + output, but it may change offsets in some error messages when
  21 + trying to parse contents of bad files. Under default operation,
  22 + qpdf does not attempt to parse content streams, so this change is
  23 + mostly invisible.
  24 +
  25 + Bug fix to handling of bad tokens at ends of streams. Now, when
  26 + allowEOF() has been called, these are treated as bad tokens
  27 + (tt_bad or an exception, depending on invocation), and a
  28 + separate tt_eof token is returned. Before, the bad token
  29 + contents were returned as the value of a tt_eof token. tt_eof
  30 + tokens are always empty now.
  31 +
  32 + Fix a bug that would, on rare occasions, report the offset in an
  33 + error message in the wrong place because of spaces or comments
  34 + adjacent to a bad token.
  35 +
  36 + Clarify in comments exactly where the input source is positioned
  37 + surrounding calls to readToken and getToken.
  38 +
  39 + * Add a new token type for inline images. This token type is only
  40 + returned by QPDFTokenizer immediately following a call to
  41 + expectInlineImage(). This change includes internal refactoring of
  42 + a handful of places that all separately handled inline images. The
  43 + logic of detecting inline images in content streams is now handled
  44 + in one place in the code. Also, we are more flexible about what
  45 + characters may surround the EI operator that marks the end of an
  46 + inline image.
32 47  
33 48 2018-02-04 Jay Berkenbilt <ejb@ql.org>
34 49  
... ...
include/qpdf/QPDFTokenizer.hh
... ... @@ -34,7 +34,8 @@ class QPDFTokenizer
34 34 public:
35 35 // Token type tt_eof is only returned if allowEOF() is called on
36 36 // the tokenizer. tt_eof was introduced in QPDF version 4.1.
37   - // tt_space and tt_comment were added in QPDF version 8.
  37 + // tt_space, tt_comment, and tt_inline_image were added in QPDF
  38 + // version 8.
38 39 enum token_type_e
39 40 {
40 41 tt_bad,
... ... @@ -54,6 +55,7 @@ class QPDFTokenizer
54 55 tt_eof,
55 56 tt_space,
56 57 tt_comment,
  58 + tt_inline_image,
57 59 };
58 60  
59 61 class Token
... ... @@ -128,11 +130,17 @@ class QPDFTokenizer
128 130 QPDF_DLL
129 131 void includeIgnorable();
130 132  
131   - // Mode of operation:
  133 + // There are two modes of operation: push and pull. The pull
  134 + // method is easier but requires an input source. The push method
  135 + // is more complicated but can be used to tokenize a stream of
  136 + // incoming characters in a pipeline.
132 137  
133   - // Keep presenting characters and calling getToken() until
134   - // getToken() returns true. When it does, be sure to check
135   - // unread_ch and to unread ch if it is true.
  138 + // Push mode:
  139 +
  140 + // Keep presenting characters with presentCharacter() and
  141 + // presentEOF() and calling getToken() until getToken() returns
  142 + // true. When it does, be sure to check unread_ch and to unread ch
  143 + // if it is true.
136 144  
137 145 // If these are called when a token is available, an exception
138 146 // will be thrown.
... ... @@ -155,15 +163,30 @@ class QPDFTokenizer
155 163 QPDF_DLL
156 164 bool betweenTokens();
157 165  
158   - // Read a token from an input source. Context describes the
  166 + // Pull mode:
  167 +
  168 + // Read a token from an input source. Context describes the
159 169 // context in which the token is being read and is used in the
160   - // exception thrown if there is an error.
  170 + // exception thrown if there is an error. After a token is read,
  171 + // the position of the input source returned by input->tell()
  172 + // points to just after the token, and the input source's "last
  173 + // offset" as returned by input->getLastOffset() points to the
  174 + // beginning of the token.
161 175 QPDF_DLL
162 176 Token readToken(PointerHolder<InputSource> input,
163 177 std::string const& context,
164 178 bool allow_bad = false,
165 179 size_t max_len = 0);
166 180  
  181 + // Calling this method puts the tokenizer in a state for reading
  182 + // inline images. In that state, it will return all data up to and
  183 + // including the next EI token. After you call this method, the
  184 + // next call to readToken (or the token created next time getToken
  185 + // returns true) will either be tt_inline_image or tt_bad. This is
  186 + // the only way readToken returns a tt_inline_image token.
  187 + QPDF_DLL
  188 + void expectInlineImage();
  189 +
167 190 private:
168 191 // Do not implement copy or assignment
169 192 QPDFTokenizer(QPDFTokenizer const&);
... ... @@ -171,10 +194,11 @@ class QPDFTokenizer
171 194  
172 195 void resolveLiteral();
173 196 bool isSpace(char);
  197 + bool isDelimiter(char);
174 198  
175 199 enum state_e {
176 200 st_top, st_in_space, st_in_comment, st_in_string, st_lt, st_gt,
177   - st_literal, st_in_hexstring, st_token_ready
  201 + st_literal, st_in_hexstring, st_inline_image, st_token_ready
178 202 };
179 203  
180 204 class Members
... ...
libqpdf/QPDFTokenizer.cc
... ... @@ -69,6 +69,12 @@ QPDFTokenizer::isSpace(char ch)
69 69 return ((ch == '\0') || QUtil::is_space(ch));
70 70 }
71 71  
  72 +bool
  73 +QPDFTokenizer::isDelimiter(char ch)
  74 +{
  75 + return (strchr(" \t\n\v\f\r()<>[]{}/%", ch) != 0);
  76 +}
  77 +
72 78 void
73 79 QPDFTokenizer::resolveLiteral()
74 80 {
... ... @@ -95,7 +101,7 @@ QPDFTokenizer::resolveLiteral()
95 101 if (ch == '\0')
96 102 {
97 103 this->m->type = tt_bad;
98   - QTC::TC("qpdf", "QPDF_Tokenizer null in name");
  104 + QTC::TC("qpdf", "QPDFTokenizer null in name");
99 105 this->m->error_message =
100 106 "null character not allowed in name token";
101 107 nval += "#00";
... ... @@ -108,7 +114,7 @@ QPDFTokenizer::resolveLiteral()
108 114 }
109 115 else
110 116 {
111   - QTC::TC("qpdf", "QPDF_Tokenizer bad name");
  117 + QTC::TC("qpdf", "QPDFTokenizer bad name");
112 118 this->m->type = tt_bad;
113 119 this->m->error_message = "invalid name token";
114 120 nval += *p;
... ... @@ -209,7 +215,7 @@ QPDFTokenizer::presentCharacter(char ch)
209 215 if (ch == ')')
210 216 {
211 217 this->m->type = tt_bad;
212   - QTC::TC("qpdf", "QPDF_Tokenizer bad )");
  218 + QTC::TC("qpdf", "QPDFTokenizer bad )");
213 219 this->m->error_message = "unexpected )";
214 220 this->m->state = st_token_ready;
215 221 }
... ... @@ -301,7 +307,7 @@ QPDFTokenizer::presentCharacter(char ch)
301 307 {
302 308 this->m->val = ">";
303 309 this->m->type = tt_bad;
304   - QTC::TC("qpdf", "QPDF_Tokenizer bad >");
  310 + QTC::TC("qpdf", "QPDFTokenizer bad >");
305 311 this->m->error_message = "unexpected >";
306 312 this->m->unread_char = true;
307 313 this->m->char_to_unread = ch;
... ... @@ -403,7 +409,7 @@ QPDFTokenizer::presentCharacter(char ch)
403 409 }
404 410 else if (this->m->state == st_literal)
405 411 {
406   - if (strchr(" \t\n\v\f\r()<>[]{}/%", ch) != 0)
  412 + if (isDelimiter(ch))
407 413 {
408 414 // A C-locale whitespace character or delimiter terminates
409 415 // token. It is important to unread the whitespace
... ... @@ -423,6 +429,25 @@ QPDFTokenizer::presentCharacter(char ch)
423 429 this->m->val += ch;
424 430 }
425 431 }
  432 + else if (this->m->state == st_inline_image)
  433 + {
  434 + size_t len = this->m->val.length();
  435 + if ((len >= 4) &&
  436 + isDelimiter(this->m->val.at(len-4)) &&
  437 + (this->m->val.at(len-3) == 'E') &&
  438 + (this->m->val.at(len-2) == 'I') &&
  439 + isDelimiter(this->m->val.at(len-1)))
  440 + {
  441 + this->m->type = tt_inline_image;
  442 + this->m->unread_char = true;
  443 + this->m->char_to_unread = ch;
  444 + this->m->state = st_token_ready;
  445 + }
  446 + else
  447 + {
  448 + this->m->val += ch;
  449 + }
  450 + }
426 451 else
427 452 {
428 453 handled = false;
... ... @@ -468,7 +493,7 @@ QPDFTokenizer::presentCharacter(char ch)
468 493 else
469 494 {
470 495 this->m->type = tt_bad;
471   - QTC::TC("qpdf", "QPDF_Tokenizer bad hexstring character");
  496 + QTC::TC("qpdf", "QPDFTokenizer bad hexstring character");
472 497 this->m->error_message = std::string("invalid character (") +
473 498 ch + ") in hexstring";
474 499 this->m->state = st_token_ready;
... ... @@ -495,9 +520,23 @@ QPDFTokenizer::presentCharacter(char ch)
495 520 void
496 521 QPDFTokenizer::presentEOF()
497 522 {
  523 + if (this->m->state == st_inline_image)
  524 + {
  525 + size_t len = this->m->val.length();
  526 + if ((len >= 3) &&
  527 + isDelimiter(this->m->val.at(len-3)) &&
  528 + (this->m->val.at(len-2) == 'E') &&
  529 + (this->m->val.at(len-1) == 'I'))
  530 + {
  531 + QTC::TC("qpdf", "QPDFTokenizer inline image at EOF");
  532 + this->m->type = tt_inline_image;
  533 + this->m->state = st_token_ready;
  534 + }
  535 + }
  536 +
498 537 if (this->m->state == st_literal)
499 538 {
500   - QTC::TC("qpdf", "QPDF_Tokenizer EOF reading appendable token");
  539 + QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token");
501 540 resolveLiteral();
502 541 }
503 542 else if ((this->m->include_ignorable) && (this->m->state == st_in_space))
... ... @@ -514,7 +553,7 @@ QPDFTokenizer::presentEOF()
514 553 }
515 554 else if (this->m->state != st_token_ready)
516 555 {
517   - QTC::TC("qpdf", "QPDF_Tokenizer EOF reading token");
  556 + QTC::TC("qpdf", "QPDFTokenizer EOF reading token");
518 557 this->m->type = tt_bad;
519 558 this->m->error_message = "EOF while reading token";
520 559 }
... ... @@ -522,6 +561,17 @@ QPDFTokenizer::presentEOF()
522 561 this->m->state = st_token_ready;
523 562 }
524 563  
  564 +void
  565 +QPDFTokenizer::expectInlineImage()
  566 +{
  567 + if (this->m->state != st_top)
  568 + {
  569 + throw std::logic_error("QPDFTokenizer::expectInlineImage called"
  570 + " when tokenizer is in improper state");
  571 + }
  572 + this->m->state = st_inline_image;
  573 +}
  574 +
525 575 bool
526 576 QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch)
527 577 {
... ... @@ -572,7 +622,7 @@ QPDFTokenizer::readToken(PointerHolder&lt;InputSource&gt; input,
572 622 presented_eof = true;
573 623 if ((this->m->type == tt_eof) && (! this->m->allow_eof))
574 624 {
575   - QTC::TC("qpdf", "QPDF_Tokenizer EOF when not allowed");
  625 + QTC::TC("qpdf", "QPDFTokenizer EOF when not allowed");
576 626 this->m->type = tt_bad;
577 627 this->m->error_message = "unexpected EOF";
578 628 offset = input->getLastOffset();
... ...
qpdf/qpdf.testcov
... ... @@ -64,11 +64,11 @@ QPDF stream length not integer 0
64 64 QPDF missing endstream 0
65 65 QPDFObjectHandle bad dictionary close 0
66 66 QPDF can't find xref 0
67   -QPDF_Tokenizer bad ) 0
68   -QPDF_Tokenizer bad > 0
69   -QPDF_Tokenizer bad hexstring character 0
70   -QPDF_Tokenizer null in name 0
71   -QPDF_Tokenizer bad name 0
  67 +QPDFTokenizer bad ) 0
  68 +QPDFTokenizer bad > 0
  69 +QPDFTokenizer bad hexstring character 0
  70 +QPDFTokenizer null in name 0
  71 +QPDFTokenizer bad name 0
72 72 QPDF_Stream invalid filter 0
73 73 QPDF UseOutlines but no Outlines 0
74 74 QPDFObjectHandle clone bool 0
... ... @@ -233,8 +233,8 @@ QPDFWriter copy use_aes 1
233 233 QPDFObjectHandle indirect without context 0
234 234 QPDFObjectHandle trailing data in parse 0
235 235 qpdf pages encryption password 0
236   -QPDF_Tokenizer EOF reading token 0
237   -QPDF_Tokenizer EOF reading appendable token 0
  236 +QPDFTokenizer EOF reading token 0
  237 +QPDFTokenizer EOF reading appendable token 0
238 238 QPDFWriter extra header text no newline 0
239 239 QPDFWriter extra header text add newline 0
240 240 QPDF bogus 0 offset 0
... ... @@ -302,4 +302,5 @@ qpdf-c called qpdf_set_compress_streams 0
302 302 qpdf-c called qpdf_set_preserve_unreferenced_objects 0
303 303 qpdf-c called qpdf_set_newline_before_endstream 0
304 304 QPDF_Stream TIFF predictor 0
305   -QPDF_Tokenizer EOF when not allowed 0
  305 +QPDFTokenizer EOF when not allowed 0
  306 +QPDFTokenizer inline image at EOF 0
... ...
qpdf/qtest/qpdf/tokens-maxlen.out
... ... @@ -222,307 +222,311 @@ skipping to endstream
222 222 7601: word: endstream
223 223 7610: space: \x0a
224 224 7611: word: endobj
225   -7617: space: \x0a\x0a
226   -7619: integer: 46
227   -7621: space:
228   -7622: integer: 0
229   -7623: space:
230   -7624: word: obj
231   -7627: space: \x0a
232   -7628: integer: 68
233   -7630: space: \x0a
234   -7631: word: endobj
235   -7637: space: \x0a\x0a
236   -7639: comment: %% Contents for page 6
237   -7661: space: \x0a
238   -7662: comment: %% Original object ID: 42 0
239   -7689: space: \x0a
240   -7690: integer: 47
241   -7692: space:
242   -7693: integer: 0
243   -7694: space:
244   -7695: word: obj
245   -7698: space: \x0a
246   -7699: dict_open: <<
247   -7701: space: \x0a
248   -7704: name: /Length
249   -7711: space:
250   -7712: integer: 48
251   -7714: space:
252   -7715: integer: 0
253   -7716: space:
254   -7717: word: R
255   -7718: space: \x0a
256   -7719: dict_close: >>
257   -7721: space: \x0a
258   -7722: word: stream
  225 +7617: space: \x0a
  226 +7618: comment: %QDF: ignore_newline
  227 +7638: space: \x0a\x0a
  228 +7640: integer: 46
  229 +7642: space:
  230 +7643: integer: 0
  231 +7644: space:
  232 +7645: word: obj
  233 +7648: space: \x0a
  234 +7649: integer: 67
  235 +7651: space: \x0a
  236 +7652: word: endobj
  237 +7658: space: \x0a\x0a
  238 +7660: comment: %% Contents for page 6
  239 +7682: space: \x0a
  240 +7683: comment: %% Original object ID: 42 0
  241 +7710: space: \x0a
  242 +7711: integer: 47
  243 +7713: space:
  244 +7714: integer: 0
  245 +7715: space:
  246 +7716: word: obj
  247 +7719: space: \x0a
  248 +7720: dict_open: <<
  249 +7722: space: \x0a
  250 +7725: name: /Length
  251 +7732: space:
  252 +7733: integer: 48
  253 +7735: space:
  254 +7736: integer: 0
  255 +7737: space:
  256 +7738: word: R
  257 +7739: space: \x0a
  258 +7740: dict_close: >>
  259 +7742: space: \x0a
  260 +7743: word: stream
259 261 skipping to endstream
260   -7773: word: endstream
261   -7782: space: \x0a
262   -7783: word: endobj
263   -7789: space: \x0a\x0a
264   -7791: integer: 48
265   -7793: space:
266   -7794: integer: 0
267   -7795: space:
268   -7796: word: obj
269   -7799: space: \x0a
270   -7800: integer: 44
271   -7802: space: \x0a
272   -7803: word: endobj
273   -7809: space: \x0a\x0a
274   -7811: comment: %% Contents for page 7
275   -7833: space: \x0a
276   -7834: comment: %% Original object ID: 43 0
277   -7861: space: \x0a
278   -7862: integer: 49
279   -7864: space:
280   -7865: integer: 0
281   -7866: space:
282   -7867: word: obj
283   -7870: space: \x0a
284   -7871: dict_open: <<
285   -7873: space: \x0a
286   -7876: name: /Length
287   -7883: space:
288   -7884: integer: 50
289   -7886: space:
290   -7887: integer: 0
291   -7888: space:
292   -7889: word: R
293   -7890: space: \x0a
294   -7891: dict_close: >>
295   -7893: space: \x0a
296   -7894: word: stream
  262 +7794: word: endstream
  263 +7803: space: \x0a
  264 +7804: word: endobj
  265 +7810: space: \x0a\x0a
  266 +7812: integer: 48
  267 +7814: space:
  268 +7815: integer: 0
  269 +7816: space:
  270 +7817: word: obj
  271 +7820: space: \x0a
  272 +7821: integer: 44
  273 +7823: space: \x0a
  274 +7824: word: endobj
  275 +7830: space: \x0a\x0a
  276 +7832: comment: %% Contents for page 7
  277 +7854: space: \x0a
  278 +7855: comment: %% Original object ID: 43 0
  279 +7882: space: \x0a
  280 +7883: integer: 49
  281 +7885: space:
  282 +7886: integer: 0
  283 +7887: space:
  284 +7888: word: obj
  285 +7891: space: \x0a
  286 +7892: dict_open: <<
  287 +7894: space: \x0a
  288 +7897: name: /Length
  289 +7904: space:
  290 +7905: integer: 50
  291 +7907: space:
  292 +7908: integer: 0
  293 +7909: space:
  294 +7910: word: R
  295 +7911: space: \x0a
  296 +7912: dict_close: >>
  297 +7914: space: \x0a
  298 +7915: word: stream
297 299 skipping to endstream
298   -7945: word: endstream
299   -7954: space: \x0a
300   -7955: word: endobj
301   -7961: space: \x0a\x0a
302   -7963: integer: 50
303   -7965: space:
304   -7966: integer: 0
305   -7967: space:
306   -7968: word: obj
307   -7971: space: \x0a
308   -7972: integer: 44
309   -7974: space: \x0a
310   -7975: word: endobj
311   -7981: space: \x0a\x0a
312   -7983: comment: %% Contents for page 8
313   -8005: space: \x0a
314   -8006: comment: %% Original object ID: 44 0
315   -8033: space: \x0a
316   -8034: integer: 51
317   -8036: space:
318   -8037: integer: 0
319   -8038: space:
320   -8039: word: obj
321   -8042: space: \x0a
322   -8043: dict_open: <<
323   -8045: space: \x0a
324   -8048: name: /Length
325   -8055: space:
326   -8056: integer: 52
327   -8058: space:
328   -8059: integer: 0
329   -8060: space:
330   -8061: word: R
331   -8062: space: \x0a
332   -8063: dict_close: >>
333   -8065: space: \x0a
334   -8066: word: stream
  300 +8241: word: endstream
  301 +8250: space: \x0a
  302 +8251: word: endobj
  303 +8257: space: \x0a
  304 +8258: comment: %QDF: ignore_newline
  305 +8278: space: \x0a\x0a
  306 +8280: integer: 50
  307 +8282: space:
  308 +8283: integer: 0
  309 +8284: space:
  310 +8285: word: obj
  311 +8288: space: \x0a
  312 +8289: integer: 318
  313 +8292: space: \x0a
  314 +8293: word: endobj
  315 +8299: space: \x0a\x0a
  316 +8301: comment: %% Contents for page 8
  317 +8323: space: \x0a
  318 +8324: comment: %% Original object ID: 44 0
  319 +8351: space: \x0a
  320 +8352: integer: 51
  321 +8354: space:
  322 +8355: integer: 0
  323 +8356: space:
  324 +8357: word: obj
  325 +8360: space: \x0a
  326 +8361: dict_open: <<
  327 +8363: space: \x0a
  328 +8366: name: /Length
  329 +8373: space:
  330 +8374: integer: 52
  331 +8376: space:
  332 +8377: integer: 0
  333 +8378: space:
  334 +8379: word: R
  335 +8380: space: \x0a
  336 +8381: dict_close: >>
  337 +8383: space: \x0a
  338 +8384: word: stream
335 339 skipping to endstream
336   -8117: word: endstream
337   -8126: space: \x0a
338   -8127: word: endobj
339   -8133: space: \x0a\x0a
340   -8135: integer: 52
341   -8137: space:
342   -8138: integer: 0
343   -8139: space:
344   -8140: word: obj
345   -8143: space: \x0a
346   -8144: integer: 44
347   -8146: space: \x0a
348   -8147: word: endobj
349   -8153: space: \x0a\x0a
350   -8155: comment: %% Contents for page 9
351   -8177: space: \x0a
352   -8178: comment: %% Original object ID: 45 0
353   -8205: space: \x0a
354   -8206: integer: 53
355   -8208: space:
356   -8209: integer: 0
357   -8210: space:
358   -8211: word: obj
359   -8214: space: \x0a
360   -8215: dict_open: <<
361   -8217: space: \x0a
362   -8220: name: /Length
363   -8227: space:
364   -8228: integer: 54
365   -8230: space:
366   -8231: integer: 0
367   -8232: space:
368   -8233: word: R
369   -8234: space: \x0a
370   -8235: dict_close: >>
371   -8237: space: \x0a
372   -8238: word: stream
373   -skipping to endstream
374   -8289: word: endstream
375   -8298: space: \x0a
376   -8299: word: endobj
377   -8305: space: \x0a\x0a
378   -8307: integer: 54
379   -8309: space:
380   -8310: integer: 0
381   -8311: space:
382   -8312: word: obj
383   -8315: space: \x0a
384   -8316: integer: 44
385   -8318: space: \x0a
386   -8319: word: endobj
387   -8325: space: \x0a\x0a
388   -8327: comment: %% Contents for page 10
389   -8350: space: \x0a
390   -8351: comment: %% Original object ID: 46 0
391   -8378: space: \x0a
392   -8379: integer: 55
393   -8381: space:
394   -8382: integer: 0
395   -8383: space:
396   -8384: word: obj
397   -8387: space: \x0a
398   -8388: dict_open: <<
399   -8390: space: \x0a
400   -8393: name: /Length
401   -8400: space:
402   -8401: integer: 56
403   -8403: space:
404   -8404: integer: 0
405   -8405: space:
406   -8406: word: R
407   -8407: space: \x0a
408   -8408: dict_close: >>
409   -8410: space: \x0a
410   -8411: word: stream
411   -skipping to endstream
412   -8462: word: endstream
413   -8471: space: \x0a
414   -8472: word: endobj
415   -8478: space: \x0a\x0a
416   -8480: integer: 56
417   -8482: space:
418   -8483: integer: 0
419   -8484: space:
420   -8485: word: obj
421   -8488: space: \x0a
422   -8489: integer: 44
423   -8491: space: \x0a
424   -8492: word: endobj
425   -8498: space: \x0a\x0a
426   -8500: comment: %% Contents for page 11
  340 +8435: word: endstream
  341 +8444: space: \x0a
  342 +8445: word: endobj
  343 +8451: space: \x0a\x0a
  344 +8453: integer: 52
  345 +8455: space:
  346 +8456: integer: 0
  347 +8457: space:
  348 +8458: word: obj
  349 +8461: space: \x0a
  350 +8462: integer: 44
  351 +8464: space: \x0a
  352 +8465: word: endobj
  353 +8471: space: \x0a\x0a
  354 +8473: comment: %% Contents for page 9
  355 +8495: space: \x0a
  356 +8496: comment: %% Original object ID: 45 0
427 357 8523: space: \x0a
428   -8524: comment: %% Original object ID: 47 0
429   -8551: space: \x0a
430   -8552: integer: 57
431   -8554: space:
432   -8555: integer: 0
433   -8556: space:
434   -8557: word: obj
435   -8560: space: \x0a
436   -8561: dict_open: <<
437   -8563: space: \x0a
438   -8566: name: /Length
439   -8573: space:
440   -8574: integer: 58
441   -8576: space:
442   -8577: integer: 0
443   -8578: space:
444   -8579: word: R
445   -8580: space: \x0a
446   -8581: dict_close: >>
447   -8583: space: \x0a
448   -8584: word: stream
  358 +8524: integer: 53
  359 +8526: space:
  360 +8527: integer: 0
  361 +8528: space:
  362 +8529: word: obj
  363 +8532: space: \x0a
  364 +8533: dict_open: <<
  365 +8535: space: \x0a
  366 +8538: name: /Length
  367 +8545: space:
  368 +8546: integer: 54
  369 +8548: space:
  370 +8549: integer: 0
  371 +8550: space:
  372 +8551: word: R
  373 +8552: space: \x0a
  374 +8553: dict_close: >>
  375 +8555: space: \x0a
  376 +8556: word: stream
449 377 skipping to endstream
450   -8635: word: endstream
451   -8644: space: \x0a
452   -8645: word: endobj
453   -8651: space: \x0a\x0a
454   -8653: integer: 58
455   -8655: space:
456   -8656: integer: 0
457   -8657: space:
458   -8658: word: obj
459   -8661: space: \x0a
460   -8662: integer: 44
461   -8664: space: \x0a
462   -8665: word: endobj
463   -8671: space: \x0a\x0a
464   -8673: integer: 59
465   -8675: space:
466   -8676: integer: 0
467   -8677: space:
468   -8678: word: obj
469   -8681: space: \x0a
470   -8682: dict_open: <<
471   -8684: space: \x0a
472   -8687: name: /Type
473   -8692: space:
474   -8693: name: /XRef
475   -8698: space: \x0a
476   -8701: name: /Length
477   -8708: space:
478   -8709: integer: 240
479   -8712: space: \x0a
480   -8715: name: /W
481   -8717: space:
482   -8718: array_open: [
483   -8719: space:
484   -8720: integer: 1
  378 +8607: word: endstream
  379 +8616: space: \x0a
  380 +8617: word: endobj
  381 +8623: space: \x0a\x0a
  382 +8625: integer: 54
  383 +8627: space:
  384 +8628: integer: 0
  385 +8629: space:
  386 +8630: word: obj
  387 +8633: space: \x0a
  388 +8634: integer: 44
  389 +8636: space: \x0a
  390 +8637: word: endobj
  391 +8643: space: \x0a\x0a
  392 +8645: comment: %% Contents for page 10
  393 +8668: space: \x0a
  394 +8669: comment: %% Original object ID: 46 0
  395 +8696: space: \x0a
  396 +8697: integer: 55
  397 +8699: space:
  398 +8700: integer: 0
  399 +8701: space:
  400 +8702: word: obj
  401 +8705: space: \x0a
  402 +8706: dict_open: <<
  403 +8708: space: \x0a
  404 +8711: name: /Length
  405 +8718: space:
  406 +8719: integer: 56
485 407 8721: space:
486   -8722: integer: 2
  408 +8722: integer: 0
487 409 8723: space:
488   -8724: integer: 1
489   -8725: space:
490   -8726: array_close: ]
491   -8727: space: \x0a
492   -8730: name: /Root
493   -8735: space:
494   -8736: integer: 2
495   -8737: space:
496   -8738: integer: 0
497   -8739: space:
498   -8740: word: R
499   -8741: space: \x0a
500   -8744: name: /Size
501   -8749: space:
502   -8750: integer: 60
503   -8752: space: \x0a
504   -8755: name: /ID
505   -8758: space:
506   -8759: array_open: [
507   -8760: string: \x88\x04\x8e\x17\xc9a\xe0\x94\xff\xec\xe9\x8c\xb8\x8cF\xd0 (raw: <88048e17c961e094ffece98cb88c46d0>)
508   -8794: string: \xed\xd6\x0f\xe8\xee\x87\xf8\x871\xa8o\x81\x9f\xe6Q\x99 (raw: <edd60fe8ee87f88731a86f819fe65199>)
509   -8828: array_close: ]
510   -8829: space: \x0a
511   -8830: dict_close: >>
512   -8832: space: \x0a
513   -8833: word: stream
  410 +8724: word: R
  411 +8725: space: \x0a
  412 +8726: dict_close: >>
  413 +8728: space: \x0a
  414 +8729: word: stream
  415 +skipping to endstream
  416 +8780: word: endstream
  417 +8789: space: \x0a
  418 +8790: word: endobj
  419 +8796: space: \x0a\x0a
  420 +8798: integer: 56
  421 +8800: space:
  422 +8801: integer: 0
  423 +8802: space:
  424 +8803: word: obj
  425 +8806: space: \x0a
  426 +8807: integer: 44
  427 +8809: space: \x0a
  428 +8810: word: endobj
  429 +8816: space: \x0a\x0a
  430 +8818: comment: %% Contents for page 11
  431 +8841: space: \x0a
  432 +8842: comment: %% Original object ID: 47 0
  433 +8869: space: \x0a
  434 +8870: integer: 57
  435 +8872: space:
  436 +8873: integer: 0
  437 +8874: space:
  438 +8875: word: obj
  439 +8878: space: \x0a
  440 +8879: dict_open: <<
  441 +8881: space: \x0a
  442 +8884: name: /Length
  443 +8891: space:
  444 +8892: integer: 58
  445 +8894: space:
  446 +8895: integer: 0
  447 +8896: space:
  448 +8897: word: R
  449 +8898: space: \x0a
  450 +8899: dict_close: >>
  451 +8901: space: \x0a
  452 +8902: word: stream
  453 +skipping to endstream
  454 +8953: word: endstream
  455 +8962: space: \x0a
  456 +8963: word: endobj
  457 +8969: space: \x0a\x0a
  458 +8971: integer: 58
  459 +8973: space:
  460 +8974: integer: 0
  461 +8975: space:
  462 +8976: word: obj
  463 +8979: space: \x0a
  464 +8980: integer: 44
  465 +8982: space: \x0a
  466 +8983: word: endobj
  467 +8989: space: \x0a\x0a
  468 +8991: integer: 59
  469 +8993: space:
  470 +8994: integer: 0
  471 +8995: space:
  472 +8996: word: obj
  473 +8999: space: \x0a
  474 +9000: dict_open: <<
  475 +9002: space: \x0a
  476 +9005: name: /Type
  477 +9010: space:
  478 +9011: name: /XRef
  479 +9016: space: \x0a
  480 +9019: name: /Length
  481 +9026: space:
  482 +9027: integer: 240
  483 +9030: space: \x0a
  484 +9033: name: /W
  485 +9035: space:
  486 +9036: array_open: [
  487 +9037: space:
  488 +9038: integer: 1
  489 +9039: space:
  490 +9040: integer: 2
  491 +9041: space:
  492 +9042: integer: 1
  493 +9043: space:
  494 +9044: array_close: ]
  495 +9045: space: \x0a
  496 +9048: name: /Root
  497 +9053: space:
  498 +9054: integer: 2
  499 +9055: space:
  500 +9056: integer: 0
  501 +9057: space:
  502 +9058: word: R
  503 +9059: space: \x0a
  504 +9062: name: /Size
  505 +9067: space:
  506 +9068: integer: 60
  507 +9070: space: \x0a
  508 +9073: name: /ID
  509 +9076: space:
  510 +9077: array_open: [
  511 +9078: string: \x88\x04\x8e\x17\xc9a\xe0\x94\xff\xec\xe9\x8c\xb8\x8cF\xd0 (raw: <88048e17c961e094ffece98cb88c46d0>)
  512 +9112: string: \xed\xd6\x0f\xe8\xee\x87\xf8\x871\xa8o\x81\x9f\xe6Q\x99 (raw: <edd60fe8ee87f88731a86f819fe65199>)
  513 +9146: array_close: ]
  514 +9147: space: \x0a
  515 +9148: dict_close: >>
  516 +9150: space: \x0a
  517 +9151: word: stream
514 518 skipping to endstream
515   -9081: word: endstream
516   -9090: space: \x0a
517   -9091: word: endobj
518   -9097: space: \x0a\x0a
519   -9099: word: startxref
520   -9108: space: \x0a
521   -9109: integer: 8673
522   -9113: space: \x0a
523   -9114: comment: %%EOF
524   -9119: space: \x0a
525   -9120: eof
  519 +9399: word: endstream
  520 +9408: space: \x0a
  521 +9409: word: endobj
  522 +9415: space: \x0a\x0a
  523 +9417: word: startxref
  524 +9426: space: \x0a
  525 +9427: integer: 8991
  526 +9431: space: \x0a
  527 +9432: comment: %%EOF
  528 +9437: space: \x0a
  529 +9438: eof
526 530 --- END FILE ---
527 531 --- BEGIN PAGE 1 ---
528 532 0: word: BT
... ... @@ -595,9 +599,7 @@ skipping to endstream
595 599 103: dict_close: >>
596 600 105: space: \x0a
597 601 106: word: ID
598   -skipping to EI
599   -352: word: EI
600   -354: space: \x0a
  602 +108: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI\x0a
601 603 355: word: BT
602 604 357: space: \x0a
603 605 360: name: /F1
... ... @@ -743,13 +745,11 @@ skipping to EI
743 745 47: word: ET
744 746 49: space: \x0a\x00\x0a
745 747 52: name: /ThisMustBeLast
746   -67: space: \x0a
747   -68: eof
  748 +67: eof
748 749 --- END PAGE 5 ---
749 750 --- BEGIN PAGE 6 ---
750 751 0: word: ID
751   -skipping to EI
752   -EI not found
  752 +EI not found; resuming normal scanning
753 753 2: space: \x0a
754 754 5: name: /F1
755 755 8: space:
... ... @@ -772,27 +772,37 @@ EI not found
772 772 44: eof
773 773 --- END PAGE 6 ---
774 774 --- BEGIN PAGE 7 ---
775   -0: word: BT
776   -2: space: \x0a
777   -5: name: /F1
778   -8: space:
779   -9: integer: 24
780   -11: space:
781   -12: word: Tf
782   -14: space: \x0a
783   -17: integer: 72
  775 +0: name: /potato
  776 +7: space: \x0a
  777 +8: word: BI
  778 +10: space: \x0a
  779 +11: name: /CS
  780 +14: space:
  781 +15: name: /G
  782 +17: name: /W
784 783 19: space:
785   -20: integer: 720
786   -23: space:
787   -24: word: Td
788   -26: space: \x0a
789   -29: string: Potato (raw: (Potato))
790   -37: space:
791   -38: word: Tj
792   -40: space: \x0a
793   -41: word: ET
794   -43: space: \x0a
795   -44: eof
  784 +20: integer: 66
  785 +22: name: /H
  786 +24: space:
  787 +25: integer: 47
  788 +27: name: /BPC
  789 +31: space:
  790 +32: integer: 8
  791 +33: name: /F
  792 +35: name: /Fl
  793 +38: name: /DP
  794 +41: dict_open: <<
  795 +43: name: /Predictor
  796 +53: space:
  797 +54: integer: 15
  798 +56: name: /Columns
  799 +64: space:
  800 +65: integer: 66
  801 +67: dict_close: >>
  802 +69: space: \x0a
  803 +70: word: ID
  804 +72: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI
  805 +318: eof
796 806 --- END PAGE 7 ---
797 807 --- BEGIN PAGE 8 ---
798 808 0: word: BT
... ...
qpdf/qtest/qpdf/tokens-no-ignorable.out
... ... @@ -101,152 +101,152 @@ skipping to endstream
101 101 skipping to endstream
102 102 7601: word: endstream
103 103 7611: word: endobj
104   -7619: integer: 46
105   -7622: integer: 0
106   -7624: word: obj
107   -7628: integer: 68
108   -7631: word: endobj
109   -7690: integer: 47
110   -7693: integer: 0
111   -7695: word: obj
112   -7699: dict_open: <<
113   -7704: name: /Length
114   -7712: integer: 48
115   -7715: integer: 0
116   -7717: word: R
117   -7719: dict_close: >>
118   -7722: word: stream
  104 +7640: integer: 46
  105 +7643: integer: 0
  106 +7645: word: obj
  107 +7649: integer: 67
  108 +7652: word: endobj
  109 +7711: integer: 47
  110 +7714: integer: 0
  111 +7716: word: obj
  112 +7720: dict_open: <<
  113 +7725: name: /Length
  114 +7733: integer: 48
  115 +7736: integer: 0
  116 +7738: word: R
  117 +7740: dict_close: >>
  118 +7743: word: stream
119 119 skipping to endstream
120   -7773: word: endstream
121   -7783: word: endobj
122   -7791: integer: 48
123   -7794: integer: 0
124   -7796: word: obj
125   -7800: integer: 44
126   -7803: word: endobj
127   -7862: integer: 49
128   -7865: integer: 0
129   -7867: word: obj
130   -7871: dict_open: <<
131   -7876: name: /Length
132   -7884: integer: 50
133   -7887: integer: 0
134   -7889: word: R
135   -7891: dict_close: >>
136   -7894: word: stream
  120 +7794: word: endstream
  121 +7804: word: endobj
  122 +7812: integer: 48
  123 +7815: integer: 0
  124 +7817: word: obj
  125 +7821: integer: 44
  126 +7824: word: endobj
  127 +7883: integer: 49
  128 +7886: integer: 0
  129 +7888: word: obj
  130 +7892: dict_open: <<
  131 +7897: name: /Length
  132 +7905: integer: 50
  133 +7908: integer: 0
  134 +7910: word: R
  135 +7912: dict_close: >>
  136 +7915: word: stream
137 137 skipping to endstream
138   -7945: word: endstream
139   -7955: word: endobj
140   -7963: integer: 50
141   -7966: integer: 0
142   -7968: word: obj
143   -7972: integer: 44
144   -7975: word: endobj
145   -8034: integer: 51
146   -8037: integer: 0
147   -8039: word: obj
148   -8043: dict_open: <<
149   -8048: name: /Length
150   -8056: integer: 52
151   -8059: integer: 0
152   -8061: word: R
153   -8063: dict_close: >>
154   -8066: word: stream
  138 +8241: word: endstream
  139 +8251: word: endobj
  140 +8280: integer: 50
  141 +8283: integer: 0
  142 +8285: word: obj
  143 +8289: integer: 318
  144 +8293: word: endobj
  145 +8352: integer: 51
  146 +8355: integer: 0
  147 +8357: word: obj
  148 +8361: dict_open: <<
  149 +8366: name: /Length
  150 +8374: integer: 52
  151 +8377: integer: 0
  152 +8379: word: R
  153 +8381: dict_close: >>
  154 +8384: word: stream
155 155 skipping to endstream
156   -8117: word: endstream
157   -8127: word: endobj
158   -8135: integer: 52
159   -8138: integer: 0
160   -8140: word: obj
161   -8144: integer: 44
162   -8147: word: endobj
163   -8206: integer: 53
164   -8209: integer: 0
165   -8211: word: obj
166   -8215: dict_open: <<
167   -8220: name: /Length
168   -8228: integer: 54
169   -8231: integer: 0
170   -8233: word: R
171   -8235: dict_close: >>
172   -8238: word: stream
  156 +8435: word: endstream
  157 +8445: word: endobj
  158 +8453: integer: 52
  159 +8456: integer: 0
  160 +8458: word: obj
  161 +8462: integer: 44
  162 +8465: word: endobj
  163 +8524: integer: 53
  164 +8527: integer: 0
  165 +8529: word: obj
  166 +8533: dict_open: <<
  167 +8538: name: /Length
  168 +8546: integer: 54
  169 +8549: integer: 0
  170 +8551: word: R
  171 +8553: dict_close: >>
  172 +8556: word: stream
173 173 skipping to endstream
174   -8289: word: endstream
175   -8299: word: endobj
176   -8307: integer: 54
177   -8310: integer: 0
178   -8312: word: obj
179   -8316: integer: 44
180   -8319: word: endobj
181   -8379: integer: 55
182   -8382: integer: 0
183   -8384: word: obj
184   -8388: dict_open: <<
185   -8393: name: /Length
186   -8401: integer: 56
187   -8404: integer: 0
188   -8406: word: R
189   -8408: dict_close: >>
190   -8411: word: stream
  174 +8607: word: endstream
  175 +8617: word: endobj
  176 +8625: integer: 54
  177 +8628: integer: 0
  178 +8630: word: obj
  179 +8634: integer: 44
  180 +8637: word: endobj
  181 +8697: integer: 55
  182 +8700: integer: 0
  183 +8702: word: obj
  184 +8706: dict_open: <<
  185 +8711: name: /Length
  186 +8719: integer: 56
  187 +8722: integer: 0
  188 +8724: word: R
  189 +8726: dict_close: >>
  190 +8729: word: stream
191 191 skipping to endstream
192   -8462: word: endstream
193   -8472: word: endobj
194   -8480: integer: 56
195   -8483: integer: 0
196   -8485: word: obj
197   -8489: integer: 44
198   -8492: word: endobj
199   -8552: integer: 57
200   -8555: integer: 0
201   -8557: word: obj
202   -8561: dict_open: <<
203   -8566: name: /Length
204   -8574: integer: 58
205   -8577: integer: 0
206   -8579: word: R
207   -8581: dict_close: >>
208   -8584: word: stream
  192 +8780: word: endstream
  193 +8790: word: endobj
  194 +8798: integer: 56
  195 +8801: integer: 0
  196 +8803: word: obj
  197 +8807: integer: 44
  198 +8810: word: endobj
  199 +8870: integer: 57
  200 +8873: integer: 0
  201 +8875: word: obj
  202 +8879: dict_open: <<
  203 +8884: name: /Length
  204 +8892: integer: 58
  205 +8895: integer: 0
  206 +8897: word: R
  207 +8899: dict_close: >>
  208 +8902: word: stream
209 209 skipping to endstream
210   -8635: word: endstream
211   -8645: word: endobj
212   -8653: integer: 58
213   -8656: integer: 0
214   -8658: word: obj
215   -8662: integer: 44
216   -8665: word: endobj
217   -8673: integer: 59
218   -8676: integer: 0
219   -8678: word: obj
220   -8682: dict_open: <<
221   -8687: name: /Type
222   -8693: name: /XRef
223   -8701: name: /Length
224   -8709: integer: 240
225   -8715: name: /W
226   -8718: array_open: [
227   -8720: integer: 1
228   -8722: integer: 2
229   -8724: integer: 1
230   -8726: array_close: ]
231   -8730: name: /Root
232   -8736: integer: 2
233   -8738: integer: 0
234   -8740: word: R
235   -8744: name: /Size
236   -8750: integer: 60
237   -8755: name: /ID
238   -8759: array_open: [
239   -8760: string: \x88\x04\x8e\x17\xc9a\xe0\x94\xff\xec\xe9\x8c\xb8\x8cF\xd0 (raw: <88048e17c961e094ffece98cb88c46d0>)
240   -8794: string: \xed\xd6\x0f\xe8\xee\x87\xf8\x871\xa8o\x81\x9f\xe6Q\x99 (raw: <edd60fe8ee87f88731a86f819fe65199>)
241   -8828: array_close: ]
242   -8830: dict_close: >>
243   -8833: word: stream
  210 +8953: word: endstream
  211 +8963: word: endobj
  212 +8971: integer: 58
  213 +8974: integer: 0
  214 +8976: word: obj
  215 +8980: integer: 44
  216 +8983: word: endobj
  217 +8991: integer: 59
  218 +8994: integer: 0
  219 +8996: word: obj
  220 +9000: dict_open: <<
  221 +9005: name: /Type
  222 +9011: name: /XRef
  223 +9019: name: /Length
  224 +9027: integer: 240
  225 +9033: name: /W
  226 +9036: array_open: [
  227 +9038: integer: 1
  228 +9040: integer: 2
  229 +9042: integer: 1
  230 +9044: array_close: ]
  231 +9048: name: /Root
  232 +9054: integer: 2
  233 +9056: integer: 0
  234 +9058: word: R
  235 +9062: name: /Size
  236 +9068: integer: 60
  237 +9073: name: /ID
  238 +9077: array_open: [
  239 +9078: string: \x88\x04\x8e\x17\xc9a\xe0\x94\xff\xec\xe9\x8c\xb8\x8cF\xd0 (raw: <88048e17c961e094ffece98cb88c46d0>)
  240 +9112: string: \xed\xd6\x0f\xe8\xee\x87\xf8\x871\xa8o\x81\x9f\xe6Q\x99 (raw: <edd60fe8ee87f88731a86f819fe65199>)
  241 +9146: array_close: ]
  242 +9148: dict_close: >>
  243 +9151: word: stream
244 244 skipping to endstream
245   -9081: word: endstream
246   -9091: word: endobj
247   -9099: word: startxref
248   -9109: integer: 8673
249   -9120: eof
  245 +9399: word: endstream
  246 +9409: word: endobj
  247 +9417: word: startxref
  248 +9427: integer: 8991
  249 +9438: eof
250 250 --- END FILE ---
251 251 --- BEGIN PAGE 1 ---
252 252 0: word: BT
... ... @@ -291,8 +291,7 @@ skipping to endstream
291 291 101: integer: 66
292 292 103: dict_close: >>
293 293 106: word: ID
294   -skipping to EI
295   -352: word: EI
  294 +108: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI\x0a
296 295 355: word: BT
297 296 360: name: /F1
298 297 364: integer: 24
... ... @@ -374,12 +373,11 @@ skipping to EI
374 373 44: word: Tj
375 374 47: word: ET
376 375 52: name: /ThisMustBeLast
377   -68: eof
  376 +67: eof
378 377 --- END PAGE 5 ---
379 378 --- BEGIN PAGE 6 ---
380 379 0: word: ID
381   -skipping to EI
382   -EI not found
  380 +EI not found; resuming normal scanning
383 381 5: name: /F1
384 382 9: integer: 24
385 383 12: word: Tf
... ... @@ -392,17 +390,28 @@ EI not found
392 390 44: eof
393 391 --- END PAGE 6 ---
394 392 --- BEGIN PAGE 7 ---
395   -0: word: BT
396   -5: name: /F1
397   -9: integer: 24
398   -12: word: Tf
399   -17: integer: 72
400   -20: integer: 720
401   -24: word: Td
402   -29: string: Potato (raw: (Potato))
403   -38: word: Tj
404   -41: word: ET
405   -44: eof
  393 +0: name: /potato
  394 +8: word: BI
  395 +11: name: /CS
  396 +15: name: /G
  397 +17: name: /W
  398 +20: integer: 66
  399 +22: name: /H
  400 +25: integer: 47
  401 +27: name: /BPC
  402 +32: integer: 8
  403 +33: name: /F
  404 +35: name: /Fl
  405 +38: name: /DP
  406 +41: dict_open: <<
  407 +43: name: /Predictor
  408 +54: integer: 15
  409 +56: name: /Columns
  410 +65: integer: 66
  411 +67: dict_close: >>
  412 +70: word: ID
  413 +72: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI
  414 +318: eof
406 415 --- END PAGE 7 ---
407 416 --- BEGIN PAGE 8 ---
408 417 0: word: BT
... ...
qpdf/qtest/qpdf/tokens.out
... ... @@ -222,307 +222,311 @@ skipping to endstream
222 222 7601: word: endstream
223 223 7610: space: \x0a
224 224 7611: word: endobj
225   -7617: space: \x0a\x0a
226   -7619: integer: 46
227   -7621: space:
228   -7622: integer: 0
229   -7623: space:
230   -7624: word: obj
231   -7627: space: \x0a
232   -7628: integer: 68
233   -7630: space: \x0a
234   -7631: word: endobj
235   -7637: space: \x0a\x0a
236   -7639: comment: %% Contents for page 6
237   -7661: space: \x0a
238   -7662: comment: %% Original object ID: 42 0
239   -7689: space: \x0a
240   -7690: integer: 47
241   -7692: space:
242   -7693: integer: 0
243   -7694: space:
244   -7695: word: obj
245   -7698: space: \x0a
246   -7699: dict_open: <<
247   -7701: space: \x0a
248   -7704: name: /Length
249   -7711: space:
250   -7712: integer: 48
251   -7714: space:
252   -7715: integer: 0
253   -7716: space:
254   -7717: word: R
255   -7718: space: \x0a
256   -7719: dict_close: >>
257   -7721: space: \x0a
258   -7722: word: stream
  225 +7617: space: \x0a
  226 +7618: comment: %QDF: ignore_newline
  227 +7638: space: \x0a\x0a
  228 +7640: integer: 46
  229 +7642: space:
  230 +7643: integer: 0
  231 +7644: space:
  232 +7645: word: obj
  233 +7648: space: \x0a
  234 +7649: integer: 67
  235 +7651: space: \x0a
  236 +7652: word: endobj
  237 +7658: space: \x0a\x0a
  238 +7660: comment: %% Contents for page 6
  239 +7682: space: \x0a
  240 +7683: comment: %% Original object ID: 42 0
  241 +7710: space: \x0a
  242 +7711: integer: 47
  243 +7713: space:
  244 +7714: integer: 0
  245 +7715: space:
  246 +7716: word: obj
  247 +7719: space: \x0a
  248 +7720: dict_open: <<
  249 +7722: space: \x0a
  250 +7725: name: /Length
  251 +7732: space:
  252 +7733: integer: 48
  253 +7735: space:
  254 +7736: integer: 0
  255 +7737: space:
  256 +7738: word: R
  257 +7739: space: \x0a
  258 +7740: dict_close: >>
  259 +7742: space: \x0a
  260 +7743: word: stream
259 261 skipping to endstream
260   -7773: word: endstream
261   -7782: space: \x0a
262   -7783: word: endobj
263   -7789: space: \x0a\x0a
264   -7791: integer: 48
265   -7793: space:
266   -7794: integer: 0
267   -7795: space:
268   -7796: word: obj
269   -7799: space: \x0a
270   -7800: integer: 44
271   -7802: space: \x0a
272   -7803: word: endobj
273   -7809: space: \x0a\x0a
274   -7811: comment: %% Contents for page 7
275   -7833: space: \x0a
276   -7834: comment: %% Original object ID: 43 0
277   -7861: space: \x0a
278   -7862: integer: 49
279   -7864: space:
280   -7865: integer: 0
281   -7866: space:
282   -7867: word: obj
283   -7870: space: \x0a
284   -7871: dict_open: <<
285   -7873: space: \x0a
286   -7876: name: /Length
287   -7883: space:
288   -7884: integer: 50
289   -7886: space:
290   -7887: integer: 0
291   -7888: space:
292   -7889: word: R
293   -7890: space: \x0a
294   -7891: dict_close: >>
295   -7893: space: \x0a
296   -7894: word: stream
  262 +7794: word: endstream
  263 +7803: space: \x0a
  264 +7804: word: endobj
  265 +7810: space: \x0a\x0a
  266 +7812: integer: 48
  267 +7814: space:
  268 +7815: integer: 0
  269 +7816: space:
  270 +7817: word: obj
  271 +7820: space: \x0a
  272 +7821: integer: 44
  273 +7823: space: \x0a
  274 +7824: word: endobj
  275 +7830: space: \x0a\x0a
  276 +7832: comment: %% Contents for page 7
  277 +7854: space: \x0a
  278 +7855: comment: %% Original object ID: 43 0
  279 +7882: space: \x0a
  280 +7883: integer: 49
  281 +7885: space:
  282 +7886: integer: 0
  283 +7887: space:
  284 +7888: word: obj
  285 +7891: space: \x0a
  286 +7892: dict_open: <<
  287 +7894: space: \x0a
  288 +7897: name: /Length
  289 +7904: space:
  290 +7905: integer: 50
  291 +7907: space:
  292 +7908: integer: 0
  293 +7909: space:
  294 +7910: word: R
  295 +7911: space: \x0a
  296 +7912: dict_close: >>
  297 +7914: space: \x0a
  298 +7915: word: stream
297 299 skipping to endstream
298   -7945: word: endstream
299   -7954: space: \x0a
300   -7955: word: endobj
301   -7961: space: \x0a\x0a
302   -7963: integer: 50
303   -7965: space:
304   -7966: integer: 0
305   -7967: space:
306   -7968: word: obj
307   -7971: space: \x0a
308   -7972: integer: 44
309   -7974: space: \x0a
310   -7975: word: endobj
311   -7981: space: \x0a\x0a
312   -7983: comment: %% Contents for page 8
313   -8005: space: \x0a
314   -8006: comment: %% Original object ID: 44 0
315   -8033: space: \x0a
316   -8034: integer: 51
317   -8036: space:
318   -8037: integer: 0
319   -8038: space:
320   -8039: word: obj
321   -8042: space: \x0a
322   -8043: dict_open: <<
323   -8045: space: \x0a
324   -8048: name: /Length
325   -8055: space:
326   -8056: integer: 52
327   -8058: space:
328   -8059: integer: 0
329   -8060: space:
330   -8061: word: R
331   -8062: space: \x0a
332   -8063: dict_close: >>
333   -8065: space: \x0a
334   -8066: word: stream
  300 +8241: word: endstream
  301 +8250: space: \x0a
  302 +8251: word: endobj
  303 +8257: space: \x0a
  304 +8258: comment: %QDF: ignore_newline
  305 +8278: space: \x0a\x0a
  306 +8280: integer: 50
  307 +8282: space:
  308 +8283: integer: 0
  309 +8284: space:
  310 +8285: word: obj
  311 +8288: space: \x0a
  312 +8289: integer: 318
  313 +8292: space: \x0a
  314 +8293: word: endobj
  315 +8299: space: \x0a\x0a
  316 +8301: comment: %% Contents for page 8
  317 +8323: space: \x0a
  318 +8324: comment: %% Original object ID: 44 0
  319 +8351: space: \x0a
  320 +8352: integer: 51
  321 +8354: space:
  322 +8355: integer: 0
  323 +8356: space:
  324 +8357: word: obj
  325 +8360: space: \x0a
  326 +8361: dict_open: <<
  327 +8363: space: \x0a
  328 +8366: name: /Length
  329 +8373: space:
  330 +8374: integer: 52
  331 +8376: space:
  332 +8377: integer: 0
  333 +8378: space:
  334 +8379: word: R
  335 +8380: space: \x0a
  336 +8381: dict_close: >>
  337 +8383: space: \x0a
  338 +8384: word: stream
335 339 skipping to endstream
336   -8117: word: endstream
337   -8126: space: \x0a
338   -8127: word: endobj
339   -8133: space: \x0a\x0a
340   -8135: integer: 52
341   -8137: space:
342   -8138: integer: 0
343   -8139: space:
344   -8140: word: obj
345   -8143: space: \x0a
346   -8144: integer: 44
347   -8146: space: \x0a
348   -8147: word: endobj
349   -8153: space: \x0a\x0a
350   -8155: comment: %% Contents for page 9
351   -8177: space: \x0a
352   -8178: comment: %% Original object ID: 45 0
353   -8205: space: \x0a
354   -8206: integer: 53
355   -8208: space:
356   -8209: integer: 0
357   -8210: space:
358   -8211: word: obj
359   -8214: space: \x0a
360   -8215: dict_open: <<
361   -8217: space: \x0a
362   -8220: name: /Length
363   -8227: space:
364   -8228: integer: 54
365   -8230: space:
366   -8231: integer: 0
367   -8232: space:
368   -8233: word: R
369   -8234: space: \x0a
370   -8235: dict_close: >>
371   -8237: space: \x0a
372   -8238: word: stream
373   -skipping to endstream
374   -8289: word: endstream
375   -8298: space: \x0a
376   -8299: word: endobj
377   -8305: space: \x0a\x0a
378   -8307: integer: 54
379   -8309: space:
380   -8310: integer: 0
381   -8311: space:
382   -8312: word: obj
383   -8315: space: \x0a
384   -8316: integer: 44
385   -8318: space: \x0a
386   -8319: word: endobj
387   -8325: space: \x0a\x0a
388   -8327: comment: %% Contents for page 10
389   -8350: space: \x0a
390   -8351: comment: %% Original object ID: 46 0
391   -8378: space: \x0a
392   -8379: integer: 55
393   -8381: space:
394   -8382: integer: 0
395   -8383: space:
396   -8384: word: obj
397   -8387: space: \x0a
398   -8388: dict_open: <<
399   -8390: space: \x0a
400   -8393: name: /Length
401   -8400: space:
402   -8401: integer: 56
403   -8403: space:
404   -8404: integer: 0
405   -8405: space:
406   -8406: word: R
407   -8407: space: \x0a
408   -8408: dict_close: >>
409   -8410: space: \x0a
410   -8411: word: stream
411   -skipping to endstream
412   -8462: word: endstream
413   -8471: space: \x0a
414   -8472: word: endobj
415   -8478: space: \x0a\x0a
416   -8480: integer: 56
417   -8482: space:
418   -8483: integer: 0
419   -8484: space:
420   -8485: word: obj
421   -8488: space: \x0a
422   -8489: integer: 44
423   -8491: space: \x0a
424   -8492: word: endobj
425   -8498: space: \x0a\x0a
426   -8500: comment: %% Contents for page 11
  340 +8435: word: endstream
  341 +8444: space: \x0a
  342 +8445: word: endobj
  343 +8451: space: \x0a\x0a
  344 +8453: integer: 52
  345 +8455: space:
  346 +8456: integer: 0
  347 +8457: space:
  348 +8458: word: obj
  349 +8461: space: \x0a
  350 +8462: integer: 44
  351 +8464: space: \x0a
  352 +8465: word: endobj
  353 +8471: space: \x0a\x0a
  354 +8473: comment: %% Contents for page 9
  355 +8495: space: \x0a
  356 +8496: comment: %% Original object ID: 45 0
427 357 8523: space: \x0a
428   -8524: comment: %% Original object ID: 47 0
429   -8551: space: \x0a
430   -8552: integer: 57
431   -8554: space:
432   -8555: integer: 0
433   -8556: space:
434   -8557: word: obj
435   -8560: space: \x0a
436   -8561: dict_open: <<
437   -8563: space: \x0a
438   -8566: name: /Length
439   -8573: space:
440   -8574: integer: 58
441   -8576: space:
442   -8577: integer: 0
443   -8578: space:
444   -8579: word: R
445   -8580: space: \x0a
446   -8581: dict_close: >>
447   -8583: space: \x0a
448   -8584: word: stream
  358 +8524: integer: 53
  359 +8526: space:
  360 +8527: integer: 0
  361 +8528: space:
  362 +8529: word: obj
  363 +8532: space: \x0a
  364 +8533: dict_open: <<
  365 +8535: space: \x0a
  366 +8538: name: /Length
  367 +8545: space:
  368 +8546: integer: 54
  369 +8548: space:
  370 +8549: integer: 0
  371 +8550: space:
  372 +8551: word: R
  373 +8552: space: \x0a
  374 +8553: dict_close: >>
  375 +8555: space: \x0a
  376 +8556: word: stream
449 377 skipping to endstream
450   -8635: word: endstream
451   -8644: space: \x0a
452   -8645: word: endobj
453   -8651: space: \x0a\x0a
454   -8653: integer: 58
455   -8655: space:
456   -8656: integer: 0
457   -8657: space:
458   -8658: word: obj
459   -8661: space: \x0a
460   -8662: integer: 44
461   -8664: space: \x0a
462   -8665: word: endobj
463   -8671: space: \x0a\x0a
464   -8673: integer: 59
465   -8675: space:
466   -8676: integer: 0
467   -8677: space:
468   -8678: word: obj
469   -8681: space: \x0a
470   -8682: dict_open: <<
471   -8684: space: \x0a
472   -8687: name: /Type
473   -8692: space:
474   -8693: name: /XRef
475   -8698: space: \x0a
476   -8701: name: /Length
477   -8708: space:
478   -8709: integer: 240
479   -8712: space: \x0a
480   -8715: name: /W
481   -8717: space:
482   -8718: array_open: [
483   -8719: space:
484   -8720: integer: 1
  378 +8607: word: endstream
  379 +8616: space: \x0a
  380 +8617: word: endobj
  381 +8623: space: \x0a\x0a
  382 +8625: integer: 54
  383 +8627: space:
  384 +8628: integer: 0
  385 +8629: space:
  386 +8630: word: obj
  387 +8633: space: \x0a
  388 +8634: integer: 44
  389 +8636: space: \x0a
  390 +8637: word: endobj
  391 +8643: space: \x0a\x0a
  392 +8645: comment: %% Contents for page 10
  393 +8668: space: \x0a
  394 +8669: comment: %% Original object ID: 46 0
  395 +8696: space: \x0a
  396 +8697: integer: 55
  397 +8699: space:
  398 +8700: integer: 0
  399 +8701: space:
  400 +8702: word: obj
  401 +8705: space: \x0a
  402 +8706: dict_open: <<
  403 +8708: space: \x0a
  404 +8711: name: /Length
  405 +8718: space:
  406 +8719: integer: 56
485 407 8721: space:
486   -8722: integer: 2
  408 +8722: integer: 0
487 409 8723: space:
488   -8724: integer: 1
489   -8725: space:
490   -8726: array_close: ]
491   -8727: space: \x0a
492   -8730: name: /Root
493   -8735: space:
494   -8736: integer: 2
495   -8737: space:
496   -8738: integer: 0
497   -8739: space:
498   -8740: word: R
499   -8741: space: \x0a
500   -8744: name: /Size
501   -8749: space:
502   -8750: integer: 60
503   -8752: space: \x0a
504   -8755: name: /ID
505   -8758: space:
506   -8759: array_open: [
507   -8760: string: \x88\x04\x8e\x17\xc9a\xe0\x94\xff\xec\xe9\x8c\xb8\x8cF\xd0 (raw: <88048e17c961e094ffece98cb88c46d0>)
508   -8794: string: \xed\xd6\x0f\xe8\xee\x87\xf8\x871\xa8o\x81\x9f\xe6Q\x99 (raw: <edd60fe8ee87f88731a86f819fe65199>)
509   -8828: array_close: ]
510   -8829: space: \x0a
511   -8830: dict_close: >>
512   -8832: space: \x0a
513   -8833: word: stream
  410 +8724: word: R
  411 +8725: space: \x0a
  412 +8726: dict_close: >>
  413 +8728: space: \x0a
  414 +8729: word: stream
  415 +skipping to endstream
  416 +8780: word: endstream
  417 +8789: space: \x0a
  418 +8790: word: endobj
  419 +8796: space: \x0a\x0a
  420 +8798: integer: 56
  421 +8800: space:
  422 +8801: integer: 0
  423 +8802: space:
  424 +8803: word: obj
  425 +8806: space: \x0a
  426 +8807: integer: 44
  427 +8809: space: \x0a
  428 +8810: word: endobj
  429 +8816: space: \x0a\x0a
  430 +8818: comment: %% Contents for page 11
  431 +8841: space: \x0a
  432 +8842: comment: %% Original object ID: 47 0
  433 +8869: space: \x0a
  434 +8870: integer: 57
  435 +8872: space:
  436 +8873: integer: 0
  437 +8874: space:
  438 +8875: word: obj
  439 +8878: space: \x0a
  440 +8879: dict_open: <<
  441 +8881: space: \x0a
  442 +8884: name: /Length
  443 +8891: space:
  444 +8892: integer: 58
  445 +8894: space:
  446 +8895: integer: 0
  447 +8896: space:
  448 +8897: word: R
  449 +8898: space: \x0a
  450 +8899: dict_close: >>
  451 +8901: space: \x0a
  452 +8902: word: stream
  453 +skipping to endstream
  454 +8953: word: endstream
  455 +8962: space: \x0a
  456 +8963: word: endobj
  457 +8969: space: \x0a\x0a
  458 +8971: integer: 58
  459 +8973: space:
  460 +8974: integer: 0
  461 +8975: space:
  462 +8976: word: obj
  463 +8979: space: \x0a
  464 +8980: integer: 44
  465 +8982: space: \x0a
  466 +8983: word: endobj
  467 +8989: space: \x0a\x0a
  468 +8991: integer: 59
  469 +8993: space:
  470 +8994: integer: 0
  471 +8995: space:
  472 +8996: word: obj
  473 +8999: space: \x0a
  474 +9000: dict_open: <<
  475 +9002: space: \x0a
  476 +9005: name: /Type
  477 +9010: space:
  478 +9011: name: /XRef
  479 +9016: space: \x0a
  480 +9019: name: /Length
  481 +9026: space:
  482 +9027: integer: 240
  483 +9030: space: \x0a
  484 +9033: name: /W
  485 +9035: space:
  486 +9036: array_open: [
  487 +9037: space:
  488 +9038: integer: 1
  489 +9039: space:
  490 +9040: integer: 2
  491 +9041: space:
  492 +9042: integer: 1
  493 +9043: space:
  494 +9044: array_close: ]
  495 +9045: space: \x0a
  496 +9048: name: /Root
  497 +9053: space:
  498 +9054: integer: 2
  499 +9055: space:
  500 +9056: integer: 0
  501 +9057: space:
  502 +9058: word: R
  503 +9059: space: \x0a
  504 +9062: name: /Size
  505 +9067: space:
  506 +9068: integer: 60
  507 +9070: space: \x0a
  508 +9073: name: /ID
  509 +9076: space:
  510 +9077: array_open: [
  511 +9078: string: \x88\x04\x8e\x17\xc9a\xe0\x94\xff\xec\xe9\x8c\xb8\x8cF\xd0 (raw: <88048e17c961e094ffece98cb88c46d0>)
  512 +9112: string: \xed\xd6\x0f\xe8\xee\x87\xf8\x871\xa8o\x81\x9f\xe6Q\x99 (raw: <edd60fe8ee87f88731a86f819fe65199>)
  513 +9146: array_close: ]
  514 +9147: space: \x0a
  515 +9148: dict_close: >>
  516 +9150: space: \x0a
  517 +9151: word: stream
514 518 skipping to endstream
515   -9081: word: endstream
516   -9090: space: \x0a
517   -9091: word: endobj
518   -9097: space: \x0a\x0a
519   -9099: word: startxref
520   -9108: space: \x0a
521   -9109: integer: 8673
522   -9113: space: \x0a
523   -9114: comment: %%EOF
524   -9119: space: \x0a
525   -9120: eof
  519 +9399: word: endstream
  520 +9408: space: \x0a
  521 +9409: word: endobj
  522 +9415: space: \x0a\x0a
  523 +9417: word: startxref
  524 +9426: space: \x0a
  525 +9427: integer: 8991
  526 +9431: space: \x0a
  527 +9432: comment: %%EOF
  528 +9437: space: \x0a
  529 +9438: eof
526 530 --- END FILE ---
527 531 --- BEGIN PAGE 1 ---
528 532 0: word: BT
... ... @@ -595,9 +599,7 @@ skipping to endstream
595 599 103: dict_close: >>
596 600 105: space: \x0a
597 601 106: word: ID
598   -skipping to EI
599   -352: word: EI
600   -354: space: \x0a
  602 +108: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI\x0a
601 603 355: word: BT
602 604 357: space: \x0a
603 605 360: name: /F1
... ... @@ -743,13 +745,11 @@ skipping to EI
743 745 47: word: ET
744 746 49: space: \x0a\x00\x0a
745 747 52: name: /ThisMustBeLast
746   -67: space: \x0a
747   -68: eof
  748 +67: eof
748 749 --- END PAGE 5 ---
749 750 --- BEGIN PAGE 6 ---
750 751 0: word: ID
751   -skipping to EI
752   -EI not found
  752 +EI not found; resuming normal scanning
753 753 2: space: \x0a
754 754 5: name: /F1
755 755 8: space:
... ... @@ -772,27 +772,37 @@ EI not found
772 772 44: eof
773 773 --- END PAGE 6 ---
774 774 --- BEGIN PAGE 7 ---
775   -0: word: BT
776   -2: space: \x0a
777   -5: name: /F1
778   -8: space:
779   -9: integer: 24
780   -11: space:
781   -12: word: Tf
782   -14: space: \x0a
783   -17: integer: 72
  775 +0: name: /potato
  776 +7: space: \x0a
  777 +8: word: BI
  778 +10: space: \x0a
  779 +11: name: /CS
  780 +14: space:
  781 +15: name: /G
  782 +17: name: /W
784 783 19: space:
785   -20: integer: 720
786   -23: space:
787   -24: word: Td
788   -26: space: \x0a
789   -29: string: Potato (raw: (Potato))
790   -37: space:
791   -38: word: Tj
792   -40: space: \x0a
793   -41: word: ET
794   -43: space: \x0a
795   -44: eof
  784 +20: integer: 66
  785 +22: name: /H
  786 +24: space:
  787 +25: integer: 47
  788 +27: name: /BPC
  789 +31: space:
  790 +32: integer: 8
  791 +33: name: /F
  792 +35: name: /Fl
  793 +38: name: /DP
  794 +41: dict_open: <<
  795 +43: name: /Predictor
  796 +53: space:
  797 +54: integer: 15
  798 +56: name: /Columns
  799 +64: space:
  800 +65: integer: 66
  801 +67: dict_close: >>
  802 +69: space: \x0a
  803 +70: word: ID
  804 +72: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI
  805 +318: eof
796 806 --- END PAGE 7 ---
797 807 --- BEGIN PAGE 8 ---
798 808 0: word: BT
... ...
qpdf/qtest/qpdf/tokens.pdf
No preview for this file type
qpdf/test_tokenizer.cc
... ... @@ -88,6 +88,8 @@ static char const* tokenTypeName(QPDFTokenizer::token_type_e ttype)
88 88 return "space";
89 89 case QPDFTokenizer::tt_comment:
90 90 return "comment";
  91 + case QPDFTokenizer::tt_inline_image:
  92 + return "inline-image";
91 93 }
92 94 return 0;
93 95 }
... ... @@ -131,7 +133,6 @@ dump_tokens(PointerHolder&lt;InputSource&gt; is, std::string const&amp; label,
131 133 bool skip_streams, bool skip_inline_images)
132 134 {
133 135 Finder f1(is, "endstream");
134   - Finder f2(is, "EI");
135 136 std::cout << "--- BEGIN " << label << " ---" << std::endl;
136 137 bool done = false;
137 138 QPDFTokenizer tokenizer;
... ... @@ -140,10 +141,20 @@ dump_tokens(PointerHolder&lt;InputSource&gt; is, std::string const&amp; label,
140 141 {
141 142 tokenizer.includeIgnorable();
142 143 }
  144 + qpdf_offset_t inline_image_offset = 0;
143 145 while (! done)
144 146 {
145 147 QPDFTokenizer::Token token =
146   - tokenizer.readToken(is, "test", true, max_len);
  148 + tokenizer.readToken(is, "test", true,
  149 + inline_image_offset ? 0 : max_len);
  150 + if (inline_image_offset && (token.getType() == QPDFTokenizer::tt_bad))
  151 + {
  152 + std::cout << "EI not found; resuming normal scanning" << std::endl;
  153 + is->seek(inline_image_offset, SEEK_SET);
  154 + inline_image_offset = 0;
  155 + continue;
  156 + }
  157 + inline_image_offset = 0;
147 158  
148 159 qpdf_offset_t offset = is->getLastOffset();
149 160 std::cout << offset << ": "
... ... @@ -170,7 +181,8 @@ dump_tokens(PointerHolder&lt;InputSource&gt; is, std::string const&amp; label,
170 181 else if (skip_inline_images &&
171 182 (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "ID")))
172 183 {
173   - try_skipping(tokenizer, is, max_len, "EI", f2);
  184 + tokenizer.expectInlineImage();
  185 + inline_image_offset = is->tell();
174 186 }
175 187 else if (token.getType() == QPDFTokenizer::tt_eof)
176 188 {
... ...