Commit ec538792fac039daa9636f9c94000b7bc1f3a669
1 parent: 5b5f45e9
Use inline image token type in tokenizer filter
Showing 3 changed files with 15 additions and 59 deletions
libqpdf/Pl_QPDFTokenizer.cc
| ... | ... | @@ -12,10 +12,8 @@ Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next) : |
| 12 | 12 | just_wrote_nl(false), |
| 13 | 13 | last_char_was_cr(false), |
| 14 | 14 | unread_char(false), |
| 15 | - char_to_unread('\0'), | |
| 16 | - in_inline_image(false) | |
| 15 | + char_to_unread('\0') | |
| 17 | 16 | { |
| 18 | - memset(this->image_buf, 0, IMAGE_BUF_SIZE); | |
| 19 | 17 | } |
| 20 | 18 | |
| 21 | 19 | Pl_QPDFTokenizer::~Pl_QPDFTokenizer() |
| ... | ... | @@ -56,37 +54,6 @@ Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token& token) |
| 56 | 54 | void |
| 57 | 55 | Pl_QPDFTokenizer::processChar(char ch) |
| 58 | 56 | { |
| 59 | - if (this->in_inline_image) | |
| 60 | - { | |
| 61 | - // Scan through the input looking for EI surrounded by | |
| 62 | - // whitespace. If that pattern appears in the inline image's | |
| 63 | - // representation, we're hosed, but this situation seems | |
| 64 | - // excessively unlikely, and this code path is only followed | |
| 65 | - // during content stream normalization, which is pretty much | |
| 66 | - // used for debugging and human inspection of PDF files. | |
| 67 | - memmove(this->image_buf, | |
| 68 | - this->image_buf + 1, | |
| 69 | - IMAGE_BUF_SIZE - 1); | |
| 70 | - this->image_buf[IMAGE_BUF_SIZE - 1] = ch; | |
| 71 | - if (strchr(" \t\n\v\f\r", this->image_buf[0]) && | |
| 72 | - (this->image_buf[1] == 'E') && | |
| 73 | - (this->image_buf[2] == 'I') && | |
| 74 | - strchr(" \t\n\v\f\r", this->image_buf[3])) | |
| 75 | - { | |
| 76 | - // We've found an EI operator. We've already written the | |
| 77 | - // EI operator to output; terminate with a newline | |
| 78 | - // character and resume normal processing. | |
| 79 | - writeNext("\n", 1); | |
| 80 | - this->in_inline_image = false; | |
| 81 | - QTC::TC("qpdf", "Pl_QPDFTokenizer found EI"); | |
| 82 | - } | |
| 83 | - else | |
| 84 | - { | |
| 85 | - writeNext(&ch, 1); | |
| 86 | - } | |
| 87 | - return; | |
| 88 | - } | |
| 89 | - | |
| 90 | 57 | tokenizer.presentCharacter(ch); |
| 91 | 58 | QPDFTokenizer::Token token; |
| 92 | 59 | if (tokenizer.getToken(token, this->unread_char, this->char_to_unread)) |
| ... | ... | @@ -100,13 +67,8 @@ Pl_QPDFTokenizer::processChar(char ch) |
| 100 | 67 | if ((token.getType() == QPDFTokenizer::tt_word) && |
| 101 | 68 | (token.getValue() == "ID")) |
| 102 | 69 | { |
| 103 | - // Suspend normal scanning until we find an EI token. | |
| 104 | - this->in_inline_image = true; | |
| 105 | - if (this->unread_char) | |
| 106 | - { | |
| 107 | - writeNext(&this->char_to_unread, 1); | |
| 108 | - this->unread_char = false; | |
| 109 | - } | |
| 70 | + QTC::TC("qpdf", "Pl_QPDFTokenizer found ID"); | |
| 71 | + tokenizer.expectInlineImage(); | |
| 110 | 72 | } |
| 111 | 73 | } |
| 112 | 74 | else |
| ... | ... | @@ -171,21 +133,18 @@ void |
| 171 | 133 | Pl_QPDFTokenizer::finish() |
| 172 | 134 | { |
| 173 | 135 | this->tokenizer.presentEOF(); |
| 174 | - if (! this->in_inline_image) | |
| 136 | + QPDFTokenizer::Token token; | |
| 137 | + if (tokenizer.getToken(token, this->unread_char, this->char_to_unread)) | |
| 175 | 138 | { |
| 176 | - QPDFTokenizer::Token token; | |
| 177 | - if (tokenizer.getToken(token, this->unread_char, this->char_to_unread)) | |
| 178 | - { | |
| 179 | - writeToken(token); | |
| 180 | - if (unread_char) | |
| 181 | - { | |
| 182 | - if (this->char_to_unread == '\r') | |
| 183 | - { | |
| 184 | - this->char_to_unread = '\n'; | |
| 185 | - } | |
| 186 | - writeNext(&this->char_to_unread, 1); | |
| 187 | - } | |
| 188 | - } | |
| 139 | + writeToken(token); | |
| 140 | + if (unread_char) | |
| 141 | + { | |
| 142 | + if (this->char_to_unread == '\r') | |
| 143 | + { | |
| 144 | + this->char_to_unread = '\n'; | |
| 145 | + } | |
| 146 | + writeNext(&this->char_to_unread, 1); | |
| 147 | + } | |
| 189 | 148 | } |
| 190 | 149 | if (! this->just_wrote_nl) |
| 191 | 150 | { | ... | ... |
libqpdf/qpdf/Pl_QPDFTokenizer.hh
| ... | ... | @@ -33,9 +33,6 @@ class Pl_QPDFTokenizer: public Pipeline |
| 33 | 33 | bool last_char_was_cr; |
| 34 | 34 | bool unread_char; |
| 35 | 35 | char char_to_unread; |
| 36 | - bool in_inline_image; | |
| 37 | - static int const IMAGE_BUF_SIZE = 4; // must be >= 4 | |
| 38 | - char image_buf[IMAGE_BUF_SIZE]; | |
| 39 | 36 | }; |
| 40 | 37 | |
| 41 | 38 | #endif // __PL_QPDFTOKENIZER_HH__ | ... | ... |
qpdf/qpdf.testcov
| ... | ... | @@ -182,7 +182,6 @@ QPDF_Stream getRawStreamData 0 |
| 182 | 182 | QPDF_Stream getStreamData 0 |
| 183 | 183 | QPDF_Stream expand filter abbreviation 0 |
| 184 | 184 | qpdf-c called qpdf_read_memory 0 |
| 185 | -Pl_QPDFTokenizer found EI 0 | |
| 186 | 185 | QPDF stream without newline 0 |
| 187 | 186 | QPDF stream with CR only 0 |
| 188 | 187 | QPDF stream with CRNL 0 |
| ... | ... | @@ -304,3 +303,4 @@ qpdf-c called qpdf_set_newline_before_endstream 0 |
| 304 | 303 | QPDF_Stream TIFF predictor 0 |
| 305 | 304 | QPDFTokenizer EOF when not allowed 0 |
| 306 | 305 | QPDFTokenizer inline image at EOF 0 |
| 306 | +Pl_QPDFTokenizer found ID 0 | ... | ... |