Commit ba453ba4fff442dc03ea04a3328aaa58bb8e6923
1 parent: ec538792
Use space tokens in tokenizer filter
Showing 2 changed files with 36 additions and 34 deletions
libqpdf/Pl_QPDFTokenizer.cc
| @@ -8,12 +8,13 @@ | @@ -8,12 +8,13 @@ | ||
| 8 | 8 | ||
| 9 | Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next) : | 9 | Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next) : |
| 10 | Pipeline(identifier, next), | 10 | Pipeline(identifier, next), |
| 11 | - newline_after_next_token(false), | ||
| 12 | just_wrote_nl(false), | 11 | just_wrote_nl(false), |
| 13 | last_char_was_cr(false), | 12 | last_char_was_cr(false), |
| 14 | unread_char(false), | 13 | unread_char(false), |
| 15 | char_to_unread('\0') | 14 | char_to_unread('\0') |
| 16 | { | 15 | { |
| 16 | + tokenizer.allowEOF(); | ||
| 17 | + tokenizer.includeIgnorable(); | ||
| 17 | } | 18 | } |
| 18 | 19 | ||
| 19 | Pl_QPDFTokenizer::~Pl_QPDFTokenizer() | 20 | Pl_QPDFTokenizer::~Pl_QPDFTokenizer() |
| @@ -37,8 +38,35 @@ Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token& token) | @@ -37,8 +38,35 @@ Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token& token) | ||
| 37 | 38 | ||
| 38 | switch (token.getType()) | 39 | switch (token.getType()) |
| 39 | { | 40 | { |
| 41 | + case QPDFTokenizer::tt_space: | ||
| 42 | + { | ||
| 43 | + size_t len = value.length(); | ||
| 44 | + for (size_t i = 0; i < len; ++i) | ||
| 45 | + { | ||
| 46 | + char ch = value.at(i); | ||
| 47 | + if (ch == '\r') | ||
| 48 | + { | ||
| 49 | + if ((i + 1 < len) && (value.at(i + 1) == '\n')) | ||
| 50 | + { | ||
| 51 | + // ignore | ||
| 52 | + } | ||
| 53 | + else | ||
| 54 | + { | ||
| 55 | + writeNext("\n", 1); | ||
| 56 | + } | ||
| 57 | + } | ||
| 58 | + else | ||
| 59 | + { | ||
| 60 | + writeNext(&ch, 1); | ||
| 61 | + } | ||
| 62 | + } | ||
| 63 | + } | ||
| 64 | + value.clear(); | ||
| 65 | + break; | ||
| 66 | + | ||
| 40 | case QPDFTokenizer::tt_string: | 67 | case QPDFTokenizer::tt_string: |
| 41 | value = QPDF_String(token.getValue()).unparse(); | 68 | value = QPDF_String(token.getValue()).unparse(); |
| 69 | + | ||
| 42 | break; | 70 | break; |
| 43 | 71 | ||
| 44 | case QPDFTokenizer::tt_name: | 72 | case QPDFTokenizer::tt_name: |
| @@ -59,10 +87,14 @@ Pl_QPDFTokenizer::processChar(char ch) | @@ -59,10 +87,14 @@ Pl_QPDFTokenizer::processChar(char ch) | ||
| 59 | if (tokenizer.getToken(token, this->unread_char, this->char_to_unread)) | 87 | if (tokenizer.getToken(token, this->unread_char, this->char_to_unread)) |
| 60 | { | 88 | { |
| 61 | writeToken(token); | 89 | writeToken(token); |
| 62 | - if (this->newline_after_next_token) | ||
| 63 | - { | 90 | + std::string value = token.getRawValue(); |
| 91 | + QPDFTokenizer::token_type_e token_type = token.getType(); | ||
| 92 | + if (((token_type == QPDFTokenizer::tt_string) || | ||
| 93 | + (token_type == QPDFTokenizer::tt_name)) && | ||
| 94 | + ((value.find('\r') != std::string::npos) || | ||
| 95 | + (value.find('\n') != std::string::npos))) | ||
| 96 | + { | ||
| 64 | writeNext("\n", 1); | 97 | writeNext("\n", 1); |
| 65 | - this->newline_after_next_token = false; | ||
| 66 | } | 98 | } |
| 67 | if ((token.getType() == QPDFTokenizer::tt_word) && | 99 | if ((token.getType() == QPDFTokenizer::tt_word) && |
| 68 | (token.getValue() == "ID")) | 100 | (token.getValue() == "ID")) |
| @@ -71,35 +103,6 @@ Pl_QPDFTokenizer::processChar(char ch) | @@ -71,35 +103,6 @@ Pl_QPDFTokenizer::processChar(char ch) | ||
| 71 | tokenizer.expectInlineImage(); | 103 | tokenizer.expectInlineImage(); |
| 72 | } | 104 | } |
| 73 | } | 105 | } |
| 74 | - else | ||
| 75 | - { | ||
| 76 | - bool suppress = false; | ||
| 77 | - if ((ch == '\n') && (this->last_char_was_cr)) | ||
| 78 | - { | ||
| 79 | - // Always ignore \n following \r | ||
| 80 | - suppress = true; | ||
| 81 | - } | ||
| 82 | - | ||
| 83 | - if ((this->last_char_was_cr = (ch == '\r'))) | ||
| 84 | - { | ||
| 85 | - ch = '\n'; | ||
| 86 | - } | ||
| 87 | - | ||
| 88 | - if (this->tokenizer.betweenTokens()) | ||
| 89 | - { | ||
| 90 | - if (! suppress) | ||
| 91 | - { | ||
| 92 | - writeNext(&ch, 1); | ||
| 93 | - } | ||
| 94 | - } | ||
| 95 | - else | ||
| 96 | - { | ||
| 97 | - if (ch == '\n') | ||
| 98 | - { | ||
| 99 | - this->newline_after_next_token = true; | ||
| 100 | - } | ||
| 101 | - } | ||
| 102 | - } | ||
| 103 | } | 106 | } |
| 104 | 107 | ||
| 105 | 108 |
libqpdf/qpdf/Pl_QPDFTokenizer.hh
| @@ -28,7 +28,6 @@ class Pl_QPDFTokenizer: public Pipeline | @@ -28,7 +28,6 @@ class Pl_QPDFTokenizer: public Pipeline | ||
| 28 | void writeToken(QPDFTokenizer::Token&); | 28 | void writeToken(QPDFTokenizer::Token&); |
| 29 | 29 | ||
| 30 | QPDFTokenizer tokenizer; | 30 | QPDFTokenizer tokenizer; |
| 31 | - bool newline_after_next_token; | ||
| 32 | bool just_wrote_nl; | 31 | bool just_wrote_nl; |
| 33 | bool last_char_was_cr; | 32 | bool last_char_was_cr; |
| 34 | bool unread_char; | 33 | bool unread_char; |