Commit ba453ba4fff442dc03ea04a3328aaa58bb8e6923

Authored by Jay Berkenbilt
1 parent ec538792

Use space tokens in tokenizer filter

libqpdf/Pl_QPDFTokenizer.cc
... ... @@ -8,12 +8,13 @@
8 8  
9 9 Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next) :
10 10 Pipeline(identifier, next),
11   - newline_after_next_token(false),
12 11 just_wrote_nl(false),
13 12 last_char_was_cr(false),
14 13 unread_char(false),
15 14 char_to_unread('\0')
16 15 {
  16 + tokenizer.allowEOF();
  17 + tokenizer.includeIgnorable();
17 18 }
18 19  
19 20 Pl_QPDFTokenizer::~Pl_QPDFTokenizer()
... ... @@ -37,8 +38,35 @@ Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token& token)
37 38  
38 39 switch (token.getType())
39 40 {
  41 + case QPDFTokenizer::tt_space:
  42 + {
  43 + size_t len = value.length();
  44 + for (size_t i = 0; i < len; ++i)
  45 + {
  46 + char ch = value.at(i);
  47 + if (ch == '\r')
  48 + {
  49 + if ((i + 1 < len) && (value.at(i + 1) == '\n'))
  50 + {
  51 + // ignore the \r of a \r\n pair; the \n is written on the next iteration
  52 + }
  53 + else
  54 + {
  55 + writeNext("\n", 1);
  56 + }
  57 + }
  58 + else
  59 + {
  60 + writeNext(&ch, 1);
  61 + }
  62 + }
  63 + }
  64 + value.clear();
  65 + break;
  66 +
40 67 case QPDFTokenizer::tt_string:
41 68 value = QPDF_String(token.getValue()).unparse();
  69 +
42 70 break;
43 71  
44 72 case QPDFTokenizer::tt_name:
... ... @@ -59,10 +87,14 @@ Pl_QPDFTokenizer::processChar(char ch)
59 87 if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
60 88 {
61 89 writeToken(token);
62   - if (this->newline_after_next_token)
63   - {
  90 + std::string value = token.getRawValue();
  91 + QPDFTokenizer::token_type_e token_type = token.getType();
  92 + if (((token_type == QPDFTokenizer::tt_string) ||
  93 + (token_type == QPDFTokenizer::tt_name)) &&
  94 + ((value.find('\r') != std::string::npos) ||
  95 + (value.find('\n') != std::string::npos)))
  96 + {
64 97 writeNext("\n", 1);
65   - this->newline_after_next_token = false;
66 98 }
67 99 if ((token.getType() == QPDFTokenizer::tt_word) &&
68 100 (token.getValue() == "ID"))
... ... @@ -71,35 +103,6 @@ Pl_QPDFTokenizer::processChar(char ch)
71 103 tokenizer.expectInlineImage();
72 104 }
73 105 }
74   - else
75   - {
76   - bool suppress = false;
77   - if ((ch == '\n') && (this->last_char_was_cr))
78   - {
79   - // Always ignore \n following \r
80   - suppress = true;
81   - }
82   -
83   - if ((this->last_char_was_cr = (ch == '\r')))
84   - {
85   - ch = '\n';
86   - }
87   -
88   - if (this->tokenizer.betweenTokens())
89   - {
90   - if (! suppress)
91   - {
92   - writeNext(&ch, 1);
93   - }
94   - }
95   - else
96   - {
97   - if (ch == '\n')
98   - {
99   - this->newline_after_next_token = true;
100   - }
101   - }
102   - }
103 106 }
104 107  
105 108  
... ...
libqpdf/qpdf/Pl_QPDFTokenizer.hh
... ... @@ -28,7 +28,6 @@ class Pl_QPDFTokenizer: public Pipeline
28 28 void writeToken(QPDFTokenizer::Token&);
29 29  
30 30 QPDFTokenizer tokenizer;
31   - bool newline_after_next_token;
32 31 bool just_wrote_nl;
33 32 bool last_char_was_cr;
34 33 bool unread_char;
... ...