Commit ba453ba4fff442dc03ea04a3328aaa58bb8e6923

Authored by Jay Berkenbilt
1 parent ec538792

Use space tokens in tokenizer filter

libqpdf/Pl_QPDFTokenizer.cc
@@ -8,12 +8,13 @@ @@ -8,12 +8,13 @@
8 8
9 Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next) : 9 Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next) :
10 Pipeline(identifier, next), 10 Pipeline(identifier, next),
11 - newline_after_next_token(false),  
12 just_wrote_nl(false), 11 just_wrote_nl(false),
13 last_char_was_cr(false), 12 last_char_was_cr(false),
14 unread_char(false), 13 unread_char(false),
15 char_to_unread('\0') 14 char_to_unread('\0')
16 { 15 {
  16 + tokenizer.allowEOF();
  17 + tokenizer.includeIgnorable();
17 } 18 }
18 19
19 Pl_QPDFTokenizer::~Pl_QPDFTokenizer() 20 Pl_QPDFTokenizer::~Pl_QPDFTokenizer()
@@ -37,8 +38,35 @@ Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token& token) @@ -37,8 +38,35 @@ Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token& token)
37 38
38 switch (token.getType()) 39 switch (token.getType())
39 { 40 {
  41 + case QPDFTokenizer::tt_space:
  42 + {
  43 + size_t len = value.length();
  44 + for (size_t i = 0; i < len; ++i)
  45 + {
  46 + char ch = value.at(i);
  47 + if (ch == '\r')
  48 + {
  49 + if ((i + 1 < len) && (value.at(i + 1) == '\n'))
  50 + {
  51 + // ignore
  52 + }
  53 + else
  54 + {
  55 + writeNext("\n", 1);
  56 + }
  57 + }
  58 + else
  59 + {
  60 + writeNext(&ch, 1);
  61 + }
  62 + }
  63 + }
  64 + value.clear();
  65 + break;
  66 +
40 case QPDFTokenizer::tt_string: 67 case QPDFTokenizer::tt_string:
41 value = QPDF_String(token.getValue()).unparse(); 68 value = QPDF_String(token.getValue()).unparse();
  69 +
42 break; 70 break;
43 71
44 case QPDFTokenizer::tt_name: 72 case QPDFTokenizer::tt_name:
@@ -59,10 +87,14 @@ Pl_QPDFTokenizer::processChar(char ch) @@ -59,10 +87,14 @@ Pl_QPDFTokenizer::processChar(char ch)
59 if (tokenizer.getToken(token, this->unread_char, this->char_to_unread)) 87 if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
60 { 88 {
61 writeToken(token); 89 writeToken(token);
62 - if (this->newline_after_next_token)  
63 - { 90 + std::string value = token.getRawValue();
  91 + QPDFTokenizer::token_type_e token_type = token.getType();
  92 + if (((token_type == QPDFTokenizer::tt_string) ||
  93 + (token_type == QPDFTokenizer::tt_name)) &&
  94 + ((value.find('\r') != std::string::npos) ||
  95 + (value.find('\n') != std::string::npos)))
  96 + {
64 writeNext("\n", 1); 97 writeNext("\n", 1);
65 - this->newline_after_next_token = false;  
66 } 98 }
67 if ((token.getType() == QPDFTokenizer::tt_word) && 99 if ((token.getType() == QPDFTokenizer::tt_word) &&
68 (token.getValue() == "ID")) 100 (token.getValue() == "ID"))
@@ -71,35 +103,6 @@ Pl_QPDFTokenizer::processChar(char ch) @@ -71,35 +103,6 @@ Pl_QPDFTokenizer::processChar(char ch)
71 tokenizer.expectInlineImage(); 103 tokenizer.expectInlineImage();
72 } 104 }
73 } 105 }
74 - else  
75 - {  
76 - bool suppress = false;  
77 - if ((ch == '\n') && (this->last_char_was_cr))  
78 - {  
79 - // Always ignore \n following \r  
80 - suppress = true;  
81 - }  
82 -  
83 - if ((this->last_char_was_cr = (ch == '\r')))  
84 - {  
85 - ch = '\n';  
86 - }  
87 -  
88 - if (this->tokenizer.betweenTokens())  
89 - {  
90 - if (! suppress)  
91 - {  
92 - writeNext(&ch, 1);  
93 - }  
94 - }  
95 - else  
96 - {  
97 - if (ch == '\n')  
98 - {  
99 - this->newline_after_next_token = true;  
100 - }  
101 - }  
102 - }  
103 } 106 }
104 107
105 108
libqpdf/qpdf/Pl_QPDFTokenizer.hh
@@ -28,7 +28,6 @@ class Pl_QPDFTokenizer: public Pipeline @@ -28,7 +28,6 @@ class Pl_QPDFTokenizer: public Pipeline
28 void writeToken(QPDFTokenizer::Token&); 28 void writeToken(QPDFTokenizer::Token&);
29 29
30 QPDFTokenizer tokenizer; 30 QPDFTokenizer tokenizer;
31 - bool newline_after_next_token;  
32 bool just_wrote_nl; 31 bool just_wrote_nl;
33 bool last_char_was_cr; 32 bool last_char_was_cr;
34 bool unread_char; 33 bool unread_char;