Commit 42ed58e44685c20c3c3a16ce835044d4d3b5ec3d

Authored by m-holger
1 parent fe33b7ca

Integrate booleans and null into state machine in QPDFTokenizer

include/qpdf/QPDFTokenizer.hh
... ... @@ -193,7 +193,6 @@ class QPDFTokenizer
193 193 QPDFTokenizer(QPDFTokenizer const&) = delete;
194 194 QPDFTokenizer& operator=(QPDFTokenizer const&) = delete;
195 195  
196   - void resolveLiteral();
197 196 bool isSpace(char);
198 197 bool isDelimiter(char);
199 198 void findEI(std::shared_ptr<InputSource> input);
... ...
libqpdf/QPDFTokenizer.cc
... ... @@ -132,34 +132,13 @@ QPDFTokenizer::isDelimiter(char ch)
132 132 }
133 133  
134 134 void
135   -QPDFTokenizer::resolveLiteral()
136   -{
137   - if ((this->val == "true") || (this->val == "false")) {
138   - this->type = tt_bool;
139   - } else if (this->val == "null") {
140   - this->type = tt_null;
141   - } else {
142   - // I don't really know what it is, so leave it as tt_word.
143   - // Lots of cases ($, #, etc.) other than actual words fall
144   - // into this category, but that's okay at least for now.
145   - this->type = tt_word;
146   - }
147   -}
148   -
149   -void
150 135 QPDFTokenizer::presentCharacter(char ch)
151 136 {
152   - char orig_ch = ch;
153   -
154 137 handleCharacter(ch);
155 138  
156   - if ((this->state == st_token_ready) && (this->type == tt_word)) {
157   - resolveLiteral();
158   - }
159   -
160 139 if (!(betweenTokens() ||
161 140 ((this->state == st_token_ready) && this->unread_char))) {
162   - this->raw_val += orig_ch;
  141 + this->raw_val += ch;
163 142 }
164 143 }
165 144  
... ... @@ -697,10 +676,12 @@ QPDFTokenizer::inLiteral(char ch)
697 676 // though not on any files in the test suite as of this
698 677 // writing.
699 678  
700   - this->type = tt_word;
701 679 this->unread_char = true;
702 680 this->char_to_unread = ch;
703 681 this->state = st_token_ready;
  682 + this->type = (this->val == "true") || (this->val == "false")
  683 + ? tt_bool
  684 + : (this->val == "null" ? tt_null : tt_word);
704 685 } else {
705 686 this->val += ch;
706 687 }
... ... @@ -804,14 +785,13 @@ QPDFTokenizer::presentEOF()
804 785 if (this->state == st_name || this->state == st_name_hex1 ||
805 786 this->state == st_name_hex2 || this->state == st_number ||
806 787 this->state == st_real || this->state == st_sign ||
807   - this->state == st_decimal) {
  788 + this->state == st_decimal || this->state == st_literal) {
  789 +
  790 + QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token");
808 791 // Push any delimiter to the state machine to finish off the final
809 792 // token.
810 793 presentCharacter('\f');
811 794 this->unread_char = false;
812   - } else if (this->state == st_literal) {
813   - QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token");
814   - resolveLiteral();
815 795 } else if ((this->include_ignorable) && (this->state == st_in_space)) {
816 796 this->type = tt_space;
817 797 } else if ((this->include_ignorable) && (this->state == st_in_comment)) {
... ... @@ -823,7 +803,6 @@ QPDFTokenizer::presentEOF()
823 803 this->type = tt_bad;
824 804 this->error_message = "EOF while reading token";
825 805 }
826   -
827 806 this->state = st_token_ready;
828 807 }
829 808  
... ...