Commit 42ed58e44685c20c3c3a16ce835044d4d3b5ec3d
1 parent: fe33b7ca
Integrate booleans and null into state machine in QPDFTokenizer
Showing 2 changed files with 7 additions and 29 deletions
include/qpdf/QPDFTokenizer.hh
| ... | ... | @@ -193,7 +193,6 @@ class QPDFTokenizer |
| 193 | 193 | QPDFTokenizer(QPDFTokenizer const&) = delete; |
| 194 | 194 | QPDFTokenizer& operator=(QPDFTokenizer const&) = delete; |
| 195 | 195 | |
| 196 | - void resolveLiteral(); | |
| 197 | 196 | bool isSpace(char); |
| 198 | 197 | bool isDelimiter(char); |
| 199 | 198 | void findEI(std::shared_ptr<InputSource> input); | ... | ... |
libqpdf/QPDFTokenizer.cc
| ... | ... | @@ -132,34 +132,13 @@ QPDFTokenizer::isDelimiter(char ch) |
| 132 | 132 | } |
| 133 | 133 | |
| 134 | 134 | void |
| 135 | -QPDFTokenizer::resolveLiteral() | |
| 136 | -{ | |
| 137 | - if ((this->val == "true") || (this->val == "false")) { | |
| 138 | - this->type = tt_bool; | |
| 139 | - } else if (this->val == "null") { | |
| 140 | - this->type = tt_null; | |
| 141 | - } else { | |
| 142 | - // I don't really know what it is, so leave it as tt_word. | |
| 143 | - // Lots of cases ($, #, etc.) other than actual words fall | |
| 144 | - // into this category, but that's okay at least for now. | |
| 145 | - this->type = tt_word; | |
| 146 | - } | |
| 147 | -} | |
| 148 | - | |
| 149 | -void | |
| 150 | 135 | QPDFTokenizer::presentCharacter(char ch) |
| 151 | 136 | { |
| 152 | - char orig_ch = ch; | |
| 153 | - | |
| 154 | 137 | handleCharacter(ch); |
| 155 | 138 | |
| 156 | - if ((this->state == st_token_ready) && (this->type == tt_word)) { | |
| 157 | - resolveLiteral(); | |
| 158 | - } | |
| 159 | - | |
| 160 | 139 | if (!(betweenTokens() || |
| 161 | 140 | ((this->state == st_token_ready) && this->unread_char))) { |
| 162 | - this->raw_val += orig_ch; | |
| 141 | + this->raw_val += ch; | |
| 163 | 142 | } |
| 164 | 143 | } |
| 165 | 144 | |
| ... | ... | @@ -697,10 +676,12 @@ QPDFTokenizer::inLiteral(char ch) |
| 697 | 676 | // though not on any files in the test suite as of this |
| 698 | 677 | // writing. |
| 699 | 678 | |
| 700 | - this->type = tt_word; | |
| 701 | 679 | this->unread_char = true; |
| 702 | 680 | this->char_to_unread = ch; |
| 703 | 681 | this->state = st_token_ready; |
| 682 | + this->type = (this->val == "true") || (this->val == "false") | |
| 683 | + ? tt_bool | |
| 684 | + : (this->val == "null" ? tt_null : tt_word); | |
| 704 | 685 | } else { |
| 705 | 686 | this->val += ch; |
| 706 | 687 | } |
| ... | ... | @@ -804,14 +785,13 @@ QPDFTokenizer::presentEOF() |
| 804 | 785 | if (this->state == st_name || this->state == st_name_hex1 || |
| 805 | 786 | this->state == st_name_hex2 || this->state == st_number || |
| 806 | 787 | this->state == st_real || this->state == st_sign || |
| 807 | - this->state == st_decimal) { | |
| 788 | + this->state == st_decimal || this->state == st_literal) { | |
| 789 | + | |
| 790 | + QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token"); | |
| 808 | 791 | // Push any delimiter to the state machine to finish off the final |
| 809 | 792 | // token. |
| 810 | 793 | presentCharacter('\f'); |
| 811 | 794 | this->unread_char = false; |
| 812 | - } else if (this->state == st_literal) { | |
| 813 | - QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token"); | |
| 814 | - resolveLiteral(); | |
| 815 | 795 | } else if ((this->include_ignorable) && (this->state == st_in_space)) { |
| 816 | 796 | this->type = tt_space; |
| 817 | 797 | } else if ((this->include_ignorable) && (this->state == st_in_comment)) { |
| ... | ... | @@ -823,7 +803,6 @@ QPDFTokenizer::presentEOF() |
| 823 | 803 | this->type = tt_bad; |
| 824 | 804 | this->error_message = "EOF while reading token"; |
| 825 | 805 | } |
| 826 | - | |
| 827 | 806 | this->state = st_token_ready; |
| 828 | 807 | } |
| 829 | 808 | ... | ... |