Commit 137dc7acb9f46dfe40b73dd0079bf130eb6981e0

Authored by Jay Berkenbilt
1 parent 6c820d6c

Refactor: move resolution of literal to its own method

include/qpdf/QPDFTokenizer.hh
@@ -133,6 +133,7 @@ class QPDFTokenizer
133 133
134 private: 134 private:
135 void reset(); 135 void reset();
  136 + void resolveLiteral();
136 137
137 // Lexer state 138 // Lexer state
138 enum { st_top, st_in_comment, st_in_string, st_lt, st_gt, 139 enum { st_top, st_in_comment, st_in_string, st_lt, st_gt,
libqpdf/QPDFTokenizer.cc
@@ -50,10 +50,90 @@ QPDFTokenizer::reset()
50 } 50 }
51 51
52 void 52 void
53 -QPDFTokenizer::presentCharacter(char ch) 53 +QPDFTokenizer::resolveLiteral()
54 { 54 {
55 PCRE num_re("^[\\+\\-]?(?:\\.\\d+|\\d+(?:\\.\\d+)?)$"); 55 PCRE num_re("^[\\+\\-]?(?:\\.\\d+|\\d+(?:\\.\\d+)?)$");
56 56
  57 + if ((val.length() > 0) && (val[0] == '/'))
  58 + {
  59 + type = tt_name;
  60 + // Deal with # in name token. Note: '/' by itself is a
  61 + // valid name, so don't strip leading /. That way we
  62 + // don't have to deal with the empty string as a name.
  63 + std::string nval = "/";
  64 + char const* valstr = val.c_str() + 1;
  65 + for (char const* p = valstr; *p; ++p)
  66 + {
  67 + if ((*p == '#') && this->pound_special_in_name)
  68 + {
  69 + if (p[1] && p[2] &&
  70 + is_hex_digit(p[1]) && is_hex_digit(p[2]))
  71 + {
  72 + char num[3];
  73 + num[0] = p[1];
  74 + num[1] = p[2];
  75 + num[2] = '\0';
  76 + char ch = (char)(strtol(num, 0, 16));
  77 + if (ch == '\0')
  78 + {
  79 + type = tt_bad;
  80 + QTC::TC("qpdf", "QPDF_Tokenizer null in name");
  81 + error_message =
  82 + "null character not allowed in name token";
  83 + nval += "#00";
  84 + }
  85 + else
  86 + {
  87 + nval += ch;
  88 + }
  89 + p += 2;
  90 + }
  91 + else
  92 + {
  93 + QTC::TC("qpdf", "QPDF_Tokenizer bad name");
  94 + type = tt_bad;
  95 + error_message = "invalid name token";
  96 + nval += *p;
  97 + }
  98 + }
  99 + else
  100 + {
  101 + nval += *p;
  102 + }
  103 + }
  104 + val = nval;
  105 + }
  106 + else if (num_re.match(val.c_str()))
  107 + {
  108 + if (val.find('.') != std::string::npos)
  109 + {
  110 + type = tt_real;
  111 + }
  112 + else
  113 + {
  114 + type = tt_integer;
  115 + }
  116 + }
  117 + else if ((val == "true") || (val == "false"))
  118 + {
  119 + type = tt_bool;
  120 + }
  121 + else if (val == "null")
  122 + {
  123 + type = tt_null;
  124 + }
  125 + else
  126 + {
  127 + // I don't really know what it is, so leave it as tt_word.
  128 + // Lots of cases ($, #, etc.) other than actual words fall
  129 + // into this category, but that's okay at least for now.
  130 + type = tt_word;
  131 + }
  132 +}
  133 +
  134 +void
  135 +QPDFTokenizer::presentCharacter(char ch)
  136 +{
57 if (state == st_token_ready) 137 if (state == st_token_ready)
58 { 138 {
59 throw std::logic_error( 139 throw std::logic_error(
@@ -342,81 +422,7 @@ QPDFTokenizer::presentCharacter(char ch)
342 422
343 if ((state == st_token_ready) && (type == tt_word)) 423 if ((state == st_token_ready) && (type == tt_word))
344 { 424 {
345 - if ((val.length() > 0) && (val[0] == '/'))  
346 - {  
347 - type = tt_name;  
348 - // Deal with # in name token. Note: '/' by itself is a  
349 - // valid name, so don't strip leading /. That way we  
350 - // don't have to deal with the empty string as a name.  
351 - std::string nval = "/";  
352 - char const* valstr = val.c_str() + 1;  
353 - for (char const* p = valstr; *p; ++p)  
354 - {  
355 - if ((*p == '#') && this->pound_special_in_name)  
356 - {  
357 - if (p[1] && p[2] &&  
358 - is_hex_digit(p[1]) && is_hex_digit(p[2]))  
359 - {  
360 - char num[3];  
361 - num[0] = p[1];  
362 - num[1] = p[2];  
363 - num[2] = '\0';  
364 - char ch = (char)(strtol(num, 0, 16));  
365 - if (ch == '\0')  
366 - {  
367 - type = tt_bad;  
368 - QTC::TC("qpdf", "QPDF_Tokenizer null in name");  
369 - error_message =  
370 - "null character not allowed in name token";  
371 - nval += "#00";  
372 - }  
373 - else  
374 - {  
375 - nval += ch;  
376 - }  
377 - p += 2;  
378 - }  
379 - else  
380 - {  
381 - QTC::TC("qpdf", "QPDF_Tokenizer bad name");  
382 - type = tt_bad;  
383 - error_message = "invalid name token";  
384 - nval += *p;  
385 - }  
386 - }  
387 - else  
388 - {  
389 - nval += *p;  
390 - }  
391 - }  
392 - val = nval;  
393 - }  
394 - else if (num_re.match(val.c_str()))  
395 - {  
396 - if (val.find('.') != std::string::npos)  
397 - {  
398 - type = tt_real;  
399 - }  
400 - else  
401 - {  
402 - type = tt_integer;  
403 - }  
404 - }  
405 - else if ((val == "true") || (val == "false"))  
406 - {  
407 - type = tt_bool;  
408 - }  
409 - else if (val == "null")  
410 - {  
411 - type = tt_null;  
412 - }  
413 - else  
414 - {  
415 - // I don't really know what it is, so leave it as tt_word.  
416 - // Lots of cases ($, #, etc.) other than actual words fall  
417 - // into this category, but that's okay at least for now.  
418 - type = tt_word;  
419 - } 425 + resolveLiteral();
420 } 426 }
421 427
422 if (! (betweenTokens() || ((state == st_token_ready) && unread_char))) 428 if (! (betweenTokens() || ((state == st_token_ready) && unread_char)))