Commit 137dc7acb9f46dfe40b73dd0079bf130eb6981e0

Authored by Jay Berkenbilt
1 parent 6c820d6c

Refactor: move resolution of literal to its own method

include/qpdf/QPDFTokenizer.hh
... ... @@ -133,6 +133,7 @@ class QPDFTokenizer
133 133  
134 134 private:
135 135 void reset();
  136 + void resolveLiteral();
136 137  
137 138 // Lexer state
138 139 enum { st_top, st_in_comment, st_in_string, st_lt, st_gt,
... ...
libqpdf/QPDFTokenizer.cc
... ... @@ -50,10 +50,90 @@ QPDFTokenizer::reset()
50 50 }
51 51  
52 52 void
53   -QPDFTokenizer::presentCharacter(char ch)
  53 +QPDFTokenizer::resolveLiteral()
54 54 {
55 55 PCRE num_re("^[\\+\\-]?(?:\\.\\d+|\\d+(?:\\.\\d+)?)$");
56 56  
  57 + if ((val.length() > 0) && (val[0] == '/'))
  58 + {
  59 + type = tt_name;
  60 + // Deal with # in name token. Note: '/' by itself is a
  61 + // valid name, so don't strip leading /. That way we
  62 + // don't have to deal with the empty string as a name.
  63 + std::string nval = "/";
  64 + char const* valstr = val.c_str() + 1;
  65 + for (char const* p = valstr; *p; ++p)
  66 + {
  67 + if ((*p == '#') && this->pound_special_in_name)
  68 + {
  69 + if (p[1] && p[2] &&
  70 + is_hex_digit(p[1]) && is_hex_digit(p[2]))
  71 + {
  72 + char num[3];
  73 + num[0] = p[1];
  74 + num[1] = p[2];
  75 + num[2] = '\0';
  76 + char ch = (char)(strtol(num, 0, 16));
  77 + if (ch == '\0')
  78 + {
  79 + type = tt_bad;
  80 + QTC::TC("qpdf", "QPDF_Tokenizer null in name");
  81 + error_message =
  82 + "null character not allowed in name token";
  83 + nval += "#00";
  84 + }
  85 + else
  86 + {
  87 + nval += ch;
  88 + }
  89 + p += 2;
  90 + }
  91 + else
  92 + {
  93 + QTC::TC("qpdf", "QPDF_Tokenizer bad name");
  94 + type = tt_bad;
  95 + error_message = "invalid name token";
  96 + nval += *p;
  97 + }
  98 + }
  99 + else
  100 + {
  101 + nval += *p;
  102 + }
  103 + }
  104 + val = nval;
  105 + }
  106 + else if (num_re.match(val.c_str()))
  107 + {
  108 + if (val.find('.') != std::string::npos)
  109 + {
  110 + type = tt_real;
  111 + }
  112 + else
  113 + {
  114 + type = tt_integer;
  115 + }
  116 + }
  117 + else if ((val == "true") || (val == "false"))
  118 + {
  119 + type = tt_bool;
  120 + }
  121 + else if (val == "null")
  122 + {
  123 + type = tt_null;
  124 + }
  125 + else
  126 + {
  127 + // I don't really know what it is, so leave it as tt_word.
  128 + // Lots of cases ($, #, etc.) other than actual words fall
  129 + // into this category, but that's okay at least for now.
  130 + type = tt_word;
  131 + }
  132 +}
  133 +
  134 +void
  135 +QPDFTokenizer::presentCharacter(char ch)
  136 +{
57 137 if (state == st_token_ready)
58 138 {
59 139 throw std::logic_error(
... ... @@ -342,81 +422,7 @@ QPDFTokenizer::presentCharacter(char ch)
342 422  
343 423 if ((state == st_token_ready) && (type == tt_word))
344 424 {
345   - if ((val.length() > 0) && (val[0] == '/'))
346   - {
347   - type = tt_name;
348   - // Deal with # in name token. Note: '/' by itself is a
349   - // valid name, so don't strip leading /. That way we
350   - // don't have to deal with the empty string as a name.
351   - std::string nval = "/";
352   - char const* valstr = val.c_str() + 1;
353   - for (char const* p = valstr; *p; ++p)
354   - {
355   - if ((*p == '#') && this->pound_special_in_name)
356   - {
357   - if (p[1] && p[2] &&
358   - is_hex_digit(p[1]) && is_hex_digit(p[2]))
359   - {
360   - char num[3];
361   - num[0] = p[1];
362   - num[1] = p[2];
363   - num[2] = '\0';
364   - char ch = (char)(strtol(num, 0, 16));
365   - if (ch == '\0')
366   - {
367   - type = tt_bad;
368   - QTC::TC("qpdf", "QPDF_Tokenizer null in name");
369   - error_message =
370   - "null character not allowed in name token";
371   - nval += "#00";
372   - }
373   - else
374   - {
375   - nval += ch;
376   - }
377   - p += 2;
378   - }
379   - else
380   - {
381   - QTC::TC("qpdf", "QPDF_Tokenizer bad name");
382   - type = tt_bad;
383   - error_message = "invalid name token";
384   - nval += *p;
385   - }
386   - }
387   - else
388   - {
389   - nval += *p;
390   - }
391   - }
392   - val = nval;
393   - }
394   - else if (num_re.match(val.c_str()))
395   - {
396   - if (val.find('.') != std::string::npos)
397   - {
398   - type = tt_real;
399   - }
400   - else
401   - {
402   - type = tt_integer;
403   - }
404   - }
405   - else if ((val == "true") || (val == "false"))
406   - {
407   - type = tt_bool;
408   - }
409   - else if (val == "null")
410   - {
411   - type = tt_null;
412   - }
413   - else
414   - {
415   - // I don't really know what it is, so leave it as tt_word.
416   - // Lots of cases ($, #, etc.) other than actual words fall
417   - // into this category, but that's okay at least for now.
418   - type = tt_word;
419   - }
  425 + resolveLiteral();
420 426 }
421 427  
422 428 if (! (betweenTokens() || ((state == st_token_ready) && unread_char)))
... ...