Commit f15af87a886a7c663305d02eeb426d27294198f9

Authored by Philippe Lagadec
1 parent f5b8a4fd

olevba: partially fixed issue 34 in the VBA parser (fixed incorrect expression parsing)

Showing 1 changed file with 63 additions and 3 deletions
oletools/olevba.py
... ... @@ -161,8 +161,9 @@ https://github.com/unixfreak0037/officeparser
161 161 # 2016-01-31 PL: - fixed issue #31 in VBA_Parser.open_mht
162 162 # - fixed issue #32 by monkeypatching email.feedparser
163 163 # 2016-02-07 PL: - KeyboardInterrupt is now raised properly
  164 +# 2016-02-20 v0.43 PL: - fixed issue #34 in the VBA parser and vba_chr
164 165  
165   -__version__ = '0.42'
  166 +__version__ = '0.43'
166 167  
167 168 #------------------------------------------------------------------------------
168 169 # TODO:
... ... @@ -531,6 +532,7 @@ class VbaExpressionString(str):
531 532 Then isinstance(s, VbaExpressionString) is True only for VBA expressions.
532 533 (see detect_vba_strings)
533 534 """
  535 + # TODO: use Unicode everywhere instead of str
534 536 pass
535 537  
536 538  
... ... @@ -588,12 +590,70 @@ vba_expr_int = Forward()
588 590  
589 591 # --- CHR --------------------------------------------------------------------
590 592  
  593 +# MS-VBAL 6.1.2.11.1.4 Chr / Chr$
  594 +# Function Chr(CharCode As Long) As Variant
  595 +# Function Chr$(CharCode As Long) As String
  596 +# Parameter Description
  597 +# CharCode Long whose value is a code point.
  598 +# Returns a String data value consisting of a single character containing the character whose code
  599 +# point is the data value of the argument.
  600 +# - If the argument is not in the range 0 to 255, Error Number 5 ("Invalid procedure call or
  601 +# argument") is raised unless the implementation supports a character set with a larger code point
  602 +# range.
  603 +# - If the argument value is in the range of 0 to 127, it is interpreted as a 7-bit ASCII code point.
  604 +# - If the argument value is in the range of 128 to 255, the code point interpretation of the value is
  605 +# implementation defined.
  606 +# - Chr$ has the same runtime semantics as Chr, however the declared type of its function result is
  607 +# String rather than Variant.
  608 +
  609 +# 6.1.2.11.1.5 ChrB / ChrB$
  610 +# Function ChrB(CharCode As Long) As Variant
  611 +# Function ChrB$(CharCode As Long) As String
  612 +# CharCode Long whose value is a code point.
  613 +# Returns a String data value consisting of a single byte character whose code point value is the
  614 +# data value of the argument.
  615 +# - If the argument is not in the range 0 to 255, Error Number 6 ("Overflow") is raised.
  616 +# - ChrB$ has the same runtime semantics as ChrB however the declared type of its function result
  617 +# is String rather than Variant.
  618 +# - Note: the ChrB function is used with byte data contained in a String. Instead of returning a
  619 +# character, which may be one or two bytes, ChrB always returns a single byte. The ChrW function
  620 +# returns a String containing the Unicode character except on platforms where Unicode is not
  621 +# supported, in which case, the behavior is identical to the Chr function.
  622 +
  623 +# 6.1.2.11.1.6 ChrW/ ChrW$
  624 +# Function ChrW(CharCode As Long) As Variant
  625 +# Function ChrW$(CharCode As Long) As String
  626 +# CharCode Long whose value is a code point.
  627 +# Returns a String data value consisting of a single character containing the character whose code
  628 +# point is the data value of the argument.
  629 +# - If the argument is not in the range -32,767 to 65,535 then Error Number 5 ("Invalid procedure
  630 +# call or argument") is raised.
  631 +# - If the argument is a negative value it is treated as if it was the value: CharCode + 65,536.
  632 +# - If the implementation uses 16-bit Unicode code points, the argument data value is interpreted
  633 +# as a 16-bit Unicode code point.
  634 +# - If the implementation does not support Unicode, ChrW has the same semantics as Chr.
  635 +# - ChrW$ has the same runtime semantics as ChrW, however the declared type of its function result
  636 +# is String rather than Variant.
  637 +
591 638 # Chr, Chr$, ChrB, ChrW(int) => char
592 639 vba_chr = Suppress(
593 640 Combine(WordStart(vba_identifier_chars) + CaselessLiteral('Chr')
594 641 + Optional(CaselessLiteral('B') | CaselessLiteral('W')) + Optional('$'))
595 642 + '(') + vba_expr_int + Suppress(')')
596   -vba_chr.setParseAction(lambda t: VbaExpressionString(chr(t[0])))
  643 +
def vba_chr_tostr(t):
    """
    Parse action for vba_chr: turn the integer argument of a
    Chr / Chr$ / ChrB / ChrW call into the corresponding character.

    :param t: pyparsing tokens; t[0] is expected to be the character code
        as an int (the Chr keyword and parentheses are suppressed).
    :return: VbaExpressionString holding the character. Codes above 255 are
        returned as a UTF-8 encoded byte string. If the code cannot be
        converted, the textual form 'Chr(<code>)' is returned instead, so
        that parsing can continue.
    """
    # Read the argument before the try block, so that the error handler
    # below can always refer to it.
    i = t[0]
    code = i
    # MS-VBAL 6.1.2.11.1.6: a negative ChrW argument is treated as
    # CharCode + 65536 (16-bit wrap-around).
    if -32768 <= code < 0:
        code += 65536
    try:
        if 0 <= code <= 255:
            # normal, non-unicode character:
            return VbaExpressionString(chr(code))
        # wide character: VbaExpressionString is a str subclass, so the
        # character is stored as UTF-8 bytes.
        return VbaExpressionString(
            unichr(code).encode('utf-8', 'backslashreplace'))
    except (ValueError, OverflowError):
        # The code is outside the range accepted by unichr(): log it and
        # keep the expression in textual form.
        log.exception('ERROR: incorrect parameter value for chr(): %r', i)
        return VbaExpressionString('Chr(%r)' % i)
  655 +
  656 +vba_chr.setParseAction(vba_chr_tostr)
597 657  
598 658  
599 659 # --- ASC --------------------------------------------------------------------
... ... @@ -735,8 +795,8 @@ vba_expr_int <<= infixNotation(vba_expr_int_item,
735 795 [
736 796 ("*", 2, opAssoc.LEFT, multiply_ints_list),
737 797 ("/", 2, opAssoc.LEFT, divide_ints_list),
738   - ("+", 2, opAssoc.LEFT, sum_ints_list),
739 798 ("-", 2, opAssoc.LEFT, subtract_ints_list),
  799 + ("+", 2, opAssoc.LEFT, sum_ints_list),
740 800 ])
741 801  
742 802  
... ...