Commit b4b52d224d2fce4dafbcc1dca158cdd9ff2710f9

Authored by decalage2
1 parent 2ed0a9b6

olevba: scan for obfuscated VBA strings line by line in detect_vba_strings, to avoid MemoryError on large files

Showing 1 changed file with 21 additions and 18 deletions
oletools/olevba.py
... ... @@ -195,8 +195,9 @@ from __future__ import print_function
195 195 # 2017-05-19 PL: - added enable_logging to fix issue #154
196 196 # 2017-05-31 c1fe: - PR #135 fixing issue #132 for some Mac files
197 197 # 2017-06-08 PL: - fixed issue #122 Chr() with negative numbers
  198 +# 2017-06-15 PL: - deobfuscation line by line to handle large files
198 199  
199   -__version__ = '0.51dev10'
  200 +__version__ = '0.51dev11'
200 201  
201 202 #------------------------------------------------------------------------------
202 203 # TODO:
... ... @@ -1961,23 +1962,25 @@ def detect_vba_strings(vba_code):
1961 1962 # we must expand tabs to have the same string as pyparsing.
1962 1963 # Otherwise, start and end offsets are incorrect.
1963 1964 vba_code = vba_code.expandtabs()
1964   - for tokens, start, end in vba_expr_str.scanString(vba_code):
1965   - encoded = vba_code[start:end]
1966   - decoded = tokens[0]
1967   - if isinstance(decoded, VbaExpressionString):
1968   - # This is a VBA expression, not a simple string
1969   - # print 'VBA EXPRESSION: encoded=%r => decoded=%r' % (encoded, decoded)
1970   - # remove parentheses and quotes from original string:
1971   - # if encoded.startswith('(') and encoded.endswith(')'):
1972   - # encoded = encoded[1:-1]
1973   - # if encoded.startswith('"') and encoded.endswith('"'):
1974   - # encoded = encoded[1:-1]
1975   - # avoid duplicates and simple strings:
1976   - if encoded not in found and decoded != encoded:
1977   - results.append((encoded, decoded))
1978   - found.add(encoded)
1979   - # else:
1980   - # print 'VBA STRING: encoded=%r => decoded=%r' % (encoded, decoded)
  1965 + # Split the VBA code line by line to avoid MemoryError on large scripts:
  1966 + for vba_line in vba_code.splitlines():
  1967 + for tokens, start, end in vba_expr_str.scanString(vba_line):
  1968 + encoded = vba_line[start:end]
  1969 + decoded = tokens[0]
  1970 + if isinstance(decoded, VbaExpressionString):
  1971 + # This is a VBA expression, not a simple string
  1972 + # print 'VBA EXPRESSION: encoded=%r => decoded=%r' % (encoded, decoded)
  1973 + # remove parentheses and quotes from original string:
  1974 + # if encoded.startswith('(') and encoded.endswith(')'):
  1975 + # encoded = encoded[1:-1]
  1976 + # if encoded.startswith('"') and encoded.endswith('"'):
  1977 + # encoded = encoded[1:-1]
  1978 + # avoid duplicates and simple strings:
  1979 + if encoded not in found and decoded != encoded:
  1980 + results.append((encoded, decoded))
  1981 + found.add(encoded)
  1982 + # else:
  1983 + # print 'VBA STRING: encoded=%r => decoded=%r' % (encoded, decoded)
1981 1984 return results
1982 1985  
1983 1986  
... ...