Commit b4b52d224d2fce4dafbcc1dca158cdd9ff2710f9

Authored by decalage2
1 parent 2ed0a9b6

olevba: deobfuscate VBA strings line by line (in detect_vba_strings) to avoid MemoryError on large files

Showing 1 changed file with 21 additions and 18 deletions
oletools/olevba.py
@@ -195,8 +195,9 @@ from __future__ import print_function @@ -195,8 +195,9 @@ from __future__ import print_function
195 # 2017-05-19 PL: - added enable_logging to fix issue #154 195 # 2017-05-19 PL: - added enable_logging to fix issue #154
196 # 2017-05-31 c1fe: - PR #135 fixing issue #132 for some Mac files 196 # 2017-05-31 c1fe: - PR #135 fixing issue #132 for some Mac files
197 # 2017-06-08 PL: - fixed issue #122 Chr() with negative numbers 197 # 2017-06-08 PL: - fixed issue #122 Chr() with negative numbers
  198 +# 2017-06-15 PL: - deobfuscation line by line to handle large files
198 199
199 -__version__ = '0.51dev10' 200 +__version__ = '0.51dev11'
200 201
201 #------------------------------------------------------------------------------ 202 #------------------------------------------------------------------------------
202 # TODO: 203 # TODO:
@@ -1961,23 +1962,25 @@ def detect_vba_strings(vba_code): @@ -1961,23 +1962,25 @@ def detect_vba_strings(vba_code):
1961 # we must expand tabs to have the same string as pyparsing. 1962 # we must expand tabs to have the same string as pyparsing.
1962 # Otherwise, start and end offsets are incorrect. 1963 # Otherwise, start and end offsets are incorrect.
1963 vba_code = vba_code.expandtabs() 1964 vba_code = vba_code.expandtabs()
1964 - for tokens, start, end in vba_expr_str.scanString(vba_code):  
1965 - encoded = vba_code[start:end]  
1966 - decoded = tokens[0]  
1967 - if isinstance(decoded, VbaExpressionString):  
1968 - # This is a VBA expression, not a simple string  
1969 - # print 'VBA EXPRESSION: encoded=%r => decoded=%r' % (encoded, decoded)  
1970 - # remove parentheses and quotes from original string:  
1971 - # if encoded.startswith('(') and encoded.endswith(')'):  
1972 - # encoded = encoded[1:-1]  
1973 - # if encoded.startswith('"') and encoded.endswith('"'):  
1974 - # encoded = encoded[1:-1]  
1975 - # avoid duplicates and simple strings:  
1976 - if encoded not in found and decoded != encoded:  
1977 - results.append((encoded, decoded))  
1978 - found.add(encoded)  
1979 - # else:  
1980 - # print 'VBA STRING: encoded=%r => decoded=%r' % (encoded, decoded) 1965 + # Split the VBA code line by line to avoid MemoryError on large scripts:
  1966 + for vba_line in vba_code.splitlines():
  1967 + for tokens, start, end in vba_expr_str.scanString(vba_line):
  1968 + encoded = vba_line[start:end]
  1969 + decoded = tokens[0]
  1970 + if isinstance(decoded, VbaExpressionString):
  1971 + # This is a VBA expression, not a simple string
  1972 + # print 'VBA EXPRESSION: encoded=%r => decoded=%r' % (encoded, decoded)
  1973 + # remove parentheses and quotes from original string:
  1974 + # if encoded.startswith('(') and encoded.endswith(')'):
  1975 + # encoded = encoded[1:-1]
  1976 + # if encoded.startswith('"') and encoded.endswith('"'):
  1977 + # encoded = encoded[1:-1]
  1978 + # avoid duplicates and simple strings:
  1979 + if encoded not in found and decoded != encoded:
  1980 + results.append((encoded, decoded))
  1981 + found.add(encoded)
  1982 + # else:
  1983 + # print 'VBA STRING: encoded=%r => decoded=%r' % (encoded, decoded)
1981 return results 1984 return results
1982 1985
1983 1986