Commit b4b52d224d2fce4dafbcc1dca158cdd9ff2710f9
1 parent: 2ed0a9b6
olevba: deobfuscation line by line to handle large files
Showing 1 changed file with 21 additions and 18 deletions
oletools/olevba.py
| ... | ... | @@ -195,8 +195,9 @@ from __future__ import print_function |
| 195 | 195 | # 2017-05-19 PL: - added enable_logging to fix issue #154 |
| 196 | 196 | # 2017-05-31 c1fe: - PR #135 fixing issue #132 for some Mac files |
| 197 | 197 | # 2017-06-08 PL: - fixed issue #122 Chr() with negative numbers |
| 198 | +# 2017-06-15 PL: - deobfuscation line by line to handle large files | |
| 198 | 199 | |
| 199 | -__version__ = '0.51dev10' | |
| 200 | +__version__ = '0.51dev11' | |
| 200 | 201 | |
| 201 | 202 | #------------------------------------------------------------------------------ |
| 202 | 203 | # TODO: |
| ... | ... | @@ -1961,23 +1962,25 @@ def detect_vba_strings(vba_code): |
| 1961 | 1962 | # we must expand tabs to have the same string as pyparsing. |
| 1962 | 1963 | # Otherwise, start and end offsets are incorrect. |
| 1963 | 1964 | vba_code = vba_code.expandtabs() |
| 1964 | - for tokens, start, end in vba_expr_str.scanString(vba_code): | |
| 1965 | - encoded = vba_code[start:end] | |
| 1966 | - decoded = tokens[0] | |
| 1967 | - if isinstance(decoded, VbaExpressionString): | |
| 1968 | - # This is a VBA expression, not a simple string | |
| 1969 | - # print 'VBA EXPRESSION: encoded=%r => decoded=%r' % (encoded, decoded) | |
| 1970 | - # remove parentheses and quotes from original string: | |
| 1971 | - # if encoded.startswith('(') and encoded.endswith(')'): | |
| 1972 | - # encoded = encoded[1:-1] | |
| 1973 | - # if encoded.startswith('"') and encoded.endswith('"'): | |
| 1974 | - # encoded = encoded[1:-1] | |
| 1975 | - # avoid duplicates and simple strings: | |
| 1976 | - if encoded not in found and decoded != encoded: | |
| 1977 | - results.append((encoded, decoded)) | |
| 1978 | - found.add(encoded) | |
| 1979 | - # else: | |
| 1980 | - # print 'VBA STRING: encoded=%r => decoded=%r' % (encoded, decoded) | |
| 1965 | + # Split the VBA code line by line to avoid MemoryError on large scripts: | |
| 1966 | + for vba_line in vba_code.splitlines(): | |
| 1967 | + for tokens, start, end in vba_expr_str.scanString(vba_line): | |
| 1968 | + encoded = vba_line[start:end] | |
| 1969 | + decoded = tokens[0] | |
| 1970 | + if isinstance(decoded, VbaExpressionString): | |
| 1971 | + # This is a VBA expression, not a simple string | |
| 1972 | + # print 'VBA EXPRESSION: encoded=%r => decoded=%r' % (encoded, decoded) | |
| 1973 | + # remove parentheses and quotes from original string: | |
| 1974 | + # if encoded.startswith('(') and encoded.endswith(')'): | |
| 1975 | + # encoded = encoded[1:-1] | |
| 1976 | + # if encoded.startswith('"') and encoded.endswith('"'): | |
| 1977 | + # encoded = encoded[1:-1] | |
| 1978 | + # avoid duplicates and simple strings: | |
| 1979 | + if encoded not in found and decoded != encoded: | |
| 1980 | + results.append((encoded, decoded)) | |
| 1981 | + found.add(encoded) | |
| 1982 | + # else: | |
| 1983 | + # print 'VBA STRING: encoded=%r => decoded=%r' % (encoded, decoded) | |
| 1981 | 1984 | return results |
| 1982 | 1985 | |
| 1983 | 1986 | ... | ... |