Commit ea37768589cffccc2835cfa503dfe50e893773de

Authored by decalage2
1 parent 19911393

sync olevba and olevba3 (work in progress): updated decompress_stream to use a bytearray

oletools/olevba.py
@@ -322,6 +322,9 @@ email.feedparser.headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:?|[\t @@ -322,6 +322,9 @@ email.feedparser.headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:?|[\t
322 322
323 if sys.version_info[0] <= 2: 323 if sys.version_info[0] <= 2:
324 # Python 2.x 324 # Python 2.x
  325 + # to use ord on bytes/bytearray items the same way in Python 2+3
  326 + # on Python 2, just use the normal ord() because items are bytes
  327 + byte_ord = ord
325 if sys.version_info[1] <= 6: 328 if sys.version_info[1] <= 6:
326 # Python 2.6 329 # Python 2.6
327 # use is_zipfile backported from Python 2.7: 330 # use is_zipfile backported from Python 2.7:
@@ -331,6 +334,9 @@ if sys.version_info[0] <= 2: @@ -331,6 +334,9 @@ if sys.version_info[0] <= 2:
331 from zipfile import is_zipfile 334 from zipfile import is_zipfile
332 else: 335 else:
333 # Python 3.x+ 336 # Python 3.x+
  337 + # to use ord on bytes/bytearray items the same way in Python 2+3
  338 + # on Python 3, items are int, so just return the item
  339 + byte_ord = lambda x: x
334 from zipfile import is_zipfile 340 from zipfile import is_zipfile
335 # xrange is now called range: 341 # xrange is now called range:
336 xrange = range 342 xrange = range
@@ -1235,10 +1241,13 @@ def decompress_stream(compressed_container): @@ -1235,10 +1241,13 @@ def decompress_stream(compressed_container):
1235 # DecompressedChunkStart: The location of the first byte of the DecompressedChunk (section 2.4.1.1.3) within the 1241 # DecompressedChunkStart: The location of the first byte of the DecompressedChunk (section 2.4.1.1.3) within the
1236 # DecompressedBuffer (section 2.4.1.1.2). 1242 # DecompressedBuffer (section 2.4.1.1.2).
1237 1243
1238 - decompressed_container = '' # result 1244 + # Check the input is a bytearray:
  1245 + if not isinstance(compressed_container, bytearray):
  1246 + raise TypeError('decompress_stream requires a bytearray as input')
  1247 + decompressed_container = bytearray() # result
1239 compressed_current = 0 1248 compressed_current = 0
1240 1249
1241 - sig_byte = ord(compressed_container[compressed_current]) 1250 + sig_byte = compressed_container[compressed_current]
1242 if sig_byte != 0x01: 1251 if sig_byte != 0x01:
1243 raise ValueError('invalid signature byte {0:02X}'.format(sig_byte)) 1252 raise ValueError('invalid signature byte {0:02X}'.format(sig_byte))
1244 1253
@@ -1284,7 +1293,7 @@ def decompress_stream(compressed_container): @@ -1284,7 +1293,7 @@ def decompress_stream(compressed_container):
1284 # MS-OVBA 2.4.1.3.3 Decompressing a RawChunk 1293 # MS-OVBA 2.4.1.3.3 Decompressing a RawChunk
1285 # uncompressed chunk: read the next 4096 bytes as-is 1294 # uncompressed chunk: read the next 4096 bytes as-is
1286 #TODO: check if there are at least 4096 bytes left 1295 #TODO: check if there are at least 4096 bytes left
1287 - decompressed_container += compressed_container[compressed_current:compressed_current + 4096] 1296 + decompressed_container.extend([compressed_container[compressed_current:compressed_current + 4096]])
1288 compressed_current += 4096 1297 compressed_current += 4096
1289 else: 1298 else:
1290 # MS-OVBA 2.4.1.3.2 Decompressing a CompressedChunk 1299 # MS-OVBA 2.4.1.3.2 Decompressing a CompressedChunk
@@ -1295,7 +1304,7 @@ def decompress_stream(compressed_container): @@ -1295,7 +1304,7 @@ def decompress_stream(compressed_container):
1295 # log.debug('compressed_current = %d / compressed_end = %d' % (compressed_current, compressed_end)) 1304 # log.debug('compressed_current = %d / compressed_end = %d' % (compressed_current, compressed_end))
1296 # FlagByte: 8 bits indicating if the following 8 tokens are either literal (1 byte of plain text) or 1305 # FlagByte: 8 bits indicating if the following 8 tokens are either literal (1 byte of plain text) or
1297 # copy tokens (reference to a previous literal token) 1306 # copy tokens (reference to a previous literal token)
1298 - flag_byte = ord(compressed_container[compressed_current]) 1307 + flag_byte = compressed_container[compressed_current]
1299 compressed_current += 1 1308 compressed_current += 1
1300 for bit_index in xrange(0, 8): 1309 for bit_index in xrange(0, 8):
1301 # log.debug('bit_index=%d / compressed_current=%d / compressed_end=%d' % (bit_index, compressed_current, compressed_end)) 1310 # log.debug('bit_index=%d / compressed_current=%d / compressed_end=%d' % (bit_index, compressed_current, compressed_end))
@@ -1307,7 +1316,7 @@ def decompress_stream(compressed_container): @@ -1307,7 +1316,7 @@ def decompress_stream(compressed_container):
1307 #log.debug('bit_index=%d: flag_bit=%d' % (bit_index, flag_bit)) 1316 #log.debug('bit_index=%d: flag_bit=%d' % (bit_index, flag_bit))
1308 if flag_bit == 0: # LiteralToken 1317 if flag_bit == 0: # LiteralToken
1309 # copy one byte directly to output 1318 # copy one byte directly to output
1310 - decompressed_container += compressed_container[compressed_current] 1319 + decompressed_container.extend([compressed_container[compressed_current]])
1311 compressed_current += 1 1320 compressed_current += 1
1312 else: # CopyToken 1321 else: # CopyToken
1313 # MS-OVBA 2.4.1.3.19.2 Unpack CopyToken 1322 # MS-OVBA 2.4.1.3.19.2 Unpack CopyToken
@@ -1323,9 +1332,9 @@ def decompress_stream(compressed_container): @@ -1323,9 +1332,9 @@ def decompress_stream(compressed_container):
1323 #log.debug('offset=%d length=%d' % (offset, length)) 1332 #log.debug('offset=%d length=%d' % (offset, length))
1324 copy_source = len(decompressed_container) - offset 1333 copy_source = len(decompressed_container) - offset
1325 for index in xrange(copy_source, copy_source + length): 1334 for index in xrange(copy_source, copy_source + length):
1326 - decompressed_container += decompressed_container[index] 1335 + decompressed_container.extend([decompressed_container[index]])
1327 compressed_current += 2 1336 compressed_current += 2
1328 - return decompressed_container 1337 + return bytes(decompressed_container)
1329 1338
1330 1339
1331 def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): 1340 def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
@@ -1366,6 +1375,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): @@ -1366,6 +1375,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1366 code_modules = {} 1375 code_modules = {}
1367 1376
1368 for line in project: 1377 for line in project:
  1378 + log.debug('PROJECT: %r' % line)
1369 line = line.strip() 1379 line = line.strip()
1370 if '=' in line: 1380 if '=' in line:
1371 # split line at the 1st equal sign: 1381 # split line at the 1st equal sign:
@@ -1397,7 +1407,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): @@ -1397,7 +1407,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1397 else: 1407 else:
1398 raise UnexpectedDataError(dir_path, name, expected, value) 1408 raise UnexpectedDataError(dir_path, name, expected, value)
1399 1409
1400 - dir_stream = BytesIO(decompress_stream(dir_compressed)) 1410 + dir_stream = BytesIO(decompress_stream(bytearray(dir_compressed)))
1401 1411
1402 # PROJECTSYSKIND Record 1412 # PROJECTSYSKIND Record
1403 projectsyskind_id = struct.unpack("<H", dir_stream.read(2))[0] 1413 projectsyskind_id = struct.unpack("<H", dir_stream.read(2))[0]
@@ -1813,7 +1823,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): @@ -1813,7 +1823,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1813 log.debug("offset of code_data = {0}".format(moduleoffset_textoffset)) 1823 log.debug("offset of code_data = {0}".format(moduleoffset_textoffset))
1814 code_data = code_data[moduleoffset_textoffset:] 1824 code_data = code_data[moduleoffset_textoffset:]
1815 if len(code_data) > 0: 1825 if len(code_data) > 0:
1816 - code_data = decompress_stream(code_data) 1826 + code_data = decompress_stream(bytearray(code_data))
1817 # case-insensitive search in the code_modules dict to find the file extension: 1827 # case-insensitive search in the code_modules dict to find the file extension:
1818 filext = code_modules.get(modulename_modulename.lower(), 'bin') 1828 filext = code_modules.get(modulename_modulename.lower(), 'bin')
1819 filename = '{0}.{1}'.format(modulename_modulename, filext) 1829 filename = '{0}.{1}'.format(modulename_modulename, filext)
@@ -2120,7 +2130,6 @@ def print_json(json_dict=None, _json_is_first=False, _json_is_last=False, @@ -2120,7 +2130,6 @@ def print_json(json_dict=None, _json_is_first=False, _json_is_last=False,
2120 :param bool _json_is_last: set to True only for very last entry to complete 2130 :param bool _json_is_last: set to True only for very last entry to complete
2121 the top-level json-list 2131 the top-level json-list
2122 """ 2132 """
2123 -  
2124 if json_dict and json_parts: 2133 if json_dict and json_parts:
2125 raise ValueError('Invalid json argument: want either single dict or ' 2134 raise ValueError('Invalid json argument: want either single dict or '
2126 'key=value parts but got both)') 2135 'key=value parts but got both)')
@@ -2949,7 +2958,7 @@ class VBA_Parser(object): @@ -2949,7 +2958,7 @@ class VBA_Parser(object):
2949 log.debug('Found VBA compressed code at index %X' % start) 2958 log.debug('Found VBA compressed code at index %X' % start)
2950 compressed_code = data[start:] 2959 compressed_code = data[start:]
2951 try: 2960 try:
2952 - vba_code = decompress_stream(compressed_code) 2961 + vba_code = decompress_stream(bytearray(compressed_code))
2953 yield (self.filename, d.name, d.name, vba_code) 2962 yield (self.filename, d.name, d.name, vba_code)
2954 except Exception as exc: 2963 except Exception as exc:
2955 # display the exception with full stack trace for debugging 2964 # display the exception with full stack trace for debugging
oletools/olevba3.py
@@ -1232,6 +1232,9 @@ def decompress_stream(compressed_container): @@ -1232,6 +1232,9 @@ def decompress_stream(compressed_container):
1232 # DecompressedChunkStart: The location of the first byte of the DecompressedChunk (section 2.4.1.1.3) within the 1232 # DecompressedChunkStart: The location of the first byte of the DecompressedChunk (section 2.4.1.1.3) within the
1233 # DecompressedBuffer (section 2.4.1.1.2). 1233 # DecompressedBuffer (section 2.4.1.1.2).
1234 1234
  1235 + # Check the input is a bytearray:
  1236 + if not isinstance(compressed_container, bytearray):
  1237 + raise TypeError('decompress_stream requires a bytearray as input')
1235 decompressed_container = bytearray() # result 1238 decompressed_container = bytearray() # result
1236 compressed_current = 0 1239 compressed_current = 0
1237 1240
@@ -1294,7 +1297,7 @@ def decompress_stream(compressed_container): @@ -1294,7 +1297,7 @@ def decompress_stream(compressed_container):
1294 # copy tokens (reference to a previous literal token) 1297 # copy tokens (reference to a previous literal token)
1295 flag_byte = compressed_container[compressed_current] 1298 flag_byte = compressed_container[compressed_current]
1296 compressed_current += 1 1299 compressed_current += 1
1297 - for bit_index in range(0, 8): 1300 + for bit_index in xrange(0, 8):
1298 # log.debug('bit_index=%d / compressed_current=%d / compressed_end=%d' % (bit_index, compressed_current, compressed_end)) 1301 # log.debug('bit_index=%d / compressed_current=%d / compressed_end=%d' % (bit_index, compressed_current, compressed_end))
1299 if compressed_current >= compressed_end: 1302 if compressed_current >= compressed_end:
1300 break 1303 break
@@ -1319,7 +1322,7 @@ def decompress_stream(compressed_container): @@ -1319,7 +1322,7 @@ def decompress_stream(compressed_container):
1319 offset = (temp1 >> temp2) + 1 1322 offset = (temp1 >> temp2) + 1
1320 #log.debug('offset=%d length=%d' % (offset, length)) 1323 #log.debug('offset=%d length=%d' % (offset, length))
1321 copy_source = len(decompressed_container) - offset 1324 copy_source = len(decompressed_container) - offset
1322 - for index in range(copy_source, copy_source + length): 1325 + for index in xrange(copy_source, copy_source + length):
1323 decompressed_container.extend([decompressed_container[index]]) 1326 decompressed_container.extend([decompressed_container[index]])
1324 compressed_current += 2 1327 compressed_current += 2
1325 return bytes(decompressed_container) 1328 return bytes(decompressed_container)
@@ -1394,7 +1397,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): @@ -1394,7 +1397,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1394 else: 1397 else:
1395 raise UnexpectedDataError(dir_path, name, expected, value) 1398 raise UnexpectedDataError(dir_path, name, expected, value)
1396 1399
1397 - dir_stream = BytesIO(decompress_stream(dir_compressed)) 1400 + dir_stream = BytesIO(decompress_stream(bytearray(dir_compressed)))
1398 1401
1399 # PROJECTSYSKIND Record 1402 # PROJECTSYSKIND Record
1400 projectsyskind_id = struct.unpack("<H", dir_stream.read(2))[0] 1403 projectsyskind_id = struct.unpack("<H", dir_stream.read(2))[0]
@@ -1541,7 +1544,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): @@ -1541,7 +1544,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1541 # So let's ignore it, otherwise it crashes on some files (issue #132) 1544 # So let's ignore it, otherwise it crashes on some files (issue #132)
1542 # PR #135 by @c1fe: 1545 # PR #135 by @c1fe:
1543 # contrary to the specification I think that the unicode name 1546 # contrary to the specification I think that the unicode name
1544 - # is optional. if reference_reserved is not 0x003E I think it 1547 + # is optional. if reference_reserved is not 0x003E I think it
1545 # is actually the start of another REFERENCE record 1548 # is actually the start of another REFERENCE record
1546 # at least when projectsyskind_syskind == 0x02 (Macintosh) 1549 # at least when projectsyskind_syskind == 0x02 (Macintosh)
1547 if reference_reserved == 0x003E: 1550 if reference_reserved == 0x003E:
@@ -1671,7 +1674,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): @@ -1671,7 +1674,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1671 uni_out = lambda unicode_text: unicode_text.encode('utf-8', 'replace') 1674 uni_out = lambda unicode_text: unicode_text.encode('utf-8', 'replace')
1672 1675
1673 log.debug("parsing {0} modules".format(projectmodules_count)) 1676 log.debug("parsing {0} modules".format(projectmodules_count))
1674 - for projectmodule_index in range(0, projectmodules_count): 1677 + for projectmodule_index in xrange(0, projectmodules_count):
1675 try: 1678 try:
1676 modulename_id = struct.unpack("<H", dir_stream.read(2))[0] 1679 modulename_id = struct.unpack("<H", dir_stream.read(2))[0]
1677 check_value('MODULENAME_Id', 0x0019, modulename_id) 1680 check_value('MODULENAME_Id', 0x0019, modulename_id)
@@ -1810,7 +1813,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): @@ -1810,7 +1813,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1810 log.debug("offset of code_data = {0}".format(moduleoffset_textoffset)) 1813 log.debug("offset of code_data = {0}".format(moduleoffset_textoffset))
1811 code_data = code_data[moduleoffset_textoffset:] 1814 code_data = code_data[moduleoffset_textoffset:]
1812 if len(code_data) > 0: 1815 if len(code_data) > 0:
1813 - code_data = decompress_stream(code_data) 1816 + code_data = decompress_stream(bytearray(code_data))
1814 # case-insensitive search in the code_modules dict to find the file extension: 1817 # case-insensitive search in the code_modules dict to find the file extension:
1815 filext = code_modules.get(modulename_modulename.lower(), 'bin') 1818 filext = code_modules.get(modulename_modulename.lower(), 'bin')
1816 filename = '{0}.{1}'.format(modulename_modulename, filext) 1819 filename = '{0}.{1}'.format(modulename_modulename, filext)
@@ -2950,7 +2953,7 @@ class VBA_Parser(object): @@ -2950,7 +2953,7 @@ class VBA_Parser(object):
2950 log.debug('Found VBA compressed code at index %X' % start) 2953 log.debug('Found VBA compressed code at index %X' % start)
2951 compressed_code = data[start:] 2954 compressed_code = data[start:]
2952 try: 2955 try:
2953 - vba_code = decompress_stream(compressed_code) 2956 + vba_code = decompress_stream(bytearray(compressed_code))
2954 yield (self.filename, d.name, d.name, vba_code) 2957 yield (self.filename, d.name, d.name, vba_code)
2955 except Exception as exc: 2958 except Exception as exc:
2956 # display the exception with full stack trace for debugging 2959 # display the exception with full stack trace for debugging
@@ -2978,6 +2981,8 @@ class VBA_Parser(object): @@ -2978,6 +2981,8 @@ class VBA_Parser(object):
2978 """ 2981 """
2979 runs extract_macros and analyze the source code of all VBA macros 2982 runs extract_macros and analyze the source code of all VBA macros
2980 found in the file. 2983 found in the file.
  2984 + All results are stored in self.analysis_results.
  2985 + If called more than once, simply returns the previous results.
2981 """ 2986 """
2982 if self.detect_vba_macros(): 2987 if self.detect_vba_macros():
2983 # if the analysis was already done, avoid doing it twice: 2988 # if the analysis was already done, avoid doing it twice:
@@ -3390,16 +3395,6 @@ class VBA_Parser_CLI(VBA_Parser): @@ -3390,16 +3395,6 @@ class VBA_Parser_CLI(VBA_Parser):
3390 3395
3391 line = '%-12s %s' % (flags, self.filename) 3396 line = '%-12s %s' % (flags, self.filename)
3392 print(line) 3397 print(line)
3393 -  
3394 - # old table display:  
3395 - # macros = autoexec = suspicious = iocs = hexstrings = 'no'  
3396 - # if nb_macros: macros = 'YES:%d' % nb_macros  
3397 - # if nb_autoexec: autoexec = 'YES:%d' % nb_autoexec  
3398 - # if nb_suspicious: suspicious = 'YES:%d' % nb_suspicious  
3399 - # if nb_iocs: iocs = 'YES:%d' % nb_iocs  
3400 - # if nb_hexstrings: hexstrings = 'YES:%d' % nb_hexstrings  
3401 - # # 2nd line = info  
3402 - # print '%-8s %-7s %-7s %-7s %-7s %-7s' % (self.type, macros, autoexec, suspicious, iocs, hexstrings)  
3403 except Exception as exc: 3398 except Exception as exc:
3404 # display the exception with full stack trace for debugging only 3399 # display the exception with full stack trace for debugging only
3405 log.debug('Error processing file %s (%s)' % (self.filename, exc), 3400 log.debug('Error processing file %s (%s)' % (self.filename, exc),
@@ -3407,20 +3402,6 @@ class VBA_Parser_CLI(VBA_Parser): @@ -3407,20 +3402,6 @@ class VBA_Parser_CLI(VBA_Parser):
3407 raise ProcessingError(self.filename, exc) 3402 raise ProcessingError(self.filename, exc)
3408 3403
3409 3404
3410 - # t = prettytable.PrettyTable(('filename', 'type', 'macros', 'autoexec', 'suspicious', 'ioc', 'hexstrings'),  
3411 - # header=False, border=False)  
3412 - # t.align = 'l'  
3413 - # t.max_width['filename'] = 30  
3414 - # t.max_width['type'] = 10  
3415 - # t.max_width['macros'] = 6  
3416 - # t.max_width['autoexec'] = 6  
3417 - # t.max_width['suspicious'] = 6  
3418 - # t.max_width['ioc'] = 6  
3419 - # t.max_width['hexstrings'] = 6  
3420 - # t.add_row((filename, ftype, macros, autoexec, suspicious, iocs, hexstrings))  
3421 - # print t  
3422 -  
3423 -  
3424 #=== MAIN ===================================================================== 3405 #=== MAIN =====================================================================
3425 3406
3426 def parse_args(cmd_line_args=None): 3407 def parse_args(cmd_line_args=None):
@@ -3515,10 +3496,6 @@ def main(cmd_line_args=None): @@ -3515,10 +3496,6 @@ def main(cmd_line_args=None):
3515 # enable logging in the modules: 3496 # enable logging in the modules:
3516 enable_logging() 3497 enable_logging()
3517 3498
3518 - # Old display with number of items detected:  
3519 - # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('Type', 'Macros', 'AutoEx', 'Susp.', 'IOCs', 'HexStr')  
3520 - # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('-'*8, '-'*7, '-'*7, '-'*7, '-'*7, '-'*7)  
3521 -  
3522 # with the option --reveal, make sure --deobf is also enabled: 3499 # with the option --reveal, make sure --deobf is also enabled:
3523 if options.show_deobfuscated_code and not options.deobfuscate: 3500 if options.show_deobfuscated_code and not options.deobfuscate:
3524 log.info('set --deobf because --reveal was set') 3501 log.info('set --deobf because --reveal was set')