Commit 42fbc3ee1264f0adbb59eeff40477e6ec6223743

Authored by Christian Herdtweck
1 parent b21c1465

update todo and other comments about PowerPoint 97-2003

Showing 1 changed file with 14 additions and 13 deletions
oletools/olevba.py
@@ -9,6 +9,7 @@ and analyze malicious macros. @@ -9,6 +9,7 @@ and analyze malicious macros.
9 Supported formats: 9 Supported formats:
10 - Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm) 10 - Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm)
11 - Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb) 11 - Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb)
  12 +- PowerPoint 97-2003 (.ppt)
12 - PowerPoint 2007+ (.pptm, .ppsm) 13 - PowerPoint 2007+ (.pptm, .ppsm)
13 - Word 2003 XML (.xml) 14 - Word 2003 XML (.xml)
14 - Word/Excel Single File Web Page / MHTML (.mht) 15 - Word/Excel Single File Web Page / MHTML (.mht)
@@ -188,7 +189,6 @@ __version__ = '0.46' @@ -188,7 +189,6 @@ __version__ = '0.46'
188 # + look for VBA in embedded documents (e.g. Excel in Word) 189 # + look for VBA in embedded documents (e.g. Excel in Word)
189 # + support SRP streams (see Lenny's article + links and sample) 190 # + support SRP streams (see Lenny's article + links and sample)
190 # - python 3.x support 191 # - python 3.x support
191 -# - add support for PowerPoint macros (see libclamav, libgsf), use oledump heuristic?  
192 # - check VBA macros in Visio, Access, Project, etc 192 # - check VBA macros in Visio, Access, Project, etc
193 # - extract_macros: convert to a class, split long function into smaller methods 193 # - extract_macros: convert to a class, split long function into smaller methods
194 # - extract_macros: read bytes from stream file objects instead of strings 194 # - extract_macros: read bytes from stream file objects instead of strings
@@ -1911,6 +1911,7 @@ class VBA_Parser(object): @@ -1911,6 +1911,7 @@ class VBA_Parser(object):
1911 - Word MHT - Single File Web Page / MHTML (.mht) 1911 - Word MHT - Single File Web Page / MHTML (.mht)
1912 - Excel 97-2003 (.xls) 1912 - Excel 97-2003 (.xls)
1913 - Excel 2007+ (.xlsm, .xlsb) 1913 - Excel 2007+ (.xlsm, .xlsb)
  1914 + - PowerPoint 97-2003 (.ppt)
1914 - PowerPoint 2007+ (.pptm, .ppsm) 1915 - PowerPoint 2007+ (.pptm, .ppsm)
1915 """ 1916 """
1916 1917
@@ -1971,6 +1972,7 @@ class VBA_Parser(object): @@ -1971,6 +1972,7 @@ class VBA_Parser(object):
1971 if olefile.isOleFile(_file): 1972 if olefile.isOleFile(_file):
1972 # This looks like an OLE file 1973 # This looks like an OLE file
1973 self.open_ole(_file) 1974 self.open_ole(_file)
  1975 +
1974 # if this worked, try whether it is a ppt file (special ole file) 1976 # if this worked, try whether it is a ppt file (special ole file)
1975 self.open_ppt() 1977 self.open_ppt()
1976 if self.type is None and zipfile.is_zipfile(_file): 1978 if self.type is None and zipfile.is_zipfile(_file):
@@ -2017,7 +2019,6 @@ class VBA_Parser(object): @@ -2017,7 +2019,6 @@ class VBA_Parser(object):
2017 try: 2019 try:
2018 # Open and parse the OLE file, using unicode for path names: 2020 # Open and parse the OLE file, using unicode for path names:
2019 self.ole_file = olefile.OleFileIO(_file, path_encoding=None) 2021 self.ole_file = olefile.OleFileIO(_file, path_encoding=None)
2020 - # TODO: raise TypeError if this is a Powerpoint 97 file, since VBA macros cannot be detected yet  
2021 # set type only if parsing succeeds 2022 # set type only if parsing succeeds
2022 self.type = TYPE_OLE 2023 self.type = TYPE_OLE
2023 except KeyboardInterrupt: 2024 except KeyboardInterrupt:
@@ -2261,8 +2262,7 @@ class VBA_Parser(object): @@ -2261,8 +2262,7 @@ class VBA_Parser(object):
2261 dir_path is the path of the OLE stream named "VBA/dir" within the VBA project. 2262 dir_path is the path of the OLE stream named "VBA/dir" within the VBA project.
2262 2263
2263 If this function returns an empty list for one of the supported formats 2264 If this function returns an empty list for one of the supported formats
2264 - (i.e. Word, Excel, Powerpoint except Powerpoint 97-2003), then the  
2265 - file does not contain VBA macros. 2265 + (i.e. Word, Excel, PowerPoint), then the file does not contain VBA macros.
2266 2266
2267 :return: None if OpenXML file, list of tuples (vba_root, project_path, dir_path) 2267 :return: None if OpenXML file, list of tuples (vba_root, project_path, dir_path)
2268 for each VBA project found if OLE file 2268 for each VBA project found if OLE file
@@ -2293,7 +2293,7 @@ class VBA_Parser(object): @@ -2293,7 +2293,7 @@ class VBA_Parser(object):
2293 # Find the VBA project root (different in MS Word, Excel, etc): 2293 # Find the VBA project root (different in MS Word, Excel, etc):
2294 # - Word 97-2003: Macros 2294 # - Word 97-2003: Macros
2295 # - Excel 97-2003: _VBA_PROJECT_CUR 2295 # - Excel 97-2003: _VBA_PROJECT_CUR
2296 - # - PowerPoint 97-2003: not supported yet (different file structure) 2296 + # - PowerPoint 97-2003: PptParser has identified ole_subfiles
2297 # - Word 2007+: word/vbaProject.bin in zip archive, then the VBA project is the root of vbaProject.bin. 2297 # - Word 2007+: word/vbaProject.bin in zip archive, then the VBA project is the root of vbaProject.bin.
2298 # - Excel 2007+: xl/vbaProject.bin in zip archive, then same as Word 2298 # - Excel 2007+: xl/vbaProject.bin in zip archive, then same as Word
2299 # - PowerPoint 2007+: ppt/vbaProject.bin in zip archive, then same as Word 2299 # - PowerPoint 2007+: ppt/vbaProject.bin in zip archive, then same as Word
@@ -2350,7 +2350,6 @@ class VBA_Parser(object): @@ -2350,7 +2350,6 @@ class VBA_Parser(object):
2350 if it contains VBA projects. Both OLE and OpenXML files are supported. 2350 if it contains VBA projects. Both OLE and OpenXML files are supported.
2351 2351
2352 Important: for now, results are accurate only for Word, Excel and PowerPoint 2352 Important: for now, results are accurate only for Word, Excel and PowerPoint
2353 - EXCEPT Powerpoint 97-2003, which has a different structure for VBA.  
2354 2353
2355 Note: this method does NOT attempt to check the actual presence or validity 2354 Note: this method does NOT attempt to check the actual presence or validity
2356 of VBA macro source code, so there might be false positives. 2355 of VBA macro source code, so there might be false positives.
@@ -2365,7 +2364,7 @@ class VBA_Parser(object): @@ -2365,7 +2364,7 @@ class VBA_Parser(object):
2365 # if this method was already called, return the previous result: 2364 # if this method was already called, return the previous result:
2366 if self.contains_macros is not None: 2365 if self.contains_macros is not None:
2367 return self.contains_macros 2366 return self.contains_macros
2368 - # if OpenXML, check all the OLE subfiles: 2367 + # if OpenXML/PPT, check all the OLE subfiles:
2369 if self.ole_file is None: 2368 if self.ole_file is None:
2370 for ole_subfile in self.ole_subfiles: 2369 for ole_subfile in self.ole_subfiles:
2371 if ole_subfile.detect_vba_macros(): 2370 if ole_subfile.detect_vba_macros():
@@ -2390,14 +2389,15 @@ class VBA_Parser(object): @@ -2390,14 +2389,15 @@ class VBA_Parser(object):
2390 If the file is OLE, filename is the path of the file. 2389 If the file is OLE, filename is the path of the file.
2391 If the file is OpenXML, filename is the path of the OLE subfile containing VBA macros 2390 If the file is OpenXML, filename is the path of the OLE subfile containing VBA macros
2392 within the zip archive, e.g. word/vbaProject.bin. 2391 within the zip archive, e.g. word/vbaProject.bin.
  2392 + If the file is PPT, the result is as for OpenXML, but the filename is not meaningful.
2393 """ 2393 """
2394 if self.ole_file is None: 2394 if self.ole_file is None:
2395 - # This may be either an OpenXML or a text file: 2395 + # This may be either an OpenXML/PPT or a text file:
2396 if self.type == TYPE_TEXT: 2396 if self.type == TYPE_TEXT:
2397 # This is a text file, yield the full code: 2397 # This is a text file, yield the full code:
2398 yield (self.filename, '', self.filename, self.vba_code_all_modules) 2398 yield (self.filename, '', self.filename, self.vba_code_all_modules)
2399 else: 2399 else:
2400 - # OpenXML: recursively yield results from each OLE subfile: 2400 + # OpenXML/PPT: recursively yield results from each OLE subfile:
2401 for ole_subfile in self.ole_subfiles: 2401 for ole_subfile in self.ole_subfiles:
2402 for results in ole_subfile.extract_macros(): 2402 for results in ole_subfile.extract_macros():
2403 yield results 2403 yield results
@@ -2490,13 +2490,13 @@ class VBA_Parser(object): @@ -2490,13 +2490,13 @@ class VBA_Parser(object):
2490 dir_path is the path of the OLE stream named "VBA/dir" within the VBA project. 2490 dir_path is the path of the OLE stream named "VBA/dir" within the VBA project.
2491 2491
2492 If this function returns an empty list for one of the supported formats 2492 If this function returns an empty list for one of the supported formats
2493 - (i.e. Word, Excel, Powerpoint except Powerpoint 97-2003), then the  
2494 - file does not contain VBA macros. 2493 + (i.e. Word, Excel, PowerPoint), then the file does not contain VBA forms.
2495 2494
2496 :return: None if OpenXML file, list of tuples (vba_root, project_path, dir_path) 2495 :return: None if OpenXML file, list of tuples (vba_root, project_path, dir_path)
2497 for each VBA project found if OLE file 2496 for each VBA project found if OLE file
2498 """ 2497 """
2499 log.debug('VBA_Parser.find_vba_forms') 2498 log.debug('VBA_Parser.find_vba_forms')
  2499 +
2500 # if the file is neither OLE nor PPT (e.g. OpenXML), return None: 2500 # if the file is neither OLE nor PPT (e.g. OpenXML), return None:
2501 if self.ole_file is None and self.type != TYPE_PPT: 2501 if self.ole_file is None and self.type != TYPE_PPT:
2502 return None 2502 return None
@@ -2553,14 +2553,15 @@ class VBA_Parser(object): @@ -2553,14 +2553,15 @@ class VBA_Parser(object):
2553 If the file is OLE, filename is the path of the file. 2553 If the file is OLE, filename is the path of the file.
2554 If the file is OpenXML, filename is the path of the OLE subfile containing VBA macros 2554 If the file is OpenXML, filename is the path of the OLE subfile containing VBA macros
2555 within the zip archive, e.g. word/vbaProject.bin. 2555 within the zip archive, e.g. word/vbaProject.bin.
  2556 + If the file is PPT, the result is as for OpenXML, but the filename is not meaningful.
2556 """ 2557 """
2557 if self.ole_file is None: 2558 if self.ole_file is None:
2558 - # This may be either an OpenXML or a text file: 2559 + # This may be either an OpenXML/PPT or a text file:
2559 if self.type == TYPE_TEXT: 2560 if self.type == TYPE_TEXT:
2560 # This is a text file, return no results: 2561 # This is a text file, return no results:
2561 return 2562 return
2562 else: 2563 else:
2563 - # OpenXML: recursively yield results from each OLE subfile: 2564 + # OpenXML/PPT: recursively yield results from each OLE subfile:
2564 for ole_subfile in self.ole_subfiles: 2565 for ole_subfile in self.ole_subfiles:
2565 for results in ole_subfile.extract_form_strings(): 2566 for results in ole_subfile.extract_form_strings():
2566 yield results 2567 yield results