Commit 42fbc3ee1264f0adbb59eeff40477e6ec6223743

Authored by Christian Herdtweck
1 parent b21c1465

update todo and other comments about PowerPoint 97-2003

Showing 1 changed file with 14 additions and 13 deletions
oletools/olevba.py
... ... @@ -9,6 +9,7 @@ and analyze malicious macros.
9 9 Supported formats:
10 10 - Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm)
11 11 - Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb)
  12 +- PowerPoint 97-2003 (.ppt)
12 13 - PowerPoint 2007+ (.pptm, .ppsm)
13 14 - Word 2003 XML (.xml)
14 15 - Word/Excel Single File Web Page / MHTML (.mht)
... ... @@ -188,7 +189,6 @@ __version__ = '0.46'
188 189 # + look for VBA in embedded documents (e.g. Excel in Word)
189 190 # + support SRP streams (see Lenny's article + links and sample)
190 191 # - python 3.x support
191   -# - add support for PowerPoint macros (see libclamav, libgsf), use oledump heuristic?
192 192 # - check VBA macros in Visio, Access, Project, etc
193 193 # - extract_macros: convert to a class, split long function into smaller methods
194 194 # - extract_macros: read bytes from stream file objects instead of strings
... ... @@ -1911,6 +1911,7 @@ class VBA_Parser(object):
1911 1911 - Word MHT - Single File Web Page / MHTML (.mht)
1912 1912 - Excel 97-2003 (.xls)
1913 1913 - Excel 2007+ (.xlsm, .xlsb)
  1914 + - PowerPoint 97-2003 (.ppt)
1914 1915 - PowerPoint 2007+ (.pptm, .ppsm)
1915 1916 """
1916 1917  
... ... @@ -1971,6 +1972,7 @@ class VBA_Parser(object):
1971 1972 if olefile.isOleFile(_file):
1972 1973 # This looks like an OLE file
1973 1974 self.open_ole(_file)
  1975 +
1974 1976 # if this worked, try whether it is a ppt file (special ole file)
1975 1977 self.open_ppt()
1976 1978 if self.type is None and zipfile.is_zipfile(_file):
... ... @@ -2017,7 +2019,6 @@ class VBA_Parser(object):
2017 2019 try:
2018 2020 # Open and parse the OLE file, using unicode for path names:
2019 2021 self.ole_file = olefile.OleFileIO(_file, path_encoding=None)
2020   - # TODO: raise TypeError if this is a Powerpoint 97 file, since VBA macros cannot be detected yet
2021 2022 # set type only if parsing succeeds
2022 2023 self.type = TYPE_OLE
2023 2024 except KeyboardInterrupt:
... ... @@ -2261,8 +2262,7 @@ class VBA_Parser(object):
2261 2262 dir_path is the path of the OLE stream named "VBA/dir" within the VBA project.
2262 2263  
2263 2264 If this function returns an empty list for one of the supported formats
2264   - (i.e. Word, Excel, Powerpoint except Powerpoint 97-2003), then the
2265   - file does not contain VBA macros.
  2265 + (i.e. Word, Excel, PowerPoint), then the file does not contain VBA macros.
2266 2266  
2267 2267 :return: None if OpenXML file, list of tuples (vba_root, project_path, dir_path)
2268 2268 for each VBA project found if OLE file
... ... @@ -2293,7 +2293,7 @@ class VBA_Parser(object):
2293 2293 # Find the VBA project root (different in MS Word, Excel, etc):
2294 2294 # - Word 97-2003: Macros
2295 2295 # - Excel 97-2003: _VBA_PROJECT_CUR
2296   - # - PowerPoint 97-2003: not supported yet (different file structure)
  2296 + # - PowerPoint 97-2003: PptParser has identified ole_subfiles
2297 2297 # - Word 2007+: word/vbaProject.bin in zip archive, then the VBA project is the root of vbaProject.bin.
2298 2298 # - Excel 2007+: xl/vbaProject.bin in zip archive, then same as Word
2299 2299 # - PowerPoint 2007+: ppt/vbaProject.bin in zip archive, then same as Word
... ... @@ -2350,7 +2350,6 @@ class VBA_Parser(object):
2350 2350 if it contains VBA projects. Both OLE and OpenXML files are supported.
2351 2351  
2352 2352 Important: for now, results are accurate only for Word, Excel and PowerPoint.
2353   - EXCEPT Powerpoint 97-2003, which has a different structure for VBA.
2354 2353  
2355 2354 Note: this method does NOT attempt to check the actual presence or validity
2356 2355 of VBA macro source code, so there might be false positives.
... ... @@ -2365,7 +2364,7 @@ class VBA_Parser(object):
2365 2364 # if this method was already called, return the previous result:
2366 2365 if self.contains_macros is not None:
2367 2366 return self.contains_macros
2368   - # if OpenXML, check all the OLE subfiles:
  2367 + # if OpenXML/PPT, check all the OLE subfiles:
2369 2368 if self.ole_file is None:
2370 2369 for ole_subfile in self.ole_subfiles:
2371 2370 if ole_subfile.detect_vba_macros():
... ... @@ -2390,14 +2389,15 @@ class VBA_Parser(object):
2390 2389 If the file is OLE, filename is the path of the file.
2391 2390 If the file is OpenXML, filename is the path of the OLE subfile containing VBA macros
2392 2391 within the zip archive, e.g. word/vbaProject.bin.
  2392 + If the file is PPT, results are as for OpenXML, but filename is not meaningful.
2393 2393 """
2394 2394 if self.ole_file is None:
2395   - # This may be either an OpenXML or a text file:
  2395 + # This may be either an OpenXML/PPT or a text file:
2396 2396 if self.type == TYPE_TEXT:
2397 2397 # This is a text file, yield the full code:
2398 2398 yield (self.filename, '', self.filename, self.vba_code_all_modules)
2399 2399 else:
2400   - # OpenXML: recursively yield results from each OLE subfile:
  2400 + # OpenXML/PPT: recursively yield results from each OLE subfile:
2401 2401 for ole_subfile in self.ole_subfiles:
2402 2402 for results in ole_subfile.extract_macros():
2403 2403 yield results
... ... @@ -2490,13 +2490,13 @@ class VBA_Parser(object):
2490 2490 dir_path is the path of the OLE stream named "VBA/dir" within the VBA project.
2491 2491  
2492 2492 If this function returns an empty list for one of the supported formats
2493   - (i.e. Word, Excel, Powerpoint except Powerpoint 97-2003), then the
2494   - file does not contain VBA macros.
  2493 + (i.e. Word, Excel, PowerPoint), then the file does not contain VBA forms.
2495 2494  
2496 2495 :return: None if OpenXML file, list of tuples (vba_root, project_path, dir_path)
2497 2496 for each VBA project found if OLE file
2498 2497 """
2499 2498 log.debug('VBA_Parser.find_vba_forms')
  2499 +
2500 2500 # if the file is neither OLE nor PPT (e.g. OpenXML or text), return None:
2501 2501 if self.ole_file is None and self.type != TYPE_PPT:
2502 2502 return None
... ... @@ -2553,14 +2553,15 @@ class VBA_Parser(object):
2553 2553 If the file is OLE, filename is the path of the file.
2554 2554 If the file is OpenXML, filename is the path of the OLE subfile containing VBA macros
2555 2555 within the zip archive, e.g. word/vbaProject.bin.
  2556 + If the file is PPT, results are as for OpenXML, but filename is not meaningful.
2556 2557 """
2557 2558 if self.ole_file is None:
2558   - # This may be either an OpenXML or a text file:
  2559 + # This may be either an OpenXML/PPT or a text file:
2559 2560 if self.type == TYPE_TEXT:
2560 2561 # This is a text file, return no results:
2561 2562 return
2562 2563 else:
2563   - # OpenXML: recursively yield results from each OLE subfile:
  2564 + # OpenXML/PPT: recursively yield results from each OLE subfile:
2564 2565 for ole_subfile in self.ole_subfiles:
2565 2566 for results in ole_subfile.extract_form_strings():
2566 2567 yield results
... ...