Commit 1c52c0d5430856fcdef9f2d8f4720b0180a8ffa9
1 parent: b8c80db7
olevba: synchronized some changes with olevba3 (issue #106)
Showing 2 changed files with 70 additions and 19 deletions
oletools/olevba.py
| @@ -269,6 +269,8 @@ import ppt_parser | @@ -269,6 +269,8 @@ import ppt_parser | ||
| 269 | import email.feedparser | 269 | import email.feedparser |
| 270 | email.feedparser.headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:?|[\t ])') | 270 | email.feedparser.headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:?|[\t ])') |
| 271 | 271 | ||
| 272 | +# === PYTHON 2+3 SUPPORT ====================================================== | ||
| 273 | + | ||
| 272 | if sys.version_info[0] <= 2: | 274 | if sys.version_info[0] <= 2: |
| 273 | # Python 2.x | 275 | # Python 2.x |
| 274 | if sys.version_info[1] <= 6: | 276 | if sys.version_info[1] <= 6: |
| @@ -281,6 +283,8 @@ if sys.version_info[0] <= 2: | @@ -281,6 +283,8 @@ if sys.version_info[0] <= 2: | ||
| 281 | else: | 283 | else: |
| 282 | # Python 3.x+ | 284 | # Python 3.x+ |
| 283 | from zipfile import is_zipfile | 285 | from zipfile import is_zipfile |
| 286 | + # xrange is now called range: | ||
| 287 | + xrange = range | ||
| 284 | 288 | ||
| 285 | # === LOGGING ================================================================= | 289 | # === LOGGING ================================================================= |
| 286 | 290 | ||
| @@ -443,7 +447,7 @@ TYPE2TAG = { | @@ -443,7 +447,7 @@ TYPE2TAG = { | ||
| 443 | 447 | ||
| 444 | 448 | ||
| 445 | # MSO files ActiveMime header magic | 449 | # MSO files ActiveMime header magic |
| 446 | -MSO_ACTIVEMIME_HEADER = 'ActiveMime' | 450 | +MSO_ACTIVEMIME_HEADER = b'ActiveMime' |
| 447 | 451 | ||
| 448 | MODULE_EXTENSION = "bas" | 452 | MODULE_EXTENSION = "bas" |
| 449 | CLASS_EXTENSION = "cls" | 453 | CLASS_EXTENSION = "cls" |
| @@ -2252,7 +2256,7 @@ class VBA_Parser(object): | @@ -2252,7 +2256,7 @@ class VBA_Parser(object): | ||
| 2252 | if data is None: | 2256 | if data is None: |
| 2253 | data = open(filename, 'rb').read() | 2257 | data = open(filename, 'rb').read() |
| 2254 | # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace | 2258 | # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace |
| 2255 | - if 'http://schemas.microsoft.com/office/word/2003/wordml' in data: | 2259 | + if b'http://schemas.microsoft.com/office/word/2003/wordml' in data: |
| 2256 | self.open_word2003xml(data) | 2260 | self.open_word2003xml(data) |
| 2257 | # store a lowercase version for the next tests: | 2261 | # store a lowercase version for the next tests: |
| 2258 | data_lowercase = data.lower() | 2262 | data_lowercase = data.lower() |
| @@ -2262,14 +2266,14 @@ class VBA_Parser(object): | @@ -2262,14 +2266,14 @@ class VBA_Parser(object): | ||
| 2262 | # and even whitespaces in between "MIME", "-", "Version" and ":". The version number is ignored. | 2266 | # and even whitespaces in between "MIME", "-", "Version" and ":". The version number is ignored. |
| 2263 | # And the line is case insensitive. | 2267 | # And the line is case insensitive. |
| 2264 | # so we'll just check the presence of mime, version and multipart anywhere: | 2268 | # so we'll just check the presence of mime, version and multipart anywhere: |
| 2265 | - if self.type is None and 'mime' in data_lowercase and 'version' in data_lowercase \ | ||
| 2266 | - and 'multipart' in data_lowercase: | 2269 | + if self.type is None and b'mime' in data_lowercase and b'version' in data_lowercase \ |
| 2270 | + and b'multipart' in data_lowercase: | ||
| 2267 | self.open_mht(data) | 2271 | self.open_mht(data) |
| 2268 | #TODO: handle exceptions | 2272 | #TODO: handle exceptions |
| 2269 | #TODO: Excel 2003 XML | 2273 | #TODO: Excel 2003 XML |
| 2270 | # Check if this is a plain text VBA or VBScript file: | 2274 | # Check if this is a plain text VBA or VBScript file: |
| 2271 | # To avoid scanning binary files, we simply check for some control chars: | 2275 | # To avoid scanning binary files, we simply check for some control chars: |
| 2272 | - if self.type is None and '\x00' not in data: | 2276 | + if self.type is None and b'\x00' not in data: |
| 2273 | self.open_text(data) | 2277 | self.open_text(data) |
| 2274 | if self.type is None: | 2278 | if self.type is None: |
| 2275 | # At this stage, could not match a known format: | 2279 | # At this stage, could not match a known format: |
oletools/olevba3.py
| @@ -12,6 +12,7 @@ Supported formats: | @@ -12,6 +12,7 @@ Supported formats: | ||
| 12 | - PowerPoint 97-2003 (.ppt), PowerPoint 2007+ (.pptm, .ppsm) | 12 | - PowerPoint 97-2003 (.ppt), PowerPoint 2007+ (.pptm, .ppsm) |
| 13 | - Word 2003 XML (.xml) | 13 | - Word 2003 XML (.xml) |
| 14 | - Word/Excel Single File Web Page / MHTML (.mht) | 14 | - Word/Excel Single File Web Page / MHTML (.mht) |
| 15 | +- Publisher (.pub) | ||
| 15 | 16 | ||
| 16 | Author: Philippe Lagadec - http://www.decalage.info | 17 | Author: Philippe Lagadec - http://www.decalage.info |
| 17 | License: BSD, see source code or documentation | 18 | License: BSD, see source code or documentation |
| @@ -72,6 +73,8 @@ https://github.com/unixfreak0037/officeparser | @@ -72,6 +73,8 @@ https://github.com/unixfreak0037/officeparser | ||
| 72 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | 73 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 73 | # SOFTWARE. | 74 | # SOFTWARE. |
| 74 | 75 | ||
| 76 | +from __future__ import print_function | ||
| 77 | + | ||
| 75 | #------------------------------------------------------------------------------ | 78 | #------------------------------------------------------------------------------ |
| 76 | # CHANGELOG: | 79 | # CHANGELOG: |
| 77 | # 2014-08-05 v0.01 PL: - first version based on officeparser code | 80 | # 2014-08-05 v0.01 PL: - first version based on officeparser code |
| @@ -178,9 +181,16 @@ https://github.com/unixfreak0037/officeparser | @@ -178,9 +181,16 @@ https://github.com/unixfreak0037/officeparser | ||
| 178 | # 2016-06-12 v0.50 PL: - fixed small bugs in VBA parsing code | 181 | # 2016-06-12 v0.50 PL: - fixed small bugs in VBA parsing code |
| 179 | # 2016-07-01 PL: - fixed issue #58 with format() to support Python 2.6 | 182 | # 2016-07-01 PL: - fixed issue #58 with format() to support Python 2.6 |
| 180 | # 2016-07-29 CH: - fixed several bugs including #73 (Mac Roman encoding) | 183 | # 2016-07-29 CH: - fixed several bugs including #73 (Mac Roman encoding) |
| 184 | +# 2016-08-31 PL: - added autoexec keyword InkPicture_Painted | ||
| 185 | +# - detect_autoexec now returns the exact keyword found | ||
| 186 | +# 2016-09-05 PL: - added autoexec keywords for MS Publisher (.pub) | ||
| 187 | +# 2016-09-06 PL: - fixed issue #20, is_zipfile on Python 2.6 | ||
| 188 | +# 2016-09-12 PL: - enabled packrat to improve pyparsing performance | ||
| 189 | +# 2016-10-25 PL: - fixed raise and print statements for Python 3 | ||
| 181 | # 2016-10-25 PL: - fixed regex bytes strings (PR/issue #100) | 190 | # 2016-10-25 PL: - fixed regex bytes strings (PR/issue #100) |
| 191 | +# 2016-11-03 v0.51 PL: - added EnumDateFormats and EnumSystemLanguageGroupsW | ||
| 182 | 192 | ||
| 183 | -__version__ = '0.50' | 193 | +__version__ = '0.51a' |
| 184 | 194 | ||
| 185 | #------------------------------------------------------------------------------ | 195 | #------------------------------------------------------------------------------ |
| 186 | # TODO: | 196 | # TODO: |
| @@ -260,6 +270,22 @@ import oletools.ppt_parser as ppt_parser | @@ -260,6 +270,22 @@ import oletools.ppt_parser as ppt_parser | ||
| 260 | import email.feedparser | 270 | import email.feedparser |
| 261 | email.feedparser.headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:?|[\t ])') | 271 | email.feedparser.headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:?|[\t ])') |
| 262 | 272 | ||
| 273 | +# === PYTHON 2+3 SUPPORT ====================================================== | ||
| 274 | + | ||
| 275 | +if sys.version_info[0] <= 2: | ||
| 276 | + # Python 2.x | ||
| 277 | + if sys.version_info[1] <= 6: | ||
| 278 | + # Python 2.6 | ||
| 279 | + # use is_zipfile backported from Python 2.7: | ||
| 280 | + from thirdparty.zipfile27 import is_zipfile | ||
| 281 | + else: | ||
| 282 | + # Python 2.7 | ||
| 283 | + from zipfile import is_zipfile | ||
| 284 | +else: | ||
| 285 | + # Python 3.x+ | ||
| 286 | + from zipfile import is_zipfile | ||
| 287 | + # xrange is now called range: | ||
| 288 | + xrange = range | ||
| 263 | 289 | ||
| 264 | # === LOGGING ================================================================= | 290 | # === LOGGING ================================================================= |
| 265 | 291 | ||
| @@ -438,7 +464,7 @@ ATTR_NAME = NS_W + 'name' | @@ -438,7 +464,7 @@ ATTR_NAME = NS_W + 'name' | ||
| 438 | AUTOEXEC_KEYWORDS = { | 464 | AUTOEXEC_KEYWORDS = { |
| 439 | # MS Word: | 465 | # MS Word: |
| 440 | 'Runs when the Word document is opened': | 466 | 'Runs when the Word document is opened': |
| 441 | - ('AutoExec', 'AutoOpen', 'Document_Open', 'DocumentOpen'), | 467 | + ('AutoExec', 'AutoOpen', 'DocumentOpen'), |
| 442 | 'Runs when the Word document is closed': | 468 | 'Runs when the Word document is closed': |
| 443 | ('AutoExit', 'AutoClose', 'Document_Close', 'DocumentBeforeClose'), | 469 | ('AutoExit', 'AutoClose', 'Document_Close', 'DocumentBeforeClose'), |
| 444 | 'Runs when the Word document is modified': | 470 | 'Runs when the Word document is modified': |
| @@ -446,13 +472,24 @@ AUTOEXEC_KEYWORDS = { | @@ -446,13 +472,24 @@ AUTOEXEC_KEYWORDS = { | ||
| 446 | 'Runs when a new Word document is created': | 472 | 'Runs when a new Word document is created': |
| 447 | ('AutoNew', 'Document_New', 'NewDocument'), | 473 | ('AutoNew', 'Document_New', 'NewDocument'), |
| 448 | 474 | ||
| 475 | + # MS Word and Publisher: | ||
| 476 | + 'Runs when the Word or Publisher document is opened': | ||
| 477 | + ('Document_Open',), | ||
| 478 | + 'Runs when the Publisher document is closed': | ||
| 479 | + ('Document_BeforeClose',), | ||
| 480 | + | ||
| 449 | # MS Excel: | 481 | # MS Excel: |
| 450 | 'Runs when the Excel Workbook is opened': | 482 | 'Runs when the Excel Workbook is opened': |
| 451 | ('Auto_Open', 'Workbook_Open', 'Workbook_Activate'), | 483 | ('Auto_Open', 'Workbook_Open', 'Workbook_Activate'), |
| 452 | 'Runs when the Excel Workbook is closed': | 484 | 'Runs when the Excel Workbook is closed': |
| 453 | ('Auto_Close', 'Workbook_Close'), | 485 | ('Auto_Close', 'Workbook_Close'), |
| 454 | 486 | ||
| 455 | - #TODO: full list in MS specs?? | 487 | + # any MS Office application: |
| 488 | + 'Runs when the file is opened (using InkPicture ActiveX object)': | ||
| 489 | + # ref:https://twitter.com/joe4security/status/770691099988025345 | ||
| 490 | + (r'\w+_Painted',), | ||
| 491 | + 'Runs when the file is opened and ActiveX objects trigger events': | ||
| 492 | + (r'\w+_(?:GotFocus|LostFocus|MouseHover)',), | ||
| 456 | } | 493 | } |
| 457 | 494 | ||
| 458 | # Suspicious Keywords that may be used by malware | 495 | # Suspicious Keywords that may be used by malware |
| @@ -516,7 +553,11 @@ SUSPICIOUS_KEYWORDS = { | @@ -516,7 +553,11 @@ SUSPICIOUS_KEYWORDS = { | ||
| 516 | ('Lib',), | 553 | ('Lib',), |
| 517 | 'May inject code into another process': | 554 | 'May inject code into another process': |
| 518 | ('CreateThread', 'VirtualAlloc', # (issue #9) suggested by Davy Douhine - used by MSF payload | 555 | ('CreateThread', 'VirtualAlloc', # (issue #9) suggested by Davy Douhine - used by MSF payload |
| 556 | + 'VirtualAllocEx', 'RtlMoveMemory', | ||
| 519 | ), | 557 | ), |
| 558 | + 'May run a shellcode in memory': | ||
| 559 | + ('EnumSystemLanguageGroupsW?', # Used by Hancitor in Oct 2016 | ||
| 560 | + 'EnumDateFormats(?:W|(?:Ex){1,2})?'), # see https://msdn.microsoft.com/en-us/library/windows/desktop/dd317810(v=vs.85).aspx | ||
| 520 | 'May download files from the Internet': | 561 | 'May download files from the Internet': |
| 521 | #TODO: regex to find urlmon+URLDownloadToFileA on same line | 562 | #TODO: regex to find urlmon+URLDownloadToFileA on same line |
| 522 | ('URLDownloadToFileA', 'Msxml2.XMLHTTP', 'Microsoft.XMLHTTP', | 563 | ('URLDownloadToFileA', 'Msxml2.XMLHTTP', 'Microsoft.XMLHTTP', |
| @@ -532,7 +573,7 @@ SUSPICIOUS_KEYWORDS = { | @@ -532,7 +573,7 @@ SUSPICIOUS_KEYWORDS = { | ||
| 532 | 'May attempt to obfuscate malicious function calls': | 573 | 'May attempt to obfuscate malicious function calls': |
| 533 | ('CallByName',), | 574 | ('CallByName',), |
| 534 | #CallByName: http://msdn.microsoft.com/en-us/library/office/gg278760%28v=office.15%29.aspx | 575 | #CallByName: http://msdn.microsoft.com/en-us/library/office/gg278760%28v=office.15%29.aspx |
| 535 | - 'May attempt to obfuscate specific strings': | 576 | + 'May attempt to obfuscate specific strings (use option --deobf to deobfuscate)': |
| 536 | #TODO: regex to find several Chr*, not just one | 577 | #TODO: regex to find several Chr*, not just one |
| 537 | ('Chr', 'ChrB', 'ChrW', 'StrReverse', 'Xor'), | 578 | ('Chr', 'ChrB', 'ChrW', 'StrReverse', 'Xor'), |
| 538 | #Chr: http://msdn.microsoft.com/en-us/library/office/gg264465%28v=office.15%29.aspx | 579 | #Chr: http://msdn.microsoft.com/en-us/library/office/gg264465%28v=office.15%29.aspx |
| @@ -571,8 +612,6 @@ SUSPICIOUS_KEYWORDS = { | @@ -571,8 +612,6 @@ SUSPICIOUS_KEYWORDS = { | ||
| 571 | 'May detect WinJail Sandbox': | 612 | 'May detect WinJail Sandbox': |
| 572 | # ref: http://www.cplusplus.com/forum/windows/96874/ | 613 | # ref: http://www.cplusplus.com/forum/windows/96874/ |
| 573 | ('Afx:400000:0',), | 614 | ('Afx:400000:0',), |
| 574 | - 'Memory manipulation': | ||
| 575 | - ('VirtualAllocEx', 'RtlMoveMemory'), | ||
| 576 | } | 615 | } |
| 577 | 616 | ||
| 578 | # Regular Expression for a URL: | 617 | # Regular Expression for a URL: |
| @@ -646,6 +685,10 @@ re_printable_string = re.compile(b'[\\t\\r\\n\\x20-\\xFF]{5,}') | @@ -646,6 +685,10 @@ re_printable_string = re.compile(b'[\\t\\r\\n\\x20-\\xFF]{5,}') | ||
| 646 | # TODO: set whitespaces according to VBA | 685 | # TODO: set whitespaces according to VBA |
| 647 | # TODO: merge extended lines before parsing | 686 | # TODO: merge extended lines before parsing |
| 648 | 687 | ||
| 688 | +# Enable PackRat for better performance: | ||
| 689 | +# (see https://pythonhosted.org/pyparsing/pyparsing.ParserElement-class.html#enablePackrat) | ||
| 690 | +ParserElement.enablePackrat() | ||
| 691 | + | ||
| 649 | # VBA identifier chars (from MS-VBAL 3.3.5) | 692 | # VBA identifier chars (from MS-VBAL 3.3.5) |
| 650 | vba_identifier_chars = alphanums + '_' | 693 | vba_identifier_chars = alphanums + '_' |
| 651 | 694 | ||
| @@ -1712,9 +1755,11 @@ def detect_autoexec(vba_code, obfuscation=None): | @@ -1712,9 +1755,11 @@ def detect_autoexec(vba_code, obfuscation=None): | ||
| 1712 | for keyword in keywords: | 1755 | for keyword in keywords: |
| 1713 | #TODO: if keyword is already a compiled regex, use it as-is | 1756 | #TODO: if keyword is already a compiled regex, use it as-is |
| 1714 | # search using regex to detect word boundaries: | 1757 | # search using regex to detect word boundaries: |
| 1715 | - if re.search(r'(?i)\b' + keyword + r'\b', vba_code): | 1758 | + match = re.search(r'(?i)\b' + keyword + r'\b', vba_code) |
| 1759 | + if match: | ||
| 1716 | #if keyword.lower() in vba_code: | 1760 | #if keyword.lower() in vba_code: |
| 1717 | - results.append((keyword, description + obf_text)) | 1761 | + found_keyword = match.group() |
| 1762 | + results.append((found_keyword, description + obf_text)) | ||
| 1718 | return results | 1763 | return results |
| 1719 | 1764 | ||
| 1720 | 1765 | ||
| @@ -1736,9 +1781,11 @@ def detect_suspicious(vba_code, obfuscation=None): | @@ -1736,9 +1781,11 @@ def detect_suspicious(vba_code, obfuscation=None): | ||
| 1736 | for description, keywords in SUSPICIOUS_KEYWORDS.items(): | 1781 | for description, keywords in SUSPICIOUS_KEYWORDS.items(): |
| 1737 | for keyword in keywords: | 1782 | for keyword in keywords: |
| 1738 | # search using regex to detect word boundaries: | 1783 | # search using regex to detect word boundaries: |
| 1739 | - if re.search(r'(?i)\b' + keyword + r'\b', vba_code): | 1784 | + match = re.search(r'(?i)\b' + keyword + r'\b', vba_code) |
| 1785 | + if match: | ||
| 1740 | #if keyword.lower() in vba_code: | 1786 | #if keyword.lower() in vba_code: |
| 1741 | - results.append((keyword, description + obf_text)) | 1787 | + found_keyword = match.group() |
| 1788 | + results.append((found_keyword, description + obf_text)) | ||
| 1742 | return results | 1789 | return results |
| 1743 | 1790 | ||
| 1744 | 1791 | ||
| @@ -2203,7 +2250,7 @@ class VBA_Parser(object): | @@ -2203,7 +2250,7 @@ class VBA_Parser(object): | ||
| 2203 | 2250 | ||
| 2204 | # if this worked, try whether it is a ppt file (special ole file) | 2251 | # if this worked, try whether it is a ppt file (special ole file) |
| 2205 | self.open_ppt() | 2252 | self.open_ppt() |
| 2206 | - if self.type is None and zipfile.is_zipfile(_file): | 2253 | + if self.type is None and is_zipfile(_file): |
| 2207 | # Zip file, which may be an OpenXML document | 2254 | # Zip file, which may be an OpenXML document |
| 2208 | self.open_openxml(_file) | 2255 | self.open_openxml(_file) |
| 2209 | if self.type is None: | 2256 | if self.type is None: |
| @@ -2606,7 +2653,7 @@ class VBA_Parser(object): | @@ -2606,7 +2653,7 @@ class VBA_Parser(object): | ||
| 2606 | # Also look for VBA code in any stream including orphans | 2653 | # Also look for VBA code in any stream including orphans |
| 2607 | # (happens in some malformed files) | 2654 | # (happens in some malformed files) |
| 2608 | ole = self.ole_file | 2655 | ole = self.ole_file |
| 2609 | - for sid in range(len(ole.direntries)): | 2656 | + for sid in xrange(len(ole.direntries)): |
| 2610 | # check if id is already done above: | 2657 | # check if id is already done above: |
| 2611 | log.debug('Checking DirEntry #%d' % sid) | 2658 | log.debug('Checking DirEntry #%d' % sid) |
| 2612 | d = ole.direntries[sid] | 2659 | d = ole.direntries[sid] |
| @@ -2672,7 +2719,7 @@ class VBA_Parser(object): | @@ -2672,7 +2719,7 @@ class VBA_Parser(object): | ||
| 2672 | # Also look for VBA code in any stream including orphans | 2719 | # Also look for VBA code in any stream including orphans |
| 2673 | # (happens in some malformed files) | 2720 | # (happens in some malformed files) |
| 2674 | ole = self.ole_file | 2721 | ole = self.ole_file |
| 2675 | - for sid in range(len(ole.direntries)): | 2722 | + for sid in xrange(len(ole.direntries)): |
| 2676 | # check if id is already done above: | 2723 | # check if id is already done above: |
| 2677 | log.debug('Checking DirEntry #%d' % sid) | 2724 | log.debug('Checking DirEntry #%d' % sid) |
| 2678 | if sid in vba_stream_ids: | 2725 | if sid in vba_stream_ids: |
| @@ -3099,7 +3146,7 @@ class VBA_Parser_CLI(VBA_Parser): | @@ -3099,7 +3146,7 @@ class VBA_Parser_CLI(VBA_Parser): | ||
| 3099 | if self.detect_vba_macros(): | 3146 | if self.detect_vba_macros(): |
| 3100 | # print a waiting message only if the output is not redirected to a file: | 3147 | # print a waiting message only if the output is not redirected to a file: |
| 3101 | if sys.stdout.isatty(): | 3148 | if sys.stdout.isatty(): |
| 3102 | - print('Analysis...\r') | 3149 | + print('Analysis...\r', end='') |
| 3103 | sys.stdout.flush() | 3150 | sys.stdout.flush() |
| 3104 | self.analyze_macros(show_decoded_strings=show_decoded_strings, | 3151 | self.analyze_macros(show_decoded_strings=show_decoded_strings, |
| 3105 | deobfuscate=deobfuscate) | 3152 | deobfuscate=deobfuscate) |