Commit 1c52c0d5430856fcdef9f2d8f4720b0180a8ffa9
1 parent: b8c80db7
olevba: synchronized some changes with olevba3 (issue #106)
Showing 2 changed files with 70 additions and 19 deletions
oletools/olevba.py
| ... | ... | @@ -269,6 +269,8 @@ import ppt_parser |
| 269 | 269 | import email.feedparser |
| 270 | 270 | email.feedparser.headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:?|[\t ])') |
| 271 | 271 | |
| 272 | +# === PYTHON 2+3 SUPPORT ====================================================== | |
| 273 | + | |
| 272 | 274 | if sys.version_info[0] <= 2: |
| 273 | 275 | # Python 2.x |
| 274 | 276 | if sys.version_info[1] <= 6: |
| ... | ... | @@ -281,6 +283,8 @@ if sys.version_info[0] <= 2: |
| 281 | 283 | else: |
| 282 | 284 | # Python 3.x+ |
| 283 | 285 | from zipfile import is_zipfile |
| 286 | + # xrange is now called range: | |
| 287 | + xrange = range | |
| 284 | 288 | |
| 285 | 289 | # === LOGGING ================================================================= |
| 286 | 290 | |
| ... | ... | @@ -443,7 +447,7 @@ TYPE2TAG = { |
| 443 | 447 | |
| 444 | 448 | |
| 445 | 449 | # MSO files ActiveMime header magic |
| 446 | -MSO_ACTIVEMIME_HEADER = 'ActiveMime' | |
| 450 | +MSO_ACTIVEMIME_HEADER = b'ActiveMime' | |
| 447 | 451 | |
| 448 | 452 | MODULE_EXTENSION = "bas" |
| 449 | 453 | CLASS_EXTENSION = "cls" |
| ... | ... | @@ -2252,7 +2256,7 @@ class VBA_Parser(object): |
| 2252 | 2256 | if data is None: |
| 2253 | 2257 | data = open(filename, 'rb').read() |
| 2254 | 2258 | # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace |
| 2255 | - if 'http://schemas.microsoft.com/office/word/2003/wordml' in data: | |
| 2259 | + if b'http://schemas.microsoft.com/office/word/2003/wordml' in data: | |
| 2256 | 2260 | self.open_word2003xml(data) |
| 2257 | 2261 | # store a lowercase version for the next tests: |
| 2258 | 2262 | data_lowercase = data.lower() |
| ... | ... | @@ -2262,14 +2266,14 @@ class VBA_Parser(object): |
| 2262 | 2266 | # and even whitespaces in between "MIME", "-", "Version" and ":". The version number is ignored. |
| 2263 | 2267 | # And the line is case insensitive. |
| 2264 | 2268 | # so we'll just check the presence of mime, version and multipart anywhere: |
| 2265 | - if self.type is None and 'mime' in data_lowercase and 'version' in data_lowercase \ | |
| 2266 | - and 'multipart' in data_lowercase: | |
| 2269 | + if self.type is None and b'mime' in data_lowercase and b'version' in data_lowercase \ | |
| 2270 | + and b'multipart' in data_lowercase: | |
| 2267 | 2271 | self.open_mht(data) |
| 2268 | 2272 | #TODO: handle exceptions |
| 2269 | 2273 | #TODO: Excel 2003 XML |
| 2270 | 2274 | # Check if this is a plain text VBA or VBScript file: |
| 2271 | 2275 | # To avoid scanning binary files, we simply check for some control chars: |
| 2272 | - if self.type is None and '\x00' not in data: | |
| 2276 | + if self.type is None and b'\x00' not in data: | |
| 2273 | 2277 | self.open_text(data) |
| 2274 | 2278 | if self.type is None: |
| 2275 | 2279 | # At this stage, could not match a known format: | ... | ... |
oletools/olevba3.py
| ... | ... | @@ -12,6 +12,7 @@ Supported formats: |
| 12 | 12 | - PowerPoint 97-2003 (.ppt), PowerPoint 2007+ (.pptm, .ppsm) |
| 13 | 13 | - Word 2003 XML (.xml) |
| 14 | 14 | - Word/Excel Single File Web Page / MHTML (.mht) |
| 15 | +- Publisher (.pub) | |
| 15 | 16 | |
| 16 | 17 | Author: Philippe Lagadec - http://www.decalage.info |
| 17 | 18 | License: BSD, see source code or documentation |
| ... | ... | @@ -72,6 +73,8 @@ https://github.com/unixfreak0037/officeparser |
| 72 | 73 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 73 | 74 | # SOFTWARE. |
| 74 | 75 | |
| 76 | +from __future__ import print_function | |
| 77 | + | |
| 75 | 78 | #------------------------------------------------------------------------------ |
| 76 | 79 | # CHANGELOG: |
| 77 | 80 | # 2014-08-05 v0.01 PL: - first version based on officeparser code |
| ... | ... | @@ -178,9 +181,16 @@ https://github.com/unixfreak0037/officeparser |
| 178 | 181 | # 2016-06-12 v0.50 PL: - fixed small bugs in VBA parsing code |
| 179 | 182 | # 2016-07-01 PL: - fixed issue #58 with format() to support Python 2.6 |
| 180 | 183 | # 2016-07-29 CH: - fixed several bugs including #73 (Mac Roman encoding) |
| 184 | +# 2016-08-31 PL: - added autoexec keyword InkPicture_Painted | |
| 185 | +# - detect_autoexec now returns the exact keyword found | |
| 186 | +# 2016-09-05 PL: - added autoexec keywords for MS Publisher (.pub) | |
| 187 | +# 2016-09-06 PL: - fixed issue #20, is_zipfile on Python 2.6 | |
| 188 | +# 2016-09-12 PL: - enabled packrat to improve pyparsing performance | |
| 189 | +# 2016-10-25 PL: - fixed raise and print statements for Python 3 | |
| 181 | 190 | # 2016-10-25 PL: - fixed regex bytes strings (PR/issue #100) |
| 191 | +# 2016-11-03 v0.51 PL: - added EnumDateFormats and EnumSystemLanguageGroupsW | |
| 182 | 192 | |
| 183 | -__version__ = '0.50' | |
| 193 | +__version__ = '0.51a' | |
| 184 | 194 | |
| 185 | 195 | #------------------------------------------------------------------------------ |
| 186 | 196 | # TODO: |
| ... | ... | @@ -260,6 +270,22 @@ import oletools.ppt_parser as ppt_parser |
| 260 | 270 | import email.feedparser |
| 261 | 271 | email.feedparser.headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:?|[\t ])') |
| 262 | 272 | |
| 273 | +# === PYTHON 2+3 SUPPORT ====================================================== | |
| 274 | + | |
| 275 | +if sys.version_info[0] <= 2: | |
| 276 | + # Python 2.x | |
| 277 | + if sys.version_info[1] <= 6: | |
| 278 | + # Python 2.6 | |
| 279 | + # use is_zipfile backported from Python 2.7: | |
| 280 | + from thirdparty.zipfile27 import is_zipfile | |
| 281 | + else: | |
| 282 | + # Python 2.7 | |
| 283 | + from zipfile import is_zipfile | |
| 284 | +else: | |
| 285 | + # Python 3.x+ | |
| 286 | + from zipfile import is_zipfile | |
| 287 | + # xrange is now called range: | |
| 288 | + xrange = range | |
| 263 | 289 | |
| 264 | 290 | # === LOGGING ================================================================= |
| 265 | 291 | |
| ... | ... | @@ -438,7 +464,7 @@ ATTR_NAME = NS_W + 'name' |
| 438 | 464 | AUTOEXEC_KEYWORDS = { |
| 439 | 465 | # MS Word: |
| 440 | 466 | 'Runs when the Word document is opened': |
| 441 | - ('AutoExec', 'AutoOpen', 'Document_Open', 'DocumentOpen'), | |
| 467 | + ('AutoExec', 'AutoOpen', 'DocumentOpen'), | |
| 442 | 468 | 'Runs when the Word document is closed': |
| 443 | 469 | ('AutoExit', 'AutoClose', 'Document_Close', 'DocumentBeforeClose'), |
| 444 | 470 | 'Runs when the Word document is modified': |
| ... | ... | @@ -446,13 +472,24 @@ AUTOEXEC_KEYWORDS = { |
| 446 | 472 | 'Runs when a new Word document is created': |
| 447 | 473 | ('AutoNew', 'Document_New', 'NewDocument'), |
| 448 | 474 | |
| 475 | + # MS Word and Publisher: | |
| 476 | + 'Runs when the Word or Publisher document is opened': | |
| 477 | + ('Document_Open',), | |
| 478 | + 'Runs when the Publisher document is closed': | |
| 479 | + ('Document_BeforeClose',), | |
| 480 | + | |
| 449 | 481 | # MS Excel: |
| 450 | 482 | 'Runs when the Excel Workbook is opened': |
| 451 | 483 | ('Auto_Open', 'Workbook_Open', 'Workbook_Activate'), |
| 452 | 484 | 'Runs when the Excel Workbook is closed': |
| 453 | 485 | ('Auto_Close', 'Workbook_Close'), |
| 454 | 486 | |
| 455 | - #TODO: full list in MS specs?? | |
| 487 | + # any MS Office application: | |
| 488 | + 'Runs when the file is opened (using InkPicture ActiveX object)': | |
| 489 | + # ref:https://twitter.com/joe4security/status/770691099988025345 | |
| 490 | + (r'\w+_Painted',), | |
| 491 | + 'Runs when the file is opened and ActiveX objects trigger events': | |
| 492 | + (r'\w+_(?:GotFocus|LostFocus|MouseHover)',), | |
| 456 | 493 | } |
| 457 | 494 | |
| 458 | 495 | # Suspicious Keywords that may be used by malware |
| ... | ... | @@ -516,7 +553,11 @@ SUSPICIOUS_KEYWORDS = { |
| 516 | 553 | ('Lib',), |
| 517 | 554 | 'May inject code into another process': |
| 518 | 555 | ('CreateThread', 'VirtualAlloc', # (issue #9) suggested by Davy Douhine - used by MSF payload |
| 556 | + 'VirtualAllocEx', 'RtlMoveMemory', | |
| 519 | 557 | ), |
| 558 | + 'May run a shellcode in memory': | |
| 559 | + ('EnumSystemLanguageGroupsW?', # Used by Hancitor in Oct 2016 | |
| 560 | + 'EnumDateFormats(?:W|(?:Ex){1,2})?'), # see https://msdn.microsoft.com/en-us/library/windows/desktop/dd317810(v=vs.85).aspx | |
| 520 | 561 | 'May download files from the Internet': |
| 521 | 562 | #TODO: regex to find urlmon+URLDownloadToFileA on same line |
| 522 | 563 | ('URLDownloadToFileA', 'Msxml2.XMLHTTP', 'Microsoft.XMLHTTP', |
| ... | ... | @@ -532,7 +573,7 @@ SUSPICIOUS_KEYWORDS = { |
| 532 | 573 | 'May attempt to obfuscate malicious function calls': |
| 533 | 574 | ('CallByName',), |
| 534 | 575 | #CallByName: http://msdn.microsoft.com/en-us/library/office/gg278760%28v=office.15%29.aspx |
| 535 | - 'May attempt to obfuscate specific strings': | |
| 576 | + 'May attempt to obfuscate specific strings (use option --deobf to deobfuscate)': | |
| 536 | 577 | #TODO: regex to find several Chr*, not just one |
| 537 | 578 | ('Chr', 'ChrB', 'ChrW', 'StrReverse', 'Xor'), |
| 538 | 579 | #Chr: http://msdn.microsoft.com/en-us/library/office/gg264465%28v=office.15%29.aspx |
| ... | ... | @@ -571,8 +612,6 @@ SUSPICIOUS_KEYWORDS = { |
| 571 | 612 | 'May detect WinJail Sandbox': |
| 572 | 613 | # ref: http://www.cplusplus.com/forum/windows/96874/ |
| 573 | 614 | ('Afx:400000:0',), |
| 574 | - 'Memory manipulation': | |
| 575 | - ('VirtualAllocEx', 'RtlMoveMemory'), | |
| 576 | 615 | } |
| 577 | 616 | |
| 578 | 617 | # Regular Expression for a URL: |
| ... | ... | @@ -646,6 +685,10 @@ re_printable_string = re.compile(b'[\\t\\r\\n\\x20-\\xFF]{5,}') |
| 646 | 685 | # TODO: set whitespaces according to VBA |
| 647 | 686 | # TODO: merge extended lines before parsing |
| 648 | 687 | |
| 688 | +# Enable PackRat for better performance: | |
| 689 | +# (see https://pythonhosted.org/pyparsing/pyparsing.ParserElement-class.html#enablePackrat) | |
| 690 | +ParserElement.enablePackrat() | |
| 691 | + | |
| 649 | 692 | # VBA identifier chars (from MS-VBAL 3.3.5) |
| 650 | 693 | vba_identifier_chars = alphanums + '_' |
| 651 | 694 | |
| ... | ... | @@ -1712,9 +1755,11 @@ def detect_autoexec(vba_code, obfuscation=None): |
| 1712 | 1755 | for keyword in keywords: |
| 1713 | 1756 | #TODO: if keyword is already a compiled regex, use it as-is |
| 1714 | 1757 | # search using regex to detect word boundaries: |
| 1715 | - if re.search(r'(?i)\b' + keyword + r'\b', vba_code): | |
| 1758 | + match = re.search(r'(?i)\b' + keyword + r'\b', vba_code) | |
| 1759 | + if match: | |
| 1716 | 1760 | #if keyword.lower() in vba_code: |
| 1717 | - results.append((keyword, description + obf_text)) | |
| 1761 | + found_keyword = match.group() | |
| 1762 | + results.append((found_keyword, description + obf_text)) | |
| 1718 | 1763 | return results |
| 1719 | 1764 | |
| 1720 | 1765 | |
| ... | ... | @@ -1736,9 +1781,11 @@ def detect_suspicious(vba_code, obfuscation=None): |
| 1736 | 1781 | for description, keywords in SUSPICIOUS_KEYWORDS.items(): |
| 1737 | 1782 | for keyword in keywords: |
| 1738 | 1783 | # search using regex to detect word boundaries: |
| 1739 | - if re.search(r'(?i)\b' + keyword + r'\b', vba_code): | |
| 1784 | + match = re.search(r'(?i)\b' + keyword + r'\b', vba_code) | |
| 1785 | + if match: | |
| 1740 | 1786 | #if keyword.lower() in vba_code: |
| 1741 | - results.append((keyword, description + obf_text)) | |
| 1787 | + found_keyword = match.group() | |
| 1788 | + results.append((found_keyword, description + obf_text)) | |
| 1742 | 1789 | return results |
| 1743 | 1790 | |
| 1744 | 1791 | |
| ... | ... | @@ -2203,7 +2250,7 @@ class VBA_Parser(object): |
| 2203 | 2250 | |
| 2204 | 2251 | # if this worked, try whether it is a ppt file (special ole file) |
| 2205 | 2252 | self.open_ppt() |
| 2206 | - if self.type is None and zipfile.is_zipfile(_file): | |
| 2253 | + if self.type is None and is_zipfile(_file): | |
| 2207 | 2254 | # Zip file, which may be an OpenXML document |
| 2208 | 2255 | self.open_openxml(_file) |
| 2209 | 2256 | if self.type is None: |
| ... | ... | @@ -2606,7 +2653,7 @@ class VBA_Parser(object): |
| 2606 | 2653 | # Also look for VBA code in any stream including orphans |
| 2607 | 2654 | # (happens in some malformed files) |
| 2608 | 2655 | ole = self.ole_file |
| 2609 | - for sid in range(len(ole.direntries)): | |
| 2656 | + for sid in xrange(len(ole.direntries)): | |
| 2610 | 2657 | # check if id is already done above: |
| 2611 | 2658 | log.debug('Checking DirEntry #%d' % sid) |
| 2612 | 2659 | d = ole.direntries[sid] |
| ... | ... | @@ -2672,7 +2719,7 @@ class VBA_Parser(object): |
| 2672 | 2719 | # Also look for VBA code in any stream including orphans |
| 2673 | 2720 | # (happens in some malformed files) |
| 2674 | 2721 | ole = self.ole_file |
| 2675 | - for sid in range(len(ole.direntries)): | |
| 2722 | + for sid in xrange(len(ole.direntries)): | |
| 2676 | 2723 | # check if id is already done above: |
| 2677 | 2724 | log.debug('Checking DirEntry #%d' % sid) |
| 2678 | 2725 | if sid in vba_stream_ids: |
| ... | ... | @@ -3099,7 +3146,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 3099 | 3146 | if self.detect_vba_macros(): |
| 3100 | 3147 | # print a waiting message only if the output is not redirected to a file: |
| 3101 | 3148 | if sys.stdout.isatty(): |
| 3102 | - print('Analysis...\r') | |
| 3149 | + print('Analysis...\r', end='') | |
| 3103 | 3150 | sys.stdout.flush() |
| 3104 | 3151 | self.analyze_macros(show_decoded_strings=show_decoded_strings, |
| 3105 | 3152 | deobfuscate=deobfuscate) | ... | ... |