Commit d71220495077b1e0b917829ffa756416ece76764
1 parent: c47b13c1
olevba: VBA_Parser: split each file format parser into a separate method
Showing 1 changed file with 142 additions and 96 deletions
oletools/olevba.py
| @@ -153,8 +153,9 @@ https://github.com/unixfreak0037/officeparser | @@ -153,8 +153,9 @@ https://github.com/unixfreak0037/officeparser | ||
| 153 | # - disabled unused option --each | 153 | # - disabled unused option --each |
| 154 | # 2015-09-22 v0.41 PL: - added new option --reveal | 154 | # 2015-09-22 v0.41 PL: - added new option --reveal |
| 155 | # - added suspicious strings for PowerShell.exe options | 155 | # - added suspicious strings for PowerShell.exe options |
| 156 | +# 2015-10-09 v0.42 PL: - VBA_Parser: split each format into a separate method | ||
| 156 | 157 | ||
| 157 | -__version__ = '0.41' | 158 | +__version__ = '0.42' |
| 158 | 159 | ||
| 159 | #------------------------------------------------------------------------------ | 160 | #------------------------------------------------------------------------------ |
| 160 | # TODO: | 161 | # TODO: |
| @@ -1719,34 +1720,10 @@ class VBA_Parser(object): | @@ -1719,34 +1720,10 @@ class VBA_Parser(object): | ||
| 1719 | # self.filename = '<file-like object>' | 1720 | # self.filename = '<file-like object>' |
| 1720 | if olefile.isOleFile(_file): | 1721 | if olefile.isOleFile(_file): |
| 1721 | # This looks like an OLE file | 1722 | # This looks like an OLE file |
| 1722 | - logging.info('Opening OLE file %s' % self.filename) | ||
| 1723 | - # Open and parse the OLE file, using unicode for path names: | ||
| 1724 | - self.type = TYPE_OLE | ||
| 1725 | - # TODO: handle OLE parsing exceptions | ||
| 1726 | - self.ole_file = olefile.OleFileIO(_file, path_encoding=None) | ||
| 1727 | - # TODO: raise TypeError if this is a Powerpoint 97 file, since VBA macros cannot be detected yet | 1723 | + self.open_ole(_file) |
| 1728 | elif zipfile.is_zipfile(_file): | 1724 | elif zipfile.is_zipfile(_file): |
| 1729 | - # This looks like a zip file, need to look for vbaProject.bin inside | ||
| 1730 | - # It can be any OLE file inside the archive | ||
| 1731 | - #...because vbaProject.bin can be renamed: | ||
| 1732 | - # see http://www.decalage.info/files/JCV07_Lagadec_OpenDocument_OpenXML_v4_decalage.pdf#page=18 | ||
| 1733 | - logging.info('Opening ZIP/OpenXML file %s' % self.filename) | ||
| 1734 | - self.type = TYPE_OpenXML | ||
| 1735 | - z = zipfile.ZipFile(_file) | ||
| 1736 | - #TODO: check if this is actually an OpenXML file | ||
| 1737 | - #TODO: if the zip file is encrypted, suggest to use the -z option, or try '-z infected' automatically | ||
| 1738 | - # check each file within the zip if it is an OLE file, by reading its magic: | ||
| 1739 | - for subfile in z.namelist(): | ||
| 1740 | - magic = z.open(subfile).read(len(olefile.MAGIC)) | ||
| 1741 | - if magic == olefile.MAGIC: | ||
| 1742 | - logging.debug('Opening OLE file %s within zip' % subfile) | ||
| 1743 | - ole_data = z.open(subfile).read() | ||
| 1744 | - try: | ||
| 1745 | - self.ole_subfiles.append(VBA_Parser(filename=subfile, data=ole_data)) | ||
| 1746 | - except: | ||
| 1747 | - logging.debug('%s is not a valid OLE file' % subfile) | ||
| 1748 | - continue | ||
| 1749 | - z.close() | 1725 | + # Zip file, which may be an OpenXML document |
| 1726 | + self.open_openxml(_file) | ||
| 1750 | else: | 1727 | else: |
| 1751 | # read file from disk, check if it is a Word 2003 XML file (WordProcessingML), Excel 2003 XML, | 1728 | # read file from disk, check if it is a Word 2003 XML file (WordProcessingML), Excel 2003 XML, |
| 1752 | # or a plain text file containing VBA code | 1729 | # or a plain text file containing VBA code |
| @@ -1754,38 +1731,9 @@ class VBA_Parser(object): | @@ -1754,38 +1731,9 @@ class VBA_Parser(object): | ||
| 1754 | data = open(filename, 'rb').read() | 1731 | data = open(filename, 'rb').read() |
| 1755 | # store a lowercase version for some tests: | 1732 | # store a lowercase version for some tests: |
| 1756 | data_lowercase = data.lower() | 1733 | data_lowercase = data.lower() |
| 1757 | - # TODO: move each format parser to a separate method | ||
| 1758 | # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace | 1734 | # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace |
| 1759 | if 'http://schemas.microsoft.com/office/word/2003/wordml' in data: | 1735 | if 'http://schemas.microsoft.com/office/word/2003/wordml' in data: |
| 1760 | - logging.info('Opening Word 2003 XML file %s' % self.filename) | ||
| 1761 | - try: | ||
| 1762 | - # parse the XML content | ||
| 1763 | - # TODO: handle XML parsing exceptions | ||
| 1764 | - et = ET.fromstring(data) | ||
| 1765 | - # set type only if parsing succeeds | ||
| 1766 | - self.type = TYPE_Word2003_XML | ||
| 1767 | - # find all the binData elements: | ||
| 1768 | - for bindata in et.getiterator(TAG_BINDATA): | ||
| 1769 | - # the binData content is an OLE container for the VBA project, compressed | ||
| 1770 | - # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. | ||
| 1771 | - # get the filename: | ||
| 1772 | - fname = bindata.get(ATTR_NAME, 'noname.mso') | ||
| 1773 | - # decode the base64 activemime | ||
| 1774 | - mso_data = binascii.a2b_base64(bindata.text) | ||
| 1775 | - if is_mso_file(mso_data): | ||
| 1776 | - # decompress the zlib data stored in the MSO file, which is the OLE container: | ||
| 1777 | - # TODO: handle different offsets => separate function | ||
| 1778 | - ole_data = mso_file_extract(mso_data) | ||
| 1779 | - try: | ||
| 1780 | - self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data)) | ||
| 1781 | - except: | ||
| 1782 | - logging.error('%s does not contain a valid OLE file' % fname) | ||
| 1783 | - else: | ||
| 1784 | - logging.error('%s is not a valid MSO file' % fname) | ||
| 1785 | - except: | ||
| 1786 | - # TODO: differentiate exceptions for each parsing stage | ||
| 1787 | - logging.exception('Failed XML parsing for file %r' % self.filename) | ||
| 1788 | - pass | 1736 | + self.open_word2003xml(data) |
| 1789 | # check if it is a MHT file (MIME HTML, Word or Excel saved as "Single File Web Page"): | 1737 | # check if it is a MHT file (MIME HTML, Word or Excel saved as "Single File Web Page"): |
| 1790 | # According to my tests, these files usually start with "MIME-Version: 1.0" on the 1st line | 1738 | # According to my tests, these files usually start with "MIME-Version: 1.0" on the 1st line |
| 1791 | # BUT Word accepts a blank line or other MIME headers inserted before, | 1739 | # BUT Word accepts a blank line or other MIME headers inserted before, |
| @@ -1793,44 +1741,7 @@ class VBA_Parser(object): | @@ -1793,44 +1741,7 @@ class VBA_Parser(object): | ||
| 1793 | # And the line is case insensitive. | 1741 | # And the line is case insensitive. |
| 1794 | # so we'll just check the presence of mime, version and multipart anywhere: | 1742 | # so we'll just check the presence of mime, version and multipart anywhere: |
| 1795 | if self.type is None and 'mime' in data_lowercase and 'version' in data_lowercase and 'multipart' in data_lowercase: | 1743 | if self.type is None and 'mime' in data_lowercase and 'version' in data_lowercase and 'multipart' in data_lowercase: |
| 1796 | - logging.info('Opening MHTML file %s' % self.filename) | ||
| 1797 | - try: | ||
| 1798 | - # parse the MIME content | ||
| 1799 | - # remove any leading whitespace or newline (workaround for issue in email package) | ||
| 1800 | - stripped_data = data.lstrip('\r\n\t ') | ||
| 1801 | - mhtml = email.message_from_string(stripped_data) | ||
| 1802 | - self.type = TYPE_MHTML | ||
| 1803 | - # find all the attached files: | ||
| 1804 | - for part in mhtml.walk(): | ||
| 1805 | - content_type = part.get_content_type() # always returns a value | ||
| 1806 | - fname = part.get_filename(None) # returns None if it fails | ||
| 1807 | - # TODO: get content-location if no filename | ||
| 1808 | - logging.debug('MHTML part: filename=%r, content-type=%r' % (fname, content_type)) | ||
| 1809 | - part_data = part.get_payload(decode=True) | ||
| 1810 | - # VBA macros are stored in a binary file named "editdata.mso". | ||
| 1811 | - # the data content is an OLE container for the VBA project, compressed | ||
| 1812 | - # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. | ||
| 1813 | - # decompress the zlib data starting at offset 0x32, which is the OLE container: | ||
| 1814 | - # check ActiveMime header: | ||
| 1815 | - if isinstance(part_data, str) and is_mso_file(part_data): | ||
| 1816 | - logging.debug('Found ActiveMime header, decompressing MSO container') | ||
| 1817 | - try: | ||
| 1818 | - ole_data = mso_file_extract(part_data) | ||
| 1819 | - try: | ||
| 1820 | - # TODO: check if it is actually an OLE file | ||
| 1821 | - # TODO: get the MSO filename from content_location? | ||
| 1822 | - self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data)) | ||
| 1823 | - except: | ||
| 1824 | - logging.debug('%s does not contain a valid OLE file' % fname) | ||
| 1825 | - except: | ||
| 1826 | - logging.exception('Failed decompressing an MSO container in %r - %s' | ||
| 1827 | - % (fname, MSG_OLEVBA_ISSUES)) | ||
| 1828 | - # TODO: bug here - need to split in smaller functions/classes? | ||
| 1829 | - except: | ||
| 1830 | - logging.exception('Failed MIME parsing for file %r - %s' | ||
| 1831 | - % (self.filename, MSG_OLEVBA_ISSUES)) | ||
| 1832 | - pass | ||
| 1833 | - | 1744 | + self.open_mht(data) |
| 1834 | #TODO: handle exceptions | 1745 | #TODO: handle exceptions |
| 1835 | #TODO: Excel 2003 XML | 1746 | #TODO: Excel 2003 XML |
| 1836 | #TODO: plain text VBA file | 1747 | #TODO: plain text VBA file |
| @@ -1839,6 +1750,141 @@ class VBA_Parser(object): | @@ -1839,6 +1750,141 @@ class VBA_Parser(object): | ||
| 1839 | logging.error(msg) | 1750 | logging.error(msg) |
| 1840 | raise TypeError(msg) | 1751 | raise TypeError(msg) |
| 1841 | 1752 | ||
| 1753 | + def open_ole(self, _file): | ||
| 1754 | + """ | ||
| 1755 | + Open an OLE file | ||
| 1756 | + :param _file: filename or file contents in a file object | ||
| 1757 | + :return: nothing | ||
| 1758 | + """ | ||
| 1759 | + logging.info('Opening OLE file %s' % self.filename) | ||
| 1760 | + try: | ||
| 1761 | + # Open and parse the OLE file, using unicode for path names: | ||
| 1762 | + self.ole_file = olefile.OleFileIO(_file, path_encoding=None) | ||
| 1763 | + # TODO: raise TypeError if this is a Powerpoint 97 file, since VBA macros cannot be detected yet | ||
| 1764 | + # set type only if parsing succeeds | ||
| 1765 | + self.type = TYPE_OLE | ||
| 1766 | + except: | ||
| 1767 | + # TODO: handle OLE parsing exceptions | ||
| 1768 | + logging.exception('Failed OLE parsing for file %r' % self.filename) | ||
| 1769 | + pass | ||
| 1770 | + | ||
| 1771 | + | ||
| 1772 | + def open_openxml(self, _file): | ||
| 1773 | + """ | ||
| 1774 | + Open an OpenXML file | ||
| 1775 | + :param _file: filename or file contents in a file object | ||
| 1776 | + :return: nothing | ||
| 1777 | + """ | ||
| 1778 | + # This looks like a zip file, need to look for vbaProject.bin inside | ||
| 1779 | + # It can be any OLE file inside the archive | ||
| 1780 | + #...because vbaProject.bin can be renamed: | ||
| 1781 | + # see http://www.decalage.info/files/JCV07_Lagadec_OpenDocument_OpenXML_v4_decalage.pdf#page=18 | ||
| 1782 | + logging.info('Opening ZIP/OpenXML file %s' % self.filename) | ||
| 1783 | + try: | ||
| 1784 | + z = zipfile.ZipFile(_file) | ||
| 1785 | + #TODO: check if this is actually an OpenXML file | ||
| 1786 | + #TODO: if the zip file is encrypted, suggest to use the -z option, or try '-z infected' automatically | ||
| 1787 | + # check each file within the zip if it is an OLE file, by reading its magic: | ||
| 1788 | + for subfile in z.namelist(): | ||
| 1789 | + magic = z.open(subfile).read(len(olefile.MAGIC)) | ||
| 1790 | + if magic == olefile.MAGIC: | ||
| 1791 | + logging.debug('Opening OLE file %s within zip' % subfile) | ||
| 1792 | + ole_data = z.open(subfile).read() | ||
| 1793 | + try: | ||
| 1794 | + self.ole_subfiles.append(VBA_Parser(filename=subfile, data=ole_data)) | ||
| 1795 | + except: | ||
| 1796 | + logging.debug('%s is not a valid OLE file' % subfile) | ||
| 1797 | + continue | ||
| 1798 | + z.close() | ||
| 1799 | + # set type only if parsing succeeds | ||
| 1800 | + self.type = TYPE_OpenXML | ||
| 1801 | + except: | ||
| 1802 | + # TODO: handle parsing exceptions | ||
| 1803 | + logging.exception('Failed Zip/OpenXML parsing for file %r' % self.filename) | ||
| 1804 | + pass | ||
| 1805 | + | ||
| 1806 | + def open_word2003xml(self, data): | ||
| 1807 | + """ | ||
| 1808 | + Open a Word 2003 XML file | ||
| 1809 | + :param data: file contents in a string or bytes | ||
| 1810 | + :return: nothing | ||
| 1811 | + """ | ||
| 1812 | + logging.info('Opening Word 2003 XML file %s' % self.filename) | ||
| 1813 | + try: | ||
| 1814 | + # parse the XML content | ||
| 1815 | + # TODO: handle XML parsing exceptions | ||
| 1816 | + et = ET.fromstring(data) | ||
| 1817 | + # find all the binData elements: | ||
| 1818 | + for bindata in et.getiterator(TAG_BINDATA): | ||
| 1819 | + # the binData content is an OLE container for the VBA project, compressed | ||
| 1820 | + # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. | ||
| 1821 | + # get the filename: | ||
| 1822 | + fname = bindata.get(ATTR_NAME, 'noname.mso') | ||
| 1823 | + # decode the base64 activemime | ||
| 1824 | + mso_data = binascii.a2b_base64(bindata.text) | ||
| 1825 | + if is_mso_file(mso_data): | ||
| 1826 | + # decompress the zlib data stored in the MSO file, which is the OLE container: | ||
| 1827 | + # TODO: handle different offsets => separate function | ||
| 1828 | + ole_data = mso_file_extract(mso_data) | ||
| 1829 | + try: | ||
| 1830 | + self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data)) | ||
| 1831 | + except: | ||
| 1832 | + logging.error('%s does not contain a valid OLE file' % fname) | ||
| 1833 | + else: | ||
| 1834 | + logging.error('%s is not a valid MSO file' % fname) | ||
| 1835 | + # set type only if parsing succeeds | ||
| 1836 | + self.type = TYPE_Word2003_XML | ||
| 1837 | + except: | ||
| 1838 | + # TODO: differentiate exceptions for each parsing stage | ||
| 1839 | + logging.exception('Failed XML parsing for file %r' % self.filename) | ||
| 1840 | + pass | ||
| 1841 | + | ||
| 1842 | + def open_mht(self, data): | ||
| 1843 | + """ | ||
| 1844 | + Open a MHTML file | ||
| 1845 | + :param data: file contents in a string or bytes | ||
| 1846 | + :return: nothing | ||
| 1847 | + """ | ||
| 1848 | + logging.info('Opening MHTML file %s' % self.filename) | ||
| 1849 | + try: | ||
| 1850 | + # parse the MIME content | ||
| 1851 | + # remove any leading whitespace or newline (workaround for issue in email package) | ||
| 1852 | + stripped_data = data.lstrip('\r\n\t ') | ||
| 1853 | + mhtml = email.message_from_string(stripped_data) | ||
| 1854 | + # find all the attached files: | ||
| 1855 | + for part in mhtml.walk(): | ||
| 1856 | + content_type = part.get_content_type() # always returns a value | ||
| 1857 | + fname = part.get_filename(None) # returns None if it fails | ||
| 1858 | + # TODO: get content-location if no filename | ||
| 1859 | + logging.debug('MHTML part: filename=%r, content-type=%r' % (fname, content_type)) | ||
| 1860 | + part_data = part.get_payload(decode=True) | ||
| 1861 | + # VBA macros are stored in a binary file named "editdata.mso". | ||
| 1862 | + # the data content is an OLE container for the VBA project, compressed | ||
| 1863 | + # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. | ||
| 1864 | + # decompress the zlib data starting at offset 0x32, which is the OLE container: | ||
| 1865 | + # check ActiveMime header: | ||
| 1866 | + if isinstance(part_data, str) and is_mso_file(part_data): | ||
| 1867 | + logging.debug('Found ActiveMime header, decompressing MSO container') | ||
| 1868 | + try: | ||
| 1869 | + ole_data = mso_file_extract(part_data) | ||
| 1870 | + try: | ||
| 1871 | + # TODO: check if it is actually an OLE file | ||
| 1872 | + # TODO: get the MSO filename from content_location? | ||
| 1873 | + self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data)) | ||
| 1874 | + except: | ||
| 1875 | + logging.debug('%s does not contain a valid OLE file' % fname) | ||
| 1876 | + except: | ||
| 1877 | + logging.exception('Failed decompressing an MSO container in %r - %s' | ||
| 1878 | + % (fname, MSG_OLEVBA_ISSUES)) | ||
| 1879 | + # TODO: bug here - need to split in smaller functions/classes? | ||
| 1880 | + # set type only if parsing succeeds | ||
| 1881 | + self.type = TYPE_MHTML | ||
| 1882 | + except: | ||
| 1883 | + logging.exception('Failed MIME parsing for file %r - %s' | ||
| 1884 | + % (self.filename, MSG_OLEVBA_ISSUES)) | ||
| 1885 | + pass | ||
| 1886 | + | ||
| 1887 | + | ||
| 1842 | def find_vba_projects(self): | 1888 | def find_vba_projects(self): |
| 1843 | """ | 1889 | """ |
| 1844 | Finds all the VBA projects stored in an OLE file. | 1890 | Finds all the VBA projects stored in an OLE file. |