Commit 7e150ed3635bbf18437aa16e04a6bc78d63783fd
1 parent
c1d26ba7
oletools v0.04: Fixed bug in rtfobj, added documentation for rtfobj
Showing
3 changed files
with
65 additions
and
6 deletions
README.md
| @@ -14,11 +14,13 @@ Tools in python-oletools: | @@ -14,11 +14,13 @@ Tools in python-oletools: | ||
| 14 | - **pyxswf**: a tool to detect, extract and analyze Flash objects (SWF) that may | 14 | - **pyxswf**: a tool to detect, extract and analyze Flash objects (SWF) that may |
| 15 | be embedded in files such as MS Office documents (e.g. Word, Excel) and RTF, | 15 | be embedded in files such as MS Office documents (e.g. Word, Excel) and RTF, |
| 16 | which is especially useful for malware analysis. | 16 | which is especially useful for malware analysis. |
| 17 | +- **rtfobj**: a tool and python module to extract embedded objects from RTF files. | ||
| 17 | - and a few others (coming soon) | 18 | - and a few others (coming soon) |
| 18 | 19 | ||
| 19 | News | 20 | News |
| 20 | ---- | 21 | ---- |
| 21 | 22 | ||
| 23 | +- 2013-04-18 v0.04: fixed bug in rtfobj, added documentation for rtfobj | ||
| 22 | - 2012-11-09 v0.03: Improved pyxswf to extract Flash objects from RTF | 24 | - 2012-11-09 v0.03: Improved pyxswf to extract Flash objects from RTF |
| 23 | - 2012-10-29 v0.02: Added oleid | 25 | - 2012-10-29 v0.02: Added oleid |
| 24 | - 2012-10-09 v0.01: Initial version of olebrowse and pyxswf | 26 | - 2012-10-09 v0.01: Initial version of olebrowse and pyxswf |
| @@ -137,6 +139,26 @@ Example 2 - detecting and extracting a SWF file from a RTF document on Windows: | @@ -137,6 +139,26 @@ Example 2 - detecting and extracting a SWF file from a RTF document on Windows: | ||
| 137 | For more info, see [http://www.decalage.info/python/pyxswf](http://www.decalage.info/python/pyxswf) | 139 | For more info, see [http://www.decalage.info/python/pyxswf](http://www.decalage.info/python/pyxswf) |
| 138 | 140 | ||
| 139 | 141 | ||
| 142 | +rtfobj | ||
| 143 | +------ | ||
| 144 | + | ||
| 145 | +rtfobj is a Python module to extract embedded objects from RTF files, such as | ||
| 146 | +OLE objects. It can be used as a Python library or a command-line tool. | ||
| 147 | + | ||
| 148 | + Usage: rtfobj.py <file.rtf> | ||
| 149 | + | ||
| 150 | +It extracts and decodes all the data blocks encoded as hexadecimal in the RTF document, and saves them as files named "object_xxxx.bin", xxxx being the location of the object in the RTF file. | ||
| 151 | + | ||
| 152 | +Usage as python module: rtf_iter_objects(filename) is an iterator which yields a tuple (index, object) providing the index of each hexadecimal stream in the RTF file, and the corresponding decoded object. Example: | ||
| 153 | + | ||
| 154 | + import rtfobj | ||
| 155 | + for index, data in rtfobj.rtf_iter_objects("myfile.rtf"): | ||
| 156 | + print 'found object size %d at index %08X' % (len(data), index) | ||
| 157 | + | ||
| 158 | + | ||
| 159 | +For more info, see [http://www.decalage.info/python/rtfobj](http://www.decalage.info/python/rtfobj) | ||
| 160 | + | ||
| 161 | + | ||
| 140 | How to contribute: | 162 | How to contribute: |
| 141 | ------------------ | 163 | ------------------ |
| 142 | 164 | ||
| @@ -154,7 +176,7 @@ License | @@ -154,7 +176,7 @@ License | ||
| 154 | 176 | ||
| 155 | This license applies to the python-oletools package, apart from the thirdparty folder which contains third-party files published with their own license. | 177 | This license applies to the python-oletools package, apart from the thirdparty folder which contains third-party files published with their own license. |
| 156 | 178 | ||
| 157 | -The python-oletools package is copyright (c) 2012, Philippe Lagadec (http://www.decalage.info) | 179 | +The python-oletools package is copyright (c) 2012-2013, Philippe Lagadec (http://www.decalage.info) |
| 158 | All rights reserved. | 180 | All rights reserved. |
| 159 | 181 | ||
| 160 | Redistribution and use in source and binary forms, with or without modification, | 182 | Redistribution and use in source and binary forms, with or without modification, |
oletools/README.txt
| @@ -26,11 +26,14 @@ Tools in python-oletools: | @@ -26,11 +26,14 @@ Tools in python-oletools: | ||
| 26 | - **pyxswf**: a tool to detect, extract and analyze Flash objects (SWF) | 26 | - **pyxswf**: a tool to detect, extract and analyze Flash objects (SWF) |
| 27 | that may be embedded in files such as MS Office documents (e.g. Word, | 27 | that may be embedded in files such as MS Office documents (e.g. Word, |
| 28 | Excel) and RTF, which is especially useful for malware analysis. | 28 | Excel) and RTF, which is especially useful for malware analysis. |
| 29 | +- **rtfobj**: a tool and python module to extract embedded objects from | ||
| 30 | + RTF files. | ||
| 29 | - and a few others (coming soon) | 31 | - and a few others (coming soon) |
| 30 | 32 | ||
| 31 | News | 33 | News |
| 32 | ---- | 34 | ---- |
| 33 | 35 | ||
| 36 | +- 2013-04-18 v0.04: fixed bug in rtfobj, added documentation for rtfobj | ||
| 34 | - 2012-11-09 v0.03: Improved pyxswf to extract Flash objects from RTF | 37 | - 2012-11-09 v0.03: Improved pyxswf to extract Flash objects from RTF |
| 35 | - 2012-10-29 v0.02: Added oleid | 38 | - 2012-10-29 v0.02: Added oleid |
| 36 | - 2012-10-09 v0.01: Initial version of olebrowse and pyxswf | 39 | - 2012-10-09 v0.01: Initial version of olebrowse and pyxswf |
| @@ -174,6 +177,35 @@ Windows: | @@ -174,6 +177,35 @@ Windows: | ||
| 174 | For more info, see | 177 | For more info, see |
| 175 | `http://www.decalage.info/python/pyxswf <http://www.decalage.info/python/pyxswf>`_ | 178 | `http://www.decalage.info/python/pyxswf <http://www.decalage.info/python/pyxswf>`_ |
| 176 | 179 | ||
| 180 | +rtfobj | ||
| 181 | +------ | ||
| 182 | + | ||
| 183 | +rtfobj is a Python module to extract embedded objects from RTF files, | ||
| 184 | +such as OLE objects. It can be used as a Python library or a command-line | ||
| 185 | +tool. | ||
| 186 | + | ||
| 187 | +:: | ||
| 188 | + | ||
| 189 | + Usage: rtfobj.py <file.rtf> | ||
| 190 | + | ||
| 191 | +It extracts and decodes all the data blocks encoded as hexadecimal in | ||
| 192 | +the RTF document, and saves them as files named "object\_xxxx.bin", xxxx | ||
| 193 | +being the location of the object in the RTF file. | ||
| 194 | + | ||
| 195 | +Usage as python module: rtf\_iter\_objects(filename) is an iterator | ||
| 196 | +which yields a tuple (index, object) providing the index of each | ||
| 197 | +hexadecimal stream in the RTF file, and the corresponding decoded | ||
| 198 | +object. Example: | ||
| 199 | + | ||
| 200 | +:: | ||
| 201 | + | ||
| 202 | + import rtfobj | ||
| 203 | + for index, data in rtfobj.rtf_iter_objects("myfile.rtf"): | ||
| 204 | + print 'found object size %d at index %08X' % (len(data), index) | ||
| 205 | + | ||
| 206 | +For more info, see | ||
| 207 | +`http://www.decalage.info/python/rtfobj <http://www.decalage.info/python/rtfobj>`_ | ||
| 208 | + | ||
| 177 | How to contribute: | 209 | How to contribute: |
| 178 | ------------------ | 210 | ------------------ |
| 179 | 211 | ||
| @@ -199,7 +231,7 @@ This license applies to the python-oletools package, apart from the | @@ -199,7 +231,7 @@ This license applies to the python-oletools package, apart from the | ||
| 199 | thirdparty folder which contains third-party files published with their | 231 | thirdparty folder which contains third-party files published with their |
| 200 | own license. | 232 | own license. |
| 201 | 233 | ||
| 202 | -The python-oletools package is copyright (c) 2012, Philippe Lagadec | 234 | +The python-oletools package is copyright (c) 2012-2013, Philippe Lagadec |
| 203 | (http://www.decalage.info) All rights reserved. | 235 | (http://www.decalage.info) All rights reserved. |
| 204 | 236 | ||
| 205 | Redistribution and use in source and binary forms, with or without | 237 | Redistribution and use in source and binary forms, with or without |
oletools/rtfobj.py
| 1 | #!/usr/bin/env python | 1 | #!/usr/bin/env python |
| 2 | """ | 2 | """ |
| 3 | -rtfobj.py - Philippe Lagadec 2012-11-09 | 3 | +rtfobj.py - Philippe Lagadec 2013-04-02 |
| 4 | 4 | ||
| 5 | rtfobj is a Python module to extract embedded objects from RTF files, such as | 5 | rtfobj is a Python module to extract embedded objects from RTF files, such as |
| 6 | OLE objects. It can be used as a Python library or a command-line tool. | 6 | OLE objects. It can be used as a Python library or a command-line tool. |
| @@ -12,7 +12,7 @@ rtfobj project website: http://www.decalage.info/python/rtfobj | @@ -12,7 +12,7 @@ rtfobj project website: http://www.decalage.info/python/rtfobj | ||
| 12 | rtfobj is part of the python-oletools package: | 12 | rtfobj is part of the python-oletools package: |
| 13 | http://www.decalage.info/python/oletools | 13 | http://www.decalage.info/python/oletools |
| 14 | 14 | ||
| 15 | -rtfobj is copyright (c) 2012, Philippe Lagadec (http://www.decalage.info) | 15 | +rtfobj is copyright (c) 2012-2013, Philippe Lagadec (http://www.decalage.info) |
| 16 | All rights reserved. | 16 | All rights reserved. |
| 17 | 17 | ||
| 18 | Redistribution and use in source and binary forms, with or without modification, | 18 | Redistribution and use in source and binary forms, with or without modification, |
| @@ -36,15 +36,18 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | @@ -36,15 +36,18 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 36 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 36 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 37 | """ | 37 | """ |
| 38 | 38 | ||
| 39 | -__version__ = '0.01' | 39 | +__version__ = '0.02' |
| 40 | 40 | ||
| 41 | #------------------------------------------------------------------------------ | 41 | #------------------------------------------------------------------------------ |
| 42 | # CHANGELOG: | 42 | # CHANGELOG: |
| 43 | # 2012-11-09 v0.01 PL: - first version | 43 | # 2012-11-09 v0.01 PL: - first version |
| 44 | +# 2013-04-02 v0.02 PL: - fixed bug in main | ||
| 44 | 45 | ||
| 45 | #------------------------------------------------------------------------------ | 46 | #------------------------------------------------------------------------------ |
| 46 | # TODO: | 47 | # TODO: |
| 47 | # - improve regex pattern for better performance? | 48 | # - improve regex pattern for better performance? |
| 49 | +# - allow semicolon within hex, as found in this sample: | ||
| 50 | +# http://contagiodump.blogspot.nl/2011/10/sep-28-cve-2010-3333-manuscript-with.html | ||
| 48 | 51 | ||
| 49 | import re, sys, string, binascii | 52 | import re, sys, string, binascii |
| 50 | 53 | ||
| @@ -54,6 +57,8 @@ import re, sys, string, binascii | @@ -54,6 +57,8 @@ import re, sys, string, binascii | ||
| 54 | # several pairs of hex chars, at least 4 pairs: (?:[0-9A-Fa-f]{2}){4,} | 57 | # several pairs of hex chars, at least 4 pairs: (?:[0-9A-Fa-f]{2}){4,} |
| 55 | # at least 4 pairs of hex chars, followed by optional whitespace or CR/LF: (?:[0-9A-Fa-f]{2}){4,}\s* | 58 | # at least 4 pairs of hex chars, followed by optional whitespace or CR/LF: (?:[0-9A-Fa-f]{2}){4,}\s* |
| 56 | PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}' | 59 | PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}' |
| 60 | +# placeholder for an improved pattern allowing semicolons within hex (still identical to PATTERN above, see TODO): | ||
| 61 | +#PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}' | ||
| 57 | 62 | ||
| 58 | # a dummy translation table for str.translate, which does not change anything: | 63 | # a dummy translation table for str.translate, which does not change anything: |
| 59 | TRANSTABLE_NOCHANGE = string.maketrans('', '') | 64 | TRANSTABLE_NOCHANGE = string.maketrans('', '') |
| @@ -78,7 +83,7 @@ def rtf_iter_objects (filename, min_size=32): | @@ -78,7 +83,7 @@ def rtf_iter_objects (filename, min_size=32): | ||
| 78 | yield m.start(), found | 83 | yield m.start(), found |
| 79 | 84 | ||
| 80 | if __name__ == '__main__': | 85 | if __name__ == '__main__': |
| 81 | - if len(sys.argv<2): | 86 | + if len(sys.argv)<2: |
| 82 | sys.exit(__doc__) | 87 | sys.exit(__doc__) |
| 83 | for index, data in rtf_iter_objects(sys.argv[1]): | 88 | for index, data in rtf_iter_objects(sys.argv[1]): |
| 84 | print 'found object size %d at index %08X' % (len(data), index) | 89 | print 'found object size %d at index %08X' % (len(data), index) |