Commit 7e150ed3635bbf18437aa16e04a6bc78d63783fd
1 parent
c1d26ba7
oletools v0.04: Fixed bug in rtfobj, added documentation for rtfobj
Showing
3 changed files
with
65 additions
and
6 deletions
README.md
| ... | ... | @@ -14,11 +14,13 @@ Tools in python-oletools: |
| 14 | 14 | - **pyxswf**: a tool to detect, extract and analyze Flash objects (SWF) that may |
| 15 | 15 | be embedded in files such as MS Office documents (e.g. Word, Excel) and RTF, |
| 16 | 16 | which is especially useful for malware analysis. |
| 17 | +- **rtfobj**: a tool and python module to extract embedded objects from RTF files. | |
| 17 | 18 | - and a few others (coming soon) |
| 18 | 19 | |
| 19 | 20 | News |
| 20 | 21 | ---- |
| 21 | 22 | |
| 23 | +- 2013-04-18 v0.04: fixed bug in rtfobj, added documentation for rtfobj | |
| 22 | 24 | - 2012-11-09 v0.03: Improved pyxswf to extract Flash objects from RTF |
| 23 | 25 | - 2012-10-29 v0.02: Added oleid |
| 24 | 26 | - 2012-10-09 v0.01: Initial version of olebrowse and pyxswf |
| ... | ... | @@ -137,6 +139,26 @@ Example 2 - detecting and extracting a SWF file from a RTF document on Windows: |
| 137 | 139 | For more info, see [http://www.decalage.info/python/pyxswf](http://www.decalage.info/python/pyxswf) |
| 138 | 140 | |
| 139 | 141 | |
| 142 | +rtfobj | |
| 143 | +------ | |
| 144 | + | |
| 145 | +rtfobj is a Python module to extract embedded objects from RTF files, such as | |
| 146 | +OLE objects. It can be used as a Python library or a command-line tool. | |
| 147 | + | |
| 148 | + Usage: rtfobj.py <file.rtf> | |
| 149 | + | |
| 150 | +It extracts and decodes all the data blocks encoded as hexadecimal in the RTF document, and saves them as files named "object_xxxx.bin", xxxx being the location of the object in the RTF file. | |
| 151 | + | |
| 152 | +Usage as python module: rtf_iter_objects(filename) is an iterator which yields a tuple (index, object) providing the index of each hexadecimal stream in the RTF file, and the corresponding decoded object. Example: | |
| 153 | + | |
| 154 | + import rtfobj | |
| 155 | + for index, data in rtfobj.rtf_iter_objects("myfile.rtf"): | |
| 156 | + print 'found object size %d at index %08X' % (len(data), index) | |
| 157 | + | |
| 158 | + | |
| 159 | +For more info, see [http://www.decalage.info/python/rtfobj](http://www.decalage.info/python/rtfobj) | |
| 160 | + | |
| 161 | + | |
| 140 | 162 | How to contribute: |
| 141 | 163 | ------------------ |
| 142 | 164 | |
| ... | ... | @@ -154,7 +176,7 @@ License |
| 154 | 176 | |
| 155 | 177 | This license applies to the python-oletools package, apart from the thirdparty folder which contains third-party files published with their own license. |
| 156 | 178 | |
| 157 | -The python-oletools package is copyright (c) 2012, Philippe Lagadec (http://www.decalage.info) | |
| 179 | +The python-oletools package is copyright (c) 2012-2013, Philippe Lagadec (http://www.decalage.info) | |
| 158 | 180 | All rights reserved. |
| 159 | 181 | |
| 160 | 182 | Redistribution and use in source and binary forms, with or without modification, | ... | ... |
oletools/README.txt
| ... | ... | @@ -26,11 +26,14 @@ Tools in python-oletools: |
| 26 | 26 | - **pyxswf**: a tool to detect, extract and analyze Flash objects (SWF) |
| 27 | 27 | that may be embedded in files such as MS Office documents (e.g. Word, |
| 28 | 28 | Excel) and RTF, which is especially useful for malware analysis. |
| 29 | +- **rtfobj**: a tool and python module to extract embedded objects from | |
| 30 | + RTF files. | |
| 29 | 31 | - and a few others (coming soon) |
| 30 | 32 | |
| 31 | 33 | News |
| 32 | 34 | ---- |
| 33 | 35 | |
| 36 | +- 2013-04-18 v0.04: fixed bug in rtfobj, added documentation for rtfobj | |
| 34 | 37 | - 2012-11-09 v0.03: Improved pyxswf to extract Flash objects from RTF |
| 35 | 38 | - 2012-10-29 v0.02: Added oleid |
| 36 | 39 | - 2012-10-09 v0.01: Initial version of olebrowse and pyxswf |
| ... | ... | @@ -174,6 +177,35 @@ Windows: |
| 174 | 177 | For more info, see |
| 175 | 178 | `http://www.decalage.info/python/pyxswf <http://www.decalage.info/python/pyxswf>`_ |
| 176 | 179 | |
| 180 | +rtfobj | |
| 181 | +------ | |
| 182 | + | |
| 183 | +rtfobj is a Python module to extract embedded objects from RTF files, | |
| 184 | +such as OLE objects. It can be used as a Python library or a command-line | |
| 185 | +tool. | |
| 186 | + | |
| 187 | +:: | |
| 188 | + | |
| 189 | + Usage: rtfobj.py <file.rtf> | |
| 190 | + | |
| 191 | +It extracts and decodes all the data blocks encoded as hexadecimal in | |
| 192 | +the RTF document, and saves them as files named "object\_xxxx.bin", xxxx | |
| 193 | +being the location of the object in the RTF file. | |
| 194 | + | |
| 195 | +Usage as python module: rtf\_iter\_objects(filename) is an iterator | |
| 196 | +which yields a tuple (index, object) providing the index of each | |
| 197 | +hexadecimal stream in the RTF file, and the corresponding decoded | |
| 198 | +object. Example: | |
| 199 | + | |
| 200 | +:: | |
| 201 | + | |
| 202 | + import rtfobj | |
| 203 | + for index, data in rtfobj.rtf_iter_objects("myfile.rtf"): | |
| 204 | + print 'found object size %d at index %08X' % (len(data), index) | |
| 205 | + | |
| 206 | +For more info, see | |
| 207 | +`http://www.decalage.info/python/rtfobj <http://www.decalage.info/python/rtfobj>`_ | |
| 208 | + | |
| 177 | 209 | How to contribute: |
| 178 | 210 | ------------------ |
| 179 | 211 | |
| ... | ... | @@ -199,7 +231,7 @@ This license applies to the python-oletools package, apart from the |
| 199 | 231 | thirdparty folder which contains third-party files published with their |
| 200 | 232 | own license. |
| 201 | 233 | |
| 202 | -The python-oletools package is copyright (c) 2012, Philippe Lagadec | |
| 234 | +The python-oletools package is copyright (c) 2012-2013, Philippe Lagadec | |
| 203 | 235 | (http://www.decalage.info) All rights reserved. |
| 204 | 236 | |
| 205 | 237 | Redistribution and use in source and binary forms, with or without | ... | ... |
oletools/rtfobj.py
| 1 | 1 | #!/usr/bin/env python |
| 2 | 2 | """ |
| 3 | -rtfobj.py - Philippe Lagadec 2012-11-09 | |
| 3 | +rtfobj.py - Philippe Lagadec 2013-04-02 | |
| 4 | 4 | |
| 5 | 5 | rtfobj is a Python module to extract embedded objects from RTF files, such as |
| 6 | 6 | OLE objects. It can be used as a Python library or a command-line tool. |
| ... | ... | @@ -12,7 +12,7 @@ rtfobj project website: http://www.decalage.info/python/rtfobj |
| 12 | 12 | rtfobj is part of the python-oletools package: |
| 13 | 13 | http://www.decalage.info/python/oletools |
| 14 | 14 | |
| 15 | -rtfobj is copyright (c) 2012, Philippe Lagadec (http://www.decalage.info) | |
| 15 | +rtfobj is copyright (c) 2012-2013, Philippe Lagadec (http://www.decalage.info) | |
| 16 | 16 | All rights reserved. |
| 17 | 17 | |
| 18 | 18 | Redistribution and use in source and binary forms, with or without modification, |
| ... | ... | @@ -36,15 +36,18 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 36 | 36 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 37 | 37 | """ |
| 38 | 38 | |
| 39 | -__version__ = '0.01' | |
| 39 | +__version__ = '0.02' | |
| 40 | 40 | |
| 41 | 41 | #------------------------------------------------------------------------------ |
| 42 | 42 | # CHANGELOG: |
| 43 | 43 | # 2012-11-09 v0.01 PL: - first version |
| 44 | +# 2013-04-02 v0.02 PL: - fixed bug in main | |
| 44 | 45 | |
| 45 | 46 | #------------------------------------------------------------------------------ |
| 46 | 47 | # TODO: |
| 47 | 48 | # - improve regex pattern for better performance? |
| 49 | +# - allow semicolon within hex, as found in this sample: | |
| 50 | +# http://contagiodump.blogspot.nl/2011/10/sep-28-cve-2010-3333-manuscript-with.html | |
| 48 | 51 | |
| 49 | 52 | import re, sys, string, binascii |
| 50 | 53 | |
| ... | ... | @@ -54,6 +57,8 @@ import re, sys, string, binascii |
| 54 | 57 | # several hex chars, at least 4: (?:[0-9A-Fa-f]{2}){4,} |
| 55 | 58 | # at least 4 hex chars, followed by whitespace or CR/LF: (?:[0-9A-Fa-f]{2}){4,}\s* |
| 56 | 59 | PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}' |
| 60 | +# TODO: improved pattern allowing semicolons within hex (placeholder: the commented-out pattern below is still identical to PATTERN above): | |
| 61 | +#PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}' | |
| 57 | 62 | |
| 58 | 63 | # a dummy translation table for str.translate, which does not change anything: |
| 59 | 64 | TRANSTABLE_NOCHANGE = string.maketrans('', '') |
| ... | ... | @@ -78,7 +83,7 @@ def rtf_iter_objects (filename, min_size=32): |
| 78 | 83 | yield m.start(), found |
| 79 | 84 | |
| 80 | 85 | if __name__ == '__main__': |
| 81 | - if len(sys.argv<2): | |
| 86 | + if len(sys.argv)<2: | |
| 82 | 87 | sys.exit(__doc__) |
| 83 | 88 | for index, data in rtf_iter_objects(sys.argv[1]): |
| 84 | 89 | print 'found object size %d at index %08X' % (len(data), index) | ... | ... |