diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..2c621c8
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,9 @@
+language: python
+
+python:
+ - "2.7"
+ - "3.6"
+ - "nightly"
+cache: pip
+script:
+ - python setup.py test
diff --git a/README.md b/README.md
index 9cbe39b..7f3664c 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,7 @@
python-oletools
===============
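+[![PyPI](https://img.shields.io/pypi/v/oletools.svg)](https://pypi.python.org/pypi/oletools)
+[![Build Status](https://travis-ci.org/decalage2/oletools.svg?branch=master)](https://travis-ci.org/decalage2/oletools)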
[oletools](http://www.decalage.info/python/oletools) is a package of python tools to analyze
[Microsoft OLE2 files](http://en.wikipedia.org/wiki/Compound_File_Binary_Format)
@@ -22,7 +24,17 @@ Note: python-oletools is not related to OLETools published by BeCubed Software.
News
----
-- **2016-11-01 v0.50**: all oletools now support python 2 and 3.
+- **2017-06-29 v0.51**:
+ - added the [oletools cheatsheet](https://github.com/decalage2/oletools/blob/master/cheatsheet/oletools_cheatsheet.pdf)
+ - improved [rtfobj](https://github.com/decalage2/oletools/wiki/rtfobj) to handle malformed RTF files, detect vulnerability CVE-2017-0199
+ - olevba: improved deobfuscation and Mac files support
+ - [mraptor](https://github.com/decalage2/oletools/wiki/mraptor): added more ActiveX macro triggers
+ - added [DocVarDump.vba](https://github.com/decalage2/oletools/blob/master/oletools/DocVarDump.vba) to dump document variables using Word
+ - olemap: can now detect and extract [extra data at end of file](http://decalage.info/en/ole_extradata), improved display
+ - oledir, olemeta, oletimes: added support for zip files and wildcards
+ - many [bugfixes](https://github.com/decalage2/oletools/milestone/3?closed=1) in all the tools
+ - improved Python 2+3 support
+- 2016-11-01 v0.50: all oletools now support python 2 and 3.
- olevba: several bugfixes and improvements.
- mraptor: improved detection, added mraptor_milter for Sendmail/Postfix integration.
- rtfobj: brand new RTF parser, obfuscation-aware, improved display, detect
@@ -33,13 +45,6 @@ improved handling of malformed/incomplete documents, improved error handling and
now returns an exit code based on analysis results, new --relaxed option.
[rtfobj](https://github.com/decalage2/oletools/wiki/rtfobj): improved parsing to handle obfuscated RTF documents,
added -d option to set output dir. Moved repository and documentation to GitHub.
-- 2016-04-19 v0.46: [olevba](https://github.com/decalage2/oletools/wiki/olevba)
-does not deobfuscate VBA expressions by default (much faster), new option --deobf
-to enable it. Fixed color display bug on Windows for several tools.
-- 2016-04-12 v0.45: improved [rtfobj](https://github.com/decalage2/oletools/wiki/rtfobj)
-to handle several [anti-analysis tricks](http://www.decalage.info/rtf_tricks),
-improved [olevba](https://github.com/decalage2/oletools/wiki/olevba)
-to export results in JSON format.
See the [full changelog](https://github.com/decalage2/oletools/wiki/Changelog) for more information.
@@ -67,6 +72,7 @@ Projects using oletools:
oletools are used by a number of projects and online malware analysis services,
including [Viper](http://viper.li/), [REMnux](https://remnux.org/),
+[FAME](https://certsocietegenerale.github.io/fame/),
[Hybrid-analysis.com](https://www.hybrid-analysis.com/),
[Joe Sandbox](https://www.document-analyzer.net/),
[Deepviz](https://sandbox.deepviz.com/),
@@ -129,7 +135,7 @@ License
This license applies to the python-oletools package, apart from the thirdparty folder which contains third-party files
published with their own license.
-The python-oletools package is copyright (c) 2012-2016 Philippe Lagadec (http://www.decalage.info)
+The python-oletools package is copyright (c) 2012-2017 Philippe Lagadec (http://www.decalage.info)
All rights reserved.
diff --git a/cheatsheet/oletools_cheatsheet.docx b/cheatsheet/oletools_cheatsheet.docx
new file mode 100644
index 0000000..f8ee793
--- /dev/null
+++ b/cheatsheet/oletools_cheatsheet.docx
diff --git a/cheatsheet/oletools_cheatsheet.pdf b/cheatsheet/oletools_cheatsheet.pdf
new file mode 100644
index 0000000..d91fd32
--- /dev/null
+++ b/cheatsheet/oletools_cheatsheet.pdf
diff --git a/oletools/DocVarDump.vba b/oletools/DocVarDump.vba
new file mode 100644
index 0000000..70b2137
--- /dev/null
+++ b/oletools/DocVarDump.vba
@@ -0,0 +1,117 @@
+' DocVarDump.vba
+'
+' DocVarDump is a VBA macro that can be used to dump the content of all document
+' variables stored in a MS Word document.
+'
+' USAGE:
+' 1. Open the document to be analyzed in MS Word
+' 2. Do NOT click on "Enable Content", to avoid running malicious macros
+' 3. Save the document with a new name, using the DOCX format (not doc, not docm)
+' This will remove all VBA macro code.
+' 4. Close the file, and reopen the DOCX file you just saved
+' 5. Press Alt+F11 to open the VBA Editor
+' 6. Double-click on "ThisDocument" under Project
+' 7. Copy and Paste all the code from DocVarDump.vba
+' 8. Move the cursor on the line "Sub DocVarDump()"
+' 9. Press F5: This should run the code, create a file "docvardump.txt" in the %TEMP% folder
+' containing a hex dump of all document variables, and open that file in Word.
+'
+' ALTERNATIVE: Open the document in LibreOffice/OpenOffice,
+' then go to File / Properties / Custom Properties
+'
+' Author: Philippe Lagadec - http://www.decalage.info
+' License: BSD, see source code or documentation
+'
+' DocVarDump is part of the python-oletools package:
+' http://www.decalage.info/python/oletools
+
+' CHANGELOG:
+' 2016-09-21 v0.01 PL: - First working version
+' 2017-04-10 v0.02 PL: - Added usage instructions
+
+Sub DocVarDump()
+    ' Dump every variable of the active document to %TEMP%\docvardump.txt, then open that file.
+    dumpPath = Environ("TEMP") & "\docvardump.txt"
+    fileNum = FreeFile
+    Open dumpPath For Output As #fileNum
+    For Each docVar In ActiveDocument.Variables
+        Write #fileNum, "Name = " & docVar.Name
+        Write #fileNum, "Value = " & HexDump(docVar.Value) 'TODO: check VarType, and only use hexdump for strings with non-printable chars
+        Write #fileNum,
+    Next docVar
+    Close #fileNum
+    Documents.Open (dumpPath)
+End Sub
+
+Function Hex2(value As Integer)
+    ' Return value as a hex string, left-padded with "0" to at least 2 characters.
+    Dim digits As String: digits = Hex(value)
+    If Len(digits) < 2 Then digits = "0" & digits
+    ' Strings already 2 or more characters long are returned as-is, never truncated.
+    Hex2 = digits
+End Function
+
+Function HexN(ByVal value As Long, ByVal nchars As Integer)
+    ' Return value as a hex string, left-padded with "0" to at least nchars characters.
+    ' value is a ByVal Long: the former Integer parameter overflowed above 32767,
+    ' and ByVal still accepts Integer arguments from existing callers.
+    Dim digits As String: digits = Hex(value)
+    If Len(digits) < nchars Then digits = String(nchars - Len(digits), "0") & digits
+    HexN = digits
+End Function
+
+Function ReplaceClean1(ByVal sText As String)
+    ' Return sText with chars 0-31, 129, 141, 143, 144 and 157 replaced by
+    ' "\xNN" hex escapes. ByVal: the implicit ByRef default used to write the
+    ' replacements back into the caller's own string variable.
+    Dim J As Integer
+    Dim vAddText
+    vAddText = Array(Chr(129), Chr(141), Chr(143), Chr(144), Chr(157))
+    For J = 0 To 31
+        sText = Replace(sText, Chr(J), "\x" & Hex2(J))
+    Next
+    For J = 0 To UBound(vAddText)
+        sText = Replace(sText, vAddText(J), "\x" & Hex2(Asc(vAddText(J))))
+    Next
+    ReplaceClean1 = sText
+End Function
+
+Function ReplaceClean3(ByVal sText As String)
+    ' Return sText with every character Chr(0)-Chr(31) and Chr(127)-Chr(255)
+    ' replaced by ".". sText is ByVal so the caller's string stays intact; the
+    ' implicit ByRef default used to overwrite the caller's variable.
+    Dim J As Integer
+    For J = 0 To 255
+        If J < 32 Or J > 126 Then sText = Replace(sText, Chr(J), ".")
+    Next
+    ReplaceClean3 = sText
+End Function
+
+Function HexBytes(sText As String)
+    ' Return the Asc() code of each character of sText in hex, each followed by a space.
+    Dim i As Long: HexBytes = "" ' Long: an Integer counter overflows past 32767 chars
+    For i = 1 To Len(sText)
+        HexBytes = HexBytes & Hex2(Asc(Mid(sText, i, 1))) & " " ' 1 char, not the whole tail
+    Next
+End Function
+
+
+Function HexDump(sText As String)
+    ' Return a hex dump of sText, one vbCrLf-terminated row per 8 characters.
+    ' Each row holds the 8-digit hex offset of its first character, the hex
+    ' code of every character, and the characters themselves as rendered by
+    ' ReplaceClean3.
+    Const ROW_LEN As Long = 8
+    Dim row As String
+    Dim rowOffset As String
+    Dim pos As Long
+
+    HexDump = ""
+    ' Mid() stops at the end of the string, so the shorter final row is
+    ' produced by the same loop as the full rows.
+    For pos = 1 To Len(sText) Step ROW_LEN
+        row = Mid(sText, pos, ROW_LEN)
+        ' Offsets are 0-based, while pos is the 1-based index used by Mid().
+        rowOffset = HexN(pos - 1, 8)
+        HexDump = HexDump & rowOffset & " " & HexBytes(row) & " " & ReplaceClean3(row) & vbCrLf
+    Next pos
+End Function
diff --git a/oletools/LICENSE.txt b/oletools/LICENSE.txt
index 4b9f629..5651e93 100644
--- a/oletools/LICENSE.txt
+++ b/oletools/LICENSE.txt
@@ -3,7 +3,7 @@ LICENSE for the python-oletools package:
This license applies to the python-oletools package, apart from the thirdparty
folder which contains third-party files published with their own license.
-The python-oletools package is copyright (c) 2012-2016 Philippe Lagadec (http://www.decalage.info)
+The python-oletools package is copyright (c) 2012-2017 Philippe Lagadec (http://www.decalage.info)
All rights reserved.
diff --git a/oletools/README.html b/oletools/README.html
index 2e40975..5a3199e 100644
--- a/oletools/README.html
+++ b/oletools/README.html
@@ -9,12 +9,24 @@
-oletools is a package of python tools to analyze Microsoft OLE2 files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), such as Microsoft Office documents or Outlook messages, mainly for malware analysis, forensics and debugging. It is based on the olefile parser. See http://www.decalage.info/python/oletools for more info.
+oletools is a package of python tools to analyze Microsoft OLE2 files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), such as Microsoft Office documents or Outlook messages, mainly for malware analysis, forensics and debugging. It is based on the olefile parser. See http://www.decalage.info/python/oletools for more info.
Quick links: Home page - Download/Install - Documentation - Report Issues/Suggestions/Questions - Contact the Author - Repository - Updates on Twitter
Note: python-oletools is not related to OLETools published by BeCubed Software.
News
-- 2016-11-01 v0.50: all oletools now support python 2 and 3.
+
- 2017-06-29 v0.51:
+
+- added the oletools cheatsheet
+- improved rtfobj to handle malformed RTF files, detect vulnerability CVE-2017-0199
+- olevba: improved deobfuscation and Mac files support
+- mraptor: added more ActiveX macro triggers
+- added DocVarDump.vba to dump document variables using Word
+- olemap: can now detect and extract extra data at end of file, improved display
+- oledir, olemeta, oletimes: added support for zip files and wildcards
+- many bugfixes in all the tools
+- improved Python 2+3 support
+
+- 2016-11-01 v0.50: all oletools now support python 2 and 3.
- olevba: several bugfixes and improvements.
- mraptor: improved detection, added mraptor_milter for Sendmail/Postfix integration.
@@ -22,28 +34,9 @@
- setup: now creates handy command-line scripts to run oletools from any directory.
- 2016-06-10 v0.47: olevba added PPT97 macros support, improved handling of malformed/incomplete documents, improved error handling and JSON output, now returns an exit code based on analysis results, new --relaxed option. rtfobj: improved parsing to handle obfuscated RTF documents, added -d option to set output dir. Moved repository and documentation to GitHub.
-- 2016-04-19 v0.46: olevba does not deobfuscate VBA expressions by default (much faster), new option --deobf to enable it. Fixed color display bug on Windows for several tools.
-- 2016-04-12 v0.45: improved rtfobj to handle several anti-analysis tricks, improved olevba to export results in JSON format.
-- 2016-03-11 v0.44: improved olevba to extract and analyse strings from VBA Forms.
-- 2016-03-04 v0.43: added new tool MacroRaptor (mraptor) to detect malicious macros, bugfix and slight improvements in olevba.
-- 2016-02-07 v0.42: added two new tools oledir and olemap, better handling of malformed files and several bugfixes in olevba, improved display for olemeta.
-- 2015-09-22 v0.41: added new --reveal option to olevba, to show the macro code with VBA strings deobfuscated.
-- 2015-09-17 v0.40: Improved macro deobfuscation in olevba, to decode Hex and Base64 within VBA expressions. Display printable deobfuscated strings by default. Improved the VBA_Parser API. Improved performance. Fixed issue #23 with sys.stderr.
-- 2015-06-19 v0.12: olevba can now deobfuscate VBA expressions with any combination of Chr, Asc, Val, StrReverse, Environ, +, &, using a VBA parser built with pyparsing. New options to display only the analysis results or only the macros source code. The analysis is now done on all the VBA modules at once.
-- 2015-05-29 v0.11: Improved parsing of MHTML and ActiveMime/MSO files in olevba, added several suspicious keywords to VBA scanner (thanks to @ozhermit and Davy Douhine for the suggestions)
-- 2015-05-06 v0.10: olevba now supports Word MHTML files with macros, aka "Single File Web Page" (.mht) - see issue #10 for more info
-- 2015-03-23 v0.09: olevba now supports Word 2003 XML files, added anti-sandboxing/VM detection
-- 2015-02-08 v0.08: olevba can now decode strings obfuscated with Hex/StrReverse/Base64/Dridex and extract IOCs. Added new triage mode, support for non-western codepages with olefile 0.42, improved API and display, several bugfixes.
-- 2015-01-05 v0.07: improved olevba to detect suspicious keywords and IOCs in VBA macros, can now scan several files and open password-protected zip archives, added a Python API, upgraded OleFileIO_PL to olefile v0.41
-- 2014-08-28 v0.06: added olevba, a new tool to extract VBA Macro source code from MS Office documents (97-2003 and 2007+). Improved documentation
-- 2013-07-24 v0.05: added new tools olemeta and oletimes
-- 2013-04-18 v0.04: fixed bug in rtfobj, added documentation for rtfobj
-- 2012-11-09 v0.03: Improved pyxswf to extract Flash objects from RTF
-- 2012-10-29 v0.02: Added oleid
-- 2012-10-09 v0.01: Initial version of olebrowse and pyxswf
-- see changelog in source code for more info.
-
+See the full changelog for more information.
+
- olebrowse: A simple GUI to browse OLE files (e.g. MS Word, Excel, Powerpoint documents), to view and extract individual data streams.
- oleid: to analyze OLE files to detect specific characteristics usually found in malicious files.
@@ -59,13 +52,20 @@
- and a few others (coming soon)
-oletools are used by a number of projects and online malware analysis services, including Viper, REMnux, Hybrid-analysis.com, Joe Sandbox, Deepviz, Laika BOSS, Cuckoo Sandbox, Anlyz.io, pcodedmp and probably VirusTotal. (Please contact me if you have or know a project using oletools)
+oletools are used by a number of projects and online malware analysis services, including Viper, REMnux, FAME, Hybrid-analysis.com, Joe Sandbox, Deepviz, Laika BOSS, Cuckoo Sandbox, Anlyz.io, ViperMonkey, pcodedmp, dridex.malwareconfig.com, and probably VirusTotal. (Please contact me if you have or know a project using oletools)
Download and Install:
-To use python-oletools from the command line as analysis tools, you may simply download the latest release archive and extract the files into the directory of your choice.
-You may also download the latest development version with the most recent features.
-Another possibility is to use a git client to clone the repository (https://github.com/decalage2/oletools.git) into a folder. You can then update it easily in the future.
-If you plan to use python-oletools with other Python applications or your own scripts, then the simplest solution is to use "pip install oletools" or "easy_install oletools" to download and install in one go. Otherwise you may download/extract the zip archive and run "setup.py install".
-Important: to update oletools if it is already installed, you must run "pip install -U oletools", otherwise pip will not update it.
+The recommended way to download and install/update the latest stable release of oletools is to use pip:
+
+- On Linux/Mac: sudo -H pip install -U oletools
+- On Windows: pip install -U oletools
+
+This should automatically create command-line scripts to run each tool from any directory: olevba, mraptor, rtfobj, etc.
+To get the latest development version instead:
+
+- On Linux/Mac: sudo -H pip install -U https://github.com/decalage2/oletools/archive/master.zip
+- On Windows: pip install -U https://github.com/decalage2/oletools/archive/master.zip
+
+See the documentation for other installation options.
Documentation:
The latest version of the documentation can be found online, otherwise a copy is provided in the doc subfolder of the package.
How to Suggest Improvements, Report Issues or Contribute:
@@ -75,7 +75,7 @@
The code is available in a GitHub repository. You may use it to submit enhancements using forks and pull requests.
License
This license applies to the python-oletools package, apart from the thirdparty folder which contains third-party files published with their own license.
-The python-oletools package is copyright (c) 2012-2016 Philippe Lagadec (http://www.decalage.info)
+The python-oletools package is copyright (c) 2012-2017 Philippe Lagadec (http://www.decalage.info)
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
diff --git a/oletools/README.rst b/oletools/README.rst
index 8100b3a..1399972 100644
--- a/oletools/README.rst
+++ b/oletools/README.rst
@@ -26,7 +26,29 @@ Software.
News
----
-- **2016-11-01 v0.50**: all oletools now support python 2 and 3.
+- **2017-06-29 v0.51**:
+
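+ - added the `oletools
+ cheatsheet <https://github.com/decalage2/oletools/blob/master/cheatsheet/oletools_cheatsheet.pdf>`__
+ - improved
+ `rtfobj <https://github.com/decalage2/oletools/wiki/rtfobj>`__ to
+ handle malformed RTF files, detect vulnerability CVE-2017-0199
+ - olevba: improved deobfuscation and Mac files support
+ - `mraptor <https://github.com/decalage2/oletools/wiki/mraptor>`__:
+ added more ActiveX macro triggers
+ - added
+ `DocVarDump.vba <https://github.com/decalage2/oletools/blob/master/oletools/DocVarDump.vba>`__
+ to dump document variables using Word
+ - olemap: can now detect and extract `extra data at end of
+ file <http://decalage.info/en/ole_extradata>`__, improved display
+ - oledir, olemeta, oletimes: added support for zip files and
+ wildcards
+ - many
+ `bugfixes <https://github.com/decalage2/oletools/milestone/3?closed=1>`__
+ in all the tools
+ - improved Python 2+3 support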
+
+- 2016-11-01 v0.50: all oletools now support python 2 and 3.
- olevba: several bugfixes and improvements.
- mraptor: improved detection, added mraptor\_milter for
@@ -44,92 +66,13 @@ News
`rtfobj `__:
improved parsing to handle obfuscated RTF documents, added -d option
to set output dir. Moved repository and documentation to GitHub.
-- 2016-04-19 v0.46:
- `olevba `__ does
- not deobfuscate VBA expressions by default (much faster), new option
- --deobf to enable it. Fixed color display bug on Windows for several
- tools.
-- 2016-04-12 v0.45: improved
- `rtfobj `__ to
- handle several `anti-analysis
- tricks `__, improved
- `olevba `__ to
- export results in JSON format.
-- 2016-03-11 v0.44: improved
- `olevba `__ to
- extract and analyse strings from VBA Forms.
-- 2016-03-04 v0.43: added new tool
- `MacroRaptor `__
- (mraptor) to detect malicious macros, bugfix and slight improvements
- in `olevba `__.
-- 2016-02-07 v0.42: added two new tools oledir and olemap, better
- handling of malformed files and several bugfixes in
- `olevba `__,
- improved display for
- `olemeta `__.
-- 2015-09-22 v0.41: added new --reveal option to
- `olevba `__, to
- show the macro code with VBA strings deobfuscated.
-- 2015-09-17 v0.40: Improved macro deobfuscation in
- `olevba `__, to
- decode Hex and Base64 within VBA expressions. Display printable
- deobfuscated strings by default. Improved the VBA\_Parser API.
- Improved performance. Fixed `issue
- #23 `__ with
- sys.stderr.
-- 2015-06-19 v0.12:
- `olevba `__ can
- now deobfuscate VBA expressions with any combination of Chr, Asc,
- Val, StrReverse, Environ, +, &, using a VBA parser built with
- `pyparsing `__. New options to
- display only the analysis results or only the macros source code. The
- analysis is now done on all the VBA modules at once.
-- 2015-05-29 v0.11: Improved parsing of MHTML and ActiveMime/MSO files
- in `olevba `__,
- added several suspicious keywords to VBA scanner (thanks to @ozhermit
- and Davy Douhine for the suggestions)
-- 2015-05-06 v0.10:
- `olevba `__ now
- supports Word MHTML files with macros, aka "Single File Web Page"
- (.mht) - see `issue
- #10 `__ for more
- info
-- 2015-03-23 v0.09:
- `olevba `__ now
- supports Word 2003 XML files, added anti-sandboxing/VM detection
-- 2015-02-08 v0.08:
- `olevba `__ can
- now decode strings obfuscated with Hex/StrReverse/Base64/Dridex and
- extract IOCs. Added new triage mode, support for non-western
- codepages with olefile 0.42, improved API and display, several
- bugfixes.
-- 2015-01-05 v0.07: improved
- `olevba `__ to
- detect suspicious keywords and IOCs in VBA macros, can now scan
- several files and open password-protected zip archives, added a
- Python API, upgraded OleFileIO\_PL to olefile v0.41
-- 2014-08-28 v0.06: added
- `olevba `__, a new
- tool to extract VBA Macro source code from MS Office documents
- (97-2003 and 2007+). Improved
- `documentation `__
-- 2013-07-24 v0.05: added new tools
- `olemeta `__ and
- `oletimes `__
-- 2013-04-18 v0.04: fixed bug in rtfobj, added documentation for
- `rtfobj `__
-- 2012-11-09 v0.03: Improved
- `pyxswf `__ to
- extract Flash objects from RTF
-- 2012-10-29 v0.02: Added
- `oleid `__
-- 2012-10-09 v0.01: Initial version of
- `olebrowse `__
- and pyxswf
-- see changelog in source code for more info.
-
-Tools in python-oletools:
--------------------------
+
+See the `full
+changelog <https://github.com/decalage2/oletools/wiki/Changelog>`__ for
+more information.
+
+Tools:
+------
- `olebrowse `__:
A simple GUI to browse OLE files (e.g. MS Word, Excel, Powerpoint
@@ -168,41 +111,43 @@ Projects using oletools:
oletools are used by a number of projects and online malware analysis
services, including `Viper `__,
`REMnux `__,
+`FAME <https://certsocietegenerale.github.io/fame/>`__,
`Hybrid-analysis.com `__, `Joe
Sandbox `__,
`Deepviz `__, `Laika
BOSS `__, `Cuckoo
Sandbox `__,
`Anlyz.io `__,
-`pcodedmp `__ and probably
-`VirusTotal `__. (Please `contact
+`ViperMonkey `__,
+`pcodedmp `__,
+`dridex.malwareconfig.com `__, and
+probably `VirusTotal `__. (Please `contact
me <(http://decalage.info/contact)>`__ if you have or know a project
using oletools)
Download and Install:
---------------------
-To use python-oletools from the command line as analysis tools, you may
-simply `download the latest release
-archive `__ and extract
-the files into the directory of your choice.
+The recommended way to download and install/update the **latest stable
+release** of oletools is to use
+`pip `__:
+
+- On Linux/Mac: ``sudo -H pip install -U oletools``
+- On Windows: ``pip install -U oletools``
-You may also download the `latest development
-version `__
-with the most recent features.
+This should automatically create command-line scripts to run each tool
+from any directory: ``olevba``, ``mraptor``, ``rtfobj``, etc.
-Another possibility is to use a git client to clone the repository
-(https://github.com/decalage2/oletools.git) into a folder. You can then
-update it easily in the future.
+To get the **latest development version** instead:
-If you plan to use python-oletools with other Python applications or
-your own scripts, then the simplest solution is to use "**pip install
-oletools**\ " or "**easy\_install oletools**\ " to download and install
-in one go. Otherwise you may download/extract the zip archive and run
-"**setup.py install**\ ".
+- On Linux/Mac:
+ ``sudo -H pip install -U https://github.com/decalage2/oletools/archive/master.zip``
+- On Windows:
+ ``pip install -U https://github.com/decalage2/oletools/archive/master.zip``
-**Important: to update oletools** if it is already installed, you must
-run **"pip install -U oletools"**, otherwise pip will not update it.
+See the
+`documentation `__
+for other installation options.
Documentation:
--------------
@@ -235,7 +180,7 @@ This license applies to the python-oletools package, apart from the
thirdparty folder which contains third-party files published with their
own license.
-The python-oletools package is copyright (c) 2012-2016 Philippe Lagadec
+The python-oletools package is copyright (c) 2012-2017 Philippe Lagadec
(http://www.decalage.info)
All rights reserved.
diff --git a/oletools/doc/Home.html b/oletools/doc/Home.html
index 2283a25..10de278 100644
--- a/oletools/doc/Home.html
+++ b/oletools/doc/Home.html
@@ -8,9 +8,9 @@
-
+
This is the home page of the documentation for python-oletools. The latest version can be found online, otherwise a copy is provided in the doc subfolder of the package.
-python-oletools is a package of python tools to analyze Microsoft OLE2 files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), such as Microsoft Office documents or Outlook messages, mainly for malware analysis, forensics and debugging. It is based on the olefile parser. See http://www.decalage.info/python/oletools for more info.
+python-oletools is a package of python tools to analyze Microsoft OLE2 files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), such as Microsoft Office documents or Outlook messages, mainly for malware analysis, forensics and debugging. It is based on the olefile parser. See http://www.decalage.info/python/oletools for more info.
Quick links: Home page - Download/Install - Documentation - Report Issues/Suggestions/Questions - Contact the Author - Repository - Updates on Twitter
Note: python-oletools is not related to OLETools published by BeCubed Software.
diff --git a/oletools/doc/Home.md b/oletools/doc/Home.md
index 700ee88..257fa39 100644
--- a/oletools/doc/Home.md
+++ b/oletools/doc/Home.md
@@ -1,4 +1,4 @@
-python-oletools v0.50 documentation
+python-oletools v0.51 documentation
===================================
This is the home page of the documentation for python-oletools. The latest version can be found
diff --git a/oletools/doc/License.html b/oletools/doc/License.html
index 13bc45e..71193c5 100644
--- a/oletools/doc/License.html
+++ b/oletools/doc/License.html
@@ -10,7 +10,7 @@
This license applies to the python-oletools package, apart from the thirdparty folder which contains third-party files published with their own license.
-The python-oletools package is copyright (c) 2012-2016 Philippe Lagadec (http://www.decalage.info)
+The python-oletools package is copyright (c) 2012-2017 Philippe Lagadec (http://www.decalage.info)
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
@@ -21,7 +21,7 @@
-| License for officeparser |
+License for officeparser |
diff --git a/oletools/doc/License.md b/oletools/doc/License.md
index 4c796ae..4a6defe 100644
--- a/oletools/doc/License.md
+++ b/oletools/doc/License.md
@@ -4,7 +4,7 @@ License for python-oletools
This license applies to the [python-oletools](http://www.decalage.info/python/oletools) package, apart from the
thirdparty folder which contains third-party files published with their own license.
-The python-oletools package is copyright (c) 2012-2016 Philippe Lagadec ([http://www.decalage.info](http://www.decalage.info))
+The python-oletools package is copyright (c) 2012-2017 Philippe Lagadec ([http://www.decalage.info](http://www.decalage.info))
All rights reserved.
diff --git a/oletools/doc/mraptor.html b/oletools/doc/mraptor.html
index 1c5e479..9478009 100644
--- a/oletools/doc/mraptor.html
+++ b/oletools/doc/mraptor.html
@@ -49,6 +49,7 @@ An exit code is returned based on the analysis result:
Important: on Linux/MacOSX, always add double quotes around a file name when you use wildcards such as * and ?. Otherwise, the shell may replace the argument with the actual list of files matching the wildcards before starting the script.
Python 3 support - mraptor3
As of v0.50, mraptor has been ported to Python 3 thanks to @sebdraven. However, the differences between Python 2 and 3 are significant and for now there is a separate version of mraptor named mraptor3 to be used with Python 3.
diff --git a/oletools/doc/olebrowse.html b/oletools/doc/olebrowse.html
index 6a369ca..348889c 100644
--- a/oletools/doc/olebrowse.html
+++ b/oletools/doc/olebrowse.html
@@ -24,14 +24,17 @@
Main menu, showing all streams in the OLE file:
Menu with actions for a stream:
Hex view for a stream:
diff --git a/oletools/doc/oledir.html b/oletools/doc/oledir.html
index 008e1b7..1ea3e75 100644
--- a/oletools/doc/oledir.html
+++ b/oletools/doc/oledir.html
@@ -19,6 +19,7 @@
oledir.py file.doc
How to use oledir in Python applications
diff --git a/oletools/doc/oleid.html b/oletools/doc/oleid.html
index 2f47426..d2b6543 100644
--- a/oletools/doc/oleid.html
+++ b/oletools/doc/oleid.html
@@ -7,23 +7,41 @@
@@ -76,9 +94,9 @@ Filename: word_flash_vba.doc
+-------------------------------+-----------------------+
How to use oleid in your Python applications
First, import oletools.oleid, and create an OleID object to scan a file:
-import oletools.oleid
+import oletools.oleid
-oid = oletools.oleid.OleID(filename)
+oid = oletools.oleid.OleID(filename)
Note: filename can be a filename, a file-like object, or a bytes string containing the file to be analyzed.
Second, call the check() method. It returns a list of Indicator objects.
Each Indicator object has the following attributes:
@@ -90,11 +108,11 @@ oid = oletools.oleid.OleID(filename)
- value: value of the indicator
For example, the following code displays all the indicators:
-indicators = oid.check()
-for i in indicators:
- print 'Indicator id=%s name="%s" type=%s value=%s' % (i.id, i.name, i.type, repr(i.value))
- print 'description:', i.description
- print ''
+indicators = oid.check()
+for i in indicators:
+ print 'Indicator id=%s name="%s" type=%s value=%s' % (i.id, i.name, i.type, repr(i.value))
+ print 'description:', i.description
+ print ''
See the source code of oleid.py for more details.
diff --git a/oletools/doc/olemap.html b/oletools/doc/olemap.html
index 66afaab..b6ffcf1 100644
--- a/oletools/doc/olemap.html
+++ b/oletools/doc/olemap.html
@@ -19,9 +19,11 @@
olemap.py file.doc
How to use olemap in Python applications
diff --git a/oletools/doc/olemeta.html b/oletools/doc/olemeta.html
index 5c32b1b..adefef1 100644
--- a/oletools/doc/olemeta.html
+++ b/oletools/doc/olemeta.html
@@ -16,6 +16,7 @@
Example
TODO
diff --git a/oletools/doc/olevba.html b/oletools/doc/olevba.html
index 5ed2781..c718243 100644
--- a/oletools/doc/olevba.html
+++ b/oletools/doc/olevba.html
@@ -7,23 +7,41 @@
@@ -219,22 +237,22 @@ OLE:MA----- \MalwareZoo\VBA\samples\Word within Word macro auto.doc
IMPORTANT: olevba is currently under active development, therefore this API is likely to change.
Import olevba
First, import the oletools.olevba package, using at least the VBA_Parser and VBA_Scanner classes:
-from oletools.olevba import VBA_Parser, TYPE_OLE, TYPE_OpenXML, TYPE_Word2003_XML, TYPE_MHTML
+from oletools.olevba import VBA_Parser, TYPE_OLE, TYPE_OpenXML, TYPE_Word2003_XML, TYPE_MHTML
Parse a MS Office file - VBA_Parser
To parse a file on disk, create an instance of the VBA_Parser class, providing the name of the file to open as parameter. For example:
-vbaparser = VBA_Parser('my_file_with_macros.doc')
+vbaparser = VBA_Parser('my_file_with_macros.doc')
The file may also be provided as a bytes string containing its data. In that case, the actual filename must be provided for reference, and the file content with the data parameter. For example:
-myfile = 'my_file_with_macros.doc'
-filedata = open(myfile, 'rb').read()
-vbaparser = VBA_Parser(myfile, data=filedata)
+myfile = 'my_file_with_macros.doc'
+filedata = open(myfile, 'rb').read()
+vbaparser = VBA_Parser(myfile, data=filedata)
VBA_Parser will raise an exception if the file is not a supported format, such as OLE (MS Office 97-2003), OpenXML (MS Office 2007+), MHTML or Word 2003 XML.
After parsing the file, the attribute VBA_Parser.type is a string indicating the file type. It can be either TYPE_OLE, TYPE_OpenXML, TYPE_Word2003_XML or TYPE_MHTML. (constants defined in the olevba module)
Detect VBA macros
The method detect_vba_macros of a VBA_Parser object returns True if VBA macros have been found in the file, False otherwise.
-if vbaparser.detect_vba_macros():
- print 'VBA Macros found'
-else:
- print 'No VBA Macros found'
+if vbaparser.detect_vba_macros():
+ print 'VBA Macros found'
+else:
+ print 'No VBA Macros found'
Note: The detection algorithm looks for streams and storage with specific names in the OLE structure, which works fine for all the supported formats listed above. However, for some formats such as PowerPoint 97-2003, this method will always return False because VBA Macros are stored in a different way which is not yet supported by olevba.
Moreover, if the file contains an embedded document (e.g. an Excel workbook inserted into a Word document), this method may return True if the embedded document contains VBA Macros, even if the main document does not.
@@ -246,13 +264,13 @@ vbaparser = VBA_Parser(myfile, data=filedata)
vba_code: string containing the VBA source code in clear text
Example:
-for (filename, stream_path, vba_filename, vba_code) in vbaparser.extract_macros():
- print '-'*79
- print 'Filename :', filename
- print 'OLE stream :', stream_path
- print 'VBA filename:', vba_filename
- print '- '*39
- print vba_code
+for (filename, stream_path, vba_filename, vba_code) in vbaparser.extract_macros():
+ print '-'*79
+ print 'Filename :', filename
+ print 'OLE stream :', stream_path
+ print 'VBA filename:', vba_filename
+ print '- '*39
+ print vba_code
Alternatively, the VBA_Parser method extract_all_macros returns the same results as a list of tuples.
Analyze VBA Source Code
Since version 0.40, the VBA_Parser class provides simpler methods than VBA_Scanner to analyze all macros contained in a file:
@@ -265,24 +283,24 @@ vbaparser = VBA_Parser(myfile, data=filedata)
description provides a description of the keyword. For obfuscated strings, it is the encoded value of the string.
Example:
-results = vbaparser.analyze_macros()
-for kw_type, keyword, description in results:
- print 'type=%s - keyword=%s - description=%s' % (kw_type, keyword, description)
+results = vbaparser.analyze_macros()
+for kw_type, keyword, description in results:
+ print 'type=%s - keyword=%s - description=%s' % (kw_type, keyword, description)
After calling analyze_macros, the following VBA_Parser attributes also provide the number of items found for each category:
-print 'AutoExec keywords: %d' % vbaparser.nb_autoexec
-print 'Suspicious keywords: %d' % vbaparser.nb_suspicious
-print 'IOCs: %d' % vbaparser.nb_iocs
-print 'Hex obfuscated strings: %d' % vbaparser.nb_hexstrings
-print 'Base64 obfuscated strings: %d' % vbaparser.nb_base64strings
-print 'Dridex obfuscated strings: %d' % vbaparser.nb_dridexstrings
-print 'VBA obfuscated strings: %d' % vbaparser.nb_vbastrings
+print 'AutoExec keywords: %d' % vbaparser.nb_autoexec
+print 'Suspicious keywords: %d' % vbaparser.nb_suspicious
+print 'IOCs: %d' % vbaparser.nb_iocs
+print 'Hex obfuscated strings: %d' % vbaparser.nb_hexstrings
+print 'Base64 obfuscated strings: %d' % vbaparser.nb_base64strings
+print 'Dridex obfuscated strings: %d' % vbaparser.nb_dridexstrings
+print 'VBA obfuscated strings: %d' % vbaparser.nb_vbastrings
Deobfuscate VBA Macro Source Code
The method reveal attempts to deobfuscate the macro source code by replacing all the obfuscated strings by their decoded content. Returns a single string.
Example:
-print vbaparser.reveal()
+
Close the VBA_Parser
After usage, it is better to call the close method of the VBA_Parser object, to make sure the file is closed, especially if your application is parsing many files.
-vbaparser.close()
+
Deprecated API
The following methods and functions are still functional, but their usage is not recommended since they have been replaced by better solutions.
@@ -297,54 +315,54 @@ vbaparser = VBA_Parser(myfile, data=filedata)
description provides a description of the keyword. For obfuscated strings, it is the encoded value of the string.
Example:
-vba_scanner = VBA_Scanner(vba_code)
-results = vba_scanner.scan(include_decoded_strings=True)
-for kw_type, keyword, description in results:
- print 'type=%s - keyword=%s - description=%s' % (kw_type, keyword, description)
+vba_scanner = VBA_Scanner(vba_code)
+results = vba_scanner.scan(include_decoded_strings=True)
+for kw_type, keyword, description in results:
+ print 'type=%s - keyword=%s - description=%s' % (kw_type, keyword, description)
The function scan_vba is a shortcut for VBA_Scanner(vba_code).scan():
-results = scan_vba(vba_code, include_decoded_strings=True)
-for kw_type, keyword, description in results:
- print 'type=%s - keyword=%s - description=%s' % (kw_type, keyword, description)
+results = scan_vba(vba_code, include_decoded_strings=True)
+for kw_type, keyword, description in results:
+ print 'type=%s - keyword=%s - description=%s' % (kw_type, keyword, description)
scan_summary returns a tuple with the number of items found for each category: (autoexec, suspicious, IOCs, hex, base64, dridex).
Detect auto-executable macros (deprecated)
Deprecated: It is preferable to use either scan_vba or VBA_Scanner to get all results at once.
The function detect_autoexec checks if VBA macro code contains specific macro names that will be triggered when the document/workbook is opened, closed, changed, etc.
It returns a list of tuples containing two strings, the detected keyword, and the description of the trigger. (See the malware example above)
Sample usage:
-from oletools.olevba import detect_autoexec
-autoexec_keywords = detect_autoexec(vba_code)
-if autoexec_keywords:
- print 'Auto-executable macro keywords found:'
- for keyword, description in autoexec_keywords:
- print '%s: %s' % (keyword, description)
-else:
- print 'Auto-executable macro keywords: None found'
+from oletools.olevba import detect_autoexec
+autoexec_keywords = detect_autoexec(vba_code)
+if autoexec_keywords:
+ print 'Auto-executable macro keywords found:'
+ for keyword, description in autoexec_keywords:
+ print '%s: %s' % (keyword, description)
+else:
+ print 'Auto-executable macro keywords: None found'
Detect suspicious VBA keywords (deprecated)
Deprecated: It is preferable to use either scan_vba or VBA_Scanner to get all results at once.
The function detect_suspicious checks if VBA macro code contains specific keywords often used by malware to act on the system (create files, run commands or applications, write to the registry, etc).
It returns a list of tuples containing two strings, the detected keyword, and the description of the corresponding malicious behaviour. (See the malware example above)
Sample usage:
-from oletools.olevba import detect_suspicious
-suspicious_keywords = detect_suspicious(vba_code)
-if suspicious_keywords:
- print 'Suspicious VBA keywords found:'
- for keyword, description in suspicious_keywords:
- print '%s: %s' % (keyword, description)
-else:
- print 'Suspicious VBA keywords: None found'
+from oletools.olevba import detect_suspicious
+suspicious_keywords = detect_suspicious(vba_code)
+if suspicious_keywords:
+ print 'Suspicious VBA keywords found:'
+ for keyword, description in suspicious_keywords:
+ print '%s: %s' % (keyword, description)
+else:
+ print 'Suspicious VBA keywords: None found'
Deprecated: It is preferable to use either scan_vba or VBA_Scanner to get all results at once.
The function detect_patterns checks if VBA macro code contains specific patterns of interest, that may be useful for malware analysis and detection (potential Indicators of Compromise): IP addresses, e-mail addresses, URLs, executable file names.
It returns a list of tuples containing two strings, the pattern type, and the extracted value. (See the malware example above)
Sample usage:
-from oletools.olevba import detect_patterns
-patterns = detect_patterns(vba_code)
-if patterns:
- print 'Patterns found:'
- for pattern_type, value in patterns:
- print '%s: %s' % (pattern_type, value)
-else:
- print 'Patterns: None found'
+from oletools.olevba import detect_patterns
+patterns = detect_patterns(vba_code)
+if patterns:
+ print 'Patterns found:'
+ for pattern_type, value in patterns:
+ print '%s: %s' % (pattern_type, value)
+else:
+ print 'Patterns: None found'
diff --git a/oletools/doc/rtfobj.html b/oletools/doc/rtfobj.html
index 15d9b3d..e3386d9 100644
--- a/oletools/doc/rtfobj.html
+++ b/oletools/doc/rtfobj.html
@@ -7,23 +7,41 @@
@@ -57,6 +75,7 @@ Options:
When an OLE Package object contains an executable file or script, it is highlighted as such. For example:
To extract an object or file, use the option -s followed by the object number as shown in the table.
Example:
@@ -67,9 +86,9 @@ Options:
Deprecated API (still functional):
rtf_iter_objects(filename) is an iterator which yields a tuple (index, orig_len, object) providing the index of each hexadecimal stream in the RTF file, and the corresponding decoded object.
Example:
-
from oletools import rtfobj
-for index, orig_len, data in rtfobj.rtf_iter_objects("myfile.rtf"):
- print('found object size %d at index %08X' % (len(data), index))
+from oletools import rtfobj
+for index, orig_len, data in rtfobj.rtf_iter_objects("myfile.rtf"):
+ print('found object size %d at index %08X' % (len(data), index))
diff --git a/oletools/ezhexviewer.py b/oletools/ezhexviewer.py
index aef233b..701f05e 100644
--- a/oletools/ezhexviewer.py
+++ b/oletools/ezhexviewer.py
@@ -16,7 +16,7 @@ Usage in a python application:
ezhexviewer project website: http://www.decalage.info/python/ezhexviewer
-ezhexviewer is copyright (c) 2012-2016, Philippe Lagadec (http://www.decalage.info)
+ezhexviewer is copyright (c) 2012-2017, Philippe Lagadec (http://www.decalage.info)
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
@@ -46,16 +46,32 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# 2012-10-04 v0.02 PL: - added license
# 2016-09-06 v0.50 PL: - added main function for entry points in setup.py
# 2016-10-26 PL: - fixed to run on Python 2+3
+# 2017-03-23 v0.51 PL: - fixed display of control characters (issue #151)
+# 2017-04-26 PL: - fixed absolute imports (issue #141)
-__version__ = '0.50'
+__version__ = '0.51'
-#------------------------------------------------------------------------------
+#-----------------------------------------------------------------------------
# TODO:
# + options to set title and msg
+# === IMPORTS ================================================================
+
+import sys, os
+
+# IMPORTANT: it should be possible to run oletools directly as scripts
+# in any directory without installing them with pip or setup.py.
+# In that case, relative imports are NOT usable.
+# And to enable Python 2+3 compatibility, we need to use absolute imports,
+# so we add the oletools parent folder to sys.path (absolute+normalized path):
+_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
+# print('_thismodule_dir = %r' % _thismodule_dir)
+_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
+# print('_parent_dir = %r' % _parent_dir)
+if not _parent_dir in sys.path:
+ sys.path.insert(0, _parent_dir)
-from thirdparty.easygui import easygui
-import sys
+from oletools.thirdparty.easygui import easygui
# === PYTHON 2+3 SUPPORT ======================================================
@@ -106,7 +122,7 @@ def bchr(x):
# PSF license: http://docs.python.org/license.html
# Copyright (c) 2001-2012 Python Software Foundation; All Rights Reserved
-FILTER = b''.join([(len(repr(bchr(x)))<=4 and x != 0x0A) and bchr(x) or b'.' for x in range(256)])
+FILTER = b''.join([(len(repr(bchr(x)))<=4 and x>=0x20) and bchr(x) or b'.' for x in range(256)])
def hexdump3(src, length=8, startindex=0):
"""
@@ -154,4 +170,4 @@ def main():
if __name__ == '__main__':
- main()
\ No newline at end of file
+ main()
diff --git a/oletools/mraptor.py b/oletools/mraptor.py
index ee35654..7504dbd 100644
--- a/oletools/mraptor.py
+++ b/oletools/mraptor.py
@@ -22,7 +22,7 @@ http://www.decalage.info/python/oletools
# === LICENSE ==================================================================
-# MacroRaptor is copyright (c) 2016 Philippe Lagadec (http://www.decalage.info)
+# MacroRaptor is copyright (c) 2016-2017 Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
@@ -55,6 +55,7 @@ http://www.decalage.info/python/oletools
# 2016-09-05 PL: - added Document_BeforeClose keyword for MS Publisher (.pub)
# 2016-10-25 PL: - fixed print for Python 3
# 2016-12-21 v0.51 PL: - added more ActiveX macro triggers
+# 2017-03-08 PL: - fixed absolute imports
__version__ = '0.51'
@@ -64,12 +65,24 @@ __version__ = '0.51'
#--- IMPORTS ------------------------------------------------------------------
-import sys, logging, optparse, re
+import sys, logging, optparse, re, os
-from thirdparty.xglob import xglob
-from thirdparty.tablestream import tablestream
+# IMPORTANT: it should be possible to run oletools directly as scripts
+# in any directory without installing them with pip or setup.py.
+# In that case, relative imports are NOT usable.
+# And to enable Python 2+3 compatibility, we need to use absolute imports,
+# so we add the oletools parent folder to sys.path (absolute+normalized path):
+_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
+# print('_thismodule_dir = %r' % _thismodule_dir)
+_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
+# print('_parent_dir = %r' % _parent_dir)
+if not _parent_dir in sys.path:
+ sys.path.insert(0, _parent_dir)
-import olevba
+from oletools.thirdparty.xglob import xglob
+from oletools.thirdparty.tablestream import tablestream
+
+from oletools import olevba
# === LOGGING =================================================================
@@ -228,7 +241,7 @@ def main():
'critical': logging.CRITICAL
}
- usage = 'usage: %prog [options] [filename2 ...]'
+ usage = 'usage: mraptor [options] [filename2 ...]'
parser = optparse.OptionParser(usage=usage)
parser.add_option("-r", action="store_true", dest="recursive",
help='find files recursively in subdirectories.')
@@ -247,6 +260,8 @@ def main():
# Print help if no arguments are passed
if len(args) == 0:
+ print('MacroRaptor %s - http://decalage.info/python/oletools' % __version__)
+ print('This is work in progress, please report issues at %s' % URL_ISSUES)
print(__doc__)
parser.print_help()
print('\nAn exit code is returned based on the analysis result:')
diff --git a/oletools/mraptor3.py b/oletools/mraptor3.py
index d74cf58..b421562 100644
--- a/oletools/mraptor3.py
+++ b/oletools/mraptor3.py
@@ -11,6 +11,7 @@ Supported formats:
- PowerPoint 97-2003 (.ppt), PowerPoint 2007+ (.pptm, .ppsm)
- Word 2003 XML (.xml)
- Word/Excel Single File Web Page / MHTML (.mht)
+- Publisher (.pub)
Author: Philippe Lagadec - http://www.decalage.info
License: BSD, see source code or documentation
@@ -21,7 +22,7 @@ http://www.decalage.info/python/oletools
# === LICENSE ==================================================================
-# MacroRaptor is copyright (c) 2016 Philippe Lagadec (http://www.decalage.info)
+# MacroRaptor is copyright (c) 2016-2017 Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
@@ -52,8 +53,10 @@ http://www.decalage.info/python/oletools
# 2016-03-08 v0.04 PL: - collapse long lines before analysis
# 2016-07-19 v0.50 SL: - converted to Python 3
# 2016-08-26 PL: - changed imports for Python 3
+# 2017-04-26 v0.51 PL: - fixed absolute imports (issue #141)
+# 2017-06-29 PL: - synced with mraptor.py 0.51
-__version__ = '0.50py3'
+__version__ = '0.51'
#------------------------------------------------------------------------------
# TODO:
@@ -61,15 +64,25 @@ __version__ = '0.50py3'
#--- IMPORTS ------------------------------------------------------------------
-import sys, logging, optparse, re
+import sys, os, logging, optparse, re
-from thirdparty.xglob import xglob
+# IMPORTANT: it should be possible to run oletools directly as scripts
+# in any directory without installing them with pip or setup.py.
+# In that case, relative imports are NOT usable.
+# And to enable Python 2+3 compatibility, we need to use absolute imports,
+# so we add the oletools parent folder to sys.path (absolute+normalized path):
+_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
+# print('_thismodule_dir = %r' % _thismodule_dir)
+_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
+# print('_parent_dir = %r' % _parent_dir)
+if not _parent_dir in sys.path:
+ sys.path.insert(0, _parent_dir)
-# import the python 3 version of tablestream:
-from thirdparty.tablestream import tablestream
+from oletools.thirdparty.xglob import xglob
+from oletools.thirdparty.tablestream import tablestream
# import the python 3 version of olevba
-import olevba3 as olevba
+from oletools import olevba3 as olevba
# === LOGGING =================================================================
@@ -86,15 +99,24 @@ MSG_ISSUES = 'Please report this issue on %s' % URL_ISSUES
# 'AutoExec', 'AutoOpen', 'Auto_Open', 'AutoClose', 'Auto_Close', 'AutoNew', 'AutoExit',
# 'Document_Open', 'DocumentOpen',
-# 'Document_Close', 'DocumentBeforeClose',
+# 'Document_Close', 'DocumentBeforeClose', 'Document_BeforeClose',
# 'DocumentChange','Document_New',
# 'NewDocument'
# 'Workbook_Open', 'Workbook_Close',
+# *_Painted such as InkPicture1_Painted
+# *_GotFocus|LostFocus|MouseHover for other ActiveX objects
+# reference: http://www.greyhathacker.net/?p=948
# TODO: check if line also contains Sub or Function
re_autoexec = re.compile(r'(?i)\b(?:Auto(?:Exec|_?Open|_?Close|Exit|New)' +
- r'|Document(?:_?Open|_Close|BeforeClose|Change|_New)' +
- r'|NewDocument|Workbook(?:_Open|_Activate|_Close))\b')
+ r'|Document(?:_?Open|_Close|_?BeforeClose|Change|_New)' +
+ r'|NewDocument|Workbook(?:_Open|_Activate|_Close)' +
+ r'|\w+_(?:Painted|Painting|GotFocus|LostFocus|MouseHover' +
+ r'|Layout|Click|Change|Resize|BeforeNavigate2|BeforeScriptExecute' +
+ r'|DocumentComplete|DownloadBegin|DownloadComplete|FileDownload' +
+ r'|NavigateComplete2|NavigateError|ProgressChange|PropertyChange' +
+ r'|SetSecureLockIcon|StatusTextChange|TitleChange|MouseMove' +
+ r'|MouseEnter|MouseLeave))\b')
+# NOTE: the ActiveX event alternation above must not end with an empty branch
+# ("...|MouseLeave|)"): an empty branch makes the pattern match any identifier
+# that merely ends with an underscore (e.g. "foo_"), which would wrongly be
+# reported as an auto-executable macro trigger.
# MS-VBAL 5.4.5.1 Open Statement:
RE_OPEN_WRITE = r'(?:\bOpen\b[^\n]+\b(?:Write|Append|Binary|Output|Random)\b)'
@@ -238,6 +260,8 @@ def main():
# Print help if no arguments are passed
if len(args) == 0:
+ print('MacroRaptor %s - http://decalage.info/python/oletools' % __version__)
+ print('This is work in progress, please report issues at %s' % URL_ISSUES)
print(__doc__)
parser.print_help()
print('\nAn exit code is returned based on the analysis result:')
diff --git a/oletools/mraptor_milter.py b/oletools/mraptor_milter.py
index 2752090..2856a36 100644
--- a/oletools/mraptor_milter.py
+++ b/oletools/mraptor_milter.py
@@ -24,7 +24,7 @@ http://www.decalage.info/python/oletools
# === LICENSE ==================================================================
-# mraptor_milter is copyright (c) 2016 Philippe Lagadec (http://www.decalage.info)
+# mraptor_milter is copyright (c) 2016-2017 Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
@@ -53,8 +53,9 @@ http://www.decalage.info/python/oletools
# - archive each e-mail to a file before filtering
# 2016-08-30 v0.03 PL: - added daemonize to run as a Unix daemon
# 2016-09-06 v0.50 PL: - fixed issue #20, is_zipfile on Python 2.6
+# 2017-04-26 v0.51 PL: - fixed absolute imports (issue #141)
-__version__ = '0.50'
+__version__ = '0.51'
# --- TODO -------------------------------------------------------------------
@@ -81,6 +82,18 @@ import StringIO
from socket import AF_INET6
+# IMPORTANT: it should be possible to run oletools directly as scripts
+# in any directory without installing them with pip or setup.py.
+# In that case, relative imports are NOT usable.
+# And to enable Python 2+3 compatibility, we need to use absolute imports,
+# so we add the oletools parent folder to sys.path (absolute+normalized path):
+_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
+# print('_thismodule_dir = %r' % _thismodule_dir)
+_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
+# print('_parent_dir = %r' % _parent_dir)
+if not _parent_dir in sys.path:
+ sys.path.insert(0, _parent_dir)
+
from oletools import olevba, mraptor
from Milter.utils import parse_addr
@@ -137,6 +150,7 @@ log.setLevel(logging.CRITICAL+1)
# === CLASSES ================================================================
# Inspired from https://github.com/jmehnle/pymilter/blob/master/milter-template.py
+# TODO: check https://github.com/sdgathman/pymilter which looks more recent
class MacroRaptorMilter(Milter.Base):
'''
diff --git a/oletools/msodde.py b/oletools/msodde.py
new file mode 100644
index 0000000..d235503
--- /dev/null
+++ b/oletools/msodde.py
@@ -0,0 +1,550 @@
+#!/usr/bin/env python
+"""
+msodde.py
+
+msodde is a script to parse MS Office documents
+(e.g. Word, Excel), to detect and extract DDE links.
+
+Supported formats:
+- Word 97-2003 (.doc, .dot), Word 2007+ (.docx, .dotx, .docm, .dotm)
+
+Author: Philippe Lagadec - http://www.decalage.info
+License: BSD, see source code or documentation
+
+msodde is part of the python-oletools package:
+http://www.decalage.info/python/oletools
+"""
+
+# === LICENSE ==================================================================
+
+# msodde is copyright (c) 2017 Philippe Lagadec (http://www.decalage.info)
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from __future__ import print_function
+
+#------------------------------------------------------------------------------
+# CHANGELOG:
+# 2017-10-18 v0.52 PL: - first version
+# 2017-10-20 PL: - fixed issue #202 (handling empty xml tags)
+# 2017-10-23 ES: - add check for fldSimple codes
+# 2017-10-24 ES: - group tags and track begin/end tags to keep DDE strings together
+# 2017-10-25 CH: - add json output
+# 2017-10-25 CH: - parse doc
+# PL: - added logging
+
+__version__ = '0.52dev4'
+
+#------------------------------------------------------------------------------
+# TODO: field codes can be in headers/footers/comments - parse these
+# TODO: add xlsx support
+
+#------------------------------------------------------------------------------
+# REFERENCES:
+
+
+#--- IMPORTS ------------------------------------------------------------------
+
+# import lxml or ElementTree for XML parsing:
+try:
+ # lxml: best performance for XML processing
+ import lxml.etree as ET
+except ImportError:
+ import xml.etree.cElementTree as ET
+
+import argparse
+import zipfile
+import os
+import sys
+import json
+import logging
+
+# little hack to allow absolute imports even if oletools is not installed
+# Copied from olevba.py
+_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
+_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
+if not _parent_dir in sys.path:
+ sys.path.insert(0, _parent_dir)
+
+from oletools.thirdparty import olefile
+
+# === PYTHON 2+3 SUPPORT ======================================================
+
+if sys.version_info[0] >= 3:
+ unichr = chr
+
+# === CONSTANTS ==============================================================
+
+
+NS_WORD = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
+NO_QUOTES = False
+# XML tag for 'w:instrText'
+TAG_W_INSTRTEXT = '{%s}instrText' % NS_WORD
+TAG_W_FLDSIMPLE = '{%s}fldSimple' % NS_WORD
+TAG_W_FLDCHAR = '{%s}fldChar' % NS_WORD
+TAG_W_P = "{%s}p" % NS_WORD
+TAG_W_R = "{%s}r" % NS_WORD
+ATTR_W_INSTR = '{%s}instr' % NS_WORD
+ATTR_W_FLDCHARTYPE = '{%s}fldCharType' % NS_WORD
+LOCATIONS = ['word/document.xml','word/endnotes.xml','word/footnotes.xml','word/header1.xml','word/footer1.xml','word/header2.xml','word/footer2.xml','word/comments.xml']
+
+# banner to be printed at program start
+BANNER = """msodde %s - http://decalage.info/python/oletools
+THIS IS WORK IN PROGRESS - Check updates regularly!
+Please report any issue at https://github.com/decalage2/oletools/issues
+""" % __version__
+
+BANNER_JSON = dict(type='meta', version=__version__, name='msodde',
+ link='http://decalage.info/python/oletools',
+ message='THIS IS WORK IN PROGRESS - Check updates regularly! '
+ 'Please report any issue at '
+ 'https://github.com/decalage2/oletools/issues')
+
+# === LOGGING =================================================================
+
+DEFAULT_LOG_LEVEL = "warning" # Default log level
+LOG_LEVELS = {
+ 'debug': logging.DEBUG,
+ 'info': logging.INFO,
+ 'warning': logging.WARNING,
+ 'error': logging.ERROR,
+ 'critical': logging.CRITICAL
+}
+
+class NullHandler(logging.Handler):
+    """
+    Logging handler that silently discards every record.
+
+    It is attached to this module's logger so that no message is printed when
+    the main application has not configured logging itself.
+    logging.NullHandler only exists since Python 2.7, so this local equivalent
+    is needed for Python 2.6:
+    see https://docs.python.org/2.6/library/logging.html#configuring-logging-for-a-library
+    """
+
+    def emit(self, record):
+        """Drop `record` without producing any output."""
+        return None
+
+def get_logger(name, level=logging.CRITICAL+1):
+    """
+    Return the logger called `name`, prepared for use by this module.
+
+    The root logger is deliberately left untouched, to avoid getting the logs
+    of other modules on the screen. A logger already registered under `name`
+    is reused as it is; only a newly created one receives a NullHandler
+    (adding a handler again to an existing logger would double its messages).
+    In both cases the logger level is set to `level`, which defaults to
+    CRITICAL+1 so that nothing is logged.
+    """
+    # This lookup has to happen before getLogger(), which registers the name.
+    #NOTE: another less intrusive but more "hackish" solution would be to
+    # use getLogger then test if its effective level is not default.
+    already_registered = name in logging.Logger.manager.loggerDict
+    logger = logging.getLogger(name)
+    if not already_registered:
+        # brand new logger: give it a NullHandler only, it is up to the
+        # application to configure its own logging
+        logger.addHandler(NullHandler())
+    logger.setLevel(level)
+    return logger
+
+# a global logger object used for debugging:
+log = get_logger('msodde')
+
+
+# === UNICODE IN PY2 =========================================================
+
+def ensure_stdout_handles_unicode():
+    """ Ensure stdout can handle unicode by wrapping it if necessary
+
+    Required e.g. if output of this script is piped or redirected in a linux
+    shell, since then sys.stdout.encoding is ascii (or not set at all) and
+    cannot handle print(unicode). In that case we need to find some
+    compatible encoding and wrap sys.stdout into an encoder following
+    (many thanks!)
+    https://stackoverflow.com/a/1819009 or https://stackoverflow.com/a/20447935
+
+    The encoding is taken from the locale; if that one is missing or ascii as
+    well, utf8 is used. Aliases of ascii (e.g. 'ANSI_X3.4-1968') are treated
+    like 'ascii' itself.
+
+    Nothing is done if sys.stdout is already wrapped or if its encoding is
+    something other than ascii.
+
+    Can be undone by setting sys.stdout = sys.__stdout__
+    """
+    import codecs
+    import locale
+
+    def cannot_encode_unicode(name):
+        """ True if encoding `name` is missing or is just (an alias of) ascii """
+        if not name:
+            return True
+        try:
+            return codecs.lookup(name).name == 'ascii'
+        except (LookupError, TypeError):
+            # unknown codec name: as before, do not consider it to be ascii
+            return False
+
+    # do not re-wrap
+    if isinstance(sys.stdout, codecs.StreamWriter):
+        return
+
+    # try to find encoding for sys.stdout (attribute might not exist if
+    # sys.stdout has been replaced by some other file-like object)
+    encoding = getattr(sys.stdout, 'encoding', None)
+    if not cannot_encode_unicode(encoding):
+        return   # no need to wrap
+
+    # try to find an encoding that can handle unicode
+    try:
+        encoding = locale.getpreferredencoding()
+    except Exception:
+        # best effort only: keep the unusable value, the fallback below
+        # replaces it
+        pass
+
+    # fallback if still no usable encoding available
+    if cannot_encode_unicode(encoding):
+        encoding = 'utf8'
+
+    # logging is probably not initialized yet, but just in case
+    log.debug('wrapping sys.stdout with encoder using {0}'.format(encoding))
+
+    # The codec writer hands *bytes* to the wrapped stream. On Python 3
+    # sys.stdout is a text stream that rejects bytes (TypeError on every
+    # print), so the writer must sit on top of the underlying binary buffer.
+    # Python 2 file objects have no such attribute and accept bytes directly.
+    target = getattr(sys.stdout, 'buffer', sys.stdout)
+    sys.stdout = codecs.getwriter(encoding)(target)
+
+
+ensure_stdout_handles_unicode() # e.g. for print(text) in main()
+
+
+# === ARGUMENT PARSING =======================================================
+
+class ArgParserWithBanner(argparse.ArgumentParser):
+    """ ArgumentParser that shows the msodde banner ahead of any error """
+
+    def error(self, message):
+        """ print BANNER, then hand `message` to ArgumentParser.error """
+        print(BANNER)
+        super(ArgParserWithBanner, self).error(message)
+
+
+def existing_file(filename):
+    """ argparse type-checker: return `filename` unchanged if that path exists
+
+    raises argparse.ArgumentTypeError otherwise """
+    if os.path.exists(filename):
+        return filename
+    message = 'File {0} does not exist.'.format(filename)
+    raise argparse.ArgumentTypeError(message)
+
+
+def process_args(cmd_line_args=None):
+    """ parse the command line of msodde
+
+    `cmd_line_args` is the list of arguments to parse; if None, argparse
+    falls back to sys.argv. Returns the namespace created by argparse with
+    attributes filepath, json, nounquote and loglevel. """
+    arg_parser = ArgParserWithBanner(
+        description='A python tool to detect and extract DDE links in MS Office files')
+    arg_parser.add_argument('filepath', metavar='FILE', type=existing_file,
+                            help='path of the file to be analyzed')
+    arg_parser.add_argument('--json', '-j', action='store_true',
+                            help='Output in json format. Do not use with -ldebug')
+    arg_parser.add_argument('--nounquote', action='store_true',
+                            help="don't unquote values")
+    arg_parser.add_argument('-l', '--loglevel', dest='loglevel',
+                            action='store', default=DEFAULT_LOG_LEVEL,
+                            help='logging level debug/info/warning/error/critical (default=%(default)s)')
+    return arg_parser.parse_args(cmd_line_args)
+
+
+# === FUNCTIONS ==============================================================
+
+# from [MS-DOC], section 2.8.25 (PlcFld):
+# A field consists of two parts: field instructions and, optionally, a result. All fields MUST begin with
+# Unicode character 0x0013 with sprmCFSpec applied with a value of 1. This is the field begin
+# character. All fields MUST end with a Unicode character 0x0015 with sprmCFSpec applied with a value
+# of 1. This is the field end character. If the field has a result, then there MUST be a Unicode character
+# 0x0014 with sprmCFSpec applied with a value of 1 somewhere between the field begin character and
+# the field end character. This is the field separator. The field result is the content between the field
+# separator and the field end character. The field instructions are the content between the field begin
+# character and the field separator, if one is present, or between the field begin character and the field
+# end character if no separator is present. The field begin character, field end character, and field
+# separator are collectively referred to as field characters.
+
+
+def process_ole_field(data):
+    """ return the field instructions if they start with DDE, else nothing
+
+    Leading whitespace and letter case are ignored for the check.
+    expects unicode input, returns unicode output: `data` unchanged for a
+    dde field, the empty string otherwise """
+    # accepted beginnings: plain 'dde', or the same letters each preceded by a
+    # NUL character (presumably 16-bit text that was read byte by byte)
+    dde_prefixes = (u'dde', u'\x00d\x00d\x00e\x00')
+    if data.lstrip().lower().startswith(dde_prefixes):
+        return data
+    return u''
+
+
+OLE_FIELD_START = 0x13
+OLE_FIELD_SEP = 0x14
+OLE_FIELD_END = 0x15
+OLE_FIELD_MAX_SIZE = 1000 # max field size to analyze, rest is ignored
+
+
+def process_ole_stream(stream):
+    """ find dde links in single ole stream
+
+    The stream is scanned byte by byte for fields: a field starts at
+    OLE_FIELD_START and ends at OLE_FIELD_END. Only the part in front of an
+    optional OLE_FIELD_SEP (the field instructions) is collected, and only
+    until it gets longer than OLE_FIELD_MAX_SIZE. Returns a list with the
+    result of process_ole_field for every completed field.
+
+    since ole file stream are subclasses of io.BytesIO, they are buffered, so
+    reading char-wise is not that bad performance-wise """
+
+    fields = []
+    inside_field = False       # seen a start char that has not been closed yet
+    after_separator = False    # seen the separator of the current field
+    too_long = False           # instructions of the current field got cut off
+    instructions = None
+    n_read = 0
+    while True:
+        raw = stream.read(1)    # loop over every single byte
+        if len(raw) == 0:
+            break
+        n_read += 1
+        code = ord(raw)
+
+        if code == OLE_FIELD_START:
+            # (re-)start collecting, even in the middle of another field
+            inside_field = True
+            after_separator = False
+            too_long = False
+            instructions = u''
+        elif not inside_field:
+            pass
+        elif code == OLE_FIELD_SEP:
+            after_separator = True
+        elif code == OLE_FIELD_END:
+            # have complete field now, process it
+            fields.append(process_ole_field(instructions))
+
+            # re-set variables for next field
+            inside_field = False
+            after_separator = False
+            instructions = None
+        elif after_separator or too_long:
+            # field result or overlong instructions: nothing to collect
+            pass
+        elif len(instructions) > OLE_FIELD_MAX_SIZE:
+            # check that text does not get too long by accident
+            log.debug('field exceeds max size of {0}. Ignore rest'
+                      .format(OLE_FIELD_MAX_SIZE))
+            too_long = True
+        elif code < 128:
+            # appending a raw byte to a unicode string here. Not clean but
+            # all we do later is check for the ascii-sequence 'DDE'
+            instructions += unichr(code)
+        else:
+            instructions += u'?'
+
+    log.debug('Checked {0} characters, found {1} fields'
+              .format(n_read, len(fields)))
+
+    return fields
+
+
+def process_ole_storage(ole):
+    """ find dde links in all streams of an ole file
+
+    ole is the opened ole file object. Every entry returned by
+    ole.listdir(streams=True, storages=True) is checked: streams are
+    searched with process_ole_stream, storages are only logged.
+    No recursion is done here: listdir returns entries as paths and already
+    includes the entries of nested storages, and such a path is not an
+    object that could be searched by calling this function again.
+
+    returns the list of results of all streams, in listing order
+    """
+    results = []
+    for st in ole.listdir(streams=True, storages=True):
+        st_type = ole.get_type(st)
+        if st_type == olefile.STGTY_STREAM:      # a stream
+            stream = None
+            try:
+                stream = ole.openstream(st)
+                log.debug('Checking stream {0}'.format(st))
+                links = process_ole_stream(stream)
+            finally:
+                # close the stream even if processing failed; the error
+                # itself is passed on to the caller
+                if stream:
+                    stream.close()
+            if links:
+                results.extend(links)
+        elif st_type == olefile.STGTY_STORAGE:   # a storage
+            # its streams show up in the listing themselves
+            log.debug('Checking storage {0}'.format(st))
+        else:
+            log.info('unexpected type {0} for entry {1}. Ignore it'
+                     .format(st_type, st))
+            continue
+    return results
+
+
+def process_ole(filepath):
+    """
+    find dde links in ole file
+
+    like process_openxml, returns a concatenated unicode string of dde links
+    or empty if none were found. dde-links will still begin with the
+    dde[auto] key word (possibly after some whitespace)
+
+    The ole file is closed again before returning, also if processing
+    raises an exception (which is passed on to the caller).
+    """
+    log.debug('process_ole')
+    ole = olefile.OleFileIO(filepath, path_encoding=None)
+    try:
+        text_parts = process_ole_storage(ole)
+    finally:
+        ole.close()
+
+    # mimic behaviour of process_openxml: combine links to single text string
+    return u'\n'.join(text_parts)
+
+
+def process_openxml(filepath):
+    """
+    find dde links in an openxml (zip) file
+
+    Every zip member whose name is listed in LOCATIONS is parsed with
+    process_xml. Returns the fields found in all of them as a single
+    unicode string, one field per line (empty if there are none).
+
+    The zip file is closed again before returning, also if reading or
+    parsing raises an exception (which is passed on to the caller).
+    """
+    log.debug('process_openxml')
+    all_fields = []
+    z = zipfile.ZipFile(filepath)
+    try:
+        # separate name for the zip members, so the filepath argument
+        # is not overwritten
+        for member_name in z.namelist():
+            if member_name in LOCATIONS:
+                data = z.read(member_name)
+                all_fields.extend(process_xml(data))
+    finally:
+        z.close()
+    return u'\n'.join(all_fields)
+
+def process_xml(data):
+    """
+    extract the field instructions from the xml data of a document part
+
+    Two kinds of fields are collected:
+    - complex fields: w:instrText chunks found between a fldChar "begin" and
+      its matching "end" are concatenated (after unquote) into one entry.
+      Nesting is tracked with a level counter; an entry is stored whenever
+      an "end" brings the level back to 0.
+    - simple fields: the w:instr attribute of each w:fldSimple element.
+
+    returns a list of field texts, complex fields first
+    """
+    # parse the XML data:
+    root = ET.fromstring(data)
+    fields = []
+    ddetext = u''
+    level = 0
+    # find all the tags 'w:p':
+    # parse each for begin and end tags, to group DDE strings
+    # fldChar can be in either a w:r element, floating alone in the w:p or
+    # spread across w:p tags
+    # escape DDE if quoted etc
+    # (each is a chunk of a DDE link)
+
+    for subs in root.iter(TAG_W_P):
+        for e in subs:
+            # reset for every element, so that a w:r without fldChar or
+            # instrText is skipped instead of handling the previous element
+            # a second time
+            elem = None
+            if e.tag == TAG_W_R:
+                # pull out the first FLDCHAR or INSTRTEXT child of the w:r
+                for child in e:
+                    if child.tag in (TAG_W_FLDCHAR, TAG_W_INSTRTEXT):
+                        elem = child
+                        break
+                if elem is None:
+                    continue
+            else:
+                elem = e
+
+            # check if FLDCHARTYPE and whether "begin" or "end" tag
+            fldchar_type = elem.attrib.get(ATTR_W_FLDCHARTYPE)
+            if fldchar_type == "begin":
+                level += 1
+            elif fldchar_type == "end":
+                level -= 1
+                if level == 0 or level == -1:  # edge-case where level becomes -1
+                    fields.append(ddetext)
+                    ddetext = u''
+                    level = 0  # reset edge-case
+
+            # concatenate the text of the field, if present:
+            if elem.tag == TAG_W_INSTRTEXT and elem.text is not None:
+                # expand field code if QUOTED
+                ddetext += unquote(elem.text)
+
+    for elem in root.iter(TAG_W_FLDSIMPLE):
+        # add the instruction attribute of the field, if present
+        # (a fldSimple without it is skipped)
+        instr = elem.attrib.get(ATTR_W_INSTR)
+        if instr is not None:
+            fields.append(instr)
+
+    return fields
+
+def unquote(field):
+    """
+    expand a QUOTE field: turn its numeric arguments into characters
+
+    Returns field unchanged if it does not contain "QUOTE" or if unquoting
+    is switched off (NO_QUOTES). Otherwise the stripped field is split at
+    single spaces and the first part is dropped. Each remaining part that
+    is an integer accepted by chr() is replaced by that character, all
+    other parts are kept as they are. The result is the concatenation of
+    these parts without separator.
+    """
+    if "QUOTE" not in field or NO_QUOTES:
+        return field
+    # split into components
+    parts = field.strip().split(" ")
+    chars = []
+    for part in parts[1:]:
+        try:
+            chars.append(chr(int(part)))
+        except (ValueError, OverflowError):
+            # not a number, or a number that chr() cannot convert (very
+            # large values raise OverflowError): keep the text as it is
+            chars.append(part)
+    return "".join(chars)
+
+
+def process_file(filepath):
+    """ run process_ole on ole files and process_openxml on all others
+
+    returns the result of the function that was called """
+    handler = process_ole if olefile.isOleFile(filepath) else process_openxml
+    return handler(filepath)
+
+
+#=== MAIN =================================================================
+
+def main(cmd_line_args=None):
+    """ Main function, called if this file is called as a script
+
+    Optional argument: command line arguments to be forwarded to ArgumentParser
+    in process_args. Per default (cmd_line_args=None), sys.argv is used. Option
+    mainly added for unit-testing
+
+    Returns 0 if the file could be processed and 1 otherwise. In text mode an
+    error during processing is not caught; in json mode it is added to the
+    json output instead.
+    """
+    global NO_QUOTES
+
+    args = process_args(cmd_line_args)
+    json_mode = args.json
+
+    # Setup logging to the console:
+    # here we use stdout instead of stderr by default, so that the output
+    # can be redirected properly.
+    logging.basicConfig(level=LOG_LEVELS[args.loglevel], stream=sys.stdout,
+                        format='%(levelname)-8s %(message)s')
+    # enable logging in the modules:
+    log.setLevel(logging.NOTSET)
+
+    if json_mode and args.loglevel.lower() == 'debug':
+        log.warning('Debug log output will not be json-compatible!')
+
+    if args.nounquote:
+        NO_QUOTES = True
+
+    if json_mode:
+        # all output is collected and dumped in one go at the end
+        jout = [BANNER_JSON]
+    else:
+        # print banner with version
+        print(BANNER)
+        print('Opening file: %s' % args.filepath)
+
+    text = ''
+    return_code = 1
+    try:
+        text = process_file(args.filepath)
+        return_code = 0
+    except Exception as exc:
+        if not json_mode:
+            raise
+        jout.append(dict(type='error', error=type(exc).__name__,
+                         message=str(exc)))  # strange: str(exc) is enclosed in ""
+
+    if not json_mode:
+        print ('DDE Links:')
+        print(text)
+        return return_code
+
+    # json mode: one entry per non-empty line of the result
+    for line in text.splitlines():
+        if line.strip():
+            jout.append(dict(type='dde-link', link=line.strip()))
+    json.dump(jout, sys.stdout, check_circular=False, indent=4)
+    print()   # add a newline after closing "]"
+    return return_code  # required if we catch an exception in json-mode
+
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/oletools/olebrowse.py b/oletools/olebrowse.py
index ba2e3dc..ccfb0a9 100644
--- a/oletools/olebrowse.py
+++ b/oletools/olebrowse.py
@@ -12,7 +12,7 @@ olebrowse project website: http://www.decalage.info/python/olebrowse
olebrowse is part of the python-oletools package:
http://www.decalage.info/python/oletools
-olebrowse is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info)
+olebrowse is copyright (c) 2012-2017, Philippe Lagadec (http://www.decalage.info)
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
@@ -36,12 +36,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
-__version__ = '0.02'
-
#------------------------------------------------------------------------------
# CHANGELOG:
# 2012-09-17 v0.01 PL: - first version
# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL
+# 2017-04-26 v0.51 PL: - fixed absolute imports (issue #141)
+
+__version__ = '0.51'
#------------------------------------------------------------------------------
# TODO:
@@ -51,10 +52,25 @@ __version__ = '0.02'
# - for a stream, display info: size, path, etc
# - stream info: magic, entropy, ... ?
+# === IMPORTS ================================================================
+
import optparse, sys, os
-from thirdparty.easygui import easygui
-import thirdparty.olefile as olefile
-import ezhexviewer
+
+# IMPORTANT: it should be possible to run oletools directly as scripts
+# in any directory without installing them with pip or setup.py.
+# In that case, relative imports are NOT usable.
+# And to enable Python 2+3 compatibility, we need to use absolute imports,
+# so we add the oletools parent folder to sys.path (absolute+normalized path):
+_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
+# print('_thismodule_dir = %r' % _thismodule_dir)
+_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
+# print('_parent_dir = %r' % _parent_dir)
+if not _parent_dir in sys.path:
+ sys.path.insert(0, _parent_dir)
+
+from oletools.thirdparty.easygui import easygui
+from oletools.thirdparty import olefile
+from oletools import ezhexviewer
ABOUT = '~ About olebrowse'
QUIT = '~ Quit'
diff --git a/oletools/oledir.py b/oletools/oledir.py
index 8ceb4d1..80442e8 100644
--- a/oletools/oledir.py
+++ b/oletools/oledir.py
@@ -2,7 +2,7 @@
"""
oledir.py
-oledir parses OLE files to display technical information about its directory
+oledir parses OLE files to display technical information about their directory
entries, including deleted/orphan streams/storages and unused entries.
Author: Philippe Lagadec - http://www.decalage.info
@@ -14,7 +14,7 @@ http://www.decalage.info/python/oletools
#=== LICENSE ==================================================================
-# oledir is copyright (c) 2015-2016 Philippe Lagadec (http://www.decalage.info)
+# oledir is copyright (c) 2015-2017 Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
@@ -37,6 +37,7 @@ http://www.decalage.info/python/oletools
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+from __future__ import print_function
#------------------------------------------------------------------------------
# CHANGELOG:
@@ -45,8 +46,10 @@ http://www.decalage.info/python/oletools
# 2016-01-13 v0.03 PL: - replaced prettytable by tablestream, added colors
# 2016-07-20 v0.50 SL: - added Python 3 support
# 2016-08-09 PL: - fixed issue #77 (imports from thirdparty dir)
+# 2017-03-08 v0.51 PL: - fixed absolute imports, added optparse
+# - added support for zip files and wildcards
-__version__ = '0.50'
+__version__ = '0.51'
#------------------------------------------------------------------------------
# TODO:
@@ -55,12 +58,22 @@ __version__ = '0.50'
# === IMPORTS ================================================================
-import sys, os
+import sys, os, optparse
-# add the thirdparty subfolder to sys.path (absolute+normalized path):
+# IMPORTANT: it should be possible to run oletools directly as scripts
+# in any directory without installing them with pip or setup.py.
+# In that case, relative imports are NOT usable.
+# And to enable Python 2+3 compatibility, we need to use absolute imports,
+# so we add the oletools parent folder to sys.path (absolute+normalized path):
_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
# print('_thismodule_dir = %r' % _thismodule_dir)
-# assumption: the thirdparty dir is a subfolder:
+_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
+# print('_parent_dir = %r' % _parent_dir)
+if not _parent_dir in sys.path:
+ sys.path.insert(0, _parent_dir)
+
+# we also need the thirdparty dir for colorclass
+# TODO: remove colorclass from thirdparty, make it a dependency
_thirdparty_dir = os.path.normpath(os.path.join(_thismodule_dir, 'thirdparty'))
# print('_thirdparty_dir = %r' % _thirdparty_dir)
if not _thirdparty_dir in sys.path:
@@ -72,12 +85,15 @@ import colorclass
if os.name == 'nt':
colorclass.Windows.enable(auto_colors=True)
-import olefile
-from tablestream import tablestream
+from oletools.thirdparty import olefile
+from oletools.thirdparty.tablestream import tablestream
+from oletools.thirdparty.xglob import xglob
# === CONSTANTS ==============================================================
+BANNER = 'oledir %s - http://decalage.info/python/oletools' % __version__
+
STORAGE_NAMES = {
olefile.STGTY_EMPTY: 'Empty',
olefile.STGTY_STORAGE: 'Storage',
@@ -115,72 +131,104 @@ def sid_display(sid):
# === MAIN ===================================================================
def main():
+ usage = 'usage: oledir [options] [filename2 ...]'
+ parser = optparse.OptionParser(usage=usage)
+ parser.add_option("-r", action="store_true", dest="recursive",
+ help='find files recursively in subdirectories.')
+ parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
+ help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)')
+ parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
+ help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
+ # parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,
+ # help="logging level debug/info/warning/error/critical (default=%default)")
+
+ # TODO: add logfile option
+
+ (options, args) = parser.parse_args()
+
+ # Print help if no arguments are passed
+ if len(args) == 0:
+ print(BANNER)
+ print(__doc__)
+ parser.print_help()
+ sys.exit()
+
# print banner with version
- print('oledir %s - http://decalage.info/python/oletools' % __version__)
+ print(BANNER)
if os.name == 'nt':
colorclass.Windows.enable(auto_colors=True, reset_atexit=True)
- fname = sys.argv[1]
- print('OLE directory entries in file %s:' % fname)
- ole = olefile.OleFileIO(fname)
- # ole.dumpdirectory()
-
- # t = prettytable.PrettyTable(('id', 'Status', 'Type', 'Name', 'Left', 'Right', 'Child', '1st Sect', 'Size'))
- # t.align = 'l'
- # t.max_width['id'] = 4
- # t.max_width['Status'] = 6
- # t.max_width['Type'] = 10
- # t.max_width['Name'] = 10
- # t.max_width['Left'] = 5
- # t.max_width['Right'] = 5
- # t.max_width['Child'] = 5
- # t.max_width['1st Sect'] = 8
- # t.max_width['Size'] = 6
-
- table = tablestream.TableStream(column_width=[4, 6, 7, 22, 5, 5, 5, 8, 6],
- header_row=('id', 'Status', 'Type', 'Name', 'Left', 'Right', 'Child', '1st Sect', 'Size'),
- style=tablestream.TableStyleSlim)
-
- # TODO: read ALL the actual directory entries from the directory stream, because olefile does not!
- # TODO: OR fix olefile!
- # TODO: olefile should store or give access to the raw direntry data on demand
- # TODO: oledir option to hexdump the raw direntries
- # TODO: olefile should be less picky about incorrect directory structures
-
- for id in range(len(ole.direntries)):
- d = ole.direntries[id]
- if d is None:
- # this direntry is not part of the tree: either unused or an orphan
- d = ole._load_direntry(id) #ole.direntries[id]
- # print('%03d: %s *** ORPHAN ***' % (id, d.name))
- if d.entry_type == olefile.STGTY_EMPTY:
- status = 'unused'
- else:
- status = 'ORPHAN'
+ for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
+ zip_password=options.zip_password, zip_fname=options.zip_fname):
+ # ignore directory names stored in zip files:
+ if container and filename.endswith('/'):
+ continue
+ full_name = '%s in %s' % (filename, container) if container else filename
+ print('OLE directory entries in file %s:' % full_name)
+ if data is not None:
+ # data extracted from zip file
+ ole = olefile.OleFileIO(data)
else:
- # print('%03d: %s' % (id, d.name))
- status = ''
- if d.name.startswith('\x00'):
- # this may happen with unused entries, the name may be filled with zeroes
- name = ''
- else:
- # handle non-printable chars using repr(), remove quotes:
- name = repr(d.name)[1:-1]
- left = sid_display(d.sid_left)
- right = sid_display(d.sid_right)
- child = sid_display(d.sid_child)
- entry_type = STORAGE_NAMES.get(d.entry_type, 'Unknown')
- etype_color = STORAGE_COLORS.get(d.entry_type, 'red')
- status_color = STATUS_COLORS.get(status, 'red')
-
- # print(' type=%7s sid_left=%s sid_right=%s sid_child=%s'
- # %(entry_type, left, right, child))
- # t.add_row((id, status, entry_type, name, left, right, child, hex(d.isectStart), d.size))
- table.write_row((id, status, entry_type, name, left, right, child, '%X' % d.isectStart, d.size),
- colors=(None, status_color, etype_color, None, None, None, None, None, None))
- ole.close()
- # print t
+ # normal filename
+ ole = olefile.OleFileIO(filename)
+ # ole.dumpdirectory()
+
+ # t = prettytable.PrettyTable(('id', 'Status', 'Type', 'Name', 'Left', 'Right', 'Child', '1st Sect', 'Size'))
+ # t.align = 'l'
+ # t.max_width['id'] = 4
+ # t.max_width['Status'] = 6
+ # t.max_width['Type'] = 10
+ # t.max_width['Name'] = 10
+ # t.max_width['Left'] = 5
+ # t.max_width['Right'] = 5
+ # t.max_width['Child'] = 5
+ # t.max_width['1st Sect'] = 8
+ # t.max_width['Size'] = 6
+
+ table = tablestream.TableStream(column_width=[4, 6, 7, 22, 5, 5, 5, 8, 6],
+ header_row=('id', 'Status', 'Type', 'Name', 'Left', 'Right', 'Child', '1st Sect', 'Size'),
+ style=tablestream.TableStyleSlim)
+
+ # TODO: read ALL the actual directory entries from the directory stream, because olefile does not!
+ # TODO: OR fix olefile!
+ # TODO: olefile should store or give access to the raw direntry data on demand
+ # TODO: oledir option to hexdump the raw direntries
+ # TODO: olefile should be less picky about incorrect directory structures
+
+ for id in range(len(ole.direntries)):
+ d = ole.direntries[id]
+ if d is None:
+ # this direntry is not part of the tree: either unused or an orphan
+ d = ole._load_direntry(id) #ole.direntries[id]
+ # print('%03d: %s *** ORPHAN ***' % (id, d.name))
+ if d.entry_type == olefile.STGTY_EMPTY:
+ status = 'unused'
+ else:
+ status = 'ORPHAN'
+ else:
+ # print('%03d: %s' % (id, d.name))
+ status = ''
+ if d.name.startswith('\x00'):
+ # this may happen with unused entries, the name may be filled with zeroes
+ name = ''
+ else:
+ # handle non-printable chars using repr(), remove quotes:
+ name = repr(d.name)[1:-1]
+ left = sid_display(d.sid_left)
+ right = sid_display(d.sid_right)
+ child = sid_display(d.sid_child)
+ entry_type = STORAGE_NAMES.get(d.entry_type, 'Unknown')
+ etype_color = STORAGE_COLORS.get(d.entry_type, 'red')
+ status_color = STATUS_COLORS.get(status, 'red')
+
+ # print(' type=%7s sid_left=%s sid_right=%s sid_child=%s'
+ # %(entry_type, left, right, child))
+ # t.add_row((id, status, entry_type, name, left, right, child, hex(d.isectStart), d.size))
+ table.write_row((id, status, entry_type, name, left, right, child, '%X' % d.isectStart, d.size),
+ colors=(None, status_color, etype_color, None, None, None, None, None, None))
+ ole.close()
+ # print t
if __name__ == '__main__':
diff --git a/oletools/oleid.py b/oletools/oleid.py
index b6545a7..b0643dd 100644
--- a/oletools/oleid.py
+++ b/oletools/oleid.py
@@ -18,7 +18,7 @@ http://www.decalage.info/python/oletools
#=== LICENSE =================================================================
-# oleid is copyright (c) 2012-2016, Philippe Lagadec (http://www.decalage.info)
+# oleid is copyright (c) 2012-2017, Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
@@ -53,6 +53,7 @@ from __future__ import print_function
# 2014-11-30 v0.03 PL: - improved output with prettytable
# 2016-10-25 v0.50 PL: - fixed print and bytes strings for Python 3
# 2016-12-12 v0.51 PL: - fixed relative imports for Python 3 (issue #115)
+# 2017-04-26 PL: - fixed absolute imports (issue #141)
__version__ = '0.51'
@@ -77,19 +78,20 @@ __version__ = '0.51'
import optparse, sys, os, re, zlib, struct
-try:
- # Relative imports (only works when imported from package):
- from .thirdparty import olefile
- from .thirdparty.prettytable import prettytable
-except:
- # if it does not work, fall back to absolute imports:
- # add this module's folder to sys.path (absolute+normalized path):
- _thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
- if not _thismodule_dir in sys.path:
- sys.path.insert(0, _thismodule_dir)
- # absolute imports:
- from thirdparty import olefile
- from thirdparty.prettytable import prettytable
+# IMPORTANT: it should be possible to run oletools directly as scripts
+# in any directory without installing them with pip or setup.py.
+# In that case, relative imports are NOT usable.
+# And to enable Python 2+3 compatibility, we need to use absolute imports,
+# so we add the oletools parent folder to sys.path (absolute+normalized path):
+_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
+# print('_thismodule_dir = %r' % _thismodule_dir)
+_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
+# print('_parent_dir = %r' % _parent_dir)
+if not _parent_dir in sys.path:
+ sys.path.insert(0, _parent_dir)
+
+from oletools.thirdparty import olefile
+from oletools.thirdparty.prettytable import prettytable
diff --git a/oletools/olemap.py b/oletools/olemap.py
index e8b8489..6f8f51f 100644
--- a/oletools/olemap.py
+++ b/oletools/olemap.py
@@ -13,7 +13,7 @@ http://www.decalage.info/python/oletools
#=== LICENSE ==================================================================
-# olemap is copyright (c) 2015-2016 Philippe Lagadec (http://www.decalage.info)
+# olemap is copyright (c) 2015-2017 Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
@@ -43,25 +43,45 @@ http://www.decalage.info/python/oletools
# 2016-01-13 v0.02 PL: - improved display with tablestream, added colors
# 2016-07-20 v0.50 SL: - added Python 3 support
# 2016-09-05 PL: - added main entry point for setup.py
+# 2017-03-20 v0.51 PL: - fixed absolute imports, added optparse
+# - added support for zip files and wildcards
+# - improved MiniFAT display with tablestream
+# 2017-03-21 PL: - added header display
+# - added options --header, --fat and --minifat
+# 2017-03-22 PL: - added extra data detection, completed header display
+# 2017-03-23 PL: - only display the header by default
+# - added option --exdata to display extra data in hex
-__version__ = '0.50'
+
+__version__ = '0.51'
#------------------------------------------------------------------------------
# TODO:
# === IMPORTS ================================================================
-import sys
-from thirdparty.olefile import olefile
-from thirdparty.tablestream import tablestream
+import sys, os, optparse, binascii
+# IMPORTANT: it should be possible to run oletools directly as scripts
+# in any directory without installing them with pip or setup.py.
+# In that case, relative imports are NOT usable.
+# And to enable Python 2+3 compatibility, we need to use absolute imports,
+# so we add the oletools parent folder to sys.path (absolute+normalized path):
+_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
+# print('_thismodule_dir = %r' % _thismodule_dir)
+_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
+# print('_parent_dir = %r' % _parent_dir)
+if not _parent_dir in sys.path:
+ sys.path.insert(0, _parent_dir)
+from oletools.thirdparty.olefile import olefile
+from oletools.thirdparty.tablestream import tablestream
+from oletools.thirdparty.xglob import xglob
+from oletools.ezhexviewer import hexdump3
-def sid_display(sid):
- if sid == olefile.NOSTREAM:
- return None
- else:
- return sid
+# === CONSTANTS ==============================================================
+
+BANNER = 'olemap %s - http://decalage.info/python/oletools' % __version__
STORAGE_NAMES = {
olefile.STGTY_EMPTY: 'Empty',
@@ -88,37 +108,188 @@ FAT_COLORS = {
}
-# === MAIN ===================================================================
+# === FUNCTIONS ==============================================================
-def main():
- # print banner with version
- print('olemap %s - http://decalage.info/python/oletools' % __version__)
+def sid_display(sid):
+ if sid == olefile.NOSTREAM:
+ return None
+ else:
+ return sid
+
+
+def show_header(ole, extra_data=False):
+    """
+    Print the OLE header fields and a few values calculated from them.
+
+    Two tables are printed: the raw header attributes of ole, and calculated
+    attributes (sector size, file size, size covered by the FAT, offset and
+    size of data located after the last used sector of the FAT). Rows that
+    indicate extra data are shown in red.
+
+    If extra_data is True, a hex dump of the data found after the last used
+    sector is printed as well (or a note if there is none).
+    """
+    print("OLE HEADER:")
+    t = tablestream.TableStream([24, 16, 79-(4+24+16)], header_row=['Attribute', 'Value', 'Description'])
+    t.write_row(['OLE Signature (hex)', binascii.b2a_hex(ole.header_signature).upper(), 'Should be D0CF11E0A1B11AE1'])
+    t.write_row(['Header CLSID (hex)', binascii.b2a_hex(ole.header_clsid).upper(), 'Should be 0'])
+    t.write_row(['Minor Version', '%04X' % ole.minor_version, 'Should be 003E'])
+    t.write_row(['Major Version', '%04X' % ole.dll_version, 'Should be 3 or 4'])
+    t.write_row(['Byte Order', '%04X' % ole.byte_order, 'Should be FFFE (little endian)'])
+    t.write_row(['Sector Shift', '%04X' % ole.sector_shift, 'Should be 0009 or 000C'])
+    t.write_row(['# of Dir Sectors', ole.num_dir_sectors, 'Should be 0 if major version is 3'])
+    t.write_row(['# of FAT Sectors', ole.num_fat_sectors, ''])
+    t.write_row(['First Dir Sector', '%08X' % ole.first_dir_sector, '(hex)'])
+    t.write_row(['Transaction Sig Number', ole.transaction_signature_number, 'Should be 0'])
+    t.write_row(['MiniStream cutoff', ole.mini_stream_cutoff_size, 'Should be 4096 bytes'])
+    t.write_row(['First MiniFAT Sector', '%08X' % ole.first_mini_fat_sector, '(hex)'])
+    t.write_row(['# of MiniFAT Sectors', ole.num_mini_fat_sectors, ''])
+    t.write_row(['First DIFAT Sector', '%08X' % ole.first_difat_sector, '(hex)'])
+    t.write_row(['# of DIFAT Sectors', ole.num_difat_sectors, ''])
+    t.close()
+    print('')
+    print("CALCULATED ATTRIBUTES:")
+    t = tablestream.TableStream([24, 16, 79-(4+24+16)], header_row=['Attribute', 'Value', 'Description'])
+    t.write_row(['Sector Size (bytes)', ole.sector_size, 'Should be 512 or 4096 bytes'])
+    t.write_row(['Actual File Size (bytes)', ole._filesize, 'Real file size on disk'])
+    # each FAT entry takes 4 bytes. Integer division, so that the sizes
+    # computed from it stay integers on Python 3 as well:
+    num_sectors_per_fat_sector = ole.sector_size // 4
+    num_sectors_in_fat = num_sectors_per_fat_sector * ole.num_fat_sectors
+    # Need to add one sector for the header:
+    max_filesize_fat = (num_sectors_in_fat + 1) * ole.sector_size
+    t.write_row(['Max File Size in FAT', max_filesize_fat, 'Max file size covered by FAT'])
+    if ole._filesize > max_filesize_fat:
+        extra_size_beyond_fat = ole._filesize - max_filesize_fat
+        color = 'red'
+    else:
+        extra_size_beyond_fat = 0
+        color = None
+    t.write_row(['Extra data beyond FAT', extra_size_beyond_fat, 'Only if file is larger than FAT coverage'],
+                colors=[color, color, color])
+    # Find the last used sector:
+    # By default, it's the last sector in the FAT
+    last_used_sector = len(ole.fat)-1
+    # walk backwards until an entry that is not marked as free is found:
+    for i in range(len(ole.fat)-1, 0, -1):
+        last_used_sector = i
+        if ole.fat[i] != olefile.FREESECT:
+            break
+    # Extra data would start at the next sector
+    # (+2: one for the header sector, one to get past the last used sector)
+    offset_extra_data = ole.sectorsize * (last_used_sector + 2)
+    t.write_row(['Extra data offset in FAT', '%08X' % offset_extra_data, 'Offset of the 1st free sector at end of FAT'])
+    extra_data_size = ole._filesize - offset_extra_data
+    color = 'red' if extra_data_size > 0 else None
+    t.write_row(['Extra data size', extra_data_size, 'Size of data starting at the 1st free sector at end of FAT'],
+                colors=[color, color, color])
+    t.close()
+    print('')
-    fname = sys.argv[1]
-    ole = olefile.OleFileIO(fname)
+    if extra_data:
+        # hex dump of extra data
+        print('HEX DUMP OF EXTRA DATA:\n')
+        if extra_data_size <= 0:
+            print('No extra data found at end of file.')
+        else:
+            ole.fp.seek(offset_extra_data)
+            # read until end of file:
+            exdata = ole.fp.read()
+            assert len(exdata) == extra_data_size
+            print(hexdump3(exdata, length=16, startindex=offset_extra_data))
+        print('')
+
+def show_fat(ole):
print('FAT:')
t = tablestream.TableStream([8, 12, 8, 8], header_row=['Sector #', 'Type', 'Offset', 'Next #'])
- for i in range(ole.nb_sect):
+ for i in range(len(ole.fat)):
fat_value = ole.fat[i]
fat_type = FAT_TYPES.get(fat_value, '')
color_type = FAT_COLORS.get(fat_value, FAT_COLORS['default'])
# compute offset based on sector size:
- offset = ole.sectorsize * (i+1)
+ offset = ole.sectorsize * (i + 1)
# print '%8X: %-12s offset=%08X next=%8X' % (i, fat_type, 0, fat_value)
t.write_row(['%8X' % i, fat_type, '%08X' % offset, '%8X' % fat_value],
- colors=[None, color_type, None, None])
+ colors=[None, color_type, None, None])
+ t.close()
print('')
+
+def show_minifat(ole):
print('MiniFAT:')
# load MiniFAT if it wasn't already done:
ole.loadminifat()
+ t = tablestream.TableStream([8, 12, 8, 8], header_row=['Sector #', 'Type', 'Offset', 'Next #'])
for i in range(len(ole.minifat)):
fat_value = ole.minifat[i]
fat_type = FAT_TYPES.get(fat_value, 'Data')
- print('%8X: %-12s offset=%08X next=%8X' % (i, fat_type, 0, fat_value))
+ color_type = FAT_COLORS.get(fat_value, FAT_COLORS['default'])
+ # TODO: compute offset
+ # print('%8X: %-12s offset=%08X next=%8X' % (i, fat_type, 0, fat_value))
+ t.write_row(['%8X' % i, fat_type, 'N/A', '%8X' % fat_value],
+ colors=[None, color_type, None, None])
+ t.close()
+ print('')
+
+# === MAIN ===================================================================
+
+def main():
+    """
+    Command line entry point of olemap.
+
+    Parses the options, then for every file given on the command line (and
+    every file found by xglob in zip archives or subdirectories, depending
+    on the options) prints the parts that were asked for: OLE header, FAT
+    and/or MiniFAT. If none of --header, --fat, --minifat is given, only the
+    header is shown, followed by a tip about the other options.
+    Without any file argument, the help is printed and the script exits.
+    """
+    # the first filename is mandatory, see the check of args below
+    usage = 'usage: olemap [options] <filename> [filename2 ...]'
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option("-r", action="store_true", dest="recursive",
+                      help='find files recursively in subdirectories.')
+    parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
+                      help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)')
+    parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
+                      help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
+    # parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,
+    #                         help="logging level debug/info/warning/error/critical (default=%default)")
+    parser.add_option("--header", action="store_true", dest="header",
+                      help='Display the OLE header (default: yes)')
+    parser.add_option("--fat", action="store_true", dest="fat",
+                      help='Display the FAT (default: no)')
+    parser.add_option("--minifat", action="store_true", dest="minifat",
+                      help='Display the MiniFAT (default: no)')
+    parser.add_option('-x', "--exdata", action="store_true", dest="extra_data",
+                      help='Display a hex dump of extra data at end of file')
+
+    # TODO: add logfile option
+
+    (options, args) = parser.parse_args()
+
+    # Print help if no arguments are passed
+    if len(args) == 0:
+        print(BANNER)
+        print(__doc__)
+        parser.print_help()
+        sys.exit()
+
+    # if no display option is provided, set defaults:
+    default_options = False
+    if not (options.header or options.fat or options.minifat):
+        options.header = True
+        # options.fat = True
+        # options.minifat = True
+        default_options = True
+
+    # print banner with version
+    print(BANNER)
+
+    for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
+                                                      zip_password=options.zip_password, zip_fname=options.zip_fname):
+        # TODO: handle xglob errors
+        # ignore directory names stored in zip files:
+        if container and filename.endswith('/'):
+            continue
+        full_name = '%s in %s' % (filename, container) if container else filename
+        print("-" * 79)
+        print('FILE: %s\n' % full_name)
+        if data is not None:
+            # data extracted from zip file
+            ole = olefile.OleFileIO(data)
+        else:
+            # normal filename
+            ole = olefile.OleFileIO(filename)
+
+        try:
+            if options.header:
+                show_header(ole, extra_data=options.extra_data)
+            if options.fat:
+                show_fat(ole)
+            if options.minifat:
+                show_minifat(ole)
+        finally:
+            # close the file also if one of the display functions fails
+            ole.close()
+
+    # if no display option is provided, print a tip:
+    if default_options:
+        print('To display the FAT or MiniFAT structures, use options --fat or --minifat, and -h for help.')
- ole.close()
if __name__ == '__main__':
main()
diff --git a/oletools/olemeta.py b/oletools/olemeta.py
index 1ad0abd..7ae8b3e 100644
--- a/oletools/olemeta.py
+++ b/oletools/olemeta.py
@@ -15,7 +15,7 @@ http://www.decalage.info/python/oletools
#=== LICENSE =================================================================
-# olemeta is copyright (c) 2013-2016, Philippe Lagadec (http://www.decalage.info)
+# olemeta is copyright (c) 2013-2017, Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
@@ -47,31 +47,42 @@ http://www.decalage.info/python/oletools
# 2016-09-06 v0.50 PL: - added main entry point for setup.py
# 2016-10-25 PL: - fixed print for Python 3
# 2016-10-28 PL: - removed the UTF8 codec for console display
+# 2017-04-26 v0.51 PL: - fixed absolute imports (issue #141)
+# 2017-05-04 PL: - added optparse and xglob (issue #141)
-__version__ = '0.50'
+__version__ = '0.51'
#------------------------------------------------------------------------------
# TODO:
-# + optparse
# + nicer output: table with fixed columns, datetime, etc
# + CSV output
# + option to only show available properties (by default)
+# + display codepage names
#=== IMPORTS =================================================================
-import sys, codecs
-import thirdparty.olefile as olefile
-from thirdparty.tablestream import tablestream
+import sys, os, optparse
+# IMPORTANT: it should be possible to run oletools directly as scripts
+# in any directory without installing them with pip or setup.py.
+# In that case, relative imports are NOT usable.
+# And to enable Python 2+3 compatibility, we need to use absolute imports,
+# so we add the oletools parent folder to sys.path (absolute+normalized path):
+_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
+# print('_thismodule_dir = %r' % _thismodule_dir)
+_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
+# print('_parent_dir = %r' % _parent_dir)
+if not _parent_dir in sys.path:
+ sys.path.insert(0, _parent_dir)
-#=== MAIN =================================================================
+from oletools.thirdparty import olefile
+from oletools.thirdparty import xglob
+from oletools.thirdparty.tablestream import tablestream
-def main():
- try:
- ole = olefile.OleFileIO(sys.argv[1])
- except IndexError:
- sys.exit(__doc__)
+#=== MAIN =================================================================
+
+def process_ole(ole):
# parse and display metadata:
meta = ole.get_metadata()
@@ -114,7 +125,53 @@ def main():
t.write_row([prop, value], colors=[None, 'yellow'])
t.close()
- ole.close()
+
+# === MAIN ===================================================================
+
+def main():
+ # print banner with version
+ print('olemeta %s - http://decalage.info/python/oletools' % __version__)
+ print ('THIS IS WORK IN PROGRESS - Check updates regularly!')
+ print ('Please report any issue at https://github.com/decalage2/oletools/issues')
+
+ usage = 'usage: olemeta [options] <filename> [filename2 ...]'
+ parser = optparse.OptionParser(usage=usage)
+ parser.add_option("-r", action="store_true", dest="recursive",
+ help='find files recursively in subdirectories.')
+ parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
+ help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)')
+ parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
+ help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
+
+ # TODO: add logfile option
+ # parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,
+ # help="logging level debug/info/warning/error/critical (default=%default)")
+
+ (options, args) = parser.parse_args()
+
+ # Print help if no arguments are passed
+ if len(args) == 0:
+ print(__doc__)
+ parser.print_help()
+ sys.exit()
+
+ for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
+ zip_password=options.zip_password, zip_fname=options.zip_fname):
+ # TODO: handle xglob errors
+ # ignore directory names stored in zip files:
+ if container and filename.endswith('/'):
+ continue
+ full_name = '%s in %s' % (filename, container) if container else filename
+ print("=" * 79)
+ print('FILE: %s\n' % full_name)
+ if data is not None:
+ # data extracted from zip file
+ ole = olefile.OleFileIO(data)
+ else:
+ # normal filename
+ ole = olefile.OleFileIO(filename)
+ process_ole(ole)
+ ole.close()
if __name__ == '__main__':
- main()
\ No newline at end of file
+ main()
diff --git a/oletools/oleobj.py b/oletools/oleobj.py
index 50a48ff..1b54ccb 100644
--- a/oletools/oleobj.py
+++ b/oletools/oleobj.py
@@ -15,7 +15,7 @@ http://www.decalage.info/python/oletools
# === LICENSE ==================================================================
-# oleobj is copyright (c) 2015-2016 Philippe Lagadec (http://www.decalage.info)
+# oleobj is copyright (c) 2015-2017 Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
@@ -47,6 +47,7 @@ http://www.decalage.info/python/oletools
# 2016-07-19 PL: - fixed Python 2.6-7 support
# 2016-11-17 v0.51 PL: - fixed OLE native object extraction
# 2016-11-18 PL: - added main for setup.py entry point
+# 2017-05-03 PL: - fixed absolute imports (issue #141)
__version__ = '0.51'
@@ -70,8 +71,20 @@ __version__ = '0.51'
import logging, struct, optparse, os, re, sys
-from thirdparty.olefile import olefile
-from thirdparty.xglob import xglob
+# IMPORTANT: it should be possible to run oletools directly as scripts
+# in any directory without installing them with pip or setup.py.
+# In that case, relative imports are NOT usable.
+# And to enable Python 2+3 compatibility, we need to use absolute imports,
+# so we add the oletools parent folder to sys.path (absolute+normalized path):
+_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
+# print('_thismodule_dir = %r' % _thismodule_dir)
+_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
+# print('_parent_dir = %r' % _parent_dir)
+if not _parent_dir in sys.path:
+ sys.path.insert(0, _parent_dir)
+
+from oletools.thirdparty.olefile import olefile
+from oletools.thirdparty.xglob import xglob
# === LOGGING =================================================================
@@ -114,6 +127,14 @@ def get_logger(name, level=logging.CRITICAL+1):
# a global logger object used for debugging:
log = get_logger('oleobj')
+def enable_logging():
+ """
+ Enable logging for this module (disabled by default).
+ This will set the module-specific logger level to NOTSET, which
+ means the main application controls the actual logging level.
+ """
+ log.setLevel(logging.NOTSET)
+
# === CONSTANTS ==============================================================
@@ -290,6 +311,9 @@ class OleObject (object):
:param data: bytes, OLE 1.0 Object structure containing an OLE object
:return:
"""
+ # from ezhexviewer import hexdump3
+ # print("Parsing OLE object data:")
+ # print(hexdump3(data, length=16))
# Header: see MS-OLEDS 2.2.4 ObjectHeader
self.ole_version, data = read_uint32(data)
self.format_id, data = read_uint32(data)
diff --git a/oletools/oletimes.py b/oletools/oletimes.py
index 5970c8c..a00ce3d 100644
--- a/oletools/oletimes.py
+++ b/oletools/oletimes.py
@@ -16,7 +16,7 @@ http://www.decalage.info/python/oletools
#=== LICENSE =================================================================
-# oletimes is copyright (c) 2013-2016, Philippe Lagadec (http://www.decalage.info)
+# oletimes is copyright (c) 2013-2017, Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
@@ -48,61 +48,109 @@ http://www.decalage.info/python/oletools
# 2014-11-30 v0.03 PL: - improved output with prettytable
# 2016-07-20 v0.50 SL: - added Python 3 support
# 2016-09-05 PL: - added main entry point for setup.py
+# 2017-05-03 v0.51 PL: - fixed absolute imports (issue #141)
+# 2017-05-04 PL: - added optparse and xglob (issue #141)
-__version__ = '0.50'
+__version__ = '0.51'
#------------------------------------------------------------------------------
# TODO:
-# + optparse
# + nicer output: table with fixed columns, datetime, etc
# + CSV output
# + option to only show available timestamps (by default?)
#=== IMPORTS =================================================================
-import sys, datetime
-import thirdparty.olefile as olefile
-from thirdparty.prettytable import prettytable
+import sys, os, optparse
+# IMPORTANT: it should be possible to run oletools directly as scripts
+# in any directory without installing them with pip or setup.py.
+# In that case, relative imports are NOT usable.
+# And to enable Python 2+3 compatibility, we need to use absolute imports,
+# so we add the oletools parent folder to sys.path (absolute+normalized path):
+_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
+# print('_thismodule_dir = %r' % _thismodule_dir)
+_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
+# print('_parent_dir = %r' % _parent_dir)
+if not _parent_dir in sys.path:
+ sys.path.insert(0, _parent_dir)
-# === MAIN ===================================================================
+from oletools.thirdparty import olefile
+from oletools.thirdparty import xglob
+from oletools.thirdparty.prettytable import prettytable
-def main():
- # print banner with version
- print('oletimes %s - http://decalage.info/python/oletools' % __version__)
- try:
- ole = olefile.OleFileIO(sys.argv[1])
- except IndexError:
- sys.exit(__doc__)
+# === FUNCTIONS ==============================================================
- def dt2str (dt):
- """
- Convert a datetime object to a string for display, without microseconds
+def dt2str(dt):
+ """
+ Convert a datetime object to a string for display, without microseconds
- :param dt: datetime.datetime object, or None
- :return: str, or None
- """
- if dt is None:
- return None
- dt = dt.replace(microsecond = 0)
- return str(dt)
+ :param dt: datetime.datetime object, or None
+ :return: str, or None
+ """
+ if dt is None:
+ return None
+ dt = dt.replace(microsecond=0)
+ return str(dt)
+
+def process_ole(ole):
t = prettytable.PrettyTable(['Stream/Storage name', 'Modification Time', 'Creation Time'])
t.align = 'l'
t.max_width = 26
- #t.border = False
-
- #print'- Root mtime=%s ctime=%s' % (ole.root.getmtime(), ole.root.getctime())
t.add_row(('Root', dt2str(ole.root.getmtime()), dt2str(ole.root.getctime())))
-
for obj in ole.listdir(streams=True, storages=True):
- #print '- %s: mtime=%s ctime=%s' % (repr('/'.join(obj)), ole.getmtime(obj), ole.getctime(obj))
t.add_row((repr('/'.join(obj)), dt2str(ole.getmtime(obj)), dt2str(ole.getctime(obj))))
-
print(t)
- ole.close()
+
+# === MAIN ===================================================================
+
+def main():
+ # print banner with version
+ print('oletimes %s - http://decalage.info/python/oletools' % __version__)
+ print ('THIS IS WORK IN PROGRESS - Check updates regularly!')
+ print ('Please report any issue at https://github.com/decalage2/oletools/issues')
+
+ usage = 'usage: oletimes [options] <filename> [filename2 ...]'
+ parser = optparse.OptionParser(usage=usage)
+ parser.add_option("-r", action="store_true", dest="recursive",
+ help='find files recursively in subdirectories.')
+ parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
+ help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)')
+ parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
+ help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
+
+ # TODO: add logfile option
+ # parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,
+ # help="logging level debug/info/warning/error/critical (default=%default)")
+
+ (options, args) = parser.parse_args()
+
+ # Print help if no arguments are passed
+ if len(args) == 0:
+ print(__doc__)
+ parser.print_help()
+ sys.exit()
+
+ for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
+ zip_password=options.zip_password, zip_fname=options.zip_fname):
+ # TODO: handle xglob errors
+ # ignore directory names stored in zip files:
+ if container and filename.endswith('/'):
+ continue
+ full_name = '%s in %s' % (filename, container) if container else filename
+ print("=" * 79)
+ print('FILE: %s\n' % full_name)
+ if data is not None:
+ # data extracted from zip file
+ ole = olefile.OleFileIO(data)
+ else:
+ # normal filename
+ ole = olefile.OleFileIO(filename)
+ process_ole(ole)
+ ole.close()
if __name__ == '__main__':
main()
diff --git a/oletools/olevba.py b/oletools/olevba.py
index ae0032b..d94a0b1 100644
--- a/oletools/olevba.py
+++ b/oletools/olevba.py
@@ -26,7 +26,7 @@ https://github.com/unixfreak0037/officeparser
# === LICENSE ==================================================================
-# olevba is copyright (c) 2014-2016 Philippe Lagadec (http://www.decalage.info)
+# olevba is copyright (c) 2014-2017 Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
@@ -188,8 +188,18 @@ from __future__ import print_function
# 2016-09-12 PL: - enabled packrat to improve pyparsing performance
# 2016-10-25 PL: - fixed raise and print statements for Python 3
# 2016-11-03 v0.51 PL: - added EnumDateFormats and EnumSystemLanguageGroupsW
-
-__version__ = '0.51a'
+# 2017-02-07 PL: - temporary fix for issue #132
+# - added keywords for Mac-specific macros (issue #130)
+# 2017-03-08 PL: - fixed absolute imports
+# 2017-03-16 PL: - fixed issues #148 and #149 for option --reveal
+# 2017-05-19 PL: - added enable_logging to fix issue #154
+# 2017-05-31 c1fe: - PR #135 fixing issue #132 for some Mac files
+# 2017-06-08 PL: - fixed issue #122 Chr() with negative numbers
+# 2017-06-15 PL: - deobfuscation line by line to handle large files
+# 2017-07-11 v0.52 PL: - raise exception instead of sys.exit (issue #180)
+# 2017-11-08 VB: - PR #124 adding user form parsing (Vincent Brillault)
+
+__version__ = '0.52dev3'
#------------------------------------------------------------------------------
# TODO:
@@ -223,7 +233,9 @@ __version__ = '0.51a'
#--- IMPORTS ------------------------------------------------------------------
-import sys, logging
+import sys
+import os
+import logging
import struct
import cStringIO
import math
@@ -256,15 +268,28 @@ except ImportError:
from oleform import extract_OleFormVariables
-import thirdparty.olefile as olefile
-from thirdparty.prettytable import prettytable
-from thirdparty.xglob import xglob, PathNotFoundException
-from thirdparty.pyparsing.pyparsing import \
+# IMPORTANT: it should be possible to run oletools directly as scripts
+# in any directory without installing them with pip or setup.py.
+# In that case, relative imports are NOT usable.
+# And to enable Python 2+3 compatibility, we need to use absolute imports,
+# so we add the oletools parent folder to sys.path (absolute+normalized path):
+_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
+# print('_thismodule_dir = %r' % _thismodule_dir)
+_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
+# print('_parent_dir = %r' % _parent_dir)
+if not _parent_dir in sys.path:
+ sys.path.insert(0, _parent_dir)
+
+from oletools.thirdparty import olefile
+from oletools.thirdparty.prettytable import prettytable
+from oletools.thirdparty.xglob import xglob, PathNotFoundException
+from oletools.thirdparty.pyparsing.pyparsing import \
CaselessKeyword, CaselessLiteral, Combine, Forward, Literal, \
Optional, QuotedString,Regex, Suppress, Word, WordStart, \
alphanums, alphas, hexnums,nums, opAssoc, srange, \
infixNotation, ParserElement
-import ppt_parser
+from oletools import ppt_parser
+
# monkeypatch email to fix issue #32:
# allow header lines without ":"
@@ -330,6 +355,18 @@ def get_logger(name, level=logging.CRITICAL+1):
log = get_logger('olevba')
+def enable_logging():
+ """
+ Enable logging for this module (disabled by default).
+ This will set the module-specific logger level to NOTSET, which
+ means the main application controls the actual logging level.
+ """
+ log.setLevel(logging.NOTSET)
+ # Also enable logging in the ppt_parser module:
+ ppt_parser.enable_logging()
+
+
+
#=== EXCEPTIONS ==============================================================
class OlevbaBaseException(Exception):
@@ -387,10 +424,17 @@ class UnexpectedDataError(OlevbaBaseException):
""" raised when parsing is strict (=not relaxed) and data is unexpected """
def __init__(self, stream_path, variable, expected, value):
+ if isinstance(expected, int):
+ es = '{0:04X}'.format(expected)
+ elif isinstance(expected, tuple):
+ es = ','.join('{0:04X}'.format(e) for e in expected)
+ es = '({0})'.format(es)
+ else:
+ raise ValueError('Unknown type encountered: {0}'.format(type(expected)))
super(UnexpectedDataError, self).__init__(
'Unexpected value in {0} for variable {1}: '
- 'expected {2:04X} but found {3:04X}!'
- .format(stream_path, variable, expected, value))
+ 'expected {2} but found {3:04X}!'
+ .format(stream_path, variable, es, value))
self.stream_path = stream_path
self.variable = variable
self.expected = expected
@@ -520,6 +564,11 @@ SUSPICIOUS_KEYWORDS = {
'May run an executable file or a system command':
('Shell', 'vbNormal', 'vbNormalFocus', 'vbHide', 'vbMinimizedFocus', 'vbMaximizedFocus', 'vbNormalNoFocus',
'vbMinimizedNoFocus', 'WScript.Shell', 'Run', 'ShellExecute'),
+ # MacScript: see https://msdn.microsoft.com/en-us/library/office/gg264812.aspx
+ 'May run an executable file or a system command on a Mac':
+ ('MacScript',),
+ 'May run an executable file or a system command on a Mac (if combined with libc.dylib)':
+ ('system', 'popen', r'exec[lv][ep]?'),
#Shell: http://msdn.microsoft.com/en-us/library/office/gg278437%28v=office.15%29.aspx
#WScript.Shell+Run sample: http://pastebin.com/Z4TMyuq6
'May run PowerShell commands':
@@ -550,8 +599,11 @@ SUSPICIOUS_KEYWORDS = {
'May enumerate application windows (if combined with Shell.Application object)':
('Windows', 'FindWindow'),
'May run code from a DLL':
- #TODO: regex to find declare+lib on same line
+ #TODO: regex to find declare+lib on same line - see mraptor
('Lib',),
+ 'May run code from a library on a Mac':
+ #TODO: regex to find declare+lib on same line - see mraptor
+ ('libc.dylib', 'dylib'),
'May inject code into another process':
('CreateThread', 'VirtualAlloc', # (issue #9) suggested by Davy Douhine - used by MSF payload
'VirtualAllocEx', 'RtlMoveMemory',
@@ -723,7 +775,7 @@ class VbaExpressionString(str):
# NOTE: here Combine() is required to avoid spaces between elements
# NOTE: here WordStart is necessary to avoid matching a number preceded by
# letters or underscore (e.g. "VBT1" or "ABC_34"), when using scanString
-decimal_literal = Combine(WordStart(vba_identifier_chars) + Word(nums)
+decimal_literal = Combine(Optional('-') + WordStart(vba_identifier_chars) + Word(nums)
+ Suppress(Optional(Word('%&^', exact=1))))
decimal_literal.setParseAction(lambda t: int(t[0]))
@@ -1411,15 +1463,27 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
reference_sizeof_name = struct.unpack(" decoded=%r' % (encoded, decoded)
- # remove parentheses and quotes from original string:
- # if encoded.startswith('(') and encoded.endswith(')'):
- # encoded = encoded[1:-1]
- # if encoded.startswith('"') and encoded.endswith('"'):
- # encoded = encoded[1:-1]
- # avoid duplicates and simple strings:
- if encoded not in found and decoded != encoded:
- results.append((encoded, decoded))
- found.add(encoded)
- # else:
- # print 'VBA STRING: encoded=%r => decoded=%r' % (encoded, decoded)
+ # Split the VBA code line by line to avoid MemoryError on large scripts:
+ for vba_line in vba_code.splitlines():
+ for tokens, start, end in vba_expr_str.scanString(vba_line):
+ encoded = vba_line[start:end]
+ decoded = tokens[0]
+ if isinstance(decoded, VbaExpressionString):
+ # This is a VBA expression, not a simple string
+ # print 'VBA EXPRESSION: encoded=%r => decoded=%r' % (encoded, decoded)
+ # remove parentheses and quotes from original string:
+ # if encoded.startswith('(') and encoded.endswith(')'):
+ # encoded = encoded[1:-1]
+ # if encoded.startswith('"') and encoded.endswith('"'):
+ # encoded = encoded[1:-1]
+ # avoid duplicates and simple strings:
+ if encoded not in found and decoded != encoded:
+ results.append((encoded, decoded))
+ found.add(encoded)
+ # else:
+ # print 'VBA STRING: encoded=%r => decoded=%r' % (encoded, decoded)
return results
@@ -1957,19 +2027,19 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'):
return json_obj
-_have_printed_json_start = False
-
-def print_json(json_dict=None, _json_is_last=False, **json_parts):
+def print_json(json_dict=None, _json_is_first=False, _json_is_last=False,
+ **json_parts):
""" line-wise print of json.dumps(json2ascii(..)) with options and indent+1
can use in two ways:
(1) print_json(some_dict)
(2) print_json(key1=value1, key2=value2, ...)
+ :param bool _json_is_first: set to True only for very first entry to open
+ the top-level json-list (prints the opening '[')
:param bool _json_is_last: set to True only for very last entry to complete
the top-level json-list
"""
- global _have_printed_json_start
if json_dict and json_parts:
raise ValueError('Invalid json argument: want either single dict or '
@@ -1981,9 +2051,8 @@ def print_json(json_dict=None, _json_is_last=False, **json_parts):
if json_parts:
json_dict = json_parts
- if not _have_printed_json_start:
+ if _json_is_first:
print('[')
- _have_printed_json_start = True
lines = json.dumps(json2ascii(json_dict), check_circular=False,
indent=4, ensure_ascii=False).splitlines()
@@ -2483,7 +2552,6 @@ class VBA_Parser(object):
"""
log.info('Check whether OLE file is PPT')
- ppt_parser.enable_logging()
try:
ppt = ppt_parser.PptParser(self.ole_file, fast_fail=True)
for vba_data in ppt.iter_vba_data():
@@ -2493,7 +2561,7 @@ class VBA_Parser(object):
self.ole_file.close() # just in case
self.ole_file = None # required to make other methods look at ole_subfiles
self.type = TYPE_PPT
- except Exception as exc:
+ except (ppt_parser.PptUnexpectedData, ValueError) as exc:
if self.container == 'PptParser':
# this is a subfile of a ppt --> to be expected that is no ppt
log.debug('PPT subfile is not a PPT file')
@@ -2704,12 +2772,16 @@ class VBA_Parser(object):
vba_stream_ids = set()
for vba_root, project_path, dir_path in self.vba_projects:
# extract all VBA macros from that VBA root storage:
- for stream_path, vba_filename, vba_code in \
- _extract_vba(self.ole_file, vba_root, project_path,
- dir_path, self.relaxed):
- # store direntry ids in a set:
- vba_stream_ids.add(self.ole_file._find(stream_path))
- yield (self.filename, stream_path, vba_filename, vba_code)
+ # The function _extract_vba may fail on some files (issue #132)
+ try:
+ for stream_path, vba_filename, vba_code in \
+ _extract_vba(self.ole_file, vba_root, project_path,
+ dir_path, self.relaxed):
+ # store direntry ids in a set:
+ vba_stream_ids.add(self.ole_file._find(stream_path))
+ yield (self.filename, stream_path, vba_filename, vba_code)
+ except Exception as e:
+ log.exception('Error in _extract_vba')
# Also look for VBA code in any stream including orphans
# (happens in some malformed files)
ole = self.ole_file
@@ -2796,14 +2868,23 @@ class VBA_Parser(object):
# based on the length of the encoded string, in reverse order:
analysis = sorted(analysis, key=lambda type_decoded_encoded: len(type_decoded_encoded[2]), reverse=True)
# normally now self.vba_code_all_modules contains source code from all modules
- deobf_code = self.vba_code_all_modules
+ # Need to collapse long lines:
+ deobf_code = vba_collapse_long_lines(self.vba_code_all_modules)
+ deobf_code = filter_vba(deobf_code)
for kw_type, decoded, encoded in analysis:
if kw_type == 'VBA string':
#print '%3d occurences: %r => %r' % (deobf_code.count(encoded), encoded, decoded)
# need to add double quotes around the decoded strings
# after escaping double-quotes as double-double-quotes for VBA:
decoded = decoded.replace('"', '""')
- deobf_code = deobf_code.replace(encoded, '"%s"' % decoded)
+ decoded = '"%s"' % decoded
+ # if the encoded string is enclosed in parentheses,
+ # keep them in the decoded version:
+ if encoded.startswith('(') and encoded.endswith(')'):
+ decoded = '(%s)' % decoded
+ deobf_code = deobf_code.replace(encoded, decoded)
+ # # TODO: there is a bug somewhere which creates double returns '\r\r'
+ # deobf_code = deobf_code.replace('\r\r', '\r')
return deobf_code
#TODO: repasser l'analyse plusieurs fois si des chaines hex ou base64 sont revelees
@@ -3213,10 +3294,9 @@ class VBA_Parser_CLI(VBA_Parser):
#=== MAIN =====================================================================
-def main():
- """
- Main function, called when olevba is run from the command line
- """
+def parse_args(cmd_line_args=None):
+ """ parse command line arguments (given ones or per default sys.argv) """
+
DEFAULT_LOG_LEVEL = "warning" # Default log level
LOG_LEVELS = {
'debug': logging.DEBUG,
@@ -3226,7 +3306,7 @@ def main():
'critical': logging.CRITICAL
}
- usage = 'usage: %prog [options] <filename> [filename2 ...]'
+ usage = 'usage: olevba [options] <filename> [filename2 ...]'
parser = optparse.OptionParser(usage=usage)
# parser.add_option('-o', '--outfile', dest='outfile',
# help='output file')
@@ -3268,26 +3348,43 @@ def main():
parser.add_option('--relaxed', dest="relaxed", action="store_true", default=False,
help="Do not raise errors if opening of substream fails")
- (options, args) = parser.parse_args()
+ (options, args) = parser.parse_args(cmd_line_args)
# Print help if no arguments are passed
if len(args) == 0:
+ print('olevba %s - http://decalage.info/python/oletools' % __version__)
print(__doc__)
parser.print_help()
sys.exit(RETURN_WRONG_ARGS)
+ options.loglevel = LOG_LEVELS[options.loglevel]
+
+ return options, args
+
+
+def main(cmd_line_args=None):
+ """
+ Main function, called when olevba is run from the command line
+
+ Optional argument: command line arguments to be forwarded to the
+ optparse.OptionParser in parse_args. By default (cmd_line_args=None),
+ sys.argv[1:] is used. Option mainly added for unit-testing.
+ """
+
+ options, args = parse_args(cmd_line_args)
+
# provide info about tool and its version
if options.output_mode == 'json':
- # prints opening [
+ # print first json entry with meta info and opening '['
print_json(script_name='olevba', version=__version__,
url='http://decalage.info/python/oletools',
- type='MetaInformation')
+ type='MetaInformation', _json_is_first=True)
else:
print('olevba %s - http://decalage.info/python/oletools' % __version__)
- logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s')
+ logging.basicConfig(level=options.loglevel, format='%(levelname)-8s %(message)s')
# enable logging in the modules:
- log.setLevel(logging.NOTSET)
+ enable_logging()
# Old display with number of items detected:
# print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('Type', 'Macros', 'AutoEx', 'Susp.', 'IOCs', 'HexStr')
diff --git a/oletools/olevba3.py b/oletools/olevba3.py
index 69e5f86..708f207 100644
--- a/oletools/olevba3.py
+++ b/oletools/olevba3.py
@@ -26,7 +26,7 @@ https://github.com/unixfreak0037/officeparser
# === LICENSE ==================================================================
-# olevba is copyright (c) 2014-2016 Philippe Lagadec (http://www.decalage.info)
+# olevba is copyright (c) 2014-2017 Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
@@ -189,8 +189,9 @@ from __future__ import print_function
# 2016-10-25 PL: - fixed raise and print statements for Python 3
# 2016-10-25 PL: - fixed regex bytes strings (PR/issue #100)
# 2016-11-03 v0.51 PL: - added EnumDateFormats and EnumSystemLanguageGroupsW
+# 2017-04-26 PL: - fixed absolute imports
-__version__ = '0.51a'
+__version__ = '0.51'
#------------------------------------------------------------------------------
# TODO:
@@ -224,7 +225,7 @@ __version__ = '0.51a'
#--- IMPORTS ------------------------------------------------------------------
-import sys, logging
+import sys, logging, os
import struct
from _io import StringIO,BytesIO
import math
@@ -255,14 +256,26 @@ except ImportError:
+ "see http://codespeak.net/lxml " \
+ "or http://effbot.org/zone/element-index.htm")
-import oletools.thirdparty.olefile as olefile
+# IMPORTANT: it should be possible to run oletools directly as scripts
+# in any directory without installing them with pip or setup.py.
+# In that case, relative imports are NOT usable.
+# And to enable Python 2+3 compatibility, we need to use absolute imports,
+# so we add the oletools parent folder to sys.path (absolute+normalized path):
+_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
+# print('_thismodule_dir = %r' % _thismodule_dir)
+_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
+# print('_parent_dir = %r' % _parent_dir)
+if not _parent_dir in sys.path:
+ sys.path.insert(0, _parent_dir)
+
+from oletools.thirdparty import olefile
from oletools.thirdparty.prettytable import prettytable
from oletools.thirdparty.xglob import xglob, PathNotFoundException
from oletools.thirdparty.pyparsing.pyparsing import \
CaselessKeyword, CaselessLiteral, Combine, Forward, Literal, \
Optional, QuotedString,Regex, Suppress, Word, WordStart, \
alphanums, alphas, hexnums,nums, opAssoc, srange, \
- infixNotation
+ infixNotation, ParserElement
import oletools.ppt_parser as ppt_parser
# monkeypatch email to fix issue #32:
@@ -287,6 +300,25 @@ else:
# xrange is now called range:
xrange = range
+
+# === PYTHON 3.0 - 3.4 SUPPORT ======================================================
+
+# From https://gist.github.com/ynkdir/867347/c5e188a4886bc2dd71876c7e069a7b00b6c16c61
+
+if sys.version_info >= (3, 0) and sys.version_info < (3, 5):
+ import codecs
+
+ _backslashreplace_errors = codecs.lookup_error("backslashreplace")
+
+ def backslashreplace_errors(exc):
+ if isinstance(exc, UnicodeDecodeError):
+ u = "".join("\\x{0:02x}".format(c) for c in exc.object[exc.start:exc.end])
+ return (u, exc.end)
+ return _backslashreplace_errors(exc)
+
+ codecs.register_error("backslashreplace", backslashreplace_errors)
+
+
# === LOGGING =================================================================
class NullHandler(logging.Handler):
@@ -1535,7 +1567,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
modulename_id = struct.unpack("253:
+ data_cropped = data_cropped[:254]
+ # append a space so that the regex can check the following character:
+ data_cropped += b' '
+ # m = re_control_word.match(self.data, self.index, self.index+253)
+ m = re_control_word.match(data_cropped)
if m:
cword = m.group(1)
param = None
if len(m.groups()) > 1:
param = m.group(2)
- # log.debug('control word %r at index %Xh - cword=%r param=%r' % (m.group(), self.index, cword, param))
+ # log.debug('control word at index %Xh - cword=%r param=%r %r' % (self.index, cword, param, m.group()))
self._control_word(m, cword, param)
self.index += len(m.group())
# if it's \bin, call _bin after updating index
if cword == b'bin':
self._bin(m, param)
continue
+ # Otherwise, it may be a control symbol:
m = re_control_symbol.match(self.data, self.index)
if m:
self.control_symbol(m)
self.index += len(m.group())
continue
+ # Otherwise, this is plain text:
+ # Use a regex to match all characters until the next brace or backslash:
m = re_text.match(self.data, self.index)
if m:
self._text(m)
self.index += len(m.group())
continue
raise RuntimeError('Should not have reached this point - index=%Xh' % self.index)
- self.end_of_file()
+ # call _end_of_file to make sure all groups are closed properly
+ self._end_of_file()
def _open_group(self):
@@ -430,6 +486,8 @@ class RtfParser(object):
def _control_word(self, matchobject, cword, param):
#log.debug('control word %r at index %Xh' % (matchobject.group(), self.index))
+ # TODO: according to RTF specs v1.9.1, "Destination changes are legal only immediately after an opening brace ({)"
+ # (not counting the special control symbol \*, of course)
if cword in DESTINATION_CONTROL_WORDS:
# log.debug('%r is a destination control word: starting a new destination' % cword)
self._open_destination(matchobject, cword)
@@ -454,9 +512,19 @@ class RtfParser(object):
def _bin(self, matchobject, param):
binlen = int(param)
+ # handle negative length
+ if binlen < 0:
+ log.warning('Detected anti-analysis trick: \\bin object with negative length at index %X' % self.index)
+ # binlen = int(param.strip('-'))
+ # According to my tests, if the bin length is negative,
+ # it should be treated as a null length:
+ binlen=0
+ # ignore optional space after \bin (one-byte slice: works on bytes in Python 2+3, and is empty at end of data)
+ if self.data[self.index:self.index + 1] == b' ':
+ log.debug('\\bin: ignoring whitespace before data')
+ self.index += 1
log.debug('\\bin: reading %d bytes of binary data' % binlen)
- # TODO: handle optional space?
- # TODO: handle negative length, and length greater than data
+ # TODO: handle length greater than data
bindata = self.data[self.index:self.index + binlen]
self.index += binlen
self.bin(bindata)
@@ -468,7 +536,7 @@ class RtfParser(object):
# log.debug('%Xh Reached End of File')
# close any group/destination that is still open:
while self.group_level > 0:
- # log.debug('Group Level = %d, closing group' % self.group_level)
+ log.debug('Group Level = %d, closing group' % self.group_level)
self._close_group()
self.end_of_file()
@@ -517,6 +585,7 @@ class RtfObjParser(RtfParser):
self.objects = []
def open_destination(self, destination):
+ # TODO: detect when the destination is within an objdata, report as obfuscation
if destination.cword == b'objdata':
log.debug('*** Start object data at index %Xh' % destination.start)
@@ -530,10 +599,10 @@ class RtfObjParser(RtfParser):
# Filter out all whitespaces first (just ignored):
hexdata1 = destination.data.translate(None, b' \t\r\n\f\v')
# Then filter out any other non-hex character:
- hexdata = re.sub(b'[^a-hA-H0-9]', b'', hexdata1)
+ hexdata = re.sub(b'[^a-fA-F0-9]', b'', hexdata1)
if len(hexdata) < len(hexdata1):
# this is only for debugging:
- nonhex = re.sub(b'[a-hA-H0-9]', b'', hexdata1)
+ nonhex = re.sub(b'[a-fA-F0-9]', b'', hexdata1)
log.debug('Found non-hex chars in hexdata: %r' % nonhex)
# MS Word accepts an extra hex digit, so we need to trim it if present:
if len(hexdata) & 1:
@@ -573,6 +642,7 @@ class RtfObjParser(RtfParser):
# TODO: extract useful cwords such as objclass
# TODO: keep track of cwords inside objdata, because it is unusual and indicates potential obfuscation
# TODO: same with control symbols, and opening bracket
+ # log.debug('- Control word "%s", param=%s, level=%d' % (cword, param, self.group_level))
pass
@@ -649,11 +719,22 @@ def process_file(container, filename, data, output_dir=None, save_object=False):
rtfp = RtfObjParser(data)
rtfp.parse()
for rtfobj in rtfp.objects:
+ ole_color = None
pkg_color = None
if rtfobj.is_ole:
- ole_column = 'format_id: %d\n' % rtfobj.format_id
+ ole_column = 'format_id: %d ' % rtfobj.format_id
+ if rtfobj.format_id == oleobj.OleObject.TYPE_EMBEDDED:
+ ole_column += '(Embedded)\n'
+ elif rtfobj.format_id == oleobj.OleObject.TYPE_LINKED:
+ ole_column += '(Linked)\n'
+ else:
+ ole_column += '(Unknown)\n'
ole_column += 'class name: %r\n' % rtfobj.class_name
- ole_column += 'data size: %d' % rtfobj.oledata_size
+ # if the object is linked and not embedded, data_size=None:
+ if rtfobj.oledata_size is None:
+ ole_column += 'data size: N/A'
+ else:
+ ole_column += 'data size: %d' % rtfobj.oledata_size
if rtfobj.is_package:
pkg_column = 'Filename: %r\n' % rtfobj.filename
pkg_column += 'Source path: %r\n' % rtfobj.src_path
@@ -667,6 +748,11 @@ def process_file(container, filename, data, output_dir=None, save_object=False):
pkg_column += '\nEXECUTABLE FILE'
else:
pkg_column = 'Not an OLE Package'
+ # Detect OLE2Link exploit
+ # http://www.kb.cert.org/vuls/id/921560
+ if rtfobj.class_name == 'OLE2Link':
+ ole_color = 'red'
+ ole_column += '\nPossibly an exploit for the OLE2Link vulnerability (VU#921560, CVE-2017-0199)'
else:
pkg_column = ''
ole_column = 'Not a well-formed OLE object'
@@ -676,7 +762,7 @@ def process_file(container, filename, data, output_dir=None, save_object=False):
'%08Xh' % rtfobj.start,
ole_column,
pkg_column
- ), colors=(None, None, None, pkg_color)
+ ), colors=(None, None, ole_color, pkg_color)
)
tstream.write_sep()
if save_object:
@@ -703,7 +789,8 @@ def process_file(container, filename, data, output_dir=None, save_object=False):
fname = '%s_object_%08X.noname' % (fname_prefix, rtfobj.start)
print(' saving to file %s' % fname)
open(fname, 'wb').write(rtfobj.olepkgdata)
- elif rtfobj.is_ole:
+ # When format_id=TYPE_LINKED, oledata_size=None
+ elif rtfobj.is_ole and rtfobj.oledata_size is not None:
print('Saving file embedded in OLE object #%d:' % i)
print(' format_id = %d' % rtfobj.format_id)
print(' class name = %r' % rtfobj.class_name)
@@ -789,7 +876,7 @@ def main():
format='%(levelname)-8s %(message)s')
# enable logging in the modules:
log.setLevel(logging.NOTSET)
- oleobj.log.setLevel(logging.NOTSET)
+ oleobj.enable_logging()
for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
zip_password=options.zip_password, zip_fname=options.zip_fname):
diff --git a/oletools/thirdparty/olefile/olefile.py b/oletools/thirdparty/olefile/olefile.py
index cd40472..1c4b50a 100644
--- a/oletools/thirdparty/olefile/olefile.py
+++ b/oletools/thirdparty/olefile/olefile.py
@@ -1,25 +1,26 @@
-#!/usr/bin/env python
+"""
+olefile (formerly OleFileIO_PL)
-# olefile (formerly OleFileIO_PL)
-#
-# Module to read/write Microsoft OLE2 files (also called Structured Storage or
-# Microsoft Compound Document File Format), such as Microsoft Office 97-2003
-# documents, Image Composer and FlashPix files, Outlook messages, ...
-# This version is compatible with Python 2.6+ and 3.x
-#
-# Project website: http://www.decalage.info/olefile
-#
-# olefile is copyright (c) 2005-2016 Philippe Lagadec (http://www.decalage.info)
-#
-# olefile is based on the OleFileIO module from the PIL library v1.1.6
-# See: http://www.pythonware.com/products/pil/index.htm
-#
-# The Python Imaging Library (PIL) is
-# Copyright (c) 1997-2005 by Secret Labs AB
-# Copyright (c) 1995-2005 by Fredrik Lundh
-#
-# See source code and LICENSE.txt for information on usage and redistribution.
+Module to read/write Microsoft OLE2 files (also called Structured Storage or
+Microsoft Compound Document File Format), such as Microsoft Office 97-2003
+documents, Image Composer and FlashPix files, Outlook messages, ...
+This version is compatible with Python 2.6+ and 3.x
+
+Project website: https://www.decalage.info/olefile
+
+olefile is copyright (c) 2005-2017 Philippe Lagadec
+(https://www.decalage.info)
+olefile is based on the OleFileIO module from the PIL library v1.1.7
+See: http://www.pythonware.com/products/pil/index.htm
+and http://svn.effbot.org/public/tags/pil-1.1.7/PIL/OleFileIO.py
+
+The Python Imaging Library (PIL) is
+Copyright (c) 1997-2009 by Secret Labs AB
+Copyright (c) 1995-2009 by Fredrik Lundh
+
+See source code and LICENSE.txt for information on usage and redistribution.
+"""
# Since OleFileIO_PL v0.30, only Python 2.6+ and 3.x is supported
# This import enables print() as a function rather than a keyword
@@ -28,14 +29,10 @@
from __future__ import print_function # This version of olefile requires Python 2.6+ or 3.x.
-__author__ = "Philippe Lagadec"
-__date__ = "2016-04-26"
-__version__ = '0.44'
-
#--- LICENSE ------------------------------------------------------------------
-# olefile (formerly OleFileIO_PL) is copyright (c) 2005-2016 Philippe Lagadec
-# (http://www.decalage.info)
+# olefile (formerly OleFileIO_PL) is copyright (c) 2005-2017 Philippe Lagadec
+# (https://www.decalage.info)
#
# All rights reserved.
#
@@ -66,8 +63,8 @@ __version__ = '0.44'
# Imaging Library (PIL) published by Fredrik Lundh under the following license:
# The Python Imaging Library (PIL) is
-# Copyright (c) 1997-2005 by Secret Labs AB
-# Copyright (c) 1995-2005 by Fredrik Lundh
+# Copyright (c) 1997-2009 by Secret Labs AB
+# Copyright (c) 1995-2009 by Fredrik Lundh
#
# By obtaining, using, and/or copying this software and/or its associated
# documentation, you agree that you have read, understood, and will comply with
@@ -138,7 +135,7 @@ __version__ = '0.44'
# 2009-12-11 v0.20 PL: - bugfix in OleFileIO.open when filename is not plain str
# 2010-01-22 v0.21 PL: - added support for big-endian CPUs such as PowerPC Macs
# 2012-02-16 v0.22 PL: - fixed bug in getproperties, patch by chuckleberryfinn
-# (https://bitbucket.org/decalage/olefileio_pl/issue/7)
+# (https://github.com/decalage2/olefile/issues/7)
# - added close method to OleFileIO (fixed issue #2)
# 2012-07-25 v0.23 PL: - added support for file-like objects (patch by mete0r_kr)
# 2013-05-05 v0.24 PL: - getproperties: added conversion from filetime to python
@@ -196,6 +193,16 @@ __version__ = '0.44'
# 2016-04-27 - added support for incomplete streams and incorrect
# directory entries (to read malformed documents)
# 2016-05-04 - fixed slight bug in OleStream
+# 2016-11-27 DR: - added method to get the clsid of a storage/stream
+# (Daniel Roethlisberger)
+# 2017-05-31 v0.45 BS: - PR #114 from oletools to handle excessive number of
+# properties:
+# https://github.com/decalage2/oletools/pull/114
+# 2017-07-11 PL: - ignore incorrect ByteOrder (issue #70)
+
+__date__ = "2017-07-11"
+__version__ = '0.45dev2'
+__author__ = "Philippe Lagadec"
#-----------------------------------------------------------------------------
# TODO (for version 1.0):
@@ -223,7 +230,7 @@ __version__ = '0.44'
# - see also original notes and FIXME below
# - remove all obsolete FIXMEs
# - OleMetadata: fix version attrib according to
-# http://msdn.microsoft.com/en-us/library/dd945671%28v=office.12%29.aspx
+# https://msdn.microsoft.com/en-us/library/dd945671%28v=office.12%29.aspx
# IDEAS:
# - in OleFileIO._open and OleStream, use size=None instead of 0x7FFFFFFF for
@@ -238,8 +245,8 @@ __version__ = '0.44'
# - create a simple OLE explorer with wxPython
# FUTURE EVOLUTIONS to add write support:
-# see issue #6 on Bitbucket:
-# https://bitbucket.org/decalage/olefileio_pl/issue/6/improve-olefileio_pl-to-write-ole-files
+# see issue #6 on GitHub:
+# https://github.com/decalage2/olefile/issues/6
#-----------------------------------------------------------------------------
# NOTES from PIL 1.1.6:
@@ -268,6 +275,10 @@ __version__ = '0.44'
#------------------------------------------------------------------------------
+__all__ = ['isOleFile', 'OleFileIO', 'OleMetadata', 'enable_logging',
+ 'MAGIC', 'STGTY_EMPTY',
+ 'STGTY_STREAM', 'STGTY_STORAGE', 'STGTY_ROOT', 'STGTY_PROPERTY',
+ 'STGTY_LOCKBYTES', 'MINIMAL_OLEFILE_SIZE', 'NOSTREAM']
import io
import sys
@@ -317,17 +328,10 @@ else:
#[PL] These workarounds were inspired from the Path module
# (see http://www.jorendorff.com/articles/python/path/)
-#TODO: test with old Python versions
-
-# Pre-2.3 workaround for basestring.
try:
basestring
except NameError:
- try:
- # is Unicode supported (Python >2.0 or >1.6 ?)
- basestring = (str, unicode)
- except NameError:
- basestring = str
+ basestring = str
#[PL] Experimental setting: if True, OLE filenames will be kept in Unicode
# if False (default PIL behaviour), all filenames are converted to Latin-1.
@@ -395,27 +399,27 @@ def enable_logging():
#=== CONSTANTS ===============================================================
-# magic bytes that should be at the beginning of every OLE file:
+#: magic bytes that should be at the beginning of every OLE file:
MAGIC = b'\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1'
#[PL]: added constants for Sector IDs (from AAF specifications)
-MAXREGSECT = 0xFFFFFFFA # (-6) maximum SECT
-DIFSECT = 0xFFFFFFFC # (-4) denotes a DIFAT sector in a FAT
-FATSECT = 0xFFFFFFFD # (-3) denotes a FAT sector in a FAT
-ENDOFCHAIN = 0xFFFFFFFE # (-2) end of a virtual stream chain
-FREESECT = 0xFFFFFFFF # (-1) unallocated sector
+MAXREGSECT = 0xFFFFFFFA #: (-6) maximum SECT
+DIFSECT = 0xFFFFFFFC #: (-4) denotes a DIFAT sector in a FAT
+FATSECT = 0xFFFFFFFD #: (-3) denotes a FAT sector in a FAT
+ENDOFCHAIN = 0xFFFFFFFE #: (-2) end of a virtual stream chain
+FREESECT = 0xFFFFFFFF #: (-1) unallocated sector
#[PL]: added constants for Directory Entry IDs (from AAF specifications)
-MAXREGSID = 0xFFFFFFFA # (-6) maximum directory entry ID
-NOSTREAM = 0xFFFFFFFF # (-1) unallocated directory entry
+MAXREGSID = 0xFFFFFFFA #: (-6) maximum directory entry ID
+NOSTREAM = 0xFFFFFFFF #: (-1) unallocated directory entry
#[PL] object types in storage (from AAF specifications)
-STGTY_EMPTY = 0 # empty directory entry (according to OpenOffice.org doc)
-STGTY_STORAGE = 1 # element is a storage object
-STGTY_STREAM = 2 # element is a stream object
-STGTY_LOCKBYTES = 3 # element is an ILockBytes object
-STGTY_PROPERTY = 4 # element is an IPropertyStorage object
-STGTY_ROOT = 5 # element is a root storage
+STGTY_EMPTY = 0 #: empty directory entry
+STGTY_STORAGE = 1 #: element is a storage object
+STGTY_STREAM = 2 #: element is a stream object
+STGTY_LOCKBYTES = 3 #: element is an ILockBytes object
+STGTY_PROPERTY = 4 #: element is an IPropertyStorage object
+STGTY_ROOT = 5 #: element is a root storage
# Unknown size for a stream (used by OleStream):
UNKNOWN_SIZE = 0x7FFFFFFF
@@ -472,7 +476,13 @@ def isOleFile (filename):
"""
Test if a file is an OLE container (according to the magic bytes in its header).
- :param filename: string-like or file-like object, OLE file to parse
+ .. note::
+ This function only checks the first 8 bytes of the file, not the
+ rest of the OLE structure.
+
+ .. versionadded:: 0.16
+
+ :param filename: filename, contents or file-like object of the OLE file (string-like or file-like object)
- if filename is a string smaller than 1536 bytes, it is the path
of the file to open. (bytes or unicode string)
@@ -481,7 +491,9 @@ def isOleFile (filename):
- if filename is a file-like object (with read and seek methods),
it is parsed as-is.
+ :type filename: bytes or str or unicode or file
:returns: True if OLE, False otherwise.
+ :rtype: bool
"""
# check if filename is a string-like or file-like object:
if hasattr(filename, 'read'):
@@ -494,7 +506,8 @@ def isOleFile (filename):
header = filename[:len(MAGIC)]
else:
# string-like object: filename of file on disk
- header = open(filename, 'rb').read(len(MAGIC))
+ with open(filename, 'rb') as fp:
+ header = fp.read(len(MAGIC))
if header == MAGIC:
return True
else:
@@ -511,8 +524,6 @@ else:
return c if c.__class__ is int else c[0]
-#TODO: replace i16 and i32 with more readable struct.unpack equivalent?
-
def i16(c, o = 0):
"""
Converts a 2-bytes (16 bits) string to an integer.
@@ -520,7 +531,7 @@ def i16(c, o = 0):
:param c: string containing bytes to convert
:param o: offset of bytes to convert in string
"""
- return i8(c[o]) | (i8(c[o+1])<<8)
+ return struct.unpack("
+
+useruser202017-10-26T09:10:00Z2017-10-26T09:10:00Z1392502128816TestThis is a harmless test document.It contains neither macros nor dde links nor embedded viruses nor links to evil web pages. Not even a single insult. Boring!Just to make things slightly interesting, however, we add some ünicöde-ßtringß and different text sizes, colors and fonts
\ No newline at end of file
diff --git a/tests/test-data/msodde-doc/harmless-clean.doc b/tests/test-data/msodde-doc/harmless-clean.doc
new file mode 100644
index 0000000..38fcf72
--- /dev/null
+++ b/tests/test-data/msodde-doc/harmless-clean.doc
diff --git a/tests/test-data/msodde-doc/harmless-clean.docm b/tests/test-data/msodde-doc/harmless-clean.docm
new file mode 100644
index 0000000..f234cae
--- /dev/null
+++ b/tests/test-data/msodde-doc/harmless-clean.docm
diff --git a/tests/test-data/msodde-doc/harmless-clean.docx b/tests/test-data/msodde-doc/harmless-clean.docx
new file mode 100644
index 0000000..59099f3
--- /dev/null
+++ b/tests/test-data/msodde-doc/harmless-clean.docx
diff --git a/tests/test-data/msodde-doc/harmless-clean.xml b/tests/test-data/msodde-doc/harmless-clean.xml
new file mode 100644
index 0000000..cd1e53c
--- /dev/null
+++ b/tests/test-data/msodde-doc/harmless-clean.xml
@@ -0,0 +1,3 @@
+
+
+TestThis is a harmless test document.It contains neither macros nor dde links nor embedded viruses nor links to evil web pages. Not even a single insult. Boring!Just to make things slightly interesting, however, we add some ünicöde-ßtringß and different text sizes, colors and fontsNormal0139250Microsoft Office Word021falseTitel1false288falsefalse16.0000useruser22017-10-26T09:10:00Z2017-10-26T09:10:00Z
\ No newline at end of file
diff --git a/tests/test-data/rtfobj/issue_185.rtf b/tests/test-data/rtfobj/issue_185.rtf
new file mode 100644
index 0000000..3a1da01
--- /dev/null
+++ b/tests/test-data/rtfobj/issue_185.rtf
@@ -0,0 +1 @@
+{\rt{\object\objautlink\objupdate\rsltpict\objw37542\objh829\objscalex59286\objscaley86308{\*\objclass \'77}{\*\objdata 32\bin6 FF}}}
\ No newline at end of file
diff --git a/tests/test_utils/__init__.py b/tests/test_utils/__init__.py
new file mode 100644
index 0000000..fca8642
--- /dev/null
+++ b/tests/test_utils/__init__.py
@@ -0,0 +1,6 @@
+from .output_capture import OutputCapture
+
+from os.path import dirname, join
+
+# Directory with test data, independent of current working directory
+DATA_BASE_DIR = join(dirname(dirname(__file__)), 'test-data')
diff --git a/tests/test_utils/output_capture.py b/tests/test_utils/output_capture.py
new file mode 100644
index 0000000..686bc38
--- /dev/null
+++ b/tests/test_utils/output_capture.py
@@ -0,0 +1,50 @@
+""" class OutputCapture to test what scripts print to stdout """
+
+from __future__ import print_function
+import sys
+
+
+# python 2/3 version conflict:
+if sys.version_info.major <= 2:
+ from StringIO import StringIO
+else:
+ from io import StringIO
+
+class OutputCapture:
+ """ context manager that captures stdout
+
+ use as follows::
+
+ with OutputCapture() as capturer:
+ run_my_script(some_args)
+
+ # either test line-by-line ...
+ for line in capturer:
+ some_test(line)
+ # ...or test all output in one go
+ some_test(capturer.buffer.getvalue())
+
+ """
+
+ def __init__(self):
+ self.buffer = StringIO()
+ self.orig_stdout = None
+
+ def __enter__(self):
+ # replace sys.stdout with own buffer.
+ self.orig_stdout = sys.stdout
+ sys.stdout = self.buffer
+ return self
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ sys.stdout = self.orig_stdout # re-set to original
+
+ if exc_type: # there has been an error
+ print('Got error during output capture!')
+ print('Print captured output and re-raise:')
+ for line in self.buffer.getvalue().splitlines():
+ print(line.rstrip()) # print output before re-raising
+
+ def __iter__(self):
+ for line in self.buffer.getvalue().splitlines():
+ yield line.rstrip() # remove newline at end of line
diff --git a/tests/test_utils/testdata_reader.py b/tests/test_utils/testdata_reader.py
new file mode 100644
index 0000000..4445024
--- /dev/null
+++ b/tests/test_utils/testdata_reader.py
@@ -0,0 +1,8 @@
+import os
+from os.path import dirname, abspath, normpath, join
+from . import DATA_BASE_DIR
+
+
+def read(relative_path):
+ with open(join(DATA_BASE_DIR, relative_path), 'rb') as file_handle:
+ return file_handle.read()
diff --git a/tests/unittest_template.py b/tests/unittest_template.py
new file mode 100644
index 0000000..a5c2cb6
--- /dev/null
+++ b/tests/unittest_template.py
@@ -0,0 +1,37 @@
+""" Test my new feature
+
+Some more info if you want
+
+Should work with python2 and python3!
+"""
+
+import unittest
+
+# if you need data from tests/test-data/DIR/, uncomment these lines:
+#from os.path import join, dirname, normpath
+#Directory with test data, independent of current working directory
+#DATA_DIR = normpath(join(dirname(__file__), '..', 'test-data', 'DIR'))
+
+
+class TestMyFeature(unittest.TestCase):
+ """ Tests my cool new feature """
+
+ def test_this(self):
+ """ check that this works """
+ pass # your code here
+
+ def test_that(self):
+ """ check that that also works """
+ pass # your code here
+
+ def helper_function(self, filename):
+ """ to be called from other test functions to avoid copy-and-paste
+
+ this is not called by unittest directly, only from your functions """
+ pass # your code here
+ # e.g.: msodde.main(join(DATA_DIR, filename))
+
+
+# just in case somebody calls this file as a script
+if __name__ == '__main__':
+ unittest.main()