diff --git a/.travis.yml b/.travis.yml index de2b1f7..a0a8f50 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,5 +17,8 @@ matrix: - python: pypy - python: pypy3 +install: + - pip install msoffcrypto-tool + script: - python setup.py test diff --git a/INSTALL.txt b/INSTALL.txt index 62e4a4e..f1b4a46 100644 --- a/INSTALL.txt +++ b/INSTALL.txt @@ -1,16 +1,12 @@ -How to Download and Install python-oletools -=========================================== +How to Download and Install oletools +==================================== Pre-requisites -------------- -The recommended Python version to run oletools is Python 2.7. -Python 2.6 is also supported, but as it is not tested as often as 2.7, some features -might not work as expected. - -Since v0.50, oletools can also run with Python 3.x. As this is quite new, please -report any issue you may encounter. - +The recommended Python version to run oletools is the latest **Python 3.x** (3.7 for now). +Python 2.7 is still supported, but as it will become end of life in 2020 (see https://pythonclock.org/), it is highly +recommended to switch to Python 3 now. Recommended way to Download+Install/Update oletools: pip -------------------------------------------------------- @@ -23,7 +19,11 @@ system, either upgrade Python or see https://pip.pypa.io/en/stable/installing/ To download and install/update the latest release version of oletools, run the following command in a shell: +```text sudo -H pip install -U oletools +``` + +Replace `pip` by `pip3` or `pip2` to install on a specific Python version. **Important**: Since version 0.50, pip will automatically create convenient command-line scripts in /usr/local/bin to run all the oletools from any directory. @@ -33,7 +33,19 @@ in /usr/local/bin to run all the oletools from any directory. 
To download and install/update the latest release version of oletools, run the following command in a cmd window: +```text pip install -U oletools +``` + +Replace `pip` by `pip3` or `pip2` to install on a specific Python version. + +**Note**: with Python 3, you may need to open a cmd window with Administrator privileges in order to run pip +and install for all users. If that is not possible, you may also install only for the current user +by adding the `--user` option: + +```text +pip3 install -U --user oletools +``` **Important**: Since version 0.50, pip will automatically create convenient command-line scripts to run all the oletools from any directory: olevba, mraptor, oleid, rtfobj, etc. @@ -47,18 +59,33 @@ you may also use pip: ### Linux, Mac OSX, Unix +```text sudo -H pip install -U https://github.com/decalage2/oletools/archive/master.zip +``` + +Replace `pip` by `pip3` or `pip2` to install on a specific Python version. ### Windows +```text pip install -U https://github.com/decalage2/oletools/archive/master.zip +``` + +Replace `pip` by `pip3` or `pip2` to install on a specific Python version. + +**Note**: with Python 3, you may need to open a cmd window with Administrator privileges in order to run pip +and install for all users. If that is not possible, you may also install only for the current user +by adding the `--user` option: +```text +pip3 install -U --user https://github.com/decalage2/oletools/archive/master.zip +``` How to install offline - Computer without Internet access --------------------------------------------------------- First, download the oletools archive on a computer with Internet access: -* Latest stable version: from https://github.com/decalage2/oletools/releases +* Latest stable version: from https://pypi.org/project/oletools/ or https://github.com/decalage2/oletools/releases * Development version: https://github.com/decalage2/oletools/archive/master.zip Copy the archive file to the target computer. 
@@ -66,11 +93,15 @@ Copy the archive file to the target computer. On Linux, Mac OSX, Unix, run the following command using the filename of the archive that you downloaded: +```text sudo -H pip install -U oletools.zip +``` On Windows: +```text pip install -U oletools.zip +``` Old school install using setup.py @@ -88,9 +119,12 @@ Then extract the archive, open a shell and go to the oletools directory. ### Linux, Mac OSX, Unix +```text sudo -H python setup.py install +``` ### Windows: +```text python setup.py install - +``` diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..896a57a --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,52 @@ +This license applies to the python-oletools package, apart from the thirdparty folder which contains third-party files +published with their own license. + +The python-oletools package is copyright (c) 2012-2019 Philippe Lagadec (http://www.decalage.info) + +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +---------- + +olevba contains modified source code from the officeparser project, published +under the following MIT License (MIT): + +officeparser is copyright (c) 2014 John William Davison + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..b08e1e6 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,14 @@ +include install.bat +include INSTALL.txt +include README.md +include requirements.txt +include oletools/README.rst +include oletools/README.html +include oletools/LICENSE.txt +include oletools/DocVarDump.vba +recursive-include oletools/thirdparty *.* +recursive-include cheatsheet *.* +global-exclude *.pyc + +recursive-include tests *.py +graft tests/test-data diff --git a/README.md b/README.md index 69b7b81..163bc95 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,25 @@ Note: python-oletools is not related to OLETools published by BeCubed Software. News ---- -- **2018-05-30 v0.53**: +- **2019-05-22 v0.54.2**: + - bugfix release: fixed several issues related to encrypted documents + and XLM/XLF Excel 4 macros + - msoffcrypto-tool is now installed by default to handle encrypted documents + - olevba and msodde now handle documents encrypted with common passwords such + as 123, 1234, 4321, 12345, 123456, VelvetSweatShop automatically. 
+- **2019-04-04 v0.54**: + - olevba, msodde: added support for encrypted MS Office files + - olevba: added detection and extraction of XLM/XLF Excel 4 macros (thanks to plugin_biff from Didier Stevens' oledump) + - olevba, mraptor: added detection of VBA running Excel 4 macros + - olevba: detect and display special characters such as backspace + - olevba: colorized output showing suspicious keywords in the VBA code + - olevba, mraptor: full Python 3 compatibility, no separate olevba3/mraptor3 anymore + - olevba: improved handling of code pages and unicode + - olevba: fixed a false-positive in VBA macro detection + - rtfobj: improved OLE Package handling, improved Equation object detection + - oleobj: added detection of external links to objects in OpenXML + - replaced third party packages by PyPI dependencies +- 2018-05-30 v0.53: - olevba and mraptor can now parse Word/PowerPoint 2007+ pure XML files (aka Flat OPC format) - improved support for VBA forms in olevba (oleform) - rtfobj now displays the CLSID of OLE objects, which is the best way to identify them. Known-bad CLSIDs such as MS Equation Editor are highlighted in red. 
@@ -75,26 +93,38 @@ Projects using oletools: ------------------------ oletools are used by a number of projects and online malware analysis services, -including [Viper](http://viper.li/), [REMnux](https://remnux.org/), +including +[ACE](https://github.com/IntegralDefense/ACE), +[Anlyz.io](https://sandbox.anlyz.io/), +[AssemblyLine](https://www.cse-cst.gc.ca/en/assemblyline), +[CAPE](https://github.com/ctxis/CAPE), +[Cuckoo Sandbox](https://github.com/cuckoosandbox/cuckoo), +[DARKSURGEON](https://github.com/cryps1s/DARKSURGEON), +[Deepviz](https://sandbox.deepviz.com/), +[dridex.malwareconfig.com](https://dridex.malwareconfig.com), [FAME](https://certsocietegenerale.github.io/fame/), +[FLARE-VM](https://github.com/fireeye/flare-vm), [Hybrid-analysis.com](https://www.hybrid-analysis.com/), [Joe Sandbox](https://www.document-analyzer.net/), -[Deepviz](https://sandbox.deepviz.com/), [Laika BOSS](https://github.com/lmco/laikaboss), -[Cuckoo Sandbox](https://github.com/cuckoosandbox/cuckoo), -[Anlyz.io](https://sandbox.anlyz.io/), -[ViperMonkey](https://github.com/decalage2/ViperMonkey), -[pcodedmp](https://github.com/bontchev/pcodedmp), -[dridex.malwareconfig.com](https://dridex.malwareconfig.com), -[Snake](https://github.com/countercept/snake), -[DARKSURGEON](https://github.com/cryps1s/DARKSURGEON), -[CAPE](https://github.com/ctxis/CAPE), -[AssemblyLine](https://www.cse-cst.gc.ca/en/assemblyline), +[MacroMilter](https://github.com/sbidy/MacroMilter), [malshare.io](https://malshare.io), -[Malware Repository Framework (MRF)](https://www.adlice.com/download/mrf/), [malware-repo](https://github.com/Tigzy/malware-repo), -[Vba2Graph](https://github.com/MalwareCantFly/Vba2Graph), +[Malware Repository Framework (MRF)](https://www.adlice.com/download/mrf/), +[olefy](https://github.com/HeinleinSupport/olefy), +[PeekabooAV](https://github.com/scVENUS/PeekabooAV), +[pcodedmp](https://github.com/bontchev/pcodedmp), +[PyCIRCLean](https://github.com/CIRCL/PyCIRCLean), 
+[REMnux](https://remnux.org/), +[Snake](https://github.com/countercept/snake), +[SNDBOX](https://app.sndbox.com), [Strelka](https://github.com/target/strelka), +[stoQ](https://stoq.punchcyber.com/), +[TheHive/Cortex](https://github.com/TheHive-Project/Cortex-Analyzers), +[Vba2Graph](https://github.com/MalwareCantFly/Vba2Graph), +[Viper](http://viper.li/), +[ViperMonkey](https://github.com/decalage2/ViperMonkey), +[YOMI](https://yomi.yoroi.company), and probably [VirusTotal](https://www.virustotal.com). And quite a few [other projects on GitHub](https://github.com/search?q=oletools&type=Repositories). (Please [contact me]((http://decalage.info/contact)) if you have or know @@ -149,7 +179,7 @@ License This license applies to the python-oletools package, apart from the thirdparty folder which contains third-party files published with their own license. -The python-oletools package is copyright (c) 2012-2018 Philippe Lagadec (http://www.decalage.info) +The python-oletools package is copyright (c) 2012-2019 Philippe Lagadec (http://www.decalage.info) All rights reserved. diff --git a/oletools/LICENSE.txt b/oletools/LICENSE.txt index 64da42b..4a964f8 100644 --- a/oletools/LICENSE.txt +++ b/oletools/LICENSE.txt @@ -1,54 +1,54 @@ -LICENSE for the python-oletools package: - -This license applies to the python-oletools package, apart from the thirdparty -folder which contains third-party files published with their own license. - -The python-oletools package is copyright (c) 2012-2018 Philippe Lagadec (http://www.decalage.info) - -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ----------- - -olevba contains modified source code from the officeparser project, published -under the following MIT License (MIT): - -officeparser is copyright (c) 2014 John William Davison - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +LICENSE for the python-oletools package: + +This license applies to the python-oletools package, apart from the thirdparty +folder which contains third-party files published with their own license. + +The python-oletools package is copyright (c) 2012-2019 Philippe Lagadec (http://www.decalage.info) + +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +---------- + +olevba contains modified source code from the officeparser project, published +under the following MIT License (MIT): + +officeparser is copyright (c) 2014 John William Davison + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/oletools/README.html b/oletools/README.html index 4da6e8a..1e4d8bf 100644 --- a/oletools/README.html +++ b/oletools/README.html @@ -17,13 +17,33 @@
oletools is a package of python tools to analyze Microsoft OLE2 files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), such as Microsoft Office documents or Outlook messages, mainly for malware analysis, forensics and debugging. It is based on the olefile parser. See http://www.decalage.info/python/oletools for more info.
Quick links: Home page - Download/Install - Documentation - Report Issues/Suggestions/Questions - Contact the Author - Repository - Updates on Twitter Cheatsheet
Note: python-oletools is not related to OLETools published by BeCubed Software.
oletools are used by a number of projects and online malware analysis services, including Viper, REMnux, FAME, Hybrid-analysis.com, Joe Sandbox, Deepviz, Laika BOSS, Cuckoo Sandbox, Anlyz.io, ViperMonkey, pcodedmp, dridex.malwareconfig.com, Snake, DARKSURGEON, and probably VirusTotal. (Please contact me if you have or know a project using oletools)
+oletools are used by a number of projects and online malware analysis services, including Viper, REMnux, FLARE-VM, FAME, Hybrid-analysis.com, Joe Sandbox, Deepviz, Laika BOSS, Cuckoo Sandbox, Anlyz.io, ViperMonkey, pcodedmp, dridex.malwareconfig.com, Snake, DARKSURGEON, CAPE, AssemblyLine, malshare.io, Malware Repository Framework (MRF), malware-repo, Vba2Graph, Strelka, stoQ, YOMI, and probably VirusTotal. And quite a few other projects on GitHub. (Please contact me if you have or know a project using oletools)
The recommended way to download and install/update the latest stable release of oletools is to use pip:
The code is available in a GitHub repository. You may use it to submit enhancements using forks and pull requests.
This license applies to the python-oletools package, apart from the thirdparty folder which contains third-party files published with their own license.
-The python-oletools package is copyright (c) 2012-2018 Philippe Lagadec (http://www.decalage.info)
+The python-oletools package is copyright (c) 2012-2019 Philippe Lagadec (http://www.decalage.info)
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
This is the home page of the documentation for python-oletools. The latest version can be found online, otherwise a copy is provided in the doc subfolder of the package.
python-oletools is a package of python tools to analyze Microsoft OLE2 files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), such as Microsoft Office documents or Outlook messages, mainly for malware analysis, forensics and debugging. It is based on the olefile parser. See http://www.decalage.info/python/oletools for more info.
Quick links: Home page - Download/Install - Documentation - Report Issues/Suggestions/Questions - Contact the Author - Repository - Updates on Twitter
diff --git a/oletools/doc/Home.md b/oletools/doc/Home.md index 29c03f4..7b1a93f 100644 --- a/oletools/doc/Home.md +++ b/oletools/doc/Home.md @@ -1,4 +1,4 @@ -python-oletools v0.53 documentation +python-oletools v0.54 documentation =================================== This is the home page of the documentation for python-oletools. The latest version can be found diff --git a/oletools/doc/Install.html b/oletools/doc/Install.html index c4e40e4..b67c1ea 100644 --- a/oletools/doc/Install.html +++ b/oletools/doc/Install.html @@ -16,28 +16,35 @@ -The recommended Python version to run oletools is Python 2.7. Python 2.6 is also supported, but as it is not tested as often as 2.7, some features might not work as expected.
-Since oletools v0.50, thanks to contributions by [@Sebdraven](https://twitter.com/Sebdraven), most tools can also run with Python 3.x. As this is quite new, please report any issue you may encounter.
+The recommended Python version to run oletools is the latest Python 3.x (3.7 for now). Python 2.7 is still supported, but as it will reach end of life in 2020 (see https://pythonclock.org/), it is highly recommended to switch to Python 3 now.
Pip is included with Python since version 2.7.9 and 3.4. If it is not installed on your system, either upgrade Python or see https://pip.pypa.io/en/stable/installing/
To download and install/update the latest release version of oletools, run the following command in a shell:
sudo -H pip install -U oletools
+Replace pip with pip3 or pip2 to install on a specific Python version.
Important: Since version 0.50, pip will automatically create convenient command-line scripts in /usr/local/bin to run all the oletools from any directory.
To download and install/update the latest release version of oletools, run the following command in a cmd window:
pip install -U oletools
+Replace pip with pip3 or pip2 to install on a specific Python version.
Note: with Python 3, you may need to open a cmd window with Administrator privileges in order to run pip and install for all users. If that is not possible, you may also install only for the current user by adding the --user option:
pip3 install -U --user oletools
Important: Since version 0.50, pip will automatically create convenient command-line scripts to run all the oletools from any directory: olevba, mraptor, oleid, rtfobj, etc.
If you want to benefit from the latest improvements in the development version, you may also use pip:
sudo -H pip install -U https://github.com/decalage2/oletools/archive/master.zip
+Replace pip with pip3 or pip2 to install on a specific Python version.
pip install -U https://github.com/decalage2/oletools/archive/master.zip
+Replace pip with pip3 or pip2 to install on a specific Python version.
Note: with Python 3, you may need to open a cmd window with Administrator privileges in order to run pip and install for all users. If that is not possible, you may also install only for the current user by adding the --user option:
pip3 install -U --user https://github.com/decalage2/oletools/archive/master.zip
First, download the oletools archive on a computer with Internet access: * Latest stable version: from https://github.com/decalage2/oletools/releases * Development version: https://github.com/decalage2/oletools/archive/master.zip
+First, download the oletools archive on a computer with Internet access: * Latest stable version: from https://pypi.org/project/oletools/ or https://github.com/decalage2/oletools/releases * Development version: https://github.com/decalage2/oletools/archive/master.zip
Copy the archive file to the target computer.
On Linux, Mac OSX, Unix, run the following command using the filename of the archive that you downloaded:
sudo -H pip install -U oletools.zip
diff --git a/oletools/doc/Install.md b/oletools/doc/Install.md
index 1cbce29..01c0375 100644
--- a/oletools/doc/Install.md
+++ b/oletools/doc/Install.md
@@ -1,18 +1,12 @@
-How to Download and Install python-oletools
-===========================================
+How to Download and Install oletools
+====================================
Pre-requisites
--------------
-The recommended Python version to run oletools is **Python 2.7**.
-Python 2.6 is also supported, but as it is not tested as often as 2.7, some features
-might not work as expected.
-
-Since oletools v0.50, thanks to contributions by [@Sebdraven](https://twitter.com/Sebdraven),
-most tools can also run with **Python 3.x**. As this is quite new, please
-[report any issue]((https://github.com/decalage2/oletools/issues)) you may encounter.
-
-
+The recommended Python version to run oletools is the latest **Python 3.x** (3.7 for now).
+Python 2.7 is still supported, but as it will reach end of life in 2020 (see https://pythonclock.org/), it is highly
+recommended to switch to Python 3 now.
Recommended way to Download+Install/Update oletools: pip
--------------------------------------------------------
@@ -29,6 +23,8 @@ run the following command in a shell:
sudo -H pip install -U oletools
```
+Replace `pip` with `pip3` or `pip2` to install on a specific Python version.
+
**Important**: Since version 0.50, pip will automatically create convenient command-line scripts
in /usr/local/bin to run all the oletools from any directory.
@@ -41,6 +37,16 @@ run the following command in a cmd window:
pip install -U oletools
```
+Replace `pip` with `pip3` or `pip2` to install on a specific Python version.
+
+**Note**: with Python 3, you may need to open a cmd window with Administrator privileges in order to run pip
+and install for all users. If that is not possible, you may also install only for the current user
+by adding the `--user` option:
+
+```text
+pip3 install -U --user oletools
+```
+
**Important**: Since version 0.50, pip will automatically create convenient command-line scripts
to run all the oletools from any directory: olevba, mraptor, oleid, rtfobj, etc.
@@ -57,17 +63,29 @@ you may also use pip:
sudo -H pip install -U https://github.com/decalage2/oletools/archive/master.zip
```
+Replace `pip` with `pip3` or `pip2` to install on a specific Python version.
+
### Windows
```text
pip install -U https://github.com/decalage2/oletools/archive/master.zip
```
+Replace `pip` with `pip3` or `pip2` to install on a specific Python version.
+
+**Note**: with Python 3, you may need to open a cmd window with Administrator privileges in order to run pip
+and install for all users. If that is not possible, you may also install only for the current user
+by adding the `--user` option:
+
+```text
+pip3 install -U --user https://github.com/decalage2/oletools/archive/master.zip
+```
+
How to install offline - Computer without Internet access
---------------------------------------------------------
First, download the oletools archive on a computer with Internet access:
-* Latest stable version: from https://github.com/decalage2/oletools/releases
+* Latest stable version: from https://pypi.org/project/oletools/ or https://github.com/decalage2/oletools/releases
* Development version: https://github.com/decalage2/oletools/archive/master.zip
Copy the archive file to the target computer.
diff --git a/oletools/doc/License.html b/oletools/doc/License.html
index fc828b1..082b2de 100644
--- a/oletools/doc/License.html
+++ b/oletools/doc/License.html
@@ -18,7 +18,7 @@
This license applies to the python-oletools package, apart from the thirdparty folder which contains third-party files published with their own license.
-The python-oletools package is copyright (c) 2012-2018 Philippe Lagadec (http://www.decalage.info)
+The python-oletools package is copyright (c) 2012-2019 Philippe Lagadec (http://www.decalage.info)
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
mraptor can be used either as a command-line tool, or as a python module from your own applications.
It is part of the python-oletools package.
Usage: mraptor.py [options] <filename> [filename2 ...]
+Usage: mraptor [options] <filename> [filename2 ...]
Options:
-h, --help show this help message and exit
@@ -49,15 +49,15 @@ An exit code is returned based on the analysis result:
- 20: SUSPICIOUS
Examples
Scan a single file:
-mraptor.py file.doc
+mraptor file.doc
Scan a single file, stored in a Zip archive with password “infected”:
-mraptor.py malicious_file.xls.zip -z infected
+mraptor malicious_file.xls.zip -z infected
Scan a collection of files stored in a folder:
-mraptor.py "MalwareZoo/VBA/*"
+mraptor "MalwareZoo/VBA/*"
Important: on Linux/MacOSX, always add double quotes around a file name when you use wildcards such as * and ?. Otherwise, the shell may replace the argument with the actual list of files matching the wildcards before starting the script.

Python 3 support - mraptor3
-As of v0.50, mraptor has been ported to Python 3 thanks to @sebdraven. However, the differences between Python 2 and 3 are significant and for now there is a separate version of mraptor named mraptor3 to be used with Python 3.
+Since v0.54, mraptor is fully compatible with both Python 2 and 3. There is no need to use mraptor3 anymore; however, it is still present for backward compatibility.
How to use mraptor in Python applications
TODO
diff --git a/oletools/doc/mraptor.md b/oletools/doc/mraptor.md
index 2c4fabd..55b5547 100644
--- a/oletools/doc/mraptor.md
+++ b/oletools/doc/mraptor.md
@@ -24,7 +24,7 @@ It is part of the [python-oletools](http://www.decalage.info/python/oletools) pa
## Usage
```text
-Usage: mraptor.py [options] <filename> [filename2 ...]
+Usage: mraptor [options] <filename> [filename2 ...]
Options:
-h, --help show this help message and exit
@@ -54,19 +54,19 @@ An exit code is returned based on the analysis result:
Scan a single file:
```text
-mraptor.py file.doc
+mraptor file.doc
```
Scan a single file, stored in a Zip archive with password "infected":
```text
-mraptor.py malicious_file.xls.zip -z infected
+mraptor malicious_file.xls.zip -z infected
```
Scan a collection of files stored in a folder:
```text
-mraptor.py "MalwareZoo/VBA/*"
+mraptor "MalwareZoo/VBA/*"
```
**Important**: on Linux/MacOSX, always add double quotes around a file name when you use
@@ -77,10 +77,8 @@ list of files matching the wildcards before starting the script.
## Python 3 support - mraptor3
-As of v0.50, mraptor has been ported to Python 3 thanks to @sebdraven.
-However, the differences between Python 2 and 3 are significant and for now
-there is a separate version of mraptor named mraptor3 to be used with
-Python 3.
+Since v0.54, mraptor is fully compatible with both Python 2 and 3.
+There is no need to use mraptor3 anymore; however, it is still present for backward compatibility.
--------------------------------------------------------------------------
diff --git a/oletools/doc/olebrowse.html b/oletools/doc/olebrowse.html
index f212399..3a2f4ae 100644
--- a/oletools/doc/olebrowse.html
+++ b/oletools/doc/olebrowse.html
@@ -26,7 +26,7 @@
And for Python 3:
sudo apt-get install python3-tk
Usage
-olebrowse.py [file]
+olebrowse [file]
If you provide a file it will be opened, else a dialog will allow you to browse folders to open a file. Then if it is a valid OLE file, the list of data streams will be displayed. You can select a stream, and then either view its content in a builtin hexadecimal viewer, or save it to a file for further analysis.
Screenshots
Main menu, showing all streams in the OLE file:
diff --git a/oletools/doc/olebrowse.md b/oletools/doc/olebrowse.md
index fa58a57..3f98497 100644
--- a/oletools/doc/olebrowse.md
+++ b/oletools/doc/olebrowse.md
@@ -30,9 +30,9 @@ sudo apt-get install python3-tk
Usage
-----
-
- olebrowse.py [file]
-
+```
+olebrowse [file]
+```
If you provide a file it will be opened, else a dialog will allow you to browse
folders to open a file. Then if it is a valid OLE file, the list of data streams
will be displayed. You can select a stream, and then either view its content
diff --git a/oletools/doc/oledir.html b/oletools/doc/oledir.html
index 9495810..0b22446 100644
--- a/oletools/doc/oledir.html
+++ b/oletools/doc/oledir.html
@@ -21,10 +21,21 @@
It can be used either as a command-line tool, or as a python module from your own applications.
It is part of the python-oletools package.
Usage
-Usage: oledir.py <filename>
+Usage: oledir [options] <filename> [filename2 ...]
+
+Options:
+ -h, --help show this help message and exit
+ -r find files recursively in subdirectories.
+ -z ZIP_PASSWORD, --zip=ZIP_PASSWORD
+ if the file is a zip archive, open all files from it,
+ using the provided password (requires Python 2.6+)
+ -f ZIP_FNAME, --zipfname=ZIP_FNAME
+ if the file is a zip archive, file(s) to be opened
+ within the zip. Wildcards * and ? are supported.
+ (default:*)
Examples
Scan a single file:
-oledir.py file.doc
+oledir file.doc

How to use oledir in Python applications
diff --git a/oletools/doc/oledir.md b/oletools/doc/oledir.md
index 2ca5047..e520dbf 100644
--- a/oletools/doc/oledir.md
+++ b/oletools/doc/oledir.md
@@ -11,7 +11,18 @@ It is part of the [python-oletools](http://www.decalage.info/python/oletools) pa
## Usage
```text
-Usage: oledir.py <filename>
+Usage: oledir [options] <filename> [filename2 ...]
+
+Options:
+ -h, --help show this help message and exit
+ -r find files recursively in subdirectories.
+ -z ZIP_PASSWORD, --zip=ZIP_PASSWORD
+ if the file is a zip archive, open all files from it,
+ using the provided password (requires Python 2.6+)
+ -f ZIP_FNAME, --zipfname=ZIP_FNAME
+ if the file is a zip archive, file(s) to be opened
+ within the zip. Wildcards * and ? are supported.
+ (default:*)
```
### Examples
@@ -19,7 +30,7 @@ Usage: oledir.py
Scan a single file:
```text
-oledir.py file.doc
+oledir file.doc
```

diff --git a/oletools/doc/oleid.html b/oletools/doc/oleid.html
index 65862c4..7ccb46d 100644
--- a/oletools/doc/oleid.html
+++ b/oletools/doc/oleid.html
@@ -107,10 +107,10 @@ code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warni
- CSV output
oleid.py <file>
+oleid <file>
Analyzing a Word document containing a Flash object and VBA macros:
-C:\oletools>oleid.py word_flash_vba.doc
+C:\oletools>oleid word_flash_vba.doc
Filename: word_flash_vba.doc
+-------------------------------+-----------------------+
diff --git a/oletools/doc/oleid.md b/oletools/doc/oleid.md
index 3b71309..9bcf1e8 100644
--- a/oletools/doc/oleid.md
+++ b/oletools/doc/oleid.md
@@ -32,7 +32,7 @@ Planned improvements:
## Usage
```text
-oleid.py <file>
+oleid <file>
```
### Example
@@ -40,7 +40,7 @@ oleid.py
Analyzing a Word document containing a Flash object and VBA macros:
```text
-C:\oletools>oleid.py word_flash_vba.doc
+C:\oletools>oleid word_flash_vba.doc
Filename: word_flash_vba.doc
+-------------------------------+-----------------------+
diff --git a/oletools/doc/olemap.html b/oletools/doc/olemap.html
index e115cdb..4af5a3d 100644
--- a/oletools/doc/olemap.html
+++ b/oletools/doc/olemap.html
@@ -21,10 +21,10 @@
It can be used either as a command-line tool, or as a python module from your own applications.
It is part of the python-oletools package.
Usage
-Usage: olemap.py <filename>
+Usage: olemap <filename>
Examples
Scan a single file:
-olemap.py file.doc
+olemap file.doc


diff --git a/oletools/doc/olemap.md b/oletools/doc/olemap.md
index 5863a8a..8c0eac7 100644
--- a/oletools/doc/olemap.md
+++ b/oletools/doc/olemap.md
@@ -10,7 +10,7 @@ It is part of the [python-oletools](http://www.decalage.info/python/oletools) pa
## Usage
```text
-Usage: olemap.py <filename>
+Usage: olemap <filename>
```
### Examples
@@ -18,7 +18,7 @@ Usage: olemap.py
Scan a single file:
```text
-olemap.py file.doc
+olemap file.doc
```

diff --git a/oletools/doc/olemeta.html b/oletools/doc/olemeta.html
index 798c267..302844e 100644
--- a/oletools/doc/olemeta.html
+++ b/oletools/doc/olemeta.html
@@ -20,7 +20,7 @@
olemeta is a script to parse OLE files such as MS Office documents (e.g. Word, Excel), to extract all standard properties present in the OLE file.
It is part of the python-oletools package.
Usage
-olemeta.py <file>
+olemeta <file>
Example

How to use olemeta in Python applications
diff --git a/oletools/doc/olemeta.md b/oletools/doc/olemeta.md
index 5675ddd..6e0f569 100644
--- a/oletools/doc/olemeta.md
+++ b/oletools/doc/olemeta.md
@@ -9,7 +9,7 @@ It is part of the [python-oletools](http://www.decalage.info/python/oletools) pa
## Usage
```text
-olemeta.py <file>
+olemeta <file>
```
### Example
diff --git a/oletools/doc/oletimes.html b/oletools/doc/oletimes.html
index 83806a3..04a6745 100644
--- a/oletools/doc/oletimes.html
+++ b/oletools/doc/oletimes.html
@@ -20,10 +20,10 @@
oletimes is a script to parse OLE files such as MS Office documents (e.g. Word, Excel), to extract creation and modification times of all streams and storages in the OLE file.
It is part of the python-oletools package.
Usage
-oletimes.py <file>
+oletimes <file>
Example
Checking the malware sample DIAN_caso-5415.doc:
->oletimes.py DIAN_caso-5415.doc
+>oletimes DIAN_caso-5415.doc
+----------------------------+---------------------+---------------------+
| Stream/Storage name | Modification Time | Creation Time |
diff --git a/oletools/doc/oletimes.md b/oletools/doc/oletimes.md
index f5f5a48..fe79790 100644
--- a/oletools/doc/oletimes.md
+++ b/oletools/doc/oletimes.md
@@ -10,7 +10,7 @@ It is part of the [python-oletools](http://www.decalage.info/python/oletools) pa
## Usage
```text
-oletimes.py <file>
+oletimes <file>
```
### Example
@@ -18,7 +18,7 @@ oletimes.py
Checking the malware sample [DIAN_caso-5415.doc](https://malwr.com/analysis/M2I4YWRhM2IwY2QwNDljN2E3ZWFjYTg3ODk4NmZhYmE/):
```text
->oletimes.py DIAN_caso-5415.doc
+>oletimes DIAN_caso-5415.doc
+----------------------------+---------------------+---------------------+
| Stream/Storage name | Modification Time | Creation Time |
diff --git a/oletools/doc/olevba.html b/oletools/doc/olevba.html
index 3d7c8e2..121f9be 100644
--- a/oletools/doc/olevba.html
+++ b/oletools/doc/olevba.html
@@ -127,56 +127,65 @@ code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warni
- olevba scans the macro source code and the deobfuscated strings to find suspicious keywords, auto-executable macros and potential IOCs (URLs, IP addresses, e-mail addresses, executable filenames, etc).
Usage
-Usage: olevba.py [options] <filename> [filename2 ...]
-
+Usage: olevba [options] <filename> [filename2 ...]
+
Options:
-h, --help show this help message and exit
-r find files recursively in subdirectories.
-z ZIP_PASSWORD, --zip=ZIP_PASSWORD
if the file is a zip archive, open all files from it,
- using the provided password (requires Python 2.6+)
+ using the provided password.
+ -p PASSWORD, --password=PASSWORD
+ if encrypted office files are encountered, try
+ decryption with this password. May be repeated.
-f ZIP_FNAME, --zipfname=ZIP_FNAME
if the file is a zip archive, file(s) to be opened
within the zip. Wildcards * and ? are supported.
(default:*)
- -t, --triage triage mode, display results as a summary table
- (default for multiple files)
- -d, --detailed detailed mode, display full results (default for
- single file)
-a, --analysis display only analysis results, not the macro source
code
-c, --code display only VBA source code, do not analyze it
- -i INPUT, --input=INPUT
- input file containing VBA source code to be analyzed
- (no parsing)
--decode display all the obfuscated strings with their decoded
content (Hex, Base64, StrReverse, Dridex, VBA).
--attr display the attribute lines at the beginning of VBA
source code
--reveal display the macro source code after replacing all the
- obfuscated strings by their decoded content.
+ obfuscated strings by their decoded content.
+ -l LOGLEVEL, --loglevel=LOGLEVEL
+ logging level debug/info/warning/error/critical
+ (default=warning)
+ --deobf Attempt to deobfuscate VBA expressions (slow)
+ --relaxed Do not raise errors if opening of substream fails
+
+ Output mode (mutually exclusive):
+ -t, --triage triage mode, display results as a summary table
+ (default for multiple files)
+ -d, --detailed detailed mode, display full results (default for
+ single file)
+ -j, --json json mode, detailed in json format (never default)
+New in v0.54: the -p option can now be used to decrypt encrypted documents using the provided password(s).
Examples
Scan a single file:
-olevba.py file.doc
+olevba file.doc
Scan a single file, stored in a Zip archive with password “infected”:
-olevba.py malicious_file.xls.zip -z infected
+olevba malicious_file.xls.zip -z infected
Scan a single file, showing all obfuscated strings decoded:
-olevba.py file.doc --decode
+olevba file.doc --decode
Scan a single file, showing the macro source code with VBA strings deobfuscated:
-olevba.py file.doc --reveal
+olevba file.doc --reveal
Scan VBA source code extracted into a text file:
-olevba.py source_code.vba
+olevba source_code.vba
Scan a collection of files stored in a folder:
-olevba.py "MalwareZoo/VBA/*"
+olevba "MalwareZoo/VBA/*"
NOTE: On Linux, MacOSX and other Unix variants, it is required to add double quotes around wildcards. Otherwise, they will be expanded by the shell instead of olevba.
Scan all .doc and .xls files, recursively in all subfolders:
-olevba.py "MalwareZoo/VBA/*.doc" "MalwareZoo/VBA/*.xls" -r
+olevba "MalwareZoo/VBA/*.doc" "MalwareZoo/VBA/*.xls" -r
Scan all .doc files within all .zip files with password, recursively:
-olevba.py "MalwareZoo/VBA/*.zip" -r -z infected -f "*.doc"
+olevba "MalwareZoo/VBA/*.zip" -r -z infected -f "*.doc"
Detailed analysis mode (default for single file)
When a single file is scanned, or when using the option -d, all details of the analysis are displayed.
For example, checking the malware sample DIAN_caso-5415.doc:
->olevba.py c:\MalwareZoo\VBA\DIAN_caso-5415.doc.zip -z infected
+>olevba c:\MalwareZoo\VBA\DIAN_caso-5415.doc.zip -z infected
===============================================================================
FILE: DIAN_caso-5415.doc.malware in c:\MalwareZoo\VBA\DIAN_caso-5415.doc.zip
Type: OLE
@@ -246,7 +255,7 @@ ANALYSIS:
- V: VBA string expressions (potential obfuscation)
Here is an example:
-c:\>olevba.py \MalwareZoo\VBA\samples\*
+c:\>olevba \MalwareZoo\VBA\samples\*
Flags Filename
----------- -----------------------------------------------------------------
OLE:MASI--- \MalwareZoo\VBA\samples\DIAN_caso-5415.doc.malware
@@ -266,7 +275,7 @@ OpX:MASI--- \MalwareZoo\VBA\samples\RottenKitten.xlsb.malware
OLE:MASI-B- \MalwareZoo\VBA\samples\ROVNIX.doc.malware
OLE:MA----- \MalwareZoo\VBA\samples\Word within Word macro auto.doc
Python 3 support - olevba3
-As of v0.50, olevba has been ported to Python 3 thanks to @sebdraven. However, the differences between Python 2 and 3 are significant and for now there is a separate version of olevba named olevba3 to be used with Python 3.
+Since v0.54, olevba is fully compatible with both Python 2 and 3. There is no need to use olevba3 anymore; however, it is still present for backward compatibility.
How to use olevba in Python applications
olevba may be used to open a MS Office file, detect if it contains VBA macros, extract and analyze the VBA source code from your own python applications.
diff --git a/oletools/doc/olevba.md b/oletools/doc/olevba.md
index 699dc91..458020a 100644
--- a/oletools/doc/olevba.md
+++ b/oletools/doc/olevba.md
@@ -67,85 +67,95 @@ and potential IOCs (URLs, IP addresses, e-mail addresses, executable filenames,
## Usage
```text
-Usage: olevba.py [options] <filename> [filename2 ...]
-
+Usage: olevba [options] <filename> [filename2 ...]
+
Options:
-h, --help show this help message and exit
-r find files recursively in subdirectories.
-z ZIP_PASSWORD, --zip=ZIP_PASSWORD
if the file is a zip archive, open all files from it,
- using the provided password (requires Python 2.6+)
+ using the provided password.
+ -p PASSWORD, --password=PASSWORD
+ if encrypted office files are encountered, try
+ decryption with this password. May be repeated.
-f ZIP_FNAME, --zipfname=ZIP_FNAME
if the file is a zip archive, file(s) to be opened
within the zip. Wildcards * and ? are supported.
(default:*)
- -t, --triage triage mode, display results as a summary table
- (default for multiple files)
- -d, --detailed detailed mode, display full results (default for
- single file)
-a, --analysis display only analysis results, not the macro source
code
-c, --code display only VBA source code, do not analyze it
- -i INPUT, --input=INPUT
- input file containing VBA source code to be analyzed
- (no parsing)
--decode display all the obfuscated strings with their decoded
content (Hex, Base64, StrReverse, Dridex, VBA).
--attr display the attribute lines at the beginning of VBA
source code
--reveal display the macro source code after replacing all the
obfuscated strings by their decoded content.
+ -l LOGLEVEL, --loglevel=LOGLEVEL
+ logging level debug/info/warning/error/critical
+ (default=warning)
+ --deobf Attempt to deobfuscate VBA expressions (slow)
+ --relaxed Do not raise errors if opening of substream fails
+
+ Output mode (mutually exclusive):
+ -t, --triage triage mode, display results as a summary table
+ (default for multiple files)
+ -d, --detailed detailed mode, display full results (default for
+ single file)
+ -j, --json json mode, detailed in json format (never default)
```
+**New in v0.54:** the -p option can now be used to decrypt encrypted documents using the provided password(s).
+
### Examples
Scan a single file:
```text
-olevba.py file.doc
+olevba file.doc
```
Scan a single file, stored in a Zip archive with password "infected":
```text
-olevba.py malicious_file.xls.zip -z infected
+olevba malicious_file.xls.zip -z infected
```
Scan a single file, showing all obfuscated strings decoded:
```text
-olevba.py file.doc --decode
+olevba file.doc --decode
```
Scan a single file, showing the macro source code with VBA strings deobfuscated:
```text
-olevba.py file.doc --reveal
+olevba file.doc --reveal
```
Scan VBA source code extracted into a text file:
```text
-olevba.py source_code.vba
+olevba source_code.vba
```
Scan a collection of files stored in a folder:
```text
-olevba.py "MalwareZoo/VBA/*"
+olevba "MalwareZoo/VBA/*"
```
NOTE: On Linux, MacOSX and other Unix variants, it is required to add double quotes around wildcards. Otherwise, they will be expanded by the shell instead of olevba.
Scan all .doc and .xls files, recursively in all subfolders:
```text
-olevba.py "MalwareZoo/VBA/*.doc" "MalwareZoo/VBA/*.xls" -r
+olevba "MalwareZoo/VBA/*.doc" "MalwareZoo/VBA/*.xls" -r
```
Scan all .doc files within all .zip files with password, recursively:
```text
-olevba.py "MalwareZoo/VBA/*.zip" -r -z infected -f "*.doc"
+olevba "MalwareZoo/VBA/*.zip" -r -z infected -f "*.doc"
```
@@ -156,7 +166,7 @@ When a single file is scanned, or when using the option -d, all details of the a
For example, checking the malware sample [DIAN_caso-5415.doc](https://malwr.com/analysis/M2I4YWRhM2IwY2QwNDljN2E3ZWFjYTg3ODk4NmZhYmE/):
```text
->olevba.py c:\MalwareZoo\VBA\DIAN_caso-5415.doc.zip -z infected
+>olevba c:\MalwareZoo\VBA\DIAN_caso-5415.doc.zip -z infected
===============================================================================
FILE: DIAN_caso-5415.doc.malware in c:\MalwareZoo\VBA\DIAN_caso-5415.doc.zip
Type: OLE
@@ -233,7 +243,7 @@ The following flags show the results of the analysis:
Here is an example:
```text
-c:\>olevba.py \MalwareZoo\VBA\samples\*
+c:\>olevba \MalwareZoo\VBA\samples\*
Flags Filename
----------- -----------------------------------------------------------------
OLE:MASI--- \MalwareZoo\VBA\samples\DIAN_caso-5415.doc.malware
@@ -256,10 +266,9 @@ OLE:MA----- \MalwareZoo\VBA\samples\Word within Word macro auto.doc
## Python 3 support - olevba3
-As of v0.50, olevba has been ported to Python 3 thanks to @sebdraven.
-However, the differences between Python 2 and 3 are significant and for now
-there is a separate version of olevba named olevba3 to be used with
-Python 3.
+Since v0.54, olevba is fully compatible with both Python 2 and 3.
+There is no need to use olevba3 anymore; however, it is still present for backward compatibility.
+
--------------------------------------------------------------------------
diff --git a/oletools/doc/pyxswf.html b/oletools/doc/pyxswf.html
index f7b11e8..e76c31c 100644
--- a/oletools/doc/pyxswf.html
+++ b/oletools/doc/pyxswf.html
@@ -24,7 +24,7 @@
It can also extract Flash objects from RTF documents, by parsing embedded objects encoded in hexadecimal format (-f option).
For this, simply add the -o option to work on OLE streams rather than raw files, or the -f option to work on RTF files.
Usage
-Usage: pyxswf.py [options] <file.bad>
+Usage: pyxswf [options] <file.bad>
Options:
-o, --ole Parse an OLE file (e.g. Word, Excel) to look for SWF
@@ -46,18 +46,18 @@ Options:
contain SWFs. Must provide path in quotes
-c, --compress Compresses the SWF using Zlib
Example 1 - detecting and extracting a SWF file from a Word document on Windows:
-C:\oletools>pyxswf.py -o word_flash.doc
+C:\oletools>pyxswf -o word_flash.doc
OLE stream: 'Contents'
[SUMMARY] 1 SWF(s) in MD5:993664cc86f60d52d671b6610813cfd1:Contents
[ADDR] SWF 1 at 0x8 - FWS Header
-C:\oletools>pyxswf.py -xo word_flash.doc
+C:\oletools>pyxswf -xo word_flash.doc
OLE stream: 'Contents'
[SUMMARY] 1 SWF(s) in MD5:993664cc86f60d52d671b6610813cfd1:Contents
[ADDR] SWF 1 at 0x8 - FWS Header
[FILE] Carved SWF MD5: 2498e9c0701dc0e461ab4358f9102bc5.swf
Example 2 - detecting and extracting a SWF file from a RTF document on Windows:
-C:\oletools>pyxswf.py -xf "rtf_flash.rtf"
+C:\oletools>pyxswf -xf "rtf_flash.rtf"
RTF embedded object size 1498557 at index 000036DD
[SUMMARY] 1 SWF(s) in MD5:46a110548007e04f4043785ac4184558:RTF_embedded_object_0
00036DD
diff --git a/oletools/doc/pyxswf.md b/oletools/doc/pyxswf.md
index 09399e9..6be489a 100644
--- a/oletools/doc/pyxswf.md
+++ b/oletools/doc/pyxswf.md
@@ -21,7 +21,7 @@ For this, simply add the -o option to work on OLE streams rather than raw files,
## Usage
```text
-Usage: pyxswf.py [options] <file.bad>
+Usage: pyxswf [options] <file.bad>
Options:
-o, --ole Parse an OLE file (e.g. Word, Excel) to look for SWF
@@ -47,12 +47,12 @@ Options:
### Example 1 - detecting and extracting a SWF file from a Word document on Windows:
```text
-C:\oletools>pyxswf.py -o word_flash.doc
+C:\oletools>pyxswf -o word_flash.doc
OLE stream: 'Contents'
[SUMMARY] 1 SWF(s) in MD5:993664cc86f60d52d671b6610813cfd1:Contents
[ADDR] SWF 1 at 0x8 - FWS Header
-C:\oletools>pyxswf.py -xo word_flash.doc
+C:\oletools>pyxswf -xo word_flash.doc
OLE stream: 'Contents'
[SUMMARY] 1 SWF(s) in MD5:993664cc86f60d52d671b6610813cfd1:Contents
[ADDR] SWF 1 at 0x8 - FWS Header
@@ -62,7 +62,7 @@ OLE stream: 'Contents'
### Example 2 - detecting and extracting a SWF file from a RTF document on Windows:
```text
-C:\oletools>pyxswf.py -xf "rtf_flash.rtf"
+C:\oletools>pyxswf -xf "rtf_flash.rtf"
RTF embedded object size 1498557 at index 000036DD
[SUMMARY] 1 SWF(s) in MD5:46a110548007e04f4043785ac4184558:RTF_embedded_object_0
00036DD
diff --git a/oletools/ezhexviewer.py b/oletools/ezhexviewer.py
index a43e408..142d547 100644
--- a/oletools/ezhexviewer.py
+++ b/oletools/ezhexviewer.py
@@ -16,7 +16,7 @@ Usage in a python application:
ezhexviewer project website: http://www.decalage.info/python/ezhexviewer
-ezhexviewer is copyright (c) 2012-2017, Philippe Lagadec (http://www.decalage.info)
+ezhexviewer is copyright (c) 2012-2019, Philippe Lagadec (http://www.decalage.info)
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
@@ -50,7 +50,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# 2017-04-26 PL: - fixed absolute imports (issue #141)
# 2018-09-15 v0.54 PL: - easygui is now a dependency
-__version__ = '0.54dev1'
+__version__ = '0.54'
#-----------------------------------------------------------------------------
# TODO:
diff --git a/oletools/mraptor.py b/oletools/mraptor.py
index e6ac23e..8805a00 100644
--- a/oletools/mraptor.py
+++ b/oletools/mraptor.py
@@ -23,7 +23,7 @@ http://www.decalage.info/python/oletools
# === LICENSE ==================================================================
-# MacroRaptor is copyright (c) 2016-2018 Philippe Lagadec (http://www.decalage.info)
+# MacroRaptor is copyright (c) 2016-2019 Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
@@ -58,8 +58,9 @@ http://www.decalage.info/python/oletools
# 2016-12-21 v0.51 PL: - added more ActiveX macro triggers
# 2017-03-08 PL: - fixed absolute imports
# 2018-05-25 v0.53 PL: - added Word/PowerPoint 2007+ XML (aka Flat OPC) issue #283
+# 2019-04-04 v0.54 PL: - added ExecuteExcel4Macro, ShellExecuteA, XLM keywords
-__version__ = '0.53'
+__version__ = '0.54'
#------------------------------------------------------------------------------
# TODO:
@@ -119,20 +120,21 @@ re_autoexec = re.compile(r'(?i)\b(?:Auto(?:Exec|_?Open|_?Close|Exit|New)' +
r'|DocumentComplete|DownloadBegin|DownloadComplete|FileDownload' +
r'|NavigateComplete2|NavigateError|ProgressChange|PropertyChange' +
r'|SetSecureLockIcon|StatusTextChange|TitleChange|MouseMove' +
- r'|MouseEnter|MouseLeave|))\b')
+ r'|MouseEnter|MouseLeave))|Auto_Ope\b')
+# TODO: "Auto_Ope" is temporarily here because of a bug in plugin_biff, which misses the last byte in "Auto_Open"...
# MS-VBAL 5.4.5.1 Open Statement:
RE_OPEN_WRITE = r'(?:\bOpen\b[^\n]+\b(?:Write|Append|Binary|Output|Random)\b)'
re_write = re.compile(r'(?i)\b(?:FileCopy|CopyFile|Kill|CreateTextFile|'
- + r'VirtualAlloc|RtlMoveMemory|URLDownloadToFileA?|AltStartupPath|'
+ + r'VirtualAlloc|RtlMoveMemory|URLDownloadToFileA?|AltStartupPath|WriteProcessMemory|'
+ r'ADODB\.Stream|WriteText|SaveToFile|SaveAs|SaveAsRTF|FileSaveAs|MkDir|RmDir|SaveSetting|SetAttr)\b|' + RE_OPEN_WRITE)
# MS-VBAL 5.2.3.5 External Procedure Declaration
RE_DECLARE_LIB = r'(?:\bDeclare\b[^\n]+\bLib\b)'
re_execute = re.compile(r'(?i)\b(?:Shell|CreateObject|GetObject|SendKeys|'
- + r'MacScript|FollowHyperlink|CreateThread|ShellExecute)\b|' + RE_DECLARE_LIB)
+ + r'MacScript|FollowHyperlink|CreateThread|ShellExecuteA?|ExecuteExcel4Macro|EXEC|REGISTER)\b|' + RE_DECLARE_LIB)
# === CLASSES =================================================================
diff --git a/oletools/mraptor3.py b/oletools/mraptor3.py
index 46ba949..f3d7235 100644
--- a/oletools/mraptor3.py
+++ b/oletools/mraptor3.py
@@ -1,72 +1,10 @@
#!/usr/bin/env python
-"""
-mraptor.py - MacroRaptor
-MacroRaptor is a script to parse OLE and OpenXML files such as MS Office
-documents (e.g. Word, Excel), to detect malicious macros.
+# mraptor3 is a stub that redirects to mraptor.py, for backwards compatibility
-Supported formats:
-- Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm)
-- Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb)
-- PowerPoint 97-2003 (.ppt), PowerPoint 2007+ (.pptm, .ppsm)
-- Word/PowerPoint 2007+ XML (aka Flat OPC)
-- Word 2003 XML (.xml)
-- Word/Excel Single File Web Page / MHTML (.mht)
-- Publisher (.pub)
+import sys, os, warnings
-Author: Philippe Lagadec - http://www.decalage.info
-License: BSD, see source code or documentation
-
-MacroRaptor is part of the python-oletools package:
-http://www.decalage.info/python/oletools
-"""
-
-# === LICENSE ==================================================================
-
-# MacroRaptor is copyright (c) 2016-2018 Philippe Lagadec (http://www.decalage.info)
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without modification,
-# are permitted provided that the following conditions are met:
-#
-# * Redistributions of source code must retain the above copyright notice, this
-# list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#------------------------------------------------------------------------------
-# CHANGELOG:
-# 2016-02-23 v0.01 PL: - first version
-# 2016-02-29 v0.02 PL: - added Workbook_Activate, FileSaveAs
-# 2016-03-04 v0.03 PL: - returns an exit code based on the overall result
-# 2016-03-08 v0.04 PL: - collapse long lines before analysis
-# 2016-07-19 v0.50 SL: - converted to Python 3
-# 2016-08-26 PL: - changed imports for Python 3
-# 2017-04-26 v0.51 PL: - fixed absolute imports (issue #141)
-# 2017-06-29 PL: - synced with mraptor.py 0.51
-# 2018-05-25 v0.53 PL: - added Word/PowerPoint 2007+ XML (aka Flat OPC) issue #283
-
-__version__ = '0.53'
-
-#------------------------------------------------------------------------------
-# TODO:
-
-
-#--- IMPORTS ------------------------------------------------------------------
-
-import sys, os, logging, optparse, re
+warnings.warn('mraptor3 is deprecated, mraptor should be used instead.', DeprecationWarning)
# IMPORTANT: it should be possible to run oletools directly as scripts
# in any directory without installing them with pip or setup.py.
@@ -74,280 +12,12 @@ import sys, os, logging, optparse, re
# And to enable Python 2+3 compatibility, we need to use absolute imports,
# so we add the oletools parent folder to sys.path (absolute+normalized path):
_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
-# print('_thismodule_dir = %r' % _thismodule_dir)
_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
-# print('_parent_dir = %r' % _thirdparty_dir)
-if not _parent_dir in sys.path:
+if _parent_dir not in sys.path:
sys.path.insert(0, _parent_dir)
-from oletools.thirdparty.xglob import xglob
-from oletools.thirdparty.tablestream import tablestream
-
-# import the python 3 version of olevba
-from oletools import olevba3 as olevba
-from oletools.olevba3 import TYPE2TAG
-
-# === LOGGING =================================================================
-
-# a global logger object used for debugging:
-log = olevba.get_logger('mraptor')
-
-
-#--- CONSTANTS ----------------------------------------------------------------
-
-# URL and message to report issues:
-# TODO: make it a common variable for all oletools
-URL_ISSUES = 'https://github.com/decalage2/oletools/issues'
-MSG_ISSUES = 'Please report this issue on %s' % URL_ISSUES
-
-# 'AutoExec', 'AutoOpen', 'Auto_Open', 'AutoClose', 'Auto_Close', 'AutoNew', 'AutoExit',
-# 'Document_Open', 'DocumentOpen',
-# 'Document_Close', 'DocumentBeforeClose', 'Document_BeforeClose',
-# 'DocumentChange','Document_New',
-# 'NewDocument'
-# 'Workbook_Open', 'Workbook_Close',
-# *_Painted such as InkPicture1_Painted
-# *_GotFocus|LostFocus|MouseHover for other ActiveX objects
-# reference: http://www.greyhathacker.net/?p=948
-
-# TODO: check if line also contains Sub or Function
-re_autoexec = re.compile(r'(?i)\b(?:Auto(?:Exec|_?Open|_?Close|Exit|New)' +
- r'|Document(?:_?Open|_Close|_?BeforeClose|Change|_New)' +
- r'|NewDocument|Workbook(?:_Open|_Activate|_Close)' +
- r'|\w+_(?:Painted|Painting|GotFocus|LostFocus|MouseHover' +
- r'|Layout|Click|Change|Resize|BeforeNavigate2|BeforeScriptExecute' +
- r'|DocumentComplete|DownloadBegin|DownloadComplete|FileDownload' +
- r'|NavigateComplete2|NavigateError|ProgressChange|PropertyChange' +
- r'|SetSecureLockIcon|StatusTextChange|TitleChange|MouseMove' +
- r'|MouseEnter|MouseLeave|))\b')
-
-# MS-VBAL 5.4.5.1 Open Statement:
-RE_OPEN_WRITE = r'(?:\bOpen\b[^\n]+\b(?:Write|Append|Binary|Output|Random)\b)'
-
-re_write = re.compile(r'(?i)\b(?:FileCopy|CopyFile|Kill|CreateTextFile|'
- + r'VirtualAlloc|RtlMoveMemory|URLDownloadToFileA?|AltStartupPath|'
- + r'ADODB\.Stream|WriteText|SaveToFile|SaveAs|SaveAsRTF|FileSaveAs|MkDir|RmDir|SaveSetting|SetAttr)\b|' + RE_OPEN_WRITE)
-
-# MS-VBAL 5.2.3.5 External Procedure Declaration
-RE_DECLARE_LIB = r'(?:\bDeclare\b[^\n]+\bLib\b)'
-
-re_execute = re.compile(r'(?i)\b(?:Shell|CreateObject|GetObject|SendKeys|'
- + r'MacScript|FollowHyperlink|CreateThread|ShellExecute)\b|' + RE_DECLARE_LIB)
-
-
-# === CLASSES =================================================================
-
-class Result_NoMacro(object):
- exit_code = 0
- color = 'green'
- name = 'No Macro'
-
-
-class Result_NotMSOffice(object):
- exit_code = 1
- color = 'green'
- name = 'Not MS Office'
-
-
-class Result_MacroOK(object):
- exit_code = 2
- color = 'cyan'
- name = 'Macro OK'
-
-
-class Result_Error(object):
- exit_code = 10
- color = 'yellow'
- name = 'ERROR'
-
-
-class Result_Suspicious(object):
- exit_code = 20
- color = 'red'
- name = 'SUSPICIOUS'
-
-
-class MacroRaptor(object):
- """
- class to scan VBA macro code to detect if it is malicious
- """
- def __init__(self, vba_code):
- """
- MacroRaptor constructor
- :param vba_code: string containing the VBA macro code
- """
- # collapse long lines first
- self.vba_code = olevba.vba_collapse_long_lines(vba_code)
- self.autoexec = False
- self.write = False
- self.execute = False
- self.flags = ''
- self.suspicious = False
- self.autoexec_match = None
- self.write_match = None
- self.execute_match = None
- self.matches = []
-
- def scan(self):
- """
- Scan the VBA macro code to detect if it is malicious
- :return:
- """
- m = re_autoexec.search(self.vba_code)
- if m is not None:
- self.autoexec = True
- self.autoexec_match = m.group()
- self.matches.append(m.group())
- m = re_write.search(self.vba_code)
- if m is not None:
- self.write = True
- self.write_match = m.group()
- self.matches.append(m.group())
- m = re_execute.search(self.vba_code)
- if m is not None:
- self.execute = True
- self.execute_match = m.group()
- self.matches.append(m.group())
- if self.autoexec and (self.execute or self.write):
- self.suspicious = True
-
- def get_flags(self):
- flags = ''
- flags += 'A' if self.autoexec else '-'
- flags += 'W' if self.write else '-'
- flags += 'X' if self.execute else '-'
- return flags
-
-
-# === MAIN ====================================================================
-
-def main():
- """
- Main function, called when olevba is run from the command line
- """
- global log
- DEFAULT_LOG_LEVEL = "warning" # Default log level
- LOG_LEVELS = {
- 'debug': logging.DEBUG,
- 'info': logging.INFO,
- 'warning': logging.WARNING,
- 'error': logging.ERROR,
- 'critical': logging.CRITICAL
- }
-
- usage = 'usage: %prog [options] [filename2 ...]'
- parser = optparse.OptionParser(usage=usage)
- parser.add_option("-r", action="store_true", dest="recursive",
- help='find files recursively in subdirectories.')
- parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
- help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)')
- parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
- help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
- parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,
- help="logging level debug/info/warning/error/critical (default=%default)")
- parser.add_option("-m", '--matches', action="store_true", dest="show_matches",
- help='Show matched strings.')
-
- # TODO: add logfile option
-
- (options, args) = parser.parse_args()
-
- # Print help if no arguments are passed
- if len(args) == 0:
- print('MacroRaptor %s - http://decalage.info/python/oletools' % __version__)
- print('This is work in progress, please report issues at %s' % URL_ISSUES)
- print(__doc__)
- parser.print_help()
- print('\nAn exit code is returned based on the analysis result:')
- for result in (Result_NoMacro, Result_NotMSOffice, Result_MacroOK, Result_Error, Result_Suspicious):
- print(' - %d: %s' % (result.exit_code, result.name))
- sys.exit()
-
- # print banner with version
- print('MacroRaptor %s - http://decalage.info/python/oletools' % __version__)
- print('This is work in progress, please report issues at %s' % URL_ISSUES)
-
- logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s')
- # enable logging in the modules:
- log.setLevel(logging.NOTSET)
-
- t = tablestream.TableStream(style=tablestream.TableStyleSlim,
- header_row=['Result', 'Flags', 'Type', 'File'],
- column_width=[10, 5, 4, 56])
-
- exitcode = -1
- global_result = None
- # TODO: handle errors in xglob, to continue processing the next files
- for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
- zip_password=options.zip_password, zip_fname=options.zip_fname):
- # ignore directory names stored in zip files:
- if container and filename.endswith('/'):
- continue
- full_name = '%s in %s' % (filename, container) if container else filename
- # try:
- # # Open the file
- # if data is None:
- # data = open(filename, 'rb').read()
- # except:
- # log.exception('Error when opening file %r' % full_name)
- # continue
- if isinstance(data, Exception):
- result = Result_Error
- t.write_row([result.name, '', '', full_name],
- colors=[result.color, None, None, None])
- t.write_row(['', '', '', str(data)],
- colors=[None, None, None, result.color])
- else:
- filetype = '???'
- try:
- vba_parser = olevba.VBA_Parser(filename=filename, data=data, container=container)
- filetype = TYPE2TAG[vba_parser.type]
- except Exception as e:
- # log.error('Error when parsing VBA macros from file %r' % full_name)
- # TODO: distinguish actual errors from non-MSOffice files
- result = Result_Error
- t.write_row([result.name, '', filetype, full_name],
- colors=[result.color, None, None, None])
- t.write_row(['', '', '', str(e)],
- colors=[None, None, None, result.color])
- continue
- if vba_parser.detect_vba_macros():
- vba_code_all_modules = ''
- try:
- for (subfilename, stream_path, vba_filename, vba_code) in vba_parser.extract_all_macros():
- vba_code_all_modules += vba_code.decode('utf-8','replace') + '\n'
- except Exception as e:
- # log.error('Error when parsing VBA macros from file %r' % full_name)
- result = Result_Error
- t.write_row([result.name, '', TYPE2TAG[vba_parser.type], full_name],
- colors=[result.color, None, None, None])
- t.write_row(['', '', '', str(e)],
- colors=[None, None, None, result.color])
- continue
- mraptor = MacroRaptor(vba_code_all_modules)
- mraptor.scan()
- if mraptor.suspicious:
- result = Result_Suspicious
- else:
- result = Result_MacroOK
- t.write_row([result.name, mraptor.get_flags(), filetype, full_name],
- colors=[result.color, None, None, None])
- if mraptor.matches and options.show_matches:
- t.write_row(['', '', '', 'Matches: %r' % mraptor.matches])
- else:
- result = Result_NoMacro
- t.write_row([result.name, '', filetype, full_name],
- colors=[result.color, None, None, None])
- if result.exit_code > exitcode:
- global_result = result
- exitcode = result.exit_code
-
- print('')
- print('Flags: A=AutoExec, W=Write, X=Execute')
- print('Exit code: %d - %s' % (exitcode, global_result.name))
- sys.exit(exitcode)
+from oletools.mraptor import *
+from oletools.mraptor import __doc__, __version__
if __name__ == '__main__':
main()
-
-# Soundtrack: "Dark Child" by Marlon Williams
diff --git a/oletools/mraptor_milter.py b/oletools/mraptor_milter.py
index 6b70309..eaf01f6 100644
--- a/oletools/mraptor_milter.py
+++ b/oletools/mraptor_milter.py
@@ -98,18 +98,7 @@ from oletools import olevba, mraptor
from Milter.utils import parse_addr
-if sys.version_info[0] <= 2:
- # Python 2.x
- if sys.version_info[1] <= 6:
- # Python 2.6
- # use is_zipfile backported from Python 2.7:
- from oletools.thirdparty.zipfile27 import is_zipfile
- else:
- # Python 2.7
- from zipfile import is_zipfile
-else:
- # Python 3.x+
- from zipfile import is_zipfile
+from zipfile import is_zipfile
diff --git a/oletools/msodde.py b/oletools/msodde.py
index b59b77e..50e4802 100644
--- a/oletools/msodde.py
+++ b/oletools/msodde.py
@@ -11,7 +11,6 @@ Supported formats:
- RTF
- CSV (exported from / imported into Excel)
- XML (exported from Word 2003, Word 2007+, Excel 2003, (Excel 2007+?)
-- raises an error if run with files encrypted using MS Crypto API RC4
Author: Philippe Lagadec - http://www.decalage.info
License: BSD, see source code or documentation
@@ -22,7 +21,7 @@ http://www.decalage.info/python/oletools
# === LICENSE =================================================================
-# msodde is copyright (c) 2017-2018 Philippe Lagadec (http://www.decalage.info)
+# msodde is copyright (c) 2017-2019 Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
@@ -52,19 +51,30 @@ from __future__ import print_function
import argparse
import os
-from os.path import abspath, dirname
import sys
import re
import csv
import olefile
+# IMPORTANT: it should be possible to run oletools directly as scripts
+# in any directory without installing them with pip or setup.py.
+# In that case, relative imports are NOT usable.
+# And to enable Python 2+3 compatibility, we need to use absolute imports,
+# so we add the oletools parent folder to sys.path (absolute+normalized path):
+_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
+# print('_thismodule_dir = %r' % _thismodule_dir)
+_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
+# print('_parent_dir = %r' % _parent_dir)
+if _parent_dir not in sys.path:
+ sys.path.insert(0, _parent_dir)
+
from oletools import ooxml
from oletools import xls_parser
from oletools import rtfobj
-from oletools import oleid
+from oletools.ppt_record_parser import is_ppt
+from oletools import crypto
from oletools.common.log_helper import log_helper
-from oletools.common.errors import FileIsEncryptedError
# -----------------------------------------------------------------------------
# CHANGELOG:
@@ -88,8 +98,11 @@ from oletools.common.errors import FileIsEncryptedError
# 2018-03-21 CH: - added detection for various CSV formulas (issue #259)
# 2018-09-11 v0.54 PL: - olefile is now a dependency
# 2018-10-25 CH: - detect encryption and raise error if detected
+# 2019-03-25 CH: - added decryption of password-protected files
+# 2019-07-17 v0.55 CH: - fixed issue #267, unicode error on Python 2
+
-__version__ = '0.54dev4'
+__version__ = '0.55.dev3'
# -----------------------------------------------------------------------------
# TODO: field codes can be in headers/footers/comments - parse these
@@ -305,6 +318,9 @@ def process_args(cmd_line_args=None):
default=DEFAULT_LOG_LEVEL,
help="logging level debug/info/warning/error/critical "
"(default=%(default)s)")
+ parser.add_argument("-p", "--password", type=str, action='append',
+ help='if encrypted office files are encountered, try '
+ 'decryption with this password. May be repeated.')
filter_group = parser.add_argument_group(
title='Filter which OpenXML field commands are returned',
description='Only applies to OpenXML (e.g. docx) and rtf, not to OLE '
@@ -348,14 +364,13 @@ def process_doc_field(data):
""" check if field instructions start with DDE
expects unicode input, returns unicode output (empty if not dde) """
- logger.debug('processing field \'{0}\''.format(data))
+ logger.debug(u'processing field \'{0}\''.format(data))
if data.lstrip().lower().startswith(u'dde'):
return data
- elif data.lstrip().lower().startswith(u'\x00d\x00d\x00e\x00'):
+ if data.lstrip().lower().startswith(u'\x00d\x00d\x00e\x00'):
return data
- else:
- return u''
+ return u''
OLE_FIELD_START = 0x13
@@ -379,7 +394,7 @@ def process_doc_stream(stream):
while True:
idx += 1
char = stream.read(1) # loop over every single byte
- if len(char) == 0:
+ if len(char) == 0: # pylint: disable=len-as-condition
break
else:
char = ord(char)
@@ -417,7 +432,7 @@ def process_doc_stream(stream):
pass
elif len(field_contents) > OLE_FIELD_MAX_SIZE:
logger.debug('field exceeds max size of {0}. Ignore rest'
- .format(OLE_FIELD_MAX_SIZE))
+ .format(OLE_FIELD_MAX_SIZE))
max_size_exceeded = True
# appending a raw byte to a unicode string here. Not clean but
@@ -437,7 +452,7 @@ def process_doc_stream(stream):
logger.debug('big field was not a field after all')
logger.debug('Checked {0} characters, found {1} fields'
- .format(idx, len(result_parts)))
+ .format(idx, len(result_parts)))
return result_parts
@@ -462,11 +477,10 @@ def process_doc(ole):
direntry = ole._load_direntry(sid)
is_stream = direntry.entry_type == olefile.STGTY_STREAM
logger.debug('direntry {:2d} {}: {}'
- .format(sid, '[orphan]' if is_orphan else direntry.name,
- 'is stream of size {}'.format(direntry.size)
- if is_stream else
- 'no stream ({})'
- .format(direntry.entry_type)))
+ .format(sid, '[orphan]' if is_orphan else direntry.name,
+ 'is stream of size {}'.format(direntry.size)
+ if is_stream else
+ 'no stream ({})'.format(direntry.entry_type)))
if is_stream:
new_parts = process_doc_stream(
ole._open(direntry.isectStart, direntry.size))
@@ -480,17 +494,23 @@ def process_xls(filepath):
""" find dde links in excel ole file """
result = []
- for stream in xls_parser.XlsFile(filepath).iter_streams():
- if not isinstance(stream, xls_parser.WorkbookStream):
- continue
- for record in stream.iter_records():
- if not isinstance(record, xls_parser.XlsRecordSupBook):
+ xls_file = None
+ try:
+ xls_file = xls_parser.XlsFile(filepath)
+ for stream in xls_file.iter_streams():
+ if not isinstance(stream, xls_parser.WorkbookStream):
continue
- if record.support_link_type in (
- xls_parser.XlsRecordSupBook.LINK_TYPE_OLE_DDE,
- xls_parser.XlsRecordSupBook.LINK_TYPE_EXTERNAL):
- result.append(record.virt_path.replace(u'\u0003', u' '))
- return u'\n'.join(result)
+ for record in stream.iter_records():
+ if not isinstance(record, xls_parser.XlsRecordSupBook):
+ continue
+ if record.support_link_type in (
+ xls_parser.XlsRecordSupBook.LINK_TYPE_OLE_DDE,
+ xls_parser.XlsRecordSupBook.LINK_TYPE_EXTERNAL):
+ result.append(record.virt_path.replace(u'\u0003', u' '))
+ return u'\n'.join(result)
+ finally:
+ if xls_file is not None:
+ xls_file.close()
def process_docx(filepath, field_filter_mode=None):
@@ -525,7 +545,8 @@ def process_docx(filepath, field_filter_mode=None):
else:
elem = curr_elem
if elem is None:
- raise BadOOXML(filepath, 'Got "None"-Element from iter_xml')
+ raise ooxml.BadOOXML(filepath,
+ 'Got "None"-Element from iter_xml')
# check if FLDCHARTYPE and whether "begin" or "end" tag
attrib_type = elem.attrib.get(ATTR_W_FLDCHARTYPE[0]) or \
@@ -535,7 +556,7 @@ def process_docx(filepath, field_filter_mode=None):
level += 1
if attrib_type == "end":
level -= 1
- if level == 0 or level == -1: # edge-case; level gets -1
+ if level in (0, -1): # edge-case; level gets -1
all_fields.append(ddetext)
ddetext = u''
level = 0 # reset edge-case
@@ -564,6 +585,7 @@ def process_docx(filepath, field_filter_mode=None):
def unquote(field):
+ """TODO: document what exactly is happening here..."""
if "QUOTE" not in field or NO_QUOTES:
return field
# split into components
@@ -605,8 +627,8 @@ def field_is_blacklisted(contents):
index = FIELD_BLACKLIST_CMDS.index(words[0].lower())
except ValueError: # first word is no blacklisted command
return False
- logger.debug('trying to match "{0}" to blacklist command {1}'
- .format(contents, FIELD_BLACKLIST[index]))
+ logger.debug(u'trying to match "{0}" to blacklist command {1}'
+ .format(contents, FIELD_BLACKLIST[index]))
_, nargs_required, nargs_optional, sw_with_arg, sw_solo, sw_format \
= FIELD_BLACKLIST[index]
@@ -617,12 +639,13 @@ def field_is_blacklisted(contents):
break
nargs += 1
if nargs < nargs_required:
- logger.debug('too few args: found {0}, but need at least {1} in "{2}"'
- .format(nargs, nargs_required, contents))
+ logger.debug(u'too few args: found {0}, but need at least {1} in "{2}"'
+ .format(nargs, nargs_required, contents))
return False
- elif nargs > nargs_required + nargs_optional:
- logger.debug('too many args: found {0}, but need at most {1}+{2} in "{3}"'
- .format(nargs, nargs_required, nargs_optional, contents))
+ if nargs > nargs_required + nargs_optional:
+ logger.debug(u'too many args: found {0}, but need at most {1}+{2} in '
+ u'"{3}"'
+ .format(nargs, nargs_required, nargs_optional, contents))
return False
# check switches
@@ -631,15 +654,15 @@ def field_is_blacklisted(contents):
for word in words[1+nargs:]:
if expect_arg: # this is an argument for the last switch
if arg_choices and (word not in arg_choices):
- logger.debug('Found invalid switch argument "{0}" in "{1}"'
- .format(word, contents))
+ logger.debug(u'Found invalid switch argument "{0}" in "{1}"'
+ .format(word, contents))
return False
expect_arg = False
arg_choices = [] # in general, do not enforce choices
continue # "no further questions, your honor"
elif not FIELD_SWITCH_REGEX.match(word):
- logger.debug('expected switch, found "{0}" in "{1}"'
- .format(word, contents))
+ logger.debug(u'expected switch, found "{0}" in "{1}"'
+ .format(word, contents))
return False
# we want a switch and we got a valid one
switch = word[1]
@@ -660,8 +683,8 @@ def field_is_blacklisted(contents):
if 'numeric' in sw_format:
arg_choices = [] # too many choices to list them here
else:
- logger.debug('unexpected switch {0} in "{1}"'
- .format(switch, contents))
+ logger.debug(u'unexpected switch {0} in "{1}"'
+ .format(switch, contents))
return False
# if nothing went wrong sofar, the contents seems to match the blacklist
@@ -676,7 +699,7 @@ def process_xlsx(filepath):
tag = elem.tag.lower()
if tag == 'ddelink' or tag.endswith('}ddelink'):
# we have found a dde link. Try to get more info about it
- link_info = ['DDE-Link']
+ link_info = []
if 'ddeService' in elem.attrib:
link_info.append(elem.attrib['ddeService'])
if 'ddeTopic' in elem.attrib:
@@ -687,16 +710,15 @@ def process_xlsx(filepath):
for subfile, content_type, handle in parser.iter_non_xml():
try:
logger.info('Parsing non-xml subfile {0} with content type {1}'
- .format(subfile, content_type))
+ .format(subfile, content_type))
for record in xls_parser.parse_xlsb_part(handle, content_type,
subfile):
logger.debug('{0}: {1}'.format(subfile, record))
if isinstance(record, xls_parser.XlsbBeginSupBook) and \
record.link_type == \
xls_parser.XlsbBeginSupBook.LINK_TYPE_DDE:
- dde_links.append('DDE-Link ' + record.string1 + ' ' +
- record.string2)
- except Exception:
+ dde_links.append(record.string1 + ' ' + record.string2)
+ except Exception as exc:
if content_type.startswith('application/vnd.ms-excel.') or \
content_type.startswith('application/vnd.ms-office.'): # pylint: disable=bad-indentation
# should really be able to parse these either as xml or records
@@ -727,7 +749,8 @@ class RtfFieldParser(rtfobj.RtfParser):
def open_destination(self, destination):
if destination.cword == b'fldinst':
- logger.debug('*** Start field data at index %Xh' % destination.start)
+ logger.debug('*** Start field data at index %Xh'
+ % destination.start)
def close_destination(self, destination):
if destination.cword == b'fldinst':
@@ -758,7 +781,7 @@ def process_rtf(file_handle, field_filter_mode=None):
all_fields = [field.decode('ascii') for field in rtfparser.fields]
# apply field command filter
logger.debug('found {1} fields, filtering with mode "{0}"'
- .format(field_filter_mode, len(all_fields)))
+ .format(field_filter_mode, len(all_fields)))
if field_filter_mode in (FIELD_FILTER_ALL, None):
clean_fields = all_fields
elif field_filter_mode == FIELD_FILTER_DDE:
@@ -815,11 +838,12 @@ def process_csv(filepath):
results, _ = process_csv_dialect(file_handle, delim)
except csv.Error: # e.g. sniffing fails
logger.debug('failed to csv-parse with delimiter {0!r}'
- .format(delim))
+ .format(delim))
if is_small and not results:
# try whole file as single cell, since sniffing fails in this case
- logger.debug('last attempt: take whole file as single unquoted cell')
+ logger.debug('last attempt: take whole file as single unquoted '
+ 'cell')
file_handle.seek(0)
match = CSV_DDE_FORMAT.match(file_handle.read(CSV_SMALL_THRESH))
if match:
@@ -836,8 +860,8 @@ def process_csv_dialect(file_handle, delimiters):
delimiters=delimiters)
dialect.strict = False # microsoft is never strict
logger.debug('sniffed csv dialect with delimiter {0!r} '
- 'and quote char {1!r}'
- .format(dialect.delimiter, dialect.quotechar))
+ 'and quote char {1!r}'
+ .format(dialect.delimiter, dialect.quotechar))
# rewind file handle to start
file_handle.seek(0)
@@ -877,7 +901,7 @@ def process_excel_xml(filepath):
break
if formula is None:
continue
- logger.debug('found cell with formula {0}'.format(formula))
+ logger.debug(u'found cell with formula {0}'.format(formula))
match = re.match(XML_DDE_FORMAT, formula)
if match:
dde_links.append(u' '.join(match.groups()[:2]))
@@ -891,19 +915,11 @@ def process_file(filepath, field_filter_mode=None):
if xls_parser.is_xls(filepath):
logger.debug('Process file as excel 2003 (xls)')
return process_xls(filepath)
-
- # encrypted files also look like ole, even if office 2007+ (xml-based)
- # so check for encryption, first
- ole = olefile.OleFileIO(filepath, path_encoding=None)
- oid = oleid.OleID(ole)
- if oid.check_encrypted().value:
- log.debug('is encrypted - raise error')
- raise FileIsEncryptedError(filepath)
- elif oid.check_powerpoint().value:
- log.debug('is ppt - cannot have DDE')
+ if is_ppt(filepath):
+ logger.debug('is ppt - cannot have DDE')
return u''
- else:
- logger.debug('Process file as word 2003 (doc)')
+ logger.debug('Process file as word 2003 (doc)')
+ with olefile.OleFileIO(filepath, path_encoding=None) as ole:
return process_doc(ole)
with open(filepath, 'rb') as file_handle:
@@ -921,22 +937,77 @@ def process_file(filepath, field_filter_mode=None):
if doctype == ooxml.DOCTYPE_EXCEL:
logger.debug('Process file as excel 2007+ (xlsx)')
return process_xlsx(filepath)
- elif doctype in (ooxml.DOCTYPE_EXCEL_XML, ooxml.DOCTYPE_EXCEL_XML2003):
+ if doctype in (ooxml.DOCTYPE_EXCEL_XML, ooxml.DOCTYPE_EXCEL_XML2003):
logger.debug('Process file as xml from excel 2003/2007+')
return process_excel_xml(filepath)
- elif doctype in (ooxml.DOCTYPE_WORD_XML, ooxml.DOCTYPE_WORD_XML2003):
+ if doctype in (ooxml.DOCTYPE_WORD_XML, ooxml.DOCTYPE_WORD_XML2003):
logger.debug('Process file as xml from word 2003/2007+')
return process_docx(filepath)
- elif doctype is None:
+ if doctype is None:
logger.debug('Process file as csv')
return process_csv(filepath)
- else: # could be docx; if not: this is the old default code path
- logger.debug('Process file as word 2007+ (docx)')
- return process_docx(filepath, field_filter_mode)
+ # could be docx; if not: this is the old default code path
+ logger.debug('Process file as word 2007+ (docx)')
+ return process_docx(filepath, field_filter_mode)
# === MAIN =================================================================
+
+def process_maybe_encrypted(filepath, passwords=None, crypto_nesting=0,
+                            **kwargs):
+    """
+    Process a file that might be encrypted.
+
+    Calls :py:func:`process_file`. If the file is encrypted, it is decrypted
+    to a new file that is processed instead and deleted afterwards.
+
+    :param str filepath: path to file on disc.
+    :param passwords: list of passwords (str) to try for decryption or None
+    :param int crypto_nesting: number of decryption layers already removed
+    :param kwargs: same as :py:func:`process_file`
+    :returns: same as :py:func:`process_file`
+    :raises crypto.MaxCryptoNestingReached: if the nesting limit is reached
+    :raises crypto.WrongEncryptionPassword: if no password could decrypt
+    """
+    result = u''
+    try:
+        result = process_file(filepath, **kwargs)
+        if not crypto.is_encrypted(filepath):
+            return result
+    except Exception:
+        logger.debug('Ignoring exception:', exc_info=True)
+        if not crypto.is_encrypted(filepath):
+            raise
+
+    # file is encrypted; refuse to unpack endlessly nested encrypted files
+    if crypto_nesting >= crypto.MAX_NESTING_DEPTH:
+        raise crypto.MaxCryptoNestingReached(crypto_nesting, filepath)
+
+    decrypted_file = None
+    # always try the default passwords too, but only once (on recursion the
+    # given list already contains them)
+    passwords = list(passwords or [])
+    passwords += [p for p in crypto.DEFAULT_PASSWORDS if p not in passwords]
+    try:
+        logger.debug('Trying to decrypt file')
+        decrypted_file = crypto.decrypt(filepath, passwords)
+        if not decrypted_file:
+            logger.error('Decrypt failed, run with debug output to get details')
+            raise crypto.WrongEncryptionPassword(filepath)
+        logger.info('Analyze decrypted file')
+        result = process_maybe_encrypted(decrypted_file, passwords,
+                                         crypto_nesting+1, **kwargs)
+    finally:     # clean up
+        if decrypted_file:     # (nothing to delete if decrypt failed)
+            try:
+                os.unlink(decrypted_file)
+            except Exception:
+                logger.debug('Ignoring exception closing decrypted file:',
+                             exc_info=True)
+    return result
+
+
def main(cmd_line_args=None):
""" Main function, called if this file is called as a script
@@ -961,13 +1032,16 @@ def main(cmd_line_args=None):
     text = ''
     return_code = 1
     try:
-        text = process_file(args.filepath, args.field_filter_mode)
+        text = process_maybe_encrypted(
+            args.filepath, args.password,
+            field_filter_mode=args.field_filter_mode)
         return_code = 0
     except Exception as exc:
-        logger.exception(exc.message)
+        logger.exception(str(exc))
     logger.print_str('DDE Links:')
-    logger.print_str(text)
+    for link in text.splitlines():
+        logger.print_str(link, type='dde-link')  # one output entry per link
     log_helper.end_logging()
diff --git a/oletools/olebrowse.py b/oletools/olebrowse.py
index cb77033..74bba02 100644
--- a/oletools/olebrowse.py
+++ b/oletools/olebrowse.py
@@ -12,7 +12,7 @@ olebrowse project website: http://www.decalage.info/python/olebrowse
olebrowse is part of the python-oletools package:
http://www.decalage.info/python/oletools
-olebrowse is copyright (c) 2012-2017, Philippe Lagadec (http://www.decalage.info)
+olebrowse is copyright (c) 2012-2019, Philippe Lagadec (http://www.decalage.info)
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
@@ -43,7 +43,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# 2017-04-26 v0.51 PL: - fixed absolute imports (issue #141)
# 2018-09-11 v0.54 PL: - olefile is now a dependency
-__version__ = '0.54dev1'
+__version__ = '0.54'
#------------------------------------------------------------------------------
# TODO:
diff --git a/oletools/oledir.py b/oletools/oledir.py
index 6b6d530..42cda7e 100644
--- a/oletools/oledir.py
+++ b/oletools/oledir.py
@@ -14,7 +14,7 @@ http://www.decalage.info/python/oletools
#=== LICENSE ==================================================================
-# oledir is copyright (c) 2015-2018 Philippe Lagadec (http://www.decalage.info)
+# oledir is copyright (c) 2015-2019 Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
@@ -53,7 +53,7 @@ from __future__ import print_function
# 2018-08-28 v0.54 PL: - olefile is now a dependency
# 2018-10-06 - colorclass is now a dependency
-__version__ = '0.54dev1'
+__version__ = '0.54'
#------------------------------------------------------------------------------
# TODO:
diff --git a/oletools/oleform.py b/oletools/oleform.py
index f913e89..e486d34 100644
--- a/oletools/oleform.py
+++ b/oletools/oleform.py
@@ -1,5 +1,8 @@
#!/usr/bin/env python
+# REFERENCES:
+# - MS-OFORMS: https://msdn.microsoft.com/en-us/library/office/cc313125%28v=office.12%29.aspx?f=255&MSPPError=-2147217396
+
# CHANGELOG:
# 2018-02-19 v0.53 PL: - fixed issue #260, removed long integer literals
diff --git a/oletools/oleid.py b/oletools/oleid.py
index 5370503..e70e053 100644
--- a/oletools/oleid.py
+++ b/oletools/oleid.py
@@ -17,7 +17,7 @@ http://www.decalage.info/python/oletools
#=== LICENSE =================================================================
-# oleid is copyright (c) 2012-2018, Philippe Lagadec (http://www.decalage.info)
+# oleid is copyright (c) 2012-2019, Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
@@ -59,7 +59,7 @@ from __future__ import print_function
# 2018-10-19 CH: - accept olefile as well as filename, return Indicators,
# improve encryption detection for ppt
-__version__ = '0.54dev4'
+__version__ = '0.54'
#------------------------------------------------------------------------------
@@ -80,22 +80,26 @@ __version__ = '0.54dev4'
#=== IMPORTS =================================================================
-import argparse, sys, re, zlib, struct
+import argparse, sys, re, zlib, struct, os
from os.path import dirname, abspath
-# little hack to allow absolute imports even if oletools is not installed
-# (required to run oletools directly as scripts in any directory).
-try:
- from oletools.thirdparty import prettytable
-except ImportError:
- PARENT_DIR = dirname(dirname(abspath(__file__)))
- if PARENT_DIR not in sys.path:
- sys.path.insert(0, PARENT_DIR)
- del PARENT_DIR
- from oletools.thirdparty import prettytable
-
import olefile
+# IMPORTANT: it should be possible to run oletools directly as scripts
+# in any directory without installing them with pip or setup.py.
+# In that case, relative imports are NOT usable.
+# And to enable Python 2+3 compatibility, we need to use absolute imports,
+# so we add the oletools parent folder to sys.path (absolute+normalized path):
+_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
+# print('_thismodule_dir = %r' % _thismodule_dir)
+_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
+# print('_parent_dir = %r' % _parent_dir)
+if _parent_dir not in sys.path:
+ sys.path.insert(0, _parent_dir)
+
+from oletools.thirdparty.prettytable import prettytable
+from oletools import crypto
+
#=== FUNCTIONS ===============================================================
@@ -279,20 +283,7 @@ class OleID(object):
self.indicators.append(encrypted)
if not self.ole:
return None
- # check if bit 1 of security field = 1:
- # (this field may be missing for Powerpoint2000, for example)
- if self.suminfo_data is None:
- self.check_properties()
- if 0x13 in self.suminfo_data:
- if self.suminfo_data[0x13] & 1:
- encrypted.value = True
- # check if this is an OpenXML encrypted file
- elif self.ole.exists('EncryptionInfo'):
- encrypted.value = True
- # or an encrypted ppt file
- if self.ole.exists('EncryptedSummary') and \
- not self.ole.exists('SummaryInformation'):
- encrypted.value = True
+ encrypted.value = crypto.is_encrypted(self.ole)
return encrypted
def check_word(self):
@@ -316,27 +307,7 @@ class OleID(object):
return None, None
if self.ole.exists('WordDocument'):
word.value = True
- # check for Word-specific encryption flag:
- stream = None
- try:
- stream = self.ole.openstream(["WordDocument"])
- # pass header 10 bytes
- stream.read(10)
- # read flag structure:
- temp16 = struct.unpack("H", stream.read(2))[0]
- f_encrypted = (temp16 & 0x0100) >> 8
- if f_encrypted:
- # correct encrypted indicator if present or add one
- encrypt_ind = self.get_indicator('encrypted')
- if encrypt_ind:
- encrypt_ind.value = True
- else:
- self.indicators.append('encrypted', True, name='Encrypted')
- except Exception:
- raise
- finally:
- if stream is not None:
- stream.close()
+
# check for VBA macros:
if self.ole.exists('Macros'):
macros.value = True
diff --git a/oletools/olemap.py b/oletools/olemap.py
index d7c9fa8..5460a54 100644
--- a/oletools/olemap.py
+++ b/oletools/olemap.py
@@ -13,7 +13,7 @@ http://www.decalage.info/python/oletools
#=== LICENSE ==================================================================
-# olemap is copyright (c) 2015-2018 Philippe Lagadec (http://www.decalage.info)
+# olemap is copyright (c) 2015-2019 Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
@@ -52,8 +52,9 @@ http://www.decalage.info/python/oletools
# 2017-03-23 PL: - only display the header by default
# - added option --exdata to display extra data in hex
# 2018-08-28 v0.54 PL: - olefile is now a dependency
+# 2019-07-10 v0.55 PL: - fixed display of OLE header CLSID (issue #394)
-__version__ = '0.54dev1'
+__version__ = '0.55.dev3'
#------------------------------------------------------------------------------
# TODO:
@@ -121,7 +122,7 @@ def show_header(ole, extra_data=False):
print("OLE HEADER:")
t = tablestream.TableStream([24, 16, 79-(4+24+16)], header_row=['Attribute', 'Value', 'Description'])
t.write_row(['OLE Signature (hex)', binascii.b2a_hex(ole.header_signature).upper(), 'Should be D0CF11E0A1B11AE1'])
- t.write_row(['Header CLSID (hex)', binascii.b2a_hex(ole.header_clsid).upper(), 'Should be 0'])
+ t.write_row(['Header CLSID', ole.header_clsid, 'Should be empty (0)'])
t.write_row(['Minor Version', '%04X' % ole.minor_version, 'Should be 003E'])
t.write_row(['Major Version', '%04X' % ole.dll_version, 'Should be 3 or 4'])
t.write_row(['Byte Order', '%04X' % ole.byte_order, 'Should be FFFE (little endian)'])
diff --git a/oletools/olemeta.py b/oletools/olemeta.py
index 194da7b..2c0badd 100644
--- a/oletools/olemeta.py
+++ b/oletools/olemeta.py
@@ -15,7 +15,7 @@ http://www.decalage.info/python/oletools
#=== LICENSE =================================================================
-# olemeta is copyright (c) 2013-2018, Philippe Lagadec (http://www.decalage.info)
+# olemeta is copyright (c) 2013-2019, Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
@@ -51,7 +51,7 @@ http://www.decalage.info/python/oletools
# 2017-05-04 PL: - added optparse and xglob (issue #141)
# 2018-09-11 v0.54 PL: - olefile is now a dependency
-__version__ = '0.54dev1'
+__version__ = '0.54'
#------------------------------------------------------------------------------
# TODO:
diff --git a/oletools/oleobj.py b/oletools/oleobj.py
index a0f5e49..d9cf876 100644
--- a/oletools/oleobj.py
+++ b/oletools/oleobj.py
@@ -14,7 +14,7 @@ http://www.decalage.info/python/oletools
# === LICENSE =================================================================
-# oleobj is copyright (c) 2015-2018 Philippe Lagadec (http://www.decalage.info)
+# oleobj is copyright (c) 2015-2019 Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
@@ -89,7 +89,7 @@ from oletools.ooxml import XmlParser
# 2018-09-11 v0.54 PL: - olefile is now a dependency
# 2018-10-30 SA: - added detection of external links (PR #317)
-__version__ = '0.54dev4'
+__version__ = '0.54'
# -----------------------------------------------------------------------------
# TODO:
@@ -526,29 +526,35 @@ def find_ole_in_ppt(filename):
can contain the actual embedded file we are looking for (caller will check
for these).
"""
- for stream in PptFile(filename).iter_streams():
- for record_idx, record in enumerate(stream.iter_records()):
- if isinstance(record, PptRecordExOleVbaActiveXAtom):
- ole = None
- try:
- data_start = next(record.iter_uncompressed())
- if data_start[:len(olefile.MAGIC)] != olefile.MAGIC:
- continue # could be an ActiveX control or VBA Storage
-
- # otherwise, this should be an OLE object
- log.debug('Found record with embedded ole object in ppt '
- '(stream "{0}", record no {1})'
- .format(stream.name, record_idx))
- ole = record.get_data_as_olefile()
- yield ole
- except IOError:
- log.warning('Error reading data from {0} stream or '
- 'interpreting it as OLE object'
- .format(stream.name))
- log.debug('', exc_info=True)
- finally:
- if ole is not None:
- ole.close()
+ ppt_file = None
+ try:
+ ppt_file = PptFile(filename)
+ for stream in ppt_file.iter_streams():
+ for record_idx, record in enumerate(stream.iter_records()):
+ if isinstance(record, PptRecordExOleVbaActiveXAtom):
+ ole = None
+ try:
+ data_start = next(record.iter_uncompressed())
+ if data_start[:len(olefile.MAGIC)] != olefile.MAGIC:
+ continue # could be ActiveX control / VBA Storage
+
+ # otherwise, this should be an OLE object
+ log.debug('Found record with embedded ole object in '
+ 'ppt (stream "{0}", record no {1})'
+ .format(stream.name, record_idx))
+ ole = record.get_data_as_olefile()
+ yield ole
+ except IOError:
+ log.warning('Error reading data from {0} stream or '
+ 'interpreting it as OLE object'
+ .format(stream.name))
+ log.debug('', exc_info=True)
+ finally:
+ if ole is not None:
+ ole.close()
+ finally:
+ if ppt_file is not None:
+ ppt_file.close()
class FakeFile(io.RawIOBase):
@@ -750,13 +756,13 @@ def process_file(filename, data, output_dir=None):
xml_parser = None
if is_zipfile(filename):
- log.info('file is a OOXML file, looking for relationships with external links')
+ log.info('file could be an OOXML file, looking for relationships with '
+ 'external links')
xml_parser = XmlParser(filename)
for relationship, target in find_external_relationships(xml_parser):
did_dump = True
print("Found relationship '%s' with external link %s" % (relationship, target))
-
# look for ole files inside file (e.g. unzip docx)
# have to finish work on every ole stream inside iteration, since handles
# are closed in find_ole
@@ -765,9 +771,9 @@ def process_file(filename, data, output_dir=None):
continue
for path_parts in ole.listdir():
+ stream_path = '/'.join(path_parts)
+ log.debug('Checking stream %r', stream_path)
if path_parts[-1] == '\x01Ole10Native':
- stream_path = '/'.join(path_parts)
- log.debug('Checking stream %r', stream_path)
stream = None
try:
stream = ole.openstream(path_parts)
diff --git a/oletools/oletimes.py b/oletools/oletimes.py
index fa9f5b5..5d7809a 100644
--- a/oletools/oletimes.py
+++ b/oletools/oletimes.py
@@ -16,7 +16,7 @@ http://www.decalage.info/python/oletools
#=== LICENSE =================================================================
-# oletimes is copyright (c) 2013-2017, Philippe Lagadec (http://www.decalage.info)
+# oletimes is copyright (c) 2013-2019, Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
@@ -52,7 +52,7 @@ http://www.decalage.info/python/oletools
# 2017-05-04 PL: - added optparse and xglob (issue #141)
# 2018-09-11 v0.54 PL: - olefile is now a dependency
-__version__ = '0.54dev1'
+__version__ = '0.54'
#------------------------------------------------------------------------------
# TODO:
diff --git a/oletools/olevba.py b/oletools/olevba.py
index d7cd047..211099e 100644
--- a/oletools/olevba.py
+++ b/oletools/olevba.py
@@ -7,14 +7,14 @@ olevba is a script to parse OLE and OpenXML files such as MS Office documents
and analyze malicious macros.
Supported formats:
-- Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm)
-- Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb)
-- PowerPoint 97-2003 (.ppt), PowerPoint 2007+ (.pptm, .ppsm)
-- Word/PowerPoint 2007+ XML (aka Flat OPC)
-- Word 2003 XML (.xml)
-- Word/Excel Single File Web Page / MHTML (.mht)
-- Publisher (.pub)
-- raises an error if run with files encrypted using MS Crypto API RC4
+ - Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm)
+ - Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb)
+ - PowerPoint 97-2003 (.ppt), PowerPoint 2007+ (.pptm, .ppsm)
+ - Word/PowerPoint 2007+ XML (aka Flat OPC)
+ - Word 2003 XML (.xml)
+ - Word/Excel Single File Web Page / MHTML (.mht)
+ - Publisher (.pub)
+ - raises an error if run with files encrypted using MS Crypto API RC4
Author: Philippe Lagadec - http://www.decalage.info
License: BSD, see source code or documentation
@@ -28,7 +28,7 @@ https://github.com/unixfreak0037/officeparser
# === LICENSE ==================================================================
-# olevba is copyright (c) 2014-2018 Philippe Lagadec (http://www.decalage.info)
+# olevba is copyright (c) 2014-2019 Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
@@ -210,8 +210,16 @@ from __future__ import print_function
# 2018-09-11 v0.54 PL: - olefile is now a dependency
# 2018-10-08 PL: - replace backspace before printing to console (issue #358)
# 2018-10-25 CH: - detect encryption and raise error if detected
+# 2018-12-03 PL: - uses tablestream (+colors) instead of prettytable
+# 2018-12-06 PL: - colorize the suspicious keywords found in VBA code
+# 2019-01-01 PL: - removed support for Python 2.6
+# 2019-03-18 PL: - added XLM/XLF macros detection for Excel OLE files
+# 2019-03-25 CH: - added decryption of password-protected files
+# 2019-04-09 PL: - decompress_stream accepts bytes (issue #422)
+# 2019-05-23 v0.55 PL: - added option --pcode to call pcodedmp and display P-code
+# 2019-06-05 PL: - added VBA stomping detection
-__version__ = '0.54dev4'
+__version__ = '0.55.dev3'
#------------------------------------------------------------------------------
# TODO:
@@ -236,23 +244,20 @@ __version__ = '0.54dev4'
# - extract_macros: use combined struct.unpack instead of many calls
# - all except clauses should target specific exceptions
-#------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------
# REFERENCES:
# - [MS-OVBA]: Microsoft Office VBA File Format Structure
# http://msdn.microsoft.com/en-us/library/office/cc313094%28v=office.12%29.aspx
# - officeparser: https://github.com/unixfreak0037/officeparser
-#--- IMPORTS ------------------------------------------------------------------
+# --- IMPORTS ------------------------------------------------------------------
import sys
import os
import logging
import struct
-try:
- from cStringIO import StringIO
-except ImportError:
- from io import StringIO
+from io import BytesIO, StringIO
import math
import zipfile
import re
@@ -261,7 +266,7 @@ import binascii
import base64
import zlib
import email # for MHTML parsing
-import string # for printable
+import string # for printable
import json # for json output mode (argument --json)
# import lxml or ElementTree for XML parsing:
@@ -297,11 +302,11 @@ _thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
# print('_thismodule_dir = %r' % _thismodule_dir)
_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
# print('_parent_dir = %r' % _thirdparty_dir)
-if not _parent_dir in sys.path:
+if _parent_dir not in sys.path:
sys.path.insert(0, _parent_dir)
import olefile
-from oletools.thirdparty.prettytable import prettytable
+from oletools.thirdparty.tablestream import tablestream
from oletools.thirdparty.xglob import xglob, PathNotFoundException
from pyparsing import \
CaselessKeyword, CaselessLiteral, Combine, Forward, Literal, \
@@ -311,9 +316,8 @@ from pyparsing import \
from oletools import ppt_parser
from oletools import oleform
from oletools import rtfobj
-from oletools import oleid
-from oletools.common.errors import FileIsEncryptedError
-
+from oletools import crypto
+from oletools.common import codepages
# monkeypatch email to fix issue #32:
# allow header lines without ":"
@@ -324,30 +328,77 @@ email.feedparser.headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:?|[\t
if sys.version_info[0] <= 2:
# Python 2.x
- if sys.version_info[1] <= 6:
- # Python 2.6
- # use is_zipfile backported from Python 2.7:
- from thirdparty.zipfile27 import is_zipfile
- else:
- # Python 2.7
- from zipfile import is_zipfile
+ PYTHON2 = True
+ # to use ord on bytes/bytearray items the same way in Python 2+3
+ # on Python 2, just use the normal ord() because items are bytes
+ byte_ord = ord
+ #: Default string encoding for the olevba API
+ DEFAULT_API_ENCODING = 'utf8' # on Python 2: UTF-8 (bytes)
else:
# Python 3.x+
- from zipfile import is_zipfile
+ PYTHON2 = False
+
+ # to use ord on bytes/bytearray items the same way in Python 2+3
+ # on Python 3, items are int, so just return the item
+ def byte_ord(x):
+ return x
# xrange is now called range:
xrange = range
+ # unichr does not exist anymore, only chr:
+ unichr = chr
+ # json2ascii also needs "unicode":
+ unicode = str
+ from functools import reduce
+ #: Default string encoding for the olevba API
+ DEFAULT_API_ENCODING = None # on Python 3: None (unicode)
+ # Python 3.0 - 3.4 support:
+ # From https://gist.github.com/ynkdir/867347/c5e188a4886bc2dd71876c7e069a7b00b6c16c61
+ if sys.version_info < (3, 5):
+ import codecs
+ _backslashreplace_errors = codecs.lookup_error("backslashreplace")
+
+ def backslashreplace_errors(exc):
+ if isinstance(exc, UnicodeDecodeError):
+ u = "".join("\\x{0:02x}".format(c) for c in exc.object[exc.start:exc.end])
+ return u, exc.end
+ return _backslashreplace_errors(exc)
+
+ codecs.register_error("backslashreplace", backslashreplace_errors)
+
+
+def unicode2str(unicode_string):
+ """
+ convert a unicode string to a native str:
+ - on Python 3, it returns the same string
+ - on Python 2, the string is encoded with UTF-8 to a bytes str
+ :param unicode_string: unicode string to be converted
+ :return: the string converted to str
+ :rtype: str
+ """
+ if PYTHON2:
+ return unicode_string.encode('utf8', errors='replace')
+ else:
+ return unicode_string
-# === LOGGING =================================================================
-class NullHandler(logging.Handler):
+def bytes2str(bytes_string, encoding='utf8'):
"""
- Log Handler without output, to avoid printing messages if logging is not
- configured by the main application.
- Python 2.7 has logging.NullHandler, but this is necessary for 2.6:
- see https://docs.python.org/2.6/library/logging.html#configuring-logging-for-a-library
+ convert a bytes string to a native str:
+ - on Python 2, it returns the same string (bytes=str)
+ - on Python 3, the string is decoded using the provided encoding
+ (UTF-8 by default) to a unicode str; undecodable bytes are replaced
+ :param bytes_string: bytes string to be converted
+ :param encoding: codec to be used for decoding
+ :return: the string converted to str
+ :rtype: str
"""
- def emit(self, record):
- pass
+ if PYTHON2:
+ return bytes_string
+ else:
+ return bytes_string.decode(encoding, errors='replace')
+
+
+# === LOGGING =================================================================
def get_logger(name, level=logging.CRITICAL+1):
"""
@@ -361,7 +412,7 @@ def get_logger(name, level=logging.CRITICAL+1):
# First, test if there is already a logger with the same name, else it
# will generate duplicate messages (due to duplicate handlers):
if name in logging.Logger.manager.loggerDict:
- #NOTE: another less intrusive but more "hackish" solution would be to
+ # NOTE: another less intrusive but more "hackish" solution would be to
# use getLogger then test if its effective level is not default.
logger = logging.getLogger(name)
# make sure level is OK:
@@ -371,7 +422,7 @@ def get_logger(name, level=logging.CRITICAL+1):
logger = logging.getLogger(name)
# only add a NullHandler for this logger, it is up to the application
# to configure its own logging:
- logger.addHandler(NullHandler())
+ logger.addHandler(logging.NullHandler())
logger.setLevel(level)
return logger
@@ -388,6 +439,7 @@ def enable_logging():
log.setLevel(logging.NOTSET)
# Also enable logging in the ppt_parser module:
ppt_parser.enable_logging()
+ crypto.enable_logging()
@@ -564,7 +616,8 @@ AUTOEXEC_KEYWORDS = {
# MS Excel:
'Runs when the Excel Workbook is opened':
- ('Auto_Open', 'Workbook_Open', 'Workbook_Activate'),
+ ('Auto_Open', 'Workbook_Open', 'Workbook_Activate', 'Auto_Ope'),
+ # TODO: "Auto_Ope" is temporarily here because of a bug in plugin_biff, which misses the last byte in "Auto_Open"...
'Runs when the Excel Workbook is closed':
('Auto_Close', 'Workbook_Close'),
@@ -600,9 +653,10 @@ SUSPICIOUS_KEYWORDS = {
('CreateTextFile', 'ADODB.Stream', 'WriteText', 'SaveToFile'),
#CreateTextFile: http://msdn.microsoft.com/en-us/library/office/gg264617%28v=office.15%29.aspx
#ADODB.Stream sample: http://pastebin.com/Z4TMyuq6
+ # ShellExecute: https://twitter.com/StanHacked/status/1075088449768693762
'May run an executable file or a system command':
('Shell', 'vbNormal', 'vbNormalFocus', 'vbHide', 'vbMinimizedFocus', 'vbMaximizedFocus', 'vbNormalNoFocus',
- 'vbMinimizedNoFocus', 'WScript.Shell', 'Run', 'ShellExecute'),
+ 'vbMinimizedNoFocus', 'WScript.Shell', 'Run', 'ShellExecute', 'ShellExecuteA', 'shell32'),
# MacScript: see https://msdn.microsoft.com/en-us/library/office/gg264812.aspx
'May run an executable file or a system command on a Mac':
('MacScript',),
@@ -620,6 +674,8 @@ SUSPICIOUS_KEYWORDS = {
'invoke-command', 'scriptblock', 'Invoke-Expression', 'AuthorizationManager'),
'May run an executable file or a system command using PowerShell':
('Start-Process',),
+ 'May run an executable file or a system command using Excel 4 Macros (XLM/XLF)':
+ ('EXEC',),
'May hide the application':
('Application.Visible', 'ShowWindow', 'SW_HIDE'),
'May create a directory':
@@ -635,6 +691,8 @@ SUSPICIOUS_KEYWORDS = {
('New-Object',),
'May run an application (if combined with CreateObject)':
('Shell.Application',),
+ 'May run an Excel 4 Macro (aka XLM/XLF)':
+ ('ExecuteExcel4Macro',),
'May enumerate application windows (if combined with Shell.Application object)':
('Windows', 'FindWindow'),
'May run code from a DLL':
@@ -643,9 +701,12 @@ SUSPICIOUS_KEYWORDS = {
'May run code from a library on a Mac':
#TODO: regex to find declare+lib on same line - see mraptor
('libc.dylib', 'dylib'),
+ 'May run code from a DLL using Excel 4 Macros (XLM/XLF)':
+ ('REGISTER',),
'May inject code into another process':
- ('CreateThread', 'VirtualAlloc', # (issue #9) suggested by Davy Douhine - used by MSF payload
- 'VirtualAllocEx', 'RtlMoveMemory',
+ ('CreateThread', 'CreateUserThread', 'VirtualAlloc', # (issue #9) suggested by Davy Douhine - used by MSF payload
+ 'VirtualAllocEx', 'RtlMoveMemory', 'WriteProcessMemory',
+ 'SetContextThread', 'QueueApcThread', 'WriteVirtualMemory', 'VirtualProtect'
),
'May run a shellcode in memory':
('EnumSystemLanguageGroupsW?', # Used by Hancitor in Oct 2016
@@ -777,7 +838,8 @@ re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"')
re_nothex_check = re.compile(r'[G-Zg-z]')
# regex to extract printable strings (at least 5 chars) from VBA Forms:
-re_printable_string = re.compile(r'[\t\r\n\x20-\xFF]{5,}')
+# (must be bytes for Python 3)
+re_printable_string = re.compile(b'[\\t\\r\\n\\x20-\\xFF]{5,}')
# === PARTIAL VBA GRAMMAR ====================================================
@@ -918,10 +980,13 @@ vba_chr = Suppress(
def vba_chr_tostr(t):
try:
i = t[0]
- # normal, non-unicode character:
if i>=0 and i<=255:
+ # normal, non-unicode character:
+ # TODO: check if it needs to be converted to bytes for Python 3
return VbaExpressionString(chr(i))
else:
+ # unicode character
+ # Note: this distinction is only needed for Python 2
return VbaExpressionString(unichr(i).encode('utf-8', 'backslashreplace'))
except ValueError:
log.exception('ERROR: incorrect parameter value for chr(): %r' % i)
@@ -1188,8 +1253,9 @@ def decompress_stream(compressed_container):
"""
Decompress a stream according to MS-OVBA section 2.4.1
- compressed_container: string compressed according to the MS-OVBA 2.4.1.3.6 Compression algorithm
- return the decompressed container as a string (bytes)
+ :param bytearray compressed_container: bytearray or bytes compressed according to the MS-OVBA 2.4.1.3.6 Compression algorithm
+ :return: the decompressed container as a bytes string
+ :rtype: bytes
"""
# 2.4.1.2 State Variables
@@ -1211,10 +1277,14 @@ def decompress_stream(compressed_container):
# DecompressedChunkStart: The location of the first byte of the DecompressedChunk (section 2.4.1.1.3) within the
# DecompressedBuffer (section 2.4.1.1.2).
- decompressed_container = '' # result
+ # Check the input is a bytearray, otherwise convert it (assuming it's bytes):
+ if not isinstance(compressed_container, bytearray):
+ compressed_container = bytearray(compressed_container)
+ # raise TypeError('decompress_stream requires a bytearray as input')
+ decompressed_container = bytearray() # result
compressed_current = 0
- sig_byte = ord(compressed_container[compressed_current])
+ sig_byte = compressed_container[compressed_current]
if sig_byte != 0x01:
raise ValueError('invalid signature byte {0:02X}'.format(sig_byte))
@@ -1260,7 +1330,7 @@ def decompress_stream(compressed_container):
# MS-OVBA 2.4.1.3.3 Decompressing a RawChunk
# uncompressed chunk: read the next 4096 bytes as-is
#TODO: check if there are at least 4096 bytes left
- decompressed_container += compressed_container[compressed_current:compressed_current + 4096]
+ decompressed_container.extend(compressed_container[compressed_current:compressed_current + 4096])
compressed_current += 4096
else:
# MS-OVBA 2.4.1.3.2 Decompressing a CompressedChunk
@@ -1271,7 +1341,7 @@ def decompress_stream(compressed_container):
# log.debug('compressed_current = %d / compressed_end = %d' % (compressed_current, compressed_end))
# FlagByte: 8 bits indicating if the following 8 tokens are either literal (1 byte of plain text) or
# copy tokens (reference to a previous literal token)
- flag_byte = ord(compressed_container[compressed_current])
+ flag_byte = compressed_container[compressed_current]
compressed_current += 1
for bit_index in xrange(0, 8):
# log.debug('bit_index=%d / compressed_current=%d / compressed_end=%d' % (bit_index, compressed_current, compressed_end))
@@ -1283,7 +1353,7 @@ def decompress_stream(compressed_container):
#log.debug('bit_index=%d: flag_bit=%d' % (bit_index, flag_bit))
if flag_bit == 0: # LiteralToken
# copy one byte directly to output
- decompressed_container += compressed_container[compressed_current]
+ decompressed_container.extend([compressed_container[compressed_current]])
compressed_current += 1
else: # CopyToken
# MS-OVBA 2.4.1.3.19.2 Unpack CopyToken
@@ -1299,520 +1369,664 @@ def decompress_stream(compressed_container):
#log.debug('offset=%d length=%d' % (offset, length))
copy_source = len(decompressed_container) - offset
for index in xrange(copy_source, copy_source + length):
- decompressed_container += decompressed_container[index]
+ decompressed_container.extend([decompressed_container[index]])
compressed_current += 2
- return decompressed_container
+ return bytes(decompressed_container)
-def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
+class VBA_Module(object):
"""
- Extract VBA macros from an OleFileIO object.
- Internal function, do not call directly.
-
- vba_root: path to the VBA root storage, containing the VBA storage and the PROJECT stream
- vba_project: path to the PROJECT stream
- :param relaxed: If True, only create info/debug log entry if data is not as expected
- (e.g. opening substream fails); if False, raise an error in this case
- This is a generator, yielding (stream path, VBA filename, VBA source code) for each VBA code stream
+ Class to parse a VBA module from an OLE file, and to store all the corresponding
+ metadata and VBA source code.
"""
- # Open the PROJECT stream:
- project = ole.openstream(project_path)
- log.debug('relaxed is %s' % relaxed)
-
- # sample content of the PROJECT stream:
-
- ## ID="{5312AC8A-349D-4950-BDD0-49BE3C4DD0F0}"
- ## Document=ThisDocument/&H00000000
- ## Module=NewMacros
- ## Name="Project"
- ## HelpContextID="0"
- ## VersionCompatible32="393222000"
- ## CMG="F1F301E705E705E705E705"
- ## DPB="8F8D7FE3831F2020202020"
- ## GC="2D2FDD81E51EE61EE6E1"
- ##
- ## [Host Extender Info]
- ## &H00000001={3832D640-CF90-11CF-8E43-00A0C911005A};VBE;&H00000000
- ## &H00000002={000209F2-0000-0000-C000-000000000046};Word8.0;&H00000000
- ##
- ## [Workspace]
- ## ThisDocument=22, 29, 339, 477, Z
- ## NewMacros=-4, 42, 832, 510, C
-
- code_modules = {}
-
- for line in project:
- line = line.strip()
- if '=' in line:
- # split line at the 1st equal sign:
- name, value = line.split('=', 1)
- # looking for code modules
- # add the code module as a key in the dictionary
- # the value will be the extension needed later
- # The value is converted to lowercase, to allow case-insensitive matching (issue #3)
- value = value.lower()
- if name == 'Document':
- # split value at the 1st slash, keep 1st part:
- value = value.split('/', 1)[0]
- code_modules[value] = CLASS_EXTENSION
- elif name == 'Module':
- code_modules[value] = MODULE_EXTENSION
- elif name == 'Class':
- code_modules[value] = CLASS_EXTENSION
- elif name == 'BaseClass':
- code_modules[value] = FORM_EXTENSION
-
- # read data from dir stream (compressed)
- dir_compressed = ole.openstream(dir_path).read()
-
- def check_value(name, expected, value):
- if expected != value:
- if relaxed:
- log.error("invalid value for {0} expected {1:04X} got {2:04X}"
- .format(name, expected, value))
- else:
- raise UnexpectedDataError(dir_path, name, expected, value)
-
- dir_stream = StringIO(decompress_stream(dir_compressed))
-
- # PROJECTSYSKIND Record
- projectsyskind_id = struct.unpack(" 128:
- log.error("PROJECTNAME_SizeOfProjectName value not in range: {0}".format(projectname_sizeof_projectname))
- projectname_projectname = dir_stream.read(projectname_sizeof_projectname)
- unused = projectname_projectname
-
- # PROJECTDOCSTRING Record
- projectdocstring_id = struct.unpack(" 2000:
- log.error(
- "PROJECTDOCSTRING_SizeOfDocString value not in range: {0}".format(projectdocstring_sizeof_docstring))
- projectdocstring_docstring = dir_stream.read(projectdocstring_sizeof_docstring)
- projectdocstring_reserved = struct.unpack(" 260:
- log.error(
- "PROJECTHELPFILEPATH_SizeOfHelpFile1 value not in range: {0}".format(projecthelpfilepath_sizeof_helpfile1))
- projecthelpfilepath_helpfile1 = dir_stream.read(projecthelpfilepath_sizeof_helpfile1)
- projecthelpfilepath_reserved = struct.unpack(" 1015:
- log.error(
- "PROJECTCONSTANTS_SizeOfConstants value not in range: {0}".format(projectconstants_sizeof_constants))
- projectconstants_constants = dir_stream.read(projectconstants_sizeof_constants)
- projectconstants_reserved = struct.unpack(" 0:
- code_data = decompress_stream(code_data)
+ code_data = decompress_stream(bytearray(code_data))
+ # store the raw code encoded as bytes with the project's code page:
+ self.code_raw = code_data
+ # decode it to unicode:
+ self.code = project.decode_bytes(code_data)
+ # also store a native str version:
+ self.code_str = unicode2str(self.code)
# case-insensitive search in the code_modules dict to find the file extension:
- filext = code_modules.get(modulename_modulename.lower(), 'bin')
- filename = '{0}.{1}'.format(modulename_modulename, filext)
- #TODO: also yield the codepage so that callers can decode it properly
- yield (code_path, filename, code_data)
- # print '-'*79
- # print filename
- # print ''
- # print code_data
- # print ''
- log.debug('extracted file {0}'.format(filename))
+ filext = self.project.module_ext.get(self.name.lower(), 'vba')
+ self.filename = u'{0}.{1}'.format(self.name, filext)
+ self.filename_str = unicode2str(self.filename)
+ log.debug('extracted file {0}'.format(self.filename_str))
else:
- log.warning("module stream {0} has code data length 0".format(modulestreamname_streamname))
+ log.warning("module stream {0} has code data length 0".format(self.streamname_str))
except (UnexpectedDataError, SubstreamOpenError):
raise
except Exception as exc:
- log.info('Error parsing module {0} of {1} in _extract_vba:'
- .format(projectmodule_index, projectmodules_count),
+ log.info('Error parsing module {0} of {1}:'
+ .format(module_index, project.modules_count),
exc_info=True)
- if not relaxed:
+ if not project.relaxed:
raise
- _ = unused # make pylint happy: now variable "unused" is being used ;-)
- return
+
+
+class VBA_Project(object):
+ """
+ Class to parse a VBA project from an OLE file, and to store all the corresponding
+ metadata and VBA modules.
+ """
+
+ def __init__(self, ole, vba_root, project_path, dir_path, relaxed=False):
+ """
+ Extract VBA macros from an OleFileIO object.
+
+ :param vba_root: path to the VBA root storage, containing the VBA storage and the PROJECT stream
+ :param project_path: path to the PROJECT stream
+ :param relaxed: If True, only create info/debug log entry if data is not as expected
+ (e.g. opening substream fails); if False, raise an error in this case
+ """
+ self.ole = ole
+ self.vba_root = vba_root
+ self. project_path = project_path
+ self.dir_path = dir_path
+ self.relaxed = relaxed
+ #: VBA modules contained in the project (list of VBA_Module objects)
+ self.modules = []
+ #: file extension for each VBA module
+ self.module_ext = {}
+ log.debug('Parsing the dir stream from %r' % dir_path)
+ # read data from dir stream (compressed)
+ dir_compressed = ole.openstream(dir_path).read()
+ # decompress it:
+ dir_stream = BytesIO(decompress_stream(bytearray(dir_compressed)))
+ # store reference for later use:
+ self.dir_stream = dir_stream
+
+ # reference: MS-VBAL 2.3.4.2 dir Stream: Version Independent Project Information
+
+ # PROJECTSYSKIND Record
+ # Specifies the platform for which the VBA project is created.
+ projectsyskind_id = struct.unpack(" 128:
+ # TODO: raise an actual error? What is MS Office's behaviour?
+ log.error("PROJECTNAME_SizeOfProjectName value not in range [1-128]: {0}".format(sizeof_projectname))
+ projectname_bytes = dir_stream.read(sizeof_projectname)
+ self.projectname = self.decode_bytes(projectname_bytes)
+
+
+ # PROJECTDOCSTRING Record
+ # Specifies the description for the VBA project.
+ projectdocstring_id = struct.unpack(" 2000:
+ log.error(
+ "PROJECTDOCSTRING_SizeOfDocString value not in range: {0}".format(projectdocstring_sizeof_docstring))
+ # DocString (variable): An array of SizeOfDocString bytes that specifies the description for the VBA project.
+ # MUST contain MBCS characters encoded using the code page specified in PROJECTCODEPAGE (section 2.3.4.2.1.4).
+ # MUST NOT contain null characters.
+ docstring_bytes = dir_stream.read(projectdocstring_sizeof_docstring)
+ self.docstring = self.decode_bytes(docstring_bytes)
+ projectdocstring_reserved = struct.unpack(" 260:
+ log.error(
+ "PROJECTHELPFILEPATH_SizeOfHelpFile1 value not in range: {0}".format(projecthelpfilepath_sizeof_helpfile1))
+ projecthelpfilepath_helpfile1 = dir_stream.read(projecthelpfilepath_sizeof_helpfile1)
+ projecthelpfilepath_reserved = struct.unpack(" 1015:
+ log.error(
+ "PROJECTCONSTANTS_SizeOfConstants value not in range: {0}".format(projectconstants_sizeof_constants))
+ projectconstants_constants = dir_stream.read(projectconstants_sizeof_constants)
+ projectconstants_reserved = struct.unpack(" -1:
stripped_data = stripped_data[content_offset:]
# TODO: quick and dirty fix: insert a standard line with MIME-Version header?
- mhtml = email.message_from_string(stripped_data)
+ if PYTHON2:
+ mhtml = email.message_from_string(stripped_data)
+ else:
+ # on Python 3, need to use message_from_bytes instead:
+ mhtml = email.message_from_bytes(stripped_data)
# find all the attached files:
for part in mhtml.walk():
content_type = part.get_content_type() # always returns a value
@@ -2627,7 +2859,7 @@ class VBA_Parser(object):
# using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded.
# decompress the zlib data starting at offset 0x32, which is the OLE container:
# check ActiveMime header:
- if isinstance(part_data, str) and is_mso_file(part_data):
+ if isinstance(part_data, bytes) and is_mso_file(part_data):
log.debug('Found ActiveMime header, decompressing MSO container')
try:
ole_data = mso_file_extract(part_data)
@@ -2697,7 +2929,9 @@ class VBA_Parser(object):
"""
log.info('Opening text file %s' % self.filename)
# directly store the source code:
- self.vba_code_all_modules = data
+ # On Python 2, store it as a raw bytes string
+ # On Python 3, convert it to unicode assuming it was encoded with UTF-8
+ self.vba_code_all_modules = bytes2str(data)
self.contains_macros = True
# set type only if parsing succeeds
self.type = TYPE_TEXT
@@ -2853,7 +3087,7 @@ class VBA_Parser(object):
log.debug('%r...[much more data]...%r' % (data[:100], data[-50:]))
else:
log.debug(repr(data))
- if 'Attribut\x00' in data:
+ if b'Attribut\x00' in data:
log.debug('Found VBA compressed code')
self.contains_macros = True
except IOError as exc:
@@ -2862,8 +3096,44 @@ class VBA_Parser(object):
- log.debug('Trace:', exc_trace=True)
+ log.debug('Trace:', exc_info=True)
else:
raise SubstreamOpenError(self.filename, d.name, exc)
+ if self.detect_xlm_macros():
+ self.contains_macros = True
return self.contains_macros
+ def detect_xlm_macros(self):
+ """Look for Excel 4 (XLM) macros in the 'Workbook'/'Book' stream using oledump's plugin_biff; store them in self.xlm_macros and return True if any were found, else False."""
+ from oletools.thirdparty.oledump.plugin_biff import cBIFF
+ self.xlm_macros = []
+ if self.ole_file is None:
+ return False
+ for excel_stream in ('Workbook', 'Book'):
+ if self.ole_file.exists(excel_stream):
+ log.debug('Found Excel stream %r' % excel_stream)
+ data = self.ole_file.openstream(excel_stream).read()
+ log.debug('Running BIFF plugin from oledump')
+ try:
+ biff_plugin = cBIFF(name=[excel_stream], stream=data, options='-x')
+ self.xlm_macros = biff_plugin.Analyze()
+ if len(self.xlm_macros)>0:
+ log.debug('Found XLM macros')
+ return True
+ except Exception: # best effort: log plugin errors, but do not trap KeyboardInterrupt/SystemExit
+ log.exception('Error when running oledump.plugin_biff, please report to %s' % URL_OLEVBA_ISSUES)
+ return False
+
+ def encode_string(self, unicode_str):
+ """
+ Encode a unicode string to bytes or str, using the specified encoding
+ for the VBA_Parser. By default, it will be bytes/UTF-8 on Python 2, and
+ a normal unicode string on Python 3.
+ :param str unicode_str: string to be encoded
+ :return: unicode_str unchanged if self.encoding is None, otherwise bytes encoded with self.encoding (unencodable characters are replaced)
+ """
+ if self.encoding is None:
+ return unicode_str
+ else:
+ return unicode_str.encode(self.encoding, errors='replace')
+
def extract_macros(self):
"""
Extract and decompress source code for each VBA macro found in the file
@@ -2920,18 +3190,33 @@ class VBA_Parser(object):
# read data
log.debug('Reading data from stream %r' % d.name)
data = ole._open(d.isectStart, d.size).read()
- for match in re.finditer(r'\x00Attribut[^e]', data, flags=re.IGNORECASE):
+ for match in re.finditer(b'\\x00Attribut[^e]', data, flags=re.IGNORECASE):
start = match.start() - 3
log.debug('Found VBA compressed code at index %X' % start)
compressed_code = data[start:]
try:
- vba_code = decompress_stream(compressed_code)
+ vba_code = decompress_stream(bytearray(compressed_code))
+ # TODO vba_code = self.encode_string(vba_code)
yield (self.filename, d.name, d.name, vba_code)
except Exception as exc:
# display the exception with full stack trace for debugging
log.debug('Error processing stream %r in file %r (%s)' % (d.name, self.filename, exc))
log.debug('Traceback:', exc_info=True)
# do not raise the error, as it is unlikely to be a compressed macro stream
+ if self.xlm_macros:
+ vba_code = ''
+ for line in self.xlm_macros:
+ vba_code += "' " + line + '\n'
+ yield ('xlm_macro', 'xlm_macro', 'xlm_macro.txt', vba_code)
+ # Analyse the VBA P-code to detect VBA stomping:
+ # If stomping is detected, add a fake VBA module with the P-code as source comments
+ # so that VBA_Scanner can find keywords and IOCs in it
+ if self.detect_vba_stomping():
+ vba_code = ''
+ for line in self.pcodedmp_output.splitlines():
+ vba_code += "' " + line + '\n'
+ yield ('VBA P-code', 'VBA P-code', 'VBA_P-code.txt', vba_code)
+
def extract_all_macros(self):
"""
@@ -2953,6 +3238,8 @@ class VBA_Parser(object):
"""
runs extract_macros and analyze the source code of all VBA macros
found in the file.
+ All results are stored in self.analysis_results.
+ If called more than once, simply returns the previous results.
"""
if self.detect_vba_macros():
# if the analysis was already done, avoid doing it twice:
@@ -2969,6 +3256,13 @@ class VBA_Parser(object):
# Analyze the whole code at once:
scanner = VBA_Scanner(self.vba_code_all_modules)
self.analysis_results = scanner.scan(show_decoded_strings, deobfuscate)
+ if self.detect_vba_stomping():
+ log.debug('adding VBA stomping to suspicious keywords')
+ keyword = 'VBA Stomping'
+ description = 'VBA Stomping was detected: the VBA source code and P-code are different, '\
+ 'this may have been used to hide malicious code'
+ scanner.suspicious_keywords.append((keyword, description))
+ scanner.results.append(('Suspicious', keyword, description))
autoexec, suspicious, iocs, hexstrings, base64strings, dridex, vbastrings = scanner.scan_summary()
self.nb_autoexec += autoexec
self.nb_suspicious += suspicious
@@ -3080,11 +3374,12 @@ class VBA_Parser(object):
"""
Extract printable strings from each VBA Form found in the file
- Iterator: yields (filename, stream_path, vba_filename, vba_code) for each VBA macro found
+ Iterator: yields (filename, stream_path, form_string) for each printable string found in forms
If the file is OLE, filename is the path of the file.
If the file is OpenXML, filename is the path of the OLE subfile containing VBA macros
within the zip archive, e.g. word/vbaProject.bin.
If the file is PPT, result is as for OpenXML but filename is useless
+ Note: form_string is a raw bytes string on Python 2, a unicode str on Python 3
"""
if self.ole_file is None:
# This may be either an OpenXML/PPT or a text file:
@@ -3107,7 +3402,13 @@ class VBA_Parser(object):
# Extract printable strings from the form object stream "o":
for m in re_printable_string.finditer(form_data):
log.debug('Printable string found in form: %r' % m.group())
- yield (self.filename, '/'.join(o_stream), m.group())
+ # On Python 3, convert bytes string to unicode str:
+ if PYTHON2:
+ found_str = m.group()
+ else:
+ found_str = m.group().decode('utf8', errors='replace')
+ if found_str != 'Tahoma':
+ yield (self.filename, '/'.join(o_stream), found_str)
def extract_form_strings_extended(self):
if self.ole_file is None:
@@ -3128,6 +3429,136 @@ class VBA_Parser(object):
for variable in oleform.extract_OleFormVariables(ole, form_storage):
yield (self.filename, '/'.join(form_storage), variable)
+ def extract_pcode(self):
+     """
+     Extract and disassemble the VBA P-code, using pcodedmp
+
+     The disassembly is produced only once and cached in self.pcodedmp_output;
+     further calls return the cached value. If pcodedmp cannot be imported,
+     the P-code is ignored and an empty string is returned. If pcodedmp fails
+     while processing the file, the error is logged and the output collected
+     so far is returned.
+
+     :return: VBA P-code disassembly
+     :rtype: str
+     """
+     # only run it once:
+     if self.pcodedmp_output is not None:
+         return self.pcodedmp_output
+
+     log.debug('Calling pcodedmp to extract and disassemble the VBA P-code')
+     # import pcodedmp here to avoid circular imports:
+     try:
+         from pcodedmp import pcodedmp
+     except Exception as e:
+         # This may happen with Pypy, because pcodedmp imports win_unicode_console...
+         # TODO: this is a workaround, we just ignore P-code
+         # TODO: here we just use log.info, because the word "error" in the output makes some of the tests fail...
+         log.info('Exception when importing pcodedmp: {}'.format(e))
+         self.pcodedmp_output = ''
+         return self.pcodedmp_output
+
+     # logging is disabled after importing pcodedmp, need to re-enable it
+     # This is because pcodedmp imports olevba again :-/
+     # TODO: here it works only if logging was enabled, need to change pcodedmp!
+     enable_logging()
+
+     # The disassembly is collected in an in-memory buffer handed to pcodedmp
+     # through its output_file argument, so that it can be processed later on.
+     # On Python 2 console output is bytes, on Python 3 it is unicode:
+     output = BytesIO() if PYTHON2 else StringIO()
+
+     # we need to fake an argparser for those two args used by pcodedmp:
+     class PcodedmpArgs:
+         disasmOnly = True
+         verbose = False
+
+     try:
+         # TODO: handle files in memory too
+         log.debug('before pcodedmp')
+         pcodedmp.processFile(self.filename, PcodedmpArgs, output_file=output)
+         log.debug('after pcodedmp')
+     except Exception:
+         # best effort: log the error with its traceback and carry on with
+         # whatever was written to the buffer before the failure
+         log.exception('Error while running pcodedmp')
+
+     self.pcodedmp_output = output.getvalue()
+     return self.pcodedmp_output
+
+ def detect_vba_stomping(self):
+     """
+     Detect VBA stomping, by comparing the keywords present in the P-code and
+     in the VBA source code.
+
+     Names and string literals are collected from the pcodedmp disassembly;
+     stomping is reported as soon as one of them is absent from the VBA
+     source code. The result is cached in self.vba_stomping_detected.
+
+     :return: True if VBA stomping detected, False otherwise
+     :rtype: bool
+     """
+     # only run it once:
+     if self.vba_stomping_detected is not None:
+         return self.vba_stomping_detected
+     log.debug('Analysing the P-code to detect VBA stomping')
+     self.extract_pcode()
+     log.debug('pcodedmp OK')
+     # process the output to extract keywords, to detect VBA stomping
+     keywords = set()
+     for line in self.pcodedmp_output.splitlines():
+         # only the lines starting with a tab are looked at
+         if not line.startswith('\t'):
+             continue
+         tokens = line.split(None, 1)
+         if not tokens:
+             # whitespace-only line: there is no mnemonic to inspect
+             continue
+         log.debug('P-code: ' + line.strip())
+         mnemonic = tokens[0]
+         args = tokens[1].strip() if len(tokens) == 2 else ''
+         if mnemonic in ('ArgsCall', 'ArgsLd', 'St', 'Ld', 'MemSt', 'Label'):
+             # add 1st argument (there may be none on an unexpected line):
+             names = args.split(None, 1)
+             # sometimes pcodedmp reports names like "id_FFFF", which are not
+             # directly present in the VBA source code
+             # (for example "Me" in VBA appears as id_FFFF in P-code)
+             if names and not names[0].startswith('id_'):
+                 keywords.add(names[0])
+         if mnemonic == 'LitStr':
+             # the string is the 2nd argument:
+             parts = args.split(None, 1)
+             if len(parts) < 2:
+                 # unexpected line without a string: nothing to compare
+                 continue
+             s = parts[1]
+             # tricky issue: when a string contains double quotes inside,
+             # pcodedmp returns a single ", whereas in the VBA source code
+             # it is always a double "".
+             # We have to remove the " around the strings, then double the remaining ",
+             # and put back the " around.
+             # Text that is not quoted as expected is kept as is, instead of
+             # aborting the whole analysis on an assert.
+             if len(s) >= 2 and s[0] == '"' and s[-1] == '"':
+                 s = '"' + s[1:-1].replace('"', '""') + '"'
+             keywords.add(s)
+     log.debug('Keywords extracted from P-code: ' + repr(sorted(keywords)))
+     # Set the flag before extracting macros: the macro extraction code also calls
+     # detect_vba_stomping() and presumably relies on it to avoid recursing here.
+     self.vba_stomping_detected = False
+     # TODO: add a method to get all VBA code as one string
+     vba_code_all_modules = ''.join(
+         vba_code + '\n' for (_, _, _, vba_code) in self.extract_all_macros())
+     for keyword in keywords:
+         if keyword not in vba_code_all_modules:
+             log.debug('Keyword {!r} not found in VBA code'.format(keyword))
+             log.debug('VBA STOMPING DETECTED!')
+             self.vba_stomping_detected = True
+             break
+     if not self.vba_stomping_detected:
+         log.debug('No VBA stomping detected.')
+     return self.vba_stomping_detected
+
def close(self):
"""
Close all the open files. This method must be called after usage, if
@@ -3156,11 +3587,11 @@ class VBA_Parser_CLI(VBA_Parser):
super(VBA_Parser_CLI, self).__init__(*args, **kwargs)
- def print_analysis(self, show_decoded_strings=False, deobfuscate=False):
+ def run_analysis(self, show_decoded_strings=False, deobfuscate=False):
"""
- Analyze the provided VBA code, and print the results in a table
+ Analyze the provided VBA code, without printing the results (yet)
+ All results are stored in self.analysis_results.
- :param vba_code: str, VBA source code to be analyzed
:param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
:param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
:return: None
@@ -3169,21 +3600,37 @@ class VBA_Parser_CLI(VBA_Parser):
if sys.stdout.isatty():
print('Analysis...\r', end='')
sys.stdout.flush()
- results = self.analyze_macros(show_decoded_strings, deobfuscate)
+ self.analyze_macros(show_decoded_strings, deobfuscate)
+
+
+ def print_analysis(self, show_decoded_strings=False, deobfuscate=False):
+ """
+ print the analysis results in a table
+
+ :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
+ :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
+ :return: None
+ """
+ results = self.analysis_results
if results:
- t = prettytable.PrettyTable(('Type', 'Keyword', 'Description'))
- t.align = 'l'
- t.max_width['Type'] = 10
- t.max_width['Keyword'] = 20
- t.max_width['Description'] = 39
+ t = tablestream.TableStream(column_width=(10, 20, 45),
+ header_row=('Type', 'Keyword', 'Description'))
+ COLOR_TYPE = {
+ 'AutoExec': 'yellow',
+ 'Suspicious': 'red',
+ 'IOC': 'cyan',
+ }
for kw_type, keyword, description in results:
# handle non printable strings:
if not is_printable(keyword):
keyword = repr(keyword)
if not is_printable(description):
description = repr(description)
- t.add_row((kw_type, keyword, description))
- print(t)
+ color_type = COLOR_TYPE.get(kw_type, None)
+ t.write_row((kw_type, keyword, description), colors=(color_type, None, None))
+ t.close()
+ if self.vba_stomping_detected:
+ print('VBA Stomping detection is experimental: please report any false positive/negative at https://github.com/decalage2/oletools/issues')
else:
print('No suspicious keyword or IOC found.')
@@ -3204,10 +3651,29 @@ class VBA_Parser_CLI(VBA_Parser):
return [dict(type=kw_type, keyword=keyword, description=description)
for kw_type, keyword, description in self.analyze_macros(show_decoded_strings, deobfuscate)]
+ def colorize_keywords(self, vba_code):
+     """
+     Colorize keywords found during the VBA code analysis.
+     Empty keywords and keywords already processed are skipped, so that
+     color tags are neither inserted between characters nor nested.
+     :param vba_code: str, VBA code to be colorized
+     :return: str, VBA code including color tags for Colorclass
+     """
+     COLOR_TYPE = {'AutoExec': 'yellow', 'Suspicious': 'red', 'IOC': 'cyan'}
+     done = set()
+     for kw_type, keyword, _description in self.analysis_results or ():
+         color_type = COLOR_TYPE.get(kw_type)
+         # str.replace('') would insert tags between every character
+         if not color_type or not keyword or keyword in done:
+             continue
+         done.add(keyword)
+         vba_code = vba_code.replace(keyword, '{auto%s}%s{/%s}' % (color_type, keyword, color_type))
+     return vba_code
+
def process_file(self, show_decoded_strings=False,
display_code=True, hide_attributes=True,
vba_code_only=False, show_deobfuscated_code=False,
- deobfuscate=False):
+ deobfuscate=False, pcode=False):
"""
Process a single file
@@ -3219,6 +3685,7 @@ class VBA_Parser_CLI(VBA_Parser):
otherwise each module is analyzed separately (old behaviour)
:param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default)
:param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
+ :param pcode: bool, if True, call pcodedmp to disassemble P-code and display it
"""
#TODO: replace print by writing to a provided output file (sys.stdout by default)
# fix conflicting parameters:
@@ -3234,6 +3701,8 @@ class VBA_Parser_CLI(VBA_Parser):
#TODO: handle olefile errors, when an OLE file is malformed
print('Type: %s'% self.type)
if self.detect_vba_macros():
+ # run analysis before displaying VBA code, in order to colorize found keywords
+ self.run_analysis(show_decoded_strings=show_decoded_strings, deobfuscate=deobfuscate)
#print 'Contains VBA Macros:'
for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros():
if hide_attributes:
@@ -3251,21 +3720,30 @@ class VBA_Parser_CLI(VBA_Parser):
print('(empty macro)')
else:
# check if the VBA code contains special characters such as backspace (issue #358)
- if b'\x08' in vba_code_filtered:
+ if '\x08' in vba_code_filtered:
log.warning('The VBA code contains special characters such as backspace, that may be used for obfuscation.')
if sys.stdout.isatty():
# if the standard output is the console, we'll display colors
backspace = colorclass.Color(b'{autored}\\x08{/red}')
else:
- backspace = b'\\x08'
+ backspace = '\\x08'
# replace backspace by "\x08" for display
- vba_code_filtered = vba_code_filtered.replace(b'\x08', backspace)
+ vba_code_filtered = vba_code_filtered.replace('\x08', backspace)
+ try:
+ # Colorize the interesting keywords in the output:
+ # (unless the output is redirected to a file)
+ if sys.stdout.isatty():
+ vba_code_filtered = colorclass.Color(self.colorize_keywords(vba_code_filtered))
+ except UnicodeError:
+ # TODO better handling of Unicode
+ log.error('Unicode conversion to be fixed before colorizing the output')
print(vba_code_filtered)
for (subfilename, stream_path, form_string) in self.extract_form_strings():
- print('-' * 79)
- print('VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path))
- print('- ' * 39)
- print(form_string)
+ if form_string is not None:
+ print('-' * 79)
+ print('VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path))
+ print('- ' * 39)
+ print(form_string)
try:
for (subfilename, stream_path, form_variables) in self.extract_form_strings_extended():
if form_variables is not None:
@@ -3277,6 +3755,11 @@ class VBA_Parser_CLI(VBA_Parser):
# display the exception with full stack trace for debugging
log.info('Error parsing form: %s' % exc)
log.debug('Traceback:', exc_info=True)
+ if pcode:
+ print('-' * 79)
+ print('P-CODE disassembly:')
+ pcode = self.extract_pcode()
+ print(pcode)
if not vba_code_only:
# analyse the code from all modules at once:
@@ -3398,16 +3881,6 @@ class VBA_Parser_CLI(VBA_Parser):
line = '%-12s %s' % (flags, self.filename)
print(line)
-
- # old table display:
- # macros = autoexec = suspicious = iocs = hexstrings = 'no'
- # if nb_macros: macros = 'YES:%d' % nb_macros
- # if nb_autoexec: autoexec = 'YES:%d' % nb_autoexec
- # if nb_suspicious: suspicious = 'YES:%d' % nb_suspicious
- # if nb_iocs: iocs = 'YES:%d' % nb_iocs
- # if nb_hexstrings: hexstrings = 'YES:%d' % nb_hexstrings
- # # 2nd line = info
- # print '%-8s %-7s %-7s %-7s %-7s %-7s' % (self.type, macros, autoexec, suspicious, iocs, hexstrings)
except Exception as exc:
# display the exception with full stack trace for debugging only
log.debug('Error processing file %s (%s)' % (self.filename, exc),
@@ -3415,20 +3888,6 @@ class VBA_Parser_CLI(VBA_Parser):
raise ProcessingError(self.filename, exc)
- # t = prettytable.PrettyTable(('filename', 'type', 'macros', 'autoexec', 'suspicious', 'ioc', 'hexstrings'),
- # header=False, border=False)
- # t.align = 'l'
- # t.max_width['filename'] = 30
- # t.max_width['type'] = 10
- # t.max_width['macros'] = 6
- # t.max_width['autoexec'] = 6
- # t.max_width['suspicious'] = 6
- # t.max_width['ioc'] = 6
- # t.max_width['hexstrings'] = 6
- # t.add_row((filename, ftype, macros, autoexec, suspicious, iocs, hexstrings))
- # print t
-
-
#=== MAIN =====================================================================
def parse_args(cmd_line_args=None):
@@ -3452,7 +3911,11 @@ def parse_args(cmd_line_args=None):
parser.add_option("-r", action="store_true", dest="recursive",
help='find files recursively in subdirectories.')
parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
- help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)')
+ help='if the file is a zip archive, open all files from it, using the provided password.')
+ parser.add_option("-p", "--password", type='str', action='append',
+ default=[],
+ help='if encrypted office files are encountered, try '
+ 'decryption with this password. May be repeated.')
parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
# output mode; could make this even simpler with add_option(type='choice') but that would make
@@ -3484,12 +3947,17 @@ def parse_args(cmd_line_args=None):
help="Attempt to deobfuscate VBA expressions (slow)")
parser.add_option('--relaxed', dest="relaxed", action="store_true", default=False,
help="Do not raise errors if opening of substream fails")
+ parser.add_option('--pcode', dest="pcode", action="store_true", default=False,
+ help="Disassemble and display the P-code (using pcodedmp)")
(options, args) = parser.parse_args(cmd_line_args)
# Print help if no arguments are passed
if len(args) == 0:
- print('olevba %s - http://decalage.info/python/oletools' % __version__)
+ # print banner with version
+ python_version = '%d.%d.%d' % sys.version_info[0:3]
+ print('olevba %s on Python %s - http://decalage.info/python/oletools' %
+ (__version__, python_version))
print(__doc__)
parser.print_help()
sys.exit(RETURN_WRONG_ARGS)
@@ -3499,6 +3967,112 @@ def parse_args(cmd_line_args=None):
return options, args
+def process_file(filename, data, container, options, crypto_nesting=0):
+    """
+    Part of main function that processes a single file.
+
+    This handles exceptions and encryption: an encrypted file is decrypted to a
+    temporary file, processed by a recursive call, then the temporary file is removed.
+
+    :param filename: name of the file to process
+    :param data: data of the file, passed on to VBA_Parser_CLI
+    :param container: container the file comes from, if any
+    :param options: parsed command-line options
+    :param crypto_nesting: current depth of nested encrypted files
+    :return: a single code summarizing the status of processing of this file
+    """
+    try:
+        # Open the file
+        vba_parser = VBA_Parser_CLI(filename, data=data, container=container,
+                                    relaxed=options.relaxed)
+
+        if options.output_mode == 'detailed':
+            # fully detailed output
+            vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
+                                    display_code=options.display_code,
+                                    hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
+                                    show_deobfuscated_code=options.show_deobfuscated_code,
+                                    deobfuscate=options.deobfuscate, pcode=options.pcode)
+        elif options.output_mode == 'triage':
+            # summarized output for triage:
+            vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings,
+                                           deobfuscate=options.deobfuscate)
+        elif options.output_mode == 'json':
+            print_json(
+                vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings,
+                                             display_code=options.display_code,
+                                             hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
+                                             show_deobfuscated_code=options.show_deobfuscated_code,
+                                             deobfuscate=options.deobfuscate))
+        else:  # (should be impossible)
+            raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode))
+
+        # even if processing succeeds, file might still be encrypted
+        log.debug('Checking for encryption (normal)')
+        if not crypto.is_encrypted(filename):
+            log.debug('no encryption detected')
+            return RETURN_OK
+    except Exception as exc:
+        log.debug('Checking for encryption (after exception)')
+        if not crypto.is_encrypted(filename):
+            if isinstance(exc, (SubstreamOpenError, UnexpectedDataError)):
+                if options.output_mode in ('triage', 'unspecified'):
+                    print('%-12s %s - Error opening substream or unexpected '
+                          'content' % ('?', filename))
+                elif options.output_mode == 'json':
+                    print_json(file=filename, type='error',
+                               error=type(exc).__name__, message=str(exc))
+                else:
+                    log.exception('Error opening substream or unexpected '
+                                  'content in %s' % filename)
+                return RETURN_OPEN_ERROR
+            elif isinstance(exc, FileOpenError):
+                if options.output_mode in ('triage', 'unspecified'):
+                    print('%-12s %s - File format not supported' % ('?', filename))
+                elif options.output_mode == 'json':
+                    print_json(file=filename, type='error',
+                               error=type(exc).__name__, message=str(exc))
+                else:
+                    log.exception('Failed to open %s -- probably not supported!' % filename)
+                return RETURN_OPEN_ERROR
+            elif isinstance(exc, ProcessingError):
+                if options.output_mode in ('triage', 'unspecified'):
+                    print('%-12s %s - %s' % ('!ERROR', filename, exc.orig_exc))
+                elif options.output_mode == 'json':
+                    print_json(file=filename, type='error',
+                               error=type(exc).__name__,
+                               message=str(exc.orig_exc))
+                else:
+                    log.exception('Error processing file %s (%s)!'
+                                  % (filename, exc.orig_exc))
+                return RETURN_PARSE_ERROR
+            else:
+                raise  # let caller deal with this
+
+    # we reach this point only if file is encrypted
+    # check if this is an encrypted file in an encrypted file in an ...
+    if crypto_nesting >= crypto.MAX_NESTING_DEPTH:
+        raise crypto.MaxCryptoNestingReached(crypto_nesting, filename)
+
+    decrypted_file = None
+    try:
+        log.debug('Checking encryption passwords {}'.format(options.password))
+        passwords = options.password + crypto.DEFAULT_PASSWORDS
+        decrypted_file = crypto.decrypt(filename, passwords)
+        if not decrypted_file:
+            log.error('Decrypt failed, run with debug output to get details')
+            raise crypto.WrongEncryptionPassword(filename)
+        log.info('Working on decrypted file')
+        return process_file(decrypted_file, data, container or filename, options, crypto_nesting+1)
+    finally:  # clean up
+        if decrypted_file:
+            try:
+                log.debug('Removing crypt temp file {}'.format(decrypted_file))
+                os.unlink(decrypted_file)
+            except OSError:  # e.g. file does not exist
+                pass
+
+
def main(cmd_line_args=None):
"""
Main function, called when olevba is run from the command line
@@ -3517,52 +4091,60 @@ def main(cmd_line_args=None):
url='http://decalage.info/python/oletools',
type='MetaInformation', _json_is_first=True)
else:
- print('olevba %s - http://decalage.info/python/oletools' % __version__)
+ # print banner with version
+ python_version = '%d.%d.%d' % sys.version_info[0:3]
+ print('olevba %s on Python %s - http://decalage.info/python/oletools' %
+ (__version__, python_version))
logging.basicConfig(level=options.loglevel, format='%(levelname)-8s %(message)s')
# enable logging in the modules:
enable_logging()
- # Old display with number of items detected:
- # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('Type', 'Macros', 'AutoEx', 'Susp.', 'IOCs', 'HexStr')
- # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('-'*8, '-'*7, '-'*7, '-'*7, '-'*7, '-'*7)
-
# with the option --reveal, make sure --deobf is also enabled:
if options.show_deobfuscated_code and not options.deobfuscate:
- log.info('set --deobf because --reveal was set')
+ log.debug('set --deobf because --reveal was set')
options.deobfuscate = True
if options.output_mode == 'triage' and options.show_deobfuscated_code:
- log.info('ignoring option --reveal in triage output mode')
+ log.debug('ignoring option --reveal in triage output mode')
+
+ # gather info on all files that must be processed
+ # ignore directory names stored in zip files:
+ all_input_info = tuple((container, filename, data) for
+ container, filename, data in xglob.iter_files(
+ args, recursive=options.recursive,
+ zip_password=options.zip_password,
+ zip_fname=options.zip_fname)
+ if not (container and filename.endswith('/')))
+
+ # specify output mode if options -t, -d and -j were not specified
+ if options.output_mode == 'unspecified':
+ if len(all_input_info) == 1:
+ options.output_mode = 'detailed'
+ else:
+ options.output_mode = 'triage'
- # Column headers (do not know how many files there will be yet, so if no output_mode
- # was specified, we will print triage for first file --> need these headers)
- if options.output_mode in ('triage', 'unspecified'):
+ # Column headers for triage mode
+ if options.output_mode == 'triage':
print('%-12s %-65s' % ('Flags', 'Filename'))
print('%-12s %-65s' % ('-' * 11, '-' * 65))
previous_container = None
count = 0
container = filename = data = None
- vba_parser = None
return_code = RETURN_OK
try:
- for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
- zip_password=options.zip_password, zip_fname=options.zip_fname):
- # ignore directory names stored in zip files:
- if container and filename.endswith('/'):
- continue
-
+ for container, filename, data in all_input_info:
# handle errors from xglob
if isinstance(data, Exception):
if isinstance(data, PathNotFoundException):
- if options.output_mode in ('triage', 'unspecified'):
+ if options.output_mode == 'triage':
print('%-12s %s - File not found' % ('?', filename))
elif options.output_mode != 'json':
log.error('Given path %r does not exist!' % filename)
return_code = RETURN_FILE_NOT_FOUND if return_code == 0 \
else RETURN_SEVERAL_ERRS
else:
- if options.output_mode in ('triage', 'unspecified'):
+ if options.output_mode == 'triage':
print('%-12s %s - Failed to read from zip file %s' % ('?', filename, container))
elif options.output_mode != 'json':
log.error('Exception opening/reading %r from zip file %r: %s'
@@ -3574,107 +4156,42 @@ def main(cmd_line_args=None):
error=type(data).__name__, message=str(data))
continue
- try:
- # close the previous file if analyzing several:
- # (this must be done here to avoid closing the file if there is only 1,
- # to fix issue #219)
- if vba_parser is not None:
- vba_parser.close()
- # Open the file
- vba_parser = VBA_Parser_CLI(filename, data=data, container=container,
- relaxed=options.relaxed)
-
- if options.output_mode == 'detailed':
- # fully detailed output
- vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
- display_code=options.display_code,
- hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
- show_deobfuscated_code=options.show_deobfuscated_code,
- deobfuscate=options.deobfuscate)
- elif options.output_mode in ('triage', 'unspecified'):
- # print container name when it changes:
- if container != previous_container:
- if container is not None:
- print('\nFiles in %s:' % container)
- previous_container = container
- # summarized output for triage:
- vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings,
- deobfuscate=options.deobfuscate)
- elif options.output_mode == 'json':
- print_json(
- vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings,
- display_code=options.display_code,
- hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
- show_deobfuscated_code=options.show_deobfuscated_code,
- deobfuscate=options.deobfuscate))
- else: # (should be impossible)
- raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode))
- count += 1
-
- except (SubstreamOpenError, UnexpectedDataError) as exc:
- if options.output_mode in ('triage', 'unspecified'):
- print('%-12s %s - Error opening substream or uenxpected ' \
- 'content' % ('?', filename))
- elif options.output_mode == 'json':
- print_json(file=filename, type='error',
- error=type(exc).__name__, message=str(exc))
- else:
- log.exception('Error opening substream or unexpected '
- 'content in %s' % filename)
- return_code = RETURN_OPEN_ERROR if return_code == 0 \
- else RETURN_SEVERAL_ERRS
- except FileOpenError as exc:
- if options.output_mode in ('triage', 'unspecified'):
- print('%-12s %s - File format not supported' % ('?', filename))
- elif options.output_mode == 'json':
- print_json(file=filename, type='error',
- error=type(exc).__name__, message=str(exc))
- else:
- log.exception('Failed to open %s -- probably not supported!' % filename)
- return_code = RETURN_OPEN_ERROR if return_code == 0 \
- else RETURN_SEVERAL_ERRS
- except ProcessingError as exc:
- if options.output_mode in ('triage', 'unspecified'):
- print('%-12s %s - %s' % ('!ERROR', filename, exc.orig_exc))
- elif options.output_mode == 'json':
- print_json(file=filename, type='error',
- error=type(exc).__name__,
- message=str(exc.orig_exc))
- else:
- log.exception('Error processing file %s (%s)!'
- % (filename, exc.orig_exc))
- return_code = RETURN_PARSE_ERROR if return_code == 0 \
- else RETURN_SEVERAL_ERRS
- except FileIsEncryptedError as exc:
- if options.output_mode in ('triage', 'unspecified'):
- print('%-12s %s - File is encrypted' % ('!ERROR', filename))
- elif options.output_mode == 'json':
- print_json(file=filename, type='error',
- error=type(exc).__name__, message=str(exc))
- else:
- log.exception('File %s is encrypted!' % (filename))
- return_code = RETURN_ENCRYPTED if return_code == 0 \
- else RETURN_SEVERAL_ERRS
- # Here we do not close the vba_parser, because process_file may need it below.
+ if options.output_mode == 'triage':
+ # print container name when it changes:
+ if container != previous_container:
+ if container is not None:
+ print('\nFiles in %s:' % container)
+ previous_container = container
+
+ # process the file, handling errors and encryption
+ curr_return_code = process_file(filename, data, container, options)
+ count += 1
+
+ # adjust overall return code
+ if curr_return_code == RETURN_OK:
+ continue # do not modify overall return code
+ if return_code == RETURN_OK:
+ return_code = curr_return_code # first error return code
+ else:
+ return_code = RETURN_SEVERAL_ERRS # several errors
if options.output_mode == 'triage':
print('\n(Flags: OpX=OpenXML, XML=Word2003XML, FlX=FlatOPC XML, MHT=MHTML, TXT=Text, M=Macros, ' \
'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \
'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n')
- if count == 1 and options.output_mode == 'unspecified':
- # if options -t, -d and -j were not specified and it's a single file, print details:
- vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
- display_code=options.display_code,
- hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
- show_deobfuscated_code=options.show_deobfuscated_code,
- deobfuscate=options.deobfuscate)
-
if options.output_mode == 'json':
# print last json entry (a last one without a comma) and closing ]
print_json(type='MetaInformation', return_code=return_code,
n_processed=count, _json_is_last=True)
+ except crypto.CryptoErrorBase as exc:
+     log.exception('Problems with encryption in main: {}'.format(exc),
+                   exc_info=True)
+     if return_code == RETURN_OK:
+         return_code = RETURN_ENCRYPTED
+     else:
+         return_code = RETURN_SEVERAL_ERRS
except Exception as exc:
# some unexpected error, maybe some of the types caught in except clauses
# above were not sufficient. This is very bad, so log complete trace at exception level
diff --git a/oletools/olevba3.py b/oletools/olevba3.py
index a4258c3..23d65ba 100644
--- a/oletools/olevba3.py
+++ b/oletools/olevba3.py
@@ -1,282 +1,10 @@
#!/usr/bin/env python
-"""
-olevba3.py
-olevba is a script to parse OLE and OpenXML files such as MS Office documents
-(e.g. Word, Excel), to extract VBA Macro code in clear text, deobfuscate
-and analyze malicious macros.
+# olevba3 is a stub that redirects to olevba.py, for backwards compatibility
-olevba3 is the version of olevba that runs on Python 3.x.
+import sys, os, warnings
-Supported formats:
-- Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm)
-- Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb)
-- PowerPoint 97-2003 (.ppt), PowerPoint 2007+ (.pptm, .ppsm)
-- Word/PowerPoint 2007+ XML (aka Flat OPC)
-- Word 2003 XML (.xml)
-- Word/Excel Single File Web Page / MHTML (.mht)
-- Publisher (.pub)
-- raises an error if run with files encrypted using MS Crypto API RC4
-
-Author: Philippe Lagadec - http://www.decalage.info
-License: BSD, see source code or documentation
-
-olevba is part of the python-oletools package:
-http://www.decalage.info/python/oletools
-
-olevba is based on source code from officeparser by John William Davison
-https://github.com/unixfreak0037/officeparser
-"""
-
-# === LICENSE ==================================================================
-
-# olevba is copyright (c) 2014-2018 Philippe Lagadec (http://www.decalage.info)
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without modification,
-# are permitted provided that the following conditions are met:
-#
-# * Redistributions of source code must retain the above copyright notice, this
-# list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-# olevba contains modified source code from the officeparser project, published
-# under the following MIT License (MIT):
-#
-# officeparser is copyright (c) 2014 John William Davison
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-from __future__ import print_function
-
-
-#------------------------------------------------------------------------------
-# CHANGELOG:
-# 2014-08-05 v0.01 PL: - first version based on officeparser code
-# 2014-08-14 v0.02 PL: - fixed bugs in code, added license from officeparser
-# 2014-08-15 PL: - fixed incorrect value check in projecthelpfilepath Record
-# 2014-08-15 v0.03 PL: - refactored extract_macros to support OpenXML formats
-# and to find the VBA project root anywhere in the file
-# 2014-11-29 v0.04 PL: - use olefile instead of OleFileIO_PL
-# 2014-12-05 v0.05 PL: - refactored most functions into a class, new API
-# - added detect_vba_macros
-# 2014-12-10 v0.06 PL: - hide first lines with VB attributes
-# - detect auto-executable macros
-# - ignore empty macros
-# 2014-12-14 v0.07 PL: - detect_autoexec() is now case-insensitive
-# 2014-12-15 v0.08 PL: - improved display for empty macros
-# - added pattern extraction
-# 2014-12-25 v0.09 PL: - added suspicious keywords detection
-# 2014-12-27 v0.10 PL: - added OptionParser, main and process_file
-# - uses xglob to scan several files with wildcards
-# - option -r to recurse subdirectories
-# - option -z to scan files in password-protected zips
-# 2015-01-02 v0.11 PL: - improved filter_vba to detect colons
-# 2015-01-03 v0.12 PL: - fixed detect_patterns to detect all patterns
-# - process_file: improved display, shows container file
-# - improved list of executable file extensions
-# 2015-01-04 v0.13 PL: - added several suspicious keywords, improved display
-# 2015-01-08 v0.14 PL: - added hex strings detection and decoding
-# - fixed issue #2, decoding VBA stream names using
-# specified codepage and unicode stream names
-# 2015-01-11 v0.15 PL: - added new triage mode, options -t and -d
-# 2015-01-16 v0.16 PL: - fix for issue #3 (exception when module name="text")
-# - added several suspicious keywords
-# - added option -i to analyze VBA source code directly
-# 2015-01-17 v0.17 PL: - removed .com from the list of executable extensions
-# - added scan_vba to run all detection algorithms
-# - decoded hex strings are now also scanned + reversed
-# 2015-01-23 v0.18 PL: - fixed issue #3, case-insensitive search in code_modules
-# 2015-01-24 v0.19 PL: - improved the detection of IOCs obfuscated with hex
-# strings and StrReverse
-# 2015-01-26 v0.20 PL: - added option --hex to show all hex strings decoded
-# 2015-01-29 v0.21 PL: - added Dridex obfuscation decoding
-# - improved display, shows obfuscation name
-# 2015-02-01 v0.22 PL: - fixed issue #4: regex for URL, e-mail and exe filename
-# - added Base64 obfuscation decoding (contribution from
-# @JamesHabben)
-# 2015-02-03 v0.23 PL: - triage now uses VBA_Scanner results, shows Base64 and
-# Dridex strings
-# - exception handling in detect_base64_strings
-# 2015-02-07 v0.24 PL: - renamed option --hex to --decode, fixed display
-# - display exceptions with stack trace
-# - added several suspicious keywords
-# - improved Base64 detection and decoding
-# - fixed triage mode not to scan attrib lines
-# 2015-03-04 v0.25 PL: - added support for Word 2003 XML
-# 2015-03-22 v0.26 PL: - added suspicious keywords for sandboxing and
-# virtualisation detection
-# 2015-05-06 v0.27 PL: - added support for MHTML files with VBA macros
-# (issue #10 reported by Greg from SpamStopsHere)
-# 2015-05-24 v0.28 PL: - improved support for MHTML files with modified header
-# (issue #11 reported by Thomas Chopitea)
-# 2015-05-26 v0.29 PL: - improved MSO files parsing, taking into account
-# various data offsets (issue #12)
-# - improved detection of MSO files, avoiding incorrect
-# parsing errors (issue #7)
-# 2015-05-29 v0.30 PL: - added suspicious keywords suggested by @ozhermit,
-# Davy Douhine (issue #9), issue #13
-# 2015-06-16 v0.31 PL: - added generic VBA expression deobfuscation (chr,asc,etc)
-# 2015-06-19 PL: - added options -a, -c, --each, --attr
-# 2015-06-21 v0.32 PL: - always display decoded strings which are printable
-# - fix VBA_Scanner.scan to return raw strings, not repr()
-# 2015-07-09 v0.40 PL: - removed usage of sys.stderr which causes issues
-# 2015-07-12 PL: - added Hex function decoding to VBA Parser
-# 2015-07-13 PL: - added Base64 function decoding to VBA Parser
-# 2015-09-06 PL: - improved VBA_Parser, refactored the main functions
-# 2015-09-13 PL: - moved main functions to a class VBA_Parser_CLI
-# - fixed issue when analysis was done twice
-# 2015-09-15 PL: - remove duplicate IOCs from results
-# 2015-09-16 PL: - join long VBA lines ending with underscore before scan
-# - disabled unused option --each
-# 2015-09-22 v0.41 PL: - added new option --reveal
-# - added suspicious strings for PowerShell.exe options
-# 2015-10-09 v0.42 PL: - VBA_Parser: split each format into a separate method
-# 2015-10-10 PL: - added support for text files with VBA source code
-# 2015-11-17 PL: - fixed bug with --decode option
-# 2015-12-16 PL: - fixed bug in main (no options input anymore)
-# - improved logging, added -l option
-# 2016-01-31 PL: - fixed issue #31 in VBA_Parser.open_mht
-# - fixed issue #32 by monkeypatching email.feedparser
-# 2016-02-07 PL: - KeyboardInterrupt is now raised properly
-# 2016-02-20 v0.43 PL: - fixed issue #34 in the VBA parser and vba_chr
-# 2016-02-29 PL: - added Workbook_Activate to suspicious keywords
-# 2016-03-08 v0.44 PL: - added VBA Form strings extraction and analysis
-# 2016-03-04 v0.45 CH: - added JSON output (by Christian Herdtweck)
-# 2016-03-16 CH: - added option --no-deobfuscate (temporary)
-# 2016-04-19 v0.46 PL: - new option --deobf instead of --no-deobfuscate
-# - updated suspicious keywords
-# 2016-05-04 v0.47 PL: - look for VBA code in any stream including orphans
-# 2016-04-28 CH: - return an exit code depending on the results
-# - improved error and exception handling
-# - improved JSON output
-# 2016-05-12 CH: - added support for PowerPoint 97-2003 files
-# 2016-06-06 CH: - improved handling of unicode VBA module names
-# 2016-06-07 CH: - added option --relaxed, stricter parsing by default
-# 2016-06-12 v0.50 PL: - fixed small bugs in VBA parsing code
-# 2016-07-01 PL: - fixed issue #58 with format() to support Python 2.6
-# 2016-07-29 CH: - fixed several bugs including #73 (Mac Roman encoding)
-# 2016-08-31 PL: - added autoexec keyword InkPicture_Painted
-# - detect_autoexec now returns the exact keyword found
-# 2016-09-05 PL: - added autoexec keywords for MS Publisher (.pub)
-# 2016-09-06 PL: - fixed issue #20, is_zipfile on Python 2.6
-# 2016-09-12 PL: - enabled packrat to improve pyparsing performance
-# 2016-10-25 PL: - fixed raise and print statements for Python 3
-# 2016-11-03 v0.51 PL: - added EnumDateFormats and EnumSystemLanguageGroupsW
-# 2017-02-07 PL: - temporary fix for issue #132
-# - added keywords for Mac-specific macros (issue #130)
-# 2017-03-08 PL: - fixed absolute imports
-# 2017-03-16 PL: - fixed issues #148 and #149 for option --reveal
-# 2017-05-19 PL: - added enable_logging to fix issue #154
-# 2017-05-31 c1fe: - PR #135 fixing issue #132 for some Mac files
-# 2017-06-08 PL: - fixed issue #122 Chr() with negative numbers
-# 2017-06-15 PL: - deobfuscation line by line to handle large files
-# 2017-07-11 v0.52 PL: - raise exception instead of sys.exit (issue #180)
-# 2018-03-19 PL: - removed pyparsing from the thirdparty subfolder
-# 2018-05-13 v0.53 PL: - added support for Word/PowerPoint 2007+ XML (FlatOPC)
-# (issue #283)
-# 2018-06-11 v0.53.1 MHW: - fixed #320: chr instead of unichr on python 3
-# 2018-06-12 MHW: - fixed #322: import reduce from functools
-# 2018-09-11 v0.54 PL: - olefile is now a dependency
-# 2018-10-25 CH: - detect encryption and raise error if detected
-
-__version__ = '0.54dev4'
-
-#------------------------------------------------------------------------------
-# TODO:
-# + setup logging (common with other oletools)
-# + add xor bruteforcing like bbharvest
-# + options -a and -c should imply -d
-
-# TODO later:
-# + performance improvement: instead of searching each keyword separately,
-# first split vba code into a list of words (per line), then check each
-# word against a dict. (or put vba words into a set/dict?)
-# + for regex, maybe combine them into a single re with named groups?
-# + add Yara support, include sample rules? plugins like balbuzard?
-# + add balbuzard support
-# + output to file (replace print by file.write, sys.stdout by default)
-# + look for VBA in embedded documents (e.g. Excel in Word)
-# + support SRP streams (see Lenny's article + links and sample)
-# - python 3.x support
-# - check VBA macros in Visio, Access, Project, etc
-# - extract_macros: convert to a class, split long function into smaller methods
-# - extract_macros: read bytes from stream file objects instead of strings
-# - extract_macros: use combined struct.unpack instead of many calls
-# - all except clauses should target specific exceptions
-
-#------------------------------------------------------------------------------
-# REFERENCES:
-# - [MS-OVBA]: Microsoft Office VBA File Format Structure
-# http://msdn.microsoft.com/en-us/library/office/cc313094%28v=office.12%29.aspx
-# - officeparser: https://github.com/unixfreak0037/officeparser
-
-
-#--- IMPORTS ------------------------------------------------------------------
-
-import sys
-import os
-import logging
-import struct
-from _io import StringIO,BytesIO
-import math
-import zipfile
-import re
-import optparse
-import binascii
-import base64
-import zlib
-import email # for MHTML parsing
-import string # for printable
-import json # for json output mode (argument --json)
-from functools import reduce
-
-# import lxml or ElementTree for XML parsing:
-try:
- # lxml: best performance for XML processing
- import lxml.etree as ET
-except ImportError:
- try:
- # Python 2.5+: batteries included
- import xml.etree.cElementTree as ET
- except ImportError:
- try:
- # Python <2.5: standalone ElementTree install
- import elementtree.cElementTree as ET
- except ImportError:
- raise ImportError("lxml or ElementTree are not installed, " \
- + "see http://codespeak.net/lxml " \
- + "or http://effbot.org/zone/element-index.htm")
+warnings.warn('olevba3 is deprecated, olevba should be used instead.', DeprecationWarning)
# IMPORTANT: it should be possible to run oletools directly as scripts
# in any directory without installing them with pip or setup.py.
@@ -284,3374 +12,13 @@ except ImportError:
# And to enable Python 2+3 compatibility, we need to use absolute imports,
# so we add the oletools parent folder to sys.path (absolute+normalized path):
_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
-# print('_thismodule_dir = %r' % _thismodule_dir)
_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
-# print('_parent_dir = %r' % _thirdparty_dir)
-if not _parent_dir in sys.path:
+if _parent_dir not in sys.path:
sys.path.insert(0, _parent_dir)
-import olefile
-from oletools.thirdparty.prettytable import prettytable
-from oletools.thirdparty.xglob import xglob, PathNotFoundException
-from pyparsing import \
- CaselessKeyword, CaselessLiteral, Combine, Forward, Literal, \
- Optional, QuotedString,Regex, Suppress, Word, WordStart, \
- alphanums, alphas, hexnums,nums, opAssoc, srange, \
- infixNotation, ParserElement
-import oletools.ppt_parser as ppt_parser
-from oletools import rtfobj
-from oletools import oleid
-from oletools.common.errors import FileIsEncryptedError
-
-# monkeypatch email to fix issue #32:
-# allow header lines without ":"
-import email.feedparser
-email.feedparser.headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:?|[\t ])')
-
-# === PYTHON 2+3 SUPPORT ======================================================
-
-if sys.version_info[0] <= 2:
- # Python 2.x
- if sys.version_info[1] <= 6:
- # Python 2.6
- # use is_zipfile backported from Python 2.7:
- from thirdparty.zipfile27 import is_zipfile
- else:
- # Python 2.7
- from zipfile import is_zipfile
-else:
- # Python 3.x+
- from zipfile import is_zipfile
- # xrange is now called range:
- xrange = range
-
-
-# === PYTHON 3.0 - 3.4 SUPPORT ======================================================
-
-# From https://gist.github.com/ynkdir/867347/c5e188a4886bc2dd71876c7e069a7b00b6c16c61
-
-if sys.version_info >= (3, 0) and sys.version_info < (3, 5):
- import codecs
-
- _backslashreplace_errors = codecs.lookup_error("backslashreplace")
-
- def backslashreplace_errors(exc):
- if isinstance(exc, UnicodeDecodeError):
- u = "".join("\\x{0:02x}".format(c) for c in exc.object[exc.start:exc.end])
- return (u, exc.end)
- return _backslashreplace_errors(exc)
-
- codecs.register_error("backslashreplace", backslashreplace_errors)
-
-
-# === LOGGING =================================================================
-
-class NullHandler(logging.Handler):
- """
- Log Handler without output, to avoid printing messages if logging is not
- configured by the main application.
- Python 2.7 has logging.NullHandler, but this is necessary for 2.6:
- see https://docs.python.org/2.6/library/logging.html#configuring-logging-for-a-library
- """
- def emit(self, record):
- pass
-
-def get_logger(name, level=logging.CRITICAL+1):
- """
- Create a suitable logger object for this module.
- The goal is not to change settings of the root logger, to avoid getting
- other modules' logs on the screen.
- If a logger exists with same name, reuse it. (Else it would have duplicate
- handlers and messages would be doubled.)
- The level is set to CRITICAL+1 by default, to avoid any logging.
- """
- # First, test if there is already a logger with the same name, else it
- # will generate duplicate messages (due to duplicate handlers):
- if name in logging.Logger.manager.loggerDict:
- #NOTE: another less intrusive but more "hackish" solution would be to
- # use getLogger then test if its effective level is not default.
- logger = logging.getLogger(name)
- # make sure level is OK:
- logger.setLevel(level)
- return logger
- # get a new logger:
- logger = logging.getLogger(name)
- # only add a NullHandler for this logger, it is up to the application
- # to configure its own logging:
- logger.addHandler(NullHandler())
- logger.setLevel(level)
- return logger
-
-# a global logger object used for debugging:
-log = get_logger('olevba')
-
-
-def enable_logging():
- """
- Enable logging for this module (disabled by default).
- This will set the module-specific logger level to NOTSET, which
- means the main application controls the actual logging level.
- """
- log.setLevel(logging.NOTSET)
- # Also enable logging in the ppt_parser module:
- ppt_parser.enable_logging()
-
-
-
-#=== EXCEPTIONS ==============================================================
-
-class OlevbaBaseException(Exception):
- """ Base class for exceptions produced here for simpler except clauses """
- def __init__(self, msg, filename=None, orig_exc=None, **kwargs):
- if orig_exc:
- super(OlevbaBaseException, self).__init__(msg +
- ' ({0})'.format(orig_exc),
- **kwargs)
- else:
- super(OlevbaBaseException, self).__init__(msg, **kwargs)
- self.msg = msg
- self.filename = filename
- self.orig_exc = orig_exc
-
-
-class FileOpenError(OlevbaBaseException):
- """ raised by VBA_Parser constructor if all open_... attempts failed
-
- probably means the file type is not supported
- """
-
- def __init__(self, filename, orig_exc=None):
- super(FileOpenError, self).__init__(
- 'Failed to open file %s' % filename, filename, orig_exc)
-
-
-class ProcessingError(OlevbaBaseException):
- """ raised by VBA_Parser.process_file* functions """
-
- def __init__(self, filename, orig_exc):
- super(ProcessingError, self).__init__(
- 'Error processing file %s' % filename, filename, orig_exc)
-
-
-class MsoExtractionError(RuntimeError, OlevbaBaseException):
- """ raised by mso_file_extract if parsing MSO/ActiveMIME data failed """
-
- def __init__(self, msg):
- MsoExtractionError.__init__(self, msg)
- OlevbaBaseException.__init__(self, msg)
-
-
-class SubstreamOpenError(FileOpenError):
- """ special kind of FileOpenError: file is a substream of original file """
-
- def __init__(self, filename, subfilename, orig_exc=None):
- super(SubstreamOpenError, self).__init__(
- str(filename) + '/' + str(subfilename), orig_exc)
- self.filename = filename # overwrite setting in OlevbaBaseException
- self.subfilename = subfilename
-
-
-class UnexpectedDataError(OlevbaBaseException):
- """ raised when parsing is strict (=not relaxed) and data is unexpected """
-
- def __init__(self, stream_path, variable, expected, value):
- if isinstance(expected, int):
- es = '{0:04X}'.format(expected)
- elif isinstance(expected, tuple):
- es = ','.join('{0:04X}'.format(e) for e in expected)
- es = '({0})'.format(es)
- else:
- raise ValueError('Unknown type encountered: {0}'.format(type(expected)))
- super(UnexpectedDataError, self).__init__(
- 'Unexpected value in {0} for variable {1}: '
- 'expected {2} but found {3:04X}!'
- .format(stream_path, variable, es, value))
- self.stream_path = stream_path
- self.variable = variable
- self.expected = expected
- self.value = value
-
-#--- CONSTANTS ----------------------------------------------------------------
-
-# return codes
-RETURN_OK = 0
-RETURN_WARNINGS = 1 # (reserved, not used yet)
-RETURN_WRONG_ARGS = 2 # (fixed, built into optparse)
-RETURN_FILE_NOT_FOUND = 3
-RETURN_XGLOB_ERR = 4
-RETURN_OPEN_ERROR = 5
-RETURN_PARSE_ERROR = 6
-RETURN_SEVERAL_ERRS = 7
-RETURN_UNEXPECTED = 8
-RETURN_ENCRYPTED = 9
-
-# MAC codepages (from http://stackoverflow.com/questions/1592925/decoding-mac-os-text-in-python)
-MAC_CODEPAGES = {
- 10000: 'mac-roman',
- 10001: 'shiftjis', # not found: 'mac-shift-jis',
- 10003: 'ascii', # nothing appropriate found: 'mac-hangul',
- 10008: 'gb2321', # not found: 'mac-gb2312',
- 10002: 'big5', # not found: 'mac-big5',
- 10005: 'hebrew', # not found: 'mac-hebrew',
- 10004: 'mac-arabic',
- 10006: 'mac-greek',
- 10081: 'mac-turkish',
- 10021: 'thai', # not found: mac-thai',
- 10029: 'maccentraleurope', # not found: 'mac-east europe',
- 10007: 'ascii', # nothing appropriate found: 'mac-russian',
-}
-
-# URL and message to report issues:
-URL_OLEVBA_ISSUES = 'https://github.com/decalage2/oletools/issues'
-MSG_OLEVBA_ISSUES = 'Please report this issue on %s' % URL_OLEVBA_ISSUES
-
-# Container types:
-TYPE_OLE = 'OLE'
-TYPE_OpenXML = 'OpenXML'
-TYPE_FlatOPC_XML = 'FlatOPC_XML'
-TYPE_Word2003_XML = 'Word2003_XML'
-TYPE_MHTML = 'MHTML'
-TYPE_TEXT = 'Text'
-TYPE_PPT = 'PPT'
-
-# short tag to display file types in triage mode:
-TYPE2TAG = {
- TYPE_OLE: 'OLE:',
- TYPE_OpenXML: 'OpX:',
- TYPE_FlatOPC_XML: 'FlX:',
- TYPE_Word2003_XML: 'XML:',
- TYPE_MHTML: 'MHT:',
- TYPE_TEXT: 'TXT:',
- TYPE_PPT: 'PPT',
-}
-
-
-# MSO files ActiveMime header magic
-MSO_ACTIVEMIME_HEADER = b'ActiveMime'
-
-MODULE_EXTENSION = "bas"
-CLASS_EXTENSION = "cls"
-FORM_EXTENSION = "frm"
-
-# Namespaces and tags for Word2003 XML parsing:
-NS_W = '{http://schemas.microsoft.com/office/word/2003/wordml}'
-# the tag contains the VBA macro code:
-TAG_BINDATA = NS_W + 'binData'
-ATTR_NAME = NS_W + 'name'
-
-# Namespaces and tags for Word/PowerPoint 2007+ XML parsing:
-# root:
-NS_XMLPACKAGE = '{http://schemas.microsoft.com/office/2006/xmlPackage}'
-TAG_PACKAGE = NS_XMLPACKAGE + 'package'
-# the tag includes that contains the VBA macro code in Base64:
-#
-TAG_PKGPART = NS_XMLPACKAGE + 'part'
-ATTR_PKG_NAME = NS_XMLPACKAGE + 'name'
-ATTR_PKG_CONTENTTYPE = NS_XMLPACKAGE + 'contentType'
-CTYPE_VBAPROJECT = "application/vnd.ms-office.vbaProject"
-TAG_PKGBINDATA = NS_XMLPACKAGE + 'binaryData'
-
-# Keywords to detect auto-executable macros
-AUTOEXEC_KEYWORDS = {
- # MS Word:
- 'Runs when the Word document is opened':
- ('AutoExec', 'AutoOpen', 'DocumentOpen'),
- 'Runs when the Word document is closed':
- ('AutoExit', 'AutoClose', 'Document_Close', 'DocumentBeforeClose'),
- 'Runs when the Word document is modified':
- ('DocumentChange',),
- 'Runs when a new Word document is created':
- ('AutoNew', 'Document_New', 'NewDocument'),
-
- # MS Word and Publisher:
- 'Runs when the Word or Publisher document is opened':
- ('Document_Open',),
- 'Runs when the Publisher document is closed':
- ('Document_BeforeClose',),
-
- # MS Excel:
- 'Runs when the Excel Workbook is opened':
- ('Auto_Open', 'Workbook_Open', 'Workbook_Activate'),
- 'Runs when the Excel Workbook is closed':
- ('Auto_Close', 'Workbook_Close'),
-
- # any MS Office application:
- 'Runs when the file is opened (using InkPicture ActiveX object)':
- # ref:https://twitter.com/joe4security/status/770691099988025345
- (r'\w+_Painted',),
- 'Runs when the file is opened and ActiveX objects trigger events':
- (r'\w+_(?:GotFocus|LostFocus|MouseHover)',),
-}
-
-# Suspicious Keywords that may be used by malware
-# See VBA language reference: http://msdn.microsoft.com/en-us/library/office/jj692818%28v=office.15%29.aspx
-SUSPICIOUS_KEYWORDS = {
- #TODO: use regex to support variable whitespaces
- 'May read system environment variables':
- ('Environ',),
- 'May open a file':
- ('Open',),
- 'May write to a file (if combined with Open)':
- #TODO: regex to find Open+Write on same line
- ('Write', 'Put', 'Output', 'Print #'),
- 'May read or write a binary file (if combined with Open)':
- #TODO: regex to find Open+Binary on same line
- ('Binary',),
- 'May copy a file':
- ('FileCopy', 'CopyFile'),
- #FileCopy: http://msdn.microsoft.com/en-us/library/office/gg264390%28v=office.15%29.aspx
- #CopyFile: http://msdn.microsoft.com/en-us/library/office/gg264089%28v=office.15%29.aspx
- 'May delete a file':
- ('Kill',),
- 'May create a text file':
- ('CreateTextFile', 'ADODB.Stream', 'WriteText', 'SaveToFile'),
- #CreateTextFile: http://msdn.microsoft.com/en-us/library/office/gg264617%28v=office.15%29.aspx
- #ADODB.Stream sample: http://pastebin.com/Z4TMyuq6
- 'May run an executable file or a system command':
- ('Shell', 'vbNormal', 'vbNormalFocus', 'vbHide', 'vbMinimizedFocus', 'vbMaximizedFocus', 'vbNormalNoFocus',
- 'vbMinimizedNoFocus', 'WScript.Shell', 'Run', 'ShellExecute'),
- # MacScript: see https://msdn.microsoft.com/en-us/library/office/gg264812.aspx
- 'May run an executable file or a system command on a Mac':
- ('MacScript',),
- 'May run an executable file or a system command on a Mac (if combined with libc.dylib)':
- ('system', 'popen', r'exec[lv][ep]?'),
- #Shell: http://msdn.microsoft.com/en-us/library/office/gg278437%28v=office.15%29.aspx
- #WScript.Shell+Run sample: http://pastebin.com/Z4TMyuq6
- 'May run PowerShell commands':
- #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
- #also: https://bitbucket.org/decalage/oletools/issues/14/olevba-library-update-ioc
- # ref: https://blog.netspi.com/15-ways-to-bypass-the-powershell-execution-policy/
- # TODO: add support for keywords starting with a non-alpha character, such as "-noexit"
- # TODO: '-command', '-EncodedCommand', '-scriptblock'
- ('PowerShell', 'noexit', 'ExecutionPolicy', 'noprofile', 'command', 'EncodedCommand',
- 'invoke-command', 'scriptblock', 'Invoke-Expression', 'AuthorizationManager'),
- 'May run an executable file or a system command using PowerShell':
- ('Start-Process',),
- 'May hide the application':
- ('Application.Visible', 'ShowWindow', 'SW_HIDE'),
- 'May create a directory':
- ('MkDir',),
- 'May save the current workbook':
- ('ActiveWorkbook.SaveAs',),
- 'May change which directory contains files to open at startup':
- #TODO: confirm the actual effect
- ('Application.AltStartupPath',),
- 'May create an OLE object':
- ('CreateObject',),
- 'May create an OLE object using PowerShell':
- ('New-Object',),
- 'May run an application (if combined with CreateObject)':
- ('Shell.Application',),
- 'May enumerate application windows (if combined with Shell.Application object)':
- ('Windows', 'FindWindow'),
- 'May run code from a DLL':
- #TODO: regex to find declare+lib on same line - see mraptor
- ('Lib',),
- 'May run code from a library on a Mac':
- #TODO: regex to find declare+lib on same line - see mraptor
- ('libc.dylib', 'dylib'),
- 'May inject code into another process':
- ('CreateThread', 'VirtualAlloc', # (issue #9) suggested by Davy Douhine - used by MSF payload
- 'VirtualAllocEx', 'RtlMoveMemory',
- ),
- 'May run a shellcode in memory':
- ('EnumSystemLanguageGroupsW?', # Used by Hancitor in Oct 2016
- 'EnumDateFormats(?:W|(?:Ex){1,2})?'), # see https://msdn.microsoft.com/en-us/library/windows/desktop/dd317810(v=vs.85).aspx
- 'May download files from the Internet':
- #TODO: regex to find urlmon+URLDownloadToFileA on same line
- ('URLDownloadToFileA', 'Msxml2.XMLHTTP', 'Microsoft.XMLHTTP',
- 'MSXML2.ServerXMLHTTP', # suggested in issue #13
- 'User-Agent', # sample from @ozhermit: http://pastebin.com/MPc3iV6z
- ),
- 'May download files from the Internet using PowerShell':
- #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
- ('Net.WebClient', 'DownloadFile', 'DownloadString'),
- 'May control another application by simulating user keystrokes':
- ('SendKeys', 'AppActivate'),
- #SendKeys: http://msdn.microsoft.com/en-us/library/office/gg278655%28v=office.15%29.aspx
- 'May attempt to obfuscate malicious function calls':
- ('CallByName',),
- #CallByName: http://msdn.microsoft.com/en-us/library/office/gg278760%28v=office.15%29.aspx
- 'May attempt to obfuscate specific strings (use option --deobf to deobfuscate)':
- #TODO: regex to find several Chr*, not just one
- ('Chr', 'ChrB', 'ChrW', 'StrReverse', 'Xor'),
- #Chr: http://msdn.microsoft.com/en-us/library/office/gg264465%28v=office.15%29.aspx
- 'May read or write registry keys':
- #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
- ('RegOpenKeyExA', 'RegOpenKeyEx', 'RegCloseKey'),
- 'May read registry keys':
- #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
- ('RegQueryValueExA', 'RegQueryValueEx',
- 'RegRead', #with Wscript.Shell
- ),
- 'May detect virtualization':
- # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
- (r'SYSTEM\ControlSet001\Services\Disk\Enum', 'VIRTUAL', 'VMWARE', 'VBOX'),
- 'May detect Anubis Sandbox':
- # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
- # NOTES: this sample also checks App.EXEName but that seems to be a bug, it works in VB6 but not in VBA
- # ref: http://www.syssec-project.eu/m/page-media/3/disarm-raid11.pdf
- ('GetVolumeInformationA', 'GetVolumeInformation', # with kernel32.dll
- '1824245000', r'HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\ProductId',
- '76487-337-8429955-22614', 'andy', 'sample', r'C:\exec\exec.exe', 'popupkiller'
- ),
- 'May detect Sandboxie':
- # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
- # ref: http://www.cplusplus.com/forum/windows/96874/
- ('SbieDll.dll', 'SandboxieControlWndClass'),
- 'May detect Sunbelt Sandbox':
- # ref: http://www.cplusplus.com/forum/windows/96874/
- (r'C:\file.exe',),
- 'May detect Norman Sandbox':
- # ref: http://www.cplusplus.com/forum/windows/96874/
- ('currentuser',),
- 'May detect CW Sandbox':
- # ref: http://www.cplusplus.com/forum/windows/96874/
- ('Schmidti',),
- 'May detect WinJail Sandbox':
- # ref: http://www.cplusplus.com/forum/windows/96874/
- ('Afx:400000:0',),
-}
-
# Building blocks for the regular expression that matches a URL:
# http://en.wikipedia.org/wiki/Uniform_resource_locator
# http://www.w3.org/Addressing/URL/uri-spec.html
#TODO: also support username:password@server
#TODO: other protocols (file, gopher, wais, ...?)
SCHEME = r'\b(?:http|ftp)s?'
# see http://en.wikipedia.org/wiki/List_of_Internet_top-level_domains
TLD = r'(?:xn--[a-zA-Z0-9]{4,20}|[a-zA-Z]{2,20})'
DNS_NAME = r'(?:[a-zA-Z0-9\-\.]+\.%s)' % TLD
#TODO: IPv6 - see https://www.debuggex.com/
# A literal numeric IPv6 address may be given, but must be enclosed in [ ] e.g. [db8:0cec::99:123a]
NUMBER_0_255 = r'(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'
IPv4 = r'(?:%s\.){3}%s' % (NUMBER_0_255, NUMBER_0_255)
# The IPv4 alternative is listed before the DNS name because it is more specific
SERVER = '(?:%s|%s)' % (IPv4, DNS_NAME)
PORT = r'(?:\:[0-9]{1,5})?'
SERVER_PORT = ''.join((SERVER, PORT))
URL_PATH = r'(?:/[a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~]*)?'  # [^\.\,\)\(\s"]
URL_RE = ''.join((SCHEME, r'\://', SERVER_PORT, URL_PATH))
re_url = re.compile(URL_RE)
-
-
-# Patterns to be extracted (IP addresses, URLs, etc)
-# From patterns.py in balbuzard
-RE_PATTERNS = (
- ('URL', re.compile(URL_RE)),
- ('IPv4 address', re.compile(IPv4)),
- # TODO: add IPv6
- ('E-mail address', re.compile(r'(?i)\b[A-Z0-9._%+-]+@' + SERVER + '\b')),
- # ('Domain name', re.compile(r'(?=^.{1,254}$)(^(?:(?!\d+\.|-)[a-zA-Z0-9_\-]{1,63}(? char
# Grammar for a call to Chr(), also accepting an optional "B" or "W" right
# after "Chr" and an optional trailing "$", with an integer expression as
# the single argument.
vba_chr = Suppress(
    Combine(WordStart(vba_identifier_chars) + CaselessLiteral('Chr')
            + Optional(CaselessLiteral('B') | CaselessLiteral('W')) + Optional('$'))
    + '(') + vba_expr_int + Suppress(')')


def vba_chr_tostr(t):
    """
    Parse action converting the integer argument of a Chr() call to a string.

    :param t: parsed tokens; t[0] is the value of the integer expression
    :return: VbaExpressionString holding the corresponding character, or the
        text 'Chr(<value>)' when the value cannot be converted
    """
    i = t[0]
    try:
        # On Python 3, chr() handles the whole Unicode range. The former
        # chr(i).encode(...) branch for values above 255 produced a bytes
        # object, which presumably rendered as "b'...'" once wrapped in
        # VbaExpressionString (NOTE(review): assumes it is a str subclass).
        return VbaExpressionString(chr(i))
    except (TypeError, ValueError, OverflowError):
        # ValueError/OverflowError: value outside the valid code point range;
        # TypeError: non-integer value (e.g. the result of a true division).
        log.exception('ERROR: incorrect parameter value for chr(): %r' % i)
        return VbaExpressionString('Chr(%r)' % i)


vba_chr.setParseAction(vba_chr_tostr)
-
-
# --- ASC --------------------------------------------------------------------

# Asc(char) => int
#TODO: see MS-VBAL 6.1.2.11.1.1 page 240 => AscB, AscW
def _asc_parse_action(tokens):
    """Parse action for Asc(): return the code point of the string argument."""
    return ord(tokens[0])


vba_asc = Suppress(CaselessKeyword('Asc') + '(') + vba_expr_str + Suppress(')')
vba_asc.setParseAction(_asc_parse_action)


# --- VAL --------------------------------------------------------------------

# Val(string) => int
# TODO: make sure the behavior of VBA's val is fully covered
def _val_parse_action(tokens):
    """Parse action for Val(): convert the stripped string argument with int()."""
    return int(tokens[0].strip())


vba_val = Suppress(CaselessKeyword('Val') + '(') + vba_expr_str + Suppress(')')
vba_val.setParseAction(_val_parse_action)


# --- StrReverse() --------------------------------------------------------------------

# StrReverse(string) => string
def _strreverse_parse_action(tokens):
    """Parse action for StrReverse(): return the string argument reversed."""
    return VbaExpressionString(str(tokens[0])[::-1])


strReverse = Suppress(CaselessKeyword('StrReverse') + '(') + vba_expr_str + Suppress(')')
strReverse.setParseAction(_strreverse_parse_action)


# --- ENVIRON() --------------------------------------------------------------------

# Environ("name") => just translated to "%name%", that is enough for malware analysis
def _environ_parse_action(tokens):
    """Parse action for Environ(): wrap the variable name between percent signs."""
    return VbaExpressionString('%%%s%%' % tokens[0])


environ = Suppress(CaselessKeyword('Environ') + '(') + vba_expr_str + Suppress(')')
environ.setParseAction(_environ_parse_action)
-
-
# --- IDENTIFIER -------------------------------------------------------------

#TODO: see MS-VBAL 3.3.5 page 33
# 3.3.5 Identifier Tokens
# Latin-identifier = first-Latin-identifier-character *subsequent-Latin-identifier-character
# first-Latin-identifier-character = (%x0041-005A / %x0061-007A) ; A-Z / a-z
# subsequent-Latin-identifier-character = first-Latin-identifier-character / DIGIT / %x5F ; underscore
latin_identifier = Word(initChars=alphas, bodyChars=alphanums + '_')


def _decoded_bytes_to_vba_string(data):
    """
    Turn the bytes produced by a hex/base64 decoder into a VbaExpressionString.

    binascii returns bytes on Python 3; wrapping them directly in a string
    type would presumably yield their "b'...'" repr instead of the text
    (NOTE(review): assumes VbaExpressionString is a str subclass). The bytes
    are decoded the same way detect_hex_strings does.

    :param data: bytes, decoded payload
    :return: VbaExpressionString
    """
    return VbaExpressionString(data.decode('utf-8', 'backslashreplace'))


# --- HEX FUNCTION -----------------------------------------------------------

# match any custom function name with a hex string as argument:
# TODO: accept vba_expr_str_item as argument, check if it is a hex or base64 string at runtime

# quoted string of at least two hexadecimal numbers of two digits:
quoted_hex_string = Suppress('"') + Combine(Word(hexnums, exact=2) * (2, None)) + Suppress('"')
quoted_hex_string.setParseAction(lambda t: str(t[0]))

hex_function_call = Suppress(latin_identifier) + Suppress('(') + \
    quoted_hex_string('hex_string') + Suppress(')')
hex_function_call.setParseAction(
    lambda t: _decoded_bytes_to_vba_string(binascii.a2b_hex(t.hex_string)))


# --- BASE64 FUNCTION -----------------------------------------------------------

# match any custom function name with a Base64 string as argument:
# TODO: accept vba_expr_str_item as argument, check if it is a hex or base64 string at runtime

# quoted string matching the BASE64_RE pattern:
quoted_base64_string = Suppress('"') + Regex(BASE64_RE) + Suppress('"')
quoted_base64_string.setParseAction(lambda t: str(t[0]))

base64_function_call = Suppress(latin_identifier) + Suppress('(') + \
    quoted_base64_string('base64_string') + Suppress(')')
base64_function_call.setParseAction(
    lambda t: _decoded_bytes_to_vba_string(binascii.a2b_base64(t.base64_string)))
-
-
-# ---STRING EXPRESSION -------------------------------------------------------
-
def concat_strings_list(tokens):
    """
    Parse action joining the operands of a VBA string concatenation
    (operators '+' or '&') into a single string.

    :param tokens: parsed tokens; tokens[0] alternates operands and operators,
        such as [a, '&', b, '&', c, ...]
    :return: VbaExpressionString, the concatenated operands
    """
    # operands sit at the even positions, operators at the odd ones
    operands = tokens[0][0::2]
    joined = ''.join(operands)
    return VbaExpressionString(joined)


vba_expr_str_item = (vba_chr | strReverse | environ | quoted_string | hex_function_call | base64_function_call)

# both concatenation operators are handled by the same parse action
_STR_OPERATORS = [
    ("+", 2, opAssoc.LEFT, concat_strings_list),
    ("&", 2, opAssoc.LEFT, concat_strings_list),
]
vba_expr_str <<= infixNotation(vba_expr_str_item, _STR_OPERATORS)
-
-
-# --- INTEGER EXPRESSION -------------------------------------------------------
-
def sum_ints_list(tokens):
    """
    Parse action adding up the operands of a VBA integer expression using '+'.

    :param tokens: parsed tokens; tokens[0] alternates operands and operators,
        such as [a, '+', b, '+', c, ...]
    :return: sum of the operands
    """
    total = 0
    # operands sit at the even positions, operators at the odd ones
    for operand in tokens[0][0::2]:
        total = total + operand
    return total
-
-
def subtract_ints_list(tokens):
    """
    Parse action evaluating a chain of VBA integer subtractions, left to right.

    :param tokens: parsed tokens; tokens[0] alternates operands and operators,
        such as [a, '-', b, '-', c, ...]
    :return: a - b - c - ...
    """
    def _minus(accumulated, operand):
        return accumulated - operand

    # operands sit at the even positions, operators at the odd ones
    return reduce(_minus, tokens[0][0::2])
-
-
def multiply_ints_list(tokens):
    """
    Parse action evaluating a chain of VBA integer multiplications.

    :param tokens: parsed tokens; tokens[0] alternates operands and operators,
        such as [a, '*', b, '*', c, ...]
    :return: a * b * c * ...
    """
    def _times(accumulated, operand):
        return accumulated * operand

    # operands sit at the even positions, operators at the odd ones
    return reduce(_times, tokens[0][0::2])
-
-
def divide_ints_list(tokens):
    """
    Parse action evaluating a chain of divisions in a VBA integer expression,
    left to right.

    Floor division is used so that the result stays an integer: on Python 3
    the '/' operator returns a float, which cannot be consumed by the rest of
    the integer grammar (for example chr() rejects floats).
    NOTE: for inexact divisions this truncates towards negative infinity,
    which is not what VBA does when it converts the quotient to an integer.

    :param tokens: parsed tokens; tokens[0] alternates operands and operators,
        such as [a, '/', b, '/', c, ...]
    :return: int, a // b // c // ...
    :raises ZeroDivisionError: if a divisor is zero
    """
    # operands sit at the even positions, operators at the odd ones
    integers = tokens[0][::2]
    return reduce(lambda x, y: x // y, integers)
-
-
vba_expr_int_item = (vba_asc | vba_val | integer)

# operators associativity:
# https://en.wikipedia.org/wiki/Operator_associativity

# One entry per operator, all binary and left-associative, each with its own
# parse action; the order of the entries is significant for infixNotation.
_INT_OPERATORS = [
    ("*", 2, opAssoc.LEFT, multiply_ints_list),
    ("/", 2, opAssoc.LEFT, divide_ints_list),
    ("-", 2, opAssoc.LEFT, subtract_ints_list),
    ("+", 2, opAssoc.LEFT, sum_ints_list),
]
vba_expr_int <<= infixNotation(vba_expr_int_item, _INT_OPERATORS)
-
-
-# see detect_vba_strings for the deobfuscation code using this grammar
-
-# === MSO/ActiveMime files parsing ===========================================
-
def is_mso_file(data):
    """
    Tell whether the provided data looks like a MSO/ActiveMime file, such as
    the ones created by Outlook in some cases, or by Word/Excel when saving a
    file with the MHTML format or the Word 2003 XML format.
    Only the ActiveMime magic at the very beginning of data is checked.

    :param data: bytes string, MSO/ActiveMime file content
    :return: bool, True if the file is MSO, False otherwise
    """
    has_activemime_magic = data.startswith(MSO_ACTIVEMIME_HEADER)
    return has_activemime_magic
-
-
# regex to find zlib block headers, starting with byte 0x78 = 'x'
# The pattern is a bytes literal because the MSO data it is meant to scan is
# documented as a bytes string: on Python 3 a str pattern cannot be applied
# to bytes (TypeError).
re_zlib_header = re.compile(b'x')
-
-
-def mso_file_extract(data):
- """
- Extract the data stored into a MSO/ActiveMime file, such as
- the ones created by Outlook in some cases, or Word/Excel when saving a
- file with the MHTML format or the Word 2003 XML format.
-
- :param data: bytes string, MSO/ActiveMime file content
- :return: bytes string, extracted data (uncompressed)
-
- raise a MsoExtractionError if the data cannot be extracted
- """
- # check the magic:
- assert is_mso_file(data)
-
- # In all the samples seen so far, Word always uses an offset of 0x32,
- # and Excel 0x22A. But we read the offset from the header to be more
- # generic.
- offsets = [0x32, 0x22A]
-
- # First, attempt to get the compressed data offset from the header
- # According to my tests, it should be an unsigned 16 bits integer,
- # at offset 0x1E (little endian) + add 46:
- try:
- offset = struct.unpack_from('> bit_count
- offset_mask = ~length_mask
- maximum_length = (0xFFFF >> bit_count) + 3
- return length_mask, offset_mask, bit_count, maximum_length
-
-
-def decompress_stream(compressed_container):
- """
- Decompress a stream according to MS-OVBA section 2.4.1
-
- compressed_container: string compressed according to the MS-OVBA 2.4.1.3.6 Compression algorithm
- return the decompressed container as a string (bytes)
- """
- # 2.4.1.2 State Variables
-
- # The following state is maintained for the CompressedContainer (section 2.4.1.1.1):
- # CompressedRecordEnd: The location of the byte after the last byte in the CompressedContainer (section 2.4.1.1.1).
- # CompressedCurrent: The location of the next byte in the CompressedContainer (section 2.4.1.1.1) to be read by
- # decompression or to be written by compression.
-
- # The following state is maintained for the current CompressedChunk (section 2.4.1.1.4):
- # CompressedChunkStart: The location of the first byte of the CompressedChunk (section 2.4.1.1.4) within the
- # CompressedContainer (section 2.4.1.1.1).
-
- # The following state is maintained for a DecompressedBuffer (section 2.4.1.1.2):
- # DecompressedCurrent: The location of the next byte in the DecompressedBuffer (section 2.4.1.1.2) to be written by
- # decompression or to be read by compression.
- # DecompressedBufferEnd: The location of the byte after the last byte in the DecompressedBuffer (section 2.4.1.1.2).
-
- # The following state is maintained for the current DecompressedChunk (section 2.4.1.1.3):
- # DecompressedChunkStart: The location of the first byte of the DecompressedChunk (section 2.4.1.1.3) within the
- # DecompressedBuffer (section 2.4.1.1.2).
-
- decompressed_container = bytearray() # result
- compressed_current = 0
-
- sig_byte = compressed_container[compressed_current]
- if sig_byte != 0x01:
- raise ValueError('invalid signature byte {0:02X}'.format(sig_byte))
-
- compressed_current += 1
-
- #NOTE: the definition of CompressedRecordEnd is ambiguous. Here we assume that
- # CompressedRecordEnd = len(compressed_container)
- while compressed_current < len(compressed_container):
- # 2.4.1.1.5
- compressed_chunk_start = compressed_current
- # chunk header = first 16 bits
- compressed_chunk_header = \
- struct.unpack("> 12) & 0x07
- if chunk_signature != 0b011:
- raise ValueError('Invalid CompressedChunkSignature in VBA compressed stream')
- # chunk flag = next bit - 1 == compressed, 0 == uncompressed
- chunk_flag = (compressed_chunk_header >> 15) & 0x01
- log.debug("chunk size = {0}, compressed flag = {1}".format(chunk_size, chunk_flag))
-
- #MS-OVBA 2.4.1.3.12: the maximum size of a chunk including its header is 4098 bytes (header 2 + data 4096)
- # The minimum size is 3 bytes
- # NOTE: there seems to be a typo in MS-OVBA, the check should be with 4098, not 4095 (which is the max value
- # in chunk header before adding 3.
- # Also the first test is not useful since a 12 bits value cannot be larger than 4095.
- if chunk_flag == 1 and chunk_size > 4098:
- raise ValueError('CompressedChunkSize > 4098 but CompressedChunkFlag == 1')
- if chunk_flag == 0 and chunk_size != 4098:
- raise ValueError('CompressedChunkSize != 4098 but CompressedChunkFlag == 0')
-
- # check if chunk_size goes beyond the compressed data, instead of silently cutting it:
- #TODO: raise an exception?
- if compressed_chunk_start + chunk_size > len(compressed_container):
- log.warning('Chunk size is larger than remaining compressed data')
- compressed_end = min([len(compressed_container), compressed_chunk_start + chunk_size])
- # read after chunk header:
- compressed_current = compressed_chunk_start + 2
-
- if chunk_flag == 0:
- # MS-OVBA 2.4.1.3.3 Decompressing a RawChunk
- # uncompressed chunk: read the next 4096 bytes as-is
- #TODO: check if there are at least 4096 bytes left
- decompressed_container.extend([compressed_container[compressed_current:compressed_current + 4096]])
- compressed_current += 4096
- else:
- # MS-OVBA 2.4.1.3.2 Decompressing a CompressedChunk
- # compressed chunk
- decompressed_chunk_start = len(decompressed_container)
- while compressed_current < compressed_end:
- # MS-OVBA 2.4.1.3.4 Decompressing a TokenSequence
- # log.debug('compressed_current = %d / compressed_end = %d' % (compressed_current, compressed_end))
- # FlagByte: 8 bits indicating if the following 8 tokens are either literal (1 byte of plain text) or
- # copy tokens (reference to a previous literal token)
- flag_byte = compressed_container[compressed_current]
- compressed_current += 1
- for bit_index in range(0, 8):
- # log.debug('bit_index=%d / compressed_current=%d / compressed_end=%d' % (bit_index, compressed_current, compressed_end))
- if compressed_current >= compressed_end:
- break
- # MS-OVBA 2.4.1.3.5 Decompressing a Token
- # MS-OVBA 2.4.1.3.17 Extract FlagBit
- flag_bit = (flag_byte >> bit_index) & 1
- #log.debug('bit_index=%d: flag_bit=%d' % (bit_index, flag_bit))
- if flag_bit == 0: # LiteralToken
- # copy one byte directly to output
- decompressed_container.extend([compressed_container[compressed_current]])
- compressed_current += 1
- else: # CopyToken
- # MS-OVBA 2.4.1.3.19.2 Unpack CopyToken
- copy_token = \
- struct.unpack("> temp2) + 1
- #log.debug('offset=%d length=%d' % (offset, length))
- copy_source = len(decompressed_container) - offset
- for index in range(copy_source, copy_source + length):
- decompressed_container.extend([decompressed_container[index]])
- compressed_current += 2
- return bytes(decompressed_container)
-
-
-def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
- """
- Extract VBA macros from an OleFileIO object.
- Internal function, do not call directly.
-
- vba_root: path to the VBA root storage, containing the VBA storage and the PROJECT stream
- vba_project: path to the PROJECT stream
- :param relaxed: If True, only create info/debug log entry if data is not as expected
- (e.g. opening substream fails); if False, raise an error in this case
- This is a generator, yielding (stream path, VBA filename, VBA source code) for each VBA code stream
- """
- # Open the PROJECT stream:
- project = ole.openstream(project_path)
- log.debug('relaxed is %s' % relaxed)
-
- # sample content of the PROJECT stream:
-
- ## ID="{5312AC8A-349D-4950-BDD0-49BE3C4DD0F0}"
- ## Document=ThisDocument/&H00000000
- ## Module=NewMacros
- ## Name="Project"
- ## HelpContextID="0"
- ## VersionCompatible32="393222000"
- ## CMG="F1F301E705E705E705E705"
- ## DPB="8F8D7FE3831F2020202020"
- ## GC="2D2FDD81E51EE61EE6E1"
- ##
- ## [Host Extender Info]
- ## &H00000001={3832D640-CF90-11CF-8E43-00A0C911005A};VBE;&H00000000
- ## &H00000002={000209F2-0000-0000-C000-000000000046};Word8.0;&H00000000
- ##
- ## [Workspace]
- ## ThisDocument=22, 29, 339, 477, Z
- ## NewMacros=-4, 42, 832, 510, C
-
- code_modules = {}
-
- for line in project:
- line = line.strip().decode('utf-8','ignore')
- if '=' in line:
- # split line at the 1st equal sign:
- name, value = line.split('=', 1)
- # looking for code modules
- # add the code module as a key in the dictionary
- # the value will be the extension needed later
- # The value is converted to lowercase, to allow case-insensitive matching (issue #3)
- value = value.lower()
- if name == 'Document':
- # split value at the 1st slash, keep 1st part:
- value = value.split('/', 1)[0]
- code_modules[value] = CLASS_EXTENSION
- elif name == 'Module':
- code_modules[value] = MODULE_EXTENSION
- elif name == 'Class':
- code_modules[value] = CLASS_EXTENSION
- elif name == 'BaseClass':
- code_modules[value] = FORM_EXTENSION
-
- # read data from dir stream (compressed)
- dir_compressed = ole.openstream(dir_path).read()
-
- def check_value(name, expected, value):
- if expected != value:
- if relaxed:
- log.error("invalid value for {0} expected {1:04X} got {2:04X}"
- .format(name, expected, value))
- else:
- raise UnexpectedDataError(dir_path, name, expected, value)
-
- dir_stream = BytesIO(decompress_stream(dir_compressed))
-
- # PROJECTSYSKIND Record
- projectsyskind_id = struct.unpack(" 128:
- log.error("PROJECTNAME_SizeOfProjectName value not in range: {0}".format(projectname_sizeof_projectname))
- projectname_projectname = dir_stream.read(projectname_sizeof_projectname)
- unused = projectname_projectname
-
- # PROJECTDOCSTRING Record
- projectdocstring_id = struct.unpack(" 2000:
- log.error(
- "PROJECTDOCSTRING_SizeOfDocString value not in range: {0}".format(projectdocstring_sizeof_docstring))
- projectdocstring_docstring = dir_stream.read(projectdocstring_sizeof_docstring)
- projectdocstring_reserved = struct.unpack(" 260:
- log.error(
- "PROJECTHELPFILEPATH_SizeOfHelpFile1 value not in range: {0}".format(projecthelpfilepath_sizeof_helpfile1))
- projecthelpfilepath_helpfile1 = dir_stream.read(projecthelpfilepath_sizeof_helpfile1)
- projecthelpfilepath_reserved = struct.unpack(" 1015:
- log.error(
- "PROJECTCONSTANTS_SizeOfConstants value not in range: {0}".format(projectconstants_sizeof_constants))
- projectconstants_constants = dir_stream.read(projectconstants_sizeof_constants)
- projectconstants_reserved = struct.unpack(" 0:
- code_data = decompress_stream(code_data)
- # case-insensitive search in the code_modules dict to find the file extension:
- filext = code_modules.get(modulename_modulename.lower(), 'bin')
- filename = '{0}.{1}'.format(modulename_modulename, filext)
- #TODO: also yield the codepage so that callers can decode it properly
- yield (code_path, filename, code_data)
- # print '-'*79
- # print filename
- # print ''
- # print code_data
- # print ''
- log.debug('extracted file {0}'.format(filename))
- else:
- log.warning("module stream {0} has code data length 0".format(modulestreamname_streamname))
- except (UnexpectedDataError, SubstreamOpenError):
- raise
- except Exception as exc:
- log.info('Error parsing module {0} of {1} in _extract_vba:'
- .format(projectmodule_index, projectmodules_count),
- exc_info=True)
- if not relaxed:
- raise
- _ = unused # make pylint happy: now variable "unused" is being used ;-)
- return
-
-
def vba_collapse_long_lines(vba_code):
    """
    Join VBA statements that were split over several lines with the line
    continuation marker (a space followed by an underscore at the end of a
    line). Each marker and its line ending is replaced by a single space.

    :param vba_code: str, VBA module code
    :return: str, VBA module code with long lines collapsed
    """
    # TODO: use a regex instead, to allow whitespaces after the underscore?
    # The CRLF form is handled first so that it is not split by the CR form.
    for continuation in (' _\r\n', ' _\r', ' _\n'):
        vba_code = vba_code.replace(continuation, ' ')
    return vba_code
-
-
def filter_vba(vba_code):
    """
    Strip the leading "Attribute VB_" lines from VBA source code. These lines
    are automatically added by MS Office and not displayed in the VBA Editor.
    This should only be used when displaying source code for human analysis.

    Note: a line containing a colon is never stripped, because the colon
    could be used to hide malicious instructions on that line.

    :param vba_code: str, VBA source code
    :return: str, filtered VBA source code (lines joined with '\\n')
    """
    lines = vba_code.splitlines()
    skipped = 0
    # count the leading attribute lines, stopping at the first other line
    while (skipped < len(lines)
           and lines[skipped].startswith("Attribute VB_")
           and ':' not in lines[skipped]):
        skipped += 1
    #TODO: also remove empty lines?
    return '\n'.join(lines[skipped:])
-
-
def detect_autoexec(vba_code, obfuscation=None):
    """
    Look for keywords corresponding to macros that run automatically when
    triggered by specific actions (e.g. when a document is opened or closed).

    Each keyword from AUTOEXEC_KEYWORDS is searched case-insensitively and
    between word boundaries; it is inserted into the regex as-is.

    :param vba_code: str, VBA source code
    :param obfuscation: None or str, name of obfuscation to be added to description
    :return: list of str tuples (keyword, description)
    """
    #TODO: merge code with detect_suspicious
    suffix = ' (obfuscation: %s)' % obfuscation if obfuscation else ''
    hits = []
    for description, keywords in AUTOEXEC_KEYWORDS.items():
        label = description + suffix
        for keyword in keywords:
            #TODO: if keyword is already a compiled regex, use it as-is
            found = re.search(r'(?i)\b' + keyword + r'\b', vba_code)
            if found is None:
                continue
            # report the text as it appears in the code
            hits.append((found.group(), label))
    return hits
-
-
def detect_suspicious(vba_code, obfuscation=None):
    """
    Look for suspicious keywords corresponding to potential malware behaviour.

    Each keyword from SUSPICIOUS_KEYWORDS is escaped, then searched
    case-insensitively and between word boundaries.

    :param vba_code: str, VBA source code
    :param obfuscation: None or str, name of obfuscation to be added to description
    :return: list of str tuples (keyword, description)
    """
    suffix = ' (obfuscation: %s)' % obfuscation if obfuscation else ''
    hits = []
    for description, keywords in SUSPICIOUS_KEYWORDS.items():
        label = description + suffix
        for keyword in keywords:
            found = re.search(r'(?i)\b' + re.escape(keyword) + r'\b', vba_code)
            if found is None:
                continue
            # report the text as it appears in the code
            hits.append((found.group(), label))
    return hits
-
-
def detect_patterns(vba_code, obfuscation=None):
    """
    Extract the values matching the patterns listed in RE_PATTERNS (IP
    addresses, URLs, e-mail addresses, executable file names, etc).
    A given value is reported only once, for the first pattern matching it.

    :param vba_code: str, VBA source code
    :param obfuscation: None or str, name of obfuscation to be added to the pattern type
    :return: list of str tuples (pattern type, value)
    """
    suffix = ' (obfuscation: %s)' % obfuscation if obfuscation else ''
    seen = set()
    hits = []
    for pattern_type, pattern_re in RE_PATTERNS:
        label = pattern_type + suffix
        for hit in pattern_re.finditer(vba_code):
            text = hit.group()
            if text in seen:
                continue
            seen.add(text)
            hits.append((label, text))
    return hits
-
-
def detect_hex_strings(vba_code):
    """
    Find the strings encoded in hexadecimal in the VBA code and decode them.
    Each distinct encoded string is reported once.

    :param vba_code: str, VBA source code
    :return: list of str tuples (encoded string, decoded string)
    """
    seen = set()
    pairs = []
    for hit in re_hex_string.finditer(vba_code):
        hex_text = hit.group()
        if hex_text in seen:
            continue
        raw = binascii.unhexlify(hex_text)
        # bytes that are not valid UTF-8 are kept as backslash escapes
        pairs.append((hex_text, raw.decode('utf-8', 'backslashreplace')))
        seen.add(hex_text)
    return pairs
-
-
def detect_base64_strings(vba_code):
    """
    Find the strings encoded in base64 in the VBA code and decode them.
    Strings that look like plain hex, whitelisted strings and strings that
    fail to decode are ignored; each distinct string is reported once.

    :param vba_code: str, VBA source code
    :return: list of str tuples (encoded string, decoded string)
    """
    #TODO: avoid matching simple hex strings as base64?
    seen = set()
    pairs = []
    for hit in re_base64_string.finditer(vba_code):
        # the matched text without its surrounding quotes:
        candidate = hit.group().strip('"')
        # skip candidates that are just a hex string:
        if not re_nothex_check.search(candidate):
            continue
        # skip values already reported and whitelisted values:
        if candidate in seen or candidate.lower() in BASE64_WHITELIST:
            continue
        try:
            raw = base64.b64decode(candidate)
            pairs.append((candidate, raw.decode('utf-8','replace')))
            seen.add(candidate)
        except (TypeError, ValueError) as exc:
            # a decoding failure means it is likely not a base64-encoded string
            log.debug('Failed to base64-decode (%s)' % exc)
    return pairs
-
-
def detect_dridex_strings(vba_code):
    """
    Find the strings obfuscated with a specific algorithm found in Dridex
    samples, and decode them with DridexUrlDecode.
    Strings that look like plain hex and strings that fail to decode are
    ignored; each distinct string is reported once.

    :param vba_code: str, VBA source code
    :return: list of str tuples (encoded string, decoded string)
    """
    # TODO: move this at the beginning of script
    from oletools.thirdparty.DridexUrlDecoder.DridexUrlDecoder import DridexUrlDecode

    seen = set()
    pairs = []
    for hit in re_dridex_string.finditer(vba_code):
        # drop the first and last characters of the match
        candidate = hit.group()[1:-1]
        # skip candidates that are just a hex string:
        if not re_nothex_check.search(candidate):
            continue
        if candidate in seen:
            continue
        try:
            clear_text = DridexUrlDecode(candidate)
            pairs.append((candidate, clear_text))
            seen.add(candidate)
        except Exception as exc:
            # a decoding failure means it is likely not a dridex-encoded string
            log.debug('Failed to Dridex-decode (%s)' % exc)
    return pairs
-
-
def detect_vba_strings(vba_code):
    """
    Find the strings obfuscated with VBA expressions using keywords such as
    Chr, Asc, Val, StrReverse, etc, and evaluate them with the vba_expr_str
    grammar. Each distinct expression is reported once.

    :param vba_code: str, VBA source code
    :return: list of str tuples (encoded string, decoded string)
    """
    # TODO: handle exceptions
    seen = set()
    pairs = []
    # IMPORTANT: tabs are expanded so that the start and end offsets given
    # by pyparsing can be used to slice the original expression out of the
    # line. Otherwise the offsets would be incorrect.
    expanded_code = vba_code.expandtabs()
    # The code is scanned line by line to avoid MemoryError on large scripts:
    for line in expanded_code.splitlines():
        for tokens, start, end in vba_expr_str.scanString(line):
            decoded = tokens[0]
            # only VBA expressions are reported, not simple strings
            if not isinstance(decoded, VbaExpressionString):
                continue
            encoded = line[start:end]
            # skip duplicates and expressions equal to their decoded value
            if encoded in seen or decoded == encoded:
                continue
            pairs.append((encoded, decoded))
            seen.add(encoded)
    return pairs
-
-
def json2ascii(json_obj, encoding='utf8', errors='replace'):
    """
    Make sure an object can be dumped as JSON: every bytes string found in
    it, at any depth, is decoded to str.

    Works recursively. Dictionaries and lists are updated in place and
    returned; a tuple is returned as a new tuple since it cannot be modified.
    None, bool, int, float and str values are returned unchanged, and so are
    objects of any other type (a debug message is logged for those).

    :param json_obj: object to convert
    :param encoding: str, encoding used to decode bytes strings
    :param errors: str, error handling scheme used when decoding
    :return: the converted object
    """
    if json_obj is None or isinstance(json_obj, (bool, int, float, str)):
        return json_obj
    if isinstance(json_obj, bytes):
        decoded = json_obj.decode(encoding, errors)
        # cannot put original into logger
        log.debug('json2ascii: encode unicode: {0}'
                  .format(decoded))
        return decoded
    if isinstance(json_obj, dict):
        # only values are replaced, so iterating over the keys is safe
        for key in json_obj:
            json_obj[key] = json2ascii(json_obj[key], encoding, errors)
        return json_obj
    if isinstance(json_obj, list):
        # store the converted items back: rebinding the loop variable alone
        # would silently discard them
        for index, item in enumerate(json_obj):
            json_obj[index] = json2ascii(item, encoding, errors)
        return json_obj
    if isinstance(json_obj, tuple):
        return tuple(json2ascii(item, encoding, errors) for item in json_obj)
    log.debug('unexpected type in json2ascii: {0} -- leave as is'
              .format(type(json_obj)))
    return json_obj
-
-
def print_json(json_dict=None, _json_is_first=False, _json_is_last=False,
               **json_parts):
    """ print json.dumps(json2ascii(..)) line by line, each line prefixed

    can use in two ways:
    (1) print_json(some_dict)
    (2) print_json(key1=value1, key2=value2, ...)

    :param dict json_dict: the data to print, for usage (1)
    :param bool _json_is_first: set to True only for very first entry to complete
                                the top-level json-list (prints the opening bracket)
    :param bool _json_is_last: set to True only for very last entry to complete
                               the top-level json-list (prints the closing bracket
                               instead of a trailing comma)
    :param json_parts: the data to print, for usage (2)
    :raises ValueError: if both a dict and key=value parts are given, or if
                        json_dict is not a dict
    """
    if json_dict and json_parts:
        raise ValueError('Invalid json argument: want either single dict or '
                         'key=value parts but got both)')
    if json_dict is not None and not isinstance(json_dict, dict):
        raise ValueError('Invalid json argument: want either single dict or '
                         'key=value parts but got {0} instead of dict)'
                         .format(type(json_dict)))
    payload = json_parts if json_parts else json_dict

    if _json_is_first:
        print('[')

    dumped = json.dumps(json2ascii(payload), check_circular=False,
                        indent=4, ensure_ascii=False)
    rendered = dumped.splitlines()
    final_line = rendered[-1]
    for text in rendered[:-1]:
        print(' {0}'.format(text))
    if not _json_is_last:
        print(' {0},'.format(final_line))  # more entries follow: add a comma
        return
    print(' {0}'.format(final_line))  # last entry: no comma
    print(']')
-
-class VBA_Scanner(object):
- """
- Class to scan the source code of a VBA module to find obfuscated strings,
- suspicious keywords, IOCs, auto-executable macros, etc.
- """
-
- def __init__(self, vba_code):
- """
- VBA_Scanner constructor
-
- :param vba_code: str, VBA source code to be analyzed
- """
- if isinstance(vba_code, bytes):
- vba_code = vba_code.decode('utf-8', 'backslashreplace')
- # join long lines ending with " _":
- self.code = vba_collapse_long_lines(vba_code)
- self.code_hex = ''
- self.code_hex_rev = ''
- self.code_rev_hex = ''
- self.code_base64 = ''
- self.code_dridex = ''
- self.code_vba = ''
- self.strReverse = None
- # results = None before scanning, then a list of tuples after scanning
- self.results = None
- self.autoexec_keywords = None
- self.suspicious_keywords = None
- self.iocs = None
- self.hex_strings = None
- self.base64_strings = None
- self.dridex_strings = None
- self.vba_strings = None
-
-
- def scan(self, include_decoded_strings=False, deobfuscate=False):
- """
- Analyze the provided VBA code to detect suspicious keywords,
- auto-executable macros, IOC patterns, obfuscation patterns
- such as hex-encoded strings.
-
- :param include_decoded_strings: bool, if True, all encoded strings will be included with their decoded content.
- :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
- :return: list of tuples (type, keyword, description)
- (type = 'AutoExec', 'Suspicious', 'IOC', 'Hex String', 'Base64 String' or 'Dridex String')
- """
- # First, detect and extract hex-encoded strings:
- self.hex_strings = detect_hex_strings(self.code)
- # detect if the code contains StrReverse:
- self.strReverse = False
- if 'strreverse' in self.code.lower(): self.strReverse = True
- # Then append the decoded strings to the VBA code, to detect obfuscated IOCs and keywords:
- for encoded, decoded in self.hex_strings:
- self.code_hex += '\n' + decoded
- # if the code contains "StrReverse", also append the hex strings in reverse order:
- if self.strReverse:
- # StrReverse after hex decoding:
- self.code_hex_rev += '\n' + decoded[::-1]
- # StrReverse before hex decoding:
- self.code_rev_hex += '\n' + str(binascii.unhexlify(encoded[::-1]))
- #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/
- #TODO: also append the full code reversed if StrReverse? (risk of false positives?)
- # Detect Base64-encoded strings
- self.base64_strings = detect_base64_strings(self.code)
- for encoded, decoded in self.base64_strings:
- self.code_base64 += '\n' + decoded
- # Detect Dridex-encoded strings
- self.dridex_strings = detect_dridex_strings(self.code)
- for encoded, decoded in self.dridex_strings:
- self.code_dridex += '\n' + decoded
- # Detect obfuscated strings in VBA expressions
- if deobfuscate:
- self.vba_strings = detect_vba_strings(self.code)
- else:
- self.vba_strings = []
- for encoded, decoded in self.vba_strings:
- self.code_vba += '\n' + decoded
- results = []
- self.autoexec_keywords = []
- self.suspicious_keywords = []
- self.iocs = []
-
- for code, obfuscation in (
- (self.code, None),
- (self.code_hex, 'Hex'),
- (self.code_hex_rev, 'Hex+StrReverse'),
- (self.code_rev_hex, 'StrReverse+Hex'),
- (self.code_base64, 'Base64'),
- (self.code_dridex, 'Dridex'),
- (self.code_vba, 'VBA expression'),
- ):
- if isinstance(code,bytes):
- code=code.decode('utf-8','backslashreplace')
- self.autoexec_keywords += detect_autoexec(code, obfuscation)
- self.suspicious_keywords += detect_suspicious(code, obfuscation)
- self.iocs += detect_patterns(code, obfuscation)
-
- # If hex-encoded strings were discovered, add an item to suspicious keywords:
- if self.hex_strings:
- self.suspicious_keywords.append(('Hex Strings',
- 'Hex-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)'))
- if self.base64_strings:
- self.suspicious_keywords.append(('Base64 Strings',
- 'Base64-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)'))
- if self.dridex_strings:
- self.suspicious_keywords.append(('Dridex Strings',
- 'Dridex-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)'))
- if self.vba_strings:
- self.suspicious_keywords.append(('VBA obfuscated Strings',
- 'VBA string expressions were detected, may be used to obfuscate strings (option --decode to see all)'))
- # use a set to avoid duplicate keywords
- keyword_set = set()
- for keyword, description in self.autoexec_keywords:
- if keyword not in keyword_set:
- results.append(('AutoExec', keyword, description))
- keyword_set.add(keyword)
- keyword_set = set()
- for keyword, description in self.suspicious_keywords:
- if keyword not in keyword_set:
- results.append(('Suspicious', keyword, description))
- keyword_set.add(keyword)
- keyword_set = set()
- for pattern_type, value in self.iocs:
- if value not in keyword_set:
- results.append(('IOC', value, pattern_type))
- keyword_set.add(value)
-
- # include decoded strings only if they are printable or if --decode option:
- for encoded, decoded in self.hex_strings:
- if include_decoded_strings or is_printable(decoded):
- results.append(('Hex String', decoded, encoded))
- for encoded, decoded in self.base64_strings:
- if include_decoded_strings or is_printable(decoded):
- results.append(('Base64 String', decoded, encoded))
- for encoded, decoded in self.dridex_strings:
- if include_decoded_strings or is_printable(decoded):
- results.append(('Dridex string', decoded, encoded))
- for encoded, decoded in self.vba_strings:
- if include_decoded_strings or is_printable(decoded):
- results.append(('VBA string', decoded, encoded))
- self.results = results
- return results
-
- def scan_summary(self):
- """
- Analyze the provided VBA code to detect suspicious keywords,
- auto-executable macros, IOC patterns, obfuscation patterns
- such as hex-encoded strings.
-
- :return: tuple with the number of items found for each category:
- (autoexec, suspicious, IOCs, hex, base64, dridex, vba)
- """
- # avoid scanning the same code twice:
- if self.results is None:
- self.scan()
- return (len(self.autoexec_keywords), len(self.suspicious_keywords),
- len(self.iocs), len(self.hex_strings), len(self.base64_strings),
- len(self.dridex_strings), len(self.vba_strings))
-
-
-def scan_vba(vba_code, include_decoded_strings, deobfuscate=False):
- """
- Analyze the provided VBA code to detect suspicious keywords,
- auto-executable macros, IOC patterns, obfuscation patterns
- such as hex-encoded strings.
- (shortcut for VBA_Scanner(vba_code).scan())
-
- :param vba_code: str, VBA source code to be analyzed
- :param include_decoded_strings: bool, if True all encoded strings will be included with their decoded content.
- :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
- :return: list of tuples (type, keyword, description)
- (type = 'AutoExec', 'Suspicious', 'IOC', 'Hex String', 'Base64 String' or 'Dridex String')
- """
- return VBA_Scanner(vba_code).scan(include_decoded_strings, deobfuscate)
-
-
-#=== CLASSES =================================================================
-
-class VBA_Parser(object):
- """
- Class to parse MS Office files, to detect VBA macros and extract VBA source code
- Supported file formats:
- - Word 97-2003 (.doc, .dot)
- - Word 2007+ (.docm, .dotm)
- - Word 2003 XML (.xml)
- - Word MHT - Single File Web Page / MHTML (.mht)
- - Excel 97-2003 (.xls)
- - Excel 2007+ (.xlsm, .xlsb)
- - PowerPoint 97-2003 (.ppt)
- - PowerPoint 2007+ (.pptm, .ppsm)
- """
-
- def __init__(self, filename, data=None, container=None, relaxed=False):
- """
- Constructor for VBA_Parser
-
- :param filename: filename or path of file to parse, or file-like object
-
- :param data: None or bytes str, if None the file will be read from disk (or from the file-like object).
- If data is provided as a bytes string, it will be parsed as the content of the file in memory,
- and not read from disk. Note: files must be read in binary mode, i.e. open(f, 'rb').
-
- :param container: str, path and filename of container if the file is within
- a zip archive, None otherwise.
-
- :param relaxed: if True, treat mal-formed documents and missing streams more like MS office:
- do nothing; if False (default), raise errors in these cases
-
- raises a FileOpenError if all attemps to interpret the data header failed
- """
- #TODO: filename should only be a string, data should be used for the file-like object
- #TODO: filename should be mandatory, optional data is a string or file-like object
- #TODO: also support olefile and zipfile as input
- if data is None:
- # open file from disk:
- _file = filename
- else:
- # file already read in memory, make it a file-like object for zipfile:
- _file = BytesIO(data)
- #self.file = _file
- self.ole_file = None
- self.ole_subfiles = []
- self.filename = filename
- self.container = container
- self.relaxed = relaxed
- self.type = None
- self.vba_projects = None
- self.vba_forms = None
- self.contains_macros = None # will be set to True or False by detect_macros
- self.vba_code_all_modules = None # to store the source code of all modules
- # list of tuples for each module: (subfilename, stream_path, vba_filename, vba_code)
- self.modules = None
- # Analysis results: list of tuples (type, keyword, description) - See VBA_Scanner
- self.analysis_results = None
- # statistics for the scan summary and flags
- self.nb_macros = 0
- self.nb_autoexec = 0
- self.nb_suspicious = 0
- self.nb_iocs = 0
- self.nb_hexstrings = 0
- self.nb_base64strings = 0
- self.nb_dridexstrings = 0
- self.nb_vbastrings = 0
-
- # if filename is None:
- # if isinstance(_file, basestring):
- # if len(_file) < olefile.MINIMAL_OLEFILE_SIZE:
- # self.filename = _file
- # else:
- # self.filename = ''
- # else:
- # self.filename = ''
- if olefile.isOleFile(_file):
- # This looks like an OLE file
- self.open_ole(_file)
-
- # check whether file is encrypted (need to do this before try ppt)
- log.debug('Check encryption of ole file')
- crypt_indicator = oleid.OleID(self.ole_file).check_encrypted()
- if crypt_indicator.value:
- raise FileIsEncryptedError(filename)
-
- # if this worked, try whether it is a ppt file (special ole file)
- self.open_ppt()
- if self.type is None and is_zipfile(_file):
- # Zip file, which may be an OpenXML document
- self.open_openxml(_file)
- if self.type is None:
- # read file from disk, check if it is a Word 2003 XML file (WordProcessingML), Excel 2003 XML,
- # or a plain text file containing VBA code
- if data is None:
- with open(filename, 'rb') as file_handle:
- data = file_handle.read()
- # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace
- if b'http://schemas.microsoft.com/office/word/2003/wordml' in data:
- self.open_word2003xml(data)
- # check if it is a Word/PowerPoint 2007+ XML file (Flat OPC): must contain the namespace
- if b'http://schemas.microsoft.com/office/2006/xmlPackage' in data:
- self.open_flatopc(data)
- # store a lowercase version for the next tests:
- data_lowercase = data.lower()
- # check if it is a MHT file (MIME HTML, Word or Excel saved as "Single File Web Page"):
- # According to my tests, these files usually start with "MIME-Version: 1.0" on the 1st line
- # BUT Word accepts a blank line or other MIME headers inserted before,
- # and even whitespaces in between "MIME", "-", "Version" and ":". The version number is ignored.
- # And the line is case insensitive.
- # so we'll just check the presence of mime, version and multipart anywhere:
- if self.type is None and b'mime' in data_lowercase and b'version' in data_lowercase \
- and b'multipart' in data_lowercase:
- self.open_mht(data)
- #TODO: handle exceptions
- #TODO: Excel 2003 XML
- # Check whether this is rtf
- if rtfobj.is_rtf(data, treat_str_as_data=True):
- # Ignore RTF since it contains no macros and methods in here will not find macros
- # in embedded objects. run rtfobj and repeat on its output.
- msg = '%s is RTF, need to run rtfobj.py and find VBA Macros in its output.' % self.filename
- log.info(msg)
- raise FileOpenError(msg)
- # Check if this is a plain text VBA or VBScript file:
- # To avoid scanning binary files, we simply check for some control chars:
- if self.type is None and b'\x00' not in data:
- self.open_text(data)
- if self.type is None:
- # At this stage, could not match a known format:
- msg = '%s is not a supported file type, cannot extract VBA Macros.' % self.filename
- log.info(msg)
- raise FileOpenError(msg)
-
- def open_ole(self, _file):
- """
- Open an OLE file
- :param _file: filename or file contents in a file object
- :return: nothing
- """
- log.info('Opening OLE file %s' % self.filename)
- try:
- # Open and parse the OLE file, using unicode for path names:
- self.ole_file = olefile.OleFileIO(_file, path_encoding=None)
- # set type only if parsing succeeds
- self.type = TYPE_OLE
- except (IOError, TypeError, ValueError) as exc:
- # TODO: handle OLE parsing exceptions
- log.info('Failed OLE parsing for file %r (%s)' % (self.filename, exc))
- log.debug('Trace:', exc_info=True)
-
-
- def open_openxml(self, _file):
- """
- Open an OpenXML file
- :param _file: filename or file contents in a file object
- :return: nothing
- """
- # This looks like a zip file, need to look for vbaProject.bin inside
- # It can be any OLE file inside the archive
- #...because vbaProject.bin can be renamed:
- # see http://www.decalage.info/files/JCV07_Lagadec_OpenDocument_OpenXML_v4_decalage.pdf#page=18
- log.info('Opening ZIP/OpenXML file %s' % self.filename)
- try:
- z = zipfile.ZipFile(_file)
- #TODO: check if this is actually an OpenXML file
- #TODO: if the zip file is encrypted, suggest to use the -z option, or try '-z infected' automatically
- # check each file within the zip if it is an OLE file, by reading its magic:
- for subfile in z.namelist():
- with z.open(subfile) as file_handle:
- magic = file_handle.read(len(olefile.MAGIC))
- if magic == olefile.MAGIC:
- log.debug('Opening OLE file %s within zip' % subfile)
- with z.open(subfile) as file_handle:
- ole_data = file_handle.read()
- try:
- self.ole_subfiles.append(
- VBA_Parser(filename=subfile, data=ole_data,
- relaxed=self.relaxed))
- except OlevbaBaseException as exc:
- if self.relaxed:
- log.info('%s is not a valid OLE file (%s)' % (subfile, exc))
- log.debug('Trace:', exc_info=True)
- continue
- else:
- raise SubstreamOpenError(self.filename, subfile,
- exc)
- z.close()
- # set type only if parsing succeeds
- self.type = TYPE_OpenXML
- except OlevbaBaseException as exc:
- if self.relaxed:
- log.info('Error {0} caught in Zip/OpenXML parsing for file {1}'
- .format(exc, self.filename))
- log.debug('Trace:', exc_info=True)
- else:
- raise
- except (RuntimeError, zipfile.BadZipfile, zipfile.LargeZipFile, IOError) as exc:
- # TODO: handle parsing exceptions
- log.info('Failed Zip/OpenXML parsing for file %r (%s)'
- % (self.filename, exc))
- log.debug('Trace:', exc_info=True)
-
- def open_word2003xml(self, data):
- """
- Open a Word 2003 XML file
- :param data: file contents in a string or bytes
- :return: nothing
- """
- log.info('Opening Word 2003 XML file %s' % self.filename)
- try:
- # parse the XML content
- # TODO: handle XML parsing exceptions
- et = ET.fromstring(data)
- # find all the binData elements:
- for bindata in et.getiterator(TAG_BINDATA):
- # the binData content is an OLE container for the VBA project, compressed
- # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded.
- # get the filename:
- fname = bindata.get(ATTR_NAME, 'noname.mso')
- # decode the base64 activemime
- mso_data = binascii.a2b_base64(bindata.text)
- if is_mso_file(mso_data):
- # decompress the zlib data stored in the MSO file, which is the OLE container:
- # TODO: handle different offsets => separate function
- try:
- ole_data = mso_file_extract(mso_data)
- self.ole_subfiles.append(
- VBA_Parser(filename=fname, data=ole_data,
- relaxed=self.relaxed))
- except OlevbaBaseException as exc:
- if self.relaxed:
- log.info('Error parsing subfile {0}: {1}'
- .format(fname, exc))
- log.debug('Trace:', exc_info=True)
- else:
- raise SubstreamOpenError(self.filename, fname, exc)
- else:
- log.info('%s is not a valid MSO file' % fname)
- # set type only if parsing succeeds
- self.type = TYPE_Word2003_XML
- except OlevbaBaseException as exc:
- if self.relaxed:
- log.info('Failed XML parsing for file %r (%s)' % (self.filename, exc))
- log.debug('Trace:', exc_info=True)
- else:
- raise
- except Exception as exc:
- # TODO: differentiate exceptions for each parsing stage
- # (but ET is different libs, no good exception description in API)
- # found: XMLSyntaxError
- log.info('Failed XML parsing for file %r (%s)' % (self.filename, exc))
- log.debug('Trace:', exc_info=True)
-
- def open_flatopc(self, data):
- """
- Open a Word or PowerPoint 2007+ XML file, aka "Flat OPC"
- :param data: file contents in a string or bytes
- :return: nothing
- """
- log.info('Opening Flat OPC Word/PowerPoint XML file %s' % self.filename)
- try:
- # parse the XML content
- # TODO: handle XML parsing exceptions
- et = ET.fromstring(data)
- # TODO: check root node namespace and tag
- # find all the pkg:part elements:
- for pkgpart in et.iter(TAG_PKGPART):
- fname = pkgpart.get(ATTR_PKG_NAME, 'unknown')
- content_type = pkgpart.get(ATTR_PKG_CONTENTTYPE, 'unknown')
- if content_type == CTYPE_VBAPROJECT:
- for bindata in pkgpart.iterfind(TAG_PKGBINDATA):
- try:
- ole_data = binascii.a2b_base64(bindata.text)
- self.ole_subfiles.append(
- VBA_Parser(filename=fname, data=ole_data,
- relaxed=self.relaxed))
- except OlevbaBaseException as exc:
- if self.relaxed:
- log.info('Error parsing subfile {0}: {1}'
- .format(fname, exc))
- log.debug('Trace:', exc_info=True)
- else:
- raise SubstreamOpenError(self.filename, fname, exc)
- # set type only if parsing succeeds
- self.type = TYPE_FlatOPC_XML
- except OlevbaBaseException as exc:
- if self.relaxed:
- log.info('Failed XML parsing for file %r (%s)' % (self.filename, exc))
- log.debug('Trace:', exc_info=True)
- else:
- raise
- except Exception as exc:
- # TODO: differentiate exceptions for each parsing stage
- # (but ET is different libs, no good exception description in API)
- # found: XMLSyntaxError
- log.info('Failed XML parsing for file %r (%s)' % (self.filename, exc))
- log.debug('Trace:', exc_info=True)
-
- def open_mht(self, data):
- """
- Open a MHTML file
- :param data: file contents in a string or bytes
- :return: nothing
- """
- log.info('Opening MHTML file %s' % self.filename)
- try:
- if isinstance(data,bytes):
- data = data.decode('utf8', 'backslashreplace')
- # parse the MIME content
- # remove any leading whitespace or newline (workaround for issue in email package)
- stripped_data = data.lstrip('\r\n\t ')
- # strip any junk from the beginning of the file
- # (issue #31 fix by Greg C - gdigreg)
- # TODO: improve keywords to avoid false positives
- mime_offset = stripped_data.find('MIME')
- content_offset = stripped_data.find('Content')
- # if "MIME" is found, and located before "Content":
- if -1 < mime_offset <= content_offset:
- stripped_data = stripped_data[mime_offset:]
- # else if "Content" is found, and before "MIME"
- # TODO: can it work without "MIME" at all?
- elif content_offset > -1:
- stripped_data = stripped_data[content_offset:]
- # TODO: quick and dirty fix: insert a standard line with MIME-Version header?
- mhtml = email.message_from_string(stripped_data)
- # find all the attached files:
- for part in mhtml.walk():
- content_type = part.get_content_type() # always returns a value
- fname = part.get_filename(None) # returns None if it fails
- # TODO: get content-location if no filename
- log.debug('MHTML part: filename=%r, content-type=%r' % (fname, content_type))
- part_data = part.get_payload(decode=True)
- # VBA macros are stored in a binary file named "editdata.mso".
- # the data content is an OLE container for the VBA project, compressed
- # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded.
- # decompress the zlib data starting at offset 0x32, which is the OLE container:
- # check ActiveMime header:
-
- if (isinstance(part_data, str) or isinstance(part_data, bytes)) and is_mso_file(part_data):
- log.debug('Found ActiveMime header, decompressing MSO container')
- try:
- ole_data = mso_file_extract(part_data)
-
- # TODO: check if it is actually an OLE file
- # TODO: get the MSO filename from content_location?
- self.ole_subfiles.append(
- VBA_Parser(filename=fname, data=ole_data,
- relaxed=self.relaxed))
- except OlevbaBaseException as exc:
- if self.relaxed:
- log.info('%s does not contain a valid OLE file (%s)'
- % (fname, exc))
- log.debug('Trace:', exc_info=True)
- # TODO: bug here - need to split in smaller functions/classes?
- else:
- raise SubstreamOpenError(self.filename, fname, exc)
- else:
- log.debug('type(part_data) = %s' % type(part_data))
- try:
- log.debug('part_data[0:20] = %r' % part_data[0:20])
- except TypeError as err:
- log.debug('part_data has no __getitem__')
- # set type only if parsing succeeds
- self.type = TYPE_MHTML
- except OlevbaBaseException:
- raise
- except Exception:
- log.info('Failed MIME parsing for file %r - %s'
- % (self.filename, MSG_OLEVBA_ISSUES))
- log.debug('Trace:', exc_info=True)
-
- def open_ppt(self):
- """ try to interpret self.ole_file as PowerPoint 97-2003 using PptParser
-
- Although self.ole_file is a valid olefile.OleFileIO, we set
- self.ole_file = None in here and instead set self.ole_subfiles to the
- VBA ole streams found within the main ole file. That makes most of the
- code below treat this like an OpenXML file and only look at the
- ole_subfiles (except find_vba_* which needs to explicitly check for
- self.type)
- """
-
- log.info('Check whether OLE file is PPT')
- try:
- ppt = ppt_parser.PptParser(self.ole_file, fast_fail=True)
- for vba_data in ppt.iter_vba_data():
- self.ole_subfiles.append(VBA_Parser(None, vba_data,
- container='PptParser'))
- log.info('File is PPT')
- self.ole_file.close() # just in case
- self.ole_file = None # required to make other methods look at ole_subfiles
- self.type = TYPE_PPT
- except Exception as exc:
- if self.container == 'PptParser':
- # this is a subfile of a ppt --> to be expected that is no ppt
- log.debug('PPT subfile is not a PPT file')
- else:
- log.debug("File appears not to be a ppt file (%s)" % exc)
-
-
- def open_text(self, data):
- """
- Open a text file containing VBA or VBScript source code
- :param data: file contents in a string or bytes
- :return: nothing
- """
- log.info('Opening text file %s' % self.filename)
- # directly store the source code:
- if isinstance(data,bytes):
- data=data.decode('utf8','backslashreplace')
- self.vba_code_all_modules = data
- self.contains_macros = True
- # set type only if parsing succeeds
- self.type = TYPE_TEXT
-
-
- def find_vba_projects(self):
- """
- Finds all the VBA projects stored in an OLE file.
-
- Return None if the file is not OLE but OpenXML.
- Return a list of tuples (vba_root, project_path, dir_path) for each VBA project.
- vba_root is the path of the root OLE storage containing the VBA project,
- including a trailing slash unless it is the root of the OLE file.
- project_path is the path of the OLE stream named "PROJECT" within the VBA project.
- dir_path is the path of the OLE stream named "VBA/dir" within the VBA project.
-
- If this function returns an empty list for one of the supported formats
- (i.e. Word, Excel, Powerpoint), then the file does not contain VBA macros.
-
- :return: None if OpenXML file, list of tuples (vba_root, project_path, dir_path)
- for each VBA project found if OLE file
- """
- log.debug('VBA_Parser.find_vba_projects')
-
- # if the file is not OLE but OpenXML, return None:
- if self.ole_file is None and self.type != TYPE_PPT:
- return None
-
- # if this method has already been called, return previous result:
- if self.vba_projects is not None:
- return self.vba_projects
-
- # if this is a ppt file (PowerPoint 97-2003):
- # self.ole_file is None but the ole_subfiles do contain vba_projects
- # (like for OpenXML files).
- if self.type == TYPE_PPT:
- # TODO: so far, this function is never called for PPT files, but
- # if that happens, the information is lost which ole file contains
- # which storage!
- log.warning('Returned info is not complete for PPT types!')
- self.vba_projects = []
- for subfile in self.ole_subfiles:
- self.vba_projects.extend(subfile.find_vba_projects())
- return self.vba_projects
-
- # Find the VBA project root (different in MS Word, Excel, etc):
- # - Word 97-2003: Macros
- # - Excel 97-2003: _VBA_PROJECT_CUR
- # - PowerPoint 97-2003: PptParser has identified ole_subfiles
- # - Word 2007+: word/vbaProject.bin in zip archive, then the VBA project is the root of vbaProject.bin.
- # - Excel 2007+: xl/vbaProject.bin in zip archive, then same as Word
- # - PowerPoint 2007+: ppt/vbaProject.bin in zip archive, then same as Word
- # - Visio 2007: not supported yet (different file structure)
-
- # According to MS-OVBA section 2.2.1:
- # - the VBA project root storage MUST contain a VBA storage and a PROJECT stream
- # - The root/VBA storage MUST contain a _VBA_PROJECT stream and a dir stream
- # - all names are case-insensitive
-
- def check_vba_stream(ole, vba_root, stream_path):
- full_path = vba_root + stream_path
- if ole.exists(full_path) and ole.get_type(full_path) == olefile.STGTY_STREAM:
- log.debug('Found %s stream: %s' % (stream_path, full_path))
- return full_path
- else:
- log.debug('Missing %s stream, this is not a valid VBA project structure' % stream_path)
- return False
-
- # start with an empty list:
- self.vba_projects = []
- # Look for any storage containing those storage/streams:
- ole = self.ole_file
- for storage in ole.listdir(streams=False, storages=True):
- log.debug('Checking storage %r' % storage)
- # Look for a storage ending with "VBA":
- if storage[-1].upper() == 'VBA':
- log.debug('Found VBA storage: %s' % ('/'.join(storage)))
- vba_root = '/'.join(storage[:-1])
- # Add a trailing slash to vba_root, unless it is the root of the OLE file:
- # (used later to append all the child streams/storages)
- if vba_root != '':
- vba_root += '/'
- log.debug('Checking vba_root="%s"' % vba_root)
-
- # Check if the VBA root storage also contains a PROJECT stream:
- project_path = check_vba_stream(ole, vba_root, 'PROJECT')
- if not project_path: continue
- # Check if the VBA root storage also contains a VBA/_VBA_PROJECT stream:
- vba_project_path = check_vba_stream(ole, vba_root, 'VBA/_VBA_PROJECT')
- if not vba_project_path: continue
- # Check if the VBA root storage also contains a VBA/dir stream:
- dir_path = check_vba_stream(ole, vba_root, 'VBA/dir')
- if not dir_path: continue
- # Now we are pretty sure it is a VBA project structure
- log.debug('VBA root storage: "%s"' % vba_root)
- # append the results to the list as a tuple for later use:
- self.vba_projects.append((vba_root, project_path, dir_path))
- return self.vba_projects
-
- def detect_vba_macros(self):
- """
- Detect the potential presence of VBA macros in the file, by checking
- if it contains VBA projects. Both OLE and OpenXML files are supported.
-
- Important: for now, results are accurate only for Word, Excel and PowerPoint
-
- Note: this method does NOT attempt to check the actual presence or validity
- of VBA macro source code, so there might be false positives.
- It may also detect VBA macros in files embedded within the main file,
- for example an Excel workbook with macros embedded into a Word
- document without macros may be detected, without distinction.
-
- :return: bool, True if at least one VBA project has been found, False otherwise
- """
- #TODO: return None or raise exception if format not supported
- #TODO: return the number of VBA projects found instead of True/False?
- # if this method was already called, return the previous result:
- if self.contains_macros is not None:
- return self.contains_macros
- # if OpenXML/PPT, check all the OLE subfiles:
- if self.ole_file is None:
- for ole_subfile in self.ole_subfiles:
- if ole_subfile.detect_vba_macros():
- self.contains_macros = True
- return True
- # otherwise, no macro found:
- self.contains_macros = False
- return False
- # otherwise it's an OLE file, find VBA projects:
- vba_projects = self.find_vba_projects()
- if len(vba_projects) == 0:
- self.contains_macros = False
- else:
- self.contains_macros = True
- # Also look for VBA code in any stream including orphans
- # (happens in some malformed files)
- ole = self.ole_file
- for sid in xrange(len(ole.direntries)):
- # check if id is already done above:
- log.debug('Checking DirEntry #%d' % sid)
- d = ole.direntries[sid]
- if d is None:
- # this direntry is not part of the tree: either unused or an orphan
- d = ole._load_direntry(sid)
- log.debug('This DirEntry is an orphan or unused')
- if d.entry_type == olefile.STGTY_STREAM:
- # read data
- log.debug('Reading data from stream %r - size: %d bytes' % (d.name, d.size))
- try:
- data = ole._open(d.isectStart, d.size).read()
- log.debug('Read %d bytes' % len(data))
- if len(data) > 200:
- log.debug('%r...[much more data]...%r' % (data[:100], data[-50:]))
- else:
- log.debug(repr(data))
- if 'Attribut\x00' in data.decode('utf-8', 'ignore'):
- log.debug('Found VBA compressed code')
- self.contains_macros = True
- except IOError as exc:
- if self.relaxed:
- log.info('Error when reading OLE Stream %r' % d.name)
- log.debug('Trace:', exc_trace=True)
- else:
- raise SubstreamOpenError(self.filename, d.name, exc)
- return self.contains_macros
-
- def extract_macros(self):
- """
- Extract and decompress source code for each VBA macro found in the file
-
- Iterator: yields (filename, stream_path, vba_filename, vba_code) for each VBA macro found
- If the file is OLE, filename is the path of the file.
- If the file is OpenXML, filename is the path of the OLE subfile containing VBA macros
- within the zip archive, e.g. word/vbaProject.bin.
- If the file is PPT, result is as for OpenXML but filename is useless
- """
- log.debug('extract_macros:')
- if self.ole_file is None:
- # This may be either an OpenXML/PPT or a text file:
- if self.type == TYPE_TEXT:
- # This is a text file, yield the full code:
- yield (self.filename, '', self.filename, self.vba_code_all_modules)
- else:
- # OpenXML/PPT: recursively yield results from each OLE subfile:
- for ole_subfile in self.ole_subfiles:
- for results in ole_subfile.extract_macros():
- yield results
- else:
- # This is an OLE file:
- self.find_vba_projects()
- # set of stream ids
- vba_stream_ids = set()
- for vba_root, project_path, dir_path in self.vba_projects:
- # extract all VBA macros from that VBA root storage:
- # The function _extract_vba may fail on some files (issue #132)
- try:
- for stream_path, vba_filename, vba_code in \
- _extract_vba(self.ole_file, vba_root, project_path,
- dir_path, self.relaxed):
- # store direntry ids in a set:
- vba_stream_ids.add(self.ole_file._find(stream_path))
- yield (self.filename, stream_path, vba_filename, vba_code)
- except Exception as e:
- log.exception('Error in _extract_vba')
- # Also look for VBA code in any stream including orphans
- # (happens in some malformed files)
- ole = self.ole_file
- for sid in xrange(len(ole.direntries)):
- # check if id is already done above:
- log.debug('Checking DirEntry #%d' % sid)
- if sid in vba_stream_ids:
- log.debug('Already extracted')
- continue
- d = ole.direntries[sid]
- if d is None:
- # this direntry is not part of the tree: either unused or an orphan
- d = ole._load_direntry(sid)
- log.debug('This DirEntry is an orphan or unused')
- if d.entry_type == olefile.STGTY_STREAM:
- # read data
- log.debug('Reading data from stream %r' % d.name)
- data = ole._open(d.isectStart, d.size).read()
- for match in re.finditer(b'\\x00Attribut[^e]', data, flags=re.IGNORECASE):
- start = match.start() - 3
- log.debug('Found VBA compressed code at index %X' % start)
- compressed_code = data[start:]
- try:
- vba_code = decompress_stream(compressed_code)
- yield (self.filename, d.name, d.name, vba_code)
- except Exception as exc:
- # display the exception with full stack trace for debugging
- log.debug('Error processing stream %r in file %r (%s)' % (d.name, self.filename, exc))
- log.debug('Traceback:', exc_info=True)
- # do not raise the error, as it is unlikely to be a compressed macro stream
-
- def extract_all_macros(self):
- """
- Extract and decompress source code for each VBA macro found in the file
- by calling extract_macros(), store the results as a list of tuples
- (filename, stream_path, vba_filename, vba_code) in self.modules.
- See extract_macros for details.
- """
- if self.modules is None:
- self.modules = []
- for (subfilename, stream_path, vba_filename, vba_code) in self.extract_macros():
- self.modules.append((subfilename, stream_path, vba_filename, vba_code))
- self.nb_macros = len(self.modules)
- return self.modules
-
-
-
- def analyze_macros(self, show_decoded_strings=False, deobfuscate=False):
- """
- runs extract_macros and analyze the source code of all VBA macros
- found in the file.
- """
- if self.detect_vba_macros():
- # if the analysis was already done, avoid doing it twice:
- if self.analysis_results is not None:
- return self.analysis_results
- # variable to merge source code from all modules:
- if self.vba_code_all_modules is None:
- self.vba_code_all_modules = ''
- for (_, _, _, vba_code) in self.extract_all_macros():
- #TODO: filter code? (each module)
- if isinstance(vba_code, bytes):
- vba_code = vba_code.decode('utf-8', 'ignore')
- self.vba_code_all_modules += vba_code + '\n'
- for (_, _, form_string) in self.extract_form_strings():
- self.vba_code_all_modules += form_string.decode('utf-8', 'ignore') + '\n'
- # Analyze the whole code at once:
- scanner = VBA_Scanner(self.vba_code_all_modules)
- self.analysis_results = scanner.scan(show_decoded_strings, deobfuscate)
- autoexec, suspicious, iocs, hexstrings, base64strings, dridex, vbastrings = scanner.scan_summary()
- self.nb_autoexec += autoexec
- self.nb_suspicious += suspicious
- self.nb_iocs += iocs
- self.nb_hexstrings += hexstrings
- self.nb_base64strings += base64strings
- self.nb_dridexstrings += dridex
- self.nb_vbastrings += vbastrings
-
- return self.analysis_results
-
-
- def reveal(self):
- # we only want printable strings:
- analysis = self.analyze_macros(show_decoded_strings=False)
- # to avoid replacing short strings contained into longer strings, we sort the analysis results
- # based on the length of the encoded string, in reverse order:
- analysis = sorted(analysis, key=lambda type_decoded_encoded: len(type_decoded_encoded[2]), reverse=True)
- # normally now self.vba_code_all_modules contains source code from all modules
- # Need to collapse long lines:
- deobf_code = vba_collapse_long_lines(self.vba_code_all_modules)
- deobf_code = filter_vba(deobf_code)
- for kw_type, decoded, encoded in analysis:
- if kw_type == 'VBA string':
- #print '%3d occurences: %r => %r' % (deobf_code.count(encoded), encoded, decoded)
- # need to add double quotes around the decoded strings
- # after escaping double-quotes as double-double-quotes for VBA:
- decoded = decoded.replace('"', '""')
- decoded = '"%s"' % decoded
- # if the encoded string is enclosed in parentheses,
- # keep them in the decoded version:
- if encoded.startswith('(') and encoded.endswith(')'):
- decoded = '(%s)' % decoded
- deobf_code = deobf_code.replace(encoded, decoded)
- # # TODO: there is a bug somewhere which creates double returns '\r\r'
- # deobf_code = deobf_code.replace('\r\r', '\r')
- return deobf_code
- #TODO: repasser l'analyse plusieurs fois si des chaines hex ou base64 sont revelees
-
-
- def find_vba_forms(self):
- """
- Finds all the VBA forms stored in an OLE file.
-
- Return None if the file is not OLE but OpenXML.
- Return a list of tuples (vba_root, project_path, dir_path) for each VBA project.
- vba_root is the path of the root OLE storage containing the VBA project,
- including a trailing slash unless it is the root of the OLE file.
- project_path is the path of the OLE stream named "PROJECT" within the VBA project.
- dir_path is the path of the OLE stream named "VBA/dir" within the VBA project.
-
- If this function returns an empty list for one of the supported formats
- (i.e. Word, Excel, Powerpoint), then the file does not contain VBA forms.
-
- :return: None if OpenXML file, list of tuples (vba_root, project_path, dir_path)
- for each VBA project found if OLE file
- """
- log.debug('VBA_Parser.find_vba_forms')
-
- # if the file is not OLE but OpenXML, return None:
- if self.ole_file is None and self.type != TYPE_PPT:
- return None
-
- # if this method has already been called, return previous result:
- # if self.vba_projects is not None:
- # return self.vba_projects
-
- # According to MS-OFORMS section 2.1.2 Control Streams:
- # - A parent control, that is, a control that can contain embedded controls,
- # MUST be persisted as a storage that contains multiple streams.
- # - All parent controls MUST contain a FormControl. The FormControl
- # properties are persisted to a stream (1) as specified in section 2.1.1.2.
- # The name of this stream (1) MUST be "f".
- # - Embedded controls that cannot themselves contain other embedded
- # controls are persisted sequentially as FormEmbeddedActiveXControls
- # to a stream (1) contained in the same storage as the parent control.
- # The name of this stream (1) MUST be "o".
- # - all names are case-insensitive
-
- if self.type == TYPE_PPT:
- # TODO: so far, this function is never called for PPT files, but
- # if that happens, the information is lost which ole file contains
- # which storage!
- ole_files = self.ole_subfiles
- log.warning('Returned info is not complete for PPT types!')
- else:
- ole_files = [self.ole_file, ]
-
- # start with an empty list:
- self.vba_forms = []
-
- # Loop over ole streams
- for ole in ole_files:
- # Look for any storage containing those storage/streams:
- for storage in ole.listdir(streams=False, storages=True):
- log.debug('Checking storage %r' % storage)
- # Look for two streams named 'o' and 'f':
- o_stream = storage + ['o']
- f_stream = storage + ['f']
- log.debug('Checking if streams %r and %r exist' % (f_stream, o_stream))
- if ole.exists(o_stream) and ole.get_type(o_stream) == olefile.STGTY_STREAM \
- and ole.exists(f_stream) and ole.get_type(f_stream) == olefile.STGTY_STREAM:
- form_path = '/'.join(storage)
- log.debug('Found VBA Form: %r' % form_path)
- self.vba_forms.append(storage)
- return self.vba_forms
-
- def extract_form_strings(self):
- """
- Extract printable strings from each VBA Form found in the file
-
- Iterator: yields (filename, stream_path, vba_filename, vba_code) for each VBA macro found
- If the file is OLE, filename is the path of the file.
- If the file is OpenXML, filename is the path of the OLE subfile containing VBA macros
- within the zip archive, e.g. word/vbaProject.bin.
- If the file is PPT, result is as for OpenXML but filename is useless
- """
- if self.ole_file is None:
- # This may be either an OpenXML/PPT or a text file:
- if self.type == TYPE_TEXT:
- # This is a text file, return no results:
- return
- else:
- # OpenXML/PPT: recursively yield results from each OLE subfile:
- for ole_subfile in self.ole_subfiles:
- for results in ole_subfile.extract_form_strings():
- yield results
- else:
- # This is an OLE file:
- self.find_vba_forms()
- ole = self.ole_file
- for form_storage in self.vba_forms:
- o_stream = form_storage + ['o']
- log.debug('Opening form object stream %r' % '/'.join(o_stream))
- form_data = ole.openstream(o_stream).read()
- # Extract printable strings from the form object stream "o":
- for m in re_printable_string.finditer(form_data):
- log.debug('Printable string found in form: %r' % m.group())
- yield (self.filename, '/'.join(o_stream), m.group())
-
-
- def close(self):
- """
- Close all the open files. This method must be called after usage, if
- the application is opening many files.
- """
- if self.ole_file is None:
- if self.ole_subfiles is not None:
- for ole_subfile in self.ole_subfiles:
- ole_subfile.close()
- else:
- self.ole_file.close()
-
-
-
-class VBA_Parser_CLI(VBA_Parser):
- """
- VBA parser and analyzer, adding methods for the command line interface
- of olevba. (see VBA_Parser)
- """
-
- def __init__(self, *args, **kwargs):
- """
- Constructor for VBA_Parser_CLI.
- Calls __init__ from VBA_Parser with all arguments --> see doc there
- """
- super(VBA_Parser_CLI, self).__init__(*args, **kwargs)
-
-
- def print_analysis(self, show_decoded_strings=False, deobfuscate=False):
- """
- Analyze the provided VBA code, and print the results in a table
-
- :param vba_code: str, VBA source code to be analyzed
- :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
- :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
- :return: None
- """
- # print a waiting message only if the output is not redirected to a file:
- if sys.stdout.isatty():
- print('Analysis...\r', end='')
- sys.stdout.flush()
- results = self.analyze_macros(show_decoded_strings, deobfuscate)
- if results:
- t = prettytable.PrettyTable(('Type', 'Keyword', 'Description'))
- t.align = 'l'
- t.max_width['Type'] = 10
- t.max_width['Keyword'] = 20
- t.max_width['Description'] = 39
- for kw_type, keyword, description in results:
- # handle non printable strings:
- if not is_printable(keyword):
- keyword = repr(keyword)
- if not is_printable(description):
- description = repr(description)
- t.add_row((kw_type, keyword, description))
- print(t)
- else:
- print('No suspicious keyword or IOC found.')
-
- def print_analysis_json(self, show_decoded_strings=False, deobfuscate=False):
- """
- Analyze the provided VBA code, and return the results in json format
-
- :param vba_code: str, VBA source code to be analyzed
- :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
- :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
-
- :return: dict
- """
- # print a waiting message only if the output is not redirected to a file:
- if sys.stdout.isatty():
- print('Analysis...\r', end='')
- sys.stdout.flush()
- return [dict(type=kw_type, keyword=keyword, description=description)
- for kw_type, keyword, description in self.analyze_macros(show_decoded_strings, deobfuscate)]
-
- def process_file(self, show_decoded_strings=False,
- display_code=True, hide_attributes=True,
- vba_code_only=False, show_deobfuscated_code=False,
- deobfuscate=False):
- """
- Process a single file
-
- :param filename: str, path and filename of file on disk, or within the container.
- :param data: bytes, content of the file if it is in a container, None if it is a file on disk.
- :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
- :param display_code: bool, if False VBA source code is not displayed (default True)
- :param global_analysis: bool, if True all modules are merged for a single analysis (default),
- otherwise each module is analyzed separately (old behaviour)
- :param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default)
- :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
- """
- #TODO: replace print by writing to a provided output file (sys.stdout by default)
- # fix conflicting parameters:
- if vba_code_only and not display_code:
- display_code = True
- if self.container:
- display_filename = '%s in %s' % (self.filename, self.container)
- else:
- display_filename = self.filename
- print('=' * 79)
- print('FILE: %s' % display_filename)
- try:
- #TODO: handle olefile errors, when an OLE file is malformed
- print('Type: %s'% self.type)
- if self.detect_vba_macros():
- #print 'Contains VBA Macros:'
- for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros():
- if hide_attributes:
- # hide attribute lines:
- if isinstance(vba_code,bytes):
- vba_code =vba_code.decode('utf-8','backslashreplace')
- vba_code_filtered = filter_vba(vba_code)
- else:
- vba_code_filtered = vba_code
- print('-' * 79)
- print('VBA MACRO %s ' % vba_filename)
- print('in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)))
- if display_code:
- print('- ' * 39)
- # detect empty macros:
- if vba_code_filtered.strip() == '':
- print('(empty macro)')
- else:
- print(vba_code_filtered)
- for (subfilename, stream_path, form_string) in self.extract_form_strings():
- print('-' * 79)
- print('VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path))
- print('- ' * 39)
- print(form_string.decode('utf-8', 'ignore'))
- if not vba_code_only:
- # analyse the code from all modules at once:
- self.print_analysis(show_decoded_strings, deobfuscate)
- if show_deobfuscated_code:
- print('MACRO SOURCE CODE WITH DEOBFUSCATED VBA STRINGS (EXPERIMENTAL):\n\n')
- print(self.reveal())
- else:
- print('No VBA macros found.')
- except OlevbaBaseException:
- raise
- except Exception as exc:
- # display the exception with full stack trace for debugging
- log.info('Error processing file %s (%s)' % (self.filename, exc))
- log.debug('Traceback:', exc_info=True)
- raise ProcessingError(self.filename, exc)
- print('')
-
-
- def process_file_json(self, show_decoded_strings=False,
- display_code=True, hide_attributes=True,
- vba_code_only=False, show_deobfuscated_code=False,
- deobfuscate=False):
- """
- Process a single file
-
- every "show" or "print" here is to be translated as "add to json"
-
- :param filename: str, path and filename of file on disk, or within the container.
- :param data: bytes, content of the file if it is in a container, None if it is a file on disk.
- :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
- :param display_code: bool, if False VBA source code is not displayed (default True)
- :param global_analysis: bool, if True all modules are merged for a single analysis (default),
- otherwise each module is analyzed separately (old behaviour)
- :param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default)
- :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
- """
- #TODO: fix conflicting parameters (?)
-
- if vba_code_only and not display_code:
- display_code = True
-
- result = {}
-
- if self.container:
- result['container'] = self.container
- else:
- result['container'] = None
- result['file'] = self.filename
- result['json_conversion_successful'] = False
- result['analysis'] = None
- result['code_deobfuscated'] = None
- result['do_deobfuscate'] = deobfuscate
-
- try:
- #TODO: handle olefile errors, when an OLE file is malformed
- result['type'] = self.type
- macros = []
- if self.detect_vba_macros():
- for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros():
- curr_macro = {}
- if isinstance(vba_code, bytes):
- vba_code = vba_code.decode('utf-8', 'backslashreplace')
-
- if hide_attributes:
- # hide attribute lines:
- vba_code_filtered = filter_vba(vba_code)
- else:
- vba_code_filtered = vba_code
-
- curr_macro['vba_filename'] = vba_filename
- curr_macro['subfilename'] = subfilename
- curr_macro['ole_stream'] = stream_path
- if display_code:
- curr_macro['code'] = vba_code_filtered.strip()
- else:
- curr_macro['code'] = None
- macros.append(curr_macro)
- if not vba_code_only:
- # analyse the code from all modules at once:
- result['analysis'] = self.print_analysis_json(show_decoded_strings,
- deobfuscate)
- if show_deobfuscated_code:
- result['code_deobfuscated'] = self.reveal()
- result['macros'] = macros
- result['json_conversion_successful'] = True
- except Exception as exc:
- # display the exception with full stack trace for debugging
- log.info('Error processing file %s (%s)' % (self.filename, exc))
- log.debug('Traceback:', exc_info=True)
- raise ProcessingError(self.filename, exc)
-
- return result
-
-
- def process_file_triage(self, show_decoded_strings=False, deobfuscate=False):
- """
- Process a file in triage mode, showing only summary results on one line.
- """
- #TODO: replace print by writing to a provided output file (sys.stdout by default)
- try:
- #TODO: handle olefile errors, when an OLE file is malformed
- if self.detect_vba_macros():
- # print a waiting message only if the output is not redirected to a file:
- if sys.stdout.isatty():
- print('Analysis...\r', end='')
- sys.stdout.flush()
- self.analyze_macros(show_decoded_strings=show_decoded_strings,
- deobfuscate=deobfuscate)
- flags = TYPE2TAG[self.type]
- macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = vba_obf = '-'
- if self.contains_macros: macros = 'M'
- if self.nb_autoexec: autoexec = 'A'
- if self.nb_suspicious: suspicious = 'S'
- if self.nb_iocs: iocs = 'I'
- if self.nb_hexstrings: hexstrings = 'H'
- if self.nb_base64strings: base64obf = 'B'
- if self.nb_dridexstrings: dridex = 'D'
- if self.nb_vbastrings: vba_obf = 'V'
- flags += '%s%s%s%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings,
- base64obf, dridex, vba_obf)
-
- line = '%-12s %s' % (flags, self.filename)
- print(line)
-
- # old table display:
- # macros = autoexec = suspicious = iocs = hexstrings = 'no'
- # if nb_macros: macros = 'YES:%d' % nb_macros
- # if nb_autoexec: autoexec = 'YES:%d' % nb_autoexec
- # if nb_suspicious: suspicious = 'YES:%d' % nb_suspicious
- # if nb_iocs: iocs = 'YES:%d' % nb_iocs
- # if nb_hexstrings: hexstrings = 'YES:%d' % nb_hexstrings
- # # 2nd line = info
- # print '%-8s %-7s %-7s %-7s %-7s %-7s' % (self.type, macros, autoexec, suspicious, iocs, hexstrings)
- except Exception as exc:
- # display the exception with full stack trace for debugging only
- log.debug('Error processing file %s (%s)' % (self.filename, exc),
- exc_info=True)
- raise ProcessingError(self.filename, exc)
-
-
- # t = prettytable.PrettyTable(('filename', 'type', 'macros', 'autoexec', 'suspicious', 'ioc', 'hexstrings'),
- # header=False, border=False)
- # t.align = 'l'
- # t.max_width['filename'] = 30
- # t.max_width['type'] = 10
- # t.max_width['macros'] = 6
- # t.max_width['autoexec'] = 6
- # t.max_width['suspicious'] = 6
- # t.max_width['ioc'] = 6
- # t.max_width['hexstrings'] = 6
- # t.add_row((filename, ftype, macros, autoexec, suspicious, iocs, hexstrings))
- # print t
-
-
-#=== MAIN =====================================================================
-
-def parse_args(cmd_line_args=None):
- """ parse command line arguments (given ones or per default sys.argv) """
-
- DEFAULT_LOG_LEVEL = "warning" # Default log level
- LOG_LEVELS = {
- 'debug': logging.DEBUG,
- 'info': logging.INFO,
- 'warning': logging.WARNING,
- 'error': logging.ERROR,
- 'critical': logging.CRITICAL
- }
-
- usage = 'usage: olevba [options] [filename2 ...]'
- parser = optparse.OptionParser(usage=usage)
- # parser.add_option('-o', '--outfile', dest='outfile',
- # help='output file')
- # parser.add_option('-c', '--csv', dest='csv',
- # help='export results to a CSV file')
- parser.add_option("-r", action="store_true", dest="recursive",
- help='find files recursively in subdirectories.')
- parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
- help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)')
- parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
- help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
- # output mode; could make this even simpler with add_option(type='choice') but that would make
- # cmd line interface incompatible...
- modes = optparse.OptionGroup(parser, title='Output mode (mutually exclusive)')
- modes.add_option("-t", '--triage', action="store_const", dest="output_mode",
- const='triage', default='unspecified',
- help='triage mode, display results as a summary table (default for multiple files)')
- modes.add_option("-d", '--detailed', action="store_const", dest="output_mode",
- const='detailed', default='unspecified',
- help='detailed mode, display full results (default for single file)')
- modes.add_option("-j", '--json', action="store_const", dest="output_mode",
- const='json', default='unspecified',
- help='json mode, detailed in json format (never default)')
- parser.add_option_group(modes)
- parser.add_option("-a", '--analysis', action="store_false", dest="display_code", default=True,
- help='display only analysis results, not the macro source code')
- parser.add_option("-c", '--code', action="store_true", dest="vba_code_only", default=False,
- help='display only VBA source code, do not analyze it')
- parser.add_option("--decode", action="store_true", dest="show_decoded_strings",
- help='display all the obfuscated strings with their decoded content (Hex, Base64, StrReverse, Dridex, VBA).')
- parser.add_option("--attr", action="store_false", dest="hide_attributes", default=True,
- help='display the attribute lines at the beginning of VBA source code')
- parser.add_option("--reveal", action="store_true", dest="show_deobfuscated_code",
- help='display the macro source code after replacing all the obfuscated strings by their decoded content.')
- parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,
- help="logging level debug/info/warning/error/critical (default=%default)")
- parser.add_option('--deobf', dest="deobfuscate", action="store_true", default=False,
- help="Attempt to deobfuscate VBA expressions (slow)")
- parser.add_option('--relaxed', dest="relaxed", action="store_true", default=False,
- help="Do not raise errors if opening of substream fails")
-
- (options, args) = parser.parse_args(cmd_line_args)
-
- # Print help if no arguments are passed
- if len(args) == 0:
- print('olevba %s - http://decalage.info/python/oletools' % __version__)
- print(__doc__)
- parser.print_help()
- sys.exit(RETURN_WRONG_ARGS)
-
- options.loglevel = LOG_LEVELS[options.loglevel]
-
- return options, args
-
-
-def main(cmd_line_args=None):
- """
- Main function, called when olevba is run from the command line
-
- Optional argument: command line arguments to be forwarded to ArgumentParser
- in process_args. Per default (cmd_line_args=None), sys.argv is used. Option
- mainly added for unit-testing
- """
-
- options, args = parse_args(cmd_line_args)
-
- # provide info about tool and its version
- if options.output_mode == 'json':
- # print first json entry with meta info and opening '['
- print_json(script_name='olevba', version=__version__,
- url='http://decalage.info/python/oletools',
- type='MetaInformation', _json_is_first=True)
- else:
- print('olevba3 %s - http://decalage.info/python/oletools' % __version__)
-
- logging.basicConfig(level=options.loglevel, format='%(levelname)-8s %(message)s')
- # enable logging in the modules:
- enable_logging()
-
- # Old display with number of items detected:
- # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('Type', 'Macros', 'AutoEx', 'Susp.', 'IOCs', 'HexStr')
- # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('-'*8, '-'*7, '-'*7, '-'*7, '-'*7, '-'*7)
-
- # with the option --reveal, make sure --deobf is also enabled:
- if options.show_deobfuscated_code and not options.deobfuscate:
- log.info('set --deobf because --reveal was set')
- options.deobfuscate = True
- if options.output_mode == 'triage' and options.show_deobfuscated_code:
- log.info('ignoring option --reveal in triage output mode')
-
- # Column headers (do not know how many files there will be yet, so if no output_mode
- # was specified, we will print triage for first file --> need these headers)
- if options.output_mode in ('triage', 'unspecified'):
- print('%-12s %-65s' % ('Flags', 'Filename'))
- print('%-12s %-65s' % ('-' * 11, '-' * 65))
-
- previous_container = None
- count = 0
- container = filename = data = None
- vba_parser = None
- return_code = RETURN_OK
- try:
- for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
- zip_password=options.zip_password, zip_fname=options.zip_fname):
- # ignore directory names stored in zip files:
- if container and filename.endswith('/'):
- continue
-
- # handle errors from xglob
- if isinstance(data, Exception):
- if isinstance(data, PathNotFoundException):
- if options.output_mode in ('triage', 'unspecified'):
- print('%-12s %s - File not found' % ('?', filename))
- elif options.output_mode != 'json':
- log.error('Given path %r does not exist!' % filename)
- return_code = RETURN_FILE_NOT_FOUND if return_code == 0 \
- else RETURN_SEVERAL_ERRS
- else:
- if options.output_mode in ('triage', 'unspecified'):
- print('%-12s %s - Failed to read from zip file %s' % ('?', filename, container))
- elif options.output_mode != 'json':
- log.error('Exception opening/reading %r from zip file %r: %s'
- % (filename, container, data))
- return_code = RETURN_XGLOB_ERR if return_code == 0 \
- else RETURN_SEVERAL_ERRS
- if options.output_mode == 'json':
- print_json(file=filename, type='error',
- error=type(data).__name__, message=str(data))
- continue
-
- try:
- # Open the file
- vba_parser = VBA_Parser_CLI(filename, data=data, container=container,
- relaxed=options.relaxed)
-
- if options.output_mode == 'detailed':
- # fully detailed output
- vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
- display_code=options.display_code,
- hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
- show_deobfuscated_code=options.show_deobfuscated_code,
- deobfuscate=options.deobfuscate)
- elif options.output_mode in ('triage', 'unspecified'):
- # print container name when it changes:
- if container != previous_container:
- if container is not None:
- print('\nFiles in %s:' % container)
- previous_container = container
- # summarized output for triage:
- vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings,
- deobfuscate=options.deobfuscate)
- elif options.output_mode == 'json':
- print_json(
- vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings,
- display_code=options.display_code,
- hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
- show_deobfuscated_code=options.show_deobfuscated_code,
- deobfuscate=options.deobfuscate))
- else: # (should be impossible)
- raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode))
- count += 1
-
- except (SubstreamOpenError, UnexpectedDataError) as exc:
- if options.output_mode in ('triage', 'unspecified'):
- print('%-12s %s - Error opening substream or uenxpected ' \
- 'content' % ('?', filename))
- elif options.output_mode == 'json':
- print_json(file=filename, type='error',
- error=type(exc).__name__, message=str(exc))
- else:
- log.exception('Error opening substream or unexpected '
- 'content in %s' % filename)
- return_code = RETURN_OPEN_ERROR if return_code == 0 \
- else RETURN_SEVERAL_ERRS
- except FileOpenError as exc:
- if options.output_mode in ('triage', 'unspecified'):
- print('%-12s %s - File format not supported' % ('?', filename))
- elif options.output_mode == 'json':
- print_json(file=filename, type='error',
- error=type(exc).__name__, message=str(exc))
- else:
- log.exception('Failed to open %s -- probably not supported!' % filename)
- return_code = RETURN_OPEN_ERROR if return_code == 0 \
- else RETURN_SEVERAL_ERRS
- except ProcessingError as exc:
- if options.output_mode in ('triage', 'unspecified'):
- print('%-12s %s - %s' % ('!ERROR', filename, exc.orig_exc))
- elif options.output_mode == 'json':
- print_json(file=filename, type='error',
- error=type(exc).__name__,
- message=str(exc.orig_exc))
- else:
- log.exception('Error processing file %s (%s)!'
- % (filename, exc.orig_exc))
- return_code = RETURN_PARSE_ERROR if return_code == 0 \
- else RETURN_SEVERAL_ERRS
- except FileIsEncryptedError as exc:
- if options.output_mode in ('triage', 'unspecified'):
- print('%-12s %s - File is encrypted' % ('!ERROR', filename))
- elif options.output_mode == 'json':
- print_json(file=filename, type='error',
- error=type(exc).__name__, message=str(exc))
- else:
- log.exception('File %s is encrypted!' % (filename))
- return_code = RETURN_ENCRYPTED if return_code == 0 \
- else RETURN_SEVERAL_ERRS
- # Here we do not close the vba_parser, because process_file may need it below.
-
- finally:
- if vba_parser is not None:
- vba_parser.close()
-
- if options.output_mode == 'triage':
- print('\n(Flags: OpX=OpenXML, XML=Word2003XML, FlX=FlatOPC XML, MHT=MHTML, TXT=Text, M=Macros, ' \
- 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \
- 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n')
-
- if count == 1 and options.output_mode == 'unspecified':
- # if options -t, -d and -j were not specified and it's a single file, print details:
- vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
- display_code=options.display_code,
- hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
- show_deobfuscated_code=options.show_deobfuscated_code,
- deobfuscate=options.deobfuscate)
-
- if options.output_mode == 'json':
- # print last json entry (a last one without a comma) and closing ]
- print_json(type='MetaInformation', return_code=return_code,
- n_processed=count, _json_is_last=True)
-
- except Exception as exc:
- # some unexpected error, maybe some of the types caught in except clauses
- # above were not sufficient. This is very bad, so log complete trace at exception level
- # and do not care about output mode
- log.exception('Unhandled exception in main: %s' % exc, exc_info=True)
- return_code = RETURN_UNEXPECTED # even if there were others before -- this is more important
- # TODO: print msg with URL to report issues (except in JSON mode)
-
- # done. exit
- log.debug('will exit now with code %s' % return_code)
- sys.exit(return_code)
+from oletools.olevba import *
+from oletools.olevba import __doc__, __version__
if __name__ == '__main__':
main()
-# This was coded while listening to "Dust" from I Love You But I've Chosen Darkness
diff --git a/oletools/ooxml.py b/oletools/ooxml.py
index 174c46d..a36c99d 100644
--- a/oletools/ooxml.py
+++ b/oletools/ooxml.py
@@ -16,11 +16,11 @@ TODO: "xml2003" == "flatopc"?
"""
import sys
-from oletools.common.log_helper import log_helper
from zipfile import ZipFile, BadZipfile, is_zipfile
from os.path import splitext
import io
import re
+from oletools.common.log_helper import log_helper
# import lxml or ElementTree for XML parsing:
try:
@@ -107,16 +107,14 @@ def debug_str(elem):
text = u', '.join(parts)
if len(text) > 150:
return text[:147] + u'...]'
- else:
- return text + u']'
+ return text + u']'
def isstr(some_var):
""" version-independent test for isinstance(some_var, (str, unicode)) """
if sys.version_info.major == 2:
return isinstance(some_var, basestring) # true for str and unicode
- else:
- return isinstance(some_var, str) # there is no unicode
+ return isinstance(some_var, str) # there is no unicode
###############################################################################
@@ -136,23 +134,29 @@ def get_type(filename):
prog_id = match.groups()[0]
if prog_id == WORD_XML_PROG_ID:
return DOCTYPE_WORD_XML
- elif prog_id == EXCEL_XML_PROG_ID:
+ if prog_id == EXCEL_XML_PROG_ID:
return DOCTYPE_EXCEL_XML
- else:
- return DOCTYPE_NONE
+ return DOCTYPE_NONE
is_doc = False
is_xls = False
is_ppt = False
- for _, elem, _ in parser.iter_xml(FILE_CONTENT_TYPES):
- logger.debug(u' ' + debug_str(elem))
- try:
- content_type = elem.attrib['ContentType']
- except KeyError: # ContentType not an attr
- continue
- is_xls |= content_type.startswith(CONTENT_TYPES_EXCEL)
- is_doc |= content_type.startswith(CONTENT_TYPES_WORD)
- is_ppt |= content_type.startswith(CONTENT_TYPES_PPT)
+ try:
+ for _, elem, _ in parser.iter_xml(FILE_CONTENT_TYPES):
+ logger.debug(u' ' + debug_str(elem))
+ try:
+ content_type = elem.attrib['ContentType']
+ except KeyError: # ContentType not an attr
+ continue
+ is_xls |= content_type.startswith(CONTENT_TYPES_EXCEL)
+ is_doc |= content_type.startswith(CONTENT_TYPES_WORD)
+ is_ppt |= content_type.startswith(CONTENT_TYPES_PPT)
+ except BadOOXML as oo_err:
+ if oo_err.more_info.startswith('invalid subfile') and \
+ FILE_CONTENT_TYPES in oo_err.more_info:
+ # no FILE_CONTENT_TYPES in zip, so probably no ms office xml.
+ return DOCTYPE_NONE
+ raise
if is_doc and not is_xls and not is_ppt:
return DOCTYPE_WORD
@@ -162,9 +166,8 @@ def get_type(filename):
return DOCTYPE_POWERPOINT
if not is_doc and not is_xls and not is_ppt:
return DOCTYPE_NONE
- else:
- logger.warning('Encountered contradictory content types')
- return DOCTYPE_MIXED
+ logger.warning('Encountered contradictory content types')
+ return DOCTYPE_MIXED
def is_ooxml(filename):
@@ -177,6 +180,7 @@ def is_ooxml(filename):
return False
if doctype == DOCTYPE_NONE:
return False
+ return True
###############################################################################
@@ -216,6 +220,7 @@ class ZipSubFile(object):
See also (and maybe could some day merge with):
ppt_record_parser.IterStream; also: oleobj.FakeFile
"""
+ CHUNK_SIZE = 4096
def __init__(self, container, filename, mode='r', size=None):
""" remember all necessary vars but do not open yet """
@@ -253,7 +258,7 @@ class ZipSubFile(object):
# print('ZipSubFile: opened; size={}'.format(self.size))
return self
- def write(self, *args, **kwargs): # pylint: disable=unused-argument,no-self-use
+ def write(self, *args, **kwargs):
""" write is not allowed """
raise IOError('writing not implemented')
@@ -311,10 +316,9 @@ class ZipSubFile(object):
""" helper for seek: skip forward by given amount using read() """
# print('ZipSubFile: seek by skipping {} bytes starting at {}'
# .format(self.pos, to_skip))
- CHUNK_SIZE = 4096
- n_chunks, leftover = divmod(to_skip, CHUNK_SIZE)
+ n_chunks, leftover = divmod(to_skip, self.CHUNK_SIZE)
for _ in range(n_chunks):
- self.read(CHUNK_SIZE) # just read and discard
+ self.read(self.CHUNK_SIZE) # just read and discard
self.read(leftover)
# print('ZipSubFile: seek by skipping done, pos now {}'
# .format(self.pos))
@@ -417,8 +421,7 @@ class XmlParser(object):
if match:
self._is_single_xml = True
return True
- if not match:
- raise BadOOXML(self.filename, 'is no zip and has no prog_id')
+ raise BadOOXML(self.filename, 'is no zip and has no prog_id')
def iter_files(self, args=None):
""" Find files in zip or just give single xml file """
@@ -433,17 +436,14 @@ class XmlParser(object):
subfiles = None
try:
zipper = ZipFile(self.filename)
- try:
- _ = zipper.getinfo(FILE_CONTENT_TYPES)
- except KeyError:
- raise BadOOXML(self.filename,
- 'No content type information')
if not args:
subfiles = zipper.namelist()
elif isstr(args):
subfiles = [args, ]
else:
- subfiles = tuple(args) # make a copy in case orig changes
+ # make a copy in case original args are modified
+ # Not sure whether this really is needed...
+ subfiles = tuple(arg for arg in args)
for subfile in subfiles:
with zipper.open(subfile, 'r') as handle:
@@ -451,10 +451,12 @@ class XmlParser(object):
if not args:
self.did_iter_all = True
except KeyError as orig_err:
+ # Note: do not change text of this message without adjusting
+ # conditions in except handlers
raise BadOOXML(self.filename,
'invalid subfile: ' + str(orig_err))
except BadZipfile:
- raise BadOOXML(self.filename, 'neither zip nor xml')
+ raise BadOOXML(self.filename, 'not in zip format')
finally:
if zipper:
zipper.close()
@@ -503,7 +505,7 @@ class XmlParser(object):
if event == 'start':
if elem.tag in want_tags:
logger.debug('remember start of tag {0} at {1}'
- .format(elem.tag, depth))
+ .format(elem.tag, depth))
inside_tags.append((elem.tag, depth))
depth += 1
continue
@@ -519,18 +521,18 @@ class XmlParser(object):
inside_tags.pop()
else:
logger.error('found end for wanted tag {0} '
- 'but last start tag {1} does not'
- ' match'.format(curr_tag,
- inside_tags[-1]))
+ 'but last start tag {1} does not'
+ ' match'.format(curr_tag,
+ inside_tags[-1]))
# try to recover: close all deeper tags
while inside_tags and \
inside_tags[-1][1] >= depth:
logger.debug('recover: pop {0}'
- .format(inside_tags[-1]))
+ .format(inside_tags[-1]))
inside_tags.pop()
except IndexError: # no inside_tag[-1]
logger.error('found end of {0} at depth {1} but '
- 'no start event')
+ 'no start event'.format(elem.tag, depth))
# yield element
if is_wanted or not want_tags:
yield subfile, elem, depth
@@ -544,7 +546,7 @@ class XmlParser(object):
except ET.ParseError as err:
self.subfiles_no_xml.add(subfile)
if subfile is None: # this is no zip subfile but single xml
- raise BadOOXML(self.filename, 'is neither zip nor xml')
+ raise BadOOXML(self.filename, 'content is not valid XML')
elif subfile.endswith('.xml'):
log = logger.warning
else:
@@ -568,21 +570,30 @@ class XmlParser(object):
defaults = []
files = []
- for _, elem, _ in self.iter_xml(FILE_CONTENT_TYPES):
- if elem.tag.endswith('Default'):
- extension = elem.attrib['Extension']
- if extension.startswith('.'):
- extension = extension[1:]
- defaults.append((extension, elem.attrib['ContentType']))
- logger.debug('found content type for extension {0[0]}: {0[1]}'
- .format(defaults[-1]))
- elif elem.tag.endswith('Override'):
- subfile = elem.attrib['PartName']
- if subfile.startswith('/'):
- subfile = subfile[1:]
- files.append((subfile, elem.attrib['ContentType']))
- logger.debug('found content type for subfile {0[0]}: {0[1]}'
- .format(files[-1]))
+ try:
+ for _, elem, _ in self.iter_xml(FILE_CONTENT_TYPES):
+ if elem.tag.endswith('Default'):
+ extension = elem.attrib['Extension']
+ if extension.startswith('.'):
+ extension = extension[1:]
+ defaults.append((extension, elem.attrib['ContentType']))
+ logger.debug('found content type for extension {0[0]}: '
+ '{0[1]}'.format(defaults[-1]))
+ elif elem.tag.endswith('Override'):
+ subfile = elem.attrib['PartName']
+ if subfile.startswith('/'):
+ subfile = subfile[1:]
+ files.append((subfile, elem.attrib['ContentType']))
+ logger.debug('found content type for subfile {0[0]}: '
+ '{0[1]}'.format(files[-1]))
+ except BadOOXML as oo_err:
+ if oo_err.more_info.startswith('invalid subfile') and \
+ FILE_CONTENT_TYPES in oo_err.more_info:
+ # no FILE_CONTENT_TYPES in zip, so probably no ms office xml.
+ # Maybe OpenDocument format? In any case, try to analyze.
+ pass
+ else:
+ raise
return dict(files), dict(defaults)
def iter_non_xml(self):
@@ -599,7 +610,7 @@ class XmlParser(object):
"""
if not self.did_iter_all:
logger.warning('Did not iterate through complete file. '
- 'Should run iter_xml() without args, first.')
+ 'Should run iter_xml() without args, first.')
if not self.subfiles_no_xml:
return
@@ -631,7 +642,7 @@ def test():
see module doc for more info
"""
- log_helper.enable_logging(False, logger.DEBUG)
+ log_helper.enable_logging(False, 'debug')
if len(sys.argv) != 2:
print(u'To test this code, give me a single file as arg')
return 2
diff --git a/oletools/ppt_parser.py b/oletools/ppt_parser.py
index a98b9bc..fa1fd29 100644
--- a/oletools/ppt_parser.py
+++ b/oletools/ppt_parser.py
@@ -43,7 +43,7 @@ file structure and will replace this module some time soon!
# 2017-04-23 v0.51 PL: - fixed absolute imports and issue #101
# 2018-09-11 v0.54 PL: - olefile is now a dependency
-__version__ = '0.54dev1'
+__version__ = '0.54'
# --- IMPORTS ------------------------------------------------------------------
diff --git a/oletools/ppt_record_parser.py b/oletools/ppt_record_parser.py
index acdc0dd..f8d54ea 100644
--- a/oletools/ppt_record_parser.py
+++ b/oletools/ppt_record_parser.py
@@ -63,7 +63,6 @@ except ImportError:
sys.path.insert(0, PARENT_DIR)
del PARENT_DIR
from oletools import record_base
-from oletools.common.errors import FileIsEncryptedError
# types of relevant records (there are much more than listed here)
@@ -109,10 +108,11 @@ RECORD_TYPES = dict([
])
-# record types where version is not 0x0 or 0xf
+# record types where version is not 0x0 or 0x1 or 0xf
VERSION_EXCEPTIONS = dict([
(0x0400, 2), # rt_vbainfoatom
(0x03ef, 2), # rt_slideatom
+ (0xe9c7, 7), # tests/test-data/encrypted/encrypted.ppt, not investigated
])
@@ -149,6 +149,10 @@ def is_ppt(filename):
Param filename can be anything that OleFileIO constructor accepts: name of
file or file data or data stream.
+ Will not try to decrypt the file, nor even try to determine whether it is
+ encrypted. If the file is encrypted, this will either raise an error or
+ just return `False`.
+
see also: oleid.OleID.check_powerpoint
"""
have_current_user = False
@@ -170,7 +174,7 @@ def is_ppt(filename):
for record in stream.iter_records():
if record.type == 0x0ff5: # UserEditAtom
have_user_edit = True
- elif record.type == 0x1772: # PersisDirectoryAtom
+ elif record.type == 0x1772: # PersistDirectoryAtom
have_persist_dir = True
elif record.type == 0x03e8: # DocumentContainer
have_document_container = True
@@ -181,13 +185,12 @@ def is_ppt(filename):
return True
else: # ignore other streams/storages since they are optional
continue
- except FileIsEncryptedError:
- assert ppt_file is not None, \
- 'Encryption error should not be raised from just opening OLE file.'
- # just rely on stream names, copied from oleid
- return ppt_file.exists('PowerPoint Document')
- except Exception:
- pass
+ except Exception as exc:
+ logging.debug('Ignoring exception in is_ppt, assume is not ppt',
+ exc_info=True)
+ finally:
+ if ppt_file is not None:
+ ppt_file.close()
return False
diff --git a/oletools/pyxswf.py b/oletools/pyxswf.py
index 27f1254..63861db 100644
--- a/oletools/pyxswf.py
+++ b/oletools/pyxswf.py
@@ -25,7 +25,7 @@ http://www.decalage.info/python/oletools
#=== LICENSE =================================================================
-# pyxswf is copyright (c) 2012-2016, Philippe Lagadec (http://www.decalage.info)
+# pyxswf is copyright (c) 2012-2019, Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
@@ -59,7 +59,7 @@ http://www.decalage.info/python/oletools
# 2016-11-01 PL: - replaced StringIO by BytesIO for Python 3
# 2018-09-11 v0.54 PL: - olefile is now a dependency
-__version__ = '0.54dev1'
+__version__ = '0.54'
#------------------------------------------------------------------------------
# TODO:
diff --git a/oletools/record_base.py b/oletools/record_base.py
index 49b3cb5..db96a63 100644
--- a/oletools/record_base.py
+++ b/oletools/record_base.py
@@ -8,7 +8,10 @@ This is the case for xls and ppt, so classes are bases for xls_parser.py and
ppt_record_parser.py .
"""
-# === LICENSE =================================================================
+# === LICENSE ==================================================================
+
+# record_base is copyright (c) 2014-2019 Philippe Lagadec (http://www.decalage.info)
+# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
@@ -37,8 +40,10 @@ from __future__ import print_function
# CHANGELOG:
# 2017-11-30 v0.01 CH: - first version based on xls_parser
# 2018-09-11 v0.54 PL: - olefile is now a dependency
+# 2019-01-30 PL: - fixed import to avoid mixing installed oletools
+# and dev version
-__version__ = '0.54dev1'
+__version__ = '0.54'
# -----------------------------------------------------------------------------
# TODO:
@@ -63,16 +68,12 @@ import logging
import olefile
-try:
- from oletools.common.errors import FileIsEncryptedError
-except ImportError:
- # little hack to allow absolute imports even if oletools is not installed.
- PARENT_DIR = os.path.normpath(os.path.dirname(os.path.dirname(
- os.path.abspath(__file__))))
- if PARENT_DIR not in sys.path:
- sys.path.insert(0, PARENT_DIR)
- del PARENT_DIR
- from oletools.common.errors import FileIsEncryptedError
+# little hack to allow absolute imports even if oletools is not installed.
+PARENT_DIR = os.path.normpath(os.path.dirname(os.path.dirname(
+ os.path.abspath(__file__))))
+if PARENT_DIR not in sys.path:
+ sys.path.insert(0, PARENT_DIR)
+del PARENT_DIR
from oletools import oleid
@@ -125,10 +126,9 @@ class OleRecordFile(olefile.OleFileIO):
"""
def open(self, filename, *args, **kwargs):
- """Call OleFileIO.open, raise error if is encrypted."""
+ """Call OleFileIO.open."""
#super(OleRecordFile, self).open(filename, *args, **kwargs)
OleFileIO.open(self, filename, *args, **kwargs)
- self.is_encrypted = oleid.OleID(self).check_encrypted().value
@classmethod
def stream_class_for_name(cls, stream_name):
@@ -161,8 +161,7 @@ class OleRecordFile(olefile.OleFileIO):
stream = clz(self._open(direntry.isectStart, direntry.size),
direntry.size,
None if is_orphan else direntry.name,
- direntry.entry_type,
- self.is_encrypted)
+ direntry.entry_type)
yield stream
stream.close()
@@ -175,14 +174,13 @@ class OleRecordStream(object):
abstract base class
"""
- def __init__(self, stream, size, name, stream_type, is_encrypted=False):
+ def __init__(self, stream, size, name, stream_type):
self.stream = stream
self.size = size
self.name = name
if stream_type not in ENTRY_TYPE2STR:
raise ValueError('Unknown stream type: {0}'.format(stream_type))
self.stream_type = stream_type
- self.is_encrypted = is_encrypted
def read_record_head(self):
""" read first few bytes of record to determine size and type
@@ -211,9 +209,6 @@ class OleRecordStream(object):
Stream must be positioned at start of records (e.g. start of stream).
"""
- if self.is_encrypted:
- raise FileIsEncryptedError()
-
while True:
# unpacking as in olevba._extract_vba
pos = self.stream.tell()
diff --git a/oletools/rtfobj.py b/oletools/rtfobj.py
index 7179045..4f4176b 100644
--- a/oletools/rtfobj.py
+++ b/oletools/rtfobj.py
@@ -17,7 +17,7 @@ http://www.decalage.info/python/oletools
#=== LICENSE =================================================================
-# rtfobj is copyright (c) 2012-2018, Philippe Lagadec (http://www.decalage.info)
+# rtfobj is copyright (c) 2012-2019, Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
@@ -88,8 +88,10 @@ http://www.decalage.info/python/oletools
# 2018-05-31 v0.53.1 PP: - fixed issue #316: whitespace after \bin on Python 3
# 2018-06-22 v0.53.2 PL: - fixed issue #327: added "\pnaiu" & "\pnaiud"
# 2018-09-11 v0.54 PL: - olefile is now a dependency
+# 2019-07-08 v0.55 MM: - added URL carver for CVE-2017-0199 (OLE2Link) and Equation Editor detection (CVE-2017-11882), PR #460
+# - added SCT to the list of executable file extensions PR #461
-__version__ = '0.54dev1'
+__version__ = '0.55.dev3'
# ------------------------------------------------------------------------------
# TODO:
@@ -103,7 +105,7 @@ __version__ = '0.54dev1'
# === IMPORTS =================================================================
-import re, os, sys, binascii, logging, optparse
+import re, os, sys, binascii, logging, optparse, hashlib
import os.path
from time import time
@@ -268,7 +270,7 @@ re_delim_hexblock = re.compile(DELIMITER + PATTERN)
# TODO: use a frozenset instead of a regex?
re_executable_extensions = re.compile(
- r"(?i)\.(EXE|COM|PIF|GADGET|MSI|MSP|MSC|VBS|VBE|VB|JSE|JS|WSF|WSC|WSH|WS|BAT|CMD|DLL|SCR|HTA|CPL|CLASS|JAR|PS1XML|PS1|PS2XML|PS2|PSC1|PSC2|SCF|LNK|INF|REG)\b")
+ r"(?i)\.(BAT|CLASS|CMD|COM|CPL|DLL|EXE|GADGET|HTA|INF|JAR|JS|JSE|LNK|MSC|MSI|MSP|PIF|PS1|PS1XML|PS2|PS2XML|PSC1|PSC2|REG|SCF|SCR|SCT|VB|VBE|VBS|WS|WSC|WSF|WSH)\b")
# Destination Control Words, according to MS RTF Specifications v1.9.1:
DESTINATION_CONTROL_WORDS = frozenset((
@@ -678,6 +680,7 @@ class RtfObjParser(RtfParser):
rtfobj.hexdata = hexdata
object_data = binascii.unhexlify(hexdata)
rtfobj.rawdata = object_data
+ rtfobj.rawdata_md5 = hashlib.md5(object_data).hexdigest()
# TODO: check if all hex data is extracted properly
obj = oleobj.OleObject()
@@ -687,6 +690,7 @@ class RtfObjParser(RtfParser):
rtfobj.class_name = obj.class_name
rtfobj.oledata_size = obj.data_size
rtfobj.oledata = obj.data
+ rtfobj.oledata_md5 = hashlib.md5(obj.data).hexdigest()
rtfobj.is_ole = True
if obj.class_name.lower() == b'package':
opkg = oleobj.OleNativeStream(bindata=obj.data,
@@ -695,6 +699,7 @@ class RtfObjParser(RtfParser):
rtfobj.src_path = opkg.src_path
rtfobj.temp_path = opkg.temp_path
rtfobj.olepkgdata = opkg.data
+ rtfobj.olepkgdata_md5 = hashlib.md5(opkg.data).hexdigest()
rtfobj.is_package = True
else:
if olefile.isOleFile(obj.data):
@@ -878,15 +883,23 @@ def process_file(container, filename, data, output_dir=None, save_object=False):
ole_column += '\nFilename: %r' % rtfobj.filename
ole_column += '\nSource path: %r' % rtfobj.src_path
ole_column += '\nTemp path = %r' % rtfobj.temp_path
+ ole_column += '\nMD5 = %r' % rtfobj.olepkgdata_md5
ole_color = 'yellow'
# check if the file extension is executable:
- _, ext = os.path.splitext(rtfobj.filename)
- log.debug('File extension: %r' % ext)
- if re_executable_extensions.match(ext):
+
+ _, temp_ext = os.path.splitext(rtfobj.temp_path)
+ log.debug('Temp path extension: %r' % temp_ext)
+ _, file_ext = os.path.splitext(rtfobj.filename)
+ log.debug('File extension: %r' % file_ext)
+
+ if temp_ext != file_ext:
+ ole_column += "\nMODIFIED FILE EXTENSION"
+
+ if re_executable_extensions.match(temp_ext) or re_executable_extensions.match(file_ext):
ole_color = 'red'
ole_column += '\nEXECUTABLE FILE'
- # else:
- # pkg_column = 'Not an OLE Package'
+ else:
+ ole_column += '\nMD5 = %r' % rtfobj.oledata_md5
if rtfobj.clsid is not None:
ole_column += '\nCLSID: %s' % rtfobj.clsid
ole_column += '\n%s' % rtfobj.clsid_desc
@@ -896,7 +909,28 @@ def process_file(container, filename, data, output_dir=None, save_object=False):
# http://www.kb.cert.org/vuls/id/921560
if rtfobj.class_name == b'OLE2Link':
ole_color = 'red'
- ole_column += '\nPossibly an exploit for the OLE2Link vulnerability (VU#921560, CVE-2017-0199)'
+ ole_column += '\nPossibly an exploit for the OLE2Link vulnerability (VU#921560, CVE-2017-0199)\n'
+ # https://bitbucket.org/snippets/Alexander_Hanel/7Adpp
+ # NOTE(review): assumes `data` is bytes (raw RTF content) - confirm against callers
+ pat = re.compile(br'(?:[\x20-\x7E][\x00]){3,}')
+ urls = []
+ for item in re.findall(br'[a-fA-F0-9\x0D\x0A]{128,}', data):
+     try:
+         # .decode("hex") only exists on Python 2; unhexlify works on 2 and 3
+         temp = binascii.unhexlify(item.replace(b"\x0D\x0A", b""))
+     except (TypeError, binascii.Error):
+         continue
+     # UTF-16LE runs of 3+ printable ASCII characters
+     words = [w.decode('utf-16le') for w in pat.findall(temp)]
+     urls.extend(w for w in words if "http" in w)
+ urls = sorted(set(urls))
+ if urls:
+     ole_column += 'URL extracted: ' + ', '.join(urls)
+ # Detect Equation Editor exploit
+ # https://www.kb.cert.org/vuls/id/421280/
+ elif rtfobj.class_name.lower() == b'equation.3':
+ ole_color = 'red'
+ ole_column += '\nPossibly an exploit for the Equation Editor vulnerability (VU#421280, CVE-2017-11882)'
else:
ole_column = 'Not a well-formed OLE object'
tstream.write_row((
@@ -930,6 +964,7 @@ def process_file(container, filename, data, output_dir=None, save_object=False):
else:
fname = '%s_object_%08X.noname' % (fname_prefix, rtfobj.start)
print(' saving to file %s' % fname)
+ print(' md5 %s' % rtfobj.olepkgdata_md5)
open(fname, 'wb').write(rtfobj.olepkgdata)
# When format_id=TYPE_LINKED, oledata_size=None
elif rtfobj.is_ole and rtfobj.oledata_size is not None:
@@ -947,11 +982,13 @@ def process_file(container, filename, data, output_dir=None, save_object=False):
ext = 'bin'
fname = '%s_object_%08X.%s' % (fname_prefix, rtfobj.start, ext)
print(' saving to file %s' % fname)
+ print(' md5 %s' % rtfobj.oledata_md5)
open(fname, 'wb').write(rtfobj.oledata)
else:
print('Saving raw data in object #%d:' % i)
fname = '%s_object_%08X.raw' % (fname_prefix, rtfobj.start)
print(' saving object to file %s' % fname)
+ print(' md5 %s' % rtfobj.rawdata_md5)
open(fname, 'wb').write(rtfobj.rawdata)
@@ -1035,4 +1072,3 @@ if __name__ == '__main__':
main()
# This code was developed while listening to The Mary Onettes "Lost"
-
diff --git a/oletools/thirdparty/oledump/__init__.py b/oletools/thirdparty/oledump/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/oletools/thirdparty/oledump/__init__.py
diff --git a/oletools/thirdparty/oledump/plugin_biff.py b/oletools/thirdparty/oledump/plugin_biff.py
new file mode 100644
index 0000000..e32c541
--- /dev/null
+++ b/oletools/thirdparty/oledump/plugin_biff.py
@@ -0,0 +1,1112 @@
+#!/usr/bin/env python
+
+__description__ = 'BIFF plugin for oledump.py'
+__author__ = 'Didier Stevens'
+__version__ = '0.0.5'
+__date__ = '2019/03/06'
+
+# Slightly modified version by Philippe Lagadec to be imported into olevba
+
+"""
+
+Source code put in public domain by Didier Stevens, no Copyright
+https://DidierStevens.com
+Use at your own risk
+
+History:
+ 2014/11/15: start
+ 2014/11/21: changed interface: added options; added options -a (asciidump) and -s (strings)
+ 2017/12/10: 0.0.2 added optparse & option -o
+ 2017/12/12: added option -f
+ 2017/12/13: added 0x support for option -f
+ 2018/10/24: 0.0.3 started coding Excel 4.0 macro support
+ 2018/10/25: continue
+ 2018/10/26: continue
+ 2019/01/05: 0.0.4 added option -x
+ 2019/03/06: 0.0.5 enhanced parsing of formula expressions
+
+Todo:
+"""
+
+import struct
+import re
+import optparse
+import binascii
+import sys
+
+# from olevba:
+
+if sys.version_info[0] <= 2:
+ # Python 2.x
+ PYTHON2 = True
+else:
+ # Python 3.x+
+ PYTHON2 = False
+
+def unicode2str(unicode_string):
+ """
+ Convert a unicode string to a native str:
+ - on Python 3, the string is returned unchanged
+ - on Python 2, it is encoded as UTF-8 (error handler 'replace') to a bytes str
+ :param unicode_string: unicode string to be converted
+ :return: the string converted to str
+ :rtype: str
+ """
+ if PYTHON2:
+ return unicode_string.encode('utf8', errors='replace')
+ else:
+ return unicode_string
+
+
+def bytes2str(bytes_string, encoding='utf8'):
+ """
+ Convert a bytes string to a native str:
+ - on Python 2, the string is returned unchanged (bytes is str)
+ - on Python 3, the string is decoded using the provided encoding
+ (UTF-8 by default, error handler 'replace') to a unicode str
+ :param bytes_string: bytes string to be converted
+ :param encoding: codec to be used for decoding
+ :return: the string converted to str
+ :rtype: str
+ """
+ if PYTHON2:
+ return bytes_string
+ else:
+ return bytes_string.decode(encoding, errors='replace')
+
+
+dTokens = {
+0x01: 'ptgExp',
+0x02: 'ptgTbl',
+0x03: 'ptgAdd',
+0x04: 'ptgSub',
+0x05: 'ptgMul',
+0x06: 'ptgDiv',
+0x07: 'ptgPower',
+0x08: 'ptgConcat',
+0x09: 'ptgLT',
+0x0A: 'ptgLE',
+0x0B: 'ptgEQ',
+0x0C: 'ptgGE',
+0x0D: 'ptgGT',
+0x0E: 'ptgNE',
+0x0F: 'ptgIsect',
+0x10: 'ptgUnion',
+0x11: 'ptgRange',
+0x12: 'ptgUplus',
+0x13: 'ptgUminus',
+0x14: 'ptgPercent',
+0x15: 'ptgParen',
+0x16: 'ptgMissArg',
+0x17: 'ptgStr',
+0x19: 'ptgAttr',
+0x1A: 'ptgSheet',
+0x1B: 'ptgEndSheet',
+0x1C: 'ptgErr',
+0x1D: 'ptgBool',
+0x1E: 'ptgInt',
+0x1F: 'ptgNum',
+0x20: 'ptgArray',
+0x21: 'ptgFunc',
+0x22: 'ptgFuncVar',
+0x23: 'ptgName',
+0x24: 'ptgRef',
+0x25: 'ptgArea',
+0x26: 'ptgMemArea',
+0x27: 'ptgMemErr',
+0x28: 'ptgMemNoMem',
+0x29: 'ptgMemFunc',
+0x2A: 'ptgRefErr',
+0x2B: 'ptgAreaErr',
+0x2C: 'ptgRefN',
+0x2D: 'ptgAreaN',
+0x2E: 'ptgMemAreaN',
+0x2F: 'ptgMemNoMemN',
+0x39: 'ptgNameX',
+0x3A: 'ptgRef3d',
+0x3B: 'ptgArea3d',
+0x3C: 'ptgRefErr3d',
+0x3D: 'ptgAreaErr3d',
+0x40: 'ptgArrayV',
+0x41: 'ptgFuncV',
+0x42: 'ptgFuncVarV',
+0x43: 'ptgNameV',
+0x44: 'ptgRefV',
+0x45: 'ptgAreaV',
+0x46: 'ptgMemAreaV',
+0x47: 'ptgMemErrV',
+0x48: 'ptgMemNoMemV',
+0x49: 'ptgMemFuncV',
+0x4A: 'ptgRefErrV',
+0x4B: 'ptgAreaErrV',
+0x4C: 'ptgRefNV',
+0x4D: 'ptgAreaNV',
+0x4E: 'ptgMemAreaNV',
+0x4F: 'ptgMemNoMemNV',
+0x58: 'ptgFuncCEV',
+0x59: 'ptgNameXV',
+0x5A: 'ptgRef3dV',
+0x5B: 'ptgArea3dV',
+0x5C: 'ptgRefErr3dV',
+0x5D: 'ptgAreaErr3dV',
+0x60: 'ptgArrayA',
+0x61: 'ptgFuncA',
+0x62: 'ptgFuncVarA',
+0x63: 'ptgNameA',
+0x64: 'ptgRefA',
+0x65: 'ptgAreaA',
+0x66: 'ptgMemAreaA',
+0x67: 'ptgMemErrA',
+0x68: 'ptgMemNoMemA',
+0x69: 'ptgMemFuncA',
+0x6A: 'ptgRefErrA',
+0x6B: 'ptgAreaErrA',
+0x6C: 'ptgRefNA',
+0x6D: 'ptgAreaNA',
+0x6E: 'ptgMemAreaNA',
+0x6F: 'ptgMemNoMemNA',
+0x78: 'ptgFuncCEA',
+0x79: 'ptgNameXA',
+0x7A: 'ptgRef3dA',
+0x7B: 'ptgArea3dA',
+0x7C: 'ptgRefErr3dA',
+0x7D: 'ptgAreaErr3dA',
+}
+
+#https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/00b5dd7d-51ca-4938-b7b7-483fe0e5933b
+dFunctions = {
+0x0000: 'COUNT',
+0x0001: 'IF',
+0x0002: 'ISNA',
+0x0003: 'ISERROR',
+0x0004: 'SUM',
+0x0005: 'AVERAGE',
+0x0006: 'MIN',
+0x0007: 'MAX',
+0x0008: 'ROW',
+0x0009: 'COLUMN',
+0x000A: 'NA',
+0x000B: 'NPV',
+0x000C: 'STDEV',
+0x000D: 'DOLLAR',
+0x000E: 'FIXED',
+0x000F: 'SIN',
+0x0010: 'COS',
+0x0011: 'TAN',
+0x0012: 'ATAN',
+0x0013: 'PI',
+0x0014: 'SQRT',
+0x0015: 'EXP',
+0x0016: 'LN',
+0x0017: 'LOG10',
+0x0018: 'ABS',
+0x0019: 'INT',
+0x001A: 'SIGN',
+0x001B: 'ROUND',
+0x001C: 'LOOKUP',
+0x001D: 'INDEX',
+0x001E: 'REPT',
+0x001F: 'MID',
+0x0020: 'LEN',
+0x0021: 'VALUE',
+0x0022: 'TRUE',
+0x0023: 'FALSE',
+0x0024: 'AND',
+0x0025: 'OR',
+0x0026: 'NOT',
+0x0027: 'MOD',
+0x0028: 'DCOUNT',
+0x0029: 'DSUM',
+0x002A: 'DAVERAGE',
+0x002B: 'DMIN',
+0x002C: 'DMAX',
+0x002D: 'DSTDEV',
+0x002E: 'VAR',
+0x002F: 'DVAR',
+0x0030: 'TEXT',
+0x0031: 'LINEST',
+0x0032: 'TREND',
+0x0033: 'LOGEST',
+0x0034: 'GROWTH',
+0x0035: 'GOTO',
+0x0036: 'HALT',
+0x0037: 'RETURN',
+0x0038: 'PV',
+0x0039: 'FV',
+0x003A: 'NPER',
+0x003B: 'PMT',
+0x003C: 'RATE',
+0x003D: 'MIRR',
+0x003E: 'IRR',
+0x003F: 'RAND',
+0x0040: 'MATCH',
+0x0041: 'DATE',
+0x0042: 'TIME',
+0x0043: 'DAY',
+0x0044: 'MONTH',
+0x0045: 'YEAR',
+0x0046: 'WEEKDAY',
+0x0047: 'HOUR',
+0x0048: 'MINUTE',
+0x0049: 'SECOND',
+0x004A: 'NOW',
+0x004B: 'AREAS',
+0x004C: 'ROWS',
+0x004D: 'COLUMNS',
+0x004E: 'OFFSET',
+0x004F: 'ABSREF',
+0x0050: 'RELREF',
+0x0051: 'ARGUMENT',
+0x0052: 'SEARCH',
+0x0053: 'TRANSPOSE',
+0x0054: 'ERROR',
+0x0055: 'STEP',
+0x0056: 'TYPE',
+0x0057: 'ECHO',
+0x0058: 'SET.NAME',
+0x0059: 'CALLER',
+0x005A: 'DEREF',
+0x005B: 'WINDOWS',
+0x005C: 'SERIES',
+0x005D: 'DOCUMENTS',
+0x005E: 'ACTIVE.CELL',
+0x005F: 'SELECTION',
+0x0060: 'RESULT',
+0x0061: 'ATAN2',
+0x0062: 'ASIN',
+0x0063: 'ACOS',
+0x0064: 'CHOOSE',
+0x0065: 'HLOOKUP',
+0x0066: 'VLOOKUP',
+0x0067: 'LINKS',
+0x0068: 'INPUT',
+0x0069: 'ISREF',
+0x006A: 'GET.FORMULA',
+0x006B: 'GET.NAME',
+0x006C: 'SET.VALUE',
+0x006D: 'LOG',
+0x006E: 'EXEC',
+0x006F: 'CHAR',
+0x0070: 'LOWER',
+0x0071: 'UPPER',
+0x0072: 'PROPER',
+0x0073: 'LEFT',
+0x0074: 'RIGHT',
+0x0075: 'EXACT',
+0x0076: 'TRIM',
+0x0077: 'REPLACE',
+0x0078: 'SUBSTITUTE',
+0x0079: 'CODE',
+0x007A: 'NAMES',
+0x007B: 'DIRECTORY',
+0x007C: 'FIND',
+0x007D: 'CELL',
+0x007E: 'ISERR',
+0x007F: 'ISTEXT',
+0x0080: 'ISNUMBER',
+0x0081: 'ISBLANK',
+0x0082: 'T',
+0x0083: 'N',
+0x0084: 'FOPEN',
+0x0085: 'FCLOSE',
+0x0086: 'FSIZE',
+0x0087: 'FREADLN',
+0x0088: 'FREAD',
+0x0089: 'FWRITELN',
+0x008A: 'FWRITE',
+0x008B: 'FPOS',
+0x008C: 'DATEVALUE',
+0x008D: 'TIMEVALUE',
+0x008E: 'SLN',
+0x008F: 'SYD',
+0x0090: 'DDB',
+0x0091: 'GET.DEF',
+0x0092: 'REFTEXT',
+0x0093: 'TEXTREF',
+0x0094: 'INDIRECT',
+0x0095: 'REGISTER',
+0x0096: 'CALL',
+0x0097: 'ADD.BAR',
+0x0098: 'ADD.MENU',
+0x0099: 'ADD.COMMAND',
+0x009A: 'ENABLE.COMMAND',
+0x009B: 'CHECK.COMMAND',
+0x009C: 'RENAME.COMMAND',
+0x009D: 'SHOW.BAR',
+0x009E: 'DELETE.MENU',
+0x009F: 'DELETE.COMMAND',
+0x00A0: 'GET.CHART.ITEM',
+0x00A1: 'DIALOG.BOX',
+0x00A2: 'CLEAN',
+0x00A3: 'MDETERM',
+0x00A4: 'MINVERSE',
+0x00A5: 'MMULT',
+0x00A6: 'FILES',
+0x00A7: 'IPMT',
+0x00A8: 'PPMT',
+0x00A9: 'COUNTA',
+0x00AA: 'CANCEL.KEY',
+0x00AB: 'FOR',
+0x00AC: 'WHILE',
+0x00AD: 'BREAK',
+0x00AE: 'NEXT',
+0x00AF: 'INITIATE',
+0x00B0: 'REQUEST',
+0x00B1: 'POKE',
+0x00B2: 'EXECUTE',
+0x00B3: 'TERMINATE',
+0x00B4: 'RESTART',
+0x00B5: 'HELP',
+0x00B6: 'GET.BAR',
+0x00B7: 'PRODUCT',
+0x00B8: 'FACT',
+0x00B9: 'GET.CELL',
+0x00BA: 'GET.WORKSPACE',
+0x00BB: 'GET.WINDOW',
+0x00BC: 'GET.DOCUMENT',
+0x00BD: 'DPRODUCT',
+0x00BE: 'ISNONTEXT',
+0x00BF: 'GET.NOTE',
+0x00C0: 'NOTE',
+0x00C1: 'STDEVP',
+0x00C2: 'VARP',
+0x00C3: 'DSTDEVP',
+0x00C4: 'DVARP',
+0x00C5: 'TRUNC',
+0x00C6: 'ISLOGICAL',
+0x00C7: 'DCOUNTA',
+0x00C8: 'DELETE.BAR',
+0x00C9: 'UNREGISTER',
+0x00CC: 'USDOLLAR',
+0x00CD: 'FINDB',
+0x00CE: 'SEARCHB',
+0x00CF: 'REPLACEB',
+0x00D0: 'LEFTB',
+0x00D1: 'RIGHTB',
+0x00D2: 'MIDB',
+0x00D3: 'LENB',
+0x00D4: 'ROUNDUP',
+0x00D5: 'ROUNDDOWN',
+0x00D6: 'ASC',
+0x00D7: 'DBCS',
+0x00D8: 'RANK',
+0x00DB: 'ADDRESS',
+0x00DC: 'DAYS360',
+0x00DD: 'TODAY',
+0x00DE: 'VDB',
+0x00DF: 'ELSE',
+0x00E0: 'ELSE.IF',
+0x00E1: 'END.IF',
+0x00E2: 'FOR.CELL',
+0x00E3: 'MEDIAN',
+0x00E4: 'SUMPRODUCT',
+0x00E5: 'SINH',
+0x00E6: 'COSH',
+0x00E7: 'TANH',
+0x00E8: 'ASINH',
+0x00E9: 'ACOSH',
+0x00EA: 'ATANH',
+0x00EB: 'DGET',
+0x00EC: 'CREATE.OBJECT',
+0x00ED: 'VOLATILE',
+0x00EE: 'LAST.ERROR',
+0x00EF: 'CUSTOM.UNDO',
+0x00F0: 'CUSTOM.REPEAT',
+0x00F1: 'FORMULA.CONVERT',
+0x00F2: 'GET.LINK.INFO',
+0x00F3: 'TEXT.BOX',
+0x00F4: 'INFO',
+0x00F5: 'GROUP',
+0x00F6: 'GET.OBJECT',
+0x00F7: 'DB',
+0x00F8: 'PAUSE',
+0x00FB: 'RESUME',
+0x00FC: 'FREQUENCY',
+0x00FD: 'ADD.TOOLBAR',
+0x00FE: 'DELETE.TOOLBAR',
+0x00FF: 'User Defined Function',
+0x0100: 'RESET.TOOLBAR',
+0x0101: 'EVALUATE',
+0x0102: 'GET.TOOLBAR',
+0x0103: 'GET.TOOL',
+0x0104: 'SPELLING.CHECK',
+0x0105: 'ERROR.TYPE',
+0x0106: 'APP.TITLE',
+0x0107: 'WINDOW.TITLE',
+0x0108: 'SAVE.TOOLBAR',
+0x0109: 'ENABLE.TOOL',
+0x010A: 'PRESS.TOOL',
+0x010B: 'REGISTER.ID',
+0x010C: 'GET.WORKBOOK',
+0x010D: 'AVEDEV',
+0x010E: 'BETADIST',
+0x010F: 'GAMMALN',
+0x0110: 'BETAINV',
+0x0111: 'BINOMDIST',
+0x0112: 'CHIDIST',
+0x0113: 'CHIINV',
+0x0114: 'COMBIN',
+0x0115: 'CONFIDENCE',
+0x0116: 'CRITBINOM',
+0x0117: 'EVEN',
+0x0118: 'EXPONDIST',
+0x0119: 'FDIST',
+0x011A: 'FINV',
+0x011B: 'FISHER',
+0x011C: 'FISHERINV',
+0x011D: 'FLOOR',
+0x011E: 'GAMMADIST',
+0x011F: 'GAMMAINV',
+0x0120: 'CEILING',
+0x0121: 'HYPGEOMDIST',
+0x0122: 'LOGNORMDIST',
+0x0123: 'LOGINV',
+0x0124: 'NEGBINOMDIST',
+0x0125: 'NORMDIST',
+0x0126: 'NORMSDIST',
+0x0127: 'NORMINV',
+0x0128: 'NORMSINV',
+0x0129: 'STANDARDIZE',
+0x012A: 'ODD',
+0x012B: 'PERMUT',
+0x012C: 'POISSON',
+0x012D: 'TDIST',
+0x012E: 'WEIBULL',
+0x012F: 'SUMXMY2',
+0x0130: 'SUMX2MY2',
+0x0131: 'SUMX2PY2',
+0x0132: 'CHITEST',
+0x0133: 'CORREL',
+0x0134: 'COVAR',
+0x0135: 'FORECAST',
+0x0136: 'FTEST',
+0x0137: 'INTERCEPT',
+0x0138: 'PEARSON',
+0x0139: 'RSQ',
+0x013A: 'STEYX',
+0x013B: 'SLOPE',
+0x013C: 'TTEST',
+0x013D: 'PROB',
+0x013E: 'DEVSQ',
+0x013F: 'GEOMEAN',
+0x0140: 'HARMEAN',
+0x0141: 'SUMSQ',
+0x0142: 'KURT',
+0x0143: 'SKEW',
+0x0144: 'ZTEST',
+0x0145: 'LARGE',
+0x0146: 'SMALL',
+0x0147: 'QUARTILE',
+0x0148: 'PERCENTILE',
+0x0149: 'PERCENTRANK',
+0x014A: 'MODE',
+0x014B: 'TRIMMEAN',
+0x014C: 'TINV',
+0x014E: 'MOVIE.COMMAND',
+0x014F: 'GET.MOVIE',
+0x0150: 'CONCATENATE',
+0x0151: 'POWER',
+0x0152: 'PIVOT.ADD.DATA',
+0x0153: 'GET.PIVOT.TABLE',
+0x0154: 'GET.PIVOT.FIELD',
+0x0155: 'GET.PIVOT.ITEM',
+0x0156: 'RADIANS',
+0x0157: 'DEGREES',
+0x0158: 'SUBTOTAL',
+0x0159: 'SUMIF',
+0x015A: 'COUNTIF',
+0x015B: 'COUNTBLANK',
+0x015C: 'SCENARIO.GET',
+0x015D: 'OPTIONS.LISTS.GET',
+0x015E: 'ISPMT',
+0x015F: 'DATEDIF',
+0x0160: 'DATESTRING',
+0x0161: 'NUMBERSTRING',
+0x0162: 'ROMAN',
+0x0163: 'OPEN.DIALOG',
+0x0164: 'SAVE.DIALOG',
+0x0165: 'VIEW.GET',
+0x0166: 'GETPIVOTDATA',
+0x0167: 'HYPERLINK',
+0x0168: 'PHONETIC',
+0x0169: 'AVERAGEA',
+0x016A: 'MAXA',
+0x016B: 'MINA',
+0x016C: 'STDEVPA',
+0x016D: 'VARPA',
+0x016E: 'STDEVA',
+0x016F: 'VARA',
+0x0170: 'BAHTTEXT',
+0x0171: 'THAIDAYOFWEEK',
+0x0172: 'THAIDIGIT',
+0x0173: 'THAIMONTHOFYEAR',
+0x0174: 'THAINUMSOUND',
+0x0175: 'THAINUMSTRING',
+0x0176: 'THAISTRINGLENGTH',
+0x0177: 'ISTHAIDIGIT',
+0x0178: 'ROUNDBAHTDOWN',
+0x0179: 'ROUNDBAHTUP',
+0x017A: 'THAIYEAR',
+0x017B: 'RTD',
+
+0x8076: 'ALERT',
+}
+
+dOpcodes = {
+ 0x06: 'FORMULA : Cell Formula',
+ 0x0A: 'EOF : End of File',
+ 0x0C: 'CALCCOUNT : Iteration Count',
+ 0x0D: 'CALCMODE : Calculation Mode',
+ 0x0E: 'PRECISION : Precision',
+ 0x0F: 'REFMODE : Reference Mode',
+ 0x10: 'DELTA : Iteration Increment',
+ 0x11: 'ITERATION : Iteration Mode',
+ 0x12: 'PROTECT : Protection Flag',
+ 0x13: 'PASSWORD : Protection Password',
+ 0x14: 'HEADER : Print Header on Each Page',
+ 0x15: 'FOOTER : Print Footer on Each Page',
+ 0x16: 'EXTERNCOUNT : Number of External References',
+ 0x17: 'EXTERNSHEET : External Reference',
+ 0x18: 'LABEL : Cell Value, String Constant',
+ 0x19: 'WINDOWPROTECT : Windows Are Protected',
+ 0x1A: 'VERTICALPAGEBREAKS : Explicit Column Page Breaks',
+ 0x1B: 'HORIZONTALPAGEBREAKS : Explicit Row Page Breaks',
+ 0x1C: 'NOTE : Comment Associated with a Cell',
+ 0x1D: 'SELECTION : Current Selection',
+ 0x22: '1904 : 1904 Date System',
+ 0x26: 'LEFTMARGIN : Left Margin Measurement',
+ 0x27: 'RIGHTMARGIN : Right Margin Measurement',
+ 0x28: 'TOPMARGIN : Top Margin Measurement',
+ 0x29: 'BOTTOMMARGIN : Bottom Margin Measurement',
+ 0x2A: 'PRINTHEADERS : Print Row/Column Labels',
+ 0x2B: 'PRINTGRIDLINES : Print Gridlines Flag',
+ 0x2F: 'FILEPASS : File Is Password-Protected',
+ 0x3C: 'CONTINUE : Continues Long Records',
+ 0x3D: 'WINDOW1 : Window Information',
+ 0x40: 'BACKUP : Save Backup Version of the File',
+ 0x41: 'PANE : Number of Panes and Their Position',
+ 0x42: 'CODENAME : VBE Object Name',
+ 0x42: 'CODEPAGE : Default Code Page',
+ 0x4D: 'PLS : Environment-Specific Print Record',
+ 0x50: 'DCON : Data Consolidation Information',
+ 0x51: 'DCONREF : Data Consolidation References',
+ 0x52: 'DCONNAME : Data Consolidation Named References',
+ 0x55: 'DEFCOLWIDTH : Default Width for Columns',
+ 0x59: 'XCT : CRN Record Count',
+ 0x5A: 'CRN : Nonresident Operands',
+ 0x5B: 'FILESHARING : File-Sharing Information',
+ 0x5C: 'WRITEACCESS : Write Access User Name',
+ 0x5D: 'OBJ : Describes a Graphic Object',
+ 0x5E: 'UNCALCED : Recalculation Status',
+ 0x5F: 'SAVERECALC : Recalculate Before Save',
+ 0x60: 'TEMPLATE : Workbook Is a Template',
+ 0x63: 'OBJPROTECT : Objects Are Protected',
+ 0x7D: 'COLINFO : Column Formatting Information',
+ 0x7E: 'RK : Cell Value, RK Number',
+ 0x7F: 'IMDATA : Image Data',
+ 0x80: 'GUTS : Size of Row and Column Gutters',
+ 0x81: 'WSBOOL : Additional Workspace Information',
+ 0x82: 'GRIDSET : State Change of Gridlines Option',
+ 0x83: 'HCENTER : Center Between Horizontal Margins',
+ 0x84: 'VCENTER : Center Between Vertical Margins',
+ 0x85: 'BOUNDSHEET : Sheet Information',
+ 0x86: 'WRITEPROT : Workbook Is Write-Protected',
+ 0x87: 'ADDIN : Workbook Is an Add-in Macro',
+ 0x88: 'EDG : Edition Globals',
+ 0x89: 'PUB : Publisher',
+ 0x8C: 'COUNTRY : Default Country and WIN.INI Country',
+ 0x8D: 'HIDEOBJ : Object Display Options',
+ 0x90: 'SORT : Sorting Options',
+ 0x91: 'SUB : Subscriber',
+ 0x92: 'PALETTE : Color Palette Definition',
+ 0x94: 'LHRECORD : .WK? File Conversion Information',
+ 0x95: 'LHNGRAPH : Named Graph Information',
+ 0x96: 'SOUND : Sound Note',
+ 0x98: 'LPR : Sheet Was Printed Using LINE.PRINT(',
+ 0x99: 'STANDARDWIDTH : Standard Column Width',
+ 0x9A: 'FNGROUPNAME : Function Group Name',
+ 0x9B: 'FILTERMODE : Sheet Contains Filtered List',
+ 0x9C: 'FNGROUPCOUNT : Built-in Function Group Count',
+ 0x9D: 'AUTOFILTERINFO : Drop-Down Arrow Count',
+ 0x9E: 'AUTOFILTER : AutoFilter Data',
+ 0xA0: 'SCL : Window Zoom Magnification',
+ 0xA1: 'SETUP : Page Setup',
+ 0xA9: 'COORDLIST : Polygon Object Vertex Coordinates',
+ 0xAB: 'GCW : Global Column-Width Flags',
+ 0xAE: 'SCENMAN : Scenario Output Data',
+ 0xAF: 'SCENARIO : Scenario Data',
+ 0xB0: 'SXVIEW : View Definition',
+ 0xB1: 'SXVD : View Fields',
+ 0xB2: 'SXVI : View Item',
+ 0xB4: 'SXIVD : Row/Column Field IDs',
+ 0xB5: 'SXLI : Line Item Array',
+ 0xB6: 'SXPI : Page Item',
+ 0xB8: 'DOCROUTE : Routing Slip Information',
+ 0xB9: 'RECIPNAME : Recipient Name',
+ 0xBC: 'SHRFMLA : Shared Formula',
+ 0xBD: 'MULRK : Multiple RK Cells',
+ 0xBE: 'MULBLANK : Multiple Blank Cells',
+ 0xC1: 'MMS : ADDMENU / DELMENU Record Group Count',
+ 0xC2: 'ADDMENU : Menu Addition',
+ 0xC3: 'DELMENU : Menu Deletion',
+ 0xC5: 'SXDI : Data Item',
+ 0xC6: 'SXDB : PivotTable Cache Data',
+ 0xCD: 'SXSTRING : String',
+ 0xD0: 'SXTBL : Multiple Consolidation Source Info',
+ 0xD1: 'SXTBRGIITM : Page Item Name Count',
+ 0xD2: 'SXTBPG : Page Item Indexes',
+ 0xD3: 'OBPROJ : Visual Basic Project',
+ 0xD5: 'SXIDSTM : Stream ID',
+ 0xD6: 'RSTRING : Cell with Character Formatting',
+ 0xD7: 'DBCELL : Stream Offsets',
+ 0xDA: 'BOOKBOOL : Workbook Option Flag',
+ 0xDC: 'PARAMQRY : Query Parameters',
+ 0xDC: 'SXEXT : External Source Information',
+ 0xDD: 'SCENPROTECT : Scenario Protection',
+ 0xDE: 'OLESIZE : Size of OLE Object',
+ 0xDF: 'UDDESC : Description String for Chart Autoformat',
+ 0xE0: 'XF : Extended Format',
+ 0xE1: 'INTERFACEHDR : Beginning of User Interface Records',
+ 0xE2: 'INTERFACEEND : End of User Interface Records',
+ 0xE3: 'SXVS : View Source',
+ 0xE5: 'MERGECELLS : Merged Cells',
+ 0xEA: 'TABIDCONF : Sheet Tab ID of Conflict History',
+ 0xEB: 'MSODRAWINGGROUP : Microsoft Office Drawing Group',
+ 0xEC: 'MSODRAWING : Microsoft Office Drawing',
+ 0xED: 'MSODRAWINGSELECTION : Microsoft Office Drawing Selection',
+ 0xF0: 'SXRULE : PivotTable Rule Data',
+ 0xF1: 'SXEX : PivotTable View Extended Information',
+ 0xF2: 'SXFILT : PivotTable Rule Filter',
+ 0xF4: 'SXDXF : Pivot Table Formatting',
+ 0xF5: 'SXITM : Pivot Table Item Indexes',
+ 0xF6: 'SXNAME : PivotTable Name',
+ 0xF7: 'SXSELECT : PivotTable Selection Information',
+ 0xF8: 'SXPAIR : PivotTable Name Pair',
+ 0xF9: 'SXFMLA : Pivot Table Parsed Expression',
+ 0xFB: 'SXFORMAT : PivotTable Format Record',
+ 0xFC: 'SST : Shared String Table',
+ 0xFD: 'LABELSST : Cell Value, String Constant/ SST',
+ 0xFF: 'EXTSST : Extended Shared String Table',
+ 0x100: 'SXVDEX : Extended PivotTable View Fields',
+ 0x103: 'SXFORMULA : PivotTable Formula Record',
+ 0x122: 'SXDBEX : PivotTable Cache Data',
+ 0x13D: 'TABID : Sheet Tab Index Array',
+ 0x160: 'USESELFS : Natural Language Formulas Flag',
+ 0x161: 'DSF : Double Stream File',
+ 0x162: 'XL5MODIFY : Flag for DSF',
+ 0x1A5: 'FILESHARING2 : File-Sharing Information for Shared Lists',
+ 0x1A9: 'USERBVIEW : Workbook Custom View Settings',
+ 0x1AA: 'USERSVIEWBEGIN : Custom View Settings',
+ 0x1AB: 'USERSVIEWEND : End of Custom View Records',
+ 0x1AD: 'QSI : External Data Range',
+ 0x1AE: 'SUPBOOK : Supporting Workbook',
+ 0x1AF: 'PROT4REV : Shared Workbook Protection Flag',
+ 0x1B0: 'CONDFMT : Conditional Formatting Range Information',
+ 0x1B1: 'CF : Conditional Formatting Conditions',
+ 0x1B2: 'DVAL : Data Validation Information',
+ 0x1B5: 'DCONBIN : Data Consolidation Information',
+ 0x1B6: 'TXO : Text Object',
+ 0x1B7: 'REFRESHALL : Refresh Flag',
+ 0x1B8: 'HLINK : Hyperlink',
+ 0x1BB: 'SXFDBTYPE : SQL Datatype Identifier',
+ 0x1BC: 'PROT4REVPASS : Shared Workbook Protection Password',
+ 0x1BE: 'DV : Data Validation Criteria',
+ 0x1C0: 'EXCEL9FILE : Excel 9 File',
+ 0x1C1: 'RECALCID : Recalc Information',
+ 0x200: 'DIMENSIONS : Cell Table Size',
+ 0x201: 'BLANK : Cell Value, Blank Cell',
+ 0x203: 'NUMBER : Cell Value, Floating-Point Number',
+ 0x204: 'LABEL : Cell Value, String Constant',
+ 0x205: 'BOOLERR : Cell Value, Boolean or Error',
+ 0x207: 'STRING : String Value of a Formula',
+ 0x208: 'ROW : Describes a Row',
+ 0x20B: 'INDEX : Index Record',
+ 0x218: 'NAME : Defined Name',
+ 0x221: 'ARRAY : Array-Entered Formula',
+ 0x223: 'EXTERNNAME : Externally Referenced Name',
+ 0x225: 'DEFAULTROWHEIGHT : Default Row Height',
+ 0x231: 'FONT : Font Description',
+ 0x236: 'TABLE : Data Table',
+ 0x23E: 'WINDOW2 : Sheet Window Information',
+ 0x293: 'STYLE : Style Information',
+ 0x406: 'FORMULA : Cell Formula',
+ 0x41E: 'FORMAT : Number Format',
+ 0x800: 'HLINKTOOLTIP : Hyperlink Tooltip',
+ 0x801: 'WEBPUB : Web Publish Item',
+ 0x802: 'QSISXTAG : PivotTable and Query Table Extensions',
+ 0x803: 'DBQUERYEXT : Database Query Extensions',
+ 0x804: 'EXTSTRING : FRT String',
+ 0x805: 'TXTQUERY : Text Query Information',
+ 0x806: 'QSIR : Query Table Formatting',
+ 0x807: 'QSIF : Query Table Field Formatting',
+ 0x809: 'BOF : Beginning of File',
+ 0x80A: 'OLEDBCONN : OLE Database Connection',
+ 0x80B: 'WOPT : Web Options',
+ 0x80C: 'SXVIEWEX : Pivot Table OLAP Extensions',
+ 0x80D: 'SXTH : PivotTable OLAP Hierarchy',
+ 0x80E: 'SXPIEX : OLAP Page Item Extensions',
+ 0x80F: 'SXVDTEX : View Dimension OLAP Extensions',
+ 0x810: 'SXVIEWEX9 : Pivot Table Extensions',
+ 0x812: 'CONTINUEFRT : Continued FRT',
+ 0x813: 'REALTIMEDATA : Real-Time Data (RTD)',
+ 0x862: 'SHEETEXT : Extra Sheet Info',
+ 0x863: 'BOOKEXT : Extra Book Info',
+ 0x864: 'SXADDL : Pivot Table Additional Info',
+ 0x865: 'CRASHRECERR : Crash Recovery Error',
+ 0x866: 'HFPicture : Header / Footer Picture',
+ 0x867: 'FEATHEADR : Shared Feature Header',
+ 0x868: 'FEAT : Shared Feature Record',
+ 0x86A: 'DATALABEXT : Chart Data Label Extension',
+ 0x86B: 'DATALABEXTCONTENTS : Chart Data Label Extension Contents',
+ 0x86C: 'CELLWATCH : Cell Watch',
+ 0x86d: 'FEATINFO : Shared Feature Info Record',
+ 0x871: 'FEATHEADR11 : Shared Feature Header 11',
+ 0x872: 'FEAT11 : Shared Feature 11 Record',
+ 0x873: 'FEATINFO11 : Shared Feature Info 11 Record',
+ 0x874: 'DROPDOWNOBJIDS : Drop Down Object',
+ 0x875: 'CONTINUEFRT11 : Continue FRT 11',
+ 0x876: 'DCONN : Data Connection',
+ 0x877: 'LIST12 : Extra Table Data Introduced in Excel 2007',
+ 0x878: 'FEAT12 : Shared Feature 12 Record',
+ 0x879: 'CONDFMT12 : Conditional Formatting Range Information 12',
+ 0x87A: 'CF12 : Conditional Formatting Condition 12',
+ 0x87B: 'CFEX : Conditional Formatting Extension',
+ 0x87C: 'XFCRC : XF Extensions Checksum',
+ 0x87D: 'XFEXT : XF Extension',
+ 0x87E: 'EZFILTER12 : AutoFilter Data Introduced in Excel 2007',
+ 0x87F: 'CONTINUEFRT12 : Continue FRT 12',
+ 0x881: 'SXADDL12 : Additional Workbook Connections Information',
+ 0x884: 'MDTINFO : Information about a Metadata Type',
+ 0x885: 'MDXSTR : MDX Metadata String',
+ 0x886: 'MDXTUPLE : Tuple MDX Metadata',
+ 0x887: 'MDXSET : Set MDX Metadata',
+ 0x888: 'MDXPROP : Member Property MDX Metadata',
+ 0x889: 'MDXKPI : Key Performance Indicator MDX Metadata',
+ 0x88A: 'MDTB : Block of Metadata Records',
+ 0x88B: 'PLV : Page Layout View Settings in Excel 2007',
+ 0x88C: 'COMPAT12 : Compatibility Checker 12',
+ 0x88D: 'DXF : Differential XF',
+ 0x88E: 'TABLESTYLES : Table Styles',
+ 0x88F: 'TABLESTYLE : Table Style',
+ 0x890: 'TABLESTYLEELEMENT : Table Style Element',
+ 0x892: 'STYLEEXT : Named Cell Style Extension',
+ 0x893: 'NAMEPUBLISH : Publish To Excel Server Data for Name',
+ 0x894: 'NAMECMT : Name Comment',
+ 0x895: 'SORTDATA12 : Sort Data 12',
+ 0x896: 'THEME : Theme',
+ 0x897: 'GUIDTYPELIB : VB Project Typelib GUID',
+ 0x898: 'FNGRP12 : Function Group',
+ 0x899: 'NAMEFNGRP12 : Extra Function Group',
+ 0x89A: 'MTRSETTINGS : Multi-Threaded Calculation Settings',
+ 0x89B: 'COMPRESSPICTURES : Automatic Picture Compression Mode',
+ 0x89C: 'HEADERFOOTER : Header Footer',
+ 0x8A3: 'FORCEFULLCALCULATION : Force Full Calculation Settings',
+ 0x8c1: 'LISTOBJ : List Object',
+ 0x8c2: 'LISTFIELD : List Field',
+ 0x8c3: 'LISTDV : List Data Validation',
+ 0x8c4: 'LISTCONDFMT : List Conditional Formatting',
+ 0x8c5: 'LISTCF : List Cell Formatting',
+ 0x8c6: 'FMQRY : Filemaker queries',
+ 0x8c7: 'FMSQRY : File maker queries',
+ 0x8c8: 'PLV : Page Layout View in Mac Excel 11',
+ 0x8c9: 'LNEXT : Extension information for borders in Mac Office 11',
+ 0x8ca: 'MKREXT : Extension information for markers in Mac Office 11'
+}
+
+
+# CIC: Call If Callable
+def CIC(expression):
+    """Return expression() when expression is callable, otherwise expression itself."""
+    if not callable(expression):
+        return expression
+    return expression()
+
+
+# IFF: IF Function
+def IFF(expression, valueTrue, valueFalse):
+    """
+    Return valueTrue when expression is truthy, otherwise valueFalse.
+    If the selected value is callable it is called and its result is returned,
+    so only the selected branch is ever evaluated.
+    """
+    chosen = valueTrue if expression else valueFalse
+    # "call if callable" step, applied to the selected value only
+    return chosen() if callable(chosen) else chosen
+
+
+def CombineHexASCII(hexDump, asciiDump, length):
+    """
+    Join the hex column and the ASCII column of one dump row.
+    A row holding fewer than length characters gets 3 extra spaces per missing
+    character before the ASCII column. An empty hexDump gives an empty string.
+    """
+    return '' if hexDump == '' else hexDump + ' ' + ' ' * (3 * (length - len(asciiDump))) + asciiDump
+
+def HexASCII(data, length=16):
+    """
+    Format data as hex dump rows: '%08X:' offset, hex bytes, then the ASCII column.
+    :param data: bytes, bytearray or str (any sliceable sequence of ints or 1-char strings)
+    :param length: number of bytes shown per row
+    :return: list of str, one per row (empty list for empty data)
+    """
+    result = []
+    for start in range(0, len(data), length):
+        hexDump = '%08X:' % start
+        asciiDump = ''
+        for b in data[start:start + length]:
+            # bytearray (and bytes on Python 3) yield ints, for which ord() raises
+            # TypeError; str yields 1-char strings. Handle both.
+            if isinstance(b, int):
+                code, char = b, chr(b)
+            else:
+                code, char = ord(b), b
+            hexDump += ' %02X' % code
+            # values below 32 are shown as '.' in the ASCII column
+            asciiDump += char if code >= 32 else '.'
+        result.append(CombineHexASCII(hexDump, asciiDump, length))
+    return result
+
+def StringsASCII(data):
+    """
+    Find runs of 4 or more bytes accepted by the pattern below (0x09 and 0x20-0x7E) in data.
+    :param data: bytearray or bytes
+    :return: list of str, each match passed through bytes2str
+    """
+    raw = bytes(data)
+    pattern = b'[^\x00-\x08\x0A-\x1F\x7F-\xFF]{4,}'
+    return list(map(bytes2str, re.findall(pattern, raw)))
+
+def StringsUNICODE(data):
+    """
+    Find runs of 4 or more two-byte units in data, each unit being a byte accepted
+    by the pattern below (0x09 or 0x20-0x7E) followed by a null byte.
+    :param data: bytearray or bytes
+    :return: list of str: each run with its null bytes removed, passed through bytes2str
+    """
+    # TODO: check if the null byte should be before or after the ascii byte
+    pattern = b'(([^\x00-\x08\x0A-\x1F\x7F-\xFF]\x00){4,})'
+    # findall returns (whole run, last repeated unit) per match; only the run is used
+    runs = [whole for whole, _last_unit in re.findall(pattern, bytes(data))]
+    return [bytes2str(run.replace(b'\x00', b'')) for run in runs]
+
+def Strings(data, encodings='sL'):
+    """
+    Scan data for strings, once per requested encoding code.
+    :param data: bytearray, data to be scanned for strings
+    :param encodings: sequence of codes: 's' (StringsASCII) and/or 'L' (StringsUNICODE);
+        any other code is ignored
+    :return: dict with key = 's' or 'L', values = list of str
+    """
+    extractors = {'s': StringsASCII, 'L': StringsUNICODE}
+    found = {}
+    for code in encodings:
+        if code in extractors:
+            found[code] = extractors[code](data)
+    return found
+
+def ContainsWord(word, expression):
+ return struct.pack(' 0:
+ ptgid = expression[0] # int
+ expression = expression[1:] # bytearray
+ if ptgid in dTokens:
+ result += dTokens[ptgid] + ' '
+ if ptgid == 0x17: # ptgStr
+ length = expression[0] # int
+ expression = expression[1:]
+ if expression[0] == 0: # probably BIFF8 -> UNICODE (compressed)
+ expression = expression[1:]
+ result += '"%s" ' % bytes2str(expression[:length])
+ expression = expression[length:]
+ elif ptgid == 0x19: # ptgAttr
+ grbit = expression[0] # int
+ expression = expression[1:]
+ if grbit & 0x04:
+ result += 'CHOOSE '
+ break
+ else:
+ expression = expression[2:]
+ elif ptgid == 0x16 or ptgid == 0x0e: # 0x0E: 'ptgNE', 0x16: 'ptgMissArg'
+ pass
+ elif ptgid == 0x1e: # ptgInt
+ result += '%d ' % (expression[0] + expression[1] * 0x100)
+ expression = expression[2:]
+ elif ptgid == 0x41: # ptgFuncV
+ functionid = expression[0] + expression[1] * 0x100
+ result += '%s (0x%04x) ' % (dFunctions.get(functionid, '*UNKNOWN FUNCTION*'), functionid)
+ expression = expression[2:]
+ elif ptgid == 0x22 or ptgid == 0x42: # 0x22: 'ptgFuncVar', 0x42: 'ptgFuncVarV'
+ functionid = expression[1] + expression[2] * 0x100
+ result += 'args %d func %s (0x%04x) ' % (expression[0], dFunctions.get(functionid, '*UNKNOWN FUNCTION*'), functionid)
+ expression = expression[3:]
+ elif ptgid == 0x23: # ptgName
+ result += '%04x ' % (expression[0] + expression[1] * 0x100)
+ # TODO: looks like we're skipping quite a few bytes
+ expression = expression[14:]
+ elif ptgid == 0x1f: # ptgNum
+ result += 'FLOAT '
+ # TODO: looks like we're skipping quite a few bytes
+ expression = expression[8:]
+ elif ptgid == 0x26: # ptgMemArea
+ expression = expression[4:] # skipping 4 bytes
+ expression = expression[expression[0] + expression[1] * 0x100:]
+ result += 'REFERENCE-EXPRESSION '
+ elif ptgid == 0x01: # ptgExp
+ formatcodes = 'HH'
+ formatsize = struct.calcsize(formatcodes)
+ row, column = struct.unpack(formatcodes, expression[0:formatsize])
+ expression = expression[formatsize:]
+ result += 'R%dC%d ' % (row + 1, column + 1)
+ elif ptgid == 0x24 or ptgid == 0x44: # 0x24: 'ptgRef', 0x44: 'ptgRefV'
+ result += '%s ' % ParseLoc(expression)
+ expression = expression[4:]
+ elif ptgid == 0x3A or ptgid == 0x5A: # 0x3A: 'ptgRef3d', 0x5A: 'ptgRef3dV'
+ result += '%s ' % ParseLoc(expression[2:])
+ expression = expression[6:]
+ else:
+ break
+ else:
+ result += '*UNKNOWN TOKEN* '
+ break
+ if len(expression) == 0:
+ return result
+ else:
+ # 0x006E: 'EXEC', 0x0095: 'REGISTER'
+ functions = [dFunctions[functionid] for functionid in [0x6E, 0x95] if ContainsWord(functionid, expression)]
+ if functions != []:
+ message = ' Could contain following functions: ' + ','.join(functions) + ' -'
+ else:
+ message = ''
+ return result + ' *INCOMPLETE FORMULA PARSING*' + message + ' Remaining, unparsed expression: ' + repr(expression)
+
+
+class cBIFF(object): # cPluginParent):
+ # Plugin that walks the BIFF records of a 'Workbook' or 'Book' stream and renders
+ # each selected record as a line of text (see Analyze).
+ # NOTE(review): the base class is object, with cPluginParent left in the trailing
+ # comment above; presumably adapted from a plugin framework - confirm.
+ macroOnly = False
+ name = 'BIFF plugin'
+
+ def __init__(self, name, stream, options):
+ # name: stream name; Analyze only processes ['Workbook'] and ['Book']
+ # stream: stream content; Analyze converts it with bytearray()
+ # options: option string; Analyze splits it on ' ' and parses it with optparse
+ self.streamname = name
+ self.stream = stream
+ self.options = options
+ # set to True by Analyze once the stream name has been accepted
+ self.ran = False
+
+ def Analyze(self):
+ # Parse the stream as consecutive records and return a list of text lines:
+ # one line per selected record, optionally followed by a hex/ASCII dump (-a)
+ # or a strings dump (-s) of the record data. The list is empty when the stream
+ # name is not accepted, or when -x is given and no BOUNDSHEET record announced
+ # an Excel 4.0 macro sheet.
+ result = []
+ macros4Found = False
+ if self.streamname in [['Workbook'], ['Book']]:
+ self.ran = True
+ # use a bytearray to have Python 2+3 compatibility with the same code (no need for ord())
+ stream = bytearray(self.stream)
+
+ # the plugin options use a command-line syntax, parsed with optparse
+ oParser = optparse.OptionParser()
+ oParser.add_option('-s', '--strings', action='store_true', default=False, help='Dump strings')
+ oParser.add_option('-a', '--hexascii', action='store_true', default=False, help='Dump hex ascii')
+ oParser.add_option('-x', '--xlm', action='store_true', default=False, help='Select all records relevant for Excel 4.0 macros')
+ oParser.add_option('-o', '--opcode', type=str, default='', help='Opcode to filter for')
+ oParser.add_option('-f', '--find', type=str, default='', help='Content to search for')
+ (options, args) = oParser.parse_args(self.options.split(' '))
+
+ # a -f value starting with 0x is taken as hex digits and converted to binary
+ if options.find.startswith('0x'):
+ options.find = binascii.a2b_hex(options.find[2:])
+
+ # consume the stream one record at a time until nothing is left
+ while len(stream)>0:
+ # record header: two unsigned shorts, the opcode and the length of the data that follows
+ # NOTE(review): 'HH' has no byte-order prefix, so struct uses native byte order;
+ # presumably the file format is little-endian ('<HH') - confirm.
+ # NOTE(review): struct.unpack raises struct.error when fewer than formatsize
+ # bytes remain (truncated stream) - confirm this is acceptable for callers.
+ formatcodes = 'HH'
+ formatsize = struct.calcsize(formatcodes)
+ # print('formatsize=%d' % formatsize)
+ opcode, length = struct.unpack(formatcodes, stream[0:formatsize])
+ # print('opcode=%d length=%d len(stream)=%d' % (opcode, length, len(stream)))
+ stream = stream[formatsize:]
+ data = stream[:length]
+ stream = stream[length:]
+
+ # unknown opcodes get an empty name
+ if opcode in dOpcodes:
+ opcodename = dOpcodes[opcode]
+ else:
+ opcodename = ''
+ line = '%04x %6d %s' % (opcode, length, opcodename)
+ # print(line)
+
+ # FORMULA record
+ # row and column are the first two shorts of the data; the short at offset 20 is
+ # reported as len= and the bytes from offset 22 on are passed to ParseExpression.
+ # Note that length is rebound here (the record length is already part of line).
+ if opcode == 0x06 and len(data) >= 21:
+ formatcodes = 'HH'
+ formatsize = struct.calcsize(formatcodes)
+ row, column = struct.unpack(formatcodes, data[0:formatsize])
+ formatcodes = 'H'
+ formatsize = struct.calcsize(formatcodes)
+ length = struct.unpack(formatcodes, data[20:20 + formatsize])[0]
+ expression = data[22:]
+ line += ' - R%dC%d len=%d %s' % (row + 1, column + 1, length, ParseExpression(expression))
+ # print(line)
+
+ # FORMULA record #a# difference BIFF4 and BIFF5+
+ # NOTE(review): the code below reports a built-in name (Auto_Open/Auto_Close) or a
+ # name string, so opcode 0x18 looks like a name record rather than FORMULA - confirm.
+ if opcode == 0x18 and len(data) >= 16:
+ # bit 0x20 of the first byte selects the built-in name branch
+ if data[0] & 0x20:
+ dBuildInNames = {1: 'Auto_Open', 2: 'Auto_Close'}
+ code = data[14]
+ if code == 0: #a# hack with BIFF8 Unicode
+ code = data[15]
+ line += ' - build-in-name %d %s' % (code, dBuildInNames.get(code, '?'))
+ else:
+ pass
+ line += ' - %s' % bytes2str(data[14:14+data[3]])
+ # print(line)
+
+ # BOUNDSHEET record
+ # data[5] is looked up as the sheet type and data[4] as the sheet state; a sheet
+ # type of 1 (Excel 4.0 macro sheet) is remembered for the final -x check.
+ if opcode == 0x85 and len(data) >= 6:
+ dSheetType = {0: 'worksheet or dialog sheet', 1: 'Excel 4.0 macro sheet', 2: 'chart', 6: 'Visual Basic module'}
+ if data[5] == 1:
+ macros4Found = True
+ dSheetState = {0: 'visible', 1: 'hidden', 2: 'very hidden'}
+ line += ' - %s, %s' % (dSheetType.get(data[5], '%02x' % data[5]), dSheetState.get(data[4], '%02x' % data[4]))
+ # print(line)
+
+ # STRING record
+ # the strings found after the first 3 bytes are appended to the line
+ # NOTE(review): values[0]/values[1] rely on the order of the dict returned by
+ # Strings ('s' then 'L' by insertion); dict order is only guaranteed on
+ # Python 3.7+ - confirm the output order does not matter on older versions.
+ if opcode == 0x207 and len(data) >= 4:
+ values = list(Strings(data[3:]).values())
+ strings = ''
+ if values[0] != []:
+ strings += ' '.join(values[0])
+ if values[1] != []:
+ if strings != '':
+ strings += ' '
+ strings += ' '.join(values[1])
+ line += ' - %s' % strings
+ # print(line)
+
+ # A record is selected when: no filter is set (-f, -o, -x all unset); or the -o text
+ # occurs (case-insensitively) in the line; or the -f content occurs in the data; or
+ # -x is set and the opcode is one of 0x06, 0x18, 0x85, 0x207.
+ # NOTE(review): options.find stays a str unless given as 0x-hex, and on Python 3
+ # "str in bytearray" raises TypeError - a plain-text -f value may need encoding; confirm.
+ if options.find == '' and options.opcode == '' and not options.xlm or options.opcode != '' and options.opcode.lower() in line.lower() or options.find != '' and options.find in data or options.xlm and opcode in [0x06, 0x18, 0x85, 0x207]:
+ result.append(line)
+
+ # optional dump of the record data: -a takes precedence over -s
+ if options.hexascii:
+ result.extend(' ' + foundstring for foundstring in HexASCII(data, 8))
+ elif options.strings:
+ dEncodings = {'s': 'ASCII', 'L': 'UNICODE'}
+ for encoding, strings in Strings(data).items():
+ if len(strings) > 0:
+ result.append(' ' + dEncodings[encoding] + ':')
+ result.extend(' ' + foundstring for foundstring in strings)
+
+ # with -x, report nothing unless a BOUNDSHEET record announced an Excel 4.0 macro sheet
+ if options.xlm and not macros4Found:
+ result = []
+
+ return result
+
+# AddPlugin(cBIFF)
diff --git a/oletools/thirdparty/tablestream/tablestream.py b/oletools/thirdparty/tablestream/tablestream.py
index dcc82ab..cd5a924 100644
--- a/oletools/thirdparty/tablestream/tablestream.py
+++ b/oletools/thirdparty/tablestream/tablestream.py
@@ -55,8 +55,9 @@ from __future__ import print_function
# 2016-08-28 v0.07 PL: - support for both Python 2.6+ and 3.x
# - all cells are converted to unicode
# 2018-09-22 v0.08 PL: - removed mention to oletools' thirdparty folder
+# 2019-03-27 v0.09 PL: - slight fix, TableStyleSlim inherits from TableStyle
-__version__ = '0.08'
+__version__ = '0.09'
#------------------------------------------------------------------------------
# TODO:
@@ -174,7 +175,7 @@ class TableStyle(object):
bottom_right = u'+'
-class TableStyleSlim(object):
+class TableStyleSlim(TableStyle):
"""
Style for a TableStream.
Example:
diff --git a/oletools/thirdparty/xglob/xglob.py b/oletools/thirdparty/xglob/xglob.py
index d8f14ed..c83cf90 100644
--- a/oletools/thirdparty/xglob/xglob.py
+++ b/oletools/thirdparty/xglob/xglob.py
@@ -1,208 +1,214 @@
-#! /usr/bin/env python2
-"""
-xglob
-
-xglob is a python package to list files matching wildcards (*, ?, []),
-extending the functionality of the glob module from the standard python
-library (https://docs.python.org/2/library/glob.html).
-
-Main features:
-- recursive file listing (including subfolders)
-- file listing within Zip archives
-- helper function to open files specified as arguments, supporting files
- within zip archives encrypted with a password
-
-Author: Philippe Lagadec - http://www.decalage.info
-License: BSD, see source code or documentation
-
-For more info and updates: http://www.decalage.info/xglob
-"""
-
-# LICENSE:
-#
-# xglob is copyright (c) 2013-2016, Philippe Lagadec (http://www.decalage.info)
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without modification,
-# are permitted provided that the following conditions are met:
-#
-# * Redistributions of source code must retain the above copyright notice, this
-# list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-#------------------------------------------------------------------------------
-# CHANGELOG:
-# 2013-12-04 v0.01 PL: - scan several files from command line args
-# 2014-01-14 v0.02 PL: - added riglob, ziglob
-# 2014-12-26 v0.03 PL: - moved code from balbuzard into a separate package
-# 2015-01-03 v0.04 PL: - fixed issues in iter_files + yield container name
-# 2016-02-24 v0.05 PL: - do not stop on exceptions, return them as data
-# - fixed issue when using wildcards with empty path
-# 2016-04-28 v0.06 CH: - improved handling of non-existing files
-# (by Christian Herdtweck)
-
-__version__ = '0.06'
-
-
-#=== IMPORTS =================================================================
-
-import os, fnmatch, glob, zipfile
-
-#=== EXCEPTIONS ==============================================================
-
-class PathNotFoundException(Exception):
- """ raised if given a fixed file/dir (not a glob) that does not exist """
- def __init__(self, path):
- super(PathNotFoundException, self).__init__(
- 'Given path does not exist: %r' % path)
-
-
-#=== FUNCTIONS ===============================================================
-
-# recursive glob function to find files in any subfolder:
-# inspired by http://stackoverflow.com/questions/14798220/how-can-i-search-sub-folders-using-glob-glob-module-in-python
-def rglob (path, pattern='*.*'):
- """
- Recursive glob:
- similar to glob.glob, but finds files recursively in all subfolders of path.
- path: root directory where to search files
- pattern: pattern for filenames, using wildcards, e.g. *.txt
- """
- #TODO: more compatible API with glob: use single param, split path from pattern
- return [os.path.join(dirpath, f)
- for dirpath, dirnames, files in os.walk(path)
- for f in fnmatch.filter(files, pattern)]
-
-
-def riglob (pathname):
- """
- Recursive iglob:
- similar to glob.iglob, but finds files recursively in all subfolders of path.
- pathname: root directory where to search files followed by pattern for
- filenames, using wildcards, e.g. *.txt
- """
- path, filespec = os.path.split(pathname)
- # fix path if empty:
- if path == '':
- path = '.'
- # print 'riglob: path=%r, filespec=%r' % (path, filespec)
- for dirpath, dirnames, files in os.walk(path):
- for f in fnmatch.filter(files, filespec):
- yield os.path.join(dirpath, f)
-
-
-def ziglob (zipfileobj, pathname):
- """
- iglob in a zip:
- similar to glob.iglob, but finds files within a zip archive.
- - zipfileobj: zipfile.ZipFile object
- - pathname: root directory where to search files followed by pattern for
- filenames, using wildcards, e.g. *.txt
- """
- files = zipfileobj.namelist()
- #for f in files: print f
- for f in fnmatch.filter(files, pathname):
- yield f
-
-
-def iter_files(files, recursive=False, zip_password=None, zip_fname='*'):
- """
- Open each file provided as argument:
- - files is a list of arguments
- - if zip_password is None, each file is listed without reading its content.
- Wilcards are supported.
- - if not, then each file is opened as a zip archive with the provided password
- - then files matching zip_fname are opened from the zip archive
-
- Iterator: yields (container, filename, data) for each file. If zip_password is None, then
- only the filename is returned, container and data=None. Otherwise container is the
- filename of the container (zip file), and data is the file content (or an exception).
- If a given filename is not a glob and does not exist, the triplet
- (None, filename, PathNotFoundException) is yielded. (Globs matching nothing
- do not trigger exceptions)
- """
- #TODO: catch exceptions and yield them for the caller (no file found, file is not zip, wrong password, etc)
- #TODO: use logging instead of printing
- #TODO: split in two simpler functions, the caller knows if it's a zip or not
- # print 'iter_files: files=%r, recursive=%s' % (files, recursive)
- # choose recursive or non-recursive iglob:
- if recursive:
- iglob = riglob
- else:
- iglob = glob.iglob
- for filespec in files:
- if not is_glob(filespec) and not os.path.exists(filespec):
- yield None, filespec, PathNotFoundException(filespec)
- continue
- for filename in iglob(filespec):
- if zip_password is not None:
- # Each file is expected to be a zip archive:
- #print 'Opening zip archive %s with provided password' % filename
- z = zipfile.ZipFile(filename, 'r')
- #print 'Looking for file(s) matching "%s"' % zip_fname
- for subfilename in ziglob(z, zip_fname):
- #print 'Opening file in zip archive:', filename
- try:
- data = z.read(subfilename, zip_password)
- yield filename, subfilename, data
- except Exception as e:
- yield filename, subfilename, e
- z.close()
- else:
- # normal file
- # do not read the file content, just yield the filename
- yield None, filename, None
- #print 'Opening file', filename
- #data = open(filename, 'rb').read()
- #yield None, filename, data
-
-
-def is_glob(filespec):
- """ determine if given file specification is a single file name or a glob
-
- python's glob and fnmatch can only interpret ?, *, [list], and [ra-nge],
- (and combinations: hex_*_[A-Fabcdef0-9]).
- The special chars *?[-] can only be escaped using []
- --> file_name is not a glob
- --> file?name is a glob
- --> file* is a glob
- --> file[-._]name is a glob
- --> file[?]name is not a glob (matches literal "file?name")
- --> file[*]name is not a glob (matches literal "file*name")
- --> file[-]name is not a glob (matches literal "file-name")
- --> file-name is not a glob
-
- Also, obviously incorrect globs are treated as non-globs
- --> file[name is not a glob (matches literal "file[name")
- --> file]-[name is treated as a glob
- (it is not a valid glob but detecting errors like this requires
- sophisticated regular expression matching)
-
- Python's glob also works with globs in directory-part of path
- --> dir-part of path is analyzed just like filename-part
- --> thirdparty/*/xglob.py is a (valid) glob
-
- TODO: create a correct regexp to test for validity of ranges
- """
-
- # remove escaped special chars
- cleaned = filespec.replace('[*]', '').replace('[?]', '') \
- .replace('[[]', '').replace('[]]', '').replace('[-]', '')
-
- # check if special chars remain
- return '*' in cleaned or '?' in cleaned or \
- ('[' in cleaned and ']' in cleaned)
+#! /usr/bin/env python2
+"""
+xglob
+
+xglob is a python package to list files matching wildcards (*, ?, []),
+extending the functionality of the glob module from the standard python
+library (https://docs.python.org/2/library/glob.html).
+
+Main features:
+- recursive file listing (including subfolders)
+- file listing within Zip archives
+- helper function to open files specified as arguments, supporting files
+ within zip archives encrypted with a password
+
+Author: Philippe Lagadec - http://www.decalage.info
+License: BSD, see source code or documentation
+
+For more info and updates: http://www.decalage.info/xglob
+"""
+
+# LICENSE:
+#
+# xglob is copyright (c) 2013-2018, Philippe Lagadec (http://www.decalage.info)
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+#------------------------------------------------------------------------------
+# CHANGELOG:
+# 2013-12-04 v0.01 PL: - scan several files from command line args
+# 2014-01-14 v0.02 PL: - added riglob, ziglob
+# 2014-12-26 v0.03 PL: - moved code from balbuzard into a separate package
+# 2015-01-03 v0.04 PL: - fixed issues in iter_files + yield container name
+# 2016-02-24 v0.05 PL: - do not stop on exceptions, return them as data
+# - fixed issue when using wildcards with empty path
+# 2016-04-28 v0.06 CH: - improved handling of non-existing files
+# (by Christian Herdtweck)
+# 2018-12-08 v0.07 PL: - fixed issue #373, zip password must be bytes
+
+__version__ = '0.07'
+
+
+#=== IMPORTS =================================================================
+
+import os, fnmatch, glob, zipfile
+
+#=== EXCEPTIONS ==============================================================
+
+class PathNotFoundException(Exception):
+    """ error for a fixed file/dir path (not a glob) that does not exist """
+
+    def __init__(self, path):
+        # the offending path is embedded, repr-formatted, in the exception message
+        message = 'Given path does not exist: %r' % path
+        super(PathNotFoundException, self).__init__(message)
+
+
+#=== FUNCTIONS ===============================================================
+
+# recursive glob function to find files in any subfolder:
+# inspired by http://stackoverflow.com/questions/14798220/how-can-i-search-sub-folders-using-glob-glob-module-in-python
+def rglob(path, pattern='*.*'):
+    """
+    Recursive glob:
+    list the files found in path or any of its subfolders whose name matches pattern.
+    path: root directory where to search files
+    pattern: pattern for filenames, using wildcards, e.g. *.txt
+    Returns a list of paths (folder joined with file name), in os.walk order.
+    """
+    #TODO: more compatible API with glob: use single param, split path from pattern
+    matches = []
+    for dirpath, _subdirs, filenames in os.walk(path):
+        for matched in fnmatch.filter(filenames, pattern):
+            matches.append(os.path.join(dirpath, matched))
+    return matches
+
+
+def riglob(pathname):
+    """
+    Recursive iglob:
+    generator over the files whose name matches the pattern part of pathname, searched
+    in the directory part of pathname and in all of its subfolders.
+    pathname: root directory where to search files followed by pattern for
+              filenames, using wildcards, e.g. *.txt
+    """
+    root, filespec = os.path.split(pathname)
+    # a bare pattern (no directory part) is searched from the current directory
+    root = root if root != '' else '.'
+    for dirpath, _subdirs, filenames in os.walk(root):
+        for matched in fnmatch.filter(filenames, filespec):
+            yield os.path.join(dirpath, matched)
+
+
+def ziglob(zipfileobj, pathname):
+    """
+    iglob in a zip:
+    generator over the member names of a zip archive that match pathname.
+    - zipfileobj: zipfile.ZipFile object
+    - pathname: pattern using wildcards, e.g. *.txt, matched with fnmatch against
+      the member names returned by zipfileobj.namelist()
+    """
+    member_names = zipfileobj.namelist()
+    matching = fnmatch.filter(member_names, pathname)
+    for member in matching:
+        yield member
+
+
+def iter_files(files, recursive=False, zip_password=None, zip_fname='*'):
+    """
+    Open each file provided as argument:
+    - files is a list of arguments
+    - if zip_password is None, each file is listed without reading its content.
+      Wildcards are supported.
+    - if not, then each file is opened as a zip archive with the provided password
+    - then files matching zip_fname are opened from the zip archive
+
+    zip_password may be bytes or text; text is encoded as UTF-8 because zipfile
+    expects the password as bytes.
+
+    Iterator: yields (container, filename, data) for each file. If zip_password is None, then
+    only the filename is returned, container and data=None. Otherwise container is the
+    filename of the container (zip file), and data is the file content (or an exception).
+    If a given filename is not a glob and does not exist, the triplet
+    (None, filename, PathNotFoundException) is yielded. (Globs matching nothing
+    do not trigger exceptions)
+    """
+    #TODO: catch exceptions and yield them for the caller (no file found, file is not zip, wrong password, etc)
+    #TODO: use logging instead of printing
+    #TODO: split in two simpler functions, the caller knows if it's a zip or not
+    # The zip password must be bytes, not unicode/str. encode() works for Python 2
+    # unicode and Python 3 str alike, whereas bytes(text, encoding=...) raises
+    # TypeError on Python 2, where bytes is an alias of str. Converted once, up front.
+    if zip_password is not None and not isinstance(zip_password, bytes):
+        zip_password = zip_password.encode('utf8')
+    # choose recursive or non-recursive iglob:
+    iglob = riglob if recursive else glob.iglob
+    for filespec in files:
+        if not is_glob(filespec) and not os.path.exists(filespec):
+            yield None, filespec, PathNotFoundException(filespec)
+            continue
+        for filename in iglob(filespec):
+            if zip_password is None:
+                # normal file: do not read the file content, just yield the filename
+                yield None, filename, None
+                continue
+            # Each file is expected to be a zip archive:
+            z = zipfile.ZipFile(filename, 'r')
+            try:
+                for subfilename in ziglob(z, zip_fname):
+                    try:
+                        data = z.read(subfilename, zip_password)
+                    except Exception as e:
+                        # read errors are handed to the caller as data, not raised
+                        yield filename, subfilename, e
+                    else:
+                        yield filename, subfilename, data
+            finally:
+                # close the archive even if the consumer stops iterating early
+                z.close()
+
+
+def is_glob(filespec):
+    """ tell whether a file specification is a glob or a single file name
+
+    python's glob and fnmatch can only interpret ?, *, [list], and [ra-nge],
+    (and combinations: hex_*_[A-Fabcdef0-9]).
+    The special chars *?[-] can only be escaped using []
+    --> file_name       is not a glob
+    --> file?name       is a glob
+    --> file*           is a glob
+    --> file[-._]name   is a glob
+    --> file[?]name     is not a glob (matches literal "file?name")
+    --> file[*]name     is not a glob (matches literal "file*name")
+    --> file[-]name     is not a glob (matches literal "file-name")
+    --> file-name       is not a glob
+
+    Also, obviously incorrect globs are treated as non-globs
+    --> file[name       is not a glob (matches literal "file[name")
+    --> file]-[name     is treated as a glob
+        (it is not a valid glob but detecting errors like this requires
+         sophisticated regular expression matching)
+
+    Python's glob also works with globs in directory-part of path
+    --> dir-part of path is analyzed just like filename-part
+    --> thirdparty/*/xglob.py is a (valid) glob
+
+    TODO: create a correct regexp to test for validity of ranges
+    """
+    # strip the escaped special chars one after the other, in this order
+    cleaned = filespec
+    for escaped in ('[*]', '[?]', '[[]', '[]]', '[-]'):
+        cleaned = cleaned.replace(escaped, '')
+
+    # any wildcard left makes it a glob ...
+    if '*' in cleaned or '?' in cleaned:
+        return True
+    # ... and so does a remaining pair of brackets
+    return '[' in cleaned and ']' in cleaned
diff --git a/oletools/thirdparty/zipfile27/LICENSE.txt b/oletools/thirdparty/zipfile27/LICENSE.txt
deleted file mode 100644
index 83453ee..0000000
--- a/oletools/thirdparty/zipfile27/LICENSE.txt
+++ /dev/null
@@ -1,275 +0,0 @@
-Python 2.7 license
-
-This is the official license for the Python 2.7 release:
-
-A. HISTORY OF THE SOFTWARE
-==========================
-
-Python was created in the early 1990s by Guido van Rossum at Stichting
-Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands
-as a successor of a language called ABC. Guido remains Python's
-principal author, although it includes many contributions from others.
-
-In 1995, Guido continued his work on Python at the Corporation for
-National Research Initiatives (CNRI, see http://www.cnri.reston.va.us)
-in Reston, Virginia where he released several versions of the
-software.
-
-In May 2000, Guido and the Python core development team moved to
-BeOpen.com to form the BeOpen PythonLabs team. In October of the same
-year, the PythonLabs team moved to Digital Creations (now Zope
-Corporation, see http://www.zope.com). In 2001, the Python Software
-Foundation (PSF, see http://www.python.org/psf/) was formed, a
-non-profit organization created specifically to own Python-related
-Intellectual Property. Zope Corporation is a sponsoring member of
-the PSF.
-
-All Python releases are Open Source (see http://www.opensource.org for
-the Open Source Definition). Historically, most, but not all, Python
-releases have also been GPL-compatible; the table below summarizes
-the various releases.
-
- Release Derived Year Owner GPL-
- from compatible? (1)
-
- 0.9.0 thru 1.2 1991-1995 CWI yes
- 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes
- 1.6 1.5.2 2000 CNRI no
- 2.0 1.6 2000 BeOpen.com no
- 1.6.1 1.6 2001 CNRI yes (2)
- 2.1 2.0+1.6.1 2001 PSF no
- 2.0.1 2.0+1.6.1 2001 PSF yes
- 2.1.1 2.1+2.0.1 2001 PSF yes
- 2.2 2.1.1 2001 PSF yes
- 2.1.2 2.1.1 2002 PSF yes
- 2.1.3 2.1.2 2002 PSF yes
- 2.2.1 2.2 2002 PSF yes
- 2.2.2 2.2.1 2002 PSF yes
- 2.2.3 2.2.2 2003 PSF yes
- 2.3 2.2.2 2002-2003 PSF yes
- 2.3.1 2.3 2002-2003 PSF yes
- 2.3.2 2.3.1 2002-2003 PSF yes
- 2.3.3 2.3.2 2002-2003 PSF yes
- 2.3.4 2.3.3 2004 PSF yes
- 2.3.5 2.3.4 2005 PSF yes
- 2.4 2.3 2004 PSF yes
- 2.4.1 2.4 2005 PSF yes
- 2.4.2 2.4.1 2005 PSF yes
- 2.4.3 2.4.2 2006 PSF yes
- 2.5 2.4 2006 PSF yes
- 2.7 2.6 2010 PSF yes
-
-Footnotes:
-
-(1) GPL-compatible doesn't mean that we're distributing Python under
- the GPL. All Python licenses, unlike the GPL, let you distribute
- a modified version without making your changes open source. The
- GPL-compatible licenses make it possible to combine Python with
- other software that is released under the GPL; the others don't.
-
-(2) According to Richard Stallman, 1.6.1 is not GPL-compatible,
- because its license has a choice of law clause. According to
- CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1
- is "not incompatible" with the GPL.
-
-Thanks to the many outside volunteers who have worked under Guido's
-direction to make these releases possible.
-
-
-B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON
-===============================================================
-
-PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
---------------------------------------------
-
-1. This LICENSE AGREEMENT is between the Python Software Foundation
-("PSF"), and the Individual or Organization ("Licensee") accessing and
-otherwise using this software ("Python") in source or binary form and
-its associated documentation.
-
-2. Subject to the terms and conditions of this License Agreement, PSF
-hereby grants Licensee a nonexclusive, royalty-free, world-wide
-license to reproduce, analyze, test, perform and/or display publicly,
-prepare derivative works, distribute, and otherwise use Python
-alone or in any derivative version, provided, however, that PSF's
-License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
-2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation; All Rights
-Reserved" are retained in Python alone or in any derivative version
-prepared by Licensee.
-
-3. In the event Licensee prepares a derivative work that is based on
-or incorporates Python or any part thereof, and wants to make
-the derivative work available to others as provided herein, then
-Licensee hereby agrees to include in any such work a brief summary of
-the changes made to Python.
-
-4. PSF is making Python available to Licensee on an "AS IS"
-basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
-IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
-DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
-FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
-INFRINGE ANY THIRD PARTY RIGHTS.
-
-5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
-FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
-A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
-OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
-
-6. This License Agreement will automatically terminate upon a material
-breach of its terms and conditions.
-
-7. Nothing in this License Agreement shall be deemed to create any
-relationship of agency, partnership, or joint venture between PSF and
-Licensee. This License Agreement does not grant permission to use PSF
-trademarks or trade name in a trademark sense to endorse or promote
-products or services of Licensee, or any third party.
-
-8. By copying, installing or otherwise using Python, Licensee
-agrees to be bound by the terms and conditions of this License
-Agreement.
-
-
-BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0
--------------------------------------------
-
-BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1
-
-1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an
-office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the
-Individual or Organization ("Licensee") accessing and otherwise using
-this software in source or binary form and its associated
-documentation ("the Software").
-
-2. Subject to the terms and conditions of this BeOpen Python License
-Agreement, BeOpen hereby grants Licensee a non-exclusive,
-royalty-free, world-wide license to reproduce, analyze, test, perform
-and/or display publicly, prepare derivative works, distribute, and
-otherwise use the Software alone or in any derivative version,
-provided, however, that the BeOpen Python License is retained in the
-Software, alone or in any derivative version prepared by Licensee.
-
-3. BeOpen is making the Software available to Licensee on an "AS IS"
-basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
-IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND
-DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
-FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT
-INFRINGE ANY THIRD PARTY RIGHTS.
-
-4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE
-SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS
-AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY
-DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
-
-5. This License Agreement will automatically terminate upon a material
-breach of its terms and conditions.
-
-6. This License Agreement shall be governed by and interpreted in all
-respects by the law of the State of California, excluding conflict of
-law provisions. Nothing in this License Agreement shall be deemed to
-create any relationship of agency, partnership, or joint venture
-between BeOpen and Licensee. This License Agreement does not grant
-permission to use BeOpen trademarks or trade names in a trademark
-sense to endorse or promote products or services of Licensee, or any
-third party. As an exception, the "BeOpen Python" logos available at
-http://www.pythonlabs.com/logos.html may be used according to the
-permissions granted on that web page.
-
-7. By copying, installing or otherwise using the software, Licensee
-agrees to be bound by the terms and conditions of this License
-Agreement.
-
-
-CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1
----------------------------------------
-
-1. This LICENSE AGREEMENT is between the Corporation for National
-Research Initiatives, having an office at 1895 Preston White Drive,
-Reston, VA 20191 ("CNRI"), and the Individual or Organization
-("Licensee") accessing and otherwise using Python 1.6.1 software in
-source or binary form and its associated documentation.
-
-2. Subject to the terms and conditions of this License Agreement, CNRI
-hereby grants Licensee a nonexclusive, royalty-free, world-wide
-license to reproduce, analyze, test, perform and/or display publicly,
-prepare derivative works, distribute, and otherwise use Python 1.6.1
-alone or in any derivative version, provided, however, that CNRI's
-License Agreement and CNRI's notice of copyright, i.e., "Copyright (c)
-1995-2001 Corporation for National Research Initiatives; All Rights
-Reserved" are retained in Python 1.6.1 alone or in any derivative
-version prepared by Licensee. Alternately, in lieu of CNRI's License
-Agreement, Licensee may substitute the following text (omitting the
-quotes): "Python 1.6.1 is made available subject to the terms and
-conditions in CNRI's License Agreement. This Agreement together with
-Python 1.6.1 may be located on the Internet using the following
-unique, persistent identifier (known as a handle): 1895.22/1013. This
-Agreement may also be obtained from a proxy server on the Internet
-using the following URL: http://hdl.handle.net/1895.22/1013".
-
-3. In the event Licensee prepares a derivative work that is based on
-or incorporates Python 1.6.1 or any part thereof, and wants to make
-the derivative work available to others as provided herein, then
-Licensee hereby agrees to include in any such work a brief summary of
-the changes made to Python 1.6.1.
-
-4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS"
-basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
-IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND
-DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
-FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT
-INFRINGE ANY THIRD PARTY RIGHTS.
-
-5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
-1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
-A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1,
-OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
-
-6. This License Agreement will automatically terminate upon a material
-breach of its terms and conditions.
-
-7. This License Agreement shall be governed by the federal
-intellectual property law of the United States, including without
-limitation the federal copyright law, and, to the extent such
-U.S. federal law does not apply, by the law of the Commonwealth of
-Virginia, excluding Virginia's conflict of law provisions.
-Notwithstanding the foregoing, with regard to derivative works based
-on Python 1.6.1 that incorporate non-separable material that was
-previously distributed under the GNU General Public License (GPL), the
-law of the Commonwealth of Virginia shall govern this License
-Agreement only as to issues arising under or with respect to
-Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this
-License Agreement shall be deemed to create any relationship of
-agency, partnership, or joint venture between CNRI and Licensee. This
-License Agreement does not grant permission to use CNRI trademarks or
-trade name in a trademark sense to endorse or promote products or
-services of Licensee, or any third party.
-
-8. By clicking on the "ACCEPT" button where indicated, or by copying,
-installing or otherwise using Python 1.6.1, Licensee agrees to be
-bound by the terms and conditions of this License Agreement.
-
- ACCEPT
-
-
-CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2
---------------------------------------------------
-
-Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam,
-The Netherlands. All rights reserved.
-
-Permission to use, copy, modify, and distribute this software and its
-documentation for any purpose and without fee is hereby granted,
-provided that the above copyright notice appear in all copies and that
-both that copyright notice and this permission notice appear in
-supporting documentation, and that the name of Stichting Mathematisch
-Centrum or CWI not be used in advertising or publicity pertaining to
-distribution of the software without specific, written prior
-permission.
-
-STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
-THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
-FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
-FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
-OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
diff --git a/oletools/thirdparty/zipfile27/__init__.py b/oletools/thirdparty/zipfile27/__init__.py
deleted file mode 100644
index bbab5a7..0000000
--- a/oletools/thirdparty/zipfile27/__init__.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# Excerpt from the zipfile module from Python 2.7, to enable is_zipfile
-# to check any file object (e.g. in memory), for Python 2.6.
-# is_zipfile in Python 2.6 can only check files on disk.
-
-# This code from Python 2.7 was not modified.
-
-# 2016-09-06 v0.01 PL: - first version
-
-
-from zipfile import _EndRecData
-
-def _check_zipfile(fp):
- try:
- if _EndRecData(fp):
- return True # file has correct magic number
- except IOError:
- pass
- return False
-
-def is_zipfile(filename):
- """Quickly see if a file is a ZIP file by checking the magic number.
-
- The filename argument may be a file or file-like object too.
- """
- result = False
- try:
- if hasattr(filename, "read"):
- result = _check_zipfile(fp=filename)
- else:
- with open(filename, "rb") as fp:
- result = _check_zipfile(fp)
- except IOError:
- pass
- return result
-
diff --git a/oletools/xls_parser.py b/oletools/xls_parser.py
index 52575a7..2f0bdad 100644
--- a/oletools/xls_parser.py
+++ b/oletools/xls_parser.py
@@ -5,7 +5,7 @@ Read storages, (sub-)streams, records from xls file
#
# === LICENSE ==================================================================
-# xls_parser is copyright (c) 2014-2018 Philippe Lagadec (http://www.decalage.info)
+# xls_parser is copyright (c) 2014-2019 Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
@@ -33,8 +33,10 @@ Read storages, (sub-)streams, records from xls file
# 2017-11-02 v0.1 CH: - first version
# 2017-11-02 v0.2 CH: - move some code to record_base.py
# (to avoid copy-and-paste in ppt_parser.py)
+# 2019-01-30 v0.54 PL: - fixed import to avoid mixing installed oletools
+# and dev version
-__version__ = '0.2'
+__version__ = '0.54'
# -----------------------------------------------------------------------------
# TODO:
@@ -56,17 +58,14 @@ import os.path
from struct import unpack
import logging
-try:
- from oletools import record_base
-except ImportError:
- # little hack to allow absolute imports even if oletools is not installed.
- # Copied from olevba.py
- PARENT_DIR = os.path.normpath(os.path.dirname(os.path.dirname(
- os.path.abspath(__file__))))
- if PARENT_DIR not in sys.path:
- sys.path.insert(0, PARENT_DIR)
- del PARENT_DIR
- from oletools import record_base
+# little hack to allow absolute imports even if oletools is not installed.
+# Copied from olevba.py
+PARENT_DIR = os.path.normpath(os.path.dirname(os.path.dirname(
+ os.path.abspath(__file__))))
+if PARENT_DIR not in sys.path:
+ sys.path.insert(0, PARENT_DIR)
+del PARENT_DIR
+from oletools import record_base
# === PYTHON 2+3 SUPPORT ======================================================
@@ -89,12 +88,18 @@ def is_xls(filename):
substream.
See also: oleid.OleID.check_excel
"""
+ xls_file = None
try:
- for stream in XlsFile(filename).iter_streams():
+ xls_file = XlsFile(filename)
+ for stream in xls_file.iter_streams():
if isinstance(stream, WorkbookStream):
return True
except Exception:
- pass
+ logging.debug('Ignoring exception in is_xls, assume is not xls',
+ exc_info=True)
+ finally:
+ if xls_file is not None:
+ xls_file.close()
return False
@@ -102,7 +107,7 @@ def read_unicode(data, start_idx, n_chars):
""" read a unicode string from a XLUnicodeStringNoCch structure """
# first bit 0x0 --> only low-bytes are saved, all high bytes are 0
# first bit 0x1 --> 2 bytes per character
- low_bytes_only = (ord(data[start_idx]) == 0)
+ low_bytes_only = (ord(data[start_idx:start_idx+1]) == 0)
if low_bytes_only:
end_idx = start_idx + 1 + n_chars
return data[start_idx+1:end_idx].decode('ascii'), end_idx
@@ -350,6 +355,7 @@ class XlsRecordSupBook(XlsRecord):
LINK_TYPE_EXTERNAL = 'external workbook'
def finish_constructing(self, _):
+ """Finish constructing this record; called at end of constructor."""
# set defaults
self.ctab = None
self.cch = None
diff --git a/requirements.txt b/requirements.txt
index 378be44..41010fc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,6 @@
pyparsing>=2.2.0
-olefile>=0.45
+olefile>=0.46
+easygui
+colorclass
+msoffcrypto-tool
+pcodedmp>=1.2.5
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 8b26df6..2c45c76 100644
--- a/setup.py
+++ b/setup.py
@@ -28,6 +28,9 @@ to install this package.
# 2018-09-15 PL: - easygui is now a dependency
# 2018-09-22 PL: - colorclass is now a dependency
# 2018-10-27 PL: - fixed issue #359 (bug when importing log_helper)
+# 2019-02-26 CH: - add optional dependency msoffcrypto for decryption
+# 2019-05-22 PL: - 'msoffcrypto-tool' is now a required dependency
+# 2019-05-23 v0.55 PL: - added pcodedmp as dependency
#--- TODO ---------------------------------------------------------------------
@@ -47,7 +50,7 @@ import os, fnmatch
#--- METADATA -----------------------------------------------------------------
name = "oletools"
-version = '0.54dev4'
+version = '0.55.dev3'
desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR"
long_desc = open('oletools/README.rst').read()
author = "Philippe Lagadec"
@@ -73,6 +76,7 @@ classifiers=[
"Programming Language :: Python :: 3.4",
"Programming Language :: Python :: 3.5",
"Programming Language :: Python :: 3.6",
+ "Programming Language :: Python :: 3.7",
"Topic :: Security",
"Topic :: Software Development :: Libraries :: Python Modules",
]
@@ -89,7 +93,7 @@ packages=[
'oletools.thirdparty.xglob',
'oletools.thirdparty.DridexUrlDecoder',
'oletools.thirdparty.tablestream',
- 'oletools.thirdparty.zipfile27',
+ 'oletools.thirdparty.oledump',
]
##setupdir = '.'
##package_dir={'': setupdir}
@@ -177,9 +181,6 @@ package_data={
'oletools.thirdparty.DridexUrlDecoder': [
'LICENSE.txt',
],
- 'oletools.thirdparty.zipfile27': [
- 'LICENSE.txt',
- ],
# 'oletools.thirdparty.tablestream': [
# 'LICENSE', 'README',
# ],
@@ -305,11 +306,11 @@ def main():
author_email=author_email,
url=url,
license=license,
-## package_dir=package_dir,
+ # package_dir=package_dir,
packages=packages,
package_data = package_data,
download_url=download_url,
-# data_files=data_files,
+ # data_files=data_files,
entry_points=entry_points,
test_suite="tests",
# scripts=scripts,
@@ -318,6 +319,8 @@ def main():
"olefile>=0.46",
"easygui",
'colorclass',
+ 'msoffcrypto-tool',
+ 'pcodedmp>=1.2.5',
],
)
diff --git a/tests/common/log_helper/log_helper_test_imported.py b/tests/common/log_helper/log_helper_test_imported.py
index b3777af..8820a3e 100644
--- a/tests/common/log_helper/log_helper_test_imported.py
+++ b/tests/common/log_helper/log_helper_test_imported.py
@@ -11,6 +11,8 @@ INFO_MESSAGE = 'imported: info log'
WARNING_MESSAGE = 'imported: warning log'
ERROR_MESSAGE = 'imported: error log'
CRITICAL_MESSAGE = 'imported: critical log'
+RESULT_MESSAGE = 'imported: result log'
+RESULT_TYPE = 'imported: result'
logger = log_helper.get_or_create_silent_logger('test_imported', logging.ERROR)
@@ -21,3 +23,4 @@ def log():
logger.warning(WARNING_MESSAGE)
logger.error(ERROR_MESSAGE)
logger.critical(CRITICAL_MESSAGE)
+ logger.info(RESULT_MESSAGE, type=RESULT_TYPE)
diff --git a/tests/common/log_helper/log_helper_test_main.py b/tests/common/log_helper/log_helper_test_main.py
index 0f6057a..fb0ccca 100644
--- a/tests/common/log_helper/log_helper_test_main.py
+++ b/tests/common/log_helper/log_helper_test_main.py
@@ -9,6 +9,8 @@ INFO_MESSAGE = 'main: info log'
WARNING_MESSAGE = 'main: warning log'
ERROR_MESSAGE = 'main: error log'
CRITICAL_MESSAGE = 'main: critical log'
+RESULT_MESSAGE = 'main: result log'
+RESULT_TYPE = 'main: result'
logger = log_helper.get_or_create_silent_logger('test_main')
@@ -32,12 +34,16 @@ def init_logging_and_log(args):
level = args[-1]
use_json = 'as-json' in args
throw = 'throw' in args
+ percent_autoformat = '%-autoformat' in args
if 'enable' in args:
log_helper.enable_logging(use_json, level, stream=sys.stdout)
_log()
+ if percent_autoformat:
+ logger.info('The %s is %d.', 'answer', 47)
+
if throw:
raise Exception('An exception occurred before ending the logging')
@@ -50,6 +56,7 @@ def _log():
logger.warning(WARNING_MESSAGE)
logger.error(ERROR_MESSAGE)
logger.critical(CRITICAL_MESSAGE)
+ logger.info(RESULT_MESSAGE, type=RESULT_TYPE)
log_helper_test_imported.log()
diff --git a/tests/common/log_helper/test_log_helper.py b/tests/common/log_helper/test_log_helper.py
index 03dee68..bcd0de0 100644
--- a/tests/common/log_helper/test_log_helper.py
+++ b/tests/common/log_helper/test_log_helper.py
@@ -13,9 +13,11 @@ from tests.common.log_helper import log_helper_test_main
from tests.common.log_helper import log_helper_test_imported
from os.path import dirname, join, relpath, abspath
+from tests.test_utils import PROJECT_ROOT
+
# this is the common base of "tests" and "oletools" dirs
-ROOT_DIRECTORY = abspath(join(__file__, '..', '..', '..', '..'))
-TEST_FILE = relpath(join(dirname(__file__), 'log_helper_test_main.py'), ROOT_DIRECTORY)
+TEST_FILE = relpath(join(dirname(abspath(__file__)), 'log_helper_test_main.py'),
+ PROJECT_ROOT)
PYTHON_EXECUTABLE = sys.executable
MAIN_LOG_MESSAGES = [
@@ -59,6 +61,62 @@ class TestLogHelper(unittest.TestCase):
log_helper_test_imported.CRITICAL_MESSAGE
])
+ def test_logs_type_ignored(self):
+ """Run test script with logging enabled at info level. Want no type."""
+ output = self._run_test(['enable', 'info'])
+
+ expect = '\n'.join([
+ 'INFO ' + log_helper_test_main.INFO_MESSAGE,
+ 'WARNING ' + log_helper_test_main.WARNING_MESSAGE,
+ 'ERROR ' + log_helper_test_main.ERROR_MESSAGE,
+ 'CRITICAL ' + log_helper_test_main.CRITICAL_MESSAGE,
+ 'INFO ' + log_helper_test_main.RESULT_MESSAGE,
+ 'INFO ' + log_helper_test_imported.INFO_MESSAGE,
+ 'WARNING ' + log_helper_test_imported.WARNING_MESSAGE,
+ 'ERROR ' + log_helper_test_imported.ERROR_MESSAGE,
+ 'CRITICAL ' + log_helper_test_imported.CRITICAL_MESSAGE,
+ 'INFO ' + log_helper_test_imported.RESULT_MESSAGE,
+ ])
+ self.assertEqual(output, expect)
+
+ def test_logs_type_in_json(self):
+ """Check type field is contained in json log."""
+ output = self._run_test(['enable', 'as-json', 'info'])
+
+ # convert to json preserving order of output
+ jout = json.loads(output)
+
+ jexpect = [
+ dict(type='msg', level='INFO',
+ msg=log_helper_test_main.INFO_MESSAGE),
+ dict(type='msg', level='WARNING',
+ msg=log_helper_test_main.WARNING_MESSAGE),
+ dict(type='msg', level='ERROR',
+ msg=log_helper_test_main.ERROR_MESSAGE),
+ dict(type='msg', level='CRITICAL',
+ msg=log_helper_test_main.CRITICAL_MESSAGE),
+ # this is the important entry (has a different "type" field):
+ dict(type=log_helper_test_main.RESULT_TYPE, level='INFO',
+ msg=log_helper_test_main.RESULT_MESSAGE),
+ dict(type='msg', level='INFO',
+ msg=log_helper_test_imported.INFO_MESSAGE),
+ dict(type='msg', level='WARNING',
+ msg=log_helper_test_imported.WARNING_MESSAGE),
+ dict(type='msg', level='ERROR',
+ msg=log_helper_test_imported.ERROR_MESSAGE),
+ dict(type='msg', level='CRITICAL',
+ msg=log_helper_test_imported.CRITICAL_MESSAGE),
+ # ... and this:
+ dict(type=log_helper_test_imported.RESULT_TYPE, level='INFO',
+ msg=log_helper_test_imported.RESULT_MESSAGE),
+ ]
+ self.assertEqual(jout, jexpect)
+
+ def test_percent_autoformat(self):
+ """Test that auto-formatting of log strings with `%` works."""
+ output = self._run_test(['enable', '%-autoformat', 'info'])
+ self.assertIn('The answer is 47.', output)
+
def test_json_correct_on_exceptions(self):
"""
Test that even on unhandled exceptions our JSON is always correct
@@ -72,10 +130,10 @@ class TestLogHelper(unittest.TestCase):
def _assert_json_messages(self, output, messages):
try:
json_data = json.loads(output)
- self.assertEquals(len(json_data), len(messages))
+ self.assertEqual(len(json_data), len(messages))
for i in range(len(messages)):
- self.assertEquals(messages[i], json_data[i]['msg'])
+ self.assertEqual(messages[i], json_data[i]['msg'])
except ValueError:
self.fail('Invalid json:\n' + output)
@@ -90,9 +148,9 @@ class TestLogHelper(unittest.TestCase):
child = subprocess.Popen(
[PYTHON_EXECUTABLE, TEST_FILE] + args,
shell=False,
- env={'PYTHONPATH': ROOT_DIRECTORY},
+ env={'PYTHONPATH': PROJECT_ROOT},
universal_newlines=True,
- cwd=ROOT_DIRECTORY,
+ cwd=PROJECT_ROOT,
stdin=None,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE
@@ -102,7 +160,7 @@ class TestLogHelper(unittest.TestCase):
if not isinstance(output, str):
output = output.decode('utf-8')
- self.assertEquals(child.returncode == 0, should_succeed)
+ self.assertEqual(child.returncode == 0, should_succeed)
return output.strip()
diff --git a/tests/msodde/test_basic.py b/tests/msodde/test_basic.py
index 3386462..b1fa4e0 100644
--- a/tests/msodde/test_basic.py
+++ b/tests/msodde/test_basic.py
@@ -9,11 +9,16 @@ Ensure that
from __future__ import print_function
import unittest
-from oletools import msodde
-from tests.test_utils import DATA_BASE_DIR as BASE_DIR
+import sys
import os
-from os.path import join
+from os.path import join, basename
from traceback import print_exc
+import json
+from collections import OrderedDict
+from oletools import msodde
+from oletools.crypto import \
+ WrongEncryptionPassword, CryptoLibNotImported, check_msoffcrypto
+from tests.test_utils import call_and_capture, DATA_BASE_DIR as BASE_DIR
class TestReturnCode(unittest.TestCase):
@@ -46,15 +51,21 @@ class TestReturnCode(unittest.TestCase):
def test_invalid_none(self):
""" check that no file argument leads to non-zero exit status """
- self.do_test_validity('', True)
+ if sys.hexversion > 0x03030000: # version 3.3 and higher
+ # different errors probably depending on whether msoffcrypto is
+ # available or not
+ expect_error = (AttributeError, FileNotFoundError)
+ else:
+ expect_error = (AttributeError, IOError)
+ self.do_test_validity('', expect_error)
def test_invalid_empty(self):
""" check that empty file argument leads to non-zero exit status """
- self.do_test_validity(join(BASE_DIR, 'basic/empty'), True)
+ self.do_test_validity(join(BASE_DIR, 'basic/empty'), Exception)
def test_invalid_text(self):
""" check that text file argument leads to non-zero exit status """
- self.do_test_validity(join(BASE_DIR, 'basic/text'), True)
+ self.do_test_validity(join(BASE_DIR, 'basic/text'), Exception)
def test_encrypted(self):
"""
@@ -64,28 +75,56 @@ class TestReturnCode(unittest.TestCase):
Encryption) is tested.
"""
CRYPT_DIR = join(BASE_DIR, 'encrypted')
- ADD_ARGS = '', '-j', '-d', '-f', '-a'
+ have_crypto = check_msoffcrypto()
for filename in os.listdir(CRYPT_DIR):
- full_name = join(CRYPT_DIR, filename)
- for args in ADD_ARGS:
- self.do_test_validity(args + ' ' + full_name, True)
-
- def do_test_validity(self, args, expect_error=False):
- """ helper for test_valid_doc[x] """
- have_exception = False
+ if have_crypto and 'standardpassword' in filename:
+ # these are automagically decrypted
+ self.do_test_validity(join(CRYPT_DIR, filename))
+ elif have_crypto:
+ self.do_test_validity(join(CRYPT_DIR, filename),
+ WrongEncryptionPassword)
+ else:
+ self.do_test_validity(join(CRYPT_DIR, filename),
+ CryptoLibNotImported)
+
+ def do_test_validity(self, filename, expect_error=None):
+ """ helper for test_[in]valid_* """
+ found_error = None
+ # DEBUG: print('Testing file {}'.format(filename))
try:
- msodde.process_file(args, msodde.FIELD_FILTER_BLACKLIST)
- except Exception:
- have_exception = True
- print_exc()
- except SystemExit as exc: # sys.exit() was called
- have_exception = True
- if exc.code is None:
- have_exception = False
-
- self.assertEqual(expect_error, have_exception,
- msg='Args={0}, expect={1}, exc={2}'
- .format(args, expect_error, have_exception))
+ msodde.process_maybe_encrypted(filename,
+ field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
+ except Exception as exc:
+ found_error = exc
+ # DEBUG: print_exc()
+
+ if expect_error and not found_error:
+ self.fail('Expected {} but msodde finished without errors for {}'
+ .format(expect_error, filename))
+ elif not expect_error and found_error:
+ self.fail('Unexpected error {} from msodde for {}'
+ .format(found_error, filename))
+ elif expect_error and not isinstance(found_error, expect_error):
+ self.fail('Wrong kind of error {} from msodde for {}, expected {}'
+ .format(type(found_error), filename, expect_error))
+
+
+@unittest.skipIf(not check_msoffcrypto(),
+ 'Module msoffcrypto not installed for {}'
+ .format(basename(sys.executable)))
+class TestErrorOutput(unittest.TestCase):
+ """msodde does not specify error by return code but text output."""
+
+ def test_crypt_output(self):
+ """Check for helpful error message when failing to decrypt."""
+ for suffix in 'doc', 'docm', 'docx', 'ppt', 'pptm', 'pptx', 'xls', \
+ 'xlsb', 'xlsm', 'xlsx':
+ example_file = join(BASE_DIR, 'encrypted', 'encrypted.' + suffix)
+ output, ret_code = call_and_capture('msodde', [example_file, ],
+ accept_nonzero_exit=True)
+ self.assertEqual(ret_code, 1)
+ self.assertIn('passwords could not decrypt office file', output,
+ msg='Unexpected output: {}'.format(output.strip()))
class TestDdeLinks(unittest.TestCase):
@@ -100,33 +139,37 @@ class TestDdeLinks(unittest.TestCase):
def test_with_dde(self):
""" check that dde links appear on stdout """
filename = 'dde-test-from-office2003.doc'
- output = msodde.process_file(
- join(BASE_DIR, 'msodde', filename), msodde.FIELD_FILTER_BLACKLIST)
+ output = msodde.process_maybe_encrypted(
+ join(BASE_DIR, 'msodde', filename),
+ field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
self.assertNotEqual(len(self.get_dde_from_output(output)), 0,
msg='Found no dde links in output of ' + filename)
def test_no_dde(self):
""" check that no dde links appear on stdout """
filename = 'harmless-clean.doc'
- output = msodde.process_file(
- join(BASE_DIR, 'msodde', filename), msodde.FIELD_FILTER_BLACKLIST)
+ output = msodde.process_maybe_encrypted(
+ join(BASE_DIR, 'msodde', filename),
+ field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
self.assertEqual(len(self.get_dde_from_output(output)), 0,
msg='Found dde links in output of ' + filename)
def test_with_dde_utf16le(self):
""" check that dde links appear on stdout """
filename = 'dde-test-from-office2013-utf_16le-korean.doc'
- output = msodde.process_file(
- join(BASE_DIR, 'msodde', filename), msodde.FIELD_FILTER_BLACKLIST)
+ output = msodde.process_maybe_encrypted(
+ join(BASE_DIR, 'msodde', filename),
+ field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
self.assertNotEqual(len(self.get_dde_from_output(output)), 0,
msg='Found no dde links in output of ' + filename)
def test_excel(self):
""" check that dde links are found in excel 2007+ files """
- expect = ['DDE-Link cmd /c calc.exe', ]
+ expect = ['cmd /c calc.exe', ]
for extn in 'xlsx', 'xlsm', 'xlsb':
- output = msodde.process_file(
- join(BASE_DIR, 'msodde', 'dde-test.' + extn), msodde.FIELD_FILTER_BLACKLIST)
+ output = msodde.process_maybe_encrypted(
+ join(BASE_DIR, 'msodde', 'dde-test.' + extn),
+ field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
self.assertEqual(expect, self.get_dde_from_output(output),
msg='unexpected output for dde-test.{0}: {1}'
@@ -136,8 +179,9 @@ class TestDdeLinks(unittest.TestCase):
""" check that dde in xml from word / excel is found """
for name_part in 'excel2003', 'word2003', 'word2007':
filename = 'dde-in-' + name_part + '.xml'
- output = msodde.process_file(
- join(BASE_DIR, 'msodde', filename), msodde.FIELD_FILTER_BLACKLIST)
+ output = msodde.process_maybe_encrypted(
+ join(BASE_DIR, 'msodde', filename),
+ field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
links = self.get_dde_from_output(output)
self.assertEqual(len(links), 1, 'found {0} dde-links in {1}'
.format(len(links), filename))
@@ -149,15 +193,17 @@ class TestDdeLinks(unittest.TestCase):
def test_clean_rtf_blacklist(self):
""" find a lot of hyperlinks in rtf spec """
filename = 'RTF-Spec-1.7.rtf'
- output = msodde.process_file(
- join(BASE_DIR, 'msodde', filename), msodde.FIELD_FILTER_BLACKLIST)
+ output = msodde.process_maybe_encrypted(
+ join(BASE_DIR, 'msodde', filename),
+ field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
self.assertEqual(len(self.get_dde_from_output(output)), 1413)
def test_clean_rtf_ddeonly(self):
""" find no dde links in rtf spec """
filename = 'RTF-Spec-1.7.rtf'
- output = msodde.process_file(
- join(BASE_DIR, 'msodde', filename), msodde.FIELD_FILTER_DDE)
+ output = msodde.process_maybe_encrypted(
+ join(BASE_DIR, 'msodde', filename),
+ field_filter_mode=msodde.FIELD_FILTER_DDE)
self.assertEqual(len(self.get_dde_from_output(output)), 0,
msg='Found dde links in output of ' + filename)
diff --git a/tests/msodde/test_crypto.py b/tests/msodde/test_crypto.py
new file mode 100644
index 0000000..38b2f06
--- /dev/null
+++ b/tests/msodde/test_crypto.py
@@ -0,0 +1,32 @@
+"""Check decryption of files from msodde works."""
+
+import sys
+import unittest
+from os.path import basename, join as pjoin
+
+from tests.test_utils import DATA_BASE_DIR, call_and_capture
+
+from oletools import crypto
+
+
+@unittest.skipIf(not crypto.check_msoffcrypto(),
+ 'Module msoffcrypto not installed for {}'
+ .format(basename(sys.executable)))
+class MsoddeCryptoTest(unittest.TestCase):
+ """Test integration of decryption in msodde."""
+
+ def test_standard_password(self):
+ """Check dde-link is found in xls, xlsx, xlsm and xlsb sample files."""
+ for suffix in 'xls', 'xlsx', 'xlsm', 'xlsb':
+ example_file = pjoin(DATA_BASE_DIR, 'encrypted',
+ 'dde-test-encrypt-standardpassword.' + suffix)
+ output, _ = call_and_capture('msodde', [example_file, ])
+ self.assertIn('\nDDE Links:\ncmd /c calc.exe\n', output,
+ msg='Unexpected output {!r} for {}'
+ .format(output, suffix))
+
+ # TODO: add more, in particular a sample with a "proper" password
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/oleid/test_basic.py b/tests/oleid/test_basic.py
index e527fa2..ce4187a 100644
--- a/tests/oleid/test_basic.py
+++ b/tests/oleid/test_basic.py
@@ -20,7 +20,7 @@ class TestOleIDBasic(unittest.TestCase):
"""Run all file in test-data through oleid and compare to known ouput"""
# this relies on order of indicators being constant, could relax that
# Also requires that files have the correct suffixes (no rtf in doc)
- NON_OLE_SUFFIXES = ('.xml', '.csv', '.rtf', '')
+ NON_OLE_SUFFIXES = ('.xml', '.csv', '.rtf', '', '.odt', '.ods', '.odp')
NON_OLE_VALUES = (False, )
WORD = b'Microsoft Office Word'
PPT = b'Microsoft Office PowerPoint'
@@ -121,6 +121,33 @@ class TestOleIDBasic(unittest.TestCase):
'msodde/harmless-clean.docx': (False,),
'oleform/oleform-PR314.docm': (False,),
'basic/encrypted.docx': CRYPT,
+ 'oleobj/external_link/sample_with_external_link_to_doc.docx': (False,),
+ 'oleobj/external_link/sample_with_external_link_to_doc.xlsb': (False,),
+ 'oleobj/external_link/sample_with_external_link_to_doc.dotm': (False,),
+ 'oleobj/external_link/sample_with_external_link_to_doc.xlsm': (False,),
+ 'oleobj/external_link/sample_with_external_link_to_doc.pptx': (False,),
+ 'oleobj/external_link/sample_with_external_link_to_doc.dotx': (False,),
+ 'oleobj/external_link/sample_with_external_link_to_doc.docm': (False,),
+ 'oleobj/external_link/sample_with_external_link_to_doc.potm': (False,),
+ 'oleobj/external_link/sample_with_external_link_to_doc.xlsx': (False,),
+ 'oleobj/external_link/sample_with_external_link_to_doc.potx': (False,),
+ 'oleobj/external_link/sample_with_external_link_to_doc.ppsm': (False,),
+ 'oleobj/external_link/sample_with_external_link_to_doc.pptm': (False,),
+ 'oleobj/external_link/sample_with_external_link_to_doc.ppsx': (False,),
+ 'encrypted/autostart-encrypt-standardpassword.xlsm':
+ (True, False, 'unknown', True, False, False, False, False, False, False, 0),
+ 'encrypted/autostart-encrypt-standardpassword.xls':
+ (True, True, EXCEL, True, False, True, True, False, False, False, 0),
+ 'encrypted/dde-test-encrypt-standardpassword.xlsx':
+ (True, False, 'unknown', True, False, False, False, False, False, False, 0),
+ 'encrypted/dde-test-encrypt-standardpassword.xlsm':
+ (True, False, 'unknown', True, False, False, False, False, False, False, 0),
+ 'encrypted/autostart-encrypt-standardpassword.xlsb':
+ (True, False, 'unknown', True, False, False, False, False, False, False, 0),
+ 'encrypted/dde-test-encrypt-standardpassword.xls':
+ (True, True, EXCEL, True, False, False, True, False, False, False, 0),
+ 'encrypted/dde-test-encrypt-standardpassword.xlsb':
+ (True, False, 'unknown', True, False, False, False, False, False, False, 0),
}
indicator_names = []
@@ -148,7 +175,8 @@ class TestOleIDBasic(unittest.TestCase):
OLE_VALUES[name]))
except KeyError:
print('Should add oleid output for {} to {} ({})'
- .format(name, __name__, values[3:]))
+ .format(name, __name__, values))
+
# just in case somebody calls this file as a script
if __name__ == '__main__':
diff --git a/tests/oleobj/test_basic.py b/tests/oleobj/test_basic.py
index 783ae5a..8ad0ef5 100644
--- a/tests/oleobj/test_basic.py
+++ b/tests/oleobj/test_basic.py
@@ -8,7 +8,7 @@ from hashlib import md5
from glob import glob
# Directory with test data, independent of current working directory
-from tests.test_utils import DATA_BASE_DIR
+from tests.test_utils import DATA_BASE_DIR, call_and_capture
from oletools import oleobj
@@ -41,8 +41,10 @@ SAMPLES += tuple(
'ab8c65e4c0fc51739aa66ca5888265b4')
for extn in ('xls', 'xlsx', 'xlsb', 'xlsm', 'xla', 'xlam', 'xlt', 'xltm',
'xltx', 'ppt', 'pptx', 'pptm', 'pps', 'ppsx', 'ppsm', 'pot',
- 'potx', 'potm')
+ 'potx', 'potm', 'ods', 'odp')
)
+SAMPLES += (('embedded-simple-2007.odt', 'simple-text-file.txt',
+ 'bd5c063a5a43f67b3c50dc7b0f1195af'), )
def calc_md5(filename):
@@ -79,10 +81,6 @@ class TestOleObj(unittest.TestCase):
""" fixture start: create temp dir """
self.temp_dir = mkdtemp(prefix='oletools-oleobj-')
self.did_fail = False
- if DEBUG:
- import logging
- logging.basicConfig(level=logging.DEBUG if DEBUG else logging.INFO)
- oleobj.log.setLevel(logging.NOTSET)
def tearDown(self):
""" fixture end: remove temp dir """
@@ -99,7 +97,8 @@ class TestOleObj(unittest.TestCase):
"""
test that oleobj can be called with -i and -v
- this is the way that amavisd calls oleobj, thinking it is ripOLE
+ This is how ripOLE used to be often called (e.g. by amavisd-new);
+ ensure oleobj is a compatible replacement.
"""
self.do_test_md5(['-d', self.temp_dir, '-v', '-i'])
@@ -110,35 +109,52 @@ class TestOleObj(unittest.TestCase):
'embedded-simple-2007.xml',
'embedded-simple-2007-as2003.xml'):
full_name = join(DATA_BASE_DIR, 'oleobj', sample_name)
- ret_val = oleobj.main(args + [full_name, ])
+ output, ret_val = call_and_capture('oleobj', args + [full_name, ],
+ accept_nonzero_exit=True)
if glob(self.temp_dir + 'ole-object-*'):
- self.fail('found embedded data in {0}'.format(sample_name))
- self.assertEqual(ret_val, oleobj.RETURN_NO_DUMP)
+ self.fail('found embedded data in {0}. Output:\n{1}'
+ .format(sample_name, output))
+ self.assertEqual(ret_val, oleobj.RETURN_NO_DUMP,
+ msg='Wrong return value {} for {}. Output:\n{}'
+ .format(ret_val, sample_name, output))
- def do_test_md5(self, args, test_fun=oleobj.main):
+ def do_test_md5(self, args, test_fun=None, only_run_every=1):
""" helper for test_md5 and test_md5_args """
- # name of sample, extension of embedded file, md5 hash of embedded file
data_dir = join(DATA_BASE_DIR, 'oleobj')
- for sample_name, embedded_name, expect_hash in SAMPLES:
- ret_val = test_fun(args + [join(data_dir, sample_name), ])
- self.assertEqual(ret_val, oleobj.RETURN_DID_DUMP)
+
+ # name of sample, extension of embedded file, md5 hash of embedded file
+ for sample_index, (sample_name, embedded_name, expect_hash) \
+ in enumerate(SAMPLES):
+ if sample_index % only_run_every != 0:
+ continue
+ args_with_path = args + [join(data_dir, sample_name), ]
+ if test_fun is None:
+ output, ret_val = call_and_capture('oleobj', args_with_path,
+ accept_nonzero_exit=True)
+ else:
+ ret_val = test_fun(args_with_path)
+ output = '[output: see above]'
+ self.assertEqual(ret_val, oleobj.RETURN_DID_DUMP,
+ msg='Wrong return value {} for {}. Output:\n{}'
+ .format(ret_val, sample_name, output))
expect_name = join(self.temp_dir,
sample_name + '_' + embedded_name)
if not isfile(expect_name):
self.did_fail = True
- self.fail('{0} not created from {1}'.format(expect_name,
- sample_name))
+ self.fail('{0} not created from {1}. Output:\n{2}'
+ .format(expect_name, sample_name, output))
continue
md5_hash = calc_md5(expect_name)
if md5_hash != expect_hash:
self.did_fail = True
- self.fail('Wrong md5 {0} of {1} from {2}'
- .format(md5_hash, expect_name, sample_name))
+ self.fail('Wrong md5 {0} of {1} from {2}. Output:\n{3}'
+ .format(md5_hash, expect_name, sample_name, output))
continue
def test_non_streamed(self):
""" Ensure old oleobj behaviour still works: pre-read whole file """
- return self.do_test_md5(['-d', self.temp_dir], test_fun=preread_file)
+ return self.do_test_md5(['-d', self.temp_dir], test_fun=preread_file,
+ only_run_every=4)
# just in case somebody calls this file as a script
diff --git a/tests/oleobj/test_external_links.py b/tests/oleobj/test_external_links.py
index 9c7e632..2b7fc5b 100644
--- a/tests/oleobj/test_external_links.py
+++ b/tests/oleobj/test_external_links.py
@@ -6,7 +6,7 @@ import os
from os import path
# Directory with test data, independent of current working directory
-from tests.test_utils import DATA_BASE_DIR
+from tests.test_utils import DATA_BASE_DIR, call_and_capture
from oletools import oleobj
BASE_DIR = path.join(DATA_BASE_DIR, 'oleobj', 'external_link')
@@ -22,8 +22,11 @@ class TestExternalLinks(unittest.TestCase):
for filename in filenames:
file_path = path.join(dirpath, filename)
- ret_val = oleobj.main([file_path])
- self.assertEqual(ret_val, oleobj.RETURN_DID_DUMP)
+ output, ret_val = call_and_capture('oleobj', [file_path, ],
+ accept_nonzero_exit=True)
+ self.assertEqual(ret_val, oleobj.RETURN_DID_DUMP,
+ msg='Wrong return value {} for {}. Output:\n{}'
+ .format(ret_val, filename, output))
# just in case somebody calls this file as a script
diff --git a/tests/olevba/test_basic.py b/tests/olevba/test_basic.py
index d319a12..28238fc 100644
--- a/tests/olevba/test_basic.py
+++ b/tests/olevba/test_basic.py
@@ -3,21 +3,71 @@ Test basic functionality of olevba[3]
"""
import unittest
-import sys
-if sys.version_info.major <= 2:
- from oletools import olevba
-else:
- from oletools import olevba3 as olevba
import os
from os.path import join
+import re
# Directory with test data, independent of current working directory
-from tests.test_utils import DATA_BASE_DIR
+from tests.test_utils import DATA_BASE_DIR, call_and_capture
class TestOlevbaBasic(unittest.TestCase):
"""Tests olevba basic functionality"""
+ def test_text_behaviour(self):
+ """Test behaviour of olevba when presented with pure text file."""
+ self.do_test_behaviour('text')
+
+ def test_empty_behaviour(self):
+ """Test behaviour of olevba when presented with an empty file."""
+ self.do_test_behaviour('empty')
+
+ def do_test_behaviour(self, filename):
+ """Helper for test_{text,empty}_behaviour."""
+ input_file = join(DATA_BASE_DIR, 'basic', filename)
+ output, _ = call_and_capture('olevba', args=(input_file, ))
+
+ # check output
+ self.assertTrue(re.search(r'^Type:\s+Text\s*$', output, re.MULTILINE),
+ msg='"Type: Text" not found in output:\n' + output)
+ self.assertTrue(re.search(r'^No suspicious .+ found.$', output,
+ re.MULTILINE),
+ msg='"No suspicous...found" not found in output:\n' + \
+ output)
+ self.assertNotIn('error', output.lower())
+
+ # check warnings
+ for line in output.splitlines():
+ if line.startswith('WARNING ') and 'encrypted' in line:
+ continue # encryption warnings are ok
+ elif 'warn' in line.lower():
+ raise self.fail('Found "warn" in output line: "{}"'
+ .format(line.rstrip()))
+ self.assertIn('not encrypted', output)
+
+ def test_rtf_behaviour(self):
+ """Test behaviour of olevba when presented with an rtf file."""
+ input_file = join(DATA_BASE_DIR, 'msodde', 'RTF-Spec-1.7.rtf')
+ output, ret_code = call_and_capture('olevba', args=(input_file, ),
+ accept_nonzero_exit=True)
+
+ # check that return code is olevba.RETURN_OPEN_ERROR
+ self.assertEqual(ret_code, 5)
+
+ # check output:
+ self.assertIn('FileOpenError', output)
+ self.assertIn('is RTF', output)
+ self.assertIn('rtfobj.py', output)
+ self.assertIn('not encrypted', output)
+
+ # check warnings
+ for line in output.splitlines():
+ if line.startswith('WARNING ') and 'encrypted' in line:
+ continue # encryption warnings are ok
+ elif 'warn' in line.lower():
+ raise self.fail('Found "warn" in output line: "{}"'
+ .format(line.rstrip()))
+
def test_crypt_return(self):
"""
Tests that encrypted files give a certain return code.
@@ -28,15 +78,23 @@ class TestOlevbaBasic(unittest.TestCase):
CRYPT_DIR = join(DATA_BASE_DIR, 'encrypted')
CRYPT_RETURN_CODE = 9
ADD_ARGS = [], ['-d', ], ['-a', ], ['-j', ], ['-t', ]
+ EXCEPTIONS = ['autostart-encrypt-standardpassword.xls', # These ...
+ 'autostart-encrypt-standardpassword.xlsm', # files ...
+ 'autostart-encrypt-standardpassword.xlsb', # are ...
+ 'dde-test-encrypt-standardpassword.xls', # automati...
+ 'dde-test-encrypt-standardpassword.xlsx', # ...cally...
+ 'dde-test-encrypt-standardpassword.xlsm', # decrypted.
+ 'dde-test-encrypt-standardpassword.xlsb']
for filename in os.listdir(CRYPT_DIR):
+ if filename in EXCEPTIONS:
+ continue
full_name = join(CRYPT_DIR, filename)
for args in ADD_ARGS:
- try:
- ret_code = olevba.main(args + [full_name, ])
- except SystemExit as se:
- ret_code = se.code or 0 # se.code can be None
+ _, ret_code = call_and_capture('olevba',
+ args=[full_name, ] + args,
+ accept_nonzero_exit=True)
self.assertEqual(ret_code, CRYPT_RETURN_CODE,
- msg='Wrong return code {} for args {}'
+ msg='Wrong return code {} for args {}'\
.format(ret_code, args + [filename, ]))
diff --git a/tests/olevba/test_crypto.py b/tests/olevba/test_crypto.py
new file mode 100644
index 0000000..aad78df
--- /dev/null
+++ b/tests/olevba/test_crypto.py
@@ -0,0 +1,66 @@
+"""Check decryption of files from olevba works."""
+
+import sys
+import unittest
+from os.path import basename, join as pjoin
+import json
+from collections import OrderedDict
+
+from tests.test_utils import DATA_BASE_DIR, call_and_capture
+
+from oletools import crypto
+
+
+@unittest.skipIf(not crypto.check_msoffcrypto(),
+ 'Module msoffcrypto not installed for {}'
+ .format(basename(sys.executable)))
+class OlevbaCryptoWriteProtectTest(unittest.TestCase):
+ """
+ Test documents that are 'write-protected' through encryption.
+
+ Excel has a way to 'write-protect' documents by encrypting them with a
+ hard-coded standard password. When looking at the file-structure you see
+ an OLE-file with streams `EncryptedPackage`, `StrongEncryptionSpace`, and
+ `EncryptionInfo`. Contained in the first is the actual file. When opening
+ such a file in excel, it is decrypted without the user noticing.
+
+ Olevba should detect such encryption, try to decrypt with the standard
+ password and look for VBA code in the decrypted file.
+
+ All these tests are skipped if the package `msoffcrypto-tool` is not
+ installed.
+ """
+ def test_autostart(self):
+ """Check that autostart macro is found in xls[mb] sample file."""
+ for suffix in 'xlsm', 'xlsb':
+ example_file = pjoin(
+ DATA_BASE_DIR, 'encrypted',
+ 'autostart-encrypt-standardpassword.' + suffix)
+ output, _ = call_and_capture('olevba', args=('-j', example_file),
+ exclude_stderr=True)
+ data = json.loads(output, object_pairs_hook=OrderedDict)
+ # debug: json.dump(data, sys.stdout, indent=4)
+ self.assertEqual(len(data), 4)
+ self.assertIn('script_name', data[0])
+ self.assertIn('version', data[0])
+ self.assertEqual(data[0]['type'], 'MetaInformation')
+ self.assertIn('return_code', data[-1])
+ self.assertEqual(data[-1]['type'], 'MetaInformation')
+ self.assertEqual(data[1]['container'], None)
+ self.assertEqual(data[1]['file'], example_file)
+ self.assertEqual(data[1]['analysis'], None)
+ self.assertEqual(data[1]['macros'], [])
+ self.assertEqual(data[1]['type'], 'OLE')
+ self.assertEqual(data[2]['container'], example_file)
+ self.assertNotEqual(data[2]['file'], example_file)
+ self.assertEqual(data[2]['type'], "OpenXML")
+ analysis = data[2]['analysis']
+ self.assertEqual(analysis[0]['type'], 'AutoExec')
+ self.assertEqual(analysis[0]['keyword'], 'Auto_Open')
+ macros = data[2]['macros']
+ self.assertEqual(macros[0]['vba_filename'], 'Modul1.bas')
+ self.assertIn('Sub Auto_Open()', macros[0]['code'])
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/ooxml/test_basic.py b/tests/ooxml/test_basic.py
index 440d08d..b97c432 100644
--- a/tests/ooxml/test_basic.py
+++ b/tests/ooxml/test_basic.py
@@ -33,6 +33,8 @@ class TestOOXML(unittest.TestCase):
pptx=ooxml.DOCTYPE_POWERPOINT, pptm=ooxml.DOCTYPE_POWERPOINT,
ppsx=ooxml.DOCTYPE_POWERPOINT, ppsm=ooxml.DOCTYPE_POWERPOINT,
potx=ooxml.DOCTYPE_POWERPOINT, potm=ooxml.DOCTYPE_POWERPOINT,
+ ods=ooxml.DOCTYPE_NONE, odt=ooxml.DOCTYPE_NONE,
+ odp=ooxml.DOCTYPE_NONE,
)
# files that are neither OLE nor xml:
diff --git a/tests/ooxml/test_zip_sub_file.py b/tests/ooxml/test_zip_sub_file.py
index ac49fb5..6e6085b 100644
--- a/tests/ooxml/test_zip_sub_file.py
+++ b/tests/ooxml/test_zip_sub_file.py
@@ -144,15 +144,15 @@ class TestZipSubFile(unittest.TestCase):
self.subfile.seek(0, os.SEEK_END)
self.compare.seek(0, os.SEEK_END)
- self.assertEquals(self.compare.read(10), self.subfile.read(10))
- self.assertEquals(self.compare.tell(), self.subfile.tell())
+ self.assertEqual(self.compare.read(10), self.subfile.read(10))
+ self.assertEqual(self.compare.tell(), self.subfile.tell())
self.subfile.seek(0)
self.compare.seek(0)
self.subfile.seek(len(FILE_CONTENTS) - 1)
self.compare.seek(len(FILE_CONTENTS) - 1)
- self.assertEquals(self.compare.read(10), self.subfile.read(10))
- self.assertEquals(self.compare.tell(), self.subfile.tell())
+ self.assertEqual(self.compare.read(10), self.subfile.read(10))
+ self.assertEqual(self.compare.tell(), self.subfile.tell())
def test_error_seek(self):
""" test correct behaviour if seek beyond end (no exception) """
diff --git a/tests/ppt_parser/test_basic.py b/tests/ppt_parser/test_basic.py
index 9772e96..b653257 100644
--- a/tests/ppt_parser/test_basic.py
+++ b/tests/ppt_parser/test_basic.py
@@ -16,7 +16,7 @@ class TestBasic(unittest.TestCase):
def test_is_ppt(self):
""" test ppt_record_parser.is_ppt(filename) """
- exceptions = []
+ exceptions = ['encrypted.ppt', ] # actually is ppt but embedded
for base_dir, _, files in os.walk(DATA_BASE_DIR):
for filename in files:
if filename in exceptions:
diff --git a/tests/test-data/encrypted/autostart-encrypt-standardpassword.xls b/tests/test-data/encrypted/autostart-encrypt-standardpassword.xls
new file mode 100644
index 0000000..65c2ac7
--- /dev/null
+++ b/tests/test-data/encrypted/autostart-encrypt-standardpassword.xls
diff --git a/tests/test-data/encrypted/autostart-encrypt-standardpassword.xlsb b/tests/test-data/encrypted/autostart-encrypt-standardpassword.xlsb
new file mode 100644
index 0000000..b905d7c
--- /dev/null
+++ b/tests/test-data/encrypted/autostart-encrypt-standardpassword.xlsb
diff --git a/tests/test-data/encrypted/autostart-encrypt-standardpassword.xlsm b/tests/test-data/encrypted/autostart-encrypt-standardpassword.xlsm
new file mode 100644
index 0000000..2b2e113
--- /dev/null
+++ b/tests/test-data/encrypted/autostart-encrypt-standardpassword.xlsm
diff --git a/tests/test-data/encrypted/dde-test-encrypt-standardpassword.xls b/tests/test-data/encrypted/dde-test-encrypt-standardpassword.xls
new file mode 100644
index 0000000..c61f12b
--- /dev/null
+++ b/tests/test-data/encrypted/dde-test-encrypt-standardpassword.xls
diff --git a/tests/test-data/encrypted/dde-test-encrypt-standardpassword.xlsb b/tests/test-data/encrypted/dde-test-encrypt-standardpassword.xlsb
new file mode 100644
index 0000000..3518a20
--- /dev/null
+++ b/tests/test-data/encrypted/dde-test-encrypt-standardpassword.xlsb
diff --git a/tests/test-data/encrypted/dde-test-encrypt-standardpassword.xlsm b/tests/test-data/encrypted/dde-test-encrypt-standardpassword.xlsm
new file mode 100644
index 0000000..b9cce05
--- /dev/null
+++ b/tests/test-data/encrypted/dde-test-encrypt-standardpassword.xlsm
diff --git a/tests/test-data/encrypted/dde-test-encrypt-standardpassword.xlsx b/tests/test-data/encrypted/dde-test-encrypt-standardpassword.xlsx
new file mode 100644
index 0000000..c677227
--- /dev/null
+++ b/tests/test-data/encrypted/dde-test-encrypt-standardpassword.xlsx
diff --git a/tests/test-data/oleobj/embedded-simple-2007.odp b/tests/test-data/oleobj/embedded-simple-2007.odp
new file mode 100644
index 0000000..eeb85e8
--- /dev/null
+++ b/tests/test-data/oleobj/embedded-simple-2007.odp
diff --git a/tests/test-data/oleobj/embedded-simple-2007.ods b/tests/test-data/oleobj/embedded-simple-2007.ods
new file mode 100644
index 0000000..e465229
--- /dev/null
+++ b/tests/test-data/oleobj/embedded-simple-2007.ods
diff --git a/tests/test-data/oleobj/embedded-simple-2007.odt b/tests/test-data/oleobj/embedded-simple-2007.odt
new file mode 100644
index 0000000..c73fe59
--- /dev/null
+++ b/tests/test-data/oleobj/embedded-simple-2007.odt
diff --git a/tests/test_utils/__init__.py b/tests/test_utils/__init__.py
index c6671c7..16281fe 100644
--- a/tests/test_utils/__init__.py
+++ b/tests/test_utils/__init__.py
@@ -1,4 +1 @@
-from os.path import dirname, join
-
-# Directory with test data, independent of current working directory
-DATA_BASE_DIR = join(dirname(dirname(__file__)), 'test-data')
+from .utils import *
diff --git a/tests/test_utils/utils.py b/tests/test_utils/utils.py
new file mode 100644
index 0000000..45cedc8
--- /dev/null
+++ b/tests/test_utils/utils.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+
+"""Utils generally useful for unittests."""
+
+import sys
+import os
+from os.path import dirname, join, abspath
+from subprocess import check_output, PIPE, STDOUT, CalledProcessError
+
+
+# Base dir of project, contains subdirs "tests" and "oletools" and README.md
+PROJECT_ROOT = dirname(dirname(dirname(abspath(__file__))))
+
+# Directory with test data, independent of current working directory
+DATA_BASE_DIR = join(PROJECT_ROOT, 'tests', 'test-data')
+
+# Directory with source code
+SOURCE_BASE_DIR = join(PROJECT_ROOT, 'oletools')
+
+
+def call_and_capture(module, args=None, accept_nonzero_exit=False,
+ exclude_stderr=False):
+ """
+ Run module as script, capturing and returning output and return code.
+
+ This is the best way to capture a module's stdout and stderr; trying to
+ modify sys.stdout/sys.stderr to StringIO-Buffers frequently causes trouble.
+
+ Only drawback so far: stdout and stderr are merged into one (which is
+ what users see on their shell as well). When testing for json-compatible
+ output you should set `exclude_stderr` to `True` since logging ignores stderr,
+ so unforeseen warnings (e.g. issued by pypy) would mess up your json.
+
+ :param str module: name of module to test, e.g. `olevba`
+ :param args: arguments for module's main function
+ :param bool accept_nonzero_exit: Return (do not raise) on non-0 return code
+ :param bool exclude_stderr: Exclude output to `sys.stderr` from output
+ (e.g. if parsing output through json)
+ :returns: output, ret_code
+ :rtype: str, int
+ """
+ # create a PYTHONPATH environment var to prefer our current code
+ env = os.environ.copy()
+ try:
+ env['PYTHONPATH'] = SOURCE_BASE_DIR + os.pathsep + \
+ os.environ['PYTHONPATH']
+ except KeyError:
+ env['PYTHONPATH'] = SOURCE_BASE_DIR
+
+ # hack: in python2 output encoding (sys.stdout.encoding) was None
+ # although sys.getdefaultencoding() and sys.getfilesystemencoding were ok
+ # TODO: maybe can remove this once branch
+ # "encoding-for-non-unicode-environments" is merged
+ if 'PYTHONIOENCODING' not in env:
+ env['PYTHONIOENCODING'] = 'utf8'
+
+ # ensure args is a tuple
+ my_args = tuple(args) if args else ()
+
+ ret_code = -1
+ try:
+ output = check_output((sys.executable, '-m', module) + my_args,
+ universal_newlines=True, env=env,
+ stderr=PIPE if exclude_stderr else STDOUT)
+ ret_code = 0
+
+ except CalledProcessError as err:
+ if accept_nonzero_exit:
+ ret_code = err.returncode
+ output = err.output
+ else:
+ print(err.output)
+ raise
+
+ return output, ret_code