Commit 6b58e3302cdee48a75b74b065cbb9138f51dc167

Authored by Philippe Lagadec
Committed by GitHub
2 parents 78b2d459 3b7a4ea8

Merge pull request #796 from christian-intra2net/unittest-automation

Run unittests automatically on github
.github/workflows/unittests.yml 0 → 100644
  1 +name: Python package
  2 +
  3 +on:
  4 + push:
  5 + branches: [master]
  6 + pull_request:
  7 + branches: [master]
  8 +
  9 +jobs:
  10 + check:
  11 + runs-on: ${{ matrix.os }}
  12 + strategy:
  13 + matrix:
  14 + os: ["ubuntu-latest", "windows-latest", "macos-latest"]
  15 + python-version: ["3.x", "pypy-3.9"]
  16 + include:
  17 + - python-version: 3.x
  18 + runlint: 1
  19 +
  20 + steps:
  21 + - uses: actions/checkout@v3
  22 + - name: Set up Python ${{ matrix.python-version }}
  23 + uses: actions/setup-python@v3
  24 + with:
  25 + python-version: ${{ matrix.python-version }}
  26 + - name: Install dependencies
  27 + run: |
  28 + python -c "import sys; import platform; print(sys.version); print(sys.platform); print(platform.python_implementation()); print(platform.system())"
  29 + cat requirements.txt
  30 + python -m pip install --upgrade pip
  31 + pip install -r requirements.txt
  32 + pip install pylint
  33 + - name: Run pylint
  34 + if: ${{ matrix.runlint }}
  35 + run: pylint -E --ignore=thirdparty oletools tests
  36 + - name: Run unittests
  37 + run: python -m unittest discover -f
0 38 \ No newline at end of file
... ...
oletools/common/io_encoding.py
... ... @@ -58,7 +58,7 @@ PY3 = sys.version_info.major >= 3
58 58 if PY3:
59 59 from builtins import open as builtin_open
60 60 else:
61   - from __builtin__ import open as builtin_open
  61 + from __builtin__ import open as builtin_open # pylint: disable=import-error
62 62  
63 63 # -- CONSTANTS ----------------------------------------------------------------
64 64 #: encoding to use for redirection if no good encoding can be found
... ...
oletools/common/log_helper/_json_formatter.py
... ... @@ -5,12 +5,17 @@ import json
5 5 class JsonFormatter(logging.Formatter):
6 6 """
7 7 Format every message to be logged as a JSON object
  8 +
  9 + Uses the standard :py:class:`logging.Formatter` with standard arguments
  10 + to do the actual formatting, could save and use a user-supplied formatter
  11 + instead.
8 12 """
9 13 _is_first_line = True
10 14  
11 15 def __init__(self, other_logger_has_first_line=False):
12 16 if other_logger_has_first_line:
13 17 self._is_first_line = False
  18 + self.msg_formatter = logging.Formatter() # could adjust this
14 19  
15 20 def format(self, record):
16 21 """
... ... @@ -18,12 +23,34 @@ class JsonFormatter(logging.Formatter):
18 23 the output JSON-compatible. The only exception is when printing the first line,
19 24 so we need to keep track of it.
20 25  
21   - We assume that all input comes from the OletoolsLoggerAdapter which
22   - ensures that there is a `type` field in the record. Otherwise will have
23   - to add a try-except around the access to `record.type`.
  26 + The actual conversion from :py:class:`logging.LogRecord` to a text message
  27 + (i.e. %-formatting, adding exception information, etc.) is delegated to the
  28 + standard :py:class:`logging.Formatter`.
  29 +
  30 + The dumped json structure contains fields `msg` with the formatted message,
  31 + `level` with the log-level of the message and `type`, which is created by
  32 + :py:class:`oletools.common.log_helper.OletoolsLoggerAdapter` or added here
  33 + (for input from e.g. captured warnings, third-party libraries)
24 34 """
25   - json_dict = dict(msg=record.msg.replace('\n', ' '), level=record.levelname)
26   - json_dict['type'] = record.type
  35 + json_dict = dict(msg='', level='', type='')
  36 + try:
  37 + msg = self.msg_formatter.format(record)
  38 + json_dict['msg'] = msg.replace('\n', ' ')
  39 + json_dict['level'] = record.levelname
  40 + json_dict['type'] = record.type
  41 + except AttributeError: # most probably: record has no "type" field
  42 + if record.name == 'py.warnings': # this is from python's warning-capture logger
  43 + json_dict['type'] = 'warning'
  44 + else:
  45 + json_dict['type'] = 'msg' # message of unknown origin
  46 + except Exception as exc:
  47 + try:
  48 + json_dict['msg'] = "Ignore {0} when formatting '{1}': {2}".format(type(exc), record.msg, exc)
  49 + except Exception as exc2:
  50 + json_dict['msg'] = 'Caught {0} in logging'.format(str(exc2))
  51 + json_dict['type'] = 'log-warning'
  52 + json_dict['level'] = 'warning'
  53 +
27 54 formatted_message = ' ' + json.dumps(json_dict)
28 55  
29 56 if self._is_first_line:
... ...
oletools/common/log_helper/_logger_adapter.py
... ... @@ -7,6 +7,7 @@ class OletoolsLoggerAdapter(logging.LoggerAdapter):
7 7 Adapter class for all loggers returned by the logging module.
8 8 """
9 9 _json_enabled = None
  10 + _is_warn_logger = False # class-level default, stays False; set_warnings_logger() shadows it per instance
10 11  
11 12 def print_str(self, message, **kwargs):
12 13 """
... ... @@ -44,7 +45,10 @@ class OletoolsLoggerAdapter(logging.LoggerAdapter):
44 45 kwargs['extra']['type'] = kwargs['type']
45 46 del kwargs['type'] # downstream loggers cannot deal with this
46 47 if 'type' not in kwargs['extra']:
47   - kwargs['extra']['type'] = 'msg' # type will be added to LogRecord
  48 + if self._is_warn_logger:
  49 + kwargs['extra']['type'] = 'warning' # this will add field
  50 + else:
  51 + kwargs['extra']['type'] = 'msg' # 'type' to LogRecord
48 52 return msg, kwargs
49 53  
50 54 def set_json_enabled_function(self, json_enabled):
... ... @@ -53,6 +57,12 @@ class OletoolsLoggerAdapter(logging.LoggerAdapter):
53 57 """
54 58 self._json_enabled = json_enabled
55 59  
  60 + def set_warnings_logger(self):
  61 + """Make this the logger for warnings"""
  62 + # create an object attribute that shadows the class attribute which is
  63 + # always False
  64 + self._is_warn_logger = True
  65 +
56 66 def level(self):
57 67 """Return current level of logger."""
58 68 return self.logger.level
... ...
oletools/common/log_helper/log_helper.py
... ... @@ -152,6 +152,11 @@ class LogHelper:
152 152 self._use_json = use_json
153 153 sys.excepthook = self._get_except_hook(sys.excepthook)
154 154  
  155 + # make sure warnings do not mess up our output
  156 + logging.captureWarnings(True)
  157 + warn_logger = self.get_or_create_silent_logger('py.warnings')
  158 + warn_logger.set_warnings_logger()
  159 +
155 160 # since there could be loggers already created we go through all of them
156 161 # and set their levels to 0 so they will use the root logger's level
157 162 for name in self._all_names:
... ... @@ -174,6 +179,7 @@ class LogHelper:
174 179  
175 180 # end logging
176 181 self._all_names = set()
  182 + logging.captureWarnings(False)
177 183 logging.shutdown()
178 184  
179 185 # end json list
... ...
oletools/doc/Contribute.md
... ... @@ -13,6 +13,8 @@ to **send feedback**.
13 13  
14 14 The code is available in [a repository on GitHub](https://github.com/decalage2/oletools).
15 15 You may use it to **submit enhancements** using forks and pull requests.
  16 +When submitting a PR, GitHub will automatically check that unittests pass and
  17 +`pylint -E` does not report anything for the code files you changed.
16 18  
17 19 --------------------------------------------------------------------------
18 20  
... ...
oletools/mraptor_milter.py
... ... @@ -69,7 +69,7 @@ __version__ = '0.51'
69 69  
70 70 # --- IMPORTS ----------------------------------------------------------------
71 71  
72   -import Milter
  72 +import Milter # not part of requirements, therefore: # pylint: disable=import-error
73 73 import io
74 74 import time
75 75 import email
... ... @@ -78,7 +78,7 @@ import os
78 78 import logging
79 79 import logging.handlers
80 80 import datetime
81   -import StringIO
  81 +import StringIO # not part of requirements, therefore: # pylint: disable=import-error
82 82  
83 83 from socket import AF_INET6
84 84  
... ... @@ -96,7 +96,7 @@ if not _parent_dir in sys.path:
96 96  
97 97 from oletools import olevba, mraptor
98 98  
99   -from Milter.utils import parse_addr
  99 +from Milter.utils import parse_addr # not part of requirements, therefore: # pylint: disable=import-error
100 100  
101 101 from zipfile import is_zipfile
102 102  
... ... @@ -389,7 +389,7 @@ if __name__ == "__main__":
389 389  
390 390 # Using daemonize:
391 391 # See http://daemonize.readthedocs.io/en/latest/
392   - from daemonize import Daemonize
  392 + from daemonize import Daemonize # not part of requirements, therefore: # pylint: disable=import-error
393 393 daemon = Daemonize(app="mraptor_milter", pid=PIDFILE, action=main)
394 394 daemon.start()
395 395  
... ...
oletools/msodde.py
... ... @@ -149,69 +149,69 @@ LOCATIONS = ('word/document.xml', 'word/endnotes.xml', 'word/footnotes.xml',
149 149 # switches_with_args, switches_without_args, format_switches)
150 150 FIELD_BLACKLIST = (
151 151 # date and time:
152   - ('CREATEDATE', 0, 0, '', 'hs', 'datetime'), # pylint: disable=bad-whitespace
153   - ('DATE', 0, 0, '', 'hls', 'datetime'), # pylint: disable=bad-whitespace
154   - ('EDITTIME', 0, 0, '', '', 'numeric'), # pylint: disable=bad-whitespace
155   - ('PRINTDATE', 0, 0, '', 'hs', 'datetime'), # pylint: disable=bad-whitespace
156   - ('SAVEDATE', 0, 0, '', 'hs', 'datetime'), # pylint: disable=bad-whitespace
157   - ('TIME', 0, 0, '', '', 'datetime'), # pylint: disable=bad-whitespace
  152 + ('CREATEDATE', 0, 0, '', 'hs', 'datetime'),
  153 + ('DATE', 0, 0, '', 'hls', 'datetime'),
  154 + ('EDITTIME', 0, 0, '', '', 'numeric'),
  155 + ('PRINTDATE', 0, 0, '', 'hs', 'datetime'),
  156 + ('SAVEDATE', 0, 0, '', 'hs', 'datetime'),
  157 + ('TIME', 0, 0, '', '', 'datetime'),
158 158 # exclude document automation (we hate the "auto" in "automation")
159 159 # (COMPARE, DOCVARIABLE, GOTOBUTTON, IF, MACROBUTTON, PRINT)
160 160 # document information
161   - ('AUTHOR', 0, 1, '', '', 'string'), # pylint: disable=bad-whitespace
162   - ('COMMENTS', 0, 1, '', '', 'string'), # pylint: disable=bad-whitespace
163   - ('DOCPROPERTY', 1, 0, '', '', 'string/numeric/datetime'), # pylint: disable=bad-whitespace
164   - ('FILENAME', 0, 0, '', 'p', 'string'), # pylint: disable=bad-whitespace
165   - ('FILESIZE', 0, 0, '', 'km', 'numeric'), # pylint: disable=bad-whitespace
166   - ('KEYWORDS', 0, 1, '', '', 'string'), # pylint: disable=bad-whitespace
167   - ('LASTSAVEDBY', 0, 0, '', '', 'string'), # pylint: disable=bad-whitespace
168   - ('NUMCHARS', 0, 0, '', '', 'numeric'), # pylint: disable=bad-whitespace
169   - ('NUMPAGES', 0, 0, '', '', 'numeric'), # pylint: disable=bad-whitespace
170   - ('NUMWORDS', 0, 0, '', '', 'numeric'), # pylint: disable=bad-whitespace
171   - ('SUBJECT', 0, 1, '', '', 'string'), # pylint: disable=bad-whitespace
172   - ('TEMPLATE', 0, 0, '', 'p', 'string'), # pylint: disable=bad-whitespace
173   - ('TITLE', 0, 1, '', '', 'string'), # pylint: disable=bad-whitespace
  161 + ('AUTHOR', 0, 1, '', '', 'string'),
  162 + ('COMMENTS', 0, 1, '', '', 'string'),
  163 + ('DOCPROPERTY', 1, 0, '', '', 'string/numeric/datetime'),
  164 + ('FILENAME', 0, 0, '', 'p', 'string'),
  165 + ('FILESIZE', 0, 0, '', 'km', 'numeric'),
  166 + ('KEYWORDS', 0, 1, '', '', 'string'),
  167 + ('LASTSAVEDBY', 0, 0, '', '', 'string'),
  168 + ('NUMCHARS', 0, 0, '', '', 'numeric'),
  169 + ('NUMPAGES', 0, 0, '', '', 'numeric'),
  170 + ('NUMWORDS', 0, 0, '', '', 'numeric'),
  171 + ('SUBJECT', 0, 1, '', '', 'string'),
  172 + ('TEMPLATE', 0, 0, '', 'p', 'string'),
  173 + ('TITLE', 0, 1, '', '', 'string'),
174 174 # equations and formulas
175 175 # exclude '=' formulae because they have different syntax (and can be bad)
176   - ('ADVANCE', 0, 0, 'dlruxy', '', ''), # pylint: disable=bad-whitespace
177   - ('SYMBOL', 1, 0, 'fs', 'ahju', ''), # pylint: disable=bad-whitespace
  176 + ('ADVANCE', 0, 0, 'dlruxy', '', ''),
  177 + ('SYMBOL', 1, 0, 'fs', 'ahju', ''),
178 178 # form fields
179   - ('FORMCHECKBOX', 0, 0, '', '', ''), # pylint: disable=bad-whitespace
180   - ('FORMDROPDOWN', 0, 0, '', '', ''), # pylint: disable=bad-whitespace
181   - ('FORMTEXT', 0, 0, '', '', ''), # pylint: disable=bad-whitespace
  179 + ('FORMCHECKBOX', 0, 0, '', '', ''),
  180 + ('FORMDROPDOWN', 0, 0, '', '', ''),
  181 + ('FORMTEXT', 0, 0, '', '', ''),
182 182 # index and tables
183   - ('INDEX', 0, 0, 'bcdefghklpsz', 'ry', ''), # pylint: disable=bad-whitespace
  183 + ('INDEX', 0, 0, 'bcdefghklpsz', 'ry', ''),
184 184 # exclude RD since that imports data from other files
185   - ('TA', 0, 0, 'clrs', 'bi', ''), # pylint: disable=bad-whitespace
186   - ('TC', 1, 0, 'fl', 'n', ''), # pylint: disable=bad-whitespace
187   - ('TOA', 0, 0, 'bcdegls', 'fhp', ''), # pylint: disable=bad-whitespace
188   - ('TOC', 0, 0, 'abcdflnopst', 'huwxz', ''), # pylint: disable=bad-whitespace
189   - ('XE', 1, 0, 'frty', 'bi', ''), # pylint: disable=bad-whitespace
  185 + ('TA', 0, 0, 'clrs', 'bi', ''),
  186 + ('TC', 1, 0, 'fl', 'n', ''),
  187 + ('TOA', 0, 0, 'bcdegls', 'fhp', ''),
  188 + ('TOC', 0, 0, 'abcdflnopst', 'huwxz', ''),
  189 + ('XE', 1, 0, 'frty', 'bi', ''),
190 190 # links and references
191 191 # exclude AUTOTEXT and AUTOTEXTLIST since we do not like stuff with 'AUTO'
192   - ('BIBLIOGRAPHY', 0, 0, 'lfm', '', ''), # pylint: disable=bad-whitespace
193   - ('CITATION', 1, 0, 'lfspvm', 'nty', ''), # pylint: disable=bad-whitespace
  192 + ('BIBLIOGRAPHY', 0, 0, 'lfm', '', ''),
  193 + ('CITATION', 1, 0, 'lfspvm', 'nty', ''),
194 194 # exclude HYPERLINK since we are allergic to URLs
195 195 # exclude INCLUDEPICTURE and INCLUDETEXT (other file or maybe even URL?)
196 196 # exclude LINK and REF (could reference other files)
197   - ('NOTEREF', 1, 0, '', 'fhp', ''), # pylint: disable=bad-whitespace
198   - ('PAGEREF', 1, 0, '', 'hp', ''), # pylint: disable=bad-whitespace
199   - ('QUOTE', 1, 0, '', '', 'datetime'), # pylint: disable=bad-whitespace
200   - ('STYLEREF', 1, 0, '', 'lnprtw', ''), # pylint: disable=bad-whitespace
  197 + ('NOTEREF', 1, 0, '', 'fhp', ''),
  198 + ('PAGEREF', 1, 0, '', 'hp', ''),
  199 + ('QUOTE', 1, 0, '', '', 'datetime'),
  200 + ('STYLEREF', 1, 0, '', 'lnprtw', ''),
201 201 # exclude all Mail Merge commands since they import data from other files
202 202 # (ADDRESSBLOCK, ASK, COMPARE, DATABASE, FILLIN, GREETINGLINE, IF,
203 203 # MERGEFIELD, MERGEREC, MERGESEQ, NEXT, NEXTIF, SET, SKIPIF)
204 204 # Numbering
205   - ('LISTNUM', 0, 1, 'ls', '', ''), # pylint: disable=bad-whitespace
206   - ('PAGE', 0, 0, '', '', 'numeric'), # pylint: disable=bad-whitespace
207   - ('REVNUM', 0, 0, '', '', ''), # pylint: disable=bad-whitespace
208   - ('SECTION', 0, 0, '', '', 'numeric'), # pylint: disable=bad-whitespace
209   - ('SECTIONPAGES', 0, 0, '', '', 'numeric'), # pylint: disable=bad-whitespace
210   - ('SEQ', 1, 1, 'rs', 'chn', 'numeric'), # pylint: disable=bad-whitespace
211   - # user information # pylint: disable=bad-whitespace
212   - ('USERADDRESS', 0, 1, '', '', 'string'), # pylint: disable=bad-whitespace
213   - ('USERINITIALS', 0, 1, '', '', 'string'), # pylint: disable=bad-whitespace
214   - ('USERNAME', 0, 1, '', '', 'string'), # pylint: disable=bad-whitespace
  205 + ('LISTNUM', 0, 1, 'ls', '', ''),
  206 + ('PAGE', 0, 0, '', '', 'numeric'),
  207 + ('REVNUM', 0, 0, '', '', ''),
  208 + ('SECTION', 0, 0, '', '', 'numeric'),
  209 + ('SECTIONPAGES', 0, 0, '', '', 'numeric'),
  210 + ('SEQ', 1, 1, 'rs', 'chn', 'numeric'),
  211 + # user information
  212 + ('USERADDRESS', 0, 1, '', '', 'string'),
  213 + ('USERINITIALS', 0, 1, '', '', 'string'),
  214 + ('USERNAME', 0, 1, '', '', 'string'),
215 215 )
216 216  
217 217 FIELD_DDE_REGEX = re.compile(r'^\s*dde(auto)?\s+', re.I)
... ...
oletools/oleobj.py
... ... @@ -180,7 +180,7 @@ if sys.version_info[0] <= 2:
180 180 NULL_CHAR = '\x00'
181 181 else:
182 182 # Python 3.x
183   - NULL_CHAR = 0 # pylint: disable=redefined-variable-type
  183 + NULL_CHAR = 0
184 184 xrange = range # pylint: disable=redefined-builtin, invalid-name
185 185  
186 186 OOXML_RELATIONSHIP_TAG = '{http://schemas.openxmlformats.org/package/2006/relationships}Relationship'
... ...
oletools/olevba.py
... ... @@ -3104,7 +3104,7 @@ class VBA_Parser(object):
3104 3104 log.info('Check whether OLE file is PPT')
3105 3105 try:
3106 3106 ppt = ppt_parser.PptParser(self.ole_file, fast_fail=True)
3107   - for vba_data in ppt.iter_vba_data():
  3107 + for vba_data in ppt.iter_vba_data(): # pylint: disable=no-value-for-parameter
3108 3108 self.append_subfile(None, vba_data, container='PptParser')
3109 3109 log.info('File is PPT')
3110 3110 self.ole_file.close() # just in case
... ...
oletools/ooxml.py
... ... @@ -160,7 +160,7 @@ def debug_str(elem):
160 160 def isstr(some_var):
161 161 """ version-independent test for isinstance(some_var, (str, unicode)) """
162 162 if sys.version_info.major == 2:
163   - return isinstance(some_var, basestring) # true for str and unicode
  163 + return isinstance(some_var, basestring) # true for str and unicode # pylint: disable=undefined-variable
164 164 return isinstance(some_var, str) # there is no unicode
165 165  
166 166  
... ...
oletools/ppt_parser.py
... ... @@ -1377,7 +1377,7 @@ class PptParser(object):
1377 1377 # first identified in step 3 of Part 1, that is, the UserEditAtom
1378 1378 # record closest to the end of the stream.
1379 1379 if self.persist_object_directory is None:
1380   - self.parse_persist_object_directory()
  1380 + self.parse_persist_object_directory() # pylint: disable=no-value-for-parameter
1381 1381  
1382 1382 # Step 2: Lookup the value of the docPersistIdRef field in the persist
1383 1383 # object directory constructed in step 8 of Part 1 to find the stream
... ... @@ -1462,7 +1462,7 @@ class PptParser(object):
1462 1462 rec_len=VBAInfoAtom.RECORD_LENGTH)
1463 1463  
1464 1464 # try parse
1465   - for idx in self.search_pattern(pattern):
  1465 + for idx in self.search_pattern(pattern): # pylint: disable=no-value-for-parameter
1466 1466 # assume that in stream at idx there is a VBAInfoContainer
1467 1467 stream.seek(idx)
1468 1468 log.debug('extracting at idx {0}'.format(idx))
... ... @@ -1515,7 +1515,7 @@ class PptParser(object):
1515 1515 pattern = obj_type.generate_pattern()
1516 1516  
1517 1517 # try parse
1518   - for idx in self.search_pattern(pattern):
  1518 + for idx in self.search_pattern(pattern): # pylint: disable=no-value-for-parameter
1519 1519 # assume a ExternalObjectStorage in stream at idx
1520 1520 stream.seek(idx)
1521 1521 log.debug('extracting at idx {0}'.format(idx))
... ... @@ -1589,7 +1589,7 @@ class PptParser(object):
1589 1589  
1590 1590 n_infos = 0
1591 1591 n_macros = 0
1592   - for info in self.search_vba_info():
  1592 + for info in self.search_vba_info(stream):
1593 1593 n_infos += 1
1594 1594 if info.vba_info_atom.f_has_macros > 0:
1595 1595 n_macros += 1
... ... @@ -1597,13 +1597,13 @@ class PptParser(object):
1597 1597 # --> no vba-info, so all storages probably ActiveX or other OLE
1598 1598 n_storages = 0
1599 1599 n_compressed = 0
1600   - for storage in self.search_vba_storage():
  1600 + for storage in self.search_vba_storage(): # pylint: disable=no-value-for-parameter
1601 1601 n_storages += 1
1602 1602 if storage.is_compressed:
1603 1603 n_compressed += 1
1604   - yield self.decompress_vba_storage(storage)
  1604 + yield self.decompress_vba_storage(storage) # pylint: disable=no-value-for-parameter
1605 1605 else:
1606   - yield self.read_vba_storage_data(storage)
  1606 + yield self.read_vba_storage_data(storage) # pylint: disable=no-value-for-parameter
1607 1607  
1608 1608 log.info('found {0} infos ({1} with macros) and {2} storages '
1609 1609 '({3} compressed)'
... ...
oletools/rtfobj.py
... ... @@ -337,7 +337,7 @@ if sys.version_info[0] <= 2:
337 337 BACKSLASH = '\\'
338 338 BRACE_OPEN = '{'
339 339 BRACE_CLOSE = '}'
340   - UNICODE_TYPE = unicode
  340 + UNICODE_TYPE = unicode # pylint: disable=undefined-variable
341 341 else:
342 342 # Python 3.x - Integers
343 343 BACKSLASH = ord('\\')
... ...
oletools/xls_parser.py
... ... @@ -229,46 +229,46 @@ class XlsbStream(record_base.OleRecordStream):
229 229  
230 230 # records that appear often but do not need their own XlsRecord subclass (yet)
231 231 FREQUENT_RECORDS = dict([
232   - ( 156, 'BuiltInFnGroupCount'), # pylint: disable=bad-whitespace
233   - (2147, 'BookExt'), # pylint: disable=bad-whitespace
234   - ( 442, 'CodeName'), # pylint: disable=bad-whitespace
235   - ( 66, 'CodePage'), # pylint: disable=bad-whitespace
236   - (4195, 'Dat'), # pylint: disable=bad-whitespace
237   - (2154, 'DataLabExt'), # pylint: disable=bad-whitespace
238   - (2155, 'DataLabExtContents'), # pylint: disable=bad-whitespace
239   - ( 215, 'DBCell'), # pylint: disable=bad-whitespace
240   - ( 220, 'DbOrParmQry'), # pylint: disable=bad-whitespace
241   - (2051, 'DBQueryExt'), # pylint: disable=bad-whitespace
242   - (2166, 'DConn'), # pylint: disable=bad-whitespace
243   - ( 35, 'ExternName'), # pylint: disable=bad-whitespace
244   - ( 23, 'ExternSheet'), # pylint: disable=bad-whitespace
245   - ( 255, 'ExtSST'), # pylint: disable=bad-whitespace
246   - (2052, 'ExtString'), # pylint: disable=bad-whitespace
247   - (2151, 'FeatHdr'), # pylint: disable=bad-whitespace
248   - ( 91, 'FileSharing'), # pylint: disable=bad-whitespace
249   - (1054, 'Format'), # pylint: disable=bad-whitespace
250   - ( 49, 'Font'), # pylint: disable=bad-whitespace
251   - (2199, 'GUIDTypeLib'), # pylint: disable=bad-whitespace
252   - ( 440, 'HLink'), # pylint: disable=bad-whitespace
253   - ( 225, 'InterfaceHdr'), # pylint: disable=bad-whitespace
254   - ( 226, 'InterfaceEnd'), # pylint: disable=bad-whitespace
255   - ( 523, 'Index'), # pylint: disable=bad-whitespace
256   - ( 24, 'Lbl'), # pylint: disable=bad-whitespace
257   - ( 193, 'Mms'), # pylint: disable=bad-whitespace
258   - ( 93, 'Obj'), # pylint: disable=bad-whitespace
259   - (4135, 'ObjectLink'), # pylint: disable=bad-whitespace
260   - (2058, 'OleDbConn'), # pylint: disable=bad-whitespace
261   - ( 222, 'OleObjectSize'), # pylint: disable=bad-whitespace
262   - (2214, 'RichTextStream'), # pylint: disable=bad-whitespace
263   - (2146, 'SheetExt'), # pylint: disable=bad-whitespace
264   - (1212, 'ShrFmla'), # pylint: disable=bad-whitespace
265   - (2060, 'SxViewExt'), # pylint: disable=bad-whitespace
266   - (2136, 'SxViewLink'), # pylint: disable=bad-whitespace
267   - (2049, 'WebPub'), # pylint: disable=bad-whitespace
268   - ( 224, 'XF (formatting)'), # pylint: disable=bad-whitespace
269   - (2173, 'XFExt (formatting)'), # pylint: disable=bad-whitespace
270   - ( 659, 'Style'), # pylint: disable=bad-whitespace
271   - (2194, 'StyleExt') # pylint: disable=bad-whitespace
  232 + ( 156, 'BuiltInFnGroupCount'),
  233 + (2147, 'BookExt'),
  234 + ( 442, 'CodeName'),
  235 + ( 66, 'CodePage'),
  236 + (4195, 'Dat'),
  237 + (2154, 'DataLabExt'),
  238 + (2155, 'DataLabExtContents'),
  239 + ( 215, 'DBCell'),
  240 + ( 220, 'DbOrParmQry'),
  241 + (2051, 'DBQueryExt'),
  242 + (2166, 'DConn'),
  243 + ( 35, 'ExternName'),
  244 + ( 23, 'ExternSheet'),
  245 + ( 255, 'ExtSST'),
  246 + (2052, 'ExtString'),
  247 + (2151, 'FeatHdr'),
  248 + ( 91, 'FileSharing'),
  249 + (1054, 'Format'),
  250 + ( 49, 'Font'),
  251 + (2199, 'GUIDTypeLib'),
  252 + ( 440, 'HLink'),
  253 + ( 225, 'InterfaceHdr'),
  254 + ( 226, 'InterfaceEnd'),
  255 + ( 523, 'Index'),
  256 + ( 24, 'Lbl'),
  257 + ( 193, 'Mms'),
  258 + ( 93, 'Obj'),
  259 + (4135, 'ObjectLink'),
  260 + (2058, 'OleDbConn'),
  261 + ( 222, 'OleObjectSize'),
  262 + (2214, 'RichTextStream'),
  263 + (2146, 'SheetExt'),
  264 + (1212, 'ShrFmla'),
  265 + (2060, 'SxViewExt'),
  266 + (2136, 'SxViewLink'),
  267 + (2049, 'WebPub'),
  268 + ( 224, 'XF (formatting)'),
  269 + (2173, 'XFExt (formatting)'),
  270 + ( 659, 'Style'),
  271 + (2194, 'StyleExt')
272 272 ])
273 273  
274 274 #: records found in xlsb binary parts
... ...
tests/common/log_helper/log_helper_test_imported.py
... ... @@ -4,6 +4,7 @@ by the main test file
4 4 """
5 5  
6 6 from oletools.common.log_helper import log_helper
  7 +import warnings
7 8  
8 9 DEBUG_MESSAGE = 'imported: debug log'
9 10 INFO_MESSAGE = 'imported: info log'
... ... @@ -11,7 +12,10 @@ WARNING_MESSAGE = 'imported: warning log'
11 12 ERROR_MESSAGE = 'imported: error log'
12 13 CRITICAL_MESSAGE = 'imported: critical log'
13 14 RESULT_MESSAGE = 'imported: result log'
  15 +
14 16 RESULT_TYPE = 'imported: result'
  17 +ACTUAL_WARNING = 'Feature XYZ provided by this module might be deprecated at '\
  18 + 'some point in the future ... or not'
15 19  
16 20 logger = log_helper.get_or_create_silent_logger('test_imported')
17 21  
... ... @@ -27,3 +31,7 @@ def log():
27 31 logger.error(ERROR_MESSAGE)
28 32 logger.critical(CRITICAL_MESSAGE)
29 33 logger.info(RESULT_MESSAGE, type=RESULT_TYPE)
  34 +
  35 +
  36 +def warn():
  37 + warnings.warn(ACTUAL_WARNING)
... ...
tests/common/log_helper/log_helper_test_main.py
... ... @@ -2,6 +2,7 @@
2 2  
3 3 import sys
4 4 import logging
  5 +import warnings
5 6 from tests.common.log_helper import log_helper_test_imported
6 7 from oletools.common.log_helper import log_helper
7 8  
... ... @@ -11,7 +12,9 @@ WARNING_MESSAGE = 'main: warning log'
11 12 ERROR_MESSAGE = 'main: error log'
12 13 CRITICAL_MESSAGE = 'main: critical log'
13 14 RESULT_MESSAGE = 'main: result log'
  15 +
14 16 RESULT_TYPE = 'main: result'
  17 +ACTUAL_WARNING = 'Warnings can pop up anywhere, have to be prepared!'
15 18  
16 19 logger = log_helper.get_or_create_silent_logger('test_main')
17 20  
... ... @@ -24,7 +27,8 @@ def enable_logging():
24 27  
25 28 def main(args):
26 29 """
27   - Try to cover possible logging scenarios. For each scenario covered, here's the expected args and outcome:
  30 + Try to cover possible logging scenarios. For each scenario covered, here's
  31 + the expected args and outcome:
28 32 - Log without enabling: ['<level>']
29 33 * logging when being imported - should never print
30 34 - Log as JSON without enabling: ['as-json', '<level>']
... ... @@ -35,6 +39,8 @@ def main(args):
35 39 * logging as JSON when being run as script - should log messages as JSON
36 40 - Enable, log as JSON and throw: ['enable', 'as-json', 'throw', '<level>']
37 41 * should produce JSON-compatible output, even after an unhandled exception
  42 + - Enable, log as JSON and warn: ['enable', 'as-json', 'warn', '<level>']
  43 + * should produce JSON-compatible output, even after a warning
38 44 """
39 45  
40 46 # the level should always be the last argument passed
... ... @@ -42,6 +48,9 @@ def main(args):
42 48 use_json = 'as-json' in args
43 49 throw = 'throw' in args
44 50 percent_autoformat = '%-autoformat' in args
  51 + warn = 'warn' in args
  52 + exc_info = 'exc-info' in args
  53 + wrong_log_args = 'wrong-log-args' in args
45 54  
46 55 log_helper_test_imported.logger.setLevel(logging.ERROR)
47 56  
... ... @@ -53,6 +62,22 @@ def main(args):
53 62 if throw:
54 63 raise Exception('An exception occurred before ending the logging')
55 64  
  65 + if warn:
  66 + warnings.warn(ACTUAL_WARNING)
  67 + log_helper_test_imported.warn()
  68 +
  69 + if exc_info:
  70 + try:
  71 + raise Exception('This is an exception')
  72 + except Exception:
  73 + logger.exception('Caught exception') # has exc_info=True
  74 +
  75 + if wrong_log_args:
  76 + logger.info('Opening file /dangerous/file/with-%s-in-name')
  77 + logger.info('The result is %f')
  78 + logger.info('No result', 1.23)
  79 + logger.info('The result is %f', 'bla')
  80 +
56 81 log_helper.end_logging()
57 82  
58 83  
... ...
tests/common/log_helper/test_log_helper.py
... ... @@ -11,13 +11,13 @@ import json
11 11 import subprocess
12 12 from tests.common.log_helper import log_helper_test_main
13 13 from tests.common.log_helper import log_helper_test_imported
  14 +import os
14 15 from os.path import dirname, join, relpath, abspath
15 16  
16 17 from tests.test_utils import PROJECT_ROOT
17 18  
18 19 # test file we use as "main" module
19   -TEST_FILE = relpath(join(dirname(abspath(__file__)), 'log_helper_test_main.py'),
20   - PROJECT_ROOT)
  20 +TEST_FILE = join(dirname(abspath(__file__)), 'log_helper_test_main.py')
21 21  
22 22 # test file simulating a third party main module that only imports oletools
23 23 TEST_FILE_3RD_PARTY = relpath(join(dirname(abspath(__file__)),
... ... @@ -26,6 +26,8 @@ TEST_FILE_3RD_PARTY = relpath(join(dirname(abspath(__file__)),
26 26  
27 27 PYTHON_EXECUTABLE = sys.executable
28 28  
  29 +PERCENT_FORMAT_OUTPUT = 'The answer is 47.'
  30 +
29 31  
30 32 class TestLogHelper(unittest.TestCase):
31 33 def test_it_doesnt_log_when_not_enabled(self):
... ... @@ -113,7 +115,7 @@ class TestLogHelper(unittest.TestCase):
113 115 def test_percent_autoformat(self):
114 116 """Test that auto-formatting of log strings with `%` works."""
115 117 output = self._run_test(['enable', '%-autoformat', 'info'])
116   - self.assertIn('The answer is 47.', output)
  118 + self.assertIn(PERCENT_FORMAT_OUTPUT, output)
117 119  
118 120 def test_json_correct_on_exceptions(self):
119 121 """
... ... @@ -141,6 +143,93 @@ class TestLogHelper(unittest.TestCase):
141 143 self.assertIn('INFO:test_main:main: info log', output)
142 144 self.assertIn('INFO:test_imported:imported: info log', output)
143 145  
  146 + def test_json_correct_on_warnings(self):
  147 + """
  148 + Test that even on warnings our JSON is always correct
  149 + """
  150 + output = self._run_test(['enable', 'as-json', 'warn', 'warning'])
  151 + expected_messages = [
  152 + log_helper_test_main.WARNING_MESSAGE,
  153 + log_helper_test_main.ERROR_MESSAGE,
  154 + log_helper_test_main.CRITICAL_MESSAGE,
  155 + log_helper_test_imported.WARNING_MESSAGE,
  156 + log_helper_test_imported.ERROR_MESSAGE,
  157 + log_helper_test_imported.CRITICAL_MESSAGE,
  158 + ]
  159 +
  160 + for msg in expected_messages:
  161 + self.assertIn(msg, output)
  162 +
  163 + # last two entries of output should be warnings
  164 + jout = json.loads(output)
  165 + self.assertEqual(jout[-2]['level'], 'WARNING')
  166 + self.assertEqual(jout[-1]['level'], 'WARNING')
  167 + self.assertEqual(jout[-2]['type'], 'warning')
  168 + self.assertEqual(jout[-1]['type'], 'warning')
  169 + self.assertIn(log_helper_test_main.ACTUAL_WARNING, jout[-2]['msg'])
  170 + self.assertIn(log_helper_test_imported.ACTUAL_WARNING, jout[-1]['msg'])
  171 +
  172 + def test_warnings(self):
  173 + """Check that warnings are captured and printed correctly"""
  174 + output = self._run_test(['enable', 'warn', 'warning'])
  175 +
  176 + # find out which line contains the call to warnings.warn:
  177 + warnings_line = None
  178 + with open(TEST_FILE, 'rt') as reader:
  179 + for line_idx, line in enumerate(reader):
  180 + if 'warnings.warn' in line:
  181 + warnings_line = line_idx + 1
  182 + break
  183 + self.assertNotEqual(warnings_line, None)
  184 +
  185 + imported_file = join(dirname(abspath(__file__)),
  186 + 'log_helper_test_imported.py')
  187 + imported_line = None
  188 + with open(imported_file, 'rt') as reader:
  189 + for line_idx, line in enumerate(reader):
  190 + if 'warnings.warn' in line:
  191 + imported_line = line_idx + 1
  192 + break
  193 + self.assertNotEqual(imported_line, None)
  194 +
  195 + expect = '\n'.join([
  196 + 'WARNING ' + log_helper_test_main.WARNING_MESSAGE,
  197 + 'ERROR ' + log_helper_test_main.ERROR_MESSAGE,
  198 + 'CRITICAL ' + log_helper_test_main.CRITICAL_MESSAGE,
  199 + 'WARNING ' + log_helper_test_imported.WARNING_MESSAGE,
  200 + 'ERROR ' + log_helper_test_imported.ERROR_MESSAGE,
  201 + 'CRITICAL ' + log_helper_test_imported.CRITICAL_MESSAGE,
  202 + 'WARNING {0}:{1}: UserWarning: {2}'
  203 + .format(TEST_FILE, warnings_line, log_helper_test_main.ACTUAL_WARNING),
  204 + ' warnings.warn(ACTUAL_WARNING)', # warnings include source line
  205 + '',
  206 + 'WARNING {0}:{1}: UserWarning: {2}'
  207 + .format(imported_file, imported_line, log_helper_test_imported.ACTUAL_WARNING),
  208 + ' warnings.warn(ACTUAL_WARNING)', # warnings include source line
  209 + ])
  210 + self.assertEqual(output.strip(), expect)
  211 +
  212 + def test_json_percent_formatting(self):
  213 + """Test that json-output has formatting args included in output."""
  214 + output = self._run_test(['enable', 'as-json', '%-autoformat', 'info'])
  215 + json.loads(output) # check that this does not raise, so json is valid
  216 + self.assertIn(PERCENT_FORMAT_OUTPUT, output)
  217 +
  218 + def test_json_exception_formatting(self):
  219 + """Test that json-output has formatted exception info in output"""
  220 + output = self._run_test(['enable', 'as-json', 'exc-info', 'info'])
  221 + json.loads(output) # check that this does not raise, so json is valid
  222 + self.assertIn('Caught exception', output) # actual log message
  223 + self.assertIn('This is an exception', output) # message of caught exception
  224 + self.assertIn('Traceback (most recent call last)', output) # start of trace
  225 + self.assertIn(TEST_FILE.replace('\\', '\\\\'), output) # part of trace
  226 +
  227 + def test_json_wrong_args(self):
  228 + """Test that too many or missing args do not raise exceptions inside logger"""
  229 + output = self._run_test(['enable', 'as-json', 'wrong-log-args', 'info'])
  230 + json.loads(output) # check that this does not raise, so json is valid
  231 + # do not care about actual contents of output
  232 +
144 233 def _assert_json_messages(self, output, messages):
145 234 try:
146 235 json_data = json.loads(output)
... ... @@ -160,8 +249,10 @@ class TestLogHelper(unittest.TestCase):
160 249 we might get errors or false positives between sequential test runs)
161 250  
162 251 When arg `run_third_party` is `True`, we do not run the `TEST_FILE` as
163   - main moduel but the `TEST_FILE_3RD_PARTY` and return contents of
  252 + main module but the `TEST_FILE_3RD_PARTY` and return contents of
164 253 `stderr` instead of `stdout`.
  254 +
  255 + TODO: use tests.test_utils.call_and_capture
165 256 """
166 257 all_args = [PYTHON_EXECUTABLE, ]
167 258 if run_third_party:
... ... @@ -169,10 +260,12 @@ class TestLogHelper(unittest.TestCase):
169 260 else:
170 261 all_args.append(TEST_FILE)
171 262 all_args.extend(args)
  263 + env = os.environ.copy()
  264 + env['PYTHONPATH'] = PROJECT_ROOT
172 265 child = subprocess.Popen(
173 266 all_args,
174 267 shell=False,
175   - env={'PYTHONPATH': PROJECT_ROOT},
  268 + env=env,
176 269 universal_newlines=True,
177 270 cwd=PROJECT_ROOT,
178 271 stdin=None,
... ...
tests/common/test_encoding_handler.py
... ... @@ -14,7 +14,13 @@ FILE_TEXT = u&#39;The unicode check mark is \u2713.\n&#39;
14 14  
15 15 @contextmanager
16 16 def temp_file(just_name=True):
17   - """Context manager that creates temp file and deletes it in the end"""
  17 + """
  18 + Context manager that creates temp file and deletes it in the end.
  19 +
  20 + If `just_name` is `False` this yields (open-file-handle, file-name),
  21 + if `just_name` is `True` this yields just the file-name (and closes
  22 + the os-level file descriptor first if we are on Windows).
  23 + """
18 24 tmp_descriptor = None
19 25 tmp_name = None
20 26 tmp_handle = None
... ... @@ -24,8 +30,12 @@ def temp_file(just_name=True):
24 30 # we create our own file handle since we want to be able to close the
25 31 # file and open it again for reading.
26 32 # We keep the os-level descriptor open so file name is still reserved
27   - # for us
  33 + # for us ... except on Windows, where another process cannot write to
  34 + # the file while we keep the descriptor open, so we close it there
28 35 if just_name:
  36 + if sys.platform.startswith('win'):
  37 + os.close(tmp_descriptor)
  38 + tmp_descriptor = None
29 39 yield tmp_name
30 40 else:
31 41 tmp_handle = open(tmp_name, 'wb')
... ... @@ -51,11 +61,7 @@ class TestEncodingHandler(unittest.TestCase):
51 61 shell=True)
52 62  
53 63 def test_print_redirect(self):
54   - """
55   - Test redirection of unicode output to files does not raise error
56   -
57   - TODO: test this on non-linux OSs
58   - """
  64 + """Test redirection of unicode output to files does not raise error."""
59 65 with temp_file() as tmp_file:
60 66 check_call('{python} {this_file} print > {tmp_file}'
61 67 .format(python=sys.executable, this_file=__file__,
... ... @@ -63,7 +69,7 @@ class TestEncodingHandler(unittest.TestCase):
63 69 shell=True)
64 70  
65 71 @unittest.skipIf(not sys.platform.startswith('linux'),
66   - 'Only tested on linux sofar')
  72 + 'Need to adapt this test to Windows')
67 73 def test_print_no_lang(self):
68 74 """
69 75 Test redirection of unicode output to files does not raise error
... ... @@ -89,11 +95,7 @@ class TestEncodingHandler(unittest.TestCase):
89 95 self.fail(cpe.output)
90 96  
91 97 def test_uopen_redirect(self):
92   - """
93   - Test redirection of unicode output to files does not raise error
94   -
95   - TODO: test this on non-linux OSs
96   - """
  98 + """Test redirection of unicode output to files does not raise error."""
97 99 with temp_file(False) as (tmp_handle, tmp_file):
98 100 tmp_handle.write(FILE_TEXT.encode('utf8'))
99 101 tmp_handle.close()
... ... @@ -109,7 +111,7 @@ class TestEncodingHandler(unittest.TestCase):
109 111 self.fail(cpe.output)
110 112  
111 113 @unittest.skipIf(not sys.platform.startswith('linux'),
112   - 'Only tested on linux sofar')
  114 + 'Need to adapt this test to Windows')
113 115 def test_uopen_no_lang(self):
114 116 """
115 117 Test that uopen in a C-LANG environment is ok
... ...
tests/ftguess/test_basic.py
1 1 """Test ftguess"""
2   -
3 2 import unittest
4 3 import os
5   -from os.path import splitext
  4 +from os.path import splitext, join
6 5 from oletools import ftguess
7 6  
8 7 # Directory with test data, independent of current working directory
... ... @@ -47,7 +46,7 @@ class TestFTGuess(unittest.TestCase):
47 46 before_dot, extension = splitext(filename)
48 47 if extension == '.zip':
49 48 extension = splitext(before_dot)[1]
50   - elif filename in ('basic/empty', 'basic/text'):
  49 + elif filename in (join('basic', 'empty'), join('basic', 'text')):
51 50 extension = '.csv' # have just like that
52 51 elif not extension:
53 52 self.fail('Could not find extension for test sample {0}'
... ... @@ -55,7 +54,7 @@ class TestFTGuess(unittest.TestCase):
55 54 extension = extension[1:] # remove the leading '.'
56 55  
57 56 # encrypted files are mostly not recognized (yet?), except .xls
58   - if filename.startswith('encrypted/'):
  57 + if filename.startswith('encrypted' + os.sep):
59 58 if extension == 'xls':
60 59 expect = ftguess.FType_Excel97
61 60 else:
... ... @@ -69,7 +68,7 @@ class TestFTGuess(unittest.TestCase):
69 68 # not implemented yet
70 69 expect = ftguess.FType_Unknown
71 70  
72   - elif filename == 'basic/encrypted.docx':
  71 + elif filename == join('basic', 'encrypted.docx'):
73 72 expect = ftguess.FType_Generic_OLE
74 73  
75 74 elif 'excel5' in filename:
... ...
tests/msodde/test_basic.py
... ... @@ -9,6 +9,7 @@ Ensure that
9 9 from __future__ import print_function
10 10  
11 11 import unittest
  12 +from platform import python_implementation
12 13 import sys
13 14 import os
14 15 from os.path import join, basename
... ... @@ -19,8 +20,21 @@ from tests.test_utils import call_and_capture, decrypt_sample,\
19 20 DATA_BASE_DIR as BASE_DIR
20 21  
21 22  
  23 +# Check whether we run with PyPy on windows because that causes trouble
  24 +# when using the :py:func:`tests.test_utils.decrypt_sample`.
  25 +#
  26 +# Value is `(do_skip, explanation)` where `do_skip` is `True` iff running
  27 +# PyPy on Windows; `explanation` is the skip reason passed to `skipIf`
  28 +SKIP_PYPY_WIN = (
  29 + python_implementation().lower().startswith('pypy')
  30 + and sys.platform.lower().startswith('win'),
  31 + "On PyPy there is a problem with deleting temp files for decrypt_sample"
  32 +)
  33 +
  34 +
22 35 class TestReturnCode(unittest.TestCase):
23 36 """ check return codes and exception behaviour (not text output) """
  37 + @unittest.skipIf(*SKIP_PYPY_WIN)
24 38 def test_valid_doc(self):
25 39 """ check that a valid doc file leads to 0 exit status """
26 40 for filename in (
... ... @@ -44,6 +58,7 @@ class TestReturnCode(unittest.TestCase):
44 58 self.do_test_validity(join(BASE_DIR, 'msodde',
45 59 filename + '.docm'))
46 60  
  61 + @unittest.skipIf(*SKIP_PYPY_WIN)
47 62 def test_valid_xml(self):
48 63 """ check that xml leads to 0 exit status """
49 64 for filename in (
... ... @@ -67,11 +82,11 @@ class TestReturnCode(unittest.TestCase):
67 82  
68 83 def test_invalid_empty(self):
69 84 """ check that empty file argument leads to non-zero exit status """
70   - self.do_test_validity(join(BASE_DIR, 'basic/empty'), Exception)
  85 + self.do_test_validity(join(BASE_DIR, 'basic', 'empty'), Exception)
71 86  
72 87 def test_invalid_text(self):
73 88 """ check that text file argument leads to non-zero exit status """
74   - self.do_test_validity(join(BASE_DIR, 'basic/text'), Exception)
  89 + self.do_test_validity(join(BASE_DIR, 'basic', 'text'), Exception)
75 90  
76 91 def test_encrypted(self):
77 92 """
... ... @@ -140,6 +155,7 @@ class TestDdeLinks(unittest.TestCase):
140 155 """
141 156 return [o for o in output.splitlines()]
142 157  
  158 + @unittest.skipIf(*SKIP_PYPY_WIN)
143 159 def test_with_dde(self):
144 160 """ check that dde links appear on stdout """
145 161 filename = 'dde-test-from-office2003.doc.zip'
... ... @@ -158,6 +174,7 @@ class TestDdeLinks(unittest.TestCase):
158 174 self.assertEqual(len(self.get_dde_from_output(output)), 0,
159 175 msg='Found dde links in output of ' + filename)
160 176  
  177 + @unittest.skipIf(*SKIP_PYPY_WIN)
161 178 def test_with_dde_utf16le(self):
162 179 """ check that dde links appear on stdout """
163 180 filename = 'dde-test-from-office2013-utf_16le-korean.doc.zip'
... ... @@ -179,6 +196,7 @@ class TestDdeLinks(unittest.TestCase):
179 196 msg='unexpected output for dde-test.{0}: {1}'
180 197 .format(extn, output))
181 198  
  199 + @unittest.skipIf(*SKIP_PYPY_WIN)
182 200 def test_xml(self):
183 201 """ check that dde in xml from word / excel is found """
184 202 for filename in ('dde-in-excel2003.xml',
... ...
tests/oleid/test_basic.py
... ... @@ -90,14 +90,14 @@ class TestOleIDBasic(unittest.TestCase):
90 90 for filename, value_dict in self.oleids:
91 91 # print('Debugging: testing file {0}'.format(filename))
92 92 self.assertEqual(value_dict['ext_rels'],
93   - '/external_link/' in filename)
  93 + os.sep + 'external_link' + os.sep in filename)
94 94  
95 95 def test_objectpool(self):
96 96 """Test indicator for ObjectPool stream in ole files."""
97 97 for filename, value_dict in self.oleids:
98 98 # print('Debugging: testing file {0}'.format(filename))
99   - if (filename.startswith('oleobj/sample_with_')
100   - or filename.startswith('oleobj/embedded')) \
  99 + if (filename.startswith(join('oleobj', 'sample_with_'))
  100 + or filename.startswith(join('oleobj', 'embedded'))) \
101 101 and (filename.endswith('.doc')
102 102 or filename.endswith('.dot')):
103 103 self.assertTrue(value_dict['ObjectPool'])
... ... @@ -106,6 +106,15 @@ class TestOleIDBasic(unittest.TestCase):
106 106  
107 107 def test_macros(self):
108 108 """Test indicator for macros."""
  109 + find_vba = (
  110 + join('ooxml', 'dde-in-excel2003.xml'), # not really
  111 + join('encrypted', 'autostart-encrypt-standardpassword.xls'),
  112 + join('msodde', 'dde-in-csv.csv'), # "Windows" "calc.exe"
  113 + join('msodde', 'dde-in-excel2003.xml'), # same as above
  114 + join('oleform', 'oleform-PR314.docm'),
  115 + join('basic', 'empty'), # WTF?
  116 + join('basic', 'text'),
  117 + )
109 118 for filename, value_dict in self.oleids:
110 119 # TODO: we need a sample file with xlm macros
111 120 before_dot, suffix = splitext(filename)
... ... @@ -119,18 +128,10 @@ class TestOleIDBasic(unittest.TestCase):
119 128 self.assertIn(value_dict['xlm'], ('Unknown', 'No'))
120 129  
121 130 # "macro detection" in text files leads to interesting results:
122   - if filename in ('ooxml/dde-in-excel2003.xml', # not really
123   - 'encrypted/autostart-encrypt-standardpassword.xls',
124   - 'msodde/dde-in-csv.csv', # "Windows" "calc.exe"
125   - 'msodde/dde-in-excel2003.xml', # same as above
126   - 'oleform/oleform-PR314.docm',
127   - 'basic/empty', # WTF?
128   - 'basic/text', # no macros!
129   - 'olevba/sample_with_vba.ppt',
130   - ):
  131 + if filename in find_vba: # no macros!
131 132 self.assertEqual(value_dict['vba'], 'Yes')
132 133 else:
133   - self.assertEqual(value_dict['vba'], 'No')
  134 + self.assertIn(value_dict['vba'], ('No', 'Error'))
134 135  
135 136 def test_flash(self):
136 137 """Test indicator for flash."""
... ...
tests/oleid/test_issue_166.py
... ... @@ -2,17 +2,17 @@
2 2 Test if oleid detects encrypted documents
3 3 """
4 4  
5   -import unittest, sys, os
6   -
7   -from tests.test_utils import DATA_BASE_DIR
  5 +import unittest
8 6 from os.path import join
  7 +from tests.test_utils import DATA_BASE_DIR
9 8  
10 9 from oletools import oleid
11 10  
  11 +
12 12 class TestEncryptedDocumentDetection(unittest.TestCase):
13 13 def test_encrypted_document_detection(self):
14 14 """ Run oleid and check if the document is flagged as encrypted """
15   - filename = join(DATA_BASE_DIR, 'basic/encrypted.docx')
  15 + filename = join(DATA_BASE_DIR, 'basic', 'encrypted.docx')
16 16  
17 17 oleid_instance = oleid.OleID(filename)
18 18 indicators = oleid_instance.check()
... ... @@ -21,6 +21,7 @@ class TestEncryptedDocumentDetection(unittest.TestCase):
21 21  
22 22 self.assertEqual(is_encrypted, True)
23 23  
  24 +
24 25 # just in case somebody calls this file as a script
25 26 if __name__ == '__main__':
26 27 unittest.main()
27 28 \ No newline at end of file
... ...
tests/olevba/test_basic.py
... ... @@ -120,10 +120,14 @@ class TestOlevbaBasic(unittest.TestCase):
120 120 args=[full_name, ] + ADD_ARGS,
121 121 accept_nonzero_exit=True)
122 122 output = json.loads(out_str)
123   - self.assertEqual(len(output), 2)
  123 + self.assertGreaterEqual(len(output), 2)
124 124 self.assertEqual(output[0]['type'], 'MetaInformation')
125 125 self.assertEqual(output[0]['script_name'], 'olevba')
126   - result = output[1]
  126 + for entry in output[1:]:
  127 + if entry['type'] in ('msg', 'warning'):
  128 + continue # ignore messages
  129 + result = entry
  130 + break
127 131 self.assertTrue(result['json_conversion_successful'])
128 132 if suffix in ('.xlsb', '.xltm', '.xlsm'):
129 133 # TODO: cannot extract xlm macros for these types yet
... ...
tests/olevba/test_crypto.py
... ... @@ -40,7 +40,7 @@ class OlevbaCryptoWriteProtectTest(unittest.TestCase):
40 40 exclude_stderr=True)
41 41 data = json.loads(output, object_pairs_hook=OrderedDict)
42 42 # debug: json.dump(data, sys.stdout, indent=4)
43   - self.assertIn(len(data), (3, 4))
  43 + self.assertGreaterEqual(len(data), 3)
44 44  
45 45 # first 2 parts: general info about script and file
46 46 self.assertIn('script_name', data[0])
... ... @@ -53,22 +53,23 @@ class OlevbaCryptoWriteProtectTest(unittest.TestCase):
53 53 self.assertEqual(data[1]['type'], 'OLE')
54 54 self.assertTrue(data[1]['json_conversion_successful'])
55 55  
56   - # possible VBA stomping warning
57   - if len(data) == 4:
58   - self.assertEqual(data[2]['type'], 'msg')
59   - self.assertIn('VBA stomping', data[2]['msg'])
  56 + for entry in data[2:]:
  57 + if entry['type'] in ('msg', 'warning'):
  58 + continue
  59 + result = entry
  60 + break
60 61  
61 62 # last part is the actual result
62   - self.assertEqual(data[-1]['container'], example_file)
63   - self.assertNotEqual(data[-1]['file'], example_file)
64   - self.assertEqual(data[-1]['type'], "OpenXML")
65   - analysis = data[-1]['analysis']
  63 + self.assertEqual(result['container'], example_file)
  64 + self.assertNotEqual(result['file'], example_file)
  65 + self.assertEqual(result['type'], "OpenXML")
  66 + analysis = result['analysis']
66 67 self.assertEqual(analysis[0]['type'], 'AutoExec')
67 68 self.assertEqual(analysis[0]['keyword'], 'Auto_Open')
68   - macros = data[-1]['macros']
  69 + macros = result['macros']
69 70 self.assertEqual(macros[0]['vba_filename'], 'Modul1.bas')
70 71 self.assertIn('Sub Auto_Open()', macros[0]['code'])
71   - self.assertTrue(data[-1]['json_conversion_successful'])
  72 + self.assertTrue(result['json_conversion_successful'])
72 73  
73 74  
74 75 if __name__ == '__main__':
... ...
tests/ooxml/test_zip_sub_file.py
... ... @@ -111,8 +111,8 @@ class TestZipSubFile(unittest.TestCase):
111 111 self.assertEqual(self.subfile.tell(), self.compare.tell())
112 112  
113 113 # seek backward (only implemented case: back to start)
114   - self.subfile.seek(-self.subfile.tell(), os.SEEK_CUR)
115   - self.compare.seek(-self.compare.tell(), os.SEEK_CUR)
  114 + self.subfile.seek(-1 * self.subfile.tell(), os.SEEK_CUR)
  115 + self.compare.seek(-1 * self.compare.tell(), os.SEEK_CUR)
116 116 self.assertEqual(self.subfile.read(1), self.compare.read(1))
117 117 self.assertEqual(self.subfile.tell(), self.compare.tell())
118 118  
... ...
tests/rtfobj/test_issue_185.py
1   -import unittest, sys, os
2   -
  1 +import unittest
  2 +from os.path import join
3 3 from tests.test_utils import testdata_reader
4 4 from oletools import rtfobj
5 5  
  6 +
6 7 class TestRtfObjIssue185(unittest.TestCase):
7 8 def test_skip_space_after_bin_control_word(self):
8   - data = testdata_reader.read_encrypted('rtfobj/issue_185.rtf.zip')
  9 + data = testdata_reader.read_encrypted(join('rtfobj', 'issue_185.rtf.zip'))
9 10 rtfp = rtfobj.RtfObjParser(data)
10 11 rtfp.parse()
11 12 objects = rtfp.objects
12 13  
13 14 self.assertTrue(len(objects) == 1)
14 15  
  16 +
15 17 if __name__ == '__main__':
16 18 unittest.main()
... ...
tests/rtfobj/test_issue_251.py
1   -import unittest, sys, os
2   -
  1 +import unittest
  2 +from os.path import join
3 3 from tests.test_utils import testdata_reader
4 4 from oletools import rtfobj
5 5  
  6 +
6 7 class TestRtfObjIssue251(unittest.TestCase):
7 8 def test_bin_no_param(self):
8   - data = testdata_reader.read('rtfobj/issue_251.rtf')
  9 + data = testdata_reader.read(join('rtfobj', 'issue_251.rtf'))
9 10 rtfp = rtfobj.RtfObjParser(data)
10 11 rtfp.parse()
11 12 objects = rtfp.objects
12 13  
13 14 self.assertTrue(len(objects) == 1)
14 15  
  16 +
15 17 if __name__ == '__main__':
16 18 unittest.main()
... ...
tests/test_utils/testdata_reader.py
... ... @@ -100,6 +100,9 @@ def decrypt_sample(relpath):
100 100  
101 101 Code based on test_encoding_handler.temp_file().
102 102  
  103 + Note: this causes problems if running with PyPy on Windows. The `unlink`
  104 + fails because the file is "still being used by another process".
  105 +
103 106 :param relpath: path inside `DATA_BASE_DIR`, should end in '.zip'
104 107 :return: absolute path name to decrypted sample.
105 108 """
... ...