Commit 48c753b4a708b978b13f8efe0bb3567489b77544

Authored by decalage2
1 parent 31479934

oledir: fixed absolute imports, added optparse, added support for zip files and wildcards

Showing 1 changed file with 116 additions and 68 deletions
oletools/oledir.py
... ... @@ -2,7 +2,7 @@
2 2 """
3 3 oledir.py
4 4  
5   -oledir parses OLE files to display technical information about its directory
  5 +oledir parses OLE files to display technical information about their directory
6 6 entries, including deleted/orphan streams/storages and unused entries.
7 7  
8 8 Author: Philippe Lagadec - http://www.decalage.info
... ... @@ -14,7 +14,7 @@ http://www.decalage.info/python/oletools
14 14  
15 15 #=== LICENSE ==================================================================
16 16  
17   -# oledir is copyright (c) 2015-2016 Philippe Lagadec (http://www.decalage.info)
  17 +# oledir is copyright (c) 2015-2017 Philippe Lagadec (http://www.decalage.info)
18 18 # All rights reserved.
19 19 #
20 20 # Redistribution and use in source and binary forms, with or without modification,
... ... @@ -37,6 +37,7 @@ http://www.decalage.info/python/oletools
37 37 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 38 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 39  
  40 +from __future__ import print_function
40 41  
41 42 #------------------------------------------------------------------------------
42 43 # CHANGELOG:
... ... @@ -45,8 +46,10 @@ http://www.decalage.info/python/oletools
45 46 # 2016-01-13 v0.03 PL: - replaced prettytable by tablestream, added colors
46 47 # 2016-07-20 v0.50 SL: - added Python 3 support
47 48 # 2016-08-09 PL: - fixed issue #77 (imports from thirdparty dir)
  49 +# 2017-03-08 v0.51 PL: - fixed absolute imports, added optparse
  50 +# - added support for zip files and wildcards
48 51  
49   -__version__ = '0.50'
  52 +__version__ = '0.51dev2'
50 53  
51 54 #------------------------------------------------------------------------------
52 55 # TODO:
... ... @@ -55,12 +58,22 @@ __version__ = '0.50'
55 58  
56 59 # === IMPORTS ================================================================
57 60  
58   -import sys, os
  61 +import sys, os, optparse
59 62  
60   -# add the thirdparty subfolder to sys.path (absolute+normalized path):
  63 +# IMPORTANT: it should be possible to run oletools directly as scripts
  64 +# in any directory without installing them with pip or setup.py.
  65 +# In that case, relative imports are NOT usable.
  66 +# And to enable Python 2+3 compatibility, we need to use absolute imports,
  67 +# so we add the oletools parent folder to sys.path (absolute+normalized path):
61 68 _thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
62 69 # print('_thismodule_dir = %r' % _thismodule_dir)
63   -# assumption: the thirdparty dir is a subfolder:
  70 +_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
  71 +# print('_parent_dir = %r' % _parent_dir)
  72 +if not _parent_dir in sys.path:
  73 + sys.path.insert(0, _parent_dir)
  74 +
  75 +# we also need the thirdparty dir for colorclass
  76 +# TODO: remove colorclass from thirdparty, make it a dependency
64 77 _thirdparty_dir = os.path.normpath(os.path.join(_thismodule_dir, 'thirdparty'))
65 78 # print('_thirdparty_dir = %r' % _thirdparty_dir)
66 79 if not _thirdparty_dir in sys.path:
... ... @@ -72,12 +85,15 @@ import colorclass
72 85 if os.name == 'nt':
73 86 colorclass.Windows.enable(auto_colors=True)
74 87  
75   -import olefile
76   -from tablestream import tablestream
  88 +from oletools.thirdparty import olefile
  89 +from oletools.thirdparty.tablestream import tablestream
  90 +from oletools.thirdparty.xglob import xglob
77 91  
78 92  
79 93 # === CONSTANTS ==============================================================
80 94  
  95 +BANNER = 'oledir %s - http://decalage.info/python/oletools' % __version__
  96 +
81 97 STORAGE_NAMES = {
82 98 olefile.STGTY_EMPTY: 'Empty',
83 99 olefile.STGTY_STORAGE: 'Storage',
... ... @@ -115,72 +131,104 @@ def sid_display(sid):
115 131 # === MAIN ===================================================================
116 132  
117 133 def main():
  134 + usage = 'usage: oledir [options] <filename> [filename2 ...]'
  135 + parser = optparse.OptionParser(usage=usage)
  136 + parser.add_option("-r", action="store_true", dest="recursive",
  137 + help='find files recursively in subdirectories.')
  138 + parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
  139 + help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)')
  140 + parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
  141 + help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
  142 + # parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,
  143 + # help="logging level debug/info/warning/error/critical (default=%default)")
  144 +
  145 + # TODO: add logfile option
  146 +
  147 + (options, args) = parser.parse_args()
  148 +
  149 + # Print help if no arguments are passed
  150 + if len(args) == 0:
  151 + print(BANNER)
  152 + print(__doc__)
  153 + parser.print_help()
  154 + sys.exit()
  155 +
118 156 # print banner with version
119   - print('oledir %s - http://decalage.info/python/oletools' % __version__)
  157 + print(BANNER)
120 158  
121 159 if os.name == 'nt':
122 160 colorclass.Windows.enable(auto_colors=True, reset_atexit=True)
123 161  
124   - fname = sys.argv[1]
125   - print('OLE directory entries in file %s:' % fname)
126   - ole = olefile.OleFileIO(fname)
127   - # ole.dumpdirectory()
128   -
129   - # t = prettytable.PrettyTable(('id', 'Status', 'Type', 'Name', 'Left', 'Right', 'Child', '1st Sect', 'Size'))
130   - # t.align = 'l'
131   - # t.max_width['id'] = 4
132   - # t.max_width['Status'] = 6
133   - # t.max_width['Type'] = 10
134   - # t.max_width['Name'] = 10
135   - # t.max_width['Left'] = 5
136   - # t.max_width['Right'] = 5
137   - # t.max_width['Child'] = 5
138   - # t.max_width['1st Sect'] = 8
139   - # t.max_width['Size'] = 6
140   -
141   - table = tablestream.TableStream(column_width=[4, 6, 7, 22, 5, 5, 5, 8, 6],
142   - header_row=('id', 'Status', 'Type', 'Name', 'Left', 'Right', 'Child', '1st Sect', 'Size'),
143   - style=tablestream.TableStyleSlim)
144   -
145   - # TODO: read ALL the actual directory entries from the directory stream, because olefile does not!
146   - # TODO: OR fix olefile!
147   - # TODO: olefile should store or give access to the raw direntry data on demand
148   - # TODO: oledir option to hexdump the raw direntries
149   - # TODO: olefile should be less picky about incorrect directory structures
150   -
151   - for id in range(len(ole.direntries)):
152   - d = ole.direntries[id]
153   - if d is None:
154   - # this direntry is not part of the tree: either unused or an orphan
155   - d = ole._load_direntry(id) #ole.direntries[id]
156   - # print('%03d: %s *** ORPHAN ***' % (id, d.name))
157   - if d.entry_type == olefile.STGTY_EMPTY:
158   - status = 'unused'
159   - else:
160   - status = 'ORPHAN'
  162 + for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
  163 + zip_password=options.zip_password, zip_fname=options.zip_fname):
  164 + # ignore directory names stored in zip files:
  165 + if container and filename.endswith('/'):
  166 + continue
  167 + full_name = '%s in %s' % (filename, container) if container else filename
  168 + print('OLE directory entries in file %s:' % full_name)
  169 + if data is not None:
  170 + # data extracted from zip file
  171 + ole = olefile.OleFileIO(data)
161 172 else:
162   - # print('%03d: %s' % (id, d.name))
163   - status = '<Used>'
164   - if d.name.startswith('\x00'):
165   - # this may happen with unused entries, the name may be filled with zeroes
166   - name = ''
167   - else:
168   - # handle non-printable chars using repr(), remove quotes:
169   - name = repr(d.name)[1:-1]
170   - left = sid_display(d.sid_left)
171   - right = sid_display(d.sid_right)
172   - child = sid_display(d.sid_child)
173   - entry_type = STORAGE_NAMES.get(d.entry_type, 'Unknown')
174   - etype_color = STORAGE_COLORS.get(d.entry_type, 'red')
175   - status_color = STATUS_COLORS.get(status, 'red')
176   -
177   - # print(' type=%7s sid_left=%s sid_right=%s sid_child=%s'
178   - # %(entry_type, left, right, child))
179   - # t.add_row((id, status, entry_type, name, left, right, child, hex(d.isectStart), d.size))
180   - table.write_row((id, status, entry_type, name, left, right, child, '%X' % d.isectStart, d.size),
181   - colors=(None, status_color, etype_color, None, None, None, None, None, None))
182   - ole.close()
183   - # print t
  173 + # normal filename
  174 + ole = olefile.OleFileIO(filename)
  175 + # ole.dumpdirectory()
  176 +
  177 + # t = prettytable.PrettyTable(('id', 'Status', 'Type', 'Name', 'Left', 'Right', 'Child', '1st Sect', 'Size'))
  178 + # t.align = 'l'
  179 + # t.max_width['id'] = 4
  180 + # t.max_width['Status'] = 6
  181 + # t.max_width['Type'] = 10
  182 + # t.max_width['Name'] = 10
  183 + # t.max_width['Left'] = 5
  184 + # t.max_width['Right'] = 5
  185 + # t.max_width['Child'] = 5
  186 + # t.max_width['1st Sect'] = 8
  187 + # t.max_width['Size'] = 6
  188 +
  189 + table = tablestream.TableStream(column_width=[4, 6, 7, 22, 5, 5, 5, 8, 6],
  190 + header_row=('id', 'Status', 'Type', 'Name', 'Left', 'Right', 'Child', '1st Sect', 'Size'),
  191 + style=tablestream.TableStyleSlim)
  192 +
  193 + # TODO: read ALL the actual directory entries from the directory stream, because olefile does not!
  194 + # TODO: OR fix olefile!
  195 + # TODO: olefile should store or give access to the raw direntry data on demand
  196 + # TODO: oledir option to hexdump the raw direntries
  197 + # TODO: olefile should be less picky about incorrect directory structures
  198 +
  199 + for id in range(len(ole.direntries)):
  200 + d = ole.direntries[id]
  201 + if d is None:
  202 + # this direntry is not part of the tree: either unused or an orphan
  203 + d = ole._load_direntry(id) #ole.direntries[id]
  204 + # print('%03d: %s *** ORPHAN ***' % (id, d.name))
  205 + if d.entry_type == olefile.STGTY_EMPTY:
  206 + status = 'unused'
  207 + else:
  208 + status = 'ORPHAN'
  209 + else:
  210 + # print('%03d: %s' % (id, d.name))
  211 + status = '<Used>'
  212 + if d.name.startswith('\x00'):
  213 + # this may happen with unused entries, the name may be filled with zeroes
  214 + name = ''
  215 + else:
  216 + # handle non-printable chars using repr(), remove quotes:
  217 + name = repr(d.name)[1:-1]
  218 + left = sid_display(d.sid_left)
  219 + right = sid_display(d.sid_right)
  220 + child = sid_display(d.sid_child)
  221 + entry_type = STORAGE_NAMES.get(d.entry_type, 'Unknown')
  222 + etype_color = STORAGE_COLORS.get(d.entry_type, 'red')
  223 + status_color = STATUS_COLORS.get(status, 'red')
  224 +
  225 + # print(' type=%7s sid_left=%s sid_right=%s sid_child=%s'
  226 + # %(entry_type, left, right, child))
  227 + # t.add_row((id, status, entry_type, name, left, right, child, hex(d.isectStart), d.size))
  228 + table.write_row((id, status, entry_type, name, left, right, child, '%X' % d.isectStart, d.size),
  229 + colors=(None, status_color, etype_color, None, None, None, None, None, None))
  230 + ole.close()
  231 + # print t
184 232  
185 233  
186 234 if __name__ == '__main__':
... ...