Commit 48c753b4a708b978b13f8efe0bb3567489b77544

Authored by decalage2
1 parent 31479934

oledir: fixed absolute imports, added optparse, added support for zip files and wildcards

Showing 1 changed file with 116 additions and 68 deletions
oletools/oledir.py
@@ -2,7 +2,7 @@ @@ -2,7 +2,7 @@
2 """ 2 """
3 oledir.py 3 oledir.py
4 4
5 -oledir parses OLE files to display technical information about its directory 5 +oledir parses OLE files to display technical information about their directory
6 entries, including deleted/orphan streams/storages and unused entries. 6 entries, including deleted/orphan streams/storages and unused entries.
7 7
8 Author: Philippe Lagadec - http://www.decalage.info 8 Author: Philippe Lagadec - http://www.decalage.info
@@ -14,7 +14,7 @@ http://www.decalage.info/python/oletools @@ -14,7 +14,7 @@ http://www.decalage.info/python/oletools
14 14
15 #=== LICENSE ================================================================== 15 #=== LICENSE ==================================================================
16 16
17 -# oledir is copyright (c) 2015-2016 Philippe Lagadec (http://www.decalage.info) 17 +# oledir is copyright (c) 2015-2017 Philippe Lagadec (http://www.decalage.info)
18 # All rights reserved. 18 # All rights reserved.
19 # 19 #
20 # Redistribution and use in source and binary forms, with or without modification, 20 # Redistribution and use in source and binary forms, with or without modification,
@@ -37,6 +37,7 @@ http://www.decalage.info/python/oletools @@ -37,6 +37,7 @@ http://www.decalage.info/python/oletools
37 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 37 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 38 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 39
  40 +from __future__ import print_function
40 41
41 #------------------------------------------------------------------------------ 42 #------------------------------------------------------------------------------
42 # CHANGELOG: 43 # CHANGELOG:
@@ -45,8 +46,10 @@ http://www.decalage.info/python/oletools @@ -45,8 +46,10 @@ http://www.decalage.info/python/oletools
45 # 2016-01-13 v0.03 PL: - replaced prettytable by tablestream, added colors 46 # 2016-01-13 v0.03 PL: - replaced prettytable by tablestream, added colors
46 # 2016-07-20 v0.50 SL: - added Python 3 support 47 # 2016-07-20 v0.50 SL: - added Python 3 support
47 # 2016-08-09 PL: - fixed issue #77 (imports from thirdparty dir) 48 # 2016-08-09 PL: - fixed issue #77 (imports from thirdparty dir)
  49 +# 2017-03-08 v0.51 PL: - fixed absolute imports, added optparse
  50 +# - added support for zip files and wildcards
48 51
49 -__version__ = '0.50' 52 +__version__ = '0.51dev2'
50 53
51 #------------------------------------------------------------------------------ 54 #------------------------------------------------------------------------------
52 # TODO: 55 # TODO:
@@ -55,12 +58,22 @@ __version__ = '0.50' @@ -55,12 +58,22 @@ __version__ = '0.50'
55 58
56 # === IMPORTS ================================================================ 59 # === IMPORTS ================================================================
57 60
58 -import sys, os 61 +import sys, os, optparse
59 62
60 -# add the thirdparty subfolder to sys.path (absolute+normalized path): 63 +# IMPORTANT: it should be possible to run the oletools scripts directly
  64 +# from any directory, without installing them with pip or setup.py.
  65 +# In that case, relative imports are NOT usable.
  66 +# To also stay compatible with both Python 2 and 3, we use absolute imports,
  67 +# so we add the oletools parent folder to sys.path (absolute+normalized path):
61 _thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__))) 68 _thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
62 # print('_thismodule_dir = %r' % _thismodule_dir) 69 # print('_thismodule_dir = %r' % _thismodule_dir)
63 -# assumption: the thirdparty dir is a subfolder: 70 +_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
  71 +# print('_parent_dir = %r' % _parent_dir)
  72 +if not _parent_dir in sys.path:
  73 + sys.path.insert(0, _parent_dir)
  74 +
  75 +# we also need the thirdparty dir for colorclass
  76 +# TODO: remove colorclass from thirdparty, make it a dependency
64 _thirdparty_dir = os.path.normpath(os.path.join(_thismodule_dir, 'thirdparty')) 77 _thirdparty_dir = os.path.normpath(os.path.join(_thismodule_dir, 'thirdparty'))
65 # print('_thirdparty_dir = %r' % _thirdparty_dir) 78 # print('_thirdparty_dir = %r' % _thirdparty_dir)
66 if not _thirdparty_dir in sys.path: 79 if not _thirdparty_dir in sys.path:
@@ -72,12 +85,15 @@ import colorclass @@ -72,12 +85,15 @@ import colorclass
72 if os.name == 'nt': 85 if os.name == 'nt':
73 colorclass.Windows.enable(auto_colors=True) 86 colorclass.Windows.enable(auto_colors=True)
74 87
75 -import olefile  
76 -from tablestream import tablestream 88 +from oletools.thirdparty import olefile
  89 +from oletools.thirdparty.tablestream import tablestream
  90 +from oletools.thirdparty.xglob import xglob
77 91
78 92
79 # === CONSTANTS ============================================================== 93 # === CONSTANTS ==============================================================
80 94
  95 +BANNER = 'oledir %s - http://decalage.info/python/oletools' % __version__
  96 +
81 STORAGE_NAMES = { 97 STORAGE_NAMES = {
82 olefile.STGTY_EMPTY: 'Empty', 98 olefile.STGTY_EMPTY: 'Empty',
83 olefile.STGTY_STORAGE: 'Storage', 99 olefile.STGTY_STORAGE: 'Storage',
@@ -115,72 +131,104 @@ def sid_display(sid): @@ -115,72 +131,104 @@ def sid_display(sid):
115 # === MAIN =================================================================== 131 # === MAIN ===================================================================
116 132
117 def main(): 133 def main():
  134 + usage = 'usage: oledir [options] <filename> [filename2 ...]'
  135 + parser = optparse.OptionParser(usage=usage)
  136 + parser.add_option("-r", action="store_true", dest="recursive",
  137 + help='find files recursively in subdirectories.')
  138 + parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
  139 + help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)')
  140 + parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
  141 + help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
  142 + # parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,
  143 + # help="logging level debug/info/warning/error/critical (default=%default)")
  144 +
  145 + # TODO: add logfile option
  146 +
  147 + (options, args) = parser.parse_args()
  148 +
  149 + # Print help if no arguments are passed
  150 + if len(args) == 0:
  151 + print(BANNER)
  152 + print(__doc__)
  153 + parser.print_help()
  154 + sys.exit()
  155 +
118 # print banner with version 156 # print banner with version
119 - print('oledir %s - http://decalage.info/python/oletools' % __version__) 157 + print(BANNER)
120 158
121 if os.name == 'nt': 159 if os.name == 'nt':
122 colorclass.Windows.enable(auto_colors=True, reset_atexit=True) 160 colorclass.Windows.enable(auto_colors=True, reset_atexit=True)
123 161
124 - fname = sys.argv[1]  
125 - print('OLE directory entries in file %s:' % fname)  
126 - ole = olefile.OleFileIO(fname)  
127 - # ole.dumpdirectory()  
128 -  
129 - # t = prettytable.PrettyTable(('id', 'Status', 'Type', 'Name', 'Left', 'Right', 'Child', '1st Sect', 'Size'))  
130 - # t.align = 'l'  
131 - # t.max_width['id'] = 4  
132 - # t.max_width['Status'] = 6  
133 - # t.max_width['Type'] = 10  
134 - # t.max_width['Name'] = 10  
135 - # t.max_width['Left'] = 5  
136 - # t.max_width['Right'] = 5  
137 - # t.max_width['Child'] = 5  
138 - # t.max_width['1st Sect'] = 8  
139 - # t.max_width['Size'] = 6  
140 -  
141 - table = tablestream.TableStream(column_width=[4, 6, 7, 22, 5, 5, 5, 8, 6],  
142 - header_row=('id', 'Status', 'Type', 'Name', 'Left', 'Right', 'Child', '1st Sect', 'Size'),  
143 - style=tablestream.TableStyleSlim)  
144 -  
145 - # TODO: read ALL the actual directory entries from the directory stream, because olefile does not!  
146 - # TODO: OR fix olefile!  
147 - # TODO: olefile should store or give access to the raw direntry data on demand  
148 - # TODO: oledir option to hexdump the raw direntries  
149 - # TODO: olefile should be less picky about incorrect directory structures  
150 -  
151 - for id in range(len(ole.direntries)):  
152 - d = ole.direntries[id]  
153 - if d is None:  
154 - # this direntry is not part of the tree: either unused or an orphan  
155 - d = ole._load_direntry(id) #ole.direntries[id]  
156 - # print('%03d: %s *** ORPHAN ***' % (id, d.name))  
157 - if d.entry_type == olefile.STGTY_EMPTY:  
158 - status = 'unused'  
159 - else:  
160 - status = 'ORPHAN' 162 + for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
  163 + zip_password=options.zip_password, zip_fname=options.zip_fname):
  164 + # ignore directory names stored in zip files:
  165 + if container and filename.endswith('/'):
  166 + continue
  167 + full_name = '%s in %s' % (filename, container) if container else filename
  168 + print('OLE directory entries in file %s:' % full_name)
  169 + if data is not None:
  170 + # data extracted from zip file
  171 + ole = olefile.OleFileIO(data)
161 else: 172 else:
162 - # print('%03d: %s' % (id, d.name))  
163 - status = '<Used>'  
164 - if d.name.startswith('\x00'):  
165 - # this may happen with unused entries, the name may be filled with zeroes  
166 - name = ''  
167 - else:  
168 - # handle non-printable chars using repr(), remove quotes:  
169 - name = repr(d.name)[1:-1]  
170 - left = sid_display(d.sid_left)  
171 - right = sid_display(d.sid_right)  
172 - child = sid_display(d.sid_child)  
173 - entry_type = STORAGE_NAMES.get(d.entry_type, 'Unknown')  
174 - etype_color = STORAGE_COLORS.get(d.entry_type, 'red')  
175 - status_color = STATUS_COLORS.get(status, 'red')  
176 -  
177 - # print(' type=%7s sid_left=%s sid_right=%s sid_child=%s'  
178 - # %(entry_type, left, right, child))  
179 - # t.add_row((id, status, entry_type, name, left, right, child, hex(d.isectStart), d.size))  
180 - table.write_row((id, status, entry_type, name, left, right, child, '%X' % d.isectStart, d.size),  
181 - colors=(None, status_color, etype_color, None, None, None, None, None, None))  
182 - ole.close()  
183 - # print t 173 + # normal filename
  174 + ole = olefile.OleFileIO(filename)
  175 + # ole.dumpdirectory()
  176 +
  177 + # t = prettytable.PrettyTable(('id', 'Status', 'Type', 'Name', 'Left', 'Right', 'Child', '1st Sect', 'Size'))
  178 + # t.align = 'l'
  179 + # t.max_width['id'] = 4
  180 + # t.max_width['Status'] = 6
  181 + # t.max_width['Type'] = 10
  182 + # t.max_width['Name'] = 10
  183 + # t.max_width['Left'] = 5
  184 + # t.max_width['Right'] = 5
  185 + # t.max_width['Child'] = 5
  186 + # t.max_width['1st Sect'] = 8
  187 + # t.max_width['Size'] = 6
  188 +
  189 + table = tablestream.TableStream(column_width=[4, 6, 7, 22, 5, 5, 5, 8, 6],
  190 + header_row=('id', 'Status', 'Type', 'Name', 'Left', 'Right', 'Child', '1st Sect', 'Size'),
  191 + style=tablestream.TableStyleSlim)
  192 +
  193 + # TODO: read ALL the actual directory entries from the directory stream, because olefile does not!
  194 + # TODO: OR fix olefile!
  195 + # TODO: olefile should store or give access to the raw direntry data on demand
  196 + # TODO: oledir option to hexdump the raw direntries
  197 + # TODO: olefile should be less picky about incorrect directory structures
  198 +
  199 + for id in range(len(ole.direntries)):
  200 + d = ole.direntries[id]
  201 + if d is None:
  202 + # this direntry is not part of the tree: either unused or an orphan
  203 + d = ole._load_direntry(id) #ole.direntries[id]
  204 + # print('%03d: %s *** ORPHAN ***' % (id, d.name))
  205 + if d.entry_type == olefile.STGTY_EMPTY:
  206 + status = 'unused'
  207 + else:
  208 + status = 'ORPHAN'
  209 + else:
  210 + # print('%03d: %s' % (id, d.name))
  211 + status = '<Used>'
  212 + if d.name.startswith('\x00'):
  213 + # this may happen with unused entries, the name may be filled with zeroes
  214 + name = ''
  215 + else:
  216 + # handle non-printable chars using repr(), remove quotes:
  217 + name = repr(d.name)[1:-1]
  218 + left = sid_display(d.sid_left)
  219 + right = sid_display(d.sid_right)
  220 + child = sid_display(d.sid_child)
  221 + entry_type = STORAGE_NAMES.get(d.entry_type, 'Unknown')
  222 + etype_color = STORAGE_COLORS.get(d.entry_type, 'red')
  223 + status_color = STATUS_COLORS.get(status, 'red')
  224 +
  225 + # print(' type=%7s sid_left=%s sid_right=%s sid_child=%s'
  226 + # %(entry_type, left, right, child))
  227 + # t.add_row((id, status, entry_type, name, left, right, child, hex(d.isectStart), d.size))
  228 + table.write_row((id, status, entry_type, name, left, right, child, '%X' % d.isectStart, d.size),
  229 + colors=(None, status_color, etype_color, None, None, None, None, None, None))
  230 + ole.close()
  231 + # print t
184 232
185 233
186 if __name__ == '__main__': 234 if __name__ == '__main__':