Commit c3aa06c8e3d2a2714715013f0ad00fe1e61db7db

Authored by Philippe Lagadec
1 parent f92bfd75

xglob: fixed issues in iter_files + yield container name

oletools/thirdparty/xglob/xglob.py
... ... @@ -20,7 +20,7 @@ For more info and updates: http://www.decalage.info/xglob
20 20  
21 21 # LICENSE:
22 22 #
23   -# xglob is copyright (c) 2013-2014, Philippe Lagadec (http://www.decalage.info)
  23 +# xglob is copyright (c) 2013-2015, Philippe Lagadec (http://www.decalage.info)
24 24 # All rights reserved.
25 25 #
26 26 # Redistribution and use in source and binary forms, with or without modification,
... ... @@ -49,8 +49,9 @@ For more info and updates: http://www.decalage.info/xglob
49 49 # 2013-12-04 v0.01 PL: - scan several files from command line args
50 50 # 2014-01-14 v0.02 PL: - added riglob, ziglob
51 51 # 2014-12-26 v0.03 PL: - moved code from balbuzard into a separate package
  52 +# 2015-01-03 v0.04 PL: - fixed issues in iter_files + yield container name
52 53  
53   -__version__ = '0.03'
  54 +__version__ = '0.04'
54 55  
55 56  
56 57 #=== IMPORTS =================================================================
... ... @@ -96,7 +97,7 @@ def ziglob (zipfileobj, pathname):
96 97 filenames, using wildcards, e.g. *.txt
97 98 """
98 99 files = zipfileobj.namelist()
99   - for f in files: print f
  100 + #for f in files: print f
100 101 for f in fnmatch.filter(files, pathname):
101 102 yield f
102 103  
... ... @@ -110,10 +111,13 @@ def iter_files(files, recursive=False, zip_password=None, zip_fname='*'):
110 111 - if not, then each file is opened as a zip archive with the provided password
111 112 - then files matching zip_fname are opened from the zip archive
112 113  
113   - Iterator: yields (filename, data) for each file. If zip_password is None, then
114   - only the filename is returned, and data=None. Otherwise data is the file
115   - content.
  114 + Iterator: yields (container, filename, data) for each file. If zip_password is None, then
  115 + only the filename is returned, and container and data are both None. Otherwise container is the
  116 + filename of the container (zip file), and data is the file content.
116 117 """
  118 + #TODO: catch exceptions and yield them for the caller (no file found, file is not zip, wrong password, etc)
  119 + #TODO: use logging instead of printing
  120 + #TODO: split in two simpler functions, the caller knows if it's a zip or not
117 121 # choose recursive or non-recursive iglob:
118 122 if recursive:
119 123 iglob = riglob
... ... @@ -123,18 +127,19 @@ def iter_files(files, recursive=False, zip_password=None, zip_fname='*'):
123 127 for filename in iglob(filespec):
124 128 if zip_password is not None:
125 129 # Each file is expected to be a zip archive:
126   - print 'Opening zip archive %s with provided password' % filename
  130 + #print 'Opening zip archive %s with provided password' % filename
127 131 z = zipfile.ZipFile(filename, 'r')
128   - print 'Looking for file(s) matching "%s"' % zip_fname
129   - for filename in ziglob(z, zip_fname):
130   - print 'Opening file in zip archive:', filename
131   - data = z.read(filename, zip_password)
132   - yield filename, data
  132 + #print 'Looking for file(s) matching "%s"' % zip_fname
  133 + for subfilename in ziglob(z, zip_fname):
  134 + #print 'Opening file in zip archive:', filename
  135 + data = z.read(subfilename, zip_password)
  136 + yield filename, subfilename, data
  137 + z.close()
133 138 else:
134 139 # normal file
135 140 # do not read the file content, just yield the filename
136   - yield filename, None
137   - print 'Opening file', filename
  141 + yield None, filename, None
  142 + #print 'Opening file', filename
138 143 #data = open(filename, 'rb').read()
139   - #yield filename, data
  144 + #yield None, filename, data
140 145  
... ...