Commit 8c59b3e4cea9c2967c85484281d06dacd4d74c2b
1 parent
6fa2ba97
fixed raise and print statements for Python 3, removed legacy olefile2 - issue #62
Showing
9 changed files
with
464 additions
and
2748 deletions
oletools/mraptor.py
| ... | ... | @@ -53,6 +53,7 @@ http://www.decalage.info/python/oletools |
| 53 | 53 | # 2016-03-08 v0.04 PL: - collapse long lines before analysis |
| 54 | 54 | # 2016-08-31 v0.50 PL: - added macro trigger InkPicture_Painted |
| 55 | 55 | # 2016-09-05 PL: - added Document_BeforeClose keyword for MS Publisher (.pub) |
| 56 | +# 2016-10-25 PL: - fixed print for Python 3 | |
| 56 | 57 | |
| 57 | 58 | __version__ = '0.50' |
| 58 | 59 | |
| ... | ... | @@ -239,16 +240,16 @@ def main(): |
| 239 | 240 | |
| 240 | 241 | # Print help if no arguments are passed |
| 241 | 242 | if len(args) == 0: |
| 242 | - print __doc__ | |
| 243 | + print(__doc__) | |
| 243 | 244 | parser.print_help() |
| 244 | - print '\nAn exit code is returned based on the analysis result:' | |
| 245 | + print('\nAn exit code is returned based on the analysis result:') | |
| 245 | 246 | for result in (Result_NoMacro, Result_NotMSOffice, Result_MacroOK, Result_Error, Result_Suspicious): |
| 246 | - print ' - %d: %s' % (result.exit_code, result.name) | |
| 247 | + print(' - %d: %s' % (result.exit_code, result.name)) | |
| 247 | 248 | sys.exit() |
| 248 | 249 | |
| 249 | 250 | # print banner with version |
| 250 | - print 'MacroRaptor %s - http://decalage.info/python/oletools' % __version__ | |
| 251 | - print 'This is work in progress, please report issues at %s' % URL_ISSUES | |
| 251 | + print('MacroRaptor %s - http://decalage.info/python/oletools' % __version__) | |
| 252 | + print('This is work in progress, please report issues at %s' % URL_ISSUES) | |
| 252 | 253 | |
| 253 | 254 | logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s') |
| 254 | 255 | # enable logging in the modules: |
| ... | ... | @@ -325,9 +326,9 @@ def main(): |
| 325 | 326 | global_result = result |
| 326 | 327 | exitcode = result.exit_code |
| 327 | 328 | |
| 328 | - print '' | |
| 329 | - print 'Flags: A=AutoExec, W=Write, X=Execute' | |
| 330 | - print 'Exit code: %d - %s' % (exitcode, global_result.name) | |
| 329 | + print('') | |
| 330 | + print('Flags: A=AutoExec, W=Write, X=Execute') | |
| 331 | + print('Exit code: %d - %s' % (exitcode, global_result.name)) | |
| 331 | 332 | sys.exit(exitcode) |
| 332 | 333 | |
| 333 | 334 | if __name__ == '__main__': | ... | ... |
oletools/oleid.py
| ... | ... | @@ -18,7 +18,7 @@ http://www.decalage.info/python/oletools |
| 18 | 18 | |
| 19 | 19 | #=== LICENSE ================================================================= |
| 20 | 20 | |
| 21 | -# oleid is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info) | |
| 21 | +# oleid is copyright (c) 2012-2016, Philippe Lagadec (http://www.decalage.info) | |
| 22 | 22 | # All rights reserved. |
| 23 | 23 | # |
| 24 | 24 | # Redistribution and use in source and binary forms, with or without modification, |
| ... | ... | @@ -48,8 +48,9 @@ http://www.decalage.info/python/oletools |
| 48 | 48 | # 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL |
| 49 | 49 | # - improved usage display with -h |
| 50 | 50 | # 2014-11-30 v0.03 PL: - improved output with prettytable |
| 51 | +# 2016-10-25 v0.50 PL: - fixed print for Python 3 | |
| 51 | 52 | |
| 52 | -__version__ = '0.03' | |
| 53 | +__version__ = '0.50' | |
| 53 | 54 | |
| 54 | 55 | |
| 55 | 56 | #------------------------------------------------------------------------------ |
| ... | ... | @@ -275,7 +276,7 @@ def main(): |
| 275 | 276 | return |
| 276 | 277 | |
| 277 | 278 | for filename in args: |
| 278 | - print '\nFilename:', filename | |
| 279 | + print('\nFilename:', filename) | |
| 279 | 280 | oleid = OleID(filename) |
| 280 | 281 | indicators = oleid.check() |
| 281 | 282 | |
| ... | ... | @@ -290,7 +291,7 @@ def main(): |
| 290 | 291 | #print '%s: %s' % (indicator.name, indicator.value) |
| 291 | 292 | t.add_row((indicator.name, indicator.value)) |
| 292 | 293 | |
| 293 | - print t | |
| 294 | + print(t) | |
| 294 | 295 | |
| 295 | 296 | if __name__ == '__main__': |
| 296 | 297 | main() | ... | ... |
oletools/olemeta.py
| ... | ... | @@ -45,6 +45,7 @@ http://www.decalage.info/python/oletools |
| 45 | 45 | # - improved usage display |
| 46 | 46 | # 2015-12-29 v0.03 PL: - only display properties present in the file |
| 47 | 47 | # 2016-09-06 v0.50 PL: - added main entry point for setup.py |
| 48 | +# 2016-10-25 PL: - fixed print for Python 3 | |
| 48 | 49 | |
| 49 | 50 | __version__ = '0.50' |
| 50 | 51 | |
| ... | ... | @@ -93,7 +94,7 @@ def main(): |
| 93 | 94 | value = str(value) |
| 94 | 95 | t.write_row([prop, value], colors=[None, 'yellow']) |
| 95 | 96 | t.close() |
| 96 | - print '' | |
| 97 | + print('') | |
| 97 | 98 | |
| 98 | 99 | print('Properties from the DocumentSummaryInformation stream:') |
| 99 | 100 | t = tablestream.TableStream([21, 30], header_row=['Property', 'Value'], outfile=console_utf8) | ... | ... |
oletools/olevba.py
| ... | ... | @@ -73,6 +73,8 @@ https://github.com/unixfreak0037/officeparser |
| 73 | 73 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 74 | 74 | # SOFTWARE. |
| 75 | 75 | |
| 76 | +from __future__ import print_function | |
| 77 | + | |
| 76 | 78 | #------------------------------------------------------------------------------ |
| 77 | 79 | # CHANGELOG: |
| 78 | 80 | # 2014-08-05 v0.01 PL: - first version based on officeparser code |
| ... | ... | @@ -184,6 +186,7 @@ https://github.com/unixfreak0037/officeparser |
| 184 | 186 | # 2016-09-05 PL: - added autoexec keywords for MS Publisher (.pub) |
| 185 | 187 | # 2016-09-06 PL: - fixed issue #20, is_zipfile on Python 2.6 |
| 186 | 188 | # 2016-09-12 PL: - enabled packrat to improve pyparsing performance |
| 189 | +# 2016-10-25 PL: - fixed raise and print statements for Python 3 | |
| 187 | 190 | |
| 188 | 191 | __version__ = '0.50' |
| 189 | 192 | |
| ... | ... | @@ -246,9 +249,9 @@ except ImportError: |
| 246 | 249 | # Python <2.5: standalone ElementTree install |
| 247 | 250 | import elementtree.cElementTree as ET |
| 248 | 251 | except ImportError: |
| 249 | - raise ImportError, "lxml or ElementTree are not installed, " \ | |
| 252 | + raise ImportError("lxml or ElementTree are not installed, " \ | |
| 250 | 253 | + "see http://codespeak.net/lxml " \ |
| 251 | - + "or http://effbot.org/zone/element-index.htm" | |
| 254 | + + "or http://effbot.org/zone/element-index.htm") | |
| 252 | 255 | |
| 253 | 256 | import thirdparty.olefile as olefile |
| 254 | 257 | from thirdparty.prettytable import prettytable |
| ... | ... | @@ -1968,18 +1971,18 @@ def print_json(json_dict=None, _json_is_last=False, **json_parts): |
| 1968 | 1971 | json_dict = json_parts |
| 1969 | 1972 | |
| 1970 | 1973 | if not _have_printed_json_start: |
| 1971 | - print '[' | |
| 1974 | + print('[') | |
| 1972 | 1975 | _have_printed_json_start = True |
| 1973 | 1976 | |
| 1974 | 1977 | lines = json.dumps(json2ascii(json_dict), check_circular=False, |
| 1975 | 1978 | indent=4, ensure_ascii=False).splitlines() |
| 1976 | 1979 | for line in lines[:-1]: |
| 1977 | - print ' {0}'.format(line) | |
| 1980 | + print(' {0}'.format(line)) | |
| 1978 | 1981 | if _json_is_last: |
| 1979 | - print ' {0}'.format(lines[-1]) # print last line without comma | |
| 1980 | - print ']' | |
| 1982 | + print(' {0}'.format(lines[-1])) # print last line without comma | |
| 1983 | + print(']') | |
| 1981 | 1984 | else: |
| 1982 | - print ' {0},'.format(lines[-1]) # print last line with comma | |
| 1985 | + print(' {0},'.format(lines[-1])) # print last line with comma | |
| 1983 | 1986 | |
| 1984 | 1987 | |
| 1985 | 1988 | class VBA_Scanner(object): |
| ... | ... | @@ -2934,7 +2937,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2934 | 2937 | """ |
| 2935 | 2938 | # print a waiting message only if the output is not redirected to a file: |
| 2936 | 2939 | if sys.stdout.isatty(): |
| 2937 | - print 'Analysis...\r', | |
| 2940 | + print('Analysis...\r', end='') | |
| 2938 | 2941 | sys.stdout.flush() |
| 2939 | 2942 | results = self.analyze_macros(show_decoded_strings, deobfuscate) |
| 2940 | 2943 | if results: |
| ... | ... | @@ -2950,9 +2953,9 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2950 | 2953 | if not is_printable(description): |
| 2951 | 2954 | description = repr(description) |
| 2952 | 2955 | t.add_row((kw_type, keyword, description)) |
| 2953 | - print t | |
| 2956 | + print(t) | |
| 2954 | 2957 | else: |
| 2955 | - print 'No suspicious keyword or IOC found.' | |
| 2958 | + print('No suspicious keyword or IOC found.') | |
| 2956 | 2959 | |
| 2957 | 2960 | def print_analysis_json(self, show_decoded_strings=False, deobfuscate=False): |
| 2958 | 2961 | """ |
| ... | ... | @@ -2966,7 +2969,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2966 | 2969 | """ |
| 2967 | 2970 | # print a waiting message only if the output is not redirected to a file: |
| 2968 | 2971 | if sys.stdout.isatty(): |
| 2969 | - print 'Analysis...\r', | |
| 2972 | + print('Analysis...\r', end='') | |
| 2970 | 2973 | sys.stdout.flush() |
| 2971 | 2974 | return [dict(type=kw_type, keyword=keyword, description=description) |
| 2972 | 2975 | for kw_type, keyword, description in self.analyze_macros(show_decoded_strings, deobfuscate)] |
| ... | ... | @@ -2995,11 +2998,11 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2995 | 2998 | display_filename = '%s in %s' % (self.filename, self.container) |
| 2996 | 2999 | else: |
| 2997 | 3000 | display_filename = self.filename |
| 2998 | - print '=' * 79 | |
| 2999 | - print 'FILE:', display_filename | |
| 3001 | + print('=' * 79) | |
| 3002 | + print('FILE: %s' % display_filename) | |
| 3000 | 3003 | try: |
| 3001 | 3004 | #TODO: handle olefile errors, when an OLE file is malformed |
| 3002 | - print 'Type:', self.type | |
| 3005 | + print('Type: %s'% self.type) | |
| 3003 | 3006 | if self.detect_vba_macros(): |
| 3004 | 3007 | #print 'Contains VBA Macros:' |
| 3005 | 3008 | for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros(): |
| ... | ... | @@ -3008,29 +3011,29 @@ class VBA_Parser_CLI(VBA_Parser): |
| 3008 | 3011 | vba_code_filtered = filter_vba(vba_code) |
| 3009 | 3012 | else: |
| 3010 | 3013 | vba_code_filtered = vba_code |
| 3011 | - print '-' * 79 | |
| 3012 | - print 'VBA MACRO %s ' % vba_filename | |
| 3013 | - print 'in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)) | |
| 3014 | + print('-' * 79) | |
| 3015 | + print('VBA MACRO %s ' % vba_filename) | |
| 3016 | + print('in file: %s - OLE stream: %s' % (subfilename, repr(stream_path))) | |
| 3014 | 3017 | if display_code: |
| 3015 | - print '- ' * 39 | |
| 3018 | + print('- ' * 39) | |
| 3016 | 3019 | # detect empty macros: |
| 3017 | 3020 | if vba_code_filtered.strip() == '': |
| 3018 | - print '(empty macro)' | |
| 3021 | + print('(empty macro)') | |
| 3019 | 3022 | else: |
| 3020 | - print vba_code_filtered | |
| 3023 | + print(vba_code_filtered) | |
| 3021 | 3024 | for (subfilename, stream_path, form_string) in self.extract_form_strings(): |
| 3022 | - print '-' * 79 | |
| 3023 | - print 'VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path) | |
| 3024 | - print '- ' * 39 | |
| 3025 | - print form_string | |
| 3025 | + print('-' * 79) | |
| 3026 | + print('VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path)) | |
| 3027 | + print('- ' * 39) | |
| 3028 | + print(form_string) | |
| 3026 | 3029 | if not vba_code_only: |
| 3027 | 3030 | # analyse the code from all modules at once: |
| 3028 | 3031 | self.print_analysis(show_decoded_strings, deobfuscate) |
| 3029 | 3032 | if show_deobfuscated_code: |
| 3030 | - print 'MACRO SOURCE CODE WITH DEOBFUSCATED VBA STRINGS (EXPERIMENTAL):\n\n' | |
| 3031 | - print self.reveal() | |
| 3033 | + print('MACRO SOURCE CODE WITH DEOBFUSCATED VBA STRINGS (EXPERIMENTAL):\n\n') | |
| 3034 | + print(self.reveal()) | |
| 3032 | 3035 | else: |
| 3033 | - print 'No VBA macros found.' | |
| 3036 | + print('No VBA macros found.') | |
| 3034 | 3037 | except OlevbaBaseException: |
| 3035 | 3038 | raise |
| 3036 | 3039 | except Exception as exc: |
| ... | ... | @@ -3038,7 +3041,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 3038 | 3041 | log.info('Error processing file %s (%s)' % (self.filename, exc)) |
| 3039 | 3042 | log.debug('Traceback:', exc_info=True) |
| 3040 | 3043 | raise ProcessingError(self.filename, exc) |
| 3041 | - print '' | |
| 3044 | + print('') | |
| 3042 | 3045 | |
| 3043 | 3046 | |
| 3044 | 3047 | def process_file_json(self, show_decoded_strings=False, |
| ... | ... | @@ -3124,7 +3127,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 3124 | 3127 | if self.detect_vba_macros(): |
| 3125 | 3128 | # print a waiting message only if the output is not redirected to a file: |
| 3126 | 3129 | if sys.stdout.isatty(): |
| 3127 | - print 'Analysis...\r', | |
| 3130 | + print('Analysis...\r', end='') | |
| 3128 | 3131 | sys.stdout.flush() |
| 3129 | 3132 | self.analyze_macros(show_decoded_strings=show_decoded_strings, |
| 3130 | 3133 | deobfuscate=deobfuscate) |
| ... | ... | @@ -3142,7 +3145,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 3142 | 3145 | base64obf, dridex, vba_obf) |
| 3143 | 3146 | |
| 3144 | 3147 | line = '%-12s %s' % (flags, self.filename) |
| 3145 | - print line | |
| 3148 | + print(line) | |
| 3146 | 3149 | |
| 3147 | 3150 | # old table display: |
| 3148 | 3151 | # macros = autoexec = suspicious = iocs = hexstrings = 'no' |
| ... | ... | @@ -3235,7 +3238,7 @@ def main(): |
| 3235 | 3238 | |
| 3236 | 3239 | # Print help if no arguments are passed |
| 3237 | 3240 | if len(args) == 0: |
| 3238 | - print __doc__ | |
| 3241 | + print(__doc__) | |
| 3239 | 3242 | parser.print_help() |
| 3240 | 3243 | sys.exit(RETURN_WRONG_ARGS) |
| 3241 | 3244 | |
| ... | ... | @@ -3246,7 +3249,7 @@ def main(): |
| 3246 | 3249 | url='http://decalage.info/python/oletools', |
| 3247 | 3250 | type='MetaInformation') |
| 3248 | 3251 | else: |
| 3249 | - print 'olevba %s - http://decalage.info/python/oletools' % __version__ | |
| 3252 | + print('olevba %s - http://decalage.info/python/oletools' % __version__) | |
| 3250 | 3253 | |
| 3251 | 3254 | logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s') |
| 3252 | 3255 | # enable logging in the modules: |
| ... | ... | @@ -3266,8 +3269,8 @@ def main(): |
| 3266 | 3269 | # Column headers (do not know how many files there will be yet, so if no output_mode |
| 3267 | 3270 | # was specified, we will print triage for first file --> need these headers) |
| 3268 | 3271 | if options.output_mode in ('triage', 'unspecified'): |
| 3269 | - print '%-12s %-65s' % ('Flags', 'Filename') | |
| 3270 | - print '%-12s %-65s' % ('-' * 11, '-' * 65) | |
| 3272 | + print('%-12s %-65s' % ('Flags', 'Filename')) | |
| 3273 | + print('%-12s %-65s' % ('-' * 11, '-' * 65)) | |
| 3271 | 3274 | |
| 3272 | 3275 | previous_container = None |
| 3273 | 3276 | count = 0 |
| ... | ... | @@ -3285,14 +3288,14 @@ def main(): |
| 3285 | 3288 | if isinstance(data, Exception): |
| 3286 | 3289 | if isinstance(data, PathNotFoundException): |
| 3287 | 3290 | if options.output_mode in ('triage', 'unspecified'): |
| 3288 | - print '%-12s %s - File not found' % ('?', filename) | |
| 3291 | + print('%-12s %s - File not found' % ('?', filename)) | |
| 3289 | 3292 | elif options.output_mode != 'json': |
| 3290 | 3293 | log.error('Given path %r does not exist!' % filename) |
| 3291 | 3294 | return_code = RETURN_FILE_NOT_FOUND if return_code == 0 \ |
| 3292 | 3295 | else RETURN_SEVERAL_ERRS |
| 3293 | 3296 | else: |
| 3294 | 3297 | if options.output_mode in ('triage', 'unspecified'): |
| 3295 | - print '%-12s %s - Failed to read from zip file %s' % ('?', filename, container) | |
| 3298 | + print('%-12s %s - Failed to read from zip file %s' % ('?', filename, container)) | |
| 3296 | 3299 | elif options.output_mode != 'json': |
| 3297 | 3300 | log.error('Exception opening/reading %r from zip file %r: %s' |
| 3298 | 3301 | % (filename, container, data)) |
| ... | ... | @@ -3319,7 +3322,7 @@ def main(): |
| 3319 | 3322 | # print container name when it changes: |
| 3320 | 3323 | if container != previous_container: |
| 3321 | 3324 | if container is not None: |
| 3322 | - print '\nFiles in %s:' % container | |
| 3325 | + print('\nFiles in %s:' % container) | |
| 3323 | 3326 | previous_container = container |
| 3324 | 3327 | # summarized output for triage: |
| 3325 | 3328 | vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings, |
| ... | ... | @@ -3337,8 +3340,8 @@ def main(): |
| 3337 | 3340 | |
| 3338 | 3341 | except (SubstreamOpenError, UnexpectedDataError) as exc: |
| 3339 | 3342 | if options.output_mode in ('triage', 'unspecified'): |
| 3340 | - print '%-12s %s - Error opening substream or unexpected ' \ | |
| 3341 | - 'content' % ('?', filename) | |
| 3343 | + print('%-12s %s - Error opening substream or unexpected ' \ | |
| 3344 | + 'content' % ('?', filename)) | |
| 3342 | 3345 | elif options.output_mode == 'json': |
| 3343 | 3346 | print_json(file=filename, type='error', |
| 3344 | 3347 | error=type(exc).__name__, message=str(exc)) |
| ... | ... | @@ -3349,7 +3352,7 @@ def main(): |
| 3349 | 3352 | else RETURN_SEVERAL_ERRS |
| 3350 | 3353 | except FileOpenError as exc: |
| 3351 | 3354 | if options.output_mode in ('triage', 'unspecified'): |
| 3352 | - print '%-12s %s - File format not supported' % ('?', filename) | |
| 3355 | + print('%-12s %s - File format not supported' % ('?', filename)) | |
| 3353 | 3356 | elif options.output_mode == 'json': |
| 3354 | 3357 | print_json(file=filename, type='error', |
| 3355 | 3358 | error=type(exc).__name__, message=str(exc)) |
| ... | ... | @@ -3359,7 +3362,7 @@ def main(): |
| 3359 | 3362 | else RETURN_SEVERAL_ERRS |
| 3360 | 3363 | except ProcessingError as exc: |
| 3361 | 3364 | if options.output_mode in ('triage', 'unspecified'): |
| 3362 | - print '%-12s %s - %s' % ('!ERROR', filename, exc.orig_exc) | |
| 3365 | + print('%-12s %s - %s' % ('!ERROR', filename, exc.orig_exc)) | |
| 3363 | 3366 | elif options.output_mode == 'json': |
| 3364 | 3367 | print_json(file=filename, type='error', |
| 3365 | 3368 | error=type(exc).__name__, |
| ... | ... | @@ -3374,9 +3377,9 @@ def main(): |
| 3374 | 3377 | vba_parser.close() |
| 3375 | 3378 | |
| 3376 | 3379 | if options.output_mode == 'triage': |
| 3377 | - print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \ | |
| 3380 | + print('\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \ | |
| 3378 | 3381 | 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \ |
| 3379 | - 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n' | |
| 3382 | + 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n') | |
| 3380 | 3383 | |
| 3381 | 3384 | if count == 1 and options.output_mode == 'unspecified': |
| 3382 | 3385 | # if options -t, -d and -j were not specified and it's a single file, print details: | ... | ... |
oletools/pyxswf.py
| ... | ... | @@ -55,6 +55,7 @@ http://www.decalage.info/python/oletools |
| 55 | 55 | # 2014-11-29 v0.03 PL: - use olefile instead of OleFileIO_PL |
| 56 | 56 | # - improved usage display with -h |
| 57 | 57 | # 2016-09-06 v0.50 PL: - updated to match the rtfobj API |
| 58 | +# 2016-10-25 PL: - fixed print for Python 3 | |
| 58 | 59 | |
| 59 | 60 | __version__ = '0.50' |
| 60 | 61 | |
| ... | ... | @@ -122,7 +123,7 @@ def main(): |
| 122 | 123 | # check if data contains the SWF magic: FWS or CWS |
| 123 | 124 | data = f.getvalue() |
| 124 | 125 | if 'FWS' in data or 'CWS' in data: |
| 125 | - print 'OLE stream: %s' % repr(direntry.name) | |
| 126 | + print('OLE stream: %s' % repr(direntry.name)) | |
| 126 | 127 | # call xxxswf to scan or extract Flash files: |
| 127 | 128 | xxxswf.disneyland(f, direntry.name, options) |
| 128 | 129 | f.close() |
| ... | ... | @@ -133,7 +134,7 @@ def main(): |
| 133 | 134 | for filename in args: |
| 134 | 135 | for index, orig_len, data in rtfobj.rtf_iter_objects(filename): |
| 135 | 136 | if 'FWS' in data or 'CWS' in data: |
| 136 | - print 'RTF embedded object size %d at index %08X' % (len(data), index) | |
| 137 | + print('RTF embedded object size %d at index %08X' % (len(data), index)) | |
| 137 | 138 | f = StringIO.StringIO(data) |
| 138 | 139 | name = 'RTF_embedded_object_%08X' % index |
| 139 | 140 | # call xxxswf to scan or extract Flash files: | ... | ... |
oletools/thirdparty/olefile/__init__.py
| 1 | -#!/usr/local/bin/python | |
| 2 | -# -*- coding: latin-1 -*- | |
| 3 | -""" | |
| 4 | -olefile (formerly OleFileIO_PL) | |
| 5 | - | |
| 6 | -Module to read/write Microsoft OLE2 files (also called Structured Storage or | |
| 7 | -Microsoft Compound Document File Format), such as Microsoft Office 97-2003 | |
| 8 | -documents, Image Composer and FlashPix files, Outlook messages, ... | |
| 9 | -This version is compatible with Python 2.6+ and 3.x | |
| 10 | - | |
| 11 | -Project website: http://www.decalage.info/olefile | |
| 12 | - | |
| 13 | -olefile is copyright (c) 2005-2015 Philippe Lagadec (http://www.decalage.info) | |
| 14 | - | |
| 15 | -olefile is based on the OleFileIO module from the PIL library v1.1.6 | |
| 16 | -See: http://www.pythonware.com/products/pil/index.htm | |
| 17 | - | |
| 18 | -The Python Imaging Library (PIL) is | |
| 19 | - Copyright (c) 1997-2005 by Secret Labs AB | |
| 20 | - Copyright (c) 1995-2005 by Fredrik Lundh | |
| 21 | - | |
| 22 | -See source code and LICENSE.txt for information on usage and redistribution. | |
| 23 | -""" | |
| 24 | - | |
| 25 | -try: | |
| 26 | - # first try to import olefile for Python 2.6+/3.x | |
| 27 | - from .olefile import * | |
| 28 | - # import metadata not covered by *: | |
| 29 | - from .olefile import __version__, __author__, __date__ | |
| 30 | - | |
| 31 | -except: | |
| 32 | - # if it fails, fallback to the old version olefile2 for Python 2.x: | |
| 33 | - from .olefile2 import * | |
| 34 | - # import metadata not covered by *: | |
| 35 | - from .olefile2 import __doc__, __version__, __author__, __date__ | |
| 1 | +#!/usr/local/bin/python | |
| 2 | +# -*- coding: latin-1 -*- | |
| 3 | +""" | |
| 4 | +olefile (formerly OleFileIO_PL) | |
| 5 | + | |
| 6 | +Module to read/write Microsoft OLE2 files (also called Structured Storage or | |
| 7 | +Microsoft Compound Document File Format), such as Microsoft Office 97-2003 | |
| 8 | +documents, Image Composer and FlashPix files, Outlook messages, ... | |
| 9 | +This version is compatible with Python 2.6+ and 3.x | |
| 10 | + | |
| 11 | +Project website: http://www.decalage.info/olefile | |
| 12 | + | |
| 13 | +olefile is copyright (c) 2005-2015 Philippe Lagadec (http://www.decalage.info) | |
| 14 | + | |
| 15 | +olefile is based on the OleFileIO module from the PIL library v1.1.6 | |
| 16 | +See: http://www.pythonware.com/products/pil/index.htm | |
| 17 | + | |
| 18 | +The Python Imaging Library (PIL) is | |
| 19 | + Copyright (c) 1997-2005 by Secret Labs AB | |
| 20 | + Copyright (c) 1995-2005 by Fredrik Lundh | |
| 21 | + | |
| 22 | +See source code and LICENSE.txt for information on usage and redistribution. | |
| 23 | +""" | |
| 24 | + | |
| 25 | +# first try to import olefile for Python 2.6+/3.x | |
| 26 | +from .olefile import * | |
| 27 | +# import metadata not covered by *: | |
| 28 | +from .olefile import __version__, __author__, __date__ | ... | ... |
oletools/thirdparty/olefile/olefile2.html deleted
| 1 | - | |
| 2 | -<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> | |
| 3 | -<html><head><title>Python: module olefile2</title> | |
| 4 | -</head><body bgcolor="#f0f0f8"> | |
| 5 | - | |
| 6 | -<table width="100%" cellspacing=0 cellpadding=2 border=0 summary="heading"> | |
| 7 | -<tr bgcolor="#7799ee"> | |
| 8 | -<td valign=bottom> <br> | |
| 9 | -<font color="#ffffff" face="helvetica, arial"> <br><big><big><strong>olefile2</strong></big></big> (version 0.40py2, 2014-10-01)</font></td | |
| 10 | -><td align=right valign=bottom | |
| 11 | -><font color="#ffffff" face="helvetica, arial"><a href=".">index</a><br><a href="file:./olefile2.py">.\olefile2.py</a></font></td></tr></table> | |
| 12 | - <p><tt>olefile2 (formerly OleFileIO_PL2) version 0.40py2 2014-10-01<br> | |
| 13 | - <br> | |
| 14 | -Module to read Microsoft OLE2 files (also called Structured Storage or<br> | |
| 15 | -Microsoft Compound Document File Format), such as Microsoft Office<br> | |
| 16 | -documents, Image Composer and FlashPix files, Outlook messages, ...<br> | |
| 17 | - <br> | |
| 18 | -IMPORTANT NOTE: olefile2 is an old version of olefile meant to be used<br> | |
| 19 | -as fallback for Python 2.5 and older. For Python 2.6, 2.7 and 3.x, please use<br> | |
| 20 | -olefile which is more up-to-date. The improvements in olefile might<br> | |
| 21 | -not always be backported to olefile2.<br> | |
| 22 | - <br> | |
| 23 | -Project website: <a href="http://www.decalage.info/python/olefileio">http://www.decalage.info/python/olefileio</a><br> | |
| 24 | - <br> | |
| 25 | -olefile2 is copyright (c) 2005-2014 Philippe Lagadec (<a href="http://www.decalage.info">http://www.decalage.info</a>)<br> | |
| 26 | - <br> | |
| 27 | -olefile2 is based on the <a href="#OleFileIO">OleFileIO</a> module from the PIL library v1.1.6<br> | |
| 28 | -See: <a href="http://www.pythonware.com/products/pil/index.htm">http://www.pythonware.com/products/pil/index.htm</a><br> | |
| 29 | - <br> | |
| 30 | -The Python Imaging Library (PIL) is<br> | |
| 31 | - Copyright (c) 1997-2005 by Secret Labs AB<br> | |
| 32 | - Copyright (c) 1995-2005 by Fredrik Lundh<br> | |
| 33 | - <br> | |
| 34 | -See source code and LICENSE.txt for information on usage and redistribution.</tt></p> | |
| 35 | -<p> | |
| 36 | -<table width="100%" cellspacing=0 cellpadding=2 border=0 summary="section"> | |
| 37 | -<tr bgcolor="#aa55cc"> | |
| 38 | -<td colspan=3 valign=bottom> <br> | |
| 39 | -<font color="#ffffff" face="helvetica, arial"><big><strong>Modules</strong></big></font></td></tr> | |
| 40 | - | |
| 41 | -<tr><td bgcolor="#aa55cc"><tt> </tt></td><td> </td> | |
| 42 | -<td width="100%"><table width="100%" summary="list"><tr><td width="25%" valign=top><a href="StringIO.html">StringIO</a><br> | |
| 43 | -<a href="array.html">array</a><br> | |
| 44 | -</td><td width="25%" valign=top><a href="datetime.html">datetime</a><br> | |
| 45 | -<a href="os.html">os</a><br> | |
| 46 | -</td><td width="25%" valign=top><a href="string.html">string</a><br> | |
| 47 | -<a href="struct.html">struct</a><br> | |
| 48 | -</td><td width="25%" valign=top><a href="sys.html">sys</a><br> | |
| 49 | -</td></tr></table></td></tr></table><p> | |
| 50 | -<table width="100%" cellspacing=0 cellpadding=2 border=0 summary="section"> | |
| 51 | -<tr bgcolor="#ee77aa"> | |
| 52 | -<td colspan=3 valign=bottom> <br> | |
| 53 | -<font color="#ffffff" face="helvetica, arial"><big><strong>Classes</strong></big></font></td></tr> | |
| 54 | - | |
| 55 | -<tr><td bgcolor="#ee77aa"><tt> </tt></td><td> </td> | |
| 56 | -<td width="100%"><dl> | |
| 57 | -<dt><font face="helvetica, arial"><a href="olefile2.html#OleFileIO">OleFileIO</a> | |
| 58 | -</font></dt></dl> | |
| 59 | - <p> | |
| 60 | -<table width="100%" cellspacing=0 cellpadding=2 border=0 summary="section"> | |
| 61 | -<tr bgcolor="#ffc8d8"> | |
| 62 | -<td colspan=3 valign=bottom> <br> | |
| 63 | -<font color="#000000" face="helvetica, arial"><a name="OleFileIO">class <strong>OleFileIO</strong></a></font></td></tr> | |
| 64 | - | |
| 65 | -<tr bgcolor="#ffc8d8"><td rowspan=2><tt> </tt></td> | |
| 66 | -<td colspan=2><tt>OLE container object<br> | |
| 67 | - <br> | |
| 68 | -This class encapsulates the interface to an OLE 2 structured<br> | |
| 69 | -storage file. Use the {@link listdir} and {@link openstream} methods to<br> | |
| 70 | -access the contents of this file.<br> | |
| 71 | - <br> | |
| 72 | -Object names are given as a list of strings, one for each subentry<br> | |
| 73 | -level. The root entry should be omitted. For example, the following<br> | |
| 74 | -code extracts all image streams from a Microsoft Image Composer file:<br> | |
| 75 | - <br> | |
| 76 | - ole = <a href="#OleFileIO">OleFileIO</a>("fan.mic")<br> | |
| 77 | - <br> | |
| 78 | - for entry in ole.<a href="#OleFileIO-listdir">listdir</a>():<br> | |
| 79 | - if entry[1:2] == "Image":<br> | |
| 80 | - fin = ole.<a href="#OleFileIO-openstream">openstream</a>(entry)<br> | |
| 81 | - fout = <a href="#OleFileIO-open">open</a>(entry[0:1], "wb")<br> | |
| 82 | - while True:<br> | |
| 83 | - s = fin.read(8192)<br> | |
| 84 | - if not s:<br> | |
| 85 | - break<br> | |
| 86 | - fout.write(s)<br> | |
| 87 | - <br> | |
| 88 | -You can use the viewer application provided with the Python Imaging<br> | |
| 89 | -Library to view the resulting files (which happens to be standard<br> | |
| 90 | -TIFF files).<br> </tt></td></tr> | |
| 91 | -<tr><td> </td> | |
| 92 | -<td width="100%">Methods defined here:<br> | |
| 93 | -<dl><dt><a name="OleFileIO-__init__"><strong>__init__</strong></a>(self, filename<font color="#909090">=None</font>, raise_defects<font color="#909090">=40</font>)</dt><dd><tt>Constructor for <a href="#OleFileIO">OleFileIO</a> class.<br> | |
| 94 | - <br> | |
| 95 | -filename: file to open.<br> | |
| 96 | -raise_defects: minimal level for defects to be raised as exceptions.<br> | |
| 97 | -(use DEFECT_FATAL for a typical application, DEFECT_INCORRECT for a<br> | |
| 98 | -security-oriented application, see source code for details)</tt></dd></dl> | |
| 99 | - | |
| 100 | -<dl><dt><a name="OleFileIO-close"><strong>close</strong></a>(self)</dt><dd><tt>close the OLE file, to release the file object</tt></dd></dl> | |
| 101 | - | |
| 102 | -<dl><dt><a name="OleFileIO-dumpdirectory"><strong>dumpdirectory</strong></a>(self)</dt><dd><tt>Dump directory (for debugging only)</tt></dd></dl> | |
| 103 | - | |
| 104 | -<dl><dt><a name="OleFileIO-dumpfat"><strong>dumpfat</strong></a>(self, fat, firstindex<font color="#909090">=0</font>)</dt><dd><tt>Displays a part of FAT in human-readable form for debugging purpose</tt></dd></dl> | |
| 105 | - | |
| 106 | -<dl><dt><a name="OleFileIO-dumpsect"><strong>dumpsect</strong></a>(self, sector, firstindex<font color="#909090">=0</font>)</dt><dd><tt>Displays a sector in a human-readable form, for debugging purpose.</tt></dd></dl> | |
| 107 | - | |
| 108 | -<dl><dt><a name="OleFileIO-exists"><strong>exists</strong></a>(self, filename)</dt><dd><tt>Test if given filename exists as a stream or a storage in the OLE<br> | |
| 109 | -container.<br> | |
| 110 | - <br> | |
| 111 | -filename: path of stream in storage tree. (see openstream for syntax)<br> | |
| 112 | -return: True if object exist, else False.</tt></dd></dl> | |
| 113 | - | |
| 114 | -<dl><dt><a name="OleFileIO-get_metadata"><strong>get_metadata</strong></a>(self)</dt><dd><tt>Parse standard properties streams, return an OleMetadata object<br> | |
| 115 | -containing all the available metadata.<br> | |
| 116 | -(also stored in the metadata attribute of the <a href="#OleFileIO">OleFileIO</a> object)<br> | |
| 117 | - <br> | |
| 118 | -new in version 0.25</tt></dd></dl> | |
| 119 | - | |
| 120 | -<dl><dt><a name="OleFileIO-get_rootentry_name"><strong>get_rootentry_name</strong></a>(self)</dt><dd><tt>Return root entry name. Should usually be 'Root Entry' or 'R' in most<br> | |
| 121 | -implementations.</tt></dd></dl> | |
| 122 | - | |
| 123 | -<dl><dt><a name="OleFileIO-get_size"><strong>get_size</strong></a>(self, filename)</dt><dd><tt>Return size of a stream in the OLE container, in bytes.<br> | |
| 124 | - <br> | |
| 125 | -filename: path of stream in storage tree (see openstream for syntax)<br> | |
| 126 | -return: size in bytes (long integer)<br> | |
| 127 | -raise: IOError if file not found, TypeError if this is not a stream.</tt></dd></dl> | |
| 128 | - | |
| 129 | -<dl><dt><a name="OleFileIO-get_type"><strong>get_type</strong></a>(self, filename)</dt><dd><tt>Test if given filename exists as a stream or a storage in the OLE<br> | |
| 130 | -container, and return its type.<br> | |
| 131 | - <br> | |
| 132 | -filename: path of stream in storage tree. (see openstream for syntax)<br> | |
| 133 | -return: False if object does not exist, its entry type (>0) otherwise:<br> | |
| 134 | - - STGTY_STREAM: a stream<br> | |
| 135 | - - STGTY_STORAGE: a storage<br> | |
| 136 | - - STGTY_ROOT: the root entry</tt></dd></dl> | |
| 137 | - | |
| 138 | -<dl><dt><a name="OleFileIO-getctime"><strong>getctime</strong></a>(self, filename)</dt><dd><tt>Return creation time of a stream/storage.<br> | |
| 139 | - <br> | |
| 140 | -filename: path of stream/storage in storage tree. (see openstream for<br> | |
| 141 | -syntax)<br> | |
| 142 | -return: None if creation time is null, a python datetime object<br> | |
| 143 | -otherwise (UTC timezone)<br> | |
| 144 | - <br> | |
| 145 | -new in version 0.26</tt></dd></dl> | |
| 146 | - | |
| 147 | -<dl><dt><a name="OleFileIO-getmtime"><strong>getmtime</strong></a>(self, filename)</dt><dd><tt>Return modification time of a stream/storage.<br> | |
| 148 | - <br> | |
| 149 | -filename: path of stream/storage in storage tree. (see openstream for<br> | |
| 150 | -syntax)<br> | |
| 151 | -return: None if modification time is null, a python datetime object<br> | |
| 152 | -otherwise (UTC timezone)<br> | |
| 153 | - <br> | |
| 154 | -new in version 0.26</tt></dd></dl> | |
| 155 | - | |
| 156 | -<dl><dt><a name="OleFileIO-getproperties"><strong>getproperties</strong></a>(self, filename, convert_time<font color="#909090">=False</font>, no_conversion<font color="#909090">=None</font>)</dt><dd><tt>Return properties described in substream.<br> | |
| 157 | - <br> | |
| 158 | -filename: path of stream in storage tree (see openstream for syntax)<br> | |
| 159 | -convert_time: bool, if True timestamps will be converted to Python datetime<br> | |
| 160 | -no_conversion: None or list of int, timestamps not to be converted<br> | |
| 161 | - (for example total editing time is not a real timestamp)<br> | |
| 162 | -return: a dictionary of values indexed by id (integer)</tt></dd></dl> | |
| 163 | - | |
| 164 | -<dl><dt><a name="OleFileIO-getsect"><strong>getsect</strong></a>(self, sect)</dt><dd><tt>Read given sector from file on disk.<br> | |
| 165 | -sect: sector index<br> | |
| 166 | -returns a string containing the sector data.</tt></dd></dl> | |
| 167 | - | |
| 168 | -<dl><dt><a name="OleFileIO-listdir"><strong>listdir</strong></a>(self, streams<font color="#909090">=True</font>, storages<font color="#909090">=False</font>)</dt><dd><tt>Return a list of streams stored in this file<br> | |
| 169 | - <br> | |
| 170 | -streams: bool, include streams if True (True by default) - new in v0.26<br> | |
| 171 | -storages: bool, include storages if True (False by default) - new in v0.26<br> | |
| 172 | -(note: the root storage is never included)</tt></dd></dl> | |
| 173 | - | |
| 174 | -<dl><dt><a name="OleFileIO-loaddirectory"><strong>loaddirectory</strong></a>(self, sect)</dt><dd><tt>Load the directory.<br> | |
| 175 | -sect: sector index of directory stream.</tt></dd></dl> | |
| 176 | - | |
| 177 | -<dl><dt><a name="OleFileIO-loadfat"><strong>loadfat</strong></a>(self, header)</dt><dd><tt>Load the FAT table.</tt></dd></dl> | |
| 178 | - | |
| 179 | -<dl><dt><a name="OleFileIO-loadfat_sect"><strong>loadfat_sect</strong></a>(self, sect)</dt><dd><tt>Adds the indexes of the given sector to the FAT<br> | |
| 180 | -sect: string containing the first FAT sector, or array of long integers<br> | |
| 181 | -return: index of last FAT sector.</tt></dd></dl> | |
| 182 | - | |
| 183 | -<dl><dt><a name="OleFileIO-loadminifat"><strong>loadminifat</strong></a>(self)</dt><dd><tt>Load the MiniFAT table.</tt></dd></dl> | |
| 184 | - | |
| 185 | -<dl><dt><a name="OleFileIO-open"><strong>open</strong></a>(self, filename)</dt><dd><tt>Open an OLE2 file.<br> | |
| 186 | -Reads the header, FAT and directory.<br> | |
| 187 | - <br> | |
| 188 | -filename: string-like or file-like object</tt></dd></dl> | |
| 189 | - | |
| 190 | -<dl><dt><a name="OleFileIO-openstream"><strong>openstream</strong></a>(self, filename)</dt><dd><tt>Open a stream as a read-only file object (StringIO).<br> | |
| 191 | - <br> | |
| 192 | -filename: path of stream in storage tree (except root entry), either:<br> | |
| 193 | - - a string using Unix path syntax, for example:<br> | |
| 194 | - 'storage_1/storage_1.2/stream'<br> | |
| 195 | - - a list of storage filenames, path to the desired stream/storage.<br> | |
| 196 | - Example: ['storage_1', 'storage_1.2', 'stream']<br> | |
| 197 | -return: file object (read-only)<br> | |
| 198 | -raise IOError if filename not found, or if this is not a stream.</tt></dd></dl> | |
| 199 | - | |
| 200 | -<dl><dt><a name="OleFileIO-sect2array"><strong>sect2array</strong></a>(self, sect)</dt><dd><tt>convert a sector to an array of 32 bits unsigned integers,<br> | |
| 201 | -swapping bytes on big endian CPUs such as PowerPC (old Macs)</tt></dd></dl> | |
| 202 | - | |
| 203 | -</td></tr></table></td></tr></table><p> | |
| 204 | -<table width="100%" cellspacing=0 cellpadding=2 border=0 summary="section"> | |
| 205 | -<tr bgcolor="#eeaa77"> | |
| 206 | -<td colspan=3 valign=bottom> <br> | |
| 207 | -<font color="#ffffff" face="helvetica, arial"><big><strong>Functions</strong></big></font></td></tr> | |
| 208 | - | |
| 209 | -<tr><td bgcolor="#eeaa77"><tt> </tt></td><td> </td> | |
| 210 | -<td width="100%"><dl><dt><a name="-isOleFile"><strong>isOleFile</strong></a>(filename)</dt><dd><tt>Test if file is an OLE container (according to its header).<br> | |
| 211 | -filename: file name or path (str, unicode)<br> | |
| 212 | -return: True if OLE, False otherwise.</tt></dd></dl> | |
| 213 | -</td></tr></table><p> | |
| 214 | -<table width="100%" cellspacing=0 cellpadding=2 border=0 summary="section"> | |
| 215 | -<tr bgcolor="#55aa55"> | |
| 216 | -<td colspan=3 valign=bottom> <br> | |
| 217 | -<font color="#ffffff" face="helvetica, arial"><big><strong>Data</strong></big></font></td></tr> | |
| 218 | - | |
| 219 | -<tr><td bgcolor="#55aa55"><tt> </tt></td><td> </td> | |
| 220 | -<td width="100%"><strong>DEFECT_FATAL</strong> = 40<br> | |
| 221 | -<strong>DEFECT_INCORRECT</strong> = 30<br> | |
| 222 | -<strong>DEFECT_POTENTIAL</strong> = 20<br> | |
| 223 | -<strong>DEFECT_UNSURE</strong> = 10<br> | |
| 224 | -<strong>STGTY_EMPTY</strong> = 0<br> | |
| 225 | -<strong>STGTY_LOCKBYTES</strong> = 3<br> | |
| 226 | -<strong>STGTY_PROPERTY</strong> = 4<br> | |
| 227 | -<strong>STGTY_ROOT</strong> = 5<br> | |
| 228 | -<strong>STGTY_STORAGE</strong> = 1<br> | |
| 229 | -<strong>STGTY_STREAM</strong> = 2<br> | |
| 230 | -<strong>__all__</strong> = ['OleFileIO', 'isOleFile', 'DEFECT_UNSURE', 'STGTY_STREAM', 'DEFECT_FATAL', 'STGTY_EMPTY', 'STGTY_LOCKBYTES', 'STGTY_STORAGE', 'STGTY_PROPERTY', 'DEFECT_INCORRECT', 'DEFECT_POTENTIAL', 'STGTY_ROOT']<br> | |
| 231 | -<strong>__author__</strong> = 'Philippe Lagadec'<br> | |
| 232 | -<strong>__date__</strong> = '2014-10-01'<br> | |
| 233 | -<strong>__version__</strong> = '0.40py2'</td></tr></table><p> | |
| 234 | -<table width="100%" cellspacing=0 cellpadding=2 border=0 summary="section"> | |
| 235 | -<tr bgcolor="#7799ee"> | |
| 236 | -<td colspan=3 valign=bottom> <br> | |
| 237 | -<font color="#ffffff" face="helvetica, arial"><big><strong>Author</strong></big></font></td></tr> | |
| 238 | - | |
| 239 | -<tr><td bgcolor="#7799ee"><tt> </tt></td><td> </td> | |
| 240 | -<td width="100%">Philippe Lagadec</td></tr></table> | |
| 241 | -</body></html> | |
| 242 | 0 | \ No newline at end of file |
oletools/thirdparty/olefile/olefile2.py deleted
| 1 | -#!/usr/local/bin/python | |
| 2 | -# -*- coding: latin-1 -*- | |
| 3 | -""" | |
| 4 | -olefile2 (formerly OleFileIO_PL2) version 0.40py2 2014-10-01 | |
| 5 | - | |
| 6 | -Module to read Microsoft OLE2 files (also called Structured Storage or | |
| 7 | -Microsoft Compound Document File Format), such as Microsoft Office | |
| 8 | -documents, Image Composer and FlashPix files, Outlook messages, ... | |
| 9 | - | |
| 10 | -IMPORTANT NOTE: olefile2 is an old version of olefile meant to be used | |
| 11 | -as fallback for Python 2.5 and older. For Python 2.6, 2.7 and 3.x, please use | |
| 12 | -olefile which is more up-to-date. The improvements in olefile might | |
| 13 | -not always be backported to olefile2. | |
| 14 | - | |
| 15 | -Project website: http://www.decalage.info/python/olefileio | |
| 16 | - | |
| 17 | -olefile2 is copyright (c) 2005-2014 Philippe Lagadec (http://www.decalage.info) | |
| 18 | - | |
| 19 | -olefile2 is based on the OleFileIO module from the PIL library v1.1.6 | |
| 20 | -See: http://www.pythonware.com/products/pil/index.htm | |
| 21 | - | |
| 22 | -The Python Imaging Library (PIL) is | |
| 23 | - Copyright (c) 1997-2005 by Secret Labs AB | |
| 24 | - Copyright (c) 1995-2005 by Fredrik Lundh | |
| 25 | - | |
| 26 | -See source code and LICENSE.txt for information on usage and redistribution. | |
| 27 | -""" | |
| 28 | - | |
| 29 | -__author__ = "Philippe Lagadec" | |
| 30 | -__date__ = "2014-10-01" | |
| 31 | -__version__ = '0.40py2' | |
| 32 | - | |
| 33 | -#--- LICENSE ------------------------------------------------------------------ | |
| 34 | - | |
| 35 | -# olefile (formerly OleFileIO_PL) is copyright (c) 2005-2014 Philippe Lagadec | |
| 36 | -# (http://www.decalage.info) | |
| 37 | -# | |
| 38 | -# All rights reserved. | |
| 39 | -# | |
| 40 | -# Redistribution and use in source and binary forms, with or without modification, | |
| 41 | -# are permitted provided that the following conditions are met: | |
| 42 | -# | |
| 43 | -# * Redistributions of source code must retain the above copyright notice, this | |
| 44 | -# list of conditions and the following disclaimer. | |
| 45 | -# * Redistributions in binary form must reproduce the above copyright notice, | |
| 46 | -# this list of conditions and the following disclaimer in the documentation | |
| 47 | -# and/or other materials provided with the distribution. | |
| 48 | -# | |
| 49 | -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| 50 | -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 51 | -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 52 | -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
| 53 | -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 54 | -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| 55 | -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
| 56 | -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 57 | -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 58 | -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 59 | - | |
| 60 | -# ---------- | |
| 61 | -# PIL License: | |
| 62 | -# | |
| 63 | -# olefile is based on source code from the OleFileIO module of the Python | |
| 64 | -# Imaging Library (PIL) published by Fredrik Lundh under the following license: | |
| 65 | - | |
| 66 | -# The Python Imaging Library (PIL) is | |
| 67 | -# Copyright (c) 1997-2005 by Secret Labs AB | |
| 68 | -# Copyright (c) 1995-2005 by Fredrik Lundh | |
| 69 | -# | |
| 70 | -# By obtaining, using, and/or copying this software and/or its associated | |
| 71 | -# documentation, you agree that you have read, understood, and will comply with | |
| 72 | -# the following terms and conditions: | |
| 73 | -# | |
| 74 | -# Permission to use, copy, modify, and distribute this software and its | |
| 75 | -# associated documentation for any purpose and without fee is hereby granted, | |
| 76 | -# provided that the above copyright notice appears in all copies, and that both | |
| 77 | -# that copyright notice and this permission notice appear in supporting | |
| 78 | -# documentation, and that the name of Secret Labs AB or the author(s) not be used | |
| 79 | -# in advertising or publicity pertaining to distribution of the software | |
| 80 | -# without specific, written prior permission. | |
| 81 | -# | |
| 82 | -# SECRET LABS AB AND THE AUTHORS DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS | |
| 83 | -# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. | |
| 84 | -# IN NO EVENT SHALL SECRET LABS AB OR THE AUTHORS BE LIABLE FOR ANY SPECIAL, | |
| 85 | -# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM | |
| 86 | -# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR | |
| 87 | -# OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | |
| 88 | -# PERFORMANCE OF THIS SOFTWARE. | |
| 89 | - | |
| 90 | -#----------------------------------------------------------------------------- | |
| 91 | -# CHANGELOG: (only olefile/OleFileIO_PL changes compared to PIL 1.1.6) | |
| 92 | -# 2005-05-11 v0.10 PL: - a few fixes for Python 2.4 compatibility | |
| 93 | -# (all changes flagged with [PL]) | |
| 94 | -# 2006-02-22 v0.11 PL: - a few fixes for some Office 2003 documents which raise | |
| 95 | -# exceptions in _OleStream.__init__() | |
| 96 | -# 2006-06-09 v0.12 PL: - fixes for files above 6.8MB (DIFAT in loadfat) | |
| 97 | -# - added some constants | |
| 98 | -# - added header values checks | |
| 99 | -# - added some docstrings | |
| 100 | -# - getsect: bugfix in case sectors >512 bytes | |
| 101 | -# - getsect: added conformity checks | |
| 102 | -# - DEBUG_MODE constant to activate debug display | |
| 103 | -# 2007-09-04 v0.13 PL: - improved/translated (lots of) comments | |
| 104 | -# - updated license | |
| 105 | -# - converted tabs to 4 spaces | |
| 106 | -# 2007-11-19 v0.14 PL: - added OleFileIO._raise_defect() to adapt sensitivity | |
| 107 | -# - improved _unicode() to use Python 2.x unicode support | |
| 108 | -# - fixed bug in _OleDirectoryEntry | |
| 109 | -# 2007-11-25 v0.15 PL: - added safety checks to detect FAT loops | |
| 110 | -# - fixed _OleStream which didn't check stream size | |
| 111 | -# - added/improved many docstrings and comments | |
| 112 | -# - moved helper functions _unicode and _clsid out of | |
| 113 | -# OleFileIO class | |
| 114 | -# - improved OleFileIO._find() to add Unix path syntax | |
| 115 | -# - OleFileIO._find() is now case-insensitive | |
| 116 | -# - added get_type() and get_rootentry_name() | |
| 117 | -# - rewritten loaddirectory and _OleDirectoryEntry | |
| 118 | -# 2007-11-27 v0.16 PL: - added _OleDirectoryEntry.kids_dict | |
| 119 | -# - added detection of duplicate filenames in storages | |
| 120 | -# - added detection of duplicate references to streams | |
| 121 | -# - added get_size() and exists() to _OleDirectoryEntry | |
| 122 | -# - added isOleFile to check header before parsing | |
| 123 | -# - added __all__ list to control public keywords in pydoc | |
| 124 | -# 2007-12-04 v0.17 PL: - added _load_direntry to fix a bug in loaddirectory | |
| 125 | -# - improved _unicode(), added workarounds for Python <2.3 | |
| 126 | -# - added set_debug_mode and -d option to set debug mode | |
| 127 | -# - fixed bugs in OleFileIO.open and _OleDirectoryEntry | |
| 128 | -# - added safety check in main for large or binary | |
| 129 | -# properties | |
| 130 | -# - allow size>0 for storages for some implementations | |
| 131 | -# 2007-12-05 v0.18 PL: - fixed several bugs in handling of FAT, MiniFAT and | |
| 132 | -# streams | |
| 133 | -# - added option '-c' in main to check all streams | |
| 134 | -# 2009-12-10 v0.19 PL: - bugfix for 32 bit arrays on 64 bits platforms | |
| 135 | -# (thanks to Ben G. and Martijn for reporting the bug) | |
| 136 | -# 2009-12-11 v0.20 PL: - bugfix in OleFileIO.open when filename is not plain str | |
| 137 | -# 2010-01-22 v0.21 PL: - added support for big-endian CPUs such as PowerPC Macs | |
| 138 | -# 2012-02-16 v0.22 PL: - fixed bug in getproperties, patch by chuckleberryfinn | |
| 139 | -# (https://bitbucket.org/decalage/olefileio_pl/issue/7) | |
| 140 | -# - added close method to OleFileIO (fixed issue #2) | |
| 141 | -# 2012-07-25 v0.23 PL: - added support for file-like objects (patch by mete0r_kr) | |
| 142 | -# 2013-05-05 v0.24 PL: - getproperties: added conversion from filetime to python | |
| 143 | -# datetime | |
| 144 | -# - main: displays properties with date format | |
| 145 | -# - new class OleMetadata to parse standard properties | |
| 146 | -# - added get_metadata method | |
| 147 | -# 2013-05-07 v0.24 PL: - a few improvements in OleMetadata | |
| 148 | -# 2013-05-24 v0.25 PL: - getproperties: option to not convert some timestamps | |
| 149 | -# - OleMetaData: total_edit_time is now a number of seconds, | |
| 150 | -# not a timestamp | |
| 151 | -# - getproperties: added support for VT_BOOL, VT_INT, V_UINT | |
| 152 | -# - getproperties: filter out null chars from strings | |
| 153 | -# - getproperties: raise non-fatal defects instead of | |
| 154 | -# exceptions when properties cannot be parsed properly | |
| 155 | -# 2013-05-27 PL: - getproperties: improved exception handling | |
| 156 | -# - _raise_defect: added option to set exception type | |
| 157 | -# - all non-fatal issues are now recorded, and displayed | |
| 158 | -# when run as a script | |
| 159 | -# 2013-07-11 v0.26 PL: - added methods to get modification and creation times | |
| 160 | -# of a directory entry or a storage/stream | |
| 161 | -# - fixed parsing of direntry timestamps | |
| 162 | -# 2013-07-24 PL: - new options in listdir to list storages and/or streams | |
| 163 | -# 2014-07-18 v0.31 - preliminary support for 4K sectors | |
| 164 | -# 2014-09-26 v0.40 PL: - renamed OleFileIO_PL to olefile | |
| 165 | - | |
| 166 | -#----------------------------------------------------------------------------- | |
| 167 | -# TODO: | |
| 168 | -# + check if running on Python 2.6+, if so issue warning to use olefile | |
| 169 | - | |
| 170 | -#----------------------------------------------------------------------------- | |
| 171 | - | |
| 172 | -# | |
| 173 | -# THIS IS WORK IN PROGRESS | |
| 174 | -# | |
| 175 | -# The Python Imaging Library | |
| 176 | -# $Id: OleFileIO.py 2339 2005-03-25 08:02:17Z fredrik $ | |
| 177 | -# | |
| 178 | -# stuff to deal with OLE2 Structured Storage files. this module is | |
| 179 | -# used by PIL to read Image Composer and FlashPix files, but can also | |
| 180 | -# be used to read other files of this type. | |
| 181 | -# | |
| 182 | -# History: | |
| 183 | -# 1997-01-20 fl Created | |
| 184 | -# 1997-01-22 fl Fixed 64-bit portability quirk | |
| 185 | -# 2003-09-09 fl Fixed typo in OleFileIO.loadfat (noted by Daniel Haertle) | |
| 186 | -# 2004-02-29 fl Changed long hex constants to signed integers | |
| 187 | -# | |
| 188 | -# Notes: | |
| 189 | -# FIXME: sort out sign problem (eliminate long hex constants) | |
| 190 | -# FIXME: change filename to use "a/b/c" instead of ["a", "b", "c"] | |
| 191 | -# FIXME: provide a glob mechanism function (using fnmatchcase) | |
| 192 | -# | |
| 193 | -# Literature: | |
| 194 | -# | |
| 195 | -# "FlashPix Format Specification, Appendix A", Kodak and Microsoft, | |
| 196 | -# September 1996. | |
| 197 | -# | |
| 198 | -# Quotes: | |
| 199 | -# | |
| 200 | -# "If this document and functionality of the Software conflict, | |
| 201 | -# the actual functionality of the Software represents the correct | |
| 202 | -# functionality" -- Microsoft, in the OLE format specification | |
| 203 | -# | |
| 204 | -# Copyright (c) Secret Labs AB 1997. | |
| 205 | -# Copyright (c) Fredrik Lundh 1997. | |
| 206 | -# | |
| 207 | -# See the README file for information on usage and redistribution. | |
| 208 | -# | |
| 209 | - | |
| 210 | -#------------------------------------------------------------------------------ | |
| 211 | - | |
| 212 | -import string, StringIO, struct, array, os.path, sys, datetime | |
| 213 | - | |
| 214 | -#[PL] Define explicitly the public API to avoid private objects in pydoc: | |
| 215 | -__all__ = ['OleFileIO', 'isOleFile'] | |
| 216 | - | |
| 217 | -#[PL] workaround to fix an issue with array item size on 64 bits systems: | |
| 218 | -if array.array('L').itemsize == 4: | |
| 219 | - # on 32 bits platforms, long integers in an array are 32 bits: | |
| 220 | - UINT32 = 'L' | |
| 221 | -elif array.array('I').itemsize == 4: | |
| 222 | - # on 64 bits platforms, integers in an array are 32 bits: | |
| 223 | - UINT32 = 'I' | |
| 224 | -else: | |
| 225 | - raise ValueError, 'Need to fix a bug with 32 bit arrays, please contact author...' | |
| 226 | - | |
| 227 | - | |
| 228 | -#[PL] These workarounds were inspired from the Path module | |
| 229 | -# (see http://www.jorendorff.com/articles/python/path/) | |
| 230 | -#TODO: test with old Python versions | |
| 231 | - | |
| 232 | -# Pre-2.3 workaround for booleans | |
| 233 | -try: | |
| 234 | - True, False | |
| 235 | -except NameError: | |
| 236 | - True, False = 1, 0 | |
| 237 | - | |
| 238 | -# Pre-2.3 workaround for basestring. | |
| 239 | -try: | |
| 240 | - basestring | |
| 241 | -except NameError: | |
| 242 | - try: | |
| 243 | - # is Unicode supported (Python >2.0 or >1.6 ?) | |
| 244 | - basestring = (str, unicode) | |
| 245 | - except NameError: | |
| 246 | - basestring = str | |
| 247 | - | |
| 248 | -#[PL] Experimental setting: if True, OLE filenames will be kept in Unicode | |
| 249 | -# if False (default PIL behaviour), all filenames are converted to Latin-1. | |
| 250 | -KEEP_UNICODE_NAMES = False | |
| 251 | - | |
| 252 | -#[PL] DEBUG display mode: False by default, use set_debug_mode() or "-d" on | |
| 253 | -# command line to change it. | |
| 254 | -DEBUG_MODE = False | |
| 255 | -def debug_print(msg): | |
| 256 | - print msg | |
| 257 | -def debug_pass(msg): | |
| 258 | - pass | |
| 259 | -debug = debug_pass | |
| 260 | - | |
| 261 | -def set_debug_mode(debug_mode): | |
| 262 | - """ | |
| 263 | - Set debug mode on or off, to control display of debugging messages. | |
| 264 | - mode: True or False | |
| 265 | - """ | |
| 266 | - global DEBUG_MODE, debug | |
| 267 | - DEBUG_MODE = debug_mode | |
| 268 | - if debug_mode: | |
| 269 | - debug = debug_print | |
| 270 | - else: | |
| 271 | - debug = debug_pass | |
| 272 | - | |
| 273 | -#TODO: convert this to hex | |
| 274 | -MAGIC = '\320\317\021\340\241\261\032\341' | |
| 275 | - | |
| 276 | -#[PL]: added constants for Sector IDs (from AAF specifications) | |
| 277 | -MAXREGSECT = 0xFFFFFFFAL; # maximum SECT | |
| 278 | -DIFSECT = 0xFFFFFFFCL; # (-4) denotes a DIFAT sector in a FAT | |
| 279 | -FATSECT = 0xFFFFFFFDL; # (-3) denotes a FAT sector in a FAT | |
| 280 | -ENDOFCHAIN = 0xFFFFFFFEL; # (-2) end of a virtual stream chain | |
| 281 | -FREESECT = 0xFFFFFFFFL; # (-1) unallocated sector | |
| 282 | - | |
| 283 | -#[PL]: added constants for Directory Entry IDs (from AAF specifications) | |
| 284 | -MAXREGSID = 0xFFFFFFFAL; # maximum directory entry ID | |
| 285 | -NOSTREAM = 0xFFFFFFFFL; # (-1) unallocated directory entry | |
| 286 | - | |
| 287 | -#[PL] object types in storage (from AAF specifications) | |
| 288 | -STGTY_EMPTY = 0 # empty directory entry (according to OpenOffice.org doc) | |
| 289 | -STGTY_STORAGE = 1 # element is a storage object | |
| 290 | -STGTY_STREAM = 2 # element is a stream object | |
| 291 | -STGTY_LOCKBYTES = 3 # element is an ILockBytes object | |
| 292 | -STGTY_PROPERTY = 4 # element is an IPropertyStorage object | |
| 293 | -STGTY_ROOT = 5 # element is a root storage | |
| 294 | - | |
| 295 | - | |
| 296 | -# | |
| 297 | -# -------------------------------------------------------------------- | |
| 298 | -# property types | |
| 299 | - | |
| 300 | -VT_EMPTY=0; VT_NULL=1; VT_I2=2; VT_I4=3; VT_R4=4; VT_R8=5; VT_CY=6; | |
| 301 | -VT_DATE=7; VT_BSTR=8; VT_DISPATCH=9; VT_ERROR=10; VT_BOOL=11; | |
| 302 | -VT_VARIANT=12; VT_UNKNOWN=13; VT_DECIMAL=14; VT_I1=16; VT_UI1=17; | |
| 303 | -VT_UI2=18; VT_UI4=19; VT_I8=20; VT_UI8=21; VT_INT=22; VT_UINT=23; | |
| 304 | -VT_VOID=24; VT_HRESULT=25; VT_PTR=26; VT_SAFEARRAY=27; VT_CARRAY=28; | |
| 305 | -VT_USERDEFINED=29; VT_LPSTR=30; VT_LPWSTR=31; VT_FILETIME=64; | |
| 306 | -VT_BLOB=65; VT_STREAM=66; VT_STORAGE=67; VT_STREAMED_OBJECT=68; | |
| 307 | -VT_STORED_OBJECT=69; VT_BLOB_OBJECT=70; VT_CF=71; VT_CLSID=72; | |
| 308 | -VT_VECTOR=0x1000; | |
| 309 | - | |
| 310 | -# map property id to name (for debugging purposes) | |
| 311 | - | |
| 312 | -VT = {} | |
| 313 | -for keyword, var in vars().items(): | |
| 314 | - if keyword[:3] == "VT_": | |
| 315 | - VT[var] = keyword | |
| 316 | - | |
| 317 | -# | |
| 318 | -# -------------------------------------------------------------------- | |
| 319 | -# Some common document types (root.clsid fields) | |
| 320 | - | |
| 321 | -WORD_CLSID = "00020900-0000-0000-C000-000000000046" | |
| 322 | -#TODO: check Excel, PPT, ... | |
| 323 | - | |
| 324 | -#[PL]: Defect levels to classify parsing errors - see OleFileIO._raise_defect() | |
| 325 | -DEFECT_UNSURE = 10 # a case which looks weird, but not sure it's a defect | |
| 326 | -DEFECT_POTENTIAL = 20 # a potential defect | |
| 327 | -DEFECT_INCORRECT = 30 # an error according to specifications, but parsing | |
| 328 | - # can go on | |
| 329 | -DEFECT_FATAL = 40 # an error which cannot be ignored, parsing is | |
| 330 | - # impossible | |
| 331 | - | |
| 332 | -#[PL] add useful constants to __all__: | |
| 333 | -for key in vars().keys(): | |
| 334 | - if key.startswith('STGTY_') or key.startswith('DEFECT_'): | |
| 335 | - __all__.append(key) | |
| 336 | - | |
| 337 | - | |
| 338 | -#--- FUNCTIONS ---------------------------------------------------------------- | |
| 339 | - | |
| 340 | -def isOleFile (filename): | |
| 341 | - """ | |
| 342 | - Test if file is an OLE container (according to its header). | |
| 343 | - filename: file name or path (str, unicode) | |
| 344 | - return: True if OLE, False otherwise. | |
| 345 | - """ | |
| 346 | - f = open(filename, 'rb') | |
| 347 | - header = f.read(len(MAGIC)) | |
| 348 | - if header == MAGIC: | |
| 349 | - return True | |
| 350 | - else: | |
| 351 | - return False | |
| 352 | - | |
| 353 | - | |
| 354 | -#TODO: replace i16 and i32 with more readable struct.unpack equivalent | |
| 355 | -def i16(c, o = 0): | |
| 356 | - """ | |
| 357 | - Converts a 2-bytes (16 bits) string to an integer. | |
| 358 | - | |
| 359 | - c: string containing bytes to convert | |
| 360 | - o: offset of bytes to convert in string | |
| 361 | - """ | |
| 362 | - return ord(c[o])+(ord(c[o+1])<<8) | |
| 363 | - | |
| 364 | - | |
| 365 | -def i32(c, o = 0): | |
| 366 | - """ | |
| 367 | - Converts a 4-bytes (32 bits) string to an integer. | |
| 368 | - | |
| 369 | - c: string containing bytes to convert | |
| 370 | - o: offset of bytes to convert in string | |
| 371 | - """ | |
| 372 | - return int(ord(c[o])+(ord(c[o+1])<<8)+(ord(c[o+2])<<16)+(ord(c[o+3])<<24)) | |
| 373 | - # [PL]: added int() because "<<" gives long int since Python 2.4 | |
| 374 | - | |
| 375 | - | |
| 376 | -def _clsid(clsid): | |
| 377 | - """ | |
| 378 | - Converts a CLSID to a human-readable string. | |
| 379 | - clsid: string of length 16. | |
| 380 | - """ | |
| 381 | - assert len(clsid) == 16 | |
| 382 | - if clsid == "\0" * len(clsid): | |
| 383 | - return "" | |
| 384 | - return (("%08X-%04X-%04X-%02X%02X-" + "%02X" * 6) % | |
| 385 | - ((i32(clsid, 0), i16(clsid, 4), i16(clsid, 6)) + | |
| 386 | - tuple(map(ord, clsid[8:16])))) | |
| 387 | - | |
| 388 | - | |
| 389 | - | |
| 390 | -# UNICODE support for Old Python versions: | |
| 391 | -# (necessary to handle storages/streams names which use Unicode) | |
| 392 | - | |
| 393 | -try: | |
| 394 | - # is Unicode supported ? | |
| 395 | - unicode | |
| 396 | - | |
| 397 | - def _unicode(s, errors='replace'): | |
| 398 | - """ | |
| 399 | - Map unicode string to Latin 1. (Python with Unicode support) | |
| 400 | - | |
| 401 | - s: UTF-16LE unicode string to convert to Latin-1 | |
| 402 | - errors: 'replace', 'ignore' or 'strict'. See Python doc for unicode() | |
| 403 | - """ | |
| 404 | - #TODO: test if it OleFileIO works with Unicode strings, instead of | |
| 405 | - # converting to Latin-1. | |
| 406 | - try: | |
| 407 | - # First the string is converted to plain Unicode: | |
| 408 | - # (assuming it is encoded as UTF-16 little-endian) | |
| 409 | - u = s.decode('UTF-16LE', errors) | |
| 410 | - if KEEP_UNICODE_NAMES: | |
| 411 | - return u | |
| 412 | - else: | |
| 413 | - # Second the unicode string is converted to Latin-1 | |
| 414 | - return u.encode('latin_1', errors) | |
| 415 | - except: | |
| 416 | - # there was an error during Unicode to Latin-1 conversion: | |
| 417 | - raise IOError, 'incorrect Unicode name' | |
| 418 | - | |
| 419 | -except NameError: | |
| 420 | - def _unicode(s, errors='replace'): | |
| 421 | - """ | |
| 422 | - Map unicode string to Latin 1. (Python without native Unicode support) | |
| 423 | - | |
| 424 | - s: UTF-16LE unicode string to convert to Latin-1 | |
| 425 | - errors: 'replace', 'ignore' or 'strict'. (ignored in this version) | |
| 426 | - """ | |
| 427 | - # If the unicode function does not exist, we assume this is an old | |
| 428 | - # Python version without Unicode support. | |
| 429 | - # Null bytes are simply removed (this only works with usual Latin-1 | |
| 430 | - # strings which do not contain unicode characters>256): | |
| 431 | - return filter(ord, s) | |
| 432 | - | |
| 433 | - | |
| 434 | -def filetime2datetime(filetime): | |
| 435 | - """ | |
| 436 | - convert FILETIME (64 bits int) to Python datetime.datetime | |
| 437 | - """ | |
| 438 | - # TODO: manage exception when microseconds is too large | |
| 439 | - # inspired from http://code.activestate.com/recipes/511425-filetime-to-datetime/ | |
| 440 | - _FILETIME_null_date = datetime.datetime(1601, 1, 1, 0, 0, 0) | |
| 441 | - #debug('timedelta days=%d' % (filetime/(10*1000000*3600*24))) | |
| 442 | - return _FILETIME_null_date + datetime.timedelta(microseconds=filetime/10) | |
| 443 | - | |
| 444 | - | |
| 445 | - | |
| 446 | -#=== CLASSES ================================================================== | |
| 447 | - | |
| 448 | -class OleMetadata: | |
| 449 | - """ | |
| 450 | - class to parse and store metadata from standard properties of OLE files. | |
| 451 | - | |
| 452 | - Available attributes: | |
| 453 | - codepage, title, subject, author, keywords, comments, template, | |
| 454 | - last_saved_by, revision_number, total_edit_time, last_printed, create_time, | |
| 455 | - last_saved_time, num_pages, num_words, num_chars, thumbnail, | |
| 456 | - creating_application, security, codepage_doc, category, presentation_target, | |
| 457 | - bytes, lines, paragraphs, slides, notes, hidden_slides, mm_clips, | |
| 458 | - scale_crop, heading_pairs, titles_of_parts, manager, company, links_dirty, | |
| 459 | - chars_with_spaces, unused, shared_doc, link_base, hlinks, hlinks_changed, | |
| 460 | - version, dig_sig, content_type, content_status, language, doc_version | |
| 461 | - | |
| 462 | - Note: an attribute is set to None when not present in the properties of the | |
| 463 | - OLE file. | |
| 464 | - | |
| 465 | - References for SummaryInformation stream: | |
| 466 | - - http://msdn.microsoft.com/en-us/library/dd942545.aspx | |
| 467 | - - http://msdn.microsoft.com/en-us/library/dd925819%28v=office.12%29.aspx | |
| 468 | - - http://msdn.microsoft.com/en-us/library/windows/desktop/aa380376%28v=vs.85%29.aspx | |
| 469 | - - http://msdn.microsoft.com/en-us/library/aa372045.aspx | |
| 470 | - - http://sedna-soft.de/summary-information-stream/ | |
| 471 | - - http://poi.apache.org/apidocs/org/apache/poi/hpsf/SummaryInformation.html | |
| 472 | - | |
| 473 | - References for DocumentSummaryInformation stream: | |
| 474 | - - http://msdn.microsoft.com/en-us/library/dd945671%28v=office.12%29.aspx | |
| 475 | - - http://msdn.microsoft.com/en-us/library/windows/desktop/aa380374%28v=vs.85%29.aspx | |
| 476 | - - http://poi.apache.org/apidocs/org/apache/poi/hpsf/DocumentSummaryInformation.html | |
| 477 | - | |
| 478 | - new in version 0.25 | |
| 479 | - """ | |
| 480 | - | |
| 481 | - # attribute names for SummaryInformation stream properties: | |
| 482 | - # (ordered by property id, starting at 1) | |
| 483 | - SUMMARY_ATTRIBS = ['codepage', 'title', 'subject', 'author', 'keywords', 'comments', | |
| 484 | - 'template', 'last_saved_by', 'revision_number', 'total_edit_time', | |
| 485 | - 'last_printed', 'create_time', 'last_saved_time', 'num_pages', | |
| 486 | - 'num_words', 'num_chars', 'thumbnail', 'creating_application', | |
| 487 | - 'security'] | |
| 488 | - | |
| 489 | - # attribute names for DocumentSummaryInformation stream properties: | |
| 490 | - # (ordered by property id, starting at 1) | |
| 491 | - DOCSUM_ATTRIBS = ['codepage_doc', 'category', 'presentation_target', 'bytes', 'lines', 'paragraphs', | |
| 492 | - 'slides', 'notes', 'hidden_slides', 'mm_clips', | |
| 493 | - 'scale_crop', 'heading_pairs', 'titles_of_parts', 'manager', | |
| 494 | - 'company', 'links_dirty', 'chars_with_spaces', 'unused', 'shared_doc', | |
| 495 | - 'link_base', 'hlinks', 'hlinks_changed', 'version', 'dig_sig', | |
| 496 | - 'content_type', 'content_status', 'language', 'doc_version'] | |
| 497 | - | |
| 498 | - def __init__(self): | |
| 499 | - """ | |
| 500 | - Constructor for OleMetadata | |
| 501 | - All attributes are set to None by default | |
| 502 | - """ | |
| 503 | - # properties from SummaryInformation stream | |
| 504 | - self.codepage = None | |
| 505 | - self.title = None | |
| 506 | - self.subject = None | |
| 507 | - self.author = None | |
| 508 | - self.keywords = None | |
| 509 | - self.comments = None | |
| 510 | - self.template = None | |
| 511 | - self.last_saved_by = None | |
| 512 | - self.revision_number = None | |
| 513 | - self.total_edit_time = None | |
| 514 | - self.last_printed = None | |
| 515 | - self.create_time = None | |
| 516 | - self.last_saved_time = None | |
| 517 | - self.num_pages = None | |
| 518 | - self.num_words = None | |
| 519 | - self.num_chars = None | |
| 520 | - self.thumbnail = None | |
| 521 | - self.creating_application = None | |
| 522 | - self.security = None | |
| 523 | - # properties from DocumentSummaryInformation stream | |
| 524 | - self.codepage_doc = None | |
| 525 | - self.category = None | |
| 526 | - self.presentation_target = None | |
| 527 | - self.bytes = None | |
| 528 | - self.lines = None | |
| 529 | - self.paragraphs = None | |
| 530 | - self.slides = None | |
| 531 | - self.notes = None | |
| 532 | - self.hidden_slides = None | |
| 533 | - self.mm_clips = None | |
| 534 | - self.scale_crop = None | |
| 535 | - self.heading_pairs = None | |
| 536 | - self.titles_of_parts = None | |
| 537 | - self.manager = None | |
| 538 | - self.company = None | |
| 539 | - self.links_dirty = None | |
| 540 | - self.chars_with_spaces = None | |
| 541 | - self.unused = None | |
| 542 | - self.shared_doc = None | |
| 543 | - self.link_base = None | |
| 544 | - self.hlinks = None | |
| 545 | - self.hlinks_changed = None | |
| 546 | - self.version = None | |
| 547 | - self.dig_sig = None | |
| 548 | - self.content_type = None | |
| 549 | - self.content_status = None | |
| 550 | - self.language = None | |
| 551 | - self.doc_version = None | |
| 552 | - | |
| 553 | - | |
| 554 | - def parse_properties(self, olefile): | |
| 555 | - """ | |
| 556 | - Parse standard properties of an OLE file, from the streams | |
| 557 | - "\x05SummaryInformation" and "\x05DocumentSummaryInformation", | |
| 558 | - if present. | |
| 559 | - Properties are converted to strings, integers or python datetime objects. | |
| 560 | - If a property is not present, its value is set to None. | |
| 561 | - """ | |
| 562 | - # first set all attributes to None: | |
| 563 | - for attrib in (self.SUMMARY_ATTRIBS + self.DOCSUM_ATTRIBS): | |
| 564 | - setattr(self, attrib, None) | |
| 565 | - if olefile.exists("\x05SummaryInformation"): | |
| 566 | - # get properties from the stream: | |
| 567 | - # (converting timestamps to python datetime, except total_edit_time, | |
| 568 | - # which is property #10) | |
| 569 | - props = olefile.getproperties("\x05SummaryInformation", | |
| 570 | - convert_time=True, no_conversion=[10]) | |
| 571 | - # store them into this object's attributes: | |
| 572 | - for i in range(len(self.SUMMARY_ATTRIBS)): | |
| 573 | - # ids for standards properties start at 0x01, until 0x13 | |
| 574 | - value = props.get(i+1, None) | |
| 575 | - setattr(self, self.SUMMARY_ATTRIBS[i], value) | |
| 576 | - if olefile.exists("\x05DocumentSummaryInformation"): | |
| 577 | - # get properties from the stream: | |
| 578 | - props = olefile.getproperties("\x05DocumentSummaryInformation", | |
| 579 | - convert_time=True) | |
| 580 | - # store them into this object's attributes: | |
| 581 | - for i in range(len(self.DOCSUM_ATTRIBS)): | |
| 582 | - # ids for standards properties start at 0x01, until 0x13 | |
| 583 | - value = props.get(i+1, None) | |
| 584 | - setattr(self, self.DOCSUM_ATTRIBS[i], value) | |
| 585 | - | |
| 586 | - def dump(self): | |
| 587 | - """ | |
| 588 | - Dump all metadata, for debugging purposes. | |
| 589 | - """ | |
| 590 | - print 'Properties from SummaryInformation stream:' | |
| 591 | - for prop in self.SUMMARY_ATTRIBS: | |
| 592 | - value = getattr(self, prop) | |
| 593 | - print '- %s: %s' % (prop, repr(value)) | |
| 594 | - print 'Properties from DocumentSummaryInformation stream:' | |
| 595 | - for prop in self.DOCSUM_ATTRIBS: | |
| 596 | - value = getattr(self, prop) | |
| 597 | - print '- %s: %s' % (prop, repr(value)) | |
| 598 | - | |
| 599 | - | |
| 600 | -#--- _OleStream --------------------------------------------------------------- | |
| 601 | - | |
| 602 | -class _OleStream(StringIO.StringIO): | |
| 603 | - """ | |
| 604 | - OLE2 Stream | |
| 605 | - | |
| 606 | - Returns a read-only file object which can be used to read | |
| 607 | - the contents of a OLE stream (instance of the StringIO class). | |
| 608 | - To open a stream, use the openstream method in the OleFile class. | |
| 609 | - | |
| 610 | - This function can be used with either ordinary streams, | |
| 611 | - or ministreams, depending on the offset, sectorsize, and | |
| 612 | - fat table arguments. | |
| 613 | - | |
| 614 | - Attributes: | |
| 615 | - - size: actual size of data stream, after it was opened. | |
| 616 | - """ | |
| 617 | - | |
| 618 | - # FIXME: should store the list of sects obtained by following | |
| 619 | - # the fat chain, and load new sectors on demand instead of | |
| 620 | - # loading it all in one go. | |
| 621 | - | |
| 622 | - def __init__(self, fp, sect, size, offset, sectorsize, fat, filesize): | |
| 623 | - """ | |
| 624 | - Constructor for _OleStream class. | |
| 625 | - | |
| 626 | - fp : file object, the OLE container or the MiniFAT stream | |
| 627 | - sect : sector index of first sector in the stream | |
| 628 | - size : total size of the stream | |
| 629 | - offset : offset in bytes for the first FAT or MiniFAT sector | |
| 630 | - sectorsize: size of one sector | |
| 631 | - fat : array/list of sector indexes (FAT or MiniFAT) | |
| 632 | - filesize : size of OLE file (for debugging) | |
| 633 | - return : a StringIO instance containing the OLE stream | |
| 634 | - """ | |
| 635 | - debug('_OleStream.__init__:') | |
| 636 | - debug(' sect=%d (%X), size=%d, offset=%d, sectorsize=%d, len(fat)=%d, fp=%s' | |
| 637 | - %(sect,sect,size,offset,sectorsize,len(fat), repr(fp))) | |
| 638 | - #[PL] To detect malformed documents with FAT loops, we compute the | |
| 639 | - # expected number of sectors in the stream: | |
| 640 | - unknown_size = False | |
| 641 | - if size==0x7FFFFFFF: | |
| 642 | - # this is the case when called from OleFileIO._open(), and stream | |
| 643 | - # size is not known in advance (for example when reading the | |
| 644 | - # Directory stream). Then we can only guess maximum size: | |
| 645 | - size = len(fat)*sectorsize | |
| 646 | - # and we keep a record that size was unknown: | |
| 647 | - unknown_size = True | |
| 648 | - debug(' stream with UNKNOWN SIZE') | |
| 649 | - nb_sectors = (size + (sectorsize-1)) / sectorsize | |
| 650 | - debug('nb_sectors = %d' % nb_sectors) | |
| 651 | - # This number should (at least) be less than the total number of | |
| 652 | - # sectors in the given FAT: | |
| 653 | - if nb_sectors > len(fat): | |
| 654 | - raise IOError, 'malformed OLE document, stream too large' | |
| 655 | - # optimization(?): data is first a list of strings, and join() is called | |
| 656 | - # at the end to concatenate all in one string. | |
| 657 | - # (this may not be really useful with recent Python versions) | |
| 658 | - data = [] | |
| 659 | - # if size is zero, then first sector index should be ENDOFCHAIN: | |
| 660 | - if size == 0 and sect != ENDOFCHAIN: | |
| 661 | - debug('size == 0 and sect != ENDOFCHAIN:') | |
| 662 | - raise IOError, 'incorrect OLE sector index for empty stream' | |
| 663 | - #[PL] A fixed-length for loop is used instead of an undefined while | |
| 664 | - # loop to avoid DoS attacks: | |
| 665 | - for i in xrange(nb_sectors): | |
| 666 | - # Sector index may be ENDOFCHAIN, but only if size was unknown | |
| 667 | - if sect == ENDOFCHAIN: | |
| 668 | - if unknown_size: | |
| 669 | - break | |
| 670 | - else: | |
| 671 | - # else this means that the stream is smaller than declared: | |
| 672 | - debug('sect=ENDOFCHAIN before expected size') | |
| 673 | - raise IOError, 'incomplete OLE stream' | |
| 674 | - # sector index should be within FAT: | |
| 675 | - if sect<0 or sect>=len(fat): | |
| 676 | - debug('sect=%d (%X) / len(fat)=%d' % (sect, sect, len(fat))) | |
| 677 | - debug('i=%d / nb_sectors=%d' %(i, nb_sectors)) | |
| 678 | -## tmp_data = string.join(data, "") | |
| 679 | -## f = open('test_debug.bin', 'wb') | |
| 680 | -## f.write(tmp_data) | |
| 681 | -## f.close() | |
| 682 | -## debug('data read so far: %d bytes' % len(tmp_data)) | |
| 683 | - raise IOError, 'incorrect OLE FAT, sector index out of range' | |
| 684 | - #TODO: merge this code with OleFileIO.getsect() ? | |
| 685 | - #TODO: check if this works with 4K sectors: | |
| 686 | - try: | |
| 687 | - fp.seek(offset + sectorsize * sect) | |
| 688 | - except: | |
| 689 | - debug('sect=%d, seek=%d, filesize=%d' % | |
| 690 | - (sect, offset+sectorsize*sect, filesize)) | |
| 691 | - raise IOError, 'OLE sector index out of range' | |
| 692 | - sector_data = fp.read(sectorsize) | |
| 693 | - # [PL] check if there was enough data: | |
| 694 | - # Note: if sector is the last of the file, sometimes it is not a | |
| 695 | - # complete sector (of 512 or 4K), so we may read less than | |
| 696 | - # sectorsize. | |
| 697 | - if len(sector_data)!=sectorsize and sect!=(len(fat)-1): | |
| 698 | - debug('sect=%d / len(fat)=%d, seek=%d / filesize=%d, len read=%d' % | |
| 699 | - (sect, len(fat), offset+sectorsize*sect, filesize, len(sector_data))) | |
| 700 | - debug('seek+len(read)=%d' % (offset+sectorsize*sect+len(sector_data))) | |
| 701 | - raise IOError, 'incomplete OLE sector' | |
| 702 | - data.append(sector_data) | |
| 703 | - # jump to next sector in the FAT: | |
| 704 | - try: | |
| 705 | - sect = fat[sect] | |
| 706 | - except IndexError: | |
| 707 | - # [PL] if pointer is out of the FAT an exception is raised | |
| 708 | - raise IOError, 'incorrect OLE FAT, sector index out of range' | |
| 709 | - #[PL] Last sector should be a "end of chain" marker: | |
| 710 | - if sect != ENDOFCHAIN: | |
| 711 | - raise IOError, 'incorrect last sector index in OLE stream' | |
| 712 | - data = string.join(data, "") | |
| 713 | - # Data is truncated to the actual stream size: | |
| 714 | - if len(data) >= size: | |
| 715 | - data = data[:size] | |
| 716 | - # actual stream size is stored for future use: | |
| 717 | - self.size = size | |
| 718 | - elif unknown_size: | |
| 719 | - # actual stream size was not known, now we know the size of read | |
| 720 | - # data: | |
| 721 | - self.size = len(data) | |
| 722 | - else: | |
| 723 | - # read data is less than expected: | |
| 724 | - debug('len(data)=%d, size=%d' % (len(data), size)) | |
| 725 | - raise IOError, 'OLE stream size is less than declared' | |
| 726 | - # when all data is read in memory, StringIO constructor is called | |
| 727 | - StringIO.StringIO.__init__(self, data) | |
| 728 | - # Then the _OleStream object can be used as a read-only file object. | |
| 729 | - | |
| 730 | - | |
| 731 | -#--- _OleDirectoryEntry ------------------------------------------------------- | |
| 732 | - | |
| 733 | -class _OleDirectoryEntry: | |
| 734 | - | |
| 735 | - """ | |
| 736 | - OLE2 Directory Entry | |
| 737 | - """ | |
| 738 | - #[PL] parsing code moved from OleFileIO.loaddirectory | |
| 739 | - | |
| 740 | - # struct to parse directory entries: | |
| 741 | - # <: little-endian byte order, standard sizes | |
| 742 | - # (note: this should guarantee that Q returns a 64 bits int) | |
| 743 | - # 64s: string containing entry name in unicode (max 31 chars) + null char | |
| 744 | - # H: uint16, number of bytes used in name buffer, including null = (len+1)*2 | |
| 745 | - # B: uint8, dir entry type (between 0 and 5) | |
| 746 | - # B: uint8, color: 0=black, 1=red | |
| 747 | - # I: uint32, index of left child node in the red-black tree, NOSTREAM if none | |
| 748 | - # I: uint32, index of right child node in the red-black tree, NOSTREAM if none | |
| 749 | - # I: uint32, index of child root node if it is a storage, else NOSTREAM | |
| 750 | - # 16s: CLSID, unique identifier (only used if it is a storage) | |
| 751 | - # I: uint32, user flags | |
| 752 | - # Q (was 8s): uint64, creation timestamp or zero | |
| 753 | - # Q (was 8s): uint64, modification timestamp or zero | |
| 754 | - # I: uint32, SID of first sector if stream or ministream, SID of 1st sector | |
| 755 | - # of stream containing ministreams if root entry, 0 otherwise | |
| 756 | - # I: uint32, total stream size in bytes if stream (low 32 bits), 0 otherwise | |
| 757 | - # I: uint32, total stream size in bytes if stream (high 32 bits), 0 otherwise | |
| 758 | - STRUCT_DIRENTRY = '<64sHBBIII16sIQQIII' | |
| 759 | - # size of a directory entry: 128 bytes | |
| 760 | - DIRENTRY_SIZE = 128 | |
| 761 | - assert struct.calcsize(STRUCT_DIRENTRY) == DIRENTRY_SIZE | |
| 762 | - | |
| 763 | - | |
| 764 | - def __init__(self, entry, sid, olefile): | |
| 765 | - """ | |
| 766 | - Constructor for an _OleDirectoryEntry object. | |
| 767 | - Parses a 128-bytes entry from the OLE Directory stream. | |
| 768 | - | |
| 769 | - entry : string (must be 128 bytes long) | |
| 770 | - sid : index of this directory entry in the OLE file directory | |
| 771 | - olefile: OleFileIO containing this directory entry | |
| 772 | - """ | |
| 773 | - self.sid = sid | |
| 774 | - # ref to olefile is stored for future use | |
| 775 | - self.olefile = olefile | |
| 776 | - # kids is a list of children entries, if this entry is a storage: | |
| 777 | - # (list of _OleDirectoryEntry objects) | |
| 778 | - self.kids = [] | |
| 779 | - # kids_dict is a dictionary of children entries, indexed by their | |
| 780 | - # name in lowercase: used to quickly find an entry, and to detect | |
| 781 | - # duplicates | |
| 782 | - self.kids_dict = {} | |
| 783 | - # flag used to detect if the entry is referenced more than once in | |
| 784 | - # directory: | |
| 785 | - self.used = False | |
| 786 | - # decode DirEntry | |
| 787 | - ( | |
| 788 | - name, | |
| 789 | - namelength, | |
| 790 | - self.entry_type, | |
| 791 | - self.color, | |
| 792 | - self.sid_left, | |
| 793 | - self.sid_right, | |
| 794 | - self.sid_child, | |
| 795 | - clsid, | |
| 796 | - self.dwUserFlags, | |
| 797 | - self.createTime, | |
| 798 | - self.modifyTime, | |
| 799 | - self.isectStart, | |
| 800 | - sizeLow, | |
| 801 | - sizeHigh | |
| 802 | - ) = struct.unpack(_OleDirectoryEntry.STRUCT_DIRENTRY, entry) | |
| 803 | - if self.entry_type not in [STGTY_ROOT, STGTY_STORAGE, STGTY_STREAM, STGTY_EMPTY]: | |
| 804 | - olefile._raise_defect(DEFECT_INCORRECT, 'unhandled OLE storage type') | |
| 805 | - # only first directory entry can (and should) be root: | |
| 806 | - if self.entry_type == STGTY_ROOT and sid != 0: | |
| 807 | - olefile._raise_defect(DEFECT_INCORRECT, 'duplicate OLE root entry') | |
| 808 | - if sid == 0 and self.entry_type != STGTY_ROOT: | |
| 809 | - olefile._raise_defect(DEFECT_INCORRECT, 'incorrect OLE root entry') | |
| 810 | - #debug (struct.unpack(fmt_entry, entry[:len_entry])) | |
| 811 | - # name should be at most 31 unicode characters + null character, | |
| 812 | - # so 64 bytes in total (31*2 + 2): | |
| 813 | - if namelength>64: | |
| 814 | - olefile._raise_defect(DEFECT_INCORRECT, 'incorrect DirEntry name length') | |
| 815 | - # if exception not raised, namelength is set to the maximum value: | |
| 816 | - namelength = 64 | |
| 817 | - # only characters without ending null char are kept: | |
| 818 | - name = name[:(namelength-2)] | |
| 819 | - # name is converted from unicode to Latin-1: | |
| 820 | - self.name = _unicode(name) | |
| 821 | - | |
| 822 | - debug('DirEntry SID=%d: %s' % (self.sid, repr(self.name))) | |
| 823 | - debug(' - type: %d' % self.entry_type) | |
| 824 | - debug(' - sect: %d' % self.isectStart) | |
| 825 | - debug(' - SID left: %d, right: %d, child: %d' % (self.sid_left, | |
| 826 | - self.sid_right, self.sid_child)) | |
| 827 | - | |
| 828 | - # sizeHigh is only used for 4K sectors, it should be zero for 512 bytes | |
| 829 | - # sectors, BUT apparently some implementations set it as 0xFFFFFFFFL, 1 | |
| 830 | - # or some other value so it cannot be raised as a defect in general: | |
| 831 | - if olefile.sectorsize == 512: | |
| 832 | - if sizeHigh != 0 and sizeHigh != 0xFFFFFFFFL: | |
| 833 | - debug('sectorsize=%d, sizeLow=%d, sizeHigh=%d (%X)' % | |
| 834 | - (olefile.sectorsize, sizeLow, sizeHigh, sizeHigh)) | |
| 835 | - olefile._raise_defect(DEFECT_UNSURE, 'incorrect OLE stream size') | |
| 836 | - self.size = sizeLow | |
| 837 | - else: | |
| 838 | - self.size = sizeLow + (long(sizeHigh)<<32) | |
| 839 | - debug(' - size: %d (sizeLow=%d, sizeHigh=%d)' % (self.size, sizeLow, sizeHigh)) | |
| 840 | - | |
| 841 | - self.clsid = _clsid(clsid) | |
| 842 | - # a storage should have a null size, BUT some implementations such as | |
| 843 | - # Word 8 for Mac seem to allow non-null values => Potential defect: | |
| 844 | - if self.entry_type == STGTY_STORAGE and self.size != 0: | |
| 845 | - olefile._raise_defect(DEFECT_POTENTIAL, 'OLE storage with size>0') | |
| 846 | - # check if stream is not already referenced elsewhere: | |
| 847 | - if self.entry_type in (STGTY_ROOT, STGTY_STREAM) and self.size>0: | |
| 848 | - if self.size < olefile.minisectorcutoff \ | |
| 849 | - and self.entry_type==STGTY_STREAM: # only streams can be in MiniFAT | |
| 850 | - # ministream object | |
| 851 | - minifat = True | |
| 852 | - else: | |
| 853 | - minifat = False | |
| 854 | - olefile._check_duplicate_stream(self.isectStart, minifat) | |
| 855 | - | |
| 856 | - | |
| 857 | - | |
| 858 | - def build_storage_tree(self): | |
| 859 | - """ | |
| 860 | - Read and build the red-black tree attached to this _OleDirectoryEntry | |
| 861 | - object, if it is a storage. | |
| 862 | - Note that this method builds a tree of all subentries, so it should | |
| 863 | - only be called for the root object once. | |
| 864 | - """ | |
| 865 | - debug('build_storage_tree: SID=%d - %s - sid_child=%d' | |
| 866 | - % (self.sid, repr(self.name), self.sid_child)) | |
| 867 | - if self.sid_child != NOSTREAM: | |
| 868 | - # if child SID is not NOSTREAM, then this entry is a storage. | |
| 869 | - # Let's walk through the tree of children to fill the kids list: | |
| 870 | - self.append_kids(self.sid_child) | |
| 871 | - | |
| 872 | - # Note from OpenOffice documentation: the safest way is to | |
| 873 | - # recreate the tree because some implementations may store broken | |
| 874 | - # red-black trees... | |
| 875 | - | |
| 876 | - # in the OLE file, entries are sorted on (length, name). | |
| 877 | - # for convenience, we sort them on name instead: | |
| 878 | - # (see __cmp__ method in this class) | |
| 879 | - self.kids.sort() | |
| 880 | - | |
| 881 | - | |
| 882 | - def append_kids(self, child_sid): | |
| 883 | - """ | |
| 884 | - Walk through red-black tree of children of this directory entry to add | |
| 885 | - all of them to the kids list. (recursive method) | |
| 886 | - | |
| 887 | - child_sid : index of child directory entry to use, or None when called | |
| 888 | - first time for the root. (only used during recursion) | |
| 889 | - """ | |
| 890 | - #[PL] this method was added to use simple recursion instead of a complex | |
| 891 | - # algorithm. | |
| 892 | - # if this is not a storage or a leaf of the tree, nothing to do: | |
| 893 | - if child_sid == NOSTREAM: | |
| 894 | - return | |
| 895 | - # check if child SID is in the proper range: | |
| 896 | - if child_sid<0 or child_sid>=len(self.olefile.direntries): | |
| 897 | - self.olefile._raise_defect(DEFECT_FATAL, 'OLE DirEntry index out of range') | |
| 898 | - # get child direntry: | |
| 899 | - child = self.olefile._load_direntry(child_sid) #direntries[child_sid] | |
| 900 | - debug('append_kids: child_sid=%d - %s - sid_left=%d, sid_right=%d, sid_child=%d' | |
| 901 | - % (child.sid, repr(child.name), child.sid_left, child.sid_right, child.sid_child)) | |
| 902 | - # the directory entries are organized as a red-black tree. | |
| 903 | - # (cf. Wikipedia for details) | |
| 904 | - # First walk through left side of the tree: | |
| 905 | - self.append_kids(child.sid_left) | |
| 906 | - # Check if its name is not already used (case-insensitive): | |
| 907 | - name_lower = child.name.lower() | |
| 908 | - if self.kids_dict.has_key(name_lower): | |
| 909 | - self.olefile._raise_defect(DEFECT_INCORRECT, | |
| 910 | - "Duplicate filename in OLE storage") | |
| 911 | - # Then the child_sid _OleDirectoryEntry object is appended to the | |
| 912 | - # kids list and dictionary: | |
| 913 | - self.kids.append(child) | |
| 914 | - self.kids_dict[name_lower] = child | |
| 915 | - # Check if kid was not already referenced in a storage: | |
| 916 | - if child.used: | |
| 917 | - self.olefile._raise_defect(DEFECT_INCORRECT, | |
| 918 | - 'OLE Entry referenced more than once') | |
| 919 | - child.used = True | |
| 920 | - # Finally walk through right side of the tree: | |
| 921 | - self.append_kids(child.sid_right) | |
| 922 | - # Afterwards build kid's own tree if it's also a storage: | |
| 923 | - child.build_storage_tree() | |
| 924 | - | |
| 925 | - | |
| 926 | - def __cmp__(self, other): | |
| 927 | - "Compare entries by name" | |
| 928 | - return cmp(self.name, other.name) | |
| 929 | - #TODO: replace by the same function as MS implementation ? | |
| 930 | - # (order by name length first, then case-insensitive order) | |
| 931 | - | |
| 932 | - | |
| 933 | - def dump(self, tab = 0): | |
| 934 | - "Dump this entry, and all its subentries (for debug purposes only)" | |
| 935 | - TYPES = ["(invalid)", "(storage)", "(stream)", "(lockbytes)", | |
| 936 | - "(property)", "(root)"] | |
| 937 | - print " "*tab + repr(self.name), TYPES[self.entry_type], | |
| 938 | - if self.entry_type in (STGTY_STREAM, STGTY_ROOT): | |
| 939 | - print self.size, "bytes", | |
| 940 | ||
| 941 | - if self.entry_type in (STGTY_STORAGE, STGTY_ROOT) and self.clsid: | |
| 942 | - print " "*tab + "{%s}" % self.clsid | |
| 943 | - | |
| 944 | - for kid in self.kids: | |
| 945 | - kid.dump(tab + 2) | |
| 946 | - | |
| 947 | - | |
| 948 | - def getmtime(self): | |
| 949 | - """ | |
| 950 | - Return modification time of a directory entry. | |
| 951 | - | |
| 952 | - return: None if modification time is null, a python datetime object | |
| 953 | - otherwise (UTC timezone) | |
| 954 | - | |
| 955 | - new in version 0.26 | |
| 956 | - """ | |
| 957 | - if self.modifyTime == 0: | |
| 958 | - return None | |
| 959 | - return filetime2datetime(self.modifyTime) | |
| 960 | - | |
| 961 | - | |
| 962 | - def getctime(self): | |
| 963 | - """ | |
| 964 | - Return creation time of a directory entry. | |
| 965 | - | |
| 966 | - return: None if modification time is null, a python datetime object | |
| 967 | - otherwise (UTC timezone) | |
| 968 | - | |
| 969 | - new in version 0.26 | |
| 970 | - """ | |
| 971 | - if self.createTime == 0: | |
| 972 | - return None | |
| 973 | - return filetime2datetime(self.createTime) | |
| 974 | - | |
| 975 | - | |
| 976 | -#--- OleFileIO ---------------------------------------------------------------- | |
| 977 | - | |
| 978 | -class OleFileIO: | |
| 979 | - """ | |
| 980 | - OLE container object | |
| 981 | - | |
| 982 | - This class encapsulates the interface to an OLE 2 structured | |
| 983 | - storage file. Use the {@link listdir} and {@link openstream} methods to | |
| 984 | - access the contents of this file. | |
| 985 | - | |
| 986 | - Object names are given as a list of strings, one for each subentry | |
| 987 | - level. The root entry should be omitted. For example, the following | |
| 988 | - code extracts all image streams from a Microsoft Image Composer file: | |
| 989 | - | |
| 990 | - ole = OleFileIO("fan.mic") | |
| 991 | - | |
| 992 | - for entry in ole.listdir(): | |
| 993 | - if entry[1:2] == "Image": | |
| 994 | - fin = ole.openstream(entry) | |
| 995 | - fout = open(entry[0:1], "wb") | |
| 996 | - while True: | |
| 997 | - s = fin.read(8192) | |
| 998 | - if not s: | |
| 999 | - break | |
| 1000 | - fout.write(s) | |
| 1001 | - | |
| 1002 | - You can use the viewer application provided with the Python Imaging | |
| 1003 | - Library to view the resulting files (which happens to be standard | |
| 1004 | - TIFF files). | |
| 1005 | - """ | |
| 1006 | - | |
| 1007 | - def __init__(self, filename = None, raise_defects=DEFECT_FATAL): | |
| 1008 | - """ | |
| 1009 | - Constructor for OleFileIO class. | |
| 1010 | - | |
| 1011 | - filename: file to open. | |
| 1012 | - raise_defects: minimal level for defects to be raised as exceptions. | |
| 1013 | - (use DEFECT_FATAL for a typical application, DEFECT_INCORRECT for a | |
| 1014 | - security-oriented application, see source code for details) | |
| 1015 | - """ | |
| 1016 | - # minimal level for defects to be raised as exceptions: | |
| 1017 | - self._raise_defects_level = raise_defects | |
| 1018 | - # list of defects/issues not raised as exceptions: | |
| 1019 | - # tuples of (exception type, message) | |
| 1020 | - self.parsing_issues = [] | |
| 1021 | - if filename: | |
| 1022 | - self.open(filename) | |
| 1023 | - | |
| 1024 | - | |
| 1025 | - def _raise_defect(self, defect_level, message, exception_type=IOError): | |
| 1026 | - """ | |
| 1027 | - This method should be called for any defect found during file parsing. | |
| 1028 | - It may raise an IOError exception according to the minimal level chosen | |
| 1029 | - for the OleFileIO object. | |
| 1030 | - | |
| 1031 | - defect_level: defect level, possible values are: | |
| 1032 | - DEFECT_UNSURE : a case which looks weird, but not sure it's a defect | |
| 1033 | - DEFECT_POTENTIAL : a potential defect | |
| 1034 | - DEFECT_INCORRECT : an error according to specifications, but parsing can go on | |
| 1035 | - DEFECT_FATAL : an error which cannot be ignored, parsing is impossible | |
| 1036 | - message: string describing the defect, used with raised exception. | |
| 1037 | - exception_type: exception class to be raised, IOError by default | |
| 1038 | - """ | |
| 1039 | - # added by [PL] | |
| 1040 | - if defect_level >= self._raise_defects_level: | |
| 1041 | - raise exception_type, message | |
| 1042 | - else: | |
| 1043 | - # just record the issue, no exception raised: | |
| 1044 | - self.parsing_issues.append((exception_type, message)) | |
| 1045 | - | |
| 1046 | - | |
| 1047 | - def open(self, filename): | |
| 1048 | - """ | |
| 1049 | - Open an OLE2 file. | |
| 1050 | - Reads the header, FAT and directory. | |
| 1051 | - | |
| 1052 | - filename: string-like or file-like object | |
| 1053 | - """ | |
| 1054 | - #[PL] check if filename is a string-like or file-like object: | |
| 1055 | - # (it is better to check for a read() method) | |
| 1056 | - if hasattr(filename, 'read'): | |
| 1057 | - # file-like object | |
| 1058 | - self.fp = filename | |
| 1059 | - else: | |
| 1060 | - # string-like object: filename of file on disk | |
| 1061 | - #TODO: if larger than 1024 bytes, this could be the actual data => StringIO | |
| 1062 | - self.fp = open(filename, "rb") | |
| 1063 | - # old code fails if filename is not a plain string: | |
| 1064 | - #if type(filename) == type(""): | |
| 1065 | - # self.fp = open(filename, "rb") | |
| 1066 | - #else: | |
| 1067 | - # self.fp = filename | |
| 1068 | - # obtain the filesize by using seek and tell, which should work on most | |
| 1069 | - # file-like objects: | |
| 1070 | - #TODO: do it above, using getsize with filename when possible? | |
| 1071 | - #TODO: fix code to fail with clear exception when filesize cannot be obtained | |
| 1072 | - self.fp.seek(0, os.SEEK_END) | |
| 1073 | - try: | |
| 1074 | - filesize = self.fp.tell() | |
| 1075 | - finally: | |
| 1076 | - self.fp.seek(0) | |
| 1077 | - self._filesize = filesize | |
| 1078 | - | |
| 1079 | - # lists of streams in FAT and MiniFAT, to detect duplicate references | |
| 1080 | - # (list of indexes of first sectors of each stream) | |
| 1081 | - self._used_streams_fat = [] | |
| 1082 | - self._used_streams_minifat = [] | |
| 1083 | - | |
| 1084 | - header = self.fp.read(512) | |
| 1085 | - | |
| 1086 | - if len(header) != 512 or header[:8] != MAGIC: | |
| 1087 | - self._raise_defect(DEFECT_FATAL, "not an OLE2 structured storage file") | |
| 1088 | - | |
| 1089 | - # [PL] header structure according to AAF specifications: | |
| 1090 | - ##Header | |
| 1091 | - ##struct StructuredStorageHeader { // [offset from start (bytes), length (bytes)] | |
| 1092 | - ##BYTE _abSig[8]; // [00H,08] {0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, | |
| 1093 | - ## // 0x1a, 0xe1} for current version | |
| 1094 | - ##CLSID _clsid; // [08H,16] reserved must be zero (WriteClassStg/ | |
| 1095 | - ## // GetClassFile uses root directory class id) | |
| 1096 | - ##USHORT _uMinorVersion; // [18H,02] minor version of the format: 33 is | |
| 1097 | - ## // written by reference implementation | |
| 1098 | - ##USHORT _uDllVersion; // [1AH,02] major version of the dll/format: 3 for | |
| 1099 | - ## // 512-byte sectors, 4 for 4 KB sectors | |
| 1100 | - ##USHORT _uByteOrder; // [1CH,02] 0xFFFE: indicates Intel byte-ordering | |
| 1101 | - ##USHORT _uSectorShift; // [1EH,02] size of sectors in power-of-two; | |
| 1102 | - ## // typically 9 indicating 512-byte sectors | |
| 1103 | - ##USHORT _uMiniSectorShift; // [20H,02] size of mini-sectors in power-of-two; | |
| 1104 | - ## // typically 6 indicating 64-byte mini-sectors | |
| 1105 | - ##USHORT _usReserved; // [22H,02] reserved, must be zero | |
| 1106 | - ##ULONG _ulReserved1; // [24H,04] reserved, must be zero | |
| 1107 | - ##FSINDEX _csectDir; // [28H,04] must be zero for 512-byte sectors, | |
| 1108 | - ## // number of SECTs in directory chain for 4 KB | |
| 1109 | - ## // sectors | |
| 1110 | - ##FSINDEX _csectFat; // [2CH,04] number of SECTs in the FAT chain | |
| 1111 | - ##SECT _sectDirStart; // [30H,04] first SECT in the directory chain | |
| 1112 | - ##DFSIGNATURE _signature; // [34H,04] signature used for transactions; must | |
| 1113 | - ## // be zero. The reference implementation | |
| 1114 | - ## // does not support transactions | |
| 1115 | - ##ULONG _ulMiniSectorCutoff; // [38H,04] maximum size for a mini stream; | |
| 1116 | - ## // typically 4096 bytes | |
| 1117 | - ##SECT _sectMiniFatStart; // [3CH,04] first SECT in the MiniFAT chain | |
| 1118 | - ##FSINDEX _csectMiniFat; // [40H,04] number of SECTs in the MiniFAT chain | |
| 1119 | - ##SECT _sectDifStart; // [44H,04] first SECT in the DIFAT chain | |
| 1120 | - ##FSINDEX _csectDif; // [48H,04] number of SECTs in the DIFAT chain | |
| 1121 | - ##SECT _sectFat[109]; // [4CH,436] the SECTs of first 109 FAT sectors | |
| 1122 | - ##}; | |
| 1123 | - | |
| 1124 | - # [PL] header decoding: | |
| 1125 | - # '<' indicates little-endian byte ordering for Intel (cf. struct module help) | |
| 1126 | - fmt_header = '<8s16sHHHHHHLLLLLLLLLL' | |
| 1127 | - header_size = struct.calcsize(fmt_header) | |
| 1128 | - debug( "fmt_header size = %d, +FAT = %d" % (header_size, header_size + 109*4) ) | |
| 1129 | - header1 = header[:header_size] | |
| 1130 | - ( | |
| 1131 | - self.Sig, | |
| 1132 | - self.clsid, | |
| 1133 | - self.MinorVersion, | |
| 1134 | - self.DllVersion, | |
| 1135 | - self.ByteOrder, | |
| 1136 | - self.SectorShift, | |
| 1137 | - self.MiniSectorShift, | |
| 1138 | - self.Reserved, self.Reserved1, | |
| 1139 | - self.csectDir, | |
| 1140 | - self.csectFat, | |
| 1141 | - self.sectDirStart, | |
| 1142 | - self.signature, | |
| 1143 | - self.MiniSectorCutoff, | |
| 1144 | - self.MiniFatStart, | |
| 1145 | - self.csectMiniFat, | |
| 1146 | - self.sectDifStart, | |
| 1147 | - self.csectDif | |
| 1148 | - ) = struct.unpack(fmt_header, header1) | |
| 1149 | - debug( struct.unpack(fmt_header, header1)) | |
| 1150 | - | |
| 1151 | - if self.Sig != '\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1': | |
| 1152 | - # OLE signature should always be present | |
| 1153 | - self._raise_defect(DEFECT_FATAL, "incorrect OLE signature") | |
| 1154 | - if self.clsid != '\x00'*16: | |
| 1155 | - # according to AAF specs, CLSID should always be zero | |
| 1156 | - self._raise_defect(DEFECT_INCORRECT, "incorrect CLSID in OLE header") | |
| 1157 | - debug( "MinorVersion = %d" % self.MinorVersion ) | |
| 1158 | - debug( "DllVersion = %d" % self.DllVersion ) | |
| 1159 | - if self.DllVersion not in [3, 4]: | |
| 1160 | - # version 3: usual format, 512 bytes per sector | |
| 1161 | - # version 4: large format, 4K per sector | |
| 1162 | - self._raise_defect(DEFECT_INCORRECT, "incorrect DllVersion in OLE header") | |
| 1163 | - debug( "ByteOrder = %X" % self.ByteOrder ) | |
| 1164 | - if self.ByteOrder != 0xFFFE: | |
| 1165 | - # For now only common little-endian documents are handled correctly | |
| 1166 | - self._raise_defect(DEFECT_FATAL, "incorrect ByteOrder in OLE header") | |
| 1167 | - # TODO: add big-endian support for documents created on Mac ? | |
| 1168 | - self.SectorSize = 2**self.SectorShift | |
| 1169 | - debug( "sector_size = %d" % self.SectorSize ) | |
| 1170 | - if self.SectorSize not in [512, 4096]: | |
| 1171 | - self._raise_defect(DEFECT_INCORRECT, "incorrect sector_size in OLE header") | |
| 1172 | - if (self.DllVersion==3 and self.SectorSize!=512) \ | |
| 1173 | - or (self.DllVersion==4 and self.SectorSize!=4096): | |
| 1174 | - self._raise_defect(DEFECT_INCORRECT, "sector_size does not match DllVersion in OLE header") | |
| 1175 | - self.MiniSectorSize = 2**self.MiniSectorShift | |
| 1176 | - debug( "mini_sector_size = %d" % self.MiniSectorSize ) | |
| 1177 | - if self.MiniSectorSize not in [64]: | |
| 1178 | - self._raise_defect(DEFECT_INCORRECT, "incorrect mini_sector_size in OLE header") | |
| 1179 | - if self.Reserved != 0 or self.Reserved1 != 0: | |
| 1180 | - self._raise_defect(DEFECT_INCORRECT, "incorrect OLE header (non-null reserved bytes)") | |
| 1181 | - debug( "csectDir = %d" % self.csectDir ) | |
| 1182 | - if self.SectorSize==512 and self.csectDir!=0: | |
| 1183 | - self._raise_defect(DEFECT_INCORRECT, "incorrect csectDir in OLE header") | |
| 1184 | - debug( "num_fat_sectors = %d" % self.csectFat ) | |
| 1185 | - debug( "first_dir_sector = %X" % self.sectDirStart ) | |
| 1186 | - debug( "transaction_signature_number = %d" % self.signature ) | |
| 1187 | - # Signature should be zero, BUT some implementations do not follow this | |
| 1188 | - # rule => only a potential defect: | |
| 1189 | - if self.signature != 0: | |
| 1190 | - self._raise_defect(DEFECT_POTENTIAL, "incorrect OLE header (transaction_signature_number>0)") | |
| 1191 | - debug( "mini_stream_cutoff_size = %d" % self.MiniSectorCutoff ) | |
| 1192 | - debug( "first_mini_fat_sector = %X" % self.MiniFatStart ) | |
| 1193 | - debug( "num_mini_fat_sectors = %d" % self.csectMiniFat ) | |
| 1194 | - debug( "first_difat_sector = %X" % self.sectDifStart ) | |
| 1195 | - debug( "num_difat_sectors = %d" % self.csectDif ) | |
| 1196 | - | |
| 1197 | - # calculate the number of sectors in the file | |
| 1198 | - # (-1 because header doesn't count) | |
| 1199 | - self.nb_sect = ( (filesize + self.SectorSize-1) / self.SectorSize) - 1 | |
| 1200 | - debug( "Number of sectors in the file: %d" % self.nb_sect ) | |
| 1201 | - | |
| 1202 | - # file clsid (probably never used, so we don't store it) | |
| 1203 | - clsid = _clsid(header[8:24]) | |
| 1204 | - self.sectorsize = self.SectorSize #1 << i16(header, 30) | |
| 1205 | - self.minisectorsize = self.MiniSectorSize #1 << i16(header, 32) | |
| 1206 | - self.minisectorcutoff = self.MiniSectorCutoff # i32(header, 56) | |
| 1207 | - | |
| 1208 | - # check known streams for duplicate references (these are always in FAT, | |
| 1209 | - # never in MiniFAT): | |
| 1210 | - self._check_duplicate_stream(self.sectDirStart) | |
| 1211 | - # check MiniFAT only if it is not empty: | |
| 1212 | - if self.csectMiniFat: | |
| 1213 | - self._check_duplicate_stream(self.MiniFatStart) | |
| 1214 | - # check DIFAT only if it is not empty: | |
| 1215 | - if self.csectDif: | |
| 1216 | - self._check_duplicate_stream(self.sectDifStart) | |
| 1217 | - | |
| 1218 | - # Load file allocation tables | |
| 1219 | - self.loadfat(header) | |
| 1220 | - # Load direcory. This sets both the direntries list (ordered by sid) | |
| 1221 | - # and the root (ordered by hierarchy) members. | |
| 1222 | - self.loaddirectory(self.sectDirStart)#i32(header, 48)) | |
| 1223 | - self.ministream = None | |
| 1224 | - self.minifatsect = self.MiniFatStart #i32(header, 60) | |
| 1225 | - | |
| 1226 | - | |
| 1227 | - def close(self): | |
| 1228 | - """ | |
| 1229 | - close the OLE file, to release the file object | |
| 1230 | - """ | |
| 1231 | - self.fp.close() | |
| 1232 | - | |
| 1233 | - | |
| 1234 | - def _check_duplicate_stream(self, first_sect, minifat=False): | |
| 1235 | - """ | |
| 1236 | - Checks if a stream has not been already referenced elsewhere. | |
| 1237 | - This method should only be called once for each known stream, and only | |
| 1238 | - if stream size is not null. | |
| 1239 | - first_sect: index of first sector of the stream in FAT | |
| 1240 | - minifat: if True, stream is located in the MiniFAT, else in the FAT | |
| 1241 | - """ | |
| 1242 | - if minifat: | |
| 1243 | - debug('_check_duplicate_stream: sect=%d in MiniFAT' % first_sect) | |
| 1244 | - used_streams = self._used_streams_minifat | |
| 1245 | - else: | |
| 1246 | - debug('_check_duplicate_stream: sect=%d in FAT' % first_sect) | |
| 1247 | - # some values can be safely ignored (not a real stream): | |
| 1248 | - if first_sect in (DIFSECT,FATSECT,ENDOFCHAIN,FREESECT): | |
| 1249 | - return | |
| 1250 | - used_streams = self._used_streams_fat | |
| 1251 | - #TODO: would it be more efficient using a dict or hash values, instead | |
| 1252 | - # of a list of long ? | |
| 1253 | - if first_sect in used_streams: | |
| 1254 | - self._raise_defect(DEFECT_INCORRECT, 'Stream referenced twice') | |
| 1255 | - else: | |
| 1256 | - used_streams.append(first_sect) | |
| 1257 | - | |
| 1258 | - | |
| 1259 | - def dumpfat(self, fat, firstindex=0): | |
| 1260 | - "Displays a part of FAT in human-readable form for debugging purpose" | |
| 1261 | - # [PL] added only for debug | |
| 1262 | - if not DEBUG_MODE: | |
| 1263 | - return | |
| 1264 | - # dictionary to convert special FAT values in human-readable strings | |
| 1265 | - VPL=8 # valeurs par ligne (8+1 * 8+1 = 81) | |
| 1266 | - fatnames = { | |
| 1267 | - FREESECT: "..free..", | |
| 1268 | - ENDOFCHAIN: "[ END. ]", | |
| 1269 | - FATSECT: "FATSECT ", | |
| 1270 | - DIFSECT: "DIFSECT " | |
| 1271 | - } | |
| 1272 | - nbsect = len(fat) | |
| 1273 | - nlines = (nbsect+VPL-1)/VPL | |
| 1274 | - print "index", | |
| 1275 | - for i in range(VPL): | |
| 1276 | - print ("%8X" % i), | |
| 1277 | - print "" | |
| 1278 | - for l in range(nlines): | |
| 1279 | - index = l*VPL | |
| 1280 | - print ("%8X:" % (firstindex+index)), | |
| 1281 | - for i in range(index, index+VPL): | |
| 1282 | - if i>=nbsect: | |
| 1283 | - break | |
| 1284 | - sect = fat[i] | |
| 1285 | - if sect in fatnames: | |
| 1286 | - nom = fatnames[sect] | |
| 1287 | - else: | |
| 1288 | - if sect == i+1: | |
| 1289 | - nom = " --->" | |
| 1290 | - else: | |
| 1291 | - nom = "%8X" % sect | |
| 1292 | - print nom, | |
| 1293 | - print "" | |
| 1294 | - | |
| 1295 | - | |
| 1296 | - def dumpsect(self, sector, firstindex=0): | |
| 1297 | - "Displays a sector in a human-readable form, for debugging purpose." | |
| 1298 | - if not DEBUG_MODE: | |
| 1299 | - return | |
| 1300 | - VPL=8 # number of values per line (8+1 * 8+1 = 81) | |
| 1301 | - tab = array.array(UINT32, sector) | |
| 1302 | - nbsect = len(tab) | |
| 1303 | - nlines = (nbsect+VPL-1)/VPL | |
| 1304 | - print "index", | |
| 1305 | - for i in range(VPL): | |
| 1306 | - print ("%8X" % i), | |
| 1307 | - print "" | |
| 1308 | - for l in range(nlines): | |
| 1309 | - index = l*VPL | |
| 1310 | - print ("%8X:" % (firstindex+index)), | |
| 1311 | - for i in range(index, index+VPL): | |
| 1312 | - if i>=nbsect: | |
| 1313 | - break | |
| 1314 | - sect = tab[i] | |
| 1315 | - nom = "%8X" % sect | |
| 1316 | - print nom, | |
| 1317 | - print "" | |
| 1318 | - | |
| 1319 | - def sect2array(self, sect): | |
| 1320 | - """ | |
| 1321 | - convert a sector to an array of 32 bits unsigned integers, | |
| 1322 | - swapping bytes on big endian CPUs such as PowerPC (old Macs) | |
| 1323 | - """ | |
| 1324 | - a = array.array(UINT32, sect) | |
| 1325 | - # if CPU is big endian, swap bytes: | |
| 1326 | - if sys.byteorder == 'big': | |
| 1327 | - a.byteswap() | |
| 1328 | - return a | |
| 1329 | - | |
| 1330 | - | |
| 1331 | - def loadfat_sect(self, sect): | |
| 1332 | - """ | |
| 1333 | - Adds the indexes of the given sector to the FAT | |
| 1334 | - sect: string containing the first FAT sector, or array of long integers | |
| 1335 | - return: index of last FAT sector. | |
| 1336 | - """ | |
| 1337 | - # a FAT sector is an array of ulong integers. | |
| 1338 | - if isinstance(sect, array.array): | |
| 1339 | - # if sect is already an array it is directly used | |
| 1340 | - fat1 = sect | |
| 1341 | - else: | |
| 1342 | - # if it's a raw sector, it is parsed in an array | |
| 1343 | - fat1 = self.sect2array(sect) | |
| 1344 | - self.dumpsect(sect) | |
| 1345 | - # The FAT is a sector chain starting at the first index of itself. | |
| 1346 | - for isect in fat1: | |
| 1347 | - #print "isect = %X" % isect | |
| 1348 | - if isect == ENDOFCHAIN or isect == FREESECT: | |
| 1349 | - # the end of the sector chain has been reached | |
| 1350 | - break | |
| 1351 | - # read the FAT sector | |
| 1352 | - s = self.getsect(isect) | |
| 1353 | - # parse it as an array of 32 bits integers, and add it to the | |
| 1354 | - # global FAT array | |
| 1355 | - nextfat = self.sect2array(s) | |
| 1356 | - self.fat = self.fat + nextfat | |
| 1357 | - return isect | |
| 1358 | - | |
| 1359 | - | |
| 1360 | - def loadfat(self, header): | |
| 1361 | - """ | |
| 1362 | - Load the FAT table. | |
| 1363 | - """ | |
| 1364 | - # The header contains a sector numbers | |
| 1365 | - # for the first 109 FAT sectors. Additional sectors are | |
| 1366 | - # described by DIF blocks | |
| 1367 | - | |
| 1368 | - sect = header[76:512] | |
| 1369 | - debug( "len(sect)=%d, so %d integers" % (len(sect), len(sect)/4) ) | |
| 1370 | - #fat = [] | |
| 1371 | - # [PL] FAT is an array of 32 bits unsigned ints, it's more effective | |
| 1372 | - # to use an array than a list in Python. | |
| 1373 | - # It's initialized as empty first: | |
| 1374 | - self.fat = array.array(UINT32) | |
| 1375 | - self.loadfat_sect(sect) | |
| 1376 | - #self.dumpfat(self.fat) | |
| 1377 | -## for i in range(0, len(sect), 4): | |
| 1378 | -## ix = i32(sect, i) | |
| 1379 | -## #[PL] if ix == -2 or ix == -1: # ix == 0xFFFFFFFEL or ix == 0xFFFFFFFFL: | |
| 1380 | -## if ix == 0xFFFFFFFEL or ix == 0xFFFFFFFFL: | |
| 1381 | -## break | |
| 1382 | -## s = self.getsect(ix) | |
| 1383 | -## #fat = fat + map(lambda i, s=s: i32(s, i), range(0, len(s), 4)) | |
| 1384 | -## fat = fat + array.array(UINT32, s) | |
| 1385 | - if self.csectDif != 0: | |
| 1386 | - # [PL] There's a DIFAT because file is larger than 6.8MB | |
| 1387 | - # some checks just in case: | |
| 1388 | - if self.csectFat <= 109: | |
| 1389 | - # there must be at least 109 blocks in header and the rest in | |
| 1390 | - # DIFAT, so number of sectors must be >109. | |
| 1391 | - self._raise_defect(DEFECT_INCORRECT, 'incorrect DIFAT, not enough sectors') | |
| 1392 | - if self.sectDifStart >= self.nb_sect: | |
| 1393 | - # initial DIFAT block index must be valid | |
| 1394 | - self._raise_defect(DEFECT_FATAL, 'incorrect DIFAT, first index out of range') | |
| 1395 | - debug( "DIFAT analysis..." ) | |
| 1396 | - # We compute the necessary number of DIFAT sectors : | |
| 1397 | - # (each DIFAT sector = 127 pointers + 1 towards next DIFAT sector) | |
| 1398 | - nb_difat = (self.csectFat-109 + 126)/127 | |
| 1399 | - debug( "nb_difat = %d" % nb_difat ) | |
| 1400 | - if self.csectDif != nb_difat: | |
| 1401 | - raise IOError, 'incorrect DIFAT' | |
| 1402 | - isect_difat = self.sectDifStart | |
| 1403 | - for i in xrange(nb_difat): | |
| 1404 | - debug( "DIFAT block %d, sector %X" % (i, isect_difat) ) | |
| 1405 | - #TODO: check if corresponding FAT SID = DIFSECT | |
| 1406 | - sector_difat = self.getsect(isect_difat) | |
| 1407 | - difat = self.sect2array(sector_difat) | |
| 1408 | - self.dumpsect(sector_difat) | |
| 1409 | - self.loadfat_sect(difat[:127]) | |
| 1410 | - # last DIFAT pointer is next DIFAT sector: | |
| 1411 | - isect_difat = difat[127] | |
| 1412 | - debug( "next DIFAT sector: %X" % isect_difat ) | |
| 1413 | - # checks: | |
| 1414 | - if isect_difat not in [ENDOFCHAIN, FREESECT]: | |
| 1415 | - # last DIFAT pointer value must be ENDOFCHAIN or FREESECT | |
| 1416 | - raise IOError, 'incorrect end of DIFAT' | |
| 1417 | -## if len(self.fat) != self.num_fat_sectors: | |
| 1418 | -## # FAT should contain num_fat_sectors blocks | |
| 1419 | -## print "FAT length: %d instead of %d" % (len(self.fat), self.num_fat_sectors) | |
| 1420 | -## raise IOError, 'incorrect DIFAT' | |
| 1421 | - # since FAT is read from fixed-size sectors, it may contain more values | |
| 1422 | - # than the actual number of sectors in the file. | |
| 1423 | - # Keep only the relevant sector indexes: | |
| 1424 | - if len(self.fat) > self.nb_sect: | |
| 1425 | - debug('len(fat)=%d, shrunk to nb_sect=%d' % (len(self.fat), self.nb_sect)) | |
| 1426 | - self.fat = self.fat[:self.nb_sect] | |
| 1427 | - debug('\nFAT:') | |
| 1428 | - self.dumpfat(self.fat) | |
| 1429 | - | |
| 1430 | - | |
| 1431 | - def loadminifat(self): | |
| 1432 | - """ | |
| 1433 | - Load the MiniFAT table. | |
| 1434 | - """ | |
| 1435 | - # MiniFAT is stored in a standard sub-stream, pointed to by a header | |
| 1436 | - # field. | |
| 1437 | - # NOTE: there are two sizes to take into account for this stream: | |
| 1438 | - # 1) Stream size is calculated according to the number of sectors | |
| 1439 | - # declared in the OLE header. This allocated stream may be more than | |
| 1440 | - # needed to store the actual sector indexes. | |
| 1441 | - # (self.num_mini_fat_sectors is the number of sectors of size self.sector_size) | |
| 1442 | - stream_size = self.csectMiniFat * self.SectorSize | |
| 1443 | - # 2) Actually used size is calculated by dividing the MiniStream size | |
| 1444 | - # (given by root entry size) by the size of mini sectors, *4 for | |
| 1445 | - # 32 bits indexes: | |
| 1446 | - nb_minisectors = (self.root.size + self.MiniSectorSize-1) / self.MiniSectorSize | |
| 1447 | - used_size = nb_minisectors * 4 | |
| 1448 | - debug('loadminifat(): minifatsect=%d, nb FAT sectors=%d, used_size=%d, stream_size=%d, nb MiniSectors=%d' % | |
| 1449 | - (self.minifatsect, self.csectMiniFat, used_size, stream_size, nb_minisectors)) | |
| 1450 | - if used_size > stream_size: | |
| 1451 | - # This is not really a problem, but may indicate a wrong implementation: | |
| 1452 | - self._raise_defect(DEFECT_INCORRECT, 'OLE MiniStream is larger than MiniFAT') | |
| 1453 | - # In any case, first read stream_size: | |
| 1454 | - s = self._open(self.minifatsect, stream_size, force_FAT=True).read() | |
| 1455 | - #[PL] Old code replaced by an array: | |
| 1456 | - #self.minifat = map(lambda i, s=s: i32(s, i), range(0, len(s), 4)) | |
| 1457 | - self.minifat = self.sect2array(s) | |
| 1458 | - # Then shrink the array to used size, to avoid indexes out of MiniStream: | |
| 1459 | - debug('MiniFAT shrunk from %d to %d sectors' % (len(self.minifat), nb_minisectors)) | |
| 1460 | - self.minifat = self.minifat[:nb_minisectors] | |
| 1461 | - debug('loadminifat(): len=%d' % len(self.minifat)) | |
| 1462 | - debug('\nMiniFAT:') | |
| 1463 | - self.dumpfat(self.minifat) | |
| 1464 | - | |
| 1465 | - def getsect(self, sect): | |
| 1466 | - """ | |
| 1467 | - Read given sector from file on disk. | |
| 1468 | - sect: sector index | |
| 1469 | - returns a string containing the sector data. | |
| 1470 | - """ | |
| 1471 | - # [PL] this original code was wrong when sectors are 4KB instead of | |
| 1472 | - # 512 bytes: | |
| 1473 | - #self.fp.seek(512 + self.sectorsize * sect) | |
| 1474 | - #[PL]: added safety checks: | |
| 1475 | - #print "getsect(%X)" % sect | |
| 1476 | - try: | |
| 1477 | - self.fp.seek(self.sectorsize * (sect+1)) | |
| 1478 | - except: | |
| 1479 | - debug('getsect(): sect=%X, seek=%d, filesize=%d' % | |
| 1480 | - (sect, self.sectorsize*(sect+1), self._filesize)) | |
| 1481 | - self._raise_defect(DEFECT_FATAL, 'OLE sector index out of range') | |
| 1482 | - sector = self.fp.read(self.sectorsize) | |
| 1483 | - if len(sector) != self.sectorsize: | |
| 1484 | - debug('getsect(): sect=%X, read=%d, sectorsize=%d' % | |
| 1485 | - (sect, len(sector), self.sectorsize)) | |
| 1486 | - self._raise_defect(DEFECT_FATAL, 'incomplete OLE sector') | |
| 1487 | - return sector | |
| 1488 | - | |
| 1489 | - | |
| 1490 | - def loaddirectory(self, sect): | |
| 1491 | - """ | |
| 1492 | - Load the directory. | |
| 1493 | - sect: sector index of directory stream. | |
| 1494 | - """ | |
| 1495 | - # The directory is stored in a standard | |
| 1496 | - # substream, independent of its size. | |
| 1497 | - | |
| 1498 | - # open directory stream as a read-only file: | |
| 1499 | - # (stream size is not known in advance) | |
| 1500 | - self.directory_fp = self._open(sect) | |
| 1501 | - | |
| 1502 | - #[PL] to detect malformed documents and avoid DoS attacks, the maximum | |
| 1503 | - # number of directory entries can be calculated: | |
| 1504 | - max_entries = self.directory_fp.size / 128 | |
| 1505 | - debug('loaddirectory: size=%d, max_entries=%d' % | |
| 1506 | - (self.directory_fp.size, max_entries)) | |
| 1507 | - | |
| 1508 | - # Create list of directory entries | |
| 1509 | - #self.direntries = [] | |
| 1510 | - # We start with a list of "None" object | |
| 1511 | - self.direntries = [None] * max_entries | |
| 1512 | -## for sid in xrange(max_entries): | |
| 1513 | -## entry = fp.read(128) | |
| 1514 | -## if not entry: | |
| 1515 | -## break | |
| 1516 | -## self.direntries.append(_OleDirectoryEntry(entry, sid, self)) | |
| 1517 | - # load root entry: | |
| 1518 | - root_entry = self._load_direntry(0) | |
| 1519 | - # Root entry is the first entry: | |
| 1520 | - self.root = self.direntries[0] | |
| 1521 | - # read and build all storage trees, starting from the root: | |
| 1522 | - self.root.build_storage_tree() | |
| 1523 | - | |
| 1524 | - | |
| 1525 | - def _load_direntry (self, sid): | |
| 1526 | - """ | |
| 1527 | - Load a directory entry from the directory. | |
| 1528 | - This method should only be called once for each storage/stream when | |
| 1529 | - loading the directory. | |
| 1530 | - sid: index of storage/stream in the directory. | |
| 1531 | - return: a _OleDirectoryEntry object | |
| 1532 | - raise: IOError if the entry has always been referenced. | |
| 1533 | - """ | |
| 1534 | - # check if SID is OK: | |
| 1535 | - if sid<0 or sid>=len(self.direntries): | |
| 1536 | - self._raise_defect(DEFECT_FATAL, "OLE directory index out of range") | |
| 1537 | - # check if entry was already referenced: | |
| 1538 | - if self.direntries[sid] is not None: | |
| 1539 | - self._raise_defect(DEFECT_INCORRECT, | |
| 1540 | - "double reference for OLE stream/storage") | |
| 1541 | - # if exception not raised, return the object | |
| 1542 | - return self.direntries[sid] | |
| 1543 | - self.directory_fp.seek(sid * 128) | |
| 1544 | - entry = self.directory_fp.read(128) | |
| 1545 | - self.direntries[sid] = _OleDirectoryEntry(entry, sid, self) | |
| 1546 | - return self.direntries[sid] | |
| 1547 | - | |
| 1548 | - | |
| 1549 | - def dumpdirectory(self): | |
| 1550 | - """ | |
| 1551 | - Dump directory (for debugging only) | |
| 1552 | - """ | |
| 1553 | - self.root.dump() | |
| 1554 | - | |
| 1555 | - | |
| 1556 | - def _open(self, start, size = 0x7FFFFFFF, force_FAT=False): | |
| 1557 | - """ | |
| 1558 | - Open a stream, either in FAT or MiniFAT according to its size. | |
| 1559 | - (openstream helper) | |
| 1560 | - | |
| 1561 | - start: index of first sector | |
| 1562 | - size: size of stream (or nothing if size is unknown) | |
| 1563 | - force_FAT: if False (default), stream will be opened in FAT or MiniFAT | |
| 1564 | - according to size. If True, it will always be opened in FAT. | |
| 1565 | - """ | |
| 1566 | - debug('OleFileIO.open(): sect=%d, size=%d, force_FAT=%s' % | |
| 1567 | - (start, size, str(force_FAT))) | |
| 1568 | - # stream size is compared to the mini_stream_cutoff_size threshold: | |
| 1569 | - if size < self.minisectorcutoff and not force_FAT: | |
| 1570 | - # ministream object | |
| 1571 | - if not self.ministream: | |
| 1572 | - # load MiniFAT if it wasn't already done: | |
| 1573 | - self.loadminifat() | |
| 1574 | - # The first sector index of the miniFAT stream is stored in the | |
| 1575 | - # root directory entry: | |
| 1576 | - size_ministream = self.root.size | |
| 1577 | - debug('Opening MiniStream: sect=%d, size=%d' % | |
| 1578 | - (self.root.isectStart, size_ministream)) | |
| 1579 | - self.ministream = self._open(self.root.isectStart, | |
| 1580 | - size_ministream, force_FAT=True) | |
| 1581 | - return _OleStream(fp=self.ministream, sect=start, size=size, | |
| 1582 | - offset=0, sectorsize=self.minisectorsize, | |
| 1583 | - fat=self.minifat, filesize=self.ministream.size) | |
| 1584 | - else: | |
| 1585 | - # standard stream | |
| 1586 | - return _OleStream(fp=self.fp, sect=start, size=size, | |
| 1587 | - offset=self.sectorsize, | |
| 1588 | - sectorsize=self.sectorsize, fat=self.fat, | |
| 1589 | - filesize=self._filesize) | |
| 1590 | - | |
| 1591 | - | |
| 1592 | - def _list(self, files, prefix, node, streams=True, storages=False): | |
| 1593 | - """ | |
| 1594 | - (listdir helper) | |
| 1595 | - files: list of files to fill in | |
| 1596 | - prefix: current location in storage tree (list of names) | |
| 1597 | - node: current node (_OleDirectoryEntry object) | |
| 1598 | - streams: bool, include streams if True (True by default) - new in v0.26 | |
| 1599 | - storages: bool, include storages if True (False by default) - new in v0.26 | |
| 1600 | - (note: the root storage is never included) | |
| 1601 | - """ | |
| 1602 | - prefix = prefix + [node.name] | |
| 1603 | - for entry in node.kids: | |
| 1604 | - if entry.kids: | |
| 1605 | - # this is a storage | |
| 1606 | - if storages: | |
| 1607 | - # add it to the list | |
| 1608 | - files.append(prefix[1:] + [entry.name]) | |
| 1609 | - # check its kids | |
| 1610 | - self._list(files, prefix, entry, streams, storages) | |
| 1611 | - else: | |
| 1612 | - # this is a stream | |
| 1613 | - if streams: | |
| 1614 | - # add it to the list | |
| 1615 | - files.append(prefix[1:] + [entry.name]) | |
| 1616 | - | |
| 1617 | - | |
| 1618 | - def listdir(self, streams=True, storages=False): | |
| 1619 | - """ | |
| 1620 | - Return a list of streams stored in this file | |
| 1621 | - | |
| 1622 | - streams: bool, include streams if True (True by default) - new in v0.26 | |
| 1623 | - storages: bool, include storages if True (False by default) - new in v0.26 | |
| 1624 | - (note: the root storage is never included) | |
| 1625 | - """ | |
| 1626 | - files = [] | |
| 1627 | - self._list(files, [], self.root, streams, storages) | |
| 1628 | - return files | |
| 1629 | - | |
| 1630 | - | |
| 1631 | - def _find(self, filename): | |
| 1632 | - """ | |
| 1633 | - Returns directory entry of given filename. (openstream helper) | |
| 1634 | - Note: this method is case-insensitive. | |
| 1635 | - | |
| 1636 | - filename: path of stream in storage tree (except root entry), either: | |
| 1637 | - - a string using Unix path syntax, for example: | |
| 1638 | - 'storage_1/storage_1.2/stream' | |
| 1639 | - - a list of storage filenames, path to the desired stream/storage. | |
| 1640 | - Example: ['storage_1', 'storage_1.2', 'stream'] | |
| 1641 | - return: sid of requested filename | |
| 1642 | - raise IOError if file not found | |
| 1643 | - """ | |
| 1644 | - | |
| 1645 | - # if filename is a string instead of a list, split it on slashes to | |
| 1646 | - # convert to a list: | |
| 1647 | - if isinstance(filename, basestring): | |
| 1648 | - filename = filename.split('/') | |
| 1649 | - # walk across storage tree, following given path: | |
| 1650 | - node = self.root | |
| 1651 | - for name in filename: | |
| 1652 | - for kid in node.kids: | |
| 1653 | - if kid.name.lower() == name.lower(): | |
| 1654 | - break | |
| 1655 | - else: | |
| 1656 | - raise IOError, "file not found" | |
| 1657 | - node = kid | |
| 1658 | - return node.sid | |
| 1659 | - | |
| 1660 | - | |
| 1661 | - def openstream(self, filename): | |
| 1662 | - """ | |
| 1663 | - Open a stream as a read-only file object (StringIO). | |
| 1664 | - | |
| 1665 | - filename: path of stream in storage tree (except root entry), either: | |
| 1666 | - - a string using Unix path syntax, for example: | |
| 1667 | - 'storage_1/storage_1.2/stream' | |
| 1668 | - - a list of storage filenames, path to the desired stream/storage. | |
| 1669 | - Example: ['storage_1', 'storage_1.2', 'stream'] | |
| 1670 | - return: file object (read-only) | |
| 1671 | - raise IOError if filename not found, or if this is not a stream. | |
| 1672 | - """ | |
| 1673 | - sid = self._find(filename) | |
| 1674 | - entry = self.direntries[sid] | |
| 1675 | - if entry.entry_type != STGTY_STREAM: | |
| 1676 | - raise IOError, "this file is not a stream" | |
| 1677 | - return self._open(entry.isectStart, entry.size) | |
| 1678 | - | |
| 1679 | - | |
| 1680 | - def get_type(self, filename): | |
| 1681 | - """ | |
| 1682 | - Test if given filename exists as a stream or a storage in the OLE | |
| 1683 | - container, and return its type. | |
| 1684 | - | |
| 1685 | - filename: path of stream in storage tree. (see openstream for syntax) | |
| 1686 | - return: False if object does not exist, its entry type (>0) otherwise: | |
| 1687 | - - STGTY_STREAM: a stream | |
| 1688 | - - STGTY_STORAGE: a storage | |
| 1689 | - - STGTY_ROOT: the root entry | |
| 1690 | - """ | |
| 1691 | - try: | |
| 1692 | - sid = self._find(filename) | |
| 1693 | - entry = self.direntries[sid] | |
| 1694 | - return entry.entry_type | |
| 1695 | - except: | |
| 1696 | - return False | |
| 1697 | - | |
| 1698 | - | |
| 1699 | - def getmtime(self, filename): | |
| 1700 | - """ | |
| 1701 | - Return modification time of a stream/storage. | |
| 1702 | - | |
| 1703 | - filename: path of stream/storage in storage tree. (see openstream for | |
| 1704 | - syntax) | |
| 1705 | - return: None if modification time is null, a python datetime object | |
| 1706 | - otherwise (UTC timezone) | |
| 1707 | - | |
| 1708 | - new in version 0.26 | |
| 1709 | - """ | |
| 1710 | - sid = self._find(filename) | |
| 1711 | - entry = self.direntries[sid] | |
| 1712 | - return entry.getmtime() | |
| 1713 | - | |
| 1714 | - | |
| 1715 | - def getctime(self, filename): | |
| 1716 | - """ | |
| 1717 | - Return creation time of a stream/storage. | |
| 1718 | - | |
| 1719 | - filename: path of stream/storage in storage tree. (see openstream for | |
| 1720 | - syntax) | |
| 1721 | - return: None if creation time is null, a python datetime object | |
| 1722 | - otherwise (UTC timezone) | |
| 1723 | - | |
| 1724 | - new in version 0.26 | |
| 1725 | - """ | |
| 1726 | - sid = self._find(filename) | |
| 1727 | - entry = self.direntries[sid] | |
| 1728 | - return entry.getctime() | |
| 1729 | - | |
| 1730 | - | |
| 1731 | - def exists(self, filename): | |
| 1732 | - """ | |
| 1733 | - Test if given filename exists as a stream or a storage in the OLE | |
| 1734 | - container. | |
| 1735 | - | |
| 1736 | - filename: path of stream in storage tree. (see openstream for syntax) | |
| 1737 | - return: True if object exist, else False. | |
| 1738 | - """ | |
| 1739 | - try: | |
| 1740 | - sid = self._find(filename) | |
| 1741 | - return True | |
| 1742 | - except: | |
| 1743 | - return False | |
| 1744 | - | |
| 1745 | - | |
| 1746 | - def get_size(self, filename): | |
| 1747 | - """ | |
| 1748 | - Return size of a stream in the OLE container, in bytes. | |
| 1749 | - | |
| 1750 | - filename: path of stream in storage tree (see openstream for syntax) | |
| 1751 | - return: size in bytes (long integer) | |
| 1752 | - raise: IOError if file not found, TypeError if this is not a stream. | |
| 1753 | - """ | |
| 1754 | - sid = self._find(filename) | |
| 1755 | - entry = self.direntries[sid] | |
| 1756 | - if entry.entry_type != STGTY_STREAM: | |
| 1757 | - #TODO: Should it return zero instead of raising an exception ? | |
| 1758 | - raise TypeError, 'object is not an OLE stream' | |
| 1759 | - return entry.size | |
| 1760 | - | |
| 1761 | - | |
| 1762 | - def get_rootentry_name(self): | |
| 1763 | - """ | |
| 1764 | - Return root entry name. Should usually be 'Root Entry' or 'R' in most | |
| 1765 | - implementations. | |
| 1766 | - """ | |
| 1767 | - return self.root.name | |
| 1768 | - | |
| 1769 | - | |
| 1770 | - def getproperties(self, filename, convert_time=False, no_conversion=None): | |
| 1771 | - """ | |
| 1772 | - Return properties described in substream. | |
| 1773 | - | |
| 1774 | - filename: path of stream in storage tree (see openstream for syntax) | |
| 1775 | - convert_time: bool, if True timestamps will be converted to Python datetime | |
| 1776 | - no_conversion: None or list of int, timestamps not to be converted | |
| 1777 | - (for example total editing time is not a real timestamp) | |
| 1778 | - return: a dictionary of values indexed by id (integer) | |
| 1779 | - """ | |
| 1780 | - # make sure no_conversion is a list, just to simplify code below: | |
| 1781 | - if no_conversion == None: | |
| 1782 | - no_conversion = [] | |
| 1783 | - # stream path as a string to report exceptions: | |
| 1784 | - streampath = filename | |
| 1785 | - if not isinstance(streampath, str): | |
| 1786 | - streampath = '/'.join(streampath) | |
| 1787 | - | |
| 1788 | - fp = self.openstream(filename) | |
| 1789 | - | |
| 1790 | - data = {} | |
| 1791 | - | |
| 1792 | - try: | |
| 1793 | - # header | |
| 1794 | - s = fp.read(28) | |
| 1795 | - clsid = _clsid(s[8:24]) | |
| 1796 | - | |
| 1797 | - # format id | |
| 1798 | - s = fp.read(20) | |
| 1799 | - fmtid = _clsid(s[:16]) | |
| 1800 | - fp.seek(i32(s, 16)) | |
| 1801 | - | |
| 1802 | - # get section | |
| 1803 | - s = "****" + fp.read(i32(fp.read(4))-4) | |
| 1804 | - # number of properties: | |
| 1805 | - num_props = i32(s, 4) | |
| 1806 | - except: | |
| 1807 | - # catch exception while parsing property header, and only raise | |
| 1808 | - # a DEFECT_INCORRECT then return an empty dict, because this is not | |
| 1809 | - # a fatal error when parsing the whole file | |
| 1810 | - exctype, excvalue = sys.exc_info()[:2] | |
| 1811 | - msg = 'Error while parsing properties header in stream %s: %s' % ( | |
| 1812 | - repr(streampath), excvalue) | |
| 1813 | - self._raise_defect(DEFECT_INCORRECT, msg, exctype) | |
| 1814 | - return data | |
| 1815 | - | |
| 1816 | - for i in range(num_props): | |
| 1817 | - try: | |
| 1818 | - id = 0 # just in case of an exception | |
| 1819 | - id = i32(s, 8+i*8) | |
| 1820 | - offset = i32(s, 12+i*8) | |
| 1821 | - type = i32(s, offset) | |
| 1822 | - | |
| 1823 | - debug ('property id=%d: type=%d offset=%X' % (id, type, offset)) | |
| 1824 | - | |
| 1825 | - # test for common types first (should perhaps use | |
| 1826 | - # a dictionary instead?) | |
| 1827 | - | |
| 1828 | - if type == VT_I2: # 16-bit signed integer | |
| 1829 | - value = i16(s, offset+4) | |
| 1830 | - if value >= 32768: | |
| 1831 | - value = value - 65536 | |
| 1832 | - elif type == VT_UI2: # 2-byte unsigned integer | |
| 1833 | - value = i16(s, offset+4) | |
| 1834 | - elif type in (VT_I4, VT_INT, VT_ERROR): | |
| 1835 | - # VT_I4: 32-bit signed integer | |
| 1836 | - # VT_ERROR: HRESULT, similar to 32-bit signed integer, | |
| 1837 | - # see http://msdn.microsoft.com/en-us/library/cc230330.aspx | |
| 1838 | - value = i32(s, offset+4) | |
| 1839 | - elif type in (VT_UI4, VT_UINT): # 4-byte unsigned integer | |
| 1840 | - value = i32(s, offset+4) # FIXME | |
| 1841 | - elif type in (VT_BSTR, VT_LPSTR): | |
| 1842 | - # CodePageString, see http://msdn.microsoft.com/en-us/library/dd942354.aspx | |
| 1843 | - # size is a 32 bits integer, including the null terminator, and | |
| 1844 | - # possibly trailing or embedded null chars | |
| 1845 | - #TODO: if codepage is unicode, the string should be converted as such | |
| 1846 | - count = i32(s, offset+4) | |
| 1847 | - value = s[offset+8:offset+8+count-1] | |
| 1848 | - # remove all null chars: | |
| 1849 | - value = value.replace('\x00', '') | |
| 1850 | - elif type == VT_BLOB: | |
| 1851 | - # binary large object (BLOB) | |
| 1852 | - # see http://msdn.microsoft.com/en-us/library/dd942282.aspx | |
| 1853 | - count = i32(s, offset+4) | |
| 1854 | - value = s[offset+8:offset+8+count] | |
| 1855 | - elif type == VT_LPWSTR: | |
| 1856 | - # UnicodeString | |
| 1857 | - # see http://msdn.microsoft.com/en-us/library/dd942313.aspx | |
| 1858 | - # "the string should NOT contain embedded or additional trailing | |
| 1859 | - # null characters." | |
| 1860 | - count = i32(s, offset+4) | |
| 1861 | - value = _unicode(s[offset+8:offset+8+count*2]) | |
| 1862 | - elif type == VT_FILETIME: | |
| 1863 | - value = long(i32(s, offset+4)) + (long(i32(s, offset+8))<<32) | |
| 1864 | - # FILETIME is a 64-bit int: "number of 100ns periods | |
| 1865 | - # since Jan 1,1601". | |
| 1866 | - if convert_time and id not in no_conversion: | |
| 1867 | - debug('Converting property #%d to python datetime, value=%d=%fs' | |
| 1868 | - %(id, value, float(value)/10000000L)) | |
| 1869 | - # convert FILETIME to Python datetime.datetime | |
| 1870 | - # inspired from http://code.activestate.com/recipes/511425-filetime-to-datetime/ | |
| 1871 | - _FILETIME_null_date = datetime.datetime(1601, 1, 1, 0, 0, 0) | |
| 1872 | - debug('timedelta days=%d' % (value/(10*1000000*3600*24))) | |
| 1873 | - value = _FILETIME_null_date + datetime.timedelta(microseconds=value/10) | |
| 1874 | - else: | |
| 1875 | - # legacy code kept for backward compatibility: returns a | |
| 1876 | - # number of seconds since Jan 1,1601 | |
| 1877 | - value = value / 10000000L # seconds | |
| 1878 | - elif type == VT_UI1: # 1-byte unsigned integer | |
| 1879 | - value = ord(s[offset+4]) | |
| 1880 | - elif type == VT_CLSID: | |
| 1881 | - value = _clsid(s[offset+4:offset+20]) | |
| 1882 | - elif type == VT_CF: | |
| 1883 | - # PropertyIdentifier or ClipboardData?? | |
| 1884 | - # see http://msdn.microsoft.com/en-us/library/dd941945.aspx | |
| 1885 | - count = i32(s, offset+4) | |
| 1886 | - value = s[offset+8:offset+8+count] | |
| 1887 | - elif type == VT_BOOL: | |
| 1888 | - # VARIANT_BOOL, 16 bits bool, 0x0000=Fals, 0xFFFF=True | |
| 1889 | - # see http://msdn.microsoft.com/en-us/library/cc237864.aspx | |
| 1890 | - value = bool(i16(s, offset+4)) | |
| 1891 | - else: | |
| 1892 | - value = None # everything else yields "None" | |
| 1893 | - debug ('property id=%d: type=%d not implemented in parser yet' % (id, type)) | |
| 1894 | - | |
| 1895 | - # missing: VT_EMPTY, VT_NULL, VT_R4, VT_R8, VT_CY, VT_DATE, | |
| 1896 | - # VT_DECIMAL, VT_I1, VT_I8, VT_UI8, | |
| 1897 | - # see http://msdn.microsoft.com/en-us/library/dd942033.aspx | |
| 1898 | - | |
| 1899 | - # FIXME: add support for VT_VECTOR | |
| 1900 | - # VT_VECTOR is a 32 uint giving the number of items, followed by | |
| 1901 | - # the items in sequence. The VT_VECTOR value is combined with the | |
| 1902 | - # type of items, e.g. VT_VECTOR|VT_BSTR | |
| 1903 | - # see http://msdn.microsoft.com/en-us/library/dd942011.aspx | |
| 1904 | - | |
| 1905 | - #print "%08x" % id, repr(value), | |
| 1906 | - #print "(%s)" % VT[i32(s, offset) & 0xFFF] | |
| 1907 | - | |
| 1908 | - data[id] = value | |
| 1909 | - except: | |
| 1910 | - # catch exception while parsing each property, and only raise | |
| 1911 | - # a DEFECT_INCORRECT, because parsing can go on | |
| 1912 | - exctype, excvalue = sys.exc_info()[:2] | |
| 1913 | - msg = 'Error while parsing property id %d in stream %s: %s' % ( | |
| 1914 | - id, repr(streampath), excvalue) | |
| 1915 | - self._raise_defect(DEFECT_INCORRECT, msg, exctype) | |
| 1916 | - | |
| 1917 | - return data | |
| 1918 | - | |
| 1919 | - def get_metadata(self): | |
| 1920 | - """ | |
| 1921 | - Parse standard properties streams, return an OleMetadata object | |
| 1922 | - containing all the available metadata. | |
| 1923 | - (also stored in the metadata attribute of the OleFileIO object) | |
| 1924 | - | |
| 1925 | - new in version 0.25 | |
| 1926 | - """ | |
| 1927 | - self.metadata = OleMetadata() | |
| 1928 | - self.metadata.parse_properties(self) | |
| 1929 | - return self.metadata | |
| 1930 | - | |
| 1931 | -# | |
| 1932 | -# -------------------------------------------------------------------- | |
| 1933 | -# This script can be used to dump the directory of any OLE2 structured | |
| 1934 | -# storage file. | |
| 1935 | - | |
| 1936 | -if __name__ == "__main__": | |
| 1937 | - | |
| 1938 | - import sys | |
| 1939 | - | |
| 1940 | - # [PL] display quick usage info if launched from command-line | |
| 1941 | - if len(sys.argv) <= 1: | |
| 1942 | - print __doc__ | |
| 1943 | - print """ | |
| 1944 | -Launched from command line, this script parses OLE files and prints info. | |
| 1945 | - | |
| 1946 | -Usage: olefile2.py [-d] [-c] <file> [file2 ...] | |
| 1947 | - | |
| 1948 | -Options: | |
| 1949 | --d : debug mode (display a lot of debug information, for developers only) | |
| 1950 | --c : check all streams (for debugging purposes) | |
| 1951 | -""" | |
| 1952 | - sys.exit() | |
| 1953 | - | |
| 1954 | - check_streams = False | |
| 1955 | - for filename in sys.argv[1:]: | |
| 1956 | -## try: | |
| 1957 | - # OPTIONS: | |
| 1958 | - if filename == '-d': | |
| 1959 | - # option to switch debug mode on: | |
| 1960 | - set_debug_mode(True) | |
| 1961 | - continue | |
| 1962 | - if filename == '-c': | |
| 1963 | - # option to switch check streams mode on: | |
| 1964 | - check_streams = True | |
| 1965 | - continue | |
| 1966 | - | |
| 1967 | - ole = OleFileIO(filename)#, raise_defects=DEFECT_INCORRECT) | |
| 1968 | - print "-" * 68 | |
| 1969 | - print filename | |
| 1970 | - print "-" * 68 | |
| 1971 | - ole.dumpdirectory() | |
| 1972 | - for streamname in ole.listdir(): | |
| 1973 | - if streamname[-1][0] == "\005": | |
| 1974 | - print streamname, ": properties" | |
| 1975 | - props = ole.getproperties(streamname, convert_time=True) | |
| 1976 | - props = props.items() | |
| 1977 | - props.sort() | |
| 1978 | - for k, v in props: | |
| 1979 | - #[PL]: avoid to display too large or binary values: | |
| 1980 | - if isinstance(v, basestring): | |
| 1981 | - if len(v) > 50: | |
| 1982 | - v = v[:50] | |
| 1983 | - # quick and dirty binary check: | |
| 1984 | - for c in (1,2,3,4,5,6,7,11,12,14,15,16,17,18,19,20, | |
| 1985 | - 21,22,23,24,25,26,27,28,29,30,31): | |
| 1986 | - if chr(c) in v: | |
| 1987 | - v = '(binary data)' | |
| 1988 | - break | |
| 1989 | - print " ", k, v | |
| 1990 | - | |
| 1991 | - if check_streams: | |
| 1992 | - # Read all streams to check if there are errors: | |
| 1993 | - print '\nChecking streams...' | |
| 1994 | - for streamname in ole.listdir(): | |
| 1995 | - # print name using repr() to convert binary chars to \xNN: | |
| 1996 | - print '-', repr('/'.join(streamname)),'-', | |
| 1997 | - st_type = ole.get_type(streamname) | |
| 1998 | - if st_type == STGTY_STREAM: | |
| 1999 | - print 'size %d' % ole.get_size(streamname) | |
| 2000 | - # just try to read stream in memory: | |
| 2001 | - ole.openstream(streamname) | |
| 2002 | - else: | |
| 2003 | - print 'NOT a stream : type=%d' % st_type | |
| 2004 | - print '' | |
| 2005 | - | |
| 2006 | -## for streamname in ole.listdir(): | |
| 2007 | -## # print name using repr() to convert binary chars to \xNN: | |
| 2008 | -## print '-', repr('/'.join(streamname)),'-', | |
| 2009 | -## print ole.getmtime(streamname) | |
| 2010 | -## print '' | |
| 2011 | - | |
| 2012 | - print 'Modification/Creation times of all directory entries:' | |
| 2013 | - for entry in ole.direntries: | |
| 2014 | - if entry is not None: | |
| 2015 | - print '- %s: mtime=%s ctime=%s' % (entry.name, | |
| 2016 | - entry.getmtime(), entry.getctime()) | |
| 2017 | - print '' | |
| 2018 | - | |
| 2019 | - # parse and display metadata: | |
| 2020 | - meta = ole.get_metadata() | |
| 2021 | - meta.dump() | |
| 2022 | - print '' | |
| 2023 | - #[PL] Test a few new methods: | |
| 2024 | - root = ole.get_rootentry_name() | |
| 2025 | - print 'Root entry name: "%s"' % root | |
| 2026 | - if ole.exists('worddocument'): | |
| 2027 | - print "This is a Word document." | |
| 2028 | - print "type of stream 'WordDocument':", ole.get_type('worddocument') | |
| 2029 | - print "size :", ole.get_size('worddocument') | |
| 2030 | - if ole.exists('macros/vba'): | |
| 2031 | - print "This document may contain VBA macros." | |
| 2032 | - | |
| 2033 | - # print parsing issues: | |
| 2034 | - print '\nNon-fatal issues raised during parsing:' | |
| 2035 | - if ole.parsing_issues: | |
| 2036 | - for exctype, msg in ole.parsing_issues: | |
| 2037 | - print '- %s: %s' % (exctype.__name__, msg) | |
| 2038 | - else: | |
| 2039 | - print 'None' | |
| 2040 | -## except IOError, v: | |
| 2041 | -## print "***", "cannot read", file, "-", v | |
| 2042 | - | |
| 2043 | -# this code was developed while listening to The Wedding Present "Sea Monsters" |
oletools/thirdparty/xxxswf/xxxswf.py
| 1 | -# xxxswf.py was created by alexander dot hanel at gmail dot com | |
| 2 | -# version 0.1 | |
| 3 | -# Date - 12-07-2011 | |
| 4 | -# To do list | |
| 5 | -# - Tag Parser | |
| 6 | -# - ActionScript Decompiler | |
| 7 | - | |
| 8 | -import fnmatch | |
| 9 | -import hashlib | |
| 10 | -import imp | |
| 11 | -import math | |
| 12 | -import os | |
| 13 | -import re | |
| 14 | -import struct | |
| 15 | -import sys | |
| 16 | -import time | |
| 17 | -from StringIO import StringIO | |
| 18 | -from optparse import OptionParser | |
| 19 | -import zlib | |
| 20 | - | |
| 21 | -def checkMD5(md5): | |
| 22 | -# checks if MD5 has been seen in MD5 Dictionary | |
| 23 | -# MD5Dict contains the MD5 and the CVE | |
| 24 | -# For { 'MD5':'CVE', 'MD5-1':'CVE-1', 'MD5-2':'CVE-2'} | |
| 25 | - MD5Dict = {'c46299a5015c6d31ad5766cb49e4ab4b':'CVE-XXXX-XXXX'} | |
| 26 | - if MD5Dict.get(md5): | |
| 27 | - print '\t[BAD] MD5 Match on', MD5Dict.get(md5) | |
| 28 | - return | |
| 29 | - | |
| 30 | -def bad(f): | |
| 31 | - for idx, x in enumerate(findSWF(f)): | |
| 32 | - tmp = verifySWF(f,x) | |
| 33 | - if tmp != None: | |
| 34 | - yaraScan(tmp) | |
| 35 | - checkMD5(hashBuff(tmp)) | |
| 36 | - return | |
| 37 | - | |
| 38 | -def yaraScan(d): | |
| 39 | -# d = buffer of the read file | |
| 40 | -# Scans SWF using Yara | |
| 41 | - # test if yara module is installed | |
| 42 | - # if not Yara can be downloaded from http://code.google.com/p/yara-project/ | |
| 43 | - try: | |
| 44 | - imp.find_module('yara') | |
| 45 | - import yara | |
| 46 | - except ImportError: | |
| 47 | - print '\t[ERROR] Yara module not installed - aborting scan' | |
| 48 | - return | |
| 49 | - # test for yara compile errors | |
| 50 | - try: | |
| 51 | - r = yara.compile(r'rules.yar') | |
| 52 | - except: | |
| 53 | - pass | |
| 54 | - print '\t[ERROR] Yara compile error - aborting scan' | |
| 55 | - return | |
| 56 | - # get matches | |
| 57 | - m = r.match(data=d) | |
| 58 | - # print matches | |
| 59 | - for X in m: | |
| 60 | - print '\t[BAD] Yara Signature Hit:', X | |
| 61 | - return | |
| 62 | - | |
| 63 | -def findSWF(d): | |
| 64 | -# d = buffer of the read file | |
| 65 | -# Search for SWF Header Sigs in files | |
| 66 | - return [tmp.start() for tmp in re.finditer('CWS|FWS', d.read())] | |
| 67 | - | |
| 68 | -def hashBuff(d): | |
| 69 | -# d = buffer of the read file | |
| 70 | -# This function hashes the buffer | |
| 71 | -# source: http://stackoverflow.com/q/5853830 | |
| 72 | - if type(d) is str: | |
| 73 | - d = StringIO(d) | |
| 74 | - md5 = hashlib.md5() | |
| 75 | - while True: | |
| 76 | - data = d.read(128) | |
| 77 | - if not data: | |
| 78 | - break | |
| 79 | - md5.update(data) | |
| 80 | - return md5.hexdigest() | |
| 81 | - | |
| 82 | -def verifySWF(f,addr): | |
| 83 | - # Start of SWF | |
| 84 | - f.seek(addr) | |
| 85 | - # Read Header | |
| 86 | - header = f.read(3) | |
| 87 | - # Read Version | |
| 88 | - ver = struct.unpack('<b', f.read(1))[0] | |
| 89 | - # Read SWF Size | |
| 90 | - size = struct.unpack('<i', f.read(4))[0] | |
| 91 | - # Start of SWF | |
| 92 | - f.seek(addr) | |
| 93 | - try: | |
| 94 | - # Read SWF into buffer. If compressed read uncompressed size. | |
| 95 | - t = f.read(size) | |
| 96 | - except: | |
| 97 | - pass | |
| 98 | - # Error check for invalid SWF | |
| 99 | - print ' - [ERROR] Invalid SWF Size' | |
| 100 | - return None | |
| 101 | - if type(t) is str: | |
| 102 | - f = StringIO(t) | |
| 103 | - # Error check for version above 20 | |
| 104 | - if ver > 20: | |
| 105 | - print ' - [ERROR] Invalid SWF Version' | |
| 106 | - return None | |
| 107 | - | |
| 108 | - if 'CWS' in header: | |
| 109 | - try: | |
| 110 | - f.read(3) | |
| 111 | - tmp = 'FWS' + f.read(5) + zlib.decompress(f.read()) | |
| 112 | - print ' - CWS Header' | |
| 113 | - return tmp | |
| 114 | - | |
| 115 | - except: | |
| 116 | - pass | |
| 117 | - print '- [ERROR]: Zlib decompression error. Invalid CWS SWF' | |
| 118 | - return None | |
| 119 | - | |
| 120 | - elif 'FWS' in header: | |
| 121 | - try: | |
| 122 | - tmp = f.read(size) | |
| 123 | - print ' - FWS Header' | |
| 124 | - return tmp | |
| 125 | - | |
| 126 | - except: | |
| 127 | - pass | |
| 128 | - print ' - [ERROR] Invalid SWF Size' | |
| 129 | - return None | |
| 130 | - | |
| 131 | - else: | |
| 132 | - print ' - [Error] Logic Error Blame Programmer' | |
| 133 | - return None | |
| 134 | - | |
| 135 | -def headerInfo(f): | |
| 136 | -# f is the already opended file handle | |
| 137 | -# Yes, the format is is a rip off SWFDump. Can you blame me? Their tool is awesome. | |
| 138 | - # SWFDump FORMAT | |
| 139 | - # [HEADER] File version: 8 | |
| 140 | - # [HEADER] File is zlib compressed. Ratio: 52% | |
| 141 | - # [HEADER] File size: 37536 | |
| 142 | - # [HEADER] Frame rate: 18.000000 | |
| 143 | - # [HEADER] Frame count: 323 | |
| 144 | - # [HEADER] Movie width: 217.00 | |
| 145 | - # [HEADER] Movie height: 85.00 | |
| 146 | - if type(f) is str: | |
| 147 | - f = StringIO(f) | |
| 148 | - sig = f.read(3) | |
| 149 | - print '\t[HEADER] File header:', sig | |
| 150 | - if 'C' in sig: | |
| 151 | - print '\t[HEADER] File is zlib compressed.' | |
| 152 | - version = struct.unpack('<b', f.read(1))[0] | |
| 153 | - print '\t[HEADER] File version:', version | |
| 154 | - size = struct.unpack('<i', f.read(4))[0] | |
| 155 | - print '\t[HEADER] File size:', size | |
| 156 | - # deflate compressed SWF | |
| 157 | - if 'C' in sig: | |
| 158 | - f = verifySWF(f,0) | |
| 159 | - if type(f) is str: | |
| 160 | - f = StringIO(f) | |
| 161 | - f.seek(0, 0) | |
| 162 | - x = f.read(8) | |
| 163 | - ta = f.tell() | |
| 164 | - tmp = struct.unpack('<b', f.read(1))[0] | |
| 165 | - nbit = tmp >> 3 | |
| 166 | - print '\t[HEADER] Rect Nbit:', nbit | |
| 167 | - # Curretely the nbit is static at 15. This could be modified in the | |
| 168 | - # future. If larger than 9 this will break the struct unpack. Will have | |
| 169 | - # to revist must be a more effective way to deal with bits. Tried to keep | |
| 170 | - # the algo but damn this is ugly... | |
| 171 | - f.seek(ta) | |
| 172 | - rect = struct.unpack('>Q', f.read(int(math.ceil((nbit*4)/8.0))))[0] | |
| 173 | - tmp = struct.unpack('<b', f.read(1))[0] | |
| 174 | - tmp = bin(tmp>>7)[2:].zfill(1) | |
| 175 | - # bin requires Python 2.6 or higher | |
| 176 | - # skips string '0b' and the nbit | |
| 177 | - rect = bin(rect)[7:] | |
| 178 | - xmin = int(rect[0:nbit-1],2) | |
| 179 | - print '\t[HEADER] Rect Xmin:', xmin | |
| 180 | - xmax = int(rect[nbit:(nbit*2)-1],2) | |
| 181 | - print '\t[HEADER] Rect Xmax:', xmax | |
| 182 | - ymin = int(rect[nbit*2:(nbit*3)-1],2) | |
| 183 | - print '\t[HEADER] Rect Ymin:', ymin | |
| 184 | - # one bit needs to be added, my math might be off here | |
| 185 | - ymax = int(rect[nbit*3:(nbit*4)-1] + str(tmp) ,2) | |
| 186 | - print '\t[HEADER] Rect Ymax:', ymax | |
| 187 | - framerate = struct.unpack('<H', f.read(2))[0] | |
| 188 | - print '\t[HEADER] Frame Rate:', framerate | |
| 189 | - framecount = struct.unpack('<H', f.read(2))[0] | |
| 190 | - print '\t[HEADER] Frame Count:', framecount | |
| 191 | - | |
| 192 | -def walk4SWF(path): | |
| 193 | - # returns a list of [folder-path, [addr1,addrw2]] | |
| 194 | - # Don't ask, will come back to this code. | |
| 195 | - p = ['',[]] | |
| 196 | - r = p*0 | |
| 197 | - if os.path.isdir(path) != True and path != '': | |
| 198 | - print '\t[ERROR] walk4SWF path must be a dir.' | |
| 199 | - return | |
| 200 | - for root, dirs, files in os.walk(path): | |
| 201 | - for name in files: | |
| 202 | - try: | |
| 203 | - x = open(os.path.join(root, name), 'rb') | |
| 204 | - except: | |
| 205 | - pass | |
| 206 | - break | |
| 207 | - y = findSWF(x) | |
| 208 | - if len(y) != 0: | |
| 209 | - # Path of file SWF | |
| 210 | - p[0] = os.path.join(root, name) | |
| 211 | - # contains list of the file offset of SWF header | |
| 212 | - p[1] = y | |
| 213 | - r.insert(len(r),p) | |
| 214 | - p = ['',[]] | |
| 215 | - y = '' | |
| 216 | - x.close() | |
| 217 | - return r | |
| 218 | - | |
| 219 | -def tagsInfo(f): | |
| 220 | - return | |
| 221 | - | |
| 222 | -def fileExist(n, ext): | |
| 223 | - # Checks the working dir to see if the file is | |
| 224 | - # already in the dir. If exists the file will | |
| 225 | - # be named name.count.ext (n.c.ext). No more than | |
| 226 | - # 50 matching MD5s will be written to the dir. | |
| 227 | - if os.path.exists( n + '.' + ext): | |
| 228 | - c = 2 | |
| 229 | - while os.path.exists(n + '.' + str(c) + '.' + ext): | |
| 230 | - c = c + 1 | |
| 231 | - if c == 50: | |
| 232 | - print '\t[ERROR] Skipped 50 Matching MD5 SWFs' | |
| 233 | - break | |
| 234 | - n = n + '.' + str(c) | |
| 235 | - | |
| 236 | - return n + '.' + ext | |
| 237 | - | |
| 238 | -def CWSize(f): | |
| 239 | - # The file size in the header is of the uncompressed SWF. | |
| 240 | - # To estimate the size of the compressed data, we can grab | |
| 241 | - # the length, read that amount, deflate the data, then | |
| 242 | - # compress the data again, and then call len(). This will | |
| 243 | - # give us the length of the compressed SWF. | |
| 244 | - return | |
| 245 | - | |
| 246 | -def compressSWF(f): | |
| 247 | - if type(f) is str: | |
| 248 | - f = StringIO(f) | |
| 249 | - try: | |
| 250 | - f.read(3) | |
| 251 | - tmp = 'CWS' + f.read(5) + zlib.compress(f.read()) | |
| 252 | - return tmp | |
| 253 | - except: | |
| 254 | - pass | |
| 255 | - print '\t[ERROR] SWF Zlib Compression Failed' | |
| 256 | - return None | |
| 257 | - | |
| 258 | -def disneyland(f,filename, options): | |
| 259 | - # because this is where the magic happens | |
| 260 | - # but seriously I did the recursion part last.. | |
| 261 | - retfindSWF = findSWF(f) | |
| 262 | - f.seek(0) | |
| 263 | - print '\n[SUMMARY] %d SWF(s) in MD5:%s:%s' % ( len(retfindSWF),hashBuff(f), filename ) | |
| 264 | - # for each SWF in file | |
| 265 | - for idx, x in enumerate(retfindSWF): | |
| 266 | - print '\t[ADDR] SWF %d at %s' % (idx+1, hex(x)), | |
| 267 | - f.seek(x) | |
| 268 | - h = f.read(1) | |
| 269 | - f.seek(x) | |
| 270 | - swf = verifySWF(f,x) | |
| 271 | - if swf == None: | |
| 272 | - continue | |
| 273 | - if options.extract != None: | |
| 274 | - name = fileExist(hashBuff(swf), 'swf') | |
| 275 | - print '\t\t[FILE] Carved SWF MD5: %s' % name | |
| 276 | - try: | |
| 277 | - o = open(name, 'wb+') | |
| 278 | - except IOError, e: | |
| 279 | - print '\t[ERROR] Could Not Create %s ' % e | |
| 280 | - continue | |
| 281 | - o.write(swf) | |
| 282 | - o.close() | |
| 283 | - if options.yara != None: | |
| 284 | - yaraScan(swf) | |
| 285 | - if options.md5scan != None: | |
| 286 | - checkMD5(hashBuff(swf)) | |
| 287 | - if options.decompress != None: | |
| 288 | - name = fileExist(hashBuff(swf), 'swf') | |
| 289 | - print '\t\t[FILE] Carved SWF MD5: %s' % name | |
| 290 | - try: | |
| 291 | - o = open(name, 'wb+') | |
| 292 | - except IOError, e: | |
| 293 | - print '\t[ERROR] Could Not Create %s ' % e | |
| 294 | - continue | |
| 295 | - o.write(swf) | |
| 296 | - o.close() | |
| 297 | - if options.header != None: | |
| 298 | - headerInfo(swf) | |
| 299 | - if options.compress != None: | |
| 300 | - swf = compressSWF(swf) | |
| 301 | - if swf == None: | |
| 302 | - continue | |
| 303 | - name = fileExist(hashBuff(swf), 'swf') | |
| 304 | - print '\t\t[FILE] Compressed SWF MD5: %s' % name | |
| 305 | - try: | |
| 306 | - o = open(name, 'wb+') | |
| 307 | - except IOError, e: | |
| 308 | - print '\t[ERROR] Could Not Create %s ' % e | |
| 309 | - continue | |
| 310 | - o.write(swf) | |
| 311 | - o.close() | |
| 312 | - | |
| 313 | -def main(): | |
| 314 | - # Scenarios: | |
| 315 | - # Scan file for SWF(s) | |
| 316 | - # Scan file for SWF(s) and extract them | |
| 317 | - # Scan file for SWF(s) and scan them with Yara | |
| 318 | - # Scan file for SWF(s), extract them and scan with Yara | |
| 319 | - # Scan directory recursively for files that contain SWF(s) | |
| 320 | - # Scan directory recursively for files that contain SWF(s) and extract them | |
| 321 | - | |
| 322 | - parser = OptionParser() | |
| 323 | - usage = 'usage: %prog [options] <file.bad>' | |
| 324 | - parser = OptionParser(usage=usage) | |
| 325 | - parser.add_option('-x', '--extract', action='store_true', dest='extract', help='Extracts the embedded SWF(s), names it MD5HASH.swf & saves it in the working dir. No addition args needed') | |
| 326 | - parser.add_option('-y', '--yara', action='store_true', dest='yara', help='Scans the SWF(s) with yara. If the SWF(s) is compressed it will be deflated. No addition args needed') | |
| 327 | - parser.add_option('-s', '--md5scan', action='store_true', dest='md5scan', help='Scans the SWF(s) for MD5 signatures. Please see func checkMD5 to define hashes. No addition args needed') | |
| 328 | - parser.add_option('-H', '--header', action='store_true', dest='header', help='Displays the SWFs file header. No addition args needed') | |
| 329 | - parser.add_option('-d', '--decompress', action='store_true', dest='decompress', help='Deflates compressed SWFS(s)') | |
| 330 | - parser.add_option('-r', '--recdir', dest='PATH', type='string', help='Will recursively scan a directory for files that contain SWFs. Must provide path in quotes') | |
| 331 | - parser.add_option('-c', '--compress', action='store_true', dest='compress', help='Compresses the SWF using Zlib') | |
| 332 | - | |
| 333 | - (options, args) = parser.parse_args() | |
| 334 | - | |
| 335 | - # Print help if no argurments are passed | |
| 336 | - if len(sys.argv) < 2: | |
| 337 | - parser.print_help() | |
| 338 | - return | |
| 339 | - | |
| 340 | - # Note files can't start with '-' | |
| 341 | - if '-' in sys.argv[len(sys.argv)-1][0] and options.PATH == None: | |
| 342 | - parser.print_help() | |
| 343 | - return | |
| 344 | - | |
| 345 | - # Recusive Search | |
| 346 | - if options.PATH != None: | |
| 347 | - paths = walk4SWF(options.PATH) | |
| 348 | - for y in paths: | |
| 349 | - #if sys.argv[0] not in y[0]: | |
| 350 | - try: | |
| 351 | - t = open(y[0], 'rb+') | |
| 352 | - disneyland(t, y[0],options) | |
| 353 | - except IOError: | |
| 354 | - pass | |
| 355 | - return | |
| 356 | - | |
| 357 | - # try to open file | |
| 358 | - try: | |
| 359 | - f = open(sys.argv[len(sys.argv)-1],'rb+') | |
| 360 | - filename = sys.argv[len(sys.argv)-1] | |
| 361 | - except Exception: | |
| 362 | - print '[ERROR] File can not be opended/accessed' | |
| 363 | - return | |
| 364 | - | |
| 365 | - disneyland(f,filename,options) | |
| 366 | - f.close() | |
| 367 | - return | |
| 368 | - | |
| 369 | -if __name__ == '__main__': | |
| 370 | - main() | |
| 371 | - | |
| 1 | +# xxxswf.py was created by alexander dot hanel at gmail dot com | |
| 2 | +# version 0.1 | |
| 3 | +# Date - 12-07-2011 | |
| 4 | +# To do list | |
| 5 | +# - Tag Parser | |
| 6 | +# - ActionScript Decompiler | |
| 7 | + | |
| 8 | +import fnmatch | |
| 9 | +import hashlib | |
| 10 | +import imp | |
| 11 | +import math | |
| 12 | +import os | |
| 13 | +import re | |
| 14 | +import struct | |
| 15 | +import sys | |
| 16 | +import time | |
| 17 | +from StringIO import StringIO | |
| 18 | +from optparse import OptionParser | |
| 19 | +import zlib | |
| 20 | + | |
| 21 | +def checkMD5(md5): | |
| 22 | +# checks if MD5 has been seen in MD5 Dictionary | |
| 23 | +# MD5Dict contains the MD5 and the CVE | |
| 24 | +# For { 'MD5':'CVE', 'MD5-1':'CVE-1', 'MD5-2':'CVE-2'} | |
| 25 | + MD5Dict = {'c46299a5015c6d31ad5766cb49e4ab4b':'CVE-XXXX-XXXX'} | |
| 26 | + if MD5Dict.get(md5): | |
| 27 | + print('\t[BAD] MD5 Match on', MD5Dict.get(md5)) | |
| 28 | + return | |
| 29 | + | |
| 30 | +def bad(f): | |
| 31 | + for idx, x in enumerate(findSWF(f)): | |
| 32 | + tmp = verifySWF(f,x) | |
| 33 | + if tmp != None: | |
| 34 | + yaraScan(tmp) | |
| 35 | + checkMD5(hashBuff(tmp)) | |
| 36 | + return | |
| 37 | + | |
| 38 | +def yaraScan(d): | |
| 39 | +# d = buffer of the read file | |
| 40 | +# Scans SWF using Yara | |
| 41 | + # test if yara module is installed | |
| 42 | + # if not Yara can be downloaded from http://code.google.com/p/yara-project/ | |
| 43 | + try: | |
| 44 | + imp.find_module('yara') | |
| 45 | + import yara | |
| 46 | + except ImportError: | |
| 47 | + print('\t[ERROR] Yara module not installed - aborting scan') | |
| 48 | + return | |
| 49 | + # test for yara compile errors | |
| 50 | + try: | |
| 51 | + r = yara.compile(r'rules.yar') | |
| 52 | + except: | |
| 53 | + pass | |
| 54 | + print('\t[ERROR] Yara compile error - aborting scan') | |
| 55 | + return | |
| 56 | + # get matches | |
| 57 | + m = r.match(data=d) | |
| 58 | + # print matches | |
| 59 | + for X in m: | |
| 60 | + print('\t[BAD] Yara Signature Hit: %s' % X) | |
| 61 | + return | |
| 62 | + | |
| 63 | +def findSWF(d): | |
| 64 | +# d = buffer of the read file | |
| 65 | +# Search for SWF Header Sigs in files | |
| 66 | + return [tmp.start() for tmp in re.finditer('CWS|FWS', d.read())] | |
| 67 | + | |
| 68 | +def hashBuff(d): | |
| 69 | +# d = buffer of the read file | |
| 70 | +# This function hashes the buffer | |
| 71 | +# source: http://stackoverflow.com/q/5853830 | |
| 72 | + if type(d) is str: | |
| 73 | + d = StringIO(d) | |
| 74 | + md5 = hashlib.md5() | |
| 75 | + while True: | |
| 76 | + data = d.read(128) | |
| 77 | + if not data: | |
| 78 | + break | |
| 79 | + md5.update(data) | |
| 80 | + return md5.hexdigest() | |
| 81 | + | |
| 82 | +def verifySWF(f,addr): | |
| 83 | + # Start of SWF | |
| 84 | + f.seek(addr) | |
| 85 | + # Read Header | |
| 86 | + header = f.read(3) | |
| 87 | + # Read Version | |
| 88 | + ver = struct.unpack('<b', f.read(1))[0] | |
| 89 | + # Read SWF Size | |
| 90 | + size = struct.unpack('<i', f.read(4))[0] | |
| 91 | + # Start of SWF | |
| 92 | + f.seek(addr) | |
| 93 | + try: | |
| 94 | + # Read SWF into buffer. If compressed read uncompressed size. | |
| 95 | + t = f.read(size) | |
| 96 | + except: | |
| 97 | + pass | |
| 98 | + # Error check for invalid SWF | |
| 99 | + print(' - [ERROR] Invalid SWF Size') | |
| 100 | + return None | |
| 101 | + if type(t) is str: | |
| 102 | + f = StringIO(t) | |
| 103 | + # Error check for version above 20 | |
| 104 | + if ver > 20: | |
| 105 | + print(' - [ERROR] Invalid SWF Version') | |
| 106 | + return None | |
| 107 | + | |
| 108 | + if 'CWS' in header: | |
| 109 | + try: | |
| 110 | + f.read(3) | |
| 111 | + tmp = 'FWS' + f.read(5) + zlib.decompress(f.read()) | |
| 112 | + print(' - CWS Header') | |
| 113 | + return tmp | |
| 114 | + | |
| 115 | + except: | |
| 116 | + pass | |
| 117 | + print('- [ERROR]: Zlib decompression error. Invalid CWS SWF') | |
| 118 | + return None | |
| 119 | + | |
| 120 | + elif 'FWS' in header: | |
| 121 | + try: | |
| 122 | + tmp = f.read(size) | |
| 123 | + print(' - FWS Header') | |
| 124 | + return tmp | |
| 125 | + | |
| 126 | + except: | |
| 127 | + pass | |
| 128 | + print(' - [ERROR] Invalid SWF Size') | |
| 129 | + return None | |
| 130 | + | |
| 131 | + else: | |
| 132 | + print(' - [Error] Logic Error Blame Programmer') | |
| 133 | + return None | |
| 134 | + | |
def headerInfo(f):
    # f is the already-opened file handle, or a raw SWF string buffer.
    # Output format intentionally mirrors SWFDump's header dump:
    #   [HEADER] File version: 8
    #   [HEADER] File is zlib compressed. Ratio: 52%
    #   [HEADER] File size: 37536
    #   [HEADER] Frame rate: 18.000000
    #   [HEADER] Frame count: 323
    #   [HEADER] Movie width: 217.00
    #   [HEADER] Movie height: 85.00
    # Accept a raw string buffer as well as a file-like object.
    if type(f) is str:
        f = StringIO(f)
    sig = f.read(3)
    print('\t[HEADER] File header: %s' % sig)
    if 'C' in sig:
        print('\t[HEADER] File is zlib compressed.')
    version = struct.unpack('<b', f.read(1))[0]
    print('\t[HEADER] File version: %d' % version)
    # Header size field describes the UNCOMPRESSED SWF length.
    size = struct.unpack('<i', f.read(4))[0]
    print('\t[HEADER] File size: %d' % size)
    # For a compressed (CWS) SWF, deflate it first so the RECT and frame
    # fields below can be read from the uncompressed body.
    if 'C' in sig:
        f = verifySWF(f,0)
        if type(f) is str:
            f = StringIO(f)
        f.seek(0, 0)
        # Skip over the reconstructed 8-byte header of the deflated SWF.
        x = f.read(8)
    ta = f.tell()
    # Top 5 bits of the first RECT byte give Nbits, the field width (in
    # bits) of each of the four RECT coordinates.
    tmp = struct.unpack('<b', f.read(1))[0]
    nbit = tmp >> 3
    print('\t[HEADER] Rect Nbit: %d' % nbit)
    # Currently nbit is assumed static at 15. This could change in the
    # future. If larger than 9 this will break the struct unpack below
    # ('>Q' reads at most 8 bytes). Must be revisited; there must be a
    # more effective way to deal with the bit-level fields.
    f.seek(ta)
    rect = struct.unpack('>Q', f.read(int(math.ceil((nbit*4)/8.0))))[0]
    tmp = struct.unpack('<b', f.read(1))[0]
    # Keep the single top bit of the following byte; it completes Ymax.
    tmp = bin(tmp>>7)[2:].zfill(1)
    # bin() requires Python 2.6 or higher.
    # Slice skips the '0b' prefix plus the 5 Nbits bits.
    rect = bin(rect)[7:]
    xmin = int(rect[0:nbit-1],2)
    print('\t[HEADER] Rect Xmin: %d' % xmin)
    xmax = int(rect[nbit:(nbit*2)-1],2)
    print('\t[HEADER] Rect Xmax: %d' % xmax)
    ymin = int(rect[nbit*2:(nbit*3)-1],2)
    print('\t[HEADER] Rect Ymin: %d' % ymin)
    # One extra bit (saved in tmp above) is appended to finish Ymax.
    # NOTE(review): the bit arithmetic here may be off by one -- confirm
    # against the SWF file-format RECT specification.
    ymax = int(rect[nbit*3:(nbit*4)-1] + str(tmp) ,2)
    print('\t[HEADER] Rect Ymax: %d' % ymax)
    framerate = struct.unpack('<H', f.read(2))[0]
    print('\t[HEADER] Frame Rate: %d' % framerate)
    framecount = struct.unpack('<H', f.read(2))[0]
    print('\t[HEADER] Frame Count: %d' % framecount)
| 191 | + | |
def walk4SWF(path):
    """Recursively scan *path* for files that contain embedded SWFs.

    Returns a list of ``[file-path, [offset1, offset2, ...]]`` entries,
    one per file in which findSWF() located at least one SWF header.
    Always returns a list (never None), so callers can iterate the
    result unconditionally.
    """
    results = []
    if path != '' and not os.path.isdir(path):
        print('\t[ERROR] walk4SWF path must be a dir.')
        return results
    for root, dirs, files in os.walk(path):
        for name in files:
            fullpath = os.path.join(root, name)
            try:
                handle = open(fullpath, 'rb')
            except IOError:
                # The original bare except + break aborted the whole
                # directory when one file failed to open; skip only the
                # unreadable file instead.
                continue
            try:
                offsets = findSWF(handle)
                if len(offsets) != 0:
                    results.append([fullpath, offsets])
            finally:
                # Always release the handle, even if findSWF() raises.
                handle.close()
    return results
| 218 | + | |
def tagsInfo(f):
    """Placeholder for SWF tag parsing -- not implemented yet."""
    return None
| 221 | + | |
def fileExist(n, ext):
    """Pick an output filename in the working dir that avoids clobbering.

    Returns ``n.ext`` when that name is free; otherwise probes
    ``n.<count>.ext`` starting at count 2 and returns the first free
    variant.  After 50 matching names it gives up (printing an error)
    and reuses the last candidate, capping duplicate-MD5 carves.
    """
    candidate = n + '.' + ext
    if not os.path.exists(candidate):
        return candidate
    counter = 2
    while os.path.exists(n + '.' + str(counter) + '.' + ext):
        counter = counter + 1
        if counter == 50:
            print('\t[ERROR] Skipped 50 Matching MD5 SWFs')
            break
    return n + '.' + str(counter) + '.' + ext
| 237 | + | |
def CWSize(f):
    """Not implemented: estimate the on-disk size of a compressed SWF.

    The size field in the SWF header describes the UNCOMPRESSED data.
    A compressed length could be estimated by reading that many bytes,
    deflating them, re-compressing the result and taking len() of it.
    """
    return None
| 245 | + | |
def compressSWF(f):
    """Zlib-compress an uncompressed (FWS) SWF buffer.

    Accepts a file-like object or a raw string; skips the 3-byte 'FWS'
    signature, keeps the next 5 header bytes (version + size) and
    deflate-compresses the remainder.  Returns the new 'CWS' buffer, or
    None (after printing an error) when compression fails.
    """
    if isinstance(f, str):
        f = StringIO(f)
    try:
        f.read(3)  # skip the 'FWS' signature
        tmp = 'CWS' + f.read(5) + zlib.compress(f.read())
        return tmp
    except Exception:
        # Was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit
        # are no longer swallowed.
        pass
    print('\t[ERROR] SWF Zlib Compression Failed')
    return None
| 257 | + | |
def _write_swf(swf, label):
    """Write *swf* to <md5>.swf in the working dir; True on success.

    *label* ('Carved' or 'Compressed') only affects the status line.
    Extracted from disneyland(), where this open/write/close sequence
    was duplicated three times verbatim.
    """
    name = fileExist(hashBuff(swf), 'swf')
    print('\t\t[FILE] %s SWF MD5: %s' % (label, name))
    try:
        o = open(name, 'wb+')
    except IOError as e:
        print('\t[ERROR] Could Not Create %s ' % e)
        return False
    o.write(swf)
    o.close()
    return True

def disneyland(f, filename, options):
    """Locate every embedded SWF in *f* and run the enabled actions.

    For each candidate header found by findSWF(): verify/deflate it,
    then (per command-line options) carve it to disk, yara-scan it,
    MD5-scan it, write the decompressed copy, dump its header, and/or
    re-compress it.
    """
    retfindSWF = findSWF(f)
    f.seek(0)
    print('\n[SUMMARY] %d SWF(s) in MD5:%s:%s' % (len(retfindSWF), hashBuff(f), filename))
    # Process each embedded SWF found in the file.
    for idx, x in enumerate(retfindSWF):
        print('\t[ADDR] SWF %d at %s' % (idx + 1, hex(x)))
        f.seek(x)
        # Validate the candidate header; returns the (deflated) SWF
        # buffer or None when the header is bogus.
        swf = verifySWF(f, x)
        if swf is None:
            continue
        if options.extract is not None:
            if not _write_swf(swf, 'Carved'):
                continue
        if options.yara is not None:
            yaraScan(swf)
        if options.md5scan is not None:
            checkMD5(hashBuff(swf))
        if options.decompress is not None:
            # verifySWF() already deflated the buffer, so this writes
            # the decompressed SWF (same data as the extract branch).
            if not _write_swf(swf, 'Carved'):
                continue
        if options.header is not None:
            headerInfo(swf)
        if options.compress is not None:
            swf = compressSWF(swf)
            if swf is None:
                continue
            if not _write_swf(swf, 'Compressed'):
                continue
| 312 | + | |
def main():
    """Command-line entry point.

    Scenarios:
      - scan a file for SWF(s); optionally extract, yara-scan, MD5-scan,
        decompress or re-compress them, or dump their headers
      - recursively scan a directory (-r) for files that contain SWF(s)
    """
    # Build the option parser once (it was previously constructed twice).
    usage = 'usage: %prog [options] <file.bad>'
    parser = OptionParser(usage=usage)
    parser.add_option('-x', '--extract', action='store_true', dest='extract', help='Extracts the embedded SWF(s), names it MD5HASH.swf & saves it in the working dir. No addition args needed')
    parser.add_option('-y', '--yara', action='store_true', dest='yara', help='Scans the SWF(s) with yara. If the SWF(s) is compressed it will be deflated. No addition args needed')
    parser.add_option('-s', '--md5scan', action='store_true', dest='md5scan', help='Scans the SWF(s) for MD5 signatures. Please see func checkMD5 to define hashes. No addition args needed')
    parser.add_option('-H', '--header', action='store_true', dest='header', help='Displays the SWFs file header. No addition args needed')
    parser.add_option('-d', '--decompress', action='store_true', dest='decompress', help='Deflates compressed SWFS(s)')
    parser.add_option('-r', '--recdir', dest='PATH', type='string', help='Will recursively scan a directory for files that contain SWFs. Must provide path in quotes')
    parser.add_option('-c', '--compress', action='store_true', dest='compress', help='Compresses the SWF using Zlib')

    (options, args) = parser.parse_args()

    # Print help if no arguments are passed.
    if len(sys.argv) < 2:
        parser.print_help()
        return

    # Note: target filenames can't start with '-'.
    if sys.argv[-1].startswith('-') and options.PATH is None:
        parser.print_help()
        return

    # Recursive directory search.
    if options.PATH is not None:
        # 'or []' guards against walk4SWF() returning None for bad paths.
        for entry in walk4SWF(options.PATH) or []:
            # entry is [file-path, [SWF offsets]]
            try:
                t = open(entry[0], 'rb+')
            except IOError:
                continue
            try:
                disneyland(t, entry[0], options)
            finally:
                t.close()  # the original loop leaked these handles
        return

    # Single-file mode: try to open the target file.
    filename = sys.argv[-1]
    try:
        f = open(filename, 'rb+')
    except IOError:
        print('[ERROR] File can not be opened/accessed')
        return

    try:
        disneyland(f, filename, options)
    finally:
        f.close()
    return
| 368 | + | |
# Run the command-line interface only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    main()
| 371 | + | ... | ... |