Commit 6416b39aaa18b2efa23671bfdc5483c206ebfbf8
1 parent
1cf591dd
rtfobj, oleobj: fixed Python 2.6+2.7+3.x support
Showing
2 changed files
with
327 additions
and
149 deletions
oletools/oleobj.py
| 1 | #!/usr/bin/env python | 1 | #!/usr/bin/env python |
| 2 | +from __future__ import print_function | ||
| 2 | """ | 3 | """ |
| 3 | oleobj.py | 4 | oleobj.py |
| 4 | 5 | ||
| @@ -14,7 +15,7 @@ http://www.decalage.info/python/oletools | @@ -14,7 +15,7 @@ http://www.decalage.info/python/oletools | ||
| 14 | 15 | ||
| 15 | # === LICENSE ================================================================== | 16 | # === LICENSE ================================================================== |
| 16 | 17 | ||
| 17 | -# oleobj is copyright (c) 2015 Philippe Lagadec (http://www.decalage.info) | 18 | +# oleobj is copyright (c) 2015-2016 Philippe Lagadec (http://www.decalage.info) |
| 18 | # All rights reserved. | 19 | # All rights reserved. |
| 19 | # | 20 | # |
| 20 | # Redistribution and use in source and binary forms, with or without modification, | 21 | # Redistribution and use in source and binary forms, with or without modification, |
| @@ -41,8 +42,11 @@ http://www.decalage.info/python/oletools | @@ -41,8 +42,11 @@ http://www.decalage.info/python/oletools | ||
| 41 | #------------------------------------------------------------------------------ | 42 | #------------------------------------------------------------------------------ |
| 42 | # CHANGELOG: | 43 | # CHANGELOG: |
| 43 | # 2015-12-05 v0.01 PL: - first version | 44 | # 2015-12-05 v0.01 PL: - first version |
| 45 | +# 2016-06 PL: - added main and process_file (not working yet) | ||
| 46 | +# 2016-07-18 v0.48 SL: - added Python 3.5 support | ||
| 47 | +# 2016-07-19 PL: - fixed Python 2.6-7 support | ||
| 44 | 48 | ||
| 45 | -__version__ = '0.01' | 49 | +__version__ = '0.48' |
| 46 | 50 | ||
| 47 | #------------------------------------------------------------------------------ | 51 | #------------------------------------------------------------------------------ |
| 48 | # TODO: | 52 | # TODO: |
| @@ -62,8 +66,10 @@ __version__ = '0.01' | @@ -62,8 +66,10 @@ __version__ = '0.01' | ||
| 62 | 66 | ||
| 63 | #--- IMPORTS ------------------------------------------------------------------ | 67 | #--- IMPORTS ------------------------------------------------------------------ |
| 64 | 68 | ||
| 65 | -import logging, struct | 69 | +import logging, struct, optparse, os, re, sys |
| 66 | 70 | ||
| 71 | +from thirdparty.olefile import olefile | ||
| 72 | +from thirdparty.xglob import xglob | ||
| 67 | 73 | ||
| 68 | # === LOGGING ================================================================= | 74 | # === LOGGING ================================================================= |
| 69 | 75 | ||
| @@ -107,6 +113,18 @@ def get_logger(name, level=logging.CRITICAL+1): | @@ -107,6 +113,18 @@ def get_logger(name, level=logging.CRITICAL+1): | ||
| 107 | log = get_logger('oleobj') | 113 | log = get_logger('oleobj') |
| 108 | 114 | ||
| 109 | 115 | ||
| 116 | +# === CONSTANTS ============================================================== | ||
| 117 | + | ||
# Indexing a Python 2 str yields a one-character str, whereas indexing a
# Python 3 bytes object yields an int. NULL_CHAR is the null byte in whichever
# form indexing produces on the running interpreter, so that comparisons such
# as ``data[i] == NULL_CHAR`` work on both major versions.
NULL_CHAR = '\x00' if sys.version_info[0] <= 2 else 0
| 126 | + | ||
| 127 | + | ||
| 110 | # === GLOBAL VARIABLES ======================================================= | 128 | # === GLOBAL VARIABLES ======================================================= |
| 111 | 129 | ||
| 112 | # struct to parse an unsigned integer of 32 bits: | 130 | # struct to parse an unsigned integer of 32 bits: |
| @@ -162,7 +180,7 @@ def read_LengthPrefixedAnsiString(data): | @@ -162,7 +180,7 @@ def read_LengthPrefixedAnsiString(data): | ||
| 162 | ansi_string = data[:length-1] | 180 | ansi_string = data[:length-1] |
| 163 | # TODO: only in strict mode: | 181 | # TODO: only in strict mode: |
| 164 | # check the presence of the null char: | 182 | # check the presence of the null char: |
| 165 | - assert data[length] == 0 | 183 | + assert data[length] == NULL_CHAR |
| 166 | new_data = data[length:] | 184 | new_data = data[length:] |
| 167 | return (ansi_string, new_data) | 185 | return (ansi_string, new_data) |
| 168 | 186 | ||
| @@ -285,3 +303,149 @@ class OleObject (object): | @@ -285,3 +303,149 @@ class OleObject (object): | ||
| 285 | self.data = data[:self.data_size] | 303 | self.data = data[:self.data_size] |
| 286 | assert len(self.data) == self.data_size | 304 | assert len(self.data) == self.data_size |
| 287 | self.extra_data = data[self.data_size:] | 305 | self.extra_data = data[self.data_size:] |
| 306 | + | ||
| 307 | + | ||
| 308 | + | ||
def sanitize_filename(filename, replacement='_', max_length=200):
    """
    Return a sanitized basename for filename.

    Only the last path component is kept (as computed by os.path.basename),
    surrounding whitespace is stripped, and every character that is not a
    word character, a dot, a dash or a space is substituted by replacement.
    Runs of dots and runs of spaces are then collapsed to a single
    character.

    :param filename: file name or path to sanitize
    :param replacement: string substituted for each non-whitelisted character
    :param max_length: maximum length of the result; a false value (None, 0)
        disables truncation
    :return: the sanitized basename, or 'NONAME' if nothing is left after
        sanitization (empty input, whitespace only, or a path ending with a
        separator)
    """
    basepath = os.path.basename(filename).strip()
    sane_fname = re.sub(r'[^\w\.\- ]', replacement, basepath)

    # collapse any run of dots into a single dot, so that '..' never survives:
    sane_fname = re.sub(r'\.{2,}', '.', sane_fname)

    # collapse any run of spaces into a single space:
    sane_fname = re.sub(r' {2,}', ' ', sane_fname)

    # test the sanitized result rather than the raw input: a path such as
    # 'dir/' is not empty but its basename is.
    if not sane_fname:
        sane_fname = 'NONAME'

    # limit filename length
    if max_length:
        sane_fname = sane_fname[:max_length]

    return sane_fname
| 329 | + | ||
| 330 | + | ||
def _write_file(path, content):
    """Write content to path in binary mode, closing the file in all cases."""
    with open(path, 'wb') as f:
        f.write(content)


def _native_text(value):
    """Decode value from Latin-1 if it is a Python 3 bytes object, else return it unchanged."""
    # On Python 2, bytes is str, so the value is returned as is.
    if isinstance(value, bytes) and not isinstance(value, str):
        return value.decode('latin-1')
    return value


def _extract_object(obj, stream_path, fname_prefix, index):
    """Print the details of a parsed OleObject and save its data (and OLE Package payload) to disk."""
    print('extract file embedded in OLE object from stream %r:' % stream_path)
    print('format_id = %d' % obj.format_id)
    print('class name = %r' % obj.class_name)
    print('data size = %d' % obj.data_size)
    # NOTE(review): class_name is presumably bytes on Python 3 when it comes
    # from raw stream data - normalise it so the str comparisons below work.
    class_name = _native_text(obj.class_name).lower()
    # set a file extension according to the class name:
    if class_name.startswith('word'):
        ext = 'doc'
    elif class_name.startswith('package'):
        ext = 'package'
    else:
        ext = 'bin'

    fname = '%s_object_%03d.%s' % (fname_prefix, index, ext)
    print('saving to file %s' % fname)
    _write_file(fname, obj.data)
    if class_name == 'package':
        print('Parsing OLE Package')
        opkg = OleNativeStream(bindata=obj.data)
        print('Filename = %r' % opkg.filename)
        print('Source path = %r' % opkg.src_path)
        print('Temp path = %r' % opkg.temp_path)
        if opkg.filename:
            fname = '%s_%s' % (fname_prefix,
                               sanitize_filename(_native_text(opkg.filename)))
        else:
            fname = '%s_object_%03d.noname' % (fname_prefix, index)
        print('saving to file %s' % fname)
        _write_file(fname, opkg.data)


def process_file(container, filename, data, output_dir=None):
    """
    Look for OLE 1.0 objects in every stream of an OLE file and save them.

    Each stream of the OLE container is read and handed to OleObject.parse.
    Streams that cannot be parsed are skipped. For each parsed object, its
    details are printed and its data is written to a file whose name starts
    with the sanitized name of the input file. Objects of class 'package'
    are additionally parsed with OleNativeStream and the embedded payload is
    saved as well.

    :param container: name of the container the file comes from
        (currently unused by this function)
    :param filename: name/path of the file to process
    :param data: content of the file as bytes, or None to read it from
        filename
    :param output_dir: directory where extracted files are written; created
        if missing. If not set, files are written next to filename.
    """
    if output_dir:
        if not os.path.isdir(output_dir):
            log.info('creating output directory %s' % output_dir)
            # makedirs also handles nested directories that do not exist yet
            os.makedirs(output_dir)

        fname_prefix = os.path.join(output_dir,
                                    sanitize_filename(filename))
    else:
        base_dir = os.path.dirname(filename)
        sane_fname = sanitize_filename(filename)
        fname_prefix = os.path.join(base_dir, sane_fname)

    # TODO: option to extract objects to files (false by default)
    if data is None:
        with open(filename, 'rb') as f:
            data = f.read()
    print('-'*79)
    print('File: %r - %d bytes' % (filename, len(data)))
    ole = olefile.OleFileIO(data)
    try:
        index = 1
        for stream in ole.listdir():
            objdata = ole.openstream(stream).read()
            stream_path = '/'.join(stream)
            log.debug('Checking stream %r' % stream_path)
            obj = OleObject()
            try:
                obj.parse(objdata)
            except Exception:
                # any parsing failure means the stream does not hold an
                # OLE 1.0 object: this is expected for most streams.
                log.info('*** Not an OLE 1.0 Object')
                continue
            try:
                _extract_object(obj, stream_path, fname_prefix, index)
            except Exception as exc:
                # best effort: report the failure and go on with the other
                # streams instead of mislabelling it as a parsing issue.
                log.error('*** Error while extracting object from stream %r: %s'
                          % (stream_path, exc))
            # always advance once an object has been parsed, so that a partial
            # failure cannot make the next object overwrite the same file.
            index += 1
    finally:
        ole.close()
| 390 | + | ||
| 391 | + | ||
| 392 | + | ||
| 393 | +#=== MAIN ================================================================= | ||
| 394 | + | ||
if __name__ == '__main__':
    # Command-line entry point: parse the options, set up logging, then run
    # process_file on every file matched by the arguments.

    # print banner with version
    print('oleobj %s - http://decalage.info/oletools' % __version__)
    print('THIS IS WORK IN PROGRESS - Check updates regularly!')
    print('Please report any issue at https://github.com/decalage2/oletools/issues')
    print('')

    DEFAULT_LOG_LEVEL = "warning"  # Default log level
    LOG_LEVELS = {'debug':    logging.DEBUG,
                  'info':     logging.INFO,
                  'warning':  logging.WARNING,
                  'error':    logging.ERROR,
                  'critical': logging.CRITICAL
                  }

    usage = 'usage: %prog [options] <filename> [filename2 ...]'
    parser = optparse.OptionParser(usage=usage)
    # parser.add_option('-o', '--outfile', dest='outfile',
    #     help='output file')
    # parser.add_option('-c', '--csv', dest='csv',
    #     help='export results to a CSV file')
    parser.add_option("-r", action="store_true", dest="recursive",
        help='find files recursively in subdirectories.')
    parser.add_option("-d", type="str", dest="output_dir",
        help='use specified directory to output files.', default=None)
    parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
        help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)')
    parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
        help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
    # 'choice' lets optparse reject an unknown level with a usage error,
    # instead of a KeyError traceback when looking it up in LOG_LEVELS below.
    parser.add_option('-l', '--loglevel', dest="loglevel", action="store",
        type='choice', choices=sorted(LOG_LEVELS), default=DEFAULT_LOG_LEVEL,
        help="logging level debug/info/warning/error/critical (default=%default)")

    (options, args) = parser.parse_args()

    # Print help if no arguments are passed
    if len(args) == 0:
        print(__doc__)
        parser.print_help()
        sys.exit()

    # Setup logging to the console:
    # here we use stdout instead of stderr by default, so that the output
    # can be redirected properly.
    logging.basicConfig(level=LOG_LEVELS[options.loglevel], stream=sys.stdout,
                        format='%(levelname)-8s %(message)s')
    # enable logging in the modules:
    log.setLevel(logging.NOTSET)

    for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
            zip_password=options.zip_password, zip_fname=options.zip_fname):
        # ignore directory names stored in zip files:
        if container and filename.endswith('/'):
            continue
        process_file(container, filename, data, options.output_dir)
| 450 | + | ||
| 451 | + |
oletools/rtfobj.py
| @@ -55,18 +55,20 @@ http://www.decalage.info/python/oletools | @@ -55,18 +55,20 @@ http://www.decalage.info/python/oletools | ||
| 55 | # TJ: - sanitize filenames to avoid special characters | 55 | # TJ: - sanitize filenames to avoid special characters |
| 56 | # 2016-05-29 PL: - improved parsing, fixed issue #42 | 56 | # 2016-05-29 PL: - improved parsing, fixed issue #42 |
| 57 | # 2016-07-13 v0.48 PL: - new RtfParser and RtfObjParser classes | 57 | # 2016-07-13 v0.48 PL: - new RtfParser and RtfObjParser classes |
| 58 | +# 2016-07-18 SL: - added Python 3.5 support | ||
| 59 | +# 2016-07-19 PL: - fixed Python 2.6-2.7 support | ||
| 58 | 60 | ||
| 59 | __version__ = '0.48' | 61 | __version__ = '0.48' |
| 60 | 62 | ||
| 61 | -#------------------------------------------------------------------------------ | 63 | +# ------------------------------------------------------------------------------ |
| 62 | # TODO: | 64 | # TODO: |
| 63 | # - allow semicolon within hex, as found in this sample: | 65 | # - allow semicolon within hex, as found in this sample: |
| 64 | # http://contagiodump.blogspot.nl/2011/10/sep-28-cve-2010-3333-manuscript-with.html | 66 | # http://contagiodump.blogspot.nl/2011/10/sep-28-cve-2010-3333-manuscript-with.html |
| 65 | 67 | ||
| 66 | 68 | ||
| 67 | -#=== IMPORTS ================================================================= | 69 | +# === IMPORTS ================================================================= |
| 68 | 70 | ||
| 69 | -import re, os, sys, string, binascii, logging, optparse | 71 | +import re, os, sys, binascii, logging, optparse |
| 70 | 72 | ||
| 71 | from thirdparty.xglob import xglob | 73 | from thirdparty.xglob import xglob |
| 72 | from oleobj import OleObject, OleNativeStream | 74 | from oleobj import OleObject, OleNativeStream |
| @@ -120,7 +122,7 @@ log = get_logger('rtfobj') | @@ -120,7 +122,7 @@ log = get_logger('rtfobj') | ||
| 120 | # REGEX pattern to extract embedded OLE objects in hexadecimal format: | 122 | # REGEX pattern to extract embedded OLE objects in hexadecimal format: |
| 121 | 123 | ||
| 122 | # alphanum digit: [0-9A-Fa-f] | 124 | # alphanum digit: [0-9A-Fa-f] |
| 123 | -HEX_DIGIT = rb'[0-9A-Fa-f]' | 125 | +HEX_DIGIT = b'[0-9A-Fa-f]' |
| 124 | 126 | ||
| 125 | # hex char = two alphanum digits: [0-9A-Fa-f]{2} | 127 | # hex char = two alphanum digits: [0-9A-Fa-f]{2} |
| 126 | # HEX_CHAR = r'[0-9A-Fa-f]{2}' | 128 | # HEX_CHAR = r'[0-9A-Fa-f]{2}' |
| @@ -130,11 +132,11 @@ HEX_DIGIT = rb'[0-9A-Fa-f]' | @@ -130,11 +132,11 @@ HEX_DIGIT = rb'[0-9A-Fa-f]' | ||
| 130 | # AND the tags can be nested... | 132 | # AND the tags can be nested... |
| 131 | #SINGLE_RTF_TAG = r'[{][^{}]*[}]' | 133 | #SINGLE_RTF_TAG = r'[{][^{}]*[}]' |
| 132 | # Actually RTF tags may contain braces escaped with backslash (\{ \}): | 134 | # Actually RTF tags may contain braces escaped with backslash (\{ \}): |
| 133 | -SINGLE_RTF_TAG = rb'[{](?:\\.|[^{}\])*[}]' | 135 | +SINGLE_RTF_TAG = b'[{](?:\\\\.|[^{}\\\])*[}]' |
| 134 | 136 | ||
| 135 | # Nested tags, two levels (because Python's re does not support nested matching): | 137 | # Nested tags, two levels (because Python's re does not support nested matching): |
| 136 | # NESTED_RTF_TAG = r'[{](?:[^{}]|'+SINGLE_RTF_TAG+r')*[}]' | 138 | # NESTED_RTF_TAG = r'[{](?:[^{}]|'+SINGLE_RTF_TAG+r')*[}]' |
| 137 | -NESTED_RTF_TAG = rb'[{](?:\\.|[^{}\]|'+SINGLE_RTF_TAG+b')*[}]' | 139 | +NESTED_RTF_TAG = b'[{](?:\\\\.|[^{}\\\]|'+SINGLE_RTF_TAG+b')*[}]' |
| 138 | 140 | ||
| 139 | # AND it is also allowed to insert ANY control word or control symbol (ignored) | 141 | # AND it is also allowed to insert ANY control word or control symbol (ignored) |
| 140 | # According to Rich Text Format (RTF) Specification Version 1.9.1, | 142 | # According to Rich Text Format (RTF) Specification Version 1.9.1, |
| @@ -146,7 +148,7 @@ NESTED_RTF_TAG = rb'[{](?:\\.|[^{}\\]|'+SINGLE_RTF_TAG+b')*[}]' | @@ -146,7 +148,7 @@ NESTED_RTF_TAG = rb'[{](?:\\.|[^{}\\]|'+SINGLE_RTF_TAG+b')*[}]' | ||
| 146 | # "\AnyThing " "\AnyThing123z" ""\AnyThing-456{" "\AnyThing{" | 148 | # "\AnyThing " "\AnyThing123z" ""\AnyThing-456{" "\AnyThing{" |
| 147 | # control symbol = \<any char except letter or digit> (followed by anything) | 149 | # control symbol = \<any char except letter or digit> (followed by anything) |
| 148 | 150 | ||
| 149 | -ASCII_NAME = rb'([a-zA-Z]{1,250})' | 151 | +ASCII_NAME = b'([a-zA-Z]{1,250})' |
| 150 | 152 | ||
| 151 | # using Python's re lookahead assumption: | 153 | # using Python's re lookahead assumption: |
| 152 | # (?=...) Matches if ... matches next, but doesn't consume any of the string. | 154 | # (?=...) Matches if ... matches next, but doesn't consume any of the string. |
| @@ -155,21 +157,21 @@ ASCII_NAME = rb'([a-zA-Z]{1,250})' | @@ -155,21 +157,21 @@ ASCII_NAME = rb'([a-zA-Z]{1,250})' | ||
| 155 | 157 | ||
| 156 | # TODO: Find the actual limit on the number of digits for Word | 158 | # TODO: Find the actual limit on the number of digits for Word |
| 157 | # SIGNED_INTEGER = r'(-?\d{1,250})' | 159 | # SIGNED_INTEGER = r'(-?\d{1,250})' |
| 158 | -SIGNED_INTEGER = rb'(-?\d+)' | 160 | +SIGNED_INTEGER = b'(-?\\d+)' |
| 159 | 161 | ||
| 160 | -CONTROL_WORD = rb'(?:\\' + ASCII_NAME + rb'(?:(?=[^a-zA-Z0-9-])|' + SIGNED_INTEGER + rb'(?=[^0-9])))' | 162 | +CONTROL_WORD = b'(?:\\\\' + ASCII_NAME + b'(?:(?=[^a-zA-Z0-9-])|' + SIGNED_INTEGER + b'(?=[^0-9])))' |
| 161 | 163 | ||
| 162 | re_control_word = re.compile(CONTROL_WORD) | 164 | re_control_word = re.compile(CONTROL_WORD) |
| 163 | 165 | ||
| 164 | -CONTROL_SYMBOL = rb'(?:\[^a-zA-Z0-9])' | 166 | +CONTROL_SYMBOL = b'(?:\\\[^a-zA-Z0-9])' |
| 165 | re_control_symbol = re.compile(CONTROL_SYMBOL) | 167 | re_control_symbol = re.compile(CONTROL_SYMBOL) |
| 166 | 168 | ||
| 167 | # Text that is not a control word/symbol or a group: | 169 | # Text that is not a control word/symbol or a group: |
| 168 | -TEXT = rb'[^{}\]+' | 170 | +TEXT = b'[^{}\\\]+' |
| 169 | re_text = re.compile(TEXT) | 171 | re_text = re.compile(TEXT) |
| 170 | 172 | ||
| 171 | # ignored whitespaces and tags within a hex block: | 173 | # ignored whitespaces and tags within a hex block: |
| 172 | -IGNORED = rb'(?:\s|'+NESTED_RTF_TAG+rb'|'+CONTROL_SYMBOL+rb'|'+CONTROL_WORD+rb')*' | 174 | +IGNORED = b'(?:\\s|'+NESTED_RTF_TAG+b'|'+CONTROL_SYMBOL+b'|'+CONTROL_WORD+b')*' |
| 173 | #IGNORED = r'\s*' | 175 | #IGNORED = r'\s*' |
| 174 | 176 | ||
| 175 | # HEX_CHAR = HEX_DIGIT + IGNORED + HEX_DIGIT | 177 | # HEX_CHAR = HEX_DIGIT + IGNORED + HEX_DIGIT |
| @@ -189,27 +191,24 @@ IGNORED = rb'(?:\s|'+NESTED_RTF_TAG+rb'|'+CONTROL_SYMBOL+rb'|'+CONTROL_WORD+rb') | @@ -189,27 +191,24 @@ IGNORED = rb'(?:\s|'+NESTED_RTF_TAG+rb'|'+CONTROL_SYMBOL+rb'|'+CONTROL_WORD+rb') | ||
| 189 | 191 | ||
| 190 | #TODO PATTERN = r'\b(?:' + HEX_CHAR + IGNORED + r'){4,}\b' | 192 | #TODO PATTERN = r'\b(?:' + HEX_CHAR + IGNORED + r'){4,}\b' |
| 191 | # PATTERN = r'\b(?:' + HEX_CHAR + IGNORED + r'){4,}' #+ HEX_CHAR + r'\b' | 193 | # PATTERN = r'\b(?:' + HEX_CHAR + IGNORED + r'){4,}' #+ HEX_CHAR + r'\b' |
| 192 | -PATTERN = rb'\b(?:' + HEX_DIGIT + IGNORED + rb'){7,}' + HEX_DIGIT + rb'\b' | 194 | +PATTERN = b'\\b(?:' + HEX_DIGIT + IGNORED + b'){7,}' + HEX_DIGIT + b'\\b' |
| 193 | 195 | ||
| 194 | # at least 4 hex chars, followed by whitespace or CR/LF: (?:[0-9A-Fa-f]{2}){4,}\s* | 196 | # at least 4 hex chars, followed by whitespace or CR/LF: (?:[0-9A-Fa-f]{2}){4,}\s* |
| 195 | # PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}' | 197 | # PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}' |
| 196 | # improved pattern, allowing semicolons within hex: | 198 | # improved pattern, allowing semicolons within hex: |
| 197 | #PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}' | 199 | #PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}' |
| 198 | 200 | ||
| 199 | -# a dummy translation table for str.translate, which does not change anythying: | ||
| 200 | -TRANSTABLE_NOCHANGE = bytes.maketrans(b'', b'') | ||
| 201 | - | ||
| 202 | re_hexblock = re.compile(PATTERN) | 201 | re_hexblock = re.compile(PATTERN) |
| 203 | re_embedded_tags = re.compile(IGNORED) | 202 | re_embedded_tags = re.compile(IGNORED) |
| 204 | -re_decimal = re.compile(rb'\d+') | 203 | +re_decimal = re.compile(b'\\d+') |
| 205 | 204 | ||
| 206 | -re_delimiter = re.compile(rb'[ \t\r\n\f\v]') | 205 | +re_delimiter = re.compile(b'[ \\t\\r\\n\\f\\v]') |
| 207 | 206 | ||
| 208 | -DELIMITER = rb'[ \t\r\n\f\v]' | ||
| 209 | -DELIMITERS_ZeroOrMore = rb'[ \t\r\n\f\v]*' | ||
| 210 | -BACKSLASH_BIN = rb'\\bin' | 207 | +DELIMITER = b'[ \\t\\r\\n\\f\\v]' |
| 208 | +DELIMITERS_ZeroOrMore = b'[ \\t\\r\\n\\f\\v]*' | ||
| 209 | +BACKSLASH_BIN = b'\\\\bin' | ||
| 211 | # According to my tests, Word accepts up to 250 digits (leading zeroes) | 210 | # According to my tests, Word accepts up to 250 digits (leading zeroes) |
| 212 | -DECIMAL_GROUP = rb'(\d{1,250})' | 211 | +DECIMAL_GROUP = b'(\d{1,250})' |
| 213 | 212 | ||
| 214 | re_delims_bin_decimal = re.compile(DELIMITERS_ZeroOrMore + BACKSLASH_BIN | 213 | re_delims_bin_decimal = re.compile(DELIMITERS_ZeroOrMore + BACKSLASH_BIN |
| 215 | + DECIMAL_GROUP + DELIMITER) | 214 | + DECIMAL_GROUP + DELIMITER) |
| @@ -250,6 +249,19 @@ DESTINATION_CONTROL_WORDS = frozenset(( | @@ -250,6 +249,19 @@ DESTINATION_CONTROL_WORDS = frozenset(( | ||
| 250 | )) | 249 | )) |
| 251 | 250 | ||
| 252 | 251 | ||
| 252 | +# some str methods on Python 2.x return characters, | ||
| 253 | +# while the equivalent bytes methods return integers on Python 3.x: | ||
| 254 | +if sys.version_info[0] <= 2: | ||
| 255 | + # Python 2.x - Characters (str) | ||
| 256 | + BACKSLASH = '\\' | ||
| 257 | + BRACE_OPEN = '{' | ||
| 258 | + BRACE_CLOSE = '}' | ||
| 259 | +else: | ||
| 260 | + # Python 3.x - Integers | ||
| 261 | + BACKSLASH = ord('\\') | ||
| 262 | + BRACE_OPEN = ord('{') | ||
| 263 | + BRACE_CLOSE = ord('}') | ||
| 264 | + | ||
| 253 | 265 | ||
| 254 | #=== CLASSES ================================================================= | 266 | #=== CLASSES ================================================================= |
| 255 | 267 | ||
| @@ -294,15 +306,15 @@ class RtfParser(object): | @@ -294,15 +306,15 @@ class RtfParser(object): | ||
| 294 | def parse(self): | 306 | def parse(self): |
| 295 | self.index = 0 | 307 | self.index = 0 |
| 296 | while self.index < self.size: | 308 | while self.index < self.size: |
| 297 | - if self.data[self.index] == ord('{'): | 309 | + if self.data[self.index] == BRACE_OPEN: |
| 298 | self._open_group() | 310 | self._open_group() |
| 299 | self.index += 1 | 311 | self.index += 1 |
| 300 | continue | 312 | continue |
| 301 | - if self.data[self.index] == ord('}'): | 313 | + if self.data[self.index] == BRACE_CLOSE: |
| 302 | self._close_group() | 314 | self._close_group() |
| 303 | self.index += 1 | 315 | self.index += 1 |
| 304 | continue | 316 | continue |
| 305 | - if self.data[self.index] == ord('\\'): | 317 | + if self.data[self.index] == BACKSLASH: |
| 306 | m = re_control_word.match(self.data, self.index) | 318 | m = re_control_word.match(self.data, self.index) |
| 307 | if m: | 319 | if m: |
| 308 | cword = m.group(1) | 320 | cword = m.group(1) |
| @@ -332,7 +344,7 @@ class RtfParser(object): | @@ -332,7 +344,7 @@ class RtfParser(object): | ||
| 332 | 344 | ||
| 333 | def _open_group(self): | 345 | def _open_group(self): |
| 334 | self.group_level += 1 | 346 | self.group_level += 1 |
| 335 | - log.debug('{ Open Group at index %Xh - level=%d' % (self.index, self.group_level)) | 347 | + #log.debug('{ Open Group at index %Xh - level=%d' % (self.index, self.group_level)) |
| 336 | # call user method AFTER increasing the level: | 348 | # call user method AFTER increasing the level: |
| 337 | self.open_group() | 349 | self.open_group() |
| 338 | 350 | ||
| @@ -341,19 +353,20 @@ class RtfParser(object): | @@ -341,19 +353,20 @@ class RtfParser(object): | ||
| 341 | pass | 353 | pass |
| 342 | 354 | ||
| 343 | def _close_group(self): | 355 | def _close_group(self): |
| 344 | - log.debug('} Close Group at index %Xh - level=%d' % (self.index, self.group_level)) | 356 | + #log.debug('} Close Group at index %Xh - level=%d' % (self.index, self.group_level)) |
| 345 | # call user method BEFORE decreasing the level: | 357 | # call user method BEFORE decreasing the level: |
| 346 | self.close_group() | 358 | self.close_group() |
| 347 | # if the destination level is the same as the group level, close the destination: | 359 | # if the destination level is the same as the group level, close the destination: |
| 348 | if self.group_level == self.current_destination.group_level: | 360 | if self.group_level == self.current_destination.group_level: |
| 349 | - log.debug('Current Destination %r level = %d => Close Destination' % ( | ||
| 350 | - self.current_destination.cword, self.current_destination.group_level)) | 361 | + # log.debug('Current Destination %r level = %d => Close Destination' % ( |
| 362 | + # self.current_destination.cword, self.current_destination.group_level)) | ||
| 351 | self._close_destination() | 363 | self._close_destination() |
| 352 | else: | 364 | else: |
| 353 | - log.debug('Current Destination %r level = %d => Continue with same Destination' % ( | ||
| 354 | - self.current_destination.cword, self.current_destination.group_level)) | 365 | + # log.debug('Current Destination %r level = %d => Continue with same Destination' % ( |
| 366 | + # self.current_destination.cword, self.current_destination.group_level)) | ||
| 367 | + pass | ||
| 355 | self.group_level -= 1 | 368 | self.group_level -= 1 |
| 356 | - log.debug('Decreased group level to %d' % self.group_level) | 369 | + # log.debug('Decreased group level to %d' % self.group_level) |
| 357 | 370 | ||
| 358 | def close_group(self): | 371 | def close_group(self): |
| 359 | #log.debug('close group at index %Xh' % self.index) | 372 | #log.debug('close group at index %Xh' % self.index) |
| @@ -369,7 +382,7 @@ class RtfParser(object): | @@ -369,7 +382,7 @@ class RtfParser(object): | ||
| 369 | self.current_destination = new_dest | 382 | self.current_destination = new_dest |
| 370 | # start of the destination is right after the control word: | 383 | # start of the destination is right after the control word: |
| 371 | new_dest.start = self.index + len(matchobject.group()) | 384 | new_dest.start = self.index + len(matchobject.group()) |
| 372 | - log.debug("Open Destination %r start=%Xh - level=%d" % (cword, new_dest.start, new_dest.group_level)) | 385 | + # log.debug("Open Destination %r start=%Xh - level=%d" % (cword, new_dest.start, new_dest.group_level)) |
| 373 | # call the corresponding user method for additional processing: | 386 | # call the corresponding user method for additional processing: |
| 374 | self.open_destination(self.current_destination) | 387 | self.open_destination(self.current_destination) |
| 375 | 388 | ||
| @@ -377,8 +390,8 @@ class RtfParser(object): | @@ -377,8 +390,8 @@ class RtfParser(object): | ||
| 377 | pass | 390 | pass |
| 378 | 391 | ||
| 379 | def _close_destination(self): | 392 | def _close_destination(self): |
| 380 | - log.debug("Close Destination %r end=%Xh - level=%d" % (self.current_destination.cword, | ||
| 381 | - self.index, self.current_destination.group_level)) | 393 | + # log.debug("Close Destination %r end=%Xh - level=%d" % (self.current_destination.cword, |
| 394 | + # self.index, self.current_destination.group_level)) | ||
| 382 | self.current_destination.end = self.index | 395 | self.current_destination.end = self.index |
| 383 | # call the corresponding user method for additional processing: | 396 | # call the corresponding user method for additional processing: |
| 384 | self.close_destination(self.current_destination) | 397 | self.close_destination(self.current_destination) |
| @@ -388,7 +401,8 @@ class RtfParser(object): | @@ -388,7 +401,8 @@ class RtfParser(object): | ||
| 388 | if len(self.destinations) > 0: | 401 | if len(self.destinations) > 0: |
| 389 | self.current_destination = self.destinations[-1] | 402 | self.current_destination = self.destinations[-1] |
| 390 | else: | 403 | else: |
| 391 | - log.debug('All destinations are closed, keeping the document destination open') | 404 | + # log.debug('All destinations are closed, keeping the document destination open') |
| 405 | + pass | ||
| 392 | 406 | ||
| 393 | def close_destination(self, destination): | 407 | def close_destination(self, destination): |
| 394 | pass | 408 | pass |
| @@ -430,10 +444,10 @@ class RtfParser(object): | @@ -430,10 +444,10 @@ class RtfParser(object): | ||
| 430 | pass | 444 | pass |
| 431 | 445 | ||
| 432 | def _end_of_file(self): | 446 | def _end_of_file(self): |
| 433 | - log.debug('%Xh Reached End of File') | 447 | + # log.debug('%Xh Reached End of File') |
| 434 | # close any group/destination that is still open: | 448 | # close any group/destination that is still open: |
| 435 | while self.group_level > 0: | 449 | while self.group_level > 0: |
| 436 | - log.debug('Group Level = %d, closing group' % self.group_level) | 450 | + # log.debug('Group Level = %d, closing group' % self.group_level) |
| 437 | self._close_group() | 451 | self._close_group() |
| 438 | self.end_of_file() | 452 | self.end_of_file() |
| 439 | 453 | ||
| @@ -458,7 +472,7 @@ class RtfObjParser(RtfParser): | @@ -458,7 +472,7 @@ class RtfObjParser(RtfParser): | ||
| 458 | if destination.cword == b'objdata': | 472 | if destination.cword == b'objdata': |
| 459 | log.debug('*** Close object data at index %Xh' % self.index) | 473 | log.debug('*** Close object data at index %Xh' % self.index) |
| 460 | # Filter out all whitespaces first (just ignored): | 474 | # Filter out all whitespaces first (just ignored): |
| 461 | - hexdata1 = destination.data.translate(TRANSTABLE_NOCHANGE, b' \t\r\n\f\v') | 475 | + hexdata1 = destination.data.translate(None, b' \t\r\n\f\v') |
| 462 | # Then filter out any other non-hex character: | 476 | # Then filter out any other non-hex character: |
| 463 | hexdata = re.sub(b'[^a-hA-H0-9]', b'', hexdata1) | 477 | hexdata = re.sub(b'[^a-hA-H0-9]', b'', hexdata1) |
| 464 | if len(hexdata) < len(hexdata1): | 478 | if len(hexdata) < len(hexdata1): |
| @@ -528,116 +542,116 @@ class RtfObjParser(RtfParser): | @@ -528,116 +542,116 @@ class RtfObjParser(RtfParser): | ||
| 528 | 542 | ||
| 529 | #=== FUNCTIONS =============================================================== | 543 | #=== FUNCTIONS =============================================================== |
| 530 | 544 | ||
| 531 | -def rtf_iter_objects_old (filename, min_size=32): | ||
| 532 | - """ | ||
| 533 | - Open a RTF file, extract each embedded object encoded in hexadecimal of | ||
| 534 | - size > min_size, yield the index of the object in the RTF file and its data | ||
| 535 | - in binary format. | ||
| 536 | - This is an iterator. | ||
| 537 | - """ | ||
| 538 | - data = open(filename, 'rb').read() | ||
| 539 | - for m in re.finditer(PATTERN, data): | ||
| 540 | - found = m.group(0) | ||
| 541 | - orig_len = len(found) | ||
| 542 | - # remove all whitespace and line feeds: | ||
| 543 | - #NOTE: with Python 2.6+, we could use None instead of TRANSTABLE_NOCHANGE | ||
| 544 | - found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v}') | ||
| 545 | - found = binascii.unhexlify(found) | ||
| 546 | - #print repr(found) | ||
| 547 | - if len(found)>min_size: | ||
| 548 | - yield m.start(), orig_len, found | 545 | +# def rtf_iter_objects_old (filename, min_size=32): |
| 546 | +# """ | ||
| 547 | +# Open a RTF file, extract each embedded object encoded in hexadecimal of | ||
| 548 | +# size > min_size, yield the index of the object in the RTF file and its data | ||
| 549 | +# in binary format. | ||
| 550 | +# This is an iterator. | ||
| 551 | +# """ | ||
| 552 | +# data = open(filename, 'rb').read() | ||
| 553 | +# for m in re.finditer(PATTERN, data): | ||
| 554 | +# found = m.group(0) | ||
| 555 | +# orig_len = len(found) | ||
| 556 | +# # remove all whitespace and line feeds: | ||
| 557 | +# #NOTE: with Python 2.6+, we could use None instead of TRANSTABLE_NOCHANGE | ||
| 558 | +# found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v}') | ||
| 559 | +# found = binascii.unhexlify(found) | ||
| 560 | +# #print repr(found) | ||
| 561 | +# if len(found)>min_size: | ||
| 562 | +# yield m.start(), orig_len, found | ||
| 549 | 563 | ||
| 550 | # TODO: backward-compatible API? | 564 | # TODO: backward-compatible API? |
| 551 | 565 | ||
| 552 | 566 | ||
| 553 | -def search_hex_block(data, pos=0, min_size=32, first=True): | ||
| 554 | - if first: | ||
| 555 | - # Search 1st occurence of a hex block: | ||
| 556 | - match = re_hexblock.search(data, pos=pos) | ||
| 557 | - else: | ||
| 558 | - # Match next occurences of a hex block, from the current position only: | ||
| 559 | - match = re_hexblock.match(data, pos=pos) | ||
| 560 | - | ||
| 561 | - | ||
| 562 | - | ||
| 563 | -def rtf_iter_objects (data, min_size=32): | ||
| 564 | - """ | ||
| 565 | - Open a RTF file, extract each embedded object encoded in hexadecimal of | ||
| 566 | - size > min_size, yield the index of the object in the RTF file and its data | ||
| 567 | - in binary format. | ||
| 568 | - This is an iterator. | ||
| 569 | - """ | ||
| 570 | - # Search 1st occurence of a hex block: | ||
| 571 | - match = re_hexblock.search(data) | ||
| 572 | - if match is None: | ||
| 573 | - log.debug('No hex block found.') | ||
| 574 | - # no hex block found | ||
| 575 | - return | ||
| 576 | - while match is not None: | ||
| 577 | - found = match.group(0) | ||
| 578 | - # start index | ||
| 579 | - start = match.start() | ||
| 580 | - # current position | ||
| 581 | - current = match.end() | ||
| 582 | - log.debug('Found hex block starting at %08X, end %08X, size=%d' % (start, current, len(found))) | ||
| 583 | - if len(found) < min_size: | ||
| 584 | - log.debug('Too small - size<%d, ignored.' % min_size) | ||
| 585 | - match = re_hexblock.search(data, pos=current) | ||
| 586 | - continue | ||
| 587 | - #log.debug('Match: %s' % found) | ||
| 588 | - # remove all whitespace and line feeds: | ||
| 589 | - #NOTE: with Python 2.6+, we could use None instead of TRANSTABLE_NOCHANGE | ||
| 590 | - found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v') | ||
| 591 | - # TODO: make it a function | ||
| 592 | - # Also remove embedded RTF tags: | ||
| 593 | - found = re_embedded_tags.sub('', found) | ||
| 594 | - # object data extracted from the RTF file | ||
| 595 | - # MS Word accepts an extra hex digit, so we need to trim it if present: | ||
| 596 | - if len(found) & 1: | ||
| 597 | - log.debug('Odd length, trimmed last byte.') | ||
| 598 | - found = found[:-1] | ||
| 599 | - #log.debug('Cleaned match: %s' % found) | ||
| 600 | - objdata = binascii.unhexlify(found) | ||
| 601 | - # Detect the "\bin" control word, which is sometimes used for obfuscation: | ||
| 602 | - bin_match = re_delims_bin_decimal.match(data, pos=current) | ||
| 603 | - while bin_match is not None: | ||
| 604 | - log.debug('Found \\bin block starting at %08X : %r' | ||
| 605 | - % (bin_match.start(), bin_match.group(0))) | ||
| 606 | - # extract the decimal integer following '\bin' | ||
| 607 | - bin_len = int(bin_match.group(1)) | ||
| 608 | - log.debug('\\bin block length = %d' % bin_len) | ||
| 609 | - if current+bin_len > len(data): | ||
| 610 | - log.error('\\bin block length is larger than the remaining data') | ||
| 611 | - # move the current index, ignore the \bin block | ||
| 612 | - current += len(bin_match.group(0)) | ||
| 613 | - break | ||
| 614 | - # read that number of bytes: | ||
| 615 | - objdata += data[current:current+bin_len] | ||
| 616 | - # TODO: handle exception | ||
| 617 | - current += len(bin_match.group(0)) + bin_len | ||
| 618 | - # TODO: check if current is out of range | ||
| 619 | - # TODO: is Word limiting the \bin length to a number of digits? | ||
| 620 | - log.debug('Current position = %08X' % current) | ||
| 621 | - match = re_delim_hexblock.match(data, pos=current) | ||
| 622 | - if match is not None: | ||
| 623 | - log.debug('Found next hex block starting at %08X, end %08X' | ||
| 624 | - % (match.start(), match.end())) | ||
| 625 | - found = match.group(0) | ||
| 626 | - log.debug('Match: %s' % found) | ||
| 627 | - # remove all whitespace and line feeds: | ||
| 628 | - #NOTE: with Python 2.6+, we could use None instead of TRANSTABLE_NOCHANGE | ||
| 629 | - found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v') | ||
| 630 | - # Also remove embedded RTF tags: | ||
| 631 | - found = re_embedded_tags.sub(found, '') | ||
| 632 | - objdata += binascii.unhexlify(found) | ||
| 633 | - current = match.end() | ||
| 634 | - bin_match = re_delims_bin_decimal.match(data, pos=current) | ||
| 635 | - | ||
| 636 | - # print repr(found) | ||
| 637 | - if len(objdata)>min_size: | ||
| 638 | - yield start, current-start, objdata | ||
| 639 | - # Search next occurence of a hex block: | ||
| 640 | - match = re_hexblock.search(data, pos=current) | 567 | +# def search_hex_block(data, pos=0, min_size=32, first=True): |
| 568 | +# if first: | ||
| 569 | +# # Search 1st occurence of a hex block: | ||
| 570 | +# match = re_hexblock.search(data, pos=pos) | ||
| 571 | +# else: | ||
| 572 | +# # Match next occurences of a hex block, from the current position only: | ||
| 573 | +# match = re_hexblock.match(data, pos=pos) | ||
| 574 | +# | ||
| 575 | +# | ||
| 576 | +# | ||
| 577 | +# def rtf_iter_objects (data, min_size=32): | ||
| 578 | +# """ | ||
| 579 | +# Open a RTF file, extract each embedded object encoded in hexadecimal of | ||
| 580 | +# size > min_size, yield the index of the object in the RTF file and its data | ||
| 581 | +# in binary format. | ||
| 582 | +# This is an iterator. | ||
| 583 | +# """ | ||
| 584 | +# # Search 1st occurence of a hex block: | ||
| 585 | +# match = re_hexblock.search(data) | ||
| 586 | +# if match is None: | ||
| 587 | +# log.debug('No hex block found.') | ||
| 588 | +# # no hex block found | ||
| 589 | +# return | ||
| 590 | +# while match is not None: | ||
| 591 | +# found = match.group(0) | ||
| 592 | +# # start index | ||
| 593 | +# start = match.start() | ||
| 594 | +# # current position | ||
| 595 | +# current = match.end() | ||
| 596 | +# log.debug('Found hex block starting at %08X, end %08X, size=%d' % (start, current, len(found))) | ||
| 597 | +# if len(found) < min_size: | ||
| 598 | +# log.debug('Too small - size<%d, ignored.' % min_size) | ||
| 599 | +# match = re_hexblock.search(data, pos=current) | ||
| 600 | +# continue | ||
| 601 | +# #log.debug('Match: %s' % found) | ||
| 602 | +# # remove all whitespace and line feeds: | ||
| 603 | +# #NOTE: with Python 2.6+, we could use None instead of TRANSTABLE_NOCHANGE | ||
| 604 | +# found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v') | ||
| 605 | +# # TODO: make it a function | ||
| 606 | +# # Also remove embedded RTF tags: | ||
| 607 | +# found = re_embedded_tags.sub('', found) | ||
| 608 | +# # object data extracted from the RTF file | ||
| 609 | +# # MS Word accepts an extra hex digit, so we need to trim it if present: | ||
| 610 | +# if len(found) & 1: | ||
| 611 | +# log.debug('Odd length, trimmed last byte.') | ||
| 612 | +# found = found[:-1] | ||
| 613 | +# #log.debug('Cleaned match: %s' % found) | ||
| 614 | +# objdata = binascii.unhexlify(found) | ||
| 615 | +# # Detect the "\bin" control word, which is sometimes used for obfuscation: | ||
| 616 | +# bin_match = re_delims_bin_decimal.match(data, pos=current) | ||
| 617 | +# while bin_match is not None: | ||
| 618 | +# log.debug('Found \\bin block starting at %08X : %r' | ||
| 619 | +# % (bin_match.start(), bin_match.group(0))) | ||
| 620 | +# # extract the decimal integer following '\bin' | ||
| 621 | +# bin_len = int(bin_match.group(1)) | ||
| 622 | +# log.debug('\\bin block length = %d' % bin_len) | ||
| 623 | +# if current+bin_len > len(data): | ||
| 624 | +# log.error('\\bin block length is larger than the remaining data') | ||
| 625 | +# # move the current index, ignore the \bin block | ||
| 626 | +# current += len(bin_match.group(0)) | ||
| 627 | +# break | ||
| 628 | +# # read that number of bytes: | ||
| 629 | +# objdata += data[current:current+bin_len] | ||
| 630 | +# # TODO: handle exception | ||
| 631 | +# current += len(bin_match.group(0)) + bin_len | ||
| 632 | +# # TODO: check if current is out of range | ||
| 633 | +# # TODO: is Word limiting the \bin length to a number of digits? | ||
| 634 | +# log.debug('Current position = %08X' % current) | ||
| 635 | +# match = re_delim_hexblock.match(data, pos=current) | ||
| 636 | +# if match is not None: | ||
| 637 | +# log.debug('Found next hex block starting at %08X, end %08X' | ||
| 638 | +# % (match.start(), match.end())) | ||
| 639 | +# found = match.group(0) | ||
| 640 | +# log.debug('Match: %s' % found) | ||
| 641 | +# # remove all whitespace and line feeds: | ||
| 642 | +# #NOTE: with Python 2.6+, we could use None instead of TRANSTABLE_NOCHANGE | ||
| 643 | +# found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v') | ||
| 644 | +# # Also remove embedded RTF tags: | ||
| 645 | +# found = re_embedded_tags.sub(found, '') | ||
| 646 | +# objdata += binascii.unhexlify(found) | ||
| 647 | +# current = match.end() | ||
| 648 | +# bin_match = re_delims_bin_decimal.match(data, pos=current) | ||
| 649 | +# | ||
| 650 | +# # print repr(found) | ||
| 651 | +# if len(objdata)>min_size: | ||
| 652 | +# yield start, current-start, objdata | ||
| 653 | +# # Search next occurence of a hex block: | ||
| 654 | +# match = re_hexblock.search(data, pos=current) | ||
| 641 | 655 | ||
| 642 | 656 | ||
| 643 | 657 |