Commit 2685c6f5b70d40c7606fb55773f7ca8159130520

Authored by decalage2
1 parent 44ec0bd8

rtfobj: new RtfParser and RtfObjParser classes - a more complete RTF parser to support tricky edge cases exploited by malware
Showing 1 changed file with 403 additions and 51 deletions
oletools/rtfobj.py
1 1 #!/usr/bin/env python
  2 +from __future__ import print_function
  3 +
2 4 """
3 5 rtfobj.py
4 6  
... ... @@ -52,12 +54,12 @@ http://www.decalage.info/python/oletools
52 54 # (contribution by Thomas Jarosch)
53 55 # TJ: - sanitize filenames to avoid special characters
54 56 # 2016-05-29 PL: - improved parsing, fixed issue #42
  57 +# 2016-07-13 v0.48 PL: - new RtfParser and RtfObjParser classes
55 58  
56   -__version__ = '0.47'
  59 +__version__ = '0.48'
57 60  
58 61 #------------------------------------------------------------------------------
59 62 # TODO:
60   -# - improve regex pattern for better performance?
61 63 # - allow semicolon within hex, as found in this sample:
62 64 # http://contagiodump.blogspot.nl/2011/10/sep-28-cve-2010-3333-manuscript-with.html
63 65  
... ... @@ -70,6 +72,7 @@ from thirdparty.xglob import xglob
70 72 from oleobj import OleObject, OleNativeStream
71 73 import oleobj
72 74  
  75 +
73 76 # === LOGGING =================================================================
74 77  
75 78 class NullHandler(logging.Handler):
... ... @@ -125,11 +128,47 @@ HEX_DIGIT = r'[0-9A-Fa-f]'
125 128 # HEX_CHAR = r'[0-9A-Fa-f]\s*[0-9A-Fa-f]'
126 129 # Even worse, MS Word also allows ANY RTF-style tag {*} in between!!
127 130 # AND the tags can be nested...
# SINGLE_RTF_TAG = r'[{][^{}]*[}]'
# Actually RTF tags may contain braces escaped with backslash (\{ \}):
SINGLE_RTF_TAG = r'[{](?:\\.|[^{}\\])*[}]'

# Nested tags, two levels (because Python's re does not support nested matching):
# NESTED_RTF_TAG = r'[{](?:[^{}]|'+SINGLE_RTF_TAG+r')*[}]'
NESTED_RTF_TAG = r'[{](?:\\.|[^{}\\]|' + SINGLE_RTF_TAG + r')*[}]'

# AND it is also allowed to insert ANY control word or control symbol (ignored)
# According to Rich Text Format (RTF) Specification Version 1.9.1,
# section "Control Word":
# control word = \<ASCII Letter [a-zA-Z] Sequence max 32><Delimiter>
# delimiter = space, OR signed integer followed by any non-digit,
#             OR any character except letter and digit
# examples of valid control words:
# "\AnyThing " "\AnyThing123z" "\AnyThing-456{" "\AnyThing{"
# control symbol = \<any char except letter or digit> (followed by anything)

# The name length is deliberately more permissive (250) than the 32 letters
# given in the specification.
ASCII_NAME = r'([a-zA-Z]{1,250})'

# using Python's re lookahead assertion:
# (?=...) Matches if ... matches next, but doesn't consume any of the string.
# This is called a lookahead assertion. For example, Isaac (?=Asimov) will
# match 'Isaac ' only if it's followed by 'Asimov'.

# TODO: Find the actual limit on the number of digits for Word
# SIGNED_INTEGER = r'(-?\d{1,250})'
SIGNED_INTEGER = r'(-?\d+)'

# Group 1 = control word name, group 2 = optional signed integer parameter.
# The delimiter itself is only checked with a lookahead, never consumed.
# - The integer alternative is tried first, so that "\name-456" yields the
#   parameter "-456".
# - Otherwise the name must be followed by any character that is neither a
#   letter nor a digit (this includes a hyphen NOT followed by a digit).
# - \Z lets a control word terminate exactly at the end of the data, which
#   happens in truncated or deliberately malformed files.
CONTROL_WORD = (r'(?:\\' + ASCII_NAME
                + r'(?:' + SIGNED_INTEGER + r'(?=[^0-9]|\Z)'
                + r'|(?=[^a-zA-Z0-9]|\Z)))')
re_control_word = re.compile(CONTROL_WORD)

CONTROL_SYMBOL = r'(?:\\[^a-zA-Z0-9])'
re_control_symbol = re.compile(CONTROL_SYMBOL)

# Text that is not a control word/symbol or a group:
TEXT = r'[^{}\\]+'
re_text = re.compile(TEXT)

# ignored whitespaces and tags within a hex block:
IGNORED = r'(?:\s|' + NESTED_RTF_TAG + r'|' + CONTROL_SYMBOL + r'|' + CONTROL_WORD + r')*'
#IGNORED = r'\s*'
134 173  
135 174 # HEX_CHAR = HEX_DIGIT + IGNORED + HEX_DIGIT
... ... @@ -175,6 +214,316 @@ re_delims_bin_decimal = re.compile(DELIMITERS_ZeroOrMore + BACKSLASH_BIN
175 214 + DECIMAL_GROUP + DELIMITER)
176 215 re_delim_hexblock = re.compile(DELIMITER + PATTERN)
177 216  
# Destination Control Words, according to MS RTF Specifications v1.9.1.
# The names are kept in a plain whitespace-separated table (names are case
# sensitive) and turned into an immutable set for fast membership tests.
DESTINATION_CONTROL_WORDS = frozenset("""
    aftncn aftnsep aftnsepc annotation atnauthor atndate atnicn atnid atnparent atnref
    atntime atrfend atrfstart author background bkmkend bkmkstart blipuid buptim category
    colorschememapping colortbl comment company creatim datafield datastore defchp defpap
    do doccomm docvar dptxbxtext ebcend ebcstart factoidname falt fchars ffdeftext
    ffentrymcr ffexitmcr ffformat ffhelptext ffl ffname ffstattext field file filetbl
    fldinst fldrslt fldtype fname fontemb fontfile fonttbl footer footerf footerl
    footerr footnote formfield ftncn ftnsep ftnsepc g generator gridtbl header headerf
    headerl headerr hl hlfr hlinkbase hlloc hlsrc hsv htmltag info keycode keywords
    latentstyles lchars levelnumbers leveltext lfolevel linkval list listlevel listname
    listoverride listoverridetable listpicture liststylename listtable listtext lsdlockedexcept
    macc maccPr mailmerge maln malnScr manager margPr mbar mbarPr mbaseJc mbegChr
    mborderBox mborderBoxPr mbox mboxPr mchr mcount mctrlPr md mdeg mdegHide mden
    mdiff mdPr me mendChr meqArr meqArrPr mf mfName mfPr mfunc mfuncPr mgroupChr
    mgroupChrPr mgrow mhideBot mhideLeft mhideRight mhideTop mhtmltag mlim mlimloc mlimlow
    mlimlowPr mlimupp mlimuppPr mm mmaddfieldname mmath mmathPict mmathPr mmaxdist mmc
    mmcJc mmconnectstr mmconnectstrdata mmcPr mmcs mmdatasource mmheadersource mmmailsubject
    mmodso mmodsofilter mmodsofldmpdata mmodsomappedname mmodsoname mmodsorecipdata mmodsosort
    mmodsosrc mmodsotable mmodsoudl mmodsoudldata mmodsouniquetag mmPr mmquery mmr mnary
    mnaryPr mnoBreak mnum mobjDist moMath moMathPara moMathParaPr mopEmu mphant mphantPr
    mplcHide mpos mr mrad mradPr mrPr msepChr mshow mshp msPre msPrePr msSub
    msSubPr msSubSup msSubSupPr msSup msSupPr mstrikeBLTR mstrikeH mstrikeTLBR mstrikeV
    msub msubHide msup msupHide mtransp mtype mvertJc mvfmf mvfml mvtof mvtol
    mzeroAsc mzeroDesc mzeroWid nesttableprops nextfile nonesttables objalias objclass
    objdata object objname objsect objtime oldcprops oldpprops oldsprops oldtprops
    oleclsid operator panose password passwordhash pgp pgptbl picprop pict pn pnseclvl
    pntext pntxta pntxtb printim private propname protend protstart protusertbl pxe
    result revtbl revtim rsidtbl rtf rxe shp shpgrp shpinst shppict shprslt shptxt
    sn sp staticval stylesheet subject sv svb tc template themedata title txe ud
    upr userprops wgrffmtfilter windowcaption writereservation writereservhash xe xform
    xmlattrname xmlattrvalue xmlclose xmlname xmlnstbl xmlopen
""".split())
  250 +
  251 +
  252 +
  253 +#=== CLASSES =================================================================
  254 +
class Destination(object):
    """
    Record holding what the parser collects for one destination control word.

    Attributes:
        cword: name of the destination control word (None when not given)
        data: text accumulated for the destination, initially empty
        start: index where the destination starts, None until it is set
        end: index where the destination ends, None until it is set
        group_level: group nesting level attached to the destination, 0 by default
    """
    def __init__(self, cword=None):
        self.group_level = 0
        # boundaries are unknown when the object is created:
        self.start, self.end = None, None
        self.data = ''
        self.cword = cword
  266 +
  267 +# class Group(object):
  268 +# """
  269 +# Stores the data associated with a group between braces {...}
  270 +# """
  271 +# def __init__(self, cword=None):
  272 +# self.start = None
  273 +# self.end = None
  274 +# self.level = None
  275 +
  276 +
  277 +
class RtfParser(object):
    """
    Very simple generic RTF parser

    The parser walks through the data once, from start to end, and recognises
    groups ({ and }), control words, control symbols, \\bin binary blocks and
    plain text. Text is accumulated in the Destination object that is current
    at that point.

    For each element a hook method is called. The hooks do nothing here and
    are meant to be overridden by subclasses: open_group, close_group,
    open_destination, close_destination, control_word, control_symbol, text,
    bin and end_of_file. Methods starting with an underscore contain the
    internal bookkeeping and call the corresponding hook.
    """

    def __init__(self, data):
        """
        :param data: string containing the RTF data to parse. It is indexed
            character by character and matched against the module-level
            regular expressions.
        """
        self.data = data
        self.index = 0
        self.size = len(data)
        self.group_level = 0
        # default destination for the document text (group level 0).
        # It always stays at the bottom of the destinations stack.
        document_destination = Destination()
        self.destinations = [document_destination]
        self.current_destination = document_destination

    def parse(self):
        """
        Parse the whole data, calling the hook methods along the way.

        Malformed input never stops the parsing: a backslash that starts
        neither a control word nor a control symbol is skipped, and groups
        still open at the end of the data are closed automatically.
        """
        self.index = 0
        while self.index < self.size:
            char = self.data[self.index]
            if char == '{':
                self._open_group()
                self.index += 1
                continue
            if char == '}':
                self._close_group()
                self.index += 1
                continue
            if char == '\\':
                m = re_control_word.match(self.data, self.index)
                if m:
                    cword = m.group(1)
                    # group 2 is None when the control word has no parameter:
                    param = m.group(2)
                    # log.debug('control word %r at index %Xh - cword=%r param=%r' % (m.group(), self.index, cword, param))
                    self._control_word(m, cword, param)
                    self.index += len(m.group())
                    # if it's \bin, call _bin after updating index
                    if cword == 'bin':
                        self._bin(m, param)
                    continue
                m = re_control_symbol.match(self.data, self.index)
                if m:
                    self.control_symbol(m)
                    self.index += len(m.group())
                    continue
                # Neither a control word nor a control symbol, for example a
                # backslash followed by a digit or located at the very end of
                # the data: skip it instead of aborting the whole parsing.
                log.debug('Stray backslash at index %Xh, skipped' % self.index)
                self.index += 1
                continue
            m = re_text.match(self.data, self.index)
            if m:
                self._text(m)
                self.index += len(m.group())
                continue
            raise RuntimeError('Should not have reached this point - index=%Xh' % self.index)
        # go through _end_of_file so that anything still open gets closed:
        self._end_of_file()

    def _open_group(self):
        """Handle an opening brace: increase the group level, call open_group."""
        self.group_level += 1
        log.debug('{ Open Group at index %Xh - level=%d' % (self.index, self.group_level))
        # call user method AFTER increasing the level:
        self.open_group()

    def open_group(self):
        """Hook called after a group has been opened."""
        #log.debug('open group at index %Xh' % self.index)
        pass

    def _close_group(self):
        """
        Handle a closing brace: call close_group, close the current
        destination if it was opened at this group level, then decrease the
        group level.
        """
        log.debug('} Close Group at index %Xh - level=%d' % (self.index, self.group_level))
        # call user method BEFORE decreasing the level:
        self.close_group()
        # if the destination level is the same as the group level, close the destination:
        if self.group_level == self.current_destination.group_level:
            log.debug('Current Destination %r level = %d => Close Destination' % (
                self.current_destination.cword, self.current_destination.group_level))
            self._close_destination()
        else:
            log.debug('Current Destination %r level = %d => Continue with same Destination' % (
                self.current_destination.cword, self.current_destination.group_level))
        self.group_level -= 1
        log.debug('Decreased group level to %d' % self.group_level)

    def close_group(self):
        """Hook called before a group is closed."""
        #log.debug('close group at index %Xh' % self.index)
        pass

    def _open_destination(self, matchobject, cword):
        """
        Start a new destination for the destination control word cword.

        :param matchobject: regex match object of the control word
        :param cword: name of the destination control word
        """
        # if the current destination is at the same group level, close it first:
        if self.current_destination.group_level == self.group_level:
            self._close_destination()
        new_dest = Destination(cword)
        new_dest.group_level = self.group_level
        self.destinations.append(new_dest)
        self.current_destination = new_dest
        # start of the destination is right after the control word:
        new_dest.start = self.index + len(matchobject.group())
        log.debug("Open Destination %r start=%Xh - level=%d" % (cword, new_dest.start, new_dest.group_level))
        # call the corresponding user method for additional processing:
        self.open_destination(self.current_destination)

    def open_destination(self, destination):
        """Hook called when a new destination has been opened."""
        pass

    def _close_destination(self):
        """
        Close the current destination: record its end index, call
        close_destination, then go back to the previous destination.
        """
        log.debug("Close Destination %r end=%Xh - level=%d" % (self.current_destination.cword,
            self.index, self.current_destination.group_level))
        self.current_destination.end = self.index
        # call the corresponding user method for additional processing:
        self.close_destination(self.current_destination)
        if len(self.destinations) > 1:
            # remove the current destination from the stack, and go back to the previous one:
            self.destinations.pop()
            self.current_destination = self.destinations[-1]
        else:
            # Never pop the last entry: otherwise current_destination would
            # keep pointing to a destination that is no longer on the stack,
            # and later destinations could not fall back to the document.
            log.debug('All destinations are closed, keeping the document destination open')

    def close_destination(self, destination):
        """Hook called when a destination is closed."""
        pass

    def _control_word(self, matchobject, cword, param):
        """
        Handle a control word: open a destination if it is a destination
        control word, then call control_word.

        :param matchobject: regex match object of the control word
        :param cword: name of the control word
        :param param: parameter as a string of digits (optionally signed),
            or None if there is no parameter
        """
        #log.debug('control word %r at index %Xh' % (matchobject.group(), self.index))
        if cword in DESTINATION_CONTROL_WORDS:
            # log.debug('%r is a destination control word: starting a new destination' % cword)
            self._open_destination(matchobject, cword)
        # call the corresponding user method for additional processing:
        self.control_word(matchobject, cword, param)

    def control_word(self, matchobject, cword, param):
        """Hook called for every control word."""
        pass

    def control_symbol(self, matchobject):
        """Hook called for every control symbol."""
        #log.debug('control symbol %r at index %Xh' % (matchobject.group(), self.index))
        pass

    def _text(self, matchobject):
        """Append a run of plain text to the current destination, call text."""
        text = matchobject.group()
        self.current_destination.data += text
        self.text(matchobject, text)

    def text(self, matchobject, text):
        """Hook called for every run of plain text."""
        #log.debug('text %r at index %Xh' % (matchobject.group(), self.index))
        pass

    def _bin(self, matchobject, param):
        """
        Read the binary data following a \\bin control word and call bin.

        self.index must already point right after the control word.

        :param matchobject: regex match object of the \\bin control word
        :param param: length of the binary data as a string, or None
        """
        if param is None:
            # \bin without a length: nothing to read
            binlen = 0
        else:
            binlen = int(param)
        if binlen < 0:
            # A negative length would move the index backwards (and could
            # make the parser loop forever): treat it as empty.
            log.debug('\\bin: negative length %d, ignored' % binlen)
            binlen = 0
        # A single space delimiting the control word is part of the control
        # word, the binary data only starts after it:
        if self.data[self.index:self.index + 1] == ' ':
            self.index += 1
        # never read (or move the index) past the end of the data:
        remaining = self.size - self.index
        if binlen > remaining:
            log.debug('\\bin: length %d larger than remaining data, truncated to %d' % (binlen, remaining))
            binlen = remaining
        log.debug('\\bin: reading %d bytes of binary data' % binlen)
        bindata = self.data[self.index:self.index + binlen]
        self.index += binlen
        self.bin(bindata)

    def bin(self, bindata):
        """Hook called with the binary data following a \\bin control word."""
        pass

    def _end_of_file(self):
        """Close every group that is still open, then call end_of_file."""
        log.debug('%Xh Reached End of File' % self.index)
        # close any group/destination that is still open:
        while self.group_level > 0:
            log.debug('Group Level = %d, closing group' % self.group_level)
            self._close_group()
        self.end_of_file()

    def end_of_file(self):
        """Hook called once the end of the data has been reached."""
        pass
  441 +
  442 +
class RtfObjParser(RtfParser):
    """
    Specialized RTF parser to extract OLE objects

    Each time an "objdata" destination is closed, its hex data is decoded and
    saved to a ".raw" file. The decoded data is then parsed as an OLE 1.0
    object, and the embedded data (and OLE Package content, if any) is saved
    to additional files. All file names start with fname_prefix.
    """

    def __init__(self, data, fname_prefix='rtf'):
        """
        :param data: string containing the RTF data to parse
        :param fname_prefix: prefix of the names of the files written to disk
        """
        super(RtfObjParser, self).__init__(data)
        self.fname_prefix = fname_prefix

    def open_destination(self, destination):
        """Log the start of each objdata destination."""
        if destination.cword == 'objdata':
            log.debug('*** Start object data at index %Xh' % destination.start)

    def close_destination(self, destination):
        """Decode and save the object stored in an objdata destination."""
        if destination.cword != 'objdata':
            return
        log.debug('*** Close object data at index %Xh' % self.index)
        object_data = self._decode_hexdata(destination.data)
        print('found object size %d at index %08X - end %08X' % (len(object_data),
            destination.start, self.index))
        fname = '%s_object_%08X.raw' % (self.fname_prefix, destination.start)
        print('saving object to file %s' % fname)
        self._write_file(fname, object_data)
        # TODO: check if all hex data is extracted properly
        try:
            self._extract_ole_object(object_data, destination.start)
        except Exception:
            # best effort: the raw data has already been saved above
            log.exception('*** Not an OLE 1.0 Object')

    @staticmethod
    def _decode_hexdata(data):
        """Return the binary data encoded by the hex digits found in data."""
        # Filter out all whitespaces first (just ignored):
        hexdata1 = re.sub(r'[ \t\r\n\f\v]', '', data)
        # Then filter out any other non-hex character:
        hexdata = re.sub(r'[^a-fA-F0-9]', '', hexdata1)
        if len(hexdata) < len(hexdata1):
            # this is only for debugging:
            nonhex = re.sub(r'[a-fA-F0-9]', '', hexdata1)
            log.debug('Found non-hex chars in hexdata: %r' % nonhex)
        # MS Word accepts an extra hex digit, so we need to trim it if present:
        if len(hexdata) & 1:
            log.debug('Odd length, trimmed last byte.')
            hexdata = hexdata[:-1]
        return binascii.unhexlify(hexdata)

    @staticmethod
    def _write_file(fname, content):
        """Write content to the binary file fname, closing it in all cases."""
        with open(fname, 'wb') as f:
            f.write(content)

    def _extract_ole_object(self, object_data, start):
        """
        Parse object_data as an OLE 1.0 object and save what it embeds.

        :param object_data: decoded binary data of the objdata destination
        :param start: start index of the destination, used in file names
        """
        obj = OleObject()
        obj.parse(object_data)
        print('extract file embedded in OLE object:')
        print('format_id  = %d' % obj.format_id)
        print('class name = %r' % obj.class_name)
        print('data size  = %d' % obj.data_size)
        # set a file extension according to the class name:
        class_name = obj.class_name.lower()
        if class_name.startswith('word'):
            ext = 'doc'
        elif class_name.startswith('package'):
            ext = 'package'
        else:
            ext = 'bin'

        fname = '%s_object_%08X.%s' % (self.fname_prefix, start, ext)
        print('saving to file %s' % fname)
        self._write_file(fname, obj.data)
        if class_name == 'package':
            print('Parsing OLE Package')
            opkg = OleNativeStream(bindata=obj.data)
            print('Filename = %r' % opkg.filename)
            print('Source path = %r' % opkg.src_path)
            print('Temp path = %r' % opkg.temp_path)
            if opkg.filename:
                fname = '%s_%s' % (self.fname_prefix,
                                   sanitize_filename(opkg.filename))
            else:
                fname = '%s_object_%08X.noname' % (self.fname_prefix, start)
            print('saving to file %s' % fname)
            self._write_file(fname, opkg.data)

    def bin(self, bindata):
        """Add binary data found inside objdata to the hex data, as hex digits."""
        if self.current_destination.cword == 'objdata':
            # TODO: keep track of this, because it is unusual and indicates potential obfuscation
            # trick: hexlify binary data, add it to hex data
            self.current_destination.data += binascii.hexlify(bindata)

    def control_word(self, matchobject, cword, param):
        """Hook for control words; does nothing for now (see TODOs)."""
        # TODO: extract useful cwords such as objclass
        # TODO: keep track of cwords inside objdata, because it is unusual and indicates potential obfuscation
        # TODO: same with control symbols, and opening bracket
        pass
  526 +
178 527  
179 528 #=== FUNCTIONS ===============================================================
180 529  
... ... @@ -329,50 +678,53 @@ def process_file(container, filename, data, output_dir=None):
329 678 # TODO: option to extract objects to files (false by default)
330 679 if data is None:
331 680 data = open(filename, 'rb').read()
332   - print '-'*79
333   - print 'File: %r - %d bytes' % (filename, len(data))
334   - for index, orig_len, objdata in rtf_iter_objects(data):
335   - print 'found object size %d at index %08X - end %08X' % (len(objdata), index, index+orig_len)
336   - fname = '%s_object_%08X.raw' % (fname_prefix, index)
337   - print 'saving object to file %s' % fname
338   - open(fname, 'wb').write(objdata)
339   - # TODO: check if all hex data is extracted properly
340   -
341   - obj = OleObject()
342   - try:
343   - obj.parse(objdata)
344   - print 'extract file embedded in OLE object:'
345   - print 'format_id = %d' % obj.format_id
346   - print 'class name = %r' % obj.class_name
347   - print 'data size = %d' % obj.data_size
348   - # set a file extension according to the class name:
349   - class_name = obj.class_name.lower()
350   - if class_name.startswith('word'):
351   - ext = 'doc'
352   - elif class_name.startswith('package'):
353   - ext = 'package'
354   - else:
355   - ext = 'bin'
356   -
357   - fname = '%s_object_%08X.%s' % (fname_prefix, index, ext)
358   - print 'saving to file %s' % fname
359   - open(fname, 'wb').write(obj.data)
360   - if obj.class_name.lower() == 'package':
361   - print 'Parsing OLE Package'
362   - opkg = OleNativeStream(bindata=obj.data)
363   - print 'Filename = %r' % opkg.filename
364   - print 'Source path = %r' % opkg.src_path
365   - print 'Temp path = %r' % opkg.temp_path
366   - if opkg.filename:
367   - fname = '%s_%s' % (fname_prefix,
368   - sanitize_filename(opkg.filename))
369   - else:
370   - fname = '%s_object_%08X.noname' % (fname_prefix, index)
371   - print 'saving to file %s' % fname
372   - open(fname, 'wb').write(opkg.data)
373   - except:
374   - pass
375   - log.exception('*** Not an OLE 1.0 Object')
  681 + rtfp = RtfObjParser(data, fname_prefix)
  682 + rtfp.parse()
  683 +
  684 + # print '-'*79
  685 + # print 'File: %r - %d bytes' % (filename, len(data))
  686 + # for index, orig_len, objdata in rtf_iter_objects(data):
  687 + # print 'found object size %d at index %08X - end %08X' % (len(objdata), index, index+orig_len)
  688 + # fname = '%s_object_%08X.raw' % (fname_prefix, index)
  689 + # print 'saving object to file %s' % fname
  690 + # open(fname, 'wb').write(objdata)
  691 + # # TODO: check if all hex data is extracted properly
  692 + #
  693 + # obj = OleObject()
  694 + # try:
  695 + # obj.parse(objdata)
  696 + # print 'extract file embedded in OLE object:'
  697 + # print 'format_id = %d' % obj.format_id
  698 + # print 'class name = %r' % obj.class_name
  699 + # print 'data size = %d' % obj.data_size
  700 + # # set a file extension according to the class name:
  701 + # class_name = obj.class_name.lower()
  702 + # if class_name.startswith('word'):
  703 + # ext = 'doc'
  704 + # elif class_name.startswith('package'):
  705 + # ext = 'package'
  706 + # else:
  707 + # ext = 'bin'
  708 + #
  709 + # fname = '%s_object_%08X.%s' % (fname_prefix, index, ext)
  710 + # print 'saving to file %s' % fname
  711 + # open(fname, 'wb').write(obj.data)
  712 + # if obj.class_name.lower() == 'package':
  713 + # print 'Parsing OLE Package'
  714 + # opkg = OleNativeStream(bindata=obj.data)
  715 + # print 'Filename = %r' % opkg.filename
  716 + # print 'Source path = %r' % opkg.src_path
  717 + # print 'Temp path = %r' % opkg.temp_path
  718 + # if opkg.filename:
  719 + # fname = '%s_%s' % (fname_prefix,
  720 + # sanitize_filename(opkg.filename))
  721 + # else:
  722 + # fname = '%s_object_%08X.noname' % (fname_prefix, index)
  723 + # print 'saving to file %s' % fname
  724 + # open(fname, 'wb').write(opkg.data)
  725 + # except:
  726 + # pass
  727 + # log.exception('*** Not an OLE 1.0 Object')
376 728  
377 729  
378 730  
... ... @@ -414,7 +766,7 @@ if __name__ == '__main__':
414 766  
415 767 # Print help if no arguments are passed
416 768 if len(args) == 0:
417   - print __doc__
  769 + print (__doc__)
418 770 parser.print_help()
419 771 sys.exit()
420 772  
... ... @@ -436,5 +788,5 @@ if __name__ == '__main__':
436 788 process_file(container, filename, data, options.output_dir)
437 789  
438 790  
439   -
  791 +# This code was developed while listening to The Mary Onettes "Lost"
440 792  
... ...