Commit c8a4b6a9267a51d345fdd0e9a60f12db8b48e3f2
1 parent
aa95f26a
oleobj: parse and dump from stream
This way we do not have to keep a whole big office file in memory. (Olefile might do that anyway, but then we have one copy fewer.) Also merge the subfunction process_native_stream back into process_file (harder to read, but makes more sense for exception handling).
Showing
1 changed file
with
56 additions
and
26 deletions
oletools/oleobj.py
| @@ -162,6 +162,10 @@ assert struct_uint16.size == 2 # make sure it matches 2 bytes | @@ -162,6 +162,10 @@ assert struct_uint16.size == 2 # make sure it matches 2 bytes | ||
| 162 | # max length of a zero-terminated ansi string. Not sure what this really is | 162 | # max length of a zero-terminated ansi string. Not sure what this really is |
| 163 | STR_MAX_LEN = 1024 | 163 | STR_MAX_LEN = 1024 |
| 164 | 164 | ||
| 165 | +# size of chunks to copy from ole stream to file | ||
| 166 | +DUMP_CHUNK_SIZE = 4096 | ||
| 167 | + | ||
| 168 | + | ||
| 165 | # === FUNCTIONS ============================================================== | 169 | # === FUNCTIONS ============================================================== |
| 166 | 170 | ||
| 167 | def read_uint32(data, index): | 171 | def read_uint32(data, index): |
| @@ -324,8 +328,9 @@ class OleNativeStream (object): | @@ -324,8 +328,9 @@ class OleNativeStream (object): | ||
| 324 | self.data = data[index:index+self.actual_size] | 328 | self.data = data[index:index+self.actual_size] |
| 325 | # TODO: exception when size > remaining data | 329 | # TODO: exception when size > remaining data |
| 326 | # TODO: SLACK DATA | 330 | # TODO: SLACK DATA |
| 327 | - except IOError, struct.error: # no data to read actual_size | 331 | + except (IOError, struct.error): # no data to read actual_size |
| 328 | logging.debug('data is not embedded but only a link') | 332 | logging.debug('data is not embedded but only a link') |
| 333 | + self.is_link = True | ||
| 329 | self.actual_size = 0 | 334 | self.actual_size = 0 |
| 330 | self.data = None | 335 | self.data = None |
| 331 | 336 | ||
| @@ -519,38 +524,63 @@ def process_file(container, filename, data, output_dir=None): | @@ -519,38 +524,63 @@ def process_file(container, filename, data, output_dir=None): | ||
| 519 | 524 | ||
| 520 | # look for ole files inside file (e.g. unzip docx) | 525 | # look for ole files inside file (e.g. unzip docx) |
| 521 | flag_no_ole = False | 526 | flag_no_ole = False |
| 527 | + flag_stream_error = False | ||
| 522 | for ole in find_ole(filename, data): | 528 | for ole in find_ole(filename, data): |
| 523 | if ole is None: # no ole file found | 529 | if ole is None: # no ole file found |
| 524 | flag_no_ole = True | 530 | flag_no_ole = True |
| 525 | continue | 531 | continue |
| 526 | 532 | ||
| 527 | - for stream in ole.listdir(): | ||
| 528 | - if stream[-1] == '\x01Ole10Native': | ||
| 529 | - process_native_stream(ole, stream, fname_prefix, index) | ||
| 530 | - index += 1 | 533 | + for path_parts in ole.listdir(): |
| 534 | + if path_parts[-1] == '\x01Ole10Native': | ||
| 535 | + stream_path = '/'.join(path_parts) | ||
| 536 | + log.debug('Checking stream %r' % stream_path) | ||
| 537 | + stream = None | ||
| 538 | + try: | ||
| 539 | + stream = ole.openstream(path_parts) | ||
| 540 | + print('extract file embedded in OLE object from stream %r:' % stream_path) | ||
| 541 | + print ('Parsing OLE Package') | ||
| 542 | + opkg = OleNativeStream(stream) | ||
| 543 | + # leave stream open until dumping is finished | ||
| 544 | + except Exception: | ||
| 545 | + log.warning('*** Not an OLE 1.0 Object ({0})'.format(exc)) | ||
| 546 | + flag_stream_error = True | ||
| 547 | + if stream is not None: | ||
| 548 | + stream.close() | ||
| 549 | + continue | ||
| 531 | 550 | ||
| 551 | + # print info | ||
| 552 | + print ('Filename = %r' % opkg.filename) | ||
| 553 | + print ('Source path = %r' % opkg.src_path) | ||
| 554 | + print ('Temp path = %r' % opkg.temp_path) | ||
| 555 | + if opkg.filename: | ||
| 556 | + fname = '%s_%s' % (fname_prefix, | ||
| 557 | + sanitize_filename(opkg.filename)) | ||
| 558 | + else: | ||
| 559 | + fname = '%s_object_%03d.noname' % (fname_prefix, index) | ||
| 532 | 560 | ||
| 533 | -def process_native_stream(ole, stream, fname_prefix, index): | ||
| 534 | - """ Dump data from OLE embedded object stream """ | ||
| 535 | - objdata = ole.openstream(stream).read() | ||
| 536 | - stream_path = '/'.join(stream) | ||
| 537 | - log.debug('Checking stream %r' % stream_path) | ||
| 538 | - try: | ||
| 539 | - print('extract file embedded in OLE object from stream %r:' % stream_path) | ||
| 540 | - print ('Parsing OLE Package') | ||
| 541 | - opkg = OleNativeStream(bindata=objdata) | ||
| 542 | - print ('Filename = %r' % opkg.filename) | ||
| 543 | - print ('Source path = %r' % opkg.src_path) | ||
| 544 | - print ('Temp path = %r' % opkg.temp_path) | ||
| 545 | - if opkg.filename: | ||
| 546 | - fname = '%s_%s' % (fname_prefix, | ||
| 547 | - sanitize_filename(opkg.filename)) | ||
| 548 | - else: | ||
| 549 | - fname = '%s_object_%03d.noname' % (fname_prefix, index) | ||
| 550 | - print ('saving to file %s' % fname) | ||
| 551 | - open(fname, 'wb').write(opkg.data) | ||
| 552 | - except Exception: | ||
| 553 | - log.debug('*** Not an OLE 1.0 Object') | 561 | + # dump |
| 562 | + try: | ||
| 563 | + print ('saving to file %s' % fname) | ||
| 564 | + with open(fname, 'wb') as writer: | ||
| 565 | + n_dumped = 0 | ||
| 566 | + next_size = min(DUMP_CHUNK_SIZE, opkg.actual_size) | ||
| 567 | + while next_size: | ||
| 568 | + data = stream.read(next_size) | ||
| 569 | + writer.write(data) | ||
| 570 | + n_dumped += len(data) | ||
| 571 | + if len(data) != next_size: | ||
| 572 | + logging.warning('Wanted to read {0}, got {1}' | ||
| 573 | + .format(next_size, len(data))) | ||
| 574 | + break | ||
| 575 | + next_size = min(DUMP_CHUNK_SIZE, | ||
| 576 | + opkg.actual_size - n_dumped) | ||
| 577 | + except Exception as exc: | ||
| 578 | + log.warning('error dumping to {0} ({1})' | ||
| 579 | + .format(fname, exc)) | ||
| 580 | + finally: | ||
| 581 | + stream.close() | ||
| 582 | + | ||
| 583 | + index += 1 | ||
| 554 | 584 | ||
| 555 | 585 | ||
| 556 | #=== MAIN ================================================================= | 586 | #=== MAIN ================================================================= |