diff --git a/oletools/ppt_record_parser.py b/oletools/ppt_record_parser.py index 7241f9a..6db6468 100644 --- a/oletools/ppt_record_parser.py +++ b/oletools/ppt_record_parser.py @@ -29,8 +29,6 @@ Alternative to ppt_parser.py that works on records # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. -from __future__ import print_function - #------------------------------------------------------------------------------ # CHANGELOG: # 2017-11-30 v0.01 CH: - first version based on xls_parser @@ -44,10 +42,11 @@ from __future__ import print_function import sys -from struct import unpack +from struct import unpack # unsigned: 1 Byte = B, 2 Byte = H, 4 Byte = L import logging import record_base import io +import zlib class PptFile(record_base.OleRecordFile): @@ -77,6 +76,10 @@ class PptStream(record_base.OleRecordStream): """ if rec_type == PptRecordCurrentUser.TYPE: return PptRecordCurrentUser, True + elif rec_type == PptRecordExOleObjAtom.TYPE: + return PptRecordExOleObjAtom, True + elif rec_type == PptRecordExOleVbaActiveXAtom.TYPE: + return PptRecordExOleVbaActiveXAtom, True try: record_name = RECORD_TYPES[rec_type] @@ -86,6 +89,8 @@ class PptStream(record_base.OleRecordStream): is_container = False elif record_name.endswith('Blob'): is_container = False + elif record_name == 'CString': + is_container = False else: logging.warning('Unexpected name for record type "{0}". typo?' .format(record_name)) @@ -106,7 +111,8 @@ class PptRecord(record_base.OleRecordBase): INSTANCE = None VERSION = None - def parse(self, more_data): + def finish_constructing(self, more_data): + """ check and save instance and version """ instance, version = more_data if self.INSTANCE is not None and self.INSTANCE != instance: raise ValueError('invalid instance {0} for {1}' @@ -147,9 +153,13 @@ class PptRecord(record_base.OleRecordBase): class PptContainerRecord(PptRecord): """ A record that contains other records """ - def parse(self, more_data): + def finish_constructing(self, more_data): + """ parse records from self.data """ # set self.version and self.instance - super(PptContainerRecord, self).parse(more_data) + super(PptContainerRecord, self).finish_constructing(more_data) + self.records = None + if not self.data: + return logging.debug('parsing contents of container record {0}'.format(self)) @@ -162,6 +172,16 @@ class PptContainerRecord(PptRecord): logging.debug('done parsing contents of container record {0}' .format(self)) + def __str__(self): + text = super(PptContainerRecord, self).__str__() + if self.records is None: + return '{0}, unparsed{1}'.format(text[:-2], text[-2:]) + elif self.records: + return '{0}, contains {1} recs{2}' \ + .format(text[:-2], len(self.records), text[-2:]) + else: + return text + class PptRecordCurrentUser(PptRecord): """ The CurrentUserAtom record """ @@ -169,14 +189,28 @@ class PptRecordCurrentUser(PptRecord): VERSION = 0 INSTANCE = 0 - def parse(self, more_data): - super(PptRecordCurrentUser, self).parse(more_data) + def finish_constructing(self, more_data): + """ read various attributes from data """ + super(PptRecordCurrentUser, self).finish_constructing(more_data) if self.size < 24: raise ValueError('CurrentUser record is too small ({0})' .format(self.size)) + self.size2 = None + self.header_token = None + self.offset_to_current_edit = None + self.len_user_name = None + self.doc_file_version = None + self.major_version = None + self.minor_version = None + self.ansi_user_name = None + self.unicode_user_name = None + + if not self.data: + return + self.size2, self.header_token, self.offset_to_current_edit, \ self.len_user_name, self.doc_file_version, self.major_version, \ - self.minor_version, _ = unpack(' meaning zlib) + "Office Forms ActiveX controls are specified in [MS-OFORMS]." + + whether this is an OLE object or ActiveX control or a VBA Storage, need to + find the corresponding PptRecordExOleObjAtom + """ + + + TYPE = 0x1011 + + def is_compressed(self): + return self.instance == 1 + + def get_uncompressed_size(self): + """ Get size of data in uncompressed form + + For uncompressed data, this just returns self.size. For compressed data, + this reads and returns the doecmpressedSize field value from self.data. + Raises a value error if compressed and data is not available. + """ + if not self.is_compressed(): + return self.size + elif self.data is None: + raise ValueError('Data not read from record') + else: + return unpack(' use PptRecordExOleVbaActiveXAtom (0x1006, 'ExAviMovieContainer'), (0x100e, 'ExCDAudioContainer'), (0x0fee, 'ExControlContainer'), @@ -260,11 +432,15 @@ RECORD_TYPES = dict([ (0x100f, 'ExWAVAudioEmbeddedContainer'), (0x1010, 'ExWAVAudioLinkContainer'), (0x1004, 'ExMediaAtom'), + (0x040a, 'ExObjListAtom'), + (0x0fcd, 'ExOleEmbedAtom'), + (0x0fc3, 'ExOleObjAtom'), # --> use PptRecordExOleObjAtom instead # other types (0x0fc1, 'MetafileBlob'), (0x0fb8, 'FontEmbedDataBlob'), (0x07e7, 'SoundDataBlob'), (0x138b, 'BinaryTagDataBlob'), + (0x0fba, 'CString'), ]) # record types where version is not 0x0 or 0xf @@ -302,16 +478,41 @@ INSTANCE_EXCEPTIONS = dict([ ############################################################################### +def print_records(record, print_fn, indent, do_print_record): + """ print additional info for record + + prints additional info for some types and subrecords recursively + """ + if do_print_record: + print_fn('{0}{1}'.format(' ' * indent, record)) + if isinstance(record, PptContainerRecord): + for subrec in record.records: + print_records(subrec, print_fn, indent+1, True) + elif isinstance(record, PptRecordCurrentUser): + logging.info('{4}--> crypt: {0}, offset {1}, user {2}/{3}' + .format(record.is_document_encrypted(), + record.offset_to_current_edit, + repr(record.ansi_user_name), + repr(record.unicode_user_name), + ' ' * indent)) + elif isinstance(record, PptRecordExOleObjAtom): + logging.info('{2}--> obj id {0}, persist id ref {1}' + .format(record.ex_obj_id, record.persist_id_ref, + ' ' * indent)) + elif isinstance(record, PptRecordExOleVbaActiveXAtom): + #with open('testdump', 'wb') as writer: + # for chunk in record.iter_uncompressed(): + # logging.info('{0}--> "{1}"'.format(' ' * indent, chunk)) + # writer.write(chunk) + chunk1 = next(record.iter_uncompressed()) + logging.info('{0}--> decompressed size {1}, data {2}...' + .format(' ' * indent, record.get_uncompressed_size(), + ', '.join('{0:02x}'.format(ord(c)) + for c in chunk1[:32]))) + + if __name__ == '__main__': - def print_subrecords(record): - if isinstance(record, PptContainerRecord): - for subrec in record.records: - logging.info(' {0}'.format(subrec)) - elif isinstance(record, PptRecordCurrentUser): - logging.info(' crypt: {0}, offset {1}, user {2}/{3}' - .format(record.is_document_encrypted(), - record.offset_to_current_edit, - repr(record.ansi_user_name), - repr(record.unicode_user_name))) + def do_per_record(record): + print_records(record, logging.info, 2, False) sys.exit(record_base.test(sys.argv[1:], PptFile, - do_per_record=print_subrecords)) + do_per_record=do_per_record))