Commit 0eba3885306add74959e251612e285bd523aabc4
1 parent
e8b75aad
updated OleFileIO_PL to v0.26
Showing
4 changed files
with
487 additions
and
90 deletions
oletools/oleid.py
| @@ -47,6 +47,7 @@ __version__ = '0.01' | @@ -47,6 +47,7 @@ __version__ = '0.01' | ||
| 47 | 47 | ||
| 48 | #------------------------------------------------------------------------------ | 48 | #------------------------------------------------------------------------------ |
| 49 | # TODO: | 49 | # TODO: |
| 50 | +# + extract relevant metadata: codepage, author, application, timestamps, etc | ||
| 50 | # - detect RTF and OpenXML | 51 | # - detect RTF and OpenXML |
| 51 | # - fragmentation | 52 | # - fragmentation |
| 52 | # - OLE package | 53 | # - OLE package |
oletools/thirdparty/OleFileIO_PL/LICENSE.txt
| @@ -4,7 +4,7 @@ LICENSE for the OleFileIO_PL module: | @@ -4,7 +4,7 @@ LICENSE for the OleFileIO_PL module: | ||
| 4 | OleFileIO_PL is an improved version of the OleFileIO module from the | 4 | OleFileIO_PL is an improved version of the OleFileIO module from the |
| 5 | Python Imaging Library (PIL). | 5 | Python Imaging Library (PIL). |
| 6 | 6 | ||
| 7 | -OleFileIO_PL changes are Copyright (c) 2005-2012 by Philippe Lagadec | 7 | +OleFileIO_PL changes are Copyright (c) 2005-2013 by Philippe Lagadec |
| 8 | 8 | ||
| 9 | The Python Imaging Library (PIL) is | 9 | The Python Imaging Library (PIL) is |
| 10 | Copyright (c) 1997-2005 by Secret Labs AB | 10 | Copyright (c) 1997-2005 by Secret Labs AB |
oletools/thirdparty/OleFileIO_PL/OleFileIO_PL.py
| @@ -6,7 +6,7 @@ OleFileIO_PL: | @@ -6,7 +6,7 @@ OleFileIO_PL: | ||
| 6 | Microsoft Compound Document File Format), such as Microsoft Office | 6 | Microsoft Compound Document File Format), such as Microsoft Office |
| 7 | documents, Image Composer and FlashPix files, Outlook messages, ... | 7 | documents, Image Composer and FlashPix files, Outlook messages, ... |
| 8 | 8 | ||
| 9 | -version 0.24 2012-09-18 Philippe Lagadec - http://www.decalage.info | 9 | +version 0.26 2013-07-24 Philippe Lagadec - http://www.decalage.info |
| 10 | 10 | ||
| 11 | Project website: http://www.decalage.info/python/olefileio | 11 | Project website: http://www.decalage.info/python/olefileio |
| 12 | 12 | ||
| @@ -16,23 +16,23 @@ See: http://www.pythonware.com/products/pil/index.htm | @@ -16,23 +16,23 @@ See: http://www.pythonware.com/products/pil/index.htm | ||
| 16 | The Python Imaging Library (PIL) is | 16 | The Python Imaging Library (PIL) is |
| 17 | Copyright (c) 1997-2005 by Secret Labs AB | 17 | Copyright (c) 1997-2005 by Secret Labs AB |
| 18 | Copyright (c) 1995-2005 by Fredrik Lundh | 18 | Copyright (c) 1995-2005 by Fredrik Lundh |
| 19 | -OleFileIO_PL changes are Copyright (c) 2005-2012 by Philippe Lagadec | 19 | +OleFileIO_PL changes are Copyright (c) 2005-2013 by Philippe Lagadec |
| 20 | 20 | ||
| 21 | See source code and LICENSE.txt for information on usage and redistribution. | 21 | See source code and LICENSE.txt for information on usage and redistribution. |
| 22 | 22 | ||
| 23 | WARNING: THIS IS (STILL) WORK IN PROGRESS. | 23 | WARNING: THIS IS (STILL) WORK IN PROGRESS. |
| 24 | """ | 24 | """ |
| 25 | 25 | ||
| 26 | -__author__ = "Fredrik Lundh (Secret Labs AB), Philippe Lagadec" | ||
| 27 | -__date__ = "2012-09-18" | ||
| 28 | -__version__ = '0.24' | 26 | +__author__ = "Philippe Lagadec, Fredrik Lundh (Secret Labs AB)" |
| 27 | +__date__ = "2013-07-24" | ||
| 28 | +__version__ = '0.26' | ||
| 29 | 29 | ||
| 30 | #--- LICENSE ------------------------------------------------------------------ | 30 | #--- LICENSE ------------------------------------------------------------------ |
| 31 | 31 | ||
| 32 | # OleFileIO_PL is an improved version of the OleFileIO module from the | 32 | # OleFileIO_PL is an improved version of the OleFileIO module from the |
| 33 | # Python Imaging Library (PIL). | 33 | # Python Imaging Library (PIL). |
| 34 | 34 | ||
| 35 | -# OleFileIO_PL changes are Copyright (c) 2005-2012 by Philippe Lagadec | 35 | +# OleFileIO_PL changes are Copyright (c) 2005-2013 by Philippe Lagadec |
| 36 | # | 36 | # |
| 37 | # The Python Imaging Library (PIL) is | 37 | # The Python Imaging Library (PIL) is |
| 38 | # Copyright (c) 1997-2005 by Secret Labs AB | 38 | # Copyright (c) 1997-2005 by Secret Labs AB |
| @@ -110,7 +110,27 @@ __version__ = '0.24' | @@ -110,7 +110,27 @@ __version__ = '0.24' | ||
| 110 | # (https://bitbucket.org/decalage/olefileio_pl/issue/7) | 110 | # (https://bitbucket.org/decalage/olefileio_pl/issue/7) |
| 111 | # - added close method to OleFileIO (fixed issue #2) | 111 | # - added close method to OleFileIO (fixed issue #2) |
| 112 | # 2012-07-25 v0.23 PL: - added support for file-like objects (patch by mete0r_kr) | 112 | # 2012-07-25 v0.23 PL: - added support for file-like objects (patch by mete0r_kr) |
| 113 | - | 113 | +# 2013-05-05 v0.24 PL: - getproperties: added conversion from filetime to python |
| 114 | +# datetime | ||
| 115 | +# - main: displays properties with date format | ||
| 116 | +# - new class OleMetadata to parse standard properties | ||
| 117 | +# - added get_metadata method | ||
| 118 | +# 2013-05-07 v0.24 PL: - a few improvements in OleMetadata | ||
| 119 | +# 2013-05-24 v0.25 PL: - getproperties: option to not convert some timestamps | ||
| 120 | +# - OleMetadata: total_edit_time is now a number of seconds, | ||
| 121 | +# not a timestamp | ||
| 122 | +# - getproperties: added support for VT_BOOL, VT_INT, VT_UINT | ||
| 123 | +# - getproperties: filter out null chars from strings | ||
| 124 | +# - getproperties: raise non-fatal defects instead of | ||
| 125 | +# exceptions when properties cannot be parsed properly | ||
| 126 | +# 2013-05-27 PL: - getproperties: improved exception handling | ||
| 127 | +# - _raise_defect: added option to set exception type | ||
| 128 | +# - all non-fatal issues are now recorded, and displayed | ||
| 129 | +# when run as a script | ||
| 130 | +# 2013-07-11 v0.26 PL: - added methods to get modification and creation times | ||
| 131 | +# of a directory entry or a storage/stream | ||
| 132 | +# - fixed parsing of direntry timestamps | ||
| 133 | +# 2013-07-24 PL: - new options in listdir to list storages and/or streams | ||
| 114 | 134 | ||
| 115 | #----------------------------------------------------------------------------- | 135 | #----------------------------------------------------------------------------- |
| 116 | # TODO (for version 1.0): | 136 | # TODO (for version 1.0): |
| @@ -132,11 +152,10 @@ __version__ = '0.24' | @@ -132,11 +152,10 @@ __version__ = '0.24' | ||
| 132 | # - improve docstrings to show more sample uses | 152 | # - improve docstrings to show more sample uses |
| 133 | # - see also original notes and FIXME below | 153 | # - see also original notes and FIXME below |
| 134 | # - remove all obsolete FIXMEs | 154 | # - remove all obsolete FIXMEs |
| 155 | +# - OleMetadata: fix version attrib according to | ||
| 156 | +# http://msdn.microsoft.com/en-us/library/dd945671%28v=office.12%29.aspx | ||
| 135 | 157 | ||
| 136 | # IDEAS: | 158 | # IDEAS: |
| 137 | -# - allow _raise_defect to raise different exceptions, not only IOError | ||
| 138 | -# - provide a class with named attributes to get well-known properties of | ||
| 139 | -# MS Office documents (title, author, ...) ? | ||
| 140 | # - in OleFileIO._open and _OleStream, use size=None instead of 0x7FFFFFFF for | 159 | # - in OleFileIO._open and _OleStream, use size=None instead of 0x7FFFFFFF for |
| 141 | # streams with unknown size | 160 | # streams with unknown size |
| 142 | # - use arrays of int instead of long integers for FAT/MiniFAT, to improve | 161 | # - use arrays of int instead of long integers for FAT/MiniFAT, to improve |
| @@ -199,7 +218,7 @@ __version__ = '0.24' | @@ -199,7 +218,7 @@ __version__ = '0.24' | ||
| 199 | 218 | ||
| 200 | #------------------------------------------------------------------------------ | 219 | #------------------------------------------------------------------------------ |
| 201 | 220 | ||
| 202 | -import string, StringIO, struct, array, os.path, sys | 221 | +import string, StringIO, struct, array, os.path, sys, datetime |
| 203 | 222 | ||
| 204 | #[PL] Define explicitly the public API to avoid private objects in pydoc: | 223 | #[PL] Define explicitly the public API to avoid private objects in pydoc: |
| 205 | __all__ = ['OleFileIO', 'isOleFile'] | 224 | __all__ = ['OleFileIO', 'isOleFile'] |
| @@ -421,10 +440,172 @@ except NameError: | @@ -421,10 +440,172 @@ except NameError: | ||
| 421 | return filter(ord, s) | 440 | return filter(ord, s) |
| 422 | 441 | ||
| 423 | 442 | ||
| 443 | +def filetime2datetime(filetime): | ||
| 444 | + """ | ||
| 445 | + convert FILETIME (64 bits int) to Python datetime.datetime | ||
| 446 | + """ | ||
| 447 | + # TODO: manage exception when microseconds is too large | ||
| 448 | + # inspired from http://code.activestate.com/recipes/511425-filetime-to-datetime/ | ||
| 449 | + _FILETIME_null_date = datetime.datetime(1601, 1, 1, 0, 0, 0) | ||
| 450 | + #debug('timedelta days=%d' % (filetime/(10*1000000*3600*24))) | ||
| 451 | + return _FILETIME_null_date + datetime.timedelta(microseconds=filetime/10) | ||
| 452 | + | ||
| 424 | 453 | ||
| 425 | 454 | ||
| 426 | #=== CLASSES ================================================================== | 455 | #=== CLASSES ================================================================== |
| 427 | 456 | ||
| 457 | +class OleMetadata: | ||
| 458 | + """ | ||
| 459 | + class to parse and store metadata from standard properties of OLE files. | ||
| 460 | + | ||
| 461 | + Available attributes: | ||
| 462 | + codepage, title, subject, author, keywords, comments, template, | ||
| 463 | + last_saved_by, revision_number, total_edit_time, last_printed, create_time, | ||
| 464 | + last_saved_time, num_pages, num_words, num_chars, thumbnail, | ||
| 465 | + creating_application, security, codepage_doc, category, presentation_target, | ||
| 466 | + bytes, lines, paragraphs, slides, notes, hidden_slides, mm_clips, | ||
| 467 | + scale_crop, heading_pairs, titles_of_parts, manager, company, links_dirty, | ||
| 468 | + chars_with_spaces, unused, shared_doc, link_base, hlinks, hlinks_changed, | ||
| 469 | + version, dig_sig, content_type, content_status, language, doc_version | ||
| 470 | + | ||
| 471 | + Note: an attribute is set to None when not present in the properties of the | ||
| 472 | + OLE file. | ||
| 473 | + | ||
| 474 | + References for SummaryInformation stream: | ||
| 475 | + - http://msdn.microsoft.com/en-us/library/dd942545.aspx | ||
| 476 | + - http://msdn.microsoft.com/en-us/library/dd925819%28v=office.12%29.aspx | ||
| 477 | + - http://msdn.microsoft.com/en-us/library/windows/desktop/aa380376%28v=vs.85%29.aspx | ||
| 478 | + - http://msdn.microsoft.com/en-us/library/aa372045.aspx | ||
| 479 | + - http://sedna-soft.de/summary-information-stream/ | ||
| 480 | + - http://poi.apache.org/apidocs/org/apache/poi/hpsf/SummaryInformation.html | ||
| 481 | + | ||
| 482 | + References for DocumentSummaryInformation stream: | ||
| 483 | + - http://msdn.microsoft.com/en-us/library/dd945671%28v=office.12%29.aspx | ||
| 484 | + - http://msdn.microsoft.com/en-us/library/windows/desktop/aa380374%28v=vs.85%29.aspx | ||
| 485 | + - http://poi.apache.org/apidocs/org/apache/poi/hpsf/DocumentSummaryInformation.html | ||
| 486 | + | ||
| 487 | + new in version 0.25 | ||
| 488 | + """ | ||
| 489 | + | ||
| 490 | + # attribute names for SummaryInformation stream properties: | ||
| 491 | + # (ordered by property id, starting at 1) | ||
| 492 | + SUMMARY_ATTRIBS = ['codepage', 'title', 'subject', 'author', 'keywords', 'comments', | ||
| 493 | + 'template', 'last_saved_by', 'revision_number', 'total_edit_time', | ||
| 494 | + 'last_printed', 'create_time', 'last_saved_time', 'num_pages', | ||
| 495 | + 'num_words', 'num_chars', 'thumbnail', 'creating_application', | ||
| 496 | + 'security'] | ||
| 497 | + | ||
| 498 | + # attribute names for DocumentSummaryInformation stream properties: | ||
| 499 | + # (ordered by property id, starting at 1) | ||
| 500 | + DOCSUM_ATTRIBS = ['codepage_doc', 'category', 'presentation_target', 'bytes', 'lines', 'paragraphs', | ||
| 501 | + 'slides', 'notes', 'hidden_slides', 'mm_clips', | ||
| 502 | + 'scale_crop', 'heading_pairs', 'titles_of_parts', 'manager', | ||
| 503 | + 'company', 'links_dirty', 'chars_with_spaces', 'unused', 'shared_doc', | ||
| 504 | + 'link_base', 'hlinks', 'hlinks_changed', 'version', 'dig_sig', | ||
| 505 | + 'content_type', 'content_status', 'language', 'doc_version'] | ||
| 506 | + | ||
| 507 | + def __init__(self): | ||
| 508 | + """ | ||
| 509 | + Constructor for OleMetadata | ||
| 510 | + All attributes are set to None by default | ||
| 511 | + """ | ||
| 512 | + # properties from SummaryInformation stream | ||
| 513 | + self.codepage = None | ||
| 514 | + self.title = None | ||
| 515 | + self.subject = None | ||
| 516 | + self.author = None | ||
| 517 | + self.keywords = None | ||
| 518 | + self.comments = None | ||
| 519 | + self.template = None | ||
| 520 | + self.last_saved_by = None | ||
| 521 | + self.revision_number = None | ||
| 522 | + self.total_edit_time = None | ||
| 523 | + self.last_printed = None | ||
| 524 | + self.create_time = None | ||
| 525 | + self.last_saved_time = None | ||
| 526 | + self.num_pages = None | ||
| 527 | + self.num_words = None | ||
| 528 | + self.num_chars = None | ||
| 529 | + self.thumbnail = None | ||
| 530 | + self.creating_application = None | ||
| 531 | + self.security = None | ||
| 532 | + # properties from DocumentSummaryInformation stream | ||
| 533 | + self.codepage_doc = None | ||
| 534 | + self.category = None | ||
| 535 | + self.presentation_target = None | ||
| 536 | + self.bytes = None | ||
| 537 | + self.lines = None | ||
| 538 | + self.paragraphs = None | ||
| 539 | + self.slides = None | ||
| 540 | + self.notes = None | ||
| 541 | + self.hidden_slides = None | ||
| 542 | + self.mm_clips = None | ||
| 543 | + self.scale_crop = None | ||
| 544 | + self.heading_pairs = None | ||
| 545 | + self.titles_of_parts = None | ||
| 546 | + self.manager = None | ||
| 547 | + self.company = None | ||
| 548 | + self.links_dirty = None | ||
| 549 | + self.chars_with_spaces = None | ||
| 550 | + self.unused = None | ||
| 551 | + self.shared_doc = None | ||
| 552 | + self.link_base = None | ||
| 553 | + self.hlinks = None | ||
| 554 | + self.hlinks_changed = None | ||
| 555 | + self.version = None | ||
| 556 | + self.dig_sig = None | ||
| 557 | + self.content_type = None | ||
| 558 | + self.content_status = None | ||
| 559 | + self.language = None | ||
| 560 | + self.doc_version = None | ||
| 561 | + | ||
| 562 | + | ||
| 563 | + def parse_properties(self, olefile): | ||
| 564 | + """ | ||
| 565 | + Parse standard properties of an OLE file, from the streams | ||
| 566 | + "\x05SummaryInformation" and "\x05DocumentSummaryInformation", | ||
| 567 | + if present. | ||
| 568 | + Properties are converted to strings, integers or python datetime objects. | ||
| 569 | + If a property is not present, its value is set to None. | ||
| 570 | + """ | ||
| 571 | + # first set all attributes to None: | ||
| 572 | + for attrib in (self.SUMMARY_ATTRIBS + self.DOCSUM_ATTRIBS): | ||
| 573 | + setattr(self, attrib, None) | ||
| 574 | + if olefile.exists("\x05SummaryInformation"): | ||
| 575 | + # get properties from the stream: | ||
| 576 | + # (converting timestamps to python datetime, except total_edit_time, | ||
| 577 | + # which is property #10) | ||
| 578 | + props = olefile.getproperties("\x05SummaryInformation", | ||
| 579 | + convert_time=True, no_conversion=[10]) | ||
| 580 | + # store them into this object's attributes: | ||
| 581 | + for i in range(len(self.SUMMARY_ATTRIBS)): | ||
| 582 | + # ids for standard properties start at 0x01, until 0x13 | ||
| 583 | + value = props.get(i+1, None) | ||
| 584 | + setattr(self, self.SUMMARY_ATTRIBS[i], value) | ||
| 585 | + if olefile.exists("\x05DocumentSummaryInformation"): | ||
| 586 | + # get properties from the stream: | ||
| 587 | + props = olefile.getproperties("\x05DocumentSummaryInformation", | ||
| 588 | + convert_time=True) | ||
| 589 | + # store them into this object's attributes: | ||
| 590 | + for i in range(len(self.DOCSUM_ATTRIBS)): | ||
| 591 | + # ids for standard properties start at 0x01, until 0x1C | ||
| 592 | + value = props.get(i+1, None) | ||
| 593 | + setattr(self, self.DOCSUM_ATTRIBS[i], value) | ||
| 594 | + | ||
| 595 | + def dump(self): | ||
| 596 | + """ | ||
| 597 | + Dump all metadata, for debugging purposes. | ||
| 598 | + """ | ||
| 599 | + print 'Properties from SummaryInformation stream:' | ||
| 600 | + for prop in self.SUMMARY_ATTRIBS: | ||
| 601 | + value = getattr(self, prop) | ||
| 602 | + print '- %s: %s' % (prop, repr(value)) | ||
| 603 | + print 'Properties from DocumentSummaryInformation stream:' | ||
| 604 | + for prop in self.DOCSUM_ATTRIBS: | ||
| 605 | + value = getattr(self, prop) | ||
| 606 | + print '- %s: %s' % (prop, repr(value)) | ||
| 607 | + | ||
| 608 | + | ||
| 428 | #--- _OleStream --------------------------------------------------------------- | 609 | #--- _OleStream --------------------------------------------------------------- |
| 429 | 610 | ||
| 430 | class _OleStream(StringIO.StringIO): | 611 | class _OleStream(StringIO.StringIO): |
| @@ -566,7 +747,8 @@ class _OleDirectoryEntry: | @@ -566,7 +747,8 @@ class _OleDirectoryEntry: | ||
| 566 | #[PL] parsing code moved from OleFileIO.loaddirectory | 747 | #[PL] parsing code moved from OleFileIO.loaddirectory |
| 567 | 748 | ||
| 568 | # struct to parse directory entries: | 749 | # struct to parse directory entries: |
| 569 | - # <: little-endian byte order | 750 | + # <: little-endian byte order, standard sizes |
| 751 | + # (note: this should guarantee that Q returns a 64 bits int) | ||
| 570 | # 64s: string containing entry name in unicode (max 31 chars) + null char | 752 | # 64s: string containing entry name in unicode (max 31 chars) + null char |
| 571 | # H: uint16, number of bytes used in name buffer, including null = (len+1)*2 | 753 | # H: uint16, number of bytes used in name buffer, including null = (len+1)*2 |
| 572 | # B: uint8, dir entry type (between 0 and 5) | 754 | # B: uint8, dir entry type (between 0 and 5) |
| @@ -576,13 +758,13 @@ class _OleDirectoryEntry: | @@ -576,13 +758,13 @@ class _OleDirectoryEntry: | ||
| 576 | # I: uint32, index of child root node if it is a storage, else NOSTREAM | 758 | # I: uint32, index of child root node if it is a storage, else NOSTREAM |
| 577 | # 16s: CLSID, unique identifier (only used if it is a storage) | 759 | # 16s: CLSID, unique identifier (only used if it is a storage) |
| 578 | # I: uint32, user flags | 760 | # I: uint32, user flags |
| 579 | - # 8s: uint64, creation timestamp or zero | ||
| 580 | - # 8s: uint64, modification timestamp or zero | 761 | + # Q (was 8s): uint64, creation timestamp or zero |
| 762 | + # Q (was 8s): uint64, modification timestamp or zero | ||
| 581 | # I: uint32, SID of first sector if stream or ministream, SID of 1st sector | 763 | # I: uint32, SID of first sector if stream or ministream, SID of 1st sector |
| 582 | # of stream containing ministreams if root entry, 0 otherwise | 764 | # of stream containing ministreams if root entry, 0 otherwise |
| 583 | # I: uint32, total stream size in bytes if stream (low 32 bits), 0 otherwise | 765 | # I: uint32, total stream size in bytes if stream (low 32 bits), 0 otherwise |
| 584 | # I: uint32, total stream size in bytes if stream (high 32 bits), 0 otherwise | 766 | # I: uint32, total stream size in bytes if stream (high 32 bits), 0 otherwise |
| 585 | - STRUCT_DIRENTRY = '<64sHBBIII16sI8s8sIII' | 767 | + STRUCT_DIRENTRY = '<64sHBBIII16sIQQIII' |
| 586 | # size of a directory entry: 128 bytes | 768 | # size of a directory entry: 128 bytes |
| 587 | DIRENTRY_SIZE = 128 | 769 | DIRENTRY_SIZE = 128 |
| 588 | assert struct.calcsize(STRUCT_DIRENTRY) == DIRENTRY_SIZE | 770 | assert struct.calcsize(STRUCT_DIRENTRY) == DIRENTRY_SIZE |
| @@ -772,6 +954,34 @@ class _OleDirectoryEntry: | @@ -772,6 +954,34 @@ class _OleDirectoryEntry: | ||
| 772 | kid.dump(tab + 2) | 954 | kid.dump(tab + 2) |
| 773 | 955 | ||
| 774 | 956 | ||
| 957 | + def getmtime(self): | ||
| 958 | + """ | ||
| 959 | + Return modification time of a directory entry. | ||
| 960 | + | ||
| 961 | + return: None if modification time is null, a python datetime object | ||
| 962 | + otherwise (UTC timezone) | ||
| 963 | + | ||
| 964 | + new in version 0.26 | ||
| 965 | + """ | ||
| 966 | + if self.modifyTime == 0: | ||
| 967 | + return None | ||
| 968 | + return filetime2datetime(self.modifyTime) | ||
| 969 | + | ||
| 970 | + | ||
| 971 | + def getctime(self): | ||
| 972 | + """ | ||
| 973 | + Return creation time of a directory entry. | ||
| 974 | + | ||
| 975 | + return: None if creation time is null, a python datetime object | ||
| 976 | + otherwise (UTC timezone) | ||
| 977 | + | ||
| 978 | + new in version 0.26 | ||
| 979 | + """ | ||
| 980 | + if self.createTime == 0: | ||
| 981 | + return None | ||
| 982 | + return filetime2datetime(self.createTime) | ||
| 983 | + | ||
| 984 | + | ||
| 775 | #--- OleFileIO ---------------------------------------------------------------- | 985 | #--- OleFileIO ---------------------------------------------------------------- |
| 776 | 986 | ||
| 777 | class OleFileIO: | 987 | class OleFileIO: |
| @@ -812,12 +1022,16 @@ class OleFileIO: | @@ -812,12 +1022,16 @@ class OleFileIO: | ||
| 812 | (use DEFECT_FATAL for a typical application, DEFECT_INCORRECT for a | 1022 | (use DEFECT_FATAL for a typical application, DEFECT_INCORRECT for a |
| 813 | security-oriented application, see source code for details) | 1023 | security-oriented application, see source code for details) |
| 814 | """ | 1024 | """ |
| 1025 | + # minimal level for defects to be raised as exceptions: | ||
| 815 | self._raise_defects_level = raise_defects | 1026 | self._raise_defects_level = raise_defects |
| 1027 | + # list of defects/issues not raised as exceptions: | ||
| 1028 | + # tuples of (exception type, message) | ||
| 1029 | + self.parsing_issues = [] | ||
| 816 | if filename: | 1030 | if filename: |
| 817 | self.open(filename) | 1031 | self.open(filename) |
| 818 | 1032 | ||
| 819 | 1033 | ||
| 820 | - def _raise_defect(self, defect_level, message): | 1034 | + def _raise_defect(self, defect_level, message, exception_type=IOError): |
| 821 | """ | 1035 | """ |
| 822 | This method should be called for any defect found during file parsing. | 1036 | This method should be called for any defect found during file parsing. |
| 823 | It may raise an IOError exception according to the minimal level chosen | 1037 | It may raise an IOError exception according to the minimal level chosen |
| @@ -829,10 +1043,14 @@ class OleFileIO: | @@ -829,10 +1043,14 @@ class OleFileIO: | ||
| 829 | DEFECT_INCORRECT : an error according to specifications, but parsing can go on | 1043 | DEFECT_INCORRECT : an error according to specifications, but parsing can go on |
| 830 | DEFECT_FATAL : an error which cannot be ignored, parsing is impossible | 1044 | DEFECT_FATAL : an error which cannot be ignored, parsing is impossible |
| 831 | message: string describing the defect, used with raised exception. | 1045 | message: string describing the defect, used with raised exception. |
| 1046 | + exception_type: exception class to be raised, IOError by default | ||
| 832 | """ | 1047 | """ |
| 833 | # added by [PL] | 1048 | # added by [PL] |
| 834 | if defect_level >= self._raise_defects_level: | 1049 | if defect_level >= self._raise_defects_level: |
| 835 | - raise IOError, message | 1050 | + raise exception_type, message |
| 1051 | + else: | ||
| 1052 | + # just record the issue, no exception raised: | ||
| 1053 | + self.parsing_issues.append((exception_type, message)) | ||
| 836 | 1054 | ||
| 837 | 1055 | ||
| 838 | def open(self, filename): | 1056 | def open(self, filename): |
| @@ -1378,27 +1596,42 @@ class OleFileIO: | @@ -1378,27 +1596,42 @@ class OleFileIO: | ||
| 1378 | self.sectorsize, self.fat, self._filesize) | 1596 | self.sectorsize, self.fat, self._filesize) |
| 1379 | 1597 | ||
| 1380 | 1598 | ||
| 1381 | - def _list(self, files, prefix, node): | 1599 | + def _list(self, files, prefix, node, streams=True, storages=False): |
| 1382 | """ | 1600 | """ |
| 1383 | (listdir helper) | 1601 | (listdir helper) |
| 1384 | files: list of files to fill in | 1602 | files: list of files to fill in |
| 1385 | prefix: current location in storage tree (list of names) | 1603 | prefix: current location in storage tree (list of names) |
| 1386 | node: current node (_OleDirectoryEntry object) | 1604 | node: current node (_OleDirectoryEntry object) |
| 1605 | + streams: bool, include streams if True (True by default) - new in v0.26 | ||
| 1606 | + storages: bool, include storages if True (False by default) - new in v0.26 | ||
| 1607 | + (note: the root storage is never included) | ||
| 1387 | """ | 1608 | """ |
| 1388 | prefix = prefix + [node.name] | 1609 | prefix = prefix + [node.name] |
| 1389 | for entry in node.kids: | 1610 | for entry in node.kids: |
| 1390 | if entry.kids: | 1611 | if entry.kids: |
| 1391 | - self._list(files, prefix, entry) | 1612 | + # this is a storage |
| 1613 | + if storages: | ||
| 1614 | + # add it to the list | ||
| 1615 | + files.append(prefix[1:] + [entry.name]) | ||
| 1616 | + # check its kids | ||
| 1617 | + self._list(files, prefix, entry, streams, storages) | ||
| 1392 | else: | 1618 | else: |
| 1393 | - files.append(prefix[1:] + [entry.name]) | 1619 | + # this is a stream |
| 1620 | + if streams: | ||
| 1621 | + # add it to the list | ||
| 1622 | + files.append(prefix[1:] + [entry.name]) | ||
| 1394 | 1623 | ||
| 1395 | 1624 | ||
| 1396 | - def listdir(self): | 1625 | + def listdir(self, streams=True, storages=False): |
| 1397 | """ | 1626 | """ |
| 1398 | Return a list of streams stored in this file | 1627 | Return a list of streams stored in this file |
| 1628 | + | ||
| 1629 | + streams: bool, include streams if True (True by default) - new in v0.26 | ||
| 1630 | + storages: bool, include storages if True (False by default) - new in v0.26 | ||
| 1631 | + (note: the root storage is never included) | ||
| 1399 | """ | 1632 | """ |
| 1400 | files = [] | 1633 | files = [] |
| 1401 | - self._list(files, [], self.root) | 1634 | + self._list(files, [], self.root, streams, storages) |
| 1402 | return files | 1635 | return files |
| 1403 | 1636 | ||
| 1404 | 1637 | ||
| @@ -1470,6 +1703,38 @@ class OleFileIO: | @@ -1470,6 +1703,38 @@ class OleFileIO: | ||
| 1470 | return False | 1703 | return False |
| 1471 | 1704 | ||
| 1472 | 1705 | ||
| 1706 | + def getmtime(self, filename): | ||
| 1707 | + """ | ||
| 1708 | + Return modification time of a stream/storage. | ||
| 1709 | + | ||
| 1710 | + filename: path of stream/storage in storage tree. (see openstream for | ||
| 1711 | + syntax) | ||
| 1712 | + return: None if modification time is null, a python datetime object | ||
| 1713 | + otherwise (UTC timezone) | ||
| 1714 | + | ||
| 1715 | + new in version 0.26 | ||
| 1716 | + """ | ||
| 1717 | + sid = self._find(filename) | ||
| 1718 | + entry = self.direntries[sid] | ||
| 1719 | + return entry.getmtime() | ||
| 1720 | + | ||
| 1721 | + | ||
| 1722 | + def getctime(self, filename): | ||
| 1723 | + """ | ||
| 1724 | + Return creation time of a stream/storage. | ||
| 1725 | + | ||
| 1726 | + filename: path of stream/storage in storage tree. (see openstream for | ||
| 1727 | + syntax) | ||
| 1728 | + return: None if creation time is null, a python datetime object | ||
| 1729 | + otherwise (UTC timezone) | ||
| 1730 | + | ||
| 1731 | + new in version 0.26 | ||
| 1732 | + """ | ||
| 1733 | + sid = self._find(filename) | ||
| 1734 | + entry = self.direntries[sid] | ||
| 1735 | + return entry.getctime() | ||
| 1736 | + | ||
| 1737 | + | ||
| 1473 | def exists(self, filename): | 1738 | def exists(self, filename): |
| 1474 | """ | 1739 | """ |
| 1475 | Test if given filename exists as a stream or a storage in the OLE | 1740 | Test if given filename exists as a stream or a storage in the OLE |
| @@ -1509,83 +1774,167 @@ class OleFileIO: | @@ -1509,83 +1774,167 @@ class OleFileIO: | ||
| 1509 | return self.root.name | 1774 | return self.root.name |
| 1510 | 1775 | ||
| 1511 | 1776 | ||
| 1512 | - def getproperties(self, filename): | 1777 | + def getproperties(self, filename, convert_time=False, no_conversion=None): |
| 1513 | """ | 1778 | """ |
| 1514 | Return properties described in substream. | 1779 | Return properties described in substream. |
| 1515 | 1780 | ||
| 1516 | filename: path of stream in storage tree (see openstream for syntax) | 1781 | filename: path of stream in storage tree (see openstream for syntax) |
| 1782 | + convert_time: bool, if True timestamps will be converted to Python datetime | ||
| 1783 | + no_conversion: None or list of int, timestamps not to be converted | ||
| 1784 | + (for example total editing time is not a real timestamp) | ||
| 1517 | return: a dictionary of values indexed by id (integer) | 1785 | return: a dictionary of values indexed by id (integer) |
| 1518 | """ | 1786 | """ |
| 1787 | + # make sure no_conversion is a list, just to simplify code below: | ||
| 1788 | + if no_conversion == None: | ||
| 1789 | + no_conversion = [] | ||
| 1790 | + # stream path as a string to report exceptions: | ||
| 1791 | + streampath = filename | ||
| 1792 | + if not isinstance(streampath, str): | ||
| 1793 | + streampath = '/'.join(streampath) | ||
| 1794 | + | ||
| 1519 | fp = self.openstream(filename) | 1795 | fp = self.openstream(filename) |
| 1520 | 1796 | ||
| 1521 | data = {} | 1797 | data = {} |
| 1522 | 1798 | ||
| 1523 | - # header | ||
| 1524 | - s = fp.read(28) | ||
| 1525 | - clsid = _clsid(s[8:24]) | ||
| 1526 | - | ||
| 1527 | - # format id | ||
| 1528 | - s = fp.read(20) | ||
| 1529 | - fmtid = _clsid(s[:16]) | ||
| 1530 | - fp.seek(i32(s, 16)) | ||
| 1531 | - | ||
| 1532 | - # get section | ||
| 1533 | - s = "****" + fp.read(i32(fp.read(4))-4) | ||
| 1534 | - | ||
| 1535 | - for i in range(i32(s, 4)): | ||
| 1536 | - | ||
| 1537 | - id = i32(s, 8+i*8) | ||
| 1538 | - offset = i32(s, 12+i*8) | ||
| 1539 | - type = i32(s, offset) | ||
| 1540 | - | ||
| 1541 | - debug ('property id=%d: type=%d offset=%X' % (id, type, offset)) | ||
| 1542 | - | ||
| 1543 | - # test for common types first (should perhaps use | ||
| 1544 | - # a dictionary instead?) | ||
| 1545 | - | ||
| 1546 | - if type == VT_I2: | ||
| 1547 | - value = i16(s, offset+4) | ||
| 1548 | - if value >= 32768: | ||
| 1549 | - value = value - 65536 | ||
| 1550 | - elif type == VT_UI2: | ||
| 1551 | - value = i16(s, offset+4) | ||
| 1552 | - elif type in (VT_I4, VT_ERROR): | ||
| 1553 | - value = i32(s, offset+4) | ||
| 1554 | - elif type == VT_UI4: | ||
| 1555 | - value = i32(s, offset+4) # FIXME | ||
| 1556 | - elif type in (VT_BSTR, VT_LPSTR): | ||
| 1557 | - count = i32(s, offset+4) | ||
| 1558 | - value = s[offset+8:offset+8+count-1] | ||
| 1559 | - elif type == VT_BLOB: | ||
| 1560 | - count = i32(s, offset+4) | ||
| 1561 | - value = s[offset+8:offset+8+count] | ||
| 1562 | - elif type == VT_LPWSTR: | ||
| 1563 | - count = i32(s, offset+4) | ||
| 1564 | - value = _unicode(s[offset+8:offset+8+count*2]) | ||
| 1565 | - elif type == VT_FILETIME: | ||
| 1566 | - value = long(i32(s, offset+4)) + (long(i32(s, offset+8))<<32) | ||
| 1567 | - # FIXME: this is a 64-bit int: "number of 100ns periods | ||
| 1568 | - # since Jan 1,1601". Should map this to Python time | ||
| 1569 | - value = value / 10000000L # seconds | ||
| 1570 | - elif type == VT_UI1: | ||
| 1571 | - value = ord(s[offset+4]) | ||
| 1572 | - elif type == VT_CLSID: | ||
| 1573 | - value = _clsid(s[offset+4:offset+20]) | ||
| 1574 | - elif type == VT_CF: | ||
| 1575 | - count = i32(s, offset+4) | ||
| 1576 | - value = s[offset+8:offset+8+count] | ||
| 1577 | - else: | ||
| 1578 | - value = None # everything else yields "None" | 1799 | + try: |
| 1800 | + # header | ||
| 1801 | + s = fp.read(28) | ||
| 1802 | + clsid = _clsid(s[8:24]) | ||
| 1803 | + | ||
| 1804 | + # format id | ||
| 1805 | + s = fp.read(20) | ||
| 1806 | + fmtid = _clsid(s[:16]) | ||
| 1807 | + fp.seek(i32(s, 16)) | ||
| 1808 | + | ||
| 1809 | + # get section | ||
| 1810 | + s = "****" + fp.read(i32(fp.read(4))-4) | ||
| 1811 | + # number of properties: | ||
| 1812 | + num_props = i32(s, 4) | ||
| 1813 | + except: | ||
| 1814 | + # catch exception while parsing property header, and only raise | ||
| 1815 | + # a DEFECT_INCORRECT then return an empty dict, because this is not | ||
| 1816 | + # a fatal error when parsing the whole file | ||
| 1817 | + exctype, excvalue = sys.exc_info()[:2] | ||
| 1818 | + msg = 'Error while parsing properties header in stream %s: %s' % ( | ||
| 1819 | + repr(streampath), excvalue) | ||
| 1820 | + self._raise_defect(DEFECT_INCORRECT, msg, exctype) | ||
| 1821 | + return data | ||
| 1822 | + | ||
| 1823 | + for i in range(num_props): | ||
| 1824 | + try: | ||
| 1825 | + id = 0 # just in case of an exception | ||
| 1826 | + id = i32(s, 8+i*8) | ||
| 1827 | + offset = i32(s, 12+i*8) | ||
| 1828 | + type = i32(s, offset) | ||
| 1829 | + | ||
| 1830 | + debug ('property id=%d: type=%d offset=%X' % (id, type, offset)) | ||
| 1831 | + | ||
| 1832 | + # test for common types first (should perhaps use | ||
| 1833 | + # a dictionary instead?) | ||
| 1834 | + | ||
| 1835 | + if type == VT_I2: # 16-bit signed integer | ||
| 1836 | + value = i16(s, offset+4) | ||
| 1837 | + if value >= 32768: | ||
| 1838 | + value = value - 65536 | ||
| 1839 | + elif type == VT_UI2: # 2-byte unsigned integer | ||
| 1840 | + value = i16(s, offset+4) | ||
| 1841 | + elif type in (VT_I4, VT_INT, VT_ERROR): | ||
| 1842 | + # VT_I4: 32-bit signed integer | ||
| 1843 | + # VT_ERROR: HRESULT, similar to 32-bit signed integer, | ||
| 1844 | + # see http://msdn.microsoft.com/en-us/library/cc230330.aspx | ||
| 1845 | + value = i32(s, offset+4) | ||
| 1846 | + elif type in (VT_UI4, VT_UINT): # 4-byte unsigned integer | ||
| 1847 | + value = i32(s, offset+4) # FIXME | ||
| 1848 | + elif type in (VT_BSTR, VT_LPSTR): | ||
| 1849 | + # CodePageString, see http://msdn.microsoft.com/en-us/library/dd942354.aspx | ||
| 1850 | + # size is a 32 bits integer, including the null terminator, and | ||
| 1851 | + # possibly trailing or embedded null chars | ||
| 1852 | + #TODO: if codepage is unicode, the string should be converted as such | ||
| 1853 | + count = i32(s, offset+4) | ||
| 1854 | + value = s[offset+8:offset+8+count-1] | ||
| 1855 | + # remove all null chars: | ||
| 1856 | + value = value.replace('\x00', '') | ||
| 1857 | + elif type == VT_BLOB: | ||
| 1858 | + # binary large object (BLOB) | ||
| 1859 | + # see http://msdn.microsoft.com/en-us/library/dd942282.aspx | ||
| 1860 | + count = i32(s, offset+4) | ||
| 1861 | + value = s[offset+8:offset+8+count] | ||
| 1862 | + elif type == VT_LPWSTR: | ||
| 1863 | + # UnicodeString | ||
| 1864 | + # see http://msdn.microsoft.com/en-us/library/dd942313.aspx | ||
| 1865 | + # "the string should NOT contain embedded or additional trailing | ||
| 1866 | + # null characters." | ||
| 1867 | + count = i32(s, offset+4) | ||
| 1868 | + value = _unicode(s[offset+8:offset+8+count*2]) | ||
| 1869 | + elif type == VT_FILETIME: | ||
| 1870 | + value = long(i32(s, offset+4)) + (long(i32(s, offset+8))<<32) | ||
| 1871 | + # FILETIME is a 64-bit int: "number of 100ns periods | ||
| 1872 | + # since Jan 1,1601". | ||
| 1873 | + if convert_time and id not in no_conversion: | ||
| 1874 | + debug('Converting property #%d to python datetime, value=%d=%fs' | ||
| 1875 | + %(id, value, float(value)/10000000L)) | ||
| 1876 | + # convert FILETIME to Python datetime.datetime | ||
| 1877 | + # inspired from http://code.activestate.com/recipes/511425-filetime-to-datetime/ | ||
| 1878 | + _FILETIME_null_date = datetime.datetime(1601, 1, 1, 0, 0, 0) | ||
| 1879 | + debug('timedelta days=%d' % (value/(10*1000000*3600*24))) | ||
| 1880 | + value = _FILETIME_null_date + datetime.timedelta(microseconds=value/10) | ||
| 1881 | + else: | ||
| 1882 | + # legacy code kept for backward compatibility: returns a | ||
| 1883 | + # number of seconds since Jan 1,1601 | ||
| 1884 | + value = value / 10000000L # seconds | ||
| 1885 | + elif type == VT_UI1: # 1-byte unsigned integer | ||
| 1886 | + value = ord(s[offset+4]) | ||
| 1887 | + elif type == VT_CLSID: | ||
| 1888 | + value = _clsid(s[offset+4:offset+20]) | ||
| 1889 | + elif type == VT_CF: | ||
| 1890 | + # PropertyIdentifier or ClipboardData?? | ||
| 1891 | + # see http://msdn.microsoft.com/en-us/library/dd941945.aspx | ||
| 1892 | + count = i32(s, offset+4) | ||
| 1893 | + value = s[offset+8:offset+8+count] | ||
| 1894 | + elif type == VT_BOOL: | ||
| 1895 | + # VARIANT_BOOL, 16 bits bool, 0x0000=False, 0xFFFF=True | ||
| 1896 | + # see http://msdn.microsoft.com/en-us/library/cc237864.aspx | ||
| 1897 | + value = bool(i16(s, offset+4)) | ||
| 1898 | + else: | ||
| 1899 | + value = None # everything else yields "None" | ||
| 1900 | + debug ('property id=%d: type=%d not implemented in parser yet' % (id, type)) | ||
| 1579 | 1901 | ||
| 1580 | - # FIXME: add support for VT_VECTOR | 1902 | + # missing: VT_EMPTY, VT_NULL, VT_R4, VT_R8, VT_CY, VT_DATE, |
| 1903 | + # VT_DECIMAL, VT_I1, VT_I8, VT_UI8, | ||
| 1904 | + # see http://msdn.microsoft.com/en-us/library/dd942033.aspx | ||
| 1581 | 1905 | ||
| 1582 | - #print "%08x" % id, repr(value), | ||
| 1583 | - #print "(%s)" % VT[i32(s, offset) & 0xFFF] | 1906 | + # FIXME: add support for VT_VECTOR |
| 1907 | + # VT_VECTOR is a 32-bit uint giving the number of items, followed by | ||
| 1908 | + # the items in sequence. The VT_VECTOR value is combined with the | ||
| 1909 | + # type of items, e.g. VT_VECTOR|VT_BSTR | ||
| 1910 | + # see http://msdn.microsoft.com/en-us/library/dd942011.aspx | ||
| 1584 | 1911 | ||
| 1585 | - data[id] = value | 1912 | + #print "%08x" % id, repr(value), |
| 1913 | + #print "(%s)" % VT[i32(s, offset) & 0xFFF] | ||
| 1914 | + | ||
| 1915 | + data[id] = value | ||
| 1916 | + except: | ||
| 1917 | + # catch exception while parsing each property, and only raise | ||
| 1918 | + # a DEFECT_INCORRECT, because parsing can go on | ||
| 1919 | + exctype, excvalue = sys.exc_info()[:2] | ||
| 1920 | + msg = 'Error while parsing property id %d in stream %s: %s' % ( | ||
| 1921 | + id, repr(streampath), excvalue) | ||
| 1922 | + self._raise_defect(DEFECT_INCORRECT, msg, exctype) | ||
| 1586 | 1923 | ||
| 1587 | return data | 1924 | return data |
| 1588 | 1925 | ||
| 1926 | + def get_metadata(self): | ||
| 1927 | + """ | ||
| 1928 | + Parse standard properties streams, return an OleMetadata object | ||
| 1929 | + containing all the available metadata. | ||
| 1930 | + (also stored in the metadata attribute of the OleFileIO object) | ||
| 1931 | + | ||
| 1932 | + new in version 0.25 | ||
| 1933 | + """ | ||
| 1934 | + self.metadata = OleMetadata() | ||
| 1935 | + self.metadata.parse_properties(self) | ||
| 1936 | + return self.metadata | ||
| 1937 | + | ||
| 1589 | # | 1938 | # |
| 1590 | # -------------------------------------------------------------------- | 1939 | # -------------------------------------------------------------------- |
| 1591 | # This script can be used to dump the directory of any OLE2 structured | 1940 | # This script can be used to dump the directory of any OLE2 structured |
| @@ -1622,7 +1971,7 @@ Options: | @@ -1622,7 +1971,7 @@ Options: | ||
| 1622 | check_streams = True | 1971 | check_streams = True |
| 1623 | continue | 1972 | continue |
| 1624 | 1973 | ||
| 1625 | - ole = OleFileIO(filename, raise_defects=DEFECT_INCORRECT) | 1974 | + ole = OleFileIO(filename)#, raise_defects=DEFECT_INCORRECT) |
| 1626 | print "-" * 68 | 1975 | print "-" * 68 |
| 1627 | print filename | 1976 | print filename |
| 1628 | print "-" * 68 | 1977 | print "-" * 68 |
| @@ -1630,7 +1979,7 @@ Options: | @@ -1630,7 +1979,7 @@ Options: | ||
| 1630 | for streamname in ole.listdir(): | 1979 | for streamname in ole.listdir(): |
| 1631 | if streamname[-1][0] == "\005": | 1980 | if streamname[-1][0] == "\005": |
| 1632 | print streamname, ": properties" | 1981 | print streamname, ": properties" |
| 1633 | - props = ole.getproperties(streamname) | 1982 | + props = ole.getproperties(streamname, convert_time=True) |
| 1634 | props = props.items() | 1983 | props = props.items() |
| 1635 | props.sort() | 1984 | props.sort() |
| 1636 | for k, v in props: | 1985 | for k, v in props: |
| @@ -1661,6 +2010,23 @@ Options: | @@ -1661,6 +2010,23 @@ Options: | ||
| 1661 | print 'NOT a stream : type=%d' % st_type | 2010 | print 'NOT a stream : type=%d' % st_type |
| 1662 | print '' | 2011 | print '' |
| 1663 | 2012 | ||
| 2013 | +## for streamname in ole.listdir(): | ||
| 2014 | +## # print name using repr() to convert binary chars to \xNN: | ||
| 2015 | +## print '-', repr('/'.join(streamname)),'-', | ||
| 2016 | +## print ole.getmtime(streamname) | ||
| 2017 | +## print '' | ||
| 2018 | + | ||
| 2019 | + print 'Modification/Creation times of all directory entries:' | ||
| 2020 | + for entry in ole.direntries: | ||
| 2021 | + if entry is not None: | ||
| 2022 | + print '- %s: mtime=%s ctime=%s' % (entry.name, | ||
| 2023 | + entry.getmtime(), entry.getctime()) | ||
| 2024 | + print '' | ||
| 2025 | + | ||
| 2026 | + # parse and display metadata: | ||
| 2027 | + meta = ole.get_metadata() | ||
| 2028 | + meta.dump() | ||
| 2029 | + print '' | ||
| 1664 | #[PL] Test a few new methods: | 2030 | #[PL] Test a few new methods: |
| 1665 | root = ole.get_rootentry_name() | 2031 | root = ole.get_rootentry_name() |
| 1666 | print 'Root entry name: "%s"' % root | 2032 | print 'Root entry name: "%s"' % root |
| @@ -1670,5 +2036,13 @@ Options: | @@ -1670,5 +2036,13 @@ Options: | ||
| 1670 | print "size :", ole.get_size('worddocument') | 2036 | print "size :", ole.get_size('worddocument') |
| 1671 | if ole.exists('macros/vba'): | 2037 | if ole.exists('macros/vba'): |
| 1672 | print "This document may contain VBA macros." | 2038 | print "This document may contain VBA macros." |
| 2039 | + | ||
| 2040 | + # print parsing issues: | ||
| 2041 | + print '\nNon-fatal issues raised during parsing:' | ||
| 2042 | + if ole.parsing_issues: | ||
| 2043 | + for exctype, msg in ole.parsing_issues: | ||
| 2044 | + print '- %s: %s' % (exctype.__name__, msg) | ||
| 2045 | + else: | ||
| 2046 | + print 'None' | ||
| 1673 | ## except IOError, v: | 2047 | ## except IOError, v: |
| 1674 | ## print "***", "cannot read", file, "-", v | 2048 | ## print "***", "cannot read", file, "-", v |
oletools/thirdparty/OleFileIO_PL/README.txt
| @@ -12,7 +12,8 @@ This is an improved version of the OleFileIO module from | @@ -12,7 +12,8 @@ This is an improved version of the OleFileIO module from | ||
| 12 | `PIL <http://www.pythonware.com/products/pil/index.htm>`_, the excellent | 12 | `PIL <http://www.pythonware.com/products/pil/index.htm>`_, the excellent |
| 13 | Python Imaging Library, created and maintained by Fredrik Lundh. The API | 13 | Python Imaging Library, created and maintained by Fredrik Lundh. The API |
| 14 | is still compatible with PIL, but I have improved the internal | 14 | is still compatible with PIL, but I have improved the internal |
| 15 | -implementation significantly, with bugfixes and a more robust design. | 15 | +implementation significantly, with new features, bugfixes and a more |
| 16 | +robust design. | ||
| 16 | 17 | ||
| 17 | As far as I know, this module is now the most complete and robust Python | 18 | As far as I know, this module is now the most complete and robust Python |
| 18 | implementation to read MS OLE2 files, portable on several operating | 19 | implementation to read MS OLE2 files, portable on several operating |
| @@ -20,18 +21,31 @@ systems. (please tell me if you know other similar Python modules) | @@ -20,18 +21,31 @@ systems. (please tell me if you know other similar Python modules) | ||
| 20 | 21 | ||
| 21 | WARNING: THIS IS (STILL) WORK IN PROGRESS. | 22 | WARNING: THIS IS (STILL) WORK IN PROGRESS. |
| 22 | 23 | ||
| 23 | -Main improvements over PIL version: | ||
| 24 | ------------------------------------ | 24 | +Main improvements over PIL version of OleFileIO: |
| 25 | +------------------------------------------------ | ||
| 25 | 26 | ||
| 26 | - Better compatibility with Python 2.4 up to 2.7 | 27 | - Better compatibility with Python 2.4 up to 2.7 |
| 27 | - Support for files larger than 6.8MB | 28 | - Support for files larger than 6.8MB |
| 28 | - Robust: many checks to detect malformed files | 29 | - Robust: many checks to detect malformed files |
| 29 | - Improved API | 30 | - Improved API |
| 31 | +- New features: metadata extraction, stream/storage timestamps | ||
| 30 | - Added setup.py and install.bat to ease installation | 32 | - Added setup.py and install.bat to ease installation |
| 31 | 33 | ||
| 32 | News | 34 | News |
| 33 | ---- | 35 | ---- |
| 34 | 36 | ||
| 37 | +- 2013-07-24 v0.26: added methods to parse stream/storage timestamps, | ||
| 38 | + improved listdir to include storages, fixed parsing of direntry | ||
| 39 | + timestamps | ||
| 40 | +- 2013-05-27 v0.25: improved metadata extraction, properties parsing | ||
| 41 | + and exception handling, fixed `issue | ||
| 42 | + #12 <https://bitbucket.org/decalage/olefileio_pl/issue/12/error-when-converting-timestamps-in-ole>`_ | ||
| 43 | +- 2013-05-07 v0.24: new features to extract metadata (get\_metadata | ||
| 44 | + method and OleMetadata class), improved getproperties to convert | ||
| 45 | + timestamps to Python datetime | ||
| 46 | +- 2012-10-09: published | ||
| 47 | + `python-oletools <http://www.decalage.info/python/oletools>`_, a | ||
| 48 | + package of analysis tools based on OleFileIO\_PL | ||
| 35 | - 2012-09-11 v0.23: added support for file-like objects, fixed `issue | 49 | - 2012-09-11 v0.23: added support for file-like objects, fixed `issue |
| 36 | #8 <https://bitbucket.org/decalage/olefileio_pl/issue/8/bug-with-file-object>`_ | 50 | #8 <https://bitbucket.org/decalage/olefileio_pl/issue/8/bug-with-file-object>`_ |
| 37 | - 2012-02-17 v0.22: fixed issues #7 (bug in getproperties) and #2 | 51 | - 2012-02-17 v0.22: fixed issues #7 (bug in getproperties) and #2 |
| @@ -87,6 +101,14 @@ Here are a few examples: | @@ -87,6 +101,14 @@ Here are a few examples: | ||
| 87 | f.write(data) | 101 | f.write(data) |
| 88 | f.close() | 102 | f.close() |
| 89 | 103 | ||
| 104 | + # Extract metadata (new in v0.24) - see source code for all attributes: | ||
| 105 | + meta = ole.get_metadata() | ||
| 106 | + print 'Author:', meta.author | ||
| 107 | + print 'Title:', meta.title | ||
| 108 | + print 'Creation date:', meta.create_time | ||
| 109 | + # print all metadata: | ||
| 110 | + meta.dump() | ||
| 111 | + | ||
| 90 | # Close the OLE file: | 112 | # Close the OLE file: |
| 91 | ole.close() | 113 | ole.close() |
| 92 | 114 | ||
| @@ -144,7 +166,7 @@ License | @@ -144,7 +166,7 @@ License | ||
| 144 | 166 | ||
| 145 | OleFileIO\_PL is open-source. | 167 | OleFileIO\_PL is open-source. |
| 146 | 168 | ||
| 147 | -OleFileIO\_PL changes are Copyright (c) 2005-2012 by Philippe Lagadec. | 169 | +OleFileIO\_PL changes are Copyright (c) 2005-2013 by Philippe Lagadec. |
| 148 | 170 | ||
| 149 | The Python Imaging Library (PIL) is | 171 | The Python Imaging Library (PIL) is |
| 150 | 172 |