Commit 68a910d1e14d78005ef475c2c32b37112aa98034
1 parent 63db719f
olefile: synced changes from olefile repo
Showing 1 changed file with 115 additions and 91 deletions
oletools/thirdparty/olefile/olefile.py
| 1 | -#!/usr/bin/env python | 1 | +""" |
| 2 | +olefile (formerly OleFileIO_PL) | ||
| 2 | 3 | ||
| 3 | -# olefile (formerly OleFileIO_PL) | ||
| 4 | -# | ||
| 5 | -# Module to read/write Microsoft OLE2 files (also called Structured Storage or | ||
| 6 | -# Microsoft Compound Document File Format), such as Microsoft Office 97-2003 | ||
| 7 | -# documents, Image Composer and FlashPix files, Outlook messages, ... | ||
| 8 | -# This version is compatible with Python 2.6+ and 3.x | ||
| 9 | -# | ||
| 10 | -# Project website: http://www.decalage.info/olefile | ||
| 11 | -# | ||
| 12 | -# olefile is copyright (c) 2005-2016 Philippe Lagadec (http://www.decalage.info) | ||
| 13 | -# | ||
| 14 | -# olefile is based on the OleFileIO module from the PIL library v1.1.6 | ||
| 15 | -# See: http://www.pythonware.com/products/pil/index.htm | ||
| 16 | -# | ||
| 17 | -# The Python Imaging Library (PIL) is | ||
| 18 | -# Copyright (c) 1997-2005 by Secret Labs AB | ||
| 19 | -# Copyright (c) 1995-2005 by Fredrik Lundh | ||
| 20 | -# | ||
| 21 | -# See source code and LICENSE.txt for information on usage and redistribution. | 4 | +Module to read/write Microsoft OLE2 files (also called Structured Storage or |
| 5 | +Microsoft Compound Document File Format), such as Microsoft Office 97-2003 | ||
| 6 | +documents, Image Composer and FlashPix files, Outlook messages, ... | ||
| 7 | +This version is compatible with Python 2.6+ and 3.x | ||
| 8 | + | ||
| 9 | +Project website: https://www.decalage.info/olefile | ||
| 10 | + | ||
| 11 | +olefile is copyright (c) 2005-2017 Philippe Lagadec | ||
| 12 | +(https://www.decalage.info) | ||
| 22 | 13 | ||
| 14 | +olefile is based on the OleFileIO module from the PIL library v1.1.7 | ||
| 15 | +See: http://www.pythonware.com/products/pil/index.htm | ||
| 16 | +and http://svn.effbot.org/public/tags/pil-1.1.7/PIL/OleFileIO.py | ||
| 17 | + | ||
| 18 | +The Python Imaging Library (PIL) is | ||
| 19 | +Copyright (c) 1997-2009 by Secret Labs AB | ||
| 20 | +Copyright (c) 1995-2009 by Fredrik Lundh | ||
| 21 | + | ||
| 22 | +See source code and LICENSE.txt for information on usage and redistribution. | ||
| 23 | +""" | ||
| 23 | 24 | ||
| 24 | # Since OleFileIO_PL v0.30, only Python 2.6+ and 3.x is supported | 25 | # Since OleFileIO_PL v0.30, only Python 2.6+ and 3.x is supported |
| 25 | # This import enables print() as a function rather than a keyword | 26 | # This import enables print() as a function rather than a keyword |
| @@ -28,14 +29,10 @@ | @@ -28,14 +29,10 @@ | ||
| 28 | from __future__ import print_function # This version of olefile requires Python 2.6+ or 3.x. | 29 | from __future__ import print_function # This version of olefile requires Python 2.6+ or 3.x. |
| 29 | 30 | ||
| 30 | 31 | ||
| 31 | -__author__ = "Philippe Lagadec" | ||
| 32 | -__date__ = "2016-04-26" | ||
| 33 | -__version__ = '0.44' | ||
| 34 | - | ||
| 35 | #--- LICENSE ------------------------------------------------------------------ | 32 | #--- LICENSE ------------------------------------------------------------------ |
| 36 | 33 | ||
| 37 | -# olefile (formerly OleFileIO_PL) is copyright (c) 2005-2016 Philippe Lagadec | ||
| 38 | -# (http://www.decalage.info) | 34 | +# olefile (formerly OleFileIO_PL) is copyright (c) 2005-2017 Philippe Lagadec |
| 35 | +# (https://www.decalage.info) | ||
| 39 | # | 36 | # |
| 40 | # All rights reserved. | 37 | # All rights reserved. |
| 41 | # | 38 | # |
| @@ -66,8 +63,8 @@ __version__ = '0.44' | @@ -66,8 +63,8 @@ __version__ = '0.44' | ||
| 66 | # Imaging Library (PIL) published by Fredrik Lundh under the following license: | 63 | # Imaging Library (PIL) published by Fredrik Lundh under the following license: |
| 67 | 64 | ||
| 68 | # The Python Imaging Library (PIL) is | 65 | # The Python Imaging Library (PIL) is |
| 69 | -# Copyright (c) 1997-2005 by Secret Labs AB | ||
| 70 | -# Copyright (c) 1995-2005 by Fredrik Lundh | 66 | +# Copyright (c) 1997-2009 by Secret Labs AB |
| 67 | +# Copyright (c) 1995-2009 by Fredrik Lundh | ||
| 71 | # | 68 | # |
| 72 | # By obtaining, using, and/or copying this software and/or its associated | 69 | # By obtaining, using, and/or copying this software and/or its associated |
| 73 | # documentation, you agree that you have read, understood, and will comply with | 70 | # documentation, you agree that you have read, understood, and will comply with |
| @@ -138,7 +135,7 @@ __version__ = '0.44' | @@ -138,7 +135,7 @@ __version__ = '0.44' | ||
| 138 | # 2009-12-11 v0.20 PL: - bugfix in OleFileIO.open when filename is not plain str | 135 | # 2009-12-11 v0.20 PL: - bugfix in OleFileIO.open when filename is not plain str |
| 139 | # 2010-01-22 v0.21 PL: - added support for big-endian CPUs such as PowerPC Macs | 136 | # 2010-01-22 v0.21 PL: - added support for big-endian CPUs such as PowerPC Macs |
| 140 | # 2012-02-16 v0.22 PL: - fixed bug in getproperties, patch by chuckleberryfinn | 137 | # 2012-02-16 v0.22 PL: - fixed bug in getproperties, patch by chuckleberryfinn |
| 141 | -# (https://bitbucket.org/decalage/olefileio_pl/issue/7) | 138 | +# (https://github.com/decalage2/olefile/issues/7) |
| 142 | # - added close method to OleFileIO (fixed issue #2) | 139 | # - added close method to OleFileIO (fixed issue #2) |
| 143 | # 2012-07-25 v0.23 PL: - added support for file-like objects (patch by mete0r_kr) | 140 | # 2012-07-25 v0.23 PL: - added support for file-like objects (patch by mete0r_kr) |
| 144 | # 2013-05-05 v0.24 PL: - getproperties: added conversion from filetime to python | 141 | # 2013-05-05 v0.24 PL: - getproperties: added conversion from filetime to python |
| @@ -196,6 +193,15 @@ __version__ = '0.44' | @@ -196,6 +193,15 @@ __version__ = '0.44' | ||
| 196 | # 2016-04-27 - added support for incomplete streams and incorrect | 193 | # 2016-04-27 - added support for incomplete streams and incorrect |
| 197 | # directory entries (to read malformed documents) | 194 | # directory entries (to read malformed documents) |
| 198 | # 2016-05-04 - fixed slight bug in OleStream | 195 | # 2016-05-04 - fixed slight bug in OleStream |
| 196 | +# 2016-11-27 DR: - added method to get the clsid of a storage/stream | ||
| 197 | +# (Daniel Roethlisberger) | ||
| 198 | +# 2017-05-31 v0.45 BS: - PR #114 from oletools to handle excessive number of | ||
| 199 | +# properties: | ||
| 200 | +# https://github.com/decalage2/oletools/pull/114 | ||
| 201 | + | ||
| 202 | +__date__ = "2017-05-31" | ||
| 203 | +__version__ = '0.45dev1' | ||
| 204 | +__author__ = "Philippe Lagadec" | ||
| 199 | 205 | ||
| 200 | #----------------------------------------------------------------------------- | 206 | #----------------------------------------------------------------------------- |
| 201 | # TODO (for version 1.0): | 207 | # TODO (for version 1.0): |
| @@ -223,7 +229,7 @@ __version__ = '0.44' | @@ -223,7 +229,7 @@ __version__ = '0.44' | ||
| 223 | # - see also original notes and FIXME below | 229 | # - see also original notes and FIXME below |
| 224 | # - remove all obsolete FIXMEs | 230 | # - remove all obsolete FIXMEs |
| 225 | # - OleMetadata: fix version attrib according to | 231 | # - OleMetadata: fix version attrib according to |
| 226 | -# http://msdn.microsoft.com/en-us/library/dd945671%28v=office.12%29.aspx | 232 | +# https://msdn.microsoft.com/en-us/library/dd945671%28v=office.12%29.aspx |
| 227 | 233 | ||
| 228 | # IDEAS: | 234 | # IDEAS: |
| 229 | # - in OleFileIO._open and OleStream, use size=None instead of 0x7FFFFFFF for | 235 | # - in OleFileIO._open and OleStream, use size=None instead of 0x7FFFFFFF for |
| @@ -238,8 +244,8 @@ __version__ = '0.44' | @@ -238,8 +244,8 @@ __version__ = '0.44' | ||
| 238 | # - create a simple OLE explorer with wxPython | 244 | # - create a simple OLE explorer with wxPython |
| 239 | 245 | ||
| 240 | # FUTURE EVOLUTIONS to add write support: | 246 | # FUTURE EVOLUTIONS to add write support: |
| 241 | -# see issue #6 on Bitbucket: | ||
| 242 | -# https://bitbucket.org/decalage/olefileio_pl/issue/6/improve-olefileio_pl-to-write-ole-files | 247 | +# see issue #6 on GitHub: |
| 248 | +# https://github.com/decalage2/olefile/issues/6 | ||
| 243 | 249 | ||
| 244 | #----------------------------------------------------------------------------- | 250 | #----------------------------------------------------------------------------- |
| 245 | # NOTES from PIL 1.1.6: | 251 | # NOTES from PIL 1.1.6: |
| @@ -268,6 +274,10 @@ __version__ = '0.44' | @@ -268,6 +274,10 @@ __version__ = '0.44' | ||
| 268 | 274 | ||
| 269 | #------------------------------------------------------------------------------ | 275 | #------------------------------------------------------------------------------ |
| 270 | 276 | ||
| 277 | +__all__ = ['isOleFile', 'OleFileIO', 'OleMetadata', 'enable_logging', | ||
| 278 | + 'MAGIC', 'STGTY_EMPTY', | ||
| 279 | + 'STGTY_STREAM', 'STGTY_STORAGE', 'STGTY_ROOT', 'STGTY_PROPERTY', | ||
| 280 | + 'STGTY_LOCKBYTES', 'MINIMAL_OLEFILE_SIZE',] | ||
| 271 | 281 | ||
| 272 | import io | 282 | import io |
| 273 | import sys | 283 | import sys |
| @@ -317,17 +327,10 @@ else: | @@ -317,17 +327,10 @@ else: | ||
| 317 | 327 | ||
| 318 | #[PL] These workarounds were inspired from the Path module | 328 | #[PL] These workarounds were inspired from the Path module |
| 319 | # (see http://www.jorendorff.com/articles/python/path/) | 329 | # (see http://www.jorendorff.com/articles/python/path/) |
| 320 | -#TODO: test with old Python versions | ||
| 321 | - | ||
| 322 | -# Pre-2.3 workaround for basestring. | ||
| 323 | try: | 330 | try: |
| 324 | basestring | 331 | basestring |
| 325 | except NameError: | 332 | except NameError: |
| 326 | - try: | ||
| 327 | - # is Unicode supported (Python >2.0 or >1.6 ?) | ||
| 328 | - basestring = (str, unicode) | ||
| 329 | - except NameError: | ||
| 330 | - basestring = str | 333 | + basestring = str |
| 331 | 334 | ||
| 332 | #[PL] Experimental setting: if True, OLE filenames will be kept in Unicode | 335 | #[PL] Experimental setting: if True, OLE filenames will be kept in Unicode |
| 333 | # if False (default PIL behaviour), all filenames are converted to Latin-1. | 336 | # if False (default PIL behaviour), all filenames are converted to Latin-1. |
| @@ -395,27 +398,27 @@ def enable_logging(): | @@ -395,27 +398,27 @@ def enable_logging(): | ||
| 395 | 398 | ||
| 396 | #=== CONSTANTS =============================================================== | 399 | #=== CONSTANTS =============================================================== |
| 397 | 400 | ||
| 398 | -# magic bytes that should be at the beginning of every OLE file: | 401 | +#: magic bytes that should be at the beginning of every OLE file: |
| 399 | MAGIC = b'\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1' | 402 | MAGIC = b'\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1' |
| 400 | 403 | ||
| 401 | #[PL]: added constants for Sector IDs (from AAF specifications) | 404 | #[PL]: added constants for Sector IDs (from AAF specifications) |
| 402 | -MAXREGSECT = 0xFFFFFFFA # (-6) maximum SECT | ||
| 403 | -DIFSECT = 0xFFFFFFFC # (-4) denotes a DIFAT sector in a FAT | ||
| 404 | -FATSECT = 0xFFFFFFFD # (-3) denotes a FAT sector in a FAT | ||
| 405 | -ENDOFCHAIN = 0xFFFFFFFE # (-2) end of a virtual stream chain | ||
| 406 | -FREESECT = 0xFFFFFFFF # (-1) unallocated sector | 405 | +MAXREGSECT = 0xFFFFFFFA #: (-6) maximum SECT |
| 406 | +DIFSECT = 0xFFFFFFFC #: (-4) denotes a DIFAT sector in a FAT | ||
| 407 | +FATSECT = 0xFFFFFFFD #: (-3) denotes a FAT sector in a FAT | ||
| 408 | +ENDOFCHAIN = 0xFFFFFFFE #: (-2) end of a virtual stream chain | ||
| 409 | +FREESECT = 0xFFFFFFFF #: (-1) unallocated sector | ||
| 407 | 410 | ||
| 408 | #[PL]: added constants for Directory Entry IDs (from AAF specifications) | 411 | #[PL]: added constants for Directory Entry IDs (from AAF specifications) |
| 409 | -MAXREGSID = 0xFFFFFFFA # (-6) maximum directory entry ID | ||
| 410 | -NOSTREAM = 0xFFFFFFFF # (-1) unallocated directory entry | 412 | +MAXREGSID = 0xFFFFFFFA #: (-6) maximum directory entry ID |
| 413 | +NOSTREAM = 0xFFFFFFFF #: (-1) unallocated directory entry | ||
| 411 | 414 | ||
| 412 | #[PL] object types in storage (from AAF specifications) | 415 | #[PL] object types in storage (from AAF specifications) |
| 413 | -STGTY_EMPTY = 0 # empty directory entry (according to OpenOffice.org doc) | ||
| 414 | -STGTY_STORAGE = 1 # element is a storage object | ||
| 415 | -STGTY_STREAM = 2 # element is a stream object | ||
| 416 | -STGTY_LOCKBYTES = 3 # element is an ILockBytes object | ||
| 417 | -STGTY_PROPERTY = 4 # element is an IPropertyStorage object | ||
| 418 | -STGTY_ROOT = 5 # element is a root storage | 416 | +STGTY_EMPTY = 0 #: empty directory entry |
| 417 | +STGTY_STORAGE = 1 #: element is a storage object | ||
| 418 | +STGTY_STREAM = 2 #: element is a stream object | ||
| 419 | +STGTY_LOCKBYTES = 3 #: element is an ILockBytes object | ||
| 420 | +STGTY_PROPERTY = 4 #: element is an IPropertyStorage object | ||
| 421 | +STGTY_ROOT = 5 #: element is a root storage | ||
| 419 | 422 | ||
| 420 | # Unknown size for a stream (used by OleStream): | 423 | # Unknown size for a stream (used by OleStream): |
| 421 | UNKNOWN_SIZE = 0x7FFFFFFF | 424 | UNKNOWN_SIZE = 0x7FFFFFFF |
| @@ -472,7 +475,13 @@ def isOleFile (filename): | @@ -472,7 +475,13 @@ def isOleFile (filename): | ||
| 472 | """ | 475 | """ |
| 473 | Test if a file is an OLE container (according to the magic bytes in its header). | 476 | Test if a file is an OLE container (according to the magic bytes in its header). |
| 474 | 477 | ||
| 475 | - :param filename: string-like or file-like object, OLE file to parse | 478 | + .. note:: |
| 479 | + This function only checks the first 8 bytes of the file, not the | ||
| 480 | + rest of the OLE structure. | ||
| 481 | + | ||
| 482 | + .. versionadded:: 0.16 | ||
| 483 | + | ||
| 484 | + :param filename: filename, contents or file-like object of the OLE file (string-like or file-like object) | ||
| 476 | 485 | ||
| 477 | - if filename is a string smaller than 1536 bytes, it is the path | 486 | - if filename is a string smaller than 1536 bytes, it is the path |
| 478 | of the file to open. (bytes or unicode string) | 487 | of the file to open. (bytes or unicode string) |
| @@ -481,7 +490,9 @@ def isOleFile (filename): | @@ -481,7 +490,9 @@ def isOleFile (filename): | ||
| 481 | - if filename is a file-like object (with read and seek methods), | 490 | - if filename is a file-like object (with read and seek methods), |
| 482 | it is parsed as-is. | 491 | it is parsed as-is. |
| 483 | 492 | ||
| 493 | + :type filename: bytes or str or unicode or file | ||
| 484 | :returns: True if OLE, False otherwise. | 494 | :returns: True if OLE, False otherwise. |
| 495 | + :rtype: bool | ||
| 485 | """ | 496 | """ |
| 486 | # check if filename is a string-like or file-like object: | 497 | # check if filename is a string-like or file-like object: |
| 487 | if hasattr(filename, 'read'): | 498 | if hasattr(filename, 'read'): |
| @@ -494,7 +505,8 @@ def isOleFile (filename): | @@ -494,7 +505,8 @@ def isOleFile (filename): | ||
| 494 | header = filename[:len(MAGIC)] | 505 | header = filename[:len(MAGIC)] |
| 495 | else: | 506 | else: |
| 496 | # string-like object: filename of file on disk | 507 | # string-like object: filename of file on disk |
| 497 | - header = open(filename, 'rb').read(len(MAGIC)) | 508 | + with open(filename, 'rb') as fp: |
| 509 | + header = fp.read(len(MAGIC)) | ||
| 498 | if header == MAGIC: | 510 | if header == MAGIC: |
| 499 | return True | 511 | return True |
| 500 | else: | 512 | else: |
| @@ -511,8 +523,6 @@ else: | @@ -511,8 +523,6 @@ else: | ||
| 511 | return c if c.__class__ is int else c[0] | 523 | return c if c.__class__ is int else c[0] |
| 512 | 524 | ||
| 513 | 525 | ||
| 514 | -#TODO: replace i16 and i32 with more readable struct.unpack equivalent? | ||
| 515 | - | ||
| 516 | def i16(c, o = 0): | 526 | def i16(c, o = 0): |
| 517 | """ | 527 | """ |
| 518 | Converts a 2-bytes (16 bits) string to an integer. | 528 | Converts a 2-bytes (16 bits) string to an integer. |
| @@ -520,7 +530,7 @@ def i16(c, o = 0): | @@ -520,7 +530,7 @@ def i16(c, o = 0): | ||
| 520 | :param c: string containing bytes to convert | 530 | :param c: string containing bytes to convert |
| 521 | :param o: offset of bytes to convert in string | 531 | :param o: offset of bytes to convert in string |
| 522 | """ | 532 | """ |
| 523 | - return i8(c[o]) | (i8(c[o+1])<<8) | 533 | + return struct.unpack("<H", c[o:o+2])[0] |
| 524 | 534 | ||
| 525 | 535 | ||
| 526 | def i32(c, o = 0): | 536 | def i32(c, o = 0): |
| @@ -530,10 +540,7 @@ def i32(c, o = 0): | @@ -530,10 +540,7 @@ def i32(c, o = 0): | ||
| 530 | :param c: string containing bytes to convert | 540 | :param c: string containing bytes to convert |
| 531 | :param o: offset of bytes to convert in string | 541 | :param o: offset of bytes to convert in string |
| 532 | """ | 542 | """ |
| 533 | -## return int(ord(c[o])+(ord(c[o+1])<<8)+(ord(c[o+2])<<16)+(ord(c[o+3])<<24)) | ||
| 534 | -## # [PL]: added int() because "<<" gives long int since Python 2.4 | ||
| 535 | - # copied from Pillow's _binary: | ||
| 536 | - return i8(c[o]) | (i8(c[o+1])<<8) | (i8(c[o+2])<<16) | (i8(c[o+3])<<24) | 543 | + return struct.unpack("<I", c[o:o+4])[0] |
| 537 | 544 | ||
| 538 | 545 | ||
| 539 | def _clsid(clsid): | 546 | def _clsid(clsid): |
| @@ -558,7 +565,7 @@ def filetime2datetime(filetime): | @@ -558,7 +565,7 @@ def filetime2datetime(filetime): | ||
| 558 | convert FILETIME (64 bits int) to Python datetime.datetime | 565 | convert FILETIME (64 bits int) to Python datetime.datetime |
| 559 | """ | 566 | """ |
| 560 | # TODO: manage exception when microseconds is too large | 567 | # TODO: manage exception when microseconds is too large |
| 561 | - # inspired from http://code.activestate.com/recipes/511425-filetime-to-datetime/ | 568 | + # inspired from https://code.activestate.com/recipes/511425-filetime-to-datetime/ |
| 562 | _FILETIME_null_date = datetime.datetime(1601, 1, 1, 0, 0, 0) | 569 | _FILETIME_null_date = datetime.datetime(1601, 1, 1, 0, 0, 0) |
| 563 | #log.debug('timedelta days=%d' % (filetime//(10*1000000*3600*24))) | 570 | #log.debug('timedelta days=%d' % (filetime//(10*1000000*3600*24))) |
| 564 | return _FILETIME_null_date + datetime.timedelta(microseconds=filetime//10) | 571 | return _FILETIME_null_date + datetime.timedelta(microseconds=filetime//10) |
| @@ -585,17 +592,19 @@ class OleMetadata: | @@ -585,17 +592,19 @@ class OleMetadata: | ||
| 585 | OLE file. | 592 | OLE file. |
| 586 | 593 | ||
| 587 | References for SummaryInformation stream: | 594 | References for SummaryInformation stream: |
| 588 | - - http://msdn.microsoft.com/en-us/library/dd942545.aspx | ||
| 589 | - - http://msdn.microsoft.com/en-us/library/dd925819%28v=office.12%29.aspx | ||
| 590 | - - http://msdn.microsoft.com/en-us/library/windows/desktop/aa380376%28v=vs.85%29.aspx | ||
| 591 | - - http://msdn.microsoft.com/en-us/library/aa372045.aspx | ||
| 592 | - - http://sedna-soft.de/summary-information-stream/ | ||
| 593 | - - http://poi.apache.org/apidocs/org/apache/poi/hpsf/SummaryInformation.html | 595 | + |
| 596 | + - https://msdn.microsoft.com/en-us/library/dd942545.aspx | ||
| 597 | + - https://msdn.microsoft.com/en-us/library/dd925819%28v=office.12%29.aspx | ||
| 598 | + - https://msdn.microsoft.com/en-us/library/windows/desktop/aa380376%28v=vs.85%29.aspx | ||
| 599 | + - https://msdn.microsoft.com/en-us/library/aa372045.aspx | ||
| 600 | + - http://sedna-soft.de/articles/summary-information-stream/ | ||
| 601 | + - https://poi.apache.org/apidocs/org/apache/poi/hpsf/SummaryInformation.html | ||
| 594 | 602 | ||
| 595 | References for DocumentSummaryInformation stream: | 603 | References for DocumentSummaryInformation stream: |
| 596 | - - http://msdn.microsoft.com/en-us/library/dd945671%28v=office.12%29.aspx | ||
| 597 | - - http://msdn.microsoft.com/en-us/library/windows/desktop/aa380374%28v=vs.85%29.aspx | ||
| 598 | - - http://poi.apache.org/apidocs/org/apache/poi/hpsf/DocumentSummaryInformation.html | 604 | + |
| 605 | + - https://msdn.microsoft.com/en-us/library/dd945671%28v=office.12%29.aspx | ||
| 606 | + - https://msdn.microsoft.com/en-us/library/windows/desktop/aa380374%28v=vs.85%29.aspx | ||
| 607 | + - https://poi.apache.org/apidocs/org/apache/poi/hpsf/DocumentSummaryInformation.html | ||
| 599 | 608 | ||
| 600 | new in version 0.25 | 609 | new in version 0.25 |
| 601 | """ | 610 | """ |
| @@ -676,7 +685,7 @@ class OleMetadata: | @@ -676,7 +685,7 @@ class OleMetadata: | ||
| 676 | def parse_properties(self, olefile): | 685 | def parse_properties(self, olefile): |
| 677 | """ | 686 | """ |
| 678 | Parse standard properties of an OLE file, from the streams | 687 | Parse standard properties of an OLE file, from the streams |
| 679 | - "\x05SummaryInformation" and "\x05DocumentSummaryInformation", | 688 | + ``\\x05SummaryInformation`` and ``\\x05DocumentSummaryInformation``, |
| 680 | if present. | 689 | if present. |
| 681 | Properties are converted to strings, integers or python datetime objects. | 690 | Properties are converted to strings, integers or python datetime objects. |
| 682 | If a property is not present, its value is set to None. | 691 | If a property is not present, its value is set to None. |
| @@ -851,14 +860,14 @@ class OleStream(io.BytesIO): | @@ -851,14 +860,14 @@ class OleStream(io.BytesIO): | ||
| 851 | elif unknown_size: | 860 | elif unknown_size: |
| 852 | # actual stream size was not known, now we know the size of read | 861 | # actual stream size was not known, now we know the size of read |
| 853 | # data: | 862 | # data: |
| 854 | - log.debug('Read data of length %d, the stream size was unkown' % len(data)) | 863 | + log.debug('Read data of length %d, the stream size was unknown' % len(data)) |
| 855 | self.size = len(data) | 864 | self.size = len(data) |
| 856 | else: | 865 | else: |
| 857 | # read data is less than expected: | 866 | # read data is less than expected: |
| 858 | log.debug('Read data of length %d, less than expected stream size %d' % (len(data), size)) | 867 | log.debug('Read data of length %d, less than expected stream size %d' % (len(data), size)) |
| 859 | # TODO: provide details in exception message | 868 | # TODO: provide details in exception message |
| 860 | - self.ole._raise_defect(DEFECT_INCORRECT, 'OLE stream size is less than declared') | ||
| 861 | self.size = len(data) | 869 | self.size = len(data) |
| 870 | + self.ole._raise_defect(DEFECT_INCORRECT, 'OLE stream size is less than declared') | ||
| 862 | # when all data is read in memory, BytesIO constructor is called | 871 | # when all data is read in memory, BytesIO constructor is called |
| 863 | io.BytesIO.__init__(self, data) | 872 | io.BytesIO.__init__(self, data) |
| 864 | # Then the OleStream object can be used as a read-only file object. | 873 | # Then the OleStream object can be used as a read-only file object. |
| @@ -1023,7 +1032,7 @@ class OleDirectoryEntry: | @@ -1023,7 +1032,7 @@ class OleDirectoryEntry: | ||
| 1023 | Walk through red-black tree of children of this directory entry to add | 1032 | Walk through red-black tree of children of this directory entry to add |
| 1024 | all of them to the kids list. (recursive method) | 1033 | all of them to the kids list. (recursive method) |
| 1025 | 1034 | ||
| 1026 | - :param child_sid : index of child directory entry to use, or None when called | 1035 | + :param child_sid: index of child directory entry to use, or None when called |
| 1027 | first time for the root. (only used during recursion) | 1036 | first time for the root. (only used during recursion) |
| 1028 | """ | 1037 | """ |
| 1029 | log.debug('append_kids: child_sid=%d' % child_sid) | 1038 | log.debug('append_kids: child_sid=%d' % child_sid) |
| @@ -1188,8 +1197,8 @@ class OleFileIO: | @@ -1188,8 +1197,8 @@ class OleFileIO: | ||
| 1188 | """ | 1197 | """ |
| 1189 | # minimal level for defects to be raised as exceptions: | 1198 | # minimal level for defects to be raised as exceptions: |
| 1190 | self._raise_defects_level = raise_defects | 1199 | self._raise_defects_level = raise_defects |
| 1191 | - # list of defects/issues not raised as exceptions: | ||
| 1192 | - # tuples of (exception type, message) | 1200 | + #: list of defects/issues not raised as exceptions: |
| 1201 | + #: tuples of (exception type, message) | ||
| 1193 | self.parsing_issues = [] | 1202 | self.parsing_issues = [] |
| 1194 | self.write_mode = write_mode | 1203 | self.write_mode = write_mode |
| 1195 | self.path_encoding = path_encoding | 1204 | self.path_encoding = path_encoding |
| @@ -2081,6 +2090,21 @@ class OleFileIO: | @@ -2081,6 +2090,21 @@ class OleFileIO: | ||
| 2081 | return False | 2090 | return False |
| 2082 | 2091 | ||
| 2083 | 2092 | ||
| 2093 | + def getclsid(self, filename): | ||
| 2094 | + """ | ||
| 2095 | + Return clsid of a stream/storage. | ||
| 2096 | + | ||
| 2097 | + :param filename: path of stream/storage in storage tree. (see openstream for | ||
| 2098 | + syntax) | ||
| 2099 | + :returns: Empty string if clsid is null, a printable representation of the clsid otherwise | ||
| 2100 | + | ||
| 2101 | + new in version 0.44 | ||
| 2102 | + """ | ||
| 2103 | + sid = self._find(filename) | ||
| 2104 | + entry = self.direntries[sid] | ||
| 2105 | + return entry.clsid | ||
| 2106 | + | ||
| 2107 | + | ||
| 2084 | def getmtime(self, filename): | 2108 | def getmtime(self, filename): |
| 2085 | """ | 2109 | """ |
| 2086 | Return modification time of a stream/storage. | 2110 | Return modification time of a stream/storage. |
| @@ -2203,8 +2227,8 @@ class OleFileIO: | @@ -2203,8 +2227,8 @@ class OleFileIO: | ||
| 2203 | 2227 | ||
| 2204 | # clamp num_props based on the data length | 2228 | # clamp num_props based on the data length |
| 2205 | num_props = min(num_props, len(s) / 8) | 2229 | num_props = min(num_props, len(s) / 8) |
| 2206 | - | ||
| 2207 | - for i in xrange(num_props): | 2230 | + |
| 2231 | + for i in iterrange(num_props): | ||
| 2208 | property_id = 0 # just in case of an exception | 2232 | property_id = 0 # just in case of an exception |
| 2209 | try: | 2233 | try: |
| 2210 | property_id = i32(s, 8+i*8) | 2234 | property_id = i32(s, 8+i*8) |
| @@ -2225,12 +2249,12 @@ class OleFileIO: | @@ -2225,12 +2249,12 @@ class OleFileIO: | ||
| 2225 | elif property_type in (VT_I4, VT_INT, VT_ERROR): | 2249 | elif property_type in (VT_I4, VT_INT, VT_ERROR): |
| 2226 | # VT_I4: 32-bit signed integer | 2250 | # VT_I4: 32-bit signed integer |
| 2227 | # VT_ERROR: HRESULT, similar to 32-bit signed integer, | 2251 | # VT_ERROR: HRESULT, similar to 32-bit signed integer, |
| 2228 | - # see http://msdn.microsoft.com/en-us/library/cc230330.aspx | 2252 | + # see https://msdn.microsoft.com/en-us/library/cc230330.aspx |
| 2229 | value = i32(s, offset+4) | 2253 | value = i32(s, offset+4) |
| 2230 | elif property_type in (VT_UI4, VT_UINT): # 4-byte unsigned integer | 2254 | elif property_type in (VT_UI4, VT_UINT): # 4-byte unsigned integer |
| 2231 | value = i32(s, offset+4) # FIXME | 2255 | value = i32(s, offset+4) # FIXME |
| 2232 | elif property_type in (VT_BSTR, VT_LPSTR): | 2256 | elif property_type in (VT_BSTR, VT_LPSTR): |
| 2233 | - # CodePageString, see http://msdn.microsoft.com/en-us/library/dd942354.aspx | 2257 | + # CodePageString, see https://msdn.microsoft.com/en-us/library/dd942354.aspx |
| 2234 | # size is a 32 bits integer, including the null terminator, and | 2258 | # size is a 32 bits integer, including the null terminator, and |
| 2235 | # possibly trailing or embedded null chars | 2259 | # possibly trailing or embedded null chars |
| 2236 | #TODO: if codepage is unicode, the string should be converted as such | 2260 | #TODO: if codepage is unicode, the string should be converted as such |
| @@ -2240,12 +2264,12 @@ class OleFileIO: | @@ -2240,12 +2264,12 @@ class OleFileIO: | ||
| 2240 | value = value.replace(b'\x00', b'') | 2264 | value = value.replace(b'\x00', b'') |
| 2241 | elif property_type == VT_BLOB: | 2265 | elif property_type == VT_BLOB: |
| 2242 | # binary large object (BLOB) | 2266 | # binary large object (BLOB) |
| 2243 | - # see http://msdn.microsoft.com/en-us/library/dd942282.aspx | 2267 | + # see https://msdn.microsoft.com/en-us/library/dd942282.aspx |
| 2244 | count = i32(s, offset+4) | 2268 | count = i32(s, offset+4) |
| 2245 | value = s[offset+8:offset+8+count] | 2269 | value = s[offset+8:offset+8+count] |
| 2246 | elif property_type == VT_LPWSTR: | 2270 | elif property_type == VT_LPWSTR: |
| 2247 | # UnicodeString | 2271 | # UnicodeString |
| 2248 | - # see http://msdn.microsoft.com/en-us/library/dd942313.aspx | 2272 | + # see https://msdn.microsoft.com/en-us/library/dd942313.aspx |
| 2249 | # "the string should NOT contain embedded or additional trailing | 2273 | # "the string should NOT contain embedded or additional trailing |
| 2250 | # null characters." | 2274 | # null characters." |
| 2251 | count = i32(s, offset+4) | 2275 | count = i32(s, offset+4) |
| @@ -2258,7 +2282,7 @@ class OleFileIO: | @@ -2258,7 +2282,7 @@ class OleFileIO: | ||
| 2258 | log.debug('Converting property #%d to python datetime, value=%d=%fs' | 2282 | log.debug('Converting property #%d to python datetime, value=%d=%fs' |
| 2259 | %(property_id, value, float(value)/10000000)) | 2283 | %(property_id, value, float(value)/10000000)) |
| 2260 | # convert FILETIME to Python datetime.datetime | 2284 | # convert FILETIME to Python datetime.datetime |
| 2261 | - # inspired from http://code.activestate.com/recipes/511425-filetime-to-datetime/ | 2285 | + # inspired from https://code.activestate.com/recipes/511425-filetime-to-datetime/ |
| 2262 | _FILETIME_null_date = datetime.datetime(1601, 1, 1, 0, 0, 0) | 2286 | _FILETIME_null_date = datetime.datetime(1601, 1, 1, 0, 0, 0) |
| 2263 | log.debug('timedelta days=%d' % (value//(10*1000000*3600*24))) | 2287 | log.debug('timedelta days=%d' % (value//(10*1000000*3600*24))) |
| 2264 | value = _FILETIME_null_date + datetime.timedelta(microseconds=value//10) | 2288 | value = _FILETIME_null_date + datetime.timedelta(microseconds=value//10) |
| @@ -2272,12 +2296,12 @@ class OleFileIO: | @@ -2272,12 +2296,12 @@ class OleFileIO: | ||
| 2272 | value = _clsid(s[offset+4:offset+20]) | 2296 | value = _clsid(s[offset+4:offset+20]) |
| 2273 | elif property_type == VT_CF: | 2297 | elif property_type == VT_CF: |
| 2274 | # PropertyIdentifier or ClipboardData?? | 2298 | # PropertyIdentifier or ClipboardData?? |
| 2275 | - # see http://msdn.microsoft.com/en-us/library/dd941945.aspx | 2299 | + # see https://msdn.microsoft.com/en-us/library/dd941945.aspx |
| 2276 | count = i32(s, offset+4) | 2300 | count = i32(s, offset+4) |
| 2277 | value = s[offset+8:offset+8+count] | 2301 | value = s[offset+8:offset+8+count] |
| 2278 | elif property_type == VT_BOOL: | 2302 | elif property_type == VT_BOOL: |
| 2279 | # VARIANT_BOOL, 16 bits bool, 0x0000=False, 0xFFFF=True | 2303 | # VARIANT_BOOL, 16 bits bool, 0x0000=False, 0xFFFF=True |
| 2280 | - # see http://msdn.microsoft.com/en-us/library/cc237864.aspx | 2304 | + # see https://msdn.microsoft.com/en-us/library/cc237864.aspx |
| 2281 | value = bool(i16(s, offset+4)) | 2305 | value = bool(i16(s, offset+4)) |
| 2282 | else: | 2306 | else: |
| 2283 | value = None # everything else yields "None" | 2307 | value = None # everything else yields "None" |
| @@ -2285,13 +2309,13 @@ class OleFileIO: | @@ -2285,13 +2309,13 @@ class OleFileIO: | ||
| 2285 | 2309 | ||
| 2286 | # missing: VT_EMPTY, VT_NULL, VT_R4, VT_R8, VT_CY, VT_DATE, | 2310 | # missing: VT_EMPTY, VT_NULL, VT_R4, VT_R8, VT_CY, VT_DATE, |
| 2287 | # VT_DECIMAL, VT_I1, VT_I8, VT_UI8, | 2311 | # VT_DECIMAL, VT_I1, VT_I8, VT_UI8, |
| 2288 | - # see http://msdn.microsoft.com/en-us/library/dd942033.aspx | 2312 | + # see https://msdn.microsoft.com/en-us/library/dd942033.aspx |
| 2289 | 2313 | ||
| 2290 | # FIXME: add support for VT_VECTOR | 2314 | # FIXME: add support for VT_VECTOR |
| 2291 | # VT_VECTOR is a 32 uint giving the number of items, followed by | 2315 | # VT_VECTOR is a 32 uint giving the number of items, followed by |
| 2292 | # the items in sequence. The VT_VECTOR value is combined with the | 2316 | # the items in sequence. The VT_VECTOR value is combined with the |
| 2293 | # type of items, e.g. VT_VECTOR|VT_BSTR | 2317 | # type of items, e.g. VT_VECTOR|VT_BSTR |
| 2294 | - # see http://msdn.microsoft.com/en-us/library/dd942011.aspx | 2318 | + # see https://msdn.microsoft.com/en-us/library/dd942011.aspx |
| 2295 | 2319 | ||
| 2296 | #print("%08x" % property_id, repr(value), end=" ") | 2320 | #print("%08x" % property_id, repr(value), end=" ") |
| 2297 | #print("(%s)" % VT[i32(s, offset) & 0xFFF]) | 2321 | #print("(%s)" % VT[i32(s, offset) & 0xFFF]) |
| @@ -2347,7 +2371,7 @@ if __name__ == "__main__": | @@ -2347,7 +2371,7 @@ if __name__ == "__main__": | ||
| 2347 | 2371 | ||
| 2348 | (options, args) = parser.parse_args() | 2372 | (options, args) = parser.parse_args() |
| 2349 | 2373 | ||
| 2350 | - print('olefile version %s %s - http://www.decalage.info/en/olefile\n' % (__version__, __date__)) | 2374 | + print('olefile version %s %s - https://www.decalage.info/en/olefile\n' % (__version__, __date__)) |
| 2351 | 2375 | ||
| 2352 | # Print help if no arguments are passed | 2376 | # Print help if no arguments are passed |
| 2353 | if len(args) == 0: | 2377 | if len(args) == 0: |