Commit 68a910d1e14d78005ef475c2c32b37112aa98034

Authored by decalage2
1 parent 63db719f

olefile: synced changes from olefile repo

oletools/thirdparty/olefile/olefile.py
1 -#!/usr/bin/env python 1 +"""
  2 +olefile (formerly OleFileIO_PL)
2 3
3 -# olefile (formerly OleFileIO_PL)  
4 -#  
5 -# Module to read/write Microsoft OLE2 files (also called Structured Storage or  
6 -# Microsoft Compound Document File Format), such as Microsoft Office 97-2003  
7 -# documents, Image Composer and FlashPix files, Outlook messages, ...  
8 -# This version is compatible with Python 2.6+ and 3.x  
9 -#  
10 -# Project website: http://www.decalage.info/olefile  
11 -#  
12 -# olefile is copyright (c) 2005-2016 Philippe Lagadec (http://www.decalage.info)  
13 -#  
14 -# olefile is based on the OleFileIO module from the PIL library v1.1.6  
15 -# See: http://www.pythonware.com/products/pil/index.htm  
16 -#  
17 -# The Python Imaging Library (PIL) is  
18 -# Copyright (c) 1997-2005 by Secret Labs AB  
19 -# Copyright (c) 1995-2005 by Fredrik Lundh  
20 -#  
21 -# See source code and LICENSE.txt for information on usage and redistribution. 4 +Module to read/write Microsoft OLE2 files (also called Structured Storage or
  5 +Microsoft Compound Document File Format), such as Microsoft Office 97-2003
  6 +documents, Image Composer and FlashPix files, Outlook messages, ...
  7 +This version is compatible with Python 2.6+ and 3.x
  8 +
  9 +Project website: https://www.decalage.info/olefile
  10 +
  11 +olefile is copyright (c) 2005-2017 Philippe Lagadec
  12 +(https://www.decalage.info)
22 13
  14 +olefile is based on the OleFileIO module from the PIL library v1.1.7
  15 +See: http://www.pythonware.com/products/pil/index.htm
  16 +and http://svn.effbot.org/public/tags/pil-1.1.7/PIL/OleFileIO.py
  17 +
  18 +The Python Imaging Library (PIL) is
  19 +Copyright (c) 1997-2009 by Secret Labs AB
  20 +Copyright (c) 1995-2009 by Fredrik Lundh
  21 +
  22 +See source code and LICENSE.txt for information on usage and redistribution.
  23 +"""
23 24
24 # Since OleFileIO_PL v0.30, only Python 2.6+ and 3.x is supported 25 # Since OleFileIO_PL v0.30, only Python 2.6+ and 3.x is supported
25 # This import enables print() as a function rather than a keyword 26 # This import enables print() as a function rather than a keyword
@@ -28,14 +29,10 @@ @@ -28,14 +29,10 @@
28 from __future__ import print_function # This version of olefile requires Python 2.6+ or 3.x. 29 from __future__ import print_function # This version of olefile requires Python 2.6+ or 3.x.
29 30
30 31
31 -__author__ = "Philippe Lagadec"  
32 -__date__ = "2016-04-26"  
33 -__version__ = '0.44'  
34 -  
35 #--- LICENSE ------------------------------------------------------------------ 32 #--- LICENSE ------------------------------------------------------------------
36 33
37 -# olefile (formerly OleFileIO_PL) is copyright (c) 2005-2016 Philippe Lagadec  
38 -# (http://www.decalage.info) 34 +# olefile (formerly OleFileIO_PL) is copyright (c) 2005-2017 Philippe Lagadec
  35 +# (https://www.decalage.info)
39 # 36 #
40 # All rights reserved. 37 # All rights reserved.
41 # 38 #
@@ -66,8 +63,8 @@ __version__ = '0.44' @@ -66,8 +63,8 @@ __version__ = '0.44'
66 # Imaging Library (PIL) published by Fredrik Lundh under the following license: 63 # Imaging Library (PIL) published by Fredrik Lundh under the following license:
67 64
68 # The Python Imaging Library (PIL) is 65 # The Python Imaging Library (PIL) is
69 -# Copyright (c) 1997-2005 by Secret Labs AB  
70 -# Copyright (c) 1995-2005 by Fredrik Lundh 66 +# Copyright (c) 1997-2009 by Secret Labs AB
  67 +# Copyright (c) 1995-2009 by Fredrik Lundh
71 # 68 #
72 # By obtaining, using, and/or copying this software and/or its associated 69 # By obtaining, using, and/or copying this software and/or its associated
73 # documentation, you agree that you have read, understood, and will comply with 70 # documentation, you agree that you have read, understood, and will comply with
@@ -138,7 +135,7 @@ __version__ = '0.44' @@ -138,7 +135,7 @@ __version__ = '0.44'
138 # 2009-12-11 v0.20 PL: - bugfix in OleFileIO.open when filename is not plain str 135 # 2009-12-11 v0.20 PL: - bugfix in OleFileIO.open when filename is not plain str
139 # 2010-01-22 v0.21 PL: - added support for big-endian CPUs such as PowerPC Macs 136 # 2010-01-22 v0.21 PL: - added support for big-endian CPUs such as PowerPC Macs
140 # 2012-02-16 v0.22 PL: - fixed bug in getproperties, patch by chuckleberryfinn 137 # 2012-02-16 v0.22 PL: - fixed bug in getproperties, patch by chuckleberryfinn
141 -# (https://bitbucket.org/decalage/olefileio_pl/issue/7) 138 +# (https://github.com/decalage2/olefile/issues/7)
142 # - added close method to OleFileIO (fixed issue #2) 139 # - added close method to OleFileIO (fixed issue #2)
143 # 2012-07-25 v0.23 PL: - added support for file-like objects (patch by mete0r_kr) 140 # 2012-07-25 v0.23 PL: - added support for file-like objects (patch by mete0r_kr)
144 # 2013-05-05 v0.24 PL: - getproperties: added conversion from filetime to python 141 # 2013-05-05 v0.24 PL: - getproperties: added conversion from filetime to python
@@ -196,6 +193,15 @@ __version__ = '0.44' @@ -196,6 +193,15 @@ __version__ = '0.44'
196 # 2016-04-27 - added support for incomplete streams and incorrect 193 # 2016-04-27 - added support for incomplete streams and incorrect
197 # directory entries (to read malformed documents) 194 # directory entries (to read malformed documents)
198 # 2016-05-04 - fixed slight bug in OleStream 195 # 2016-05-04 - fixed slight bug in OleStream
  196 +# 2016-11-27 DR: - added method to get the clsid of a storage/stream
  197 +# (Daniel Roethlisberger)
  198 +# 2017-05-31 v0.45 BS: - PR #114 from oletools to handle excessive number of
  199 +# properties:
  200 +# https://github.com/decalage2/oletools/pull/114
  201 +
  202 +__date__ = "2017-05-31"
  203 +__version__ = '0.45dev1'
  204 +__author__ = "Philippe Lagadec"
199 205
200 #----------------------------------------------------------------------------- 206 #-----------------------------------------------------------------------------
201 # TODO (for version 1.0): 207 # TODO (for version 1.0):
@@ -223,7 +229,7 @@ __version__ = '0.44' @@ -223,7 +229,7 @@ __version__ = '0.44'
223 # - see also original notes and FIXME below 229 # - see also original notes and FIXME below
224 # - remove all obsolete FIXMEs 230 # - remove all obsolete FIXMEs
225 # - OleMetadata: fix version attrib according to 231 # - OleMetadata: fix version attrib according to
226 -# http://msdn.microsoft.com/en-us/library/dd945671%28v=office.12%29.aspx 232 +# https://msdn.microsoft.com/en-us/library/dd945671%28v=office.12%29.aspx
227 233
228 # IDEAS: 234 # IDEAS:
229 # - in OleFileIO._open and OleStream, use size=None instead of 0x7FFFFFFF for 235 # - in OleFileIO._open and OleStream, use size=None instead of 0x7FFFFFFF for
@@ -238,8 +244,8 @@ __version__ = '0.44' @@ -238,8 +244,8 @@ __version__ = '0.44'
238 # - create a simple OLE explorer with wxPython 244 # - create a simple OLE explorer with wxPython
239 245
240 # FUTURE EVOLUTIONS to add write support: 246 # FUTURE EVOLUTIONS to add write support:
241 -# see issue #6 on Bitbucket:  
242 -# https://bitbucket.org/decalage/olefileio_pl/issue/6/improve-olefileio_pl-to-write-ole-files 247 +# see issue #6 on GitHub:
  248 +# https://github.com/decalage2/olefile/issues/6
243 249
244 #----------------------------------------------------------------------------- 250 #-----------------------------------------------------------------------------
245 # NOTES from PIL 1.1.6: 251 # NOTES from PIL 1.1.6:
@@ -268,6 +274,10 @@ __version__ = '0.44' @@ -268,6 +274,10 @@ __version__ = '0.44'
268 274
269 #------------------------------------------------------------------------------ 275 #------------------------------------------------------------------------------
270 276
  277 +__all__ = ['isOleFile', 'OleFileIO', 'OleMetadata', 'enable_logging',
  278 + 'MAGIC', 'STGTY_EMPTY',
  279 + 'STGTY_STREAM', 'STGTY_STORAGE', 'STGTY_ROOT', 'STGTY_PROPERTY',
  280 + 'STGTY_LOCKBYTES', 'MINIMAL_OLEFILE_SIZE',]
271 281
272 import io 282 import io
273 import sys 283 import sys
@@ -317,17 +327,10 @@ else: @@ -317,17 +327,10 @@ else:
317 327
318 #[PL] These workarounds were inspired from the Path module 328 #[PL] These workarounds were inspired from the Path module
319 # (see http://www.jorendorff.com/articles/python/path/) 329 # (see http://www.jorendorff.com/articles/python/path/)
320 -#TODO: test with old Python versions  
321 -  
322 -# Pre-2.3 workaround for basestring.  
323 try: 330 try:
324 basestring 331 basestring
325 except NameError: 332 except NameError:
326 - try:  
327 - # is Unicode supported (Python >2.0 or >1.6 ?)  
328 - basestring = (str, unicode)  
329 - except NameError:  
330 - basestring = str 333 + basestring = str
331 334
332 #[PL] Experimental setting: if True, OLE filenames will be kept in Unicode 335 #[PL] Experimental setting: if True, OLE filenames will be kept in Unicode
333 # if False (default PIL behaviour), all filenames are converted to Latin-1. 336 # if False (default PIL behaviour), all filenames are converted to Latin-1.
@@ -395,27 +398,27 @@ def enable_logging(): @@ -395,27 +398,27 @@ def enable_logging():
395 398
396 #=== CONSTANTS =============================================================== 399 #=== CONSTANTS ===============================================================
397 400
398 -# magic bytes that should be at the beginning of every OLE file: 401 +#: magic bytes that should be at the beginning of every OLE file:
399 MAGIC = b'\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1' 402 MAGIC = b'\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1'
400 403
401 #[PL]: added constants for Sector IDs (from AAF specifications) 404 #[PL]: added constants for Sector IDs (from AAF specifications)
402 -MAXREGSECT = 0xFFFFFFFA # (-6) maximum SECT  
403 -DIFSECT = 0xFFFFFFFC # (-4) denotes a DIFAT sector in a FAT  
404 -FATSECT = 0xFFFFFFFD # (-3) denotes a FAT sector in a FAT  
405 -ENDOFCHAIN = 0xFFFFFFFE # (-2) end of a virtual stream chain  
406 -FREESECT = 0xFFFFFFFF # (-1) unallocated sector 405 +MAXREGSECT = 0xFFFFFFFA #: (-6) maximum SECT
  406 +DIFSECT = 0xFFFFFFFC #: (-4) denotes a DIFAT sector in a FAT
  407 +FATSECT = 0xFFFFFFFD #: (-3) denotes a FAT sector in a FAT
  408 +ENDOFCHAIN = 0xFFFFFFFE #: (-2) end of a virtual stream chain
  409 +FREESECT = 0xFFFFFFFF #: (-1) unallocated sector
407 410
408 #[PL]: added constants for Directory Entry IDs (from AAF specifications) 411 #[PL]: added constants for Directory Entry IDs (from AAF specifications)
409 -MAXREGSID = 0xFFFFFFFA # (-6) maximum directory entry ID  
410 -NOSTREAM = 0xFFFFFFFF # (-1) unallocated directory entry 412 +MAXREGSID = 0xFFFFFFFA #: (-6) maximum directory entry ID
  413 +NOSTREAM = 0xFFFFFFFF #: (-1) unallocated directory entry
411 414
412 #[PL] object types in storage (from AAF specifications) 415 #[PL] object types in storage (from AAF specifications)
413 -STGTY_EMPTY = 0 # empty directory entry (according to OpenOffice.org doc)  
414 -STGTY_STORAGE = 1 # element is a storage object  
415 -STGTY_STREAM = 2 # element is a stream object  
416 -STGTY_LOCKBYTES = 3 # element is an ILockBytes object  
417 -STGTY_PROPERTY = 4 # element is an IPropertyStorage object  
418 -STGTY_ROOT = 5 # element is a root storage 416 +STGTY_EMPTY = 0 #: empty directory entry
  417 +STGTY_STORAGE = 1 #: element is a storage object
  418 +STGTY_STREAM = 2 #: element is a stream object
  419 +STGTY_LOCKBYTES = 3 #: element is an ILockBytes object
  420 +STGTY_PROPERTY = 4 #: element is an IPropertyStorage object
  421 +STGTY_ROOT = 5 #: element is a root storage
419 422
420 # Unknown size for a stream (used by OleStream): 423 # Unknown size for a stream (used by OleStream):
421 UNKNOWN_SIZE = 0x7FFFFFFF 424 UNKNOWN_SIZE = 0x7FFFFFFF
@@ -472,7 +475,13 @@ def isOleFile (filename): @@ -472,7 +475,13 @@ def isOleFile (filename):
472 """ 475 """
473 Test if a file is an OLE container (according to the magic bytes in its header). 476 Test if a file is an OLE container (according to the magic bytes in its header).
474 477
475 - :param filename: string-like or file-like object, OLE file to parse 478 + .. note::
  479 + This function only checks the first 8 bytes of the file, not the
  480 + rest of the OLE structure.
  481 +
  482 + .. versionadded:: 0.16
  483 +
  484 + :param filename: filename, contents or file-like object of the OLE file (string-like or file-like object)
476 485
477 - if filename is a string smaller than 1536 bytes, it is the path 486 - if filename is a string smaller than 1536 bytes, it is the path
478 of the file to open. (bytes or unicode string) 487 of the file to open. (bytes or unicode string)
@@ -481,7 +490,9 @@ def isOleFile (filename): @@ -481,7 +490,9 @@ def isOleFile (filename):
481 - if filename is a file-like object (with read and seek methods), 490 - if filename is a file-like object (with read and seek methods),
482 it is parsed as-is. 491 it is parsed as-is.
483 492
  493 + :type filename: bytes or str or unicode or file
484 :returns: True if OLE, False otherwise. 494 :returns: True if OLE, False otherwise.
  495 + :rtype: bool
485 """ 496 """
486 # check if filename is a string-like or file-like object: 497 # check if filename is a string-like or file-like object:
487 if hasattr(filename, 'read'): 498 if hasattr(filename, 'read'):
@@ -494,7 +505,8 @@ def isOleFile (filename): @@ -494,7 +505,8 @@ def isOleFile (filename):
494 header = filename[:len(MAGIC)] 505 header = filename[:len(MAGIC)]
495 else: 506 else:
496 # string-like object: filename of file on disk 507 # string-like object: filename of file on disk
497 - header = open(filename, 'rb').read(len(MAGIC)) 508 + with open(filename, 'rb') as fp:
  509 + header = fp.read(len(MAGIC))
498 if header == MAGIC: 510 if header == MAGIC:
499 return True 511 return True
500 else: 512 else:
@@ -511,8 +523,6 @@ else: @@ -511,8 +523,6 @@ else:
511 return c if c.__class__ is int else c[0] 523 return c if c.__class__ is int else c[0]
512 524
513 525
514 -#TODO: replace i16 and i32 with more readable struct.unpack equivalent?  
515 -  
516 def i16(c, o = 0): 526 def i16(c, o = 0):
517 """ 527 """
518 Converts a 2-bytes (16 bits) string to an integer. 528 Converts a 2-bytes (16 bits) string to an integer.
@@ -520,7 +530,7 @@ def i16(c, o = 0): @@ -520,7 +530,7 @@ def i16(c, o = 0):
520 :param c: string containing bytes to convert 530 :param c: string containing bytes to convert
521 :param o: offset of bytes to convert in string 531 :param o: offset of bytes to convert in string
522 """ 532 """
523 - return i8(c[o]) | (i8(c[o+1])<<8) 533 + return struct.unpack("<H", c[o:o+2])[0]
524 534
525 535
526 def i32(c, o = 0): 536 def i32(c, o = 0):
@@ -530,10 +540,7 @@ def i32(c, o = 0): @@ -530,10 +540,7 @@ def i32(c, o = 0):
530 :param c: string containing bytes to convert 540 :param c: string containing bytes to convert
531 :param o: offset of bytes to convert in string 541 :param o: offset of bytes to convert in string
532 """ 542 """
533 -## return int(ord(c[o])+(ord(c[o+1])<<8)+(ord(c[o+2])<<16)+(ord(c[o+3])<<24))  
534 -## # [PL]: added int() because "<<" gives long int since Python 2.4  
535 - # copied from Pillow's _binary:  
536 - return i8(c[o]) | (i8(c[o+1])<<8) | (i8(c[o+2])<<16) | (i8(c[o+3])<<24) 543 + return struct.unpack("<I", c[o:o+4])[0]
537 544
538 545
539 def _clsid(clsid): 546 def _clsid(clsid):
@@ -558,7 +565,7 @@ def filetime2datetime(filetime): @@ -558,7 +565,7 @@ def filetime2datetime(filetime):
558 convert FILETIME (64 bits int) to Python datetime.datetime 565 convert FILETIME (64 bits int) to Python datetime.datetime
559 """ 566 """
560 # TODO: manage exception when microseconds is too large 567 # TODO: manage exception when microseconds is too large
561 - # inspired from http://code.activestate.com/recipes/511425-filetime-to-datetime/ 568 + # inspired from https://code.activestate.com/recipes/511425-filetime-to-datetime/
562 _FILETIME_null_date = datetime.datetime(1601, 1, 1, 0, 0, 0) 569 _FILETIME_null_date = datetime.datetime(1601, 1, 1, 0, 0, 0)
563 #log.debug('timedelta days=%d' % (filetime//(10*1000000*3600*24))) 570 #log.debug('timedelta days=%d' % (filetime//(10*1000000*3600*24)))
564 return _FILETIME_null_date + datetime.timedelta(microseconds=filetime//10) 571 return _FILETIME_null_date + datetime.timedelta(microseconds=filetime//10)
@@ -585,17 +592,19 @@ class OleMetadata: @@ -585,17 +592,19 @@ class OleMetadata:
585 OLE file. 592 OLE file.
586 593
587 References for SummaryInformation stream: 594 References for SummaryInformation stream:
588 - - http://msdn.microsoft.com/en-us/library/dd942545.aspx  
589 - - http://msdn.microsoft.com/en-us/library/dd925819%28v=office.12%29.aspx  
590 - - http://msdn.microsoft.com/en-us/library/windows/desktop/aa380376%28v=vs.85%29.aspx  
591 - - http://msdn.microsoft.com/en-us/library/aa372045.aspx  
592 - - http://sedna-soft.de/summary-information-stream/  
593 - - http://poi.apache.org/apidocs/org/apache/poi/hpsf/SummaryInformation.html 595 +
  596 + - https://msdn.microsoft.com/en-us/library/dd942545.aspx
  597 + - https://msdn.microsoft.com/en-us/library/dd925819%28v=office.12%29.aspx
  598 + - https://msdn.microsoft.com/en-us/library/windows/desktop/aa380376%28v=vs.85%29.aspx
  599 + - https://msdn.microsoft.com/en-us/library/aa372045.aspx
  600 + - http://sedna-soft.de/articles/summary-information-stream/
  601 + - https://poi.apache.org/apidocs/org/apache/poi/hpsf/SummaryInformation.html
594 602
595 References for DocumentSummaryInformation stream: 603 References for DocumentSummaryInformation stream:
596 - - http://msdn.microsoft.com/en-us/library/dd945671%28v=office.12%29.aspx  
597 - - http://msdn.microsoft.com/en-us/library/windows/desktop/aa380374%28v=vs.85%29.aspx  
598 - - http://poi.apache.org/apidocs/org/apache/poi/hpsf/DocumentSummaryInformation.html 604 +
  605 + - https://msdn.microsoft.com/en-us/library/dd945671%28v=office.12%29.aspx
  606 + - https://msdn.microsoft.com/en-us/library/windows/desktop/aa380374%28v=vs.85%29.aspx
  607 + - https://poi.apache.org/apidocs/org/apache/poi/hpsf/DocumentSummaryInformation.html
599 608
600 new in version 0.25 609 new in version 0.25
601 """ 610 """
@@ -676,7 +685,7 @@ class OleMetadata: @@ -676,7 +685,7 @@ class OleMetadata:
676 def parse_properties(self, olefile): 685 def parse_properties(self, olefile):
677 """ 686 """
678 Parse standard properties of an OLE file, from the streams 687 Parse standard properties of an OLE file, from the streams
679 - "\x05SummaryInformation" and "\x05DocumentSummaryInformation", 688 + ``\\x05SummaryInformation`` and ``\\x05DocumentSummaryInformation``,
680 if present. 689 if present.
681 Properties are converted to strings, integers or python datetime objects. 690 Properties are converted to strings, integers or python datetime objects.
682 If a property is not present, its value is set to None. 691 If a property is not present, its value is set to None.
@@ -851,14 +860,14 @@ class OleStream(io.BytesIO): @@ -851,14 +860,14 @@ class OleStream(io.BytesIO):
851 elif unknown_size: 860 elif unknown_size:
852 # actual stream size was not known, now we know the size of read 861 # actual stream size was not known, now we know the size of read
853 # data: 862 # data:
854 - log.debug('Read data of length %d, the stream size was unkown' % len(data)) 863 + log.debug('Read data of length %d, the stream size was unknown' % len(data))
855 self.size = len(data) 864 self.size = len(data)
856 else: 865 else:
857 # read data is less than expected: 866 # read data is less than expected:
858 log.debug('Read data of length %d, less than expected stream size %d' % (len(data), size)) 867 log.debug('Read data of length %d, less than expected stream size %d' % (len(data), size))
859 # TODO: provide details in exception message 868 # TODO: provide details in exception message
860 - self.ole._raise_defect(DEFECT_INCORRECT, 'OLE stream size is less than declared')  
861 self.size = len(data) 869 self.size = len(data)
  870 + self.ole._raise_defect(DEFECT_INCORRECT, 'OLE stream size is less than declared')
862 # when all data is read in memory, BytesIO constructor is called 871 # when all data is read in memory, BytesIO constructor is called
863 io.BytesIO.__init__(self, data) 872 io.BytesIO.__init__(self, data)
864 # Then the OleStream object can be used as a read-only file object. 873 # Then the OleStream object can be used as a read-only file object.
@@ -1023,7 +1032,7 @@ class OleDirectoryEntry: @@ -1023,7 +1032,7 @@ class OleDirectoryEntry:
1023 Walk through red-black tree of children of this directory entry to add 1032 Walk through red-black tree of children of this directory entry to add
1024 all of them to the kids list. (recursive method) 1033 all of them to the kids list. (recursive method)
1025 1034
1026 - :param child_sid : index of child directory entry to use, or None when called 1035 + :param child_sid: index of child directory entry to use, or None when called
1027 first time for the root. (only used during recursion) 1036 first time for the root. (only used during recursion)
1028 """ 1037 """
1029 log.debug('append_kids: child_sid=%d' % child_sid) 1038 log.debug('append_kids: child_sid=%d' % child_sid)
@@ -1188,8 +1197,8 @@ class OleFileIO: @@ -1188,8 +1197,8 @@ class OleFileIO:
1188 """ 1197 """
1189 # minimal level for defects to be raised as exceptions: 1198 # minimal level for defects to be raised as exceptions:
1190 self._raise_defects_level = raise_defects 1199 self._raise_defects_level = raise_defects
1191 - # list of defects/issues not raised as exceptions:  
1192 - # tuples of (exception type, message) 1200 + #: list of defects/issues not raised as exceptions:
  1201 + #: tuples of (exception type, message)
1193 self.parsing_issues = [] 1202 self.parsing_issues = []
1194 self.write_mode = write_mode 1203 self.write_mode = write_mode
1195 self.path_encoding = path_encoding 1204 self.path_encoding = path_encoding
@@ -2081,6 +2090,21 @@ class OleFileIO: @@ -2081,6 +2090,21 @@ class OleFileIO:
2081 return False 2090 return False
2082 2091
2083 2092
  2093 + def getclsid(self, filename):
  2094 + """
  2095 + Return clsid of a stream/storage.
  2096 +
  2097 + :param filename: path of stream/storage in storage tree. (see openstream for
  2098 + syntax)
  2099 + :returns: Empty string if clsid is null, a printable representation of the clsid otherwise
  2100 +
  2101 + new in version 0.44
  2102 + """
  2103 + sid = self._find(filename)
  2104 + entry = self.direntries[sid]
  2105 + return entry.clsid
  2106 +
  2107 +
2084 def getmtime(self, filename): 2108 def getmtime(self, filename):
2085 """ 2109 """
2086 Return modification time of a stream/storage. 2110 Return modification time of a stream/storage.
@@ -2203,8 +2227,8 @@ class OleFileIO: @@ -2203,8 +2227,8 @@ class OleFileIO:
2203 2227
2204 # clamp num_props based on the data length 2228 # clamp num_props based on the data length
2205 num_props = min(num_props, len(s) / 8) 2229 num_props = min(num_props, len(s) / 8)
2206 -  
2207 - for i in xrange(num_props): 2230 +
  2231 + for i in iterrange(num_props):
2208 property_id = 0 # just in case of an exception 2232 property_id = 0 # just in case of an exception
2209 try: 2233 try:
2210 property_id = i32(s, 8+i*8) 2234 property_id = i32(s, 8+i*8)
@@ -2225,12 +2249,12 @@ class OleFileIO: @@ -2225,12 +2249,12 @@ class OleFileIO:
2225 elif property_type in (VT_I4, VT_INT, VT_ERROR): 2249 elif property_type in (VT_I4, VT_INT, VT_ERROR):
2226 # VT_I4: 32-bit signed integer 2250 # VT_I4: 32-bit signed integer
2227 # VT_ERROR: HRESULT, similar to 32-bit signed integer, 2251 # VT_ERROR: HRESULT, similar to 32-bit signed integer,
2228 - # see http://msdn.microsoft.com/en-us/library/cc230330.aspx 2252 + # see https://msdn.microsoft.com/en-us/library/cc230330.aspx
2229 value = i32(s, offset+4) 2253 value = i32(s, offset+4)
2230 elif property_type in (VT_UI4, VT_UINT): # 4-byte unsigned integer 2254 elif property_type in (VT_UI4, VT_UINT): # 4-byte unsigned integer
2231 value = i32(s, offset+4) # FIXME 2255 value = i32(s, offset+4) # FIXME
2232 elif property_type in (VT_BSTR, VT_LPSTR): 2256 elif property_type in (VT_BSTR, VT_LPSTR):
2233 - # CodePageString, see http://msdn.microsoft.com/en-us/library/dd942354.aspx 2257 + # CodePageString, see https://msdn.microsoft.com/en-us/library/dd942354.aspx
2234 # size is a 32 bits integer, including the null terminator, and 2258 # size is a 32 bits integer, including the null terminator, and
2235 # possibly trailing or embedded null chars 2259 # possibly trailing or embedded null chars
2236 #TODO: if codepage is unicode, the string should be converted as such 2260 #TODO: if codepage is unicode, the string should be converted as such
@@ -2240,12 +2264,12 @@ class OleFileIO: @@ -2240,12 +2264,12 @@ class OleFileIO:
2240 value = value.replace(b'\x00', b'') 2264 value = value.replace(b'\x00', b'')
2241 elif property_type == VT_BLOB: 2265 elif property_type == VT_BLOB:
2242 # binary large object (BLOB) 2266 # binary large object (BLOB)
2243 - # see http://msdn.microsoft.com/en-us/library/dd942282.aspx 2267 + # see https://msdn.microsoft.com/en-us/library/dd942282.aspx
2244 count = i32(s, offset+4) 2268 count = i32(s, offset+4)
2245 value = s[offset+8:offset+8+count] 2269 value = s[offset+8:offset+8+count]
2246 elif property_type == VT_LPWSTR: 2270 elif property_type == VT_LPWSTR:
2247 # UnicodeString 2271 # UnicodeString
2248 - # see http://msdn.microsoft.com/en-us/library/dd942313.aspx 2272 + # see https://msdn.microsoft.com/en-us/library/dd942313.aspx
2249 # "the string should NOT contain embedded or additional trailing 2273 # "the string should NOT contain embedded or additional trailing
2250 # null characters." 2274 # null characters."
2251 count = i32(s, offset+4) 2275 count = i32(s, offset+4)
@@ -2258,7 +2282,7 @@ class OleFileIO: @@ -2258,7 +2282,7 @@ class OleFileIO:
2258 log.debug('Converting property #%d to python datetime, value=%d=%fs' 2282 log.debug('Converting property #%d to python datetime, value=%d=%fs'
2259 %(property_id, value, float(value)/10000000)) 2283 %(property_id, value, float(value)/10000000))
2260 # convert FILETIME to Python datetime.datetime 2284 # convert FILETIME to Python datetime.datetime
2261 - # inspired from http://code.activestate.com/recipes/511425-filetime-to-datetime/ 2285 + # inspired from https://code.activestate.com/recipes/511425-filetime-to-datetime/
2262 _FILETIME_null_date = datetime.datetime(1601, 1, 1, 0, 0, 0) 2286 _FILETIME_null_date = datetime.datetime(1601, 1, 1, 0, 0, 0)
2263 log.debug('timedelta days=%d' % (value//(10*1000000*3600*24))) 2287 log.debug('timedelta days=%d' % (value//(10*1000000*3600*24)))
2264 value = _FILETIME_null_date + datetime.timedelta(microseconds=value//10) 2288 value = _FILETIME_null_date + datetime.timedelta(microseconds=value//10)
@@ -2272,12 +2296,12 @@ class OleFileIO: @@ -2272,12 +2296,12 @@ class OleFileIO:
2272 value = _clsid(s[offset+4:offset+20]) 2296 value = _clsid(s[offset+4:offset+20])
2273 elif property_type == VT_CF: 2297 elif property_type == VT_CF:
2274 # PropertyIdentifier or ClipboardData?? 2298 # PropertyIdentifier or ClipboardData??
2275 - # see http://msdn.microsoft.com/en-us/library/dd941945.aspx 2299 + # see https://msdn.microsoft.com/en-us/library/dd941945.aspx
2276 count = i32(s, offset+4) 2300 count = i32(s, offset+4)
2277 value = s[offset+8:offset+8+count] 2301 value = s[offset+8:offset+8+count]
2278 elif property_type == VT_BOOL: 2302 elif property_type == VT_BOOL:
2279 # VARIANT_BOOL, 16 bits bool, 0x0000=False, 0xFFFF=True 2303 # VARIANT_BOOL, 16 bits bool, 0x0000=False, 0xFFFF=True
2280 - # see http://msdn.microsoft.com/en-us/library/cc237864.aspx 2304 + # see https://msdn.microsoft.com/en-us/library/cc237864.aspx
2281 value = bool(i16(s, offset+4)) 2305 value = bool(i16(s, offset+4))
2282 else: 2306 else:
2283 value = None # everything else yields "None" 2307 value = None # everything else yields "None"
@@ -2285,13 +2309,13 @@ class OleFileIO: @@ -2285,13 +2309,13 @@ class OleFileIO:
2285 2309
2286 # missing: VT_EMPTY, VT_NULL, VT_R4, VT_R8, VT_CY, VT_DATE, 2310 # missing: VT_EMPTY, VT_NULL, VT_R4, VT_R8, VT_CY, VT_DATE,
2287 # VT_DECIMAL, VT_I1, VT_I8, VT_UI8, 2311 # VT_DECIMAL, VT_I1, VT_I8, VT_UI8,
2288 - # see http://msdn.microsoft.com/en-us/library/dd942033.aspx 2312 + # see https://msdn.microsoft.com/en-us/library/dd942033.aspx
2289 2313
2290 # FIXME: add support for VT_VECTOR 2314 # FIXME: add support for VT_VECTOR
2291 # VT_VECTOR is a 32 uint giving the number of items, followed by 2315 # VT_VECTOR is a 32 uint giving the number of items, followed by
2292 # the items in sequence. The VT_VECTOR value is combined with the 2316 # the items in sequence. The VT_VECTOR value is combined with the
2293 # type of items, e.g. VT_VECTOR|VT_BSTR 2317 # type of items, e.g. VT_VECTOR|VT_BSTR
2294 - # see http://msdn.microsoft.com/en-us/library/dd942011.aspx 2318 + # see https://msdn.microsoft.com/en-us/library/dd942011.aspx
2295 2319
2296 #print("%08x" % property_id, repr(value), end=" ") 2320 #print("%08x" % property_id, repr(value), end=" ")
2297 #print("(%s)" % VT[i32(s, offset) & 0xFFF]) 2321 #print("(%s)" % VT[i32(s, offset) & 0xFFF])
@@ -2347,7 +2371,7 @@ if __name__ == "__main__": @@ -2347,7 +2371,7 @@ if __name__ == "__main__":
2347 2371
2348 (options, args) = parser.parse_args() 2372 (options, args) = parser.parse_args()
2349 2373
2350 - print('olefile version %s %s - http://www.decalage.info/en/olefile\n' % (__version__, __date__)) 2374 + print('olefile version %s %s - https://www.decalage.info/en/olefile\n' % (__version__, __date__))
2351 2375
2352 # Print help if no arguments are passed 2376 # Print help if no arguments are passed
2353 if len(args) == 0: 2377 if len(args) == 0: