Commit cf40e208977a0fc75189f4df3ba41e051af73d97

Authored by Sébastien Larinier
1 parent c7590d7d

Make olefile2 compliant with Python 3.5

oletools/oletimes.py
... ... @@ -94,6 +94,6 @@ for obj in ole.listdir(streams=True, storages=True):
94 94 #print '- %s: mtime=%s ctime=%s' % (repr('/'.join(obj)), ole.getmtime(obj), ole.getctime(obj))
95 95 t.add_row((repr('/'.join(obj)), dt2str(ole.getmtime(obj)), dt2str(ole.getctime(obj))))
96 96  
97   -print t
  97 +print(t)
98 98  
99 99 ole.close()
... ...
oletools/thirdparty/olefile/olefile2.py
... ... @@ -209,7 +209,12 @@ __version__ = '0.40py2'
209 209  
210 210 #------------------------------------------------------------------------------
211 211  
212   -import string, StringIO, struct, array, os.path, sys, datetime
  212 +import string, struct, array, os.path, sys, datetime
  213 +
  214 +try:
  215 + import StringIO
  216 +except:
  217 + from _io import StringIO as StringIO
213 218  
214 219 #[PL] Define explicitly the public API to avoid private objects in pydoc:
215 220 __all__ = ['OleFileIO', 'isOleFile']
... ... @@ -222,7 +227,7 @@ elif array.array('I').itemsize == 4:
222 227 # on 64 bits platforms, integers in an array are 32 bits:
223 228 UINT32 = 'I'
224 229 else:
225   - raise ValueError, 'Need to fix a bug with 32 bit arrays, please contact author...'
  230 + raise(ValueError, 'Need to fix a bug with 32 bit arrays, please contact author...')
226 231  
227 232  
228 233 #[PL] These workarounds were inspired from the Path module
... ... @@ -243,7 +248,7 @@ except NameError:
243 248 # is Unicode supported (Python >2.0 or >1.6 ?)
244 249 basestring = (str, unicode)
245 250 except NameError:
246   - basestring = str
  251 + basestring = str
247 252  
248 253 #[PL] Experimental setting: if True, OLE filenames will be kept in Unicode
249 254 # if False (default PIL behaviour), all filenames are converted to Latin-1.
... ... @@ -253,7 +258,7 @@ KEEP_UNICODE_NAMES = False
253 258 # command line to change it.
254 259 DEBUG_MODE = False
255 260 def debug_print(msg):
256   - print msg
  261 + print(msg)
257 262 def debug_pass(msg):
258 263 pass
259 264 debug = debug_pass
... ... @@ -274,15 +279,15 @@ def set_debug_mode(debug_mode):
274 279 MAGIC = '\320\317\021\340\241\261\032\341'
275 280  
276 281 #[PL]: added constants for Sector IDs (from AAF specifications)
277   -MAXREGSECT = 0xFFFFFFFAL; # maximum SECT
278   -DIFSECT = 0xFFFFFFFCL; # (-4) denotes a DIFAT sector in a FAT
279   -FATSECT = 0xFFFFFFFDL; # (-3) denotes a FAT sector in a FAT
280   -ENDOFCHAIN = 0xFFFFFFFEL; # (-2) end of a virtual stream chain
281   -FREESECT = 0xFFFFFFFFL; # (-1) unallocated sector
  282 +MAXREGSECT = 0xFFFFFFFA; # maximum SECT
  283 +DIFSECT = 0xFFFFFFFC; # (-4) denotes a DIFAT sector in a FAT
  284 +FATSECT = 0xFFFFFFFD; # (-3) denotes a FAT sector in a FAT
  285 +ENDOFCHAIN = 0xFFFFFFFE; # (-2) end of a virtual stream chain
  286 +FREESECT = 0xFFFFFFFF; # (-1) unallocated sector
282 287  
283 288 #[PL]: added constants for Directory Entry IDs (from AAF specifications)
284   -MAXREGSID = 0xFFFFFFFAL; # maximum directory entry ID
285   -NOSTREAM = 0xFFFFFFFFL; # (-1) unallocated directory entry
  289 +MAXREGSID = 0xFFFFFFFA; # maximum directory entry ID
  290 +NOSTREAM = 0xFFFFFFFF; # (-1) unallocated directory entry
286 291  
287 292 #[PL] object types in storage (from AAF specifications)
288 293 STGTY_EMPTY = 0 # empty directory entry (according to OpenOffice.org doc)
... ... @@ -392,7 +397,6 @@ def _clsid(clsid):
392 397  
393 398 try:
394 399 # is Unicode supported ?
395   - unicode
396 400  
397 401 def _unicode(s, errors='replace'):
398 402 """
... ... @@ -414,7 +418,7 @@ try:
414 418 return u.encode('latin_1', errors)
415 419 except:
416 420 # there was an error during Unicode to Latin-1 conversion:
417   - raise IOError, 'incorrect Unicode name'
  421 + raise(IOError, 'incorrect Unicode name')
418 422  
419 423 except NameError:
420 424 def _unicode(s, errors='replace'):
... ... @@ -587,14 +591,14 @@ class OleMetadata:
587 591 """
588 592 Dump all metadata, for debugging purposes.
589 593 """
590   - print 'Properties from SummaryInformation stream:'
  594 + print('Properties from SummaryInformation stream:')
591 595 for prop in self.SUMMARY_ATTRIBS:
592 596 value = getattr(self, prop)
593   - print '- %s: %s' % (prop, repr(value))
594   - print 'Properties from DocumentSummaryInformation stream:'
  597 + print('- %s: %s' % (prop, repr(value)))
  598 + print('Properties from DocumentSummaryInformation stream:')
595 599 for prop in self.DOCSUM_ATTRIBS:
596 600 value = getattr(self, prop)
597   - print '- %s: %s' % (prop, repr(value))
  601 + print('- %s: %s' % (prop, repr(value)))
598 602  
599 603  
600 604 #--- _OleStream ---------------------------------------------------------------
... ... @@ -651,7 +655,7 @@ class _OleStream(StringIO.StringIO):
651 655 # This number should (at least) be less than the total number of
652 656 # sectors in the given FAT:
653 657 if nb_sectors > len(fat):
654   - raise IOError, 'malformed OLE document, stream too large'
  658 + raise(IOError, 'malformed OLE document, stream too large')
655 659 # optimization(?): data is first a list of strings, and join() is called
656 660 # at the end to concatenate all in one string.
657 661 # (this may not be really useful with recent Python versions)
... ... @@ -659,10 +663,10 @@ class _OleStream(StringIO.StringIO):
659 663 # if size is zero, then first sector index should be ENDOFCHAIN:
660 664 if size == 0 and sect != ENDOFCHAIN:
661 665 debug('size == 0 and sect != ENDOFCHAIN:')
662   - raise IOError, 'incorrect OLE sector index for empty stream'
  666 + raise(IOError, 'incorrect OLE sector index for empty stream')
663 667 #[PL] A fixed-length for loop is used instead of an undefined while
664 668 # loop to avoid DoS attacks:
665   - for i in xrange(nb_sectors):
  669 + for i in range(nb_sectors):
666 670 # Sector index may be ENDOFCHAIN, but only if size was unknown
667 671 if sect == ENDOFCHAIN:
668 672 if unknown_size:
... ... @@ -670,7 +674,7 @@ class _OleStream(StringIO.StringIO):
670 674 else:
671 675 # else this means that the stream is smaller than declared:
672 676 debug('sect=ENDOFCHAIN before expected size')
673   - raise IOError, 'incomplete OLE stream'
  677 + raise(IOError, 'incomplete OLE stream')
674 678 # sector index should be within FAT:
675 679 if sect<0 or sect>=len(fat):
676 680 debug('sect=%d (%X) / len(fat)=%d' % (sect, sect, len(fat)))
... ... @@ -680,7 +684,7 @@ class _OleStream(StringIO.StringIO):
680 684 ## f.write(tmp_data)
681 685 ## f.close()
682 686 ## debug('data read so far: %d bytes' % len(tmp_data))
683   - raise IOError, 'incorrect OLE FAT, sector index out of range'
  687 + raise(IOError, 'incorrect OLE FAT, sector index out of range')
684 688 #TODO: merge this code with OleFileIO.getsect() ?
685 689 #TODO: check if this works with 4K sectors:
686 690 try:
... ... @@ -688,7 +692,7 @@ class _OleStream(StringIO.StringIO):
688 692 except:
689 693 debug('sect=%d, seek=%d, filesize=%d' %
690 694 (sect, offset+sectorsize*sect, filesize))
691   - raise IOError, 'OLE sector index out of range'
  695 + raise(IOError, 'OLE sector index out of range')
692 696 sector_data = fp.read(sectorsize)
693 697 # [PL] check if there was enough data:
694 698 # Note: if sector is the last of the file, sometimes it is not a
... ... @@ -698,17 +702,17 @@ class _OleStream(StringIO.StringIO):
698 702 debug('sect=%d / len(fat)=%d, seek=%d / filesize=%d, len read=%d' %
699 703 (sect, len(fat), offset+sectorsize*sect, filesize, len(sector_data)))
700 704 debug('seek+len(read)=%d' % (offset+sectorsize*sect+len(sector_data)))
701   - raise IOError, 'incomplete OLE sector'
  705 + raise(IOError, 'incomplete OLE sector')
702 706 data.append(sector_data)
703 707 # jump to next sector in the FAT:
704 708 try:
705 709 sect = fat[sect]
706 710 except IndexError:
707 711 # [PL] if pointer is out of the FAT an exception is raised
708   - raise IOError, 'incorrect OLE FAT, sector index out of range'
  712 + raise(IOError, 'incorrect OLE FAT, sector index out of range')
709 713 #[PL] Last sector should be a "end of chain" marker:
710 714 if sect != ENDOFCHAIN:
711   - raise IOError, 'incorrect last sector index in OLE stream'
  715 + raise(IOError, 'incorrect last sector index in OLE stream')
712 716 data = string.join(data, "")
713 717 # Data is truncated to the actual stream size:
714 718 if len(data) >= size:
... ... @@ -722,7 +726,7 @@ class _OleStream(StringIO.StringIO):
722 726 else:
723 727 # read data is less than expected:
724 728 debug('len(data)=%d, size=%d' % (len(data), size))
725   - raise IOError, 'OLE stream size is less than declared'
  729 + raise (IOError, 'OLE stream size is less than declared')
726 730 # when all data is read in memory, StringIO constructor is called
727 731 StringIO.StringIO.__init__(self, data)
728 732 # Then the _OleStream object can be used as a read-only file object.
... ... @@ -829,13 +833,13 @@ class _OleDirectoryEntry:
829 833 # sectors, BUT apparently some implementations set it as 0xFFFFFFFFL, 1
830 834 # or some other value so it cannot be raised as a defect in general:
831 835 if olefile.sectorsize == 512:
832   - if sizeHigh != 0 and sizeHigh != 0xFFFFFFFFL:
  836 + if sizeHigh != 0 and sizeHigh != 0xFFFFFFFF:
833 837 debug('sectorsize=%d, sizeLow=%d, sizeHigh=%d (%X)' %
834 838 (olefile.sectorsize, sizeLow, sizeHigh, sizeHigh))
835 839 olefile._raise_defect(DEFECT_UNSURE, 'incorrect OLE stream size')
836 840 self.size = sizeLow
837 841 else:
838   - self.size = sizeLow + (long(sizeHigh)<<32)
  842 + self.size = sizeLow + (int(sizeHigh)<<32)
839 843 debug(' - size: %d (sizeLow=%d, sizeHigh=%d)' % (self.size, sizeLow, sizeHigh))
840 844  
841 845 self.clsid = _clsid(clsid)
... ... @@ -925,7 +929,7 @@ class _OleDirectoryEntry:
925 929  
926 930 def __cmp__(self, other):
927 931 "Compare entries by name"
928   - return cmp(self.name, other.name)
  932 + return __lt__(self.name, other.name)
929 933 #TODO: replace by the same function as MS implementation ?
930 934 # (order by name length first, then case-insensitive order)
931 935  
... ... @@ -934,12 +938,13 @@ class _OleDirectoryEntry:
934 938 "Dump this entry, and all its subentries (for debug purposes only)"
935 939 TYPES = ["(invalid)", "(storage)", "(stream)", "(lockbytes)",
936 940 "(property)", "(root)"]
937   - print " "*tab + repr(self.name), TYPES[self.entry_type],
  941 + print(" "*tab + repr(self.name), TYPES[self.entry_type]),
938 942 if self.entry_type in (STGTY_STREAM, STGTY_ROOT):
939   - print self.size, "bytes",
940   - print
  943 + print(self.size),\
  944 + print("bytes"),
  945 + print("\n")
941 946 if self.entry_type in (STGTY_STORAGE, STGTY_ROOT) and self.clsid:
942   - print " "*tab + "{%s}" % self.clsid
  947 + print(" "*tab + "{%s}" % self.clsid)
943 948  
944 949 for kid in self.kids:
945 950 kid.dump(tab + 2)
... ... @@ -1038,7 +1043,7 @@ class OleFileIO:
1038 1043 """
1039 1044 # added by [PL]
1040 1045 if defect_level >= self._raise_defects_level:
1041   - raise exception_type, message
  1046 + raise(exception_type, message)
1042 1047 else:
1043 1048 # just record the issue, no exception raised:
1044 1049 self.parsing_issues.append((exception_type, message))
... ... @@ -1148,7 +1153,7 @@ class OleFileIO:
1148 1153 ) = struct.unpack(fmt_header, header1)
1149 1154 debug( struct.unpack(fmt_header, header1))
1150 1155  
1151   - if self.Sig != '\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1':
  1156 + if self.Sig != b'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1':
1152 1157 # OLE signature should always be present
1153 1158 self._raise_defect(DEFECT_FATAL, "incorrect OLE signature")
1154 1159 if self.clsid != '\x00'*16:
... ... @@ -1271,10 +1276,10 @@ class OleFileIO:
1271 1276 }
1272 1277 nbsect = len(fat)
1273 1278 nlines = (nbsect+VPL-1)/VPL
1274   - print "index",
  1279 + print("index"),
1275 1280 for i in range(VPL):
1276 1281 print ("%8X" % i),
1277   - print ""
  1282 + print("")
1278 1283 for l in range(nlines):
1279 1284 index = l*VPL
1280 1285 print ("%8X:" % (firstindex+index)),
... ... @@ -1289,8 +1294,8 @@ class OleFileIO:
1289 1294 nom = " --->"
1290 1295 else:
1291 1296 nom = "%8X" % sect
1292   - print nom,
1293   - print ""
  1297 + print(nom)
  1298 + print("")
1294 1299  
1295 1300  
1296 1301 def dumpsect(self, sector, firstindex=0):
... ... @@ -1301,10 +1306,10 @@ class OleFileIO:
1301 1306 tab = array.array(UINT32, sector)
1302 1307 nbsect = len(tab)
1303 1308 nlines = (nbsect+VPL-1)/VPL
1304   - print "index",
  1309 + print("index"),
1305 1310 for i in range(VPL):
1306 1311 print ("%8X" % i),
1307   - print ""
  1312 + print("")
1308 1313 for l in range(nlines):
1309 1314 index = l*VPL
1310 1315 print ("%8X:" % (firstindex+index)),
... ... @@ -1313,8 +1318,8 @@ class OleFileIO:
1313 1318 break
1314 1319 sect = tab[i]
1315 1320 nom = "%8X" % sect
1316   - print nom,
1317   - print ""
  1321 + print(nom),
  1322 + print("")
1318 1323  
1319 1324 def sect2array(self, sect):
1320 1325 """
... ... @@ -1398,9 +1403,9 @@ class OleFileIO:
1398 1403 nb_difat = (self.csectFat-109 + 126)/127
1399 1404 debug( "nb_difat = %d" % nb_difat )
1400 1405 if self.csectDif != nb_difat:
1401   - raise IOError, 'incorrect DIFAT'
  1406 + raise(IOError, 'incorrect DIFAT')
1402 1407 isect_difat = self.sectDifStart
1403   - for i in xrange(nb_difat):
  1408 + for i in range(nb_difat):
1404 1409 debug( "DIFAT block %d, sector %X" % (i, isect_difat) )
1405 1410 #TODO: check if corresponding FAT SID = DIFSECT
1406 1411 sector_difat = self.getsect(isect_difat)
... ... @@ -1413,7 +1418,7 @@ class OleFileIO:
1413 1418 # checks:
1414 1419 if isect_difat not in [ENDOFCHAIN, FREESECT]:
1415 1420 # last DIFAT pointer value must be ENDOFCHAIN or FREESECT
1416   - raise IOError, 'incorrect end of DIFAT'
  1421 + raise(IOError, 'incorrect end of DIFAT')
1417 1422 ## if len(self.fat) != self.num_fat_sectors:
1418 1423 ## # FAT should contain num_fat_sectors blocks
1419 1424 ## print "FAT length: %d instead of %d" % (len(self.fat), self.num_fat_sectors)
... ... @@ -1653,7 +1658,7 @@ class OleFileIO:
1653 1658 if kid.name.lower() == name.lower():
1654 1659 break
1655 1660 else:
1656   - raise IOError, "file not found"
  1661 + raise(IOError, "file not found")
1657 1662 node = kid
1658 1663 return node.sid
1659 1664  
... ... @@ -1673,7 +1678,7 @@ class OleFileIO:
1673 1678 sid = self._find(filename)
1674 1679 entry = self.direntries[sid]
1675 1680 if entry.entry_type != STGTY_STREAM:
1676   - raise IOError, "this file is not a stream"
  1681 + raise(IOError, "this file is not a stream")
1677 1682 return self._open(entry.isectStart, entry.size)
1678 1683  
1679 1684  
... ... @@ -1755,7 +1760,7 @@ class OleFileIO:
1755 1760 entry = self.direntries[sid]
1756 1761 if entry.entry_type != STGTY_STREAM:
1757 1762 #TODO: Should it return zero instead of raising an exception ?
1758   - raise TypeError, 'object is not an OLE stream'
  1763 + raise(TypeError, 'object is not an OLE stream')
1759 1764 return entry.size
1760 1765  
1761 1766  
... ... @@ -1860,12 +1865,12 @@ class OleFileIO:
1860 1865 count = i32(s, offset+4)
1861 1866 value = _unicode(s[offset+8:offset+8+count*2])
1862 1867 elif type == VT_FILETIME:
1863   - value = long(i32(s, offset+4)) + (long(i32(s, offset+8))<<32)
  1868 + value = int(i32(s, offset+4)) + (int(i32(s, offset+8))<<32)
1864 1869 # FILETIME is a 64-bit int: "number of 100ns periods
1865 1870 # since Jan 1,1601".
1866 1871 if convert_time and id not in no_conversion:
1867 1872 debug('Converting property #%d to python datetime, value=%d=%fs'
1868   - %(id, value, float(value)/10000000L))
  1873 + %(id, value, float(value)/10000000))
1869 1874 # convert FILETIME to Python datetime.datetime
1870 1875 # inspired from http://code.activestate.com/recipes/511425-filetime-to-datetime/
1871 1876 _FILETIME_null_date = datetime.datetime(1601, 1, 1, 0, 0, 0)
... ... @@ -1874,7 +1879,7 @@ class OleFileIO:
1874 1879 else:
1875 1880 # legacy code kept for backward compatibility: returns a
1876 1881 # number of seconds since Jan 1,1601
1877   - value = value / 10000000L # seconds
  1882 + value = value / 10000000 # seconds
1878 1883 elif type == VT_UI1: # 1-byte unsigned integer
1879 1884 value = ord(s[offset+4])
1880 1885 elif type == VT_CLSID:
... ... @@ -1939,8 +1944,8 @@ if __name__ == "__main__":
1939 1944  
1940 1945 # [PL] display quick usage info if launched from command-line
1941 1946 if len(sys.argv) <= 1:
1942   - print __doc__
1943   - print """
  1947 + print(__doc__)
  1948 + print("""
1944 1949 Launched from command line, this script parses OLE files and prints info.
1945 1950  
1946 1951 Usage: olefile2.py [-d] [-c] <file> [file2 ...]
... ... @@ -1948,7 +1953,7 @@ Usage: olefile2.py [-d] [-c] <file> [file2 ...]
1948 1953 Options:
1949 1954 -d : debug mode (display a lot of debug information, for developers only)
1950 1955 -c : check all streams (for debugging purposes)
1951   -"""
  1956 +""")
1952 1957 sys.exit()
1953 1958  
1954 1959 check_streams = False
... ... @@ -1965,13 +1970,13 @@ Options:
1965 1970 continue
1966 1971  
1967 1972 ole = OleFileIO(filename)#, raise_defects=DEFECT_INCORRECT)
1968   - print "-" * 68
1969   - print filename
1970   - print "-" * 68
  1973 + print("-" * 68)
  1974 + print(filename)
  1975 + print("-" * 68)
1971 1976 ole.dumpdirectory()
1972 1977 for streamname in ole.listdir():
1973 1978 if streamname[-1][0] == "\005":
1974   - print streamname, ": properties"
  1979 + print( "%s : properties" % streamname)
1975 1980 props = ole.getproperties(streamname, convert_time=True)
1976 1981 props = props.items()
1977 1982 props.sort()
... ... @@ -1986,22 +1991,22 @@ Options:
1986 1991 if chr(c) in v:
1987 1992 v = '(binary data)'
1988 1993 break
1989   - print " ", k, v
  1994 + print(" ", k, v)
1990 1995  
1991 1996 if check_streams:
1992 1997 # Read all streams to check if there are errors:
1993   - print '\nChecking streams...'
  1998 + print('\nChecking streams...')
1994 1999 for streamname in ole.listdir():
1995 2000 # print name using repr() to convert binary chars to \xNN:
1996   - print '-', repr('/'.join(streamname)),'-',
  2001 + print('- %s -' % repr('/'.join(streamname)))
1997 2002 st_type = ole.get_type(streamname)
1998 2003 if st_type == STGTY_STREAM:
1999   - print 'size %d' % ole.get_size(streamname)
  2004 + print('size %d' % ole.get_size(streamname))
2000 2005 # just try to read stream in memory:
2001 2006 ole.openstream(streamname)
2002 2007 else:
2003   - print 'NOT a stream : type=%d' % st_type
2004   - print ''
  2008 + print('NOT a stream : type=%d' % st_type)
  2009 + print('')
2005 2010  
2006 2011 ## for streamname in ole.listdir():
2007 2012 ## # print name using repr() to convert binary chars to \xNN:
... ... @@ -2009,34 +2014,34 @@ Options:
2009 2014 ## print ole.getmtime(streamname)
2010 2015 ## print ''
2011 2016  
2012   - print 'Modification/Creation times of all directory entries:'
  2017 + print('Modification/Creation times of all directory entries:')
2013 2018 for entry in ole.direntries:
2014 2019 if entry is not None:
2015   - print '- %s: mtime=%s ctime=%s' % (entry.name,
2016   - entry.getmtime(), entry.getctime())
2017   - print ''
  2020 + print('- %s: mtime=%s ctime=%s' % (entry.name,
  2021 + entry.getmtime(), entry.getctime()))
  2022 + print('')
2018 2023  
2019 2024 # parse and display metadata:
2020 2025 meta = ole.get_metadata()
2021 2026 meta.dump()
2022   - print ''
  2027 + print('')
2023 2028 #[PL] Test a few new methods:
2024 2029 root = ole.get_rootentry_name()
2025   - print 'Root entry name: "%s"' % root
  2030 + print('Root entry name: "%s"' % root)
2026 2031 if ole.exists('worddocument'):
2027   - print "This is a Word document."
2028   - print "type of stream 'WordDocument':", ole.get_type('worddocument')
2029   - print "size :", ole.get_size('worddocument')
  2032 + print("This is a Word document.")
  2033 + print("type of stream 'WordDocument':", ole.get_type('worddocument'))
  2034 + print("size :", ole.get_size('worddocument'))
2030 2035 if ole.exists('macros/vba'):
2031   - print "This document may contain VBA macros."
  2036 + print("This document may contain VBA macros.")
2032 2037  
2033 2038 # print parsing issues:
2034   - print '\nNon-fatal issues raised during parsing:'
  2039 + print('\nNon-fatal issues raised during parsing:')
2035 2040 if ole.parsing_issues:
2036 2041 for exctype, msg in ole.parsing_issues:
2037   - print '- %s: %s' % (exctype.__name__, msg)
  2042 + print('- %s: %s' % (exctype.__name__, msg))
2038 2043 else:
2039   - print 'None'
  2044 + print('None')
2040 2045 ## except IOError, v:
2041 2046 ## print "***", "cannot read", file, "-", v
2042 2047  
... ...