Commit e28b20012629a29202510e05e8701942af7c7dbd

Authored by decalage2
1 parent 7e52ad1a

ftguess: reorganised Excel classes, added is_excel and is_word shortcuts

Showing 1 changed file with 57 additions and 21 deletions
oletools/ftguess.py
@@ -148,16 +148,23 @@ class FTYPE(object): @@ -148,16 +148,23 @@ class FTYPE(object):
148 Constants for file types 148 Constants for file types
149 """ 149 """
150 ZIP = 'Zip' 150 ZIP = 'Zip'
  151 + WORD = 'Word'
  152 + WORD6 = 'Word6'
151 WORD97 = 'Word97' 153 WORD97 = 'Word97'
152 WORD2007 = 'Word2007' 154 WORD2007 = 'Word2007'
153 - WORD2007_TEMPLATE = 'Word2007T'  
154 - WORD2007_MACRO = 'Word2007M'  
155 - WORD2007_TEMPLATE_MACRO = 'Word2007TM'  
156 - WORD6 = 'Word6' 155 + WORD2007_DOCX = 'Word2007_DOCX'
  156 + WORD2007_DOTX = 'Word2007_DOTX'
  157 + WORD2007_DOCM = 'Word2007_DOCM'
  158 + WORD2007_DOTM = 'Word2007_DOTM'
  159 + EXCEL = 'Excel'
  160 + EXCEL5 = 'Excel5'
157 EXCEL97 = 'Excel97' 161 EXCEL97 = 'Excel97'
158 EXCEL2007 = 'Excel2007' 162 EXCEL2007 = 'Excel2007'
159 - EXCEL2007_MACRO = 'Excel2007_XLSM'  
160 - EXCEL5 = 'Excel5' 163 + EXCEL2007_XLSX = 'Excel2007_XLSX'
  164 + EXCEL2007_XLSM = 'Excel2007_XLSM'
  165 + EXCEL2007_XLTX = 'Excel2007_XLTX'
  166 + EXCEL2007_XLTM = 'Excel2007_XLTM'
  167 + EXCEL2007_XLSB = 'Excel2007_XLSB'
161 # TODO: XLSB, DOCM, PPTM, PPSX, PPSM, ... 168 # TODO: XLSB, DOCM, PPTM, PPSX, PPSM, ...
162 RTF = 'RTF' 169 RTF = 'RTF'
163 HTML = 'HTML' 170 HTML = 'HTML'
@@ -444,6 +451,14 @@ class FType_Generic_OpenXML(FType_Base): @@ -444,6 +451,14 @@ class FType_Generic_OpenXML(FType_Base):
444 451
445 # --- WORD Formats --- 452 # --- WORD Formats ---
446 453
  454 +class FTYpe_Word(FType_Base):
  455 + 'Base class for all MS Word file types'
  456 + application = APP.MSWORD
  457 + name = 'MS Word (generic)'
  458 + longname = 'MS Word Document or Template (generic)'
  459 +
  460 +# TODO: all Word FTypes should inherit from FTYpe_Word (until they do, FileTypeGuesser.is_word() cannot match them)
  461 +
447 class FType_Word97(FType_OLE_CLSID_Base): 462 class FType_Word97(FType_OLE_CLSID_Base):
448 application = APP.MSWORD 463 application = APP.MSWORD
449 filetype = FTYPE.WORD97 464 filetype = FTYPE.WORD97
@@ -469,44 +484,48 @@ class FType_Word6(FType_OLE_CLSID_Base): @@ -469,44 +484,48 @@ class FType_Word6(FType_OLE_CLSID_Base):
469 484
470 class FType_Word2007(FType_Generic_OpenXML): 485 class FType_Word2007(FType_Generic_OpenXML):
471 application = APP.MSWORD 486 application = APP.MSWORD
472 - filetype = FTYPE.WORD2007 487 + filetype = FTYPE.WORD2007_DOCX
473 name = 'MS Word 2007+ Document' 488 name = 'MS Word 2007+ Document'
474 longname = 'MS Word 2007+ Document (.docx)' 489 longname = 'MS Word 2007+ Document (.docx)'
475 extensions = ['docx'] 490 extensions = ['docx']
476 491
477 class FType_Word2007_Macro(FType_Generic_OpenXML): 492 class FType_Word2007_Macro(FType_Generic_OpenXML):
478 application = APP.MSWORD 493 application = APP.MSWORD
479 - filetype = FTYPE.WORD2007_MACRO 494 + filetype = FTYPE.WORD2007_DOCM
480 name = 'MS Word 2007+ Macro-Enabled Document' 495 name = 'MS Word 2007+ Macro-Enabled Document'
481 longname = 'MS Word 2007+ Macro-Enabled Document (.docm)' 496 longname = 'MS Word 2007+ Macro-Enabled Document (.docm)'
482 extensions = ['docm'] 497 extensions = ['docm']
483 498
484 class FType_Word2007_Template(FType_Generic_OpenXML): 499 class FType_Word2007_Template(FType_Generic_OpenXML):
485 application = APP.MSWORD 500 application = APP.MSWORD
486 - filetype = FTYPE.WORD2007_TEMPLATE 501 + filetype = FTYPE.WORD2007_DOTX
487 name = 'MS Word 2007+ Template' 502 name = 'MS Word 2007+ Template'
488 longname = 'MS Word 2007+ Template (.dotx)' 503 longname = 'MS Word 2007+ Template (.dotx)'
489 extensions = ['dotx'] 504 extensions = ['dotx']
490 505
491 class FType_Word2007_Template_Macro(FType_Generic_OpenXML): 506 class FType_Word2007_Template_Macro(FType_Generic_OpenXML):
492 application = APP.MSWORD 507 application = APP.MSWORD
493 - filetype = FTYPE.WORD2007_TEMPLATE_MACRO 508 + filetype = FTYPE.WORD2007_DOTM
494 name = 'MS Word 2007+ Macro-Enabled Template' 509 name = 'MS Word 2007+ Macro-Enabled Template'
495 longname = 'MS Word 2007+ Macro-Enabled Template (.dotm)' 510 longname = 'MS Word 2007+ Macro-Enabled Template (.dotm)'
496 extensions = ['dotm'] 511 extensions = ['dotm']
497 512
498 # --- EXCEL Formats --- 513 # --- EXCEL Formats ---
499 514
500 -class FType_Excel97(FType_OLE_CLSID_Base): 515 +class FTYpe_Excel(FType_Base):
  516 + 'Base class for all MS Excel file types'
501 application = APP.MSEXCEL 517 application = APP.MSEXCEL
  518 + name = 'MS Excel (generic)'
  519 + longname = 'MS Excel Workbook or Template (generic)'
  520 +
  521 +class FType_Excel97(FTYpe_Excel):
502 filetype = FTYPE.EXCEL97 522 filetype = FTYPE.EXCEL97
503 name = 'MS Excel 97 Workbook' 523 name = 'MS Excel 97 Workbook'
504 longname = 'MS Excel 97-2003 Workbook or Template' 524 longname = 'MS Excel 97-2003 Workbook or Template'
505 CLSIDS = ('00020820-0000-0000-C000-000000000046',) 525 CLSIDS = ('00020820-0000-0000-C000-000000000046',)
506 extensions = ['xls', 'xlt', 'xla'] 526 extensions = ['xls', 'xlt', 'xla']
507 527
508 -class FType_Excel5(FType_OLE_CLSID_Base):  
509 - application = APP.MSEXCEL 528 +class FType_Excel5(FTYpe_Excel):
510 filetype = FTYPE.EXCEL5 529 filetype = FTYPE.EXCEL5
511 name = 'MS Excel 5.0/95 Workbook' 530 name = 'MS Excel 5.0/95 Workbook'
512 longname = 'MS Excel 5.0/95 Workbook, Template or Add-in' 531 longname = 'MS Excel 5.0/95 Workbook, Template or Add-in'
@@ -514,18 +533,21 @@ class FType_Excel5(FType_OLE_CLSID_Base): @@ -514,18 +533,21 @@ class FType_Excel5(FType_OLE_CLSID_Base):
514 extensions = ['xls', 'xlt', 'xla'] 533 extensions = ['xls', 'xlt', 'xla']
515 # TODO: this CLSID is also used in Excel addins (.xla) saved by MS Excel 365 534 # TODO: this CLSID is also used in Excel addins (.xla) saved by MS Excel 365
516 535
517 -class FType_Excel2007_Workbook(FType_Generic_OpenXML):  
518 - application = APP.MSEXCEL  
519 - filetype = FTYPE.EXCEL2007 536 +class FTYpe_Excel2007(FTYpe_Excel):
  537 + 'Base class for all MS Excel 2007 file types'
  538 + name = 'MS Excel 2007+ (generic)'
  539 + longname = 'MS Excel 2007+ Workbook or Template (generic)'
  540 +
  541 +class FType_Excel2007_XLSX (FTYpe_Excel2007):
  542 + filetype = FTYPE.EXCEL2007_XLSX
520 name = 'MS Excel 2007+ Workbook' 543 name = 'MS Excel 2007+ Workbook'
521 longname = 'MS Excel 2007+ Workbook (.xlsx)' 544 longname = 'MS Excel 2007+ Workbook (.xlsx)'
522 extensions = ['xlsx'] 545 extensions = ['xlsx']
523 content_types = ('application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',) 546 content_types = ('application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',)
524 PUID = 'fmt/214' 547 PUID = 'fmt/214'
525 548
526 -class FType_Excel2007_Workbook_MacroEnabled(FType_Generic_OpenXML):  
527 - application = APP.MSEXCEL  
528 - filetype = FTYPE.EXCEL2007_MACRO 549 +class FType_Excel2007_XLSM (FTYpe_Excel2007):
  550 + filetype = FTYPE.EXCEL2007_XLSM
529 name = 'MS Excel 2007+ Macro-Enabled Workbook' 551 name = 'MS Excel 2007+ Macro-Enabled Workbook'
530 longname = 'MS Excel 2007+ Macro-Enabled Workbook (.xlsm)' 552 longname = 'MS Excel 2007+ Macro-Enabled Workbook (.xlsm)'
531 extensions = ['xlsm'] 553 extensions = ['xlsm']
@@ -550,8 +572,8 @@ openxml_ftypes = { @@ -550,8 +572,8 @@ openxml_ftypes = {
550 'application/vnd.openxmlformats-officedocument.wordprocessingml.template.main+xml': FType_Word2007_Template, 572 'application/vnd.openxmlformats-officedocument.wordprocessingml.template.main+xml': FType_Word2007_Template,
551 'application/vnd.ms-word.template.macroEnabledTemplate.main+xml': FType_Word2007_Template_Macro, 573 'application/vnd.ms-word.template.macroEnabledTemplate.main+xml': FType_Word2007_Template_Macro,
552 # EXCEL 574 # EXCEL
553 - 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml': FType_Excel2007_Workbook,  
554 - 'application/vnd.ms-excel.sheet.macroEnabled.main+xml': FType_Excel2007_Workbook_MacroEnabled, 575 + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml': FType_Excel2007_XLSX,
  576 + 'application/vnd.ms-excel.sheet.macroEnabled.main+xml': FType_Excel2007_XLSM,
555 'application/vnd.ms-excel.sheet.binary.macroEnabled.main': None, 577 'application/vnd.ms-excel.sheet.binary.macroEnabled.main': None,
556 } 578 }
557 579
@@ -628,6 +650,20 @@ class FileTypeGuesser(object): @@ -628,6 +650,20 @@ class FileTypeGuesser(object):
628 if self.zipfile is not None: 650 if self.zipfile is not None:
629 self.zipfile.close() 651 self.zipfile.close()
630 652
  653 + def is_word(self):
  654 + """
  655 + Shortcut to check if a file is a Word document or template
  656 + :return: bool
  657 + """
  658 + return issubclass(self.ftype, FTYpe_Word)
  659 +
  660 + def is_excel(self):
  661 + """
  662 + Shortcut to check if a file is an Excel workbook, template or add-in
  663 + :return: bool
  664 + """
  665 + return issubclass(self.ftype, FTYpe_Excel)
  666 +
631 667
632 # === FUNCTIONS ============================================================== 668 # === FUNCTIONS ==============================================================
633 669