Commit e28b20012629a29202510e05e8701942af7c7dbd

Authored by decalage2
1 parent 7e52ad1a

ftguess: reorganised Excel classes, added is_excel and is_word shortcuts

Showing 1 changed file with 57 additions and 21 deletions
oletools/ftguess.py
... ... @@ -148,16 +148,23 @@ class FTYPE(object):
148 148 Constants for file types
149 149 """
150 150 ZIP = 'Zip'
  151 + WORD = 'Word'
  152 + WORD6 = 'Word6'
151 153 WORD97 = 'Word97'
152 154 WORD2007 = 'Word2007'
153   - WORD2007_TEMPLATE = 'Word2007T'
154   - WORD2007_MACRO = 'Word2007M'
155   - WORD2007_TEMPLATE_MACRO = 'Word2007TM'
156   - WORD6 = 'Word6'
  155 + WORD2007_DOCX = 'Word2007_DOCX'
  156 + WORD2007_DOTX = 'Word2007_DOTX'
  157 + WORD2007_DOCM = 'Word2007_DOCM'
  158 + WORD2007_DOTM = 'Word2007_DOTM'
  159 + EXCEL = 'Excel'
  160 + EXCEL5 = 'Excel5'
157 161 EXCEL97 = 'Excel97'
158 162 EXCEL2007 = 'Excel2007'
159   - EXCEL2007_MACRO = 'Excel2007_XLSM'
160   - EXCEL5 = 'Excel5'
  163 + EXCEL2007_XLSX = 'Excel2007_XLSX'
  164 + EXCEL2007_XLSM = 'Excel2007_XLSM'
  165 + EXCEL2007_XLTX = 'Excel2007_XLTX'
  166 + EXCEL2007_XLTM = 'Excel2007_XLTM'
  167 + EXCEL2007_XLSB = 'Excel2007_XLSB'
161 168 # TODO: XLSB, DOCM, PPTM, PPSX, PPSM, ...
162 169 RTF = 'RTF'
163 170 HTML = 'HTML'
... ... @@ -444,6 +451,14 @@ class FType_Generic_OpenXML(FType_Base):
444 451  
445 452 # --- WORD Formats ---
446 453  
  454 +class FTYpe_Word(FType_Base):
  455 + 'Base class for all MS Word file types'
  456 + application = APP.MSWORD
  457 + name = 'MS Word (generic)'
  458 + longname = 'MS Word Document or Template (generic)'
  459 +
  460 +# TODO: all Word FTypes should inherit from the Word base class above (FTYpe_Word)
  461 +
447 462 class FType_Word97(FType_OLE_CLSID_Base):
448 463 application = APP.MSWORD
449 464 filetype = FTYPE.WORD97
... ... @@ -469,44 +484,48 @@ class FType_Word6(FType_OLE_CLSID_Base):
469 484  
470 485 class FType_Word2007(FType_Generic_OpenXML):
471 486 application = APP.MSWORD
472   - filetype = FTYPE.WORD2007
  487 + filetype = FTYPE.WORD2007_DOCX
473 488 name = 'MS Word 2007+ Document'
474 489 longname = 'MS Word 2007+ Document (.docx)'
475 490 extensions = ['docx']
476 491  
477 492 class FType_Word2007_Macro(FType_Generic_OpenXML):
478 493 application = APP.MSWORD
479   - filetype = FTYPE.WORD2007_MACRO
  494 + filetype = FTYPE.WORD2007_DOCM
480 495 name = 'MS Word 2007+ Macro-Enabled Document'
481 496 longname = 'MS Word 2007+ Macro-Enabled Document (.docm)'
482 497 extensions = ['docm']
483 498  
484 499 class FType_Word2007_Template(FType_Generic_OpenXML):
485 500 application = APP.MSWORD
486   - filetype = FTYPE.WORD2007_TEMPLATE
  501 + filetype = FTYPE.WORD2007_DOTX
487 502 name = 'MS Word 2007+ Template'
488 503 longname = 'MS Word 2007+ Template (.dotx)'
489 504 extensions = ['dotx']
490 505  
491 506 class FType_Word2007_Template_Macro(FType_Generic_OpenXML):
492 507 application = APP.MSWORD
493   - filetype = FTYPE.WORD2007_TEMPLATE_MACRO
  508 + filetype = FTYPE.WORD2007_DOTM
494 509 name = 'MS Word 2007+ Macro-Enabled Template'
495 510 longname = 'MS Word 2007+ Macro-Enabled Template (.dotm)'
496 511 extensions = ['dotm']
497 512  
498 513 # --- EXCEL Formats ---
499 514  
500   -class FType_Excel97(FType_OLE_CLSID_Base):
  515 +class FTYpe_Excel(FType_Base):
  516 + 'Base class for all MS Excel file types'
501 517 application = APP.MSEXCEL
  518 + name = 'MS Excel (generic)'
  519 + longname = 'MS Excel Workbook or Template (generic)'
  520 +
  521 +class FType_Excel97(FTYpe_Excel):
502 522 filetype = FTYPE.EXCEL97
503 523 name = 'MS Excel 97 Workbook'
504 524 longname = 'MS Excel 97-2003 Workbook or Template'
505 525 CLSIDS = ('00020820-0000-0000-C000-000000000046',)
506 526 extensions = ['xls', 'xlt', 'xla']
507 527  
508   -class FType_Excel5(FType_OLE_CLSID_Base):
509   - application = APP.MSEXCEL
  528 +class FType_Excel5(FTYpe_Excel):
510 529 filetype = FTYPE.EXCEL5
511 530 name = 'MS Excel 5.0/95 Workbook'
512 531 longname = 'MS Excel 5.0/95 Workbook, Template or Add-in'
... ... @@ -514,18 +533,21 @@ class FType_Excel5(FType_OLE_CLSID_Base):
514 533 extensions = ['xls', 'xlt', 'xla']
515 534 # TODO: this CLSID is also used in Excel addins (.xla) saved by MS Excel 365
516 535  
517   -class FType_Excel2007_Workbook(FType_Generic_OpenXML):
518   - application = APP.MSEXCEL
519   - filetype = FTYPE.EXCEL2007
  536 +class FTYpe_Excel2007(FTYpe_Excel):
  537 + 'Base class for all MS Excel 2007 file types'
  538 + name = 'MS Excel 2007+ (generic)'
  539 + longname = 'MS Excel 2007+ Workbook or Template (generic)'
  540 +
  541 +class FType_Excel2007_XLSX (FTYpe_Excel2007):
  542 + filetype = FTYPE.EXCEL2007_XLSX
520 543 name = 'MS Excel 2007+ Workbook'
521 544 longname = 'MS Excel 2007+ Workbook (.xlsx)'
522 545 extensions = ['xlsx']
523 546 content_types = ('application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',)
524 547 PUID = 'fmt/214'
525 548  
526   -class FType_Excel2007_Workbook_MacroEnabled(FType_Generic_OpenXML):
527   - application = APP.MSEXCEL
528   - filetype = FTYPE.EXCEL2007_MACRO
  549 +class FType_Excel2007_XLSM (FTYpe_Excel2007):
  550 + filetype = FTYPE.EXCEL2007_XLSM
529 551 name = 'MS Excel 2007+ Macro-Enabled Workbook'
530 552 longname = 'MS Excel 2007+ Macro-Enabled Workbook (.xlsm)'
531 553 extensions = ['xlsm']
... ... @@ -550,8 +572,8 @@ openxml_ftypes = {
550 572 'application/vnd.openxmlformats-officedocument.wordprocessingml.template.main+xml': FType_Word2007_Template,
551 573 'application/vnd.ms-word.template.macroEnabledTemplate.main+xml': FType_Word2007_Template_Macro,
552 574 # EXCEL
553   - 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml': FType_Excel2007_Workbook,
554   - 'application/vnd.ms-excel.sheet.macroEnabled.main+xml': FType_Excel2007_Workbook_MacroEnabled,
  575 + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml': FType_Excel2007_XLSX,
  576 + 'application/vnd.ms-excel.sheet.macroEnabled.main+xml': FType_Excel2007_XLSM,
555 577 'application/vnd.ms-excel.sheet.binary.macroEnabled.main': None,
556 578 }
557 579  
... ... @@ -628,6 +650,20 @@ class FileTypeGuesser(object):
628 650 if self.zipfile is not None:
629 651 self.zipfile.close()
630 652  
  653 + def is_word(self):
  654 + """
  655 + Shortcut to check if a file is a Word document or template
  656 + :return: bool
  657 + """
  658 + return issubclass(self.ftype, FTYpe_Word)
  659 +
  660 + def is_excel(self):
  661 + """
  662 + Shortcut to check if a file is an Excel workbook, template or add-in
  663 + :return: bool
  664 + """
  665 + return issubclass(self.ftype, FTYpe_Excel)
  666 +
631 667  
632 668 # === FUNCTIONS ==============================================================
633 669  
... ...