Commit e28b20012629a29202510e05e8701942af7c7dbd
1 parent 7e52ad1a
ftguess: reorganised Excel classes, added is_excel and is_word shortcuts
Showing 1 changed file with 57 additions and 21 deletions
oletools/ftguess.py
| @@ -148,16 +148,23 @@ class FTYPE(object): | @@ -148,16 +148,23 @@ class FTYPE(object): | ||
| 148 | Constants for file types | 148 | Constants for file types |
| 149 | """ | 149 | """ |
| 150 | ZIP = 'Zip' | 150 | ZIP = 'Zip' |
| 151 | + WORD = 'Word' | ||
| 152 | + WORD6 = 'Word6' | ||
| 151 | WORD97 = 'Word97' | 153 | WORD97 = 'Word97' |
| 152 | WORD2007 = 'Word2007' | 154 | WORD2007 = 'Word2007' |
| 153 | - WORD2007_TEMPLATE = 'Word2007T' | ||
| 154 | - WORD2007_MACRO = 'Word2007M' | ||
| 155 | - WORD2007_TEMPLATE_MACRO = 'Word2007TM' | ||
| 156 | - WORD6 = 'Word6' | 155 | + WORD2007_DOCX = 'Word2007_DOCX' |
| 156 | + WORD2007_DOTX = 'Word2007_DOTX' | ||
| 157 | + WORD2007_DOCM = 'Word2007_DOCM' | ||
| 158 | + WORD2007_DOTM = 'Word2007_DOTM' | ||
| 159 | + EXCEL = 'Excel' | ||
| 160 | + EXCEL5 = 'Excel5' | ||
| 157 | EXCEL97 = 'Excel97' | 161 | EXCEL97 = 'Excel97' |
| 158 | EXCEL2007 = 'Excel2007' | 162 | EXCEL2007 = 'Excel2007' |
| 159 | - EXCEL2007_MACRO = 'Excel2007_XLSM' | ||
| 160 | - EXCEL5 = 'Excel5' | 163 | + EXCEL2007_XLSX = 'Excel2007_XLSX' |
| 164 | + EXCEL2007_XLSM = 'Excel2007_XLSM' | ||
| 165 | + EXCEL2007_XLTX = 'Excel2007_XLTX' | ||
| 166 | + EXCEL2007_XLTM = 'Excel2007_XLTM' | ||
| 167 | + EXCEL2007_XLSB = 'Excel2007_XLSB' | ||
| 161 | # TODO: XLSB, DOCM, PPTM, PPSX, PPSM, ... | 168 | # TODO: XLSB, DOCM, PPTM, PPSX, PPSM, ... |
| 162 | RTF = 'RTF' | 169 | RTF = 'RTF' |
| 163 | HTML = 'HTML' | 170 | HTML = 'HTML' |
| @@ -444,6 +451,14 @@ class FType_Generic_OpenXML(FType_Base): | @@ -444,6 +451,14 @@ class FType_Generic_OpenXML(FType_Base): | ||
| 444 | 451 | ||
| 445 | # --- WORD Formats --- | 452 | # --- WORD Formats --- |
| 446 | 453 | ||
| 454 | +class FTYpe_Word(FType_Base): | ||
| 455 | + 'Base class for all MS Word file types' | ||
| 456 | + application = APP.MSWORD | ||
| 457 | + name = 'MS Word (generic)' | ||
| 458 | + longname = 'MS Word Document or Template (generic)' | ||
| 459 | + | ||
| 460 | +# TODO: all word FTypes should inherit from FType_Word | ||
| 461 | + | ||
| 447 | class FType_Word97(FType_OLE_CLSID_Base): | 462 | class FType_Word97(FType_OLE_CLSID_Base): |
| 448 | application = APP.MSWORD | 463 | application = APP.MSWORD |
| 449 | filetype = FTYPE.WORD97 | 464 | filetype = FTYPE.WORD97 |
| @@ -469,44 +484,48 @@ class FType_Word6(FType_OLE_CLSID_Base): | @@ -469,44 +484,48 @@ class FType_Word6(FType_OLE_CLSID_Base): | ||
| 469 | 484 | ||
| 470 | class FType_Word2007(FType_Generic_OpenXML): | 485 | class FType_Word2007(FType_Generic_OpenXML): |
| 471 | application = APP.MSWORD | 486 | application = APP.MSWORD |
| 472 | - filetype = FTYPE.WORD2007 | 487 | + filetype = FTYPE.WORD2007_DOCX |
| 473 | name = 'MS Word 2007+ Document' | 488 | name = 'MS Word 2007+ Document' |
| 474 | longname = 'MS Word 2007+ Document (.docx)' | 489 | longname = 'MS Word 2007+ Document (.docx)' |
| 475 | extensions = ['docx'] | 490 | extensions = ['docx'] |
| 476 | 491 | ||
| 477 | class FType_Word2007_Macro(FType_Generic_OpenXML): | 492 | class FType_Word2007_Macro(FType_Generic_OpenXML): |
| 478 | application = APP.MSWORD | 493 | application = APP.MSWORD |
| 479 | - filetype = FTYPE.WORD2007_MACRO | 494 | + filetype = FTYPE.WORD2007_DOCM |
| 480 | name = 'MS Word 2007+ Macro-Enabled Document' | 495 | name = 'MS Word 2007+ Macro-Enabled Document' |
| 481 | longname = 'MS Word 2007+ Macro-Enabled Document (.docm)' | 496 | longname = 'MS Word 2007+ Macro-Enabled Document (.docm)' |
| 482 | extensions = ['docm'] | 497 | extensions = ['docm'] |
| 483 | 498 | ||
| 484 | class FType_Word2007_Template(FType_Generic_OpenXML): | 499 | class FType_Word2007_Template(FType_Generic_OpenXML): |
| 485 | application = APP.MSWORD | 500 | application = APP.MSWORD |
| 486 | - filetype = FTYPE.WORD2007_TEMPLATE | 501 | + filetype = FTYPE.WORD2007_DOTX |
| 487 | name = 'MS Word 2007+ Template' | 502 | name = 'MS Word 2007+ Template' |
| 488 | longname = 'MS Word 2007+ Template (.dotx)' | 503 | longname = 'MS Word 2007+ Template (.dotx)' |
| 489 | extensions = ['dotx'] | 504 | extensions = ['dotx'] |
| 490 | 505 | ||
| 491 | class FType_Word2007_Template_Macro(FType_Generic_OpenXML): | 506 | class FType_Word2007_Template_Macro(FType_Generic_OpenXML): |
| 492 | application = APP.MSWORD | 507 | application = APP.MSWORD |
| 493 | - filetype = FTYPE.WORD2007_TEMPLATE_MACRO | 508 | + filetype = FTYPE.WORD2007_DOTM |
| 494 | name = 'MS Word 2007+ Macro-Enabled Template' | 509 | name = 'MS Word 2007+ Macro-Enabled Template' |
| 495 | longname = 'MS Word 2007+ Macro-Enabled Template (.dotm)' | 510 | longname = 'MS Word 2007+ Macro-Enabled Template (.dotm)' |
| 496 | extensions = ['dotm'] | 511 | extensions = ['dotm'] |
| 497 | 512 | ||
| 498 | # --- EXCEL Formats --- | 513 | # --- EXCEL Formats --- |
| 499 | 514 | ||
| 500 | -class FType_Excel97(FType_OLE_CLSID_Base): | 515 | +class FTYpe_Excel(FType_Base): |
| 516 | + 'Base class for all MS Excel file types' | ||
| 501 | application = APP.MSEXCEL | 517 | application = APP.MSEXCEL |
| 518 | + name = 'MS Excel (generic)' | ||
| 519 | + longname = 'MS Excel Workbook or Template (generic)' | ||
| 520 | + | ||
| 521 | +class FType_Excel97(FTYpe_Excel): | ||
| 502 | filetype = FTYPE.EXCEL97 | 522 | filetype = FTYPE.EXCEL97 |
| 503 | name = 'MS Excel 97 Workbook' | 523 | name = 'MS Excel 97 Workbook' |
| 504 | longname = 'MS Excel 97-2003 Workbook or Template' | 524 | longname = 'MS Excel 97-2003 Workbook or Template' |
| 505 | CLSIDS = ('00020820-0000-0000-C000-000000000046',) | 525 | CLSIDS = ('00020820-0000-0000-C000-000000000046',) |
| 506 | extensions = ['xls', 'xlt', 'xla'] | 526 | extensions = ['xls', 'xlt', 'xla'] |
| 507 | 527 | ||
| 508 | -class FType_Excel5(FType_OLE_CLSID_Base): | ||
| 509 | - application = APP.MSEXCEL | 528 | +class FType_Excel5(FTYpe_Excel): |
| 510 | filetype = FTYPE.EXCEL5 | 529 | filetype = FTYPE.EXCEL5 |
| 511 | name = 'MS Excel 5.0/95 Workbook' | 530 | name = 'MS Excel 5.0/95 Workbook' |
| 512 | longname = 'MS Excel 5.0/95 Workbook, Template or Add-in' | 531 | longname = 'MS Excel 5.0/95 Workbook, Template or Add-in' |
| @@ -514,18 +533,21 @@ class FType_Excel5(FType_OLE_CLSID_Base): | @@ -514,18 +533,21 @@ class FType_Excel5(FType_OLE_CLSID_Base): | ||
| 514 | extensions = ['xls', 'xlt', 'xla'] | 533 | extensions = ['xls', 'xlt', 'xla'] |
| 515 | # TODO: this CLSID is also used in Excel addins (.xla) saved by MS Excel 365 | 534 | # TODO: this CLSID is also used in Excel addins (.xla) saved by MS Excel 365 |
| 516 | 535 | ||
| 517 | -class FType_Excel2007_Workbook(FType_Generic_OpenXML): | ||
| 518 | - application = APP.MSEXCEL | ||
| 519 | - filetype = FTYPE.EXCEL2007 | 536 | +class FTYpe_Excel2007(FTYpe_Excel): |
| 537 | + 'Base class for all MS Excel 2007 file types' | ||
| 538 | + name = 'MS Excel 2007+ (generic)' | ||
| 539 | + longname = 'MS Excel 2007+ Workbook or Template (generic)' | ||
| 540 | + | ||
| 541 | +class FType_Excel2007_XLSX (FTYpe_Excel2007): | ||
| 542 | + filetype = FTYPE.EXCEL2007_XLSX | ||
| 520 | name = 'MS Excel 2007+ Workbook' | 543 | name = 'MS Excel 2007+ Workbook' |
| 521 | longname = 'MS Excel 2007+ Workbook (.xlsx)' | 544 | longname = 'MS Excel 2007+ Workbook (.xlsx)' |
| 522 | extensions = ['xlsx'] | 545 | extensions = ['xlsx'] |
| 523 | content_types = ('application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',) | 546 | content_types = ('application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',) |
| 524 | PUID = 'fmt/214' | 547 | PUID = 'fmt/214' |
| 525 | 548 | ||
| 526 | -class FType_Excel2007_Workbook_MacroEnabled(FType_Generic_OpenXML): | ||
| 527 | - application = APP.MSEXCEL | ||
| 528 | - filetype = FTYPE.EXCEL2007_MACRO | 549 | +class FType_Excel2007_XLSM (FTYpe_Excel2007): |
| 550 | + filetype = FTYPE.EXCEL2007_XLSM | ||
| 529 | name = 'MS Excel 2007+ Macro-Enabled Workbook' | 551 | name = 'MS Excel 2007+ Macro-Enabled Workbook' |
| 530 | longname = 'MS Excel 2007+ Macro-Enabled Workbook (.xlsm)' | 552 | longname = 'MS Excel 2007+ Macro-Enabled Workbook (.xlsm)' |
| 531 | extensions = ['xlsm'] | 553 | extensions = ['xlsm'] |
| @@ -550,8 +572,8 @@ openxml_ftypes = { | @@ -550,8 +572,8 @@ openxml_ftypes = { | ||
| 550 | 'application/vnd.openxmlformats-officedocument.wordprocessingml.template.main+xml': FType_Word2007_Template, | 572 | 'application/vnd.openxmlformats-officedocument.wordprocessingml.template.main+xml': FType_Word2007_Template, |
| 551 | 'application/vnd.ms-word.template.macroEnabledTemplate.main+xml': FType_Word2007_Template_Macro, | 573 | 'application/vnd.ms-word.template.macroEnabledTemplate.main+xml': FType_Word2007_Template_Macro, |
| 552 | # EXCEL | 574 | # EXCEL |
| 553 | - 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml': FType_Excel2007_Workbook, | ||
| 554 | - 'application/vnd.ms-excel.sheet.macroEnabled.main+xml': FType_Excel2007_Workbook_MacroEnabled, | 575 | + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml': FType_Excel2007_XLSX, |
| 576 | + 'application/vnd.ms-excel.sheet.macroEnabled.main+xml': FType_Excel2007_XLSM, | ||
| 555 | 'application/vnd.ms-excel.sheet.binary.macroEnabled.main': None, | 577 | 'application/vnd.ms-excel.sheet.binary.macroEnabled.main': None, |
| 556 | } | 578 | } |
| 557 | 579 | ||
| @@ -628,6 +650,20 @@ class FileTypeGuesser(object): | @@ -628,6 +650,20 @@ class FileTypeGuesser(object): | ||
| 628 | if self.zipfile is not None: | 650 | if self.zipfile is not None: |
| 629 | self.zipfile.close() | 651 | self.zipfile.close() |
| 630 | 652 | ||
| 653 | + def is_word(self): | ||
| 654 | + """ | ||
| 655 | + Shortcut to check if a file is a Word document or template | ||
| 656 | + :return: bool | ||
| 657 | + """ | ||
| 658 | + return issubclass(self.ftype, FTYpe_Word) | ||
| 659 | + | ||
| 660 | + def is_excel(self): | ||
| 661 | + """ | ||
| 662 | + Shortcut to check if a file is an Excel workbook, template or add-in | ||
| 663 | + :return: bool | ||
| 664 | + """ | ||
| 665 | + return issubclass(self.ftype, FTYpe_Excel) | ||
| 666 | + | ||
| 631 | 667 | ||
| 632 | # === FUNCTIONS ============================================================== | 668 | # === FUNCTIONS ============================================================== |
| 633 | 669 |