Commit e28b20012629a29202510e05e8701942af7c7dbd
1 parent
7e52ad1a
ftguess: reorganised Excel classes, added is_excel and is_word shortcuts
Showing
1 changed file
with
57 additions
and
21 deletions
oletools/ftguess.py
| ... | ... | @@ -148,16 +148,23 @@ class FTYPE(object): |
| 148 | 148 | Constants for file types |
| 149 | 149 | """ |
| 150 | 150 | ZIP = 'Zip' |
| 151 | + WORD = 'Word' | |
| 152 | + WORD6 = 'Word6' | |
| 151 | 153 | WORD97 = 'Word97' |
| 152 | 154 | WORD2007 = 'Word2007' |
| 153 | - WORD2007_TEMPLATE = 'Word2007T' | |
| 154 | - WORD2007_MACRO = 'Word2007M' | |
| 155 | - WORD2007_TEMPLATE_MACRO = 'Word2007TM' | |
| 156 | - WORD6 = 'Word6' | |
| 155 | + WORD2007_DOCX = 'Word2007_DOCX' | |
| 156 | + WORD2007_DOTX = 'Word2007_DOTX' | |
| 157 | + WORD2007_DOCM = 'Word2007_DOCM' | |
| 158 | + WORD2007_DOTM = 'Word2007_DOTM' | |
| 159 | + EXCEL = 'Excel' | |
| 160 | + EXCEL5 = 'Excel5' | |
| 157 | 161 | EXCEL97 = 'Excel97' |
| 158 | 162 | EXCEL2007 = 'Excel2007' |
| 159 | - EXCEL2007_MACRO = 'Excel2007_XLSM' | |
| 160 | - EXCEL5 = 'Excel5' | |
| 163 | + EXCEL2007_XLSX = 'Excel2007_XLSX' | |
| 164 | + EXCEL2007_XLSM = 'Excel2007_XLSM' | |
| 165 | + EXCEL2007_XLTX = 'Excel2007_XLTX' | |
| 166 | + EXCEL2007_XLTM = 'Excel2007_XLTM' | |
| 167 | + EXCEL2007_XLSB = 'Excel2007_XLSB' | |
| 161 | 168 | # TODO: XLSB, DOCM, PPTM, PPSX, PPSM, ... |
| 162 | 169 | RTF = 'RTF' |
| 163 | 170 | HTML = 'HTML' |
| ... | ... | @@ -444,6 +451,14 @@ class FType_Generic_OpenXML(FType_Base): |
| 444 | 451 | |
| 445 | 452 | # --- WORD Formats --- |
| 446 | 453 | |
| 454 | +class FTYpe_Word(FType_Base): | |
| 455 | + 'Base class for all MS Word file types' | |
| 456 | + application = APP.MSWORD | |
| 457 | + name = 'MS Word (generic)' | |
| 458 | + longname = 'MS Word Document or Template (generic)' | |
| 459 | + | |
| 460 | +# TODO: all word FTypes should inherit from FType_Word | |
| 461 | + | |
| 447 | 462 | class FType_Word97(FType_OLE_CLSID_Base): |
| 448 | 463 | application = APP.MSWORD |
| 449 | 464 | filetype = FTYPE.WORD97 |
| ... | ... | @@ -469,44 +484,48 @@ class FType_Word6(FType_OLE_CLSID_Base): |
| 469 | 484 | |
| 470 | 485 | class FType_Word2007(FType_Generic_OpenXML): |
| 471 | 486 | application = APP.MSWORD |
| 472 | - filetype = FTYPE.WORD2007 | |
| 487 | + filetype = FTYPE.WORD2007_DOCX | |
| 473 | 488 | name = 'MS Word 2007+ Document' |
| 474 | 489 | longname = 'MS Word 2007+ Document (.docx)' |
| 475 | 490 | extensions = ['docx'] |
| 476 | 491 | |
| 477 | 492 | class FType_Word2007_Macro(FType_Generic_OpenXML): |
| 478 | 493 | application = APP.MSWORD |
| 479 | - filetype = FTYPE.WORD2007_MACRO | |
| 494 | + filetype = FTYPE.WORD2007_DOCM | |
| 480 | 495 | name = 'MS Word 2007+ Macro-Enabled Document' |
| 481 | 496 | longname = 'MS Word 2007+ Macro-Enabled Document (.docm)' |
| 482 | 497 | extensions = ['docm'] |
| 483 | 498 | |
| 484 | 499 | class FType_Word2007_Template(FType_Generic_OpenXML): |
| 485 | 500 | application = APP.MSWORD |
| 486 | - filetype = FTYPE.WORD2007_TEMPLATE | |
| 501 | + filetype = FTYPE.WORD2007_DOTX | |
| 487 | 502 | name = 'MS Word 2007+ Template' |
| 488 | 503 | longname = 'MS Word 2007+ Template (.dotx)' |
| 489 | 504 | extensions = ['dotx'] |
| 490 | 505 | |
| 491 | 506 | class FType_Word2007_Template_Macro(FType_Generic_OpenXML): |
| 492 | 507 | application = APP.MSWORD |
| 493 | - filetype = FTYPE.WORD2007_TEMPLATE_MACRO | |
| 508 | + filetype = FTYPE.WORD2007_DOTM | |
| 494 | 509 | name = 'MS Word 2007+ Macro-Enabled Template' |
| 495 | 510 | longname = 'MS Word 2007+ Macro-Enabled Template (.dotm)' |
| 496 | 511 | extensions = ['dotm'] |
| 497 | 512 | |
| 498 | 513 | # --- EXCEL Formats --- |
| 499 | 514 | |
| 500 | -class FType_Excel97(FType_OLE_CLSID_Base): | |
| 515 | +class FTYpe_Excel(FType_Base): | |
| 516 | + 'Base class for all MS Excel file types' | |
| 501 | 517 | application = APP.MSEXCEL |
| 518 | + name = 'MS Excel (generic)' | |
| 519 | + longname = 'MS Excel Workbook or Template (generic)' | |
| 520 | + | |
| 521 | +class FType_Excel97(FTYpe_Excel): | |
| 502 | 522 | filetype = FTYPE.EXCEL97 |
| 503 | 523 | name = 'MS Excel 97 Workbook' |
| 504 | 524 | longname = 'MS Excel 97-2003 Workbook or Template' |
| 505 | 525 | CLSIDS = ('00020820-0000-0000-C000-000000000046',) |
| 506 | 526 | extensions = ['xls', 'xlt', 'xla'] |
| 507 | 527 | |
| 508 | -class FType_Excel5(FType_OLE_CLSID_Base): | |
| 509 | - application = APP.MSEXCEL | |
| 528 | +class FType_Excel5(FTYpe_Excel): | |
| 510 | 529 | filetype = FTYPE.EXCEL5 |
| 511 | 530 | name = 'MS Excel 5.0/95 Workbook' |
| 512 | 531 | longname = 'MS Excel 5.0/95 Workbook, Template or Add-in' |
| ... | ... | @@ -514,18 +533,21 @@ class FType_Excel5(FType_OLE_CLSID_Base): |
| 514 | 533 | extensions = ['xls', 'xlt', 'xla'] |
| 515 | 534 | # TODO: this CLSID is also used in Excel addins (.xla) saved by MS Excel 365 |
| 516 | 535 | |
| 517 | -class FType_Excel2007_Workbook(FType_Generic_OpenXML): | |
| 518 | - application = APP.MSEXCEL | |
| 519 | - filetype = FTYPE.EXCEL2007 | |
| 536 | +class FTYpe_Excel2007(FTYpe_Excel): | |
| 537 | + 'Base class for all MS Excel 2007 file types' | |
| 538 | + name = 'MS Excel 2007+ (generic)' | |
| 539 | + longname = 'MS Excel 2007+ Workbook or Template (generic)' | |
| 540 | + | |
| 541 | +class FType_Excel2007_XLSX (FTYpe_Excel2007): | |
| 542 | + filetype = FTYPE.EXCEL2007_XLSX | |
| 520 | 543 | name = 'MS Excel 2007+ Workbook' |
| 521 | 544 | longname = 'MS Excel 2007+ Workbook (.xlsx)' |
| 522 | 545 | extensions = ['xlsx'] |
| 523 | 546 | content_types = ('application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',) |
| 524 | 547 | PUID = 'fmt/214' |
| 525 | 548 | |
| 526 | -class FType_Excel2007_Workbook_MacroEnabled(FType_Generic_OpenXML): | |
| 527 | - application = APP.MSEXCEL | |
| 528 | - filetype = FTYPE.EXCEL2007_MACRO | |
| 549 | +class FType_Excel2007_XLSM (FTYpe_Excel2007): | |
| 550 | + filetype = FTYPE.EXCEL2007_XLSM | |
| 529 | 551 | name = 'MS Excel 2007+ Macro-Enabled Workbook' |
| 530 | 552 | longname = 'MS Excel 2007+ Macro-Enabled Workbook (.xlsm)' |
| 531 | 553 | extensions = ['xlsm'] |
| ... | ... | @@ -550,8 +572,8 @@ openxml_ftypes = { |
| 550 | 572 | 'application/vnd.openxmlformats-officedocument.wordprocessingml.template.main+xml': FType_Word2007_Template, |
| 551 | 573 | 'application/vnd.ms-word.template.macroEnabledTemplate.main+xml': FType_Word2007_Template_Macro, |
| 552 | 574 | # EXCEL |
| 553 | - 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml': FType_Excel2007_Workbook, | |
| 554 | - 'application/vnd.ms-excel.sheet.macroEnabled.main+xml': FType_Excel2007_Workbook_MacroEnabled, | |
| 575 | + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml': FType_Excel2007_XLSX, | |
| 576 | + 'application/vnd.ms-excel.sheet.macroEnabled.main+xml': FType_Excel2007_XLSM, | |
| 555 | 577 | 'application/vnd.ms-excel.sheet.binary.macroEnabled.main': None, |
| 556 | 578 | } |
| 557 | 579 | |
| ... | ... | @@ -628,6 +650,20 @@ class FileTypeGuesser(object): |
| 628 | 650 | if self.zipfile is not None: |
| 629 | 651 | self.zipfile.close() |
| 630 | 652 | |
| 653 | + def is_word(self): | |
| 654 | + """ | |
| 655 | + Shortcut to check if a file is a Word document or template | |
| 656 | + :return: bool | |
| 657 | + """ | |
| 658 | + return issubclass(self.ftype, FTYpe_Word) | |
| 659 | + | |
| 660 | + def is_excel(self): | |
| 661 | + """ | |
| 662 | + Shortcut to check if a file is an Excel workbook, template or add-in | |
| 663 | + :return: bool | |
| 664 | + """ | |
| 665 | + return issubclass(self.ftype, FTYpe_Excel) | |
| 666 | + | |
| 631 | 667 | |
| 632 | 668 | # === FUNCTIONS ============================================================== |
| 633 | 669 | ... | ... |