Commit 6e069d5fd28c98ba2e349096bd47fbde5568909f

Authored by Christian Herdtweck
1 parent 5222ef66

ftguess: Add XPS type

Showing 1 changed file with 15 additions and 2 deletions
oletools/ftguess.py
@@ -166,6 +166,7 @@ class FTYPE(object): @@ -166,6 +166,7 @@ class FTYPE(object):
166 EXCEL2007_XLTM = 'Excel2007_XLTM' 166 EXCEL2007_XLTM = 'Excel2007_XLTM'
167 EXCEL2007_XLSB = 'Excel2007_XLSB' 167 EXCEL2007_XLSB = 'Excel2007_XLSB'
168 # TODO: XLSB, DOCM, PPTM, PPSX, PPSM, ... 168 # TODO: XLSB, DOCM, PPTM, PPSX, PPSM, ...
  169 + XPS = 'XPS'
169 RTF = 'RTF' 170 RTF = 'RTF'
170 HTML = 'HTML' 171 HTML = 'HTML'
171 PDF = 'PDF' 172 PDF = 'PDF'
@@ -203,7 +204,7 @@ class APP(object): @@ -203,7 +204,7 @@ class APP(object):
203 MSPROJECT = 'MS Project' 204 MSPROJECT = 'MS Project'
204 MSOFFICE = 'MS Office' # when the exact app is unknown 205 MSOFFICE = 'MS Office' # when the exact app is unknown
205 ZIP_ARCHIVER = 'Any Zip Archiver' 206 ZIP_ARCHIVER = 'Any Zip Archiver'
206 - WINDOWS = 'Windows' # for Windows executables 207 + WINDOWS = 'Windows' # for Windows executables and XPS
207 UNKNOWN = 'Unknown Application' 208 UNKNOWN = 'Unknown Application'
208 209
209 # FTYPE_NAME = { 210 # FTYPE_NAME = {
@@ -222,6 +223,8 @@ ATTR_REL_TARGET = 'Target' @@ -222,6 +223,8 @@ ATTR_REL_TARGET = 'Target'
222 URL_REL_OFFICEDOC = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" 223 URL_REL_OFFICEDOC = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"
223 # For "strict" OpenXML formats, the URL is different: 224 # For "strict" OpenXML formats, the URL is different:
224 URL_REL_OFFICEDOC_STRICT = 'http://purl.oclc.org/ooxml/officeDocument/relationships/officeDocument' 225 URL_REL_OFFICEDOC_STRICT = 'http://purl.oclc.org/ooxml/officeDocument/relationships/officeDocument'
  226 +# URL for XPS files
  227 +URL_REL_XPS = 'http://schemas.microsoft.com/xps/2005/06/fixedrepresentation'
225 # Namespaces and tags for OpenXML parsing - Content-types file: 228 # Namespaces and tags for OpenXML parsing - Content-types file:
226 NS_CONTENT_TYPES = '{http://schemas.openxmlformats.org/package/2006/content-types}' 229 NS_CONTENT_TYPES = '{http://schemas.openxmlformats.org/package/2006/content-types}'
227 TAG_CTYPES_DEFAULT = NS_CONTENT_TYPES + 'Default' 230 TAG_CTYPES_DEFAULT = NS_CONTENT_TYPES + 'Default'
@@ -395,7 +398,7 @@ class FType_Generic_OpenXML(FType_Base): @@ -395,7 +398,7 @@ class FType_Generic_OpenXML(FType_Base):
395 for elem_rel in elem_rels.iter(tag=TAG_REL): 398 for elem_rel in elem_rels.iter(tag=TAG_REL):
396 rel_type = elem_rel.get(ATTR_REL_TYPE) 399 rel_type = elem_rel.get(ATTR_REL_TYPE)
397 log.debug('Relationship: type=%s target=%s' % (rel_type, elem_rel.get(ATTR_REL_TARGET))) 400 log.debug('Relationship: type=%s target=%s' % (rel_type, elem_rel.get(ATTR_REL_TARGET)))
398 - if rel_type in (URL_REL_OFFICEDOC, URL_REL_OFFICEDOC_STRICT): 401 + if rel_type in (URL_REL_OFFICEDOC, URL_REL_OFFICEDOC_STRICT, URL_REL_XPS):
399 # TODO: is it useful to distinguish normal and strict OpenXML? 402 # TODO: is it useful to distinguish normal and strict OpenXML?
400 main_part = elem_rel.get(ATTR_REL_TARGET) 403 main_part = elem_rel.get(ATTR_REL_TARGET)
401 # TODO: raise anomaly if there are more than one rel with type office doc 404 # TODO: raise anomaly if there are more than one rel with type office doc
@@ -559,6 +562,14 @@ class FType_Excel2007_XLSM (FTYpe_Excel2007): @@ -559,6 +562,14 @@ class FType_Excel2007_XLSM (FTYpe_Excel2007):
559 content_types = ('application/vnd.ms-excel.sheet.macroEnabled.12',) 562 content_types = ('application/vnd.ms-excel.sheet.macroEnabled.12',)
560 PUID = 'fmt/445' 563 PUID = 'fmt/445'
561 564
class FType_XPS(FType_Generic_OpenXML):
    """File type descriptor for XPS (.xps) fixed-page documents.

    Subclasses FType_Generic_OpenXML and only overrides the descriptive
    class attributes below; all recognition logic is inherited.
    """
    # Application associated with this file type (XPS is attributed to Windows)
    application = APP.WINDOWS
    # File type identifier constant
    filetype = FTYPE.XPS
    # Short display name
    name = 'XPS'
    # Human-readable description. This must be a plain string: a trailing
    # comma after the literal would silently turn it into a 1-tuple.
    longname = 'Fixed-Page Document (.xps)'
    # File name extensions (without the leading dot)
    extensions = ['xps']
  571 +
  572 +
562 # TODO: for PPT, check for stream 'PowerPoint Document' 573 # TODO: for PPT, check for stream 'PowerPoint Document'
563 # TODO: for Visio, check for stream 'VisioDocument' 574 # TODO: for Visio, check for stream 'VisioDocument'
564 575
@@ -583,6 +594,8 @@ openxml_ftypes = { @@ -583,6 +594,8 @@ openxml_ftypes = {
583 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml': FType_Excel2007_XLSX, 594 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml': FType_Excel2007_XLSX,
584 'application/vnd.ms-excel.sheet.macroEnabled.main+xml': FType_Excel2007_XLSM, 595 'application/vnd.ms-excel.sheet.macroEnabled.main+xml': FType_Excel2007_XLSM,
585 'application/vnd.ms-excel.sheet.binary.macroEnabled.main': None, 596 'application/vnd.ms-excel.sheet.binary.macroEnabled.main': None,
  597 + # XPS
  598 + 'application/vnd.ms-package.xps-fixeddocumentsequence+xml': FType_XPS,
586 } 599 }
587 600
588 601