Commit c5273ef2f18039b7b8536031098b89f5d684e2d6

Authored by decalage2
1 parent 3fef697b

codepages: added a few code pages from xlrd

Showing 1 changed file with 11 additions and 4 deletions
oletools/common/codepages.py
@@ -14,7 +14,7 @@ http://www.decalage.info/python/oletools @@ -14,7 +14,7 @@ http://www.decalage.info/python/oletools
14 14
15 # === LICENSE ================================================================== 15 # === LICENSE ==================================================================
16 16
17 -# codepages is copyright (c) 2018 Philippe Lagadec (http://www.decalage.info) 17 +# codepages is copyright (c) 2018-2019 Philippe Lagadec (http://www.decalage.info)
18 # All rights reserved. 18 # All rights reserved.
19 # 19 #
20 # Redistribution and use in source and binary forms, with or without modification, 20 # Redistribution and use in source and binary forms, with or without modification,
@@ -41,8 +41,9 @@ http://www.decalage.info/python/oletools @@ -41,8 +41,9 @@ http://www.decalage.info/python/oletools
41 # ----------------------------------------------------------------------------- 41 # -----------------------------------------------------------------------------
42 # CHANGELOG: 42 # CHANGELOG:
43 # 2018-12-13 v0.54 PL: - first version 43 # 2018-12-13 v0.54 PL: - first version
  44 +# 2019-01-30 PL: - added a few code pages from xlrd
44 45
45 -__version__ = '0.54dev6' 46 +__version__ = '0.54dev9'
46 47
47 # ----------------------------------------------------------------------------- 48 # -----------------------------------------------------------------------------
48 # TODO: 49 # TODO:
@@ -243,10 +244,13 @@ CODEPAGE_TO_CODEC = { @@ -243,10 +244,13 @@ CODEPAGE_TO_CODEC = {
243 10004: 'mac-arabic', 244 10004: 'mac-arabic',
244 10005: 'hebrew', # not found: 'mac-hebrew', 245 10005: 'hebrew', # not found: 'mac-hebrew',
245 10006: 'mac-greek', 246 10006: 'mac-greek',
246 - 10007: 'ascii', # nothing appropriate found: 'mac-russian', 247 + #10007: 'ascii', # nothing appropriate found: 'mac-russian',
  248 + 10007: 'mac_cyrillic', # guess (from xlrd)
247 10008: 'gb2312', # not found: 'mac-gb2312', 249 10008: 'gb2312', # not found: 'mac-gb2312',
248 10021: 'thai', # not found: 'mac-thai', 250 10021: 'thai', # not found: 'mac-thai',
249 - 10029: 'maccentraleurope', # not found: 'mac-east europe', 251 + #10029: 'maccentraleurope', # not found: 'mac-east europe',
  252 + 10029: 'mac_latin2', # guess (from xlrd)
  253 + 10079: 'mac_iceland', # guess (from xlrd)
250 10081: 'mac-turkish', 254 10081: 'mac-turkish',
251 255
252 12000: 'utf_32_le', # Unicode UTF-32, little endian byte order 256 12000: 'utf_32_le', # Unicode UTF-32, little endian byte order
@@ -265,6 +269,9 @@ CODEPAGE_TO_CODEC = { @@ -265,6 +269,9 @@ CODEPAGE_TO_CODEC = {
265 28599: 'iso8859_9', 269 28599: 'iso8859_9',
266 28603: 'iso8859_13', 270 28603: 'iso8859_13',
267 28605: 'iso8859_15', 271 28605: 'iso8859_15',
  272 +
  273 + 32768: 'mac_roman', # from xlrd
  274 + 32769: 'cp1252', # from xlrd
268 38598: 'iso8859_8', 275 38598: 'iso8859_8',
269 276
270 65000: 'utf7', 277 65000: 'utf7',