Commit c5273ef2f18039b7b8536031098b89f5d684e2d6

Authored by decalage2
1 parent 3fef697b

codepages: added a few code pages from xlrd

Showing 1 changed file with 11 additions and 4 deletions
oletools/common/codepages.py
... ... @@ -14,7 +14,7 @@ http://www.decalage.info/python/oletools
14 14  
15 15 # === LICENSE ==================================================================
16 16  
17   -# codepages is copyright (c) 2018 Philippe Lagadec (http://www.decalage.info)
  17 +# codepages is copyright (c) 2018-2019 Philippe Lagadec (http://www.decalage.info)
18 18 # All rights reserved.
19 19 #
20 20 # Redistribution and use in source and binary forms, with or without modification,
... ... @@ -41,8 +41,9 @@ http://www.decalage.info/python/oletools
41 41 # -----------------------------------------------------------------------------
42 42 # CHANGELOG:
43 43 # 2018-12-13 v0.54 PL: - first version
  44 +# 2019-01-30 PL: - added a few code pages from xlrd
44 45  
45   -__version__ = '0.54dev6'
  46 +__version__ = '0.54dev9'
46 47  
47 48 # -----------------------------------------------------------------------------
48 49 # TODO:
... ... @@ -243,10 +244,13 @@ CODEPAGE_TO_CODEC = {
243 244 10004: 'mac-arabic',
244 245 10005: 'hebrew', # not found: 'mac-hebrew',
245 246 10006: 'mac-greek',
246   - 10007: 'ascii', # nothing appropriate found: 'mac-russian',
  247 + #10007: 'ascii', # nothing appropriate found: 'mac-russian',
  248 + 10007: 'mac_cyrillic', # guess (from xlrd)
247 249 10008: 'gb2312', # not found: 'mac-gb2312',
248 250 10021: 'thai', # not found: 'mac-thai',
249   - 10029: 'maccentraleurope', # not found: 'mac-east europe',
  251 + #10029: 'maccentraleurope', # not found: 'mac-east europe',
  252 + 10029: 'mac_latin2', # guess (from xlrd)
  253 + 10079: 'mac_iceland', # guess (from xlrd)
250 254 10081: 'mac-turkish',
251 255  
252 256 12000: 'utf_32_le', # Unicode UTF-32, little endian byte order
... ... @@ -265,6 +269,9 @@ CODEPAGE_TO_CODEC = {
265 269 28599: 'iso8859_9',
266 270 28603: 'iso8859_13',
267 271 28605: 'iso8859_15',
  272 +
  273 + 32768: 'mac_roman', # from xlrd
  274 + 32769: 'cp1252', # from xlrd
268 275 38598: 'iso8859_8',
269 276  
270 277 65000: 'utf7',
... ...