Commit 2a69ce37647c0f57a23f39cecf549f68bb02af85
1 parent
fbfd20e7
check for mac codepages when decoding project codepage
based on the suggestion in issue #42 on Bitbucket, the list of codepages on Stack Overflow (see comment in code) and the contents of /usr/lib/python2.7/encodings
Showing
1 changed file
with
29 additions
and
3 deletions
oletools/olevba.py
| ... | ... | @@ -380,6 +380,22 @@ RETURN_PARSE_ERROR = 6 |
| 380 | 380 | RETURN_SEVERAL_ERRS = 7 |
| 381 | 381 | RETURN_UNEXPECTED = 8 |
| 382 | 382 | |
| 383 | +# MAC codepage ids -> Python codec names, used instead of 'cp%d' (from http://stackoverflow.com/questions/1592925/decoding-mac-os-text-in-python) | |
| 384 | +MAC_CODEPAGES = { | |
| 385 | + 10000: 'mac-roman', | |
| 386 | + 10001: 'shiftjis', # not found: 'mac-shift-jis', | |
| 387 | + 10003: 'ascii', # nothing appropriate found: 'mac-hangul', | |
| 388 | + 10008: 'gb2312', # not found: 'mac-gb2312', | |
| 389 | + 10002: 'big5', # not found: 'mac-big5', | |
| 390 | + 10005: 'hebrew', # not found: 'mac-hebrew', | |
| 391 | + 10004: 'mac-arabic', | |
| 392 | + 10006: 'mac-greek', | |
| 393 | + 10081: 'mac-turkish', | |
| 394 | + 10021: 'thai', # not found: 'mac-thai', | |
| 395 | + 10029: 'maccentraleurope', # not found: 'mac-east europe', | |
| 396 | + 10007: 'mac-cyrillic', # stdlib codec for Mac Cyrillic (listed as 'mac-russian'), | |
| 397 | +} | |
| 398 | + | |
| 383 | 399 | # URL and message to report issues: |
| 384 | 400 | URL_OLEVBA_ISSUES = 'https://github.com/decalage2/oletools/issues' |
| 385 | 401 | MSG_OLEVBA_ISSUES = 'Please report this issue on %s' % URL_OLEVBA_ISSUES |
| ... | ... | @@ -1347,7 +1363,9 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): |
| 1347 | 1363 | reference_sizeof_name = struct.unpack("<L", dir_stream.read(4))[0] |
| 1348 | 1364 | reference_name = dir_stream.read(reference_sizeof_name) |
| 1349 | 1365 | reference_reserved = struct.unpack("<H", dir_stream.read(2))[0] |
| 1350 | - check_value('REFERENCE_Reserved', 0x003E, reference_reserved) | |
| 1366 | + if reference_reserved not in (0x003E, 0x000D): | |
| 1367 | + raise UnexpectedDataError(dir_path, 'REFERENCE_Reserved', | |
| 1368 | + (0x003E, 0x000D), value) | |
| 1351 | 1369 | reference_sizeof_name_unicode = struct.unpack("<L", dir_stream.read(4))[0] |
| 1352 | 1370 | reference_name_unicode = dir_stream.read(reference_sizeof_name_unicode) |
| 1353 | 1371 | unused = reference_id |
| ... | ... | @@ -1556,11 +1574,19 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): |
| 1556 | 1574 | log.warning('unknown or invalid module section id {0:04X}'.format(section_id)) |
| 1557 | 1575 | |
| 1558 | 1576 | log.debug('Project CodePage = %d' % projectcodepage_codepage) |
| 1559 | - vba_codec = 'cp%d' % projectcodepage_codepage | |
| 1577 | + if projectcodepage_codepage in MAC_CODEPAGES: | |
| 1578 | + vba_codec = MAC_CODEPAGES[projectcodepage_codepage] | |
| 1579 | + else: | |
| 1580 | + vba_codec = 'cp%d' % projectcodepage_codepage | |
| 1560 | 1581 | log.debug("ModuleName = {0}".format(modulename_modulename)) |
| 1561 | 1582 | log.debug("ModuleNameUnicode = {0}".format(uni_out(modulename_unicode_modulename_unicode))) |
| 1562 | 1583 | log.debug("StreamName = {0}".format(modulestreamname_streamname)) |
| 1563 | - streamname_unicode = modulestreamname_streamname.decode(vba_codec) | |
| 1584 | + try: | |
| 1585 | + streamname_unicode = modulestreamname_streamname.decode(vba_codec) | |
| 1586 | + except UnicodeError as ue: | |
| 1587 | + log.debug('failed to decode stream name {0!r} with codec {1}' | |
| 1588 | + .format(uni_out(streamname_unicode), vba_codec)) | |
| 1589 | + streamname_unicode = modulestreamname_streamname.decode(vba_codec, errors='replace') | |
| 1564 | 1590 | log.debug("StreamName.decode('%s') = %s" % (vba_codec, uni_out(streamname_unicode))) |
| 1565 | 1591 | log.debug("StreamNameUnicode = {0}".format(uni_out(modulestreamname_streamname_unicode))) |
| 1566 | 1592 | log.debug("TextOffset = {0}".format(moduleoffset_textoffset)) | ... | ... |