Commit 1cf591ddf5ec2da343d6ddc3f73383e8f1bdcdc8
Committed by
GitHub
Merge pull request #63 from sebdraven/master
python3.5 compliant only
Showing
2 changed files
with
64 additions
and
63 deletions
oletools/oleobj.py
| ... | ... | @@ -162,7 +162,7 @@ def read_LengthPrefixedAnsiString(data): |
| 162 | 162 | ansi_string = data[:length-1] |
| 163 | 163 | # TODO: only in strict mode: |
| 164 | 164 | # check the presence of the null char: |
| 165 | - assert data[length] == '\x00' | |
| 165 | + assert data[length] == 0 | |
| 166 | 166 | new_data = data[length:] |
| 167 | 167 | return (ansi_string, new_data) |
| 168 | 168 | |
| ... | ... | @@ -214,14 +214,14 @@ class OleNativeStream (object): |
| 214 | 214 | # log.debug('OLE native data size = {0:08X} ({0} bytes)'.format(self.native_data_size)) |
| 215 | 215 | # I thought this might be an OLE type specifier ??? |
| 216 | 216 | self.unknown_short, data = read_uint16(data) |
| 217 | - self.filename, data = data.split('\x00', 1) | |
| 217 | + self.filename, data = data.split(b'\x00', 1) | |
| 218 | 218 | # source path |
| 219 | - self.src_path, data = data.split('\x00', 1) | |
| 219 | + self.src_path, data = data.split(b'\x00', 1) | |
| 220 | 220 | # TODO I bet these next 8 bytes are a timestamp => FILETIME from olefile |
| 221 | 221 | self.unknown_long_1, data = read_uint32(data) |
| 222 | 222 | self.unknown_long_2, data = read_uint32(data) |
| 223 | 223 | # temp path? |
| 224 | - self.temp_path, data = data.split('\x00', 1) | |
| 224 | + self.temp_path, data = data.split(b'\x00', 1) | |
| 225 | 225 | # size of the rest of the data |
| 226 | 226 | self.actual_size, data = read_uint32(data) |
| 227 | 227 | self.data = data[0:self.actual_size] | ... | ... |
oletools/rtfobj.py
| ... | ... | @@ -120,7 +120,7 @@ log = get_logger('rtfobj') |
| 120 | 120 | # REGEX pattern to extract embedded OLE objects in hexadecimal format: |
| 121 | 121 | |
| 122 | 122 | # alphanum digit: [0-9A-Fa-f] |
| 123 | -HEX_DIGIT = r'[0-9A-Fa-f]' | |
| 123 | +HEX_DIGIT = rb'[0-9A-Fa-f]' | |
| 124 | 124 | |
| 125 | 125 | # hex char = two alphanum digits: [0-9A-Fa-f]{2} |
| 126 | 126 | # HEX_CHAR = r'[0-9A-Fa-f]{2}' |
| ... | ... | @@ -130,11 +130,11 @@ HEX_DIGIT = r'[0-9A-Fa-f]' |
| 130 | 130 | # AND the tags can be nested... |
| 131 | 131 | #SINGLE_RTF_TAG = r'[{][^{}]*[}]' |
| 132 | 132 | # Actually RTF tags may contain braces escaped with backslash (\{ \}): |
| 133 | -SINGLE_RTF_TAG = r'[{](?:\\.|[^{}\\])*[}]' | |
| 133 | +SINGLE_RTF_TAG = rb'[{](?:\\.|[^{}\\])*[}]' | |
| 134 | 134 | |
| 135 | 135 | # Nested tags, two levels (because Python's re does not support nested matching): |
| 136 | 136 | # NESTED_RTF_TAG = r'[{](?:[^{}]|'+SINGLE_RTF_TAG+r')*[}]' |
| 137 | -NESTED_RTF_TAG = r'[{](?:\\.|[^{}\\]|'+SINGLE_RTF_TAG+r')*[}]' | |
| 137 | +NESTED_RTF_TAG = rb'[{](?:\\.|[^{}\\]|'+SINGLE_RTF_TAG+b')*[}]' | |
| 138 | 138 | |
| 139 | 139 | # AND it is also allowed to insert ANY control word or control symbol (ignored) |
| 140 | 140 | # According to Rich Text Format (RTF) Specification Version 1.9.1, |
| ... | ... | @@ -146,7 +146,7 @@ NESTED_RTF_TAG = r'[{](?:\\.|[^{}\\]|'+SINGLE_RTF_TAG+r')*[}]' |
| 146 | 146 | # "\AnyThing " "\AnyThing123z" "\AnyThing-456{" "\AnyThing{" |
| 147 | 147 | # control symbol = \<any char except letter or digit> (followed by anything) |
| 148 | 148 | |
| 149 | -ASCII_NAME = r'([a-zA-Z]{1,250})' | |
| 149 | +ASCII_NAME = rb'([a-zA-Z]{1,250})' | |
| 150 | 150 | |
| 151 | 151 | # using Python's re lookahead assertion: |
| 152 | 152 | # (?=...) Matches if ... matches next, but doesn't consume any of the string. |
| ... | ... | @@ -155,20 +155,21 @@ ASCII_NAME = r'([a-zA-Z]{1,250})' |
| 155 | 155 | |
| 156 | 156 | # TODO: Find the actual limit on the number of digits for Word |
| 157 | 157 | # SIGNED_INTEGER = r'(-?\d{1,250})' |
| 158 | -SIGNED_INTEGER = r'(-?\d+)' | |
| 158 | +SIGNED_INTEGER = rb'(-?\d+)' | |
| 159 | + | |
| 160 | +CONTROL_WORD = rb'(?:\\' + ASCII_NAME + rb'(?:(?=[^a-zA-Z0-9-])|' + SIGNED_INTEGER + rb'(?=[^0-9])))' | |
| 159 | 161 | |
| 160 | -CONTROL_WORD = r'(?:\\' + ASCII_NAME + r'(?:(?=[^a-zA-Z0-9-])|' + SIGNED_INTEGER + r'(?=[^0-9])))' | |
| 161 | 162 | re_control_word = re.compile(CONTROL_WORD) |
| 162 | 163 | |
| 163 | -CONTROL_SYMBOL = r'(?:\\[^a-zA-Z0-9])' | |
| 164 | +CONTROL_SYMBOL = rb'(?:\\[^a-zA-Z0-9])' | |
| 164 | 165 | re_control_symbol = re.compile(CONTROL_SYMBOL) |
| 165 | 166 | |
| 166 | 167 | # Text that is not a control word/symbol or a group: |
| 167 | -TEXT = r'[^{}\\]+' | |
| 168 | +TEXT = rb'[^{}\\]+' | |
| 168 | 169 | re_text = re.compile(TEXT) |
| 169 | 170 | |
| 170 | 171 | # ignored whitespaces and tags within a hex block: |
| 171 | -IGNORED = r'(?:\s|'+NESTED_RTF_TAG+'|'+CONTROL_SYMBOL+'|'+CONTROL_WORD+r')*' | |
| 172 | +IGNORED = rb'(?:\s|'+NESTED_RTF_TAG+rb'|'+CONTROL_SYMBOL+rb'|'+CONTROL_WORD+rb')*' | |
| 172 | 173 | #IGNORED = r'\s*' |
| 173 | 174 | |
| 174 | 175 | # HEX_CHAR = HEX_DIGIT + IGNORED + HEX_DIGIT |
| ... | ... | @@ -188,7 +189,7 @@ IGNORED = r'(?:\s|'+NESTED_RTF_TAG+'|'+CONTROL_SYMBOL+'|'+CONTROL_WORD+r')*' |
| 188 | 189 | |
| 189 | 190 | #TODO PATTERN = r'\b(?:' + HEX_CHAR + IGNORED + r'){4,}\b' |
| 190 | 191 | # PATTERN = r'\b(?:' + HEX_CHAR + IGNORED + r'){4,}' #+ HEX_CHAR + r'\b' |
| 191 | -PATTERN = r'\b(?:' + HEX_DIGIT + IGNORED + r'){7,}' + HEX_DIGIT + r'\b' | |
| 192 | +PATTERN = rb'\b(?:' + HEX_DIGIT + IGNORED + rb'){7,}' + HEX_DIGIT + rb'\b' | |
| 192 | 193 | |
| 193 | 194 | # at least 4 hex chars, followed by whitespace or CR/LF: (?:[0-9A-Fa-f]{2}){4,}\s* |
| 194 | 195 | # PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}' |
| ... | ... | @@ -196,19 +197,19 @@ PATTERN = r'\b(?:' + HEX_DIGIT + IGNORED + r'){7,}' + HEX_DIGIT + r'\b' |
| 196 | 197 | #PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}' |
| 197 | 198 | |
| 198 | 199 | # a dummy translation table for str.translate, which does not change anything: |
| 199 | -TRANSTABLE_NOCHANGE = string.maketrans('', '') | |
| 200 | +TRANSTABLE_NOCHANGE = bytes.maketrans(b'', b'') | |
| 200 | 201 | |
| 201 | 202 | re_hexblock = re.compile(PATTERN) |
| 202 | 203 | re_embedded_tags = re.compile(IGNORED) |
| 203 | -re_decimal = re.compile(r'\d+') | |
| 204 | +re_decimal = re.compile(rb'\d+') | |
| 204 | 205 | |
| 205 | -re_delimiter = re.compile(r'[ \t\r\n\f\v]') | |
| 206 | +re_delimiter = re.compile(rb'[ \t\r\n\f\v]') | |
| 206 | 207 | |
| 207 | -DELIMITER = r'[ \t\r\n\f\v]' | |
| 208 | -DELIMITERS_ZeroOrMore = r'[ \t\r\n\f\v]*' | |
| 209 | -BACKSLASH_BIN = r'\\bin' | |
| 208 | +DELIMITER = rb'[ \t\r\n\f\v]' | |
| 209 | +DELIMITERS_ZeroOrMore = rb'[ \t\r\n\f\v]*' | |
| 210 | +BACKSLASH_BIN = rb'\\bin' | |
| 210 | 211 | # According to my tests, Word accepts up to 250 digits (leading zeroes) |
| 211 | -DECIMAL_GROUP = r'(\d{1,250})' | |
| 212 | +DECIMAL_GROUP = rb'(\d{1,250})' | |
| 212 | 213 | |
| 213 | 214 | re_delims_bin_decimal = re.compile(DELIMITERS_ZeroOrMore + BACKSLASH_BIN |
| 214 | 215 | + DECIMAL_GROUP + DELIMITER) |
| ... | ... | @@ -216,36 +217,36 @@ re_delim_hexblock = re.compile(DELIMITER + PATTERN) |
| 216 | 217 | |
| 217 | 218 | # Destination Control Words, according to MS RTF Specifications v1.9.1: |
| 218 | 219 | DESTINATION_CONTROL_WORDS = frozenset(( |
| 219 | - "aftncn", "aftnsep", "aftnsepc", "annotation", "atnauthor", "atndate", "atnicn", "atnid", "atnparent", "atnref", | |
| 220 | - "atntime", "atrfend", "atrfstart", "author", "background", "bkmkend", "bkmkstart", "blipuid", "buptim", "category", | |
| 221 | - "colorschememapping", "colortbl", "comment", "company", "creatim", "datafield", "datastore", "defchp", "defpap", | |
| 222 | - "do", "doccomm", "docvar", "dptxbxtext", "ebcend", "ebcstart", "factoidname", "falt", "fchars", "ffdeftext", | |
| 223 | - "ffentrymcr", "ffexitmcr", "ffformat", "ffhelptext", "ffl", "ffname", "ffstattext", "field", "file", "filetbl", | |
| 224 | - "fldinst", "fldrslt", "fldtype", "fname", "fontemb", "fontfile", "fonttbl", "footer", "footerf", "footerl", | |
| 225 | - "footerr", "footnote", "formfield", "ftncn", "ftnsep", "ftnsepc", "g", "generator", "gridtbl", "header", "headerf", | |
| 226 | - "headerl", "headerr", "hl", "hlfr", "hlinkbase", "hlloc", "hlsrc", "hsv", "htmltag", "info", "keycode", "keywords", | |
| 227 | - "latentstyles", "lchars", "levelnumbers", "leveltext", "lfolevel", "linkval", "list", "listlevel", "listname", | |
| 228 | - "listoverride", "listoverridetable", "listpicture", "liststylename", "listtable", "listtext", "lsdlockedexcept", | |
| 229 | - "macc", "maccPr", "mailmerge", "maln", "malnScr", "manager", "margPr", "mbar", "mbarPr", "mbaseJc", "mbegChr", | |
| 230 | - "mborderBox", "mborderBoxPr", "mbox", "mboxPr", "mchr", "mcount", "mctrlPr", "md", "mdeg", "mdegHide", "mden", | |
| 231 | - "mdiff", "mdPr", "me", "mendChr", "meqArr", "meqArrPr", "mf", "mfName", "mfPr", "mfunc", "mfuncPr", "mgroupChr", | |
| 232 | - "mgroupChrPr", "mgrow", "mhideBot", "mhideLeft", "mhideRight", "mhideTop", "mhtmltag", "mlim", "mlimloc", "mlimlow", | |
| 233 | - "mlimlowPr", "mlimupp", "mlimuppPr", "mm", "mmaddfieldname", "mmath", "mmathPict", "mmathPr", "mmaxdist", "mmc", | |
| 234 | - "mmcJc", "mmconnectstr", "mmconnectstrdata", "mmcPr", "mmcs", "mmdatasource", "mmheadersource", "mmmailsubject", | |
| 235 | - "mmodso", "mmodsofilter", "mmodsofldmpdata", "mmodsomappedname", "mmodsoname", "mmodsorecipdata", "mmodsosort", | |
| 236 | - "mmodsosrc", "mmodsotable", "mmodsoudl", "mmodsoudldata", "mmodsouniquetag", "mmPr", "mmquery", "mmr", "mnary", | |
| 237 | - "mnaryPr", "mnoBreak", "mnum", "mobjDist", "moMath", "moMathPara", "moMathParaPr", "mopEmu", "mphant", "mphantPr", | |
| 238 | - "mplcHide", "mpos", "mr", "mrad", "mradPr", "mrPr", "msepChr", "mshow", "mshp", "msPre", "msPrePr", "msSub", | |
| 239 | - "msSubPr", "msSubSup", "msSubSupPr", "msSup", "msSupPr", "mstrikeBLTR", "mstrikeH", "mstrikeTLBR", "mstrikeV", | |
| 240 | - "msub", "msubHide", "msup", "msupHide", "mtransp", "mtype", "mvertJc", "mvfmf", "mvfml", "mvtof", "mvtol", | |
| 241 | - "mzeroAsc", "mzeroDesc", "mzeroWid", "nesttableprops", "nextfile", "nonesttables", "objalias", "objclass", | |
| 242 | - "objdata", "object", "objname", "objsect", "objtime", "oldcprops", "oldpprops", "oldsprops", "oldtprops", | |
| 243 | - "oleclsid", "operator", "panose", "password", "passwordhash", "pgp", "pgptbl", "picprop", "pict", "pn", "pnseclvl", | |
| 244 | - "pntext", "pntxta", "pntxtb", "printim", "private", "propname", "protend", "protstart", "protusertbl", "pxe", | |
| 245 | - "result", "revtbl", "revtim", "rsidtbl", "rtf", "rxe", "shp", "shpgrp", "shpinst", "shppict", "shprslt", "shptxt", | |
| 246 | - "sn", "sp", "staticval", "stylesheet", "subject", "sv", "svb", "tc", "template", "themedata", "title", "txe", "ud", | |
| 247 | - "upr", "userprops", "wgrffmtfilter", "windowcaption", "writereservation", "writereservhash", "xe", "xform", | |
| 248 | - "xmlattrname", "xmlattrvalue", "xmlclose", "xmlname", "xmlnstbl", "xmlopen" | |
| 220 | + b"aftncn", b"aftnsep", b"aftnsepc", b"annotation", b"atnauthor", b"atndate", b"atnicn", b"atnid", b"atnparent", b"atnref", | |
| 221 | + b"atntime", b"atrfend", b"atrfstart", b"author", b"background", b"bkmkend", b"bkmkstart", b"blipuid", b"buptim", b"category", | |
| 222 | + b"colorschememapping", b"colortbl", b"comment", b"company", b"creatim", b"datafield", b"datastore", b"defchp", b"defpap", | |
| 223 | + b"do", b"doccomm", b"docvar", b"dptxbxtext", b"ebcend", b"ebcstart", b"factoidname", b"falt", b"fchars", b"ffdeftext", | |
| 224 | + b"ffentrymcr", b"ffexitmcr", b"ffformat", b"ffhelptext", b"ffl", b"ffname",b"ffstattext", b"field", b"file", b"filetbl", | |
| 225 | + b"fldinst", b"fldrslt", b"fldtype", b"fname", b"fontemb", b"fontfile", b"fonttbl", b"footer", b"footerf", b"footerl", | |
| 226 | + b"footerr", b"footnote", b"formfield", b"ftncn", b"ftnsep", b"ftnsepc", b"g", b"generator", b"gridtbl", b"header", b"headerf", | |
| 227 | + b"headerl", b"headerr", b"hl", b"hlfr", b"hlinkbase", b"hlloc", b"hlsrc", b"hsv", b"htmltag", b"info", b"keycode", b"keywords", | |
| 228 | + b"latentstyles", b"lchars", b"levelnumbers", b"leveltext", b"lfolevel", b"linkval", b"list", b"listlevel", b"listname", | |
| 229 | + b"listoverride", b"listoverridetable", b"listpicture", b"liststylename", b"listtable", b"listtext", b"lsdlockedexcept", | |
| 230 | + b"macc", b"maccPr", b"mailmerge", b"maln",b"malnScr", b"manager", b"margPr", b"mbar", b"mbarPr", b"mbaseJc", b"mbegChr", | |
| 231 | + b"mborderBox", b"mborderBoxPr", b"mbox", b"mboxPr", b"mchr", b"mcount", b"mctrlPr", b"md", b"mdeg", b"mdegHide", b"mden", | |
| 232 | + b"mdiff", b"mdPr", b"me", b"mendChr", b"meqArr", b"meqArrPr", b"mf", b"mfName", b"mfPr", b"mfunc", b"mfuncPr",b"mgroupChr", | |
| 233 | + b"mgroupChrPr",b"mgrow", b"mhideBot", b"mhideLeft", b"mhideRight", b"mhideTop", b"mhtmltag", b"mlim", b"mlimloc", b"mlimlow", | |
| 234 | + b"mlimlowPr", b"mlimupp", b"mlimuppPr", b"mm", b"mmaddfieldname", b"mmath", b"mmathPict", b"mmathPr",b"mmaxdist", b"mmc", | |
| 235 | + b"mmcJc", b"mmconnectstr", b"mmconnectstrdata", b"mmcPr", b"mmcs", b"mmdatasource", b"mmheadersource", b"mmmailsubject", | |
| 236 | + b"mmodso", b"mmodsofilter", b"mmodsofldmpdata", b"mmodsomappedname", b"mmodsoname", b"mmodsorecipdata", b"mmodsosort", | |
| 237 | + b"mmodsosrc", b"mmodsotable", b"mmodsoudl", b"mmodsoudldata", b"mmodsouniquetag", b"mmPr", b"mmquery", b"mmr", b"mnary", | |
| 238 | + b"mnaryPr", b"mnoBreak", b"mnum", b"mobjDist", b"moMath", b"moMathPara", b"moMathParaPr", b"mopEmu", b"mphant", b"mphantPr", | |
| 239 | + b"mplcHide", b"mpos", b"mr", b"mrad", b"mradPr", b"mrPr", b"msepChr", b"mshow", b"mshp", b"msPre", b"msPrePr", b"msSub", | |
| 240 | + b"msSubPr", b"msSubSup", b"msSubSupPr", b"msSup", b"msSupPr", b"mstrikeBLTR", b"mstrikeH", b"mstrikeTLBR", b"mstrikeV", | |
| 241 | + b"msub", b"msubHide", b"msup", b"msupHide", b"mtransp", b"mtype", b"mvertJc", b"mvfmf", b"mvfml", b"mvtof", b"mvtol", | |
| 242 | + b"mzeroAsc", b"mzeroDesc", b"mzeroWid", b"nesttableprops", b"nextfile", b"nonesttables", b"objalias", b"objclass", | |
| 243 | + b"objdata", b"object", b"objname", b"objsect", b"objtime", b"oldcprops", b"oldpprops", b"oldsprops", b"oldtprops", | |
| 244 | + b"oleclsid", b"operator", b"panose", b"password", b"passwordhash", b"pgp", b"pgptbl", b"picprop", b"pict", b"pn", b"pnseclvl", | |
| 245 | + b"pntext", b"pntxta", b"pntxtb", b"printim", b"private", b"propname", b"protend", b"protstart", b"protusertbl", b"pxe", | |
| 246 | + b"result", b"revtbl", b"revtim", b"rsidtbl", b"rtf", b"rxe", b"shp", b"shpgrp", b"shpinst", b"shppict", b"shprslt", b"shptxt", | |
| 247 | + b"sn", b"sp", b"staticval", b"stylesheet", b"subject", b"sv", b"svb", b"tc", b"template", b"themedata", b"title", b"txe", b"ud", | |
| 248 | + b"upr", b"userprops", b"wgrffmtfilter", b"windowcaption", b"writereservation", b"writereservhash", b"xe", b"xform", | |
| 249 | + b"xmlattrname", b"xmlattrvalue", b"xmlclose", b"xmlname", b"xmlnstbl", b"xmlopen" | |
| 249 | 250 | )) |
| 250 | 251 | |
| 251 | 252 | |
| ... | ... | @@ -258,7 +259,7 @@ class Destination(object): |
| 258 | 259 | """ |
| 259 | 260 | def __init__(self, cword=None): |
| 260 | 261 | self.cword = cword |
| 261 | - self.data = '' | |
| 262 | + self.data = b'' | |
| 262 | 263 | self.start = None |
| 263 | 264 | self.end = None |
| 264 | 265 | self.group_level = 0 |
| ... | ... | @@ -293,15 +294,15 @@ class RtfParser(object): |
| 293 | 294 | def parse(self): |
| 294 | 295 | self.index = 0 |
| 295 | 296 | while self.index < self.size: |
| 296 | - if self.data[self.index] == '{': | |
| 297 | + if self.data[self.index] == ord('{'): | |
| 297 | 298 | self._open_group() |
| 298 | 299 | self.index += 1 |
| 299 | 300 | continue |
| 300 | - if self.data[self.index] == '}': | |
| 301 | + if self.data[self.index] == ord('}'): | |
| 301 | 302 | self._close_group() |
| 302 | 303 | self.index += 1 |
| 303 | 304 | continue |
| 304 | - if self.data[self.index] == '\\': | |
| 305 | + if self.data[self.index] == ord('\\'): | |
| 305 | 306 | m = re_control_word.match(self.data, self.index) |
| 306 | 307 | if m: |
| 307 | 308 | cword = m.group(1) |
| ... | ... | @@ -312,7 +313,7 @@ class RtfParser(object): |
| 312 | 313 | self._control_word(m, cword, param) |
| 313 | 314 | self.index += len(m.group()) |
| 314 | 315 | # if it's \bin, call _bin after updating index |
| 315 | - if cword == 'bin': | |
| 316 | + if cword == b'bin': | |
| 316 | 317 | self._bin(m, param) |
| 317 | 318 | continue |
| 318 | 319 | m = re_control_symbol.match(self.data, self.index) |
| ... | ... | @@ -450,19 +451,19 @@ class RtfObjParser(RtfParser): |
| 450 | 451 | self.fname_prefix = fname_prefix |
| 451 | 452 | |
| 452 | 453 | def open_destination(self, destination): |
| 453 | - if destination.cword == 'objdata': | |
| 454 | + if destination.cword == b'objdata': | |
| 454 | 455 | log.debug('*** Start object data at index %Xh' % destination.start) |
| 455 | 456 | |
| 456 | 457 | def close_destination(self, destination): |
| 457 | - if destination.cword == 'objdata': | |
| 458 | + if destination.cword == b'objdata': | |
| 458 | 459 | log.debug('*** Close object data at index %Xh' % self.index) |
| 459 | 460 | # Filter out all whitespaces first (just ignored): |
| 460 | - hexdata1 = destination.data.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v') | |
| 461 | + hexdata1 = destination.data.translate(TRANSTABLE_NOCHANGE, b' \t\r\n\f\v') | |
| 461 | 462 | # Then filter out any other non-hex character: |
| 462 | - hexdata = re.sub(r'[^a-hA-H0-9]', '', hexdata1) | |
| 463 | + hexdata = re.sub(b'[^a-hA-H0-9]', b'', hexdata1) | |
| 463 | 464 | if len(hexdata) < len(hexdata1): |
| 464 | 465 | # this is only for debugging: |
| 465 | - nonhex = re.sub(r'[a-hA-H0-9]', '', hexdata1) | |
| 466 | + nonhex = re.sub(b'[a-hA-H0-9]', b'', hexdata1) | |
| 466 | 467 | log.debug('Found non-hex chars in hexdata: %r' % nonhex) |
| 467 | 468 | # MS Word accepts an extra hex digit, so we need to trim it if present: |
| 468 | 469 | if len(hexdata) & 1: |
| ... | ... | @@ -485,9 +486,9 @@ class RtfObjParser(RtfParser): |
| 485 | 486 | print('data size = %d' % obj.data_size) |
| 486 | 487 | # set a file extension according to the class name: |
| 487 | 488 | class_name = obj.class_name.lower() |
| 488 | - if class_name.startswith('word'): | |
| 489 | + if class_name.startswith(b'word'): | |
| 489 | 490 | ext = 'doc' |
| 490 | - elif class_name.startswith('package'): | |
| 491 | + elif class_name.startswith(b'package'): | |
| 491 | 492 | ext = 'package' |
| 492 | 493 | else: |
| 493 | 494 | ext = 'bin' | ... | ... |