Commit 1cf591ddf5ec2da343d6ddc3f73383e8f1bdcdc8

Authored by Philippe Lagadec
Committed by GitHub
2 parents 2685c6f5 8adff0e0

Merge pull request #63 from sebdraven/master

python3.5 compliant only
oletools/oleobj.py
@@ -162,7 +162,7 @@ def read_LengthPrefixedAnsiString(data): @@ -162,7 +162,7 @@ def read_LengthPrefixedAnsiString(data):
162 ansi_string = data[:length-1] 162 ansi_string = data[:length-1]
163 # TODO: only in strict mode: 163 # TODO: only in strict mode:
164 # check the presence of the null char: 164 # check the presence of the null char:
165 - assert data[length] == '\x00' 165 + assert data[length] == 0
166 new_data = data[length:] 166 new_data = data[length:]
167 return (ansi_string, new_data) 167 return (ansi_string, new_data)
168 168
@@ -214,14 +214,14 @@ class OleNativeStream (object): @@ -214,14 +214,14 @@ class OleNativeStream (object):
214 # log.debug('OLE native data size = {0:08X} ({0} bytes)'.format(self.native_data_size)) 214 # log.debug('OLE native data size = {0:08X} ({0} bytes)'.format(self.native_data_size))
215 # I thought this might be an OLE type specifier ??? 215 # I thought this might be an OLE type specifier ???
216 self.unknown_short, data = read_uint16(data) 216 self.unknown_short, data = read_uint16(data)
217 - self.filename, data = data.split('\x00', 1) 217 + self.filename, data = data.split(b'\x00', 1)
218 # source path 218 # source path
219 - self.src_path, data = data.split('\x00', 1) 219 + self.src_path, data = data.split(b'\x00', 1)
220 # TODO I bet these next 8 bytes are a timestamp => FILETIME from olefile 220 # TODO I bet these next 8 bytes are a timestamp => FILETIME from olefile
221 self.unknown_long_1, data = read_uint32(data) 221 self.unknown_long_1, data = read_uint32(data)
222 self.unknown_long_2, data = read_uint32(data) 222 self.unknown_long_2, data = read_uint32(data)
223 # temp path? 223 # temp path?
224 - self.temp_path, data = data.split('\x00', 1) 224 + self.temp_path, data = data.split(b'\x00', 1)
225 # size of the rest of the data 225 # size of the rest of the data
226 self.actual_size, data = read_uint32(data) 226 self.actual_size, data = read_uint32(data)
227 self.data = data[0:self.actual_size] 227 self.data = data[0:self.actual_size]
oletools/rtfobj.py
@@ -120,7 +120,7 @@ log = get_logger('rtfobj') @@ -120,7 +120,7 @@ log = get_logger('rtfobj')
120 # REGEX pattern to extract embedded OLE objects in hexadecimal format: 120 # REGEX pattern to extract embedded OLE objects in hexadecimal format:
121 121
122 # alphanum digit: [0-9A-Fa-f] 122 # alphanum digit: [0-9A-Fa-f]
123 -HEX_DIGIT = r'[0-9A-Fa-f]' 123 +HEX_DIGIT = rb'[0-9A-Fa-f]'
124 124
125 # hex char = two alphanum digits: [0-9A-Fa-f]{2} 125 # hex char = two alphanum digits: [0-9A-Fa-f]{2}
126 # HEX_CHAR = r'[0-9A-Fa-f]{2}' 126 # HEX_CHAR = r'[0-9A-Fa-f]{2}'
@@ -130,11 +130,11 @@ HEX_DIGIT = r'[0-9A-Fa-f]' @@ -130,11 +130,11 @@ HEX_DIGIT = r'[0-9A-Fa-f]'
130 # AND the tags can be nested... 130 # AND the tags can be nested...
131 #SINGLE_RTF_TAG = r'[{][^{}]*[}]' 131 #SINGLE_RTF_TAG = r'[{][^{}]*[}]'
132 # Actually RTF tags may contain braces escaped with backslash (\{ \}): 132 # Actually RTF tags may contain braces escaped with backslash (\{ \}):
133 -SINGLE_RTF_TAG = r'[{](?:\.|[^{}\])*[}]' 133 +SINGLE_RTF_TAG = rb'[{](?:\.|[^{}\])*[}]'
134 134
135 # Nested tags, two levels (because Python's re does not support nested matching): 135 # Nested tags, two levels (because Python's re does not support nested matching):
136 # NESTED_RTF_TAG = r'[{](?:[^{}]|'+SINGLE_RTF_TAG+r')*[}]' 136 # NESTED_RTF_TAG = r'[{](?:[^{}]|'+SINGLE_RTF_TAG+r')*[}]'
137 -NESTED_RTF_TAG = r'[{](?:\\.|[^{}\\]|'+SINGLE_RTF_TAG+r')*[}]' 137 +NESTED_RTF_TAG = rb'[{](?:\\.|[^{}\\]|'+SINGLE_RTF_TAG+b')*[}]'
138 138
139 # AND it is also allowed to insert ANY control word or control symbol (ignored) 139 # AND it is also allowed to insert ANY control word or control symbol (ignored)
140 # According to Rich Text Format (RTF) Specification Version 1.9.1, 140 # According to Rich Text Format (RTF) Specification Version 1.9.1,
@@ -146,7 +146,7 @@ NESTED_RTF_TAG = r'[{](?:\\.|[^{}\\]|'+SINGLE_RTF_TAG+r')*[}]' @@ -146,7 +146,7 @@ NESTED_RTF_TAG = r'[{](?:\\.|[^{}\\]|'+SINGLE_RTF_TAG+r')*[}]'
146 # "\AnyThing " "\AnyThing123z" ""\AnyThing-456{" "\AnyThing{" 146 # "\AnyThing " "\AnyThing123z" ""\AnyThing-456{" "\AnyThing{"
147 # control symbol = \<any char except letter or digit> (followed by anything) 147 # control symbol = \<any char except letter or digit> (followed by anything)
148 148
149 -ASCII_NAME = r'([a-zA-Z]{1,250})' 149 +ASCII_NAME = rb'([a-zA-Z]{1,250})'
150 150
151 # using Python's re lookahead assumption: 151 # using Python's re lookahead assumption:
152 # (?=...) Matches if ... matches next, but doesn't consume any of the string. 152 # (?=...) Matches if ... matches next, but doesn't consume any of the string.
@@ -155,20 +155,21 @@ ASCII_NAME = r'([a-zA-Z]{1,250})' @@ -155,20 +155,21 @@ ASCII_NAME = r'([a-zA-Z]{1,250})'
155 155
156 # TODO: Find the actual limit on the number of digits for Word 156 # TODO: Find the actual limit on the number of digits for Word
157 # SIGNED_INTEGER = r'(-?\d{1,250})' 157 # SIGNED_INTEGER = r'(-?\d{1,250})'
158 -SIGNED_INTEGER = r'(-?\d+)' 158 +SIGNED_INTEGER = rb'(-?\d+)'
  159 +
  160 +CONTROL_WORD = rb'(?:\\' + ASCII_NAME + rb'(?:(?=[^a-zA-Z0-9-])|' + SIGNED_INTEGER + rb'(?=[^0-9])))'
159 161
160 -CONTROL_WORD = r'(?:\\' + ASCII_NAME + r'(?:(?=[^a-zA-Z0-9-])|' + SIGNED_INTEGER + r'(?=[^0-9])))'  
161 re_control_word = re.compile(CONTROL_WORD) 162 re_control_word = re.compile(CONTROL_WORD)
162 163
163 -CONTROL_SYMBOL = r'(?:\[^a-zA-Z0-9])' 164 +CONTROL_SYMBOL = rb'(?:\[^a-zA-Z0-9])'
164 re_control_symbol = re.compile(CONTROL_SYMBOL) 165 re_control_symbol = re.compile(CONTROL_SYMBOL)
165 166
166 # Text that is not a control word/symbol or a group: 167 # Text that is not a control word/symbol or a group:
167 -TEXT = r'[^{}\]+' 168 +TEXT = rb'[^{}\]+'
168 re_text = re.compile(TEXT) 169 re_text = re.compile(TEXT)
169 170
170 # ignored whitespaces and tags within a hex block: 171 # ignored whitespaces and tags within a hex block:
171 -IGNORED = r'(?:\s|'+NESTED_RTF_TAG+'|'+CONTROL_SYMBOL+'|'+CONTROL_WORD+r')*' 172 +IGNORED = rb'(?:\s|'+NESTED_RTF_TAG+rb'|'+CONTROL_SYMBOL+rb'|'+CONTROL_WORD+rb')*'
172 #IGNORED = r'\s*' 173 #IGNORED = r'\s*'
173 174
174 # HEX_CHAR = HEX_DIGIT + IGNORED + HEX_DIGIT 175 # HEX_CHAR = HEX_DIGIT + IGNORED + HEX_DIGIT
@@ -188,7 +189,7 @@ IGNORED = r'(?:\s|'+NESTED_RTF_TAG+'|'+CONTROL_SYMBOL+'|'+CONTROL_WORD+r')*' @@ -188,7 +189,7 @@ IGNORED = r'(?:\s|'+NESTED_RTF_TAG+'|'+CONTROL_SYMBOL+'|'+CONTROL_WORD+r')*'
188 189
189 #TODO PATTERN = r'\b(?:' + HEX_CHAR + IGNORED + r'){4,}\b' 190 #TODO PATTERN = r'\b(?:' + HEX_CHAR + IGNORED + r'){4,}\b'
190 # PATTERN = r'\b(?:' + HEX_CHAR + IGNORED + r'){4,}' #+ HEX_CHAR + r'\b' 191 # PATTERN = r'\b(?:' + HEX_CHAR + IGNORED + r'){4,}' #+ HEX_CHAR + r'\b'
191 -PATTERN = r'\b(?:' + HEX_DIGIT + IGNORED + r'){7,}' + HEX_DIGIT + r'\b' 192 +PATTERN = rb'\b(?:' + HEX_DIGIT + IGNORED + rb'){7,}' + HEX_DIGIT + rb'\b'
192 193
193 # at least 4 hex chars, followed by whitespace or CR/LF: (?:[0-9A-Fa-f]{2}){4,}\s* 194 # at least 4 hex chars, followed by whitespace or CR/LF: (?:[0-9A-Fa-f]{2}){4,}\s*
194 # PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}' 195 # PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}'
@@ -196,19 +197,19 @@ PATTERN = r'\b(?:' + HEX_DIGIT + IGNORED + r'){7,}' + HEX_DIGIT + r'\b' @@ -196,19 +197,19 @@ PATTERN = r'\b(?:' + HEX_DIGIT + IGNORED + r'){7,}' + HEX_DIGIT + r'\b'
196 #PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}' 197 #PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}'
197 198
198 # a dummy translation table for str.translate, which does not change anything: 199 # a dummy translation table for str.translate, which does not change anything:
199 -TRANSTABLE_NOCHANGE = string.maketrans('', '') 200 +TRANSTABLE_NOCHANGE = bytes.maketrans(b'', b'')
200 201
201 re_hexblock = re.compile(PATTERN) 202 re_hexblock = re.compile(PATTERN)
202 re_embedded_tags = re.compile(IGNORED) 203 re_embedded_tags = re.compile(IGNORED)
203 -re_decimal = re.compile(r'\d+') 204 +re_decimal = re.compile(rb'\d+')
204 205
205 -re_delimiter = re.compile(r'[ \t\r\n\f\v]') 206 +re_delimiter = re.compile(rb'[ \t\r\n\f\v]')
206 207
207 -DELIMITER = r'[ \t\r\n\f\v]'  
208 -DELIMITERS_ZeroOrMore = r'[ \t\r\n\f\v]*'  
209 -BACKSLASH_BIN = r'\\bin' 208 +DELIMITER = rb'[ \t\r\n\f\v]'
  209 +DELIMITERS_ZeroOrMore = rb'[ \t\r\n\f\v]*'
  210 +BACKSLASH_BIN = rb'\\bin'
210 # According to my tests, Word accepts up to 250 digits (leading zeroes) 211 # According to my tests, Word accepts up to 250 digits (leading zeroes)
211 -DECIMAL_GROUP = r'(\d{1,250})' 212 +DECIMAL_GROUP = rb'(\d{1,250})'
212 213
213 re_delims_bin_decimal = re.compile(DELIMITERS_ZeroOrMore + BACKSLASH_BIN 214 re_delims_bin_decimal = re.compile(DELIMITERS_ZeroOrMore + BACKSLASH_BIN
214 + DECIMAL_GROUP + DELIMITER) 215 + DECIMAL_GROUP + DELIMITER)
@@ -216,36 +217,36 @@ re_delim_hexblock = re.compile(DELIMITER + PATTERN) @@ -216,36 +217,36 @@ re_delim_hexblock = re.compile(DELIMITER + PATTERN)
216 217
217 # Destination Control Words, according to MS RTF Specifications v1.9.1: 218 # Destination Control Words, according to MS RTF Specifications v1.9.1:
218 DESTINATION_CONTROL_WORDS = frozenset(( 219 DESTINATION_CONTROL_WORDS = frozenset((
219 - "aftncn", "aftnsep", "aftnsepc", "annotation", "atnauthor", "atndate", "atnicn", "atnid", "atnparent", "atnref",  
220 - "atntime", "atrfend", "atrfstart", "author", "background", "bkmkend", "bkmkstart", "blipuid", "buptim", "category",  
221 - "colorschememapping", "colortbl", "comment", "company", "creatim", "datafield", "datastore", "defchp", "defpap",  
222 - "do", "doccomm", "docvar", "dptxbxtext", "ebcend", "ebcstart", "factoidname", "falt", "fchars", "ffdeftext",  
223 - "ffentrymcr", "ffexitmcr", "ffformat", "ffhelptext", "ffl", "ffname", "ffstattext", "field", "file", "filetbl",  
224 - "fldinst", "fldrslt", "fldtype", "fname", "fontemb", "fontfile", "fonttbl", "footer", "footerf", "footerl",  
225 - "footerr", "footnote", "formfield", "ftncn", "ftnsep", "ftnsepc", "g", "generator", "gridtbl", "header", "headerf",  
226 - "headerl", "headerr", "hl", "hlfr", "hlinkbase", "hlloc", "hlsrc", "hsv", "htmltag", "info", "keycode", "keywords",  
227 - "latentstyles", "lchars", "levelnumbers", "leveltext", "lfolevel", "linkval", "list", "listlevel", "listname",  
228 - "listoverride", "listoverridetable", "listpicture", "liststylename", "listtable", "listtext", "lsdlockedexcept",  
229 - "macc", "maccPr", "mailmerge", "maln", "malnScr", "manager", "margPr", "mbar", "mbarPr", "mbaseJc", "mbegChr",  
230 - "mborderBox", "mborderBoxPr", "mbox", "mboxPr", "mchr", "mcount", "mctrlPr", "md", "mdeg", "mdegHide", "mden",  
231 - "mdiff", "mdPr", "me", "mendChr", "meqArr", "meqArrPr", "mf", "mfName", "mfPr", "mfunc", "mfuncPr", "mgroupChr",  
232 - "mgroupChrPr", "mgrow", "mhideBot", "mhideLeft", "mhideRight", "mhideTop", "mhtmltag", "mlim", "mlimloc", "mlimlow",  
233 - "mlimlowPr", "mlimupp", "mlimuppPr", "mm", "mmaddfieldname", "mmath", "mmathPict", "mmathPr", "mmaxdist", "mmc",  
234 - "mmcJc", "mmconnectstr", "mmconnectstrdata", "mmcPr", "mmcs", "mmdatasource", "mmheadersource", "mmmailsubject",  
235 - "mmodso", "mmodsofilter", "mmodsofldmpdata", "mmodsomappedname", "mmodsoname", "mmodsorecipdata", "mmodsosort",  
236 - "mmodsosrc", "mmodsotable", "mmodsoudl", "mmodsoudldata", "mmodsouniquetag", "mmPr", "mmquery", "mmr", "mnary",  
237 - "mnaryPr", "mnoBreak", "mnum", "mobjDist", "moMath", "moMathPara", "moMathParaPr", "mopEmu", "mphant", "mphantPr",  
238 - "mplcHide", "mpos", "mr", "mrad", "mradPr", "mrPr", "msepChr", "mshow", "mshp", "msPre", "msPrePr", "msSub",  
239 - "msSubPr", "msSubSup", "msSubSupPr", "msSup", "msSupPr", "mstrikeBLTR", "mstrikeH", "mstrikeTLBR", "mstrikeV",  
240 - "msub", "msubHide", "msup", "msupHide", "mtransp", "mtype", "mvertJc", "mvfmf", "mvfml", "mvtof", "mvtol",  
241 - "mzeroAsc", "mzeroDesc", "mzeroWid", "nesttableprops", "nextfile", "nonesttables", "objalias", "objclass",  
242 - "objdata", "object", "objname", "objsect", "objtime", "oldcprops", "oldpprops", "oldsprops", "oldtprops",  
243 - "oleclsid", "operator", "panose", "password", "passwordhash", "pgp", "pgptbl", "picprop", "pict", "pn", "pnseclvl",  
244 - "pntext", "pntxta", "pntxtb", "printim", "private", "propname", "protend", "protstart", "protusertbl", "pxe",  
245 - "result", "revtbl", "revtim", "rsidtbl", "rtf", "rxe", "shp", "shpgrp", "shpinst", "shppict", "shprslt", "shptxt",  
246 - "sn", "sp", "staticval", "stylesheet", "subject", "sv", "svb", "tc", "template", "themedata", "title", "txe", "ud",  
247 - "upr", "userprops", "wgrffmtfilter", "windowcaption", "writereservation", "writereservhash", "xe", "xform",  
248 - "xmlattrname", "xmlattrvalue", "xmlclose", "xmlname", "xmlnstbl", "xmlopen" 220 + b"aftncn", b"aftnsep", b"aftnsepc", b"annotation", b"atnauthor", b"atndate", b"atnicn", b"atnid", b"atnparent", b"atnref",
  221 + b"atntime", b"atrfend", b"atrfstart", b"author", b"background", b"bkmkend", b"bkmkstart", b"blipuid", b"buptim", b"category",
  222 + b"colorschememapping", b"colortbl", b"comment", b"company", b"creatim", b"datafield", b"datastore", b"defchp", b"defpap",
  223 + b"do", b"doccomm", b"docvar", b"dptxbxtext", b"ebcend", b"ebcstart", b"factoidname", b"falt", b"fchars", b"ffdeftext",
  224 + b"ffentrymcr", b"ffexitmcr", b"ffformat", b"ffhelptext", b"ffl", b"ffname",b"ffstattext", b"field", b"file", b"filetbl",
  225 + b"fldinst", b"fldrslt", b"fldtype", b"fname", b"fontemb", b"fontfile", b"fonttbl", b"footer", b"footerf", b"footerl",
  226 + b"footerr", b"footnote", b"formfield", b"ftncn", b"ftnsep", b"ftnsepc", b"g", b"generator", b"gridtbl", b"header", b"headerf",
  227 + b"headerl", b"headerr", b"hl", b"hlfr", b"hlinkbase", b"hlloc", b"hlsrc", b"hsv", b"htmltag", b"info", b"keycode", b"keywords",
  228 + b"latentstyles", b"lchars", b"levelnumbers", b"leveltext", b"lfolevel", b"linkval", b"list", b"listlevel", b"listname",
  229 + b"listoverride", b"listoverridetable", b"listpicture", b"liststylename", b"listtable", b"listtext", b"lsdlockedexcept",
  230 + b"macc", b"maccPr", b"mailmerge", b"maln",b"malnScr", b"manager", b"margPr", b"mbar", b"mbarPr", b"mbaseJc", b"mbegChr",
  231 + b"mborderBox", b"mborderBoxPr", b"mbox", b"mboxPr", b"mchr", b"mcount", b"mctrlPr", b"md", b"mdeg", b"mdegHide", b"mden",
  232 + b"mdiff", b"mdPr", b"me", b"mendChr", b"meqArr", b"meqArrPr", b"mf", b"mfName", b"mfPr", b"mfunc", b"mfuncPr",b"mgroupChr",
  233 + b"mgroupChrPr",b"mgrow", b"mhideBot", b"mhideLeft", b"mhideRight", b"mhideTop", b"mhtmltag", b"mlim", b"mlimloc", b"mlimlow",
  234 + b"mlimlowPr", b"mlimupp", b"mlimuppPr", b"mm", b"mmaddfieldname", b"mmath", b"mmathPict", b"mmathPr",b"mmaxdist", b"mmc",
  235 + b"mmcJc", b"mmconnectstr", b"mmconnectstrdata", b"mmcPr", b"mmcs", b"mmdatasource", b"mmheadersource", b"mmmailsubject",
  236 + b"mmodso", b"mmodsofilter", b"mmodsofldmpdata", b"mmodsomappedname", b"mmodsoname", b"mmodsorecipdata", b"mmodsosort",
  237 + b"mmodsosrc", b"mmodsotable", b"mmodsoudl", b"mmodsoudldata", b"mmodsouniquetag", b"mmPr", b"mmquery", b"mmr", b"mnary",
  238 + b"mnaryPr", b"mnoBreak", b"mnum", b"mobjDist", b"moMath", b"moMathPara", b"moMathParaPr", b"mopEmu", b"mphant", b"mphantPr",
  239 + b"mplcHide", b"mpos", b"mr", b"mrad", b"mradPr", b"mrPr", b"msepChr", b"mshow", b"mshp", b"msPre", b"msPrePr", b"msSub",
  240 + b"msSubPr", b"msSubSup", b"msSubSupPr", b"msSup", b"msSupPr", b"mstrikeBLTR", b"mstrikeH", b"mstrikeTLBR", b"mstrikeV",
  241 + b"msub", b"msubHide", b"msup", b"msupHide", b"mtransp", b"mtype", b"mvertJc", b"mvfmf", b"mvfml", b"mvtof", b"mvtol",
  242 + b"mzeroAsc", b"mzeroDesc", b"mzeroWid", b"nesttableprops", b"nexctfile", b"nonesttables", b"objalias", b"objclass",
  243 + b"objdata", b"object", b"objname", b"objsect", b"objtime", b"oldcprops", b"oldpprops", b"oldsprops", b"oldtprops",
  244 + b"oleclsid", b"operator", b"panose", b"password", b"passwordhash", b"pgp", b"pgptbl", b"picprop", b"pict", b"pn", b"pnseclvl",
  245 + b"pntext", b"pntxta", b"pntxtb", b"printim", b"private", b"propname", b"protend", b"protstart", b"protusertbl", b"pxe",
  246 + b"result", b"revtbl", b"revtim", b"rsidtbl", b"rtf", b"rxe", b"shp", b"shpgrp", b"shpinst", b"shppict", b"shprslt", b"shptxt",
  247 + b"sn", b"sp", b"staticval", b"stylesheet", b"subject", b"sv", b"svb", b"tc", b"template", b"themedata", b"title", b"txe", b"ud",
  248 + b"upr", b"userprops", b"wgrffmtfilter", b"windowcaption", b"writereservation", b"writereservhash", b"xe", b"xform",
  249 + b"xmlattrname", b"xmlattrvalue", b"xmlclose", b"xmlname", b"xmlnstbl", b"xmlopen"
249 )) 250 ))
250 251
251 252
@@ -258,7 +259,7 @@ class Destination(object): @@ -258,7 +259,7 @@ class Destination(object):
258 """ 259 """
259 def __init__(self, cword=None): 260 def __init__(self, cword=None):
260 self.cword = cword 261 self.cword = cword
261 - self.data = '' 262 + self.data = b''
262 self.start = None 263 self.start = None
263 self.end = None 264 self.end = None
264 self.group_level = 0 265 self.group_level = 0
@@ -293,15 +294,15 @@ class RtfParser(object): @@ -293,15 +294,15 @@ class RtfParser(object):
293 def parse(self): 294 def parse(self):
294 self.index = 0 295 self.index = 0
295 while self.index < self.size: 296 while self.index < self.size:
296 - if self.data[self.index] == '{': 297 + if self.data[self.index] == ord('{'):
297 self._open_group() 298 self._open_group()
298 self.index += 1 299 self.index += 1
299 continue 300 continue
300 - if self.data[self.index] == '}': 301 + if self.data[self.index] == ord('}'):
301 self._close_group() 302 self._close_group()
302 self.index += 1 303 self.index += 1
303 continue 304 continue
304 - if self.data[self.index] == '\\': 305 + if self.data[self.index] == ord('\\'):
305 m = re_control_word.match(self.data, self.index) 306 m = re_control_word.match(self.data, self.index)
306 if m: 307 if m:
307 cword = m.group(1) 308 cword = m.group(1)
@@ -312,7 +313,7 @@ class RtfParser(object): @@ -312,7 +313,7 @@ class RtfParser(object):
312 self._control_word(m, cword, param) 313 self._control_word(m, cword, param)
313 self.index += len(m.group()) 314 self.index += len(m.group())
314 # if it's \bin, call _bin after updating index 315 # if it's \bin, call _bin after updating index
315 - if cword == 'bin': 316 + if cword == b'bin':
316 self._bin(m, param) 317 self._bin(m, param)
317 continue 318 continue
318 m = re_control_symbol.match(self.data, self.index) 319 m = re_control_symbol.match(self.data, self.index)
@@ -450,19 +451,19 @@ class RtfObjParser(RtfParser): @@ -450,19 +451,19 @@ class RtfObjParser(RtfParser):
450 self.fname_prefix = fname_prefix 451 self.fname_prefix = fname_prefix
451 452
452 def open_destination(self, destination): 453 def open_destination(self, destination):
453 - if destination.cword == 'objdata': 454 + if destination.cword == b'objdata':
454 log.debug('*** Start object data at index %Xh' % destination.start) 455 log.debug('*** Start object data at index %Xh' % destination.start)
455 456
456 def close_destination(self, destination): 457 def close_destination(self, destination):
457 - if destination.cword == 'objdata': 458 + if destination.cword == b'objdata':
458 log.debug('*** Close object data at index %Xh' % self.index) 459 log.debug('*** Close object data at index %Xh' % self.index)
459 # Filter out all whitespaces first (just ignored): 460 # Filter out all whitespaces first (just ignored):
460 - hexdata1 = destination.data.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v') 461 + hexdata1 = destination.data.translate(TRANSTABLE_NOCHANGE, b' \t\r\n\f\v')
461 # Then filter out any other non-hex character: 462 # Then filter out any other non-hex character:
462 - hexdata = re.sub(r'[^a-hA-H0-9]', '', hexdata1) 463 + hexdata = re.sub(b'[^a-hA-H0-9]', b'', hexdata1)
463 if len(hexdata) < len(hexdata1): 464 if len(hexdata) < len(hexdata1):
464 # this is only for debugging: 465 # this is only for debugging:
465 - nonhex = re.sub(r'[a-hA-H0-9]', '', hexdata1) 466 + nonhex = re.sub(b'[a-hA-H0-9]', b'', hexdata1)
466 log.debug('Found non-hex chars in hexdata: %r' % nonhex) 467 log.debug('Found non-hex chars in hexdata: %r' % nonhex)
467 # MS Word accepts an extra hex digit, so we need to trim it if present: 468 # MS Word accepts an extra hex digit, so we need to trim it if present:
468 if len(hexdata) & 1: 469 if len(hexdata) & 1:
@@ -485,9 +486,9 @@ class RtfObjParser(RtfParser): @@ -485,9 +486,9 @@ class RtfObjParser(RtfParser):
485 print('data size = %d' % obj.data_size) 486 print('data size = %d' % obj.data_size)
486 # set a file extension according to the class name: 487 # set a file extension according to the class name:
487 class_name = obj.class_name.lower() 488 class_name = obj.class_name.lower()
488 - if class_name.startswith('word'): 489 + if class_name.startswith(b'word'):
489 ext = 'doc' 490 ext = 'doc'
490 - elif class_name.startswith('package'): 491 + elif class_name.startswith(b'package'):
491 ext = 'package' 492 ext = 'package'
492 else: 493 else:
493 ext = 'bin' 494 ext = 'bin'