Commit aa95f26a753d93e4c89b7790a9543da27017e197

Authored by Christian Herdtweck
1 parent dad20c2c

oleobj: parse OleNativeStream and OleObject from stream

Can parse both now from bytes array or stream
Showing 1 changed file with 62 additions and 25 deletions
oletools/oleobj.py
@@ -159,6 +159,8 @@ assert struct_uint32.size == 4 # make sure it matches 4 bytes @@ -159,6 +159,8 @@ assert struct_uint32.size == 4 # make sure it matches 4 bytes
159 struct_uint16 = struct.Struct('<H') 159 struct_uint16 = struct.Struct('<H')
160 assert struct_uint16.size == 2 # make sure it matches 2 bytes 160 assert struct_uint16.size == 2 # make sure it matches 2 bytes
161 161
  162 +# max length of a zero-terminated ansi string. Not sure what this really is
  163 +STR_MAX_LEN = 1024
162 164
163 # === FUNCTIONS ============================================================== 165 # === FUNCTIONS ==============================================================
164 166
@@ -166,34 +168,42 @@ def read_uint32(data, index): @@ -166,34 +168,42 @@ def read_uint32(data, index):
166 """ 168 """
167 Read an unsigned integer from the first 32 bits of data. 169 Read an unsigned integer from the first 32 bits of data.
168 170
169 - :param data: bytes string containing the data to be extracted.  
170 - :param index: index to start reading from. 171 + :param data: bytes string or stream containing the data to be extracted.
  172 + :param index: index to start reading from or None if data is stream.
171 :return: tuple (value, index) containing the read value (int), 173 :return: tuple (value, index) containing the read value (int),
172 and the index to continue reading next time. 174 and the index to continue reading next time.
173 """ 175 """
174 - value = struct_uint32.unpack(data[index:index+4])[0]  
175 - return (value, index+4) 176 + if index is None:
  177 + value = struct_uint32.unpack(data.read(4))[0]
  178 + else:
  179 + value = struct_uint32.unpack(data[index:index+4])[0]
  180 + index += 4
  181 + return (value, index)
176 182
177 183
178 def read_uint16(data, index): 184 def read_uint16(data, index):
179 """ 185 """
180 Read an unsigned integer from the 16 bits of data following index. 186 Read an unsigned integer from the 16 bits of data following index.
181 187
182 - :param data: bytes string containing the data to be extracted.  
183 - :param index: index to start reading from. 188 + :param data: bytes string or stream containing the data to be extracted.
  189 + :param index: index to start reading from or None if data is stream
184 :return: tuple (value, index) containing the read value (int), 190 :return: tuple (value, index) containing the read value (int),
185 and the index to continue reading next time. 191 and the index to continue reading next time.
186 """ 192 """
187 - value = struct_uint16.unpack(data[index:index+2])[0]  
188 - return (value, index+2) 193 + if index is None:
  194 + value = struct_uint16.unpack(data.read(2))[0]
  195 + else:
  196 + value = struct_uint16.unpack(data[index:index+2])[0]
  197 + index += 2
  198 + return (value, index)
189 199
190 200
191 def read_LengthPrefixedAnsiString(data, index): 201 def read_LengthPrefixedAnsiString(data, index):
192 """ 202 """
193 Read a length-prefixed ANSI string from data. 203 Read a length-prefixed ANSI string from data.
194 204
195 - :param data: bytes string containing the data to be extracted.  
196 - :param index: index in data where string size starts 205 + :param data: bytes string or stream containing the data to be extracted.
  206 + :param index: index in data where string size starts or None if data is stream
197 :return: tuple (value, index) containing the read value (bytes string), 207 :return: tuple (value, index) containing the read value (bytes string),
198 and the index to start reading from next time. 208 and the index to start reading from next time.
199 """ 209 """
@@ -202,24 +212,41 @@ def read_LengthPrefixedAnsiString(data, index): @@ -202,24 +212,41 @@ def read_LengthPrefixedAnsiString(data, index):
202 if length == 0: 212 if length == 0:
203 return ('', index) 213 return ('', index)
204 # extract the string without the last null character 214 # extract the string without the last null character
205 - ansi_string = data[index:index+length-1] 215 + if index is None:
  216 + ansi_string = data.read(length-1)
  217 + null_char = data.read(1)
  218 + else:
  219 + ansi_string = data[index:index+length-1]
  220 + null_char = data[index+length]
  221 + index += length
206 # TODO: only in strict mode: 222 # TODO: only in strict mode:
207 # check the presence of the null char: 223 # check the presence of the null char:
208 - assert data[index+length] == NULL_CHAR  
209 - return (ansi_string, index+length) 224 + assert null_char == NULL_CHAR
  225 + return (ansi_string, index)
210 226
211 227
212 def read_zero_terminated_ansi_string(data, index): 228 def read_zero_terminated_ansi_string(data, index):
213 """ 229 """
214 Read a zero-terminated ANSI string from data 230 Read a zero-terminated ANSI string from data
215 231
216 - :param data: bytes string containing an ansi string  
217 - :param index: index at which the string should start 232 + Guessing that max length is STR_MAX_LEN (1024) bytes.
  233 +
  234 + :param data: bytes string or stream containing an ansi string
  235 + :param index: index at which the string should start or None if data is stream
218 :return: tuple (string, index) containing the read string (bytes string), 236 :return: tuple (string, index) containing the read string (bytes string),
219 and the index to start reading from next time. 237 and the index to start reading from next time.
220 """ 238 """
221 - end_idx = data.find(b'\x00', index)  
222 - return data[index:end_idx], end_idx+1 # return index after the 0-byte 239 + if index is None:
  240 + result = []
  241 + for count in xrange(STR_MAX_LEN):
  242 + char = data.read(1)
  243 + if char == b'\x00':
  244 + return b''.join(result), index
  245 + result.append(char)
  246 + raise ValueError('found no string-terminating zero-byte!')
  247 + else: # data is byte array, can just search
  248 + end_idx = data.index(b'\x00', index, index+STR_MAX_LEN)
  249 + return data[index:end_idx], end_idx+1 # return index after the 0-byte
223 250
224 251
225 # === CLASSES ================================================================ 252 # === CLASSES ================================================================
@@ -240,8 +267,9 @@ class OleNativeStream (object): @@ -240,8 +267,9 @@ class OleNativeStream (object):
240 Constructor for OleNativeStream. 267 Constructor for OleNativeStream.
241 If bindata is provided, it will be parsed using the parse() method. 268 If bindata is provided, it will be parsed using the parse() method.
242 269
243 - :param bindata: bytes, OLENativeStream structure containing an OLE object  
244 - :param package: bool, set to True when extracting from an OLE Package object 270 + :param bindata: forwarded to parse, see documentation there
  271 + :param package: bool, set to True when extracting from an OLE Package
  272 + object
245 """ 273 """
246 self.filename = None 274 self.filename = None
247 self.src_path = None 275 self.src_path = None
@@ -261,16 +289,22 @@ class OleNativeStream (object): @@ -261,16 +289,22 @@ class OleNativeStream (object):
261 to extract the OLE object it contains. 289 to extract the OLE object it contains.
262 (see MS-OLEDS 2.3.6 OLENativeStream) 290 (see MS-OLEDS 2.3.6 OLENativeStream)
263 291
264 - :param data: bytes, OLENativeStream structure containing an OLE object  
265 - :return: 292 + :param data: bytes array or stream, containing OLENativeStream
  293 + structure containing an OLE object
  294 + :return: None
266 """ 295 """
267 # TODO: strict mode to raise exceptions when values are incorrect 296 # TODO: strict mode to raise exceptions when values are incorrect
268 # (permissive mode by default) 297 # (permissive mode by default)
  298 + if hasattr(data, 'read'):
  299 + index = None # marker for read_* functions to expect stream
  300 + else:
  301 + index = 0 # marker for read_* functions to expect array
  302 +
269 # An OLE Package object does not have the native data size field 303 # An OLE Package object does not have the native data size field
270 - index = 0  
271 if not self.package: 304 if not self.package:
272 self.native_data_size, index = read_uint32(data, index) 305 self.native_data_size, index = read_uint32(data, index)
273 - log.debug('OLE native data size = {0:08X} ({0} bytes)'.format(self.native_data_size)) 306 + log.debug('OLE native data size = {0:08X} ({0} bytes)'
  307 + .format(self.native_data_size))
274 # I thought this might be an OLE type specifier ??? 308 # I thought this might be an OLE type specifier ???
275 self.unknown_short, index = read_uint16(data, index) 309 self.unknown_short, index = read_uint16(data, index)
276 self.filename, index = read_zero_terminated_ansi_string(data, index) 310 self.filename, index = read_zero_terminated_ansi_string(data, index)
@@ -284,10 +318,13 @@ class OleNativeStream (object): @@ -284,10 +318,13 @@ class OleNativeStream (object):
284 # size of the rest of the data 318 # size of the rest of the data
285 try: 319 try:
286 self.actual_size, index = read_uint32(data, index) 320 self.actual_size, index = read_uint32(data, index)
287 - self.data = data[index:index+self.actual_size] 321 + if index is None: # data is a bytes stream
  322 + self.data = data
  323 + else: # data is a bytes array
  324 + self.data = data[index:index+self.actual_size]
288 # TODO: exception when size > remaining data 325 # TODO: exception when size > remaining data
289 # TODO: SLACK DATA 326 # TODO: SLACK DATA
290 - except IOError: # data is not embedded but only linked to 327 + except IOError, struct.error: # no data to read actual_size
291 logging.debug('data is not embedded but only a link') 328 logging.debug('data is not embedded but only a link')
292 self.actual_size = 0 329 self.actual_size = 0
293 self.data = None 330 self.data = None