Commit aa95f26a753d93e4c89b7790a9543da27017e197

Authored by Christian Herdtweck
1 parent dad20c2c

oleobj: parse OleNativeStream and OleObject from stream

Both can now be parsed from either a bytes array or a stream
Showing 1 changed file with 62 additions and 25 deletions
oletools/oleobj.py
... ... @@ -159,6 +159,8 @@ assert struct_uint32.size == 4 # make sure it matches 4 bytes
159 159 struct_uint16 = struct.Struct('<H')
160 160 assert struct_uint16.size == 2 # make sure it matches 2 bytes
161 161  
  162 +# max length of a zero-terminated ansi string. Not sure what this really is
  163 +STR_MAX_LEN = 1024
162 164  
163 165 # === FUNCTIONS ==============================================================
164 166  
... ... @@ -166,34 +168,42 @@ def read_uint32(data, index):
166 168 """
167 169 Read an unsigned integer from the first 32 bits of data.
168 170  
169   - :param data: bytes string containing the data to be extracted.
170   - :param index: index to start reading from.
  171 + :param data: bytes string or stream containing the data to be extracted.
  172 + :param index: index to start reading from or None if data is stream.
171 173 :return: tuple (value, index) containing the read value (int),
172 174 and the index to continue reading next time.
173 175 """
174   - value = struct_uint32.unpack(data[index:index+4])[0]
175   - return (value, index+4)
  176 + if index is None:
  177 + value = struct_uint32.unpack(data.read(4))[0]
  178 + else:
  179 + value = struct_uint32.unpack(data[index:index+4])[0]
  180 + index += 4
  181 + return (value, index)
176 182  
177 183  
178 184 def read_uint16(data, index):
179 185 """
180 186 Read an unsigned integer from the 16 bits of data following index.
181 187  
182   - :param data: bytes string containing the data to be extracted.
183   - :param index: index to start reading from.
  188 + :param data: bytes string or stream containing the data to be extracted.
  189 + :param index: index to start reading from or None if data is stream
184 190 :return: tuple (value, index) containing the read value (int),
185 191 and the index to continue reading next time.
186 192 """
187   - value = struct_uint16.unpack(data[index:index+2])[0]
188   - return (value, index+2)
  193 + if index is None:
  194 + value = struct_uint16.unpack(data.read(2))[0]
  195 + else:
  196 + value = struct_uint16.unpack(data[index:index+2])[0]
  197 + index += 2
  198 + return (value, index)
189 199  
190 200  
191 201 def read_LengthPrefixedAnsiString(data, index):
192 202 """
193 203 Read a length-prefixed ANSI string from data.
194 204  
195   - :param data: bytes string containing the data to be extracted.
196   - :param index: index in data where string size starts
  205 + :param data: bytes string or stream containing the data to be extracted.
  206 + :param index: index in data where string size starts, or None if data is a stream
197 207 :return: tuple (value, index) containing the read value (bytes string),
198 208 and the index to start reading from next time.
199 209 """
... ... @@ -202,24 +212,41 @@ def read_LengthPrefixedAnsiString(data, index):
202 212 if length == 0:
203 213 return ('', index)
204 214 # extract the string without the last null character
205   - ansi_string = data[index:index+length-1]
  215 + if index is None:
  216 + ansi_string = data.read(length-1)
  217 + null_char = data.read(1)
  218 + else:
  219 + ansi_string = data[index:index+length-1]
  220 + null_char = data[index+length]
  221 + index += length
206 222 # TODO: only in strict mode:
207 223 # check the presence of the null char:
208   - assert data[index+length] == NULL_CHAR
209   - return (ansi_string, index+length)
  224 + assert null_char == NULL_CHAR
  225 + return (ansi_string, index)
210 226  
211 227  
212 228 def read_zero_terminated_ansi_string(data, index):
213 229 """
214 230 Read a zero-terminated ANSI string from data
215 231  
216   - :param data: bytes string containing an ansi string
217   - :param index: index at which the string should start
  232 + Guessing that max length is STR_MAX_LEN (1024) bytes.
  233 +
  234 + :param data: bytes string or stream containing an ansi string
  235 + :param index: index at which the string should start or None if data is stream
218 236 :return: tuple (string, index) containing the read string (bytes string),
219 237 and the index to start reading from next time.
220 238 """
221   - end_idx = data.find(b'\x00', index)
222   - return data[index:end_idx], end_idx+1 # return index after the 0-byte
  239 + if index is None:
  240 + result = []
  241 + for count in xrange(STR_MAX_LEN):
  242 + char = data.read(1)
  243 + if char == b'\x00':
  244 + return b''.join(result), index
  245 + result.append(char)
  246 + raise ValueError('found no string-terminating zero-byte!')
  247 + else: # data is byte array, can just search
  248 + end_idx = data.index(b'\x00', index, index+STR_MAX_LEN)
  249 + return data[index:end_idx], end_idx+1 # return index after the 0-byte
223 250  
224 251  
225 252 # === CLASSES ================================================================
... ... @@ -240,8 +267,9 @@ class OleNativeStream (object):
240 267 Constructor for OleNativeStream.
241 268 If bindata is provided, it will be parsed using the parse() method.
242 269  
243   - :param bindata: bytes, OLENativeStream structure containing an OLE object
244   - :param package: bool, set to True when extracting from an OLE Package object
  270 + :param bindata: forwarded to parse(), see the documentation there
  271 + :param package: bool, set to True when extracting from an OLE Package
  272 + object
245 273 """
246 274 self.filename = None
247 275 self.src_path = None
... ... @@ -261,16 +289,22 @@ class OleNativeStream (object):
261 289 to extract the OLE object it contains.
262 290 (see MS-OLEDS 2.3.6 OLENativeStream)
263 291  
264   - :param data: bytes, OLENativeStream structure containing an OLE object
265   - :return:
  292 + :param data: bytes array or stream, containing OLENativeStream
  293 + structure containing an OLE object
  294 + :return: None
266 295 """
267 296 # TODO: strict mode to raise exceptions when values are incorrect
268 297 # (permissive mode by default)
  298 + if hasattr(data, 'read'):
  299 + index = None # marker for read_* functions to expect stream
  300 + else:
  301 + index = 0 # marker for read_* functions to expect array
  302 +
269 303 # An OLE Package object does not have the native data size field
270   - index = 0
271 304 if not self.package:
272 305 self.native_data_size, index = read_uint32(data, index)
273   - log.debug('OLE native data size = {0:08X} ({0} bytes)'.format(self.native_data_size))
  306 + log.debug('OLE native data size = {0:08X} ({0} bytes)'
  307 + .format(self.native_data_size))
274 308 # I thought this might be an OLE type specifier ???
275 309 self.unknown_short, index = read_uint16(data, index)
276 310 self.filename, index = read_zero_terminated_ansi_string(data, index)
... ... @@ -284,10 +318,13 @@ class OleNativeStream (object):
284 318 # size of the rest of the data
285 319 try:
286 320 self.actual_size, index = read_uint32(data, index)
287   - self.data = data[index:index+self.actual_size]
  321 + if index is None: # data is a bytes stream
  322 + self.data = data
  323 + else: # data is a bytes array
  324 + self.data = data[index:index+self.actual_size]
288 325 # TODO: exception when size > remaining data
289 326 # TODO: SLACK DATA
290   - except IOError: # data is not embedded but only linked to
  327 + except IOError, struct.error: # no data to read actual_size
291 328 logging.debug('data is not embedded but only a link')
292 329 self.actual_size = 0
293 330 self.data = None
... ...