Commit 0a8eace56e838800847f1c540467981b37a8f07a

Authored by Christian Herdtweck
1 parent 27e0a1c8

start looking for vba a different way: search for record header of VBAInfoAtom/Container

Showing 1 changed file with 64 additions and 1 deletions
oletools/ppt_parser.py
... ... @@ -126,6 +126,18 @@ class RecordHeader(object):
126 126 obj.rec_len))
127 127 return obj
128 128  
  129 + @classmethod
  130 + def generate(clz, rec_type, rec_len=None, rec_instance=0, rec_ver=0):
  131 + """ generate a record header string given values
  132 +
  133 + length of result depends on rec_len being given or not
  134 + """
  135 + version_instance = rec_ver + 2**4 * rec_instance
  136 + if rec_len is None:
  137 + return struct.pack('<HH', version_instance, rec_type)
  138 + else:
  139 + return struct.pack('<HHL', version_instance, rec_type, rec_len)
  140 +
129 141  
130 142 class PptType(object):
131 143 """ base class of data types found in ppt ole files
... ... @@ -1162,6 +1174,56 @@ class PptParser(object):
1162 1174 if errs and self.fast_fail:
1163 1175 raise errs[0]
1164 1176  
  1177 + def search_vba(self):
  1178 + """ quick-and-dirty: do not parse everything, just look for right bytes
  1179 +
  1180 + "quick" here means quick to program. Runtime now is linear is document
  1181 + size (--> for big documents the other method might be faster)
  1182 + """
  1183 +
  1184 + BUF_SIZE = 1024
  1185 +
  1186 + pattern = RecordHeader.generate(
  1187 + VBAInfoContainer.RECORD_TYPE, rec_len=0x14,
  1188 + rec_instance=VBAInfoContainer.RECORD_INSTANCE,
  1189 + rec_ver=VBAInfoContainer.RECORD_VERSION) \
  1190 + + RecordHeader.generate(
  1191 + VBAInfoAtom.RECORD_TYPE, rec_len=0xC,
  1192 + rec_instance=VBAInfoAtom.RECORD_INSTANCE,
  1193 + rec_ver=VBAInfoAtom.RECORD_VERSION)
  1194 + pattern_len = len(pattern)
  1195 + log.debug('pattern length is {}'.format(pattern_len))
  1196 + if pattern_len > BUF_SIZE:
  1197 + raise ValueError('need buf > pattern to search!')
  1198 +
  1199 + stream = None
  1200 + try:
  1201 + log.debug('opening stream')
  1202 + stream = self.ole.openstream(MAIN_STREAM_NAME)
  1203 + n_reads = 0
  1204 + while True:
  1205 + start_pos = stream.tell()
  1206 + n_reads += 1
  1207 + #log.debug('read {} starting from {}'
  1208 + # .format(BUF_SIZE, start_pos))
  1209 + buf = stream.read(BUF_SIZE)
  1210 + idx = buf.find(pattern)
  1211 + while idx != -1:
  1212 + log.info('found pattern at index {}'.format(start_pos+idx))
  1213 + idx = buf.find(pattern, idx+1)
  1214 +
  1215 + if len(buf) == BUF_SIZE:
  1216 + stream.seek(-1 * pattern_len, os.SEEK_CUR)
  1217 + else:
  1218 + log.debug('reached end of buf (read {}<{}) after {} reads'
  1219 + .format(len(buf), BUF_SIZE, n_reads))
  1220 + break
  1221 +
  1222 + finally:
  1223 + if stream is not None:
  1224 + log.debug('closing stream')
  1225 + stream.close()
  1226 +
1165 1227 # === TESTING =================================================================
1166 1228  
1167 1229 def test():
... ... @@ -1180,7 +1242,8 @@ def test():
1180 1242 log.info('-' * 72)
1181 1243 log.info('test file: {}'.format(file_name))
1182 1244 ppt = PptParser(file_name, fast_fail=False)
1183   - ppt.parse_document_persist_object()
  1245 + #ppt.parse_document_persist_object()
  1246 + ppt.search_vba()
1184 1247  
1185 1248  
1186 1249 if __name__ == '__main__':
... ...