Commit 4a25a0ff3bfe6b86704bebfaf6043cb22da934f4
1 parent
7667b3ae
rtfobj: report progress every 15s on debug log
Showing 1 changed file with 36 additions and 0 deletions
oletools/rtfobj.py
| @@ -95,6 +95,7 @@ __version__ = '0.52dev12' | @@ -95,6 +95,7 @@ __version__ = '0.52dev12' | ||
| 95 | 95 | ||
| 96 | import re, os, sys, binascii, logging, optparse | 96 | import re, os, sys, binascii, logging, optparse |
| 97 | import os.path | 97 | import os.path |
| 98 | +from time import time | ||
| 98 | 99 | ||
| 99 | # IMPORTANT: it should be possible to run oletools directly as scripts | 100 | # IMPORTANT: it should be possible to run oletools directly as scripts |
| 100 | # in any directory without installing them with pip or setup.py. | 101 | # in any directory without installing them with pip or setup.py. |
| @@ -314,6 +315,22 @@ else: | @@ -314,6 +315,22 @@ else: | ||
| 314 | RTF_MAGIC = b'\x7b\\rt' # \x7b == b'{' but does not mess up auto-indent | 315 | RTF_MAGIC = b'\x7b\\rt' # \x7b == b'{' but does not mess up auto-indent |
| 315 | 316 | ||
| 316 | 317 | ||
def duration_str(duration):
    """
    Create a human-readable string representation of a duration.

    The value is printed with one decimal in the largest fitting unit:
    seconds up to 90s, minutes up to 90min, hours up to 72h, days beyond.
    The unit is chosen by magnitude, so negative durations (which can arise
    when differences of wall-clock readings are passed in) are scaled the
    same way as positive ones and keep their sign.

    :param duration: duration in seconds (int or float)
    :returns: str such as '12.0s', '1.5min', '2.0h' or '4.0days'
    """
    value = duration
    unit = 's'
    # (upper limit in the current unit, divisor to next unit, next unit)
    steps = ((90, 60., 'min'), (90, 60., 'h'), (72, 24., 'days'))
    for limit, divisor, bigger_unit in steps:
        # written as "not >" so that NaN stays in the current unit
        if not abs(value) > limit:
            break
        value /= divisor
        unit = bigger_unit
    return '{0:.1f}{1}'.format(value, unit)
| 332 | + | ||
| 333 | + | ||
| 317 | #=== CLASSES ================================================================= | 334 | #=== CLASSES ================================================================= |
| 318 | 335 | ||
| 319 | class Destination(object): | 336 | class Destination(object): |
| @@ -360,6 +377,20 @@ class RtfParser(object): | @@ -360,6 +377,20 @@ class RtfParser(object): | ||
| 360 | self.destinations = [document_destination] | 377 | self.destinations = [document_destination] |
| 361 | self.current_destination = document_destination | 378 | self.current_destination = document_destination |
| 362 | 379 | ||
| 380 | + def _report_progress(self, start_time): | ||
| 381 | + """ report progress on parsing at regular intervals """ | ||
| 382 | + now = float(time()) | ||
| 383 | + if now == start_time or self.size == 0: | ||
| 384 | + return # avoid zero-division | ||
| 385 | + percent_done = 100. * self.index / self.size | ||
| 386 | + time_per_index = (now - start_time) / float(self.index) | ||
| 387 | + finish_estim = float(self.size - self.index) * time_per_index | ||
| 388 | + | ||
| 389 | + log.debug('After {0} finished {1:4.1f}% of current file ({2} bytes); ' | ||
| 390 | + 'will finish in approx {3}' | ||
| 391 | + .format(duration_str(now-start_time), percent_done, | ||
| 392 | + self.size, duration_str(finish_estim))) | ||
| 393 | + | ||
| 363 | def parse(self): | 394 | def parse(self): |
| 364 | """ | 395 | """ |
| 365 | Parse the RTF data | 396 | Parse the RTF data |
| @@ -368,8 +399,13 @@ class RtfParser(object): | @@ -368,8 +399,13 @@ class RtfParser(object): | ||
| 368 | """ | 399 | """ |
| 369 | # Start at beginning of data | 400 | # Start at beginning of data |
| 370 | self.index = 0 | 401 | self.index = 0 |
| 402 | + start_time = time() | ||
| 403 | + last_report = start_time | ||
| 371 | # Loop until the end | 404 | # Loop until the end |
| 372 | while self.index < self.size: | 405 | while self.index < self.size: |
| 406 | + if time() - last_report > 15: # report every 15s | ||
| 407 | + self._report_progress(start_time) | ||
| 408 | + last_report = time() | ||
| 373 | if self.data[self.index] == BRACE_OPEN: | 409 | if self.data[self.index] == BRACE_OPEN: |
| 374 | # Found an opening brace "{": Start of a group | 410 | # Found an opening brace "{": Start of a group |
| 375 | self._open_group() | 411 | self._open_group() |