Commit d4eb585e075304a7fd77fb7b8803e25a31908e70
1 parent
a7d1050e
ooxml: re-implement complete seek(); add attr closed
OleFileIO requires a complete seek() and checks for a `closed` attribute. Also added some commented-out debug print commands to ZipSubFile.
Showing
1 changed file
with
69 additions
and
32 deletions
oletools/ooxml.py
| @@ -130,13 +130,13 @@ class ZipSubFile(object): | @@ -130,13 +130,13 @@ class ZipSubFile(object): | ||
| 130 | 130 | ||
| 131 | ZipFile.open() gives file handles that can be read but not seek()ed since | 131 | ZipFile.open() gives file handles that can be read but not seek()ed since |
| 132 | the file is being decompressed in the background. This class implements a | 132 | the file is being decompressed in the background. This class implements a |
| 133 | - reset() function which corresponds to a seek to 0 (which just closes the | ||
| 134 | - stream and re-opens it behind the scenes.) | ||
| 135 | - --> can be used e.g. for olefile.isOleFile() | 133 | + reset() function (close and re-open stream) and a seek() that uses it. |
| 134 | + --> can be used as argument to olefile.OleFileIO and olefile.isOleFile() | ||
| 136 | 135 | ||
| 137 | Can be used as a context manager:: | 136 | Can be used as a context manager:: |
| 138 | 137 | ||
| 139 | with zipfile.ZipFile('file.zip') as zipper: | 138 | with zipfile.ZipFile('file.zip') as zipper: |
| 139 | + # use this instead of: with zipper.open('subfile') as handle: | ||
| 140 | with ZipSubFile(zipper, 'subfile') as handle: | 140 | with ZipSubFile(zipper, 'subfile') as handle: |
| 141 | print('subfile in file.zip has size {0}, starts with {1}' | 141 | print('subfile in file.zip has size {0}, starts with {1}' |
| 142 | .format(handle.size, handle.read(20))) | 142 | .format(handle.size, handle.read(20))) |
| @@ -147,10 +147,12 @@ class ZipSubFile(object): | @@ -147,10 +147,12 @@ class ZipSubFile(object): | ||
| 147 | name: name of file within zip file | 147 | name: name of file within zip file |
| 148 | mode: open-mode, 'r' per default | 148 | mode: open-mode, 'r' per default |
| 149 | size: size of the stream (constructor arg or taken from ZipFile.getinfo) | 149 | size: size of the stream (constructor arg or taken from ZipFile.getinfo) |
| 150 | + closed: False if there was an open() but no close() since then, True otherwise | ||
| 150 | 151 | ||
| 151 | Attributes only not-None after open() and before close(): | 152 | Attributes only not-None after open() and before close(): |
| 152 | - handle: direkt handle to subfile stream, created by ZipFile.open() | ||
| 153 | - pos: current position within stream | 153 | + handle: direct handle to subfile stream, created by ZipFile.open() |
| 154 | + pos: current position within stream (can deviate from actual position in | ||
| 155 | + self.handle if we fake jump to end) | ||
| 154 | """ | 156 | """ |
| 155 | 157 | ||
| 156 | def __init__(self, container, filename, mode='r', size=None): | 158 | def __init__(self, container, filename, mode='r', size=None): |
| @@ -168,6 +170,7 @@ class ZipSubFile(object): | @@ -168,6 +170,7 @@ class ZipSubFile(object): | ||
| 168 | self.mode = mode | 170 | self.mode = mode |
| 169 | self.handle = None | 171 | self.handle = None |
| 170 | self.pos = None | 172 | self.pos = None |
| 173 | + self.closed = True | ||
| 171 | 174 | ||
| 172 | def open(self): | 175 | def open(self): |
| 173 | """ open subfile for reading; open mode given to constructor before """ | 176 | """ open subfile for reading; open mode given to constructor before """ |
| @@ -175,6 +178,8 @@ class ZipSubFile(object): | @@ -175,6 +178,8 @@ class ZipSubFile(object): | ||
| 175 | raise IOError('re-opening file not supported!') | 178 | raise IOError('re-opening file not supported!') |
| 176 | self.handle = self.container.open(self.name, self.mode) | 179 | self.handle = self.container.open(self.name, self.mode) |
| 177 | self.pos = 0 | 180 | self.pos = 0 |
| 181 | + self.closed = False | ||
| 182 | + # print('ZipSubFile: opened; size={}'.format(self.size)) | ||
| 178 | return self | 183 | return self |
| 179 | 184 | ||
| 180 | def write(self, *args, **kwargs): # pylint: disable=unused-argument,no-self-use | 185 | def write(self, *args, **kwargs): # pylint: disable=unused-argument,no-self-use |
| @@ -187,59 +192,91 @@ class ZipSubFile(object): | @@ -187,59 +192,91 @@ class ZipSubFile(object): | ||
| 187 | 192 | ||
| 188 | returns bytes (i.e. str in python2, bytes in python3) | 193 | returns bytes (i.e. str in python2, bytes in python3) |
| 189 | """ | 194 | """ |
| 190 | - if size is None: | ||
| 191 | - self.pos = self.size | ||
| 192 | - else: | ||
| 193 | - self.pos += size | ||
| 194 | - return self.handle.read(size) | ||
| 195 | - | ||
| 196 | - def seek(self, pos, offset): | ||
| 197 | - """ re-position point so read() will continue elsewhere | ||
| 198 | - | ||
| 199 | - only re-positioning to start of file is allowed | ||
| 200 | - """ | ||
| 201 | - CHUNK_SIZE = 4096 | ||
| 202 | - if pos == 0 and offset == io.SEEK_SET: | ||
| 203 | - self.reset() | 195 | + if self.handle is None: |
| 196 | + raise IOError('read on closed handle') | ||
| 197 | + if self.pos >= self.size: | ||
| 198 | + # print('ZipSubFile: read fake at end') | ||
| 199 | + return b'' # fake being at the end, even if we are not | ||
| 200 | + data = self.handle.read(size) | ||
| 201 | + self.pos += len(data) | ||
| 202 | + # print('ZipSubFile: read {} bytes, pos now {}'.format(size, self.pos)) | ||
| 203 | + return data | ||
| 204 | + | ||
| 205 | + def seek(self, pos, offset=io.SEEK_SET): | ||
| 206 | + """ re-position point so read() will continue elsewhere """ | ||
| 207 | + # calc target position from self.pos, pos and offset | ||
| 208 | + if offset == io.SEEK_SET: | ||
| 209 | + new_pos = pos | ||
| 204 | elif offset == io.SEEK_CUR: | 210 | elif offset == io.SEEK_CUR: |
| 205 | - if pos == -self.pos: | ||
| 206 | - self.reset() | ||
| 207 | - elif pos == 0: | ||
| 208 | - return | ||
| 209 | - elif pos > 0: | ||
| 210 | - skipped = 0 | ||
| 211 | - n_chunks, leftover = divmod(pos, CHUNK_SIZE) | ||
| 212 | - for _ in range(n_chunks): | ||
| 213 | - self.read(CHUNK_SIZE) # just read and discard | ||
| 214 | - self.read(leftover) | ||
| 215 | - else: | ||
| 216 | - raise NotImplementedError('could reset() and read()?') | 211 | + new_pos = self.pos + pos |
| 212 | + elif offset == io.SEEK_END: | ||
| 213 | + new_pos = self.size + pos | ||
| 217 | else: | 214 | else: |
| 218 | - raise NotImplementedError('could reset() and read()?') | 215 | + raise ValueError("invalid offset {0}, need SEEK_* constant" |
| 216 | + .format(offset)) | ||
| 217 | + | ||
| 218 | + # now get to that position, doing reads and resets as necessary | ||
| 219 | + if new_pos < 0: | ||
| 220 | + # print('ZipSubFile: Error: seek to {}'.format(new_pos)) | ||
| 221 | + raise IOError('Seek beyond start of file not allowed') | ||
| 222 | + elif new_pos == self.pos: | ||
| 223 | + # print('ZipSubFile: nothing to do') | ||
| 224 | + pass | ||
| 225 | + elif new_pos == 0: | ||
| 226 | + # print('ZipSubFile: seek to start') | ||
| 227 | + self.reset() | ||
| 228 | + elif new_pos < self.pos: | ||
| 229 | + # print('ZipSubFile: seek back') | ||
| 230 | + self.reset() | ||
| 231 | + self._seek_skip(new_pos) # --> read --> update self.pos | ||
| 232 | + elif new_pos < self.size: | ||
| 233 | + # print('ZipSubFile: seek forward') | ||
| 234 | + self._seek_skip(new_pos - self.pos) # --> read --> update self.pos | ||
| 235 | + else: # new_pos >= self.size | ||
| 236 | + # print('ZipSubFile: seek to end') | ||
| 237 | + self.pos = new_pos # fake being at the end; remember pos >= size | ||
| 238 | + | ||
| 239 | + def _seek_skip(self, to_skip): | ||
| 240 | + """ helper for seek: skip forward by given amount using read() """ | ||
| 241 | + # print('ZipSubFile: seek by skipping {} bytes starting at {}' | ||
| 242 | + # .format(to_skip, self.pos)) | ||
| 243 | + CHUNK_SIZE = 4096 | ||
| 244 | + n_chunks, leftover = divmod(to_skip, CHUNK_SIZE) | ||
| 245 | + for _ in range(n_chunks): | ||
| 246 | + self.read(CHUNK_SIZE) # just read and discard | ||
| 247 | + self.read(leftover) | ||
| 248 | + # print('ZipSubFile: seek by skipping done, pos now {}' | ||
| 249 | + # .format(self.pos)) | ||
| 219 | 250 | ||
| 220 | def tell(self): | 251 | def tell(self): |
| 221 | """ inform about position of next read """ | 252 | """ inform about position of next read """ |
| 253 | + # print('ZipSubFile: tell-ing we are at {}'.format(self.pos)) | ||
| 222 | return self.pos | 254 | return self.pos |
| 223 | 255 | ||
| 224 | def reset(self): | 256 | def reset(self): |
| 225 | """ close and re-open """ | 257 | """ close and re-open """ |
| 258 | + # print('ZipSubFile: resetting') | ||
| 226 | self.close() | 259 | self.close() |
| 227 | self.open() | 260 | self.open() |
| 228 | 261 | ||
| 229 | def close(self): | 262 | def close(self): |
| 230 | """ close file """ | 263 | """ close file """ |
| 264 | + # print('ZipSubFile: closing') | ||
| 231 | if self.handle is not None: | 265 | if self.handle is not None: |
| 232 | self.handle.close() | 266 | self.handle.close() |
| 233 | self.pos = None | 267 | self.pos = None |
| 234 | self.handle = None | 268 | self.handle = None |
| 269 | + self.closed = True | ||
| 235 | 270 | ||
| 236 | def __enter__(self): | 271 | def __enter__(self): |
| 237 | """ start of context manager; opens the file """ | 272 | """ start of context manager; opens the file """ |
| 273 | + # print('ZipSubFile: entering context') | ||
| 238 | self.open() | 274 | self.open() |
| 239 | return self | 275 | return self |
| 240 | 276 | ||
| 241 | def __exit__(self, *args, **kwargs): | 277 | def __exit__(self, *args, **kwargs): |
| 242 | """ end of context manager; closes the file """ | 278 | """ end of context manager; closes the file """ |
| 279 | + # print('ZipSubFile: exiting context') | ||
| 243 | self.close() | 280 | self.close() |
| 244 | 281 | ||
| 245 | def __str__(self): | 282 | def __str__(self): |