Commit 0acaf934a6ec2e7b0101dc2824996648e70ec6d5
1 parent: fde6adf1
tablestream: support for both Python 2.6+ and 3.x, all cells are converted to unicode
Showing 1 changed file with 113 additions and 65 deletions
oletools/thirdparty/tablestream/tablestream.py
| ... | ... | @@ -52,8 +52,10 @@ from __future__ import print_function |
| 52 | 52 | # 2016-05-25 v0.04 PL: - updated for colorclass 2.2.0 (now a package) |
| 53 | 53 | # 2016-07-29 v0.05 PL: - fixed oletools issue #57, bug when importing colorclass |
| 54 | 54 | # 2016-07-31 v0.06 PL: - handle newline characters properly in each cell |
| 55 | +# 2016-08-28 v0.07 PL: - support for both Python 2.6+ and 3.x | |
| 56 | +# - all cells are converted to unicode | |
| 55 | 57 | |
| 56 | -__version__ = '0.06' | |
| 58 | +__version__ = '0.07' | |
| 57 | 59 | |
| 58 | 60 | #------------------------------------------------------------------------------ |
| 59 | 61 | # TODO: |
| ... | ... | @@ -84,6 +86,43 @@ if os.name == 'nt': |
| 84 | 86 | colorclass.Windows.enable(auto_colors=True) |
| 85 | 87 | |
| 86 | 88 | |
| 89 | +# === PYTHON 2+3 SUPPORT ====================================================== | |
| 90 | + | |
| 91 | +if sys.version_info[0] >= 3: | |
| 92 | + # Python 3 specific adaptations | |
| 93 | + # py3 range = py2 xrange | |
| 94 | + xrange = range | |
| 95 | + ustr = str | |
| 96 | + # byte strings for to_ustr (with py3, bytearray supports encoding): | |
| 97 | + byte_strings = (bytes, bytearray) | |
| 98 | +else: | |
| 99 | + # Python 2 specific adaptations | |
| 100 | + ustr = unicode | |
| 101 | + # byte strings for to_ustr (with py2, bytearray does not support encoding): | |
| 102 | + byte_strings = bytes | |
| 103 | + | |
| 104 | + | |
| 105 | +# === FUNCTIONS ============================================================== | |
| 106 | + | |
| 107 | +def to_ustr(obj, encoding='utf8', errors='replace'): | |
| 108 | + """ | |
| 109 | + convert an object to unicode, using the appropriate method | |
| 110 | + :param obj: any object, str, bytes or unicode | |
| 111 | + :return: unicode string (ustr) | |
| 112 | + """ | |
| 113 | + # if the object is already unicode, return it unchanged: | |
| 114 | + if isinstance(obj, ustr): | |
| 115 | + return obj | |
| 116 | + # if it is a bytes string, decode it using the provided encoding | |
| 117 | + elif isinstance(obj, byte_strings): | |
| 118 | + return ustr(obj, encoding=encoding, errors=errors) | |
| 119 | + # else just convert it to unicode: | |
| 120 | + # (an exception is raised if we specify encoding in this case) | |
| 121 | + else: | |
| 122 | + return ustr(obj) | |
| 123 | + | |
| 124 | + | |
| 125 | + | |
| 87 | 126 | # === CLASSES ================================================================= |
| 88 | 127 | |
| 89 | 128 | |
| ... | ... | @@ -100,47 +139,47 @@ class TableStyle(object): |
| 100 | 139 | """ |
| 101 | 140 | # Header rows: |
| 102 | 141 | header_top = True |
| 103 | - header_top_left = '+' | |
| 104 | - header_top_horiz = '-' | |
| 105 | - header_top_middle = '+' | |
| 106 | - header_top_right = '+' | |
| 142 | + header_top_left = u'+' | |
| 143 | + header_top_horiz = u'-' | |
| 144 | + header_top_middle = u'+' | |
| 145 | + header_top_right = u'+' | |
| 107 | 146 | |
| 108 | - header_vertical_left = '|' | |
| 109 | - header_vertical_middle = '|' | |
| 110 | - header_vertical_right = '|' | |
| 147 | + header_vertical_left = u'|' | |
| 148 | + header_vertical_middle = u'|' | |
| 149 | + header_vertical_right = u'|' | |
| 111 | 150 | |
| 112 | 151 | # Separator line between header and normal rows: |
| 113 | 152 | header_sep = True |
| 114 | - header_sep_left = '+' | |
| 115 | - header_sep_horiz = '-' | |
| 116 | - header_sep_middle = '+' | |
| 117 | - header_sep_right = '+' | |
| 153 | + header_sep_left = u'+' | |
| 154 | + header_sep_horiz = u'-' | |
| 155 | + header_sep_middle = u'+' | |
| 156 | + header_sep_right = u'+' | |
| 118 | 157 | |
| 119 | 158 | # Top row if there is no header: |
| 120 | 159 | noheader_top = True |
| 121 | - noheader_top_left = '+' | |
| 122 | - noheader_top_horiz = '-' | |
| 123 | - noheader_top_middle = '+' | |
| 124 | - noheader_top_right = '+' | |
| 160 | + noheader_top_left = u'+' | |
| 161 | + noheader_top_horiz = u'-' | |
| 162 | + noheader_top_middle = u'+' | |
| 163 | + noheader_top_right = u'+' | |
| 125 | 164 | |
| 126 | 165 | # Normal rows |
| 127 | - vertical_left = '|' | |
| 128 | - vertical_middle = '|' | |
| 129 | - vertical_right = '|' | |
| 166 | + vertical_left = u'|' | |
| 167 | + vertical_middle = u'|' | |
| 168 | + vertical_right = u'|' | |
| 130 | 169 | |
| 131 | 170 | # Separator line between rows: |
| 132 | 171 | sep = False |
| 133 | - sep_left = '+' | |
| 134 | - sep_horiz = '-' | |
| 135 | - sep_middle = '+' | |
| 136 | - sep_right = '+' | |
| 172 | + sep_left = u'+' | |
| 173 | + sep_horiz = u'-' | |
| 174 | + sep_middle = u'+' | |
| 175 | + sep_right = u'+' | |
| 137 | 176 | |
| 138 | 177 | # Bottom line |
| 139 | 178 | bottom = True |
| 140 | - bottom_left = '+' | |
| 141 | - bottom_horiz = '-' | |
| 142 | - bottom_middle = '+' | |
| 143 | - bottom_right = '+' | |
| 179 | + bottom_left = u'+' | |
| 180 | + bottom_horiz = u'-' | |
| 181 | + bottom_middle = u'+' | |
| 182 | + bottom_right = u'+' | |
| 144 | 183 | |
| 145 | 184 | |
| 146 | 185 | class TableStyleSlim(object): |
| ... | ... | @@ -155,47 +194,47 @@ class TableStyleSlim(object): |
| 155 | 194 | """ |
| 156 | 195 | # Header rows: |
| 157 | 196 | header_top = True |
| 158 | - header_top_left = '' | |
| 159 | - header_top_horiz = '-' | |
| 160 | - header_top_middle = '+' | |
| 161 | - header_top_right = '' | |
| 197 | + header_top_left = u'' | |
| 198 | + header_top_horiz = u'-' | |
| 199 | + header_top_middle = u'+' | |
| 200 | + header_top_right = u'' | |
| 162 | 201 | |
| 163 | - header_vertical_left = '' | |
| 164 | - header_vertical_middle = '|' | |
| 165 | - header_vertical_right = '' | |
| 202 | + header_vertical_left = u'' | |
| 203 | + header_vertical_middle = u'|' | |
| 204 | + header_vertical_right = u'' | |
| 166 | 205 | |
| 167 | 206 | # Separator line between header and normal rows: |
| 168 | 207 | header_sep = True |
| 169 | - header_sep_left = '' | |
| 170 | - header_sep_horiz = '-' | |
| 171 | - header_sep_middle = '+' | |
| 172 | - header_sep_right = '' | |
| 208 | + header_sep_left = u'' | |
| 209 | + header_sep_horiz = u'-' | |
| 210 | + header_sep_middle = u'+' | |
| 211 | + header_sep_right = u'' | |
| 173 | 212 | |
| 174 | 213 | # Top row if there is no header: |
| 175 | 214 | noheader_top = True |
| 176 | - noheader_top_left = '' | |
| 177 | - noheader_top_horiz = '-' | |
| 178 | - noheader_top_middle = '+' | |
| 179 | - noheader_top_right = '' | |
| 215 | + noheader_top_left = u'' | |
| 216 | + noheader_top_horiz = u'-' | |
| 217 | + noheader_top_middle = u'+' | |
| 218 | + noheader_top_right = u'' | |
| 180 | 219 | |
| 181 | 220 | # Normal rows |
| 182 | - vertical_left = '' | |
| 183 | - vertical_middle = '|' | |
| 184 | - vertical_right = '' | |
| 221 | + vertical_left = u'' | |
| 222 | + vertical_middle = u'|' | |
| 223 | + vertical_right = u'' | |
| 185 | 224 | |
| 186 | 225 | # Separator line between rows: |
| 187 | 226 | sep = False |
| 188 | - sep_left = '' | |
| 189 | - sep_horiz = '-' | |
| 190 | - sep_middle = '+' | |
| 191 | - sep_right = '' | |
| 227 | + sep_left = u'' | |
| 228 | + sep_horiz = u'-' | |
| 229 | + sep_middle = u'+' | |
| 230 | + sep_right = u'' | |
| 192 | 231 | |
| 193 | 232 | # Bottom line |
| 194 | 233 | bottom = True |
| 195 | - bottom_left = '' | |
| 196 | - bottom_horiz = '-' | |
| 197 | - bottom_middle = '+' | |
| 198 | - bottom_right = '' | |
| 234 | + bottom_left = u'' | |
| 235 | + bottom_horiz = u'-' | |
| 236 | + bottom_middle = u'+' | |
| 237 | + bottom_right = u'' | |
| 199 | 238 | |
| 200 | 239 | |
| 201 | 240 | |
| ... | ... | @@ -213,10 +252,22 @@ class TableStream(object): |
| 213 | 252 | be processed row by row. |
| 214 | 253 | """ |
| 215 | 254 | |
| 216 | - def __init__(self, column_width, header_row=None, style=TableStyle, outfile=sys.stdout): | |
| 255 | + def __init__(self, column_width, header_row=None, style=TableStyle, | |
| 256 | + outfile=sys.stdout, encoding_in='utf8', encoding_out='utf8'): | |
| 257 | + ''' | |
| 258 | + Constructor for class TableStream | |
| 259 | + :param column_width: tuple or list containing the width of each column | |
| 260 | + :param header_row: tuple or list containing the header row text | |
| 261 | + :param style: style for the table, a TableStyle object | |
| 262 | + :param outfile: output file (sys.stdout by default to print on the console) | |
| 263 | + :param encoding_in: encoding used when the input text is bytes (UTF-8 by default) | |
| 264 | + :param encoding_out: encoding used for the output (UTF-8 by default) | |
| 265 | + ''' | |
| 217 | 266 | self.column_width = column_width |
| 218 | 267 | self.num_columns = len(column_width) |
| 219 | 268 | self.header_row = header_row |
| 269 | + self.encoding_in = encoding_in | |
| 270 | + self.encoding_out = encoding_out | |
| 220 | 271 | assert (header_row is None) or len(header_row) == self.num_columns |
| 221 | 272 | self.style = style |
| 222 | 273 | self.outfile = outfile |
| ... | ... | @@ -239,13 +290,7 @@ class TableStream(object): |
| 239 | 290 | for i in xrange(self.num_columns): |
| 240 | 291 | cell = row[i] |
| 241 | 292 | # Convert to string: |
| 242 | - # TODO: handle unicode properly | |
| 243 | - # TODO: use only unicode for textwrapper, to avoid str length issues | |
| 244 | - if isinstance(cell, bytes): | |
| 245 | - # encode to UTF8, avoiding errors | |
| 246 | - cell = cell.decode('utf-8', errors='replace') | |
| 247 | - else: | |
| 248 | - cell = unicode(cell) | |
| 293 | + cell = to_ustr(cell, encoding=self.encoding_in) | |
| 249 | 294 | # Wrap cell text according to the column width |
| 250 | 295 | # TODO: use a TextWrapper object for each column instead |
| 251 | 296 | # split the string if it contains newline characters, otherwise |
| ... | ... | @@ -259,7 +304,7 @@ class TableStream(object): |
| 259 | 304 | if color: |
| 260 | 305 | for j in xrange(len(column)): |
| 261 | 306 | # print '%r: %s' % (column[j], type(column[j])) |
| 262 | - column[j] = colorclass.Color('{auto%s}%s{/%s}' % (color, column[j], color)) | |
| 307 | + column[j] = colorclass.Color(u'{auto%s}%s{/%s}' % (color, column[j], color)) | |
| 263 | 308 | columns.append(column) |
| 264 | 309 | # determine which column has the highest number of lines |
| 265 | 310 | max_lines = max(len(columns[i]), max_lines) |
| ... | ... | @@ -271,11 +316,11 @@ class TableStream(object): |
| 271 | 316 | if j<len(column): |
| 272 | 317 | # text to be written |
| 273 | 318 | text_width = len(column[j]) |
| 274 | - self.write(column[j] + ' '*(self.column_width[i]-text_width)) | |
| 319 | + self.write(column[j] + u' '*(self.column_width[i]-text_width)) | |
| 275 | 320 | else: |
| 276 | 321 | # no more lines for this column |
| 277 | 322 | # TODO: precompute empty cells once |
| 278 | - self.write(' '*(self.column_width[i])) | |
| 323 | + self.write(u' '*(self.column_width[i])) | |
| 279 | 324 | if i < (self.num_columns - 1): |
| 280 | 325 | self.write(self.style.vertical_middle) |
| 281 | 326 | self.write(self.style.vertical_right) |
| ... | ... | @@ -293,7 +338,7 @@ class TableStream(object): |
| 293 | 338 | :param right: |
| 294 | 339 | :return: |
| 295 | 340 | """ |
| 296 | - return left + middle.join([horiz * width for width in self.column_width]) + right + '\n' | |
| 341 | + return left + middle.join([horiz * width for width in self.column_width]) + right + u'\n' | |
| 297 | 342 | |
| 298 | 343 | def write_header_top(self): |
| 299 | 344 | s = self.style |
| ... | ... | @@ -336,6 +381,8 @@ class TableStream(object): |
| 336 | 381 | self.write_bottom() |
| 337 | 382 | |
| 338 | 383 | |
| 384 | +# === MAIN =================================================================== | |
| 385 | + | |
| 339 | 386 | if __name__ == '__main__': |
| 340 | 387 | t = TableStream([10, 5, 20], header_row=['i', 'i*i', '2**i'], style=TableStyleSlim) |
| 341 | 388 | t.write_row(['test', 'test', 'test']) |
| ... | ... | @@ -343,6 +390,7 @@ if __name__ == '__main__': |
| 343 | 390 | t.write_row([cell, cell, cell], colors=['blue', None, 'red']) |
| 344 | 391 | for i in range(1, 11): |
| 345 | 392 | t.write_row([i, i*i, 2**i]) |
| 393 | + t.write_row([b'bytes', u'unicode', bytearray(b'bytearray')]) | |
| 346 | 394 | t.close() |
| 347 | 395 | |
| 348 | 396 | ... | ... |