Commit 0acaf934a6ec2e7b0101dc2824996648e70ec6d5
1 parent
fde6adf1
tablestream: support for both Python 2.6+ and 3.x, all cells are converted to unicode
Showing 1 changed file with 113 additions and 65 deletions
oletools/thirdparty/tablestream/tablestream.py
| @@ -52,8 +52,10 @@ from __future__ import print_function | @@ -52,8 +52,10 @@ from __future__ import print_function | ||
| 52 | # 2016-05-25 v0.04 PL: - updated for colorclass 2.2.0 (now a package) | 52 | # 2016-05-25 v0.04 PL: - updated for colorclass 2.2.0 (now a package) |
| 53 | # 2016-07-29 v0.05 PL: - fixed oletools issue #57, bug when importing colorclass | 53 | # 2016-07-29 v0.05 PL: - fixed oletools issue #57, bug when importing colorclass |
| 54 | # 2016-07-31 v0.06 PL: - handle newline characters properly in each cell | 54 | # 2016-07-31 v0.06 PL: - handle newline characters properly in each cell |
| 55 | +# 2016-08-28 v0.07 PL: - support for both Python 2.6+ and 3.x | ||
| 56 | +# - all cells are converted to unicode | ||
| 55 | 57 | ||
| 56 | -__version__ = '0.06' | 58 | +__version__ = '0.07' |
| 57 | 59 | ||
| 58 | #------------------------------------------------------------------------------ | 60 | #------------------------------------------------------------------------------ |
| 59 | # TODO: | 61 | # TODO: |
| @@ -84,6 +86,43 @@ if os.name == 'nt': | @@ -84,6 +86,43 @@ if os.name == 'nt': | ||
| 84 | colorclass.Windows.enable(auto_colors=True) | 86 | colorclass.Windows.enable(auto_colors=True) |
| 85 | 87 | ||
| 86 | 88 | ||
| 89 | +# === PYTHON 2+3 SUPPORT ====================================================== | ||
| 90 | + | ||
| 91 | +if sys.version_info[0] >= 3: | ||
| 92 | + # Python 3 specific adaptations | ||
| 93 | + # py3 range = py2 xrange | ||
| 94 | + xrange = range | ||
| 95 | + ustr = str | ||
| 96 | + # byte strings for to_ustr (with py3, bytearray supports decoding): | ||
| 97 | + byte_strings = (bytes, bytearray) | ||
| 98 | +else: | ||
| 99 | + # Python 2 specific adaptations | ||
| 100 | + ustr = unicode | ||
| 101 | + # byte strings for to_ustr (with py2, bytearray does not support decoding): | ||
| 102 | + byte_strings = bytes | ||
| 103 | + | ||
| 104 | + | ||
| 105 | +# === FUNCTIONS ============================================================== | ||
| 106 | + | ||
| 107 | +def to_ustr(obj, encoding='utf8', errors='replace'): | ||
| 108 | + """ | ||
| 109 | + convert an object to unicode, using the appropriate method | ||
| 110 | + :param obj: any object, str, bytes or unicode | ||
| 111 | + :return: unicode string (ustr) | ||
| 112 | + """ | ||
| 113 | + # if the object is already unicode, return it unchanged: | ||
| 114 | + if isinstance(obj, ustr): | ||
| 115 | + return obj | ||
| 116 | + # if it is a bytes string, decode it using the provided encoding | ||
| 117 | + elif isinstance(obj, byte_strings): | ||
| 118 | + return ustr(obj, encoding=encoding, errors=errors) | ||
| 119 | + # else just convert it to unicode: | ||
| 120 | + # (an exception is raised if we specify encoding in this case) | ||
| 121 | + else: | ||
| 122 | + return ustr(obj) | ||
| 123 | + | ||
| 124 | + | ||
| 125 | + | ||
| 87 | # === CLASSES ================================================================= | 126 | # === CLASSES ================================================================= |
| 88 | 127 | ||
| 89 | 128 | ||
| @@ -100,47 +139,47 @@ class TableStyle(object): | @@ -100,47 +139,47 @@ class TableStyle(object): | ||
| 100 | """ | 139 | """ |
| 101 | # Header rows: | 140 | # Header rows: |
| 102 | header_top = True | 141 | header_top = True |
| 103 | - header_top_left = '+' | ||
| 104 | - header_top_horiz = '-' | ||
| 105 | - header_top_middle = '+' | ||
| 106 | - header_top_right = '+' | 142 | + header_top_left = u'+' |
| 143 | + header_top_horiz = u'-' | ||
| 144 | + header_top_middle = u'+' | ||
| 145 | + header_top_right = u'+' | ||
| 107 | 146 | ||
| 108 | - header_vertical_left = '|' | ||
| 109 | - header_vertical_middle = '|' | ||
| 110 | - header_vertical_right = '|' | 147 | + header_vertical_left = u'|' |
| 148 | + header_vertical_middle = u'|' | ||
| 149 | + header_vertical_right = u'|' | ||
| 111 | 150 | ||
| 112 | # Separator line between header and normal rows: | 151 | # Separator line between header and normal rows: |
| 113 | header_sep = True | 152 | header_sep = True |
| 114 | - header_sep_left = '+' | ||
| 115 | - header_sep_horiz = '-' | ||
| 116 | - header_sep_middle = '+' | ||
| 117 | - header_sep_right = '+' | 153 | + header_sep_left = u'+' |
| 154 | + header_sep_horiz = u'-' | ||
| 155 | + header_sep_middle = u'+' | ||
| 156 | + header_sep_right = u'+' | ||
| 118 | 157 | ||
| 119 | # Top row if there is no header: | 158 | # Top row if there is no header: |
| 120 | noheader_top = True | 159 | noheader_top = True |
| 121 | - noheader_top_left = '+' | ||
| 122 | - noheader_top_horiz = '-' | ||
| 123 | - noheader_top_middle = '+' | ||
| 124 | - noheader_top_right = '+' | 160 | + noheader_top_left = u'+' |
| 161 | + noheader_top_horiz = u'-' | ||
| 162 | + noheader_top_middle = u'+' | ||
| 163 | + noheader_top_right = u'+' | ||
| 125 | 164 | ||
| 126 | # Normal rows | 165 | # Normal rows |
| 127 | - vertical_left = '|' | ||
| 128 | - vertical_middle = '|' | ||
| 129 | - vertical_right = '|' | 166 | + vertical_left = u'|' |
| 167 | + vertical_middle = u'|' | ||
| 168 | + vertical_right = u'|' | ||
| 130 | 169 | ||
| 131 | # Separator line between rows: | 170 | # Separator line between rows: |
| 132 | sep = False | 171 | sep = False |
| 133 | - sep_left = '+' | ||
| 134 | - sep_horiz = '-' | ||
| 135 | - sep_middle = '+' | ||
| 136 | - sep_right = '+' | 172 | + sep_left = u'+' |
| 173 | + sep_horiz = u'-' | ||
| 174 | + sep_middle = u'+' | ||
| 175 | + sep_right = u'+' | ||
| 137 | 176 | ||
| 138 | # Bottom line | 177 | # Bottom line |
| 139 | bottom = True | 178 | bottom = True |
| 140 | - bottom_left = '+' | ||
| 141 | - bottom_horiz = '-' | ||
| 142 | - bottom_middle = '+' | ||
| 143 | - bottom_right = '+' | 179 | + bottom_left = u'+' |
| 180 | + bottom_horiz = u'-' | ||
| 181 | + bottom_middle = u'+' | ||
| 182 | + bottom_right = u'+' | ||
| 144 | 183 | ||
| 145 | 184 | ||
| 146 | class TableStyleSlim(object): | 185 | class TableStyleSlim(object): |
| @@ -155,47 +194,47 @@ class TableStyleSlim(object): | @@ -155,47 +194,47 @@ class TableStyleSlim(object): | ||
| 155 | """ | 194 | """ |
| 156 | # Header rows: | 195 | # Header rows: |
| 157 | header_top = True | 196 | header_top = True |
| 158 | - header_top_left = '' | ||
| 159 | - header_top_horiz = '-' | ||
| 160 | - header_top_middle = '+' | ||
| 161 | - header_top_right = '' | 197 | + header_top_left = u'' |
| 198 | + header_top_horiz = u'-' | ||
| 199 | + header_top_middle = u'+' | ||
| 200 | + header_top_right = u'' | ||
| 162 | 201 | ||
| 163 | - header_vertical_left = '' | ||
| 164 | - header_vertical_middle = '|' | ||
| 165 | - header_vertical_right = '' | 202 | + header_vertical_left = u'' |
| 203 | + header_vertical_middle = u'|' | ||
| 204 | + header_vertical_right = u'' | ||
| 166 | 205 | ||
| 167 | # Separator line between header and normal rows: | 206 | # Separator line between header and normal rows: |
| 168 | header_sep = True | 207 | header_sep = True |
| 169 | - header_sep_left = '' | ||
| 170 | - header_sep_horiz = '-' | ||
| 171 | - header_sep_middle = '+' | ||
| 172 | - header_sep_right = '' | 208 | + header_sep_left = u'' |
| 209 | + header_sep_horiz = u'-' | ||
| 210 | + header_sep_middle = u'+' | ||
| 211 | + header_sep_right = u'' | ||
| 173 | 212 | ||
| 174 | # Top row if there is no header: | 213 | # Top row if there is no header: |
| 175 | noheader_top = True | 214 | noheader_top = True |
| 176 | - noheader_top_left = '' | ||
| 177 | - noheader_top_horiz = '-' | ||
| 178 | - noheader_top_middle = '+' | ||
| 179 | - noheader_top_right = '' | 215 | + noheader_top_left = u'' |
| 216 | + noheader_top_horiz = u'-' | ||
| 217 | + noheader_top_middle = u'+' | ||
| 218 | + noheader_top_right = u'' | ||
| 180 | 219 | ||
| 181 | # Normal rows | 220 | # Normal rows |
| 182 | - vertical_left = '' | ||
| 183 | - vertical_middle = '|' | ||
| 184 | - vertical_right = '' | 221 | + vertical_left = u'' |
| 222 | + vertical_middle = u'|' | ||
| 223 | + vertical_right = u'' | ||
| 185 | 224 | ||
| 186 | # Separator line between rows: | 225 | # Separator line between rows: |
| 187 | sep = False | 226 | sep = False |
| 188 | - sep_left = '' | ||
| 189 | - sep_horiz = '-' | ||
| 190 | - sep_middle = '+' | ||
| 191 | - sep_right = '' | 227 | + sep_left = u'' |
| 228 | + sep_horiz = u'-' | ||
| 229 | + sep_middle = u'+' | ||
| 230 | + sep_right = u'' | ||
| 192 | 231 | ||
| 193 | # Bottom line | 232 | # Bottom line |
| 194 | bottom = True | 233 | bottom = True |
| 195 | - bottom_left = '' | ||
| 196 | - bottom_horiz = '-' | ||
| 197 | - bottom_middle = '+' | ||
| 198 | - bottom_right = '' | 234 | + bottom_left = u'' |
| 235 | + bottom_horiz = u'-' | ||
| 236 | + bottom_middle = u'+' | ||
| 237 | + bottom_right = u'' | ||
| 199 | 238 | ||
| 200 | 239 | ||
| 201 | 240 | ||
| @@ -213,10 +252,22 @@ class TableStream(object): | @@ -213,10 +252,22 @@ class TableStream(object): | ||
| 213 | be processed row by row. | 252 | be processed row by row. |
| 214 | """ | 253 | """ |
| 215 | 254 | ||
| 216 | - def __init__(self, column_width, header_row=None, style=TableStyle, outfile=sys.stdout): | 255 | + def __init__(self, column_width, header_row=None, style=TableStyle, |
| 256 | + outfile=sys.stdout, encoding_in='utf8', encoding_out='utf8'): | ||
| 257 | + ''' | ||
| 258 | + Constructor for class TableStream | ||
| 259 | + :param column_width: tuple or list containing the width of each column | ||
| 260 | + :param header_row: tuple or list containing the header row text | ||
| 261 | + :param style: style for the table, a TableStyle object | ||
| 262 | + :param outfile: output file (sys.stdout by default to print on the console) | ||
| 263 | + :param encoding_in: encoding used when the input text is bytes (UTF-8 by default) | ||
| 264 | + :param encoding_out: encoding used for the output (UTF-8 by default) | ||
| 265 | + ''' | ||
| 217 | self.column_width = column_width | 266 | self.column_width = column_width |
| 218 | self.num_columns = len(column_width) | 267 | self.num_columns = len(column_width) |
| 219 | self.header_row = header_row | 268 | self.header_row = header_row |
| 269 | + self.encoding_in = encoding_in | ||
| 270 | + self.encoding_out = encoding_out | ||
| 220 | assert (header_row is None) or len(header_row) == self.num_columns | 271 | assert (header_row is None) or len(header_row) == self.num_columns |
| 221 | self.style = style | 272 | self.style = style |
| 222 | self.outfile = outfile | 273 | self.outfile = outfile |
| @@ -239,13 +290,7 @@ class TableStream(object): | @@ -239,13 +290,7 @@ class TableStream(object): | ||
| 239 | for i in xrange(self.num_columns): | 290 | for i in xrange(self.num_columns): |
| 240 | cell = row[i] | 291 | cell = row[i] |
| 241 | # Convert to string: | 292 | # Convert to string: |
| 242 | - # TODO: handle unicode properly | ||
| 243 | - # TODO: use only unicode for textwrapper, to avoid str length issues | ||
| 244 | - if isinstance(cell, bytes): | ||
| 245 | - # encode to UTF8, avoiding errors | ||
| 246 | - cell = cell.decode('utf-8', errors='replace') | ||
| 247 | - else: | ||
| 248 | - cell = unicode(cell) | 293 | + cell = to_ustr(cell, encoding=self.encoding_in) |
| 249 | # Wrap cell text according to the column width | 294 | # Wrap cell text according to the column width |
| 250 | # TODO: use a TextWrapper object for each column instead | 295 | # TODO: use a TextWrapper object for each column instead |
| 251 | # split the string if it contains newline characters, otherwise | 296 | # split the string if it contains newline characters, otherwise |
| @@ -259,7 +304,7 @@ class TableStream(object): | @@ -259,7 +304,7 @@ class TableStream(object): | ||
| 259 | if color: | 304 | if color: |
| 260 | for j in xrange(len(column)): | 305 | for j in xrange(len(column)): |
| 261 | # print '%r: %s' % (column[j], type(column[j])) | 306 | # print '%r: %s' % (column[j], type(column[j])) |
| 262 | - column[j] = colorclass.Color('{auto%s}%s{/%s}' % (color, column[j], color)) | 307 | + column[j] = colorclass.Color(u'{auto%s}%s{/%s}' % (color, column[j], color)) |
| 263 | columns.append(column) | 308 | columns.append(column) |
| 264 | # determine which column has the highest number of lines | 309 | # determine which column has the highest number of lines |
| 265 | max_lines = max(len(columns[i]), max_lines) | 310 | max_lines = max(len(columns[i]), max_lines) |
| @@ -271,11 +316,11 @@ class TableStream(object): | @@ -271,11 +316,11 @@ class TableStream(object): | ||
| 271 | if j<len(column): | 316 | if j<len(column): |
| 272 | # text to be written | 317 | # text to be written |
| 273 | text_width = len(column[j]) | 318 | text_width = len(column[j]) |
| 274 | - self.write(column[j] + ' '*(self.column_width[i]-text_width)) | 319 | + self.write(column[j] + u' '*(self.column_width[i]-text_width)) |
| 275 | else: | 320 | else: |
| 276 | # no more lines for this column | 321 | # no more lines for this column |
| 277 | # TODO: precompute empty cells once | 322 | # TODO: precompute empty cells once |
| 278 | - self.write(' '*(self.column_width[i])) | 323 | + self.write(u' '*(self.column_width[i])) |
| 279 | if i < (self.num_columns - 1): | 324 | if i < (self.num_columns - 1): |
| 280 | self.write(self.style.vertical_middle) | 325 | self.write(self.style.vertical_middle) |
| 281 | self.write(self.style.vertical_right) | 326 | self.write(self.style.vertical_right) |
| @@ -293,7 +338,7 @@ class TableStream(object): | @@ -293,7 +338,7 @@ class TableStream(object): | ||
| 293 | :param right: | 338 | :param right: |
| 294 | :return: | 339 | :return: |
| 295 | """ | 340 | """ |
| 296 | - return left + middle.join([horiz * width for width in self.column_width]) + right + '\n' | 341 | + return left + middle.join([horiz * width for width in self.column_width]) + right + u'\n' |
| 297 | 342 | ||
| 298 | def write_header_top(self): | 343 | def write_header_top(self): |
| 299 | s = self.style | 344 | s = self.style |
| @@ -336,6 +381,8 @@ class TableStream(object): | @@ -336,6 +381,8 @@ class TableStream(object): | ||
| 336 | self.write_bottom() | 381 | self.write_bottom() |
| 337 | 382 | ||
| 338 | 383 | ||
| 384 | +# === MAIN =================================================================== | ||
| 385 | + | ||
| 339 | if __name__ == '__main__': | 386 | if __name__ == '__main__': |
| 340 | t = TableStream([10, 5, 20], header_row=['i', 'i*i', '2**i'], style=TableStyleSlim) | 387 | t = TableStream([10, 5, 20], header_row=['i', 'i*i', '2**i'], style=TableStyleSlim) |
| 341 | t.write_row(['test', 'test', 'test']) | 388 | t.write_row(['test', 'test', 'test']) |
| @@ -343,6 +390,7 @@ if __name__ == '__main__': | @@ -343,6 +390,7 @@ if __name__ == '__main__': | ||
| 343 | t.write_row([cell, cell, cell], colors=['blue', None, 'red']) | 390 | t.write_row([cell, cell, cell], colors=['blue', None, 'red']) |
| 344 | for i in range(1, 11): | 391 | for i in range(1, 11): |
| 345 | t.write_row([i, i*i, 2**i]) | 392 | t.write_row([i, i*i, 2**i]) |
| 393 | + t.write_row([b'bytes', u'unicode', bytearray(b'bytearray')]) | ||
| 346 | t.close() | 394 | t.close() |
| 347 | 395 | ||
| 348 | 396 |