Commit 0acaf934a6ec2e7b0101dc2824996648e70ec6d5

Authored by decalage2
1 parent fde6adf1

tablestream: support for both Python 2.6+ and 3.x, all cells are converted to unicode

oletools/thirdparty/tablestream/tablestream.py
@@ -52,8 +52,10 @@ from __future__ import print_function @@ -52,8 +52,10 @@ from __future__ import print_function
52 # 2016-05-25 v0.04 PL: - updated for colorclass 2.2.0 (now a package) 52 # 2016-05-25 v0.04 PL: - updated for colorclass 2.2.0 (now a package)
53 # 2016-07-29 v0.05 PL: - fixed oletools issue #57, bug when importing colorclass 53 # 2016-07-29 v0.05 PL: - fixed oletools issue #57, bug when importing colorclass
54 # 2016-07-31 v0.06 PL: - handle newline characters properly in each cell 54 # 2016-07-31 v0.06 PL: - handle newline characters properly in each cell
  55 +# 2016-08-28 v0.07 PL: - support for both Python 2.6+ and 3.x
  56 +# - all cells are converted to unicode
55 57
56 -__version__ = '0.06' 58 +__version__ = '0.07'
57 59
58 #------------------------------------------------------------------------------ 60 #------------------------------------------------------------------------------
59 # TODO: 61 # TODO:
@@ -84,6 +86,43 @@ if os.name == 'nt': @@ -84,6 +86,43 @@ if os.name == 'nt':
84 colorclass.Windows.enable(auto_colors=True) 86 colorclass.Windows.enable(auto_colors=True)
85 87
86 88
# === PYTHON 2+3 SUPPORT ======================================================

if sys.version_info[0] >= 3:
    # Python 3 specific adaptations
    # py3 range = py2 xrange
    xrange = range
    ustr = str
else:
    # Python 2 specific adaptations
    ustr = unicode

# Byte-string types that to_ustr() decodes with an explicit encoding.
# bytearray is listed on both major versions: letting it fall through to a
# plain ustr(obj) call would ignore the requested encoding/errors, and on
# Python 2 that implicit conversion fails on non-ASCII bytes.
byte_strings = (bytes, bytearray)


# === FUNCTIONS ==============================================================

def to_ustr(obj, encoding='utf8', errors='replace'):
    """
    Convert an object to a unicode string, using the appropriate method.

    :param obj: any object; unicode, bytes and bytearray get special handling
    :param encoding: codec used to decode bytes/bytearray input (UTF-8 by default)
    :param errors: decoding error handler ('replace' by default, so invalid
        byte sequences never raise)
    :return: unicode string (ustr); obj itself if it is already a ustr
    """
    # if the object is already unicode, return it unchanged:
    if isinstance(obj, ustr):
        return obj
    # if it is a byte string, decode it using the provided encoding.
    # bytes(obj) turns a bytearray into an immutable byte string first, so the
    # same decode call works for both types; arguments are passed positionally
    # because str.decode() on Python 2.6 does not accept keyword arguments.
    if isinstance(obj, byte_strings):
        return bytes(obj).decode(encoding, errors)
    # else just convert it to unicode:
    # (an exception is raised if we specify encoding in this case)
    return ustr(obj)
  123 +
  124 +
  125 +
87 # === CLASSES ================================================================= 126 # === CLASSES =================================================================
88 127
89 128
@@ -100,47 +139,47 @@ class TableStyle(object): @@ -100,47 +139,47 @@ class TableStyle(object):
100 """ 139 """
101 # Header rows: 140 # Header rows:
102 header_top = True 141 header_top = True
103 - header_top_left = '+'  
104 - header_top_horiz = '-'  
105 - header_top_middle = '+'  
106 - header_top_right = '+' 142 + header_top_left = u'+'
  143 + header_top_horiz = u'-'
  144 + header_top_middle = u'+'
  145 + header_top_right = u'+'
107 146
108 - header_vertical_left = '|'  
109 - header_vertical_middle = '|'  
110 - header_vertical_right = '|' 147 + header_vertical_left = u'|'
  148 + header_vertical_middle = u'|'
  149 + header_vertical_right = u'|'
111 150
112 # Separator line between header and normal rows: 151 # Separator line between header and normal rows:
113 header_sep = True 152 header_sep = True
114 - header_sep_left = '+'  
115 - header_sep_horiz = '-'  
116 - header_sep_middle = '+'  
117 - header_sep_right = '+' 153 + header_sep_left = u'+'
  154 + header_sep_horiz = u'-'
  155 + header_sep_middle = u'+'
  156 + header_sep_right = u'+'
118 157
119 # Top row if there is no header: 158 # Top row if there is no header:
120 noheader_top = True 159 noheader_top = True
121 - noheader_top_left = '+'  
122 - noheader_top_horiz = '-'  
123 - noheader_top_middle = '+'  
124 - noheader_top_right = '+' 160 + noheader_top_left = u'+'
  161 + noheader_top_horiz = u'-'
  162 + noheader_top_middle = u'+'
  163 + noheader_top_right = u'+'
125 164
126 # Normal rows 165 # Normal rows
127 - vertical_left = '|'  
128 - vertical_middle = '|'  
129 - vertical_right = '|' 166 + vertical_left = u'|'
  167 + vertical_middle = u'|'
  168 + vertical_right = u'|'
130 169
131 # Separator line between rows: 170 # Separator line between rows:
132 sep = False 171 sep = False
133 - sep_left = '+'  
134 - sep_horiz = '-'  
135 - sep_middle = '+'  
136 - sep_right = '+' 172 + sep_left = u'+'
  173 + sep_horiz = u'-'
  174 + sep_middle = u'+'
  175 + sep_right = u'+'
137 176
138 # Bottom line 177 # Bottom line
139 bottom = True 178 bottom = True
140 - bottom_left = '+'  
141 - bottom_horiz = '-'  
142 - bottom_middle = '+'  
143 - bottom_right = '+' 179 + bottom_left = u'+'
  180 + bottom_horiz = u'-'
  181 + bottom_middle = u'+'
  182 + bottom_right = u'+'
144 183
145 184
146 class TableStyleSlim(object): 185 class TableStyleSlim(object):
@@ -155,47 +194,47 @@ class TableStyleSlim(object): @@ -155,47 +194,47 @@ class TableStyleSlim(object):
155 """ 194 """
156 # Header rows: 195 # Header rows:
157 header_top = True 196 header_top = True
158 - header_top_left = ''  
159 - header_top_horiz = '-'  
160 - header_top_middle = '+'  
161 - header_top_right = '' 197 + header_top_left = u''
  198 + header_top_horiz = u'-'
  199 + header_top_middle = u'+'
  200 + header_top_right = u''
162 201
163 - header_vertical_left = ''  
164 - header_vertical_middle = '|'  
165 - header_vertical_right = '' 202 + header_vertical_left = u''
  203 + header_vertical_middle = u'|'
  204 + header_vertical_right = u''
166 205
167 # Separator line between header and normal rows: 206 # Separator line between header and normal rows:
168 header_sep = True 207 header_sep = True
169 - header_sep_left = ''  
170 - header_sep_horiz = '-'  
171 - header_sep_middle = '+'  
172 - header_sep_right = '' 208 + header_sep_left = u''
  209 + header_sep_horiz = u'-'
  210 + header_sep_middle = u'+'
  211 + header_sep_right = u''
173 212
174 # Top row if there is no header: 213 # Top row if there is no header:
175 noheader_top = True 214 noheader_top = True
176 - noheader_top_left = ''  
177 - noheader_top_horiz = '-'  
178 - noheader_top_middle = '+'  
179 - noheader_top_right = '' 215 + noheader_top_left = u''
  216 + noheader_top_horiz = u'-'
  217 + noheader_top_middle = u'+'
  218 + noheader_top_right = u''
180 219
181 # Normal rows 220 # Normal rows
182 - vertical_left = ''  
183 - vertical_middle = '|'  
184 - vertical_right = '' 221 + vertical_left = u''
  222 + vertical_middle = u'|'
  223 + vertical_right = u''
185 224
186 # Separator line between rows: 225 # Separator line between rows:
187 sep = False 226 sep = False
188 - sep_left = ''  
189 - sep_horiz = '-'  
190 - sep_middle = '+'  
191 - sep_right = '' 227 + sep_left = u''
  228 + sep_horiz = u'-'
  229 + sep_middle = u'+'
  230 + sep_right = u''
192 231
193 # Bottom line 232 # Bottom line
194 bottom = True 233 bottom = True
195 - bottom_left = ''  
196 - bottom_horiz = '-'  
197 - bottom_middle = '+'  
198 - bottom_right = '' 234 + bottom_left = u''
  235 + bottom_horiz = u'-'
  236 + bottom_middle = u'+'
  237 + bottom_right = u''
199 238
200 239
201 240
@@ -213,10 +252,22 @@ class TableStream(object): @@ -213,10 +252,22 @@ class TableStream(object):
213 be processed row by row. 252 be processed row by row.
214 """ 253 """
215 254
216 - def __init__(self, column_width, header_row=None, style=TableStyle, outfile=sys.stdout): 255 + def __init__(self, column_width, header_row=None, style=TableStyle,
  256 + outfile=sys.stdout, encoding_in='utf8', encoding_out='utf8'):
  257 + '''
  258 + Constructor for class TableStream
  259 + :param column_width: tuple or list containing the width of each column
  260 + :param header_row: tuple or list containing the header row text
  261 + :param style: style for the table, a TableStyle object
  262 + :param outfile: output file (sys.stdout by default to print on the console)
  263 + :param encoding_in: encoding used when the input text is bytes (UTF-8 by default)
  264 + :param encoding_out: encoding used for the output (UTF-8 by default)
  265 + '''
217 self.column_width = column_width 266 self.column_width = column_width
218 self.num_columns = len(column_width) 267 self.num_columns = len(column_width)
219 self.header_row = header_row 268 self.header_row = header_row
  269 + self.encoding_in = encoding_in
  270 + self.encoding_out = encoding_out
220 assert (header_row is None) or len(header_row) == self.num_columns 271 assert (header_row is None) or len(header_row) == self.num_columns
221 self.style = style 272 self.style = style
222 self.outfile = outfile 273 self.outfile = outfile
@@ -239,13 +290,7 @@ class TableStream(object): @@ -239,13 +290,7 @@ class TableStream(object):
239 for i in xrange(self.num_columns): 290 for i in xrange(self.num_columns):
240 cell = row[i] 291 cell = row[i]
241 # Convert to string: 292 # Convert to string:
242 - # TODO: handle unicode properly  
243 - # TODO: use only unicode for textwrapper, to avoid str length issues  
244 - if isinstance(cell, bytes):  
245 - # encode to UTF8, avoiding errors  
246 - cell = cell.decode('utf-8', errors='replace')  
247 - else:  
248 - cell = unicode(cell) 293 + cell = to_ustr(cell, encoding=self.encoding_in)
249 # Wrap cell text according to the column width 294 # Wrap cell text according to the column width
250 # TODO: use a TextWrapper object for each column instead 295 # TODO: use a TextWrapper object for each column instead
251 # split the string if it contains newline characters, otherwise 296 # split the string if it contains newline characters, otherwise
@@ -259,7 +304,7 @@ class TableStream(object): @@ -259,7 +304,7 @@ class TableStream(object):
259 if color: 304 if color:
260 for j in xrange(len(column)): 305 for j in xrange(len(column)):
261 # print '%r: %s' % (column[j], type(column[j])) 306 # print '%r: %s' % (column[j], type(column[j]))
262 - column[j] = colorclass.Color('{auto%s}%s{/%s}' % (color, column[j], color)) 307 + column[j] = colorclass.Color(u'{auto%s}%s{/%s}' % (color, column[j], color))
263 columns.append(column) 308 columns.append(column)
264 # determine which column has the highest number of lines 309 # determine which column has the highest number of lines
265 max_lines = max(len(columns[i]), max_lines) 310 max_lines = max(len(columns[i]), max_lines)
@@ -271,11 +316,11 @@ class TableStream(object): @@ -271,11 +316,11 @@ class TableStream(object):
271 if j<len(column): 316 if j<len(column):
272 # text to be written 317 # text to be written
273 text_width = len(column[j]) 318 text_width = len(column[j])
274 - self.write(column[j] + ' '*(self.column_width[i]-text_width)) 319 + self.write(column[j] + u' '*(self.column_width[i]-text_width))
275 else: 320 else:
276 # no more lines for this column 321 # no more lines for this column
277 # TODO: precompute empty cells once 322 # TODO: precompute empty cells once
278 - self.write(' '*(self.column_width[i])) 323 + self.write(u' '*(self.column_width[i]))
279 if i < (self.num_columns - 1): 324 if i < (self.num_columns - 1):
280 self.write(self.style.vertical_middle) 325 self.write(self.style.vertical_middle)
281 self.write(self.style.vertical_right) 326 self.write(self.style.vertical_right)
@@ -293,7 +338,7 @@ class TableStream(object): @@ -293,7 +338,7 @@ class TableStream(object):
293 :param right: 338 :param right:
294 :return: 339 :return:
295 """ 340 """
296 - return left + middle.join([horiz * width for width in self.column_width]) + right + '\n' 341 + return left + middle.join([horiz * width for width in self.column_width]) + right + u'\n'
297 342
298 def write_header_top(self): 343 def write_header_top(self):
299 s = self.style 344 s = self.style
@@ -336,6 +381,8 @@ class TableStream(object): @@ -336,6 +381,8 @@ class TableStream(object):
336 self.write_bottom() 381 self.write_bottom()
337 382
338 383
  384 +# === MAIN ===================================================================
  385 +
339 if __name__ == '__main__': 386 if __name__ == '__main__':
340 t = TableStream([10, 5, 20], header_row=['i', 'i*i', '2**i'], style=TableStyleSlim) 387 t = TableStream([10, 5, 20], header_row=['i', 'i*i', '2**i'], style=TableStyleSlim)
341 t.write_row(['test', 'test', 'test']) 388 t.write_row(['test', 'test', 'test'])
@@ -343,6 +390,7 @@ if __name__ == '__main__': @@ -343,6 +390,7 @@ if __name__ == '__main__':
343 t.write_row([cell, cell, cell], colors=['blue', None, 'red']) 390 t.write_row([cell, cell, cell], colors=['blue', None, 'red'])
344 for i in range(1, 11): 391 for i in range(1, 11):
345 t.write_row([i, i*i, 2**i]) 392 t.write_row([i, i*i, 2**i])
  393 + t.write_row([b'bytes', u'unicode', bytearray(b'bytearray')])
346 t.close() 394 t.close()
347 395
348 396