Commit 0acaf934a6ec2e7b0101dc2824996648e70ec6d5

Authored by decalage2
1 parent fde6adf1

tablestream: support for both Python 2.6+ and 3.x, all cells are converted to unicode

oletools/thirdparty/tablestream/tablestream.py
... ... @@ -52,8 +52,10 @@ from __future__ import print_function
52 52 # 2016-05-25 v0.04 PL: - updated for colorclass 2.2.0 (now a package)
53 53 # 2016-07-29 v0.05 PL: - fixed oletools issue #57, bug when importing colorclass
54 54 # 2016-07-31 v0.06 PL: - handle newline characters properly in each cell
  55 +# 2016-08-28 v0.07 PL: - support for both Python 2.6+ and 3.x
  56 +# - all cells are converted to unicode
55 57  
56   -__version__ = '0.06'
  58 +__version__ = '0.07'
57 59  
58 60 #------------------------------------------------------------------------------
59 61 # TODO:
... ... @@ -84,6 +86,43 @@ if os.name == 'nt':
84 86 colorclass.Windows.enable(auto_colors=True)
85 87  
86 88  
  89 +# === PYTHON 2+3 SUPPORT ======================================================
  90 +
if sys.version_info[0] >= 3:
    # Python 3 specific adaptations
    # py3 range = py2 xrange
    xrange = range
    # the native text type is already unicode on py3
    ustr = str
    # byte strings for to_ustr (with py3, bytearray supports encoding):
    byte_strings = (bytes, bytearray)
else:
    # Python 2 specific adaptations
    ustr = unicode
    # byte strings for to_ustr (with py2, unicode() cannot decode a bytearray
    # directly, so to_ustr converts bytearray objects to bytes first):
    byte_strings = bytes


# === FUNCTIONS ==============================================================

def to_ustr(obj, encoding='utf8', errors='replace'):
    """
    Convert an object to a unicode string, using the appropriate method.

    :param obj: any object: unicode, bytes, bytearray or anything else
    :param encoding: codec used to decode bytes/bytearray input
        (UTF-8 by default); ignored for any other type
    :param errors: codec error handler used when decoding
        ('replace' by default, so undecodable bytes never raise)
    :return: unicode string (ustr)
    """
    # if the object is already unicode, return it unchanged:
    if isinstance(obj, ustr):
        return obj
    # A bytearray is normalised to bytes first: on py2 unicode() refuses to
    # decode a bytearray, and the plain unicode(obj) fallback would ignore
    # the requested encoding and fail on non-ASCII content.
    if isinstance(obj, bytearray):
        obj = bytes(obj)
    # if it is a bytes string, decode it using the provided encoding
    # (positional arguments: py2.6 decode() does not accept keywords)
    if isinstance(obj, byte_strings):
        return obj.decode(encoding, errors)
    # else just convert it to unicode:
    # (an exception is raised if we specify encoding in this case)
    return ustr(obj)
  123 +
  124 +
  125 +
87 126 # === CLASSES =================================================================
88 127  
89 128  
... ... @@ -100,47 +139,47 @@ class TableStyle(object):
100 139 """
101 140 # Header rows:
102 141 header_top = True
103   - header_top_left = '+'
104   - header_top_horiz = '-'
105   - header_top_middle = '+'
106   - header_top_right = '+'
  142 + header_top_left = u'+'
  143 + header_top_horiz = u'-'
  144 + header_top_middle = u'+'
  145 + header_top_right = u'+'
107 146  
108   - header_vertical_left = '|'
109   - header_vertical_middle = '|'
110   - header_vertical_right = '|'
  147 + header_vertical_left = u'|'
  148 + header_vertical_middle = u'|'
  149 + header_vertical_right = u'|'
111 150  
112 151 # Separator line between header and normal rows:
113 152 header_sep = True
114   - header_sep_left = '+'
115   - header_sep_horiz = '-'
116   - header_sep_middle = '+'
117   - header_sep_right = '+'
  153 + header_sep_left = u'+'
  154 + header_sep_horiz = u'-'
  155 + header_sep_middle = u'+'
  156 + header_sep_right = u'+'
118 157  
119 158 # Top row if there is no header:
120 159 noheader_top = True
121   - noheader_top_left = '+'
122   - noheader_top_horiz = '-'
123   - noheader_top_middle = '+'
124   - noheader_top_right = '+'
  160 + noheader_top_left = u'+'
  161 + noheader_top_horiz = u'-'
  162 + noheader_top_middle = u'+'
  163 + noheader_top_right = u'+'
125 164  
126 165 # Normal rows
127   - vertical_left = '|'
128   - vertical_middle = '|'
129   - vertical_right = '|'
  166 + vertical_left = u'|'
  167 + vertical_middle = u'|'
  168 + vertical_right = u'|'
130 169  
131 170 # Separator line between rows:
132 171 sep = False
133   - sep_left = '+'
134   - sep_horiz = '-'
135   - sep_middle = '+'
136   - sep_right = '+'
  172 + sep_left = u'+'
  173 + sep_horiz = u'-'
  174 + sep_middle = u'+'
  175 + sep_right = u'+'
137 176  
138 177 # Bottom line
139 178 bottom = True
140   - bottom_left = '+'
141   - bottom_horiz = '-'
142   - bottom_middle = '+'
143   - bottom_right = '+'
  179 + bottom_left = u'+'
  180 + bottom_horiz = u'-'
  181 + bottom_middle = u'+'
  182 + bottom_right = u'+'
144 183  
145 184  
146 185 class TableStyleSlim(object):
... ... @@ -155,47 +194,47 @@ class TableStyleSlim(object):
155 194 """
156 195 # Header rows:
157 196 header_top = True
158   - header_top_left = ''
159   - header_top_horiz = '-'
160   - header_top_middle = '+'
161   - header_top_right = ''
  197 + header_top_left = u''
  198 + header_top_horiz = u'-'
  199 + header_top_middle = u'+'
  200 + header_top_right = u''
162 201  
163   - header_vertical_left = ''
164   - header_vertical_middle = '|'
165   - header_vertical_right = ''
  202 + header_vertical_left = u''
  203 + header_vertical_middle = u'|'
  204 + header_vertical_right = u''
166 205  
167 206 # Separator line between header and normal rows:
168 207 header_sep = True
169   - header_sep_left = ''
170   - header_sep_horiz = '-'
171   - header_sep_middle = '+'
172   - header_sep_right = ''
  208 + header_sep_left = u''
  209 + header_sep_horiz = u'-'
  210 + header_sep_middle = u'+'
  211 + header_sep_right = u''
173 212  
174 213 # Top row if there is no header:
175 214 noheader_top = True
176   - noheader_top_left = ''
177   - noheader_top_horiz = '-'
178   - noheader_top_middle = '+'
179   - noheader_top_right = ''
  215 + noheader_top_left = u''
  216 + noheader_top_horiz = u'-'
  217 + noheader_top_middle = u'+'
  218 + noheader_top_right = u''
180 219  
181 220 # Normal rows
182   - vertical_left = ''
183   - vertical_middle = '|'
184   - vertical_right = ''
  221 + vertical_left = u''
  222 + vertical_middle = u'|'
  223 + vertical_right = u''
185 224  
186 225 # Separator line between rows:
187 226 sep = False
188   - sep_left = ''
189   - sep_horiz = '-'
190   - sep_middle = '+'
191   - sep_right = ''
  227 + sep_left = u''
  228 + sep_horiz = u'-'
  229 + sep_middle = u'+'
  230 + sep_right = u''
192 231  
193 232 # Bottom line
194 233 bottom = True
195   - bottom_left = ''
196   - bottom_horiz = '-'
197   - bottom_middle = '+'
198   - bottom_right = ''
  234 + bottom_left = u''
  235 + bottom_horiz = u'-'
  236 + bottom_middle = u'+'
  237 + bottom_right = u''
199 238  
200 239  
201 240  
... ... @@ -213,10 +252,22 @@ class TableStream(object):
213 252 be processed row by row.
214 253 """
215 254  
216   - def __init__(self, column_width, header_row=None, style=TableStyle, outfile=sys.stdout):
  255 + def __init__(self, column_width, header_row=None, style=TableStyle,
  256 + outfile=sys.stdout, encoding_in='utf8', encoding_out='utf8'):
  257 + '''
  258 + Constructor for class TableStream
  259 + :param column_width: tuple or list containing the width of each column
  260 + :param header_row: tuple or list containing the header row text
  261 + :param style: style for the table, a TableStyle object
  262 + :param outfile: output file (sys.stdout by default to print on the console)
  263 + :param encoding_in: encoding used when the input text is bytes (UTF-8 by default)
  264 + :param encoding_out: encoding used for the output (UTF-8 by default)
  265 + '''
217 266 self.column_width = column_width
218 267 self.num_columns = len(column_width)
219 268 self.header_row = header_row
  269 + self.encoding_in = encoding_in
  270 + self.encoding_out = encoding_out
220 271 assert (header_row is None) or len(header_row) == self.num_columns
221 272 self.style = style
222 273 self.outfile = outfile
... ... @@ -239,13 +290,7 @@ class TableStream(object):
239 290 for i in xrange(self.num_columns):
240 291 cell = row[i]
241 292 # Convert to string:
242   - # TODO: handle unicode properly
243   - # TODO: use only unicode for textwrapper, to avoid str length issues
244   - if isinstance(cell, bytes):
245   - # encode to UTF8, avoiding errors
246   - cell = cell.decode('utf-8', errors='replace')
247   - else:
248   - cell = unicode(cell)
  293 + cell = to_ustr(cell, encoding=self.encoding_in)
249 294 # Wrap cell text according to the column width
250 295 # TODO: use a TextWrapper object for each column instead
251 296 # split the string if it contains newline characters, otherwise
... ... @@ -259,7 +304,7 @@ class TableStream(object):
259 304 if color:
260 305 for j in xrange(len(column)):
261 306 # print '%r: %s' % (column[j], type(column[j]))
262   - column[j] = colorclass.Color('{auto%s}%s{/%s}' % (color, column[j], color))
  307 + column[j] = colorclass.Color(u'{auto%s}%s{/%s}' % (color, column[j], color))
263 308 columns.append(column)
264 309 # determine which column has the highest number of lines
265 310 max_lines = max(len(columns[i]), max_lines)
... ... @@ -271,11 +316,11 @@ class TableStream(object):
271 316 if j<len(column):
272 317 # text to be written
273 318 text_width = len(column[j])
274   - self.write(column[j] + ' '*(self.column_width[i]-text_width))
  319 + self.write(column[j] + u' '*(self.column_width[i]-text_width))
275 320 else:
276 321 # no more lines for this column
277 322 # TODO: precompute empty cells once
278   - self.write(' '*(self.column_width[i]))
  323 + self.write(u' '*(self.column_width[i]))
279 324 if i < (self.num_columns - 1):
280 325 self.write(self.style.vertical_middle)
281 326 self.write(self.style.vertical_right)
... ... @@ -293,7 +338,7 @@ class TableStream(object):
293 338 :param right:
294 339 :return:
295 340 """
296   - return left + middle.join([horiz * width for width in self.column_width]) + right + '\n'
  341 + return left + middle.join([horiz * width for width in self.column_width]) + right + u'\n'
297 342  
298 343 def write_header_top(self):
299 344 s = self.style
... ... @@ -336,6 +381,8 @@ class TableStream(object):
336 381 self.write_bottom()
337 382  
338 383  
  384 +# === MAIN ===================================================================
  385 +
339 386 if __name__ == '__main__':
340 387 t = TableStream([10, 5, 20], header_row=['i', 'i*i', '2**i'], style=TableStyleSlim)
341 388 t.write_row(['test', 'test', 'test'])
... ... @@ -343,6 +390,7 @@ if __name__ == '__main__':
343 390 t.write_row([cell, cell, cell], colors=['blue', None, 'red'])
344 391 for i in range(1, 11):
345 392 t.write_row([i, i*i, 2**i])
  393 + t.write_row([b'bytes', u'unicode', bytearray(b'bytearray')])
346 394 t.close()
347 395  
348 396  
... ...