BUG: #680 rears its head again; cut off another hydra head (unicode handling in DataFrame.to_string)

adamklein · adamklein · commit f92526446cef · 2012-01-26T19:32:03.000-05:00
diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -511,10 +511,11 @@ def set_eng_float_format(precision=None, accuracy=3, use_eng_prefix=False):
 
 def _stringify(col):
     # unicode workaround
-    if isinstance(col, tuple):
-        return str(col)
-    else:
-        return '%s' % console_encode(col)
+    return unicode(col)
+    #if isinstance(col, tuple):
+    #    return str(col)
+    #else:
+    #    return '%s' % console_encode(col)
 
 def _float_format_default(v, width=None):
     """
@@ -818,7 +819,6 @@ def load(path):
     finally:
         f.close()
 
-
 def console_encode(value):
     if py3compat.PY3 or not isinstance(value, unicode):
         return value
diff --git a/pandas/core/format.py b/pandas/core/format.py
@@ -170,7 +170,7 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
         else:
             self.columns = frame.columns
 
-    def to_string(self):
+    def to_string(self, force_unicode=False):
         """
         Render a DataFrame to a console-friendly tabular output.
         """
@@ -209,10 +209,17 @@ def to_string(self):
             else:
                 to_write.append(adjoin(1, *stringified))
 
-        for s in to_write:
-            if isinstance(s, unicode):
+        if force_unicode:
+            to_write = [unicode(s) for s in to_write]
+        else:
+            # generally every entry is a plain string, which is ascii-encoded.
+            # the problem is an entry containing a char with value over 127 -
+            # in that case every entry gets converted to unicode.
+            try:
+                for s in to_write:
+                    str(s)
+            except UnicodeError:
                 to_write = [unicode(s) for s in to_write]
-                break
 
         self.buf.writelines(to_write)
 
@@ -358,9 +365,9 @@ def is_numeric_dtype(dtype):
             fmt_columns = zip(*fmt_columns)
             dtypes = self.frame.dtypes.values
             need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
-            str_columns = zip(*[[' %s' % y
+            str_columns = zip(*[[u' %s' % y
                                 if y not in formatters and need_leadsp[x]
-                                else str(y) for y in x]
+                                else y for y in x]
                                for x in fmt_columns])
             if self.sparsify:
                 str_columns = _sparsify(str_columns)
@@ -370,9 +377,9 @@ def is_numeric_dtype(dtype):
             fmt_columns = self.columns.format()
             dtypes = self.frame.dtypes
             need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
-            str_columns = [[' %s' % x
+            str_columns = [[u' %s' % x
                             if col not in formatters and need_leadsp[x]
-                            else str(x)]
+                            else x]
                            for col, x in zip(self.columns, fmt_columns)]
 
         if self.show_index_names and self.has_index_names:
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -1858,13 +1858,7 @@ def test_to_string_unicode_two(self):
     def test_to_string_with_formatters_unicode(self):
         df = DataFrame({u'c/\u03c3':[1,2,3]})
         result = df.to_string(formatters={u'c/\u03c3': lambda x: '%s' % x})
-        cp437 = u'  c/\u03c3\n0 1  \n1 2  \n2 3  '.encode('cp437', 'ignore')
-        if py3compat.PY3:
-            self.assertEqual(result, u'  c/\u03c3\n0 1  \n1 2  \n2 3  ')
-        else:
-            assert(result in
-                   ('  c/\xcf\x83\n0 1   \n1 2   \n2 3   ', cp437,
-                    '  c/?\n0 1   \n1 2   \n2 3   ' ))
+        self.assertEqual(result, u'  c/\u03c3\n0 1  \n1 2  \n2 3  ')
 
     def test_to_string_buffer_all_unicode(self):
         buf = StringIO()
@@ -1878,6 +1872,10 @@ def test_to_string_buffer_all_unicode(self):
         # this should work
         ''.join(buf.buflist)
 
+    def test_unicode_problem_decoding_as_ascii(self):
+        dm = DataFrame({u'c/\u03c3': Series({'test':np.NaN})})
+        unicode(dm.to_string())
+
     def test_head_tail(self):
         assert_frame_equal(self.frame.head(), self.frame[:5])
         assert_frame_equal(self.frame.tail(), self.frame[-5:])