From bdf65e0dd80f2bc40f2e8184c08e07db39356db2 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Wed, 22 Aug 2012 13:20:11 -0400 Subject: [PATCH 1/2] TST: Failing test for to_string with non-ascii string --- pandas/tests/test_format.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index 01f5d5923ad14..a17b383a203b5 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + try: from StringIO import StringIO except: @@ -110,6 +112,12 @@ def test_to_string_unicode_three(self): buf = StringIO() dm.to_string(buf) + def test_to_string_force_unicode(self): + #given string with non-ascii characters + df = DataFrame([["aaää", 1], ["bbbb", 2]]) + result = df.to_string(force_unicode=True) + self.assertEqual(result, u' 0 1\n0 aa\xe4\xe4 1\n1 bbbb 2') + def test_to_string_with_formatters(self): df = DataFrame({'int': [1, 2, 3], 'float': [1.0, 2.0, 3.0], From b7229066efc6d7e45642e721d2be60cbbe966b83 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Wed, 22 Aug 2012 13:20:46 -0400 Subject: [PATCH 2/2] BUG: Safely decode strings to unicode --- pandas/core/format.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/core/format.py b/pandas/core/format.py index 30baa4345caf2..ac156d68ffcbe 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -227,7 +227,11 @@ def _to_str_columns(self, force_unicode=False): if not py3compat.PY3: if force_unicode: - strcols = map(lambda col: map(unicode, col), strcols) + def make_unicode(x): + if isinstance(x, unicode): + return x + return x.decode('utf-8') + strcols = map(lambda col: map(make_unicode, col), strcols) else: # generally everything is plain strings, which has ascii # encoding. problem is when there is a char with value over 127 @@ -235,7 +239,11 @@ def _to_str_columns(self, force_unicode=False): try: map(lambda col: map(str, col), strcols) except UnicodeError: - strcols = map(lambda col: map(unicode, col), strcols) + def make_unicode(x): + if isinstance(x, unicode): + return x + return x.decode('utf-8') + strcols = map(lambda col: map(make_unicode, col), strcols) return strcols