# Patch summary (leading text before the first "diff --git" header; not part of any hunk).
#
# pandas/core/format.py, inside _to_str_columns(self, force_unicode=False):
#   On the "not py3compat.PY3" path, both calls to map(unicode, col) are
#   replaced by a locally defined make_unicode(x) helper. The helper returns x
#   unchanged when it is already a unicode instance and otherwise returns
#   x.decode('utf-8'). The replacement is made in two places: the
#   "if force_unicode:" branch, and the "except UnicodeError:" fallback that
#   follows the trial map(str, ...) conversion.
#
# pandas/tests/test_format.py:
#   Adds a "# -*- coding: utf-8 -*-" declaration at the top of the file and a
#   new test, test_to_string_force_unicode, which builds a DataFrame containing
#   the non-ASCII string "aaää" and compares df.to_string(force_unicode=True)
#   against a unicode literal.
#
# NOTE(review): the patch text below sits on a single line; the newlines and
#   indentation a unified diff needs are not present here. It presumably will
#   not apply in this form - restore it from the original commit.
# NOTE(review): the expected-output literal in the new test looks like it lost
#   column-alignment spaces in the same way - confirm against the original
#   commit before relying on it.
# NOTE(review): make_unicode is defined twice with identical bodies (once per
#   branch); a single shared definition would remove the duplication.
diff --git a/pandas/core/format.py b/pandas/core/format.py index 30baa4345caf2..ac156d68ffcbe 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -227,7 +227,11 @@ def _to_str_columns(self, force_unicode=False): if not py3compat.PY3: if force_unicode: - strcols = map(lambda col: map(unicode, col), strcols) + def make_unicode(x): + if isinstance(x, unicode): + return x + return x.decode('utf-8') + strcols = map(lambda col: map(make_unicode, col), strcols) else: # generally everything is plain strings, which has ascii # encoding. problem is when there is a char with value over 127 @@ -235,7 +239,11 @@ def _to_str_columns(self, force_unicode=False): try: map(lambda col: map(str, col), strcols) except UnicodeError: - strcols = map(lambda col: map(unicode, col), strcols) + def make_unicode(x): + if isinstance(x, unicode): + return x + return x.decode('utf-8') + strcols = map(lambda col: map(make_unicode, col), strcols) return strcols diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index 01f5d5923ad14..a17b383a203b5 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + try: from StringIO import StringIO except: @@ -110,6 +112,12 @@ def test_to_string_unicode_three(self): buf = StringIO() dm.to_string(buf) + def test_to_string_force_unicode(self): + #given string with non-ascii characters + df = DataFrame([["aaää", 1], ["bbbb", 2]]) + result = df.to_string(force_unicode=True) + self.assertEqual(result, u' 0 1\n0 aa\xe4\xe4 1\n1 bbbb 2') + def test_to_string_with_formatters(self): df = DataFrame({'int': [1, 2, 3], 'float': [1.0, 2.0, 3.0],