From 1445dd46bb8040a46cc18980059a9859324e7b30 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 5 Aug 2019 22:12:51 +0100 Subject: [PATCH 1/2] TYPING: more type hints for io.formats.printing --- pandas/io/formats/printing.py | 36 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 4ec9094ce4abe..b5e23c7629b26 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -9,6 +9,8 @@ from pandas.core.dtypes.inference import is_sequence +EscapeChars = Union[Dict[str, str], Iterable[str]] + def adjoin(space: int, *lists: List[str], **kwargs) -> str: """ @@ -150,17 +152,14 @@ def _pprint_dict( def pprint_thing( thing, _nest_lvl: int = 0, - escape_chars: Optional[Union[Dict[str, str], Iterable[str]]] = None, + escape_chars: Optional[EscapeChars] = None, default_escapes: bool = False, quote_strings: bool = False, max_seq_items: Optional[int] = None, ) -> str: """ This function is the sanctioned way of converting objects - to a unicode representation. - - properly handles nested sequences containing unicode strings - (unicode(object) does not) + to a string representation and properly handles nested sequences. Parameters ---------- @@ -178,21 +177,13 @@ def pprint_thing( Returns ------- - result - unicode str + str """ - def as_escaped_unicode(thing, escape_chars=escape_chars): - # Unicode is fine, else we try to decode using utf-8 and 'replace' - # if that's not it either, we have no way of knowing and the user - # should deal with it himself. - - try: - result = str(thing) # we should try this first - except UnicodeDecodeError: - # either utf-8 or we replace errors - result = str(thing).decode("utf-8", "replace") - + def as_escaped_string( + thing: object, escape_chars: Optional[EscapeChars] = escape_chars + ) -> str: translate = {"\t": r"\t", "\n": r"\n", "\r": r"\r"} if isinstance(escape_chars, dict): if default_escapes: @@ -202,10 +193,11 @@ def as_escaped_unicode(thing, escape_chars=escape_chars): escape_chars = list(escape_chars.keys()) else: escape_chars = escape_chars or tuple() + + result = str(thing) for c in escape_chars: result = result.replace(c, translate[c]) - - return str(result) + return result if hasattr(thing, "__next__"): return str(thing) @@ -224,11 +216,11 @@ def as_escaped_unicode(thing, escape_chars=escape_chars): max_seq_items=max_seq_items, ) elif isinstance(thing, str) and quote_strings: - result = "'{thing}'".format(thing=as_escaped_unicode(thing)) + result = "'{thing}'".format(thing=as_escaped_string(thing)) else: - result = as_escaped_unicode(thing) + result = as_escaped_string(thing) - return str(result) # always unicode + return result def pprint_thing_encoded( From bff68bea9004c5899874646d641f7002f135c719 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 7 Aug 2019 07:22:38 +0100 Subject: [PATCH 2/2] use Any --- pandas/io/formats/printing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index b5e23c7629b26..ead51693da791 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -3,7 +3,7 @@ """ import sys -from typing import Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union +from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union from pandas._config import get_option @@ -150,7 +150,7 @@ def _pprint_dict( def pprint_thing( - thing, + thing: Any, _nest_lvl: int = 0, escape_chars: Optional[EscapeChars] = None, default_escapes: bool = False, @@ -182,7 +182,7 @@ def pprint_thing( """ def as_escaped_string( - thing: object, escape_chars: Optional[EscapeChars] = escape_chars + thing: Any, escape_chars: Optional[EscapeChars] = escape_chars ) -> str: translate = {"\t": r"\t", "\n": r"\n", "\r": r"\r"} if isinstance(escape_chars, dict):