From 78118a6fb1e34b397c7a86a39a128c4c8e92e40d Mon Sep 17 00:00:00 2001 From: William Granados Date: Sat, 31 Oct 2020 22:29:33 -0400 Subject: [PATCH 1/4] rebased earlier change with master --- doc/source/whatsnew/v1.2.0.rst | 12 ++++++++++++ pandas/core/generic.py | 18 +++++++++++++++++- pandas/tests/io/formats/test_to_latex.py | 21 +++++++++++++++++++++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 690bd9bc9704b..9725a53e0e082 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -547,6 +547,18 @@ ExtensionArray - Fixed an inconsistency in :class:`PeriodArray`'s ``__init__`` signature to those of :class:`DatetimeArray` and :class:`TimedeltaArray` (:issue:`37289`) - Reductions for :class:`BooleanArray`, :class:`Categorical`, :class:`DatetimeArray`, :class:`FloatingArray`, :class:`IntegerArray`, :class:`PeriodArray`, :class:`TimedeltaArray`, and :class:`PandasArray` are now keyword-only methods (:issue:`37541`) +ToLatex +^^^^^^^ + +- Can now substitute simple fstrings in place of callable functions in to_latex + +.. ipython:: python + data = [[1, 2, 3], [4, 5, 6], [7, 8, 9.001]] + df = pd.DataFrame(data, columns=["a", "b", "c"], + index=["foo", "bar", "foobar"]) + result = df.to_latex(formatters=["d", ".2f", lambda x: f"{x:.3f}"]) + + Other ^^^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c90ab9cceea8c..d487ceb6c7680 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3034,7 +3034,7 @@ def to_latex( Write row names (index). na_rep : str, default 'NaN' Missing data representation. - formatters : list of functions or dict of {{str: function}}, optional + formatters : list of functions/str or dict of {{str: function}}, optional Formatter functions to apply to columns' elements by position or name. The result of each function must be a unicode string. List must be of length equal to the number of columns. 
@@ -3136,6 +3136,22 @@ def to_latex( if multirow is None: multirow = config.get_option("display.latex.multirow") + if is_list_like(formatters) and not isinstance(formatters, dict): + formatter_elems_type = all( + isinstance(elem, str) or callable(elem) for elem in formatters + ) + if formatter_elems_type: + formatters = [ + (lambda style: lambda x: "{0:{1}}".format(x, style))(style) + if isinstance(style, str) + else style + for style in formatters + ] + else: + raise ValueError( + "Formatters elements should be f-strings or callable functions" + ) + self = cast("DataFrame", self) formatter = DataFrameFormatter( self, diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 7cf7ed3f77609..b96b674b2f230 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -121,6 +121,27 @@ def test_to_latex_column_format(self): ) assert result == expected + def test_to_latex_with_float_format_list(self): + # GH: 26278 + data = [[1, 2, 3], [4, 5, 6], [7, 8, 9.001]] + df = DataFrame(data, columns=["a", "b", "c"], index=["foo", "bar", "foobar"]) + + result = df.to_latex(formatters=["d", ".2f", lambda x: f"{x:.3f}"]) + expected = _dedent( + r""" + \begin{tabular}{lrrr} + \toprule + {} & a & b & c \\ + \midrule + foo & 1 & 2.00 & 3.000 \\ + bar & 4 & 5.00 & 6.000 \\ + foobar & 7 & 8.00 & 9.001 \\ + \bottomrule + \end{tabular} + """ + ) + assert result == expected + def test_to_latex_empty_tabular(self): df = DataFrame() result = df.to_latex() From 9e0f9c052e3be3f7ce3fb48401f9defadf36f9c1 Mon Sep 17 00:00:00 2001 From: William Granados Date: Sun, 1 Nov 2020 15:17:26 -0500 Subject: [PATCH 2/4] doc changes --- doc/source/whatsnew/v1.2.0.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 9725a53e0e082..ab521e86419e7 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -554,8 
+554,7 @@ ToLatex .. ipython:: python data = [[1, 2, 3], [4, 5, 6], [7, 8, 9.001]] - df = pd.DataFrame(data, columns=["a", "b", "c"], - index=["foo", "bar", "foobar"]) + df = pd.DataFrame(data, columns=["a", "b", "c"], index=["1", "2", "3"]) result = df.to_latex(formatters=["d", ".2f", lambda x: f"{x:.3f}"]) From ed6fe52aa12d35abc6dd1afd4200caa626da4c87 Mon Sep 17 00:00:00 2001 From: William Granados Date: Mon, 2 Nov 2020 21:17:52 -0500 Subject: [PATCH 3/4] move latex whatsnew section to io section --- doc/source/whatsnew/v1.2.0.rst | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index ab521e86419e7..49da4ef115d25 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -488,6 +488,7 @@ I/O - Bug in output rendering of complex numbers showing too many trailing zeros (:issue:`36799`) - Bug in :class:`HDFStore` threw a ``TypeError`` when exporting an empty :class:`DataFrame` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`) - Bug in :class:`HDFStore` was dropping timezone information when exporting :class:`Series` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`) +- In :meth:`to_latex` ``formatters`` now handles ``fstring`` and ``callable`` parameters (:issue:`26278`) Plotting ^^^^^^^^ @@ -547,17 +548,6 @@ ExtensionArray - Fixed an inconsistency in :class:`PeriodArray`'s ``__init__`` signature to those of :class:`DatetimeArray` and :class:`TimedeltaArray` (:issue:`37289`) - Reductions for :class:`BooleanArray`, :class:`Categorical`, :class:`DatetimeArray`, :class:`FloatingArray`, :class:`IntegerArray`, :class:`PeriodArray`, :class:`TimedeltaArray`, and :class:`PandasArray` are now keyword-only methods (:issue:`37541`) -ToLatex -^^^^^^^ - -- Can now substitute simple fstrings in place of callable functions in to_latex - -.. 
ipython:: python - data = [[1, 2, 3], [4, 5, 6], [7, 8, 9.001]] - df = pd.DataFrame(data, columns=["a", "b", "c"], index=["1", "2", "3"]) - result = df.to_latex(formatters=["d", ".2f", lambda x: f"{x:.3f}"]) - - Other ^^^^^ From 7e78db729294f4cbcc85baafed86c41b4414f0c1 Mon Sep 17 00:00:00 2001 From: William Granados Date: Wed, 4 Nov 2020 23:33:38 -0500 Subject: [PATCH 4/4] documentation changes and moved to_latex logic to formatters --- pandas/core/generic.py | 33 ++++++++++++++++----------------- pandas/io/formats/format.py | 26 ++++++++++++++++++++++++-- 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d487ceb6c7680..4af749810afe3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3038,6 +3038,10 @@ def to_latex( Formatter functions to apply to columns' elements by position or name. The result of each function must be a unicode string. List must be of length equal to the number of columns. + + .. versionchanged:: 1.2 + optionally allow fstrings in place of functions + float_format : one-parameter function or str, optional, default None Formatter for floating point numbers. For example ``float_format="%.2f"`` and ``float_format="{{:0.2f}}".format`` will @@ -3088,7 +3092,7 @@ def to_latex( .. versionadded:: 1.0.0 .. versionchanged:: 1.2.0 - Optionally allow caption to be a tuple ``(full_caption, short_caption)``. + optionally allow caption to be a tuple ``(full_caption, short_caption)``. label : str, optional The LaTeX label to be placed inside ``\label{{}}`` in the output. @@ -3121,6 +3125,17 @@ def to_latex( Donatello & purple & bo staff \\ \bottomrule \end{{tabular}} + >>> df = pd.DataFrame([[1,2], [3,4]], columns=['a','b']) + >>> print(df.to_latex(formatters=["d", ".3f"], + ... 
index=False)) # doctest: +NORMALIZE_WHITESPACE + \begin{{tabular}}{{rr}} + \toprule + a & b \\ + \midrule + 1 & 2.000 \\ + 3 & 4.000 \\ + \bottomrule + \end{{tabular}} """ # Get defaults from the pandas config if self.ndim == 1: @@ -3136,22 +3151,6 @@ def to_latex( if multirow is None: multirow = config.get_option("display.latex.multirow") - if is_list_like(formatters) and not isinstance(formatters, dict): - formatter_elems_type = all( - isinstance(elem, str) or callable(elem) for elem in formatters - ) - if formatter_elems_type: - formatters = [ - (lambda style: lambda x: "{0:{1}}".format(x, style))(style) - if isinstance(style, str) - else style - for style in formatters - ] - else: - raise ValueError( - "Formatters elements should be f-strings or callable functions" - ) - self = cast("DataFrame", self) formatter = DataFrameFormatter( self, diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 3c759f477899b..2c4fb8f1fbbfe 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -84,7 +84,11 @@ FormattersType = Union[ - List[Callable], Tuple[Callable, ...], Mapping[Union[str, int], Callable] + List[Callable], + Tuple[Callable, ...], + List[str], + List[Union[Callable, str]], + Mapping[Union[str, int], Callable], ] ColspaceType = Mapping[Label, Union[str, int]] ColspaceArgType = Union[ @@ -106,7 +110,7 @@ Whether to print index (row) labels. na_rep : str, optional, default 'NaN' String representation of ``NaN`` to use. - formatters : list, tuple or dict of one-param. functions, optional + formatters : list, tuple or dict of one-param. functions, str, optional Formatter functions to apply to columns' elements by position or name. The result of each function must be a unicode string. 
@@ -576,6 +580,24 @@ def _initialize_sparsify(self, sparsify: Optional[bool]) -> bool: def _initialize_formatters( self, formatters: Optional[FormattersType] ) -> FormattersType: + if is_list_like(formatters) and not isinstance(formatters, dict): + formatter_elems_type = all( + isinstance(elem, str) or callable(elem) for elem in formatters + ) + if formatter_elems_type: + # two fold lambda is required to bypass lambda replication + # issues in list comprehensions + formatters = [ + (lambda style: lambda x: "{0:{1}}".format(x, style))(style) + if isinstance(style, str) + else style + for style in formatters + ] + else: + raise ValueError( + "Formatters elements should be f-strings or callable functions" + ) + if formatters is None: return {} elif len(self.frame.columns) == len(formatters) or isinstance(formatters, dict):