From be80898920eab8f740cf61019327621a75d73947 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Mon, 18 Nov 2013 17:14:34 -0800 Subject: [PATCH 1/5] Truncate repr by default rather than changing to info view. Truncate wide HTML repr rather than changing format. Truncate HTML repr for DataFrames with MultiIndex-es Add tests for long & wide Dataframe HTML reprs Remove now-dead code for info display in HTML repr. Fix HTML repr of wide tables with named indices Make plain text repr follow HTML repr, truncating large DataFrames Fix up tests for string repr Update docs for DataFrame reprs Add an option to switch back to info repr for large DataFrames --- doc/source/dsintro.rst | 21 ++--- doc/source/v0.13.0.txt | 6 ++ pandas/core/config_init.py | 25 +++-- pandas/core/format.py | 132 +++++++++++++++++++------- pandas/core/frame.py | 92 +++++++++--------- pandas/tests/test_format.py | 182 ++++++++++++++++++++++++++---------- 6 files changed, 303 insertions(+), 155 deletions(-) diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index 08ef25b178af9..828797deff5cf 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -573,8 +573,9 @@ indexing semantics are quite different in places from a matrix. Console display ~~~~~~~~~~~~~~~ -For very large DataFrame objects, only a summary will be printed to the console -(here I am reading a CSV version of the **baseball** dataset from the **plyr** +Very large DataFrames will be truncated to display them in the console. +You can also get a summary using :meth:`~pandas.DataFrame.info`. +(Here I am reading a CSV version of the **baseball** dataset from the **plyr** R package): .. ipython:: python @@ -587,6 +588,7 @@ R package): baseball = read_csv('data/baseball.csv') print(baseball) + baseball.info() .. ipython:: python :suppress: @@ -622,19 +624,8 @@ option: reset_option('line_width') -You can also disable this feature via the ``expand_frame_repr`` option: - -.. ipython:: python - - set_option('expand_frame_repr', False) - - DataFrame(randn(3, 12)) - -.. ipython:: python - :suppress: - - reset_option('expand_frame_repr') - +You can also disable this feature via the ``expand_frame_repr`` option. +This will print the table in one block. DataFrame column attribute access and IPython completion ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt index 207281caafeae..8bf4ee0dce648 100644 --- a/doc/source/v0.13.0.txt +++ b/doc/source/v0.13.0.txt @@ -607,6 +607,12 @@ Enhancements output datetime objects should be formatted. Datetimes encountered in the index, columns, and values will all have this formatting applied. (:issue:`4313`) - ``DataFrame.plot`` will scatter plot x versus y by passing ``kind='scatter'`` (:issue:`2215`) +- The HTML and plain text representations of :class:`DataFrame` now show + a truncated view of the table once it exceeds a certain size, rather + than switching to the short info view (:issue:`4886`, :issue:`5550`). + This makes the representation more consistent as small DataFrames get + larger. To get the info view, call :meth:`DataFrame.info`, or restore + the old behaviour with ``set_option('display.large_repr', 'info')``. .. _whatsnew_0130.experimental: diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 5502dc94e24c1..b7ec76522b60c 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -166,13 +166,19 @@ pc_max_info_rows_doc = """ : int or None - max_info_rows is the maximum number of rows for which a frame will - perform a null check on its columns when repr'ing To a console. - The default is 1,000,000 rows. So, if a DataFrame has more - 1,000,000 rows there will be no null check performed on the - columns and thus the representation will take much less time to - display in an interactive session. A value of None means always - perform a null check when repr'ing. + Deprecated. +""" + +pc_max_info_rows_deprecation_warning = """\ +max_info_rows has been deprecated, as reprs no longer use the info view. +""" + +pc_large_repr_doc = """ +: 'truncate'/'info' + + For DataFrames exceeding max_rows/max_cols, the repr (and HTML repr) can + show a truncated table (the default from 0.13), or switch to the view from + df.info() (the behaviour in earlier versions of pandas). """ pc_mpl_style_doc = """ @@ -220,6 +226,8 @@ def mpl_style_cb(key): cf.register_option('max_colwidth', 50, max_colwidth_doc, validator=is_int) cf.register_option('max_columns', 20, pc_max_cols_doc, validator=is_instance_factory([type(None), int])) + cf.register_option('large_repr', 'truncate', pc_large_repr_doc, + validator=is_one_of_factory(['truncate', 'info'])) cf.register_option('max_info_columns', 100, pc_max_info_cols_doc, validator=is_int) cf.register_option('colheader_justify', 'right', colheader_justify_doc, @@ -258,6 +266,9 @@ def mpl_style_cb(key): msg=pc_height_deprecation_warning, rkey='display.max_rows') +cf.deprecate_option('display.max_info_rows', + msg=pc_max_info_rows_deprecation_warning) + tc_sim_interactive_doc = """ : boolean Whether to simulate interactive mode for purposes of testing diff --git a/pandas/core/format.py b/pandas/core/format.py index 7354600c78c67..49f64088b44ab 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -263,7 +263,8 @@ class DataFrameFormatter(TableFormatter): def __init__(self, frame, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, justify=None, float_format=None, sparsify=None, - index_names=True, line_width=None, **kwds): + index_names=True, line_width=None, max_rows=None, max_cols=None, + **kwds): self.frame = frame self.buf = buf if buf is not None else StringIO() self.show_index_names = index_names @@ -280,6 +281,8 @@ def __init__(self, frame, buf=None, columns=None, col_space=None, self.header = header self.index = index self.line_width = line_width + self.max_rows = max_rows + self.max_cols = max_cols if justify is None: self.justify = get_option("display.colheader_justify") @@ -303,12 +306,19 @@ def _to_str_columns(self): str_index = self._get_formatted_index() str_columns = self._get_formatted_column_labels() - stringified = [] - _strlen = _strlen_func() - for i, c in enumerate(self.columns): - if self.header: + cols_to_show = self.columns[:self.max_cols] + truncate_h = self.max_cols and (len(self.columns) > self.max_cols) + truncate_v = self.max_rows and (len(self.frame) > self.max_rows) + if truncate_h: + cols_to_show = self.columns[:self.max_cols] + else: + cols_to_show = self.columns + + if self.header: + stringified = [] + for i, c in enumerate(cols_to_show): fmt_values = self._format_col(i) cheader = str_columns[i] @@ -316,7 +326,7 @@ def _to_str_columns(self): *(_strlen(x) for x in cheader)) fmt_values = _make_fixed_width(fmt_values, self.justify, - minimum=max_colwidth) + minimum=max_colwidth, truncated=truncate_v) max_len = max(np.max([_strlen(x) for x in fmt_values]), max_colwidth) @@ -326,14 +336,17 @@ def _to_str_columns(self): cheader = [x.rjust(max_len) for x in cheader] stringified.append(cheader + fmt_values) - else: - stringified = [_make_fixed_width(self._format_col(i), - self.justify) - for i, c in enumerate(self.columns)] + else: + stringified = [_make_fixed_width(self._format_col(i), self.justify, + truncated=truncate_v) + for i, c in enumerate(cols_to_show)] strcols = stringified if self.index: strcols.insert(0, str_index) + if truncate_h: + strcols.append(([''] * len(str_columns[-1])) \ + + (['...'] * min(len(self.frame), self.max_rows)) ) return strcols @@ -378,6 +391,11 @@ def _join_multiline(self, *strcols): col_bins = _binify(col_widths, lwidth) nbins = len(col_bins) + if self.max_rows and len(self.frame) > self.max_rows: + nrows = self.max_rows + 1 + else: + nrows = len(self.frame) + str_lst = [] st = 0 for i, ed in enumerate(col_bins): @@ -385,9 +403,9 @@ def _join_multiline(self, *strcols): row.insert(0, idx) if nbins > 1: if ed <= len(strcols) and i < nbins - 1: - row.append([' \\'] + [' '] * (len(self.frame) - 1)) + row.append([' \\'] + [' '] * (nrows - 1)) else: - row.append([' '] * len(self.frame)) + row.append([' '] * nrows) str_lst.append(adjoin(adjoin_width, *row)) st = ed @@ -458,8 +476,8 @@ def write(buf, frame, column_format, strcols): def _format_col(self, i): formatter = self._get_formatter(i) - return format_array(self.frame.icol(i).get_values(), formatter, - float_format=self.float_format, + return format_array(self.frame.icol(i)[:self.max_rows].get_values(), + formatter, float_format=self.float_format, na_rep=self.na_rep, space=self.col_space) @@ -467,7 +485,9 @@ def to_html(self, classes=None): """ Render a DataFrame to a html table. """ - html_renderer = HTMLFormatter(self, classes=classes) + html_renderer = HTMLFormatter(self, classes=classes, + max_rows=self.max_rows, + max_cols=self.max_cols) if hasattr(self.buf, 'write'): html_renderer.write_result(self.buf) elif isinstance(self.buf, compat.string_types): @@ -483,8 +503,13 @@ def _get_formatted_column_labels(self): def is_numeric_dtype(dtype): return issubclass(dtype.type, np.number) - if isinstance(self.columns, MultiIndex): - fmt_columns = self.columns.format(sparsify=False, adjoin=False) + if self.max_cols: + columns = self.columns[:self.max_cols] + else: + columns = self.columns + + if isinstance(columns, MultiIndex): + fmt_columns = columns.format(sparsify=False, adjoin=False) fmt_columns = lzip(*fmt_columns) dtypes = self.frame.dtypes.values need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes))) @@ -496,14 +521,14 @@ def is_numeric_dtype(dtype): str_columns = [list(x) for x in zip(*str_columns)] else: - fmt_columns = self.columns.format() + fmt_columns = columns.format() dtypes = self.frame.dtypes need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes))) str_columns = [[' ' + x if not self._get_formatter(i) and need_leadsp[x] else x] for i, (col, x) in - enumerate(zip(self.columns, fmt_columns))] + enumerate(zip(columns, fmt_columns))] if self.show_index_names and self.has_index_names: for x in str_columns: @@ -521,7 +546,10 @@ def has_column_names(self): def _get_formatted_index(self): # Note: this is only used by to_string(), not by to_html(). - index = self.frame.index + if self.max_rows: + index = self.frame.index[:self.max_rows] + else: + index = self.frame.index columns = self.frame.columns show_index_names = self.show_index_names and self.has_index_names @@ -564,7 +592,7 @@ class HTMLFormatter(TableFormatter): indent_delta = 2 - def __init__(self, formatter, classes=None): + def __init__(self, formatter, classes=None, max_rows=None, max_cols=None): self.fmt = formatter self.classes = classes @@ -574,6 +602,9 @@ def __init__(self, formatter, classes=None): self.bold_rows = self.fmt.kwds.get('bold_rows', False) self.escape = self.fmt.kwds.get('escape', True) + self.max_rows = max_rows or len(self.fmt.frame) + self.max_cols = max_cols or len(self.fmt.columns) + def write(self, s, indent=0): rs = com.pprint_thing(s) self.elements.append(' ' * indent + rs) @@ -680,7 +711,9 @@ def _column_header(): else: if self.fmt.index: row.append(self.columns.name or '') - row.extend(self.columns) + row.extend(self.columns[:self.max_cols]) + if len(self.columns) > self.max_cols: + row.append('') return row self.write('', indent) @@ -695,6 +728,13 @@ def _column_header(): sentinal = com.sentinal_factory() levels = self.columns.format(sparsify=sentinal, adjoin=False, names=False) + # Truncate column names + if len(levels[0]) > self.max_cols: + levels = [lev[:self.max_cols] for lev in levels] + truncated = True + else: + truncated = False + level_lengths = _get_level_lengths(levels, sentinal) row_levels = self.frame.index.nlevels @@ -716,6 +756,9 @@ def _column_header(): j += 1 row.append(v) + if truncated: + row.append('') + self.write_tr(row, indent, self.indent_delta, tags=tags, header=True) else: @@ -726,8 +769,8 @@ def _column_header(): align=align) if self.fmt.has_index_names: - row = [x if x is not None else '' - for x in self.frame.index.names] + [''] * len(self.columns) + row = [x if x is not None else '' for x in self.frame.index.names] \ + + [''] * min(len(self.columns), self.max_cols) self.write_tr(row, indent, self.indent_delta, header=True) indent -= self.indent_delta @@ -740,15 +783,16 @@ def _write_body(self, indent): indent += self.indent_delta fmt_values = {} - for i in range(len(self.columns)): + for i in range(min(len(self.columns), self.max_cols)): fmt_values[i] = self.fmt._format_col(i) + truncated = (len(self.columns) > self.max_cols) # write values if self.fmt.index: if isinstance(self.frame.index, MultiIndex): self._write_hierarchical_rows(fmt_values, indent) else: - self._write_regular_rows(fmt_values, indent) + self._write_regular_rows(fmt_values, indent, truncated) else: for i in range(len(self.frame)): row = [fmt_values[j][i] for j in range(len(self.columns))] @@ -760,8 +804,8 @@ def _write_body(self, indent): return indent - def _write_regular_rows(self, fmt_values, indent): - ncols = len(self.columns) + def _write_regular_rows(self, fmt_values, indent, truncated): + ncols = min(len(self.columns), self.max_cols) fmt = self.fmt._get_formatter('__index__') if fmt is not None: @@ -769,10 +813,17 @@ def _write_regular_rows(self, fmt_values, indent): else: index_values = self.frame.index.format() - for i in range(len(self.frame)): + for i in range(min(len(self.frame), self.max_rows)): row = [] row.append(index_values[i]) row.extend(fmt_values[j][i] for j in range(ncols)) + if truncated: + row.append('...') + self.write_tr(row, indent, self.indent_delta, tags=None, + nindex_levels=1) + + if len(self.frame) > self.max_rows: + row = [''] + (['...'] * ncols) self.write_tr(row, indent, self.indent_delta, tags=None, nindex_levels=1) @@ -780,7 +831,8 @@ def _write_hierarchical_rows(self, fmt_values, indent): template = 'rowspan="%d" valign="top"' frame = self.frame - ncols = len(self.columns) + ncols = min(len(self.columns), self.max_cols) + truncate = (len(frame) > self.max_rows) idx_values = frame.index.format(sparsify=False, adjoin=False, names=False) @@ -792,9 +844,13 @@ def _write_hierarchical_rows(self, fmt_values, indent): sentinal = com.sentinal_factory() levels = frame.index.format(sparsify=sentinal, adjoin=False, names=False) + # Truncate row names + if truncate: + levels = [lev[:self.max_rows] for lev in levels] + level_lengths = _get_level_lengths(levels, sentinal) - for i in range(len(frame)): + for i in range(min(len(frame), self.max_rows)): row = [] tags = {} @@ -825,6 +881,11 @@ def _write_hierarchical_rows(self, fmt_values, indent): self.write_tr(row, indent, self.indent_delta, tags=None, nindex_levels=frame.index.nlevels) + # Truncation markers (...) + if truncate: + row = ([''] * frame.index.nlevels) + (['...'] * ncols) + self.write_tr(row, indent, self.indent_delta, tags=None) + def _get_level_lengths(levels, sentinal=''): from itertools import groupby @@ -1708,7 +1769,7 @@ def _format_timedelta64(x): return lib.repr_timedelta64(x) -def _make_fixed_width(strings, justify='right', minimum=None): +def _make_fixed_width(strings, justify='right', minimum=None, truncated=False): if len(strings) == 0: return strings @@ -1737,7 +1798,12 @@ def just(x): return justfunc(x, eff_len) - return [just(x) for x in strings] + result = [just(x) for x in strings] + + if truncated: + result.append(justfunc('...'[:max_len], max_len)) + + return result def _trim_zeros(str_floats, na_rep='NaN'): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e1dafc60e64d8..dca6c2250868b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -429,6 +429,12 @@ def _repr_fits_horizontal_(self, ignore_width=False): return repr_width < width + def _info_repr(self): + """True if the repr should show the info view.""" + info_repr_option = (get_option("display.large_repr") == "info") + return info_repr_option and not \ + (self._repr_fits_horizontal_() and self._repr_fits_vertical_()) + def __unicode__(self): """ Return a string representation for a particular DataFrame @@ -437,30 +443,18 @@ def __unicode__(self): py2/py3. """ buf = StringIO(u("")) - fits_vertical = self._repr_fits_vertical_() - fits_horizontal = False - if fits_vertical: - # This needs to compute the entire repr - # so don't do it unless rownum is bounded - fits_horizontal = self._repr_fits_horizontal_() - - if fits_vertical and fits_horizontal: - self.to_string(buf=buf) - else: + if self._info_repr(): + self.info(buf=buf) + return buf.getvalue() + + max_rows = get_option("display.max_rows") + max_cols = get_option("display.max_columns") + if get_option("display.expand_frame_repr"): width, _ = fmt.get_console_size() - max_columns = get_option("display.max_columns") - expand_repr = get_option("display.expand_frame_repr") - # within max_cols and max_rows, but cols exceed width - # of terminal, then use expand_repr - if (fits_vertical and - expand_repr and - len(self.columns) <= max_columns): - self.to_string(buf=buf, line_width=width) - else: - max_info_rows = get_option('display.max_info_rows') - verbose = (max_info_rows is None or - self.shape[0] <= max_info_rows) - self.info(buf=buf, verbose=verbose) + else: + width = None + self.to_string(buf=buf, max_rows=max_rows, max_cols=max_cols, + line_width=width) return buf.getvalue() @@ -480,28 +474,19 @@ def _repr_html_(self): if com.in_qtconsole(): raise ValueError('Disable HTML output in QtConsole') + if self._info_repr(): + buf = StringIO(u("")) + self.info(buf=buf) + return '
' + buf.getvalue() + '
' + if get_option("display.notebook_repr_html"): - fits_vertical = self._repr_fits_vertical_() - fits_horizontal = False - if fits_vertical: - fits_horizontal = self._repr_fits_horizontal_( - ignore_width=ipnbh) - - if fits_horizontal and fits_vertical: - return ('
\n' + - self.to_html() + '\n
') - else: - buf = StringIO(u("")) - max_info_rows = get_option('display.max_info_rows') - verbose = (max_info_rows is None or - self.shape[0] <= max_info_rows) - self.info(buf=buf, verbose=verbose) - info = buf.getvalue() - info = info.replace('&', r'&') - info = info.replace('<', r'<') - info = info.replace('>', r'>') - return ('
\n' + info + '\n
') + max_rows = get_option("display.max_rows") + max_cols = get_option("display.max_columns") + + return ('
\n' + + self.to_html(max_rows=max_rows, max_cols=max_cols) \ + + '\n
') else: return None @@ -1269,7 +1254,7 @@ def to_string(self, buf=None, columns=None, col_space=None, colSpace=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, sparsify=None, nanRep=None, index_names=True, justify=None, force_unicode=None, - line_width=None): + line_width=None, max_rows=None, max_cols=None): """ Render a DataFrame to a console-friendly tabular output. """ @@ -1295,7 +1280,8 @@ def to_string(self, buf=None, columns=None, col_space=None, colSpace=None, justify=justify, index_names=index_names, header=header, index=index, - line_width=line_width) + line_width=line_width, + max_rows=max_rows, max_cols=max_cols) formatter.to_string() if buf is None: @@ -1307,7 +1293,7 @@ def to_html(self, buf=None, columns=None, col_space=None, colSpace=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, sparsify=None, index_names=True, justify=None, force_unicode=None, bold_rows=True, - classes=None, escape=True): + classes=None, escape=True, max_rows=None, max_cols=None): """ Render a DataFrame as an HTML table. @@ -1318,7 +1304,12 @@ def to_html(self, buf=None, columns=None, col_space=None, colSpace=None, classes : str or list or tuple, default None CSS class(es) to apply to the resulting html table escape : boolean, default True - Convert the characters <, >, and & to HTML-safe sequences. + Convert the characters <, >, and & to HTML-safe sequences.= + max_rows : int, optional + Maximum number of rows to show before truncating. If None, show all. + max_cols : int, optional + Maximum number of columns to show before truncating. If None, show + all. """ @@ -1340,7 +1331,8 @@ def to_html(self, buf=None, columns=None, col_space=None, colSpace=None, index_names=index_names, header=header, index=index, bold_rows=bold_rows, - escape=escape) + escape=escape, + max_rows=max_rows, max_cols=max_cols) formatter.to_html(classes=classes) if buf is None: @@ -1386,7 +1378,7 @@ def to_latex(self, buf=None, columns=None, col_space=None, colSpace=None, def info(self, verbose=True, buf=None, max_cols=None): """ - Concise summary of a DataFrame, used in __repr__ when very large. + Concise summary of a DataFrame. Parameters ---------- diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index d9bf8adb71298..e5cd8af31ae1b 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -3,6 +3,7 @@ from pandas.compat import range, zip, lrange, StringIO, PY3, lzip, u import pandas.compat as compat +import itertools import os import sys import unittest @@ -34,6 +35,20 @@ def has_info_repr(df): r = repr(df) return r.split('\n')[0].startswith(" Date: Fri, 22 Nov 2013 18:33:32 -0800 Subject: [PATCH 2/5] Add repr footer showing total number of rows and columns --- pandas/core/format.py | 15 ++++++++++++++- pandas/core/frame.py | 17 +++++++++++------ pandas/tests/test_format.py | 18 +++++++++++------- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/pandas/core/format.py b/pandas/core/format.py index 49f64088b44ab..1ca68b8d47e09 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -1,3 +1,4 @@ +#coding: utf-8 from __future__ import print_function # pylint: disable=W0141 @@ -264,7 +265,7 @@ def __init__(self, frame, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, justify=None, float_format=None, sparsify=None, index_names=True, line_width=None, max_rows=None, max_cols=None, - **kwds): + show_dimensions=False, **kwds): self.frame = frame self.buf = buf if buf is not None else StringIO() self.show_index_names = index_names @@ -283,6 +284,7 @@ def __init__(self, frame, buf=None, columns=None, col_space=None, self.line_width = line_width self.max_rows = max_rows self.max_cols = max_cols + self.show_dimensions = show_dimensions if justify is None: self.justify = get_option("display.colheader_justify") @@ -311,6 +313,7 @@ def _to_str_columns(self): cols_to_show = self.columns[:self.max_cols] truncate_h = self.max_cols and (len(self.columns) > self.max_cols) truncate_v = self.max_rows and (len(self.frame) > self.max_rows) + self.truncated_v = truncate_v if truncate_h: cols_to_show = self.columns[:self.max_cols] else: @@ -377,6 +380,10 @@ def to_string(self, force_unicode=None): self.buf.writelines(text) + if self.show_dimensions: + self.buf.write("\n\n[%d rows x %d columns]" \ + % (len(frame), len(frame.columns)) ) + def _join_multiline(self, *strcols): lwidth = self.line_width adjoin_width = 1 @@ -671,6 +678,8 @@ def write_result(self, buf): 'not %s') % type(self.classes)) _classes.extend(self.classes) + + self.write('' % ' '.join(_classes), indent) @@ -687,6 +696,10 @@ def write_result(self, buf): indent = self._write_body(indent) self.write('
', indent) + if self.fmt.show_dimensions: + by = chr(215) if compat.PY3 else unichr(215) # × + self.write(u('

%d rows %s %d columns

') % + (len(frame), by, len(frame.columns)) ) _put_lines(buf, self.elements) def _write_header(self, indent): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index dca6c2250868b..88cf898d354e9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -454,7 +454,7 @@ def __unicode__(self): else: width = None self.to_string(buf=buf, max_rows=max_rows, max_cols=max_cols, - line_width=width) + line_width=width, show_dimensions=True) return buf.getvalue() @@ -485,7 +485,8 @@ def _repr_html_(self): return ('
\n' + - self.to_html(max_rows=max_rows, max_cols=max_cols) \ + self.to_html(max_rows=max_rows, max_cols=max_cols, + show_dimensions=True) \ + '\n
') else: return None @@ -1254,7 +1255,8 @@ def to_string(self, buf=None, columns=None, col_space=None, colSpace=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, sparsify=None, nanRep=None, index_names=True, justify=None, force_unicode=None, - line_width=None, max_rows=None, max_cols=None): + line_width=None, max_rows=None, max_cols=None, + show_dimensions=False): """ Render a DataFrame to a console-friendly tabular output. """ @@ -1281,7 +1283,8 @@ def to_string(self, buf=None, columns=None, col_space=None, colSpace=None, index_names=index_names, header=header, index=index, line_width=line_width, - max_rows=max_rows, max_cols=max_cols) + max_rows=max_rows, max_cols=max_cols, + show_dimensions=show_dimensions) formatter.to_string() if buf is None: @@ -1293,7 +1296,8 @@ def to_html(self, buf=None, columns=None, col_space=None, colSpace=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, sparsify=None, index_names=True, justify=None, force_unicode=None, bold_rows=True, - classes=None, escape=True, max_rows=None, max_cols=None): + classes=None, escape=True, max_rows=None, max_cols=None, + show_dimensions=False): """ Render a DataFrame as an HTML table. @@ -1332,7 +1336,8 @@ def to_html(self, buf=None, columns=None, col_space=None, colSpace=None, header=header, index=index, bold_rows=bold_rows, escape=escape, - max_rows=max_rows, max_cols=max_cols) + max_rows=max_rows, max_cols=max_cols, + show_dimensions=show_dimensions) formatter.to_html(classes=classes) if buf is None: diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index e5cd8af31ae1b..8e23176e9d005 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -41,7 +41,7 @@ def has_horizontally_truncated_repr(df): def has_vertically_truncated_repr(df): r = repr(df) - return '..' in r.splitlines()[-1] + return '..' in r.splitlines()[-3] def has_truncated_repr(df): return has_horizontally_truncated_repr(df) or has_vertically_truncated_repr(df) @@ -128,16 +128,16 @@ def test_repr_truncation(self): def test_repr_chop_threshold(self): df = DataFrame([[0.1, 0.5],[0.5, -0.1]]) pd.reset_option("display.chop_threshold") # default None - self.assertEqual(repr(df), ' 0 1\n0 0.1 0.5\n1 0.5 -0.1') + self.assertEqual(repr(df), ' 0 1\n0 0.1 0.5\n1 0.5 -0.1\n\n[2 rows x 2 columns]') with option_context("display.chop_threshold", 0.2 ): - self.assertEqual(repr(df), ' 0 1\n0 0.0 0.5\n1 0.5 0.0') + self.assertEqual(repr(df), ' 0 1\n0 0.0 0.5\n1 0.5 0.0\n\n[2 rows x 2 columns]') with option_context("display.chop_threshold", 0.6 ): - self.assertEqual(repr(df), ' 0 1\n0 0 0\n1 0 0') + self.assertEqual(repr(df), ' 0 1\n0 0 0\n1 0 0\n\n[2 rows x 2 columns]') with option_context("display.chop_threshold", None ): - self.assertEqual(repr(df), ' 0 1\n0 0.1 0.5\n1 0.5 -0.1') + self.assertEqual(repr(df), ' 0 1\n0 0.1 0.5\n1 0.5 -0.1\n\n[2 rows x 2 columns]') def test_repr_obeys_max_seq_limit(self): import pandas.core.common as com @@ -775,6 +775,8 @@ def test_wide_repr(self): df = DataFrame([col(max_cols-1, 25) for _ in range(10)]) set_option('display.expand_frame_repr', False) rep_str = repr(df) + print(rep_str) + assert "10 rows x %d columns" % (max_cols-1) in rep_str set_option('display.expand_frame_repr', True) wide_repr = repr(df) self.assert_(rep_str != wide_repr) @@ -790,7 +792,7 @@ def test_wide_repr_wide_columns(self): df = DataFrame(randn(5, 3), columns=['a' * 90, 'b' * 90, 'c' * 90]) rep_str = repr(df) - self.assert_(len(rep_str.splitlines()) == 20) + self.assertEqual(len(rep_str.splitlines()), 22) def test_wide_repr_named(self): with option_context('mode.sim_interactive', True): @@ -1450,6 +1452,8 @@ def test_repr_html_long(self): long_repr = df._repr_html_() assert '...' in long_repr assert str(40 + h) not in long_repr + assert u('%d rows ') % h in long_repr + assert u('2 columns') in long_repr def test_repr_html_long_multiindex(self): max_rows = get_option('display.max_rows') @@ -1565,7 +1569,7 @@ def test_float_trim_zeros(self): vals = [2.08430917305e+10, 3.52205017305e+10, 2.30674817305e+10, 2.03954217305e+10, 5.59897817305e+10] skip = True - for line in repr(DataFrame({'A': vals})).split('\n'): + for line in repr(DataFrame({'A': vals})).split('\n')[:-2]: if line.startswith('dtype:'): continue if _three_digit_exp(): From ba61b8f4127c0f52470df465494eb14cc2489aea Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Sat, 23 Nov 2013 13:59:47 -0800 Subject: [PATCH 3/5] Use to_string() to format DataFrames for clipboard. Using str() has some unhelpful properties, especially when DataFrames are large enough that the default repr does not show them in their entirety. The defaults for to_string() are more helpful for the clipboard. Closes gh-5346 --- pandas/io/clipboard.py | 9 +++++++-- pandas/io/tests/test_clipboard.py | 6 ++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/pandas/io/clipboard.py b/pandas/io/clipboard.py index 51142c9f52655..13135d255d9e2 100644 --- a/pandas/io/clipboard.py +++ b/pandas/io/clipboard.py @@ -1,5 +1,5 @@ """ io on the clipboard """ -from pandas import compat, get_option +from pandas import compat, get_option, DataFrame from pandas.compat import StringIO def read_clipboard(**kwargs): # pragma: no cover @@ -64,5 +64,10 @@ def to_clipboard(obj, excel=None, sep=None, **kwargs): # pragma: no cover except: pass - clipboard_set(str(obj)) + if isinstance(obj, DataFrame): + # str(df) has various unhelpful defaults, like truncation + objstr = obj.to_string() + else: + objstr = str(obj) + clipboard_set(objstr) diff --git a/pandas/io/tests/test_clipboard.py b/pandas/io/tests/test_clipboard.py index 45b479ebb589e..6ee0afa1c8c07 100644 --- a/pandas/io/tests/test_clipboard.py +++ b/pandas/io/tests/test_clipboard.py @@ -7,6 +7,7 @@ from pandas import DataFrame from pandas import read_clipboard +from pandas import get_option from pandas.util import testing as tm from pandas.util.testing import makeCustomDataframe as mkdf @@ -33,6 +34,11 @@ def setUpClass(cls): cls.data['mixed'] = DataFrame({'a': np.arange(1.0, 6.0) + 0.01, 'b': np.arange(1, 6), 'c': list('abcde')}) + # Test GH-5346 + max_rows = get_option('display.max_rows') + cls.data['longdf'] = mkdf(max_rows+1, 3, data_gen_f=lambda *args: randint(2), + c_idx_type='s', r_idx_type='i', + c_idx_names=[None], r_idx_names=[None]) cls.data_types = list(cls.data.keys()) @classmethod From c42b79786716b4889c7ed264f086543036a1d145 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Mon, 25 Nov 2013 10:54:06 -0800 Subject: [PATCH 4/5] Update FAQ entry on controlling repr --- doc/source/faq.rst | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/doc/source/faq.rst b/doc/source/faq.rst index e5312e241ce47..21d581f12c53f 100644 --- a/doc/source/faq.rst +++ b/doc/source/faq.rst @@ -36,21 +36,25 @@ horizontal scrolling, auto-detection of width/height. To appropriately address all these environments, the display behavior is controlled by several options, which you're encouraged to tweak to suit your setup. -As of 0.12, these are the relevant options, all under the `display` namespace, -(e.g. display.width, etc'): +As of 0.13, these are the relevant options, all under the `display` namespace, +(e.g. ``display.width``, etc.): - notebook_repr_html: if True, IPython frontends with HTML support will display dataframes as HTML tables when possible. -- expand_repr (default True): when the frame width cannot fit within the screen, - the output will be broken into multiple pages to accomedate. This applies to - textual (as opposed to HTML) display only. -- max_columns: max dataframe columns to display. a wider frame will trigger - a summary view, unless `expand_repr` is True and HTML output is disabled. -- max_rows: max dataframe rows display. a longer frame will trigger a summary view. -- width: width of display screen in characters, used to determine the width of lines - when expand_repr is active, Setting this to None will trigger auto-detection of terminal - width, this only works for proper terminals, not IPython frontends such as ipnb. - width is ignored in IPython notebook, since the browser provides horizontal scrolling. +- large_repr (default 'truncate'): when a :class:`~pandas.DataFrame` + exceeds max_columns or max_rows, it can be displayed either as a + truncated table or, with this set to 'info', as a short summary view. +- max_columns (default 20): max dataframe columns to display. +- max_rows (default 60): max dataframe rows display. + +Two additional options only apply to displaying DataFrames in terminals, +not to the HTML view: + +- expand_repr (default True): when the frame width cannot fit within + the screen, the output will be broken into multiple pages. +- width: width of display screen in characters, used to determine the + width of lines when expand_repr is active. Setting this to None will + trigger auto-detection of terminal width. IPython users can use the IPython startup file to import pandas and set these options automatically when starting up. From 03a81c5376d6d63890a9eb8e6a761ba5c4ad7dda Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Mon, 25 Nov 2013 16:08:58 -0800 Subject: [PATCH 5/5] Explain repr changes more prominently in release notes --- doc/source/v0.13.0.txt | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt index 8bf4ee0dce648..a39e415abe519 100644 --- a/doc/source/v0.13.0.txt +++ b/doc/source/v0.13.0.txt @@ -375,6 +375,22 @@ HDFStore API Changes via the option ``io.hdf.dropna_table`` (:issue:`4625`) - pass thru store creation arguments; can be used to support in-memory stores +DataFrame repr Changes +~~~~~~~~~~~~~~~~~~~~~~ + +The HTML and plain text representations of :class:`DataFrame` now show +a truncated view of the table once it exceeds a certain size, rather +than switching to the short info view (:issue:`4886`, :issue:`5550`). +This makes the representation more consistent as small DataFrames get +larger. + +.. image:: _static/df_repr_truncated.png + :alt: Truncated HTML representation of a DataFrame + +To get the info view, call :meth:`DataFrame.info`. If you prefer the +info view as the repr for large DataFrames, you can set this by running +``set_option('display.large_repr', 'info')``. + Enhancements ~~~~~~~~~~~~ @@ -607,12 +623,6 @@ Enhancements output datetime objects should be formatted. Datetimes encountered in the index, columns, and values will all have this formatting applied. (:issue:`4313`) - ``DataFrame.plot`` will scatter plot x versus y by passing ``kind='scatter'`` (:issue:`2215`) -- The HTML and plain text representations of :class:`DataFrame` now show - a truncated view of the table once it exceeds a certain size, rather - than switching to the short info view (:issue:`4886`, :issue:`5550`). - This makes the representation more consistent as small DataFrames get - larger. To get the info view, call :meth:`DataFrame.info`, or restore - the old behaviour with ``set_option('display.large_repr', 'info')``. .. _whatsnew_0130.experimental: