5 changes: 5 additions & 0 deletions doc/source/whatsnew/v0.18.0.txt
@@ -118,6 +118,7 @@ Other enhancements
- ``Series`` gained an ``is_unique`` attribute (:issue:`11946`)
- ``DataFrame.quantile`` and ``Series.quantile`` now accept ``interpolation`` keyword (:issue:`10174`).
- ``DataFrame.select_dtypes`` now allows the ``np.float16`` typecode (:issue:`11990`)
- ``DataFrame.to_sql`` now allows a single value as the SQL type for all columns (:issue:`11886`).

.. _whatsnew_0180.enhancements.rounding:

@@ -303,6 +304,9 @@ Other API Changes

- ``.memory_usage`` now includes values in the index, as does memory_usage in ``.info`` (:issue:`11597`)

- ``DataFrame.to_latex()`` now supports non-ascii encodings (e.g. ``utf-8``) in Python 2 with the parameter ``encoding`` (:issue:`7061`)


Changes to eval
^^^^^^^^^^^^^^^

@@ -463,6 +467,7 @@ Bug Fixes
- Bug in ``pd.read_clipboard`` and ``pd.to_clipboard`` functions not supporting Unicode; upgrade included ``pyperclip`` to v1.5.15 (:issue:`9263`)
- Bug in ``DataFrame.query`` containing an assignment (:issue:`8664`)

- Bug in ``from_msgpack`` where ``__contains__()`` fails for columns of the unpacked ``DataFrame`` when the ``DataFrame`` has object columns (:issue:`11880`)


- Bug in timezone info lost when broadcasting scalar datetime to ``DataFrame`` (:issue:`11682`)
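As a quick illustration of the ``to_sql`` enhancement noted in the whatsnew entry above, the sketch below broadcasts one SQLAlchemy type to every column. It is a hedged example, not part of the diff: the in-memory SQLite engine, the table name ``example``, and the sample frame are all illustrative.

import pandas as pd
import sqlalchemy
from sqlalchemy.types import String

engine = sqlalchemy.create_engine('sqlite://')  # illustrative in-memory database
df = pd.DataFrame({'a': [1, 2], 'b': ['x', 'y']})

# Before this change, dtype had to be a per-column dict such as
# {'a': String(), 'b': String()}; now a single value is applied to all columns.
df.to_sql('example', engine, dtype=String)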
217 changes: 125 additions & 92 deletions pandas/core/format.py
@@ -619,105 +619,20 @@ def _join_multiline(self, *strcols):
st = ed
return '\n\n'.join(str_lst)

def to_latex(self, column_format=None, longtable=False):
def to_latex(self, column_format=None, longtable=False, encoding=None):
"""
Render a DataFrame to a LaTeX tabular/longtable environment output.
"""
self.escape = self.kwds.get('escape', True)

def get_col_type(dtype):
if issubclass(dtype.type, np.number):
return 'r'
else:
return 'l'

frame = self.frame

if len(frame.columns) == 0 or len(frame.index) == 0:
info_line = (u('Empty %s\nColumns: %s\nIndex: %s')
% (type(self.frame).__name__,
frame.columns, frame.index))
strcols = [[info_line]]
else:
strcols = self._to_str_columns()

if self.index and isinstance(self.frame.index, MultiIndex):
clevels = self.frame.columns.nlevels
strcols.pop(0)
name = any(self.frame.index.names)
for i, lev in enumerate(self.frame.index.levels):
lev2 = lev.format()
blank = ' ' * len(lev2[0])
lev3 = [blank] * clevels
if name:
lev3.append(lev.name)
for level_idx, group in itertools.groupby(
self.frame.index.labels[i]):
count = len(list(group))
lev3.extend([lev2[level_idx]] + [blank] * (count - 1))
strcols.insert(i, lev3)

if column_format is None:
dtypes = self.frame.dtypes._values
column_format = ''.join(map(get_col_type, dtypes))
if self.index:
index_format = 'l' * self.frame.index.nlevels
column_format = index_format + column_format
elif not isinstance(column_format,
compat.string_types): # pragma: no cover
raise AssertionError('column_format must be str or unicode, not %s'
% type(column_format))

def write(buf, frame, column_format, strcols, longtable=False):
if not longtable:
buf.write('\\begin{tabular}{%s}\n' % column_format)
buf.write('\\toprule\n')
else:
buf.write('\\begin{longtable}{%s}\n' % column_format)
buf.write('\\toprule\n')

nlevels = frame.columns.nlevels
if any(frame.index.names):
nlevels += 1
for i, row in enumerate(zip(*strcols)):
if i == nlevels and self.header:
buf.write('\\midrule\n') # End of header
if longtable:
buf.write('\\endhead\n')
buf.write('\\midrule\n')
buf.write('\\multicolumn{3}{r}{{Continued on next '
'page}} \\\\\n')
buf.write('\midrule\n')
buf.write('\endfoot\n\n')
buf.write('\\bottomrule\n')
buf.write('\\endlastfoot\n')
if self.escape:
crow = [(x.replace('\\', '\\textbackslash') # escape backslashes first
.replace('_', '\\_')
.replace('%', '\\%')
.replace('$', '\\$')
.replace('#', '\\#')
.replace('{', '\\{')
.replace('}', '\\}')
.replace('~', '\\textasciitilde')
.replace('^', '\\textasciicircum')
.replace('&', '\\&') if x else '{}') for x in row]
else:
crow = [x if x else '{}' for x in row]
buf.write(' & '.join(crow))
buf.write(' \\\\\n')

if not longtable:
buf.write('\\bottomrule\n')
buf.write('\\end{tabular}\n')
else:
buf.write('\\end{longtable}\n')
latex_renderer = LatexFormatter(self, column_format=column_format,
longtable=longtable)

if hasattr(self.buf, 'write'):
write(self.buf, frame, column_format, strcols, longtable)
latex_renderer.write_result(self.buf)
elif isinstance(self.buf, compat.string_types):
with open(self.buf, 'w') as f:
write(f, frame, column_format, strcols, longtable)
import codecs
with codecs.open(self.buf, 'w', encoding=encoding) as f:
latex_renderer.write_result(f)
else:
raise TypeError('buf is not a file name and it has no write '
'method')
@@ -851,6 +766,124 @@ def _get_column_name_list(self):
return names


class LatexFormatter(TableFormatter):
""" Used to render a DataFrame to a LaTeX tabular/longtable environment
output.

Parameters
----------
formatter : `DataFrameFormatter`
column_format : str, default None
The columns format as specified in `LaTeX table format
<https://en.wikibooks.org/wiki/LaTeX/Tables>`__, e.g. 'rcl' for 3 columns
longtable : boolean, default False
Use a longtable environment instead of tabular.

See also
--------
HTMLFormatter
"""

def __init__(self, formatter, column_format=None, longtable=False):
self.fmt = formatter
self.frame = self.fmt.frame
self.column_format = column_format
self.longtable = longtable

def write_result(self, buf):
"""
Render a DataFrame to a LaTeX tabular/longtable environment output.
"""

# string representation of the columns
if len(self.frame.columns) == 0 or len(self.frame.index) == 0:
info_line = (u('Empty %s\nColumns: %s\nIndex: %s')
% (type(self.frame).__name__,
self.frame.columns, self.frame.index))
strcols = [[info_line]]
else:
strcols = self.fmt._to_str_columns()

def get_col_type(dtype):
if issubclass(dtype.type, np.number):
return 'r'
else:
return 'l'

if self.fmt.index and isinstance(self.frame.index, MultiIndex):
clevels = self.frame.columns.nlevels
strcols.pop(0)
name = any(self.frame.index.names)
for i, lev in enumerate(self.frame.index.levels):
lev2 = lev.format()
blank = ' ' * len(lev2[0])
lev3 = [blank] * clevels
if name:
lev3.append(lev.name)
for level_idx, group in itertools.groupby(
self.frame.index.labels[i]):
count = len(list(group))
lev3.extend([lev2[level_idx]] + [blank] * (count - 1))
strcols.insert(i, lev3)

column_format = self.column_format
if column_format is None:
dtypes = self.frame.dtypes._values
column_format = ''.join(map(get_col_type, dtypes))
if self.fmt.index:
index_format = 'l' * self.frame.index.nlevels
column_format = index_format + column_format
elif not isinstance(column_format,
compat.string_types): # pragma: no cover
raise AssertionError('column_format must be str or unicode, not %s'
% type(column_format))

if not self.longtable:
buf.write('\\begin{tabular}{%s}\n' % column_format)
buf.write('\\toprule\n')
else:
buf.write('\\begin{longtable}{%s}\n' % column_format)
buf.write('\\toprule\n')

nlevels = self.frame.columns.nlevels
if any(self.frame.index.names):
nlevels += 1
for i, row in enumerate(zip(*strcols)):
if i == nlevels and self.fmt.header:
buf.write('\\midrule\n') # End of header
if self.longtable:
buf.write('\\endhead\n')
buf.write('\\midrule\n')
buf.write('\\multicolumn{3}{r}{{Continued on next '
'page}} \\\\\n')
buf.write('\\midrule\n')
buf.write('\\endfoot\n\n')
buf.write('\\bottomrule\n')
buf.write('\\endlastfoot\n')
if self.fmt.kwds.get('escape', True):
# escape backslashes first
crow = [(x.replace('\\', '\\textbackslash')
.replace('_', '\\_')
.replace('%', '\\%')
.replace('$', '\\$')
.replace('#', '\\#')
.replace('{', '\\{')
.replace('}', '\\}')
.replace('~', '\\textasciitilde')
.replace('^', '\\textasciicircum')
.replace('&', '\\&') if x else '{}') for x in row]
else:
crow = [x if x else '{}' for x in row]
buf.write(' & '.join(crow))
buf.write(' \\\\\n')

if not self.longtable:
buf.write('\\bottomrule\n')
buf.write('\\end{tabular}\n')
else:
buf.write('\\end{longtable}\n')


class HTMLFormatter(TableFormatter):

indent_delta = 2
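To make the new internal class concrete, here is a minimal, hedged sketch of driving ``LatexFormatter`` directly; it assumes the class is importable from ``pandas.core.format`` as defined in this diff, and the sample frame and ``column_format`` are illustrative. End users would normally go through ``DataFrame.to_latex()`` instead.

import pandas as pd
from pandas.compat import StringIO
from pandas.core.format import DataFrameFormatter, LatexFormatter

df = pd.DataFrame({'a': [1, 2], 'b': ['x', 'y']})
fmt = DataFrameFormatter(df)                         # builds the string columns
renderer = LatexFormatter(fmt, column_format='lrl', longtable=False)

buf = StringIO()
renderer.write_result(buf)                           # emits \begin{tabular}{lrl} ... \end{tabular}
print(buf.getvalue())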
8 changes: 5 additions & 3 deletions pandas/core/frame.py
@@ -1547,7 +1547,7 @@ def to_latex(self, buf=None, columns=None, col_space=None, colSpace=None,
header=True, index=True, na_rep='NaN', formatters=None,
float_format=None, sparsify=None, index_names=True,
bold_rows=True, column_format=None,
longtable=None, escape=None):
longtable=None, escape=None, encoding=None):
"""
Render a DataFrame to a tabular environment table. You can splice
this into a LaTeX document. Requires \\usepackage{booktabs}.
@@ -1567,7 +1567,8 @@ def to_latex(self, buf=None, columns=None, col_space=None, colSpace=None,
default: True
When set to False prevents from escaping latex special
characters in column names.

encoding : str, default None
Default encoding is ascii in Python 2 and utf-8 in Python 3
"""

if colSpace is not None: # pragma: no cover
@@ -1589,7 +1590,8 @@ def to_latex(self, buf=None, columns=None, col_space=None, colSpace=None,
sparsify=sparsify,
index_names=index_names,
escape=escape)
formatter.to_latex(column_format=column_format, longtable=longtable)
formatter.to_latex(column_format=column_format, longtable=longtable,
encoding=encoding)

if buf is None:
return formatter.buf.getvalue()
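The new ``encoding`` keyword added above matters mainly on Python 2, where the default codec is ascii (per the docstring in this diff, utf-8 is already the default on Python 3). A hedged usage sketch follows; the output file name and sample data are illustrative.

# -*- coding: utf-8 -*-
import pandas as pd

df = pd.DataFrame({u'name': [u'González', u'Müller'], u'value': [1, 2]})

# On Python 2 this would raise UnicodeEncodeError without encoding='utf-8'.
df.to_latex('table.tex', encoding='utf-8')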
1 change: 1 addition & 0 deletions pandas/core/window.py
@@ -965,6 +965,7 @@ def corr(self, other=None, pairwise=None, **kwargs):
Use a standard estimation bias correction
"""


class EWM(_Rolling):
r"""
Provides exponential weighted functions
4 changes: 2 additions & 2 deletions pandas/hashtable.pyx
@@ -342,7 +342,7 @@ cdef class Int64HashTable(HashTable):
self.table.vals[k] = <Py_ssize_t> values[i]

@cython.boundscheck(False)
def map_locations(self, int64_t[:] values):
def map_locations(self, ndarray[int64_t, ndim=1] values):
cdef:
Py_ssize_t i, n = len(values)
int ret = 0
@@ -570,7 +570,7 @@ cdef class Float64HashTable(HashTable):
return np.asarray(labels)

@cython.boundscheck(False)
def map_locations(self, float64_t[:] values):
def map_locations(self, ndarray[float64_t, ndim=1] values):
cdef:
Py_ssize_t i, n = len(values)
int ret = 0
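For context on the ``map_locations`` signature change above (typed memoryview replaced by a plain ``ndarray`` declaration), here is a brief, hedged sketch of exercising the internal hash table. This API is internal; ``get_item`` is an existing pandas-internal lookup used only for illustration and is not part of this diff.

import numpy as np
import pandas.hashtable as ht

table = ht.Int64HashTable()
values = np.ascontiguousarray([10, 20, 30], dtype='int64')

table.map_locations(values)   # stores value -> position mappings
print(table.get_item(20))     # -> 1, the position of 20 in `values`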
25 changes: 18 additions & 7 deletions pandas/io/sql.py
@@ -19,6 +19,7 @@
from pandas.core.common import isnull
from pandas.core.base import PandasObject
from pandas.core.dtypes import DatetimeTZDtype
from pandas.core.generic import is_dictlike
from pandas.tseries.tools import to_datetime
from pandas.util.decorators import Appender

@@ -548,9 +549,11 @@ def to_sql(frame, name, con, flavor='sqlite', schema=None, if_exists='fail',
chunksize : int, default None
If not None, then rows will be written in batches of this size at a
time. If None, all rows will be written at once.
dtype : dict of column name to SQL type, default None
dtype : single SQL type or dict of column name to SQL type, default None
Optional specifying the datatype for columns. The SQL type should
be a SQLAlchemy type, or a string for sqlite3 fallback connection.
be a SQLAlchemy type, or a string for sqlite3 fallback connection.
If all columns are of the same type, one single value can be
used.

"""
if if_exists not in ('fail', 'replace', 'append'):
Expand All @@ -563,7 +566,7 @@ def to_sql(frame, name, con, flavor='sqlite', schema=None, if_exists='fail',
elif not isinstance(frame, DataFrame):
raise NotImplementedError("'frame' argument should be either a "
"Series or a DataFrame")

pandas_sql.to_sql(frame, name, if_exists=if_exists, index=index,
index_label=index_label, schema=schema,
chunksize=chunksize, dtype=dtype)
@@ -1222,11 +1225,15 @@ def to_sql(self, frame, name, if_exists='fail', index=True,
chunksize : int, default None
If not None, then rows will be written in batches of this size at a
time. If None, all rows will be written at once.
dtype : dict of column name to SQL type, default None
dtype : single SQL type or dict of column name to SQL type, default None
Optional specifying the datatype for columns. The SQL type should
be a SQLAlchemy type.
be a SQLAlchemy type. If all columns are of the same type, one
single value can be used.


"""
if dtype and not is_dictlike(dtype):
dtype = {col_name: dtype for col_name in frame}
if dtype is not None:
from sqlalchemy.types import to_instance, TypeEngine
for col, my_type in dtype.items():
@@ -1618,11 +1625,15 @@ def to_sql(self, frame, name, if_exists='fail', index=True,
chunksize : int, default None
If not None, then rows will be written in batches of this
size at a time. If None, all rows will be written at once.
dtype : dict of column name to SQL type, default None
dtype : single SQL type or dict of column name to SQL type, default None
Optional specifying the datatype for columns. The SQL type should
be a string.
be a string. If all columns are of the same type, one single
value can be used.

"""
if dtype and not is_dictlike(dtype):
dtype = {col_name: dtype for col_name in frame}

if dtype is not None:
for col, my_type in dtype.items():
if not isinstance(my_type, str):
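Finally, the sqlite fallback change above performs the same normalization as the SQLAlchemy path: a lone type is expanded into a per-column dict. A plain-Python sketch of that step follows; the helper name ``_normalize_dtype`` is illustrative, and the diff itself uses ``is_dictlike`` rather than an ``isinstance`` check.

def _normalize_dtype(dtype, frame):
    """Expand a single SQL type into a column -> type mapping."""
    if dtype is not None and not isinstance(dtype, dict):
        dtype = {col_name: dtype for col_name in frame}
    return dtype

# e.g. _normalize_dtype('TEXT', df) -> {'a': 'TEXT', 'b': 'TEXT'} for columns a, b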