Skip to content

ENH : Allow to_sql to recognize single sql type #11886 #12053

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 10 commits into from
5 changes: 5 additions & 0 deletions doc/source/whatsnew/v0.18.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ Other enhancements
- ``Series`` gained an ``is_unique`` attribute (:issue:`11946`)
- ``DataFrame.quantile`` and ``Series.quantile`` now accept ``interpolation`` keyword (:issue:`10174`).
- ``DataFrame.select_dtypes`` now allows the ``np.float16`` typecode (:issue:`11990`)
- ``DataFrame.to_sql`` now allows a single value as the SQL type for all columns (:issue:`11886`).

.. _whatsnew_0180.enhancements.rounding:

Expand Down Expand Up @@ -303,6 +304,9 @@ Other API Changes

- ``.memory_usage`` now includes values in the index, as does memory_usage in ``.info`` (:issue:`11597`)

- ``DataFrame.to_latex()`` now supports non-ascii encodings (e.g. utf-8) in Python 2 with the parameter ``encoding`` (:issue:`7061`)


Changes to eval
^^^^^^^^^^^^^^^

Expand Down Expand Up @@ -463,6 +467,7 @@ Bug Fixes
- Bug in ``pd.read_clipboard`` and ``pd.to_clipboard`` functions not supporting Unicode; upgrade included ``pyperclip`` to v1.5.15 (:issue:`9263`)
- Bug in ``DataFrame.query`` containing an assignment (:issue:`8664`)

- Bug in ``from_msgpack`` where ``__contains__()`` fails for columns of the unpacked ``DataFrame``, if the ``DataFrame`` has object columns. (:issue:`11880`)


- Bug in timezone info lost when broadcasting scalar datetime to ``DataFrame`` (:issue:`11682`)
Expand Down
217 changes: 125 additions & 92 deletions pandas/core/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -619,105 +619,20 @@ def _join_multiline(self, *strcols):
st = ed
return '\n\n'.join(str_lst)

def to_latex(self, column_format=None, longtable=False):
def to_latex(self, column_format=None, longtable=False, encoding=None):
"""
Render a DataFrame to a LaTeX tabular/longtable environment output.
"""
self.escape = self.kwds.get('escape', True)

def get_col_type(dtype):
if issubclass(dtype.type, np.number):
return 'r'
else:
return 'l'

frame = self.frame

if len(frame.columns) == 0 or len(frame.index) == 0:
info_line = (u('Empty %s\nColumns: %s\nIndex: %s')
% (type(self.frame).__name__,
frame.columns, frame.index))
strcols = [[info_line]]
else:
strcols = self._to_str_columns()

if self.index and isinstance(self.frame.index, MultiIndex):
clevels = self.frame.columns.nlevels
strcols.pop(0)
name = any(self.frame.index.names)
for i, lev in enumerate(self.frame.index.levels):
lev2 = lev.format()
blank = ' ' * len(lev2[0])
lev3 = [blank] * clevels
if name:
lev3.append(lev.name)
for level_idx, group in itertools.groupby(
self.frame.index.labels[i]):
count = len(list(group))
lev3.extend([lev2[level_idx]] + [blank] * (count - 1))
strcols.insert(i, lev3)

if column_format is None:
dtypes = self.frame.dtypes._values
column_format = ''.join(map(get_col_type, dtypes))
if self.index:
index_format = 'l' * self.frame.index.nlevels
column_format = index_format + column_format
elif not isinstance(column_format,
compat.string_types): # pragma: no cover
raise AssertionError('column_format must be str or unicode, not %s'
% type(column_format))

def write(buf, frame, column_format, strcols, longtable=False):
if not longtable:
buf.write('\\begin{tabular}{%s}\n' % column_format)
buf.write('\\toprule\n')
else:
buf.write('\\begin{longtable}{%s}\n' % column_format)
buf.write('\\toprule\n')

nlevels = frame.columns.nlevels
if any(frame.index.names):
nlevels += 1
for i, row in enumerate(zip(*strcols)):
if i == nlevels and self.header:
buf.write('\\midrule\n') # End of header
if longtable:
buf.write('\\endhead\n')
buf.write('\\midrule\n')
buf.write('\\multicolumn{3}{r}{{Continued on next '
'page}} \\\\\n')
buf.write('\midrule\n')
buf.write('\endfoot\n\n')
buf.write('\\bottomrule\n')
buf.write('\\endlastfoot\n')
if self.escape:
crow = [(x.replace('\\', '\\textbackslash') # escape backslashes first
.replace('_', '\\_')
.replace('%', '\\%')
.replace('$', '\\$')
.replace('#', '\\#')
.replace('{', '\\{')
.replace('}', '\\}')
.replace('~', '\\textasciitilde')
.replace('^', '\\textasciicircum')
.replace('&', '\\&') if x else '{}') for x in row]
else:
crow = [x if x else '{}' for x in row]
buf.write(' & '.join(crow))
buf.write(' \\\\\n')

if not longtable:
buf.write('\\bottomrule\n')
buf.write('\\end{tabular}\n')
else:
buf.write('\\end{longtable}\n')
latex_renderer = LatexFormatter(self, column_format=column_format,
longtable=longtable)

if hasattr(self.buf, 'write'):
write(self.buf, frame, column_format, strcols, longtable)
latex_renderer.write_result(self.buf)
elif isinstance(self.buf, compat.string_types):
with open(self.buf, 'w') as f:
write(f, frame, column_format, strcols, longtable)
import codecs
with codecs.open(self.buf, 'w', encoding=encoding) as f:
latex_renderer.write_result(f)
else:
raise TypeError('buf is not a file name and it has no write '
'method')
Expand Down Expand Up @@ -851,6 +766,124 @@ def _get_column_name_list(self):
return names


class LatexFormatter(TableFormatter):
    """ Used to render a DataFrame to a LaTeX tabular/longtable environment
    output.

    Parameters
    ----------
    formatter : `DataFrameFormatter`
    column_format : str, default None
        The columns format as specified in `LaTeX table format
        <https://en.wikibooks.org/wiki/LaTeX/Tables>`__
        e.g. 'rcl' for 3 columns
    longtable : boolean, default False
        Use a longtable environment instead of tabular.

    See also
    --------
    HTMLFormatter
    """

    def __init__(self, formatter, column_format=None, longtable=False):
        self.fmt = formatter
        self.frame = self.fmt.frame
        self.column_format = column_format
        self.longtable = longtable

    def write_result(self, buf):
        """
        Render a DataFrame to a LaTeX tabular/longtable environment output.

        Parameters
        ----------
        buf : object with a ``write`` method
            Receives the generated LaTeX source piece by piece.

        Raises
        ------
        AssertionError
            If ``column_format`` was given but is not a string.
        """

        # string representation of the columns
        if len(self.frame.columns) == 0 or len(self.frame.index) == 0:
            # an empty frame is rendered as a single cell describing it
            info_line = (u('Empty %s\nColumns: %s\nIndex: %s')
                         % (type(self.frame).__name__,
                            self.frame.columns, self.frame.index))
            strcols = [[info_line]]
        else:
            strcols = self.fmt._to_str_columns()

        def get_col_type(dtype):
            # numeric columns are right-aligned, everything else left-aligned
            if issubclass(dtype.type, np.number):
                return 'r'
            else:
                return 'l'

        if self.fmt.index and isinstance(self.frame.index, MultiIndex):
            # Replace the first string column (presumably the combined index
            # column -- confirm against _to_str_columns) with one string
            # column per index level.
            clevels = self.frame.columns.nlevels
            strcols.pop(0)
            name = any(self.frame.index.names)
            for i, lev in enumerate(self.frame.index.levels):
                lev2 = lev.format()
                blank = ' ' * len(lev2[0])
                # one blank cell per column-header row, then the level name
                lev3 = [blank] * clevels
                if name:
                    lev3.append(lev.name)
                # show each run of identical labels once, blank the repeats
                for level_idx, group in itertools.groupby(
                        self.frame.index.labels[i]):
                    count = len(list(group))
                    lev3.extend([lev2[level_idx]] + [blank] * (count - 1))
                strcols.insert(i, lev3)

        column_format = self.column_format
        if column_format is None:
            # derive the alignment spec from the column dtypes
            dtypes = self.frame.dtypes._values
            column_format = ''.join(map(get_col_type, dtypes))
            if self.fmt.index:
                index_format = 'l' * self.frame.index.nlevels
                column_format = index_format + column_format
        elif not isinstance(column_format,
                            compat.string_types):  # pragma: no cover
            raise AssertionError('column_format must be str or unicode, not %s'
                                 % type(column_format))

        environment = 'longtable' if self.longtable else 'tabular'
        buf.write('\\begin{%s}{%s}\n' % (environment, column_format))
        buf.write('\\toprule\n')

        # number of rows that make up the header: one per column level, plus
        # one more when any index level is named
        nlevels = self.frame.columns.nlevels
        if any(self.frame.index.names):
            nlevels += 1

        # loop-invariant, so look it up once
        escape = self.fmt.kwds.get('escape', True)

        for i, row in enumerate(zip(*strcols)):
            if i == nlevels and self.fmt.header:
                buf.write('\\midrule\n')  # End of header
                if self.longtable:
                    buf.write('\\endhead\n')
                    buf.write('\\midrule\n')
                    # span the full width of the table rather than a
                    # hard-coded three columns
                    buf.write('\\multicolumn{%d}{r}{{Continued on next '
                              'page}} \\\\\n' % len(row))
                    buf.write('\\midrule\n')
                    buf.write('\\endfoot\n\n')
                    buf.write('\\bottomrule\n')
                    buf.write('\\endlastfoot\n')
            if escape:
                # escape backslashes first
                crow = [(x.replace('\\', '\\textbackslash')
                         .replace('_', '\\_')
                         .replace('%', '\\%')
                         .replace('$', '\\$')
                         .replace('#', '\\#')
                         .replace('{', '\\{')
                         .replace('}', '\\}')
                         .replace('~', '\\textasciitilde')
                         .replace('^', '\\textasciicircum')
                         .replace('&', '\\&') if x else '{}') for x in row]
            else:
                # empty cells still need a placeholder group
                crow = [x if x else '{}' for x in row]
            buf.write(' & '.join(crow))
            buf.write(' \\\\\n')

        if not self.longtable:
            buf.write('\\bottomrule\n')
            buf.write('\\end{tabular}\n')
        else:
            buf.write('\\end{longtable}\n')

class HTMLFormatter(TableFormatter):

indent_delta = 2
Expand Down
8 changes: 5 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1547,7 +1547,7 @@ def to_latex(self, buf=None, columns=None, col_space=None, colSpace=None,
header=True, index=True, na_rep='NaN', formatters=None,
float_format=None, sparsify=None, index_names=True,
bold_rows=True, column_format=None,
longtable=None, escape=None):
longtable=None, escape=None, encoding=None):
"""
Render a DataFrame to a tabular environment table. You can splice
this into a LaTeX document. Requires \\usepackage{booktabs}.
Expand All @@ -1567,7 +1567,8 @@ def to_latex(self, buf=None, columns=None, col_space=None, colSpace=None,
default: True
When set to False prevents from escaping latex special
characters in column names.

encoding : str, default None
Default encoding is ascii in Python 2 and utf-8 in Python 3
"""

if colSpace is not None: # pragma: no cover
Expand All @@ -1589,7 +1590,8 @@ def to_latex(self, buf=None, columns=None, col_space=None, colSpace=None,
sparsify=sparsify,
index_names=index_names,
escape=escape)
formatter.to_latex(column_format=column_format, longtable=longtable)
formatter.to_latex(column_format=column_format, longtable=longtable,
encoding=encoding)

if buf is None:
return formatter.buf.getvalue()
Expand Down
1 change: 1 addition & 0 deletions pandas/core/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -965,6 +965,7 @@ def corr(self, other=None, pairwise=None, **kwargs):
Use a standard estimation bias correction
"""


class EWM(_Rolling):
r"""
Provides exponential weighted functions
Expand Down
4 changes: 2 additions & 2 deletions pandas/hashtable.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ cdef class Int64HashTable(HashTable):
self.table.vals[k] = <Py_ssize_t> values[i]

@cython.boundscheck(False)
def map_locations(self, int64_t[:] values):
def map_locations(self, ndarray[int64_t, ndim=1] values):
cdef:
Py_ssize_t i, n = len(values)
int ret = 0
Expand Down Expand Up @@ -570,7 +570,7 @@ cdef class Float64HashTable(HashTable):
return np.asarray(labels)

@cython.boundscheck(False)
def map_locations(self, float64_t[:] values):
def map_locations(self, ndarray[float64_t, ndim=1] values):
cdef:
Py_ssize_t i, n = len(values)
int ret = 0
Expand Down
25 changes: 18 additions & 7 deletions pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from pandas.core.common import isnull
from pandas.core.base import PandasObject
from pandas.core.dtypes import DatetimeTZDtype
from pandas.core.generic import is_dictlike
from pandas.tseries.tools import to_datetime
from pandas.util.decorators import Appender

Expand Down Expand Up @@ -548,9 +549,11 @@ def to_sql(frame, name, con, flavor='sqlite', schema=None, if_exists='fail',
chunksize : int, default None
If not None, then rows will be written in batches of this size at a
time. If None, all rows will be written at once.
dtype : dict of column name to SQL type, default None
dtype : single SQL type or dict of column name to SQL type, default None
Optional specifying the datatype for columns. The SQL type should
be a SQLAlchemy type, or a string for sqlite3 fallback connection.
be a SQLAlchemy type, or a string for sqlite3 fallback connection.
If all columns are of the same type, one single value can be
used.

"""
if if_exists not in ('fail', 'replace', 'append'):
Expand All @@ -563,7 +566,7 @@ def to_sql(frame, name, con, flavor='sqlite', schema=None, if_exists='fail',
elif not isinstance(frame, DataFrame):
raise NotImplementedError("'frame' argument should be either a "
"Series or a DataFrame")

pandas_sql.to_sql(frame, name, if_exists=if_exists, index=index,
index_label=index_label, schema=schema,
chunksize=chunksize, dtype=dtype)
Expand Down Expand Up @@ -1222,11 +1225,15 @@ def to_sql(self, frame, name, if_exists='fail', index=True,
chunksize : int, default None
If not None, then rows will be written in batches of this size at a
time. If None, all rows will be written at once.
dtype : dict of column name to SQL type, default None
dtype : single SQL type or dict of column name to SQL type, default None
Optional specifying the datatype for columns. The SQL type should
be a SQLAlchemy type.
be a SQLAlchemy type. If all columns are of the same type, one
single value can be used.


"""
if dtype and not is_dictlike(dtype):
dtype = { col_name : dtype for col_name in frame }
if dtype is not None:
from sqlalchemy.types import to_instance, TypeEngine
for col, my_type in dtype.items():
Expand Down Expand Up @@ -1618,11 +1625,15 @@ def to_sql(self, frame, name, if_exists='fail', index=True,
chunksize : int, default None
If not None, then rows will be written in batches of this
size at a time. If None, all rows will be written at once.
dtype : dict of column name to SQL type, default None
dtype : single SQL type or dict of column name to SQL type, default None
Optional specifying the datatype for columns. The SQL type should
be a string.
be a string. If all columns are of the same type, one single
value can be used.

"""
if dtype and not is_dictlike(dtype):
dtype = { col_name : dtype for col_name in frame }

if dtype is not None:
for col, my_type in dtype.items():
if not isinstance(my_type, str):
Expand Down
Loading