Skip to content

Commit

Permalink
Merge remote-tracking branch 'refs/remotes/pydata/master' into Fix-for-
Browse files Browse the repository at this point in the history
…pandas-dev#11317

Conflicts:
	doc/source/whatsnew/v0.17.1.txt

Bringing it up to date with the current master
  • Loading branch information
Dr-Irv committed Oct 16, 2015
2 parents c202599 + 89b4e5b commit 7c0e6f7
Show file tree
Hide file tree
Showing 16 changed files with 344 additions and 191 deletions.
10 changes: 10 additions & 0 deletions asv_bench/benchmarks/frame_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -930,6 +930,16 @@ def time_frame_xs_row(self):
self.df.xs(50000)


class frame_sort_index(object):

    """ASV benchmark for ``DataFrame.sort_index`` on a large frame."""

    goal_time = 0.2

    def setup(self):
        # 1M-row frame with the default (already monotonic) integer index.
        data = randn(1000000, 2)
        self.df = DataFrame(data, columns=['A', 'B'])

    def time_frame_sort_index(self):
        self.df.sort_index()


class series_string_vector_slice(object):
goal_time = 0.2

Expand Down
14 changes: 12 additions & 2 deletions doc/source/whatsnew/v0.17.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,17 @@ API changes
Deprecations
^^^^^^^^^^^^

- The ``pandas.io.ga`` module which implements ``google-analytics`` support is deprecated and will be removed in a future version (:issue:`11308`)
- Deprecate the ``engine`` keyword from ``.to_csv()``, which will be removed in a future version (:issue:`11274`)


.. _whatsnew_0171.performance:

Performance Improvements
~~~~~~~~~~~~~~~~~~~~~~~~

- Checking monotonic-ness before sorting on an index (:issue:`11080`)

.. _whatsnew_0171.bug_fixes:

Bug Fixes
Expand All @@ -61,10 +67,10 @@ Bug Fixes
- Bug in tz-conversions with an ambiguous time and ``.dt`` accessors (:issue:`11295`)


- Bug in list-like indexing with a mixed-integer Index (:issue:`11320`)




- Bug in ``DataFrame.plot`` cannot use hex strings colors (:issue:`10299`)



Expand Down Expand Up @@ -92,3 +98,7 @@ Bug Fixes
``datetime64[ns, tz]`` (:issue:`11245`).

- Bug in ``read_excel`` with multi-index containing integers (:issue:`11317`, :issue:`11328`)

- Bug in ``DataFrame.to_dict()`` produces a ``np.datetime64`` object instead of ``Timestamp`` when only datetime is present in data (:issue:`11327`)


152 changes: 1 addition & 151 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
import re
import collections
import numbers
import codecs
import csv
import types
from datetime import datetime, timedelta
from functools import partial
Expand All @@ -19,7 +17,7 @@
import pandas.lib as lib
import pandas.tslib as tslib
from pandas import compat
from pandas.compat import StringIO, BytesIO, range, long, u, zip, map, string_types, iteritems
from pandas.compat import BytesIO, range, long, u, zip, map, string_types, iteritems
from pandas.core.dtypes import CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, DatetimeTZDtypeType
from pandas.core.config import get_option

Expand Down Expand Up @@ -2808,154 +2806,6 @@ def _all_none(*args):
return True


class UTF8Recoder:

    """
    Wrap a byte stream encoded in an arbitrary codec and expose it as an
    iterator/file-like object yielding UTF-8 encoded bytes.
    """

    def __init__(self, f, encoding):
        # Decode the incoming bytes with the caller-supplied codec; every
        # read path below re-encodes the decoded text as UTF-8.
        decoder_cls = codecs.getreader(encoding)
        self.reader = decoder_cls(f)

    def __iter__(self):
        return self

    def read(self, bytes=-1):
        chunk = self.reader.read(bytes)
        return chunk.encode('utf-8')

    def readline(self):
        line = self.reader.readline()
        return line.encode('utf-8')

    def next(self):
        # Advance the underlying decoded-line iterator.
        return next(self.reader).encode("utf-8")

    # Python 3 iterator protocol
    __next__ = next


def _get_handle(path, mode, encoding=None, compression=None):
    """Open *path* in *mode* and return a file handle.

    Supports optional on-the-fly decompression (``'gzip'`` or ``'bz2'``).
    NOTE: Under Python 3.2, getting a compressed file handle means reading in
    the entire file, decompressing it and decoding it to ``str`` all at once
    and then wrapping it in a StringIO.
    """
    if compression is None:
        # Plain, uncompressed file.
        if not compat.PY3:
            return open(path, mode)
        if encoding:
            return open(path, mode, encoding=encoding)
        return open(path, mode, errors='replace')

    # Compressed handle requested.
    if encoding is not None and not compat.PY3:
        raise ValueError('encoding + compression not yet supported '
                         'in Python 2')

    if compression == 'gzip':
        import gzip
        f = gzip.GzipFile(path, mode)
    elif compression == 'bz2':
        import bz2
        f = bz2.BZ2File(path, mode)
    else:
        raise ValueError('Unrecognized compression type: %s' % compression)

    if compat.PY3:
        # Present decoded text to the caller instead of raw bytes.
        from io import TextIOWrapper
        f = TextIOWrapper(f, encoding=encoding)
    return f


# Unicode-safe csv reader/writer pairs.  On Python 3 the stdlib csv module
# already handles unicode, so these are thin aliases; on Python 2 the csv
# module is byte-oriented and the wrappers recode through UTF-8.
if compat.PY3:  # pragma: no cover
    def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds):
        # ignore encoding
        return csv.reader(f, dialect=dialect, **kwds)

    def UnicodeWriter(f, dialect=csv.excel, encoding="utf-8", **kwds):
        return csv.writer(f, dialect=dialect, **kwds)
else:
    class UnicodeReader:

        """
        A CSV reader which will iterate over lines in the CSV file "f",
        which is encoded in the given encoding.
        On Python 3, this is replaced (below) by csv.reader, which handles
        unicode.
        """

        def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
            # Recode the raw stream to UTF-8 so the byte-oriented Py2 csv
            # module can parse it; cells are decoded back to unicode in
            # next().
            f = UTF8Recoder(f, encoding)
            self.reader = csv.reader(f, dialect=dialect, **kwds)

        def next(self):
            # Return the next row as a list of unicode strings.
            row = next(self.reader)
            return [compat.text_type(s, "utf-8") for s in row]

        # python 3 iterator
        __next__ = next

        def __iter__(self):  # pragma: no cover
            return self

    class UnicodeWriter:

        """
        A CSV writer which will write rows to CSV file "f",
        which is encoded in the given encoding.
        """

        def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
            # Redirect output to a queue: csv.writer emits UTF-8 bytes into
            # this staging buffer, which is then re-encoded to the target
            # encoding and flushed to ``f``.
            self.queue = StringIO()
            self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
            self.stream = f
            self.encoder = codecs.getincrementalencoder(encoding)()
            self.quoting = kwds.get("quoting", None)

        def writerow(self, row):
            # Pass values through unchanged when they are already str, or
            # when QUOTE_NONNUMERIC is active and the value is numeric;
            # everything else is stringified and UTF-8 encoded.
            def _check_as_is(x):
                return (self.quoting == csv.QUOTE_NONNUMERIC and
                        is_number(x)) or isinstance(x, str)

            row = [x if _check_as_is(x)
                   else pprint_thing(x).encode('utf-8') for x in row]

            self.writer.writerow([s for s in row])
            # Fetch UTF-8 output from the queue ...
            data = self.queue.getvalue()
            data = data.decode("utf-8")
            # ... and reencode it into the target encoding
            data = self.encoder.encode(data)
            # write to the target stream
            self.stream.write(data)
            # empty queue
            self.queue.truncate(0)

        def writerows(self, rows):
            # Same pipeline as writerow(), applied to every row in-place.
            def _check_as_is(x):
                return (self.quoting == csv.QUOTE_NONNUMERIC and
                        is_number(x)) or isinstance(x, str)

            for i, row in enumerate(rows):
                rows[i] = [x if _check_as_is(x)
                           else pprint_thing(x).encode('utf-8') for x in row]

            self.writer.writerows([[s for s in row] for row in rows])
            # Fetch UTF-8 output from the queue ...
            data = self.queue.getvalue()
            data = data.decode("utf-8")
            # ... and reencode it into the target encoding
            data = self.encoder.encode(data)
            # write to the target stream
            self.stream.write(data)
            # empty queue
            self.queue.truncate(0)


def get_dtype_kinds(l):
"""
Parameters
Expand Down
14 changes: 10 additions & 4 deletions pandas/core/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
OrderedDict)
from pandas.util.terminal import get_terminal_size
from pandas.core.config import get_option, set_option
from pandas.io.common import _get_handle, UnicodeWriter
import pandas.core.common as com
import pandas.lib as lib
from pandas.tslib import iNaT, Timestamp, Timedelta, format_array_from_datetime
Expand All @@ -23,6 +24,7 @@

import itertools
import csv
import warnings

common_docstring = """
Parameters
Expand Down Expand Up @@ -1264,7 +1266,11 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', float_format=None,
tupleize_cols=False, quotechar='"', date_format=None,
doublequote=True, escapechar=None, decimal='.'):

self.engine = engine # remove for 0.13
if engine is not None:
warnings.warn("'engine' keyword is deprecated and "
"will be removed in a future version",
FutureWarning, stacklevel=3)
self.engine = engine # remove for 0.18
self.obj = obj

if path_or_buf is None:
Expand Down Expand Up @@ -1470,8 +1476,8 @@ def save(self):
f = self.path_or_buf
close = False
else:
f = com._get_handle(self.path_or_buf, self.mode,
encoding=self.encoding,
f = _get_handle(self.path_or_buf, self.mode,
encoding=self.encoding,
compression=self.compression)
close = True

Expand All @@ -1483,7 +1489,7 @@ def save(self):
quotechar=self.quotechar)
if self.encoding is not None:
writer_kwargs['encoding'] = self.encoding
self.writer = com.UnicodeWriter(f, **writer_kwargs)
self.writer = UnicodeWriter(f, **writer_kwargs)
else:
self.writer = csv.writer(f, **writer_kwargs)

Expand Down
16 changes: 13 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -802,11 +802,12 @@ def to_dict(self, orient='dict'):
elif orient.lower().startswith('sp'):
return {'index': self.index.tolist(),
'columns': self.columns.tolist(),
'data': self.values.tolist()}
'data': lib.map_infer(self.values.ravel(), _maybe_box_datetimelike)
.reshape(self.values.shape).tolist()}
elif orient.lower().startswith('s'):
return dict((k, v) for k, v in compat.iteritems(self))
return dict((k, _maybe_box_datetimelike(v)) for k, v in compat.iteritems(self))
elif orient.lower().startswith('r'):
return [dict((k, v) for k, v in zip(self.columns, row))
return [dict((k, _maybe_box_datetimelike(v)) for k, v in zip(self.columns, row))
for row in self.values]
elif orient.lower().startswith('i'):
return dict((k, v.to_dict()) for k, v in self.iterrows())
Expand Down Expand Up @@ -3157,6 +3158,15 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
else:
from pandas.core.groupby import _nargsort

# GH11080 - Check monotonic-ness before sort an index
# if monotonic (already sorted), return None or copy() according to 'inplace'
if (ascending and labels.is_monotonic_increasing) or \
(not ascending and labels.is_monotonic_decreasing):
if inplace:
return
else:
return self.copy()

indexer = _nargsort(labels, kind=kind, ascending=ascending,
na_position=na_position)

Expand Down
6 changes: 2 additions & 4 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -982,10 +982,6 @@ def _convert_list_indexer(self, keyarr, kind=None):
if kind in [None, 'iloc', 'ix'] and is_integer_dtype(keyarr) \
and not self.is_floating() and not isinstance(keyarr, ABCPeriodIndex):

if self.inferred_type != 'integer':
keyarr = np.where(keyarr < 0,
len(self) + keyarr, keyarr)

if self.inferred_type == 'mixed-integer':
indexer = self.get_indexer(keyarr)
if (indexer >= 0).all():
Expand All @@ -998,6 +994,8 @@ def _convert_list_indexer(self, keyarr, kind=None):
return maybe_convert_indices(indexer, len(self))

elif not self.inferred_type == 'integer':
keyarr = np.where(keyarr < 0,
len(self) + keyarr, keyarr)
return keyarr

return None
Expand Down
Loading

0 comments on commit 7c0e6f7

Please sign in to comment.