Skip to content

Commit

Permalink
Merge remote-tracking branch 'refs/remotes/pydata/master' into Fix-for-
Browse files Browse the repository at this point in the history
…pandas-dev#11317

Conflicts:
	doc/source/whatsnew/v0.17.1.txt

Bringing it up to date with the current master
  • Loading branch information
Dr-Irv committed Oct 16, 2015
2 parents c202599 + 89b4e5b commit 7c0e6f7
Show file tree
Hide file tree
Showing 16 changed files with 344 additions and 191 deletions.
10 changes: 10 additions & 0 deletions asv_bench/benchmarks/frame_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -930,6 +930,16 @@ def time_frame_xs_row(self):
self.df.xs(50000)


class frame_sort_index(object):

    """ASV benchmark for ``DataFrame.sort_index`` on a large frame."""

    goal_time = 0.2

    def setup(self):
        # 1M-row frame with the default (already monotonic) integer index.
        data = randn(1000000, 2)
        self.df = DataFrame(data, columns=['A', 'B'])

    def time_frame_sort_index(self):
        self.df.sort_index()


class series_string_vector_slice(object):
goal_time = 0.2

Expand Down
14 changes: 12 additions & 2 deletions doc/source/whatsnew/v0.17.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,17 @@ API changes
Deprecations
^^^^^^^^^^^^

- The ``pandas.io.ga`` module which implements ``google-analytics`` support is deprecated and will be removed in a future version (:issue:`11308`)
- Deprecate the ``engine`` keyword from ``.to_csv()``, which will be removed in a future version (:issue:`11274`)


.. _whatsnew_0171.performance:

Performance Improvements
~~~~~~~~~~~~~~~~~~~~~~~~

- Checking monotonic-ness before sorting on an index (:issue:`11080`)

.. _whatsnew_0171.bug_fixes:

Bug Fixes
Expand All @@ -61,10 +67,10 @@ Bug Fixes
- Bug in tz-conversions with an ambiguous time and ``.dt`` accessors (:issue:`11295`)


- Bug in list-like indexing with a mixed-integer Index (:issue:`11320`)




- Bug in ``DataFrame.plot`` cannot use hex strings colors (:issue:`10299`)



Expand Down Expand Up @@ -92,3 +98,7 @@ Bug Fixes
``datetime64[ns, tz]`` (:issue:`11245`).

- Bug in ``read_excel`` with multi-index containing integers (:issue:`11317`, :issue:`11328`)

- Bug in ``DataFrame.to_dict()`` produces a ``np.datetime64`` object instead of ``Timestamp`` when only datetime is present in data (:issue:`11327`)


152 changes: 1 addition & 151 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
import re
import collections
import numbers
import codecs
import csv
import types
from datetime import datetime, timedelta
from functools import partial
Expand All @@ -19,7 +17,7 @@
import pandas.lib as lib
import pandas.tslib as tslib
from pandas import compat
from pandas.compat import StringIO, BytesIO, range, long, u, zip, map, string_types, iteritems
from pandas.compat import BytesIO, range, long, u, zip, map, string_types, iteritems
from pandas.core.dtypes import CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, DatetimeTZDtypeType
from pandas.core.config import get_option

Expand Down Expand Up @@ -2808,154 +2806,6 @@ def _all_none(*args):
return True


class UTF8Recoder:

    """
    Wrap a byte stream encoded in an arbitrary codec and expose it as an
    iterator/file-like object yielding UTF-8 encoded bytes.
    """

    def __init__(self, f, encoding):
        # Decode the incoming bytes with the caller-supplied codec; every
        # read path below re-encodes the decoded text as UTF-8.
        decoder_cls = codecs.getreader(encoding)
        self.reader = decoder_cls(f)

    def __iter__(self):
        return self

    def read(self, bytes=-1):
        chunk = self.reader.read(bytes)
        return chunk.encode('utf-8')

    def readline(self):
        line = self.reader.readline()
        return line.encode('utf-8')

    def next(self):
        # Advance the underlying decoded-line iterator.
        return next(self.reader).encode("utf-8")

    # Python 3 iterator protocol
    __next__ = next


def _get_handle(path, mode, encoding=None, compression=None):
    """Open *path* in *mode* and return a file handle.

    Supports optional on-the-fly decompression (``'gzip'`` or ``'bz2'``).
    NOTE: Under Python 3.2, getting a compressed file handle means reading in
    the entire file, decompressing it and decoding it to ``str`` all at once
    and then wrapping it in a StringIO.
    """
    if compression is None:
        # Plain, uncompressed file.
        if not compat.PY3:
            return open(path, mode)
        if encoding:
            return open(path, mode, encoding=encoding)
        return open(path, mode, errors='replace')

    # Compressed handle requested.
    if encoding is not None and not compat.PY3:
        raise ValueError('encoding + compression not yet supported '
                         'in Python 2')

    if compression == 'gzip':
        import gzip
        f = gzip.GzipFile(path, mode)
    elif compression == 'bz2':
        import bz2
        f = bz2.BZ2File(path, mode)
    else:
        raise ValueError('Unrecognized compression type: %s' % compression)

    if compat.PY3:
        # Present decoded text to the caller instead of raw bytes.
        from io import TextIOWrapper
        f = TextIOWrapper(f, encoding=encoding)
    return f


# Unicode-safe csv reader/writer pairs.  On Python 3 the stdlib csv module
# already handles unicode, so these are thin aliases; on Python 2 the csv
# module is byte-oriented and the wrappers recode through UTF-8.
if compat.PY3:  # pragma: no cover
    def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds):
        # ignore encoding
        return csv.reader(f, dialect=dialect, **kwds)

    def UnicodeWriter(f, dialect=csv.excel, encoding="utf-8", **kwds):
        return csv.writer(f, dialect=dialect, **kwds)
else:
    class UnicodeReader:

        """
        A CSV reader which will iterate over lines in the CSV file "f",
        which is encoded in the given encoding.
        On Python 3, this is replaced (below) by csv.reader, which handles
        unicode.
        """

        def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
            # Recode the raw stream to UTF-8 so the byte-oriented Py2 csv
            # module can parse it; cells are decoded back to unicode in
            # next().
            f = UTF8Recoder(f, encoding)
            self.reader = csv.reader(f, dialect=dialect, **kwds)

        def next(self):
            # Return the next row as a list of unicode strings.
            row = next(self.reader)
            return [compat.text_type(s, "utf-8") for s in row]

        # python 3 iterator
        __next__ = next

        def __iter__(self):  # pragma: no cover
            return self

    class UnicodeWriter:

        """
        A CSV writer which will write rows to CSV file "f",
        which is encoded in the given encoding.
        """

        def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
            # Redirect output to a queue: csv.writer emits UTF-8 bytes into
            # this staging buffer, which is then re-encoded to the target
            # encoding and flushed to ``f``.
            self.queue = StringIO()
            self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
            self.stream = f
            self.encoder = codecs.getincrementalencoder(encoding)()
            self.quoting = kwds.get("quoting", None)

        def writerow(self, row):
            # Pass values through unchanged when they are already str, or
            # when QUOTE_NONNUMERIC is active and the value is numeric;
            # everything else is stringified and UTF-8 encoded.
            def _check_as_is(x):
                return (self.quoting == csv.QUOTE_NONNUMERIC and
                        is_number(x)) or isinstance(x, str)

            row = [x if _check_as_is(x)
                   else pprint_thing(x).encode('utf-8') for x in row]

            self.writer.writerow([s for s in row])
            # Fetch UTF-8 output from the queue ...
            data = self.queue.getvalue()
            data = data.decode("utf-8")
            # ... and reencode it into the target encoding
            data = self.encoder.encode(data)
            # write to the target stream
            self.stream.write(data)
            # empty queue
            self.queue.truncate(0)

        def writerows(self, rows):
            # Same pipeline as writerow(), applied to every row in-place.
            def _check_as_is(x):
                return (self.quoting == csv.QUOTE_NONNUMERIC and
                        is_number(x)) or isinstance(x, str)

            for i, row in enumerate(rows):
                rows[i] = [x if _check_as_is(x)
                           else pprint_thing(x).encode('utf-8') for x in row]

            self.writer.writerows([[s for s in row] for row in rows])
            # Fetch UTF-8 output from the queue ...
            data = self.queue.getvalue()
            data = data.decode("utf-8")
            # ... and reencode it into the target encoding
            data = self.encoder.encode(data)
            # write to the target stream
            self.stream.write(data)
            # empty queue
            self.queue.truncate(0)


def get_dtype_kinds(l):
"""
Parameters
Expand Down
14 changes: 10 additions & 4 deletions pandas/core/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
OrderedDict)
from pandas.util.terminal import get_terminal_size
from pandas.core.config import get_option, set_option
from pandas.io.common import _get_handle, UnicodeWriter
import pandas.core.common as com
import pandas.lib as lib
from pandas.tslib import iNaT, Timestamp, Timedelta, format_array_from_datetime
Expand All @@ -23,6 +24,7 @@

import itertools
import csv
import warnings

common_docstring = """
Parameters
Expand Down Expand Up @@ -1264,7 +1266,11 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', float_format=None,
tupleize_cols=False, quotechar='"', date_format=None,
doublequote=True, escapechar=None, decimal='.'):

self.engine = engine # remove for 0.13
if engine is not None:
warnings.warn("'engine' keyword is deprecated and "
"will be removed in a future version",
FutureWarning, stacklevel=3)
self.engine = engine # remove for 0.18
self.obj = obj

if path_or_buf is None:
Expand Down Expand Up @@ -1470,8 +1476,8 @@ def save(self):
f = self.path_or_buf
close = False
else:
f = com._get_handle(self.path_or_buf, self.mode,
encoding=self.encoding,
f = _get_handle(self.path_or_buf, self.mode,
encoding=self.encoding,
compression=self.compression)
close = True

Expand All @@ -1483,7 +1489,7 @@ def save(self):
quotechar=self.quotechar)
if self.encoding is not None:
writer_kwargs['encoding'] = self.encoding
self.writer = com.UnicodeWriter(f, **writer_kwargs)
self.writer = UnicodeWriter(f, **writer_kwargs)
else:
self.writer = csv.writer(f, **writer_kwargs)

Expand Down
16 changes: 13 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -802,11 +802,12 @@ def to_dict(self, orient='dict'):
elif orient.lower().startswith('sp'):
return {'index': self.index.tolist(),
'columns': self.columns.tolist(),
'data': self.values.tolist()}
'data': lib.map_infer(self.values.ravel(), _maybe_box_datetimelike)
.reshape(self.values.shape).tolist()}
elif orient.lower().startswith('s'):
return dict((k, v) for k, v in compat.iteritems(self))
return dict((k, _maybe_box_datetimelike(v)) for k, v in compat.iteritems(self))
elif orient.lower().startswith('r'):
return [dict((k, v) for k, v in zip(self.columns, row))
return [dict((k, _maybe_box_datetimelike(v)) for k, v in zip(self.columns, row))
for row in self.values]
elif orient.lower().startswith('i'):
return dict((k, v.to_dict()) for k, v in self.iterrows())
Expand Down Expand Up @@ -3157,6 +3158,15 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
else:
from pandas.core.groupby import _nargsort

# GH11080 - Check monotonic-ness before sort an index
# if monotonic (already sorted), return None or copy() according to 'inplace'
if (ascending and labels.is_monotonic_increasing) or \
(not ascending and labels.is_monotonic_decreasing):
if inplace:
return
else:
return self.copy()

indexer = _nargsort(labels, kind=kind, ascending=ascending,
na_position=na_position)

Expand Down
6 changes: 2 additions & 4 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -982,10 +982,6 @@ def _convert_list_indexer(self, keyarr, kind=None):
if kind in [None, 'iloc', 'ix'] and is_integer_dtype(keyarr) \
and not self.is_floating() and not isinstance(keyarr, ABCPeriodIndex):

if self.inferred_type != 'integer':
keyarr = np.where(keyarr < 0,
len(self) + keyarr, keyarr)

if self.inferred_type == 'mixed-integer':
indexer = self.get_indexer(keyarr)
if (indexer >= 0).all():
Expand All @@ -998,6 +994,8 @@ def _convert_list_indexer(self, keyarr, kind=None):
return maybe_convert_indices(indexer, len(self))

elif not self.inferred_type == 'integer':
keyarr = np.where(keyarr < 0,
len(self) + keyarr, keyarr)
return keyarr

return None
Expand Down
Loading

0 comments on commit 7c0e6f7

Please sign in to comment.