Merge branch 'master' into pickle_io_compression

AnkurDedania · Dec 28, 2016 · 81d55a0 · 81d55a0
2 parents 025a0cd + 7f0eefc
commit 81d55a0
Show file tree

Hide file tree

Showing 112 changed files with 2,638 additions and 1,541 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -80,11 +80,10 @@ matrix:
         apt:
           packages:
           - xsel
-    - python: 3.6-dev
+    - python: 3.6
       env:
         - PYTHON_VERSION=3.6
-        - JOB_NAME: "36_dev"
-        - JOB_TAG=_DEV
+        - JOB_NAME: "36"
         - NOSE_ARGS="not slow and not network and not disabled"
         - PANDAS_TESTING_MODE="deprecate"
       addons:

diff --git a/appveyor.yml b/appveyor.yml
@@ -80,6 +80,7 @@ install:
   - cmd: conda config --set ssl_verify false
 
   # add the pandas channel *before* defaults to have defaults take priority
+  - cmd: conda config --add channels conda-forge
   - cmd: conda config --add channels pandas
   - cmd: conda config --remove channels defaults
   - cmd: conda config --add channels defaults

diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py
@@ -68,6 +68,8 @@ class Iteration(object):
     def setup(self):
         self.df = DataFrame(randn(10000, 1000))
         self.df2 = DataFrame(np.random.randn(50000, 10))
+        self.df3 = pd.DataFrame(np.random.randn(1000,5000),
+                                columns=['C'+str(c) for c in range(5000)])
 
     def f(self):
         if hasattr(self.df, '_item_cache'):
@@ -85,6 +87,11 @@ def time_iteritems(self):
     def time_iteritems_cached(self):
         self.g()
 
+    def time_iteritems_indexing(self):
+        df = self.df3
+        for col in df:
+            df[col]
+
     def time_itertuples(self):
         for row in self.df2.itertuples():
             pass

diff --git a/asv_bench/benchmarks/io_bench.py b/asv_bench/benchmarks/io_bench.py
@@ -153,7 +153,7 @@ def setup(self, compression, engine):
             # The Python 2 C parser can't read bz2 from open files.
             raise NotImplementedError
         try:
-            import boto
+            import s3fs
         except ImportError:
             # Skip these benchmarks if `s3fs` is not installed.
             raise NotImplementedError

diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py
@@ -49,3 +49,28 @@ def time_value_counts_pindex(self):
         self.i.value_counts()
 
 
+class period_standard_indexing(object):
+    goal_time = 0.2
+
+    def setup(self):
+        self.index = PeriodIndex(start='1985', periods=1000, freq='D')
+        self.series = Series(range(1000), index=self.index)
+        self.period = self.index[500]
+
+    def time_get_loc(self):
+        self.index.get_loc(self.period)
+
+    def time_shape(self):
+        self.index.shape
+
+    def time_shallow_copy(self):
+        self.index._shallow_copy()
+
+    def time_series_loc(self):
+        self.series.loc[self.period]
+
+    def time_align(self):
+        pd.DataFrame({'a': self.series, 'b': self.series[:500]})
+
+    def time_intersection(self):
+        self.index[:750].intersection(self.index[250:])
diff --git a/ci/install-3.6_DEV.sh b/ci/install-3.6_DEV.sh
diff --git a/ci/install_travis.sh b/ci/install_travis.sh
@@ -71,7 +71,8 @@ else
     conda config --set always_yes true --set changeps1 false || exit 1
     conda update -q conda
 
-    # add the pandas channel *before* defaults to have defaults take priority
+    # add the pandas channel to take priority
+    # to add extra packages
     echo "add channels"
     conda config --add channels pandas || exit 1
     conda config --remove channels defaults || exit 1
@@ -90,7 +91,15 @@ if [ -e ${INSTALL} ]; then
 else
 
     # create new env
-    time conda create -n pandas python=$PYTHON_VERSION nose coverage flake8 || exit 1
+    time conda create -n pandas python=$PYTHON_VERSION nose || exit 1
+
+    if [ "$COVERAGE" ]; then
+        pip install coverage
+    fi
+    if [ "$LINT" ]; then
+        conda install flake8
+        pip install cpplint
+    fi
 fi
 
 # build deps

diff --git a/ci/lint.sh b/ci/lint.sh
@@ -7,6 +7,7 @@ source activate pandas
 RET=0
 
 if [ "$LINT" ]; then
+
     # pandas/rpy is deprecated and will be removed.
     # pandas/src is C code, so no need to search there.
     echo "Linting  *.py"
@@ -43,13 +44,11 @@ if [ "$LINT" ]; then
     # from Cython files nor do we want to lint C files that we didn't modify for
     # this particular codebase (e.g. src/headers, src/klib, src/msgpack). However,
     # we can lint all header files since they aren't "generated" like C files are.
-    pip install cpplint
-
     echo "Linting *.c and *.h"
     for path in '*.h' 'period_helper.c' 'datetime' 'parser' 'ujson'
     do
         echo "linting -> pandas/src/$path"
-        cpplint --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/src/$path
+        cpplint --quiet --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/src/$path
         if [ $? -ne "0" ]; then
             RET=1
         fi

diff --git a/ci/requirements-2.7-64.run b/ci/requirements-2.7-64.run
@@ -3,15 +3,15 @@ pytz
 numpy=1.10*
 xlwt
 numexpr
-pytables
+pytables==3.2.2
 matplotlib
 openpyxl
 xlrd
 sqlalchemy
 lxml=3.2.1
 scipy
 xlsxwriter
-boto
+s3fs
 bottleneck
 html5lib
 beautiful-soup

diff --git a/ci/requirements-2.7.run b/ci/requirements-2.7.run
@@ -11,7 +11,7 @@ sqlalchemy=0.9.6
 lxml=3.2.1
 scipy
 xlsxwriter=0.4.6
-boto=2.36.0
+s3fs
 bottleneck
 psycopg2=2.5.2
 patsy

diff --git a/ci/requirements-2.7.sh b/ci/requirements-2.7.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+source activate pandas
+
+echo "install 27"
+
+conda install -n pandas -c conda-forge feather-format
diff --git a/ci/requirements-2.7_SLOW.run b/ci/requirements-2.7_SLOW.run
@@ -13,7 +13,7 @@ numexpr
 pytables
 sqlalchemy
 lxml
-boto
+s3fs
 bottleneck
 psycopg2
 pymysql

diff --git a/ci/requirements-3.5-64.run b/ci/requirements-3.5-64.run
@@ -1,11 +1,12 @@
 python-dateutil
 pytz
-numpy=1.10*
+numpy
 openpyxl
 xlsxwriter
 xlrd
 xlwt
 scipy
+feather-format
 numexpr
 pytables
 matplotlib

diff --git a/ci/requirements-3.5.run b/ci/requirements-3.5.run
@@ -17,7 +17,5 @@ sqlalchemy
 pymysql
 psycopg2
 xarray
-boto
-
-# incompat with conda ATM
-# beautiful-soup
+s3fs
+beautifulsoup4
diff --git a/ci/requirements-3.5.sh b/ci/requirements-3.5.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+source activate pandas
+
+echo "install 35"
+
+conda install -n pandas -c conda-forge feather-format
diff --git a/ci/requirements-3.5_OSX.run b/ci/requirements-3.5_OSX.run
@@ -12,7 +12,5 @@ matplotlib
 jinja2
 bottleneck
 xarray
-boto
-
-# incompat with conda ATM
-# beautiful-soup
+s3fs
+beautifulsoup4
diff --git a/ci/requirements-3.5_OSX.sh b/ci/requirements-3.5_OSX.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+source activate pandas
+
+echo "install 35_OSX"
+
+conda install -n pandas -c conda-forge feather-format
diff --git a/ci/requirements-3.6.build b/ci/requirements-3.6.build
@@ -0,0 +1,4 @@
+python-dateutil
+pytz
+numpy
+cython
diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run
@@ -0,0 +1,4 @@
+python-dateutil
+pytz
+numpy
+scipy
diff --git a/doc/cheatsheet/Pandas_Cheat_Sheet.pdf b/doc/cheatsheet/Pandas_Cheat_Sheet.pdf
diff --git a/doc/cheatsheet/Pandas_Cheat_Sheet.pptx b/doc/cheatsheet/Pandas_Cheat_Sheet.pptx
diff --git a/doc/cheatsheet/README.txt b/doc/cheatsheet/README.txt
@@ -0,0 +1,4 @@
+The Pandas Cheat Sheet was created using Microsoft Powerpoint 2013.
+To create the PDF version, within Powerpoint, simply do a "Save As"
+and pick "PDF" as the format.
+
diff --git a/doc/source/api.rst b/doc/source/api.rst
@@ -83,6 +83,14 @@ HDFStore: PyTables (HDF5)
    HDFStore.get
    HDFStore.select
 
+Feather
+~~~~~~~
+
+.. autosummary::
+   :toctree: generated/
+
+   read_feather
+
 SAS
 ~~~
 
@@ -1015,6 +1023,7 @@ Serialization / IO / Conversion
    DataFrame.to_excel
    DataFrame.to_json
    DataFrame.to_html
+   DataFrame.to_feather
    DataFrame.to_latex
    DataFrame.to_stata
    DataFrame.to_msgpack

diff --git a/doc/source/basics.rst b/doc/source/basics.rst
@@ -1757,6 +1757,7 @@ then the more *general* one will be used as the result of the operation.
    # conversion of dtypes
    df3.astype('float32').dtypes
 
+
 Convert a subset of columns to a specified type using :meth:`~DataFrame.astype`
 
 .. ipython:: python
@@ -1766,6 +1767,17 @@ Convert a subset of columns to a specified type using :meth:`~DataFrame.astype`
    dft
    dft.dtypes
 
+.. versionadded:: 0.19.0
+
+Convert certain columns to a specific dtype by passing a dict to :meth:`~DataFrame.astype`
+
+.. ipython:: python
+
+   dft1 = pd.DataFrame({'a': [1,0,1], 'b': [4,5,6], 'c': [7, 8, 9]})
+   dft1 = dft1.astype({'a': np.bool, 'c': np.float64})
+   dft1
+   dft1.dtypes
+
 .. note::
 
     When trying to convert a subset of columns to a specified type using :meth:`~DataFrame.astype`  and :meth:`~DataFrame.loc`, upcasting occurs.

diff --git a/doc/source/install.rst b/doc/source/install.rst
@@ -247,6 +247,7 @@ Optional Dependencies
 * `SciPy <http://www.scipy.org>`__: miscellaneous statistical functions
 * `xarray <http://xarray.pydata.org>`__: pandas like handling for > 2 dims, needed for converting Panels to xarray objects. Version 0.7.0 or higher is recommended.
 * `PyTables <http://www.pytables.org>`__: necessary for HDF5-based storage. Version 3.0.0 or higher required, Version 3.2.1 or higher highly recommended.
+* `Feather Format <https://github.com/wesm/feather>`__: necessary for feather-based storage, version 0.3.1 or higher.
 * `SQLAlchemy <http://www.sqlalchemy.org>`__: for SQL database support. Version 0.8.1 or higher recommended. Besides SQLAlchemy, you also need a database specific driver. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs <http://docs.sqlalchemy.org/en/latest/dialects/index.html>`__. Some common drivers are:
 
     - `psycopg2 <http://initd.org/psycopg/>`__: for PostgreSQL
@@ -262,7 +263,7 @@ Optional Dependencies
   * `XlsxWriter <https://pypi.python.org/pypi/XlsxWriter>`__: Alternative Excel writer
 
 * `Jinja2 <http://jinja.pocoo.org/>`__: Template engine for conditional HTML formatting.
-* `boto <https://pypi.python.org/pypi/boto>`__: necessary for Amazon S3 access.
+* `s3fs <http://s3fs.readthedocs.io/>`__: necessary for Amazon S3 access (s3fs >= 0.0.7).
 * `blosc <https://pypi.python.org/pypi/blosc>`__: for msgpack compression using ``blosc``
 * One of `PyQt4
   <http://www.riverbankcomputing.com/software/pyqt/download>`__, `PySide