Skip to content

Commit

Permalink
Merge branch 'master' into pickle_io_compression
Browse files Browse the repository at this point in the history
  • Loading branch information
goldenbull authored Dec 28, 2016
2 parents 025a0cd + 7f0eefc commit 81d55a0
Show file tree
Hide file tree
Showing 112 changed files with 2,638 additions and 1,541 deletions.
5 changes: 2 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,10 @@ matrix:
apt:
packages:
- xsel
- python: 3.6-dev
- python: 3.6
env:
- PYTHON_VERSION=3.6
- JOB_NAME: "36_dev"
- JOB_TAG=_DEV
- JOB_NAME: "36"
- NOSE_ARGS="not slow and not network and not disabled"
- PANDAS_TESTING_MODE="deprecate"
addons:
Expand Down
1 change: 1 addition & 0 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ install:
- cmd: conda config --set ssl_verify false

# add the pandas channel *before* defaults to have defaults take priority
- cmd: conda config --add channels conda-forge
- cmd: conda config --add channels pandas
- cmd: conda config --remove channels defaults
- cmd: conda config --add channels defaults
Expand Down
7 changes: 7 additions & 0 deletions asv_bench/benchmarks/frame_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ class Iteration(object):
def setup(self):
self.df = DataFrame(randn(10000, 1000))
self.df2 = DataFrame(np.random.randn(50000, 10))
self.df3 = pd.DataFrame(np.random.randn(1000,5000),
columns=['C'+str(c) for c in range(5000)])

def f(self):
if hasattr(self.df, '_item_cache'):
Expand All @@ -85,6 +87,11 @@ def time_iteritems(self):
def time_iteritems_cached(self):
self.g()

def time_iteritems_indexing(self):
df = self.df3
for col in df:
df[col]

def time_itertuples(self):
for row in self.df2.itertuples():
pass
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/io_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def setup(self, compression, engine):
# The Python 2 C parser can't read bz2 from open files.
raise NotImplementedError
try:
import boto
import s3fs
except ImportError:
# Skip these benchmarks if `s3fs` is not installed.
raise NotImplementedError
Expand Down
25 changes: 25 additions & 0 deletions asv_bench/benchmarks/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,28 @@ def time_value_counts_pindex(self):
self.i.value_counts()


class period_standard_indexing(object):
    """Benchmarks for everyday operations on a daily ``PeriodIndex``.

    ``setup`` builds the fixtures; every ``time_*`` method performs a single
    operation on them and discards the result, since only the call itself
    is being exercised.
    """

    goal_time = 0.2

    def setup(self):
        """Create a 1000-period daily index, a Series on it and a probe key."""
        self.index = PeriodIndex(start='1985', periods=1000, freq='D')
        self.series = Series(range(1000), index=self.index)
        # An element taken from the middle of the index, used as lookup key.
        self.period = self.index[500]

    def time_get_loc(self):
        """Look up the position of a single Period in the index."""
        self.index.get_loc(self.period)

    def time_shape(self):
        """Access the ``shape`` attribute of the index."""
        self.index.shape

    def time_shallow_copy(self):
        """Call the index's private ``_shallow_copy`` with no arguments."""
        self.index._shallow_copy()

    def time_series_loc(self):
        """Label-based lookup of one Period in the Series via ``.loc``."""
        self.series.loc[self.period]

    def time_align(self):
        """Build a DataFrame from the full Series and its first 500 rows."""
        pd.DataFrame({'a': self.series, 'b': self.series[:500]})

    def time_intersection(self):
        """Intersect two overlapping slices (0-749 and 250-999) of the index."""
        self.index[:750].intersection(self.index[250:])
16 changes: 0 additions & 16 deletions ci/install-3.6_DEV.sh

This file was deleted.

13 changes: 11 additions & 2 deletions ci/install_travis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ else
conda config --set always_yes true --set changeps1 false || exit 1
conda update -q conda

# add the pandas channel *before* defaults to have defaults take priority
# add the pandas channel to take priority
# to add extra packages
echo "add channels"
conda config --add channels pandas || exit 1
conda config --remove channels defaults || exit 1
Expand All @@ -90,7 +91,15 @@ if [ -e ${INSTALL} ]; then
else

# create new env
time conda create -n pandas python=$PYTHON_VERSION nose coverage flake8 || exit 1
time conda create -n pandas python=$PYTHON_VERSION nose || exit 1

if [ "$COVERAGE" ]; then
pip install coverage
fi
if [ "$LINT" ]; then
conda install flake8
pip install cpplint
fi
fi

# build deps
Expand Down
5 changes: 2 additions & 3 deletions ci/lint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ source activate pandas
RET=0

if [ "$LINT" ]; then

# pandas/rpy is deprecated and will be removed.
# pandas/src is C code, so no need to search there.
echo "Linting *.py"
Expand Down Expand Up @@ -43,13 +44,11 @@ if [ "$LINT" ]; then
# from Cython files nor do we want to lint C files that we didn't modify for
# this particular codebase (e.g. src/headers, src/klib, src/msgpack). However,
# we can lint all header files since they aren't "generated" like C files are.
pip install cpplint

echo "Linting *.c and *.h"
for path in '*.h' 'period_helper.c' 'datetime' 'parser' 'ujson'
do
echo "linting -> pandas/src/$path"
cpplint --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/src/$path
cpplint --quiet --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/src/$path
if [ $? -ne "0" ]; then
RET=1
fi
Expand Down
4 changes: 2 additions & 2 deletions ci/requirements-2.7-64.run
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@ pytz
numpy=1.10*
xlwt
numexpr
pytables
pytables==3.2.2
matplotlib
openpyxl
xlrd
sqlalchemy
lxml=3.2.1
scipy
xlsxwriter
boto
s3fs
bottleneck
html5lib
beautiful-soup
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements-2.7.run
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ sqlalchemy=0.9.6
lxml=3.2.1
scipy
xlsxwriter=0.4.6
boto=2.36.0
s3fs
bottleneck
psycopg2=2.5.2
patsy
Expand Down
7 changes: 7 additions & 0 deletions ci/requirements-2.7.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# Install step for the 2.7 CI requirements (ci/requirements-2.7.sh).

# Activate the `pandas` conda environment.
source activate pandas

echo "install 27"

# Install feather-format into the `pandas` env from the conda-forge channel.
conda install -n pandas -c conda-forge feather-format
2 changes: 1 addition & 1 deletion ci/requirements-2.7_SLOW.run
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ numexpr
pytables
sqlalchemy
lxml
boto
s3fs
bottleneck
psycopg2
pymysql
Expand Down
3 changes: 2 additions & 1 deletion ci/requirements-3.5-64.run
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
python-dateutil
pytz
numpy=1.10*
numpy
openpyxl
xlsxwriter
xlrd
xlwt
scipy
feather-format
numexpr
pytables
matplotlib
Expand Down
6 changes: 2 additions & 4 deletions ci/requirements-3.5.run
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,5 @@ sqlalchemy
pymysql
psycopg2
xarray
boto

# incompat with conda ATM
# beautiful-soup
s3fs
beautifulsoup4
7 changes: 7 additions & 0 deletions ci/requirements-3.5.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# Install step for the 3.5 CI requirements (ci/requirements-3.5.sh).

# Activate the `pandas` conda environment.
source activate pandas

echo "install 35"

# Install feather-format into the `pandas` env from the conda-forge channel.
conda install -n pandas -c conda-forge feather-format
6 changes: 2 additions & 4 deletions ci/requirements-3.5_OSX.run
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,5 @@ matplotlib
jinja2
bottleneck
xarray
boto

# incompat with conda ATM
# beautiful-soup
s3fs
beautifulsoup4
7 changes: 7 additions & 0 deletions ci/requirements-3.5_OSX.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# Install step for the 3.5 OSX CI requirements (ci/requirements-3.5_OSX.sh).

# Activate the `pandas` conda environment.
source activate pandas

echo "install 35_OSX"

# Install feather-format into the `pandas` env from the conda-forge channel.
conda install -n pandas -c conda-forge feather-format
4 changes: 4 additions & 0 deletions ci/requirements-3.6.build
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
python-dateutil
pytz
numpy
cython
4 changes: 4 additions & 0 deletions ci/requirements-3.6.run
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
python-dateutil
pytz
numpy
scipy
Binary file added doc/cheatsheet/Pandas_Cheat_Sheet.pdf
Binary file not shown.
Binary file added doc/cheatsheet/Pandas_Cheat_Sheet.pptx
Binary file not shown.
4 changes: 4 additions & 0 deletions doc/cheatsheet/README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
The Pandas Cheat Sheet was created using Microsoft PowerPoint 2013.
To create the PDF version, within PowerPoint, simply do a "Save As"
and pick "PDF" as the format.

9 changes: 9 additions & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,14 @@ HDFStore: PyTables (HDF5)
HDFStore.get
HDFStore.select

Feather
~~~~~~~

.. autosummary::
:toctree: generated/

read_feather

SAS
~~~

Expand Down Expand Up @@ -1015,6 +1023,7 @@ Serialization / IO / Conversion
DataFrame.to_excel
DataFrame.to_json
DataFrame.to_html
DataFrame.to_feather
DataFrame.to_latex
DataFrame.to_stata
DataFrame.to_msgpack
Expand Down
12 changes: 12 additions & 0 deletions doc/source/basics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1757,6 +1757,7 @@ then the more *general* one will be used as the result of the operation.
# conversion of dtypes
df3.astype('float32').dtypes
Convert a subset of columns to a specified type using :meth:`~DataFrame.astype`

.. ipython:: python
Expand All @@ -1766,6 +1767,17 @@ Convert a subset of columns to a specified type using :meth:`~DataFrame.astype`
dft
dft.dtypes
.. versionadded:: 0.19.0

Convert certain columns to a specific dtype by passing a dict to :meth:`~DataFrame.astype`

.. ipython:: python
dft1 = pd.DataFrame({'a': [1,0,1], 'b': [4,5,6], 'c': [7, 8, 9]})
dft1 = dft1.astype({'a': np.bool, 'c': np.float64})
dft1
dft1.dtypes
.. note::

When trying to convert a subset of columns to a specified type using :meth:`~DataFrame.astype` and :meth:`~DataFrame.loc`, upcasting occurs.
Expand Down
3 changes: 2 additions & 1 deletion doc/source/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ Optional Dependencies
* `SciPy <http://www.scipy.org>`__: miscellaneous statistical functions
* `xarray <http://xarray.pydata.org>`__: pandas-like handling for > 2 dims, needed for converting Panels to xarray objects. Version 0.7.0 or higher is recommended.
* `PyTables <http://www.pytables.org>`__: necessary for HDF5-based storage. Version 3.0.0 or higher required, Version 3.2.1 or higher highly recommended.
* `Feather Format <https://github.com/wesm/feather>`__: necessary for feather-based storage, version 0.3.1 or higher.
* `SQLAlchemy <http://www.sqlalchemy.org>`__: for SQL database support. Version 0.8.1 or higher recommended. Besides SQLAlchemy, you also need a database specific driver. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs <http://docs.sqlalchemy.org/en/latest/dialects/index.html>`__. Some common drivers are:

- `psycopg2 <http://initd.org/psycopg/>`__: for PostgreSQL
Expand All @@ -262,7 +263,7 @@ Optional Dependencies
* `XlsxWriter <https://pypi.python.org/pypi/XlsxWriter>`__: Alternative Excel writer

* `Jinja2 <http://jinja.pocoo.org/>`__: Template engine for conditional HTML formatting.
* `boto <https://pypi.python.org/pypi/boto>`__: necessary for Amazon S3 access.
* `s3fs <http://s3fs.readthedocs.io/>`__: necessary for Amazon S3 access (s3fs >= 0.0.7).
* `blosc <https://pypi.python.org/pypi/blosc>`__: for msgpack compression using ``blosc``
* One of `PyQt4
<http://www.riverbankcomputing.com/software/pyqt/download>`__, `PySide
Expand Down
Loading

0 comments on commit 81d55a0

Please sign in to comment.