Skip to content

Commit 19a5487

Browse files
committed
Pandas 1D & 2D
1 parent 358f2a4 commit 19a5487

27 files changed

+805
-1
lines changed

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ install:
3737
- ./.travis_no_output sudo apt-get install libgeos-dev libproj-dev
3838
- ./.travis_no_output sudo apt-get install libudunits2-dev libhdf5-serial-dev netcdf-bin libnetcdf-dev
3939
- ./.travis_no_output sudo apt-get install make unzip python-sphinx graphviz
40-
- ./.travis_no_output sudo /usr/bin/pip install pyke netCDF4
40+
- ./.travis_no_output sudo /usr/bin/pip install pyke netCDF4 pandas
4141
- ./.travis_no_output sudo apt-get install openjdk-7-jre
4242
- ./.travis_no_output sudo apt-get install python-gdal
4343
- export LD_LIBRARY_PATH=/usr/lib/jvm/java-7-openjdk-amd64/jre/lib/amd64/server:$LD_LIBRARY_PATH

CHANGES

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ Features added
1111
to a directory containing the test data required by the unit tests. It can
1212
be set by adding a ``test_data_dir`` entry to the ``Resources`` section of
1313
site.cfg. See `iris.config` for more details.
14+
* One and two dimensional cubes can be converted to and from Pandas objects.
15+
See `iris.pandas` for more details.
1416

1517
Bugs fixed
1618
----------

docs/iris/src/whatsnew/1.4.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ A summary of the main features added with version 1.4:
1818
to a directory containing the test data required by the unit tests. It can
1919
be set by adding a ``test_data_dir`` entry to the ``Resources`` section of
2020
``site.cfg``. See :mod:`iris.config` for more details.
21+
* One and two dimensional cubes can be converted to and from Pandas objects.
22+
See :mod:`iris.pandas` for more details.
2123

2224
Bugs fixed
2325
----------

lib/iris/pandas.py

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
# (C) British Crown Copyright 2013, Met Office
2+
#
3+
# This file is part of Iris.
4+
#
5+
# Iris is free software: you can redistribute it and/or modify it under
6+
# the terms of the GNU Lesser General Public License as published by the
7+
# Free Software Foundation, either version 3 of the License, or
8+
# (at your option) any later version.
9+
#
10+
# Iris is distributed in the hope that it will be useful,
11+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
# GNU Lesser General Public License for more details.
14+
#
15+
# You should have received a copy of the GNU Lesser General Public License
16+
# along with Iris. If not, see <http://www.gnu.org/licenses/>.
17+
"""
18+
Provide conversion to and from Pandas data structures.
19+
20+
See also: http://pandas.pydata.org/
21+
22+
"""
23+
from __future__ import absolute_import
24+
25+
import datetime
26+
27+
import netcdftime
28+
import numpy as np
29+
import pandas
30+
31+
import iris
32+
from iris.coords import AuxCoord, DimCoord
33+
from iris.cube import Cube
34+
from iris.unit import Unit
35+
36+
37+
def _add_iris_coord(cube, name, points, dim, calendar=None):
    """
    Add a Coord to a Cube from a Pandas index or columns array.

    If no calendar is specified for a time series, Gregorian is assumed.

    Args:

    * cube - The cube the new coordinate is added to (modified in place).

    * name - The name given to the new coordinate.

    * points - The Pandas index or columns array providing the points.

    * dim - The cube data dimension described by the coordinate.

    Kwargs:

    * calendar - The calendar used when the points are datetimes.
                 Defaults to Gregorian.

    Numeric, strictly monotonic points are added as a DimCoord;
    anything else is added as an AuxCoord.

    """
    units = Unit("unknown")
    if calendar is None:
        calendar = iris.unit.CALENDAR_GREGORIAN

    # Convert pandas datetime objects to python datetime objects.
    # ``pandas.DatetimeIndex`` is the public name of the index class and
    # ``to_pydatetime`` the supported Timestamp conversion.
    if isinstance(points, pandas.DatetimeIndex):
        points = np.array([i.to_pydatetime() for i in points])

    # Convert datetime objects to Iris' current datetime representation.
    # An empty object array must not count as "all datetimes".
    if points.dtype == object and len(points) > 0:
        dt_types = (datetime.datetime, netcdftime.datetime)
        if all(isinstance(i, dt_types) for i in points):
            units = Unit("hours since epoch", calendar=calendar)
            points = units.date2num(points)

    points = np.array(points)
    is_dimension = (np.issubdtype(points.dtype, np.number) and
                    iris.util.monotonic(points, strict=True))
    if is_dimension:
        coord = DimCoord(points, units=units)
        coord.rename(name)
        cube.add_dim_coord(coord, dim)
    else:
        coord = AuxCoord(points, units=units)
        coord.rename(name)
        cube.add_aux_coord(coord, dim)
69+
70+
71+
def as_cube(pandas_array, copy=True, calendars=None):
    """
    Convert a Pandas array into an Iris cube.

    Args:

    * pandas_array - A Pandas Series or DataFrame.

    Kwargs:

    * copy - Whether to make a copy of the data.
             Defaults to True.

    * calendars - A dict mapping a dimension to a calendar.
                  Used when converting datetime indices/columns.
                  Dimensions without an entry are treated as Gregorian.

    Returns the new cube. Invalid (NaN/inf) values are masked in its data.
    The index becomes a coordinate called "index" on dimension 0 and,
    for 2D input, the columns become a coordinate called "columns"
    on dimension 1.

    Raises ValueError if the Pandas array is not 1D or 2D.

    Example usage::

        as_cube(series, calendars={0: iris.unit.CALENDAR_360_DAY})
        as_cube(data_frame, calendars={1: iris.unit.CALENDAR_GREGORIAN})

    .. note:: This function will copy your data by default.

    """
    calendars = calendars or {}
    if pandas_array.ndim not in [1, 2]:
        raise ValueError("Only 1D or 2D Pandas arrays "
                         "can currently be converted to Iris cubes.")

    cube = Cube(np.ma.masked_invalid(pandas_array, copy=copy))
    _add_iris_coord(cube, "index", pandas_array.index, 0,
                    calendars.get(0))
    if pandas_array.ndim == 2:
        _add_iris_coord(cube, "columns", pandas_array.columns, 1,
                        calendars.get(1))
    return cube
107+
108+
109+
def _as_pandas_coord(coord):
    """
    Return the points of an Iris Coord in a form usable as a Pandas
    index or columns array.

    Points of a time-reference coordinate are converted to dates with
    the coordinate's units; any other points are returned unchanged.

    """
    values = coord.points
    units = coord.units
    return units.num2date(values) if units.is_time_reference() else values
115+
116+
117+
def _assert_shared(np_obj, pandas_obj):
    """
    Ensure the pandas object shares memory.

    Args:

    * np_obj - The NumPy array that was passed to the Pandas constructor.

    * pandas_obj - The Pandas Series or DataFrame built from that array.

    Raises AssertionError if the Pandas object does not share memory with
    the array, and ValueError if it is neither a Series nor a DataFrame.

    """
    if isinstance(pandas_obj, pandas.Series):
        kind = "Series"
    elif isinstance(pandas_obj, pandas.DataFrame):
        kind = "DataFrame"
    else:
        raise ValueError("Expected a Pandas Series or DataFrame")

    # A zero-sized array owns no bytes, so there is nothing to share.
    if np_obj.size == 0:
        return

    # Compare the underlying buffers directly. This avoids looking up a
    # column labelled 0 (which need not exist) and does not depend on how
    # many ``.base`` hops Pandas puts between its values and the array.
    if not np.may_share_memory(np_obj, pandas_obj.values):
        raise AssertionError("Pandas %s does not share memory" % kind)
127+
128+
129+
def as_series(cube, copy=True):
    """
    Convert a 1D cube to a Pandas Series.

    Args:

    * cube - The cube to convert to a Pandas Series.

    Kwargs:

    * copy - Whether to make a copy of the data.
             Defaults to True. Must be True for masked data.

    Returns a Pandas Series. If the cube has a dimension coordinate,
    its points are used as the index. Masked data is converted to
    single-precision floats with masked points replaced by NaN.

    Raises ValueError if the data is masked and copy=False.

    .. note::

        This function will copy your data by default.
        If you have a large array that cannot be copied,
        make sure it is not masked and use copy=False.

    """
    data = cube.data
    if isinstance(data, np.ma.MaskedArray):
        if not copy:
            raise ValueError("Masked arrays must always be copied.")
        data = data.astype('f').filled(np.nan)
    elif copy:
        data = data.copy()

    index = None
    if cube.dim_coords:
        index = _as_pandas_coord(cube.dim_coords[0])

    # Any requested copy has already been taken above, so tell Pandas
    # explicitly not to copy again rather than relying on its default.
    series = pandas.Series(data, index, copy=False)
    if not copy:
        _assert_shared(data, series)

    return series
166+
167+
168+
def as_data_frame(cube, copy=True):
    """
    Convert a 2D cube to a Pandas DataFrame.

    Args:

    * cube - The cube to convert to a Pandas DataFrame.

    Kwargs:

    * copy - Whether to make a copy of the data.
             Defaults to True. Must be True for masked data
             and some data types (see notes below).

    Returns a Pandas DataFrame. A coordinate on dimension 0 provides
    the index and a coordinate on dimension 1 provides the columns,
    where present. Masked data is converted to single-precision floats
    with masked points replaced by NaN.

    Raises ValueError if the data is masked and copy=False.

    .. note::

        This function will copy your data by default.
        If you have a large array that cannot be copied,
        make sure it is not masked and use copy=False.

    .. note::

        Pandas will sometimes make a copy of the array,
        for example when creating from an int32 array.
        Iris will detect this and raise an exception if copy=False.

    """
    data = cube.data
    if isinstance(data, np.ma.MaskedArray):
        if not copy:
            raise ValueError("Masked arrays must always be copied.")
        data = data.astype('f').filled(np.nan)
    elif copy:
        data = data.copy()

    index = columns = None
    if cube.coords(dimensions=[0]):
        index = _as_pandas_coord(cube.coord(dimensions=[0]))
    if cube.coords(dimensions=[1]):
        columns = _as_pandas_coord(cube.coord(dimensions=[1]))

    # Any requested copy has already been taken above, so tell Pandas
    # explicitly not to copy again rather than relying on its default.
    data_frame = pandas.DataFrame(data, index=index, columns=columns,
                                  copy=False)
    if not copy:
        _assert_shared(data, data_frame)

    return data_frame
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<?xml version="1.0" ?>
2+
<cubes xmlns="urn:x-iris:cubeml-0.2">
3+
<cube units="unknown">
4+
<coords>
5+
<coord datadims="[1]">
6+
<dimCoord id="20efe55f" long_name="columns" points="[10, 11, 12, 13, 14]" shape="(5,)" units="Unit('unknown')" value_type="int64"/>
7+
</coord>
8+
<coord datadims="[0]">
9+
<dimCoord id="da06dd05" long_name="index" points="[271753.016944, 281282.033889]" shape="(2,)" units="Unit('hours since 1970-01-01 00:00:00', calendar='gregorian')" value_type="float64"/>
10+
</coord>
11+
</coords>
12+
<cellMethods/>
13+
<data byteorder="little" checksum="0x5e85e4a4" dtype="int64" mask_checksum="-0x1c75978a" mask_order="C" order="C" shape="(2, 5)"/>
14+
</cube>
15+
</cubes>
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<?xml version="1.0" ?>
2+
<cubes xmlns="urn:x-iris:cubeml-0.2">
3+
<cube units="unknown">
4+
<coords>
5+
<coord datadims="[1]">
6+
<dimCoord id="20efe55f" long_name="columns" points="[12, 13, 14, 15, 16]" shape="(5,)" units="Unit('unknown')" value_type="int64"/>
7+
</coord>
8+
<coord datadims="[0]">
9+
<dimCoord id="931e7cde" long_name="index" points="[10, 11]" shape="(2,)" units="Unit('unknown')" value_type="int64"/>
10+
</coord>
11+
</coords>
12+
<cellMethods/>
13+
<data byteorder="little" checksum="-0x731c5757" dtype="float64" mask_checksum="-0x1215b28c" mask_order="C" order="C" shape="(2, 5)"/>
14+
</cube>
15+
</cubes>
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<?xml version="1.0" ?>
2+
<cubes xmlns="urn:x-iris:cubeml-0.2">
3+
<cube units="unknown">
4+
<coords>
5+
<coord datadims="[1]">
6+
<dimCoord id="20efe55f" long_name="columns" points="[10, 11, 12, 13, 14]" shape="(5,)" units="Unit('unknown')" value_type="int64"/>
7+
</coord>
8+
<coord datadims="[0]">
9+
<dimCoord id="e2253575" long_name="index" points="[267841.016944, 277226.033889]" shape="(2,)" units="Unit('hours since 1970-01-01 00:00:00', calendar='360_day')" value_type="float64"/>
10+
</coord>
11+
</coords>
12+
<cellMethods/>
13+
<data byteorder="little" checksum="0x5e85e4a4" dtype="int64" mask_checksum="-0x1c75978a" mask_order="C" order="C" shape="(2, 5)"/>
14+
</cube>
15+
</cubes>
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<?xml version="1.0" ?>
2+
<cubes xmlns="urn:x-iris:cubeml-0.2">
3+
<cube units="unknown">
4+
<coords>
5+
<coord datadims="[1]">
6+
<auxCoord id="20efe55f" long_name="columns" points="[12, 12, 14, 15, 16]" shape="(5,)" units="Unit('unknown')" value_type="int64"/>
7+
</coord>
8+
<coord datadims="[0]">
9+
<auxCoord id="931e7cde" long_name="index" points="[10, 10]" shape="(2,)" units="Unit('unknown')" value_type="int64"/>
10+
</coord>
11+
</coords>
12+
<cellMethods/>
13+
<data byteorder="little" checksum="0x5e85e4a4" dtype="int64" mask_checksum="-0x1c75978a" mask_order="C" order="C" shape="(2, 5)"/>
14+
</cube>
15+
</cubes>
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<?xml version="1.0" ?>
2+
<cubes xmlns="urn:x-iris:cubeml-0.2">
3+
<cube units="unknown">
4+
<coords>
5+
<coord datadims="[1]">
6+
<dimCoord id="20efe55f" long_name="columns" points="[12, 13, 14, 15, 16]" shape="(5,)" units="Unit('unknown')" value_type="int64"/>
7+
</coord>
8+
<coord datadims="[0]">
9+
<dimCoord id="931e7cde" long_name="index" points="[10, 11]" shape="(2,)" units="Unit('unknown')" value_type="int64"/>
10+
</coord>
11+
</coords>
12+
<cellMethods/>
13+
<data byteorder="little" checksum="0x5e85e4a4" dtype="int64" mask_checksum="-0x1c75978a" mask_order="C" order="C" shape="(2, 5)"/>
14+
</cube>
15+
</cubes>
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
<?xml version="1.0" ?>
2+
<cubes xmlns="urn:x-iris:cubeml-0.2">
3+
<cube units="unknown">
4+
<coords>
5+
<coord datadims="[0]">
6+
<dimCoord id="da06dd05" long_name="index" points="[271753.016944, 281282.033889, 290739.050833,
7+
300292.067778, 309797.084722]" shape="(5,)" units="Unit('hours since 1970-01-01 00:00:00', calendar='gregorian')" value_type="float64"/>
8+
</coord>
9+
</coords>
10+
<cellMethods/>
11+
<data byteorder="little" checksum="0x6c8f4e1c" dtype="int64" mask_checksum="-0x39dd08e3" mask_order="C" order="C" shape="(5,)"/>
12+
</cube>
13+
</cubes>

0 commit comments

Comments
 (0)