|
| 1 | +# (C) British Crown Copyright 2013, Met Office |
| 2 | +# |
| 3 | +# This file is part of Iris. |
| 4 | +# |
| 5 | +# Iris is free software: you can redistribute it and/or modify it under |
| 6 | +# the terms of the GNU Lesser General Public License as published by the |
| 7 | +# Free Software Foundation, either version 3 of the License, or |
| 8 | +# (at your option) any later version. |
| 9 | +# |
| 10 | +# Iris is distributed in the hope that it will be useful, |
| 11 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 | +# GNU Lesser General Public License for more details. |
| 14 | +# |
| 15 | +# You should have received a copy of the GNU Lesser General Public License |
| 16 | +# along with Iris. If not, see <http://www.gnu.org/licenses/>. |
| 17 | +""" |
| 18 | +Provide conversion to and from Pandas data structures. |
| 19 | +
|
| 20 | +See also: http://pandas.pydata.org/ |
| 21 | +
|
| 22 | +""" |
| 23 | +from __future__ import absolute_import |
| 24 | + |
| 25 | +import datetime |
| 26 | + |
| 27 | +import netcdftime |
| 28 | +import numpy as np |
| 29 | +import pandas |
| 30 | + |
| 31 | +import iris |
| 32 | +from iris.coords import AuxCoord, DimCoord |
| 33 | +from iris.cube import Cube |
| 34 | +from iris.unit import Unit |
| 35 | + |
| 36 | + |
def _add_iris_coord(cube, name, points, dim, calendar=None):
    """
    Add a Coord to a Cube from a Pandas index or columns array.

    If no calendar is specified for a time series, Gregorian is assumed.

    Args:

    * cube - The cube the new coordinate is added to (modified in place).

    * name - The name given to the new coordinate.

    * points - A Pandas index/columns object (or array) of coordinate values.

    * dim - The cube dimension the coordinate describes.

    Kwargs:

    * calendar - The calendar used when the points are datetimes.
                 Defaults to Gregorian.

    Numeric, strictly monotonic points are added as a DimCoord;
    anything else is added as an AuxCoord on the same dimension.

    """
    units = Unit("unknown")
    if calendar is None:
        calendar = iris.unit.CALENDAR_GREGORIAN

    # Convert pandas datetime objects to python datetime objects.
    # The public ``pandas.DatetimeIndex`` name and the vectorised
    # ``to_pydatetime`` are used rather than private module paths and
    # per-element conversion methods, which vary between pandas versions.
    if isinstance(points, pandas.DatetimeIndex):
        points = points.to_pydatetime()

    # Convert datetime objects to Iris' current datetime representation.
    if points.dtype == object:
        dt_types = (datetime.datetime, netcdftime.datetime)
        if all(isinstance(i, dt_types) for i in points):
            units = Unit("hours since epoch", calendar=calendar)
            points = units.date2num(points)

    points = np.array(points)
    # Only numeric, strictly monotonic values may form a DimCoord.
    if (np.issubdtype(points.dtype, np.number) and
            iris.util.monotonic(points, strict=True)):
        coord = DimCoord(points, units=units)
        coord.rename(name)
        cube.add_dim_coord(coord, dim)
    else:
        coord = AuxCoord(points, units=units)
        coord.rename(name)
        cube.add_aux_coord(coord, dim)
| 69 | + |
| 70 | + |
def as_cube(pandas_array, copy=True, calendars=None):
    """
    Build an Iris cube from a Pandas Series or DataFrame.

    Args:

    * pandas_array - A Pandas Series or DataFrame.

    Kwargs:

    * copy - Whether to make a copy of the data.
             Defaults to True.

    * calendars - A dict mapping a dimension number to a calendar.
                  Used when converting datetime indices/columns;
                  a dimension with no entry is treated as Gregorian.

    Example usage::

        as_cube(series, calendars={0: iris.unit.CALENDAR_360_DAY})
        as_cube(data_frame, calendars={1: iris.unit.CALENDAR_GREGORIAN})

    Raises a ValueError if the input is not 1D or 2D.

    .. note:: This function will copy your data by default.

    """
    if not calendars:
        calendars = {}

    ndim = pandas_array.ndim
    if ndim not in (1, 2):
        raise ValueError("Only 1D or 2D Pandas arrays "
                         "can currently be conveted to Iris cubes.")

    # Invalid values (NaN/inf) become masked points in the cube data.
    cube = Cube(np.ma.masked_invalid(pandas_array, copy=copy))

    # Dimension 0 always comes from the index; dimension 1 (DataFrames
    # only) comes from the columns.
    axes = [("index", pandas_array.index)]
    if ndim == 2:
        axes.append(("columns", pandas_array.columns))

    for dim, (label, values) in enumerate(axes):
        _add_iris_coord(cube, label, values, dim, calendars.get(dim))

    return cube
| 107 | + |
| 108 | + |
def _as_pandas_coord(coord):
    """
    Return the values of an Iris Coord for use as a Pandas index/columns.

    The coordinate's points are returned as they are, unless its units
    are a time reference, in which case they are first passed through
    the units' ``num2date``.

    """
    values = coord.points
    units = coord.units
    return units.num2date(values) if units.is_time_reference() else values
| 115 | + |
| 116 | + |
def _assert_shared(np_obj, pandas_obj):
    """
    Ensure the pandas object shares memory with the given numpy array.

    Args:

    * np_obj - The numpy array the pandas object was built from.

    * pandas_obj - The Pandas Series or DataFrame to check.

    Raises an AssertionError if the pandas object holds its own copy of
    the data, and a ValueError if it is not a Series or DataFrame.

    """
    if isinstance(pandas_obj, pandas.Series):
        kind = "Series"
    elif isinstance(pandas_obj, pandas.DataFrame):
        kind = "DataFrame"
    else:
        raise ValueError("Expected a Pandas Series or DataFrame")

    # Compare the underlying buffers directly.  This does not depend on
    # how many ``.base`` links pandas places between its values and the
    # original array, nor on the DataFrame having a column labelled 0.
    if not np.may_share_memory(np_obj, pandas_obj.values):
        raise AssertionError("Pandas %s does not share memory" % kind)
| 127 | + |
| 128 | + |
def as_series(cube, copy=True):
    """
    Convert a 1D cube to a Pandas Series.

    Args:

    * cube - The cube to convert to a Pandas Series.

    Kwargs:

    * copy - Whether to make a copy of the data.
             Defaults to True. Must be True for masked data.

    Raises a ValueError if the data is masked and copy=False.

    .. note::

        This function will copy your data by default.
        If you have a large array that cannot be copied,
        make sure it is not masked and use copy=False.

    """
    data = cube.data
    if isinstance(data, np.ma.MaskedArray):
        if not copy:
            raise ValueError("Masked arrays must always be copied.")
        # Masked points become NaN.  The data is cast to single-precision
        # float ('f') first so that NaN can be represented.
        data = data.astype('f').filled(np.nan)
    elif copy:
        data = data.copy()

    # The first dimension coordinate, if any, provides the index.
    index = None
    if cube.dim_coords:
        index = _as_pandas_coord(cube.dim_coords[0])

    # Any requested copy has already been made above, so explicitly ask
    # pandas not to copy again.  Without this the copy=False path depends
    # on the constructor's default, and newer pandas releases copy NumPy
    # input by default.
    series = pandas.Series(data, index, copy=False)
    if not copy:
        _assert_shared(data, series)

    return series
| 166 | + |
| 167 | + |
def as_data_frame(cube, copy=True):
    """
    Convert a 2D cube to a Pandas DataFrame.

    Args:

    * cube - The cube to convert to a Pandas DataFrame.

    Kwargs:

    * copy - Whether to make a copy of the data.
             Defaults to True. Must be True for masked data
             and some data types (see notes below).

    Raises a ValueError if the data is masked and copy=False.

    .. note::

        This function will copy your data by default.
        If you have a large array that cannot be copied,
        make sure it is not masked and use copy=False.

    .. note::

        Pandas will sometimes make a copy of the array,
        for example when creating from an int32 array.
        Iris will detect this and raise an exception if copy=False.

    """
    data = cube.data
    if isinstance(data, np.ma.MaskedArray):
        if not copy:
            raise ValueError("Masked arrays must always be copied.")
        # Masked points become NaN.  The data is cast to single-precision
        # float ('f') first so that NaN can be represented.
        data = data.astype('f').filled(np.nan)
    elif copy:
        data = data.copy()

    # Coordinates on dimension 0 / 1 provide the index / columns.
    index = columns = None
    if cube.coords(dimensions=[0]):
        index = _as_pandas_coord(cube.coord(dimensions=[0]))
    if cube.coords(dimensions=[1]):
        columns = _as_pandas_coord(cube.coord(dimensions=[1]))

    # Any requested copy has already been made above, so explicitly ask
    # pandas not to copy again.  Without this the copy=False path depends
    # on the constructor's default, and newer pandas releases copy NumPy
    # input by default.
    data_frame = pandas.DataFrame(data, index, columns, copy=False)
    if not copy:
        _assert_shared(data, data_frame)

    return data_frame
0 commit comments