From 88cffbe853a90b9eef522c66292039e29c6e8f17 Mon Sep 17 00:00:00 2001
From: sinhrks
Date: Tue, 13 May 2014 21:11:59 +0900
Subject: [PATCH] BUG: tzinfo lost when concatenating multiindex arrays

---
 doc/source/v0.14.1.txt           |  1 +
 pandas/core/index.py             | 11 +++++++++
 pandas/tests/test_multilevel.py  | 41 ++++++++++++++++++++++++++++++++
 pandas/tools/tests/test_merge.py | 21 ++++++++++++++++
 pandas/tools/tests/test_pivot.py | 35 +++++++++++++++++++++++++++
 5 files changed, 109 insertions(+)

diff --git a/doc/source/v0.14.1.txt b/doc/source/v0.14.1.txt
index 5e1d237b2b559..a2ea345051afd 100644
--- a/doc/source/v0.14.1.txt
+++ b/doc/source/v0.14.1.txt
@@ -70,3 +70,4 @@ Bug Fixes
 - Bug in ``DataFrame`` and ``Series`` bar and barh plot raises ``TypeError`` when ``bottom``
   and ``left`` keyword is specified (:issue:`7226`)
 - BUG in ``DataFrame.hist`` raises ``TypeError`` when it contains non numeric column (:issue:`7277`)
+- Bug in ``MultiIndex.append``, ``concat`` and ``pivot_table`` where timezone was not preserved (:issue:`6606`)
diff --git a/pandas/core/index.py b/pandas/core/index.py
index 02d6e983f5183..6eac395b1ecad 100644
--- a/pandas/core/index.py
+++ b/pandas/core/index.py
@@ -2948,6 +2948,17 @@ def append(self, other):
         if not isinstance(other, (list, tuple)):
             other = [other]
 
+        # Append level by level so that per-level information (e.g. the
+        # timezone of a DatetimeIndex level) is kept; the tuple concatenation
+        # below drops it. Levels of ``other`` beyond ``self.nlevels`` are ignored.
+        if all((isinstance(o, MultiIndex) and o.nlevels >= self.nlevels) for o in other):
+            arrays = []
+            for i in range(self.nlevels):
+                label = self.get_level_values(i)
+                appended = [o.get_level_values(i) for o in other]
+                arrays.append(label.append(appended))
+            return MultiIndex.from_arrays(arrays, names=self.names)
+
         to_concat = (self.values,) + tuple(k.values for k in other)
         new_tuples = np.concatenate(to_concat)
 
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index d02e52715a735..07b753b6724d8 100644
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -1,4 +1,5 @@
 # pylint: disable-msg=W0612,E1101,W0141
+import datetime
 import nose
 
 from numpy.random import randn
@@ -70,6 +71,46 @@ def test_append(self):
         result = a['A'].append(b['A'])
         tm.assert_series_equal(result, self.frame['A'])
 
+    def test_append_index(self):
+        # GH 6606
+        idx1 = Index([1.1, 1.2, 1.3])
+        idx2 = pd.date_range('2011-01-01', freq='D', periods=3, tz='Asia/Tokyo')
+        idx3 = Index(['A', 'B', 'C'])
+
+        midx_lv2 = MultiIndex.from_arrays([idx1, idx2])
+        midx_lv3 = MultiIndex.from_arrays([idx1, idx2, idx3])
+
+        result = idx1.append(midx_lv2)
+        expected = Index([1.1, 1.2, 1.3,
+                          (1.1, datetime.datetime(2010, 12, 31, 15, 0)),
+                          (1.2, datetime.datetime(2011, 1, 1, 15, 0)),
+                          (1.3, datetime.datetime(2011, 1, 2, 15, 0))])
+        self.assertTrue(result.equals(expected))
+
+        result = midx_lv2.append(idx1)
+        expected = Index([(1.1, datetime.datetime(2010, 12, 31, 15, 0)),
+                          (1.2, datetime.datetime(2011, 1, 1, 15, 0)),
+                          (1.3, datetime.datetime(2011, 1, 2, 15, 0)),
+                          1.1, 1.2, 1.3])
+        self.assertTrue(result.equals(expected))
+
+        result = midx_lv2.append(midx_lv2)
+        expected = MultiIndex.from_arrays([idx1.append(idx1), idx2.append(idx2)])
+        self.assertTrue(result.equals(expected))
+
+        result = midx_lv2.append(midx_lv3)
+        self.assertTrue(result.equals(expected))
+
+        result = midx_lv3.append(midx_lv2)
+        expected = Index._simple_new(
+            np.array([(1.1, datetime.datetime(2010, 12, 31, 15, 0), 'A'),
+                      (1.2, datetime.datetime(2011, 1, 1, 15, 0), 'B'),
+                      (1.3, datetime.datetime(2011, 1, 2, 15, 0), 'C'),
+                      (1.1, datetime.datetime(2010, 12, 31, 15, 0)),
+                      (1.2, datetime.datetime(2011, 1, 1, 15, 0)),
+                      (1.3, datetime.datetime(2011, 1, 2, 15, 0))]), None)
+        self.assertTrue(result.equals(expected))
+
     def test_dataframe_constructor(self):
         multi = DataFrame(np.random.randn(4, 4),
                           index=[np.array(['a', 'a', 'b', 'b']),
diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py
index c4d11f0c15b39..f2239bba520e7 100644
--- a/pandas/tools/tests/test_merge.py
+++ b/pandas/tools/tests/test_merge.py
@@ -8,6 +8,7 @@
 import numpy as np
 import random
 
+import pandas as pd
 from pandas.compat import range, lrange, lzip, zip, StringIO
 from pandas import compat, _np_version_under1p7
 from pandas.tseries.index import DatetimeIndex
@@ -1497,6 +1498,26 @@ def test_concat_multiindex_with_keys(self):
         tm.assert_frame_equal(result.ix[1], frame)
         self.assertEqual(result.index.nlevels, 3)
 
+    def test_concat_multiindex_with_tz(self):
+        # GH 6606
+        df = DataFrame({'dt': [datetime(2014, 1, 1),
+                               datetime(2014, 1, 2),
+                               datetime(2014, 1, 3)],
+                        'b': ['A', 'B', 'C'],
+                        'c': [1, 2, 3], 'd': [4, 5, 6]})
+        df['dt'] = df['dt'].apply(lambda d: pd.Timestamp(d, tz='US/Pacific'))
+        df = df.set_index(['dt', 'b'])
+
+        exp_idx1 = pd.DatetimeIndex(['2014-01-01', '2014-01-02', '2014-01-03'] * 2,
+                                    tz='US/Pacific', name='dt')
+        exp_idx2 = Index(['A', 'B', 'C'] * 2, name='b')
+        exp_idx = pd.MultiIndex.from_arrays([exp_idx1, exp_idx2])
+        expected = DataFrame({'c': [1, 2, 3] * 2, 'd': [4, 5, 6] * 2},
+                             index=exp_idx, columns=['c', 'd'])
+
+        result = concat([df, df])
+        tm.assert_frame_equal(result, expected)
+
     def test_concat_keys_and_levels(self):
         df = DataFrame(np.random.randn(1, 3))
         df2 = DataFrame(np.random.randn(1, 4))
diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py
index 23320e5b4e3a1..e2f275f8a39d1 100644
--- a/pandas/tools/tests/test_pivot.py
+++ b/pandas/tools/tests/test_pivot.py
@@ -490,6 +490,41 @@ def test_pivot_timegrouper(self):
                              values='Quantity', aggfunc=np.sum)
         tm.assert_frame_equal(result, expected.T)
 
+    def test_pivot_datetime_tz(self):
+        dates1 = ['2011-07-19 07:00:00', '2011-07-19 08:00:00', '2011-07-19 09:00:00',
+                  '2011-07-19 07:00:00', '2011-07-19 08:00:00', '2011-07-19 09:00:00']
+        dates2 = ['2013-01-01 15:00:00', '2013-01-01 15:00:00', '2013-01-01 15:00:00',
+                  '2013-02-01 15:00:00', '2013-02-01 15:00:00', '2013-02-01 15:00:00']
+        df = DataFrame({'label': ['a', 'a', 'a', 'b', 'b', 'b'],
+                        'dt1': dates1, 'dt2': dates2,
+                        'value1': range(6), 'value2': [1, 2] * 3})
+        df['dt1'] = df['dt1'].apply(lambda d: pd.Timestamp(d, tz='US/Pacific'))
+        df['dt2'] = df['dt2'].apply(lambda d: pd.Timestamp(d, tz='Asia/Tokyo'))
+
+        exp_idx = pd.DatetimeIndex(['2011-07-19 07:00:00', '2011-07-19 08:00:00',
+                                    '2011-07-19 09:00:00'], tz='US/Pacific', name='dt1')
+        exp_col1 = Index(['value1', 'value1'])
+        exp_col2 = Index(['a', 'b'], name='label')
+        exp_col = MultiIndex.from_arrays([exp_col1, exp_col2])
+        expected = DataFrame([[0, 3], [1, 4], [2, 5]],
+                             index=exp_idx, columns=exp_col)
+        result = pivot_table(df, index=['dt1'], columns=['label'], values=['value1'])
+        tm.assert_frame_equal(result, expected)
+
+        # GH 6606: tz-aware ``dt2`` as a level of the resulting column MultiIndex
+        exp_col1 = Index(['sum', 'sum', 'sum', 'sum', 'mean', 'mean', 'mean', 'mean'])
+        exp_col2 = Index(['value1', 'value1', 'value2', 'value2'] * 2)
+        exp_col3 = pd.DatetimeIndex(['2013-01-01 15:00:00', '2013-02-01 15:00:00'] * 4,
+                                    tz='Asia/Tokyo', name='dt2')
+        exp_col = MultiIndex.from_arrays([exp_col1, exp_col2, exp_col3])
+        expected = DataFrame(np.array([[0, 3, 1, 2, 0, 3, 1, 2], [1, 4, 2, 1, 1, 4, 2, 1],
+                                       [2, 5, 1, 2, 2, 5, 1, 2]]), index=exp_idx, columns=exp_col)
+
+        result = pivot_table(df, index=['dt1'], columns=['dt2'], values=['value1', 'value2'],
+                             aggfunc=[np.sum, np.mean])
+        tm.assert_frame_equal(result, expected)
+
+
 class TestCrosstab(tm.TestCase):
 
     def setUp(self):