From 05cd38f501e14624529ff0496c9df9dc07062fc3 Mon Sep 17 00:00:00 2001 From: Albert Shieh Date: Tue, 3 Apr 2018 15:12:26 +0000 Subject: [PATCH 1/4] HDFStore timezone handling for Series and empty DataFrames --- pandas/io/pytables.py | 44 +++++++++++++++----------------- pandas/tests/io/test_pytables.py | 12 ++++++++- 2 files changed, 32 insertions(+), 24 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index f9a496edb45a3..4d83caaa6dc94 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2695,30 +2695,24 @@ def write_array(self, key, value, items=None): _tables().ObjectAtom()) vlarr.append(value) else: - if empty_array: + if is_datetime64_dtype(value.dtype): + self._handle.create_array(self.group, key, value.view('i8')) + getattr(self.group, key)._v_attrs.value_type = 'datetime64' + elif is_datetime64tz_dtype(value.dtype): + # store as UTC + # with a zone + self._handle.create_array(self.group, key, value.asi8) + + node = getattr(self.group, key) + node._v_attrs.tz = _get_tz(value.tz) + node._v_attrs.value_type = 'datetime64' + elif is_timedelta64_dtype(value.dtype): + self._handle.create_array(self.group, key, value.view('i8')) + getattr(self.group, key)._v_attrs.value_type = 'timedelta64' + elif empty_array: self.write_array_empty(key, value) else: - if is_datetime64_dtype(value.dtype): - self._handle.create_array( - self.group, key, value.view('i8')) - getattr( - self.group, key)._v_attrs.value_type = 'datetime64' - elif is_datetime64tz_dtype(value.dtype): - # store as UTC - # with a zone - self._handle.create_array(self.group, key, - value.asi8) - - node = getattr(self.group, key) - node._v_attrs.tz = _get_tz(value.tz) - node._v_attrs.value_type = 'datetime64' - elif is_timedelta64_dtype(value.dtype): - self._handle.create_array( - self.group, key, value.view('i8')) - getattr( - self.group, key)._v_attrs.value_type = 'timedelta64' - else: - self._handle.create_array(self.group, key, value) + self._handle.create_array(self.group, key, value) getattr(self.group, key)._v_attrs.transposed = transposed @@ -2771,7 +2765,11 @@ def read(self, **kwargs): def write(self, obj, **kwargs): super(SeriesFixed, self).write(obj, **kwargs) self.write_index('index', obj.index) - self.write_array('values', obj.values) + if is_datetime64tz_dtype(obj.dtype): + values = obj._data.blocks[0].values + else: + values = obj.values + self.write_array('values', values) self.attrs.name = obj.name diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index b34723d6cf72c..3272985581a1d 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -2793,10 +2793,20 @@ def test_empty_series_frame(self): self._check_roundtrip(df2, tm.assert_frame_equal) def test_empty_series(self): - for dtype in [np.int64, np.float64, np.object, 'm8[ns]', 'M8[ns]']: + for dtype in [np.int64, np.float64, np.object, 'm8[ns]', 'M8[ns]', + 'datetime64[ns, UTC]']: s = Series(dtype=dtype) self._check_roundtrip(s, tm.assert_series_equal) + def test_series_timezone(self): + s = Series([0], dtype='datetime64[ns, UTC]') + self._check_roundtrip(s, tm.assert_series_equal) + + def test_empty_frame_timezone(self): + s = Series(dtype='datetime64[ns, UTC]') + df = DataFrame({'A': s}) + self._check_roundtrip(df, tm.assert_frame_equal) + def test_can_serialize_dates(self): rng = [x.date() for x in bdate_range('1/1/2000', '1/30/2000')] From fc7753e7358015326d85dc26534bf362956aaf26 Mon Sep 17 00:00:00 2001 From: Albert Shieh Date: Tue, 3 Apr 2018 15:36:21 +0000 Subject: [PATCH 2/4] Whatsnew entry --- doc/source/whatsnew/v0.23.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index fb63dc16249b2..67867c9c3f4ca 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -1099,6 +1099,7 @@ I/O - Bug in :meth:`pandas.io.json.json_normalize` where subrecords are not properly normalized if any subrecords values are NoneType (:issue:`20030`) - Bug in ``usecols`` parameter in :func:`pandas.io.read_csv` and :func:`pandas.io.read_table` where error is not raised correctly when passing a string. (:issue:`20529`) - Bug in :func:`HDFStore.keys` when reading a file with a softlink causes exception (:issue:`20523`) +- Bug in :class:`HDFStore` for Series and empty DataFrames with timezone-aware data (:issue:`20594`) Plotting ^^^^^^^^ From 9394dcf95c60afe10d396162a3daa392b2594358 Mon Sep 17 00:00:00 2001 From: Albert Shieh Date: Tue, 3 Apr 2018 17:31:42 +0000 Subject: [PATCH 3/4] Parametrize test on dtype --- pandas/tests/io/test_pytables.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 3272985581a1d..4668a00f6f87b 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -2792,11 +2792,13 @@ def test_empty_series_frame(self): self._check_roundtrip(df1, tm.assert_frame_equal) self._check_roundtrip(df2, tm.assert_frame_equal) - def test_empty_series(self): - for dtype in [np.int64, np.float64, np.object, 'm8[ns]', 'M8[ns]', - 'datetime64[ns, UTC]']: - s = Series(dtype=dtype) - self._check_roundtrip(s, tm.assert_series_equal) + @pytest.mark.parametrize('dtype', [ + np.int64, np.float64, np.object, 'm8[ns]', 'M8[ns]', + 'datetime64[ns, UTC]' + ]) + def test_empty_series(self, dtype): + s = Series(dtype=dtype) + self._check_roundtrip(s, tm.assert_series_equal) def test_series_timezone(self): s = Series([0], dtype='datetime64[ns, UTC]') From b0c8f8264acb53a05a91bdf4d414ea8566dfd9cc Mon Sep 17 00:00:00 2001 From: Albert Shieh Date: Thu, 5 Apr 2018 20:09:51 +0000 Subject: [PATCH 4/4] Use accessor, add tests, and fix whatsnew entry --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/io/pytables.py | 2 +- pandas/tests/io/test_pytables.py | 16 +++++++++++----- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 67867c9c3f4ca..661402e4f2f27 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -1099,7 +1099,7 @@ I/O - Bug in :meth:`pandas.io.json.json_normalize` where subrecords are not properly normalized if any subrecords values are NoneType (:issue:`20030`) - Bug in ``usecols`` parameter in :func:`pandas.io.read_csv` and :func:`pandas.io.read_table` where error is not raised correctly when passing a string. (:issue:`20529`) - Bug in :func:`HDFStore.keys` when reading a file with a softlink causes exception (:issue:`20523`) -- Bug in :class:`HDFStore` for Series and empty DataFrames with timezone-aware data (:issue:`20594`) +- Bug in :class:`HDFStore` export of a :class:`Series` or an empty :class:`DataFrame` with timezone-aware data in fixed format (:issue:`20594`) Plotting ^^^^^^^^ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 4d83caaa6dc94..efd2e02e12c8b 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2766,7 +2766,7 @@ def write(self, obj, **kwargs): super(SeriesFixed, self).write(obj, **kwargs) self.write_index('index', obj.index) if is_datetime64tz_dtype(obj.dtype): - values = obj._data.blocks[0].values + values = obj.dt._get_values() else: values = obj.values self.write_array('values', values) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 4668a00f6f87b..4e3816d7be0fd 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -2794,18 +2794,24 @@ def test_empty_series_frame(self): @pytest.mark.parametrize('dtype', [ np.int64, np.float64, np.object, 'm8[ns]', 'M8[ns]', - 'datetime64[ns, UTC]' + 'datetime64[ns, UTC]', 'datetime64[ns, US/Eastern]' ]) def test_empty_series(self, dtype): s = Series(dtype=dtype) self._check_roundtrip(s, tm.assert_series_equal) - def test_series_timezone(self): - s = Series([0], dtype='datetime64[ns, UTC]') + @pytest.mark.parametrize('dtype', [ + 'datetime64[ns, UTC]', 'datetime64[ns, US/Eastern]' + ]) + def test_series_timezone(self, dtype): + s = Series([0], dtype=dtype) self._check_roundtrip(s, tm.assert_series_equal) - def test_empty_frame_timezone(self): - s = Series(dtype='datetime64[ns, UTC]') + @pytest.mark.parametrize('dtype', [ + 'datetime64[ns, UTC]', 'datetime64[ns, US/Eastern]' + ]) + def test_empty_frame_timezone(self, dtype): + s = Series(dtype=dtype) df = DataFrame({'A': s}) self._check_roundtrip(df, tm.assert_frame_equal)