Skip to content

Commit

Permalink
BUG: Fix concat series loss of timezone (pandas-dev#24027)
Browse files Browse the repository at this point in the history
  • Loading branch information
evangelinehl authored and jreback committed Dec 5, 2018
1 parent 4ae63aa commit b841374
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 10 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1554,6 +1554,7 @@ Reshaping
- Bug in :meth:`DataFrame.append` with a :class:`Series` with a dateutil timezone would raise a ``TypeError`` (:issue:`23682`)
- Bug in ``Series`` construction when passing no data and ``dtype=str`` (:issue:`22477`)
- Bug in :func:`cut` with ``bins`` as an overlapping ``IntervalIndex`` where multiple bins were returned per item instead of raising a ``ValueError`` (:issue:`23980`)
- Bug in :func:`pandas.concat` where concatenating a timezone-aware datetime ``Series`` with a categorical ``Series`` would lose the timezone (:issue:`23816`)
- Bug in :meth:`DataFrame.join` when joining on partial MultiIndex would drop names (:issue:`20452`).

.. _whatsnew_0240.bug_fixes.sparse:
Expand Down
18 changes: 8 additions & 10 deletions pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,15 +191,6 @@ def _concat_categorical(to_concat, axis=0):
A single array, preserving the combined dtypes
"""

# Helper that converts every input to a plain array and concatenates them.
# NOTE(review): it reads ``axis`` without taking it as a parameter, so it
# presumably lives inside ``_concat_categorical(to_concat, axis=0)`` - confirm.
def _concat_asobject(to_concat):
# categoricals are unpacked via ``get_values()``; anything else goes through
# ``np.asarray`` and is flattened with ``ravel()``
to_concat = [x.get_values() if is_categorical_dtype(x.dtype)
else np.asarray(x).ravel() for x in to_concat]
res = _concat_compat(to_concat)
if axis == 1:
# hand back a single row of shape (1, len(res))
return res.reshape(1, len(res))
else:
return res

# we could have object blocks and categoricals here
# if we only have categoricals of a single dtype then combine everything
# else it's a non-compat categorical
Expand All @@ -214,7 +205,14 @@ def _concat_asobject(to_concat):
if all(first.is_dtype_equal(other) for other in to_concat[1:]):
return union_categoricals(categoricals)

return _concat_asobject(to_concat)
# extract the categoricals & coerce to object if needed
to_concat = [x.get_values() if is_categorical_dtype(x.dtype)
else np.asarray(x).ravel() if not is_datetime64tz_dtype(x)
else np.asarray(x.astype(object)) for x in to_concat]
result = _concat_compat(to_concat)
if axis == 1:
result = result.reshape(1, len(result))
return result


def union_categoricals(to_union, sort_categories=False, ignore_order=False):
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/reshape/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2552,3 +2552,16 @@ def test_concat_series_name_npscalar_tuple(s1name, s2name):
result = pd.concat([s1, s2])
expected = pd.Series({'a': 1, 'b': 2, 'c': 5, 'd': 6})
tm.assert_series_equal(result, expected)


def test_concat_categorical_tz():
    """Check that concat of tz-aware datetimes and categoricals keeps the tz.

    Regression test for GH-23816.
    """
    tz_aware = pd.Series(
        pd.date_range('2017-01-01', periods=2, tz='US/Pacific'))
    categorical = pd.Series(['a', 'b'], dtype='category')

    # expected holds the tz-aware Timestamps followed by the category values
    timestamps = [pd.Timestamp(day, tz="US/Pacific")
                  for day in ('2017-01-01', '2017-01-02')]
    expected = pd.Series(timestamps + ['a', 'b'])

    result = pd.concat([tz_aware, categorical], ignore_index=True)
    tm.assert_series_equal(result, expected)

0 comments on commit b841374

Please sign in to comment.