Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TST: split up test_concat.py #37243 - more follows up #37387

Merged
merged 13 commits into from
Oct 28, 2020
Merged
Empty file.
7 changes: 7 additions & 0 deletions pandas/tests/reshape/concat/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import pytest


@pytest.fixture(params=[True, False])
def sort(request):
"""Boolean sort keyword for concat and DataFrame.append."""
return request.param
6 changes: 0 additions & 6 deletions pandas/tests/reshape/concat/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,6 @@
import pandas._testing as tm


@pytest.fixture(params=[True, False])
def sort(request):
"""Boolean sort keyword for concat and DataFrame.append."""
return request.param


class TestAppend:
def test_append(self, sort, float_frame):
mixed_frame = float_frame.copy()
Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/reshape/concat/test_append_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,3 +725,26 @@ def test_concat_categorical_empty(self):

tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)

def test_categorical_concat_append(self):
cat = Categorical(["a", "b"], categories=["a", "b"])
vals = [1, 2]
df = DataFrame({"cats": cat, "vals": vals})
cat2 = Categorical(["a", "b", "a", "b"], categories=["a", "b"])
vals2 = [1, 2, 1, 2]
exp = DataFrame({"cats": cat2, "vals": vals2}, index=Index([0, 1, 0, 1]))

tm.assert_frame_equal(pd.concat([df, df]), exp)
tm.assert_frame_equal(df.append(df), exp)

# GH 13524 can concat different categories
cat3 = Categorical(["a", "b"], categories=["a", "b", "c"])
vals3 = [1, 2]
df_different_categories = DataFrame({"cats": cat3, "vals": vals3})

res = pd.concat([df, df_different_categories], ignore_index=True)
exp = DataFrame({"cats": list("abab"), "vals": [1, 2, 1, 2]})
tm.assert_frame_equal(res, exp)

res = df.append(df_different_categories, ignore_index=True)
tm.assert_frame_equal(res, exp)
195 changes: 195 additions & 0 deletions pandas/tests/reshape/concat/test_categorical.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
import numpy as np
import pytest

from pandas.core.dtypes.dtypes import CategoricalDtype

import pandas as pd
from pandas import Categorical, DataFrame, Series
import pandas._testing as tm


class TestCategoricalConcat:
def test_categorical_concat(self, sort):
# See GH 10177
df1 = DataFrame(
np.arange(18, dtype="int64").reshape(6, 3), columns=["a", "b", "c"]
)

df2 = DataFrame(np.arange(14, dtype="int64").reshape(7, 2), columns=["a", "c"])

cat_values = ["one", "one", "two", "one", "two", "two", "one"]
df2["h"] = Series(Categorical(cat_values))

res = pd.concat((df1, df2), axis=0, ignore_index=True, sort=sort)
exp = DataFrame(
{
"a": [0, 3, 6, 9, 12, 15, 0, 2, 4, 6, 8, 10, 12],
"b": [
1,
4,
7,
10,
13,
16,
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
],
"c": [2, 5, 8, 11, 14, 17, 1, 3, 5, 7, 9, 11, 13],
"h": [None] * 6 + cat_values,
}
)
tm.assert_frame_equal(res, exp)

def test_categorical_concat_dtypes(self):

# GH8143
index = ["cat", "obj", "num"]
cat = Categorical(["a", "b", "c"])
obj = Series(["a", "b", "c"])
num = Series([1, 2, 3])
df = pd.concat([Series(cat), obj, num], axis=1, keys=index)

result = df.dtypes == "object"
expected = Series([False, True, False], index=index)
tm.assert_series_equal(result, expected)

result = df.dtypes == "int64"
expected = Series([False, False, True], index=index)
tm.assert_series_equal(result, expected)

result = df.dtypes == "category"
expected = Series([True, False, False], index=index)
tm.assert_series_equal(result, expected)

def test_concat_categoricalindex(self):
# GH 16111, categories that aren't lexsorted
categories = [9, 0, 1, 2, 3]

a = Series(1, index=pd.CategoricalIndex([9, 0], categories=categories))
b = Series(2, index=pd.CategoricalIndex([0, 1], categories=categories))
c = Series(3, index=pd.CategoricalIndex([1, 2], categories=categories))

result = pd.concat([a, b, c], axis=1)

exp_idx = pd.CategoricalIndex([9, 0, 1, 2], categories=categories)
exp = DataFrame(
{
0: [1, 1, np.nan, np.nan],
1: [np.nan, 2, 2, np.nan],
2: [np.nan, np.nan, 3, 3],
},
columns=[0, 1, 2],
index=exp_idx,
)
tm.assert_frame_equal(result, exp)

def test_categorical_concat_preserve(self):

# GH 8641 series concat not preserving category dtype
# GH 13524 can concat different categories
s = Series(list("abc"), dtype="category")
s2 = Series(list("abd"), dtype="category")

exp = Series(list("abcabd"))
res = pd.concat([s, s2], ignore_index=True)
tm.assert_series_equal(res, exp)

exp = Series(list("abcabc"), dtype="category")
res = pd.concat([s, s], ignore_index=True)
tm.assert_series_equal(res, exp)

exp = Series(list("abcabc"), index=[0, 1, 2, 0, 1, 2], dtype="category")
res = pd.concat([s, s])
tm.assert_series_equal(res, exp)

a = Series(np.arange(6, dtype="int64"))
b = Series(list("aabbca"))

df2 = DataFrame({"A": a, "B": b.astype(CategoricalDtype(list("cab")))})
res = pd.concat([df2, df2])
exp = DataFrame(
{
"A": pd.concat([a, a]),
"B": pd.concat([b, b]).astype(CategoricalDtype(list("cab"))),
}
)
tm.assert_frame_equal(res, exp)

def test_categorical_index_preserver(self):

a = Series(np.arange(6, dtype="int64"))
b = Series(list("aabbca"))

df2 = DataFrame(
{"A": a, "B": b.astype(CategoricalDtype(list("cab")))}
).set_index("B")
result = pd.concat([df2, df2])
expected = DataFrame(
{
"A": pd.concat([a, a]),
"B": pd.concat([b, b]).astype(CategoricalDtype(list("cab"))),
}
).set_index("B")
tm.assert_frame_equal(result, expected)

# wrong categories
df3 = DataFrame(
{"A": a, "B": Categorical(b, categories=list("abe"))}
).set_index("B")
msg = "categories must match existing categories when appending"
with pytest.raises(TypeError, match=msg):
pd.concat([df2, df3])

def test_concat_categorical_tz(self):
# GH-23816
a = Series(pd.date_range("2017-01-01", periods=2, tz="US/Pacific"))
b = Series(["a", "b"], dtype="category")
result = pd.concat([a, b], ignore_index=True)
expected = Series(
[
pd.Timestamp("2017-01-01", tz="US/Pacific"),
pd.Timestamp("2017-01-02", tz="US/Pacific"),
"a",
"b",
]
)
tm.assert_series_equal(result, expected)

def test_concat_categorical_unchanged(self):
# GH-12007
# test fix for when concat on categorical and float
# coerces dtype categorical -> float
df = DataFrame(Series(["a", "b", "c"], dtype="category", name="A"))
ser = Series([0, 1, 2], index=[0, 1, 3], name="B")
result = pd.concat([df, ser], axis=1)
expected = DataFrame(
{
"A": Series(["a", "b", "c", np.nan], dtype="category"),
"B": Series([0, 1, np.nan, 2], dtype="float"),
}
)
tm.assert_equal(result, expected)

def test_categorical_concat_gh7864(self):
# GH 7864
# make sure ordering is preserved
df = DataFrame({"id": [1, 2, 3, 4, 5, 6], "raw_grade": list("abbaae")})
df["grade"] = Categorical(df["raw_grade"])
df["grade"].cat.set_categories(["e", "a", "b"])

df1 = df[0:3]
df2 = df[3:]

tm.assert_index_equal(df["grade"].cat.categories, df1["grade"].cat.categories)
tm.assert_index_equal(df["grade"].cat.categories, df2["grade"].cat.categories)

dfx = pd.concat([df1, df2])
tm.assert_index_equal(df["grade"].cat.categories, dfx["grade"].cat.categories)

dfa = df1.append(df2)
tm.assert_index_equal(df["grade"].cat.categories, dfa["grade"].cat.categories)
Loading