Skip to content

Commit

Permalink
TST / string dtype: clean-up xpassing tests with future string dtype
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche committed Jul 26, 2024
1 parent 7c96801 commit b3b65a3
Show file tree
Hide file tree
Showing 13 changed files with 16 additions and 36 deletions.
3 changes: 0 additions & 3 deletions pandas/tests/arithmetic/test_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas.util._test_decorators as td

import pandas as pd
Expand Down Expand Up @@ -303,7 +301,6 @@ def test_iadd_string(self):
index += "_x"
assert "a_x" in index

@pytest.mark.xfail(using_string_dtype(), reason="add doesn't work")
def test_add(self):
index = pd.Index([str(i) for i in range(10)])
expected = pd.Index(index.values * 2)
Expand Down
5 changes: 1 addition & 4 deletions pandas/tests/base/test_unique.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
import pandas._testing as tm
from pandas.tests.base.common import allow_na_ops
Expand Down Expand Up @@ -100,12 +98,11 @@ def test_nunique_null(null_obj, index_or_series_obj):


@pytest.mark.single_cpu
@pytest.mark.xfail(using_string_dtype(), reason="decoding fails")
def test_unique_bad_unicode(index_or_series):
# regression test for #34550
uval = "\ud83d" # smiley emoji

obj = index_or_series([uval] * 2)
obj = index_or_series([uval] * 2, dtype=object)
result = obj.unique()

if isinstance(obj, pd.Index):
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/frame/constructors/test_from_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def test_constructor_single_row(self):
)
tm.assert_frame_equal(result, expected)

@pytest.mark.skipif(using_string_dtype(), reason="columns inferring logic broken")
@pytest.mark.xfail(using_string_dtype(), reason="columns inferring logic broken")
def test_constructor_list_of_series(self):
data = [
OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]),
Expand Down Expand Up @@ -108,6 +108,7 @@ def test_constructor_list_of_series(self):
expected = DataFrame.from_dict(sdict, orient="index")
tm.assert_frame_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="columns inferring logic broken")
def test_constructor_orient(self, float_string_frame):
data_dict = float_string_frame.T._series
recons = DataFrame.from_dict(data_dict, orient="index")
Expand Down
4 changes: 1 addition & 3 deletions pandas/tests/frame/constructors/test_from_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,7 @@ def test_from_records_with_datetimes(self):
expected["EXPIRY"] = expected["EXPIRY"].astype("M8[s]")
tm.assert_frame_equal(result, expected)

@pytest.mark.skipif(
using_string_dtype(), reason="dtype checking logic doesn't work"
)
@pytest.mark.xfail(using_string_dtype(), reason="dtype checking logic doesn't work")
def test_from_records_sequencelike(self):
df = DataFrame(
{
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/frame/methods/test_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def test_fillna_datetime(self, datetime_frame):
with pytest.raises(TypeError, match=msg):
datetime_frame.fillna()

# TODO(infer_string) test as actual error instead of xfail
@pytest.mark.xfail(using_string_dtype(), reason="can't fill 0 in string")
def test_fillna_mixed_type(self, float_string_frame):
mf = float_string_frame
Expand Down Expand Up @@ -537,6 +538,7 @@ def test_fillna_col_reordering(self):
filled = df.ffill()
assert df.columns.tolist() == filled.columns.tolist()

# TODO(infer_string) test as actual error instead of xfail
@pytest.mark.xfail(using_string_dtype(), reason="can't fill 0 in string")
def test_fill_corner(self, float_frame, float_string_frame):
mf = float_string_frame
Expand Down
11 changes: 6 additions & 5 deletions pandas/tests/frame/methods/test_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from pandas import (
CategoricalIndex,
DataFrame,
Index,
MultiIndex,
Series,
date_range,
Expand Down Expand Up @@ -360,7 +361,7 @@ def test_info_memory_usage():
df = DataFrame(data)
df.columns = dtypes

df_with_object_index = DataFrame({"a": [1]}, index=["foo"])
df_with_object_index = DataFrame({"a": [1]}, index=Index(["foo"], dtype=object))
df_with_object_index.info(buf=buf, memory_usage=True)
res = buf.getvalue().splitlines()
assert re.match(r"memory usage: [^+]+\+", res[-1])
Expand Down Expand Up @@ -398,25 +399,25 @@ def test_info_memory_usage():

@pytest.mark.skipif(PYPY, reason="on PyPy deep=True doesn't change result")
def test_info_memory_usage_deep_not_pypy():
df_with_object_index = DataFrame({"a": [1]}, index=["foo"])
df_with_object_index = DataFrame({"a": [1]}, index=Index(["foo"], dtype=object))
assert (
df_with_object_index.memory_usage(index=True, deep=True).sum()
> df_with_object_index.memory_usage(index=True).sum()
)

df_object = DataFrame({"a": ["a"]})
df_object = DataFrame({"a": Series(["a"], dtype=object)})
assert df_object.memory_usage(deep=True).sum() > df_object.memory_usage().sum()


@pytest.mark.xfail(not PYPY, reason="on PyPy deep=True does not change result")
def test_info_memory_usage_deep_pypy():
df_with_object_index = DataFrame({"a": [1]}, index=["foo"])
df_with_object_index = DataFrame({"a": [1]}, index=Index(["foo"], dtype=object))
assert (
df_with_object_index.memory_usage(index=True, deep=True).sum()
== df_with_object_index.memory_usage(index=True).sum()
)

df_object = DataFrame({"a": ["a"]})
df_object = DataFrame({"a": Series(["a"], dtype=object)})
assert df_object.memory_usage(deep=True).sum() == df_object.memory_usage().sum()


Expand Down
1 change: 1 addition & 0 deletions pandas/tests/frame/methods/test_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def test_interpolate_inplace(self, frame_or_series, request):
assert np.shares_memory(orig, obj.values)
assert orig.squeeze()[1] == 1.5

# TODO(infer_string) raise proper TypeError in case of string dtype
@pytest.mark.xfail(
using_string_dtype(), reason="interpolate doesn't work for string"
)
Expand Down
3 changes: 0 additions & 3 deletions pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -251,7 +249,6 @@ def test_timestamp_compare(self, left, right):
with pytest.raises(TypeError, match=msg):
right_f(pd.Timestamp("nat"), df)

@pytest.mark.xfail(using_string_dtype(), reason="can't compare string and int")
def test_mixed_comparison(self):
# GH#13128, GH#22163 != datetime64 vs non-dt64 should be False,
# not raise TypeError
Expand Down
7 changes: 2 additions & 5 deletions pandas/tests/indexes/interval/test_formats.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas import (
DataFrame,
DatetimeIndex,
Expand Down Expand Up @@ -42,12 +40,11 @@ def test_repr_missing(self, constructor, expected, using_infer_string, request):
result = repr(obj)
assert result == expected

@pytest.mark.xfail(using_string_dtype(), reason="repr different")
def test_repr_floats(self):
# GH 32553

markers = Series(
["foo", "bar"],
[1, 2],
index=IntervalIndex(
[
Interval(left, right)
Expand All @@ -59,7 +56,7 @@ def test_repr_floats(self):
),
)
result = str(markers)
expected = "(329.973, 345.137] foo\n(345.137, 360.191] bar\ndtype: object"
expected = "(329.973, 345.137] 1\n(345.137, 360.191] 2\ndtype: int64"
assert result == expected

@pytest.mark.parametrize(
Expand Down
4 changes: 0 additions & 4 deletions pandas/tests/indexing/test_coercion.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat import (
IS64,
is_platform_windows,
Expand Down Expand Up @@ -825,8 +823,6 @@ def replacer(self, how, from_key, to_key):
raise ValueError
return replacer

# Expected needs adjustment for the infer string option, seems to work as expected
@pytest.mark.skipif(using_string_dtype(), reason="TODO: test is to complex")
def test_replace_series(self, how, to_key, from_key, replacer):
index = pd.Index([3, 4], name="xxx")
obj = pd.Series(self.rep[from_key], index=index, name="yyy")
Expand Down
4 changes: 0 additions & 4 deletions pandas/tests/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.errors import IndexingError

from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -426,7 +424,6 @@ def test_set_index_nan(self):
)
tm.assert_frame_equal(result, df)

@pytest.mark.xfail(using_string_dtype(), reason="can't multiply arrow strings")
def test_multi_assign(self):
# GH 3626, an assignment of a sub-df to a df
# set float64 to avoid upcast when setting nan
Expand Down Expand Up @@ -652,7 +649,6 @@ def test_loc_setitem_fullindex_views(self):
df.loc[df.index] = df.loc[df.index]
tm.assert_frame_equal(df, df2)

@pytest.mark.xfail(using_string_dtype(), reason="can't set int into string")
def test_rhs_alignment(self):
# GH8258, tests that both rows & columns are aligned to what is
# assigned to. covers both uniform data-type & multi-type cases
Expand Down
3 changes: 0 additions & 3 deletions pandas/tests/series/methods/test_reindex.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas import (
NA,
Categorical,
Expand All @@ -22,7 +20,6 @@
import pandas._testing as tm


@pytest.mark.xfail(using_string_dtype(), reason="share memory doesn't work for arrow")
def test_reindex(datetime_series, string_series):
identity = string_series.reindex(string_series.index)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/series/test_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def test_tidy_repr_name_0(self, arg):
assert "Name: 0" in rep_str

@pytest.mark.xfail(
using_string_dtype(), reason="TODO: investigate why this is failing"
using_string_dtype(), reason="TODO(infer_string): investigate failure"
)
def test_newline(self):
ser = Series(["a\n\r\tb"], name="a\n\r\td", index=["a\n\r\tf"])
Expand Down

0 comments on commit b3b65a3

Please sign in to comment.