-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
TST: refactored test_factorize #32311
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e419141
cdca771
1b4825b
9c22a50
d26d0fa
76d73cc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -533,66 +533,27 @@ def test_value_counts_datetime64(self, index_or_series): | |
result2 = td2.value_counts() | ||
tm.assert_series_equal(result2, expected_s) | ||
|
||
def test_factorize(self): | ||
for orig in self.objs: | ||
o = orig.copy() | ||
|
||
if isinstance(o, Index) and o.is_boolean(): | ||
exp_arr = np.array([0, 1] + [0] * 8, dtype=np.intp) | ||
exp_uniques = o | ||
exp_uniques = Index([False, True]) | ||
else: | ||
exp_arr = np.array(range(len(o)), dtype=np.intp) | ||
exp_uniques = o | ||
codes, uniques = o.factorize() | ||
|
||
tm.assert_numpy_array_equal(codes, exp_arr) | ||
if isinstance(o, Series): | ||
tm.assert_index_equal(uniques, Index(orig), check_names=False) | ||
else: | ||
# factorize explicitly resets name | ||
tm.assert_index_equal(uniques, exp_uniques, check_names=False) | ||
|
||
def test_factorize_repeated(self): | ||
for orig in self.objs: | ||
o = orig.copy() | ||
@pytest.mark.parametrize("sort", [True, False]) | ||
def test_factorize(self, index_or_series_obj, sort): | ||
obj = index_or_series_obj | ||
result_codes, result_uniques = obj.factorize(sort=sort) | ||
|
||
# don't test boolean | ||
if isinstance(o, Index) and o.is_boolean(): | ||
continue | ||
constructor = pd.Index | ||
if isinstance(obj, pd.MultiIndex): | ||
constructor = pd.MultiIndex.from_tuples | ||
expected_uniques = constructor(obj.unique()) | ||
|
||
# sort by value, and create duplicates | ||
if isinstance(o, Series): | ||
o = o.sort_values() | ||
n = o.iloc[5:].append(o) | ||
else: | ||
indexer = o.argsort() | ||
o = o.take(indexer) | ||
n = o[5:].append(o) | ||
|
||
exp_arr = np.array( | ||
[5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.intp | ||
) | ||
codes, uniques = n.factorize(sort=True) | ||
|
||
tm.assert_numpy_array_equal(codes, exp_arr) | ||
if isinstance(o, Series): | ||
tm.assert_index_equal( | ||
uniques, Index(orig).sort_values(), check_names=False | ||
) | ||
else: | ||
tm.assert_index_equal(uniques, o, check_names=False) | ||
if sort: | ||
expected_uniques = expected_uniques.sort_values() | ||
|
||
exp_arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4], np.intp) | ||
codes, uniques = n.factorize(sort=False) | ||
tm.assert_numpy_array_equal(codes, exp_arr) | ||
# construct an integer ndarray so that | ||
# `expected_uniques.take(expected_codes)` is equal to `obj` | ||
expected_uniques_list = list(expected_uniques) | ||
expected_codes = [expected_uniques_list.index(val) for val in obj] | ||
expected_codes = np.asarray(expected_codes, dtype=np.intp) | ||
Comment on lines +552 to +553
Review thread (inline code spans lost during page extraction are marked […]):
- Can you not just use np.take here instead?
- Using expected_uniques.take is better.
- I don't think I can use […]. I could only use […]. I guess I could use […].
- I think you can just use […]:
    In [7]: import pandas as pd
    In [8]: obj = pd.Series([1, 2, 1, 3, 5])
    In [10]: pd.factorize(obj)
    Out[10]: (array([0, 1, 0, 2, 3]), Int64Index([1, 2, 3, 5], dtype='int64'))
- I'm testing factorize here, so I need an alternative implementation 😄
- OK, you can actually use […] |
||
|
||
if isinstance(o, Series): | ||
expected = Index(o.iloc[5:10].append(o.iloc[:5])) | ||
tm.assert_index_equal(uniques, expected, check_names=False) | ||
else: | ||
expected = o[5:10].append(o[:5]) | ||
tm.assert_index_equal(uniques, expected, check_names=False) | ||
tm.assert_numpy_array_equal(result_codes, expected_codes) | ||
tm.assert_index_equal(result_uniques, expected_uniques) | ||
|
||
def test_duplicated_drop_duplicates_index(self): | ||
# GH 4060 | ||
|
Uh oh!
There was an error while loading. Please reload this page.