Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SorrTask1075_Unit_test_hpandas_dassert_valid_remap #1080

Merged
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions helpers/hpandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ def dassert_valid_remap(to_remap: List[str], remap_dict: Dict[str, str]) -> None
to_remap,
"Keys to remap should be a subset of existing columns",
)
# TODO(Samarth): Function does not work for dict keys.
# The mapping is invertible.
hdbg.dassert_no_duplicates(remap_dict.keys())
hdbg.dassert_no_duplicates(remap_dict.values())
Expand Down
140 changes: 122 additions & 18 deletions helpers/test/test_hpandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

_AWS_PROFILE = "ck"


class Test_dassert_is_unique1(hunitest.TestCase):
def get_df1(self) -> pd.DataFrame:
"""
Expand Down Expand Up @@ -134,6 +135,109 @@ def test3(self) -> None:
# #############################################################################


class Test_dassert_valid_remap(hunitest.TestCase):
    """
    Exercise `hpandas.dassert_valid_remap()` on accepted and rejected inputs.
    """

    def _check_raises(
        self,
        exception_type: type,
        to_remap: Any,
        remap_dict: Any,
        expected: str,
    ) -> None:
        """
        Call the function under test and compare the raised error message.

        :param exception_type: exception class the call is expected to raise
        :param to_remap: first argument forwarded to `dassert_valid_remap()`
        :param remap_dict: second argument forwarded to
            `dassert_valid_remap()`
        :param expected: text the stringified exception is compared against
        """
        with self.assertRaises(exception_type) as ctx:
            hpandas.dassert_valid_remap(to_remap, remap_dict)
        message = str(ctx.exception)
        self.assert_equal(message, expected, fuzzy_match=True)

    def test1(self) -> None:
        """
        Smoke test: a mapping whose keys all appear in `to_remap` is accepted.
        """
        names = ["dummy_value_1", "dummy_value_2", "dummy_value_3"]
        mapping = {
            "dummy_value_1": "1, 2, 3",
            "dummy_value_2": "A, B, C",
        }
        # The test passes as long as the call does not raise.
        hpandas.dassert_valid_remap(names, mapping)

    def test2(self) -> None:
        """
        A mapping key that is missing from `to_remap` triggers an assertion.
        """
        names = ["dummy_value_1", "dummy_value_2"]
        # `dummy_value_3` is not among the names above.
        mapping = {
            "dummy_value_1": "1, 2, 3",
            "dummy_value_2": "A, B, C",
            "dummy_value_3": "A1, A2, A3",
        }
        message = r"""
* Failed assertion *
val1=['dummy_value_1', 'dummy_value_2', 'dummy_value_3']
issubset
val2=['dummy_value_1', 'dummy_value_2']
val1 - val2=['dummy_value_3']
Keys to remap should be a subset of existing columns"""
        self._check_raises(AssertionError, names, mapping, message)

    def test3(self) -> None:
        """
        Two keys mapped to the same value make the call fail.

        This test pins the error that is raised today, which is an
        `AttributeError` rather than an `AssertionError`; presumably this is
        the limitation noted by the TODO inside `dassert_valid_remap()`.
        """
        names = ["dummy_value_1", "dummy_value_2", "dummy_value_3"]
        # The first and the last keys share the value "1, 2, 3".
        mapping = {
            "dummy_value_1": "1, 2, 3",
            "dummy_value_2": "A, B, C",
            "dummy_value_3": "1, 2, 3",
        }
        message = r"""
'dict_values' object has no attribute 'count'"""
        self._check_raises(AttributeError, names, mapping, message)

    def test4(self) -> None:
        """
        Passing a set instead of a list as `to_remap` triggers an assertion.
        """
        names = {"dummy_value_1"}
        mapping = {
            "dummy_value_1": "1, 2, 3",
        }
        message = r"""
* Failed assertion *
Instance of '{'dummy_value_1'}' is '<class 'set'>' instead of '<class 'list'>'
"""
        self._check_raises(AssertionError, names, mapping, message)

    def test5(self) -> None:
        """
        Passing a list instead of a dict as `remap_dict` triggers an assertion.
        """
        names = ["dummy_value_1"]
        not_a_mapping = [
            "dummy_value_1 : 1, 2, 3",
        ]
        message = r"""
* Failed assertion *
Instance of '['dummy_value_1 : 1, 2, 3']' is '<class 'list'>' instead of '<class 'dict'>'
"""
        self._check_raises(AssertionError, names, not_a_mapping, message)


# #############################################################################


class Test_trim_df1(hunitest.TestCase):
def get_df(self, *args: Any, **kwargs: Any) -> pd.DataFrame:
"""
Expand Down Expand Up @@ -3959,29 +4063,29 @@ def get_multiindex_df(
index_is_datetime: bool,
) -> pd.DataFrame:
"""
Helper function to get test multi-index dataframe.
Helper function to get test multi-index dataframe.
Example of dataframe returned when `index_is_datetime = True`:

Example of dataframe returned when `index_is_datetime = True`:
```

```
column1 column2
index timestamp
index1 2022-01-01 21:00:00+00:00 -0.122140 -1.949431
2022-01-01 21:10:00+00:00 1.303778 -0.288235
index2 2022-01-01 21:00:00+00:00 1.237079 1.168012
2022-01-01 21:10:00+00:00 1.333692 1.708455
```
column1 column2
index timestamp
index1 2022-01-01 21:00:00+00:00 -0.122140 -1.949431
2022-01-01 21:10:00+00:00 1.303778 -0.288235
index2 2022-01-01 21:00:00+00:00 1.237079 1.168012
2022-01-01 21:10:00+00:00 1.333692 1.708455
```

Example of dataframe returned when `index_is_datetime = False`:
Example of dataframe returned when `index_is_datetime = False`:

```
column1 column2
index timestamp
index1 string1 -0.122140 -1.949431
string2 1.303778 -0.288235
index2 string1 1.237079 1.168012
string2 1.333692 1.708455
```
column1 column2
index timestamp
index1 string1 -0.122140 -1.949431
string2 1.303778 -0.288235
index2 string1 1.237079 1.168012
string2 1.333692 1.708455
```
"""
if index_is_datetime:
index_inner = [
Expand Down
Loading