From d64fd74ba8aa6791f7aed8583578ef562c02c50d Mon Sep 17 00:00:00 2001 From: smitpatel49 Date: Fri, 12 Jul 2024 19:48:04 -0500 Subject: [PATCH 1/4] Unit test dassert_valid_remap --- helpers/test/test_hpandas.py | 107 +++++++++++++++++++++++++++++------ 1 file changed, 90 insertions(+), 17 deletions(-) diff --git a/helpers/test/test_hpandas.py b/helpers/test/test_hpandas.py index 7bc5e7d113..989dca9bee 100644 --- a/helpers/test/test_hpandas.py +++ b/helpers/test/test_hpandas.py @@ -21,6 +21,7 @@ _AWS_PROFILE = "ck" + class Test_dassert_is_unique1(hunitest.TestCase): def get_df1(self) -> pd.DataFrame: """ @@ -134,6 +135,83 @@ def test3(self) -> None: # ############################################################################# +class Test_dassert_valid_remap(hunitest.TestCase): + def test1(self) -> None: + """ + A simple test to check that the function works. + """ + # Set inputs. + to_remap = ["dummy_value_1", "dummy_value_2", "dummy_value_3"] + remap_dict = { + "dummy_value_1": "1, 2, 3", + "dummy_value_2": "A, B, C", + } + # Check. + hpandas.dassert_valid_remap(to_remap, remap_dict) + + def test2(self) -> None: + """ + Check that an assert is raised if dictionary keys are not a subset. + """ + # Set inputs. + to_remap = ["dummy_value_1", "dummy_value_2"] + remap_dict = { + "dummy_value_1": "1, 2, 3", + "dummy_value_2": "A, B, C", + "dummy_value_3": "A1, A2, A3", + } + # Run + with self.assertRaises(AssertionError) as cm: + hpandas.dassert_valid_remap(to_remap, remap_dict) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + val1=['dummy_value_1', 'dummy_value_2', 'dummy_value_3'] + issubset + val2=['dummy_value_1', 'dummy_value_2'] + val1 - val2=['dummy_value_3'] + Keys to remap should be a subset of existing columns""" + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) + + def test3(self) -> None: + """ + Check if duplicate values create error. + """ + # Set inputs. + to_remap = ["dummy_value_1", "dummy_value_2", "dummy_value_3"] + remap_dict = { + "dummy_value_1": "1, 2, 3", + "dummy_value_2": "A, B, C", + "dummy_value_3": "1, 2, 3", + } + # Run + with self.assertRaises(AttributeError) as cm: + hpandas.dassert_valid_remap(to_remap, remap_dict) + actual = str(cm.exception) + expected = r""" + 'dict_values' object has no attribute 'count'""" + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) + + def test4(self) -> None: + """ + Check if duplicate key entries pass the test. + """ + # Set inputs. + to_remap = ["dummy_value_1", "dummy_value_2", "dummy_value_3"] + remap_dict = { + "dummy_value_1": "1, 2, 3", + "dummy_value_2": "A, B, C", + "dummy_value_1": "A1, B2, C3", + } + # Check. + hpandas.dassert_valid_remap(to_remap, remap_dict) + + +# ############################################################################# + + class Test_trim_df1(hunitest.TestCase): def get_df(self, *args: Any, **kwargs: Any) -> pd.DataFrame: """ @@ -3527,7 +3605,8 @@ def test2(self) -> None: def test3(self) -> None: """ - Check that an assert is raised for a not monotonically increasing index. + Check that an assert is raised for a not monotonically increasing + index. """ # Build test dataframe. idx = [ @@ -3963,25 +4042,19 @@ def get_multiindex_df( Example of dataframe returned when `index_is_datetime = True`: - ``` - column1 column2 - index timestamp - index1 2022-01-01 21:00:00+00:00 -0.122140 -1.949431 - 2022-01-01 21:10:00+00:00 1.303778 -0.288235 - index2 2022-01-01 21:00:00+00:00 1.237079 1.168012 - 2022-01-01 21:10:00+00:00 1.333692 1.708455 - ``` + ``` column1 column2 + index timestamp index1 2022-01-01 21:00:00+00:00 -0.122140 + -1.949431 2022-01-01 21:10:00+00:00 1.303778 + -0.288235 index2 2022-01-01 21:00:00+00:00 1.237079 + 1.168012 2022-01-01 21:10:00+00:00 1.333692 + 1.708455 ``` Example of dataframe returned when `index_is_datetime = False`: - ``` - column1 column2 - index timestamp - index1 string1 -0.122140 -1.949431 - string2 1.303778 -0.288235 - index2 string1 1.237079 1.168012 - string2 1.333692 1.708455 - ``` + ``` column1 column2 index timestamp + index1 string1 -0.122140 -1.949431 string2 + 1.303778 -0.288235 index2 string1 1.237079 1.168012 + string2 1.333692 1.708455 ``` """ if index_is_datetime: index_inner = [ From 9e796fb4807520fb6d635d5fef32b607607d495c Mon Sep 17 00:00:00 2001 From: smitpatel49 Date: Mon, 15 Jul 2024 17:35:48 -0500 Subject: [PATCH 2/4] Linter changes reverted --- helpers/test/test_hpandas.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/helpers/test/test_hpandas.py b/helpers/test/test_hpandas.py index 989dca9bee..4fe11ea87f 100644 --- a/helpers/test/test_hpandas.py +++ b/helpers/test/test_hpandas.py @@ -3605,8 +3605,7 @@ def test2(self) -> None: def test3(self) -> None: """ - Check that an assert is raised for a not monotonically increasing - index. + Check that an assert is raised for a not monotonically increasing index. """ # Build test dataframe. idx = [ @@ -4039,22 +4038,28 @@ def get_multiindex_df( ) -> pd.DataFrame: """ Helper function to get test multi-index dataframe. - Example of dataframe returned when `index_is_datetime = True`: - ``` column1 column2 - index timestamp index1 2022-01-01 21:00:00+00:00 -0.122140 - -1.949431 2022-01-01 21:10:00+00:00 1.303778 - -0.288235 index2 2022-01-01 21:00:00+00:00 1.237079 - 1.168012 2022-01-01 21:10:00+00:00 1.333692 - 1.708455 ``` + ``` + + column1 column2 + index timestamp + index1 2022-01-01 21:00:00+00:00 -0.122140 -1.949431 + 2022-01-01 21:10:00+00:00 1.303778 -0.288235 + index2 2022-01-01 21:00:00+00:00 1.237079 1.168012 + 2022-01-01 21:10:00+00:00 1.333692 1.708455 + ``` Example of dataframe returned when `index_is_datetime = False`: - ``` column1 column2 index timestamp - index1 string1 -0.122140 -1.949431 string2 - 1.303778 -0.288235 index2 string1 1.237079 1.168012 - string2 1.333692 1.708455 ``` + ``` + column1 column2 + index timestamp + index1 string1 -0.122140 -1.949431 + string2 1.303778 -0.288235 + index2 string1 1.237079 1.168012 + string2 1.333692 1.708455 + ``` """ if index_is_datetime: index_inner = [ From 01c0c59d879bc877029d2b14cb6a640f67cf6963 Mon Sep 17 00:00:00 2001 From: smitpatel49 Date: Mon, 22 Jul 2024 08:25:22 -0500 Subject: [PATCH 3/4] Added new test cases --- helpers/hpandas.py | 1 + helpers/test/test_hpandas.py | 76 ++++++++++++++++++++++++------------ 2 files changed, 52 insertions(+), 25 deletions(-) diff --git a/helpers/hpandas.py b/helpers/hpandas.py index 7612cf055b..e941a3555b 100644 --- a/helpers/hpandas.py +++ b/helpers/hpandas.py @@ -256,6 +256,7 @@ def dassert_valid_remap(to_remap: List[str], remap_dict: Dict[str, str]) -> None to_remap, "Keys to remap should be a subset of existing columns", ) + # TODO(Samarth): Function does not work for dict keys. # The mapping is invertible. hdbg.dassert_no_duplicates(remap_dict.keys()) hdbg.dassert_no_duplicates(remap_dict.values()) diff --git a/helpers/test/test_hpandas.py b/helpers/test/test_hpandas.py index 4fe11ea87f..cbcc415787 100644 --- a/helpers/test/test_hpandas.py +++ b/helpers/test/test_hpandas.py @@ -196,17 +196,43 @@ def test3(self) -> None: def test4(self) -> None: """ - Check if duplicate key entries pass the test. + Check if an error is raised when the instance is not a list. """ # Set inputs. - to_remap = ["dummy_value_1", "dummy_value_2", "dummy_value_3"] + to_remap = {"dummy_value_1"} remap_dict = { "dummy_value_1": "1, 2, 3", - "dummy_value_2": "A, B, C", - "dummy_value_1": "A1, B2, C3", } + # Run + with self.assertRaises(AssertionError) as cm: + hpandas.dassert_valid_remap(to_remap, remap_dict) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + Instance of '{'dummy_value_1'}' is '' instead of '' + """ # Check. - hpandas.dassert_valid_remap(to_remap, remap_dict) + self.assert_equal(actual, expected, fuzzy_match=True) + + def test5(self) -> None: + """ + Check if an error is raised when the instance is not a dictionary. + """ + # Set inputs. + to_remap = ["dummy_value_1"] + remap_dict = [ + "dummy_value_1 : 1, 2, 3", + ] + # Run + with self.assertRaises(AssertionError) as cm: + hpandas.dassert_valid_remap(to_remap, remap_dict) + actual = str(cm.exception) + expected = r""" + * Failed assertion * + Instance of '['dummy_value_1 : 1, 2, 3']' is '' instead of '' + """ + # Check. + self.assert_equal(actual, expected, fuzzy_match=True) # ############################################################################# @@ -4037,29 +4063,29 @@ def get_multiindex_df( index_is_datetime: bool, ) -> pd.DataFrame: """ - Helper function to get test multi-index dataframe. - Example of dataframe returned when `index_is_datetime = True`: - - ``` - - column1 column2 - index timestamp - index1 2022-01-01 21:00:00+00:00 -0.122140 -1.949431 - 2022-01-01 21:10:00+00:00 1.303778 -0.288235 - index2 2022-01-01 21:00:00+00:00 1.237079 1.168012 - 2022-01-01 21:10:00+00:00 1.333692 1.708455 - ``` + Helper function to get test multi-index dataframe. + Example of dataframe returned when `index_is_datetime = True`: + + ``` + + column1 column2 + index timestamp + index1 2022-01-01 21:00:00+00:00 -0.122140 -1.949431 + 2022-01-01 21:10:00+00:00 1.303778 -0.288235 + index2 2022-01-01 21:00:00+00:00 1.237079 1.168012 + 2022-01-01 21:10:00+00:00 1.333692 1.708455 + ``` - Example of dataframe returned when `index_is_datetime = False`: + Example of dataframe returned when `index_is_datetime = False`: - ``` - column1 column2 - index timestamp - index1 string1 -0.122140 -1.949431 - string2 1.303778 -0.288235 - index2 string1 1.237079 1.168012 - string2 1.333692 1.708455 ``` + column1 column2 + index timestamp + index1 string1 -0.122140 -1.949431 + string2 1.303778 -0.288235 + index2 string1 1.237079 1.168012 + string2 1.333692 1.708455 + ``` """ if index_is_datetime: index_inner = [ From f3ce130f5900a231c0a73fc821e0c23a2c9446a0 Mon Sep 17 00:00:00 2001 From: smitpatel49 Date: Thu, 25 Jul 2024 05:53:12 -0500 Subject: [PATCH 4/4] Fixed some nits --- helpers/test/test_hpandas.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/helpers/test/test_hpandas.py b/helpers/test/test_hpandas.py index cbcc415787..9979aac915 100644 --- a/helpers/test/test_hpandas.py +++ b/helpers/test/test_hpandas.py @@ -138,7 +138,7 @@ def test3(self) -> None: class Test_dassert_valid_remap(hunitest.TestCase): def test1(self) -> None: """ - A simple test to check that the function works. + Check that the function works with correct inputs. """ # Set inputs. to_remap = ["dummy_value_1", "dummy_value_2", "dummy_value_3"] @@ -151,7 +151,7 @@ def test1(self) -> None: def test2(self) -> None: """ - Check that an assert is raised if dictionary keys are not a subset. + Check that an assertion is raised if dictionary keys are not a subset. """ # Set inputs. to_remap = ["dummy_value_1", "dummy_value_2"] @@ -160,7 +160,7 @@ def test2(self) -> None: "dummy_value_2": "A, B, C", "dummy_value_3": "A1, A2, A3", } - # Run + # Run. with self.assertRaises(AssertionError) as cm: hpandas.dassert_valid_remap(to_remap, remap_dict) actual = str(cm.exception) @@ -176,16 +176,16 @@ def test2(self) -> None: def test3(self) -> None: """ - Check if duplicate values create error. + Check that an assertion is raised if the duplicate values are present in the dict. """ # Set inputs. to_remap = ["dummy_value_1", "dummy_value_2", "dummy_value_3"] remap_dict = { - "dummy_value_1": "1, 2, 3", + "dummy_value_1": 1, "dummy_value_2": "A, B, C", - "dummy_value_3": "1, 2, 3", + "dummy_value_3": "A, B, C", } - # Run + # Run. with self.assertRaises(AttributeError) as cm: hpandas.dassert_valid_remap(to_remap, remap_dict) actual = str(cm.exception) @@ -196,14 +196,14 @@ def test3(self) -> None: def test4(self) -> None: """ - Check if an error is raised when the instance is not a list. + Check that an assertion is raised if the input is not a list. """ # Set inputs. to_remap = {"dummy_value_1"} remap_dict = { "dummy_value_1": "1, 2, 3", } - # Run + # Run. with self.assertRaises(AssertionError) as cm: hpandas.dassert_valid_remap(to_remap, remap_dict) actual = str(cm.exception) @@ -216,14 +216,14 @@ def test4(self) -> None: def test5(self) -> None: """ - Check if an error is raised when the instance is not a dictionary. + Check that an assertion is raised if the input is not a dictionary. """ # Set inputs. to_remap = ["dummy_value_1"] remap_dict = [ "dummy_value_1 : 1, 2, 3", ] - # Run + # Run. with self.assertRaises(AssertionError) as cm: hpandas.dassert_valid_remap(to_remap, remap_dict) actual = str(cm.exception)