Skip to content

Commit

Permalink
IntColumn Stats Diff (#279)
Browse files Browse the repository at this point in the history
* Welcome to the best commit you've ever seen

* This is almost ready. The code is still cooking

* Some NICE code.
  • Loading branch information
grant-eden authored Jun 24, 2021
1 parent 863cce3 commit 2b7b01a
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 3 deletions.
8 changes: 7 additions & 1 deletion dataprofiler/profilers/numerical_column_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def _add_helper(self, other1, other2):
if "num_negatives" in self.__calculations.keys():
self.num_negatives = other1.num_negatives + other2.num_negatives

def _diff_helper(self, other_profile):
def diff(self, other_profile, options=None):
"""
Finds the differences for several numerical stats.
Expand All @@ -256,6 +256,12 @@ def _diff_helper(self, other_profile):
:return: the numerical stats differences
:rtype: dict
"""
cls = self.__class__
if not isinstance(other_profile, cls):
raise TypeError("Unsupported operand type(s) for diff: '{}' "
"and '{}'".format(cls.__name__,
other_profile.__class__.__name__))

differences = {
"min": utils.find_diff_of_numbers(self.min, other_profile.min),
"max": utils.find_diff_of_numbers(self.max, other_profile.max),
Expand Down
32 changes: 32 additions & 0 deletions dataprofiler/tests/profilers/test_int_column_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -788,3 +788,35 @@ def test_profile_merge_bin_edges_indices(self):
profile_2.update(data_2)

profile_1 + profile_2

def test_diff(self):
    """
    Verify IntColumn.diff() against a second IntColumn profile.

    Two profiles are built from stringified series, the difference
    report of the first against the second is compared to a fixed
    dictionary, and a non-IntColumn operand is checked to raise a
    TypeError with the expected message.
    """
    # The expected values below match stats computed over 2, 6 and 4
    # only (e.g. sum 12 vs. 16 -> -4.0), i.e. without the text entry.
    first_series = pd.Series([2, 'not an int', 6, 4]).apply(str)
    first_profile = IntColumn("Int")
    first_profile.update(first_series)

    second_series = pd.Series([1, 15]).apply(str)
    second_profile = IntColumn("Int")
    second_profile.update(second_series)

    # Difference report of the first profile relative to the second.
    actual_diff = first_profile.diff(second_profile)
    self.assertDictEqual(
        {
            'max': -9.0,
            'mean': -4.0,
            'min': 1.0,
            'stddev': -7.899494936611665,
            'sum': -4.0,
            'variance': -94.0
        },
        actual_diff
    )

    # A plain string is not an IntColumn, so diff() must reject it.
    with self.assertRaises(TypeError) as raised:
        first_profile.diff("Inproper input")
    self.assertEqual(
        str(raised.exception),
        "Unsupported operand type(s) for diff: 'IntColumn' and"
        " 'str'"
    )
11 changes: 9 additions & 2 deletions dataprofiler/tests/profilers/test_numeric_stats_mixin_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,7 @@ def test_merge_num_zeros_and_negatives(self):
self.assertEqual(num_profiler.num_zeros, 0)
self.assertEqual(num_profiler.num_negatives, 0)

def test_diff_helper(self):
def test_diff(self):
"""
Checks diff() works appropriately.
"""
Expand All @@ -528,4 +528,11 @@ def test_diff_helper(self):
'variance': -8.362573099415204,
'stddev': -2.0238425028660023
}
self.assertDictEqual(expected_diff, other1._diff_helper(other2))
self.assertDictEqual(expected_diff, other1.diff(other2))

# Assert type error is properly called
with self.assertRaises(TypeError) as exc:
other1.diff("Inproper input")
self.assertEqual(str(exc.exception),
"Unsupported operand type(s) for diff: 'TestColumn' and"
" 'str'")

0 comments on commit 2b7b01a

Please sign in to comment.