diff --git a/dataprofiler/profilers/profile_builder.py b/dataprofiler/profilers/profile_builder.py
index 38d434ca4..130d09c1c 100644
--- a/dataprofiler/profilers/profile_builder.py
+++ b/dataprofiler/profilers/profile_builder.py
@@ -1555,15 +1555,19 @@ def report(self, report_options=None):
         return _prepare_report(report, output_format, omit_keys)
 
     def _get_unique_row_ratio(self):
-        return len(self.hashed_row_dict) / self.total_samples
+        if self.total_samples:
+            return len(self.hashed_row_dict) / self.total_samples
+        return 0
 
     def _get_row_is_null_ratio(self):
-        return 0 if self._min_col_samples_used in {0, None} \
-            else self.row_is_null_count / self._min_col_samples_used
+        if self._min_col_samples_used:
+            return self.row_is_null_count / self._min_col_samples_used
+        return 0
 
     def _get_row_has_null_ratio(self):
-        return 0 if self._min_col_samples_used in {0, None} \
-            else self.row_has_null_count / self._min_col_samples_used
+        if self._min_col_samples_used:
+            return self.row_has_null_count / self._min_col_samples_used
+        return 0
 
     def _get_duplicate_row_count(self):
         return self.total_samples - len(self.hashed_row_dict)
diff --git a/dataprofiler/tests/profilers/test_profile_builder.py b/dataprofiler/tests/profilers/test_profile_builder.py
index 347b7318a..731a89dac 100644
--- a/dataprofiler/tests/profilers/test_profile_builder.py
+++ b/dataprofiler/tests/profilers/test_profile_builder.py
@@ -1407,6 +1407,10 @@ def test_logs(self, mock_stderr, *mocks):
         # Ensure no progress bar printed
         self.assertNotIn('#' * 10, mock_stderr.getvalue())
 
+    def test_unique_row_ratio_empty_profiler(self):
+        profiler = StructuredProfiler(pd.DataFrame([]))
+        self.assertEqual(0, profiler._get_unique_row_ratio())
+
 
 class TestStructuredColProfilerClass(unittest.TestCase):
 