Skip to content

Commit

Permalink
Move validate in other validate
Browse files: browse the repository at this point in the history
  • Loading branch information
fealho committed May 24, 2022
1 parent 30276b1 commit 31f5623
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 31 deletions.
56 changes: 27 additions & 29 deletions sdv/constraints/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,31 @@ def _sample_constraint_columns(self, table_data):
sampled_data = pd.concat(all_sampled_rows, ignore_index=True)
return sampled_data

def _validate_data_on_constraint(self, table_data):
    """Make sure the given data is valid for the given constraints.

    The check only runs when every column listed in
    ``self.constraint_columns`` is present in ``table_data``; otherwise the
    method returns without validating anything.

    Args:
        table_data (pandas.DataFrame):
            Table data.

    Raises:
        ConstraintsNotMetError:
            If the table data is not valid for the provided constraints.
            The message shows at most the first five invalid rows and, when
            there are more, a count of the remaining ones.
    """
    # Without all of the constraint columns there is nothing to validate here.
    if not set(self.constraint_columns).issubset(table_data.columns.values):
        return

    is_valid_data = self.is_valid(table_data)
    if is_valid_data.all():
        return

    # Report only the constraint columns of the offending rows.
    constraint_data = table_data[list(self.constraint_columns)]
    invalid_rows = constraint_data[~is_valid_data]
    err_msg = (
        f"Data is not valid for the '{self.__class__.__name__}' constraint:\n"
        f'{invalid_rows[:5]}'
    )
    if len(invalid_rows) > 5:
        err_msg += f'\n+{len(invalid_rows) - 5} more'

    raise ConstraintsNotMetError(err_msg)

def _validate_constraint_columns(self, table_data):
"""Validate the columns in ``table_data``.
Expand All @@ -237,6 +262,8 @@ def _validate_constraint_columns(self, table_data):
table_data (pandas.DataFrame):
Table data.
"""
self._validate_data_on_constraint(table_data)

missing_columns = [col for col in self.constraint_columns if col not in table_data.columns]
if missing_columns:
if not self._columns_model:
Expand All @@ -260,34 +287,6 @@ def _validate_constraint_columns(self, table_data):

return table_data

def _validate_data_on_constraint(self, table_data):
    """Make sure the given data is valid for the given constraints.

    Validation is skipped when any column listed in
    ``self.constraint_columns`` is absent from ``table_data``.

    Args:
        table_data (pandas.DataFrame):
            Table data.

    Returns:
        None

    Raises:
        ConstraintsNotMetError:
            If the table data is not valid for the provided constraints.
            The message shows at most the first five invalid rows and, when
            there are more, a count of the remaining ones.
    """
    # Only validate when every constraint column is available.
    if not set(self.constraint_columns).issubset(table_data.columns.values):
        return

    # Named ``valid_mask`` so the local does not mirror the ``is_valid`` method.
    valid_mask = self.is_valid(table_data)
    if valid_mask.all():
        return

    # Report only the constraint columns of the offending rows.
    constraint_data = table_data[list(self.constraint_columns)]
    invalid_rows = constraint_data[~valid_mask]
    err_msg = (
        f"Data is not valid for the '{self.__class__.__name__}' constraint:\n"
        f'{invalid_rows[:5]}'
    )
    if len(invalid_rows) > 5:
        err_msg += f'\n+{len(invalid_rows) - 5} more'

    raise ConstraintsNotMetError(err_msg)

def transform(self, table_data):
"""Perform necessary transformations needed by constraint.
Expand All @@ -306,7 +305,6 @@ def transform(self, table_data):
pandas.DataFrame:
Input data unmodified.
"""
self._validate_data_on_constraint(table_data)
table_data = self._validate_constraint_columns(table_data)
return self._transform(table_data)

Expand Down
4 changes: 2 additions & 2 deletions tests/unit/constraints/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,8 +301,8 @@ def test_fit_trains_column_model(self, ht_mock, gm_mock):
def test_transform(self):
"""Test the ``Constraint.transform`` method.
It is an identity method for completion, to be optionally
overwritten by subclasses. #TODO: this description seems wrong
When no constraints are passed, it behaves like an identity method,
to be optionally overwritten by subclasses.
The ``Constraint.transform`` method is expected to:
- Return the input data unmodified.
Expand Down

0 comments on commit 31f5623

Please sign in to comment.