Skip to content

Commit

Permalink
Reverse transform constraints later in the DataProcessor (#1511)
Browse files Browse the repository at this point in the history
* Move constraint reverse_transform order

* Remove unneeded file

* Add comment

* Remove mistake
  • Loading branch information
fealho authored Jul 24, 2023
1 parent aae5305 commit c4a5230
Showing 1 changed file with 9 additions and 4 deletions.
13 changes: 9 additions & 4 deletions sdv/data_processing/data_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -699,9 +699,6 @@ def reverse_transform(self, data, reset_keys=False):
except rdt.errors.NotFittedError:
LOGGER.info(f'HyperTransformer has not been fitted for table {self.table_name}')

for constraint in reversed(self._constraints_to_reverse):
reversed_data = constraint.reverse_transform(reversed_data)

num_rows = len(reversed_data)
sampled_columns = list(reversed_data.columns)
missing_columns = [
Expand All @@ -720,7 +717,15 @@ def reverse_transform(self, data, reset_keys=False):
generated_keys = self.generate_keys(num_rows, reset_keys)
sampled_columns.extend(self._keys)

# Sort the sampled columns in the order of the metadata
for constraint in reversed(self._constraints_to_reverse):
reversed_data = constraint.reverse_transform(reversed_data)

# Add new columns generated by the constraint
new_columns = list(set(reversed_data.columns) - set(sampled_columns))
sampled_columns.extend(new_columns)

# Sort the sampled columns in the order of the metadata.
# Any extra columns not present in the metadata will be dropped.
# In multitable there may be missing columns in the sample such as foreign keys
# and alternate keys. That's the reason for ensuring that the metadata column is within
# the sampled columns.
Expand Down

0 comments on commit c4a5230

Please sign in to comment.