Skip to content

Commit

Permalink
Merge pull request #2395 from moj-analytical-services/add_symmetrical…
Browse files Browse the repository at this point in the history
…_arg_to_columns_reversed

Fix ColumnsReversedLevel
  • Loading branch information
RobinL authored Sep 11, 2024
2 parents 3c37b9d + 84f77bc commit 115b76f
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 5 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Match weight and m and u probabilities charts now have improved tooltips ([#2392](https://github.com/moj-analytical-services/splink/pull/2392))

### Fixed

- Fixed issue where `ColumnsReversedLevel` required equality on both columns ([#2395](https://github.com/moj-analytical-services/splink/pull/2395))

## [4.0.1] - 2024-09-06

### Added
Expand Down
19 changes: 15 additions & 4 deletions splink/internals/comparison_level_library.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,31 +333,42 @@ def __init__(
self,
col_name_1: Union[str, ColumnExpression],
col_name_2: Union[str, ColumnExpression],
symmetrical: bool = False,
):
"""Represents a comparison level where the columns are reversed. For example,
if surname is in the forename field and vice versa
By default, only one direction is checked: col_1_l = col_2_r. If the symmetrical
argument is True, both directions must match: col_1_l = col_2_r AND col_1_r = col_2_l.
Args:
col_name_1 (str): First column, e.g. forename
col_name_2 (str): Second column, e.g. surname
symmetrical (bool): If True, equality is required in both directions.
Default is False.
"""
self.col_expression_1 = ColumnExpression.instantiate_if_str(col_name_1)
self.col_expression_2 = ColumnExpression.instantiate_if_str(col_name_2)
self.symmetrical = symmetrical

def create_sql(self, sql_dialect: SplinkDialect) -> str:
    """Build the SQL condition that detects reversed columns.

    Args:
        sql_dialect (SplinkDialect): Dialect assigned to both column
            expressions before their left/right names are rendered.

    Returns:
        str: ``<col_1 left> = <col_2 right>`` when ``self.symmetrical`` is
            False. When it is True, the reverse direction
            ``<col_1 right> = <col_2 left>`` is AND-ed on as well.
    """
    self.col_expression_1.sql_dialect = sql_dialect
    self.col_expression_2.sql_dialect = sql_dialect
    col_1 = self.col_expression_1
    col_2 = self.col_expression_2

    # An unconditional two-way return used to precede this branch, which made
    # the `symmetrical` flag dead code; the flag now selects the condition.
    if self.symmetrical:
        return (
            f"{col_1.name_l} = {col_2.name_r} AND {col_1.name_r} = {col_2.name_l}"
        )
    return f"{col_1.name_l} = {col_2.name_r}"

def create_label_for_charts(self) -> str:
    """Return the chart label for this comparison level.

    Returns:
        str: A label naming both column labels, suffixed with
            "(both directions)" when ``self.symmetrical`` is True and
            "(one direction)" otherwise.
    """
    col_1 = self.col_expression_1
    col_2 = self.col_expression_2
    # A direction-less return used to precede this, so the suffix was never
    # produced; the label now reflects the `symmetrical` setting.
    direction = "both directions" if self.symmetrical else "one direction"
    return f"Match on reversed cols: {col_1.label} and {col_2.label} ({direction})"


class LevenshteinLevel(ComparisonLevelCreator):
Expand Down
4 changes: 3 additions & 1 deletion splink/internals/comparison_library.py
Original file line number Diff line number Diff line change
Expand Up @@ -1052,7 +1052,9 @@ def create_comparison_levels(self) -> List[ComparisonLevelCreator]:
)

levels.append(
cll.ColumnsReversedLevel(forename_col_expression, surname_col_expression)
cll.ColumnsReversedLevel(
forename_col_expression, surname_col_expression, symmetrical=True
)
)

for threshold in self.jaro_winkler_thresholds:
Expand Down

0 comments on commit 115b76f

Please sign in to comment.