style(matchers): rename matchers funcs for clarity (#137)

## Description

* Rename feature matching and evaluation functions for clarity

**Feature Matching**

* `feature_match_any` -> `compare_match_any`
* `feature_match_exact` -> `compare_match_all`
* `feature_match_fuzzy_string` -> `compare_fuzzy_match`
* `feature_match_log_odds_fuzzy_compare` -> `compare_probabilistic_fuzzy_match`

**Evaluation**

* `eval_perfect_match` -> `rule_match`
* `eval_log_odds_cutoff` -> `rule_probabilistic_match`

## Related Issues

closes #109

---------

Co-authored-by: cbrinson-rise8 <127439654+cbrinson-rise8@users.noreply.github.com>
CDCgov · Nov 27, 2024 · e15af3e · e15af3e
1 parent 0a14a40
commit e15af3e
Show file tree

Hide file tree

Showing 12 changed files with 156 additions and 857 deletions.
diff --git a/docs/site/reference.md b/docs/site/reference.md
@@ -119,12 +119,12 @@ patient data and used during query retrieval. The following blocking key types a
 These are the functions that can be used to evaluate the matching results as a collection, thus
 determining if the incoming payload is a match or not to an existing Patient record.
 
-`func:recordlinker.linking.matchers.eval_perfect_match`
+`func:recordlinker.linking.matchers.rule_match`
 
 :   Determines whether a given set of feature comparisons represent a 'perfect' match
     (i.e. all features that were compared match in whatever criteria was specified).
 
-`func:recordlinker.linking.matchers.eval_log_odds_cutoff`
+`func:recordlinker.linking.matchers.rule_probabilistic_match`
 
 :   Determines whether a given set of feature comparisons matches enough to be the
     result of a true patient link instead of just random chance. This is represented
@@ -143,22 +143,22 @@ matching is designed to compare one list of values to another list of values.  F
 incoming record could have a GIVEN_NAME of ["John", "Dean"] and we could be comparing them to an
 existing Patient with the GIVEN_NAME of ["John", "D"].
 
-`func:recordlinker.linking.matchers.feature_match_any`
+`func:recordlinker.linking.matchers.compare_match_any`
 
 :   Determines if any of the features are a direct match.
 
-`func:recordlinker.linking.matchers.feature_match_all`
+`func:recordlinker.linking.matchers.compare_match_all`
 
 :   Determines if all of the features are a direct match.
 
-`func:recordlinker.linking.matchers.feature_match_fuzzy_string`
+`func:recordlinker.linking.matchers.compare_fuzzy_match`
 
 :   Determines if the features are a fuzzy match based on a string comparison.
     JaroWinkler, Levenshtein and Damerau-Levenshtein are supported, with JaroWinkler as the default.
     Use the `kwargs` parameter to specify the desired algorithm and thresholds.
     Example: `{"kwargs": {"similarity_measure": "levenshtein", "thresholds": {"FIRST_NAME": 0.8}}}`
 
-`func:recordlinker.linking.matchers.feature_match_log_odds_fuzzy_compare`
+`func:recordlinker.linking.matchers.compare_probabilistic_fuzzy_match`
 
 :   Similar to the above function, but uses a log-odds ratio to determine if the features are a match 
     probabilistically. This is useful when wanting to more robustly compare features by incorporating

diff --git a/src/recordlinker/assets/initial_algorithms.json b/src/recordlinker/assets/initial_algorithms.json
@@ -15,14 +15,14 @@
                 "evaluators": [
                     {
                         "feature": "FIRST_NAME",
-                        "func": "func:recordlinker.linking.matchers.feature_match_fuzzy_string"
+                        "func": "func:recordlinker.linking.matchers.compare_fuzzy_match"
                     },
                     {
                         "feature": "LAST_NAME",
-                        "func": "func:recordlinker.linking.matchers.feature_match_exact"
+                        "func": "func:recordlinker.linking.matchers.compare_match_all"
                     }
                 ],
-                "rule": "func:recordlinker.linking.matchers.eval_perfect_match",
+                "rule": "func:recordlinker.linking.matchers.rule_match",
                 "kwargs": {
                     "thresholds": {
                         "FIRST_NAME": 0.9,
@@ -44,14 +44,14 @@
                 "evaluators": [
                     {
                         "feature": "ADDRESS",
-                        "func": "func:recordlinker.linking.matchers.feature_match_fuzzy_string"
+                        "func": "func:recordlinker.linking.matchers.compare_fuzzy_match"
                     },
                     {
                         "feature": "BIRTHDATE",
-                        "func": "func:recordlinker.linking.matchers.feature_match_exact"
+                        "func": "func:recordlinker.linking.matchers.compare_match_all"
                     }
                 ],
-                "rule": "func:recordlinker.linking.matchers.eval_perfect_match",
+                "rule": "func:recordlinker.linking.matchers.rule_match",
                 "kwargs": {
                     "thresholds": {
                         "FIRST_NAME": 0.9,
@@ -81,14 +81,14 @@
                 "evaluators": [
                     {
                         "feature": "FIRST_NAME",
-                        "func": "func:recordlinker.linking.matchers.feature_match_log_odds_fuzzy_compare"
+                        "func": "func:recordlinker.linking.matchers.compare_probabilistic_fuzzy_match"
                     },
                     {
                         "feature": "LAST_NAME",
-                        "func": "func:recordlinker.linking.matchers.feature_match_log_odds_fuzzy_compare"
+                        "func": "func:recordlinker.linking.matchers.compare_probabilistic_fuzzy_match"
                     }
                 ],
-                "rule": "func:recordlinker.linking.matchers.eval_log_odds_cutoff",
+                "rule": "func:recordlinker.linking.matchers.rule_probabilistic_match",
                 "kwargs": {
                     "similarity_measure": "JaroWinkler",
                     "thresholds": {
@@ -123,14 +123,14 @@
                 "evaluators": [
                     {
                         "feature": "ADDRESS",
-                        "func": "func:recordlinker.linking.matchers.feature_match_log_odds_fuzzy_compare"
+                        "func": "func:recordlinker.linking.matchers.compare_probabilistic_fuzzy_match"
                     },
                     {
                         "feature": "BIRTHDATE",
-                        "func": "func:recordlinker.linking.matchers.feature_match_log_odds_fuzzy_compare"
+                        "func": "func:recordlinker.linking.matchers.compare_probabilistic_fuzzy_match"
                     }
                 ],
-                "rule": "func:recordlinker.linking.matchers.eval_log_odds_cutoff",
+                "rule": "func:recordlinker.linking.matchers.rule_probabilistic_match",
                 "kwargs": {
                     "similarity_measure": "JaroWinkler",
                     "thresholds": {

diff --git a/src/recordlinker/linking/matchers.py b/src/recordlinker/linking/matchers.py
@@ -30,8 +30,8 @@ class RuleFunc(enum.Enum):
     the algorithm.
     """
 
-    PERFECT_MATCH = "func:recordlinker.linking.matchers.eval_perfect_match"
-    LOG_ODDS_CUTOFF = "func:recordlinker.linking.matchers.eval_log_odds_cutoff"
+    RULE_MATCH = "func:recordlinker.linking.matchers.rule_match"
+    RULE_PROBABILISTIC_MATCH = "func:recordlinker.linking.matchers.rule_probabilistic_match"
 
 
 class FeatureFunc(enum.Enum):
@@ -44,11 +44,11 @@ class FeatureFunc(enum.Enum):
     matching, based on the configuration of the algorithm.
     """
 
-    MATCH_ANY = "func:recordlinker.linking.matchers.feature_match_any"
-    MATCH_EXACT = "func:recordlinker.linking.matchers.feature_match_exact"
-    MATCH_FUZZY_STRING = "func:recordlinker.linking.matchers.feature_match_fuzzy_string"
-    MATCH_LOG_ODDS_FUZZY_COMPARE = (
-        "func:recordlinker.linking.matchers.feature_match_log_odds_fuzzy_compare"
+    COMPARE_MATCH_ANY = "func:recordlinker.linking.matchers.compare_match_any"
+    COMPARE_MATCH_ALL = "func:recordlinker.linking.matchers.compare_match_all"
+    COMPARE_FUZZY_MATCH = "func:recordlinker.linking.matchers.compare_fuzzy_match"
+    COMPARE_PROBABILISTIC_FUZZY_MATCH = (
+        "func:recordlinker.linking.matchers.compare_probabilistic_fuzzy_match"
     )
 
 
@@ -99,7 +99,7 @@ def _get_fuzzy_params(col: str, **kwargs) -> tuple[SIMILARITY_MEASURES, float]:
     return (similarity_measure, threshold)
 
 
-def eval_perfect_match(feature_comparisons: list[float], **kwargs: typing.Any) -> bool:
+def rule_match(feature_comparisons: list[float], **kwargs: typing.Any) -> bool:
     """
     Determines whether a given set of feature comparisons represent a
     'perfect' match (i.e. whether all features that were compared match
@@ -112,7 +112,7 @@ def eval_perfect_match(feature_comparisons: list[float], **kwargs: typing.Any) -
     return sum(feature_comparisons) == len(feature_comparisons)
 
 
-def eval_log_odds_cutoff(feature_comparisons: list[float], **kwargs: typing.Any) -> bool:
+def rule_probabilistic_match(feature_comparisons: list[float], **kwargs: typing.Any) -> bool:
     """
     Determines whether a given set of feature comparisons matches enough
     to be the result of a true patient link instead of just random chance.
@@ -129,7 +129,7 @@ def eval_log_odds_cutoff(feature_comparisons: list[float], **kwargs: typing.Any)
     return sum(feature_comparisons) >= float(threshold)
 
 
-def feature_match_any(
+def compare_match_any(
     record: PIIRecord, patient: Patient, key: Feature, **kwargs: typing.Any
 ) -> float:
     """
@@ -138,7 +138,7 @@ def feature_match_any(
     :param record: The incoming record to evaluate.
     :param patient: The patient record to compare against.
     :param key: The name of the column being evaluated (e.g. "city").
-    :return: A float indicating whether the features are an exact match.
+    :return: A float indicating whether any of the features are an exact match.
     """
     rec_values = set(record.feature_iter(key))
     if not rec_values:
@@ -147,8 +147,7 @@ def feature_match_any(
     return float(bool(rec_values & pat_values))
 
 
-# TODO: rename to feature_match_all
-def feature_match_exact(
+def compare_match_all(
     record: PIIRecord, patient: Patient, key: Feature, **kwargs: typing.Any
 ) -> float:
     """
@@ -157,7 +156,7 @@ def feature_match_exact(
     :param record: The incoming record to evaluate.
     :param patient: The patient record to compare against.
     :param key: The name of the column being evaluated (e.g. "city").
-    :return: A float indicating whether the features are an exact match.
+    :return: A float indicating whether all of the features are an exact match.
     """
     rec_values = set(record.feature_iter(key))
     if not rec_values:
@@ -166,8 +165,7 @@ def feature_match_exact(
     return float(rec_values == pat_values)
 
 
-# TODO: rename to feature_match_fuzzy_any
-def feature_match_fuzzy_string(
+def compare_fuzzy_match(
     record: PIIRecord, patient: Patient, key: Feature, **kwargs: typing.Any
 ) -> float:
     """
@@ -191,8 +189,7 @@ def feature_match_fuzzy_string(
     return 0
 
 
-# TODO: rename to feature_match_log_odds_fuzzy_any
-def feature_match_log_odds_fuzzy_compare(
+def compare_probabilistic_fuzzy_match(
     record: PIIRecord, patient: Patient, key: Feature, **kwargs: typing.Any
 ) -> float:
     """