Skip to content

Commit

Permalink
Add Damerau-Levenshtein similarity matching (#248)
Browse files: browse the repository at this point in the history
  • Loading branch information
femelo authored May 28, 2024
1 parent 3b4a9b6 commit 404b7fc
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions in ovos_utils/parse.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -19,6 +19,7 @@ class MatchStrategy(IntEnum):
PARTIAL_TOKEN_RATIO = auto()
PARTIAL_TOKEN_SORT_RATIO = auto()
PARTIAL_TOKEN_SET_RATIO = auto()
DAMERAU_LEVENSHTEIN_SIMILARITY = auto()


def _validate_matching_strategy(strategy):
Expand All @@ -37,6 +38,7 @@ def fuzzy_match(x, against, strategy=MatchStrategy.SIMPLE_RATIO):
down to 0.0 for no match at all.
"""
strategy = _validate_matching_strategy(strategy)
LOG.debug(f"matching strategy: {strategy}")
if strategy == MatchStrategy.RATIO:
score = rapidfuzz.fuzz.ratio(x, against) / 100
elif strategy == MatchStrategy.PARTIAL_RATIO:
Expand All @@ -51,6 +53,8 @@ def fuzzy_match(x, against, strategy=MatchStrategy.SIMPLE_RATIO):
score = rapidfuzz.fuzz.partial_token_set_ratio(x, against) / 100
elif strategy == MatchStrategy.PARTIAL_TOKEN_RATIO:
score = rapidfuzz.fuzz.partial_token_ratio(x, against) / 100
elif strategy == MatchStrategy.DAMERAU_LEVENSHTEIN_SIMILARITY:
score = rapidfuzz.distance.DamerauLevenshtein.normalized_similarity(x, against)
else:
score = SequenceMatcher(None, x, against).ratio()

Expand Down

0 comments on commit 404b7fc

Please sign in to comment.