Skip to content

Commit c855062

Browse files
saahil-mahatopre-commit-ci[bot]cclauss
authored
Add Damerau-Levenshtein distance algorithm (#10159)
* Add Damerau-Levenshtein distance algorithm * fix: precommit check * fix: doc correction * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * refactor: use variable for length and doc correction * Update damerau_levenshtein_distance.py * Update damerau_levenshtein_distance.py --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Christian Clauss <cclauss@me.com>
1 parent ebe6693 commit c855062

File tree

1 file changed

+71
-0
lines changed

1 file changed

+71
-0
lines changed
+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
"""
2+
This script is an implementation of the Damerau-Levenshtein distance algorithm.
3+
4+
It's an algorithm that measures the edit distance between two string sequences.
5+
6+
More information about this algorithm can be found in this wikipedia article:
7+
https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
8+
"""
9+
10+
11+
def damerau_levenshtein_distance(first_string: str, second_string: str) -> int:
    """
    Return the Damerau-Levenshtein distance between two strings.

    The distance is the minimum number of single-character insertions,
    deletions, substitutions and transpositions of two adjacent characters
    needed to turn ``first_string`` into ``second_string``.

    This is the unrestricted variant ("distance with adjacent transpositions"):
    characters may still be inserted or deleted between a pair that has been
    transposed. The simpler "optimal string alignment" variant forbids that and
    therefore overestimates some distances, e.g. it yields 3 for
    ``("ca", "abc")`` whereas the Damerau-Levenshtein distance is 2
    (``ca -> ac -> abc``).

    Both running time and memory are proportional to
    ``len(first_string) * len(second_string)``.

    Parameters:
        first_string: The first string to compare
        second_string: The second string to compare

    Returns:
        distance: The edit distance between the first and second strings

    >>> damerau_levenshtein_distance("cat", "cut")
    1
    >>> damerau_levenshtein_distance("kitten", "sitting")
    3
    >>> damerau_levenshtein_distance("hello", "world")
    4
    >>> damerau_levenshtein_distance("book", "back")
    2
    >>> damerau_levenshtein_distance("container", "containment")
    3
    >>> damerau_levenshtein_distance("ab", "ba")
    1
    >>> damerau_levenshtein_distance("ca", "abc")
    2
    >>> damerau_levenshtein_distance("", "abc")
    3
    >>> damerau_levenshtein_distance("", "")
    0
    """
    first_length = len(first_string)
    second_length = len(second_string)

    # No edit script ever needs more operations than this, so it serves as an
    # "infinite" cost for the sentinel cells below.
    max_distance = first_length + second_length

    # The matrix is shifted by one in both directions:
    # dp_matrix[i + 1][j + 1] is the distance between first_string[:i] and
    # second_string[:j]. Row 0 and column 0 are sentinels that make a
    # transposition impossible when no earlier matching character exists.
    dp_matrix = [[max_distance] * (second_length + 2) for _ in range(first_length + 2)]
    for i in range(first_length + 1):
        dp_matrix[i + 1][1] = i
    for j in range(second_length + 1):
        dp_matrix[1][j + 1] = j

    # For every character: the last (1-based) row of first_string processed so
    # far in which that character occurred.
    last_row_of_char: dict[str, int] = {}

    for i, first_char in enumerate(first_string, start=1):
        # Last (1-based) column in this row where the two characters matched
        last_match_column = 0

        for j, second_char in enumerate(second_string, start=1):
            # Candidate transposition: first_string[swap_row - 1] == second_char
            # and second_string[swap_column - 1] == first_char.
            swap_row = last_row_of_char.get(second_char, 0)
            swap_column = last_match_column

            if first_char == second_char:
                cost = 0
                last_match_column = j
            else:
                cost = 1

            # Delete the characters between the swapped pair in first_string,
            # swap the pair, then insert the ones between it in second_string.
            transposition_cost = (i - swap_row - 1) + 1 + (j - swap_column - 1)

            dp_matrix[i + 1][j + 1] = min(
                dp_matrix[i][j + 1] + 1,  # Deletion
                dp_matrix[i + 1][j] + 1,  # Insertion
                dp_matrix[i][j] + cost,  # Substitution
                dp_matrix[swap_row][swap_column] + transposition_cost,  # Transposition
            )

        last_row_of_char[first_char] = i

    return dp_matrix[-1][-1]
66+
67+
68+
if __name__ == "__main__":
    # When executed as a script, run the doctests embedded in this module.
    import doctest as _doctest_runner

    _doctest_runner.testmod()

0 commit comments

Comments
 (0)