|
| 1 | +""" |
| 2 | +This script is a implementation of the Damerau-Levenshtein distance algorithm. |
| 3 | +
|
| 4 | +It's an algorithm that measures the edit distance between two string sequences |
| 5 | +
|
| 6 | +More information about this algorithm can be found in this wikipedia article: |
| 7 | +https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance |
| 8 | +""" |
| 9 | + |
| 10 | + |
| 11 | +def damerau_levenshtein_distance(first_string: str, second_string: str) -> int: |
| 12 | + """ |
| 13 | + Implements the Damerau-Levenshtein distance algorithm that measures |
| 14 | + the edit distance between two strings. |
| 15 | +
|
| 16 | + Parameters: |
| 17 | + first_string: The first string to compare |
| 18 | + second_string: The second string to compare |
| 19 | +
|
| 20 | + Returns: |
| 21 | + distance: The edit distance between the first and second strings |
| 22 | +
|
| 23 | + >>> damerau_levenshtein_distance("cat", "cut") |
| 24 | + 1 |
| 25 | + >>> damerau_levenshtein_distance("kitten", "sitting") |
| 26 | + 3 |
| 27 | + >>> damerau_levenshtein_distance("hello", "world") |
| 28 | + 4 |
| 29 | + >>> damerau_levenshtein_distance("book", "back") |
| 30 | + 2 |
| 31 | + >>> damerau_levenshtein_distance("container", "containment") |
| 32 | + 3 |
| 33 | + >>> damerau_levenshtein_distance("container", "containment") |
| 34 | + 3 |
| 35 | + """ |
| 36 | + # Create a dynamic programming matrix to store the distances |
| 37 | + dp_matrix = [[0] * (len(second_string) + 1) for _ in range(len(first_string) + 1)] |
| 38 | + |
| 39 | + # Initialize the matrix |
| 40 | + for i in range(len(first_string) + 1): |
| 41 | + dp_matrix[i][0] = i |
| 42 | + for j in range(len(second_string) + 1): |
| 43 | + dp_matrix[0][j] = j |
| 44 | + |
| 45 | + # Fill the matrix |
| 46 | + for i, first_char in enumerate(first_string, start=1): |
| 47 | + for j, second_char in enumerate(second_string, start=1): |
| 48 | + cost = int(first_char != second_char) |
| 49 | + |
| 50 | + dp_matrix[i][j] = min( |
| 51 | + dp_matrix[i - 1][j] + 1, # Deletion |
| 52 | + dp_matrix[i][j - 1] + 1, # Insertion |
| 53 | + dp_matrix[i - 1][j - 1] + cost, # Substitution |
| 54 | + ) |
| 55 | + |
| 56 | + if ( |
| 57 | + i > 1 |
| 58 | + and j > 1 |
| 59 | + and first_string[i - 1] == second_string[j - 2] |
| 60 | + and first_string[i - 2] == second_string[j - 1] |
| 61 | + ): |
| 62 | + # Transposition |
| 63 | + dp_matrix[i][j] = min(dp_matrix[i][j], dp_matrix[i - 2][j - 2] + cost) |
| 64 | + |
| 65 | + return dp_matrix[-1][-1] |
| 66 | + |
| 67 | + |
| 68 | +if __name__ == "__main__": |
| 69 | + import doctest |
| 70 | + |
| 71 | + doctest.testmod() |
0 commit comments