1
- """
2
- This is a Python implementation of the levenshtein distance.
3
- Levenshtein distance is a string metric for measuring the
4
- difference between two sequences.
5
-
6
- For doctests run following command:
7
- python -m doctest -v levenshtein-distance.py
8
- or
9
- python3 -m doctest -v levenshtein-distance.py
10
-
11
- For manual testing run:
12
- python levenshtein-distance.py
13
- """
1
+ from collections .abc import Callable
14
2
15
3
16
4
def levenshtein_distance (first_word : str , second_word : str ) -> int :
17
- """Implementation of the levenshtein distance in Python.
5
+ """
6
+ Implementation of the Levenshtein distance in Python.
18
7
:param first_word: the first word to measure the difference.
19
8
:param second_word: the second word to measure the difference.
20
9
:return: the levenshtein distance between the two words.
@@ -47,7 +36,7 @@ def levenshtein_distance(first_word: str, second_word: str) -> int:
47
36
current_row = [i + 1 ]
48
37
49
38
for j , c2 in enumerate (second_word ):
50
- # Calculate insertions, deletions and substitutions
39
+ # Calculate insertions, deletions, and substitutions
51
40
insertions = previous_row [j + 1 ] + 1
52
41
deletions = current_row [j ] + 1
53
42
substitutions = previous_row [j ] + (c1 != c2 )
@@ -62,9 +51,75 @@ def levenshtein_distance(first_word: str, second_word: str) -> int:
62
51
return previous_row [- 1 ]
63
52
64
53
54
def levenshtein_distance_optimized(first_word: str, second_word: str) -> int:
    """
    Compute the Levenshtein distance between two words (strings).

    Only one row of the dynamic-programming table is kept; it is overwritten
    in place while the characters of the longer word are consumed, so the
    extra memory is proportional to the length of the shorter word.

    :param first_word: the first word to measure the difference.
    :param second_word: the second word to measure the difference.
    :return: the Levenshtein distance between the two words.

    Examples:
    >>> levenshtein_distance_optimized("planet", "planetary")
    3
    >>> levenshtein_distance_optimized("", "test")
    4
    >>> levenshtein_distance_optimized("book", "back")
    2
    >>> levenshtein_distance_optimized("book", "book")
    0
    >>> levenshtein_distance_optimized("test", "")
    4
    >>> levenshtein_distance_optimized("", "")
    0
    >>> levenshtein_distance_optimized("orchestration", "container")
    10
    """
    # The distance is symmetric, so make ``second_word`` the shorter one: the
    # single working row is sized after it.
    if len(first_word) < len(second_word):
        first_word, second_word = second_word, first_word

    if not second_word:
        return len(first_word)

    # row[j] holds the distance between the processed prefix of ``first_word``
    # and the first ``j`` characters of ``second_word``.
    row = list(range(len(second_word) + 1))

    for i, c1 in enumerate(first_word, start=1):
        # ``diagonal`` is the value of the previous row at column j - 1; it
        # must be saved before the cell is overwritten.
        diagonal = row[0]
        row[0] = i

        for j, c2 in enumerate(second_word, start=1):
            above = row[j]
            row[j] = min(
                above + 1,  # insertion
                row[j - 1] + 1,  # deletion
                diagonal + (c1 != c2),  # substitution (free on a match)
            )
            diagonal = above

    return row[-1]
97
+
98
+
99
def benchmark_levenshtein_distance(func: Callable) -> None:
    """
    Benchmark a Levenshtein distance function and print the timing.

    The function is called 25,000 times on the fixed pair
    ('sitting', 'kitten') and the total elapsed time is printed.

    :param func: The function to be benchmarked. It is called with two string
        arguments and must expose a ``__name__`` for the printed report.
    """
    from timeit import timeit

    number = 25_000
    # Hand the callable to timeit through ``globals`` instead of importing it
    # by name from ``__main__``, so the benchmark also works when this module
    # is imported rather than run as a script.
    result = timeit(
        stmt="func('sitting', 'kitten')",
        globals={"func": func},
        number=number,
    )
    print(f"{func.__name__:<30} finished {number:,} runs in {result:.5f} seconds")
112
+
113
+
65
114
if __name__ == "__main__":
    # Read the two words to compare from standard input.
    first_word = input("Enter the first word for Levenshtein distance:\n").strip()
    second_word = input("Enter the second word for Levenshtein distance:\n").strip()

    # Show the distance reported by each implementation.
    print(f"{levenshtein_distance(first_word, second_word) = }")
    print(f"{levenshtein_distance_optimized(first_word, second_word) = }")

    # Time each implementation in turn.
    for implementation in (levenshtein_distance, levenshtein_distance_optimized):
        benchmark_levenshtein_distance(implementation)
0 commit comments