1- """
2- This is a Python implementation of the Levenshtein distance.
3- Levenshtein distance is a string metric for measuring the
4- difference between two sequences.
5-
6- To run the doctests, use the following command:
7- python -m doctest -v levenshtein-distance.py
8- or
9- python3 -m doctest -v levenshtein-distance.py
10-
11- For manual testing run:
12- python levenshtein-distance.py
13- """
1+ from collections .abc import Callable
142
153
164def levenshtein_distance (first_word : str , second_word : str ) -> int :
17- """Implementation of the levenshtein distance in Python.
5+ """
6+ Implementation of the Levenshtein distance in Python.
187 :param first_word: the first word to measure the difference.
198 :param second_word: the second word to measure the difference.
209 :return: the levenshtein distance between the two words.
@@ -47,7 +36,7 @@ def levenshtein_distance(first_word: str, second_word: str) -> int:
4736 current_row = [i + 1 ]
4837
4938 for j , c2 in enumerate (second_word ):
50- # Calculate insertions, deletions and substitutions
39+ # Calculate insertions, deletions, and substitutions
5140 insertions = previous_row [j + 1 ] + 1
5241 deletions = current_row [j ] + 1
5342 substitutions = previous_row [j ] + (c1 != c2 )
@@ -62,9 +51,75 @@ def levenshtein_distance(first_word: str, second_word: str) -> int:
6251 return previous_row [- 1 ]
6352
6453
def levenshtein_distance_optimized(first_word: str, second_word: str) -> int:
    """
    Compute the Levenshtein distance between two words (strings).

    Only a single row of the dynamic-programming table is kept, and it is
    updated in place, so no new list is allocated per character of the
    longer word.

    :param first_word: the first word to measure the difference.
    :param second_word: the second word to measure the difference.
    :return: the Levenshtein distance between the two words.

    Examples:
    >>> levenshtein_distance_optimized("planet", "planetary")
    3
    >>> levenshtein_distance_optimized("", "test")
    4
    >>> levenshtein_distance_optimized("book", "back")
    2
    >>> levenshtein_distance_optimized("book", "book")
    0
    >>> levenshtein_distance_optimized("test", "")
    4
    >>> levenshtein_distance_optimized("", "")
    0
    >>> levenshtein_distance_optimized("orchestration", "container")
    10
    """
    # The distance is symmetric, so make the shorter word index the row.
    if len(first_word) < len(second_word):
        first_word, second_word = second_word, first_word

    if not second_word:
        return len(first_word)

    # row[j] holds the distance between the processed prefix of first_word
    # and the first j characters of second_word.
    row = list(range(len(second_word) + 1))

    for i, c1 in enumerate(first_word, start=1):
        # `diagonal` carries the previous row's value at column j - 1, which
        # is about to be overwritten in `row`.
        diagonal = row[0]
        row[0] = i

        for j, c2 in enumerate(second_word, start=1):
            above = row[j]  # previous row's value at column j
            row[j] = min(
                above + 1,  # insertion
                row[j - 1] + 1,  # deletion
                diagonal + (c1 != c2),  # substitution (free on a match)
            )
            diagonal = above

    return row[-1]
97+
98+
def benchmark_levenshtein_distance(func: Callable, number: int = 25_000) -> None:
    """
    Benchmark a Levenshtein distance function and print the elapsed time.

    The function is timed on the fixed word pair ('sitting', 'kitten').

    :param func: The function to be benchmarked; it is called as
        ``func('sitting', 'kitten')``.
    :param number: How many times to call ``func`` (defaults to 25,000).
    """
    from timeit import timeit

    # Hand the callable to timeit through ``globals`` instead of importing it
    # by name from ``__main__``: that import only succeeds when the function
    # is defined in the script currently being run.
    result = timeit(
        stmt="func('sitting', 'kitten')",
        globals={"func": func},
        number=number,
    )
    print(f"{func.__name__:<30} finished {number:,} runs in {result:.5f} seconds")
112+
113+
if __name__ == "__main__":
    # Read the two words to compare from standard input.
    first_word = input("Enter the first word for Levenshtein distance:\n ").strip()
    second_word = input("Enter the second word for Levenshtein distance:\n ").strip()

    # Show the distance computed by each implementation.
    print(f"{ levenshtein_distance (first_word , second_word ) = } ")
    print(f"{ levenshtein_distance_optimized (first_word , second_word ) = } ")

    # Time both implementations on the benchmark's fixed word pair.
    for implementation in (levenshtein_distance, levenshtein_distance_optimized):
        benchmark_levenshtein_distance(implementation)
0 commit comments