Skip to content

Commit c36b5a7

Browse files
pedram-mohajer, pre-commit-ci[bot], and cclauss
authored and committed
Update levenshtein_distance.py (TheAlgorithms#11171)
* Update levenshtein_distance.py * Update levenshtein_distance.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update levenshtein_distance.py * Update levenshtein_distance.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update levenshtein_distance.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update levenshtein_distance.py * Update levenshtein_distance.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Christian Clauss <cclauss@me.com>
1 parent 414f854 commit c36b5a7

File tree

1 file changed

+74
-19
lines changed

1 file changed

+74
-19
lines changed

strings/levenshtein_distance.py

+74-19
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,9 @@
1-
"""
2-
This is a Python implementation of the levenshtein distance.
3-
Levenshtein distance is a string metric for measuring the
4-
difference between two sequences.
5-
6-
For doctests run following command:
7-
python -m doctest -v levenshtein-distance.py
8-
or
9-
python3 -m doctest -v levenshtein-distance.py
10-
11-
For manual testing run:
12-
python levenshtein-distance.py
13-
"""
1+
from collections.abc import Callable
142

153

164
def levenshtein_distance(first_word: str, second_word: str) -> int:
17-
"""Implementation of the levenshtein distance in Python.
5+
"""
6+
Implementation of the Levenshtein distance in Python.
187
:param first_word: the first word to measure the difference.
198
:param second_word: the second word to measure the difference.
209
:return: the levenshtein distance between the two words.
@@ -47,7 +36,7 @@ def levenshtein_distance(first_word: str, second_word: str) -> int:
4736
current_row = [i + 1]
4837

4938
for j, c2 in enumerate(second_word):
50-
# Calculate insertions, deletions and substitutions
39+
# Calculate insertions, deletions, and substitutions
5140
insertions = previous_row[j + 1] + 1
5241
deletions = current_row[j] + 1
5342
substitutions = previous_row[j] + (c1 != c2)
@@ -62,9 +51,75 @@ def levenshtein_distance(first_word: str, second_word: str) -> int:
6251
return previous_row[-1]
6352

6453

54+
def levenshtein_distance_optimized(first_word: str, second_word: str) -> int:
    """
    Compute the Levenshtein distance between two words (strings).

    Only a single row of the dynamic-programming table is kept, and it is
    overwritten in place while each character of the longer word is processed.
    No new list is allocated per row.

    :param first_word: the first word to measure the difference.
    :param second_word: the second word to measure the difference.
    :return: the Levenshtein distance between the two words.

    Examples:
    >>> levenshtein_distance_optimized("planet", "planetary")
    3
    >>> levenshtein_distance_optimized("", "test")
    4
    >>> levenshtein_distance_optimized("book", "back")
    2
    >>> levenshtein_distance_optimized("book", "book")
    0
    >>> levenshtein_distance_optimized("test", "")
    4
    >>> levenshtein_distance_optimized("", "")
    0
    >>> levenshtein_distance_optimized("orchestration", "container")
    10
    """
    # Keep the shorter word as the row dimension so the row is as small as possible.
    if len(first_word) < len(second_word):
        return levenshtein_distance_optimized(second_word, first_word)

    # Turning a word into the empty string takes one deletion per character.
    if not second_word:
        return len(first_word)

    # row[j] starts as the distance from the empty prefix to second_word[:j].
    row = list(range(len(second_word) + 1))

    for i, c1 in enumerate(first_word, start=1):
        # `diagonal` carries the previous row's value at column j - 1, which
        # would otherwise be lost once row[j - 1] is overwritten.
        diagonal = row[0]
        row[0] = i

        for j, c2 in enumerate(second_word, start=1):
            above = row[j]  # still the previous row's value at column j
            row[j] = min(
                above + 1,  # insertion
                row[j - 1] + 1,  # deletion
                diagonal + (c1 != c2),  # substitution (free on a match)
            )
            diagonal = above

    return row[-1]
97+
98+
99+
def benchmark_levenshtein_distance(func: Callable) -> None:
    """
    Benchmark a Levenshtein distance function and print the elapsed time.

    The callable is handed to ``timeit`` through its ``globals`` namespace rather
    than being imported from ``__main__`` by name, so the benchmark also works
    when ``func`` is not defined in the script being run.

    :param func: The function to be benchmarked; it is timed as
        ``func('sitting', 'kitten')``.
    """
    from timeit import timeit

    number = 25_000
    result = timeit(
        stmt="func('sitting', 'kitten')",
        globals={"func": func},
        number=number,
    )
    print(f"{func.__name__:<30} finished {number:,} runs in {result:.5f} seconds")
112+
113+
65114
if __name__ == "__main__":
66-
first_word = input("Enter the first word:\n").strip()
67-
second_word = input("Enter the second word:\n").strip()
115+
# Get user input for words
116+
first_word = input("Enter the first word for Levenshtein distance:\n").strip()
117+
second_word = input("Enter the second word for Levenshtein distance:\n").strip()
118+
119+
# Calculate and print Levenshtein distances
120+
print(f"{levenshtein_distance(first_word, second_word) = }")
121+
print(f"{levenshtein_distance_optimized(first_word, second_word) = }")
68122

69-
result = levenshtein_distance(first_word, second_word)
70-
print(f"Levenshtein distance between {first_word} and {second_word} is {result}")
123+
# Benchmark the Levenshtein distance functions
124+
benchmark_levenshtein_distance(levenshtein_distance)
125+
benchmark_levenshtein_distance(levenshtein_distance_optimized)

0 commit comments

Comments
 (0)