Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update levenshtein_distance.py #11171

Merged
merged 11 commits into from
Nov 26, 2023
93 changes: 74 additions & 19 deletions strings/levenshtein_distance.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,9 @@
"""
This is a Python implementation of the levenshtein distance.
Levenshtein distance is a string metric for measuring the
difference between two sequences.

For doctests run following command:
python -m doctest -v levenshtein-distance.py
or
python3 -m doctest -v levenshtein-distance.py

For manual testing run:
python levenshtein-distance.py
"""
from collections.abc import Callable


def levenshtein_distance(first_word: str, second_word: str) -> int:
"""Implementation of the levenshtein distance in Python.
"""
Implementation of the Levenshtein distance in Python.
:param first_word: the first word to measure the difference.
:param second_word: the second word to measure the difference.
:return: the levenshtein distance between the two words.
Expand Down Expand Up @@ -47,7 +36,7 @@ def levenshtein_distance(first_word: str, second_word: str) -> int:
current_row = [i + 1]

for j, c2 in enumerate(second_word):
# Calculate insertions, deletions and substitutions
# Calculate insertions, deletions, and substitutions
insertions = previous_row[j + 1] + 1
deletions = current_row[j] + 1
substitutions = previous_row[j] + (c1 != c2)
Expand All @@ -62,9 +51,75 @@ def levenshtein_distance(first_word: str, second_word: str) -> int:
return previous_row[-1]


def levenshtein_distance_optimized(first_word: str, second_word: str) -> int:
    """
    Return the Levenshtein (edit) distance between two strings.

    Only two rows of the dynamic-programming table are kept at any time:
    the row computed for the previous character and the row being filled
    for the current one. The longer word drives the outer loop, so each row
    has ``len(shorter word) + 1`` entries.

    :param first_word: the first word to measure the difference.
    :param second_word: the second word to measure the difference.
    :return: the Levenshtein distance between the two words.

    Examples:
    >>> levenshtein_distance_optimized("planet", "planetary")
    3
    >>> levenshtein_distance_optimized("", "test")
    4
    >>> levenshtein_distance_optimized("book", "back")
    2
    >>> levenshtein_distance_optimized("book", "book")
    0
    >>> levenshtein_distance_optimized("test", "")
    4
    >>> levenshtein_distance_optimized("", "")
    0
    >>> levenshtein_distance_optimized("orchestration", "container")
    10
    """
    # The distance is symmetric, so order the words to keep the rows short.
    longer, shorter = first_word, second_word
    if len(longer) < len(shorter):
        longer, shorter = shorter, longer

    # Turning a word into the empty string costs one deletion per character.
    if not shorter:
        return len(longer)

    width = len(shorter)
    # Row 0: distance from the empty prefix to every prefix of `shorter`.
    row_above = list(range(width + 1))

    for row_number, outer_char in enumerate(longer, start=1):
        # Column 0: distance from a prefix of `longer` to the empty prefix.
        row = [row_number] + [0] * width

        for col, inner_char in enumerate(shorter, start=1):
            row[col] = min(
                row_above[col] + 1,  # insertion
                row[col - 1] + 1,  # deletion
                row_above[col - 1] + (outer_char != inner_char),  # substitution
            )

        row_above = row

    return row_above[-1]


def benchmark_levenshtein_distance(
    func: Callable[[str, str], int], number: int = 25_000
) -> None:
    """
    Time repeated calls of a Levenshtein distance function and print the result.

    The function is called as ``func('sitting', 'kitten')`` ``number`` times
    and the total elapsed time is printed on one line, prefixed with the
    function's ``__name__``.

    :param func: The function to be benchmarked; it must accept two strings.
    :param number: How many times to call ``func`` (defaults to 25,000).
    """
    from timeit import timeit

    # Hand the callable to timeit through ``globals`` instead of importing it
    # from ``__main__``: that import only succeeds when this file is run as a
    # script and ``func`` happens to be defined in it.
    result = timeit(
        stmt="func('sitting', 'kitten')",
        globals={"func": func},
        number=number,
    )
    print(f"{func.__name__:<30} finished {number:,} runs in {result:.5f} seconds")


if __name__ == "__main__":
first_word = input("Enter the first word:\n").strip()
second_word = input("Enter the second word:\n").strip()
# Get user input for words
first_word = input("Enter the first word for Levenshtein distance:\n").strip()
second_word = input("Enter the second word for Levenshtein distance:\n").strip()

# Calculate and print Levenshtein distances
print(f"{levenshtein_distance(first_word, second_word) = }")
print(f"{levenshtein_distance_optimized(first_word, second_word) = }")

result = levenshtein_distance(first_word, second_word)
print(f"Levenshtein distance between {first_word} and {second_word} is {result}")
# Benchmark the Levenshtein distance functions
benchmark_levenshtein_distance(levenshtein_distance)
benchmark_levenshtein_distance(levenshtein_distance_optimized)