Skip to content
93 changes: 74 additions & 19 deletions strings/levenshtein_distance.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,9 @@
"""
This is a Python implementation of the levenshtein distance.
Levenshtein distance is a string metric for measuring the
difference between two sequences.

For doctests run following command:
python -m doctest -v levenshtein-distance.py
or
python3 -m doctest -v levenshtein-distance.py

For manual testing run:
python levenshtein-distance.py
"""
from collections.abc import Callable


def levenshtein_distance(first_word: str, second_word: str) -> int:
"""Implementation of the levenshtein distance in Python.
"""
Implementation of the Levenshtein distance in Python.
:param first_word: the first word to measure the difference.
:param second_word: the second word to measure the difference.
:return: the levenshtein distance between the two words.
Expand Down Expand Up @@ -47,7 +36,7 @@ def levenshtein_distance(first_word: str, second_word: str) -> int:
current_row = [i + 1]

for j, c2 in enumerate(second_word):
# Calculate insertions, deletions and substitutions
# Calculate insertions, deletions, and substitutions
insertions = previous_row[j + 1] + 1
deletions = current_row[j] + 1
substitutions = previous_row[j] + (c1 != c2)
Expand All @@ -62,9 +51,75 @@ def levenshtein_distance(first_word: str, second_word: str) -> int:
return previous_row[-1]


def levenshtein_distance_optimized(first_word: str, second_word: str) -> int:
"""
Compute the Levenshtein distance between two words (strings).
The function is optimized for efficiency by modifying rows in place.
:param first_word: the first word to measure the difference.
:param second_word: the second word to measure the difference.
:return: the Levenshtein distance between the two words.
Examples:
>>> levenshtein_distance_optimized("planet", "planetary")
3
>>> levenshtein_distance_optimized("", "test")
4
>>> levenshtein_distance_optimized("book", "back")
2
>>> levenshtein_distance_optimized("book", "book")
0
>>> levenshtein_distance_optimized("test", "")
4
>>> levenshtein_distance_optimized("", "")
0
>>> levenshtein_distance_optimized("orchestration", "container")
10
"""
if len(first_word) < len(second_word):
return levenshtein_distance_optimized(second_word, first_word)

if len(second_word) == 0:
return len(first_word)

previous_row = list(range(len(second_word) + 1))

for i, c1 in enumerate(first_word):
current_row = [i + 1] + [0] * len(second_word)

for j, c2 in enumerate(second_word):
insertions = previous_row[j + 1] + 1
deletions = current_row[j] + 1
substitutions = previous_row[j] + (c1 != c2)
current_row[j + 1] = min(insertions, deletions, substitutions)

previous_row = current_row

return previous_row[-1]


def benchmark_levenshtein_distance(func: Callable) -> None:
"""
Benchmark the Levenshtein distance function.
:param str: The name of the function being benchmarked.
:param func: The function to be benchmarked.
"""
from timeit import timeit

stmt = f"{func.__name__}('sitting', 'kitten')"
setup = f"from __main__ import {func.__name__}"
number = 25_000
result = timeit(stmt=stmt, setup=setup, number=number)
print(f"{func.__name__:<30} finished {number:,} runs in {result:.5f} seconds")


if __name__ == "__main__":
first_word = input("Enter the first word:\n").strip()
second_word = input("Enter the second word:\n").strip()
# Get user input for words
first_word = input("Enter the first word for Levenshtein distance:\n").strip()
second_word = input("Enter the second word for Levenshtein distance:\n").strip()

# Calculate and print Levenshtein distances
print(f"{levenshtein_distance(first_word, second_word) = }")
print(f"{levenshtein_distance_optimized(first_word, second_word) = }")

result = levenshtein_distance(first_word, second_word)
print(f"Levenshtein distance between {first_word} and {second_word} is {result}")
# Benchmark the Levenshtein distance functions
benchmark_levenshtein_distance(levenshtein_distance)
benchmark_levenshtein_distance(levenshtein_distance_optimized)