-
Notifications
You must be signed in to change notification settings - Fork 302
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #234 from PEZ/clojure-levensthtein
Add Levenshtein for Clojure and Babashka (and Java for good measure)
- Loading branch information
Showing
4 changed files
with
147 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
(defn levenshtein-distance
  "Returns the Levenshtein edit distance between `s1` and `s2`.

  Builds a grid of (count s1)+1 rows by (count s2)+1 columns stored as a
  vector of vectors. Cell [r c] holds the distance between the first r
  items of `s1` and the first c items of `s2`; the result is the
  bottom-right cell."
  [s1 s2]
  (let [rows (count s1)
        cols (count s2)
        ;; Seed the borders: row 0 is 0..cols, and every other row starts
        ;; with its own index followed by placeholder zeros.
        seeded (vec (for [r (range (inc rows))]
                      (if (zero? r)
                        (vec (range (inc cols)))
                        (into [r] (repeat cols 0)))))
        ;; Writes cell [r c] from its upper, left and diagonal neighbours.
        fill-cell (fn [grid r c]
                    (let [cost (if (= (nth s1 (dec r)) (nth s2 (dec c))) 0 1)
                          deletion (inc (get-in grid [(dec r) c]))
                          insertion (inc (get-in grid [r (dec c)]))
                          substitution (+ (get-in grid [(dec r) (dec c)]) cost)]
                      (assoc-in grid [r c] (min deletion insertion substitution))))
        ;; Walk the interior row by row, left to right, so every neighbour
        ;; a cell reads has already been written.
        filled (reduce (fn [grid r]
                         (reduce (fn [g c] (fill-cell g r c))
                                 grid
                                 (range 1 (inc cols))))
                       seeded
                       (range 1 (inc rows)))]
    (get-in filled [rows cols])))
|
||
(defn main
  "Compares every ordered pair of distinct positions in `args` with
  `levenshtein-distance` and prints two lines: `times:` followed by the
  number of comparisons, n * (n - 1), and `min_distance:` followed by the
  smallest distance found.

  With fewer than two strings there is nothing to compare, so a usage
  line is printed instead of calling `min` on an empty sequence (which
  would throw)."
  [& args]
  (let [strings (vec args)
        n (count strings)]
    (if (< n 2)
      (println "Usage: pass at least two strings to compare")
      (let [distances (for [i (range n)
                            j (range n)
                            :when (not= i j)]
                        (levenshtein-distance (nth strings i) (nth strings j)))
            min-distance (apply min distances)]
        (println "times:" (* n (dec n)))
        (println "min_distance:" min-distance)))))
|
||
;; Run `main` with the command-line arguments only when this file's path
;; equals the "babashka.file" system property.
(let [invoked-file (System/getProperty "babashka.file")]
  (when (= *file* invoked-file)
    (apply main *command-line-args*)))
|
||
(comment | ||
(time | ||
(main "abcde" "abdef" "ghijk" "gjkl" "mno" "pqr" "stu" "vwx" "yz" "banana" "oranges")) | ||
;; times: 110 | ||
;; min_distance: 2 | ||
;; "Elapsed time: 1.56575 msecs" | ||
:rcf) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
(ns code | ||
(:gen-class)) | ||
|
||
(set! *unchecked-math* :warn-on-boxed) | ||
|
||
(defn levenshtein-distance
  "Returns the Levenshtein edit distance between the strings `s1` and `s2`.

  The distance table is kept in a single flat long array, laid out row
  by row, with (count s1)+1 rows and (count s2)+1 columns. The answer is
  the last cell of the last row."
  ^long [^String s1 ^String s2]
  (let [rows (int (count s1))
        cols (int (count s2))
        stride (inc cols)
        table (long-array (* (inc rows) stride))]
    ;; First column: row index r at the start of each row.
    (dotimes [r (inc rows)]
      (aset table (* r stride) r))
    ;; First row: column index c.
    (dotimes [c stride]
      (aset table c c))
    ;; Interior cells; r and c are zero-based string positions, so the
    ;; cell being written sits one row down and one column to the right.
    (dotimes [r rows]
      (let [prev-row (* r stride)
            this-row (+ prev-row stride)]
        (dotimes [c cols]
          (let [cost (if (= (.charAt s1 r) (.charAt s2 c)) 0 1)
                del (inc (aget table (+ prev-row (inc c))))
                ins (inc (aget table (+ this-row c)))
                sub (+ (aget table (+ prev-row c)) cost)]
            (aset table (+ this-row (inc c)) (min del (min ins sub)))))))
    (aget table (+ (* rows stride) cols))))
|
||
(defn -main
  "Compares every ordered pair of distinct positions in `args` with
  `levenshtein-distance` and prints two lines: `times:` followed by the
  number of comparisons, n * (n - 1), and `min_distance:` followed by the
  smallest distance found.

  With fewer than two strings there is nothing to compare, so a usage
  line is printed instead of calling `min` on an empty sequence (which
  would throw)."
  [& args]
  (let [strings (vec args)
        n (count strings)]
    (if (< n 2)
      (println "Usage: pass at least two strings to compare")
      (let [distances (for [i (range n)
                            j (range n)
                            :when (not= i j)]
                        (levenshtein-distance (nth strings i) (nth strings j)))
            min-distance (apply min distances)]
        (println "times:" (* n (dec n)))
        (println "min_distance:" min-distance)))))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
package jvm; | ||
|
||
/**
 * Levenshtein distance benchmark: compares every ordered pair of distinct
 * command-line arguments and reports the number of comparisons and the
 * smallest distance found.
 */
public class code {

    /**
     * Computes the Levenshtein edit distance between two strings.
     *
     * <p>The distance table is stored row by row in one flat array with
     * {@code s1.length() + 1} rows and {@code s2.length() + 1} columns.
     *
     * @param s1 the first string
     * @param s2 the second string
     * @return the value of the last cell of the table, i.e. the edit distance
     */
    public static long levenshteinDistance(String s1, String s2) {
        int m = s1.length();
        int n = s2.length();
        int width = n + 1;
        long[] matrix = new long[(m + 1) * width];

        // Initialize first column and first row; cell (0, 0) stays 0.
        for (int i = 1; i <= m; i++) {
            matrix[i * width] = i;
        }
        for (int j = 1; j <= n; j++) {
            matrix[j] = j;
        }

        // i and j are zero-based string positions; the cell being written is
        // one row down and one column right of them.
        for (int i = 0; i < m; i++) {
            int prevRow = i * width;
            int thisRow = prevRow + width;
            for (int j = 0; j < n; j++) {
                long cost = (s1.charAt(i) == s2.charAt(j)) ? 0 : 1;
                long del = matrix[prevRow + j + 1] + 1;
                long ins = matrix[thisRow + j] + 1;
                long sub = matrix[prevRow + j] + cost;
                matrix[thisRow + j + 1] = Math.min(del, Math.min(ins, sub));
            }
        }

        return matrix[m * width + n];
    }

    /**
     * Compares every ordered pair of arguments at distinct positions and
     * prints the comparison count and the minimum distance.
     *
     * @param args the strings to compare; a usage line is printed when fewer
     *     than two are given
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            System.out.println("Usage: java jvm.code <string1> <string2> ...");
            return;
        }

        long minDistance = -1;
        int times = 0;
        // Both loops must cover the whole array; stopping at length - 1 would
        // leave the last argument out of every comparison.
        for (int i = 0; i < args.length; i++) {
            for (int j = 0; j < args.length; j++) {
                if (i != j) {
                    long distance = levenshteinDistance(args[i], args[j]);
                    if (minDistance == -1 || minDistance > distance) {
                        minDistance = distance;
                    }
                    times++;
                }
            }
        }

        // The only output from the program should be the times (number of comparisons)
        // and min distance calculated of all comparisons. Two total lines of output,
        // formatted exactly like this.
        System.out.println("times: " + times);
        System.out.println("min_distance: " + minDistance);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters