Skip to content

Commit

Permalink
Merge pull request #234 from PEZ/clojure-levensthtein
Browse files Browse the repository at this point in the history
Add Levenshtein for Clojure and Babashka (and Java for good measure)
  • Loading branch information
bddicken authored Dec 11, 2024
2 parents 34c6d12 + 45d0ff5 commit 1789db4
Show file tree
Hide file tree
Showing 4 changed files with 147 additions and 0 deletions.
54 changes: 54 additions & 0 deletions levenshtein/bb/code.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
(defn levenshtein-distance
  "Returns the Levenshtein (edit) distance between the sequences s1 and s2.

  Builds the full (count s1)+1 by (count s2)+1 table as a vector of vectors.
  Cell [r c] holds the distance between the first r items of s1 and the first
  c items of s2; the answer is the bottom-right cell."
  [s1 s2]
  (let [rows (count s1)
        cols (count s2)
        ;; Row 0 is 0..cols; every later row starts with its own index
        ;; followed by zero placeholders that get overwritten below.
        seeded (into [(vec (range (inc cols)))]
                     (map (fn [r] (into [r] (repeat cols 0))))
                     (range 1 (inc rows)))
        ;; Writes the distance for cell [r c] from its three already-filled
        ;; neighbours (above, left, diagonal).
        fill-cell (fn [grid r c]
                    (let [penalty (if (= (nth s1 (dec r)) (nth s2 (dec c))) 0 1)]
                      (assoc-in grid [r c]
                                (min
                                 (inc (get-in grid [(dec r) c]))              ;; Deletion
                                 (inc (get-in grid [r (dec c)]))              ;; Insertion
                                 (+ (get-in grid [(dec r) (dec c)]) penalty)))))  ;; Substitution
        ;; Row-major sweep so every neighbour is final before it is read.
        filled (reduce (fn [grid r]
                         (reduce (fn [g c] (fill-cell g r c))
                                 grid
                                 (range 1 (inc cols))))
                       seeded
                       (range 1 (inc rows)))]
    (get-in filled [rows cols])))

(defn main
  "Computes the Levenshtein distance for every ordered pair of distinct
  argument positions and prints two lines: the number of comparisons
  (`times:`) and the smallest distance found (`min_distance:`).

  With fewer than two strings there are no pairs to compare, so a usage
  line is printed instead of the results."
  [& args]
  (let [strings (vec args)
        n (count strings)]
    (if (< n 2)
      ;; Guard: with no pairs `distances` would be empty and
      ;; (apply min ()) throws an ArityException.
      (println "Usage: bb code.clj <string1> <string2> ...")
      (let [distances (for [i (range n)
                            j (range n)
                            :when (not= i j)]
                        (levenshtein-distance (nth strings i) (nth strings j)))
            min-distance (apply min distances)]
        (println "times:" (* n (dec n)))
        (println "min_distance:" min-distance)))))

;; Script entry guard: call `main` with the command-line arguments only when
;; this file's path (*file*) equals the "babashka.file" system property,
;; presumably so that loading the file from a REPL or another namespace does
;; not trigger a run.
(when (= *file* (System/getProperty "babashka.file"))
(apply main *command-line-args*))

;; Rich comment form: the body is never evaluated when the file is loaded.
;; It keeps a sample invocation for interactive use, followed by the output
;; recorded from one such run.
(comment
(time
(main "abcde" "abdef" "ghijk" "gjkl" "mno" "pqr" "stu" "vwx" "yz" "banana" "oranges"))
;; times: 110
;; min_distance: 2
;; "Elapsed time: 1.56575 msecs"
:rcf)

34 changes: 34 additions & 0 deletions levenshtein/clojure/code.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
;; Namespace for the JVM Clojure variant; :gen-class requests a generated
;; Java class for this namespace when it is AOT-compiled.
(ns code
(:gen-class))

;; Compile the arithmetic below with unchecked (non-overflow-checking) math
;; and emit a compiler warning wherever an operation falls back to boxed math.
(set! *unchecked-math* :warn-on-boxed)

(defn levenshtein-distance
  "Returns the Levenshtein (edit) distance between strings s1 and s2 as a long.

  The distance table is stored row-major in a single long-array of
  (len1 + 1) * (len2 + 1) cells; cell (r, c) lives at index r * stride + c,
  where stride is len2 + 1. Characters are compared via String.charAt."
  ^long [^String s1 ^String s2]
  (let [rows (int (count s1))
        cols (int (count s2))
        stride (inc cols)
        grid (long-array (* (inc rows) stride))]
    ;; First column: distance from each prefix of s1 to the empty string.
    (dotimes [r (inc rows)]
      (aset grid (* r stride) r))
    ;; First row: distance from the empty string to each prefix of s2.
    (dotimes [c stride]
      (aset grid c c))
    (dotimes [r rows]
      (let [above (* r stride)          ; offset of table row r
            current (+ above stride)]   ; offset of table row r + 1 (being filled)
        (dotimes [c cols]
          (let [penalty (if (= (.charAt s1 r) (.charAt s2 c)) 0 1)
                deletion (inc (aget grid (+ above (inc c))))
                insertion (inc (aget grid (+ current c)))
                substitution (+ (aget grid (+ above c)) penalty)]
            (aset grid
                  (+ current (inc c))
                  (min deletion (min insertion substitution)))))))
    ;; Bottom-right cell holds the distance between the full strings.
    (aget grid (+ (* rows stride) cols))))

(defn -main
  "Program entry point. Computes the Levenshtein distance for every ordered
  pair of distinct argument positions and prints two lines: the number of
  comparisons (`times:`) and the smallest distance found (`min_distance:`).

  With fewer than two strings there are no pairs to compare, so a usage
  line is printed instead of the results."
  [& args]
  (let [strings (vec args)
        n (count strings)]
    (if (< n 2)
      ;; Guard: with no pairs `distances` would be empty and
      ;; (apply min ()) throws an ArityException.
      (println "Usage: code <string1> <string2> ...")
      (let [distances (for [i (range n)
                            j (range n)
                            :when (not= i j)]
                        (levenshtein-distance (nth strings i) (nth strings j)))
            min-distance (apply min distances)]
        (println "times:" (* n (dec n)))
        (println "min_distance:" min-distance)))))
58 changes: 58 additions & 0 deletions levenshtein/jvm/code.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package jvm;

/**
 * Levenshtein benchmark entry: computes the edit distance between every ordered pair of
 * distinct command-line arguments and reports the comparison count and the minimum distance.
 */
public class code {

    /**
     * Computes the Levenshtein (edit) distance between two strings.
     *
     * <p>The full {@code (m + 1) x (n + 1)} table is kept in one flat, row-major array; cell
     * {@code (i, j)} lives at index {@code i * (n + 1) + j}. Strings are compared one UTF-16
     * {@code char} at a time via {@link String#charAt(int)}.
     *
     * @param s1 the first string
     * @param s2 the second string
     * @return the minimum number of single-char insertions, deletions and substitutions that
     *     turn {@code s1} into {@code s2}
     */
    public static long levenshteinDistance(String s1, String s2) {
        int m = s1.length();
        int n = s2.length();
        long[] matrix = new long[(m + 1) * (n + 1)];

        // Initialize first row and column (cell 0 is already 0 from array allocation).
        for (int i = 1; i <= m; i++) {
            matrix[i * (n + 1)] = i;
        }
        for (int j = 1; j <= n; j++) {
            matrix[j] = j;
        }

        // Compute Levenshtein distance; iteration (i, j) fills table cell (i + 1, j + 1).
        for (int i = 0; i < m; i++) {
            for (int j = 0; j < n; j++) {
                long cost = (s1.charAt(i) == s2.charAt(j)) ? 0 : 1;
                long del = matrix[i * (n + 1) + (j + 1)] + 1;
                long ins = matrix[(i + 1) * (n + 1) + j] + 1;
                long sub = matrix[i * (n + 1) + j] + cost;
                matrix[(i + 1) * (n + 1) + (j + 1)] = Math.min(del, Math.min(ins, sub));
            }
        }

        return matrix[m * (n + 1) + n];
    }

    /**
     * Compares every ordered pair of distinct arguments and prints the number of comparisons
     * and the smallest distance found. Prints a usage line when fewer than two strings are
     * given.
     *
     * @param args the strings to compare
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            System.out.println("Usage: java jvm.code <string1> <string2> ...");
            return;
        }

        long minDistance = -1;
        int times = 0;
        // Both loops must cover every argument, including the last one: Java's args array
        // has no program-name slot, so the upper bound is args.length, not args.length - 1.
        for (int i = 0; i < args.length; i++) {
            for (int j = 0; j < args.length; j++) {
                if (i != j) {
                    long distance = levenshteinDistance(args[i], args[j]);
                    if (minDistance == -1 || minDistance > distance) {
                        minDistance = distance;
                    }
                    times++;
                }
            }
        }

        // The only output from the program should be the times (number of comparisons)
        // and min distance calculated of all comparisons. Two total lines of output,
        // formatted exactly like this.
        System.out.println("times: " + times);
        System.out.println("min_distance: " + minDistance);
    }
}
1 change: 1 addition & 0 deletions run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ run "Clojure" "java -cp clojure/classes:$(clojure -Spath)" "./clojure/code"
run "Babashka" "bb -cp clojure -m" "./babashka/code"
run "COBOL" "" "./cobol/main"
run "Octave" "octave ./octave/code.m 40"
# Babashka script-mode entry that runs bb/code.clj directly.
# NOTE(review): this reuses the "Babashka" label of the `-m` invocation above;
# confirm the two result rows remain distinguishable in the benchmark output.
run "Babashka" "bb" "bb/code.clj"
#run "F# AOT" "./fsharp/code-aot/code"
#run "C# AOT" "./csharp/code-aot/code"
#run "Haxe JVM" "java -jar haxe/code.jar" # was getting errors running `haxelib install hxjava`
Expand Down

0 comments on commit 1789db4

Please sign in to comment.