From c3a7a86cdf888767b998b27e635782fd5b1e1df4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Pedro?=
Date: Sun, 1 Aug 2021 22:27:23 +0100
Subject: [PATCH] Add base implementation for levenshtein functions

---
 .../exec/expr/fn/impl/StringFunctions.java    | 45 +++++++++++++++++++++++++++++++++++++++++++++
 .../physical/impl/TestStringFunctions.java    | 11 +++++++++++
 2 files changed, 56 insertions(+)

diff --git a/sabot/kernel/src/main/java/com/dremio/exec/expr/fn/impl/StringFunctions.java b/sabot/kernel/src/main/java/com/dremio/exec/expr/fn/impl/StringFunctions.java
index 698ad6087f..79fbde0c19 100644
--- a/sabot/kernel/src/main/java/com/dremio/exec/expr/fn/impl/StringFunctions.java
+++ b/sabot/kernel/src/main/java/com/dremio/exec/expr/fn/impl/StringFunctions.java
@@ -1717,4 +1717,49 @@ public void eval() {
       out.end = outBytea.length;
     }
   }
+
+  /**
+   * Calculates the Levenshtein (edit) distance between two strings: the minimum number of
+   * single-byte insertions, deletions or substitutions that turn {@code in1} into {@code in2}.
+   *
+   * <p>Values are compared byte by byte, so a multi-byte UTF-8 character spans several positions.
+   */
+  @FunctionTemplate(name = "levenshtein", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
+  public static class Levenshtein implements SimpleFunction {
+    @Param VarCharHolder in1;
+    @Param VarCharHolder in2;
+    @Output IntHolder out;
+
+    @Override
+    public void setup() {}
+
+    @Override
+    public void eval() {
+      int len1 = in1.end - in1.start;
+      int len2 = in2.end - in2.start;
+      // Keep only two rows of the distance matrix: prev[j] is the distance between the first i
+      // bytes of in1 and the first j bytes of in2; curr is the row being filled in for i + 1.
+      int[] prev = new int[len2 + 1];
+      int[] curr = new int[len2 + 1];
+      for (int j = 0; j <= len2; j++) {
+        prev[j] = j;
+      }
+      for (int i = 0; i < len1; i++) {
+        // Each value occupies [start, end) of its buffer, so reads are offset by start.
+        byte b1 = in1.buffer.getByte(in1.start + i);
+        curr[0] = i + 1;
+        for (int j = 0; j < len2; j++) {
+          if (b1 == in2.buffer.getByte(in2.start + j)) {
+            curr[j + 1] = prev[j];
+          } else {
+            curr[j + 1] = Math.min(Math.min(prev[j + 1], curr[j]), prev[j]) + 1;
+          }
+        }
+        int[] swap = prev;
+        prev = curr;
+        curr = swap;
+      }
+      out.value = prev[len2];
+    }
+  }
 }
diff --git a/sabot/kernel/src/test/java/com/dremio/exec/physical/impl/TestStringFunctions.java 
b/sabot/kernel/src/test/java/com/dremio/exec/physical/impl/TestStringFunctions.java
index eb3b100e48..9078d9c4e0 100644
--- a/sabot/kernel/src/test/java/com/dremio/exec/physical/impl/TestStringFunctions.java
+++ b/sabot/kernel/src/test/java/com/dremio/exec/physical/impl/TestStringFunctions.java
@@ -343,4 +343,15 @@ public void stringfuncs(){
     });
   }
 
+  @Test
+  public void levenshtein() {
+    final Object[][] distanceCases = { // each row: {expression, value the test expects}
+      {"levenshtein('test', 'task')", 2},
+      {"levenshtein('kitten', 'sitting')", 3},
+      {"levenshtein('', 'a')", 1},
+      {"levenshtein('cat', 'coat')", 1},
+      {"levenshtein('book', 'back')", 2}};
+    testFunctions(distanceCases);
+  }
+
 }