Skip to content

Commit 2ddc605

Browse files
herunkang2018JiajunBernoulli
authored andcommitted
[CALCITE-5826] Add FIND_IN_SET function (enabled in Hive and Spark libraries)
1 parent 8af1f11 commit 2ddc605

File tree

6 files changed

+62
-0
lines changed

6 files changed

+62
-0
lines changed

core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java

+2
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@
178178
import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXTRACT_VALUE;
179179
import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXTRACT_XML;
180180
import static org.apache.calcite.sql.fun.SqlLibraryOperators.FACTORIAL;
181+
import static org.apache.calcite.sql.fun.SqlLibraryOperators.FIND_IN_SET;
181182
import static org.apache.calcite.sql.fun.SqlLibraryOperators.FLOOR_BIG_QUERY;
182183
import static org.apache.calcite.sql.fun.SqlLibraryOperators.FORMAT_DATE;
183184
import static org.apache.calcite.sql.fun.SqlLibraryOperators.FORMAT_DATETIME;
@@ -598,6 +599,7 @@ Builder populate() {
598599
defineReflective(REGEXP_INSTR, BuiltInMethod.REGEXP_INSTR2.method,
599600
BuiltInMethod.REGEXP_INSTR3.method, BuiltInMethod.REGEXP_INSTR4.method,
600601
BuiltInMethod.REGEXP_INSTR5.method);
602+
defineMethod(FIND_IN_SET, BuiltInMethod.FIND_IN_SET.method, NullPolicy.ANY);
601603

602604
map.put(TRIM, new TrimImplementor());
603605

core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java

+25
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,8 @@
146146
@SuppressWarnings("UnnecessaryUnboxing")
147147
@Deterministic
148148
public class SqlFunctions {
149+
private static final String COMMA_DELIMITER = ",";
150+
149151
@SuppressWarnings("unused")
150152
private static final DecimalFormat DOUBLE_FORMAT =
151153
NumberUtil.decimalFormat("0.0E0");
@@ -1142,6 +1144,29 @@ public static int levenshtein(String string1, String string2) {
11421144
return LEVENSHTEIN_DISTANCE.apply(string1, string2);
11431145
}
11441146

1147+
/** SQL FIND_IN_SET(matchStr, textStr) function.
1148+
* Returns the index (1-based) of the given matchStr
1149+
* in the comma-delimited list textStr. Returns 0,
1150+
* if the matchStr is not found or if the matchStr
1151+
* contains a comma. */
1152+
public static @Nullable Integer findInSet(
1153+
@Nullable String matchStr,
1154+
@Nullable String textStr) {
1155+
if (matchStr == null || textStr == null) {
1156+
return null;
1157+
}
1158+
if (matchStr.contains(COMMA_DELIMITER)) {
1159+
return 0;
1160+
}
1161+
String[] splits = textStr.split(COMMA_DELIMITER);
1162+
for (int i = 0; i < splits.length; i++) {
1163+
if (matchStr.equals(splits[i])) {
1164+
return i + 1;
1165+
}
1166+
}
1167+
return 0;
1168+
}
1169+
11451170
/** SQL ASCII(string) function. */
11461171
public static int ascii(String s) {
11471172
return s.isEmpty()

core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java

+8
Original file line numberDiff line numberDiff line change
@@ -413,6 +413,14 @@ static RelDataType deriveTypeSplit(SqlOperatorBinding operatorBinding,
413413
OperandTypes.STRING_STRING_OPTIONAL_STRING,
414414
SqlFunctionCategory.STRING);
415415

416+
/** The "FIND_IN_SET(matchStr, textStr)" function; returns the 1-based
 * index of matchStr in the comma-delimited list textStr, or 0 if matchStr
 * is not found or contains a comma. Takes two string operands and returns
 * a nullable integer. */
417+
@LibraryOperator(libraries = {HIVE, SPARK})
418+
public static final SqlFunction FIND_IN_SET =
419+
SqlBasicFunction.create("FIND_IN_SET",
420+
ReturnTypes.INTEGER_NULLABLE,
421+
OperandTypes.STRING_STRING,
422+
SqlFunctionCategory.STRING);
423+
416424
/** The "GREATEST(value, value)" function. */
417425
@LibraryOperator(libraries = {BIG_QUERY, ORACLE})
418426
public static final SqlFunction GREATEST =

core/src/main/java/org/apache/calcite/util/BuiltInMethod.java

+1
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,7 @@ public enum BuiltInMethod {
379379
DIFFERENCE(SqlFunctions.class, "difference", String.class, String.class),
380380
REVERSE(SqlFunctions.class, "reverse", String.class),
381381
LEVENSHTEIN(SqlFunctions.class, "levenshtein", String.class, String.class),
382+
FIND_IN_SET(SqlFunctions.class, "findInSet", String.class, String.class),
382383
LEFT(SqlFunctions.class, "left", String.class, int.class),
383384
RIGHT(SqlFunctions.class, "right", String.class, int.class),
384385
TO_BASE64(SqlFunctions.class, "toBase64", String.class),

site/_docs/reference.md

+1
Original file line numberDiff line numberDiff line change
@@ -2732,6 +2732,7 @@ BigQuery's type system uses confusingly different names for types and functions:
27322732
| o | EXISTSNODE(xml, xpath, [, namespaces ]) | Determines whether traversal of a XML document using a specified xpath results in any nodes. Returns 0 if no nodes remain after applying the XPath traversal on the document fragment of the element or elements matched by the XPath expression. Returns 1 if any nodes remain. The optional namespace value that specifies a default mapping or namespace mapping for prefixes, which is used when evaluating the XPath expression.
27332733
| m | EXTRACTVALUE(xml, xpathExpr) | Returns the text of the first text node which is a child of the element or elements matched by the XPath expression.
27342734
| h s | FACTORIAL(integer) | Returns the factorial of *integer*; the range of *integer* is [0, 20]. Otherwise, returns NULL
2735+
| h s | FIND_IN_SET(matchStr, textStr) | Returns the index (1-based) of the given *matchStr* in the comma-delimited *textStr*. Returns 0 if *matchStr* is not found or contains a comma. For example, `FIND_IN_SET('bc', 'a,bc,def')` returns 2
27352736
| b | FLOOR(value) | Similar to standard `FLOOR(value)` except if *value* is an integer type, the return type is a double
27362737
| b | FORMAT_DATE(string, date) | Formats *date* according to the specified format *string*
27372738
| b | FORMAT_DATETIME(string, timestamp) | Formats *timestamp* according to the specified format *string*

testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java

+25
Original file line numberDiff line numberDiff line change
@@ -4708,6 +4708,31 @@ void testBitGetFunc(SqlOperatorFixture f, String functionName) {
47084708
f0.forEachLibrary(list(SqlLibrary.HIVE, SqlLibrary.SPARK), consumer);
47094709
}
47104710

4711+
@Test void testFindInSetFunc() {
4712+
final SqlOperatorFixture f0 = fixture().setFor(SqlLibraryOperators.FIND_IN_SET);
4713+
f0.checkFails("^find_in_set('ab', 'abc,b,ab,c,def')^",
4714+
"No match found for function signature FIND_IN_SET\\(<CHARACTER>, <CHARACTER>\\)",
4715+
false);
4716+
final Consumer<SqlOperatorFixture> consumer = f -> {
4717+
f.checkString("find_in_set('ab', 'abc,b,ab,c,def')",
4718+
"3", "INTEGER NOT NULL");
4719+
f.checkString("find_in_set('ab', ',,,ab,abc,b,ab,c,def')",
4720+
"4", "INTEGER NOT NULL");
4721+
f.checkString("find_in_set('def', ',,,ab,abc,c,def')",
4722+
"7", "INTEGER NOT NULL");
4723+
f.checkString("find_in_set(_UTF8'\u4F60\u597D', _UTF8'b,ab,c,def,\u4F60\u597D')",
4724+
"5", "INTEGER NOT NULL");
4725+
f.checkString("find_in_set('acd', ',,,ab,abc,c,def')",
4726+
"0", "INTEGER NOT NULL");
4727+
f.checkString("find_in_set('ab,', 'abc,b,ab,c,def')",
4728+
"0", "INTEGER NOT NULL");
4729+
f.checkNull("find_in_set(cast(null as varchar), 'abc,b,ab,c,def')");
4730+
f.checkNull("find_in_set('ab', cast(null as varchar))");
4731+
f.checkNull("find_in_set(cast(null as varchar), cast(null as varchar))");
4732+
};
4733+
f0.forEachLibrary(list(SqlLibrary.HIVE, SqlLibrary.SPARK), consumer);
4734+
}
4735+
47114736
@Test void testIfFunc() {
47124737
final SqlOperatorFixture f = fixture();
47134738
checkIf(f.withLibrary(SqlLibrary.BIG_QUERY));

0 commit comments

Comments
 (0)