-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-27297] [SQL] Add higher order functions to scala API #24232
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9cf1ebf
efc6ba4
b9dceec
1fb46a3
03d602f
6bf07d8
b03399a
79d6f84
7adaf9c
ac5c1c2
40ac418
fb5f8ef
85979d4
5d389d2
5d77d6b
a8c7ecd
96fb0ad
5fa3e71
0bfa483
815e9f6
47b100b
4baf084
9c0f70e
06b4c82
412ece5
c49e7d3
182a08b
ef6b6bb
a543c90
527c0cb
013187f
554a992
0433756
f371413
c3e320c
84ccf55
e43033b
c1c76a9
10a5f2e
722f0e6
1bf2654
64c0f87
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package test.org.apache.spark.sql;

import java.util.HashMap;
import java.util.List;

import static scala.collection.JavaConverters.mapAsScalaMap;

import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.types.*;
import static org.apache.spark.sql.types.DataTypes.*;
import static org.apache.spark.sql.functions.*;
import org.apache.spark.sql.test.TestSparkSession;
import static test.org.apache.spark.sql.JavaTestUtils.*;

/**
 * Java-side tests for the higher-order-function column APIs added to
 * {@code org.apache.spark.sql.functions} (SPARK-27297): {@code transform},
 * {@code filter}, {@code exists}, {@code forall}, {@code aggregate},
 * {@code zip_with}, {@code transform_keys}, {@code transform_values},
 * {@code map_filter} and {@code map_zip_with}.
 *
 * <p>Each test runs the function against a small fixture DataFrame that
 * covers a non-empty array/map, an empty array, and a {@code null} row, and
 * checks the exact result via {@code JavaTestUtils.checkAnswer}.
 */
public class JavaHigherOrderFunctionsSuite {
  private transient TestSparkSession spark;
  // Single-column ("x") DataFrame of nullable integer arrays; see setUpArrDf().
  private Dataset<Row> arrDf;
  // Single-column ("x") DataFrame of integer->integer maps; see setUpMapDf().
  private Dataset<Row> mapDf;

  /**
   * Builds the array fixture: two populated arrays, an empty array, and a
   * null row, so every test exercises the empty and null edge cases.
   */
  private void setUpArrDf() {
    List<Row> data = toRows(
      makeArray(1, 9, 8, 7),
      makeArray(5, 8, 9, 7, 2),
      // Explicit type witness: a bare makeArray() would not infer Integer.
      JavaTestUtils.<Integer>makeArray(),
      null
    );
    StructType schema = new StructType()
      .add("x", new ArrayType(IntegerType, true), true);
    arrDf = spark.createDataFrame(data, schema);
  }

  /**
   * Builds the map fixture: one populated map {1->1, 2->2} and a null row.
   */
  private void setUpMapDf() {
    List<Row> data = toRows(
      new HashMap<Integer, Integer>() {{
        put(1, 1);
        put(2, 2);
      }},
      null
    );
    StructType schema = new StructType()
      .add("x", new MapType(IntegerType, IntegerType, true));
    mapDf = spark.createDataFrame(data, schema);
  }

  @Before
  public void setUp() {
    // Fresh session per test keeps state isolated across the suite.
    spark = new TestSparkSession();
    setUpArrDf();
    setUpMapDf();
  }

  @After
  public void tearDown() {
    spark.stop();
    spark = null;
  }

  @Test
  public void testTransform() {
    // Unary variant: element-wise +1; empty array and null pass through.
    checkAnswer(
      arrDf.select(transform(col("x"), x -> x.plus(1))),
      toRows(
        makeArray(2, 10, 9, 8),
        makeArray(6, 9, 10, 8, 3),
        JavaTestUtils.<Integer>makeArray(),
        null
      )
    );
    // Binary variant: second lambda argument is the 0-based element index.
    checkAnswer(
      arrDf.select(transform(col("x"), (x, i) -> x.plus(i))),
      toRows(
        makeArray(1, 10, 10, 10),
        makeArray(5, 9, 11, 10, 6),
        JavaTestUtils.<Integer>makeArray(),
        null
      )
    );
  }

  @Test
  public void testFilter() {
    // Keeps only elements where x + 1 == 10, i.e. the 9s.
    checkAnswer(
      arrDf.select(filter(col("x"), x -> x.plus(1).equalTo(10))),
      toRows(
        makeArray(9),
        makeArray(9),
        JavaTestUtils.<Integer>makeArray(),
        null
      )
    );
  }

  @Test
  public void testExists() {
    // True iff some element satisfies the predicate; empty array yields false.
    checkAnswer(
      arrDf.select(exists(col("x"), x -> x.plus(1).equalTo(10))),
      toRows(
        true,
        true,
        false,
        null
      )
    );
  }

  @Test
  public void testForall() {
    // True iff every element satisfies the predicate; vacuously true on empty.
    checkAnswer(
      arrDf.select(forall(col("x"), x -> x.plus(1).equalTo(10))),
      toRows(
        false,
        false,
        true,
        null
      )
    );
  }

  @Test
  public void testAggregate() {
    // Fold with initial value 0: sums the elements (1+9+8+7=25, 5+8+9+7+2=31).
    checkAnswer(
      arrDf.select(aggregate(col("x"), lit(0), (acc, x) -> acc.plus(x))),
      toRows(
        25,
        31,
        0,
        null
      )
    );
    // Four-argument variant with an identity finisher must match the above.
    checkAnswer(
      arrDf.select(aggregate(col("x"), lit(0), (acc, x) -> acc.plus(x), x -> x)),
      toRows(
        25,
        31,
        0,
        null
      )
    );
  }

  @Test
  public void testZipWith() {
    // Constant merge function: output length tracks the input arrays.
    checkAnswer(
      arrDf.select(zip_with(col("x"), col("x"), (a, b) -> lit(42))),
      toRows(
        makeArray(42, 42, 42, 42),
        makeArray(42, 42, 42, 42, 42),
        JavaTestUtils.<Integer>makeArray(),
        null
      )
    );
  }

  @Test
  public void testTransformKeys() {
    // New key = k + v: {1->1, 2->2} becomes {2->1, 4->2}.
    checkAnswer(
      mapDf.select(transform_keys(col("x"), (k, v) -> k.plus(v))),
      toRows(
        mapAsScalaMap(new HashMap<Integer, Integer>() {{
          put(2, 1);
          put(4, 2);
        }}),
        null
      )
    );
  }

  @Test
  public void testTransformValues() {
    // New value = k + v: {1->1, 2->2} becomes {1->2, 2->4}.
    checkAnswer(
      mapDf.select(transform_values(col("x"), (k, v) -> k.plus(v))),
      toRows(
        mapAsScalaMap(new HashMap<Integer, Integer>() {{
          put(1, 2);
          put(2, 4);
        }}),
        null
      )
    );
  }

  @Test
  public void testMapFilter() {
    // Constant-false predicate drops every entry, leaving an empty map.
    checkAnswer(
      mapDf.select(map_filter(col("x"), (k, v) -> lit(false))),
      toRows(
        mapAsScalaMap(new HashMap<Integer, Integer>()),
        null
      )
    );
  }

  @Test
  public void testMapZipWith() {
    // Zipping a map with itself keeps the key set; merge yields false values.
    checkAnswer(
      mapDf.select(map_zip_with(col("x"), col("x"), (k, v1, v2) -> lit(false))),
      toRows(
        mapAsScalaMap(new HashMap<Integer, Boolean>() {{
          put(1, false);
          put(2, false);
        }}),
        null
      )
    );
  }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But how do we support this in Java?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we change the signatures to accept
scala.runtime.AbstractFunctions instead, to avoid using the Function traits?

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's add
"(Scala-specific)" at least to each doc. BTW, please take a look at the style guide at https://github.com/databricks/scala-style-guide

Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually a better idea would probably be to use java functional interfaces.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
And of course we would use the existing functional interfaces first from
java.util.function, but I don't think there are any that accept three parameters like some of the functions here require.

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It appears these interfaces already exist in the source tree: https://github.com/apache/spark/blob/v2.4.0/core/src/main/java/org/apache/spark/api/java/function/Function3.java
I'll come back later to add java-specific apis that utilizes these.