Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Remove Spark2 from Java testing projects #23749

Merged
merged 1 commit into from
Oct 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 2 additions & 13 deletions sdks/java/testing/load-tests/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def runnerDependency = (project.hasProperty(runnerProperty)
: ":runners:direct-java")
def loadTestRunnerVersionProperty = "runner.version"
def loadTestRunnerVersion = project.findProperty(loadTestRunnerVersionProperty)
def shouldProvideSpark = ":runners:spark:2".equals(runnerDependency)
def isSparkRunner = runnerDependency.startsWith(":runners:spark:")
def isDataflowRunner = ":runners:google-cloud-dataflow-java".equals(runnerDependency)
def isDataflowRunnerV2 = isDataflowRunner && "V2".equals(loadTestRunnerVersion)
def runnerConfiguration = ":runners:direct-java".equals(runnerDependency) ? "shadow" : null
Expand Down Expand Up @@ -82,20 +82,9 @@ dependencies {

gradleRun project(project.path)
gradleRun project(path: runnerDependency, configuration: runnerConfiguration)

// The Spark runner requires the user to provide a Spark dependency. For self-contained
// runs with the Spark runner, we can provide such a dependency. This is deliberately phrased
// to not hardcode any runner other than :runners:direct-java
if (shouldProvideSpark) {
gradleRun library.java.spark_streaming
gradleRun library.java.spark_core, {
exclude group:"org.slf4j", module:"jul-to-slf4j"
}
gradleRun library.java.spark_sql
}
}

if (shouldProvideSpark) {
if (isSparkRunner) {
configurations.gradleRun {
// Using Spark runner causes a StackOverflowError if slf4j-jdk14 is on the classpath
exclude group: "org.slf4j", module: "slf4j-jdk14"
Expand Down
47 changes: 18 additions & 29 deletions sdks/java/testing/nexmark/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,7 @@ def nexmarkRunnerDependency = project.findProperty(nexmarkRunnerProperty)
?: ":runners:direct-java"
def nexmarkRunnerVersionProperty = "nexmark.runner.version"
def nexmarkRunnerVersion = project.findProperty(nexmarkRunnerVersionProperty)
def shouldProvideSpark2 = ":runners:spark:2".equals(nexmarkRunnerDependency)
def shouldProvideSpark3 = ":runners:spark:3".equals(nexmarkRunnerDependency)
def isSparkRunner = nexmarkRunnerDependency.startsWith(":runners:spark:")
def isDataflowRunner = ":runners:google-cloud-dataflow-java".equals(nexmarkRunnerDependency)
def isDataflowRunnerV2 = isDataflowRunner && "V2".equals(nexmarkRunnerVersion)
def runnerConfiguration = ":runners:direct-java".equals(nexmarkRunnerDependency) ? "shadow" : null
Expand Down Expand Up @@ -91,39 +90,15 @@ dependencies {
testImplementation project(path: ":sdks:java:testing:test-utils", configuration: "testRuntimeMigration")
gradleRun project(project.path)
gradleRun project(path: nexmarkRunnerDependency, configuration: runnerConfiguration)

// The Spark runner requires the user to provide a Spark dependency. For self-contained
// runs with the Spark runner, we can provide such a dependency. This is deliberately phrased
// to not hardcode any runner other than :runners:direct-java
if (shouldProvideSpark2) {
gradleRun library.java.spark_core, {
exclude group:"org.slf4j", module:"jul-to-slf4j"
}
gradleRun library.java.spark_sql
gradleRun library.java.spark_streaming
}
if (shouldProvideSpark3) {
gradleRun library.java.spark3_core, {
exclude group:"org.slf4j", module:"jul-to-slf4j"
}

gradleRun library.java.spark3_sql
gradleRun library.java.spark3_streaming
}
}

if (shouldProvideSpark2) {
configurations.gradleRun {
// Using Spark runner causes a StackOverflowError if slf4j-jdk14 is on the classpath
exclude group: "org.slf4j", module: "slf4j-jdk14"
}
}
if (shouldProvideSpark3) {
if (isSparkRunner) {
configurations.gradleRun {
// Using Spark runner causes a StackOverflowError if slf4j-jdk14 is on the classpath
exclude group: "org.slf4j", module: "slf4j-jdk14"
}
}

def getNexmarkArgs = {
def nexmarkArgsStr = project.findProperty(nexmarkArgsProperty) ?: ""
def nexmarkArgsList = new ArrayList<String>()
Expand Down Expand Up @@ -155,14 +130,20 @@ def getNexmarkArgs = {
}
}
}

if(isSparkRunner) {
// For transparency, be explicit about configuration of local Spark
nexmarkArgsList.add("--sparkMaster=local[4]")
}

return nexmarkArgsList
}

// Execute the Nexmark queries or suites via Gradle.
//
// Parameters:
// -Pnexmark.runner
// Specify a runner subproject, such as ":runners:spark:2" or ":runners:flink:1.13"
// Specify a runner subproject, such as ":runners:spark:3" or ":runners:flink:1.13"
// Defaults to ":runners:direct-java"
//
// -Pnexmark.args
Expand All @@ -177,6 +158,14 @@ task run(type: JavaExec) {
dependsOn ":runners:google-cloud-dataflow-java:worker:legacy-worker:shadowJar"
}
}
if(isSparkRunner) {
// Disable UI
systemProperty "spark.ui.enabled", "false"
systemProperty "spark.ui.showConsoleProgress", "false"
// Dataset runner only
systemProperty "spark.sql.shuffle.partitions", "4"
}

mainClass = "org.apache.beam.sdk.nexmark.Main"
classpath = configurations.gradleRun
args nexmarkArgsList.toArray()
Expand Down
4 changes: 2 additions & 2 deletions sdks/java/testing/tpcds/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,10 @@ To run a query using ZetaSQL planner (currently Query96 can be run using ZetaSQL

## Spark Runner

To execute TPC-DS benchmark with Query3 for 1Gb dataset on Apache Spark 2.x, run the following example command from the command line:
To execute TPC-DS benchmark with Query3 for 1Gb dataset on Apache Spark 3.x, run the following example command from the command line:

```bash
./gradlew :sdks:java:testing:tpcds:run -Ptpcds.runner=":runners:spark:2" -Ptpcds.args=" \
./gradlew :sdks:java:testing:tpcds:run -Ptpcds.runner=":runners:spark:3" -Ptpcds.args=" \
--runner=SparkRunner \
--queries=3 \
--tpcParallel=1 \
Expand Down
2 changes: 1 addition & 1 deletion sdks/java/testing/tpcds/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ if (isSpark) {
//
// Parameters:
// -Ptpcds.runner
// Specify a runner subproject, such as ":runners:spark:2" or ":runners:flink:1.13"
// Specify a runner subproject, such as ":runners:spark:3" or ":runners:flink:1.13"
// Defaults to ":runners:direct-java"
//
// -Ptpcds.args
Expand Down