80 changes: 80 additions & 0 deletions .github/workflows/build_and_test.yml
@@ -625,3 +625,83 @@ jobs:
with:
name: unit-tests-log-tpcds--8-hadoop3.2-hive2.3
path: "**/target/unit-tests.log"

docker-integration-tests:
name: Run docker integration tests
runs-on: ubuntu-20.04
env:
HADOOP_PROFILE: hadoop3.2
HIVE_PROFILE: hive2.3
GITHUB_PREV_SHA: ${{ github.event.before }}
SPARK_LOCAL_IP: localhost
ORACLE_DOCKER_IMAGE_NAME: oracle/database:18.4.0-xe
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
with:
fetch-depth: 0
repository: apache/spark
ref: master
- name: Sync the current branch with the latest in Apache Spark
if: github.repository != 'apache/spark'
id: sync-branch
run: |
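# Squash-merge the PR branch into the apache/spark master checkout so that the
# steps below exercise the merged state of the code.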
git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
Member: Oh, we should add echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref" after this line because we're running tests with run-tests.py.

Member: I will revert this for now; it seems like it breaks other tests.

Member: @sarutak would you mind opening a PR again for this?

Member Author: Ah, O.K. I'll do it. Thanks for letting me know.

- name: Cache Scala, SBT and Maven
uses: actions/cache@v2
with:
path: |
build/apache-maven-*
build/scala-*
build/*.jar
~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
path: ~/.cache/coursier
key: docker-integration-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
docker-integration-coursier-
- name: Install Java 8
uses: actions/setup-java@v1
with:
java-version: 8
- name: Cache Oracle docker-images repository
id: cache-oracle-docker-images
uses: actions/cache@v2
with:
path: ./oracle/docker-images
# The key should contain the commit hash of the oracle/docker-images repository to check out.
key: oracle-docker-images-3f422c4a35b423dfcdbcc57a84f01db6c82eb6c1
- name: Checkout Oracle docker-images repository
uses: actions/checkout@v2
with:
fetch-depth: 0
repository: oracle/docker-images
ref: 3f422c4a35b423dfcdbcc57a84f01db6c82eb6c1
path: ./oracle/docker-images
- name: Install Oracle Docker image
run: |
cd oracle/docker-images/OracleDatabase/SingleInstance/dockerfiles
./buildContainerImage.sh -v 18.4.0 -x
- name: Run tests
run: |
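# Note: steps.sync-branch.outputs.APACHE_SPARK_REF is only populated if the
# sync-branch step publishes it via "::set-output" (see the review discussion above).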
export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
./dev/run-tests --parallelism 2 --modules docker-integration-tests --included-tags org.apache.spark.tags.DockerTest
- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v2
with:
name: test-results-docker-integration--8-hadoop3.2-hive2.3
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: failure()
uses: actions/upload-artifact@v2
with:
name: unit-tests-log-docker-integration--8-hadoop3.2-hive2.3
path: "**/target/unit-tests.log"
16 changes: 9 additions & 7 deletions dev/run-tests.py
@@ -122,19 +122,21 @@ def determine_modules_to_test(changed_modules, deduplicated=True):
['graphx', 'examples']
>>> [x.name for x in determine_modules_to_test([modules.sql])]
... # doctest: +NORMALIZE_WHITESPACE
['sql', 'avro', 'hive', 'mllib', 'sql-kafka-0-10', 'examples', 'hive-thriftserver',
'pyspark-sql', 'repl', 'sparkr', 'pyspark-mllib', 'pyspark-pandas', 'pyspark-ml']
['sql', 'avro', 'hive', 'mllib', 'sql-kafka-0-10', 'examples',
'hive-thriftserver', 'pyspark-sql', 'repl', 'sparkr',
'pyspark-mllib', 'pyspark-pandas', 'pyspark-ml']
>>> sorted([x.name for x in determine_modules_to_test(
... [modules.sparkr, modules.sql], deduplicated=False)])
... # doctest: +NORMALIZE_WHITESPACE
['avro', 'examples', 'hive', 'hive-thriftserver', 'mllib', 'pyspark-ml',
'pyspark-mllib', 'pyspark-pandas', 'pyspark-sql', 'repl', 'sparkr', 'sql', 'sql-kafka-0-10']
['avro', 'examples', 'hive', 'hive-thriftserver', 'mllib',
'pyspark-ml', 'pyspark-mllib', 'pyspark-pandas', 'pyspark-sql',
'repl', 'sparkr', 'sql', 'sql-kafka-0-10']
>>> sorted([x.name for x in determine_modules_to_test(
... [modules.sql, modules.core], deduplicated=False)])
... # doctest: +NORMALIZE_WHITESPACE
['avro', 'catalyst', 'core', 'examples', 'graphx', 'hive', 'hive-thriftserver',
'mllib', 'mllib-local', 'pyspark-core', 'pyspark-ml', 'pyspark-mllib', 'pyspark-pandas',
'pyspark-resource', 'pyspark-sql', 'pyspark-streaming', 'repl', 'root',
['avro', 'catalyst', 'core', 'examples', 'graphx', 'hive',
'hive-thriftserver', 'mllib', 'mllib-local', 'pyspark-core', 'pyspark-ml', 'pyspark-mllib',
'pyspark-pandas', 'pyspark-resource', 'pyspark-sql', 'pyspark-streaming', 'repl', 'root',
'sparkr', 'sql', 'sql-kafka-0-10', 'streaming', 'streaming-kafka-0-10',
'streaming-kinesis-asl']
"""
15 changes: 15 additions & 0 deletions dev/sparktestsupport/modules.py
@@ -17,6 +17,7 @@

from functools import total_ordering
import itertools
import os
import re

all_modules = []
@@ -743,6 +744,20 @@ def __hash__(self):
]
)

docker_integration_tests = Module(
name="docker-integration-tests",
dependencies=[],
build_profile_flags=["-Pdocker-integration-tests"],
source_file_regexes=["external/docker-integration-tests"],
sbt_test_goals=["docker-integration-tests/test"],
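    # Enable the Docker integration tests only when running under GitHub Actions.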
environ=None if "GITHUB_ACTIONS" not in os.environ else {
"ENABLE_DOCKER_INTEGRATION_TESTS": "1"
},
test_tags=[
"org.apache.spark.tags.DockerTest"
]
)

# The root module is a dummy module which is used to run all of the tests.
# No other modules should directly depend on this module.
root = Module(
@@ -31,7 +31,7 @@ import org.apache.spark.tags.DockerTest
/**
* To run this test suite for a specific version (e.g., ibmcom/db2:11.5.4.0):
* {{{
* DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0
* ENABLE_DOCKER_INTEGRATION_TESTS=1 DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0
* ./build/sbt -Pdocker-integration-tests
* "testOnly org.apache.spark.sql.jdbc.DB2IntegrationSuite"
* }}}
@@ -33,7 +33,7 @@ import org.apache.spark.tags.DockerTest
/**
* To run this test suite for a specific version (e.g., ibmcom/db2:11.5.4.0):
* {{{
* DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0
* ENABLE_DOCKER_INTEGRATION_TESTS=1 DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0
* ./build/sbt -Pdocker-integration-tests "testOnly *DB2KrbIntegrationSuite"
* }}}
*/
@@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.jdbc

import org.scalactic.source.Position
import org.scalatest.Tag

import org.apache.spark.SparkFunSuite

/**
* Helper trait for running Docker integration tests.
* Tests are ignored unless the enabling environment variable is set.
*/
trait DockerIntegrationFunSuite extends SparkFunSuite {
private val envVarNameForEnablingTests = "ENABLE_DOCKER_INTEGRATION_TESTS"
private val shouldRunTests = sys.env.getOrElse(envVarNameForEnablingTests, "0") match {
case "1" => true
case _ => false
}

/** Run the test if the enabling environment variable is set; otherwise register it as ignored. */
override def test(testName: String, testTags: Tag*)(testBody: => Any)
(implicit pos: Position): Unit = {
if (shouldRunTests) {
super.test(testName, testTags: _*)(testBody)
} else {
ignore(s"$testName [enable by setting env var $envVarNameForEnablingTests=1]")(testBody)
}
}

/** Run the given body of code only if Docker integration tests are enabled */
def runIfTestsEnabled(message: String)(body: => Unit): Unit = {
if (shouldRunTests) {
body
} else {
ignore(s"$message [enable by setting env var $envVarNameForEnablingTests=1]")(())
}
}
}
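
For reference, a minimal sketch of how a suite can mix in this trait; the suite name and test body below are hypothetical, not part of this PR:

class ExampleDockerSuite extends DockerIntegrationFunSuite {
  // Registered normally when ENABLE_DOCKER_INTEGRATION_TESTS=1; otherwise the
  // test is reported as ignored, with the enabling hint appended to its name.
  test("query the containerized database") {
    // ... assertions against a Docker-backed database ...
  }
}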
@@ -92,7 +92,8 @@ abstract class DatabaseOnDocker {
containerConfigBuilder: ContainerConfig.Builder): Unit = {}
}

abstract class DockerJDBCIntegrationSuite extends SharedSparkSession with Eventually {
abstract class DockerJDBCIntegrationSuite
extends SharedSparkSession with Eventually with DockerIntegrationFunSuite {

protected val dockerIp = DockerUtils.getDockerIp()
val db: DatabaseOnDocker
@@ -114,7 +115,7 @@ abstract class DockerJDBCIntegrationSuite extends SharedSparkSession with Eventually
private var pulled: Boolean = false
protected var jdbcUrl: String = _

override def beforeAll(): Unit = {
override def beforeAll(): Unit = runIfTestsEnabled(s"Prepare for ${this.getClass.getName}") {
super.beforeAll()
try {
docker = DefaultDockerClient.fromEnv.build()
@@ -41,7 +41,7 @@ abstract class DockerKrbJDBCIntegrationSuite extends DockerJDBCIntegrationSuite
protected var keytabFullPath: String = _
protected def setAuthentication(keytabFile: String, principal: String): Unit

override def beforeAll(): Unit = {
override def beforeAll(): Unit = runIfTestsEnabled(s"Prepare for ${this.getClass.getName}") {
SecurityUtils.setGlobalKrbDebug(true)

val kdcDir = Utils.createTempDir()
@@ -27,7 +27,7 @@ import org.apache.spark.tags.DockerTest
/**
* To run this test suite for a specific version (e.g., mariadb:10.5.8):
* {{{
* MARIADB_DOCKER_IMAGE_NAME=mariadb:10.5.8
* ENABLE_DOCKER_INTEGRATION_TESTS=1 MARIADB_DOCKER_IMAGE_NAME=mariadb:10.5.8
* ./build/sbt -Pdocker-integration-tests
* "testOnly org.apache.spark.sql.jdbc.MariaDBKrbIntegrationSuite"
* }}}
@@ -28,7 +28,7 @@ import org.apache.spark.tags.DockerTest
/**
* To run this test suite for a specific version (e.g., 2019-GA-ubuntu-16.04):
* {{{
* MSSQLSERVER_DOCKER_IMAGE_NAME=2019-GA-ubuntu-16.04
* ENABLE_DOCKER_INTEGRATION_TESTS=1 MSSQLSERVER_DOCKER_IMAGE_NAME=2019-GA-ubuntu-16.04
* ./build/sbt -Pdocker-integration-tests
* "testOnly org.apache.spark.sql.jdbc.MsSqlServerIntegrationSuite"
* }}}
@@ -28,7 +28,7 @@ import org.apache.spark.tags.DockerTest
/**
* To run this test suite for a specific version (e.g., mysql:5.7.31):
* {{{
* MYSQL_DOCKER_IMAGE_NAME=mysql:5.7.31
* ENABLE_DOCKER_INTEGRATION_TESTS=1 MYSQL_DOCKER_IMAGE_NAME=mysql:5.7.31
* ./build/sbt -Pdocker-integration-tests
* "testOnly org.apache.spark.sql.jdbc.MySQLIntegrationSuite"
* }}}
@@ -50,6 +50,7 @@ import org.apache.spark.tags.DockerTest
* $ cd docker-images/OracleDatabase/SingleInstance/dockerfiles
* $ ./buildContainerImage.sh -v 18.4.0 -x
* $ export ORACLE_DOCKER_IMAGE_NAME=oracle/database:18.4.0-xe
* $ export ENABLE_DOCKER_INTEGRATION_TESTS=1
* $ cd $SPARK_HOME
* $ ./build/sbt -Pdocker-integration-tests
* "testOnly org.apache.spark.sql.jdbc.OracleIntegrationSuite"
@@ -61,7 +62,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark
import testImplicits._

override val db = new DatabaseOnDocker {
override val imageName = sys.env("ORACLE_DOCKER_IMAGE_NAME")
lazy override val imageName = sys.env("ORACLE_DOCKER_IMAGE_NAME")
override val env = Map(
"ORACLE_PWD" -> "oracle"
)
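
A note on the lazy qualifier above: sys.env("ORACLE_DOCKER_IMAGE_NAME") throws a NoSuchElementException when the variable is unset, so an eager val would fail as soon as the suite is instantiated, even when the Docker tests are being skipped. Making the member lazy defers the lookup until a running test first reads imageName. A minimal illustration (hypothetical snippet, not from the PR):

// Eager: evaluated at construction time; fails immediately if the variable is unset.
// override val imageName = sys.env("ORACLE_DOCKER_IMAGE_NAME")

// Lazy: evaluated on first access, i.e. only once a test actually runs.
lazy override val imageName = sys.env("ORACLE_DOCKER_IMAGE_NAME")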
@@ -31,7 +31,7 @@ import org.apache.spark.tags.DockerTest
/**
* To run this test suite for a specific version (e.g., postgres:13.0):
* {{{
* POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0
* ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0
* ./build/sbt -Pdocker-integration-tests
* "testOnly org.apache.spark.sql.jdbc.PostgresIntegrationSuite"
* }}}
@@ -27,7 +27,7 @@ import org.apache.spark.tags.DockerTest
/**
* To run this test suite for a specific version (e.g., postgres:13.0):
* {{{
* POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0
* ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0
* ./build/sbt -Pdocker-integration-tests "testOnly *PostgresKrbIntegrationSuite"
* }}}
*/
@@ -31,7 +31,7 @@ import org.apache.spark.tags.DockerTest
/**
* To run this test suite for a specific version (e.g., ibmcom/db2:11.5.4.0):
* {{{
* DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0
* ENABLE_DOCKER_INTEGRATION_TESTS=1 DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0
* ./build/sbt -Pdocker-integration-tests "testOnly *v2.DB2IntegrationSuite"
* }}}
*/
@@ -31,7 +31,7 @@ import org.apache.spark.tags.DockerTest
/**
* To run this test suite for a specific version (e.g., 2019-GA-ubuntu-16.04):
* {{{
* MSSQLSERVER_DOCKER_IMAGE_NAME=2019-GA-ubuntu-16.04
* ENABLE_DOCKER_INTEGRATION_TESTS=1 MSSQLSERVER_DOCKER_IMAGE_NAME=2019-GA-ubuntu-16.04
* ./build/sbt -Pdocker-integration-tests "testOnly *v2*MsSqlServerIntegrationSuite"
* }}}
*/
@@ -32,8 +32,8 @@ import org.apache.spark.tags.DockerTest
*
* To run this test suite for a specific version (e.g., mysql:5.7.31):
* {{{
* MYSQL_DOCKER_IMAGE_NAME=mysql:5.7.31
* ./build/sbt -Pdocker-integration-tests "testOnly *v2*MySQLIntegrationSuite"
* ENABLE_DOCKER_INTEGRATION_TESTS=1 MYSQL_DOCKER_IMAGE_NAME=mysql:5.7.31
* ./build/sbt -Pdocker-integration-tests "testOnly *v2*MySQLIntegrationSuite"
*
* }}}
*
@@ -45,6 +45,7 @@ import org.apache.spark.tags.DockerTest
* $ cd docker-images/OracleDatabase/SingleInstance/dockerfiles
* $ ./buildContainerImage.sh -v 18.4.0 -x
* $ export ORACLE_DOCKER_IMAGE_NAME=oracle/database:18.4.0-xe
* $ export ENABLE_DOCKER_INTEGRATION_TESTS=1
* $ cd $SPARK_HOME
* $ ./build/sbt -Pdocker-integration-tests
* "testOnly org.apache.spark.sql.jdbc.v2.OracleIntegrationSuite"
@@ -55,7 +56,7 @@ import org.apache.spark.tags.DockerTest
class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest {
override val catalogName: String = "oracle"
override val db = new DatabaseOnDocker {
override val imageName = sys.env("ORACLE_DOCKER_IMAGE_NAME")
lazy override val imageName = sys.env("ORACLE_DOCKER_IMAGE_NAME")
override val env = Map(
"ORACLE_PWD" -> "oracle"
)
@@ -29,7 +29,7 @@ import org.apache.spark.tags.DockerTest
/**
* To run this test suite for a specific version (e.g., postgres:13.0):
* {{{
* POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0
* ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0
* ./build/sbt -Pdocker-integration-tests "testOnly *v2.PostgresIntegrationSuite"
* }}}
*/
@@ -28,7 +28,7 @@ import org.apache.spark.tags.DockerTest
/**
* To run this test suite for a specific version (e.g., postgres:13.0):
* {{{
* POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0
* ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0
* ./build/sbt -Pdocker-integration-tests "testOnly *v2.PostgresNamespaceSuite"
* }}}
*/
@@ -24,11 +24,12 @@ import org.apache.log4j.Level
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.connector.catalog.NamespaceChange
import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog
import org.apache.spark.sql.jdbc.DockerIntegrationFunSuite
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.tags.DockerTest

@DockerTest
private[v2] trait V2JDBCNamespaceTest extends SharedSparkSession {
private[v2] trait V2JDBCNamespaceTest extends SharedSparkSession with DockerIntegrationFunSuite {
val catalog = new JDBCTableCatalog()

def builtinNamespaces: Array[Array[String]]
@@ -20,12 +20,13 @@ package org.apache.spark.sql.jdbc.v2
import org.apache.log4j.Level

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.jdbc.DockerIntegrationFunSuite
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.sql.types._
import org.apache.spark.tags.DockerTest

@DockerTest
private[v2] trait V2JDBCTest extends SharedSparkSession {
private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFunSuite {
val catalogName: String
// dialect specific update column type test
def testUpdateColumnType(tbl: String): Unit