From 6308cc0f47262da37e71d262ddf323633ec591c6 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Sat, 22 May 2021 14:55:30 +0900 Subject: [PATCH 01/14] Add docker-integration-tests to GA. --- .github/workflows/build_and_test.yml | 19 ++++++++++++++++++ dev/run-tests.py | 20 ++++++++++++------- dev/sparktestsupport/modules.py | 11 ++++++++++ .../sql/jdbc/OracleIntegrationSuite.scala | 2 +- .../sql/jdbc/v2/OracleIntegrationSuite.scala | 2 +- 5 files changed, 45 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 47c7261df715..9ade114f357d 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -66,6 +66,12 @@ jobs: hive: hive2.3 excluded-tags: org.apache.spark.tags.ExtendedSQLTest comment: "- other tests" + - modules: docker-integration-tests + java: 8 + hadoop: hadoop3.2 + hive: hive2.3 + included-tags: org.apache.spark.tags.DockerTest + comment: "- docker integration tests" env: MODULES_TO_TEST: ${{ matrix.modules }} EXCLUDED_TAGS: ${{ matrix.excluded-tags }} @@ -131,11 +137,21 @@ jobs: python3.8 -m pip install numpy 'pyarrow<3.0.0' pandas scipy xmlrunner python3.8 -m pip list # Run the tests. + - name: Install Oracle docker image + if: (contains(matrix.modules, 'docker-integration-tests')) + run: | + pushd /tmp + git clone https://github.com/oracle/docker-images.git + cd docker-images/OracleDatabase/SingleInstance/dockerfiles + ./buildContainerImage.sh -v 18.4.0 -x + popd + # Run the tests. - name: Run tests run: | export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }} # Hive and SQL tests become flaky when running in parallel as it's too intensive. if [[ "$MODULES_TO_TEST" == "hive" ]] || [[ "$MODULES_TO_TEST" == "sql" ]]; then export SERIAL_SBT_TESTS=1; fi + if [[ "$MODULES_TO_TEST" == "docker-integration-tests" ]]; then export ORACLE_DOCKER_IMAGE_NAME=oracle/database:18.4.0-xe; fi ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS" - name: Upload test results to report if: always() @@ -489,6 +505,9 @@ jobs: name: Run TPC-DS queries with SF=1 runs-on: ubuntu-20.04 env: + HADOOP_PROFILE: hadoop3.2 + HIVE_PROFILE: hive2.3 + GITHUB_PREV_SHA: ${{ github.event.before }} SPARK_LOCAL_IP: localhost steps: - name: Checkout Spark repository diff --git a/dev/run-tests.py b/dev/run-tests.py index d5d3445bd612..3c3a9dc45d98 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -122,19 +122,21 @@ def determine_modules_to_test(changed_modules, deduplicated=True): ['graphx', 'examples'] >>> [x.name for x in determine_modules_to_test([modules.sql])] ... # doctest: +NORMALIZE_WHITESPACE - ['sql', 'avro', 'hive', 'mllib', 'sql-kafka-0-10', 'examples', 'hive-thriftserver', - 'pyspark-sql', 'repl', 'sparkr', 'pyspark-mllib', 'pyspark-pandas', 'pyspark-ml'] + ['sql', 'avro', 'docker-integration-tests', 'hive', 'mllib', 'sql-kafka-0-10', 'examples', + 'hive-thriftserver', 'pyspark-sql', 'repl', 'sparkr', + 'pyspark-mllib', 'pyspark-pandas', 'pyspark-ml'] >>> sorted([x.name for x in determine_modules_to_test( ... [modules.sparkr, modules.sql], deduplicated=False)]) ... 
# doctest: +NORMALIZE_WHITESPACE - ['avro', 'examples', 'hive', 'hive-thriftserver', 'mllib', 'pyspark-ml', - 'pyspark-mllib', 'pyspark-pandas', 'pyspark-sql', 'repl', 'sparkr', 'sql', 'sql-kafka-0-10'] + ['avro', 'docker-integration-tests', 'examples', 'hive', 'hive-thriftserver', 'mllib', + 'pyspark-ml', 'pyspark-mllib', 'pyspark-pandas', 'pyspark-sql', + 'repl', 'sparkr', 'sql', 'sql-kafka-0-10'] >>> sorted([x.name for x in determine_modules_to_test( ... [modules.sql, modules.core], deduplicated=False)]) ... # doctest: +NORMALIZE_WHITESPACE - ['avro', 'catalyst', 'core', 'examples', 'graphx', 'hive', 'hive-thriftserver', - 'mllib', 'mllib-local', 'pyspark-core', 'pyspark-ml', 'pyspark-mllib', 'pyspark-pandas', - 'pyspark-resource', 'pyspark-sql', 'pyspark-streaming', 'repl', 'root', + ['avro', 'catalyst', 'core', 'docker-integration-tests', 'examples', 'graphx', 'hive', + 'hive-thriftserver', 'mllib', 'mllib-local', 'pyspark-core', 'pyspark-ml', 'pyspark-mllib', + 'pyspark-pandas', 'pyspark-resource', 'pyspark-sql', 'pyspark-streaming', 'repl', 'root', 'sparkr', 'sql', 'sql-kafka-0-10', 'streaming', 'streaming-kafka-0-10', 'streaming-kinesis-asl'] """ @@ -687,6 +689,10 @@ def main(): test_modules = determine_modules_to_test(changed_modules) excluded_tags = determine_tags_to_exclude(changed_modules) + # Run docker-integration-tests only if it's explicitly specified. + if opts.modules and modules.docker_integration_tests.name not in opts.modules: + excluded_tags.extend(modules.docker_integration_tests.test_tags) + # If there is no changed module found, tests all. if not changed_modules: changed_modules = [modules.root] diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 6bb3f2baaaf6..fa824ed91a7e 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -733,6 +733,17 @@ def __hash__(self): ] ) +docker_integration_tests = Module( + name="docker-integration-tests", + dependencies=[sql], + build_profile_flags=["-Pdocker-integration-tests"], + source_file_regexes=["external/docker-integration-tests"], + sbt_test_goals=["docker-integration-tests/test"], + test_tags=[ + "org.apache.spark.tags.DockerTest" + ] +) + # The root module is a dummy module which is used to run all of the tests. # No other modules should directly depend on this module. 
root = Module( diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala index 064763c26707..7942302025fc 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala @@ -61,7 +61,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark import testImplicits._ override val db = new DatabaseOnDocker { - override val imageName = sys.env("ORACLE_DOCKER_IMAGE_NAME") + lazy override val imageName = sys.env("ORACLE_DOCKER_IMAGE_NAME") override val env = Map( "ORACLE_PWD" -> "oracle" ) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala index 84b952937d0c..4a32ac0f3595 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala @@ -55,7 +55,7 @@ import org.apache.spark.tags.DockerTest class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { override val catalogName: String = "oracle" override val db = new DatabaseOnDocker { - override val imageName = sys.env("ORACLE_DOCKER_IMAGE_NAME") + lazy override val imageName = sys.env("ORACLE_DOCKER_IMAGE_NAME") override val env = Map( "ORACLE_PWD" -> "oracle" ) From 5d17d603ae37152cb10e3ce31b2d439bd9b5b001 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Sat, 22 May 2021 15:33:46 +0900 Subject: [PATCH 02/14] Minor fix. --- .github/workflows/build_and_test.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 39481cadf01a..9ee574290c01 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -560,9 +560,6 @@ jobs: name: Run TPC-DS queries with SF=1 runs-on: ubuntu-20.04 env: - HADOOP_PROFILE: hadoop3.2 - HIVE_PROFILE: hive2.3 - GITHUB_PREV_SHA: ${{ github.event.before }} SPARK_LOCAL_IP: localhost steps: - name: Checkout Spark repository From f8ea91700d9435085073dce8fe774da6c2a9ad98 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Sat, 22 May 2021 18:52:15 +0900 Subject: [PATCH 03/14] Modify the condition in run-tests.py. --- dev/run-tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 3c3a9dc45d98..b631613a2c20 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -690,7 +690,7 @@ def main(): excluded_tags = determine_tags_to_exclude(changed_modules) # Run docker-integration-tests only if it's explicitly specified. - if opts.modules and modules.docker_integration_tests.name not in opts.modules: + if not opts.modules or modules.docker_integration_tests.name not in opts.modules: excluded_tags.extend(modules.docker_integration_tests.test_tags) # If there is no changed module found, tests all. From 92da5c04a31f452170c22febd63b8e761cac1be1 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Mon, 24 May 2021 11:33:38 +0900 Subject: [PATCH 04/14] Separate docker-integration-tests job. 
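
Instead of adding docker-integration-tests to the existing test matrix, give it a
dedicated job: the job checks out oracle/docker-images, builds the Oracle XE 18.4.0
image, exports ORACLE_DOCKER_IMAGE_NAME, and runs only the suites tagged with
org.apache.spark.tags.DockerTest.

A rough local equivalent of what the new job runs (a sketch, assuming Docker is
available and the Oracle image has already been built with
./buildContainerImage.sh -v 18.4.0 -x from the oracle/docker-images checkout):

    export ORACLE_DOCKER_IMAGE_NAME=oracle/database:18.4.0-xe
    ./dev/run-tests --parallelism 2 --modules docker-integration-tests \
      --included-tags org.apache.spark.tags.DockerTest
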
--- .github/workflows/build_and_test.yml | 96 +++++++++++++++++++++++----- 1 file changed, 80 insertions(+), 16 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 9ee574290c01..111d0c3f4217 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -66,12 +66,6 @@ jobs: hive: hive2.3 excluded-tags: org.apache.spark.tags.ExtendedSQLTest comment: "- other tests" - - modules: docker-integration-tests - java: 8 - hadoop: hadoop3.2 - hive: hive2.3 - included-tags: org.apache.spark.tags.DockerTest - comment: "- docker integration tests" env: MODULES_TO_TEST: ${{ matrix.modules }} EXCLUDED_TAGS: ${{ matrix.excluded-tags }} @@ -135,21 +129,11 @@ jobs: python3.8 -m pip install numpy 'pyarrow<3.0.0' pandas scipy xmlrunner python3.8 -m pip list # Run the tests. - - name: Install Oracle docker image - if: (contains(matrix.modules, 'docker-integration-tests')) - run: | - pushd /tmp - git clone https://github.com/oracle/docker-images.git - cd docker-images/OracleDatabase/SingleInstance/dockerfiles - ./buildContainerImage.sh -v 18.4.0 -x - popd - # Run the tests. - name: Run tests run: | export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }} # Hive and SQL tests become flaky when running in parallel as it's too intensive. if [[ "$MODULES_TO_TEST" == "hive" ]] || [[ "$MODULES_TO_TEST" == "sql" ]]; then export SERIAL_SBT_TESTS=1; fi - if [[ "$MODULES_TO_TEST" == "docker-integration-tests" ]]; then export ORACLE_DOCKER_IMAGE_NAME=oracle/database:18.4.0-xe; fi ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS" - name: Upload test results to report if: always() @@ -164,6 +148,86 @@ jobs: name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }} path: "**/target/unit-tests.log" + docker-integration-tests: + name: Run docker integration tests + runs-on: ubuntu-20.04 + env: + HADOOP_PROFILE: hadoop3.2 + HIVE_PROFILE: hive2.3 + GITHUB_PREV_SHA: ${{ github.event.before }} + SPARK_LOCAL_IP: localhost + ORACLE_DOCKER_IMAGE_NAME: oracle/database:18.4.0-xe + steps: + - name: Checkout Spark repository + uses: actions/checkout@v2 + with: + fetch-depth: 0 + repository: apache/spark + ref: master + - name: Sync the current branch with the latest in Apache Spark + if: github.repository != 'apache/spark' + id: sync-branch + run: | + git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" + - name: Cache Scala, SBT and Maven + uses: actions/cache@v2 + with: + path: | + build/apache-maven-* + build/scala-* + build/*.jar + ~/.sbt + key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} + restore-keys: | + build- + - name: Cache Coursier local repository + uses: actions/cache@v2 + with: + path: ~/.cache/coursier + key: docker-integration-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + restore-keys: | + docker-integration-coursier- + - name: Install Java 8 + uses: actions/setup-java@v1 + with: + java-version: 8 + - name: Cache Oracle docker-images repository + id: cache-oracle-docker-images + uses: 
actions/cache@v2 + with: + path: ./oracle/docker-images + # key should contains the commit hash of the Oracle docker images to be checkout. + key: oracle-docker-images-3f422c4a35b423dfcdbcc57a84f01db6c82eb6c1 + - name: Checkout Oracle docker-images repository + uses: actions/checkout@v2 + with: + fetch-depth: 0 + repository: oracle/docker-images + ref: 3f422c4a35b423dfcdbcc57a84f01db6c82eb6c1 + path: ./oracle/docker-images + - name: Install Oracle docker image + run: | + cd oracle/docker-images/OracleDatabase/SingleInstance/dockerfiles + ./buildContainerImage.sh -v 18.4.0 -x + - name: Run tests + run: | + export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }} + ./dev/run-tests --parallelism 2 --modules docker-integration-tests --included-tags org.apache.spark.tags.DockerTest + - name: Upload test results to report + if: always() + uses: actions/upload-artifact@v2 + with: + name: test-results-docker-integration--8-hadoop3.2-hive2.3 + path: "**/target/test-reports/*.xml" + - name: Upload unit tests log files + if: failure() + uses: actions/upload-artifact@v2 + with: + name: unit-tests-log-docker-integration--8-hadoop3.2-hive2.3 + path: "**/target/unit-tests.log" + pyspark: name: "Build modules: ${{ matrix.modules }}" runs-on: ubuntu-20.04 From 32521b6124ecef8aaddd0f1e2cf98d08f95629df Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Wed, 26 May 2021 19:03:29 +0900 Subject: [PATCH 05/14] Reorder docker-integration-tests job. --- .github/workflows/build_and_test.yml | 160 +++++++++++++-------------- 1 file changed, 80 insertions(+), 80 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 4585d34808da..7fb591f3aa91 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -148,86 +148,6 @@ jobs: name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }} path: "**/target/unit-tests.log" - docker-integration-tests: - name: Run docker integration tests - runs-on: ubuntu-20.04 - env: - HADOOP_PROFILE: hadoop3.2 - HIVE_PROFILE: hive2.3 - GITHUB_PREV_SHA: ${{ github.event.before }} - SPARK_LOCAL_IP: localhost - ORACLE_DOCKER_IMAGE_NAME: oracle/database:18.4.0-xe - steps: - - name: Checkout Spark repository - uses: actions/checkout@v2 - with: - fetch-depth: 0 - repository: apache/spark - ref: master - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - id: sync-branch - run: | - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" - - name: Cache Scala, SBT and Maven - uses: actions/cache@v2 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - build- - - name: Cache Coursier local repository - uses: actions/cache@v2 - with: - path: ~/.cache/coursier - key: docker-integration-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} - restore-keys: | - docker-integration-coursier- - - name: Install Java 8 - uses: actions/setup-java@v1 - with: - java-version: 8 - - name: Cache Oracle docker-images repository - 
id: cache-oracle-docker-images - uses: actions/cache@v2 - with: - path: ./oracle/docker-images - # key should contains the commit hash of the Oracle docker images to be checkout. - key: oracle-docker-images-3f422c4a35b423dfcdbcc57a84f01db6c82eb6c1 - - name: Checkout Oracle docker-images repository - uses: actions/checkout@v2 - with: - fetch-depth: 0 - repository: oracle/docker-images - ref: 3f422c4a35b423dfcdbcc57a84f01db6c82eb6c1 - path: ./oracle/docker-images - - name: Install Oracle docker image - run: | - cd oracle/docker-images/OracleDatabase/SingleInstance/dockerfiles - ./buildContainerImage.sh -v 18.4.0 -x - - name: Run tests - run: | - export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }} - ./dev/run-tests --parallelism 2 --modules docker-integration-tests --included-tags org.apache.spark.tags.DockerTest - - name: Upload test results to report - if: always() - uses: actions/upload-artifact@v2 - with: - name: test-results-docker-integration--8-hadoop3.2-hive2.3 - path: "**/target/test-reports/*.xml" - - name: Upload unit tests log files - if: failure() - uses: actions/upload-artifact@v2 - with: - name: unit-tests-log-docker-integration--8-hadoop3.2-hive2.3 - path: "**/target/unit-tests.log" - pyspark: name: "Build modules: ${{ matrix.modules }}" runs-on: ubuntu-20.04 @@ -705,3 +625,83 @@ jobs: with: name: unit-tests-log-tpcds--8-hadoop3.2-hive2.3 path: "**/target/unit-tests.log" + + docker-integration-tests: + name: Run docker integration tests + runs-on: ubuntu-20.04 + env: + HADOOP_PROFILE: hadoop3.2 + HIVE_PROFILE: hive2.3 + GITHUB_PREV_SHA: ${{ github.event.before }} + SPARK_LOCAL_IP: localhost + ORACLE_DOCKER_IMAGE_NAME: oracle/database:18.4.0-xe + steps: + - name: Checkout Spark repository + uses: actions/checkout@v2 + with: + fetch-depth: 0 + repository: apache/spark + ref: master + - name: Sync the current branch with the latest in Apache Spark + if: github.repository != 'apache/spark' + id: sync-branch + run: | + git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" + - name: Cache Scala, SBT and Maven + uses: actions/cache@v2 + with: + path: | + build/apache-maven-* + build/scala-* + build/*.jar + ~/.sbt + key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} + restore-keys: | + build- + - name: Cache Coursier local repository + uses: actions/cache@v2 + with: + path: ~/.cache/coursier + key: docker-integration-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + restore-keys: | + docker-integration-coursier- + - name: Install Java 8 + uses: actions/setup-java@v1 + with: + java-version: 8 + - name: Cache Oracle docker-images repository + id: cache-oracle-docker-images + uses: actions/cache@v2 + with: + path: ./oracle/docker-images + # key should contains the commit hash of the Oracle docker images to be checkout. 
+ key: oracle-docker-images-3f422c4a35b423dfcdbcc57a84f01db6c82eb6c1 + - name: Checkout Oracle docker-images repository + uses: actions/checkout@v2 + with: + fetch-depth: 0 + repository: oracle/docker-images + ref: 3f422c4a35b423dfcdbcc57a84f01db6c82eb6c1 + path: ./oracle/docker-images + - name: Install Oracle docker image + run: | + cd oracle/docker-images/OracleDatabase/SingleInstance/dockerfiles + ./buildContainerImage.sh -v 18.4.0 -x + - name: Run tests + run: | + export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }} + ./dev/run-tests --parallelism 2 --modules docker-integration-tests --included-tags org.apache.spark.tags.DockerTest + - name: Upload test results to report + if: always() + uses: actions/upload-artifact@v2 + with: + name: test-results-docker-integration--8-hadoop3.2-hive2.3 + path: "**/target/test-reports/*.xml" + - name: Upload unit tests log files + if: failure() + uses: actions/upload-artifact@v2 + with: + name: unit-tests-log-docker-integration--8-hadoop3.2-hive2.3 + path: "**/target/unit-tests.log" From 620af5bcdc1fbc446f5cbb539ce79e6163a8d425 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Wed, 26 May 2021 22:19:37 +0900 Subject: [PATCH 06/14] Introduce ENABLE_DOCKER_INTEGRATION_TESTS. --- .github/workflows/build_and_test.yml | 1 + dev/run-tests.py | 6 +- .../spark/sql/jdbc/DB2IntegrationSuite.scala | 12 +- .../sql/jdbc/DockerIntegrationFunSuite.scala | 50 +++++++ .../sql/jdbc/DockerJDBCIntegrationSuite.scala | 124 +++++++++--------- .../jdbc/DockerKrbJDBCIntegrationSuite.scala | 42 +++--- .../jdbc/MsSqlServerIntegrationSuite.scala | 12 +- .../sql/jdbc/MySQLIntegrationSuite.scala | 12 +- .../sql/jdbc/OracleIntegrationSuite.scala | 27 ++-- .../sql/jdbc/PostgresIntegrationSuite.scala | 20 +-- .../sql/jdbc/v2/V2JDBCNamespaceTest.scala | 5 +- .../apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 17 +-- 12 files changed, 194 insertions(+), 134 deletions(-) create mode 100644 external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerIntegrationFunSuite.scala diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 7fb591f3aa91..3b45920571cf 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -635,6 +635,7 @@ jobs: GITHUB_PREV_SHA: ${{ github.event.before }} SPARK_LOCAL_IP: localhost ORACLE_DOCKER_IMAGE_NAME: oracle/database:18.4.0-xe + ENABLE_DOCKER_INTEGRATION_TESTS: 1 steps: - name: Checkout Spark repository uses: actions/checkout@v2 diff --git a/dev/run-tests.py b/dev/run-tests.py index 39ccb6852201..b0d06e79e580 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -689,9 +689,9 @@ def main(): test_modules = determine_modules_to_test(changed_modules) excluded_tags = determine_tags_to_exclude(changed_modules) - # Run docker-integration-tests only if it's explicitly specified. - if not opts.modules or modules.docker_integration_tests.name not in opts.modules: - excluded_tags.extend(modules.docker_integration_tests.test_tags) + # With this script, disable docker integration by default. + if not os.environ.get("ENABLE_DOCKER_INTEGRATION_TESTS"): + os.environ["ENABLE_DOCKER_INTEGRATION_TESTS"] = "0" # If there is no changed module found, tests all. 
if not changed_modules: diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala index 62bba797413a..b305dc75ddbe 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala @@ -80,7 +80,7 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { + "'Kathy')").executeUpdate() } - test("Basic test") { + testIfEnabled("Basic test") { val df = sqlContext.read.jdbc(jdbcUrl, "tbl", new Properties) val rows = df.collect() assert(rows.length == 2) @@ -90,7 +90,7 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { assert(types(1).equals("class java.lang.String")) } - test("Numeric types") { + testIfEnabled("Numeric types") { val df = sqlContext.read.jdbc(jdbcUrl, "numbers", new Properties) val rows = df.collect() assert(rows.length == 1) @@ -119,7 +119,7 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { assert(rows(0).getDecimal(9) == new BigDecimal("1234567891234567.123456789123456789")) } - test("Date types") { + testIfEnabled("Date types") { withDefaultTimeZone(UTC) { val df = sqlContext.read.jdbc(jdbcUrl, "dates", new Properties) val rows = df.collect() @@ -135,7 +135,7 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { } } - test("String types") { + testIfEnabled("String types") { val df = sqlContext.read.jdbc(jdbcUrl, "strings", new Properties) val rows = df.collect() assert(rows.length == 1) @@ -152,7 +152,7 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { assert(rows(0).getString(4).equals("""Kathy""")) } - test("Basic write test") { + testIfEnabled("Basic write test") { // cast decflt column with precision value of 38 to DB2 max decimal precision value of 31. val df1 = sqlContext.read.jdbc(jdbcUrl, "numbers", new Properties) .selectExpr("small", "med", "big", "deci", "flt", "dbl", "real", @@ -173,7 +173,7 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { assert(rows(0).getString(2) == "1") } - test("query JDBC option") { + testIfEnabled("query JDBC option") { val expectedResult = Set( (42, "fred"), (17, "dave") diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerIntegrationFunSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerIntegrationFunSuite.scala new file mode 100644 index 000000000000..d6edcbc01fe5 --- /dev/null +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerIntegrationFunSuite.scala @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.jdbc
+
+import org.apache.spark.SparkFunSuite
+
+/**
+ * Helper trait that runs Docker integration tests,
+ * or ignores them depending on whether the enabling environment variable is set.
+ */
+trait DockerIntegrationFunSuite extends SparkFunSuite {
+  private val envVarNameForEnablingTests = "ENABLE_DOCKER_INTEGRATION_TESTS"
+  private val shouldRunTests = sys.env.getOrElse(envVarNameForEnablingTests, "1") match {
+    case "1" => true
+    case _ => false
+  }
+
+  /** Run the test if the environment variable is set; otherwise ignore it */
+  def testIfEnabled(testName: String)(testBody: => Unit): Unit = {
+    if (shouldRunTests) {
+      test(testName)(testBody)
+    } else {
+      ignore(s"$testName [enable by setting env var $envVarNameForEnablingTests=1]")(testBody)
+    }
+  }
+
+  /** Run the given body of code only if Docker integration tests are enabled */
+  def runIfTestsEnabled(message: String)(body: => Unit): Unit = {
+    if (shouldRunTests) {
+      body
+    } else {
+      ignore(s"$message [enable by setting env var $envVarNameForEnablingTests=1]")(())
+    }
+  }
+}
diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala
index 2020c9b6c34a..f9199d8a5041 100644
--- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala
+++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala
@@ -92,7 +92,8 @@ abstract class DatabaseOnDocker {
       containerConfigBuilder: ContainerConfig.Builder): Unit = {}
 }
 
-abstract class DockerJDBCIntegrationSuite extends SharedSparkSession with Eventually {
+abstract class DockerJDBCIntegrationSuite
+  extends SharedSparkSession with Eventually with DockerIntegrationFunSuite {
   protected val dockerIp = DockerUtils.getDockerIp()
 
   val db: DatabaseOnDocker
@@ -115,69 +116,72 @@ abstract class DockerJDBCIntegrationSuite extends SharedSparkSession with Eventu
   protected var jdbcUrl: String = _
 
   override def beforeAll(): Unit = {
-    super.beforeAll()
-    try {
-      docker = DefaultDockerClient.fromEnv.build()
-      // Check that Docker is actually up
-      try {
-        docker.ping()
-      } catch {
-        case NonFatal(e) =>
-          log.error("Exception while connecting to Docker. 
Check whether Docker is running.") - throw e - } - // Ensure that the Docker image is installed: - try { - docker.inspectImage(db.imageName) - } catch { - case e: ImageNotFoundException => - log.warn(s"Docker image ${db.imageName} not found; pulling image from registry") - docker.pull(db.imageName) - pulled = true - } - val hostConfigBuilder = HostConfig.builder() - .privileged(db.privileged) - .networkMode("bridge") - .ipcMode(if (db.usesIpc) "host" else "") - .portBindings( - Map(s"${db.jdbcPort}/tcp" -> List(PortBinding.of(dockerIp, externalPort)).asJava).asJava) - // Create the database container: - val containerConfigBuilder = ContainerConfig.builder() - .image(db.imageName) - .networkDisabled(false) - .env(db.env.map { case (k, v) => s"$k=$v" }.toSeq.asJava) - .exposedPorts(s"${db.jdbcPort}/tcp") - if (db.getEntryPoint.isDefined) { - containerConfigBuilder.entrypoint(db.getEntryPoint.get) - } - if (db.getStartupProcessName.isDefined) { - containerConfigBuilder.cmd(db.getStartupProcessName.get) - } - db.beforeContainerStart(hostConfigBuilder, containerConfigBuilder) - containerConfigBuilder.hostConfig(hostConfigBuilder.build()) - val config = containerConfigBuilder.build() - // Create the database container: - containerId = docker.createContainer(config).id - // Start the container and wait until the database can accept JDBC connections: - docker.startContainer(containerId) - jdbcUrl = db.getJdbcUrl(dockerIp, externalPort) - var conn: Connection = null - eventually(connectionTimeout, interval(1.second)) { - conn = getConnection() - } - // Run any setup queries: + runIfTestsEnabled(s"Prepare for $this.getClass.getName") { + super.beforeAll() try { - dataPreparation(conn) - } finally { - conn.close() - } - } catch { - case NonFatal(e) => + docker = DefaultDockerClient.fromEnv.build() + // Check that Docker is actually up + try { + docker.ping() + } catch { + case NonFatal(e) => + log.error("Exception while connecting to Docker. 
Check whether Docker is running.") + throw e + } + // Ensure that the Docker image is installed: + try { + docker.inspectImage(db.imageName) + } catch { + case e: ImageNotFoundException => + log.warn(s"Docker image ${db.imageName} not found; pulling image from registry") + docker.pull(db.imageName) + pulled = true + } + val hostConfigBuilder = HostConfig.builder() + .privileged(db.privileged) + .networkMode("bridge") + .ipcMode(if (db.usesIpc) "host" else "") + .portBindings( + Map(s"${db.jdbcPort}/tcp" -> + List(PortBinding.of(dockerIp, externalPort)).asJava).asJava) + // Create the database container: + val containerConfigBuilder = ContainerConfig.builder() + .image(db.imageName) + .networkDisabled(false) + .env(db.env.map { case (k, v) => s"$k=$v" }.toSeq.asJava) + .exposedPorts(s"${db.jdbcPort}/tcp") + if (db.getEntryPoint.isDefined) { + containerConfigBuilder.entrypoint(db.getEntryPoint.get) + } + if (db.getStartupProcessName.isDefined) { + containerConfigBuilder.cmd(db.getStartupProcessName.get) + } + db.beforeContainerStart(hostConfigBuilder, containerConfigBuilder) + containerConfigBuilder.hostConfig(hostConfigBuilder.build()) + val config = containerConfigBuilder.build() + // Create the database container: + containerId = docker.createContainer(config).id + // Start the container and wait until the database can accept JDBC connections: + docker.startContainer(containerId) + jdbcUrl = db.getJdbcUrl(dockerIp, externalPort) + var conn: Connection = null + eventually(connectionTimeout, interval(1.second)) { + conn = getConnection() + } + // Run any setup queries: try { - afterAll() + dataPreparation(conn) } finally { - throw e + conn.close() } + } catch { + case NonFatal(e) => + try { + afterAll() + } finally { + throw e + } + } } } diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerKrbJDBCIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerKrbJDBCIntegrationSuite.scala index 3865f9100b84..289d2535736f 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerKrbJDBCIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerKrbJDBCIntegrationSuite.scala @@ -42,27 +42,29 @@ abstract class DockerKrbJDBCIntegrationSuite extends DockerJDBCIntegrationSuite protected def setAuthentication(keytabFile: String, principal: String): Unit override def beforeAll(): Unit = { - SecurityUtils.setGlobalKrbDebug(true) + runIfTestsEnabled(s"Prepare for ${this.getClass.getName}") { + SecurityUtils.setGlobalKrbDebug(true) - val kdcDir = Utils.createTempDir() - val kdcConf = MiniKdc.createConf() - kdcConf.setProperty(MiniKdc.DEBUG, "true") - kdc = new MiniKdc(kdcConf, kdcDir) - kdc.start() + val kdcDir = Utils.createTempDir() + val kdcConf = MiniKdc.createConf() + kdcConf.setProperty(MiniKdc.DEBUG, "true") + kdc = new MiniKdc(kdcConf, kdcDir) + kdc.start() - principal = s"$userName@${kdc.getRealm}" + principal = s"$userName@${kdc.getRealm}" - entryPointDir = Utils.createTempDir() - initDbDir = Utils.createTempDir() - val keytabFile = new File(initDbDir, keytabFileName) - keytabFullPath = keytabFile.getAbsolutePath - kdc.createPrincipal(keytabFile, userName) - logInfo(s"Created keytab file: $keytabFullPath") + entryPointDir = Utils.createTempDir() + initDbDir = Utils.createTempDir() + val keytabFile = new File(initDbDir, keytabFileName) + keytabFullPath = keytabFile.getAbsolutePath + kdc.createPrincipal(keytabFile, userName) + 
logInfo(s"Created keytab file: $keytabFullPath") - setAuthentication(keytabFullPath, principal) + setAuthentication(keytabFullPath, principal) - // This must be executed intentionally later - super.beforeAll() + // This must be executed intentionally later + super.beforeAll() + } } override def afterAll(): Unit = { @@ -107,7 +109,7 @@ abstract class DockerKrbJDBCIntegrationSuite extends DockerJDBCIntegrationSuite conn.prepareStatement("INSERT INTO bar VALUES ('hello')").executeUpdate() } - test("Basic read test in query option") { + testIfEnabled("Basic read test in query option") { // This makes sure Spark must do authentication Configuration.setConfiguration(null) @@ -124,7 +126,7 @@ abstract class DockerKrbJDBCIntegrationSuite extends DockerJDBCIntegrationSuite assert(df.collect().toSet === expectedResult) } - test("Basic read test in create table path") { + testIfEnabled("Basic read test in create table path") { // This makes sure Spark must do authentication Configuration.setConfiguration(null) @@ -141,7 +143,7 @@ abstract class DockerKrbJDBCIntegrationSuite extends DockerJDBCIntegrationSuite assert(sql("select c0 from queryOption").collect().toSet === expectedResult) } - test("Basic write test") { + testIfEnabled("Basic write test") { // This makes sure Spark must do authentication Configuration.setConfiguration(null) @@ -162,7 +164,7 @@ abstract class DockerKrbJDBCIntegrationSuite extends DockerJDBCIntegrationSuite assert(rows(0).getString(1) === "bar") } - test("SPARK-35226: JDBCOption should accept refreshKrb5Config parameter") { + testIfEnabled("SPARK-35226: JDBCOption should accept refreshKrb5Config parameter") { // This makes sure Spark must do authentication Configuration.setConfiguration(null) withTempDir { dir => diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala index c6e5cd26958e..f20c5cd9c2aa 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala @@ -141,7 +141,7 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { """.stripMargin).executeUpdate() } - test("Basic test") { + testIfEnabled("Basic test") { val df = spark.read.jdbc(jdbcUrl, "tbl", new Properties) val rows = df.collect() assert(rows.length == 2) @@ -151,7 +151,7 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { assert(types(1).equals("class java.lang.String")) } - test("Numeric types") { + testIfEnabled("Numeric types") { Seq(true, false).foreach { flag => withSQLConf(SQLConf.LEGACY_MSSQLSERVER_NUMERIC_MAPPING_ENABLED.key -> s"$flag") { val df = spark.read.jdbc(jdbcUrl, "numbers", new Properties) @@ -206,7 +206,7 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { } } - test("Date types") { + testIfEnabled("Date types") { withDefaultTimeZone(UTC) { val df = spark.read.jdbc(jdbcUrl, "dates", new Properties) val rows = df.collect() @@ -229,7 +229,7 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { } } - test("String types") { + testIfEnabled("String types") { val df = spark.read.jdbc(jdbcUrl, "strings", new Properties) val rows = df.collect() assert(rows.length == 1) @@ -258,7 +258,7 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { 
assert(java.util.Arrays.equals(row.getAs[Array[Byte]](8), Array[Byte](100, 111, 103))) } - test("Basic write test") { + testIfEnabled("Basic write test") { val df1 = spark.read.jdbc(jdbcUrl, "numbers", new Properties) val df2 = spark.read.jdbc(jdbcUrl, "dates", new Properties) val df3 = spark.read.jdbc(jdbcUrl, "strings", new Properties) @@ -267,7 +267,7 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { df3.write.jdbc(jdbcUrl, "stringscopy", new Properties) } - test("SPARK-33813: MsSqlServerDialect should support spatial types") { + testIfEnabled("SPARK-33813: MsSqlServerDialect should support spatial types") { val df = spark.read.jdbc(jdbcUrl, "spatials", new Properties) val rows = df.collect() assert(rows.length == 1) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala index 5d4da7d7ed79..c0eecb509cea 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala @@ -74,7 +74,7 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { "'jumps', 'over', 'the', 'lazy', 'dog')").executeUpdate() } - test("Basic test") { + testIfEnabled("Basic test") { val df = sqlContext.read.jdbc(jdbcUrl, "tbl", new Properties) val rows = df.collect() assert(rows.length == 2) @@ -84,7 +84,7 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { assert(types(1).equals("class java.lang.String")) } - test("Numeric types") { + testIfEnabled("Numeric types") { val df = sqlContext.read.jdbc(jdbcUrl, "numbers", new Properties) val rows = df.collect() assert(rows.length == 1) @@ -111,7 +111,7 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { assert(rows(0).getDouble(8) == 1.0000000000000002) } - test("Date types") { + testIfEnabled("Date types") { withDefaultTimeZone(UTC) { val df = sqlContext.read.jdbc(jdbcUrl, "dates", new Properties) val rows = df.collect() @@ -132,7 +132,7 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { } } - test("String types") { + testIfEnabled("String types") { val df = sqlContext.read.jdbc(jdbcUrl, "strings", new Properties) val rows = df.collect() assert(rows.length == 1) @@ -158,7 +158,7 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { assert(java.util.Arrays.equals(rows(0).getAs[Array[Byte]](8), Array[Byte](100, 111, 103))) } - test("Basic write test") { + testIfEnabled("Basic write test") { val df1 = sqlContext.read.jdbc(jdbcUrl, "numbers", new Properties) val df2 = sqlContext.read.jdbc(jdbcUrl, "dates", new Properties) val df3 = sqlContext.read.jdbc(jdbcUrl, "strings", new Properties) @@ -167,7 +167,7 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { df3.write.jdbc(jdbcUrl, "stringscopy", new Properties) } - test("query JDBC option") { + testIfEnabled("query JDBC option") { val expectedResult = Set( (42, "fred"), (17, "dave") diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala index d5e1f7fb26ce..b47e9200ac88 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala +++ 
b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala @@ -151,7 +151,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark conn.commit() } - test("SPARK-16625 : Importing Oracle numeric types") { + testIfEnabled("SPARK-16625 : Importing Oracle numeric types") { val df = sqlContext.read.jdbc(jdbcUrl, "numerics", new Properties) val rows = df.collect() assert(rows.size == 1) @@ -167,7 +167,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark } - test("SPARK-12941: String datatypes to be mapped to Varchar in Oracle") { + testIfEnabled("SPARK-12941: String datatypes to be mapped to Varchar in Oracle") { // create a sample dataframe with string type val df1 = sparkContext.parallelize(Seq(("foo"))).toDF("x") // write the dataframe to the oracle table tbl @@ -183,7 +183,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark assert(rows(0).getString(0).equals("foo")) } - test("SPARK-16625: General data types to be mapped to Oracle") { + testIfEnabled("SPARK-16625: General data types to be mapped to Oracle") { val props = new Properties() props.put("oracle.jdbc.mapDateToTimestamp", "false") @@ -254,7 +254,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark assert(values.getTimestamp(10).equals(timestampVal)) } - test("SPARK-19318: connection property keys should be case-sensitive") { + testIfEnabled("SPARK-19318: connection property keys should be case-sensitive") { def checkRow(row: Row): Unit = { assert(row.getDecimal(0).equals(BigDecimal.valueOf(1))) assert(row.getDate(1).equals(Date.valueOf("1991-11-09"))) @@ -265,14 +265,14 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark checkRow(sql("SELECT * FROM datetime1 where id = 1").head()) } - test("SPARK-20557: column type TIMESTAMP with TIME ZONE should be recognized") { + testIfEnabled("SPARK-20557: column type TIMESTAMP with TIME ZONE should be recognized") { val dfRead = sqlContext.read.jdbc(jdbcUrl, "ts_with_timezone", new Properties) val rows = dfRead.collect() val types = rows(0).toSeq.map(x => x.getClass.toString) assert(types(1).equals("class java.sql.Timestamp")) } - test("Column type TIMESTAMP with SESSION_LOCAL_TIMEZONE is different from default") { + testIfEnabled("Column type TIMESTAMP with SESSION_LOCAL_TIMEZONE is different from default") { val defaultJVMTimeZone = TimeZone.getDefault // Pick the timezone different from the current default time zone of JVM val sofiaTimeZone = TimeZone.getTimeZone("Europe/Sofia") @@ -289,7 +289,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark } } - test("Column TIMESTAMP with TIME ZONE(JVM timezone)") { + testIfEnabled("Column TIMESTAMP with TIME ZONE(JVM timezone)") { def checkRow(row: Row, ts: String): Unit = { assert(row.getTimestamp(1).equals(Timestamp.valueOf(ts))) } @@ -312,7 +312,8 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark } } - test("SPARK-18004: Make sure date or timestamp related predicate is pushed down correctly") { + testIfEnabled( + "SPARK-18004: Make sure date or timestamp related predicate is pushed down correctly") { val props = new Properties() props.put("oracle.jdbc.mapDateToTimestamp", "false") @@ -360,7 +361,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark assert(row.getTimestamp(1).equals(timestampVal)) } - test("SPARK-20427/SPARK-20921: read table use 
custom schema by jdbc api") { + testIfEnabled("SPARK-20427/SPARK-20921: read table use custom schema by jdbc api") { // default will throw IllegalArgumentException val e = intercept[org.apache.spark.SparkException] { spark.read.jdbc(jdbcUrl, "tableWithCustomSchema", new Properties()).collect() @@ -388,7 +389,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark assert(values.getBoolean(2) == false) } - test("SPARK-22303: handle BINARY_DOUBLE and BINARY_FLOAT as DoubleType and FloatType") { + testIfEnabled("SPARK-22303: handle BINARY_DOUBLE and BINARY_FLOAT as DoubleType and FloatType") { val tableName = "oracle_types" val schema = StructType(Seq( StructField("d", DoubleType, true), @@ -416,7 +417,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark assert(values.getFloat(1) === 2.2f) } - test("SPARK-22814 support date/timestamp types in partitionColumn") { + testIfEnabled("SPARK-22814 support date/timestamp types in partitionColumn") { val expectedResult = Set( (1, "2018-07-06", "2018-07-06 05:50:00"), (2, "2018-07-06", "2018-07-06 08:10:08"), @@ -475,7 +476,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark assert(df2.collect.toSet === expectedResult) } - test("query JDBC option") { + testIfEnabled("query JDBC option") { val expectedResult = Set( (1, "1991-11-09", "1996-01-01 01:23:45") ).map { case (id, date, timestamp) => @@ -503,7 +504,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark assert(sql("select id, d, t from queryOption").collect.toSet == expectedResult) } - test("SPARK-32992: map Oracle's ROWID type to StringType") { + testIfEnabled("SPARK-32992: map Oracle's ROWID type to StringType") { val rows = spark.read.format("jdbc") .option("url", jdbcUrl) .option("query", "SELECT ROWID from datetime") diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala index 39450d1f5679..f012175d53c3 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala @@ -140,7 +140,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { "('$1,000.00')").executeUpdate() } - test("Type mapping for various types") { + testIfEnabled("Type mapping for various types") { val df = sqlContext.read.jdbc(jdbcUrl, "bar", new Properties) val rows = df.collect().sortBy(_.toString()) assert(rows.length == 2) @@ -240,7 +240,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { assert(0.until(16).forall(rows(1).isNullAt(_))) } - test("Basic write test") { + testIfEnabled("Basic write test") { val df = sqlContext.read.jdbc(jdbcUrl, "bar", new Properties) // Test only that it doesn't crash. 
df.write.jdbc(jdbcUrl, "public.barcopy", new Properties) @@ -253,7 +253,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { }: _*).write.jdbc(jdbcUrl, "public.barcopy2", new Properties) } - test("Creating a table with shorts and floats") { + testIfEnabled("Creating a table with shorts and floats") { sqlContext.createDataFrame(Seq((1.0f, 1.toShort))) .write.jdbc(jdbcUrl, "shortfloat", new Properties) val schema = sqlContext.read.jdbc(jdbcUrl, "shortfloat", new Properties).schema @@ -261,7 +261,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { assert(schema(1).dataType == ShortType) } - test("SPARK-20557: column type TIMESTAMP with TIME ZONE and TIME with TIME ZONE " + + testIfEnabled("SPARK-20557: column type TIMESTAMP with TIME ZONE and TIME with TIME ZONE " + "should be recognized") { // When using JDBC to read the columns of TIMESTAMP with TIME ZONE and TIME with TIME ZONE // the actual types are java.sql.Types.TIMESTAMP and java.sql.Types.TIME @@ -272,7 +272,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { assert(types(2).equals("class java.sql.Timestamp")) } - test("SPARK-22291: Conversion error when transforming array types of " + + testIfEnabled("SPARK-22291: Conversion error when transforming array types of " + "uuid, inet and cidr to StingType in PostgreSQL") { val df = sqlContext.read.jdbc(jdbcUrl, "st_with_array", new Properties) val rows = df.collect() @@ -310,7 +310,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { assert(rows(0).getSeq(26) == Seq("10:20:10,14,15")) } - test("query JDBC option") { + testIfEnabled("query JDBC option") { val expectedResult = Set( (42, 123456789012345L) ).map { case (c1, c3) => @@ -335,7 +335,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { assert(sql("select c1, c3 from queryOption").collect.toSet == expectedResult) } - test("write byte as smallint") { + testIfEnabled("write byte as smallint") { sqlContext.createDataFrame(Seq((1.toByte, 2.toShort))) .write.jdbc(jdbcUrl, "byte_to_smallint_test", new Properties) val df = sqlContext.read.jdbc(jdbcUrl, "byte_to_smallint_test", new Properties) @@ -348,7 +348,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { assert(rows(0).getShort(1) === 2) } - test("character type tests") { + testIfEnabled("character type tests") { val df = sqlContext.read.jdbc(jdbcUrl, "char_types", new Properties) val row = df.collect() assert(row.length == 1) @@ -360,7 +360,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { assert(row(0).getString(4) === "q") } - test("SPARK-32576: character array type tests") { + testIfEnabled("SPARK-32576: character array type tests") { val df = sqlContext.read.jdbc(jdbcUrl, "char_array_types", new Properties) val row = df.collect() assert(row.length == 1) @@ -372,7 +372,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { assert(row(0).getSeq[String](4) === Seq("q", "r")) } - test("SPARK-34333: money type tests") { + testIfEnabled("SPARK-34333: money type tests") { val df = sqlContext.read.jdbc(jdbcUrl, "money_types", new Properties) val row = df.collect() assert(row.length === 1) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala index 979b0784f044..e31bc48fd8a4 100644 --- 
a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala @@ -24,16 +24,17 @@ import org.apache.log4j.Level import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.connector.catalog.NamespaceChange import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog +import org.apache.spark.sql.jdbc.DockerIntegrationFunSuite import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.tags.DockerTest @DockerTest -private[v2] trait V2JDBCNamespaceTest extends SharedSparkSession { +private[v2] trait V2JDBCNamespaceTest extends SharedSparkSession with DockerIntegrationFunSuite { val catalog = new JDBCTableCatalog() def builtinNamespaces: Array[Array[String]] - test("listNamespaces: basic behavior") { + testIfEnabled("listNamespaces: basic behavior") { catalog.createNamespace(Array("foo"), Map("comment" -> "test comment").asJava) assert(catalog.listNamespaces() === Array(Array("foo")) ++ builtinNamespaces) assert(catalog.listNamespaces(Array("foo")) === Array()) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index 3807eb732a1a..a76623429477 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -20,12 +20,13 @@ package org.apache.spark.sql.jdbc.v2 import org.apache.log4j.Level import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.jdbc.DockerIntegrationFunSuite import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ import org.apache.spark.tags.DockerTest @DockerTest -private[v2] trait V2JDBCTest extends SharedSparkSession { +private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFunSuite { val catalogName: String // dialect specific update column type test def testUpdateColumnType(tbl: String): Unit @@ -62,7 +63,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { def testCreateTableWithProperty(tbl: String): Unit = {} - test("SPARK-33034: ALTER TABLE ... add new columns") { + testIfEnabled("SPARK-33034: ALTER TABLE ... add new columns") { withTable(s"$catalogName.alt_table") { sql(s"CREATE TABLE $catalogName.alt_table (ID STRING)") var t = spark.table(s"$catalogName.alt_table") @@ -90,7 +91,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { assert(msg.contains("Table not found")) } - test("SPARK-33034: ALTER TABLE ... drop column") { + testIfEnabled("SPARK-33034: ALTER TABLE ... drop column") { withTable(s"$catalogName.alt_table") { sql(s"CREATE TABLE $catalogName.alt_table (C1 INTEGER, C2 STRING, c3 INTEGER)") sql(s"ALTER TABLE $catalogName.alt_table DROP COLUMN C1") @@ -111,7 +112,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { assert(msg.contains("Table not found")) } - test("SPARK-33034: ALTER TABLE ... update column type") { + testIfEnabled("SPARK-33034: ALTER TABLE ... update column type") { withTable(s"$catalogName.alt_table") { testUpdateColumnType(s"$catalogName.alt_table") // Update not existing column @@ -127,7 +128,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { assert(msg.contains("Table not found")) } - test("SPARK-33034: ALTER TABLE ... 
rename column") { + testIfEnabled("SPARK-33034: ALTER TABLE ... rename column") { withTable(s"$catalogName.alt_table") { sql(s"CREATE TABLE $catalogName.alt_table (ID STRING NOT NULL," + s" ID1 STRING NOT NULL, ID2 STRING NOT NULL)") @@ -145,7 +146,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { assert(msg.contains("Table not found")) } - test("SPARK-33034: ALTER TABLE ... update column nullability") { + testIfEnabled("SPARK-33034: ALTER TABLE ... update column nullability") { withTable(s"$catalogName.alt_table") { testUpdateColumnNullability(s"$catalogName.alt_table") } @@ -156,7 +157,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { assert(msg.contains("Table not found")) } - test("CREATE TABLE with table comment") { + testIfEnabled("CREATE TABLE with table comment") { withTable(s"$catalogName.new_table") { val logAppender = new LogAppender("table comment") withLogAppender(logAppender) { @@ -170,7 +171,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { } } - test("CREATE TABLE with table property") { + testIfEnabled("CREATE TABLE with table property") { withTable(s"$catalogName.new_table") { val m = intercept[AnalysisException] { sql(s"CREATE TABLE $catalogName.new_table (i INT) TBLPROPERTIES('a'='1')") From 576d341aef5cff85cc07a6bea73ca6e7a79a3b16 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Thu, 27 May 2021 14:32:19 +0900 Subject: [PATCH 07/14] Change with simpler way. --- .github/workflows/build_and_test.yml | 3 +-- dev/run-tests.py | 4 --- dev/sparktestsupport/modules.py | 3 +++ .../spark/sql/jdbc/DB2IntegrationSuite.scala | 14 +++++----- .../sql/jdbc/DB2KrbIntegrationSuite.scala | 2 +- .../sql/jdbc/DockerIntegrationFunSuite.scala | 10 ++++--- .../sql/jdbc/DockerJDBCIntegrationSuite.scala | 4 +-- .../jdbc/DockerKrbJDBCIntegrationSuite.scala | 12 ++++----- .../sql/jdbc/MariaDBKrbIntegrationSuite.scala | 2 +- .../jdbc/MsSqlServerIntegrationSuite.scala | 14 +++++----- .../sql/jdbc/MySQLIntegrationSuite.scala | 14 +++++----- .../sql/jdbc/OracleIntegrationSuite.scala | 27 ++++++++++--------- .../sql/jdbc/PostgresIntegrationSuite.scala | 22 +++++++-------- .../jdbc/PostgresKrbIntegrationSuite.scala | 2 +- .../sql/jdbc/v2/DB2IntegrationSuite.scala | 2 +- .../jdbc/v2/MsSqlServerIntegrationSuite.scala | 2 +- .../sql/jdbc/v2/MySQLIntegrationSuite.scala | 4 +-- .../sql/jdbc/v2/OracleIntegrationSuite.scala | 1 + .../jdbc/v2/PostgresIntegrationSuite.scala | 2 +- .../sql/jdbc/v2/PostgresNamespaceSuite.scala | 2 +- .../sql/jdbc/v2/V2JDBCNamespaceTest.scala | 2 +- .../apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 14 +++++----- 22 files changed, 81 insertions(+), 81 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 3b45920571cf..a5aea0920a4e 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -635,7 +635,6 @@ jobs: GITHUB_PREV_SHA: ${{ github.event.before }} SPARK_LOCAL_IP: localhost ORACLE_DOCKER_IMAGE_NAME: oracle/database:18.4.0-xe - ENABLE_DOCKER_INTEGRATION_TESTS: 1 steps: - name: Checkout Spark repository uses: actions/checkout@v2 @@ -686,7 +685,7 @@ jobs: repository: oracle/docker-images ref: 3f422c4a35b423dfcdbcc57a84f01db6c82eb6c1 path: ./oracle/docker-images - - name: Install Oracle docker image + - name: Install Oracle Docker image run: | cd oracle/docker-images/OracleDatabase/SingleInstance/dockerfiles ./buildContainerImage.sh -v 18.4.0 -x diff --git a/dev/run-tests.py b/dev/run-tests.py index b0d06e79e580..3dee9077b685 100755 --- 
a/dev/run-tests.py +++ b/dev/run-tests.py @@ -689,10 +689,6 @@ def main(): test_modules = determine_modules_to_test(changed_modules) excluded_tags = determine_tags_to_exclude(changed_modules) - # With this script, disable docker integration by default. - if not os.environ.get("ENABLE_DOCKER_INTEGRATION_TESTS"): - os.environ["ENABLE_DOCKER_INTEGRATION_TESTS"] = "0" - # If there is no changed module found, tests all. if not changed_modules: changed_modules = [modules.root] diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index bcdef594016b..acc3b71c2b70 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -749,6 +749,9 @@ def __hash__(self): build_profile_flags=["-Pdocker-integration-tests"], source_file_regexes=["external/docker-integration-tests"], sbt_test_goals=["docker-integration-tests/test"], + environ={ + "ENABLE_DOCKER_INTEGRATION_TESTS": "1" + }, test_tags=[ "org.apache.spark.tags.DockerTest" ] diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala index b305dc75ddbe..77d72547ae75 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala @@ -31,7 +31,7 @@ import org.apache.spark.tags.DockerTest /** * To run this test suite for a specific version (e.g., ibmcom/db2:11.5.4.0): * {{{ - * DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0 + * ENABLE_DOCKER_INTEGRATION_TESTS=1 DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0 * ./build/sbt -Pdocker-integration-tests * "testOnly org.apache.spark.sql.jdbc.DB2IntegrationSuite" * }}} @@ -80,7 +80,7 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { + "'Kathy')").executeUpdate() } - testIfEnabled("Basic test") { + test("Basic test") { val df = sqlContext.read.jdbc(jdbcUrl, "tbl", new Properties) val rows = df.collect() assert(rows.length == 2) @@ -90,7 +90,7 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { assert(types(1).equals("class java.lang.String")) } - testIfEnabled("Numeric types") { + test("Numeric types") { val df = sqlContext.read.jdbc(jdbcUrl, "numbers", new Properties) val rows = df.collect() assert(rows.length == 1) @@ -119,7 +119,7 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { assert(rows(0).getDecimal(9) == new BigDecimal("1234567891234567.123456789123456789")) } - testIfEnabled("Date types") { + test("Date types") { withDefaultTimeZone(UTC) { val df = sqlContext.read.jdbc(jdbcUrl, "dates", new Properties) val rows = df.collect() @@ -135,7 +135,7 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { } } - testIfEnabled("String types") { + test("String types") { val df = sqlContext.read.jdbc(jdbcUrl, "strings", new Properties) val rows = df.collect() assert(rows.length == 1) @@ -152,7 +152,7 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { assert(rows(0).getString(4).equals("""Kathy""")) } - testIfEnabled("Basic write test") { + test("Basic write test") { // cast decflt column with precision value of 38 to DB2 max decimal precision value of 31. 
val df1 = sqlContext.read.jdbc(jdbcUrl, "numbers", new Properties) .selectExpr("small", "med", "big", "deci", "flt", "dbl", "real", @@ -173,7 +173,7 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { assert(rows(0).getString(2) == "1") } - testIfEnabled("query JDBC option") { + test("query JDBC option") { val expectedResult = Set( (42, "fred"), (17, "dave") diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala index f79809f355c0..8d398d11d603 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala @@ -33,7 +33,7 @@ import org.apache.spark.tags.DockerTest /** * To run this test suite for a specific version (e.g., ibmcom/db2:11.5.4.0): * {{{ - * DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0 + * ENABLE_DOCKER_INTEGRATION_TESTS=1 DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0 * ./build/sbt -Pdocker-integration-tests "testOnly *DB2KrbIntegrationSuite" * }}} */ diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerIntegrationFunSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerIntegrationFunSuite.scala index d6edcbc01fe5..d8d20c3988f6 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerIntegrationFunSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerIntegrationFunSuite.scala @@ -17,6 +17,9 @@ package org.apache.spark.sql.jdbc +import org.scalactic.source.Position +import org.scalatest.Tag + import org.apache.spark.SparkFunSuite /** @@ -25,15 +28,16 @@ import org.apache.spark.SparkFunSuite */ trait DockerIntegrationFunSuite extends SparkFunSuite { private val envVarNameForEnablingTests = "ENABLE_DOCKER_INTEGRATION_TESTS" - private val shouldRunTests = sys.env.getOrElse(envVarNameForEnablingTests, "1") match { + private val shouldRunTests = sys.env.getOrElse(envVarNameForEnablingTests, "0") match { case "1" => true case _ => false } /** Run the test if environment variable is set or ignore the test */ - def testIfEnabled(testName: String)(testBody: => Unit): Unit = { + override def test(testName: String, testTags: Tag*)(testBody: => Any) + (implicit pos: Position): Unit = { if (shouldRunTests) { - test(testName)(testBody) + super.test(testName)(testBody) } else { ignore(s"$testName [enable by setting env var $envVarNameForEnablingTests=1]")(testBody) } diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala index f9199d8a5041..51aefd863431 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala @@ -115,8 +115,7 @@ abstract class DockerJDBCIntegrationSuite private var pulled: Boolean = false protected var jdbcUrl: String = _ - override def beforeAll(): Unit = { - runIfTestsEnabled(s"Prepare for $this.getClass.getName") { + override def beforeAll(): Unit = runIfTestsEnabled(s"Prepare for ${this.getClass.getName}") { super.beforeAll() try { 
docker = DefaultDockerClient.fromEnv.build() @@ -182,7 +181,6 @@ abstract class DockerJDBCIntegrationSuite throw e } } - } } override def afterAll(): Unit = { diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerKrbJDBCIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerKrbJDBCIntegrationSuite.scala index 289d2535736f..645030eaab30 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerKrbJDBCIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerKrbJDBCIntegrationSuite.scala @@ -41,8 +41,7 @@ abstract class DockerKrbJDBCIntegrationSuite extends DockerJDBCIntegrationSuite protected var keytabFullPath: String = _ protected def setAuthentication(keytabFile: String, principal: String): Unit - override def beforeAll(): Unit = { - runIfTestsEnabled(s"Prepare for ${this.getClass.getName}") { + override def beforeAll(): Unit = runIfTestsEnabled(s"Prepare for ${this.getClass.getName}") { SecurityUtils.setGlobalKrbDebug(true) val kdcDir = Utils.createTempDir() @@ -64,7 +63,6 @@ abstract class DockerKrbJDBCIntegrationSuite extends DockerJDBCIntegrationSuite // This must be executed intentionally later super.beforeAll() - } } override def afterAll(): Unit = { @@ -109,7 +107,7 @@ abstract class DockerKrbJDBCIntegrationSuite extends DockerJDBCIntegrationSuite conn.prepareStatement("INSERT INTO bar VALUES ('hello')").executeUpdate() } - testIfEnabled("Basic read test in query option") { + test("Basic read test in query option") { // This makes sure Spark must do authentication Configuration.setConfiguration(null) @@ -126,7 +124,7 @@ abstract class DockerKrbJDBCIntegrationSuite extends DockerJDBCIntegrationSuite assert(df.collect().toSet === expectedResult) } - testIfEnabled("Basic read test in create table path") { + test("Basic read test in create table path") { // This makes sure Spark must do authentication Configuration.setConfiguration(null) @@ -143,7 +141,7 @@ abstract class DockerKrbJDBCIntegrationSuite extends DockerJDBCIntegrationSuite assert(sql("select c0 from queryOption").collect().toSet === expectedResult) } - testIfEnabled("Basic write test") { + test("Basic write test") { // This makes sure Spark must do authentication Configuration.setConfiguration(null) @@ -164,7 +162,7 @@ abstract class DockerKrbJDBCIntegrationSuite extends DockerJDBCIntegrationSuite assert(rows(0).getString(1) === "bar") } - testIfEnabled("SPARK-35226: JDBCOption should accept refreshKrb5Config parameter") { + test("SPARK-35226: JDBCOption should accept refreshKrb5Config parameter") { // This makes sure Spark must do authentication Configuration.setConfiguration(null) withTempDir { dir => diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala index 9b653f81afe9..8e89d98dc823 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.tags.DockerTest /** * To run this test suite for a specific version (e.g., mariadb:10.5.8): * {{{ - * MARIADB_DOCKER_IMAGE_NAME=mariadb:10.5.8 + * ENABLE_DOCKER_INTEGRATION_TESTS=1 
MARIADB_DOCKER_IMAGE_NAME=mariadb:10.5.8 * ./build/sbt -Pdocker-integration-tests * "testOnly org.apache.spark.sql.jdbc.MariaDBKrbIntegrationSuite" * }}} diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala index f20c5cd9c2aa..e5adb1933b39 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.tags.DockerTest /** * To run this test suite for a specific version (e.g., 2019-GA-ubuntu-16.04): * {{{ - * MSSQLSERVER_DOCKER_IMAGE_NAME=2019-GA-ubuntu-16.04 + * ENABLE_DOCKER_INTEGRATION_TESTS=1 MSSQLSERVER_DOCKER_IMAGE_NAME=2019-GA-ubuntu-16.04 * ./build/sbt -Pdocker-integration-tests * "testOnly org.apache.spark.sql.jdbc.MsSqlServerIntegrationSuite" * }}} @@ -141,7 +141,7 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { """.stripMargin).executeUpdate() } - testIfEnabled("Basic test") { + test("Basic test") { val df = spark.read.jdbc(jdbcUrl, "tbl", new Properties) val rows = df.collect() assert(rows.length == 2) @@ -151,7 +151,7 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { assert(types(1).equals("class java.lang.String")) } - testIfEnabled("Numeric types") { + test("Numeric types") { Seq(true, false).foreach { flag => withSQLConf(SQLConf.LEGACY_MSSQLSERVER_NUMERIC_MAPPING_ENABLED.key -> s"$flag") { val df = spark.read.jdbc(jdbcUrl, "numbers", new Properties) @@ -206,7 +206,7 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { } } - testIfEnabled("Date types") { + test("Date types") { withDefaultTimeZone(UTC) { val df = spark.read.jdbc(jdbcUrl, "dates", new Properties) val rows = df.collect() @@ -229,7 +229,7 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { } } - testIfEnabled("String types") { + test("String types") { val df = spark.read.jdbc(jdbcUrl, "strings", new Properties) val rows = df.collect() assert(rows.length == 1) @@ -258,7 +258,7 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { assert(java.util.Arrays.equals(row.getAs[Array[Byte]](8), Array[Byte](100, 111, 103))) } - testIfEnabled("Basic write test") { + test("Basic write test") { val df1 = spark.read.jdbc(jdbcUrl, "numbers", new Properties) val df2 = spark.read.jdbc(jdbcUrl, "dates", new Properties) val df3 = spark.read.jdbc(jdbcUrl, "strings", new Properties) @@ -267,7 +267,7 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { df3.write.jdbc(jdbcUrl, "stringscopy", new Properties) } - testIfEnabled("SPARK-33813: MsSqlServerDialect should support spatial types") { + test("SPARK-33813: MsSqlServerDialect should support spatial types") { val df = spark.read.jdbc(jdbcUrl, "spatials", new Properties) val rows = df.collect() assert(rows.length == 1) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala index c0eecb509cea..e613163e606f 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala +++ 
b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.tags.DockerTest /** * To run this test suite for a specific version (e.g., mysql:5.7.31): * {{{ - * MYSQL_DOCKER_IMAGE_NAME=mysql:5.7.31 + * ENABLE_DOCKER_INTEGRATION_TESTS=1 MYSQL_DOCKER_IMAGE_NAME=mysql:5.7.31 * ./build/sbt -Pdocker-integration-tests * "testOnly org.apache.spark.sql.jdbc.MySQLIntegrationSuite" * }}} @@ -74,7 +74,7 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { "'jumps', 'over', 'the', 'lazy', 'dog')").executeUpdate() } - testIfEnabled("Basic test") { + test("Basic test") { val df = sqlContext.read.jdbc(jdbcUrl, "tbl", new Properties) val rows = df.collect() assert(rows.length == 2) @@ -84,7 +84,7 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { assert(types(1).equals("class java.lang.String")) } - testIfEnabled("Numeric types") { + test("Numeric types") { val df = sqlContext.read.jdbc(jdbcUrl, "numbers", new Properties) val rows = df.collect() assert(rows.length == 1) @@ -111,7 +111,7 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { assert(rows(0).getDouble(8) == 1.0000000000000002) } - testIfEnabled("Date types") { + test("Date types") { withDefaultTimeZone(UTC) { val df = sqlContext.read.jdbc(jdbcUrl, "dates", new Properties) val rows = df.collect() @@ -132,7 +132,7 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { } } - testIfEnabled("String types") { + test("String types") { val df = sqlContext.read.jdbc(jdbcUrl, "strings", new Properties) val rows = df.collect() assert(rows.length == 1) @@ -158,7 +158,7 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { assert(java.util.Arrays.equals(rows(0).getAs[Array[Byte]](8), Array[Byte](100, 111, 103))) } - testIfEnabled("Basic write test") { + test("Basic write test") { val df1 = sqlContext.read.jdbc(jdbcUrl, "numbers", new Properties) val df2 = sqlContext.read.jdbc(jdbcUrl, "dates", new Properties) val df3 = sqlContext.read.jdbc(jdbcUrl, "strings", new Properties) @@ -167,7 +167,7 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { df3.write.jdbc(jdbcUrl, "stringscopy", new Properties) } - testIfEnabled("query JDBC option") { + test("query JDBC option") { val expectedResult = Set( (42, "fred"), (17, "dave") diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala index b47e9200ac88..e92107fa7f42 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala @@ -50,6 +50,7 @@ import org.apache.spark.tags.DockerTest * $ cd docker-images/OracleDatabase/SingleInstance/dockerfiles * $ ./buildContainerImage.sh -v 18.4.0 -x * $ export ORACLE_DOCKER_IMAGE_NAME=oracle/database:18.4.0-xe + * $ export ENABLE_DOCKER_INTEGRATION_TESTS=1 * $ cd $SPARK_HOME * $ ./build/sbt -Pdocker-integration-tests * "testOnly org.apache.spark.sql.jdbc.OracleIntegrationSuite" @@ -151,7 +152,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark conn.commit() } - testIfEnabled("SPARK-16625 : Importing Oracle numeric types") { + test("SPARK-16625 : Importing Oracle numeric types") { val df = sqlContext.read.jdbc(jdbcUrl, "numerics", new Properties) 
val rows = df.collect() assert(rows.size == 1) @@ -167,7 +168,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark } - testIfEnabled("SPARK-12941: String datatypes to be mapped to Varchar in Oracle") { + test("SPARK-12941: String datatypes to be mapped to Varchar in Oracle") { // create a sample dataframe with string type val df1 = sparkContext.parallelize(Seq(("foo"))).toDF("x") // write the dataframe to the oracle table tbl @@ -183,7 +184,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark assert(rows(0).getString(0).equals("foo")) } - testIfEnabled("SPARK-16625: General data types to be mapped to Oracle") { + test("SPARK-16625: General data types to be mapped to Oracle") { val props = new Properties() props.put("oracle.jdbc.mapDateToTimestamp", "false") @@ -254,7 +255,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark assert(values.getTimestamp(10).equals(timestampVal)) } - testIfEnabled("SPARK-19318: connection property keys should be case-sensitive") { + test("SPARK-19318: connection property keys should be case-sensitive") { def checkRow(row: Row): Unit = { assert(row.getDecimal(0).equals(BigDecimal.valueOf(1))) assert(row.getDate(1).equals(Date.valueOf("1991-11-09"))) @@ -265,14 +266,14 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark checkRow(sql("SELECT * FROM datetime1 where id = 1").head()) } - testIfEnabled("SPARK-20557: column type TIMESTAMP with TIME ZONE should be recognized") { + test("SPARK-20557: column type TIMESTAMP with TIME ZONE should be recognized") { val dfRead = sqlContext.read.jdbc(jdbcUrl, "ts_with_timezone", new Properties) val rows = dfRead.collect() val types = rows(0).toSeq.map(x => x.getClass.toString) assert(types(1).equals("class java.sql.Timestamp")) } - testIfEnabled("Column type TIMESTAMP with SESSION_LOCAL_TIMEZONE is different from default") { + test("Column type TIMESTAMP with SESSION_LOCAL_TIMEZONE is different from default") { val defaultJVMTimeZone = TimeZone.getDefault // Pick the timezone different from the current default time zone of JVM val sofiaTimeZone = TimeZone.getTimeZone("Europe/Sofia") @@ -289,7 +290,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark } } - testIfEnabled("Column TIMESTAMP with TIME ZONE(JVM timezone)") { + test("Column TIMESTAMP with TIME ZONE(JVM timezone)") { def checkRow(row: Row, ts: String): Unit = { assert(row.getTimestamp(1).equals(Timestamp.valueOf(ts))) } @@ -312,7 +313,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark } } - testIfEnabled( + test( "SPARK-18004: Make sure date or timestamp related predicate is pushed down correctly") { val props = new Properties() props.put("oracle.jdbc.mapDateToTimestamp", "false") @@ -361,7 +362,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark assert(row.getTimestamp(1).equals(timestampVal)) } - testIfEnabled("SPARK-20427/SPARK-20921: read table use custom schema by jdbc api") { + test("SPARK-20427/SPARK-20921: read table use custom schema by jdbc api") { // default will throw IllegalArgumentException val e = intercept[org.apache.spark.SparkException] { spark.read.jdbc(jdbcUrl, "tableWithCustomSchema", new Properties()).collect() @@ -389,7 +390,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark assert(values.getBoolean(2) == false) } - testIfEnabled("SPARK-22303: handle BINARY_DOUBLE and BINARY_FLOAT 
as DoubleType and FloatType") { + test("SPARK-22303: handle BINARY_DOUBLE and BINARY_FLOAT as DoubleType and FloatType") { val tableName = "oracle_types" val schema = StructType(Seq( StructField("d", DoubleType, true), @@ -417,7 +418,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark assert(values.getFloat(1) === 2.2f) } - testIfEnabled("SPARK-22814 support date/timestamp types in partitionColumn") { + test("SPARK-22814 support date/timestamp types in partitionColumn") { val expectedResult = Set( (1, "2018-07-06", "2018-07-06 05:50:00"), (2, "2018-07-06", "2018-07-06 08:10:08"), @@ -476,7 +477,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark assert(df2.collect.toSet === expectedResult) } - testIfEnabled("query JDBC option") { + test("query JDBC option") { val expectedResult = Set( (1, "1991-11-09", "1996-01-01 01:23:45") ).map { case (id, date, timestamp) => @@ -504,7 +505,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark assert(sql("select id, d, t from queryOption").collect.toSet == expectedResult) } - testIfEnabled("SPARK-32992: map Oracle's ROWID type to StringType") { + test("SPARK-32992: map Oracle's ROWID type to StringType") { val rows = spark.read.format("jdbc") .option("url", jdbcUrl) .option("query", "SELECT ROWID from datetime") diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala index f012175d53c3..76149e87986b 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala @@ -31,7 +31,7 @@ import org.apache.spark.tags.DockerTest /** * To run this test suite for a specific version (e.g., postgres:13.0): * {{{ - * POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0 + * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0 * ./build/sbt -Pdocker-integration-tests * "testOnly org.apache.spark.sql.jdbc.PostgresIntegrationSuite" * }}} @@ -140,7 +140,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { "('$1,000.00')").executeUpdate() } - testIfEnabled("Type mapping for various types") { + test("Type mapping for various types") { val df = sqlContext.read.jdbc(jdbcUrl, "bar", new Properties) val rows = df.collect().sortBy(_.toString()) assert(rows.length == 2) @@ -240,7 +240,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { assert(0.until(16).forall(rows(1).isNullAt(_))) } - testIfEnabled("Basic write test") { + test("Basic write test") { val df = sqlContext.read.jdbc(jdbcUrl, "bar", new Properties) // Test only that it doesn't crash. 
df.write.jdbc(jdbcUrl, "public.barcopy", new Properties) @@ -253,7 +253,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { }: _*).write.jdbc(jdbcUrl, "public.barcopy2", new Properties) } - testIfEnabled("Creating a table with shorts and floats") { + test("Creating a table with shorts and floats") { sqlContext.createDataFrame(Seq((1.0f, 1.toShort))) .write.jdbc(jdbcUrl, "shortfloat", new Properties) val schema = sqlContext.read.jdbc(jdbcUrl, "shortfloat", new Properties).schema @@ -261,7 +261,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { assert(schema(1).dataType == ShortType) } - testIfEnabled("SPARK-20557: column type TIMESTAMP with TIME ZONE and TIME with TIME ZONE " + + test("SPARK-20557: column type TIMESTAMP with TIME ZONE and TIME with TIME ZONE " + "should be recognized") { // When using JDBC to read the columns of TIMESTAMP with TIME ZONE and TIME with TIME ZONE // the actual types are java.sql.Types.TIMESTAMP and java.sql.Types.TIME @@ -272,7 +272,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { assert(types(2).equals("class java.sql.Timestamp")) } - testIfEnabled("SPARK-22291: Conversion error when transforming array types of " + + test("SPARK-22291: Conversion error when transforming array types of " + "uuid, inet and cidr to StingType in PostgreSQL") { val df = sqlContext.read.jdbc(jdbcUrl, "st_with_array", new Properties) val rows = df.collect() @@ -310,7 +310,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { assert(rows(0).getSeq(26) == Seq("10:20:10,14,15")) } - testIfEnabled("query JDBC option") { + test("query JDBC option") { val expectedResult = Set( (42, 123456789012345L) ).map { case (c1, c3) => @@ -335,7 +335,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { assert(sql("select c1, c3 from queryOption").collect.toSet == expectedResult) } - testIfEnabled("write byte as smallint") { + test("write byte as smallint") { sqlContext.createDataFrame(Seq((1.toByte, 2.toShort))) .write.jdbc(jdbcUrl, "byte_to_smallint_test", new Properties) val df = sqlContext.read.jdbc(jdbcUrl, "byte_to_smallint_test", new Properties) @@ -348,7 +348,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { assert(rows(0).getShort(1) === 2) } - testIfEnabled("character type tests") { + test("character type tests") { val df = sqlContext.read.jdbc(jdbcUrl, "char_types", new Properties) val row = df.collect() assert(row.length == 1) @@ -360,7 +360,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { assert(row(0).getString(4) === "q") } - testIfEnabled("SPARK-32576: character array type tests") { + test("SPARK-32576: character array type tests") { val df = sqlContext.read.jdbc(jdbcUrl, "char_array_types", new Properties) val row = df.collect() assert(row.length == 1) @@ -372,7 +372,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { assert(row(0).getSeq[String](4) === Seq("q", "r")) } - testIfEnabled("SPARK-34333: money type tests") { + test("SPARK-34333: money type tests") { val df = sqlContext.read.jdbc(jdbcUrl, "money_types", new Properties) val row = df.collect() assert(row.length === 1) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala index 1198ba8a3e46..a0e1950cd68e 100644 --- 
a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.tags.DockerTest /** * To run this test suite for a specific version (e.g., postgres:13.0): * {{{ - * POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0 + * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0 * ./build/sbt -Pdocker-integration-tests "testOnly *PostgresKrbIntegrationSuite" * }}} */ diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala index 3b8008aca1c9..012bdae5ffbd 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala @@ -31,7 +31,7 @@ import org.apache.spark.tags.DockerTest /** * To run this test suite for a specific version (e.g., ibmcom/db2:11.5.4.0): * {{{ - * DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0 + * ENABLE_DOCKER_INTEGRATION_TESTS=1 DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0 * ./build/sbt -Pdocker-integration-tests "testOnly *v2.DB2IntegrationSuite" * }}} */ diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala index a756516457d1..083fa3c2a663 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala @@ -31,7 +31,7 @@ import org.apache.spark.tags.DockerTest /** * To run this test suite for a specific version (e.g., 2019-GA-ubuntu-16.04): * {{{ - * MSSQLSERVER_DOCKER_IMAGE_NAME=2019-GA-ubuntu-16.04 + * ENABLE_DOCKER_INTEGRATION_TESTS=1 MSSQLSERVER_DOCKER_IMAGE_NAME=2019-GA-ubuntu-16.04 * ./build/sbt -Pdocker-integration-tests "testOnly *v2*MsSqlServerIntegrationSuite" * }}} */ diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala index a567ab3b8236..9a9bda56dd88 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala @@ -32,8 +32,8 @@ import org.apache.spark.tags.DockerTest * * To run this test suite for a specific version (e.g., mysql:5.7.31): * {{{ - * MYSQL_DOCKER_IMAGE_NAME=mysql:5.7.31 - * ./build/sbt -Pdocker-integration-tests "testOnly *v2*MySQLIntegrationSuite" + * ENABLE_DOCKER_INTEGRATION_TESTS=1 MYSQL_DOCKER_IMAGE_NAME=mysql:5.7.31 + * ./build/sbt -Pdocker-integration-tests "testOnly *v2*MySQLIntegrationSuite" * * }}} * diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala index f087f0616e92..d77b216f2b7a 100644 --- 
a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala @@ -45,6 +45,7 @@ import org.apache.spark.tags.DockerTest * $ cd docker-images/OracleDatabase/SingleInstance/dockerfiles * $ ./buildContainerImage.sh -v 18.4.0 -x * $ export ORACLE_DOCKER_IMAGE_NAME=oracle/database:18.4.0-xe + * $ export ENABLE_DOCKER_INTEGRATION_TESTS=1 * $ cd $SPARK_HOME * $ ./build/sbt -Pdocker-integration-tests * "testOnly org.apache.spark.sql.jdbc.v2.OracleIntegrationSuite" diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala index eded03afdaa3..386a7ad6b30e 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala @@ -29,7 +29,7 @@ import org.apache.spark.tags.DockerTest /** * To run this test suite for a specific version (e.g., postgres:13.0): * {{{ - * POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0 + * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0 * ./build/sbt -Pdocker-integration-tests "testOnly *v2.PostgresIntegrationSuite" * }}} */ diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala index e534df84ce6f..b5cf3dfcb474 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.tags.DockerTest /** * To run this test suite for a specific version (e.g., postgres:13.0): * {{{ - * POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0 + * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0 * ./build/sbt -Pdocker-integration-tests "testOnly *v2.PostgresNamespaceSuite" * }}} */ diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala index e31bc48fd8a4..95d59fec2fac 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala @@ -34,7 +34,7 @@ private[v2] trait V2JDBCNamespaceTest extends SharedSparkSession with DockerInte def builtinNamespaces: Array[Array[String]] - testIfEnabled("listNamespaces: basic behavior") { + test("listNamespaces: basic behavior") { catalog.createNamespace(Array("foo"), Map("comment" -> "test comment").asJava) assert(catalog.listNamespaces() === Array(Array("foo")) ++ builtinNamespaces) assert(catalog.listNamespaces(Array("foo")) === Array()) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index a76623429477..aaca1cc11d75 100644 --- 
a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -63,7 +63,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu def testCreateTableWithProperty(tbl: String): Unit = {} - testIfEnabled("SPARK-33034: ALTER TABLE ... add new columns") { + test("SPARK-33034: ALTER TABLE ... add new columns") { withTable(s"$catalogName.alt_table") { sql(s"CREATE TABLE $catalogName.alt_table (ID STRING)") var t = spark.table(s"$catalogName.alt_table") @@ -91,7 +91,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu assert(msg.contains("Table not found")) } - testIfEnabled("SPARK-33034: ALTER TABLE ... drop column") { + test("SPARK-33034: ALTER TABLE ... drop column") { withTable(s"$catalogName.alt_table") { sql(s"CREATE TABLE $catalogName.alt_table (C1 INTEGER, C2 STRING, c3 INTEGER)") sql(s"ALTER TABLE $catalogName.alt_table DROP COLUMN C1") @@ -112,7 +112,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu assert(msg.contains("Table not found")) } - testIfEnabled("SPARK-33034: ALTER TABLE ... update column type") { + test("SPARK-33034: ALTER TABLE ... update column type") { withTable(s"$catalogName.alt_table") { testUpdateColumnType(s"$catalogName.alt_table") // Update not existing column @@ -128,7 +128,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu assert(msg.contains("Table not found")) } - testIfEnabled("SPARK-33034: ALTER TABLE ... rename column") { + test("SPARK-33034: ALTER TABLE ... rename column") { withTable(s"$catalogName.alt_table") { sql(s"CREATE TABLE $catalogName.alt_table (ID STRING NOT NULL," + s" ID1 STRING NOT NULL, ID2 STRING NOT NULL)") @@ -146,7 +146,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu assert(msg.contains("Table not found")) } - testIfEnabled("SPARK-33034: ALTER TABLE ... update column nullability") { + test("SPARK-33034: ALTER TABLE ... update column nullability") { withTable(s"$catalogName.alt_table") { testUpdateColumnNullability(s"$catalogName.alt_table") } @@ -157,7 +157,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu assert(msg.contains("Table not found")) } - testIfEnabled("CREATE TABLE with table comment") { + test("CREATE TABLE with table comment") { withTable(s"$catalogName.new_table") { val logAppender = new LogAppender("table comment") withLogAppender(logAppender) { @@ -171,7 +171,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu } } - testIfEnabled("CREATE TABLE with table property") { + test("CREATE TABLE with table property") { withTable(s"$catalogName.new_table") { val m = intercept[AnalysisException] { sql(s"CREATE TABLE $catalogName.new_table (i INT) TBLPROPERTIES('a'='1')") From a96f2c58ce60b54ab0585e70947b68291a7584f8 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Thu, 27 May 2021 16:10:35 +0900 Subject: [PATCH 08/14] Fix indentation. 
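For context, the blocks re-indented here are the ones the previous commit wrapped so that Docker-dependent setup and tests only run when ENABLE_DOCKER_INTEGRATION_TESTS=1. Below is a minimal, self-contained sketch of that gating pattern; the trait and helper names are illustrative only (the real implementation lives in DockerIntegrationFunSuite), and it assumes plain ScalaTest rather than SparkFunSuite:

    import org.scalactic.source.Position
    import org.scalatest.Tag
    import org.scalatest.funsuite.AnyFunSuite

    // Sketch only: gate every test (and any expensive setup) behind an env var.
    trait EnvGatedFunSuite extends AnyFunSuite {
      private val enablingEnvVar = "ENABLE_DOCKER_INTEGRATION_TESTS"
      protected def testsEnabled: Boolean = sys.env.get(enablingEnvVar).contains("1")

      // Run `block` (e.g. container startup in beforeAll) only when enabled.
      protected def runIfEnabled(description: String)(block: => Unit): Unit = {
        if (testsEnabled) {
          block
        } else {
          println(s"Skipping '$description' [set $enablingEnvVar=1 to enable]")
        }
      }

      // Every test registered through test(...) is ignored unless enabled;
      // per-test tags are forwarded so tag-based filtering keeps working.
      override protected def test(testName: String, testTags: Tag*)(testBody: => Any)
          (implicit pos: Position): Unit = {
        if (testsEnabled) {
          super.test(testName, testTags: _*)(testBody)
        } else {
          ignore(s"$testName [set $enablingEnvVar=1 to enable]")(testBody)
        }
      }
    }

A concrete suite would mix this in and register tests as usual; when the variable is unset, setup is skipped and the tests are reported as ignored instead of failing.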
--- .../sql/jdbc/DockerJDBCIntegrationSuite.scala | 121 +++++++++--------- .../jdbc/DockerKrbJDBCIntegrationSuite.scala | 32 ++--- 2 files changed, 76 insertions(+), 77 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala index 51aefd863431..e7bd05139c7a 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala @@ -116,71 +116,70 @@ abstract class DockerJDBCIntegrationSuite protected var jdbcUrl: String = _ override def beforeAll(): Unit = runIfTestsEnabled(s"Prepare for ${this.getClass.getName}") { - super.beforeAll() + super.beforeAll() + try { + docker = DefaultDockerClient.fromEnv.build() + // Check that Docker is actually up try { - docker = DefaultDockerClient.fromEnv.build() - // Check that Docker is actually up - try { - docker.ping() - } catch { - case NonFatal(e) => - log.error("Exception while connecting to Docker. Check whether Docker is running.") - throw e - } - // Ensure that the Docker image is installed: - try { - docker.inspectImage(db.imageName) - } catch { - case e: ImageNotFoundException => - log.warn(s"Docker image ${db.imageName} not found; pulling image from registry") - docker.pull(db.imageName) - pulled = true - } - val hostConfigBuilder = HostConfig.builder() - .privileged(db.privileged) - .networkMode("bridge") - .ipcMode(if (db.usesIpc) "host" else "") - .portBindings( - Map(s"${db.jdbcPort}/tcp" -> - List(PortBinding.of(dockerIp, externalPort)).asJava).asJava) - // Create the database container: - val containerConfigBuilder = ContainerConfig.builder() - .image(db.imageName) - .networkDisabled(false) - .env(db.env.map { case (k, v) => s"$k=$v" }.toSeq.asJava) - .exposedPorts(s"${db.jdbcPort}/tcp") - if (db.getEntryPoint.isDefined) { - containerConfigBuilder.entrypoint(db.getEntryPoint.get) - } - if (db.getStartupProcessName.isDefined) { - containerConfigBuilder.cmd(db.getStartupProcessName.get) - } - db.beforeContainerStart(hostConfigBuilder, containerConfigBuilder) - containerConfigBuilder.hostConfig(hostConfigBuilder.build()) - val config = containerConfigBuilder.build() - // Create the database container: - containerId = docker.createContainer(config).id - // Start the container and wait until the database can accept JDBC connections: - docker.startContainer(containerId) - jdbcUrl = db.getJdbcUrl(dockerIp, externalPort) - var conn: Connection = null - eventually(connectionTimeout, interval(1.second)) { - conn = getConnection() - } - // Run any setup queries: - try { - dataPreparation(conn) - } finally { - conn.close() - } + docker.ping() } catch { case NonFatal(e) => - try { - afterAll() - } finally { - throw e - } + log.error("Exception while connecting to Docker. 
Check whether Docker is running.") + throw e } + // Ensure that the Docker image is installed: + try { + docker.inspectImage(db.imageName) + } catch { + case e: ImageNotFoundException => + log.warn(s"Docker image ${db.imageName} not found; pulling image from registry") + docker.pull(db.imageName) + pulled = true + } + val hostConfigBuilder = HostConfig.builder() + .privileged(db.privileged) + .networkMode("bridge") + .ipcMode(if (db.usesIpc) "host" else "") + .portBindings( + Map(s"${db.jdbcPort}/tcp" -> List(PortBinding.of(dockerIp, externalPort)).asJava).asJava) + // Create the database container: + val containerConfigBuilder = ContainerConfig.builder() + .image(db.imageName) + .networkDisabled(false) + .env(db.env.map { case (k, v) => s"$k=$v" }.toSeq.asJava) + .exposedPorts(s"${db.jdbcPort}/tcp") + if (db.getEntryPoint.isDefined) { + containerConfigBuilder.entrypoint(db.getEntryPoint.get) + } + if (db.getStartupProcessName.isDefined) { + containerConfigBuilder.cmd(db.getStartupProcessName.get) + } + db.beforeContainerStart(hostConfigBuilder, containerConfigBuilder) + containerConfigBuilder.hostConfig(hostConfigBuilder.build()) + val config = containerConfigBuilder.build() + // Create the database container: + containerId = docker.createContainer(config).id + // Start the container and wait until the database can accept JDBC connections: + docker.startContainer(containerId) + jdbcUrl = db.getJdbcUrl(dockerIp, externalPort) + var conn: Connection = null + eventually(connectionTimeout, interval(1.second)) { + conn = getConnection() + } + // Run any setup queries: + try { + dataPreparation(conn) + } finally { + conn.close() + } + } catch { + case NonFatal(e) => + try { + afterAll() + } finally { + throw e + } + } } override def afterAll(): Unit = { diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerKrbJDBCIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerKrbJDBCIntegrationSuite.scala index 645030eaab30..30cca3bc1f25 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerKrbJDBCIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerKrbJDBCIntegrationSuite.scala @@ -42,27 +42,27 @@ abstract class DockerKrbJDBCIntegrationSuite extends DockerJDBCIntegrationSuite protected def setAuthentication(keytabFile: String, principal: String): Unit override def beforeAll(): Unit = runIfTestsEnabled(s"Prepare for ${this.getClass.getName}") { - SecurityUtils.setGlobalKrbDebug(true) + SecurityUtils.setGlobalKrbDebug(true) - val kdcDir = Utils.createTempDir() - val kdcConf = MiniKdc.createConf() - kdcConf.setProperty(MiniKdc.DEBUG, "true") - kdc = new MiniKdc(kdcConf, kdcDir) - kdc.start() + val kdcDir = Utils.createTempDir() + val kdcConf = MiniKdc.createConf() + kdcConf.setProperty(MiniKdc.DEBUG, "true") + kdc = new MiniKdc(kdcConf, kdcDir) + kdc.start() - principal = s"$userName@${kdc.getRealm}" + principal = s"$userName@${kdc.getRealm}" - entryPointDir = Utils.createTempDir() - initDbDir = Utils.createTempDir() - val keytabFile = new File(initDbDir, keytabFileName) - keytabFullPath = keytabFile.getAbsolutePath - kdc.createPrincipal(keytabFile, userName) - logInfo(s"Created keytab file: $keytabFullPath") + entryPointDir = Utils.createTempDir() + initDbDir = Utils.createTempDir() + val keytabFile = new File(initDbDir, keytabFileName) + keytabFullPath = keytabFile.getAbsolutePath + kdc.createPrincipal(keytabFile, 
userName) + logInfo(s"Created keytab file: $keytabFullPath") - setAuthentication(keytabFullPath, principal) + setAuthentication(keytabFullPath, principal) - // This must be executed intentionally later - super.beforeAll() + // This must be executed intentionally later + super.beforeAll() } override def afterAll(): Unit = { From eeae6e18b0bf1d3ee4030d7eae79ea076a0f4a21 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Thu, 27 May 2021 16:12:14 +0900 Subject: [PATCH 09/14] Revert unnecessary change. --- .../org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala index e92107fa7f42..eb8d286c14b8 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala @@ -313,8 +313,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark } } - test( - "SPARK-18004: Make sure date or timestamp related predicate is pushed down correctly") { + test("SPARK-18004: Make sure date or timestamp related predicate is pushed down correctly") { val props = new Properties() props.put("oracle.jdbc.mapDateToTimestamp", "false") From 19a4a93f98715cb5049a8e3dddbf3b61da01c14f Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Thu, 27 May 2021 21:14:55 +0900 Subject: [PATCH 10/14] Leave dependencies empty. --- dev/sparktestsupport/modules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index acc3b71c2b70..95b42821f819 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -745,7 +745,7 @@ def __hash__(self): docker_integration_tests = Module( name="docker-integration-tests", - dependencies=[sql], + dependencies=[], build_profile_flags=["-Pdocker-integration-tests"], source_file_regexes=["external/docker-integration-tests"], sbt_test_goals=["docker-integration-tests/test"], From 0cb6867abbee1bae9e5d12f791b55a5796ae9278 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Thu, 27 May 2021 21:32:47 +0900 Subject: [PATCH 11/14] Change to run only on GA. --- dev/sparktestsupport/modules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 95b42821f819..9d3d63935f94 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -749,7 +749,7 @@ def __hash__(self): build_profile_flags=["-Pdocker-integration-tests"], source_file_regexes=["external/docker-integration-tests"], sbt_test_goals=["docker-integration-tests/test"], - environ={ + environ=None if "GITHUB_ACTIONS" not in os.environ { "ENABLE_DOCKER_INTEGRATION_TESTS": "1" }, test_tags=[ From 10669ee9ea971a391f167f3ee8e8a5788382b89d Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Thu, 27 May 2021 21:51:13 +0900 Subject: [PATCH 12/14] Fix modules.py. 
--- dev/run-tests.py | 6 +++--- dev/sparktestsupport/modules.py | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 3dee9077b685..b43081801f1b 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -122,19 +122,19 @@ def determine_modules_to_test(changed_modules, deduplicated=True): ['graphx', 'examples'] >>> [x.name for x in determine_modules_to_test([modules.sql])] ... # doctest: +NORMALIZE_WHITESPACE - ['sql', 'avro', 'docker-integration-tests', 'hive', 'mllib', 'sql-kafka-0-10', 'examples', + ['sql', 'avro', 'hive', 'mllib', 'sql-kafka-0-10', 'examples', 'hive-thriftserver', 'pyspark-sql', 'repl', 'sparkr', 'pyspark-mllib', 'pyspark-pandas', 'pyspark-ml'] >>> sorted([x.name for x in determine_modules_to_test( ... [modules.sparkr, modules.sql], deduplicated=False)]) ... # doctest: +NORMALIZE_WHITESPACE - ['avro', 'docker-integration-tests', 'examples', 'hive', 'hive-thriftserver', 'mllib', + ['avro', 'examples', 'hive', 'hive-thriftserver', 'mllib', 'pyspark-ml', 'pyspark-mllib', 'pyspark-pandas', 'pyspark-sql', 'repl', 'sparkr', 'sql', 'sql-kafka-0-10'] >>> sorted([x.name for x in determine_modules_to_test( ... [modules.sql, modules.core], deduplicated=False)]) ... # doctest: +NORMALIZE_WHITESPACE - ['avro', 'catalyst', 'core', 'docker-integration-tests', 'examples', 'graphx', 'hive', + ['avro', 'catalyst', 'core', 'examples', 'graphx', 'hive', 'hive-thriftserver', 'mllib', 'mllib-local', 'pyspark-core', 'pyspark-ml', 'pyspark-mllib', 'pyspark-pandas', 'pyspark-resource', 'pyspark-sql', 'pyspark-streaming', 'repl', 'root', 'sparkr', 'sql', 'sql-kafka-0-10', 'streaming', 'streaming-kafka-0-10', diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 9d3d63935f94..c602b02421b3 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -17,6 +17,7 @@ from functools import total_ordering import itertools +import os import re all_modules = [] @@ -749,7 +750,7 @@ def __hash__(self): build_profile_flags=["-Pdocker-integration-tests"], source_file_regexes=["external/docker-integration-tests"], sbt_test_goals=["docker-integration-tests/test"], - environ=None if "GITHUB_ACTIONS" not in os.environ { + environ=None if "GITHUB_ACTIONS" not in os.environ else { "ENABLE_DOCKER_INTEGRATION_TESTS": "1" }, test_tags=[ From 692d95d1458993cbb9cbd47014202e84cd6aa328 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Thu, 27 May 2021 21:58:31 +0900 Subject: [PATCH 13/14] super.test(testName)(testBody) -> super.test(testName, testTags: _*)(testBody) --- .../org/apache/spark/sql/jdbc/DockerIntegrationFunSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerIntegrationFunSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerIntegrationFunSuite.scala index d8d20c3988f6..9fbbc8ed2e0f 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerIntegrationFunSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerIntegrationFunSuite.scala @@ -37,7 +37,7 @@ trait DockerIntegrationFunSuite extends SparkFunSuite { override def test(testName: String, testTags: Tag*)(testBody: => Any) (implicit pos: Position): Unit = { if (shouldRunTests) { - super.test(testName)(testBody) + super.test(testName, testTags: _*)(testBody) } else { ignore(s"$testName [enable by setting env var 
$envVarNameForEnablingTests=1]")(testBody) } From bdbfea50edc6f5181044e3fb48e298f3babeb405 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Fri, 28 May 2021 14:50:36 +0900 Subject: [PATCH 14/14] Fix for considering run-tests.py. --- .github/workflows/build_and_test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index a5aea0920a4e..591b5fdb6cf8 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -646,9 +646,11 @@ jobs: if: github.repository != 'apache/spark' id: sync-branch run: | + apache_spark_ref=`git rev-parse HEAD` git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" + echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref" - name: Cache Scala, SBT and Maven uses: actions/cache@v2 with: