[INFRA][Spark][Iceberg] Separate out the iceberg tests to a separate github action (delta-io#4155)

<!--
Thanks for sending a pull request!  Here are some tips for you:
1. If this is your first time, please read our contributor guidelines:
https://github.com/delta-io/delta/blob/master/CONTRIBUTING.md
2. If the PR is unfinished, add '[WIP]' in your PR title, e.g., '[WIP]
Your PR title ...'.
  3. Be sure to keep the PR description updated to reflect all changes.
  4. Please write your PR title to summarize what this PR proposes.
5. If possible, provide a concise example to reproduce the issue for a
faster review.
6. If applicable, include the corresponding issue number in the PR title
and link it in the body.
-->

#### Which Delta project/connector is this regarding?
<!--
Please add the component selected below to the beginning of the pull
request title
For example: [Spark] Title of my pull request
-->

- [ ] Spark
- [ ] Standalone
- [ ] Flink
- [ ] Kernel
- [X] Other (INFRA)

## Description

Separates the Iceberg tests into their own GitHub Actions workflow. Also makes
the unidoc settings conditional on the environment variable `DISABLE_UNIDOC`.
This is useful because, with unidoc enabled, running tests compiles every
project in the repo. If you disable unidoc, you can, for example, run
`spark / test` without compiling the Iceberg projects.
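The mechanism is visible in the `project/Unidoc.scala` hunk below: when `DISABLE_UNIDOC` is set, the unidoc settings (including the `Test / test` dependency on `Compile / unidoc`) are simply not applied. A minimal sketch of that gating pattern, written as an sbt build helper; the names `DocsGate`, `applyDocSettings`, and `docSettings` are hypothetical, not the repo's actual API:

```scala
// project/DocsGate.scala -- illustrative sketch of the env-var gate this PR
// adds to project/Unidoc.scala; names here are hypothetical.
import sbt._

object DocsGate {
  /** Return the project unchanged when DISABLE_UNIDOC is set, so that
    * `Test / test` no longer depends on a whole-repo unidoc build. */
  def applyDocSettings(projectToUpdate: Project)(docSettings: Seq[Setting[_]]): Project = {
    if (sys.env.contains("DISABLE_UNIDOC")) return projectToUpdate
    projectToUpdate.settings(docSettings: _*)
  }
}
```

With a gate like this in place, exporting `DISABLE_UNIDOC` before invoking sbt (or before `run-tests.py`, which forwards the variable into the Docker environment per the last hunk below) keeps a `spark / test` run from compiling unrelated projects.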

## How was this patch tested?

CI jobs look good.

## Does this PR introduce _any_ user-facing changes?

No
allisonport-db authored Feb 14, 2025
1 parent 318e2ec commit f2b17b9
Showing 4 changed files with 86 additions and 7 deletions.
64 changes: 64 additions & 0 deletions .github/workflows/iceberg_test.yaml
@@ -0,0 +1,64 @@
name: "Delta Iceberg Latest"
on: [push, pull_request]
jobs:
  test:
    name: "DIL: Scala ${{ matrix.scala }}"
    runs-on: ubuntu-20.04
    strategy:
      matrix:
        # These Scala versions must match those in the build.sbt
        scala: [2.12.18, 2.13.13]
    env:
      SCALA_VERSION: ${{ matrix.scala }}
    steps:
      - uses: actions/checkout@v3
      # TODO we can make this more selective
      - uses: technote-space/get-diff-action@v4
        id: git-diff
        with:
          PATTERNS: |
            **
            .github/workflows/**
            !kernel/**
            !connectors/**
      - name: install java
        uses: actions/setup-java@v3
        with:
          distribution: "zulu"
          java-version: "8"
      - name: Cache Scala, SBT
        uses: actions/cache@v3
        with:
          path: |
            ~/.sbt
            ~/.ivy2
            ~/.cache/coursier
          # Change the key if dependencies are changed. For each key, GitHub Actions will cache
          # the above directories when we use the key for the first time. After that, each run will
          # just use the cache. The cache is immutable so we need to use a new key when trying to
          # cache new stuff.
          key: delta-sbt-cache-spark3.2-scala${{ matrix.scala }}
      - name: Install Job dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev libffi-dev liblzma-dev python-openssl git
          sudo apt install libedit-dev
          curl -LO https://github.com/bufbuild/buf/releases/download/v1.28.1/buf-Linux-x86_64.tar.gz
          mkdir -p ~/buf
          tar -xvzf buf-Linux-x86_64.tar.gz -C ~/buf --strip-components 1
          rm buf-Linux-x86_64.tar.gz
          sudo apt install python3-pip --fix-missing
          sudo pip3 install pipenv==2021.5.29
          curl https://pyenv.run | bash
          export PATH="~/.pyenv/bin:$PATH"
          eval "$(pyenv init -)"
          eval "$(pyenv virtualenv-init -)"
          pyenv install 3.8.18
          pyenv global system 3.8.18
          pipenv --python 3.8 install
        if: steps.git-diff.outputs.diff
      - name: Run Scala/Java and Python tests
        # when changing TEST_PARALLELISM_COUNT make sure to also change it in spark_master_test.yaml
        run: |
          TEST_PARALLELISM_COUNT=4 pipenv run python run-tests.py --group iceberg
        if: steps.git-diff.outputs.diff
15 changes: 10 additions & 5 deletions build.sbt
@@ -178,10 +178,6 @@ def crossSparkSettings(): Seq[Setting[_]] = getSparkVersion() match {
    Antlr4 / antlr4Version := "4.9.3",
    Test / javaOptions ++= Seq("-Dlog4j.configurationFile=log4j2.properties"),

    // Java-/Scala-/Uni-Doc Settings
    scalacOptions ++= Seq(
      "-P:genjavadoc:strictVisibility=true" // hide package private types and methods in javadoc
    ),
    unidocSourceFilePatterns := Seq(SourceFilePattern("io/delta/tables/", "io/delta/exceptions/"))
  )

@@ -1570,7 +1566,16 @@ val createTargetClassesDir = taskKey[Unit]("create target classes dir")

// Don't use these groups for any other projects
lazy val sparkGroup = project
  .aggregate(spark, contribs, storage, storageS3DynamoDB, iceberg, testDeltaIcebergJar, sharing, hudi)
  .aggregate(spark, contribs, storage, storageS3DynamoDB, sharing, hudi)
  .settings(
    // crossScalaVersions must be set to Nil on the aggregating project
    crossScalaVersions := Nil,
    publishArtifact := false,
    publish / skip := false,
  )

lazy val icebergGroup = project
  .aggregate(iceberg, testDeltaIcebergJar)
  .settings(
    // crossScalaVersions must be set to Nil on the aggregating project
    crossScalaVersions := Nil,
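The new `icebergGroup` definition is cut off by the collapsed diff context above; presumably its remaining settings mirror `sparkGroup`'s. A sketch of the likely full shape, offered as an assumption rather than the verbatim diff:

```scala
// Assumed shape of the new aggregating project; verify against build.sbt.
lazy val icebergGroup = project
  .aggregate(iceberg, testDeltaIcebergJar)
  .settings(
    // crossScalaVersions must be set to Nil on the aggregating project
    crossScalaVersions := Nil,
    publishArtifact := false,
    publish / skip := false,
  )
```

Splitting `iceberg` and `testDeltaIcebergJar` into their own group is what lets the new workflow run `run-tests.py --group iceberg` without pulling those projects into `sparkGroup`.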
8 changes: 7 additions & 1 deletion project/Unidoc.scala
@@ -49,6 +49,7 @@ object Unidoc {
      generateScalaDoc: Boolean = false,
      classPathToSkip: String = null
  ): Project = {
    if (sys.env.contains("DISABLE_UNIDOC")) return projectToUpdate
    if (!generatedJavaDoc && !generateScalaDoc) return projectToUpdate

    var updatedProject: Project = projectToUpdate
@@ -69,7 +70,12 @@
      generateUnidocSettings(docTitle, generateScalaDoc, classPathToSkip),

      // Ensure unidoc is run with tests.
      (Test / test) := ((Test / test) dependsOn (Compile / unidoc)).value
      (Test / test) := ((Test / test) dependsOn (Compile / unidoc)).value,

      // hide package private types and methods in javadoc
      scalacOptions ++= Seq(
        "-P:genjavadoc:strictVisibility=true"
      ),
    )
  }

6 changes: 5 additions & 1 deletion run-tests.py
@@ -25,7 +25,7 @@
# Define groups of subprojects that can be tested separately from other groups.
# As of now, we have only defined project groups in the SBT build, so these must match
# the group names defined in build.sbt.
valid_project_groups = ["spark", "kernel", "spark-python"]
valid_project_groups = ["spark", "iceberg", "kernel", "spark-python"]


def get_args():
@@ -209,6 +209,10 @@ def run_tests_in_docker(image_tag, test_group):
    if test_parallelism is not None:
        envs = envs + "-e TEST_PARALLELISM_COUNT=%s " % test_parallelism

    disable_unidoc = os.getenv("DISABLE_UNIDOC")
    if disable_unidoc is not None:
        envs = envs + "-e DISABLE_UNIDOC=%s " % disable_unidoc

    cwd = os.getcwd()
    test_script = os.path.basename(__file__)

