7 changes: 4 additions & 3 deletions spark-3.5.yaml → spark-3.5-scala-2.12.yaml
@@ -1,7 +1,7 @@
package:
name: spark-3.5
name: spark-3.5-scala-2.12
version: 3.5.3
epoch: 5
epoch: 0
description: Unified engine for large-scale data analytics
copyright:
- license: Apache-2.0
@@ -18,7 +18,7 @@ environment:
- glibc-iconv
- glibc-locale-en
- grep
- maven
- maven-3.9
- openjdk-11
- openjdk-17
# Only 8 is used during the build process
@@ -32,6 +32,7 @@ environment:
- yaml-dev
environment:
LANG: en_US.UTF-8
M2_HOME: /usr/share/java/maven-3.9

pipeline:
- uses: git-checkout
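This variant now pins Maven explicitly: the unversioned maven dependency becomes maven-3.9, and M2_HOME points the build at its install prefix (the scala-2.13 file below receives the same pin). As a minimal sanity check, a sketch assuming the melange build shell and the M2_HOME value set above, not part of the pipeline:

# Hypothetical check; confirms the pinned Maven resolves and reports 3.9.
test -x "$M2_HOME/bin/mvn" && "$M2_HOME/bin/mvn" -version | grep -q 'Apache Maven 3.9' \
  && echo 'pinned Maven 3.9 active' || echo "maven-3.9 missing under $M2_HOME"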
File renamed without changes.
File renamed without changes.
52 changes: 34 additions & 18 deletions spark-3.5-scala-2.13.yaml
@@ -1,7 +1,7 @@
package:
name: spark-3.5-scala-2.13
version: 3.5.3
epoch: 0
epoch: 1
description: Unified engine for large-scale data analytics
copyright:
- license: Apache-2.0
@@ -25,19 +25,22 @@ environment:
- glibc-iconv
- glibc-locale-en
- grep
- maven
- maven-3.9
- openjdk-17-default-jdk
- perl-utils
- procps
- py3-setuptools
- py3.11-pip
- python-3.11
- python3
- wolfi-base
- wolfi-baselayout
- yaml-dev
environment:
LANG: en_US.UTF-8
JAVA_HOME: /usr/lib/jvm/java-17-openjdk
MAVEN_OPTS: "-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g"
M2_HOME: /usr/share/java/maven-3.9
PATH: /usr/share/java/maven-3.9/bin:$PATH

pipeline:
- uses: git-checkout
@@ -46,35 +49,36 @@ pipeline:
tag: v${{package.version}}
expected-commit: 32232e9ed33bb16b93ad58cfde8b82e0f07c0970

- uses: patch
with:
patches: make-distribution.patch

- runs: |
./dev/change-scala-version.sh 2.13

- uses: maven/pombump
with:
properties-file: pombump-properties.yaml
pom: pom.xml

- runs: |
./build/mvn -DskipTests -Pscala-2.13 clean package

mkdir -p ${{targets.contextdir}}/usr/lib/spark
mkdir -p ${{targets.contextdir}}/usr/lib/spark/work-dir
mv bin/ ${{targets.contextdir}}/usr/lib/spark
mv sbin/ ${{targets.contextdir}}/usr/lib/spark
mv target ${{targets.contextdir}}/usr/lib/spark
cp resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh ${{targets.contextdir}}/usr/lib/spark/
cp -R assembly/target/scala-2.13/jars ${{targets.contextdir}}/usr/lib/spark/
mv assembly ${{targets.contextdir}}/usr/lib/spark
mkdir -p ${{targets.contextdir}}/usr/lib/spark/
./dev/make-distribution.sh --name pyspark-2.13 --pip --tgz -Pscala-2.13 -Phive -Phive-thriftserver -Pyarn
mv dist/* ${{targets.contextdir}}/usr/lib/spark/

subpackages:
- name: pyspark-2.13
pipeline:
- working-directory: python
pipeline:
- uses: python/build
- name: Python Install
uses: python/install

- name: ${{package.name}}-compat
description: "Compatibility package to place binaries in the location expected by upstream image"
pipeline:
- runs: |
mkdir -p "${{targets.subpkgdir}}"/usr/bin
mkdir -p "${{targets.subpkgdir}}"/opt
- runs: |
mkdir -p "${{targets.subpkgdir}}"/usr/bin
ln -s /usr/lib/spark/ ${{targets.subpkgdir}}/opt/spark
ln -sf /usr/lib/spark/bin/spark-submit ${{targets.subpkgdir}}/usr/bin/spark-submit
ln -sf /usr/lib/spark/bin/spark-shell ${{targets.subpkgdir}}/usr/bin/spark-shell
@@ -89,6 +93,7 @@ test:
packages:
- openjdk-17-default-jvm
- bash
- python3
environment:
LANG: en_US.UTF-8
SCALA_VERSION: 2.13
@@ -101,7 +106,7 @@ test:
- name: Test ${{package.name}} with OpenJDK 17
pipeline:
- name: Test if the Scala versions are correct
runs: ls /usr/lib/spark/assembly/target/scala-2.13/jars/scala-* | grep -q $SCALA_VERSION
runs: ls /usr/lib/spark/jars/scala-* | grep -q $SCALA_VERSION
- name: Check spark-shell --version
runs: /usr/lib/spark/bin/spark-shell --version
- name: Check spark-submit --version
@@ -133,6 +138,17 @@ test:
assert(result.count() == 1 && result.first().getString(0) == "Bob")
EOF
cat SQLTest.scala | /usr/lib/spark/bin/spark-shell --conf spark.jars.ivy=/tmp/.ivy --master local[*]
- name: Run a simple Spark job in Python
runs: |
cat <<EOF > simple_job.py
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName("SimpleJob").getOrCreate()
data = [1, 2, 3, 4, 5]
rdd = spark.sparkContext.parallelize(data)
total = rdd.reduce(lambda x, y: x + y)  # avoid shadowing the built-in sum()
assert total == 15
spark.stop()
EOF
/usr/lib/spark/bin/spark-submit --jars /usr/lib/spark/jars/guava-32.0.1-jre.jar --conf spark.jars.ivy=/tmp/.ivy --master local[*] simple_job.py

update:
enabled: true
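The -compat subpackage reproduces the upstream image layout: /opt/spark is a symlink to /usr/lib/spark, and spark-submit/spark-shell land on PATH under /usr/bin. A quick sketch of how those links could be exercised in a resulting image (hypothetical checks, separate from the test pipeline above):

readlink /opt/spark        # expected to resolve to /usr/lib/spark/
command -v spark-submit    # expected: /usr/bin/spark-submit
spark-submit --version     # should print the Spark 3.5.3 banner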
21 changes: 21 additions & 0 deletions spark-3.5-scala-2.13/make-distribution.patch
@@ -0,0 +1,21 @@
diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh
index ef7c010e930..1769ecfad29 100755
--- a/dev/make-distribution.sh
+++ b/dev/make-distribution.sh

@@ -36,7 +36,7 @@ MAKE_TGZ=false
MAKE_PIP=false
MAKE_R=false
NAME=none
-MVN="$SPARK_HOME/build/mvn"
+MVN="mvn"

@@ -166,7 +166,7 @@ export MAVEN_OPTS="${MAVEN_OPTS:--Xss128m -Xmx4g -XX:ReservedCodeCacheSize=128m}
# Store the command as an array because $MVN variable might have spaces in it.
# Normal quoting tricks don't work.
# See: http://mywiki.wooledge.org/BashFAQ/050
-BUILD_COMMAND=("$MVN" clean package \
+BUILD_COMMAND=("$MVN" -T$(grep -c processor /proc/cpuinfo) clean package \
-DskipTests \
-Dmaven.javadoc.skip=true \
-Dmaven.scaladoc.skip=true \
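The second hunk parallelizes the Maven build by setting -T to the processor count read from /proc/cpuinfo. Assuming coreutils is present in the build image, nproc is an equivalent spelling of the same count:

# Equivalent invocation sketch; nproc reports the same value as
# `grep -c processor /proc/cpuinfo` on Linux.
mvn -T"$(nproc)" clean package -DskipTests -Dmaven.javadoc.skip=true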
11 changes: 11 additions & 0 deletions spark-3.5-scala-2.13/pombump-deps.yaml
@@ -0,0 +1,11 @@
patches:
- groupId: com.squareup.okio
artifactId: okio
version: 1.17.6
scope: import
type: jar
- groupId: com.google.code.gson
artifactId: gson
version: 2.10.1
scope: import
type: jar
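These entries pin okio 1.17.6 and gson 2.10.1, intended for the maven/pombump step invoked earlier in the pipeline. One way to confirm the pins took effect, assuming the standard maven-help-plugin is available (a sketch, not part of the build):

mvn -q help:effective-pom -Doutput=effective-pom.xml
grep -A2 '<artifactId>okio</artifactId>' effective-pom.xml | grep -q 1.17.6 && echo 'okio pinned'
grep -A2 '<artifactId>gson</artifactId>' effective-pom.xml | grep -q 2.10.1 && echo 'gson pinned'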