diff --git a/.github/workflows/ray-logs-sidecar.yml b/.github/workflows/ray-logs-sidecar.yml
index 1a32774..7e03199 100644
--- a/.github/workflows/ray-logs-sidecar.yml
+++ b/.github/workflows/ray-logs-sidecar.yml
@@ -11,7 +11,7 @@ on:
       - ray-logs-sidecar/**
   push:
     branches:
-      - master
+      - main
   workflow_dispatch:
 
 jobs:
diff --git a/.github/workflows/spark-history-server.yml b/.github/workflows/spark-history-server.yml
new file mode 100644
index 0000000..5e916d2
--- /dev/null
+++ b/.github/workflows/spark-history-server.yml
@@ -0,0 +1,48 @@
+name: Build & Push Spark History Server Image
+
+# Cancel superseded runs for the same pull request (or the same ref for pushes).
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+on:
+  pull_request:
+    paths:
+      - .github/workflows/spark-history-server.yml
+      - spark-history-server/**
+  push:
+    branches:
+      - main
+  workflow_dispatch:
+
+jobs:
+  build-and-push-image:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        container: [spark-history-server]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: docker/setup-qemu-action@v3
+      - uses: docker/setup-buildx-action@v3
+      # Pull requests only build the image; log in to GHCR only when it will be pushed.
+      - uses: docker/login-action@v2
+        if: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch' }}
+        with:
+          registry: https://ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - id: image-names
+        uses: docker/metadata-action@v5
+        with:
+          images: ghcr.io/${{ github.repository_owner }}/${{ matrix.container }}
+          # `latest` is applied only to pushes to main, the branch this workflow triggers on.
+          tags: |
+            type=raw,value=latest,enable=${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
+            type=sha,format=long
+      - uses: docker/build-push-action@v5
+        with:
+          context: ${{ matrix.container }}
+          platforms: linux/amd64,linux/arm64
+          push: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch' }}
+          tags: ${{ steps.image-names.outputs.tags }}
diff --git a/README.md b/README.md
index 3bcabdf..0d8f005 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@ This repository contains Dockerfiles for useful containers that can be used with
 
 The following containers are currently available:
 
-| Container          | Description                                             |
-| ------------------ | ------------------------------------------------------- |
-| `ray-logs-sidecar` | Captures and exposes Ray job logs to container's stdout |
-
+| Container              | Description                                                                          |
+| ---------------------- | ------------------------------------------------------------------------------------ |
+| `ray-logs-sidecar`     | Captures and exposes Ray job logs to container's stdout                              |
+| `spark-history-server` | Spark History Server with support for S3, Google Cloud Storage, and Azure Blob Store |
diff --git a/spark-history-server/Dockerfile b/spark-history-server/Dockerfile
new file mode 100644
index 0000000..0582a20
--- /dev/null
+++ b/spark-history-server/Dockerfile
@@ -0,0 +1,19 @@
+FROM apache/spark:3.5.0
+LABEL org.opencontainers.image.source=https://github.com/unionai-oss/containers
+
+# NOTE(review): ADD from a URL creates root-owned files with mode 600 -- confirm
+# the user this image runs as can read the jars (otherwise add --chmod=644).
+
+# Hadoop
+ADD https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-common/3.3.4/hadoop-common-3.3.4.jar ${SPARK_HOME}/jars/
+
+# S3
+ADD https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.12.262/aws-java-sdk-bundle-1.12.262.jar ${SPARK_HOME}/jars/
+ADD https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.4/hadoop-aws-3.3.4.jar ${SPARK_HOME}/jars/
+
+# Google Cloud Storage
+ADD https://github.com/GoogleCloudDataproc/hadoop-connectors/releases/download/v2.2.18/gcs-connector-hadoop3-2.2.18-shaded.jar ${SPARK_HOME}/jars/
+
+# Azure Blob Storage
+ADD https://repo1.maven.org/maven2/com/microsoft/azure/azure-storage/7.0.1/azure-storage-7.0.1.jar ${SPARK_HOME}/jars/
+ADD https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-azure/3.3.4/hadoop-azure-3.3.4.jar ${SPARK_HOME}/jars/