Skip to content

Fix OOM issue when uploading a large file using lakectl fs upload #13117

Fix OOM issue when uploading a large file using lakectl fs upload

Fix OOM issue when uploading a large file using lakectl fs upload #13117

Workflow file for this run

# Esti workflow: runs on every pull request, on pushes to master, and on
# manual dispatch.
name: Esti

on:
  pull_request:
  push:
    branches: [master]
  workflow_dispatch:

# These permissions are needed to interact with GitHub's OIDC Token endpoint.
permissions:
  contents: write
  id-token: write
  issues: write
  packages: write
  pull-requests: write

jobs:
# Gate job: publishes whether repository secrets are visible to this run, so
# that jobs which need them can be skipped when they are not.
check-secrets:
  name: Check if secrets are available.
  runs-on: ubuntu-22.04
  outputs:
    secretsavailable: ${{ steps.enablejobs.outputs.secretsavailable }}
  steps:
    - id: enablejobs
      # The secret is only mapped into the step environment; the script
      # records whether it is non-empty and never echoes the value itself.
      env:
        ENABLE_NEXT_JOBS: ${{ secrets.AWS_ACCESS_KEY_ID }}
      run: |
        echo "Enable next jobs based on secrets existence: ${{ env.ENABLE_NEXT_JOBS != '' }}"
        echo "secretsavailable=${{ env.ENABLE_NEXT_JOBS != '' }}" >> $GITHUB_OUTPUT
# Generates API/UI code, builds the ACL server binary, and publishes both as
# the `generated-code` artifact. The result is also cached as a tarball so
# the toolchain setup and generation steps are skipped on a cache hit.
gen-code:
  name: Generate code from latest lakeFS app
  runs-on: ubuntu-22.04
  outputs:
    tag: ${{ steps.version.outputs.tag }}
  steps:
    - name: Check-out code
      uses: actions/checkout@v4
    - name: Extract version
      id: version
      shell: bash
      run: echo "tag=sha-$(git rev-parse --short HEAD | sed s/^v//g)" >> $GITHUB_OUTPUT
    - name: Restore cache
      id: restore-cache
      uses: actions/cache@v4
      with:
        path: /tmp/generated.tar.gz
        # The tarball also carries the `./acl` binary built from
        # ./contrib/auth/acl, so those sources must take part in the key;
        # otherwise an ACL-only change would restore a stale binary.
        key: ${{ runner.os }}-go-${{ hashFiles('./pkg/**', './api/**', './webui/**', './auth/**', './acl', './contrib/auth/acl/**') }}
    - name: Setup Go
      id: go
      if: steps.restore-cache.outputs.cache-hit != 'true'
      uses: actions/setup-go@v4
      with:
        go-version: "1.22.6"
    - name: Setup NodeJS
      if: steps.restore-cache.outputs.cache-hit != 'true'
      uses: actions/setup-node@v3
      with:
        node-version: 18
        cache: "npm"
        cache-dependency-path: webui/package-lock.json
    - name: Generate code
      if: steps.restore-cache.outputs.cache-hit != 'true'
      run: |
        make -j3 gen-api gen-code gen-ui VERSION=${{ steps.version.outputs.tag }}
        go build ./contrib/auth/acl/cmd/acl
        tar -czf /tmp/generated.tar.gz ./webui/dist ./pkg/auth/{client,service_wrapper,service_inviter_wrapper}.gen.go ./pkg/authentication/apiclient/client.gen.go ./pkg/permissions/actions.gen.go ./pkg/api/apigen/lakefs.gen.go ./acl
    # must upload artifact in order to download generated later
    # NOTE(review): upload-artifact@v3 is deprecated; any upgrade has to be
    # made together with every download-artifact consumer in this workflow.
    - name: Store generated code
      uses: actions/upload-artifact@v3
      with:
        name: generated-code
        path: /tmp/generated.tar.gz
# Builds the lakeFS Docker image from the checkout plus the generated-code
# artifact and pushes it to Amazon ECR. Skipped when secrets are unavailable.
deploy-image:
  name: Build and push Docker image
  if: needs.check-secrets.outputs.secretsavailable == 'true'
  needs:
    - check-secrets
    - gen-code
  runs-on: ubuntu-latest-16-cores
  outputs:
    # Re-exported so that test jobs can read the image tag from this job.
    tag: ${{ needs.gen-code.outputs.tag }}
  steps:
    - name: Checkout
      uses: actions/checkout@v4
    - name: Retrieve generated code
      uses: actions/download-artifact@v3
      with:
        path: /tmp/
        name: generated-code
    - name: Unpack generated code
      run: tar -xzvf /tmp/generated.tar.gz
    - name: Configure AWS Credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
        aws-region: us-east-1
    - name: Login to Amazon ECR
      uses: aws-actions/amazon-ecr-login@v2
      id: login-ecr
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v2
    - name: Build and Push
      uses: docker/build-push-action@v5
      with:
        context: .
        push: true
        build-args: VERSION=${{ needs.gen-code.outputs.tag }}
        tags: ${{ steps.login-ecr.outputs.registry }}/lakefs:${{ needs.gen-code.outputs.tag }}
        # Build cache is read from and written to an S3 bucket.
        cache-from: |
          type=s3,region=us-east-1,bucket=lakefs-docker-cache,name=lakefs
        cache-to: |
          type=s3,region=us-east-1,bucket=lakefs-docker-cache,name=lakefs,mode=max
# Logs in to Amazon ECR and republishes the registry host and the Docker
# credentials as job outputs. Masking of the AWS account id and of the
# registry password is explicitly switched off in the two actions below.
# NOTE(review): presumably masking is disabled so the values survive as job
# outputs (e.g. for service-container `credentials:`) — confirm.
login-to-amazon-ecr:
  if: needs.check-secrets.outputs.secretsavailable == 'true'
  needs:
    - check-secrets
  runs-on: ubuntu-latest
  outputs:
    registry: ${{ steps.login-ecr.outputs.registry }}
    # The output names below embed the registry host name.
    docker_username: ${{ steps.login-ecr.outputs.docker_username_977611293394_dkr_ecr_us_east_1_amazonaws_com }}
    docker_password: ${{ steps.login-ecr.outputs.docker_password_977611293394_dkr_ecr_us_east_1_amazonaws_com }}
  steps:
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-region: us-east-1
        role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
        mask-aws-account-id: "false"
    - name: Login to Amazon ECR
      uses: aws-actions/amazon-ecr-login@v2
      id: login-ecr
      with:
        mask-password: "false"
# Runs the TestUnifiedGC system test against a lakeFS service container and
# a Spark master/worker pair, using the metadata client jar restored from
# the cache populated by build-spark3-metadata-client.
unified-gc-test:
  name: Test unified gc
  needs: [deploy-image, login-to-amazon-ecr, build-spark3-metadata-client]
  runs-on: ubuntu-latest-8-cores
  services:
    lakefs:
      image: ${{ needs.login-to-amazon-ecr.outputs.registry }}/lakefs:${{ needs.deploy-image.outputs.tag }}
      credentials:
        username: ${{ needs.login-to-amazon-ecr.outputs.docker_username }}
        password: ${{ needs.login-to-amazon-ecr.outputs.docker_password }}
      ports:
        - '8000:8000'
      env:
        LAKEFS_DATABASE_TYPE: local
        LAKEFS_BLOCKSTORE_TYPE: s3
        LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID: ${{ secrets.ESTI_AWS_ACCESS_KEY_ID }}
        LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY: ${{ secrets.ESTI_AWS_SECRET_ACCESS_KEY }}
        LAKEFS_AUTH_ENCRYPT_SECRET_KEY: some random secret string
        # Environment values are strings; quoted so no loader re-types them.
        LAKEFS_STATS_ENABLED: "false"
    spark:
      image: docker.io/bitnami/spark:3.2.1
      options: --name spark-master
      env:
        SPARK_MODE: master
        # Quoted: a bare `no` is a boolean to YAML 1.1 loaders, whereas the
        # container must receive the literal string "no".
        SPARK_RPC_AUTHENTICATION_ENABLED: "no"
        SPARK_RPC_ENCRYPTION_ENABLED: "no"
        SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED: "no"
        SPARK_SSL_ENABLED: "no"
        AWS_REGION: us-east-1
        AWS_ACCESS_KEY_ID: ${{ secrets.ESTI_AWS_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.ESTI_AWS_SECRET_ACCESS_KEY }}
      ports:
        - '8080:8080'
        - '7077:7077'
    spark-worker:
      image: docker.io/bitnami/spark:3.2.1
      env:
        SPARK_MODE: worker
        # Points at the `spark` service defined above.
        SPARK_MASTER_URL: spark://spark:7077
        SPARK_WORKER_MEMORY: 4G
        SPARK_WORKER_CORES: "4"
        SPARK_RPC_AUTHENTICATION_ENABLED: "no"
        SPARK_RPC_ENCRYPTION_ENABLED: "no"
        SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED: "no"
        SPARK_SSL_ENABLED: "no"
        AWS_REGION: us-east-1
        AWS_ACCESS_KEY_ID: ${{ secrets.ESTI_AWS_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.ESTI_AWS_SECRET_ACCESS_KEY }}
  steps:
    - name: Check-out code
      uses: actions/checkout@v4
    - name: Setup Go
      uses: actions/setup-go@v4
      with:
        go-version: "1.22.6"
      id: go
    # A random suffix keeps this run's storage namespace distinct.
    - name: Generate uniquifying value
      id: unique
      run: echo "value=$RANDOM" >> $GITHUB_OUTPUT
    - name: Retrieve generated code
      uses: actions/download-artifact@v3
      with:
        name: generated-code
        path: /tmp/
    - name: Unpack generated code
      shell: bash
      run: tar -xzvf /tmp/generated.tar.gz
    # Same path and key as the cache step in build-spark3-metadata-client.
    - name: Restore cache
      uses: actions/cache@v4
      id: restore-cache
      with:
        path: ${{ github.workspace }}/test/spark/metaclient
        key: metadata-client-${{ hashFiles('./clients/spark/**') }}
    - name: GC test
      run: |
        go test -timeout 30m -v ./esti \
        -system-tests -use-local-credentials -run=TestUnifiedGC \
        -spark-image-tag=3.2.1 \
        -metaclient-jar=$(pwd)/test/spark/metaclient/spark-assembly.jar
      env:
        ESTI_BLOCKSTORE_TYPE: s3
        ESTI_STORAGE_NAMESPACE: s3://esti-system-testing/${{ github.run_number }}/gc-tests/${{ steps.unique.outputs.value }}
        AWS_REGION: us-east-1
        AWS_ACCESS_KEY_ID: ${{ secrets.ESTI_AWS_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.ESTI_AWS_SECRET_ACCESS_KEY }}
        ESTI_VERSION: ${{ needs.deploy-image.outputs.tag }}
        ESTI_SETUP_LAKEFS: "true"
# Builds the rclone export image from deployments/tools/export and pushes it
# to Amazon ECR under a tag derived from the short commit SHA. Skipped when
# secrets are unavailable.
deploy-rclone-export-image:
  name: Build and push rclone export Docker image
  if: needs.check-secrets.outputs.secretsavailable == 'true'
  needs: check-secrets
  runs-on: ubuntu-22.04
  outputs:
    tag: ${{ steps.version.outputs.tag }}
  steps:
    - name: Checkout
      uses: actions/checkout@v4
    - name: Configure AWS Credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
        aws-region: us-east-1
    - name: Extract version
      id: version
      shell: bash
      run: echo "tag=sha-$(git rev-parse --short HEAD | sed s/^v//g)" >> $GITHUB_OUTPUT
    - name: Login to Amazon ECR
      uses: aws-actions/amazon-ecr-login@v2
      id: login-ecr
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v2
    - name: Build and Push to Amazon ECR
      uses: docker/build-push-action@v5
      with:
        context: deployments/tools/export
        push: true
        tags: ${{ steps.login-ecr.outputs.registry }}/lakefs-rclone-export:${{ steps.version.outputs.tag }}
# Starts lakeFS from test/lakefsfs_contract, then runs the Hadoop FileSystem
# contract tests under three Maven profiles: hadoop2, hadoop3, and hadoop3
# in presigned mode.
hadoopfs-tests:
  name: Test lakeFS Hadoop FileSystem
  needs:
    - deploy-image
    - login-to-amazon-ecr
  runs-on: ubuntu-22.04
  env:
    REPO: ${{ needs.login-to-amazon-ecr.outputs.registry }}
    TAG: ${{ needs.deploy-image.outputs.tag }}
  steps:
    - name: Check-out code
      uses: actions/checkout@v4
    - name: Start lakeFS for contract tests
      uses: ./.github/actions/bootstrap-test-lakefs
      with:
        compose-directory: test/lakefsfs_contract
      env:
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        LAKEFS_BLOCKSTORE_TYPE: s3
        LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID: minio
        LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY: minio123
        LAKEFS_DATABASE_TYPE: postgres
    - name: Setup contract tests
      working-directory: test/lakefsfs_contract
      env:
        REPOSITORY: lakefsfs-contract-test
        STORAGE_NAMESPACE: s3://test-bucket/data
        AWS_ACCESS_KEY_ID: minio
        AWS_SECRET_ACCESS_KEY: minio123
      run: ./setup-test.sh
    # The three runs below differ only in the contract-tests profile.
    - name: Build and test hadoopfs (hadoop2)
      working-directory: clients/hadoopfs
      run: mvn clean --quiet --batch-mode --update-snapshots -P'!treeverse-signing',contract-tests-hadoop2 verify
    - name: Build and test hadoopfs (hadoop3)
      working-directory: clients/hadoopfs
      run: mvn clean --quiet --batch-mode --update-snapshots -P'!treeverse-signing',contract-tests-hadoop3 verify
    - name: Build and test hadoopfs (hadoop3, presigned mode)
      working-directory: clients/hadoopfs
      run: mvn clean --quiet --batch-mode --update-snapshots -P'!treeverse-signing',contract-tests-hadoop3-presigned verify
    # Best effort: a failure to collect logs does not add a second failure.
    - name: logs on failure
      if: ${{ failure() }}
      continue-on-error: true
      working-directory: test/lakefsfs_contract
      run: docker compose logs --tail=1000
# Writes through Hadoop S3A to lakeFS and then checks the lakeFS logs for a
# CompleteMultiPartUpload entry to confirm a multipart upload took place.
hadoopfs-s3a-mpu:
  name: Test lakeFS multipart upload with Hadoop S3A
  needs: [deploy-image, login-to-amazon-ecr]
  runs-on: ubuntu-22.04
  env:
    TAG: ${{ needs.deploy-image.outputs.tag }}
    REPO: ${{ needs.login-to-amazon-ecr.outputs.registry }}
  steps:
    - name: Check-out code
      uses: actions/checkout@v4
    - uses: actions/setup-java@v3
      with:
        distribution: "adopt-hotspot"
        java-version: "8"
        cache: "sbt"
    - name: Start lakeFS for Spark tests
      uses: ./.github/actions/bootstrap-test-lakefs
      with:
        compose-directory: test/spark
      env:
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        LAKEFS_DATABASE_TYPE: postgres
        LAKEFS_BLOCKSTORE_TYPE: s3
        LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID: ${{ secrets.ESTI_AWS_ACCESS_KEY_ID }}
        LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY: ${{ secrets.ESTI_AWS_SECRET_ACCESS_KEY }}
    - name: Generate uniquifying value
      id: unique
      run: echo "value=$RANDOM" >> $GITHUB_OUTPUT
    - name: Test lakeFS multipart upload with Hadoop S3A
      env:
        STORAGE_NAMESPACE: s3://esti-system-testing/${{ github.run_number }}-s3a-mpu/${{ steps.unique.outputs.value }}
        REPOSITORY: s3a-mpu-test
        AWS_ACCESS_KEY_ID: ${{ secrets.TESTER_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.TESTER_SECRET_ACCESS_KEY }}
        # TODO(ariels): This depends on an actual DNS lookup
        #     (*.local.lakefs.io is 127.0.0.1) because it runs outside of
        #     a Docker container.  Bypass this somehow.
        ENDPOINT: "http://s3.local.lakefs.io:8000"
      working-directory: test/spark/s3a-multipart
      # Wait for the lakefs-setup container to exit before creating the
      # repository and starting the sbt run.
      run: |
        docker wait $(docker compose ps -a lakefs-setup -q)
        docker compose exec -T lakefs lakectl repo create "lakefs://${REPOSITORY}" "${STORAGE_NAMESPACE}" -d main && sbt "run s3a://${REPOSITORY}/main/multipart.out"
    - name: lakeFS logs on failure
      if: ${{ failure() }}
      continue-on-error: true
      working-directory: test/spark
      run: docker compose logs --tail=2500 lakefs
    # `grep -F` replaces the obsolescent `fgrep` alias; the fixed-string
    # match is unchanged. pipefail makes a `docker compose logs` failure
    # fail the step as well as a missing match.
    - name: Verify lakeFS performed a multipart upload
      working-directory: test/spark
      run: set -o pipefail && docker compose logs --tail=5000 -- lakefs 2>&1 | grep -F CompleteMultiPartUpload
# Packages the lakeFS HadoopFS client jar with Maven (tests skipped) and
# publishes it as the `lakefs-hadoopfs` artifact.
build-lakefs-hadoopfs:
  name: Build lakeFS HadoopFS
  runs-on: ubuntu-22.04
  steps:
    - name: Check-out code
      uses: actions/checkout@v4
    - uses: actions/setup-java@v3
      with:
        distribution: "adopt-hotspot"
        java-version: "8"
        # This job builds with Maven, so cache Maven's dependencies; the
        # previous "sbt" setting did not match the build tool used here.
        cache: "maven"
    - name: Build lakeFS HadoopFS
      working-directory: clients/hadoopfs
      run: mvn -Passembly -DfinalName=client --batch-mode --update-snapshots package -DskipTests
    - name: Store lakeFS HadoopFS
      uses: actions/upload-artifact@v3
      with:
        name: lakefs-hadoopfs
        path: clients/hadoopfs/target/client.jar
# Packages the Spark test application with sbt and publishes its target
# directory as the `spark-apps` artifact.
spark-prep:
  name: Prepare Spark Apps
  runs-on: ubuntu-22.04
  steps:
    - name: Check-out code
      uses: actions/checkout@v4
    - uses: actions/setup-java@v3
      with:
        java-version: '8'
        distribution: 'adopt-hotspot'
        cache: 'sbt'
    - name: Package Spark App
      working-directory: test/spark/app
      run: sbt package
    - name: Store Spark App
      uses: actions/upload-artifact@v3
      with:
        path: test/spark/app/target/
        name: spark-apps
# Runs the Spark integration scenarios (S3 gateway, lakeFS HadoopFS, gateway
# with redirect, HadoopFS presigned) once per Spark major version in the
# matrix. The redirect scenario runs only for Spark 3.
spark:
  name: Test lakeFS with Spark ${{ matrix.spark.tag }}.X
  needs: [spark-prep, build-lakefs-hadoopfs, deploy-image, login-to-amazon-ecr]
  runs-on: ubuntu-22.04
  strategy:
    matrix:
      spark:
        - tag: 2
          sonnet_jar: sonnets-246/target/sonnets-246/scala-2.11/sonnets-246_2.11-0.1.0.jar
        - tag: 3
          sonnet_jar: sonnets-311/target/sonnets-311/scala-2.12/sonnets-311_2.12-0.1.0.jar
  env:
    TAG: ${{ needs.deploy-image.outputs.tag }}
    REPO: ${{ needs.login-to-amazon-ecr.outputs.registry }}
    SPARK_TAG: ${{ matrix.spark.tag }}
  steps:
    - name: Check-out code
      uses: actions/checkout@v4
    - name: Download lakeFS HadoopFS
      uses: actions/download-artifact@v3
      with:
        name: lakefs-hadoopfs
        path: clients/hadoopfs/target
    - name: Download Spark App
      uses: actions/download-artifact@v3
      with:
        name: spark-apps
        path: test/spark/app/target/
    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        # Quoted so the version is a string rather than a YAML float.
        python-version: "3.11"
        cache: 'pip'
    - run: pip install -r ./test/spark/requirements.txt
    - name: Generate uniquifying value
      id: unique
      run: echo "value=$RANDOM" >> $GITHUB_OUTPUT
    - name: Start lakeFS for Spark tests
      uses: ./.github/actions/bootstrap-test-lakefs
      with:
        compose-directory: test/spark
      env:
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        LAKEFS_DATABASE_TYPE: postgres
        LAKEFS_BLOCKSTORE_TYPE: s3
        LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID: ${{ secrets.ESTI_AWS_ACCESS_KEY_ID }}
        LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY: ${{ secrets.ESTI_AWS_SECRET_ACCESS_KEY }}
    - name: Spark${{ matrix.spark.tag }} + S3 gateway
      timeout-minutes: 8
      working-directory: test/spark
      run: |
        python ./run-test.py \
        --storage_namespace s3://esti-system-testing/${{ github.run_number }}-spark${{ matrix.spark.tag }}-gw/${{ steps.unique.outputs.value }} \
        --repository gateway-test-spark${{ matrix.spark.tag }} \
        --sonnet_jar ${{ matrix.spark.sonnet_jar }}
    - name: lakeFS Logs on Spark with gateway failure
      if: ${{ failure() }}
      continue-on-error: true
      working-directory: test/spark
      run: docker compose logs --tail=2500 lakefs
    # NOTE(review): the credentials below are interpolated straight into the
    # command line; consider passing them through `env:` instead.
    - name: Spark${{ matrix.spark.tag }} + lakeFS HadoopFS
      timeout-minutes: 8
      working-directory: test/spark
      run: |
        python ./run-test.py \
        --storage_namespace s3://esti-system-testing/${{ github.run_number }}-spark${{ matrix.spark.tag }}-lakefsfs/${{ steps.unique.outputs.value }} \
        --repository lakefsfs-test-spark \
        --sonnet_jar ${{ matrix.spark.sonnet_jar }} \
        --access_mode hadoopfs \
        --aws_access_key ${{ secrets.ESTI_AWS_ACCESS_KEY_ID }} \
        --aws_secret_key ${{ secrets.ESTI_AWS_SECRET_ACCESS_KEY }}
    # Uses `docker compose`, like every other log step in this workflow;
    # this step previously called the legacy `docker-compose` binary.
    - name: lakeFS Logs on HadoopFS test failure
      if: ${{ failure() }}
      continue-on-error: true
      working-directory: test/spark
      run: docker compose logs --tail=2500 lakefs
    - name: Spark${{ matrix.spark.tag }} + lakeFS GW + Redirect
      if: env.SPARK_TAG == '3'
      timeout-minutes: 8
      working-directory: test/spark
      run: |
        python ./run-test.py \
        --storage_namespace s3://esti-system-testing/${{ github.run_number }}-spark${{ matrix.spark.tag }}-gw-redirect/${{ steps.unique.outputs.value }} \
        --repository gateway-redirect-test-spark${{ matrix.spark.tag }} \
        --sonnet_jar ${{ matrix.spark.sonnet_jar }} \
        --redirect
    - name: lakeFS Logs on Spark with gateway redirect failure
      if: ${{ failure() }}
      continue-on-error: true
      working-directory: test/spark
      run: docker compose logs --tail=2500 lakefs
    - name: Spark${{ matrix.spark.tag }} + lakeFS HadoopFS presigned
      timeout-minutes: 8
      working-directory: test/spark
      run: |
        python ./run-test.py \
        --storage_namespace s3://esti-system-testing/${{ github.run_number }}-spark${{ matrix.spark.tag }}-presigned/${{ steps.unique.outputs.value }} \
        --repository thick-client-presigned-test \
        --sonnet_jar ${{ matrix.spark.sonnet_jar }} \
        --access_mode hadoopfs_presigned
    - name: lakeFS Logs on HadoopFS presigned test failure
      if: ${{ failure() }}
      continue-on-error: true
      working-directory: test/spark
      run: docker compose logs --tail=2500 lakefs
# Starts lakeFS from test/rclone_export, prepares test data, then runs the
# rclone export test against a per-run destination prefix.
export:
  name: Test lakeFS rclone export functionality
  needs:
    - deploy-image
    - deploy-rclone-export-image
    - login-to-amazon-ecr
  runs-on: ubuntu-22.04
  env:
    REPO: ${{ needs.login-to-amazon-ecr.outputs.registry }}
    LAKEFS_TAG: ${{ needs.deploy-image.outputs.tag }}
    EXPORT_TAG: ${{ needs.deploy-rclone-export-image.outputs.tag }}
  steps:
    - name: Check-out code
      uses: actions/checkout@v4
    - name: Start lakeFS for export tests
      uses: ./.github/actions/bootstrap-test-lakefs
      with:
        compose-directory: test/rclone_export
      env:
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        LAKEFS_BLOCKSTORE_TYPE: s3
        LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID: ${{ secrets.ESTI_AWS_ACCESS_KEY_ID }}
        LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY: ${{ secrets.ESTI_AWS_SECRET_ACCESS_KEY }}
        LAKEFS_DATABASE_TYPE: postgres
    # The random value is shared by the source namespace and the export
    # destination used in the two steps that follow.
    - name: Generate uniquifying value
      id: unique
      run: echo "value=$RANDOM" >> $GITHUB_OUTPUT
    - name: Setup lakeFS for tests
      working-directory: test/rclone_export
      env:
        STORAGE_NAMESPACE: s3://esti-system-testing/${{ github.run_number }}-storage-rclone-export/${{ steps.unique.outputs.value }}
      run: ./setup-test.sh
    - name: Test rclone export
      working-directory: test/rclone_export
      env:
        WORKING_DIRECTORY: test/rclone_export
        EXPORT_LOCATION: s3://esti-system-testing/${{ github.run_number }}-rclone-export-dest/${{ steps.unique.outputs.value }}
        AWS_ACCESS_KEY_ID: ${{ secrets.ESTI_AWS_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.ESTI_AWS_SECRET_ACCESS_KEY }}
      run: ./run-test.sh
    - name: logs on failure
      if: ${{ failure() }}
      continue-on-error: true
      working-directory: test/rclone_export
      run: docker compose logs --tail=1000
# Builds and pushes a hive-metastore image to ghcr.io, starts lakeFS from
# test/lakectl_metastore, and runs that directory's setup-test.sh script.
metastore-client-with-trino:
  name: Test metastore client commands using trino
  needs: [deploy-image, login-to-amazon-ecr]
  runs-on: ubuntu-22.04
  env:
    TAG: ${{ needs.deploy-image.outputs.tag }}
    REPO: ${{ needs.login-to-amazon-ecr.outputs.registry }}
  steps:
    - name: Check-out code
      uses: actions/checkout@v4
    - name: Generate uniquifying value
      id: unique
      run: echo "value=$RANDOM" >> $GITHUB_OUTPUT
    - name: Login to GitHub Docker Registry
      uses: docker/login-action@v2
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v2
    - name: Build and Push hive-metastore
      uses: docker/build-push-action@v5
      with:
        push: true
        tags: ghcr.io/treeverse/hive-metastore:${{ needs.deploy-image.outputs.tag }}
        context: test/lakectl_metastore/hive
        cache-from: type=gha,scope=hive-metastore
        cache-to: type=gha,mode=max,scope=hive-metastore
    - name: Start lakeFS for Metastore tests
      uses: ./.github/actions/bootstrap-test-lakefs
      with:
        compose-directory: test/lakectl_metastore
      env:
        AWS_ACCOUNT_ID: ${{ secrets.AWS_ACCOUNT_ID }}
        STORAGE_NAMESPACE: s3://esti-system-testing/${{ github.run_number }}-metaclient/${{ steps.unique.outputs.value }}
        LAKEFS_BLOCKSTORE_TYPE: s3
        LAKEFS_DATABASE_TYPE: postgres
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID: ${{ secrets.ESTI_AWS_ACCESS_KEY_ID }}
        LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY: ${{ secrets.ESTI_AWS_SECRET_ACCESS_KEY }}
        LAKECTL_METASTORE_GLUE_CREDENTIALS_ACCESS_KEY_ID: ${{ secrets.ESTI_AWS_ACCESS_KEY_ID }}
        LAKECTL_METASTORE_GLUE_CREDENTIALS_ACCESS_SECRET_KEY: ${{ secrets.ESTI_AWS_SECRET_ACCESS_KEY }}
    - name: Setup lakeFS for tests
      working-directory: test/lakectl_metastore
      env:
        STORAGE_NAMESPACE: s3://esti-system-testing/${{ github.run_number }}-metaclient/${{ steps.unique.outputs.value }}
      run: ./setup-test.sh
    # Renamed: the previous name ("Spark with gateway") was copied from the
    # Spark job and did not describe this metastore job.
    - name: lakeFS logs on metastore test failure
      if: ${{ failure() }}
      continue-on-error: true
      working-directory: test/lakectl_metastore
      run: docker compose logs --tail=2500 lakefs
# Builds the Spark metadata client assembly jar and leaves it under
# test/spark/metaclient, a path stored by the cache step below. All build
# steps are skipped when that cache already holds an entry for the key.
build-spark3-metadata-client:
  name: Build metadata client for Spark 3.x
  needs:
    - check-secrets
    - deploy-image
    - login-to-amazon-ecr
  runs-on: ubuntu-latest-8-cores
  env:
    REPO: ${{ needs.login-to-amazon-ecr.outputs.registry }}
    TAG: ${{ needs.deploy-image.outputs.tag }}
  steps:
    - name: Check-out code
      uses: actions/checkout@v4
    - name: Restore cache
      id: restore-cache
      uses: actions/cache@v4
      with:
        key: metadata-client-${{ hashFiles('./clients/spark/**') }}
        path: ${{ github.workspace }}/test/spark/metaclient
    - if: steps.restore-cache.outputs.cache-hit != 'true'
      uses: actions/setup-java@v3
      with:
        java-version: '8'
        distribution: 'adopt-hotspot'
        cache: 'sbt'
    - name: Package Metaclient
      if: steps.restore-cache.outputs.cache-hit != 'true'
      working-directory: clients/spark
      run: |
        sbt 'set assembly / test := {}' assembly
    # upload-artifact cannot take a working-directory option (that only
    # applies to "run" steps), so copy the compiled metaclient to a
    # known location.
    - name: Prepare Metaclient location for export
      if: steps.restore-cache.outputs.cache-hit != 'true'
      working-directory: clients/spark
      run: |
        mkdir -p ${{ github.workspace }}/test/spark/metaclient
        cp target/scala-2.12/lakefs-spark-client-assembly*.jar ${{ github.workspace }}/test/spark/metaclient/spark-assembly.jar
# Restores the metadata client jar from the cache, copies golden test data
# into a per-run prefix, and runs the exporter setup and test scripts
# against lakeFS started from test/spark.
metadata-client-export-spark3:
  name: Test lakeFS metadata client export with Spark 3.x
  needs: [deploy-image, build-spark3-metadata-client, login-to-amazon-ecr]
  runs-on: ubuntu-22.04
  env:
    SPARK_TAG: 3.2.1
    REPO: ${{ needs.login-to-amazon-ecr.outputs.registry }}
    TAG: ${{ needs.deploy-image.outputs.tag }}
  steps:
    - name: Check-out code
      uses: actions/checkout@v4
    # Same path and key as the cache step in build-spark3-metadata-client.
    - name: Restore cache
      uses: actions/cache@v4
      id: restore-cache
      with:
        path: ${{ github.workspace }}/test/spark/metaclient
        key: metadata-client-${{ hashFiles('./clients/spark/**') }}
    - name: Generate uniquifying value
      id: unique
      run: echo "value=$RANDOM" >> $GITHUB_OUTPUT
    - name: Start lakeFS for Spark tests
      uses: ./.github/actions/bootstrap-test-lakefs
      with:
        compose-directory: test/spark
      env:
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        LAKEFS_DATABASE_TYPE: postgres
        LAKEFS_BLOCKSTORE_TYPE: s3
        LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID: ${{ secrets.ESTI_AWS_ACCESS_KEY_ID }}
        LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY: ${{ secrets.ESTI_AWS_SECRET_ACCESS_KEY }}
    # NOTE(review): this step sets no AWS credentials of its own; presumably
    # an earlier step leaves them in the environment — confirm.
    - name: Copy repository ref
      run: aws s3 cp --recursive s3://esti-system-testing-data/golden-files/gc-test-data s3://esti-system-testing/${{ github.run_number }}-spark3.2.1-metaclient/exporter/${{ steps.unique.outputs.value }}
    - name: Setup Exporter tests
      env:
        STORAGE_NAMESPACE: s3://esti-system-testing/${{ github.run_number }}-spark3.2.1-metaclient/exporter/${{ steps.unique.outputs.value }}
        REPOSITORY: test-data-exporter
      working-directory: test/spark
      run: ./setup-exporter-test.sh
    - name: Test Exporter with Spark 3.x
      env:
        STORAGE_NAMESPACE: s3://esti-system-testing/${{ github.run_number }}-spark3.2.1-metaclient/exporter/${{ steps.unique.outputs.value }}
        REPOSITORY: test-data-exporter
        CLIENT_JAR: ${{ github.workspace }}/test/spark/metaclient/spark-assembly.jar
        EXPORT_LOCATION: s3://esti-system-testing/${{ github.run_number }}-spark3.2.1-client-export/${{ steps.unique.outputs.value }}
      working-directory: test/spark
      run: ./run-exporter-test.sh
    # Renamed: the previous name ("Spark with gateway") was copied from the
    # Spark job and did not describe this exporter job.
    - name: lakeFS logs on exporter test failure
      if: ${{ failure() }}
      continue-on-error: true
      working-directory: test/spark
      run: docker compose logs --tail=2500 lakefs
# Runs the system tests with the DynamoDB compose file and an S3 block
# store, with a locally started ACL server supplying the auth API.
run-system-aws-s3-kv-dynamodb:
  name: Run latest lakeFS app on AWS S3 DynamoDB KV
  needs:
    - deploy-image
    - login-to-amazon-ecr
  runs-on: ubuntu-22.04
  env:
    REPO: ${{ needs.login-to-amazon-ecr.outputs.registry }}
    TAG: ${{ needs.deploy-image.outputs.tag }}
    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
    # Override lakeFS docker compose settings
    LAKEFS_DATABASE_CONNECTION_STRING: ''
  steps:
    - name: Check-out code
      uses: actions/checkout@v4
    - name: Generate uniquifying value
      id: unique
      run: echo "value=$RANDOM" >> $GITHUB_OUTPUT
    # The ACL server binary is unpacked from the generated-code artifact.
    - name: Retrieve generated code
      uses: actions/download-artifact@v3
      with:
        path: /tmp/
        name: generated-code
    - name: Unpack generated code
      run: tar -xzvf /tmp/generated.tar.gz
    # Start the ACL server in the background.
    - name: Run ACL server
      env:
        ACLSERVER_DATABASE_TYPE: local
        # Must be the same as lakeFS
        ACLSERVER_ENCRYPT_SECRET_KEY: some random secret string
      run: ./acl run &
    - name: Test lakeFS with S3 tests KV
      uses: ./.github/actions/bootstrap-test-lakefs
      with:
        compose-file: esti/ops/docker-compose-dynamodb.yaml
        compose-flags: '--quiet-pull --exit-code-from=esti'
      env:
        DOCKER_REG: ${{ needs.login-to-amazon-ecr.outputs.registry }}
        LAKEFS_AUTH_API_ENDPOINT: http://host.docker.internal:8001/api/v1
        LAKEFS_AUTH_UI_CONFIG_RBAC: simplified
        LAKEFS_BLOCKSTORE_TYPE: s3
        LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID: ${{ secrets.ESTI_AWS_ACCESS_KEY_ID }}
        LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY: ${{ secrets.ESTI_AWS_SECRET_ACCESS_KEY }}
        LAKEFS_DATABASE_TYPE: dynamodb
        ESTI_BLOCKSTORE_TYPE: s3
        ESTI_STORAGE_NAMESPACE: s3://esti-system-testing/${{ github.run_number }}/${{ steps.unique.outputs.value }}
        ESTI_AWS_ACCESS_KEY_ID: ${{ secrets.ESTI_AWS_ACCESS_KEY_ID }}
        ESTI_AWS_SECRET_ACCESS_KEY: ${{ secrets.ESTI_AWS_SECRET_ACCESS_KEY }}
        ESTI_VERSION: ${{ needs.deploy-image.outputs.tag }}
        ESTI_DATABASE_KV_ENABLED: 'true'
        ESTI_DATABASE_CONNECTION_STRING: 'true'
    # Passes only when more than five objects exist under the run's prefix.
    - name: Check files in S3 bucket KV
      run: |
        FILES_COUNT=`aws s3 ls s3://esti-system-testing/${{ github.run_number }}/${{ steps.unique.outputs.value }} --recursive | wc -l`
        [ $FILES_COUNT -gt 5 ]
    - name: lakeFS Logs on s3 failure KV
      if: ${{ failure() }}
      continue-on-error: true
      run: docker compose -f esti/ops/docker-compose-dynamodb.yaml logs --tail=1000 lakefs
    # Runs regardless of the outcome of earlier steps.
    - name: Export DB KV
      if: ${{ always() }}
      working-directory: esti/ops
      run: |
        if docker compose ps -q postgres; then
        docker compose exec -T postgres pg_dumpall --username=lakefs | gzip | aws s3 cp - s3://esti-system-testing/${{ github.run_number }}/${{ steps.unique.outputs.value }}/dump.gz
        fi
# Runs the system tests on S3 with Postgres, once with Graveler branch
# ownership disabled and once with it enabled.
run-system-aws-s3:
  name: Run latest lakeFS app on AWS S3
  needs:
    - deploy-image
    - login-to-amazon-ecr
  runs-on: ubuntu-22.04
  strategy:
    matrix:
      branch_ownership:
        - false
        - true
  env:
    REPO: ${{ needs.login-to-amazon-ecr.outputs.registry }}
    TAG: ${{ needs.deploy-image.outputs.tag }}
    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
  steps:
    - name: Check-out code
      uses: actions/checkout@v4
    - name: Generate uniquifying value
      id: unique
      run: echo "value=$RANDOM" >> $GITHUB_OUTPUT
    # The ACL server binary is unpacked from the generated-code artifact.
    - name: Retrieve generated code
      uses: actions/download-artifact@v3
      with:
        path: /tmp/
        name: generated-code
    - name: Unpack generated code
      run: tar -xzvf /tmp/generated.tar.gz
    # Start the ACL server in the background.
    - name: Run ACL server
      env:
        ACLSERVER_DATABASE_TYPE: local
        # Must be the same as lakeFS
        ACLSERVER_ENCRYPT_SECRET_KEY: some random secret string
      run: ./acl run &
    - name: Test lakeFS with S3 tests
      uses: ./.github/actions/bootstrap-test-lakefs
      with:
        compose-file: esti/ops/docker-compose.yaml
        compose-flags: '--quiet-pull --exit-code-from=esti'
      env:
        DOCKER_REG: ${{ needs.login-to-amazon-ecr.outputs.registry }}
        LAKEFS_AUTH_API_ENDPOINT: http://host.docker.internal:8001/api/v1
        LAKEFS_AUTH_UI_CONFIG_RBAC: simplified
        LAKEFS_BLOCKSTORE_TYPE: s3
        LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID: ${{ secrets.ESTI_AWS_ACCESS_KEY_ID }}
        LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY: ${{ secrets.ESTI_AWS_SECRET_ACCESS_KEY }}
        # The matrix value is the only difference between the two runs.
        LAKEFS_GRAVELER_BRANCH_OWNERSHIP_ENABLED: ${{ matrix.branch_ownership }}
        LAKEFS_DATABASE_TYPE: postgres
        ESTI_BLOCKSTORE_TYPE: s3
        ESTI_STORAGE_NAMESPACE: s3://esti-system-testing/${{ github.run_number }}/${{ steps.unique.outputs.value }}
        ESTI_AWS_ACCESS_KEY_ID: ${{ secrets.ESTI_AWS_ACCESS_KEY_ID }}
        ESTI_AWS_SECRET_ACCESS_KEY: ${{ secrets.ESTI_AWS_SECRET_ACCESS_KEY }}
        ESTI_VERSION: ${{ needs.deploy-image.outputs.tag }}
        AUTH0_CLIENT_ID: ${{ secrets.AUTH0_HAGRID_CLIENT_ID }}
        AUTH0_CLIENT_SECRET: ${{ secrets.AUTH0_HAGRID_CLIENT_SECRET }}
    # Passes only when more than five objects exist under the run's prefix.
    - name: Check files in S3 bucket
      run: |
        FILES_COUNT=`aws s3 ls s3://esti-system-testing/${{ github.run_number }}/${{ steps.unique.outputs.value }} --recursive | wc -l`
        [ $FILES_COUNT -gt 5 ]
    - name: lakeFS Logs on s3 failure
      if: ${{ failure() }}
      continue-on-error: true
      run: docker compose -f esti/ops/docker-compose.yaml logs --tail=1000 lakefs
    # Runs regardless of the outcome of earlier steps.
    - name: Export DB
      if: ${{ always() }}
      working-directory: esti/ops
      run: |
        if docker compose ps -q postgres; then
        docker compose exec -T postgres pg_dumpall --username=lakefs | gzip | aws s3 cp - s3://esti-system-testing/${{ github.run_number }}/${{ steps.unique.outputs.value }}/dump.gz
        fi
# Runs the system tests with a Google Cloud Storage block store and
# Postgres, with a locally started ACL server supplying the auth API.
run-system-gcp-gs:
  name: Run latest lakeFS app on Google Cloud Platform and Google Cloud Storage
  needs:
    - deploy-image
    - login-to-amazon-ecr
  runs-on: ubuntu-22.04
  env:
    REPO: ${{ needs.login-to-amazon-ecr.outputs.registry }}
    TAG: ${{ needs.deploy-image.outputs.tag }}
    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
  steps:
    - name: Check-out code
      uses: actions/checkout@v4
    - name: Generate uniquifying value
      id: unique
      run: echo "value=$RANDOM" >> $GITHUB_OUTPUT
    # The ACL server binary is unpacked from the generated-code artifact.
    - name: Retrieve generated code
      uses: actions/download-artifact@v3
      with:
        path: /tmp/
        name: generated-code
    - name: Unpack generated code
      run: tar -xzvf /tmp/generated.tar.gz
    # Start the ACL server in the background.
    - name: Run ACL server
      env:
        ACLSERVER_DATABASE_TYPE: local
        # Must be the same as lakeFS
        ACLSERVER_ENCRYPT_SECRET_KEY: some random secret string
      run: ./acl run &
    - name: Start lakeFS with GS tests
      uses: ./.github/actions/bootstrap-test-lakefs
      with:
        compose-file: esti/ops/docker-compose.yaml
        compose-flags: '--quiet-pull --exit-code-from=esti'
      env:
        DOCKER_REG: ${{ needs.login-to-amazon-ecr.outputs.registry }}
        LAKEFS_AUTH_API_ENDPOINT: http://host.docker.internal:8001/api/v1
        LAKEFS_AUTH_UI_CONFIG_RBAC: simplified
        LAKEFS_DATABASE_TYPE: postgres
        LAKEFS_BLOCKSTORE_TYPE: gs
        LAKEFS_BLOCKSTORE_GS_CREDENTIALS_JSON: ${{ secrets.LAKEFS_BLOCKSTORE_GS_CREDENTIALS_JSON }}
        ESTI_BLOCKSTORE_TYPE: gs
        ESTI_STORAGE_NAMESPACE: gs://esti-system-testing/${{ github.run_number }}/${{ steps.unique.outputs.value }}
    - name: lakeFS Logs on GS failure
      if: ${{ failure() }}
      continue-on-error: true
      run: docker compose -f esti/ops/docker-compose.yaml logs --tail=1000 lakefs
# Runs the system tests with an Azure block store and Postgres, with a
# locally started ACL server supplying the auth API.
run-system-azure-abfs:
  name: Run latest lakeFS app on Azure with Azure blobstore
  needs:
    - deploy-image
    - login-to-amazon-ecr
  runs-on: ubuntu-22.04
  env:
    REPO: ${{ needs.login-to-amazon-ecr.outputs.registry }}
    TAG: ${{ needs.deploy-image.outputs.tag }}
    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
    AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
    AZURE_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }}
    AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
  steps:
    - name: Check-out code
      uses: actions/checkout@v4
    - name: Generate uniquifying value
      id: unique
      run: echo "value=$RANDOM" >> $GITHUB_OUTPUT
    # The ACL server binary is unpacked from the generated-code artifact.
    - name: Retrieve generated code
      uses: actions/download-artifact@v3
      with:
        path: /tmp/
        name: generated-code
    - name: Unpack generated code
      run: tar -xzvf /tmp/generated.tar.gz
    # Start the ACL server in the background.
    - name: Run ACL server
      env:
        ACLSERVER_DATABASE_TYPE: local
        # Must be the same as lakeFS
        ACLSERVER_ENCRYPT_SECRET_KEY: some random secret string
      run: ./acl run &
    - name: Start lakeFS with Azure tests
      uses: ./.github/actions/bootstrap-test-lakefs
      with:
        compose-file: esti/ops/docker-compose.yaml
        compose-flags: '--quiet-pull --exit-code-from=esti'
      env:
        DOCKER_REG: ${{ needs.login-to-amazon-ecr.outputs.registry }}
        LAKEFS_AUTH_API_ENDPOINT: http://host.docker.internal:8001/api/v1
        LAKEFS_AUTH_UI_CONFIG_RBAC: simplified
        LAKEFS_BLOCKSTORE_TYPE: azure
        LAKEFS_DATABASE_TYPE: postgres
        ESTI_AZURE_STORAGE_ACCOUNT: esti
        ESTI_AZURE_STORAGE_ACCESS_KEY: ${{ secrets.LAKEFS_BLOCKSTORE_AZURE_STORAGE_ACCESS_KEY }}
        ESTI_BLOCKSTORE_TYPE: azure
        ESTI_STORAGE_NAMESPACE: https://esti.blob.core.windows.net/esti-system-testing/${{ github.run_number }}/${{ steps.unique.outputs.value }}
    - name: lakeFS Logs on Azure failure
      if: ${{ failure() }}
      continue-on-error: true
      run: docker compose -f esti/ops/docker-compose.yaml logs --tail=1000 lakefs
    # NOTE(review): `env` prints the whole step environment, which in this
    # job includes the AWS and Azure credentials set at job level. Consider
    # removing this debugging step once it is no longer needed.
    - name: See the env when we would have tried to publish coverage
      run: env
# System tests against Azure Data Lake Storage Gen2 with CosmosDB as the
# lakeFS database. Each run uses its own CosmosDB container, which is deleted
# at the end regardless of the test outcome.
run-system-azure-adls-gen2:
  name: Run latest lakeFS app on Azure with Azure Data Lake Storage Gen2 and CosmosDB
  needs: [deploy-image, login-to-amazon-ecr]
  runs-on: ubuntu-22.04
  env:
    TAG: ${{ needs.deploy-image.outputs.tag }}
    REPO: ${{ needs.login-to-amazon-ecr.outputs.registry }}
    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
    AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
    AZURE_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }}
    AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
    COSMOSDB_DATABASE: esti-db
    COSMOSDB_ACCOUNT: esti-e2e-tests
  steps:
    - name: Check-out code
      uses: actions/checkout@v4
    # Random suffix used for both the storage namespace and the CosmosDB
    # container name of this run.
    - name: Generate uniquifying value
      id: unique
      run: echo "value=$RANDOM" >> "$GITHUB_OUTPUT"
    # Retrieve the generated-code archive; it is unpacked below and provides
    # the ./acl binary that the "Run ACL server" step executes.
    - name: Retrieve generated code
      uses: actions/download-artifact@v3
      with:
        name: generated-code
        path: /tmp/
    - name: Unpack generated code
      run: tar -xzvf /tmp/generated.tar.gz
    # Run ACL server in the background so the following steps can proceed.
    - name: Run ACL server
      env:
        ACLSERVER_ENCRYPT_SECRET_KEY: some random secret string # Must be the same as lakeFS
        ACLSERVER_DATABASE_TYPE: local
      run: ./acl run &
    - name: Start lakeFS with Azure tests
      uses: ./.github/actions/bootstrap-test-lakefs
      with:
        compose-file: esti/ops/docker-compose-external-db.yaml
        compose-flags: "--quiet-pull --exit-code-from=esti"
      env:
        DOCKER_REG: ${{ needs.login-to-amazon-ecr.outputs.registry }}
        LAKEFS_AUTH_API_ENDPOINT: http://host.docker.internal:8001/api/v1
        LAKEFS_AUTH_UI_CONFIG_RBAC: simplified
        LAKEFS_DATABASE_TYPE: cosmosdb
        LAKEFS_DATABASE_COSMOSDB_ENDPOINT: "https://${{ env.COSMOSDB_ACCOUNT }}.documents.azure.com"
        LAKEFS_DATABASE_COSMOSDB_DATABASE: ${{ env.COSMOSDB_DATABASE }}
        # Must match the container name deleted in the cleanup step below.
        LAKEFS_DATABASE_COSMOSDB_CONTAINER: ${{ github.run_number }}-${{ steps.unique.outputs.value }}
        LAKEFS_DATABASE_COSMOSDB_KEY: ${{ secrets.LAKEFS_DATABASE_COSMOSDB_READWRITEKEY }}
        LAKEFS_BLOCKSTORE_TYPE: azure
        ESTI_BLOCKSTORE_TYPE: azure
        ESTI_STORAGE_NAMESPACE: https://esti4hns.blob.core.windows.net/esti-system-testing/${{ github.run_number }}/${{ steps.unique.outputs.value }}
        ESTI_LARGE_OBJECT_PATH: https://esti4hns.blob.core.windows.net/esti-system-testing-data/copy-test-data/data.6mib
        ESTI_AZURE_STORAGE_ACCOUNT: esti4hns
        ESTI_AZURE_STORAGE_ACCESS_KEY: ${{ secrets.LAKEFS_BLOCKSTORE_AZURE_STORAGE_GEN2_ACCESS_KEY }}
    # Login and cleanup run with always() so the per-run container is removed
    # even when the tests above failed.
    - name: Az CLI login
      if: always()
      uses: azure/login@v1
      with:
        client-id: ${{ secrets.AZURE_COSMOSDB_CLEANUP_CLIENT_ID }}
        tenant-id: ${{ secrets.AZURE_TENANT_ID }}
        subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
    - name: cleanup cosmos db container
      if: always()
      run: |
        echo "Delete database container: ${{ github.run_number }}-${{ steps.unique.outputs.value }}"
        az cosmosdb sql container delete -a ${{ env.COSMOSDB_ACCOUNT }} -g ${{ env.COSMOSDB_ACCOUNT }} -d ${{ env.COSMOSDB_DATABASE }} -n ${{ github.run_number }}-${{ steps.unique.outputs.value }} --yes
    # Best-effort diagnostics. Uses the same compose file as the test step so
    # the logs are read from the stack this job actually started.
    - name: lakeFS Logs on Azure failure
      if: ${{ failure() }}
      continue-on-error: true
      run: docker compose -f esti/ops/docker-compose-external-db.yaml logs --tail=1000 lakefs
# Integration tests for the Python wrapper client, run with pytest against a
# lakeFS service container that uses a local database and the S3 block adapter.
python-wrapper:
  name: Test lakeFS against the python wrapper client
  needs:
    - deploy-image
    - login-to-amazon-ecr
  runs-on: ubuntu-22.04
  env:
    TAG: ${{ needs.deploy-image.outputs.tag }}
    REPO: ${{ needs.login-to-amazon-ecr.outputs.registry }}
    # Installation credentials shared by the lakeFS service and the test client.
    LAKEFS_INSTALLATION_ACCESS_KEY_ID: AKIAIOSFDNN7EXAMPLEQ
    LAKEFS_INSTALLATION_SECRET_ACCESS_KEY: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
  services:
    lakefs:
      image: ${{ needs.login-to-amazon-ecr.outputs.registry }}/lakefs:${{ needs.deploy-image.outputs.tag }}
      credentials:
        username: ${{ needs.login-to-amazon-ecr.outputs.docker_username }}
        password: ${{ needs.login-to-amazon-ecr.outputs.docker_password }}
      ports:
        - "8000:8000"
      env:
        LAKEFS_DATABASE_TYPE: local
        LAKEFS_BLOCKSTORE_TYPE: s3
        LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID: ${{ secrets.ESTI_AWS_ACCESS_KEY_ID }}
        LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY: ${{ secrets.ESTI_AWS_SECRET_ACCESS_KEY }}
        LAKEFS_INSTALLATION_USER_NAME: admin
        LAKEFS_INSTALLATION_ACCESS_KEY_ID: ${{ env.LAKEFS_INSTALLATION_ACCESS_KEY_ID }}
        LAKEFS_INSTALLATION_SECRET_ACCESS_KEY: ${{ env.LAKEFS_INSTALLATION_SECRET_ACCESS_KEY }}
        LAKEFS_AUTH_ENCRYPT_SECRET_KEY: some random secret string
        LAKEFS_STATS_ENABLED: false
  steps:
    - name: Check-out code
      uses: actions/checkout@v4
    - name: Set up Python 3.9
      uses: actions/setup-python@v4
      with:
        # Quoted so the version stays a string rather than a YAML float.
        python-version: "3.9"
    - name: Install dependencies
      working-directory: ./clients/python-wrapper
      run: pip install -r requirements.txt pytest pytest-md pytest-emoji
    # Random suffix that gives this run its own storage namespace.
    - name: Generate uniquifying value
      id: unique
      run: echo "value=$RANDOM" >> $GITHUB_OUTPUT
    - name: Run Python Wrapper Tests
      uses: pavelzw/pytest-action@v2
      env:
        # The client reaches the service container through the published port.
        LAKECTL_SERVER_ENDPOINT_URL: http://localhost:8000
        LAKECTL_CREDENTIALS_ACCESS_KEY_ID: ${{ env.LAKEFS_INSTALLATION_ACCESS_KEY_ID }}
        LAKECTL_CREDENTIALS_SECRET_ACCESS_KEY: ${{ env.LAKEFS_INSTALLATION_SECRET_ACCESS_KEY }}
        STORAGE_NAMESPACE: s3://esti-system-testing/${{ github.run_number }}-python-wrapper/${{ steps.unique.outputs.value }}
      with:
        verbose: true
        emoji: true
        job-summary: true
        custom-arguments: "./clients/python-wrapper/tests/integration -rP -We"
        click-to-expand: true
        report-title: "Python Wrapper System Tests Report"
# e2e-<db type>-<block adapter type>
# Playwright end-to-end tests (project "common") against a lakeFS service
# container backed by DynamoDB Local and the local block adapter. On pull
# requests the test summary is posted as a PR comment, updating the previous
# one if it exists.
e2e-ddb-local-local:
  name: E2E - DynamoDB Local - Local Block Adapter
  needs:
    - deploy-image
    - login-to-amazon-ecr
  runs-on: ubuntu-latest-8-cores
  timeout-minutes: 20
  services:
    dynamodb:
      image: amazon/dynamodb-local:1.13.6
    lakefs:
      image: ${{ needs.login-to-amazon-ecr.outputs.registry }}/lakefs:${{ needs.deploy-image.outputs.tag }}
      credentials:
        username: ${{ needs.login-to-amazon-ecr.outputs.docker_username }}
        password: ${{ needs.login-to-amazon-ecr.outputs.docker_password }}
      ports:
        - '8000:8000'
      env:
        LAKEFS_DATABASE_TYPE: dynamodb
        LAKEFS_DATABASE_DYNAMODB_ENDPOINT: http://dynamodb:8000
        LAKEFS_DATABASE_DYNAMODB_AWS_ACCESS_KEY_ID: AKIAIOSFDNN7EXAMPLEQ
        LAKEFS_DATABASE_DYNAMODB_AWS_SECRET_ACCESS_KEY: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
        LAKEFS_BLOCKSTORE_TYPE: local
        LAKEFS_EMAIL_SUBSCRIPTION_ENABLED: true
        LAKEFS_AUTH_ENCRYPT_SECRET_KEY: some random secret string
        LAKEFS_STATS_ENABLED: false
        LAKEFS_LOGGING_LEVEL: DEBUG
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-node@v3
      with:
        node-version: 18
        cache: "npm"
        cache-dependency-path: webui/package-lock.json
    - name: Install dependencies
      working-directory: webui
      run: npm ci
    # Exports PLAYWRIGHT_VERSION for the cache key below.
    # NOTE(review): this reads the top-level `dependencies` map of
    # package-lock.json - confirm the lockfile format still provides it.
    - name: Get installed Playwright version
      id: playwright-version
      working-directory: webui
      run: echo "PLAYWRIGHT_VERSION=$(node -e "console.log(require('./package-lock.json').dependencies['@playwright/test'].version)")" >> $GITHUB_ENV
    - name: Cache Playwright binaries
      uses: actions/cache@v4
      id: playwright-cache
      with:
        path: |
          ~/.cache/ms-playwright
          ~/.cache/ms-playwright-*
        key: ${{ runner.os }}-playwright-${{ env.PLAYWRIGHT_VERSION }}
        restore-keys: |
          ${{ runner.os }}-playwright-
    # Exactly one of the two install steps runs, depending on the cache result.
    - name: Install browsers (no cache)
      if: steps.playwright-cache.outputs.cache-hit != 'true'
      working-directory: webui
      run: npx playwright install --with-deps chromium
    - name: Install browsers (with cache)
      if: steps.playwright-cache.outputs.cache-hit == 'true'
      working-directory: webui
      run: npx playwright install-deps chromium
    - name: Playwright run
      env:
        BASE_URL: http://localhost:8000
      working-directory: webui
      run: npx playwright test --project=common
    # Report steps run on success or failure, but not when the job is cancelled.
    - uses: actions/upload-artifact@v3
      if: (success() || failure())
      with:
        name: playwright-report
        path: webui/test-results/
        retention-days: 7
    - uses: test-summary/action@v2
      if: ${{ github.event_name == 'pull_request' && (success() || failure()) }}
      with:
        paths: webui/test-results.xml
        output: "test-results.md"
    # will test and move to using the composite action once it's merged to master (GH limitation)
    - uses: actions/github-script@v6
      if: ${{ github.event_name == 'pull_request' && (success() || failure()) }}
      env:
        PLAYWRIGHT_PROJECT: "DynamoDB Local - Local Block Adapter"
      with:
        github-token: ${{ secrets.GITHUB_TOKEN }}
        script: |
          const fs = require("fs");
          // 1. Retrieve existing bot comments for the PR.
          //    Paginate: a single listComments call only returns the first
          //    page, so on a busy PR the earlier bot comment would be missed
          //    and a duplicate comment posted.
          const comments = await github.paginate(github.rest.issues.listComments, {
            owner: context.repo.owner,
            repo: context.repo.repo,
            issue_number: context.issue.number,
            per_page: 100,
          })
          const botComment = comments.find(comment => {
            return comment.user?.type === 'Bot' && comment.body?.includes(`E2E Test Results - ${process.env.PLAYWRIGHT_PROJECT}`)
          })
          // 2. Read markdown files
          const content = fs.readFileSync("test-results.md");
          // 3. Prepare format of the comment
          const output = `
          # E2E Test Results - ${process.env.PLAYWRIGHT_PROJECT}
          ${content}
          `;
          // 4. If we have a comment, update it, otherwise create a new one.
          //    Await the request so an API failure fails this step instead of
          //    surfacing as an unhandled promise rejection.
          if (botComment) {
            await github.rest.issues.updateComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              comment_id: botComment.id,
              body: output
            })
          } else {
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: output
            })
          }
# Playwright end-to-end tests (project "quickstart") against a lakeFS service
# container backed by DynamoDB Local and the local block adapter. On pull
# requests the test summary is posted as a PR comment, updating the previous
# one if it exists.
quickstart:
  name: Quickstart
  needs:
    - deploy-image
    - login-to-amazon-ecr
  runs-on: ubuntu-latest-8-cores
  timeout-minutes: 20
  services:
    dynamodb:
      image: amazon/dynamodb-local:1.13.6
    lakefs:
      image: ${{ needs.login-to-amazon-ecr.outputs.registry }}/lakefs:${{ needs.deploy-image.outputs.tag }}
      credentials:
        username: ${{ needs.login-to-amazon-ecr.outputs.docker_username }}
        password: ${{ needs.login-to-amazon-ecr.outputs.docker_password }}
      ports:
        - '8000:8000'
      env:
        LAKEFS_DATABASE_TYPE: dynamodb
        LAKEFS_DATABASE_DYNAMODB_ENDPOINT: http://dynamodb:8000
        LAKEFS_DATABASE_DYNAMODB_AWS_ACCESS_KEY_ID: AKIAIOSFDNN7EXAMPLEQ
        LAKEFS_DATABASE_DYNAMODB_AWS_SECRET_ACCESS_KEY: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
        LAKEFS_BLOCKSTORE_TYPE: local
        LAKEFS_EMAIL_SUBSCRIPTION_ENABLED: true
        LAKEFS_AUTH_ENCRYPT_SECRET_KEY: some random secret string
        LAKEFS_STATS_ENABLED: false
        LAKEFS_LOGGING_LEVEL: DEBUG
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-node@v3
      with:
        node-version: 18
        cache: "npm"
        cache-dependency-path: webui/package-lock.json
    - name: Install dependencies
      working-directory: webui
      run: npm ci
    # Exports PLAYWRIGHT_VERSION for the cache key below.
    # NOTE(review): this reads the top-level `dependencies` map of
    # package-lock.json - confirm the lockfile format still provides it.
    - name: Get installed Playwright version
      id: playwright-version
      working-directory: webui
      run: echo "PLAYWRIGHT_VERSION=$(node -e "console.log(require('./package-lock.json').dependencies['@playwright/test'].version)")" >> $GITHUB_ENV
    - name: Cache Playwright binaries
      uses: actions/cache@v4
      id: playwright-cache
      with:
        path: |
          ~/.cache/ms-playwright
          ~/.cache/ms-playwright-*
        key: ${{ runner.os }}-playwright-${{ env.PLAYWRIGHT_VERSION }}
        restore-keys: |
          ${{ runner.os }}-playwright-
    # Exactly one of the two install steps runs, depending on the cache result.
    - name: Install browsers (no cache)
      if: steps.playwright-cache.outputs.cache-hit != 'true'
      working-directory: webui
      run: npx playwright install --with-deps chromium
    - name: Install browsers (with cache)
      if: steps.playwright-cache.outputs.cache-hit == 'true'
      working-directory: webui
      run: npx playwright install-deps chromium
    - name: Playwright run
      env:
        BASE_URL: http://localhost:8000
      working-directory: webui
      run: npx playwright test --project=quickstart
    # Report steps run on success or failure, but not when the job is cancelled.
    - uses: actions/upload-artifact@v3
      if: (success() || failure())
      with:
        name: playwright-report
        path: webui/test-results/
        retention-days: 7
    - uses: test-summary/action@v2
      if: ${{ github.event_name == 'pull_request' && (success() || failure()) }}
      with:
        paths: webui/test-results.xml
        output: "quickstart-test-results.md"
    # will test and move to using the composite action once it's merged to master (GH limitation)
    - uses: actions/github-script@v6
      if: ${{ github.event_name == 'pull_request' && (success() || failure()) }}
      env:
        PLAYWRIGHT_PROJECT: "Quickstart"
      with:
        github-token: ${{ secrets.GITHUB_TOKEN }}
        script: |
          const fs = require("fs");
          // 1. Retrieve existing bot comments for the PR.
          //    Paginate: a single listComments call only returns the first
          //    page, so on a busy PR the earlier bot comment would be missed
          //    and a duplicate comment posted.
          const comments = await github.paginate(github.rest.issues.listComments, {
            owner: context.repo.owner,
            repo: context.repo.repo,
            issue_number: context.issue.number,
            per_page: 100,
          })
          const botComment = comments.find(comment => {
            return comment.user?.type === 'Bot' && comment.body?.includes(`E2E Test Results - ${process.env.PLAYWRIGHT_PROJECT}`)
          })
          // 2. Read markdown files
          const content = fs.readFileSync("quickstart-test-results.md");
          // 3. Prepare format of the comment
          const output = `
          # E2E Test Results - ${process.env.PLAYWRIGHT_PROJECT}
          ${content}
          `;
          // 4. If we have a comment, update it, otherwise create a new one.
          //    Await the request so an API failure fails this step instead of
          //    surfacing as an unhandled promise rejection.
          if (botComment) {
            await github.rest.issues.updateComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              comment_id: botComment.id,
              body: output
            })
          } else {
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: output
            })
          }
# System tests against AWS S3 without an external auth API configured
# (no LAKEFS_AUTH_API_ENDPOINT is passed). After the run it checks that
# objects were written to the bucket and uploads a Postgres dump.
run-system-aws-s3-basic-auth:
  name: Run latest lakeFS app on AWS S3 + Basic Auth
  needs: [deploy-image, login-to-amazon-ecr]
  runs-on: ubuntu-22.04
  env:
    TAG: ${{ needs.deploy-image.outputs.tag }}
    REPO: ${{ needs.login-to-amazon-ecr.outputs.registry }}
    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
  steps:
    - name: Check-out code
      uses: actions/checkout@v4
    # Random suffix that gives this run its own storage namespace.
    - name: Generate uniquifying value
      id: unique
      run: echo "value=$RANDOM" >> "$GITHUB_OUTPUT"
    - name: Test lakeFS with S3 tests + Basic Auth
      uses: ./.github/actions/bootstrap-test-lakefs
      with:
        compose-file: esti/ops/docker-compose.yaml
        compose-flags: "--quiet-pull --exit-code-from=esti"
      env:
        LAKEFS_BLOCKSTORE_TYPE: s3
        LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID: ${{ secrets.ESTI_AWS_ACCESS_KEY_ID }}
        LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY: ${{ secrets.ESTI_AWS_SECRET_ACCESS_KEY }}
        LAKEFS_DATABASE_TYPE: postgres
        DOCKER_REG: ${{ needs.login-to-amazon-ecr.outputs.registry }}
        ESTI_BLOCKSTORE_TYPE: s3
        ESTI_STORAGE_NAMESPACE: s3://esti-system-testing/${{ github.run_number }}/${{ steps.unique.outputs.value }}
        ESTI_AWS_ACCESS_KEY_ID: ${{ secrets.ESTI_AWS_ACCESS_KEY_ID }}
        ESTI_AWS_SECRET_ACCESS_KEY: ${{ secrets.ESTI_AWS_SECRET_ACCESS_KEY }}
        ESTI_VERSION: ${{ needs.deploy-image.outputs.tag }}
    # Fails the job unless more than 5 objects exist under this run's namespace.
    # The trailing slash keeps the listing to this run's prefix; without it a
    # sibling prefix starting with the same digits would be counted as well.
    - name: Check files in S3 bucket
      run: |
        FILES_COUNT=$(aws s3 ls "s3://esti-system-testing/${{ github.run_number }}/${{ steps.unique.outputs.value }}/" --recursive | wc -l)
        [ "$FILES_COUNT" -gt 5 ]
    # Best-effort diagnostics: only runs after a failure and never fails the job.
    - name: lakeFS Logs on s3 failure
      if: ${{ failure() }}
      continue-on-error: true
      run: docker compose -f esti/ops/docker-compose.yaml logs --tail=1000 lakefs
    - name: Export DB
      if: ${{ always() }}
      working-directory: esti/ops
      run: |
        # `docker compose ps -q` exits 0 even when no container exists, so
        # test for a non-empty container id rather than the exit status.
        if [ -n "$(docker compose ps -q postgres)" ]; then
          docker compose exec -T postgres pg_dumpall --username=lakefs | gzip | aws s3 cp - s3://esti-system-testing/${{ github.run_number }}/${{ steps.unique.outputs.value }}/dump.gz
        fi