From 8c83cada88d3cb0fff0eb6c19aa880c6c163cf17 Mon Sep 17 00:00:00 2001 From: Ce Gao Date: Fri, 5 Aug 2022 16:13:50 +0800 Subject: [PATCH] feat(base-image): Move conda to llb and cache it (#724) * feat(base-images): Move conda to llb Signed-off-by: Ce Gao * fix: Only trigger image build on new release Signed-off-by: Ce Gao * Update base-images/remote-cache/build-and-push-remote-cache.sh Co-authored-by: Keming * Update base-images/build.sh Co-authored-by: Keming Co-authored-by: Keming --- .github/workflows/CI.yml | 40 ++-------- .github/workflows/release.yml | 48 ++++++++++-- .goreleaser.yaml | 1 + Makefile | 33 ++++++-- base-images/build.sh | 10 ++- .../python3.9-ubuntu20.04-cuda11.6.Dockerfile | 31 -------- base-images/python3.9-ubuntu20.04.Dockerfile | 31 -------- .../build-and-push-remote-cache.sh | 14 ++++ base-images/remote-cache/build.envd | 2 + e2e/e2e_helper.go | 2 +- e2e/suite_test.go | 7 ++ examples/python-basic/build.envd | 2 +- pkg/app/version.go | 51 +++++++++++- pkg/builder/build.go | 32 +++++--- pkg/builder/builder.go | 16 ++++ pkg/lang/ir/compile.go | 27 ++++++- pkg/lang/ir/conda.go | 40 +++++++--- pkg/lang/ir/install-conda.sh | 26 +++++++ pkg/lang/ir/python.go | 17 +++- pkg/lang/ir/system.go | 34 +++++--- pkg/lang/ir/util.go | 1 + pkg/version/version.go | 78 +++++-------------- 22 files changed, 339 insertions(+), 204 deletions(-) create mode 100755 base-images/remote-cache/build-and-push-remote-cache.sh create mode 100644 base-images/remote-cache/build.envd create mode 100644 pkg/lang/ir/install-conda.sh diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 3c053f143..47f77e5b1 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -64,16 +64,9 @@ jobs: matrix: os: [ubuntu-latest] runs-on: ${{ matrix.os }} - env: - GOPATH: ${{ github.workspace }}/go - defaults: - run: - working-directory: ${{ env.GOPATH }}/src/github.com/tensorchord/envd steps: - name: Check out code uses: actions/checkout@v2 - with: - path: ${{ env.GOPATH }}/src/github.com/tensorchord/envd - name: Setup Go uses: actions/setup-go@v2 with: @@ -97,23 +90,16 @@ jobs: uses: actions/upload-artifact@v3 with: name: coverage-out - path: ${{ env.GOPATH }}/src/github.com/tensorchord/envd/coverage.out + path: coverage.out e2e: name: e2e strategy: matrix: os: [ubuntu-latest] runs-on: ${{ matrix.os }} - env: - GOPATH: ${{ github.workspace }}/go - defaults: - run: - working-directory: ${{ env.GOPATH }}/src/github.com/tensorchord/envd steps: - name: Check out code uses: actions/checkout@v2 - with: - path: ${{ env.GOPATH }}/src/github.com/tensorchord/envd - name: Setup Go uses: actions/setup-go@v2 with: @@ -127,29 +113,26 @@ jobs: ${{ runner.OS }}-build-${{ env.cache-name }}- ${{ runner.OS }}-build- ${{ runner.OS }}- + - uses: actions-ecosystem/action-get-latest-tag@v1 + id: get-latest-tag - name: e2e test run: make e2e-test + env: + GIT_LATEST_TAG: ${{ steps.get-latest-tag.outputs.tag }} - name: Upload coverage report uses: actions/upload-artifact@v3 with: name: e2e-coverage-out - path: ${{ env.GOPATH }}/src/github.com/tensorchord/envd/e2e-coverage.out + path: e2e-coverage.out build: name: build strategy: matrix: os: [ubuntu-latest, macos-latest] runs-on: ${{ matrix.os }} - env: - GOPATH: ${{ github.workspace }}/go - defaults: - run: - working-directory: ${{ env.GOPATH }}/src/github.com/tensorchord/envd steps: - name: Check out code uses: actions/checkout@v2 - with: - path: ${{ env.GOPATH }}/src/github.com/tensorchord/envd - name: Setup Go uses: actions/setup-go@v2 with: @@ -171,17 +154,10 @@ jobs: needs: - test - e2e - env: - GOPATH: ${{ github.workspace }}/go runs-on: ubuntu-latest - defaults: - run: - working-directory: ${{ env.GOPATH }}/src/github.com/tensorchord/envd steps: - name: Check out code uses: actions/checkout@v2 - with: - path: ${{ env.GOPATH }}/src/github.com/tensorchord/envd - name: Setup Go uses: actions/setup-go@v2 with: @@ -194,12 +170,12 @@ jobs: uses: actions/download-artifact@v3 with: name: coverage-out - path: ${{ env.GOPATH }}/src/github.com/tensorchord/envd + path: coverage.out - name: Get coverage report uses: actions/download-artifact@v3 with: name: e2e-coverage-out - path: ${{ env.GOPATH }}/src/github.com/tensorchord/envd + path: e2e-coverage.out # - name: Send coverage # env: # COVERALLS_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index c8d8c2650..fb007be70 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -36,7 +36,7 @@ jobs: version: latest args: release --rm-dist env: - GITHUB_TOKEN: ${{ secrets.GH_TOKEN }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: upload gobin uses: actions/upload-artifact@v3 with: @@ -55,13 +55,13 @@ jobs: strategy: matrix: os: [macos-10.15, ubuntu-20.04] - steps: + steps: - uses: actions/checkout@v3 - name: Get gobin uses: actions/download-artifact@v3 with: name: gobin_${{ github.event.release.tag_name }} - path: dist/ + path: dist/ - name: Configure linux build environment if: runner.os == 'Linux' run: | @@ -78,12 +78,12 @@ jobs: uses: pypa/cibuildwheel@v2.8.1 env: CIBW_ARCHS: auto64 - - name: Build source distribution + - name: Build source distribution if: runner.os == 'Linux' # Only release source under linux to avoid conflict run: | python3 setup.py sdist mv dist/*.tar.gz wheelhouse/ - - name: Upload to PyPI + - name: Upload to PyPI env: TWINE_USERNAME: __token__ TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} @@ -93,7 +93,8 @@ jobs: python -m twine upload wheelhouse/* image_publish: name: Build & push images - if: github.repository == 'tensorchord/envd' + # only trigger on main repo when tag starts with v + if: github.repository == 'tensorchord/envd' && startsWith(github.ref, 'refs/tags/v') runs-on: ubuntu-latest needs: goreleaser steps: @@ -115,3 +116,38 @@ jobs: run: | docker login --username "${DOCKERIO_USERNAME}" --password "${DOCKERIO_PASSWORD}" ./base-images/build.sh + cache_publish: + name: Build & Push the remote cache + # only trigger on main repo when tag starts with v + if: github.repository == 'tensorchord/envd' && startsWith(github.ref, 'refs/tags/v') + runs-on: ubuntu-latest + needs: goreleaser + steps: + - uses: actions/checkout@v3 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + - name: Cache Docker layers + uses: actions/cache@v3 + id: cache + with: + path: /tmp/.buildx-cache + key: ${{ runner.os }}-buildx-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-buildx- + - name: Get gobin + uses: actions/download-artifact@v3 + with: + name: gobin_${{ github.event.release.tag_name }} + path: dist/ + - name: Configure linux build environment + if: runner.os == 'Linux' + run: | + mv dist/envd_linux_amd64_v1/envd /usr/local/bin/envd + chmod +x /usr/local/bin/envd + - name: Build and push + env: + DOCKERIO_USERNAME: ${{ secrets.DOCKERIO_USERNAME }} + DOCKERIO_PASSWORD: ${{ secrets.DOCKERIO_PASSWORD }} + run: | + docker login --username "${DOCKERIO_USERNAME}" --password "${DOCKERIO_PASSWORD}" + ./base-images/remote-cache/build-and-push-remote-cache.sh diff --git a/.goreleaser.yaml b/.goreleaser.yaml index b7be13a99..35cdcc846 100644 --- a/.goreleaser.yaml +++ b/.goreleaser.yaml @@ -18,6 +18,7 @@ builds: - -X github.com/tensorchord/envd/pkg/version.buildDate={{ .Date }} - -X github.com/tensorchord/envd/pkg/version.gitCommit={{ .Commit }} - -X github.com/tensorchord/envd/pkg/version.gitTreeState=clean + - -X github.com/tensorchord/envd/pkg/version.gitTag={{ .Tag }} - env: - CGO_ENABLED=0 goos: diff --git a/Makefile b/Makefile index ea1a14f69..261f0c903 100644 --- a/Makefile +++ b/Makefile @@ -74,6 +74,7 @@ GIT_COMMIT=$(shell git rev-parse HEAD) GIT_TAG=$(shell if [ -z "`git status --porcelain`" ]; then git describe --exact-match --tags HEAD 2>/dev/null; fi) GIT_TREE_STATE=$(shell if [ -z "`git status --porcelain`" ]; then echo "clean" ; else echo "dirty"; fi) GITSHA ?= $(shell git rev-parse --short HEAD) +GIT_LATEST_TAG ?= $(shell git describe --tags --abbrev=0) # Track code version with Docker Label. DOCKER_LABELS ?= git-describe="$(shell date -u +v%Y%m%d)-$(shell git describe --tags --always --dirty)" @@ -95,11 +96,20 @@ export GOFLAGS ?= -count=1 # # All targets. -.PHONY: help lint test build dev container push addlicense debug debug-local build-local generate clean test-local addlicense-install mockgen-install pypi-build +.PHONY: help lint test build dev container push addlicense debug debug-local build-local generate clean test-local addlicense-install mockgen-install pypi-build base-image -.DEFAULT_GOAL:=build +.DEFAULT_GOAL:=build-local -build: build-local ## Build the release version of envd +build-release: + @for target in $(TARGETS); do \ + CGO_ENABLED=$(CGO_ENABLED) go build -trimpath -v -o $(OUTPUT_DIR)/$${target} \ + -ldflags "-s -w -X $(ROOT)/pkg/version.version=$(VERSION) \ + -X $(ROOT)/pkg/version.buildDate=$(BUILD_DATE) \ + -X $(ROOT)/pkg/version.gitCommit=$(GIT_COMMIT) \ + -X $(ROOT)/pkg/version.gitTreeState=$(GIT_TREE_STATE) \ + -X $(ROOT)/pkg/version.gitTag=$(GIT_TAG)" \ + $(CMD_DIR)/$${target}; \ + done help: ## Display this help @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z0-9_-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) @@ -122,7 +132,12 @@ addlicense-install: build-local: @for target in $(TARGETS); do \ CGO_ENABLED=$(CGO_ENABLED) go build -trimpath -v -o $(OUTPUT_DIR)/$${target} \ - -ldflags "-s -w -X $(ROOT)/pkg/version.version=$(VERSION) -X $(ROOT)/pkg/version.buildDate=$(BUILD_DATE) -X $(ROOT)/pkg/version.gitCommit=$(GIT_COMMIT) -X $(ROOT)/pkg/version.gitTreeState=$(GIT_TREE_STATE)" \ + -ldflags "-s -w -X $(ROOT)/pkg/version.version=$(VERSION) \ + -X $(ROOT)/pkg/version.buildDate=$(BUILD_DATE) \ + -X $(ROOT)/pkg/version.gitCommit=$(GIT_COMMIT) \ + -X $(ROOT)/pkg/version.gitTreeState=$(GIT_TREE_STATE) \ + -X $(ROOT)/pkg/version.gitTag=$(GIT_LATEST_TAG) \ + -X $(ROOT)/pkg/version.developmentFlag=true" \ $(CMD_DIR)/$${target}; \ done @@ -156,8 +171,14 @@ test: generate ## Run the tests @go test -race -coverpkg=./pkg/... -coverprofile=coverage.out $(shell go list ./... | grep -v e2e) @go tool cover -func coverage.out | tail -n 1 | awk '{ print "Total coverage: " $$3 }' -e2e-test: generate - @go test -race -coverpkg=./pkg/app -coverprofile=e2e-coverage.out ./e2e +e2e-test: + @go test -ldflags "-s -w -X $(ROOT)/pkg/version.version=$(VERSION) \ + -X $(ROOT)/pkg/version.buildDate=$(BUILD_DATE) \ + -X $(ROOT)/pkg/version.gitCommit=$(GIT_COMMIT) \ + -X $(ROOT)/pkg/version.gitTreeState=$(GIT_TREE_STATE) \ + -X $(ROOT)/pkg/version.gitTag="$(shell git describe --tags --abbrev=0)" \ + -X $(ROOT)/pkg/version.developmentFlag=true" \ + -race -v -coverpkg=./pkg/app -coverprofile=e2e-coverage.out ./e2e clean: ## Clean the outputs and artifacts @-rm -vrf ${OUTPUT_DIR} diff --git a/base-images/build.sh b/base-images/build.sh index 4c0aa2470..e87dcc492 100755 --- a/base-images/build.sh +++ b/base-images/build.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash +set -euo pipefail + ROOT_DIR=`dirname $0` GIT_TAG_VERSION=$(git describe --tags --abbrev=0 | sed -r 's/[v]+//g') # remove v from version @@ -26,7 +28,7 @@ docker buildx build \ --build-arg HTTP_PROXY=${HTTP_PROXY} \ --build-arg HTTPS_PROXY=${HTTPS_PROXY} \ --pull --push --platform linux/x86_64,linux/arm64 \ - -t ${DOCKER_HUB_ORG}/python:${PYTHON_VERSION}-${ENVD_OS} \ + -t ${DOCKER_HUB_ORG}/python:${PYTHON_VERSION}-${ENVD_OS}-envd-v${ENVD_VERSION} \ -f python${PYTHON_VERSION}-${ENVD_OS}.Dockerfile . docker buildx build --build-arg IMAGE_NAME=docker.io/nvidia/cuda \ --build-arg ENVD_VERSION=${ENVD_VERSION} \ @@ -34,7 +36,7 @@ docker buildx build --build-arg IMAGE_NAME=docker.io/nvidia/cuda \ --build-arg HTTP_PROXY=${HTTP_PROXY} \ --build-arg HTTPS_PROXY=${HTTPS_PROXY} \ --pull --push --platform linux/x86_64,linux/arm64 \ - -t ${DOCKER_HUB_ORG}/python:${PYTHON_VERSION}-${ENVD_OS}-cuda11.6-cudnn8 \ + -t ${DOCKER_HUB_ORG}/python:${PYTHON_VERSION}-${ENVD_OS}-cuda11.6-cudnn8-envd-v${ENVD_VERSION} \ -f python${PYTHON_VERSION}-${ENVD_OS}-cuda11.6.Dockerfile . # TODO(gaocegege): Support linux/arm64 @@ -43,7 +45,7 @@ docker buildx build \ --build-arg ENVD_SSH_IMAGE=ghcr.io/tensorchord/envd-ssh-from-scratch \ --build-arg HTTP_PROXY=${HTTP_PROXY} \ --build-arg HTTPS_PROXY=${HTTPS_PROXY} \ - -t ${DOCKER_HUB_ORG}/r-base:${RLANG_VERSION} \ + -t ${DOCKER_HUB_ORG}/r-base:${RLANG_VERSION}-envd-v${ENVD_VERSION} \ --pull --push --platform linux/x86_64 \ -f r${RLANG_VERSION}.Dockerfile . docker buildx build \ @@ -51,7 +53,7 @@ docker buildx build \ --build-arg ENVD_SSH_IMAGE=ghcr.io/tensorchord/envd-ssh-from-scratch \ --build-arg HTTP_PROXY=${HTTP_PROXY} \ --build-arg HTTPS_PROXY=${HTTPS_PROXY} \ - -t ${DOCKER_HUB_ORG}/julia:${JULIA_VERSION}-${ENVD_OS} \ + -t ${DOCKER_HUB_ORG}/julia:${JULIA_VERSION}-${ENVD_OS}-envd-v${ENVD_VERSION} \ --pull --push --platform linux/x86_64,linux/arm64 \ -f julia${JULIA_VERSION}-${ENVD_OS}.Dockerfile . cd - > /dev/null diff --git a/base-images/python3.9-ubuntu20.04-cuda11.6.Dockerfile b/base-images/python3.9-ubuntu20.04-cuda11.6.Dockerfile index 03e7b7800..b5b2f2399 100644 --- a/base-images/python3.9-ubuntu20.04-cuda11.6.Dockerfile +++ b/base-images/python3.9-ubuntu20.04-cuda11.6.Dockerfile @@ -88,35 +88,4 @@ RUN apt-mark hold ${NV_LIBCUBLAS_DEV_PACKAGE_NAME} ${NV_LIBNCCL_DEV_PACKAGE_NAME ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs -# Leave these args here to better use the Docker build cache -ARG CONDA_VERSION=py39_4.11.0 - -RUN set -x && \ - UNAME_M="$(uname -m)" && \ - if [ "${UNAME_M}" = "x86_64" ]; then \ - MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-x86_64.sh"; \ - SHA256SUM="4ee9c3aa53329cd7a63b49877c0babb49b19b7e5af29807b793a76bdb1d362b4"; \ - elif [ "${UNAME_M}" = "s390x" ]; then \ - MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-s390x.sh"; \ - SHA256SUM="e5e5e89cdcef9332fe632cd25d318cf71f681eef029a24495c713b18e66a8018"; \ - elif [ "${UNAME_M}" = "aarch64" ]; then \ - MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-aarch64.sh"; \ - SHA256SUM="00c7127a8a8d3f4b9c2ab3391c661239d5b9a88eafe895fd0f3f2a8d9c0f4556"; \ - elif [ "${UNAME_M}" = "ppc64le" ]; then \ - MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-ppc64le.sh"; \ - SHA256SUM="8ee1f8d17ef7c8cb08a85f7d858b1cb55866c06fcf7545b98c3b82e4d0277e66"; \ - fi && \ - wget "${MINICONDA_URL}" -O miniconda.sh -q && \ - echo "${SHA256SUM} miniconda.sh" > shasum && \ - if [ "${CONDA_VERSION}" != "latest" ]; then sha256sum --check --status shasum; fi && \ - mkdir -p /opt && \ - sh miniconda.sh -b -p /opt/conda && \ - rm miniconda.sh shasum && \ - ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ - echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ - echo "conda activate base" >> ~/.bashrc && \ - find /opt/conda/ -follow -type f -name '*.a' -delete && \ - find /opt/conda/ -follow -type f -name '*.js.map' -delete && \ - /opt/conda/bin/conda clean -afy - COPY --from=envd /usr/bin/envd-ssh /var/envd/bin/envd-ssh diff --git a/base-images/python3.9-ubuntu20.04.Dockerfile b/base-images/python3.9-ubuntu20.04.Dockerfile index b9e4a283e..3813e2fc5 100644 --- a/base-images/python3.9-ubuntu20.04.Dockerfile +++ b/base-images/python3.9-ubuntu20.04.Dockerfile @@ -32,35 +32,4 @@ RUN apt-get update && \ # prompt && curl --proto '=https' --tlsv1.2 -sSf https://starship.rs/install.sh | sh -s -- -y -# Leave these args here to better use the Docker build cache -ARG CONDA_VERSION=py39_4.11.0 - -RUN set -x && \ - UNAME_M="$(uname -m)" && \ - if [ "${UNAME_M}" = "x86_64" ]; then \ - MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-x86_64.sh"; \ - SHA256SUM="4ee9c3aa53329cd7a63b49877c0babb49b19b7e5af29807b793a76bdb1d362b4"; \ - elif [ "${UNAME_M}" = "s390x" ]; then \ - MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-s390x.sh"; \ - SHA256SUM="e5e5e89cdcef9332fe632cd25d318cf71f681eef029a24495c713b18e66a8018"; \ - elif [ "${UNAME_M}" = "aarch64" ]; then \ - MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-aarch64.sh"; \ - SHA256SUM="00c7127a8a8d3f4b9c2ab3391c661239d5b9a88eafe895fd0f3f2a8d9c0f4556"; \ - elif [ "${UNAME_M}" = "ppc64le" ]; then \ - MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-ppc64le.sh"; \ - SHA256SUM="8ee1f8d17ef7c8cb08a85f7d858b1cb55866c06fcf7545b98c3b82e4d0277e66"; \ - fi && \ - wget "${MINICONDA_URL}" -O miniconda.sh -q && \ - echo "${SHA256SUM} miniconda.sh" > shasum && \ - if [ "${CONDA_VERSION}" != "latest" ]; then sha256sum --check --status shasum; fi && \ - mkdir -p /opt && \ - sh miniconda.sh -b -p /opt/conda && \ - rm miniconda.sh shasum && \ - ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ - echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ - echo "conda activate base" >> ~/.bashrc && \ - find /opt/conda/ -follow -type f -name '*.a' -delete && \ - find /opt/conda/ -follow -type f -name '*.js.map' -delete && \ - /opt/conda/bin/conda clean -afy - COPY --from=envd /usr/bin/envd-ssh /var/envd/bin/envd-ssh diff --git a/base-images/remote-cache/build-and-push-remote-cache.sh b/base-images/remote-cache/build-and-push-remote-cache.sh new file mode 100755 index 000000000..a5878cf6a --- /dev/null +++ b/base-images/remote-cache/build-and-push-remote-cache.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +set -euo pipefail + +ROOT_DIR=`dirname $0` + +GIT_TAG_VERSION=$(git describe --tags --abbrev=0 | sed -r 's/[v]+//g') # remove v from version +ENVD_VERSION="${ENVD_VERSION:-$GIT_TAG_VERSION}" + +cd ${ROOT_DIR} + +envd build --export-cache type=registry,ref=docker.io/tensorchord/python-cache:3.9-envd-v${ENVD_VERSION} --force + +cd - > /dev/null diff --git a/base-images/remote-cache/build.envd b/base-images/remote-cache/build.envd new file mode 100644 index 000000000..3b22de80e --- /dev/null +++ b/base-images/remote-cache/build.envd @@ -0,0 +1,2 @@ +def build(): + base(os="ubuntu20.04", language="python3") diff --git a/e2e/e2e_helper.go b/e2e/e2e_helper.go index 07bdbc9ad..e5de41596 100644 --- a/e2e/e2e_helper.go +++ b/e2e/e2e_helper.go @@ -19,7 +19,7 @@ import ( "context" "strings" - "github.com/pkg/errors" + "github.com/cockroachdb/errors" "github.com/sirupsen/logrus" "github.com/tensorchord/envd/pkg/app" diff --git a/e2e/suite_test.go b/e2e/suite_test.go index 117fe6c32..003f97361 100644 --- a/e2e/suite_test.go +++ b/e2e/suite_test.go @@ -15,12 +15,19 @@ package e2e import ( + "os" "testing" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + + "github.com/tensorchord/envd/pkg/version" ) +func init() { + version.SetGitTagForE2ETest(os.Getenv("GIT_LATEST_TAG")) +} + func TestMain(t *testing.T) { RegisterFailHandler(Fail) RunSpecs(t, "envd Suite") diff --git a/examples/python-basic/build.envd b/examples/python-basic/build.envd index 3a236afbc..fb2f67c51 100644 --- a/examples/python-basic/build.envd +++ b/examples/python-basic/build.envd @@ -1,5 +1,5 @@ def build(): - base(os="ubuntu20.04", language="python3") + base(os="ubuntu20.04", language="python3.8") #config.pip_index(url = "https://pypi.tuna.tsinghua.edu.cn/simple") install.python_packages([ "via", diff --git a/pkg/app/version.go b/pkg/app/version.go index 53ad4a729..fe5a1cc99 100644 --- a/pkg/app/version.go +++ b/pkg/app/version.go @@ -16,9 +16,13 @@ package app import ( "fmt" + "strings" + "github.com/cockroachdb/errors" "github.com/urfave/cli/v2" + "github.com/tensorchord/envd/pkg/envd" + "github.com/tensorchord/envd/pkg/types" "github.com/tensorchord/envd/pkg/version" ) @@ -48,7 +52,7 @@ func printVersion(ctx *cli.Context) error { short := ctx.Bool("short") detail := ctx.Bool("detail") ver := version.GetVersion() - detailVer, err := version.GetDetailedVersion(ctx) + detailVer, err := getDetailedVersion(ctx) fmt.Printf("envd: %s\n", ver) if short { return nil @@ -78,3 +82,48 @@ func printVersion(ctx *cli.Context) error { } return nil } + +func getDetailedVersion(clicontext *cli.Context) (detailedVersion, error) { + engine, err := envd.New(clicontext.Context) + if err != nil { + return detailedVersion{}, errors.Wrap( + err, "failed to create engine for docker server", + ) + } + + info, err := engine.GetInfo(clicontext.Context) + if err != nil { + return detailedVersion{}, errors.Wrap( + err, "failed to get detailed version info from docker server", + ) + } + + return detailedVersion{ + OSVersion: info.OSVersion, + OSType: info.OSType, + KernelVersion: info.KernelVersion, + DockerVersion: info.ServerVersion, + Architecture: info.Architecture, + DefaultRuntime: info.DefaultRuntime, + ContainerRuntimes: GetRuntimes(info), + }, nil +} + +type detailedVersion struct { + OSVersion string + OSType string + KernelVersion string + Architecture string + DockerVersion string + ContainerRuntimes string + DefaultRuntime string +} + +func GetRuntimes(info *types.EnvdInfo) string { + runtimesMap := info.Runtimes + keys := make([]string, 0, len(runtimesMap)) + for k := range runtimesMap { + keys = append(keys, k) + } + return "[" + strings.Join(keys, ",") + "]" +} diff --git a/pkg/builder/build.go b/pkg/builder/build.go index 568325f36..40d620c1b 100644 --- a/pkg/builder/build.go +++ b/pkg/builder/build.go @@ -25,31 +25,43 @@ import ( func (b generalBuilder) BuildFunc() func(ctx context.Context, c client.Client) (*client.Result, error) { return func(ctx context.Context, c client.Client) (*client.Result, error) { b.logger.Debug("running BuildFunc for envd") - def, err := b.compile(ctx) - if err != nil { - return nil, errors.Wrap(err, "failed to compile") - } - imageConfig, err := b.imageConfig(ctx) - if err != nil { - return nil, errors.Wrap(err, "failed to get labels") + sreq := client.SolveRequest{ + Definition: b.definition.ToPB(), } - sreq := client.SolveRequest{ - Definition: def.ToPB(), + // Get the envd default cache importer in docker.io/tensorchord/... + if defaultImporter, err := b.defaultCacheImporter(); err != nil { + return nil, errors.Wrap(err, "failed to get default importer") + } else if defaultImporter != nil { + b.logger.WithField("default-cache", *defaultImporter). + Debug("import remote cache") + ci, err := ParseImportCache([]string{*defaultImporter}) + if err != nil { + return nil, errors.Wrap(err, "failed to get the import cache") + } + sreq.CacheImports = append(sreq.CacheImports, ci...) } + + // Get the user-defined cache importer. if b.Options.ImportCache != "" { ci, err := ParseImportCache([]string{b.Options.ImportCache}) if err != nil { return nil, errors.Wrap(err, "failed to get the import cache") } - sreq.CacheImports = ci + sreq.CacheImports = append(sreq.CacheImports, ci...) } + res, err := c.Solve(ctx, sreq) if err != nil { return nil, errors.Wrap(err, "failed to solve") } + imageConfig, err := b.imageConfig(ctx) + if err != nil { + return nil, errors.Wrap(err, "failed to get labels") + } + res.AddMeta(exptypes.ExporterImageConfigKey, []byte(imageConfig)) b.logger.Debugf("setting image config: %s", imageConfig) diff --git a/pkg/builder/builder.go b/pkg/builder/builder.go index 7945cc56c..b6eeb0fbd 100644 --- a/pkg/builder/builder.go +++ b/pkg/builder/builder.go @@ -74,6 +74,8 @@ type generalBuilder struct { manifestCodeHash string entries []client.ExportEntry + definition *llb.Definition + logger *logrus.Entry starlark.Interpreter buildkitd.Client @@ -139,6 +141,13 @@ func (b generalBuilder) Build(ctx context.Context, force bool) error { if !force && !b.checkIfNeedBuild(ctx) { return nil } + + def, err := b.compile(ctx) + if err != nil { + return errors.Wrap(err, "failed to compile") + } + b.definition = def + pw, err := progresswriter.NewPrinter(ctx, os.Stdout, b.ProgressMode) if err != nil { return errors.Wrap(err, "failed to create progress writer") @@ -202,6 +211,13 @@ func (b generalBuilder) imageConfig(ctx context.Context) (string, error) { return data, nil } +func (b generalBuilder) defaultCacheImporter() (*string, error) { + if ir.DefaultGraph != nil { + return ir.DefaultGraph.DefaultCacheImporter() + } + return nil, nil +} + func (b generalBuilder) build(ctx context.Context, pw progresswriter.Writer) error { b.logger.Debug("building envd image") ce, err := ParseExportCache([]string{b.ExportCache}, nil) diff --git a/pkg/lang/ir/compile.go b/pkg/lang/ir/compile.go index 6f03b79f8..4f4d3d974 100644 --- a/pkg/lang/ir/compile.go +++ b/pkg/lang/ir/compile.go @@ -29,6 +29,7 @@ import ( "github.com/tensorchord/envd/pkg/progress/compileui" "github.com/tensorchord/envd/pkg/types" "github.com/tensorchord/envd/pkg/util/fileutil" + "github.com/tensorchord/envd/pkg/version" ) func NewGraph() *Graph { @@ -139,6 +140,26 @@ func (g Graph) ExposedPorts() (map[string]struct{}, error) { return ports, nil } +func (g Graph) DefaultCacheImporter() (*string, error) { + switch g.Language.Name { + case "python": + v, err := g.getAppropriatePythonVersion() + if err != nil { + return nil, errors.Wrap(err, "failed to get python version") + } + // We only support remote cache for 3.9 currently. + if v == "3.9" { + res := fmt.Sprintf( + "type=registry,ref=docker.io/tensorchord/python-cache:%s-envd-%s", + v, version.GetGitTagFromVersion()) + return &res, nil + } + return nil, nil + default: + return nil, nil + } +} + func (g Graph) Entrypoint(buildContextDir string) ([]string, error) { // Do not set entrypoint if the image is customized. if g.Image != nil { @@ -193,10 +214,12 @@ func (g Graph) Compile(uid, gid int) (llb.State, error) { }).Debug("compile LLB") // TODO(gaocegege): Support more OS and langs. - base := g.compileBase() + base, err := g.compileBase() + if err != nil { + return llb.State{}, errors.Wrap(err, "failed to get the base image") + } aptStage := g.compileUbuntuAPT(base) var merged llb.State - var err error // Use custom logic when image is specified. if g.Image != nil { merged, err = g.compileCustomPython(aptStage) diff --git a/pkg/lang/ir/conda.go b/pkg/lang/ir/conda.go index f0b137b22..2958ea82e 100644 --- a/pkg/lang/ir/conda.go +++ b/pkg/lang/ir/conda.go @@ -15,16 +15,23 @@ package ir import ( + _ "embed" "fmt" "strings" + "github.com/cockroachdb/errors" "github.com/moby/buildkit/client/llb" "github.com/sirupsen/logrus" ) const ( - condarc = "/home/envd/.condarc" - defaultVersion = "3.9" + condarc = "/home/envd/.condarc" + condaVersionDefault = "py39_4.11.0" +) + +var ( + //go:embed install-conda.sh + installCondaBash string ) func (g Graph) CondaEnabled() bool { @@ -80,7 +87,7 @@ func (g Graph) compileCondaPackages(root llb.State) llb.State { return run.Root() } -func (g Graph) compileCondaEnvironment(root llb.State) llb.State { +func (g Graph) compileCondaEnvironment(root llb.State) (llb.State, error) { root = llb.User("envd")(root) cacheDir := "/opt/conda/pkgs" @@ -95,17 +102,20 @@ func (g Graph) compileCondaEnvironment(root llb.State) llb.State { // Always init bash since we will use it to create jupyter notebook service. run := root.Run(llb.Shlex("bash -c \"/opt/conda/bin/conda init bash\""), llb.WithCustomName("[internal] initialize conda bash environment")) - pythonVersion, err := g.GetAppropriatePythonVersion() + pythonVersion, err := g.getAppropriatePythonVersion() if err != nil { - pythonVersion = defaultVersion + return llb.State{}, errors.Wrap(err, "failed to get python version") } + cmd := fmt.Sprintf( - "bash -c \"/opt/conda/bin/conda create -n envd python=%s\"", pythonVersion) + "bash -c \"/opt/conda/bin/conda create -n envd python=%s\"", + pythonVersion) // Create a conda environment. - run = run.Run(llb.Shlex(cmd), llb.WithCustomName("[internal] create conda environment")) - run.AddMount(cacheDir, cache, - llb.AsPersistentCacheDir(g.CacheID(cacheDir), llb.CacheMountShared), llb.SourcePath("/cache-conda")) + run = run.Run(llb.Shlex(cmd), + llb.WithCustomName("[internal] create conda environment")) + run.AddMount(cacheDir, cache, llb.AsPersistentCacheDir( + g.CacheID(cacheDir), llb.CacheMountShared), llb.SourcePath("/cache-conda")) switch g.Shell { case shellBASH: @@ -119,5 +129,15 @@ func (g Graph) compileCondaEnvironment(root llb.State) llb.State { llb.Shlex(`bash -c 'echo "source /opt/conda/bin/activate envd" >> /home/envd/.zshrc'`), llb.WithCustomName("[internal] add conda environment to zshrc")) } - return run.Root() + return run.Root(), nil +} + +func (g Graph) installConda(root llb.State) (llb.State, error) { + run := root.AddEnv("CONDA_VERSION", condaVersionDefault). + File(llb.Mkdir("/opt/conda", 0755, llb.WithParents(true), + llb.WithUIDGID(g.uid, g.gid)), + llb.WithCustomName("[internal] create conda directory")). + Run(llb.Shlex(fmt.Sprintf("bash -c '%s'", installCondaBash)), + llb.WithCustomName("[internal] install conda")) + return run.Root(), nil } diff --git a/pkg/lang/ir/install-conda.sh b/pkg/lang/ir/install-conda.sh new file mode 100644 index 000000000..1a04c5553 --- /dev/null +++ b/pkg/lang/ir/install-conda.sh @@ -0,0 +1,26 @@ +set -x && \ +UNAME_M="$(uname -m)" && \ +if [ "${UNAME_M}" = "x86_64" ]; then \ + MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-x86_64.sh"; \ + SHA256SUM="4ee9c3aa53329cd7a63b49877c0babb49b19b7e5af29807b793a76bdb1d362b4"; \ +elif [ "${UNAME_M}" = "s390x" ]; then \ + MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-s390x.sh"; \ + SHA256SUM="e5e5e89cdcef9332fe632cd25d318cf71f681eef029a24495c713b18e66a8018"; \ +elif [ "${UNAME_M}" = "aarch64" ]; then \ + MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-aarch64.sh"; \ + SHA256SUM="00c7127a8a8d3f4b9c2ab3391c661239d5b9a88eafe895fd0f3f2a8d9c0f4556"; \ +elif [ "${UNAME_M}" = "ppc64le" ]; then \ + MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-ppc64le.sh"; \ + SHA256SUM="8ee1f8d17ef7c8cb08a85f7d858b1cb55866c06fcf7545b98c3b82e4d0277e66"; \ +fi && \ +wget "${MINICONDA_URL}" -O /tmp/miniconda.sh && \ +echo "${SHA256SUM} /tmp/miniconda.sh" > /tmp/shasum && \ +if [ "${CONDA_VERSION}" != "latest" ]; then sha256sum --check --status /tmp/shasum; fi && \ +mkdir -p /opt && \ +sh /tmp/miniconda.sh -b -u -p /opt/conda && \ +rm /tmp/miniconda.sh /tmp/shasum && \ +echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ +echo "conda activate base" >> ~/.bashrc && \ +find /opt/conda/ -follow -type f -name '*.a' -delete && \ +find /opt/conda/ -follow -type f -name '*.js.map' -delete && \ +/opt/conda/bin/conda clean -afy diff --git a/pkg/lang/ir/python.go b/pkg/lang/ir/python.go index f6f97a20c..4ab5a24c2 100644 --- a/pkg/lang/ir/python.go +++ b/pkg/lang/ir/python.go @@ -24,10 +24,18 @@ import ( "github.com/sirupsen/logrus" ) -func (g Graph) GetAppropriatePythonVersion() (string, error) { +const ( + pythonVersionDefault = "3.9" +) + +func (g Graph) getAppropriatePythonVersion() (string, error) { + if g.Language.Version == nil { + return pythonVersionDefault, nil + } + version := *g.Language.Version if version == "3" || version == "" { - return defaultVersion, nil + return pythonVersionDefault, nil } if strings.HasPrefix(version, "3.") { return version, nil @@ -56,7 +64,10 @@ func (g Graph) compilePython(aptStage llb.State) (llb.State, error) { return llb.State{}, errors.Wrap(err, "failed to compile shell") } - condaEnvStage := g.compileCondaEnvironment(shellStage) + condaEnvStage, err := g.compileCondaEnvironment(shellStage) + if err != nil { + return llb.State{}, errors.Wrap(err, "failed to compile conda environment") + } condaStage := llb.Diff(builtinSystemStage, g.compileCondaPackages(condaEnvStage), diff --git a/pkg/lang/ir/system.go b/pkg/lang/ir/system.go index 9078c2419..59767cfc4 100644 --- a/pkg/lang/ir/system.go +++ b/pkg/lang/ir/system.go @@ -15,6 +15,7 @@ package ir import ( + _ "embed" "fmt" "os" "path/filepath" @@ -26,6 +27,7 @@ import ( "github.com/tensorchord/envd/pkg/config" "github.com/tensorchord/envd/pkg/flag" + "github.com/tensorchord/envd/pkg/version" ) func (g Graph) compileUbuntuAPT(root llb.State) llb.State { @@ -75,7 +77,9 @@ func (g Graph) compileCopy(root llb.State) llb.State { } func (g *Graph) compileCUDAPackages() llb.State { - root := llb.Image(fmt.Sprintf("docker.io/tensorchord/python:3.9-%s-cuda%s-cudnn%s", g.OS, *g.CUDA, *g.CUDNN)) + root := llb.Image(fmt.Sprintf( + "docker.io/tensorchord/python:3.9-%s-cuda%s-cudnn%s-envd-%s", + g.OS, *g.CUDA, *g.CUDNN, version.GetGitTagFromVersion())) return root } @@ -105,7 +109,7 @@ func (g Graph) compileSystemPackages(root llb.State) llb.State { return run.Root() } -func (g *Graph) compileBase() llb.State { +func (g *Graph) compileBase() (llb.State, error) { logger := logrus.WithFields(logrus.Fields{ "os": g.OS, "language": g.Language.Name, @@ -119,11 +123,12 @@ func (g *Graph) compileBase() llb.State { // Do not update user permission in the base image. if g.Image != nil { logger.WithField("image", *g.Image).Debugf("using custom base image") - return llb.Image(*g.Image) + return llb.Image(*g.Image), nil } else if g.CUDA == nil && g.CUDNN == nil { switch g.Language.Name { case "r": - base = llb.Image("docker.io/tensorchord/r-base:4.2") + base = llb.Image(fmt.Sprintf("docker.io/tensorchord/r-base:4.2-envd-%s", + version.GetGitTagFromVersion())) // r-base image already has GID 1000. // It is a trick, we actually use GID 1000 if g.gid == 1000 { @@ -133,17 +138,28 @@ func (g *Graph) compileBase() llb.State { g.uid = 1001 } case "python": - base = llb.Image("docker.io/tensorchord/python:3.9-ubuntu20.04") + base = llb.Image(fmt.Sprintf( + "docker.io/tensorchord/python:3.9-ubuntu20.04-envd-%s", + version.GetGitTagFromVersion())) case "julia": - base = llb.Image("docker.io/tensorchord/julia:1.8rc1-ubuntu20.04") + base = llb.Image(fmt.Sprintf( + "docker.io/tensorchord/julia:1.8rc1-ubuntu20.04-envd-%s", + version.GetGitTagFromVersion())) } } else { base = g.compileCUDAPackages() } var res llb.ExecState + + // Install conda first. + condaStage, err := g.installConda(base) + if err != nil { + return llb.State{}, errors.Wrap(err, "failed to install conda") + } + // TODO(gaocegege): Refactor user to a separate stage. if g.uid == 0 { - res = base. + res = condaStage. Run(llb.Shlex(fmt.Sprintf("groupadd -g %d envd", 1001)), llb.WithCustomName("[internal] still create group envd for root context")). Run(llb.Shlex(fmt.Sprintf("useradd -p \"\" -u %d -g envd -s /bin/sh -m envd", 1001)), @@ -159,7 +175,7 @@ func (g *Graph) compileBase() llb.State { Run(llb.Shlex("chown -R root:root /opt/conda"), llb.WithCustomName("[internal] configure user permissions")) } else { - res = base. + res = condaStage. Run(llb.Shlex(fmt.Sprintf("groupadd -g %d envd", g.gid)), llb.WithCustomName("[internal] create user group envd")). Run(llb.Shlex(fmt.Sprintf("useradd -p \"\" -u %d -g envd -s /bin/sh -m envd", g.uid)), @@ -171,7 +187,7 @@ func (g *Graph) compileBase() llb.State { Run(llb.Shlex("chown -R envd:envd /opt/conda"), llb.WithCustomName("[internal] configure user permissions")) } - return llb.User("envd")(res.Root()) + return llb.User("envd")(res.Root()), nil } func (g Graph) copySSHKey(root llb.State) (llb.State, error) { diff --git a/pkg/lang/ir/util.go b/pkg/lang/ir/util.go index b216ae937..a2f3f269b 100644 --- a/pkg/lang/ir/util.go +++ b/pkg/lang/ir/util.go @@ -29,6 +29,7 @@ func parseLanguage(l string) (string, *string, error) { return "", nil, errors.New("language is required") } + // Get version from the string. re := regexp.MustCompile(`\d[\d,]*[\.]?[\d{2}]*[\.]?[\d{2}]*`) if !re.MatchString(l) { language = l diff --git a/pkg/version/version.go b/pkg/version/version.go index 963da294c..136fd881d 100644 --- a/pkg/version/version.go +++ b/pkg/version/version.go @@ -24,12 +24,6 @@ import ( "runtime" "strings" "sync" - - "github.com/cockroachdb/errors" - "github.com/urfave/cli/v2" - - "github.com/tensorchord/envd/pkg/envd" - "github.com/tensorchord/envd/pkg/types" ) var ( @@ -40,11 +34,12 @@ var ( // the program at linking time. Revision = "" - version = "0.0.0+unknown" - buildDate = "1970-01-01T00:00:00Z" // output from `date -u +'%Y-%m-%dT%H:%M:%SZ'` - gitCommit = "" // output from `git rev-parse HEAD` - gitTag = "" // output from `git describe --exact-match --tags HEAD` (if clean tree state) - gitTreeState = "" // determined from `git status --porcelain`. either 'clean' or 'dirty' + version = "0.0.0+unknown" + buildDate = "1970-01-01T00:00:00Z" // output from `date -u +'%Y-%m-%dT%H:%M:%SZ'` + gitCommit = "" // output from `git rev-parse HEAD` + gitTag = "" // output from `git describe --exact-match --tags HEAD` (if clean tree state) + gitTreeState = "" // determined from `git status --porcelain`. either 'clean' or 'dirty' + developmentFlag = "false" ) // Version contains envd version information @@ -59,25 +54,29 @@ type Version struct { Platform string } -type DetailedVersion struct { - OSVersion string - OSType string - KernelVersion string - Architecture string - DockerVersion string - ContainerRuntimes string - DefaultRuntime string -} - func (v Version) String() string { return v.Version } +// GetGitTagFromVersion gets the git tag. +func GetGitTagFromVersion() string { + if gitTag != "" { + return gitTag + } + return "" +} + +// SetGitTagForE2ETest sets the gitTag for test purpose. +func SetGitTagForE2ETest(tag string) { + gitTag = tag +} + // GetEnvdVersion gets Envd version information func GetEnvdVersion() string { var versionStr string - if gitCommit != "" && gitTag != "" && gitTreeState == "clean" { + if gitCommit != "" && gitTag != "" && + gitTreeState == "clean" && developmentFlag == "false" { // if we have a clean tree state and the current commit is tagged, // this is an official release. versionStr = gitTag @@ -101,15 +100,6 @@ func GetEnvdVersion() string { return versionStr } -func GetRuntimes(info *types.EnvdInfo) string { - runtimesMap := info.Runtimes - keys := make([]string, 0, len(runtimesMap)) - for k := range runtimesMap { - keys = append(keys, k) - } - return "[" + strings.Join(keys, ",") + "]" -} - // GetVersion returns the version information func GetVersion() Version { return Version{ @@ -124,32 +114,6 @@ func GetVersion() Version { } } -func GetDetailedVersion(clicontext *cli.Context) (DetailedVersion, error) { - engine, err := envd.New(clicontext.Context) - if err != nil { - return DetailedVersion{}, errors.Wrap( - err, "failed to create engine for docker server", - ) - } - - info, err := engine.GetInfo(clicontext.Context) - if err != nil { - return DetailedVersion{}, errors.Wrap( - err, "failed to get detailed version info from docker server", - ) - } - - return DetailedVersion{ - OSVersion: info.OSVersion, - OSType: info.OSType, - KernelVersion: info.KernelVersion, - DockerVersion: info.ServerVersion, - Architecture: info.Architecture, - DefaultRuntime: info.DefaultRuntime, - ContainerRuntimes: GetRuntimes(info), - }, nil -} - var ( reRelease *regexp.Regexp reDev *regexp.Regexp