From f04ec20ac2367043fab6aa9919e989b61b5c454f Mon Sep 17 00:00:00 2001 From: Manu Seth <22492939+mseth10@users.noreply.github.com> Date: Fri, 26 Feb 2021 19:35:16 -0800 Subject: [PATCH] Fix nightly CD for python docker image releases (#19772) * install wget * test cd docker in ci * install docker * install python3-dev and gcc * remove docker testing from ci * remove python3-dev * ecr target * skip build test * adding back python3-dev for make * remove dynamic and pypi stages for testing * install build-essential * install zlib * update python version * update ld library path * install openssl * update test packages for python3.7 * remove call to deleted safe_docker_run.py * hardcode region for public ecr repo * use deadsnakes to install python * revert dependency change * refactor ecr login * update ecr repo jenkins global var * cleanup * update docker authentication * add ecr repo * add back pypi and tests * remove unused libmxnet pipeline * update cu112 base docker * update base docker images to ub18 --- cd/Jenkinsfile_cd_pipeline | 40 +++++-------- cd/Jenkinsfile_release_job | 7 +-- cd/README.md | 4 +- .../{static => }/Jenkins_pipeline.groovy | 0 cd/mxnet_lib/dynamic/Jenkins_pipeline.groovy | 58 ------------------- cd/python/docker/Dockerfile | 17 +++--- cd/python/docker/Dockerfile.test | 3 - cd/python/docker/python_images.sh | 18 +++--- cd/utils/mxnet_base_image.sh | 14 ++--- 9 files changed, 42 insertions(+), 119 deletions(-) rename cd/mxnet_lib/{static => }/Jenkins_pipeline.groovy (100%) delete mode 100644 cd/mxnet_lib/dynamic/Jenkins_pipeline.groovy diff --git a/cd/Jenkinsfile_cd_pipeline b/cd/Jenkinsfile_cd_pipeline index 36d1547e2adc..03f2fb12280e 100644 --- a/cd/Jenkinsfile_cd_pipeline +++ b/cd/Jenkinsfile_cd_pipeline @@ -55,33 +55,21 @@ pipeline { stage("MXNet Release") { steps { script { - cd_utils.error_checked_parallel([ - - "Static libmxnet based release": { - stage("Build") { - cd_utils.trigger_release_job(params.CD_RELEASE_JOB_NAME, "Build static libmxnet", "mxnet_lib/static", params.MXNET_VARIANTS) - } - stage("Releases") { - cd_utils.error_checked_parallel([ - "PyPI Release": { - echo "Building PyPI Release" - cd_utils.trigger_release_job(params.CD_RELEASE_JOB_NAME, "Release PyPI Packages", "python/pypi", params.MXNET_VARIANTS) - }, - "Python Docker Release": { - echo "Building Python Docker Release" - cd_utils.trigger_release_job(params.CD_RELEASE_JOB_NAME, "Release Python Docker Images", "python/docker", params.MXNET_VARIANTS) - } - ]) + stage("Build libmxnet") { + cd_utils.trigger_release_job(params.CD_RELEASE_JOB_NAME, "Build libmxnet", "mxnet_lib", params.MXNET_VARIANTS) + } + stage("Releases") { + cd_utils.error_checked_parallel([ + "PyPI Release": { + echo "Building PyPI Release" + cd_utils.trigger_release_job(params.CD_RELEASE_JOB_NAME, "Release PyPI Packages", "python/pypi", params.MXNET_VARIANTS) + }, + "Python Docker Release": { + echo "Building Python Docker Release" + cd_utils.trigger_release_job(params.CD_RELEASE_JOB_NAME, "Release Python Docker Images", "python/docker", params.MXNET_VARIANTS) } - }, - - "Dynamic libmxnet based release": { - stage("Build") { - cd_utils.trigger_release_job(params.CD_RELEASE_JOB_NAME, "Build dynamic libmxnet", "mxnet_lib/dynamic", params.MXNET_VARIANTS) - } - } - - ]) + ]) + } } } } diff --git a/cd/Jenkinsfile_release_job b/cd/Jenkinsfile_release_job index 064ed31ce41b..d86ecad9ebd9 100644 --- a/cd/Jenkinsfile_release_job +++ b/cd/Jenkinsfile_release_job @@ -42,8 +42,8 @@ pipeline { // Using string instead of choice parameter to keep the changes to the parameters minimal to avoid // any disruption caused by different COMMIT_ID values chaning the job parameter configuration on // Jenkins. - string(defaultValue: "mxnet_lib/static", description: "Pipeline to build", name: "RELEASE_JOB_TYPE") - string(defaultValue: "cpu,native,cu100,cu101,cu102,cu110,cu112", description: "Comma separated list of variants", name: "MXNET_VARIANTS") + string(defaultValue: "mxnet_lib", description: "Pipeline to build", name: "RELEASE_JOB_TYPE") + string(defaultValue: "cpu,native,cu101,cu102,cu110,cu112", description: "Comma separated list of variants", name: "MXNET_VARIANTS") booleanParam(defaultValue: false, description: 'Whether this is a release build or not', name: "RELEASE_BUILD") } @@ -90,8 +90,7 @@ pipeline { // Add new job types here def valid_job_types = [ - "mxnet_lib/static", - "mxnet_lib/dynamic", + "mxnet_lib", "python/pypi", "python/docker" ] diff --git a/cd/README.md b/cd/README.md index 0072c1c2652a..8735953d648c 100644 --- a/cd/README.md +++ b/cd/README.md @@ -60,7 +60,7 @@ The [release job](Jenkinsfile_release_job) takes five parameters: * **RELEASE\_JOB\_TYPE**: Defines the release pipeline you want to execute. * **COMMIT_ID**: The commit id to build -The release job executes, in parallel, the release pipeline for each of the variants (**MXNET_VARIANTS**) for the job type (**RELEASE\_JOB\_TYPE**). The job type the path to a directory (relative to the `cd` directory) that includes a `Jenkins_pipeline.groovy` file ([e.g.](mxnet_lib/static/Jenkins_pipeline.groovy)). +The release job executes, in parallel, the release pipeline for each of the variants (**MXNET_VARIANTS**) for the job type (**RELEASE\_JOB\_TYPE**). The job type the path to a directory (relative to the `cd` directory) that includes a `Jenkins_pipeline.groovy` file ([e.g.](mxnet_lib/Jenkins_pipeline.groovy)). NOTE: The **COMMIT_ID** is a little tricky and we must be very careful with it. It is necessary to ensure that the same commit is built through out the pipeline, but at the same time, it has the potential to change the current state of the release job configuration - specifically the parameter configuration. Any changes to this configuration will require a "dry-run" of the release job to ensure Jenkins has the current (master) version. This is acceptable as there will be few changes to the parameter configuration for the job, if any at all. But, it's something to keep in mind. @@ -192,4 +192,4 @@ def test(mxnet_variant) { Examples: -Both the [statically linked libmxnet](mxnet_lib/static/Jenkins_pipeline.groovy) and [dynamically linked libmxnet](mxnet_lib/dynamic/Jenkins_pipeline.groovy) pipelines have long running compilation and testing stages that **do not** require specialized/expensive hardware (e.g. GPUs). Therefore, as much as possible, it is important to run each stage in on its own node, and design the pipeline to spend the least amount of time possible on expensive hardware. E.g. for GPU builds, only run GPU tests on GPU instances, all other stages can be executed on CPU nodes. +The [libmxnet](mxnet_lib/Jenkins_pipeline.groovy) pipeline has long running compilation and testing stages that **do not** require specialized/expensive hardware (e.g. GPUs). Therefore, as much as possible, it is important to run each stage in on its own node, and design the pipeline to spend the least amount of time possible on expensive hardware. E.g. for GPU builds, only run GPU tests on GPU instances, all other stages can be executed on CPU nodes. diff --git a/cd/mxnet_lib/static/Jenkins_pipeline.groovy b/cd/mxnet_lib/Jenkins_pipeline.groovy similarity index 100% rename from cd/mxnet_lib/static/Jenkins_pipeline.groovy rename to cd/mxnet_lib/Jenkins_pipeline.groovy diff --git a/cd/mxnet_lib/dynamic/Jenkins_pipeline.groovy b/cd/mxnet_lib/dynamic/Jenkins_pipeline.groovy deleted file mode 100644 index 3cef8114c76c..000000000000 --- a/cd/mxnet_lib/dynamic/Jenkins_pipeline.groovy +++ /dev/null @@ -1,58 +0,0 @@ -// -*- mode: groovy -*- - -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// -// Jenkins pipeline -// See documents at https://jenkins.io/doc/book/pipeline/jenkinsfile/ - -// NOTE: ci_utils is loaded by the originating Jenkins job, e.g. jenkins/Jenkinsfile_release_job - -// NOTE: the following variables are referenced in the mxnet_lib_pipeline jenkins file imported bellow -// libmxnet location -libmxnet = 'lib/libmxnet.so' - -// licenses -licenses = 'licenses/*' - -// libmxnet dependencies -mx_deps = '' -mx_native_deps = '' - -// library type -// either static or dynamic - depending on how it links to its dependencies -libtype = 'dynamic' - -libmxnet_pipeline = load('cd/mxnet_lib/mxnet_lib_pipeline.groovy') - -// Builds the dynamic binary for the specified mxnet variant -def build(mxnet_variant) { - node(NODE_LINUX_CPU) { - ws("workspace/mxnet_${libtype}/${mxnet_variant}/${env.BUILD_NUMBER}") { - def image = libmxnet_pipeline.get_environment(mxnet_variant) - ci_utils.init_git() - ci_utils.docker_run(image, "build_dynamic_libmxnet ${mxnet_variant}", false) - ci_utils.pack_lib("mxnet_${mxnet_variant}", libmxnet_pipeline.get_stash(mxnet_variant)) - } - } -} - -def get_pipeline(mxnet_variant) { - return libmxnet_pipeline.get_pipeline(mxnet_variant, this.&build) -} - -return this diff --git a/cd/python/docker/Dockerfile b/cd/python/docker/Dockerfile index c7a13a490991..194fda1e2a2c 100644 --- a/cd/python/docker/Dockerfile +++ b/cd/python/docker/Dockerfile @@ -23,15 +23,14 @@ ARG BASE_IMAGE FROM ${BASE_IMAGE} -RUN apt-get update || true -RUN apt-get install -y software-properties-common -RUN add-apt-repository -y ppa:deadsnakes/ppa -RUN apt-get update || true -RUN apt-get install -y python3.7-dev python3.7-distutils virtualenv wget -RUN ln -sf /usr/bin/python3.7 /usr/local/bin/python3 - -RUN wget -nv https://bootstrap.pypa.io/get-pip.py -RUN python3 get-pip.py +RUN apt-get update && \ + apt-get install -y software-properties-common && \ + add-apt-repository -y ppa:deadsnakes/ppa && \ + apt-get update && \ + apt-get install -y python3.7-dev python3.7-distutils virtualenv wget && \ + ln -sf /usr/bin/python3.7 /usr/local/bin/python3 && \ + wget -nv https://bootstrap.pypa.io/get-pip.py && \ + python3 get-pip.py ARG MXNET_COMMIT_ID ENV MXNET_COMMIT_ID=${MXNET_COMMIT_ID} diff --git a/cd/python/docker/Dockerfile.test b/cd/python/docker/Dockerfile.test index bed059d0fc73..3349e938cd75 100644 --- a/cd/python/docker/Dockerfile.test +++ b/cd/python/docker/Dockerfile.test @@ -23,9 +23,6 @@ ARG BASE_IMAGE FROM ${BASE_IMAGE} -# Install test dependencies -RUN pip install nose - ARG USER_ID=1001 ARG GROUP_ID=1001 diff --git a/cd/python/docker/python_images.sh b/cd/python/docker/python_images.sh index a93d5785d73e..9ed7573c0848 100755 --- a/cd/python/docker/python_images.sh +++ b/cd/python/docker/python_images.sh @@ -23,7 +23,7 @@ set -xe -usage="Usage: python_images.sh MXNET-VARIANT" +usage="Usage: python_images.sh MXNET-VARIANT" command=${1:?$usage} mxnet_variant=${2:?$usage} @@ -39,8 +39,8 @@ image_name="${repository}:${main_tag}" resources_path='cd/python/docker' -if [ ! -z "${RELEASE_DOCKERHUB_REPOSITORY}" ]; then - image_name="${RELEASE_DOCKERHUB_REPOSITORY}/${image_name}" +if [ ! -z "${RELEASE_PUBLIC_ECR_REPOSITORY}" ]; then + image_name="${RELEASE_PUBLIC_ECR_REPOSITORY}/${image_name}" fi build() { @@ -57,18 +57,16 @@ test() { # Ensure the correct context root is passed in when building - Dockerfile.test expects ci directory docker build -t "${test_image_name}" --build-arg USER_ID=`id -u` --build-arg GROUP_ID=`id -g` --build-arg BASE_IMAGE="${image_name}" -f ${resources_path}/Dockerfile.test ./ci - ./ci/safe_docker_run.py ${runtime_param} --cap-add "SYS_PTRACE" -u `id -u`:`id -g` -v `pwd`:/work/mxnet "${test_image_name}" ${resources_path}/test_python_image.sh "${mxnet_variant}" } push() { - if [ -z "${RELEASE_DOCKERHUB_REPOSITORY}" ]; then - echo "Cannot publish image without RELEASE_DOCKERHUB_REPOSITORY environment variable being set." + if [ -z "${RELEASE_PUBLIC_ECR_REPOSITORY}" ]; then + echo "Cannot publish image without RELEASE_PUBLIC_ECR_REPOSITORY environment variable being set." exit 1 fi - # The secret name env var is set in the Jenkins configuration - # Manage Jenkins -> Configure System - ./${ci_utils}/docker_login.py --secret-name "${RELEASE_DOCKERHUB_SECRET_NAME}" + # Retrieve an authentication token and authenticate Docker client to registry + aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/w6z5f7h2 # Push image docker push "${image_name}" @@ -76,7 +74,7 @@ push() { # Iterate over remaining tags, if any for ((i=1;i<${#docker_tags[@]};i++)); do local docker_tag="${docker_tags[${i}]}" - local latest_image_name="${RELEASE_DOCKERHUB_REPOSITORY}/${repository}:${docker_tag}_py3" + local latest_image_name="${RELEASE_PUBLIC_ECR_REPOSITORY}/${repository}:${docker_tag}_py3" docker tag "${image_name}" "${latest_image_name}" docker push "${latest_image_name}" diff --git a/cd/utils/mxnet_base_image.sh b/cd/utils/mxnet_base_image.sh index 3af6869bd915..5632fe8d024a 100755 --- a/cd/utils/mxnet_base_image.sh +++ b/cd/utils/mxnet_base_image.sh @@ -22,25 +22,25 @@ mxnet_variant=${1:?"Please specify the mxnet variant as the first parameter"} case ${mxnet_variant} in cu100*) - echo "nvidia/cuda:10.0-cudnn7-runtime-ubuntu16.04" + echo "nvidia/cuda:10.0-cudnn7-runtime-ubuntu18.04" ;; cu101*) - echo "nvidia/cuda:10.1-cudnn7-runtime-ubuntu16.04" + echo "nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04" ;; cu102*) - echo "nvidia/cuda:10.2-cudnn7-runtime-ubuntu16.04" + echo "nvidia/cuda:10.2-cudnn7-runtime-ubuntu18.04" ;; cu110*) - echo "nvidia/cuda:11.0-cudnn8-runtime-ubuntu16.04" + echo "nvidia/cuda:11.0-cudnn8-runtime-ubuntu18.04" ;; cu112*) - echo "nvidia/cuda:11.2.1-cudnn8-runtime-ubuntu16.04" + echo "nvidia/cuda:11.2.1-cudnn8-runtime-ubuntu18.04" ;; cpu) - echo "ubuntu:16.04" + echo "ubuntu:18.04" ;; native) - echo "ubuntu:16.04" + echo "ubuntu:18.04" ;; *) echo "Error: Unrecognized mxnet-variant: '${mxnet_variant}'"