Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[v1.x] Add new CI pipeline for building and testing with cuda 11.0. #19149

Merged
merged 12 commits into from
Sep 17, 2020
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions ci/docker/Dockerfile.build.ubuntu_build_cuda110
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# -*- mode: dockerfile -*-
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Dockerfile to build MXNet on Ubuntu 16.04 for GPU but on
# a CPU-only instance. This restriction is caused by the CPP-
# package generation, requiring the actual CUDA library to be
# present

FROM nvidia/cuda:11.0-cudnn8-devel-ubuntu16.04

WORKDIR /work/deps

# Install core build dependencies, ccache, and the Python toolchain.
# The actual package lists live in the ci/docker/install/ scripts (not
# shown here); each script gets its own layer so a change to one does
# not invalidate the cache of the others.
COPY install/ubuntu_core.sh /work/
RUN /work/ubuntu_core.sh
COPY install/deb_ubuntu_ccache.sh /work/
RUN /work/deb_ubuntu_ccache.sh
COPY install/ubuntu_python.sh /work/
COPY install/requirements /work/
RUN /work/ubuntu_python.sh

# Keep this at the end since this command is not cachable
# (USER_ID/GROUP_ID differ per CI host, so this layer — and everything
# after it — can never be reused across machines).
ARG USER_ID=0
ARG GROUP_ID=0
COPY install/ubuntu_adduser.sh /work/
RUN /work/ubuntu_adduser.sh

COPY runtime_functions.sh /work/

WORKDIR /work/mxnet
# The compat directory supplies libcuda.so.1 so that the cpp-package
# build can link against libmxnet.so on a host without a GPU driver
# (the reason this image exists — see header comment).
# NOTE(review): if LD_LIBRARY_PATH is unset in the base image this
# expands to a leading ":" (an empty entry, i.e. the CWD, on the search
# path) — confirm the nvidia/cuda base image defines it.
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/compat
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I remember facing issues with cuda/compat directory while migrating from G3 to G4. Do we need this? @ptrendx @leezu confirm.
It was only TVM OP which needed that, right? And since we are disabling it, we shouldn't need the compat dir.

Correct me if I'm wrong.

Copy link
Contributor

@samskalicky samskalicky Sep 15, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pretty sure we need this:

/usr/local/cuda-11.0/compat/libcuda.so.450.51.06
/usr/local/cuda-11.0/compat/libcuda.so
/usr/local/cuda-11.0/compat/libcuda.so.1

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without this, the cpp-package fails to build (due to unable to find libcuda.so.1, which libmxnet.so is linked against.)

I could also disable the cpp-package portion of the build, since it's actually not being used in the test pipeline steps.


44 changes: 44 additions & 0 deletions ci/docker/Dockerfile.build.ubuntu_gpu_cu110
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# -*- mode: dockerfile -*-
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Dockerfile to run MXNet on Ubuntu 16.04 for GPU

FROM nvidia/cuda:11.0-cudnn8-devel-ubuntu16.04

WORKDIR /work/deps

# Core build/test dependencies (see ci/docker/install/ubuntu_core.sh).
COPY install/ubuntu_core.sh /work/
RUN /work/ubuntu_core.sh

# ccache, to speed up repeated CI compiles.
COPY install/deb_ubuntu_ccache.sh /work/
RUN /work/deb_ubuntu_ccache.sh

# Python toolchain plus the pinned pip requirements file.
COPY install/ubuntu_python.sh /work/
COPY install/requirements /work/
RUN /work/ubuntu_python.sh

# Always last
# (USER_ID/GROUP_ID vary per CI host, so this layer is never
# cache-stable across machines).
ARG USER_ID=0
ARG GROUP_ID=0
COPY install/ubuntu_adduser.sh /work/
RUN /work/ubuntu_adduser.sh

COPY runtime_functions.sh /work/

WORKDIR /work/mxnet
# NOTE(review): on a real GPU host the NVIDIA container runtime normally
# injects libcuda itself; presumably the compat entry is kept here to
# mirror the build image and tolerate older host drivers — confirm it is
# still required for the test containers.
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/compat
32 changes: 21 additions & 11 deletions ci/docker/install/ubuntu_cudnn.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,31 +32,41 @@ fi
apt-get update || true

# Select the cuDNN apt package flavor for the requested CUDA toolkit.
# CUDA 11.0 is the first version paired with cuDNN 8 (libcudnn8); every
# other supported CUDA version ships cuDNN 7 (libcudnn7).  The Debian
# package version string is always "<CUDNN_VERSION>-1+cuda<major.minor>",
# so it is derived once below instead of being copy-pasted into every
# case arm (the repetition in the old code was a typo magnet).
case ${CUDA_VERSION} in
    11\.0*)
        export libcudnn_package="libcudnn8"
        ;;
    10\.2*|10\.1*|10\.0*|9\.2*|9\.0*)
        export libcudnn_package="libcudnn7"
        ;;
    *)
        echo "Unsupported CUDA version ${CUDA_VERSION}"
        exit 1
        ;;
esac

# e.g. "11.0.221" -> "11.0"; the apt suffix uses only major.minor.
cuda_major_minor=$(echo "${CUDA_VERSION}" | cut -d. -f1,2)
export libcudnn_version="${CUDNN_VERSION}-1+cuda${cuda_major_minor}"
export libcudnn_dev_version="${CUDNN_VERSION}-1+cuda${cuda_major_minor}"

# --allow-downgrades: the base image may already carry a newer cuDNN
# than the version this script pins.
apt-get install -y --allow-downgrades ${libcudnn_package}=${libcudnn_version} ${libcudnn_package}-dev=${libcudnn_dev_version}

30 changes: 30 additions & 0 deletions ci/docker/runtime_functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -806,6 +806,24 @@ build_ubuntu_gpu_cuda101_cudnn7() {
make cython PYTHON=python3
}

# Build libmxnet.so (plus the cpp-package and cython bindings) against
# CUDA 11.0 / cuDNN 8 for the CI pipeline.  Mirrors the cuda101 build
# function, with MKLDNN and TVM ops disabled for this configuration.
# NOTE: the scraped original had GitHub review-UI text ("...marked this
# conversation as resolved" / "Show resolved Hide resolved") embedded
# inside the make line continuation, which would break the script; the
# artifact lines are removed here.
build_ubuntu_gpu_cuda110_cudnn8() {
    set -ex
    build_ccache_wrappers
    make \
        USE_BLAS=openblas \
        USE_MKLDNN=0 \
        USE_CUDA=1 \
        USE_CUDA_PATH=/usr/local/cuda \
        USE_CUDNN=1 \
        USE_TVM_OP=0 \
        USE_CPP_PACKAGE=1 \
        USE_DIST_KVSTORE=1 \
        CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \
        USE_SIGNAL_HANDLER=1 \
        -j$(nproc)
    make cython PYTHON=python3
}

build_ubuntu_gpu_cuda101_cudnn7_mkldnn_cpp_test() {
set -ex
build_ccache_wrappers
Expand Down Expand Up @@ -1083,6 +1101,18 @@ unittest_ubuntu_python3_quantization_gpu() {
nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_quantization_gpu.xml --verbose tests/python/quantization_gpu
}

# Run the Python 3 quantization GPU unit tests inside the CUDA 11.0
# container.  Identical to unittest_ubuntu_python3_quantization_gpu
# except that it supplies a cuDNN 8 default for CUDNN_VERSION.
unittest_ubuntu_python3_quantization_gpu_cu110() {
set -ex
export PYTHONPATH=./python/
export MXNET_MKLDNN_DEBUG=0 # Ignored if not present
export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
export MXNET_SUBGRAPH_VERBOSE=0
# Default only applies when the container did not already set it.
# NOTE(review): "8.0.33" looks like a truncated cuDNN 8 version string
# (cuDNN 8 releases are four-part, e.g. 8.0.3.33) — confirm it matches
# the package naming expected by ubuntu_cudnn.sh.
export CUDNN_VERSION=${CUDNN_VERSION:-8.0.33}
export MXNET_ENABLE_CYTHON=0
export DMLC_LOG_STACK_TRACE_DEPTH=10
nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_quantization_gpu.xml --verbose tests/python/quantization_gpu
}

unittest_centos7_cpu_scala() {
set -ex
cd /work/mxnet
Expand Down
48 changes: 48 additions & 0 deletions ci/jenkins/Jenkins_steps.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,20 @@ def compile_unix_full_gpu() {
}]
}

// Build step for the CUDA 11.0 pipeline: compiles the full GPU build
// (runtime_functions.sh::build_ubuntu_gpu_cuda110_cudnn8) inside the
// ubuntu_build_cuda110 container and stashes the binaries under the
// 'gpu_cu110' key for the downstream test steps.  Runs on a CPU node —
// per the Dockerfile, the compile itself needs no physical GPU.
def compile_unix_full_gpu_cu110() {
return ['GPU: CUDA11.0+cuDNN8': {
node(NODE_LINUX_CPU) {
ws('workspace/build-gpu') {
timeout(time: max_time, unit: 'MINUTES') {
utils.init_git()
// NOTE(review): third arg 'false' presumably means "no GPU/nvidia
// runtime needed" — confirm against utils.docker_run's signature.
utils.docker_run('ubuntu_build_cuda110', 'build_ubuntu_gpu_cuda110_cudnn8', false)
utils.pack_lib('gpu_cu110', mx_lib_cpp_examples)
}
}
}
}]
}

def compile_unix_full_gpu_mkldnn_cpp_test() {
return ['GPU: CUDA10.1+cuDNN7+MKLDNN+CPPTEST': {
node(NODE_LINUX_CPU) {
Expand Down Expand Up @@ -737,6 +751,22 @@ def test_unix_python3_gpu() {
}]
}

// Test step: Python 3 GPU unit tests against the CUDA 11.0 build.
// Unpacks the 'gpu_cu110' artifacts stashed by compile_unix_full_gpu_cu110
// and runs the cython-enabled GPU suite in the ubuntu_gpu_cu110 container
// on a G4 GPU node.
def test_unix_python3_gpu_cu110() {
return ['Python3+CUDA11.0: GPU': {
node(NODE_LINUX_GPU_G4) {
ws('workspace/ut-python3-gpu') {
try {
utils.unpack_and_init('gpu_cu110', mx_lib_cython)
python3_gpu_ut_cython('ubuntu_gpu_cu110')
utils.publish_test_coverage()
} finally {
// Always collect xunit results, even when the suite fails.
utils.collect_test_results_unix('nosetests_gpu.xml', 'nosetests_python3_gpu.xml')
}
}
}
}]
}

def test_unix_python3_quantize_gpu() {
return ['Python3: Quantize GPU': {
node(NODE_LINUX_GPU_P3) {
Expand All @@ -755,6 +785,24 @@ def test_unix_python3_quantize_gpu() {
}]
}

// Test step: Python 3 quantization GPU tests against the CUDA 11.0 build.
// Fix: invoke the dedicated CUDA 11.0 runtime function
// (unittest_ubuntu_python3_quantization_gpu_cu110, added in this same
// change set, which pins a cuDNN 8 default for CUDNN_VERSION) instead of
// the generic unittest_ubuntu_python3_quantization_gpu — otherwise the
// new cu110 function is never called anywhere.
// NOTE(review): this runs on a P3 node while test_unix_python3_gpu_cu110
// uses G4, mirroring the non-cu110 quantize step — confirm intentional.
def test_unix_python3_quantize_gpu_cu110() {
return ['Python3+CUDA11.0: Quantize GPU': {
node(NODE_LINUX_GPU_P3) {
ws('workspace/ut-python3-quantize-gpu') {
timeout(time: max_time, unit: 'MINUTES') {
try {
utils.unpack_and_init('gpu_cu110', mx_lib)
utils.docker_run('ubuntu_gpu_cu110', 'unittest_ubuntu_python3_quantization_gpu_cu110', true)
utils.publish_test_coverage()
} finally {
// Always collect xunit results, even when the suite fails.
utils.collect_test_results_unix('nosetests_quantization_gpu.xml', 'nosetests_python3_quantize_gpu.xml')
}
}
}
}
}]
}

def test_unix_python3_debug_cpu() {
return ['Python3: CPU debug': {
node(NODE_LINUX_CPU) {
Expand Down
1 change: 1 addition & 0 deletions ci/jenkins/Jenkinsfile_full
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def buildJobs = [
'miscellaneous',
'unix-cpu',
'unix-gpu',
'unix-gpu-cu110',
'website',
'windows-cpu',
'windows-gpu'
Expand Down
52 changes: 52 additions & 0 deletions ci/jenkins/Jenkinsfile_unix_gpu_cu110
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// -*- mode: groovy -*-

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// Jenkins pipeline
// See documents at https://jenkins.io/doc/book/pipeline/jenkinsfile/

// timeout in minutes
max_time = 180

node('utility') {
// Loading the utilities requires a node context unfortunately
checkout scm
utils = load('ci/Jenkinsfile_utils.groovy')
custom_steps = load('ci/jenkins/Jenkins_steps.groovy')
}
// Map the logical node roles referenced by Jenkins_steps.groovy onto the
// concrete agent labels of this Jenkins installation.
utils.assign_node_labels(utility: 'utility', linux_cpu: 'mxnetlinux-cpu', linux_gpu: 'mxnetlinux-gpu', linux_gpu_p3: 'mxnetlinux-gpu-p3', linux_gpu_g4: 'mxnetlinux-gpu-g4')

utils.main_wrapper(
core_logic: {
// Build once on a CPU node; the Test stage then consumes the stashed
// 'gpu_cu110' artifacts on GPU nodes, in parallel.
utils.parallel_stage('Build', [
custom_steps.compile_unix_full_gpu_cu110()
])

utils.parallel_stage('Tests', [
custom_steps.test_unix_python3_gpu_cu110(),
custom_steps.test_unix_python3_quantize_gpu_cu110()
])
}
,
failure_handler: {
// Only send email if master or release branches failed
// NOTE(review): the '${BRANCH_NAME}' placeholders below are in
// single-quoted Groovy strings, so they are NOT Groovy-interpolated;
// presumably the email-ext plugin expands them as tokens at send
// time — confirm before relying on the message content.
if (currentBuild.result == "FAILURE" && (env.BRANCH_NAME == "master" || env.BRANCH_NAME.startsWith("v"))) {
emailext body: 'Build for MXNet branch ${BRANCH_NAME} has broken. Please view the build at ${BUILD_URL}', replyTo: '${EMAIL}', subject: '[BUILD FAILED] Branch ${BRANCH_NAME} build ${BUILD_NUMBER}', to: '${EMAIL}'
}
}
)