From dd5ba20f962fc71868f7321233f9128acac360d0 Mon Sep 17 00:00:00 2001 From: Dustin Dorroh Date: Thu, 25 Jul 2019 03:26:45 -0700 Subject: [PATCH 1/3] Update to cuda to 10.0 and tensorflow 1.14 for docker install --- Dockerfile | 2 +- components/cuda/docker-compose.cuda.yml | 2 +- components/cuda/install-cuda10-0.sh | 43 +++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 2 deletions(-) create mode 100755 components/cuda/install-cuda10-0.sh diff --git a/Dockerfile b/Dockerfile index a4c0e655bcf2..a13b87e84cd7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -125,7 +125,7 @@ RUN apt-get update && \ ARG CUDA_SUPPORT ENV CUDA_SUPPORT=${CUDA_SUPPORT} RUN if [ "$CUDA_SUPPORT" = "yes" ]; then \ - /tmp/components/cuda/install.sh; \ + /tmp/components/cuda/install-cuda-10-0.sh; \ fi # TODO: CHANGE URL diff --git a/components/cuda/docker-compose.cuda.yml b/components/cuda/docker-compose.cuda.yml index 66445f12437c..6c1076bd83dc 100644 --- a/components/cuda/docker-compose.cuda.yml +++ b/components/cuda/docker-compose.cuda.yml @@ -15,4 +15,4 @@ services: environment: NVIDIA_VISIBLE_DEVICES: all NVIDIA_DRIVER_CAPABILITIES: compute,utility - NVIDIA_REQUIRE_CUDA: "cuda>=9.0" + NVIDIA_REQUIRE_CUDA: "cuda>=10.0 brand=tesla,driver>=384,driver<385 brand=tesla,driver>=410,driver<411" diff --git a/components/cuda/install-cuda10-0.sh b/components/cuda/install-cuda10-0.sh new file mode 100755 index 000000000000..a56bf9648493 --- /dev/null +++ b/components/cuda/install-cuda10-0.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +# +# cuda 10.0 base - https://gitlab.com/nvidia/cuda/blob/ubuntu16.04/10.0/base/Dockerfile +# cuda 10.0 runtime - https://gitlab.com/nvidia/cuda/blob/ubuntu16.04/10.0/runtime/Dockerfile +# cudnn7 - https://gitlab.com/nvidia/cuda/blob/ubuntu16.04/10.0/runtime/cudnn7/Dockerfile +# +# +set -e + +apt-get update && apt-get install -y --no-install-recommends ca-certificates apt-transport-https gnupg-curl && \ + rm -rf /var/lib/apt/lists/* && \ + NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \ + NVIDIA_GPGKEY_FPR=ae09fe4bbd223a84b2ccfce3f60f4b3d7fa2af80 && \ + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub && \ + apt-key adv --export --no-emit-version -a ${NVIDIA_GPGKEY_FPR} | tail -n +5 > cudasign.pub && \ + echo "${NVIDIA_GPGKEY_SUM} cudasign.pub" | sha256sum -c --strict - && rm cudasign.pub && \ + echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \ + echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list + +CUDA_VERSION=10.0.130 +NCCL_VERSION=2.4.2 +CUDNN_VERSION=7.6.0.64 +CUDA_PKG_VERSION="10-0=${CUDA_VERSION}-1" +echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf +echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf +echo 'export PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}' >> ${HOME}/.bashrc +echo 'export LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:${LD_LIBRARY_PATH}' >> ${HOME}/.bashrc + +# For libraries in the cuda-compat-* package: https://docs.nvidia.com/cuda/eula/index.html#attachment-a +apt-get update && apt-get install -y --no-install-recommends \ + cuda-cudart-${CUDA_PKG_VERSION} \ + cuda-compat-10-0 \ + cuda-libraries-${CUDA_PKG_VERSION} \ + cuda-nvtx-${CUDA_PKG_VERSION} \ + libnccl2=${NCCL_VERSION}-1+cuda10.0 \ + libcudnn7=${CUDNN_VERSION}-1+cuda10.0 && \ + apt-mark hold libnccl2 libcudnn7 && \ + ln -s cuda-10.0 /usr/local/cuda && \ + rm -rf /var/lib/apt/lists/* \ + /etc/apt/sources.list.d/cuda.list /etc/apt/sources.list.d/nvidia-ml.list + +pip3 uninstall -y tensorflow +pip3 install --no-cache-dir tensorflow-gpu==1.14.0 From 3f1f7e94917028fe1f04ac9f7cb814cb78f66b9e Mon Sep 17 00:00:00 2001 From: Dustin Dorroh Date: Tue, 3 Sep 2019 13:39:55 -0700 Subject: [PATCH 2/3] Update Dockerfile --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index a13b87e84cd7..a4c0e655bcf2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -125,7 +125,7 @@ RUN apt-get update && \ ARG CUDA_SUPPORT ENV CUDA_SUPPORT=${CUDA_SUPPORT} RUN if [ "$CUDA_SUPPORT" = "yes" ]; then \ - /tmp/components/cuda/install-cuda-10-0.sh; \ + /tmp/components/cuda/install.sh; \ fi # TODO: CHANGE URL From 4be81b170f9ae43033f4dd5d9f831ab41ec359fd Mon Sep 17 00:00:00 2001 From: Dustin Dorroh Date: Tue, 3 Sep 2019 13:42:02 -0700 Subject: [PATCH 3/3] renamed: components/cuda/install-cuda10-0.sh -> components/cuda/install.sh --- components/cuda/install-cuda10-0.sh | 43 --------------------- components/cuda/install.sh | 58 ++++++++++++++++------------- 2 files changed, 32 insertions(+), 69 deletions(-) delete mode 100755 components/cuda/install-cuda10-0.sh diff --git a/components/cuda/install-cuda10-0.sh b/components/cuda/install-cuda10-0.sh deleted file mode 100755 index a56bf9648493..000000000000 --- a/components/cuda/install-cuda10-0.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env bash -# -# cuda 10.0 base - https://gitlab.com/nvidia/cuda/blob/ubuntu16.04/10.0/base/Dockerfile -# cuda 10.0 runtime - https://gitlab.com/nvidia/cuda/blob/ubuntu16.04/10.0/runtime/Dockerfile -# cudnn7 - https://gitlab.com/nvidia/cuda/blob/ubuntu16.04/10.0/runtime/cudnn7/Dockerfile -# -# -set -e - -apt-get update && apt-get install -y --no-install-recommends ca-certificates apt-transport-https gnupg-curl && \ - rm -rf /var/lib/apt/lists/* && \ - NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \ - NVIDIA_GPGKEY_FPR=ae09fe4bbd223a84b2ccfce3f60f4b3d7fa2af80 && \ - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub && \ - apt-key adv --export --no-emit-version -a ${NVIDIA_GPGKEY_FPR} | tail -n +5 > cudasign.pub && \ - echo "${NVIDIA_GPGKEY_SUM} cudasign.pub" | sha256sum -c --strict - && rm cudasign.pub && \ - echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \ - echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list - -CUDA_VERSION=10.0.130 -NCCL_VERSION=2.4.2 -CUDNN_VERSION=7.6.0.64 -CUDA_PKG_VERSION="10-0=${CUDA_VERSION}-1" -echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf -echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf -echo 'export PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}' >> ${HOME}/.bashrc -echo 'export LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:${LD_LIBRARY_PATH}' >> ${HOME}/.bashrc - -# For libraries in the cuda-compat-* package: https://docs.nvidia.com/cuda/eula/index.html#attachment-a -apt-get update && apt-get install -y --no-install-recommends \ - cuda-cudart-${CUDA_PKG_VERSION} \ - cuda-compat-10-0 \ - cuda-libraries-${CUDA_PKG_VERSION} \ - cuda-nvtx-${CUDA_PKG_VERSION} \ - libnccl2=${NCCL_VERSION}-1+cuda10.0 \ - libcudnn7=${CUDNN_VERSION}-1+cuda10.0 && \ - apt-mark hold libnccl2 libcudnn7 && \ - ln -s cuda-10.0 /usr/local/cuda && \ - rm -rf /var/lib/apt/lists/* \ - /etc/apt/sources.list.d/cuda.list /etc/apt/sources.list.d/nvidia-ml.list - -pip3 uninstall -y tensorflow -pip3 install --no-cache-dir tensorflow-gpu==1.14.0 diff --git a/components/cuda/install.sh b/components/cuda/install.sh index 2cda99fcb769..a56bf9648493 100755 --- a/components/cuda/install.sh +++ b/components/cuda/install.sh @@ -1,37 +1,43 @@ -#!/bin/bash +#!/usr/bin/env bash # -# Copyright (C) 2018 Intel Corporation +# cuda 10.0 base - https://gitlab.com/nvidia/cuda/blob/ubuntu16.04/10.0/base/Dockerfile +# cuda 10.0 runtime - https://gitlab.com/nvidia/cuda/blob/ubuntu16.04/10.0/runtime/Dockerfile +# cudnn7 - https://gitlab.com/nvidia/cuda/blob/ubuntu16.04/10.0/runtime/cudnn7/Dockerfile # -# SPDX-License-Identifier: MIT # set -e -NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \ -NVIDIA_GPGKEY_FPR=ae09fe4bbd223a84b2ccfce3f60f4b3d7fa2af80 && \ -apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub && \ -apt-key adv --export --no-emit-version -a $NVIDIA_GPGKEY_FPR | tail -n +5 > cudasign.pub && \ -echo "$NVIDIA_GPGKEY_SUM cudasign.pub" | sha256sum -c --strict - && rm cudasign.pub && \ -echo "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \ -echo "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list +apt-get update && apt-get install -y --no-install-recommends ca-certificates apt-transport-https gnupg-curl && \ + rm -rf /var/lib/apt/lists/* && \ + NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \ + NVIDIA_GPGKEY_FPR=ae09fe4bbd223a84b2ccfce3f60f4b3d7fa2af80 && \ + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub && \ + apt-key adv --export --no-emit-version -a ${NVIDIA_GPGKEY_FPR} | tail -n +5 > cudasign.pub && \ + echo "${NVIDIA_GPGKEY_SUM} cudasign.pub" | sha256sum -c --strict - && rm cudasign.pub && \ + echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \ + echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list -CUDA_VERSION=9.0.176 -NCCL_VERSION=2.1.15 -CUDNN_VERSION=7.6.2.24 -CUDA_PKG_VERSION="9-0=${CUDA_VERSION}-1" +CUDA_VERSION=10.0.130 +NCCL_VERSION=2.4.2 +CUDNN_VERSION=7.6.0.64 +CUDA_PKG_VERSION="10-0=${CUDA_VERSION}-1" +echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf +echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf echo 'export PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}' >> ${HOME}/.bashrc echo 'export LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:${LD_LIBRARY_PATH}' >> ${HOME}/.bashrc -apt-get update && apt-get install -y --no-install-recommends --allow-unauthenticated \ - libprotobuf-dev \ - libprotoc-dev \ - protobuf-compiler \ - cuda-cudart-$CUDA_PKG_VERSION \ - cuda-libraries-$CUDA_PKG_VERSION \ - libnccl2=$NCCL_VERSION-1+cuda9.0 \ - libcudnn7=$CUDNN_VERSION-1+cuda9.0 && \ - ln -s cuda-9.0 /usr/local/cuda && \ -rm -rf /var/lib/apt/lists/* \ - /etc/apt/sources.list.d/nvidia-ml.list /etc/apt/sources.list.d/cuda.list +# For libraries in the cuda-compat-* package: https://docs.nvidia.com/cuda/eula/index.html#attachment-a +apt-get update && apt-get install -y --no-install-recommends \ + cuda-cudart-${CUDA_PKG_VERSION} \ + cuda-compat-10-0 \ + cuda-libraries-${CUDA_PKG_VERSION} \ + cuda-nvtx-${CUDA_PKG_VERSION} \ + libnccl2=${NCCL_VERSION}-1+cuda10.0 \ + libcudnn7=${CUDNN_VERSION}-1+cuda10.0 && \ + apt-mark hold libnccl2 libcudnn7 && \ + ln -s cuda-10.0 /usr/local/cuda && \ + rm -rf /var/lib/apt/lists/* \ + /etc/apt/sources.list.d/cuda.list /etc/apt/sources.list.d/nvidia-ml.list pip3 uninstall -y tensorflow -pip3 install --no-cache-dir tensorflow-gpu==1.12.3 +pip3 install --no-cache-dir tensorflow-gpu==1.14.0