From 3b53057565e86878c1f499f5c8e25459bbb1e45e Mon Sep 17 00:00:00 2001 From: FrAnCOisCokELaER Date: Thu, 17 Sep 2020 22:21:33 +0200 Subject: [PATCH 1/2] Docker for users with DeepSpeed - msdp-base | vision | nlp --- README.md | 6 +-- docker/README.md | 8 +++- docker/msdp/Dockerfile.msdp-base | 70 ++++++++++++++++++++++++++++++ docker/msdp/Dockerfile.msdp-nlp | 10 +++++ docker/msdp/Dockerfile.msdp-vision | 21 +++++++++ docs/source/index.rst | 6 +-- 6 files changed, 114 insertions(+), 7 deletions(-) create mode 100644 docker/msdp/Dockerfile.msdp-base create mode 100644 docker/msdp/Dockerfile.msdp-nlp create mode 100644 docker/msdp/Dockerfile.msdp-vision diff --git a/README.md b/README.md index cd6c2864f500..7e0107cb391c 100644 --- a/README.md +++ b/README.md @@ -308,11 +308,11 @@ docker run --gpus all -it -v $PWD:/workspace/project --network=host --shm-size 1 Available pre-built images are : -- `pytorchignite/base:latest | pytorchignite/hvd-base:latest` +- `pytorchignite/base:latest | pytorchignite/hvd-base:latest | pytorchignite/msdp-base:latest` - `pytorchignite/apex:latest | pytorchignite/hvd-apex:latest` -- `pytorchignite/vision:latest | pytorchignite/hvd-vision:latest` +- `pytorchignite/vision:latest | pytorchignite/hvd-vision:latest | pytorchignite/msdp-vision:latest` - `pytorchignite/apex-vision:latest | pytorchignite/hvd-apex-vision:latest` -- `pytorchignite/nlp:latest | pytorchignite/hvd-nlp:latest` +- `pytorchignite/nlp:latest | pytorchignite/hvd-nlp:latest | pytorchignite/msdp-nlp:latest` - `pytorchignite/apex-nlp:latest | pytorchignite/hvd-apex-nlp:latest` For more details, see [here](docker). diff --git a/docker/README.md b/docker/README.md index 905ad794575e..40bc79b8f068 100644 --- a/docker/README.md +++ b/docker/README.md @@ -50,7 +50,13 @@ Available Tensor Operations: * `docker pull pytorchignite/hvd-apex-vision:latest` - [hvd/Dockerfile.hvd-apex-nlp](hvd/Dockerfile.hvd-apex-nlp): base Horovod apex with useful NLP libraries * `docker pull pytorchignite/hvd-apex-nlp:latest` - +- [msdp/Dockerfile.msdp-base](msdp/Dockerfile.msdp-base): multi-stage MSDeepSpeed build with latest Pytorch, Ignite image with minimal dependencies + * `docker pull pytorchignite/msdp-base:latest` +- [msdp/Dockerfile.msdp-vision](msdp/Dockerfile.msdp-vision): base MSDeepSpeed build with useful computer vision libraries + * `docker pull pytorchignite/msdp-vision:latest` +- [msdp/Dockerfile.msdp-nlp](msdp/Dockerfile.msdp-nlp): base MSDeepSpeed build with useful NLP libraries + * `docker pull pytorchignite/msdp-nlp:latest` + ## How to use ```bash diff --git a/docker/msdp/Dockerfile.msdp-base b/docker/msdp/Dockerfile.msdp-base new file mode 100644 index 000000000000..7fc238115f67 --- /dev/null +++ b/docker/msdp/Dockerfile.msdp-base @@ -0,0 +1,70 @@ +# Multi-stage build +# 1/Building apex with pytorch:1.6.0-cuda10.1-cudnn7-devel +FROM pytorch/pytorch:1.6.0-cuda10.1-cudnn7-devel AS msdp-builder + +ARG ARG_TORCH_CUDA_ARCH_LIST="6.0;6.1;6.2;7.0;7.5" +ENV TORCH_CUDA_ARCH_LIST=$ARG_TORCH_CUDA_ARCH_LIST + +# Install git +RUN apt-get update && apt-get install -y --no-install-recommends git && \ + rm -rf /var/lib/apt/lists/* + +# For pip --use-feature option +RUN python -m pip install --upgrade pip + +# From https://github.com/microsoft/DeepSpeed/blob/master/docker/Dockerfile +############################################################################## +# DeepSpeed +############################################################################## +RUN git clone https://github.com/microsoft/DeepSpeed.git /tmp/DeepSpeed +RUN cd /tmp/DeepSpeed && \ + git checkout . && \ + git checkout master && \ + ./install.sh --allow_sudo + +# Build runtime image +FROM pytorch/pytorch:1.6.0-cuda10.1-cudnn7-runtime + +# For building cpufeature wheel +RUN apt-get update && apt-get install -y --no-install-recommends g++ gcc + +# Apex +COPY --from=msdp-builder /tmp/DeepSpeed/third_party/apex/dist/apex-*.whl /apex/ +RUN cd /apex && \ + pip install --no-cache-dir apex-*.whl && \ + rm -fr /apex + +# MSDeepSpeed +COPY --from=msdp-builder /tmp/DeepSpeed/dist/deepspeed-*.whl /msdp/ +RUN cd /msdp && \ + pip install --no-cache-dir deepspeed-*.whl && \ + rm -fr /msdp + +# Install tzdata / git +RUN apt-get update && \ + ln -fs /usr/share/zoneinfo/America/New_York /etc/localtime && \ + apt-get install -y tzdata && \ + dpkg-reconfigure --frontend noninteractive tzdata && \ + apt-get -y install --no-install-recommends git && \ + rm -rf /var/lib/apt/lists/* + +# Ignite main dependencies +RUN pip install --upgrade --no-cache-dir pytorch-ignite \ + tensorboard \ + tqdm +# Checkout Ignite examples only +RUN mkdir -p pytorch-ignite-examples && \ + cd pytorch-ignite-examples && \ + git init && \ + git config core.sparsecheckout true && \ + echo examples >> .git/info/sparse-checkout && \ + git remote add -f origin https://github.com/pytorch/ignite.git && \ + git pull origin master + +WORKDIR /workspace + +ENTRYPOINT ["/bin/bash"] + + + + diff --git a/docker/msdp/Dockerfile.msdp-nlp b/docker/msdp/Dockerfile.msdp-nlp new file mode 100644 index 000000000000..beda0d1e27cb --- /dev/null +++ b/docker/msdp/Dockerfile.msdp-nlp @@ -0,0 +1,10 @@ +# Dockerfile.msdp-nlp +FROM pytorchignite/msdp-base:latest + +# Ignite NLP dependencies +RUN pip install --upgrade --no-cache-dir torchtext \ + transformers \ + spacy \ + nltk + +ENTRYPOINT ["/bin/bash"] diff --git a/docker/msdp/Dockerfile.msdp-vision b/docker/msdp/Dockerfile.msdp-vision new file mode 100644 index 000000000000..00009b347ff8 --- /dev/null +++ b/docker/msdp/Dockerfile.msdp-vision @@ -0,0 +1,21 @@ +# Dockerfile.msdp-vision +FROM pytorchignite/msdp-base:latest + +# Install opencv dependencies +RUN apt-get update && \ + apt-get -y install --no-install-recommends libglib2.0 \ + libsm6 \ + libxext6 \ + libxrender-dev && \ + rm -rf /var/lib/apt/lists/* + +# Ignite vision dependencies +RUN pip install --upgrade --no-cache-dir albumentations \ + image-dataset-viz \ + numpy \ + opencv-python \ + py_config_runner \ + pillow \ + "trains>=0.15.0" + +ENTRYPOINT ["/bin/bash"] diff --git a/docs/source/index.rst b/docs/source/index.rst index 18be28e46afd..588d57c74140 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -78,11 +78,11 @@ Pull a pre-built docker image from `our Docker Hub `_. From 1935a6df97febe88d3135db6cab4117f413528f6 Mon Sep 17 00:00:00 2001 From: FrAnCOisCokELaER Date: Fri, 18 Sep 2020 14:47:59 +0200 Subject: [PATCH 2/2] Docker for users with DeepSpeed - rename images extensions to msdp-apex-* --- README.md | 6 +++--- docker/README.md | 6 +++--- .../{Dockerfile.msdp-base => Dockerfile.msdp-apex-base} | 0 .../msdp/{Dockerfile.msdp-nlp => Dockerfile.msdp-apex-nlp} | 4 ++-- .../{Dockerfile.msdp-vision => Dockerfile.msdp-apex-vision} | 4 ++-- docs/source/index.rst | 6 +++--- 6 files changed, 13 insertions(+), 13 deletions(-) rename docker/msdp/{Dockerfile.msdp-base => Dockerfile.msdp-apex-base} (100%) rename docker/msdp/{Dockerfile.msdp-nlp => Dockerfile.msdp-apex-nlp} (79%) rename docker/msdp/{Dockerfile.msdp-vision => Dockerfile.msdp-apex-vision} (91%) diff --git a/README.md b/README.md index 7e0107cb391c..8b837114ceb5 100644 --- a/README.md +++ b/README.md @@ -308,11 +308,11 @@ docker run --gpus all -it -v $PWD:/workspace/project --network=host --shm-size 1 Available pre-built images are : -- `pytorchignite/base:latest | pytorchignite/hvd-base:latest | pytorchignite/msdp-base:latest` +- `pytorchignite/base:latest | pytorchignite/hvd-base:latest | pytorchignite/msdp-apex-base:latest` - `pytorchignite/apex:latest | pytorchignite/hvd-apex:latest` -- `pytorchignite/vision:latest | pytorchignite/hvd-vision:latest | pytorchignite/msdp-vision:latest` +- `pytorchignite/vision:latest | pytorchignite/hvd-vision:latest | pytorchignite/msdp-apex-vision:latest` - `pytorchignite/apex-vision:latest | pytorchignite/hvd-apex-vision:latest` -- `pytorchignite/nlp:latest | pytorchignite/hvd-nlp:latest | pytorchignite/msdp-nlp:latest` +- `pytorchignite/nlp:latest | pytorchignite/hvd-nlp:latest | pytorchignite/msdp-apex-nlp:latest` - `pytorchignite/apex-nlp:latest | pytorchignite/hvd-apex-nlp:latest` For more details, see [here](docker). diff --git a/docker/README.md b/docker/README.md index 40bc79b8f068..658d9bdb47b3 100644 --- a/docker/README.md +++ b/docker/README.md @@ -50,11 +50,11 @@ Available Tensor Operations: * `docker pull pytorchignite/hvd-apex-vision:latest` - [hvd/Dockerfile.hvd-apex-nlp](hvd/Dockerfile.hvd-apex-nlp): base Horovod apex with useful NLP libraries * `docker pull pytorchignite/hvd-apex-nlp:latest` -- [msdp/Dockerfile.msdp-base](msdp/Dockerfile.msdp-base): multi-stage MSDeepSpeed build with latest Pytorch, Ignite image with minimal dependencies +- [msdp/Dockerfile.msdp-apex-base](msdp/Dockerfile.msdp-apex-base): multi-stage MSDeepSpeed build with latest Pytorch, Ignite image with minimal dependencies * `docker pull pytorchignite/msdp-base:latest` -- [msdp/Dockerfile.msdp-vision](msdp/Dockerfile.msdp-vision): base MSDeepSpeed build with useful computer vision libraries +- [msdp/Dockerfile.msdp-apex-vision](msdp/Dockerfile.msdp-apex-vision): base MSDeepSpeed build with useful computer vision libraries * `docker pull pytorchignite/msdp-vision:latest` -- [msdp/Dockerfile.msdp-nlp](msdp/Dockerfile.msdp-nlp): base MSDeepSpeed build with useful NLP libraries +- [msdp/Dockerfile.msdp-apex-nlp](msdp/Dockerfile.msdp-apex-nlp): base MSDeepSpeed build with useful NLP libraries * `docker pull pytorchignite/msdp-nlp:latest` ## How to use diff --git a/docker/msdp/Dockerfile.msdp-base b/docker/msdp/Dockerfile.msdp-apex-base similarity index 100% rename from docker/msdp/Dockerfile.msdp-base rename to docker/msdp/Dockerfile.msdp-apex-base diff --git a/docker/msdp/Dockerfile.msdp-nlp b/docker/msdp/Dockerfile.msdp-apex-nlp similarity index 79% rename from docker/msdp/Dockerfile.msdp-nlp rename to docker/msdp/Dockerfile.msdp-apex-nlp index beda0d1e27cb..bb4f118ec817 100644 --- a/docker/msdp/Dockerfile.msdp-nlp +++ b/docker/msdp/Dockerfile.msdp-apex-nlp @@ -1,5 +1,5 @@ -# Dockerfile.msdp-nlp -FROM pytorchignite/msdp-base:latest +# Dockerfile.msdp-apex-nlp +FROM pytorchignite/msdp-apex-base:latest # Ignite NLP dependencies RUN pip install --upgrade --no-cache-dir torchtext \ diff --git a/docker/msdp/Dockerfile.msdp-vision b/docker/msdp/Dockerfile.msdp-apex-vision similarity index 91% rename from docker/msdp/Dockerfile.msdp-vision rename to docker/msdp/Dockerfile.msdp-apex-vision index 00009b347ff8..c4d640e47172 100644 --- a/docker/msdp/Dockerfile.msdp-vision +++ b/docker/msdp/Dockerfile.msdp-apex-vision @@ -1,5 +1,5 @@ -# Dockerfile.msdp-vision -FROM pytorchignite/msdp-base:latest +# Dockerfile.msdp-apex-vision +FROM pytorchignite/msdp-apex-base:latest # Install opencv dependencies RUN apt-get update && \ diff --git a/docs/source/index.rst b/docs/source/index.rst index 588d57c74140..661a4d1bbe90 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -78,11 +78,11 @@ Pull a pre-built docker image from `our Docker Hub `_.