diff --git a/README.md b/README.md index cd6c2864f500..8b837114ceb5 100644 --- a/README.md +++ b/README.md @@ -308,11 +308,11 @@ docker run --gpus all -it -v $PWD:/workspace/project --network=host --shm-size 1 Available pre-built images are : -- `pytorchignite/base:latest | pytorchignite/hvd-base:latest` +- `pytorchignite/base:latest | pytorchignite/hvd-base:latest | pytorchignite/msdp-apex-base:latest` - `pytorchignite/apex:latest | pytorchignite/hvd-apex:latest` -- `pytorchignite/vision:latest | pytorchignite/hvd-vision:latest` +- `pytorchignite/vision:latest | pytorchignite/hvd-vision:latest | pytorchignite/msdp-apex-vision:latest` - `pytorchignite/apex-vision:latest | pytorchignite/hvd-apex-vision:latest` -- `pytorchignite/nlp:latest | pytorchignite/hvd-nlp:latest` +- `pytorchignite/nlp:latest | pytorchignite/hvd-nlp:latest | pytorchignite/msdp-apex-nlp:latest` - `pytorchignite/apex-nlp:latest | pytorchignite/hvd-apex-nlp:latest` For more details, see [here](docker). diff --git a/docker/README.md b/docker/README.md index 905ad794575e..658d9bdb47b3 100644 --- a/docker/README.md +++ b/docker/README.md @@ -50,7 +50,13 @@ Available Tensor Operations: * `docker pull pytorchignite/hvd-apex-vision:latest` - [hvd/Dockerfile.hvd-apex-nlp](hvd/Dockerfile.hvd-apex-nlp): base Horovod apex with useful NLP libraries * `docker pull pytorchignite/hvd-apex-nlp:latest` - +- [msdp/Dockerfile.msdp-apex-base](msdp/Dockerfile.msdp-apex-base): multi-stage MSDeepSpeed build with latest Pytorch, Ignite image with minimal dependencies + * `docker pull pytorchignite/msdp-apex-base:latest` +- [msdp/Dockerfile.msdp-apex-vision](msdp/Dockerfile.msdp-apex-vision): base MSDeepSpeed build with useful computer vision libraries + * `docker pull pytorchignite/msdp-apex-vision:latest` +- [msdp/Dockerfile.msdp-apex-nlp](msdp/Dockerfile.msdp-apex-nlp): base MSDeepSpeed build with useful NLP libraries + * `docker pull pytorchignite/msdp-apex-nlp:latest` + ## How to use ```bash diff --git 
a/docker/msdp/Dockerfile.msdp-apex-base b/docker/msdp/Dockerfile.msdp-apex-base new file mode 100644 index 000000000000..7fc238115f67 --- /dev/null +++ b/docker/msdp/Dockerfile.msdp-apex-base @@ -0,0 +1,70 @@ +# Multi-stage build +# 1/Building DeepSpeed (and its bundled third_party apex) with pytorch:1.6.0-cuda10.1-cudnn7-devel +FROM pytorch/pytorch:1.6.0-cuda10.1-cudnn7-devel AS msdp-builder + +ARG ARG_TORCH_CUDA_ARCH_LIST="6.0;6.1;6.2;7.0;7.5" +ENV TORCH_CUDA_ARCH_LIST=$ARG_TORCH_CUDA_ARCH_LIST + +# Install git +RUN apt-get update && apt-get install -y --no-install-recommends git && \ + rm -rf /var/lib/apt/lists/* + +# For pip --use-feature option +RUN python -m pip install --upgrade pip + +# From https://github.com/microsoft/DeepSpeed/blob/master/docker/Dockerfile +############################################################################## +# DeepSpeed +############################################################################## +RUN git clone https://github.com/microsoft/DeepSpeed.git /tmp/DeepSpeed +RUN cd /tmp/DeepSpeed && \ + git checkout . 
&& \ + git checkout master && \ + ./install.sh --allow_sudo + +# 2/Build runtime image +FROM pytorch/pytorch:1.6.0-cuda10.1-cudnn7-runtime + +# Compilers for building cpufeature wheel; apt lists are removed in this same layer so they do not stay in the image +RUN apt-get update && apt-get install -y --no-install-recommends g++ gcc && rm -rf /var/lib/apt/lists/* + +# Apex +COPY --from=msdp-builder /tmp/DeepSpeed/third_party/apex/dist/apex-*.whl /apex/ +RUN cd /apex && \ + pip install --no-cache-dir apex-*.whl && \ + rm -fr /apex + +# MSDeepSpeed +COPY --from=msdp-builder /tmp/DeepSpeed/dist/deepspeed-*.whl /msdp/ +RUN cd /msdp && \ + pip install --no-cache-dir deepspeed-*.whl && \ + rm -fr /msdp + +# Install tzdata / git +RUN apt-get update && \ + ln -fs /usr/share/zoneinfo/America/New_York /etc/localtime && \ + apt-get install -y tzdata && \ + dpkg-reconfigure --frontend noninteractive tzdata && \ + apt-get -y install --no-install-recommends git && \ + rm -rf /var/lib/apt/lists/* + +# Ignite main dependencies +RUN pip install --upgrade --no-cache-dir pytorch-ignite \ + tensorboard \ + tqdm +# Checkout Ignite examples only +RUN mkdir -p pytorch-ignite-examples && \ + cd pytorch-ignite-examples && \ + git init && \ + git config core.sparsecheckout true && \ + echo examples >> .git/info/sparse-checkout && \ + git remote add -f origin https://github.com/pytorch/ignite.git && \ + git pull origin master + +WORKDIR /workspace + +ENTRYPOINT ["/bin/bash"] + + + + diff --git a/docker/msdp/Dockerfile.msdp-apex-nlp b/docker/msdp/Dockerfile.msdp-apex-nlp new file mode 100644 index 000000000000..bb4f118ec817 --- /dev/null +++ b/docker/msdp/Dockerfile.msdp-apex-nlp @@ -0,0 +1,10 @@ +# Dockerfile.msdp-apex-nlp +FROM pytorchignite/msdp-apex-base:latest + +# Ignite NLP dependencies +RUN pip install --upgrade --no-cache-dir torchtext \ + transformers \ + spacy \ + nltk + +ENTRYPOINT ["/bin/bash"] diff --git a/docker/msdp/Dockerfile.msdp-apex-vision b/docker/msdp/Dockerfile.msdp-apex-vision new file mode 100644 index 000000000000..c4d640e47172 --- /dev/null +++ 
b/docker/msdp/Dockerfile.msdp-apex-vision @@ -0,0 +1,21 @@ +# Dockerfile.msdp-apex-vision: vision image built on top of pytorchignite/msdp-apex-base +FROM pytorchignite/msdp-apex-base:latest + +# Install opencv system dependencies (apt lists are removed in the same layer) +RUN apt-get update && \ + apt-get -y install --no-install-recommends libglib2.0 \ + libsm6 \ + libxext6 \ + libxrender-dev && \ + rm -rf /var/lib/apt/lists/* + +# Ignite vision dependencies (Python packages, installed without keeping the pip cache) +RUN pip install --upgrade --no-cache-dir albumentations \ + image-dataset-viz \ + numpy \ + opencv-python \ + py_config_runner \ + pillow \ + "trains>=0.15.0" + +ENTRYPOINT ["/bin/bash"] diff --git a/docs/source/index.rst b/docs/source/index.rst index 18be28e46afd..661a4d1bbe90 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -78,11 +78,11 @@ Pull a pre-built docker image from `our Docker Hub `_.