Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

21.11 DLFW #73

Merged
merged 14 commits into from
Dec 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 31 additions & 8 deletions ci/test_container.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,17 @@ set -e

container=$1

# Test NVTabular
##############
# Unit tests #
##############

## Test NVTabular - All containers
pytest /nvtabular/tests/unit

# Test HugeCTR
## Test HugeCTR - Training container
if [ "$container" == "merlin-training" ]; then
layers_test && \
# Disabled: runs out of memory (OOM) in blossom
# layers_test && \
checker_test && \
data_reader_test && \
device_map_test && \
Expand All @@ -18,11 +23,29 @@ if [ "$container" == "merlin-training" ]; then
model_oversubscriber_test && \
parser_test && \
auc_test
## Test Transformers4Rec - Tensorflow container
elif [ "$container" == "merlin-tensorflow-training" ]; then
pytest /transformers4rec/tests/tf
## Test Transformers4Rec - PyTorch container
elif [ "$container" == "merlin-pytorch-training" ]; then
pytest /transformers4rec/tests/torch
## Test HugeCTR & Transformers4Rec - Inference container
elif [ "$container" == "merlin-inference" ]; then
/usr/local/hugectr/bin/inference_test
# HugeCTR - Deactivated until it is self-contained and it runs
# inference_test
# Transformers4Rec
pytest /transformers4rec/tests
fi

# Test Transformers4Rec
if [ "$container" != "merlin-training" ]; then
sh -c 'pytest /transformers4rec/tests; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'
fi
#####################
# Integration tests #
#####################

## Test NVTabular
# /nvtabular/ci/test_integration.sh $container 0

## Test HugeCTR
# Waiting to sync integration tests with them

## Test Transformers4Rec
# Waiting for integration tests to be developed
22 changes: 15 additions & 7 deletions docker/dockerfile.ctr
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,14 @@ RUN apt update -y --fix-missing && \
apt upgrade -y && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
gdb \
graphviz \
valgrind \
zlib1g-dev lsb-release clang-format \
libboost-serialization-dev \
openssl \
libssl-dev \
protobuf-compiler \
libtbb-dev \
libspdlog-dev \
libaio-dev \
slapd && \
apt install -y --no-install-recommends software-properties-common && \
Expand All @@ -44,9 +44,17 @@ RUN apt update -y --fix-missing && \

RUN pip install git+git://github.com/gevent/gevent.git@21.8.0#egg=gevent

# Install cmake from source
RUN apt remove --purge cmake -y && wget http://www.cmake.org/files/v3.21/cmake-3.21.1.tar.gz && \
tar xf cmake-3.21.1.tar.gz && cd cmake-3.21.1 && ./configure && make && make install

# Install spdlog from source
RUN git clone --branch v1.9.2 https://github.com/gabime/spdlog.git build-env && \
pushd build-env && \
mkdir build && cd build && cmake .. && make -j && make install && \
popd && \
rm -rf build-env

# Install arrow from source
ENV ARROW_HOME=/usr/local
RUN git clone --branch apache-arrow-4.0.1 --recurse-submodules https://github.com/apache/arrow.git build-env && \
Expand Down Expand Up @@ -116,6 +124,7 @@ RUN git clone https://github.com/rapidsai/cudf.git build-env && cd build-env/ &&
export CUDF_HOME=${PWD} && \
export CUDF_ROOT=${PWD}/cpp/build/ && \
export CMAKE_LIBRARY_PATH=${CUDA_CUDA_LIBRARY} && \
export CUDAFLAGS=-Wno-error=unknown-pragmas && \
./build.sh libcudf cudf dask_cudf --allgpuarch && \
protoc -I=python/cudf/cudf/utils/metadata --python_out=/usr/local/lib/python3.8/dist-packages/cudf/utils/metadata python/cudf/cudf/utils/metadata/orc_column_statistics.proto && \
popd && \
Expand Down Expand Up @@ -211,10 +220,9 @@ RUN rm -rf /usr/lib/x86_64-linux-gnu/libibverbs.so && \
ln -s /usr/lib/x86_64-linux-gnu/libibverbs.so.1.14.36.0 /usr/lib/x86_64-linux-gnu/libibverbs.so

# Install hugectr
RUN mkdir -p /var/tmp && cd /var/tmp && git clone https://github.com/NVIDIA-Merlin/HugeCTR.git HugeCTR && cd - && \
cd /var/tmp/HugeCTR && if [ "$RELEASE" == "true" ] && [ ${HUGECTR_VER} != "vnightly" ]; then git fetch --all --tags && git checkout tags/${HUGECTR_VER}; else git checkout master; fi && \
git submodule update --init --recursive
RUN cd /var/tmp/HugeCTR && \
RUN git clone https://github.com/NVIDIA-Merlin/HugeCTR.git /hugectr && \
cd /hugectr && if [ "$RELEASE" == "true" ] && [ ${HUGECTR_VER} != "vnightly" ]; then git fetch --all --tags && git checkout tags/${HUGECTR_VER}; else git checkout master; fi && \
git submodule update --init --recursive && \
mkdir build && cd build && \
LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs/:$LD_LIBRARY_PATH && \
export PATH=$PATH:/usr/local/cuda-${CUDA_SHORT_VERSION}/compat/ && \
Expand All @@ -223,9 +231,9 @@ RUN cd /var/tmp/HugeCTR && \
make -j$(nproc) && make install && \
chmod +x /usr/local/hugectr/bin/* && \
chmod +x /usr/local/hugectr/lib/* && \
cd /var/tmp/HugeCTR/onnx_converter && \
cd /hugectr/onnx_converter && \
python3 setup.py install && \
rm -rf /var/tmp/HugeCTR
rm -rf /hugectr/build

ENV PATH=/usr/local/hugectr/bin:$PATH
ENV LIBRARY_PATH=/usr/local/hugectr/lib:$LIBRARY_PATH
Expand Down
83 changes: 2 additions & 81 deletions docker/dockerfile.tf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# syntax=docker/dockerfile:1
ARG IMAGE=nvcr.io/nvidia/tensorflow:21.10-tf2-py3
ARG IMAGE=nvcr.io/nvidia/tensorflow:21.11-tf2-py3
FROM ${IMAGE} AS phase1
ENV CUDA_SHORT_VERSION=11.4

Expand All @@ -9,8 +9,6 @@ ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extra
ENV DEBIAN_FRONTEND=noninteractive

ARG RELEASE=false
ARG RMM_VER=vnightly
ARG CUDF_VER=vnightly
ARG NVTAB_VER=vnightly
ARG TF4REC_VER=vnightly
ARG HUGECTR_VER=vnightly
Expand All @@ -21,9 +19,6 @@ ENV CUDA_PATH=$CUDA_HOME
ENV CUDA_CUDA_LIBRARY=${CUDA_HOME}/lib64/stubs
ENV PATH=${CUDA_HOME}/lib64/:${PATH}:${CUDA_HOME}/bin

# Build env variables for rmm
ENV INSTALL_PREFIX=/usr

RUN apt update -y --fix-missing && \
apt upgrade -y && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
Expand Down Expand Up @@ -52,82 +47,8 @@ RUN git clone --branch v1.9.2 https://github.com/gabime/spdlog.git build-env &&
popd && \
rm -rf build-env

# Install arrow from source
ENV ARROW_HOME=/usr/local
RUN git clone --branch apache-arrow-4.0.1 --recurse-submodules https://github.com/apache/arrow.git build-env && \
pushd build-env && \
export PARQUET_TEST_DATA="${PWD}/cpp/submodules/parquet-testing/data" && \
export ARROW_TEST_DATA="${PWD}/testing/data" && \
pip install -r python/requirements-build.txt && \
mkdir cpp/release && \
pushd cpp/release && \
cmake -DCMAKE_INSTALL_PREFIX=${ARROW_HOME} \
-DCMAKE_INSTALL_LIBDIR=lib \
-DCMAKE_LIBRARY_PATH=${CUDA_CUDA_LIBRARY} \
-DARROW_FLIGHT=ON \
-DARROW_GANDIVA=OFF \
-DARROW_ORC=ON \
-DARROW_WITH_BZ2=ON \
-DARROW_WITH_ZLIB=ON \
-DARROW_WITH_ZSTD=ON \
-DARROW_WITH_LZ4=ON \
-DARROW_WITH_SNAPPY=ON \
-DARROW_WITH_BROTLI=ON \
-DARROW_PARQUET=ON \
-DARROW_PYTHON=ON \
-DARROW_PLASMA=ON \
-DARROW_BUILD_TESTS=ON \
-DARROW_CUDA=ON \
-DARROW_DATASET=ON \
.. && \
make -j$(nproc) && \
make install && \
popd && \
pushd python && \
export PYARROW_WITH_PARQUET=ON && \
export PYARROW_WITH_CUDA=ON && \
export PYARROW_WITH_ORC=ON && \
export PYARROW_WITH_DATASET=ON && \
python setup.py build_ext --build-type=release bdist_wheel && \
pip install dist/*.whl && \
popd && \
popd && \
rm -rf build-env

FROM phase1 as phase2

ARG RELEASE=false
ARG RMM_VER=vnightly
ARG CUDF_VER=vnightly

# Install rmm from source
RUN git clone https://github.com/rapidsai/rmm.git build-env && cd build-env/ && \
if [ "$RELEASE" == "true" ] && [ ${RMM_VER} != "vnightly" ] ; then git fetch --all --tags && git checkout tags/${RMM_VER}; else git checkout branch-21.08; fi; \
sed -i '/11.2/ a "11.4": "11.x",' python/setup.py && \
cd ..; \
pushd build-env && \
./build.sh librmm && \
pip install python/. && \
popd && \
rm -rf build-env


# Build env for CUDF build
RUN git clone https://github.com/rapidsai/cudf.git build-env && cd build-env/ && \
if [ "$RELEASE" == "true" ] && [ ${CUDF_VER} != "vnightly" ] ; then git fetch --all --tags && git checkout tags/${CUDF_VER}; else git checkout branch-21.08; fi; \
git submodule update --init --recursive && \
cd .. && \
pushd build-env && \
export CUDF_HOME=${PWD} && \
export CUDF_ROOT=${PWD}/cpp/build/ && \
export CMAKE_LIBRARY_PATH=${CUDA_CUDA_LIBRARY} && \
./build.sh libcudf cudf dask_cudf --allgpuarch && \
protoc -I=python/cudf/cudf/utils/metadata --python_out=/usr/local/lib/python3.8/dist-packages/cudf/utils/metadata python/cudf/cudf/utils/metadata/orc_column_statistics.proto && \
popd && \
rm -rf build-env

FROM phase2 AS phase3

ARG RELEASE=false
ARG NVTAB_VER=vnightly
ARG TF4REC_VER=vnightly
Expand Down Expand Up @@ -167,7 +88,7 @@ RUN git clone https://github.com/rapidsai/asvdb.git build-env && \
rm -rf build-env

RUN pip install dask==2021.07.1 distributed==2021.07.1 dask[dataframe]==2021.07.1 dask-cuda
FROM phase3 as phase4
FROM phase2 as phase3

ARG RELEASE=false
ARG HUGECTR_VER=vnightly
Expand Down
75 changes: 1 addition & 74 deletions docker/dockerfile.torch
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ARG IMAGE=nvcr.io/nvidia/pytorch:21.10-py3
ARG IMAGE=nvcr.io/nvidia/pytorch:21.11-py3
FROM ${IMAGE}
ENV CUDA_SHORT_VERSION=11.4

Expand All @@ -7,8 +7,6 @@ ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extra

ENV DEBIAN_FRONTEND=noninteractive

ARG RMM_VER=vnightly
ARG CUDF_VER=vnightly
ARG NVTAB_VER=vnightly
ARG TF4REC_VER=vnightly
ARG RELEASE=false
Expand All @@ -18,9 +16,6 @@ ENV CUDA_PATH=$CUDA_HOME
ENV CUDA_CUDA_LIBRARY=${CUDA_HOME}/lib64/stubs
ENV PATH=${CUDA_HOME}/lib64/:${PATH}:${CUDA_HOME}/bin

# Build env variables for rmm
ENV INSTALL_PREFIX=/usr

RUN apt update -y --fix-missing && \
apt upgrade -y && \
apt install -y --no-install-recommends software-properties-common && \
Expand All @@ -34,7 +29,6 @@ RUN apt install -y --no-install-recommends \
autoconf \
bison \
flex \
libjemalloc-dev \
wget \
libssl-dev \
protobuf-compiler && \
Expand All @@ -57,73 +51,6 @@ RUN git clone --branch v1.9.2 https://github.com/gabime/spdlog.git build-env &&
popd && \
rm -rf build-env

# Install arrow from source
ENV ARROW_HOME=/usr/local
RUN git clone --branch apache-arrow-4.0.1 --recurse-submodules https://github.com/apache/arrow.git build-env && \
pushd build-env && \
export PARQUET_TEST_DATA="${PWD}/cpp/submodules/parquet-testing/data" && \
export ARROW_TEST_DATA="${PWD}/testing/data" && \
pip install -r python/requirements-build.txt && \
mkdir cpp/release && \
pushd cpp/release && \
cmake -DCMAKE_INSTALL_PREFIX=${ARROW_HOME} \
-DCMAKE_INSTALL_LIBDIR=lib \
-DCMAKE_LIBRARY_PATH=${CUDA_CUDA_LIBRARY} \
-DARROW_FLIGHT=ON \
-DARROW_GANDIVA=OFF \
-DARROW_ORC=ON \
-DARROW_WITH_BZ2=ON \
-DARROW_WITH_ZLIB=ON \
-DARROW_WITH_ZSTD=ON \
-DARROW_WITH_LZ4=ON \
-DARROW_WITH_SNAPPY=ON \
-DARROW_WITH_BROTLI=ON \
-DARROW_PARQUET=ON \
-DARROW_PYTHON=ON \
-DARROW_PLASMA=ON \
-DARROW_BUILD_TESTS=ON \
-DARROW_CUDA=ON \
-DARROW_DATASET=ON \
.. && \
make -j$(nproc) && \
make install && \
popd && \
pushd python && \
export PYARROW_WITH_PARQUET=ON && \
export PYARROW_WITH_CUDA=ON && \
export PYARROW_WITH_ORC=ON && \
export PYARROW_WITH_DATASET=ON && \
python setup.py build_ext --build-type=release bdist_wheel && \
pip install dist/*.whl && \
popd && \
popd && \
rm -rf build-env

# Install rmm from source
RUN git clone https://github.com/rapidsai/rmm.git build-env && cd build-env/ && \
if [ "$RELEASE" == "true" ] && [ ${RMM_VER} != "vnightly" ] ; then git fetch --all --tags && git checkout tags/${RMM_VER}; else git checkout branch-21.08; fi; \
sed -i '/11.2/ a "11.4": "11.x",' python/setup.py && \
cd ..; \
pushd build-env && \
./build.sh librmm && \
pip install python/. && \
popd && \
rm -rf build-env

# Build env for CUDF build
RUN git clone https://github.com/rapidsai/cudf.git build-env && cd build-env/ && \
if [ "$RELEASE" == "true" ] && [ ${CUDF_VER} != "vnightly" ] ; then git fetch --all --tags && git checkout tags/${CUDF_VER}; else git checkout branch-21.08; fi; \
git submodule update --init --recursive && \
cd .. && \
pushd build-env && \
export CUDF_HOME=${PWD} && \
export CUDF_ROOT=${PWD}/cpp/build/ && \
export CMAKE_LIBRARY_PATH=${CUDA_CUDA_LIBRARY} && \
./build.sh libcudf cudf dask_cudf --allgpuarch && \
protoc -I=python/cudf/cudf/utils/metadata --python_out=/opt/conda/lib/python3.8/site-packages/cudf/utils/metadata python/cudf/cudf/utils/metadata/orc_column_statistics.proto && \
popd && \
rm -rf build-env

SHELL ["/bin/bash", "-c"]

RUN pip install numpy==1.20.3
Expand Down
12 changes: 7 additions & 5 deletions docker/dockerfile.tri
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# syntax=docker/dockerfile:1
ARG TRITON_VERSION=21.10
ARG TRITON_VERSION=21.11
ARG IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3
FROM ${IMAGE}
ARG RMM_VER=vnightly
Expand Down Expand Up @@ -37,7 +37,6 @@ RUN apt install -y --no-install-recommends \
bison \
flex \
libboost-serialization-dev \
libjemalloc-dev \
wget \
libssl-dev \
zlib1g-dev \
Expand Down Expand Up @@ -129,6 +128,7 @@ RUN git clone https://github.com/rapidsai/cudf.git build-env && cd build-env/ &&
export CUDF_HOME=${PWD} && \
export CUDF_ROOT=${PWD}/cpp/build/ && \
export CMAKE_LIBRARY_PATH=${CUDA_CUDA_LIBRARY} && \
export CUDAFLAGS=-Wno-error=unknown-pragmas && \
./build.sh libcudf cudf dask_cudf --allgpuarch && \
protoc -I=python/cudf/cudf/utils/metadata --python_out=/usr/local/lib/python3.8/dist-packages/cudf/utils/metadata python/cudf/cudf/utils/metadata/orc_column_statistics.proto && \
popd && \
Expand Down Expand Up @@ -208,12 +208,13 @@ ENV CPATH=/usr/local/include:$CPATH

# Install HugeCTR
RUN apt update -y && apt install rapidjson-dev -y
RUN git clone https://github.com/NVIDIA-Merlin/HugeCTR.git /repos/HugeCTR && \
cd /repos/HugeCTR && if [ "$RELEASE" == "true" ] && [ ${HUGECTR_VER} != "vnightly" ]; then git fetch --all --tags && git checkout tags/${HUGECTR_VER}; else git checkout master; fi && \
RUN git clone https://github.com/NVIDIA-Merlin/HugeCTR.git /hugectr && \
cd /hugectr && if [ "$RELEASE" == "true" ] && [ ${HUGECTR_VER} != "vnightly" ]; then git fetch --all --tags && git checkout tags/${HUGECTR_VER}; else git checkout master; fi && \
git submodule update --init --recursive && \
mkdir -p build && cd build &&\
cmake -DCMAKE_BUILD_TYPE=Release -DSM=$SM -DENABLE_INFERENCE=ON .. && \
make -j$(nproc) && make install && \
chmod +x /usr/local/hugectr/bin/* &&\
export CPATH=/usr/local/hugectr/include:$CPATH && \
export LIBRARY_PATH=/usr/local/hugectr/lib:$LIBRARY_PATH && \
git clone https://github.com/triton-inference-server/hugectr_backend /repos/hugectr_inference_backend && \
Expand All @@ -223,7 +224,8 @@ RUN git clone https://github.com/NVIDIA-Merlin/HugeCTR.git /repos/HugeCTR && \
-DTRITON_COMMON_REPO_TAG="r$TRITON_VERSION" \
-DTRITON_CORE_REPO_TAG="r$TRITON_VERSION" \
-DTRITON_BACKEND_REPO_TAG="r$TRITON_VERSION" .. && \
make -j$(nproc) && make install
make -j$(nproc) && make install && \
rm -rf /hugectr/build

ENV CPATH=/usr/local/hugectr/include:$CPATH
ENV LIBRARY_PATH=/usr/local/hugectr/lib:$LIBRARY_PATH
Expand Down