Skip to content

Commit

Permalink
* Add presets for nvCOMP 3.0.4 (pull #1434)
Browse files Browse the repository at this point in the history
  • Loading branch information
ds58 committed Nov 15, 2023
1 parent cfcc2d5 commit 212e7a8
Show file tree
Hide file tree
Showing 36 changed files with 4,903 additions and 5 deletions.
9 changes: 7 additions & 2 deletions .github/actions/deploy-centos/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ runs:
fi
if [[ "$CI_DEPLOY_PLATFORM" == "linux-x86_64" ]] && [[ -n ${CI_DEPLOY_NEED_CUDA:-} ]]; then
echo Installing CUDA, cuDNN, etc
echo Installing CUDA, cuDNN, nvCOMP, etc
curl -LO https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda-repo-rhel7-12-1-local-12.1.1_530.30.02-1.x86_64.rpm
curl -LO https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/libcudnn8-8.9.1.23-1.cuda12.1.x86_64.rpm
curl -LO https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/libcudnn8-devel-8.9.1.23-1.cuda12.1.x86_64.rpm
Expand All @@ -86,6 +86,11 @@ runs:
mv /usr/include/cudnn* /usr/include/nccl* /usr/local/cuda/include/
mv /usr/lib64/libcudnn* /usr/lib64/libnccl* /usr/local/cuda/lib64/
curl -LO https://developer.download.nvidia.com/compute/nvcomp/3.0.4/local_installers/nvcomp_3.0.4_x86_64_12.x.tgz
tar -xvf nvcomp_3.0.4_x86_64_12.x.tgz -C /usr/local/cuda/lib64/ --strip-components=1 lib/
tar -xvf nvcomp_3.0.4_x86_64_12.x.tgz -C /usr/local/cuda/include/ --strip-components=1 include/
rm -f nvcomp_3.0.4_x86_64_12.x.tgz
# Work around issues with CUDA 10.2/11.x
mv /usr/include/cublas* /usr/include/nvblas* /usr/local/cuda/include/ || true
mv /usr/lib64/libcublas* /usr/lib64/libnvblas* /usr/local/cuda/lib64/ || true
Expand All @@ -112,7 +117,7 @@ runs:
sed -i /warp_merge_sort.cuh/d /usr/local/cuda/include/cub/cub.cuh
# Remove downloaded archives and unused libraries to avoid running out of disk space
rm -f $(find /usr/local/cuda/ -name '*.a' -and -not -name libcudart_static.a -and -not -name libcudadevrt.a)
rm -f $(find /usr/local/cuda/ -name '*.a' -and -not -name libcudart_static.a -and -not -name libcudadevrt.a -and -not -name libnvcomp_device.a)
fi
if [[ "$CI_DEPLOY_MODULE" == "nvcodec" ]]; then
Expand Down
13 changes: 11 additions & 2 deletions .github/actions/deploy-ubuntu/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ runs:
export CUDA=cuda-repo-rhel8-12-3-local-12.3.0_545.23.06-1.aarch64.rpm
export CUDNN=8.9.5.29-1.cuda12.2.aarch64
export NCCL=2.18.5-1+cuda12.2.aarch64
export NVCOMP=nvcomp_3.0.4_SBSA_12.x
export USERLAND_BUILDME="buildme --aarch64"
elif [[ "$CI_DEPLOY_PLATFORM" == "linux-ppc64le" ]]; then
export ARCH=ppc64el
Expand All @@ -52,6 +53,7 @@ runs:
export CUDA=cuda-repo-rhel8-12-3-local-12.3.0_545.23.06-1.x86_64.rpm
export CUDNN=8.9.5.29-1.cuda12.2.x86_64
export NCCL=2.18.5-1+cuda12.2.x86_64
export NVCOMP=nvcomp_3.0.4_x86_64_12.x
fi
echo "ARCH=$ARCH" >> $GITHUB_ENV
echo "PREFIX=$PREFIX" >> $GITHUB_ENV
Expand Down Expand Up @@ -137,7 +139,7 @@ runs:
fi
if [[ -n ${ARCH_CUDA:-} ]] && [[ -n ${CI_DEPLOY_NEED_CUDA:-} ]]; then
echo Installing CUDA, cuDNN, etc
echo Installing CUDA, cuDNN, nvCOMP, etc
curl -LO https://developer.download.nvidia.com/compute/cuda/12.3.0/local_installers/$CUDA
curl -LO https://developer.download.nvidia.com/compute/cuda/repos/rhel8/$ARCH_CUDA/libcudnn8-$CUDNN.rpm
curl -LO https://developer.download.nvidia.com/compute/cuda/repos/rhel8/$ARCH_CUDA/libcudnn8-devel-$CUDNN.rpm
Expand All @@ -153,6 +155,13 @@ runs:
$SUDO mv /usr/include/cudnn* /usr/include/nccl* /usr/local/cuda/include/
$SUDO mv /usr/lib64/libcudnn* /usr/lib64/libnccl* /usr/local/cuda/lib64/
if [[ -n ${NVCOMP:-} ]]; then
curl -LO https://developer.download.nvidia.com/compute/nvcomp/3.0.4/local_installers/$NVCOMP.tgz
$SUDO tar -xvf nvcomp_*.tgz -C /usr/local/cuda/lib64/ --strip-components=1 lib/
$SUDO tar -xvf nvcomp_*.tgz -C /usr/local/cuda/include/ --strip-components=1 include/
rm -f $NVCOMP.tgz
fi
# Work around issues with CUDA 10.2/11.x
$SUDO mv /usr/include/cublas* /usr/include/nvblas* /usr/local/cuda/include/ || true
$SUDO mv /usr/lib64/libcublas* /usr/lib64/libnvblas* /usr/local/cuda/lib64/ || true
Expand All @@ -179,7 +188,7 @@ runs:
$SUDO sed -i /warp_merge_sort.cuh/d /usr/local/cuda/include/cub/cub.cuh
# Remove downloaded archives and unused libraries to avoid running out of disk space
$SUDO rm -f $(find /usr/local/cuda/ -name '*.a' -and -not -name libcudart_static.a -and -not -name libcudadevrt.a)
$SUDO rm -f $(find /usr/local/cuda/ -name '*.a' -and -not -name libcudart_static.a -and -not -name libcudadevrt.a -and -not -name libnvcomp_device.a)
fi
if [[ "$CI_DEPLOY_MODULE" == "nvcodec" ]]; then
Expand Down
10 changes: 9 additions & 1 deletion .github/actions/deploy-windows/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,13 @@ runs:
)
if "%CI_DEPLOY_PLATFORM%"=="windows-x86_64" if not "%CI_DEPLOY_NEED_CUDA%"=="" (
echo Installing CUDA, cuDNN, etc
echo Installing CUDA, cuDNN, nvCOMP, etc
curl -LO https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_522.06_windows.exe
curl -LO https://developer.download.nvidia.com/compute/cuda/12.3.0/local_installers/cuda_12.3.0_545.84_windows.exe
rem curl -LO https://developer.download.nvidia.com/compute/redist/cudnn/v8.8.0/local_installers/12.0/cudnn_8.8.0.121_windows.exe
python -m gdown.cli https://drive.google.com/uc?id=1-5QHvwDZC_1rhn5W6fRHNWicXRPtqt31
curl -LO http://www.winimage.com/zLibDll/zlib123dllx64.zip
curl -LO https://developer.download.nvidia.com/compute/nvcomp/3.0.4/local_installers/nvcomp_3.0.4_windows_12.x.zip
cuda_11.8.0_522.06_windows.exe -s
bash -c "rm -Rf 'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8'"
bash -c "mv 'C:/Program Files/NVIDIA Corporation/NvToolsExt' 'C:/Program Files/NVIDIA Corporation/NvToolsExt_old'"
Expand All @@ -105,13 +106,20 @@ runs:
rem cudnn_8.8.0.121_windows.exe -s
unzip cudnn-windows-x86_64-8.9.5.29_cuda12-archive.zip
unzip zlib123dllx64.zip
unzip nvcomp_3.0.4_windows_12.x.zip
rem move "%ProgramFiles%\NVIDIA\CUDNN\v8.8\bin\*.dll" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\bin"
rem move "%ProgramFiles%\NVIDIA\CUDNN\v8.8\include\*.h" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\include"
rem move "%ProgramFiles%\NVIDIA\CUDNN\v8.8\lib\x64\*.lib" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\lib\x64"
move cudnn-windows-x86_64-8.9.5.29_cuda12-archive\bin\*.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\bin"
move cudnn-windows-x86_64-8.9.5.29_cuda12-archive\include\*.h "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\include"
move cudnn-windows-x86_64-8.9.5.29_cuda12-archive\lib\x64\*.lib "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\lib\x64"
move dll_x64\zlibwapi.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\bin"
move include\* "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\include"
move include\gdeflate "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\include"
move include\native "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\include"
move include\nvcomp "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\include"
move lib\nvcomp*.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\bin"
move lib\nvcomp*.lib "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\lib\x64"
rem echo Applying hotfix to Visual Studio 2019 for CUDA
rem curl -LO https://raw.githubusercontent.com/microsoft/STL/main/stl/inc/cmath
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip
* CUDA 12.3.x https://developer.nvidia.com/cuda-downloads
* cuDNN 8.9.x https://developer.nvidia.com/cudnn
* NCCL 2.18.x https://developer.nvidia.com/nccl
* nvCOMP 3.0.x https://developer.nvidia.com/nvcomp
* NVIDIA Video Codec SDK 12.1.x https://developer.nvidia.com/nvidia-video-codec-sdk
* OpenCL 3.0.x https://github.com/KhronosGroup/OpenCL-ICD-Loader
* MXNet 1.9.x https://github.com/apache/incubator-mxnet
Expand Down
3 changes: 3 additions & 0 deletions cuda/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ To view the license for cuDNN included in these archives, click [here](https://d
### NVIDIA Collective Communications Library (NCCL)
To view the license for NCCL included in these archives, click [here](https://github.com/NVIDIA/nccl/blob/master/LICENSE.txt)

### NVIDIA nvCOMP
To view the license for nvCOMP included in these archives, click [here](https://github.com/NVIDIA/nvcomp/blob/main/LICENSE)

Introduction
------------
Expand All @@ -26,6 +28,7 @@ This directory contains the JavaCPP Presets module for:
* CUDA 12.3.0 https://developer.nvidia.com/cuda-zone
* cuDNN 8.9.5 https://developer.nvidia.com/cudnn
* NCCL 2.18.5 https://developer.nvidia.com/nccl
* nvCOMP 3.0.4 https://developer.nvidia.com/nvcomp

Please refer to the parent README.md file for more detailed information about the JavaCPP Presets.

Expand Down
95 changes: 95 additions & 0 deletions cuda/samples/nvcompLZ4Example.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

import org.bytedeco.cuda.cudart.CUstream_st;
import org.bytedeco.cuda.global.nvcomp;
import org.bytedeco.cuda.nvcomp.*;
import org.bytedeco.javacpp.BytePointer;
import org.bytedeco.javacpp.Loader;

import static org.bytedeco.cuda.global.cudart.*;
import static org.bytedeco.cuda.global.nvcomp.*;

// https://github.com/NVIDIA/nvcomp/blob/main/examples/high_level_quickstart_example.cpp
public class nvcompLZ4Example {

    /**
     * Fails fast when a CUDA runtime call reports an error. The original
     * sample assigned every return code to a local and ignored it, which
     * let a failed allocation or copy fall through into later native calls.
     *
     * @param err  return code of a CUDA runtime call
     * @param what short description of the call, for the error message
     */
    private static void checkCuda(int err, String what) {
        if (err != cudaSuccess) {
            throw new RuntimeException("CUDA error " + err + " during " + what);
        }
    }

    /**
     * Compresses a device-resident buffer with the high-level LZ4 manager,
     * then decompresses it with a manager reconstructed from the compressed
     * buffer alone — the "manager factory" use case, where the receiver does
     * not know how the buffer was compressed. Note that creating the manager
     * this way synchronizes the stream, since the compressed buffer must be
     * read to construct it.
     *
     * @param device_input_ptrs device pointer to the uncompressed input
     * @param input_buffer_len  length of the input in bytes
     */
    private static void decomp_compressed_with_manager_factory_example(BytePointer device_input_ptrs, long input_buffer_len) {
        CUstream_st stream = new CUstream_st();
        checkCuda(cudaStreamCreate(stream), "cudaStreamCreate");

        long chunk_size = 1 << 16;

        nvcompBatchedLZ4Opts_t format_opts = new nvcompBatchedLZ4Opts_t();
        format_opts.data_type(NVCOMP_TYPE_CHAR);
        LZ4Manager nvcomp_manager = new LZ4Manager(chunk_size, format_opts, stream, 0, nvcomp.NoComputeNoVerify);
        CompressionConfig comp_config = nvcomp_manager.configure_compression(input_buffer_len);

        BytePointer comp_buffer = new BytePointer();
        checkCuda(cudaMalloc(comp_buffer, comp_config.max_compressed_buffer_size()), "cudaMalloc(comp_buffer)");

        nvcomp_manager.compress(device_input_ptrs, comp_buffer, comp_config);

        // Construct a new nvcomp manager from the compressed buffer.
        // We could reuse nvcomp_manager from above, but this demonstrates the
        // case where a buffer is received and the user doesn't know how it was
        // compressed.
        nvcompManagerBase decomp_nvcomp_manager = create_manager(comp_buffer, stream, 0, NoComputeNoVerify);

        DecompressionConfig decomp_config = decomp_nvcomp_manager.configure_decompression(comp_buffer);
        BytePointer res_decomp_buffer = new BytePointer();
        checkCuda(cudaMalloc(res_decomp_buffer, decomp_config.decomp_data_size()), "cudaMalloc(res_decomp_buffer)");

        decomp_nvcomp_manager.decompress(res_decomp_buffer, comp_buffer, decomp_config);

        checkCuda(cudaFree(comp_buffer), "cudaFree(comp_buffer)");
        checkCuda(cudaFree(res_decomp_buffer), "cudaFree(res_decomp_buffer)");
        checkCuda(cudaStreamSynchronize(stream), "cudaStreamSynchronize");
        checkCuda(cudaStreamDestroy(stream), "cudaStreamDestroy");
    }

    /**
     * Fills a 1 MB host buffer with random lowercase letters, copies it to
     * the device, and runs the compress/decompress round-trip example.
     */
    public static void main(String[] args) {
        Loader.load(nvcomp.class);

        // Initialize a random array of chars ('a'..'z').
        int input_buffer_len = 1000000;
        byte[] uncompressed_data = new byte[input_buffer_len];

        for (int i = 0; i < input_buffer_len; i++) {
            uncompressed_data[i] = (byte) (Math.random() * 26 + 'a');
        }

        BytePointer uncompressed_data_ptr = new BytePointer(uncompressed_data);

        BytePointer device_input_ptrs = new BytePointer();

        checkCuda(cudaMalloc(device_input_ptrs, input_buffer_len), "cudaMalloc(device_input_ptrs)");
        checkCuda(cudaMemcpy(device_input_ptrs, uncompressed_data_ptr, input_buffer_len, cudaMemcpyDefault), "cudaMemcpy");

        decomp_compressed_with_manager_factory_example(device_input_ptrs, input_buffer_len);

        // Release the device input buffer (leaked in the original sample).
        checkCuda(cudaFree(device_input_ptrs), "cudaFree(device_input_ptrs)");
    }
}
Loading

0 comments on commit 212e7a8

Please sign in to comment.