Skip to content

Commit

Permalink
* Add presets for nvCOMP 3.0.4 (pull #1434)
Browse files Browse the repository at this point in the history
  • Loading branch information
ds58 committed Nov 15, 2023
1 parent cfcc2d5 commit 212e7a8
Show file tree
Hide file tree
Showing 36 changed files with 4,903 additions and 5 deletions.
9 changes: 7 additions & 2 deletions .github/actions/deploy-centos/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ runs:
fi
if [[ "$CI_DEPLOY_PLATFORM" == "linux-x86_64" ]] && [[ -n ${CI_DEPLOY_NEED_CUDA:-} ]]; then
echo Installing CUDA, cuDNN, etc
echo Installing CUDA, cuDNN, nvCOMP, etc
curl -LO https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda-repo-rhel7-12-1-local-12.1.1_530.30.02-1.x86_64.rpm
curl -LO https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/libcudnn8-8.9.1.23-1.cuda12.1.x86_64.rpm
curl -LO https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/libcudnn8-devel-8.9.1.23-1.cuda12.1.x86_64.rpm
Expand All @@ -86,6 +86,11 @@ runs:
mv /usr/include/cudnn* /usr/include/nccl* /usr/local/cuda/include/
mv /usr/lib64/libcudnn* /usr/lib64/libnccl* /usr/local/cuda/lib64/
curl -LO https://developer.download.nvidia.com/compute/nvcomp/3.0.4/local_installers/nvcomp_3.0.4_x86_64_12.x.tgz
tar -xvf nvcomp_3.0.4_x86_64_12.x.tgz -C /usr/local/cuda/lib64/ --strip-components=1 lib/
tar -xvf nvcomp_3.0.4_x86_64_12.x.tgz -C /usr/local/cuda/include/ --strip-components=1 include/
rm -f nvcomp_3.0.4_x86_64_12.x.tgz
# Work around issues with CUDA 10.2/11.x
mv /usr/include/cublas* /usr/include/nvblas* /usr/local/cuda/include/ || true
mv /usr/lib64/libcublas* /usr/lib64/libnvblas* /usr/local/cuda/lib64/ || true
Expand All @@ -112,7 +117,7 @@ runs:
sed -i /warp_merge_sort.cuh/d /usr/local/cuda/include/cub/cub.cuh
# Remove downloaded archives and unused libraries to avoid running out of disk space
rm -f $(find /usr/local/cuda/ -name '*.a' -and -not -name libcudart_static.a -and -not -name libcudadevrt.a)
rm -f $(find /usr/local/cuda/ -name '*.a' -and -not -name libcudart_static.a -and -not -name libcudadevrt.a -and -not -name libnvcomp_device.a)
fi
if [[ "$CI_DEPLOY_MODULE" == "nvcodec" ]]; then
Expand Down
13 changes: 11 additions & 2 deletions .github/actions/deploy-ubuntu/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ runs:
export CUDA=cuda-repo-rhel8-12-3-local-12.3.0_545.23.06-1.aarch64.rpm
export CUDNN=8.9.5.29-1.cuda12.2.aarch64
export NCCL=2.18.5-1+cuda12.2.aarch64
export NVCOMP=nvcomp_3.0.4_SBSA_12.x
export USERLAND_BUILDME="buildme --aarch64"
elif [[ "$CI_DEPLOY_PLATFORM" == "linux-ppc64le" ]]; then
export ARCH=ppc64el
Expand All @@ -52,6 +53,7 @@ runs:
export CUDA=cuda-repo-rhel8-12-3-local-12.3.0_545.23.06-1.x86_64.rpm
export CUDNN=8.9.5.29-1.cuda12.2.x86_64
export NCCL=2.18.5-1+cuda12.2.x86_64
export NVCOMP=nvcomp_3.0.4_x86_64_12.x
fi
echo "ARCH=$ARCH" >> $GITHUB_ENV
echo "PREFIX=$PREFIX" >> $GITHUB_ENV
Expand Down Expand Up @@ -137,7 +139,7 @@ runs:
fi
if [[ -n ${ARCH_CUDA:-} ]] && [[ -n ${CI_DEPLOY_NEED_CUDA:-} ]]; then
echo Installing CUDA, cuDNN, etc
echo Installing CUDA, cuDNN, nvCOMP, etc
curl -LO https://developer.download.nvidia.com/compute/cuda/12.3.0/local_installers/$CUDA
curl -LO https://developer.download.nvidia.com/compute/cuda/repos/rhel8/$ARCH_CUDA/libcudnn8-$CUDNN.rpm
curl -LO https://developer.download.nvidia.com/compute/cuda/repos/rhel8/$ARCH_CUDA/libcudnn8-devel-$CUDNN.rpm
Expand All @@ -153,6 +155,13 @@ runs:
$SUDO mv /usr/include/cudnn* /usr/include/nccl* /usr/local/cuda/include/
$SUDO mv /usr/lib64/libcudnn* /usr/lib64/libnccl* /usr/local/cuda/lib64/
if [[ -n ${NVCOMP:-} ]]; then
curl -LO https://developer.download.nvidia.com/compute/nvcomp/3.0.4/local_installers/$NVCOMP.tgz
$SUDO tar -xvf nvcomp_*.tgz -C /usr/local/cuda/lib64/ --strip-components=1 lib/
$SUDO tar -xvf nvcomp_*.tgz -C /usr/local/cuda/include/ --strip-components=1 include/
rm -f $NVCOMP.tgz
fi
# Work around issues with CUDA 10.2/11.x
$SUDO mv /usr/include/cublas* /usr/include/nvblas* /usr/local/cuda/include/ || true
$SUDO mv /usr/lib64/libcublas* /usr/lib64/libnvblas* /usr/local/cuda/lib64/ || true
Expand All @@ -179,7 +188,7 @@ runs:
$SUDO sed -i /warp_merge_sort.cuh/d /usr/local/cuda/include/cub/cub.cuh
# Remove downloaded archives and unused libraries to avoid running out of disk space
$SUDO rm -f $(find /usr/local/cuda/ -name '*.a' -and -not -name libcudart_static.a -and -not -name libcudadevrt.a)
$SUDO rm -f $(find /usr/local/cuda/ -name '*.a' -and -not -name libcudart_static.a -and -not -name libcudadevrt.a -and -not -name libnvcomp_device.a)
fi
if [[ "$CI_DEPLOY_MODULE" == "nvcodec" ]]; then
Expand Down
10 changes: 9 additions & 1 deletion .github/actions/deploy-windows/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,13 @@ runs:
)
if "%CI_DEPLOY_PLATFORM%"=="windows-x86_64" if not "%CI_DEPLOY_NEED_CUDA%"=="" (
echo Installing CUDA, cuDNN, etc
echo Installing CUDA, cuDNN, nvCOMP, etc
curl -LO https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_522.06_windows.exe
curl -LO https://developer.download.nvidia.com/compute/cuda/12.3.0/local_installers/cuda_12.3.0_545.84_windows.exe
rem curl -LO https://developer.download.nvidia.com/compute/redist/cudnn/v8.8.0/local_installers/12.0/cudnn_8.8.0.121_windows.exe
python -m gdown.cli https://drive.google.com/uc?id=1-5QHvwDZC_1rhn5W6fRHNWicXRPtqt31
curl -LO http://www.winimage.com/zLibDll/zlib123dllx64.zip
curl -LO https://developer.download.nvidia.com/compute/nvcomp/3.0.4/local_installers/nvcomp_3.0.4_windows_12.x.zip
cuda_11.8.0_522.06_windows.exe -s
bash -c "rm -Rf 'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8'"
bash -c "mv 'C:/Program Files/NVIDIA Corporation/NvToolsExt' 'C:/Program Files/NVIDIA Corporation/NvToolsExt_old'"
Expand All @@ -105,13 +106,20 @@ runs:
rem cudnn_8.8.0.121_windows.exe -s
unzip cudnn-windows-x86_64-8.9.5.29_cuda12-archive.zip
unzip zlib123dllx64.zip
unzip nvcomp_3.0.4_windows_12.x.zip
rem move "%ProgramFiles%\NVIDIA\CUDNN\v8.8\bin\*.dll" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\bin"
rem move "%ProgramFiles%\NVIDIA\CUDNN\v8.8\include\*.h" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\include"
rem move "%ProgramFiles%\NVIDIA\CUDNN\v8.8\lib\x64\*.lib" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\lib\x64"
move cudnn-windows-x86_64-8.9.5.29_cuda12-archive\bin\*.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\bin"
move cudnn-windows-x86_64-8.9.5.29_cuda12-archive\include\*.h "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\include"
move cudnn-windows-x86_64-8.9.5.29_cuda12-archive\lib\x64\*.lib "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\lib\x64"
move dll_x64\zlibwapi.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\bin"
move include\* "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\include"
move include\gdeflate "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\include"
move include\native "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\include"
move include\nvcomp "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\include"
move lib\nvcomp*.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\bin"
move lib\nvcomp*.lib "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\lib\x64"
rem echo Applying hotfix to Visual Studio 2019 for CUDA
rem curl -LO https://raw.githubusercontent.com/microsoft/STL/main/stl/inc/cmath
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip
* CUDA 12.3.x https://developer.nvidia.com/cuda-downloads
* cuDNN 8.9.x https://developer.nvidia.com/cudnn
* NCCL 2.18.x https://developer.nvidia.com/nccl
* nvCOMP 3.0.x https://developer.nvidia.com/nvcomp
* NVIDIA Video Codec SDK 12.1.x https://developer.nvidia.com/nvidia-video-codec-sdk
* OpenCL 3.0.x https://github.com/KhronosGroup/OpenCL-ICD-Loader
* MXNet 1.9.x https://github.com/apache/incubator-mxnet
Expand Down
3 changes: 3 additions & 0 deletions cuda/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ To view the license for cuDNN included in these archives, click [here](https://d
### NVIDIA Collective Communications Library (NCCL)
To view the license for NCCL included in these archives, click [here](https://github.com/NVIDIA/nccl/blob/master/LICENSE.txt)

### NVIDIA nvCOMP
To view the license for nvCOMP included in these archives, click [here](https://github.com/NVIDIA/nvcomp/blob/main/LICENSE)

Introduction
------------
Expand All @@ -26,6 +28,7 @@ This directory contains the JavaCPP Presets module for:
* CUDA 12.3.0 https://developer.nvidia.com/cuda-zone
* cuDNN 8.9.5 https://developer.nvidia.com/cudnn
* NCCL 2.18.5 https://developer.nvidia.com/nccl
* nvCOMP 3.0.4 https://developer.nvidia.com/nvcomp

Please refer to the parent README.md file for more detailed information about the JavaCPP Presets.

Expand Down
95 changes: 95 additions & 0 deletions cuda/samples/nvcompLZ4Example.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

import org.bytedeco.cuda.cudart.CUstream_st;
import org.bytedeco.cuda.global.nvcomp;
import org.bytedeco.cuda.nvcomp.*;
import org.bytedeco.javacpp.BytePointer;
import org.bytedeco.javacpp.Loader;

import static org.bytedeco.cuda.global.cudart.*;
import static org.bytedeco.cuda.global.nvcomp.*;

// https://github.com/NVIDIA/nvcomp/blob/main/examples/high_level_quickstart_example.cpp
public class nvcompLZ4Example {

    /**
     * Fails fast when a CUDA runtime call reports an error. The original
     * sample assigned every return code to a local and ignored it, which
     * let a failed allocation or copy fall through into later native calls.
     *
     * @param err  return code of a CUDA runtime call
     * @param what short description of the call, for the error message
     */
    private static void checkCuda(int err, String what) {
        if (err != cudaSuccess) {
            throw new RuntimeException("CUDA error " + err + " during " + what);
        }
    }

    /**
     * Compresses a device-resident buffer with the high-level LZ4 manager,
     * then decompresses it with a manager reconstructed from the compressed
     * buffer alone — the "manager factory" use case, where the receiver does
     * not know how the buffer was compressed. Note that creating the manager
     * this way synchronizes the stream, since the compressed buffer must be
     * read to construct it.
     *
     * @param device_input_ptrs device pointer to the uncompressed input
     * @param input_buffer_len  length of the input in bytes
     */
    private static void decomp_compressed_with_manager_factory_example(BytePointer device_input_ptrs, long input_buffer_len) {
        CUstream_st stream = new CUstream_st();
        checkCuda(cudaStreamCreate(stream), "cudaStreamCreate");

        long chunk_size = 1 << 16;

        nvcompBatchedLZ4Opts_t format_opts = new nvcompBatchedLZ4Opts_t();
        format_opts.data_type(NVCOMP_TYPE_CHAR);
        LZ4Manager nvcomp_manager = new LZ4Manager(chunk_size, format_opts, stream, 0, nvcomp.NoComputeNoVerify);
        CompressionConfig comp_config = nvcomp_manager.configure_compression(input_buffer_len);

        BytePointer comp_buffer = new BytePointer();
        checkCuda(cudaMalloc(comp_buffer, comp_config.max_compressed_buffer_size()), "cudaMalloc(comp_buffer)");

        nvcomp_manager.compress(device_input_ptrs, comp_buffer, comp_config);

        // Construct a new nvcomp manager from the compressed buffer.
        // We could reuse nvcomp_manager from above, but this demonstrates the
        // case where a buffer is received and the user doesn't know how it was
        // compressed.
        nvcompManagerBase decomp_nvcomp_manager = create_manager(comp_buffer, stream, 0, NoComputeNoVerify);

        DecompressionConfig decomp_config = decomp_nvcomp_manager.configure_decompression(comp_buffer);
        BytePointer res_decomp_buffer = new BytePointer();
        checkCuda(cudaMalloc(res_decomp_buffer, decomp_config.decomp_data_size()), "cudaMalloc(res_decomp_buffer)");

        decomp_nvcomp_manager.decompress(res_decomp_buffer, comp_buffer, decomp_config);

        checkCuda(cudaFree(comp_buffer), "cudaFree(comp_buffer)");
        checkCuda(cudaFree(res_decomp_buffer), "cudaFree(res_decomp_buffer)");
        checkCuda(cudaStreamSynchronize(stream), "cudaStreamSynchronize");
        checkCuda(cudaStreamDestroy(stream), "cudaStreamDestroy");
    }

    /**
     * Fills a 1 MB host buffer with random lowercase letters, copies it to
     * the device, and runs the compress/decompress round-trip example.
     */
    public static void main(String[] args) {
        Loader.load(nvcomp.class);

        // Initialize a random array of chars ('a'..'z').
        int input_buffer_len = 1000000;
        byte[] uncompressed_data = new byte[input_buffer_len];

        for (int i = 0; i < input_buffer_len; i++) {
            uncompressed_data[i] = (byte) (Math.random() * 26 + 'a');
        }

        BytePointer uncompressed_data_ptr = new BytePointer(uncompressed_data);

        BytePointer device_input_ptrs = new BytePointer();

        checkCuda(cudaMalloc(device_input_ptrs, input_buffer_len), "cudaMalloc(device_input_ptrs)");
        checkCuda(cudaMemcpy(device_input_ptrs, uncompressed_data_ptr, input_buffer_len, cudaMemcpyDefault), "cudaMemcpy");

        decomp_compressed_with_manager_factory_example(device_input_ptrs, input_buffer_len);

        // Release the device input buffer (leaked in the original sample).
        checkCuda(cudaFree(device_input_ptrs), "cudaFree(device_input_ptrs)");
    }
}
Loading

0 comments on commit 212e7a8

Please sign in to comment.