diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index bed83fca98..fd42a2842c 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -28,7 +28,7 @@ concurrency:
jobs:
cpp-build:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.06
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.08
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
@@ -37,7 +37,7 @@ jobs:
python-build:
needs: [cpp-build]
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.06
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.08
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
@@ -46,7 +46,7 @@ jobs:
upload-conda:
needs: [cpp-build, python-build]
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.06
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.08
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
@@ -57,7 +57,7 @@ jobs:
if: github.ref_type == 'branch'
needs: python-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06
+ uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.08
with:
arch: "amd64"
branch: ${{ inputs.branch }}
@@ -69,19 +69,17 @@ jobs:
sha: ${{ inputs.sha }}
wheel-build-pylibraft:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.08
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
sha: ${{ inputs.sha }}
date: ${{ inputs.date }}
- package-name: pylibraft
- package-dir: python/pylibraft
- skbuild-configure-options: "-DRAFT_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF"
+ script: ci/build_wheel_pylibraft.sh
wheel-publish-pylibraft:
needs: wheel-build-pylibraft
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.06
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-publish.yaml@branch-23.08
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
@@ -91,19 +89,17 @@ jobs:
wheel-build-raft-dask:
needs: wheel-publish-pylibraft
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.08
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
sha: ${{ inputs.sha }}
date: ${{ inputs.date }}
- package-name: raft_dask
- package-dir: python/raft-dask
- skbuild-configure-options: "-DRAFT_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF"
+ script: ci/build_wheel_raft_dask.sh
wheel-publish-raft-dask:
needs: wheel-build-raft-dask
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.06
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-publish.yaml@branch-23.08
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 28efc135b2..e7f3a1caff 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -23,41 +23,41 @@ jobs:
- wheel-build-raft-dask
- wheel-tests-raft-dask
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.06
+ uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.08
checks:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.06
+ uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.08
with:
enable_check_generated_files: false
conda-cpp-build:
needs: checks
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.06
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.08
with:
build_type: pull-request
node_type: cpu16
conda-cpp-tests:
needs: conda-cpp-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.06
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.08
with:
build_type: pull-request
conda-python-build:
needs: conda-cpp-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.06
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.08
with:
build_type: pull-request
conda-python-tests:
needs: conda-python-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.06
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.08
with:
build_type: pull-request
docs-build:
needs: conda-python-build
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06
+ uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.08
with:
build_type: pull-request
node_type: "gpu-v100-latest-1"
@@ -67,40 +67,28 @@ jobs:
wheel-build-pylibraft:
needs: checks
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.08
with:
build_type: pull-request
- package-name: pylibraft
- package-dir: python/pylibraft
- skbuild-configure-options: "-DRAFT_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF"
+ script: ci/build_wheel_pylibraft.sh
wheel-tests-pylibraft:
needs: wheel-build-pylibraft
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.08
with:
build_type: pull-request
- package-name: pylibraft
- test-unittest: "python -m pytest ./python/pylibraft/pylibraft/test"
- test-smoketest: "python ./ci/wheel_smoke_test_pylibraft.py"
+ script: ci/test_wheel_pylibraft.sh
wheel-build-raft-dask:
needs: wheel-tests-pylibraft
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.08
with:
build_type: pull-request
- package-name: raft_dask
- package-dir: python/raft-dask
- before-wheel: "RAPIDS_PY_WHEEL_NAME=pylibraft_${{ '${PIP_CU_VERSION}' }} rapids-download-wheels-from-s3 ./local-pylibraft && python -m pip install --no-deps ./local-pylibraft/pylibraft*.whl"
- skbuild-configure-options: "-DRAFT_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF"
+ script: "ci/build_wheel_raft_dask.sh"
wheel-tests-raft-dask:
needs: wheel-build-raft-dask
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.08
with:
build_type: pull-request
- package-name: raft_dask
- # Always want to test against latest dask/distributed.
- test-before-amd64: "RAPIDS_PY_WHEEL_NAME=pylibraft_${{ '${PIP_CU_VERSION}' }} rapids-download-wheels-from-s3 ./local-pylibraft-dep && pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl && pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.06"
- test-before-arm64: "RAPIDS_PY_WHEEL_NAME=pylibraft_${{ '${PIP_CU_VERSION}' }} rapids-download-wheels-from-s3 ./local-pylibraft-dep && pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl && pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.06"
- test-unittest: "python -m pytest ./python/raft-dask/raft_dask/test"
- test-smoketest: "python ./ci/wheel_smoke_test_raft_dask.py"
+ script: ci/test_wheel_raft_dask.sh
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index ffd7fa3bcb..b752576b75 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -16,7 +16,7 @@ on:
jobs:
conda-cpp-tests:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.06
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.08
with:
build_type: nightly
branch: ${{ inputs.branch }}
@@ -24,7 +24,7 @@ jobs:
sha: ${{ inputs.sha }}
conda-python-tests:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.06
+ uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.08
with:
build_type: nightly
branch: ${{ inputs.branch }}
@@ -32,23 +32,19 @@ jobs:
sha: ${{ inputs.sha }}
wheel-tests-pylibraft:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.08
with:
build_type: nightly
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
- package-name: pylibraft
- test-unittest: "python -m pytest ./python/pylibraft/pylibraft/test"
+ script: ci/test_wheel_pylibraft.sh
wheel-tests-raft-dask:
secrets: inherit
- uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.08
with:
build_type: nightly
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
- package-name: raft_dask
- test-before-amd64: "pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.06"
- test-before-arm64: "pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.06"
- test-unittest: "python -m pytest ./python/raft-dask/raft_dask/test"
+ script: ci/test_wheel_raft_dask.sh
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 16c3ba4985..8642f2bdf3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,97 @@
+# raft 23.08.00 (9 Aug 2023)
+
+## 🚨 Breaking Changes
+
+- Separate CAGRA index type from internal idx type ([#1664](https://github.com/rapidsai/raft/pull/1664)) [@tfeher](https://github.com/tfeher)
+- Stop using setup.py in build.sh ([#1645](https://github.com/rapidsai/raft/pull/1645)) [@vyasr](https://github.com/vyasr)
+- CAGRA max_queries auto configuration ([#1613](https://github.com/rapidsai/raft/pull/1613)) [@enp1s0](https://github.com/enp1s0)
+- Rename the CAGRA prune function to optimize ([#1588](https://github.com/rapidsai/raft/pull/1588)) [@enp1s0](https://github.com/enp1s0)
+- CAGRA pad dataset for 128bit vectorized load ([#1505](https://github.com/rapidsai/raft/pull/1505)) [@tfeher](https://github.com/tfeher)
+- Sparse Pairwise Distances API Updates ([#1502](https://github.com/rapidsai/raft/pull/1502)) [@divyegala](https://github.com/divyegala)
+- Cagra index construction without copying device mdarrays ([#1494](https://github.com/rapidsai/raft/pull/1494)) [@tfeher](https://github.com/tfeher)
+- [FEA] Masked NN for connect_components ([#1445](https://github.com/rapidsai/raft/pull/1445)) [@tarang-jain](https://github.com/tarang-jain)
+- Limiting workspace memory resource ([#1356](https://github.com/rapidsai/raft/pull/1356)) [@achirkin](https://github.com/achirkin)
+
+## 🐛 Bug Fixes
+
+- Remove push condition on docs-build ([#1693](https://github.com/rapidsai/raft/pull/1693)) [@raydouglass](https://github.com/raydouglass)
+- IVF-PQ: Fix illegal memory access with large max_samples ([#1685](https://github.com/rapidsai/raft/pull/1685)) [@achirkin](https://github.com/achirkin)
+- Fix missing parameter for select_k ([#1682](https://github.com/rapidsai/raft/pull/1682)) [@ucassjy](https://github.com/ucassjy)
+- Separate CAGRA index type from internal idx type ([#1664](https://github.com/rapidsai/raft/pull/1664)) [@tfeher](https://github.com/tfeher)
+- Add rmm to pylibraft run dependencies, since it is used by Cython. ([#1656](https://github.com/rapidsai/raft/pull/1656)) [@bdice](https://github.com/bdice)
+- Hotfix: wrong constant in IVF-PQ fp_8bit2half ([#1654](https://github.com/rapidsai/raft/pull/1654)) [@achirkin](https://github.com/achirkin)
+- Fix sparse KNN for large batches ([#1640](https://github.com/rapidsai/raft/pull/1640)) [@viclafargue](https://github.com/viclafargue)
+- Fix uploading of RAFT nightly packages ([#1638](https://github.com/rapidsai/raft/pull/1638)) [@dantegd](https://github.com/dantegd)
+- Fix cagra multi CTA bug ([#1628](https://github.com/rapidsai/raft/pull/1628)) [@enp1s0](https://github.com/enp1s0)
+- pass correct stream to cutlass kernel launch of L2/cosine pairwise distance kernels ([#1597](https://github.com/rapidsai/raft/pull/1597)) [@mdoijade](https://github.com/mdoijade)
+- Fix launchconfig y-gridsize too large in epilogue kernel ([#1586](https://github.com/rapidsai/raft/pull/1586)) [@mfoerste4](https://github.com/mfoerste4)
+- Fix update version and pinnings for 23.08. ([#1556](https://github.com/rapidsai/raft/pull/1556)) [@bdice](https://github.com/bdice)
+- Fix for function exposing KNN merge ([#1418](https://github.com/rapidsai/raft/pull/1418)) [@viclafargue](https://github.com/viclafargue)
+
+## 📖 Documentation
+
+- Critical doc fixes and updates for 23.08 ([#1705](https://github.com/rapidsai/raft/pull/1705)) [@cjnolet](https://github.com/cjnolet)
+- Fix the documentation about changing the logging level ([#1596](https://github.com/rapidsai/raft/pull/1596)) [@enp1s0](https://github.com/enp1s0)
+- Fix raft::bitonic_sort small usage example ([#1580](https://github.com/rapidsai/raft/pull/1580)) [@enp1s0](https://github.com/enp1s0)
+
+## 🚀 New Features
+
+- Use rapids-cmake new parallel testing feature ([#1623](https://github.com/rapidsai/raft/pull/1623)) [@robertmaynard](https://github.com/robertmaynard)
+- Add support for row-major slice ([#1591](https://github.com/rapidsai/raft/pull/1591)) [@lowener](https://github.com/lowener)
+- IVF-PQ tutorial notebook ([#1544](https://github.com/rapidsai/raft/pull/1544)) [@achirkin](https://github.com/achirkin)
+- [FEA] Masked NN for connect_components ([#1445](https://github.com/rapidsai/raft/pull/1445)) [@tarang-jain](https://github.com/tarang-jain)
+- raft: Build CUDA 12 packages ([#1388](https://github.com/rapidsai/raft/pull/1388)) [@vyasr](https://github.com/vyasr)
+- Limiting workspace memory resource ([#1356](https://github.com/rapidsai/raft/pull/1356)) [@achirkin](https://github.com/achirkin)
+
+## 🛠️ Improvements
+
+- Pin `dask` and `distributed` for `23.08` release ([#1711](https://github.com/rapidsai/raft/pull/1711)) [@galipremsagar](https://github.com/galipremsagar)
+- Add algo parameter for CAGRA ANN bench ([#1687](https://github.com/rapidsai/raft/pull/1687)) [@tfeher](https://github.com/tfeher)
+- ANN benchmarks python wrapper for splitting billion-scale dataset groundtruth ([#1679](https://github.com/rapidsai/raft/pull/1679)) [@divyegala](https://github.com/divyegala)
+- Rename CAGRA parameter num_parents to search_width ([#1676](https://github.com/rapidsai/raft/pull/1676)) [@tfeher](https://github.com/tfeher)
+- Renaming namespaces to promote CAGRA from experimental ([#1666](https://github.com/rapidsai/raft/pull/1666)) [@cjnolet](https://github.com/cjnolet)
+- CAGRA Python wrappers ([#1665](https://github.com/rapidsai/raft/pull/1665)) [@dantegd](https://github.com/dantegd)
+- Add notebook for Vector Search - Question Retrieval ([#1662](https://github.com/rapidsai/raft/pull/1662)) [@lowener](https://github.com/lowener)
+- Fix CMake CUDA support for pylibraft when raft is found. ([#1659](https://github.com/rapidsai/raft/pull/1659)) [@bdice](https://github.com/bdice)
+- Cagra ANN benchmark improvements ([#1658](https://github.com/rapidsai/raft/pull/1658)) [@tfeher](https://github.com/tfeher)
+- ANN-benchmarks: avoid using the dataset during search when possible ([#1657](https://github.com/rapidsai/raft/pull/1657)) [@achirkin](https://github.com/achirkin)
+- Revert CUDA 12.0 CI workflows to branch-23.08. ([#1652](https://github.com/rapidsai/raft/pull/1652)) [@bdice](https://github.com/bdice)
+- ANN: Optimize host-side refine ([#1651](https://github.com/rapidsai/raft/pull/1651)) [@achirkin](https://github.com/achirkin)
+- Cagra template instantiations ([#1650](https://github.com/rapidsai/raft/pull/1650)) [@tfeher](https://github.com/tfeher)
+- Modify comm_split to avoid ucp ([#1649](https://github.com/rapidsai/raft/pull/1649)) [@ChuckHastings](https://github.com/ChuckHastings)
+- Stop using setup.py in build.sh ([#1645](https://github.com/rapidsai/raft/pull/1645)) [@vyasr](https://github.com/vyasr)
+- IVF-PQ: Add a (faster) direct conversion fp8->half ([#1644](https://github.com/rapidsai/raft/pull/1644)) [@achirkin](https://github.com/achirkin)
+- Simplify `bench/ann` scripts to Python based module ([#1642](https://github.com/rapidsai/raft/pull/1642)) [@divyegala](https://github.com/divyegala)
+- Further removal of uses-setup-env-vars ([#1639](https://github.com/rapidsai/raft/pull/1639)) [@dantegd](https://github.com/dantegd)
+- Drop blank line in `raft-dask/meta.yaml` ([#1637](https://github.com/rapidsai/raft/pull/1637)) [@jakirkham](https://github.com/jakirkham)
+- Enable conservative memory allocations for RAFT IVF-Flat benchmarks. ([#1634](https://github.com/rapidsai/raft/pull/1634)) [@tfeher](https://github.com/tfeher)
+- [FEA] Codepacking for IVF-flat ([#1632](https://github.com/rapidsai/raft/pull/1632)) [@tarang-jain](https://github.com/tarang-jain)
+- Fixing ann bench cmake (and docs) ([#1630](https://github.com/rapidsai/raft/pull/1630)) [@cjnolet](https://github.com/cjnolet)
+- [WIP] Test CI issues ([#1626](https://github.com/rapidsai/raft/pull/1626)) [@VibhuJawa](https://github.com/VibhuJawa)
+- Set pool memory resource for raft IVF ANN benchmarks ([#1625](https://github.com/rapidsai/raft/pull/1625)) [@tfeher](https://github.com/tfeher)
+- Adding sort option to matrix::select_k api ([#1615](https://github.com/rapidsai/raft/pull/1615)) [@cjnolet](https://github.com/cjnolet)
+- CAGRA max_queries auto configuration ([#1613](https://github.com/rapidsai/raft/pull/1613)) [@enp1s0](https://github.com/enp1s0)
+- Use exceptions instead of `exit(-1)` ([#1594](https://github.com/rapidsai/raft/pull/1594)) [@benfred](https://github.com/benfred)
+- [REVIEW] Add scheduler_file argument to support MNMG setup ([#1593](https://github.com/rapidsai/raft/pull/1593)) [@VibhuJawa](https://github.com/VibhuJawa)
+- Rename the CAGRA prune function to optimize ([#1588](https://github.com/rapidsai/raft/pull/1588)) [@enp1s0](https://github.com/enp1s0)
+- This PR adds support to __half and nb_bfloat16 to myAtomicReduce ([#1585](https://github.com/rapidsai/raft/pull/1585)) [@Kh4ster](https://github.com/Kh4ster)
+- [IMP] move core CUDA RT macros to cuda_rt_essentials.hpp ([#1584](https://github.com/rapidsai/raft/pull/1584)) [@MatthiasKohl](https://github.com/MatthiasKohl)
+- preprocessor syntax fix ([#1582](https://github.com/rapidsai/raft/pull/1582)) [@AyodeAwe](https://github.com/AyodeAwe)
+- use rapids-upload-docs script ([#1578](https://github.com/rapidsai/raft/pull/1578)) [@AyodeAwe](https://github.com/AyodeAwe)
+- Unpin `dask` and `distributed` for development and fix `merge_labels` test ([#1574](https://github.com/rapidsai/raft/pull/1574)) [@galipremsagar](https://github.com/galipremsagar)
+- Remove documentation build scripts for Jenkins ([#1570](https://github.com/rapidsai/raft/pull/1570)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Add support to __half and nv_bfloat16 to most math functions ([#1554](https://github.com/rapidsai/raft/pull/1554)) [@Kh4ster](https://github.com/Kh4ster)
+- Add RAFT ANN benchmark for CAGRA ([#1552](https://github.com/rapidsai/raft/pull/1552)) [@enp1s0](https://github.com/enp1s0)
+- Update CAGRA knn_graph_sort to use Raft::bitonic_sort ([#1550](https://github.com/rapidsai/raft/pull/1550)) [@enp1s0](https://github.com/enp1s0)
+- Add identity matrix function ([#1548](https://github.com/rapidsai/raft/pull/1548)) [@lowener](https://github.com/lowener)
+- Unpin scikit-build upper bound ([#1547](https://github.com/rapidsai/raft/pull/1547)) [@vyasr](https://github.com/vyasr)
+- Migrate wheel workflow scripts locally ([#1546](https://github.com/rapidsai/raft/pull/1546)) [@divyegala](https://github.com/divyegala)
+- Add sample filtering for ivf_flat. Filtering code refactoring and cleanup ([#1541](https://github.com/rapidsai/raft/pull/1541)) [@alexanderguzhva](https://github.com/alexanderguzhva)
+- CAGRA pad dataset for 128bit vectorized load ([#1505](https://github.com/rapidsai/raft/pull/1505)) [@tfeher](https://github.com/tfeher)
+- Sparse Pairwise Distances API Updates ([#1502](https://github.com/rapidsai/raft/pull/1502)) [@divyegala](https://github.com/divyegala)
+- Add CAGRA gbench ([#1496](https://github.com/rapidsai/raft/pull/1496)) [@tfeher](https://github.com/tfeher)
+- Cagra index construction without copying device mdarrays ([#1494](https://github.com/rapidsai/raft/pull/1494)) [@tfeher](https://github.com/tfeher)
+
# raft 23.06.00 (7 Jun 2023)
## 🚨 Breaking Changes
diff --git a/README.md b/README.md
index 10cd7b16fc..2c7f83ad02 100755
--- a/README.md
+++ b/README.md
@@ -1,19 +1,20 @@
-# RAFT: Reusable Accelerated Functions and Tools
+# RAFT: Reusable Accelerated Functions and Tools for Vector Search and More
-![Navigating the canyons of accelerated possibilities](img/raft.png)
+![RAFT tech stack](img/raft-tech-stack-vss.png)
## Resources
- [RAFT Reference Documentation](https://docs.rapids.ai/api/raft/stable/): API Documentation.
- [RAFT Getting Started](./docs/source/quick_start.md): Getting started with RAFT.
- [Build and Install RAFT](./docs/source/build.md): Instructions for installing and building RAFT.
+- [Example Notebooks](./notebooks): Example Jupyter notebooks
- [RAPIDS Community](https://rapids.ai/community.html): Get help, contribute, and collaborate.
- [GitHub repository](https://github.com/rapidsai/raft): Download the RAFT source code.
- [Issue tracker](https://github.com/rapidsai/raft/issues): Report issues or request features.
## Overview
-RAFT contains fundamental widely-used algorithms and primitives for data science and machine learning. The algorithms are CUDA-accelerated and form building blocks for rapidly composing analytics.
+RAFT contains fundamental, widely-used algorithms and primitives for machine learning and information retrieval. The algorithms are CUDA-accelerated and form building blocks for more easily writing high-performance applications.
By taking a primitives-based approach to algorithm development, RAFT
- accelerates algorithm construction time
@@ -22,20 +23,20 @@ By taking a primitives-based approach to algorithm development, RAFT
While not exhaustive, the following general categories help summarize the accelerated functions in RAFT:
#####
-| Category | Examples |
-| --- | --- |
-| **Data Formats** | sparse & dense, conversions, data generation |
+| Category | Examples |
+| --- |-----------------------------------------------------------------------------------------------------------------------------------|
+| **Data Formats** | sparse & dense, conversions, data generation |
| **Dense Operations** | linear algebra, matrix and vector operations, reductions, slicing, norms, factorization, least squares, svd & eigenvalue problems |
-| **Sparse Operations** | linear algebra, eigenvalue problems, slicing, norms, reductions, factorization, symmetrization, components & labeling |
-| **Spatial** | pairwise distances, nearest neighbors, neighborhood graph construction |
-| **Basic Clustering** | spectral clustering, hierarchical clustering, k-means |
-| **Solvers** | combinatorial optimization, iterative solvers |
-| **Statistics** | sampling, moments and summary statistics, metrics |
-| **Tools & Utilities** | common utilities for developing CUDA applications, multi-node multi-gpu infrastructure |
+| **Sparse Operations** | linear algebra, eigenvalue problems, slicing, norms, reductions, factorization, symmetrization, components & labeling |
+| **Spatial** | pairwise distances, nearest neighbors and vector search, neighborhood graph construction |
+| **Basic Clustering** | spectral clustering, hierarchical clustering, k-means |
+| **Solvers** | combinatorial optimization, iterative solvers |
+| **Statistics** | sampling, moments and summary statistics, metrics |
+| **Tools & Utilities** | common utilities for developing CUDA applications, multi-node multi-gpu infrastructure |
-RAFT is a C++ header-only template library with an optional shared library that
-1) can speed up compile times for common template types, and
+RAFT is a C++ header-only template library with an optional shared library that
+1) can speed up compile times for common template types, and
2) provides host-accessible "runtime" APIs, which don't require a CUDA compiler to use
In addition being a C++ library, RAFT also provides 2 Python libraries:
@@ -44,6 +45,29 @@ In addition being a C++ library, RAFT also provides 2 Python libraries:
![RAFT is a C++ header-only template library with optional shared library and lightweight Python wrappers](img/arch.png)
+## Use cases
+
+### Vector Similarity Search
+
+RAFT contains state-of-the-art GPU implementations of approximate nearest neighbor algorithms that enable vector similarity search. Vector similarity search applications often require fast online queries performed one at a time, and RAFT's graph-based [CAGRA](https://docs.rapids.ai/api/raft/nightly/pylibraft_api/neighbors/#cagra) algorithm outperforms the state of the art on the CPU (hierarchical navigable small-world graphs, or HNSW).
+
+In addition to CAGRA, RAFT contains other state-of-the-art GPU-accelerated implementations of popular algorithms for vector similarity search, such as the [IVF-Flat](https://docs.rapids.ai/api/raft/nightly/pylibraft_api/neighbors/#ivf-flat) and [IVF-PQ](https://docs.rapids.ai/api/raft/nightly/pylibraft_api/neighbors/#ivf-pq) algorithms originally popularized by the [FAISS](https://github.com/facebookresearch/faiss) library.
+
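+As a rough sketch of the Python API (an illustrative minimal example; see the CAGRA documentation linked above for the authoritative interface and parameters), building and searching an index looks roughly like:
+
+```python
+import cupy as cp
+from pylibraft.neighbors import cagra
+
+# Random vectors standing in for an embedding dataset and a batch of queries
+dataset = cp.random.random_sample((50000, 128), dtype=cp.float32)
+queries = cp.random.random_sample((1000, 128), dtype=cp.float32)
+
+index = cagra.build(cagra.IndexParams(), dataset)
+distances, neighbors = cagra.search(cagra.SearchParams(), index, queries, 10)
+neighbors = cp.asarray(neighbors)  # device array of the 10 nearest neighbor ids per query
+```
+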
+### Information Retrieval
+
+RAFT also contains a catalog of reusable primitives for composing algorithms that require fast neighborhood computations, such as
+
+1. Computing distances between vectors and computing kernel Gram matrices
+2. Performing ball radius queries for constructing epsilon neighborhoods
+3. Clustering points to partition a space for smaller and faster searches
+4. Constructing neighborhood "connectivities" graphs from dense vectors
+
+Computations like those listed above are critical for information retrieval, data mining, and machine learning applications such as clustering, manifold learning, and dimensionality reduction.
+
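+As an illustration (a hypothetical minimal example; see the pylibraft distance documentation for the exact interface), computing a pairwise distance matrix on the GPU:
+
+```python
+import cupy as cp
+from pylibraft.distance import pairwise_distance
+
+X = cp.random.random_sample((1000, 64), dtype=cp.float32)
+Y = cp.random.random_sample((500, 64), dtype=cp.float32)
+
+# 1000 x 500 matrix of Euclidean distances, computed on the GPU
+dists = cp.asarray(pairwise_distance(X, Y, metric="euclidean"))
+```
+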
+## Is RAFT right for me?
+
+RAFT contains low-level primitives for accelerating applications and workflows. Data source providers and application developers may find specific tools -- like ANN algorithms -- very useful. RAFT is not intended to be used directly by data scientists for discovery and experimentation. For data science tools, please see the [RAPIDS website](https://rapids.ai/).
+
## Getting started
### RAPIDS Memory Manager (RMM)
@@ -291,6 +315,7 @@ The folder structure mirrors other RAPIDS repos, with the following folders:
- `template`: A skeleton template containing the bare-bones file structure and cmake configuration for writing applications with RAFT.
- `test`: Googletests source code
- `docs`: Source code and scripts for building library documentation (Uses breath, doxygen, & pydocs)
+- `notebooks`: IPython notebooks with usage examples and tutorials
- `python`: Source code for Python libraries.
- `pylibraft`: Python build and source code for pylibraft library
- `raft-dask`: Python build and source code for raft-dask library
@@ -322,3 +347,14 @@ If citing the sparse pairwise distances API, please consider using the following
year={2021}
}
```
+
+If citing the single-linkage agglomerative clustering APIs, please consider the following bibtex:
+```bibtex
+@misc{nolet2023cuslink,
+ title={cuSLINK: Single-linkage Agglomerative Clustering on the GPU},
+ author={Corey J. Nolet and Divye Gala and Alex Fender and Mahesh Doijade and Joe Eaton and Edward Raff and John Zedlewski and Brad Rees and Tim Oates},
+ year={2023},
+ eprint={2306.16354},
+ archivePrefix={arXiv},
+ primaryClass={cs.LG}
+}
\ No newline at end of file
diff --git a/build.sh b/build.sh
index ab904abdad..1213500159 100755
--- a/build.sh
+++ b/build.sh
@@ -88,9 +88,7 @@ DISABLE_DEPRECATION_WARNINGS=ON
CMAKE_TARGET=""
# Set defaults for vars that may not have been defined externally
-# FIXME: if INSTALL_PREFIX is not set, check PREFIX, then check
-# CONDA_PREFIX, but there is no fallback from there!
-INSTALL_PREFIX=${INSTALL_PREFIX:=${PREFIX:=${CONDA_PREFIX}}}
+INSTALL_PREFIX=${INSTALL_PREFIX:=${PREFIX:=${CONDA_PREFIX:=$LIBRAFT_BUILD_DIR/install}}}
PARALLEL_LEVEL=${PARALLEL_LEVEL:=`nproc`}
BUILD_ABI=${BUILD_ABI:=ON}
@@ -367,8 +365,9 @@ if [[ ${CMAKE_TARGET} == "" ]]; then
fi
# Append `-DFIND_RAFT_CPP=ON` to EXTRA_CMAKE_ARGS unless a user specified the option.
+SKBUILD_EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS}"
if [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_RAFT_CPP"* ]]; then
- EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DFIND_RAFT_CPP=ON"
+ SKBUILD_EXTRA_CMAKE_ARGS="${SKBUILD_EXTRA_CMAKE_ARGS} -DFIND_RAFT_CPP=ON"
fi
# If clean given, run it prior to any other steps
@@ -383,14 +382,6 @@ if (( ${CLEAN} == 1 )); then
rmdir ${bd} || true
fi
done
-
- cd ${REPODIR}/python/raft-dask
- python setup.py clean --all
- cd ${REPODIR}
-
- cd ${REPODIR}/python/pylibraft
- python setup.py clean --all
- cd ${REPODIR}
fi
################################################################################
@@ -484,29 +475,16 @@ fi
# Build and (optionally) install the pylibraft Python package
if (( ${NUMARGS} == 0 )) || hasArg pylibraft; then
- # Append `-DFIND_RAFT_CPP=ON` to EXTRA_CMAKE_ARGS unless a user specified the option.
- if [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_RAFT_CPP"* ]]; then
- EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DFIND_RAFT_CPP=ON"
- fi
- cd ${REPODIR}/python/pylibraft
- python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH="${RAFT_DASK_BUILD_DIR};${INSTALL_PREFIX}" -DCMAKE_LIBRARY_PATH=${LIBRAFT_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1}
- if [[ ${INSTALL_TARGET} != "" ]]; then
- python setup.py install --single-version-externally-managed --record=record.txt -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} ${EXTRA_CMAKE_ARGS}
- fi
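+    # scikit-build reads SKBUILD_CONFIGURE_OPTIONS / SKBUILD_BUILD_OPTIONS as extra CMake configure and build arguments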
+ SKBUILD_CONFIGURE_OPTIONS="${SKBUILD_EXTRA_CMAKE_ARGS}" \
+ SKBUILD_BUILD_OPTIONS="-j${PARALLEL_LEVEL}" \
+ python -m pip install --no-build-isolation --no-deps ${REPODIR}/python/pylibraft
fi
# Build and (optionally) install the raft-dask Python package
if (( ${NUMARGS} == 0 )) || hasArg raft-dask; then
- # Append `-DFIND_RAFT_CPP=ON` to EXTRA_CMAKE_ARGS unless a user specified the option.
- if [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_RAFT_CPP"* ]]; then
- EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DFIND_RAFT_CPP=ON"
- fi
-
- cd ${REPODIR}/python/raft-dask
- python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH="${RAFT_DASK_BUILD_DIR};${INSTALL_PREFIX}" -DCMAKE_LIBRARY_PATH=${LIBRAFT_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1}
- if [[ ${INSTALL_TARGET} != "" ]]; then
- python setup.py install --single-version-externally-managed --record=record.txt -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} ${EXTRA_CMAKE_ARGS}
- fi
+ SKBUILD_CONFIGURE_OPTIONS="${SKBUILD_EXTRA_CMAKE_ARGS}" \
+ SKBUILD_BUILD_OPTIONS="-j${PARALLEL_LEVEL}" \
+ python -m pip install --no-build-isolation --no-deps ${REPODIR}/python/raft-dask
fi
diff --git a/ci/build_docs.sh b/ci/build_docs.sh
index b1cb993798..4f99348c95 100755
--- a/ci/build_docs.sh
+++ b/ci/build_docs.sh
@@ -19,7 +19,6 @@ rapids-print-env
rapids-logger "Downloading artifacts from previous jobs"
CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
-VERSION_NUMBER="23.06"
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
@@ -29,21 +28,21 @@ rapids-mamba-retry install \
pylibraft \
raft-dask
+export RAPIDS_VERSION_NUMBER="23.08"
+export RAPIDS_DOCS_DIR="$(mktemp -d)"
-rapids-logger "Build Doxygen docs"
+rapids-logger "Build CPP docs"
pushd cpp/doxygen
doxygen Doxyfile
popd
-rapids-logger "Build Sphinx docs"
+rapids-logger "Build Python docs"
pushd docs
sphinx-build -b dirhtml source _html
sphinx-build -b text source _text
+mkdir -p "${RAPIDS_DOCS_DIR}/raft/"{html,txt}
+mv _html/* "${RAPIDS_DOCS_DIR}/raft/html"
+mv _text/* "${RAPIDS_DOCS_DIR}/raft/txt"
popd
-
-if [[ ${RAPIDS_BUILD_TYPE} != "pull-request" ]]; then
- rapids-logger "Upload Docs to S3"
- aws s3 sync --no-progress --delete docs/_html "s3://rapidsai-docs/raft/${VERSION_NUMBER}/html"
- aws s3 sync --no-progress --delete docs/_text "s3://rapidsai-docs/raft/${VERSION_NUMBER}/txt"
-fi
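+# rapids-upload-docs uses RAPIDS_DOCS_DIR and RAPIDS_VERSION_NUMBER (exported above) to publish the generated docs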
+rapids-upload-docs
diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
new file mode 100755
index 0000000000..a9f7f64294
--- /dev/null
+++ b/ci/build_wheel.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Copyright (c) 2023, NVIDIA CORPORATION.
+
+set -euo pipefail
+
+package_name=$1
+package_dir=$2
+
+source rapids-configure-sccache
+source rapids-date-string
+
+# Use gha-tools rapids-pip-wheel-version to generate wheel version then
+# update the necessary files
+version_override="$(rapids-pip-wheel-version ${RAPIDS_DATE_STRING})"
+
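+# rapids-wheel-ctk-name-gen maps the CUDA toolkit version to the wheel-name suffix (e.g. cu11, cu12)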
+RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
+
+ci/release/apply_wheel_modifications.sh ${version_override} "-${RAPIDS_PY_CUDA_SUFFIX}"
+echo "The package name and/or version was modified in the package source. The git diff is:"
+git diff
+
+cd "${package_dir}"
+
+# Hardcode the output dir
+python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check
+
+mkdir -p final_dist
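+# auditwheel repair bundles the required shared libraries into the wheel and applies the manylinux platform tag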
+python -m auditwheel repair -w final_dist dist/*
+
+RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 final_dist
diff --git a/ci/build_wheel_pylibraft.sh b/ci/build_wheel_pylibraft.sh
new file mode 100755
index 0000000000..f17f038675
--- /dev/null
+++ b/ci/build_wheel_pylibraft.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# Copyright (c) 2023, NVIDIA CORPORATION.
+
+set -euo pipefail
+
+# Set up scikit-build CMake configure options for the pylibraft wheel build
+export SKBUILD_CONFIGURE_OPTIONS="-DRAFT_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF"
+
+ci/build_wheel.sh pylibraft python/pylibraft
diff --git a/ci/build_wheel_raft_dask.sh b/ci/build_wheel_raft_dask.sh
new file mode 100755
index 0000000000..f0204d45c0
--- /dev/null
+++ b/ci/build_wheel_raft_dask.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+# Copyright (c) 2023, NVIDIA CORPORATION.
+
+set -euo pipefail
+
+# Set up scikit-build CMake configure options for the raft-dask wheel build
+export SKBUILD_CONFIGURE_OPTIONS="-DRAFT_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF"
+
+RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
+
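+# Download the pylibraft wheel built earlier in this pipeline and install it so it is available for the raft-dask build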
+RAPIDS_PY_WHEEL_NAME=pylibraft_${RAPIDS_PY_CUDA_SUFFIX} rapids-download-wheels-from-s3 ./local-pylibraft
+python -m pip install --no-deps ./local-pylibraft/pylibraft*.whl
+
+ci/build_wheel.sh raft_dask python/raft-dask
diff --git a/ci/docs/build.sh b/ci/docs/build.sh
deleted file mode 100644
index e3062107c0..0000000000
--- a/ci/docs/build.sh
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
-#################################
-# RAFT docs build script for CI #
-#################################
-
-if [ -z "$PROJECT_WORKSPACE" ]; then
- echo ">>>> ERROR: Could not detect PROJECT_WORKSPACE in environment"
- echo ">>>> WARNING: This script contains git commands meant for automated building, do not run locally"
- exit 1
-fi
-
-export DOCS_WORKSPACE="$WORKSPACE/docs"
-export PATH=/conda/bin:/usr/local/cuda/bin:$PATH
-export HOME="$WORKSPACE"
-export PROJECT_WORKSPACE=/rapids/raft
-export PROJECTS=(raft)
-
-gpuci_logger "Check environment"
-env
-
-gpuci_logger "Check GPU usage"
-nvidia-smi
-
-
-gpuci_logger "Activate conda env"
-. /opt/conda/etc/profile.d/conda.sh
-conda activate rapids
-
-gpuci_logger "Check versions"
-python --version
-$CC --version
-$CXX --version
-
-gpuci_logger "Show conda info"
-conda info
-conda config --show-sources
-conda list --show-channel-urls
-
-# Build Doxygen docs
-gpuci_logger "Build Doxygen and Sphinx docs"
-"$PROJECT_WORKSPACE/build.sh" docs -v
-
-#Commit to Website
-cd "$DOCS_WORKSPACE"
-
-for PROJECT in ${PROJECTS[@]}; do
- if [ ! -d "api/$PROJECT/$BRANCH_VERSION" ]; then
- mkdir -p "api/$PROJECT/$BRANCH_VERSION"
- fi
- rm -rf "$DOCS_WORKSPACE/api/$PROJECT/$BRANCH_VERSION/"*
-done
-
-mv "$PROJECT_WORKSPACE/docs/_html/"* "$DOCS_WORKSPACE/api/raft/$BRANCH_VERSION"
\ No newline at end of file
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index f6c6b08644..ef935ba518 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -25,6 +25,10 @@ NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR}
NEXT_UCX_PY_SHORT_TAG="$(curl -sL https://version.gpuci.io/rapids/${NEXT_SHORT_TAG})"
NEXT_UCX_PY_VERSION="${NEXT_UCX_PY_SHORT_TAG}.*"
+# Need to distutils-normalize the original version
+NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))")
+NEXT_UCX_PY_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_UCX_PY_SHORT_TAG}'))")
+
echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG"
# Inplace sed replace; workaround for Linux and Mac
@@ -33,6 +37,7 @@ function sed_runner() {
}
sed_runner "s/set(RAPIDS_VERSION .*)/set(RAPIDS_VERSION \"${NEXT_SHORT_TAG}\")/g" cpp/CMakeLists.txt
+sed_runner "s/set(RAPIDS_VERSION .*)/set(RAPIDS_VERSION \"${NEXT_SHORT_TAG}\")/g" cpp/template/cmake/thirdparty/fetch_rapids.cmake
sed_runner "s/set(RAFT_VERSION .*)/set(RAFT_VERSION \"${NEXT_FULL_TAG}\")/g" cpp/CMakeLists.txt
sed_runner 's/'"pylibraft_version .*)"'/'"pylibraft_version ${NEXT_FULL_TAG})"'/g' python/pylibraft/CMakeLists.txt
sed_runner 's/'"raft_dask_version .*)"'/'"raft_dask_version ${NEXT_FULL_TAG})"'/g' python/raft-dask/CMakeLists.txt
@@ -50,13 +55,23 @@ sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/raft-dask/p
sed_runner 's/version = .*/version = '"'${NEXT_SHORT_TAG}'"'/g' docs/source/conf.py
sed_runner 's/release = .*/release = '"'${NEXT_FULL_TAG}'"'/g' docs/source/conf.py
-for FILE in conda/environments/*.yaml dependencies.yaml; do
- sed_runner "s/dask-cuda=${CURRENT_SHORT_TAG}/dask-cuda=${NEXT_SHORT_TAG}/g" ${FILE};
- sed_runner "s/rapids-build-env=${CURRENT_SHORT_TAG}/rapids-build-env=${NEXT_SHORT_TAG}/g" ${FILE};
- sed_runner "s/rapids-doc-env=${CURRENT_SHORT_TAG}/rapids-doc-env=${NEXT_SHORT_TAG}/g" ${FILE};
- sed_runner "s/rapids-notebook-env=${CURRENT_SHORT_TAG}/rapids-notebook-env=${NEXT_SHORT_TAG}/g" ${FILE};
- sed_runner "s/rmm=${CURRENT_SHORT_TAG}/rmm=${NEXT_SHORT_TAG}/g" ${FILE};
- sed_runner "s/ucx-py=.*/ucx-py=${NEXT_UCX_PY_VERSION}/g" ${FILE};
+DEPENDENCIES=(
+ dask-cuda
+ pylibraft
+ rmm
+ # ucx-py is handled separately below
+)
+for FILE in dependencies.yaml conda/environments/*.yaml; do
+ for DEP in "${DEPENDENCIES[@]}"; do
+ sed_runner "/-.* ${DEP}==/ s/==.*/==${NEXT_SHORT_TAG_PEP440}\.*/g" ${FILE};
+ done
+ sed_runner "/-.* ucx-py==/ s/==.*/==${NEXT_UCX_PY_SHORT_TAG_PEP440}\.*/g" ${FILE};
+done
+for FILE in python/*/pyproject.toml; do
+ for DEP in "${DEPENDENCIES[@]}"; do
+ sed_runner "/\"${DEP}==/ s/==.*\"/==${NEXT_SHORT_TAG_PEP440}.*\"/g" ${FILE}
+ done
+ sed_runner "/\"ucx-py==/ s/==.*\"/==${NEXT_UCX_PY_SHORT_TAG_PEP440}.*\"/g" ${FILE}
done
sed_runner "/^ucx_py_version:$/ {n;s/.*/ - \"${NEXT_UCX_PY_VERSION}\"/}" conda/recipes/raft-dask/conda_build_config.yaml
@@ -66,21 +81,10 @@ for FILE in .github/workflows/*.yaml; do
sed_runner "s/dask-cuda.git@branch-[^\"\s]\+/dask-cuda.git@branch-${NEXT_SHORT_TAG}/g" ${FILE};
done
-# Need to distutils-normalize the original version
-NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))")
-NEXT_UCX_PY_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_UCX_PY_SHORT_TAG}'))")
-
-# Dependency versions in pyproject.toml
-sed_runner "s/rmm==.*\",/rmm==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/pylibraft/pyproject.toml
-
-sed_runner "s/pylibraft==.*\",/pylibraft==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/raft-dask/pyproject.toml
-sed_runner "s/dask-cuda==.*\",/dask-cuda==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/raft-dask/pyproject.toml
-sed_runner "s/ucx-py.*\",/ucx-py==${NEXT_UCX_PY_SHORT_TAG_PEP440}.*\",/g" python/raft-dask/pyproject.toml
-
for FILE in .github/workflows/*.yaml; do
sed_runner "/shared-action-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}"
done
-sed_runner "s/VERSION_NUMBER=\".*/VERSION_NUMBER=\"${NEXT_SHORT_TAG}\"/g" ci/build_docs.sh
+sed_runner "s/RAPIDS_VERSION_NUMBER=\".*/RAPIDS_VERSION_NUMBER=\"${NEXT_SHORT_TAG}\"/g" ci/build_docs.sh
sed_runner "/^PROJECT_NUMBER/ s|\".*\"|\"${NEXT_SHORT_TAG}\"|g" cpp/doxygen/Doxyfile
diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh
index e32697a68a..9c487be156 100755
--- a/ci/test_cpp.sh
+++ b/ci/test_cpp.sh
@@ -36,12 +36,7 @@ trap "EXITCODE=1" ERR
set +e
# Run libraft gtests from libraft-tests package
-rapids-logger "Run gtests"
-for gt in "$CONDA_PREFIX"/bin/gtests/libraft/* ; do
- test_name=$(basename ${gt})
- echo "Running gtest $test_name"
- ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR}
-done
+ctest -j8 --output-on-failure
rapids-logger "Test script exiting with value: $EXITCODE"
exit ${EXITCODE}
diff --git a/ci/test_wheel_pylibraft.sh b/ci/test_wheel_pylibraft.sh
new file mode 100755
index 0000000000..d990a0e6c2
--- /dev/null
+++ b/ci/test_wheel_pylibraft.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Copyright (c) 2023, NVIDIA CORPORATION.
+
+set -euo pipefail
+
+mkdir -p ./dist
+RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
+RAPIDS_PY_WHEEL_NAME="pylibraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
+
+# echo to expand wildcard before adding `[extra]` requires for pip
+python -m pip install $(echo ./dist/pylibraft*.whl)[test]
+
+# Run smoke tests for aarch64 pull requests
+if [[ "$(arch)" == "aarch64" && "${RAPIDS_BUILD_TYPE}" == "pull-request" ]]; then
+ python ./ci/wheel_smoke_test_pylibraft.py
+else
+ python -m pytest ./python/pylibraft/pylibraft/test
+fi
diff --git a/ci/test_wheel_raft_dask.sh b/ci/test_wheel_raft_dask.sh
new file mode 100755
index 0000000000..6aa459ca7c
--- /dev/null
+++ b/ci/test_wheel_raft_dask.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Copyright (c) 2023, NVIDIA CORPORATION.
+
+set -euo pipefail
+
+mkdir -p ./dist
+RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
+RAPIDS_PY_WHEEL_NAME="raft_dask_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
+
+# Download the pylibraft built in the previous step
+RAPIDS_PY_WHEEL_NAME="pylibraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-pylibraft-dep
+python -m pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl
+
+# Always install latest dask for testing
+python -m pip install git+https://github.com/dask/dask.git@2023.7.1 git+https://github.com/dask/distributed.git@2023.7.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.08
+
+# echo to expand wildcard before adding `[extra]` requires for pip
+python -m pip install $(echo ./dist/raft_dask*.whl)[test]
+
+# Run smoke tests for aarch64 pull requests
+if [[ "$(arch)" == "aarch64" && "${RAPIDS_BUILD_TYPE}" == "pull-request" ]]; then
+ python ./ci/wheel_smoke_test_raft_dask.py
+else
+ python -m pytest ./python/raft-dask/raft_dask/test
+fi
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 9cb299889d..55e03f0be4 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -13,15 +13,16 @@ dependencies:
- clang=16.0.1
- cmake>=3.23.1,!=3.25.0
- cuda-profiler-api=11.8.86
-- cuda-python>=11.7.1,<12.0
-- cudatoolkit=11.8
+- cuda-python>=11.7.1,<12.0a0
+- cuda-version=11.8
+- cudatoolkit
- cupy>=12.0.0
- cxx-compiler
- cython>=0.29,<0.30
-- dask-core==2023.3.2
-- dask-cuda==23.6.*
-- dask==2023.3.2
-- distributed==2023.3.2.1
+- dask-core==2023.7.1
+- dask-cuda==23.8.*
+- dask==2023.7.1
+- distributed==2023.7.1
- doxygen>=1.8.20
- gcc_linux-64=11.*
- gmock>=1.13.0
@@ -46,14 +47,14 @@ dependencies:
- pytest
- pytest-cov
- recommonmark
-- rmm==23.6.*
-- scikit-build>=0.13.1,<0.17.2
+- rmm==23.8.*
+- scikit-build>=0.13.1
- scikit-learn
- scipy
- sphinx-copybutton
- sphinx-markdown-tables
- sysroot_linux-64==2.17
- ucx-proc=*=gpu
-- ucx-py=0.32.*
+- ucx-py==0.33.*
- ucx>=1.13.0
name: all_cuda-118_arch-x86_64
diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml
new file mode 100644
index 0000000000..28d7dd0591
--- /dev/null
+++ b/conda/environments/all_cuda-120_arch-x86_64.yaml
@@ -0,0 +1,56 @@
+# This file is generated by `rapids-dependency-file-generator`.
+# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
+channels:
+- rapidsai
+- rapidsai-nightly
+- dask/label/dev
+- conda-forge
+- nvidia
+dependencies:
+- breathe
+- c-compiler
+- clang-tools=16.0.1
+- clang=16.0.1
+- cmake>=3.23.1,!=3.25.0
+- cuda-cudart-dev
+- cuda-profiler-api
+- cuda-python>=12.0,<13.0a0
+- cuda-version=12.0
+- cupy>=12.0.0
+- cxx-compiler
+- cython>=0.29,<0.30
+- dask-core==2023.7.1
+- dask-cuda==23.8.*
+- dask==2023.7.1
+- distributed==2023.7.1
+- doxygen>=1.8.20
+- gcc_linux-64=11.*
+- gmock>=1.13.0
+- graphviz
+- gtest>=1.13.0
+- ipython
+- joblib>=0.11
+- libcublas-dev
+- libcurand-dev
+- libcusolver-dev
+- libcusparse-dev
+- nccl>=2.9.9
+- ninja
+- numba>=0.57
+- numpy>=1.21
+- numpydoc
+- pydata-sphinx-theme
+- pytest
+- pytest-cov
+- recommonmark
+- rmm==23.8.*
+- scikit-build>=0.13.1
+- scikit-learn
+- scipy
+- sphinx-copybutton
+- sphinx-markdown-tables
+- sysroot_linux-64==2.17
+- ucx-proc=*=gpu
+- ucx-py==0.33.*
+- ucx>=1.13.0
+name: all_cuda-120_arch-x86_64
diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
index 3ea560025e..a982febeed 100644
--- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
@@ -12,7 +12,8 @@ dependencies:
- clang=16.0.1
- cmake>=3.23.1,!=3.25.0
- cuda-profiler-api=11.8.86
-- cudatoolkit=11.8
+- cuda-version=11.8
+- cudatoolkit
- cxx-compiler
- cython>=0.29,<0.30
- faiss-proc=*=cuda
@@ -29,9 +30,10 @@ dependencies:
- libcusparse-dev=11.7.5.86
- libcusparse=11.7.5.86
- libfaiss>=1.7.1
+- matplotlib
- nccl>=2.9.9
- ninja
- nlohmann_json>=3.11.2
-- scikit-build>=0.13.1,<0.17.2
+- scikit-build>=0.13.1
- sysroot_linux-64==2.17
name: bench_ann_cuda-118_arch-x86_64
diff --git a/conda/recipes/libraft/build_libraft_template.sh b/conda/recipes/libraft/build_libraft_template.sh
index 9759402884..bd7719af76 100644
--- a/conda/recipes/libraft/build_libraft_template.sh
+++ b/conda/recipes/libraft/build_libraft_template.sh
@@ -2,4 +2,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Just building template so we verify it uses libraft.so and fail if it doesn't build
-./build.sh template
\ No newline at end of file
+./build.sh template
diff --git a/conda/recipes/libraft/conda_build_config.yaml b/conda/recipes/libraft/conda_build_config.yaml
index bec773d26d..c8dcce90eb 100644
--- a/conda/recipes/libraft/conda_build_config.yaml
+++ b/conda/recipes/libraft/conda_build_config.yaml
@@ -5,6 +5,9 @@ cxx_compiler_version:
- 11
cuda_compiler:
+ - cuda-nvcc
+
+cuda11_compiler:
- nvcc
sysroot_version:
@@ -31,40 +34,40 @@ h5py_version:
nlohmann_json_version:
- ">=3.11.2"
-# The CTK libraries below are missing from the conda-forge::cudatoolkit
-# package. The "*_host_*" version specifiers correspond to `11.8` packages and the
-# "*_run_*" version specifiers correspond to `11.x` packages.
+# The CTK libraries below are missing from the conda-forge::cudatoolkit package
+# for CUDA 11. The "*_host_*" version specifiers correspond to `11.8` packages
+# and the "*_run_*" version specifiers correspond to `11.x` packages.
-libcublas_host_version:
+cuda11_libcublas_host_version:
- "=11.11.3.6"
-libcublas_run_version:
+cuda11_libcublas_run_version:
- ">=11.5.2.43,<12.0.0"
-libcurand_host_version:
+cuda11_libcurand_host_version:
- "=10.3.0.86"
-libcurand_run_version:
+cuda11_libcurand_run_version:
- ">=10.2.5.43,<10.3.1"
-libcusolver_host_version:
+cuda11_libcusolver_host_version:
- "=11.4.1.48"
-libcusolver_run_version:
+cuda11_libcusolver_run_version:
- ">=11.2.0.43,<11.4.2"
-libcusparse_host_version:
+cuda11_libcusparse_host_version:
- "=11.7.5.86"
-libcusparse_run_version:
+cuda11_libcusparse_run_version:
- ">=11.6.0.43,<12.0.0"
# `cuda-profiler-api` only has `11.8.0` and `12.0.0` packages for all
# architectures. The "*_host_*" version specifiers correspond to `11.8` packages and the
# "*_run_*" version specifiers correspond to `11.x` packages.
-cuda_profiler_api_host_version:
+cuda11_cuda_profiler_api_host_version:
- "=11.8.86"
-cuda_profiler_api_run_version:
+cuda11_cuda_profiler_api_run_version:
- ">=11.4.240,<12"
diff --git a/conda/recipes/libraft/meta.yaml b/conda/recipes/libraft/meta.yaml
index b89fcfb788..09ef7ae4ab 100644
--- a/conda/recipes/libraft/meta.yaml
+++ b/conda/recipes/libraft/meta.yaml
@@ -40,21 +40,34 @@ outputs:
number: {{ GIT_DESCRIBE_NUMBER }}
string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
ignore_run_exports_from:
- - {{ compiler('cuda') }}
+ {% if cuda_major == "11" %}
+ - {{ compiler('cuda11') }}
+ {% endif %}
- librmm
requirements:
build:
- {{ compiler('c') }}
- {{ compiler('cxx') }}
- - {{ compiler('cuda') }} {{ cuda_version }}
+ {% if cuda_major == "11" %}
+ - {{ compiler('cuda11') }} ={{ cuda_version }}
+ {% else %}
+ - {{ compiler('cuda') }}
+ {% endif %}
+ - cuda-version ={{ cuda_version }}
- cmake {{ cmake_version }}
- ninja
- sysroot_{{ target_platform }} {{ sysroot_version }}
host:
+ {% if cuda_major != "11" %}
+ - cuda-cudart-dev
+ {% endif %}
+ - cuda-version ={{ cuda_version }}
- librmm ={{ minor_version }}
- - cudatoolkit {{ cuda_version }}
run:
- - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}
+ - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
+ {% if cuda_major == "11" %}
+ - cudatoolkit
+ {% endif %}
- librmm ={{ minor_version }}
about:
home: https://rapids.ai/
@@ -66,21 +79,36 @@ outputs:
number: {{ GIT_DESCRIBE_NUMBER }}
string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
ignore_run_exports_from:
- - {{ compiler('cuda') }}
+ {% if cuda_major == "11" %}
+ - {{ compiler('cuda11') }}
+ {% endif %}
- librmm
requirements:
+ host:
+ - cuda-version ={{ cuda_version }}
run:
- {{ pin_subpackage('libraft-headers-only', exact=True) }}
- - cuda-profiler-api {{ cuda_profiler_api_run_version }}
- librmm ={{ minor_version }}
- - libcublas {{ libcublas_run_version }}
- - libcublas-dev {{ libcublas_run_version }}
- - libcurand {{ libcurand_run_version }}
- - libcurand-dev {{ libcurand_run_version }}
- - libcusolver {{ libcusolver_run_version }}
- - libcusolver-dev {{ libcusolver_run_version }}
- - libcusparse {{ libcusparse_run_version }}
- - libcusparse-dev {{ libcusparse_run_version }}
+ - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
+ {% if cuda_major == "11" %}
+ - cudatoolkit
+ - cuda-profiler-api {{ cuda11_cuda_profiler_api_run_version }}
+ - libcublas {{ cuda11_libcublas_run_version }}
+ - libcublas-dev {{ cuda11_libcublas_run_version }}
+ - libcurand {{ cuda11_libcurand_run_version }}
+ - libcurand-dev {{ cuda11_libcurand_run_version }}
+ - libcusolver {{ cuda11_libcusolver_run_version }}
+ - libcusolver-dev {{ cuda11_libcusolver_run_version }}
+ - libcusparse {{ cuda11_libcusparse_run_version }}
+ - libcusparse-dev {{ cuda11_libcusparse_run_version }}
+ {% else %}
+ - cuda-cudart-dev
+ - cuda-profiler-api
+ - libcublas-dev
+ - libcurand-dev
+ - libcusolver-dev
+ - libcusparse-dev
+ {% endif %}
about:
home: https://rapids.ai/
license: Apache-2.0
@@ -93,29 +121,45 @@ outputs:
number: {{ GIT_DESCRIBE_NUMBER }}
string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
ignore_run_exports_from:
- - {{ compiler('cuda') }}
+ {% if cuda_major == "11" %}
+ - {{ compiler('cuda11') }}
+ {% endif %}
requirements:
build:
- {{ compiler('c') }}
- - {{ compiler('cuda') }} {{ cuda_version }}
- {{ compiler('cxx') }}
+ {% if cuda_major == "11" %}
+ - {{ compiler('cuda11') }} ={{ cuda_version }}
+ {% else %}
+ - {{ compiler('cuda') }}
+ {% endif %}
+ - cuda-version ={{ cuda_version }}
- cmake {{ cmake_version }}
- ninja
- sysroot_{{ target_platform }} {{ sysroot_version }}
host:
- {{ pin_subpackage('libraft-headers', exact=True) }}
- - cudatoolkit {{ cuda_version }}
- - cuda-profiler-api {{ cuda_profiler_api_host_version }}
- - libcublas {{ libcublas_host_version }}
- - libcublas-dev {{ libcublas_host_version }}
- - libcurand {{ libcurand_host_version }}
- - libcurand-dev {{ libcurand_host_version }}
- - libcusolver {{ libcusolver_host_version }}
- - libcusolver-dev {{ libcusolver_host_version }}
- - libcusparse {{ libcusparse_host_version }}
- - libcusparse-dev {{ libcusparse_host_version }}
+ - cuda-version ={{ cuda_version }}
+ {% if cuda_major == "11" %}
+ - cuda-profiler-api {{ cuda11_cuda_profiler_api_host_version }}
+ - libcublas {{ cuda11_libcublas_host_version }}
+ - libcublas-dev {{ cuda11_libcublas_host_version }}
+ - libcurand {{ cuda11_libcurand_host_version }}
+ - libcurand-dev {{ cuda11_libcurand_host_version }}
+ - libcusolver {{ cuda11_libcusolver_host_version }}
+ - libcusolver-dev {{ cuda11_libcusolver_host_version }}
+ - libcusparse {{ cuda11_libcusparse_host_version }}
+ - libcusparse-dev {{ cuda11_libcusparse_host_version }}
+ {% else %}
+ - cuda-profiler-api
+ - libcublas-dev
+ - libcurand-dev
+ - libcusolver-dev
+ - libcusparse-dev
+ {% endif %}
run:
- {{ pin_subpackage('libraft-headers', exact=True) }}
+ - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
about:
home: https://rapids.ai/
license: Apache-2.0
@@ -128,30 +172,50 @@ outputs:
number: {{ GIT_DESCRIBE_NUMBER }}
string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
ignore_run_exports_from:
- - {{ compiler('cuda') }}
+ {% if cuda_major == "11" %}
+ - {{ compiler('cuda11') }}
+ {% endif %}
requirements:
build:
- {{ compiler('c') }}
- - {{ compiler('cuda') }} {{ cuda_version }}
- {{ compiler('cxx') }}
+ {% if cuda_major == "11" %}
+ - {{ compiler('cuda11') }} ={{ cuda_version }}
+ {% else %}
+ - {{ compiler('cuda') }}
+ {% endif %}
+ - cuda-version ={{ cuda_version }}
- cmake {{ cmake_version }}
- ninja
- sysroot_{{ target_platform }} {{ sysroot_version }}
host:
- {{ pin_subpackage('libraft', exact=True) }}
- - cudatoolkit {{ cuda_version }}
- - cuda-profiler-api {{ cuda_profiler_api_host_version }}
+ - cuda-version ={{ cuda_version }}
+ {% if cuda_major == "11" %}
+ - cuda-profiler-api {{ cuda11_cuda_profiler_api_run_version }}
+ - libcublas {{ cuda11_libcublas_host_version }}
+ - libcublas-dev {{ cuda11_libcublas_host_version }}
+ - libcurand {{ cuda11_libcurand_host_version }}
+ - libcurand-dev {{ cuda11_libcurand_host_version }}
+ - libcusolver {{ cuda11_libcusolver_host_version }}
+ - libcusolver-dev {{ cuda11_libcusolver_host_version }}
+ - libcusparse {{ cuda11_libcusparse_host_version }}
+ - libcusparse-dev {{ cuda11_libcusparse_host_version }}
+ {% else %}
+ - cuda-cudart-dev
+ - cuda-profiler-api
+ - libcublas-dev
+ - libcurand-dev
+ - libcusolver-dev
+ - libcusparse-dev
+ {% endif %}
- gmock {{ gtest_version }}
- gtest {{ gtest_version }}
- - libcublas {{ libcublas_host_version }}
- - libcublas-dev {{ libcublas_host_version }}
- - libcurand {{ libcurand_host_version }}
- - libcurand-dev {{ libcurand_host_version }}
- - libcusolver {{ libcusolver_host_version }}
- - libcusolver-dev {{ libcusolver_host_version }}
- - libcusparse {{ libcusparse_host_version }}
- - libcusparse-dev {{ libcusparse_host_version }}
run:
+ - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
+ {% if cuda_major == "11" %}
+ - cudatoolkit
+ {% endif %}
- {{ pin_subpackage('libraft', exact=True) }}
- gmock {{ gtest_version }}
- gtest {{ gtest_version }}
@@ -167,20 +231,39 @@ outputs:
number: {{ GIT_DESCRIBE_NUMBER }}
string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
ignore_run_exports_from:
- - {{ compiler('cuda') }}
+ {% if cuda_major == "11" %}
+ - {{ compiler('cuda11') }}
+ {% endif %}
requirements:
build:
- {{ compiler('c') }}
- - {{ compiler('cuda') }} {{ cuda_version }}
- {{ compiler('cxx') }}
+ {% if cuda_major == "11" %}
+ - {{ compiler('cuda11') }} ={{ cuda_version }}
+ {% else %}
+ - {{ compiler('cuda') }}
+ {% endif %}
+ - cuda-version ={{ cuda_version }}
- cmake {{ cmake_version }}
- ninja
- sysroot_{{ target_platform }} {{ sysroot_version }}
host:
- {{ pin_subpackage('libraft', exact=True) }}
- - libcublas {{ libcublas_host_version }}
- - libcublas-dev {{ libcublas_host_version }}
+ - {{ pin_subpackage('libraft-headers', exact=True) }}
+ - cuda-version ={{ cuda_version }}
+ {% if cuda_major == "11" %}
+ - cuda-profiler-api {{ cuda11_cuda_profiler_api_run_version }}
+ - libcublas {{ cuda11_libcublas_host_version }}
+ - libcublas-dev {{ cuda11_libcublas_host_version }}
+ {% else %}
+ - cuda-profiler-api
+ - libcublas-dev
+ {% endif %}
run:
+ - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
+ {% if cuda_major == "11" %}
+ - cudatoolkit
+ {% endif %}
- {{ pin_subpackage('libraft', exact=True) }}
about:
home: https://rapids.ai/
@@ -194,29 +277,52 @@ outputs:
number: {{ GIT_DESCRIBE_NUMBER }}
string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
ignore_run_exports_from:
- - {{ compiler('cuda') }}
+ {% if cuda_major == "11" %}
+ - {{ compiler('cuda11') }}
+ {% endif %}
requirements:
build:
- {{ compiler('c') }}
- - {{ compiler('cuda') }} {{ cuda_version }}
- {{ compiler('cxx') }}
+ {% if cuda_major == "11" %}
+ - {{ compiler('cuda11') }} ={{ cuda_version }}
+ {% else %}
+ - {{ compiler('cuda') }}
+ {% endif %}
+ - cuda-version ={{ cuda_version }}
- cmake {{ cmake_version }}
- ninja
- sysroot_{{ target_platform }} {{ sysroot_version }}
host:
- {{ pin_subpackage('libraft', exact=True) }}
- - cudatoolkit {{ cuda_version }}
- - libcublas {{ libcublas_host_version }}
- - libcublas-dev {{ libcublas_host_version }}
+ - cuda-version ={{ cuda_version }}
+ {% if cuda_major == "11" %}
+ - cuda-profiler-api {{ cuda11_cuda_profiler_api_run_version }}
+ - libcublas {{ cuda11_libcublas_host_version }}
+ - libcublas-dev {{ cuda11_libcublas_host_version }}
+ {% else %}
+ - cuda-profiler-api
+ - libcublas-dev
+ {% endif %}
- glog {{ glog_version }}
- nlohmann_json {{ nlohmann_json_version }}
- - libfaiss>=1.7.1
+ # Temporarily ignore faiss benchmarks on CUDA 12 because packages do not exist yet
+ {% if cuda_major == "11" %}
- faiss-proc=*=cuda
+ - libfaiss {{ faiss_version }}
+ {% endif %}
run:
- {{ pin_subpackage('libraft', exact=True) }}
+ - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
+ {% if cuda_major == "11" %}
+ - cudatoolkit
+ {% endif %}
- glog {{ glog_version }}
+ # Temporarily ignore faiss benchmarks on CUDA 12 because packages do not exist yet
+ {% if cuda_major == "11" %}
- faiss-proc=*=cuda
- libfaiss {{ faiss_version }}
+ {% endif %}
- h5py {{ h5py_version }}
about:
home: https://rapids.ai/
diff --git a/conda/recipes/pylibraft/conda_build_config.yaml b/conda/recipes/pylibraft/conda_build_config.yaml
index add119d796..41bf15c12c 100644
--- a/conda/recipes/pylibraft/conda_build_config.yaml
+++ b/conda/recipes/pylibraft/conda_build_config.yaml
@@ -5,6 +5,9 @@ cxx_compiler_version:
- 11
cuda_compiler:
+ - cuda-nvcc
+
+cuda11_compiler:
- nvcc
sysroot_version:
diff --git a/conda/recipes/pylibraft/meta.yaml b/conda/recipes/pylibraft/meta.yaml
index 7730801801..7468039539 100644
--- a/conda/recipes/pylibraft/meta.yaml
+++ b/conda/recipes/pylibraft/meta.yaml
@@ -20,19 +20,31 @@ build:
number: {{ GIT_DESCRIBE_NUMBER }}
string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
ignore_run_exports_from:
- - {{ compiler('cuda') }}
+ {% if cuda_major == "11" %}
+ - {{ compiler('cuda11') }}
+ {% endif %}
requirements:
build:
- {{ compiler('c') }}
- {{ compiler('cxx') }}
- - {{ compiler('cuda') }} {{ cuda_version }}
+ {% if cuda_major == "11" %}
+ - {{ compiler('cuda11') }} ={{ cuda_version }}
+ {% else %}
+ - {{ compiler('cuda') }}
+ {% endif %}
+ - cuda-version ={{ cuda_version }}
- cmake {{ cmake_version }}
- ninja
- sysroot_{{ target_platform }} {{ sysroot_version }}
host:
- - cuda-python >=11.7.1,<12.0
- - cudatoolkit ={{ cuda_version }}
+ {% if cuda_major == "11" %}
+ - cuda-python >=11.7.1,<12.0a0
+ - cudatoolkit
+ {% else %}
+ - cuda-python >=12.0,<13.0a0
+ {% endif %}
+ - cuda-version ={{ cuda_version }}
- cython >=0.29,<0.30
- libraft {{ version }}
- libraft-headers {{ version }}
@@ -42,15 +54,18 @@ requirements:
- scikit-build >=0.13.1
- setuptools
run:
- - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}
- - cuda-python >=11.7.1,<12.0
+ - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
+ {% if cuda_major == "11" %}
+ - cudatoolkit
+ {% endif %}
- libraft {{ version }}
- libraft-headers {{ version }}
- python x.x
+ - rmm ={{ minor_version }}
tests:
requirements:
- - cudatoolkit ={{ cuda_version }}
+ - cuda-version ={{ cuda_version }}
imports:
- pylibraft
diff --git a/conda/recipes/raft-dask/conda_build_config.yaml b/conda/recipes/raft-dask/conda_build_config.yaml
index 4f88728f4b..fb09c6d1f5 100644
--- a/conda/recipes/raft-dask/conda_build_config.yaml
+++ b/conda/recipes/raft-dask/conda_build_config.yaml
@@ -5,6 +5,9 @@ cxx_compiler_version:
- 11
cuda_compiler:
+ - cuda-nvcc
+
+cuda11_compiler:
- nvcc
sysroot_version:
@@ -14,7 +17,7 @@ ucx_version:
- ">=1.13.0,<1.15.0"
ucx_py_version:
- - "0.32.*"
+ - "0.33.*"
cmake_version:
- ">=3.23.1,!=3.25.0"
diff --git a/conda/recipes/raft-dask/meta.yaml b/conda/recipes/raft-dask/meta.yaml
index cd08deabfa..5f3ea8257f 100644
--- a/conda/recipes/raft-dask/meta.yaml
+++ b/conda/recipes/raft-dask/meta.yaml
@@ -20,19 +20,31 @@ build:
number: {{ GIT_DESCRIBE_NUMBER }}
string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
ignore_run_exports_from:
- - {{ compiler('cuda') }}
+ {% if cuda_major == "11" %}
+ - {{ compiler('cuda11') }}
+ {% endif %}
requirements:
build:
- {{ compiler('c') }}
- {{ compiler('cxx') }}
- - {{ compiler('cuda') }} {{ cuda_version }}
+ {% if cuda_major == "11" %}
+ - {{ compiler('cuda11') }} ={{ cuda_version }}
+ {% else %}
+ - {{ compiler('cuda') }}
+ {% endif %}
+ - cuda-version ={{ cuda_version }}
- cmake {{ cmake_version }}
- ninja
- sysroot_{{ target_platform }} {{ sysroot_version }}
host:
- - cuda-python >=11.7.1,<12.0
- - cudatoolkit ={{ cuda_version }}
+ {% if cuda_major == "11" %}
+ - cuda-python >=11.7.1,<12.0a0
+ - cudatoolkit
+ {% else %}
+ - cuda-python >=12.0,<13.0a0
+ {% endif %}
+ - cuda-version ={{ cuda_version }}
- cython >=0.29,<0.30
- nccl >=2.9.9
- pylibraft {{ version }}
@@ -44,12 +56,14 @@ requirements:
- ucx-proc=*=gpu
- ucx-py {{ ucx_py_version }}
run:
- - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}
- - cuda-python >=11.7.1,<12.0
- - dask ==2023.3.2
- - dask-core ==2023.3.2
+ {% if cuda_major == "11" %}
+ - cudatoolkit
+ {% endif %}
+ - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
+ - dask ==2023.7.1
+ - dask-core ==2023.7.1
- dask-cuda ={{ minor_version }}
- - distributed ==2023.3.2.1
+ - distributed ==2023.7.1
- joblib >=0.11
- nccl >=2.9.9
- pylibraft {{ version }}
@@ -61,7 +75,7 @@ requirements:
tests:
requirements:
- - cudatoolkit ={{ cuda_version }}
+ - cuda-version ={{ cuda_version }}
imports:
- raft_dask
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 9f3031c6d2..7ee8293c5d 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -10,8 +10,8 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
-set(RAPIDS_VERSION "23.06")
-set(RAFT_VERSION "23.06.02")
+set(RAPIDS_VERSION "23.08")
+set(RAFT_VERSION "23.08.00")
cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
include(../fetch_rapids.cmake)
@@ -307,6 +307,30 @@ if(RAFT_COMPILE_LIBRARY)
src/neighbors/brute_force_knn_int64_t_float_uint32_t.cu
src/neighbors/brute_force_knn_int_float_int.cu
src/neighbors/brute_force_knn_uint32_t_float_uint32_t.cu
+ src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu
+ src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu
+ src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu
+ src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu
+ src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu
+ src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu
+ src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu
+ src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu
+ src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu
+ src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu
+ src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu
+ src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu
+ src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu
+ src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu
+ src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu
+ src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu
+ src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu
+ src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu
+ src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu
+ src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu
+ src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu
+ src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu
+ src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu
+ src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu
src/neighbors/detail/ivf_flat_interleaved_scan_float_float_int64_t.cu
src/neighbors/detail/ivf_flat_interleaved_scan_int8_t_int32_t_int64_t.cu
src/neighbors/detail/ivf_flat_interleaved_scan_uint8_t_uint32_t_int64_t.cu
@@ -318,6 +342,9 @@ if(RAFT_COMPILE_LIBRARY)
src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false.cu
src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true.cu
src/neighbors/detail/ivf_pq_compute_similarity_half_half.cu
+ src/neighbors/detail/refine_host_float_float.cpp
+ src/neighbors/detail/refine_host_int8_t_float.cpp
+ src/neighbors/detail/refine_host_uint8_t_float.cpp
src/neighbors/detail/selection_faiss_int32_t_float.cu
src/neighbors/detail/selection_faiss_int_double.cu
src/neighbors/detail/selection_faiss_long_float.cu
@@ -363,6 +390,9 @@ if(RAFT_COMPILE_LIBRARY)
src/raft_runtime/distance/pairwise_distance.cu
src/raft_runtime/matrix/select_k_float_int64_t.cu
src/raft_runtime/neighbors/brute_force_knn_int64_t_float.cu
+ src/raft_runtime/neighbors/cagra_build.cu
+ src/raft_runtime/neighbors/cagra_search.cu
+ src/raft_runtime/neighbors/cagra_serialize.cu
src/raft_runtime/neighbors/ivf_flat_build.cu
src/raft_runtime/neighbors/ivf_flat_search.cu
src/raft_runtime/neighbors/ivf_flat_serialize.cu
@@ -602,7 +632,9 @@ target_link_libraries(raft::raft INTERFACE
# Use `rapids_export` for 22.04 as it will have COMPONENT support
rapids_export(
INSTALL raft
- EXPORT_SET raft-exports COMPONENTS ${raft_components} COMPONENTS_EXPORT_SET ${raft_export_sets}
+ EXPORT_SET raft-exports
+ COMPONENTS ${raft_components}
+ COMPONENTS_EXPORT_SET ${raft_export_sets}
GLOBAL_TARGETS raft compiled distributed
NAMESPACE raft::
DOCUMENTATION doc_string
@@ -613,7 +645,9 @@ rapids_export(
# * build export -------------------------------------------------------------
rapids_export(
BUILD raft
- EXPORT_SET raft-exports COMPONENTS ${raft_components} COMPONENTS_EXPORT_SET ${raft_export_sets}
+ EXPORT_SET raft-exports
+ COMPONENTS ${raft_components}
+ COMPONENTS_EXPORT_SET ${raft_export_sets}
GLOBAL_TARGETS raft compiled distributed
DOCUMENTATION doc_string
NAMESPACE raft::
diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt
index a14018a15d..6977d77684 100644
--- a/cpp/bench/ann/CMakeLists.txt
+++ b/cpp/bench/ann/CMakeLists.txt
@@ -18,14 +18,22 @@
option(RAFT_ANN_BENCH_USE_FAISS_BFKNN "Include faiss' brute-force knn algorithm in benchmark" ON)
option(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT "Include faiss' ivf flat algorithm in benchmark" ON)
option(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ "Include faiss' ivf pq algorithm in benchmark" ON)
-option(RAFT_ANN_BENCH_USE_RAFT_BFKNN "Include raft's brute-force knn algorithm in benchmark" ON)
option(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT "Include raft's ivf flat algorithm in benchmark" ON)
option(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ "Include raft's ivf pq algorithm in benchmark" ON)
+option(RAFT_ANN_BENCH_USE_RAFT_CAGRA "Include raft's CAGRA in benchmark" ON)
option(RAFT_ANN_BENCH_USE_HNSWLIB "Include hnsw algorithm in benchmark" ON)
option(RAFT_ANN_BENCH_USE_GGNN "Include ggnn algorithm in benchmark" ON)
find_package(Threads REQUIRED)
+# Disable faiss benchmarks on CUDA 12 since faiss is not yet CUDA 12-enabled.
+# https://github.com/rapidsai/raft/issues/1627
+if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0.0)
+ set(RAFT_ANN_BENCH_USE_FAISS_BFKNN OFF)
+ set(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT OFF)
+ set(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ OFF)
+endif()
+
set(RAFT_ANN_BENCH_USE_FAISS OFF)
if(RAFT_ANN_BENCH_USE_FAISS_BFKNN
OR RAFT_ANN_BENCH_USE_FAISS_IVFPQ
@@ -35,9 +43,9 @@ if(RAFT_ANN_BENCH_USE_FAISS_BFKNN
endif()
set(RAFT_ANN_BENCH_USE_RAFT OFF)
-if(RAFT_ANN_BENCH_USE_RAFT_BFKNN
- OR RAFT_ANN_BENCH_USE_RAFT_IVFPQ
- OR RAFT_ANN_BENCH_USE_RAFT_IVFFLAT
+if(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ
+ OR RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT
+ OR RAFT_ANN_BENCH_USE_RAFT_CAGRA
)
set(RAFT_ANN_BENCH_USE_RAFT ON)
endif()
@@ -133,25 +141,58 @@ if(RAFT_ANN_BENCH_USE_HNSWLIB)
)
endif()
-if(RAFT_ANN_BENCH_USE_RAFT)
+if(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ)
ConfigureAnnBench(
NAME
RAFT_IVF_PQ
PATH
bench/ann/src/raft/raft_benchmark.cu
$<$<BOOL:${RAFT_ANN_BENCH_USE_RAFT_IVF_PQ}>:bench/ann/src/raft/raft_ivf_pq.cu>
+ LINKS
+ raft::compiled
+ )
+endif()
+
+if(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT)
+ ConfigureAnnBench(
+ NAME
+ RAFT_IVF_FLAT
+ PATH
+ bench/ann/src/raft/raft_benchmark.cu
$<$<BOOL:${RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT}>:bench/ann/src/raft/raft_ivf_flat.cu>
LINKS
raft::compiled
)
endif()
-if(RAFT_ANN_BENCH_USE_FAISS)
+if(RAFT_ANN_BENCH_USE_RAFT_CAGRA)
+ ConfigureAnnBench(
+ NAME
+ RAFT_CAGRA
+ PATH
+ bench/ann/src/raft/raft_benchmark.cu
+ $<$<BOOL:${RAFT_ANN_BENCH_USE_RAFT_CAGRA}>:bench/ann/src/raft/raft_cagra.cu>
+ LINKS
+ raft::compiled
+ )
+endif()
+
+if(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT)
ConfigureAnnBench(
NAME FAISS_IVF_FLAT PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss
)
endif()
+if(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ)
+ ConfigureAnnBench(
+ NAME FAISS_IVF_PQ PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss
+ )
+endif()
+
+if(RAFT_ANN_BENCH_USE_FAISS_BFKNN)
+ ConfigureAnnBench(NAME FAISS_BFKNN PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss)
+endif()
+
if(RAFT_ANN_BENCH_USE_GGNN)
include(cmake/thirdparty/get_glog.cmake)
ConfigureAnnBench(
diff --git a/cpp/bench/ann/conf/bigann-100M.json b/cpp/bench/ann/conf/bigann-100M.json
index 5f16f3378d..0ff7df4776 100644
--- a/cpp/bench/ann/conf/bigann-100M.json
+++ b/cpp/bench/ann/conf/bigann-100M.json
@@ -168,7 +168,35 @@
"search_result_file" : "result/bigann-100M/ivf_flat/nlist100K"
},
+ {
+ "name" : "cagra.dim32",
+ "algo" : "cagra",
+ "build_param": {
+ "index_dim" : 32
+ },
+ "file" : "index/bigann-100M/cagra/dim32",
+ "search_params" : [
+ "itopk": 32,
+ "itopk": 64,
+ "itopk": 128
+ ],
+ "search_result_file" : "result/bigann-100M/cagra/dim32"
+ },
+ {
+ "name" : "cagra.dim64",
+ "algo" : "cagra",
+ "build_param": {
+ "index_dim" : 64
+ },
+ "file" : "index/bigann-100M/cagra/dim64",
+ "search_params" : [
+ "itopk": 32,
+ "itopk": 64,
+ "itopk": 128
+ ],
+ "search_result_file" : "result/bigann-100M/cagra/dim64"
+ }
]
}
diff --git a/cpp/bench/ann/conf/deep-100M.json b/cpp/bench/ann/conf/deep-100M.json
index b3a945d50e..97d670b614 100644
--- a/cpp/bench/ann/conf/deep-100M.json
+++ b/cpp/bench/ann/conf/deep-100M.json
@@ -218,6 +218,328 @@
"search_result_file" : "result/deep-100M/ivf_flat/nlist100K"
},
-
+ {
+ "name" : "cagra.dim32",
+ "algo" : "raft_cagra",
+ "build_param": {
+ "index_dim": 32,
+ "intermediate_graph_degree": 48
+ },
+ "file": "index/deep-100M/cagra/dim32",
+ "search_params": [
+ {
+ "itopk": 32,
+ "search_width": 1,
+ "max_iterations": 0,
+ "algo": "single_cta"
+ },
+ {
+ "itopk": 32,
+ "search_width": 1,
+ "max_iterations": 32,
+ "algo": "single_cta"
+ },
+ {
+ "itopk": 64,
+ "search_width": 4,
+ "max_iterations": 16,
+ "algo": "single_cta"
+ },
+ {
+ "itopk": 64,
+ "search_width": 1,
+ "max_iterations": 64,
+ "algo": "single_cta"
+ },
+ {
+ "itopk": 96,
+ "search_width": 2,
+ "max_iterations": 48,
+ "algo": "single_cta"
+ },
+ {
+ "itopk": 128,
+ "search_width": 8,
+ "max_iterations": 16,
+ "algo": "single_cta"
+ },
+ {
+ "itopk": 128,
+ "search_width": 2,
+ "max_iterations": 64,
+ "algo": "single_cta"
+ },
+ {
+ "itopk": 192,
+ "search_width": 8,
+ "max_iterations": 24,
+ "algo": "single_cta"
+ },
+ {
+ "itopk": 192,
+ "search_width": 2,
+ "max_iterations": 96,
+ "algo": "single_cta"
+ },
+ {
+ "itopk": 256,
+ "search_width": 8,
+ "max_iterations": 32,
+ "algo": "single_cta"
+ },
+ {
+ "itopk": 384,
+ "search_width": 8,
+ "max_iterations": 48,
+ "algo": "single_cta"
+ },
+ {
+ "itopk": 512,
+ "search_width": 8,
+ "max_iterations": 64,
+ "algo": "single_cta"
+ }
+ ],
+ "search_result_file": "result/deep-100M/cagra/dim32"
+ },
+ {
+ "name": "cagra.dim32.multi_cta",
+ "algo": "raft_cagra",
+ "build_param": {
+ "index_dim": 32,
+ "intermediate_graph_degree": 48
+ },
+ "file": "index/deep-100M/cagra/dim32",
+ "search_params": [
+ {
+ "itopk": 32,
+ "search_width": 1,
+ "max_iterations": 0,
+ "algo": "multi_cta"
+ },
+ {
+ "itopk": 32,
+ "search_width": 1,
+ "max_iterations": 32,
+ "algo": "multi_cta"
+ },
+ {
+ "itopk": 64,
+ "search_width": 4,
+ "max_iterations": 16,
+ "algo": "multi_cta"
+ },
+ {
+ "itopk": 64,
+ "search_width": 1,
+ "max_iterations": 64,
+ "algo": "multi_cta"
+ },
+ {
+ "itopk": 96,
+ "search_width": 2,
+ "max_iterations": 48,
+ "algo": "multi_cta"
+ },
+ {
+ "itopk": 128,
+ "search_width": 8,
+ "max_iterations": 16,
+ "algo": "multi_cta"
+ },
+ {
+ "itopk": 128,
+ "search_width": 2,
+ "max_iterations": 64,
+ "algo": "multi_cta"
+ },
+ {
+ "itopk": 192,
+ "search_width": 8,
+ "max_iterations": 24,
+ "algo": "multi_cta"
+ },
+ {
+ "itopk": 192,
+ "search_width": 2,
+ "max_iterations": 96,
+ "algo": "multi_cta"
+ },
+ {
+ "itopk": 256,
+ "search_width": 8,
+ "max_iterations": 32,
+ "algo": "multi_cta"
+ },
+ {
+ "itopk": 384,
+ "search_width": 8,
+ "max_iterations": 48,
+ "algo": "multi_cta"
+ },
+ {
+ "itopk": 512,
+ "search_width": 8,
+ "max_iterations": 64,
+ "algo": "multi_cta"
+ }
+ ],
+ "search_result_file": "result/deep-100M/cagra/dim32_multi_cta"
+ },
+ {
+ "name": "cagra.dim32.multi_kernel",
+ "algo": "raft_cagra",
+ "build_param": {
+ "index_dim": 32,
+ "intermediate_graph_degree": 48
+ },
+ "file": "index/deep-100M/cagra/dim32",
+ "search_params": [
+ {
+ "itopk": 32,
+ "search_width": 1,
+ "max_iterations": 0,
+ "algo": "multi_kernel"
+ },
+ {
+ "itopk": 32,
+ "search_width": 1,
+ "max_iterations": 32,
+ "algo": "multi_kernel"
+ },
+ {
+ "itopk": 64,
+ "search_width": 4,
+ "max_iterations": 16,
+ "algo": "multi_kernel"
+ },
+ {
+ "itopk": 64,
+ "search_width": 1,
+ "max_iterations": 64,
+ "algo": "multi_kernel"
+ },
+ {
+ "itopk": 96,
+ "search_width": 2,
+ "max_iterations": 48,
+ "algo": "multi_kernel"
+ },
+ {
+ "itopk": 128,
+ "search_width": 8,
+ "max_iterations": 16,
+ "algo": "multi_kernel"
+ },
+ {
+ "itopk": 128,
+ "search_width": 2,
+ "max_iterations": 64,
+ "algo": "multi_kernel"
+ },
+ {
+ "itopk": 192,
+ "search_width": 8,
+ "max_iterations": 24,
+ "algo": "multi_kernel"
+ },
+ {
+ "itopk": 192,
+ "search_width": 2,
+ "max_iterations": 96,
+ "algo": "multi_kernel"
+ },
+ {
+ "itopk": 256,
+ "search_width": 8,
+ "max_iterations": 32,
+ "algo": "multi_kernel"
+ },
+ {
+ "itopk": 384,
+ "search_width": 8,
+ "max_iterations": 48,
+ "algo": "multi_kernel"
+ },
+ {
+ "itopk": 512,
+ "search_width": 8,
+ "max_iterations": 64,
+ "algo": "multi_kernel"
+ }
+ ],
+ "search_result_file": "result/deep-100M/cagra/dim32_multi_kernel"
+ },
+ {
+ "name": "cagra.dim64",
+ "algo": "raft_cagra",
+ "build_param": {
+ "index_dim": 64
+ },
+ "file": "index/deep-100M/cagra/dim64",
+ "search_params" : [
+ {
+ "itopk": 32,
+ "search_width": 1,
+ "max_iterations": 0
+ },
+ {
+ "itopk": 32,
+ "search_width": 1,
+ "max_iterations": 32
+ },
+ {
+ "itopk": 64,
+ "search_width": 4,
+ "max_iterations": 16
+ },
+ {
+ "itopk": 64,
+ "search_width": 1,
+ "max_iterations": 64
+ },
+ {
+ "itopk": 96,
+ "search_width": 2,
+ "max_iterations": 48
+ },
+ {
+ "itopk": 128,
+ "search_width": 8,
+ "max_iterations": 16
+ },
+ {
+ "itopk": 128,
+ "search_width": 2,
+ "max_iterations": 64
+ },
+ {
+ "itopk": 192,
+ "search_width": 8,
+ "max_iterations": 24
+ },
+ {
+ "itopk": 192,
+ "search_width": 2,
+ "max_iterations": 96
+ },
+ {
+ "itopk": 256,
+ "search_width": 8,
+ "max_iterations": 32
+ },
+ {
+ "itopk": 384,
+ "search_width": 8,
+ "max_iterations": 48
+ },
+ {
+ "itopk": 512,
+ "search_width": 8,
+ "max_iterations": 64
+ }
+ ],
+ "search_result_file" : "result/deep-100M/cagra/dim32"
+ }
]
}
diff --git a/cpp/bench/ann/conf/glove-100-inner.json b/cpp/bench/ann/conf/glove-100-inner.json
index d210aca654..5d0bbf970c 100644
--- a/cpp/bench/ann/conf/glove-100-inner.json
+++ b/cpp/bench/ann/conf/glove-100-inner.json
@@ -789,9 +789,5 @@
],
"search_result_file" : "result/glove-100-inner/ggnn/kbuild96-segment64-refine2-k10"
- },
-
-
- ]
-
+ }]
}
diff --git a/cpp/bench/ann/conf/sift-128-euclidean.json b/cpp/bench/ann/conf/sift-128-euclidean.json
index 476c363ecd..98983fd62e 100644
--- a/cpp/bench/ann/conf/sift-128-euclidean.json
+++ b/cpp/bench/ann/conf/sift-128-euclidean.json
@@ -90,8 +90,8 @@
-
- {
+
+ {
"name": "raft_bfknn",
"algo": "raft_bfknn",
"build_param": {},
@@ -1316,6 +1316,36 @@
}
],
"search_result_file": "result/sift-128-euclidean/raft_ivf_flat/nlist16384"
+ },
+
+ {
+ "name" : "cagra.dim32",
+ "algo" : "raft_cagra",
+ "build_param": {
+ "index_dim" : 32
+ },
+ "file" : "index/sift-128-euclidean/cagra/dim32",
+ "search_params" : [
+ {"itopk": 32},
+ {"itopk": 64},
+ {"itopk": 128}
+ ],
+ "search_result_file" : "result/sift-128-euclidean/cagra/dim32"
+ },
+
+ {
+ "name" : "cagra.dim64",
+ "algo" : "raft_cagra",
+ "build_param": {
+ "index_dim" : 64
+ },
+ "file" : "index/sift-128-euclidean/cagra/dim64",
+ "search_params" : [
+ {"itopk": 32},
+ {"itopk": 64},
+ {"itopk": 128}
+ ],
+ "search_result_file" : "result/sift-128-euclidean/cagra/dim64"
}
]
}
diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp
index c34b95010f..28df4640ee 100644
--- a/cpp/bench/ann/src/common/benchmark.hpp
+++ b/cpp/bench/ann/src/common/benchmark.hpp
@@ -30,6 +30,8 @@
#include
#include
+#include
+
#include "benchmark_util.hpp"
#include "conf.h"
#include "dataset.h"
@@ -108,8 +110,8 @@ inline bool mkdir(const std::vector& dirs)
}
inline bool check(const std::vector& indices,
- bool build_mode,
- bool force_overwrite)
+ const bool build_mode,
+ const bool force_overwrite)
{
std::vector files_should_exist;
std::vector dirs_should_exist;
@@ -119,7 +121,7 @@ inline bool check(const std::vector& indices,
output_files.push_back(index.file);
output_files.push_back(index.file + ".txt");
- auto pos = index.file.rfind('/');
+ const auto pos = index.file.rfind('/');
if (pos != std::string::npos) { dirs_should_exist.push_back(index.file.substr(0, pos)); }
} else {
files_should_exist.push_back(index.file);
@@ -128,7 +130,7 @@ inline bool check(const std::vector& indices,
output_files.push_back(index.search_result_file + ".0.ibin");
output_files.push_back(index.search_result_file + ".0.txt");
- auto pos = index.search_result_file.rfind('/');
+ const auto pos = index.search_result_file.rfind('/');
if (pos != std::string::npos) {
dirs_should_exist.push_back(index.search_result_file.substr(0, pos));
}
@@ -149,7 +151,7 @@ inline void write_build_info(const std::string& file_prefix,
const std::string& name,
const std::string& algo,
const std::string& build_param,
- float build_time)
+ const float build_time)
{
std::ofstream ofs(file_prefix + ".txt");
if (!ofs) { throw std::runtime_error("can't open build info file: " + file_prefix + ".txt"); }
@@ -175,13 +177,13 @@ void build(const Dataset* dataset, const std::vector& i
for (const auto& index : indices) {
log_info("creating algo '%s', param=%s", index.algo.c_str(), index.build_param.dump().c_str());
- auto algo = create_algo(index.algo,
- dataset->distance(),
- dataset->dim(),
- index.refine_ratio,
- index.build_param,
- index.dev_list);
- auto algo_property = algo->get_property();
+ const auto algo = create_algo(index.algo,
+ dataset->distance(),
+ dataset->dim(),
+ index.refine_ratio,
+ index.build_param,
+ index.dev_list);
+ const auto algo_property = algo->get_property();
const T* base_set_ptr = nullptr;
if (algo_property.dataset_memory_type == MemoryType::Host) {
@@ -203,7 +205,7 @@ void build(const Dataset* dataset, const std::vector& i
Timer timer;
algo->build(base_set_ptr, dataset->base_set_size(), stream);
RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
- float elapsed_ms = timer.elapsed_ms();
+ const float elapsed_ms = timer.elapsed_ms();
#ifdef NVTX
nvtxRangePop();
#endif
@@ -232,15 +234,17 @@ inline void write_search_result(const std::string& file_prefix,
const std::string& algo,
const std::string& build_param,
const std::string& search_param,
- int batch_size,
- int run_count,
- int k,
+ std::size_t batch_size,
+ unsigned run_count,
+ unsigned k,
float search_time_average,
float search_time_p99,
float search_time_p999,
+ float query_per_second,
const int* neighbors,
size_t query_set_size)
{
+ log_info("throughput : %e [QPS]", query_per_second);
std::ofstream ofs(file_prefix + ".txt");
if (!ofs) { throw std::runtime_error("can't open search result file: " + file_prefix + ".txt"); }
ofs << "dataset: " << dataset << "\n"
@@ -254,13 +258,16 @@ inline void write_search_result(const std::string& file_prefix,
<< "batch_size: " << batch_size << "\n"
<< "run_count: " << run_count << "\n"
<< "k: " << k << "\n"
+ << "query_per_second: " << query_per_second << "\n"
<< "average_search_time: " << search_time_average << endl;
+
if (search_time_p99 != std::numeric_limits::max()) {
ofs << "p99_search_time: " << search_time_p99 << endl;
}
if (search_time_p999 != std::numeric_limits::max()) {
ofs << "p999_search_time: " << search_time_p999 << endl;
}
+
ofs.close();
if (!ofs) {
throw std::runtime_error("can't write to search result file: " + file_prefix + ".txt");
@@ -280,15 +287,15 @@ inline void search(const Dataset* dataset, const std::vectorname().c_str(),
dataset->query_set_size());
- const T* query_set = dataset->query_set();
+ const T* const query_set = dataset->query_set();
// query set is usually much smaller than base set, so load it eagerly
- const T* d_query_set = dataset->query_set_on_gpu();
- size_t query_set_size = dataset->query_set_size();
+ const T* const d_query_set = dataset->query_set_on_gpu();
+ const size_t query_set_size = dataset->query_set_size();
// currently all indices has same batch_size, k and run_count
- const int batch_size = indices[0].batch_size;
- const int k = indices[0].k;
- const int run_count = indices[0].run_count;
+ const std::size_t batch_size = indices[0].batch_size;
+ const unsigned k = indices[0].k;
+ const unsigned run_count = indices[0].run_count;
log_info(
"basic search parameters: batch_size = %d, k = %d, run_count = %d", batch_size, k, run_count);
if (query_set_size % batch_size != 0) {
@@ -297,10 +304,10 @@ inline void search(const Dataset* dataset, const std::vector search_times;
search_times.reserve(num_batches);
std::size_t* d_neighbors;
@@ -310,13 +317,13 @@ inline void search(const Dataset* dataset, const std::vector(index.algo,
- dataset->distance(),
- dataset->dim(),
- index.refine_ratio,
- index.build_param,
- index.dev_list);
- auto algo_property = algo->get_property();
+ const auto algo = create_algo(index.algo,
+ dataset->distance(),
+ dataset->dim(),
+ index.refine_ratio,
+ index.build_param,
+ index.dev_list);
+ const auto algo_property = algo->get_property();
log_info("loading index '%s' from file '%s'", index.name.c_str(), index.file.c_str());
algo->load(index.file);
@@ -349,7 +356,7 @@ inline void search(const Dataset* dataset, const std::vector(index.algo, index.search_params[i]);
+ const auto p_param = create_search_param(index.algo, index.search_params[i]);
algo->set_search_param(*p_param);
log_info("search with param: %s", index.search_params[i].dump().c_str());
@@ -364,11 +371,13 @@ inline void search(const Dataset* dataset, const std::vector::max();
float best_search_time_p99 = std::numeric_limits::max();
float best_search_time_p999 = std::numeric_limits::max();
- for (int run = 0; run < run_count; ++run) {
+ float total_search_time = 0;
+ for (unsigned run = 0; run < run_count; ++run) {
log_info("run %d / %d", run + 1, run_count);
for (std::size_t batch_id = 0; batch_id < num_batches; ++batch_id) {
- std::size_t row = batch_id * batch_size;
- int actual_batch_size = (batch_id == num_batches - 1) ? query_set_size - row : batch_size;
+ const std::size_t row = batch_id * batch_size;
+ const std::size_t actual_batch_size =
+ (batch_id == num_batches - 1) ? query_set_size - row : batch_size;
RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
#ifdef NVTX
string nvtx_label = "batch" + to_string(batch_id);
@@ -389,7 +398,7 @@ inline void search(const Dataset* dataset, const std::vector* dataset, const std::vector= 100) {
std::sort(search_times.begin(), search_times.end());
- auto calc_percentile_pos = [](float percentile, size_t N) {
+ const auto calc_percentile_pos = [](float percentile, size_t N) {
return static_cast(std::ceil(percentile / 100.0 * N)) - 1;
};
- float search_time_p99 = search_times[calc_percentile_pos(99, search_times.size())];
- best_search_time_p99 = std::min(best_search_time_p99, search_time_p99);
+ const float search_time_p99 = search_times[calc_percentile_pos(99, search_times.size())];
+ best_search_time_p99 = std::min(best_search_time_p99, search_time_p99);
if (search_times.size() >= 1000) {
- float search_time_p999 = search_times[calc_percentile_pos(99.9, search_times.size())];
- best_search_time_p999 = std::min(best_search_time_p999, search_time_p999);
+ const float search_time_p999 =
+ search_times[calc_percentile_pos(99.9, search_times.size())];
+ best_search_time_p999 = std::min(best_search_time_p999, search_time_p999);
}
}
search_times.clear();
}
RAFT_CUDA_TRY(cudaDeviceSynchronize());
RAFT_CUDA_TRY(cudaPeekAtLastError());
+ const auto query_per_second =
+ (run_count * raft::round_down_safe(query_set_size, batch_size)) / total_search_time;
if (algo_property.query_memory_type == MemoryType::Device) {
RAFT_CUDA_TRY(cudaMemcpy(neighbors,
@@ -436,7 +450,7 @@ inline void search(const Dataset* dataset, const std::vector* dataset, const std::vector
-inline int dispatch_benchmark(Configuration& conf,
- std::string& index_patterns,
+inline int dispatch_benchmark(const Configuration& conf,
+ const std::string& index_patterns,
bool force_overwrite,
bool only_check,
bool build_mode,
bool search_mode)
{
try {
- auto dataset_conf = conf.get_dataset_conf();
+ const auto dataset_conf = conf.get_dataset_conf();
BinDataset dataset(dataset_conf.name,
dataset_conf.base_file,
diff --git a/cpp/bench/ann/src/common/conf.cpp b/cpp/bench/ann/src/common/conf.cpp
index f690f68783..d180f37973 100644
--- a/cpp/bench/ann/src/common/conf.cpp
+++ b/cpp/bench/ann/src/common/conf.cpp
@@ -78,7 +78,7 @@ void Configuration::parse_dataset_(const nlohmann::json& conf)
} else if (!filename.compare(filename.size() - 5, 5, "i8bin")) {
dataset_conf_.dtype = "int8";
} else {
- log_error("Could not determine data type of the dataset");
+ log_error("Could not determine data type of the dataset %s", filename.c_str());
}
}
}
diff --git a/cpp/bench/ann/src/common/dataset.h b/cpp/bench/ann/src/common/dataset.h
index 46dd66d649..ae05cd02a1 100644
--- a/cpp/bench/ann/src/common/dataset.h
+++ b/cpp/bench/ann/src/common/dataset.h
@@ -14,21 +14,27 @@
* limitations under the License.
*/
#pragma once
+
+#include
+
+#ifndef CPU_ONLY
#include
+#include
+#else
+typedef uint16_t half;
+#endif
+
#include
#include
#include
#include
-#include
#include
#include
#include
#include
#include
-#include
-
namespace raft::bench::ann {
// http://big-ann-benchmarks.com/index.html:
@@ -46,13 +52,17 @@ class BinFile {
const std::string& mode,
uint32_t subset_first_row = 0,
uint32_t subset_size = 0);
- ~BinFile() { fclose(fp_); }
+ ~BinFile()
+ {
+ if (fp_) { fclose(fp_); }
+ }
BinFile(const BinFile&) = delete;
BinFile& operator=(const BinFile&) = delete;
- void get_shape(size_t* nrows, int* ndims)
+ void get_shape(size_t* nrows, int* ndims) const
{
assert(read_mode_);
+ if (!fp_) { open_file_(); }
*nrows = nrows_;
*ndims = ndims_;
}
@@ -60,6 +70,7 @@ class BinFile {
void read(T* data) const
{
assert(read_mode_);
+ if (!fp_) { open_file_(); }
size_t total = static_cast(nrows_) * ndims_;
if (fread(data, sizeof(T), total, fp_) != total) {
throw std::runtime_error("fread() BinFile " + file_ + " failed");
@@ -69,6 +80,7 @@ class BinFile {
void write(const T* data, uint32_t nrows, uint32_t ndims)
{
assert(!read_mode_);
+ if (!fp_) { open_file_(); }
if (fwrite(&nrows, sizeof(uint32_t), 1, fp_) != 1) {
throw std::runtime_error("fwrite() BinFile " + file_ + " failed");
}
@@ -82,34 +94,41 @@ class BinFile {
}
}
- void* map() const
+ T* map() const
{
assert(read_mode_);
- int fid = fileno(fp_);
- auto mmap_ptr = mmap(NULL, file_size_, PROT_READ, MAP_PRIVATE, fid, 0);
- if (mmap_ptr == MAP_FAILED) {
+ if (!fp_) { open_file_(); }
+ int fid = fileno(fp_);
+ mapped_ptr_ = mmap(nullptr, file_size_, PROT_READ, MAP_PRIVATE, fid, 0);
+ if (mapped_ptr_ == MAP_FAILED) {
throw std::runtime_error("mmap error: Value of errno " + std::to_string(errno) + ", " +
std::string(strerror(errno)));
}
- return mmap_ptr;
+ return reinterpret_cast(reinterpret_cast(mapped_ptr_) + 2 * sizeof(uint32_t) +
+ subset_first_row_ * ndims_ * sizeof(T));
}
- void unmap(void* data) const
+ void unmap() const
{
- if (munmap(data, file_size_) == -1) {
+ if (munmap(mapped_ptr_, file_size_) == -1) {
throw std::runtime_error("munmap error: " + std::string(strerror(errno)));
}
}
private:
void check_suffix_();
+ void open_file_() const;
std::string file_;
- FILE* fp_;
bool read_mode_;
- uint32_t nrows_;
- uint32_t ndims_;
- size_t file_size_;
+ uint32_t subset_first_row_;
+ uint32_t subset_size_;
+
+ mutable FILE* fp_;
+ mutable uint32_t nrows_;
+ mutable uint32_t ndims_;
+ mutable size_t file_size_;
+ mutable void* mapped_ptr_;
};
template
@@ -117,23 +136,32 @@ BinFile::BinFile(const std::string& file,
const std::string& mode,
uint32_t subset_first_row,
uint32_t subset_size)
- : file_(file)
+ : file_(file),
+ read_mode_(mode == "r"),
+ subset_first_row_(subset_first_row),
+ subset_size_(subset_size),
+ fp_(nullptr)
{
check_suffix_();
- if (mode == "r") {
- read_mode_ = true;
- } else if (mode == "w") {
- read_mode_ = false;
- if (subset_first_row != 0) {
- throw std::runtime_error("subset_first_row should be zero for write mode");
+ if (!read_mode_) {
+ if (mode == "w") {
+ if (subset_first_row != 0) {
+ throw std::runtime_error("subset_first_row should be zero for write mode");
+ }
+ if (subset_size != 0) {
+ throw std::runtime_error("subset_size should be zero for write mode");
+ }
+ } else {
+ throw std::runtime_error("BinFile's mode must be either 'r' or 'w': " + file_);
}
- if (subset_size != 0) { throw std::runtime_error("subset_size should be zero for write mode"); }
- } else {
- throw std::runtime_error("BinFile's mode must be either 'r' or 'w': " + file_);
}
+}
- fp_ = fopen(file_.c_str(), mode.c_str());
+template
+void BinFile::open_file_() const
+{
+ fp_ = fopen(file_.c_str(), read_mode_ ? "r" : "w");
if (!fp_) { throw std::runtime_error("open BinFile failed: " + file_); }
if (read_mode_) {
@@ -156,24 +184,24 @@ BinFile::BinFile(const std::string& file,
std::to_string(file_size_));
}
- if (subset_first_row >= nrows_) {
- throw std::runtime_error(file_ + ": subset_first_row (" + std::to_string(subset_first_row) +
+ if (subset_first_row_ >= nrows_) {
+ throw std::runtime_error(file_ + ": subset_first_row (" + std::to_string(subset_first_row_) +
") >= nrows (" + std::to_string(nrows_) + ")");
}
- if (subset_first_row + subset_size > nrows_) {
- throw std::runtime_error(file_ + ": subset_first_row (" + std::to_string(subset_first_row) +
- ") + subset_size (" + std::to_string(subset_size) + ") > nrows (" +
+ if (subset_first_row_ + subset_size_ > nrows_) {
+ throw std::runtime_error(file_ + ": subset_first_row (" + std::to_string(subset_first_row_) +
+ ") + subset_size (" + std::to_string(subset_size_) + ") > nrows (" +
std::to_string(nrows_) + ")");
}
- if (subset_first_row) {
+ if (subset_first_row_) {
static_assert(sizeof(long) == 8, "fseek() don't support 64-bit offset");
- if (fseek(fp_, sizeof(T) * subset_first_row * ndims_, SEEK_CUR) == -1) {
+ if (fseek(fp_, sizeof(T) * subset_first_row_ * ndims_, SEEK_CUR) == -1) {
throw std::runtime_error(file_ + ": fseek failed");
}
- nrows_ -= subset_first_row;
+ nrows_ -= subset_first_row_;
}
- if (subset_size) { nrows_ = subset_size; }
+ if (subset_size_) { nrows_ = subset_size_; }
}
}
@@ -225,9 +253,9 @@ class Dataset {
std::string name() const { return name_; }
std::string distance() const { return distance_; }
- int dim() const { return dim_; }
- size_t base_set_size() const { return base_set_size_; }
- size_t query_set_size() const { return query_set_size_; }
+ virtual int dim() const = 0;
+ virtual size_t base_set_size() const = 0;
+ virtual size_t query_set_size() const = 0;
// load data lazily, so don't pay the overhead of reading unneeded set
// e.g. don't load base set when searching
@@ -254,9 +282,6 @@ class Dataset {
std::string name_;
std::string distance_;
- int dim_;
- size_t base_set_size_;
- size_t query_set_size_;
mutable T* base_set_ = nullptr;
mutable T* query_set_ = nullptr;
@@ -270,31 +295,37 @@ Dataset::~Dataset()
{
delete[] base_set_;
delete[] query_set_;
- if (d_base_set_) { RAFT_CUDA_TRY_NO_THROW(cudaFree(d_base_set_)); }
- if (d_query_set_) { RAFT_CUDA_TRY_NO_THROW(cudaFree(d_query_set_)); }
+#ifndef CPU_ONLY
+ if (d_base_set_) { cudaFree(d_base_set_); }
+ if (d_query_set_) { cudaFree(d_query_set_); }
+#endif
}
template
const T* Dataset::base_set_on_gpu() const
{
+#ifndef CPU_ONLY
if (!d_base_set_) {
base_set();
- RAFT_CUDA_TRY(cudaMalloc((void**)&d_base_set_, base_set_size_ * dim_ * sizeof(T)));
+ RAFT_CUDA_TRY(cudaMalloc((void**)&d_base_set_, base_set_size() * dim() * sizeof(T)));
RAFT_CUDA_TRY(cudaMemcpy(
- d_base_set_, base_set_, base_set_size_ * dim_ * sizeof(T), cudaMemcpyHostToDevice));
+ d_base_set_, base_set_, base_set_size() * dim() * sizeof(T), cudaMemcpyHostToDevice));
}
+#endif
return d_base_set_;
}
template
const T* Dataset::query_set_on_gpu() const
{
+#ifndef CPU_ONLY
if (!d_query_set_) {
query_set();
- RAFT_CUDA_TRY(cudaMalloc((void**)&d_query_set_, query_set_size_ * dim_ * sizeof(T)));
+ RAFT_CUDA_TRY(cudaMalloc((void**)&d_query_set_, query_set_size() * dim() * sizeof(T)));
RAFT_CUDA_TRY(cudaMemcpy(
- d_query_set_, query_set_, query_set_size_ * dim_ * sizeof(T), cudaMemcpyHostToDevice));
+ d_query_set_, query_set_, query_set_size() * dim() * sizeof(T), cudaMemcpyHostToDevice));
}
+#endif
return d_query_set_;
}
@@ -316,24 +347,24 @@ class BinDataset : public Dataset {
const std::string& distance);
~BinDataset()
{
- if (this->mapped_base_set_) {
- base_file_.unmap(reinterpret_cast(this->mapped_base_set_) - subset_offset_);
- }
+ if (this->mapped_base_set_) { base_file_.unmap(); }
}
+ int dim() const override;
+ size_t base_set_size() const override;
+ size_t query_set_size() const override;
+
private:
void load_base_set_() const override;
void load_query_set_() const override;
void map_base_set_() const override;
- using Dataset::dim_;
- using Dataset::base_set_size_;
- using Dataset::query_set_size_;
+ mutable int dim_ = 0;
+ mutable size_t base_set_size_ = 0;
+ mutable size_t query_set_size_ = 0;
BinFile base_file_;
BinFile query_file_;
-
- size_t subset_offset_;
};
template
@@ -345,37 +376,71 @@ BinDataset::BinDataset(const std::string& name,
const std::string& distance)
: Dataset(name, distance),
base_file_(base_file, "r", subset_first_row, subset_size),
- query_file_(query_file, "r"),
- subset_offset_(2 * sizeof(uint32_t) + subset_first_row * dim_ * sizeof(T))
+ query_file_(query_file, "r")
+{
+}
+
+template
+int BinDataset::dim() const
+{
+ if (dim_ > 0) { return dim_; }
+ if (base_set_size() > 0) { return dim_; }
+ if (query_set_size() > 0) { return dim_; }
+ return dim_;
+}
+
+template
+size_t BinDataset::query_set_size() const
{
- base_file_.get_shape(&base_set_size_, &dim_);
- int query_dim;
- query_file_.get_shape(&query_set_size_, &query_dim);
- if (query_dim != dim_) {
+ if (query_set_size_ > 0) { return query_set_size_; }
+ int dim;
+ query_file_.get_shape(&query_set_size_, &dim);
+ if (query_set_size_ == 0) { throw std::runtime_error("Zero query set size"); }
+ if (dim == 0) { throw std::runtime_error("Zero query set dim"); }
+ if (dim_ == 0) {
+ dim_ = dim;
+ } else if (dim_ != dim) {
throw std::runtime_error("base set dim (" + std::to_string(dim_) + ") != query set dim (" +
- std::to_string(query_dim));
+ std::to_string(dim));
+ }
+ return query_set_size_;
+}
+
+template
+size_t BinDataset::base_set_size() const
+{
+ if (base_set_size_ > 0) { return base_set_size_; }
+ int dim;
+ base_file_.get_shape(&base_set_size_, &dim);
+ if (base_set_size_ == 0) { throw std::runtime_error("Zero base set size"); }
+ if (dim == 0) { throw std::runtime_error("Zero base set dim"); }
+ if (dim_ == 0) {
+ dim_ = dim;
+ } else if (dim_ != dim) {
+ throw std::runtime_error("base set dim (" + std::to_string(dim) + ") != query set dim (" +
+ std::to_string(dim_));
}
+ return base_set_size_;
}
template
void BinDataset::load_base_set_() const
{
- this->base_set_ = new T[base_set_size_ * dim_];
+ this->base_set_ = new T[base_set_size() * dim()];
base_file_.read(this->base_set_);
}
template
void BinDataset::load_query_set_() const
{
- this->query_set_ = new T[query_set_size_ * dim_];
+ this->query_set_ = new T[query_set_size() * dim()];
query_file_.read(this->query_set_);
}
template
void BinDataset::map_base_set_() const
{
- char* original_map_ptr = static_cast(base_file_.map());
- this->mapped_base_set_ = reinterpret_cast(original_map_ptr + subset_offset_);
+ this->mapped_base_set_ = base_file_.map();
}
} // namespace raft::bench::ann
diff --git a/cpp/bench/ann/src/faiss/faiss_benchmark.cu b/cpp/bench/ann/src/faiss/faiss_benchmark.cu
index 294da9a14f..0bad86905b 100644
--- a/cpp/bench/ann/src/faiss/faiss_benchmark.cu
+++ b/cpp/bench/ann/src/faiss/faiss_benchmark.cu
@@ -104,10 +104,10 @@ std::unique_ptr> create_algo(const std::string& algo,
// stop compiler warning; not all algorithms support multi-GPU so it may not be used
(void)dev_list;
- raft::bench::ann::Metric metric = parse_metric(distance);
std::unique_ptr> ann;
if constexpr (std::is_same_v) {
+ raft::bench::ann::Metric metric = parse_metric(distance);
if (algo == "faiss_gpu_ivf_flat") {
ann = make_algo(metric, dim, conf, dev_list);
} else if (algo == "faiss_gpu_ivf_pq") {
@@ -147,4 +147,4 @@ std::unique_ptr::AnnSearchParam> create_search
#include "../common/benchmark.hpp"
-int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); }
\ No newline at end of file
+int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); }
diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu
index baff1b1c45..dcc4ae18be 100644
--- a/cpp/bench/ann/src/raft/raft_benchmark.cu
+++ b/cpp/bench/ann/src/raft/raft_benchmark.cu
@@ -40,6 +40,12 @@ extern template class raft::bench::ann::RaftIvfPQ;
extern template class raft::bench::ann::RaftIvfPQ;
extern template class raft::bench::ann::RaftIvfPQ;
#endif
+#ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA
+#include "raft_cagra_wrapper.h"
+extern template class raft::bench::ann::RaftCagra<float, uint32_t>;
+extern template class raft::bench::ann::RaftCagra<uint8_t, uint32_t>;
+extern template class raft::bench::ann::RaftCagra<int8_t, uint32_t>;
+#endif
#define JSON_DIAGNOSTICS 1
#include
@@ -117,28 +123,43 @@ void parse_search_param(const nlohmann::json& conf,
}
#endif
-template class Algo>
-std::unique_ptr> make_algo(raft::bench::ann::Metric metric,
- int dim,
- const nlohmann::json& conf)
+#ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA
+template
+void parse_build_param(const nlohmann::json& conf,
+ typename raft::bench::ann::RaftCagra::BuildParam& param)
{
- typename Algo::BuildParam param;
- parse_build_param(conf, param);
- return std::make_unique>(metric, dim, param);
+ if (conf.contains("index_dim")) {
+ param.graph_degree = conf.at("index_dim");
+ param.intermediate_graph_degree = param.graph_degree * 2;
+ }
+ if (conf.contains("intermediate_graph_degree")) {
+ param.intermediate_graph_degree = conf.at("intermediate_graph_degree");
+ }
}
-template class Algo>
-std::unique_ptr> make_algo(raft::bench::ann::Metric metric,
- int dim,
- const nlohmann::json& conf,
- const std::vector& dev_list)
+template
+void parse_search_param(const nlohmann::json& conf,
+ typename raft::bench::ann::RaftCagra::SearchParam& param)
{
- typename Algo::BuildParam param;
- parse_build_param(conf, param);
-
- (void)dev_list;
- return std::make_unique>(metric, dim, param);
+ if (conf.contains("itopk")) { param.p.itopk_size = conf.at("itopk"); }
+ if (conf.contains("search_width")) { param.p.search_width = conf.at("search_width"); }
+ if (conf.contains("max_iterations")) { param.p.max_iterations = conf.at("max_iterations"); }
+ if (conf.contains("algo")) {
+ if (conf.at("algo") == "single_cta") {
+ param.p.algo = raft::neighbors::experimental::cagra::search_algo::SINGLE_CTA;
+ } else if (conf.at("algo") == "multi_cta") {
+ param.p.algo = raft::neighbors::experimental::cagra::search_algo::MULTI_CTA;
+ } else if (conf.at("algo") == "multi_kernel") {
+ param.p.algo = raft::neighbors::experimental::cagra::search_algo::MULTI_KERNEL;
+ } else if (conf.at("algo") == "auto") {
+ param.p.algo = raft::neighbors::experimental::cagra::search_algo::AUTO;
+ } else {
+ std::string tmp = conf.at("algo");
+ THROW("Invalid value for algo: %s", tmp.c_str());
+ }
+ }
}
+#endif
template
std::unique_ptr> create_algo(const std::string& algo,
@@ -176,6 +197,13 @@ std::unique_ptr> create_algo(const std::string& algo,
ann =
std::make_unique>(metric, dim, param, refine_ratio);
}
+#endif
+#ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA
+ if (algo == "raft_cagra") {
+ typename raft::bench::ann::RaftCagra::BuildParam param;
+ parse_build_param(conf, param);
+ ann = std::make_unique>(metric, dim, param);
+ }
#endif
if (!ann) { throw std::runtime_error("invalid algo: '" + algo + "'"); }
@@ -207,6 +235,13 @@ std::unique_ptr::AnnSearchParam> create_search
parse_search_param(conf, *param);
return param;
}
+#endif
+#ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA
+ if (algo == "raft_cagra") {
+ auto param = std::make_unique::SearchParam>();
+ parse_search_param(conf, *param);
+ return param;
+ }
#endif
// else
throw std::runtime_error("invalid algo: '" + algo + "'");
@@ -216,4 +251,4 @@ std::unique_ptr::AnnSearchParam> create_search
#include "../common/benchmark.hpp"
-int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); }
\ No newline at end of file
+int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); }
diff --git a/cpp/bench/ann/src/raft/raft_cagra.cu b/cpp/bench/ann/src/raft/raft_cagra.cu
new file mode 100644
index 0000000000..be18af7f2c
--- /dev/null
+++ b/cpp/bench/ann/src/raft/raft_cagra.cu
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "raft_cagra_wrapper.h"
+
+namespace raft::bench::ann {
+template class RaftCagra<float, uint32_t>;
+template class RaftCagra<uint8_t, uint32_t>;
+template class RaftCagra<int8_t, uint32_t>;
+} // namespace raft::bench::ann
diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h
new file mode 100644
index 0000000000..d47de1eeac
--- /dev/null
+++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../common/ann_types.hpp"
+#include "raft_ann_bench_utils.h"
+#include
+
+namespace raft::bench::ann {
+
+template
+class RaftCagra : public ANN {
+ public:
+ using typename ANN::AnnSearchParam;
+
+ struct SearchParam : public AnnSearchParam {
+ raft::neighbors::experimental::cagra::search_params p;
+ };
+
+ using BuildParam = raft::neighbors::cagra::index_params;
+
+ RaftCagra(Metric metric, int dim, const BuildParam& param);
+
+ void build(const T* dataset, size_t nrow, cudaStream_t stream) final;
+
+ void set_search_param(const AnnSearchParam& param) override;
+
+ // TODO: if the number of results is less than k, the remaining elements of 'neighbors'
+ // will be filled with (size_t)-1
+ void search(const T* queries,
+ int batch_size,
+ int k,
+ size_t* neighbors,
+ float* distances,
+ cudaStream_t stream = 0) const override;
+
+ // to enable dataset access from GPU memory
+ AlgoProperty get_property() const override
+ {
+ AlgoProperty property;
+ property.dataset_memory_type = MemoryType::HostMmap;
+ property.query_memory_type = MemoryType::Device;
+ property.need_dataset_when_search = true;
+ return property;
+ }
+ void save(const std::string& file) const override;
+ void load(const std::string&) override;
+
+ ~RaftCagra() noexcept { rmm::mr::set_current_device_resource(mr_.get_upstream()); }
+
+ private:
+ raft::device_resources handle_;
+ BuildParam index_params_;
+ raft::neighbors::cagra::search_params search_params_;
+ std::optional> index_;
+ int device_;
+ int dimension_;
+ rmm::mr::pool_memory_resource mr_;
+};
+
+template
+RaftCagra::RaftCagra(Metric metric, int dim, const BuildParam& param)
+ : ANN(metric, dim),
+ index_params_(param),
+ dimension_(dim),
+ mr_(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull)
+{
+ rmm::mr::set_current_device_resource(&mr_);
+ index_params_.metric = parse_metric_type(metric);
+ RAFT_CUDA_TRY(cudaGetDevice(&device_));
+}
+
+template
+void RaftCagra::build(const T* dataset, size_t nrow, cudaStream_t)
+{
+ if (get_property().dataset_memory_type != MemoryType::Device) {
+ auto dataset_view =
+ raft::make_host_matrix_view(dataset, IdxT(nrow), dimension_);
+ index_.emplace(raft::neighbors::cagra::build(handle_, index_params_, dataset_view));
+ } else {
+ auto dataset_view =
+ raft::make_device_matrix_view(dataset, IdxT(nrow), dimension_);
+ index_.emplace(raft::neighbors::cagra::build(handle_, index_params_, dataset_view));
+ }
+ return;
+}
+
+template
+void RaftCagra::set_search_param(const AnnSearchParam& param)
+{
+ auto search_param = dynamic_cast(param);
+ search_params_ = search_param.p;
+ return;
+}
+
+template
+void RaftCagra::save(const std::string& file) const
+{
+ raft::neighbors::cagra::serialize(handle_, file, *index_);
+ return;
+}
+
+template
+void RaftCagra::load(const std::string& file)
+{
+ index_ = raft::neighbors::cagra::deserialize(handle_, file);
+ return;
+}
+
+template
+void RaftCagra::search(
+ const T* queries, int batch_size, int k, size_t* neighbors, float* distances, cudaStream_t) const
+{
+ IdxT* neighbors_IdxT;
+ rmm::device_uvector neighbors_storage(0, resource::get_cuda_stream(handle_));
+ if constexpr (std::is_same::value) {
+ neighbors_IdxT = neighbors;
+ } else {
+ neighbors_storage.resize(batch_size * k, resource::get_cuda_stream(handle_));
+ neighbors_IdxT = neighbors_storage.data();
+ }
+
+ auto queries_view =
+ raft::make_device_matrix_view(queries, batch_size, dimension_);
+ auto neighbors_view = raft::make_device_matrix_view(neighbors_IdxT, batch_size, k);
+ auto distances_view = raft::make_device_matrix_view(distances, batch_size, k);
+
+ raft::neighbors::cagra::search(
+ handle_, search_params_, *index_, queries_view, neighbors_view, distances_view);
+
+ if (!std::is_same::value) {
+ raft::linalg::unaryOp(neighbors,
+ neighbors_IdxT,
+ batch_size * k,
+ raft::cast_op(),
+ resource::get_cuda_stream(handle_));
+ }
+
+ handle_.sync_stream();
+ return;
+}
+} // namespace raft::bench::ann
diff --git a/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h b/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h
index 36b4931460..42fb9bd4a1 100644
--- a/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h
+++ b/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h
@@ -79,6 +79,8 @@ class RaftIvfFlatGpu : public ANN {
void save(const std::string& file) const override;
void load(const std::string&) override;
+ ~RaftIvfFlatGpu() noexcept { rmm::mr::set_current_device_resource(mr_.get_upstream()); }
+
private:
raft::device_resources handle_;
BuildParam index_params_;
@@ -96,7 +98,9 @@ RaftIvfFlatGpu::RaftIvfFlatGpu(Metric metric, int dim, const BuildParam
dimension_(dim),
mr_(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull)
{
- index_params_.metric = parse_metric_type(metric);
+ index_params_.metric = parse_metric_type(metric);
+ index_params_.conservative_memory_allocation = true;
+ rmm::mr::set_current_device_resource(&mr_);
RAFT_CUDA_TRY(cudaGetDevice(&device_));
}
diff --git a/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h b/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h
index c390d0bd7e..30bd5ab4d6 100644
--- a/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h
+++ b/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h
@@ -73,12 +73,14 @@ class RaftIvfPQ : public ANN {
AlgoProperty property;
property.dataset_memory_type = MemoryType::Host;
property.query_memory_type = MemoryType::Device;
- property.need_dataset_when_search = true; // actually it is only used during refinement
+ property.need_dataset_when_search = refine_ratio_ > 1.0;
return property;
}
void save(const std::string& file) const override;
void load(const std::string&) override;
+ ~RaftIvfPQ() noexcept { rmm::mr::set_current_device_resource(mr_.get_upstream()); }
+
private:
raft::device_resources handle_;
BuildParam index_params_;
@@ -98,6 +100,7 @@ RaftIvfPQ::RaftIvfPQ(Metric metric, int dim, const BuildParam& param, f
refine_ratio_(refine_ratio),
mr_(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull)
{
+ rmm::mr::set_current_device_resource(&mr_);
index_params_.metric = parse_metric_type(metric);
RAFT_CUDA_TRY(cudaGetDevice(&device_));
}
diff --git a/cpp/bench/prims/CMakeLists.txt b/cpp/bench/prims/CMakeLists.txt
index c90886841b..e8d4739384 100644
--- a/cpp/bench/prims/CMakeLists.txt
+++ b/cpp/bench/prims/CMakeLists.txt
@@ -141,6 +141,7 @@ if(BUILD_PRIMS_BENCH)
PATH
bench/prims/neighbors/knn/brute_force_float_int64_t.cu
bench/prims/neighbors/knn/brute_force_float_uint32_t.cu
+ bench/prims/neighbors/knn/cagra_float_uint32_t.cu
bench/prims/neighbors/knn/ivf_flat_float_int64_t.cu
bench/prims/neighbors/knn/ivf_flat_int8_t_int64_t.cu
bench/prims/neighbors/knn/ivf_flat_uint8_t_int64_t.cu
diff --git a/cpp/bench/prims/neighbors/cagra_bench.cuh b/cpp/bench/prims/neighbors/cagra_bench.cuh
new file mode 100644
index 0000000000..bb405088bb
--- /dev/null
+++ b/cpp/bench/prims/neighbors/cagra_bench.cuh
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <common/benchmark.hpp>
+#include <raft/neighbors/cagra.cuh>
+#include <raft/random/rng.cuh>
+#include <raft/util/itertools.hpp>
+
+#include <optional>
+
+namespace raft::bench::neighbors {
+
+struct params {
+ /** Size of the dataset. */
+ size_t n_samples;
+ /** Number of dimensions in the dataset. */
+ int n_dims;
+ /** The batch size -- number of KNN searches. */
+ int n_queries;
+ /** Number of nearest neighbours to find for every probe. */
+ int k;
+ /** kNN graph degree*/
+ int degree;
+ int itopk_size;
+ int block_size;
+ int search_width;
+ int max_iterations;
+};
+
+template <typename T, typename IdxT>
+struct CagraBench : public fixture {
+ explicit CagraBench(const params& ps)
+ : fixture(true),
+ params_(ps),
+ queries_(make_device_matrix<T, int64_t>(handle, ps.n_queries, ps.n_dims)),
+ dataset_(make_device_matrix<T, int64_t>(handle, ps.n_samples, ps.n_dims)),
+ knn_graph_(make_device_matrix<IdxT, int64_t>(handle, ps.n_samples, ps.degree))
+ {
+ // Generate random dataset and queries
+ raft::random::RngState state{42};
+ constexpr T kRangeMax = std::is_integral_v<T> ? std::numeric_limits<T>::max() : T(1);
+ constexpr T kRangeMin = std::is_integral_v<T> ? std::numeric_limits<T>::min() : T(-1);
+ if constexpr (std::is_integral_v<T>) {
+ raft::random::uniformInt(
+ state, dataset_.data_handle(), dataset_.size(), kRangeMin, kRangeMax, stream);
+ raft::random::uniformInt(
+ state, queries_.data_handle(), queries_.size(), kRangeMin, kRangeMax, stream);
+ } else {
+ raft::random::uniform(
+ state, dataset_.data_handle(), dataset_.size(), kRangeMin, kRangeMax, stream);
+ raft::random::uniform(
+ state, queries_.data_handle(), queries_.size(), kRangeMin, kRangeMax, stream);
+ }
+
+ // Generate random knn graph
+
+ raft::random::uniformInt(
+ state, knn_graph_.data_handle(), knn_graph_.size(), 0, ps.n_samples - 1, stream);
+
+ auto metric = raft::distance::DistanceType::L2Expanded;
+
+ index_.emplace(raft::neighbors::cagra::index<T, IdxT>(
+ handle, metric, make_const_mdspan(dataset_.view()), make_const_mdspan(knn_graph_.view())));
+ }
+
+ void run_benchmark(::benchmark::State& state) override
+ {
+ raft::neighbors::cagra::search_params search_params;
+ search_params.max_queries = 1024;
+ search_params.itopk_size = params_.itopk_size;
+ search_params.team_size = 0;
+ search_params.thread_block_size = params_.block_size;
+ search_params.search_width = params_.search_width;
+
+ auto indices = make_device_matrix<IdxT, int64_t>(handle, params_.n_queries, params_.k);
+ auto distances = make_device_matrix<float, int64_t>(handle, params_.n_queries, params_.k);
+ auto ind_v = make_device_matrix_view<IdxT, int64_t>(
+ indices.data_handle(), params_.n_queries, params_.k);
+ auto dist_v = make_device_matrix_view<float, int64_t>(
+ distances.data_handle(), params_.n_queries, params_.k);
+
+ auto queries_v = make_const_mdspan(queries_.view());
+ loop_on_state(state, [&]() {
+ raft::neighbors::cagra::search(
+ this->handle, search_params, *this->index_, queries_v, ind_v, dist_v);
+ });
+
+ double data_size = params_.n_samples * params_.n_dims * sizeof(T);
+ double graph_size = params_.n_samples * params_.degree * sizeof(IdxT);
+
+ int iterations = params_.max_iterations;
+ if (iterations == 0) {
+ // see search_plan_impl::adjust_search_params()
+ double r = params_.itopk_size / static_cast<double>(params_.search_width);
+ iterations = 1 + std::min(r * 1.1, r + 10);
+ }
+ state.counters["dataset (GiB)"] = data_size / (1 << 30);
+ state.counters["graph (GiB)"] = graph_size / (1 << 30);
+ state.counters["n_rows"] = params_.n_samples;
+ state.counters["n_cols"] = params_.n_dims;
+ state.counters["degree"] = params_.degree;
+ state.counters["n_queries"] = params_.n_queries;
+ state.counters["k"] = params_.k;
+ state.counters["itopk_size"] = params_.itopk_size;
+ state.counters["block_size"] = params_.block_size;
+ state.counters["search_width"] = params_.search_width;
+ state.counters["iterations"] = iterations;
+ }
+
+ private:
+ const params params_;
+ std::optional<raft::neighbors::cagra::index<T, IdxT>> index_;
+ raft::device_matrix<T, int64_t> queries_;
+ raft::device_matrix<T, int64_t> dataset_;
+ raft::device_matrix<IdxT, int64_t> knn_graph_;
+};
+
+inline const std::vector<params> generate_inputs()
+{
+ std::vector<params> inputs =
+ raft::util::itertools::product<params>({2000000ull}, // n_samples
+ {128, 256, 512, 1024}, // dataset dim
+ {1000}, // n_queries
+ {32}, // k
+ {64}, // knn graph degree
+ {64}, // itopk_size
+ {0}, // block_size
+ {1}, // search_width
+ {0} // max_iterations
+ );
+ auto inputs2 = raft::util::itertools::product<params>({2000000ull, 10000000ull}, // n_samples
+ {128}, // dataset dim
+ {1000}, // n_queries
+ {32}, // k
+ {64}, // knn graph degree
+ {64}, // itopk_size
+ {64, 128, 256, 512, 1024}, // block_size
+ {1}, // search_width
+ {0} // max_iterations
+ );
+ inputs.insert(inputs.end(), inputs2.begin(), inputs2.end());
+ return inputs;
+}
+
+const std::vector<params> kCagraInputs = generate_inputs();
+
+#define CAGRA_REGISTER(ValT, IdxT, inputs) \
+ namespace BENCHMARK_PRIVATE_NAME(knn) { \
+ using AnnCagra = CagraBench<ValT, IdxT>; \
+ RAFT_BENCH_REGISTER(AnnCagra, #ValT "/" #IdxT, inputs); \
+ }
+
+} // namespace raft::bench::neighbors
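When max_iterations is zero, the benchmark reports an iteration estimate derived from itopk_size and search_width (mirroring search_plan_impl::adjust_search_params()). A quick worked example of that estimate for the default inputs above (itopk_size = 64, search_width = 1), as a standalone snippet:

    #include <algorithm>
    #include <cstdio>

    int main()
    {
      // Default bench inputs: itopk_size = 64, search_width = 1, max_iterations = 0.
      int itopk_size = 64, search_width = 1;
      double r = itopk_size / static_cast<double>(search_width);  // 64.0
      int iterations = 1 + std::min(r * 1.1, r + 10.0);           // 1 + 70.4 -> 71
      std::printf("estimated iterations = %d\n", iterations);     // prints 71
      return 0;
    }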
diff --git a/cpp/bench/prims/neighbors/knn/cagra_float_uint32_t.cu b/cpp/bench/prims/neighbors/knn/cagra_float_uint32_t.cu
new file mode 100644
index 0000000000..5d762f6e85
--- /dev/null
+++ b/cpp/bench/prims/neighbors/knn/cagra_float_uint32_t.cu
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../cagra_bench.cuh"
+
+namespace raft::bench::neighbors {
+
+CAGRA_REGISTER(float, uint32_t, kCagraInputs);
+
+} // namespace raft::bench::neighbors
diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile
index 1948169c91..09353125b9 100644
--- a/cpp/doxygen/Doxyfile
+++ b/cpp/doxygen/Doxyfile
@@ -38,7 +38,7 @@ PROJECT_NAME = "RAFT C++ API"
# could be handy for archiving the generated documentation or if some version
# control system is used.
-PROJECT_NUMBER = "23.06"
+PROJECT_NUMBER = "23.08"
# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a
@@ -893,11 +893,8 @@ EXCLUDE = ../include/raft/sparse/linalg/symmetrize.hpp \
../include/raft/util/device_utils.cuh \
../include/raft/core/error.hpp \
../include/raft/core/handle.hpp \
- ../include/raft/util/integer_utils.hpp \
- ../include/raft/core/interruptible.hpp \
- ../include/raft/core/mdarray.hpp \
+ ../include/raft/util/integer_utils.hpp \
../include/raft/util/pow2_utils.cuh \
- ../include/raft/core/span.hpp \
../include/raft/util/vectorized.cuh \
../include/raft/raft.hpp \
../include/raft/core/cudart_utils.hpp \
diff --git a/cpp/include/raft/cluster/detail/mst.cuh b/cpp/include/raft/cluster/detail/mst.cuh
index c4dd74f255..a962d4b7c6 100644
--- a/cpp/include/raft/cluster/detail/mst.cuh
+++ b/cpp/include/raft/cluster/detail/mst.cuh
@@ -20,7 +20,7 @@
#include
#include
-#include <raft/sparse/neighbors/connect_components.cuh>
+#include <raft/sparse/neighbors/cross_component_nn.cuh>
#include
#include
#include
@@ -81,8 +81,20 @@ void connect_knn_graph(
raft::sparse::COO<value_t, value_idx> connected_edges(stream);
- raft::sparse::neighbors::connect_components(
- handle, connected_edges, X, color, m, n, reduction_op);
+ // default row and column batch sizes are chosen for computing cross component nearest neighbors.
+ // Reference: PR #1445
+ static constexpr size_t default_row_batch_size = 4096;
+ static constexpr size_t default_col_batch_size = 16;
+
+ raft::sparse::neighbors::cross_component_nn(handle,
+ connected_edges,
+ X,
+ color,
+ m,
+ n,
+ reduction_op,
+ min(m, default_row_batch_size),
+ min(n, default_col_batch_size));
rmm::device_uvector<value_idx> indptr2(m + 1, stream);
raft::sparse::convert::sorted_coo_to_csr(
@@ -192,4 +204,4 @@ void build_sorted_mst(
raft::copy_async(mst_weight, mst_coo.weights.data(), mst_coo.n_edges, stream);
}
-}; // namespace raft::cluster::detail
+}; // namespace raft::cluster::detail
\ No newline at end of file
diff --git a/cpp/include/raft/cluster/detail/single_linkage.cuh b/cpp/include/raft/cluster/detail/single_linkage.cuh
index ddd422a89b..848ca0357e 100644
--- a/cpp/include/raft/cluster/detail/single_linkage.cuh
+++ b/cpp/include/raft/cluster/detail/single_linkage.cuh
@@ -81,7 +81,7 @@ void single_linkage(raft::resources const& handle,
* 2. Construct MST, sorted by weights
*/
rmm::device_uvector color(m, stream);
- raft::sparse::neighbors::FixConnectivitiesRedOp<value_idx, value_t> op(color.data(), m);
+ raft::sparse::neighbors::FixConnectivitiesRedOp<value_idx, value_t> op(m);
detail::build_sorted_mst(handle,
X,
indptr.data(),
diff --git a/cpp/include/raft/comms/detail/std_comms.hpp b/cpp/include/raft/comms/detail/std_comms.hpp
index 8b92ed48f7..de2a7d3415 100644
--- a/cpp/include/raft/comms/detail/std_comms.hpp
+++ b/cpp/include/raft/comms/detail/std_comms.hpp
@@ -28,6 +28,8 @@
#include
+#include
+
#include
#include
@@ -138,50 +140,39 @@ class std_comms : public comms_iface {
update_host(h_colors.data(), d_colors.data(), get_size(), stream_);
update_host(h_keys.data(), d_keys.data(), get_size(), stream_);
- RAFT_CUDA_TRY(cudaStreamSynchronize(stream_));
-
- std::vector<int> subcomm_ranks{};
- std::vector<ucp_ep_h> new_ucx_ptrs{};
+ this->sync_stream(stream_);
- for (int i = 0; i < get_size(); ++i) {
- if (h_colors[i] == color) {
- subcomm_ranks.push_back(i);
- if (ucp_worker_ != nullptr && subcomms_ucp_) { new_ucx_ptrs.push_back((*ucp_eps_)[i]); }
- }
- }
+ ncclComm_t nccl_comm;
+ // Create a structure to allgather...
ncclUniqueId id{};
- if (get_rank() == subcomm_ranks[0]) { // root of the new subcommunicator
- RAFT_NCCL_TRY(ncclGetUniqueId(&id));
- std::vector<request_t> requests(subcomm_ranks.size() - 1);
- for (size_t i = 1; i < subcomm_ranks.size(); ++i) {
- isend(&id, sizeof(ncclUniqueId), subcomm_ranks[i], color, requests.data() + (i - 1));
- }
- waitall(requests.size(), requests.data());
- } else {
- request_t request{};
- irecv(&id, sizeof(ncclUniqueId), subcomm_ranks[0], color, &request);
- waitall(1, &request);
- }
- // FIXME: this seems unnecessary, do more testing and remove this
- barrier();
+ rmm::device_uvector<ncclUniqueId> d_nccl_ids(get_size(), stream_);
- ncclComm_t nccl_comm;
- RAFT_NCCL_TRY(ncclCommInitRank(&nccl_comm, subcomm_ranks.size(), id, key));
-
- if (ucp_worker_ != nullptr && subcomms_ucp_) {
- auto eps_sp = std::make_shared<ucp_ep_h*>(new_ucx_ptrs.data());
- return std::unique_ptr<comms_iface>(new std_comms(nccl_comm,
- (ucp_worker_h)ucp_worker_,
- eps_sp,
- subcomm_ranks.size(),
- key,
- stream_,
- subcomms_ucp_));
- } else {
- return std::unique_ptr<comms_iface>(
- new std_comms(nccl_comm, subcomm_ranks.size(), key, stream_));
- }
+ if (key == 0) { RAFT_NCCL_TRY(ncclGetUniqueId(&id)); }
+
+ update_device(d_nccl_ids.data() + get_rank(), &id, 1, stream_);
+
+ allgather(d_nccl_ids.data() + get_rank(),
+ d_nccl_ids.data(),
+ sizeof(ncclUniqueId),
+ datatype_t::UINT8,
+ stream_);
+
+ auto offset =
+ std::distance(thrust::make_zip_iterator(h_colors.begin(), h_keys.begin()),
+ std::find_if(thrust::make_zip_iterator(h_colors.begin(), h_keys.begin()),
+ thrust::make_zip_iterator(h_colors.end(), h_keys.end()),
+ [color](auto tuple) { return thrust::get<0>(tuple) == color; }));
+
+ auto subcomm_size = std::count(h_colors.begin(), h_colors.end(), color);
+
+ update_host(&id, d_nccl_ids.data() + offset, 1, stream_);
+
+ this->sync_stream(stream_);
+
+ RAFT_NCCL_TRY(ncclCommInitRank(&nccl_comm, subcomm_size, id, key));
+
+ return std::unique_ptr<comms_iface>(new std_comms(nccl_comm, subcomm_size, key, stream_));
}
void barrier() const
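The new comm_split path replaces the point-to-point exchange of the NCCL unique id with a single allgather: the rank with key == 0 in each color group generates an ncclUniqueId, every rank publishes its slot, and each rank then reuses the id of the first rank that shares its color. The host-side bookkeeping reduces to a find plus a count, illustrated below with plain std:: algorithms instead of the zip iterator used in the diff (the colors vector is a made-up example):

    #include <algorithm>
    #include <cstdio>
    #include <iterator>
    #include <vector>

    int main()
    {
      // Example: 4 ranks split into two subcommunicators by color.
      std::vector<int> colors = {0, 1, 0, 1};  // color reported by each rank
      int color = 1;                           // color of the calling rank

      // Index of the first rank with our color: the ncclUniqueId stored at this
      // offset of the allgathered buffer is the one the subcommunicator reuses.
      auto offset = std::distance(colors.begin(),
                                  std::find(colors.begin(), colors.end(), color));
      // Number of ranks sharing our color: the size of the new subcommunicator.
      auto subcomm_size = std::count(colors.begin(), colors.end(), color);

      std::printf("offset = %td, subcomm_size = %td\n", offset, subcomm_size);  // 1, 2
      return 0;
    }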
diff --git a/cpp/include/raft/core/coo_matrix.hpp b/cpp/include/raft/core/coo_matrix.hpp
index a5f7c05493..52ac69f163 100644
--- a/cpp/include/raft/core/coo_matrix.hpp
+++ b/cpp/include/raft/core/coo_matrix.hpp
@@ -23,6 +23,11 @@
namespace raft {
+/**
+ * \defgroup coo_matrix COO Matrix
+ * @{
+ */
+
template <typename RowType, typename ColType, typename NZType>
class coordinate_structure_t : public sparse_structure<RowType, ColType, NZType> {
public:
@@ -289,4 +294,7 @@ class coo_matrix
}
}
};
+
+/** @} */
+
} // namespace raft
\ No newline at end of file
diff --git a/cpp/include/raft/core/csr_matrix.hpp b/cpp/include/raft/core/csr_matrix.hpp
index 95d09d3eea..1113cc2023 100644
--- a/cpp/include/raft/core/csr_matrix.hpp
+++ b/cpp/include/raft/core/csr_matrix.hpp
@@ -22,6 +22,11 @@
namespace raft {
+/**
+ * \defgroup csr_matrix CSR Matrix
+ * @{
+ */
+
template <typename IndptrType, typename IndicesType, typename NZType>
class compressed_structure_t : public sparse_structure<IndptrType, IndicesType, NZType> {
public:
@@ -301,4 +306,7 @@ class csr_matrix
}
}
};
+
+/** @} */
+
} // namespace raft
\ No newline at end of file
diff --git a/cpp/include/raft/core/detail/macros.hpp b/cpp/include/raft/core/detail/macros.hpp
index 390acea697..bb4207938b 100644
--- a/cpp/include/raft/core/detail/macros.hpp
+++ b/cpp/include/raft/core/detail/macros.hpp
@@ -22,6 +22,14 @@
#endif
#endif
+#if defined(_RAFT_HAS_CUDA)
+#define CUDA_CONDITION_ELSE_TRUE(condition) condition
+#define CUDA_CONDITION_ELSE_FALSE(condition) condition
+#else
+#define CUDA_CONDITION_ELSE_TRUE(condition) true
+#define CUDA_CONDITION_ELSE_FALSE(condition) false
+#endif
+
#ifndef _RAFT_HOST_DEVICE
#if defined(_RAFT_HAS_CUDA)
#define _RAFT_DEVICE __device__
@@ -40,6 +48,10 @@
#define RAFT_INLINE_FUNCTION _RAFT_HOST_DEVICE _RAFT_FORCEINLINE
#endif
+#ifndef RAFT_DEVICE_INLINE_FUNCTION
+#define RAFT_DEVICE_INLINE_FUNCTION _RAFT_DEVICE _RAFT_FORCEINLINE
+#endif
+
// The RAFT_INLINE_CONDITIONAL is a conditional inline specifier that removes
// the inline specification when RAFT_COMPILED is defined.
//
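The new CUDA_CONDITION_ELSE_TRUE / CUDA_CONDITION_ELSE_FALSE macros let a condition that only compiles when CUDA support is present collapse to a constant in CPU-only builds. A hedged, self-contained sketch of the intended usage; the macro is repeated here so the snippet stands alone, and pointer_is_on_device is a made-up stand-in for a CUDA-only predicate:

    #include <cassert>

    // Mirrors the definition added in raft/core/detail/macros.hpp above.
    #if defined(_RAFT_HAS_CUDA)
    #define CUDA_CONDITION_ELSE_TRUE(condition) condition
    #else
    #define CUDA_CONDITION_ELSE_TRUE(condition) true
    #endif

    // Hypothetical CUDA-only predicate, stubbed out for illustration.
    inline bool pointer_is_on_device(const void* p) { return p != nullptr; }

    inline void validate(const void* p)
    {
      // In a CUDA build the predicate is evaluated; in a CPU-only build the
      // whole condition collapses to `true` and no device code is referenced.
      assert(CUDA_CONDITION_ELSE_TRUE(pointer_is_on_device(p)));
    }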
diff --git a/cpp/include/raft/core/device_container_policy.hpp b/cpp/include/raft/core/device_container_policy.hpp
index eef981e56f..011de307db 100644
--- a/cpp/include/raft/core/device_container_policy.hpp
+++ b/cpp/include/raft/core/device_container_policy.hpp
@@ -164,10 +164,19 @@ class device_uvector_policy {
public:
auto create(raft::resources const& res, size_t n) -> container_type
{
- return container_type(n, resource::get_cuda_stream(res), resource::get_workspace_resource(res));
+ if (mr_ == nullptr) {
+ // NB: not using the workspace resource by default!
+ // The workspace resource is for short-lived temporary allocations.
+ return container_type(n, resource::get_cuda_stream(res));
+ } else {
+ return container_type(n, resource::get_cuda_stream(res), mr_);
+ }
}
- device_uvector_policy() = default;
+ constexpr device_uvector_policy() = default;
+ constexpr explicit device_uvector_policy(rmm::mr::device_memory_resource* mr) noexcept : mr_(mr)
+ {
+ }
[[nodiscard]] constexpr auto access(container_type& c, size_t n) const noexcept -> reference
{
@@ -181,6 +190,9 @@ class device_uvector_policy {
[[nodiscard]] auto make_accessor_policy() noexcept { return accessor_policy{}; }
[[nodiscard]] auto make_accessor_policy() const noexcept { return const_accessor_policy{}; }
+
+ private:
+ rmm::mr::device_memory_resource* mr_{nullptr};
};
} // namespace raft
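With this change a default-constructed device_uvector_policy allocates from the current device resource rather than the workspace resource, while the new constructor lets a caller pin the policy to an explicit rmm memory resource. A minimal usage sketch that exercises the policy directly (in practice it is consumed through the mdarray machinery); the pool resource and its size are illustrative:

    #include <raft/core/device_container_policy.hpp>
    #include <raft/core/device_resources.hpp>

    #include <rmm/mr/device/per_device_resource.hpp>
    #include <rmm/mr/device/pool_memory_resource.hpp>

    int main()
    {
      raft::device_resources res;

      // Default-constructed policy: buffers come from the current device resource.
      raft::device_uvector_policy<float> default_policy;
      auto a = default_policy.create(res, 1000);

      // Policy bound to an explicit memory resource: buffers come from this pool.
      rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource> pool(
        rmm::mr::get_current_device_resource(), 64 * 1024 * 1024ull);
      raft::device_uvector_policy<float> pooled_policy(&pool);
      auto b = pooled_policy.create(res, 1000);

      // Both buffers hold 1000 floats; only their backing allocator differs.
      (void)a;
      (void)b;
      return 0;
    }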
diff --git a/cpp/include/raft/core/device_coo_matrix.hpp b/cpp/include/raft/core/device_coo_matrix.hpp
index 67aa4e12f1..41da605ff0 100644
--- a/cpp/include/raft/core/device_coo_matrix.hpp
+++ b/cpp/include/raft/core/device_coo_matrix.hpp
@@ -23,14 +23,26 @@
namespace raft {
-template
+using device_coordinate_structure_view = coordinate_structure_view;
+
+/**
+ * Specialization for a sparsity-owning coordinate structure which uses device memory
+ */
+template typename ContainerPolicy = device_uvector_policy,
- SparsityType sparsity_type = SparsityType::OWNING>
-using device_coo_matrix =
- coo_matrix