diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
index 7b7e3f62..3923e4d8 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -21,7 +21,7 @@
 A clear and concise description of what you expected to happen.
 - Method of cuDF install: [conda, Docker, or from source]
   - If method of install is [Docker], provide `docker pull` & `docker run` commands used
 - Please run and attach the output of the `cudf/print_env.sh` script to gather relevant environment details
-
+
 **Additional context**
 Add any other context about the problem here.
diff --git a/.github/workflows/build-image.yml b/.github/workflows/build-image.yml
index f310d38f..8ed8b8c9 100644
--- a/.github/workflows/build-image.yml
+++ b/.github/workflows/build-image.yml
@@ -24,9 +24,6 @@ on:
       RAPIDS_VER:
         required: true
         type: string
-      DASK_SQL_VER:
-        required: true
-        type: string
       BASE_TAG:
         required: true
         type: string
@@ -52,7 +49,6 @@ jobs:
         LINUX_VER: ["${{ inputs.LINUX_VER }}"]
         PYTHON_VER: ["${{ inputs.PYTHON_VER }}"]
         RAPIDS_VER: ["${{ inputs.RAPIDS_VER }}"]
-        DASK_SQL_VER: ["${{ inputs.DASK_SQL_VER }}"]
       fail-fast: false
     runs-on: "linux-${{ matrix.ARCH }}-cpu4"
     steps:
@@ -90,7 +86,7 @@
           driver: docker
           endpoint: builders
       - name: Build base image
-        uses: docker/build-push-action@v4
+        uses: docker/build-push-action@v6
         with:
           context: context
           file: Dockerfile
@@ -104,10 +100,9 @@
            LINUX_VER=${{ inputs.LINUX_VER }}
            PYTHON_VER=${{ inputs.PYTHON_VER }}
            RAPIDS_VER=${{ inputs.RAPIDS_VER }}
-           DASK_SQL_VER=${{ inputs.DASK_SQL_VER }}
          tags: ${{ inputs.BASE_TAG }}-${{ matrix.ARCH }}
       - name: Build notebooks image
-        uses: docker/build-push-action@v4
+        uses: docker/build-push-action@v6
         with:
           context: context
           file: Dockerfile
@@ -121,10 +116,9 @@
            LINUX_VER=${{ inputs.LINUX_VER }}
            PYTHON_VER=${{ inputs.PYTHON_VER }}
            RAPIDS_VER=${{ inputs.RAPIDS_VER }}
-           DASK_SQL_VER=${{ inputs.DASK_SQL_VER }}
          tags: ${{ inputs.NOTEBOOKS_TAG }}-${{ matrix.ARCH }}
       - name: Build RAFT ANN Benchmarks GPU image
-        uses: docker/build-push-action@v4
+        uses: docker/build-push-action@v6
         with:
           context: context
           file: raft-ann-bench/gpu/Dockerfile
@@ -138,7 +132,7 @@
            RAPIDS_VER=${{ inputs.RAPIDS_VER }}
          tags: ${{ inputs.RAFT_ANN_BENCH_TAG }}-${{ matrix.ARCH }}
       - name: Build RAFT ANN Benchmarks GPU with datasets image
-        uses: docker/build-push-action@v4
+        uses: docker/build-push-action@v6
         with:
           context: context
           file: raft-ann-bench/gpu/Dockerfile
@@ -152,8 +146,8 @@
            RAPIDS_VER=${{ inputs.RAPIDS_VER }}
          tags: ${{ inputs.RAFT_ANN_BENCH_DATASETS_TAG }}-${{ matrix.ARCH }}
       - name: Build RAFT ANN Benchmarks CPU image
-        if: inputs.CUDA_VER == '12.2.2' # we don't need to build CPU packages for different CUDA versions.
-        uses: docker/build-push-action@v4
+        if: inputs.CUDA_VER == '12.5.1' # we don't need to build CPU packages for different CUDA versions.
+        uses: docker/build-push-action@v6
         with:
           context: context
           file: raft-ann-bench/cpu/Dockerfile
diff --git a/.github/workflows/build-test-publish-images.yml b/.github/workflows/build-test-publish-images.yml
index 0d434a33..945aef39 100644
--- a/.github/workflows/build-test-publish-images.yml
+++ b/.github/workflows/build-test-publish-images.yml
@@ -133,7 +133,6 @@ jobs:
       LINUX_VER: ${{ matrix.LINUX_VER }}
       PYTHON_VER: ${{ matrix.PYTHON_VER }}
       RAPIDS_VER: ${{ needs.compute-matrix.outputs.RAPIDS_VER }}
-      DASK_SQL_VER: ${{ matrix.DASK_SQL_VER }}
      BASE_TAG: "rapidsai/${{ needs.compute-matrix.outputs.BASE_IMAGE_REPO }}:\
        ${{ needs.compute-matrix.outputs.BASE_TAG_PREFIX }}\
diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
index a29e8bfa..9521c907 100644
--- a/.github/workflows/pr.yml
+++ b/.github/workflows/pr.yml
@@ -10,7 +10,26 @@ concurrency:
   cancel-in-progress: true

 jobs:
+  pr-builder:
+    needs:
+      - checks
+      - docker
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.08
+  checks:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+      - name: Run pre-commit
+        run: |
+          pip install pre-commit
+          pre-commit run --all-files
   docker:
+    needs: [checks]
     uses: ./.github/workflows/build-test-publish-images.yml
     with:
       build_type: pull-request
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index a6f4d2ac..6cd8baf0 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -38,7 +38,7 @@ jobs:
         uses: actions/checkout@v4

       - name: Update DockerHub README for ${{ matrix.repo_name }}
-        uses: peter-evans/dockerhub-description@v3
+        uses: peter-evans/dockerhub-description@v4
         with:
           username: ${{ secrets.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_PASSWORD }}
diff --git a/.github/workflows/release-to-ngc.yml b/.github/workflows/release-to-ngc.yml
index 8bbb9c70..e386b20f 100644
--- a/.github/workflows/release-to-ngc.yml
+++ b/.github/workflows/release-to-ngc.yml
@@ -20,7 +20,7 @@ jobs:
         id: generate-matrix
         run: |
           #!/bin/bash
-          matrix=$(yq 'del(.DASK_SQL_VER)' matrix.yaml | yq -o json | jq -c)
+          matrix=$(yq '.' matrix.yaml | yq -o json | jq -c)
           echo "matrix=${matrix}" | tee -a ${GITHUB_OUTPUT}

   copy-images:
diff --git a/.github/workflows/update-dask-sql.yml b/.github/workflows/update-dask-sql.yml
deleted file mode 100644
index 727e2eef..00000000
--- a/.github/workflows/update-dask-sql.yml
+++ /dev/null
@@ -1,66 +0,0 @@
-# Updates stable dask-sql version to use in images.
-# Runs once daily.
-
-name: Check for new dask-sql version
-
-on:
-  schedule:
-    - cron: "0 0 * * *" # Daily “At 00:00” UTC
-  workflow_dispatch:
-
-jobs:
-  update-dask-sql:
-    runs-on: ubuntu-latest
-    container:
-      image: rapidsai/ci-conda:latest
-    if: github.repository == 'rapidsai/docker'
-
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Get current dask-sql version
-        id: current_version
-        run: |
-          DASK_SQL_VER="$(yq -r '.DASK_SQL_VER.[0]' matrix.yaml)"
-          echo "DASK_SQL_VER=${DASK_SQL_VER}" | tee -a ${GITHUB_OUTPUT}
-
-      - name: Get new dask-sql version
-        id: new_version
-        uses: jacobtomlinson/gha-anaconda-package-version@0.1.3
-        with:
-          org: "conda-forge"
-          package: "dask-sql"
-          version_system: "SemVer"
-
-      - name: Get current/new versions without patch
-        env:
-          FULL_VER: ${{ steps.current_version.outputs.DASK_SQL_VER }}
-          FULL_NEW_VER: ${{ steps.new_version.outputs.version }}
-        run: |
-          echo SHORT_VER=${FULL_VER%.*} >> $GITHUB_ENV
-          echo SHORT_NEW_VER=${FULL_NEW_VER%.*} >> $GITHUB_ENV
-
-      - name: Find and replace full dask-sql version
-        uses: jacobtomlinson/gha-find-replace@v3
-        with:
-          find: ${{ steps.current_version.outputs.DASK_SQL_VER }}
-          replace: ${{ steps.new_version.outputs.version }}
-
-      - name: Find and replace short dask-sql version
-        uses: jacobtomlinson/gha-find-replace@v3
-        with:
-          find: ${{ env.SHORT_VER }}
-          replace: ${{ env.SHORT_NEW_VER }}
-
-      - name: Create pull request with changes
-        uses: peter-evans/create-pull-request@v6
-        with:
-          token: ${{ secrets.GITHUB_TOKEN }}
-          commit-message: Update `DASK_SQL_VERSION` to `${{ steps.new_version.outputs.version }}`
-          title: Update `DASK_SQL_VERSION` to `${{ steps.new_version.outputs.version }}`
-          author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
-          branch: "upgrade-dask-sql"
-          body: |
-            A new stable dask-sql version has been detected.
-
-            Updated all config files and READMEs to use `${{ steps.new_version.outputs.version }}`.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..cb2f78eb
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,12 @@
+---
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.6.0
+    hooks:
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.5.2
+    hooks:
+      - id: ruff
+        args: ["--config", "pyproject.toml"]
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index af1d5474..9b7a9c9a 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -15,4 +15,3 @@ To build just the `base` image with default arguments: `docker buildx build --pu
 - `CUDA_VER` - Version of CUDA to use. Should be `major.minor.patch`
 - `PYTHON_VER` - Version of Python to use. Should be `major.minor`
 - `RAPIDS_VER` - Version of RAPIDS to use. Should be `YY.MM`
-- `DASK_SQL_VER` - Version of `dask-sql` to use. Should be `YYYY.M.P`
diff --git a/Dockerfile b/Dockerfile
index fec263c2..5b72c8e0 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,13 +1,12 @@
 # syntax=docker/dockerfile:1

-ARG CUDA_VER=12.0.1
-ARG PYTHON_VER=3.11
+ARG CUDA_VER=unset
+ARG PYTHON_VER=unset
 ARG LINUX_DISTRO=ubuntu
 ARG LINUX_DISTRO_VER=22.04
 ARG LINUX_VER=${LINUX_DISTRO}${LINUX_DISTRO_VER}

 ARG RAPIDS_VER=24.08
-ARG DASK_SQL_VER=2024.5.0

 # Gather dependency information
 FROM rapidsai/ci-conda:latest AS dependencies
@@ -15,7 +14,6 @@
 ARG CUDA_VER
 ARG PYTHON_VER
 ARG RAPIDS_VER
-ARG DASK_SQL_VER

 ARG RAPIDS_BRANCH="branch-${RAPIDS_VER}"

@@ -36,12 +34,11 @@ EOF

 # Base image
-FROM rapidsai/miniforge-cuda:cuda${CUDA_VER}-base-${LINUX_VER}-py${PYTHON_VER} as base
+FROM rapidsai/miniforge-cuda:cuda${CUDA_VER}-base-${LINUX_VER}-py${PYTHON_VER} AS base

 ARG CUDA_VER
 ARG PYTHON_VER
 ARG RAPIDS_VER
-ARG DASK_SQL_VER

 SHELL ["/bin/bash", "-euo", "pipefail", "-c"]
@@ -56,7 +53,6 @@ COPY condarc /opt/conda/.condarc

 RUN <<EOF

->=2.32
-  then
-    . + ["arm64"]
-  else
-    .
-  end |
-  $x + {ARCHES: .};
+  $x + {ARCHES: ["amd64", "arm64"]};

 def compute_ubuntu_version($x):
   if
diff --git a/context/notebooks.sh b/context/notebooks.sh
index 0d28015f..fbe3c742 100755
--- a/context/notebooks.sh
+++ b/context/notebooks.sh
@@ -3,7 +3,6 @@
 # Clones repos with notebooks & compiles notebook test dependencies
 # Requires environment variables:
 #   RAPIDS_BRANCH
-#   DASK_SQL_VER
 #   CUDA_VER
 #   PYTHON_VER

@@ -41,6 +40,5 @@ done
 pushd "/dependencies"

 conda-merge ./*.yaml |
-  yq ".dependencies += [\"dask-sql==${DASK_SQL_VER%.*}.*\"]" | # Ensure dask-sql dependency is not altered
   yq '.channels = load("/condarc").channels' | # Use channels provided by CI, not repos
   tee /test_notebooks_dependencies.yaml
diff --git a/context/test_notebooks.py b/context/test_notebooks.py
index 96538aad..7f987009 100755
--- a/context/test_notebooks.py
+++ b/context/test_notebooks.py
@@ -5,9 +5,7 @@ import sys
 import timeit
 from typing import Iterable

-import nbconvert
 import nbformat
-from datetime import datetime
 from nbconvert.preprocessors import ExecutePreprocessor
 import yaml

@@ -29,12 +27,15 @@
     # following nbs are marked as skipped
     'cugraph/algorithms/layout/Force-Atlas2.ipynb',
     'cuspatial/binary_predicates.ipynb',
-    'cuspatial/cuproj_benchmark.ipynb'
+    'cuspatial/cuproj_benchmark.ipynb',
+    # context on these being skipped: https://github.com/rapidsai/cuspatial/pull/1407
+    'cuspatial/cuspatial_api_examples.ipynb',
+    'cuspatial/nyc_taxi_years_correlation.ipynb'
 ]


 def get_notebooks(directory: str) -> Iterable[str]:
-    for root, dirs, files in os.walk(directory):
+    for root, _, files in os.walk(directory):
         for file in files:
             if (
                 file.endswith(".ipynb")
@@ -69,14 +70,15 @@ def test_notebook(notebook_file, executed_nb_file):
     warnings = []
     outputs = []

+    # use nbconvert to run the notebook natively
     ep = ExecutePreprocessor(timeout=600, kernel_name="python3", allow_errors=True)

+    task_init = timeit.default_timer()
     try:
-        task_init = timeit.default_timer()
-        nb, nb_resources = ep.preprocess(nb, {"metadata": {"path": ""}})
-        execution_time = timeit.default_timer() - task_init
+        nb, _ = ep.preprocess(nb, {"metadata": {"path": ""}})
     except Exception as e:
         errors.append(e)
+    execution_time = timeit.default_timer() - task_init

     with open(executed_nb_file, "w", encoding="utf-8") as f:
         nbformat.write(nb, f)
@@ -152,7 +154,7 @@
         print(f"Input must be a directory. Got: {ns.input}")
         sys.exit(1)

-    notebooks = sorted(list(get_notebooks(ns.input)))
+    notebooks = sorted(get_notebooks(ns.input))
     print(f"{len(notebooks)} Notebooks to be tested:")
     for notebook in notebooks:
         print(notebook)
diff --git a/dockerhub-readme.md b/dockerhub-readme.md
index 90233bac..1d7695bd 100644
--- a/dockerhub-readme.md
+++ b/dockerhub-readme.md
@@ -22,7 +22,6 @@ RAPIDS Libraries included in the images:
 - `cuxfilter`
 - `cuCIM`
 - `xgboost`
-- `dask-sql`

 ### Image Types
diff --git a/matrix-test.yaml b/matrix-test.yaml
index d57f63a7..f8ffdc60 100644
--- a/matrix-test.yaml
+++ b/matrix-test.yaml
@@ -4,10 +4,11 @@ pull-request:
   - { CUDA_VER: '11.8', ARCH: 'amd64', PYTHON_VER: '3.9', GPU: 'v100', DRIVER: 'earliest' }
   - { CUDA_VER: '12.0', ARCH: 'amd64', PYTHON_VER: '3.10', GPU: 'v100', DRIVER: 'latest' }
   - { CUDA_VER: '12.2', ARCH: 'arm64', PYTHON_VER: '3.11', GPU: 'a100', DRIVER: 'latest' }
+  - { CUDA_VER: '12.5', ARCH: 'amd64', PYTHON_VER: '3.11', GPU: 'v100', DRIVER: 'latest' }
 branch:
   - { CUDA_VER: '11.8', ARCH: 'amd64', PYTHON_VER: '3.9', GPU: 'v100', DRIVER: 'earliest' }
   - { CUDA_VER: '11.8', ARCH: 'amd64', PYTHON_VER: '3.9', GPU: 'v100', DRIVER: 'latest' }
   - { CUDA_VER: '12.0', ARCH: 'amd64', PYTHON_VER: '3.10', GPU: 'v100', DRIVER: 'latest' }
   - { CUDA_VER: '12.0', ARCH: 'arm64', PYTHON_VER: '3.10', GPU: 'a100', DRIVER: 'latest' }
   - { CUDA_VER: '12.2', ARCH: 'amd64', PYTHON_VER: '3.11', GPU: 'v100', DRIVER: 'latest' }
-  - { CUDA_VER: '12.2', ARCH: 'arm64', PYTHON_VER: '3.11', GPU: 'a100', DRIVER: 'latest' }
+  - { CUDA_VER: '12.5', ARCH: 'arm64', PYTHON_VER: '3.11', GPU: 'a100', DRIVER: 'latest' }
diff --git a/matrix.yaml b/matrix.yaml
index 59e62dd6..0e0c0c0b 100644
--- a/matrix.yaml
+++ b/matrix.yaml
@@ -2,9 +2,8 @@ CUDA_VER: # Should be `<major>.<minor>.<patch>` (e.g. `11.2.2`)
   - "11.8.0"
   - "12.0.1"
   - "12.2.2"
+  - "12.5.1"
 PYTHON_VER:
   - "3.9"
   - "3.10"
   - "3.11"
-DASK_SQL_VER:
-  - "2024.5.0"
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..20bbdee6
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,40 @@
+[tool.ruff]
+target-version = "py310"
+
+[tool.ruff.lint]
+ignore = [
+  # (flake8) line too long
+  "E501",
+  # (pylint) too many branches
+  "PLR0912",
+]
+select = [
+  # flake8-builtins
+  "A",
+  # flake8-bugbear
+  "B",
+  # flake8-comprehensions
+  "C4",
+  # pycodestyle
+  "E",
+  # eradicate
+  "ERA",
+  # pyflakes
+  "F",
+  # flynt
+  "FLY",
+  # perflint
+  "PERF",
+  # pygrep-hooks
+  "PGH",
+  # pylint
+  "PL",
+  # flake8-pyi
+  "PYI",
+  # flake8-return
+  "RET",
+  # ruff-exclusive checks
+  "RUF",
+  # flake8-bandit
+  "S",
+]
diff --git a/raft-ann-bench/README.md b/raft-ann-bench/README.md
index 8f611f0c..33c82c46 100644
--- a/raft-ann-bench/README.md
+++ b/raft-ann-bench/README.md
@@ -2,7 +2,7 @@

 This folder contains the dockerfiles for generating GPU and CPU RAFT ANN benchmark images.

-This images are meant to enable end users of RAFT's ANN algorithms to easily run and reproduce benchmarks and comparisons between RAFT and third party libraries.
+These images are meant to enable end users of RAFT's ANN algorithms to easily run and reproduce benchmarks and comparisons between RAFT and third party libraries.

 # Image types:
diff --git a/raft-ann-bench/cpu/Dockerfile b/raft-ann-bench/cpu/Dockerfile
index 06477ced..db6ce245 100644
--- a/raft-ann-bench/cpu/Dockerfile
+++ b/raft-ann-bench/cpu/Dockerfile
@@ -60,4 +60,3 @@ RUN /home/rapids/raftannbench/get_datasets.sh

 CMD ["--dataset fashion-mnist-784-euclidean", "", "--algorithms hnswlib"]
 ENTRYPOINT ["/bin/bash", "/data/scripts/run_benchmark_preloaded_datasets.sh"]
-