Add Python 3.12 and dask-expr support (#1317)

holoviz · Apr 8, 2024 · c3cdea1 · c3cdea1
1 parent 09da5a0
commit c3cdea1
Show file tree

Hide file tree

Showing 9 changed files with 85 additions and 21 deletions.
diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
@@ -31,7 +31,7 @@ jobs:
     env:
       DESC: "Documentation build"
     steps:
-      - uses: holoviz-dev/holoviz_tasks/install@v0.1a17
+      - uses: holoviz-dev/holoviz_tasks/install@v0
         with:
           name: Documentation
           python-version: "3.10"

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
@@ -7,50 +7,107 @@ on:
     branches:
     - '*'
   workflow_dispatch:
+    inputs:
+      target:
+        description: "How much of the test suite to run"
+        type: choice
+        default: default
+        options:
+          - default
+          - full
+          - downstream
+      cache:
+        description: "Use cache"
+        type: boolean
+        default: true
   schedule:
     - cron: '0 16 * * SUN'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
   cancel-in-progress: true
 
+env:
+  OMP_NUM_THREADS: 1
+  OPENBLAS_NUM_THREADS: 1
+  MKL_NUM_THREADS: 1
+  VECLIB_MAXIMUM_THREADS: 1
+  NUMEXPR_NUM_THREADS: 1
+  PYDEVD_DISABLE_FILE_VALIDATION: 1
+  DASK_DATAFRAME__QUERY_PLANNING: false
+
 jobs:
   pre_commit:
     name: Run pre-commit
     runs-on: 'ubuntu-latest'
     steps:
-      - uses: holoviz-dev/holoviz_tasks/pre-commit@v0.1a17
+      - uses: holoviz-dev/holoviz_tasks/pre-commit@v0
+
+  setup:
+    name: Setup workflow
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ env.MATRIX }}
+    steps:
+      - name: Set matrix option
+        run: |
+          if [[ '${{ github.event_name }}' == 'workflow_dispatch' ]]; then
+            OPTION=${{ github.event.inputs.target }}
+          elif [[ '${{ github.event_name }}' == 'schedule' ]]; then
+            OPTION="full"
+          elif [[ '${{ github.event_name }}' == 'push' && '${{ github.ref_type }}' == 'tag' ]]; then
+            OPTION="full"
+          else
+            OPTION="default"
+          fi
+          echo "MATRIX_OPTION=$OPTION" >> $GITHUB_ENV
+      - name: Set test matrix with 'default' option
+        if: env.MATRIX_OPTION == 'default'
+        run: |
+          MATRIX=$(jq -nsc '{
+              "os": ["ubuntu-latest", "macos-latest", "windows-latest"],
+              "python-version": ["3.9", "3.10", "3.11", "3.12"]
+          }')
+          echo "MATRIX=$MATRIX" >> $GITHUB_ENV
+      - name: Set test matrix with 'full' option
+        if: env.MATRIX_OPTION == 'full'
+        run: |
+          MATRIX=$(jq -nsc '{
+              "os": ["ubuntu-latest", "macos-latest", "windows-latest"],
+              "python-version": ["3.9", "3.10", "3.11", "3.12"]
+          }')
+          echo "MATRIX=$MATRIX" >> $GITHUB_ENV
+      - name: Set test matrix with 'downstream' option
+        if: env.MATRIX_OPTION == 'downstream'
+        run: |
+          MATRIX=$(jq -nsc '{
+              "os": ["ubuntu-latest"],
+              "python-version": ["3.11"]
+          }')
+          echo "MATRIX=$MATRIX" >> $GITHUB_ENV
 
   test_suite:
     name: Tests on ${{ matrix.os }} with Python ${{ matrix.python-version }}
-    needs: [pre_commit]
+    needs: [pre_commit, setup]
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
-      matrix:
-        os: ['ubuntu-latest', 'macos-latest', 'windows-latest']
-        python-version: ["3.9", "3.10", "3.11"]
+      matrix: ${{ fromJson(needs.setup.outputs.matrix) }}
     timeout-minutes: 180
     defaults:
       run:
         shell: bash -l {0}
     env:
       GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      OMP_NUM_THREADS: 1
-      OPENBLAS_NUM_THREADS: 1
-      MKL_NUM_THREADS: 1
-      VECLIB_MAXIMUM_THREADS: 1
-      NUMEXPR_NUM_THREADS: 1
-      PYDEVD_DISABLE_FILE_VALIDATION: 1
     steps:
-      - uses: holoviz-dev/holoviz_tasks/install@v0.1a17
+      - uses: holoviz-dev/holoviz_tasks/install@v0
         with:
           name: unit_test_suite
           python-version: ${{ matrix.python-version }}
-          channel-priority: strict
+          channel-priority: flexible
           channels: pyviz/label/dev,numba,conda-forge,nodefaults
           envs: "-o tests -o examples"
-          cache: true
+          cache: ${{ github.event.inputs.cache || github.event.inputs.cache == '' }}
           conda-update: true
         id: install
       - name: doit test_lint
@@ -85,7 +142,7 @@ jobs:
       fail-fast: false
       matrix:
         os: ['ubuntu-latest', 'macos-latest']
-        python-version: ["3.10"]
+        python-version: ["3.12"]
     steps:
       - name: Checkout source
         uses: actions/checkout@v3

diff --git a/README.md b/README.md
@@ -49,7 +49,7 @@ to work with much larger datasets than it would otherwise.
 
 ## Installation
 
-Datashader supports Python 3.8, 3.9, 3.10, and 3.11 on Linux, Windows, or
+Datashader supports Python 3.9, 3.10, 3.11, and 3.12 on Linux, Windows, or
 Mac and can be installed with conda:
 
     conda install datashader

diff --git a/datashader/data_libraries/dask.py b/datashader/data_libraries/dask.py
@@ -100,6 +100,7 @@ def func(partition: pd.DataFrame, cumulative_lens, partition_info=None):
 
     # Here be dragons
     # Get the dataframe graph
+    df = getattr(df, 'optimize', lambda: df)()  # Work with new dask_expr
     graph = df.__dask_graph__()
 
     # Guess a reasonable output dtype from combination of dataframe dtypes
@@ -210,6 +211,7 @@ def line(glyph, df, schema, canvas, summary, *, antialias=False, cuda=False):
     shape, bounds, st, axis = shape_bounds_st_and_axis(df, canvas, glyph)
 
     # Compile functions
+    df = getattr(df, 'optimize', lambda: df)()  # Work with new dask_expr
     partitioned = isinstance(df, dd.DataFrame) and df.npartitions > 1
     create, info, append, combine, finalize, antialias_stage_2, antialias_stage_2_funcs, _ = \
         compile_components(summary, schema, glyph, antialias=antialias, cuda=cuda,
@@ -232,6 +234,10 @@ def chunk(df, df2=None):
 
     name = tokenize(df.__dask_tokenize__(), canvas, glyph, summary)
     old_name = df.__dask_tokenize__()
+    # dask_expr returns the tokenize result as a tuple of (type, task name)
+    # We only want to use the task name as input to the new graph
+    if isinstance(old_name, tuple):
+        old_name = old_name[1]
     dsk = {(name, 0): (chunk, (old_name, 0))}
     for i in range(1, df.npartitions):
         dsk[(name, i)] = (chunk, (old_name, i - 1), (old_name, i))

diff --git a/datashader/datashape/lexer.py b/datashader/datashape/lexer.py
@@ -17,7 +17,7 @@
 def _str_val(s):
     # Use the Python parser via the ast module to parse the string,
     # since the string_escape and unicode_escape codecs do the wrong thing
-    return ast.parse('u' + s).body[0].value.s
+    return ast.parse('u' + s).body[0].value.value
 
 # A list of the token names, corresponding regex, and value extraction function
 _tokens = [

diff --git a/datashader/tests/test_dask.py b/datashader/tests/test_dask.py
@@ -2385,7 +2385,7 @@ def test_dask_categorical_counts(on_gpu):
     )
     ddf = dd.from_pandas(df, npartitions=2)
     assert ddf.npartitions == 2
-    ddf.cat = ddf.cat.astype('category')
+    ddf["cat"] = ddf.cat.astype('category')
 
     # Categorical counts at the dataframe level to confirm test is reasonable.
     cat_totals = ddf.cat.value_counts().compute()

diff --git a/examples/conftest.py b/examples/conftest.py
@@ -1,6 +1,5 @@
 import platform
 
-
 collect_ignore_glob = []
 
 # 2023-07-21 with following error:

diff --git a/setup.py b/setup.py
@@ -57,6 +57,7 @@
         'rioxarray',
         'scikit-image',
         'spatialpandas',
+        'dask-expr',
     ],
     'examples': examples,
     'examples_extra': examples + [
@@ -119,6 +120,7 @@
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
         "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
         "Operating System :: OS Independent",
         "Intended Audience :: Science/Research",
         "Intended Audience :: Developers",

diff --git a/tox.ini b/tox.ini
@@ -3,7 +3,7 @@
 
 [tox]
 #          python version                     test group                     extra envs  extra commands
-envlist = {py39,py310,py311}-{lint,unit,unit_nojit,unit_deploy,examples,all,examples_extra}-{default}-{dev,pkg}
+envlist = {py39,py310,py311,py312}-{lint,unit,unit_nojit,unit_deploy,examples,all,examples_extra}-{default}-{dev,pkg}
 build = wheel
 
 [_lint]