From 7e04bad15eda7ee022db29e9f55664e0b9e87a0b Mon Sep 17 00:00:00 2001
From: anicusan <aln705@student.bham.ac.uk>
Date: Mon, 4 Nov 2024 16:25:38 +0000
Subject: [PATCH] updated README and the CompatHelper

---
 .github/workflows/CompatHelper.yml | 37 +++++++++++++++++++++---
 README.md                          |  6 ++--
 prototype/reduce_nd_test.jl        | 46 ++++++++++++++++++++----------
 3 files changed, 68 insertions(+), 21 deletions(-)

diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml
index cba9134..717b6ad 100644
--- a/.github/workflows/CompatHelper.yml
+++ b/.github/workflows/CompatHelper.yml
@@ -3,14 +3,43 @@ on:
   schedule:
     - cron: 0 0 * * *
   workflow_dispatch:
+permissions:
+  contents: write
+  pull-requests: write
 jobs:
   CompatHelper:
     runs-on: ubuntu-latest
     steps:
-      - name: Pkg.add("CompatHelper")
-        run: julia -e 'using Pkg; Pkg.add("CompatHelper")'
-      - name: CompatHelper.main()
+      - name: Check if Julia is already available in the PATH
+        id: julia_in_path
+        run: which julia
+        continue-on-error: true
+      - name: Install Julia, but only if it is not already available in the PATH
+        uses: julia-actions/setup-julia@v2
+        with:
+          version: '1'
+          # arch: ${{ runner.arch }}
+        if: steps.julia_in_path.outcome != 'success'
+      - name: "Add the General registry via Git"
+        run: |
+          import Pkg
+          ENV["JULIA_PKG_SERVER"] = ""
+          Pkg.Registry.add("General")
+        shell: julia --color=yes {0}
+      - name: "Install CompatHelper"
+        run: |
+          import Pkg
+          name = "CompatHelper"
+          uuid = "aa819f21-2bde-4658-8897-bab36330d9b7"
+          version = "3"
+          Pkg.add(; name, uuid, version)
+        shell: julia --color=yes {0}
+      - name: "Run CompatHelper"
+        run: |
+          import CompatHelper
+          CompatHelper.main()
+        shell: julia --color=yes {0}
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
-        run: julia -e 'using CompatHelper; CompatHelper.main()'
+          # COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }}
\ No newline at end of file
diff --git a/README.md b/README.md
index 08c2f0e..7d64d17 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 *"We need more speed" - Lightning McQueen or Scarface, I don't know*
 
-<!-- [![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://anicusan.github.io/AcceleratedKernels.jl/stable/) -->
+[![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://anicusan.github.io/AcceleratedKernels.jl/stable/)
 [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://anicusan.github.io/AcceleratedKernels.jl/dev/)
 <!-- [![Build Status](https://github.com/anicusan/AcceleratedKernels.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/anicusan/AcceleratedKernels.jl/actions/workflows/CI.yml?query=branch%3Amain) -->
@@ -41,7 +41,9 @@ Again, this is only possible because of the unique Julia compilation model, the
 
 ## 2. Status
 
-This is the very first release of this library; while tests are included for all algorithms, I only ran them locally on the oneAPI (laptop Intel UHD Graphics 620), CUDA (laptop with Nvidia Quadro RTX 4000 and data centre Nvidia A100-40), Metal (Mac M2 and M3), and AMD (data centre AMD MI210) backends. Some kinks might still exist for some platform permutations before a CI is set up. The API may undergo some changes in the following weeks as we discuss it with the Julia community - please join the conversation!
+The AcceleratedKernels.jl sorters were adopted as the official [AMDGPU algorithms](https://github.com/JuliaGPU/AMDGPU.jl/pull/688)! The API is starting to stabilise; it follows the Julia standard library fairly closely - additionally exposing all temporary arrays for memory reuse. For any new ideas / requests, please join the conversation on [Julia Discourse](https://discourse.julialang.org/t/ann-acceleratedkernels-jl-cross-architecture-parallel-algorithms-for-julias-gpu-backends/119698/16) or post [an issue](https://github.com/anicusan/AcceleratedKernels.jl/issues).
+
+We have an extensive test suite; however, I have only run it locally on the oneAPI (laptop Intel UHD Graphics 620), CUDA (laptop with Nvidia Quadro RTX 4000 and data centre Nvidia A100-40), Metal (Mac M2 and M3), and AMD (data centre AMD MI210) backends. Some kinks might still exist for some platform / OS permutations before a CI is set up.
 
 AcceleratedKernels.jl will also be a fundamental building block of applications developed at [EvoPhase](https://evophase.co.uk/), so it will see continuous heavy use with industry backing. Long-term stability, performance improvements and support are priorities for us.
 
diff --git a/prototype/reduce_nd_test.jl b/prototype/reduce_nd_test.jl
index a03f9eb..3aef6c3 100644
--- a/prototype/reduce_nd_test.jl
+++ b/prototype/reduce_nd_test.jl
@@ -1,16 +1,16 @@
-using Random
-using BenchmarkTools
-using Profile
-using PProf
+# using Random
+# using BenchmarkTools
+# using Profile
+# using PProf
 
-using KernelAbstractions
-using Metal
+# using KernelAbstractions
+# using Metal
 
-import AcceleratedKernels as AK
+# import AcceleratedKernels as AK
 
-Random.seed!(0)
+# Random.seed!(0)
@@ -21,28 +21,44 @@ Random.seed!(0)
 
 # d
 
+using Metal
+using KernelAbstractions
+import AcceleratedKernels as AK
+using BenchmarkTools
+using Random
+Random.seed!(0)
 
-function redadd_base(s)
-    d = reduce(+, s; init=zero(eltype(s)), dims=1)
+function sum_base(s; dims)
+    d = reduce(+, s; init=zero(eltype(s)), dims=dims)
     KernelAbstractions.synchronize(get_backend(s))
     d
 end
 
-function redadd_ak(s)
-    d = AK.reduce(+, s; init=zero(eltype(s)), dims=1)
+function sum_ak(s; dims)
+    d = AK.reduce(+, s; init=zero(eltype(s)), dims=dims)
     KernelAbstractions.synchronize(get_backend(s))
     d
 end
 
+# Make an array with highly unequal per-axis sizes
 s = MtlArray(rand(Int32(1):Int32(100), 10, 100_000))
 
-@assert redadd_base(s) == redadd_ak(s)
-display(@benchmark redadd_base($s))
-display(@benchmark redadd_ak($s))
+
+# Correctness
+@assert sum_base(s, dims=1) == sum_ak(s, dims=1)
+@assert sum_base(s, dims=2) == sum_ak(s, dims=2)
+
+# Benchmarks
+println("\nReduction over small axis - AK vs Base")
+display(@benchmark sum_ak($s, dims=1))
+display(@benchmark sum_base($s, dims=1))
+
+println("\nReduction over long axis - AK vs Base")
+display(@benchmark sum_ak($s, dims=2))
+display(@benchmark sum_base($s, dims=2))
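
For reference, the sum_base / sum_ak comparison introduced above can also be exercised without a Metal GPU. Below is a minimal sketch on plain CPU arrays - it assumes AcceleratedKernels also dispatches AK.reduce on Base arrays with the same init/dims keywords used in prototype/reduce_nd_test.jl; the names d_base and d_ak are illustrative, not part of the patch.

# Minimal CPU sketch of the prototype benchmark above (hypothetical usage,
# not part of this patch): the MtlArray is swapped for a plain Array, and no
# KernelAbstractions.synchronize call is needed on the host.
import AcceleratedKernels as AK
using Random
Random.seed!(0)

# Same highly unequal per-axis sizes as the Metal test, just smaller
s = rand(Int32(1):Int32(100), 10, 1_000)

# Reduce over the short axis: Base reference vs AcceleratedKernels
d_base = reduce(+, s; init=zero(eltype(s)), dims=1)
d_ak = AK.reduce(+, s; init=zero(eltype(s)), dims=1)
@assert d_base == d_ak

# Reduce over the long axis
d_base = reduce(+, s; init=zero(eltype(s)), dims=2)
d_ak = AK.reduce(+, s; init=zero(eltype(s)), dims=2)
@assert d_base == d_ak

Wrapping the array back in MtlArray(rand(...)) and re-adding the KernelAbstractions.synchronize(get_backend(s)) calls recovers the GPU version benchmarked in the patch.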