From 7e04bad15eda7ee022db29e9f55664e0b9e87a0b Mon Sep 17 00:00:00 2001
From: anicusan <aln705@student.bham.ac.uk>
Date: Mon, 4 Nov 2024 16:25:38 +0000
Subject: [PATCH] updated README and the CompatHelper workflow

---
 .github/workflows/CompatHelper.yml | 43 +++++++++++++++++++++++-----
 README.md                          | 18 ++++++++++++++--
 prototype/reduce_nd_test.jl        | 53 +++++++++++++++++++++---------
 3 files changed, 93 insertions(+), 21 deletions(-)

diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml
index cba9134..717b6ad 100644
--- a/.github/workflows/CompatHelper.yml
+++ b/.github/workflows/CompatHelper.yml
@@ -3,14 +3,49 @@ on:
   schedule:
     - cron: 0 0 * * *
   workflow_dispatch:
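+# CompatHelper pushes compat-bump branches and opens pull requests,
+# so the workflow token needs write access to both scopes below.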
+permissions:
+  contents: write
+  pull-requests: write
 jobs:
   CompatHelper:
     runs-on: ubuntu-latest
     steps:
-      - name: Pkg.add("CompatHelper")
-        run: julia -e 'using Pkg; Pkg.add("CompatHelper")'
-      - name: CompatHelper.main()
+      - name: Check if Julia is already available in the PATH
+        id: julia_in_path
+        run: which julia
+        continue-on-error: true
+      - name: Install Julia, but only if it is not already available in the PATH
+        uses: julia-actions/setup-julia@v2
+        with:
+          version: '1'
+          # arch: ${{ runner.arch }}
+        if: steps.julia_in_path.outcome != 'success'
+      - name: "Add the General registry via Git"
+        run: |
+          import Pkg
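+          # An empty JULIA_PKG_SERVER makes Pkg fetch the registry via Git rather than the package server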
+          ENV["JULIA_PKG_SERVER"] = ""
+          Pkg.Registry.add("General")
+        shell: julia --color=yes {0}
+      - name: "Install CompatHelper"
+        run: |
+          import Pkg
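+          # Pin the name, UUID and major version so the genuine CompatHelper is installed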
+          name = "CompatHelper"
+          uuid = "aa819f21-2bde-4658-8897-bab36330d9b7"
+          version = "3"
+          Pkg.add(; name, uuid, version)
+        shell: julia --color=yes {0}
+      - name: "Run CompatHelper"
+        run: |
+          import CompatHelper
+          CompatHelper.main()
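+          # Projects with nested environments can pass options here, e.g.
+          # CompatHelper.main(; subdirs=["", "docs"]) - see the CompatHelper docs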
+        shell: julia --color=yes {0}
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
-        run: julia -e 'using CompatHelper; CompatHelper.main()'
+          # COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }}
\ No newline at end of file
diff --git a/README.md b/README.md
index 08c2f0e..7d64d17 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 *"We need more speed" - Lightning McQueen or Scarface, I don't know*
 
-<!-- [![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://anicusan.github.io/AcceleratedKernels.jl/stable/) -->
+[![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://anicusan.github.io/AcceleratedKernels.jl/stable/)
 [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://anicusan.github.io/AcceleratedKernels.jl/dev/)
 <!-- [![Build Status](https://github.com/anicusan/AcceleratedKernels.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/anicusan/AcceleratedKernels.jl/actions/workflows/CI.yml?query=branch%3Amain) -->
 
@@ -41,7 +41,21 @@ Again, this is only possible because of the unique Julia compilation model, the
 
 
 ## 2. Status
-This is the very first release of this library; while tests are included for all algorithms, I only ran them locally on the oneAPI (laptop Intel UHD Graphics 620), CUDA (laptop with Nvidia Quadro RTX 4000 and data centre Nvidia A100-40), Metal (Mac M2 and M3), and AMD (data centre AMD MI210) backends. Some kinks might still exist for some platform permutations before a CI is set up. The API may undergo some changes in the following weeks as we discuss it with the Julia community - please join the conversation!
+The AcceleratedKernels.jl sorters were adopted as the official [AMDGPU algorithms](https://github.com/JuliaGPU/AMDGPU.jl/pull/688)! The API is starting to stabilise; it follows the Julia standard library fairly closely, while additionally exposing all temporary arrays for memory reuse. For new ideas / requests, please join the conversation on [Julia Discourse](https://discourse.julialang.org/t/ann-acceleratedkernels-jl-cross-architecture-parallel-algorithms-for-julias-gpu-backends/119698/16) or open [an issue](https://github.com/anicusan/AcceleratedKernels.jl/issues).
+
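+As a quick taste of the API (a minimal sketch drawn from the prototype benchmarks in this repository; `AK.reduce` mirrors `Base.reduce`, and `MtlArray` can be swapped for your backend's array type):
+
+```julia
+import AcceleratedKernels as AK
+using Metal
+
+s = MtlArray(rand(Int32(1):Int32(100), 10, 100_000))
+
+# Same call shape as Base.reduce, executed on the array's backend
+d = AK.reduce(+, s; init=zero(eltype(s)), dims=2)
+```
+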
+We have an extensive test suite; however, I have only run it locally on the oneAPI (laptop Intel UHD Graphics 620), CUDA (laptop with Nvidia Quadro RTX 4000 and data centre Nvidia A100-40), Metal (Mac M2 and M3), and AMD (data centre AMD MI210) backends. Until CI is set up, some kinks may remain on other platform / OS permutations.
 
 AcceleratedKernels.jl will also be a fundamental building block of applications developed at [EvoPhase](https://evophase.co.uk/), so it will see continuous heavy use with industry backing. Long-term stability, performance improvements and support are priorities for us.
 
diff --git a/prototype/reduce_nd_test.jl b/prototype/reduce_nd_test.jl
index a03f9eb..3aef6c3 100644
--- a/prototype/reduce_nd_test.jl
+++ b/prototype/reduce_nd_test.jl
@@ -1,16 +1,16 @@
 
-using Random
-using BenchmarkTools
-using Profile
-using PProf
+# using Random
+# using BenchmarkTools
+# using Profile
+# using PProf
 
-using KernelAbstractions
-using Metal
+# using KernelAbstractions
+# using Metal
 
-import AcceleratedKernels as AK
+# import AcceleratedKernels as AK
 
 
-Random.seed!(0)
+# Random.seed!(0)
 
 
 
@@ -21,28 +21,51 @@ Random.seed!(0)
 # d
 
 
+using Metal
+using KernelAbstractions
+import AcceleratedKernels as AK
 
+using BenchmarkTools
+using Random
+Random.seed!(0)
 
 
-function redadd_base(s)
-    d = reduce(+, s; init=zero(eltype(s)), dims=1)
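+# Baseline: Base.reduce, synchronised so device timings are comparable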
+function sum_base(s; dims)
+    d = reduce(+, s; init=zero(eltype(s)), dims=dims)
     KernelAbstractions.synchronize(get_backend(s))
     d
 end
 
 
-function redadd_ak(s)
-    d = AK.reduce(+, s; init=zero(eltype(s)), dims=1)
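+# Same reduction through AcceleratedKernels, synchronised for fair timing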
+function sum_ak(s; dims)
+    d = AK.reduce(+, s; init=zero(eltype(s)), dims=dims)
     KernelAbstractions.synchronize(get_backend(s))
     d
 end
 
 
+# Make an array with highly unequal axis sizes (10 × 100_000)
 s = MtlArray(rand(Int32(1):Int32(100), 10, 100_000))
-@assert redadd_base(s) == redadd_ak(s)
 
-display(@benchmark redadd_base($s))
-display(@benchmark redadd_ak($s))
+# Correctness
+@assert sum_base(s, dims=1) == sum_ak(s, dims=1)
+@assert sum_base(s, dims=2) == sum_ak(s, dims=2)
+
+# Benchmarks
+println("\nReduction over the short axis (dims=1) - AK vs Base")
+display(@benchmark sum_ak($s, dims=1))
+display(@benchmark sum_base($s, dims=1))
+
+println("\nReduction over the long axis (dims=2) - AK vs Base")
+display(@benchmark sum_ak($s, dims=2))
+display(@benchmark sum_base($s, dims=2))
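+
+# Optional CPU sanity check (assumption: AK also accepts plain Arrays via the
+# KernelAbstractions CPU backend)
+s_cpu = Array(s)
+@assert sum_ak(s_cpu, dims=1) == sum(s_cpu; dims=1)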