Merge pull request #199 from MiloLurati/HIPbackend

HIP Backend
KernelTuner · Sep 8, 2023 · 4f0aa8f · 4f0aa8f
2 parents b3ff4cd + 0d96807
commit 4f0aa8f
Show file tree

Hide file tree

Showing 17 changed files with 753 additions and 57 deletions.
diff --git a/INSTALL.rst b/INSTALL.rst
@@ -111,6 +111,31 @@ Or you could install Kernel Tuner and PyOpenCL together if you haven't done so a
 
 If this fails, please see the PyOpenCL installation guide (https://wiki.tiker.net/PyOpenCL/Installation)
 
+HIP and PyHIP
+-------------
+
+Before you can install PyHIP, you need to have the HIP runtime and compiler installed on your system.
+The HIP compiler is included as part of the ROCm software stack. Here is AMD's installation guide:
+
+* `ROCm Documentation: HIP Installation Guide <https://docs.amd.com/bundle/HIP-Installation-Guide-v5.3/page/Introduction_to_HIP_Installation_Guide.html>`__
+
+After you've installed HIP, you will need to install PyHIP. Run the following command in your terminal to install:
+
+.. code-block:: bash
+
+    pip install pyhip-interface
+
+Alternatively, you can install PyHIP from the source code. First, clone the repository from GitHub:
+
+.. code-block:: bash
+
+    git clone https://github.com/jatinx/PyHIP
+
+Then, navigate to the repository directory and run the following command to install:
+
+.. code-block:: bash
+    
+    python setup.py install
 
 Installing the git version
 --------------------------
@@ -128,6 +153,7 @@ You can install Kernel Tuner with several optional dependencies, the full list i
 
 - `cuda`: install pycuda along with kernel_tuner
 - `opencl`: install pyopencl along with kernel_tuner
+- `hip`: install pyhip along with kernel_tuner
 - `doc`: installs packages required to build the documentation
 - `tutorial`: install packages required to run the guides
 - `dev`: install everything you need to start development on Kernel Tuner

diff --git a/README.rst b/README.rst
@@ -28,9 +28,14 @@ To tune OpenCL kernels:
 - First, make sure you have an OpenCL compiler for your intended OpenCL platform
 - Then type: ``pip install kernel_tuner[opencl]``
 
-Or both:
+To tune HIP kernels:
 
-- ``pip install kernel_tuner[cuda,opencl]``
+- First, make sure you have a HIP runtime and compiler installed
+- Then type: ``pip install kernel_tuner[hip]``
+
+Or all:
+
+- ``pip install kernel_tuner[cuda,opencl,hip]``
 
 More information about how to install Kernel Tuner and its
 dependencies can be found in the `installation guide 

diff --git a/doc/source/architecture.png b/doc/source/architecture.png
diff --git a/doc/source/design.rst b/doc/source/design.rst
@@ -12,7 +12,7 @@ The Kernel Tuner is designed to be extensible and support
 different search and execution strategies. The current architecture of 
 the Kernel Tuner can be seen as:
 
-.. image:: architecture_0.4.3.png
+.. image:: architecture.png
    :width: 500pt
 
 At the top we have the kernel code and the Python script that tunes it, 
@@ -48,7 +48,7 @@ building blocks for implementing runners.
 The observers are explained in :ref:`observers`.
 
 At the bottom, the backends are shown. 
-PyCUDA, CuPy, cuda-python and PyOpenCL are for tuning either CUDA or OpenCL kernels.
+PyCUDA, CuPy, cuda-python, PyOpenCL, and PyHIP are used for tuning CUDA, OpenCL, or HIP kernels.
 The C 
 Functions implementation can actually call any compiler, typically NVCC 
 or GCC is used. There is limited support for tuning Fortran kernels. 
@@ -128,6 +128,12 @@ kernel_tuner.backends.c.CFunctions
     :special-members: __init__
     :members:
 
+kernel_tuner.backends.hip.HipFunctions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. autoclass:: kernel_tuner.backends.hip.HipFunctions
+    :special-members: __init__
+    :members:
+
 
 Util Functions
 --------------

diff --git a/doc/source/index.rst b/doc/source/index.rst
@@ -27,9 +27,14 @@ To tune OpenCL kernels:
 - First, make sure you have an OpenCL compiler for your intended OpenCL platform
 - Then type: ``pip install kernel_tuner[opencl]``
 
-Or both:
+To tune HIP kernels:
 
-- ``pip install kernel_tuner[cuda,opencl]``
+- First, make sure you have a HIP runtime and compiler installed
+- Then type: ``pip install kernel_tuner[hip]``
+
+Or all:
+
+- ``pip install kernel_tuner[cuda,opencl,hip]``
 
 More information about how to install Kernel Tuner and its
 dependencies can be found under :ref:`install`. 

diff --git a/examples/hip/test_vector_add.py b/examples/hip/test_vector_add.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+"""Minimal example for a HIP Kernel unit test with the Kernel Tuner"""
+
+import numpy
+from kernel_tuner import run_kernel
+import pytest
+
+# Skip this whole module when PyHIP cannot be imported. pytest.skip() raises,
+# so no statement after it would run; allow_module_level=True is required
+# when it is called outside of a test function.
+try:
+    from pyhip import hip, hiprtc
+except ImportError:
+    pytest.skip("PyHIP not installed or PYTHONPATH does not includes PyHIP", allow_module_level=True)
+
+def test_vector_add():
+    """Run the HIP vector_add kernel once and compare its output with NumPy."""
+    num_elements = 10000000
+
+    # Single-precision host inputs; the output buffer starts as zeros.
+    lhs = numpy.random.randn(num_elements).astype(numpy.float32)
+    rhs = numpy.random.randn(num_elements).astype(numpy.float32)
+    out = numpy.zeros_like(rhs)
+
+    # block_size_x is not declared in the kernel source itself; presumably
+    # Kernel Tuner supplies it from the params dict -- confirm in its docs.
+    hip_source = """
+    __global__ void vector_add(float *c, float *a, float *b, int n) {
+        int i = blockIdx.x * block_size_x + threadIdx.x;
+        if (i<n) {
+            c[i] = a[i] + b[i];
+        }
+    }
+    """
+
+    outputs = run_kernel(
+        "vector_add",
+        hip_source,
+        (num_elements, 1),
+        [out, lhs, rhs, numpy.int32(num_elements)],
+        {"block_size_x": 512},
+        lang="HIP",
+    )
+
+    # The first entry is compared with the element-wise sum computed on the host.
+    expected = lhs + rhs
+    assert numpy.allclose(outputs[0], expected, atol=1e-8)
diff --git a/examples/hip/vector_add.py b/examples/hip/vector_add.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+"""This is the minimal example from the README"""
+
+import numpy
+from kernel_tuner import tune_kernel
+from kernel_tuner.file_utils import store_output_file, store_metadata_file
+import logging
+from collections import OrderedDict
+
+def tune():
+    """Tune block_size_x for the HIP vector_add kernel and return the results."""
+    num_elements = 10000000
+
+    # Single-precision host data; c is the zero-initialised output buffer.
+    a = numpy.random.randn(num_elements).astype(numpy.float32)
+    b = numpy.random.randn(num_elements).astype(numpy.float32)
+    c = numpy.zeros_like(b)
+    kernel_args = [c, a, b, numpy.int32(num_elements)]
+
+    # Candidate block sizes: 128, 192, ..., 1024 (15 values in steps of 64).
+    search_space = OrderedDict(block_size_x=[128 + 64 * step for step in range(15)])
+
+    hip_source = """
+    __global__ void vector_add(float *c, float *a, float *b, int n) {
+        int i = blockIdx.x * block_size_x + threadIdx.x;
+        if (i<n) {
+            c[i] = a[i] + b[i];
+        }
+    }
+    """
+
+    # Only the results are used here; the second returned value is ignored.
+    results, _env = tune_kernel(
+        "vector_add",
+        hip_source,
+        num_elements,
+        kernel_args,
+        search_space,
+        lang="HIP",
+        cache="vector_add_cache.json",
+        log=logging.DEBUG,
+    )
+
+    # Store the metadata of this run
+    store_metadata_file("vector_add-metadata.json")
+
+    return results
+
+
+if __name__ == "__main__":
+    tune()
diff --git a/examples/hip/vector_add_simulation_mode.py b/examples/hip/vector_add_simulation_mode.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+"""Tune the HIP vector_add example in simulation mode, using the cache file written by vector_add.py"""
+
+import numpy
+from kernel_tuner import tune_kernel
+from kernel_tuner.file_utils import store_output_file, store_metadata_file
+import logging
+from collections import OrderedDict
+import os
+
+def tune():
+    """Tune the HIP vector_add kernel in simulation mode.
+
+    The cache file "vector_add_cache.json" must exist in the current working
+    directory. When it is missing, a hint to run vector_add.py first is
+    printed and no tuning is attempted.
+
+    :returns: the results returned by tune_kernel, or None when the cache
+        file is missing.
+    """
+    filename = "vector_add_cache.json"
+
+    # Check for the cache first so the large input arrays below are only
+    # allocated when tuning will actually take place.
+    if not os.path.isfile(filename):
+        print(f"{filename} does not exist in the directory, run vector_add.py first.")
+        return None
+
+    kernel_string = """
+    __global__ void vector_add(float *c, float *a, float *b, int n) {
+        int i = blockIdx.x * block_size_x + threadIdx.x;
+        if (i<n) {
+            c[i] = a[i] + b[i];
+        }
+    }
+    """
+
+    size = 10000000
+
+    a = numpy.random.randn(size).astype(numpy.float32)
+    b = numpy.random.randn(size).astype(numpy.float32)
+    c = numpy.zeros_like(b)
+    n = numpy.int32(size)
+
+    args = [c, a, b, n]
+
+    tune_params = OrderedDict()
+    tune_params["block_size_x"] = [128+64*i for i in range(15)]
+
+    # Pass the same path that was checked above instead of repeating the literal.
+    results, env = tune_kernel("vector_add", kernel_string, size, args, tune_params,
+                               strategy="random_sample", strategy_options=dict(max_fevals=10),
+                               lang="HIP", simulation_mode=True, cache=filename)
+
+    return results
+
+
+if __name__ == "__main__":
+    tune()