From 982e371f9a77e3cafd46f8de4b365d9065975097 Mon Sep 17 00:00:00 2001 From: NguyenNhuDi Date: Tue, 5 Nov 2024 18:07:55 +0000 Subject: [PATCH 1/4] updated default gpu to include gfx12 and gfx1151 --- rmake.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/rmake.py b/rmake.py index 265577f1d..b085bb809 100644 --- a/rmake.py +++ b/rmake.py @@ -20,6 +20,9 @@ def parse_args(): parser = argparse.ArgumentParser(description=""" Checks build arguments """) + + default_gpus = 'gfx906:xnack-,gfx1030,gfx1100,gfx1101,gfx1102,gfx1151,gfx1200,gfx1201' + parser.add_argument('-g', '--debug', required=False, default=False, action='store_true', help='Generate Debug build (default: False)') parser.add_argument( '--build_dir', type=str, required=False, default="build", @@ -37,7 +40,7 @@ def parse_args(): help='Install after build (default: False)') parser.add_argument( '--cmake-darg', required=False, dest='cmake_dargs', action='append', default=[], help='List of additional cmake defines for builds (e.g. CMAKE_CXX_COMPILER_LAUNCHER=ccache)') - parser.add_argument('-a', '--architecture', dest='gpu_architecture', required=False, default="gfx906;gfx1030;gfx1100;gfx1101;gfx1102", #:sramecc+:xnack-" ) #gfx1030" ) #gfx906" ) # gfx1030" ) + parser.add_argument('-a', '--architecture', dest='gpu_architecture', required=False, default=default_gpus, #:sramecc+:xnack-" ) #gfx1030" ) #gfx906" ) # gfx1030" ) help='Set GPU architectures, e.g. 
all, gfx000, gfx803, gfx906:xnack-;gfx1030;gfx1100 (optional, default: all)') parser.add_argument('-v', '--verbose', required=False, default=False, action='store_true', help='Verbose build (default: False)') @@ -119,7 +122,7 @@ def config_cmd(): else: cmake_executable = "cmake" toolchain = "toolchain-linux.cmake" - cmake_platform_opts = f"-DROCM_DIR:PATH={rocm_path} -DCPACK_PACKAGING_INSTALL_PREFIX={rocm_path}" + cmake_platform_opts = [f"-DROCM_DIR:PATH={rocm_path}", f"-DCPACK_PACKAGING_INSTALL_PREFIX={rocm_path}"] tools = f"-DCMAKE_TOOLCHAIN_FILE={toolchain}" cmake_options.append( tools ) From 60b1f0badf4a42d9ef7b4d3989cfe0ba5de56620 Mon Sep 17 00:00:00 2001 From: NguyenNhuDi Date: Tue, 5 Nov 2024 18:09:27 +0000 Subject: [PATCH 2/4] updated changelog --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 49615b2c8..9d73c51b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ Full documentation for rocPRIM is available at [https://rocm.docs.amd.com/projec ## rocPRIM 3.3.0 for ROCm 6.3.0 ### Added - +* Updated the default value for -a argument from rmake.py to `gfx906:xnack-,gfx1030,gfx1100,gfx1101,gfx1102,gfx1151,gfx1200,gfx1201` * The `--test smoke` option has been added to `rtest.py`. When `rtest.py` is called with this option it runs a subset of tests such that the total test time is 5 minutes. Use `python3 ./rtest.py --test smoke` or `python3 ./rtest.py -t smoke` to run the smoke test. * The `--seed` option has been added to `run_benchmarks.py`. The `--seed` option specifies a seed for the generation of random inputs. When the option is omitted, the default behavior is to use a random seed for each benchmark measurement. 
* Added configuration autotuning to device partition (`rocprim::partition`, `rocprim::partition_two_way`, and `rocprim::partition_three_way`), to device select (`rocprim::select`, `rocprim::unique`, and `rocprim::unique_by_key`), and to device reduce by key (`rocprim::reduce_by_key`) to improve performance on selected architectures. @@ -25,7 +25,7 @@ Full documentation for rocPRIM is available at [https://rocm.docs.amd.com/projec * `rocprim::thread_load()` and `rocprim::thread_store()` have been deprecated. Use `dereference()` instead. ### Resolved issues - +* Fixed an issue in `rmake.py` where the list storing cmake options would contain individual characters instead of full string of option * Resolved an issue in `rtest.py` where it crashed if the `build` folder was created without `release` or `debug` subdirectories. * Resolved an issue with `rtest.py` on Windows where passing an absolute path to `--install_dir` caused a `FileNotFound` error. * rocPRIM functions are no longer forcefully inlined on Windows. This significantly reduces the build From 3bd29ed0fba7df311d30e2ae2e439d658261d1ac Mon Sep 17 00:00:00 2001 From: NguyenNhuDi Date: Wed, 13 Nov 2024 16:37:58 +0000 Subject: [PATCH 3/4] fixed minor grammar mistake in changelog --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d73c51b2..871bbd872 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ Full documentation for rocPRIM is available at [https://rocm.docs.amd.com/projec ## rocPRIM 3.3.0 for ROCm 6.3.0 ### Added -* Updated the default value for -a argument from rmake.py to `gfx906:xnack-,gfx1030,gfx1100,gfx1101,gfx1102,gfx1151,gfx1200,gfx1201` +* Updated the default value for the `-a` argument from `rmake.py` to `gfx906:xnack-,gfx1030,gfx1100,gfx1101,gfx1102,gfx1151,gfx1200,gfx1201`. * The `--test smoke` option has been added to `rtest.py`. 
When `rtest.py` is called with this option it runs a subset of tests such that the total test time is 5 minutes. Use `python3 ./rtest.py --test smoke` or `python3 ./rtest.py -t smoke` to run the smoke test. * The `--seed` option has been added to `run_benchmarks.py`. The `--seed` option specifies a seed for the generation of random inputs. When the option is omitted, the default behavior is to use a random seed for each benchmark measurement. * Added configuration autotuning to device partition (`rocprim::partition`, `rocprim::partition_two_way`, and `rocprim::partition_three_way`), to device select (`rocprim::select`, `rocprim::unique`, and `rocprim::unique_by_key`), and to device reduce by key (`rocprim::reduce_by_key`) to improve performance on selected architectures. @@ -25,7 +25,7 @@ Full documentation for rocPRIM is available at [https://rocm.docs.amd.com/projec * `rocprim::thread_load()` and `rocprim::thread_store()` have been deprecated. Use `dereference()` instead. ### Resolved issues -* Fixed an issue in `rmake.py` where the list storing cmake options would contain individual characters instead of full string of option +* Fixed an issue in `rmake.py` where the list storing cmake options would contain individual characters instead of a full string of options. * Resolved an issue in `rtest.py` where it crashed if the `build` folder was created without `release` or `debug` subdirectories. * Resolved an issue with `rtest.py` on Windows where passing an absolute path to `--install_dir` caused a `FileNotFound` error. * rocPRIM functions are no longer forcefully inlined on Windows. 
This significantly reduces the build From 85fcaa9bf72527239e2391a546540756d5faf816 Mon Sep 17 00:00:00 2001 From: Di Nguyen Date: Wed, 13 Nov 2024 10:14:41 -0700 Subject: [PATCH 4/4] Update CHANGELOG.md Co-authored-by: spolifroni-amd --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 871bbd872..72d6c48d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ Full documentation for rocPRIM is available at [https://rocm.docs.amd.com/projec ## rocPRIM 3.3.0 for ROCm 6.3.0 ### Added -* Updated the default value for the `-a` argument from `rmake.py` to `gfx906:xnack-,gfx1030,gfx1100,gfx1101,gfx1102,gfx1151,gfx1200,gfx1201`. +* Changed the default value of `rmake.py -a` to `default_gpus`. This is equivalent to `gfx906:xnack-,gfx1030,gfx1100,gfx1101,gfx1102,gfx1151,gfx1200,gfx1201`. * The `--test smoke` option has been added to `rtest.py`. When `rtest.py` is called with this option it runs a subset of tests such that the total test time is 5 minutes. Use `python3 ./rtest.py --test smoke` or `python3 ./rtest.py -t smoke` to run the smoke test. * The `--seed` option has been added to `run_benchmarks.py`. The `--seed` option specifies a seed for the generation of random inputs. When the option is omitted, the default behavior is to use a random seed for each benchmark measurement. * Added configuration autotuning to device partition (`rocprim::partition`, `rocprim::partition_two_way`, and `rocprim::partition_three_way`), to device select (`rocprim::select`, `rocprim::unique`, and `rocprim::unique_by_key`), and to device reduce by key (`rocprim::reduce_by_key`) to improve performance on selected architectures.