From 9502cf2c056eb5c1d0a9a7da835b3f0fa41476b0 Mon Sep 17 00:00:00 2001
From: Binyang Li <binyli@microsoft.com>
Date: Mon, 16 Dec 2024 19:04:58 +0000
Subject: [PATCH 1/5] fix for rocm

---
 src/registered_memory.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/registered_memory.cc b/src/registered_memory.cc
index 1ad97c1b2..6bb854e0f 100644
--- a/src/registered_memory.cc
+++ b/src/registered_memory.cc
@@ -56,6 +56,9 @@ CUmemAllocationHandleType getNvlsCompatibleMemHandleType() {
 
 // Check if ptr is allocaed by cuMemMap
 bool isCuMemMapAllocated(void* ptr) {
+#if defined(__HIP_PLATFORM_AMD__)
+  return false;
+#else
   CUmemGenericAllocationHandle handle;
   CUresult result = cuMemRetainAllocationHandle(&handle, ptr);
   if (result != CUDA_SUCCESS) {
@@ -66,6 +69,7 @@ bool isCuMemMapAllocated(void* ptr) {
     throw mscclpp::Error("cuMemMap is used in env without NVLS support", mscclpp::ErrorCode::InvalidUsage);
   }
   return true;
+#endif
 }
 
 }  // namespace

From 2b84a3111270193efb94aace58854547f8225bae Mon Sep 17 00:00:00 2001
From: Binyang Li <binyli@microsoft.com>
Date: Mon, 16 Dec 2024 19:36:04 +0000
Subject: [PATCH 2/5] trigger test

---
 src/registered_memory.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/registered_memory.cc b/src/registered_memory.cc
index 6bb854e0f..2468aaac0 100644
--- a/src/registered_memory.cc
+++ b/src/registered_memory.cc
@@ -71,6 +71,7 @@ bool isCuMemMapAllocated(void* ptr) {
   return true;
 #endif
 }
+// test trigger
 
 }  // namespace
 

From 93ee169432c462e65b511a0d144152237b4fd0d9 Mon Sep 17 00:00:00 2001
From: Binyang Li <binyli@microsoft.com>
Date: Mon, 16 Dec 2024 19:45:18 +0000
Subject: [PATCH 3/5] Fix

---
 src/registered_memory.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/registered_memory.cc b/src/registered_memory.cc
index 2468aaac0..6bb854e0f 100644
--- a/src/registered_memory.cc
+++ b/src/registered_memory.cc
@@ -71,7 +71,6 @@ bool isCuMemMapAllocated(void* ptr) {
   return true;
 #endif
 }
-// test trigger
 
 }  // namespace
 

From 725981838ef27534f9324f8bdfd917354e011571 Mon Sep 17 00:00:00 2001
From: Changho Hwang <changhohwang@microsoft.com>
Date: Mon, 16 Dec 2024 15:04:30 -0800
Subject: [PATCH 4/5] tackle comments

---
 src/registered_memory.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/registered_memory.cc b/src/registered_memory.cc
index 6bb854e0f..bf40470de 100644
--- a/src/registered_memory.cc
+++ b/src/registered_memory.cc
@@ -55,7 +55,7 @@ CUmemAllocationHandleType getNvlsCompatibleMemHandleType() {
 }
 
 // Check if ptr is allocaed by cuMemMap
-bool isCuMemMapAllocated(void* ptr) {
+bool isCuMemMapAllocated([[maybe_unused]] void* ptr) {
 #if defined(__HIP_PLATFORM_AMD__)
   return false;
 #else

From 19896057d1d27b21237012850ef9ea3e731d3f14 Mon Sep 17 00:00:00 2001
From: Binyang Li <binyli@microsoft.com>
Date: Tue, 17 Dec 2024 00:26:05 +0000
Subject: [PATCH 5/5] update doc

---
 docs/design/nccl-over-mscclpp.md | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/docs/design/nccl-over-mscclpp.md b/docs/design/nccl-over-mscclpp.md
index ca362e9b5..a50b3611d 100644
--- a/docs/design/nccl-over-mscclpp.md
+++ b/docs/design/nccl-over-mscclpp.md
@@ -51,12 +51,7 @@ The table below lists all NCCL APIs (v2.21). We may cover more APIs in the futur
 
 The executor is a versatile tool designed to specify how mscclpp executes algorithms. Currently, only the allReduce operation allows for algorithm customization. The following environment variables can be managed:
 
-- ALLREDUCEPKT_IP_JSON_FILE: Specifies the path to the JSON file that defines the algorithm for small-sized, in-place operations.
-- ALLREDUCEPKT_OP_JSON_FILE: Specifies the path to the JSON file that defines the algorithm for small-sized, out-of-place operations.
-- ALLREDUCE_IP_JSON_FILE: Specifies the path to the JSON file that defines the algorithm for larger-sized, in-place operations.
-- ALLREDUCE_OP_JSON_FILE: Specifies the path to the JSON file that defines the algorithm for larger-sized, out-of-place operations.
-- ALLREDUCE_SMALL_MSG_BOUNDARY: Defines the size threshold at which the algorithm will switch between fallback code and the customized algorithm for small messages.
-- ALLREDUCE_LARGE_MSG_BOUNDARY: Defines the size threshold at which the algorithm will switch between the customized algorithm for small messages and that for larger messages.
+- MSCCLPP_EXECUTION_PLAN_DIR: Specifies the directory in which the executor looks for execution plan JSON files.
 
 ```{figure} ../figs/size_boundary_diagram.png
 :name: MMSCCL++ Abstractions
@@ -68,4 +63,5 @@ Decision Flowchart for Message Size-Based Algorithm Execution
 
 This is an example of executing the interface with the executor:
 ``` bash
-mpirun -np 8 -x ALLREDUCEPKT_IP_JSON_FILE=/root/azure-mscclpp/nccl/test/execution-files/allreducepacket.json -x ALLREDUCE_IP_JSON_FILE=/root/azure-mscclpp/nccl/test/execution-files/allreducesm.json -x ALLREDUCE_SMALL_MSG_BOUNDARY=16K -x ALLREDUCE_LARGE_MSG_BOUNDARY=1M ./apps/nccl/test/nccl_api_test
+mpirun -np 8 -x MSCCLPP_EXECUTION_PLAN_DIR=/root/azure-mscclpp/nccl/test/execution-files ./apps/nccl/test/nccl_api_test
+```