[ROCm] Using more precise memory profiling (vllm-project#12624)

gshtras · shreyankg · commit 105bf03c8e4a · 2025-05-02T23:46:30.000+05:30
Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
@@ -169,4 +169,5 @@ def get_current_memory_usage(cls,
                                  device: Optional[torch.types.Device] = None
                                  ) -> float:
         torch.cuda.reset_peak_memory_stats(device)
-        return torch.cuda.max_memory_allocated(device)
+        return torch.cuda.mem_get_info(device)[1] - torch.cuda.mem_get_info(
+            device)[0]