Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MI300 rhel and sles roofline binaries #480

Merged
merged 3 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/roofline.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def empirical_roofline(
"""Generate a set of empirical roofline plots given a directory containing required profiling and benchmarking data"""
# Create arithmetic intensity data that will populate the roofline model
console_debug("roofline", "Path: %s" % self.__run_parameters["workload_dir"])
self.__ai_data = calc_ai(self.__run_parameters["sort_type"], ret_df)
self.__ai_data = calc_ai(self.__mspec, self.__run_parameters["sort_type"], ret_df)

msg = "AI at each mem level:"
for i in self.__ai_data:
Expand Down
39 changes: 19 additions & 20 deletions src/utils/roofline_calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@

IMGNAME = "empirRoof"

L2_BANKS = 32 # default assuming mi200, mi300

XMIN = 0.01
XMAX = 1000

Expand Down Expand Up @@ -188,7 +186,7 @@ def calc_ceilings(roofline_parameters, dtype, benchmark_data):
# Overlay application performance
# -------------------------------------------------------------------------------------
# Calculate relevant metrics for ai calculation
def calc_ai(sort_type, ret_df):
def calc_ai(mspec, sort_type, ret_df):
"""Given counter data, calculate arithmetic intensity for each kernel in the application."""
df = ret_df["pmc_perf"]
# Sort by top kernels or top dispatches?
Expand Down Expand Up @@ -306,8 +304,8 @@ def calc_ai(sort_type, ret_df):
lds_data += (
(df["SQ_LDS_IDX_ACTIVE"][idx] - df["SQ_LDS_BANK_CONFLICT"][idx])
* 4
* L2_BANKS
) # L2_BANKS = 32 (since assuming mi200 or mi300)
* (mspec.lds_banks_per_cu)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The value is the same (32) in the end, but this factor should be the number of LDS banks, not L2 banks. Changing it to use the per-architecture `lds_banks_per_cu` defined in the SoC file instead of a hardcoded value.

)
except KeyError:
console_debug(
"roofline",
Expand Down Expand Up @@ -338,37 +336,38 @@ def calc_ai(sort_type, ret_df):
)
pass
try:
if df.keys().str.contains("TCC_BUBBLE").sum() > 0:
# MI300 uses TCC_BUBBLE_sum to calculate hbm_data
if mspec.gpu_model == "MI200":
Copy link
Contributor Author

@cfallows-amd cfallows-amd Nov 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The diff is visually messy, but the only change is the branch condition: if the GPU model is MI200, do the original calculation; otherwise, do the TCC_BUBBLE_sum calculation for MI300 and newer hardware. No changes were made to the calculations themselves; the if/else now checks the GPU model rather than the presence of the counter.

hbm_data += (
(df["TCC_BUBBLE_sum"][idx] * 128)
+ (df["TCC_EA0_RDREQ_32B_sum"][idx] * 32)
(df["TCC_EA_RDREQ_32B_sum"][idx] * 32)
+ (
(
df["TCC_EA0_RDREQ_sum"][idx]
- df["TCC_BUBBLE_sum"][idx]
- df["TCC_EA0_RDREQ_32B_sum"][idx]
)
(df["TCC_EA_RDREQ_sum"][idx] - df["TCC_EA_RDREQ_32B_sum"][idx])
* 64
)
+ (df["TCC_EA_WRREQ_64B_sum"][idx] * 64)
+ (
(df["TCC_EA0_WRREQ_sum"][idx] - df["TCC_EA0_WRREQ_64B_sum"][idx])
(df["TCC_EA_WRREQ_sum"][idx] - df["TCC_EA_WRREQ_64B_sum"][idx])
* 32
)
+ (df["TCC_EA0_WRREQ_64B_sum"][idx] * 64)
)

else:
# Use TCC_BUBBLE_sum to calculate hbm_data
hbm_data += (
(df["TCC_EA_RDREQ_32B_sum"][idx] * 32)
(df["TCC_BUBBLE_sum"][idx] * 128)
+ (df["TCC_EA0_RDREQ_32B_sum"][idx] * 32)
+ (
(df["TCC_EA_RDREQ_sum"][idx] - df["TCC_EA_RDREQ_32B_sum"][idx])
(
df["TCC_EA0_RDREQ_sum"][idx]
- df["TCC_BUBBLE_sum"][idx]
- df["TCC_EA0_RDREQ_32B_sum"][idx]
)
* 64
)
+ (df["TCC_EA_WRREQ_64B_sum"][idx] * 64)
+ (
(df["TCC_EA_WRREQ_sum"][idx] - df["TCC_EA_WRREQ_64B_sum"][idx])
(df["TCC_EA0_WRREQ_sum"][idx] - df["TCC_EA0_WRREQ_64B_sum"][idx])
* 32
)
+ (df["TCC_EA0_WRREQ_64B_sum"][idx] * 64)
)
except KeyError:
console_debug(
Expand Down
Binary file not shown.
Binary file not shown.