diff --git a/src/argparser.py b/src/argparser.py index bfa4fec28..7b53377b8 100644 --- a/src/argparser.py +++ b/src/argparser.py @@ -105,7 +105,6 @@ def omniarg_parser( add_general_group(profile_parser, rocprof_compute_version) profile_group = profile_parser.add_argument_group("Profile Options") - roofline_group = profile_parser.add_argument_group("Standalone Roofline Options") profile_group.add_argument( "-n", @@ -220,61 +219,27 @@ def omniarg_parser( help="\t\t\tProfile without collecting roofline data.", ) profile_group.add_argument( - "remaining", - metavar="-- [ ...]", - default=None, - nargs=argparse.REMAINDER, - help="\t\t\tProvide command for profiling after double dash.", - ) - - ## Roofline Command Line Options - roofline_group.add_argument( "--roof-only", required=False, default=False, action="store_true", help="\t\t\tProfile roofline data only.", ) - roofline_group.add_argument( - "--sort", - required=False, - metavar="", - type=str, - default="kernels", - choices=["kernels", "dispatches"], - help="\t\t\tOverlay top kernels or top dispatches: (DEFAULT: kernels)\n\t\t\t kernels\n\t\t\t dispatches", - ) - roofline_group.add_argument( - "-m", - "--mem-level", - required=False, - choices=["HBM", "L2", "vL1D", "LDS"], - metavar="", - nargs="+", - type=str, - default="ALL", - help="\t\t\tFilter by memory level: (DEFAULT: ALL)\n\t\t\t HBM\n\t\t\t L2\n\t\t\t vL1D\n\t\t\t LDS", + profile_group.add_argument( + "remaining", + metavar="-- [ ...]", + default=None, + nargs=argparse.REMAINDER, + help="\t\t\tProvide command for profiling after double dash.", ) - roofline_group.add_argument( - "--device", + profile_group.add_argument( + "--roofline-device", metavar="", required=False, default=-1, type=int, help="\t\t\tTarget GPU device ID. (DEFAULT: ALL)", ) - roofline_group.add_argument( - "--kernel-names", - required=False, - default=False, - action="store_true", - help="\t\t\tInclude kernel names in roofline plot.", - ) - # roofline_group.add_argument('-w', '--workgroups', required=False, default=-1, type=int, help="\t\t\tNumber of kernel workgroups (DEFAULT: 1024)") - # roofline_group.add_argument('--wsize', required=False, default=-1, type=int, help="\t\t\tWorkgroup size (DEFAULT: 256)") - # roofline_group.add_argument('--dataset', required=False, default = -1, type=int, help="\t\t\tDataset size (DEFAULT: 536M)") - # roofline_group.add_argument('-e', '--experiments', required=False, default=-1, type=int, help="\t\t\tNumber of experiments (DEFAULT: 100)") - # roofline_group.add_argument('--iter', required=False, default=-1, type=int, help="\t\t\tNumber of iterations (DEFAULT: 10)") ## Database Command Line Options ## ---------------------------- @@ -393,6 +358,7 @@ def omniarg_parser( add_general_group(analyze_parser, rocprof_compute_version) analyze_group = analyze_parser.add_argument_group("Analyze Options") + roofline_group = analyze_parser.add_argument_group("Standalone Roofline Options") analyze_advanced_group = analyze_parser.add_argument_group("Advanced Options") analyze_group.add_argument( @@ -464,6 +430,48 @@ def omniarg_parser( const=8050, help="\t\tActivate a GUI to interate with rocprofiler-compute metrics.\n\t\tOptionally, specify port to launch application (DEFAULT: 8050)", ) + + ## Roofline Command Line Options + roofline_group.add_argument( + "--roofline", + required=False, + default=False, + action="store_true", + help="\t\t\tGenerate roofline plot as pdf.", + ) + roofline_group.add_argument( + "--sort", + required=False, + metavar="", + type=str, + default="kernels", + choices=["kernels", "dispatches"], + help="\t\t\tOverlay top kernels or top dispatches: (DEFAULT: kernels)\n\t\t\t kernels\n\t\t\t dispatches", + ) + roofline_group.add_argument( + "-m", + "--mem-level", + required=False, + choices=["HBM", "L2", "vL1D", "LDS"], + metavar="", + nargs="+", + type=str, + default="ALL", + help="\t\t\tFilter by memory level: (DEFAULT: ALL)\n\t\t\t HBM\n\t\t\t L2\n\t\t\t vL1D\n\t\t\t LDS", + ) + roofline_group.add_argument( + "--kernel-names", + required=False, + default=False, + action="store_true", + help="\t\t\tInclude kernel names in roofline plot.", + ) + # roofline_group.add_argument('-w', '--workgroups', required=False, default=-1, type=int, help="\t\t\tNumber of kernel workgroups (DEFAULT: 1024)") + # roofline_group.add_argument('--wsize', required=False, default=-1, type=int, help="\t\t\tWorkgroup size (DEFAULT: 256)") + # roofline_group.add_argument('--dataset', required=False, default = -1, type=int, help="\t\t\tDataset size (DEFAULT: 536M)") + # roofline_group.add_argument('-e', '--experiments', required=False, default=-1, type=int, help="\t\t\tNumber of experiments (DEFAULT: 100)") + # roofline_group.add_argument('--iter', required=False, default=-1, type=int, help="\t\t\tNumber of iterations (DEFAULT: 10)") + analyze_advanced_group.add_argument( "--random-port", action="store_true", diff --git a/src/rocprof_compute_analyze/analysis_cli.py b/src/rocprof_compute_analyze/analysis_cli.py index e0bf5f1d9..1fe0a2dae 100644 --- a/src/rocprof_compute_analyze/analysis_cli.py +++ b/src/rocprof_compute_analyze/analysis_cli.py @@ -22,6 +22,8 @@ # SOFTWARE. ##############################################################################el +import os + from rocprof_compute_analyze.analysis_base import OmniAnalyze_Base from utils import file_io, parser, tty from utils.kernel_name_shortener import kernel_name_shortener @@ -41,6 +43,7 @@ def pre_processing(self): for d in self.get_args().path: file_io.create_df_kernel_top_stats( raw_data_dir=d[0], + filter_kernel_ids=self._runs[d[0]].filter_kernel_ids, filter_gpu_ids=self._runs[d[0]].filter_gpu_ids, filter_dispatch_ids=self._runs[d[0]].filter_dispatch_ids, time_unit=self.get_args().time_unit, @@ -87,3 +90,34 @@ def run_analysis(self): ], self._output, ) + + if self.get_args().roofline: + self.generate_roofline() + + @demarcate + def generate_roofline(self): + workload_dir = self.get_args().path[0][0] + arch = self._runs[workload_dir].sys_info.iloc[0]["gpu_arch"] + has_roofline = os.path.isfile(os.path.join(workload_dir, "roofline.csv")) + if has_roofline and hasattr(self.get_socs()[arch], "roofline_obj"): + self.get_socs()[arch].analysis_setup( + roofline_parameters={ + "workload_dir": workload_dir, + "device_id": 0, + "sort_type": "kernels", + "mem_level": "ALL", + "include_kernel_names": False, + "roofline_cli": True, + } + ) + roof_obj = self.get_socs()[arch].roofline_obj + roof_obj.empirical_roofline( + ret_df=parser.apply_filters( + workload=self._runs[workload_dir], + dir=workload_dir, + is_gui=True, + debug=self.get_args().debug, + ) + ) + else: + console_error("Roofline unsupported for gpu or roofline.csv is missing.") diff --git a/src/rocprof_compute_analyze/analysis_webui.py b/src/rocprof_compute_analyze/analysis_webui.py index adb63c942..eac15438d 100644 --- a/src/rocprof_compute_analyze/analysis_webui.py +++ b/src/rocprof_compute_analyze/analysis_webui.py @@ -173,7 +173,7 @@ def generate_from_filter( "sort_type": "kernels", "mem_level": "ALL", "include_kernel_names": False, - "is_standalone": False, + "roofline_cli": False, } ) roof_obj = self.get_socs()[self.arch].roofline_obj diff --git a/src/roofline.py b/src/roofline.py index 77f491b59..bc3ea507d 100644 --- a/src/roofline.py +++ b/src/roofline.py @@ -56,7 +56,7 @@ def __init__(self, args, mspec, run_parameters=None): "sort_type": "kernels", "mem_level": "ALL", "include_kernel_names": False, - "is_standalone": False, + "roofline_cli": False, } ) self.__ai_data = None @@ -65,8 +65,8 @@ def __init__(self, args, mspec, run_parameters=None): # Set roofline run parameters from args if hasattr(self.__args, "path") and not run_parameters: self.__run_parameters["workload_dir"] = self.__args.path - if hasattr(self.__args, "roof_only") and self.__args.roof_only == True: - self.__run_parameters["is_standalone"] = True + if hasattr(self.__args, "roofline") and self.__args.roofline == True: + self.__run_parameters["roofline_cli"] = True if hasattr(self.__args, "kernel_names") and self.__args.kernel_names == True: self.__run_parameters["include_kernel_names"] = True if hasattr(self.__args, "mem_level") and self.__args.mem_level != "ALL": @@ -84,9 +84,14 @@ def __init__(self, args, mspec, run_parameters=None): def validate_parameters(self): if self.__run_parameters["include_kernel_names"] and ( - not self.__run_parameters["is_standalone"] + not self.__run_parameters["roofline_cli"] ): - console_error("--roof-only is required for --kernel-names") + console_error("--roofline is required for --kernel-names") + + # Change vL1D to a interpretable str, if required + if "vL1D" in self.__run_parameters["mem_level"]: + self.__run_parameters["mem_level"].remove("vL1D") + self.__run_parameters["mem_level"].append("L1") def roof_setup(self): # set default workload path if not specified @@ -115,7 +120,6 @@ def empirical_roofline( msg = "AI at each mem level:" for i in self.__ai_data: msg += "\n\t%s -> %s" % (i, self.__ai_data[i]) - console_debug(msg) # Generate a roofline figure for each data type fp32_fig = self.generate_plot(dtype="FP32") @@ -145,8 +149,8 @@ def empirical_roofline( ) self.__figure.update_xaxes(dtick=1) # Output will be different depending on interaction type: - # Save PDFs if we're in "standalone roofline" mode, otherwise return HTML to be used in GUI output - if self.__run_parameters["is_standalone"]: + # Save PDFs if we're in "roofline cli" mode, otherwise return HTML to be used in GUI output + if self.__run_parameters["roofline_cli"]: dev_id = str(self.__run_parameters["device_id"]) fp32_fig.write_image( @@ -212,7 +216,7 @@ def generate_plot(self, dtype, fig=None) -> go.Figure(): """Create graph object from ai_data (coordinate points) and ceiling_data (peak FLOP and BW) data.""" if fig is None: fig = go.Figure() - plot_mode = "lines+text" if self.__run_parameters["is_standalone"] else "lines" + plot_mode = "lines+text" if self.__run_parameters["roofline_cli"] else "lines" self.__ceiling_data = constuct_roof( roofline_parameters=self.__run_parameters, dtype=dtype, @@ -242,7 +246,7 @@ def generate_plot(self, dtype, fig=None) -> go.Figure(): ), ( None - if self.__run_parameters["is_standalone"] + if self.__run_parameters["roofline_cli"] else "{} GB/s".format( to_int(self.__ceiling_data[cache_level.lower()][2]) ) @@ -265,7 +269,7 @@ def generate_plot(self, dtype, fig=None) -> go.Figure(): text=[ ( None - if self.__run_parameters["is_standalone"] + if self.__run_parameters["roofline_cli"] else "{} GFLOP/s".format( to_int(self.__ceiling_data["valu"][2]) ) @@ -291,7 +295,7 @@ def generate_plot(self, dtype, fig=None) -> go.Figure(): text=[ ( None - if self.__run_parameters["is_standalone"] + if self.__run_parameters["roofline_cli"] else "{} GFLOP/s".format(to_int(self.__ceiling_data["mfma"][2])) ), "{} GFLOP/s".format(to_int(self.__ceiling_data["mfma"][2])), @@ -354,25 +358,6 @@ def generate_plot(self, dtype, fig=None) -> go.Figure(): return fig - @demarcate - def standalone_roofline(self): - from collections import OrderedDict - - import pandas as pd - - # Change vL1D to a interpretable str, if required - if "vL1D" in self.__run_parameters["mem_level"]: - self.__run_parameters["mem_level"].remove("vL1D") - self.__run_parameters["mem_level"].append("L1") - - app_path = os.path.join(self.__run_parameters["workload_dir"], "pmc_perf.csv") - roofline_exists = os.path.isfile(app_path) - if not roofline_exists: - console_error("roofline", "{} does not exist".format(app_path)) - t_df = OrderedDict() - t_df["pmc_perf"] = pd.read_csv(app_path) - self.empirical_roofline(ret_df=t_df) - # Main methods @abstractmethod def pre_processing(self): @@ -433,8 +418,7 @@ def profile(self): # we include pre_processing() and profile() methods for those who wish to borrow the roofline module @abstractmethod def post_processing(self): - if self.__run_parameters["is_standalone"]: - self.standalone_roofline() + return def to_int(a): diff --git a/src/utils/utils.py b/src/utils/utils.py index 5d3f8df3f..5503d322b 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -508,7 +508,7 @@ def mibench(args, mspec): "-o", args.path + "/" + "roofline.csv", "-d", - str(args.device), + str(args.roofline_device), ] if args.quiet: my_args += "--quiet"