Skip to content

Commit 4f650b7

Browse files
authored
Pytorch profiling - Break down python profiling, fixes in detailed profiling
1/ Break down Python profiling steps into 2 parts (pre-forward to backward-end, and backward-end to next pre-forward). 2/ The API for enabling the detailed profiler changed in PyTorch 1.6; fixed to check the version and then pass the parameter. 3/ Function to generate count_parameter. 4/ Enable cProfile to emit total time rather than just CPU time.
2 parents 2b53112 + 8cd6b0c commit 4f650b7

File tree

5 files changed

+138
-65
lines changed

5 files changed

+138
-65
lines changed

smdebug/core/locations.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,8 +164,11 @@ def get_python_profiling_stats_dir(
164164
step,
165165
start_time_since_epoch_in_micros,
166166
end_time_since_epoch_in_micros,
167+
step_phase="",
167168
):
168169
node_id = get_node_id()
170+
if step_phase:
171+
step = str(step) + "-" + step_phase
169172
folder_name = "{0}_{1}_{2}_{3}".format(
170173
start_time_since_epoch_in_micros, end_time_since_epoch_in_micros, node_id, step
171174
)

smdebug/profiler/analysis/python_stats_reader.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,12 @@ def load_python_profile_stats(self):
2828
"""Load the python profile stats. To be implemented in subclass.
2929
"""
3030

31+
def _get_step_stepphase(self, step_phase_str):
32+
splits = step_phase_str.split("-", 1)
33+
step = splits[0]
34+
step_phase = splits[1] if len(splits) > 1 else "full"
35+
return step, step_phase
36+
3137

3238
class S3PythonStatsReader(PythonStatsReader):
3339
"""Higher level stats reader to download python stats from s3.
@@ -93,7 +99,8 @@ def load_python_profile_stats(self):
9399
with open(stats_file_path, "wb") as f:
94100
f.write(object_data)
95101

96-
start_time, end_time, node_id, step = stats_dir.split("_")
102+
start_time, end_time, node_id, step_phase_str = stats_dir.split("_")
103+
step, step_phase = self._get_step_stepphase(step_phase_str)
97104
python_profile_stats.append(
98105
StepPythonProfileStats(
99106
profiler_name,
@@ -102,6 +109,7 @@ def load_python_profile_stats(self):
102109
float(end_time),
103110
node_id,
104111
stats_file_path,
112+
step_phase,
105113
)
106114
)
107115
python_profile_stats.sort(
@@ -131,7 +139,9 @@ def load_python_profile_stats(self):
131139
"""
132140
python_profile_stats = []
133141
for python_stat_dir in os.listdir(self.profile_dir):
134-
start_time, end_time, node_id, step = python_stat_dir.split("_")
142+
start_time, end_time, node_id, step_phase_str = python_stat_dir.split("_")
143+
step, step_phase = self._get_step_stepphase(step_phase_str)
144+
135145
stats_dir = os.path.join(self.profile_dir, python_stat_dir)
136146
if os.path.isfile(os.path.join(stats_dir, CPROFILE_STATS_FILENAME)):
137147
profiler_name = CPROFILE_NAME

smdebug/profiler/analysis/utils/python_profile_analysis_utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ def __init__(
1111
end_time_since_epoch_in_micros,
1212
node_id,
1313
stats_path,
14+
step_phase="",
1415
):
1516
"""Class that represents the metadata for profiling on a specific step (or before step 0).
1617
Used so that users can easily filter through which steps they want profiling stats of.
@@ -27,6 +28,7 @@ def __init__(
2728
self.end_time_since_epoch_in_micros = end_time_since_epoch_in_micros
2829
self.node_id = node_id
2930
self.stats_path = stats_path
31+
self.step_phase = step_phase
3032

3133
def in_time_interval(self, start_time_since_epoch_in_micros, end_time_since_epoch_in_micros):
3234
"""Returns whether this step is in the provided time interval.

smdebug/profiler/python_profiler.py

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,6 @@
1515

1616

1717
class PythonProfiler:
18-
name = "" # placeholder
19-
stats_filename = "" # placeholder
20-
2118
def __init__(self, base_folder, framework):
2219
"""Higher level class to manage execution of python profiler, dumping of python stats, and retrieval
2320
of stats based on time or step intervals.
@@ -51,6 +48,7 @@ def _reset_profiler(self):
5148
"""Reset attributes to defaults
5249
"""
5350
self._step, self._start_time_since_epoch_in_micros, self._is_profiling = None, None, False
51+
self._start_stepphase = ""
5452

5553
def _enable_profiler(self):
5654
"""Enable the profiler (to be implemented in subclass, where the actual profiler is defined).
@@ -64,16 +62,24 @@ def _dump_stats(self, stats_path):
6462
"""Dump the stats to the provided path (to be implemented in subclass, where the actual profiler is defined).
6563
"""
6664

67-
def start_profiling(self, start_step=-1):
65+
def _name(self):
66+
return "default"
67+
68+
def _stats_filename(self):
69+
# this is default value
70+
return "python_stats"
71+
72+
def start_profiling(self, start_step=-1, step_phase=""):
    """Begin python profiling for the given step and step phase.

    A start step of -1 means profiling runs from import time to step 0.
    Records the step, the starting step phase and the current time, marks
    profiling as active, and then enables the underlying profiler.
    """
    started_at = time.time() * CONVERT_TO_MICROSECS
    self._step, self._start_stepphase = start_step, step_phase
    self._start_time_since_epoch_in_micros = started_at
    self._is_profiling = True
    self._enable_profiler()
7581

76-
def stop_profiling(self):
82+
def stop_profiling(self, step_phase=""):
7783
"""Stop the python profiler.
7884
Dump the python stats for this step with a file path dependent on the base folder, framework, time and step.
7985
Append a record of this step's profiling with the corresponding metadata.
@@ -85,15 +91,19 @@ def stop_profiling(self):
8591
self._disable_profiler()
8692

8793
current_time_since_epoch_in_micros = time.time() * CONVERT_TO_MICROSECS
94+
step_phase_string = ""
95+
if self._start_stepphase != "" or step_phase != "":
96+
step_phase_string = f"{self._start_stepphase}-{step_phase}"
8897
stats_dir = TraceFileLocation.get_python_profiling_stats_dir(
8998
self._base_folder,
9099
self._framework,
91-
self.name,
100+
self._name(),
92101
self._step,
93102
self._start_time_since_epoch_in_micros,
94103
current_time_since_epoch_in_micros,
104+
step_phase_string,
95105
)
96-
self._dump_stats(os.path.join(stats_dir, self.stats_filename))
106+
self._dump_stats(os.path.join(stats_dir, self._stats_filename()))
97107

98108
self._reset_profiler()
99109

@@ -110,14 +120,26 @@ class cProfilePythonProfiler(PythonProfiler):
110120
This is also the default Python profiler used if profiling is enabled.
111121
"""
112122

113-
name = "cProfile"
114-
stats_filename = "python_stats"
115-
116123
def _reset_profiler(self):
117124
"""Reset profiler and corresponding attributes to defaults
118125
"""
119126
super()._reset_profiler()
120-
self._profiler = cProfileProfiler()
127+
self._profiler = cProfileProfiler(self._total_time)
128+
129+
def _name(self):
130+
return "cProfile"
131+
132+
def _total_time(self):
133+
times = os.times()
134+
return times.elapsed
135+
136+
def _off_cpu_time(self):
137+
times = os.times()
138+
return times.elapsed - (times.system + times.user)
139+
140+
def _stats_filename(self):
141+
# this is default value
142+
return "python_stats"
121143

122144
def _enable_profiler(self):
123145
"""Enable the cProfile profiler.
@@ -135,20 +157,22 @@ def _dump_stats(self, stats_file_path):
135157
get_logger("smdebug-profiler").info(f"Dumping cProfile stats to {stats_file_path}.")
136158
pstats.Stats(self._profiler).dump_stats(stats_file_path)
137159

138-
139160
class PyinstrumentPythonProfiler(PythonProfiler):
140161
"""Higher level class to oversee profiling specific to Pyinstrument, a third party Python profiler.
141162
"""
142163

143-
name = "pyinstrument"
144-
stats_filename = "python_stats.json"
145-
146164
def _reset_profiler(self):
    """Run the parent class reset, then create a fresh PyinstrumentProfiler instance."""
    super()._reset_profiler()
    self._profiler = PyinstrumentProfiler()
151169

170+
def _name(self):
171+
return "pyinstrument"
172+
173+
def _stats_filename(self):
174+
return "python_stats.json"
175+
152176
def _enable_profiler(self):
153177
"""Enable the pyinstrument profiler.
154178
"""

0 commit comments

Comments
 (0)