-
Notifications
You must be signed in to change notification settings - Fork 5.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refine profiler and expose to Python. #7576
Changes from 6 commits
d2a7024
579449b
609ede2
9baba9a
eaabf2a
05a733b
0358fd0
f18016b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -163,21 +163,36 @@ void EnableProfiler(ProfilerState state) { | |
Mark("_start_profiler_", nullptr); | ||
} | ||
|
||
std::vector<std::vector<Event>> DisableProfiler() { | ||
PADDLE_ENFORCE(g_state != ProfilerState::kDisabled, | ||
"Can't disable profiling, since it's not starting."); | ||
// Mark the profiling stop. | ||
Mark("_stop_profiler_", nullptr); | ||
g_state = ProfilerState::kDisabled; | ||
std::vector<std::vector<Event>> result; | ||
void ResetProfiler() { | ||
std::lock_guard<std::mutex> guard(g_all_event_lists_mutex); | ||
for (auto it = g_all_event_lists.begin(); it != g_all_event_lists.end(); | ||
++it) { | ||
(*it)->Clear(); | ||
} | ||
} | ||
|
||
std::vector<std::vector<Event>> GetAllEvents() { | ||
std::lock_guard<std::mutex> guard(g_all_event_lists_mutex); | ||
std::vector<std::vector<Event>> result; | ||
for (auto it = g_all_event_lists.begin(); it != g_all_event_lists.end(); | ||
++it) { | ||
result.emplace_back((*it)->Reduce()); | ||
} | ||
return result; | ||
} | ||
|
||
void DisableProfiler(EventSortingKey sorted_key) { | ||
PADDLE_ENFORCE(g_state != ProfilerState::kDisabled, | ||
"Can't disable profiling, since it's not starting."); | ||
// Mark the profiling stop. | ||
Mark("_stop_profiler_", nullptr); | ||
g_state = ProfilerState::kDisabled; | ||
|
||
std::vector<std::vector<Event>> all_events = GetAllEvents(); | ||
ParseEvents(all_events, sorted_key); | ||
ResetProfiler(); | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think there should have a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As the above reply. The less the operation, the better. |
||
void ParseEvents(std::vector<std::vector<Event>>& events, | ||
EventSortingKey sorted_by) { | ||
if (g_profiler_place == "") return; | ||
|
@@ -291,12 +306,12 @@ void ParseEvents(std::vector<std::vector<Event>>& events, | |
} | ||
|
||
// Print report | ||
PrintProfilingReport(events_table, sorted_domain, max_name_width + 4, 12); | ||
PrintProfiler(events_table, sorted_domain, max_name_width + 4, 12); | ||
} | ||
|
||
void PrintProfilingReport(std::vector<std::vector<EventItem>>& events_table, | ||
std::string& sorted_domain, const size_t name_width, | ||
const size_t data_width) { | ||
void PrintProfiler(std::vector<std::vector<EventItem>>& events_table, | ||
std::string& sorted_domain, const size_t name_width, | ||
const size_t data_width) { | ||
// Output header information | ||
std::cout << "\n------------------------->" | ||
<< " Profiling Report " | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -63,3 +63,57 @@ def cuda_profiler(output_file, output_mode=None, config=None): | |
# Disables profiler collection. | ||
core.nvprof_stop() | ||
os.remove(config_file) | ||
|
||
|
||
def reset_profiler(): | ||
"""The profiler clear interface. | ||
reset_profiler will clear the previous time record. | ||
""" | ||
core.reset_profiler() | ||
|
||
|
||
@contextmanager | ||
def profiler(state, sorted_key=None): | ||
"""The profiler interface. | ||
Different from cuda_profiler, this profiler can be used to profile both CPU | ||
and GPU program. By default, it records the CPU and GPU operator kernels, | ||
if you want to profile other programs, you can refer to the profiling tutorial | ||
to add more records. | ||
|
||
Args: | ||
state (string) : The profiling state. It should be 'CPU' or 'GPU'. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. -> The profiling state, which should be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
Although users may define CPUPlace or CUDAPlace when using Fluid, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
the profiler doesn't get the state based on this Place. Since the | ||
implementation is an independent part from the Fluid. | ||
sorted_key (string) : If None, the profiling results will be printed | ||
in the order of first end time of events. Otherwise, the profiling | ||
results will be sorted by this flag. This flag should be one | ||
of 'calls', 'total', 'max', 'min' or 'ave'. | ||
The `calls` means sorting by the number of calls. | ||
The `total` means sorting by the total execution time. | ||
The `max` means sorting by the maximum execution time. | ||
The `min` means sorting by the minimum execution time. | ||
The `ave` means sorting by the average execution time. | ||
""" | ||
|
||
if state not in ['CPU', 'GPU']: | ||
raise ValueError("The state must be 'CPU' or 'GPU'.") | ||
prof_state = core.ProfilerState.kCUDA if state == "GPU" else core.ProfilerState.kCPU | ||
core.enable_profiler(prof_state) | ||
yield | ||
|
||
if sorted_key not in ['calls', 'total', 'max', 'min', 'ave']: | ||
raise ValueError("The state must be in 'calls', 'total', " | ||
"'max', 'min', 'ave'") | ||
sorted_key = 'default' if sorted_key is None else sorted_key | ||
key_map = { | ||
'default': core.EventSortingKey.kDefault, | ||
'calls': core.EventSortingKey.kCalls, | ||
'total': core.EventSortingKey.kTotal, | ||
'max': core.EventSortingKey.kMax, | ||
'min': core.EventSortingKey.kMin, | ||
'ave': core.EventSortingKey.kAve, | ||
} | ||
# TODO(qingqing) : redirect C++ ostream to Python stream. | ||
# with core.ostream_redirect(stdout=True, stderr=True): | ||
core.disable_profiler(key_map[sorted_key]) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,11 +13,12 @@ | |
# limitations under the License. | ||
|
||
import unittest | ||
import os | ||
import numpy as np | ||
import paddle.v2.fluid as fluid | ||
import paddle.v2.fluid.profiler as profiler | ||
import paddle.v2.fluid.layers as layers | ||
import os | ||
import paddle.v2.fluid.core as core | ||
|
||
|
||
class TestProfiler(unittest.TestCase): | ||
|
@@ -40,6 +41,50 @@ def test_nvprof(self): | |
exe.run(fluid.default_main_program(), feed={'data': input}) | ||
os.remove(output_file) | ||
|
||
def profiler(self, state): | ||
if state == 'GPU' and core.is_compile_gpu(): | ||
return | ||
startup_program = fluid.Program() | ||
main_program = fluid.Program() | ||
|
||
with fluid.program_guard(main_program, startup_program): | ||
image = fluid.layers.data(name='x', shape=[784], dtype='float32') | ||
hidden1 = fluid.layers.fc(input=image, size=128, act='relu') | ||
hidden2 = fluid.layers.fc(input=hidden1, size=64, act='relu') | ||
predict = fluid.layers.fc(input=hidden2, size=10, act='softmax') | ||
label = fluid.layers.data(name='y', shape=[1], dtype='int64') | ||
cost = fluid.layers.cross_entropy(input=predict, label=label) | ||
avg_cost = fluid.layers.mean(x=cost) | ||
accuracy = fluid.evaluator.Accuracy(input=predict, label=label) | ||
|
||
optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9) | ||
opts = optimizer.minimize(avg_cost, startup_program=startup_program) | ||
|
||
place = fluid.CPUPlace() if state == 'CPU' else fluid.CUDAPlace(0) | ||
exe = fluid.Executor(place) | ||
exe.run(startup_program) | ||
|
||
accuracy.reset(exe) | ||
with profiler.profiler(state, 'total') as prof: | ||
for iter in range(10): | ||
if iter == 2: | ||
profiler.reset_profiler() | ||
x = np.random.random((32, 784)).astype("float32") | ||
y = np.random.randint(0, 10, (32, 1)).astype("int64") | ||
|
||
outs = exe.run(main_program, | ||
feed={'x': x, | ||
'y': y}, | ||
fetch_list=[avg_cost] + accuracy.metrics) | ||
acc = np.array(outs[1]) | ||
pass_acc = accuracy.eval(exe) | ||
|
||
def not_test_cpu_profiler(self): | ||
self.profiler('CPU') | ||
|
||
def not_test_cuda_profiler(self): | ||
self.profiler('GPU') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed. Thanks! |
||
|
||
|
||
if __name__ == '__main__': | ||
unittest.main() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
RecordEvent
should not be always called, only whenProfilerState
is notkDisabled
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@chengduoZH Thanks for your review. Whether to record timeline is judged in the constructor of
RecordEvent
: https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/profiler.cc#L130