-
Notifications
You must be signed in to change notification settings - Fork 49
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
WIP - Reentrant concurrent snoopi_deep profiles. #309
Changes from all commits
52e910f
4e3c77a
9d0e0df
d9fc875
eac22c9
29e9c32
2e1d5cd
f86ce5b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -863,6 +863,7 @@ end | |||||
# pgdsgui(axs[2], rit; bystr="Inclusive", consts=true, interactive=false) | ||||||
end | ||||||
|
||||||
|
||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Snuck in? :)
Suggested change
|
||||||
@testset "Stale" begin | ||||||
cproj = Base.active_project() | ||||||
cd(joinpath("testmodules", "Stale")) do | ||||||
|
@@ -944,6 +945,163 @@ end | |||||
Pkg.activate(cproj) | ||||||
end | ||||||
|
||||||
_name(frame::SnoopCompileCore.InferenceTiming) = frame.mi_info.mi.def.name | ||||||
|
||||||
@testset "reentrant concurrent profiles 1 - overlap" begin | ||||||
# Warmup | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Perhaps something like the following? :)
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (Likewise for the warmup comments below, if you agree.) |
||||||
@eval foo1(x) = x+2 | ||||||
@eval foo1(2) | ||||||
|
||||||
# Test: | ||||||
t1 = SnoopCompileCore.start_deep_timing() | ||||||
|
||||||
@eval foo1(x) = x+2 | ||||||
@eval foo1(2) | ||||||
|
||||||
t2 = SnoopCompileCore.start_deep_timing() | ||||||
|
||||||
@eval foo2(x) = x+2 | ||||||
@eval foo2(2) | ||||||
|
||||||
SnoopCompileCore.stop_deep_timing!(t1) | ||||||
SnoopCompileCore.stop_deep_timing!(t2) | ||||||
|
||||||
prof1 = SnoopCompileCore.finish_snoopi_deep(t1) | ||||||
prof2 = SnoopCompileCore.finish_snoopi_deep(t2) | ||||||
|
||||||
@test Set(_name.(SnoopCompile.flatten(prof1))) == Set([:ROOT, :foo1, :foo2]) | ||||||
@test Set(_name.(SnoopCompile.flatten(prof2))) == Set([:ROOT, :foo2]) | ||||||
|
||||||
# Test Cleanup | ||||||
@test isempty(SnoopCompileCore.SnoopiDeepParallelism.invocations) | ||||||
@test isempty(Core.Compiler.Timings._timings[1].children) | ||||||
end | ||||||
|
||||||
@testset "reentrant concurrent profiles 2 - interleaved" begin | ||||||
# Warmup | ||||||
@eval foo1(x) = x+2 | ||||||
@eval foo1(2) | ||||||
|
||||||
# Test: | ||||||
t1 = SnoopCompileCore.start_deep_timing() | ||||||
|
||||||
@eval foo1(x) = x+2 | ||||||
@eval foo1(2) | ||||||
|
||||||
t2 = SnoopCompileCore.start_deep_timing() | ||||||
|
||||||
@eval foo2(x) = x+2 | ||||||
@eval foo2(2) | ||||||
|
||||||
SnoopCompileCore.stop_deep_timing!(t1) | ||||||
|
||||||
@eval foo3(x) = x+2 | ||||||
@eval foo3(2) | ||||||
|
||||||
SnoopCompileCore.stop_deep_timing!(t2) | ||||||
|
||||||
@eval foo4(x) = x+2 | ||||||
@eval foo4(2) | ||||||
|
||||||
prof1 = SnoopCompileCore.finish_snoopi_deep(t1) | ||||||
|
||||||
@eval foo5(x) = x+2 | ||||||
@eval foo5(2) | ||||||
|
||||||
prof2 = SnoopCompileCore.finish_snoopi_deep(t2) | ||||||
|
||||||
@test Set(_name.(SnoopCompile.flatten(prof1))) == Set([:ROOT, :foo1, :foo2]) | ||||||
@test Set(_name.(SnoopCompile.flatten(prof2))) == Set([:ROOT, :foo2, :foo3]) | ||||||
|
||||||
# Test Cleanup | ||||||
@test isempty(SnoopCompileCore.SnoopiDeepParallelism.invocations) | ||||||
@test isempty(Core.Compiler.Timings._timings[1].children) | ||||||
end | ||||||
|
||||||
@testset "reentrant concurrent profiles 3 - nested" begin | ||||||
# Warmup | ||||||
@eval foo1(x) = x+2 | ||||||
@eval foo1(2) | ||||||
|
||||||
# Test: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That this works is thoroughly pleasing. |
||||||
local prof1, prof2, prof3 | ||||||
prof1 = SnoopCompileCore.@snoopi_deep begin | ||||||
@eval foo1(x) = x+2 | ||||||
@eval foo1(2) | ||||||
prof2 = SnoopCompileCore.@snoopi_deep begin | ||||||
@eval foo2(x) = x+2 | ||||||
@eval foo2(2) | ||||||
prof3 = SnoopCompileCore.@snoopi_deep begin | ||||||
@eval foo3(x) = x+2 | ||||||
@eval foo3(2) | ||||||
end | ||||||
@eval foo4(x) = x+2 | ||||||
@eval foo4(2) | ||||||
end | ||||||
@eval foo5(x) = x+2 | ||||||
@eval foo5(2) | ||||||
end | ||||||
|
||||||
@test Set(_name.(SnoopCompile.flatten(prof1))) == Set([:ROOT, :foo1, :foo2, :foo3, :foo4, :foo5]) | ||||||
@test Set(_name.(SnoopCompile.flatten(prof2))) == Set([:ROOT, :foo2, :foo3, :foo4]) | ||||||
@test Set(_name.(SnoopCompile.flatten(prof3))) == Set([:ROOT, :foo3]) | ||||||
|
||||||
# Test Cleanup | ||||||
@test isempty(SnoopCompileCore.SnoopiDeepParallelism.invocations) | ||||||
@test isempty(Core.Compiler.Timings._timings[1].children) | ||||||
end | ||||||
|
||||||
@testset "reentrant concurrent profiles 3 - parallelism + accurate timing" begin | ||||||
# Warmup | ||||||
@eval foo1(x) = x+2 | ||||||
@eval foo1(2) | ||||||
|
||||||
# Test: | ||||||
local ts | ||||||
snoop_times = Float64[0.0, 0.0, 0.0, 0.0] | ||||||
# Run it twice to ensure we warmup the eval block | ||||||
for _ in 1:2 | ||||||
@sync begin | ||||||
ts = [ | ||||||
Threads.@spawn begin | ||||||
sleep((i-1) / 10) # (Divide by 10 so the test isn't too slow) | ||||||
snoop_time = @timed SnoopCompile.@snoopi_deep @eval begin | ||||||
$(Symbol("foo$i"))(x) = x + 1 | ||||||
sleep(1.5 / 10) | ||||||
Comment on lines
+1067
to
+1070
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I hope that these delays are large enough --- and think they should be by orders of magnitude --- to prevent nondeterministic failures due to jitter. But I've found that the machines are just lying in wait for the moment I think something like that. Then 🦈 🩸. |
||||||
$(Symbol("foo$i"))(2) | ||||||
end | ||||||
snoop_times[i] = snoop_time.time | ||||||
return snoop_time.value | ||||||
end | ||||||
for i in 1:4 | ||||||
] | ||||||
end | ||||||
end | ||||||
profs = fetch.(ts) | ||||||
|
||||||
@test Set(_name.(SnoopCompile.flatten(profs[1]))) == Set([:ROOT, :foo1]) | ||||||
@test Set(_name.(SnoopCompile.flatten(profs[2]))) == Set([:ROOT, :foo1, :foo2]) | ||||||
@test Set(_name.(SnoopCompile.flatten(profs[3]))) == Set([:ROOT, :foo2, :foo3]) | ||||||
@test Set(_name.(SnoopCompile.flatten(profs[4]))) == Set([:ROOT, :foo3, :foo4]) | ||||||
|
||||||
# Test the sanity of the reported Timings | ||||||
@testset for i in eachindex(profs) | ||||||
prof = profs[i] | ||||||
# Test that the time for the inference is accounted for | ||||||
@test 0.15 < prof.mi_timing.exclusive_time | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I fear the definitions of these times have already become hazy to me 😅. This one? 🤔 How do we know that the exclusive rather than inclusive time should be over 0.15 seconds? |
||||||
@test prof.mi_timing.exclusive_time < prof.mi_timing.inclusive_time | ||||||
# Test that the inclusive time (the total time reported by snoopi_deep) matches | ||||||
        # the actual time to do the snoopi_deep, as measured by `@timed`. | ||||||
        # These should both be approximately 0.15 seconds. | ||||||
@info prof.mi_timing.inclusive_time | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Debugging straggler? :) |
||||||
@test prof.mi_timing.inclusive_time <= snoop_times[i] | ||||||
end | ||||||
|
||||||
# Test Cleanup | ||||||
@test isempty(SnoopCompileCore.SnoopiDeepParallelism.invocations) | ||||||
@test isempty(Core.Compiler.Timings._timings[1].children) | ||||||
end | ||||||
|
||||||
if Base.VERSION >= v"1.7" | ||||||
@testset "JET integration" begin | ||||||
f(c) = sum(c[1]) | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm a bit hazy on these details by now but trust that it is correct 👍