Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 19 additions & 9 deletions devito/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
solve, symbols)
from sympy.abc import t

from devito.compiler import IntelMICCompiler, get_compiler_from_env
from devito.compiler import get_compiler_from_env
from devito.interfaces import SymbolicData, TimeData
from devito.propagator import Propagator

Expand Down Expand Up @@ -186,6 +186,22 @@ def signature(self):
return self.input_params + [param for param in self.output_params
if param not in self.input_params]

@property
def total_kernel_time(self):
return self.propagator.timings[0]

@property
def total_space_loops_time(self):
return self.propagator.timings[1]

@property
def total_pre_loops_time(self):
return self.propagator.timings[2]

@property
def total_post_loops_time(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could we document pre_loops and post_loops ? what do they mean?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pre_loop and post_loop are operations done outside of the spacial loop :
for (t=...)
pre_loop
for (x=.., y=...)
stencils, factors,...
endforxyz
post_loop
endfort

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please confirm what I am saying but I believe pre and post loops were old names for these variables which were later changed to time_loop_stencils. During some rebase/merge conflict resolution these old names seem to have crept back in and right now both versions of these names exist in the code creating redundancy. AFAIK, these old names pre_loop and post_loop are not being used anywhere in the code and I have been meaning to open a PR to remove them (again).

If you can confirm the above, please feel free to remove this.

Copy link
Contributor Author

@vincepandolfo vincepandolfo Aug 8, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be honest I have not seen those names anywhere in the code. I used them just because they were much shorter than time_loops_stencils.

Right now I am working on completely redoing the profiler by making a profiler class. I will close this PR until the new version is ready (it should be ready tomorrow) if it's not urgent to have this one merged.

return self.propagator.timings[3]

def apply(self, debug=False):
""":param debug: If True, use Python to apply the operator. Default False.

Expand All @@ -194,18 +210,12 @@ def apply(self, debug=False):
if debug:
return self.apply_python()

f = self.propagator.cfunction

for param in self.input_params:
if hasattr(param, 'initialize'):
param.initialize()

args = [param.data for param in self.signature]
if isinstance(self.compiler, IntelMICCompiler):
# Off-load propagator kernel via pymic stream
self.compiler._stream.invoke(f, *args)
self.compiler._stream.sync()
else:
f(*args)
self.propagator.run(args)

return tuple([param.data for param in self.output_params])

Expand Down
81 changes: 60 additions & 21 deletions devito/propagator.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,7 @@ def __init__(self, name, nt, shape, stencils, factorized=None, spc_border=0, tim

# Settings for performance profiling
self.profile = profile

if self.profile:
self.add_local_var("time", "double")
self.pre_loop.append(cgen.Statement("struct timeval start, end"))
self.pre_loop.append(cgen.Assign("time", 0))
self.post_loop.append(cgen.PrintStatement("time: %f\n", "time"))
self.timings = np.zeros(4)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not convinced passing an array here is the best thing to do, since it requires a property to the Operator for each timing we might add later, and it also makes the code slightly confusing. Instead, could we not use a ctypes.struct, to which we can just add fields and that we return to the user as a single object? This would also make it easier to inspect in IPython, I think.

Copy link
Contributor Author

@vincepandolfo vincepandolfo Aug 8, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem with that is that I would have to redefine struct timeval in ctypes but the definition of struct timeval is not platform independent.

Another solution would be to make the profiler a separate class with methods to "inject" profiling code in parts of the cgen syntax tree. This would make it more modular and easier to add new timings.


# Kernel operational intensity dictionary
self._kernel_dic_oi = {'add': 0, 'mul': 0, 'load': 0, 'store': 0,
Expand All @@ -117,6 +112,20 @@ def __init__(self, name, nt, shape, stencils, factorized=None, spc_border=0, tim
self._lib = None
self._cfunction = None

def run(self, args):
if self.profile:
args.append(self.timings)
self.add_param("timings", (4,), np.float64)

f = self.cfunction

if isinstance(self.compiler, IntelMICCompiler):
# Off-load propagator kernel via pymic stream
self.compiler._stream.invoke(f, *args)
self.compiler._stream.sync()
else:
f(*args)

@property
def basename(self):
"""Generate a unique basename path for auto-generated files
Expand Down Expand Up @@ -294,6 +303,22 @@ def generate_loops(self, loop_body):
time_stepping = self.get_time_stepping()
else:
time_stepping = []

if self.profile:
self.pre_loop += [
cgen.Statement("struct timeval start_kernel, end_kernel"),
cgen.Statement("struct timeval start_loops, end_loops"),
cgen.Statement("struct timeval start_pre, end_pre"),
cgen.Statement("struct timeval start_post, end_post"),
] + omp_master + [cgen.Statement("gettimeofday(&start_kernel, NULL)")]
self.post_loop += omp_master + [
cgen.Block([
cgen.Statement("gettimeofday(&end_kernel, NULL)"),
cgen.Statement("timings[0] = ((end_kernel.tv_sec - start_kernel.tv_sec) * " +
"1000000u + end_kernel.tv_usec - start_kernel.tv_usec) / 1.e6"),
])
]

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

omp_single should have been inside the parenthesis here :
(omp_single + [cgen.Block(time_stepping + time_loop_stencils_b)] if time_stepping or time_loop_stencils_b else [])
to avoid empty omp sigle section breaking the compilation. Does your change fix it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok I didn't know that. I think my changes breaks it.

loop_body = [cgen.Block(omp_for + loop_body)]
# Statements to be inserted into the time loop before the spatial loop
time_loop_stencils_b = [self.time_substitutions(x) for x in self.time_loop_stencils_b]
Expand All @@ -304,21 +329,35 @@ def generate_loops(self, loop_body):
time_loop_stencils_a = [self.convert_equality_to_cgen(x) for x in self.time_loop_stencils_a]

if self.profile:
start_time_stmt = omp_master + [cgen.Block([cgen.Statement("gettimeofday(&start, NULL)")])]
end_time_stmt = omp_master + [cgen.Block(
[cgen.Statement("gettimeofday(&end, NULL)")] +
[cgen.Statement("time += ((end.tv_sec - start.tv_sec) * ",
"1000000u + end.tv_usec - start.tv_usec) / 1.e6")]
)]
else:
start_time_stmt = []
end_time_stmt = []

initial_block = omp_single + ([cgen.Block(time_stepping + time_loop_stencils_b)]
if time_stepping or time_loop_stencils_b else [])
initial_block = initial_block + start_time_stmt
end_block = end_time_stmt + omp_single + ([cgen.Block(time_loop_stencils_a)]
if time_loop_stencils_a else end_time_stmt)
time_loop_stencils_b = [cgen.Statement("gettimeofday(&start_pre, NULL)")] + time_loop_stencils_b
time_loop_stencils_a = [cgen.Statement("gettimeofday(&start_post, NULL)")] + time_loop_stencils_a

time_loop_stencils_b += [
cgen.Statement("gettimeofday(&end_pre, NULL)"),
cgen.Statement("timings[2] += ((end_pre.tv_sec - start_pre.tv_sec) * " +
"1000000u + end_pre.tv_usec - start_pre.tv_usec) / 1.e6"),
]
time_loop_stencils_a += [
cgen.Statement("gettimeofday(&end_post, NULL)"),
cgen.Statement("timings[3] += ((end_post.tv_sec - start_post.tv_sec) * " +
"1000000u + end_post.tv_usec - start_post.tv_usec) / 1.e6"),
]

initial_block = time_stepping + time_loop_stencils_b + ([cgen.Statement("gettimeofday(&start_loops, NULL)")]
if self.profile else [])

if initial_block:
initial_block = omp_single + [cgen.Block(initial_block)]

end_block = time_loop_stencils_a + ([cgen.Statement("gettimeofday(&end_loops, NULL)"),
cgen.Statement(
"timings[1] += ((end_loops.tv_sec - start_loops.tv_sec) * " +
"1000000u + end_loops.tv_usec - start_loops.tv_usec) / 1.e6"),
] if self.profile else [])

if end_block:
end_block = omp_single + [cgen.Block(end_block)]

loop_body = cgen.Block(initial_block + loop_body + end_block)
loop_body = cgen.For(
cgen.InlineInitializer(cgen.Value("int", t_var), str(t_loop_limits[0])),
Expand Down