Skip to content

Commit

Permalink
Feature: mark a Func as no_profiling, to prevent injection of profili…
Browse files Browse the repository at this point in the history
…ng. (2nd implementation) (#8143)

* Small feature to allow you to specify that a (typically small inner loop) Func should not be profiled.

* Simplified the tuple name handling.

* Optimize tuple name normalization in Profiling.cpp

* Clang-format

* Feedback on Function already being a pointer. Bump the Patch version of the serialization.
  • Loading branch information
mcourteaux authored Mar 12, 2024
1 parent bf0d611 commit 4988ab5
Show file tree
Hide file tree
Showing 10 changed files with 107 additions and 26 deletions.
4 changes: 3 additions & 1 deletion src/Deserialization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -504,12 +504,14 @@ void Deserializer::deserialize_function(const Serialize::Func *function, Functio
const std::vector<std::string> trace_tags =
deserialize_vector<flatbuffers::String, std::string>(function->trace_tags(),
&Deserializer::deserialize_string);
const bool no_profiling = function->no_profiling();
const bool frozen = function->frozen();
hl_function.update_with_deserialization(name, origin_name, output_types, required_types,
required_dim, args, func_schedule, init_def, updates,
debug_file, output_buffers, extern_arguments, extern_function_name,
name_mangling, extern_function_device_api, extern_proxy_expr,
trace_loads, trace_stores, trace_realizations, trace_tags, frozen);
trace_loads, trace_stores, trace_realizations, trace_tags,
no_profiling, frozen);
}

Stmt Deserializer::deserialize_stmt(Serialize::Stmt type_code, const void *stmt) {
Expand Down
5 changes: 5 additions & 0 deletions src/Func.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3037,6 +3037,11 @@ Func &Func::add_trace_tag(const std::string &trace_tag) {
return *this;
}

Func &Func::no_profiling() {
func.do_not_profile();
return *this;
}

void Func::debug_to_file(const string &filename) {
invalidate_cache();
func.debug_file() = filename;
Expand Down
9 changes: 9 additions & 0 deletions src/Func.h
Original file line number Diff line number Diff line change
Expand Up @@ -2559,6 +2559,15 @@ class Func {
*/
Func &add_trace_tag(const std::string &trace_tag);

/** Marks this function as a function that should not be profiled
* when using the target feature Profile or ProfileByTimer.
* This is useful when this function is does too little work at once
* such that the overhead of setting the profiling token might
* become significant, or that the measured time is not representative
* due to modern processors (instruction level parallelism, out-of-order
* execution). */
Func &no_profiling();

/** Get a handle on the internal halide function that this Func
* represents. Useful if you want to do introspection on Halide
* functions */
Expand Down
19 changes: 15 additions & 4 deletions src/Function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ struct FunctionContents {
bool trace_loads = false, trace_stores = false, trace_realizations = false;
std::vector<string> trace_tags;

bool no_profiling = false;

bool frozen = false;

void accept(IRVisitor *visitor) const {
Expand Down Expand Up @@ -352,6 +354,7 @@ void Function::update_with_deserialization(const std::string &name,
bool trace_stores,
bool trace_realizations,
const std::vector<std::string> &trace_tags,
bool no_profiling,
bool frozen) {
contents->name = name;
contents->origin_name = origin_name;
Expand All @@ -373,6 +376,7 @@ void Function::update_with_deserialization(const std::string &name,
contents->trace_stores = trace_stores;
contents->trace_realizations = trace_realizations;
contents->trace_tags = trace_tags;
contents->no_profiling = no_profiling;
contents->frozen = frozen;
}

Expand Down Expand Up @@ -509,6 +513,7 @@ void Function::deep_copy(const FunctionPtr &copy, DeepCopyMap &copied_map) const
copy->trace_stores = contents->trace_stores;
copy->trace_realizations = contents->trace_realizations;
copy->trace_tags = contents->trace_tags;
copy->no_profiling = contents->no_profiling;
copy->frozen = contents->frozen;
copy->output_buffers = contents->output_buffers;
copy->func_schedule = contents->func_schedule.deep_copy(copied_map);
Expand Down Expand Up @@ -1139,10 +1144,6 @@ const std::vector<std::string> &Function::get_trace_tags() const {
return contents->trace_tags;
}

void Function::freeze() {
contents->frozen = true;
}

void Function::lock_loop_levels() {
auto &schedule = contents->func_schedule;
schedule.compute_level().lock();
Expand All @@ -1166,6 +1167,16 @@ void Function::lock_loop_levels() {
}
}

void Function::do_not_profile() {
contents->no_profiling = true;
}
bool Function::should_not_profile() const {
return contents->no_profiling;
}

void Function::freeze() {
contents->frozen = true;
}
bool Function::frozen() const {
return contents->frozen;
}
Expand Down
7 changes: 7 additions & 0 deletions src/Function.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ class Function {
bool trace_stores,
bool trace_realizations,
const std::vector<std::string> &trace_tags,
bool no_profiling,
bool frozen);

/** Get a handle on the halide function contents that this Function
Expand Down Expand Up @@ -290,6 +291,12 @@ class Function {
* cannot be mutated further. */
void lock_loop_levels();

/** Mark the function as too small for meaningful profiling. */
void do_not_profile();

/** Check if the function is marked as one that should not be profiled. */
bool should_not_profile() const;

/** Mark function as frozen, which means it cannot accept new
* definitions. */
void freeze();
Expand Down
2 changes: 1 addition & 1 deletion src/Lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,7 @@ void lower_impl(const vector<Function> &output_funcs,

if (t.has_feature(Target::Profile) || t.has_feature(Target::ProfileByTimer)) {
debug(1) << "Injecting profiling...\n";
s = inject_profiling(s, pipeline_name);
s = inject_profiling(s, pipeline_name, env);
log("Lowering after injecting profiling:", s);
}

Expand Down
74 changes: 57 additions & 17 deletions src/Profiling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include <string>

#include "CodeGen_Internal.h"
#include "ExprUsesVar.h"
#include "Function.h"
#include "IRMutator.h"
#include "IROperator.h"
#include "InjectHostDevBufferCopies.h"
Expand Down Expand Up @@ -71,13 +71,14 @@ class InjectProfiling : public IRMutator {
vector<int> stack; // What produce nodes are we currently inside of.

string pipeline_name;
const map<string, Function> &env;

bool in_fork = false;
bool in_parallel = false;
bool in_leaf_task = false;

InjectProfiling(const string &pipeline_name)
: pipeline_name(pipeline_name) {
InjectProfiling(const string &pipeline_name, const map<string, Function> &env)
: pipeline_name(pipeline_name), env(env) {
stack.push_back(get_func_id("overhead"));
// ID 0 is treated specially in the runtime as overhead
internal_assert(stack.back() == 0);
Expand Down Expand Up @@ -119,10 +120,28 @@ class InjectProfiling : public IRMutator {
bool profiling_memory = true;

// Strip down the tuple name, e.g. f.0 into f
string normalize_name(const string &name) {
vector<string> v = split_string(name, ".");
internal_assert(!v.empty());
return v[0];
string normalize_name(const string &name) const {
size_t idx = name.find('.');
if (idx != std::string::npos) {
internal_assert(idx != 0);
return name.substr(0, idx);
} else {
return name;
}
}

Function lookup_function(const string &name) const {
auto it = env.find(name);
if (it != env.end()) {
return it->second;
}
string norm_name = normalize_name(name);
it = env.find(norm_name);
if (it != env.end()) {
return it->second;
}
internal_error << "No function in the environment found for name '" << name << "'.\n";
return {};
}

int get_func_id(const string &name) {
Expand Down Expand Up @@ -185,7 +204,6 @@ class InjectProfiling : public IRMutator {
}

Stmt visit(const Allocate *op) override {
int idx = get_func_id(op->name);

auto [new_extents, changed] = mutate_with_changes(op->extents);
Expr condition = mutate(op->condition);
Expand All @@ -199,6 +217,13 @@ class InjectProfiling : public IRMutator {
// always conditionally false. remove_dead_allocations() is called after
// inject_profiling() so this is a possible scenario.
if (!is_const_zero(size) && on_stack) {
int idx;
Function func = lookup_function(op->name);
if (func.should_not_profile()) {
idx = stack.back(); // Attribute the stack size contribution to the deepest _profiled_ func.
} else {
idx = get_func_id(op->name);
}
const uint64_t *int_size = as_const_uint(size);
internal_assert(int_size != nullptr); // Stack size is always a const int
func_stack_current[idx] += *int_size;
Expand All @@ -212,6 +237,7 @@ class InjectProfiling : public IRMutator {
vector<Stmt> tasks;
bool track_heap_allocation = !is_const_zero(size) && !on_stack && profiling_memory;
if (track_heap_allocation) {
int idx = get_func_id(op->name);
debug(3) << " Allocation on heap: " << op->name
<< "(" << size << ") in pipeline "
<< pipeline_name << "\n";
Expand Down Expand Up @@ -245,8 +271,6 @@ class InjectProfiling : public IRMutator {
}

Stmt visit(const Free *op) override {
int idx = get_func_id(op->name);

AllocSize alloc = func_alloc_sizes.get(op->name);
internal_assert(alloc.size.type() == UInt(64));
func_alloc_sizes.pop(op->name);
Expand All @@ -256,6 +280,7 @@ class InjectProfiling : public IRMutator {
if (!is_const_zero(alloc.size)) {
if (!alloc.on_stack) {
if (profiling_memory) {
int idx = get_func_id(op->name);
debug(3) << " Free on heap: " << op->name << "(" << alloc.size << ") in pipeline " << pipeline_name << "\n";

vector<Stmt> tasks{
Expand All @@ -271,6 +296,13 @@ class InjectProfiling : public IRMutator {
const uint64_t *int_size = as_const_uint(alloc.size);
internal_assert(int_size != nullptr);

int idx;
Function func = lookup_function(op->name);
if (func.should_not_profile()) {
idx = stack.back(); // Attribute the stack size contribution to the deepest _profiled_ func.
} else {
idx = get_func_id(op->name);
}
func_stack_current[idx] -= *int_size;
debug(3) << " Free on stack: " << op->name << "(" << alloc.size << ") in pipeline " << pipeline_name
<< "; current: " << func_stack_current[idx] << "; peak: " << func_stack_peak[idx] << "\n";
Expand All @@ -283,11 +315,19 @@ class InjectProfiling : public IRMutator {
int idx;
Stmt body;
if (op->is_producer) {
idx = get_func_id(op->name);
stack.push_back(idx);
Stmt set_current = set_current_func(idx);
body = Block::make(set_current, mutate(op->body));
stack.pop_back();
Function func = lookup_function(op->name);
if (func.should_not_profile()) {
body = mutate(op->body);
if (body.same_as(op->body)) {
return op;
}
} else {
idx = get_func_id(op->name);
stack.push_back(idx);
Stmt set_current = set_current_func(idx);
body = Block::make(set_current, mutate(op->body));
stack.pop_back();
}
} else {
// At the beginning of the consume step, set the current task
// back to the outer one.
Expand Down Expand Up @@ -498,8 +538,8 @@ class InjectProfiling : public IRMutator {

} // namespace

Stmt inject_profiling(Stmt s, const string &pipeline_name) {
InjectProfiling profiling(pipeline_name);
Stmt inject_profiling(Stmt s, const string &pipeline_name, const std::map<string, Function> &env) {
InjectProfiling profiling(pipeline_name, env);
s = profiling.mutate(s);

int num_funcs = (int)(profiling.indices.size());
Expand Down
5 changes: 4 additions & 1 deletion src/Profiling.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,24 @@
* mandelbrot: 0.006444ms (10%) peak: 505344 num: 104000 avg: 5376
* argmin: 0.027715ms (46%) stack: 20
*/
#include <map>
#include <string>

#include "Expr.h"

namespace Halide {
namespace Internal {

class Function;

/** Take a statement representing a halide pipeline insert
* high-resolution timing into the generated code (via spawning a
* thread that acts as a sampling profiler); summaries of execution
* times and counts will be logged at the end. Should be done before
* storage flattening, but after all bounds inference.
*
*/
Stmt inject_profiling(Stmt, const std::string &);
Stmt inject_profiling(Stmt, const std::string &, const std::map<std::string, Function> &env);

} // namespace Internal
} // namespace Halide
Expand Down
5 changes: 4 additions & 1 deletion src/Serialization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1029,6 +1029,7 @@ Offset<Serialize::Func> Serializer::serialize_function(FlatBufferBuilder &builde
for (const auto &tag : function.get_trace_tags()) {
trace_tags_serialized.push_back(serialize_string(builder, tag));
}
const bool no_profiling = function.should_not_profile();
const bool frozen = function.frozen();
auto func = Serialize::CreateFunc(builder,
name_serialized,
Expand All @@ -1050,7 +1051,9 @@ Offset<Serialize::Func> Serializer::serialize_function(FlatBufferBuilder &builde
trace_loads,
trace_stores,
trace_realizations,
builder.CreateVector(trace_tags_serialized), frozen);
builder.CreateVector(trace_tags_serialized),
no_profiling,
frozen);
return func;
}

Expand Down
3 changes: 2 additions & 1 deletion src/halide_ir.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ enum SerializationVersionMinor: int {
Value = 0
}
enum SerializationVersionPatch: int {
Value = 0
Value = 1
}

// from src/IR.cpp
Expand Down Expand Up @@ -713,6 +713,7 @@ table Func {
trace_stores: bool = false;
trace_realizations: bool = false;
trace_tags: [string];
no_profiling: bool = false;
frozen: bool = false;
}

Expand Down

0 comments on commit 4988ab5

Please sign in to comment.