[PROFILER] Add shape, structural hash, and layout information to profiling (apache#7894)

* [PROFILER] Add shape, structural hash, and layout information to profiling

Add a new pass which inserts the layout and structural hash of the op into the
attrs of Functions.

* includes

* fix gcc5 issue

* old gcc fixes
tkonolige authored and trevor-m committed May 11, 2021
1 parent 4fc9078 commit 3955769
Showing 10 changed files with 198 additions and 11 deletions.
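
For orientation, here is a minimal sketch of how the new per-op metrics surface to users, assuming the MLP workload from `tvm.relay.testing` used by this commit's test (target and input shapes are illustrative):

import numpy as np
import tvm
from tvm import relay
from tvm.contrib.debugger import debug_executor
from tvm.relay.testing import mlp

# Build a small model and profile it with the debug graph executor.
mod, params = mlp.get_workload(batch_size=1)
exe = relay.build(mod, target="llvm", params=params)
gr = debug_executor.create(exe.get_graph_json(), exe.get_lib(), tvm.cpu())
data = np.random.rand(1, 1, 28, 28).astype("float32")
report = gr.profile(data=data)
# With this commit, each row carries "Argument Shapes", a "Hash" column
# (structural hash of the fused function), and any *layout* attributes.
print(report)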
4 changes: 4 additions & 0 deletions include/tvm/runtime/vm/executable.h
@@ -24,11 +24,13 @@
#ifndef TVM_RUNTIME_VM_EXECUTABLE_H_
#define TVM_RUNTIME_VM_EXECUTABLE_H_

#include <tvm/runtime/container.h>
#include <tvm/runtime/module.h>
#include <tvm/runtime/object.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/vm/bytecode.h>

#include <map>
#include <string>
#include <unordered_map>
#include <vector>
@@ -181,6 +183,8 @@ class Executable : public ModuleNode {
* corresponds to the position of the `packed_funcs` list in a `VirtualMachine` object.
*/
std::unordered_map<std::string, Index> primitive_map;
/*! \brief Per-operator attributes (e.g. structural hash and layouts), keyed by packed function index. */
std::map<Index, Map<String, ObjectRef>> op_attrs;
/*! \brief The virtual machine's function table. */
std::vector<VMFunction> functions;
/*! \brief The device type for each constant. */
4 changes: 4 additions & 0 deletions src/relay/backend/build_module.cc
@@ -38,6 +38,9 @@

namespace tvm {
namespace relay {
namespace transform {
Pass LabelOps();
}
namespace backend {

using TargetsMap = Map<tvm::Integer, tvm::Target>;
@@ -368,6 +371,7 @@ class RelayBuildModule : public runtime::ModuleNode {
// and vendor-provided libraries. So we don't handle for now.
relay_module = transform::Inline()(relay_module);
relay_module = transform::InferType()(relay_module);
relay_module = transform::LabelOps()(relay_module);

ICHECK(relay_module.defined());

21 changes: 16 additions & 5 deletions src/relay/backend/graph_executor_codegen.cc
@@ -27,6 +27,7 @@
#include <tvm/ir/module.h>
#include <tvm/relay/expr_functor.h>
#include <tvm/runtime/device_api.h>
#include <tvm/runtime/object.h>

#include <list>
#include <string>
@@ -140,7 +141,7 @@ class GraphOpNode : public GraphNode {
attrs_ = nd_attrs;
op_name_ = op_name;
inputs_ = inputs;
op_attrs_ = attrs_;
op_attrs_ = attrs;
num_outputs_ = num_outputs;
op_attrs_["func_name"] = op_name_;
op_attrs_["flatten_data"] = std::string("0");
@@ -337,15 +338,15 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator<std::vector<
}

std::vector<GraphNodeRef> GraphAddCallNode(const CallNode* op, const std::string& op_name,
const std::string& func_name) {
const std::string& func_name, GraphAttrs attrs) {
std::vector<GraphNodeRef> inputs;
for (auto arg : op->args) {
auto res = VisitExpr(arg);
for (auto nr : res) {
inputs.push_back(nr);
}
}
auto node = GraphOpNode::make_node_ptr(op_name, GraphAttrs(), func_name, inputs, GraphAttrs());
auto node = GraphOpNode::make_node_ptr(op_name, GraphAttrs(), func_name, inputs, attrs);
return AddNode(node, GetRef<Expr>(op));
}

@@ -377,6 +378,15 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator<std::vector<
<< "(i.e functions composed of fusable operator invocations)";
}

// Copy attrs from function into the graph node
// For now we only handle strings
GraphAttrs attrs;
for (auto p : func->attrs->dict) {
if (p.second.as<StringObj>()) {
attrs[p.first] = std::string(Downcast<String>(p.second));
}
}

auto pf0 = GetPackedFunc("relay.backend._make_CCacheKey");
auto pf1 = GetPackedFunc("relay.backend._CompileEngineLower");
Target target;
@@ -387,7 +397,7 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator<std::vector<
CachedFunc ext_func = (*pf1)(compile_engine_, key);
ICHECK(ext_func.defined()) << "External function is not defined.";
UpdateConstants(func, &params_);
return GraphAddCallNode(op, ext_func->func_name, ext_func->func_name);
return GraphAddCallNode(op, ext_func->func_name, ext_func->func_name, attrs);
}

// In the current flat memory allocation scenario
@@ -430,7 +440,8 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator<std::vector<
lowered_funcs_[target->str()] = IRModule(Map<GlobalVar, BaseFunc>({}));
}
lowered_funcs_[target->str()]->Update(lowered_func->funcs);
return GraphAddCallNode(op, _GetUniqueName(lowered_func->func_name), lowered_func->func_name);
return GraphAddCallNode(op, _GetUniqueName(lowered_func->func_name), lowered_func->func_name,
attrs);
}

std::vector<GraphNodeRef> VisitExpr_(const LetNode* op) override {
17 changes: 17 additions & 0 deletions src/relay/backend/vm/compiler.cc
@@ -38,6 +38,7 @@
#include <tvm/te/operation.h>

#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <tuple>
@@ -57,6 +58,7 @@ namespace transform {

Pass LambdaLift();
Pass InlinePrimitives();
Pass LabelOps();

Pass MemoryPlan() {
auto f = tvm::runtime::Registry::Get("relay.transform.MemoryPlan");
@@ -301,6 +303,11 @@ class VMFunctionCompiler : ExprFunctor<void(const Expr& expr)> {

return VMFunction(var->name_hint, params_, instructions_, registers_num_, params_device_type);
}
/*! \brief Attrs objects for each op. */
std::map<Index, Map<String, ObjectRef>> op_attrs;

/*! \brief Attrs objects for each callsite. */
std::map<Index, Map<String, ObjectRef>> callsite_attrs;

protected:
size_t NewRegister() { return registers_num_++; }
@@ -557,6 +564,9 @@ class VMFunctionCompiler : ExprFunctor<void(const Expr& expr)> {
}
}

// Extract function attrs
op_attrs[op_index] = func->attrs->dict;

Emit(Instruction::InvokePacked(op_index, argument_registers.size(), output_tuple->fields.size(),
argument_registers));
}
@@ -931,6 +941,11 @@ void VMCompiler::Lower(IRModule mod, const TargetsMap& targets, const tvm::Targe
size_t func_index = context_.global_map.at(gvar);
ICHECK(func_index < exec_->functions.size());
exec_->functions[func_index] = vm_func;

// update structural hashes for tvm ops
for (auto p : func_compiler.op_attrs) {
exec_->op_attrs.insert(p);
}
}
}

@@ -1108,6 +1123,7 @@ IRModule VMCompiler::OptimizeModule(IRModule mod, const TargetsMap& targets_arg,

pass_seqs.push_back(MemoryOpt(target_host, targets));
pass_seqs.push_back(transform::InferType());
pass_seqs.push_back(transform::LabelOps());

transform::Sequential seq(pass_seqs);
tvm::With<relay::transform::PassContext> ctx(pass_ctx);
@@ -1175,6 +1191,7 @@ void VMCompiler::Codegen() {
}
lib = codegen::CreateMetadataModule(params_, lib, ext_mods, target_host_);
exec_->SetLib(lib);
CompileEngine::Global()->Clear();
}

ExprDeviceMap VMCompiler::AnalyzeContext() const {
119 changes: 119 additions & 0 deletions src/relay/transforms/label_ops.cc
@@ -0,0 +1,119 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include <tvm/ir/attrs.h>
#include <tvm/relay/expr_functor.h>
#include <tvm/relay/transform.h>
#include <tvm/runtime/container.h>

#include <iomanip>
#include <sstream>
#include <string>
#include <unordered_map>

namespace tvm {
namespace relay {
namespace transform {

namespace {

/*! \brief Collect all attributes whose name contains "layout".
*/
struct CollectAttrs : public AttrVisitor {
void Visit(const char* key, std::string* value) final {
if (std::string(key).find("layout") != std::string::npos) {
attrs[key] = String(*value);
}
}
void Visit(const char* key, double* value) final {}
void Visit(const char* key, uint64_t* value) final {}
void Visit(const char* key, int* value) final {}
void Visit(const char* key, int64_t* value) final {}
void Visit(const char* key, bool* value) final {}
void Visit(const char* key, runtime::NDArray* value) final {}
void Visit(const char* key, ObjectRef* value) final {
if (std::string(key).find("layout") != std::string::npos) {
attrs[key] = *value;
}
}
void Visit(const char* key, DataType* value) final {}
void Visit(const char* key, void** value) final {}
std::unordered_map<std::string, ObjectRef> attrs;
};
} // namespace

/*! \brief Visitor to add structural hash and layout information to `Function`
* nodes. Sets the "hash" field on the attr to the structural hash of the
 * function. Propagates any attributes with "layout" in their name from call
* nodes in the Function to the Function's attrs.
*/
class LabelOpsMutator : public MixedModeMutator {
private:
using MixedModeMutator::VisitExpr_;
std::unordered_map<std::string, ObjectRef> body_attrs;
Expr VisitExpr_(const FunctionNode* op) final {
// body_attrs collects attrs from Calls in the body of this Function. Reset
// it so we only get attrs from this Function.
body_attrs = {};
auto updated = ExprMutator::VisitExpr_(op);
size_t hash = StructuralHash()(updated);

// format hash as fixed length hex string so it is easier to read
std::stringstream s;
s << std::setfill('0') << std::setw(sizeof(size_t) * 2) << std::hex << hash;

Function f = WithAttr(Downcast<Function>(updated), "hash", String(s.str()));
for (auto p : body_attrs) {
f = WithAttr(f, p.first, p.second);
}
return std::move(f);
}

Expr Rewrite_(const CallNode* op, const Expr& post) final {
auto updated = MixedModeMutator::Rewrite_(op, post);
if (op->attrs.defined()) {
CollectAttrs collect;
const_cast<BaseAttrsNode*>(op->attrs.get())->VisitAttrs(&collect);
for (auto p : collect.attrs) {
if (body_attrs.find(p.first) != body_attrs.end() && p.second != body_attrs[p.first]) {
LOG(WARNING) << "LabelOps found two call sites with different values for " << p.first
<< " (" << p.second << " vs " << body_attrs[p.first]
<< "). Only the last will be recorded.";
}
body_attrs[p.first] = p.second;
}
}
return updated;
}
};

/*! \brief Add structural hash and layout information to Function nodes. This
* information is used later by the profiler.
*
* The hash and layout information is added to the attrs field of the Function.
* The key "hash" contains the structural hash of the node. Any attributes with
* "layout" in their name are also added to attrs (for example,
* `attrs["src_layout"]` contains the `src_layout` attribute of the TVM op
* corresponding to this function call).
*/
Pass LabelOps() {
runtime::TypedPackedFunc<Function(Function, IRModule, PassContext)> pass_func =
[=](Function f, IRModule m, PassContext pc) {
return Downcast<Function>(LabelOpsMutator().Mutate(f));
};
return CreateFunctionPass(pass_func, 1, "LabelOps", {});
}

} // namespace transform
} // namespace relay
} // namespace tvm
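
A note on the hash formatting above: the pass zero-pads the structural hash to twice `sizeof(size_t)` hex digits so every hash has the same width. In Python terms (hypothetical hash value):

h = 0x1A2B  # hypothetical structural hash
s = format(h, "016x")  # 16 hex digits on a 64-bit size_t
assert s == "0000000000001a2b"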
16 changes: 13 additions & 3 deletions src/runtime/graph_executor/debug/graph_executor_debug.cc
@@ -300,9 +300,19 @@ class GraphExecutorDebug : public GraphExecutor {
}

uint32_t eid = entry_id(i, 0);
const Device& device = data_entry_[eid]->device;
prof.StartCall(nodes_[i].param.func_name, device,
{{"Argument Shapes", profiling::ShapeString(shapes)}});
const Device& dev = data_entry_[eid]->device;

std::unordered_map<std::string, ObjectRef> metrics;
for (auto p : nodes_[i].param.attrs) {
if (std::string(p.first).find("layout") != std::string::npos) {
metrics[p.first] = p.second;
}
}
if (nodes_[i].param.attrs.find("hash") != nodes_[i].param.attrs.end()) {
metrics["Hash"] = Downcast<String>(nodes_[i].param.attrs.at("hash"));
}
metrics["Argument Shapes"] = profiling::ShapeString(shapes);
prof.StartCall(nodes_[i].param.func_name, dev, metrics);
op_execs_[i]();
prof.StopCall();
}
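
Downstream, these metrics become report columns. A small sketch (reusing `report` from the earlier example) of the CSV round-trip this commit's test performs:

import csv
from io import StringIO

# `report` as produced by gr.profile(...) above.
f = StringIO(report.csv())
rows = list(csv.reader(f, delimiter=","))
header = rows[0]
assert "Hash" in header  # column added by this commit
layout_cols = [c for c in header if "layout" in c]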
3 changes: 3 additions & 0 deletions src/runtime/graph_executor/graph_executor.h
@@ -50,6 +50,7 @@ namespace runtime {
/*! \brief Operator attributes of a tvm op. */
struct TVMOpParam {
std::string func_name;
std::unordered_map<std::string, ObjectRef> attrs;
uint32_t num_inputs;
uint32_t num_outputs;
uint32_t flatten_data;
@@ -266,6 +267,8 @@ class TVM_DLL GraphExecutor : public ModuleNode {
} else if (key == "flatten_data") {
param->flatten_data = strtoul(value.c_str(), nullptr, 10);
bitmask |= 8;
} else {
param->attrs[key] = String(value);
}
}
ICHECK_EQ(bitmask, 1 | 2 | 4 | 8) << "invalid format";
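
Illustratively (all values hypothetical), a `tvm_op` node's attrs in the graph JSON now look like this, with unknown keys preserved in `TVMOpParam::attrs` instead of tripping the format check:

node_attrs = {
    "func_name": "fused_nn_dense",  # parsed into dedicated TVMOpParam fields
    "num_inputs": "2",
    "num_outputs": "1",
    "flatten_data": "0",
    "hash": "00000000abcdef12",     # unknown keys like these now fall through
    "out_layout": "NC",             # to param->attrs as Strings
}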
3 changes: 3 additions & 0 deletions src/runtime/profiling.cc
@@ -425,6 +425,9 @@ Report Profiler::Report(bool aggregate, bool sort) {
row["Count"] = ObjectRef(make_object<CountNode>(1));
row["Name"] = cf.name;
row["Device"] = String(DeviceString(cf.dev));
for (auto p : cf.extra_metrics) {
row[p.first] = p.second;
}
rows.push_back(row);
}

16 changes: 14 additions & 2 deletions src/runtime/vm/profiler/vm.cc
@@ -104,8 +104,20 @@ void VirtualMachineDebug::InvokePacked(Index packed_index, const PackedFunc& fun
}
}

prof_.StartCall(packed_index_map_[packed_index], dev,
{{"Argument Shapes", profiling::ShapeString(shapes)}});
std::unordered_map<std::string, ObjectRef> metrics;
auto& op_attrs = exec_->op_attrs.at(packed_index);
for (auto p : op_attrs) {
if (std::string(p.first).find("layout") != std::string::npos) {
metrics[p.first] = p.second;
}
}
auto it = op_attrs.find("hash");
if (it != op_attrs.end()) {
metrics["Hash"] = Downcast<String>((*it).second);
}
metrics["Argument Shapes"] = profiling::ShapeString(shapes);

prof_.StartCall(packed_index_map_[packed_index], dev, metrics);
}
VirtualMachine::InvokePacked(packed_index, func, arg_count, output_size, args);
if (prof_.IsRunning()) {
6 changes: 5 additions & 1 deletion tests/python/unittest/test_runtime_profiling.py
@@ -42,8 +42,11 @@ def test_vm(target, dev):
f = StringIO(report.csv())
reader = csv.reader(f, delimiter=",")
# force parsing
in_header = True
for row in reader:
pass
if in_header:
assert "Hash" in row
in_header = False


@tvm.testing.parametrize_targets
@@ -57,3 +60,4 @@ def test_graph_executor(target, dev):
report = gr.profile(data=data)
assert "fused_nn_softmax" in str(report)
assert "Total" in str(report)
assert "Hash" in str(report)
