From 9a956a8ca939de6ee9b659e8a4559498323b8d8f Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Wed, 13 Jul 2016 17:31:08 -0700 Subject: [PATCH] [PASS] Add order mutation (#7) * [PASS] Add order mutation * A few benchmarks on compose speed --- nnvm/include/nnvm/symbolic.h | 8 +- nnvm/src/c_api/c_api_common.h | 3 + nnvm/src/c_api/c_api_symbolic.cc | 26 +++--- nnvm/src/core/symbolic.cc | 53 +++++++----- nnvm/src/pass/order_mutation.cc | 142 +++++++++++++++++++++++++++++++ nnvm/src/pass/saveload_json.cc | 2 +- nnvm/src/test_main.cc | 99 +++++++++++++++------ nnvm/tests/python/test_graph.py | 18 ++++ nnvm/tests/python/test_symbol.py | 8 ++ 9 files changed, 296 insertions(+), 63 deletions(-) create mode 100644 nnvm/src/pass/order_mutation.cc diff --git a/nnvm/include/nnvm/symbolic.h b/nnvm/include/nnvm/symbolic.h index 5f2deaf948a6..2d5867a11892 100644 --- a/nnvm/include/nnvm/symbolic.h +++ b/nnvm/include/nnvm/symbolic.h @@ -73,8 +73,8 @@ class Symbol { * \param kwargs keyword arguments for the symbol * \param name name of returned symbol. */ - void Compose(const std::vector& args, - const std::unordered_map& kwargs, + void Compose(const array_view& args, + const std::unordered_map& kwargs, const std::string& name); /*! * \brief Apply the symbol as a function, compose with arguments @@ -84,8 +84,8 @@ class Symbol { * \param name name of returned symbol. * \return a new Symbol which is the composition of current symbol with its arguments */ - Symbol operator () (const std::vector& args, - const std::unordered_map& kwargs, + Symbol operator () (const array_view& args, + const std::unordered_map& kwargs, const std::string& name) const; /*! * \brief Add control flow depenencies to operators involved in symbols. 
diff --git a/nnvm/src/c_api/c_api_common.h b/nnvm/src/c_api/c_api_common.h index 170ceb2e58d3..40c91d8ffc11 100644 --- a/nnvm/src/c_api/c_api_common.h +++ b/nnvm/src/c_api/c_api_common.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -36,6 +37,8 @@ struct NNAPIThreadLocalEntry { std::vector ret_vec_charp; /*! \brief result holder for returning handles */ std::vector ret_handles; + /*! \brief argument holder to hold symbol */ + std::unordered_map kwarg_symbol; }; /*! \brief Thread local store that can be used to hold return values. */ diff --git a/nnvm/src/c_api/c_api_symbolic.cc b/nnvm/src/c_api/c_api_symbolic.cc index 335d163c7dae..bc6eed5c742a 100644 --- a/nnvm/src/c_api/c_api_symbolic.cc +++ b/nnvm/src/c_api/c_api_symbolic.cc @@ -217,22 +217,26 @@ int NNSymbolCompose(SymbolHandle sym, const char** keys, SymbolHandle* args) { API_BEGIN(); - std::string s_name; - if (name != nullptr) s_name = name; - + NNAPIThreadLocalEntry *ret = NNAPIThreadLocalStore::Get(); + std::string& s_name = ret->ret_str; + std::unordered_map& kwargs + = ret->kwarg_symbol; + if (name != nullptr) { + s_name = name; + } else { + s_name.clear(); + } Symbol* s = static_cast(sym); if (keys == nullptr && num_args != 0) { - std::vector pos_args; - for (nn_uint i = 0; i < num_args; ++i) { - pos_args.push_back(*((Symbol*)args[i])); // NOLINT(*) - } - s->Compose(pos_args, {}, s_name); + kwargs.clear(); + array_view parg( + (Symbol**)args, (Symbol**)args + num_args); // NOLINT(*) + s->Compose(parg, kwargs, s_name); } else { - std::unordered_map kwargs; for (nn_uint i = 0; i < num_args; ++i) { - kwargs[keys[i]] = *((Symbol*)args[i]); // NOLINT(*) + kwargs[keys[i]] = (Symbol*)args[i]; // NOLINT(*) } - s->Compose({}, kwargs, s_name); + s->Compose(array_view(), kwargs, s_name); } API_END(); } diff --git a/nnvm/src/core/symbolic.cc b/nnvm/src/core/symbolic.cc index 0d6f2a6c786e..3cd1d982e433 100644 --- a/nnvm/src/core/symbolic.cc +++ b/nnvm/src/core/symbolic.cc @@ -45,7 +45,7 
@@ inline void UpdateNodeVersion(Node *n) { CHECK(e.node->is_variable()) << "Mutation target can only be Variable"; // increase the version of the variable. - ++nnvm::get(e.node->attrs.parsed).version; + e.version = ++nnvm::get(e.node->attrs.parsed).version; } } } @@ -98,7 +98,10 @@ Symbol Symbol::Copy() const { std::unordered_map > old_new; // use DFSVisit to copy all the nodes DFSVisit(this->outputs, [&old_new](const std::shared_ptr& node) { - old_new[node.get()] = std::make_shared(*node); + std::shared_ptr np = Node::Create(); + np->op = node->op; + np->attrs = node->attrs; + old_new[node.get()] = std::move(np); }); // connect nodes of new graph for (const auto &kv : old_new) { @@ -106,6 +109,9 @@ Symbol Symbol::Copy() const { Node *ptr = e.node.get(); kv.second->inputs.emplace_back(NodeEntry{old_new[ptr], e.index, e.version}); } + for (const std::shared_ptr& p : kv.first->control_deps) { + kv.second->control_deps.emplace_back(old_new[p.get()]); + } } // set the head Symbol ret; @@ -120,7 +126,7 @@ void Symbol::Print(std::ostream &os) const { os << "AtomicFunctor "<< " Op:" << outputs[0].node->op->name << '\n'; } else { // use DFSVisit to copy all the nodes - os << "Outputs:\n"; + os << "Symbol Outputs:\n"; for (size_t i = 0; i < outputs.size(); ++i) { os << "\toutput[" << i << "]=" << outputs[i].node->attrs.name << '(' << outputs[i].index << ")\n"; @@ -129,7 +135,8 @@ void Symbol::Print(std::ostream &os) const { if (node->is_variable()) { os << "Variable:" << node->attrs.name << '\n'; } else { - os << "Name: " << node->attrs.name << " Op:" << node->op->name << '\n' + os << "--------------------\n"; + os << "Op:" << node->op->name << ", Name=" << node->attrs.name << '\n' << "Inputs:\n"; for (size_t i = 0; i < node->inputs.size(); ++i) { const NodeEntry& e = node->inputs[i]; @@ -141,9 +148,17 @@ void Symbol::Print(std::ostream &os) const { os << '\n'; } } - os << "Attrs:\n"; - for (auto &kv : node->attrs.dict) { - os << '\t' << kv.first << '=' << kv.second << 
'\n'; + if (!node->attrs.dict.empty()) { + os << "Attrs:\n"; + for (auto &kv : node->attrs.dict) { + os << '\t' << kv.first << '=' << kv.second << '\n'; + } + } + if (node->control_deps.size() != 0) { + os << "Control deps:\n"; + for (size_t i = 0; i < node->control_deps.size(); ++i) { + os << "\tcdep[" << i << "]=" << node->control_deps[i]->attrs.name << '\n'; + } } } }); @@ -203,8 +218,8 @@ std::vector Symbol::ListOutputs() const { } // compositional logic -void Symbol::Compose(const std::vector& args, - const std::unordered_map& kwargs, +void Symbol::Compose(const array_view& args, + const std::unordered_map& kwargs, const std::string& name) { static auto& flist_inputs = Op::GetAttr("FListInputNames"); @@ -213,11 +228,11 @@ void Symbol::Compose(const std::vector& args, CHECK(!outputs[0].node->is_variable()) << "Variable cannot be composed"; // parameter check. for (size_t i = 0; i < args.size(); ++i) { - CHECK_EQ(args[i].outputs.size(), 1) + CHECK_EQ(args[i]->outputs.size(), 1) << "Argument " << i << " is a tuple, single value is required"; } for (const auto& kv : kwargs) { - CHECK_EQ(kv.second.outputs.size(), 1) + CHECK_EQ(kv.second->outputs.size(), 1) << "Keyword Argument " << kv.first << " is a tuple, single value is required"; } // assign new name @@ -234,7 +249,7 @@ void Symbol::Compose(const std::vector& args, << "Incorrect number of arguments, requires " << n_req << ", provided " << args.size(); for (size_t i = 0; i < args.size(); ++i) { - n->inputs[i] = args[i].outputs[0]; + n->inputs[i] = args[i]->outputs[0]; } // switch to keyword argument matching if (args.size() != n_req) { @@ -247,7 +262,7 @@ void Symbol::Compose(const std::vector& args, for (size_t i = args.size(); i < n_req; ++i) { auto it = kwargs.find(arg_names[i]); if (it != kwargs.end() && it->first == arg_names[i]) { - n->inputs[i] = it->second.outputs[0]; + n->inputs[i] = it->second->outputs[0]; ++nmatched; } else { n->inputs[i] = NodeEntry{ @@ -266,8 +281,8 @@ void Symbol::Compose(const 
std::vector& args, } else { CHECK_EQ(kwargs.size(), 0) << "Variable length function do not accept kwargs"; n->inputs.reserve(args.size()); - for (const Symbol& s : args) { - n->inputs.push_back(s.outputs[0]); + for (const Symbol* s : args) { + n->inputs.push_back(s->outputs[0]); } } UpdateNodeVersion(n); @@ -283,13 +298,13 @@ void Symbol::Compose(const std::vector& args, (const std::shared_ptr &node) { if (node->is_variable()) { if (arg_counter < args.size()) { - replace_map[node.get()] = &(args[arg_counter].outputs[0]); + replace_map[node.get()] = &(args[arg_counter]->outputs[0]); ++arg_counter; } else { // match kwargs auto kit = kwargs.find(node->attrs.name); if (kit != kwargs.end()) { - replace_map[node.get()] = &(kit->second.outputs[0]); + replace_map[node.get()] = &(kit->second->outputs[0]); ++nmatched; } } @@ -334,8 +349,8 @@ void Symbol::Compose(const std::vector& args, } } -Symbol Symbol::operator () (const std::vector& args, - const std::unordered_map& kwargs, +Symbol Symbol::operator () (const array_view& args, + const std::unordered_map& kwargs, const std::string& name) const { Symbol s = this->Copy(); s.Compose(args, kwargs, name); diff --git a/nnvm/src/pass/order_mutation.cc b/nnvm/src/pass/order_mutation.cc new file mode 100644 index 000000000000..d0d527472389 --- /dev/null +++ b/nnvm/src/pass/order_mutation.cc @@ -0,0 +1,142 @@ +/*! + * Copyright (c) 2016 by Contributors + * \file order_mutation.cc + * \brief Add control flow dependencies between nodes + * To correctly order mutation and read to resolve + * write after read problem and read after write problems. 
+ */ +#include +#include + +namespace nnvm { + +template +inline T get_with_default(const std::unordered_map &map, + Node* key, + const T& def) { + auto it = map.find(key); + if (it != map.end()) return it->second; + return def; +} + +Graph OrderMutation(const Graph& src) { + std::unordered_map > version_hist; + DFSVisit(src.outputs, [&version_hist](const std::shared_ptr& n) { + for (const NodeEntry& e : n->inputs) { + if (e.node->is_variable()) { + if (e.version != 0 && version_hist.count(e.node.get()) == 0) { + version_hist[e.node.get()] = std::vector{}; + } + } + } + }); + // no mutation happens, everything is fine. + if (version_hist.size() == 0) return src; + // start preparing for remapping the nodes. + std::unordered_map > old_new; + auto prepare = [&version_hist, &old_new] (const std::shared_ptr& n) { + static auto& fmutate_inputs = Op::GetAttr("FMutateInput"); + bool need_repl = false; + for (size_t i = 0; i < n->inputs.size(); ++i) { + const NodeEntry& e = n->inputs[i]; + if (e.node->is_variable()) { + if (e.version != 0) need_repl = true; + auto it = version_hist.find(e.node.get()); + if (it != version_hist.end()) { + std::vector& vec = it->second; + uint32_t is_mutate = + fmutate_inputs.count(n->op) ? 
fmutate_inputs[n->op](n->attrs, i) : 0; + vec.emplace_back(NodeEntry{n, is_mutate, e.version}); + } + } else { + if (old_new.count(e.node.get()) != 0) need_repl = true; + } + } + for (const std::shared_ptr& p : n->control_deps) { + if (old_new.count(p.get()) != 0) need_repl = true; + } + if (need_repl) { + std::shared_ptr np = Node::Create(); + np->op = n->op; + np->attrs = n->attrs; + old_new[n.get()] = std::move(np); + } + }; + DFSVisit(src.outputs, prepare); + // comparator of history entry + auto comparator = [](const NodeEntry& a, const NodeEntry &b) { + if (a.version < b.version) return true; + if (a.version > b.version) return false; + return a.index > b.index; + }; + + for (auto &kv : version_hist) { + std::sort(kv.second.begin(), kv.second.end(), comparator); + } + // copy the nodes, as well as add control deps + for (auto &kv : old_new) { + // copy the nodes + for (const NodeEntry& e : kv.first->inputs) { + auto it = old_new.find(e.node.get()); + if (it != old_new.end()) { + kv.second->inputs.emplace_back(NodeEntry{it->second, e.index, e.version}); + } else { + kv.second->inputs.push_back(e); + } + } + for (const std::shared_ptr& p : kv.first->control_deps) { + kv.second->control_deps.emplace_back( + get_with_default(old_new, p.get(), p)); + } + // add control deps + static auto& fmutate_inputs = Op::GetAttr("FMutateInput"); + for (size_t i = 0; i < kv.first->inputs.size(); ++i) { + const NodeEntry& e = kv.first->inputs[i]; + if (e.node->is_variable() && version_hist.count(e.node.get()) != 0) { + FMutateInput fmutate = fmutate_inputs.get(kv.first->op, nullptr); + uint32_t is_mutate = (fmutate == nullptr) ? 
0 : fmutate(kv.first->attrs, i); + std::vector& vec = version_hist.at(e.node.get()); + + auto it = std::lower_bound(vec.begin(), vec.end(), + NodeEntry{nullptr, 1, e.version}, + comparator); + if (is_mutate != 0) { + int read_dep = 0; + while (it != vec.begin()) { + --it; + if (it->index != 0) break; + ++read_dep; + // depend on previous read + kv.second->control_deps.push_back( + get_with_default(old_new, it->node.get(), it->node)); + } + if (read_dep == 0 && it->index != 0) { + // depend on last write + kv.second->control_deps.push_back( + get_with_default(old_new, it->node.get(), it->node)); + } + } else { + // depend on last write + if (it->index != 0) { + kv.second->control_deps.push_back( + get_with_default(old_new, it->node.get(), it->node)); + } + } + } + } + } + Graph ret; + for (const NodeEntry &e : src.outputs) { + ret.outputs.emplace_back(NodeEntry{ + get_with_default(old_new, e.node.get(), e.node), e.index, e.version}); + } + return ret; +} + +NNVM_REGISTER_PASS(OrderMutation) +.describe("Return a new graph that adds control dependencies, "\ + "to order the mutation and reads if mutation exists.") +.set_body(OrderMutation) +.set_change_graph(true); + +} // namespace nnvm diff --git a/nnvm/src/pass/saveload_json.cc b/nnvm/src/pass/saveload_json.cc index fee94fbf6eb9..4aa979e9cee9 100644 --- a/nnvm/src/pass/saveload_json.cc +++ b/nnvm/src/pass/saveload_json.cc @@ -1,7 +1,7 @@ /*! * Copyright (c) 2016 by Contributors * \file saveload_json.cc - * \brief Passes that defines save and load graph to/from JSON file. + * \brief Save and load graph to/from JSON file. 
*/ #include #include diff --git a/nnvm/src/test_main.cc b/nnvm/src/test_main.cc index 51d4ec1513cf..b79573c05513 100644 --- a/nnvm/src/test_main.cc +++ b/nnvm/src/test_main.cc @@ -2,42 +2,85 @@ #include #include #include +#include #include +#include #include -void test_op() { - using namespace nnvm; - auto add = Op::Get("add"); - static auto& nick = Op::GetAttr("nick_name"); - LOG(INFO) << "nick=" << nick[add]; +void test_speed() { + auto add = nnvm::Op::Get("add"); + double tstart = dmlc::GetTime(); + size_t rep = 1000; + size_t n = 1000; + std::unordered_map tmp; + std::vector vec{2}; + std::string name = "xx"; + for (size_t t = 0; t < rep; ++t) { + nnvm::Symbol s = nnvm::Symbol::CreateVariable("x"); + for (size_t i = 0; i < n; ++i) { + nnvm::Symbol nw = nnvm::Symbol::CreateFunctor(add, {}); + vec[0] = &s; + vec[1] =&s; + tmp.clear(); + nw.Compose(vec, tmp, name); + s = nw; + } + } + double tend = dmlc::GetTime(); + LOG(INFO) << "compose speed = " << n * rep / (tend - tstart) << " ops/sec"; } -void test_tuple() { - using nnvm::Tuple; - using nnvm::TShape; - Tuple x{1, 2, 3}; - Tuple y{1, 2, 3, 5, 6}; - x = std::move(y); - - CHECK_EQ(x.ndim(), 5); - Tuple z{1, 2, 3, 5, 6}; - std::ostringstream os; - os << z; - CHECK_EQ(os.str(), "(1,2,3,5,6)"); - std::istringstream is(os.str()); - is >> y; - CHECK_EQ(x, y); - Tuple ss{1, 2, 3}; - TShape s = ss; - s = std::move(ss); - CHECK((s == TShape{1, 2, 3})); +void test_node_speed() { + using namespace nnvm; + auto add = nnvm::Op::Get("add"); + double tstart = dmlc::GetTime(); + size_t rep = 1000; + size_t n = 100; + for (size_t t = 0; t < rep; ++t) { + nnvm::Symbol s = nnvm::Symbol::CreateVariable("x"); + for (size_t i = 0; i < n; ++i) { + auto xx = NodeEntry{Node::Create(), 0, 0}; + NodeEntry x = s.outputs[0]; + xx.node->op = add; + xx.node->inputs.emplace_back(x); + xx.node->inputs.emplace_back(x); + Symbol ss; + ss.outputs.push_back(xx); + s = ss; + } + } + double tend = dmlc::GetTime(); + LOG(INFO) << "test_node_speed 
speed = " << n * rep / (tend - tstart) << " ops/sec"; } - -void test_graph() { - nnvm::Symbol s; +void test_api_speed() { + auto add = (void*)nnvm::Op::Get("add"); // NOLINT(*) + double tstart = dmlc::GetTime(); + size_t rep = 1000; + size_t n = 1000; + std::unordered_map tmp; + std::vector vec{2}; + std::string name = "xx"; + for (size_t t = 0; t < rep; ++t) { + SymbolHandle s; + NNSymbolCreateVariable("xx", &s); + for (size_t i = 0; i < n; ++i) { + SymbolHandle arg[2]; + SymbolHandle ss; + NNSymbolCreateAtomicSymbol(add, 0, nullptr, nullptr, &ss); + arg[0] = s; + arg[1] = s; + NNSymbolCompose(ss, "nn", 2, nullptr, arg); + s = ss; + } + } + double tend = dmlc::GetTime(); + LOG(INFO) << "API compose speed = " << n * rep / (tend - tstart) << " ops/sec"; } + int main() { - test_tuple(); + test_speed(); + test_node_speed(); + test_api_speed(); return 0; } diff --git a/nnvm/tests/python/test_graph.py b/nnvm/tests/python/test_graph.py index 8fa392db3425..b6082364458e 100644 --- a/nnvm/tests/python/test_graph.py +++ b/nnvm/tests/python/test_graph.py @@ -1,3 +1,4 @@ +import json import nnvm.symbol as sym import nnvm.graph as graph @@ -17,7 +18,24 @@ def test_graph_json_attr(): g._set_json_attr('ilist', [1,2,3], 'list_int') assert g.json_attr('ilist') == [1,2,3] +def test_order_mutation_pass(): + x = sym.Variable('x') + y = sym.conv2d(data=x, name='conv', dev='gpu') + y = sym.add(y, x, name='add1') + # write after read + z = sym.assign(x, y, name='assign') + # read after write + t = sym.add(y, x, name='add2') + g = graph.create(sym.Group([t, z])) + jgraph = json.loads(g.apply(['OrderMutation', 'SaveJSON']).json_attr('json')) + jnodes = jgraph['nodes'] + nindex = {n['name']: i for i, n in enumerate(jnodes)} + assert nindex['assign'] in jnodes[nindex['add2']]['control_deps'] + assert nindex['conv'] in jnodes[nindex['assign']]['control_deps'] + assert nindex['add1'] in jnodes[nindex['assign']]['control_deps'] + if __name__ == "__main__": + test_order_mutation_pass() 
test_graph_json_attr() test_json_pass() diff --git a/nnvm/tests/python/test_symbol.py b/nnvm/tests/python/test_symbol.py index 08d24536084d..8259862152ae 100644 --- a/nnvm/tests/python/test_symbol.py +++ b/nnvm/tests/python/test_symbol.py @@ -36,8 +36,16 @@ def test_mutate_input(): except NNVMError: pass +def test_copy(): + x = sym.Variable('x') + z = sym.Variable('z') + y = sym.exp(sym.add(x, x, name='add', gpu=2), + name='exp', gpu=1, attr={"kk": "1"}) + + assert y.__copy__().debug_str() == y.debug_str() if __name__ == "__main__": + test_copy() test_default_input() test_compose() test_mutate_input()