Skip to content

Commit

Permalink
Use the Bellman-Ford algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
xumingkuan committed Jun 29, 2021
1 parent 838bf5d commit 6652b8d
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 34 deletions.
56 changes: 33 additions & 23 deletions taichi/ir/control_flow_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -864,11 +864,13 @@ std::unordered_set<SNode *> ControlFlowGraph::gather_loaded_snodes() {
return snodes;
}

void ControlFlowGraph::determine_ad_stack_size(int max_ad_stack_size) {
void ControlFlowGraph::determine_ad_stack_size(int default_ad_stack_size) {
/**
* Determine all adaptive AD-stacks' capacity using the worklist algorithm
* (similar to the Bellman-Ford algorithm). The time complexity
* is O(num_statements + num_edges * max_ad_stack_size).
* Determine all adaptive AD-stacks' necessary size using the Bellman-Ford
* algorithm. When there is a positive loop (#pushes > #pops in a loop)
* for an AD-stack, we cannot determine the size of the AD-stack, and
* |default_ad_stack_size| is used.
* The time complexity is O(num_statements + num_edges * num_nodes).
*/
const int num_nodes = size();

Expand All @@ -894,8 +896,9 @@ void ControlFlowGraph::determine_ad_stack_size(int max_ad_stack_size) {

std::queue<int> to_visit;
std::vector<bool> in_queue(num_nodes);
std::vector<int> times_pushed_in_queue(num_nodes, 0);
std::unordered_map<CFGNode *, int> node_ids;
std::unordered_set<AdStackAllocaStmt *> oversized_stacks;
std::unordered_set<AdStackAllocaStmt *> indeterminable_stacks;

for (int i = 0; i < num_nodes; i++)
node_ids[nodes[i].get()] = i;
Expand All @@ -922,6 +925,7 @@ void ControlFlowGraph::determine_ad_stack_size(int max_ad_stack_size) {
}
to_visit.push(i);
in_queue[i] = true;
times_pushed_in_queue[i] = 1;
}

// The maximum stack size determining algorithm -- an algorithm similar to
Expand All @@ -938,10 +942,6 @@ void ControlFlowGraph::determine_ad_stack_size(int max_ad_stack_size) {
auto max_size_inside_this_node = it.second;
auto current_max_size =
max_size_at_node_begin[node_id][stack] + max_size_inside_this_node;
if (current_max_size > max_ad_stack_size) {
current_max_size = max_ad_stack_size;
oversized_stacks.insert(stack);
}
if (current_max_size > max_size[stack]) {
max_size[stack] = current_max_size;
}
Expand All @@ -953,37 +953,47 @@ void ControlFlowGraph::determine_ad_stack_size(int max_ad_stack_size) {
auto increase_in_this_node = it.second;
auto current_size =
max_size_at_node_begin[node_id][stack] + increase_in_this_node;
if (current_size > max_ad_stack_size) {
current_size = max_ad_stack_size; // avoid infinite loop
if (current_size > default_ad_stack_size) {
current_size = default_ad_stack_size; // avoid infinite loop
}
for (auto *next_node : now->next) {
int next_node_id = node_ids[next_node];
if (current_size > max_size_at_node_begin[next_node_id][stack]) {
max_size_at_node_begin[next_node_id][stack] = current_size;
if (!in_queue[next_node_id]) {
to_visit.push(next_node_id);
in_queue[next_node_id] = true;
if (times_pushed_in_queue[next_node_id] <= num_nodes) {
to_visit.push(next_node_id);
in_queue[next_node_id] = true;
times_pushed_in_queue[next_node_id]++;
} else {
// A positive loop is found because this node has already been pushed
// into the queue more than num_nodes times.
indeterminable_stacks.insert(stack);
}
}
}
}
}
}

if (!oversized_stacks.empty()) {
std::vector<std::string> oversized_stacks_name;
oversized_stacks_name.reserve(oversized_stacks.size());
for (auto &stack : oversized_stacks) {
oversized_stacks_name.push_back(stack->name());
if (!indeterminable_stacks.empty()) {
std::vector<std::string> indeterminable_stacks_name;
indeterminable_stacks_name.reserve(indeterminable_stacks.size());
for (auto &stack : indeterminable_stacks) {
indeterminable_stacks_name.push_back(stack->name());
stack->max_size = default_ad_stack_size;
}
TI_WARN(
"The required capacity for autodiff stacks [{}] overflows the maximum "
"allowed. Use configured maximum capacity "
"(CompileConfig::max_ad_stack_size) {} instead.",
fmt::join(oversized_stacks_name, ", "), max_ad_stack_size);
TI_DEBUG(
"Unable to determine the necessary size for autodiff stacks [{}]. Use "
"configured size (CompileConfig::default_ad_stack_size) {} instead.",
fmt::join(indeterminable_stacks_name, ", "), default_ad_stack_size);
}

for (auto &it : max_size) {
auto *stack = it.first;
if (indeterminable_stacks.count(stack) > 0) {
continue;
}
// Since we use |max_size| == 0 for adaptive sizes, we do not want stacks
// with maximum capacity indeed equal to 0.
TI_WARN_IF(it.second == 0,
Expand Down
8 changes: 4 additions & 4 deletions taichi/ir/control_flow_graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,11 +123,11 @@ class ControlFlowGraph {
std::unordered_set<SNode *> gather_loaded_snodes();

/**
* Determine all adaptive AD-stacks' capacity.
* @param max_ad_stack_size The maximum allowed AD stack size. This parameter
* is set to prevent infinite loops of the algorithm.
* Determine all adaptive AD-stacks' necessary size.
* @param default_ad_stack_size The fallback size assigned to any AD-stack
* whose necessary size cannot be determined.
*/
void determine_ad_stack_size(int max_ad_stack_size);
void determine_ad_stack_size(int default_ad_stack_size);
};

TLANG_NAMESPACE_END
2 changes: 1 addition & 1 deletion taichi/ir/transforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ void auto_diff(IRNode *root,
const CompileConfig &config,
bool use_stack = false);
/**
* Determine all adaptive AD-stacks' capacity. This pass is idempotent, i.e.,
* Determine all adaptive AD-stacks' size. This pass is idempotent, i.e.,
* there are no side effects if called more than once or called when not needed.
* @return Whether the IR is modified, i.e., whether there exist adaptive
* AD-stacks before this pass.
Expand Down
5 changes: 3 additions & 2 deletions taichi/program/compile_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,9 @@ struct CompileConfig {
int default_gpu_block_dim;
int gpu_max_reg;
int ad_stack_size{0}; // 0 = adaptive
// The maximum size when automatically determining the stack size.
int max_ad_stack_size{32};
// The default size when the Taichi compiler is unable to automatically
// determine the autodiff stack size.
int default_ad_stack_size{32};

int saturating_grid_dim;
int max_block_dim;
Expand Down
2 changes: 1 addition & 1 deletion taichi/transforms/determine_ad_stack_size.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ bool determine_ad_stack_size(IRNode *root, const CompileConfig &config) {
}
auto cfg = analysis::build_cfg(root);
cfg->simplify_graph();
cfg->determine_ad_stack_size(config.max_ad_stack_size);
cfg->determine_ad_stack_size(config.default_ad_stack_size);
return true;
}

Expand Down
6 changes: 3 additions & 3 deletions tests/cpp/transforms/determine_ad_stack_size_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,10 @@ TEST_F(DetermineAdStackSizeTest, LoopInfeasible) {

CompileConfig config;
constexpr int kMaxAdStackSize = 32;
config.max_ad_stack_size = kMaxAdStackSize;
config.default_ad_stack_size = kMaxAdStackSize;
EXPECT_EQ(stack->max_size, 0);
// Should have a warning here (unable to determine capacity for autodiff
// stacks).
// Should have a debug message here (unable to determine the necessary size
// for autodiff stacks).
irpass::determine_ad_stack_size(ir_block, config);
EXPECT_EQ(stack->max_size, kMaxAdStackSize);
}
Expand Down

0 comments on commit 6652b8d

Please sign in to comment.