[AutoScheduler] Guarantee init population sampling outputs a valid set
comaniac authored and masahi committed Oct 23, 2020
1 parent c9f5b8b commit 1e7964e
Showing 11 changed files with 85 additions and 27 deletions.
2 changes: 1 addition & 1 deletion python/tvm/auto_scheduler/cost_model/xgb_model.py
@@ -192,7 +192,7 @@ def predict(self, task, states):
else:
ret = np.random.uniform(0, 1, (len(states),))

# Predict 0 for invalid states that failed to be lowered.
# Predict -inf for invalid states that failed to be lowered.
for idx, feature in enumerate(features):
if feature.min() == feature.max() == 0:
ret[idx] = float("-inf")
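For context, a minimal Python sketch of the resulting filtering behavior. `score_states` is a hypothetical helper, not part of the TVM API; it mirrors how the predictions are overwritten with -inf for states whose feature vectors are all zero (i.e. failed to be lowered), so callers can drop them with a large negative cutoff:

import numpy as np

def score_states(features, raw_scores):
    # Assumed helper: copy the raw predictions and overwrite entries whose
    # feature vector is all zero (feature extraction / lowering failed) with -inf.
    scores = np.array(raw_scores, dtype=float)
    for idx, feature in enumerate(features):
        if feature.min() == feature.max() == 0:
            scores[idx] = float("-inf")
    return scores

features = [np.zeros(164), np.random.uniform(0, 1, 164)]
scores = score_states(features, raw_scores=[0.3, 0.7])
valid = [i for i, s in enumerate(scores) if s > -1e10]  # keeps only index 1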
3 changes: 2 additions & 1 deletion python/tvm/auto_scheduler/search_policy.py
@@ -148,10 +148,11 @@ class SketchPolicy(SearchPolicy):
DEFAULT_PARAMS = {
"eps_greedy": 0.05,
"retry_search_one_round_on_empty": 10,
"sample_init_population": 50,
"sample_init_use_measured_ratio": 0.2,
"evolutionary_search_population": 2048,
"evolutionary_search_num_iters": 10,
"evolutionary_search_mutation_prob": 0.85,
"evolutionary_search_use_measured_ratio": 0.2,
"cpu_multi_level_tiling_structure": "SSRSRS",
"gpu_multi_level_tiling_structure": "SSSRRSRS",
# Notice: the default thread bind policy of GPU assumes the tiling structure to have at
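The two new keys can be overridden per search through the `params` argument of SketchPolicy. A hedged usage sketch, assuming `task` is an already-created auto_scheduler search task:

from tvm import auto_scheduler
from tvm.auto_scheduler.cost_model import XGBModel

# `task` is assumed to be an existing auto_scheduler search task.
policy = auto_scheduler.SketchPolicy(
    task,
    program_cost_model=XGBModel(),
    params={
        "sample_init_population": 50,           # size of the randomly sampled initial population
        "sample_init_use_measured_ratio": 0.2,  # cap on reusing already-measured states
    },
)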
2 changes: 1 addition & 1 deletion src/auto_scheduler/compute_dag.cc
@@ -1175,7 +1175,7 @@ Array<State> ComputeDAG::InferBound(const Array<State>& states) const {

support::parallel_for(0, states.size(), [this, &states, &out_states](int i) {
try {
out_states.Set(i, this->InferBound(states[i]));
out_states.Set(i, (states[i].defined()) ? this->InferBound(states[i]) : states[i]);
} catch (dmlc::Error& e) {
LOG(WARNING) << "InferBound fails on the state:\n"
<< states[i] << "\n"
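The guard passes undefined states through untouched instead of trying to lower them. The same pattern at the Python level, as a sketch that assumes failed states are represented as None:

def infer_bound_safe(compute_dag, states):
    # Leave failed (None) states untouched so later stages can filter them out,
    # rather than letting bound inference throw on them here.
    return [
        compute_dag.infer_bound_from_state(s) if s is not None else s
        for s in states
    ]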
70 changes: 59 additions & 11 deletions src/auto_scheduler/search_policy/sketch_policy.cc
@@ -258,12 +258,12 @@ std::pair<Array<MeasureInput>, Array<MeasureResult>> SketchPolicyNode::ContinueS

Array<State> SketchPolicyNode::SearchOneRound(int num_random_states, Array<State>* random_states) {
// Get parameters
int population = GetIntParam(params, SketchParamKey::EvolutionarySearch::population);
int num_use_measured =
std::min(static_cast<int>(measured_states_vector_.size()),
static_cast<int>(
GetDoubleParam(params, SketchParamKey::EvolutionarySearch::use_measured_ratio) *
population));
int population = GetIntParam(params, SketchParamKey::SampleInitPopulation::population);
int num_use_measured = std::min(
static_cast<int>(measured_states_vector_.size()),
static_cast<int>(
GetDoubleParam(params, SketchParamKey::SampleInitPopulation::use_measured_ratio) *
population));
bool is_cost_model_reasonable = !program_cost_model->IsInstance<RandomModelNode>();

// 1. Generate sketches
Expand Down Expand Up @@ -374,10 +374,14 @@ Array<State> SketchPolicyNode::SampleInitPopulation(const Array<State>& sketches
}
auto tic_begin = std::chrono::high_resolution_clock::now();

while (static_cast<int>(out_states.size()) < out_size && fail_ct < out_size) {
size_t iter = 1;
size_t target_size = out_size;
size_t unchange_cnt = 0;
while (out_states.size() < target_size) {
std::vector<State> temp_states(out_size);

support::parallel_for(0, out_size - out_states.size(),
// Initialize a batch of states randomly
support::parallel_for(0, out_size,
[this, &temp_states, &sketches, &rand_gens](int index) {
// Randomly choose a sketch
State tmp_s = sketches[(rand_gens[index])() % sketches.size()];
@@ -395,13 +399,57 @@
}
});

for (int i = 0; i < out_size; i++) {
if (temp_states[i].defined()) {
out_states.push_back(std::move(temp_states[i]));
// Filter out the states that failed to apply the initial rules
Array<State> cand_states;
for (auto tmp_s : temp_states) {
if (tmp_s.defined()) {
cand_states.push_back(std::move(tmp_s));
} else {
fail_ct++;
}
}

unchange_cnt++;
if (!cand_states.empty()) {
// Run the cost model to filter out states that failed to extract features.
// This may happen due to illegal schedules or schedules that use too much
// memory on GPU.
std::vector<float> pop_scores;
pop_scores.reserve(cand_states.size());
cand_states = search_task->compute_dag.InferBound(cand_states);
program_cost_model->Predict(search_task, cand_states, &pop_scores);

for (size_t i = 0; i < cand_states.size(); i++) {
if (pop_scores[i] > -1e10) {
out_states.push_back(std::move(cand_states[i]));
unchange_cnt = 0; // Reset the counter once we found a valid state
} else {
fail_ct++;
}
}
}

if (iter % 5 == 0) {
double duration = std::chrono::duration_cast<std::chrono::duration<double>>(
std::chrono::high_resolution_clock::now() - tic_begin)
.count();
StdCout(verbose) << "Sample Iter: " << iter << std::fixed << std::setprecision(4)
<< "\t#Pop: " << out_states.size() << "\t#Target: " << target_size
<< "\tfail_ct: " << fail_ct << "\tTime elapsed: " << std::fixed
<< std::setprecision(2) << duration << std::endl;
}

if (unchange_cnt == 5) {
// Reduce the target size to avoid spending too much time in this phase if no valid
// state was found in the past iterations
if (target_size > 1) {
target_size /= 2;
StdCout(verbose) << "#Target has been reduced to " << target_size
<< " due to too many failures";
}
unchange_cnt = 0;
}
iter++;
}

double duration = std::chrono::duration_cast<std::chrono::duration<double>>(
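In outline, the revised sampling loop keeps drawing random batches, keeps only candidates the cost model accepts, and halves its target after five fruitless batches, so the phase always terminates with a (possibly smaller) valid population. A hedged Python sketch, where `sample_state` and `score_states` are stand-ins for the C++ init rules and the cost-model prediction:

import random

def sample_init_population(sketches, out_size, sample_state, score_states):
    # sample_state(sketch) -> a randomly initialized state, or None on failure (assumed helper)
    # score_states(states) -> list of floats, -inf for states failing feature extraction (assumed helper)
    out_states, fail_ct, unchange_cnt, target_size = [], 0, 0, out_size
    while len(out_states) < target_size:
        # 1. Randomly instantiate a batch of candidates from the sketches.
        batch = [sample_state(random.choice(sketches)) for _ in range(out_size)]
        cand = [s for s in batch if s is not None]
        fail_ct += len(batch) - len(cand)

        unchange_cnt += 1
        if cand:
            # 2. Keep only candidates the cost model can score (valid schedules).
            for state, score in zip(cand, score_states(cand)):
                if score > -1e10:
                    out_states.append(state)
                    unchange_cnt = 0  # reset once a valid state is found
                else:
                    fail_ct += 1

        # 3. After five consecutive batches without any valid state, halve the
        #    target so this phase cannot run indefinitely.
        if unchange_cnt == 5:
            if target_size > 1:
                target_size //= 2
            unchange_cnt = 0
    return out_states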
12 changes: 8 additions & 4 deletions src/auto_scheduler/search_policy/sketch_policy.h
@@ -56,16 +56,20 @@ struct SketchParamKey {
/*! \brief Retry several times if SearchOneRound gets no valid state. */
static constexpr const char* empty_retry_count = "retry_search_one_round_on_empty";

struct SampleInitPopulation {
/*! \brief The population size of initial sampling. */
static constexpr const char* population = "sample_init_population";
/*! \brief The maximum percentage of measured states in the initial sampling. */
static constexpr const char* use_measured_ratio = "sample_init_use_measured_ratio";
};

struct EvolutionarySearch {
/*! \brief The population size for evolutionary search. */
/*! \brief The population size of evolutionary search. */
static constexpr const char* population = "evolutionary_search_population";
/*! \brief The number of iterations performed by generic algorithm.*/
static constexpr const char* num_iters = "evolutionary_search_num_iters";
/*! \brief The mutation probability.*/
static constexpr const char* mutation_prob = "evolutionary_search_mutation_prob";
/*! \brief The maximum percentage of measured states in the initial population for evolutionary
* search. */
static constexpr const char* use_measured_ratio = "evolutionary_search_use_measured_ratio";
};

struct MultiLevelTiling {
@@ -44,7 +44,6 @@ def is_good_state(state):

def predict(self, task, states):
scores = []
found = False
for state in states:
scores.append(1 if self.is_good_state(state) else 0)
return scores
@@ -89,7 +88,6 @@ def is_good_state(state):

def predict(self, task, states):
scores = []
found = False
for state in states:
scores.append(1 if self.is_good_state(state) else 0)
return scores
6 changes: 5 additions & 1 deletion tests/scripts/task_python_docs.sh
@@ -37,7 +37,11 @@ rm -rf docs/_build
mkdir -p docs/_build/html
rm -rf docs/gen_modules
rm -rf docs/doxygen
rm -rf tutorials/auto_scheduler/auto_scheduler_logs

# prepare auto scheduler tutorials
rm -rf tutorials/auto_scheduler/*logs
mkdir tutorials/auto_scheduler/logs
cp -f tutorials/auto_scheduler/{matmul,conv2d}.json tutorials/auto_scheduler/logs

# remove stale tutorials and always build from scratch.
rm -rf docs/tutorials
1 change: 1 addition & 0 deletions tutorials/auto_scheduler/conv2d.json
@@ -0,0 +1 @@
{"i": [["[\"conv2d_layer\", 1, 7, 7, 512, 512, 3, 3, [1, 1], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -thread_warp_size=32"], [[], [["CI", 5], ["SP", 3, 0, 1, [1, 1, 1, 1], 1], ["SP", 3, 5, 512, [1, 64, 2, 1], 1], ["SP", 3, 10, 7, [1, 1, 1, 1], 1], ["SP", 3, 15, 7, [1, 1, 7, 1], 1], ["SP", 3, 20, 512, [4, 2], 1], ["SP", 3, 23, 3, [1, 1], 1], ["SP", 3, 26, 3, [3, 1], 1], ["RE", 3, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 6, 0, 1, 3], ["FSP", 6, 4, 2, 3], ["FSP", 6, 8, 3, 3], ["FSP", 6, 12, 4, 3], ["RE", 6, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 3, 6, 11], ["CHR", 2, "shared", [3]], ["CA", 3, 4, 14], ["CHR", 1, "shared", [4]], ["CA", 2, 5, 14], ["CI", 1], ["FU", 8, [0, 1, 2, 3]], ["AN", 8, 0, 5], ["FU", 8, [1, 2, 3, 4]], ["AN", 8, 1, 4], ["FU", 8, [2, 3, 4, 5]], ["AN", 8, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 48, [1], 1], ["AN", 4, 1, 2], ["FFSP", 4, 0, [4, 3, 2, 1], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 504, [4], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [4, 3, 2, 1], 1, 1], ["AN", 2, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$512"]]]], "r": [[0.000429498], 0, 1.59126, 1603259147], "v": "v0.2"}
2 changes: 2 additions & 0 deletions tutorials/auto_scheduler/matmul.json
@@ -0,0 +1,2 @@
# Keep a valid schedule for demonstration
{"i": [["[\"matmul_add\", 128, 128, 128, \"float32\"]", "llvm -keys=cpu"], [[], [["SP", 2, 0, 128, [4, 2, 4], 1], ["SP", 2, 4, 128, [1, 32, 2], 1], ["SP", 2, 8, 128, [2], 1], ["RE", 2, [0, 4, 1, 5, 8, 2, 6, 9, 3, 7]], ["FSP", 4, 0, 0, 1], ["FSP", 4, 2, 1, 1], ["RE", 4, [0, 2, 1, 3]], ["CA", 2, 4, 1], ["FU", 4, [0, 1]], ["AN", 4, 0, 3], ["PR", 2, 0, "auto_unroll_max_step$0"], ["AN", 2, 9, 2]]]], "r": [[5.80388e-05], 0, 0.299169, 1603402396], "v": "v0.2"}
6 changes: 3 additions & 3 deletions tutorials/auto_scheduler/tune_conv2d_layer_cuda.py
@@ -90,10 +90,10 @@ def conv2d_layer(N, H, W, CO, CI, KH, KW, stride, padding):
# * see :any:`auto_scheduler.TuningOptions`,
# :any:`auto_scheduler.LocalRPCMeasureContext` for more parameters.

if not os.path.exists("./auto_scheduler_logs"):
os.mkdir("./auto_scheduler_logs")
if not os.path.exists("./logs"):
os.mkdir("./logs")

logfile = os.path.join("./auto_scheduler_logs", "conv2d.json")
logfile = os.path.join("./logs", "conv2d.json")
measure_ctx = auto_scheduler.LocalRPCMeasureContext(min_repeat_ms=300)
tune_option = auto_scheduler.TuningOptions(
num_measure_trials=10,
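An equivalent, slightly more compact way to prepare the log directory in the tutorials, shown as a sketch using only the Python standard library (exist_ok makes the call idempotent, so the existence check is unnecessary):

import os

os.makedirs("./logs", exist_ok=True)
logfile = os.path.join("./logs", "conv2d.json")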
6 changes: 3 additions & 3 deletions tutorials/auto_scheduler/tune_matmul_x86.py
@@ -82,10 +82,10 @@ def matmul_add(N, L, M, dtype):
# and do more analyses later.
# * see :any:`auto_scheduler.TuningOptions` for more parameters

if not os.path.exists("./auto_scheduler_logs"):
os.mkdir("./auto_scheduler_logs")
if not os.path.exists("./logs"):
os.mkdir("./logs")

logfile = os.path.join("./auto_scheduler_logs", "matmul.json")
logfile = os.path.join("./logs", "matmul.json")
tune_option = auto_scheduler.TuningOptions(
num_measure_trials=10, measure_callbacks=[auto_scheduler.RecordToFile(logfile)]
)
