From d9eb9e74d208309df9e4eb1d630a26ac8a5fe86a Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Tue, 15 Sep 2020 17:11:46 -0700 Subject: [PATCH 1/8] add tutorial --- docs/conf.py | 1 + python/tvm/auto_scheduler/__init__.py | 2 +- python/tvm/auto_scheduler/auto_schedule.py | 27 +++++ tutorials/auto_scheduler/README.txt | 2 + tutorials/auto_scheduler/tune_matmul_x86.py | 103 ++++++++++++++++++++ tutorials/autotvm/README.txt | 2 +- 6 files changed, 135 insertions(+), 2 deletions(-) create mode 100644 tutorials/auto_scheduler/README.txt create mode 100644 tutorials/auto_scheduler/tune_matmul_x86.py diff --git a/docs/conf.py b/docs/conf.py index ca0bc9ba3de5..9322f5a38d12 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -193,6 +193,7 @@ "../tutorials/language", "../tutorials/optimize", "../tutorials/autotvm", + "../tutorials/auto_scheduler", "../tutorials/dev", "../tutorials/topi", "../tutorials/deployment", diff --git a/python/tvm/auto_scheduler/__init__.py b/python/tvm/auto_scheduler/__init__.py index 43e08a4aafa6..2b362872c814 100644 --- a/python/tvm/auto_scheduler/__init__.py +++ b/python/tvm/auto_scheduler/__init__.py @@ -26,7 +26,7 @@ from . 
import feature # Shortcut -from .auto_schedule import SearchTask, TuningOptions, HardwareParams, auto_schedule +from .auto_schedule import SearchTask, TuningOptions, HardwareParams, create_task, auto_schedule from .compute_dag import ComputeDAG from .cost_model import RandomModel, XGBModel from .measure import ( diff --git a/python/tvm/auto_scheduler/auto_schedule.py b/python/tvm/auto_scheduler/auto_schedule.py index af257f5aa8a4..0b80b8c3d432 100644 --- a/python/tvm/auto_scheduler/auto_schedule.py +++ b/python/tvm/auto_scheduler/auto_schedule.py @@ -31,7 +31,10 @@ import tvm._ffi from tvm.runtime import Object from .measure import LocalBuilder, LocalRunner +from .workload_registry import make_workload_key, workload_key_to_tensors +from .compute_dag import ComputeDAG from .search_policy import EmptyPolicy +from .utils import get_func_name from . import _ffi_api @@ -155,6 +158,30 @@ def __init__( measure_callbacks, ) +def create_task(func, args, target, target_host=None, hardware_params=None): + """Create a search task + + Parameters + ---------- + func : Union[Function, str] + The function that returns the compute declaration Tensors. + Can be the a function or the function name. + args : Args + The args of the function. + target : tvm.target.Target + The target device of this search task. + target_host : Optional[tvm.target.Target] + The target host device of this search task. + hardware_params : Optional[HardwareParams] + Hardware parameters used in this search task. + + Returns + ------- + task : the created task + """ + workload_key = make_workload_key(func, args) + dag = ComputeDAG(workload_key) + return SearchTask(dag, workload_key, target, target_host, hardware_params) def auto_schedule(task, search_policy=None, tuning_options=TuningOptions()): """Do auto scheduling for a computation declaration. 
diff --git a/tutorials/auto_scheduler/README.txt b/tutorials/auto_scheduler/README.txt new file mode 100644 index 000000000000..52c4c14b15b0 --- /dev/null +++ b/tutorials/auto_scheduler/README.txt @@ -0,0 +1,2 @@ +AutoScheduler : Template-free Auto Scheduling +----------- diff --git a/tutorials/auto_scheduler/tune_matmul_x86.py b/tutorials/auto_scheduler/tune_matmul_x86.py new file mode 100644 index 000000000000..2fd30a3c54ff --- /dev/null +++ b/tutorials/auto_scheduler/tune_matmul_x86.py @@ -0,0 +1,103 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +Using the template-free auto-scheduler on CPU +============================================= +**Author**: `Lianmin Zheng `_, \ + `Chengfan Jia `_ + +This is a tutorial on how to use the auto-scheduler in TVM. + +Different from the exiting autotvm which relies on manual templates to +define the search space, the auto-scheduler does not require any templates. +The user only needs to write the computation declaration, +the auto-scheduler then automatically generate a large +search space and begins the search (or auto-tuning). + +We use matrix multiplication as an example in this tutorial. 
+""" + +import numpy as np +import tvm +from tvm import te, testing, auto_scheduler + +###################################################################### +# To begin with, we define the computation of a matmul with bias add. +# The function should return the list of input/output tensors. +# From these tensors, the auto-scheduler can get the whole computational graph. + +@auto_scheduler.register_workload +def matmul_add(N, L, M, dtype): + A = te.placeholder((N, L), name='A', dtype=dtype) + B = te.placeholder((L, M), name='B', dtype=dtype) + C = te.placeholder((N, M), name='C', dtype=dtype) + + k = te.reduce_axis((0, L), name='k') + matmul = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), + name='matmul') + D = te.compute((N, M), lambda i, j: matmul[i, j] + C[i, j], name='D') + + return [A, B, C, D] + +###################################################################### +# We then create the a search task with N=L=M=128 and dtype='float32' + +target = tvm.target.Target("llvm") +task = auto_scheduler.create_task(matmul_add, (128, 128, 128, 'float32'), target) + +print(task.compute_dag) + +###################################################################### +# Next, we set parameters for the auto-scheduler. +# `num_measure_trials` is the number of measurement trials we can use during the search. +# We only make 10 trials in this tutorial for fast demonstration. In practice, 1000 is a good value for +# the search to converge. You can do more trials according to your time budget. +# In addition, we use `RecordToFile` to log measurement records into a file `test.json`. +# The measurement records can be used to query the history best, resume the search, +# or train the cost model later. + +tune_option = auto_scheduler.TuningOptions(num_measure_trials=2, + measure_callbacks=[auto_scheduler.RecordToFile('test.json')]) + +###################################################################### +# Now we get all inputs ready. Pretty simple, isn't it? 
+# We can kick off the search and let the auto-scheduler do its magic. +# After some measurement trials, it will return the best schedule it founds. + +sch, args = auto_scheduler.auto_schedule(task, + tuning_options=tune_option) + +###################################################################### +# We can lower schedule to see the IR after auto-scheduling. +# We can also build the binary function as usual. + +print(tvm.lower(sch, args, simple_mode=True)) +func = tvm.build(sch, args) + +###################################################################### +# Finally, let use do a correctness check + +# check correctness +a_np = np.random.uniform(size=(128, 128)).astype(np.float32) +b_np = np.random.uniform(size=(128, 128)).astype(np.float32) +c_np = np.random.uniform(size=(128, 128)).astype(np.float32) +d_np = a_np.dot(b_np) + c_np + +d_tvm = tvm.nd.empty(d_np.shape) +func(tvm.nd.array(a_np), tvm.nd.array(b_np), tvm.nd.array(c_np), d_tvm) + +tvm.testing.assert_allclose(d_np, d_tvm.asnumpy(), rtol=1e-2) \ No newline at end of file diff --git a/tutorials/autotvm/README.txt b/tutorials/autotvm/README.txt index 38e3b3343f4e..970430320400 100644 --- a/tutorials/autotvm/README.txt +++ b/tutorials/autotvm/README.txt @@ -1,4 +1,4 @@ .. 
_tutorials-autotvm-sec: -Auto tuning +AutoTVM : Template-based Auto Tuning ----------- From 48aa0e9c9281fe777cd07542299fb35ef645947a Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Tue, 15 Sep 2020 18:34:13 -0700 Subject: [PATCH 2/8] add tutorial --- docs/api/python/auto_scheduler.rst | 35 ++++++++ docs/api/python/autotvm.rst | 2 +- docs/api/python/index.rst | 1 + python/tvm/auto_scheduler/auto_schedule.py | 19 ++-- tutorials/auto_scheduler/README.txt | 2 +- tutorials/auto_scheduler/tune_matmul_x86.py | 96 ++++++++++++++------- tutorials/autotvm/README.txt | 2 +- 7 files changed, 114 insertions(+), 43 deletions(-) create mode 100644 docs/api/python/auto_scheduler.rst diff --git a/docs/api/python/auto_scheduler.rst b/docs/api/python/auto_scheduler.rst new file mode 100644 index 000000000000..85ff22f58b37 --- /dev/null +++ b/docs/api/python/auto_scheduler.rst @@ -0,0 +1,35 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +tvm.auto_scheduler +------------------ +.. automodule:: tvm.auto_scheduler + +tvm.auto_scheduler.auto_schedule +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. automodule:: tvm.auto_scheduler.auto_schedule + +.. autoclass:: tvm.auto_scheduler.auto_schedule.SearchTask + +.. 
autoclass:: tvm.auto_scheduler.auto_schedule.TuningOptions + +.. autofunction:: tvm.auto_scheduler.auto_schedule.create_task + +.. autofunction:: tvm.auto_scheduler.auto_schedule.auto_schedule + + + diff --git a/docs/api/python/autotvm.rst b/docs/api/python/autotvm.rst index 9357d1b6be08..5bde9ac47962 100644 --- a/docs/api/python/autotvm.rst +++ b/docs/api/python/autotvm.rst @@ -18,7 +18,7 @@ tvm.autotvm ----------- .. automodule:: tvm.autotvm -.. automodule:: tvm.autotvm.apply_history_best +.. autofunction:: tvm.autotvm.apply_history_best tvm.autotvm.measure ~~~~~~~~~~~~~~~~~~~ diff --git a/docs/api/python/index.rst b/docs/api/python/index.rst index bc9ec5fd8304..a6179684413d 100644 --- a/docs/api/python/index.rst +++ b/docs/api/python/index.rst @@ -40,6 +40,7 @@ Python API relay/dataflow_pattern relay/testing autotvm + auto_scheduler rpc micro contrib diff --git a/python/tvm/auto_scheduler/auto_schedule.py b/python/tvm/auto_scheduler/auto_schedule.py index 0b80b8c3d432..e4ea1ec80313 100644 --- a/python/tvm/auto_scheduler/auto_schedule.py +++ b/python/tvm/auto_scheduler/auto_schedule.py @@ -31,10 +31,10 @@ import tvm._ffi from tvm.runtime import Object from .measure import LocalBuilder, LocalRunner -from .workload_registry import make_workload_key, workload_key_to_tensors +from .workload_registry import make_workload_key from .compute_dag import ComputeDAG -from .search_policy import EmptyPolicy -from .utils import get_func_name +from .cost_model import XGBModel +from .search_policy import SketchPolicy from . 
import _ffi_api @@ -158,6 +158,7 @@ def __init__( measure_callbacks, ) + def create_task(func, args, target, target_host=None, hardware_params=None): """Create a search task @@ -183,16 +184,16 @@ def create_task(func, args, target, target_host=None, hardware_params=None): dag = ComputeDAG(workload_key) return SearchTask(dag, workload_key, target, target_host, hardware_params) + def auto_schedule(task, search_policy=None, tuning_options=TuningOptions()): - """Do auto scheduling for a computation declaration. + """Run auto scheduling search for a task Parameters ---------- task : SearchTask The SearchTask for the computation declaration. search_policy : Optional[SearchPolicy] - The search policy to be used for schedule search. Use EmptyPolicy as default, which always - returns an empty schedule. + The search policy to be used for schedule search. tuning_options : Optional[TuningOptions] Tuning and measurement options. @@ -205,5 +206,9 @@ def auto_schedule(task, search_policy=None, tuning_options=TuningOptions()): "Invalid task: " + task + " . `auto_scheduler.auto_schedule` expects a SearchTask." 
) - sch, tensors = _ffi_api.AutoSchedule(search_policy or EmptyPolicy(task), tuning_options) + if search_policy is None: + cost_model = XGBModel() + search_policy = SketchPolicy(task, cost_model) + + sch, tensors = _ffi_api.AutoSchedule(search_policy, tuning_options) return sch, tensors diff --git a/tutorials/auto_scheduler/README.txt b/tutorials/auto_scheduler/README.txt index 52c4c14b15b0..75986679f0bd 100644 --- a/tutorials/auto_scheduler/README.txt +++ b/tutorials/auto_scheduler/README.txt @@ -1,2 +1,2 @@ AutoScheduler : Template-free Auto Scheduling ------------ +--------------------------------------------- diff --git a/tutorials/auto_scheduler/tune_matmul_x86.py b/tutorials/auto_scheduler/tune_matmul_x86.py index 2fd30a3c54ff..fefdbf830e1b 100644 --- a/tutorials/auto_scheduler/tune_matmul_x86.py +++ b/tutorials/auto_scheduler/tune_matmul_x86.py @@ -20,13 +20,12 @@ **Author**: `Lianmin Zheng `_, \ `Chengfan Jia `_ -This is a tutorial on how to use the auto-scheduler in TVM. - -Different from the exiting autotvm which relies on manual templates to -define the search space, the auto-scheduler does not require any templates. -The user only needs to write the computation declaration, -the auto-scheduler then automatically generate a large -search space and begins the search (or auto-tuning). +Different from the exiting :ref:`autotvm ` which relies on +manual templates to define the search space, the auto-scheduler does not require any templates. +The auto-scheduler is template-free, so users only need to write the computation declaration without +any schedule commands or templates. +The auto-scheduler can automatically generate a large +search space and find a good schedule in the space. We use matrix multiplication as an example in this tutorial. 
""" @@ -36,62 +35,72 @@ from tvm import te, testing, auto_scheduler ###################################################################### +# Define the computation +# ^^^^^^^^^^^^^^^^^^^^^^ # To begin with, we define the computation of a matmul with bias add. # The function should return the list of input/output tensors. # From these tensors, the auto-scheduler can get the whole computational graph. + @auto_scheduler.register_workload def matmul_add(N, L, M, dtype): - A = te.placeholder((N, L), name='A', dtype=dtype) - B = te.placeholder((L, M), name='B', dtype=dtype) - C = te.placeholder((N, M), name='C', dtype=dtype) + A = te.placeholder((N, L), name="A", dtype=dtype) + B = te.placeholder((L, M), name="B", dtype=dtype) + C = te.placeholder((N, M), name="C", dtype=dtype) - k = te.reduce_axis((0, L), name='k') - matmul = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), - name='matmul') - D = te.compute((N, M), lambda i, j: matmul[i, j] + C[i, j], name='D') + k = te.reduce_axis((0, L), name="k") + matmul = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name="matmul") + out = te.compute((N, M), lambda i, j: matmul[i, j] + C[i, j], name="D") + + return [A, B, C, out] - return [A, B, C, D] ###################################################################### -# We then create the a search task with N=L=M=128 and dtype='float32' +# Create the search task +# ^^^^^^^^^^^^^^^^^^^^^^ +# We then create the a search task with N=L=M=128 and dtype="float32" target = tvm.target.Target("llvm") -task = auto_scheduler.create_task(matmul_add, (128, 128, 128, 'float32'), target) +task = auto_scheduler.create_task(matmul_add, (128, 128, 128, "float32"), target) +# inspect the computational graph print(task.compute_dag) ###################################################################### # Next, we set parameters for the auto-scheduler. -# `num_measure_trials` is the number of measurement trials we can use during the search. 
-# We only make 10 trials in this tutorial for fast demonstration. In practice, 1000 is a good value for -# the search to converge. You can do more trials according to your time budget. -# In addition, we use `RecordToFile` to log measurement records into a file `test.json`. -# The measurement records can be used to query the history best, resume the search, -# or train the cost model later. - -tune_option = auto_scheduler.TuningOptions(num_measure_trials=2, - measure_callbacks=[auto_scheduler.RecordToFile('test.json')]) +# +# * `num_measure_trials` is the number of measurement trials we can use during the search. +# We only make 10 trials in this tutorial for a fast demonstration. In practice, 1000 is a +# good value for the search to converge. You can do more trials according to your time budget. +# * In addition, we use `RecordToFile` to dump measurement records into a file `matmul.json`. +# The measurement records can be used to query the history best, resume the search, +# or do more analysis later. +# * see :any:`auto_schedule.TuningOptions`: for more parameters + +tune_option = auto_scheduler.TuningOptions( + num_measure_trials=10, measure_callbacks=[auto_scheduler.RecordToFile("matmul.json")] +) ###################################################################### +# Run the search +# ^^^^^^^^^^^^^^ # Now we get all inputs ready. Pretty simple, isn't it? # We can kick off the search and let the auto-scheduler do its magic. # After some measurement trials, it will return the best schedule it founds. -sch, args = auto_scheduler.auto_schedule(task, - tuning_options=tune_option) +sch, args = auto_scheduler.auto_schedule(task, tuning_options=tune_option) ###################################################################### # We can lower schedule to see the IR after auto-scheduling. -# We can also build the binary function as usual. 
print(tvm.lower(sch, args, simple_mode=True)) -func = tvm.build(sch, args) ###################################################################### -# Finally, let use do a correctness check +# Check correctness +# ^^^^^^^^^^^^^^^^^ +# We build the binary and check its correctness -# check correctness +func = tvm.build(sch, args) a_np = np.random.uniform(size=(128, 128)).astype(np.float32) b_np = np.random.uniform(size=(128, 128)).astype(np.float32) c_np = np.random.uniform(size=(128, 128)).astype(np.float32) @@ -100,4 +109,25 @@ def matmul_add(N, L, M, dtype): d_tvm = tvm.nd.empty(d_np.shape) func(tvm.nd.array(a_np), tvm.nd.array(b_np), tvm.nd.array(c_np), d_tvm) -tvm.testing.assert_allclose(d_np, d_tvm.asnumpy(), rtol=1e-2) \ No newline at end of file +tvm.testing.assert_allclose(d_np, d_tvm.asnumpy(), rtol=1e-3) + +###################################################################### +# Using the record file +# ^^^^^^^^^^^^^^^^^^^^^ +# During the search, all measuremnt records is dumpped into the record +# file "matmul.json". The measurement records can be used to resume the +# search, re-apply search results and other analysis. +# +# Here we show an example where we load the best schedule from a file, +# print the equivalent python schedule API, and build the binary again. + +inp, res = auto_scheduler.load_best("matmul.json", task.workload_key) + +# Print equivalent python schedule API. This can be used for debugging and +# learning the behavior of auto-scheduler. +print(task.compute_dag.print_python_code_from_state(inp.state)) + +# Rebuild the binary. This shows how you can apply the best schedule from a +# log file without reruning the search again. +sch, args = task.compute_dag.apply_steps_from_state(inp.state) +func = tvm.build(sch, args) diff --git a/tutorials/autotvm/README.txt b/tutorials/autotvm/README.txt index 970430320400..a1d33ba088cc 100644 --- a/tutorials/autotvm/README.txt +++ b/tutorials/autotvm/README.txt @@ -1,4 +1,4 @@ .. 
_tutorials-autotvm-sec: AutoTVM : Template-based Auto Tuning ------------ +------------------------------------ From a943a32b3113a8b0e5bba8119e7ca3df65b7dfca Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Tue, 15 Sep 2020 18:39:39 -0700 Subject: [PATCH 3/8] update --- tutorials/auto_scheduler/tune_matmul_x86.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tutorials/auto_scheduler/tune_matmul_x86.py b/tutorials/auto_scheduler/tune_matmul_x86.py index fefdbf830e1b..ded10cb33357 100644 --- a/tutorials/auto_scheduler/tune_matmul_x86.py +++ b/tutorials/auto_scheduler/tune_matmul_x86.py @@ -50,7 +50,7 @@ def matmul_add(N, L, M, dtype): k = te.reduce_axis((0, L), name="k") matmul = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name="matmul") - out = te.compute((N, M), lambda i, j: matmul[i, j] + C[i, j], name="D") + out = te.compute((N, M), lambda i, j: matmul[i, j] + C[i, j], name="out") return [A, B, C, out] @@ -63,7 +63,7 @@ def matmul_add(N, L, M, dtype): target = tvm.target.Target("llvm") task = auto_scheduler.create_task(matmul_add, (128, 128, 128, "float32"), target) -# inspect the computational graph +# Inspect the computational graph print(task.compute_dag) ###################################################################### @@ -92,6 +92,8 @@ def matmul_add(N, L, M, dtype): ###################################################################### # We can lower schedule to see the IR after auto-scheduling. +# The auto-scheduler correctly performs optimizations including multi-level tiling, +# parallelization, vectorization, unrolling and fusion. 
print(tvm.lower(sch, args, simple_mode=True)) @@ -114,13 +116,14 @@ def matmul_add(N, L, M, dtype): ###################################################################### # Using the record file # ^^^^^^^^^^^^^^^^^^^^^ -# During the search, all measuremnt records is dumpped into the record +# During the search, all measuremnt records are dumpped into the record # file "matmul.json". The measurement records can be used to resume the # search, re-apply search results and other analysis. # # Here we show an example where we load the best schedule from a file, # print the equivalent python schedule API, and build the binary again. +# Load the measuremnt record for the best schedule inp, res = auto_scheduler.load_best("matmul.json", task.workload_key) # Print equivalent python schedule API. This can be used for debugging and From 6dd4fd89f75983ae9fae66940867fb23e68e2adf Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Tue, 15 Sep 2020 20:50:52 -0700 Subject: [PATCH 4/8] Apply suggestions from code review Co-authored-by: Cody Yu --- python/tvm/auto_scheduler/auto_schedule.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/tvm/auto_scheduler/auto_schedule.py b/python/tvm/auto_scheduler/auto_schedule.py index e4ea1ec80313..b58ac31cc1fe 100644 --- a/python/tvm/auto_scheduler/auto_schedule.py +++ b/python/tvm/auto_scheduler/auto_schedule.py @@ -167,7 +167,7 @@ def create_task(func, args, target, target_host=None, hardware_params=None): func : Union[Function, str] The function that returns the compute declaration Tensors. Can be the a function or the function name. - args : Args + args : Union[Tuple[Any, ...], List[Any]] The args of the function. target : tvm.target.Target The target device of this search task. 
@@ -178,7 +178,7 @@ def create_task(func, args, target, target_host=None, hardware_params=None): Returns ------- - task : the created task + SearchTask: the created task """ workload_key = make_workload_key(func, args) dag = ComputeDAG(workload_key) From 77725e7c411c2b780b4df2ec039b72e9514499bb Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Tue, 15 Sep 2020 21:16:03 -0700 Subject: [PATCH 5/8] address comments --- tutorials/auto_scheduler/tune_matmul_x86.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tutorials/auto_scheduler/tune_matmul_x86.py b/tutorials/auto_scheduler/tune_matmul_x86.py index ded10cb33357..1c1ba47c47b6 100644 --- a/tutorials/auto_scheduler/tune_matmul_x86.py +++ b/tutorials/auto_scheduler/tune_matmul_x86.py @@ -58,7 +58,7 @@ def matmul_add(N, L, M, dtype): ###################################################################### # Create the search task # ^^^^^^^^^^^^^^^^^^^^^^ -# We then create the a search task with N=L=M=128 and dtype="float32" +# We then create a search task with N=L=M=128 and dtype="float32" target = tvm.target.Target("llvm") task = auto_scheduler.create_task(matmul_add, (128, 128, 128, "float32"), target) @@ -86,12 +86,12 @@ def matmul_add(N, L, M, dtype): # ^^^^^^^^^^^^^^ # Now we get all inputs ready. Pretty simple, isn't it? # We can kick off the search and let the auto-scheduler do its magic. -# After some measurement trials, it will return the best schedule it founds. +# After some measurement trials, it will return the best schedule it found. sch, args = auto_scheduler.auto_schedule(task, tuning_options=tune_option) ###################################################################### -# We can lower schedule to see the IR after auto-scheduling. +# We can lower the schedule to see the IR after auto-scheduling. # The auto-scheduler correctly performs optimizations including multi-level tiling, # parallelization, vectorization, unrolling and fusion. 
@@ -118,7 +118,7 @@ def matmul_add(N, L, M, dtype): # ^^^^^^^^^^^^^^^^^^^^^ # During the search, all measuremnt records are dumpped into the record # file "matmul.json". The measurement records can be used to resume the -# search, re-apply search results and other analysis. +# search, re-apply search results and perform other analyses. # # Here we show an example where we load the best schedule from a file, # print the equivalent python schedule API, and build the binary again. @@ -127,7 +127,7 @@ def matmul_add(N, L, M, dtype): inp, res = auto_scheduler.load_best("matmul.json", task.workload_key) # Print equivalent python schedule API. This can be used for debugging and -# learning the behavior of auto-scheduler. +# learning the behavior of the auto-scheduler. print(task.compute_dag.print_python_code_from_state(inp.state)) # Rebuild the binary. This shows how you can apply the best schedule from a From 9d418f89536109157f8f6d38100f8f57ed724689 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Wed, 16 Sep 2020 09:28:12 -0700 Subject: [PATCH 6/8] fix bugs --- python/tvm/auto_scheduler/auto_schedule.py | 30 +++++++++---------- .../search_policy/sketch_policy_rules.cc | 4 +-- tutorials/auto_scheduler/tune_matmul_x86.py | 2 +- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/python/tvm/auto_scheduler/auto_schedule.py b/python/tvm/auto_scheduler/auto_schedule.py index b58ac31cc1fe..c580eb5e600d 100644 --- a/python/tvm/auto_scheduler/auto_schedule.py +++ b/python/tvm/auto_scheduler/auto_schedule.py @@ -92,27 +92,27 @@ class TuningOptions(Object): Parameters ---------- num_measure_trials: int = 0 - The number of measurement trials. - The search policy measures `num_measure_trials` schedules in total and returns the best one - among them. - With `num_measure_trials` == 0, the policy will do the schedule search but won't involve - measurement. This can be used to get a runnable schedule quickly without auto-tuning. + The number of measurement trials. 
+ The search policy measures `num_measure_trials` schedules in total and returns the best one + among them. + With `num_measure_trials` == 0, the policy will do the schedule search but won't involve + measurement. This can be used to get a runnable schedule quickly without auto-tuning. early_stopping: Optional[int] - Stop the tuning early if getting no improvement after n measurements. + Stop the tuning early if getting no improvement after n measurements. num_measures_per_round: int = 64 - The number of schedules to be measured at each search round. - The whole schedule search process will try a total number of `num_measure_trials` in several - rounds. + The number of schedules to be measured at each search round. + The whole schedule search process will try a total number of `num_measure_trials` in several + rounds. verbose: int = 1 - Verbosity level. 0 for silent, 1 to output information during schedule search. + Verbosity level. 0 for silent, 1 to output information during schedule search. builder: Union[ProgramBuilder, str] = 'local' - ProgramBuilder which builds the program. + ProgramBuilder which builds the program. runner: Union[ProgramRunner, str] = 'local' - ProgramRunner which runs the program and measures time costs. + ProgramRunner which runs the program and measures time costs. measure_callbacks: Optional[List[MeasureCallback]] - Callback functions called after each measurement. - Candidates: - - auto_scheduler.RecordToFile + Callback functions called after each measurement. 
+ Candidates: + - auto_scheduler.RecordToFile """ def __init__( diff --git a/src/auto_scheduler/search_policy/sketch_policy_rules.cc b/src/auto_scheduler/search_policy/sketch_policy_rules.cc index 843301c2bb8f..38a72a7734b4 100644 --- a/src/auto_scheduler/search_policy/sketch_policy_rules.cc +++ b/src/auto_scheduler/search_policy/sketch_policy_rules.cc @@ -593,7 +593,7 @@ PopulationGenerationRule::ResultKind MutateComputeLocationCommon(SketchPolicyNod PopulationGenerationRule::ResultKind InitChangeComputeLocation::Apply(SketchPolicyNode* policy, State* state) const { - return MutateComputeLocationCommon(policy, state, false); + return MutateComputeLocationCommon(policy, state, true); } PopulationGenerationRule::ResultKind InitParallel::Apply(SketchPolicyNode* policy, @@ -1059,7 +1059,7 @@ PopulationGenerationRule::ResultKind MutateMaxUnrollFactor::Apply(SketchPolicyNo PopulationGenerationRule::ResultKind MutateComputeLocation::Apply(SketchPolicyNode* policy, State* state) const { - return MutateComputeLocationCommon(policy, state, true); + return MutateComputeLocationCommon(policy, state, false); } PopulationGenerationRule::ResultKind MutateParallel::Apply(SketchPolicyNode* policy, diff --git a/tutorials/auto_scheduler/tune_matmul_x86.py b/tutorials/auto_scheduler/tune_matmul_x86.py index 1c1ba47c47b6..1a3e06f9dd06 100644 --- a/tutorials/auto_scheduler/tune_matmul_x86.py +++ b/tutorials/auto_scheduler/tune_matmul_x86.py @@ -20,7 +20,7 @@ **Author**: `Lianmin Zheng `_, \ `Chengfan Jia `_ -Different from the exiting :ref:`autotvm ` which relies on +Different from the existing :ref:`autotvm ` which relies on manual templates to define the search space, the auto-scheduler does not require any templates. The auto-scheduler is template-free, so users only need to write the computation declaration without any schedule commands or templates. 
From 5cad061d9074ce5e74a28933b3f9425daabdcbaa Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Wed, 16 Sep 2020 10:19:01 -0700 Subject: [PATCH 7/8] add the exmple for resuming the search --- python/tvm/auto_scheduler/auto_schedule.py | 2 +- tutorials/auto_scheduler/tune_matmul_x86.py | 49 ++++++++++++++++++--- 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/python/tvm/auto_scheduler/auto_schedule.py b/python/tvm/auto_scheduler/auto_schedule.py index c580eb5e600d..80510d355d21 100644 --- a/python/tvm/auto_scheduler/auto_schedule.py +++ b/python/tvm/auto_scheduler/auto_schedule.py @@ -112,7 +112,7 @@ class TuningOptions(Object): measure_callbacks: Optional[List[MeasureCallback]] Callback functions called after each measurement. Candidates: - - auto_scheduler.RecordToFile + - auto_scheduler.RecordToFile """ def __init__( diff --git a/tutorials/auto_scheduler/tune_matmul_x86.py b/tutorials/auto_scheduler/tune_matmul_x86.py index 1a3e06f9dd06..59e28a84c2f2 100644 --- a/tutorials/auto_scheduler/tune_matmul_x86.py +++ b/tutorials/auto_scheduler/tune_matmul_x86.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. """ -Using the template-free auto-scheduler on CPU +Auto-scheduling a Subgraph for CPU ============================================= **Author**: `Lianmin Zheng `_, \ `Chengfan Jia `_ @@ -74,7 +74,7 @@ def matmul_add(N, L, M, dtype): # good value for the search to converge. You can do more trials according to your time budget. # * In addition, we use `RecordToFile` to dump measurement records into a file `matmul.json`. # The measurement records can be used to query the history best, resume the search, -# or do more analysis later. +# and do more analyses later. 
# * see :any:`auto_schedule.TuningOptions`: for more parameters tune_option = auto_scheduler.TuningOptions( @@ -117,10 +117,11 @@ def matmul_add(N, L, M, dtype): # Using the record file # ^^^^^^^^^^^^^^^^^^^^^ # During the search, all measuremnt records are dumpped into the record -# file "matmul.json". The measurement records can be used to resume the -# search, re-apply search results and perform other analyses. -# -# Here we show an example where we load the best schedule from a file, +# file "matmul.json". The measurement records can be used to re-apply search results, +# resume the search, and perform other analyses. + +###################################################################### +# Here is an example where we load the best schedule from a file, # print the equivalent python schedule API, and build the binary again. # Load the measuremnt record for the best schedule @@ -134,3 +135,39 @@ def matmul_add(N, L, M, dtype): # log file without reruning the search again. sch, args = task.compute_dag.apply_steps_from_state(inp.state) func = tvm.build(sch, args) + +###################################################################### +# A more complicated example is to resume the search. +# In this case, we need to create the search policy and cost model by ourselves +# and resume the status of search policy and cost model with the log file. +# In the example below we resume the status and do 5 more trials. 
+ + +def resume_search(task, log_file): + cost_model = auto_scheduler.XGBModel() + cost_model.update_from_file(log_file) + search_policy = auto_scheduler.SketchPolicy( + task, cost_model, init_search_callbacks=[auto_scheduler.PreloadMeasuredStates(log_file)] + ) + tune_option = auto_scheduler.TuningOptions( + num_measure_trials=5, measure_callbacks=[auto_scheduler.RecordToFile(log_file)] + ) + sch, args = auto_scheduler.auto_schedule(task, search_policy, tuning_options=tune_option) + + +# resume_search(task, "matmul.json") + +###################################################################### +# .. note:: +# We cannot run the line above because of the conflict between +# python's multiprocessing and tvm's thread pool. +# After running a tvm generated binary (L112), the python's multiprocessing +# library will hang forever. +# You have to make sure that you don't run any tvm generated binaries before +# calling ansor's search. To run the L156 above, you should comment out L112-114. +# +# You should be careful about this problem in your applications. +# There are other workarounds for this problem. +# For example, you can start a new thread/process (with the builtin python library +# threading or multiprocessing) and run the tvm binaries in the new thread/process. +# This provides isolation and avoids the conflict in the main thread/process. 
From 28bccefa2cbe0a5237d101c0c6fb26f460042ddc Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Wed, 16 Sep 2020 13:22:47 -0700 Subject: [PATCH 8/8] fix lint --- python/tvm/auto_scheduler/auto_schedule.py | 2 +- tutorials/auto_scheduler/tune_matmul_x86.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/tvm/auto_scheduler/auto_schedule.py b/python/tvm/auto_scheduler/auto_schedule.py index 80510d355d21..eae8b2527b6e 100644 --- a/python/tvm/auto_scheduler/auto_schedule.py +++ b/python/tvm/auto_scheduler/auto_schedule.py @@ -112,7 +112,7 @@ class TuningOptions(Object): measure_callbacks: Optional[List[MeasureCallback]] Callback functions called after each measurement. Candidates: - - auto_scheduler.RecordToFile + - auto_scheduler.RecordToFile """ def __init__( diff --git a/tutorials/auto_scheduler/tune_matmul_x86.py b/tutorials/auto_scheduler/tune_matmul_x86.py index 59e28a84c2f2..1a9af42510eb 100644 --- a/tutorials/auto_scheduler/tune_matmul_x86.py +++ b/tutorials/auto_scheduler/tune_matmul_x86.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. """ -Auto-scheduling a Subgraph for CPU +Auto-scheduling matrix multiplication for CPU ============================================= **Author**: `Lianmin Zheng `_, \ `Chengfan Jia `_