Skip to content

Commit

Permalink
Add autoscheduler support to tvmc (apache#7070)
Browse files Browse the repository at this point in the history
* Add autoscheduler support to tvmc

- Add an autoschedule module to tvmc
- Extract common tuning option between autotuner and autoscheduler
- Add testing

* Linting and small bug-fixing

* Addressing comments and refactoring

* Fix linting

* rebasing

* Addressing comments - 2

* Addressing comments -3

Change-Id: I207872757473210681d9db04bfdcd2c5e6deaa05

* Addressing comments - 4

Change-Id: I11f73c9b32e83c013cfb2224ccce06f60a128af7
  • Loading branch information
Giuseppe Rossini authored and Tushar Dey committed Jan 20, 2021
1 parent 877a81e commit f0f0c71
Show file tree
Hide file tree
Showing 4 changed files with 353 additions and 38 deletions.
259 changes: 227 additions & 32 deletions python/tvm/driver/tvmc/autotuner.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

from urllib.parse import urlparse

from tvm import autotvm
from tvm import autotvm, auto_scheduler
from tvm.autotvm.tuner import GATuner
from tvm.autotvm.tuner import GridSearchTuner
from tvm.autotvm.tuner import RandomTuner
Expand Down Expand Up @@ -116,12 +116,6 @@ def add_tune_parser(subparsers):
default=1000,
help="the maximum number of tuning trials to perform",
)
parser.add_argument(
"--tuner",
choices=["ga", "gridsearch", "random", "xgb", "xgb_knob", "xgb-rank"],
default="xgb",
help="type of tuner to use",
)
parser.add_argument(
"--tuning-records",
metavar="PATH",
Expand All @@ -133,6 +127,85 @@ def add_tune_parser(subparsers):
default=None,
help="change the data layout of the whole graph",
)
parser.add_argument(
"--enable-autoscheduler",
help="enable tuning the graph through the autoscheduler",
action="store_true",
)

auto_scheduler_group = parser.add_argument_group(
"Autoscheduler options",
"Autoscheduler options, used when --enabled-auto-scheduler is provided",
)

auto_scheduler_group.add_argument(
"--cache-line-bytes",
type=int,
default=64,
help="the size of cache line in bytes",
)
auto_scheduler_group.add_argument(
"--num-cores",
type=int,
default=4,
help="the number of device cores",
)
auto_scheduler_group.add_argument(
"--vector-unit-bytes",
type=int,
default=16,
help="the width of vector units in bytes",
)
auto_scheduler_group.add_argument(
"--max-shared-memory-per-block",
type=int,
default=0,
help="the max shared memory per block in bytes",
)
auto_scheduler_group.add_argument(
"--max-local-memory-per-block",
type=int,
default=0,
help="the max local memory per block in bytes",
)
auto_scheduler_group.add_argument(
"--max-threads-per-block",
type=int,
default=0,
help="the max number of threads per block",
)
auto_scheduler_group.add_argument(
"--max-vthread-extent",
type=int,
default=0,
help="the max vthread extent",
)
auto_scheduler_group.add_argument(
"--warp-size",
type=int,
default=0,
help="the thread numbers of a warp",
)
auto_scheduler_group.add_argument(
"--include-simple-tasks",
help="whether to extract simple tasks that do not include complicated ops",
action="store_true",
)
auto_scheduler_group.add_argument(
"--log-estimated-latency",
help="whether to log the estimated latency to the file after tuning a task",
action="store_true",
)
autotvm_group = parser.add_argument_group(
"autotvm options",
"autotvm options, used when the autoscheduler is not enabled",
)
autotvm_group.add_argument(
"--tuner",
choices=["ga", "gridsearch", "random", "xgb", "xgb_knob", "xgb-rank"],
default="xgb",
help="type of tuner to use when tuning with autotvm.",
)
# TODO (@leandron) This is a path to a physical file, but
# can be improved in future to add integration with a modelzoo
# or URL, for example.
Expand All @@ -147,7 +220,6 @@ def drive_tune(args):
args: argparse.Namespace
Arguments from command line parser.
"""

# extra arguments validation before importing the model, so that obvious errors
# are pointed in advance.
if args.rpc_tracker:
Expand All @@ -174,17 +246,9 @@ def drive_tune(args):
min_repeat_ms = 0 if target.keys[0] == "cpu" else 1000
logger.debug("Default --min-repeat-ms for this target is %s", min_repeat_ms)

tasks = get_tuning_tasks(
mod=mod,
params=params,
target=target,
target_host=args.target_host,
alter_layout=args.desired_layout,
)

if args.rpc_tracker:

runner = autotvm.RPCRunner(
runner_ctor = auto_scheduler.RPCRunner if args.enable_autoscheduler else autotvm.RPCRunner
runner = runner_ctor(
key=args.rpc_key,
host=rpc_hostname,
port=rpc_port,
Expand All @@ -196,29 +260,75 @@ def drive_tune(args):
)
else:
logger.info("starting localhost tuning")
runner = autotvm.LocalRunner(
runner_ctor = (
auto_scheduler.LocalRunner if args.enable_autoscheduler else autotvm.LocalRunner
)
runner = runner_ctor(
number=args.number,
repeat=args.repeat,
timeout=args.timeout,
min_repeat_ms=min_repeat_ms,
)

tuning_option = {
"tuner": args.tuner,
"trials": args.trials,
"early_stopping": args.early_stopping,
"measure_option": autotvm.measure_option(
builder=autotvm.LocalBuilder(build_func="default"), runner=runner
),
"tuning_records": args.tuning_records,
}
logger.debug(" tuning options: %s", tuning_option)
if args.enable_autoscheduler:
# Specify hardware parameters
hardware_params = auto_scheduler.HardwareParams(
args.num_cores,
args.vector_unit_bytes,
args.cache_line_bytes,
args.max_shared_memory_per_block,
args.max_local_memory_per_block,
args.max_threads_per_block,
args.max_vthread_extent,
args.warp_size,
)
tasks, weights = autoscheduler_get_tuning_tasks(
mod=mod,
params=params,
target=target,
target_host=args.target_host,
alter_layout=args.desired_layout,
hardware_params=hardware_params,
include_simple_tasks=args.include_simple_tasks,
)

tune_tasks(tasks, args.output, **tuning_option)
# Create the autoscheduler tuning options
tuning_options = auto_scheduler.TuningOptions(
num_measure_trials=args.trials,
measure_callbacks=[auto_scheduler.RecordToFile(args.output)],
runner=runner,
early_stopping=args.early_stopping,
)

# Schedule the tasks (i.e., produce a schedule for each task)
schedule_tasks(
tasks, weights, tuning_options, args.tuning_records, args.log_estimated_latency
)
else:
tasks = autotvm_get_tuning_tasks(
mod=mod,
params=params,
target=target,
target_host=args.target_host,
alter_layout=args.desired_layout,
)

def get_tuning_tasks(mod, params, target, target_host=None, alter_layout=None):
"""Get the tuning tasks for a given relay module.
tuning_option = {
"tuner": args.tuner,
"trials": args.trials,
"early_stopping": args.early_stopping,
"measure_option": autotvm.measure_option(
builder=autotvm.LocalBuilder(build_func="default"), runner=runner
),
"tuning_records": args.tuning_records,
}
logger.debug(" tuning options: %s", tuning_option)

tune_tasks(tasks, args.output, **tuning_option)


def autotvm_get_tuning_tasks(mod, params, target, target_host=None, alter_layout=None):
"""Get the autotvm tuning tasks for a given relay module.
Parameters
----------
Expand Down Expand Up @@ -253,6 +363,91 @@ def get_tuning_tasks(mod, params, target, target_host=None, alter_layout=None):
return tasks


def autoscheduler_get_tuning_tasks(
    mod,
    params,
    target,
    target_host=None,
    alter_layout=None,
    hardware_params=None,
    include_simple_tasks=False,
):
    """Extract the autoscheduler search tasks from a relay module.

    Parameters
    ----------
    mod : tvm.relay.Module
        The relay module from which to extract tuning tasks.
    params : dict
        The params for the relay module.
    target : tvm.target.Target
        The compilation target.
    target_host : str, optional
        The compilation target for the host.
    alter_layout : str, optional
        The layout to convert the graph to. Note, the convert layout
        pass doesn't currently guarantee the whole of the graph will
        be converted to the chosen layout.
    hardware_params : Optional[HardwareParams]
        Hardware parameters used for the search tasks.
    include_simple_tasks : bool, optional
        Whether to also extract simple tasks that do not include
        complicated ops. Forwarded unchanged to the task extraction.

    Returns
    -------
    tasks : list of auto_scheduler.SearchTask
        The list of tasks to be tuned.
    weights : List[int]
        The weight (i.e. the number of appearances) of each extracted task.
    """
    # Only run the layout conversion when a (truthy) layout was requested;
    # otherwise the module is used exactly as it was handed in.
    graph = common.convert_graph_layout(mod, alter_layout) if alter_layout else mod

    # Everything except the function and its params is passed by keyword.
    extraction_options = {
        "target": target,
        "target_host": target_host,
        "hardware_params": hardware_params,
        "include_simple_tasks": include_simple_tasks,
    }
    search_tasks, weights = auto_scheduler.extract_tasks(
        graph["main"], params, **extraction_options
    )

    return search_tasks, weights


def schedule_tasks(
    tasks, task_weights, tuning_options, tuning_records=None, log_estimated_latency=False
):
    """Tune the given autoscheduler tasks (i.e., subgraphs) with a task scheduler.

    A ``TaskScheduler`` is created over the tasks and then run with the
    supplied tuning options. Where the resulting records end up is decided
    by the measure callbacks carried inside ``tuning_options``; this function
    does not write them itself.

    Parameters
    ----------
    tasks : list
        A list of auto_scheduler.SearchTask to tune.
    task_weights : list
        The weight (i.e. the number of appearances) of the extracted tasks.
    tuning_options : auto_scheduler.TuningOptions
        The options of tuning; passed as-is to ``TaskScheduler.tune``.
    tuning_records : str, optional
        The json file used to preload the autoscheduler.
    log_estimated_latency : bool, optional
        When true, a ``LogEstimatedLatency`` callback writing to
        "total_latency.tsv" is registered in addition to the table printer.
    """
    scheduler_callbacks = auto_scheduler.task_scheduler

    # The progress table is always printed; latency logging is opt-in.
    callbacks = [scheduler_callbacks.PrintTableInfo()]
    if log_estimated_latency:
        callbacks.append(scheduler_callbacks.LogEstimatedLatency("total_latency.tsv"))

    # Create the scheduler, optionally warm-started from previous records.
    scheduler = auto_scheduler.TaskScheduler(
        tasks, task_weights, load_log_file=tuning_records, callbacks=callbacks
    )

    # Tune the tasks
    scheduler.tune(tuning_options)


def tune_tasks(
tasks,
log_file,
Expand Down
29 changes: 24 additions & 5 deletions python/tvm/driver/tvmc/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from pathlib import Path

import tvm
from tvm import autotvm
from tvm import autotvm, auto_scheduler
from tvm import relay
from tvm.contrib import cc
from tvm.contrib import utils
Expand Down Expand Up @@ -182,10 +182,29 @@ def compile_model(

if tuning_records and os.path.exists(tuning_records):
logger.debug("tuning records file provided: %s", tuning_records)
with autotvm.apply_history_best(tuning_records):
with tvm.transform.PassContext(opt_level=3):
logger.debug("building relay graph with tuning records")
graph_module = relay.build(mod, tvm_target, params=params, target_host=target_host)

use_autoscheduler = True
try:
auto_scheduler.load_records(tuning_records)
except tvm._ffi.base.TVMError:
use_autoscheduler = False

if use_autoscheduler:
with auto_scheduler.ApplyHistoryBest(tuning_records):
with tvm.transform.PassContext(
opt_level=3, config={"relay.backend.use_auto_scheduler": True}
):
logger.debug("building relay graph with autoscheduler")
graph_module = relay.build(
mod, target=target, params=params, target_host=target_host
)
else:
with autotvm.apply_history_best(tuning_records):
with tvm.transform.PassContext(opt_level=3):
logger.debug("building relay graph with tuning records")
graph_module = relay.build(
mod, tvm_target, params=params, target_host=target_host
)
else:
with tvm.transform.PassContext(opt_level=3):
logger.debug("building relay graph (no tuning records provided)")
Expand Down
Loading

0 comments on commit f0f0c71

Please sign in to comment.