
Commit f863e3d

Delete online autotuner

Signed-off-by: Edward Z. Yang <ezyang@fb.com>
ghstack-source-id: 9188a92
Pull Request resolved: #1042

1 parent: 058b358
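
For context, the removed backend was exposed as the online_autotuner callable and registered as BACKENDS["autotune"] (see the torchdynamo/optimizations/inference.py diff below). Before this commit it could be selected roughly as in this sketch; the model and inputs are hypothetical, and the context-manager usage mirrors the benchmark code in this diff:

    import torch
    import torchdynamo
    from torchdynamo.optimizations.inference import online_autotuner  # removed by this commit

    model = torch.nn.Linear(8, 8).eval()  # hypothetical toy model

    # torchdynamo.optimize(...) is used as a context manager, as in the
    # benchmark code below; the first run of each captured graph benchmarks
    # the candidate backends, and later runs reuse the winner.
    with torchdynamo.optimize(online_autotuner, nopython=False):
        model(torch.randn(2, 8))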

File tree (4 files changed: +1, -70 lines)

Makefile
benchmarks/common.py
torchdynamo/optimizations/__init__.py
torchdynamo/optimizations/inference.py

Makefile

Lines changed: 0 additions & 6 deletions

@@ -124,12 +124,6 @@ offline-autotune-gpu: develop
 	python autotune.py --nvfuser
 	python benchmarks/torchbench.py --nvfuser -d cuda --offline-autotune -n100
 
-online-autotune-cpu: develop
-	python benchmarks/torchbench.py --online-autotune -n50
-
-online-autotune-gpu: develop
-	python benchmarks/torchbench.py --nvfuser -d cuda --online-autotune -n100
-
 fixed1-gpu: develop
 	python benchmarks/torchbench.py --nvfuser -d cuda --speedup-fixed1 -n100
 

benchmarks/common.py

Lines changed: 0 additions & 8 deletions

@@ -29,7 +29,6 @@
 from torchdynamo.optimizations.inference import fixed_strategy1
 from torchdynamo.optimizations.inference import fixed_strategy2
 from torchdynamo.optimizations.inference import offline_autotuner
-from torchdynamo.optimizations.inference import online_autotuner
 from torchdynamo.optimizations.log_args import conv_args_analysis
 from torchdynamo.profiler import Profiler
 from torchdynamo.profiler import fx_insert_profiling
@@ -1284,9 +1283,6 @@ def parse_args():
     group.add_argument(
         "--coverage", action="store_true", help="(default) " + help(coverage_experiment)
     )
-    group.add_argument(
-        "--online-autotune", action="store_true", help=help(speedup_experiment)
-    )
     group.add_argument(
         "--offline-autotune", action="store_true", help=help(speedup_experiment)
     )
@@ -1560,10 +1556,6 @@ def main(runner, original_dir=None):
         optimize_ctx = torchdynamo.optimize("inductor", nopython=args.nopython)
         experiment = speedup_experiment
         output_filename = "inductor.csv"
-    elif args.online_autotune:
-        optimize_ctx = torchdynamo.optimize(online_autotuner, nopython=args.nopython)
-        experiment = speedup_experiment
-        output_filename = "speedups.csv"
     elif args.offline_autotune:
         optimize_ctx = torchdynamo.optimize(offline_autotuner, nopython=args.nopython)
         experiment = speedup_experiment
torchdynamo/optimizations/__init__.py

Lines changed: 1 addition & 2 deletions

@@ -1,8 +1,7 @@
 from .backends import BACKENDS
 from .inference import offline_autotuner
-from .inference import online_autotuner
 from .training import create_aot_backends
 
 create_aot_backends()
 
-__all__ = ["online_autotuner", "offline_autotuner", "BACKENDS"]
+__all__ = ["offline_autotuner", "BACKENDS"]

torchdynamo/optimizations/inference.py

Lines changed: 0 additions & 54 deletions

@@ -9,7 +9,6 @@
 import time
 from collections import defaultdict
 
-import numpy as np
 import torch
 
 from .. import config
@@ -19,7 +18,6 @@
 from ..utils import clone_inputs
 from ..utils import count_calls
 from ..utils import counters
-from ..utils import timed
 from .backends import BACKENDS
 from .normalize import long_name
 from .normalize import normalize_ir
@@ -257,55 +255,3 @@ def argmin(perf):
     # small bias torwards using eager since it is more robust
     best_sec *= 0.99
     return best
-
-
-class OnlineAutotuner(TorchScriptStrategy):
-    repeat = 15
-
-    def candidate(self):
-        if check_requires_grad(self.gm, self.original_example_inputs):
-            warning("not optimizing requires_grad=True")
-            return None
-        self.scripted = self.scripted.eval()
-        example_inputs_copy = self.example_inputs
-        models = [("eager", self.gm.forward)]
-        for name in self.select_backends():
-            try:
-                compiled_model = BACKENDS[name](self.scripted, example_inputs_copy)
-                if compiled_model is None:
-                    continue
-                self.restore()
-                result = compiled_model(*self.example_inputs)
-                assert same(result, self.correct)
-                models.append((name, compiled_model))
-            except AssertionError:
-                logging.exception(f"incorrect while running {name}")
-            except Exception:
-                logging.exception(f"error while running {name}")
-
-        timings = np.zeros((self.repeat, len(models)), np.float64)
-        for rep in range(timings.shape[0]):
-            # interleave the runs to handle frequency scaling and load changes
-            for i, (n, m) in enumerate(models):
-                result, timings[rep, i] = timed(m, example_inputs_copy)
-        median = np.median(timings, axis=0)
-        median[0] *= 0.99  # a bias towards eager
-        best = int(np.argmin(median))
-        counters["backend"][models[best][0]] += 1
-        return models[best][1]
-
-    def select_backends(self):
-        if check_is_cuda(self.gm, self.original_example_inputs):
-            backend_names = [
-                "ts",
-                "cudagraphs_ts_ofi",
-                "nnc_ofi",
-                "tensorrt",
-            ]
-        else:
-            backend_names = ["ofi", "onnxrt_cpu"]
-        return backend_names
-
-
-online_autotuner = OnlineAutotuner.compile_fn
-BACKENDS["autotune"] = online_autotuner
