
[Refactor] Upgrade ray from 1.9.1 to 2.1.0 #81

Merged · 31 commits · Dec 19, 2022

Commits (the diff below shows changes from 25 of the 31 commits)
a234607  Bump ray from 1.9.1 to 2.1.0 (KKIEEK, Nov 30, 2022)
f6e85e4  Init (Dec 1, 2022)
cac34ea  Update mmseg config (Dec 1, 2022)
756147b  Fix deprecated warning (Dec 1, 2022)
73aa245  Fix trainable function signature (Dec 1, 2022)
f8fa7b2  Fix rewriter (Dec 1, 2022)
5d3ac5b  Fix minor (Dec 1, 2022)
04a5250  Fix reporter (Dec 2, 2022)
59a86da  Fix apis (Dec 2, 2022)
4fb42dd  Fix RayCheckpointHook (Dec 2, 2022)
2c1215c  Fix requirements (Dec 2, 2022)
709bb9c  Fix test code for rewriters (Dec 2, 2022)
940320b  Fix test code for hooks (Dec 2, 2022)
cddfc3c  Fix test code for tasks (Dec 2, 2022)
b47f3c0  Fix test code for apis (Dec 2, 2022)
7fd3e67  Merge branch 'main' into ray/v2.1.0 (KKIEEK, Dec 14, 2022)
ca42bfc  :memo: Del checkpoint for base task proc (yhna940, Dec 15, 2022)
411f307  Update siatune/apis/analysis.py (KKIEEK, Dec 15, 2022)
791111d  Update siatune/mm/tasks/mmtrainbase.py (KKIEEK, Dec 15, 2022)
703d5a1  Update siatune/mm/tasks/mmtrainbase.py (KKIEEK, Dec 15, 2022)
9eda02d  Support custom trainer and backend (#91) (KKIEEK, Dec 15, 2022)
082ea7b  Update siatune/mm/tasks/mmtrainbase.py (KKIEEK, Dec 15, 2022)
2cbe000  Merge branch 'main' into ray/v2.1.0 (KKIEEK, Dec 15, 2022)
789ca62  Upgrade MMTask (#97) (KKIEEK, Dec 16, 2022)
d2ff007  Fix minor (#100) (KKIEEK, Dec 16, 2022)
e63911d  Update siatune/mm/tasks/mmtrainbase.py (KKIEEK, Dec 16, 2022)
9d4f5e6  Fix typo (Dec 16, 2022)
cf5a79b  Supplement documentations (#102) (KKIEEK, Dec 17, 2022)
ab04069  Update siatune/ray/tuner.py (KKIEEK, Dec 17, 2022)
9ac7b32  Support resume (#104) (KKIEEK, Dec 19, 2022)
8574379  Merge branch 'main' into ray/v2.1.0 (KKIEEK, Dec 19, 2022)
2 changes: 1 addition & 1 deletion configs/_base_/scheduler/asynchb.py
@@ -1,4 +1,4 @@
-scheduler = dict(
+trial_scheduler = dict(
     type='AsyncHyperBandScheduler',
     time_attr='training_iteration',
     max_t=20,
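The key rename (`scheduler` → `trial_scheduler`) separates Ray Tune's trial scheduler from the LR schedulers that appear elsewhere in MM* configs. As a rough sketch of what this entry resolves to — assuming siatune's registry dispatches the `type` string to the Ray class of the same name:

```python
# Hedged sketch: the Ray 2.1 object this config plausibly builds.
# AsyncHyperBandScheduler is the real Ray class; the lookup by `type`
# string is siatune registry behavior assumed from the config style.
from ray.tune.schedulers import AsyncHyperBandScheduler

trial_scheduler = AsyncHyperBandScheduler(
    time_attr='training_iteration',  # measure trial age in reported iterations
    max_t=20,                        # hard cap per trial before termination
)
```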
7 changes: 2 additions & 5 deletions configs/mmcls/mmcls_cifar_100_asynchb_nevergrad_pso.py
@@ -5,14 +5,11 @@
 ]

 space = {
+    'data.samples_per_gpu': {{_base_.batch_size}},
     'model': {{_base_.model}},
     'model.head.num_classes': 100,
     'optimizer': {{_base_.optimizer}},
-    'data.samples_per_gpu': {{_base_.batch_size}},
 }

 task = dict(type='MMClassification')
-metric = 'val/accuracy_top-1'
-mode = 'max'
-raise_on_failed_trial = False
-num_samples = 256
+tune_cfg = dict(num_samples=8, metric='val/accuracy_top-1', mode='max')
7 changes: 2 additions & 5 deletions configs/mmdet/mmdet_asynchb_nevergrad_pso.py
@@ -5,13 +5,10 @@
 ]

 space = {
+    'data.samples_per_gpu': {{_base_.batch_size}},
     'model': {{_base_.model}},
     'optimizer': {{_base_.optimizer}},
-    'data.samples_per_gpu': {{_base_.batch_size}},
 }

 task = dict(type='MMDetection')
-metric = 'val/AP'
-mode = 'max'
-raise_on_failed_trial = False
-num_samples = 256
+tune_cfg = dict(num_samples=8, metric='val/AP', mode='max')
9 changes: 3 additions & 6 deletions configs/mmseg/mmseg_asynchb_nevergrad_pso.py
@@ -5,15 +5,12 @@
 ]

 space = {
-    'model': {{_base_.model}},
-    'optimizer': {{_base_.optimizer}},
     'data.samples_per_gpu': {{_base_.batch_size}},
+    'model': {{_base_.model}},
     'model.decode_head.num_classes': 21,
     'model.auxiliary_head.num_classes': 21,
+    'optimizer': {{_base_.optimizer}},
 }

 task = dict(type='MMSegmentation')
-metric = 'val/mIoU'
-mode = 'max'
-raise_on_failed_trial = False
-num_samples = 256
+tune_cfg = dict(num_samples=8, metric='val/mIoU', mode='max')
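All three example configs make the same move: the top-level `metric`, `mode`, `num_samples`, and `raise_on_failed_trial` keys collapse into a single `tune_cfg` dict, and the search-space keys are sorted. The new dict lines up with Ray 2.x `tune.TuneConfig`; a minimal sketch of that correspondence (the mapping is an assumption here, not shown in this diff):

```python
# Minimal sketch: a tune_cfg dict of this shape can be splatted directly
# into Ray 2.1's TuneConfig (assumed mapping, not part of this diff).
from ray import tune

tune_cfg = dict(num_samples=8, metric='val/mIoU', mode='max')
tune_config = tune.TuneConfig(**tune_cfg)
```

The old `raise_on_failed_trial` flag has no direct `TuneConfig` counterpart; in the Tuner API, trial failures surface on the returned `ResultGrid` (e.g. `ResultGrid.errors`) instead of raising by default.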
2 changes: 1 addition & 1 deletion requirements/optional.txt
@@ -1,5 +1,5 @@
 bayesian-optimization==1.2.0
-flaml==0.9.7
+flaml==1.0.14
 hyperopt==0.2.5
 mlflow==1.23.1
 nevergrad==0.4.3.post7
3 changes: 2 additions & 1 deletion requirements/runtime.txt
@@ -1,4 +1,5 @@
 pandas
 protobuf<=3.20
-ray[default]==1.9.1
+pyarrow
+ray[default]==2.1.0
 tabulate
4 changes: 4 additions & 0 deletions setup.cfg
@@ -1,3 +1,7 @@
+[flake8]
+per-file-ignores =
+    siatune/mm/tasks/mm*.py: E251,E501
+
 [isort]
 line_length = 79
 multi_line_output = 0
19 changes: 9 additions & 10 deletions siatune/apis/analysis.py
@@ -5,19 +5,19 @@
 from typing import Optional

 from mmcv.utils import Config, get_logger
-from ray import tune
+from ray.tune import ResultGrid

 from siatune.utils import ImmutableContainer, dump_cfg


-def log_analysis(analysis: tune.ExperimentAnalysis,
+def log_analysis(results: ResultGrid,
                  tune_config: Config,
                  task_config: Optional[Config] = None,
                  log_dir: Optional[str] = None) -> None:
     """Log the analysis of the experiment.

     Args:
-        analysis (tune.ExperimentAnalysis): The analysis of the experiment.
+        results (ResultGrid): Experiment results of `Tuner.fit()`.
         tune_config (Config): The tune config.
         task_config (Optional[Config]): The task config. Defaults to None.
         log_dir (Optional[str]): The log dir. Defaults to None.
@@ -33,10 +33,9 @@ def log_analysis(analysis: tune.ExperimentAnalysis,
     logger = get_logger(
         'siatune', log_file=osp.join(log_dir, f'{timestamp}.log'))

-    logger.info(
-        f'Best Hyperparam: \n'
-        f'{pformat(ImmutableContainer.decouple(analysis.best_config))}')
-    logger.info(
-        f'Best Results: \n'
-        f'{pformat(ImmutableContainer.decouple(analysis.best_result))}')
-    logger.info(f'Best Logdir: {analysis.best_logdir}')
+    result = results.get_best_result()
+    logger.info(f'Best Result: \n'
+                f'{pformat(ImmutableContainer.decouple(result))}')
+    logger.info(f'Best Hyperparam: \n'
+                f'{pformat(ImmutableContainer.decouple(result.config))}')
+    logger.info(f'Best Logdir: {result.log_dir}')
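`ray.tune.run`'s `ExperimentAnalysis` is superseded by the `ResultGrid` that `Tuner.fit()` returns, and the best-trial attributes move onto a single `Result` object. A sketch of the access pattern this relies on, assuming Ray 2.1:

```python
# Sketch of the ResultGrid -> Result mapping used above (Ray 2.1).
from ray.tune import ResultGrid

def summarize(results: ResultGrid) -> None:
    # metric/mode default to the values given in TuneConfig
    best = results.get_best_result()
    print(best.metrics)   # last reported metrics     (was analysis.best_result)
    print(best.config)    # winning hyperparameters   (was analysis.best_config)
    print(best.log_dir)   # Path to the trial dir     (was analysis.best_logdir)
```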
58 changes: 3 additions & 55 deletions siatune/apis/tune.py
@@ -1,16 +1,10 @@
 # Copyright (c) SI-Analytics. All rights reserved.
-from os import path as osp
-
-import mmcv
-import ray
 from mmcv.utils import Config

 from siatune.mm.tasks import BaseTask
-from siatune.ray.callbacks import build_callback
-from siatune.ray.schedulers import build_scheduler
-from siatune.ray.searchers import build_searcher
-from siatune.ray.spaces import build_space
-from siatune.ray.stoppers import build_stopper
+from siatune.ray import Tuner


 def tune(task_processor: BaseTask, tune_config: Config,
@@ -29,51 +23,5 @@ def tune(task_processor: BaseTask, tune_config: Config,
     trainable_cfg = tune_config.get('trainable', dict())
     trainable = task_processor.create_trainable(**trainable_cfg)

-    assert hasattr(tune_config, 'metric')
-    assert hasattr(tune_config, 'mode') and tune_config.mode in ['min', 'max']
-
-    tune_artifact_dir = osp.join(tune_config.work_dir, 'artifact')
-    mmcv.mkdir_or_exist(tune_artifact_dir)
-
-    stopper = tune_config.get('stop', None)
-    if stopper is not None:
-        stopper = build_stopper(stopper)
-
-    space = tune_config.get('space', None)
-    if space is not None:
-        space = build_space(space)
-
-    resources_per_trial = None
-    if not hasattr(trainable, 'default_resource_request'):
-        resources_per_trial = dict(
-            gpu=task_processor.num_workers *
-            task_processor.num_gpus_per_worker,
-            cpu=task_processor.num_workers *
-            task_processor.num_cpus_per_worker)
-
-    searcher = tune_config.get('searcher', None)
-    if searcher is not None:
-        searcher = build_searcher(searcher)
-
-    scheduler = tune_config.get('scheduler', None)
-    if scheduler is not None:
-        scheduler = build_scheduler(scheduler)
-
-    callbacks = tune_config.get('callbacks', None)
-    if callbacks is not None:
-        callbacks = [build_callback(callback) for callback in callbacks]
-
-    return ray.tune.run(
-        trainable,
-        name=exp_name,
-        metric=tune_config.metric,
-        mode=tune_config.mode,
-        stop=stopper,
-        config=space,
-        resources_per_trial=resources_per_trial,
-        num_samples=tune_config.get('num_samples', -1),
-        local_dir=tune_artifact_dir,
-        search_alg=searcher,
-        scheduler=scheduler,
-        raise_on_failed_trial=tune_config.get('raise_on_failed_trial', False),
-        callbacks=callbacks)
+    tuner = Tuner.from_cfg(tune_config, trainable)
+    return tuner.fit()
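Roughly fifty lines of manual assembly (stopper, space, resources, searcher, scheduler, callbacks, then `ray.tune.run`) become two lines against siatune's new `Tuner` facade; `Tuner.from_cfg` itself lives in `siatune/ray/tuner.py`, touched later in this PR. The sketch below shows only the generic `tune.run` → `tune.Tuner` correspondence in plain Ray 2.1, with placeholder names:

```python
# Hedged sketch of the Ray 1.9 -> 2.1 API mapping that Tuner.from_cfg
# presumably encapsulates. Names like `objective` and 'demo' are placeholders.
from ray import air, tune
from ray.air import session

def objective(config):                        # stand-in trainable
    session.report({'val/mIoU': config['lr']})

# Ray 1.9 equivalent, for comparison:
#   tune.run(objective, config=space, metric='val/mIoU', mode='max',
#            num_samples=8, local_dir='./artifact')

tuner = tune.Tuner(
    objective,
    param_space={'lr': tune.uniform(0.0, 1.0)},         # was `config=`
    tune_config=tune.TuneConfig(
        metric='val/mIoU', mode='max', num_samples=8),  # was tune.run kwargs
    run_config=air.RunConfig(name='demo', local_dir='./artifact'),
)
results = tuner.fit()                                   # returns a ResultGrid
```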
4 changes: 2 additions & 2 deletions siatune/mm/context/rewriters/dump.py
@@ -3,7 +3,7 @@
 from os import path as osp
 from typing import Dict

-import ray
+from ray.air import session

 from siatune.utils import dump_cfg
 from .base import BaseRewriter
@@ -46,7 +46,7 @@ def __call__(self, context: Dict) -> Dict:
             Dict: The context after rewriting.
         """
         cfg = context.pop(self.key)
-        trial_id = ray.tune.get_trial_id()
+        trial_id = session.get_trial_id()
         tmp_path = self.get_temporary_path(f'{trial_id}.py')
         setattr(context.get('args'), self.arg_name, tmp_path)
         dump_cfg(cfg, tmp_path)
4 changes: 2 additions & 2 deletions siatune/mm/context/rewriters/path.py
@@ -1,7 +1,7 @@
 # Copyright (c) SI-Analytics. All rights reserved.
 from os import path as osp

-import ray
+from ray.air import session

 from .base import BaseRewriter
 from .builder import REWRITERS
@@ -31,5 +31,5 @@ def __call__(self, context: dict) -> dict:
         """
         value = getattr(context['args'], self.arg_name)
         setattr(context['args'], self.arg_name,
-                osp.join(value, ray.tune.get_trial_id()))
+                osp.join(value, session.get_trial_id()))
         return context
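Both rewriters make the same substitution: the free functions under `ray.tune` give way to `ray.air.session`, the single per-trial context API in Ray 2.x. A minimal sketch of the calls involved, valid only inside a trainable that Tune is running:

```python
# Minimal sketch (Ray 2.1): per-trial context comes from ray.air.session.
from ray.air import session

def objective(config):
    trial_id = session.get_trial_id()    # replaces ray.tune.get_trial_id()
    trial_dir = session.get_trial_dir()  # per-trial working/log directory
    session.report({'started': 1.0})
```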
11 changes: 6 additions & 5 deletions siatune/mm/hooks/checkpoint.py
@@ -4,13 +4,13 @@
 from typing import Optional

 import mmcv
+import ray.tune as tune
 import torch
 from mmcv.parallel import is_module_wrapper
 from mmcv.runner import HOOKS, BaseRunner
 from mmcv.runner.checkpoint import get_state_dict, weights_to_cpu
 from mmcv.runner.dist_utils import master_only
 from mmcv.runner.hooks import CheckpointHook as _CheckpointHook
-from ray.tune.integration.torch import distributed_checkpoint_dir
 from torch.optim import Optimizer
@@ -100,9 +100,10 @@ def _save_checkpoint(self, runner: BaseRunner) -> None:
         for name, optim in optimizer.items():
             checkpoint['optimizer'][name] = optim.state_dict()

-        with distributed_checkpoint_dir(
-                step=(runner.epoch + 1) //
-                self.interval if self.by_epoch else (runner.iter + 1) //
-                self.interval) as checkpoint_dir:
+        step = (runner.epoch + 1) // self.interval
+        if not self.by_epoch:
+            step //= runner.iter + 1
+
+        with tune.checkpoint_dir(step=step) as checkpoint_dir:
             path = os.path.join(checkpoint_dir, 'ray_ckpt.pth')
             torch.save(checkpoint, path)
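`distributed_checkpoint_dir` from `ray.tune.integration.torch` is gone in Ray 2.x, so the hook computes the step itself and falls back to the generic `tune.checkpoint_dir` context manager, which is still shipped in Ray 2.1 as part of the legacy function-trainable API. A standalone sketch of that call:

```python
# Hedged sketch of tune.checkpoint_dir as used above (legacy function API,
# still available in Ray 2.1). Tune tracks and syncs whatever is written
# inside the yielded directory for the current trial.
import os

import ray.tune as tune
import torch

def save_ray_checkpoint(state: dict, step: int) -> None:
    with tune.checkpoint_dir(step=step) as ckpt_dir:
        torch.save(state, os.path.join(ckpt_dir, 'ray_ckpt.pth'))
```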
4 changes: 2 additions & 2 deletions siatune/mm/hooks/reporter.py
@@ -1,8 +1,8 @@
 # Copyright (c) SI-Analytics. All rights reserved.
-import ray
 from mmcv.runner import HOOKS, BaseRunner
 from mmcv.runner.dist_utils import get_dist_info
 from mmcv.runner.hooks.logger import LoggerHook
+from ray.air import session
 from torch import distributed as dist
@@ -90,4 +90,4 @@ def log(self, runner: BaseRunner) -> None:
                 filter(lambda elem: self.filtering_key in elem, tags.keys())):
             return
         tags['global_step'] = self.get_iter(runner)
-        ray.tune.report(**tags)
+        session.report(tags)
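The call-site change is more than a rename: `ray.tune.report` took metrics as keyword arguments, while `session.report` takes a single dict. A sketch of the before/after, assuming Ray 2.1:

```python
# Before (Ray 1.9): metrics passed as kwargs
#   ray.tune.report(**tags)
# After (Ray 2.1): metrics passed as one dict
from ray.air import session

def objective(config):
    tags = {'val/accuracy_top-1': 0.9, 'global_step': 100}  # illustrative tags
    session.report(tags)
```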
16 changes: 5 additions & 11 deletions siatune/mm/tasks/base.py
@@ -4,7 +4,7 @@
 from copy import deepcopy
 from typing import Any, Dict, List, Optional, Sequence

-import ray
+from ray.tune import Trainable

 from siatune.mm.context import ContextManager
 from siatune.utils import ImmutableContainer
@@ -29,9 +29,7 @@ class BaseTask(metaclass=ABCMeta):
     1. args (argparse.Namespace): The low level CLI arguments.
     2. searched_cfg (Dict):
         The configuration searched by the algorithm.
-    3. checkpoint_dir (Optional[str]):
-        The directory of checkpoints that contains the states.
-    Inputs: searched_cfg (Dict), checkpoint_dir (Optional[str])
+    Inputs: searched_cfg (Dict)
     Outputs: None
     """
@@ -106,16 +104,14 @@ def parse_args(self, args: Sequence[str]) -> argparse.Namespace:
         """
         pass

-    def context_aware_run(self,
-                          searched_cfg: Dict,
-                          checkpoint_dir: Optional[str] = None,
-                          **kwargs) -> Any:
+    def context_aware_run(self, searched_cfg: Dict) -> Any:
         """Gather and refine the information received by users and Ray.tune to
         execute the objective task.

         Args:
             searched_cfg (Dict): The searched configuration.
             kwargs (**kwargs): The kwargs.
+
         Returns:
             Any: The result of the objective task.
         """
@@ -124,9 +120,7 @@ def context_aware_run(self,
         context = dict(
             args=deepcopy(self.args),
             searched_cfg=deepcopy(ImmutableContainer.decouple(searched_cfg)),
-            checkpoint_dir=checkpoint_dir,
         )
-        context.update(kwargs)
         return context_manager(self.run)(**context)
@@ -140,7 +134,7 @@ def run(self, *, args: argparse.Namespace, **kwargs) -> None:
         pass

     @abstractmethod
-    def create_trainable(self, *args, **kwargs) -> ray.tune.Trainable:
+    def create_trainable(self, *args, **kwargs) -> Trainable:
         """Get ray trainable task.

         Args:
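Dropping `checkpoint_dir` from `context_aware_run` follows the Ray 2.x function-trainable contract: a trainable now receives only `config`, and restore state is pulled from `session.get_checkpoint()` instead of an injected directory argument. A sketch of the new-style contract (illustrative, not siatune's actual trainable):

```python
# Ray 1.9 function trainable:  def trainable(config, checkpoint_dir=None): ...
# Ray 2.1 function trainable (hedged sketch):
from ray.air import session

def trainable(config):
    ckpt = session.get_checkpoint()   # None on a fresh (non-resumed) trial
    if ckpt is not None:
        state = ckpt.to_dict()        # e.g. restore a dict-backed checkpoint
    session.report({'score': 0.0})
```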