Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Serve] Refactor ReplicaQueueLengthAutoscalingPolicy into AutoscalingPolicyManager and policy function #42242

Merged
merged 4 commits into from
Jan 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions python/ray/serve/_private/autoscaling_policy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import logging
from typing import List, Optional

from ray.serve._private.common import TargetCapacityDirection
from ray.serve._private.constants import SERVE_LOGGER_NAME
from ray.serve._private.utils import get_capacity_adjusted_num_replicas
from ray.serve.config import AutoscalingConfig

logger = logging.getLogger(SERVE_LOGGER_NAME)


class AutoscalingPolicyManager:
    """Manages autoscaling policies and the lifecycle of scaling function calls.

    Holds the (optional) autoscaling config, the policy callable derived from
    it, and a mutable ``policy_state`` dict that is passed back to the policy
    on every call so it can persist state across invocations.
    """

    def __init__(self, config: Optional[AutoscalingConfig]):
        self.config = config
        self.policy = None
        # Scratch space owned by the policy function; handed back on each call.
        self.policy_state = {}
        self._create_policy()

    def _create_policy(self) -> None:
        """Creates an autoscaling policy based on the given config.

        Leaves ``self.policy`` as None when no config was provided.
        """
        if self.config:
            self.policy = self.config.get_policy()

    def should_autoscale(self) -> bool:
        """Returns whether autoscaling should be performed."""
        return self.policy is not None

    def get_decision_num_replicas(
        self,
        curr_target_num_replicas: int,
        current_num_ongoing_requests: List[float],
        current_handle_queued_queries: float,
        target_capacity: Optional[float] = None,
        target_capacity_direction: Optional[TargetCapacityDirection] = None,
        _skip_bound_check: bool = False,
    ) -> Optional[int]:
        """Interface with the autoscaling policy to get a decision to scale replicas.

        Returns None when no autoscaling policy is configured. Otherwise, the
        decision number of replicas returned by the policy is bounded by the
        min and max replicas adjusted by the target capacity and returned.
        If `_skip_bound_check` is True, then the bounds are not applied.
        """
        # No policy configured -> no decision. Callers already handle a None
        # return, matching the Optional[int] return annotation.
        if self.policy is None:
            return None

        capacity_adjusted_min_replicas = self.get_current_lower_bound(
            target_capacity,
            target_capacity_direction,
        )
        capacity_adjusted_max_replicas = get_capacity_adjusted_num_replicas(
            self.config.max_replicas,
            target_capacity,
        )
        decision_num_replicas = self.policy(
            curr_target_num_replicas=curr_target_num_replicas,
            current_num_ongoing_requests=current_num_ongoing_requests,
            current_handle_queued_queries=current_handle_queued_queries,
            config=self.config,
            capacity_adjusted_min_replicas=capacity_adjusted_min_replicas,
            capacity_adjusted_max_replicas=capacity_adjusted_max_replicas,
            policy_state=self.policy_state,
        )

        if _skip_bound_check:
            return decision_num_replicas

        return self.apply_bounds(
            curr_target_num_replicas=decision_num_replicas,
            target_capacity=target_capacity,
            target_capacity_direction=target_capacity_direction,
        )

    def get_current_lower_bound(
        self,
        target_capacity: Optional[float] = None,
        target_capacity_direction: Optional[TargetCapacityDirection] = None,
    ) -> int:
        """Get the autoscaling lower bound, including target_capacity changes.

        The autoscaler uses initial_replicas scaled by target_capacity only
        if the target capacity direction is UP; otherwise min_replicas
        (capacity-adjusted) is the lower bound.
        """

        if self.config.initial_replicas is not None and (
            target_capacity_direction == TargetCapacityDirection.UP
        ):
            return get_capacity_adjusted_num_replicas(
                self.config.initial_replicas,
                target_capacity,
            )
        else:
            return get_capacity_adjusted_num_replicas(
                self.config.min_replicas,
                target_capacity,
            )

    def apply_bounds(
        self,
        curr_target_num_replicas: int,
        target_capacity: Optional[float] = None,
        target_capacity_direction: Optional[TargetCapacityDirection] = None,
    ) -> int:
        """Clips curr_target_num_replicas using the current bounds."""

        upper_bound = get_capacity_adjusted_num_replicas(
            self.config.max_replicas,
            target_capacity,
        )
        lower_bound = self.get_current_lower_bound(
            target_capacity,
            target_capacity_direction,
        )
        return max(lower_bound, min(upper_bound, curr_target_num_replicas))
2 changes: 1 addition & 1 deletion python/ray/serve/_private/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,4 +269,4 @@
)

# The default autoscaling policy to use if none is specified.
DEFAULT_AUTOSCALING_POLICY = "ray.serve.autoscaling_policy:DefaultAutoscalingPolicy"
DEFAULT_AUTOSCALING_POLICY = "ray.serve.autoscaling_policy:default_autoscaling_policy"
11 changes: 4 additions & 7 deletions python/ray/serve/_private/deployment_info.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from typing import Any, Dict, Optional

import ray
from ray.serve._private.autoscaling_policy import AutoscalingPolicyManager
from ray.serve._private.common import TargetCapacityDirection
from ray.serve._private.config import DeploymentConfig, ReplicaConfig
from ray.serve._private.constants import REPLICA_CONTROL_PLANE_CONCURRENCY_GROUP
from ray.serve.autoscaling_policy import DefaultAutoscalingPolicy
from ray.serve.generated.serve_pb2 import DeploymentInfo as DeploymentInfoProto
from ray.serve.generated.serve_pb2 import (
TargetCapacityDirection as TargetCapacityDirectionProto,
Expand Down Expand Up @@ -53,12 +53,9 @@ def __init__(
self.target_capacity = target_capacity
self.target_capacity_direction = target_capacity_direction

if deployment_config.autoscaling_config is not None:
self.autoscaling_policy = DefaultAutoscalingPolicy(
deployment_config.autoscaling_config
)
else:
self.autoscaling_policy = None
self.autoscaling_policy_manager = AutoscalingPolicyManager(
config=deployment_config.autoscaling_config
)

def __getstate__(self) -> Dict[Any, Any]:
clean_dict = self.__dict__.copy()
Expand Down
51 changes: 27 additions & 24 deletions python/ray/serve/_private/deployment_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from ray.serve import metrics
from ray.serve._private import default_impl
from ray.serve._private.autoscaling_metrics import InMemoryMetricsStore
from ray.serve._private.autoscaling_policy import AutoscalingPolicyManager
from ray.serve._private.cluster_node_info_cache import ClusterNodeInfoCache
from ray.serve._private.common import (
DeploymentID,
Expand Down Expand Up @@ -1260,17 +1261,21 @@ def __init__(

self._last_notified_running_replica_infos: List[RunningReplicaInfo] = []

@property
def autoscaling_policy_manager(self) -> AutoscalingPolicyManager:
return self._target_state.info.autoscaling_policy_manager

def should_autoscale(self) -> bool:
"""
Check if the deployment is under autoscaling
"""
return self._target_state.info.autoscaling_policy is not None
return self.autoscaling_policy_manager.should_autoscale()

def get_autoscale_metric_lookback_period(self) -> float:
"""
Return the autoscaling metrics look back period
"""
return self._target_state.info.autoscaling_policy.config.look_back_period_s
return self.autoscaling_policy_manager.config.look_back_period_s

def get_checkpoint_data(self) -> DeploymentTargetState:
"""
Expand Down Expand Up @@ -1497,18 +1502,16 @@ def deploy(self, deployment_info: DeploymentInfo) -> bool:
return False

# Decide new target num_replicas.
autoscaling_policy = deployment_info.autoscaling_policy
if autoscaling_policy is not None:
if (
deployment_settings_changed
and autoscaling_policy.config.initial_replicas is not None
):
autoscaling_policy_manager = deployment_info.autoscaling_policy_manager
if autoscaling_policy_manager.should_autoscale():
initial_replicas = autoscaling_policy_manager.config.initial_replicas
if deployment_settings_changed and initial_replicas is not None:
target_num_replicas = get_capacity_adjusted_num_replicas(
autoscaling_policy.config.initial_replicas,
initial_replicas,
deployment_info.target_capacity,
)
else:
target_num_replicas = autoscaling_policy.apply_bounds(
target_num_replicas = autoscaling_policy_manager.apply_bounds(
self._target_state.target_num_replicas,
deployment_info.target_capacity,
deployment_info.target_capacity_direction,
Expand Down Expand Up @@ -1581,16 +1584,19 @@ def autoscale(self, current_handle_queued_queries: int) -> int:
return

current_num_ongoing_requests = self.get_replica_current_ongoing_requests()
autoscaling_policy = self._target_state.info.autoscaling_policy
decision_num_replicas = autoscaling_policy.get_decision_num_replicas(
autoscaling_policy_manager = self.autoscaling_policy_manager
decision_num_replicas = autoscaling_policy_manager.get_decision_num_replicas(
curr_target_num_replicas=self._target_state.target_num_replicas,
current_num_ongoing_requests=current_num_ongoing_requests,
current_handle_queued_queries=current_handle_queued_queries,
target_capacity=self._target_state.info.target_capacity,
target_capacity_direction=self._target_state.info.target_capacity_direction,
)

if decision_num_replicas == self._target_state.target_num_replicas:
if (
decision_num_replicas is None
or decision_num_replicas == self._target_state.target_num_replicas
):
return

logger.info(
Expand Down Expand Up @@ -1639,20 +1645,17 @@ def _is_within_autoscaling_bounds(self) -> bool:
states=[ReplicaState.RUNNING], version=target_version
)

autoscaling_policy = self._target_state.info.autoscaling_policy
assert autoscaling_policy is not None
assert self.autoscaling_policy_manager is not None

lower_bound = autoscaling_policy.get_current_lower_bound(
self._target_state.info.target_capacity,
self._target_state.info.target_capacity_direction,
)
upper_bound = get_capacity_adjusted_num_replicas(
autoscaling_policy.config.max_replicas,
self._target_state.info.target_capacity,
return (
self.autoscaling_policy_manager.apply_bounds(
num_replicas_running_at_target_version,
self._target_state.info.target_capacity,
self._target_state.info.target_capacity_direction,
)
== num_replicas_running_at_target_version
)

return lower_bound <= num_replicas_running_at_target_version <= upper_bound

def delete(self) -> None:
if not self._target_state.deleting:
self._set_target_state_deleting()
Expand Down
Loading
Loading