@@ -698,6 +698,7 @@ def deploy(
         deploy_request_timeout: Optional[float] = None,
         autoscaling_target_cpu_utilization: Optional[int] = None,
         autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
+        autoscaling_target_request_count_per_minute: Optional[int] = None,
         deployment_resource_pool: Optional[DeploymentResourcePool] = None,
         disable_container_logging: bool = False,
         fast_tryout_enabled: bool = False,
@@ -778,6 +779,8 @@ def deploy(
             autoscaling_target_accelerator_duty_cycle (int): Target Accelerator Duty
                 Cycle. Must also set accelerator_type and accelerator_count if
                 specified. A default value of 60 will be used if not specified.
+            autoscaling_target_request_count_per_minute (int): Target request
+                count per minute per instance.
             deployment_resource_pool (DeploymentResourcePool): Optional.
                 Resource pool where the model will be deployed. All models that
                 are deployed to the same DeploymentResourcePool will be hosted in
@@ -806,7 +809,6 @@ def deploy(
             multihost_gpu_node_count (int): Optional. The number of nodes per
                 replica for multihost GPU deployments. Required for multihost GPU
                 deployments.
-
         """
         self._sync_gca_resource_if_skipped()
 
@@ -843,6 +845,7 @@ def deploy(
             deploy_request_timeout=deploy_request_timeout,
             autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
             autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
+            autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
             deployment_resource_pool=deployment_resource_pool,
             disable_container_logging=disable_container_logging,
             fast_tryout_enabled=fast_tryout_enabled,
@@ -871,6 +874,7 @@ def _deploy(
         deploy_request_timeout: Optional[float] = None,
         autoscaling_target_cpu_utilization: Optional[int] = None,
         autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
+        autoscaling_target_request_count_per_minute: Optional[int] = None,
         deployment_resource_pool: Optional[DeploymentResourcePool] = None,
         disable_container_logging: bool = False,
         fast_tryout_enabled: bool = False,
@@ -945,6 +949,8 @@ def _deploy(
             autoscaling_target_accelerator_duty_cycle (int): Target Accelerator Duty
                 Cycle. Must also set accelerator_type and accelerator_count if
                 specified. A default value of 60 will be used if not specified.
+            autoscaling_target_request_count_per_minute (int): Target request
+                count per minute per instance.
             deployment_resource_pool (DeploymentResourcePool): Optional.
                 Resource pool where the model will be deployed. All models that
                 are deployed to the same DeploymentResourcePool will be hosted in
@@ -999,6 +1005,7 @@ def _deploy(
             deploy_request_timeout=deploy_request_timeout,
             autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
             autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
+            autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
             deployment_resource_pool=deployment_resource_pool,
             disable_container_logging=disable_container_logging,
             fast_tryout_enabled=fast_tryout_enabled,
@@ -1034,6 +1041,7 @@ def _deploy_call(
         deploy_request_timeout: Optional[float] = None,
         autoscaling_target_cpu_utilization: Optional[int] = None,
         autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
+        autoscaling_target_request_count_per_minute: Optional[int] = None,
         deployment_resource_pool: Optional[DeploymentResourcePool] = None,
         disable_container_logging: bool = False,
         fast_tryout_enabled: bool = False,
@@ -1115,6 +1123,8 @@ def _deploy_call(
                 Accelerator Duty Cycle. Must also set accelerator_type and
                 accelerator_count if specified. A default value of 60 will be used if
                 not specified.
+            autoscaling_target_request_count_per_minute (int): Optional. Target
+                request count per minute per instance.
             deployment_resource_pool (DeploymentResourcePool): Optional.
                 Resource pool where the model will be deployed. All models that
                 are deployed to the same DeploymentResourcePool will be hosted in
@@ -1194,6 +1204,7 @@ def _deploy_call(
             or accelerator_type
             or accelerator_count
             or autoscaling_target_accelerator_duty_cycle
+            or autoscaling_target_request_count_per_minute
             or autoscaling_target_cpu_utilization
         )
 
@@ -1206,9 +1217,11 @@ def _deploy_call(
         if provided_custom_machine_spec and not use_dedicated_resources:
             _LOGGER.info(
                 "Model does not support dedicated deployment resources. "
-                "The machine_type, accelerator_type and accelerator_count,"
-                "autoscaling_target_accelerator_duty_cycle,"
-                "autoscaling_target_cpu_utilization parameters are ignored."
+                "The machine_type, accelerator_type and accelerator_count, "
+                "autoscaling_target_accelerator_duty_cycle, "
+                "autoscaling_target_cpu_utilization, "
+                "autoscaling_target_request_count_per_minute parameters "
+                "are ignored."
             )
 
         if use_dedicated_resources and not machine_type:
@@ -1250,6 +1263,20 @@ def _deploy_call(
                 [autoscaling_metric_spec]
             )
 
+        if autoscaling_target_request_count_per_minute:
+            autoscaling_metric_spec = (
+                gca_machine_resources_compat.AutoscalingMetricSpec(
+                    metric_name=(
+                        "aiplatform.googleapis.com/prediction/online/"
+                        "request_count"
+                    ),
+                    target=autoscaling_target_request_count_per_minute,
+                )
+            )
+            dedicated_resources.autoscaling_metric_specs.extend(
+                [autoscaling_metric_spec]
+            )
+
         dedicated_resources.machine_spec = machine_spec
 
         # Checking if flag fast_tryout_enabled is set, only in v1beta1
@@ -1296,15 +1323,18 @@ def _deploy_call(
                 or accelerator_count
                 or autoscaling_target_accelerator_duty_cycle
                 or autoscaling_target_cpu_utilization
+                or autoscaling_target_request_count_per_minute
             )
 
             if provided_custom_machine_spec:
                 raise ValueError(
                     "Conflicting parameters in deployment request. "
-                    "The machine_type, accelerator_type and accelerator_count,"
-                    "autoscaling_target_accelerator_duty_cycle,"
-                    "autoscaling_target_cpu_utilization parameters may not be set "
-                    "when `deployment_resource_pool` is specified."
+                    "The machine_type, accelerator_type and accelerator_count, "
+                    "autoscaling_target_accelerator_duty_cycle, "
+                    "autoscaling_target_cpu_utilization, "
+                    "autoscaling_target_request_count_per_minute parameters "
+                    "may not be set when `deployment_resource_pool` is "
+                    "specified."
                 )
 
             deployed_model.shared_resources = deployment_resource_pool.resource_name
@@ -1561,6 +1591,7 @@ def deploy(
         deploy_request_timeout: Optional[float] = None,
         autoscaling_target_cpu_utilization: Optional[int] = None,
         autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
+        autoscaling_target_request_count_per_minute: Optional[int] = None,
         deployment_resource_pool: Optional[DeploymentResourcePool] = None,
         disable_container_logging: bool = False,
         fast_tryout_enabled: bool = False,
@@ -1662,6 +1693,8 @@ def deploy(
                 Accelerator Duty Cycle. Must also set accelerator_type and
                 accelerator_count if specified. A default value of 60 will be used if
                 not specified.
+            autoscaling_target_request_count_per_minute (int): Optional. Target
+                request count per minute per instance.
             deployment_resource_pool (DeploymentResourcePool): Optional.
                 Resource pool where the model will be deployed. All models that
                 are deployed to the same DeploymentResourcePool will be hosted in
@@ -1688,8 +1721,8 @@ def deploy(
             rollout_options (RolloutOptions):
                 Optional. Options to configure a rolling deployment.
             multihost_gpu_node_count (int):
-                Optional. The number of nodes per replica for multihost GPU
-                deployments. Required for multihost GPU deployments.
+                Optional. The number of nodes per replica for multihost GPU
+                deployments. Required for multihost GPU deployments.
 
         Returns:
             endpoint (Union[Endpoint, models.PrivateEndpoint]):
@@ -1744,6 +1777,7 @@ def deploy(
             deploy_request_timeout=deploy_request_timeout,
             autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
             autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
+            autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
             deployment_resource_pool=deployment_resource_pool,
             disable_container_logging=disable_container_logging,
             fast_tryout_enabled=fast_tryout_enabled,
@@ -1781,6 +1815,7 @@ def _deploy(
         deploy_request_timeout: Optional[float] = None,
         autoscaling_target_cpu_utilization: Optional[int] = None,
         autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
+        autoscaling_target_request_count_per_minute: Optional[int] = None,
         deployment_resource_pool: Optional[DeploymentResourcePool] = None,
         disable_container_logging: bool = False,
         fast_tryout_enabled: bool = False,
@@ -1874,6 +1909,8 @@ def _deploy(
                 Accelerator Duty Cycle. Must also set accelerator_type and
                 accelerator_count if specified. A default value of 60 will be used if
                 not specified.
+            autoscaling_target_request_count_per_minute (int): Optional. Target
+                request count per minute per instance.
             deployment_resource_pool (DeploymentResourcePool): Optional.
                 Resource pool where the model will be deployed. All models that
                 are deployed to the same DeploymentResourcePool will be hosted in
@@ -1901,7 +1938,6 @@ def _deploy(
             multihost_gpu_node_count (int):
                 Optional. The number of nodes per replica for multihost GPU
                 deployments. Required for multihost GPU deployments.
-
         Returns:
             endpoint (Union[Endpoint, models.PrivateEndpoint]):
                 Endpoint with the deployed model.
@@ -1961,6 +1997,7 @@ def _deploy(
             deploy_request_timeout=deploy_request_timeout,
             autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
             autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
+            autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
             deployment_resource_pool=deployment_resource_pool,
             disable_container_logging=disable_container_logging,
             fast_tryout_enabled=fast_tryout_enabled,
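
For context, here is a minimal usage sketch of the parameter this diff introduces, assuming it ships as shown above. The project, region, and model ID below are hypothetical placeholders; only `autoscaling_target_request_count_per_minute` is new, and it feeds the `aiplatform.googleapis.com/prediction/online/request_count` autoscaling metric built in `_deploy_call`.

    from google.cloud import aiplatform

    # Hypothetical project, region, and model ID used for illustration only.
    aiplatform.init(project="my-project", location="us-central1")
    model = aiplatform.Model(model_name="1234567890")

    # Deploy on dedicated resources and scale on request throughput:
    # target roughly 100 requests per minute per replica, between 1 and 5 replicas.
    endpoint = model.deploy(
        machine_type="n1-standard-4",
        min_replica_count=1,
        max_replica_count=5,
        autoscaling_target_request_count_per_minute=100,
    )

As with `autoscaling_target_cpu_utilization` and `autoscaling_target_accelerator_duty_cycle`, the new argument only takes effect when the model is deployed with dedicated resources and cannot be combined with `deployment_resource_pool`.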