From 5672ebd738212841d47b7308ee892937024dfaa7 Mon Sep 17 00:00:00 2001 From: Andrei Tcibin Date: Tue, 12 Sep 2023 13:04:16 +0300 Subject: [PATCH] Add support for insufficient instance capacity mgmt to gcp (#3364) --- .../pipe-common/pipeline/autoscaling/awsprovider.py | 7 ++++--- .../pipeline/autoscaling/azureprovider.py | 5 +++-- .../pipeline/autoscaling/cloudprovider.py | 5 ++++- .../pipe-common/pipeline/autoscaling/gcpprovider.py | 12 +++++++++--- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/workflows/pipe-common/pipeline/autoscaling/awsprovider.py b/workflows/pipe-common/pipeline/autoscaling/awsprovider.py index a10117e7e9..461c059792 100644 --- a/workflows/pipe-common/pipeline/autoscaling/awsprovider.py +++ b/workflows/pipe-common/pipeline/autoscaling/awsprovider.py @@ -27,7 +27,8 @@ from botocore.exceptions import ClientError -from .cloudprovider import AbstractInstanceProvider, LIMIT_EXCEEDED_ERROR_MASSAGE, LIMIT_EXCEEDED_EXIT_CODE +from .cloudprovider import AbstractInstanceProvider, \ + LIMIT_EXCEEDED_ERROR_MESSAGE, LIMIT_EXCEEDED_EXIT_CODE from pipeline import TaskStatus from . import utils @@ -261,7 +262,7 @@ def __run_on_demand_instance(self, ins_img, ins_key, ins_type, ins_hdd, kms_ency ) except ClientError as client_error: if 'InstanceLimitExceeded' in client_error.message: - utils.pipe_log_warn(LIMIT_EXCEEDED_ERROR_MASSAGE) + utils.pipe_log_warn(LIMIT_EXCEEDED_ERROR_MESSAGE) sys.exit(LIMIT_EXCEEDED_EXIT_CODE) else: raise client_error @@ -443,7 +444,7 @@ def __find_spot_instance(self, bid_price, run_id, pool_id, ins_img, ins_type, in except ClientError as client_error: if 'Max spot instance count exceeded' in client_error.message or \ 'InstanceLimitExceeded' in client_error.message: - utils.pipe_log_warn(LIMIT_EXCEEDED_ERROR_MASSAGE) + utils.pipe_log_warn(LIMIT_EXCEEDED_ERROR_MESSAGE) sys.exit(LIMIT_EXCEEDED_EXIT_CODE) else: raise client_error diff --git a/workflows/pipe-common/pipeline/autoscaling/azureprovider.py b/workflows/pipe-common/pipeline/autoscaling/azureprovider.py index 6e431d6e87..6f2ba35410 100644 --- a/workflows/pipe-common/pipeline/autoscaling/azureprovider.py +++ b/workflows/pipe-common/pipeline/autoscaling/azureprovider.py @@ -26,7 +26,8 @@ from azure.mgmt.network import NetworkManagementClient from msrestazure.azure_exceptions import CloudError -from pipeline.autoscaling.cloudprovider import AbstractInstanceProvider, LIMIT_EXCEEDED_ERROR_MASSAGE, LIMIT_EXCEEDED_EXIT_CODE +from pipeline.autoscaling.cloudprovider import AbstractInstanceProvider, \ + LIMIT_EXCEEDED_ERROR_MESSAGE, LIMIT_EXCEEDED_EXIT_CODE from pipeline.autoscaling import utils @@ -403,7 +404,7 @@ def __create_node_resource(self, service, instance_name, node_parameters): self.__delete_all_by_run_id(node_parameters['tags']['Name']) error_message = client_error.__str__() if 'OperationNotAllowed' in error_message or 'ResourceQuotaExceeded' in error_message: - utils.pipe_log_warn(LIMIT_EXCEEDED_ERROR_MASSAGE) + utils.pipe_log_warn(LIMIT_EXCEEDED_ERROR_MESSAGE) sys.exit(LIMIT_EXCEEDED_EXIT_CODE) else: raise client_error diff --git a/workflows/pipe-common/pipeline/autoscaling/cloudprovider.py b/workflows/pipe-common/pipeline/autoscaling/cloudprovider.py index 55413c0e94..086ab6f9b8 100644 --- a/workflows/pipe-common/pipeline/autoscaling/cloudprovider.py +++ b/workflows/pipe-common/pipeline/autoscaling/cloudprovider.py @@ -12,8 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -LIMIT_EXCEEDED_ERROR_MASSAGE = 'Instance limit exceeded. A new one will be launched as soon as free space will be available.' LIMIT_EXCEEDED_EXIT_CODE = 6 +LIMIT_EXCEEDED_ERROR_MESSAGE = 'Instance limit exceeded. A new one will be launched as soon as free space will be available.' + +INSUFFICIENT_CAPACITY_EXIT_CODE = 7 +INSUFFICIENT_CAPACITY_ERROR_MESSAGE = 'Insufficient instance capacity.' class AbstractInstanceProvider(object): diff --git a/workflows/pipe-common/pipeline/autoscaling/gcpprovider.py b/workflows/pipe-common/pipeline/autoscaling/gcpprovider.py index 37f40bdfc2..21dcd8e465 100644 --- a/workflows/pipe-common/pipeline/autoscaling/gcpprovider.py +++ b/workflows/pipe-common/pipeline/autoscaling/gcpprovider.py @@ -18,7 +18,9 @@ import time import uuid -from cloudprovider import AbstractInstanceProvider, LIMIT_EXCEEDED_ERROR_MASSAGE, LIMIT_EXCEEDED_EXIT_CODE +from cloudprovider import AbstractInstanceProvider, \ + LIMIT_EXCEEDED_ERROR_MESSAGE, LIMIT_EXCEEDED_EXIT_CODE, \ + INSUFFICIENT_CAPACITY_ERROR_MESSAGE, INSUFFICIENT_CAPACITY_EXIT_CODE from random import randint from time import sleep @@ -122,9 +124,13 @@ def run_instance(self, is_spot, bid_price, ins_type, ins_hdd, ins_img, ins_platf body=body).execute() self.__wait_for_operation(response['name']) except Exception as client_error: - if 'quota' in client_error.__str__().lower(): - utils.pipe_log_warn(LIMIT_EXCEEDED_ERROR_MASSAGE) + err_msg = client_error.__str__().lower() + if 'quota' in err_msg: + utils.pipe_log_warn(LIMIT_EXCEEDED_ERROR_MESSAGE) sys.exit(LIMIT_EXCEEDED_EXIT_CODE) + elif 'instance is currently unavailable' in err_msg: + utils.pipe_log_warn(INSUFFICIENT_CAPACITY_ERROR_MESSAGE) + sys.exit(INSUFFICIENT_CAPACITY_EXIT_CODE) else: raise client_error