From bd0c4a0d1a6a05e1608a631e2bf656546dd7fda1 Mon Sep 17 00:00:00 2001 From: neko12583 <1258375097@qq.com> Date: Mon, 16 Oct 2023 17:58:23 +0800 Subject: [PATCH] =?UTF-8?q?feature:=20=E5=90=8E=E5=8F=B0=E7=BB=84=E4=BB=B6?= =?UTF-8?q?=E6=9C=8D=E5=8A=A1=E6=94=AF=E6=8C=81=E9=85=8D=E7=BD=AE=E8=B6=85?= =?UTF-8?q?=E6=97=B6=E6=97=B6=E9=97=B4(closed=20#1696)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../collections/agent_new/configure_policy.py | 4 ++-- .../collections/agent_new/get_agent_status.py | 4 ++-- .../collections/agent_new/install.py | 7 +++--- .../agent_new/push_host_identifier.py | 4 ++-- apps/backend/components/collections/base.py | 17 ++++++++++++- apps/backend/components/collections/job.py | 4 ++-- apps/backend/components/collections/plugin.py | 3 +-- .../components/collections/agent_new/base.py | 24 +++++++++++++++++-- .../agent_new/test_get_agent_status.py | 2 +- .../agent_new/test_push_host_identifier.py | 2 +- apps/node_man/models.py | 2 ++ 11 files changed, 55 insertions(+), 18 deletions(-) diff --git a/apps/backend/components/collections/agent_new/configure_policy.py b/apps/backend/components/collections/agent_new/configure_policy.py index 3ff55f475..b76fecc86 100644 --- a/apps/backend/components/collections/agent_new/configure_policy.py +++ b/apps/backend/components/collections/agent_new/configure_policy.py @@ -11,7 +11,7 @@ from django.utils.translation import ugettext_lazy as _ -from apps.backend.api.constants import POLLING_INTERVAL, POLLING_TIMEOUT +from apps.backend.api.constants import POLLING_INTERVAL from apps.backend.components.collections.agent_new.base import AgentBaseService from apps.node_man import models from apps.node_man.handlers.security_group import get_security_group_factory @@ -56,7 +56,7 @@ def _schedule(self, data, parent_data, callback_data=None): self.finish_schedule() return True - elif polling_time + POLLING_INTERVAL > POLLING_TIMEOUT / 2: + elif polling_time + POLLING_INTERVAL > self.service_polling_timeout / 2: self.move_insts_to_failed(subscription_instance_ids, _("配置到Gse和Nginx的策略失败请联系节点管理维护人员")) self.finish_schedule() return False diff --git a/apps/backend/components/collections/agent_new/get_agent_status.py b/apps/backend/components/collections/agent_new/get_agent_status.py index b4722c55f..0cdea1ed4 100644 --- a/apps/backend/components/collections/agent_new/get_agent_status.py +++ b/apps/backend/components/collections/agent_new/get_agent_status.py @@ -13,7 +13,7 @@ from django.utils.translation import ugettext_lazy as _ -from apps.backend.api.constants import POLLING_INTERVAL, POLLING_TIMEOUT +from apps.backend.api.constants import POLLING_INTERVAL from apps.node_man import constants, models from pipeline.core.flow import Service, StaticIntervalGenerator @@ -141,7 +141,7 @@ def _schedule(self, data, parent_data, callback_data=None): return polling_time = data.get_one_of_outputs("polling_time") - if polling_time + POLLING_INTERVAL > POLLING_TIMEOUT: + if polling_time + POLLING_INTERVAL > self.service_polling_timeout: sub_inst_ids = [host_id__sub_inst_id_map[host_id] for host_id in host_ids_need_to_query] self.move_insts_to_failed(sub_inst_ids=sub_inst_ids, log_content=_("查询 GSE 超时")) self.finish_schedule() diff --git a/apps/backend/components/collections/agent_new/install.py b/apps/backend/components/collections/agent_new/install.py index 2ab99807e..de3ba9f72 100644 --- a/apps/backend/components/collections/agent_new/install.py +++ b/apps/backend/components/collections/agent_new/install.py @@ -26,7 +26,7 @@ from apps.backend.agent import solution_maker from apps.backend.agent.tools import InstallationTools -from apps.backend.api.constants import POLLING_INTERVAL, POLLING_TIMEOUT +from apps.backend.api.constants import POLLING_INTERVAL from apps.backend.constants import ( REDIS_AGENT_CONF_KEY_TPL, REDIS_INSTALL_CALLBACK_KEY_TPL, @@ -365,10 +365,11 @@ def _execute(self, data, parent_data, common_data: base.AgentCommonData): # 缓存 Agent 配置 pipeline.mset(cache_key__config_content_map) # 设置过期时间 + polling_timeout = self.service_polling_timeout for cache_key in cache_key__config_content_map: # 根据调度超时时间预估一个过期时间 # 由于此时还未执行「命令下发」动作,随机增量过期时长,避免缓存雪崩 - pipeline.expire(cache_key, POLLING_TIMEOUT + random.randint(POLLING_TIMEOUT, 2 * POLLING_TIMEOUT)) + pipeline.expire(cache_key, polling_timeout + random.randint(polling_timeout, 2 * polling_timeout)) pipeline.execute() remote_conn_helpers_gby_result_type = self.bulk_check_ssh(remote_conn_helpers=lan_windows_sub_inst) @@ -866,7 +867,7 @@ def _schedule(self, data, parent_data, callback_data=None): return True polling_time = data.get_one_of_outputs("polling_time") - if polling_time + POLLING_INTERVAL > POLLING_TIMEOUT: + if polling_time + POLLING_INTERVAL > self.service_polling_timeout: self.move_insts_to_failed(left_scheduling_sub_inst_ids, _("安装超时")) self.finish_schedule() data.outputs.polling_time = polling_time + POLLING_INTERVAL diff --git a/apps/backend/components/collections/agent_new/push_host_identifier.py b/apps/backend/components/collections/agent_new/push_host_identifier.py index eec1925d7..df227c1f3 100644 --- a/apps/backend/components/collections/agent_new/push_host_identifier.py +++ b/apps/backend/components/collections/agent_new/push_host_identifier.py @@ -12,7 +12,7 @@ from django.utils.translation import ugettext_lazy as _ -from apps.backend.api.constants import POLLING_INTERVAL, POLLING_TIMEOUT +from apps.backend.api.constants import POLLING_INTERVAL from apps.core.concurrent import controller from apps.utils import concurrent from common.api import CCApi @@ -104,7 +104,7 @@ def _schedule(self, data, parent_data, callback_data=None): self.finish_schedule() return - if polling_time + POLLING_INTERVAL > POLLING_TIMEOUT: + if polling_time + POLLING_INTERVAL > self.service_polling_timeout: self.move_insts_to_failed(sub_inst_ids=pending_push_instance_ids, log_content=_("推送主机身份超时")) self.finish_schedule() return diff --git a/apps/backend/components/collections/base.py b/apps/backend/components/collections/base.py index 72ac75efc..a4378695e 100644 --- a/apps/backend/components/collections/base.py +++ b/apps/backend/components/collections/base.py @@ -12,6 +12,7 @@ import os import traceback import typing + from dataclasses import dataclass from typing import ( Any, @@ -33,6 +34,7 @@ from django.utils.translation import ugettext as _ from apps.adapters.api.gse import GseApiBaseHelper, get_gse_api_helper +from apps.backend.api.constants import POLLING_TIMEOUT from apps.backend.subscription import errors from apps.core.files.storage import get_storage from apps.exceptions import parse_exception @@ -218,6 +220,19 @@ def setup_pagent_file_content(self) -> bytes: return fh.read().encode() +class PollingTimeoutMixin: + @property + def service_polling_timeout(self) -> int: + service_name = self.__class__.__name__ + all_service_polling_timeout: dict = models.GlobalSettings.get_config( + key=models.GlobalSettings.KeyEnum.BACKEND_SERVICE_POLLING_TIMEOUT.value, + default={}, + ) + + service_polling_timeout = all_service_polling_timeout.get(service_name, POLLING_TIMEOUT) + return service_polling_timeout + + @dataclass class CommonData: """ @@ -237,7 +252,7 @@ class CommonData: subscription_instance_ids: Set[int] -class BaseService(Service, LogMixin, DBHelperMixin): +class BaseService(Service, LogMixin, DBHelperMixin, PollingTimeoutMixin): # 失败订阅实例ID - 失败原因 映射关系 failed_subscription_instance_id_reason_map: Optional[Dict[int, Any]] = None diff --git a/apps/backend/components/collections/job.py b/apps/backend/components/collections/job.py index ffbd19b52..eb1c787ae 100644 --- a/apps/backend/components/collections/job.py +++ b/apps/backend/components/collections/job.py @@ -21,7 +21,7 @@ from django.conf import settings from django.utils.translation import ugettext_lazy as _ -from apps.backend.api.constants import POLLING_INTERVAL, POLLING_TIMEOUT +from apps.backend.api.constants import POLLING_INTERVAL from apps.backend.api.job import process_parms from apps.backend.components.collections.base import BaseService, CommonData from apps.core.files.storage import get_storage @@ -340,7 +340,7 @@ def _schedule(self, data, parent_data, callback_data=None): ).exists() if is_finished: self.finish_schedule() - elif polling_time + POLLING_INTERVAL > POLLING_TIMEOUT: + elif polling_time + POLLING_INTERVAL > self.service_polling_timeout: # 由于JOB的超时机制可能会失效,因此这里节点管理自己需要有超时机制进行兜底 pending_job_sub_maps = models.JobSubscriptionInstanceMap.objects.filter( node_id=self.id, status=constants.BkJobStatus.PENDING diff --git a/apps/backend/components/collections/plugin.py b/apps/backend/components/collections/plugin.py index 828210398..bc2ddab3a 100644 --- a/apps/backend/components/collections/plugin.py +++ b/apps/backend/components/collections/plugin.py @@ -27,7 +27,6 @@ from apps.backend.api.constants import ( GSE_RUNNING_TASK_CODE, POLLING_INTERVAL, - POLLING_TIMEOUT, SUFFIX_MAP, GseDataErrCode, ) @@ -1249,7 +1248,7 @@ def handle_error_code( if error_code == GseDataErrCode.RUNNING: # 只要有运行中的任务,则认为未完成,标记 is_finished is_finished = False - if polling_time + POLLING_INTERVAL > POLLING_TIMEOUT: + if polling_time + POLLING_INTERVAL > self.service_polling_timeout: self.move_insts_to_failed([subscription_instance.id], _("GSE任务轮询超时")) elif success_conditions: # 状态码非 SUCCESS 的,但满足成功的特殊条件,认为是成功的,无需做任何处理 diff --git a/apps/backend/tests/components/collections/agent_new/base.py b/apps/backend/tests/components/collections/agent_new/base.py index 6a5cff3cf..35ced8995 100644 --- a/apps/backend/tests/components/collections/agent_new/base.py +++ b/apps/backend/tests/components/collections/agent_new/base.py @@ -17,8 +17,10 @@ from typing import Any, Dict, List, Optional import mock +from django.test import TestCase -from apps.backend.api.constants import POLLING_INTERVAL +from apps.backend.api.constants import POLLING_INTERVAL, POLLING_TIMEOUT +from apps.backend.components.collections.base import PollingTimeoutMixin from apps.mock_data import api_mkd from apps.mock_data import utils as mock_data_utils from apps.node_man import constants, models @@ -31,6 +33,23 @@ from . import utils +class PollingTimeoutMixinTestCase(TestCase): + def setUp(self): + self.mixin = PollingTimeoutMixin() + + def test_default_polling_time(self): + expected_timeout = POLLING_TIMEOUT + timeout = self.mixin.service_polling_timeout + self.assertEqual(timeout, expected_timeout) + + def test_component_polling_time(self): + expected_timeout = 20 + component_polling_timeout = {"PollingTimeoutMixin": expected_timeout} + with mock.patch('apps.node_man.models.GlobalSettings.get_config', return_value=component_polling_timeout): + timeout = self.mixin.service_polling_timeout + self.assertEqual(timeout, expected_timeout) + + class JobBaseTestCase(utils.AgentServiceBaseTestCase, ABC): """用于JobV3BaseService子类的测试基类""" @@ -241,7 +260,8 @@ def structure_mock_data(cls): def setUp(self) -> None: super().setUp() mock.patch( - "apps.backend.components.collections.job.POLLING_TIMEOUT", (self.POLLING_COUNT - 1) * POLLING_INTERVAL + "apps.backend.components.collections.base.PollingTimeoutMixin.service_polling_timeout", + (self.POLLING_COUNT - 1) * POLLING_INTERVAL ).start() def fetch_schedule_assertion(self) -> List[ScheduleAssertion]: diff --git a/apps/backend/tests/components/collections/agent_new/test_get_agent_status.py b/apps/backend/tests/components/collections/agent_new/test_get_agent_status.py index d28acd750..4c51f9250 100644 --- a/apps/backend/tests/components/collections/agent_new/test_get_agent_status.py +++ b/apps/backend/tests/components/collections/agent_new/test_get_agent_status.py @@ -230,7 +230,7 @@ def adjust_test_data_in_db(cls): def setUp(self) -> None: super().setUp() mock.patch( - "apps.backend.components.collections.agent_new.get_agent_status.POLLING_TIMEOUT", + "apps.backend.components.collections.base.PollingTimeoutMixin.service_polling_timeout", 2 * POLLING_INTERVAL - 1, ).start() diff --git a/apps/backend/tests/components/collections/agent_new/test_push_host_identifier.py b/apps/backend/tests/components/collections/agent_new/test_push_host_identifier.py index 34703be59..d4198d6d0 100644 --- a/apps/backend/tests/components/collections/agent_new/test_push_host_identifier.py +++ b/apps/backend/tests/components/collections/agent_new/test_push_host_identifier.py @@ -193,7 +193,7 @@ def get_default_case_name(cls) -> str: def setUp(self) -> None: super().setUp() mock.patch( - "apps.backend.api.constants.POLLING_TIMEOUT", + "apps.backend.components.collections.base.PollingTimeoutMixin.service_polling_timeout", 2 * POLLING_INTERVAL - 1, ).start() diff --git a/apps/node_man/models.py b/apps/node_man/models.py index 4e9460e43..a6e9efaef 100644 --- a/apps/node_man/models.py +++ b/apps/node_man/models.py @@ -146,6 +146,8 @@ class KeyEnum(Enum): ENABLE_AP_VERSION_MUTEX = "ENABLE_AP_VERSION_MUTEX" # 记录所有业务ID,用于同步新业务到灰度列表对比使用 ALL_BIZ_IDS = "ALL_BIZ_IDS" + # 后台组件服务超时时间 + BACKEND_SERVICE_POLLING_TIMEOUT = "BACKEND_SERVICE_POLLING_TIMEOUT" key = models.CharField(_("键"), max_length=255, db_index=True, primary_key=True) v_json = JSONField(_("值"))