diff --git a/src/backend/commons/gse-sdk/build.gradle b/src/backend/commons/gse-sdk/build.gradle index 9e90782ebd..0ec24c1593 100644 --- a/src/backend/commons/gse-sdk/build.gradle +++ b/src/backend/commons/gse-sdk/build.gradle @@ -33,6 +33,7 @@ dependencies { implementation 'org.apache.commons:commons-lang3' implementation 'org.apache.commons:commons-collections4' implementation 'org.apache.httpcomponents:httpclient' + implementation 'io.micrometer:micrometer-registry-prometheus' compileOnly 'org.projectlombok:lombok' annotationProcessor 'org.projectlombok:lombok' testImplementation 'org.junit.jupiter:junit-jupiter' diff --git a/src/backend/commons/gse-sdk/src/main/java/com/tencent/bk/job/common/gse/constants/AgentStatusEnum.java b/src/backend/commons/gse-sdk/src/main/java/com/tencent/bk/job/common/gse/constants/AgentStatusEnum.java new file mode 100644 index 0000000000..9971add8d4 --- /dev/null +++ b/src/backend/commons/gse-sdk/src/main/java/com/tencent/bk/job/common/gse/constants/AgentStatusEnum.java @@ -0,0 +1,66 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. + * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +package com.tencent.bk.job.common.gse.constants; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonValue; + +/** + * Agent 状态 + */ +public enum AgentStatusEnum { + /** + * 异常 + */ + NOT_ALIVE(0), + /** + * 正常 + */ + ALIVE(1), + /** + * 未知 + */ + UNKNOWN(2); + @JsonValue + private final int status; + + AgentStatusEnum(int status) { + this.status = status; + } + + @JsonCreator + public static AgentStatusEnum valOf(int status) { + for (AgentStatusEnum agentStatus : values()) { + if (agentStatus.status == status) { + return agentStatus; + } + } + return null; + } + + public int getValue() { + return status; + } +} diff --git a/src/backend/commons/gse-sdk/src/main/java/com/tencent/bk/job/common/gse/constants/GseConstants.java b/src/backend/commons/gse-sdk/src/main/java/com/tencent/bk/job/common/gse/constants/GseConstants.java new file mode 100644 index 0000000000..7ded706b52 --- /dev/null +++ b/src/backend/commons/gse-sdk/src/main/java/com/tencent/bk/job/common/gse/constants/GseConstants.java @@ -0,0 +1,37 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. + * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +package com.tencent.bk.job.common.gse.constants; + +public interface GseConstants { + /** + * 直连云区域ID + */ + int DEFAULT_CLOUD_ID = 0; + + /** + * GSE API 度量指标名称前缀 + */ + String GSE_API_METRICS_NAME_PREFIX = "gse.api"; +} diff --git a/src/backend/commons/gse-sdk/src/main/java/com/tencent/bk/job/common/gse/model/AgentStatusDTO.java b/src/backend/commons/gse-sdk/src/main/java/com/tencent/bk/job/common/gse/model/AgentStatusDTO.java index 38d0f10b21..0c465c42e7 100644 --- a/src/backend/commons/gse-sdk/src/main/java/com/tencent/bk/job/common/gse/model/AgentStatusDTO.java +++ b/src/backend/commons/gse-sdk/src/main/java/com/tencent/bk/job/common/gse/model/AgentStatusDTO.java @@ -27,19 +27,24 @@ import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Data; -/** - * @since 11/11/2019 17:54 - */ @Data public class AgentStatusDTO { @JsonProperty("businessid") private String businessId; - + /** + * Agent是否存在,1: 存在,0: 不存在 + */ private Integer exist; private String ip; + /** + * 云区域ID + */ private String region; + /** + * Agent版本 + */ private String version; } diff --git a/src/backend/commons/gse-sdk/src/main/java/com/tencent/bk/job/common/gse/service/QueryAgentStatusClientImpl.java b/src/backend/commons/gse-sdk/src/main/java/com/tencent/bk/job/common/gse/service/QueryAgentStatusClientImpl.java index 9f32a33c49..d97d54c100 100644 --- a/src/backend/commons/gse-sdk/src/main/java/com/tencent/bk/job/common/gse/service/QueryAgentStatusClientImpl.java +++ b/src/backend/commons/gse-sdk/src/main/java/com/tencent/bk/job/common/gse/service/QueryAgentStatusClientImpl.java @@ -29,13 +29,15 @@ import com.tencent.bk.gse.cacheapi.CacheIpInfo; import com.tencent.bk.gse.cacheapi.CacheUser; import com.tencent.bk.job.common.gse.config.GseConfig; +import com.tencent.bk.job.common.gse.constants.GseConstants; import com.tencent.bk.job.common.gse.model.AgentStatusDTO; import com.tencent.bk.job.common.gse.sdk.GseCacheClient; import com.tencent.bk.job.common.gse.sdk.GseCacheClientFactory; -import com.tencent.bk.job.common.util.AgentUtils; +import com.tencent.bk.job.common.gse.util.AgentUtils; import com.tencent.bk.job.common.util.ConcurrencyUtil; import com.tencent.bk.job.common.util.Utils; import com.tencent.bk.job.common.util.json.JsonUtils; +import io.micrometer.core.instrument.MeterRegistry; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.tuple.Pair; @@ -45,26 +47,19 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.TimeUnit; @Slf4j public class QueryAgentStatusClientImpl implements QueryAgentStatusClient { private final GseCacheClientFactory gseCacheClientFactory; private final GseConfig gseConfig; + private final MeterRegistry meterRegistry; - public QueryAgentStatusClientImpl(GseConfig gseConfig) { + public QueryAgentStatusClientImpl(GseConfig gseConfig, MeterRegistry meterRegistry) { this.gseConfig = gseConfig; - gseCacheClientFactory = new GseCacheClientFactory(gseConfig); - } - - /** - * 解析agent状态 预期解析的文本: {"businessid":"","exist":1,"ip":"1.1.1.1","region":"1","version":"NULL"} - * - * @return - */ - private static int parseCacheAgentStatus(String statusStr) { - AgentStatusDTO agentStatus = JsonUtils.fromJson(statusStr, AgentStatusDTO.class); - return agentStatus.getExist(); + this.gseCacheClientFactory = new GseCacheClientFactory(gseConfig); + this.meterRegistry = meterRegistry; } @Override @@ -98,31 +93,65 @@ public Map batchGetAgentStatus(List ips) { public Map batchGetAgentStatusWithoutLimit(Collection ips) { Map resultMap = new HashMap<>(); - GseCacheClient gseClient = gseCacheClientFactory.getClient(); - if (null == gseClient) { - log.error("get GSE cache connection failed"); + AgentStatusResponse response = queryAgentStatusFromCacheApi(ips); + if (response == null) { return resultMap; } - try { - List ipInfoList = new ArrayList<>(); - for (String ip : ips) { - String[] ipInfo = ip.split(":"); - if (ipInfo.length != 2) { - log.warn("Request ip format error! IP: {}", ip); - continue; - } + Map responseMap = response.getResult(); + for (Map.Entry item : responseMap.entrySet()) { + String[] ipInfo = item.getKey().split(":"); + if (ipInfo.length != 2) { + log.error("Query Gse agent not return businessId:ip: {}", item.getKey()); + continue; + } - CacheIpInfo cacheIpInfo = new CacheIpInfo(); - String sourceStr = "1".equals(ipInfo[0]) ? "0" : ipInfo[0]; - cacheIpInfo.setPlatId(sourceStr); - cacheIpInfo.setIp(ipInfo[1]); - ipInfoList.add(cacheIpInfo); + AgentStatus agentStatus = new AgentStatus(); + agentStatus.ip = ipInfo[1]; + agentStatus.status = parseCacheAgentStatus(item.getValue()); + agentStatus.cloudAreaId = Utils.tryParseInt(ipInfo[0]); + resultMap.put(agentStatus.cloudAreaId + ":" + agentStatus.ip, agentStatus); + } + return resultMap; + } + /** + * 解析agent状态 预期解析的文本: {"businessid":"","exist":1,"ip":"1.1.1.1","region":"1","version":"NULL"} + */ + private static int parseCacheAgentStatus(String statusStr) { + AgentStatusDTO agentStatus = JsonUtils.fromJson(statusStr, AgentStatusDTO.class); + return agentStatus.getExist(); + } + + + private AgentStatusResponse queryAgentStatusFromCacheApi(Collection ips) { + GseCacheClient gseClient = gseCacheClientFactory.getClient(); + if (null == gseClient) { + log.error("Get GSE cache client connection failed"); + return null; + } + + List ipInfoList = new ArrayList<>(); + for (String ip : ips) { + String[] ipInfo = ip.split(":"); + if (ipInfo.length != 2) { + log.warn("Request ip format error! IP: {}", ip); + continue; } - if (ipInfoList.isEmpty()) { - return resultMap; - } + CacheIpInfo cacheIpInfo = new CacheIpInfo(); + String cloudAreaIdStr = "1".equals(ipInfo[0]) ? + String.valueOf(GseConstants.DEFAULT_CLOUD_ID) : ipInfo[0]; + cacheIpInfo.setPlatId(cloudAreaIdStr); + cacheIpInfo.setIp(ipInfo[1]); + ipInfoList.add(cacheIpInfo); + } + if (ipInfoList.isEmpty()) { + return null; + } + + long start = System.currentTimeMillis(); + String status = "ok"; + try { CacheUser user = new CacheUser(); user.setUser("bitmap"); user.setPassword("bitmap"); @@ -131,32 +160,21 @@ public Map batchGetAgentStatusWithoutLimit(Collection responseMap = response.getResult(); - for (Map.Entry item : responseMap.entrySet()) { - String[] ipInfo = item.getKey().split(":"); - if (ipInfo.length != 2) { - log.error("query Gse agent not return businessId:ip: {}", item.getKey()); - continue; - } - - AgentStatus agentStatus = new AgentStatus(); - agentStatus.ip = ipInfo[1]; - agentStatus.status = parseCacheAgentStatus(item.getValue()); - agentStatus.cloudAreaId = Utils.tryParseInt(ipInfo[0]); - resultMap.put(agentStatus.cloudAreaId + ":" + agentStatus.ip, agentStatus); - } - return resultMap; - } catch (Exception e) { - log.error("gse request failed", e); - return resultMap; + log.debug("QueryAgentStatus response: {}", response); + return response; + } catch (Throwable e) { + log.error("QueryAgentStatus error", e); + status = "error"; + return null; } finally { + long end = System.currentTimeMillis(); + log.info("BatchGetAgentStatus {} ips, cost: {}ms", ips.size(), (end - start)); + if (this.meterRegistry != null) { + meterRegistry.timer(GseConstants.GSE_API_METRICS_NAME_PREFIX, "api_name", "quireAgentStatus", + "status", status).record(end - start, TimeUnit.MICROSECONDS); + } gseClient.tearDown(); } } @@ -192,7 +210,7 @@ private String getOneAliveIP(List ipList) { /** * 传入的multiIp为逗号分隔的不带云区域ID的IP * - * @param multiIp 多个IP + * @param multiIp 多IP,格式:ip1,ip2,ip3...ipN * @param cloudAreaId 云区域ID * @return 返回的单个IP不带云区域ID */ diff --git a/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/AgentUtils.java b/src/backend/commons/gse-sdk/src/main/java/com/tencent/bk/job/common/gse/util/AgentUtils.java similarity index 88% rename from src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/AgentUtils.java rename to src/backend/commons/gse-sdk/src/main/java/com/tencent/bk/job/common/gse/util/AgentUtils.java index 3d2f72e793..f305780ffb 100644 --- a/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/AgentUtils.java +++ b/src/backend/commons/gse-sdk/src/main/java/com/tencent/bk/job/common/gse/util/AgentUtils.java @@ -22,7 +22,9 @@ * IN THE SOFTWARE. */ -package com.tencent.bk.job.common.util; +package com.tencent.bk.job.common.gse.util; + +import com.tencent.bk.job.common.gse.constants.AgentStatusEnum; public final class AgentUtils { @@ -33,9 +35,9 @@ private AgentUtils() { * 返回Agent的状态是否正常 * * @param status agent的状态 - * @return 正常1 + * @return 正常: true, 异常: false */ public static boolean isAgentOkByStatus(int status) { - return status == 1; + return status == AgentStatusEnum.ALIVE.getValue(); } } diff --git a/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/config/GseClientAutoConfig.java b/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/config/GseClientAutoConfig.java index 91bdb408c6..dcda01c6f2 100644 --- a/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/config/GseClientAutoConfig.java +++ b/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/config/GseClientAutoConfig.java @@ -26,6 +26,7 @@ import com.tencent.bk.job.common.gse.service.QueryAgentStatusClient; import com.tencent.bk.job.common.gse.service.QueryAgentStatusClientImpl; +import io.micrometer.core.instrument.MeterRegistry; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; @@ -34,7 +35,9 @@ public class GseClientAutoConfig { @Bean - public QueryAgentStatusClient gseCacheClient(@Autowired GseConfig gseConfigForExecute) { + @Autowired + public QueryAgentStatusClient gseCacheClient(GseConfig gseConfigForExecute, + MeterRegistry meterRegistry) { com.tencent.bk.job.common.gse.config.GseConfig gseConfig = new com.tencent.bk.job.common.gse.config.GseConfig(); gseConfig.setEnableSsl(gseConfigForExecute.isGseSSLEnable()); String[] cacheApiServerHosts = gseConfigForExecute.getGseCacheApiServerHost().split(","); @@ -48,6 +51,6 @@ public QueryAgentStatusClient gseCacheClient(@Autowired GseConfig gseConfigForEx gseConfig.setTrustManagerType(gseConfigForExecute.getGseSSLTruststoreManagerType()); gseConfig.setQueryBatchSize(gseConfigForExecute.getGseQueryBatchSize()); gseConfig.setQueryThreadsNum(gseConfigForExecute.getGseQueryThreadsNum()); - return new QueryAgentStatusClientImpl(gseConfig); + return new QueryAgentStatusClientImpl(gseConfig, meterRegistry); } } diff --git a/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/config/MetricsAutoConfig.java b/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/config/MetricsAutoConfig.java index d7ad3ad84b..8ba7fe38f2 100644 --- a/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/config/MetricsAutoConfig.java +++ b/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/config/MetricsAutoConfig.java @@ -24,6 +24,7 @@ package com.tencent.bk.job.execute.config; +import com.tencent.bk.job.common.gse.constants.GseConstants; import com.tencent.bk.job.execute.monitor.ExecuteMetricNames; import com.tencent.bk.job.execute.monitor.ExecuteMetricTags; import io.micrometer.core.instrument.Meter; @@ -47,7 +48,7 @@ public DistributionStatisticConfig configure(Meter.Id id, DistributionStatisticC // [10ms,3s] .minimumExpectedValue(10_000_000.0).maximumExpectedValue(3_000_000_000.0) .build().merge(config); - } else if (metricName.startsWith(ExecuteMetricNames.GSE_API_PREFIX)) { + } else if (metricName.startsWith(GseConstants.GSE_API_METRICS_NAME_PREFIX)) { return DistributionStatisticConfig.builder().percentilesHistogram(true) // [10ms,1s] .minimumExpectedValue(10_000_000.0).maximumExpectedValue(1_000_000_000.0) diff --git a/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/engine/gse/GseRequestUtils.java b/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/engine/gse/GseRequestUtils.java index db47e6cde6..b5f9109337 100644 --- a/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/engine/gse/GseRequestUtils.java +++ b/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/engine/gse/GseRequestUtils.java @@ -44,6 +44,7 @@ import com.tencent.bk.gse.taskapi.api_stop_task_request; import com.tencent.bk.gse.taskapi.api_task_detail_result; import com.tencent.bk.gse.taskapi.api_task_request; +import com.tencent.bk.job.common.gse.constants.GseConstants; import com.tencent.bk.job.common.model.dto.IpDTO; import com.tencent.bk.job.common.util.ApplicationContextRegister; import com.tencent.bk.job.execute.common.exception.ReadTimeoutException; @@ -51,7 +52,6 @@ import com.tencent.bk.job.execute.engine.model.LogPullProgress; import com.tencent.bk.job.execute.engine.model.RunSQLScriptFile; import com.tencent.bk.job.execute.gse.model.ProcessOperateTypeEnum; -import com.tencent.bk.job.execute.monitor.ExecuteMetricNames; import io.micrometer.core.instrument.MeterRegistry; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.StringUtils; @@ -71,7 +71,7 @@ */ @Slf4j public class GseRequestUtils { - private static MeterRegistry meterRegistry; + private static final MeterRegistry meterRegistry; static { meterRegistry = ApplicationContextRegister.getBean(MeterRegistry.class); @@ -125,9 +125,9 @@ public static api_copy_fileinfoV2 buildCopyFileInfo(api_agent src, String srcPat /** * 创建 Agent 列表 * - * @param userName - * @param passwd - * @return + * @param userName 用户名 + * @param passwd 密码 + * @return Agent列表 */ public static List buildAgentList(Set jobIpSet, String userName, String passwd) { List agentList = new ArrayList<>(); @@ -149,14 +149,14 @@ public static List buildAgentList(List ipList, String userName } /** - * @param userName - * @param fullIP - * @return + * @param userName 用户名 + * @param cloudIp 云区域:IP + * @return Agent */ - public static api_agent buildAgent(String fullIP, String userName, String passwd) { + public static api_agent buildAgent(String cloudIp, String userName, String passwd) { int source = 0; String ip; - String[] ipArray = fullIP.split(":"); + String[] ipArray = cloudIp.split(":"); if (ipArray.length > 1) { source = Integer.parseInt(ipArray[0]); ip = ipArray[1]; @@ -231,11 +231,13 @@ public static api_script_request buildScriptRequestWithSQL(List agent /** * 创建GSE执行脚本请求 * - * @param downloadPath - * @param scriptParam - * @param timeout - * @param agentList - * @return + * @param agentList Agent列表 + * @param scriptContent 脚本内容 + * @param scriptFileName 脚本名称 + * @param downloadPath 脚本存放路径 + * @param scriptParam 脚本参数 + * @param timeout 脚本任务超时时间,单位秒 + * @return 执行脚本任务请求 */ public static api_script_request buildScriptRequest(List agentList, String scriptContent, String scriptFileName, String downloadPath, String scriptParam, @@ -278,6 +280,13 @@ public static api_script_request buildScriptRequest(List agentList, S return scriptReq; } + /** + * 下发脚本任务 + * + * @param id 任务ID + * @param scriptRequest 请求内容 + * @return GSE Server响应 + */ public static GseTaskResponse sendScriptTaskRequest(long id, api_script_request scriptRequest) { return sendCmd("" + id, new GseTaskResponseCaller() { @Override @@ -320,7 +329,13 @@ public static api_process_req buildProcessRequest(List agentList, api return processReq; } - + /** + * 下发文件分发任务 + * + * @param id 任务ID + * @param copyFileInfoList 请求内容 + * @return GSE Server响应 + */ public static GseTaskResponse sendCopyFileTaskRequest(long id, List copyFileInfoList) { return sendCmd("" + id, new GseTaskResponseCaller() { @Override @@ -342,6 +357,13 @@ public String getApiName() { }); } + /** + * 强制终止任务 + * + * @param id 任务ID + * @param stopTaskRequest 终止任务请求 + * @return GSE SERVER 响应 + */ public static GseTaskResponse sendForceStopTaskRequest(long id, api_stop_task_request stopTaskRequest) { return sendCmd("" + id, new GseTaskResponseCaller() { @Override @@ -377,9 +399,9 @@ private static List buildQueryAtomTaskInfoList(int off * 拉取脚本执行任务日志 V2 * * @param gseTaskId gse任务ID - * @param cloudIps 目标服务器云区域IP - * @param runBushOffsetMap 脚本输出日志偏移 - * @return 结果 + * @param cloudIps 目标服务器列表 + * @param runBushOffsetMap 日志偏移量 + * @return 响应 */ public static api_query_task_info_v2 buildScriptLogRequestV2(String gseTaskId, Collection cloudIps, Map runBushOffsetMap) { @@ -430,7 +452,11 @@ private static api_host convertToApiHost(String cloudIp) { } /** - * 拉取文件任务执行结果 + * 获取文件任务执行结果 + * + * @param id job任务ID + * @param gseTaskId gse任务ID + * @return 结果 */ public static api_map_rsp pullCopyFileTaskLog(long id, String gseTaskId) { return sendCmd("" + id, new GseApiCallback() { @@ -563,9 +589,8 @@ private static T sendCmd(String id, GseApiCallback caller) { status = "error"; } finally { long end = System.nanoTime(); - meterRegistry.timer(ExecuteMetricNames.GSE_API_PREFIX, "api_name", caller.getApiName(), "status", - status) - .record(end - start, TimeUnit.NANOSECONDS); + meterRegistry.timer(GseConstants.GSE_API_METRICS_NAME_PREFIX, "api_name", caller.getApiName(), + "status", status).record(end - start, TimeUnit.NANOSECONDS); } } while (retry-- > 0); //重试1次 return caller.fail(connect); diff --git a/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/monitor/ExecuteMetricNames.java b/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/monitor/ExecuteMetricNames.java index f65335af60..3478ba43b1 100644 --- a/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/monitor/ExecuteMetricNames.java +++ b/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/monitor/ExecuteMetricNames.java @@ -60,10 +60,6 @@ public class ExecuteMetricNames { * 任务下发指标 */ public static final String EXECUTE_TASK_PREFIX = "job.execute.task"; - /** - * GSE-API - */ - public static final String GSE_API_PREFIX = "gse.api"; /** * 未被调度的任务数 */ diff --git a/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/service/impl/TaskExecuteServiceImpl.java b/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/service/impl/TaskExecuteServiceImpl.java index 6558de29e4..126f57b333 100644 --- a/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/service/impl/TaskExecuteServiceImpl.java +++ b/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/service/impl/TaskExecuteServiceImpl.java @@ -31,6 +31,7 @@ import com.tencent.bk.job.common.constant.ErrorCode; import com.tencent.bk.job.common.constant.TaskVariableTypeEnum; import com.tencent.bk.job.common.exception.ServiceException; +import com.tencent.bk.job.common.gse.constants.AgentStatusEnum; import com.tencent.bk.job.common.gse.service.QueryAgentStatusClient; import com.tencent.bk.job.common.iam.exception.InSufficientPermissionException; import com.tencent.bk.job.common.iam.model.AuthResult; @@ -1793,7 +1794,8 @@ private void setAgentStatus(List ips) { Map statusMap = queryAgentStatusClient.batchGetAgentStatus(ipList); for (IpDTO ip : ips) { String fullIp = ip.convertToStrIp(); - ip.setAlive(statusMap.get(fullIp) == null ? 0 : statusMap.get(fullIp).status); + ip.setAlive(statusMap.get(fullIp) == null ? + AgentStatusEnum.UNKNOWN.getValue() : statusMap.get(fullIp).status); } } @@ -2112,11 +2114,11 @@ public void authExecuteJobPlan(TaskExecuteParam executeParam) throws ServiceExce } private class GetTopoHostTask implements Callable>> { - private long appId; - private DynamicServerTopoNodeDTO topoNode; - private CountDownLatch latch; + private final long appId; + private final DynamicServerTopoNodeDTO topoNode; + private final CountDownLatch latch; - public GetTopoHostTask(long appId, DynamicServerTopoNodeDTO topoNode, CountDownLatch latch) { + private GetTopoHostTask(long appId, DynamicServerTopoNodeDTO topoNode, CountDownLatch latch) { this.appId = appId; this.topoNode = topoNode; this.latch = latch; @@ -2127,10 +2129,10 @@ public Pair> call() { try { List topoIps = serverService.getIpByTopoNodes(appId, Collections.singletonList(new CcInstanceDTO(topoNode.getNodeType(), topoNode.getTopoNodeId()))); - return new ImmutablePair(topoNode, topoIps); + return new ImmutablePair<>(topoNode, topoIps); } catch (Throwable e) { log.warn("Get hosts by topo fail", e); - return new ImmutablePair(topoNode, Collections.EMPTY_LIST); + return new ImmutablePair<>(topoNode, Collections.EMPTY_LIST); } finally { latch.countDown(); } diff --git a/src/backend/job-manage/service-job-manage/src/main/java/com/tencent/bk/job/manage/config/GseClientAutoConfig.java b/src/backend/job-manage/service-job-manage/src/main/java/com/tencent/bk/job/manage/config/GseClientAutoConfig.java index 614708c042..9f2f8d0641 100644 --- a/src/backend/job-manage/service-job-manage/src/main/java/com/tencent/bk/job/manage/config/GseClientAutoConfig.java +++ b/src/backend/job-manage/service-job-manage/src/main/java/com/tencent/bk/job/manage/config/GseClientAutoConfig.java @@ -27,6 +27,7 @@ import com.tencent.bk.job.common.gse.config.GseConfig; import com.tencent.bk.job.common.gse.service.QueryAgentStatusClient; import com.tencent.bk.job.common.gse.service.QueryAgentStatusClientImpl; +import io.micrometer.core.instrument.MeterRegistry; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.autoconfigure.AutoConfigureAfter; import org.springframework.context.annotation.Bean; @@ -37,7 +38,9 @@ public class GseClientAutoConfig { @Bean - public QueryAgentStatusClient gseCacheClient(@Autowired GseConfigForManage gseConfigForManage) { + @Autowired + public QueryAgentStatusClient gseCacheClient(GseConfigForManage gseConfigForManage, + MeterRegistry meterRegistry) { GseConfig gseConfig = new GseConfig(); gseConfig.setEnableSsl(gseConfigForManage.isEnableSsl()); gseConfig.setGseCacheApiServerHost(gseConfigForManage.getGseCacheApiServerHosts()); @@ -50,6 +53,6 @@ public QueryAgentStatusClient gseCacheClient(@Autowired GseConfigForManage gseCo gseConfig.setTrustManagerType(gseConfigForManage.getTrustManagerType()); gseConfig.setQueryBatchSize(gseConfigForManage.getGseQueryBatchSize()); gseConfig.setQueryThreadsNum(gseConfigForManage.getGseQueryThreadsNum()); - return new QueryAgentStatusClientImpl(gseConfig); + return new QueryAgentStatusClientImpl(gseConfig, meterRegistry); } }