From 8c61307d006ca8f14915424f35369846f5e4c269 Mon Sep 17 00:00:00 2001 From: jsonwan Date: Fri, 31 May 2024 16:24:13 +0800 Subject: [PATCH 01/24] =?UTF-8?q?perf:=20=E7=AC=AC=E4=B8=89=E6=96=B9?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E6=BA=90=E4=BB=BB=E5=8A=A1=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E6=97=A0=E6=8D=9F=E6=9B=B4=E6=96=B0=20#3017?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1.清理实际上并不使用的冗余代码; 2.优化部分代码格式。 --- .../inner/ServiceFileSourceTaskResource.java | 54 ++-------------- .../ServiceFileSourceTaskResourceImpl.java | 62 ++++++------------- .../service/BatchTaskService.java | 5 +- .../service/impl/BatchTaskServiceImpl.java | 22 ++++--- 4 files changed, 39 insertions(+), 104 deletions(-) diff --git a/src/backend/job-file-gateway/api-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/inner/ServiceFileSourceTaskResource.java b/src/backend/job-file-gateway/api-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/inner/ServiceFileSourceTaskResource.java index 4865298969..01c4278b7b 100644 --- a/src/backend/job-file-gateway/api-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/inner/ServiceFileSourceTaskResource.java +++ b/src/backend/job-file-gateway/api-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/inner/ServiceFileSourceTaskResource.java @@ -26,16 +26,11 @@ import com.tencent.bk.job.common.annotation.InternalAPI; import com.tencent.bk.job.common.model.InternalResponse; -import com.tencent.bk.job.file_gateway.model.req.inner.ClearBatchTaskFilesReq; import com.tencent.bk.job.file_gateway.model.req.inner.ClearTaskFilesReq; import com.tencent.bk.job.file_gateway.model.req.inner.FileSourceBatchDownloadTaskReq; -import com.tencent.bk.job.file_gateway.model.req.inner.FileSourceDownloadTaskReq; import com.tencent.bk.job.file_gateway.model.req.inner.StopBatchTaskReq; -import com.tencent.bk.job.file_gateway.model.req.inner.StopTaskReq; import 
com.tencent.bk.job.file_gateway.model.resp.inner.BatchTaskInfoDTO; import com.tencent.bk.job.file_gateway.model.resp.inner.BatchTaskStatusDTO; -import com.tencent.bk.job.file_gateway.model.resp.inner.FileSourceTaskStatusDTO; -import com.tencent.bk.job.file_gateway.model.resp.inner.TaskInfoDTO; import com.tentent.bk.job.common.api.feign.annotation.SmartFeignClient; import io.swagger.annotations.Api; import io.swagger.annotations.ApiOperation; @@ -53,24 +48,6 @@ @InternalAPI public interface ServiceFileSourceTaskResource { - // 直接转发至FileWorker的请求,URL子路径保持一致 - @ApiOperation(value = "创建并启动文件下载任务", produces = "application/json") - @PostMapping("/service/fileSource/filetask/downloadFiles/start") - InternalResponse startFileSourceDownloadTask( - @ApiParam("用户名") - @RequestHeader("username") - String username, - @ApiParam("文件源下载任务请求") - @RequestBody FileSourceDownloadTaskReq req - ); - - @ApiOperation(value = "清理任务已下载的文件", produces = "application/json") - @PostMapping("/service/fileSource/filetask/downloadFiles/stop") - InternalResponse stopTasks( - @ApiParam("文件源下载任务请求") - @RequestBody StopTaskReq req - ); - @ApiOperation(value = "清理任务已下载的文件", produces = "application/json") @PostMapping("/service/fileSource/filetask/clearFiles") InternalResponse clearTaskFiles( @@ -78,27 +55,12 @@ InternalResponse clearTaskFiles( @RequestBody ClearTaskFilesReq req ); - // 文件网关自有资源请求 - @ApiOperation(value = "获取文件任务状态", produces = "application/json") - @GetMapping("/service/fileSource/filetask/taskIds/{taskId}/status") - InternalResponse getFileSourceTaskStatusAndLogs( - @ApiParam("任务Id") - @PathVariable("taskId") - String taskId, - @ApiParam("日志开始位置") - @RequestParam(value = "logStart", required = false) - Long logStart, - @ApiParam("获取日志数量") - @RequestParam(value = "logLength", required = false) - Long logLength - ); - @ApiOperation(value = "创建并启动批量文件下载任务", produces = "application/json") @PostMapping("/service/fileSource/filetask/batch/downloadFiles/start") InternalResponse 
startFileSourceBatchDownloadTask( @ApiParam("用户名") @RequestHeader("username") - String username, + String username, @ApiParam("文件源下载任务请求") @RequestBody FileSourceBatchDownloadTaskReq req ); @@ -110,25 +72,17 @@ InternalResponse stopBatchTasks( @RequestBody StopBatchTaskReq req ); - @ApiOperation(value = "清理批量任务已下载的文件", produces = "application/json") - @PostMapping("/service/fileSource/filetask/batch/clearFiles") - InternalResponse clearBatchTaskFiles( - @ApiParam("文件源下载任务请求") - @RequestBody ClearBatchTaskFilesReq req - ); - - // 文件网关自有资源请求 @ApiOperation(value = "获取文件批量任务状态", produces = "application/json") @GetMapping("/service/fileSource/filetask/batch/batchTaskIds/{batchTaskId}/status") InternalResponse getBatchTaskStatusAndLogs( @ApiParam("任务Id") @PathVariable("batchTaskId") - String batchTaskId, + String batchTaskId, @ApiParam("日志开始位置") @RequestParam(value = "logStart", required = false) - Long logStart, + Long logStart, @ApiParam("获取日志数量") @RequestParam(value = "logLength", required = false) - Long logLength + Long logLength ); } diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/inner/ServiceFileSourceTaskResourceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/inner/ServiceFileSourceTaskResourceImpl.java index bbfda90faf..b550e49c16 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/inner/ServiceFileSourceTaskResourceImpl.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/inner/ServiceFileSourceTaskResourceImpl.java @@ -25,16 +25,11 @@ package com.tencent.bk.job.file_gateway.api.inner; import com.tencent.bk.job.common.model.InternalResponse; -import com.tencent.bk.job.file_gateway.model.req.inner.ClearBatchTaskFilesReq; import com.tencent.bk.job.file_gateway.model.req.inner.ClearTaskFilesReq; import 
com.tencent.bk.job.file_gateway.model.req.inner.FileSourceBatchDownloadTaskReq; -import com.tencent.bk.job.file_gateway.model.req.inner.FileSourceDownloadTaskReq; import com.tencent.bk.job.file_gateway.model.req.inner.StopBatchTaskReq; -import com.tencent.bk.job.file_gateway.model.req.inner.StopTaskReq; import com.tencent.bk.job.file_gateway.model.resp.inner.BatchTaskInfoDTO; import com.tencent.bk.job.file_gateway.model.resp.inner.BatchTaskStatusDTO; -import com.tencent.bk.job.file_gateway.model.resp.inner.FileSourceTaskStatusDTO; -import com.tencent.bk.job.file_gateway.model.resp.inner.TaskInfoDTO; import com.tencent.bk.job.file_gateway.service.BatchTaskService; import com.tencent.bk.job.file_gateway.service.FileSourceTaskService; import lombok.extern.slf4j.Slf4j; @@ -56,31 +51,6 @@ public ServiceFileSourceTaskResourceImpl(FileSourceTaskService fileSourceTaskSer this.batchTaskService = batchTaskService; } - @Override - public InternalResponse startFileSourceDownloadTask(String username, FileSourceDownloadTaskReq req) { - return InternalResponse.buildSuccessResp(fileSourceTaskService.startFileSourceDownloadTask(username, - req.getAppId(), req.getStepInstanceId(), req.getExecuteCount(), null, req.getFileSourceId(), - req.getFilePathList())); - } - - @Override - public InternalResponse stopTasks(StopTaskReq req) { - return InternalResponse.buildSuccessResp(fileSourceTaskService.stopTasks(req.getTaskIdList())); - } - - @Override - public InternalResponse getFileSourceTaskStatusAndLogs(String taskId, Long logStart, - Long logLength) { - if (logStart == null || logStart < 0) { - logStart = 0L; - } - if (logLength == null || logLength <= 0) { - logLength = -1L; - } - return InternalResponse.buildSuccessResp(fileSourceTaskService.getFileSourceTaskStatusAndLogs(taskId, - logStart, logLength)); - } - @Override public InternalResponse clearTaskFiles(ClearTaskFilesReq req) { return InternalResponse.buildSuccessResp(fileSourceTaskService.clearTaskFiles(req.getTaskIdList())); 
@@ -88,9 +58,16 @@ public InternalResponse clearTaskFiles(ClearTaskFilesReq req) { @Override public InternalResponse startFileSourceBatchDownloadTask(String username, - FileSourceBatchDownloadTaskReq req) { - return InternalResponse.buildSuccessResp(batchTaskService.startFileSourceBatchDownloadTask(username, - req.getAppId(), req.getStepInstanceId(), req.getExecuteCount(), req.getFileSourceTaskList())); + FileSourceBatchDownloadTaskReq req) { + return InternalResponse.buildSuccessResp( + batchTaskService.startFileSourceBatchDownloadTask( + username, + req.getAppId(), + req.getStepInstanceId(), + req.getExecuteCount(), + req.getFileSourceTaskList() + ) + ); } @Override @@ -99,20 +76,21 @@ public InternalResponse stopBatchTasks(StopBatchTaskReq req) { } @Override - public InternalResponse getBatchTaskStatusAndLogs(String batchTaskId, Long logStart, - Long logLength) { + public InternalResponse getBatchTaskStatusAndLogs(String batchTaskId, + Long logStart, + Long logLength) { if (logStart == null || logStart < 0) { logStart = 0L; } if (logLength == null || logLength <= 0) { logLength = -1L; } - return InternalResponse.buildSuccessResp(batchTaskService.getBatchTaskStatusAndLogs(batchTaskId, logStart, - logLength)); - } - - @Override - public InternalResponse clearBatchTaskFiles(ClearBatchTaskFilesReq req) { - return InternalResponse.buildSuccessResp(batchTaskService.clearBatchTaskFiles(req.getBatchTaskIdList())); + return InternalResponse.buildSuccessResp( + batchTaskService.getBatchTaskStatusAndLogs( + batchTaskId, + logStart, + logLength + ) + ); } } diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/BatchTaskService.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/BatchTaskService.java index 680eb61f45..ddeedaed94 100644 --- 
a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/BatchTaskService.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/BatchTaskService.java @@ -31,7 +31,9 @@ import java.util.List; public interface BatchTaskService { - BatchTaskInfoDTO startFileSourceBatchDownloadTask(String username, Long appId, Long stepInstanceId, + BatchTaskInfoDTO startFileSourceBatchDownloadTask(String username, + Long appId, + Long stepInstanceId, Integer executeCount, List fileSourceTaskList); @@ -39,5 +41,4 @@ BatchTaskInfoDTO startFileSourceBatchDownloadTask(String username, Long appId, L BatchTaskStatusDTO getBatchTaskStatusAndLogs(String batchTaskId, Long logStart, Long logLength); - Integer clearBatchTaskFiles(List batchTaskIdList); } diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/BatchTaskServiceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/BatchTaskServiceImpl.java index eaa4166070..e12af5bec2 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/BatchTaskServiceImpl.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/BatchTaskServiceImpl.java @@ -64,7 +64,9 @@ public BatchTaskServiceImpl(FileSourceTaskService fileSourceTaskService, } @Override - public BatchTaskInfoDTO startFileSourceBatchDownloadTask(String username, Long appId, Long stepInstanceId, + public BatchTaskInfoDTO startFileSourceBatchDownloadTask(String username, + Long appId, + Long stepInstanceId, Integer executeCount, List fileSourceTaskList) { BatchTaskInfoDTO batchTaskInfoDTO = new BatchTaskInfoDTO(); @@ -79,9 +81,15 @@ public BatchTaskInfoDTO startFileSourceBatchDownloadTask(String username, 
Long a batchTaskInfoDTO.setBatchTaskId(batchTaskId); List taskInfoDTOList = new ArrayList<>(); for (FileSourceTaskContent fileSourceTaskContent : fileSourceTaskList) { - TaskInfoDTO taskInfoDTO = fileSourceTaskService.startFileSourceDownloadTask(username, appId, - stepInstanceId, executeCount, batchTaskId, fileSourceTaskContent.getFileSourceId(), - fileSourceTaskContent.getFilePathList()); + TaskInfoDTO taskInfoDTO = fileSourceTaskService.startFileSourceDownloadTask( + username, + appId, + stepInstanceId, + executeCount, + batchTaskId, + fileSourceTaskContent.getFileSourceId(), + fileSourceTaskContent.getFilePathList() + ); taskInfoDTOList.add(taskInfoDTO); } batchTaskInfoDTO.setTaskInfoList(taskInfoDTOList); @@ -123,10 +131,4 @@ public BatchTaskStatusDTO getBatchTaskStatusAndLogs(String batchTaskId, Long log batchTaskStatusDTO.setFileSourceTaskStatusInfoList(fileSourceTaskStatusInfoList); return batchTaskStatusDTO; } - - @Override - public Integer clearBatchTaskFiles(List batchTaskIdList) { - List fileSourceTaskIdList = getFileSourceTaskIdListByBatch(batchTaskIdList); - return fileSourceTaskService.clearTaskFiles(fileSourceTaskIdList); - } } From 48c7fcd2ee713c3b12b0b7362d78641a9e3a67ce Mon Sep 17 00:00:00 2001 From: jsonwan Date: Fri, 31 May 2024 20:44:34 +0800 Subject: [PATCH 02/24] =?UTF-8?q?perf:=20=E7=AC=AC=E4=B8=89=E6=96=B9?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E6=BA=90=E4=BB=BB=E5=8A=A1=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E6=97=A0=E6=8D=9F=E6=9B=B4=E6=96=B0=20#3017?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1.启动任务过程添加事务保证; 2.针对UnknownHostException导致的任务启动失败进行重试。 --- .../service/FileSourceTaskService.java | 20 +++- .../RetryPolicyFileSourceTaskService.java | 39 ++++++++ .../impl/FileSourceTaskRetryContext.java | 37 +++++++ .../impl/FileSourceTaskServiceImpl.java | 23 ++++- .../RetryPolicyFileSourceTaskServiceImpl.java | 96 +++++++++++++++++++ .../retry/FileSourceTaskRetryPolicy.java | 34 +++++++ 
.../impl/UnknownHostExceptionRetryPolicy.java | 76 +++++++++++++++ 7 files changed, 315 insertions(+), 10 deletions(-) create mode 100644 src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/RetryPolicyFileSourceTaskService.java create mode 100644 src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/context/impl/FileSourceTaskRetryContext.java create mode 100644 src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/RetryPolicyFileSourceTaskServiceImpl.java create mode 100644 src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/retry/FileSourceTaskRetryPolicy.java create mode 100644 src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/retry/impl/UnknownHostExceptionRetryPolicy.java diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/FileSourceTaskService.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/FileSourceTaskService.java index 130629b177..1e7143f49a 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/FileSourceTaskService.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/FileSourceTaskService.java @@ -32,12 +32,22 @@ import java.util.List; public interface FileSourceTaskService { - TaskInfoDTO startFileSourceDownloadTask(String username, Long appId, Long stepInstanceId, Integer executeCount, - String batchTaskId, Integer fileSourceId, List filePathList); + TaskInfoDTO startFileSourceDownloadTask(String username, + Long appId, + Long stepInstanceId, + Integer executeCount, + String batchTaskId, + Integer fileSourceId, 
+ List filePathList); - TaskInfoDTO startFileSourceDownloadTaskWithId(String username, Long appId, Long stepInstanceId, - Integer executeCount, String batchTaskId, Integer fileSourceId, - List filePathList, String fileSourceTaskId); + TaskInfoDTO startFileSourceDownloadTaskWithId(String username, + Long appId, + Long stepInstanceId, + Integer executeCount, + String batchTaskId, + Integer fileSourceId, + List filePathList, + String fileSourceTaskId); String updateFileSourceTask(FileTaskProgressDTO fileTaskProgressDTO); diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/RetryPolicyFileSourceTaskService.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/RetryPolicyFileSourceTaskService.java new file mode 100644 index 0000000000..e7a8888e8c --- /dev/null +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/RetryPolicyFileSourceTaskService.java @@ -0,0 +1,39 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. 
+ * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +package com.tencent.bk.job.file_gateway.service; + +import com.tencent.bk.job.file_gateway.model.resp.inner.TaskInfoDTO; + +import java.util.List; + +public interface RetryPolicyFileSourceTaskService { + TaskInfoDTO startFileSourceDownloadTask(String username, + Long appId, + Long stepInstanceId, + Integer executeCount, + String batchTaskId, + Integer fileSourceId, + List filePathList); +} diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/context/impl/FileSourceTaskRetryContext.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/context/impl/FileSourceTaskRetryContext.java new file mode 100644 index 0000000000..7591af725c --- /dev/null +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/context/impl/FileSourceTaskRetryContext.java @@ -0,0 +1,37 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. + * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +package com.tencent.bk.job.file_gateway.service.context.impl; + +import lombok.AllArgsConstructor; +import lombok.Data; + +@Data +@AllArgsConstructor +public class FileSourceTaskRetryContext { + /** + * Exception thrown by the business logic on the attempt preceding the retry; retry policies inspect it (and its cause chain) to decide whether to retry + */ + private Exception exception; +} diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/FileSourceTaskServiceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/FileSourceTaskServiceImpl.java index 16d7ed72b3..8e74c2ce32 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/FileSourceTaskServiceImpl.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/FileSourceTaskServiceImpl.java @@ -103,17 +103,30 @@ public FileSourceTaskServiceImpl(FileSourceTaskUpdateService fileSourceTaskUpdat } @Override + @JobTransactional(transactionManager = "jobFileGatewayTransactionManager") public TaskInfoDTO startFileSourceDownloadTask(String username, Long appId, Long stepInstanceId, Integer executeCount, String batchTaskId, Integer fileSourceId, List filePathList) { - return startFileSourceDownloadTaskWithId(username, appId, stepInstanceId, executeCount, batchTaskId, - fileSourceId, filePathList, null); + return startFileSourceDownloadTaskWithId( + username, + appId, + stepInstanceId, + 
executeCount, + batchTaskId, + fileSourceId, + filePathList, + null + ); } @JobTransactional(transactionManager = "jobFileGatewayTransactionManager") - public TaskInfoDTO startFileSourceDownloadTaskWithId(String username, Long appId, Long stepInstanceId, - Integer executeCount, String batchTaskId, - Integer fileSourceId, List filePathList, + public TaskInfoDTO startFileSourceDownloadTaskWithId(String username, + Long appId, + Long stepInstanceId, + Integer executeCount, + String batchTaskId, + Integer fileSourceId, + List filePathList, String fileSourceTaskId) { log.info("Input=({},{},{},{},{},{},{})", username, appId, stepInstanceId, executeCount, batchTaskId, fileSourceId, filePathList); diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/RetryPolicyFileSourceTaskServiceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/RetryPolicyFileSourceTaskServiceImpl.java new file mode 100644 index 0000000000..1c1222a013 --- /dev/null +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/RetryPolicyFileSourceTaskServiceImpl.java @@ -0,0 +1,96 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. 
+ * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +package com.tencent.bk.job.file_gateway.service.impl; + +import com.tencent.bk.job.file_gateway.model.resp.inner.TaskInfoDTO; +import com.tencent.bk.job.file_gateway.service.FileSourceTaskService; +import com.tencent.bk.job.file_gateway.service.RetryPolicyFileSourceTaskService; +import com.tencent.bk.job.file_gateway.service.context.impl.FileSourceTaskRetryContext; +import com.tencent.bk.job.file_gateway.service.retry.FileSourceTaskRetryPolicy; +import com.tencent.bk.job.file_gateway.service.retry.impl.UnknownHostExceptionRetryPolicy; +import lombok.extern.slf4j.Slf4j; +import org.slf4j.helpers.MessageFormatter; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import java.util.List; + +@Slf4j +@Service +public class RetryPolicyFileSourceTaskServiceImpl implements RetryPolicyFileSourceTaskService { + + private final FileSourceTaskService fileSourceTaskService; + private final FileSourceTaskRetryPolicy retryPolicy = new UnknownHostExceptionRetryPolicy(3, 5000); + + @Autowired + public RetryPolicyFileSourceTaskServiceImpl(FileSourceTaskService fileSourceTaskService) { + this.fileSourceTaskService = fileSourceTaskService; + } + + @Override + public TaskInfoDTO startFileSourceDownloadTask(String username, + Long appId, + Long stepInstanceId, + Integer executeCount, + String batchTaskId, + Integer fileSourceId, + List filePathList) { + int retryCount = 0; + boolean shouldRetry; + do { + try { + return fileSourceTaskService.startFileSourceDownloadTaskWithId( + username, + appId, + stepInstanceId, + executeCount, + batchTaskId, + fileSourceId, + filePathList, + null + ); + } catch (Exception e) { + retryCount += 1; + FileSourceTaskRetryContext retryContext = new FileSourceTaskRetryContext(e); + shouldRetry = retryPolicy.shouldRetry(retryContext, retryCount); + if (shouldRetry) { + String msg = MessageFormatter.arrayFormat( + "Fail to startFileSourceDownloadTask, stepInstanceId={}, " + + 
"executeCount={}, batchTaskId={}, retry {}", + new Object[]{ + stepInstanceId, + executeCount, + batchTaskId, + retryCount + } + ).getMessage(); + log.warn(msg, e); + } else { + throw e; + } + } + } while (true); + } +} diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/retry/FileSourceTaskRetryPolicy.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/retry/FileSourceTaskRetryPolicy.java new file mode 100644 index 0000000000..df37fcb9f5 --- /dev/null +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/retry/FileSourceTaskRetryPolicy.java @@ -0,0 +1,34 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. + * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +package com.tencent.bk.job.file_gateway.service.retry; + +import com.tencent.bk.job.file_gateway.service.context.impl.FileSourceTaskRetryContext; + +/** + * Retry policy interface for third-party file source tasks: shouldRetry receives the retry context (holding the exception of the failed attempt) and the current retry count, and returns whether another attempt should be made + */ +public interface FileSourceTaskRetryPolicy { + boolean shouldRetry(FileSourceTaskRetryContext context, int retryCount); +} diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/retry/impl/UnknownHostExceptionRetryPolicy.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/retry/impl/UnknownHostExceptionRetryPolicy.java new file mode 100644 index 0000000000..a742025394 --- /dev/null +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/retry/impl/UnknownHostExceptionRetryPolicy.java @@ -0,0 +1,76 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. 
+ * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +package com.tencent.bk.job.file_gateway.service.retry.impl; + +import com.tencent.bk.job.common.exception.InternalException; +import com.tencent.bk.job.common.util.ThreadUtils; +import com.tencent.bk.job.file_gateway.service.context.impl.FileSourceTaskRetryContext; +import com.tencent.bk.job.file_gateway.service.retry.FileSourceTaskRetryPolicy; +import org.springframework.web.client.ResourceAccessException; + +import java.net.UnknownHostException; + +/** + * Retry policy for task-start failures caused by an UnknownHostException: a retry is granted only while retryCount <= maxRetryCount and the failure is an InternalException whose cause is a ResourceAccessException whose own cause is an UnknownHostException; shouldRetry itself sleeps before granting the retry + */ +public class UnknownHostExceptionRetryPolicy implements FileSourceTaskRetryPolicy { + + /** + * Maximum number of retries + */ + private final int maxRetryCount; + /** + * Number of milliseconds to sleep before a retry is granted + */ + private final int sleepMillsBeforeRetryCount; + + public UnknownHostExceptionRetryPolicy(int maxRetryCount, int sleepMillsBeforeRetryCount) { + this.maxRetryCount = maxRetryCount; + this.sleepMillsBeforeRetryCount = sleepMillsBeforeRetryCount; + } + + @Override + public boolean shouldRetry(FileSourceTaskRetryContext context, int retryCount) { + // Stop retrying once the retry budget is exhausted + if (retryCount > maxRetryCount) { + return false; + } + // Check the exception chain level by level; each level must be read from the previous level's cause, not from the top-level exception again + Exception exception = context.getException(); + if (!(exception instanceof InternalException)) { + return false; + } + Throwable cause = exception.getCause(); + if (!(cause instanceof ResourceAccessException)) { + return false; + } + Throwable innerCause = cause.getCause(); + if (innerCause instanceof UnknownHostException) { + ThreadUtils.sleep(sleepMillsBeforeRetryCount); + return true; + } + return false; + } +} From 57577638bd5b96923fc952b771f26b4e19e67d65 Mon Sep 17 00:00:00 2001 From: jsonwan Date: Tue, 4 Jun 2024 21:50:03 +0800 Subject: [PATCH 03/24] =?UTF-8?q?perf:=20=E7=AC=AC=E4=B8=89=E6=96=B9?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E6=BA=90=E4=BB=BB=E5=8A=A1=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E6=97=A0=E6=8D=9F=E6=9B=B4=E6=96=B0=20#3017?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 下线时等待最后一次心跳结束再下线 --- 
.../bk/job/file/worker/service/OpService.java | 21 +++++--- .../worker/task/heartbeat/HeartBeatTask.java | 52 +++++++++++++++++-- 2 files changed, 61 insertions(+), 12 deletions(-) diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/service/OpService.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/service/OpService.java index deb88f83c0..7be3501fd3 100644 --- a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/service/OpService.java +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/service/OpService.java @@ -53,22 +53,27 @@ public class OpService { private final GatewayInfoService gatewayInfoService; private final EnvironmentService environmentService; private final TaskReporter taskReporter; + private final HeartBeatTask heartBeatTask; @Autowired - public OpService(WorkerConfig workerConfig, FileTaskService fileTaskService, - GatewayInfoService gatewayInfoService, EnvironmentService environmentService, - TaskReporter taskReporter) { + public OpService(WorkerConfig workerConfig, + FileTaskService fileTaskService, + GatewayInfoService gatewayInfoService, + EnvironmentService environmentService, + TaskReporter taskReporter, + HeartBeatTask heartBeatTask) { this.workerConfig = workerConfig; this.fileTaskService = fileTaskService; this.gatewayInfoService = gatewayInfoService; this.environmentService = environmentService; this.taskReporter = taskReporter; + this.heartBeatTask = heartBeatTask; } public List offLine() { List runningTaskIdList = fileTaskService.getAllTaskIdList(); // 停止心跳 - HeartBeatTask.stopHeartBeat(); + heartBeatTask.stopAndWaitLastHeartBeatFinish(); // 调网关接口下线自己 String url = gatewayInfoService.getWorkerOffLineUrl(); OffLineAndReDispatchReq offLineReq = new OffLineAndReDispatchReq(); @@ -84,10 +89,10 @@ public 
List offLine() { String respStr; try { respStr = httpHelper.requestForSuccessResp( - HttpRequest.builder(HttpMethodEnum.POST, url) - .setStringEntity(req.getBody()) - .setHeaders(req.getHeaders()) - .build()) + HttpRequest.builder(HttpMethodEnum.POST, url) + .setStringEntity(req.getBody()) + .setHeaders(req.getHeaders()) + .build()) .getEntity(); log.info(String.format("respStr=%s", respStr)); // 停止任务 diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/task/heartbeat/HeartBeatTask.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/task/heartbeat/HeartBeatTask.java index 472b911a20..407e7775e4 100644 --- a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/task/heartbeat/HeartBeatTask.java +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/task/heartbeat/HeartBeatTask.java @@ -25,6 +25,7 @@ package com.tencent.bk.job.file.worker.task.heartbeat; import com.tencent.bk.job.common.model.http.HttpReq; +import com.tencent.bk.job.common.util.ThreadUtils; import com.tencent.bk.job.common.util.http.HttpReqGenUtil; import com.tencent.bk.job.common.util.http.JobHttpClient; import com.tencent.bk.job.common.util.json.JsonUtils; @@ -36,6 +37,7 @@ import com.tencent.bk.job.file_gateway.model.req.inner.HeartBeatReq; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.tuple.Pair; +import org.slf4j.helpers.MessageFormatter; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; @@ -45,7 +47,8 @@ @Service public class HeartBeatTask { - public static volatile boolean runFlag = true; + public volatile boolean shouldRun = true; + public volatile boolean running = false; private final JobHttpClient jobHttpClient; private final WorkerConfig workerConfig; @@ -66,8 +69,38 @@ public 
HeartBeatTask(JobHttpClient jobHttpClient, this.environmentService = environmentService; } - public static void stopHeartBeat() { - runFlag = false; + /** + * 停止心跳并等待最后一次心跳结束,防止下线后心跳请求再次将file-worker状态更新为在线 + */ + public void stopAndWaitLastHeartBeatFinish() { + shouldRun = false; + if (!running) { + return; + } + waitUntilNotRunning(30); + } + + @SuppressWarnings("SameParameterValue") + private void waitUntilNotRunning(int maxSeconds) { + long waitStartTimeMills = System.currentTimeMillis(); + boolean shouldWait; + do { + ThreadUtils.sleep(100); + long durationMills = System.currentTimeMillis() - waitStartTimeMills; + if (!running) { + String msg = MessageFormatter.format( + "Waited {}ms for last heartBeat finish", + durationMills + ).getMessage(); + if (durationMills >= 15000) { + log.warn(msg); + } else { + log.debug(msg); + } + return; + } + shouldWait = durationMills < maxSeconds * 1000L; + } while (shouldWait); } private HeartBeatReq getHeartBeatReq() { @@ -102,10 +135,21 @@ private HeartBeatReq getHeartBeatReq() { } public void run() { - if (!runFlag) { + if (!shouldRun) { log.info("HeartBeat closed, ignore"); return; } + try { + running = true; + doHeartBeat(); + } catch (Exception e) { + log.warn("Fail to doHeartBeat", e); + } finally { + running = false; + } + } + + private void doHeartBeat() { String url = gatewayInfoService.getHeartBeatUrl(); HeartBeatReq heartBeatReq = getHeartBeatReq(); log.info("HeartBeat: url={},body={}", url, JsonUtils.toJsonWithoutSkippedFields(heartBeatReq)); From afcedcec37882503252019beaf82b7b22036a7a4 Mon Sep 17 00:00:00 2001 From: jsonwan Date: Wed, 5 Jun 2024 11:03:50 +0800 Subject: [PATCH 04/24] =?UTF-8?q?perf:=20=E7=AC=AC=E4=B8=89=E6=96=B9?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E6=BA=90=E4=BB=BB=E5=8A=A1=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E6=97=A0=E6=8D=9F=E6=9B=B4=E6=96=B0=20#3017?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 应用重试策略 --- 
.../job/file_gateway/service/impl/BatchTaskServiceImpl.java | 6 +++++- .../service/impl/RetryPolicyFileSourceTaskServiceImpl.java | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/BatchTaskServiceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/BatchTaskServiceImpl.java index e12af5bec2..15dce4f51f 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/BatchTaskServiceImpl.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/BatchTaskServiceImpl.java @@ -38,6 +38,7 @@ import com.tencent.bk.job.file_gateway.model.resp.inner.TaskInfoDTO; import com.tencent.bk.job.file_gateway.service.BatchTaskService; import com.tencent.bk.job.file_gateway.service.FileSourceTaskService; +import com.tencent.bk.job.file_gateway.service.RetryPolicyFileSourceTaskService; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; @@ -51,14 +52,17 @@ public class BatchTaskServiceImpl implements BatchTaskService { private final FileSourceTaskService fileSourceTaskService; + private final RetryPolicyFileSourceTaskService retryPolicyFileSourceTaskService; private final FileSourceBatchTaskDAO fileSourceBatchTaskDAO; private final FileSourceTaskDAO fileSourceTaskDAO; @Autowired public BatchTaskServiceImpl(FileSourceTaskService fileSourceTaskService, + RetryPolicyFileSourceTaskService retryPolicyFileSourceTaskService, FileSourceBatchTaskDAO fileSourceBatchTaskDAO, FileSourceTaskDAO fileSourceTaskDAO) { this.fileSourceTaskService = fileSourceTaskService; + this.retryPolicyFileSourceTaskService = retryPolicyFileSourceTaskService; this.fileSourceBatchTaskDAO = 
fileSourceBatchTaskDAO; this.fileSourceTaskDAO = fileSourceTaskDAO; } @@ -81,7 +85,7 @@ public BatchTaskInfoDTO startFileSourceBatchDownloadTask(String username, batchTaskInfoDTO.setBatchTaskId(batchTaskId); List taskInfoDTOList = new ArrayList<>(); for (FileSourceTaskContent fileSourceTaskContent : fileSourceTaskList) { - TaskInfoDTO taskInfoDTO = fileSourceTaskService.startFileSourceDownloadTask( + TaskInfoDTO taskInfoDTO = retryPolicyFileSourceTaskService.startFileSourceDownloadTask( username, appId, stepInstanceId, diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/RetryPolicyFileSourceTaskServiceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/RetryPolicyFileSourceTaskServiceImpl.java index 1c1222a013..bbbb084bee 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/RetryPolicyFileSourceTaskServiceImpl.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/RetryPolicyFileSourceTaskServiceImpl.java @@ -86,7 +86,7 @@ public TaskInfoDTO startFileSourceDownloadTask(String username, retryCount } ).getMessage(); - log.warn(msg, e); + log.info(msg, e); } else { throw e; } From c5fac00eeb0d5a61d7484af5203ad38ac37a9c39 Mon Sep 17 00:00:00 2001 From: jsonwan Date: Wed, 5 Jun 2024 11:57:16 +0800 Subject: [PATCH 05/24] =?UTF-8?q?perf:=20=E7=AC=AC=E4=B8=89=E6=96=B9?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E6=BA=90=E4=BB=BB=E5=8A=A1=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E6=97=A0=E6=8D=9F=E6=9B=B4=E6=96=B0=20#3017?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 应用重试策略 --- .../service/retry/impl/UnknownHostExceptionRetryPolicy.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/retry/impl/UnknownHostExceptionRetryPolicy.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/retry/impl/UnknownHostExceptionRetryPolicy.java index a742025394..b08d248e29 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/retry/impl/UnknownHostExceptionRetryPolicy.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/retry/impl/UnknownHostExceptionRetryPolicy.java @@ -66,7 +66,7 @@ public boolean shouldRetry(FileSourceTaskRetryContext context, int retryCount) { if (!(cause instanceof ResourceAccessException)) { return false; } - Throwable innerCause = exception.getCause(); + Throwable innerCause = cause.getCause(); if (innerCause instanceof UnknownHostException) { ThreadUtils.sleep(sleepMillsBeforeRetryCount); return true; From b4f5f03296f6720451575423ad94dfa1b201f3bc Mon Sep 17 00:00:00 2001 From: hLinx <327159425@qq.com> Date: Thu, 6 Jun 2024 15:14:35 +0800 Subject: [PATCH 06/24] =?UTF-8?q?fix:=20=E4=B8=9A=E5=8A=A1=E9=9B=86?= =?UTF-8?q?=E6=89=A7=E8=A1=8C=E5=AF=B9=E8=B1=A1=E9=80=89=E6=8B=A9=E5=99=A8?= =?UTF-8?q?=EF=BC=8C=E9=80=89=E6=8B=A9=E5=AE=B9=E5=99=A8=E6=8B=93=E6=89=91?= =?UTF-8?q?=E6=8A=A5=E9=94=99=20#2996?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/components/task-step/common/execute-target/index.vue | 2 +- .../task-step/common/source-file/view/server/index.vue | 2 +- .../task-step/common/source-file/view/server/only-host.vue | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/frontend/src/components/task-step/common/execute-target/index.vue b/src/frontend/src/components/task-step/common/execute-target/index.vue index 5e3ccebed2..b71280d9d7 100644 --- 
a/src/frontend/src/components/task-step/common/execute-target/index.vue +++ b/src/frontend/src/components/task-step/common/execute-target/index.vue @@ -292,7 +292,7 @@ }, created() { this.ipSelectorConfig = {}; - if (this.from === 'execute') { + if (this.from === 'execute' && window.PROJECT_CONFIG.SCOPE_TYPE !== 'biz_set') { this.ipSelectorConfig = { panelList: [ 'staticTopo', diff --git a/src/frontend/src/components/task-step/common/source-file/view/server/index.vue b/src/frontend/src/components/task-step/common/source-file/view/server/index.vue index 27b3db5103..1542cf1a86 100644 --- a/src/frontend/src/components/task-step/common/source-file/view/server/index.vue +++ b/src/frontend/src/components/task-step/common/source-file/view/server/index.vue @@ -214,7 +214,7 @@ }, created() { this.ipSelectorConfig = {}; - if (this.from === 'execute') { + if (this.from === 'execute' && window.PROJECT_CONFIG.SCOPE_TYPE !== 'biz_set') { this.ipSelectorConfig = { panelList: [ 'staticTopo', diff --git a/src/frontend/src/components/task-step/common/source-file/view/server/only-host.vue b/src/frontend/src/components/task-step/common/source-file/view/server/only-host.vue index 8c8ef3913b..3080f70386 100644 --- a/src/frontend/src/components/task-step/common/source-file/view/server/only-host.vue +++ b/src/frontend/src/components/task-step/common/source-file/view/server/only-host.vue @@ -149,7 +149,7 @@ }, created() { this.ipSelectorConfig = {}; - if (this.from === 'execute') { + if (this.from === 'execute' && window.PROJECT_CONFIG.SCOPE_TYPE !== 'biz_set') { this.ipSelectorConfig = { panelList: [ 'staticTopo', From 6eefe9a35d77119bc6c6307daf3bd029a0901388 Mon Sep 17 00:00:00 2001 From: jsonwan Date: Thu, 6 Jun 2024 18:15:02 +0800 Subject: [PATCH 07/24] =?UTF-8?q?perf:=20=E7=AC=AC=E4=B8=89=E6=96=B9?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E6=BA=90=E4=BB=BB=E5=8A=A1=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E6=97=A0=E6=8D=9F=E6=9B=B4=E6=96=B0=20#3017?= MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit 1.重调度过程开启事务; 2.添加重调度相关度量指标。 --- .../model/resp/inner/TaskInfoDTO.java | 2 + .../file_gateway/api/op/OpResourceImpl.java | 4 +- .../remote/RemoteFileWorkerResourceImpl.java | 2 +- .../metrics/MetricsConstants.java | 4 + .../{ => dispatch}/DispatchService.java | 2 +- .../{ => dispatch}/ReDispatchService.java | 4 +- .../dispatch/ReDispatchTaskService.java | 31 +++ .../impl/DispatchServiceImpl.java | 5 +- .../dispatch/impl/ReDispatchServiceImpl.java | 144 ++++++++++ .../service/dispatch/impl/ReDispatchTask.java | 106 ++++++++ .../impl/ReDispatchTaskServiceImpl.java | 142 ++++++++++ .../service/impl/FileServiceImpl.java | 2 +- .../impl/FileSourceTaskServiceImpl.java | 4 +- .../service/impl/ReDispatchServiceImpl.java | 245 ------------------ .../task/dispatch/ReDispatchTimeoutTask.java | 2 +- .../FileSourceStatusUpdateTask.java | 2 +- 16 files changed, 444 insertions(+), 257 deletions(-) rename src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/{ => dispatch}/DispatchService.java (97%) rename src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/{ => dispatch}/ReDispatchService.java (94%) create mode 100644 src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/ReDispatchTaskService.java rename src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/{ => dispatch}/impl/DispatchServiceImpl.java (98%) create mode 100644 src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/ReDispatchServiceImpl.java create mode 100644 src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/ReDispatchTask.java create mode 100644 
src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/ReDispatchTaskServiceImpl.java delete mode 100644 src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/ReDispatchServiceImpl.java diff --git a/src/backend/job-file-gateway/api-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/model/resp/inner/TaskInfoDTO.java b/src/backend/job-file-gateway/api-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/model/resp/inner/TaskInfoDTO.java index 37d25b15f1..ca824fbf94 100644 --- a/src/backend/job-file-gateway/api-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/model/resp/inner/TaskInfoDTO.java +++ b/src/backend/job-file-gateway/api-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/model/resp/inner/TaskInfoDTO.java @@ -35,6 +35,8 @@ public class TaskInfoDTO { String taskId; String fileSourceName; boolean fileSourcePublic; + Long workerId; + String workerAccessHost; Long cloudId; String ipProtocol; String ip; diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/op/OpResourceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/op/OpResourceImpl.java index 7d6c75e8b3..c0b42fce86 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/op/OpResourceImpl.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/op/OpResourceImpl.java @@ -25,7 +25,7 @@ package com.tencent.bk.job.file_gateway.api.op; import com.tencent.bk.job.common.model.Response; -import com.tencent.bk.job.file_gateway.service.ReDispatchService; +import com.tencent.bk.job.file_gateway.service.dispatch.ReDispatchService; import lombok.extern.slf4j.Slf4j; import 
org.springframework.beans.factory.annotation.Autowired; import org.springframework.web.bind.annotation.RestController; @@ -44,6 +44,6 @@ public OpResourceImpl(ReDispatchService reDispatchService) { @Override public Response getReDispatchThreadsNum(String username) { - return Response.buildSuccessResp(reDispatchService.getReDispatchThreadsNum(username)); + return Response.buildSuccessResp(reDispatchService.getReDispatchThreadsNum()); } } diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/remote/RemoteFileWorkerResourceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/remote/RemoteFileWorkerResourceImpl.java index ffc6dc7705..43076f374c 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/remote/RemoteFileWorkerResourceImpl.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/remote/RemoteFileWorkerResourceImpl.java @@ -32,7 +32,7 @@ import com.tencent.bk.job.file_gateway.model.req.inner.UpdateFileSourceTaskReq; import com.tencent.bk.job.file_gateway.service.FileSourceTaskService; import com.tencent.bk.job.file_gateway.service.FileWorkerService; -import com.tencent.bk.job.file_gateway.service.ReDispatchService; +import com.tencent.bk.job.file_gateway.service.dispatch.ReDispatchService; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.web.bind.annotation.RestController; diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/metrics/MetricsConstants.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/metrics/MetricsConstants.java index 94ee7053c7..7061a7d9cf 100644 --- 
a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/metrics/MetricsConstants.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/metrics/MetricsConstants.java @@ -31,16 +31,20 @@ public class MetricsConstants { public static final String NAME_FILE_WORKER_ONLINE_NUM = "fileWorker.online.num"; public static final String NAME_FILE_WORKER_RESPONSE_TIME = "fileWorker.response.time"; public static final String NAME_FILE_GATEWAY_DISPATCH_TIME = "fileGateway.dispatch.time"; + public static final String NAME_FILE_GATEWAY_REDISPATCH_TIME = "fileGateway.reDispatch.time"; // tag public static final String TAG_KEY_MODULE = "module"; public static final String TAG_KEY_REQUEST_SOURCE = "requestSource"; public static final String TAG_KEY_DISPATCH_RESULT = "dispatchResult"; + public static final String TAG_KEY_APP_ID = "appId"; // value public static final String TAG_VALUE_MODULE_FILE_WORKER = "fileWorker"; public static final String TAG_VALUE_MODULE_FILE_GATEWAY = "fileGateway"; public static final String TAG_VALUE_DISPATCH_RESULT_TRUE = "true"; public static final String TAG_VALUE_DISPATCH_RESULT_FALSE = "false"; + public static final String TAG_VALUE_REDISPATCH_STATUS_SUCCESS = "success"; + public static final String TAG_VALUE_REDISPATCH_STATUS_ERROR = "error"; } diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/DispatchService.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/DispatchService.java similarity index 97% rename from src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/DispatchService.java rename to src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/DispatchService.java index 32306d8104..fc2192aaa9 
100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/DispatchService.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/DispatchService.java @@ -22,7 +22,7 @@ * IN THE SOFTWARE. */ -package com.tencent.bk.job.file_gateway.service; +package com.tencent.bk.job.file_gateway.service.dispatch; import com.tencent.bk.job.file_gateway.model.dto.FileSourceDTO; import com.tencent.bk.job.file_gateway.model.dto.FileWorkerDTO; diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/ReDispatchService.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/ReDispatchService.java similarity index 94% rename from src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/ReDispatchService.java rename to src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/ReDispatchService.java index e96ef1bb91..71736c8983 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/ReDispatchService.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/ReDispatchService.java @@ -22,7 +22,7 @@ * IN THE SOFTWARE. 
*/ -package com.tencent.bk.job.file_gateway.service; +package com.tencent.bk.job.file_gateway.service.dispatch; import java.util.List; @@ -39,6 +39,6 @@ List reDispatchByWorker( boolean reDispatchByGateway(String fileSourceTaskId, Long initDelayMills, Long intervalMills); - Integer getReDispatchThreadsNum(String username); + Integer getReDispatchThreadsNum(); } diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/ReDispatchTaskService.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/ReDispatchTaskService.java new file mode 100644 index 0000000000..ac52f5ca40 --- /dev/null +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/ReDispatchTaskService.java @@ -0,0 +1,31 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. + * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +package com.tencent.bk.job.file_gateway.service.dispatch; + +import com.tencent.bk.job.file_gateway.model.resp.inner.TaskInfoDTO; + +public interface ReDispatchTaskService { + TaskInfoDTO reDispatchFileSourceTask(String fileSourceTaskId); +} diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/DispatchServiceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/DispatchServiceImpl.java similarity index 98% rename from src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/DispatchServiceImpl.java rename to src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/DispatchServiceImpl.java index 45cbced508..55e5f607d3 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/DispatchServiceImpl.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/DispatchServiceImpl.java @@ -22,7 +22,7 @@ * IN THE SOFTWARE. 
*/ -package com.tencent.bk.job.file_gateway.service.impl; +package com.tencent.bk.job.file_gateway.service.dispatch.impl; import com.tencent.bk.job.common.util.json.JsonUtils; import com.tencent.bk.job.file_gateway.consts.WorkerSelectModeEnum; @@ -32,8 +32,9 @@ import com.tencent.bk.job.file_gateway.model.dto.FileSourceDTO; import com.tencent.bk.job.file_gateway.model.dto.FileWorkerDTO; import com.tencent.bk.job.file_gateway.service.AbilityTagService; -import com.tencent.bk.job.file_gateway.service.DispatchService; +import com.tencent.bk.job.file_gateway.service.dispatch.DispatchService; import com.tencent.bk.job.file_gateway.service.FileWorkerService; +import com.tencent.bk.job.file_gateway.service.impl.WorkerIdsCondition; import io.micrometer.core.instrument.MeterRegistry; import io.micrometer.core.instrument.Tag; import io.micrometer.core.instrument.Timer; diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/ReDispatchServiceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/ReDispatchServiceImpl.java new file mode 100644 index 0000000000..c1ad69716e --- /dev/null +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/ReDispatchServiceImpl.java @@ -0,0 +1,144 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. 
+ * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+package com.tencent.bk.job.file_gateway.service.dispatch.impl;
+
+import com.tencent.bk.job.common.constant.ErrorCode;
+import com.tencent.bk.job.common.exception.InternalException;
+import com.tencent.bk.job.file_gateway.model.dto.FileSourceTaskDTO;
+import com.tencent.bk.job.file_gateway.model.dto.FileWorkerDTO;
+import com.tencent.bk.job.file_gateway.service.FileSourceTaskService;
+import com.tencent.bk.job.file_gateway.service.FileWorkerService;
+import com.tencent.bk.job.file_gateway.service.dispatch.ReDispatchService;
+import com.tencent.bk.job.file_gateway.service.dispatch.ReDispatchTaskService;
+import lombok.extern.slf4j.Slf4j;
+import org.slf4j.helpers.FormattingTuple;
+import org.slf4j.helpers.MessageFormatter;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Service;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Timer;
+import java.util.TimerTask;
+
+/**
+ * Schedules delayed re-dispatching of file source tasks, either on request of a file-worker
+ * ({@link #reDispatchByWorker}) or on the gateway's own initiative ({@link #reDispatchByGateway}).
+ * The re-dispatch itself is executed by {@link ReDispatchTask}.
+ */
+@Slf4j
+@Service
+public class ReDispatchServiceImpl implements ReDispatchService {
+
+    // At most 50 threads are used for re-dispatching
+    private static final int MAX_THREAD_NUM_REDISPATCH = 50;
+
+    private final FileWorkerService fileWorkerService;
+    private final FileSourceTaskService fileSourceTaskService;
+    private final ReDispatchTaskService reDispatchTaskService;
+
+    @Autowired
+    public ReDispatchServiceImpl(
+        FileWorkerService fileWorkerService,
+        FileSourceTaskService fileSourceTaskService,
+        ReDispatchTaskService reDispatchTaskService
+    ) {
+        this.fileWorkerService = fileWorkerService;
+        this.fileSourceTaskService = fileSourceTaskService;
+        this.reDispatchTaskService = reDispatchTaskService;
+    }
+
+    /**
+     * Takes the requesting worker offline and schedules its tasks for re-dispatching.
+     *
+     * @param accessHost     access host of the worker that asks for re-dispatching
+     * @param accessPort     access port of that worker
+     * @param taskIdList     ids of the file source tasks to re-dispatch
+     * @param initDelayMills delay before each re-dispatch starts, in milliseconds
+     * @param intervalMills  wait between retries of a failed re-dispatch, in milliseconds
+     * @return the given {@code taskIdList}, unchanged (tasks skipped because the limit was reached are
+     * still contained in it)
+     * @throws InternalException if no worker is registered under the given host and port
+     */
+    @Override
+    public List<String> reDispatchByWorker(
+        String accessHost,
+        Integer accessPort,
+        List<String> taskIdList,
+        Long initDelayMills,
+        Long intervalMills
+    ) {
+        FileWorkerDTO fileWorkerDTO = fileWorkerService.getFileWorker(accessHost, accessPort);
+        if (fileWorkerDTO == null) {
+            FormattingTuple msg = MessageFormatter.format(
+                "Fail to find file-worker by accessHost:{} accessPort:{}", accessHost, accessPort
+            );
+            log.warn(msg.getMessage());
+            throw new InternalException(
+                ErrorCode.FILE_WORKER_NOT_FOUND,
+                new String[]{
+                    "accessHost:" + accessHost + ",accessPort:" + accessPort,
+                }
+            );
+        }
+        Long workerId = fileWorkerDTO.getId();
+        log.debug("worker {} apply to reDispatch tasks:{}, initDelayMills={}, intervalMills={}", workerId, taskIdList
+            , initDelayMills, intervalMills);
+        // 1. Take the worker offline immediately
+        int affectedWorkerNum = fileWorkerService.offLine(workerId);
+        log.info("{} worker state changed to offline", affectedWorkerNum);
+        // 2. Re-dispatch the tasks after a delay
+        for (String taskId : taskIdList) {
+            if (getReDispatchThreadsNum() >= MAX_THREAD_NUM_REDISPATCH) {
+                log.warn("reDispatch thread reach MAX_NUM:{}, do not reDispatch {}", MAX_THREAD_NUM_REDISPATCH, taskId);
+                continue;
+            }
+            scheduleReDispatch(taskId, initDelayMills, intervalMills);
+        }
+        return taskIdList;
+    }
+
+    /**
+     * Recalls the task from its current worker (best effort) and schedules it for re-dispatching.
+     *
+     * @param fileSourceTaskId id of the file source task to re-dispatch
+     * @param initDelayMills   delay before the re-dispatch starts, in milliseconds
+     * @param intervalMills    wait between retries of a failed re-dispatch, in milliseconds
+     * @return {@code true} if a re-dispatch was scheduled, {@code false} if the task does not exist or the
+     * re-dispatch limit is reached
+     */
+    @Override
+    public boolean reDispatchByGateway(String fileSourceTaskId, Long initDelayMills, Long intervalMills) {
+        // 1. Try to notify the worker to cancel the task proactively
+        FileSourceTaskDTO fileSourceTaskDTO = fileSourceTaskService.getFileSourceTaskById(fileSourceTaskId);
+        if (fileSourceTaskDTO == null) {
+            log.warn("task not exist, ignore, id={}", fileSourceTaskId);
+            return false;
+        }
+        try {
+            fileSourceTaskService.recallTasks(Collections.singletonList(fileSourceTaskId));
+        } catch (Throwable t) {
+            // Deliberately best effort: a failed recall must not prevent the re-dispatch
+            log.warn("Fail to recallTask:{}", fileSourceTaskId, t);
+        }
+        // 2. Re-dispatch
+        if (getReDispatchThreadsNum() >= MAX_THREAD_NUM_REDISPATCH) {
+            log.warn("reDispatch thread reach MAX_NUM:{}, do not reDispatch {}", MAX_THREAD_NUM_REDISPATCH,
+                fileSourceTaskId);
+            return false;
+        }
+        scheduleReDispatch(fileSourceTaskId, initDelayMills, intervalMills);
+        return true;
+    }
+
+    /**
+     * Runs a {@link ReDispatchTask} once on a dedicated timer after {@code initDelayMills}.
+     * The timer is cancelled as soon as the task has run, so that its thread terminates instead of
+     * lingering until the timer object happens to be garbage collected.
+     */
+    private void scheduleReDispatch(String fileSourceTaskId, Long initDelayMills, Long intervalMills) {
+        final ReDispatchTask reDispatchTask = buildReDispatchTask(fileSourceTaskId, intervalMills);
+        final Timer timer = new Timer();
+        try {
+            timer.schedule(new TimerTask() {
+                @Override
+                public void run() {
+                    try {
+                        reDispatchTask.run();
+                    } finally {
+                        // One-shot timer: nothing else will ever be scheduled on it
+                        timer.cancel();
+                    }
+                }
+            }, initDelayMills);
+        } catch (RuntimeException e) {
+            // schedule() rejected the delay: release the timer thread that was already started
+            timer.cancel();
+            throw e;
+        }
+    }
+
+    private ReDispatchTask buildReDispatchTask(String fileSourceTaskId, Long intervalMills) {
+        return new ReDispatchTask(
+            reDispatchTaskService,
+            fileSourceTaskId,
+            intervalMills
+        );
+    }
+
+    /**
+     * @return number of re-dispatch tasks that are executing right now; tasks still waiting for their
+     * initial delay are not counted
+     */
+    @Override
+    public Integer getReDispatchThreadsNum() {
+        return ReDispatchTask.getReDispatchThreadsNum();
+    }
+
+}
diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/ReDispatchTask.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/ReDispatchTask.java
new file mode 100644
index 0000000000..7dedfc13ec
--- /dev/null
+++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/ReDispatchTask.java
@@ -0,0 +1,106 @@
+/*
+ * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available.
+ *
+ * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
+ *
+ * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License.
+ *
+ * License for BK-JOB蓝鲸智云作业平台:
+ * --------------------------------------------------------------------
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+ * documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
+ * to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all copies or substantial portions of
+ * the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+ * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+package com.tencent.bk.job.file_gateway.service.dispatch.impl;
+
+import com.tencent.bk.job.common.util.ThreadUtils;
+import com.tencent.bk.job.file_gateway.model.resp.inner.TaskInfoDTO;
+import com.tencent.bk.job.file_gateway.service.dispatch.ReDispatchTaskService;
+import lombok.extern.slf4j.Slf4j;
+import org.slf4j.helpers.MessageFormatter;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.TimerTask;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * Timer task that re-dispatches a single file source task, retrying a limited number of times.
+ * A task id that is already being re-dispatched by another instance is ignored.
+ */
+@Slf4j
+public class ReDispatchTask extends TimerTask {
+    // Number of ReDispatchTask instances that are executing right now (shared by all instances)
+    private static final AtomicInteger reDispatchThreadNum = new AtomicInteger(0);
+    // Ids of the tasks being re-dispatched right now; every access is synchronized on the set itself
+    private static final Set<String> reDispatchingTaskIds = new HashSet<>();
+    private final ReDispatchTaskService reDispatchTaskService;
+    private final String fileSourceTaskId;
+    private final Long intervalMills;
+
+    /**
+     * @param reDispatchTaskService service that performs the actual re-dispatch
+     * @param fileSourceTaskId      id of the file source task to re-dispatch
+     * @param intervalMills         wait between two attempts, in milliseconds
+     */
+    ReDispatchTask(ReDispatchTaskService reDispatchTaskService,
+                   String fileSourceTaskId,
+                   Long intervalMills) {
+        this.reDispatchTaskService = reDispatchTaskService;
+        this.fileSourceTaskId = fileSourceTaskId;
+        this.intervalMills = intervalMills;
+    }
+
+    /**
+     * @return number of re-dispatch tasks that are executing right now
+     */
+    public static Integer getReDispatchThreadsNum() {
+        return reDispatchThreadNum.get();
+    }
+
+    @Override
+    public void run() {
+        synchronized (reDispatchingTaskIds) {
+            // add() returns false when the id is already registered by a running task
+            if (!reDispatchingTaskIds.add(fileSourceTaskId)) {
+                log.info("task {} already in reDispatching, ignore", fileSourceTaskId);
+                return;
+            }
+            reDispatchThreadNum.incrementAndGet();
+        }
+        try {
+            reDispatchTaskWithRetry();
+        } finally {
+            // Always release the registration: otherwise anything escaping the retry loop would block
+            // this task id forever and permanently consume one slot of the re-dispatch limit
+            synchronized (reDispatchingTaskIds) {
+                reDispatchingTaskIds.remove(fileSourceTaskId);
+                reDispatchThreadNum.decrementAndGet();
+            }
+        }
+    }
+
+    /**
+     * Re-dispatches the file source task, retrying on failure.
+     * Up to three attempts are made; the last failure is logged at error level and not rethrown.
+     */
+    private void reDispatchTaskWithRetry() {
+        boolean reDispatchSuccess = false;
+        int retryCount = 0;
+        int maxRetryCount = 3;
+        while (!reDispatchSuccess && retryCount < maxRetryCount) {
+            try {
+                TaskInfoDTO taskInfoDTO = reDispatchTaskService.reDispatchFileSourceTask(fileSourceTaskId);
+                reDispatchSuccess = true;
+                log.debug("reDispatch result of {}:{}", fileSourceTaskId, taskInfoDTO);
+            } catch (Exception e) {
+                retryCount += 1;
+                String message = MessageFormatter.format(
+                    "Fail to redispatch task {}, wait {}ms to retry {}",
+                    new Object[]{
+                        fileSourceTaskId,
+                        intervalMills,
+                        retryCount
+                    }
+                ).getMessage();
+                if (retryCount < maxRetryCount) {
+                    log.info(message);
+                    ThreadUtils.sleep(intervalMills);
+                } else {
+                    // Attempts exhausted: report the cause and give up
+                    log.error(message, e);
+                }
+            }
+        }
+    }
+}
diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/ReDispatchTaskServiceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/ReDispatchTaskServiceImpl.java
new file mode 100644
index 0000000000..3403f508b6
--- /dev/null
+++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/ReDispatchTaskServiceImpl.java
@@ -0,0 +1,142 @@
+/*
+ * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available.
+ *
+ * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
+ *
+ * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License.
+ * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+package com.tencent.bk.job.file_gateway.service.dispatch.impl;
+
+import com.tencent.bk.job.common.mysql.JobTransactional;
+import com.tencent.bk.job.file_gateway.metrics.MetricsConstants;
+import com.tencent.bk.job.file_gateway.model.dto.FileSourceTaskDTO;
+import com.tencent.bk.job.file_gateway.model.dto.FileTaskDTO;
+import com.tencent.bk.job.file_gateway.model.resp.inner.TaskInfoDTO;
+import com.tencent.bk.job.file_gateway.service.FileSourceTaskService;
+import com.tencent.bk.job.file_gateway.service.FileTaskService;
+import com.tencent.bk.job.file_gateway.service.dispatch.ReDispatchTaskService;
+import io.micrometer.core.instrument.MeterRegistry;
+import io.micrometer.core.instrument.Tag;
+import io.micrometer.core.instrument.Timer;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Service;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+/**
+ * Re-dispatches a file source task by deleting its records and starting it again under the same id,
+ * and records how long each re-dispatch took.
+ */
+@Slf4j
+@Service
+public class ReDispatchTaskServiceImpl implements ReDispatchTaskService {
+
+    private final FileSourceTaskService fileSourceTaskService;
+    private final FileTaskService fileTaskService;
+    private final MeterRegistry meterRegistry;
+
+    @Autowired
+    public ReDispatchTaskServiceImpl(FileSourceTaskService fileSourceTaskService,
+                                     FileTaskService fileTaskService,
+                                     MeterRegistry meterRegistry) {
+        this.fileSourceTaskService = fileSourceTaskService;
+        this.fileTaskService = fileTaskService;
+        this.meterRegistry = meterRegistry;
+    }
+
+    /**
+     * Re-dispatches a file source task; a transaction is opened for the operation to keep the data
+     * consistent.
+     *
+     * @param fileSourceTaskId id of the file source task
+     * @return result of the re-dispatch
+     * @throws IllegalStateException if no file source task with the given id exists
+     */
+    @Override
+    @JobTransactional(transactionManager = "jobFileGatewayTransactionManager")
+    public TaskInfoDTO reDispatchFileSourceTask(String fileSourceTaskId) {
+        long startTime = System.currentTimeMillis();
+        FileSourceTaskDTO fileSourceTaskDTO = fileSourceTaskService.getFileSourceTaskById(fileSourceTaskId);
+        // Assume failure until the re-dispatch has completed, so that the metric tag is never null,
+        // whatever kind of throwable leaves the try block
+        String reDispatchStatus = MetricsConstants.TAG_VALUE_REDISPATCH_STATUS_ERROR;
+        try {
+            if (fileSourceTaskDTO == null) {
+                throw new IllegalStateException("FileSourceTask not exist, fileSourceTaskId=" + fileSourceTaskId);
+            }
+            TaskInfoDTO taskInfoDTO = doReDispatchFileSourceTask(fileSourceTaskDTO);
+            reDispatchStatus = MetricsConstants.TAG_VALUE_REDISPATCH_STATUS_SUCCESS;
+            return taskInfoDTO;
+        } finally {
+            long timeConsumingMills = System.currentTimeMillis() - startTime;
+            // The task may be missing: do not dereference it here, that would mask the real failure
+            Long appId = fileSourceTaskDTO == null ? null : fileSourceTaskDTO.getAppId();
+            recordReDispatchCost(timeConsumingMills, buildDispatchTags(appId, reDispatchStatus));
+        }
+    }
+
+    /**
+     * Builds the metric tags of one re-dispatch; a {@code null} appId is reported as the text "null".
+     */
+    private Iterable<Tag> buildDispatchTags(Long appId, String reDispatchStatus) {
+        List<Tag> tagList = new ArrayList<>();
+        tagList.add(Tag.of(MetricsConstants.TAG_KEY_MODULE, MetricsConstants.TAG_VALUE_MODULE_FILE_GATEWAY));
+        tagList.add(Tag.of(MetricsConstants.TAG_KEY_APP_ID, String.valueOf(appId)));
+        tagList.add(Tag.of(MetricsConstants.TAG_KEY_DISPATCH_RESULT, reDispatchStatus));
+        return tagList;
+    }
+
+    /**
+     * Deletes the records of the given task and starts it again with the same id and file paths.
+     */
+    private TaskInfoDTO doReDispatchFileSourceTask(FileSourceTaskDTO fileSourceTaskDTO) {
+        String fileSourceTaskId = fileSourceTaskDTO.getId();
+        Long oldFileWorkerId = fileSourceTaskDTO.getFileWorkerId();
+        // The file paths must be read before the sub tasks are deleted below
+        List<FileTaskDTO> fileTaskDTOList = fileTaskService.listFileTasks(fileSourceTaskId);
+        List<String> filePathList =
+            fileTaskDTOList.stream().map(FileTaskDTO::getFilePath).collect(Collectors.toList());
+        // 1. Delete the existing sub tasks
+        int deletedTaskNum = fileTaskService.deleteTasks(fileSourceTaskId);
+        // 2. Delete the existing FileSourceTask
+        int deletedFileSourceTaskNum = fileSourceTaskService.deleteFileSourceTaskById(fileSourceTaskId);
+        // 3. Dispatch the task again
+        TaskInfoDTO taskInfoDTO = fileSourceTaskService.startFileSourceDownloadTaskWithId(
+            fileSourceTaskDTO.getCreator(),
+            fileSourceTaskDTO.getAppId(),
+            fileSourceTaskDTO.getStepInstanceId(),
+            fileSourceTaskDTO.getExecuteCount(),
+            fileSourceTaskDTO.getBatchTaskId(),
+            fileSourceTaskDTO.getFileSourceId(),
+            filePathList,
+            fileSourceTaskId
+        );
+        log.info(
+            "FileSourceTask(id={}, oldFileWorkerId={}) reDispatched to worker(id={},accessHost={})," +
+                " [ {} fileTask, {} fileSourceTask] deleted and re-inserted",
+            fileSourceTaskId,
+            oldFileWorkerId,
+            taskInfoDTO.getWorkerId(),
+            taskInfoDTO.getWorkerAccessHost(),
+            deletedTaskNum,
+            deletedFileSourceTaskNum
+        );
+        return taskInfoDTO;
+    }
+
+    /**
+     * Records the duration of one re-dispatch in the re-dispatch timer identified by the given tags.
+     */
+    private void recordReDispatchCost(long timeConsumingMillis, Iterable<Tag> tags) {
+        Timer.builder(MetricsConstants.NAME_FILE_GATEWAY_REDISPATCH_TIME)
+            .description("ReDispatch FileSourceTask Cost")
+            .tags(tags)
+            .publishPercentileHistogram(true)
+            .minimumExpectedValue(Duration.ofMillis(10))
+            .maximumExpectedValue(Duration.ofSeconds(60L))
+            .register(meterRegistry)
+            .record(timeConsumingMillis, TimeUnit.MILLISECONDS);
+    }
+}
diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/FileServiceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/FileServiceImpl.java
index 753aec74c3..aefea20e56 100644
--- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/FileServiceImpl.java
+++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/FileServiceImpl.java
@@ -37,7 +37,7 @@
 import com.tencent.bk.job.file_gateway.model.req.common.ExecuteActionReq;
 import com.tencent.bk.job.file_gateway.model.resp.common.FileNodesDTO;
 import com.tencent.bk.job.file_gateway.model.resp.common.FileNodesVO;
-import com.tencent.bk.job.file_gateway.service.DispatchService;
+import com.tencent.bk.job.file_gateway.service.dispatch.DispatchService;
 import com.tencent.bk.job.file_gateway.service.FileService;
 import com.tencent.bk.job.file_gateway.service.FileSourceService;
 import com.tencent.bk.job.file_gateway.service.remote.FileSourceReqGenService;
diff --git
a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/FileSourceTaskServiceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/FileSourceTaskServiceImpl.java index 8e74c2ce32..5af08d1e54 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/FileSourceTaskServiceImpl.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/FileSourceTaskServiceImpl.java @@ -47,9 +47,9 @@ import com.tencent.bk.job.file_gateway.model.resp.inner.FileSourceTaskStatusDTO; import com.tencent.bk.job.file_gateway.model.resp.inner.TaskInfoDTO; import com.tencent.bk.job.file_gateway.model.resp.inner.ThirdFileSourceTaskLogDTO; -import com.tencent.bk.job.file_gateway.service.DispatchService; import com.tencent.bk.job.file_gateway.service.FileSourceTaskService; import com.tencent.bk.job.file_gateway.service.FileSourceTaskUpdateService; +import com.tencent.bk.job.file_gateway.service.dispatch.DispatchService; import com.tencent.bk.job.file_gateway.service.remote.FileSourceTaskReqGenService; import lombok.extern.slf4j.Slf4j; import org.slf4j.helpers.MessageFormatter; @@ -197,6 +197,8 @@ public TaskInfoDTO startFileSourceDownloadTaskWithId(String username, fileSourceTaskId, fileSourceDTO.getAlias(), fileSourceDTO.getPublicFlag(), + fileWorkerDTO.getId(), + fileWorkerDTO.getAccessHost(), fileWorkerDTO.getCloudAreaId(), fileWorkerDTO.getInnerIpProtocol(), fileWorkerDTO.getInnerIp() diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/ReDispatchServiceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/ReDispatchServiceImpl.java deleted file mode 100644 index 9d21639807..0000000000 --- 
a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/ReDispatchServiceImpl.java +++ /dev/null @@ -1,245 +0,0 @@ -/* - * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. - * - * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. - * - * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. - * - * License for BK-JOB蓝鲸智云作业平台: - * -------------------------------------------------------------------- - * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated - * documentation files (the "Software"), to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and - * to permit persons to whom the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all copies or substantial portions of - * the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO - * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF - * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -package com.tencent.bk.job.file_gateway.service.impl; - -import com.tencent.bk.job.common.constant.ErrorCode; -import com.tencent.bk.job.common.exception.InternalException; -import com.tencent.bk.job.file_gateway.model.dto.FileSourceDTO; -import com.tencent.bk.job.file_gateway.model.dto.FileSourceTaskDTO; -import com.tencent.bk.job.file_gateway.model.dto.FileTaskDTO; -import com.tencent.bk.job.file_gateway.model.dto.FileWorkerDTO; -import com.tencent.bk.job.file_gateway.model.resp.inner.TaskInfoDTO; -import com.tencent.bk.job.file_gateway.service.DispatchService; -import com.tencent.bk.job.file_gateway.service.FileSourceService; -import com.tencent.bk.job.file_gateway.service.FileSourceTaskService; -import com.tencent.bk.job.file_gateway.service.FileTaskService; -import com.tencent.bk.job.file_gateway.service.FileWorkerService; -import com.tencent.bk.job.file_gateway.service.ReDispatchService; -import lombok.extern.slf4j.Slf4j; -import org.slf4j.helpers.FormattingTuple; -import org.slf4j.helpers.MessageFormatter; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.stereotype.Service; - -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.Timer; -import java.util.TimerTask; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Collectors; - -@Slf4j -@Service -public class ReDispatchServiceImpl implements ReDispatchService { - - private final DispatchService dispatchService; - private final FileWorkerService fileWorkerService; - private final FileSourceService fileSourceService; - private final FileSourceTaskService fileSourceTaskService; - private final FileTaskService fileTaskService; - // 最多使用50线程进行重调度 - private final int MAX_THREAD_NUM_REDISPATCH = 50; - private final AtomicInteger reDispatchThreadNum = new AtomicInteger(0); - private final Set reDispatchingTaskIds = new HashSet<>(); - - @Autowired - public 
ReDispatchServiceImpl( - DispatchService dispatchService, - FileWorkerService fileWorkerService, - FileSourceService fileSourceService, - FileSourceTaskService fileSourceTaskService, - FileTaskService fileTaskService - ) { - this.dispatchService = dispatchService; - this.fileWorkerService = fileWorkerService; - this.fileSourceService = fileSourceService; - this.fileSourceTaskService = fileSourceTaskService; - this.fileTaskService = fileTaskService; - } - - @Override - public List reDispatchByWorker( - String accessHost, - Integer accessPort, - List taskIdList, - Long initDelayMills, - Long intervalMills - ) { - FileWorkerDTO fileWorkerDTO = fileWorkerService.getFileWorker(accessHost, accessPort); - if (fileWorkerDTO == null) { - FormattingTuple msg = MessageFormatter.format( - "Fail to find file-worker by accessHost:{} accessPort:{}", accessHost, accessPort - ); - log.warn(msg.getMessage()); - throw new InternalException( - ErrorCode.FILE_WORKER_NOT_FOUND, - new String[]{ - "accessHost:" + accessHost + ",accessPort:" + accessPort, - } - ); - } - Long workerId = fileWorkerDTO.getId(); - log.debug("worker {} apply to reDispatch tasks:{}, initDelayMills={}, intervalMills={}", workerId, taskIdList - , initDelayMills, intervalMills); - // 1.立即下线Worker - int affectedWorkerNum = fileWorkerService.offLine(workerId); - log.info("{} worker state changed to offline", affectedWorkerNum); - // 2.任务延时重调度 - for (String taskId : taskIdList) { - if (reDispatchThreadNum.get() >= MAX_THREAD_NUM_REDISPATCH) { - log.warn("reDispatch thread reach MAX_NUM:{}, do not reDispatch {}", MAX_THREAD_NUM_REDISPATCH, taskId); - } else { - Timer timer = new Timer(); - timer.schedule(new ReDispatchTask(taskId, intervalMills), initDelayMills); - } - } - return taskIdList; - } - - @Override - public boolean reDispatchByGateway(String fileSourceTaskId, Long initDelayMills, Long intervalMills) { - // 1.尝试通知Worker主动取消该任务 - FileSourceTaskDTO fileSourceTaskDTO = 
fileSourceTaskService.getFileSourceTaskById(fileSourceTaskId); - if (fileSourceTaskDTO == null) { - log.warn("task not exist, ignore, id={}", fileSourceTaskId); - return false; - } - try { - fileSourceTaskService.recallTasks(Collections.singletonList(fileSourceTaskId)); - } catch (Throwable t) { - log.warn("Fail to recallTask:{}", fileSourceTaskId, t); - } - // 2.重调度 - if (reDispatchThreadNum.get() >= MAX_THREAD_NUM_REDISPATCH) { - log.warn("reDispatch thread reach MAX_NUM:{}, do not reDispatch {}", MAX_THREAD_NUM_REDISPATCH, - fileSourceTaskId); - return false; - } else { - Timer timer = new Timer(); - timer.schedule(new ReDispatchTask(fileSourceTaskId, intervalMills), initDelayMills); - return true; - } - } - - @Override - public Integer getReDispatchThreadsNum(String username) { - return reDispatchThreadNum.get(); - } - - class ReDispatchTask extends TimerTask { - private final String fileSourceTaskId; - private final Long intervalMills; - - ReDispatchTask(String fileSourceTaskId, Long intervalMills) { - this.fileSourceTaskId = fileSourceTaskId; - this.intervalMills = intervalMills; - } - - @Override - public void run() { - synchronized (reDispatchingTaskIds) { - if (reDispatchingTaskIds.contains(fileSourceTaskId)) { - log.info("task {} already in reDispatching, ignore", fileSourceTaskId); - return; - } - reDispatchingTaskIds.add(fileSourceTaskId); - } - boolean reDispatchSuccess = false; - int retryCount = 0; - try { - reDispatchThreadNum.incrementAndGet(); - log.debug("taskId={}", fileSourceTaskId); - FileSourceTaskDTO fileSourceTaskDTO = fileSourceTaskService.getFileSourceTaskById(fileSourceTaskId); - log.debug("fileSourceTaskDTO={}", fileSourceTaskDTO); - if (fileSourceTaskDTO == null) { - log.warn("Cannot find fileSourceTaskDTO by id {}", fileSourceTaskId); - return; - } - List fileTaskDTOList = fileTaskService.listFileTasks(fileSourceTaskId); - log.debug("fileTaskDTOList={}", fileTaskDTOList); - List filePathList = - 
fileTaskDTOList.stream().map(FileTaskDTO::getFilePath).collect(Collectors.toList()); - - FileSourceDTO fileSourceDTO = fileSourceService.getFileSourceById(fileSourceTaskDTO.getFileSourceId()); - while (!reDispatchSuccess && retryCount < 100) { - // 1.删除现有子任务 - log.debug("delete fileTasks of fileSourceTask {}", fileSourceTaskId); - fileTaskService.deleteTasks(fileSourceTaskId); - // 2.删除现有FileSourceTask任务 - fileSourceTaskService.deleteFileSourceTaskById(fileSourceTaskId); - log.debug("delete fileSourceTask {}", fileSourceTaskId); - FileWorkerDTO fileWorkerDTO = dispatchService.findBestFileWorker(fileSourceDTO, "ReDispatch"); - log.debug("found bestWorker:{}", fileSourceDTO); - if (fileWorkerDTO != null) { - // 3.重新派发任务 - try { - TaskInfoDTO taskInfoDTO = - fileSourceTaskService.startFileSourceDownloadTaskWithId( - fileSourceTaskDTO.getCreator(), - fileSourceTaskDTO.getAppId(), - fileSourceTaskDTO.getStepInstanceId(), - fileSourceTaskDTO.getExecuteCount(), - fileSourceTaskDTO.getBatchTaskId(), - fileSourceTaskDTO.getFileSourceId(), - filePathList, - fileSourceTaskId - ); - reDispatchSuccess = true; - log.info("reDispatch result of {}:{}", fileSourceTaskId, taskInfoDTO); - } catch (Exception e) { - retryCount += 1; - log.info("Fail to redispatch task {}, wait {}ms to retry {}", fileSourceTaskId, - intervalMills, retryCount); - try { - Thread.sleep(intervalMills); - } catch (InterruptedException interruptedException) { - log.error("redispatch wait interrupted", e); - } - } - } else { - // 3.暂时没有合适的FileWorker,延时等待 - try { - retryCount += 1; - log.info("No suitable worker to redispatch task {}, wait {}ms to retry {}", - fileSourceTaskId, intervalMills, retryCount); - Thread.sleep(intervalMills); - } catch (InterruptedException e) { - log.error("redispatch wait interrupted", e); - } - } - } - } catch (Throwable t) { - log.error("ReDispatchTask fail", t); - } finally { - synchronized (reDispatchingTaskIds) { - reDispatchingTaskIds.remove(fileSourceTaskId); - } - 
reDispatchThreadNum.decrementAndGet(); - } - } - } -} diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/task/dispatch/ReDispatchTimeoutTask.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/task/dispatch/ReDispatchTimeoutTask.java index 30844bf584..9dff84dc7b 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/task/dispatch/ReDispatchTimeoutTask.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/task/dispatch/ReDispatchTimeoutTask.java @@ -31,7 +31,7 @@ import com.tencent.bk.job.common.util.ip.IpUtils; import com.tencent.bk.job.file_gateway.consts.TaskStatusEnum; import com.tencent.bk.job.file_gateway.dao.filesource.FileTaskDAO; -import com.tencent.bk.job.file_gateway.service.ReDispatchService; +import com.tencent.bk.job.file_gateway.service.dispatch.ReDispatchService; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/task/filesource/FileSourceStatusUpdateTask.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/task/filesource/FileSourceStatusUpdateTask.java index 0c3a433d26..5afb9acb2e 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/task/filesource/FileSourceStatusUpdateTask.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/task/filesource/FileSourceStatusUpdateTask.java @@ -27,7 +27,7 @@ import com.tencent.bk.job.file_gateway.consts.FileSourceStatusEnum; import com.tencent.bk.job.file_gateway.model.dto.FileSourceDTO; import 
com.tencent.bk.job.file_gateway.model.dto.FileWorkerDTO; -import com.tencent.bk.job.file_gateway.service.DispatchService; +import com.tencent.bk.job.file_gateway.service.dispatch.DispatchService; import com.tencent.bk.job.file_gateway.service.FileService; import com.tencent.bk.job.file_gateway.service.FileSourceService; import lombok.extern.slf4j.Slf4j; From def84d66f42ba2c0b0b2731903be094d8ec049bc Mon Sep 17 00:00:00 2001 From: jsonwan Date: Thu, 6 Jun 2024 20:40:57 +0800 Subject: [PATCH 08/24] =?UTF-8?q?perf:=20=E7=AC=AC=E4=B8=89=E6=96=B9?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E6=BA=90=E4=BB=BB=E5=8A=A1=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E6=97=A0=E6=8D=9F=E6=9B=B4=E6=96=B0=20#3017?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1.优化日志。 --- .../impl/FileSourceTaskServiceImpl.java | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/FileSourceTaskServiceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/FileSourceTaskServiceImpl.java index 5af08d1e54..3c5bfdafa4 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/FileSourceTaskServiceImpl.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/FileSourceTaskServiceImpl.java @@ -104,8 +104,12 @@ public FileSourceTaskServiceImpl(FileSourceTaskUpdateService fileSourceTaskUpdat @Override @JobTransactional(transactionManager = "jobFileGatewayTransactionManager") - public TaskInfoDTO startFileSourceDownloadTask(String username, Long appId, Long stepInstanceId, - Integer executeCount, String batchTaskId, Integer fileSourceId, + public TaskInfoDTO startFileSourceDownloadTask(String username, + Long appId, + Long stepInstanceId, + 
Integer executeCount, + String batchTaskId, + Integer fileSourceId, List filePathList) { return startFileSourceDownloadTaskWithId( username, @@ -128,8 +132,16 @@ public TaskInfoDTO startFileSourceDownloadTaskWithId(String username, Integer fileSourceId, List filePathList, String fileSourceTaskId) { - log.info("Input=({},{},{},{},{},{},{})", username, appId, stepInstanceId, executeCount, batchTaskId, - fileSourceId, filePathList); + log.info( + "startFileSourceDownloadTaskWithId, input=({},{},{},{},{},{},{})", + username, + appId, + stepInstanceId, + executeCount, + batchTaskId, + fileSourceId, + filePathList + ); FileSourceDTO fileSourceDTO = fileSourceDAO.getFileSourceById(fileSourceId); if (fileSourceDTO == null) { throw new RuntimeException("FileSource not exist, fileSourceId=" + fileSourceId.toString()); From 35565c60322ebc0455688369ab7153a6327b36e1 Mon Sep 17 00:00:00 2001 From: jsonwan Date: Fri, 7 Jun 2024 15:39:18 +0800 Subject: [PATCH 09/24] =?UTF-8?q?perf:=20=E7=AC=AC=E4=B8=89=E6=96=B9?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E6=BA=90=E4=BB=BB=E5=8A=A1=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E6=97=A0=E6=8D=9F=E6=9B=B4=E6=96=B0=20#3017?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1.增加File-Worker状态管理机制,解除心跳逻辑与状态管理的耦合; 2.确保心跳在File-Worker可被外界访问后再开始,避免UnknownHost问题。 --- .../tencent/bk/job/common}/event/Event.java | 2 +- .../job/common/util/http/HttpReqGenUtil.java | 6 + .../job/common/util/http/JobHttpClient.java | 2 + .../common/util/http/JobHttpClientImpl.java | 16 +++ .../crontab/listener/event/CrontabEvent.java | 1 + .../config/ApplicationReadyListener.java | 13 +- .../ApplicationReadyListenerConfig.java | 11 +- .../bk/job/file/worker/service/OpService.java | 16 ++- .../file/worker/state/WorkerStateEnum.java | 71 ++++++++++ .../file/worker/state/WorkerStateMachine.java | 83 +++++++++++ .../worker/state/event/WorkerActionEnum.java | 44 ++++++ .../file/worker/state/event/WorkerEvent.java | 67 +++++++++ 
.../state/event/WorkerEventDispatcher.java | 104 ++++++++++++++ .../state/event/WorkerEventService.java | 56 ++++++++ .../event/handler/DefaultEventHandler.java | 39 ++++++ .../state/event/handler/EventHandler.java | 31 +++++ .../state/event/handler/HealthResult.java | 41 ++++++ .../event/handler/HeartBeatEventHandler.java | 95 +++++++++++++ .../event/handler/OffLineEventHandler.java | 80 +++++++++++ .../event/handler/WaitAccessEventHandler.java | 130 ++++++++++++++++++ .../job/file/worker/task/ScheduledTasks.java | 13 +- .../worker/task/heartbeat/HeartBeatTask.java | 56 +------- 22 files changed, 896 insertions(+), 81 deletions(-) rename src/backend/{job-crontab/service-job-crontab/src/main/java/com/tencent/bk/job/crontab/listener => commons/common/src/main/java/com/tencent/bk/job/common}/event/Event.java (97%) create mode 100644 src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/WorkerStateEnum.java create mode 100644 src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/WorkerStateMachine.java create mode 100644 src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerActionEnum.java create mode 100644 src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerEvent.java create mode 100644 src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerEventDispatcher.java create mode 100644 src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerEventService.java create mode 100644 src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/DefaultEventHandler.java create mode 100644 
src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/EventHandler.java create mode 100644 src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/HealthResult.java create mode 100644 src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/HeartBeatEventHandler.java create mode 100644 src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/OffLineEventHandler.java create mode 100644 src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/WaitAccessEventHandler.java diff --git a/src/backend/job-crontab/service-job-crontab/src/main/java/com/tencent/bk/job/crontab/listener/event/Event.java b/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/event/Event.java similarity index 97% rename from src/backend/job-crontab/service-job-crontab/src/main/java/com/tencent/bk/job/crontab/listener/event/Event.java rename to src/backend/commons/common/src/main/java/com/tencent/bk/job/common/event/Event.java index ab37c2170b..5c6edd432f 100644 --- a/src/backend/job-crontab/service-job-crontab/src/main/java/com/tencent/bk/job/crontab/listener/event/Event.java +++ b/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/event/Event.java @@ -22,7 +22,7 @@ * IN THE SOFTWARE. 
*/ -package com.tencent.bk.job.crontab.listener.event; +package com.tencent.bk.job.common.event; import com.fasterxml.jackson.annotation.JsonInclude; import com.tencent.bk.job.common.util.date.DateUtils; diff --git a/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/http/HttpReqGenUtil.java b/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/http/HttpReqGenUtil.java index 3679092dbb..9c41c94155 100644 --- a/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/http/HttpReqGenUtil.java +++ b/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/http/HttpReqGenUtil.java @@ -47,4 +47,10 @@ public static HttpReq genSimpleJsonReq(String url, Object body) { httpReq.setHeaders(headerList.toArray(headers)); return httpReq; } + + public static HttpReq genUrlGetReq(String url) { + HttpReq httpReq = new HttpReq(); + httpReq.setUrl(url); + return httpReq; + } } diff --git a/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/http/JobHttpClient.java b/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/http/JobHttpClient.java index 15c1d64d12..b6b7aa88ff 100644 --- a/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/http/JobHttpClient.java +++ b/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/http/JobHttpClient.java @@ -31,6 +31,8 @@ */ public interface JobHttpClient { + String get(HttpReq req); + String post(HttpReq req); } diff --git a/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/http/JobHttpClientImpl.java b/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/http/JobHttpClientImpl.java index 25e93d99d1..b1f2266c3b 100644 --- a/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/http/JobHttpClientImpl.java +++ b/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/http/JobHttpClientImpl.java @@ -46,6 +46,22 @@ public 
JobHttpClientImpl(RestTemplate restTemplate) { this.restTemplate = restTemplate; } + @Override + public String get(HttpReq req) { + logReq(req); + ResponseEntity respEntity = restTemplate.getForEntity( + req.getUrl(), + String.class + ); + if (respEntity.getStatusCode() == HttpStatus.OK) { + String respStr = respEntity.getBody(); + logRespStr(respStr); + return respStr; + } + logAndThrow(respEntity); + return null; + } + @Override public String post(HttpReq req) { logReq(req); diff --git a/src/backend/job-crontab/service-job-crontab/src/main/java/com/tencent/bk/job/crontab/listener/event/CrontabEvent.java b/src/backend/job-crontab/service-job-crontab/src/main/java/com/tencent/bk/job/crontab/listener/event/CrontabEvent.java index baf09b953f..861ff5ff04 100644 --- a/src/backend/job-crontab/service-job-crontab/src/main/java/com/tencent/bk/job/crontab/listener/event/CrontabEvent.java +++ b/src/backend/job-crontab/service-job-crontab/src/main/java/com/tencent/bk/job/crontab/listener/event/CrontabEvent.java @@ -25,6 +25,7 @@ package com.tencent.bk.job.crontab.listener.event; import com.fasterxml.jackson.annotation.JsonInclude; +import com.tencent.bk.job.common.event.Event; import com.tencent.bk.job.crontab.constant.CrontabActionEnum; import lombok.Getter; import lombok.NoArgsConstructor; diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/config/ApplicationReadyListener.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/config/ApplicationReadyListener.java index 0e66134715..e20a171751 100644 --- a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/config/ApplicationReadyListener.java +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/config/ApplicationReadyListener.java @@ -24,7 +24,8 @@ package com.tencent.bk.job.file.worker.config; 
-import com.tencent.bk.job.file.worker.task.heartbeat.HeartBeatTask; +import com.tencent.bk.job.file.worker.state.event.WorkerEvent; +import com.tencent.bk.job.file.worker.state.event.WorkerEventService; import lombok.extern.slf4j.Slf4j; import org.springframework.boot.context.event.ApplicationReadyEvent; import org.springframework.context.ApplicationListener; @@ -36,12 +37,12 @@ public class ApplicationReadyListener implements ApplicationListener { private final WorkerConfig workerConfig; - private final HeartBeatTask heartBeatTask; + private final WorkerEventService workerEventService; public ApplicationReadyListener(WorkerConfig workerConfig, - HeartBeatTask heartBeatTask) { + WorkerEventService workerEventService) { this.workerConfig = workerConfig; - this.heartBeatTask = heartBeatTask; + this.workerEventService = workerEventService; } @SuppressWarnings("NullableProblems") @@ -56,7 +57,7 @@ public void onApplicationEvent(ApplicationReadyEvent event) { log.info("created JobFileWorker workspace:" + wsDirFile.getAbsolutePath()); } } - // 2.启动后立即上报一次心跳 - new Thread(heartBeatTask::run).start(); + // 2.启动后等待自身可被外界访问 + workerEventService.commitWorkerEvent(WorkerEvent.waitAccessReady()); } } diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/config/ApplicationReadyListenerConfig.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/config/ApplicationReadyListenerConfig.java index 3a6865e79d..7d0e3f4053 100644 --- a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/config/ApplicationReadyListenerConfig.java +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/config/ApplicationReadyListenerConfig.java @@ -24,17 +24,12 @@ package com.tencent.bk.job.file.worker.config; -import com.tencent.bk.job.file.worker.task.heartbeat.HeartBeatTask; 
+import com.tencent.bk.job.file.worker.state.event.WorkerEventService; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -/** - * @Description - * @Date 2020/2/24 - * @Version 1.0 - */ @Slf4j @Configuration @@ -42,9 +37,9 @@ public class ApplicationReadyListenerConfig { @Bean public ApplicationReadyListener applicationReadyListener(@Autowired WorkerConfig workerConfig, - @Autowired HeartBeatTask heartBeatTask) { + @Autowired WorkerEventService workerEventService) { log.info("applicationReadyListener inited"); - return new ApplicationReadyListener(workerConfig, heartBeatTask); + return new ApplicationReadyListener(workerConfig, workerEventService); } } diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/service/OpService.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/service/OpService.java index 7be3501fd3..51b27a45de 100644 --- a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/service/OpService.java +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/service/OpService.java @@ -32,6 +32,8 @@ import com.tencent.bk.job.common.util.http.HttpRequest; import com.tencent.bk.job.common.util.json.JsonUtils; import com.tencent.bk.job.file.worker.config.WorkerConfig; +import com.tencent.bk.job.file.worker.state.event.WorkerEvent; +import com.tencent.bk.job.file.worker.state.event.WorkerEventService; import com.tencent.bk.job.file.worker.task.heartbeat.HeartBeatTask; import com.tencent.bk.job.file_gateway.consts.TaskCommandEnum; import com.tencent.bk.job.file_gateway.model.req.inner.OffLineAndReDispatchReq; @@ -53,7 +55,7 @@ public class OpService { private final 
GatewayInfoService gatewayInfoService; private final EnvironmentService environmentService; private final TaskReporter taskReporter; - private final HeartBeatTask heartBeatTask; + private final WorkerEventService workerEventService; @Autowired public OpService(WorkerConfig workerConfig, @@ -61,19 +63,23 @@ public OpService(WorkerConfig workerConfig, GatewayInfoService gatewayInfoService, EnvironmentService environmentService, TaskReporter taskReporter, - HeartBeatTask heartBeatTask) { + WorkerEventService workerEventService) { this.workerConfig = workerConfig; this.fileTaskService = fileTaskService; this.gatewayInfoService = gatewayInfoService; this.environmentService = environmentService; this.taskReporter = taskReporter; - this.heartBeatTask = heartBeatTask; + this.workerEventService = workerEventService; } public List offLine() { List runningTaskIdList = fileTaskService.getAllTaskIdList(); - // 停止心跳 - heartBeatTask.stopAndWaitLastHeartBeatFinish(); + workerEventService.commitWorkerEvent(WorkerEvent.offLine()); + return runningTaskIdList; + } + + public List doOffLine() { + List runningTaskIdList = fileTaskService.getAllTaskIdList(); // 调网关接口下线自己 String url = gatewayInfoService.getWorkerOffLineUrl(); OffLineAndReDispatchReq offLineReq = new OffLineAndReDispatchReq(); diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/WorkerStateEnum.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/WorkerStateEnum.java new file mode 100644 index 0000000000..b2657e02da --- /dev/null +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/WorkerStateEnum.java @@ -0,0 +1,71 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. 
+ * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. + * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +package com.tencent.bk.job.file.worker.state; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonValue; + +/** + * File-Worker状态枚举值 + */ +public enum WorkerStateEnum { + STARTING(1, "启动中"), + WAIT_ACCESS_READY(2, "等待自身可被外界访问"), + HEART_BEATING(3, "心跳中"), + HEART_BEAT_WAIT(4, "等待下一次心跳中"), + RUNNING(5, "运行中"), + OFFLINE_ING(6, "下线中"), + OFFLINE_FAILED(7, "下线失败"), + OFFLINE(8, "已下线"); + + /** + * 状态值 + */ + @JsonValue + private final int state; + /** + * 状态描述 + */ + private final String description; + + WorkerStateEnum(int state, String description) { + this.state = state; + this.description = description; + } + + @JsonCreator(mode = JsonCreator.Mode.DELEGATING) + public static WorkerStateEnum valOf(int state) { + for (WorkerStateEnum workerState : values()) { + if (workerState.state == state) { + return workerState; + } + } + return null; + } + + public int getValue() { + return state; + } +} diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/WorkerStateMachine.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/WorkerStateMachine.java new file mode 100644 index 0000000000..34e7b0c67f --- /dev/null +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/WorkerStateMachine.java @@ -0,0 +1,83 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. 
+ * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +package com.tencent.bk.job.file.worker.state; + +import lombok.Getter; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.cloud.sleuth.Tracer; +import org.springframework.stereotype.Component; + +/** + * File-Worker状态机,管理Worker状态流转 + */ +@Getter +@Slf4j +@Component +public class WorkerStateMachine { + + private WorkerStateEnum workerState = WorkerStateEnum.STARTING; + + @Autowired + public WorkerStateMachine(Tracer tracer) { + } + + public void setWorkerState(WorkerStateEnum workerState) { + log.info("state change: {} -> {}", this.workerState.name(), workerState.name()); + this.workerState = workerState; + } + + public void waitAccessReady() { + setWorkerState(WorkerStateEnum.WAIT_ACCESS_READY); + } + + public void accessReady() { + setWorkerState(WorkerStateEnum.HEART_BEAT_WAIT); + } + + public void heartBeatStart() { + setWorkerState(WorkerStateEnum.HEART_BEATING); + } + + public void heartBeatSuccess() { + setWorkerState(WorkerStateEnum.RUNNING); + } + + public void heartBeatFailed() { + setWorkerState(WorkerStateEnum.HEART_BEAT_WAIT); + } + + public void offlineStart() { + setWorkerState(WorkerStateEnum.OFFLINE_ING); + } + + public void offlineFailed() { + setWorkerState(WorkerStateEnum.OFFLINE_FAILED); + } + + public void offlineSuccess() { + setWorkerState(WorkerStateEnum.OFFLINE); + } +} diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerActionEnum.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerActionEnum.java new file mode 100644 index 0000000000..82e3cc4ad8 --- /dev/null +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerActionEnum.java @@ -0,0 +1,44 @@ +/* + * Tencent is pleased to support the open source community by making 
BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. + * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +package com.tencent.bk.job.file.worker.state.event; + + +/** + * Worker动作 + */ +public enum WorkerActionEnum { + /** + * 等待外界访问路径准备好 + */ + WAIT_ACCESS_READY, + /** + * 定时心跳 + */ + HEART_BEAT, + /** + * 下线 + */ + OFF_LINE; +} diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerEvent.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerEvent.java new file mode 100644 index 0000000000..d3b6311c8e --- /dev/null +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerEvent.java @@ -0,0 +1,67 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. + * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +package com.tencent.bk.job.file.worker.state.event; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.tencent.bk.job.common.event.Event; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; + +import java.time.LocalDateTime; + +@Getter +@Setter +@NoArgsConstructor +@JsonInclude(JsonInclude.Include.NON_NULL) +public class WorkerEvent extends Event { + /** + * Worker动作 + * + * @see WorkerActionEnum + */ + private WorkerActionEnum action; + + public static WorkerEvent waitAccessReady() { + WorkerEvent workerEvent = new WorkerEvent(); + workerEvent.setAction(WorkerActionEnum.WAIT_ACCESS_READY); + workerEvent.setTime(LocalDateTime.now()); + return workerEvent; + } + + public static WorkerEvent heartBeat() { + WorkerEvent workerEvent = new WorkerEvent(); + workerEvent.setAction(WorkerActionEnum.HEART_BEAT); + workerEvent.setTime(LocalDateTime.now()); + return workerEvent; + } + + public static WorkerEvent offLine() { + WorkerEvent workerEvent = new WorkerEvent(); + workerEvent.setAction(WorkerActionEnum.OFF_LINE); + workerEvent.setTime(LocalDateTime.now()); + return workerEvent; + } +} diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerEventDispatcher.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerEventDispatcher.java new file mode 100644 index 0000000000..ee984301a0 --- /dev/null +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerEventDispatcher.java @@ -0,0 +1,104 @@ +/* + * Tencent is pleased to support the 
open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. + * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +package com.tencent.bk.job.file.worker.state.event; + +import com.tencent.bk.job.common.tracing.util.SpanUtil; +import com.tencent.bk.job.file.worker.state.event.handler.DefaultEventHandler; +import com.tencent.bk.job.file.worker.state.event.handler.EventHandler; +import com.tencent.bk.job.file.worker.state.event.handler.HeartBeatEventHandler; +import com.tencent.bk.job.file.worker.state.event.handler.WaitAccessEventHandler; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.cloud.sleuth.Span; +import org.springframework.cloud.sleuth.Tracer; +import org.springframework.stereotype.Component; + +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.BlockingQueue; + +@Slf4j +@Component +public class WorkerEventDispatcher extends Thread { + + @SuppressWarnings("FieldCanBeLocal") + private boolean enabled = true; + /** + * 日志调用链tracer + */ + private final Tracer tracer; + private BlockingQueue eventQueue; + private static final Map handlerMap = new HashMap<>(); + private static final EventHandler defaultHandler = new DefaultEventHandler(); + + @Autowired + public WorkerEventDispatcher(Tracer tracer, + WaitAccessEventHandler waitAccessEventHandler, + HeartBeatEventHandler heartBeatEventHandler) { + this.tracer = tracer; + handlerMap.put(WorkerActionEnum.WAIT_ACCESS_READY, waitAccessEventHandler); + handlerMap.put(WorkerActionEnum.HEART_BEAT, heartBeatEventHandler); + } + + public void initQueue(BlockingQueue eventQueue) { + this.eventQueue = eventQueue; + } + + @Override + public void run() { + while (enabled) { + WorkerEvent event; + try { + event = eventQueue.take(); + dispatchEventWithTrace(event); + } catch (InterruptedException e) { + log.warn("queue.take interrupted", e); + } catch (Throwable t) { + log.error("Fail to handleEventWithTrace", t); + } + } + } + + private void dispatchEventWithTrace(WorkerEvent event) { + Span span = buildSpan(event); + try 
(Tracer.SpanInScope ignored = this.tracer.withSpan(span.start())) { + dispatchEvent(event); + } catch (Throwable t) { + span.error(t); + log.warn("Fail to handleEvent:" + event, t); + } finally { + span.end(); + } + } + + private void dispatchEvent(WorkerEvent event) { + EventHandler handler = handlerMap.getOrDefault(event.getAction(), defaultHandler); + handler.handleEvent(event); + } + + private Span buildSpan(WorkerEvent event) { + return SpanUtil.buildNewSpan(this.tracer, event.getAction().name()); + } +} diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerEventService.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerEventService.java new file mode 100644 index 0000000000..5438166e39 --- /dev/null +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerEventService.java @@ -0,0 +1,56 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. + * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +package com.tencent.bk.job.file.worker.state.event; + +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; + +/** + * File-Worker生命周期事件服务,用于接收上层业务逻辑触发的事件 + */ +@Slf4j +@Service +public class WorkerEventService { + + private final BlockingQueue eventQueue = new LinkedBlockingQueue<>(100); + + @Autowired + public WorkerEventService(WorkerEventDispatcher workerEventDispatcher) { + workerEventDispatcher.initQueue(eventQueue); + workerEventDispatcher.start(); + } + + public void commitWorkerEvent(WorkerEvent event) { + boolean result = eventQueue.add(event); + if (!result) { + log.warn("Fail to add event to queue:{}, ignore", event); + } + } + +} diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/DefaultEventHandler.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/DefaultEventHandler.java new file mode 100644 index 0000000000..3aee2a72b3 --- /dev/null +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/DefaultEventHandler.java @@ -0,0 +1,39 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. 
+ * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. + * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +package com.tencent.bk.job.file.worker.state.event.handler; + +import com.tencent.bk.job.file.worker.state.event.WorkerEvent; +import lombok.extern.slf4j.Slf4j; + +/** + * 默认事件处理器,仅对事件做日志记录 + */ +@Slf4j +public class DefaultEventHandler implements EventHandler { + @Override + public void handleEvent(WorkerEvent event) { + log.warn("No handler specified for event:{}, ignore", event); + } +} diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/EventHandler.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/EventHandler.java new file mode 100644 index 0000000000..79073d3259 --- /dev/null +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/EventHandler.java @@ -0,0 +1,31 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. + * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +package com.tencent.bk.job.file.worker.state.event.handler; + +import com.tencent.bk.job.file.worker.state.event.WorkerEvent; + +public interface EventHandler { + void handleEvent(WorkerEvent event); +} diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/HealthResult.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/HealthResult.java new file mode 100644 index 0000000000..3a33869027 --- /dev/null +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/HealthResult.java @@ -0,0 +1,41 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. 
+ * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +package com.tencent.bk.job.file.worker.state.event.handler; + +import lombok.Data; + +import java.util.List; + +@Data +public class HealthResult { + /** + * 健康状态 + */ + private String status; + /** + * 健康指标分组 + */ + private List groups; +} diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/HeartBeatEventHandler.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/HeartBeatEventHandler.java new file mode 100644 index 0000000000..926b4ec9ed --- /dev/null +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/HeartBeatEventHandler.java @@ -0,0 +1,95 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. + * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +package com.tencent.bk.job.file.worker.state.event.handler; + +import com.tencent.bk.job.file.worker.state.WorkerStateEnum; +import com.tencent.bk.job.file.worker.state.WorkerStateMachine; +import com.tencent.bk.job.file.worker.state.event.WorkerEvent; +import com.tencent.bk.job.file.worker.state.event.WorkerEventService; +import com.tencent.bk.job.file.worker.task.heartbeat.HeartBeatTask; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Component; + +/** + * 心跳事件处理器,用于向File-Gateway上报Worker状态信息 + */ +@Slf4j +@Component +public class HeartBeatEventHandler implements EventHandler { + + private final WorkerEventService workerEventService; + private final WorkerStateMachine workerStateMachine; + private final HeartBeatTask heartBeatTask; + + @Autowired + public HeartBeatEventHandler(WorkerEventService workerEventService, + WorkerStateMachine workerStateMachine, + HeartBeatTask heartBeatTask) { + this.workerEventService = workerEventService; + this.workerStateMachine = workerStateMachine; + this.heartBeatTask = heartBeatTask; + } + + @Override + public void handleEvent(WorkerEvent event) { + WorkerStateEnum workerState = workerStateMachine.getWorkerState(); + switch (workerState) { + case STARTING: + workerEventService.commitWorkerEvent(WorkerEvent.waitAccessReady()); + break; + case WAIT_ACCESS_READY: + log.info("wait access ready, ignore current event:{}", event); + break; + case HEART_BEAT_WAIT: + case RUNNING: + heartBeat(); + break; + default: + log.info("currentState:{}, heartBeat condition not satisfy, ignore", workerState); + break; + } + } + + private Long lastSuccessHeartBeatTime = null; + + private 
void heartBeat() { + workerStateMachine.heartBeatStart(); + try { + // 如果上一次成功的心跳在10s内发生,则忽略本次心跳 + if (lastSuccessHeartBeatTime != null && System.currentTimeMillis() - lastSuccessHeartBeatTime < 10_000L) { + log.info("lastSuccessHeartBeat finish with 10s, ignore current heartBeat"); + workerStateMachine.heartBeatSuccess(); + } else { + heartBeatTask.doHeartBeat(); + workerStateMachine.heartBeatSuccess(); + lastSuccessHeartBeatTime = System.currentTimeMillis(); + } + } catch (Throwable t) { + log.warn("Fail to heartBeat", t); + workerStateMachine.heartBeatFailed(); + } + } +} diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/OffLineEventHandler.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/OffLineEventHandler.java new file mode 100644 index 0000000000..fe42ff2bf7 --- /dev/null +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/OffLineEventHandler.java @@ -0,0 +1,80 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. 
+ * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +package com.tencent.bk.job.file.worker.state.event.handler; + +import com.tencent.bk.job.file.worker.service.OpService; +import com.tencent.bk.job.file.worker.state.WorkerStateEnum; +import com.tencent.bk.job.file.worker.state.WorkerStateMachine; +import com.tencent.bk.job.file.worker.state.event.WorkerEvent; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Component; + +/** + * 下线事件处理器 + */ +@Slf4j +@Component +public class OffLineEventHandler implements EventHandler { + + private final WorkerStateMachine workerStateMachine; + private final OpService opService; + + @Autowired + public OffLineEventHandler(WorkerStateMachine workerStateMachine, + OpService opService) { + this.workerStateMachine = workerStateMachine; + this.opService = opService; + } + + @Override + public void handleEvent(WorkerEvent event) { + WorkerStateEnum workerState = workerStateMachine.getWorkerState(); + switch (workerState) { + case RUNNING: + case HEART_BEAT_WAIT: + case OFFLINE_FAILED: + offLine(); + break; + case OFFLINE_ING: + log.info("last offLine action is executing, ignore current one"); + break; + default: + log.info("currentState:{}, offLine condition not satisfy, ignore", workerState); + break; + } + } + + private void offLine() { + workerStateMachine.offlineStart(); + try { + opService.doOffLine(); + workerStateMachine.offlineSuccess(); + } catch (Throwable t) { + log.warn("Fail to offLine", t); + workerStateMachine.offlineFailed(); + } + } +} diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/WaitAccessEventHandler.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/WaitAccessEventHandler.java new file mode 100644 index 0000000000..f9fe9fa9f0 --- /dev/null +++ 
b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/WaitAccessEventHandler.java @@ -0,0 +1,130 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. + * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +package com.tencent.bk.job.file.worker.state.event.handler; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.tencent.bk.job.common.model.http.HttpReq; +import com.tencent.bk.job.common.util.ThreadUtils; +import com.tencent.bk.job.common.util.http.HttpReqGenUtil; +import com.tencent.bk.job.common.util.http.JobHttpClient; +import com.tencent.bk.job.common.util.json.JsonUtils; +import com.tencent.bk.job.file.worker.config.WorkerConfig; +import com.tencent.bk.job.file.worker.service.EnvironmentService; +import com.tencent.bk.job.file.worker.state.WorkerStateEnum; +import com.tencent.bk.job.file.worker.state.WorkerStateMachine; +import com.tencent.bk.job.file.worker.state.event.WorkerEvent; +import com.tencent.bk.job.file.worker.state.event.WorkerEventService; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Component; + +/** + * 等待Worker可被外界访问的事件处理器,实现检查与等待逻辑 + */ +@Slf4j +@Component +public class WaitAccessEventHandler implements EventHandler { + + private final WorkerEventService workerEventService; + private final WorkerStateMachine workerStateMachine; + private final JobHttpClient jobHttpClient; + private final String checkAccessUrl; + + @Autowired + public WaitAccessEventHandler(WorkerEventService workerEventService, + WorkerStateMachine workerStateMachine, + JobHttpClient jobHttpClient, + WorkerConfig workerConfig, + EnvironmentService environmentService) { + this.workerEventService = workerEventService; + this.workerStateMachine = workerStateMachine; + this.jobHttpClient = jobHttpClient; + this.checkAccessUrl = buildCheckAccessUrl(environmentService.getAccessHost(), workerConfig.getAccessPort()); + } + + @SuppressWarnings("HttpUrlsUsage") + private String buildCheckAccessUrl(String accessHost, Integer accessPort) { + return "http://" + accessHost + ":" + accessPort + "/actuator/health"; + } + + @Override + public void handleEvent(WorkerEvent 
event) { + WorkerStateEnum workerState = workerStateMachine.getWorkerState(); + switch (workerState) { + case STARTING: + case WAIT_ACCESS_READY: + workerStateMachine.waitAccessReady(); + waitAccessReady(); + break; + default: + log.info("currentState:{}, waitAccessReady condition not satisfy, ignore", workerState); + break; + } + } + + public void waitAccessReady() { + boolean accessReady = checkAccess(); + if (accessReady) { + // 1.状态切换 + workerStateMachine.accessReady(); + // 2.自身可被外界访问后立即触发心跳 + workerEventService.commitWorkerEvent(WorkerEvent.heartBeat()); + } else { + // 3.检查失败,状态不变,继续检查 + workerEventService.commitWorkerEvent(WorkerEvent.waitAccessReady()); + } + } + + private boolean checkAccess() { + boolean accessReady = false; + int maxCheckNum = 300; + int checkNum = 0; + int errorNum = 0; + do { + try { + checkNum += 1; + log.info("CheckAccess: url={}", checkAccessUrl); + HttpReq req = HttpReqGenUtil.genUrlGetReq(checkAccessUrl); + String respStr = jobHttpClient.get(req); + HealthResult healthResult = JsonUtils.fromJson(respStr, new TypeReference<HealthResult>() { + }); + String status = healthResult.getStatus(); + if (status != null && status.equalsIgnoreCase("UP")) { + accessReady = true; + } + } catch (Throwable t) { + errorNum += 1; + if (errorNum % 10 == 0) { + log.info("Fail to checkAccess", t); + } + } + if (!accessReady && checkNum < maxCheckNum) { + log.info("Access not ready, checkNum={}, wait 1s", checkNum); + ThreadUtils.sleep(1000); + } + } while (!accessReady && checkNum < maxCheckNum); + return accessReady; + } +} diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/task/ScheduledTasks.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/task/ScheduledTasks.java index e1065d8d7f..a9e0652b6a 100644 --- a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/task/ScheduledTasks.java
+++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/task/ScheduledTasks.java @@ -24,8 +24,9 @@ package com.tencent.bk.job.file.worker.task; +import com.tencent.bk.job.file.worker.state.event.WorkerEvent; +import com.tencent.bk.job.file.worker.state.event.WorkerEventService; import com.tencent.bk.job.file.worker.task.clear.ClearFileTask; -import com.tencent.bk.job.file.worker.task.heartbeat.HeartBeatTask; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -39,12 +40,12 @@ public class ScheduledTasks { private static final Logger logger = LoggerFactory.getLogger(ScheduledTasks.class); - private final HeartBeatTask heartBeatTask; + private final WorkerEventService workerEventService; private final ClearFileTask clearFileTask; @Autowired - public ScheduledTasks(HeartBeatTask heartBeatTask, ClearFileTask clearFileTask) { - this.heartBeatTask = heartBeatTask; + public ScheduledTasks(WorkerEventService workerEventService, ClearFileTask clearFileTask) { + this.workerEventService = workerEventService; this.clearFileTask = clearFileTask; } @@ -81,9 +82,9 @@ public void checkVolumeAndClear() { public void heartBeat() { logger.info(Thread.currentThread().getId() + ":heartBeat start"); try { - heartBeatTask.run(); + workerEventService.commitWorkerEvent(WorkerEvent.heartBeat()); } catch (Exception e) { - logger.error("heartBeatTask fail", e); + logger.error("commit heartBeat event fail", e); } } } diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/task/heartbeat/HeartBeatTask.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/task/heartbeat/HeartBeatTask.java index 407e7775e4..9371b7e6e5 100644 --- 
a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/task/heartbeat/HeartBeatTask.java +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/task/heartbeat/HeartBeatTask.java @@ -25,7 +25,6 @@ package com.tencent.bk.job.file.worker.task.heartbeat; import com.tencent.bk.job.common.model.http.HttpReq; -import com.tencent.bk.job.common.util.ThreadUtils; import com.tencent.bk.job.common.util.http.HttpReqGenUtil; import com.tencent.bk.job.common.util.http.JobHttpClient; import com.tencent.bk.job.common.util.json.JsonUtils; @@ -37,7 +36,6 @@ import com.tencent.bk.job.file_gateway.model.req.inner.HeartBeatReq; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.tuple.Pair; -import org.slf4j.helpers.MessageFormatter; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; @@ -47,9 +45,6 @@ @Service public class HeartBeatTask { - public volatile boolean shouldRun = true; - public volatile boolean running = false; - private final JobHttpClient jobHttpClient; private final WorkerConfig workerConfig; private final GatewayInfoService gatewayInfoService; @@ -69,40 +64,6 @@ public HeartBeatTask(JobHttpClient jobHttpClient, this.environmentService = environmentService; } - /** - * 停止心跳并等待最后一次心跳结束,防止下线后心跳请求再次将file-worker状态更新为在线 - */ - public void stopAndWaitLastHeartBeatFinish() { - shouldRun = false; - if (!running) { - return; - } - waitUntilNotRunning(30); - } - - @SuppressWarnings("SameParameterValue") - private void waitUntilNotRunning(int maxSeconds) { - long waitStartTimeMills = System.currentTimeMillis(); - boolean shouldWait; - do { - ThreadUtils.sleep(100); - long durationMills = System.currentTimeMillis() - waitStartTimeMills; - if (!running) { - String msg = MessageFormatter.format( - "Waited {}ms for last heartBeat finish", - durationMills - ).getMessage(); - if (durationMills >= 15000) { - 
log.warn(msg); - } else { - log.debug(msg); - } - return; - } - shouldWait = durationMills < maxSeconds * 1000L; - } while (shouldWait); - } - private HeartBeatReq getHeartBeatReq() { HeartBeatReq heartBeatReq = new HeartBeatReq(); heartBeatReq.setName(workerConfig.getName()); @@ -134,22 +95,7 @@ private HeartBeatReq getHeartBeatReq() { return heartBeatReq; } - public void run() { - if (!shouldRun) { - log.info("HeartBeat closed, ignore"); - return; - } - try { - running = true; - doHeartBeat(); - } catch (Exception e) { - log.warn("Fail to doHeartBeat", e); - } finally { - running = false; - } - } - - private void doHeartBeat() { + public void doHeartBeat() { String url = gatewayInfoService.getHeartBeatUrl(); HeartBeatReq heartBeatReq = getHeartBeatReq(); log.info("HeartBeat: url={},body={}", url, JsonUtils.toJsonWithoutSkippedFields(heartBeatReq)); From 3804e861f896481ff85dbe2d7f14a0b492b55805 Mon Sep 17 00:00:00 2001 From: hLinx <327159425@qq.com> Date: Fri, 7 Jun 2024 15:57:29 +0800 Subject: [PATCH 10/24] =?UTF-8?q?feat:=20=E4=B8=BA=E2=80=9C=E6=BB=9A?= =?UTF-8?q?=E5=8A=A8=E6=89=A7=E8=A1=8C=E2=80=9D=E6=8F=90=E4=BE=9B=E4=B8=BB?= =?UTF-8?q?=E5=8A=A8=E6=8C=87=E5=BC=95=E7=9A=84=E4=BA=A4=E4=BA=92=E4=BC=98?= =?UTF-8?q?=E5=8C=96=20#3015?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../task-step/common/rolling/index.vue | 114 +++++++++++++++++- src/frontend/static/images/bk-tips.png | Bin 0 -> 7826 bytes 2 files changed, 110 insertions(+), 4 deletions(-) create mode 100644 src/frontend/static/images/bk-tips.png diff --git a/src/frontend/src/components/task-step/common/rolling/index.vue b/src/frontend/src/components/task-step/common/rolling/index.vue index ddf9feeaa0..fd09b15848 100644 --- a/src/frontend/src/components/task-step/common/rolling/index.vue +++ b/src/frontend/src/components/task-step/common/rolling/index.vue @@ -28,10 +28,12 @@