diff --git a/src/backend/job-crontab/service-job-crontab/src/main/java/com/tencent/bk/job/crontab/listener/event/Event.java b/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/event/Event.java similarity index 97% rename from src/backend/job-crontab/service-job-crontab/src/main/java/com/tencent/bk/job/crontab/listener/event/Event.java rename to src/backend/commons/common/src/main/java/com/tencent/bk/job/common/event/Event.java index ab37c2170b..5c6edd432f 100644 --- a/src/backend/job-crontab/service-job-crontab/src/main/java/com/tencent/bk/job/crontab/listener/event/Event.java +++ b/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/event/Event.java @@ -22,7 +22,7 @@ * IN THE SOFTWARE. */ -package com.tencent.bk.job.crontab.listener.event; +package com.tencent.bk.job.common.event; import com.fasterxml.jackson.annotation.JsonInclude; import com.tencent.bk.job.common.util.date.DateUtils; diff --git a/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/http/HttpReqGenUtil.java b/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/http/HttpReqGenUtil.java index 3679092dbb..9c41c94155 100644 --- a/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/http/HttpReqGenUtil.java +++ b/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/http/HttpReqGenUtil.java @@ -47,4 +47,10 @@ public static HttpReq genSimpleJsonReq(String url, Object body) { httpReq.setHeaders(headerList.toArray(headers)); return httpReq; } + + public static HttpReq genUrlGetReq(String url) { + HttpReq httpReq = new HttpReq(); + httpReq.setUrl(url); + return httpReq; + } } diff --git a/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/http/JobHttpClient.java b/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/http/JobHttpClient.java index 15c1d64d12..b6b7aa88ff 100644 --- 
a/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/http/JobHttpClient.java +++ b/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/http/JobHttpClient.java @@ -31,6 +31,8 @@ */ public interface JobHttpClient { + String get(HttpReq req); + String post(HttpReq req); } diff --git a/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/http/JobHttpClientImpl.java b/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/http/JobHttpClientImpl.java index 25e93d99d1..b1f2266c3b 100644 --- a/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/http/JobHttpClientImpl.java +++ b/src/backend/commons/common/src/main/java/com/tencent/bk/job/common/util/http/JobHttpClientImpl.java @@ -46,6 +46,22 @@ public JobHttpClientImpl(RestTemplate restTemplate) { this.restTemplate = restTemplate; } + @Override + public String get(HttpReq req) { + logReq(req); + ResponseEntity respEntity = restTemplate.getForEntity( + req.getUrl(), + String.class + ); + if (respEntity.getStatusCode() == HttpStatus.OK) { + String respStr = respEntity.getBody(); + logRespStr(respStr); + return respStr; + } + logAndThrow(respEntity); + return null; + } + @Override public String post(HttpReq req) { logReq(req); diff --git a/src/backend/job-crontab/service-job-crontab/src/main/java/com/tencent/bk/job/crontab/listener/event/CrontabEvent.java b/src/backend/job-crontab/service-job-crontab/src/main/java/com/tencent/bk/job/crontab/listener/event/CrontabEvent.java index baf09b953f..861ff5ff04 100644 --- a/src/backend/job-crontab/service-job-crontab/src/main/java/com/tencent/bk/job/crontab/listener/event/CrontabEvent.java +++ b/src/backend/job-crontab/service-job-crontab/src/main/java/com/tencent/bk/job/crontab/listener/event/CrontabEvent.java @@ -25,6 +25,7 @@ package com.tencent.bk.job.crontab.listener.event; import com.fasterxml.jackson.annotation.JsonInclude; +import com.tencent.bk.job.common.event.Event; import 
com.tencent.bk.job.crontab.constant.CrontabActionEnum; import lombok.Getter; import lombok.NoArgsConstructor; diff --git a/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/dao/FileSourceTaskLogDAO.java b/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/dao/FileSourceTaskLogDAO.java index e9ebecdfcb..3c4ffff59b 100644 --- a/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/dao/FileSourceTaskLogDAO.java +++ b/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/dao/FileSourceTaskLogDAO.java @@ -28,7 +28,9 @@ public interface FileSourceTaskLogDAO { - void insertOrUpdateFileSourceTaskLog(FileSourceTaskLogDTO fileSourceTaskLog); + int insertFileSourceTaskLog(FileSourceTaskLogDTO fileSourceTaskLog); + + int updateFileSourceTaskLogByStepInstance(FileSourceTaskLogDTO fileSourceTaskLog); FileSourceTaskLogDTO getFileSourceTaskLog(long stepInstanceId, int executeCount); diff --git a/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/dao/impl/FileSourceTaskLogDAOImpl.java b/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/dao/impl/FileSourceTaskLogDAOImpl.java index 39d120ac3e..acfce3c44b 100644 --- a/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/dao/impl/FileSourceTaskLogDAOImpl.java +++ b/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/dao/impl/FileSourceTaskLogDAOImpl.java @@ -29,6 +29,7 @@ import com.tencent.bk.job.execute.model.FileSourceTaskLogDTO; import com.tencent.bk.job.execute.model.tables.FileSourceTaskLog; import com.tencent.bk.job.execute.model.tables.records.FileSourceTaskLogRecord; +import org.jooq.Condition; import org.jooq.DSLContext; import org.jooq.Record; import org.jooq.UpdateSetFirstStep; @@ -37,6 +38,9 @@ import org.springframework.beans.factory.annotation.Qualifier; import 
org.springframework.stereotype.Repository; +import java.util.ArrayList; +import java.util.List; + @Repository public class FileSourceTaskLogDAOImpl implements FileSourceTaskLogDAO { FileSourceTaskLog defaultTable = FileSourceTaskLog.FILE_SOURCE_TASK_LOG; @@ -65,9 +69,9 @@ private FileSourceTaskLogDTO extractInfo(Record record) { } @Override - public void insertOrUpdateFileSourceTaskLog(FileSourceTaskLogDTO fileSourceTaskLog) { + public int insertFileSourceTaskLog(FileSourceTaskLogDTO fileSourceTaskLog) { FileSourceTaskLog t = FileSourceTaskLog.FILE_SOURCE_TASK_LOG; - defaultContext.insertInto( + return defaultContext.insertInto( t, t.STEP_INSTANCE_ID, t.EXECUTE_COUNT, @@ -84,12 +88,22 @@ public void insertOrUpdateFileSourceTaskLog(FileSourceTaskLogDTO fileSourceTaskL fileSourceTaskLog.getTotalTime(), JooqDataTypeUtil.toByte(fileSourceTaskLog.getStatus()), fileSourceTaskLog.getFileSourceBatchTaskId() - ).onDuplicateKeyUpdate() - .set(t.START_TIME, fileSourceTaskLog.getStartTime()) - .set(t.END_TIME, fileSourceTaskLog.getEndTime()) - .set(t.TOTAL_TIME, fileSourceTaskLog.getTotalTime()) - .set(t.STATUS, JooqDataTypeUtil.toByte(fileSourceTaskLog.getStatus())) - .set(t.FILE_SOURCE_BATCH_TASK_ID, fileSourceTaskLog.getFileSourceBatchTaskId()) + ).execute(); + } + + @Override + public int updateFileSourceTaskLogByStepInstance(FileSourceTaskLogDTO fileSourceTaskLog) { + List conditionList = new ArrayList<>(); + conditionList.add(defaultTable.STEP_INSTANCE_ID.eq(fileSourceTaskLog.getStepInstanceId())); + conditionList.add(defaultTable.EXECUTE_COUNT.eq(fileSourceTaskLog.getExecuteCount())); + return defaultContext.update(defaultTable) + .set(defaultTable.START_TIME, fileSourceTaskLog.getStartTime()) + .set(defaultTable.END_TIME, fileSourceTaskLog.getEndTime()) + .set(defaultTable.TOTAL_TIME, fileSourceTaskLog.getTotalTime()) + .set(defaultTable.STATUS, JooqDataTypeUtil.toByte(fileSourceTaskLog.getStatus())) + .set(defaultTable.FILE_SOURCE_BATCH_TASK_ID, 
fileSourceTaskLog.getFileSourceBatchTaskId()) + .where(conditionList) + .limit(1) .execute(); } @@ -97,14 +111,14 @@ public void insertOrUpdateFileSourceTaskLog(FileSourceTaskLogDTO fileSourceTaskL public FileSourceTaskLogDTO getFileSourceTaskLog(long stepInstanceId, int executeCount) { FileSourceTaskLog t = FileSourceTaskLog.FILE_SOURCE_TASK_LOG; Record record = defaultContext.select( - t.STEP_INSTANCE_ID, - t.EXECUTE_COUNT, - t.START_TIME, - t.END_TIME, - t.TOTAL_TIME, - t.STATUS, - t.FILE_SOURCE_BATCH_TASK_ID - ).from(t) + t.STEP_INSTANCE_ID, + t.EXECUTE_COUNT, + t.START_TIME, + t.END_TIME, + t.TOTAL_TIME, + t.STATUS, + t.FILE_SOURCE_BATCH_TASK_ID + ).from(t) .where(t.STEP_INSTANCE_ID.eq(stepInstanceId)) .and(t.EXECUTE_COUNT.eq(executeCount)) .fetchOne(); @@ -114,8 +128,8 @@ public FileSourceTaskLogDTO getFileSourceTaskLog(long stepInstanceId, int execut @Override public FileSourceTaskLogDTO getFileSourceTaskLogByBatchTaskId(String fileSourceBatchTaskId) { Record record = defaultContext.select(defaultTable.STEP_INSTANCE_ID, defaultTable.EXECUTE_COUNT, - defaultTable.START_TIME, defaultTable.END_TIME, defaultTable.TOTAL_TIME, - defaultTable.STATUS, defaultTable.FILE_SOURCE_BATCH_TASK_ID).from(defaultTable) + defaultTable.START_TIME, defaultTable.END_TIME, defaultTable.TOTAL_TIME, + defaultTable.STATUS, defaultTable.FILE_SOURCE_BATCH_TASK_ID).from(defaultTable) .where(defaultTable.FILE_SOURCE_BATCH_TASK_ID.eq(fileSourceBatchTaskId)) .fetchOne(); return extractInfo(record); diff --git a/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/engine/prepare/third/ThirdFilePrepareService.java b/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/engine/prepare/third/ThirdFilePrepareService.java index ceaa15566b..19aa2748af 100644 --- a/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/engine/prepare/third/ThirdFilePrepareService.java +++ 
b/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/engine/prepare/third/ThirdFilePrepareService.java @@ -52,6 +52,7 @@ import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Pair; +import org.jooq.exception.DataAccessException; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Primary; import org.springframework.stereotype.Component; @@ -119,6 +120,12 @@ private void setTaskInfoIntoThirdFileSource(StepInstanceBaseDTO stepInstance, taskInfoDTO.getIpProtocol(), taskInfoDTO.getIp() ); + log.info( + "[{}]: fileSourceTaskId={} start, sourceHost={}", + stepInstance.getUniqueKey(), + fileSourceTaskId, + hostDTO + ); hostDTOList.add(hostDTO); fileSourceDTO.getServers().setStaticIpList(hostDTOList); fileSourceDTO.getServers().buildMergedExecuteObjects(stepInstance.isSupportExecuteObjectFeature()); @@ -228,7 +235,7 @@ public void prepareThirdFileAsync( log.debug("[{}]: fileSourceList={}", stepInstance.getUniqueKey(), fileSourceList); // 放进文件源下载任务进度表中 FileSourceTaskLogDTO fileSourceTaskLogDTO = buildInitFileSourceTaskLog(stepInstance, batchTaskInfoDTO); - fileSourceTaskLogDAO.insertOrUpdateFileSourceTaskLog(fileSourceTaskLogDTO); + insertOrUpdateFileSourceTaskLog(fileSourceTaskLogDTO); // 更新文件源任务状态 stepInstanceService.updateResolvedSourceFile(stepInstance.getId(), fileSourceList); // 异步轮询文件下载任务 @@ -242,6 +249,28 @@ public void prepareThirdFileAsync( taskMap.put(stepInstance.getUniqueKey(), task); } + private void insertOrUpdateFileSourceTaskLog(FileSourceTaskLogDTO fileSourceTaskLogDTO) { + boolean shouldRetry; + do { + try { + int insertedNum = fileSourceTaskLogDAO.insertFileSourceTaskLog(fileSourceTaskLogDTO); + log.info("{} fileSourceTaskLog inserted", insertedNum); + return; + } catch (DataAccessException e) { + String message = e.getMessage(); + if (message != null && message.equalsIgnoreCase("Deadlock found")) { + 
log.info("Deadlock found when insert fileSourceTaskLog, retry", e); + shouldRetry = true; + } else { + log.info("Fail to insert fileSourceTaskLog, update instead", e); + shouldRetry = false; + } + } + } while (shouldRetry); + int updatedNum = fileSourceTaskLogDAO.updateFileSourceTaskLogByStepInstance(fileSourceTaskLogDTO); + log.info("{} fileSourceTaskLog updated", updatedNum); + } + /** * 立即继续步骤 * diff --git a/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/engine/prepare/third/ThirdFilePrepareTask.java b/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/engine/prepare/third/ThirdFilePrepareTask.java index c49385d621..fabce77dd4 100644 --- a/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/engine/prepare/third/ThirdFilePrepareTask.java +++ b/src/backend/job-execute/service-job-execute/src/main/java/com/tencent/bk/job/execute/engine/prepare/third/ThirdFilePrepareTask.java @@ -89,7 +89,6 @@ public class ThirdFilePrepareTask implements ContinuousScheduledTask, JobTaskCon */ private final Object stopMonitor = new Object(); volatile AtomicBoolean isDoneWrapper = new AtomicBoolean(false); - volatile AtomicBoolean isReadyForNextStepWrapper = new AtomicBoolean(false); private ServiceFileSourceTaskResource fileSourceTaskResource; private AccountService accountService; private FileWorkerHostService fileWorkerHostService; @@ -143,10 +142,6 @@ public boolean isFinished() { return this.isDoneWrapper.get(); } - public boolean isReadyForNext() { - return this.isReadyForNextStepWrapper.get(); - } - @Override public ScheduleStrategy getScheduleStrategy() { // 每秒拉取一次任务状态 @@ -223,6 +218,12 @@ public BatchTaskStatusDTO getFileSourceBatchTaskResults(StepInstanceDTO stepInst } // 任务结束了,且日志拉取完毕才算结束 isDone = batchTaskStatusDTO.isDone() && allLogDone; + log.info( + "[{}]: batchTaskDone={}, allLogDone={}", + stepInstance.getUniqueKey(), + batchTaskStatusDTO.isDone(), + allLogDone + 
); } catch (Exception e) { FormattingTuple msg = MessageFormatter.format( "[{}][{}]:Exception occurred when getFileSourceTaskStatus, tried {} times", @@ -245,7 +246,6 @@ public BatchTaskStatusDTO getFileSourceBatchTaskResults(StepInstanceDTO stepInst if (isDone) { isDoneWrapper.set(true); handleFileSourceTaskResult(stepInstance, batchTaskStatusDTO); - isReadyForNextStepWrapper.set(true); } return batchTaskStatusDTO; } @@ -255,12 +255,7 @@ private void handleFileSourceTaskResult( BatchTaskStatusDTO batchTaskStatusDTO ) { // 更新文件源拉取任务耗时数据 - FileSourceTaskLogDTO fileSourceTaskLogDTO = fileSourceTaskLogDAO.getFileSourceTaskLogByBatchTaskId(batchTaskId); - if (fileSourceTaskLogDTO != null) { - Long endTime = System.currentTimeMillis(); - fileSourceTaskLogDAO.updateTimeConsumingByBatchTaskId(batchTaskId, null, endTime, - endTime - fileSourceTaskLogDTO.getStartTime()); - } + updateBatchTaskTimeStatistics(); List taskStatusList = batchTaskStatusDTO.getFileSourceTaskStatusInfoList(); if (taskStatusList.isEmpty()) { // 直接成功 @@ -277,6 +272,11 @@ private void handleFileSourceTaskResult( boolean allSuccess = statePair.getLeft(); boolean stopped = statePair.getRight(); if (allSuccess) { + log.info( + "[{}]: batchTaskId={}, fileSourceTaskStatus all success", + stepInstance.getUniqueKey(), + batchTaskId + ); onSuccess(stepInstance, taskStatusList); } else if (stopped) { resultHandler.onStopped(this); @@ -285,6 +285,20 @@ private void handleFileSourceTaskResult( } } + private void updateBatchTaskTimeStatistics() { + FileSourceTaskLogDTO fileSourceTaskLogDTO = fileSourceTaskLogDAO.getFileSourceTaskLogByBatchTaskId(batchTaskId); + if (fileSourceTaskLogDTO == null) { + return; + } + Long endTime = System.currentTimeMillis(); + fileSourceTaskLogDAO.updateTimeConsumingByBatchTaskId( + batchTaskId, + null, + endTime, + endTime - fileSourceTaskLogDTO.getStartTime() + ); + } + private Pair checkSuccessAndStopState(List fileSourceTaskStatusList) { boolean allSuccess = true; boolean stopped 
= false; @@ -318,79 +332,100 @@ private void onSuccess(StepInstanceDTO stepInstance, List map.put(taskStatus.getTaskId(), taskStatus)); //添加服务器文件信息 boolean isGseV2Task = stepInstance.isTargetGseV2Agent(); + int updatedNum = 0; for (FileSourceDTO fileSourceDTO : fileSourceList) { String fileSourceTaskId = fileSourceDTO.getFileSourceTaskId(); - if (StringUtils.isNotBlank(fileSourceTaskId)) { - FileSourceTaskStatusDTO fileSourceTaskStatusDTO = map.get(fileSourceTaskId); - fileSourceDTO.setAccount("root"); - AccountDTO accountDTO = accountService.getAccountByAccountName(stepInstance.getAppId(), "root"); - if (accountDTO == null) { - //业务无root账号,报错提示 - log.error( - "[{}]: No root account in appId={}, plz config one", - stepInstance.getUniqueKey(), - stepInstance.getAppId() - ); - stepInstanceService.updateStepStatus(stepInstance.getId(), RunStatusEnum.FAIL.getValue()); - taskExecuteMQEventDispatcher.dispatchJobEvent( - JobEvent.refreshJob(stepInstance.getTaskInstanceId(), - EventSource.buildStepEventSource(stepInstance.getId()))); - return; - } - fileSourceDTO.setAccountId(accountDTO.getId()); - fileSourceDTO.setLocalUpload(false); - - ExecuteTargetDTO executeTargetDTO = new ExecuteTargetDTO(); - HostDTO hostDTO = fileWorkerHostService.parseFileWorkerHostWithCache( - fileSourceTaskStatusDTO.getCloudId(), - fileSourceTaskStatusDTO.getIpProtocol(), - fileSourceTaskStatusDTO.getIp() - ); - if (hostDTO == null) { - log.error( - "[{}]: Cannot find file-worker host info by IP{} (cloudAreaId={}, ip={}), " + - "plz check whether file-worker gse agent is installed", - stepInstance.getUniqueKey(), - fileSourceTaskStatusDTO.getIpProtocol(), - fileSourceTaskStatusDTO.getCloudId(), - fileSourceTaskStatusDTO.getIp() - ); - throw new InternalException(ErrorCode.FILE_WORKER_NOT_FOUND); - } - - HostDTO sourceHost = hostDTO.clone(); - if (isGseV2Task) { - if (StringUtils.isBlank(sourceHost.getAgentId())) { - log.error("Using gseV2, source host agent id is empty! 
host: {}", sourceHost); - throw new InternalException(ErrorCode.CAN_NOT_FIND_AVAILABLE_FILE_WORKER); - } - } else { - sourceHost.setAgentId(sourceHost.toCloudIp()); - } - List hostDTOList = Collections.singletonList(sourceHost); - executeTargetDTO.addStaticHosts(hostDTOList); - executeTargetDTO.buildMergedExecuteObjects(stepInstance.isSupportExecuteObjectFeature()); - fileSourceDTO.setServers(executeTargetDTO); - Map filePathMap = fileSourceTaskStatusDTO.getFilePathMap(); - log.debug( - "[{}]: filePathMap={}", - stepInstance.getUniqueKey(), - filePathMap - ); - List files = fileSourceDTO.getFiles(); - // 设置downloadPath进行后续GSE分发 - for (FileDetailDTO file : files) { - String downloadPath = filePathMap.get(file.getThirdFilePath()); - file.setFilePath(downloadPath); - file.setResolvedFilePath(downloadPath); - } + if (StringUtils.isBlank(fileSourceTaskId)) { + continue; } + updateServerInfoForFileSource(map, fileSourceTaskId, fileSourceDTO, isGseV2Task); + updatedNum += 1; + } + if (updatedNum > 0) { + log.info("[{}]: {} serverInfo updated", stepInstance.getUniqueKey(), updatedNum); + } else { + log.warn("[{}]: no serverInfo updated", stepInstance.getUniqueKey()); } //更新StepInstance stepInstanceService.updateResolvedSourceFile(stepInstance.getId(), fileSourceList); resultHandler.onSuccess(this); } + private void updateServerInfoForFileSource(Map map, + String fileSourceTaskId, + FileSourceDTO fileSourceDTO, + boolean isGseV2Task) { + FileSourceTaskStatusDTO fileSourceTaskStatusDTO = map.get(fileSourceTaskId); + fileSourceDTO.setAccount("root"); + AccountDTO accountDTO = accountService.getAccountByAccountName(stepInstance.getAppId(), "root"); + if (accountDTO == null) { + //业务无root账号,报错提示 + log.error( + "[{}]: No root account in appId={}, plz config one", + stepInstance.getUniqueKey(), + stepInstance.getAppId() + ); + stepInstanceService.updateStepStatus(stepInstance.getId(), RunStatusEnum.FAIL.getValue()); + taskExecuteMQEventDispatcher.dispatchJobEvent( + 
JobEvent.refreshJob(stepInstance.getTaskInstanceId(), + EventSource.buildStepEventSource(stepInstance.getId()))); + return; + } + fileSourceDTO.setAccountId(accountDTO.getId()); + fileSourceDTO.setLocalUpload(false); + + ExecuteTargetDTO executeTargetDTO = new ExecuteTargetDTO(); + HostDTO hostDTO = fileWorkerHostService.parseFileWorkerHostWithCache( + fileSourceTaskStatusDTO.getCloudId(), + fileSourceTaskStatusDTO.getIpProtocol(), + fileSourceTaskStatusDTO.getIp() + ); + if (hostDTO == null) { + log.error( + "[{}]: Cannot find file-worker host info by IP{} (cloudAreaId={}, ip={}), " + + "plz check whether file-worker gse agent is installed", + stepInstance.getUniqueKey(), + fileSourceTaskStatusDTO.getIpProtocol(), + fileSourceTaskStatusDTO.getCloudId(), + fileSourceTaskStatusDTO.getIp() + ); + throw new InternalException(ErrorCode.FILE_WORKER_NOT_FOUND); + } + + HostDTO sourceHost = hostDTO.clone(); + if (isGseV2Task) { + if (StringUtils.isBlank(sourceHost.getAgentId())) { + log.error("Using gseV2, source host agent id is empty! 
host: {}", sourceHost); + throw new InternalException(ErrorCode.CAN_NOT_FIND_AVAILABLE_FILE_WORKER); + } + } else { + sourceHost.setAgentId(sourceHost.toCloudIp()); + } + log.info( + "[{}]: fileSourceTaskId={} success, sourceHost={}", + stepInstance.getUniqueKey(), + fileSourceTaskId, + sourceHost + ); + List hostDTOList = Collections.singletonList(sourceHost); + executeTargetDTO.addStaticHosts(hostDTOList); + executeTargetDTO.buildMergedExecuteObjects(stepInstance.isSupportExecuteObjectFeature()); + fileSourceDTO.setServers(executeTargetDTO); + Map filePathMap = fileSourceTaskStatusDTO.getFilePathMap(); + log.debug( + "[{}]: filePathMap={}", + stepInstance.getUniqueKey(), + filePathMap + ); + List files = fileSourceDTO.getFiles(); + // 设置downloadPath进行后续GSE分发 + for (FileDetailDTO file : files) { + String downloadPath = filePathMap.get(file.getThirdFilePath()); + file.setFilePath(downloadPath); + file.setResolvedFilePath(downloadPath); + } + } + private void writeLogs(StepInstanceDTO stepInstance, FileSourceTaskStatusDTO fileSourceTaskStatusDTO, diff --git a/src/backend/job-file-gateway/api-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/inner/ServiceFileSourceTaskResource.java b/src/backend/job-file-gateway/api-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/inner/ServiceFileSourceTaskResource.java index 4865298969..01c4278b7b 100644 --- a/src/backend/job-file-gateway/api-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/inner/ServiceFileSourceTaskResource.java +++ b/src/backend/job-file-gateway/api-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/inner/ServiceFileSourceTaskResource.java @@ -26,16 +26,11 @@ import com.tencent.bk.job.common.annotation.InternalAPI; import com.tencent.bk.job.common.model.InternalResponse; -import com.tencent.bk.job.file_gateway.model.req.inner.ClearBatchTaskFilesReq; import com.tencent.bk.job.file_gateway.model.req.inner.ClearTaskFilesReq; import 
com.tencent.bk.job.file_gateway.model.req.inner.FileSourceBatchDownloadTaskReq; -import com.tencent.bk.job.file_gateway.model.req.inner.FileSourceDownloadTaskReq; import com.tencent.bk.job.file_gateway.model.req.inner.StopBatchTaskReq; -import com.tencent.bk.job.file_gateway.model.req.inner.StopTaskReq; import com.tencent.bk.job.file_gateway.model.resp.inner.BatchTaskInfoDTO; import com.tencent.bk.job.file_gateway.model.resp.inner.BatchTaskStatusDTO; -import com.tencent.bk.job.file_gateway.model.resp.inner.FileSourceTaskStatusDTO; -import com.tencent.bk.job.file_gateway.model.resp.inner.TaskInfoDTO; import com.tentent.bk.job.common.api.feign.annotation.SmartFeignClient; import io.swagger.annotations.Api; import io.swagger.annotations.ApiOperation; @@ -53,24 +48,6 @@ @InternalAPI public interface ServiceFileSourceTaskResource { - // 直接转发至FileWorker的请求,URL子路径保持一致 - @ApiOperation(value = "创建并启动文件下载任务", produces = "application/json") - @PostMapping("/service/fileSource/filetask/downloadFiles/start") - InternalResponse startFileSourceDownloadTask( - @ApiParam("用户名") - @RequestHeader("username") - String username, - @ApiParam("文件源下载任务请求") - @RequestBody FileSourceDownloadTaskReq req - ); - - @ApiOperation(value = "清理任务已下载的文件", produces = "application/json") - @PostMapping("/service/fileSource/filetask/downloadFiles/stop") - InternalResponse stopTasks( - @ApiParam("文件源下载任务请求") - @RequestBody StopTaskReq req - ); - @ApiOperation(value = "清理任务已下载的文件", produces = "application/json") @PostMapping("/service/fileSource/filetask/clearFiles") InternalResponse clearTaskFiles( @@ -78,27 +55,12 @@ InternalResponse clearTaskFiles( @RequestBody ClearTaskFilesReq req ); - // 文件网关自有资源请求 - @ApiOperation(value = "获取文件任务状态", produces = "application/json") - @GetMapping("/service/fileSource/filetask/taskIds/{taskId}/status") - InternalResponse getFileSourceTaskStatusAndLogs( - @ApiParam("任务Id") - @PathVariable("taskId") - String taskId, - @ApiParam("日志开始位置") - @RequestParam(value = 
"logStart", required = false) - Long logStart, - @ApiParam("获取日志数量") - @RequestParam(value = "logLength", required = false) - Long logLength - ); - @ApiOperation(value = "创建并启动批量文件下载任务", produces = "application/json") @PostMapping("/service/fileSource/filetask/batch/downloadFiles/start") InternalResponse startFileSourceBatchDownloadTask( @ApiParam("用户名") @RequestHeader("username") - String username, + String username, @ApiParam("文件源下载任务请求") @RequestBody FileSourceBatchDownloadTaskReq req ); @@ -110,25 +72,17 @@ InternalResponse stopBatchTasks( @RequestBody StopBatchTaskReq req ); - @ApiOperation(value = "清理批量任务已下载的文件", produces = "application/json") - @PostMapping("/service/fileSource/filetask/batch/clearFiles") - InternalResponse clearBatchTaskFiles( - @ApiParam("文件源下载任务请求") - @RequestBody ClearBatchTaskFilesReq req - ); - - // 文件网关自有资源请求 @ApiOperation(value = "获取文件批量任务状态", produces = "application/json") @GetMapping("/service/fileSource/filetask/batch/batchTaskIds/{batchTaskId}/status") InternalResponse getBatchTaskStatusAndLogs( @ApiParam("任务Id") @PathVariable("batchTaskId") - String batchTaskId, + String batchTaskId, @ApiParam("日志开始位置") @RequestParam(value = "logStart", required = false) - Long logStart, + Long logStart, @ApiParam("获取日志数量") @RequestParam(value = "logLength", required = false) - Long logLength + Long logLength ); } diff --git a/src/backend/job-file-gateway/api-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/model/resp/inner/TaskInfoDTO.java b/src/backend/job-file-gateway/api-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/model/resp/inner/TaskInfoDTO.java index 37d25b15f1..ca824fbf94 100644 --- a/src/backend/job-file-gateway/api-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/model/resp/inner/TaskInfoDTO.java +++ b/src/backend/job-file-gateway/api-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/model/resp/inner/TaskInfoDTO.java @@ -35,6 +35,8 @@ public class TaskInfoDTO { String taskId; 
String fileSourceName; boolean fileSourcePublic; + Long workerId; + String workerAccessHost; Long cloudId; String ipProtocol; String ip; diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/inner/ServiceFileSourceTaskResourceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/inner/ServiceFileSourceTaskResourceImpl.java index bbfda90faf..b550e49c16 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/inner/ServiceFileSourceTaskResourceImpl.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/inner/ServiceFileSourceTaskResourceImpl.java @@ -25,16 +25,11 @@ package com.tencent.bk.job.file_gateway.api.inner; import com.tencent.bk.job.common.model.InternalResponse; -import com.tencent.bk.job.file_gateway.model.req.inner.ClearBatchTaskFilesReq; import com.tencent.bk.job.file_gateway.model.req.inner.ClearTaskFilesReq; import com.tencent.bk.job.file_gateway.model.req.inner.FileSourceBatchDownloadTaskReq; -import com.tencent.bk.job.file_gateway.model.req.inner.FileSourceDownloadTaskReq; import com.tencent.bk.job.file_gateway.model.req.inner.StopBatchTaskReq; -import com.tencent.bk.job.file_gateway.model.req.inner.StopTaskReq; import com.tencent.bk.job.file_gateway.model.resp.inner.BatchTaskInfoDTO; import com.tencent.bk.job.file_gateway.model.resp.inner.BatchTaskStatusDTO; -import com.tencent.bk.job.file_gateway.model.resp.inner.FileSourceTaskStatusDTO; -import com.tencent.bk.job.file_gateway.model.resp.inner.TaskInfoDTO; import com.tencent.bk.job.file_gateway.service.BatchTaskService; import com.tencent.bk.job.file_gateway.service.FileSourceTaskService; import lombok.extern.slf4j.Slf4j; @@ -56,31 +51,6 @@ public ServiceFileSourceTaskResourceImpl(FileSourceTaskService fileSourceTaskSer this.batchTaskService = 
batchTaskService; } - @Override - public InternalResponse startFileSourceDownloadTask(String username, FileSourceDownloadTaskReq req) { - return InternalResponse.buildSuccessResp(fileSourceTaskService.startFileSourceDownloadTask(username, - req.getAppId(), req.getStepInstanceId(), req.getExecuteCount(), null, req.getFileSourceId(), - req.getFilePathList())); - } - - @Override - public InternalResponse stopTasks(StopTaskReq req) { - return InternalResponse.buildSuccessResp(fileSourceTaskService.stopTasks(req.getTaskIdList())); - } - - @Override - public InternalResponse getFileSourceTaskStatusAndLogs(String taskId, Long logStart, - Long logLength) { - if (logStart == null || logStart < 0) { - logStart = 0L; - } - if (logLength == null || logLength <= 0) { - logLength = -1L; - } - return InternalResponse.buildSuccessResp(fileSourceTaskService.getFileSourceTaskStatusAndLogs(taskId, - logStart, logLength)); - } - @Override public InternalResponse clearTaskFiles(ClearTaskFilesReq req) { return InternalResponse.buildSuccessResp(fileSourceTaskService.clearTaskFiles(req.getTaskIdList())); @@ -88,9 +58,16 @@ public InternalResponse clearTaskFiles(ClearTaskFilesReq req) { @Override public InternalResponse startFileSourceBatchDownloadTask(String username, - FileSourceBatchDownloadTaskReq req) { - return InternalResponse.buildSuccessResp(batchTaskService.startFileSourceBatchDownloadTask(username, - req.getAppId(), req.getStepInstanceId(), req.getExecuteCount(), req.getFileSourceTaskList())); + FileSourceBatchDownloadTaskReq req) { + return InternalResponse.buildSuccessResp( + batchTaskService.startFileSourceBatchDownloadTask( + username, + req.getAppId(), + req.getStepInstanceId(), + req.getExecuteCount(), + req.getFileSourceTaskList() + ) + ); } @Override @@ -99,20 +76,21 @@ public InternalResponse stopBatchTasks(StopBatchTaskReq req) { } @Override - public InternalResponse getBatchTaskStatusAndLogs(String batchTaskId, Long logStart, - Long logLength) { + public 
InternalResponse getBatchTaskStatusAndLogs(String batchTaskId, + Long logStart, + Long logLength) { if (logStart == null || logStart < 0) { logStart = 0L; } if (logLength == null || logLength <= 0) { logLength = -1L; } - return InternalResponse.buildSuccessResp(batchTaskService.getBatchTaskStatusAndLogs(batchTaskId, logStart, - logLength)); - } - - @Override - public InternalResponse clearBatchTaskFiles(ClearBatchTaskFilesReq req) { - return InternalResponse.buildSuccessResp(batchTaskService.clearBatchTaskFiles(req.getBatchTaskIdList())); + return InternalResponse.buildSuccessResp( + batchTaskService.getBatchTaskStatusAndLogs( + batchTaskId, + logStart, + logLength + ) + ); } } diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/op/OpResourceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/op/OpResourceImpl.java index 7d6c75e8b3..c0b42fce86 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/op/OpResourceImpl.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/op/OpResourceImpl.java @@ -25,7 +25,7 @@ package com.tencent.bk.job.file_gateway.api.op; import com.tencent.bk.job.common.model.Response; -import com.tencent.bk.job.file_gateway.service.ReDispatchService; +import com.tencent.bk.job.file_gateway.service.dispatch.ReDispatchService; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.web.bind.annotation.RestController; @@ -44,6 +44,6 @@ public OpResourceImpl(ReDispatchService reDispatchService) { @Override public Response getReDispatchThreadsNum(String username) { - return Response.buildSuccessResp(reDispatchService.getReDispatchThreadsNum(username)); + return Response.buildSuccessResp(reDispatchService.getReDispatchThreadsNum()); } } 
diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/remote/RemoteFileWorkerResourceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/remote/RemoteFileWorkerResourceImpl.java index ffc6dc7705..43076f374c 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/remote/RemoteFileWorkerResourceImpl.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/api/remote/RemoteFileWorkerResourceImpl.java @@ -32,7 +32,7 @@ import com.tencent.bk.job.file_gateway.model.req.inner.UpdateFileSourceTaskReq; import com.tencent.bk.job.file_gateway.service.FileSourceTaskService; import com.tencent.bk.job.file_gateway.service.FileWorkerService; -import com.tencent.bk.job.file_gateway.service.ReDispatchService; +import com.tencent.bk.job.file_gateway.service.dispatch.ReDispatchService; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.web.bind.annotation.RestController; diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/metrics/MetricsConstants.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/metrics/MetricsConstants.java index 94ee7053c7..7061a7d9cf 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/metrics/MetricsConstants.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/metrics/MetricsConstants.java @@ -31,16 +31,20 @@ public class MetricsConstants { public static final String NAME_FILE_WORKER_ONLINE_NUM = "fileWorker.online.num"; public static final String NAME_FILE_WORKER_RESPONSE_TIME = "fileWorker.response.time"; public static 
final String NAME_FILE_GATEWAY_DISPATCH_TIME = "fileGateway.dispatch.time"; + public static final String NAME_FILE_GATEWAY_REDISPATCH_TIME = "fileGateway.reDispatch.time"; // tag public static final String TAG_KEY_MODULE = "module"; public static final String TAG_KEY_REQUEST_SOURCE = "requestSource"; public static final String TAG_KEY_DISPATCH_RESULT = "dispatchResult"; + public static final String TAG_KEY_APP_ID = "appId"; // value public static final String TAG_VALUE_MODULE_FILE_WORKER = "fileWorker"; public static final String TAG_VALUE_MODULE_FILE_GATEWAY = "fileGateway"; public static final String TAG_VALUE_DISPATCH_RESULT_TRUE = "true"; public static final String TAG_VALUE_DISPATCH_RESULT_FALSE = "false"; + public static final String TAG_VALUE_REDISPATCH_STATUS_SUCCESS = "success"; + public static final String TAG_VALUE_REDISPATCH_STATUS_ERROR = "error"; } diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/BatchTaskService.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/BatchTaskService.java index 680eb61f45..ddeedaed94 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/BatchTaskService.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/BatchTaskService.java @@ -31,7 +31,9 @@ import java.util.List; public interface BatchTaskService { - BatchTaskInfoDTO startFileSourceBatchDownloadTask(String username, Long appId, Long stepInstanceId, + BatchTaskInfoDTO startFileSourceBatchDownloadTask(String username, + Long appId, + Long stepInstanceId, Integer executeCount, List fileSourceTaskList); @@ -39,5 +41,4 @@ BatchTaskInfoDTO startFileSourceBatchDownloadTask(String username, Long appId, L BatchTaskStatusDTO getBatchTaskStatusAndLogs(String batchTaskId, Long logStart, Long logLength); - Integer 
clearBatchTaskFiles(List batchTaskIdList); } diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/FileSourceTaskService.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/FileSourceTaskService.java index 130629b177..1e7143f49a 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/FileSourceTaskService.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/FileSourceTaskService.java @@ -32,12 +32,22 @@ import java.util.List; public interface FileSourceTaskService { - TaskInfoDTO startFileSourceDownloadTask(String username, Long appId, Long stepInstanceId, Integer executeCount, - String batchTaskId, Integer fileSourceId, List filePathList); + TaskInfoDTO startFileSourceDownloadTask(String username, + Long appId, + Long stepInstanceId, + Integer executeCount, + String batchTaskId, + Integer fileSourceId, + List filePathList); - TaskInfoDTO startFileSourceDownloadTaskWithId(String username, Long appId, Long stepInstanceId, - Integer executeCount, String batchTaskId, Integer fileSourceId, - List filePathList, String fileSourceTaskId); + TaskInfoDTO startFileSourceDownloadTaskWithId(String username, + Long appId, + Long stepInstanceId, + Integer executeCount, + String batchTaskId, + Integer fileSourceId, + List filePathList, + String fileSourceTaskId); String updateFileSourceTask(FileTaskProgressDTO fileTaskProgressDTO); diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/RetryPolicyFileSourceTaskService.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/RetryPolicyFileSourceTaskService.java new file mode 100644 index 0000000000..e7a8888e8c --- /dev/null +++ 
b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/RetryPolicyFileSourceTaskService.java @@ -0,0 +1,39 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. + * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +package com.tencent.bk.job.file_gateway.service; + +import com.tencent.bk.job.file_gateway.model.resp.inner.TaskInfoDTO; + +import java.util.List; + +public interface RetryPolicyFileSourceTaskService { + TaskInfoDTO startFileSourceDownloadTask(String username, + Long appId, + Long stepInstanceId, + Integer executeCount, + String batchTaskId, + Integer fileSourceId, + List filePathList); +} diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/context/impl/FileSourceTaskRetryContext.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/context/impl/FileSourceTaskRetryContext.java new file mode 100644 index 0000000000..7591af725c --- /dev/null +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/context/impl/FileSourceTaskRetryContext.java @@ -0,0 +1,37 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. + * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +package com.tencent.bk.job.file_gateway.service.context.impl; + +import lombok.AllArgsConstructor; +import lombok.Data; + +@Data +@AllArgsConstructor +public class FileSourceTaskRetryContext { + /** + * 重试前业务逻辑代码抛出的异常 + */ + private Exception exception; +} diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/DispatchService.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/DispatchService.java similarity index 97% rename from src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/DispatchService.java rename to src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/DispatchService.java index 32306d8104..fc2192aaa9 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/DispatchService.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/DispatchService.java @@ -22,7 +22,7 @@ * IN THE SOFTWARE. 
*/ -package com.tencent.bk.job.file_gateway.service; +package com.tencent.bk.job.file_gateway.service.dispatch; import com.tencent.bk.job.file_gateway.model.dto.FileSourceDTO; import com.tencent.bk.job.file_gateway.model.dto.FileWorkerDTO; diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/ReDispatchService.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/ReDispatchService.java similarity index 94% rename from src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/ReDispatchService.java rename to src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/ReDispatchService.java index e96ef1bb91..71736c8983 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/ReDispatchService.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/ReDispatchService.java @@ -22,7 +22,7 @@ * IN THE SOFTWARE. 
*/ -package com.tencent.bk.job.file_gateway.service; +package com.tencent.bk.job.file_gateway.service.dispatch; import java.util.List; @@ -39,6 +39,6 @@ List reDispatchByWorker( boolean reDispatchByGateway(String fileSourceTaskId, Long initDelayMills, Long intervalMills); - Integer getReDispatchThreadsNum(String username); + Integer getReDispatchThreadsNum(); } diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/ReDispatchTaskService.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/ReDispatchTaskService.java new file mode 100644 index 0000000000..ac52f5ca40 --- /dev/null +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/ReDispatchTaskService.java @@ -0,0 +1,31 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. + * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +package com.tencent.bk.job.file_gateway.service.dispatch; + +import com.tencent.bk.job.file_gateway.model.resp.inner.TaskInfoDTO; + +public interface ReDispatchTaskService { + TaskInfoDTO reDispatchFileSourceTask(String fileSourceTaskId); +} diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/DispatchServiceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/DispatchServiceImpl.java similarity index 98% rename from src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/DispatchServiceImpl.java rename to src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/DispatchServiceImpl.java index 45cbced508..55e5f607d3 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/DispatchServiceImpl.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/DispatchServiceImpl.java @@ -22,7 +22,7 @@ * IN THE SOFTWARE. 
*/ -package com.tencent.bk.job.file_gateway.service.impl; +package com.tencent.bk.job.file_gateway.service.dispatch.impl; import com.tencent.bk.job.common.util.json.JsonUtils; import com.tencent.bk.job.file_gateway.consts.WorkerSelectModeEnum; @@ -32,8 +32,9 @@ import com.tencent.bk.job.file_gateway.model.dto.FileSourceDTO; import com.tencent.bk.job.file_gateway.model.dto.FileWorkerDTO; import com.tencent.bk.job.file_gateway.service.AbilityTagService; -import com.tencent.bk.job.file_gateway.service.DispatchService; +import com.tencent.bk.job.file_gateway.service.dispatch.DispatchService; import com.tencent.bk.job.file_gateway.service.FileWorkerService; +import com.tencent.bk.job.file_gateway.service.impl.WorkerIdsCondition; import io.micrometer.core.instrument.MeterRegistry; import io.micrometer.core.instrument.Tag; import io.micrometer.core.instrument.Timer; diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/ReDispatchServiceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/ReDispatchServiceImpl.java new file mode 100644 index 0000000000..c1ad69716e --- /dev/null +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/ReDispatchServiceImpl.java @@ -0,0 +1,144 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. 
+ * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +package com.tencent.bk.job.file_gateway.service.dispatch.impl; + +import com.tencent.bk.job.common.constant.ErrorCode; +import com.tencent.bk.job.common.exception.InternalException; +import com.tencent.bk.job.file_gateway.model.dto.FileSourceTaskDTO; +import com.tencent.bk.job.file_gateway.model.dto.FileWorkerDTO; +import com.tencent.bk.job.file_gateway.service.FileSourceTaskService; +import com.tencent.bk.job.file_gateway.service.FileWorkerService; +import com.tencent.bk.job.file_gateway.service.dispatch.ReDispatchService; +import com.tencent.bk.job.file_gateway.service.dispatch.ReDispatchTaskService; +import lombok.extern.slf4j.Slf4j; +import org.slf4j.helpers.FormattingTuple; +import org.slf4j.helpers.MessageFormatter; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import java.util.Collections; +import java.util.List; +import java.util.Timer; + +@Slf4j +@Service +public class ReDispatchServiceImpl implements ReDispatchService { + + private final FileWorkerService fileWorkerService; + private final FileSourceTaskService fileSourceTaskService; + private final ReDispatchTaskService reDispatchTaskService; + // 最多使用50线程进行重调度 + private final int MAX_THREAD_NUM_REDISPATCH = 50; + + @Autowired + public ReDispatchServiceImpl( + FileWorkerService fileWorkerService, + FileSourceTaskService fileSourceTaskService, + ReDispatchTaskService reDispatchTaskService + ) { + this.fileWorkerService = fileWorkerService; + this.fileSourceTaskService = fileSourceTaskService; + this.reDispatchTaskService = reDispatchTaskService; + } + + @Override + public List reDispatchByWorker( + String accessHost, + Integer accessPort, + List taskIdList, + Long initDelayMills, + Long intervalMills + ) { + FileWorkerDTO fileWorkerDTO = fileWorkerService.getFileWorker(accessHost, accessPort); + if (fileWorkerDTO == null) { + FormattingTuple msg = MessageFormatter.format( + "Fail to find file-worker by accessHost:{} 
accessPort:{}", accessHost, accessPort + ); + log.warn(msg.getMessage()); + throw new InternalException( + ErrorCode.FILE_WORKER_NOT_FOUND, + new String[]{ + "accessHost:" + accessHost + ",accessPort:" + accessPort, + } + ); + } + Long workerId = fileWorkerDTO.getId(); + log.debug("worker {} apply to reDispatch tasks:{}, initDelayMills={}, intervalMills={}", workerId, taskIdList + , initDelayMills, intervalMills); + // 1.立即下线Worker + int affectedWorkerNum = fileWorkerService.offLine(workerId); + log.info("{} worker state changed to offline", affectedWorkerNum); + // 2.任务延时重调度 + for (String taskId : taskIdList) { + if (getReDispatchThreadsNum() >= MAX_THREAD_NUM_REDISPATCH) { + log.warn("reDispatch thread reach MAX_NUM:{}, do not reDispatch {}", MAX_THREAD_NUM_REDISPATCH, taskId); + continue; + } + Timer timer = new Timer(); + ReDispatchTask reDispatchTask = buildReDispatchTask(taskId, intervalMills); + timer.schedule(reDispatchTask, initDelayMills); + } + return taskIdList; + } + + @Override + public boolean reDispatchByGateway(String fileSourceTaskId, Long initDelayMills, Long intervalMills) { + // 1.尝试通知Worker主动取消该任务 + FileSourceTaskDTO fileSourceTaskDTO = fileSourceTaskService.getFileSourceTaskById(fileSourceTaskId); + if (fileSourceTaskDTO == null) { + log.warn("task not exist, ignore, id={}", fileSourceTaskId); + return false; + } + try { + fileSourceTaskService.recallTasks(Collections.singletonList(fileSourceTaskId)); + } catch (Throwable t) { + log.warn("Fail to recallTask:{}", fileSourceTaskId, t); + } + // 2.重调度 + if (getReDispatchThreadsNum() >= MAX_THREAD_NUM_REDISPATCH) { + log.warn("reDispatch thread reach MAX_NUM:{}, do not reDispatch {}", MAX_THREAD_NUM_REDISPATCH, + fileSourceTaskId); + return false; + } + Timer timer = new Timer(); + ReDispatchTask reDispatchTask = buildReDispatchTask(fileSourceTaskId, intervalMills); + timer.schedule(reDispatchTask, initDelayMills); + return true; + } + + private ReDispatchTask buildReDispatchTask(String 
fileSourceTaskId, Long intervalMills) { + return new ReDispatchTask( + reDispatchTaskService, + fileSourceTaskId, + intervalMills + ); + } + + @Override + public Integer getReDispatchThreadsNum() { + return ReDispatchTask.getReDispatchThreadsNum(); + } + +} diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/ReDispatchTask.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/ReDispatchTask.java new file mode 100644 index 0000000000..7dedfc13ec --- /dev/null +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/ReDispatchTask.java @@ -0,0 +1,106 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. + * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +package com.tencent.bk.job.file_gateway.service.dispatch.impl; + +import com.tencent.bk.job.common.util.ThreadUtils; +import com.tencent.bk.job.file_gateway.model.resp.inner.TaskInfoDTO; +import com.tencent.bk.job.file_gateway.service.dispatch.ReDispatchTaskService; +import lombok.extern.slf4j.Slf4j; +import org.slf4j.helpers.MessageFormatter; + +import java.util.HashSet; +import java.util.Set; +import java.util.TimerTask; +import java.util.concurrent.atomic.AtomicInteger; + +@Slf4j +public class ReDispatchTask extends TimerTask { + private static final AtomicInteger reDispatchThreadNum = new AtomicInteger(0); + private static final Set reDispatchingTaskIds = new HashSet<>(); + private final ReDispatchTaskService reDispatchTaskService; + private final String fileSourceTaskId; + private final Long intervalMills; + + ReDispatchTask(ReDispatchTaskService reDispatchTaskService, + String fileSourceTaskId, + Long intervalMills) { + this.reDispatchTaskService = reDispatchTaskService; + this.fileSourceTaskId = fileSourceTaskId; + this.intervalMills = intervalMills; + } + + public static Integer getReDispatchThreadsNum() { + return reDispatchThreadNum.get(); + } + + @Override + public void run() { + synchronized (reDispatchingTaskIds) { + if (reDispatchingTaskIds.contains(fileSourceTaskId)) { + log.info("task {} already in reDispatching, ignore", fileSourceTaskId); + return; + } + reDispatchingTaskIds.add(fileSourceTaskId); + reDispatchThreadNum.incrementAndGet(); + } + reDispatchTaskWithRetry(); + synchronized (reDispatchingTaskIds) { + reDispatchingTaskIds.remove(fileSourceTaskId); + reDispatchThreadNum.decrementAndGet(); + } + } + + /** + * 对文件源任务进行重调度,失败时进行重试 + */ + private void 
reDispatchTaskWithRetry() { + boolean reDispatchSuccess = false; + int retryCount = 0; + int maxRetryCount = 3; + while (!reDispatchSuccess && retryCount < maxRetryCount) { + try { + TaskInfoDTO taskInfoDTO = reDispatchTaskService.reDispatchFileSourceTask(fileSourceTaskId); + reDispatchSuccess = true; + log.debug("reDispatch result of {}:{}", fileSourceTaskId, taskInfoDTO); + } catch (Exception e) { + retryCount += 1; + String message = MessageFormatter.format( + "Fail to redispatch task {}, wait {}ms to retry {}", + new Object[]{ + fileSourceTaskId, + intervalMills, + retryCount + } + ).getMessage(); + if (retryCount < maxRetryCount) { + log.info(message); + ThreadUtils.sleep(intervalMills); + } else { + log.error(message, e); + } + } + } + } +} diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/ReDispatchTaskServiceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/ReDispatchTaskServiceImpl.java new file mode 100644 index 0000000000..3403f508b6 --- /dev/null +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/dispatch/impl/ReDispatchTaskServiceImpl.java @@ -0,0 +1,142 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. 
+ * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +package com.tencent.bk.job.file_gateway.service.dispatch.impl; + +import com.tencent.bk.job.common.mysql.JobTransactional; +import com.tencent.bk.job.file_gateway.metrics.MetricsConstants; +import com.tencent.bk.job.file_gateway.model.dto.FileSourceTaskDTO; +import com.tencent.bk.job.file_gateway.model.dto.FileTaskDTO; +import com.tencent.bk.job.file_gateway.model.resp.inner.TaskInfoDTO; +import com.tencent.bk.job.file_gateway.service.FileSourceTaskService; +import com.tencent.bk.job.file_gateway.service.FileTaskService; +import com.tencent.bk.job.file_gateway.service.dispatch.ReDispatchTaskService; +import io.micrometer.core.instrument.MeterRegistry; +import io.micrometer.core.instrument.Tag; +import io.micrometer.core.instrument.Timer; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +@Slf4j +@Service +public class ReDispatchTaskServiceImpl implements ReDispatchTaskService { + + private final FileSourceTaskService fileSourceTaskService; + private final FileTaskService fileTaskService; + private final MeterRegistry meterRegistry; + + @Autowired + public ReDispatchTaskServiceImpl(FileSourceTaskService fileSourceTaskService, + FileTaskService fileTaskService, + MeterRegistry meterRegistry) { + this.fileSourceTaskService = fileSourceTaskService; + this.fileTaskService = fileTaskService; + this.meterRegistry = meterRegistry; + } + + /** + * 对文件源任务进行重调度,过程中开启事务保证数据一致性 + * + * @param fileSourceTaskId 文件源任务ID + * @return 重调度结果 + */ + @Override + @JobTransactional(transactionManager = "jobFileGatewayTransactionManager") + public TaskInfoDTO reDispatchFileSourceTask(String fileSourceTaskId) { + long startTime = System.currentTimeMillis(); + FileSourceTaskDTO fileSourceTaskDTO = 
fileSourceTaskService.getFileSourceTaskById(fileSourceTaskId); + String reDispatchStatus = null; + try { + TaskInfoDTO taskInfoDTO = doReDispatchFileSourceTask(fileSourceTaskDTO); + reDispatchStatus = MetricsConstants.TAG_VALUE_REDISPATCH_STATUS_SUCCESS; + return taskInfoDTO; + } catch (Exception e) { + reDispatchStatus = MetricsConstants.TAG_VALUE_REDISPATCH_STATUS_ERROR; + throw e; + } finally { + long timeConsumingMills = System.currentTimeMillis() - startTime; + recordReDispatchCost(timeConsumingMills, buildDispatchTags(fileSourceTaskDTO.getAppId(), reDispatchStatus)); + } + } + + private Iterable buildDispatchTags(Long appId, String reDispatchStatus) { + List tagList = new ArrayList<>(); + tagList.add(Tag.of(MetricsConstants.TAG_KEY_MODULE, MetricsConstants.TAG_VALUE_MODULE_FILE_GATEWAY)); + tagList.add(Tag.of(MetricsConstants.TAG_KEY_APP_ID, String.valueOf(appId))); + tagList.add(Tag.of(MetricsConstants.TAG_KEY_DISPATCH_RESULT, reDispatchStatus)); + return tagList; + } + + private TaskInfoDTO doReDispatchFileSourceTask(FileSourceTaskDTO fileSourceTaskDTO) { + String fileSourceTaskId = fileSourceTaskDTO.getId(); + Long oldFileWorkerId = fileSourceTaskDTO.getFileWorkerId(); + List fileTaskDTOList = fileTaskService.listFileTasks(fileSourceTaskId); + List filePathList = + fileTaskDTOList.stream().map(FileTaskDTO::getFilePath).collect(Collectors.toList()); + // 1.删除现有子任务 + int deletedTaskNum = fileTaskService.deleteTasks(fileSourceTaskId); + // 2.删除现有FileSourceTask任务 + int deletedFileSourceTaskNum = fileSourceTaskService.deleteFileSourceTaskById(fileSourceTaskId); + // 3.重新派发任务 + TaskInfoDTO taskInfoDTO = fileSourceTaskService.startFileSourceDownloadTaskWithId( + fileSourceTaskDTO.getCreator(), + fileSourceTaskDTO.getAppId(), + fileSourceTaskDTO.getStepInstanceId(), + fileSourceTaskDTO.getExecuteCount(), + fileSourceTaskDTO.getBatchTaskId(), + fileSourceTaskDTO.getFileSourceId(), + filePathList, + fileSourceTaskId + ); + log.info( + "FileSourceTask(id={}, 
oldFileWorkerId={}) reDispatched to worker(id={},accessHost={})," + + " [ {} fileTask, {} fileSourceTask] deleted and re-inserted", + fileSourceTaskId, + oldFileWorkerId, + taskInfoDTO.getWorkerId(), + taskInfoDTO.getWorkerAccessHost(), + deletedTaskNum, + deletedFileSourceTaskNum + ); + return taskInfoDTO; + } + + private void recordReDispatchCost(long timeConsumingMillis, Iterable tags) { + Timer.builder(MetricsConstants.NAME_FILE_GATEWAY_REDISPATCH_TIME) + .description("ReDispatch FileSourceTask Cost") + .tags(tags) + .publishPercentileHistogram(true) + .minimumExpectedValue(Duration.ofMillis(10)) + .maximumExpectedValue(Duration.ofSeconds(60L)) + .register(meterRegistry) + .record(timeConsumingMillis, TimeUnit.MILLISECONDS); + } +} diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/BatchTaskServiceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/BatchTaskServiceImpl.java index eaa4166070..15dce4f51f 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/BatchTaskServiceImpl.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/BatchTaskServiceImpl.java @@ -38,6 +38,7 @@ import com.tencent.bk.job.file_gateway.model.resp.inner.TaskInfoDTO; import com.tencent.bk.job.file_gateway.service.BatchTaskService; import com.tencent.bk.job.file_gateway.service.FileSourceTaskService; +import com.tencent.bk.job.file_gateway.service.RetryPolicyFileSourceTaskService; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; @@ -51,20 +52,25 @@ public class BatchTaskServiceImpl implements BatchTaskService { private final FileSourceTaskService fileSourceTaskService; + private final 
RetryPolicyFileSourceTaskService retryPolicyFileSourceTaskService; private final FileSourceBatchTaskDAO fileSourceBatchTaskDAO; private final FileSourceTaskDAO fileSourceTaskDAO; @Autowired public BatchTaskServiceImpl(FileSourceTaskService fileSourceTaskService, + RetryPolicyFileSourceTaskService retryPolicyFileSourceTaskService, FileSourceBatchTaskDAO fileSourceBatchTaskDAO, FileSourceTaskDAO fileSourceTaskDAO) { this.fileSourceTaskService = fileSourceTaskService; + this.retryPolicyFileSourceTaskService = retryPolicyFileSourceTaskService; this.fileSourceBatchTaskDAO = fileSourceBatchTaskDAO; this.fileSourceTaskDAO = fileSourceTaskDAO; } @Override - public BatchTaskInfoDTO startFileSourceBatchDownloadTask(String username, Long appId, Long stepInstanceId, + public BatchTaskInfoDTO startFileSourceBatchDownloadTask(String username, + Long appId, + Long stepInstanceId, Integer executeCount, List fileSourceTaskList) { BatchTaskInfoDTO batchTaskInfoDTO = new BatchTaskInfoDTO(); @@ -79,9 +85,15 @@ public BatchTaskInfoDTO startFileSourceBatchDownloadTask(String username, Long a batchTaskInfoDTO.setBatchTaskId(batchTaskId); List taskInfoDTOList = new ArrayList<>(); for (FileSourceTaskContent fileSourceTaskContent : fileSourceTaskList) { - TaskInfoDTO taskInfoDTO = fileSourceTaskService.startFileSourceDownloadTask(username, appId, - stepInstanceId, executeCount, batchTaskId, fileSourceTaskContent.getFileSourceId(), - fileSourceTaskContent.getFilePathList()); + TaskInfoDTO taskInfoDTO = retryPolicyFileSourceTaskService.startFileSourceDownloadTask( + username, + appId, + stepInstanceId, + executeCount, + batchTaskId, + fileSourceTaskContent.getFileSourceId(), + fileSourceTaskContent.getFilePathList() + ); taskInfoDTOList.add(taskInfoDTO); } batchTaskInfoDTO.setTaskInfoList(taskInfoDTOList); @@ -123,10 +135,4 @@ public BatchTaskStatusDTO getBatchTaskStatusAndLogs(String batchTaskId, Long log batchTaskStatusDTO.setFileSourceTaskStatusInfoList(fileSourceTaskStatusInfoList); 
return batchTaskStatusDTO; } - - @Override - public Integer clearBatchTaskFiles(List batchTaskIdList) { - List fileSourceTaskIdList = getFileSourceTaskIdListByBatch(batchTaskIdList); - return fileSourceTaskService.clearTaskFiles(fileSourceTaskIdList); - } } diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/FileServiceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/FileServiceImpl.java index 753aec74c3..aefea20e56 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/FileServiceImpl.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/FileServiceImpl.java @@ -37,7 +37,7 @@ import com.tencent.bk.job.file_gateway.model.req.common.ExecuteActionReq; import com.tencent.bk.job.file_gateway.model.resp.common.FileNodesDTO; import com.tencent.bk.job.file_gateway.model.resp.common.FileNodesVO; -import com.tencent.bk.job.file_gateway.service.DispatchService; +import com.tencent.bk.job.file_gateway.service.dispatch.DispatchService; import com.tencent.bk.job.file_gateway.service.FileService; import com.tencent.bk.job.file_gateway.service.FileSourceService; import com.tencent.bk.job.file_gateway.service.remote.FileSourceReqGenService; diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/FileSourceTaskServiceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/FileSourceTaskServiceImpl.java index 16d7ed72b3..035fd3c446 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/FileSourceTaskServiceImpl.java +++ 
b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/FileSourceTaskServiceImpl.java @@ -30,7 +30,6 @@ import com.tencent.bk.job.common.exception.ServiceException; import com.tencent.bk.job.common.model.Response; import com.tencent.bk.job.common.model.http.HttpReq; -import com.tencent.bk.job.common.mysql.JobTransactional; import com.tencent.bk.job.common.util.http.JobHttpClient; import com.tencent.bk.job.common.util.json.JsonUtils; import com.tencent.bk.job.file_gateway.consts.TaskCommandEnum; @@ -47,9 +46,9 @@ import com.tencent.bk.job.file_gateway.model.resp.inner.FileSourceTaskStatusDTO; import com.tencent.bk.job.file_gateway.model.resp.inner.TaskInfoDTO; import com.tencent.bk.job.file_gateway.model.resp.inner.ThirdFileSourceTaskLogDTO; -import com.tencent.bk.job.file_gateway.service.DispatchService; import com.tencent.bk.job.file_gateway.service.FileSourceTaskService; import com.tencent.bk.job.file_gateway.service.FileSourceTaskUpdateService; +import com.tencent.bk.job.file_gateway.service.dispatch.DispatchService; import com.tencent.bk.job.file_gateway.service.remote.FileSourceTaskReqGenService; import lombok.extern.slf4j.Slf4j; import org.slf4j.helpers.MessageFormatter; @@ -103,20 +102,43 @@ public FileSourceTaskServiceImpl(FileSourceTaskUpdateService fileSourceTaskUpdat } @Override - public TaskInfoDTO startFileSourceDownloadTask(String username, Long appId, Long stepInstanceId, - Integer executeCount, String batchTaskId, Integer fileSourceId, + public TaskInfoDTO startFileSourceDownloadTask(String username, + Long appId, + Long stepInstanceId, + Integer executeCount, + String batchTaskId, + Integer fileSourceId, List filePathList) { - return startFileSourceDownloadTaskWithId(username, appId, stepInstanceId, executeCount, batchTaskId, - fileSourceId, filePathList, null); + return startFileSourceDownloadTaskWithId( + username, + appId, + stepInstanceId, + executeCount, + batchTaskId, + fileSourceId, 
+ filePathList, + null + ); } - @JobTransactional(transactionManager = "jobFileGatewayTransactionManager") - public TaskInfoDTO startFileSourceDownloadTaskWithId(String username, Long appId, Long stepInstanceId, - Integer executeCount, String batchTaskId, - Integer fileSourceId, List filePathList, + public TaskInfoDTO startFileSourceDownloadTaskWithId(String username, + Long appId, + Long stepInstanceId, + Integer executeCount, + String batchTaskId, + Integer fileSourceId, + List filePathList, String fileSourceTaskId) { - log.info("Input=({},{},{},{},{},{},{})", username, appId, stepInstanceId, executeCount, batchTaskId, - fileSourceId, filePathList); + log.info( + "startFileSourceDownloadTaskWithId, input=({},{},{},{},{},{},{})", + username, + appId, + stepInstanceId, + executeCount, + batchTaskId, + fileSourceId, + filePathList + ); FileSourceDTO fileSourceDTO = fileSourceDAO.getFileSourceById(fileSourceId); if (fileSourceDTO == null) { throw new RuntimeException("FileSource not exist, fileSourceId=" + fileSourceId.toString()); @@ -130,34 +152,19 @@ public TaskInfoDTO startFileSourceDownloadTaskWithId(String username, Long appId "stepInstanceId=%d,fileSourceId=%d,filePathList=%s", appId, stepInstanceId, fileSourceId, filePathList.toString())); } - FileSourceTaskDTO fileSourceTaskDTO = new FileSourceTaskDTO(); - fileSourceTaskDTO.setId(fileSourceTaskId); - fileSourceTaskDTO.setBatchTaskId(batchTaskId); - fileSourceTaskDTO.setAppId(appId); - fileSourceTaskDTO.setCreator(username); - fileSourceTaskDTO.setCreateTime(System.currentTimeMillis()); - fileSourceTaskDTO.setStepInstanceId(stepInstanceId); - fileSourceTaskDTO.setExecuteCount(executeCount); - fileSourceTaskDTO.setFileSourceId(fileSourceId); - fileSourceTaskDTO.setFileWorkerId(fileWorkerDTO.getId()); - fileSourceTaskDTO.setStatus(TaskStatusEnum.INIT.getStatus()); - List fileTaskDTOList = new ArrayList<>(); - for (String filePath : filePathList) { - FileTaskDTO fileTaskDTO = new FileTaskDTO(); - 
fileTaskDTO.setId(null); - fileTaskDTO.setFileSourceTaskId(null); - fileTaskDTO.setCreateTime(System.currentTimeMillis()); - fileTaskDTO.setProgress(0); - fileTaskDTO.setFilePath(filePath); - fileTaskDTO.setDownloadPath(null); - fileTaskDTO.setStatus(TaskStatusEnum.INIT.getStatus()); - fileTaskDTO.setErrorMsg(""); - fileTaskDTOList.add(fileTaskDTO); - } - fileSourceTaskDTO.setFileTaskList(fileTaskDTOList); + FileSourceTaskDTO fileSourceTaskDTO = saveFileSourceTask( + username, + appId, + stepInstanceId, + executeCount, + batchTaskId, + fileSourceId, + filePathList, + fileSourceTaskId, + fileWorkerDTO.getId() + ); + fileSourceTaskId = fileSourceTaskDTO.getId(); try { - fileSourceTaskId = fileSourceTaskDAO.insertFileSourceTask(fileSourceTaskDTO); - fileSourceTaskDTO.setId(fileSourceTaskId); // 分发文件任务 HttpReq req = fileSourceTaskReqGenService.genDownloadFilesReq(appId, fileWorkerDTO, fileSourceDTO, fileSourceTaskDTO); @@ -168,12 +175,8 @@ public TaskInfoDTO startFileSourceDownloadTaskWithId(String username, Long appId JsonUtils.toJson(fileSourceTaskDTO) ).getMessage(); log.error(msg, e); - // 更新任务状态为启动失败 - fileSourceTaskDTO.setStatus(TaskStatusEnum.DISPATCH_FAILED.getStatus()); - int affectedCount = fileSourceTaskDAO.updateFileSourceTask(fileSourceTaskDTO); - if (affectedCount != 1) { - log.error("Fail to update status of FileSourceTask={}", JsonUtils.toJson(fileSourceTaskDTO)); - } + // 清理DB中的任务数据便于外层重试 + clearSavedFileSourceTask(fileSourceTaskId); throw new InternalException( e, ErrorCode.FAIL_TO_REQUEST_FILE_WORKER_START_FILE_SOURCE_DOWNLOAD_TASK, @@ -184,12 +187,66 @@ public TaskInfoDTO startFileSourceDownloadTaskWithId(String username, Long appId fileSourceTaskId, fileSourceDTO.getAlias(), fileSourceDTO.getPublicFlag(), + fileWorkerDTO.getId(), + fileWorkerDTO.getAccessHost(), fileWorkerDTO.getCloudAreaId(), fileWorkerDTO.getInnerIpProtocol(), fileWorkerDTO.getInnerIp() ); } + private FileSourceTaskDTO saveFileSourceTask(String username, + Long appId, + Long 
stepInstanceId, + Integer executeCount, + String batchTaskId, + Integer fileSourceId, + List filePathList, + String fileSourceTaskId, + Long fileWorkerId) { + FileSourceTaskDTO fileSourceTaskDTO = new FileSourceTaskDTO(); + fileSourceTaskDTO.setId(fileSourceTaskId); + fileSourceTaskDTO.setBatchTaskId(batchTaskId); + fileSourceTaskDTO.setAppId(appId); + fileSourceTaskDTO.setCreator(username); + fileSourceTaskDTO.setCreateTime(System.currentTimeMillis()); + fileSourceTaskDTO.setStepInstanceId(stepInstanceId); + fileSourceTaskDTO.setExecuteCount(executeCount); + fileSourceTaskDTO.setFileSourceId(fileSourceId); + fileSourceTaskDTO.setFileWorkerId(fileWorkerId); + fileSourceTaskDTO.setStatus(TaskStatusEnum.INIT.getStatus()); + List fileTaskDTOList = new ArrayList<>(); + for (String filePath : filePathList) { + FileTaskDTO fileTaskDTO = new FileTaskDTO(); + fileTaskDTO.setId(null); + fileTaskDTO.setFileSourceTaskId(null); + fileTaskDTO.setCreateTime(System.currentTimeMillis()); + fileTaskDTO.setProgress(0); + fileTaskDTO.setFilePath(filePath); + fileTaskDTO.setDownloadPath(null); + fileTaskDTO.setStatus(TaskStatusEnum.INIT.getStatus()); + fileTaskDTO.setErrorMsg(""); + fileTaskDTOList.add(fileTaskDTO); + } + fileSourceTaskDTO.setFileTaskList(fileTaskDTOList); + fileSourceTaskId = fileSourceTaskDAO.insertFileSourceTask(fileSourceTaskDTO); + fileSourceTaskDTO.setId(fileSourceTaskId); + return fileSourceTaskDTO; + } + + private void clearSavedFileSourceTask(String fileSourceTaskId) { + // 1.删除子任务 + int deletedTaskNum = fileTaskDAO.deleteFileTaskByFileSourceTaskId(fileSourceTaskId); + // 2.删除FileSourceTask任务 + int deletedFileSourceTaskNum = deleteFileSourceTaskById(fileSourceTaskId); + log.info( + "{} fileTask {} fileSourceTask deleted, fileSourceTaskId={}", + deletedTaskNum, + deletedFileSourceTaskNum, + fileSourceTaskId + ); + } + @Override public String updateFileSourceTask(FileTaskProgressDTO fileTaskProgressDTO) { String fileSourceTaskId = 
fileTaskProgressDTO.getFileSourceTaskId(); diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/ReDispatchServiceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/ReDispatchServiceImpl.java deleted file mode 100644 index 9d21639807..0000000000 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/ReDispatchServiceImpl.java +++ /dev/null @@ -1,245 +0,0 @@ -/* - * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. - * - * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. - * - * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. - * - * License for BK-JOB蓝鲸智云作业平台: - * -------------------------------------------------------------------- - * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated - * documentation files (the "Software"), to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and - * to permit persons to whom the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all copies or substantial portions of - * the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO - * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF - * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -package com.tencent.bk.job.file_gateway.service.impl; - -import com.tencent.bk.job.common.constant.ErrorCode; -import com.tencent.bk.job.common.exception.InternalException; -import com.tencent.bk.job.file_gateway.model.dto.FileSourceDTO; -import com.tencent.bk.job.file_gateway.model.dto.FileSourceTaskDTO; -import com.tencent.bk.job.file_gateway.model.dto.FileTaskDTO; -import com.tencent.bk.job.file_gateway.model.dto.FileWorkerDTO; -import com.tencent.bk.job.file_gateway.model.resp.inner.TaskInfoDTO; -import com.tencent.bk.job.file_gateway.service.DispatchService; -import com.tencent.bk.job.file_gateway.service.FileSourceService; -import com.tencent.bk.job.file_gateway.service.FileSourceTaskService; -import com.tencent.bk.job.file_gateway.service.FileTaskService; -import com.tencent.bk.job.file_gateway.service.FileWorkerService; -import com.tencent.bk.job.file_gateway.service.ReDispatchService; -import lombok.extern.slf4j.Slf4j; -import org.slf4j.helpers.FormattingTuple; -import org.slf4j.helpers.MessageFormatter; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.stereotype.Service; - -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.Timer; -import java.util.TimerTask; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Collectors; - -@Slf4j -@Service -public class ReDispatchServiceImpl implements ReDispatchService { - - private final DispatchService dispatchService; - private final FileWorkerService fileWorkerService; - private final FileSourceService fileSourceService; - private final FileSourceTaskService fileSourceTaskService; - private final FileTaskService fileTaskService; - // 最多使用50线程进行重调度 - private final int MAX_THREAD_NUM_REDISPATCH = 50; - private final AtomicInteger reDispatchThreadNum = new AtomicInteger(0); - private final Set reDispatchingTaskIds = new HashSet<>(); - - @Autowired - public 
ReDispatchServiceImpl( - DispatchService dispatchService, - FileWorkerService fileWorkerService, - FileSourceService fileSourceService, - FileSourceTaskService fileSourceTaskService, - FileTaskService fileTaskService - ) { - this.dispatchService = dispatchService; - this.fileWorkerService = fileWorkerService; - this.fileSourceService = fileSourceService; - this.fileSourceTaskService = fileSourceTaskService; - this.fileTaskService = fileTaskService; - } - - @Override - public List reDispatchByWorker( - String accessHost, - Integer accessPort, - List taskIdList, - Long initDelayMills, - Long intervalMills - ) { - FileWorkerDTO fileWorkerDTO = fileWorkerService.getFileWorker(accessHost, accessPort); - if (fileWorkerDTO == null) { - FormattingTuple msg = MessageFormatter.format( - "Fail to find file-worker by accessHost:{} accessPort:{}", accessHost, accessPort - ); - log.warn(msg.getMessage()); - throw new InternalException( - ErrorCode.FILE_WORKER_NOT_FOUND, - new String[]{ - "accessHost:" + accessHost + ",accessPort:" + accessPort, - } - ); - } - Long workerId = fileWorkerDTO.getId(); - log.debug("worker {} apply to reDispatch tasks:{}, initDelayMills={}, intervalMills={}", workerId, taskIdList - , initDelayMills, intervalMills); - // 1.立即下线Worker - int affectedWorkerNum = fileWorkerService.offLine(workerId); - log.info("{} worker state changed to offline", affectedWorkerNum); - // 2.任务延时重调度 - for (String taskId : taskIdList) { - if (reDispatchThreadNum.get() >= MAX_THREAD_NUM_REDISPATCH) { - log.warn("reDispatch thread reach MAX_NUM:{}, do not reDispatch {}", MAX_THREAD_NUM_REDISPATCH, taskId); - } else { - Timer timer = new Timer(); - timer.schedule(new ReDispatchTask(taskId, intervalMills), initDelayMills); - } - } - return taskIdList; - } - - @Override - public boolean reDispatchByGateway(String fileSourceTaskId, Long initDelayMills, Long intervalMills) { - // 1.尝试通知Worker主动取消该任务 - FileSourceTaskDTO fileSourceTaskDTO = 
fileSourceTaskService.getFileSourceTaskById(fileSourceTaskId); - if (fileSourceTaskDTO == null) { - log.warn("task not exist, ignore, id={}", fileSourceTaskId); - return false; - } - try { - fileSourceTaskService.recallTasks(Collections.singletonList(fileSourceTaskId)); - } catch (Throwable t) { - log.warn("Fail to recallTask:{}", fileSourceTaskId, t); - } - // 2.重调度 - if (reDispatchThreadNum.get() >= MAX_THREAD_NUM_REDISPATCH) { - log.warn("reDispatch thread reach MAX_NUM:{}, do not reDispatch {}", MAX_THREAD_NUM_REDISPATCH, - fileSourceTaskId); - return false; - } else { - Timer timer = new Timer(); - timer.schedule(new ReDispatchTask(fileSourceTaskId, intervalMills), initDelayMills); - return true; - } - } - - @Override - public Integer getReDispatchThreadsNum(String username) { - return reDispatchThreadNum.get(); - } - - class ReDispatchTask extends TimerTask { - private final String fileSourceTaskId; - private final Long intervalMills; - - ReDispatchTask(String fileSourceTaskId, Long intervalMills) { - this.fileSourceTaskId = fileSourceTaskId; - this.intervalMills = intervalMills; - } - - @Override - public void run() { - synchronized (reDispatchingTaskIds) { - if (reDispatchingTaskIds.contains(fileSourceTaskId)) { - log.info("task {} already in reDispatching, ignore", fileSourceTaskId); - return; - } - reDispatchingTaskIds.add(fileSourceTaskId); - } - boolean reDispatchSuccess = false; - int retryCount = 0; - try { - reDispatchThreadNum.incrementAndGet(); - log.debug("taskId={}", fileSourceTaskId); - FileSourceTaskDTO fileSourceTaskDTO = fileSourceTaskService.getFileSourceTaskById(fileSourceTaskId); - log.debug("fileSourceTaskDTO={}", fileSourceTaskDTO); - if (fileSourceTaskDTO == null) { - log.warn("Cannot find fileSourceTaskDTO by id {}", fileSourceTaskId); - return; - } - List fileTaskDTOList = fileTaskService.listFileTasks(fileSourceTaskId); - log.debug("fileTaskDTOList={}", fileTaskDTOList); - List filePathList = - 
fileTaskDTOList.stream().map(FileTaskDTO::getFilePath).collect(Collectors.toList()); - - FileSourceDTO fileSourceDTO = fileSourceService.getFileSourceById(fileSourceTaskDTO.getFileSourceId()); - while (!reDispatchSuccess && retryCount < 100) { - // 1.删除现有子任务 - log.debug("delete fileTasks of fileSourceTask {}", fileSourceTaskId); - fileTaskService.deleteTasks(fileSourceTaskId); - // 2.删除现有FileSourceTask任务 - fileSourceTaskService.deleteFileSourceTaskById(fileSourceTaskId); - log.debug("delete fileSourceTask {}", fileSourceTaskId); - FileWorkerDTO fileWorkerDTO = dispatchService.findBestFileWorker(fileSourceDTO, "ReDispatch"); - log.debug("found bestWorker:{}", fileSourceDTO); - if (fileWorkerDTO != null) { - // 3.重新派发任务 - try { - TaskInfoDTO taskInfoDTO = - fileSourceTaskService.startFileSourceDownloadTaskWithId( - fileSourceTaskDTO.getCreator(), - fileSourceTaskDTO.getAppId(), - fileSourceTaskDTO.getStepInstanceId(), - fileSourceTaskDTO.getExecuteCount(), - fileSourceTaskDTO.getBatchTaskId(), - fileSourceTaskDTO.getFileSourceId(), - filePathList, - fileSourceTaskId - ); - reDispatchSuccess = true; - log.info("reDispatch result of {}:{}", fileSourceTaskId, taskInfoDTO); - } catch (Exception e) { - retryCount += 1; - log.info("Fail to redispatch task {}, wait {}ms to retry {}", fileSourceTaskId, - intervalMills, retryCount); - try { - Thread.sleep(intervalMills); - } catch (InterruptedException interruptedException) { - log.error("redispatch wait interrupted", e); - } - } - } else { - // 3.暂时没有合适的FileWorker,延时等待 - try { - retryCount += 1; - log.info("No suitable worker to redispatch task {}, wait {}ms to retry {}", - fileSourceTaskId, intervalMills, retryCount); - Thread.sleep(intervalMills); - } catch (InterruptedException e) { - log.error("redispatch wait interrupted", e); - } - } - } - } catch (Throwable t) { - log.error("ReDispatchTask fail", t); - } finally { - synchronized (reDispatchingTaskIds) { - reDispatchingTaskIds.remove(fileSourceTaskId); - } - 
reDispatchThreadNum.decrementAndGet(); - } - } - } -} diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/RetryPolicyFileSourceTaskServiceImpl.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/RetryPolicyFileSourceTaskServiceImpl.java new file mode 100644 index 0000000000..b052820239 --- /dev/null +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/impl/RetryPolicyFileSourceTaskServiceImpl.java @@ -0,0 +1,95 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. + * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +package com.tencent.bk.job.file_gateway.service.impl; + +import com.tencent.bk.job.file_gateway.model.resp.inner.TaskInfoDTO; +import com.tencent.bk.job.file_gateway.service.FileSourceTaskService; +import com.tencent.bk.job.file_gateway.service.RetryPolicyFileSourceTaskService; +import com.tencent.bk.job.file_gateway.service.context.impl.FileSourceTaskRetryContext; +import com.tencent.bk.job.file_gateway.service.retry.FileSourceTaskRetryPolicy; +import com.tencent.bk.job.file_gateway.service.retry.impl.ExceptionRetryPolicy; +import lombok.extern.slf4j.Slf4j; +import org.slf4j.helpers.MessageFormatter; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import java.util.List; + +@Slf4j +@Service +public class RetryPolicyFileSourceTaskServiceImpl implements RetryPolicyFileSourceTaskService { + + private final FileSourceTaskService fileSourceTaskService; + private final FileSourceTaskRetryPolicy retryPolicy = new ExceptionRetryPolicy(3, 5000); + + @Autowired + public RetryPolicyFileSourceTaskServiceImpl(FileSourceTaskService fileSourceTaskService) { + this.fileSourceTaskService = fileSourceTaskService; + } + + @Override + public TaskInfoDTO startFileSourceDownloadTask(String username, + Long appId, + Long stepInstanceId, + Integer executeCount, + String batchTaskId, + Integer fileSourceId, + List filePathList) { + int retryCount = 0; + boolean shouldRetry; + do { + try { + return fileSourceTaskService.startFileSourceDownloadTask( + username, + appId, + stepInstanceId, + executeCount, + batchTaskId, + fileSourceId, + filePathList + ); + } catch (Exception e) { + retryCount += 1; + FileSourceTaskRetryContext retryContext = new 
FileSourceTaskRetryContext(e); + shouldRetry = retryPolicy.shouldRetry(retryContext, retryCount); + if (shouldRetry) { + String msg = MessageFormatter.arrayFormat( + "Fail to startFileSourceDownloadTask, stepInstanceId={}, " + + "executeCount={}, batchTaskId={}, retry {}", + new Object[]{ + stepInstanceId, + executeCount, + batchTaskId, + retryCount + } + ).getMessage(); + log.info(msg, e); + } else { + throw e; + } + } + } while (true); + } +} diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/retry/FileSourceTaskRetryPolicy.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/retry/FileSourceTaskRetryPolicy.java new file mode 100644 index 0000000000..df37fcb9f5 --- /dev/null +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/retry/FileSourceTaskRetryPolicy.java @@ -0,0 +1,34 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. + * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +package com.tencent.bk.job.file_gateway.service.retry; + +import com.tencent.bk.job.file_gateway.service.context.impl.FileSourceTaskRetryContext; + +/** + * 第三方文件源任务重试策略接口 + */ +public interface FileSourceTaskRetryPolicy { + boolean shouldRetry(FileSourceTaskRetryContext context, int retryCount); +} diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/retry/impl/ExceptionRetryPolicy.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/retry/impl/ExceptionRetryPolicy.java new file mode 100644 index 0000000000..fe5d560230 --- /dev/null +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/service/retry/impl/ExceptionRetryPolicy.java @@ -0,0 +1,94 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. 
+ * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +package com.tencent.bk.job.file_gateway.service.retry.impl; + +import com.tencent.bk.job.common.exception.InternalException; +import com.tencent.bk.job.common.util.ThreadUtils; +import com.tencent.bk.job.file_gateway.service.context.impl.FileSourceTaskRetryContext; +import com.tencent.bk.job.file_gateway.service.retry.FileSourceTaskRetryPolicy; +import org.springframework.web.client.ResourceAccessException; + +import java.net.SocketTimeoutException; +import java.net.UnknownHostException; + +/** + * 针对由于某些异常导致的任务启动失败重试策略 + * 当前支持的异常:UnknownHostException、SocketTimeoutException: connect timed out + */ +public class ExceptionRetryPolicy implements FileSourceTaskRetryPolicy { + + /** + * 最大重试次数 + */ + private final int maxRetryCount; + /** + * 重试前应当休眠的毫秒数 + */ + private final int sleepMillsBeforeRetryCount; + + public ExceptionRetryPolicy(int maxRetryCount, int sleepMillsBeforeRetryCount) { + this.maxRetryCount = maxRetryCount; + this.sleepMillsBeforeRetryCount = sleepMillsBeforeRetryCount; + } + + @Override + public boolean shouldRetry(FileSourceTaskRetryContext context, int retryCount) { + // 超出重试次数后不再重试 + if (retryCount > maxRetryCount) { + return false; + } + // 各层次异常检查 + Exception exception = context.getException(); + if (!(exception instanceof InternalException)) { + return false; + } + Throwable cause = exception.getCause(); + if (!(cause instanceof ResourceAccessException)) { + return false; + } + Throwable innerCause = cause.getCause(); + if (isTargetThrowable(innerCause)) { + ThreadUtils.sleep(sleepMillsBeforeRetryCount); + return true; + } + return false; + } + + private boolean isTargetThrowable(Throwable t) { + return isUnknownHostException(t) || isConnectTimeoutException(t); + } + + private boolean isUnknownHostException(Throwable t) { + return t instanceof UnknownHostException; + } + + private boolean isConnectTimeoutException(Throwable t) { + if (!(t instanceof SocketTimeoutException)) { + return false; + } + String message = t.getMessage(); + 
return message != null && message.equalsIgnoreCase("connect timed out"); + } +} diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/task/dispatch/ReDispatchTimeoutTask.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/task/dispatch/ReDispatchTimeoutTask.java index 30844bf584..9dff84dc7b 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/task/dispatch/ReDispatchTimeoutTask.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/task/dispatch/ReDispatchTimeoutTask.java @@ -31,7 +31,7 @@ import com.tencent.bk.job.common.util.ip.IpUtils; import com.tencent.bk.job.file_gateway.consts.TaskStatusEnum; import com.tencent.bk.job.file_gateway.dao.filesource.FileTaskDAO; -import com.tencent.bk.job.file_gateway.service.ReDispatchService; +import com.tencent.bk.job.file_gateway.service.dispatch.ReDispatchService; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; diff --git a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/task/filesource/FileSourceStatusUpdateTask.java b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/task/filesource/FileSourceStatusUpdateTask.java index 0c3a433d26..5afb9acb2e 100644 --- a/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/task/filesource/FileSourceStatusUpdateTask.java +++ b/src/backend/job-file-gateway/service-job-file-gateway/src/main/java/com/tencent/bk/job/file_gateway/task/filesource/FileSourceStatusUpdateTask.java @@ -27,7 +27,7 @@ import com.tencent.bk.job.file_gateway.consts.FileSourceStatusEnum; import 
com.tencent.bk.job.file_gateway.model.dto.FileSourceDTO; import com.tencent.bk.job.file_gateway.model.dto.FileWorkerDTO; -import com.tencent.bk.job.file_gateway.service.DispatchService; +import com.tencent.bk.job.file_gateway.service.dispatch.DispatchService; import com.tencent.bk.job.file_gateway.service.FileService; import com.tencent.bk.job.file_gateway.service.FileSourceService; import lombok.extern.slf4j.Slf4j; diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/config/ApplicationReadyListener.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/config/ApplicationReadyListener.java index 0e66134715..e20a171751 100644 --- a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/config/ApplicationReadyListener.java +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/config/ApplicationReadyListener.java @@ -24,7 +24,8 @@ package com.tencent.bk.job.file.worker.config; -import com.tencent.bk.job.file.worker.task.heartbeat.HeartBeatTask; +import com.tencent.bk.job.file.worker.state.event.WorkerEvent; +import com.tencent.bk.job.file.worker.state.event.WorkerEventService; import lombok.extern.slf4j.Slf4j; import org.springframework.boot.context.event.ApplicationReadyEvent; import org.springframework.context.ApplicationListener; @@ -36,12 +37,12 @@ public class ApplicationReadyListener implements ApplicationListener { private final WorkerConfig workerConfig; - private final HeartBeatTask heartBeatTask; + private final WorkerEventService workerEventService; public ApplicationReadyListener(WorkerConfig workerConfig, - HeartBeatTask heartBeatTask) { + WorkerEventService workerEventService) { this.workerConfig = workerConfig; - this.heartBeatTask = heartBeatTask; + this.workerEventService = workerEventService; } 
@SuppressWarnings("NullableProblems") @@ -56,7 +57,7 @@ public void onApplicationEvent(ApplicationReadyEvent event) { log.info("created JobFileWorker workspace:" + wsDirFile.getAbsolutePath()); } } - // 2.启动后立即上报一次心跳 - new Thread(heartBeatTask::run).start(); + // 2.启动后等待自身可被外界访问 + workerEventService.commitWorkerEvent(WorkerEvent.waitAccessReady()); } } diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/config/ApplicationReadyListenerConfig.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/config/ApplicationReadyListenerConfig.java index 3a6865e79d..7d0e3f4053 100644 --- a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/config/ApplicationReadyListenerConfig.java +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/config/ApplicationReadyListenerConfig.java @@ -24,17 +24,12 @@ package com.tencent.bk.job.file.worker.config; -import com.tencent.bk.job.file.worker.task.heartbeat.HeartBeatTask; +import com.tencent.bk.job.file.worker.state.event.WorkerEventService; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -/** - * @Description - * @Date 2020/2/24 - * @Version 1.0 - */ @Slf4j @Configuration @@ -42,9 +37,9 @@ public class ApplicationReadyListenerConfig { @Bean public ApplicationReadyListener applicationReadyListener(@Autowired WorkerConfig workerConfig, - @Autowired HeartBeatTask heartBeatTask) { + @Autowired WorkerEventService workerEventService) { log.info("applicationReadyListener inited"); - return new ApplicationReadyListener(workerConfig, heartBeatTask); + return new ApplicationReadyListener(workerConfig, workerEventService); } } diff --git 
a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/config/GracefulShutdown.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/config/GracefulShutdown.java index 0346378107..42acf4ee16 100644 --- a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/config/GracefulShutdown.java +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/config/GracefulShutdown.java @@ -24,7 +24,9 @@ package com.tencent.bk.job.file.worker.config; +import com.tencent.bk.job.common.util.ThreadUtils; import com.tencent.bk.job.file.worker.service.OpService; +import com.tencent.bk.job.file.worker.state.WorkerStateMachine; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; @@ -39,13 +41,15 @@ public class GracefulShutdown implements ApplicationListener { private final OpService opService; + private final WorkerStateMachine workerStateMachine; @Value("${app.shutdownTimeout:30}") int shutdownTimeout = 30; @Autowired - public GracefulShutdown(OpService opService) { + public GracefulShutdown(OpService opService, WorkerStateMachine workerStateMachine) { this.opService = opService; + this.workerStateMachine = workerStateMachine; } @Override @@ -54,6 +58,16 @@ public void onApplicationEvent(ContextClosedEvent event) { List runningTaskIdList = opService.offLine(); log.info("worker apply to offLine, {} tasks to be reDispatched are {}", runningTaskIdList.size(), runningTaskIdList); + long waitStart = System.currentTimeMillis(); + long maxWaitMills = 5000; + long waitMills; + do { + // 1.等待Worker主动下线完成 + ThreadUtils.sleep(100); + waitMills = System.currentTimeMillis() - waitStart; + } while (!workerStateMachine.isWorkerOffLineIncludeFail() && waitMills < maxWaitMills); + // 
2.等待File-Gateway内存中存量已调度请求完成 + ThreadUtils.sleep(3000); + log.info("Worker offLine done"); } - } diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/service/OpService.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/service/OpService.java index deb88f83c0..51b27a45de 100644 --- a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/service/OpService.java +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/service/OpService.java @@ -32,6 +32,8 @@ import com.tencent.bk.job.common.util.http.HttpRequest; import com.tencent.bk.job.common.util.json.JsonUtils; import com.tencent.bk.job.file.worker.config.WorkerConfig; +import com.tencent.bk.job.file.worker.state.event.WorkerEvent; +import com.tencent.bk.job.file.worker.state.event.WorkerEventService; import com.tencent.bk.job.file.worker.task.heartbeat.HeartBeatTask; import com.tencent.bk.job.file_gateway.consts.TaskCommandEnum; import com.tencent.bk.job.file_gateway.model.req.inner.OffLineAndReDispatchReq; @@ -53,22 +55,31 @@ public class OpService { private final GatewayInfoService gatewayInfoService; private final EnvironmentService environmentService; private final TaskReporter taskReporter; + private final WorkerEventService workerEventService; @Autowired - public OpService(WorkerConfig workerConfig, FileTaskService fileTaskService, - GatewayInfoService gatewayInfoService, EnvironmentService environmentService, - TaskReporter taskReporter) { + public OpService(WorkerConfig workerConfig, + FileTaskService fileTaskService, + GatewayInfoService gatewayInfoService, + EnvironmentService environmentService, + TaskReporter taskReporter, + WorkerEventService workerEventService) { this.workerConfig = workerConfig; this.fileTaskService = fileTaskService; this.gatewayInfoService = 
gatewayInfoService; this.environmentService = environmentService; this.taskReporter = taskReporter; + this.workerEventService = workerEventService; } public List offLine() { List runningTaskIdList = fileTaskService.getAllTaskIdList(); - // 停止心跳 - HeartBeatTask.stopHeartBeat(); + workerEventService.commitWorkerEvent(WorkerEvent.offLine()); + return runningTaskIdList; + } + + public List doOffLine() { + List runningTaskIdList = fileTaskService.getAllTaskIdList(); // 调网关接口下线自己 String url = gatewayInfoService.getWorkerOffLineUrl(); OffLineAndReDispatchReq offLineReq = new OffLineAndReDispatchReq(); @@ -84,10 +95,10 @@ public List offLine() { String respStr; try { respStr = httpHelper.requestForSuccessResp( - HttpRequest.builder(HttpMethodEnum.POST, url) - .setStringEntity(req.getBody()) - .setHeaders(req.getHeaders()) - .build()) + HttpRequest.builder(HttpMethodEnum.POST, url) + .setStringEntity(req.getBody()) + .setHeaders(req.getHeaders()) + .build()) .getEntity(); log.info(String.format("respStr=%s", respStr)); // 停止任务 diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/WorkerStateEnum.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/WorkerStateEnum.java new file mode 100644 index 0000000000..b2657e02da --- /dev/null +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/WorkerStateEnum.java @@ -0,0 +1,71 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. 
+ * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +package com.tencent.bk.job.file.worker.state; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonValue; + +/** + * File-Worker状态枚举值 + */ +public enum WorkerStateEnum { + STARTING(1, "启动中"), + WAIT_ACCESS_READY(2, "等待自身可被外界访问"), + HEART_BEATING(3, "心跳中"), + HEART_BEAT_WAIT(4, "等待下一次心跳中"), + RUNNING(5, "运行中"), + OFFLINE_ING(6, "下线中"), + OFFLINE_FAILED(7, "下线失败"), + OFFLINE(8, "已下线"); + + /** + * 状态值 + */ + @JsonValue + private final int state; + /** + * 状态描述 + */ + private final String description; + + WorkerStateEnum(int state, String description) { + this.state = state; + this.description = description; + } + + @JsonCreator(mode = JsonCreator.Mode.DELEGATING) + public static WorkerStateEnum valOf(int state) { + for (WorkerStateEnum workerState : values()) { + if (workerState.state == state) { + return workerState; + } + } + return null; + } + + public int getValue() { + return state; + } +} diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/WorkerStateMachine.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/WorkerStateMachine.java new file mode 100644 index 0000000000..306d1ee654 --- /dev/null +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/WorkerStateMachine.java @@ -0,0 +1,87 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. 
+ * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +package com.tencent.bk.job.file.worker.state; + +import lombok.Getter; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.cloud.sleuth.Tracer; +import org.springframework.stereotype.Component; + +/** + * File-Worker状态机,管理Worker状态流转 + */ +@Getter +@Slf4j +@Component +public class WorkerStateMachine { + + private WorkerStateEnum workerState = WorkerStateEnum.STARTING; + + @Autowired + public WorkerStateMachine(Tracer tracer) { + } + + public void setWorkerState(WorkerStateEnum workerState) { + log.info("state change: {} -> {}", this.workerState.name(), workerState.name()); + this.workerState = workerState; + } + + public void waitAccessReady() { + setWorkerState(WorkerStateEnum.WAIT_ACCESS_READY); + } + + public void accessReady() { + setWorkerState(WorkerStateEnum.HEART_BEAT_WAIT); + } + + public void heartBeatStart() { + setWorkerState(WorkerStateEnum.HEART_BEATING); + } + + public void heartBeatSuccess() { + setWorkerState(WorkerStateEnum.RUNNING); + } + + public void heartBeatFailed() { + setWorkerState(WorkerStateEnum.HEART_BEAT_WAIT); + } + + public void offlineStart() { + setWorkerState(WorkerStateEnum.OFFLINE_ING); + } + + public void offlineFailed() { + setWorkerState(WorkerStateEnum.OFFLINE_FAILED); + } + + public void offlineSuccess() { + setWorkerState(WorkerStateEnum.OFFLINE); + } + + public boolean isWorkerOffLineIncludeFail() { + return workerState == WorkerStateEnum.OFFLINE || workerState == WorkerStateEnum.OFFLINE_FAILED; + } +} diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerActionEnum.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerActionEnum.java new file mode 100644 index 0000000000..82e3cc4ad8 --- /dev/null +++ 
b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerActionEnum.java @@ -0,0 +1,44 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. + * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
/**
 * Actions that a worker event can carry.
 */
public enum WorkerActionEnum {
    /** Wait until the worker's external access path is ready. */
    WAIT_ACCESS_READY,
    /** Periodic heartbeat. */
    HEART_BEAT,
    /** Go offline. */
    OFF_LINE
}
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +package com.tencent.bk.job.file.worker.state.event; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.tencent.bk.job.common.event.Event; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; +import lombok.ToString; + +import java.time.LocalDateTime; + +@Getter +@Setter +@ToString +@NoArgsConstructor +@JsonInclude(JsonInclude.Include.NON_NULL) +public class WorkerEvent extends Event { + /** + * Worker动作 + * + * @see WorkerActionEnum + */ + private WorkerActionEnum action; + + public static WorkerEvent waitAccessReady() { + WorkerEvent workerEvent = new WorkerEvent(); + workerEvent.setAction(WorkerActionEnum.WAIT_ACCESS_READY); + workerEvent.setTime(LocalDateTime.now()); + return workerEvent; + } + + public static WorkerEvent heartBeat() { + WorkerEvent workerEvent = new WorkerEvent(); + workerEvent.setAction(WorkerActionEnum.HEART_BEAT); + workerEvent.setTime(LocalDateTime.now()); + return workerEvent; + } + + public static WorkerEvent offLine() { + WorkerEvent workerEvent = new WorkerEvent(); + workerEvent.setAction(WorkerActionEnum.OFF_LINE); + workerEvent.setTime(LocalDateTime.now()); + return workerEvent; + } +} diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerEventDispatcher.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerEventDispatcher.java new file mode 100644 index 0000000000..148fd256d1 --- /dev/null +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerEventDispatcher.java @@ -0,0 +1,107 @@ +/* + * 
Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. + * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +package com.tencent.bk.job.file.worker.state.event; + +import com.tencent.bk.job.common.tracing.util.SpanUtil; +import com.tencent.bk.job.file.worker.state.event.handler.DefaultEventHandler; +import com.tencent.bk.job.file.worker.state.event.handler.EventHandler; +import com.tencent.bk.job.file.worker.state.event.handler.HeartBeatEventHandler; +import com.tencent.bk.job.file.worker.state.event.handler.OffLineEventHandler; +import com.tencent.bk.job.file.worker.state.event.handler.WaitAccessEventHandler; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.cloud.sleuth.Span; +import org.springframework.cloud.sleuth.Tracer; +import org.springframework.stereotype.Component; + +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.BlockingQueue; + +@Slf4j +@Component +public class WorkerEventDispatcher extends Thread { + + @SuppressWarnings("FieldCanBeLocal") + private boolean enabled = true; + /** + * 日志调用链tracer + */ + private final Tracer tracer; + private BlockingQueue eventQueue; + private static final Map handlerMap = new HashMap<>(); + private static final EventHandler defaultHandler = new DefaultEventHandler(); + + @Autowired + public WorkerEventDispatcher(Tracer tracer, + WaitAccessEventHandler waitAccessEventHandler, + HeartBeatEventHandler heartBeatEventHandler, + OffLineEventHandler offLineEventHandler) { + this.tracer = tracer; + handlerMap.put(WorkerActionEnum.WAIT_ACCESS_READY, waitAccessEventHandler); + handlerMap.put(WorkerActionEnum.HEART_BEAT, heartBeatEventHandler); + handlerMap.put(WorkerActionEnum.OFF_LINE, offLineEventHandler); + } + + public void initQueue(BlockingQueue eventQueue) { + this.eventQueue = eventQueue; + } + + @Override + public void run() { + while (enabled) { + WorkerEvent event; + try { + event = eventQueue.take(); + dispatchEventWithTrace(event); + } catch (InterruptedException e) { + log.warn("queue.take interrupted", e); + 
} catch (Throwable t) { + log.error("Fail to handleEventWithTrace", t); + } + } + } + + private void dispatchEventWithTrace(WorkerEvent event) { + Span span = buildSpan(event); + try (Tracer.SpanInScope ignored = this.tracer.withSpan(span.start())) { + dispatchEvent(event); + } catch (Throwable t) { + span.error(t); + log.warn("Fail to handleEvent:" + event, t); + } finally { + span.end(); + } + } + + private void dispatchEvent(WorkerEvent event) { + EventHandler handler = handlerMap.getOrDefault(event.getAction(), defaultHandler); + handler.handleEvent(event); + } + + private Span buildSpan(WorkerEvent event) { + return SpanUtil.buildNewSpan(this.tracer, event.getAction().name()); + } +} diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerEventService.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerEventService.java new file mode 100644 index 0000000000..5438166e39 --- /dev/null +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/WorkerEventService.java @@ -0,0 +1,56 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. 
+ * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +package com.tencent.bk.job.file.worker.state.event; + +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; + +/** + * File-Worker生命周期事件服务,用于接收上层业务逻辑触发的事件 + */ +@Slf4j +@Service +public class WorkerEventService { + + private final BlockingQueue eventQueue = new LinkedBlockingQueue<>(100); + + @Autowired + public WorkerEventService(WorkerEventDispatcher workerEventDispatcher) { + workerEventDispatcher.initQueue(eventQueue); + workerEventDispatcher.start(); + } + + public void commitWorkerEvent(WorkerEvent event) { + boolean result = eventQueue.add(event); + if (!result) { + log.warn("Fail to add event to queue:{}, ignore", event); + } + } + +} diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/DefaultEventHandler.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/DefaultEventHandler.java new file mode 100644 index 0000000000..3aee2a72b3 --- /dev/null +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/DefaultEventHandler.java @@ -0,0 +1,39 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. 
+ * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+package com.tencent.bk.job.file.worker.state.event.handler;
+
+import com.tencent.bk.job.file.worker.state.event.WorkerEvent;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Default event handler: it only records the event in the log.
+ */
+@Slf4j
+public class DefaultEventHandler implements EventHandler {
+    /**
+     * Logs a warning saying that no handler is specified for the event; nothing else is done with it.
+     *
+     * @param event the event that is being ignored
+     */
+    @Override
+    public void handleEvent(WorkerEvent event) {
+        log.warn("No handler specified for event:{}, ignore", event);
+    }
+}
diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/EventHandler.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/EventHandler.java
new file mode 100644
index 0000000000..79073d3259
--- /dev/null
+++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/EventHandler.java
@@ -0,0 +1,31 @@
+/*
+ * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available.
+ *
+ * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
+ *
+ * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License.
+ *
+ * License for BK-JOB蓝鲸智云作业平台:
+ * --------------------------------------------------------------------
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+ * documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
+ * to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all copies or substantial portions of
+ * the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+ * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+package com.tencent.bk.job.file.worker.state.event.handler;
+
+import com.tencent.bk.job.file.worker.state.event.WorkerEvent;
+
+/**
+ * Handler of a {@link WorkerEvent}.
+ */
+public interface EventHandler {
+    /**
+     * Handles the given event.
+     *
+     * @param event the event to handle
+     */
+    void handleEvent(WorkerEvent event);
+}
diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/HealthResult.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/HealthResult.java
new file mode 100644
index 0000000000..3a33869027
--- /dev/null
+++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/HealthResult.java
@@ -0,0 +1,41 @@
+/*
+ * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available.
+ *
+ * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
+ *
+ * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License.
+ * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+package com.tencent.bk.job.file.worker.state.event.handler;
+
+import lombok.Data;
+
+import java.util.List;
+
+/**
+ * Health check result; WaitAccessEventHandler parses the response of the worker's /actuator/health URL into it.
+ */
+@Data
+public class HealthResult {
+    /**
+     * Health status
+     */
+    private String status;
+    /**
+     * Health indicator groups
+     * NOTE(review): declared as a raw List here - the element type looks lost (presumably String); confirm.
+     */
+    private List groups;
+}
diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/HeartBeatEventHandler.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/HeartBeatEventHandler.java
new file mode 100644
index 0000000000..4195d776e2
--- /dev/null
+++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/HeartBeatEventHandler.java
@@ -0,0 +1,96 @@
+/*
+ * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available.
+ *
+ * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
+ *
+ * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License.
+ *
+ * License for BK-JOB蓝鲸智云作业平台:
+ * --------------------------------------------------------------------
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+ * documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
+ * to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all copies or substantial portions of
+ * the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+ * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+package com.tencent.bk.job.file.worker.state.event.handler;
+
+import com.tencent.bk.job.file.worker.state.WorkerStateEnum;
+import com.tencent.bk.job.file.worker.state.WorkerStateMachine;
+import com.tencent.bk.job.file.worker.state.event.WorkerEvent;
+import com.tencent.bk.job.file.worker.state.event.WorkerEventService;
+import com.tencent.bk.job.file.worker.task.heartbeat.HeartBeatTask;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.context.annotation.Lazy;
+import org.springframework.stereotype.Component;
+
+/**
+ * Heartbeat event handler, used to report the worker's state information to File-Gateway.
+ */
+@Slf4j
+@Component
+public class HeartBeatEventHandler implements EventHandler {
+
+    /**
+     * A heartbeat requested less than this many milliseconds after the last successful one is not sent.
+     */
+    private static final long MIN_HEART_BEAT_INTERVAL_MILLIS = 10_000L;
+
+    private final WorkerEventService workerEventService;
+    private final WorkerStateMachine workerStateMachine;
+    private final HeartBeatTask heartBeatTask;
+
+    /**
+     * Time (epoch milliseconds) of the last heartbeat that was really sent; null until one succeeds.
+     */
+    private Long lastSuccessHeartBeatTime = null;
+
+    @Autowired
+    public HeartBeatEventHandler(@Lazy WorkerEventService workerEventService,
+                                 WorkerStateMachine workerStateMachine,
+                                 HeartBeatTask heartBeatTask) {
+        this.workerEventService = workerEventService;
+        this.workerStateMachine = workerStateMachine;
+        this.heartBeatTask = heartBeatTask;
+    }
+
+    /**
+     * Reacts to a heartbeat event according to the current worker state: commits a wait-access-ready event
+     * while STARTING, ignores the event while WAIT_ACCESS_READY, performs the heartbeat while HEART_BEAT_WAIT
+     * or RUNNING, and only logs for every other state.
+     *
+     * @param event the event being handled
+     */
+    @Override
+    public void handleEvent(WorkerEvent event) {
+        final WorkerStateEnum currentState = workerStateMachine.getWorkerState();
+        switch (currentState) {
+            case RUNNING:
+            case HEART_BEAT_WAIT:
+                heartBeat();
+                return;
+            case STARTING:
+                workerEventService.commitWorkerEvent(WorkerEvent.waitAccessReady());
+                return;
+            case WAIT_ACCESS_READY:
+                log.info("wait access ready, ignore current event:{}", event);
+                return;
+            default:
+                log.info("currentState:{}, heartBeat condition not satisfy, ignore", currentState);
+        }
+    }
+
+    /**
+     * Runs one heartbeat round and reports start, success or failure to the state machine.
+     */
+    private void heartBeat() {
+        workerStateMachine.heartBeatStart();
+        try {
+            if (succeededRecently()) {
+                // The previous successful heartbeat is recent enough: skip sending, still report success.
+                log.info("lastSuccessHeartBeat finish with 10s, ignore current heartBeat");
+                workerStateMachine.heartBeatSuccess();
+                return;
+            }
+            heartBeatTask.doHeartBeat();
+            workerStateMachine.heartBeatSuccess();
+            lastSuccessHeartBeatTime = System.currentTimeMillis();
+        } catch (Throwable t) {
+            log.warn("Fail to heartBeat", t);
+            workerStateMachine.heartBeatFailed();
+        }
+    }
+
+    /**
+     * Tells whether a heartbeat has succeeded within the minimum interval.
+     */
+    private boolean succeededRecently() {
+        if (lastSuccessHeartBeatTime == null) {
+            return false;
+        }
+        return System.currentTimeMillis() - lastSuccessHeartBeatTime < MIN_HEART_BEAT_INTERVAL_MILLIS;
+    }
+}
diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/OffLineEventHandler.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/OffLineEventHandler.java
new file mode 100644
index 0000000000..bfe1bff6c5
--- /dev/null
+++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/OffLineEventHandler.java
@@ -0,0 +1,81 @@
+/*
+ * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available.
+ *
+ * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
+ *
+ * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License.
+ * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+package com.tencent.bk.job.file.worker.state.event.handler;
+
+import com.tencent.bk.job.file.worker.service.OpService;
+import com.tencent.bk.job.file.worker.state.WorkerStateEnum;
+import com.tencent.bk.job.file.worker.state.WorkerStateMachine;
+import com.tencent.bk.job.file.worker.state.event.WorkerEvent;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.context.annotation.Lazy;
+import org.springframework.stereotype.Component;
+
+/**
+ * Offline event handler.
+ */
+@Slf4j
+@Component
+public class OffLineEventHandler implements EventHandler {
+
+    private final WorkerStateMachine workerStateMachine;
+    private final OpService opService;
+
+    @Autowired
+    public OffLineEventHandler(WorkerStateMachine workerStateMachine,
+                               @Lazy OpService opService) {
+        this.workerStateMachine = workerStateMachine;
+        this.opService = opService;
+    }
+
+    /**
+     * Takes the worker offline when the current state is RUNNING, HEART_BEAT_WAIT or OFFLINE_FAILED; while
+     * OFFLINE_ING, or in any other state, the event is only logged and ignored.
+     *
+     * @param event the event being handled
+     */
+    @Override
+    public void handleEvent(WorkerEvent event) {
+        final WorkerStateEnum currentState = workerStateMachine.getWorkerState();
+        switch (currentState) {
+            case OFFLINE_ING:
+                log.info("last offLine action is executing, ignore current one");
+                return;
+            case RUNNING:
+            case HEART_BEAT_WAIT:
+            case OFFLINE_FAILED:
+                offLine();
+                return;
+            default:
+                log.info("currentState:{}, offLine condition not satisfy, ignore", currentState);
+        }
+    }
+
+    /**
+     * Performs the offline operation and reports start, success or failure to the state machine.
+     */
+    private void offLine() {
+        workerStateMachine.offlineStart();
+        try {
+            opService.doOffLine();
+            workerStateMachine.offlineSuccess();
+        } catch (Throwable cause) {
+            log.warn("Fail to offLine", cause);
+            workerStateMachine.offlineFailed();
+        }
+    }
+}
diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/WaitAccessEventHandler.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/WaitAccessEventHandler.java
new file mode 100644
index 0000000000..7d1b80daac
--- /dev/null
+++
b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/state/event/handler/WaitAccessEventHandler.java @@ -0,0 +1,131 @@ +/* + * Tencent is pleased to support the open source community by making BK-JOB蓝鲸智云作业平台 available. + * + * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-JOB蓝鲸智云作业平台 is licensed under the MIT License. + * + * License for BK-JOB蓝鲸智云作业平台: + * -------------------------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + * to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+package com.tencent.bk.job.file.worker.state.event.handler;
+
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.tencent.bk.job.common.model.http.HttpReq;
+import com.tencent.bk.job.common.util.ThreadUtils;
+import com.tencent.bk.job.common.util.http.HttpReqGenUtil;
+import com.tencent.bk.job.common.util.http.JobHttpClient;
+import com.tencent.bk.job.common.util.json.JsonUtils;
+import com.tencent.bk.job.file.worker.config.WorkerConfig;
+import com.tencent.bk.job.file.worker.service.EnvironmentService;
+import com.tencent.bk.job.file.worker.state.WorkerStateEnum;
+import com.tencent.bk.job.file.worker.state.WorkerStateMachine;
+import com.tencent.bk.job.file.worker.state.event.WorkerEvent;
+import com.tencent.bk.job.file.worker.state.event.WorkerEventService;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.context.annotation.Lazy;
+import org.springframework.stereotype.Component;
+
+/**
+ * Event handler that waits until the worker can be accessed from outside; implements the check-and-wait logic.
+ */
+@Slf4j
+@Component
+public class WaitAccessEventHandler implements EventHandler {
+
+    /**
+     * Maximum number of health probes performed by one call of {@link #checkAccess()}.
+     */
+    private static final int MAX_CHECK_NUM = 300;
+    /**
+     * Pause between two health probes, in milliseconds.
+     */
+    private static final int CHECK_INTERVAL_MILLIS = 1000;
+
+    private final WorkerEventService workerEventService;
+    private final WorkerStateMachine workerStateMachine;
+    private final JobHttpClient jobHttpClient;
+    private final String checkAccessUrl;
+
+    @Autowired
+    public WaitAccessEventHandler(@Lazy WorkerEventService workerEventService,
+                                  WorkerStateMachine workerStateMachine,
+                                  JobHttpClient jobHttpClient,
+                                  WorkerConfig workerConfig,
+                                  EnvironmentService environmentService) {
+        this.workerEventService = workerEventService;
+        this.workerStateMachine = workerStateMachine;
+        this.jobHttpClient = jobHttpClient;
+        this.checkAccessUrl = buildCheckAccessUrl(environmentService.getAccessHost(), workerConfig.getAccessPort());
+    }
+
+    /**
+     * Builds the health URL that is probed: {@code http://<accessHost>:<accessPort>/actuator/health}.
+     */
+    @SuppressWarnings("HttpUrlsUsage")
+    private String buildCheckAccessUrl(String accessHost, Integer accessPort) {
+        return "http://" + accessHost + ":" + accessPort + "/actuator/health";
+    }
+
+    /**
+     * While the worker state is STARTING or WAIT_ACCESS_READY, switches the state machine to waiting and
+     * runs the access check; in every other state the event is only logged and ignored.
+     *
+     * @param event the event being handled
+     */
+    @Override
+    public void handleEvent(WorkerEvent event) {
+        WorkerStateEnum workerState = workerStateMachine.getWorkerState();
+        switch (workerState) {
+            case STARTING:
+            case WAIT_ACCESS_READY:
+                workerStateMachine.waitAccessReady();
+                waitAccessReady();
+                break;
+            default:
+                log.info("currentState:{}, waitAccessReady condition not satisfy, ignore", workerState);
+                break;
+        }
+    }
+
+    /**
+     * Runs one round of access checking and commits the follow-up event: a heartbeat event when the worker
+     * is accessible, otherwise another wait-access-ready event.
+     */
+    public void waitAccessReady() {
+        boolean accessReady = checkAccess();
+        if (accessReady) {
+            // 1. Switch the state
+            workerStateMachine.accessReady();
+            // 2. Trigger a heartbeat immediately once the worker is accessible from outside
+            workerEventService.commitWorkerEvent(WorkerEvent.heartBeat());
+        } else {
+            // 3. Check failed: the state stays unchanged and the check continues
+            workerEventService.commitWorkerEvent(WorkerEvent.waitAccessReady());
+        }
+    }
+
+    /**
+     * Probes the health URL up to {@link #MAX_CHECK_NUM} times, pausing between attempts.
+     *
+     * @return true as soon as one probe reports status UP, false when all attempts are used up
+     */
+    private boolean checkAccess() {
+        int errorNum = 0;
+        for (int checkNum = 1; checkNum <= MAX_CHECK_NUM; checkNum++) {
+            try {
+                if (isHealthUp()) {
+                    return true;
+                }
+            } catch (Throwable t) {
+                errorNum += 1;
+                // Only every 10th failure is logged with its stack trace.
+                if (errorNum % 10 == 0) {
+                    log.info("Fail to checkAccess", t);
+                }
+            }
+            // No pause after the last attempt.
+            if (checkNum < MAX_CHECK_NUM) {
+                log.info("Access not ready, checkNum={}, wait 1s", checkNum);
+                ThreadUtils.sleep(CHECK_INTERVAL_MILLIS);
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Requests the health URL once and tells whether the parsed status equals "UP" (case-insensitive).
+     */
+    private boolean isHealthUp() {
+        log.info("CheckAccess: url={}", checkAccessUrl);
+        HttpReq req = HttpReqGenUtil.genUrlGetReq(checkAccessUrl);
+        String respStr = jobHttpClient.get(req);
+        // The explicit type argument is required: a raw TypeReference carries no target type information.
+        HealthResult healthResult = JsonUtils.fromJson(respStr, new TypeReference<HealthResult>() {
+        });
+        return healthResult != null && "UP".equalsIgnoreCase(healthResult.getStatus());
+    }
+}
diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/task/ScheduledTasks.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/task/ScheduledTasks.java
index e1065d8d7f..a9e0652b6a 100644
---
a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/task/ScheduledTasks.java +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/task/ScheduledTasks.java @@ -24,8 +24,9 @@ package com.tencent.bk.job.file.worker.task; +import com.tencent.bk.job.file.worker.state.event.WorkerEvent; +import com.tencent.bk.job.file.worker.state.event.WorkerEventService; import com.tencent.bk.job.file.worker.task.clear.ClearFileTask; -import com.tencent.bk.job.file.worker.task.heartbeat.HeartBeatTask; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -39,12 +40,12 @@ public class ScheduledTasks { private static final Logger logger = LoggerFactory.getLogger(ScheduledTasks.class); - private final HeartBeatTask heartBeatTask; + private final WorkerEventService workerEventService; private final ClearFileTask clearFileTask; @Autowired - public ScheduledTasks(HeartBeatTask heartBeatTask, ClearFileTask clearFileTask) { - this.heartBeatTask = heartBeatTask; + public ScheduledTasks(WorkerEventService workerEventService, ClearFileTask clearFileTask) { + this.workerEventService = workerEventService; this.clearFileTask = clearFileTask; } @@ -81,9 +82,9 @@ public void checkVolumeAndClear() { public void heartBeat() { logger.info(Thread.currentThread().getId() + ":heartBeat start"); try { - heartBeatTask.run(); + workerEventService.commitWorkerEvent(WorkerEvent.heartBeat()); } catch (Exception e) { - logger.error("heartBeatTask fail", e); + logger.error("commit heartBeat event fail", e); } } } diff --git a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/task/heartbeat/HeartBeatTask.java b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/task/heartbeat/HeartBeatTask.java index 472b911a20..9371b7e6e5 100644 
--- a/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/task/heartbeat/HeartBeatTask.java +++ b/src/backend/job-file-worker-sdk/service-job-file-worker-sdk/src/main/java/com/tencent/bk/job/file/worker/task/heartbeat/HeartBeatTask.java @@ -45,8 +45,6 @@ @Service public class HeartBeatTask { - public static volatile boolean runFlag = true; - private final JobHttpClient jobHttpClient; private final WorkerConfig workerConfig; private final GatewayInfoService gatewayInfoService; @@ -66,10 +64,6 @@ public HeartBeatTask(JobHttpClient jobHttpClient, this.environmentService = environmentService; } - public static void stopHeartBeat() { - runFlag = false; - } - private HeartBeatReq getHeartBeatReq() { HeartBeatReq heartBeatReq = new HeartBeatReq(); heartBeatReq.setName(workerConfig.getName()); @@ -101,11 +95,7 @@ private HeartBeatReq getHeartBeatReq() { return heartBeatReq; } - public void run() { - if (!runFlag) { - log.info("HeartBeat closed, ignore"); - return; - } + public void doHeartBeat() { String url = gatewayInfoService.getHeartBeatUrl(); HeartBeatReq heartBeatReq = getHeartBeatReq(); log.info("HeartBeat: url={},body={}", url, JsonUtils.toJsonWithoutSkippedFields(heartBeatReq)); diff --git a/src/frontend/package.json b/src/frontend/package.json index b39e62377a..7bb89f1c1b 100644 --- a/src/frontend/package.json +++ b/src/frontend/package.json @@ -14,7 +14,7 @@ "keywords": [], "license": "ISC", "dependencies": { - "@blueking/ip-selector": "0.3.0-beta.21", + "@blueking/ip-selector": "0.3.0-beta.26", "@blueking/login-modal": "1.0.4", "@blueking/notice-component-vue2": "2.0.0-beta.2", "@blueking/paas-login": "0.0.11", diff --git a/src/frontend/src/components/ace-editor/default-script.js b/src/frontend/src/components/ace-editor/default-script.js index 6a19f12691..68198b0b4e 100644 --- a/src/frontend/src/components/ace-editor/default-script.js +++ b/src/frontend/src/components/ace-editor/default-script.js @@ -56,7 
+56,7 @@ export default { 'function job_success', '{', ' local msg="$*"', - ' echo "$(job_get_now) job_success: [$msg]"', + ' echo "$(job_get_now) job_success:[$msg]"', ' exit 0', '}', '', @@ -64,7 +64,7 @@ export default { 'function job_fail', '{', ' local msg="$*"', - ' echo "$(job_get_now) job_fail: [$msg]"', + ' echo "$(job_get_now) job_fail:[$msg]"', ' exit 1', '}', '', diff --git a/src/frontend/src/components/task-step/common/execute-target/index.vue b/src/frontend/src/components/task-step/common/execute-target/index.vue index 5e3ccebed2..b71280d9d7 100644 --- a/src/frontend/src/components/task-step/common/execute-target/index.vue +++ b/src/frontend/src/components/task-step/common/execute-target/index.vue @@ -292,7 +292,7 @@ }, created() { this.ipSelectorConfig = {}; - if (this.from === 'execute') { + if (this.from === 'execute' && window.PROJECT_CONFIG.SCOPE_TYPE !== 'biz_set') { this.ipSelectorConfig = { panelList: [ 'staticTopo', diff --git a/src/frontend/src/components/task-step/common/rolling/index.vue b/src/frontend/src/components/task-step/common/rolling/index.vue index ddf9feeaa0..64b502ed95 100644 --- a/src/frontend/src/components/task-step/common/rolling/index.vue +++ b/src/frontend/src/components/task-step/common/rolling/index.vue @@ -28,10 +28,12 @@