diff --git a/.gitignore b/.gitignore index e1b1a494..ef81cd14 100644 --- a/.gitignore +++ b/.gitignore @@ -29,4 +29,7 @@ mat-deps k8s/*.tgz cloud/ -*.tgz \ No newline at end of file +*.tgz + +bin +deploy/k8s_pattern/*.log \ No newline at end of file diff --git a/backend/common/src/main/java/org/eclipse/jifa/common/ErrorCode.java b/backend/common/src/main/java/org/eclipse/jifa/common/ErrorCode.java index 5bf505aa..78205ae5 100644 --- a/backend/common/src/main/java/org/eclipse/jifa/common/ErrorCode.java +++ b/backend/common/src/main/java/org/eclipse/jifa/common/ErrorCode.java @@ -71,6 +71,8 @@ public enum ErrorCode { FILE_TOO_BIG, + RETRY, + RELEASE_PENDING_JOB; public boolean isFatal() { diff --git a/backend/common/src/main/java/org/eclipse/jifa/common/vo/ErrorResult.java b/backend/common/src/main/java/org/eclipse/jifa/common/vo/ErrorResult.java index a4d73e69..32933980 100644 --- a/backend/common/src/main/java/org/eclipse/jifa/common/vo/ErrorResult.java +++ b/backend/common/src/main/java/org/eclipse/jifa/common/vo/ErrorResult.java @@ -12,6 +12,7 @@ ********************************************************************************/ package org.eclipse.jifa.common.vo; +import io.vertx.serviceproxy.ServiceException; import lombok.Data; import org.eclipse.jifa.common.ErrorCode; import org.eclipse.jifa.common.JifaException; @@ -27,6 +28,12 @@ public ErrorResult(Throwable t) { if (t instanceof JifaException) { errorCode = ((JifaException) t).getCode(); } + if (t instanceof ServiceException) { + ServiceException se = (ServiceException) t; + if (ErrorCode.values().length > se.failureCode() && se.failureCode() >= 0) { + errorCode = ErrorCode.values()[se.failureCode()]; + } + } if (t instanceof IllegalArgumentException) { errorCode = ErrorCode.ILLEGAL_ARGUMENT; diff --git a/backend/master/src/main/java/org/eclipse/jifa/master/http/FileRoute.java b/backend/master/src/main/java/org/eclipse/jifa/master/http/FileRoute.java index 32cd02f4..5a635136 100644 --- a/backend/master/src/main/java/org/eclipse/jifa/master/http/FileRoute.java +++ b/backend/master/src/main/java/org/eclipse/jifa/master/http/FileRoute.java @@ -184,7 +184,12 @@ private void transfer(RoutingContext context, TransferWay way) { String origin = extractOriginalName(sb.toString()); FileType type = FileType.valueOf(context.request().getParam("type")); - String name = buildFileName(userId, origin); + String name; + if (context.request().getParam("retry") != null) { + name = context.request().getParam("retry"); + } else { + name = buildFileName(userId, origin); + } request.params().add("fileName", name); fileService.rxTransfer(userId, type, origin, name, way, convert(request.params())) diff --git a/backend/master/src/main/java/org/eclipse/jifa/master/service/impl/Pivot.java b/backend/master/src/main/java/org/eclipse/jifa/master/service/impl/Pivot.java index a1dcb2ca..87c56d25 100644 --- a/backend/master/src/main/java/org/eclipse/jifa/master/service/impl/Pivot.java +++ b/backend/master/src/main/java/org/eclipse/jifa/master/service/impl/Pivot.java @@ -51,10 +51,7 @@ import org.eclipse.jifa.master.service.sql.JobSQL; import org.eclipse.jifa.master.service.sql.MasterSQL; import org.eclipse.jifa.master.service.sql.WorkerSQL; -import org.eclipse.jifa.master.support.Factory; -import org.eclipse.jifa.master.support.Pattern; -import org.eclipse.jifa.master.support.WorkerClient; -import org.eclipse.jifa.master.support.WorkerScheduler; +import org.eclipse.jifa.master.support.*; import org.eclipse.jifa.master.task.SchedulingTask; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -107,6 +104,8 @@ public class Pivot { private WorkerScheduler scheduler; + private boolean isDefaultPattern; + private Pivot() { } @@ -121,6 +120,7 @@ public static synchronized Pivot instance(Vertx vertx, JDBCClient dbClient, Json jm.vertx = vertx; Pattern pattern = Pattern.valueOf(ConfigHelper.getString(jm.config(SCHEDULER_PATTERN))); + jm.isDefaultPattern = pattern == Pattern.DEFAULT; jm.scheduler = Factory.create(pattern); jm.pendingJobMaxCount = ConfigHelper.getInt(jm.config(PENDING_JOB_MAX_COUNT_KEY)); @@ -152,6 +152,11 @@ public boolean isLeader() { return currentMaster.isLeader(); } + public boolean isDefaultPattern() { + SERVICE_ASSERT.isTrue(scheduler instanceof DefaultWorkerScheduler, "must be"); + return isDefaultPattern; + } + public void setSchedulingTask(SchedulingTask task) { this.schedulingTask = task; } @@ -277,7 +282,7 @@ private HashMap buildParams(File... files) { @SuppressWarnings("rawtypes") Map[] maps = new Map[files.length]; for (int i = 0; i < files.length; i++) { - Map map= new HashMap<>(); + Map map = new HashMap<>(); map.put("name", files[i].getName()); map.put("type", files[i].getType().name()); maps[i] = map; @@ -344,9 +349,14 @@ public Completable finish(Job job) { private Completable finish(Job job, Function post) { return inTransactionAndLock( - conn -> selectWorker(conn, job.getHostIP()) - .flatMapCompletable(worker -> updateWorkerLoad(conn, worker.getHostIP(), - worker.getCurrentLoad() - job.getEstimatedLoad())) + conn -> scheduler.decide(job, conn) + .flatMapCompletable(worker -> { + if (isDefaultPattern()) { + return updateWorkerLoad(conn, worker.getHostIP(), worker.getCurrentLoad() - job.getEstimatedLoad()); + } else { + return Completable.complete(); + } + }) .andThen(insertHistoricalJob(conn, job)) // delete old job .andThen(deleteActiveJob(conn, job)) diff --git a/backend/master/src/main/java/org/eclipse/jifa/master/support/K8SWorkerScheduler.java b/backend/master/src/main/java/org/eclipse/jifa/master/support/K8SWorkerScheduler.java index b2a15b99..90721a7e 100644 --- a/backend/master/src/main/java/org/eclipse/jifa/master/support/K8SWorkerScheduler.java +++ b/backend/master/src/main/java/org/eclipse/jifa/master/support/K8SWorkerScheduler.java @@ -17,25 +17,18 @@ import io.kubernetes.client.openapi.ApiException; import io.kubernetes.client.openapi.Configuration; import io.kubernetes.client.openapi.apis.CoreV1Api; -import io.kubernetes.client.openapi.models.V1ContainerPort; -import io.kubernetes.client.openapi.models.V1PersistentVolumeClaimVolumeSource; -import io.kubernetes.client.openapi.models.V1Pod; -import io.kubernetes.client.openapi.models.V1PodBuilder; -import io.kubernetes.client.openapi.models.V1ResourceRequirements; -import io.kubernetes.client.openapi.models.V1Volume; -import io.kubernetes.client.openapi.models.V1VolumeMount; +import io.kubernetes.client.openapi.models.*; import io.kubernetes.client.util.Config; import io.reactivex.Completable; import io.reactivex.Single; import io.vertx.core.json.JsonObject; import io.vertx.reactivex.core.Vertx; -import io.vertx.reactivex.core.buffer.Buffer; import io.vertx.reactivex.ext.sql.SQLConnection; -import io.vertx.reactivex.ext.web.client.HttpResponse; -import org.eclipse.jifa.master.Constant; +import io.vertx.serviceproxy.ServiceException; +import org.eclipse.jifa.common.ErrorCode; +import org.eclipse.jifa.common.JifaException; import org.eclipse.jifa.master.entity.Job; import org.eclipse.jifa.master.entity.Worker; -import org.eclipse.jifa.master.entity.enums.JobType; import org.eclipse.jifa.master.model.WorkerInfo; import org.eclipse.jifa.master.service.impl.Pivot; import org.eclipse.jifa.master.task.PVCCleanupTask; @@ -45,6 +38,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.net.ConnectException; import java.util.HashMap; import java.util.Map; import java.util.Objects; @@ -55,6 +49,8 @@ public class K8SWorkerScheduler implements WorkerScheduler { private static final Logger LOGGER = LoggerFactory.getLogger(K8SWorkerScheduler.class); + private static final String WORKER_PREFIX = "jifa-worker"; + private static String NAMESPACE; private static String WORKER_IMAGE; @@ -142,7 +138,12 @@ public void initialize(Pivot pivot, Vertx vertx, JsonObject config) { @Override public Single decide(Job job, SQLConnection conn) { - return null; + String name = buildWorkerName(job); + String workerIp = getWorkerInfo(name).getIp(); + Worker handmake = new Worker(); + handmake.setHostIP(workerIp); + handmake.setHostName(name); + return Single.just(handmake); } @Override @@ -151,34 +152,46 @@ public boolean supportPendingJob() { } private String buildWorkerName(Job job) { - return "my-worker" + job.getTarget().hashCode(); + return WORKER_PREFIX + job.getTarget().hashCode(); } @Override public Completable start(Job job) { + String name = buildWorkerName(job); + Map config = new HashMap<>(); + // FIXME + config.put("requestMemSize", Long.toString(512 * 1024 * 1024)); - return Completable.fromAction(() -> { - String name = buildWorkerName(job); - Map config = new HashMap<>(); - if (job.getType() != JobType.FILE_TRANSFER) { - // see AnalyzerRoute.calculateLoad - long size = job.getEstimatedLoad() / 10 * 1024 * 1024 * 1024; - config.put("requestMemSize", Long.toString(size)); - } - schedule(name, config); - - // FIXME - while (true) { - HttpResponse response = - WorkerClient.get(getWorkerInfo(name).getIp(), Constant.uri(Constant.PING)).blockingGet(); - if (response.statusCode() == Constant.HTTP_GET_OK_STATUS_CODE) { - return; - } - } - }); + schedule(name, config); + + String workerIp = getWorkerInfo(name).getIp(); + + if (workerIp == null) { + // Front-end would retry original request until worker pod has been started or + // timeout threshold reached. + return Completable.error(new ServiceException(ErrorCode.RETRY.ordinal(), job.getTarget())); + } + return WorkerClient.get(workerIp, uri(PING)) + .flatMap(resp -> Single.just("OK")) + .onErrorReturn(err -> { + if (err instanceof ConnectException) { + // ConnectionException is tolerable because it simply indicates worker is still + // starting + return "RETRY"; + } + return err.getMessage(); + }).flatMapCompletable(msg -> { + if (msg.equals("OK")) { + return Completable.complete(); + } else if (msg.equals("RETRY")) { + return Completable.error(new ServiceException(ErrorCode.RETRY.ordinal(), job.getTarget())); + } else { + return Completable.error(new JifaException("Can not start worker due to internal error: " + msg)); + } + }); } - public void schedule(String id, Map config) { + private void schedule(String id, Map config) { long requestMemSize = 0L; String tmp = config.get("requestMemSize"); @@ -187,9 +200,9 @@ public void schedule(String id, Map config) { } if (getWorkerInfo(id) != null) { - LOGGER.debug("Start worker " + id + " but it already exists"); + LOGGER.debug("Create worker {} but it already exists", id); } else { - LOGGER.debug("Start worker " + id + "[MemRequest:" + requestMemSize + "bytes]"); + LOGGER.debug("Create worker {} [MemRequest: {}bytes]", id, requestMemSize); createWorker(id, requestMemSize); } } @@ -207,7 +220,7 @@ public Completable stop(Job job) { }); } - public WorkerInfo getWorkerInfo(String id) { + private WorkerInfo getWorkerInfo(String id) { V1Pod npod = null; try { npod = api.readNamespacedPod(id, NAMESPACE, null, null, null); @@ -222,4 +235,4 @@ public WorkerInfo getWorkerInfo(String id) { return null; } } -} +} \ No newline at end of file diff --git a/backend/worker/src/main/java/org/eclipse/jifa/worker/route/SystemRoute.java b/backend/worker/src/main/java/org/eclipse/jifa/worker/route/SystemRoute.java index 14583f1d..1215ce93 100644 --- a/backend/worker/src/main/java/org/eclipse/jifa/worker/route/SystemRoute.java +++ b/backend/worker/src/main/java/org/eclipse/jifa/worker/route/SystemRoute.java @@ -32,4 +32,9 @@ void diskUsage(Promise promise) { promise.complete(new DiskUsage(totalSpaceInMb, usedSpaceInMb)); } + + @RouteMeta(path = "/system/ping") + void ping(Promise promise) { + promise.complete(); + } } diff --git a/deploy/k8s_pattern/jifa.yaml b/deploy/k8s_pattern/jifa.yaml index 425f0ddb..815d8475 100644 --- a/deploy/k8s_pattern/jifa.yaml +++ b/deploy/k8s_pattern/jifa.yaml @@ -75,7 +75,7 @@ spec: apiVersion: v1 kind: Pod metadata: - name: my-jifa-master + name: jifa-master labels: app: jifa-master spec: diff --git a/deploy/k8s_pattern/worker.yaml b/deploy/k8s_pattern/worker.yaml index c12fbd0d..7f5f0519 100644 --- a/deploy/k8s_pattern/worker.yaml +++ b/deploy/k8s_pattern/worker.yaml @@ -17,7 +17,7 @@ metadata: name: jifa-worker spec: containers: - - name: my-jifa-worker + - name: jifa-worker-test image: docker.io/jifadocker/jifa-worker-open ports: - containerPort: 8102 \ No newline at end of file diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 065937ee..73d2b76d 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -2101,6 +2101,30 @@ } } }, + "axios-retry": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/axios-retry/-/axios-retry-3.2.4.tgz", + "integrity": "sha512-Co3UXiv4npi6lM963mfnuH90/YFLKWWDmoBYfxkHT5xtkSSWNqK9zdG3fw5/CP/dsoKB5aMMJCsgab+tp1OxLQ==", + "requires": { + "@babel/runtime": "^7.15.4", + "is-retry-allowed": "^2.2.0" + }, + "dependencies": { + "@babel/runtime": { + "version": "7.16.7", + "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.16.7.tgz", + "integrity": "sha512-9E9FJowqAsytyOY6LG+1KuueckRL+aQW+mKvXRXnuFGyRAyepJPmEo9vgMfXUA6O9u3IeEdv9MAkppFcaQwogQ==", + "requires": { + "regenerator-runtime": "^0.13.4" + } + }, + "regenerator-runtime": { + "version": "0.13.9", + "resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.13.9.tgz", + "integrity": "sha512-p3VT+cOEgxFsRRA9X4lkI1E+k2/CtnKtU4gcxyaCUreilL/vqI6CdZ3wxVUx3UOUg+gnUOQQcRI7BmSI656MYA==" + } + } + }, "babel-code-frame": { "version": "6.26.0", "resolved": "http://registry.npm.taobao.org/babel-code-frame/download/babel-code-frame-6.26.0.tgz", @@ -6973,6 +6997,11 @@ "integrity": "sha1-+xj4fOH+uSUWnJpAfBkxijIG7Yg=", "dev": true }, + "is-retry-allowed": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/is-retry-allowed/-/is-retry-allowed-2.2.0.tgz", + "integrity": "sha512-XVm7LOeLpTW4jV19QSH38vkswxoLud8sQ57YwJVTPWdiaI9I8keEhGFpBlslyVsgdQy4Opg8QOLb8YRgsyZiQg==" + }, "is-stream": { "version": "1.1.0", "resolved": "http://registry.npm.taobao.org/is-stream/download/is-stream-1.1.0.tgz", diff --git a/frontend/package.json b/frontend/package.json index aa892a15..3278f61a 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -11,6 +11,7 @@ }, "dependencies": { "axios": "^0.18.1", + "axios-retry": "^3.2.4", "bootstrap-vue": "^2.0.0-rc.19", "chart.js": "^2.8.0", "echarts": "^4.6.0", diff --git a/frontend/src/router.js b/frontend/src/router.js index f29ca80a..080f622a 100644 --- a/frontend/src/router.js +++ b/frontend/src/router.js @@ -20,12 +20,47 @@ import heapDump from "./components/heapdump/HeapDump" import auth from "./components/auth/Auth" -import axios from "axios"; +import axios from "axios" import notFound from "./components/404" import VueInsatence from "./main" -import JifaGlobal from "./Jifa"; + +import JifaGlobal from "./Jifa" + +import axiosRetry from "axios-retry" + +axiosRetry(axios, { + retries: 120, + retryDelay: (retryCount) => { + return 2000; + }, + retryCondition: (error) => { + let resp = error.response + if (resp) { + // Retry original request when rresponded something like + // {errorCode: "RETRY", "ServiceException: target_file_name"} + let status = resp.status + let data = resp.data + if (status === 500 || status === 400 || status === 401 || status === 403) { + if (data && data.hasOwnProperty('errorCode')) { + if (data.errorCode === 'RETRY' && data.message !== undefined) { + let [, targetName] = data.message.split(": "); + if (error.config.data !== undefined) { + if (error.config.data.indexOf("&retry=") === -1) { + error.config.data = error.config.data + "&retry=" + targetName + } + } else { + error.config.data = "retry=" + targetName + } + return true; + } + } + } + } + return false; + } +}); Vue.use(VueRouter) diff --git a/scripts/docker_images/build_image.sh b/scripts/docker_images/build_image.sh index 1d0cd131..406515f0 100755 --- a/scripts/docker_images/build_image.sh +++ b/scripts/docker_images/build_image.sh @@ -20,5 +20,7 @@ tar caf jifa.tgz artifacts mv jifa.tgz scripts/docker_images/ cd scripts/docker_images +# For non minikube users, please comment out below line +eval $(minikube docker-env) docker build -t jifa-master-open "." -f Dockerfile_master docker build -t jifa-worker-open "." -f Dockerfile_worker