Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SO1S-268 backend deployment 상태 변경 감지 #28

Merged
merged 22 commits into from
Sep 5, 2022
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@
import javax.validation.Valid;
import lombok.RequiredArgsConstructor;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.PutMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

@Controller
@RestController
@RequestMapping("/api/v1/deployments")
@RequiredArgsConstructor
public class DeploymentController {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package io.so1s.backend.domain.deployment.dto.response;

import io.so1s.backend.global.entity.Status;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
Expand All @@ -13,7 +14,7 @@ public class DeploymentFindResponseDto {

private String age;
private String deploymentName;
private String status;
private Status status;
private String endPoint;
private String strategy;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@

import io.so1s.backend.domain.model.entity.ModelMetadata;
import io.so1s.backend.global.entity.BaseTimeEntity;
import io.so1s.backend.global.entity.Status;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.EnumType;
import javax.persistence.Enumerated;
import javax.persistence.FetchType;
import javax.persistence.GeneratedValue;
import javax.persistence.GenerationType;
Expand Down Expand Up @@ -34,7 +37,11 @@ public class Deployment extends BaseTimeEntity {
private String name;

@Column(nullable = false)
private String status;
private String endPoint;

@Column(nullable = false)
@Enumerated(EnumType.STRING)
private Status status;

@ManyToOne(fetch = FetchType.LAZY)
@JoinColumn(name = "model_metadata_id")
Expand Down Expand Up @@ -63,10 +70,14 @@ public void setResource(Resource resource) {
resource.getDeployment().add(this);
}

public void update(ModelMetadata modelMetadata, DeploymentStrategy deploymentStrategy,
public void updateModel(ModelMetadata modelMetadata, DeploymentStrategy deploymentStrategy,
Resource resource) {
this.modelMetadata = modelMetadata;
this.deploymentStrategy = deploymentStrategy;
this.resource = resource;
}

/**
 * Replaces the status currently held by this deployment entity.
 *
 * @param status the new status value to assign to this deployment
 */
public void changeStatus(Status status) {
this.status = status;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@
import io.so1s.backend.domain.deployment.repository.ResourceRepository;
import io.so1s.backend.domain.model.entity.ModelMetadata;
import io.so1s.backend.domain.model.service.ModelService;
import io.so1s.backend.global.entity.Status;
import io.so1s.backend.global.error.exception.DeploymentNotFoundException;
import io.so1s.backend.global.error.exception.DeploymentStrategyNotFoundException;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;

import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;

@Service
@RequiredArgsConstructor
Expand Down Expand Up @@ -48,7 +48,8 @@ public Deployment createDeployment(Resource resource, DeploymentRequestDto deplo

Deployment deployment = Deployment.builder()
.name(deploymentRequestDto.getName())
.status("pending")
.status(Status.PENDING)
.endPoint("inference-" + deploymentRequestDto.getName() + "so1s.io")
.build();
deployment.setModelMetadata(modelMetadata);
deployment.setDeploymentStrategy(deploymentStrategy);
Expand Down Expand Up @@ -78,7 +79,7 @@ public Deployment updateDeployment(DeploymentRequestDto deploymentRequestDto) {
deploymentRequestDto.getModelMetadataId());
Resource resource = createResource(deploymentRequestDto.getResources());

deployment.update(modelMetadata, deploymentStrategy, resource);
deployment.updateModel(modelMetadata, deploymentStrategy, resource);

return deployment;
}
Expand All @@ -99,7 +100,7 @@ public DeploymentFindResponseDto setDeploymentFindResponseDto(Deployment deploym
.age(deployment.getUpdatedOn().toString())
.deploymentName(deployment.getName())
.status(deployment.getStatus())
.endPoint("need-modify")
.endPoint(deployment.getEndPoint())
.strategy(deployment.getDeploymentStrategy().getName())
.modelName(deployment.getModelMetadata().getModel().getName())
.modelVersion(deployment.getModelMetadata().getVersion())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,16 @@ public boolean inferenceServerBuild(ModelMetadata modelMetadata) throws Interrup
String library = model.getLibrary().getName().toLowerCase();
String version = modelMetadata.getVersion().toLowerCase();

Map<String, String> labels = new HashMap<>();
labels.put("app", "inference-build");
labels.put("name", jobName);

final Job job = new JobBuilder()
.withApiVersion("batch/v1")
.withNewMetadata()
.withName(jobName)
.withNamespace(namespace)
.addToLabels("job-name", jobName)
.addToLabels(labels)
.endMetadata()
.withNewSpec()
.withNewTemplate()
Expand Down Expand Up @@ -173,14 +177,15 @@ public boolean deployInferenceServer(
io.so1s.backend.domain.deployment.entity.Deployment deployment) {

String namespace = "default";
String deployName = "inference-" + deployment.getName().toLowerCase();
String deployName = deployment.getName().toLowerCase();
String modelName = deployment.getModelMetadata().getModel().getName().toLowerCase();
String modelVersion = deployment.getModelMetadata().getVersion().toLowerCase();

Map<String, String> labels = new HashMap<>();
labels.put("apps", deployName);
labels.put("app", "inference");
labels.put("name", deployName);

String host = deployName + ".so1s.io"; // TODO: Fix hard-coded root domain
String host = deployment.getEndPoint();

Deployment inferenceDeployment = new DeploymentBuilder()
.withNewMetadata()
Expand Down Expand Up @@ -310,7 +315,8 @@ public boolean deployABTest(ABTest abTest) {
String bName = "inference-" + abTest.getB().getName().toLowerCase();

Map<String, String> labels = new HashMap<>();
labels.put("apps", abTestName);
labels.put("app", "ab-test");
labels.put("name", abTestName);

VirtualService abTestVirtualService = new VirtualServiceBuilder()
.withNewMetadata()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package io.so1s.backend.domain.kubernetes.utils;

import io.fabric8.kubernetes.client.KubernetesClient;
import io.so1s.backend.domain.deployment.entity.Deployment;
import io.so1s.backend.domain.deployment.repository.DeploymentRepository;
import io.so1s.backend.global.entity.Status;
import java.util.List;
import java.util.Optional;
import java.util.logging.Level;
import java.util.logging.Logger;
import lombok.RequiredArgsConstructor;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.web.client.RestTemplate;

/**
 * Periodically reconciles the status stored for each {@link Deployment} entity with the state of
 * the matching Kubernetes Deployment and the health endpoint of the deployed application.
 *
 * <p>For every stored deployment the resulting status is:
 * <ul>
 *   <li>{@link Status#UNKNOWN} when no Kubernetes Deployment with the same name (ignoring case)
 *       and the label {@code app=inference} exists in the {@code default} namespace;</li>
 *   <li>{@link Status#RUNNING} when the Kubernetes Deployment reports an {@code Available}
 *       condition with status {@code "True"} and the health endpoint answers successfully;</li>
 *   <li>{@link Status#FAILED} otherwise.</li>
 * </ul>
 */
@RequiredArgsConstructor
@Component
public class DeploymentStatusCheckScheduler {

  private static final Logger LOGGER =
      Logger.getLogger(DeploymentStatusCheckScheduler.class.getName());

  private final KubernetesClient client;
  private final DeploymentRepository deploymentRepository;

  /**
   * Runs one reconciliation pass over all stored deployments. Scheduled with a fixed delay of
   * 60 seconds between the end of one run and the start of the next.
   */
  @Scheduled(fixedDelay = 1000L * 60)
  public void checkDeploymentStatus() {
    List<Deployment> deployments = deploymentRepository.findAll();

    List<io.fabric8.kubernetes.api.model.apps.Deployment> k8sDeployments = client.apps()
        .deployments().inNamespace("default").withLabel("app", "inference").list().getItems();

    for (Deployment deployment : deployments) {
      // Sequential stream: the list is small and in-memory, a parallel stream adds no value.
      Optional<io.fabric8.kubernetes.api.model.apps.Deployment> find = k8sDeployments.stream()
          .filter(d -> d.getMetadata().getName().equalsIgnoreCase(deployment.getName()))
          .findAny();

      if (!find.isPresent()) {
        setDeploymentStatus(deployment, Status.UNKNOWN);
        continue;
      }

      // The health probe is only attempted once Kubernetes reports the deployment as available.
      boolean running =
          isAvailable(find.get()) && checkApplicationHealth(deployment.getEndPoint());
      setDeploymentStatus(deployment, running ? Status.RUNNING : Status.FAILED);
    }
  }

  /**
   * Tells whether the given Kubernetes Deployment reports an {@code Available} condition whose
   * status is {@code "True"}.
   *
   * <p>The condition is looked up by type instead of by list position, and a missing status or
   * condition list is treated as "not available" so that one freshly created deployment cannot
   * abort the whole scheduled pass with an exception.
   *
   * @param k8sDeployment the Kubernetes Deployment to inspect
   * @return {@code true} if an {@code Available=True} condition is present, {@code false} otherwise
   */
  private static boolean isAvailable(
      io.fabric8.kubernetes.api.model.apps.Deployment k8sDeployment) {
    if (k8sDeployment.getStatus() == null
        || k8sDeployment.getStatus().getConditions() == null) {
      return false;
    }
    return k8sDeployment.getStatus().getConditions().stream()
        .anyMatch(c -> "Available".equals(c.getType()) && "True".equals(c.getStatus()));
  }

  /**
   * Probes the application's health endpoint by issuing a GET request to
   * {@code https://<url>/healthz}.
   *
   * @param url the host part of the endpoint; {@code "https://"} is prepended and
   *     {@code "/healthz"} appended by this method
   * @return {@code true} if the request completed without throwing, {@code false} if any
   *     exception was raised while performing it
   */
  public boolean checkApplicationHealth(String url) {
    try {
      new RestTemplate().getForObject("https://" + url + "/healthz", String.class);
      return true;
    } catch (Exception e) {
      // Any failure is reported as "unhealthy"; the cause is logged rather than printed to stderr.
      LOGGER.log(Level.WARNING, "Health check failed for endpoint " + url, e);
      return false;
    }
  }

  /**
   * Applies the given status to the deployment and saves it through the repository.
   *
   * <p>NOTE(review): {@link #checkDeploymentStatus()} calls this method on {@code this}; with
   * Spring's default proxy-based transaction handling the {@code @Transactional} annotation is
   * presumably not applied on that call path - confirm whether that matters here.
   *
   * @param deployment the deployment entity to update
   * @param status the status to store
   */
  @Transactional
  public void setDeploymentStatus(Deployment deployment, Status status) {
    deployment.changeStatus(status);
    deploymentRepository.save(deployment);
  }
}
10 changes: 10 additions & 0 deletions src/main/java/io/so1s/backend/global/config/ScheduledConfig.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package io.so1s.backend.global.config;

import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.EnableScheduling;

/**
 * Configuration class whose only role is to carry {@code @EnableScheduling}, which turns on
 * Spring's processing of {@code @Scheduled} methods. It declares no beans or members of its own.
 */
@EnableScheduling
@Configuration
public class ScheduledConfig {

}
6 changes: 3 additions & 3 deletions src/main/resources/data.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@ INSERT INTO MODEL_METADATA (CREATED_ON, UPDATED_ON, FILE_NAME, INPUT_DTYPE, INPU
INSERT INTO RESOURCE (CREATED_ON, UPDATED_ON, CPU, CPU_LIMIT, GPU, GPU_LIMIT, MEMORY, MEMORY_LIMIT) VALUES (now(), now(), '1', '2', '0', '0', '1Gi', '2Gi');
INSERT INTO RESOURCE (CREATED_ON, UPDATED_ON, CPU, CPU_LIMIT, GPU, GPU_LIMIT, MEMORY, MEMORY_LIMIT) VALUES (now(), now(), '1', '1', '0', '0', '500Mi', '1Gi');

INSERT INTO DEPLOYMENT (CREATED_ON, UPDATED_ON, NAME, STATUS, DEPLOYMENT_STRATEGY_ID, MODEL_METADATA_ID, RESOURCE_ID) VALUES (now(), now(), 'tensorflowModel1Deploy', 'RUNNING', 1, 1, 1);
INSERT INTO DEPLOYMENT (CREATED_ON, UPDATED_ON, NAME, STATUS, DEPLOYMENT_STRATEGY_ID, MODEL_METADATA_ID, RESOURCE_ID) VALUES (now(), now(), 'tensorflowModel2Deploy', 'PEDNING', 1, 2, 1);
INSERT INTO DEPLOYMENT (CREATED_ON, UPDATED_ON, NAME, STATUS, DEPLOYMENT_STRATEGY_ID, MODEL_METADATA_ID, RESOURCE_ID) VALUES (now(), now(), 'torchModel2Deploy1', 'RUNNING', 1, 4, 2);
INSERT INTO DEPLOYMENT (CREATED_ON, UPDATED_ON, NAME, STATUS, END_POINT, DEPLOYMENT_STRATEGY_ID, MODEL_METADATA_ID, RESOURCE_ID) VALUES (now(), now(), 'tensorflowModel1Deploy', 'RUNNING', 'http://tensorflowModel1Deploy.test.com/', 1, 1, 1);
INSERT INTO DEPLOYMENT (CREATED_ON, UPDATED_ON, NAME, STATUS, END_POINT, DEPLOYMENT_STRATEGY_ID, MODEL_METADATA_ID, RESOURCE_ID) VALUES (now(), now(), 'tensorflowModel2Deploy', 'PENDING', 'http://tensorflowModel1Deploy.test.com/', 1, 2, 1);
INSERT INTO DEPLOYMENT (CREATED_ON, UPDATED_ON, NAME, STATUS, END_POINT, DEPLOYMENT_STRATEGY_ID, MODEL_METADATA_ID, RESOURCE_ID) VALUES (now(), now(), 'torchModel2Deploy1', 'RUNNING', 'http://tensorflowModel1Deploy.test.com/', 1, 4, 2);
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import io.so1s.backend.domain.kubernetes.service.KubernetesService;
import io.so1s.backend.domain.model.service.ModelServiceImpl;
import io.so1s.backend.global.config.SecurityConfig;
import io.so1s.backend.global.entity.Status;
import io.so1s.backend.global.utils.HashGenerator;
import java.time.LocalDateTime;
import java.util.ArrayList;
Expand Down Expand Up @@ -158,7 +159,7 @@ public void findDeployments() throws Exception {
list.add(DeploymentFindResponseDto.builder()
.age(LocalDateTime.now().toString())
.deploymentName("testDeploy")
.status("running")
.status(Status.RUNNING)
.endPoint("http://test.endpoint.com/")
.build());
when(deploymentService.findDeployments()).thenReturn(list);
Expand All @@ -173,7 +174,7 @@ public void findDeployments() throws Exception {
result.andExpect(status().isOk())
.andExpect(jsonPath("$[0].age").exists()) // TimeStamp 불일치 문제로 임시 수정
.andExpect(jsonPath("$[0].deploymentName").value(list.get(0).getDeploymentName()))
.andExpect(jsonPath("$[0].status").value(list.get(0).getStatus()))
.andExpect(jsonPath("$[0].status").value(list.get(0).getStatus().toString()))
.andExpect(jsonPath("$[0].endPoint").value(list.get(0).getEndPoint()))
.andDo(print());
}
Expand All @@ -185,7 +186,7 @@ public void findDeployment() throws Exception {
DeploymentFindResponseDto responseDto = DeploymentFindResponseDto.builder()
.age(LocalDateTime.now().toString())
.deploymentName("testDeploy")
.status("running")
.status(Status.RUNNING)
.endPoint("http://test.endpoint.com/")
.build();
when(deploymentService.findDeployment(any())).thenReturn(responseDto);
Expand All @@ -200,7 +201,7 @@ public void findDeployment() throws Exception {
result.andExpect(status().isOk())
.andExpect(jsonPath("$.age").exists()) // TimeStamp 불일치 문제로 임시 수정
.andExpect(jsonPath("$.deploymentName").value(responseDto.getDeploymentName()))
.andExpect(jsonPath("$.status").value(responseDto.getStatus()))
.andExpect(jsonPath("$.status").value(responseDto.getStatus().toString()))
.andExpect(jsonPath("$.endPoint").value(responseDto.getEndPoint()))
.andDo(print());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ public void deployInferenceServerTest() throws Exception {
// given
Deployment deployment = Deployment.builder()
.name("testDeployment")
.status("pending")
.status(Status.PENDING)
.modelMetadata(ModelMetadata.builder()
.status(Status.SUCCEEDED)
.version(HashGenerator.sha256())
Expand Down Expand Up @@ -206,7 +206,7 @@ public void deployABTest() throws Exception {
// given
Deployment a = Deployment.builder()
.name("aDeployment")
.status("pending")
.status(Status.PENDING)
.modelMetadata(ModelMetadata.builder()
.status(Status.SUCCEEDED)
.version(HashGenerator.sha256())
Expand Down Expand Up @@ -235,7 +235,7 @@ public void deployABTest() throws Exception {

Deployment b = Deployment.builder()
.name("bDeployment")
.status("pending")
.status(Status.PENDING)
.modelMetadata(ModelMetadata.builder()
.status(Status.SUCCEEDED)
.version(HashGenerator.sha256())
Expand Down
Loading