From 53ff6a516e1d4075c1214e5eb993b1f453145c21 Mon Sep 17 00:00:00 2001 From: Naman Nandan Date: Wed, 2 Aug 2023 13:05:29 -0700 Subject: [PATCH] Add job queue status to describe API (#2464) * Add support for job queue metrics in describe API * add test * update documentation for describe API * Retain describe API metrics implementation --------- Co-authored-by: Naman Nandan Co-authored-by: Ankith Gunapal --- docs/management_api.md | 28 +++++++++++-- .../http/messages/DescribeModelResponse.java | 31 ++++++++++++++ .../pytorch/serve/openapi/OpenApiUtils.java | 11 ++++- .../java/org/pytorch/serve/util/ApiUtils.java | 6 +++ .../java/org/pytorch/serve/wlm/Model.java | 18 +++++++++ .../org/pytorch/serve/ModelServerTest.java | 18 +++++++++ .../test/resources/management_open_api.json | 40 ++++++++++++++++++- .../test/resources/model_management_api.json | 40 ++++++++++++++++++- 8 files changed, 183 insertions(+), 9 deletions(-) diff --git a/docs/management_api.md b/docs/management_api.md index 991746fe52..37feb7a4f4 100644 --- a/docs/management_api.md +++ b/docs/management_api.md @@ -204,7 +204,11 @@ curl http://localhost:8081/models/noop "gpu": false, "memoryUsage": 89247744 } - ] + ], + "jobQueueStatus": { + "remainingCapacity": 100, + "pendingRequests": 0 + } } ] ``` @@ -234,7 +238,11 @@ curl http://localhost:8081/models/noop/2.0 "gpu": false, "memoryUsage": 89247744 } - ] + ], + "jobQueueStatus": { + "remainingCapacity": 100, + "pendingRequests": 0 + } } ] ``` @@ -264,7 +272,11 @@ curl http://localhost:8081/models/noop/all "gpu": false, "memoryUsage": 89247744 } - ] + ], + "jobQueueStatus": { + "remainingCapacity": 100, + "pendingRequests": 0 + } }, { "modelName": "noop", @@ -284,7 +296,11 @@ curl http://localhost:8081/models/noop/all "gpu": false, "memoryUsage": 89247744 } - ] + ], + "jobQueueStatus": { + "remainingCapacity": 100, + "pendingRequests": 0 + } } ] ``` @@ -401,6 +417,10 @@ curl http://localhost:8081/models/noop-customized/1.0?customized=true "gpuUsage": "N/A" } ], + "jobQueueStatus": { + "remainingCapacity": 100, + "pendingRequests": 0 + }, "customizedMetadata": "{\n \"data1\": \"1\",\n \"data2\": \"2\"\n}" } ] diff --git a/frontend/server/src/main/java/org/pytorch/serve/http/messages/DescribeModelResponse.java b/frontend/server/src/main/java/org/pytorch/serve/http/messages/DescribeModelResponse.java index 1e37c278bb..c998784d84 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/http/messages/DescribeModelResponse.java +++ b/frontend/server/src/main/java/org/pytorch/serve/http/messages/DescribeModelResponse.java @@ -21,6 +21,7 @@ public class DescribeModelResponse { private List workers; private Metrics metrics; + private JobQueueStatus jobQueueStatus; private String customizedMetadata; public DescribeModelResponse() { @@ -150,6 +151,14 @@ public void setMetrics(Metrics metrics) { this.metrics = metrics; } + public JobQueueStatus getJobQueueStatus() { + return jobQueueStatus; + } + + public void setJobQueueStatus(JobQueueStatus jobQueueStatus) { + this.jobQueueStatus = jobQueueStatus; + } + public void setCustomizedMetadata(byte[] customizedMetadata) { this.customizedMetadata = new String(customizedMetadata, Charset.forName("UTF-8")); } @@ -257,4 +266,26 @@ public void setRequests(int requests) { this.requests = requests; } } + + public static final class JobQueueStatus { + + private int remainingCapacity; + private int pendingRequests; + + public int getRemainingCapacity() { + return remainingCapacity; + } + + public void setRemainingCapacity(int remainingCapacity) { + this.remainingCapacity = remainingCapacity; + } + + public int getPendingRequests() { + return pendingRequests; + } + + public void setPendingRequests(int pendingRequests) { + this.pendingRequests = pendingRequests; + } + } } diff --git a/frontend/server/src/main/java/org/pytorch/serve/openapi/OpenApiUtils.java b/frontend/server/src/main/java/org/pytorch/serve/openapi/OpenApiUtils.java index 31ccf6b950..b621bf39fb 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/openapi/OpenApiUtils.java +++ b/frontend/server/src/main/java/org/pytorch/serve/openapi/OpenApiUtils.java @@ -433,7 +433,6 @@ private static Operation getDescribeModelOperation(boolean version) { workerStatus.setEnumeration(status); worker.addProperty("status", workerStatus, true); workers.setItems(worker); - schema.addProperty("workers", workers, true); Schema metrics = new Schema("object"); metrics.addProperty( @@ -449,6 +448,16 @@ private static Operation getDescribeModelOperation(boolean version) { new Schema("integer", "Number requests processed in last 10 minutes."), true); schema.addProperty("metrics", metrics, true); + Schema jobQueueStatus = new Schema("object"); + jobQueueStatus.addProperty( + "remainingCapacity", + new Schema("integer", "Number of new requests that can be queued."), + true); + jobQueueStatus.addProperty( + "pendingRequests", + new Schema("integer", "Number of requests waiting in the queue."), + true); + schema.addProperty("jobQueueStatus", jobQueueStatus, true); MediaType mediaType = new MediaType(HttpHeaderValues.APPLICATION_JSON.toString(), schema); MediaType error = getErrorResponse(); diff --git a/frontend/server/src/main/java/org/pytorch/serve/util/ApiUtils.java b/frontend/server/src/main/java/org/pytorch/serve/util/ApiUtils.java index 80c31b5bbc..64266354f3 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/util/ApiUtils.java +++ b/frontend/server/src/main/java/org/pytorch/serve/util/ApiUtils.java @@ -416,6 +416,12 @@ private static DescribeModelResponse createModelResponse( resp.addWorker(workerId, startTime, isRunning, gpuId, memory, pid, gpuUsage); } + DescribeModelResponse.JobQueueStatus jobQueueStatus = + new DescribeModelResponse.JobQueueStatus(); + jobQueueStatus.setRemainingCapacity(model.getJobQueueRemainingCapacity()); + jobQueueStatus.setPendingRequests(model.getPendingRequestsInJobQueue()); + resp.setJobQueueStatus(jobQueueStatus); + return resp; } diff --git a/frontend/server/src/main/java/org/pytorch/serve/wlm/Model.java b/frontend/server/src/main/java/org/pytorch/serve/wlm/Model.java index 0e54dfd72b..b8e5fc414b 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/wlm/Model.java +++ b/frontend/server/src/main/java/org/pytorch/serve/wlm/Model.java @@ -402,4 +402,22 @@ public synchronized boolean getJobTickets() { this.numJobTickets.decrementAndGet(); return true; } + + public int getJobQueueRemainingCapacity() { + LinkedBlockingDeque jobsQueue = jobsDb.get(DEFAULT_DATA_QUEUE); + if (jobsQueue != null) { + return jobsQueue.remainingCapacity(); + } + + return 0; + } + + public int getPendingRequestsInJobQueue() { + LinkedBlockingDeque jobsQueue = jobsDb.get(DEFAULT_DATA_QUEUE); + if (jobsQueue != null) { + return jobsQueue.size(); + } + + return 0; + } } diff --git a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java index d19b450d99..b28b8963bb 100644 --- a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java +++ b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java @@ -356,6 +356,24 @@ public void testDescribeSpecificModelVersion() throws InterruptedException { @Test( alwaysRun = true, dependsOnMethods = {"testDescribeSpecificModelVersion"}) + public void testDescribeModelJobQueueStatus() throws InterruptedException { + testLoadModelWithInitialWorkers("noop.mar", "noop_describe", "1.11"); + + Channel channel = TestUtils.getManagementChannel(configManager); + TestUtils.setResult(null); + TestUtils.setLatch(new CountDownLatch(1)); + TestUtils.describeModel(channel, "noop_describe", "1.11", false); + TestUtils.getLatch().await(); + + DescribeModelResponse[] resp = + JsonUtils.GSON.fromJson(TestUtils.getResult(), DescribeModelResponse[].class); + Assert.assertEquals(resp[0].getJobQueueStatus().getRemainingCapacity(), 100); + Assert.assertEquals(resp[0].getJobQueueStatus().getPendingRequests(), 0); + } + + @Test( + alwaysRun = true, + dependsOnMethods = {"testDescribeModelJobQueueStatus"}) public void testNoopVersionedPrediction() throws InterruptedException { testPredictions("noopversioned", "OK", "1.11"); } diff --git a/frontend/server/src/test/resources/management_open_api.json b/frontend/server/src/test/resources/management_open_api.json index 486c6770d5..89224df531 100644 --- a/frontend/server/src/test/resources/management_open_api.json +++ b/frontend/server/src/test/resources/management_open_api.json @@ -479,7 +479,8 @@ "maxWorkers", "status", "workers", - "metrics" + "metrics", + "jobQueueStatus" ], "properties": { "modelName": { @@ -570,6 +571,23 @@ "description": "Number requests processed in last 10 minutes." } } + }, + "jobQueueStatus": { + "type": "object", + "required": [ + "remainingCapacity", + "pendingRequests" + ], + "properties": { + "remainingCapacity": { + "type": "integer", + "description": "Number of new requests that can be queued." + }, + "pendingRequests": { + "type": "integer", + "description": "Number of requests waiting in the queue." + } + } } } } @@ -1049,7 +1067,8 @@ "maxWorkers", "status", "workers", - "metrics" + "metrics", + "jobQueueStatus" ], "properties": { "modelName": { @@ -1140,6 +1159,23 @@ "description": "Number requests processed in last 10 minutes." } } + }, + "jobQueueStatus": { + "type": "object", + "required": [ + "remainingCapacity", + "pendingRequests" + ], + "properties": { + "remainingCapacity": { + "type": "integer", + "description": "Number of new requests that can be queued." + }, + "pendingRequests": { + "type": "integer", + "description": "Number of requests waiting in the queue." + } + } } } } diff --git a/frontend/server/src/test/resources/model_management_api.json b/frontend/server/src/test/resources/model_management_api.json index ea285f6821..f056c7aad7 100644 --- a/frontend/server/src/test/resources/model_management_api.json +++ b/frontend/server/src/test/resources/model_management_api.json @@ -35,7 +35,8 @@ "maxWorkers", "status", "workers", - "metrics" + "metrics", + "jobQueueStatus" ], "properties": { "modelName": { @@ -126,6 +127,23 @@ "description": "Number requests processed in last 10 minutes." } } + }, + "jobQueueStatus": { + "type": "object", + "required": [ + "remainingCapacity", + "pendingRequests" + ], + "properties": { + "remainingCapacity": { + "type": "integer", + "description": "Number of new requests that can be queued." + }, + "pendingRequests": { + "type": "integer", + "description": "Number of requests waiting in the queue." + } + } } } } @@ -605,7 +623,8 @@ "maxWorkers", "status", "workers", - "metrics" + "metrics", + "jobQueueStatus" ], "properties": { "modelName": { @@ -696,6 +715,23 @@ "description": "Number requests processed in last 10 minutes." } } + }, + "jobQueueStatus": { + "type": "object", + "required": [ + "remainingCapacity", + "pendingRequests" + ], + "properties": { + "remainingCapacity": { + "type": "integer", + "description": "Number of new requests that can be queued." + }, + "pendingRequests": { + "type": "integer", + "description": "Number of requests waiting in the queue." + } + } } } }