Skip to content
This repository was archived by the owner on Aug 7, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
27e4f1d
Merge pull request #1 from pytorch/master
AshwinChafale Aug 14, 2020
992fbfc
Added environment header in Sanity & Regression test suite
Aug 14, 2020
34ca168
Added collect_env.py python script
Aug 14, 2020
56db699
Undo few unwanted changes
AshwinChafale Sep 3, 2020
daea579
Added cuda memory usage info and pid of workers in model description API
AshwinChafale Sep 7, 2020
2fcedca
Modified WorkerThread.java file
AshwinChafale Sep 7, 2020
963f26c
.
AshwinChafale Sep 7, 2020
ef892d7
.
AshwinChafale Sep 7, 2020
6fa04ef
.
AshwinChafale Sep 7, 2020
7027ae9
..
AshwinChafale Sep 7, 2020
44bcd6d
Updated WorkerThread.java file
AshwinChafale Sep 7, 2020
7b4e4f7
Show gpuUsage specific to gpu-id on which the model is loaded
AshwinChafale Sep 10, 2020
dc3e363
Deleted cuda_script.py file
AshwinChafale Sep 10, 2020
2a5e0c1
Fixed sanity failed log : Array brackets at illegal position.
AshwinChafale Sep 10, 2020
1b113aa
Fixed sanity failed log : Array brackets at illegal position.
AshwinChafale Sep 10, 2020
34072d2
Merge pull request #2 from pytorch/master
AshwinChafale Sep 10, 2020
be5246c
Updated WorkerThread.java file to handle Cannot run program nvidia-sm…
AshwinChafale Sep 11, 2020
78c8bf7
Merge branch 'issue_597' of https://github.com/AshwinChafale/serve in…
AshwinChafale Sep 11, 2020
f455983
Corrected PWD violations
AshwinChafale Sep 14, 2020
3290523
Updated WorkerThread.java file
AshwinChafale Sep 14, 2020
9bda0ec
Merge branch 'master' into issue_597
codinnvrends Sep 15, 2020
23a2a74
Merge branch 'master' into issue_597
maaquib Sep 29, 2020
5c6b55a
Update frontend/server/src/main/java/org/pytorch/serve/wlm/WorkerThre…
AshwinChafale Sep 30, 2020
a03dfe5
Updated server/src/main/java/org/pytorch/serve/wlm/WorkerThread.java
AshwinChafale Sep 30, 2020
9408b31
Merge branch 'master' into issue_597
harshbafna Oct 14, 2020
90a4900
added todo
harshbafna Oct 14, 2020
07a0331
fixed java formatting
harshbafna Oct 14, 2020
22a5c58
Merge branch 'master' into issue_597
maaquib Oct 21, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -122,13 +122,21 @@ public void setWorkers(List<Worker> workers) {
}

public void addWorker(
String id, long startTime, boolean isRunning, int gpuId, long memoryUsage) {
String id,
long startTime,
boolean isRunning,
int gpuId,
long memoryUsage,
int pid,
String gpuUsage) {
Worker worker = new Worker();
worker.setId(id);
worker.setStartTime(new Date(startTime));
worker.setStatus(isRunning ? "READY" : "UNLOADING");
worker.setGpu(gpuId >= 0);
worker.setMemoryUsage(memoryUsage);
worker.setPid(pid);
worker.setGpu(gpuId >= 0);
worker.setGpuUsage(gpuUsage);
workers.add(worker);
}

Expand All @@ -145,11 +153,29 @@ public static final class Worker {
private String id;
private Date startTime;
private String status;
private boolean gpu;
private long memoryUsage;
private int pid;
private boolean gpu;
private String gpuUsage;

/** Creates a worker entry with all fields left at their default values. */
public Worker() {}

/**
 * Returns the GPU usage description stored on this worker entry.
 *
 * @return the value last passed to {@code setGpuUsage}, or {@code null} if never set
 */
public String getGpuUsage() {
    return this.gpuUsage;
}

/**
 * Stores the GPU usage description for this worker entry.
 *
 * @param gpuUsage the GPU usage description to store
 */
public void setGpuUsage(String gpuUsage) {
this.gpuUsage = gpuUsage;
}

/**
 * Returns the process id stored on this worker entry.
 *
 * @return the value last passed to {@code setPid}
 */
public int getPid() {
    return this.pid;
}

/**
 * Stores the process id for this worker entry.
 *
 * @param pid the process id to store
 */
public void setPid(int pid) {
this.pid = pid;
}

/**
 * Returns the id stored on this worker entry.
 *
 * @return the worker id
 */
public String getId() {
    return this.id;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,9 @@ private DescribeModelResponse createModelResponse(
boolean isRunning = worker.isRunning();
int gpuId = worker.getGpuId();
long memory = worker.getMemory();
resp.addWorker(workerId, startTime, isRunning, gpuId, memory);
int pid = worker.getPid();
String gpuUsage = worker.getGpuUsage();
resp.addWorker(workerId, startTime, isRunning, gpuId, memory, pid, gpuUsage);
}

return resp;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,12 @@
import io.netty.channel.EventLoopGroup;
import io.netty.channel.SimpleChannelInboundHandler;
import io.netty.handler.codec.http.HttpResponseStatus;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.SocketAddress;
import java.nio.charset.StandardCharsets;
import java.util.UUID;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.CountDownLatch;
Expand Down Expand Up @@ -76,6 +80,59 @@ public WorkerState getState() {
return state;
}

/**
 * Reports GPU utilization for the device identified by {@code gpuId}.
 *
 * <p>Runs {@code nvidia-smi} for that device and reformats its CSV output as {@code
 * gpuId::<id> <header>::<value>...}. Standard output is drained before waiting for the
 * process to exit so a full pipe cannot stall the child.
 *
 * @return the formatted usage string; {@code "N/A"} when {@code gpuId} is negative; or
 *     {@code "failed to obtained gpu usage"} when the command exits non-zero, cannot be
 *     started, or the calling thread is interrupted while waiting
 */
public String getGpuUsage() {
    if (gpuId < 0) {
        return "N/A";
    }

    Process process = null;
    try {
        // TODO : add a generic code to capture gpu details for different devices instead of
        // just NVIDIA
        // Explicit argument list: no reliance on Runtime.exec's whitespace tokenization.
        process =
                new ProcessBuilder(
                                "nvidia-smi",
                                "-i",
                                String.valueOf(gpuId),
                                "--query-gpu=utilization.gpu,utilization.memory,memory.used",
                                "--format=csv")
                        .start();

        String usage;
        try (BufferedReader stdout =
                new BufferedReader(
                        new InputStreamReader(
                                process.getInputStream(), StandardCharsets.UTF_8))) {
            usage = formatGpuUsage(stdout, gpuId);
        }

        if (process.waitFor() != 0) {
            // Log stderr as text, line by line, rather than as raw byte values.
            try (BufferedReader stderr =
                    new BufferedReader(
                            new InputStreamReader(
                                    process.getErrorStream(), StandardCharsets.UTF_8))) {
                String errorLine;
                while ((errorLine = stderr.readLine()) != null) {
                    logger.error(errorLine);
                }
            }
            return "failed to obtained gpu usage";
        }
        return usage;
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers further up can observe it.
        Thread.currentThread().interrupt();
        logger.error("Exception Raised : " + e.toString(), e);
        return "failed to obtained gpu usage";
    } catch (IOException | RuntimeException e) {
        logger.error("Exception Raised : " + e.toString(), e);
        return "failed to obtained gpu usage";
    } finally {
        if (process != null) {
            // Releases the child's pipes; harmless if it has already exited.
            process.destroy();
        }
    }
}

/**
 * Converts CSV text (first line headers, following lines values) into the usage string.
 *
 * <p>Header cells are intentionally not stripped, matching the established output format;
 * presumably nvidia-smi's {@code ", "} delimiter supplies the space between pairs (TODO
 * confirm). Rows shorter than the header are emitted up to their last available column
 * instead of throwing.
 *
 * @param reader source of the CSV lines; not closed by this method
 * @param gpuId GPU id written at the start of every row
 * @return the concatenated rows, or an empty string when there are no value lines
 * @throws IOException if reading from {@code reader} fails
 */
private static String formatGpuUsage(BufferedReader reader, int gpuId) throws IOException {
    StringBuilder usage = new StringBuilder();
    String[] headers = null;
    String line;
    while ((line = reader.readLine()) != null) {
        if (headers == null) {
            headers = line.split(",");
            continue;
        }
        String[] values = line.split(",");
        usage.append("gpuId::").append(gpuId).append(' ');
        int columns = Math.min(headers.length, values.length);
        for (int i = 0; i < columns; i++) {
            usage.append(headers[i]).append("::").append(values[i].strip());
        }
    }
    return usage.toString();
}

/**
 * Returns the {@code WorkerLifeCycle} held by this worker thread.
 *
 * @return the value of the {@code lifeCycle} field
 */
public WorkerLifeCycle getLifeCycle() {
    return this.lifeCycle;
}
Expand Down