Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add GPU monitor to web app #50

Merged
merged 10 commits into from
Oct 8, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions models/Qwen2.5-7B-Instruct-v8-k65536-65536-woft
TITC marked this conversation as resolved.
Show resolved Hide resolved
Submodule Qwen2.5-7B-Instruct-v8-k65536-65536-woft added at ce2453
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@ torch
transformers>=4.44
safetensors
psutil
accelerate
accelerate
gradio
plotly==5.9.0
39 changes: 24 additions & 15 deletions vptq/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import os

import gradio as gr
from app_gpu import update_charts as _update_charts
TITC marked this conversation as resolved.
Show resolved Hide resolved

from vptq.app_utils import get_chat_loop_generator

Expand Down Expand Up @@ -48,21 +49,29 @@ def respond(
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Extra controls handed to the chat interface, created in the order they are listed.
system_message_box = gr.Textbox(value="You are a friendly Chatbot.", label="System message")
max_new_tokens_slider = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        system_message_box,
        max_new_tokens_slider,
        temperature_slider,
        top_p_slider,
    ],
)
def update_chart():
    """Redraw the GPU chart at the fixed height (200) used by this page."""
    return _update_charts(chart_height=200)


with gr.Blocks(fill_height=True) as demo:
    # Top row: the GPU chart, refreshed through `every` with a 0.01 second period.
    with gr.Row():
        gpu_chart = gr.Plot(update_chart, every=0.01)

    # Below it: the chat interface and its extra generation controls.
    with gr.Column():
        chat_interface = gr.ChatInterface(
            respond,
            additional_inputs=[
                gr.Textbox(
                    value="You are a friendly Chatbot.",
                    label="System message",
                ),
                gr.Slider(
                    minimum=1,
                    maximum=2048,
                    value=512,
                    step=1,
                    label="Max new tokens",
                ),
                gr.Slider(
                    minimum=0.1,
                    maximum=4.0,
                    value=0.7,
                    step=0.1,
                    label="Temperature",
                ),
                gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
            ],
        )


if __name__ == "__main__":
share = os.getenv("SHARE_LINK", None) in ["1", "true", "True"]
Expand Down
182 changes: 182 additions & 0 deletions vptq/app_gpu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
import re
import subprocess
from collections import deque

import gradio as gr
import plotly.graph_objs as go

# Rolling windows behind the chart: each deque keeps at most the 20 newest samples,
# so appending to a full deque drops its oldest entry.
gpu_util_history = deque([], maxlen=20)  # GPU utilization samples (%)
mem_usage_history = deque([], maxlen=20)  # memory usage samples (%)


def get_nvidia_smi_info():
    """
    Runs ``nvidia-smi`` and returns what it printed to standard output.

    Returns:
        str: The captured standard output. An empty string is returned when the
            command cannot be started (for example, it is not installed) or does
            not finish within the timeout, so that a periodic refresh built on
            this function degrades to "no data" instead of raising.
    """
    try:
        result = subprocess.run(
            ['nvidia-smi'],
            stdout=subprocess.PIPE,
            text=True,
            timeout=10,  # do not let a stuck driver block the caller forever
        )
    except (OSError, subprocess.SubprocessError):
        # OSError covers a missing/non-executable binary; SubprocessError covers the timeout
        return ''
    # the exit status is deliberately not checked: whatever was printed is returned
    return result.stdout


# Patterns for the fields scraped from the text printed by ``nvidia-smi``.
_GPU_UTIL_RE = re.compile(r'(\d+)%\s+Default')
_MEMORY_RE = re.compile(r'(\d+)MiB / (\d+)MiB')
# The temperature is a standalone token such as "34C". Requiring whitespace (or the
# text boundary) on both sides keeps hexadecimal Bus-Id fragments such as the "1C"
# in "00000000:1C:00.0" from being reported as the temperature.
_TEMP_RE = re.compile(r'(?<!\S)(\d+)C(?!\S)')
# Power is printed as "44W / 300W"; the unit after the first number is optional so
# that the "44 / 300 W" spelling is accepted as well.
_POWER_RE = re.compile(r'(\d+)\s*W?\s*/\s*(\d+)\s*W')
_CLOCK_RE = re.compile(r'(\d+)MHz\s+MEM\s+(\d+)MHz')


def parse_nvidia_smi_output(output):
    """
    Extracts GPU statistics from the text printed by ``nvidia-smi``.

    Only the first match of each pattern is used, so when the text lists several
    GPUs the values come from whichever entry appears first.

    Args:
        output (str): Text captured from ``nvidia-smi``. Empty or ``None`` yields
            an empty result.

    Returns:
        dict: Holds only the fields that were found in the text:
            ``gpu_util`` (int, percent), ``mem_used`` and ``mem_total`` (int, MiB),
            ``mem_percent`` (float, percent), ``temp`` (int), ``power_used`` and
            ``power_max`` (int), ``gpu_clock`` and ``mem_clock`` (int, MHz).
    """
    gpu_info = {}
    if not output:
        return gpu_info

    utilization = _GPU_UTIL_RE.search(output)
    if utilization:
        gpu_info['gpu_util'] = int(utilization.group(1))

    memory = _MEMORY_RE.search(output)
    if memory:
        mem_used, mem_total = int(memory.group(1)), int(memory.group(2))
        gpu_info['mem_used'] = mem_used
        gpu_info['mem_total'] = mem_total
        # a reported total of 0 MiB would otherwise divide by zero
        gpu_info['mem_percent'] = mem_used / mem_total * 100 if mem_total else 0.0

    temp = _TEMP_RE.search(output)
    if temp:
        gpu_info['temp'] = int(temp.group(1))

    power = _POWER_RE.search(output)
    if power:
        gpu_info['power_used'] = int(power.group(1))
        gpu_info['power_max'] = int(power.group(2))

    gpu_clock = _CLOCK_RE.search(output)
    if gpu_clock:
        gpu_info['gpu_clock'] = int(gpu_clock.group(1))
        gpu_info['mem_clock'] = int(gpu_clock.group(2))

    return gpu_info


def update_charts(chart_height: int = 200) -> go.Figure:
    """
    Take one GPU sample and rebuild the utilization / memory history chart.

    The new readings are appended to the module-level history deques, and the
    figure is drawn from everything those deques hold afterwards.

    Args:
        chart_height (int, optional): Height given to the figure layout. Defaults to 200.

    Returns:
        plotly.graph_objs.Figure: A figure with two traces, GPU utilization and
            memory usage, both plotted as percentages.
    """
    stats = parse_nvidia_smi_output(get_nvidia_smi_info())

    # a reading that is missing from the parsed output is recorded as 0
    latest_util = round(stats.get('gpu_util', 0), 1)
    latest_mem = round(stats.get('mem_percent', 0), 1)
    gpu_util_history.append(latest_util)
    mem_usage_history.append(latest_mem)

    def history_trace(samples, label):
        # each point carries its own value as a text label above the marker
        return go.Scatter(
            y=list(samples),
            mode='lines+markers+text',
            name=label,
            text=list(samples),
            textposition='top center',
        )

    traces = [
        history_trace(gpu_util_history, 'GPU Utilization (%)'),
        history_trace(mem_usage_history, 'Memory Usage (%)'),
    ]

    # no chart title; the x axis is unlabeled with no ticks, and the y range is
    # wider than 0-100 on both ends
    x_axis = {'title': None, 'showticklabels': False, 'ticks': ''}
    y_axis = {'title': 'Percentage (%)', 'range': [-5, 110]}
    chart_layout = go.Layout(
        xaxis=x_axis,
        yaxis=y_axis,
        height=chart_height,
        margin={'l': 10, 'r': 10, 't': 0, 'b': 0},
    )

    return go.Figure(data=traces, layout=chart_layout)


def mem_bar(used: float, total: float) -> str:
    """
    Generates a memory usage bar.

    The bar is always 50 characters wide: ``|`` for the used share and spaces for
    the rest. A ``total`` of zero (or less) produces an empty bar instead of
    dividing by zero, and a ``used`` value outside ``0..total`` is clamped to the
    bar width; the numbers printed after the bar are never altered.

    Args:
        used (float): The amount of memory used in GiB.
        total (float): The total amount of memory available in GiB.
    Returns:
        str: A string representing the memory usage bar in HTML format.
    """
    bar_length = 50
    if total > 0:
        used_bars = int(bar_length * used / total)
        # keep the bar inside its fixed width even for inconsistent readings
        used_bars = max(0, min(bar_length, used_bars))
    else:
        # no known capacity (e.g. the memory field could not be parsed)
        used_bars = 0
    bar = '|' * used_bars + ' ' * (bar_length - used_bars)
    return f"<span style='color: green;'>MEM[{bar}{used:.3f}Gi/{total:.3f}Gi]</span>"


def refresh_gpu_data():
    """
    Builds the HTML status panel from a fresh ``nvidia-smi`` reading.

    Returns:
        str: A single ``<div>`` in monospace font showing the GPU and memory
            clocks, temperature, power usage, GPU utilization and a memory
            usage bar. Clock, temperature and power fields that could not be
            parsed are shown as ``N/A``.
    """
    stats = parse_nvidia_smi_output(get_nvidia_smi_info())

    core_mhz = stats.get('gpu_clock', 'N/A')
    memory_mhz = stats.get('mem_clock', 'N/A')
    temperature = stats.get('temp', 'N/A')
    draw_watts = stats.get('power_used', 'N/A')
    limit_watts = stats.get('power_max', 'N/A')
    utilization = stats.get('gpu_util', 0)
    # memory figures are parsed in MiB; the bar expects GiB
    used_gib = stats.get('mem_used', 0) / 1024
    total_gib = stats.get('mem_total', 0) / 1024

    # NOTE: the device name, the PCIe figures and the fan value are fixed text,
    # not values taken from the parsed reading.
    panel_parts = [
        "<div style='font-family: monospace;'>",
        "<b style='color: yellow;'>Device 0</b> ",
        "[<span style='color: cyan;'>NVIDIA A100 80GB PCIe</span>] ",
        "PCIe GEN 4@16x RX: <b>0.000 KiB/s</b> TX: <b>0.000 KiB/s</b><br>",
        f"GPU <b>{core_mhz}MHz</b> MEM <b>{memory_mhz}MHz</b> ",
        f"TEMP <b style='color: orange;'>{temperature}°C</b> FAN <b>N/A%</b> ",
        f"POW <b style='color: red;'>{draw_watts} / {limit_watts} W</b><br>",
        f"GPU[<b>{utilization}%</b>] {mem_bar(used_gib, total_gib)}",
        "</div>",
    ]
    return "".join(panel_parts)


def initialize_history():
    """
    Pre-fills the GPU utilization and memory usage histories with 20 samples.

    ``nvidia-smi`` is queried once per sample; a value missing from a reading
    is stored as 0.
    """
    tracked = (
        (gpu_util_history, 'gpu_util'),
        (mem_usage_history, 'mem_percent'),
    )
    for _sample in range(20):
        stats = parse_nvidia_smi_output(get_nvidia_smi_info())
        for history, field in tracked:
            history.append(round(stats.get(field, 0), 1))


if __name__ == "__main__":
    # Standalone demo: an HTML status panel plus the history chart.
    # Refresh period, passed as `every` to both components below.
    refresh_seconds = 0.01
    # Pin the panel height so refreshes do not move the page up and down.
    panel_css = (
        "\n"
        "#gpu_info {\n"
        "height: 100px;\n"
        "overflow: hidden;\n"
        "}\n"
    )

    with gr.Blocks() as demo:
        # NOTE: this HTML panel has a known flickering issue when it refreshes.
        gpu_info_display = gr.HTML(refresh_gpu_data, every=refresh_seconds, elem_id="gpu_info")
        # fill the history deques before the chart component is created
        initialize_history()
        gpu_chart = gr.Plot(update_charts, every=refresh_seconds)

    demo.css = panel_css
    demo.launch()