Commit db10853

removed - file
Signed-off-by: Oviya Seeniraj <oseeniraj@nvidia.com>
1 parent 96f8008 commit db10853

File tree

1 file changed: +62 −10 lines changed


tests/fault_tolerance/hardware/fault_injection_service/helpers/inference_testing.py

Lines changed: 62 additions & 10 deletions
@@ -64,6 +64,7 @@ def __init__(self, endpoint: str, model_name: str, timeout: int = 30):
         self.thread: Optional[threading.Thread] = None
         self.results: List[Dict] = []
         self.lock = threading.Lock()
+        self.checkpoint_index = 0  # Track checkpoint for per-phase stats
 
     def send_inference_request(self, prompt: str = "Hello, world!") -> Dict:
         """
@@ -152,36 +153,87 @@ def stop(self) -> List[Dict]:
         with self.lock:
             return self.results.copy()
 
-    def get_stats(self) -> Dict:
+    def checkpoint(self):
+        """Mark current point for per-phase stats. Call before each test phase."""
+        with self.lock:
+            self.checkpoint_index = len(self.results)
+
+    def get_stats(self, since_checkpoint: bool = False) -> Dict:
         """
-        Get statistics for current results.
+        Get statistics for results including latency percentiles.
+
+        Args:
+            since_checkpoint: If True, only return stats since last checkpoint.
+                If False, return cumulative stats (default).
 
         Returns:
-            Dict with keys: total, success, failed, success_rate, avg_latency, errors
+            Dict with keys: total, success, failed, success_rate,
+            avg_latency, p50_latency, p95_latency, p99_latency,
+            min_latency, max_latency, errors
         """
         with self.lock:
-            if not self.results:
+            # Get results based on whether we want per-phase or cumulative
+            if since_checkpoint:
+                results = self.results[self.checkpoint_index :]
+            else:
+                results = self.results
+
+            if not results:
                 return {
                     "total": 0,
                     "success": 0,
                     "failed": 0,
                     "success_rate": 0.0,
                     "avg_latency": 0.0,
+                    "p50_latency": 0.0,
+                    "p95_latency": 0.0,
+                    "p99_latency": 0.0,
+                    "min_latency": 0.0,
+                    "max_latency": 0.0,
                     "errors": [],
                 }
 
-            total = len(self.results)
-            success = sum(1 for r in self.results if r["success"])
+            total = len(results)
+            success = sum(1 for r in results if r["success"])
             failed = total - success
-            avg_latency = sum(r["latency"] for r in self.results if r["success"]) / max(
-                success, 1
-            )
+
+            # Calculate latency stats for successful requests only
+            success_latencies = sorted([r["latency"] for r in results if r["success"]])
+
+            if success_latencies:
+                avg_latency = sum(success_latencies) / len(success_latencies)
+                min_latency = min(success_latencies)
+                max_latency = max(success_latencies)
+
+                # Calculate percentiles
+                def percentile(data, p):
+                    """Calculate percentile (0-100)"""
+                    if not data:
+                        return 0.0
+                    k = (len(data) - 1) * (p / 100.0)
+                    f = int(k)
+                    c = f + 1 if (f + 1) < len(data) else f
+                    if f == c:
+                        return data[f]
+                    return data[f] * (c - k) + data[c] * (k - f)
+
+                p50 = percentile(success_latencies, 50)
+                p95 = percentile(success_latencies, 95)
+                p99 = percentile(success_latencies, 99)
+            else:
+                avg_latency = min_latency = max_latency = 0.0
+                p50 = p95 = p99 = 0.0
 
             return {
                 "total": total,
                 "success": success,
                 "failed": failed,
                 "success_rate": (success / total) * 100,
                 "avg_latency": avg_latency,
-                "errors": [r["error"] for r in self.results if r["error"]][:5],
+                "p50_latency": p50,
+                "p95_latency": p95,
+                "p99_latency": p99,
+                "min_latency": min_latency,
+                "max_latency": max_latency,
+                "errors": [r["error"] for r in results if r["error"]][:5],
             }

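The percentile helper added above interpolates linearly between the two nearest ranks of the sorted latency list (the common "linear" percentile definition, e.g. numpy's default method). A standalone copy of that helper, for a quick sanity check of the arithmetic:

def percentile(data, p):
    """Linear-interpolation percentile over a sorted list; p is 0-100."""
    if not data:
        return 0.0
    k = (len(data) - 1) * (p / 100.0)
    f = int(k)
    c = f + 1 if (f + 1) < len(data) else f
    if f == c:
        return data[f]
    return data[f] * (c - k) + data[c] * (k - f)

latencies = sorted([0.10, 0.20, 0.30, 0.40, 0.50])
print(percentile(latencies, 50))   # 0.30 -- k = 2.0, the upper neighbour gets zero weight
print(percentile(latencies, 95))   # 0.48 -- 0.40 * 0.2 + 0.50 * 0.8
print(percentile(latencies, 100))  # 0.50 -- k = 4.0, f == c, last element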