@@ -64,6 +64,7 @@ def __init__(self, endpoint: str, model_name: str, timeout: int = 30):
         self.thread: Optional[threading.Thread] = None
         self.results: List[Dict] = []
         self.lock = threading.Lock()
+        self.checkpoint_index = 0  # Track checkpoint for per-phase stats
 
     def send_inference_request(self, prompt: str = "Hello, world!") -> Dict:
         """
@@ -152,36 +153,87 @@ def stop(self) -> List[Dict]:
         with self.lock:
             return self.results.copy()
 
-    def get_stats(self) -> Dict:
+    def checkpoint(self):
+        """Mark current point for per-phase stats. Call before each test phase."""
+        with self.lock:
+            self.checkpoint_index = len(self.results)
+
+    def get_stats(self, since_checkpoint: bool = False) -> Dict:
         """
-        Get statistics for current results.
+        Get statistics for results including latency percentiles.
+
+        Args:
+            since_checkpoint: If True, only return stats since last checkpoint.
+                If False, return cumulative stats (default).
 
         Returns:
-            Dict with keys: total, success, failed, success_rate, avg_latency, errors
+            Dict with keys: total, success, failed, success_rate,
+                avg_latency, p50_latency, p95_latency, p99_latency,
+                min_latency, max_latency, errors
         """
         with self.lock:
-            if not self.results:
+            # Get results based on whether we want per-phase or cumulative
+            if since_checkpoint:
+                results = self.results[self.checkpoint_index:]
+            else:
+                results = self.results
+
+            if not results:
                 return {
                     "total": 0,
                     "success": 0,
                     "failed": 0,
                     "success_rate": 0.0,
                     "avg_latency": 0.0,
+                    "p50_latency": 0.0,
+                    "p95_latency": 0.0,
+                    "p99_latency": 0.0,
+                    "min_latency": 0.0,
+                    "max_latency": 0.0,
                     "errors": [],
                 }
 
-            total = len(self.results)
-            success = sum(1 for r in self.results if r["success"])
+            total = len(results)
+            success = sum(1 for r in results if r["success"])
             failed = total - success
-            avg_latency = sum(r["latency"] for r in self.results if r["success"]) / max(
-                success, 1
-            )
+
+            # Calculate latency stats for successful requests only
+            success_latencies = sorted([r["latency"] for r in results if r["success"]])
+
+            if success_latencies:
+                avg_latency = sum(success_latencies) / len(success_latencies)
+                min_latency = min(success_latencies)
+                max_latency = max(success_latencies)
+
+                # Calculate percentiles
+                def percentile(data, p):
+                    """Calculate percentile (0-100)"""
+                    if not data:
+                        return 0.0
+                    k = (len(data) - 1) * (p / 100.0)
+                    f = int(k)
+                    c = f + 1 if (f + 1) < len(data) else f
+                    if f == c:
+                        return data[f]
+                    return data[f] * (c - k) + data[c] * (k - f)
+
+                p50 = percentile(success_latencies, 50)
+                p95 = percentile(success_latencies, 95)
+                p99 = percentile(success_latencies, 99)
+            else:
+                avg_latency = min_latency = max_latency = 0.0
+                p50 = p95 = p99 = 0.0
 
             return {
                 "total": total,
                 "success": success,
                 "failed": failed,
                 "success_rate": (success / total) * 100,
                 "avg_latency": avg_latency,
-                "errors": [r["error"] for r in self.results if r["error"]][:5],
+                "p50_latency": p50,
+                "p95_latency": p95,
+                "p99_latency": p99,
+                "min_latency": min_latency,
+                "max_latency": max_latency,
+                "errors": [r["error"] for r in results if r["error"]][:5],
             }
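
For reference, the percentile helper added in this diff implements plain linear interpolation over the sorted latency list, which is the same rule numpy.percentile applies by default. A standalone sanity check of that formula, written here for illustration and not part of the change:

def percentile(data, p):
    """Linear-interpolated percentile of a pre-sorted list (p in 0-100)."""
    if not data:
        return 0.0
    k = (len(data) - 1) * (p / 100.0)
    f = int(k)
    c = f + 1 if (f + 1) < len(data) else f
    if f == c:
        return data[f]
    return data[f] * (c - k) + data[c] * (k - f)

# For [10, 20, 30, 40, 50] and p=95: k = 4 * 0.95 = 3.8, f = 3, c = 4,
# so the result is 40 * 0.2 + 50 * 0.8 = 48.0.
assert percentile([10, 20, 30, 40, 50], 50) == 30
assert abs(percentile([10, 20, 30, 40, 50], 95) - 48.0) < 1e-9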
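
The checkpoint() / get_stats(since_checkpoint=True) pair is meant to report per-phase numbers without clearing the cumulative history. A minimal self-contained sketch of that call pattern (the stand-in class and hard-coded latencies below are illustrative only, not taken from the PR):

import threading

class _StatsDemo:
    """Stand-in reproducing only the checkpoint/slicing behavior from the diff."""

    def __init__(self):
        self.results = []
        self.lock = threading.Lock()
        self.checkpoint_index = 0

    def checkpoint(self):
        with self.lock:
            self.checkpoint_index = len(self.results)

    def get_stats(self, since_checkpoint=False):
        with self.lock:
            results = self.results[self.checkpoint_index:] if since_checkpoint else self.results
            latencies = [r["latency"] for r in results if r["success"]]
            return {
                "total": len(results),
                "avg_latency": sum(latencies) / len(latencies) if latencies else 0.0,
            }

demo = _StatsDemo()
demo.results += [{"success": True, "latency": 0.10}, {"success": True, "latency": 0.20}]

demo.checkpoint()  # mark the start of a new test phase
demo.results += [{"success": True, "latency": 0.40}]

print(demo.get_stats(since_checkpoint=True))  # phase only: total=1, avg_latency=0.4
print(demo.get_stats())                       # cumulative: total=3, avg_latency~0.233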