@@ -104,7 +104,7 @@ type Configuration struct {
104104 // KVCacheTransferTimeStdDev similar to TimeToFirstTokenStdDev
105105 KVCacheTransferTimeStdDev int `yaml:"kv-cache-transfer-time-std-dev" json:"kv-cache-transfer-time-std-dev"`
106106
107- // TimeFactorUnderLoad is a multiplicative factor that affects the overall time taken for requests when parallel
107+ // TimeFactorUnderLoad is a multiplicative factor that affects the overall time taken for requests when parallel
108108 // requests are being processed.
109109 // The value of this factor must be >= 1.0, with a default of 1.0.
110110 // - If this factor is 1.0, no extra time is added.
@@ -176,27 +176,27 @@ type Configuration struct {
176176 DPSize int `yaml:"data-parallel-size" json:"data-parallel-size"`
177177}
178178
179- func (c * Configuration ) calcLoadFactor (runReqChan * chan int64 ) float64 {
179+ func (c * Configuration ) calcLoadFactor (nRunningReqs int64 ) float64 {
180180 if c .MaxNumSeqs <= 1 {
181181 return 1.0
182182 }
183- return 1 + (c .TimeFactorUnderLoad - 1 )* float64 (len ( * runReqChan ) - 1 )/ float64 (c .MaxNumSeqs - 1 )
183+ return 1 + (c .TimeFactorUnderLoad - 1 )* float64 (nRunningReqs - 1 )/ float64 (c .MaxNumSeqs - 1 )
184184}
185185
186- func (c * Configuration ) GetTimeToFirstToken (runReqChan * chan int64 ) int {
187- return int (float64 (c .TimeToFirstToken ) * c .calcLoadFactor (runReqChan ))
186+ func (c * Configuration ) GetTimeToFirstToken (nRunningReqs int64 ) int {
187+ return int (float64 (c .TimeToFirstToken ) * c .calcLoadFactor (nRunningReqs ))
188188}
189189
190- func (c * Configuration ) GetPrefillOverhead (runReqChan * chan int64 ) int {
191- return int (float64 (c .PrefillOverhead ) * c .calcLoadFactor (runReqChan ))
190+ func (c * Configuration ) GetPrefillOverhead (nRunningReqs int64 ) int {
191+ return int (float64 (c .PrefillOverhead ) * c .calcLoadFactor (nRunningReqs ))
192192}
193193
194- func (c * Configuration ) GetPrefillTimePerToken (runReqChan * chan int64 ) int {
195- return int (float64 (c .PrefillTimePerToken ) * c .calcLoadFactor (runReqChan ))
194+ func (c * Configuration ) GetPrefillTimePerToken (nRunningReqs int64 ) int {
195+ return int (float64 (c .PrefillTimePerToken ) * c .calcLoadFactor (nRunningReqs ))
196196}
197197
198- func (c * Configuration ) GetInterTokenLatency (runReqChan * chan int64 ) int {
199- return int (float64 (c .InterTokenLatency ) * c .calcLoadFactor (runReqChan ))
198+ func (c * Configuration ) GetInterTokenLatency (nRunningReqs int64 ) int {
199+ return int (float64 (c .InterTokenLatency ) * c .calcLoadFactor (nRunningReqs ))
200200}
201201
202202type Metrics struct {
0 commit comments