Skip to content

Commit 25bde53

Browse files
irar2 and smarunich
authored and committed
KV cache and tokenization related configuration (llm-d#125)
Signed-off-by: Ira <IRAR@il.ibm.com> Signed-off-by: Sergey Marunich <marunich.s@gmail.com>
1 parent 0308c8f commit 25bde53

File tree

5 files changed

+64
-10
lines changed

5 files changed

+64
-10
lines changed

README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,12 @@ For more details see the <a href="https://docs.vllm.ai/en/stable/getting_started
117117
- `tool-call-not-required-param-probability`: the probability to add a parameter, that is not required, in a tool call, optional, defaults to 50
118118
- `object-tool-call-not-required-field-probability`: the probability to add a field, that is not required, in an object in a tool call, optional, defaults to 50
119119
- `enable-kvcache`: if true, the KV cache support will be enabled in the simulator. In this case, the KV cache will be simulated, and ZMQ events will be published when a KV cache block is added or evicted.
120-
120+
- `kv-cache-size`: the maximum number of token blocks in the KV cache, optional, defaults to 1024
121+
- `block-size`: token block size for contiguous chunks of tokens, possible values: 8, 16, 32, 64, 128, optional, defaults to 16
122+
- `tokenizers-cache-dir`: the directory for caching tokenizers
123+
- `hash-seed`: seed for hash generation (if not set, it is read from the PYTHONHASHSEED environment variable)
124+
- `zmq-endpoint`: ZMQ address to publish events, optional, defaults to tcp://localhost:5557
125+
121126
In addition, as we are using klog, the following parameters are available:
122127
- `add_dir_header`: if true, adds the file directory to the header of the log messages
123128
- `alsologtostderr`: log to standard error as well as files (no effect when -logtostderr=true)

pkg/common/config.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,18 @@ type Configuration struct {
113113

114114
// EnableKVCache defines if kv cache feature will be enabled
115115
EnableKVCache bool `yaml:"enable-kvcache"`
116+
// KVCacheSize is the maximum number of token blocks in kv cache, the default value is 1024
117+
KVCacheSize int `yaml:"kv-cache-size"`
118+
119+
// TokenizersCacheDir is the directory for caching tokenizers
120+
TokenizersCacheDir string `yaml:"tokenizers-cache-dir"`
121+
// TokenBlockSize is token block size for contiguous chunks of tokens, possible values: 8,16,32,64,128, defaults to 16
122+
TokenBlockSize int `yaml:"block-size"`
123+
// HashSeed is the seed for hash generation (if not set, is read from PYTHONHASHSEED environment variable)
124+
HashSeed string `yaml:"hash-seed"`
125+
126+
// ZMQEndpoint is the ZMQ address to publish events, the default value is tcp://localhost:5557
127+
ZMQEndpoint string `yaml:"zmq-endpoint"`
116128
}
117129

118130
type LoraModule struct {
@@ -168,6 +180,9 @@ func newConfig() *Configuration {
168180
MinToolCallArrayParamLength: 1,
169181
ToolCallNotRequiredParamProbability: 50,
170182
ObjectToolCallNotRequiredParamProbability: 50,
183+
KVCacheSize: 1024,
184+
TokenBlockSize: 16,
185+
ZMQEndpoint: "tcp://localhost:5557",
171186
}
172187
}
173188

@@ -269,6 +284,15 @@ func (c *Configuration) validate() error {
269284
if c.ObjectToolCallNotRequiredParamProbability < 0 || c.ObjectToolCallNotRequiredParamProbability > 100 {
270285
return errors.New("ObjectToolCallNotRequiredParamProbability should be between 0 and 100")
271286
}
287+
288+
if c.TokenBlockSize != 8 && c.TokenBlockSize != 16 && c.TokenBlockSize != 32 &&
289+
c.TokenBlockSize != 64 && c.TokenBlockSize != 128 {
290+
return errors.New("token block size should be one of the following: 8, 16, 32, 64, 128")
291+
}
292+
293+
if c.KVCacheSize < 0 {
294+
return errors.New("KV cache size cannot be negative")
295+
}
272296
return nil
273297
}
274298

@@ -313,7 +337,13 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
313337
f.IntVar(&config.MinToolCallArrayParamLength, "min-tool-call-array-param-length", config.MinToolCallArrayParamLength, "Minimum possible length of array parameters in a tool call")
314338
f.IntVar(&config.ToolCallNotRequiredParamProbability, "tool-call-not-required-param-probability", config.ToolCallNotRequiredParamProbability, "Probability to add a parameter, that is not required, in a tool call")
315339
f.IntVar(&config.ObjectToolCallNotRequiredParamProbability, "object-tool-call-not-required-field-probability", config.ObjectToolCallNotRequiredParamProbability, "Probability to add a field, that is not required, in an object in a tool call")
340+
316341
f.BoolVar(&config.EnableKVCache, "enable-kvcache", config.EnableKVCache, "Defines if KV cache feature is enabled")
342+
f.IntVar(&config.KVCacheSize, "kv-cache-size", config.KVCacheSize, "Maximum number of token blocks in kv cache")
343+
f.IntVar(&config.TokenBlockSize, "block-size", config.TokenBlockSize, "Token block size for contiguous chunks of tokens, possible values: 8,16,32,64,128")
344+
f.StringVar(&config.TokenizersCacheDir, "tokenizers-cache-dir", config.TokenizersCacheDir, "Directory for caching tokenizers")
345+
f.StringVar(&config.HashSeed, "hash-seed", config.HashSeed, "Seed for hash generation (if not set, is read from PYTHONHASHSEED environment variable)")
346+
f.StringVar(&config.ZMQEndpoint, "zmq-endpoint", config.ZMQEndpoint, "ZMQ address to publish events")
317347

318348
// These values were manually parsed above in getParamValueFromArgs, we leave this in order to get these flags in --help
319349
var dummyString string
@@ -348,6 +378,13 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
348378
config.ServedModelNames = servedModelNames
349379
}
350380

381+
if config.HashSeed == "" {
382+
hashSeed := os.Getenv("PYTHONHASHSEED")
383+
if hashSeed != "" {
384+
config.HashSeed = hashSeed
385+
}
386+
}
387+
351388
if err := config.validate(); err != nil {
352389
return nil, err
353390
}

pkg/common/config_test.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,16 @@ var _ = Describe("Simulator configuration", func() {
281281
args: []string{"cmd", "--kv-cache-transfer-latency-std-dev", "-35",
282282
"--config", "../../manifests/config.yaml"},
283283
},
284+
{
285+
name: "invalid (negative) kv-cache-size",
286+
args: []string{"cmd", "--kv-cache-size", "-35",
287+
"--config", "../../manifests/config.yaml"},
288+
},
289+
{
290+
name: "invalid block-size",
291+
args: []string{"cmd", "--block-size", "35",
292+
"--config", "../../manifests/config.yaml"},
293+
},
284294
}
285295

286296
for _, test := range invalidTests {

pkg/kv-cache/kv_cache.go

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,29 +21,31 @@ import (
2121
"fmt"
2222

2323
"github.com/go-logr/logr"
24+
"github.com/llm-d/llm-d-inference-sim/pkg/common"
2425
openaiserverapi "github.com/llm-d/llm-d-inference-sim/pkg/openai-server-api"
2526
"github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache/kvblock"
2627
"github.com/llm-d/llm-d-kv-cache-manager/pkg/tokenization"
2728
)
2829

29-
const (
30-
// TODO move it to configuration
31-
maxBlocks = 100
32-
)
33-
3430
type KVCacheHelper struct {
3531
tokenizer tokenization.Tokenizer
3632
tokensProcessor kvblock.TokenProcessor // turns tokens to kv block keys
3733
logger logr.Logger
3834
blockCache *blockCache
3935
}
4036

41-
func NewKVCacheHelper(logger logr.Logger) (*KVCacheHelper, error) {
42-
// TODO update config by command line params
37+
func NewKVCacheHelper(config *common.Configuration, logger logr.Logger) (*KVCacheHelper, error) {
4338
tokenProcConfig := kvblock.DefaultTokenProcessorConfig()
39+
tokenProcConfig.BlockSize = config.TokenBlockSize
40+
if config.HashSeed != "" {
41+
tokenProcConfig.HashSeed = config.HashSeed
42+
}
4443
tokensProcessor := kvblock.NewChunkedTokenDatabase(tokenProcConfig)
4544

4645
tokenizationConfig := tokenization.DefaultConfig()
46+
if config.TokenizersCacheDir != "" {
47+
tokenizationConfig.TokenizersCacheDir = config.TokenizersCacheDir
48+
}
4749
tokenizer, err := tokenization.NewCachedHFTokenizer(tokenizationConfig.HFTokenizerConfig)
4850

4951
if err != nil {
@@ -53,7 +55,7 @@ func NewKVCacheHelper(logger logr.Logger) (*KVCacheHelper, error) {
5355
return &KVCacheHelper{
5456
tokenizer: tokenizer,
5557
tokensProcessor: tokensProcessor,
56-
blockCache: newBlockCache(maxBlocks, logger),
58+
blockCache: newBlockCache(config.KVCacheSize, logger),
5759
logger: logger,
5860
}, nil
5961
}

pkg/llm-d-inference-sim/simulator.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ func (s *VllmSimulator) Start(ctx context.Context) error {
118118
}
119119

120120
if s.config.EnableKVCache {
121-
s.kvcacheHelper, err = kvcache.NewKVCacheHelper(s.logger)
121+
s.kvcacheHelper, err = kvcache.NewKVCacheHelper(s.config, s.logger)
122122
if err != nil {
123123
return err
124124
}

0 commit comments

Comments
 (0)