Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,12 @@ For more details see the <a href="https://docs.vllm.ai/en/stable/getting_started
- `tool-call-not-required-param-probability`: the probability to add a parameter, that is not required, in a tool call, optional, defaults to 50
- `object-tool-call-not-required-field-probability`: the probability to add a field, that is not required, in an object in a tool call, optional, defaults to 50
- `enable-kvcache`: if true, KV cache support will be enabled in the simulator. In this case, the KV cache will be simulated, and ZMQ events will be published when a KV cache block is added or evicted.

- `kv-cache-size`: the maximum number of token blocks in the KV cache, optional, defaults to 1024
- `block-size`: token block size for contiguous chunks of tokens, possible values: 8, 16, 32, 64, 128, optional, defaults to 16
- `tokenizers-cache-dir`: the directory for caching tokenizers
- `hash-seed`: seed for hash generation (if not set, it is read from the `PYTHONHASHSEED` environment variable)
- `zmq-endpoint`: ZMQ address to publish events to, optional, defaults to `tcp://localhost:5557`

In addition, as we are using klog, the following parameters are available:
- `add_dir_header`: if true, adds the file directory to the header of the log messages
- `alsologtostderr`: log to standard error as well as files (no effect when -logtostderr=true)
Expand Down
37 changes: 37 additions & 0 deletions pkg/common/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,18 @@ type Configuration struct {

// EnableKVCache defines if kv cache feature will be enabled
EnableKVCache bool `yaml:"enable-kvcache"`
// KVCacheSize is the maximum number of token blocks in kv cache, the default value is 1024
KVCacheSize int `yaml:"kv-cache-size"`

// TokenizersCacheDir is the directory for caching tokenizers
TokenizersCacheDir string `yaml:"tokenizers-cache-dir"`
// TokenBlockSize is token block size for contiguous chunks of tokens, possible values: 8,16,32,64,128, defaults to 16
TokenBlockSize int `yaml:"block-size"`
// HashSeed is the seed for hash generation (if not set, is read from PYTHONHASHSEED environment variable)
HashSeed string `yaml:"hash-seed"`

// ZMQEndpoint is the ZMQ address to publish events, the default value is tcp://localhost:5557
ZMQEndpoint string `yaml:"zmq-endpoint"`
}

type LoraModule struct {
Expand Down Expand Up @@ -168,6 +180,9 @@ func newConfig() *Configuration {
MinToolCallArrayParamLength: 1,
ToolCallNotRequiredParamProbability: 50,
ObjectToolCallNotRequiredParamProbability: 50,
KVCacheSize: 1024,
TokenBlockSize: 16,
ZMQEndpoint: "tcp://localhost:5557",
}
}

Expand Down Expand Up @@ -269,6 +284,15 @@ func (c *Configuration) validate() error {
if c.ObjectToolCallNotRequiredParamProbability < 0 || c.ObjectToolCallNotRequiredParamProbability > 100 {
return errors.New("ObjectToolCallNotRequiredParamProbability should be between 0 and 100")
}

if c.TokenBlockSize != 8 && c.TokenBlockSize != 16 && c.TokenBlockSize != 32 &&
c.TokenBlockSize != 64 && c.TokenBlockSize != 128 {
return errors.New("token block size should be one of the following: 8, 16, 32, 64, 128")
}

if c.KVCacheSize < 0 {
return errors.New("KV cache size cannot be negative")
}
return nil
}

Expand Down Expand Up @@ -313,7 +337,13 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
f.IntVar(&config.MinToolCallArrayParamLength, "min-tool-call-array-param-length", config.MinToolCallArrayParamLength, "Minimum possible length of array parameters in a tool call")
f.IntVar(&config.ToolCallNotRequiredParamProbability, "tool-call-not-required-param-probability", config.ToolCallNotRequiredParamProbability, "Probability to add a parameter, that is not required, in a tool call")
f.IntVar(&config.ObjectToolCallNotRequiredParamProbability, "object-tool-call-not-required-field-probability", config.ObjectToolCallNotRequiredParamProbability, "Probability to add a field, that is not required, in an object in a tool call")

f.BoolVar(&config.EnableKVCache, "enable-kvcache", config.EnableKVCache, "Defines if KV cache feature is enabled")
f.IntVar(&config.KVCacheSize, "kv-cache-size", config.KVCacheSize, "Maximum number of token blocks in kv cache")
f.IntVar(&config.TokenBlockSize, "block-size", config.TokenBlockSize, "Token block size for contiguous chunks of tokens, possible values: 8,16,32,64,128")
f.StringVar(&config.TokenizersCacheDir, "tokenizers-cache-dir", config.TokenizersCacheDir, "Directory for caching tokenizers")
f.StringVar(&config.HashSeed, "hash-seed", config.HashSeed, "Seed for hash generation (if not set, is read from PYTHONHASHSEED environment variable)")
f.StringVar(&config.ZMQEndpoint, "zmq-endpoint", config.ZMQEndpoint, "ZMQ address to publish events")

// These values were manually parsed above in getParamValueFromArgs, we leave this in order to get these flags in --help
var dummyString string
Expand Down Expand Up @@ -348,6 +378,13 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
config.ServedModelNames = servedModelNames
}

if config.HashSeed == "" {
hashSeed := os.Getenv("PYTHONHASHSEED")
if hashSeed != "" {
config.HashSeed = hashSeed
}
}

if err := config.validate(); err != nil {
return nil, err
}
Expand Down
10 changes: 10 additions & 0 deletions pkg/common/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,16 @@ var _ = Describe("Simulator configuration", func() {
args: []string{"cmd", "--kv-cache-transfer-latency-std-dev", "-35",
"--config", "../../manifests/config.yaml"},
},
{
name: "invalid (negative) kv-cache-size",
args: []string{"cmd", "--kv-cache-size", "-35",
"--config", "../../manifests/config.yaml"},
},
{
name: "invalid block-size",
args: []string{"cmd", "--block-size", "35",
"--config", "../../manifests/config.yaml"},
},
}

for _, test := range invalidTests {
Expand Down
18 changes: 10 additions & 8 deletions pkg/kv-cache/kv_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,29 +21,31 @@ import (
"fmt"

"github.com/go-logr/logr"
"github.com/llm-d/llm-d-inference-sim/pkg/common"
openaiserverapi "github.com/llm-d/llm-d-inference-sim/pkg/openai-server-api"
"github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache/kvblock"
"github.com/llm-d/llm-d-kv-cache-manager/pkg/tokenization"
)

const (
// TODO move it to configuration
maxBlocks = 100
)

// KVCacheHelper groups the components used for KV cache simulation:
// a tokenizer, a token processor, a logger and a block cache.
// Instances are built by NewKVCacheHelper.
type KVCacheHelper struct {
// tokenizer is the tokenization.Tokenizer held by the helper
tokenizer tokenization.Tokenizer
tokensProcessor kvblock.TokenProcessor // turns tokens to kv block keys
// logger is the logr.Logger held by the helper
logger logr.Logger
// blockCache is the block cache created with newBlockCache
blockCache *blockCache
}

func NewKVCacheHelper(logger logr.Logger) (*KVCacheHelper, error) {
// TODO update config by command line params
func NewKVCacheHelper(config *common.Configuration, logger logr.Logger) (*KVCacheHelper, error) {
tokenProcConfig := kvblock.DefaultTokenProcessorConfig()
tokenProcConfig.BlockSize = config.TokenBlockSize
if config.HashSeed != "" {
tokenProcConfig.HashSeed = config.HashSeed
}
tokensProcessor := kvblock.NewChunkedTokenDatabase(tokenProcConfig)

tokenizationConfig := tokenization.DefaultConfig()
if config.TokenizersCacheDir != "" {
tokenizationConfig.TokenizersCacheDir = config.TokenizersCacheDir
}
tokenizer, err := tokenization.NewCachedHFTokenizer(tokenizationConfig.HFTokenizerConfig)

if err != nil {
Expand All @@ -53,7 +55,7 @@ func NewKVCacheHelper(logger logr.Logger) (*KVCacheHelper, error) {
return &KVCacheHelper{
tokenizer: tokenizer,
tokensProcessor: tokensProcessor,
blockCache: newBlockCache(maxBlocks, logger),
blockCache: newBlockCache(config.KVCacheSize, logger),
logger: logger,
}, nil
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/llm-d-inference-sim/simulator.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ func (s *VllmSimulator) Start(ctx context.Context) error {
}

if s.config.EnableKVCache {
s.kvcacheHelper, err = kvcache.NewKVCacheHelper(s.logger)
s.kvcacheHelper, err = kvcache.NewKVCacheHelper(s.config, s.logger)
if err != nil {
return err
}
Expand Down