Skip to content

Commit c920cbd

Browse files
feat: Add --custom-jinja-template argument to pass a custom chat template for vLLM (#2829)
Signed-off-by: Krishnan Prashanth <kprashanth@nvidia.com>
1 parent dea5f88 commit c920cbd

File tree

13 files changed

+411
-21
lines changed

13 files changed

+411
-21
lines changed

components/backends/vllm/src/dynamo/vllm/args.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@ class Config:
4747
migration_limit: int = 0
4848
kv_port: Optional[int] = None
4949
port_range: DynamoPortRange
50+
custom_jinja_template: Optional[str] = None
5051

5152
# mirror vLLM
5253
model: str
@@ -100,7 +101,7 @@ def parse_args() -> Config:
100101
help="List of connectors to use in order (e.g., --connector nixl lmcache). "
101102
"Options: nixl, lmcache, kvbm, null, none. Default: nixl. Order will be preserved in MultiConnector.",
102103
)
103-
# To avoid name conflicts with different backends, adoped prefix "dyn-" for dynamo specific args
104+
# To avoid name conflicts with different backends, adopted prefix "dyn-" for dynamo specific args
104105
parser.add_argument(
105106
"--dyn-tool-call-parser",
106107
type=str,
@@ -115,6 +116,12 @@ def parse_args() -> Config:
115116
choices=get_reasoning_parser_names(),
116117
help="Reasoning parser name for the model.",
117118
)
119+
parser.add_argument(
120+
"--custom-jinja-template",
121+
type=str,
122+
default=None,
123+
help="Path to a custom Jinja template file to override the model's default chat template. This template will take precedence over any template found in the model repository.",
124+
)
118125

119126
parser = AsyncEngineArgs.add_cli_args(parser)
120127
args = parser.parse_args()
@@ -148,6 +155,7 @@ def parse_args() -> Config:
148155
)
149156
config.tool_call_parser = args.dyn_tool_call_parser
150157
config.reasoning_parser = args.dyn_reasoning_parser
158+
config.custom_jinja_template = args.custom_jinja_template
151159
# Check for conflicting flags
152160
has_kv_transfer_config = (
153161
hasattr(engine_args, "kv_transfer_config")

components/backends/vllm/src/dynamo/vllm/main.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,7 @@ async def init(runtime: DistributedRuntime, config: Config):
258258
kv_cache_block_size=config.engine_args.block_size,
259259
migration_limit=config.migration_limit,
260260
runtime_config=runtime_config,
261+
custom_template_path=config.custom_jinja_template,
261262
)
262263

263264
try:

0 commit comments

Comments
 (0)