Skip to content

Commit d537378

Browse files
authored
fix: Update disagg configs for trtllm 1.0.0rc4 changes (main) (#2278) (#2282)
1 parent 4b8a748 commit d537378

File tree

12 files changed

+42
-12
lines changed

12 files changed

+42
-12
lines changed

components/backends/trtllm/engine_configs/decode.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,4 @@ kv_cache_config:
2828
free_gpu_memory_fraction: 0.95
2929

3030
cache_transceiver_config:
31-
backend: default
31+
backend: default

components/backends/trtllm/engine_configs/deepseek_r1/mtp/mtp_decode.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,4 +51,7 @@ cuda_graph_config:
5151
- 128
5252
- 256
5353

54-
print_iter_log: true
54+
print_iter_log: true
55+
56+
cache_transceiver_config:
57+
backend: default

components/backends/trtllm/engine_configs/deepseek_r1/mtp/mtp_prefill.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,6 @@ disable_overlap_scheduler: true
3636
speculative_config:
3737
decoding_type: MTP
3838
num_nextn_predict_layers: 1
39+
40+
cache_transceiver_config:
41+
backend: default

components/backends/trtllm/engine_configs/deepseek_r1/simple/decode.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,6 @@ cuda_graph_config:
5555
- 256
5656

5757
print_iter_log: true
58+
59+
cache_transceiver_config:
60+
backend: default

components/backends/trtllm/engine_configs/deepseek_r1/simple/prefill.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,7 @@ kv_cache_config:
3333
# config field from 'enable_overlap_scheduler' to 'disable_overlap_scheduler':
3434
# https://github.com/NVIDIA/TensorRT-LLM/commit/b4e5df0ee0024eda3eeb83a6ba822245a30ab428
3535
disable_overlap_scheduler: true
36-
print_iter_log: true
36+
print_iter_log: true
37+
38+
cache_transceiver_config:
39+
backend: default

components/backends/trtllm/engine_configs/deepseek_r1/wide_ep/wide_ep_decode.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,6 @@ cuda_graph_config:
6161

6262

6363
print_iter_log: true
64+
65+
cache_transceiver_config:
66+
backend: default

components/backends/trtllm/engine_configs/deepseek_r1/wide_ep/wide_ep_prefill.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,7 @@ kv_cache_config:
3838
# config field from 'enable_overlap_scheduler' to 'disable_overlap_scheduler':
3939
# https://github.com/NVIDIA/TensorRT-LLM/commit/b4e5df0ee0024eda3eeb83a6ba822245a30ab428
4040
disable_overlap_scheduler: true
41-
print_iter_log: true
41+
print_iter_log: true
42+
43+
cache_transceiver_config:
44+
backend: default

components/backends/trtllm/engine_configs/llama4/eagle/eagle_decode.yaml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,13 @@ max_num_tokens: 512
2121
# 8704 = 8192 ISL + 512 OSL
2222
max_seq_len: 8704
2323
disable_overlap_scheduler: true
24-
autotuner_enabled: false
24+
enable_autotuner: false
2525

2626
# Enable Speculative Decoding in the model engine
2727
speculative_config:
2828
decoding_type: Eagle
2929
max_draft_len: 1
30-
pytorch_weights_path: nvidia/Llama-4-Maverick-17B-128E-Eagle3
30+
speculative_model_dir: nvidia/Llama-4-Maverick-17B-128E-Eagle3
3131
eagle3_one_model: false
3232

3333
kv_cache_config:
@@ -49,3 +49,6 @@ cuda_graph_config:
4949
- 256
5050

5151
print_iter_log: true
52+
53+
cache_transceiver_config:
54+
backend: default

components/backends/trtllm/engine_configs/llama4/eagle/eagle_prefill.yaml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,20 @@ max_batch_size: 1
2020
max_num_tokens: 8192
2121
max_seq_len: 8192
2222
print_iter_log: true
23-
kv_cache_dtype: fp8
2423
disable_overlap_scheduler: true
25-
autotuner_enabled: false
24+
enable_autotuner: false
2625

2726
# Enable Speculative Decoding in the model engine
2827
speculative_config:
2928
decoding_type: Eagle
3029
max_draft_len: 1
31-
pytorch_weights_path: nvidia/Llama-4-Maverick-17B-128E-Eagle3
30+
speculative_model_dir: nvidia/Llama-4-Maverick-17B-128E-Eagle3
3231
eagle3_one_model: false
3332

3433
kv_cache_config:
3534
free_gpu_memory_fraction: 0.5
3635
enable_block_reuse: false
36+
dtype: fp8
37+
38+
cache_transceiver_config:
39+
backend: default

components/backends/trtllm/engine_configs/llama4/eagle_one_model/eagle_agg.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ disable_overlap_scheduler: true # disable_overlap_scheduler is having acc issue
2424
speculative_config:
2525
decoding_type: Eagle
2626
max_draft_len: 3
27-
pytorch_weights_path: nvidia/Llama-4-Maverick-17B-128E-Eagle3
27+
speculative_model_dir: nvidia/Llama-4-Maverick-17B-128E-Eagle3
2828
eagle3_one_model: true
2929

3030
kv_cache_config:

0 commit comments

Comments
 (0)