components/backends/trtllm/engine_configs/decode.yaml (2 changes: 1 addition & 1 deletion)
@@ -28,4 +28,4 @@ kv_cache_config:
  free_gpu_memory_fraction: 0.95

cache_transceiver_config:
  backend: default
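For context, a minimal sketch of how the tail of decode.yaml reads once this change is applied, assuming the two-space indentation these TensorRT-LLM engine configs normally use; the commented max_tokens_in_buffer line is an assumed optional field and is not part of this diff:

kv_cache_config:
  free_gpu_memory_fraction: 0.95

cache_transceiver_config:
  backend: default
  # max_tokens_in_buffer: 8192   # assumption: optional buffer-sizing knob, not touched by this PR

The cache_transceiver_config block configures the KV-cache transceiver that moves cache blocks between prefill and decode workers in disaggregated serving; backend: default leaves the transport choice to TensorRT-LLM rather than pinning a specific one (e.g. UCX or NIXL, if available in the build).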

@@ -51,4 +51,7 @@ cuda_graph_config:
- 128
- 256

print_iter_log: true

cache_transceiver_config:
  backend: default

@@ -36,3 +36,6 @@ disable_overlap_scheduler: true
speculative_config:
  decoding_type: MTP
  num_nextn_predict_layers: 1

cache_transceiver_config:
  backend: default
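This hunk is from one of the MTP (multi-token prediction) speculative-decoding configs; a minimal sketch of the file tail with the block appended, assuming two-space indentation and only the keys visible above:

disable_overlap_scheduler: true

speculative_config:
  decoding_type: MTP
  num_nextn_predict_layers: 1   # assumption: one MTP draft head, i.e. one extra predicted token per step

cache_transceiver_config:
  backend: default

The same two-line cache_transceiver_config block is appended, unchanged, to each of the remaining engine configs in this diff.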

@@ -55,3 +55,6 @@ cuda_graph_config:
- 256

print_iter_log: true

cache_transceiver_config:
  backend: default

@@ -33,4 +33,7 @@ kv_cache_config:
# config field from 'enable_overlap_scheduler' to 'disable_overlap_scheduler':
# https://github.com/NVIDIA/TensorRT-LLM/commit/b4e5df0ee0024eda3eeb83a6ba822245a30ab428
disable_overlap_scheduler: true
print_iter_log: true

cache_transceiver_config:
  backend: default

@@ -61,3 +61,6 @@ cuda_graph_config:


print_iter_log: true

cache_transceiver_config:
  backend: default

@@ -38,4 +38,7 @@ kv_cache_config:
# config field from 'enable_overlap_scheduler' to 'disable_overlap_scheduler':
# https://github.com/NVIDIA/TensorRT-LLM/commit/b4e5df0ee0024eda3eeb83a6ba822245a30ab428
disable_overlap_scheduler: true
print_iter_log: true

cache_transceiver_config:
  backend: default

@@ -49,3 +49,6 @@ cuda_graph_config:
- 256

print_iter_log: true

cache_transceiver_config:
  backend: default

@@ -34,3 +34,6 @@ speculative_config:
kv_cache_config:
  free_gpu_memory_fraction: 0.5
  enable_block_reuse: false

cache_transceiver_config:
  backend: default

@@ -38,3 +38,6 @@ cuda_graph_config:
  max_batch_size: 256

print_iter_log: true

cache_transceiver_config:
  backend: default

@@ -32,3 +32,6 @@ speculative_config:
kv_cache_config:
  free_gpu_memory_fraction: 0.5
  enable_block_reuse: false

cache_transceiver_config:
  backend: default