@@ -30,6 +30,7 @@ class ParallelSetup(NamedTuple):
3030 tp_size : int
3131 pp_size : int
3232 dcp_size : int
33+ cp_kv_cache_interleave_size : int
3334 eager_mode : bool
3435 chunked_prefill : bool
3536
@@ -52,6 +53,7 @@ def detailed(
5253 tp_base : int = 4 ,
5354 pp_base : int = 1 ,
5455 dcp_base : int = 1 ,
56+ cp_kv_cache_interleave_size : int = 1 ,
5557 multi_node_only : bool = False ,
5658 runner : RunnerOption = "auto" ,
5759 load_format : str | None = None ,
@@ -66,6 +68,7 @@ def detailed(
6668 tp_size = tp_base ,
6769 pp_size = pp_multiplier * pp_base ,
6870 dcp_size = int (dcp_multiplier * tp_base ),
71+ cp_kv_cache_interleave_size = cp_kv_cache_interleave_size ,
6972 eager_mode = eager_mode_val ,
7073 chunked_prefill = chunked_prefill_val ,
7174 )
@@ -108,6 +111,7 @@ def _compare_cp_with_tp(
108111 tp_size ,
109112 pp_size ,
110113 dcp_size ,
114+ cp_kv_cache_interleave_size ,
111115 eager_mode ,
112116 chunked_prefill ,
113117 ) = parallel_setup
@@ -180,6 +184,8 @@ def _compare_cp_with_tp(
180184 str (pp_size ),
181185 "--decode-context-parallel-size" ,
182186 str (dcp_size ),
187+ "--cp-kv-cache-interleave-size" ,
188+ str (cp_kv_cache_interleave_size ),
183189 "--distributed-executor-backend" ,
184190 distributed_backend ,
185191 ]
@@ -207,6 +213,7 @@ def _compare_cp_with_tp(
207213 "deepseek-ai/DeepSeek-V2-Lite-Chat" : [
208214 CPTestSettings .detailed (),
209215 CPTestSettings .detailed (tp_base = 2 ),
216+ CPTestSettings .detailed (tp_base = 2 , cp_kv_cache_interleave_size = 64 ),
210217 ],
211218 "bigcode/gpt_bigcode-santacoder" : [
212219 CPTestSettings .detailed (),
0 commit comments