We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent d7db532 commit 7f1a038Copy full SHA for 7f1a038
tpu_commons/models/jax/deepseek_v3.py
@@ -45,7 +45,7 @@ def __init__(self,
45
self.vllm_config = vllm_config
46
self.rng = nnx.Rngs(rng)
47
48
- num_layers: int = 5
+ num_layers: int = 61
49
num_local_experts: int = 256
50
51
vocab_size: int = 129280
0 commit comments