Commit 96f523d

pp notes

Signed-off-by: Chenyaaang <chenyangli@google.com>
1 parent: 922f4aa

3 files changed (+13 −6 lines)

tpu_commons/executors/ray_distributed_executor.py

Lines changed: 7 additions & 0 deletions
@@ -120,6 +120,8 @@ def _initialize_ray_cluster(self) -> None:
                 f"current platform {current_platform.device_name} does not "
                 "support ray.")
 
+        # Each node (host) is treated as a unit: with 2 hosts, Ray only sees
+        # 2 hosts; it does not subdivide the TPUs inside each host.
         placement_group_specs: List[Dict[str, float]] = [{
             device_str:
             node['Resources'][device_str]
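
To make the note above concrete: a standalone sketch (not this executor's code) of the per-host request it describes, assuming a "TPU" resource key and 2 hosts with 4 chips each. Ray reserves whole bundles, one per host, and never splits the chips inside a host.

# Standalone sketch, not the executor's code. Assumes a Ray cluster whose
# nodes expose a "TPU" resource (4 chips per host in this example).
from typing import Dict, List

import ray
from ray.util.placement_group import placement_group

ray.init()  # connect to / start the Ray cluster

device_str = "TPU"  # assumed resource key for TPU chips

# One bundle per host; each bundle asks for all 4 chips on that host.
# Ray schedules whole bundles -- it does not split the chips inside a host.
placement_group_specs: List[Dict[str, float]] = [
    {device_str: 4.0},  # host 0
    {device_str: 4.0},  # host 1
]

pg = placement_group(placement_group_specs)
ray.get(pg.ready())  # blocks until both hosts have been reserved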
@@ -155,6 +157,8 @@ def _initialize_ray_cluster(self) -> None:
 
     def _init_workers_ray(self, placement_group: "PlacementGroup",
                           **ray_remote_kwargs):
+        # placement_group: e.g. "need 2 hosts, with 4 chips on each".
+        # bundle: the workers on the same host form one bundle.
         # The workers are the actual ray actors.
         self.workers: List[RayWorkerWrapper] = []
 
@@ -190,6 +194,9 @@ def _init_workers_ray(self, placement_group: "PlacementGroup",
         driver_ip = get_ip()
         num_tpu_per_worker = placement_group.bundle_specs[0].get(
             current_platform.ray_device_key, 0)
+
+        # Create one worker per bundle. A bundle is a dict of resources; in
+        # this example (v7x-4) each bundle holds the 4 chips of one host.
         for rank, bundle_id in enumerate(bundle_indices):
             scheduling_strategy = PlacementGroupSchedulingStrategy(
                 placement_group=placement_group,
tpu_commons/models/jax/model_loader.py

Lines changed: 1 addition & 1 deletion
@@ -285,7 +285,7 @@ def get_vllm_model(
         rng=rng,
         mesh=mesh,
     )
-    params, lora_manager = model.load_weights()
+    params, lora_manager = model.load_weights()  # jax
 
     jit_model = model.jit_step_func()
     compute_logits_fn = model.jit_compute_logits_func()

tpu_commons/models/vllm/vllm_model_wrapper.py

Lines changed: 5 additions & 5 deletions
@@ -67,7 +67,7 @@ def compute_hidden_state(
         inputs_embeds: Optional[torch.Tensor],
     ) -> torch.Tensor:
         hidden_state = self.vllm_model(input_ids, positions,
-                                       intermediate_tensors, inputs_embeds)
+                                       intermediate_tensors, inputs_embeds)  # the output here can be either a hidden state or an intermediate tensor; only because jax has no pp yet is it always a hidden state
         return hidden_state
 
     def compute_logits(self, hidden_state: torch.Tensor) -> torch.Tensor:
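
The translated note says the forward pass can return either a hidden state or intermediate tensors, and that the jax path only ever sees hidden states because it has no pp yet. As a hedged illustration (assuming vLLM's IntermediateTensors type; this is not the wrapper's code), a pp-aware caller would have to branch on the output type:

# Illustrative sketch only. Assumes vLLM's IntermediateTensors; with pp > 1,
# non-last pipeline ranks return IntermediateTensors to be shipped to the next
# rank, and only the last rank returns the hidden state for compute_logits.
from typing import Union

import torch
from vllm.sequence import IntermediateTensors


def describe_forward_output(
        output: Union[torch.Tensor, IntermediateTensors]) -> str:
    if isinstance(output, IntermediateTensors):
        return "intermediate tensors: send to the next pp rank"
    return "hidden state: last pp rank, feed into compute_logits"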
@@ -136,7 +136,7 @@ def load_weights(self):
         # Returning to the jax land, so we need to wrap it into a JaxValue.
         return jax_view(params_and_buffers), lora_manager
 
-    def jit_step_func(self):
+    def jit_step_func(self):  # should also take in intermediate_tensors.
 
         @functools.partial(
             jax.jit,
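
The note above says jit_step_func should also take intermediate_tensors once pp lands. A purely hypothetical sketch of such a signature, where the names, shapes, and placeholder body are all assumptions rather than this wrapper's code:

# Hypothetical sketch of a pp-aware jitted step signature. The body is a
# placeholder; only the extra intermediate_tensors argument is the point.
import functools
from typing import Any, Optional

import jax
import jax.numpy as jnp


@functools.partial(jax.jit, static_argnames=("hidden_size",))
def step_fun(params: Any,
             input_ids: jax.Array,
             positions: jax.Array,
             intermediate_tensors: Optional[Any] = None,
             hidden_size: int = 8) -> jax.Array:
    # First pp rank: intermediate_tensors is None and input_ids are embedded.
    # Later ranks: intermediate_tensors would carry the previous stage's output.
    del params, positions, intermediate_tensors  # placeholder body
    return jnp.zeros((input_ids.shape[0], hidden_size), dtype=jnp.float32)


# Example call on the first pp rank:
out = step_fun({}, jnp.array([[1, 2, 3]]), jnp.array([[0, 1, 2]]))
print(out.shape)  # (1, 8)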
@@ -175,9 +175,9 @@ def step_fun(
             self.model,
             torch_view(params_and_buffers),
             kwargs={
-                "input_ids": torch_view(input_ids),
-                "positions": torch_view(attn_metadata.input_positions),
-                "intermediate_tensors": None,
+                "input_ids": torch_view(input_ids),  # torch_view(jax.Array) -> torchax tensor
+                "positions": torch_view(attn_metadata.input_positions),  # torch_view(jax.Array) -> torchax tensor
+                "intermediate_tensors": None,  # this is the one that should be used for pp
                 "inputs_embeds": None,
             },
             tie_weights=False,
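
The torch_view notes mark where jax arrays are re-exposed as torchax tensors for the torch-land vLLM model. A small sketch of that round trip, assuming torch_view and jax_view are importable from torchax.interop (as the wrapper's own jax_view usage suggests):

# Sketch of the jax <-> torch bridging the comments describe. Assumes
# torchax.interop exposes torch_view / jax_view; the intent is to view the
# same device buffer from the other framework rather than copy it.
import jax.numpy as jnp
from torchax.interop import jax_view, torch_view

input_ids = jnp.array([[1, 2, 3, 4]])   # jax.Array (jax land)
torch_ids = torch_view(input_ids)       # torchax tensor (torch land)
# ... a torch-land model such as self.vllm_model would consume torch_ids here ...
back = jax_view(torch_ids)              # back to a jax.Array for jax land
print(type(torch_ids), type(back))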
