THUDM · gxlvera · Jan 20, 2026
diff --git a/examples/geo3k_vlm_multi_turn/README.md b/examples/geo3k_vlm_multi_turn/README.md
@@ -30,7 +30,7 @@ The reward model is the default math RM.
 export WANDB_API_KEY=...
 export SLIME_SCRIPT_MODEL_NAME=Qwen3-VL-2B-Instruct
 export SLIME_SCRIPT_NUM_GPUS=4
-export SLIME_SCRIPT_TRAIN_BACKEND=fsdp
+export SLIME_SCRIPT_TRAIN_BACKEND=megatron
 
 # 2) Download the dataset
 hf download --repo-type dataset VeraIsHere/geo3k_imgurl_processed --local-dir /root/datasets/geo3k_imgurl_processed

diff --git a/examples/geo3k_vlm_multi_turn/geo3k_vlm_multi_turn_reward.png b/examples/geo3k_vlm_multi_turn/geo3k_vlm_multi_turn_reward.png
diff --git a/examples/geo3k_vlm_multi_turn/rollout_experiment_result_megatron.png b/examples/geo3k_vlm_multi_turn/rollout_experiment_result_megatron.png
diff --git a/examples/geo3k_vlm_multi_turn/run_geo3k_vlm_multi_turn.py b/examples/geo3k_vlm_multi_turn/run_geo3k_vlm_multi_turn.py
@@ -15,8 +15,8 @@
 
 NUM_GPUS = int(os.environ.get("SLIME_SCRIPT_NUM_GPUS", "4"))
 EXTERNAL_RAY = int(os.environ.get("SLIME_SCRIPT_EXTERNAL_RAY", "0"))
-TRAIN_BACKEND = os.environ.get("SLIME_SCRIPT_TRAIN_BACKEND", "fsdp").lower()
-assert TRAIN_BACKEND in {"fsdp", "megatron"}
+TRAIN_BACKEND = os.environ.get("SLIME_SCRIPT_TRAIN_BACKEND", "megatron").lower()
+assert TRAIN_BACKEND == "megatron", f"this example only supports the megatron backend, got SLIME_SCRIPT_TRAIN_BACKEND={TRAIN_BACKEND!r}"
 
 DATASET_NAME = "VeraIsHere/geo3k_imgurl_processed"
 DATA_ROOT = "/root/datasets/geo3k_imgurl_processed"
@@ -104,13 +104,6 @@ def execute():
         f"--sglang-cuda-graph-bs {' '.join(map(str, [1, 2, 4, 8] + list(range(16, 257, 8))))} "
     )
 
-    fsdp_args = (
-        "--train-backend fsdp "
-        "--gradient-checkpointing "
-        "--sglang-attention-backend fa3 "
-        "--attn-implementation flash_attention_3 "
-        "--update-weight-buffer-size 536870912 "
-    )
 
     megatron_args = (
         "--train-backend megatron "
@@ -142,9 +135,6 @@ def execute():
         backend_args = megatron_args
         megatron_model_type = get_megatron_model_type(MODEL_NAME)
         os.environ["MODEL_ARGS_ROTARY_BASE"] = "5000000"
-    else:
-        backend_args = fsdp_args
-        megatron_model_type = None
 
     train_args = (
         f"{ckpt_args} "