diff --git a/examples/rec_gsm8k/README.md b/examples/rec_gsm8k/README.md
index c0d9376db1..b3a5673463 100644
--- a/examples/rec_gsm8k/README.md
+++ b/examples/rec_gsm8k/README.md
@@ -1,41 +1,40 @@
-# Example: REC on GSM8k dataset
+# Example: Group-relative REINFORCE variants on the GSM8k dataset
 
-This example shows the usage of REC on the [GSM8k dataset](https://huggingface.co/datasets/openai/gsm8k).
+This example shows the usage of group-relative REINFORCE variants on the [GSM8k dataset](https://huggingface.co/datasets/openai/gsm8k).
 
-For more detailed information, please refer to the [documentation](../../docs/sphinx_doc/source/tutorial/example_reasoning_basic.md).
+For more details about algorithm design, please refer to [our paper](https://arxiv.org/abs/2509.24203).
 
 The config file is located in [`gsm8k.yaml`](gsm8k.yaml).
 
-# Group-relative REINFORCE Families
-This folder provides **example configurations** for running different group-relative REINFORCE families within Trinity-RFT.
+## Group-relative REINFORCE variants
 
+This folder provides example configurations for running different group-relative REINFORCE variants within Trinity-RFT.
 It includes three major families:
 
-- **REC family** (clipping + importance sampling)
-- **REP family** (regularization-based variants)
-- **RED family** (data-distribution shaping strategies)
+- **REC family** (regularization by clipping)
+- **REP family** (regularization by an additive loss term)
+- **RED family** (actively shaping data distribution)
 
-We also provide baseline implementations such as **Vanilla REINFORCE** and **GRPO**.
+These include baseline algorithms like vanilla REINFORCE and GRPO as special cases.
 
 All algorithms are instantiated through modular YAML configs for easy reproduction and extension.
 
-# Summary Table 📝
+## Summary Table 📝
 
 | Family        | Variants                                        | Key Idea                            |
 | ------------- | ----------------------------------------------- | ----------------------------------- |
-| **Baselines** | REINFORCE, GRPO                                 | Standard references                 |
-| **REC**       | OneSide-NoIS, OneSide-IS, TwoSide-IS, Ring-NoIS | Clipping + importance sampling      |
-| **REP**       | AsymRE, OPMD                                    | Regularization |
-| **RED**       | Drop, Weight                                    | Data-distribution shaping           |
+| **Baselines** | REINFORCE, GRPO                                 | Standard references          |
+| **REC**       | OneSide / TwoSide / Ring clipping, each with IS or NoIS | Clipping as regularization, with or without importance sampling   |
+| **REP**       | AsymRE, OPMD                                    | Regularization by an additive loss term |
+| **RED**       | Drop, Weight                                    | Actively shaping data distribution      |
 
 
 
-# Instantiations
+## Instantiations
 
-## Baselines
+### Baselines
 
-### REINFORCE
-Vanilla REINFORCE with group mean as baseline.
+**Vanilla REINFORCE** with group mean as baseline:
 
 ```
 algorithm:
@@ -52,8 +51,7 @@ algorithm:
     std_normalize: false
 ```
 
-### GRPO
-GRPO implemented with zero KL regularizer. Regularization can be enabled via `kl_loss_fn` and `kl_loss_fn_args`.
+**GRPO** (shown with `kl_coef: 0.0`, i.e. no KL regularization; it can be enabled via `kl_loss_fn` and `kl_loss_fn_args`):
 
 ```
 algorithm:
@@ -71,17 +69,11 @@ algorithm:
   kl_loss_fn: 'k2'
   kl_loss_fn_args:
     kl_coef:  0.0
-
 ```
 
-## REC family
-Variants of clipping and importance-sampling strategies.
-- REC-OneSide-NoIS
-- REC-OneSide-IS
-- REC-TwoSide-IS
-- REC-Ring-NoIS
+### REC family
 
-### REC-OneSide-NoIS
+**REC-OneSide-NoIS:**
 
 ```
 algorithm:
@@ -98,7 +90,7 @@ algorithm:
     std_normalize: false
 ```
 
-### REC-OneSide-IS
+**REC-OneSide-IS:**
 
 ```
 algorithm:
@@ -115,7 +107,7 @@ algorithm:
     std_normalize: false
 ```
 
-### REC-TwoSide-IS
+**REC-TwoSide-IS:**
 
 ```
 algorithm:
@@ -131,7 +123,8 @@ algorithm:
   advantage_fn_args:
     std_normalize: false
 ```
-### REC-Ring-NoIS
+
+**REC-Ring-NoIS:**
 
 ```
 algorithm:
@@ -150,13 +143,10 @@ algorithm:
     std_normalize: false
 ```
 
-## REP family
+### REP family
 
-Regularization-based algorithms.
-- AsymRE (forward KL regularization)
-- Kimi’s OPMD (k2 regularizer)
 
-### AsymRE
+**Meta's AsymRE:**
 
 ```
 algorithm:
@@ -172,7 +162,7 @@ algorithm:
 ```
 
 
-### Kimi's OPMD
+**Kimi's OPMD:**
 
 ```
 algorithm:
@@ -186,12 +176,10 @@ algorithm:
     std_normalize: false
 ```
 
-## RED family
-Data-distribution shaping variants.
-- RED-Drop (drop extra negative examples to balance the positive examples v.s. negative examples)
-- RED-Weight (advantage-weighting strategy)
+### RED family
+
 
-### RED-Drop
+**RED-Drop:**
 
 ```
 algorithm:
@@ -206,7 +194,7 @@ algorithm:
 ```
 
 
-### RED-Weight
+**RED-Weight:**
 
 ```
 algorithm:
@@ -219,3 +207,17 @@ algorithm:
   advantage_fn_args:
     std_normalize: false
 ```
+
+## Citation
+
+```bibtex
+@misc{yao2025grouprelativereinforcesecretlyoffpolicy,
+      title={Group-Relative REINFORCE Is Secretly an Off-Policy Algorithm: Demystifying Some Myths About GRPO and Its Friends},
+      author={Chaorui Yao and Yanxi Chen and Yuchang Sun and Yushuo Chen and Wenhao Zhang and Xuchen Pan and Yaliang Li and Bolin Ding},
+      year={2025},
+      eprint={2509.24203},
+      archivePrefix={arXiv},
+      primaryClass={cs.LG},
+      url={https://arxiv.org/abs/2509.24203},
+}
+```
diff --git a/examples/rec_gsm8k/gsm8k.yaml b/examples/rec_gsm8k/gsm8k.yaml
index 3be850e401..570136929b 100644
--- a/examples/rec_gsm8k/gsm8k.yaml
+++ b/examples/rec_gsm8k/gsm8k.yaml
@@ -1,4 +1,3 @@
-# Configuration file for the REC GSM8k project.
 project: "Trinity-RFT-GSM8K"
 name: rec_gsm8k
 checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
@@ -15,7 +14,7 @@ algorithm:
   policy_loss_fn_args:
     epsilon_low: 0.2
     epsilon_high: 0.2
-    clip_mode: "none"
+    clip_mode: "one-side"
     weight: "none"
     temp: 1.0
     regularizer: "none"
diff --git a/examples/rec_math/README.md b/examples/rec_math/README.md
deleted file mode 100644
index 8cc79050b8..0000000000
--- a/examples/rec_math/README.md
+++ /dev/null
@@ -1,221 +0,0 @@
-# Example: REC on MATH dataset
-
-This example shows the usage of REC on the [MATH dataset](https://huggingface.co/datasets/nlile/hendrycks-MATH-benchmark).
-
-For more detailed information, please refer to the [documentation](../../docs/sphinx_doc/source/tutorial/example_reasoning_basic.md).
-
-The config file is located in [`math.yaml`](math.yaml).
-
-# Group-relative REINFORCE Families
-This folder provides **example configurations** for running different group-relative REINFORCE families within Trinity-RFT.
-
-It includes three major families:
-
-- **REC family** (clipping + importance sampling)
-- **REP family** (regularization-based variants)
-- **RED family** (data-distribution shaping strategies)
-
-We also provide baseline implementations such as **Vanilla REINFORCE** and **GRPO**.
-
-All algorithms are instantiated through modular YAML configs for easy reproduction and extension.
-
-# Summary Table 📝
-
-| Family        | Variants                                        | Key Idea                            |
-| ------------- | ----------------------------------------------- | ----------------------------------- |
-| **Baselines** | REINFORCE, GRPO                                 | Standard references                 |
-| **REC**       | OneSide-NoIS, OneSide-IS, TwoSide-IS, Ring-NoIS | Clipping + importance sampling      |
-| **REP**       | AsymRE, OPMD                                    | Regularization |
-| **RED**       | Drop, Weight                                    | Data-distribution shaping           |
-
-
-
-# Instantiations
-
-## Baselines
-
-### REINFORCE
-Vanilla REINFORCE with group mean as baseline.
-
-```
-algorithm:
-  algorithm_type: rec
-  policy_loss_fn_args:
-    epsilon_low: 0.2
-    epsilon_high: 0.2
-    clip_mode: "none" # no clipping
-    weight: "none" # uniform weighting for samples
-    temp: 1.0
-    regularizer: "none" # no regularizer
-    regularizer_coef: 0.0
-  advantage_fn_args:
-    std_normalize: false
-```
-
-### GRPO
-GRPO implemented with zero KL regularizer. Regularization can be enabled via `kl_loss_fn` and `kl_loss_fn_args`.
-
-```
-algorithm:
-  algorithm_type: rec
-  policy_loss_fn_args:
-    epsilon_low: 0.2
-    epsilon_high: 0.2
-    clip_mode: "one-side"
-    weight: "importance_sampling"
-    temp: 1.0
-    regularizer: "none"
-    regularizer_coef: 0.0
-  advantage_fn_args:
-    std_normalize: true
-  kl_loss_fn: 'k2'
-  kl_loss_fn_args:
-    kl_coef:  0.0
-
-```
-
-## REC family
-Variants of clipping and importance-sampling strategies.
-- REC-OneSide-NoIS
-- REC-OneSide-IS
-- REC-TwoSide-IS
-- REC-Ring-NoIS
-
-### REC-OneSide-NoIS
-
-```
-algorithm:
-  algorithm_type: rec
-  policy_loss_fn_args:
-    epsilon_low: 0.2
-    epsilon_high: 0.2
-    clip_mode: "one-side"
-    weight: "none"
-    temp: 1.0
-    regularizer: "none"
-    regularizer_coef: 0.0
-  advantage_fn_args:
-    std_normalize: false
-```
-
-### REC-OneSide-IS
-
-```
-algorithm:
-  algorithm_type: rec
-  policy_loss_fn_args:
-    epsilon_low: 0.2
-    epsilon_high: 0.2
-    clip_mode: "one-side"
-    weight: "importance_sampling"
-    temp: 1.0
-    regularizer: "none"
-    regularizer_coef: 0.0
-  advantage_fn_args:
-    std_normalize: false
-```
-
-### REC-TwoSide-IS
-
-```
-algorithm:
-  algorithm_type: rec
-  policy_loss_fn_args:
-    epsilon_low: 0.2
-    epsilon_high: 0.2
-    clip_mode: "two-side"
-    weight: "importance_sampling"
-    temp: 1.0
-    regularizer: "none"
-    regularizer_coef: 0.0
-  advantage_fn_args:
-    std_normalize: false
-```
-### REC-Ring-NoIS
-
-```
-algorithm:
-  algorithm_type: rec
-  policy_loss_fn_args:
-    epsilon_low: 0.2
-    epsilon_high: 0.2
-    epsilon_low_prime: 0.6
-    epsilon_high_prime: 2.0
-    clip_mode: "ring"
-    weight: "none"
-    temp: 1.0
-    regularizer: "none"
-    regularizer_coef: 0.0
-  advantage_fn_args:
-    std_normalize: false
-```
-
-## REP family
-
-Regularization-based algorithms.
-- AsymRE (forward KL regularization)
-- Kimi’s OPMD (k2 regularizer)
-
-### AsymRE
-
-```
-algorithm:
-  algorithm_type: rec
-  policy_loss_fn_args:
-    clip_mode: "none"
-    weight: "none"
-    temp: 1.0
-    regularizer: "forward-kl"
-    regularizer_coef: 0.1
-  advantage_fn_args:
-    std_normalize: false
-```
-
-
-### Kimi's OPMD
-
-```
-algorithm:
-  algorithm_type: rec
-  policy_loss_fn_args:
-    clip_mode: "none"
-    weight: "none"
-    regularizer: "k2"
-    regularizer_coef: 0.1
-  advantage_fn_args:
-    std_normalize: false
-```
-
-## RED family
-Data-distribution shaping variants.
-- RED-Drop (drop extra negative examples to balance the positive examples v.s. negative examples)
-- RED-Weight (advantage-weighting strategy)
-
-### RED-Drop
-
-```
-algorithm:
-  algorithm_type: rec
-  policy_loss_fn_args:
-    clip_mode: "none"
-    weight: "none"
-    regularizer: "none"
-  advantage_fn_args:
-    std_normalize: false
-    drop: "balance"
-```
-
-
-### RED-Weight
-
-```
-algorithm:
-  algorithm_type: rec
-  policy_loss_fn_args:
-    clip_mode: "none"
-    weight: "advantage"
-    regularizer: "none"
-    temp: 1.0
-  advantage_fn_args:
-    std_normalize: false
-```
diff --git a/examples/rec_math/math.yaml b/examples/rec_math/math.yaml
deleted file mode 100644
index 226fbe7d75..0000000000
--- a/examples/rec_math/math.yaml
+++ /dev/null
@@ -1,82 +0,0 @@
-project: Trinity-RFT-rec_math
-name: rec_math
-checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
-mode: both
-model:
-  model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-3B-Instruct}
-  max_response_tokens: 2048
-  max_model_len: 2048
-algorithm:
-  algorithm_type: rec
-  repeat_times: 8
-  optimizer:
-    lr: 6e-8
-  policy_loss_fn_args:
-    epsilon_low: 0.2
-    epsilon_high: 0.2
-    epsilon_high_prime: 0.4
-    epsilon_low_prime: 0.4
-    clip_mode: none
-    weight: none
-  advantage_fn_args:
-    std_normalize: false
-cluster:
-  node_num: 1
-  gpu_per_node: 8
-buffer:
-  total_steps: 200
-  batch_size: 16
-  explorer_input:
-    taskset:
-      name: math
-      storage_type: file
-      path: ${oc.env:TRINITY_TASKSET_PATH}
-      format:
-        prompt_key: problem
-        response_key: solution
-      rollout_args:
-        temperature: 1.0
-        top_p: 1.0
-        logprobs: 0
-    eval_tasksets:
-      - name: math
-        storage_type: file
-        path: ${oc.env:TRINITY_EVAL_TASKSET_PATH}
-        split: test
-        format:
-          prompt_key: problem
-          response_key: solution
-        rollout_args:
-          temperature: 0.1
-          top_p: 0.95
-    default_workflow_type: math_boxed_workflow
-    default_reward_fn_type: math_boxed_reward
-  trainer_input:
-    experience_buffer:
-      name: math_buffer
-      storage_type: queue
-explorer:
-  eval_interval: 500
-  runner_per_model: 16
-  rollout_model:
-    engine_type: vllm_async
-    engine_num: 4
-    tensor_parallel_size: 1
-    enable_prefix_caching: false
-    enforce_eager: true
-    dtype: bfloat16
-    max_prompt_tokens: 1024
-    max_response_tokens: 2048
-    seed: 42
-synchronizer:
-  sync_method: nccl
-  sync_interval: 1
-  sync_timeout: 3600
-  sync_offset: 0
-trainer:
-  trainer_type: verl
-  save_interval: 100
-  grad_clip: 1.0
-  use_dynamic_bsz: true
-  max_token_len_per_gpu: 16384
-  ulysses_sequence_parallel_size: 1