Skip to content

Commit e24b173

Browse files
felipemello1 and Felipe Mello authored
Metric Logging updates 5/N - enable streaming (#363)
Co-authored-by: Felipe Mello <felipemello@fb.com>
1 parent 8b753f8 commit e24b173

File tree

16 files changed: +474 -302 lines changed

.meta/mast/qwen3_14b_mast.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ metric_logging:
1919
wandb:
2020
project: "grpo-training"
2121
group: "grpo_exp_${oc.env:USER}"
22-
reduce_across_ranks: True
22+
logging_mode: global_reduce
2323
console:
24-
reduce_across_ranks: True
24+
logging_mode: global_reduce
2525

2626
# Dataset configuration
2727
dataset:

.meta/mast/qwen3_1_7b_mast.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ metric_logging:
1919
wandb:
2020
project: "grpo-training"
2121
group: "grpo_exp_${oc.env:USER}"
22-
reduce_across_ranks: True
22+
logging_mode: global_reduce
2323
console:
24-
reduce_across_ranks: True
24+
logging_mode: global_reduce
2525

2626
# Dataset configuration
2727
dataset:

.meta/mast/qwen3_32b_mast.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ metric_logging:
1919
wandb:
2020
project: "grpo-training"
2121
group: "grpo_exp_${oc.env:USER}"
22-
reduce_across_ranks: True
22+
logging_mode: global_reduce
2323
console:
24-
reduce_across_ranks: True
24+
logging_mode: global_reduce
2525

2626
# Dataset configuration
2727
dataset:

.meta/mast/qwen3_4b_mast.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ metric_logging:
1919
wandb:
2020
project: "grpo-training"
2121
group: "grpo_exp_${oc.env:USER}"
22-
reduce_across_ranks: True
22+
logging_mode: global_reduce
2323
console:
24-
reduce_across_ranks: True
24+
logging_mode: global_reduce
2525

2626
# Dataset configuration
2727
dataset:

.meta/mast/qwen3_8b_mast.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ metric_logging:
1919
wandb:
2020
project: "grpo-training"
2121
group: "grpo_exp_${oc.env:USER}"
22-
reduce_across_ranks: True
22+
logging_mode: global_reduce
2323
console:
24-
reduce_across_ranks: True
24+
logging_mode: global_reduce
2525

2626
# Dataset configuration
2727
dataset:

apps/grpo/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,7 @@ async def main(cfg: DictConfig):
304304
else:
305305
provisioner = await init_provisioner()
306306

307-
metric_logging_cfg = cfg.get("metric_logging", {"console": {"log_per_rank": False}})
307+
metric_logging_cfg = cfg.get("metric_logging", {})
308308
mlogger = await get_or_create_metric_logger(process_name="Controller")
309309
await mlogger.init_backends.call_one(metric_logging_cfg)
310310

apps/grpo/qwen3_1_7b.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@ rollout_threads: 1 # Recommended to set equal to policy.num_replicas
1616
# Observability configuration
1717
metric_logging:
1818
wandb:
19-
project: "grpo-training"
20-
group: "grpo_exp_${oc.env:USER}"
21-
reduce_across_ranks: True
19+
project: grpo-training
20+
group: grpo_exp_${oc.env:USER}
21+
logging_mode: global_reduce # global_reduce, per_rank_reduce, per_rank_no_reduce
2222
console:
23-
reduce_across_ranks: True
23+
logging_mode: global_reduce
2424

2525
# Dataset configuration
2626
dataset:

apps/grpo/qwen3_32b.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@ rollout_threads: 32 # make this 4x the number of policy replicas seems to work w
1919
# Observability configuration
2020
metric_logging:
2121
wandb:
22-
project: "grpo-training"
23-
group: "grpo_exp_${oc.env:USER}"
24-
reduce_across_ranks: True
22+
project: grpo-training
23+
group: grpo_exp_${oc.env:USER}
24+
logging_mode: global_reduce # global_reduce, per_rank_reduce, per_rank_no_reduce
2525
console:
26-
reduce_across_ranks: True
26+
logging_mode: global_reduce
2727

2828
# Dataset configuration
2929
dataset:

apps/grpo/qwen3_8b.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@ off_by_n: 1 # Off by one by default
1212
# Observability configuration
1313
metric_logging:
1414
wandb:
15-
project: "grpo-training"
16-
group: "grpo_exp_${oc.env:USER}"
17-
reduce_across_ranks: True
15+
project: grpo-training
16+
group: grpo_exp_${oc.env:USER}
17+
logging_mode: global_reduce # global_reduce, per_rank_reduce, per_rank_no_reduce
1818
console:
19-
reduce_across_ranks: True
19+
logging_mode: global_reduce
2020

2121
# Dataset configuration
2222
dataset:

src/forge/observability/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@
1212
from .metrics import (
1313
BackendRole,
1414
ConsoleBackend,
15+
get_logger_backend_class,
1516
LoggerBackend,
17+
LoggingMode,
1618
MaxAccumulator,
1719
MeanAccumulator,
1820
Metric,
@@ -43,6 +45,7 @@
4345
"BackendRole",
4446
# Enums
4547
"Reduce",
48+
"LoggingMode",
4649
# Utility functions
4750
"get_proc_name_with_rank",
4851
# Actor classes

0 commit comments

Comments
 (0)