
Commit

Fix hints and typing
Authored and committed by Arsh Zahed on Nov 18, 2024
1 parent 306aa41 commit 9a1ce5f
Showing 3 changed files with 8 additions and 8 deletions.
4 changes: 2 additions & 2 deletions src/together/cli/api/finetune.py
@@ -69,7 +69,7 @@ def fine_tuning(ctx: click.Context) -> None:
     "--min-lr-ratio",
     type=float,
     default=0.0,
-    help="Final learning rate ratio of the initial learning rate",
+    help="The ratio of the final learning rate to the peak learning rate",
 )
 @click.option(
     "--warmup-ratio",
@@ -81,7 +81,7 @@ def fine_tuning(ctx: click.Context) -> None:
     "--max-grad-norm",
     type=float,
     default=1.0,
-    help="Max gradient norm. Set to 0 to disable.",
+    help="Max gradient norm to be used for gradient clipping. Set to 0 to disable.",
 )
 @click.option(
     "--weight-decay",
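
To make the reworded --min-lr-ratio help text concrete, here is a small illustrative sketch (not code from the SDK, and the decay shape is an assumption) of what "the ratio of the final learning rate to the peak learning rate" means in practice:

# Illustrative only: not the SDK's scheduler implementation.
# --min-lr-ratio scales the peak learning rate down to the value the
# learning-rate schedule ends at.

def final_learning_rate(peak_lr: float, min_lr_ratio: float) -> float:
    """Learning rate the schedule decays to by the end of training."""
    return peak_lr * min_lr_ratio

assert final_learning_rate(1e-5, 0.0) == 0.0               # default: decay all the way to zero
assert abs(final_learning_rate(1e-5, 0.1) - 1e-6) < 1e-15  # decay to 10% of the peak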
8 changes: 4 additions & 4 deletions src/together/resources/finetune.py
@@ -139,7 +139,7 @@ def create(
         n_checkpoints: int | None = 1,
         batch_size: int | Literal["max"] = "max",
         learning_rate: float | None = 0.00001,
-        min_lr_ratio: float | None = 0.0,
+        min_lr_ratio: float = 0.0,
         warmup_ratio: float | None = 0.0,
         max_grad_norm: float = 1.0,
         weight_decay: float = 0.0,
@@ -171,7 +171,7 @@ def create(
             min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for
                 the learning rate scheduler. Defaults to 0.0.
             warmup_ratio (float, optional): Warmup ratio for learning rate scheduler.
-            max_grad_norm (float, optional): Max gradient norm. Defaults to 1.0. Set to 0 to disable.
+            max_grad_norm (float, optional): Max gradient norm. Defaults to 1.0, set to 0 to disable.
             weight_decay (float, optional): Weight decay. Defaults to 0.0.
             lora (bool, optional): Whether to use LoRA adapters. Defaults to True.
             lora_r (int, optional): Rank of LoRA adapters. Defaults to 8.
Expand Down Expand Up @@ -468,7 +468,7 @@ async def create(
n_checkpoints: int | None = 1,
batch_size: int | Literal["max"] = "max",
learning_rate: float | None = 0.00001,
min_lr_ratio: float | None = 0.0,
min_lr_ratio: float = 0.0,
warmup_ratio: float | None = 0.0,
max_grad_norm: float = 1.0,
weight_decay: float = 0.0,
Expand Down Expand Up @@ -500,7 +500,7 @@ async def create(
min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for
the learning rate scheduler. Defaults to 0.0.
warmup_ratio (float, optional): Warmup ratio for learning rate scheduler.
max_grad_norm (float, optional): Max gradient norm. Defaults to 1.0. Set to 0 to disable.
max_grad_norm (float, optional): Max gradient norm. Defaults to 1.0, set to 0 to disable.
weight_decay (float, optional): Weight decay. Defaults to 0.0.
lora (bool, optional): Whether to use LoRA adapters. Defaults to True.
lora_r (int, optional): Rank of LoRA adapters. Defaults to 8.
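
For reference, a hedged usage sketch of the updated create signature. The client.fine_tuning.create entry point is assumed from the SDK's layout rather than shown in this diff, and the training file and model values are placeholders:

from together import Together

client = Together()  # assumes TOGETHER_API_KEY is set in the environment

# min_lr_ratio is now a plain float (no longer Optional); max_grad_norm and
# weight_decay keep their defaults of 1.0 and 0.0.
job = client.fine_tuning.create(
    training_file="file-placeholder-id",  # hypothetical uploaded-file ID
    model="model-name-placeholder",       # hypothetical base model name
    learning_rate=1e-5,
    min_lr_ratio=0.1,    # decay to 10% of the peak learning rate
    warmup_ratio=0.05,   # fraction of training spent warming up
    max_grad_norm=1.0,   # gradient-clipping threshold; 0 disables clipping
    weight_decay=0.0,
)
print(job.id)  # the returned job object carries an id (assumed field name)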
4 changes: 2 additions & 2 deletions src/together/types/finetune.py
@@ -155,9 +155,9 @@ class FinetuneRequest(BaseModel):
     # learning rate warmup ratio
     warmup_ratio: float
     # max gradient norm
-    max_grad_norm: float | None = None
+    max_grad_norm: float
     # weight decay
-    weight_decay: float | None = None
+    weight_decay: float
     # number of checkpoints to save
     n_checkpoints: int | None = None
     # number of evaluation loops to run
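
The FinetuneRequest change above turns max_grad_norm and weight_decay from optional fields into required floats. A standalone Pydantic sketch (not the real FinetuneRequest, which has many more fields) showing the practical difference:

from pydantic import BaseModel, ValidationError


class BeforeChange(BaseModel):
    max_grad_norm: float | None = None  # optional: may be omitted or left as None
    weight_decay: float | None = None


class AfterChange(BaseModel):
    max_grad_norm: float  # required: a concrete float must always be supplied
    weight_decay: float


BeforeChange()                                    # ok: both fields default to None
AfterChange(max_grad_norm=1.0, weight_decay=0.0)  # ok: values always present

try:
    AfterChange()  # missing required fields
except ValidationError as exc:
    print(exc)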
