Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
Rohan138 committed Nov 18, 2024
1 parent 5e75fda commit f49dd07
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 16 deletions.
14 changes: 4 additions & 10 deletions src/transformers/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1774,12 +1774,7 @@ def _wrap_model(self, model, training=True, dataloader=None):

# train/eval could be run multiple-times - if already wrapped, don't re-wrap it again
if self.accelerator.unwrap_model(model) is not model:
if self.args.ort:
from torch_ort import ORTModule
if type(model) is not ORTModule:
return model
else:
return model
return model

# Mixed precision training with apex (torch < 1.6)
if self.use_apex and training:
Expand Down Expand Up @@ -2344,7 +2339,7 @@ def _inner_training_loop(
self._load_rng_state(resume_from_checkpoint)
rng_to_sync = False

if (self.state.global_step == 10):
if (self.state.global_step == args.stable_train_warmup_steps):
start_train_stable_time = time.time()

# Skip past any already trained steps if resuming training
Expand Down Expand Up @@ -2499,9 +2494,8 @@ def _inner_training_loop(

metrics = speed_metrics("train", start_time, num_samples=num_train_samples, num_steps=self.state.max_steps,num_tokens=num_train_tokens,)

total_samples = self.state.global_step*total_train_batch_size if args.max_steps > 0 else num_examples*num_train_epochs
perf_samples = total_samples - self.args.warmup_steps*total_train_batch_size
stable_train_metrics = speed_metrics("stable_train", start_train_stable_time, perf_samples)
stable_train_samples = num_train_samples - args.stable_train_warmup_steps*total_train_batch_size
stable_train_metrics = speed_metrics("stable_train", start_train_stable_time, stable_train_samples)

self.store_flos()
metrics["total_flos"] = self.state.total_flos
Expand Down
9 changes: 3 additions & 6 deletions src/transformers/training_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,8 @@ class TrainingArguments:
Ratio of total training steps used for a linear warmup from 0 to `learning_rate`.
warmup_steps (`int`, *optional*, defaults to 10):
Number of steps used for a linear warmup from 0 to `learning_rate`. Overrides any effect of `warmup_ratio`.
stable_train_warmup_steps (`int`, *optional*, defaults to 10):
Number of initial training steps excluded when measuring `stable_train_samples_per_second`; the stable-train
timer starts once `global_step` reaches this value.
log_level (`str`, *optional*, defaults to `passive`):
Logger log level to use on the main process. Possible choices are the log levels as strings: 'debug',
'info', 'warning', 'error' and 'critical', plus a 'passive' level which doesn't set anything and keeps the
Expand Down Expand Up @@ -599,8 +601,6 @@ class TrainingArguments:
If `True`, an `Accelerator` or `PartialState` must be initialized. Note that by doing so, this could lead to issues
with hyperparameter tuning.
ortmodule (:obj:`bool`, `optional`):
Use `ORTModule <https://github.com/microsoft/onnxruntime>`__.
label_smoothing_factor (`float`, *optional*, defaults to 0.0):
The label smoothing factor to use. Zero means no label smoothing, otherwise the underlying onehot-encoded
labels are changed from 0s and 1s to `label_smoothing_factor/num_labels` and `1 - label_smoothing_factor +
Expand Down Expand Up @@ -912,6 +912,7 @@ class TrainingArguments:
default=0.0, metadata={"help": "Linear warmup over warmup_ratio fraction of total steps."}
)
warmup_steps: int = field(default=10, metadata={"help": "Linear warmup over warmup_steps."})
stable_train_warmup_steps: int = field(default=10, metadata={"help": "warmup steps to skip before collecting training performance."})

log_level: Optional[str] = field(
default="passive",
Expand Down Expand Up @@ -1258,10 +1259,6 @@ class TrainingArguments:
)
},
)
ort: Optional[bool] = field(
default=False,
metadata={"help": "Enable Ort"},
)
label_smoothing_factor: float = field(
default=0.0, metadata={"help": "The label smoothing epsilon to apply (zero means no label smoothing)."}
)
Expand Down

0 comments on commit f49dd07

Please sign in to comment.