Skip to content

Commit

Permalink
Fix typo in extra_columns variable name (#1269)
Browse files Browse the repository at this point in the history
Co-authored-by: Otto Laitila <otto.laitila@op.fi>
  • Loading branch information
otlaitil and Otto Laitila authored Jan 23, 2024
1 parent a3c5b71 commit 5760e5d
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions trl/trainer/sft_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,12 +428,12 @@ def tokenize(element):

signature_columns = ["input_ids", "labels", "attention_mask"]

- extra_colmuns = list(set(dataset.column_names) - set(signature_columns))
+ extra_columns = list(set(dataset.column_names) - set(signature_columns))

- if not remove_unused_columns and len(extra_colmuns) > 0:
+ if not remove_unused_columns and len(extra_columns) > 0:
warnings.warn(
"You passed `remove_unused_columns=False` on a non-packed dataset. This might create some issues with the default collator and yield to errors. If you want to "
- f"inspect dataset other columns (in this case {extra_colmuns}), you can subclass `DataCollatorForLanguageModeling` in case you used the default collator and create your own data collator in order to inspect the unused dataset columns."
+ f"inspect dataset other columns (in this case {extra_columns}), you can subclass `DataCollatorForLanguageModeling` in case you used the default collator and create your own data collator in order to inspect the unused dataset columns."
)

tokenized_dataset = dataset.map(
Expand Down

0 comments on commit 5760e5d

Please sign in to comment.