From 47e08f33255abfb3d85408fb6a43bb61fc757dd9 Mon Sep 17 00:00:00 2001
From: hjh0119
Date: Wed, 7 Aug 2024 18:25:10 +0800
Subject: [PATCH] fix

---
 swift/llm/sft.py            | 2 +-
 swift/llm/utils/argument.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/swift/llm/sft.py b/swift/llm/sft.py
index d686552824..582f0702a6 100644
--- a/swift/llm/sft.py
+++ b/swift/llm/sft.py
@@ -339,7 +339,7 @@ def llm_sft(args: SftArguments) -> Dict[str, Any]:
             dataset_info['val_dataset'] = stat_dataset(val_dataset) if not streaming else None
     else:
         dataset_info = None
-        td0, tkwargs0 = template.encode(train_dataset[0]) if streaming else next(iter(train_dataset)), {}
+        td0, tkwargs0 = template.encode(train_dataset[0])
         print_example(td0, tokenizer, tkwargs0)
         train_dataset = LazyLLMDataset(train_dataset, template)
         if val_dataset is not None:
diff --git a/swift/llm/utils/argument.py b/swift/llm/utils/argument.py
index f0b7ba9e38..82758ab6a0 100644
--- a/swift/llm/utils/argument.py
+++ b/swift/llm/utils/argument.py
@@ -1032,7 +1032,7 @@ def __post_init__(self) -> None:
             self.gradient_accumulation_steps = math.ceil(16 / self.batch_size / self.world_size)
         template_info = TEMPLATE_MAPPING[self.template_type]
         self._handle_streaming_args()
-        if self.lazy_tokenize is None:
+        if self.lazy_tokenize is None and not self.streaming:
             self.lazy_tokenize = template_info.get('lazy_tokenize', False)
             logger.info(f'Setting args.lazy_tokenize: {self.lazy_tokenize}')
         if self.dataloader_num_workers is None: