From 21fe299431e8323fd27f06c44f0fae5b7342a006 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=8A=B9=EB=8D=95/Infrastructure=EA=B7=B8?= =?UTF-8?q?=EB=A3=B9=28YA=29?= Date: Mon, 9 Oct 2023 16:33:49 +0900 Subject: [PATCH] Use max_length instead of sequence_len since they are the same values --- src/axolotl/prompt_strategies/completion.py | 4 ++-- src/axolotl/prompt_strategies/metharme.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/axolotl/prompt_strategies/completion.py b/src/axolotl/prompt_strategies/completion.py index 3285e667c..bb30c2710 100644 --- a/src/axolotl/prompt_strategies/completion.py +++ b/src/axolotl/prompt_strategies/completion.py @@ -53,8 +53,8 @@ def tokenize_prompt(self, prompt): tokenized_full_prompt = self._tokenize(full_prompt) for key, val in tokenized_full_prompt.items(): - for i in range(0, len(val), self.sequence_len): - res[key].append(val[i : i + self.sequence_len]) + for i in range(0, len(val), self.max_length): + res[key].append(val[i : i + self.max_length]) return dict(res) diff --git a/src/axolotl/prompt_strategies/metharme.py b/src/axolotl/prompt_strategies/metharme.py index 52d77c00c..62c5349bd 100644 --- a/src/axolotl/prompt_strategies/metharme.py +++ b/src/axolotl/prompt_strategies/metharme.py @@ -31,7 +31,7 @@ def _tokenize( result = self.tokenizer( prompt, truncation=True, - max_length=self.sequence_len, + max_length=self.max_length, padding=False, return_tensors=None, ) @@ -43,7 +43,7 @@ def _tokenize( if num_eos_tokens > 0 and add_eos_token and len(result["input_ids"]) > 0: for _ in range(num_eos_tokens): - if len(result["input_ids"]) < self.sequence_len: + if len(result["input_ids"]) < self.max_length: result["input_ids"].append(self.tokenizer.eos_token_id) result["attention_mask"].append(1)