Skip to content

Commit

Permalink
Use max_length instead of sequence_len, since they hold the same value
Browse files Browse the repository at this point in the history
  • Loading branch information
seungduk-yanolja authored and winglian committed Oct 19, 2023
1 parent 95e141d commit 21fe299
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
4 changes: 2 additions & 2 deletions src/axolotl/prompt_strategies/completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ def tokenize_prompt(self, prompt):
tokenized_full_prompt = self._tokenize(full_prompt)

for key, val in tokenized_full_prompt.items():
for i in range(0, len(val), self.sequence_len):
res[key].append(val[i : i + self.sequence_len])
for i in range(0, len(val), self.max_length):
res[key].append(val[i : i + self.max_length])

return dict(res)

Expand Down
4 changes: 2 additions & 2 deletions src/axolotl/prompt_strategies/metharme.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def _tokenize(
result = self.tokenizer(
prompt,
truncation=True,
max_length=self.sequence_len,
max_length=self.max_length,
padding=False,
return_tensors=None,
)
Expand All @@ -43,7 +43,7 @@ def _tokenize(

if num_eos_tokens > 0 and add_eos_token and len(result["input_ids"]) > 0:
for _ in range(num_eos_tokens):
if len(result["input_ids"]) < self.sequence_len:
if len(result["input_ids"]) < self.max_length:
result["input_ids"].append(self.tokenizer.eos_token_id)
result["attention_mask"].append(1)

Expand Down

0 comments on commit 21fe299

Please sign in to comment.