docs: add note about jamba 1.5s (#100)

* docs: add note about jamba 1.5s
* ci: add hf token from secrets for tests
AI21Labs · Sep 17, 2024 · d5eeb96
1 parent 1e09db1
commit d5eeb96
Show file tree

Hide file tree

Showing 6 changed files with 35 additions and 0 deletions.
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
@@ -5,6 +5,7 @@ on: [push, pull_request]
 env:
   POETRY_VERSION: "1.4.2"
   POETRY_URL: https://install.python-poetry.org
+  HF_TOKEN: ${{ secrets.HF_TOKEN }}
 
 jobs:
   lint:

diff --git a/README.md b/README.md
@@ -17,6 +17,12 @@
 
 ---
 
+## Prerequisites
+
+- If you wish to use the tokenizers for `Jamba 1.5 Mini` or `Jamba 1.5 Large`, you will need to request access to the relevant model's HuggingFace repo:
+  - [Jamba 1.5 Mini](https://huggingface.co/ai21labs/AI21-Jamba-1.5-Mini)
+  - [Jamba 1.5 Large](https://huggingface.co/ai21labs/AI21-Jamba-1.5-Large)
+
 ## Installation
 
 ### pip

diff --git a/examples/async_jamba_1_5_tokenizer.py b/examples/async_jamba_1_5_tokenizer.py
@@ -2,6 +2,13 @@
 
 from ai21_tokenizer import Tokenizer, PreTrainedTokenizers
 
+"""
+If you wish to use the tokenizers for `Jamba 1.5 Mini` or `Jamba 1.5 Large`,
+you will need to request access to the relevant model's HuggingFace repo:
+* https://huggingface.co/ai21labs/AI21-Jamba-1.5-Mini
+* https://huggingface.co/ai21labs/AI21-Jamba-1.5-Large
+"""
+
 
 async def main():
     tokenizer = await Tokenizer.get_async_tokenizer(PreTrainedTokenizers.JAMBA_1_5_MINI_TOKENIZER)

diff --git a/examples/jamba_1_5_tokenizer.py b/examples/jamba_1_5_tokenizer.py
@@ -1,5 +1,12 @@
 from ai21_tokenizer import Jamba1_5Tokenizer
 
+"""
+If you wish to use the tokenizers for `Jamba 1.5 Mini` or `Jamba 1.5 Large`,
+you will need to request access to the relevant model's HuggingFace repo:
+* https://huggingface.co/ai21labs/AI21-Jamba-1.5-Mini
+* https://huggingface.co/ai21labs/AI21-Jamba-1.5-Large
+"""
+
 model_path = "ai21labs/AI21-Jamba-1.5-Mini"
 
 tokenizer = Jamba1_5Tokenizer(model_path=model_path)

diff --git a/examples/use_async_tokenizer.py b/examples/use_async_tokenizer.py
@@ -2,6 +2,13 @@
 
 from ai21_tokenizer import Tokenizer
 
+"""
+If you wish to use the tokenizers for `Jamba 1.5 Mini` or `Jamba 1.5 Large`,
+you will need to request access to the relevant model's HuggingFace repo:
+* https://huggingface.co/ai21labs/AI21-Jamba-1.5-Mini
+* https://huggingface.co/ai21labs/AI21-Jamba-1.5-Large
+"""
+
 
 async def main():
     tokenizer = await Tokenizer.get_async_tokenizer()

diff --git a/examples/use_tokenizer.py b/examples/use_tokenizer.py
@@ -1,5 +1,12 @@
 from ai21_tokenizer import Tokenizer
 
+"""
+If you wish to use the tokenizers for `Jamba 1.5 Mini` or `Jamba 1.5 Large`,
+you will need to request access to the relevant model's HuggingFace repo:
+* https://huggingface.co/ai21labs/AI21-Jamba-1.5-Mini
+* https://huggingface.co/ai21labs/AI21-Jamba-1.5-Large
+"""
+
 tokenizer = Tokenizer.get_tokenizer()
 example_sentence = "This sentence should be encoded and then decoded. Hurray!!"
 encoded = tokenizer.encode(example_sentence)
-Original file line number
+Diff line change
@@ -5,6 +5,7 @@ on: [push, pull_request]
     env:
       POETRY_VERSION: "1.4.2"
       POETRY_URL: https://install.python-poetry.org
+      HF_TOKEN: ${{ secrets.HF_TOKEN }}
     jobs:
       lint:
@@ Expand Down @@