Merge pull request #53 from fixie-ai/juberti/more-llama3

Add more Llama 3 hosts and fix deploy action
fixie-ai · Apr 26, 2024 · a82fb75
2 parents 5f51e91 + 28beb8f
commit a82fb75
Show file tree

Hide file tree

Showing 2 changed files with 23 additions and 2 deletions.
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
@@ -12,6 +12,8 @@ jobs:
     steps:
       - uses: actions/checkout@v3
       - uses: superfly/flyctl-actions/setup-flyctl@master
-      - run: flyctl deploy --remote-only
+      - run: |
+          echo '${{secrets.GCP_SERVICE_ACCOUNT}}' > service_account.json
+          flyctl deploy --remote-only
         env:
-          FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
+          FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
diff --git a/llm_benchmark_suite.py b/llm_benchmark_suite.py
@@ -94,6 +94,8 @@ def run(self):
 
 
 class _AnyscaleLlm(_Llm):
+    """See https://docs.endpoints.anyscale.com/text-generation/query-a-model"""
+
     def __init__(self, model: str, display_model: Optional[str] = None):
         super().__init__(
             model,
@@ -104,6 +106,8 @@ def __init__(self, model: str, display_model: Optional[str] = None):
 
 
 class _CloudflareLlm(_Llm):
+    """See https://developers.cloudflare.com/workers-ai/models/"""
+
     def __init__(self, model: str, display_model: Optional[str] = None):
         super().__init__(
             model,
@@ -112,6 +116,8 @@ def __init__(self, model: str, display_model: Optional[str] = None):
 
 
 class _DatabricksLlm(_Llm):
+    """See https://docs.databricks.com/en/machine-learning/foundation-models/supported-models.html"""
+
     def __init__(self, model: str, display_model: Optional[str] = None):
         super().__init__(
             model,
@@ -122,6 +128,8 @@ def __init__(self, model: str, display_model: Optional[str] = None):
 
 
 class _FireworksLlm(_Llm):
+    """See https://fireworks.ai/models"""
+
     def __init__(self, model: str, display_model: Optional[str] = None):
         super().__init__(
             model,
@@ -132,6 +140,8 @@ def __init__(self, model: str, display_model: Optional[str] = None):
 
 
 class _GroqLlm(_Llm):
+    """See https://console.groq.com/docs/models"""
+
     def __init__(self, model: str, display_model: Optional[str] = None):
         super().__init__(
             model,
@@ -142,6 +152,8 @@ def __init__(self, model: str, display_model: Optional[str] = None):
 
 
 class _OctoLlm(_Llm):
+    """See https://octo.ai/docs/getting-started/inference-models#serverless-endpoints"""
+
     def __init__(self, model: str, display_model: Optional[str] = None):
         super().__init__(
             model,
@@ -152,6 +164,8 @@ def __init__(self, model: str, display_model: Optional[str] = None):
 
 
 class _PerplexityLlm(_Llm):
+    """See https://docs.perplexity.ai/docs/model-cards"""
+
     def __init__(self, model: str, display_model: Optional[str] = None):
         super().__init__(
             model,
@@ -162,6 +176,8 @@ def __init__(self, model: str, display_model: Optional[str] = None):
 
 
 class _TogetherLlm(_Llm):
+    """See https://docs.together.ai/docs/inference-models"""
+
     def __init__(self, model: str, display_model: Optional[str] = None):
         super().__init__(
             model,
@@ -243,10 +259,12 @@ def _text_models():
         _PerplexityLlm("sonar-medium-chat"),
         # Llama 3 70b
         _AnyscaleLlm("meta-llama/Llama-3-70b-chat-hf", LLAMA_3_70B_CHAT),
+        _DatabricksLlm("databricks-meta-llama-3-70b-instruct", LLAMA_3_70B_CHAT),
         _FireworksLlm(
             "accounts/fireworks/models/llama-v3-70b-instruct", LLAMA_3_70B_CHAT
         ),
         _GroqLlm("llama3-70b-8192", LLAMA_3_70B_CHAT),
+        _OctoLlm("meta-llama-3-70b-instruct", LLAMA_3_70B_CHAT),
         _PerplexityLlm("llama-3-70b-instruct", LLAMA_3_70B_CHAT),
         _TogetherLlm("meta-llama/Llama-3-70b-chat-hf", LLAMA_3_70B_CHAT),
         # Llama 2 70b
@@ -278,6 +296,7 @@ def _text_models():
             "accounts/fireworks/models/llama-v3-8b-instruct", LLAMA_3_8B_CHAT
         ),
         _GroqLlm("llama3-8b-8192", LLAMA_3_8B_CHAT),
+        _OctoLlm("meta-llama-3-8b-instruct", LLAMA_3_8B_CHAT),
         _PerplexityLlm("llama-3-8b-instruct", LLAMA_3_8B_CHAT),
         _TogetherLlm("meta-llama/Llama-3-8b-chat-hf", LLAMA_3_8B_CHAT),
         # Llama 2 7b