fix context_window=None for llm metadata #17029

Merged: 1 commit, Nov 22, 2024
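This PR replaces hard-coded `context_window=None` values in the ModelScope and SambaNova integrations with a configurable field, so `LLMMetadata` reports a real window size. A minimal sketch of the resulting metadata shape, assuming the `LLMMetadata` model from llama-index core (the values are illustrative, not from this PR):

```python
# Minimal sketch, assuming LLMMetadata from llama-index core.
# context_window should be a real integer; reporting None breaks
# downstream consumers that size prompts and memory from it.
from llama_index.core.llms import LLMMetadata

metadata = LLMMetadata(
    context_window=100000,  # previously hard-coded to None in these integrations
    num_output=1024,
    is_chat_model=True,
    model_name="Meta-Llama-3.1-70B-Instruct",
)
print(metadata.context_window)  # 100000
```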
2 changes: 2 additions & 0 deletions docs/docs/examples/llm/sambanovasystems.ipynb

@@ -90,6 +90,7 @@
     "\n",
     "llm = SambaNovaCloud(\n",
     "    model=\"Meta-Llama-3.1-70B-Instruct\",\n",
+    "    context_window=100000,\n",
     "    max_tokens=1024,\n",
     "    temperature=0.7,\n",
     "    top_k=1,\n",
@@ -397,6 +398,7 @@
     "\n",
     "llm = SambaStudio(\n",
     "    model=\"Meta-Llama-3-70B-Instruct-4096\",\n",
+    "    context_window=100000,\n",
     "    max_tokens=1024,\n",
     "    temperature=0.7,\n",
     "    top_k=1,\n",
@@ -155,10 +155,10 @@ def class_name(cls) -> str:
     def metadata(self) -> LLMMetadata:
         """LLM metadata."""
         return LLMMetadata(
-            context_window=None,
-            num_output=None,
+            context_window=self.context_window,
+            num_output=self.max_new_tokens,
             model_name=self.model_name,
-            is_chat_model=self.is_chat_model,
+            is_chat_model=False,
         )

     @llm_completion_callback()
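In the ModelScope integration (the base.py hunk above; see the version bump below), metadata now reflects the instance configuration instead of hard-coded None values. A hypothetical usage sketch, with the class, parameter, and model names assumed rather than taken from this PR:

```python
# Hypothetical sketch: reading the fixed metadata from the ModelScope
# integration. The model name is illustrative, and instantiating the
# class may download model weights.
from llama_index.llms.modelscope import ModelScopeLLM

llm = ModelScopeLLM(model_name="qwen/Qwen-7B-Chat")
meta = llm.metadata
print(meta.context_window, meta.num_output)  # real integers now, not None
```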
@@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-llms-modelscope"
 readme = "README.md"
-version = "0.4.0"
+version = "0.4.1"

 [tool.poetry.dependencies]
 python = ">=3.9,<3.12"
@@ -25,6 +25,7 @@ SambaNovaCloud(
     sambanova_url="SambaNova cloud endpoint URL",
     sambanova_api_key="set with your SambaNova cloud API key",
     model="model name",
+    context_window=100000,
 )
 ```

@@ -37,5 +38,6 @@ SambaStudio(
     sambastudio_url="SambaStudio endpoint URL",
     sambastudio_api_key="set with your SambaStudio endpoint API key",
     model="model name",
+    context_window=100000,
 )
 ```
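Because the README snippets set context_window explicitly, the configured value now flows through to the metadata property. A short sketch, with the API key and model name as placeholders:

```python
# Sketch: the explicitly configured window is surfaced via metadata.
from llama_index.llms.sambanovasystems import SambaNovaCloud

llm = SambaNovaCloud(
    sambanova_api_key="your-api-key",  # placeholder
    model="Meta-Llama-3.1-70B-Instruct",
    context_window=100000,
    max_tokens=1024,
)
print(llm.metadata.context_window)  # 100000, previously None
```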
@@ -121,6 +121,7 @@ class SambaNovaCloud(LLM):
         top_p = model top p,
         top_k = model top k,
         stream_options = include usage to get generation metrics
+        context_window = model context window
     )
     Complete:
     .. code-block:: python
@@ -185,6 +186,8 @@ class SambaNovaCloud(LLM):
         description="Whether to use streaming handler when using non streaming methods",
     )

+    context_window: int = Field(default=4096, description="context window")
+
     max_tokens: int = Field(default=1024, description="max tokens to generate")

     temperature: float = Field(default=0.7, description="model temperature")
@@ -205,7 +208,7 @@ def class_name(cls) -> str:
     @property
     def metadata(self) -> LLMMetadata:
         return LLMMetadata(
-            context_window=None,
+            context_window=self.context_window,
             num_output=self.max_tokens,
             is_chat_model=True,
             model_name=self.model,
@@ -659,6 +662,7 @@ class SambaStudio(LLM):
         model = model or expert name (set for CoE endpoints),
         max_tokens = max number of tokens to generate,
         temperature = model temperature,
+        context_window = model context window,
         top_p = model top p,
         top_k = model top k,
         do_sample = whether to do sample
@@ -679,6 +683,8 @@ class SambaStudio(LLM):
         Whether to use streaming handler when using non streaming methods
     max_tokens: int
         max tokens to generate
+    context_window: int
+        model context window
     temperature: float
         model temperature
     top_p: float
@@ -715,6 +721,7 @@ class SambaStudio(LLM):
         model = model or expert name (set for CoE endpoints),
         max_tokens = max number of tokens to generate,
         temperature = model temperature,
+        context_window = model context window,
         top_p = model top p,
         top_k = model top k,
         do_sample = whether to do sample
@@ -797,6 +804,8 @@ class SambaStudio(LLM):
         description="Whether to use streaming handler when using non streaming methods",
     )

+    context_window: int = Field(default=4096, description="context window")
+
     max_tokens: int = Field(default=1024, description="max tokens to generate")

     temperature: Optional[float] = Field(default=0.7, description="model temperature")
@@ -840,7 +849,7 @@ def class_name(cls) -> str:
     @property
     def metadata(self) -> LLMMetadata:
         return LLMMetadata(
-            context_window=None,
+            context_window=self.context_window,
             num_output=self.max_tokens,
             is_chat_model=True,
             model_name=self.model,
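The new field defaults to 4096 and feeds straight into metadata, which matters because core components derive token budgets from metadata.context_window. One assumed downstream example (the URL and key are placeholders, and the memory-sizing behavior is an assumption about llama-index core):

```python
# Sketch of an assumed downstream consumer: ChatMemoryBuffer.from_defaults
# can size its token limit from llm.metadata.context_window when handed an
# LLM, which only works once context_window is an int rather than None.
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.llms.sambanovasystems import SambaStudio

llm = SambaStudio(
    sambastudio_url="https://example.com/endpoint",  # placeholder
    sambastudio_api_key="your-api-key",              # placeholder
    model="Meta-Llama-3-70B-Instruct-4096",
    context_window=100000,
)
memory = ChatMemoryBuffer.from_defaults(llm=llm)
print(memory.token_limit)  # derived from the 100000-token window
```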
@@ -26,7 +26,7 @@ authors = ["Rodrigo Maldonado <rodrigo.maldonado@pucp.edu.pe>"]
 description = "llama-index llms sambanova cloud and sambastudio integration"
 name = "llama-index-llms-sambanovasystems"
 readme = "README.md"
-version = "0.4.0"
+version = "0.4.1"

 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"