From d0c7124f4b3f53462ae8e609ba101cc13343c8c1 Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Thu, 16 Jan 2025 15:25:22 +0530 Subject: [PATCH 01/10] update ollama plugin to reflect API changes Signed-off-by: Samhita Alla --- .../flytekitplugins/inference/ollama/serve.py | 118 ++++++++++++------ 1 file changed, 83 insertions(+), 35 deletions(-) diff --git a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py index c8f93c585e..5bf2cc22f7 100644 --- a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py +++ b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py @@ -1,6 +1,5 @@ -import base64 from dataclasses import dataclass -from typing import Optional +from typing import Any, Mapping, Optional, Sequence, Union from ..sidecar_template import ModelInferenceTemplate @@ -12,13 +11,29 @@ class Model: :param name: The name of the model. :param mem: The amount of memory allocated for the model, specified as a string. Default is "500Mi". :param cpu: The number of CPU cores allocated for the model. Default is 1. - :param modelfile: The actual model file as a JSON-serializable string. This represents the file content. Default is `None` if not applicable. + :param from: The name of an existing model to create the new model from. + :param files: A list of file names to create the model from. + :param adapters: A list of file names to create the model for LORA adapters. + :param template: The prompt template for the model. + :param license: A string or list of strings containing the license or licenses for the model. + :param system: A string containing the system prompt for the model. + :param parameters: A dictionary of parameters for the model. + :param messages: A list of message objects used to create a conversation. + :param quantize: Quantize a non-quantized (e.g. float16) model. """ name: str mem: str = "500Mi" cpu: int = 1 - modelfile: Optional[str] = None + from_: Optional[str] = None + files: Optional[list[str]] = None + adapters: Optional[list[str]] = None + template: Optional[str] = None + license: Optional[Union[str, list[str]]] = None + system: Optional[str] = None + parameters: Optional[Mapping[str, Any]] = None + messages: Optional[Sequence[Mapping[str, Any]]] = None + quantize: Optional[str] = None class Ollama(ModelInferenceTemplate): @@ -36,7 +51,7 @@ def __init__( ): """Initialize Ollama class for managing a Kubernetes pod template. - :param model: An instance of the Model class containing the model's configuration, including its name, memory, CPU, and file. + :param model: An instance of the Model class containing the model's configuration, including its name, memory, CPU, and the modelfile parameters. :param image: The Docker image to be used for the container. Default is "ollama/ollama". :param port: The port number on which the container should expose its service. Default is 11434. :param cpu: The number of CPU cores requested for the container. Default is 1. 
@@ -48,7 +63,15 @@ def __init__( self._model_name = model.name self._model_mem = model.mem self._model_cpu = model.cpu - self._model_modelfile = model.modelfile + self._model_from = model.from_ + self._model_files = model.files + self._model_adapters = model.adapters + self._model_template = model.template + self._model_license = model.license + self._model_system = model.system + self._model_parameters = model.parameters + self._model_messages = model.messages + self._model_quantize = model.quantize super().__init__( image=image, @@ -58,7 +81,7 @@ def __init__( mem=mem, download_inputs_mem=download_inputs_mem, download_inputs_cpu=download_inputs_cpu, - download_inputs=(True if self._model_modelfile and "{inputs" in self._model_modelfile else False), + download_inputs=bool(self._model_adapters or self._model_files), ) self.setup_ollama_pod_template() @@ -71,7 +94,19 @@ def setup_ollama_pod_template(self): V1VolumeMount, ) - container_name = "create-model" if self._model_modelfile else "pull-model" + custom_model = any( + [ + self._model_files, + self._model_adapters, + self._model_template, + self._model_license, + self._model_system, + self._model_parameters, + self._model_messages, + self._model_quantize, + ] + ) + container_name = "create-model" if custom_model else "pull-model" base_code = """ import base64 @@ -97,53 +132,64 @@ def setup_ollama_pod_template(self): print('Ollama service did not become ready in time') exit(1) """ - if self._model_modelfile: - encoded_modelfile = base64.b64encode(self._model_modelfile.encode("utf-8")).decode("utf-8") - - if "{inputs" in self._model_modelfile: + if custom_model: + if self._model_files or self._model_adapters: python_code = f""" {base_code} import json +from ollama._client import Client with open('/shared/inputs.json', 'r') as f: inputs = json.load(f) -class AttrDict(dict): - def __init__(self, *args, **kwargs): - super(AttrDict, self).__init__(*args, **kwargs) - self.__dict__ = self - -inputs = {{'inputs': AttrDict(inputs)}} - -encoded_model_file = '{encoded_modelfile}' +files = {{}} +adapters = {{}} +client = Client('{self.base_url}') -modelfile = base64.b64decode(encoded_model_file).decode('utf-8').format(**inputs) -modelfile = modelfile.replace('{{', '{{{{').replace('}}', '}}}}') - -with open('Modelfile', 'w') as f: - f.write(modelfile) +for input_name, input_value in inputs.items(): + if input_name in self._model_files: + files[input_name] = client.create_blob(input_value) + if input_name in self._model_adapters: + adapters[input_name] = client.create_blob(input_value) {ollama_service_ready} # Debugging: Shows the status of model creation. -for chunk in ollama.create(model='{self._model_name}', path='Modelfile', stream=True): +for chunk in ollama.create( + model='{self._model_name}', + from='{self._model_from}', + files=files if files else None, + adapters=adapters if adapters else None, + template='{self._model_template}', + license='{self._model_license}', + system='{self._model_system}', + parameters={self._model_parameters}, + messages={self._model_messages}, + quantize='{self._model_quantize}', + stream=True +): print(chunk) """ else: python_code = f""" {base_code} -encoded_model_file = '{encoded_modelfile}' - -modelfile = base64.b64decode(encoded_model_file).decode('utf-8') - -with open('Modelfile', 'w') as f: - f.write(modelfile) - {ollama_service_ready} # Debugging: Shows the status of model creation. 
-for chunk in ollama.create(model='{self._model_name}', path='Modelfile', stream=True): +for chunk in ollama.create( + model='{self._model_name}', + from='{self._model_from}', + files=None, + adapters=None, + template='{self._model_template}', + license='{self._model_license}', + system='{self._model_system}', + parameters={self._model_parameters}, + messages={self._model_messages}, + quantize='{self._model_quantize}', + stream=True +): print(chunk) """ else: @@ -164,7 +210,9 @@ def __init__(self, *args, **kwargs): name=container_name, image="python:3.11-slim", command=["/bin/sh", "-c"], - args=[f"pip install requests && pip install ollama==0.3.3 && {command}"], + args=[ + f"pip install requests && pip install git+https://github.com/ollama/ollama-python.git@eefe5c9666e2fa82ab17618155dd0aae47bba8fa && {command}" + ], resources=V1ResourceRequirements( requests={ "cpu": self._model_cpu, From 1510fa98615b1328eb0f416512d49de217c22851 Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Thu, 16 Jan 2025 15:47:24 +0530 Subject: [PATCH 02/10] add git Signed-off-by: Samhita Alla --- .../flytekit-inference/flytekitplugins/inference/ollama/serve.py | 1 + 1 file changed, 1 insertion(+) diff --git a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py index 5bf2cc22f7..14855f7dae 100644 --- a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py +++ b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py @@ -211,6 +211,7 @@ def setup_ollama_pod_template(self): image="python:3.11-slim", command=["/bin/sh", "-c"], args=[ + "apt-get update && apt-get install -y git && " f"pip install requests && pip install git+https://github.com/ollama/ollama-python.git@eefe5c9666e2fa82ab17618155dd0aae47bba8fa && {command}" ], resources=V1ResourceRequirements( From 209f64518bc148345f4ed97424f6f6961a51d546 Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Thu, 16 Jan 2025 16:30:01 +0530 Subject: [PATCH 03/10] update fstrings Signed-off-by: Samhita Alla --- .../flytekitplugins/inference/ollama/serve.py | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py index 14855f7dae..68fa2bcf74 100644 --- a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py +++ b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py @@ -156,16 +156,16 @@ def setup_ollama_pod_template(self): # Debugging: Shows the status of model creation. 
for chunk in ollama.create( - model='{self._model_name}', - from='{self._model_from}', + model={"\'" + self._model_name + "\'" if self._model_name is not None else None}, + from_={"\'" + self._model_from + "\'" if self._model_from is not None else None}, files=files if files else None, adapters=adapters if adapters else None, - template='{self._model_template}', - license='{self._model_license}', - system='{self._model_system}', - parameters={self._model_parameters}, - messages={self._model_messages}, - quantize='{self._model_quantize}', + template={"\'" + self._model_template.replace("\n", "\\n") + "\'" if self._model_template is not None else None}, + license={"\'" + self._model_license + "\'" if self._model_license is not None else None}, + system={"\'" + self._model_system.replace("\n", "\\n") + "\'" if self._model_system is not None else None}, + parameters={self._model_parameters if self._model_parameters is not None else None}, + messages={self._model_messages if self._model_messages is not None else None}, + quantize={"\'" + self._model_quantize + "\'" if self._model_quantize is not None else None}, stream=True ): print(chunk) @@ -178,16 +178,16 @@ def setup_ollama_pod_template(self): # Debugging: Shows the status of model creation. for chunk in ollama.create( - model='{self._model_name}', - from='{self._model_from}', + model={"\'" + self._model_name + "\'" if self._model_name is not None else None}, + from_={"\'" + self._model_from + "\'" if self._model_from is not None else None}, files=None, adapters=None, - template='{self._model_template}', - license='{self._model_license}', - system='{self._model_system}', - parameters={self._model_parameters}, - messages={self._model_messages}, - quantize='{self._model_quantize}', + template={"\'" + self._model_template.replace("\n", "\\n") + "\'" if self._model_template is not None else None}, + license={"\'" + self._model_license + "\'" if self._model_license is not None else None}, + system={"\'" + self._model_system.replace("\n", "\\n") + "\'" if self._model_system is not None else None}, + parameters={self._model_parameters if self._model_parameters is not None else None}, + messages={self._model_messages if self._model_messages is not None else None}, + quantize={"\'" + self._model_quantize + "\'" if self._model_quantize is not None else None}, stream=True ): print(chunk) From a8aca1f3274fc7dc95bda80c9ec57625d226a695 Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Thu, 16 Jan 2025 16:39:22 +0530 Subject: [PATCH 04/10] quotes Signed-off-by: Samhita Alla --- .../flytekitplugins/inference/ollama/serve.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py index 68fa2bcf74..403caefd73 100644 --- a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py +++ b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py @@ -156,16 +156,16 @@ def setup_ollama_pod_template(self): # Debugging: Shows the status of model creation. 
for chunk in ollama.create( - model={"\'" + self._model_name + "\'" if self._model_name is not None else None}, - from_={"\'" + self._model_from + "\'" if self._model_from is not None else None}, + model={"'" + self._model_name + "'" if self._model_name is not None else None}, + from_={"'" + self._model_from + "'" if self._model_from is not None else None}, files=files if files else None, adapters=adapters if adapters else None, - template={"\'" + self._model_template.replace("\n", "\\n") + "\'" if self._model_template is not None else None}, - license={"\'" + self._model_license + "\'" if self._model_license is not None else None}, - system={"\'" + self._model_system.replace("\n", "\\n") + "\'" if self._model_system is not None else None}, + template={"'" + self._model_template.replace("\n", "\\n") + "'" if self._model_template is not None else None}, + license={"'" + self._model_license + "'" if self._model_license is not None else None}, + system={"'" + self._model_system.replace("\n", "\\n") + "'" if self._model_system is not None else None}, parameters={self._model_parameters if self._model_parameters is not None else None}, messages={self._model_messages if self._model_messages is not None else None}, - quantize={"\'" + self._model_quantize + "\'" if self._model_quantize is not None else None}, + quantize={"'" + self._model_quantize + "'" if self._model_quantize is not None else None}, stream=True ): print(chunk) @@ -178,16 +178,16 @@ def setup_ollama_pod_template(self): # Debugging: Shows the status of model creation. for chunk in ollama.create( - model={"\'" + self._model_name + "\'" if self._model_name is not None else None}, - from_={"\'" + self._model_from + "\'" if self._model_from is not None else None}, + model={"'" + self._model_name + "'" if self._model_name is not None else None}, + from_={"'" + self._model_from + "'" if self._model_from is not None else None}, files=None, adapters=None, - template={"\'" + self._model_template.replace("\n", "\\n") + "\'" if self._model_template is not None else None}, - license={"\'" + self._model_license + "\'" if self._model_license is not None else None}, - system={"\'" + self._model_system.replace("\n", "\\n") + "\'" if self._model_system is not None else None}, + template={"'" + self._model_template.replace("\n", "\\n") + "'" if self._model_template is not None else None}, + license={"'" + self._model_license + "'" if self._model_license is not None else None}, + system={"'" + self._model_system.replace("\n", "\\n") + "'" if self._model_system is not None else None}, parameters={self._model_parameters if self._model_parameters is not None else None}, messages={self._model_messages if self._model_messages is not None else None}, - quantize={"\'" + self._model_quantize + "\'" if self._model_quantize is not None else None}, + quantize={"'" + self._model_quantize + "'" if self._model_quantize is not None else None}, stream=True ): print(chunk) From bad9ec15a56456f509d9d84ba44a51a5a90b478b Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Thu, 16 Jan 2025 16:50:52 +0530 Subject: [PATCH 05/10] self Signed-off-by: Samhita Alla --- plugins/flytekit-inference/README.md | 2 ++ .../flytekitplugins/inference/ollama/serve.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/plugins/flytekit-inference/README.md b/plugins/flytekit-inference/README.md index 646200c111..f3113e2526 100644 --- a/plugins/flytekit-inference/README.md +++ b/plugins/flytekit-inference/README.md @@ -70,6 +70,8 @@ def model_serving() -> str: ## Ollama 
+Use Python 3.12 + The Ollama plugin allows you to serve LLMs locally. You can either pull an existing model or create a new one. diff --git a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py index 403caefd73..cb872f5874 100644 --- a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py +++ b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py @@ -147,9 +147,9 @@ def setup_ollama_pod_template(self): client = Client('{self.base_url}') for input_name, input_value in inputs.items(): - if input_name in self._model_files: + if input_name in {self._model_files}: files[input_name] = client.create_blob(input_value) - if input_name in self._model_adapters: + if input_name in {self._model_adapters}: adapters[input_name] = client.create_blob(input_value) {ollama_service_ready} From 2a5b51c3cd58b6b20a186ae98c8b4b63ac133182 Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Thu, 16 Jan 2025 16:53:29 +0530 Subject: [PATCH 06/10] nit Signed-off-by: Samhita Alla --- .../flytekitplugins/inference/ollama/serve.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py index cb872f5874..4a7aebd77b 100644 --- a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py +++ b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py @@ -147,9 +147,9 @@ def setup_ollama_pod_template(self): client = Client('{self.base_url}') for input_name, input_value in inputs.items(): - if input_name in {self._model_files}: + if {self._model_files} and input_name in {self._model_files}: files[input_name] = client.create_blob(input_value) - if input_name in {self._model_adapters}: + if {self._model_adapters} and input_name in {self._model_adapters}: adapters[input_name] = client.create_blob(input_value) {ollama_service_ready} From dc81441ed1545b930cfbe6fb8e5f3d49b4383e11 Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Thu, 16 Jan 2025 18:56:31 +0530 Subject: [PATCH 07/10] cleanup Signed-off-by: Samhita Alla --- plugins/flytekit-inference/README.md | 2 - .../flytekitplugins/inference/ollama/serve.py | 37 ++++++++++--------- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/plugins/flytekit-inference/README.md b/plugins/flytekit-inference/README.md index f3113e2526..646200c111 100644 --- a/plugins/flytekit-inference/README.md +++ b/plugins/flytekit-inference/README.md @@ -70,8 +70,6 @@ def model_serving() -> str: ## Ollama -Use Python 3.12 - The Ollama plugin allows you to serve LLMs locally. You can either pull an existing model or create a new one. diff --git a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py index 4a7aebd77b..e00dad13ee 100644 --- a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py +++ b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py @@ -51,6 +51,9 @@ def __init__( ): """Initialize Ollama class for managing a Kubernetes pod template. + Python 3.12 or higher is required due to support for backslashes in f-strings: + https://realpython.com/python312-f-strings/#backslashes-now-allowed-in-f-strings + :param model: An instance of the Model class containing the model's configuration, including its name, memory, CPU, and the modelfile parameters. 
:param image: The Docker image to be used for the container. Default is "ollama/ollama". :param port: The port number on which the container should expose its service. Default is 11434. @@ -149,23 +152,23 @@ def setup_ollama_pod_template(self): for input_name, input_value in inputs.items(): if {self._model_files} and input_name in {self._model_files}: files[input_name] = client.create_blob(input_value) - if {self._model_adapters} and input_name in {self._model_adapters}: + elif {self._model_adapters} and input_name in {self._model_adapters}: adapters[input_name] = client.create_blob(input_value) {ollama_service_ready} # Debugging: Shows the status of model creation. for chunk in ollama.create( - model={"'" + self._model_name + "'" if self._model_name is not None else None}, - from_={"'" + self._model_from + "'" if self._model_from is not None else None}, + model={"'" + self._model_name + "'" if self._model_name else None}, + from_={"'" + self._model_from + "'" if self._model_from else None}, files=files if files else None, adapters=adapters if adapters else None, - template={"'" + self._model_template.replace("\n", "\\n") + "'" if self._model_template is not None else None}, - license={"'" + self._model_license + "'" if self._model_license is not None else None}, - system={"'" + self._model_system.replace("\n", "\\n") + "'" if self._model_system is not None else None}, - parameters={self._model_parameters if self._model_parameters is not None else None}, - messages={self._model_messages if self._model_messages is not None else None}, - quantize={"'" + self._model_quantize + "'" if self._model_quantize is not None else None}, + template={"'" + self._model_template.replace("\n", "\\n") + "'" if self._model_template else None}, + license={"'" + self._model_license + "'" if self._model_license else None}, + system={"'" + self._model_system.replace("\n", "\\n") + "'" if self._model_system else None}, + parameters={self._model_parameters if self._model_parameters else None}, + messages={self._model_messages if self._model_messages else None}, + quantize={"'" + self._model_quantize + "'" if self._model_quantize else None}, stream=True ): print(chunk) @@ -178,16 +181,16 @@ def setup_ollama_pod_template(self): # Debugging: Shows the status of model creation. 
for chunk in ollama.create( - model={"'" + self._model_name + "'" if self._model_name is not None else None}, - from_={"'" + self._model_from + "'" if self._model_from is not None else None}, + model={"'" + self._model_name + "'" if self._model_name else None}, + from_={"'" + self._model_from + "'" if self._model_from else None}, files=None, adapters=None, - template={"'" + self._model_template.replace("\n", "\\n") + "'" if self._model_template is not None else None}, - license={"'" + self._model_license + "'" if self._model_license is not None else None}, - system={"'" + self._model_system.replace("\n", "\\n") + "'" if self._model_system is not None else None}, - parameters={self._model_parameters if self._model_parameters is not None else None}, - messages={self._model_messages if self._model_messages is not None else None}, - quantize={"'" + self._model_quantize + "'" if self._model_quantize is not None else None}, + template={"'" + self._model_template.replace("\n", "\\n") + "'" if self._model_template else None}, + license={"'" + self._model_license + "'" if self._model_license else None}, + system={"'" + self._model_system.replace("\n", "\\n") + "'" if self._model_system else None}, + parameters={self._model_parameters if self._model_parameters else None}, + messages={self._model_messages if self._model_messages else None}, + quantize={"'" + self._model_quantize + "'" if self._model_quantize else None}, stream=True ): print(chunk) From 9c2c679aaef5d028069a1f6a5953e3a7339161fe Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Thu, 16 Jan 2025 19:02:41 +0530 Subject: [PATCH 08/10] update readme Signed-off-by: Samhita Alla --- plugins/flytekit-inference/README.md | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/plugins/flytekit-inference/README.md b/plugins/flytekit-inference/README.md index 646200c111..54bf0a022a 100644 --- a/plugins/flytekit-inference/README.md +++ b/plugins/flytekit-inference/README.md @@ -74,8 +74,6 @@ The Ollama plugin allows you to serve LLMs locally. You can either pull an existing model or create a new one. ```python -from textwrap import dedent - from flytekit import ImageSpec, Resources, task, workflow from flytekitplugins.inference import Ollama, Model from flytekit.extras.accelerators import A10G @@ -91,13 +89,10 @@ image = ImageSpec( ollama_instance = Ollama( model=Model( name="llama3-mario", - modelfile=dedent("""\ - FROM llama3 - ADAPTER {inputs.gguf} - PARAMETER temperature 1 - PARAMETER num_ctx 4096 - SYSTEM You are Mario from super mario bros, acting as an assistant.\ - """), + from_="llama3", + adapters=["gguf"], + parameters={"temperature": 1, "num_ctx": 4096}, + system="You are Mario from super mario bros, acting as an assistant." ) ) From 4c50a609a8a1035c627a5cc0764090e9cab304be Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Thu, 16 Jan 2025 19:08:04 +0530 Subject: [PATCH 09/10] update docstring Signed-off-by: Samhita Alla --- .../flytekitplugins/inference/ollama/serve.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py index e00dad13ee..ac75a8ee9f 100644 --- a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py +++ b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py @@ -11,7 +11,7 @@ class Model: :param name: The name of the model. :param mem: The amount of memory allocated for the model, specified as a string. 
Default is "500Mi". :param cpu: The number of CPU cores allocated for the model. Default is 1. - :param from: The name of an existing model to create the new model from. + :param from: The name of an existing model used as a base to create a new custom model. :param files: A list of file names to create the model from. :param adapters: A list of file names to create the model for LORA adapters. :param template: The prompt template for the model. From 4c6a57c2d29430b2b6a9fb40920808bbb935e462 Mon Sep 17 00:00:00 2001 From: Niels Bantilan Date: Thu, 16 Jan 2025 09:47:02 -0500 Subject: [PATCH 10/10] use newline global vars --- .../flytekitplugins/inference/ollama/serve.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py index ac75a8ee9f..edf075a4e0 100644 --- a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py +++ b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py @@ -4,6 +4,10 @@ from ..sidecar_template import ModelInferenceTemplate +NEWLINE = "\n" +NEWLINE_ESCAPED = "\\n" + + @dataclass class Model: """Represents the configuration for a model used in a Kubernetes pod template. @@ -163,9 +167,9 @@ def setup_ollama_pod_template(self): from_={"'" + self._model_from + "'" if self._model_from else None}, files=files if files else None, adapters=adapters if adapters else None, - template={"'" + self._model_template.replace("\n", "\\n") + "'" if self._model_template else None}, + template={"'" + self._model_template.replace(NEWLINE, NEWLINE_ESCAPED) + "'" if self._model_template else None}, license={"'" + self._model_license + "'" if self._model_license else None}, - system={"'" + self._model_system.replace("\n", "\\n") + "'" if self._model_system else None}, + system={"'" + self._model_system.replace(NEWLINE, NEWLINE_ESCAPED) + "'" if self._model_system else None}, parameters={self._model_parameters if self._model_parameters else None}, messages={self._model_messages if self._model_messages else None}, quantize={"'" + self._model_quantize + "'" if self._model_quantize else None}, @@ -185,9 +189,9 @@ def setup_ollama_pod_template(self): from_={"'" + self._model_from + "'" if self._model_from else None}, files=None, adapters=None, - template={"'" + self._model_template.replace("\n", "\\n") + "'" if self._model_template else None}, + template={"'" + self._model_template.replace(NEWLINE, NEWLINE_ESCAPED) + "'" if self._model_template else None}, license={"'" + self._model_license + "'" if self._model_license else None}, - system={"'" + self._model_system.replace("\n", "\\n") + "'" if self._model_system else None}, + system={"'" + self._model_system.replace(NEWLINE, NEWLINE_ESCAPED) + "'" if self._model_system else None}, parameters={self._model_parameters if self._model_parameters else None}, messages={self._model_messages if self._model_messages else None}, quantize={"'" + self._model_quantize + "'" if self._model_quantize else None},