From d0c7124f4b3f53462ae8e609ba101cc13343c8c1 Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Thu, 16 Jan 2025 15:25:22 +0530 Subject: [PATCH 01/10] update ollama plugin to reflect API changes Signed-off-by: Samhita Alla --- .../flytekitplugins/inference/ollama/serve.py | 118 ++++++++++++------ 1 file changed, 83 insertions(+), 35 deletions(-) diff --git a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py index c8f93c585e..5bf2cc22f7 100644 --- a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py +++ b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py @@ -1,6 +1,5 @@ -import base64 from dataclasses import dataclass -from typing import Optional +from typing import Any, Mapping, Optional, Sequence, Union from ..sidecar_template import ModelInferenceTemplate @@ -12,13 +11,29 @@ class Model: :param name: The name of the model. :param mem: The amount of memory allocated for the model, specified as a string. Default is "500Mi". :param cpu: The number of CPU cores allocated for the model. Default is 1. - :param modelfile: The actual model file as a JSON-serializable string. This represents the file content. Default is `None` if not applicable. + :param from: The name of an existing model to create the new model from. + :param files: A list of file names to create the model from. + :param adapters: A list of file names to create the model for LORA adapters. + :param template: The prompt template for the model. + :param license: A string or list of strings containing the license or licenses for the model. + :param system: A string containing the system prompt for the model. + :param parameters: A dictionary of parameters for the model. + :param messages: A list of message objects used to create a conversation. + :param quantize: Quantize a non-quantized (e.g. float16) model. """ name: str mem: str = "500Mi" cpu: int = 1 - modelfile: Optional[str] = None + from_: Optional[str] = None + files: Optional[list[str]] = None + adapters: Optional[list[str]] = None + template: Optional[str] = None + license: Optional[Union[str, list[str]]] = None + system: Optional[str] = None + parameters: Optional[Mapping[str, Any]] = None + messages: Optional[Sequence[Mapping[str, Any]]] = None + quantize: Optional[str] = None class Ollama(ModelInferenceTemplate): @@ -36,7 +51,7 @@ def __init__( ): """Initialize Ollama class for managing a Kubernetes pod template. - :param model: An instance of the Model class containing the model's configuration, including its name, memory, CPU, and file. + :param model: An instance of the Model class containing the model's configuration, including its name, memory, CPU, and the modelfile parameters. :param image: The Docker image to be used for the container. Default is "ollama/ollama". :param port: The port number on which the container should expose its service. Default is 11434. :param cpu: The number of CPU cores requested for the container. Default is 1. 
@@ -48,7 +63,15 @@ def __init__( self._model_name = model.name self._model_mem = model.mem self._model_cpu = model.cpu - self._model_modelfile = model.modelfile + self._model_from = model.from_ + self._model_files = model.files + self._model_adapters = model.adapters + self._model_template = model.template + self._model_license = model.license + self._model_system = model.system + self._model_parameters = model.parameters + self._model_messages = model.messages + self._model_quantize = model.quantize super().__init__( image=image, @@ -58,7 +81,7 @@ def __init__( mem=mem, download_inputs_mem=download_inputs_mem, download_inputs_cpu=download_inputs_cpu, - download_inputs=(True if self._model_modelfile and "{inputs" in self._model_modelfile else False), + download_inputs=bool(self._model_adapters or self._model_files), ) self.setup_ollama_pod_template() @@ -71,7 +94,19 @@ def setup_ollama_pod_template(self): V1VolumeMount, ) - container_name = "create-model" if self._model_modelfile else "pull-model" + custom_model = any( + [ + self._model_files, + self._model_adapters, + self._model_template, + self._model_license, + self._model_system, + self._model_parameters, + self._model_messages, + self._model_quantize, + ] + ) + container_name = "create-model" if custom_model else "pull-model" base_code = """ import base64 @@ -97,53 +132,64 @@ def setup_ollama_pod_template(self): print('Ollama service did not become ready in time') exit(1) """ - if self._model_modelfile: - encoded_modelfile = base64.b64encode(self._model_modelfile.encode("utf-8")).decode("utf-8") - - if "{inputs" in self._model_modelfile: + if custom_model: + if self._model_files or self._model_adapters: python_code = f""" {base_code} import json +from ollama._client import Client with open('/shared/inputs.json', 'r') as f: inputs = json.load(f) -class AttrDict(dict): - def __init__(self, *args, **kwargs): - super(AttrDict, self).__init__(*args, **kwargs) - self.__dict__ = self - -inputs = {{'inputs': AttrDict(inputs)}} - -encoded_model_file = '{encoded_modelfile}' +files = {{}} +adapters = {{}} +client = Client('{self.base_url}') -modelfile = base64.b64decode(encoded_model_file).decode('utf-8').format(**inputs) -modelfile = modelfile.replace('{{', '{{{{').replace('}}', '}}}}') - -with open('Modelfile', 'w') as f: - f.write(modelfile) +for input_name, input_value in inputs.items(): + if input_name in self._model_files: + files[input_name] = client.create_blob(input_value) + if input_name in self._model_adapters: + adapters[input_name] = client.create_blob(input_value) {ollama_service_ready} # Debugging: Shows the status of model creation. -for chunk in ollama.create(model='{self._model_name}', path='Modelfile', stream=True): +for chunk in ollama.create( + model='{self._model_name}', + from='{self._model_from}', + files=files if files else None, + adapters=adapters if adapters else None, + template='{self._model_template}', + license='{self._model_license}', + system='{self._model_system}', + parameters={self._model_parameters}, + messages={self._model_messages}, + quantize='{self._model_quantize}', + stream=True +): print(chunk) """ else: python_code = f""" {base_code} -encoded_model_file = '{encoded_modelfile}' - -modelfile = base64.b64decode(encoded_model_file).decode('utf-8') - -with open('Modelfile', 'w') as f: - f.write(modelfile) - {ollama_service_ready} # Debugging: Shows the status of model creation. 
-for chunk in ollama.create(model='{self._model_name}', path='Modelfile', stream=True): +for chunk in ollama.create( + model='{self._model_name}', + from='{self._model_from}', + files=None, + adapters=None, + template='{self._model_template}', + license='{self._model_license}', + system='{self._model_system}', + parameters={self._model_parameters}, + messages={self._model_messages}, + quantize='{self._model_quantize}', + stream=True +): print(chunk) """ else: @@ -164,7 +210,9 @@ def __init__(self, *args, **kwargs): name=container_name, image="python:3.11-slim", command=["/bin/sh", "-c"], - args=[f"pip install requests && pip install ollama==0.3.3 && {command}"], + args=[ + f"pip install requests && pip install git+https://github.com/ollama/ollama-python.git@eefe5c9666e2fa82ab17618155dd0aae47bba8fa && {command}" + ], resources=V1ResourceRequirements( requests={ "cpu": self._model_cpu, From 1510fa98615b1328eb0f416512d49de217c22851 Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Thu, 16 Jan 2025 15:47:24 +0530 Subject: [PATCH 02/10] add git Signed-off-by: Samhita Alla --- .../flytekit-inference/flytekitplugins/inference/ollama/serve.py | 1 + 1 file changed, 1 insertion(+) diff --git a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py index 5bf2cc22f7..14855f7dae 100644 --- a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py +++ b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py @@ -211,6 +211,7 @@ def setup_ollama_pod_template(self): image="python:3.11-slim", command=["/bin/sh", "-c"], args=[ + "apt-get update && apt-get install -y git && " f"pip install requests && pip install git+https://github.com/ollama/ollama-python.git@eefe5c9666e2fa82ab17618155dd0aae47bba8fa && {command}" ], resources=V1ResourceRequirements( From 209f64518bc148345f4ed97424f6f6961a51d546 Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Thu, 16 Jan 2025 16:30:01 +0530 Subject: [PATCH 03/10] update fstrings Signed-off-by: Samhita Alla --- .../flytekitplugins/inference/ollama/serve.py | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py index 14855f7dae..68fa2bcf74 100644 --- a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py +++ b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py @@ -156,16 +156,16 @@ def setup_ollama_pod_template(self): # Debugging: Shows the status of model creation. 
for chunk in ollama.create( - model='{self._model_name}', - from='{self._model_from}', + model={"\'" + self._model_name + "\'" if self._model_name is not None else None}, + from_={"\'" + self._model_from + "\'" if self._model_from is not None else None}, files=files if files else None, adapters=adapters if adapters else None, - template='{self._model_template}', - license='{self._model_license}', - system='{self._model_system}', - parameters={self._model_parameters}, - messages={self._model_messages}, - quantize='{self._model_quantize}', + template={"\'" + self._model_template.replace("\n", "\\n") + "\'" if self._model_template is not None else None}, + license={"\'" + self._model_license + "\'" if self._model_license is not None else None}, + system={"\'" + self._model_system.replace("\n", "\\n") + "\'" if self._model_system is not None else None}, + parameters={self._model_parameters if self._model_parameters is not None else None}, + messages={self._model_messages if self._model_messages is not None else None}, + quantize={"\'" + self._model_quantize + "\'" if self._model_quantize is not None else None}, stream=True ): print(chunk) @@ -178,16 +178,16 @@ def setup_ollama_pod_template(self): # Debugging: Shows the status of model creation. for chunk in ollama.create( - model='{self._model_name}', - from='{self._model_from}', + model={"\'" + self._model_name + "\'" if self._model_name is not None else None}, + from_={"\'" + self._model_from + "\'" if self._model_from is not None else None}, files=None, adapters=None, - template='{self._model_template}', - license='{self._model_license}', - system='{self._model_system}', - parameters={self._model_parameters}, - messages={self._model_messages}, - quantize='{self._model_quantize}', + template={"\'" + self._model_template.replace("\n", "\\n") + "\'" if self._model_template is not None else None}, + license={"\'" + self._model_license + "\'" if self._model_license is not None else None}, + system={"\'" + self._model_system.replace("\n", "\\n") + "\'" if self._model_system is not None else None}, + parameters={self._model_parameters if self._model_parameters is not None else None}, + messages={self._model_messages if self._model_messages is not None else None}, + quantize={"\'" + self._model_quantize + "\'" if self._model_quantize is not None else None}, stream=True ): print(chunk) From a8aca1f3274fc7dc95bda80c9ec57625d226a695 Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Thu, 16 Jan 2025 16:39:22 +0530 Subject: [PATCH 04/10] quotes Signed-off-by: Samhita Alla --- .../flytekitplugins/inference/ollama/serve.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py index 68fa2bcf74..403caefd73 100644 --- a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py +++ b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py @@ -156,16 +156,16 @@ def setup_ollama_pod_template(self): # Debugging: Shows the status of model creation. 
for chunk in ollama.create( - model={"\'" + self._model_name + "\'" if self._model_name is not None else None}, - from_={"\'" + self._model_from + "\'" if self._model_from is not None else None}, + model={"'" + self._model_name + "'" if self._model_name is not None else None}, + from_={"'" + self._model_from + "'" if self._model_from is not None else None}, files=files if files else None, adapters=adapters if adapters else None, - template={"\'" + self._model_template.replace("\n", "\\n") + "\'" if self._model_template is not None else None}, - license={"\'" + self._model_license + "\'" if self._model_license is not None else None}, - system={"\'" + self._model_system.replace("\n", "\\n") + "\'" if self._model_system is not None else None}, + template={"'" + self._model_template.replace("\n", "\\n") + "'" if self._model_template is not None else None}, + license={"'" + self._model_license + "'" if self._model_license is not None else None}, + system={"'" + self._model_system.replace("\n", "\\n") + "'" if self._model_system is not None else None}, parameters={self._model_parameters if self._model_parameters is not None else None}, messages={self._model_messages if self._model_messages is not None else None}, - quantize={"\'" + self._model_quantize + "\'" if self._model_quantize is not None else None}, + quantize={"'" + self._model_quantize + "'" if self._model_quantize is not None else None}, stream=True ): print(chunk) @@ -178,16 +178,16 @@ def setup_ollama_pod_template(self): # Debugging: Shows the status of model creation. for chunk in ollama.create( - model={"\'" + self._model_name + "\'" if self._model_name is not None else None}, - from_={"\'" + self._model_from + "\'" if self._model_from is not None else None}, + model={"'" + self._model_name + "'" if self._model_name is not None else None}, + from_={"'" + self._model_from + "'" if self._model_from is not None else None}, files=None, adapters=None, - template={"\'" + self._model_template.replace("\n", "\\n") + "\'" if self._model_template is not None else None}, - license={"\'" + self._model_license + "\'" if self._model_license is not None else None}, - system={"\'" + self._model_system.replace("\n", "\\n") + "\'" if self._model_system is not None else None}, + template={"'" + self._model_template.replace("\n", "\\n") + "'" if self._model_template is not None else None}, + license={"'" + self._model_license + "'" if self._model_license is not None else None}, + system={"'" + self._model_system.replace("\n", "\\n") + "'" if self._model_system is not None else None}, parameters={self._model_parameters if self._model_parameters is not None else None}, messages={self._model_messages if self._model_messages is not None else None}, - quantize={"\'" + self._model_quantize + "\'" if self._model_quantize is not None else None}, + quantize={"'" + self._model_quantize + "'" if self._model_quantize is not None else None}, stream=True ): print(chunk) From bad9ec15a56456f509d9d84ba44a51a5a90b478b Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Thu, 16 Jan 2025 16:50:52 +0530 Subject: [PATCH 05/10] self Signed-off-by: Samhita Alla --- plugins/flytekit-inference/README.md | 2 ++ .../flytekitplugins/inference/ollama/serve.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/plugins/flytekit-inference/README.md b/plugins/flytekit-inference/README.md index 646200c111..f3113e2526 100644 --- a/plugins/flytekit-inference/README.md +++ b/plugins/flytekit-inference/README.md @@ -70,6 +70,8 @@ def model_serving() -> str: ## Ollama 
+Use Python 3.12 + The Ollama plugin allows you to serve LLMs locally. You can either pull an existing model or create a new one. diff --git a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py index 403caefd73..cb872f5874 100644 --- a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py +++ b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py @@ -147,9 +147,9 @@ def setup_ollama_pod_template(self): client = Client('{self.base_url}') for input_name, input_value in inputs.items(): - if input_name in self._model_files: + if input_name in {self._model_files}: files[input_name] = client.create_blob(input_value) - if input_name in self._model_adapters: + if input_name in {self._model_adapters}: adapters[input_name] = client.create_blob(input_value) {ollama_service_ready} From 2a5b51c3cd58b6b20a186ae98c8b4b63ac133182 Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Thu, 16 Jan 2025 16:53:29 +0530 Subject: [PATCH 06/10] nit Signed-off-by: Samhita Alla --- .../flytekitplugins/inference/ollama/serve.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py index cb872f5874..4a7aebd77b 100644 --- a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py +++ b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py @@ -147,9 +147,9 @@ def setup_ollama_pod_template(self): client = Client('{self.base_url}') for input_name, input_value in inputs.items(): - if input_name in {self._model_files}: + if {self._model_files} and input_name in {self._model_files}: files[input_name] = client.create_blob(input_value) - if input_name in {self._model_adapters}: + if {self._model_adapters} and input_name in {self._model_adapters}: adapters[input_name] = client.create_blob(input_value) {ollama_service_ready} From dc81441ed1545b930cfbe6fb8e5f3d49b4383e11 Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Thu, 16 Jan 2025 18:56:31 +0530 Subject: [PATCH 07/10] cleanup Signed-off-by: Samhita Alla --- plugins/flytekit-inference/README.md | 2 - .../flytekitplugins/inference/ollama/serve.py | 37 ++++++++++--------- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/plugins/flytekit-inference/README.md b/plugins/flytekit-inference/README.md index f3113e2526..646200c111 100644 --- a/plugins/flytekit-inference/README.md +++ b/plugins/flytekit-inference/README.md @@ -70,8 +70,6 @@ def model_serving() -> str: ## Ollama -Use Python 3.12 - The Ollama plugin allows you to serve LLMs locally. You can either pull an existing model or create a new one. diff --git a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py index 4a7aebd77b..e00dad13ee 100644 --- a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py +++ b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py @@ -51,6 +51,9 @@ def __init__( ): """Initialize Ollama class for managing a Kubernetes pod template. + Python 3.12 or higher is required due to support for backslashes in f-strings: + https://realpython.com/python312-f-strings/#backslashes-now-allowed-in-f-strings + :param model: An instance of the Model class containing the model's configuration, including its name, memory, CPU, and the modelfile parameters. 
:param image: The Docker image to be used for the container. Default is "ollama/ollama". :param port: The port number on which the container should expose its service. Default is 11434. @@ -149,23 +152,23 @@ def setup_ollama_pod_template(self): for input_name, input_value in inputs.items(): if {self._model_files} and input_name in {self._model_files}: files[input_name] = client.create_blob(input_value) - if {self._model_adapters} and input_name in {self._model_adapters}: + elif {self._model_adapters} and input_name in {self._model_adapters}: adapters[input_name] = client.create_blob(input_value) {ollama_service_ready} # Debugging: Shows the status of model creation. for chunk in ollama.create( - model={"'" + self._model_name + "'" if self._model_name is not None else None}, - from_={"'" + self._model_from + "'" if self._model_from is not None else None}, + model={"'" + self._model_name + "'" if self._model_name else None}, + from_={"'" + self._model_from + "'" if self._model_from else None}, files=files if files else None, adapters=adapters if adapters else None, - template={"'" + self._model_template.replace("\n", "\\n") + "'" if self._model_template is not None else None}, - license={"'" + self._model_license + "'" if self._model_license is not None else None}, - system={"'" + self._model_system.replace("\n", "\\n") + "'" if self._model_system is not None else None}, - parameters={self._model_parameters if self._model_parameters is not None else None}, - messages={self._model_messages if self._model_messages is not None else None}, - quantize={"'" + self._model_quantize + "'" if self._model_quantize is not None else None}, + template={"'" + self._model_template.replace("\n", "\\n") + "'" if self._model_template else None}, + license={"'" + self._model_license + "'" if self._model_license else None}, + system={"'" + self._model_system.replace("\n", "\\n") + "'" if self._model_system else None}, + parameters={self._model_parameters if self._model_parameters else None}, + messages={self._model_messages if self._model_messages else None}, + quantize={"'" + self._model_quantize + "'" if self._model_quantize else None}, stream=True ): print(chunk) @@ -178,16 +181,16 @@ def setup_ollama_pod_template(self): # Debugging: Shows the status of model creation. 
for chunk in ollama.create( - model={"'" + self._model_name + "'" if self._model_name is not None else None}, - from_={"'" + self._model_from + "'" if self._model_from is not None else None}, + model={"'" + self._model_name + "'" if self._model_name else None}, + from_={"'" + self._model_from + "'" if self._model_from else None}, files=None, adapters=None, - template={"'" + self._model_template.replace("\n", "\\n") + "'" if self._model_template is not None else None}, - license={"'" + self._model_license + "'" if self._model_license is not None else None}, - system={"'" + self._model_system.replace("\n", "\\n") + "'" if self._model_system is not None else None}, - parameters={self._model_parameters if self._model_parameters is not None else None}, - messages={self._model_messages if self._model_messages is not None else None}, - quantize={"'" + self._model_quantize + "'" if self._model_quantize is not None else None}, + template={"'" + self._model_template.replace("\n", "\\n") + "'" if self._model_template else None}, + license={"'" + self._model_license + "'" if self._model_license else None}, + system={"'" + self._model_system.replace("\n", "\\n") + "'" if self._model_system else None}, + parameters={self._model_parameters if self._model_parameters else None}, + messages={self._model_messages if self._model_messages else None}, + quantize={"'" + self._model_quantize + "'" if self._model_quantize else None}, stream=True ): print(chunk) From 9c2c679aaef5d028069a1f6a5953e3a7339161fe Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Thu, 16 Jan 2025 19:02:41 +0530 Subject: [PATCH 08/10] update readme Signed-off-by: Samhita Alla --- plugins/flytekit-inference/README.md | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/plugins/flytekit-inference/README.md b/plugins/flytekit-inference/README.md index 646200c111..54bf0a022a 100644 --- a/plugins/flytekit-inference/README.md +++ b/plugins/flytekit-inference/README.md @@ -74,8 +74,6 @@ The Ollama plugin allows you to serve LLMs locally. You can either pull an existing model or create a new one. ```python -from textwrap import dedent - from flytekit import ImageSpec, Resources, task, workflow from flytekitplugins.inference import Ollama, Model from flytekit.extras.accelerators import A10G @@ -91,13 +89,10 @@ image = ImageSpec( ollama_instance = Ollama( model=Model( name="llama3-mario", - modelfile=dedent("""\ - FROM llama3 - ADAPTER {inputs.gguf} - PARAMETER temperature 1 - PARAMETER num_ctx 4096 - SYSTEM You are Mario from super mario bros, acting as an assistant.\ - """), + from_="llama3", + adapters=["gguf"], + parameters={"temperature": 1, "num_ctx": 4096}, + system="You are Mario from super mario bros, acting as an assistant." ) ) From 4c50a609a8a1035c627a5cc0764090e9cab304be Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Thu, 16 Jan 2025 19:08:04 +0530 Subject: [PATCH 09/10] update docstring Signed-off-by: Samhita Alla --- .../flytekitplugins/inference/ollama/serve.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py index e00dad13ee..ac75a8ee9f 100644 --- a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py +++ b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py @@ -11,7 +11,7 @@ class Model: :param name: The name of the model. :param mem: The amount of memory allocated for the model, specified as a string. 
Default is "500Mi". :param cpu: The number of CPU cores allocated for the model. Default is 1. - :param from: The name of an existing model to create the new model from. + :param from: The name of an existing model used as a base to create a new custom model. :param files: A list of file names to create the model from. :param adapters: A list of file names to create the model for LORA adapters. :param template: The prompt template for the model. From 4c6a57c2d29430b2b6a9fb40920808bbb935e462 Mon Sep 17 00:00:00 2001 From: Niels Bantilan Date: Thu, 16 Jan 2025 09:47:02 -0500 Subject: [PATCH 10/10] use newline global vars --- .../flytekitplugins/inference/ollama/serve.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py index ac75a8ee9f..edf075a4e0 100644 --- a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py +++ b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py @@ -4,6 +4,10 @@ from ..sidecar_template import ModelInferenceTemplate +NEWLINE = "\n" +NEWLINE_ESCAPED = "\\n" + + @dataclass class Model: """Represents the configuration for a model used in a Kubernetes pod template. @@ -163,9 +167,9 @@ def setup_ollama_pod_template(self): from_={"'" + self._model_from + "'" if self._model_from else None}, files=files if files else None, adapters=adapters if adapters else None, - template={"'" + self._model_template.replace("\n", "\\n") + "'" if self._model_template else None}, + template={"'" + self._model_template.replace(NEWLINE, NEWLINE_ESCAPED) + "'" if self._model_template else None}, license={"'" + self._model_license + "'" if self._model_license else None}, - system={"'" + self._model_system.replace("\n", "\\n") + "'" if self._model_system else None}, + system={"'" + self._model_system.replace(NEWLINE, NEWLINE_ESCAPED) + "'" if self._model_system else None}, parameters={self._model_parameters if self._model_parameters else None}, messages={self._model_messages if self._model_messages else None}, quantize={"'" + self._model_quantize + "'" if self._model_quantize else None}, @@ -185,9 +189,9 @@ def setup_ollama_pod_template(self): from_={"'" + self._model_from + "'" if self._model_from else None}, files=None, adapters=None, - template={"'" + self._model_template.replace("\n", "\\n") + "'" if self._model_template else None}, + template={"'" + self._model_template.replace(NEWLINE, NEWLINE_ESCAPED) + "'" if self._model_template else None}, license={"'" + self._model_license + "'" if self._model_license else None}, - system={"'" + self._model_system.replace("\n", "\\n") + "'" if self._model_system else None}, + system={"'" + self._model_system.replace(NEWLINE, NEWLINE_ESCAPED) + "'" if self._model_system else None}, parameters={self._model_parameters if self._model_parameters else None}, messages={self._model_messages if self._model_messages else None}, quantize={"'" + self._model_quantize + "'" if self._model_quantize else None},