From dc646f73b4523a113e4433cc12ce60f727d0be1e Mon Sep 17 00:00:00 2001
From: Haijian Wang <130898843+Haijian06@users.noreply.github.com>
Date: Wed, 18 Sep 2024 15:14:46 +0800
Subject: [PATCH 01/15] Add files via upload

---
 llm/yi/qwen2-7b.yaml | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 llm/yi/qwen2-7b.yaml

diff --git a/llm/yi/qwen2-7b.yaml b/llm/yi/qwen2-7b.yaml
new file mode 100644
index 00000000000..ccf8d62d306
--- /dev/null
+++ b/llm/yi/qwen2-7b.yaml
@@ -0,0 +1,33 @@
+envs:
+  MODEL_NAME: Qwen/Qwen2-7B-Instruct
+
+service:
+  # Specifying the path to the endpoint to check the readiness of the replicas.
+  readiness_probe:
+    path: /v1/chat/completions
+    post_data:
+      model: $MODEL_NAME
+      messages:
+        - role: user
+          content: Hello! What is your name?
+      max_tokens: 1
+    initial_delay_seconds: 1200
+  # How many replicas to manage.
+  replicas: 2
+
+
+resources:
+  accelerators: {L4, A10g, A10, L40, A40, A100, A100-80GB}
+  disk_tier: best
+  ports: 8000
+
+setup: |
+  pip install vllm==0.6.1.post2
+  pip install vllm-flash-attn
+
+run: |
+  export PATH=$PATH:/sbin
+  vllm serve $MODEL_NAME \
+    --host 0.0.0.0 \
+    --tensor-parallel-size $SKYPILOT_NUM_GPUS_PER_NODE \
+    --max-model-len 1024 | tee ~/openai_api_server.log

From 1a7fc1345bb63e1e029d1b9eddec2982d883ca58 Mon Sep 17 00:00:00 2001
From: Haijian Wang <130898843+Haijian06@users.noreply.github.com>
Date: Wed, 18 Sep 2024 15:18:30 +0800
Subject: [PATCH 02/15] Update and rename qwen2-7b.yaml to yi15-6b.yaml

---
 llm/yi/{qwen2-7b.yaml => yi15-6b.yaml} | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
 rename llm/yi/{qwen2-7b.yaml => yi15-6b.yaml} (89%)

diff --git a/llm/yi/qwen2-7b.yaml b/llm/yi/yi15-6b.yaml
similarity index 89%
rename from llm/yi/qwen2-7b.yaml
rename to llm/yi/yi15-6b.yaml
index ccf8d62d306..be3f380ff1b 100644
--- a/llm/yi/qwen2-7b.yaml
+++ b/llm/yi/yi15-6b.yaml
@@ -1,5 +1,5 @@
 envs:
-  MODEL_NAME: Qwen/Qwen2-7B-Instruct
+  MODEL_NAME: 01-ai/Yi-1.5-6B-Chat
 
 service:
   # Specifying the path to the endpoint to check the readiness of the replicas.
@@ -9,7 +9,7 @@ service:
       model: $MODEL_NAME
       messages:
         - role: user
-          content: Hello! What is your name?
+          content: Hi! What is your name?
       max_tokens: 1
     initial_delay_seconds: 1200
   # How many replicas to manage.

From 14feaf7b8ef3138af584d5c863cbd1cb55125f46 Mon Sep 17 00:00:00 2001
From: Haijian Wang <130898843+Haijian06@users.noreply.github.com>
Date: Wed, 18 Sep 2024 15:20:41 +0800
Subject: [PATCH 03/15] Add files via upload

---
 llm/yi/yi15-34b.yaml | 33 +++++++++++++++++++++++++++++++++
 llm/yi/yi15-9b.yaml  | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+)
 create mode 100644 llm/yi/yi15-34b.yaml
 create mode 100644 llm/yi/yi15-9b.yaml

diff --git a/llm/yi/yi15-34b.yaml b/llm/yi/yi15-34b.yaml
new file mode 100644
index 00000000000..be3f380ff1b
--- /dev/null
+++ b/llm/yi/yi15-34b.yaml
@@ -0,0 +1,33 @@
+envs:
+  MODEL_NAME: 01-ai/Yi-1.5-6B-Chat
+
+service:
+  # Specifying the path to the endpoint to check the readiness of the replicas.
+  readiness_probe:
+    path: /v1/chat/completions
+    post_data:
+      model: $MODEL_NAME
+      messages:
+        - role: user
+          content: Hi! What is your name?
+      max_tokens: 1
+    initial_delay_seconds: 1200
+  # How many replicas to manage.
+  replicas: 2
+
+
+resources:
+  accelerators: {L4, A10g, A10, L40, A40, A100, A100-80GB}
+  disk_tier: best
+  ports: 8000
+
+setup: |
+  pip install vllm==0.6.1.post2
+  pip install vllm-flash-attn
+
+run: |
+  export PATH=$PATH:/sbin
+  vllm serve $MODEL_NAME \
+    --host 0.0.0.0 \
+    --tensor-parallel-size $SKYPILOT_NUM_GPUS_PER_NODE \
+    --max-model-len 1024 | tee ~/openai_api_server.log
diff --git a/llm/yi/yi15-9b.yaml b/llm/yi/yi15-9b.yaml
new file mode 100644
index 00000000000..be3f380ff1b
--- /dev/null
+++ b/llm/yi/yi15-9b.yaml
@@ -0,0 +1,33 @@
+envs:
+  MODEL_NAME: 01-ai/Yi-1.5-6B-Chat
+
+service:
+  # Specifying the path to the endpoint to check the readiness of the replicas.
+  readiness_probe:
+    path: /v1/chat/completions
+    post_data:
+      model: $MODEL_NAME
+      messages:
+        - role: user
+          content: Hi! What is your name?
+      max_tokens: 1
+    initial_delay_seconds: 1200
+  # How many replicas to manage.
+  replicas: 2
+
+
+resources:
+  accelerators: {L4, A10g, A10, L40, A40, A100, A100-80GB}
+  disk_tier: best
+  ports: 8000
+
+setup: |
+  pip install vllm==0.6.1.post2
+  pip install vllm-flash-attn
+
+run: |
+  export PATH=$PATH:/sbin
+  vllm serve $MODEL_NAME \
+    --host 0.0.0.0 \
+    --tensor-parallel-size $SKYPILOT_NUM_GPUS_PER_NODE \
+    --max-model-len 1024 | tee ~/openai_api_server.log

From 4dc2a9182762b7f2cb8ebef75a963ab2b42e9381 Mon Sep 17 00:00:00 2001
From: Haijian Wang <130898843+Haijian06@users.noreply.github.com>
Date: Wed, 18 Sep 2024 15:22:37 +0800
Subject: [PATCH 04/15] Update yi15-9b.yaml

---
 llm/yi/yi15-9b.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llm/yi/yi15-9b.yaml b/llm/yi/yi15-9b.yaml
index be3f380ff1b..cb8901b5dfc 100644
--- a/llm/yi/yi15-9b.yaml
+++ b/llm/yi/yi15-9b.yaml
@@ -1,5 +1,5 @@
 envs:
-  MODEL_NAME: 01-ai/Yi-1.5-6B-Chat
+  MODEL_NAME: 01-ai/Yi-1.5-9B-Chat
 
 service:
   # Specifying the path to the endpoint to check the readiness of the replicas.
@@ -9,7 +9,7 @@ service:
       model: $MODEL_NAME
      messages:
         - role: user
-          content: Hi! What is your name?
+          content: Hello! What is your name?
       max_tokens: 1
     initial_delay_seconds: 1200
   # How many replicas to manage.
@@ -17,7 +17,7 @@ service:
 
 
 resources:
-  accelerators: {L4, A10g, A10, L40, A40, A100, A100-80GB}
+  accelerators: {L4:8, A10g:8, A10:8, A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8}
   disk_tier: best
   ports: 8000

From a1b68bcdca4e61e583518446ab32ac5aba77f078 Mon Sep 17 00:00:00 2001
From: Haijian Wang <130898843+Haijian06@users.noreply.github.com>
Date: Wed, 18 Sep 2024 15:24:51 +0800
Subject: [PATCH 05/15] Update yi15-34b.yaml

---
 llm/yi/yi15-34b.yaml | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/llm/yi/yi15-34b.yaml b/llm/yi/yi15-34b.yaml
index be3f380ff1b..e244cb0335d 100644
--- a/llm/yi/yi15-34b.yaml
+++ b/llm/yi/yi15-34b.yaml
@@ -1,5 +1,5 @@
 envs:
-  MODEL_NAME: 01-ai/Yi-1.5-6B-Chat
+  MODEL_NAME: 01-ai/Yi-1.5-34B-Chat
 
 service:
   # Specifying the path to the endpoint to check the readiness of the replicas.
@@ -9,7 +9,7 @@ service:
       model: $MODEL_NAME
       messages:
         - role: user
-          content: Hi! What is your name?
+          content: Hello! What is your name?
       max_tokens: 1
     initial_delay_seconds: 1200
   # How many replicas to manage.
@@ -17,8 +17,10 @@ service:
 
 
 resources:
-  accelerators: {L4, A10g, A10, L40, A40, A100, A100-80GB}
+  accelerators: {A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8}
+  disk_size: 1024
   disk_tier: best
+  memory: 32+
   ports: 8000

From 60cd1600991ebedf40f0b87840d5888bf5fc181f Mon Sep 17 00:00:00 2001
From: Haijian Wang <130898843+Haijian06@users.noreply.github.com>
Date: Wed, 18 Sep 2024 15:25:10 +0800
Subject: [PATCH 06/15] Update yi15-6b.yaml

---
 llm/yi/yi15-6b.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llm/yi/yi15-6b.yaml b/llm/yi/yi15-6b.yaml
index be3f380ff1b..42ed8897035 100644
--- a/llm/yi/yi15-6b.yaml
+++ b/llm/yi/yi15-6b.yaml
@@ -9,7 +9,7 @@ service:
       model: $MODEL_NAME
       messages:
         - role: user
-          content: Hi! What is your name?
+          content: Hello! What is your name?
       max_tokens: 1
     initial_delay_seconds: 1200
   # How many replicas to manage.

From 9be9bd9e4cdb1db6e340d375e94c4323a7725d00 Mon Sep 17 00:00:00 2001
From: Haijian Wang <130898843+Haijian06@users.noreply.github.com>
Date: Wed, 18 Sep 2024 15:28:21 +0800
Subject: [PATCH 07/15] Add files via upload

---
 llm/yi/yicoder-1_5b.yaml | 33 +++++++++++++++++++++++++++++++++
 llm/yi/yicoder-9b.yaml   | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+)
 create mode 100644 llm/yi/yicoder-1_5b.yaml
 create mode 100644 llm/yi/yicoder-9b.yaml

diff --git a/llm/yi/yicoder-1_5b.yaml b/llm/yi/yicoder-1_5b.yaml
new file mode 100644
index 00000000000..42ed8897035
--- /dev/null
+++ b/llm/yi/yicoder-1_5b.yaml
@@ -0,0 +1,33 @@
+envs:
+  MODEL_NAME: 01-ai/Yi-1.5-6B-Chat
+
+service:
+  # Specifying the path to the endpoint to check the readiness of the replicas.
+  readiness_probe:
+    path: /v1/chat/completions
+    post_data:
+      model: $MODEL_NAME
+      messages:
+        - role: user
+          content: Hello! What is your name?
+      max_tokens: 1
+    initial_delay_seconds: 1200
+  # How many replicas to manage.
+  replicas: 2
+
+
+resources:
+  accelerators: {L4, A10g, A10, L40, A40, A100, A100-80GB}
+  disk_tier: best
+  ports: 8000
+
+setup: |
+  pip install vllm==0.6.1.post2
+  pip install vllm-flash-attn
+
+run: |
+  export PATH=$PATH:/sbin
+  vllm serve $MODEL_NAME \
+    --host 0.0.0.0 \
+    --tensor-parallel-size $SKYPILOT_NUM_GPUS_PER_NODE \
+    --max-model-len 1024 | tee ~/openai_api_server.log
diff --git a/llm/yi/yicoder-9b.yaml b/llm/yi/yicoder-9b.yaml
new file mode 100644
index 00000000000..42ed8897035
--- /dev/null
+++ b/llm/yi/yicoder-9b.yaml
@@ -0,0 +1,33 @@
+envs:
+  MODEL_NAME: 01-ai/Yi-1.5-6B-Chat
+
+service:
+  # Specifying the path to the endpoint to check the readiness of the replicas.
+  readiness_probe:
+    path: /v1/chat/completions
+    post_data:
+      model: $MODEL_NAME
+      messages:
+        - role: user
+          content: Hello! What is your name?
+      max_tokens: 1
+    initial_delay_seconds: 1200
+  # How many replicas to manage.
+  replicas: 2
+
+
+resources:
+  accelerators: {L4, A10g, A10, L40, A40, A100, A100-80GB}
+  disk_tier: best
+  ports: 8000
+
+setup: |
+  pip install vllm==0.6.1.post2
+  pip install vllm-flash-attn
+
+run: |
+  export PATH=$PATH:/sbin
+  vllm serve $MODEL_NAME \
+    --host 0.0.0.0 \
+    --tensor-parallel-size $SKYPILOT_NUM_GPUS_PER_NODE \
+    --max-model-len 1024 | tee ~/openai_api_server.log

From a9ffe54dc4ac85d6281b88d9e01d6f675bc42ccf Mon Sep 17 00:00:00 2001
From: Haijian Wang <130898843+Haijian06@users.noreply.github.com>
Date: Wed, 18 Sep 2024 15:29:10 +0800
Subject: [PATCH 08/15] Update yicoder-1_5b.yaml

---
 llm/yi/yicoder-1_5b.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llm/yi/yicoder-1_5b.yaml b/llm/yi/yicoder-1_5b.yaml
index 42ed8897035..5c0d409483d 100644
--- a/llm/yi/yicoder-1_5b.yaml
+++ b/llm/yi/yicoder-1_5b.yaml
@@ -1,5 +1,5 @@
 envs:
-  MODEL_NAME: 01-ai/Yi-1.5-6B-Chat
+  MODEL_NAME: 01-ai/Yi-Coder-1.5B-Chat
 
 service:
   # Specifying the path to the endpoint to check the readiness of the replicas.

From 6de0cf76fb67833ab369d2f31e77e5770dd9f727 Mon Sep 17 00:00:00 2001
From: Haijian Wang <130898843+Haijian06@users.noreply.github.com>
Date: Wed, 18 Sep 2024 15:30:23 +0800
Subject: [PATCH 09/15] Update yicoder-9b.yaml

---
 llm/yi/yicoder-9b.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llm/yi/yicoder-9b.yaml b/llm/yi/yicoder-9b.yaml
index 42ed8897035..36aaea45111 100644
--- a/llm/yi/yicoder-9b.yaml
+++ b/llm/yi/yicoder-9b.yaml
@@ -1,5 +1,5 @@
 envs:
-  MODEL_NAME: 01-ai/Yi-1.5-6B-Chat
+  MODEL_NAME: 01-ai/Yi-Coder-9B-Chat
 
 service:
   # Specifying the path to the endpoint to check the readiness of the replicas.
@@ -17,7 +17,7 @@ service:
 
 
 resources:
-  accelerators: {L4, A10g, A10, L40, A40, A100, A100-80GB}
+  accelerators: {L4:8, A10g:8, A10:8, A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8}
   disk_tier: best
   ports: 8000

From a53e27a45c3a581993592e1b2170da97af8d689c Mon Sep 17 00:00:00 2001
From: Haijian Wang <130898843+Haijian06@users.noreply.github.com>
Date: Thu, 19 Sep 2024 09:38:45 +0800
Subject: [PATCH 10/15] Add files via upload

---
 llm/yi/README.md | 60 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 llm/yi/README.md

diff --git a/llm/yi/README.md b/llm/yi/README.md
new file mode 100644
index 00000000000..76fcf6151e6
--- /dev/null
+++ b/llm/yi/README.md
@@ -0,0 +1,60 @@
+# Serving Yi on Your Own Kubernetes or Cloud
+
+🤖 The Yi series models are the next generation of open-source large language models trained from scratch by [01.AI](https://www.lingyiwanwu.com/en).
+
+**Update (Sep 19, 2024) -** SkyPilot now supports the [**Yi**](https://01-ai.github.io/) models (Yi-Coder, Yi-1.5)!
+
+## Why use SkyPilot to deploy over commercial hosted solutions?
+
+* Get the best GPU availability by utilizing multiple resource pools across Kubernetes clusters and multiple regions/clouds.
+* Pay absolute minimum — SkyPilot picks the cheapest resources across Kubernetes clusters and regions/clouds. No managed solution markups.
+* Scale up to multiple replicas across different locations and accelerators, all served with a single endpoint.
+* Everything stays in your Kubernetes or cloud account (your VMs & buckets).
+* Completely private - no one else sees your chat history.
+
+
+## Running Yi models with SkyPilot
+
+After [installing SkyPilot](https://skypilot.readthedocs.io/en/latest/getting-started/installation.html), run your own Yi model on vLLM with SkyPilot in 1-click:
+
+1. Start serving Yi-1.5 34B on a single instance with any available GPU in the list specified in [yi15-34b.yaml](https://github.com/skypilot-org/skypilot/blob/master/llm/yi/yi15-34b.yaml) with a vLLM-powered, OpenAI-compatible endpoint (you can also switch to [yicoder-9b.yaml](https://github.com/skypilot-org/skypilot/blob/master/llm/yi/yicoder-9b.yaml) or [other models](https://github.com/skypilot-org/skypilot/tree/master/llm/yi) for a smaller model):
+
+```console
+sky launch -c yi yi15-34b.yaml
+```
+2. Send a request to the endpoint for completion:
+```bash
+ENDPOINT=$(sky status --endpoint 8000 yi)
+
+curl http://$ENDPOINT/v1/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "01-ai/Yi-1.5-34B-Chat",
+    "prompt": "Who are you?",
+    "max_tokens": 512
+  }' | jq -r '.choices[0].text'
+```
+
+3. Send a request for chat completion:
+```bash
+curl http://$ENDPOINT/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "01-ai/Yi-1.5-34B-Chat",
+    "messages": [
+      {
+        "role": "system",
+        "content": "You are a helpful assistant."
+      },
+      {
+        "role": "user",
+        "content": "Who are you?"
+      }
+    ],
+    "max_tokens": 512
+  }' | jq -r '.choices[0].message.content'
+```

From 022fa97762c64e4ab0c8892abf564efa7b3394ee Mon Sep 17 00:00:00 2001
From: Haijian Wang <130898843+Haijian06@users.noreply.github.com>
Date: Thu, 19 Sep 2024 14:39:08 +0800
Subject: [PATCH 11/15] Update yi15-34b.yaml

---
 llm/yi/yi15-34b.yaml | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/llm/yi/yi15-34b.yaml b/llm/yi/yi15-34b.yaml
index e244cb0335d..99fe5481d7a 100644
--- a/llm/yi/yi15-34b.yaml
+++ b/llm/yi/yi15-34b.yaml
@@ -1,21 +1,6 @@
 envs:
   MODEL_NAME: 01-ai/Yi-1.5-34B-Chat
 
-
-service:
-  # Specifying the path to the endpoint to check the readiness of the replicas.
-  readiness_probe:
-    path: /v1/chat/completions
-    post_data:
-      model: $MODEL_NAME
-      messages:
-        - role: user
-          content: Hello! What is your name?
-      max_tokens: 1
-    initial_delay_seconds: 1200
-  # How many replicas to manage.
-  replicas: 2
-
 resources:
   accelerators: {A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8}
   disk_size: 1024
   disk_tier: best
   memory: 32+
   ports: 8000

From b746b2af42088ca1c95c74520ef67270dc96b132 Mon Sep 17 00:00:00 2001
From: Haijian Wang <130898843+Haijian06@users.noreply.github.com>
Date: Thu, 19 Sep 2024 14:39:26 +0800
Subject: [PATCH 12/15] Update yi15-6b.yaml

---
 llm/yi/yi15-6b.yaml | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/llm/yi/yi15-6b.yaml b/llm/yi/yi15-6b.yaml
index 42ed8897035..879f5ffea9c 100644
--- a/llm/yi/yi15-6b.yaml
+++ b/llm/yi/yi15-6b.yaml
@@ -1,21 +1,6 @@
 envs:
   MODEL_NAME: 01-ai/Yi-1.5-6B-Chat
 
-
-service:
-  # Specifying the path to the endpoint to check the readiness of the replicas.
-  readiness_probe:
-    path: /v1/chat/completions
-    post_data:
-      model: $MODEL_NAME
-      messages:
-        - role: user
-          content: Hello! What is your name?
-      max_tokens: 1
-    initial_delay_seconds: 1200
-  # How many replicas to manage.
-  replicas: 2
-
 resources:
   accelerators: {L4, A10g, A10, L40, A40, A100, A100-80GB}
   disk_tier: best
   ports: 8000

From f58ec47821913f2c1c8f5a4af9b0d4aca4fdd73c Mon Sep 17 00:00:00 2001
From: Haijian Wang <130898843+Haijian06@users.noreply.github.com>
Date: Thu, 19 Sep 2024 14:39:58 +0800
Subject: [PATCH 13/15] Update yi15-9b.yaml

---
 llm/yi/yi15-9b.yaml | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/llm/yi/yi15-9b.yaml b/llm/yi/yi15-9b.yaml
index cb8901b5dfc..b7ac40b4e11 100644
--- a/llm/yi/yi15-9b.yaml
+++ b/llm/yi/yi15-9b.yaml
@@ -1,21 +1,6 @@
 envs:
   MODEL_NAME: 01-ai/Yi-1.5-9B-Chat
 
-
-service:
-  # Specifying the path to the endpoint to check the readiness of the replicas.
-  readiness_probe:
-    path: /v1/chat/completions
-    post_data:
-      model: $MODEL_NAME
-      messages:
-        - role: user
-          content: Hello! What is your name?
-      max_tokens: 1
-    initial_delay_seconds: 1200
-  # How many replicas to manage.
-  replicas: 2
-
 resources:
   accelerators: {L4:8, A10g:8, A10:8, A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8}
   disk_tier: best
   ports: 8000

From 7cd568111e0de0d11e8fa342d72a8f0fa50cf989 Mon Sep 17 00:00:00 2001
From: Haijian Wang <130898843+Haijian06@users.noreply.github.com>
Date: Thu, 19 Sep 2024 14:40:13 +0800
Subject: [PATCH 14/15] Update yicoder-1_5b.yaml

---
 llm/yi/yicoder-1_5b.yaml | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/llm/yi/yicoder-1_5b.yaml b/llm/yi/yicoder-1_5b.yaml
index 5c0d409483d..383f88b657d 100644
--- a/llm/yi/yicoder-1_5b.yaml
+++ b/llm/yi/yicoder-1_5b.yaml
@@ -1,21 +1,6 @@
 envs:
   MODEL_NAME: 01-ai/Yi-Coder-1.5B-Chat
 
-
-service:
-  # Specifying the path to the endpoint to check the readiness of the replicas.
-  readiness_probe:
-    path: /v1/chat/completions
-    post_data:
-      model: $MODEL_NAME
-      messages:
-        - role: user
-          content: Hello! What is your name?
-      max_tokens: 1
-    initial_delay_seconds: 1200
-  # How many replicas to manage.
-  replicas: 2
-
 resources:
   accelerators: {L4, A10g, A10, L40, A40, A100, A100-80GB}
   disk_tier: best
   ports: 8000

From 55cf8db7b0229ea13bb70e1e6975188af5103119 Mon Sep 17 00:00:00 2001
From: Haijian Wang <130898843+Haijian06@users.noreply.github.com>
Date: Thu, 19 Sep 2024 14:40:28 +0800
Subject: [PATCH 15/15] Update yicoder-9b.yaml

---
 llm/yi/yicoder-9b.yaml | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/llm/yi/yicoder-9b.yaml b/llm/yi/yicoder-9b.yaml
index 36aaea45111..28e74b45bb5 100644
--- a/llm/yi/yicoder-9b.yaml
+++ b/llm/yi/yicoder-9b.yaml
@@ -1,21 +1,6 @@
 envs:
   MODEL_NAME: 01-ai/Yi-Coder-9B-Chat
 
-
-service:
-  # Specifying the path to the endpoint to check the readiness of the replicas.
-  readiness_probe:
-    path: /v1/chat/completions
-    post_data:
-      model: $MODEL_NAME
-      messages:
-        - role: user
-          content: Hello! What is your name?
-      max_tokens: 1
-    initial_delay_seconds: 1200
-  # How many replicas to manage.
-  replicas: 2
-
 resources:
   accelerators: {L4:8, A10g:8, A10:8, A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8}
   disk_tier: best
   ports: 8000
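Since the YAMLs in this series serve an OpenAI-compatible API via vLLM, the endpoint can also be queried from code instead of the README's curl commands. Below is a minimal sketch using the official `openai` Python package; it assumes `pip install openai` has been run, the `yi` cluster from the README is up, and it fetches the endpoint by shelling out to the same `sky status --endpoint 8000 yi` command the README uses.

```python
# Minimal sketch: query the vLLM-served Yi endpoint through its
# OpenAI-compatible chat completions API.
import subprocess

from openai import OpenAI

# Fetch the endpoint the same way the README's curl examples do.
endpoint = subprocess.check_output(
    ["sky", "status", "--endpoint", "8000", "yi"], text=True
).strip()

# vLLM does not require an API key unless one was configured,
# so any placeholder value works here.
client = OpenAI(base_url=f"http://{endpoint}/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="01-ai/Yi-1.5-34B-Chat",  # must match MODEL_NAME in the launched YAML
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Who are you?"},
    ],
    max_tokens=512,
)
print(response.choices[0].message.content)
```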