Skip to content

Commit

Permalink
feat(adapters): add presets for Granite 3.1 in WatsonX and IBM vLLM (#250)
Browse files Browse the repository at this point in the history

Signed-off-by: Graham White <gwhite@uk.ibm.com>
  • Loading branch information
grahamwhiteuk authored Dec 13, 2024
1 parent 407fba9 commit 972681f
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 0 deletions.
19 changes: 19 additions & 0 deletions src/adapters/ibm-vllm/chatPreset.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ export const IBMVllmModel = {
LLAMA_3_1_70B_INSTRUCT: "meta-llama/llama-3-1-70b-instruct",
LLAMA_3_1_8B_INSTRUCT: "meta-llama/llama-3-1-8b-instruct",
GRANITE_3_0_8B_INSTRUCT: "ibm-granite/granite-3-0-8b-instruct",
GRANITE_3_1_8B_INSTRUCT: "ibm-granite/granite-3-1-8b-instruct",
} as const;
export type IBMVllmModel = (typeof IBMVllmModel)[keyof typeof IBMVllmModel];

Expand Down Expand Up @@ -136,6 +137,24 @@ export const IBMVllmChatLLMPreset = {
},
};
},
[IBMVllmModel.GRANITE_3_1_8B_INSTRUCT]: (): IBMVllmChatLLMPreset => {
  // Granite 3.1 reuses the shared Granite 3.x instruct chat template.
  const granite = LLMChatTemplates.get("granite3Instruct");
  // Copy the template's stop sequences so the preset owns a mutable array.
  const stopSequences = [...granite.parameters.stop_sequence];
  return {
    base: {
      modelId: IBMVllmModel.GRANITE_3_1_8B_INSTRUCT,
      parameters: {
        method: "GREEDY",
        stopping: {
          stop_sequences: stopSequences,
          include_stop_sequence: false,
        },
      },
    },
    chat: {
      messagesToPrompt: granite.messagesToPrompt(granite.template),
    },
  };
},
} as const satisfies Record<IBMVllmModel, () => IBMVllmChatLLMPreset>;

export type IBMVllmChatLLMPresetModel = keyof typeof IBMVllmChatLLMPreset;
19 changes: 19 additions & 0 deletions src/adapters/watsonx/chatPreset.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,25 @@ export const WatsonXChatLLMPreset = {
"ibm/granite-3-2b-instruct"() {
return WatsonXChatLLMPreset["ibm/granite-3-8b-instruct"]();
},
"ibm/granite-3-1-8b-instruct": (): WatsonXChatLLMPreset => {
const { template, parameters, messagesToPrompt } = LLMChatTemplates.get("granite3Instruct");
return {
base: {
parameters: {
decoding_method: "greedy",
max_new_tokens: 512,
include_stop_sequence: false,
stop_sequences: [...parameters.stop_sequence],
},
},
chat: {
messagesToPrompt: messagesToPrompt(template),
},
};
},
"ibm/granite-3-1-2b-instruct"() {
return WatsonXChatLLMPreset["ibm/granite-3-8b-instruct"]();
},
"meta-llama/llama-3-1-70b-instruct": (): WatsonXChatLLMPreset => {
const { template, messagesToPrompt, parameters } = LLMChatTemplates.get("llama3.1");

Expand Down

0 comments on commit 972681f

Please sign in to comment.