diff --git a/core/llm/llms/Ollama.ts b/core/llm/llms/Ollama.ts
index 0a239fce909..9f17385b8ec 100644
--- a/core/llm/llms/Ollama.ts
+++ b/core/llm/llms/Ollama.ts
@@ -114,6 +114,18 @@ type OllamaErrorResponse = {
   error: string;
 };
 
+type N8nChatResponse = {
+  type: string;
+  content?: string;
+  metadata: {
+    nodeId: string;
+    nodeName: string;
+    itemIndex: number;
+    runIndex: number;
+    timestamps: number;
+  };
+};
+
 type OllamaRawResponse =
   | OllamaErrorResponse
   | (OllamaBaseResponse & {
@@ -124,7 +136,8 @@ type OllamaChatResponse =
   | OllamaErrorResponse
   | (OllamaBaseResponse & {
       message: OllamaChatMessage;
-    });
+    })
+  | N8nChatResponse;
 
 interface OllamaTool {
   type: "function";
@@ -427,12 +440,47 @@ class Ollama extends BaseLLM implements ModelInstaller {
       body: JSON.stringify(chatOptions),
       signal,
     });
+    let isThinking: boolean = false;
 
     function convertChatMessage(res: OllamaChatResponse): ChatMessage[] {
       if ("error" in res) {
         throw new Error(res.error);
       }
 
+      if ("type" in res) {
+        const { content } = res;
+
+        if (content === "") {
+          isThinking = true;
+        }
+
+        if (isThinking && content) {
+          // TODO: better support for streaming thinking chunks, or remove this and depend on the redux parsing logic
+          const thinkingMessage: ThinkingChatMessage = {
+            role: "thinking",
+            content: content,
+          };
+
+          if (thinkingMessage) {
+            // Could cause issues with termination if the chunk doesn't match this exactly
+            if (content === "") {
+              isThinking = false;
+            }
+            // When streaming, you can't have both thinking and content
+            return [thinkingMessage];
+          }
+        }
+
+        if (content) {
+          const chatMessage: ChatMessage = {
+            role: "assistant",
+            content: content,
+          };
+          return [chatMessage];
+        }
+        return [];
+      }
+
      const { role, content, thinking, tool_calls: toolCalls } = res.message;
 
      if (role === "tool") {
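
For context on the type change: the `"error" in res` and `"type" in res` checks are enough to tell the widened union's members apart, since `in` acts as a type guard in TypeScript. A minimal standalone sketch of that narrowing, using simplified stand-ins (the `OllamaNativeChat` shape and the trimmed `N8nChatResponse` here are invented for illustration, not the repo's actual `OllamaBaseResponse`/`OllamaChatMessage` definitions):

// Simplified stand-ins; the real types in Ollama.ts carry more fields.
type OllamaErrorResponse = { error: string };
type OllamaNativeChat = { model: string; message: { role: string; content: string } };
type N8nChatResponse = { type: string; content?: string; metadata: { nodeId: string } };

type OllamaChatResponse = OllamaErrorResponse | OllamaNativeChat | N8nChatResponse;

// Each `in` check acts as a type guard and narrows `res` to the members
// that declare that property.
function describe(res: OllamaChatResponse): string {
  if ("error" in res) {
    return `error: ${res.error}`; // narrowed to OllamaErrorResponse
  }
  if ("type" in res) {
    return `n8n chunk: ${res.content ?? ""}`; // narrowed to N8nChatResponse
  }
  return `ollama chunk: ${res.message.content}`; // narrowed to OllamaNativeChat
}

console.log(describe({ type: "item", content: "hi", metadata: { nodeId: "n1" } }));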
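
For the streaming change: the new branch keeps a flag in the enclosing scope, `isThinking`, that flips on when an empty-content chunk arrives and then routes subsequent chunks to `"thinking"` messages. A condensed, runnable sketch of that toggle under simplified assumptions; `makeConverter`, the local `ChatMessage` shape, and the sample metadata values are invented for this sketch, and it omits the tool-call and native Ollama paths:

// Simplified local stand-in for the core ChatMessage/ThinkingChatMessage types.
type ChatMessage = { role: "assistant" | "thinking"; content: string };

type N8nChatResponse = {
  type: string;
  content?: string;
  metadata: {
    nodeId: string;
    nodeName: string;
    itemIndex: number;
    runIndex: number;
    timestamps: number;
  };
};

// makeConverter is a hypothetical helper for this sketch; in the diff the flag
// lives in _streamChat's scope and convertChatMessage closes over it.
function makeConverter() {
  let isThinking = false;
  return (res: N8nChatResponse): ChatMessage[] => {
    const { content } = res;
    if (content === "") {
      // An empty content chunk switches the stream into thinking mode.
      isThinking = true;
      return [];
    }
    if (isThinking && content) {
      // While in thinking mode, chunks are surfaced as "thinking" messages.
      return [{ role: "thinking", content }];
    }
    if (content) {
      return [{ role: "assistant", content }];
    }
    return [];
  };
}

// Sample metadata values, invented for the sketch.
const meta = { nodeId: "n1", nodeName: "Chat", itemIndex: 0, runIndex: 0, timestamps: Date.now() };
const convert = makeConverter();
for (const chunk of [
  { type: "item", content: "", metadata: meta }, // enters thinking mode, yields []
  { type: "item", content: "reasoning...", metadata: meta },
  { type: "item", content: "final answer", metadata: meta },
]) {
  console.log(convert(chunk));
}

Run as-is, the last chunk still comes out as a thinking message because nothing ever clears the flag; the inner `content === ""` reset in the diff is unreachable inside the `isThinking && content` branch, which appears to be what the termination comment is flagging.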