Commit 187a044
chore: fix context too long issues
feiskyer committed Dec 29, 2023
1 parent 5473067 commit 187a044
Showing 13 changed files with 1,149 additions and 1,016 deletions.
28 changes: 17 additions & 11 deletions kube_copilot/chains.py
@@ -1,21 +1,21 @@
 # -*- coding: utf-8 -*-
 import os
-from langchain.chat_models import ChatOpenAI
+from langchain.chat_models import ChatOpenAI, AzureChatOpenAI
 from langchain.agents import AgentType, Tool, initialize_agent
 from langchain.agents.agent import AgentExecutor
 from langchain.callbacks import HumanApprovalCallbackHandler
 from langchain.agents.structured_chat.base import StructuredChatAgent
 from langchain.utilities import GoogleSearchAPIWrapper
 from langchain_experimental.plan_and_execute import PlanAndExecute, load_chat_planner
 from langchain_experimental.plan_and_execute.executors.base import ChainExecutor
-from langchain_experimental.tools import PythonREPLTool
 from langchain.agents.structured_chat.base import StructuredChatAgent
 from langchain.callbacks import StdOutCallbackHandler
 from langchain.memory import ConversationBufferMemory
 from langchain.agents import OpenAIMultiFunctionsAgent
 from langchain.schema.messages import SystemMessage
 from langchain.prompts import MessagesPlaceholder
 from langchain.agents import AgentExecutor
+from kube_copilot.python import PythonTool
 from kube_copilot.shell import KubeProcess
 from kube_copilot.prompts import get_planner_prompt, _base_prompt
 from kube_copilot.output import ChatOutputParser
@@ -143,15 +143,21 @@ def get_chain(self, verbose=True, model="gpt-4", additional_tools=None, enable_p
 def get_llm_tools(model, additional_tools, enable_python=False, auto_approve=False):
     '''Initialize the LLM chain with useful tools.'''
     if os.getenv("OPENAI_API_TYPE") == "azure" or (os.getenv("OPENAI_API_BASE") is not None and "azure" in os.getenv("OPENAI_API_BASE")):
-        engine = model.replace(".", "")
-        llm = ChatOpenAI(model_name=model,
-                         temperature=0,
-                         request_timeout=120,
-                         model_kwargs={"engine": engine})
+        deployment_name = model.replace(".", "")
+        llm = AzureChatOpenAI(temperature=0,
+                              request_timeout=120,
+                              openai_api_key=os.getenv("OPENAI_API_KEY"),
+                              openai_api_base=os.getenv("OPENAI_API_BASE"),
+                              openai_api_version="2023-05-15",
+                              deployment_name=deployment_name)
     else:
         llm = ChatOpenAI(model_name=model,
                          temperature=0,
-                         request_timeout=120)
+                         request_timeout=120,
+                         openai_api_key=os.getenv("OPENAI_API_KEY"),
+                         openai_api_base=os.getenv(
+                             "OPENAI_API_BASE", "https://api.openai.com/v1"),
+                         openai_organization=os.getenv("OPENAI_ORGANIZATION", None))
 
     tools = [
         Tool(
@@ -167,17 +173,17 @@ def get_llm_tools(model, additional_tools, enable_python=False, auto_approve=Fal
     ]
 
     if enable_python:
-        python_tool = PythonREPLTool(
+        python_tool = PythonTool(
             callbacks=[HumanApprovalCallbackHandler(
                 approve=python_approval)]
         )
         if auto_approve:
-            python_tool = PythonREPLTool()
+            python_tool = PythonTool()
         tools = [
             Tool(
                 name="python",
                 func=python_tool.run,
-                description="Useful for executing Python code with Kubernetes Python SDK client. Results should be print out by calling `print(...)`. Input: Python code. Output: the result from the Python code's print()."
+                description="Useful for executing Python code with Kubernetes Python SDK client. Results should be print out by calling `print(...)`. Input: Python codes (kubectl commands must be converted to kubernetes python library first). Output: the result from the Python code's print()."
             ),
             Tool(
                 name="trivy",
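The provider switch above is driven entirely by environment variables. A minimal sketch (not part of the commit) of the selection logic that get_llm_tools() now performs, assuming only the variables the diff reads:

# Sketch: Azure OpenAI is chosen when OPENAI_API_TYPE is "azure" or
# OPENAI_API_BASE contains "azure"; Azure deployment names are derived
# by stripping dots from the model name.
import os

def is_azure_endpoint() -> bool:
    api_base = os.getenv("OPENAI_API_BASE")
    return os.getenv("OPENAI_API_TYPE") == "azure" or (
        api_base is not None and "azure" in api_base)

def azure_deployment_for(model: str) -> str:
    # Azure deployment names cannot contain ".", e.g. "gpt-3.5-turbo" -> "gpt-35-turbo".
    return model.replace(".", "")

os.environ["OPENAI_API_BASE"] = "https://example.openai.azure.com/"  # hypothetical endpoint
print(is_azure_endpoint())                    # True
print(azure_deployment_for("gpt-3.5-turbo"))  # gpt-35-turbo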
2 changes: 0 additions & 2 deletions kube_copilot/cli.py
@@ -3,7 +3,6 @@
 import logging
 import sys
 import click
-from kube_copilot.llm import init_openai
 from kube_copilot.chains import ReActLLM
 from kube_copilot.shell import KubeProcess
 from kube_copilot.prompts import (
@@ -106,7 +105,6 @@ def generate(instructions, verbose, model)

 def main():
     '''Main function'''
-    init_openai()
     cli()
 
 
12 changes: 7 additions & 5 deletions kube_copilot/kubeconfig.py
@@ -3,8 +3,9 @@


 def get_kubeconfig():
-    token = open("/run/secrets/kubernetes.io/serviceaccount/token").read().strip()  # Strip newline characters
-    cert = open("/run/secrets/kubernetes.io/serviceaccount/ca.crt").read().strip()  # Strip newline characters
+    '''Get kubeconfig for the current Pod.'''
+    token = open("/run/secrets/kubernetes.io/serviceaccount/token", "r", encoding="utf-8").read().strip()  # Strip newline characters
+    cert = open("/run/secrets/kubernetes.io/serviceaccount/ca.crt", "r", encoding="utf-8").read().strip()  # Strip newline characters
     cert = base64.b64encode(cert.encode()).decode()
     host = os.environ.get("KUBERNETES_SERVICE_HOST")
     port = os.environ.get("KUBERNETES_SERVICE_PORT")
@@ -30,6 +31,7 @@ def get_kubeconfig():


 def setup_kubeconfig():
+    '''Set up kubeconfig if running inside a Pod.'''
     if not os.getenv("KUBERNETES_SERVICE_HOST"):
         # Not running inside a Pod, so no need to set up kubeconfig
         return
@@ -40,12 +42,12 @@ def setup_kubeconfig():

     # If kubeconfig already exists, no need to recreate it
     if os.path.exists(kubeconfig_file):
-        return
+        return
 
     os.makedirs(kubeconfig_path, exist_ok=True)
     kubeconfig = get_kubeconfig()
-    with open(kubeconfig_file, "w") as f:
-        f.write(kubeconfig)
+    with open(kubeconfig_file, "w", encoding="utf-8") as f:
+        f.write(kubeconfig)
 
 
 # Call the setup_kubeconfig function to set up kubeconfig if needed
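The YAML template that get_kubeconfig() fills in sits outside the visible hunks. As a hedged sketch only, the conventional in-cluster kubeconfig such a function renders from the service-account token and CA certificate looks like this (field layout assumed, not taken from this diff):

def render_kubeconfig(host, port, cert_b64, token):
    # Assumed layout: standard single-cluster kubeconfig for in-Pod access.
    return f"""apiVersion: v1
kind: Config
clusters:
- name: default
  cluster:
    server: https://{host}:{port}
    certificate-authority-data: {cert_b64}
contexts:
- name: default
  context:
    cluster: default
    user: default
current-context: default
users:
- name: default
  user:
    token: {token}
"""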
21 changes: 0 additions & 21 deletions kube_copilot/llm.py

This file was deleted.

5 changes: 2 additions & 3 deletions kube_copilot/output.py
@@ -40,10 +40,9 @@ def parse(self, text: str) -> Union[AgentAction, AgentFinish]:
             )
             return AgentAction(response["action"], response["action_input"], text)
 
-        except Exception:
+        except Exception as exc:
             if not includes_answer:
-                raise OutputParserException(
-                    f"Could not parse LLM output: {text}")
+                raise OutputParserException(f"Could not parse LLM output: {text}") from exc
             return AgentFinish(
                 {"output": text.split(FINAL_ANSWER_ACTION)[-1].strip()}, text
             )
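The "from exc" change above chains the underlying failure onto the raised OutputParserException, so tracebacks show what actually went wrong. A small illustration (names hypothetical, not from the commit):

import json

try:
    try:
        json.loads("not json")  # the underlying parse failure
    except Exception as exc:
        raise ValueError("Could not parse LLM output") from exc
except ValueError as err:
    print(type(err.__cause__).__name__)  # JSONDecodeError is kept on the chain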
39 changes: 39 additions & 0 deletions kube_copilot/python.py
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+from typing import Any, Dict, Optional, Type
+import tiktoken
+from langchain_experimental.tools import PythonREPLTool
+from langchain.callbacks.manager import (
+    AsyncCallbackManagerForToolRun,
+    CallbackManagerForToolRun,
+)
+
+class PythonTool(PythonREPLTool):
+
+    max_tokens = 2000
+    model = "gpt-4"
+
+    def trunk_tokens(self, msg):
+        # TODO: workarounds for the following context length error with ChatGPT
+        # https://github.com/hwchase17/langchain/issues/2140
+        # https://github.com/hwchase17/langchain/issues/1767
+        tokens = tiktoken.encoding_for_model(self.model).encode(msg)
+        while len(tokens) > self.max_tokens:
+            msg = msg[:len(msg) // 2]
+            tokens = tiktoken.encoding_for_model(self.model).encode(msg)
+        return msg
+
+    def _run(
+        self,
+        query: str,
+        run_manager: Optional[CallbackManagerForToolRun] = None,
+    ) -> Any:
+        result = super()._run(query, run_manager)
+        return self.trunk_tokens(result)
+
+    async def _arun(
+        self,
+        query: str,
+        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
+    ) -> Any:
+        result = await super()._arun(query, run_manager)
+        return self.trunk_tokens(result)
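PythonTool.trunk_tokens is the heart of the "context too long" fix: tool output is repeatedly halved until it fits the token budget, as counted by the model's tiktoken encoding. A standalone sketch of the same strategy, mirroring the class defaults above (sample data hypothetical):

import tiktoken

def trunk_tokens(msg: str, model: str = "gpt-4", max_tokens: int = 2000) -> str:
    # Halve the string until its token count fits the budget -- crude,
    # but it bounds the context sent back to the LLM.
    enc = tiktoken.encoding_for_model(model)
    while len(enc.encode(msg)) > max_tokens:
        msg = msg[:len(msg) // 2]
    return msg

long_output = "pod-1234 Running 0 restarts\n" * 2000  # oversized tool result
short = trunk_tokens(long_output)
print(len(tiktoken.encoding_for_model("gpt-4").encode(short)))  # <= 2000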