From c9c54635f2ef9fbadb5d8dfd1a404a0ed74f7ca9 Mon Sep 17 00:00:00 2001 From: killian <63927363+KillianLucas@users.noreply.github.com> Date: Wed, 24 Jul 2024 15:23:22 -0700 Subject: [PATCH 1/7] `auto_run` cannot be set over the server --- interpreter/core/async_core.py | 4 ++++ interpreter/core/llm/run_function_calling_llm.py | 1 + 2 files changed, 5 insertions(+) diff --git a/interpreter/core/async_core.py b/interpreter/core/async_core.py index df56f3725b..7c7fe0d0ed 100644 --- a/interpreter/core/async_core.py +++ b/interpreter/core/async_core.py @@ -533,6 +533,10 @@ async def set_settings(payload: Dict[str, Any]): for key, value in payload.items(): print(f"Updating settings: {key} = {value}") if key in ["llm", "computer"] and isinstance(value, dict): + if key == "auto_run": + return { + "error": f"The setting {key} is not modifiable through the server due to security constraints." + }, 403 if hasattr(async_interpreter, key): for sub_key, sub_value in value.items(): if hasattr(getattr(async_interpreter, key), sub_key): diff --git a/interpreter/core/llm/run_function_calling_llm.py b/interpreter/core/llm/run_function_calling_llm.py index 17b59afdc8..46f047d151 100644 --- a/interpreter/core/llm/run_function_calling_llm.py +++ b/interpreter/core/llm/run_function_calling_llm.py @@ -54,6 +54,7 @@ def run_function_calling_llm(llm, request_params): if "content" in delta and delta["content"]: if function_call_detected: # More content after a code block? This is a code review by a judge layer. + # print("Code safety review:", delta["content"]) if delta["content"].strip() == "": yield {"type": "review", "format": "safe", "content": ""} elif "" in delta["content"]: From abcfdd168691a0db609f16cf54d4dba98251da4d Mon Sep 17 00:00:00 2001 From: killian <63927363+KillianLucas@users.noreply.github.com> Date: Wed, 24 Jul 2024 15:49:02 -0700 Subject: [PATCH 2/7] Better judge layer parsing --- .../core/llm/run_function_calling_llm.py | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/interpreter/core/llm/run_function_calling_llm.py b/interpreter/core/llm/run_function_calling_llm.py index 46f047d151..c8c4a0b36e 100644 --- a/interpreter/core/llm/run_function_calling_llm.py +++ b/interpreter/core/llm/run_function_calling_llm.py @@ -42,6 +42,9 @@ def run_function_calling_llm(llm, request_params): code = "" function_call_detected = False + accumulated_review = "" + review_category = None + for chunk in llm.completions(**request_params): if "choices" not in chunk or len(chunk["choices"]) == 0: # This happens sometimes @@ -55,22 +58,22 @@ def run_function_calling_llm(llm, request_params): if function_call_detected: # More content after a code block? This is a code review by a judge layer. # print("Code safety review:", delta["content"]) - if delta["content"].strip() == "": - yield {"type": "review", "format": "safe", "content": ""} - elif "" in delta["content"]: - content = ( - delta["content"] - .replace("", "") - .replace("", "") - ) - yield {"type": "review", "format": "unsafe", "content": content} - else: - content = ( - delta["content"] - .replace("", "") - .replace("", "") - ) - yield {"type": "review", "format": "warning", "content": content} + accumulated_review += delta["content"] + + if review_category == None: + if "" in accumulated_review: + review_category = "unsafe" + if "" in accumulated_review: + review_category = "warning" + if "" in accumulated_review: + review_category = "safe" + + if review_category != None: + yield { + "type": "review", + "format": review_category, + "content": delta["content"], + } else: yield {"type": "message", "content": delta["content"]} From 8f74e5b5ab3c5bd20311341fde715db68d170b68 Mon Sep 17 00:00:00 2001 From: killian <63927363+KillianLucas@users.noreply.github.com> Date: Wed, 24 Jul 2024 23:44:37 -0700 Subject: [PATCH 3/7] Use tool calling instead of function calling --- interpreter/core/llm/llm.py | 10 +++--- interpreter/core/llm/run_tool_calling_llm.py | 33 ++++++++++++++++---- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/interpreter/core/llm/llm.py b/interpreter/core/llm/llm.py index 04971248b1..8a52ecdcc2 100644 --- a/interpreter/core/llm/llm.py +++ b/interpreter/core/llm/llm.py @@ -15,10 +15,10 @@ from ...terminal_interface.utils.display_markdown_message import ( display_markdown_message, ) -from .run_function_calling_llm import run_function_calling_llm - -# from .run_tool_calling_llm import run_tool_calling_llm from .run_text_llm import run_text_llm + +# from .run_function_calling_llm import run_function_calling_llm +from .run_tool_calling_llm import run_tool_calling_llm from .utils.convert_to_openai_messages import convert_to_openai_messages @@ -287,8 +287,8 @@ def run(self, messages): time.sleep(5) if self.supports_functions: - yield from run_function_calling_llm(self, params) - # yield from run_tool_calling_llm(self, params) + # yield from run_function_calling_llm(self, params) + yield from run_tool_calling_llm(self, params) else: yield from run_text_llm(self, params) diff --git a/interpreter/core/llm/run_tool_calling_llm.py b/interpreter/core/llm/run_tool_calling_llm.py index 541c424734..7b4d3a2c1c 100644 --- a/interpreter/core/llm/run_tool_calling_llm.py +++ b/interpreter/core/llm/run_tool_calling_llm.py @@ -36,6 +36,23 @@ def run_tool_calling_llm(llm, request_params): ] request_params["tools"] = [tool_schema] + last_tool_id = 0 + for message in request_params["messages"]: + if "function_call" in message: + function = message.pop("function_call") + message["tool_calls"] = [ + { + "id": "toolu_" + str(last_tool_id), + "type": "function", + "function": function, + } + ] + if message["role"] == "function": + message["role"] = "tool" + message["tool_call_id"] = "toolu_" + str(last_tool_id) + + last_tool_id += 1 + # Add OpenAI's recommended function message # request_params["messages"][0][ # "content" @@ -55,12 +72,16 @@ def run_tool_calling_llm(llm, request_params): delta = chunk["choices"][0]["delta"] # Convert tool call into function call, which we have great parsing logic for below - if "tool_calls" in delta: - if ( - len(delta["tool_calls"]) > 0 - and "function_call" in delta["tool_calls"][0] - ): - delta["function_call"] = delta["tool_calls"][0]["function_call"] + if "tool_calls" in delta and delta["tool_calls"]: + # import pdb; pdb.set_trace() + if len(delta["tool_calls"]) > 0 and delta["tool_calls"][0].function: + delta = { + # "id": delta["tool_calls"][0], + "function_call": { + "name": delta["tool_calls"][0].function.name, + "arguments": delta["tool_calls"][0].function.arguments, + } + } # Accumulate deltas accumulated_deltas = merge_deltas(accumulated_deltas, delta) From 7925e4eac0ac9a6b275ec7d35db671cd2de01790 Mon Sep 17 00:00:00 2001 From: killian <63927363+KillianLucas@users.noreply.github.com> Date: Wed, 24 Jul 2024 23:51:13 -0700 Subject: [PATCH 4/7] Judge layer for tool calling models --- .../core/llm/run_function_calling_llm.py | 20 ++++++++-- interpreter/core/llm/run_tool_calling_llm.py | 38 ++++++++++++++++++- 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/interpreter/core/llm/run_function_calling_llm.py b/interpreter/core/llm/run_function_calling_llm.py index c8c4a0b36e..46278feb85 100644 --- a/interpreter/core/llm/run_function_calling_llm.py +++ b/interpreter/core/llm/run_function_calling_llm.py @@ -57,18 +57,30 @@ def run_function_calling_llm(llm, request_params): if "content" in delta and delta["content"]: if function_call_detected: # More content after a code block? This is a code review by a judge layer. + # print("Code safety review:", delta["content"]) - accumulated_review += delta["content"] if review_category == None: - if "" in accumulated_review: + accumulated_review += delta["content"] + + if "" in accumulated_review: review_category = "unsafe" - if "" in accumulated_review: + if "" in accumulated_review: review_category = "warning" - if "" in accumulated_review: + if "" in accumulated_review: review_category = "safe" if review_category != None: + for tag in [ + "", + "", + "", + "", + "", + "", + ]: + delta["content"] = delta["content"].replace(tag, "") + yield { "type": "review", "format": review_category, diff --git a/interpreter/core/llm/run_tool_calling_llm.py b/interpreter/core/llm/run_tool_calling_llm.py index 7b4d3a2c1c..0e2d10edb4 100644 --- a/interpreter/core/llm/run_tool_calling_llm.py +++ b/interpreter/core/llm/run_tool_calling_llm.py @@ -63,6 +63,7 @@ def run_tool_calling_llm(llm, request_params): accumulated_deltas = {} language = None code = "" + function_call_detected = False for chunk in llm.completions(**request_params): if "choices" not in chunk or len(chunk["choices"]) == 0: @@ -73,6 +74,8 @@ def run_tool_calling_llm(llm, request_params): # Convert tool call into function call, which we have great parsing logic for below if "tool_calls" in delta and delta["tool_calls"]: + function_call_detected = True + # import pdb; pdb.set_trace() if len(delta["tool_calls"]) > 0 and delta["tool_calls"][0].function: delta = { @@ -87,7 +90,40 @@ def run_tool_calling_llm(llm, request_params): accumulated_deltas = merge_deltas(accumulated_deltas, delta) if "content" in delta and delta["content"]: - yield {"type": "message", "content": delta["content"]} + if function_call_detected: + # More content after a code block? This is a code review by a judge layer. + + # print("Code safety review:", delta["content"]) + + if review_category == None: + accumulated_review += delta["content"] + + if "" in accumulated_review: + review_category = "unsafe" + if "" in accumulated_review: + review_category = "warning" + if "" in accumulated_review: + review_category = "safe" + + if review_category != None: + for tag in [ + "", + "", + "", + "", + "", + "", + ]: + delta["content"] = delta["content"].replace(tag, "") + + yield { + "type": "review", + "format": review_category, + "content": delta["content"], + } + + else: + yield {"type": "message", "content": delta["content"]} if ( accumulated_deltas.get("function_call") From 451ee22f40d511efd4f85c7506fdb088966fc40f Mon Sep 17 00:00:00 2001 From: killian <63927363+KillianLucas@users.noreply.github.com> Date: Thu, 25 Jul 2024 00:01:34 -0700 Subject: [PATCH 5/7] Judge layer fix for tool calling models --- interpreter/core/llm/run_tool_calling_llm.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/interpreter/core/llm/run_tool_calling_llm.py b/interpreter/core/llm/run_tool_calling_llm.py index 0e2d10edb4..a45f534a2e 100644 --- a/interpreter/core/llm/run_tool_calling_llm.py +++ b/interpreter/core/llm/run_tool_calling_llm.py @@ -64,6 +64,8 @@ def run_tool_calling_llm(llm, request_params): language = None code = "" function_call_detected = False + accumulated_review = "" + review_category = None for chunk in llm.completions(**request_params): if "choices" not in chunk or len(chunk["choices"]) == 0: From 5d1ca06b7438b13e4e68cb67a3dddccf229dcf48 Mon Sep 17 00:00:00 2001 From: killian <63927363+KillianLucas@users.noreply.github.com> Date: Thu, 25 Jul 2024 00:53:08 -0700 Subject: [PATCH 6/7] Fixed tool calling --- interpreter/core/llm/run_tool_calling_llm.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/interpreter/core/llm/run_tool_calling_llm.py b/interpreter/core/llm/run_tool_calling_llm.py index a45f534a2e..cfc20e9d66 100644 --- a/interpreter/core/llm/run_tool_calling_llm.py +++ b/interpreter/core/llm/run_tool_calling_llm.py @@ -37,7 +37,7 @@ def run_tool_calling_llm(llm, request_params): request_params["tools"] = [tool_schema] last_tool_id = 0 - for message in request_params["messages"]: + for i, message in enumerate(request_params["messages"]): if "function_call" in message: function = message.pop("function_call") message["tool_calls"] = [ @@ -48,10 +48,16 @@ def run_tool_calling_llm(llm, request_params): } ] if message["role"] == "function": - message["role"] = "tool" - message["tool_call_id"] = "toolu_" + str(last_tool_id) + if i != 0 and request_params["messages"][i - 1]["role"] == "tool": + request_params["messages"][i]["content"] += message["content"] + message = None + else: + message["role"] = "tool" + message["tool_call_id"] = "toolu_" + str(last_tool_id) + + last_tool_id += 1 - last_tool_id += 1 + request_params["messages"] = [m for m in request_params["messages"] if m != None] # Add OpenAI's recommended function message # request_params["messages"][0][ From ca5f45de4e78b88058ce81b875615ead544eccac Mon Sep 17 00:00:00 2001 From: killian <63927363+KillianLucas@users.noreply.github.com> Date: Thu, 25 Jul 2024 00:54:03 -0700 Subject: [PATCH 7/7] Fixed tool calling --- interpreter/core/llm/run_tool_calling_llm.py | 3 +-- numbers.txt | 3 +++ 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 numbers.txt diff --git a/interpreter/core/llm/run_tool_calling_llm.py b/interpreter/core/llm/run_tool_calling_llm.py index cfc20e9d66..932c3c00ae 100644 --- a/interpreter/core/llm/run_tool_calling_llm.py +++ b/interpreter/core/llm/run_tool_calling_llm.py @@ -39,6 +39,7 @@ def run_tool_calling_llm(llm, request_params): last_tool_id = 0 for i, message in enumerate(request_params["messages"]): if "function_call" in message: + last_tool_id += 1 function = message.pop("function_call") message["tool_calls"] = [ { @@ -55,8 +56,6 @@ def run_tool_calling_llm(llm, request_params): message["role"] = "tool" message["tool_call_id"] = "toolu_" + str(last_tool_id) - last_tool_id += 1 - request_params["messages"] = [m for m in request_params["messages"] if m != None] # Add OpenAI's recommended function message diff --git a/numbers.txt b/numbers.txt new file mode 100644 index 0000000000..01e79c32a8 --- /dev/null +++ b/numbers.txt @@ -0,0 +1,3 @@ +1 +2 +3