From f01ad40a89759798673f9f357a6fecfd4193bc05 Mon Sep 17 00:00:00 2001 From: cpacker Date: Tue, 2 Apr 2024 12:11:13 -0700 Subject: [PATCH 01/12] refactored llm api calling code into a directory --- memgpt/llm_api/__init__.py | 0 memgpt/llm_api/azure_openai.py | 154 +++++++++++++++++++++++ memgpt/llm_api/llm_api_tools.py | 217 ++++++++++++++++++++++++++++++++ memgpt/llm_api/openai.py | 136 ++++++++++++++++++++ 4 files changed, 507 insertions(+) create mode 100644 memgpt/llm_api/__init__.py create mode 100644 memgpt/llm_api/azure_openai.py create mode 100644 memgpt/llm_api/llm_api_tools.py create mode 100644 memgpt/llm_api/openai.py diff --git a/memgpt/llm_api/__init__.py b/memgpt/llm_api/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/memgpt/llm_api/azure_openai.py b/memgpt/llm_api/azure_openai.py new file mode 100644 index 0000000000..f6190feaba --- /dev/null +++ b/memgpt/llm_api/azure_openai.py @@ -0,0 +1,154 @@ +import requests +from typing import Union + +from memgpt.models.chat_completion_response import ChatCompletionResponse +from memgpt.models.embedding_response import EmbeddingResponse +from memgpt.utils import smart_urljoin + + +MODEL_TO_AZURE_ENGINE = { + "gpt-4-1106-preview": "gpt-4", + "gpt-4": "gpt-4", + "gpt-4-32k": "gpt-4-32k", + "gpt-3.5": "gpt-35-turbo", + "gpt-3.5-turbo": "gpt-35-turbo", + "gpt-3.5-turbo-16k": "gpt-35-turbo-16k", +} + + +def clean_azure_endpoint(raw_endpoint_name: str) -> str: + """Make sure the endpoint is of format 'https://YOUR_RESOURCE_NAME.openai.azure.com'""" + if raw_endpoint_name is None: + raise ValueError(raw_endpoint_name) + endpoint_address = raw_endpoint_name.strip("/").replace(".openai.azure.com", "") + endpoint_address = endpoint_address.replace("http://", "") + endpoint_address = endpoint_address.replace("https://", "") + return endpoint_address + + +def azure_openai_get_model_list(url: str, api_key: Union[str, None], api_version: str) -> dict: + """https://learn.microsoft.com/en-us/rest/api/azureopenai/models/list?view=rest-azureopenai-2023-05-15&tabs=HTTP""" + from memgpt.utils import printd + + # https://xxx.openai.azure.com/openai/models?api-version=xxx + url = smart_urljoin(url, "openai") + url = smart_urljoin(url, f"models?api-version={api_version}") + + headers = {"Content-Type": "application/json"} + if api_key is not None: + headers["api-key"] = f"{api_key}" + + printd(f"Sending request to {url}") + try: + response = requests.get(url, headers=headers) + response.raise_for_status() # Raises HTTPError for 4XX/5XX status + response = response.json() # convert to dict from string + printd(f"response = {response}") + return response + except requests.exceptions.HTTPError as http_err: + # Handle HTTP errors (e.g., response 4XX, 5XX) + try: + response = response.json() + except: + pass + printd(f"Got HTTPError, exception={http_err}, response={response}") + raise http_err + except requests.exceptions.RequestException as req_err: + # Handle other requests-related errors (e.g., connection error) + try: + response = response.json() + except: + pass + printd(f"Got RequestException, exception={req_err}, response={response}") + raise req_err + except Exception as e: + # Handle other potential errors + try: + response = response.json() + except: + pass + printd(f"Got unknown Exception, exception={e}, response={response}") + raise e + + +def azure_openai_chat_completions_request( + resource_name: str, deployment_id: str, api_version: str, api_key: str, data: dict +) -> ChatCompletionResponse: + 
"""https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#chat-completions""" + from memgpt.utils import printd + + assert resource_name is not None, "Missing required field when calling Azure OpenAI" + assert deployment_id is not None, "Missing required field when calling Azure OpenAI" + assert api_version is not None, "Missing required field when calling Azure OpenAI" + assert api_key is not None, "Missing required field when calling Azure OpenAI" + + resource_name = clean_azure_endpoint(resource_name) + url = f"https://{resource_name}.openai.azure.com/openai/deployments/{deployment_id}/chat/completions?api-version={api_version}" + headers = {"Content-Type": "application/json", "api-key": f"{api_key}"} + + # If functions == None, strip from the payload + if "functions" in data and data["functions"] is None: + data.pop("functions") + data.pop("function_call", None) # extra safe, should exist always (default="auto") + + if "tools" in data and data["tools"] is None: + data.pop("tools") + data.pop("tool_choice", None) # extra safe, should exist always (default="auto") + + printd(f"Sending request to {url}") + try: + response = requests.post(url, headers=headers, json=data) + printd(f"response = {response}") + response.raise_for_status() # Raises HTTPError for 4XX/5XX status + response = response.json() # convert to dict from string + printd(f"response.json = {response}") + # NOTE: azure openai does not include "content" in the response when it is None, so we need to add it + if "content" not in response["choices"][0].get("message"): + response["choices"][0]["message"]["content"] = None + response = ChatCompletionResponse(**response) # convert to 'dot-dict' style which is the openai python client default + return response + except requests.exceptions.HTTPError as http_err: + # Handle HTTP errors (e.g., response 4XX, 5XX) + printd(f"Got HTTPError, exception={http_err}, payload={data}") + raise http_err + except requests.exceptions.RequestException as req_err: + # Handle other requests-related errors (e.g., connection error) + printd(f"Got RequestException, exception={req_err}") + raise req_err + except Exception as e: + # Handle other potential errors + printd(f"Got unknown Exception, exception={e}") + raise e + + +def azure_openai_embeddings_request( + resource_name: str, deployment_id: str, api_version: str, api_key: str, data: dict +) -> EmbeddingResponse: + """https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#embeddings""" + from memgpt.utils import printd + + resource_name = clean_azure_endpoint(resource_name) + url = f"https://{resource_name}.openai.azure.com/openai/deployments/{deployment_id}/embeddings?api-version={api_version}" + headers = {"Content-Type": "application/json", "api-key": f"{api_key}"} + + printd(f"Sending request to {url}") + try: + response = requests.post(url, headers=headers, json=data) + printd(f"response = {response}") + response.raise_for_status() # Raises HTTPError for 4XX/5XX status + response = response.json() # convert to dict from string + printd(f"response.json = {response}") + response = EmbeddingResponse(**response) # convert to 'dot-dict' style which is the openai python client default + return response + except requests.exceptions.HTTPError as http_err: + # Handle HTTP errors (e.g., response 4XX, 5XX) + printd(f"Got HTTPError, exception={http_err}, payload={data}") + raise http_err + except requests.exceptions.RequestException as req_err: + # Handle other requests-related errors (e.g., connection error) + printd(f"Got 
RequestException, exception={req_err}") + raise req_err + except Exception as e: + # Handle other potential errors + printd(f"Got unknown Exception, exception={e}") + raise e diff --git a/memgpt/llm_api/llm_api_tools.py b/memgpt/llm_api/llm_api_tools.py new file mode 100644 index 0000000000..3a5cf66a97 --- /dev/null +++ b/memgpt/llm_api/llm_api_tools.py @@ -0,0 +1,217 @@ +import random +import time +import requests +import time + +from memgpt.credentials import MemGPTCredentials +from memgpt.local_llm.chat_completion_proxy import get_chat_completion +from memgpt.constants import CLI_WARNING_PREFIX +from memgpt.models.chat_completion_response import ChatCompletionResponse + +from memgpt.data_types import AgentState + +from memgpt.llm_api.openai import openai_chat_completions_request +from memgpt.llm_api.azure_openai import azure_openai_chat_completions_request, MODEL_TO_AZURE_ENGINE + + +def is_context_overflow_error(exception: requests.exceptions.RequestException) -> bool: + """Checks if an exception is due to context overflow (based on common OpenAI response messages)""" + from memgpt.utils import printd + + match_string = "maximum context length" + + # Backwards compatibility with openai python package/client v0.28 (pre-v1 client migration) + if match_string in str(exception): + printd(f"Found '{match_string}' in str(exception)={(str(exception))}") + return True + + # Based on python requests + OpenAI REST API (/v1) + elif isinstance(exception, requests.exceptions.HTTPError): + if exception.response is not None and "application/json" in exception.response.headers.get("Content-Type", ""): + try: + error_details = exception.response.json() + if "error" not in error_details: + printd(f"HTTPError occurred, but couldn't find error field: {error_details}") + return False + else: + error_details = error_details["error"] + + # Check for the specific error code + if error_details.get("code") == "context_length_exceeded": + printd(f"HTTPError occurred, caught error code {error_details.get('code')}") + return True + # Soft-check for "maximum context length" inside of the message + elif error_details.get("message") and "maximum context length" in error_details.get("message"): + printd(f"HTTPError occurred, found '{match_string}' in error message contents ({error_details})") + return True + else: + printd(f"HTTPError occurred, but unknown error message: {error_details}") + return False + except ValueError: + # JSON decoding failed + printd(f"HTTPError occurred ({exception}), but no JSON error message.") + + # Generic fail + else: + return False + + +def retry_with_exponential_backoff( + func, + initial_delay: float = 1, + exponential_base: float = 2, + jitter: bool = True, + max_retries: int = 20, + # List of OpenAI error codes: https://github.com/openai/openai-python/blob/17ac6779958b2b74999c634c4ea4c7b74906027a/src/openai/_client.py#L227-L250 + # 429 = rate limit + error_codes: tuple = (429,), +): + """Retry a function with exponential backoff.""" + + def wrapper(*args, **kwargs): + from memgpt.utils import printd + + # Initialize variables + num_retries = 0 + delay = initial_delay + + # Loop until a successful response or max_retries is hit or an exception is raised + while True: + try: + return func(*args, **kwargs) + + except requests.exceptions.HTTPError as http_err: + # Retry on specified errors + if http_err.response.status_code in error_codes: + # Increment retries + num_retries += 1 + + # Check if max retries has been reached + if num_retries > max_retries: + raise Exception(f"Maximum 
number of retries ({max_retries}) exceeded.") + + # Increment the delay + delay *= exponential_base * (1 + jitter * random.random()) + + # Sleep for the delay + # printd(f"Got a rate limit error ('{http_err}') on LLM backend request, waiting {int(delay)}s then retrying...") + print( + f"{CLI_WARNING_PREFIX}Got a rate limit error ('{http_err}') on LLM backend request, waiting {int(delay)}s then retrying..." + ) + time.sleep(delay) + else: + # For other HTTP errors, re-raise the exception + raise + + # Raise exceptions for any errors not specified + except Exception as e: + raise e + + return wrapper + + +@retry_with_exponential_backoff +def create( + agent_state: AgentState, + messages, + functions=None, + functions_python=None, + function_call="auto", + # hint + first_message=False, + # use tool naming? + # if false, will use deprecated 'functions' style + use_tool_naming=True, +) -> ChatCompletionResponse: + """Return response to chat completion with backoff""" + from memgpt.utils import printd + + printd(f"Using model {agent_state.llm_config.model_endpoint_type}, endpoint: {agent_state.llm_config.model_endpoint}") + + # TODO eventually refactor so that credentials are passed through + credentials = MemGPTCredentials.load() + + if function_call and not functions: + printd("unsetting function_call because functions is None") + function_call = None + + # openai + if agent_state.llm_config.model_endpoint_type == "openai": + # TODO do the same for Azure? + if credentials.openai_key is None and agent_state.llm_config.model_endpoint == "https://api.openai.com/v1": + # only is a problem if we are *not* using an openai proxy + raise ValueError(f"OpenAI key is missing from MemGPT config file") + if use_tool_naming: + data = dict( + model=agent_state.llm_config.model, + messages=messages, + tools=[{"type": "function", "function": f} for f in functions] if functions else None, + tool_choice=function_call, + user=str(agent_state.user_id), + ) + else: + data = dict( + model=agent_state.llm_config.model, + messages=messages, + functions=functions, + function_call=function_call, + user=str(agent_state.user_id), + ) + return openai_chat_completions_request( + url=agent_state.llm_config.model_endpoint, # https://api.openai.com/v1 -> https://api.openai.com/v1/chat/completions + api_key=credentials.openai_key, + data=data, + ) + + # azure + elif agent_state.llm_config.model_endpoint_type == "azure": + azure_deployment = ( + credentials.azure_deployment + if credentials.azure_deployment is not None + else MODEL_TO_AZURE_ENGINE[agent_state.llm_config.model] + ) + if use_tool_naming: + data = dict( + # NOTE: don't pass model to Azure calls, that is the deployment_id + # model=agent_config.model, + messages=messages, + tools=[{"type": "function", "function": f} for f in functions] if functions else None, + tool_choice=function_call, + user=str(agent_state.user_id), + ) + else: + data = dict( + # NOTE: don't pass model to Azure calls, that is the deployment_id + # model=agent_config.model, + messages=messages, + functions=functions, + function_call=function_call, + user=str(agent_state.user_id), + ) + return azure_openai_chat_completions_request( + resource_name=credentials.azure_endpoint, + deployment_id=azure_deployment, + api_version=credentials.azure_version, + api_key=credentials.azure_key, + data=data, + ) + + # local model + else: + return get_chat_completion( + model=agent_state.llm_config.model, + messages=messages, + functions=functions, + functions_python=functions_python, + 
function_call=function_call, + context_window=agent_state.llm_config.context_window, + endpoint=agent_state.llm_config.model_endpoint, + endpoint_type=agent_state.llm_config.model_endpoint_type, + wrapper=agent_state.llm_config.model_wrapper, + user=str(agent_state.user_id), + # hint + first_message=first_message, + # auth-related + auth_type=credentials.openllm_auth_type, + auth_key=credentials.openllm_key, + ) diff --git a/memgpt/llm_api/openai.py b/memgpt/llm_api/openai.py new file mode 100644 index 0000000000..5fb0bed062 --- /dev/null +++ b/memgpt/llm_api/openai.py @@ -0,0 +1,136 @@ +import requests +import time +from typing import Union, Optional + +from memgpt.models.chat_completion_response import ChatCompletionResponse +from memgpt.models.embedding_response import EmbeddingResponse +from memgpt.utils import smart_urljoin + + +def openai_get_model_list(url: str, api_key: Union[str, None], fix_url: Optional[bool] = False) -> dict: + """https://platform.openai.com/docs/api-reference/models/list""" + from memgpt.utils import printd + + # In some cases we may want to double-check the URL and do basic correction, eg: + # In MemGPT config the address for vLLM is w/o a /v1 suffix for simplicity + # However if we're treating the server as an OpenAI proxy we want the /v1 suffix on our model hit + if fix_url: + if not url.endswith("/v1"): + url = smart_urljoin(url, "v1") + + url = smart_urljoin(url, "models") + + headers = {"Content-Type": "application/json"} + if api_key is not None: + headers["Authorization"] = f"Bearer {api_key}" + + printd(f"Sending request to {url}") + try: + response = requests.get(url, headers=headers) + response.raise_for_status() # Raises HTTPError for 4XX/5XX status + response = response.json() # convert to dict from string + printd(f"response = {response}") + return response + except requests.exceptions.HTTPError as http_err: + # Handle HTTP errors (e.g., response 4XX, 5XX) + try: + response = response.json() + except: + pass + printd(f"Got HTTPError, exception={http_err}, response={response}") + raise http_err + except requests.exceptions.RequestException as req_err: + # Handle other requests-related errors (e.g., connection error) + try: + response = response.json() + except: + pass + printd(f"Got RequestException, exception={req_err}, response={response}") + raise req_err + except Exception as e: + # Handle other potential errors + try: + response = response.json() + except: + pass + printd(f"Got unknown Exception, exception={e}, response={response}") + raise e + + +def openai_chat_completions_request(url: str, api_key: str, data: dict) -> ChatCompletionResponse: + """https://platform.openai.com/docs/guides/text-generation?lang=curl""" + from memgpt.utils import printd + + url = smart_urljoin(url, "chat/completions") + headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"} + + # If functions == None, strip from the payload + if "functions" in data and data["functions"] is None: + data.pop("functions") + data.pop("function_call", None) # extra safe, should exist always (default="auto") + + if "tools" in data and data["tools"] is None: + data.pop("tools") + data.pop("tool_choice", None) # extra safe, should exist always (default="auto") + + printd(f"Sending request to {url}") + try: + # Example code to trigger a rate limit response: + # mock_response = requests.Response() + # mock_response.status_code = 429 + # http_error = requests.exceptions.HTTPError("429 Client Error: Too Many Requests") + # http_error.response = mock_response + # 
raise http_error + + # Example code to trigger a context overflow response (for an 8k model) + # data["messages"][-1]["content"] = " ".join(["repeat after me this is not a fluke"] * 1000) + + response = requests.post(url, headers=headers, json=data) + printd(f"response = {response}") + response.raise_for_status() # Raises HTTPError for 4XX/5XX status + response = response.json() # convert to dict from string + printd(f"response.json = {response}") + response = ChatCompletionResponse(**response) # convert to 'dot-dict' style which is the openai python client default + return response + except requests.exceptions.HTTPError as http_err: + # Handle HTTP errors (e.g., response 4XX, 5XX) + printd(f"Got HTTPError, exception={http_err}, payload={data}") + raise http_err + except requests.exceptions.RequestException as req_err: + # Handle other requests-related errors (e.g., connection error) + printd(f"Got RequestException, exception={req_err}") + raise req_err + except Exception as e: + # Handle other potential errors + printd(f"Got unknown Exception, exception={e}") + raise e + + +def openai_embeddings_request(url: str, api_key: str, data: dict) -> EmbeddingResponse: + """https://platform.openai.com/docs/api-reference/embeddings/create""" + from memgpt.utils import printd + + url = smart_urljoin(url, "embeddings") + headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"} + + printd(f"Sending request to {url}") + try: + response = requests.post(url, headers=headers, json=data) + printd(f"response = {response}") + response.raise_for_status() # Raises HTTPError for 4XX/5XX status + response = response.json() # convert to dict from string + printd(f"response.json = {response}") + response = EmbeddingResponse(**response) # convert to 'dot-dict' style which is the openai python client default + return response + except requests.exceptions.HTTPError as http_err: + # Handle HTTP errors (e.g., response 4XX, 5XX) + printd(f"Got HTTPError, exception={http_err}, payload={data}") + raise http_err + except requests.exceptions.RequestException as req_err: + # Handle other requests-related errors (e.g., connection error) + printd(f"Got RequestException, exception={req_err}") + raise req_err + except Exception as e: + # Handle other potential errors + printd(f"Got unknown Exception, exception={e}") + raise e From f3b9205ab0a60be388ed40f4de60f9b9abaaf28a Mon Sep 17 00:00:00 2001 From: cpacker Date: Tue, 2 Apr 2024 12:51:48 -0700 Subject: [PATCH 02/12] refactor --- memgpt/agent.py | 2 +- memgpt/cli/cli_config.py | 3 +- memgpt/functions/function_sets/extras.py | 2 +- memgpt/llm_api_tools.py | 495 ----------------------- memgpt/memory.py | 2 +- memgpt/utils.py | 9 +- 6 files changed, 13 insertions(+), 500 deletions(-) delete mode 100644 memgpt/llm_api_tools.py diff --git a/memgpt/agent.py b/memgpt/agent.py index 706e9489b0..8f5e81e356 100644 --- a/memgpt/agent.py +++ b/memgpt/agent.py @@ -15,7 +15,7 @@ from memgpt.persistence_manager import LocalStateManager from memgpt.system import get_login_event, package_function_response, package_summarize_message, get_initial_boot_messages from memgpt.memory import CoreMemory as InContextMemory, summarize_messages, ArchivalMemory, RecallMemory -from memgpt.llm_api_tools import create, is_context_overflow_error +from memgpt.llm_api.llm_api_tools import create, is_context_overflow_error from memgpt.utils import ( get_utc_time, create_random_username, diff --git a/memgpt/cli/cli_config.py b/memgpt/cli/cli_config.py index 5dd5e52e2e..482bd65716 100644 
--- a/memgpt/cli/cli_config.py +++ b/memgpt/cli/cli_config.py @@ -18,7 +18,8 @@ from memgpt.constants import MEMGPT_DIR from memgpt.credentials import MemGPTCredentials, SUPPORTED_AUTH_TYPES from memgpt.data_types import User, LLMConfig, EmbeddingConfig -from memgpt.llm_api_tools import openai_get_model_list, azure_openai_get_model_list, smart_urljoin +from memgpt.llm_api.openai import openai_get_model_list +from memgpt.llm_api.azure_openai import azure_openai_get_model_list from memgpt.local_llm.constants import DEFAULT_ENDPOINTS, DEFAULT_OLLAMA_MODEL, DEFAULT_WRAPPER_NAME from memgpt.local_llm.utils import get_available_wrappers from memgpt.server.utils import shorten_key_middle diff --git a/memgpt/functions/function_sets/extras.py b/memgpt/functions/function_sets/extras.py index 0f1a41c3fa..ee5abe1986 100644 --- a/memgpt/functions/function_sets/extras.py +++ b/memgpt/functions/function_sets/extras.py @@ -11,7 +11,7 @@ MAX_PAUSE_HEARTBEATS, JSON_ENSURE_ASCII, ) -from memgpt.llm_api_tools import create +from memgpt.llm_api.llm_api_tools import create def message_chatgpt(self, message: str): diff --git a/memgpt/llm_api_tools.py b/memgpt/llm_api_tools.py deleted file mode 100644 index b748aeed7b..0000000000 --- a/memgpt/llm_api_tools.py +++ /dev/null @@ -1,495 +0,0 @@ -import random -import time -import requests -import time -from typing import Union, Optional -import urllib - -from memgpt.credentials import MemGPTCredentials -from memgpt.local_llm.chat_completion_proxy import get_chat_completion -from memgpt.constants import CLI_WARNING_PREFIX -from memgpt.models.chat_completion_response import ChatCompletionResponse -from memgpt.models.embedding_response import EmbeddingResponse - -from memgpt.data_types import AgentState - -MODEL_TO_AZURE_ENGINE = { - "gpt-4-1106-preview": "gpt-4", - "gpt-4": "gpt-4", - "gpt-4-32k": "gpt-4-32k", - "gpt-3.5": "gpt-35-turbo", - "gpt-3.5-turbo": "gpt-35-turbo", - "gpt-3.5-turbo-16k": "gpt-35-turbo-16k", -} - - -def is_context_overflow_error(exception): - from memgpt.utils import printd - - match_string = "maximum context length" - - # Backwards compatibility with openai python package/client v0.28 (pre-v1 client migration) - if match_string in str(exception): - printd(f"Found '{match_string}' in str(exception)={(str(exception))}") - return True - - # Based on python requests + OpenAI REST API (/v1) - elif isinstance(exception, requests.exceptions.HTTPError): - if exception.response is not None and "application/json" in exception.response.headers.get("Content-Type", ""): - try: - error_details = exception.response.json() - if "error" not in error_details: - printd(f"HTTPError occurred, but couldn't find error field: {error_details}") - return False - else: - error_details = error_details["error"] - - # Check for the specific error code - if error_details.get("code") == "context_length_exceeded": - printd(f"HTTPError occurred, caught error code {error_details.get('code')}") - return True - # Soft-check for "maximum context length" inside of the message - elif error_details.get("message") and "maximum context length" in error_details.get("message"): - printd(f"HTTPError occurred, found '{match_string}' in error message contents ({error_details})") - return True - else: - printd(f"HTTPError occurred, but unknown error message: {error_details}") - return False - except ValueError: - # JSON decoding failed - printd(f"HTTPError occurred ({exception}), but no JSON error message.") - - # Generic fail - else: - return False - - -def smart_urljoin(base_url, 
relative_url): - """urljoin is stupid and wants a trailing / at the end of the endpoint address, or it will chop the suffix off""" - if not base_url.endswith("/"): - base_url += "/" - return urllib.parse.urljoin(base_url, relative_url) - - -def clean_azure_endpoint(raw_endpoint_name): - """Make sure the endpoint is of format 'https://YOUR_RESOURCE_NAME.openai.azure.com'""" - if raw_endpoint_name is None: - raise ValueError(raw_endpoint_name) - endpoint_address = raw_endpoint_name.strip("/").replace(".openai.azure.com", "") - endpoint_address = endpoint_address.replace("http://", "") - endpoint_address = endpoint_address.replace("https://", "") - return endpoint_address - - -def openai_get_model_list(url: str, api_key: Union[str, None], fix_url: Optional[bool] = False) -> dict: - """https://platform.openai.com/docs/api-reference/models/list""" - from memgpt.utils import printd - - # In some cases we may want to double-check the URL and do basic correction, eg: - # In MemGPT config the address for vLLM is w/o a /v1 suffix for simplicity - # However if we're treating the server as an OpenAI proxy we want the /v1 suffix on our model hit - if fix_url: - if not url.endswith("/v1"): - url = smart_urljoin(url, "v1") - - url = smart_urljoin(url, "models") - - headers = {"Content-Type": "application/json"} - if api_key is not None: - headers["Authorization"] = f"Bearer {api_key}" - - printd(f"Sending request to {url}") - try: - response = requests.get(url, headers=headers) - response.raise_for_status() # Raises HTTPError for 4XX/5XX status - response = response.json() # convert to dict from string - printd(f"response = {response}") - return response - except requests.exceptions.HTTPError as http_err: - # Handle HTTP errors (e.g., response 4XX, 5XX) - try: - response = response.json() - except: - pass - printd(f"Got HTTPError, exception={http_err}, response={response}") - raise http_err - except requests.exceptions.RequestException as req_err: - # Handle other requests-related errors (e.g., connection error) - try: - response = response.json() - except: - pass - printd(f"Got RequestException, exception={req_err}, response={response}") - raise req_err - except Exception as e: - # Handle other potential errors - try: - response = response.json() - except: - pass - printd(f"Got unknown Exception, exception={e}, response={response}") - raise e - - -def azure_openai_get_model_list(url: str, api_key: Union[str, None], api_version: str) -> dict: - """https://learn.microsoft.com/en-us/rest/api/azureopenai/models/list?view=rest-azureopenai-2023-05-15&tabs=HTTP""" - from memgpt.utils import printd - - # https://xxx.openai.azure.com/openai/models?api-version=xxx - url = smart_urljoin(url, "openai") - url = smart_urljoin(url, f"models?api-version={api_version}") - - headers = {"Content-Type": "application/json"} - if api_key is not None: - headers["api-key"] = f"{api_key}" - - printd(f"Sending request to {url}") - try: - response = requests.get(url, headers=headers) - response.raise_for_status() # Raises HTTPError for 4XX/5XX status - response = response.json() # convert to dict from string - printd(f"response = {response}") - return response - except requests.exceptions.HTTPError as http_err: - # Handle HTTP errors (e.g., response 4XX, 5XX) - try: - response = response.json() - except: - pass - printd(f"Got HTTPError, exception={http_err}, response={response}") - raise http_err - except requests.exceptions.RequestException as req_err: - # Handle other requests-related errors (e.g., connection error) - try: - 
response = response.json() - except: - pass - printd(f"Got RequestException, exception={req_err}, response={response}") - raise req_err - except Exception as e: - # Handle other potential errors - try: - response = response.json() - except: - pass - printd(f"Got unknown Exception, exception={e}, response={response}") - raise e - - -def openai_chat_completions_request(url, api_key, data): - """https://platform.openai.com/docs/guides/text-generation?lang=curl""" - from memgpt.utils import printd - - url = smart_urljoin(url, "chat/completions") - headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"} - - # If functions == None, strip from the payload - if "functions" in data and data["functions"] is None: - data.pop("functions") - data.pop("function_call", None) # extra safe, should exist always (default="auto") - - if "tools" in data and data["tools"] is None: - data.pop("tools") - data.pop("tool_choice", None) # extra safe, should exist always (default="auto") - - printd(f"Sending request to {url}") - try: - # Example code to trigger a rate limit response: - # mock_response = requests.Response() - # mock_response.status_code = 429 - # http_error = requests.exceptions.HTTPError("429 Client Error: Too Many Requests") - # http_error.response = mock_response - # raise http_error - - # Example code to trigger a context overflow response (for an 8k model) - # data["messages"][-1]["content"] = " ".join(["repeat after me this is not a fluke"] * 1000) - - response = requests.post(url, headers=headers, json=data) - printd(f"response = {response}") - response.raise_for_status() # Raises HTTPError for 4XX/5XX status - response = response.json() # convert to dict from string - printd(f"response.json = {response}") - response = ChatCompletionResponse(**response) # convert to 'dot-dict' style which is the openai python client default - return response - except requests.exceptions.HTTPError as http_err: - # Handle HTTP errors (e.g., response 4XX, 5XX) - printd(f"Got HTTPError, exception={http_err}, payload={data}") - raise http_err - except requests.exceptions.RequestException as req_err: - # Handle other requests-related errors (e.g., connection error) - printd(f"Got RequestException, exception={req_err}") - raise req_err - except Exception as e: - # Handle other potential errors - printd(f"Got unknown Exception, exception={e}") - raise e - - -def openai_embeddings_request(url, api_key, data): - """https://platform.openai.com/docs/api-reference/embeddings/create""" - from memgpt.utils import printd - - url = smart_urljoin(url, "embeddings") - headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"} - - printd(f"Sending request to {url}") - try: - response = requests.post(url, headers=headers, json=data) - printd(f"response = {response}") - response.raise_for_status() # Raises HTTPError for 4XX/5XX status - response = response.json() # convert to dict from string - printd(f"response.json = {response}") - response = EmbeddingResponse(**response) # convert to 'dot-dict' style which is the openai python client default - return response - except requests.exceptions.HTTPError as http_err: - # Handle HTTP errors (e.g., response 4XX, 5XX) - printd(f"Got HTTPError, exception={http_err}, payload={data}") - raise http_err - except requests.exceptions.RequestException as req_err: - # Handle other requests-related errors (e.g., connection error) - printd(f"Got RequestException, exception={req_err}") - raise req_err - except Exception as e: - # Handle other 
potential errors - printd(f"Got unknown Exception, exception={e}") - raise e - - -def azure_openai_chat_completions_request(resource_name, deployment_id, api_version, api_key, data): - """https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#chat-completions""" - from memgpt.utils import printd - - assert resource_name is not None, "Missing required field when calling Azure OpenAI" - assert deployment_id is not None, "Missing required field when calling Azure OpenAI" - assert api_version is not None, "Missing required field when calling Azure OpenAI" - assert api_key is not None, "Missing required field when calling Azure OpenAI" - - resource_name = clean_azure_endpoint(resource_name) - url = f"https://{resource_name}.openai.azure.com/openai/deployments/{deployment_id}/chat/completions?api-version={api_version}" - headers = {"Content-Type": "application/json", "api-key": f"{api_key}"} - - # If functions == None, strip from the payload - if "functions" in data and data["functions"] is None: - data.pop("functions") - data.pop("function_call", None) # extra safe, should exist always (default="auto") - - if "tools" in data and data["tools"] is None: - data.pop("tools") - data.pop("tool_choice", None) # extra safe, should exist always (default="auto") - - printd(f"Sending request to {url}") - try: - response = requests.post(url, headers=headers, json=data) - printd(f"response = {response}") - response.raise_for_status() # Raises HTTPError for 4XX/5XX status - response = response.json() # convert to dict from string - printd(f"response.json = {response}") - # NOTE: azure openai does not include "content" in the response when it is None, so we need to add it - if "content" not in response["choices"][0].get("message"): - response["choices"][0]["message"]["content"] = None - response = ChatCompletionResponse(**response) # convert to 'dot-dict' style which is the openai python client default - return response - except requests.exceptions.HTTPError as http_err: - # Handle HTTP errors (e.g., response 4XX, 5XX) - printd(f"Got HTTPError, exception={http_err}, payload={data}") - raise http_err - except requests.exceptions.RequestException as req_err: - # Handle other requests-related errors (e.g., connection error) - printd(f"Got RequestException, exception={req_err}") - raise req_err - except Exception as e: - # Handle other potential errors - printd(f"Got unknown Exception, exception={e}") - raise e - - -def azure_openai_embeddings_request(resource_name, deployment_id, api_version, api_key, data): - """https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#embeddings""" - from memgpt.utils import printd - - resource_name = clean_azure_endpoint(resource_name) - url = f"https://{resource_name}.openai.azure.com/openai/deployments/{deployment_id}/embeddings?api-version={api_version}" - headers = {"Content-Type": "application/json", "api-key": f"{api_key}"} - - printd(f"Sending request to {url}") - try: - response = requests.post(url, headers=headers, json=data) - printd(f"response = {response}") - response.raise_for_status() # Raises HTTPError for 4XX/5XX status - response = response.json() # convert to dict from string - printd(f"response.json = {response}") - response = EmbeddingResponse(**response) # convert to 'dot-dict' style which is the openai python client default - return response - except requests.exceptions.HTTPError as http_err: - # Handle HTTP errors (e.g., response 4XX, 5XX) - printd(f"Got HTTPError, exception={http_err}, payload={data}") - raise http_err - except 
requests.exceptions.RequestException as req_err: - # Handle other requests-related errors (e.g., connection error) - printd(f"Got RequestException, exception={req_err}") - raise req_err - except Exception as e: - # Handle other potential errors - printd(f"Got unknown Exception, exception={e}") - raise e - - -def retry_with_exponential_backoff( - func, - initial_delay: float = 1, - exponential_base: float = 2, - jitter: bool = True, - max_retries: int = 20, - # List of OpenAI error codes: https://github.com/openai/openai-python/blob/17ac6779958b2b74999c634c4ea4c7b74906027a/src/openai/_client.py#L227-L250 - # 429 = rate limit - error_codes: tuple = (429,), -): - """Retry a function with exponential backoff.""" - - def wrapper(*args, **kwargs): - from memgpt.utils import printd - - # Initialize variables - num_retries = 0 - delay = initial_delay - - # Loop until a successful response or max_retries is hit or an exception is raised - while True: - try: - return func(*args, **kwargs) - - except requests.exceptions.HTTPError as http_err: - # Retry on specified errors - if http_err.response.status_code in error_codes: - # Increment retries - num_retries += 1 - - # Check if max retries has been reached - if num_retries > max_retries: - raise Exception(f"Maximum number of retries ({max_retries}) exceeded.") - - # Increment the delay - delay *= exponential_base * (1 + jitter * random.random()) - - # Sleep for the delay - # printd(f"Got a rate limit error ('{http_err}') on LLM backend request, waiting {int(delay)}s then retrying...") - print( - f"{CLI_WARNING_PREFIX}Got a rate limit error ('{http_err}') on LLM backend request, waiting {int(delay)}s then retrying..." - ) - time.sleep(delay) - else: - # For other HTTP errors, re-raise the exception - raise - - # Raise exceptions for any errors not specified - except Exception as e: - raise e - - return wrapper - - -@retry_with_exponential_backoff -def create( - agent_state: AgentState, - messages, - functions=None, - functions_python=None, - function_call="auto", - # hint - first_message=False, - # use tool naming? - # if false, will use deprecated 'functions' style - use_tool_naming=True, -) -> ChatCompletionResponse: - """Return response to chat completion with backoff""" - from memgpt.utils import printd - - printd(f"Using model {agent_state.llm_config.model_endpoint_type}, endpoint: {agent_state.llm_config.model_endpoint}") - - # TODO eventually refactor so that credentials are passed through - credentials = MemGPTCredentials.load() - - if function_call and not functions: - printd("unsetting function_call because functions is None") - function_call = None - - # openai - if agent_state.llm_config.model_endpoint_type == "openai": - # TODO do the same for Azure? 
- if credentials.openai_key is None and agent_state.llm_config.model_endpoint == "https://api.openai.com/v1": - # only is a problem if we are *not* using an openai proxy - raise ValueError(f"OpenAI key is missing from MemGPT config file") - if use_tool_naming: - data = dict( - model=agent_state.llm_config.model, - messages=messages, - tools=[{"type": "function", "function": f} for f in functions] if functions else None, - tool_choice=function_call, - user=str(agent_state.user_id), - ) - else: - data = dict( - model=agent_state.llm_config.model, - messages=messages, - functions=functions, - function_call=function_call, - user=str(agent_state.user_id), - ) - return openai_chat_completions_request( - url=agent_state.llm_config.model_endpoint, # https://api.openai.com/v1 -> https://api.openai.com/v1/chat/completions - api_key=credentials.openai_key, - data=data, - ) - - # azure - elif agent_state.llm_config.model_endpoint_type == "azure": - azure_deployment = ( - credentials.azure_deployment - if credentials.azure_deployment is not None - else MODEL_TO_AZURE_ENGINE[agent_state.llm_config.model] - ) - if use_tool_naming: - data = dict( - # NOTE: don't pass model to Azure calls, that is the deployment_id - # model=agent_config.model, - messages=messages, - tools=[{"type": "function", "function": f} for f in functions] if functions else None, - tool_choice=function_call, - user=str(agent_state.user_id), - ) - else: - data = dict( - # NOTE: don't pass model to Azure calls, that is the deployment_id - # model=agent_config.model, - messages=messages, - functions=functions, - function_call=function_call, - user=str(agent_state.user_id), - ) - return azure_openai_chat_completions_request( - resource_name=credentials.azure_endpoint, - deployment_id=azure_deployment, - api_version=credentials.azure_version, - api_key=credentials.azure_key, - data=data, - ) - - # local model - else: - return get_chat_completion( - model=agent_state.llm_config.model, - messages=messages, - functions=functions, - functions_python=functions_python, - function_call=function_call, - context_window=agent_state.llm_config.context_window, - endpoint=agent_state.llm_config.model_endpoint, - endpoint_type=agent_state.llm_config.model_endpoint_type, - wrapper=agent_state.llm_config.model_wrapper, - user=str(agent_state.user_id), - # hint - first_message=first_message, - # auth-related - auth_type=credentials.openllm_auth_type, - auth_key=credentials.openllm_key, - ) diff --git a/memgpt/memory.py b/memgpt/memory.py index e3b967635f..d22db37613 100644 --- a/memgpt/memory.py +++ b/memgpt/memory.py @@ -6,7 +6,7 @@ from memgpt.constants import MESSAGE_SUMMARY_WARNING_FRAC from memgpt.utils import get_local_time, printd, count_tokens, validate_date_format, extract_date_from_timestamp from memgpt.prompts.gpt_summarize import SYSTEM as SUMMARY_PROMPT_SYSTEM -from memgpt.llm_api_tools import create +from memgpt.llm_api.llm_api_tools import create from memgpt.data_types import Message, Passage, AgentState from memgpt.embeddings import embedding_model, query_embedding, parse_and_chunk_text diff --git a/memgpt/utils.py b/memgpt/utils.py index 0b61439075..4ee947452e 100644 --- a/memgpt/utils.py +++ b/memgpt/utils.py @@ -17,7 +17,7 @@ from typing import get_type_hints, Union, _GenericAlias -from urllib.parse import urlparse +from urllib.parse import urlparse, urljoin from contextlib import contextmanager import difflib import demjson3 as demjson @@ -469,6 +469,13 @@ ] +def smart_urljoin(base_url: str, relative_url: str) -> str: + """urljoin is 
stupid and wants a trailing / at the end of the endpoint address, or it will chop the suffix off""" + if not base_url.endswith("/"): + base_url += "/" + return urljoin(base_url, relative_url) + + def is_utc_datetime(dt: datetime) -> bool: return dt.tzinfo is not None and dt.tzinfo.utcoffset(dt) == timedelta(0) From 5106fbbcd947dfba31a25c593eadcf0879cb9ad5 Mon Sep 17 00:00:00 2001 From: cpacker Date: Thu, 4 Apr 2024 13:15:02 -0700 Subject: [PATCH 03/12] modify create() method to take List[Messages] instead of List[dict], modify openai HTTP request route to take pydantic request model as arg instead of dict --- memgpt/agent.py | 8 +++--- memgpt/functions/function_sets/extras.py | 11 ++++---- memgpt/llm_api/llm_api_tools.py | 14 +++++---- memgpt/llm_api/openai.py | 4 ++- memgpt/memory.py | 16 ++++++++--- memgpt/models/chat_completion_request.py | 36 ++++++++++++++++++++++-- 6 files changed, 67 insertions(+), 22 deletions(-) diff --git a/memgpt/agent.py b/memgpt/agent.py index 8f5e81e356..d1bcffeb65 100644 --- a/memgpt/agent.py +++ b/memgpt/agent.py @@ -400,7 +400,7 @@ def _swap_system_message(self, new_system_message: Message): def _get_ai_reply( self, - message_sequence: List[dict], + message_sequence: List[Message], function_call: str = "auto", first_message: bool = False, # hint ) -> chat_completion_response.ChatCompletionResponse: @@ -694,12 +694,12 @@ def validate_json(user_message_text: str, raise_on_error: bool) -> str: self.interface.user_message(user_message.text, msg_obj=user_message) - input_message_sequence = self.messages + [user_message.to_openai_dict()] + input_message_sequence = self._messages + [user_message] # Alternatively, the requestor can send an empty user message else: - input_message_sequence = self.messages + input_message_sequence = self._messages - if len(input_message_sequence) > 1 and input_message_sequence[-1]["role"] != "user": + if len(input_message_sequence) > 1 and input_message_sequence[-1].role != "user": printd(f"{CLI_WARNING_PREFIX}Attempting to run ChatCompletion without user as the last message in the queue") # Step 1: send the conversation and available functions to GPT diff --git a/memgpt/functions/function_sets/extras.py b/memgpt/functions/function_sets/extras.py index ee5abe1986..943c153ab8 100644 --- a/memgpt/functions/function_sets/extras.py +++ b/memgpt/functions/function_sets/extras.py @@ -2,7 +2,7 @@ import os import json import requests - +import uuid from memgpt.constants import ( JSON_LOADS_STRICT, @@ -12,6 +12,7 @@ JSON_ENSURE_ASCII, ) from memgpt.llm_api.llm_api_tools import create +from memgpt.data_types import Message def message_chatgpt(self, message: str): @@ -24,15 +25,15 @@ def message_chatgpt(self, message: str): Returns: str: Reply message from ChatGPT """ + dummy_user_id = uuid.uuid4() + dummy_agent_id = uuid.uuid4() message_sequence = [ - {"role": "system", "content": MESSAGE_CHATGPT_FUNCTION_SYSTEM_MESSAGE}, - {"role": "user", "content": str(message)}, + Message(user_id=dummy_user_id, agent_id=dummy_agent_id, role="system", text=MESSAGE_CHATGPT_FUNCTION_SYSTEM_MESSAGE), + Message(user_id=dummy_user_id, agent_id=dummy_agent_id, role="user", text=str(message)), ] response = create( model=MESSAGE_CHATGPT_FUNCTION_MODEL, messages=message_sequence, - # functions=functions, - # function_call=function_call, ) reply = response.choices[0].message.content diff --git a/memgpt/llm_api/llm_api_tools.py b/memgpt/llm_api/llm_api_tools.py index 3a5cf66a97..7588f03d82 100644 --- a/memgpt/llm_api/llm_api_tools.py +++ 
b/memgpt/llm_api/llm_api_tools.py @@ -2,13 +2,15 @@ import time import requests import time +from typing import List from memgpt.credentials import MemGPTCredentials from memgpt.local_llm.chat_completion_proxy import get_chat_completion from memgpt.constants import CLI_WARNING_PREFIX from memgpt.models.chat_completion_response import ChatCompletionResponse +from memgpt.models.chat_completion_request import ChatCompletionRequest, cast_message_to_subtype -from memgpt.data_types import AgentState +from memgpt.data_types import AgentState, Message from memgpt.llm_api.openai import openai_chat_completions_request from memgpt.llm_api.azure_openai import azure_openai_chat_completions_request, MODEL_TO_AZURE_ENGINE @@ -113,7 +115,7 @@ def wrapper(*args, **kwargs): @retry_with_exponential_backoff def create( agent_state: AgentState, - messages, + messages: List[Message], functions=None, functions_python=None, function_call="auto", @@ -142,17 +144,17 @@ def create( # only is a problem if we are *not* using an openai proxy raise ValueError(f"OpenAI key is missing from MemGPT config file") if use_tool_naming: - data = dict( + data = ChatCompletionRequest( model=agent_state.llm_config.model, - messages=messages, + messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages], tools=[{"type": "function", "function": f} for f in functions] if functions else None, tool_choice=function_call, user=str(agent_state.user_id), ) else: - data = dict( + data = ChatCompletionRequest( model=agent_state.llm_config.model, - messages=messages, + messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages], functions=functions, function_call=function_call, user=str(agent_state.user_id), diff --git a/memgpt/llm_api/openai.py b/memgpt/llm_api/openai.py index 5fb0bed062..f24b61b8fe 100644 --- a/memgpt/llm_api/openai.py +++ b/memgpt/llm_api/openai.py @@ -3,6 +3,7 @@ from typing import Union, Optional from memgpt.models.chat_completion_response import ChatCompletionResponse +from memgpt.models.chat_completion_request import ChatCompletionRequest from memgpt.models.embedding_response import EmbeddingResponse from memgpt.utils import smart_urljoin @@ -57,12 +58,13 @@ def openai_get_model_list(url: str, api_key: Union[str, None], fix_url: Optional raise e -def openai_chat_completions_request(url: str, api_key: str, data: dict) -> ChatCompletionResponse: +def openai_chat_completions_request(url: str, api_key: str, data: ChatCompletionRequest) -> ChatCompletionResponse: """https://platform.openai.com/docs/guides/text-generation?lang=curl""" from memgpt.utils import printd url = smart_urljoin(url, "chat/completions") headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"} + data = data.model_dump(exclude_none=True) # If functions == None, strip from the payload if "functions" in data and data["functions"] is None: diff --git a/memgpt/memory.py b/memgpt/memory.py index d22db37613..bd49cb924b 100644 --- a/memgpt/memory.py +++ b/memgpt/memory.py @@ -102,16 +102,21 @@ def edit_replace(self, field, old_content, new_content): raise KeyError(f'No memory section named {field} (must be either "persona" or "human")') +def _format_summary_history(message_history: List[Message]): + # TODO use existing prompt formatters for this (eg ChatML) + return "\n".join([f"{m.role}: {m.text}" for m in message_history]) + + def summarize_messages( agent_state: AgentState, - message_sequence_to_summarize, + message_sequence_to_summarize: List[Message], ): """Summarize a message sequence using GPT""" # we 
need the context_window context_window = agent_state.llm_config.context_window summary_prompt = SUMMARY_PROMPT_SYSTEM - summary_input = str(message_sequence_to_summarize) + summary_input = _format_summary_history(message_sequence_to_summarize) summary_input_tkns = count_tokens(summary_input) if summary_input_tkns > MESSAGE_SUMMARY_WARNING_FRAC * context_window: trunc_ratio = (MESSAGE_SUMMARY_WARNING_FRAC * context_window / summary_input_tkns) * 0.8 # For good measure... @@ -120,9 +125,12 @@ def summarize_messages( [summarize_messages(agent_state, message_sequence_to_summarize=message_sequence_to_summarize[:cutoff])] + message_sequence_to_summarize[cutoff:] ) + + dummy_user_id = uuid.uuid4() + dummy_agent_id = uuid.uuid4() message_sequence = [ - {"role": "system", "content": summary_prompt}, - {"role": "user", "content": summary_input}, + Message(user_id=dummy_user_id, agent_id=dummy_agent_id, role="system", text=summary_prompt), + Message(user_id=dummy_user_id, agent_id=dummy_agent_id, role="user", text=summary_input), ] response = create( diff --git a/memgpt/models/chat_completion_request.py b/memgpt/models/chat_completion_request.py index 7adbd1ab26..3a6c7febe6 100644 --- a/memgpt/models/chat_completion_request.py +++ b/memgpt/models/chat_completion_request.py @@ -14,14 +14,46 @@ class UserMessage(BaseModel): name: Optional[str] = None +class ToolCallFunction(BaseModel): + name: str + arguments: str + + +class ToolCall(BaseModel): + id: str + type: Literal["function"] = "function" + function: ToolCallFunction + + class AssistantMessage(BaseModel): content: Optional[str] = None role: str = "assistant" name: Optional[str] = None - tool_calls: Optional[List] = None + tool_calls: Optional[List[ToolCall]] = None -ChatMessage = Union[SystemMessage, UserMessage, AssistantMessage] +class ToolMessage(BaseModel): + content: str + role: str = "tool" + tool_call_id: str + + +ChatMessage = Union[SystemMessage, UserMessage, AssistantMessage, ToolMessage] + + +def cast_message_to_subtype(m_dict: dict) -> ChatMessage: + """Cast a dictionary to one of the individual message types""" + role = m_dict.get("role") + if role == "system": + return SystemMessage(**m_dict) + elif role == "user": + return UserMessage(**m_dict) + elif role == "assistant": + return AssistantMessage(**m_dict) + elif role == "tool": + return ToolMessage(**m_dict) + else: + raise ValueError("Unknown message role") class ResponseFormat(BaseModel): From 3b71672886f8faee92fccd917dc6fa1f10b3adbb Mon Sep 17 00:00:00 2001 From: cpacker Date: Thu, 4 Apr 2024 13:22:04 -0700 Subject: [PATCH 04/12] patch summarize call --- memgpt/agent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/memgpt/agent.py b/memgpt/agent.py index d1bcffeb65..2218657bf6 100644 --- a/memgpt/agent.py +++ b/memgpt/agent.py @@ -858,14 +858,14 @@ def summarize_messages_inplace(self, cutoff=None, preserve_last_N_messages=True, printd(f"Selected cutoff {cutoff} was a 'tool', shifting one...") cutoff += 1 - message_sequence_to_summarize = self.messages[1:cutoff] # do NOT get rid of the system message + message_sequence_to_summarize = self._messages[1:cutoff] # do NOT get rid of the system message if len(message_sequence_to_summarize) <= 1: # This prevents a potential infinite loop of summarizing the same message over and over raise LLMError( f"Summarize error: tried to run summarize, but couldn't find enough messages to compress [len={len(message_sequence_to_summarize)} <= 1]" ) else: - printd(f"Attempting to summarize 
{len(message_sequence_to_summarize)} messages [1:{cutoff}] of {len(self.messages)}") + printd(f"Attempting to summarize {len(message_sequence_to_summarize)} messages [1:{cutoff}] of {len(self._messages)}") # We can't do summarize logic properly if context_window is undefined if self.agent_state.llm_config.context_window is None: From 262c60e0d6053a9874f0305a8200ab5b0d11d592 Mon Sep 17 00:00:00 2001 From: cpacker Date: Sat, 6 Apr 2024 13:16:18 -0700 Subject: [PATCH 05/12] add google ai --- memgpt/data_types.py | 72 ++++++++++++++++- memgpt/llm_api/google_ai.py | 135 ++++++++++++++++++++++++++++++++ memgpt/llm_api/llm_api_tools.py | 16 ++++ 3 files changed, 222 insertions(+), 1 deletion(-) create mode 100644 memgpt/llm_api/google_ai.py diff --git a/memgpt/data_types.py b/memgpt/data_types.py index d4056466b4..25b22b2186 100644 --- a/memgpt/data_types.py +++ b/memgpt/data_types.py @@ -82,6 +82,7 @@ def __init__( created_at: Optional[datetime] = None, tool_calls: Optional[List[ToolCall]] = None, # list of tool calls requested tool_call_id: Optional[str] = None, + # tool_call_name: Optional[str] = None, # not technically OpenAI spec, but it can be helpful to have on-hand embedding: Optional[np.ndarray] = None, embedding_dim: Optional[int] = None, embedding_model: Optional[str] = None, @@ -238,7 +239,7 @@ def dict_to_message( tool_call_id=openai_message_dict["tool_call_id"] if "tool_call_id" in openai_message_dict else None, ) - def to_openai_dict(self, max_tool_id_length=TOOL_CALL_ID_MAX_LEN): + def to_openai_dict(self, max_tool_id_length=TOOL_CALL_ID_MAX_LEN) -> dict: """Go from Message class to ChatCompletion message object""" # TODO change to pydantic casting, eg `return SystemMessageModel(self)` @@ -285,11 +286,80 @@ def to_openai_dict(self, max_tool_id_length=TOOL_CALL_ID_MAX_LEN): "role": self.role, "tool_call_id": self.tool_call_id[:max_tool_id_length] if max_tool_id_length else self.tool_call_id, } + else: raise ValueError(self.role) return openai_message + def to_google_ai_dict(self) -> dict: + """Go from Message class to Google AI REST message object + + type Content: https://ai.google.dev/api/rest/v1/Content / https://ai.google.dev/api/rest/v1beta/Content + parts[]: Part + role: str ('user' or 'model') + """ + if self.name is not None: + raise UserWarning(f"Using Google AI with non-null 'name' field ({self.name}) not yet supported.") + + if self.role == "system": + # NOTE: Gemini API doesn't have a 'system' role, use 'user' instead + # https://www.reddit.com/r/Bard/comments/1b90i8o/does_gemini_have_a_system_prompt_option_while/ + google_ai_message = { + "role": "user", # NOTE: no 'system' + "parts": [{"text": self.text}], + } + + elif self.role == "user": + assert all([v is not None for v in [self.text, self.role]]), vars(self) + google_ai_message = { + "role": "user", + "parts": [{"text": self.text}], + } + + elif self.role == "assistant": + assert self.tool_calls is not None or self.text is not None + google_ai_message = { + "role": "model", # NOTE: different + } + + # NOTE: Google AI API doesn't allow non-null content + function call + # To get around this, just two a two part message, inner thoughts first then + parts = [] + if self.text is not None: + parts.append({"text": self.text}) + if self.tool_calls is not None: + # NOTE: implied support for multiple calls + for tool_call in self.tool_calls: + parts.append( + { + "functionCall": { + "name": tool_call.function["name"], + "args": tool_call.function["arguments"], + } + } + ) + + elif self.role == "tool": + # NOTE: 
Significantly different tool calling format, more similar to function calling format
+            assert all([v is not None for v in [self.role, self.tool_call_id]]), vars(self)
+            google_ai_message = {
+                "role": "function",
+                "parts": [
+                    {
+                        "functionResponse": {
+                            "name": self.tool_call_id,  # TODO override with name or replace with self.tool_call_name
+                            "content": self.text,  # NOTE this can be structured JSON
+                        }
+                    }
+                ],
+            }
+
+        else:
+            raise ValueError(self.role)
+
+        return google_ai_message
+
 
 class Document(Record):
     """A document represent a document loaded into MemGPT, which is broken down into passages."""
 
diff --git a/memgpt/llm_api/google_ai.py b/memgpt/llm_api/google_ai.py
new file mode 100644
index 0000000000..aa3fb5c22e
--- /dev/null
+++ b/memgpt/llm_api/google_ai.py
@@ -0,0 +1,136 @@
+import requests
+from typing import Union
+
+from memgpt.models.chat_completion_response import ChatCompletionResponse
+from memgpt.models.chat_completion_request import ChatCompletionRequest
+from memgpt.models.embedding_response import EmbeddingResponse
+from memgpt.utils import smart_urljoin
+
+
+SUPPORTED_MODELS = [
+    "gemini-pro",
+]
+
+
+# TODO use pydantic model as input
+def convert_openai_tool_call_to_google_ai(tool_call: dict) -> dict:
+    """
+    OpenAI format:
+    {
+        "role": "tool",
+        "tool_call_id":
+        "content":
+    }
+
+    Google AI format:
+    {
+        "role": "function",
+        "parts": [{
+            "functionResponse": {
+                "name": "find_theaters",
+                "response": {
+                    "name": "find_theaters",
+                    "content": {
+                        "movie": "Barbie",
+                        "theaters": [{
+                            "name": "AMC Mountain View 16",
+                            "address": "2000 W El Camino Real, Mountain View, CA 94040"
+                        }, {
+                            "name": "Regal Edwards 14",
+                            "address": "245 Castro St, Mountain View, CA 94040"
+                        }]
+                    }
+                }
+            }]
+    }
+    """
+    pass
+
+
+# TODO use pydantic model as input
+def convert_openai_assistant_content_to_google_ai(tool_call: dict) -> dict:
+    pass
+
+
+# TODO use pydantic model as input
+def to_google_ai(openai_message_dict: dict) -> dict:
+
+    # TODO supports "parts" as part of multimodal support
+    assert not isinstance(openai_message_dict["content"], list), "Multi-part message content is not yet supported"
+    if openai_message_dict["role"] == "user":
+        google_ai_message_dict = {
+            "role": "user",
+            "parts": [{"text": openai_message_dict["content"]}],
+        }
+    elif openai_message_dict["role"] == "assistant":
+        google_ai_message_dict = {
+            "role": "model",  # NOTE: diff
+            "parts": [{"text": openai_message_dict["content"]}],
+        }
+    elif openai_message_dict["role"] == "tool":
+        google_ai_message_dict = {
+            "role": "function",  # NOTE: diff
+            "parts": [{"text": openai_message_dict["content"]}],
+        }
+    else:
+        raise ValueError(f"Unsupported conversion (OpenAI -> Google AI) from role {openai_message_dict['role']}")
+
+    return google_ai_message_dict
+
+
+def google_ai_chat_completions_request(
+    service_endpoint: str, model: str, api_key: str, data: ChatCompletionRequest
+) -> ChatCompletionResponse:
+    """https://ai.google.dev/docs/function_calling
+
+    From https://ai.google.dev/api/rest#service-endpoint:
+    "A service endpoint is a base URL that specifies the network address of an API service.
+    One service might have multiple service endpoints.
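+    (e.g., for the Gemini API the endpoint name is typically 'generativelanguage', giving https://generativelanguage.googleapis.com)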
+ This service has the following service endpoint and all URIs below are relative to this service endpoint: + https://xxx.googleapis.com + """ + from memgpt.utils import printd + + assert service_endpoint is not None, "Missing service_endpoint when calling Google AI" + assert api_key is not None, "Missing api_key when calling Google AI" + assert model in SUPPORTED_MODELS, f"Model '{model}' not in supported models: {', '.join(SUPPORTED_MODELS)}" + + url = f"https://{service_endpoint}.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}" + headers = {"Content-Type": "application/json"} + + payload = { + "contents": data["messages"], + } + + # If functions == None, strip from the payload + if "functions" in data and data["functions"] is None: + data.pop("functions") + data.pop("function_call", None) # extra safe, should exist always (default="auto") + + if "tools" in data and data["tools"] is None: + data.pop("tools") + data.pop("tool_choice", None) # extra safe, should exist always (default="auto") + + printd(f"Sending request to {url}") + try: + response = requests.post(url, headers=headers, json=data) + printd(f"response = {response}") + response.raise_for_status() # Raises HTTPError for 4XX/5XX status + response = response.json() # convert to dict from string + printd(f"response.json = {response}") + # NOTE: azure openai does not include "content" in the response when it is None, so we need to add it + if "content" not in response["choices"][0].get("message"): + response["choices"][0]["message"]["content"] = None + response = ChatCompletionResponse(**response) # convert to 'dot-dict' style which is the openai python client default + return response + except requests.exceptions.HTTPError as http_err: + # Handle HTTP errors (e.g., response 4XX, 5XX) + printd(f"Got HTTPError, exception={http_err}, payload={data}") + raise http_err + except requests.exceptions.RequestException as req_err: + # Handle other requests-related errors (e.g., connection error) + printd(f"Got RequestException, exception={req_err}") + raise req_err + except Exception as e: + # Handle other potential errors + printd(f"Got unknown Exception, exception={e}") + raise e diff --git a/memgpt/llm_api/llm_api_tools.py b/memgpt/llm_api/llm_api_tools.py index 7588f03d82..65031ff9a4 100644 --- a/memgpt/llm_api/llm_api_tools.py +++ b/memgpt/llm_api/llm_api_tools.py @@ -14,6 +14,7 @@ from memgpt.llm_api.openai import openai_chat_completions_request from memgpt.llm_api.azure_openai import azure_openai_chat_completions_request, MODEL_TO_AZURE_ENGINE +from memgpt.llm_api.google_ai import google_ai_chat_completions_request, convert_tools_to_google_ai_format, annotate_messages_with_tools def is_context_overflow_error(exception: requests.exceptions.RequestException) -> bool: @@ -198,6 +199,21 @@ def create( data=data, ) + elif agent_state.llm_config.model_endpoint_type == "google_ai": + if not use_tool_naming: + raise NotImplementedError("Only tool calling supported on Google AI API requests") + + return google_ai_chat_completions_request( + service_endpoint="test", + model="gemini", + api_key="based", + # see structure of payload here: https://ai.google.dev/docs/function_calling + data=dict( + # contents=[m.to_google_ai_dict() for m in messages], + # tools= + ), + ) + # local model else: return get_chat_completion( From a1e60b8360a834a2e0080dcdeff93bbb7c254a93 Mon Sep 17 00:00:00 2001 From: cpacker Date: Mon, 8 Apr 2024 21:41:47 -0700 Subject: [PATCH 06/12] request working, response needs debug, need decide on mechanism 
for passing internal mono
---
 memgpt/data_types.py            |  41 +++++++++--
 memgpt/llm_api/google_ai.py     | 120 +++++++++++++++++++++++++++-----
 memgpt/llm_api/llm_api_tools.py |  25 +++++--
 3 files changed, 156 insertions(+), 30 deletions(-)

diff --git a/memgpt/data_types.py b/memgpt/data_types.py
index 25b22b2186..dbf7901045 100644
--- a/memgpt/data_types.py
+++ b/memgpt/data_types.py
@@ -1,6 +1,7 @@
 """ This module contains the data types used by MemGPT. Each data type must include a function to create a DB model. """
 import uuid
+import json
 from datetime import datetime, timezone
 from typing import Optional, List, Dict, TypeVar
 import numpy as np
@@ -299,7 +300,7 @@ def to_google_ai_dict(self) -> dict:
         parts[]: Part
         role: str ('user' or 'model')
         """
-        if self.name is not None:
+        if self.role != "tool" and self.name is not None:
             raise UserWarning(f"Using Google AI with non-null 'name' field ({self.name}) not yet supported.")
 
         if self.role == "system":
@@ -326,30 +327,56 @@ def to_google_ai_dict(self) -> dict:
             # NOTE: Google AI API doesn't allow non-null content + function call
             # To get around this, just do a two-part message: inner thoughts first, then the function call
             parts = []
-            if self.text is not None:
-                parts.append({"text": self.text})
+            # if self.text is not None:
+            #     parts.append({"text": self.text})
             if self.tool_calls is not None:
                 # NOTE: implied support for multiple calls
                 for tool_call in self.tool_calls:
+                    function_name = tool_call.function["name"]
+                    function_args = tool_call.function["arguments"]
+                    try:
+                        # NOTE: Google AI wants actual JSON objects, not strings
+                        function_args = json.loads(function_args)
+                    except:
+                        # NOTE: not valid JSON, so warn and fall back to wrapping the raw string
+                        printd(f"Failed to parse JSON function args: {function_args}")
+                        function_args = {"args": function_args}
+
                     parts.append(
                         {
                             "functionCall": {
-                                "name": tool_call.function["name"],
-                                "args": tool_call.function["arguments"],
+                                "name": function_name,
+                                "args": function_args,
                             }
                         }
                     )
+            google_ai_message["parts"] = parts
 
         elif self.role == "tool":
             # NOTE: Significantly different tool calling format, more similar to function calling format
             assert all([v is not None for v in [self.role, self.tool_call_id]]), vars(self)
+
+            if self.name is None:
+                # NOTE: warn instead of raising, then fall back to the tool call ID so the conversion can continue
+                printd(f"Couldn't find function name on tool call, defaulting to tool ID instead.")
+                function_name = self.tool_call_id
+            else:
+                function_name = self.name
+
+            # NOTE: Google AI API wants the function response as JSON only, no string
+            try:
+                function_response = json.loads(self.text)
+            except:
+                function_response = {"function_response": self.text}
+
            google_ai_message = {
                 "role": "function",
                 "parts": [
                     {
                         "functionResponse": {
-                            "name": self.tool_call_id,  # TODO override with name or replace with self.tool_call_name
-                            "content": self.text,  # NOTE this can be structured JSON
+                            "name": function_name,
+                            "response": {
+                                "name": function_name,  # NOTE: name twice... why? 
+ "content": function_response, + }, } } ], diff --git a/memgpt/llm_api/google_ai.py b/memgpt/llm_api/google_ai.py index aa3fb5c22e..907e7f460c 100644 --- a/memgpt/llm_api/google_ai.py +++ b/memgpt/llm_api/google_ai.py @@ -1,10 +1,13 @@ import requests -from typing import Union +from typing import Union, List from memgpt.models.chat_completion_response import ChatCompletionResponse -from memgpt.models.chat_completion_request import ChatCompletionRequest +from memgpt.models.chat_completion_request import ChatCompletionRequest, Tool from memgpt.models.embedding_response import EmbeddingResponse from memgpt.utils import smart_urljoin +from memgpt.constants import NON_USER_MSG_PREFIX + +# from memgpt.data_types import ToolCall SUPPORTED_MODELS = [ @@ -12,6 +15,31 @@ ] +def annotate_messages_with_tool_names(): + return + + +def add_dummy_model_messages(messages: List[dict]) -> List[dict]: + """Google AI API requires all function call returns are immediately followed by a 'model' role message. + + In MemGPT, the 'model' will often call a function (e.g. send_message) that itself yields to the user, + so there is no natural follow-up 'model' role message. + + To satisfy the Google AI API restrictions, we can add a dummy 'yield' message + with role == 'model' that is placed in-betweeen and function output + (role == 'tool') and user message (role == 'user'). + """ + dummy_yield_message = {"role": "model", "parts": [{"text": f"{NON_USER_MSG_PREFIX}Function call returned, waiting for user response."}]} + messages_with_padding = [] + for i, message in enumerate(messages): + messages_with_padding.append(message) + # Check if the current message role is 'tool' and the next message role is 'user' + if message["role"] in ["tool", "function"] and (i + 1 < len(messages) and messages[i + 1]["role"] == "user"): + messages_with_padding.append(dummy_yield_message) + + return messages_with_padding + + # TODO use pydantic model as input def convert_openai_tool_call_to_google_ai(tool_call: dict) -> dict: """ @@ -76,8 +104,64 @@ def to_google_ai(openai_message_dict: dict) -> dict: raise ValueError(f"Unsupported conversion (OpenAI -> Google AI) from role {openai_message_dict['role']}") +# TODO convert return type to pydantic +def convert_tools_to_google_ai_format(tools: List[Tool]) -> List[dict]: + """ + OpenAI style: + "tools": [{ + "type": "function", + "function": { + "name": "find_movies", + "description": "find ....", + "parameters": { + ... + } + } + } + ] + + Google AI style: + "tools": [{ + "functionDeclarations": [{ + "name": "find_movies", + "description": "find movie titles currently playing in theaters based on any description, genre, title words, etc.", + "parameters": { + "type": "OBJECT", + "properties": { + "location": { + "type": "STRING", + "description": "The city and state, e.g. San Francisco, CA or a zip code e.g. 95616" + }, + "description": { + "type": "STRING", + "description": "Any kind of description including category or genre, title words, attributes, etc." + } + }, + "required": ["description"] + } + }, { + "name": "find_theaters", + ... 
+ """ + function_list = [ + dict( + name=t.function.name, + description=t.function.description, + parameters=t.function.parameters, # TODO need to unpack + ) + for t in tools + ] + return [{"functionDeclarations": function_list}] + + +# TODO convert 'data' type to pydantic def google_ai_chat_completions_request( - service_endpoint: str, model: str, api_key: str, data: ChatCompletionRequest + service_endpoint: str, + model: str, + api_key: str, + data: dict, + key_in_header: bool = True, + add_postfunc_model_messages: bool = True, ) -> ChatCompletionResponse: """https://ai.google.dev/docs/function_calling @@ -93,21 +177,21 @@ def google_ai_chat_completions_request( assert api_key is not None, "Missing api_key when calling Google AI" assert model in SUPPORTED_MODELS, f"Model '{model}' not in supported models: {', '.join(SUPPORTED_MODELS)}" - url = f"https://{service_endpoint}.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}" - headers = {"Content-Type": "application/json"} - - payload = { - "contents": data["messages"], - } + # Two ways to pass the key: https://ai.google.dev/tutorials/setup + if key_in_header: + url = f"https://{service_endpoint}.googleapis.com/v1beta/models/{model}:generateContent" + headers = {"Content-Type": "application/json", "x-goog-api-key": api_key} + else: + url = f"https://{service_endpoint}.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}" + headers = {"Content-Type": "application/json"} - # If functions == None, strip from the payload - if "functions" in data and data["functions"] is None: - data.pop("functions") - data.pop("function_call", None) # extra safe, should exist always (default="auto") + # data["contents"][-1]["role"] = "model" + if add_postfunc_model_messages: + data["contents"] = add_dummy_model_messages(data["contents"]) - if "tools" in data and data["tools"] is None: - data.pop("tools") - data.pop("tool_choice", None) # extra safe, should exist always (default="auto") + print(f"messages in 'contents'") + for m in data["contents"]: + print(m) printd(f"Sending request to {url}") try: @@ -124,6 +208,10 @@ def google_ai_chat_completions_request( except requests.exceptions.HTTPError as http_err: # Handle HTTP errors (e.g., response 4XX, 5XX) printd(f"Got HTTPError, exception={http_err}, payload={data}") + # Print the HTTP status code + print(f"HTTP Error: {http_err.response.status_code}") + # Print the response content (error message from server) + print(f"Message: {http_err.response.text}") raise http_err except requests.exceptions.RequestException as req_err: # Handle other requests-related errors (e.g., connection error) diff --git a/memgpt/llm_api/llm_api_tools.py b/memgpt/llm_api/llm_api_tools.py index 65031ff9a4..917e89c42d 100644 --- a/memgpt/llm_api/llm_api_tools.py +++ b/memgpt/llm_api/llm_api_tools.py @@ -1,6 +1,7 @@ import random import time import requests +import os import time from typing import List @@ -8,13 +9,17 @@ from memgpt.local_llm.chat_completion_proxy import get_chat_completion from memgpt.constants import CLI_WARNING_PREFIX from memgpt.models.chat_completion_response import ChatCompletionResponse -from memgpt.models.chat_completion_request import ChatCompletionRequest, cast_message_to_subtype +from memgpt.models.chat_completion_request import ChatCompletionRequest, Tool, cast_message_to_subtype from memgpt.data_types import AgentState, Message from memgpt.llm_api.openai import openai_chat_completions_request from memgpt.llm_api.azure_openai import azure_openai_chat_completions_request, 
MODEL_TO_AZURE_ENGINE
-from memgpt.llm_api.google_ai import google_ai_chat_completions_request, convert_tools_to_google_ai_format, annotate_messages_with_tools
+from memgpt.llm_api.google_ai import (
+    google_ai_chat_completions_request,
+    convert_tools_to_google_ai_format,
+    annotate_messages_with_tool_names,
+)
 
 
 def is_context_overflow_error(exception: requests.exceptions.RequestException) -> bool:
@@ -138,6 +143,9 @@ def create(
             printd("unsetting function_call because functions is None")
             function_call = None
 
+    # TODO remove
+    agent_state.llm_config.model_endpoint_type = "google_ai"
+
     # openai
     if agent_state.llm_config.model_endpoint_type == "openai":
         # TODO do the same for Azure?
@@ -203,14 +211,17 @@ def create(
         if not use_tool_naming:
             raise NotImplementedError("Only tool calling supported on Google AI API requests")
 
+        tools = [{"type": "function", "function": f} for f in functions] if functions else None
+        tools = [Tool(**t) for t in tools]
+
         return google_ai_chat_completions_request(
-            service_endpoint="test",
-            model="gemini",
-            api_key="based",
+            service_endpoint=os.getenv("GAI_SERVICE_ENDPOINT"),
+            model="gemini-pro",
+            api_key=os.getenv("GAI_API_KEY"),
             # see structure of payload here: https://ai.google.dev/docs/function_calling
             data=dict(
-                # contents=[m.to_google_ai_dict() for m in messages],
-                # tools=
+                contents=[m.to_google_ai_dict() for m in messages],
+                # tools=convert_tools_to_google_ai_format(tools),
             ),
         )
 
From aa1ebc4a034b2e4ad444bbaf4c98b8efdcfdc679 Mon Sep 17 00:00:00 2001
From: cpacker
Date: Tue, 9 Apr 2024 14:56:01 -0700
Subject: [PATCH 07/12] working gemini w/o credentials

---
 memgpt/data_types.py                          |  15 +-
 memgpt/llm_api/google_ai.py                   | 222 +++++++++++++++++-
 memgpt/llm_api/llm_api_tools.py               |   7 +-
 memgpt/local_llm/constants.py                 |   3 +
 .../llm_chat_completion_wrappers/chatml.py    |  12 +-
 5 files changed, 238 insertions(+), 21 deletions(-)

diff --git a/memgpt/data_types.py b/memgpt/data_types.py
index dbf7901045..5b2e4c3481 100644
--- a/memgpt/data_types.py
+++ b/memgpt/data_types.py
@@ -19,6 +19,7 @@
 from memgpt.utils import get_utc_time, create_uuid_from_string
 from memgpt.models import chat_completion_response
 from memgpt.utils import get_human_text, get_persona_text, printd, is_utc_datetime
+from memgpt.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 
 
 class Record:
@@ -293,7 +294,7 @@ def to_openai_dict(self, max_tool_id_length=TOOL_CALL_ID_MAX_LEN) -> dict:
 
         return openai_message
 
-    def to_google_ai_dict(self) -> dict:
+    def to_google_ai_dict(self, put_inner_thoughts_in_kwargs: bool = True) -> dict:
         """Go from Message class to Google AI REST message object
 
         type Content: https://ai.google.dev/api/rest/v1/Content / https://ai.google.dev/api/rest/v1beta/Content
@@ -327,8 +328,11 @@ def to_google_ai_dict(self) -> dict:
             # NOTE: Google AI API doesn't allow non-null content + function call
             # To get around this, just do a two-part message: inner thoughts first, then the function call
             parts = []
-            # if self.text is not None:
-            #     parts.append({"text": self.text})
+            if not put_inner_thoughts_in_kwargs and self.text is not None:
+                # NOTE: ideally we'd do a multi-part message for CoT / inner thoughts + function call, but the Google AI API doesn't allow it
+                raise NotImplementedError("Inner thoughts outside of function kwargs are not supported by the Google AI API")
+
             if self.tool_calls is not None:
                 # NOTE: implied support for multiple calls
                 for tool_call in self.tool_calls:
@@ -341,6 +345,11 @@ def to_google_ai_dict(self) -> dict:
                         printd(f"Failed to parse JSON function args: {function_args}")
                         function_args = {"args": function_args}
 
+                    if put_inner_thoughts_in_kwargs and self.text is not None:
+                        assert INNER_THOUGHTS_KWARG not in function_args, function_args
+                        assert len(self.tool_calls) == 1
+                        function_args[INNER_THOUGHTS_KWARG] = self.text
+
                     parts.append(
                         {
                             "functionCall": {
diff --git a/memgpt/llm_api/google_ai.py b/memgpt/llm_api/google_ai.py
index 907e7f460c..0794b46217 100644
--- a/memgpt/llm_api/google_ai.py
+++ b/memgpt/llm_api/google_ai.py
@@ -1,10 +1,13 @@
 import requests
-from typing import Union, List
+import json
+import uuid
+from typing import Union, List, Optional
 
-from memgpt.models.chat_completion_response import ChatCompletionResponse
+from memgpt.models.chat_completion_response import ChatCompletionResponse, Choice, Message, ToolCall, FunctionCall, UsageStatistics
 from memgpt.models.chat_completion_request import ChatCompletionRequest, Tool
 from memgpt.models.embedding_response import EmbeddingResponse
-from memgpt.utils import smart_urljoin
-from memgpt.constants import NON_USER_MSG_PREFIX
+from memgpt.utils import smart_urljoin, get_tool_call_id, get_utc_time
+from memgpt.local_llm.utils import count_tokens
+from memgpt.constants import NON_USER_MSG_PREFIX, JSON_ENSURE_ASCII
 
 # from memgpt.data_types import ToolCall
 
@@ -44,7 +47,7 @@ def add_dummy_model_messages(messages: List[dict]) -> List[dict]:
 
 
 # TODO use pydantic model as input
-def convert_openai_tool_call_to_google_ai(tool_call: dict) -> dict:
+def convert_openai_tool_call_to_google_ai(tool_call: dict, inner_thoughts_in_kwargs: Optional[bool] = True) -> dict:
     """
     OpenAI format:
     {
         "role": "tool",
         "tool_call_id": 
         "content": 
     }
 
@@ -108,7 +111,15 @@ def to_google_ai(openai_message_dict: dict) -> dict:
 
 
 # TODO convert return type to pydantic
-def convert_tools_to_google_ai_format(tools: List[Tool]) -> List[dict]:
+def convert_tools_to_google_ai_format(tools: List[Tool], inner_thoughts_in_kwargs: Optional[bool] = True) -> List[dict]:
     """
     OpenAI style:
     "tools": [{
         "type": "function",
         "function": {
             "name": "find_movies",
             "description": "find ....",
             "parameters": {
-                ...
+                "type": "object",
+                "properties": {
+                    PARAM: {
+                        "type": PARAM_TYPE,  # eg "string"
+                        "description": PARAM_DESCRIPTION,
+                    },
+                    ... 
+ }, + "required": List[str], } } } @@ -151,9 +162,187 @@ def convert_tools_to_google_ai_format(tools: List[Tool]) -> List[dict]: ) for t in tools ] + + # Correct casing + add inner thoughts if needed + print("YYY", function_list) + for func in function_list: + func["parameters"]["type"] = "OBJECT" + print("zzz", func["parameters"]["properties"]) + for param_name, param_fields in func["parameters"]["properties"].items(): + # print("XXX", param) + param_fields["type"] = param_fields["type"].upper() + # Add inner thoughts + if inner_thoughts_in_kwargs: + from memgpt.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION + + func["parameters"]["properties"][INNER_THOUGHTS_KWARG] = { + "type": "STRING", + "description": INNER_THOUGHTS_KWARG_DESCRIPTION, + } + func["parameters"]["required"].append(INNER_THOUGHTS_KWARG) + return [{"functionDeclarations": function_list}] +def convert_google_ai_response_to_chatcompletion( + response_json: dict, # REST response from Google AI API + model: str, # Required since not returned + input_messages: Optional[List[dict]] = None, # Required if the API doesn't return UsageMetadata + pull_inner_thoughts_from_args: Optional[bool] = True, +) -> ChatCompletionResponse: + """Google AI API response format is not the same as ChatCompletion, requires unpacking + + Example: + { + "candidates": [ + { + "content": { + "parts": [ + { + "text": " OK. Barbie is showing in two theaters in Mountain View, CA: AMC Mountain View 16 and Regal Edwards 14." + } + ] + } + } + ], + "usageMetadata": { + "promptTokenCount": 9, + "candidatesTokenCount": 27, + "totalTokenCount": 36 + } + } + """ + try: + choices = [] + for candidate in response_json["candidates"]: + content = candidate["content"] + + role = content["role"] + assert role == "model", f"Unknown role in response: {role}" + + parts = content["parts"] + # TODO support parts / multimodal + assert len(parts) == 1, f"Multi-part not yet supported:\n{parts}" + response_message = parts[0] + + # Convert the actual message style to OpenAI style + if "functionCall" in response_message and response_message["functionCall"] is not None: + function_call = response_message["functionCall"] + assert isinstance(function_call, dict), function_call + function_name = function_call["name"] + assert isinstance(function_name, str), function_name + function_args = function_call["args"] + assert isinstance(function_args, dict), function_args + + # NOTE: this also involves stripping the inner monologue out of the function + if pull_inner_thoughts_from_args: + from memgpt.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION + + assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}" + inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG) + assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}" + else: + inner_thoughts = None + + # Google AI API doesn't generate tool call IDs + openai_response_message = Message( + role="assistant", # NOTE: "model" -> "assistant" + content=inner_thoughts, + tool_calls=[ + ToolCall( + id=get_tool_call_id(), + type="function", + function=FunctionCall( + name=function_name, + arguments=json.dumps(function_args), + ), + ) + ], + ) + + else: + + # Inner thoughts are the content by default + inner_thoughts = response_message["text"] + + # Google AI API doesn't generate tool call IDs + openai_response_message = Message( + role="assistant", # NOTE: "model" -> 
"assistant" + content=inner_thoughts, + ) + + # Google AI API uses different finish reason strings than OpenAI + # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null + # see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api + # Google AI API: FINISH_REASON_UNSPECIFIED, STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER + # see: https://ai.google.dev/api/python/google/ai/generativelanguage/Candidate/FinishReason + finish_reason = candidate["finishReason"] + if finish_reason == "STOP": + openai_finish_reason = ( + "function_call" + if openai_response_message.tool_calls is not None and len(openai_response_message.tool_calls) > 0 + else "stop" + ) + elif finish_reason == "MAX_TOKENS": + openai_finish_reason = "length" + elif finish_reason == "SAFETY": + openai_finish_reason = "content_filter" + elif finish_reason == "RECITATION": + openai_finish_reason = "content_filter" + else: + raise ValueError(f"Unrecognized finish reason in Google AI response: {finish_reason}") + + choices.append( + Choice( + finish_reason=openai_finish_reason, + index=candidate["index"], + message=openai_response_message, + ) + ) + + if len(choices) > 1: + raise UserWarning(f"Unexpected number of candidates in response (expected 1, got {len(choices)})") + + # NOTE: some of the Google AI APIs show UsageMetadata in the response, but it seems to not exist? + # "usageMetadata": { + # "promptTokenCount": 9, + # "candidatesTokenCount": 27, + # "totalTokenCount": 36 + # } + if "usageMetadata" in response_json: + usage = UsageStatistics( + prompt_tokens=response_json["usageMetadata"]["promptTokenCount"], + completion_tokens=response_json["usageMetadata"]["candidatesTokenCount"], + total_tokens=response_json["usageMetadata"]["totalTokenCount"], + ) + else: + # Count it ourselves + assert input_messages is not None, f"Didn't get UsageMetadata from the API response, so input_messages is required" + prompt_tokens = count_tokens( + json.dumps(input_messages, ensure_ascii=JSON_ENSURE_ASCII) + ) # NOTE: this is a very rough approximation + completion_tokens = count_tokens( + json.dumps(openai_response_message.model_dump(), ensure_ascii=JSON_ENSURE_ASCII) + ) # NOTE: this is also approximate + total_tokens = prompt_tokens + completion_tokens + usage = UsageStatistics( + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=total_tokens, + ) + + response_id = str(uuid.uuid4()) + return ChatCompletionResponse( + id=response_id, + choices=choices, + model=model, # NOTE: Google API doesn't pass back model in the response + created=get_utc_time(), + usage=usage, + ) + except KeyError as e: + raise e + + # TODO convert 'data' type to pydantic def google_ai_chat_completions_request( service_endpoint: str, @@ -162,6 +351,9 @@ def google_ai_chat_completions_request( data: dict, key_in_header: bool = True, add_postfunc_model_messages: bool = True, + # NOTE: Google AI API doesn't support mixing parts 'text' and 'function', + # so there's no clean way to put inner thoughts in the same message as a function call + inner_thoughts_in_kwargs: bool = True, ) -> ChatCompletionResponse: """https://ai.google.dev/docs/function_calling @@ -200,11 +392,15 @@ def google_ai_chat_completions_request( response.raise_for_status() # Raises HTTPError for 4XX/5XX status response = response.json() # convert to dict from string printd(f"response.json = {response}") - # NOTE: azure openai does not include "content" in the response when it is None, so we need to add it - if "content" not in 
response["choices"][0].get("message"): - response["choices"][0]["message"]["content"] = None - response = ChatCompletionResponse(**response) # convert to 'dot-dict' style which is the openai python client default - return response + + # Convert Google AI response to ChatCompletion style + return convert_google_ai_response_to_chatcompletion( + response_json=response, + model=model, + input_messages=data["contents"], + pull_inner_thoughts_from_args=inner_thoughts_in_kwargs, + ) + except requests.exceptions.HTTPError as http_err: # Handle HTTP errors (e.g., response 4XX, 5XX) printd(f"Got HTTPError, exception={http_err}, payload={data}") @@ -213,10 +409,12 @@ def google_ai_chat_completions_request( # Print the response content (error message from server) print(f"Message: {http_err.response.text}") raise http_err + except requests.exceptions.RequestException as req_err: # Handle other requests-related errors (e.g., connection error) printd(f"Got RequestException, exception={req_err}") raise req_err + except Exception as e: # Handle other potential errors printd(f"Got unknown Exception, exception={e}") diff --git a/memgpt/llm_api/llm_api_tools.py b/memgpt/llm_api/llm_api_tools.py index 917e89c42d..e357f570ce 100644 --- a/memgpt/llm_api/llm_api_tools.py +++ b/memgpt/llm_api/llm_api_tools.py @@ -211,17 +211,22 @@ def create( if not use_tool_naming: raise NotImplementedError("Only tool calling supported on Google AI API requests") + # NOTE: until Google AI supports CoT / text alongside function calls, + # we need to put it in a kwarg (unless we want to split the message into two) + google_ai_inner_thoughts_in_kwarg = True + tools = [{"type": "function", "function": f} for f in functions] if functions else None tools = [Tool(**t) for t in tools] return google_ai_chat_completions_request( + inner_thoughts_in_kwargs=google_ai_inner_thoughts_in_kwarg, service_endpoint=os.getenv("GAI_SERVICE_ENDPOINT"), model="gemini-pro", api_key=os.getenv("GAI_API_KEY"), # see structure of payload here: https://ai.google.dev/docs/function_calling data=dict( contents=[m.to_google_ai_dict() for m in messages], - # tools=convert_tools_to_google_ai_format(tools), + tools=convert_tools_to_google_ai_format(tools, inner_thoughts_in_kwargs=google_ai_inner_thoughts_in_kwarg), ), ) diff --git a/memgpt/local_llm/constants.py b/memgpt/local_llm/constants.py index a3734f165c..d4d4f81f35 100644 --- a/memgpt/local_llm/constants.py +++ b/memgpt/local_llm/constants.py @@ -23,3 +23,6 @@ DEFAULT_WRAPPER = ChatMLInnerMonologueWrapper DEFAULT_WRAPPER_NAME = "chatml" + +INNER_THOUGHTS_KWARG = "inner_thoughts" +INNER_THOUGHTS_KWARG_DESCRIPTION = "Deep inner monologue private to you only." 
diff --git a/memgpt/local_llm/llm_chat_completion_wrappers/chatml.py b/memgpt/local_llm/llm_chat_completion_wrappers/chatml.py index b7526628e7..b824bf7629 100644 --- a/memgpt/local_llm/llm_chat_completion_wrappers/chatml.py +++ b/memgpt/local_llm/llm_chat_completion_wrappers/chatml.py @@ -1,9 +1,9 @@ import json -from .wrapper_base import LLMChatCompletionWrapper -from ..json_parser import clean_json -from ...constants import JSON_ENSURE_ASCII, JSON_LOADS_STRICT -from ...errors import LLMJSONParsingError +from memgpt.local_llm.llm_chat_completion_wrappers.wrapper_base import LLMChatCompletionWrapper +from memgpt.local_llm.json_parser import clean_json +from memgpt.constants import JSON_ENSURE_ASCII, JSON_LOADS_STRICT +from memgpt.errors import LLMJSONParsingError PREFIX_HINT = """# Reminders: @@ -75,7 +75,9 @@ def _compile_function_description(self, schema, add_inner_thoughts=True) -> str: func_str += f"\n description: {schema['description']}" func_str += f"\n params:" if add_inner_thoughts: - func_str += f"\n inner_thoughts: Deep inner monologue private to you only." + from memgpt.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION + + func_str += f"\n {INNER_THOUGHTS_KWARG}: {INNER_THOUGHTS_KWARG_DESCRIPTION}" for param_k, param_v in schema["parameters"]["properties"].items(): # TODO we're ignoring type func_str += f"\n {param_k}: {param_v['description']}" From c542c3f7b640defad56070fcae5dc2a8c546f099 Mon Sep 17 00:00:00 2001 From: cpacker Date: Tue, 9 Apr 2024 14:57:48 -0700 Subject: [PATCH 08/12] drop prints --- memgpt/llm_api/google_ai.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/memgpt/llm_api/google_ai.py b/memgpt/llm_api/google_ai.py index 0794b46217..bd06a30a3c 100644 --- a/memgpt/llm_api/google_ai.py +++ b/memgpt/llm_api/google_ai.py @@ -164,12 +164,9 @@ def convert_tools_to_google_ai_format(tools: List[Tool], inner_thoughts_in_kwarg ] # Correct casing + add inner thoughts if needed - print("YYY", function_list) for func in function_list: func["parameters"]["type"] = "OBJECT" - print("zzz", func["parameters"]["properties"]) for param_name, param_fields in func["parameters"]["properties"].items(): - # print("XXX", param) param_fields["type"] = param_fields["type"].upper() # Add inner thoughts if inner_thoughts_in_kwargs: @@ -381,10 +378,6 @@ def google_ai_chat_completions_request( if add_postfunc_model_messages: data["contents"] = add_dummy_model_messages(data["contents"]) - print(f"messages in 'contents'") - for m in data["contents"]: - print(m) - printd(f"Sending request to {url}") try: response = requests.post(url, headers=headers, json=data) From 0465aaae251ec88984878b3a2e62a820c1fa79d0 Mon Sep 17 00:00:00 2001 From: cpacker Date: Tue, 9 Apr 2024 16:03:58 -0700 Subject: [PATCH 09/12] working credentials + config --- memgpt/cli/cli_config.py | 136 +++++++++++++++++++++++++++++--- memgpt/credentials.py | 13 ++- memgpt/llm_api/google_ai.py | 132 +++++++++++++++++++++---------- memgpt/llm_api/llm_api_tools.py | 7 +- 4 files changed, 230 insertions(+), 58 deletions(-) diff --git a/memgpt/cli/cli_config.py b/memgpt/cli/cli_config.py index 482bd65716..b5f2b366c3 100644 --- a/memgpt/cli/cli_config.py +++ b/memgpt/cli/cli_config.py @@ -20,6 +20,7 @@ from memgpt.data_types import User, LLMConfig, EmbeddingConfig from memgpt.llm_api.openai import openai_get_model_list from memgpt.llm_api.azure_openai import azure_openai_get_model_list +from memgpt.llm_api.google_ai import google_ai_get_model_list, google_ai_get_model_context_window from 
memgpt.local_llm.constants import DEFAULT_ENDPOINTS, DEFAULT_OLLAMA_MODEL, DEFAULT_WRAPPER_NAME from memgpt.local_llm.utils import get_available_wrappers from memgpt.server.utils import shorten_key_middle @@ -46,11 +47,16 @@ def get_azure_credentials(): return creds -def get_openai_credentials(): - openai_key = os.getenv("OPENAI_API_KEY") +def get_openai_credentials() -> Optional[str]: + openai_key = os.getenv("OPENAI_API_KEY", None) return openai_key +def get_google_ai_credentials() -> Optional[str]: + google_ai_key = os.getenv("GOOGLE_AI_API_KEY", None) + return google_ai_key + + def configure_llm_endpoint(config: MemGPTConfig, credentials: MemGPTCredentials): # configure model endpoint model_endpoint_type, model_endpoint = None, None @@ -60,11 +66,12 @@ def configure_llm_endpoint(config: MemGPTConfig, credentials: MemGPTCredentials) if config.default_llm_config.model_endpoint_type is not None and config.default_llm_config.model_endpoint_type not in [ "openai", "azure", + "google_ai", ]: # local model default_model_endpoint_type = "local" provider = questionary.select( - "Select LLM inference provider:", choices=["openai", "azure", "local"], default=default_model_endpoint_type + "Select LLM inference provider:", choices=["openai", "azure", "google_ai", "local"], default=default_model_endpoint_type ).ask() if provider is None: raise KeyboardInterrupt @@ -132,6 +139,51 @@ def configure_llm_endpoint(config: MemGPTConfig, credentials: MemGPTCredentials) model_endpoint_type = "azure" model_endpoint = azure_creds["azure_endpoint"] + elif provider == "google_ai": + + # check for key + if credentials.google_ai_key is None: + # allow key to get pulled from env vars + google_ai_key = get_google_ai_credentials() + # if we still can't find it, ask for it as input + if google_ai_key is None: + while google_ai_key is None or len(google_ai_key) == 0: + # Ask for API key as input + google_ai_key = questionary.password( + "Enter your Google AI (Gemini) API key (see https://aistudio.google.com/app/apikey):" + ).ask() + if google_ai_key is None: + raise KeyboardInterrupt + credentials.google_ai_key = google_ai_key + else: + # Give the user an opportunity to overwrite the key + google_ai_key = None + default_input = shorten_key_middle(credentials.google_ai_key) + + google_ai_key = questionary.password( + "Enter your Google AI (Gemini) API key (see https://aistudio.google.com/app/apikey):", + default=default_input, + ).ask() + if google_ai_key is None: + raise KeyboardInterrupt + # If the user modified it, use the new one + if google_ai_key != default_input: + credentials.google_ai_key = google_ai_key + + default_input = os.getenv("GOOGLE_AI_SERVICE_ENDPOINT", None) + if default_input is None: + default_input = "generativelanguage" + google_ai_service_endpoint = questionary.text( + "Enter your Google AI (Gemini) service endpoint (see https://ai.google.dev/api/rest):", + default=default_input, + ).ask() + credentials.google_ai_service_endpoint = google_ai_service_endpoint + + # write out the credentials + credentials.save() + + model_endpoint_type = "google_ai" + else: # local models # backend_options_old = ["webui", "webui-legacy", "llamacpp", "koboldcpp", "ollama", "lmstudio", "lmstudio-legacy", "vllm", "openai"] backend_options = builtins.list(DEFAULT_ENDPOINTS.keys()) @@ -224,6 +276,21 @@ def get_model_options( else: model_options = [obj["id"] for obj in fetched_model_options_response["data"]] + elif model_endpoint_type == "google_ai": + if credentials.google_ai_key is None: + raise ValueError("Missing 
Google AI API key") + if credentials.google_ai_service_endpoint is None: + raise ValueError("Missing Google AI service endpoint") + model_options = google_ai_get_model_list( + service_endpoint=credentials.google_ai_service_endpoint, api_key=credentials.google_ai_key + ) + model_options = [str(m["name"]) for m in model_options] + model_options = [mo[len("models/") :] if mo.startswith("models/") else mo for mo in model_options] + + # TODO remove manual filtering for gemini-pro + model_options = [mo for mo in model_options if str(mo).startswith("gemini") and "-pro" in str(mo)] + # model_options = ["gemini-pro"] + else: # Attempt to do OpenAI endpoint style model fetching # TODO support local auth with api-key header @@ -295,6 +362,26 @@ def configure_model(config: MemGPTConfig, credentials: MemGPTCredentials, model_ if model is None: raise KeyboardInterrupt + elif model_endpoint_type == "google_ai": + try: + fetched_model_options = get_model_options( + credentials=credentials, model_endpoint_type=model_endpoint_type, model_endpoint=model_endpoint + ) + except Exception as e: + # NOTE: if this fails, it means the user's key is probably bad + typer.secho( + f"Failed to get model list from {model_endpoint} - make sure your API key and endpoints are correct!", fg=typer.colors.RED + ) + raise e + + model = questionary.select( + "Select default model:", + choices=fetched_model_options, + default=fetched_model_options[0], + ).ask() + if model is None: + raise KeyboardInterrupt + else: # local models # ask about local auth @@ -413,7 +500,7 @@ def configure_model(config: MemGPTConfig, credentials: MemGPTCredentials, model_ # set: context_window if str(model) not in LLM_MAX_TOKENS: - # Ask the user to specify the context length + context_length_options = [ str(2**12), # 4096 str(2**13), # 8192 @@ -422,13 +509,40 @@ def configure_model(config: MemGPTConfig, credentials: MemGPTCredentials, model_ str(2**18), # 262144 "custom", # enter yourself ] - context_window_input = questionary.select( - "Select your model's context window (for Mistral 7B models, this is probably 8k / 8192):", - choices=context_length_options, - default=str(LLM_MAX_TOKENS["DEFAULT"]), - ).ask() - if context_window_input is None: - raise KeyboardInterrupt + + if model_endpoint_type == "google_ai": + try: + fetched_context_window = str( + google_ai_get_model_context_window( + service_endpoint=credentials.google_ai_service_endpoint, api_key=credentials.google_ai_key, model=model + ) + ) + print(f"Got context window {fetched_context_window} for model {model} (from Google API)") + context_length_options = [ + fetched_context_window, + "custom", + ] + except: + print(f"Failed to get model details for model '{model}' on Google AI API") + + context_window_input = questionary.select( + "Select your model's context window (see https://cloud.google.com/vertex-ai/generative-ai/docs/learn/model-versioning#gemini-model-versions):", + choices=context_length_options, + default=context_length_options[0], + ).ask() + if context_window_input is None: + raise KeyboardInterrupt + + else: + + # Ask the user to specify the context length + context_window_input = questionary.select( + "Select your model's context window (for Mistral 7B models, this is probably 8k / 8192):", + choices=context_length_options, + default=str(LLM_MAX_TOKENS["DEFAULT"]), + ).ask() + if context_window_input is None: + raise KeyboardInterrupt # If custom, ask for input if context_window_input == "custom": diff --git a/memgpt/credentials.py b/memgpt/credentials.py index 
3464752b0f..af998369f7 100644 --- a/memgpt/credentials.py +++ b/memgpt/credentials.py @@ -28,6 +28,10 @@ class MemGPTCredentials: openai_auth_type: str = "bearer_token" openai_key: Optional[str] = None + # gemini config + google_ai_key: Optional[str] = None + google_ai_service_endpoint: Optional[str] = None + # azure config azure_auth_type: str = "api_key" azure_key: Optional[str] = None @@ -70,6 +74,9 @@ def load(cls) -> "MemGPTCredentials": "azure_embedding_version": get_field(config, "azure", "embedding_version"), "azure_embedding_endpoint": get_field(config, "azure", "embedding_endpoint"), "azure_embedding_deployment": get_field(config, "azure", "embedding_deployment"), + # gemini + "google_ai_key": get_field(config, "google_ai", "key"), + "google_ai_service_endpoint": get_field(config, "google_ai", "service_endpoint"), # open llm "openllm_auth_type": get_field(config, "openllm", "auth_type"), "openllm_key": get_field(config, "openllm", "key"), @@ -102,7 +109,11 @@ def save(self): set_field(config, "azure", "embedding_endpoint", self.azure_embedding_endpoint) set_field(config, "azure", "embedding_deployment", self.azure_embedding_deployment) - # openai config + # gemini + set_field(config, "google_ai", "key", self.google_ai_key) + set_field(config, "google_ai", "service_endpoint", self.google_ai_service_endpoint) + + # openllm config set_field(config, "openllm", "auth_type", self.openllm_auth_type) set_field(config, "openllm", "key", self.openllm_key) diff --git a/memgpt/llm_api/google_ai.py b/memgpt/llm_api/google_ai.py index bd06a30a3c..5277a586cc 100644 --- a/memgpt/llm_api/google_ai.py +++ b/memgpt/llm_api/google_ai.py @@ -18,8 +18,96 @@ ] -def annotate_messages_with_tool_names(): - return +def google_ai_get_model_details(service_endpoint: str, api_key: str, model: str, key_in_header: bool = True) -> List[dict]: + from memgpt.utils import printd + + # Two ways to pass the key: https://ai.google.dev/tutorials/setup + if key_in_header: + url = f"https://{service_endpoint}.googleapis.com/v1beta/models/{model}" + headers = {"Content-Type": "application/json", "x-goog-api-key": api_key} + else: + url = f"https://{service_endpoint}.googleapis.com/v1beta/models/{model}?key={api_key}" + headers = {"Content-Type": "application/json"} + + try: + response = requests.get(url, headers=headers) + printd(f"response = {response}") + response.raise_for_status() # Raises HTTPError for 4XX/5XX status + response = response.json() # convert to dict from string + printd(f"response.json = {response}") + + # Grab the models out + return response + + except requests.exceptions.HTTPError as http_err: + # Handle HTTP errors (e.g., response 4XX, 5XX) + printd(f"Got HTTPError, exception={http_err}") + # Print the HTTP status code + print(f"HTTP Error: {http_err.response.status_code}") + # Print the response content (error message from server) + print(f"Message: {http_err.response.text}") + raise http_err + + except requests.exceptions.RequestException as req_err: + # Handle other requests-related errors (e.g., connection error) + printd(f"Got RequestException, exception={req_err}") + raise req_err + + except Exception as e: + # Handle other potential errors + printd(f"Got unknown Exception, exception={e}") + raise e + + +def google_ai_get_model_context_window(service_endpoint: str, api_key: str, model: str, key_in_header: bool = True) -> int: + model_details = google_ai_get_model_details( + service_endpoint=service_endpoint, api_key=api_key, model=model, key_in_header=key_in_header + ) + # TODO should this be: 
+ # return model_details["inputTokenLimit"] + model_details["outputTokenLimit"] + return int(model_details["inputTokenLimit"]) + + +def google_ai_get_model_list(service_endpoint: str, api_key: str, key_in_header: bool = True) -> List[dict]: + from memgpt.utils import printd + + # Two ways to pass the key: https://ai.google.dev/tutorials/setup + if key_in_header: + url = f"https://{service_endpoint}.googleapis.com/v1beta/models" + headers = {"Content-Type": "application/json", "x-goog-api-key": api_key} + else: + url = f"https://{service_endpoint}.googleapis.com/v1beta/models?key={api_key}" + headers = {"Content-Type": "application/json"} + + try: + response = requests.get(url, headers=headers) + printd(f"response = {response}") + response.raise_for_status() # Raises HTTPError for 4XX/5XX status + response = response.json() # convert to dict from string + printd(f"response.json = {response}") + + # Grab the models out + model_list = response["models"] + return model_list + + except requests.exceptions.HTTPError as http_err: + # Handle HTTP errors (e.g., response 4XX, 5XX) + printd(f"Got HTTPError, exception={http_err}") + # Print the HTTP status code + print(f"HTTP Error: {http_err.response.status_code}") + # Print the response content (error message from server) + print(f"Message: {http_err.response.text}") + raise http_err + + except requests.exceptions.RequestException as req_err: + # Handle other requests-related errors (e.g., connection error) + printd(f"Got RequestException, exception={req_err}") + raise req_err + + except Exception as e: + # Handle other potential errors + printd(f"Got unknown Exception, exception={e}") + raise e def add_dummy_model_messages(messages: List[dict]) -> List[dict]: @@ -43,46 +131,6 @@ def add_dummy_model_messages(messages: List[dict]) -> List[dict]: return messages_with_padding -# TODO use pydantic model as input -def convert_openai_tool_call_to_google_ai(tool_call: dict, inner_thoughts_in_kwargs: Optional[bool] = True) -> dict: - """ - OpenAI format: - { - "role": "tool", - "tool_call_id": - "content": - } - - Google AI format: - { - "role": "function", - "parts": [{ - "functionResponse": { - "name": "find_theaters", - "response": { - "name": "find_theaters", - "content": { - "movie": "Barbie", - "theaters": [{ - "name": "AMC Mountain View 16", - "address": "2000 W El Camino Real, Mountain View, CA 94040" - }, { - "name": "Regal Edwards 14", - "address": "245 Castro St, Mountain View, CA 94040" - }] - } - } - }] - } - """ - pass - - -# TODO use pydantic model as input -def convert_openai_assistant_content_to_google_ai(tool_call: dict) -> dict: - pass - - # TODO use pydantic model as input def to_google_ai(openai_message_dict: dict) -> dict: diff --git a/memgpt/llm_api/llm_api_tools.py b/memgpt/llm_api/llm_api_tools.py index e357f570ce..752a944dde 100644 --- a/memgpt/llm_api/llm_api_tools.py +++ b/memgpt/llm_api/llm_api_tools.py @@ -18,7 +18,6 @@ from memgpt.llm_api.google_ai import ( google_ai_chat_completions_request, convert_tools_to_google_ai_format, - annotate_messages_with_tool_names, ) @@ -220,9 +219,9 @@ def create( return google_ai_chat_completions_request( inner_thoughts_in_kwargs=google_ai_inner_thoughts_in_kwarg, - service_endpoint=os.getenv("GAI_SERVICE_ENDPOINT"), - model="gemini-pro", - api_key=os.getenv("GAI_API_KEY"), + service_endpoint=credentials.google_ai_service_endpoint, + model=agent_state.llm_config.model, + api_key=credentials.google_ai_key, # see structure of payload here: https://ai.google.dev/docs/function_calling data=dict( 
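
For reference, the model discovery this patch wires into `memgpt configure` can be exercised standalone. A rough sketch follows, assuming an API key exported as `GOOGLE_AI_API_KEY` and the default `generativelanguage` service endpoint (both names are assumptions for the example, not fixed by the patch):

```python
import os

import requests

service_endpoint = "generativelanguage"  # the default suggested during configuration
api_key = os.environ["GOOGLE_AI_API_KEY"]  # assumed to be exported in the environment

# The same REST call that google_ai_get_model_list wraps
url = f"https://{service_endpoint}.googleapis.com/v1beta/models"
response = requests.get(url, headers={"x-goog-api-key": api_key})
response.raise_for_status()

for model in response.json()["models"]:
    name = model["name"].removeprefix("models/")
    # inputTokenLimit is what google_ai_get_model_context_window reads as the context window
    print(f"{name}: inputTokenLimit={model.get('inputTokenLimit')}, outputTokenLimit={model.get('outputTokenLimit')}")
```
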
contents=[m.to_google_ai_dict() for m in messages], From 78410654762b72239a21f095b95d04fe1b7931f0 Mon Sep 17 00:00:00 2001 From: cpacker Date: Tue, 9 Apr 2024 16:20:36 -0700 Subject: [PATCH 10/12] remove google_ai default --- memgpt/llm_api/llm_api_tools.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/memgpt/llm_api/llm_api_tools.py b/memgpt/llm_api/llm_api_tools.py index 752a944dde..f7b9a8cad1 100644 --- a/memgpt/llm_api/llm_api_tools.py +++ b/memgpt/llm_api/llm_api_tools.py @@ -142,9 +142,6 @@ def create( printd("unsetting function_call because functions is None") function_call = None - # TODO remove - agent_state.llm_config.model_endpoint_type = "google_ai" - # openai if agent_state.llm_config.model_endpoint_type == "openai": # TODO do the same for Azure? From e63a371c0e414e14641d70999f9ad00352b436df Mon Sep 17 00:00:00 2001 From: cpacker Date: Wed, 10 Apr 2024 17:20:33 -0700 Subject: [PATCH 11/12] fix bug in summarize, add backslash stripping since Gemini API seems to be including a lot of extras --- memgpt/llm_api/google_ai.py | 3 ++- memgpt/llm_api/llm_api_tools.py | 10 +++++++--- memgpt/local_llm/json_parser.py | 12 ++++++++++++ memgpt/main.py | 3 ++- 4 files changed, 23 insertions(+), 5 deletions(-) diff --git a/memgpt/llm_api/google_ai.py b/memgpt/llm_api/google_ai.py index 5277a586cc..e0facf6429 100644 --- a/memgpt/llm_api/google_ai.py +++ b/memgpt/llm_api/google_ai.py @@ -8,6 +8,7 @@ from memgpt.models.embedding_response import EmbeddingResponse from memgpt.utils import smart_urljoin, get_tool_call_id, get_utc_time from memgpt.local_llm.utils import count_tokens +from memgpt.local_llm.json_parser import clean_json_string_extra_backslash from memgpt.constants import NON_USER_MSG_PREFIX, JSON_ENSURE_ASCII # from memgpt.data_types import ToolCall @@ -299,7 +300,7 @@ def convert_google_ai_response_to_chatcompletion( type="function", function=FunctionCall( name=function_name, - arguments=json.dumps(function_args), + arguments=clean_json_string_extra_backslash(json.dumps(function_args)), ), ) ], diff --git a/memgpt/llm_api/llm_api_tools.py b/memgpt/llm_api/llm_api_tools.py index f7b9a8cad1..c7824590cd 100644 --- a/memgpt/llm_api/llm_api_tools.py +++ b/memgpt/llm_api/llm_api_tools.py @@ -211,8 +211,12 @@ def create( # we need to put it in a kwarg (unless we want to split the message into two) google_ai_inner_thoughts_in_kwarg = True - tools = [{"type": "function", "function": f} for f in functions] if functions else None - tools = [Tool(**t) for t in tools] + if functions is not None: + tools = [{"type": "function", "function": f} for f in functions] + tools = [Tool(**t) for t in tools] + tools = (convert_tools_to_google_ai_format(tools, inner_thoughts_in_kwargs=google_ai_inner_thoughts_in_kwarg),) + else: + tools = None return google_ai_chat_completions_request( inner_thoughts_in_kwargs=google_ai_inner_thoughts_in_kwarg, @@ -222,7 +226,7 @@ def create( # see structure of payload here: https://ai.google.dev/docs/function_calling data=dict( contents=[m.to_google_ai_dict() for m in messages], - tools=convert_tools_to_google_ai_format(tools, inner_thoughts_in_kwargs=google_ai_inner_thoughts_in_kwarg), + tools=tools, ), ) diff --git a/memgpt/local_llm/json_parser.py b/memgpt/local_llm/json_parser.py index 5ca712fbe6..1f7a7cbc46 100644 --- a/memgpt/local_llm/json_parser.py +++ b/memgpt/local_llm/json_parser.py @@ -5,6 +5,18 @@ from memgpt.errors import LLMJSONParsingError +def clean_json_string_extra_backslash(s): + """Clean extra backslashes out from stringified JSON + + NOTE: 
Google AI Gemini API likes to include these + """ + # Strip slashes that are used to escape single quotes and other backslashes + # Use json.loads to parse it correctly + while "\\\\" in s: + s = s.replace("\\\\", "\\") + return s + + def replace_escaped_underscores(string: str): """Handles the case of escaped underscores, e.g.: diff --git a/memgpt/main.py b/memgpt/main.py index 5425318570..87c4000433 100644 --- a/memgpt/main.py +++ b/memgpt/main.py @@ -1,6 +1,7 @@ import os import sys import traceback +import requests import json import questionary @@ -262,7 +263,7 @@ def run_agent_loop(memgpt_agent, config: MemGPTConfig, first, ms: MetadataStore, fg=typer.colors.GREEN, bold=True, ) - except errors.LLMError as e: + except (errors.LLMError, requests.exceptions.HTTPError) as e: typer.secho( f"/summarize failed:\n{e}", fg=typer.colors.RED, From 424626788e21327bdac70ab74b18ad5e8d64560f Mon Sep 17 00:00:00 2001 From: cpacker Date: Wed, 10 Apr 2024 18:32:51 -0700 Subject: [PATCH 12/12] patched summarize method to work with gemini by adding an acknowledgement message --- memgpt/constants.py | 2 ++ memgpt/data_types.py | 3 +++ memgpt/memory.py | 12 +++++++----- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/memgpt/constants.py b/memgpt/constants.py index 55e82d30c3..4726b72d0a 100644 --- a/memgpt/constants.py +++ b/memgpt/constants.py @@ -86,6 +86,8 @@ ) # The fraction of tokens we truncate down to MESSAGE_SUMMARY_TRUNC_TOKEN_FRAC = 0.75 +# The ackknowledgement message used in the summarize sequence +MESSAGE_SUMMARY_REQUEST_ACK = "Understood, I will respond with a summary of the message (and only the summary, nothing else) once I receive the conversation history. I'm ready." # Even when summarizing, we want to keep a handful of recent messages # These serve as in-context examples of how to use functions / what user messages look like diff --git a/memgpt/data_types.py b/memgpt/data_types.py index 5b2e4c3481..dba9b7f969 100644 --- a/memgpt/data_types.py +++ b/memgpt/data_types.py @@ -358,6 +358,9 @@ def to_google_ai_dict(self, put_inner_thoughts_in_kwargs: bool = True) -> dict: } } ) + else: + assert self.text is not None + parts.append({"text": self.text}) google_ai_message["parts"] = parts elif self.role == "tool": diff --git a/memgpt/memory.py b/memgpt/memory.py index bd49cb924b..aed8f102f3 100644 --- a/memgpt/memory.py +++ b/memgpt/memory.py @@ -3,7 +3,7 @@ import uuid from typing import Optional, List, Tuple, Union -from memgpt.constants import MESSAGE_SUMMARY_WARNING_FRAC +from memgpt.constants import MESSAGE_SUMMARY_WARNING_FRAC, MESSAGE_SUMMARY_REQUEST_ACK from memgpt.utils import get_local_time, printd, count_tokens, validate_date_format, extract_date_from_timestamp from memgpt.prompts.gpt_summarize import SYSTEM as SUMMARY_PROMPT_SYSTEM from memgpt.llm_api.llm_api_tools import create @@ -110,6 +110,7 @@ def _format_summary_history(message_history: List[Message]): def summarize_messages( agent_state: AgentState, message_sequence_to_summarize: List[Message], + insert_acknowledgement_assistant_message: bool = True, ): """Summarize a message sequence using GPT""" # we need the context_window @@ -128,10 +129,11 @@ def summarize_messages( dummy_user_id = uuid.uuid4() dummy_agent_id = uuid.uuid4() - message_sequence = [ - Message(user_id=dummy_user_id, agent_id=dummy_agent_id, role="system", text=summary_prompt), - Message(user_id=dummy_user_id, agent_id=dummy_agent_id, role="user", text=summary_input), - ] + message_sequence = [] + 
message_sequence.append(Message(user_id=dummy_user_id, agent_id=dummy_agent_id, role="system", text=summary_prompt)) + if insert_acknowledgement_assistant_message: + message_sequence.append(Message(user_id=dummy_user_id, agent_id=dummy_agent_id, role="assistant", text=MESSAGE_SUMMARY_REQUEST_ACK)) + message_sequence.append(Message(user_id=dummy_user_id, agent_id=dummy_agent_id, role="user", text=summary_input)) response = create( agent_state=agent_state,
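
Taken together, these changes make the summarize request alternate roles when it reaches Gemini: the 'system' prompt (sent as 'user'), the acknowledgement turn (sent as 'model'), then the transcript payload (sent as 'user'). A rough sketch of the resulting sequence, with placeholder strings standing in for the real prompt constants:

```python
# Plain-dict sketch of the summarize sequence after this patch; the strings are
# placeholders for SUMMARY_PROMPT_SYSTEM, MESSAGE_SUMMARY_REQUEST_ACK, and the
# formatted history produced by _format_summary_history
summary_prompt = "<summarizer system prompt>"
summary_ack = "<assistant acknowledgement>"
summary_input = "<formatted conversation history>"

message_sequence = [
    {"role": "system", "text": summary_prompt},  # becomes a 'user' turn in to_google_ai_dict
    # Without this 'assistant' turn (a 'model' turn for Gemini), the system prompt
    # and the history payload would arrive as two consecutive user-role messages,
    # which the Gemini API rejects
    {"role": "assistant", "text": summary_ack},
    {"role": "user", "text": summary_input},
]

for m in message_sequence:
    print(f"{m['role']}: {m['text']}")
```
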