diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 8a75a1487ff..755531953f4 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -1,32 +1,32 @@ -#------------------------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See LICENSE file in the project root for license information. -#------------------------------------------------------------------------------------------------------------- - -FROM mcr.microsoft.com/vscode/devcontainers/python:3.10 - -# -# Update the OS and maybe install packages -# -ENV DEBIAN_FRONTEND=noninteractive - -# add git lhs to apt -RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash - -RUN apt-get update \ - && apt-get upgrade -y \ - && apt-get -y install --no-install-recommends build-essential npm git-lfs \ - && apt-get autoremove -y \ - && apt-get clean -y \ - && arch=$(arch | sed s/aarch64/arm64/ | sed s/x86_64/amd64/) \ - && wget https://github.com/quarto-dev/quarto-cli/releases/download/v1.5.23/quarto-1.5.23-linux-${arch}.deb \ - && dpkg -i quarto-1.5.23-linux-${arch}.deb \ - && rm -rf /var/lib/apt/lists/* quarto-1.5.23-linux-${arch}.deb -ENV DEBIAN_FRONTEND=dialog - -# For docs -RUN npm install --global yarn -RUN pip install --upgrade pip -RUN pip install pydoc-markdown -RUN pip install pyyaml -RUN pip install colored +#------------------------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See LICENSE file in the project root for license information. +#------------------------------------------------------------------------------------------------------------- + +FROM mcr.microsoft.com/vscode/devcontainers/python:3.10 + +# +# Update the OS and maybe install packages +# +ENV DEBIAN_FRONTEND=noninteractive + +# add git lhs to apt +RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash + +RUN apt-get update \ + && apt-get upgrade -y \ + && apt-get -y install --no-install-recommends build-essential npm git-lfs \ + && apt-get autoremove -y \ + && apt-get clean -y \ + && arch=$(arch | sed s/aarch64/arm64/ | sed s/x86_64/amd64/) \ + && wget https://github.com/quarto-dev/quarto-cli/releases/download/v1.5.23/quarto-1.5.23-linux-${arch}.deb \ + && dpkg -i quarto-1.5.23-linux-${arch}.deb \ + && rm -rf /var/lib/apt/lists/* quarto-1.5.23-linux-${arch}.deb +ENV DEBIAN_FRONTEND=dialog + +# For docs +RUN npm install --global yarn +RUN pip install --upgrade pip +RUN pip install pydoc-markdown +RUN pip install pyyaml +RUN pip install colored diff --git a/.devcontainer/studio/Dockerfile b/.devcontainer/studio/Dockerfile index d612cea9dab..4a08aea9872 100644 --- a/.devcontainer/studio/Dockerfile +++ b/.devcontainer/studio/Dockerfile @@ -1,27 +1,27 @@ -#------------------------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See LICENSE file in the project root for license information. 
-#------------------------------------------------------------------------------------------------------------- - -FROM mcr.microsoft.com/vscode/devcontainers/python:3.10 - -# -# Update the OS and maybe install packages -# -ENV DEBIAN_FRONTEND=noninteractive - -# add git lhs to apt -RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash - -RUN apt-get update \ - && apt-get upgrade -y \ - && apt-get -y install --no-install-recommends build-essential npm git-lfs \ - && apt-get autoremove -y \ - && apt-get clean -y \ - && rm -rf /var/lib/apt/lists/* -ENV DEBIAN_FRONTEND=dialog - -# For docs -RUN npm install --global yarn -RUN pip install --upgrade pip -RUN pip install pydoc-markdown +#------------------------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See LICENSE file in the project root for license information. +#------------------------------------------------------------------------------------------------------------- + +FROM mcr.microsoft.com/vscode/devcontainers/python:3.10 + +# +# Update the OS and maybe install packages +# +ENV DEBIAN_FRONTEND=noninteractive + +# add git lhs to apt +RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash + +RUN apt-get update \ + && apt-get upgrade -y \ + && apt-get -y install --no-install-recommends build-essential npm git-lfs \ + && apt-get autoremove -y \ + && apt-get clean -y \ + && rm -rf /var/lib/apt/lists/* +ENV DEBIAN_FRONTEND=dialog + +# For docs +RUN npm install --global yarn +RUN pip install --upgrade pip +RUN pip install pydoc-markdown diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8f2521cbdb0..c9a4405ac31 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,7 +8,7 @@ ci: repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v4.6.0 hooks: - id: check-added-large-files - id: check-ast @@ -23,21 +23,21 @@ repos: - id: end-of-file-fixer - id: no-commit-to-branch - repo: https://github.com/psf/black - rev: 24.3.0 + rev: 24.4.2 hooks: - id: black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.3.4 + rev: v0.4.8 hooks: - id: ruff types_or: [ python, pyi, jupyter ] args: ["--fix", "--ignore=E402"] exclude: notebook/agentchat_databricks_dbrx.ipynb - repo: https://github.com/codespell-project/codespell - rev: v2.2.6 + rev: v2.3.0 hooks: - id: codespell - args: ["-L", "ans,linar,nam,tread,ot,"] + args: ["-L", "ans,linar,nam,tread,ot,assertIn,dependin,socio-economic"] exclude: | (?x)^( pyproject.toml | diff --git a/OAI_CONFIG_LIST_sample b/OAI_CONFIG_LIST_sample index 9fc0dc803a0..aa0b3921629 100644 --- a/OAI_CONFIG_LIST_sample +++ b/OAI_CONFIG_LIST_sample @@ -13,13 +13,13 @@ "api_key": "", "base_url": "", "api_type": "azure", - "api_version": "2024-02-15-preview" + "api_version": "2024-02-01" }, { "model": "", "api_key": "", "base_url": "", "api_type": "azure", - "api_version": "2024-02-15-preview" + "api_version": "2024-02-01" } ] diff --git a/autogen/agentchat/contrib/retrieve_user_proxy_agent.py b/autogen/agentchat/contrib/retrieve_user_proxy_agent.py index 8d4dbc0d50a..dd2ef019127 100644 --- a/autogen/agentchat/contrib/retrieve_user_proxy_agent.py +++ b/autogen/agentchat/contrib/retrieve_user_proxy_agent.py @@ -348,7 +348,7 @@ def _init_db(self): else: chunks, sources = split_files_to_chunks( get_files_from_dir(self._docs_path, self._custom_text_types, 
self._recursive), - self._max_tokens, + self._chunk_token_size, self._chunk_mode, self._must_break_at_empty_line, ) diff --git a/autogen/agentchat/contrib/vectordb/pgvectordb.py b/autogen/agentchat/contrib/vectordb/pgvectordb.py index 38507cb7998..ac86802b672 100644 --- a/autogen/agentchat/contrib/vectordb/pgvectordb.py +++ b/autogen/agentchat/contrib/vectordb/pgvectordb.py @@ -32,10 +32,11 @@ class Collection: client: The PGVector client. collection_name (str): The name of the collection. Default is "documents". embedding_function (Callable): The embedding function used to generate the vector representation. + Default is None. SentenceTransformer("all-MiniLM-L6-v2").encode will be used when None. + Models can be chosen from: + https://huggingface.co/models?library=sentence-transformers metadata (Optional[dict]): The metadata of the collection. get_or_create (Optional): The flag indicating whether to get or create the collection. - model_name: (Optional str) | Sentence embedding model to use. Models can be chosen from: - https://huggingface.co/models?library=sentence-transformers """ def __init__( @@ -45,7 +46,6 @@ def __init__( embedding_function: Callable = None, metadata=None, get_or_create=None, - model_name="all-MiniLM-L6-v2", ): """ Initialize the Collection object. @@ -56,30 +56,26 @@ def __init__( embedding_function: The embedding function used to generate the vector representation. metadata: The metadata of the collection. get_or_create: The flag indicating whether to get or create the collection. - model_name: | Sentence embedding model to use. Models can be chosen from: - https://huggingface.co/models?library=sentence-transformers Returns: None """ self.client = client - self.embedding_function = embedding_function - self.model_name = model_name self.name = self.set_collection_name(collection_name) self.require_embeddings_or_documents = False self.ids = [] - try: - self.embedding_function = ( - SentenceTransformer(self.model_name) if embedding_function is None else embedding_function - ) - except Exception as e: - logger.error( - f"Validate the model name entered: {self.model_name} " - f"from https://huggingface.co/models?library=sentence-transformers\nError: {e}" - ) - raise e + if embedding_function: + self.embedding_function = embedding_function + else: + self.embedding_function = SentenceTransformer("all-MiniLM-L6-v2").encode self.metadata = metadata if metadata else {"hnsw:space": "ip", "hnsw:construction_ef": 32, "hnsw:M": 16} self.documents = "" self.get_or_create = get_or_create + # This will get the model dimension size by computing the embeddings dimensions + sentences = [ + "The weather is lovely today in paradise.", + ] + embeddings = self.embedding_function(sentences) + self.dimension = len(embeddings[0]) def set_collection_name(self, collection_name) -> str: name = re.sub("-", "_", collection_name) @@ -115,14 +111,14 @@ def add(self, ids: List[ItemID], documents: List, embeddings: List = None, metad elif metadatas is not None: for doc_id, metadata, document in zip(ids, metadatas, documents): metadata = re.sub("'", '"', str(metadata)) - embedding = self.embedding_function.encode(document) + embedding = self.embedding_function(document) sql_values.append((doc_id, metadata, embedding, document)) sql_string = ( f"INSERT INTO {self.name} (id, metadatas, embedding, documents)\n" f"VALUES (%s, %s, %s, %s);\n" ) else: for doc_id, document in zip(ids, documents): - embedding = self.embedding_function.encode(document) + embedding = self.embedding_function(document) 
sql_values.append((doc_id, document, embedding)) sql_string = f"INSERT INTO {self.name} (id, documents, embedding)\n" f"VALUES (%s, %s, %s);\n" logger.debug(f"Add SQL String:\n{sql_string}\n{sql_values}") @@ -166,7 +162,7 @@ def upsert(self, ids: List[ItemID], documents: List, embeddings: List = None, me elif metadatas is not None: for doc_id, metadata, document in zip(ids, metadatas, documents): metadata = re.sub("'", '"', str(metadata)) - embedding = self.embedding_function.encode(document) + embedding = self.embedding_function(document) sql_values.append((doc_id, metadata, embedding, document, metadata, document, embedding)) sql_string = ( f"INSERT INTO {self.name} (id, metadatas, embedding, documents)\n" @@ -176,7 +172,7 @@ def upsert(self, ids: List[ItemID], documents: List, embeddings: List = None, me ) else: for doc_id, document in zip(ids, documents): - embedding = self.embedding_function.encode(document) + embedding = self.embedding_function(document) sql_values.append((doc_id, document, embedding, document)) sql_string = ( f"INSERT INTO {self.name} (id, documents, embedding)\n" @@ -304,7 +300,7 @@ def get( ) except (psycopg.errors.UndefinedTable, psycopg.errors.UndefinedColumn) as e: logger.info(f"Error executing select on non-existent table: {self.name}. Creating it instead. Error: {e}") - self.create_collection(collection_name=self.name) + self.create_collection(collection_name=self.name, dimension=self.dimension) logger.info(f"Created table {self.name}") cursor.close() @@ -419,7 +415,7 @@ def query( cursor = self.client.cursor() results = [] for query_text in query_texts: - vector = self.embedding_function.encode(query_text, convert_to_tensor=False).tolist() + vector = self.embedding_function(query_text, convert_to_tensor=False).tolist() if distance_type.lower() == "cosine": index_function = "<=>" elif distance_type.lower() == "euclidean": @@ -526,22 +522,31 @@ def delete_collection(self, collection_name: Optional[str] = None) -> None: cursor.execute(f"DROP TABLE IF EXISTS {self.name}") cursor.close() - def create_collection(self, collection_name: Optional[str] = None) -> None: + def create_collection( + self, collection_name: Optional[str] = None, dimension: Optional[Union[str, int]] = None + ) -> None: """ Create a new collection. Args: collection_name (Optional[str]): The name of the new collection. + dimension (Optional[Union[str, int]]): The dimension size of the sentence embedding model Returns: None """ if collection_name: self.name = collection_name + + if dimension: + self.dimension = dimension + elif self.dimension is None: + self.dimension = 384 + cursor = self.client.cursor() cursor.execute( f"CREATE TABLE {self.name} (" - f"documents text, id CHAR(8) PRIMARY KEY, metadatas JSONB, embedding vector(384));" + f"documents text, id CHAR(8) PRIMARY KEY, metadatas JSONB, embedding vector({self.dimension}));" f"CREATE INDEX " f'ON {self.name} USING hnsw (embedding vector_l2_ops) WITH (m = {self.metadata["hnsw:M"]}, ' f'ef_construction = {self.metadata["hnsw:construction_ef"]});' @@ -573,7 +578,6 @@ def __init__( connect_timeout: Optional[int] = 10, embedding_function: Callable = None, metadata: Optional[dict] = None, - model_name: Optional[str] = "all-MiniLM-L6-v2", ) -> None: """ Initialize the vector database. @@ -591,15 +595,14 @@ def __init__( username: str | The database username to use. Default is None. password: str | The database user password to use. Default is None. connect_timeout: int | The timeout to set for the connection. Default is 10. 
- embedding_function: Callable | The embedding function used to generate the vector representation - of the documents. Default is None. + embedding_function: Callable | The embedding function used to generate the vector representation. + Default is None. SentenceTransformer("all-MiniLM-L6-v2").encode will be used when None. + Models can be chosen from: + https://huggingface.co/models?library=sentence-transformers metadata: dict | The metadata of the vector database. Default is None. If None, it will use this setting: {"hnsw:space": "ip", "hnsw:construction_ef": 30, "hnsw:M": 16}. Creates Index on table using hnsw (embedding vector_l2_ops) WITH (m = hnsw:M) ef_construction = "hnsw:construction_ef". For more info: https://github.com/pgvector/pgvector?tab=readme-ov-file#hnsw - model_name: str | Sentence embedding model to use. Models can be chosen from: - https://huggingface.co/models?library=sentence-transformers - Returns: None """ @@ -613,17 +616,10 @@ def __init__( password=password, connect_timeout=connect_timeout, ) - self.model_name = model_name - try: - self.embedding_function = ( - SentenceTransformer(self.model_name) if embedding_function is None else embedding_function - ) - except Exception as e: - logger.error( - f"Validate the model name entered: {self.model_name} " - f"from https://huggingface.co/models?library=sentence-transformers\nError: {e}" - ) - raise e + if embedding_function: + self.embedding_function = embedding_function + else: + self.embedding_function = SentenceTransformer("all-MiniLM-L6-v2").encode self.metadata = metadata register_vector(self.client) self.active_collection = None @@ -738,7 +734,6 @@ def create_collection( embedding_function=self.embedding_function, get_or_create=get_or_create, metadata=self.metadata, - model_name=self.model_name, ) collection.set_collection_name(collection_name=collection_name) collection.create_collection(collection_name=collection_name) @@ -751,7 +746,6 @@ def create_collection( embedding_function=self.embedding_function, get_or_create=get_or_create, metadata=self.metadata, - model_name=self.model_name, ) collection.set_collection_name(collection_name=collection_name) collection.create_collection(collection_name=collection_name) @@ -765,7 +759,6 @@ def create_collection( embedding_function=self.embedding_function, get_or_create=get_or_create, metadata=self.metadata, - model_name=self.model_name, ) collection.set_collection_name(collection_name=collection_name) collection.create_collection(collection_name=collection_name) @@ -797,7 +790,6 @@ def get_collection(self, collection_name: str = None) -> Collection: client=self.client, collection_name=collection_name, embedding_function=self.embedding_function, - model_name=self.model_name, ) return self.active_collection diff --git a/autogen/oai/client.py b/autogen/oai/client.py index 4c1da7a3931..f1a9c2fbf25 100644 --- a/autogen/oai/client.py +++ b/autogen/oai/client.py @@ -349,7 +349,7 @@ def __init__(self, *, config_list: Optional[List[Dict[str, Any]]] = None, **base "api_key": os.environ.get("AZURE_OPENAI_API_KEY"), "api_type": "azure", "base_url": os.environ.get("AZURE_OPENAI_API_BASE"), - "api_version": "2024-02-15-preview", + "api_version": "2024-02-01", }, { "model": "gpt-3.5-turbo", @@ -559,7 +559,7 @@ def yes_or_no_filter(context, response): ``` - allow_format_str_template (bool | None): Whether to allow format string template in the config. Default to false. - - api_version (str | None): The api version. Default to None. E.g., "2024-02-15-preview". 
+ - api_version (str | None): The api version. Default to None. E.g., "2024-02-01". Raises: - RuntimeError: If all declared custom model clients are not registered - APIError: If any model client create call raises an APIError diff --git a/autogen/oai/completion.py b/autogen/oai/completion.py index e3b01ee4dd8..5a62cde33df 100644 --- a/autogen/oai/completion.py +++ b/autogen/oai/completion.py @@ -741,7 +741,7 @@ def create( "api_key": os.environ.get("AZURE_OPENAI_API_KEY"), "api_type": "azure", "base_url": os.environ.get("AZURE_OPENAI_API_BASE"), - "api_version": "2024-02-15-preview", + "api_version": "2024-02-01", }, { "model": "gpt-3.5-turbo", diff --git a/autogen/oai/gemini.py b/autogen/oai/gemini.py index 60a2062bb89..54a5c85d3fe 100644 --- a/autogen/oai/gemini.py +++ b/autogen/oai/gemini.py @@ -151,7 +151,7 @@ def create(self, params: Dict) -> ChatCompletion: if not model_name: raise ValueError( "Please provide a model name for the Gemini Client. " - "You can configurate it in the OAI Config List file. " + "You can configure it in the OAI Config List file. " "See this [LLM configuration tutorial](https://microsoft.github.io/autogen/docs/topics/llm_configuration/) for more details." ) diff --git a/autogen/oai/openai_utils.py b/autogen/oai/openai_utils.py index a676e964390..0c8a0a41337 100644 --- a/autogen/oai/openai_utils.py +++ b/autogen/oai/openai_utils.py @@ -14,7 +14,7 @@ from packaging.version import parse NON_CACHE_KEY = ["api_key", "base_url", "api_type", "api_version", "azure_ad_token", "azure_ad_token_provider"] -DEFAULT_AZURE_API_VERSION = "2024-02-15-preview" +DEFAULT_AZURE_API_VERSION = "2024-02-01" OAI_PRICE1K = { # https://openai.com/api/pricing/ # gpt-4o @@ -127,7 +127,7 @@ def get_config_list( # Optionally, define the API type and version if they are common for all keys api_type = 'azure' - api_version = '2024-02-15-preview' + api_version = '2024-02-01' # Call the get_config_list function to get a list of configuration dictionaries config_list = get_config_list(api_keys, base_urls, api_type, api_version) diff --git a/autogen/version.py b/autogen/version.py index 968391a2dbd..4f6b515ecb2 100644 --- a/autogen/version.py +++ b/autogen/version.py @@ -1 +1 @@ -__version__ = "0.2.28" +__version__ = "0.2.29" diff --git a/dotnet/AutoGen.sln b/dotnet/AutoGen.sln index 2bc106c0aca..6c4e8f0396b 100644 --- a/dotnet/AutoGen.sln +++ b/dotnet/AutoGen.sln @@ -53,6 +53,11 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AutoGen.Anthropic.Tests", " EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AutoGen.Anthropic.Samples", "sample\AutoGen.Anthropic.Samples\AutoGen.Anthropic.Samples.csproj", "{834B4E85-64E5-4382-8465-548F332E5298}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AutoGen.Gemini", "src\AutoGen.Gemini\AutoGen.Gemini.csproj", "{EFE0DC86-80FC-4D52-95B7-07654BA1A769}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AutoGen.Gemini.Tests", "test\AutoGen.Gemini.Tests\AutoGen.Gemini.Tests.csproj", "{8EA16BAB-465A-4C07-ABC4-1070D40067E9}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AutoGen.Gemini.Sample", "sample\AutoGen.Gemini.Sample\AutoGen.Gemini.Sample.csproj", "{19679B75-CE3A-4DF0-A3F0-CA369D2760A4}" Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AutoGen.AotCompatibility.Tests", "test\AutoGen.AotCompatibility.Tests\AutoGen.AotCompatibility.Tests.csproj", "{6B82F26D-5040-4453-B21B-C8D1F913CE4C}" EndProject Global @@ -149,6 +154,18 @@ Global {834B4E85-64E5-4382-8465-548F332E5298}.Debug|Any 
CPU.Build.0 = Debug|Any CPU {834B4E85-64E5-4382-8465-548F332E5298}.Release|Any CPU.ActiveCfg = Release|Any CPU {834B4E85-64E5-4382-8465-548F332E5298}.Release|Any CPU.Build.0 = Release|Any CPU + {EFE0DC86-80FC-4D52-95B7-07654BA1A769}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {EFE0DC86-80FC-4D52-95B7-07654BA1A769}.Debug|Any CPU.Build.0 = Debug|Any CPU + {EFE0DC86-80FC-4D52-95B7-07654BA1A769}.Release|Any CPU.ActiveCfg = Release|Any CPU + {EFE0DC86-80FC-4D52-95B7-07654BA1A769}.Release|Any CPU.Build.0 = Release|Any CPU + {8EA16BAB-465A-4C07-ABC4-1070D40067E9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {8EA16BAB-465A-4C07-ABC4-1070D40067E9}.Debug|Any CPU.Build.0 = Debug|Any CPU + {8EA16BAB-465A-4C07-ABC4-1070D40067E9}.Release|Any CPU.ActiveCfg = Release|Any CPU + {8EA16BAB-465A-4C07-ABC4-1070D40067E9}.Release|Any CPU.Build.0 = Release|Any CPU + {19679B75-CE3A-4DF0-A3F0-CA369D2760A4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {19679B75-CE3A-4DF0-A3F0-CA369D2760A4}.Debug|Any CPU.Build.0 = Debug|Any CPU + {19679B75-CE3A-4DF0-A3F0-CA369D2760A4}.Release|Any CPU.ActiveCfg = Release|Any CPU + {19679B75-CE3A-4DF0-A3F0-CA369D2760A4}.Release|Any CPU.Build.0 = Release|Any CPU {6B82F26D-5040-4453-B21B-C8D1F913CE4C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {6B82F26D-5040-4453-B21B-C8D1F913CE4C}.Debug|Any CPU.Build.0 = Debug|Any CPU {6B82F26D-5040-4453-B21B-C8D1F913CE4C}.Release|Any CPU.ActiveCfg = Release|Any CPU @@ -180,6 +197,9 @@ Global {6A95E113-B824-4524-8F13-CD0C3E1C8804} = {18BF8DD7-0585-48BF-8F97-AD333080CE06} {815E937E-86D6-4476-9EC6-B7FBCBBB5DB6} = {F823671B-3ECA-4AE6-86DA-25E920D3FE64} {834B4E85-64E5-4382-8465-548F332E5298} = {FBFEAD1F-29EB-4D99-A672-0CD8473E10B9} + {EFE0DC86-80FC-4D52-95B7-07654BA1A769} = {18BF8DD7-0585-48BF-8F97-AD333080CE06} + {8EA16BAB-465A-4C07-ABC4-1070D40067E9} = {F823671B-3ECA-4AE6-86DA-25E920D3FE64} + {19679B75-CE3A-4DF0-A3F0-CA369D2760A4} = {FBFEAD1F-29EB-4D99-A672-0CD8473E10B9} {6B82F26D-5040-4453-B21B-C8D1F913CE4C} = {F823671B-3ECA-4AE6-86DA-25E920D3FE64} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution diff --git a/dotnet/Directory.Build.props b/dotnet/Directory.Build.props index aeb667438e2..4b3e9441f1e 100644 --- a/dotnet/Directory.Build.props +++ b/dotnet/Directory.Build.props @@ -13,12 +13,37 @@ CS1998;CS1591 $(NoWarn);$(CSNoWarn);NU5104 true + true false true true + false $(MSBuildThisFileDirectory) + + + + + + + + + + + + + Always + testData/%(RecursiveDir)%(Filename)%(Extension) + + + + + + Always + resource/%(RecursiveDir)%(Filename)%(Extension) + + diff --git a/dotnet/eng/Version.props b/dotnet/eng/Version.props index a43da436b38..0b8dcaa565c 100644 --- a/dotnet/eng/Version.props +++ b/dotnet/eng/Version.props @@ -12,6 +12,7 @@ 17.7.0 1.0.0-beta.24229.4 8.0.0 + 3.0.0 4.3.0.2 \ No newline at end of file diff --git a/dotnet/sample/AutoGen.Ollama.Sample/images/background.png b/dotnet/resource/images/background.png similarity index 100% rename from dotnet/sample/AutoGen.Ollama.Sample/images/background.png rename to dotnet/resource/images/background.png diff --git a/dotnet/resource/images/square.png b/dotnet/resource/images/square.png new file mode 100644 index 00000000000..afb4f4cd4df --- /dev/null +++ b/dotnet/resource/images/square.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8323d0b8eceb752e14c29543b2e28bb2fc648ed9719095c31b7708867a4dc918 +size 491 diff --git a/dotnet/sample/AutoGen.BasicSamples/Example05_Dalle_And_GPT4V.cs b/dotnet/sample/AutoGen.BasicSamples/Example05_Dalle_And_GPT4V.cs index 
2d21615ef71..67fd40ea3ac 100644 --- a/dotnet/sample/AutoGen.BasicSamples/Example05_Dalle_And_GPT4V.cs +++ b/dotnet/sample/AutoGen.BasicSamples/Example05_Dalle_And_GPT4V.cs @@ -93,7 +93,7 @@ public static async Task RunAsync() if (reply.GetContent() is string content && content.Contains("IMAGE_GENERATION")) { var imageUrl = content.Split("\n").Last(); - var imageMessage = new ImageMessage(Role.Assistant, imageUrl, from: reply.From); + var imageMessage = new ImageMessage(Role.Assistant, imageUrl, from: reply.From, mimeType: "image/png"); Console.WriteLine($"download image from {imageUrl} to {imagePath}"); var httpClient = new HttpClient(); diff --git a/dotnet/sample/AutoGen.Gemini.Sample/AutoGen.Gemini.Sample.csproj b/dotnet/sample/AutoGen.Gemini.Sample/AutoGen.Gemini.Sample.csproj new file mode 100644 index 00000000000..b1779b56c39 --- /dev/null +++ b/dotnet/sample/AutoGen.Gemini.Sample/AutoGen.Gemini.Sample.csproj @@ -0,0 +1,19 @@ + + + + Exe + net8.0 + enable + enable + true + True + + + + + + + + + + diff --git a/dotnet/sample/AutoGen.Gemini.Sample/Chat_With_Google_Gemini.cs b/dotnet/sample/AutoGen.Gemini.Sample/Chat_With_Google_Gemini.cs new file mode 100644 index 00000000000..233c35c8122 --- /dev/null +++ b/dotnet/sample/AutoGen.Gemini.Sample/Chat_With_Google_Gemini.cs @@ -0,0 +1,38 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Chat_With_Google_Gemini.cs + +using AutoGen.Core; +using AutoGen.Gemini.Middleware; +using FluentAssertions; + +namespace AutoGen.Gemini.Sample; + +public class Chat_With_Google_Gemini +{ + public static async Task RunAsync() + { + var apiKey = Environment.GetEnvironmentVariable("GOOGLE_GEMINI_API_KEY"); + + if (apiKey is null) + { + Console.WriteLine("Please set GOOGLE_GEMINI_API_KEY environment variable."); + return; + } + + #region Create_Gemini_Agent + var geminiAgent = new GeminiChatAgent( + name: "gemini", + model: "gemini-1.5-flash-001", + apiKey: apiKey, + systemMessage: "You are a helpful C# engineer, put your code between ```csharp and ```, don't explain the code") + .RegisterMessageConnector() + .RegisterPrintMessage(); + #endregion Create_Gemini_Agent + + var reply = await geminiAgent.SendAsync("Can you write a piece of C# code to calculate 100th of fibonacci?"); + + #region verify_reply + reply.Should().BeOfType(); + #endregion verify_reply + } +} diff --git a/dotnet/sample/AutoGen.Gemini.Sample/Chat_With_Vertex_Gemini.cs b/dotnet/sample/AutoGen.Gemini.Sample/Chat_With_Vertex_Gemini.cs new file mode 100644 index 00000000000..679a07ed69b --- /dev/null +++ b/dotnet/sample/AutoGen.Gemini.Sample/Chat_With_Vertex_Gemini.cs @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Chat_With_Vertex_Gemini.cs + +using AutoGen.Core; +using AutoGen.Gemini.Middleware; +using FluentAssertions; + +namespace AutoGen.Gemini.Sample; + +public class Chat_With_Vertex_Gemini +{ + public static async Task RunAsync() + { + var projectID = Environment.GetEnvironmentVariable("GCP_VERTEX_PROJECT_ID"); + + if (projectID is null) + { + Console.WriteLine("Please set GCP_VERTEX_PROJECT_ID environment variable."); + return; + } + + #region Create_Gemini_Agent + var geminiAgent = new GeminiChatAgent( + name: "gemini", + model: "gemini-1.5-flash-001", + location: "us-east1", + project: projectID, + systemMessage: "You are a helpful C# engineer, put your code between ```csharp and ```, don't explain the code") + .RegisterMessageConnector() + .RegisterPrintMessage(); + #endregion Create_Gemini_Agent + + var reply = await geminiAgent.SendAsync("Can you write a piece of C# code to calculate 100th of fibonacci?"); + + #region verify_reply + reply.Should().BeOfType(); + #endregion verify_reply + } +} diff --git a/dotnet/sample/AutoGen.Gemini.Sample/Function_Call_With_Gemini.cs b/dotnet/sample/AutoGen.Gemini.Sample/Function_Call_With_Gemini.cs new file mode 100644 index 00000000000..d1b681d8709 --- /dev/null +++ b/dotnet/sample/AutoGen.Gemini.Sample/Function_Call_With_Gemini.cs @@ -0,0 +1,129 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Function_Call_With_Gemini.cs + +using AutoGen.Core; +using AutoGen.Gemini.Middleware; +using FluentAssertions; +using Google.Cloud.AIPlatform.V1; + +namespace AutoGen.Gemini.Sample; + +public partial class MovieFunction +{ + /// + /// find movie titles currently playing in theaters based on any description, genre, title words, etc. + /// + /// The city and state, e.g. San Francisco, CA or a zip code e.g. 95616 + /// Any kind of description including category or genre, title words, attributes, etc. + /// + [Function] + public async Task FindMovies(string location, string description) + { + // dummy implementation + var movies = new List { "Barbie", "Spiderman", "Batman" }; + var result = $"Movies playing in {location} based on {description} are: {string.Join(", ", movies)}"; + + return result; + } + + /// + /// find theaters based on location and optionally movie title which is currently playing in theaters + /// + /// The city and state, e.g. San Francisco, CA or a zip code e.g. 95616 + /// Any movie title + [Function] + public async Task FindTheaters(string location, string movie) + { + // dummy implementation + var theaters = new List { "AMC", "Regal", "Cinemark" }; + var result = $"Theaters playing {movie} in {location} are: {string.Join(", ", theaters)}"; + + return result; + } + + /// + /// Find the start times for movies playing in a specific theater + /// + /// The city and state, e.g. San Francisco, CA or a zip code e.g. 
95616 + /// Any movie title + /// Name of the theater + /// Date for requested showtime + /// + [Function] + public async Task GetShowtimes(string location, string movie, string theater, string date) + { + // dummy implementation + var showtimes = new List { "10:00 AM", "12:00 PM", "2:00 PM", "4:00 PM", "6:00 PM", "8:00 PM" }; + var result = $"Showtimes for {movie} at {theater} in {location} are: {string.Join(", ", showtimes)}"; + + return result; + } + +} + +/// +/// Modified from https://ai.google.dev/gemini-api/docs/function-calling +/// +public partial class Function_Call_With_Gemini +{ + public static async Task RunAsync() + { + var projectID = Environment.GetEnvironmentVariable("GCP_VERTEX_PROJECT_ID"); + + if (projectID is null) + { + Console.WriteLine("Please set GCP_VERTEX_PROJECT_ID environment variable."); + return; + } + + var movieFunction = new MovieFunction(); + var functionMiddleware = new FunctionCallMiddleware( + functions: [ + movieFunction.FindMoviesFunctionContract, + movieFunction.FindTheatersFunctionContract, + movieFunction.GetShowtimesFunctionContract + ], + functionMap: new Dictionary>> + { + { movieFunction.FindMoviesFunctionContract.Name!, movieFunction.FindMoviesWrapper }, + { movieFunction.FindTheatersFunctionContract.Name!, movieFunction.FindTheatersWrapper }, + { movieFunction.GetShowtimesFunctionContract.Name!, movieFunction.GetShowtimesWrapper }, + }); + + #region Create_Gemini_Agent + var geminiAgent = new GeminiChatAgent( + name: "gemini", + model: "gemini-1.5-flash-001", + location: "us-central1", + project: projectID, + systemMessage: "You are a helpful AI assistant", + toolConfig: new ToolConfig() + { + FunctionCallingConfig = new FunctionCallingConfig() + { + Mode = FunctionCallingConfig.Types.Mode.Auto, + } + }) + .RegisterMessageConnector() + .RegisterPrintMessage() + .RegisterStreamingMiddleware(functionMiddleware); + #endregion Create_Gemini_Agent + + #region Single_turn + var question = new TextMessage(Role.User, "What movies are showing in North Seattle tonight?"); + var functionCallReply = await geminiAgent.SendAsync(question); + #endregion Single_turn + + #region Single_turn_verify_reply + functionCallReply.Should().BeOfType(); + #endregion Single_turn_verify_reply + + #region Multi_turn + var finalReply = await geminiAgent.SendAsync(chatHistory: [question, functionCallReply]); + #endregion Multi_turn + + #region Multi_turn_verify_reply + finalReply.Should().BeOfType(); + #endregion Multi_turn_verify_reply + } +} diff --git a/dotnet/sample/AutoGen.Gemini.Sample/Image_Chat_With_Vertex_Gemini.cs b/dotnet/sample/AutoGen.Gemini.Sample/Image_Chat_With_Vertex_Gemini.cs new file mode 100644 index 00000000000..86193b653d9 --- /dev/null +++ b/dotnet/sample/AutoGen.Gemini.Sample/Image_Chat_With_Vertex_Gemini.cs @@ -0,0 +1,44 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Image_Chat_With_Vertex_Gemini.cs + +using AutoGen.Core; +using AutoGen.Gemini.Middleware; +using FluentAssertions; + +namespace AutoGen.Gemini.Sample; + +public class Image_Chat_With_Vertex_Gemini +{ + public static async Task RunAsync() + { + var projectID = Environment.GetEnvironmentVariable("GCP_VERTEX_PROJECT_ID"); + + if (projectID is null) + { + Console.WriteLine("Please set GCP_VERTEX_PROJECT_ID environment variable."); + return; + } + + #region Create_Gemini_Agent + var geminiAgent = new GeminiChatAgent( + name: "gemini", + model: "gemini-1.5-flash-001", + location: "us-east4", + project: projectID, + systemMessage: "You explain image content to user") + .RegisterMessageConnector() + .RegisterPrintMessage(); + #endregion Create_Gemini_Agent + + #region Send_Image_Request + var imagePath = Path.Combine("resource", "images", "background.png"); + var image = await File.ReadAllBytesAsync(imagePath); + var imageMessage = new ImageMessage(Role.User, BinaryData.FromBytes(image, "image/png")); + var reply = await geminiAgent.SendAsync("what's in the image", [imageMessage]); + #endregion Send_Image_Request + + #region Verify_Reply + reply.Should().BeOfType(); + #endregion Verify_Reply + } +} diff --git a/dotnet/sample/AutoGen.Gemini.Sample/Program.cs b/dotnet/sample/AutoGen.Gemini.Sample/Program.cs new file mode 100644 index 00000000000..5e76942209a --- /dev/null +++ b/dotnet/sample/AutoGen.Gemini.Sample/Program.cs @@ -0,0 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Program.cs + +using AutoGen.Gemini.Sample; + +Image_Chat_With_Vertex_Gemini.RunAsync().Wait(); diff --git a/dotnet/sample/AutoGen.Ollama.Sample/AutoGen.Ollama.Sample.csproj b/dotnet/sample/AutoGen.Ollama.Sample/AutoGen.Ollama.Sample.csproj index 1dc94400869..5277408d595 100644 --- a/dotnet/sample/AutoGen.Ollama.Sample/AutoGen.Ollama.Sample.csproj +++ b/dotnet/sample/AutoGen.Ollama.Sample/AutoGen.Ollama.Sample.csproj @@ -5,6 +5,7 @@ enable True $(NoWarn);CS8981;CS8600;CS8602;CS8604;CS8618;CS0219;SKEXP0054;SKEXP0050;SKEXP0110 + true @@ -15,10 +16,4 @@ - - - PreserveNewest - - - diff --git a/dotnet/sample/AutoGen.Ollama.Sample/Chat_With_LLaVA.cs b/dotnet/sample/AutoGen.Ollama.Sample/Chat_With_LLaVA.cs index d52afb075e1..d9e38c886c2 100644 --- a/dotnet/sample/AutoGen.Ollama.Sample/Chat_With_LLaVA.cs +++ b/dotnet/sample/AutoGen.Ollama.Sample/Chat_With_LLaVA.cs @@ -28,7 +28,7 @@ public static async Task RunAsync() #endregion Create_Ollama_Agent #region Send_Message - var image = Path.Combine("images", "background.png"); + var image = Path.Combine("resource", "images", "background.png"); var binaryData = BinaryData.FromBytes(File.ReadAllBytes(image), "image/png"); var imageMessage = new ImageMessage(Role.User, binaryData); var textMessage = new TextMessage(Role.User, "what's in this image?"); diff --git a/dotnet/src/AutoGen.Anthropic/AnthropicClient.cs b/dotnet/src/AutoGen.Anthropic/AnthropicClient.cs index 8ea0bef86e2..90bd33683f2 100644 --- a/dotnet/src/AutoGen.Anthropic/AnthropicClient.cs +++ b/dotnet/src/AutoGen.Anthropic/AnthropicClient.cs @@ -23,7 +23,8 @@ public sealed class AnthropicClient : IDisposable private static readonly JsonSerializerOptions JsonSerializerOptions = new() { - DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull + DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull, + Converters = { new ContentBaseConverter() } }; private static readonly JsonSerializerOptions JsonDeserializerOptions = new() diff --git 
a/dotnet/src/AutoGen.Anthropic/DTO/ChatCompletionRequest.cs b/dotnet/src/AutoGen.Anthropic/DTO/ChatCompletionRequest.cs index fa1654bc11d..0c1749eaa98 100644 --- a/dotnet/src/AutoGen.Anthropic/DTO/ChatCompletionRequest.cs +++ b/dotnet/src/AutoGen.Anthropic/DTO/ChatCompletionRequest.cs @@ -1,11 +1,10 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. - +// Copyright (c) Microsoft Corporation. All rights reserved. +// ChatCompletionRequest.cs using System.Text.Json.Serialization; +using System.Collections.Generic; namespace AutoGen.Anthropic.DTO; -using System.Collections.Generic; - public class ChatCompletionRequest { [JsonPropertyName("model")] @@ -50,9 +49,15 @@ public class ChatMessage public string Role { get; set; } [JsonPropertyName("content")] - public string Content { get; set; } + public List Content { get; set; } public ChatMessage(string role, string content) + { + Role = role; + Content = new List() { new TextContent { Text = content } }; + } + + public ChatMessage(string role, List content) { Role = role; Content = content; diff --git a/dotnet/src/AutoGen.Anthropic/Middleware/AnthropicMessageConnector.cs b/dotnet/src/AutoGen.Anthropic/Middleware/AnthropicMessageConnector.cs index bfe79190925..bb2f5820f74 100644 --- a/dotnet/src/AutoGen.Anthropic/Middleware/AnthropicMessageConnector.cs +++ b/dotnet/src/AutoGen.Anthropic/Middleware/AnthropicMessageConnector.cs @@ -4,6 +4,7 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Net.Http; using System.Runtime.CompilerServices; using System.Threading; using System.Threading.Tasks; @@ -19,7 +20,7 @@ public class AnthropicMessageConnector : IStreamingMiddleware public async Task InvokeAsync(MiddlewareContext context, IAgent agent, CancellationToken cancellationToken = default) { var messages = context.Messages; - var chatMessages = ProcessMessage(messages, agent); + var chatMessages = await ProcessMessageAsync(messages, agent); var response = await agent.GenerateReplyAsync(chatMessages, context.Options, cancellationToken); return response is IMessage chatMessage @@ -31,7 +32,7 @@ public async IAsyncEnumerable InvokeAsync(MiddlewareContext c [EnumeratorCancellation] CancellationToken cancellationToken = default) { var messages = context.Messages; - var chatMessages = ProcessMessage(messages, agent); + var chatMessages = await ProcessMessageAsync(messages, agent); await foreach (var reply in agent.GenerateStreamingReplyAsync(chatMessages, context.Options, cancellationToken)) { @@ -53,60 +54,78 @@ public async IAsyncEnumerable InvokeAsync(MiddlewareContext c private IStreamingMessage? ProcessChatCompletionResponse(IStreamingMessage chatMessage, IStreamingAgent agent) { - Delta? delta = chatMessage.Content.Delta; + var delta = chatMessage.Content.Delta; return delta != null && !string.IsNullOrEmpty(delta.Text) ? 
new TextMessageUpdate(role: Role.Assistant, delta.Text, from: agent.Name) : null; } - private IEnumerable ProcessMessage(IEnumerable messages, IAgent agent) + private async Task> ProcessMessageAsync(IEnumerable messages, IAgent agent) { - return messages.SelectMany(m => + var processedMessages = new List(); + + foreach (var message in messages) { - return m switch + var processedMessage = message switch { TextMessage textMessage => ProcessTextMessage(textMessage, agent), - _ => [m], + + ImageMessage imageMessage => + new MessageEnvelope(new ChatMessage("user", + new ContentBase[] { new ImageContent { Source = await ProcessImageSourceAsync(imageMessage) } } + .ToList()), + from: agent.Name), + + MultiModalMessage multiModalMessage => await ProcessMultiModalMessageAsync(multiModalMessage, agent), + _ => message, }; - }); + + processedMessages.Add(processedMessage); + } + + return processedMessages; } private IMessage PostProcessMessage(ChatCompletionResponse response, IAgent from) { if (response.Content is null) + { throw new ArgumentNullException(nameof(response.Content)); + } if (response.Content.Count != 1) + { throw new NotSupportedException($"{nameof(response.Content)} != 1"); + } return new TextMessage(Role.Assistant, ((TextContent)response.Content[0]).Text ?? string.Empty, from: from.Name); } - private IEnumerable> ProcessTextMessage(TextMessage textMessage, IAgent agent) + private IMessage ProcessTextMessage(TextMessage textMessage, IAgent agent) { - IEnumerable messages; + ChatMessage messages; if (textMessage.From == agent.Name) { - messages = [new ChatMessage( - "assistant", textMessage.Content)]; + messages = new ChatMessage( + "assistant", textMessage.Content); } else if (textMessage.From is null) { if (textMessage.Role == Role.User) { - messages = [new ChatMessage( - "user", textMessage.Content)]; + messages = new ChatMessage( + "user", textMessage.Content); } else if (textMessage.Role == Role.Assistant) { - messages = [new ChatMessage( - "assistant", textMessage.Content)]; + messages = new ChatMessage( + "assistant", textMessage.Content); } else if (textMessage.Role == Role.System) { - messages = [new ChatMessage( - "system", textMessage.Content)]; + messages = new ChatMessage( + "system", textMessage.Content); } else { @@ -116,10 +135,61 @@ private IEnumerable> ProcessTextMessage(TextMessage textMe else { // if from is not null, then the message is from user - messages = [new ChatMessage( - "user", textMessage.Content)]; + messages = new ChatMessage( + "user", textMessage.Content); } - return messages.Select(m => new MessageEnvelope(m, from: textMessage.From)); + return new MessageEnvelope(messages, from: textMessage.From); + } + + private async Task ProcessMultiModalMessageAsync(MultiModalMessage multiModalMessage, IAgent agent) + { + var content = new List(); + foreach (var message in multiModalMessage.Content) + { + switch (message) + { + case TextMessage textMessage when textMessage.GetContent() is not null: + content.Add(new TextContent { Text = textMessage.GetContent() }); + break; + case ImageMessage imageMessage: + content.Add(new ImageContent() { Source = await ProcessImageSourceAsync(imageMessage) }); + break; + } + } + + var chatMessage = new ChatMessage("user", content); + return MessageEnvelope.Create(chatMessage, agent.Name); + } + + private async Task ProcessImageSourceAsync(ImageMessage imageMessage) + { + if (imageMessage.Data != null) + { + return new ImageSource + { + MediaType = imageMessage.Data.MediaType, + Data = 
Convert.ToBase64String(imageMessage.Data.ToArray()) + }; + } + + if (imageMessage.Url is null) + { + throw new InvalidOperationException("Invalid ImageMessage, the data or url must be provided"); + } + + var uri = new Uri(imageMessage.Url); + using var client = new HttpClient(); + var response = client.GetAsync(uri).Result; + if (!response.IsSuccessStatusCode) + { + throw new HttpRequestException($"Failed to download the image from {uri}"); + } + + return new ImageSource + { + MediaType = "image/jpeg", + Data = Convert.ToBase64String(await response.Content.ReadAsByteArrayAsync()) + }; } } diff --git a/dotnet/src/AutoGen.Core/Agent/MiddlewareStreamingAgent.cs b/dotnet/src/AutoGen.Core/Agent/MiddlewareStreamingAgent.cs index 251d3c110f9..52967d6ff1c 100644 --- a/dotnet/src/AutoGen.Core/Agent/MiddlewareStreamingAgent.cs +++ b/dotnet/src/AutoGen.Core/Agent/MiddlewareStreamingAgent.cs @@ -49,7 +49,6 @@ public Task GenerateReplyAsync(IEnumerable messages, Generat public IAsyncEnumerable GenerateStreamingReplyAsync(IEnumerable messages, GenerateReplyOptions? options = null, CancellationToken cancellationToken = default) { - return _agent.GenerateStreamingReplyAsync(messages, options, cancellationToken); } diff --git a/dotnet/src/AutoGen.Core/Message/ImageMessage.cs b/dotnet/src/AutoGen.Core/Message/ImageMessage.cs index d2e2d080300..685354dfe7a 100644 --- a/dotnet/src/AutoGen.Core/Message/ImageMessage.cs +++ b/dotnet/src/AutoGen.Core/Message/ImageMessage.cs @@ -7,18 +7,34 @@ namespace AutoGen.Core; public class ImageMessage : IMessage { - public ImageMessage(Role role, string url, string? from = null) + public ImageMessage(Role role, string url, string? from = null, string? mimeType = null) + : this(role, new Uri(url), from, mimeType) { - this.Role = role; - this.From = from; - this.Url = url; } - public ImageMessage(Role role, Uri uri, string? from = null) + public ImageMessage(Role role, Uri uri, string? from = null, string? mimeType = null) { this.Role = role; this.From = from; this.Url = uri.ToString(); + + // try infer mimeType from uri extension if not provided + if (mimeType is null) + { + mimeType = uri switch + { + _ when uri.AbsoluteUri.EndsWith(".png", StringComparison.OrdinalIgnoreCase) => "image/png", + _ when uri.AbsoluteUri.EndsWith(".jpg", StringComparison.OrdinalIgnoreCase) => "image/jpeg", + _ when uri.AbsoluteUri.EndsWith(".jpeg", StringComparison.OrdinalIgnoreCase) => "image/jpeg", + _ when uri.AbsoluteUri.EndsWith(".gif", StringComparison.OrdinalIgnoreCase) => "image/gif", + _ when uri.AbsoluteUri.EndsWith(".bmp", StringComparison.OrdinalIgnoreCase) => "image/bmp", + _ when uri.AbsoluteUri.EndsWith(".webp", StringComparison.OrdinalIgnoreCase) => "image/webp", + _ when uri.AbsoluteUri.EndsWith(".svg", StringComparison.OrdinalIgnoreCase) => "image/svg+xml", + _ => throw new ArgumentException("MimeType is required for ImageMessage", nameof(mimeType)) + }; + } + + this.MimeType = mimeType; } public ImageMessage(Role role, BinaryData data, string? from = null) @@ -28,7 +44,7 @@ public ImageMessage(Role role, BinaryData data, string? from = null) throw new ArgumentException("Data cannot be empty", nameof(data)); } - if (string.IsNullOrWhiteSpace(data.MediaType)) + if (data.MediaType is null) { throw new ArgumentException("MediaType is needed for DataUri Images", nameof(data)); } @@ -36,15 +52,18 @@ public ImageMessage(Role role, BinaryData data, string? 
from = null) this.Role = role; this.From = from; this.Data = data; + this.MimeType = data.MediaType; } - public Role Role { get; set; } + public Role Role { get; } - public string? Url { get; set; } + public string? Url { get; } public string? From { get; set; } - public BinaryData? Data { get; set; } + public BinaryData? Data { get; } + + public string MimeType { get; } public string BuildDataUri() { @@ -53,7 +72,7 @@ public string BuildDataUri() throw new NullReferenceException($"{nameof(Data)}"); } - return $"data:{this.Data.MediaType};base64,{Convert.ToBase64String(this.Data.ToArray())}"; + return $"data:{this.MimeType};base64,{Convert.ToBase64String(this.Data.ToArray())}"; } public override string ToString() diff --git a/dotnet/src/AutoGen.Gemini/AutoGen.Gemini.csproj b/dotnet/src/AutoGen.Gemini/AutoGen.Gemini.csproj new file mode 100644 index 00000000000..5a2a42ceb58 --- /dev/null +++ b/dotnet/src/AutoGen.Gemini/AutoGen.Gemini.csproj @@ -0,0 +1,18 @@ + + + + netstandard2.0 + + + + + + + + + + + + + + diff --git a/dotnet/src/AutoGen.Gemini/Extension/FunctionContractExtension.cs b/dotnet/src/AutoGen.Gemini/Extension/FunctionContractExtension.cs new file mode 100644 index 00000000000..64f78fa165b --- /dev/null +++ b/dotnet/src/AutoGen.Gemini/Extension/FunctionContractExtension.cs @@ -0,0 +1,90 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// FunctionContractExtension.cs + +using System.Collections.Generic; +using System.Linq; +using AutoGen.Core; +using Google.Cloud.AIPlatform.V1; +using Json.Schema; +using Json.Schema.Generation; +using OpenAPISchemaType = Google.Cloud.AIPlatform.V1.Type; +using Type = System.Type; + +namespace AutoGen.Gemini.Extension; + +public static class FunctionContractExtension +{ + /// + /// Convert a to a that can be used in gpt funciton call. + /// + public static FunctionDeclaration ToFunctionDeclaration(this FunctionContract function) + { + var required = function.Parameters!.Where(p => p.IsRequired) + .Select(p => p.Name) + .ToList(); + var parameterProperties = new Dictionary(); + + foreach (var parameter in function.Parameters ?? Enumerable.Empty()) + { + var schema = ToOpenApiSchema(parameter.ParameterType); + schema.Description = parameter.Description; + schema.Title = parameter.Name; + schema.Nullable = !parameter.IsRequired; + parameterProperties.Add(parameter.Name!, schema); + } + + return new FunctionDeclaration + { + Name = function.Name, + Description = function.Description, + Parameters = new OpenApiSchema + { + Required = + { + required, + }, + Properties = + { + parameterProperties, + }, + Type = OpenAPISchemaType.Object, + }, + }; + } + + private static OpenApiSchema ToOpenApiSchema(Type? 
type) + { + if (type == null) + { + return new OpenApiSchema + { + Type = OpenAPISchemaType.Unspecified + }; + } + + var schema = new JsonSchemaBuilder().FromType(type).Build(); + var openApiSchema = new OpenApiSchema + { + Type = schema.GetJsonType() switch + { + SchemaValueType.Array => OpenAPISchemaType.Array, + SchemaValueType.Boolean => OpenAPISchemaType.Boolean, + SchemaValueType.Integer => OpenAPISchemaType.Integer, + SchemaValueType.Number => OpenAPISchemaType.Number, + SchemaValueType.Object => OpenAPISchemaType.Object, + SchemaValueType.String => OpenAPISchemaType.String, + _ => OpenAPISchemaType.Unspecified + }, + }; + + if (schema.GetJsonType() == SchemaValueType.Object && schema.GetProperties() is var properties && properties != null) + { + foreach (var property in properties) + { + openApiSchema.Properties.Add(property.Key, ToOpenApiSchema(property.Value.GetType())); + } + } + + return openApiSchema; + } +} diff --git a/dotnet/src/AutoGen.Gemini/GeminiChatAgent.cs b/dotnet/src/AutoGen.Gemini/GeminiChatAgent.cs new file mode 100644 index 00000000000..b081faae832 --- /dev/null +++ b/dotnet/src/AutoGen.Gemini/GeminiChatAgent.cs @@ -0,0 +1,268 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// GeminiChatAgent.cs + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.CompilerServices; +using System.Threading; +using System.Threading.Tasks; +using AutoGen.Core; +using AutoGen.Gemini.Extension; +using Google.Cloud.AIPlatform.V1; +using Google.Protobuf.Collections; +namespace AutoGen.Gemini; + +public class GeminiChatAgent : IStreamingAgent +{ + private readonly IGeminiClient client; + private readonly string? systemMessage; + private readonly string model; + private readonly ToolConfig? toolConfig; + private readonly RepeatedField? safetySettings; + private readonly string responseMimeType; + private readonly Tool[]? tools; + + /// + /// Create that connects to Gemini. + /// + /// the gemini client to use. e.g. + /// agent name + /// the model id. It needs to be in the format of + /// 'projects/{project}/locations/{location}/publishers/{provider}/models/{model}' if the is + /// system message + /// tool config + /// tools + /// safety settings + /// response mime type, available values are ['application/json', 'text/plain'], default is 'text/plain' + public GeminiChatAgent( + IGeminiClient client, + string name, + string model, + string? systemMessage = null, + ToolConfig? toolConfig = null, + Tool[]? tools = null, + RepeatedField? safetySettings = null, + string responseMimeType = "text/plain") + { + this.client = client; + this.Name = name; + this.systemMessage = systemMessage; + this.model = model; + this.toolConfig = toolConfig; + this.safetySettings = safetySettings; + this.responseMimeType = responseMimeType; + this.tools = tools; + } + + /// + /// Create that connects to Gemini using + /// + /// agent name + /// the name of gemini model, e.g. gemini-1.5-flash-001 + /// google gemini api key + /// system message + /// tool config + /// tools + /// + /// response mime type, available values are ['application/json', 'text/plain'], default is 'text/plain' + /// /// + /// + /// + public GeminiChatAgent( + string name, + string model, + string apiKey, + string systemMessage = "You are a helpful AI assistant", + ToolConfig? toolConfig = null, + Tool[]? tools = null, + RepeatedField? 
safetySettings = null, + string responseMimeType = "text/plain") + : this( + client: new GoogleGeminiClient(apiKey), + name: name, + model: model, + systemMessage: systemMessage, + toolConfig: toolConfig, + tools: tools, + safetySettings: safetySettings, + responseMimeType: responseMimeType) + { + } + + /// + /// Create that connects to Vertex AI. + /// + /// agent name + /// system message + /// the name of gemini model, e.g. gemini-1.5-flash-001 + /// project id + /// model location + /// model provider, default is 'google' + /// tool config + /// tools + /// + /// response mime type, available values are ['application/json', 'text/plain'], default is 'text/plain' + /// + /// + /// + public GeminiChatAgent( + string name, + string model, + string project, + string location, + string provider = "google", + string? systemMessage = null, + ToolConfig? toolConfig = null, + Tool[]? tools = null, + RepeatedField? safetySettings = null, + string responseMimeType = "text/plain") + : this( + client: new VertexGeminiClient(location), + name: name, + model: $"projects/{project}/locations/{location}/publishers/{provider}/models/{model}", + systemMessage: systemMessage, + toolConfig: toolConfig, + tools: tools, + safetySettings: safetySettings, + responseMimeType: responseMimeType) + { + } + + public string Name { get; } + + public async Task GenerateReplyAsync(IEnumerable messages, GenerateReplyOptions? options = null, CancellationToken cancellationToken = default) + { + var request = BuildChatRequest(messages, options); + var response = await this.client.GenerateContentAsync(request, cancellationToken: cancellationToken).ConfigureAwait(false); + + return MessageEnvelope.Create(response, this.Name); + } + + public async IAsyncEnumerable GenerateStreamingReplyAsync(IEnumerable messages, GenerateReplyOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + var request = BuildChatRequest(messages, options); + var response = this.client.GenerateContentStreamAsync(request); + + await foreach (var item in response.WithCancellation(cancellationToken).ConfigureAwait(false)) + { + yield return MessageEnvelope.Create(item, this.Name); + } + } + + private GenerateContentRequest BuildChatRequest(IEnumerable messages, GenerateReplyOptions? options) + { + var geminiMessages = messages.Select(m => m switch + { + IMessage contentMessage => contentMessage.Content, + _ => throw new NotSupportedException($"Message type {m.GetType()} is not supported.") + }); + + // there are several rules applies to the messages that can be sent to Gemini in a multi-turn chat + // - The first message must be from the user or function + // - The (user|model) roles must alternate e.g. (user, model, user, model, ...) 
+ // - The last message must be from the user or function + + // check if the first message is from the user + if (geminiMessages.FirstOrDefault()?.Role != "user" && geminiMessages.FirstOrDefault()?.Role != "function") + { + throw new ArgumentException("The first message must be from the user or function", nameof(messages)); + } + + // check if the last message is from the user + if (geminiMessages.LastOrDefault()?.Role != "user" && geminiMessages.LastOrDefault()?.Role != "function") + { + throw new ArgumentException("The last message must be from the user or function", nameof(messages)); + } + + // merge continuous messages with the same role into one message + var mergedMessages = geminiMessages.Aggregate(new List(), (acc, message) => + { + if (acc.Count == 0 || acc.Last().Role != message.Role) + { + acc.Add(message); + } + else + { + acc.Last().Parts.AddRange(message.Parts); + } + + return acc; + }); + + var systemMessage = this.systemMessage switch + { + null => null, + string message => new Content + { + Parts = { new[] { new Part { Text = message } } }, + Role = "system_instruction" + } + }; + + List tools = this.tools?.ToList() ?? new List(); + + var request = new GenerateContentRequest() + { + Contents = { mergedMessages }, + SystemInstruction = systemMessage, + Model = this.model, + GenerationConfig = new GenerationConfig + { + StopSequences = { options?.StopSequence ?? Enumerable.Empty() }, + ResponseMimeType = this.responseMimeType, + CandidateCount = 1, + }, + }; + + if (this.toolConfig is not null) + { + request.ToolConfig = this.toolConfig; + } + + if (this.safetySettings is not null) + { + request.SafetySettings.Add(this.safetySettings); + } + + if (options?.MaxToken.HasValue is true) + { + request.GenerationConfig.MaxOutputTokens = options.MaxToken.Value; + } + + if (options?.Temperature.HasValue is true) + { + request.GenerationConfig.Temperature = options.Temperature.Value; + } + + if (options?.Functions is { Length: > 0 }) + { + foreach (var function in options.Functions) + { + tools.Add(new Tool + { + FunctionDeclarations = { function.ToFunctionDeclaration() }, + }); + } + } + + // merge tools into one tool + // because multipe tools are currently not supported by Gemini + // see https://github.com/googleapis/python-aiplatform/issues/3771 + var aggregatedTool = new Tool + { + FunctionDeclarations = { tools.SelectMany(t => t.FunctionDeclarations) }, + }; + + if (aggregatedTool is { FunctionDeclarations: { Count: > 0 } }) + { + request.Tools.Add(aggregatedTool); + } + + return request; + } +} diff --git a/dotnet/src/AutoGen.Gemini/GoogleGeminiClient.cs b/dotnet/src/AutoGen.Gemini/GoogleGeminiClient.cs new file mode 100644 index 00000000000..9489061e27e --- /dev/null +++ b/dotnet/src/AutoGen.Gemini/GoogleGeminiClient.cs @@ -0,0 +1,83 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// GoogleGeminiClient.cs + +using System; +using System.Collections.Generic; +using System.Net.Http; +using System.Threading; +using System.Threading.Tasks; +using Google.Cloud.AIPlatform.V1; +using Google.Protobuf; + +namespace AutoGen.Gemini; + +public class GoogleGeminiClient : IGeminiClient +{ + private readonly string apiKey; + private const string endpoint = "https://generativelanguage.googleapis.com/v1beta"; + private readonly HttpClient httpClient = new(); + private const string generateContentPath = "models/{0}:generateContent"; + private const string generateContentStreamPath = "models/{0}:streamGenerateContent"; + + public GoogleGeminiClient(HttpClient httpClient, string apiKey) + { + this.apiKey = apiKey; + this.httpClient = httpClient; + } + + public GoogleGeminiClient(string apiKey) + { + this.apiKey = apiKey; + } + + public async Task GenerateContentAsync(GenerateContentRequest request, CancellationToken cancellationToken = default) + { + var path = string.Format(generateContentPath, request.Model); + var url = $"{endpoint}/{path}?key={apiKey}"; + + var httpContent = new StringContent(JsonFormatter.Default.Format(request), System.Text.Encoding.UTF8, "application/json"); + var response = await httpClient.PostAsync(url, httpContent, cancellationToken); + + if (!response.IsSuccessStatusCode) + { + throw new Exception($"Failed to generate content. Status code: {response.StatusCode}"); + } + + var json = await response.Content.ReadAsStringAsync(); + return GenerateContentResponse.Parser.ParseJson(json); + } + + public async IAsyncEnumerable GenerateContentStreamAsync(GenerateContentRequest request) + { + var path = string.Format(generateContentStreamPath, request.Model); + var url = $"{endpoint}/{path}?key={apiKey}&alt=sse"; + + var httpContent = new StringContent(JsonFormatter.Default.Format(request), System.Text.Encoding.UTF8, "application/json"); + var requestMessage = new HttpRequestMessage(HttpMethod.Post, url) + { + Content = httpContent + }; + + var response = await httpClient.SendAsync(requestMessage, HttpCompletionOption.ResponseHeadersRead); + + if (!response.IsSuccessStatusCode) + { + throw new Exception($"Failed to generate content. Status code: {response.StatusCode}"); + } + + var stream = await response.Content.ReadAsStreamAsync(); + var jp = new JsonParser(JsonParser.Settings.Default.WithIgnoreUnknownFields(true)); + using var streamReader = new System.IO.StreamReader(stream); + while (!streamReader.EndOfStream) + { + var json = await streamReader.ReadLineAsync(); + if (string.IsNullOrWhiteSpace(json)) + { + continue; + } + + json = json.Substring("data:".Length).Trim(); + yield return jp.Parse(json); + } + } +} diff --git a/dotnet/src/AutoGen.Gemini/IGeminiClient.cs b/dotnet/src/AutoGen.Gemini/IGeminiClient.cs new file mode 100644 index 00000000000..2e209e02b03 --- /dev/null +++ b/dotnet/src/AutoGen.Gemini/IGeminiClient.cs @@ -0,0 +1,15 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// IGeminiClient.cs + +using System.Collections.Generic; +using System.Threading; +using System.Threading.Tasks; +using Google.Cloud.AIPlatform.V1; + +namespace AutoGen.Gemini; + +public interface IGeminiClient +{ + Task GenerateContentAsync(GenerateContentRequest request, CancellationToken cancellationToken = default); + IAsyncEnumerable GenerateContentStreamAsync(GenerateContentRequest request); +} diff --git a/dotnet/src/AutoGen.Gemini/Middleware/GeminiAgentExtension.cs b/dotnet/src/AutoGen.Gemini/Middleware/GeminiAgentExtension.cs new file mode 100644 index 00000000000..8718d54f960 --- /dev/null +++ b/dotnet/src/AutoGen.Gemini/Middleware/GeminiAgentExtension.cs @@ -0,0 +1,40 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// GeminiAgentExtension.cs + +using AutoGen.Core; + +namespace AutoGen.Gemini.Middleware; + +public static class GeminiAgentExtension +{ + + /// + /// Register a GeminiMessageConnector to the GeminiChatAgent + /// + /// the connector to use. If null, a new instance of GeminiMessageConnector will be created. + public static MiddlewareStreamingAgent RegisterMessageConnector( + this GeminiChatAgent agent, GeminiMessageConnector? connector = null) + { + if (connector == null) + { + connector = new GeminiMessageConnector(); + } + + return agent.RegisterStreamingMiddleware(connector); + } + + /// + /// Register a GeminiMessageConnector to the MiddlewareStreamingAgent where T is GeminiChatAgent + /// + /// the connector to use. If null, a new instance of GeminiMessageConnector will be created. + public static MiddlewareStreamingAgent RegisterMessageConnector( + this MiddlewareStreamingAgent agent, GeminiMessageConnector? connector = null) + { + if (connector == null) + { + connector = new GeminiMessageConnector(); + } + + return agent.RegisterStreamingMiddleware(connector); + } +} diff --git a/dotnet/src/AutoGen.Gemini/Middleware/GeminiMessageConnector.cs b/dotnet/src/AutoGen.Gemini/Middleware/GeminiMessageConnector.cs new file mode 100644 index 00000000000..35008ebf00c --- /dev/null +++ b/dotnet/src/AutoGen.Gemini/Middleware/GeminiMessageConnector.cs @@ -0,0 +1,483 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// GeminiMessageConnector.cs + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.CompilerServices; +using System.Text.Json; +using System.Text.Json.Nodes; +using System.Threading; +using System.Threading.Tasks; +using AutoGen.Core; +using Google.Cloud.AIPlatform.V1; +using Google.Protobuf; +using Google.Protobuf.WellKnownTypes; +using static Google.Cloud.AIPlatform.V1.Candidate.Types; +using IMessage = AutoGen.Core.IMessage; + +namespace AutoGen.Gemini.Middleware; + +public class GeminiMessageConnector : IStreamingMiddleware +{ + /// + /// if true, the connector will throw an exception if it encounters an unsupported message type. + /// Otherwise, it will ignore processing the message and return the message as is. + /// + private readonly bool strictMode; + + /// + /// Initializes a new instance of the GeminiMessageConnector class. + /// + /// whether to throw an exception if it encounters an unsupported message type. + /// If true, the connector will throw an exception if it encounters an unsupported message type. + /// If false, it will ignore processing the message and return the message as is.
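+ /// Example (illustrative, assuming an existing GeminiChatAgent named agent): agent.RegisterMessageConnector(new GeminiMessageConnector(strictMode: true));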
+ public GeminiMessageConnector(bool strictMode = false) + { + this.strictMode = strictMode; + } + + public string Name => nameof(GeminiMessageConnector); + + public async IAsyncEnumerable InvokeAsync(MiddlewareContext context, IStreamingAgent agent, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + var messages = ProcessMessage(context.Messages, agent); + + var bucket = new List(); + + await foreach (var reply in agent.GenerateStreamingReplyAsync(messages, context.Options, cancellationToken)) + { + if (reply is Core.IMessage m) + { + // if m.Content is empty and stop reason is Stop, ignore the message + if (m.Content.Candidates.Count == 1 && m.Content.Candidates[0].Content.Parts.Count == 1 && m.Content.Candidates[0].Content.Parts[0].DataCase == Part.DataOneofCase.Text) + { + var text = m.Content.Candidates[0].Content.Parts[0].Text; + var stopReason = m.Content.Candidates[0].FinishReason; + if (string.IsNullOrEmpty(text) && stopReason == FinishReason.Stop) + { + continue; + } + } + + bucket.Add(m.Content); + + yield return PostProcessStreamingMessage(m.Content, agent); + } + else if (strictMode) + { + throw new InvalidOperationException($"Unsupported message type: {reply.GetType()}"); + } + else + { + yield return reply; + } + + // aggregate the message updates from bucket into a single message + if (bucket is { Count: > 0 }) + { + var isTextMessageUpdates = bucket.All(m => m.Candidates.Count == 1 && m.Candidates[0].Content.Parts.Count == 1 && m.Candidates[0].Content.Parts[0].DataCase == Part.DataOneofCase.Text); + var isFunctionCallUpdates = bucket.Any(m => m.Candidates.Count == 1 && m.Candidates[0].Content.Parts.Count == 1 && m.Candidates[0].Content.Parts[0].DataCase == Part.DataOneofCase.FunctionCall); + if (isTextMessageUpdates) + { + var text = string.Join(string.Empty, bucket.Select(m => m.Candidates[0].Content.Parts[0].Text)); + var textMessage = new TextMessage(Role.Assistant, text, agent.Name); + + yield return textMessage; + } + else if (isFunctionCallUpdates) + { + var functionCallParts = bucket.Where(m => m.Candidates.Count == 1 && m.Candidates[0].Content.Parts.Count == 1 && m.Candidates[0].Content.Parts[0].DataCase == Part.DataOneofCase.FunctionCall) + .Select(m => m.Candidates[0].Content.Parts[0]).ToList(); + + var toolCalls = new List(); + foreach (var part in functionCallParts) + { + var fc = part.FunctionCall; + var toolCall = new ToolCall(fc.Name, fc.Args.ToString()); + + toolCalls.Add(toolCall); + } + + var toolCallMessage = new ToolCallMessage(toolCalls, agent.Name); + + yield return toolCallMessage; + } + else + { + throw new InvalidOperationException("The response should contain either text or tool calls."); + } + } + } + } + + public async Task InvokeAsync(MiddlewareContext context, IAgent agent, CancellationToken cancellationToken = default) + { + var messages = ProcessMessage(context.Messages, agent); + var reply = await agent.GenerateReplyAsync(messages, context.Options, cancellationToken); + + return reply switch + { + Core.IMessage m => PostProcessMessage(m.Content, agent), + _ when strictMode => throw new InvalidOperationException($"Unsupported message type: {reply.GetType()}"), + _ => reply, + }; + } + + private IMessage PostProcessStreamingMessage(GenerateContentResponse m, IAgent agent) + { + this.ValidateGenerateContentResponse(m); + + var candidate = m.Candidates[0]; + var parts = candidate.Content.Parts; + + if (parts.Count == 1 && parts[0].DataCase == Part.DataOneofCase.Text) + { + var content = parts[0].Text; + return new 
TextMessageUpdate(Role.Assistant, content, agent.Name); + } + else + { + var toolCalls = new List(); + foreach (var part in parts) + { + if (part.DataCase == Part.DataOneofCase.FunctionCall) + { + var fc = part.FunctionCall; + var toolCall = new ToolCall(fc.Name, fc.Args.ToString()); + + toolCalls.Add(toolCall); + } + } + + if (toolCalls.Count > 0) + { + var toolCallMessage = new ToolCallMessage(toolCalls, agent.Name); + return toolCallMessage; + } + else + { + throw new InvalidOperationException("The response should contain either text or tool calls."); + } + } + } + + private IMessage PostProcessMessage(GenerateContentResponse m, IAgent agent) + { + this.ValidateGenerateContentResponse(m); + var candidate = m.Candidates[0]; + var parts = candidate.Content.Parts; + + if (parts.Count == 1 && parts[0].DataCase == Part.DataOneofCase.Text) + { + var content = parts[0].Text; + return new TextMessage(Role.Assistant, content, agent.Name); + } + else + { + var toolCalls = new List(); + foreach (var part in parts) + { + if (part.DataCase == Part.DataOneofCase.FunctionCall) + { + var fc = part.FunctionCall; + var toolCall = new ToolCall(fc.Name, fc.Args.ToString()); + + toolCalls.Add(toolCall); + } + } + + if (toolCalls.Count > 0) + { + var toolCallMessage = new ToolCallMessage(toolCalls, agent.Name); + return toolCallMessage; + } + else + { + throw new InvalidOperationException("The response should contain either text or tool calls."); + } + } + } + + private IEnumerable ProcessMessage(IEnumerable messages, IAgent agent) + { + return messages.SelectMany(m => + { + if (m is Core.IMessage messageEnvelope) + { + return [m]; + } + else + { + return m switch + { + TextMessage textMessage => ProcessTextMessage(textMessage, agent), + ImageMessage imageMessage => ProcessImageMessage(imageMessage, agent), + MultiModalMessage multiModalMessage => ProcessMultiModalMessage(multiModalMessage, agent), + ToolCallMessage toolCallMessage => ProcessToolCallMessage(toolCallMessage, agent), + ToolCallResultMessage toolCallResultMessage => ProcessToolCallResultMessage(toolCallResultMessage, agent), + ToolCallAggregateMessage toolCallAggregateMessage => ProcessToolCallAggregateMessage(toolCallAggregateMessage, agent), + _ when strictMode => throw new InvalidOperationException($"Unsupported message type: {m.GetType()}"), + _ => [m], + }; + } + }); + } + + private IEnumerable ProcessToolCallAggregateMessage(ToolCallAggregateMessage toolCallAggregateMessage, IAgent agent) + { + var parseAsUser = ShouldParseAsUser(toolCallAggregateMessage, agent); + if (parseAsUser) + { + var content = toolCallAggregateMessage.GetContent(); + + if (content is string str) + { + var textMessage = new TextMessage(Role.User, str, toolCallAggregateMessage.From); + + return ProcessTextMessage(textMessage, agent); + } + + return []; + } + else + { + var toolCallContents = ProcessToolCallMessage(toolCallAggregateMessage.Message1, agent); + var toolCallResultContents = ProcessToolCallResultMessage(toolCallAggregateMessage.Message2, agent); + + return toolCallContents.Concat(toolCallResultContents); + } + } + + private void ValidateGenerateContentResponse(GenerateContentResponse response) + { + if (response.Candidates.Count != 1) + { + throw new InvalidOperationException("The response should contain exactly one candidate."); + } + + var candidate = response.Candidates[0]; + if (candidate.Content is null) + { + var finishReason = candidate.FinishReason; + var finishMessage = candidate.FinishMessage; + + throw new InvalidOperationException($"The 
response should contain content but the content is empty. FinishReason: {finishReason}, FinishMessage: {finishMessage}"); + } + } + + private IEnumerable ProcessToolCallResultMessage(ToolCallResultMessage toolCallResultMessage, IAgent agent) + { + var functionCallResultParts = new List(); + foreach (var toolCallResult in toolCallResultMessage.ToolCalls) + { + if (toolCallResult.Result is null) + { + continue; + } + + // if result is already a json object, use it as is + var json = toolCallResult.Result; + try + { + JsonNode.Parse(json); + } + catch (JsonException) + { + // if the result is not a json object, wrap it in a json object + var result = new { result = json }; + json = JsonSerializer.Serialize(result); + } + var part = new Part + { + FunctionResponse = new FunctionResponse + { + Name = toolCallResult.FunctionName, + Response = Struct.Parser.ParseJson(json), + } + }; + + functionCallResultParts.Add(part); + } + + var content = new Content + { + Parts = { functionCallResultParts }, + Role = "function", + }; + + return [MessageEnvelope.Create(content, toolCallResultMessage.From)]; + } + + private IEnumerable ProcessToolCallMessage(ToolCallMessage toolCallMessage, IAgent agent) + { + var shouldParseAsUser = ShouldParseAsUser(toolCallMessage, agent); + if (strictMode && shouldParseAsUser) + { + throw new InvalidOperationException("ToolCallMessage is not supported as user role in Gemini."); + } + + var functionCallParts = new List(); + foreach (var toolCall in toolCallMessage.ToolCalls) + { + var part = new Part + { + FunctionCall = new FunctionCall + { + Name = toolCall.FunctionName, + Args = Struct.Parser.ParseJson(toolCall.FunctionArguments), + } + }; + + functionCallParts.Add(part); + } + var content = new Content + { + Parts = { functionCallParts }, + Role = "model" + }; + + return [MessageEnvelope.Create(content, toolCallMessage.From)]; + } + + private IEnumerable ProcessMultiModalMessage(MultiModalMessage multiModalMessage, IAgent agent) + { + var parts = new List(); + foreach (var message in multiModalMessage.Content) + { + if (message is TextMessage textMessage) + { + parts.Add(new Part { Text = textMessage.Content }); + } + else if (message is ImageMessage imageMessage) + { + parts.Add(CreateImagePart(imageMessage)); + } + else + { + throw new InvalidOperationException($"Unsupported message type: {message.GetType()}"); + } + } + + var shouldParseAsUser = ShouldParseAsUser(multiModalMessage, agent); + + if (strictMode && !shouldParseAsUser) + { + // image message is not supported as model role in Gemini + throw new InvalidOperationException("Image message is not supported as model role in Gemini."); + } + + var content = new Content + { + Parts = { parts }, + Role = shouldParseAsUser ? 
"user" : "model", + }; + + return [MessageEnvelope.Create(content, multiModalMessage.From)]; + } + + private IEnumerable ProcessTextMessage(TextMessage textMessage, IAgent agent) + { + if (textMessage.Role == Role.System) + { + // there are only user | model role in Gemini + // if the role is system and the strict mode is enabled, throw an exception + if (strictMode) + { + throw new InvalidOperationException("System role is not supported in Gemini."); + } + + // if strict mode is not enabled, parse the message as a user message + var content = new Content + { + Parts = { new[] { new Part { Text = textMessage.Content } } }, + Role = "user", + }; + + return [MessageEnvelope.Create(content, textMessage.From)]; + } + + var shouldParseAsUser = ShouldParseAsUser(textMessage, agent); + + if (shouldParseAsUser) + { + var content = new Content + { + Parts = { new[] { new Part { Text = textMessage.Content } } }, + Role = "user", + }; + + return [MessageEnvelope.Create(content, textMessage.From)]; + } + else + { + var content = new Content + { + Parts = { new[] { new Part { Text = textMessage.Content } } }, + Role = "model", + }; + + return [MessageEnvelope.Create(content, textMessage.From)]; + } + } + + private IEnumerable ProcessImageMessage(ImageMessage imageMessage, IAgent agent) + { + var imagePart = CreateImagePart(imageMessage); + var shouldParseAsUser = ShouldParseAsUser(imageMessage, agent); + + if (strictMode && !shouldParseAsUser) + { + // image message is not supported as model role in Gemini + throw new InvalidOperationException("Image message is not supported as model role in Gemini."); + } + + var content = new Content + { + Parts = { imagePart }, + Role = shouldParseAsUser ? "user" : "model", + }; + + return [MessageEnvelope.Create(content, imageMessage.From)]; + } + + private Part CreateImagePart(ImageMessage message) + { + if (message.Url is string url) + { + return new Part + { + FileData = new FileData + { + FileUri = url, + MimeType = message.MimeType + } + }; + } + else if (message.Data is BinaryData data) + { + return new Part + { + InlineData = new Blob + { + MimeType = message.MimeType, + Data = ByteString.CopyFrom(data.ToArray()), + } + }; + } + else + { + throw new InvalidOperationException("Invalid ImageMessage, the data or url must be provided"); + } + } + + private bool ShouldParseAsUser(IMessage message, IAgent agent) + { + return message switch + { + TextMessage textMessage => (textMessage.Role == Role.User && textMessage.From is null) + || (textMessage.From != agent.Name), + _ => message.From != agent.Name, + }; + } +} diff --git a/dotnet/src/AutoGen.Gemini/VertexGeminiClient.cs b/dotnet/src/AutoGen.Gemini/VertexGeminiClient.cs new file mode 100644 index 00000000000..c54f2280dfd --- /dev/null +++ b/dotnet/src/AutoGen.Gemini/VertexGeminiClient.cs @@ -0,0 +1,38 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// IGeminiClient.cs + +using System.Collections.Generic; +using System.Threading; +using System.Threading.Tasks; +using Google.Cloud.AIPlatform.V1; + +namespace AutoGen.Gemini; + +internal class VertexGeminiClient : IGeminiClient +{ + private readonly PredictionServiceClient client; + public VertexGeminiClient(PredictionServiceClient client) + { + this.client = client; + } + + public VertexGeminiClient(string location) + { + PredictionServiceClientBuilder builder = new() + { + Endpoint = $"{location}-aiplatform.googleapis.com", + }; + + this.client = builder.Build(); + } + + public Task GenerateContentAsync(GenerateContentRequest request, CancellationToken cancellationToken = default) + { + return client.GenerateContentAsync(request, cancellationToken); + } + + public IAsyncEnumerable GenerateContentStreamAsync(GenerateContentRequest request) + { + return client.StreamGenerateContent(request).GetResponseStream(); + } +} diff --git a/dotnet/test/AutoGen.Anthropic.Tests/AnthropicClientAgentTest.cs b/dotnet/test/AutoGen.Anthropic.Tests/AnthropicClientAgentTest.cs index ba31f2297ba..d29025b44af 100644 --- a/dotnet/test/AutoGen.Anthropic.Tests/AnthropicClientAgentTest.cs +++ b/dotnet/test/AutoGen.Anthropic.Tests/AnthropicClientAgentTest.cs @@ -1,31 +1,108 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // AnthropicClientAgentTest.cs +using AutoGen.Anthropic.DTO; using AutoGen.Anthropic.Extensions; using AutoGen.Anthropic.Utils; +using AutoGen.Core; using AutoGen.Tests; -using Xunit.Abstractions; +using FluentAssertions; -namespace AutoGen.Anthropic; +namespace AutoGen.Anthropic.Tests; public class AnthropicClientAgentTest { - private readonly ITestOutputHelper _output; - - public AnthropicClientAgentTest(ITestOutputHelper output) => _output = output; - [ApiKeyFact("ANTHROPIC_API_KEY")] public async Task AnthropicAgentChatCompletionTestAsync() { var client = new AnthropicClient(new HttpClient(), AnthropicConstants.Endpoint, AnthropicTestUtils.ApiKey); + var agent = new AnthropicClientAgent( + client, + name: "AnthropicAgent", + AnthropicConstants.Claude3Haiku, + systemMessage: "You are a helpful AI assistant that convert user message to upper case") + .RegisterMessageConnector(); + + var uppCaseMessage = new TextMessage(Role.User, "abcdefg"); + + var reply = await agent.SendAsync(chatHistory: new[] { uppCaseMessage }); + + reply.GetContent().Should().Contain("ABCDEFG"); + reply.From.Should().Be(agent.Name); + } + + [ApiKeyFact("ANTHROPIC_API_KEY")] + public async Task AnthropicAgentTestProcessImageAsync() + { + var client = new AnthropicClient(new HttpClient(), AnthropicConstants.Endpoint, AnthropicTestUtils.ApiKey); var agent = new AnthropicClientAgent( client, name: "AnthropicAgent", AnthropicConstants.Claude3Haiku).RegisterMessageConnector(); - var singleAgentTest = new SingleAgentTest(_output); - await singleAgentTest.UpperCaseTestAsync(agent); - await singleAgentTest.UpperCaseStreamingTestAsync(agent); + var base64Image = await AnthropicTestUtils.Base64FromImageAsync("square.png"); + var imageMessage = new ChatMessage("user", + [new ImageContent { Source = new ImageSource { MediaType = "image/png", Data = base64Image } }]); + + var messages = new IMessage[] { MessageEnvelope.Create(imageMessage) }; + + // test streaming + foreach (var message in messages) + { + var reply = agent.GenerateStreamingReplyAsync([message]); + + await foreach (var streamingMessage in reply) + { + streamingMessage.Should().BeOfType(); + streamingMessage.As().From.Should().Be(agent.Name); + } + } + 
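+ // note (illustrative): this streaming pass only asserts that each chunk is attributed to the agent; the non-streaming multimodal tests below assert the actual reply content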
} + + [ApiKeyFact("ANTHROPIC_API_KEY")] + public async Task AnthropicAgentTestMultiModalAsync() + { + var client = new AnthropicClient(new HttpClient(), AnthropicConstants.Endpoint, AnthropicTestUtils.ApiKey); + var agent = new AnthropicClientAgent( + client, + name: "AnthropicAgent", + AnthropicConstants.Claude3Haiku) + .RegisterMessageConnector(); + + var image = Path.Combine("images", "square.png"); + var binaryData = BinaryData.FromBytes(await File.ReadAllBytesAsync(image), "image/png"); + var imageMessage = new ImageMessage(Role.User, binaryData); + var textMessage = new TextMessage(Role.User, "What's in this image?"); + var multiModalMessage = new MultiModalMessage(Role.User, [textMessage, imageMessage]); + + var reply = await agent.SendAsync(multiModalMessage); + reply.Should().BeOfType(); + reply.GetRole().Should().Be(Role.Assistant); + reply.GetContent().Should().NotBeNullOrEmpty(); + reply.From.Should().Be(agent.Name); + } + + [ApiKeyFact("ANTHROPIC_API_KEY")] + public async Task AnthropicAgentTestImageMessageAsync() + { + var client = new AnthropicClient(new HttpClient(), AnthropicConstants.Endpoint, AnthropicTestUtils.ApiKey); + var agent = new AnthropicClientAgent( + client, + name: "AnthropicAgent", + AnthropicConstants.Claude3Haiku, + systemMessage: "You are a helpful AI assistant that is capable of determining what an image is. Tell me a brief description of the image." + ) + .RegisterMessageConnector(); + + var image = Path.Combine("images", "square.png"); + var binaryData = BinaryData.FromBytes(await File.ReadAllBytesAsync(image), "image/png"); + var imageMessage = new ImageMessage(Role.User, binaryData); + + var reply = await agent.SendAsync(imageMessage); + reply.Should().BeOfType(); + reply.GetRole().Should().Be(Role.Assistant); + reply.GetContent().Should().NotBeNullOrEmpty(); + reply.From.Should().Be(agent.Name); } } diff --git a/dotnet/test/AutoGen.Anthropic.Tests/AnthropicClientTest.cs b/dotnet/test/AutoGen.Anthropic.Tests/AnthropicClientTest.cs index 0b64c9e4e3c..a0b1f60cfb9 100644 --- a/dotnet/test/AutoGen.Anthropic.Tests/AnthropicClientTest.cs +++ b/dotnet/test/AutoGen.Anthropic.Tests/AnthropicClientTest.cs @@ -1,4 +1,4 @@ -using System.Text; +using System.Text; using System.Text.Json; using System.Text.Json.Serialization; using AutoGen.Anthropic.DTO; @@ -7,7 +7,7 @@ using FluentAssertions; using Xunit; -namespace AutoGen.Anthropic; +namespace AutoGen.Anthropic.Tests; public class AnthropicClientTests { @@ -43,7 +43,7 @@ public async Task AnthropicClientStreamingChatCompletionTestAsync() request.Model = AnthropicConstants.Claude3Haiku; request.Stream = true; request.MaxTokens = 500; - request.SystemMessage = "You are a helpful assistant that convert input to json object"; + request.SystemMessage = "You are a helpful assistant that convert input to json object, use JSON format."; request.Messages = new List() { new("user", "name: John, age: 41, email: g123456@gmail.com") @@ -73,6 +73,41 @@ public async Task AnthropicClientStreamingChatCompletionTestAsync() results.First().streamingMessage!.Role.Should().Be("assistant"); } + [ApiKeyFact("ANTHROPIC_API_KEY")] + public async Task AnthropicClientImageChatCompletionTestAsync() + { + var anthropicClient = new AnthropicClient(new HttpClient(), AnthropicConstants.Endpoint, AnthropicTestUtils.ApiKey); + + var request = new ChatCompletionRequest(); + request.Model = AnthropicConstants.Claude3Haiku; + request.Stream = false; + request.MaxTokens = 100; + request.SystemMessage = "You are a LLM that is suppose to describe the 
content of the image. Give me a description of the provided image."; + + var base64Image = await AnthropicTestUtils.Base64FromImageAsync("square.png"); + var messages = new List + { + new("user", + [ + new ImageContent { Source = new ImageSource {MediaType = "image/png", Data = base64Image} } + ]) + }; + + request.Messages = messages; + + var response = await anthropicClient.CreateChatCompletionsAsync(request, CancellationToken.None); + + Assert.NotNull(response); + Assert.NotNull(response.Content); + Assert.NotEmpty(response.Content); + response.Content.Count.Should().Be(1); + response.Content.First().Should().BeOfType(); + var textContent = (TextContent)response.Content.First(); + Assert.Equal("text", textContent.Type); + Assert.NotNull(response.Usage); + response.Usage.OutputTokens.Should().BeGreaterThan(0); + } + private sealed class Person { [JsonPropertyName("name")] diff --git a/dotnet/test/AutoGen.Anthropic.Tests/AnthropicTestUtils.cs b/dotnet/test/AutoGen.Anthropic.Tests/AnthropicTestUtils.cs index a5b80eee3bd..de630da6d87 100644 --- a/dotnet/test/AutoGen.Anthropic.Tests/AnthropicTestUtils.cs +++ b/dotnet/test/AutoGen.Anthropic.Tests/AnthropicTestUtils.cs @@ -1,10 +1,16 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // AnthropicTestUtils.cs -namespace AutoGen.Anthropic; +namespace AutoGen.Anthropic.Tests; public static class AnthropicTestUtils { public static string ApiKey => Environment.GetEnvironmentVariable("ANTHROPIC_API_KEY") ?? throw new Exception("Please set ANTHROPIC_API_KEY environment variable."); + + public static async Task Base64FromImageAsync(string imageName) + { + return Convert.ToBase64String( + await File.ReadAllBytesAsync(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "images", imageName))); + } } diff --git a/dotnet/test/AutoGen.Anthropic.Tests/AutoGen.Anthropic.Tests.csproj b/dotnet/test/AutoGen.Anthropic.Tests/AutoGen.Anthropic.Tests.csproj index 8cd1e3003b0..0f22d9fe676 100644 --- a/dotnet/test/AutoGen.Anthropic.Tests/AutoGen.Anthropic.Tests.csproj +++ b/dotnet/test/AutoGen.Anthropic.Tests/AutoGen.Anthropic.Tests.csproj @@ -6,18 +6,17 @@ false True AutoGen.Anthropic.Tests + True - - - - - + + - - + + PreserveNewest + diff --git a/dotnet/test/AutoGen.Anthropic.Tests/images/.gitattributes b/dotnet/test/AutoGen.Anthropic.Tests/images/.gitattributes new file mode 100644 index 00000000000..56e7c34d498 --- /dev/null +++ b/dotnet/test/AutoGen.Anthropic.Tests/images/.gitattributes @@ -0,0 +1 @@ +square.png filter=lfs diff=lfs merge=lfs -text diff --git a/dotnet/test/AutoGen.Anthropic.Tests/images/square.png b/dotnet/test/AutoGen.Anthropic.Tests/images/square.png new file mode 100644 index 00000000000..5c2b3ed820b --- /dev/null +++ b/dotnet/test/AutoGen.Anthropic.Tests/images/square.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8341030e5b93aab2c55dcd40ffa26ced8e42cc15736a8348176ffd155ad2d937 +size 8167 diff --git a/dotnet/test/AutoGen.DotnetInteractive.Tests/AutoGen.DotnetInteractive.Tests.csproj b/dotnet/test/AutoGen.DotnetInteractive.Tests/AutoGen.DotnetInteractive.Tests.csproj index cf2c24eaf78..0f77db2c1c3 100644 --- a/dotnet/test/AutoGen.DotnetInteractive.Tests/AutoGen.DotnetInteractive.Tests.csproj +++ b/dotnet/test/AutoGen.DotnetInteractive.Tests/AutoGen.DotnetInteractive.Tests.csproj @@ -4,18 +4,10 @@ $(TestTargetFramework) enable false + True True - - - - - - - - - diff --git a/dotnet/test/AutoGen.Gemini.Tests/ApprovalTests/FunctionContractExtensionTests.ItGenerateGetWeatherToolTest.approved.txt 
b/dotnet/test/AutoGen.Gemini.Tests/ApprovalTests/FunctionContractExtensionTests.ItGenerateGetWeatherToolTest.approved.txt new file mode 100644 index 00000000000..d7ec585cb20 --- /dev/null +++ b/dotnet/test/AutoGen.Gemini.Tests/ApprovalTests/FunctionContractExtensionTests.ItGenerateGetWeatherToolTest.approved.txt @@ -0,0 +1,17 @@ +{ + "name": "GetWeatherAsync", + "description": "Get weather for a city.", + "parameters": { + "type": "OBJECT", + "properties": { + "city": { + "type": "STRING", + "description": "city", + "title": "city" +} + }, + "required": [ + "city" + ] + } +} \ No newline at end of file diff --git a/dotnet/test/AutoGen.Gemini.Tests/AutoGen.Gemini.Tests.csproj b/dotnet/test/AutoGen.Gemini.Tests/AutoGen.Gemini.Tests.csproj new file mode 100644 index 00000000000..f4fb55825e5 --- /dev/null +++ b/dotnet/test/AutoGen.Gemini.Tests/AutoGen.Gemini.Tests.csproj @@ -0,0 +1,19 @@ + + + + Exe + $(TestTargetFramework) + enable + enable + True + True + + + + + + + + + + diff --git a/dotnet/test/AutoGen.Gemini.Tests/FunctionContractExtensionTests.cs b/dotnet/test/AutoGen.Gemini.Tests/FunctionContractExtensionTests.cs new file mode 100644 index 00000000000..51d799acc22 --- /dev/null +++ b/dotnet/test/AutoGen.Gemini.Tests/FunctionContractExtensionTests.cs @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// FunctionContractExtensionTests.cs + +using ApprovalTests; +using ApprovalTests.Namers; +using ApprovalTests.Reporters; +using AutoGen.Gemini.Extension; +using Google.Protobuf; +using Xunit; + +namespace AutoGen.Gemini.Tests; + +public class FunctionContractExtensionTests +{ + private readonly Functions functions = new Functions(); + [Fact] + [UseReporter(typeof(DiffReporter))] + [UseApprovalSubdirectory("ApprovalTests")] + public void ItGenerateGetWeatherToolTest() + { + var contract = functions.GetWeatherAsyncFunctionContract; + var tool = contract.ToFunctionDeclaration(); + var formatter = new JsonFormatter(JsonFormatter.Settings.Default.WithIndentation(" ")); + var json = formatter.Format(tool); + Approvals.Verify(json); + } +} diff --git a/dotnet/test/AutoGen.Gemini.Tests/Functions.cs b/dotnet/test/AutoGen.Gemini.Tests/Functions.cs new file mode 100644 index 00000000000..e3e07ee633f --- /dev/null +++ b/dotnet/test/AutoGen.Gemini.Tests/Functions.cs @@ -0,0 +1,28 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Functions.cs + +using AutoGen.Core; + +namespace AutoGen.Gemini.Tests; + +public partial class Functions +{ + /// + /// Get weather for a city. + /// + /// city + /// weather + [Function] + public async Task GetWeatherAsync(string city) + { + return await Task.FromResult($"The weather in {city} is sunny."); + } + + [Function] + public async Task GetMovies(string location, string description) + { + var movies = new List { "Barbie", "Spiderman", "Batman" }; + + return await Task.FromResult($"Movies playing in {location} based on {description} are: {string.Join(", ", movies)}"); + } +} diff --git a/dotnet/test/AutoGen.Gemini.Tests/GeminiAgentTests.cs b/dotnet/test/AutoGen.Gemini.Tests/GeminiAgentTests.cs new file mode 100644 index 00000000000..220492d6457 --- /dev/null +++ b/dotnet/test/AutoGen.Gemini.Tests/GeminiAgentTests.cs @@ -0,0 +1,311 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// GeminiAgentTests.cs + +using AutoGen.Tests; +using Google.Cloud.AIPlatform.V1; +using AutoGen.Core; +using FluentAssertions; +using AutoGen.Gemini.Extension; +using static Google.Cloud.AIPlatform.V1.Part; +using Xunit.Abstractions; +using AutoGen.Gemini.Middleware; +namespace AutoGen.Gemini.Tests; + +public class GeminiAgentTests +{ + private readonly Functions functions = new Functions(); + private readonly ITestOutputHelper _output; + + public GeminiAgentTests(ITestOutputHelper output) + { + _output = output; + } + + [ApiKeyFact("GCP_VERTEX_PROJECT_ID")] + public async Task VertexGeminiAgentGenerateReplyForTextContentAsync() + { + var location = "us-central1"; + var project = Environment.GetEnvironmentVariable("GCP_VERTEX_PROJECT_ID") ?? throw new InvalidOperationException("GCP_VERTEX_PROJECT_ID is not set."); + var model = "gemini-1.5-flash-001"; + + var textContent = new Content + { + Role = "user", + Parts = + { + new Part + { + Text = "Hello", + } + } + }; + + var agent = new GeminiChatAgent( + name: "assistant", + model: model, + project: project, + location: location, + systemMessage: "You are a helpful AI assistant"); + var message = MessageEnvelope.Create(textContent, from: agent.Name); + + var completion = await agent.SendAsync(message); + + completion.Should().BeOfType>(); + completion.From.Should().Be(agent.Name); + + var response = ((MessageEnvelope)completion).Content; + response.Should().NotBeNull(); + response.Candidates.Count.Should().BeGreaterThan(0); + response.Candidates[0].Content.Parts[0].Text.Should().NotBeNullOrEmpty(); + } + + [ApiKeyFact("GCP_VERTEX_PROJECT_ID")] + public async Task VertexGeminiAgentGenerateStreamingReplyForTextContentAsync() + { + var location = "us-central1"; + var project = Environment.GetEnvironmentVariable("GCP_VERTEX_PROJECT_ID") ?? throw new InvalidOperationException("GCP_VERTEX_PROJECT_ID is not set."); + var model = "gemini-1.5-flash-001"; + + var textContent = new Content + { + Role = "user", + Parts = + { + new Part + { + Text = "Hello", + } + } + }; + + var agent = new GeminiChatAgent( + name: "assistant", + model: model, + project: project, + location: location, + systemMessage: "You are a helpful AI assistant"); + var message = MessageEnvelope.Create(textContent, from: agent.Name); + + var completion = agent.GenerateStreamingReplyAsync([message]); + var chunks = new List(); + IStreamingMessage finalReply = null!; + + await foreach (var item in completion) + { + item.Should().NotBeNull(); + item.From.Should().Be(agent.Name); + var streamingMessage = (IMessage)item; + streamingMessage.Content.Candidates.Should().NotBeNullOrEmpty(); + chunks.Add(item); + finalReply = item; + } + + chunks.Count.Should().BeGreaterThan(0); + finalReply.Should().NotBeNull(); + finalReply.Should().BeOfType>(); + var response = ((MessageEnvelope)finalReply).Content; + response.UsageMetadata.CandidatesTokenCount.Should().BeGreaterThan(0); + } + + [ApiKeyFact("GCP_VERTEX_PROJECT_ID")] + public async Task VertexGeminiAgentGenerateReplyWithToolsAsync() + { + var location = "us-central1"; + var project = Environment.GetEnvironmentVariable("GCP_VERTEX_PROJECT_ID") ?? 
throw new InvalidOperationException("GCP_VERTEX_PROJECT_ID is not set."); + var model = "gemini-1.5-flash-001"; + var tools = new Tool[] + { + new Tool + { + FunctionDeclarations = { + functions.GetWeatherAsyncFunctionContract.ToFunctionDeclaration(), + }, + }, + new Tool + { + FunctionDeclarations = + { + functions.GetMoviesFunctionContract.ToFunctionDeclaration(), + }, + }, + }; + + var textContent = new Content + { + Role = "user", + Parts = + { + new Part + { + Text = "what's the weather in seattle", + } + } + }; + + var agent = new GeminiChatAgent( + name: "assistant", + model: model, + project: project, + location: location, + systemMessage: "You are a helpful AI assistant", + tools: tools, + toolConfig: new ToolConfig() + { + FunctionCallingConfig = new FunctionCallingConfig() + { + Mode = FunctionCallingConfig.Types.Mode.Auto, + } + }); + + var message = MessageEnvelope.Create(textContent, from: agent.Name); + + var completion = await agent.SendAsync(message); + + completion.Should().BeOfType>(); + completion.From.Should().Be(agent.Name); + + var response = ((MessageEnvelope)completion).Content; + response.Should().NotBeNull(); + response.Candidates.Count.Should().BeGreaterThan(0); + response.Candidates[0].Content.Parts[0].DataCase.Should().Be(DataOneofCase.FunctionCall); + } + + [ApiKeyFact("GCP_VERTEX_PROJECT_ID")] + public async Task VertexGeminiAgentGenerateStreamingReplyWithToolsAsync() + { + var location = "us-central1"; + var project = Environment.GetEnvironmentVariable("GCP_VERTEX_PROJECT_ID") ?? throw new InvalidOperationException("GCP_VERTEX_PROJECT_ID is not set."); + var model = "gemini-1.5-flash-001"; + var tools = new Tool[] + { + new Tool + { + FunctionDeclarations = { functions.GetWeatherAsyncFunctionContract.ToFunctionDeclaration() }, + }, + }; + + var textContent = new Content + { + Role = "user", + Parts = + { + new Part + { + Text = "what's the weather in seattle", + } + } + }; + + var agent = new GeminiChatAgent( + name: "assistant", + model: model, + project: project, + location: location, + systemMessage: "You are a helpful AI assistant", + tools: tools, + toolConfig: new ToolConfig() + { + FunctionCallingConfig = new FunctionCallingConfig() + { + Mode = FunctionCallingConfig.Types.Mode.Auto, + } + }); + + var message = MessageEnvelope.Create(textContent, from: agent.Name); + + var chunks = new List(); + IStreamingMessage finalReply = null!; + + var completion = agent.GenerateStreamingReplyAsync([message]); + + await foreach (var item in completion) + { + item.Should().NotBeNull(); + item.From.Should().Be(agent.Name); + var streamingMessage = (IMessage)item; + streamingMessage.Content.Candidates.Should().NotBeNullOrEmpty(); + if (streamingMessage.Content.Candidates[0].FinishReason != Candidate.Types.FinishReason.Stop) + { + streamingMessage.Content.Candidates[0].Content.Parts[0].DataCase.Should().Be(DataOneofCase.FunctionCall); + } + chunks.Add(item); + finalReply = item; + } + + chunks.Count.Should().BeGreaterThan(0); + finalReply.Should().NotBeNull(); + finalReply.Should().BeOfType>(); + var response = ((MessageEnvelope)finalReply).Content; + response.UsageMetadata.CandidatesTokenCount.Should().BeGreaterThan(0); + } + + [ApiKeyFact("GCP_VERTEX_PROJECT_ID")] + public async Task GeminiAgentUpperCaseTestAsync() + { + var location = "us-central1"; + var project = Environment.GetEnvironmentVariable("GCP_VERTEX_PROJECT_ID") ?? 
throw new InvalidOperationException("GCP_VERTEX_PROJECT_ID is not set."); + var model = "gemini-1.5-flash-001"; + var agent = new GeminiChatAgent( + name: "assistant", + model: model, + project: project, + location: location) + .RegisterMessageConnector(); + + var singleAgentTest = new SingleAgentTest(_output); + await singleAgentTest.UpperCaseStreamingTestAsync(agent); + await singleAgentTest.UpperCaseTestAsync(agent); + } + + [ApiKeyFact("GCP_VERTEX_PROJECT_ID")] + public async Task GeminiAgentEchoFunctionCallTestAsync() + { + var location = "us-central1"; + var project = Environment.GetEnvironmentVariable("GCP_VERTEX_PROJECT_ID") ?? throw new InvalidOperationException("GCP_VERTEX_PROJECT_ID is not set."); + var model = "gemini-1.5-flash-001"; + var singleAgentTest = new SingleAgentTest(_output); + var echoFunctionContract = singleAgentTest.EchoAsyncFunctionContract; + var agent = new GeminiChatAgent( + name: "assistant", + model: model, + project: project, + location: location, + tools: + [ + new Tool + { + FunctionDeclarations = { echoFunctionContract.ToFunctionDeclaration() }, + }, + ]) + .RegisterMessageConnector(); + + await singleAgentTest.EchoFunctionCallTestAsync(agent); + } + + [ApiKeyFact("GCP_VERTEX_PROJECT_ID")] + public async Task GeminiAgentEchoFunctionCallExecutionTestAsync() + { + var location = "us-central1"; + var project = Environment.GetEnvironmentVariable("GCP_VERTEX_PROJECT_ID") ?? throw new InvalidOperationException("GCP_VERTEX_PROJECT_ID is not set."); + var model = "gemini-1.5-flash-001"; + var singleAgentTest = new SingleAgentTest(_output); + var echoFunctionContract = singleAgentTest.EchoAsyncFunctionContract; + var functionMiddleware = new FunctionCallMiddleware( + functions: [echoFunctionContract], + functionMap: new Dictionary>>() + { + { echoFunctionContract.Name!, singleAgentTest.EchoAsyncWrapper }, + }); + + var agent = new GeminiChatAgent( + name: "assistant", + model: model, + project: project, + location: location) + .RegisterMessageConnector() + .RegisterStreamingMiddleware(functionMiddleware); + + await singleAgentTest.EchoFunctionCallExecutionStreamingTestAsync(agent); + await singleAgentTest.EchoFunctionCallExecutionTestAsync(agent); + } +} diff --git a/dotnet/test/AutoGen.Gemini.Tests/GeminiMessageTests.cs b/dotnet/test/AutoGen.Gemini.Tests/GeminiMessageTests.cs new file mode 100644 index 00000000000..7d72c18f143 --- /dev/null +++ b/dotnet/test/AutoGen.Gemini.Tests/GeminiMessageTests.cs @@ -0,0 +1,380 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// GeminiMessageTests.cs + +using AutoGen.Core; +using AutoGen.Gemini.Middleware; +using AutoGen.Tests; +using FluentAssertions; +using Google.Cloud.AIPlatform.V1; +using Xunit; + +namespace AutoGen.Gemini.Tests; + +public class GeminiMessageTests +{ + [Fact] + public async Task ItProcessUserTextMessageAsync() + { + var messageConnector = new GeminiMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, ct) => + { + msgs.Count().Should().Be(1); + var innerMessage = msgs.First(); + innerMessage.Should().BeOfType>(); + var message = (IMessage)innerMessage; + message.Content.Parts.Count.Should().Be(1); + message.Content.Role.Should().Be("user"); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(messageConnector); + + // when from is null and role is user + await agent.SendAsync("Hello"); + + // when from is user and role is user + var userMessage = new TextMessage(Role.User, "Hello", from: "user"); + await agent.SendAsync(userMessage); + + // when from is user but role is assistant + userMessage = new TextMessage(Role.Assistant, "Hello", from: "user"); + await agent.SendAsync(userMessage); + } + + [Fact] + public async Task ItProcessAssistantTextMessageAsync() + { + var messageConnector = new GeminiMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, ct) => + { + msgs.Count().Should().Be(1); + var innerMessage = msgs.First(); + innerMessage.Should().BeOfType>(); + var message = (IMessage)innerMessage; + message.Content.Parts.Count.Should().Be(1); + message.Content.Role.Should().Be("model"); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(messageConnector); + + // when from is user and role is assistant + var message = new TextMessage(Role.User, "Hello", from: agent.Name); + await agent.SendAsync(message); + + // when from is assistant and role is assistant + message = new TextMessage(Role.Assistant, "Hello", from: agent.Name); + await agent.SendAsync(message); + } + + [Fact] + public async Task ItProcessSystemTextMessageAsUserMessageWhenStrictModeIsFalseAsync() + { + var messageConnector = new GeminiMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, ct) => + { + msgs.Count().Should().Be(1); + var innerMessage = msgs.First(); + innerMessage.Should().BeOfType>(); + var message = (IMessage)innerMessage; + message.Content.Parts.Count.Should().Be(1); + message.Content.Role.Should().Be("user"); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(messageConnector); + + var message = new TextMessage(Role.System, "Hello", from: agent.Name); + await agent.SendAsync(message); + } + + [Fact] + public async Task ItThrowExceptionOnSystemMessageWhenStrictModeIsTrueAsync() + { + var messageConnector = new GeminiMessageConnector(true); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(messageConnector); + + var message = new TextMessage(Role.System, "Hello", from: agent.Name); + var action = new Func(async () => await agent.SendAsync(message)); + await action.Should().ThrowAsync(); + } + + [Fact] + public async Task ItProcessUserImageMessageAsInlineDataAsync() + { + var messageConnector = new GeminiMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, ct) => + { + msgs.Count().Should().Be(1); + var innerMessage = msgs.First(); + innerMessage.Should().BeOfType>(); + var message = 
(IMessage)innerMessage; + message.Content.Parts.Count.Should().Be(1); + message.Content.Role.Should().Be("user"); + message.Content.Parts.First().DataCase.Should().Be(Part.DataOneofCase.InlineData); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(messageConnector); + + var imagePath = Path.Combine("testData", "images", "background.png"); + var image = File.ReadAllBytes(imagePath); + var message = new ImageMessage(Role.User, BinaryData.FromBytes(image, "image/png")); + message.MimeType.Should().Be("image/png"); + + await agent.SendAsync(message); + } + + [Fact] + public async Task ItProcessUserImageMessageAsFileDataAsync() + { + var messageConnector = new GeminiMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, ct) => + { + msgs.Count().Should().Be(1); + var innerMessage = msgs.First(); + innerMessage.Should().BeOfType>(); + var message = (IMessage)innerMessage; + message.Content.Parts.Count.Should().Be(1); + message.Content.Role.Should().Be("user"); + message.Content.Parts.First().DataCase.Should().Be(Part.DataOneofCase.FileData); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(messageConnector); + + var imagePath = Path.Combine("testData", "images", "image.png"); + var url = new Uri(Path.GetFullPath(imagePath)).AbsoluteUri; + var message = new ImageMessage(Role.User, url); + message.MimeType.Should().Be("image/png"); + + await agent.SendAsync(message); + } + + [Fact] + public async Task ItProcessMultiModalMessageAsync() + { + var messageConnector = new GeminiMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, ct) => + { + msgs.Count().Should().Be(1); + var innerMessage = msgs.First(); + innerMessage.Should().BeOfType>(); + var message = (IMessage)innerMessage; + message.Content.Parts.Count.Should().Be(2); + message.Content.Role.Should().Be("user"); + message.Content.Parts.First().DataCase.Should().Be(Part.DataOneofCase.Text); + message.Content.Parts.Last().DataCase.Should().Be(Part.DataOneofCase.FileData); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(messageConnector); + + var imagePath = Path.Combine("testData", "images", "image.png"); + var url = new Uri(Path.GetFullPath(imagePath)).AbsoluteUri; + var message = new ImageMessage(Role.User, url); + message.MimeType.Should().Be("image/png"); + var textMessage = new TextMessage(Role.User, "What's in this image?"); + var multiModalMessage = new MultiModalMessage(Role.User, [textMessage, message]); + + await agent.SendAsync(multiModalMessage); + } + + [Fact] + public async Task ItProcessToolCallMessageAsync() + { + var messageConnector = new GeminiMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, ct) => + { + msgs.Count().Should().Be(1); + var innerMessage = msgs.First(); + innerMessage.Should().BeOfType>(); + var message = (IMessage)innerMessage; + message.Content.Role.Should().Be("model"); + message.Content.Parts.First().DataCase.Should().Be(Part.DataOneofCase.FunctionCall); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(messageConnector); + + var toolCallMessage = new ToolCallMessage("test", "{}", "user"); + await agent.SendAsync(toolCallMessage); + } + + [Fact] + public async Task ItProcessStreamingTextMessageAsync() + { + var messageConnector = new GeminiMessageConnector(); + var agent = new EchoAgent("assistant") + 
.RegisterStreamingMiddleware(messageConnector); + + var messageChunks = Enumerable.Range(0, 10) + .Select(i => new GenerateContentResponse() + { + Candidates = + { + new Candidate() + { + Content = new Content() + { + Role = "user", + Parts = { new Part { Text = i.ToString() } }, + } + } + } + }) + .Select(m => MessageEnvelope.Create(m)); + + IStreamingMessage? finalReply = null; + await foreach (var reply in agent.GenerateStreamingReplyAsync(messageChunks)) + { + reply.Should().BeAssignableTo(); + finalReply = reply; + } + + finalReply.Should().BeOfType(); + var textMessage = (TextMessage)finalReply!; + textMessage.GetContent().Should().Be("0123456789"); + } + + [Fact] + public async Task ItProcessToolCallResultMessageAsync() + { + var messageConnector = new GeminiMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, ct) => + { + msgs.Count().Should().Be(1); + var innerMessage = msgs.First(); + innerMessage.Should().BeOfType>(); + var message = (IMessage)innerMessage; + message.Content.Role.Should().Be("function"); + message.Content.Parts.First().DataCase.Should().Be(Part.DataOneofCase.FunctionResponse); + message.Content.Parts.First().FunctionResponse.Response.ToString().Should().Be("{ \"result\": \"result\" }"); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(messageConnector); + + var message = new ToolCallResultMessage("result", "test", "{}", "user"); + await agent.SendAsync(message); + + // when the result is already a json object string + message = new ToolCallResultMessage("{ \"result\": \"result\" }", "test", "{}", "user"); + await agent.SendAsync(message); + } + + [Fact] + public async Task ItProcessToolCallAggregateMessageAsTextContentAsync() + { + var messageConnector = new GeminiMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, ct) => + { + msgs.Count().Should().Be(1); + var innerMessage = msgs.First(); + innerMessage.Should().BeOfType>(); + var message = (IMessage)innerMessage; + message.Content.Role.Should().Be("user"); + message.Content.Parts.First().DataCase.Should().Be(Part.DataOneofCase.Text); + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(messageConnector); + var toolCallMessage = new ToolCallMessage("test", "{}", "user"); + var toolCallResultMessage = new ToolCallResultMessage("result", "test", "{}", "user"); + var message = new ToolCallAggregateMessage(toolCallMessage, toolCallResultMessage, from: "user"); + await agent.SendAsync(message); + } + + [Fact] + public async Task ItProcessToolCallAggregateMessageAsFunctionContentAsync() + { + var messageConnector = new GeminiMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, ct) => + { + msgs.Count().Should().Be(2); + var functionCallMessage = msgs.First(); + functionCallMessage.Should().BeOfType>(); + var message = (IMessage)functionCallMessage; + message.Content.Role.Should().Be("model"); + message.Content.Parts.First().DataCase.Should().Be(Part.DataOneofCase.FunctionCall); + + var functionResultMessage = msgs.Last(); + functionResultMessage.Should().BeOfType>(); + message = (IMessage)functionResultMessage; + message.Content.Role.Should().Be("function"); + message.Content.Parts.First().DataCase.Should().Be(Part.DataOneofCase.FunctionResponse); + + return await innerAgent.GenerateReplyAsync(msgs); + }) + .RegisterMiddleware(messageConnector); + var toolCallMessage = new 
ToolCallMessage("test", "{}", agent.Name); + var toolCallResultMessage = new ToolCallResultMessage("result", "test", "{}", agent.Name); + var message = new ToolCallAggregateMessage(toolCallMessage, toolCallResultMessage, from: agent.Name); + await agent.SendAsync(message); + } + + [Fact] + public async Task ItThrowExceptionWhenProcessingUnknownMessageTypeInStrictModeAsync() + { + var messageConnector = new GeminiMessageConnector(true); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(messageConnector); + + var unknownMessage = new + { + text = "Hello", + }; + + var message = MessageEnvelope.Create(unknownMessage, from: agent.Name); + var action = new Func(async () => await agent.SendAsync(message)); + + await action.Should().ThrowAsync(); + } + + [Fact] + public async Task ItReturnUnknownMessageTypeInNonStrictModeAsync() + { + var messageConnector = new GeminiMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, ct) => + { + var message = msgs.First(); + message.Should().BeAssignableTo(); + return message; + }) + .RegisterMiddleware(messageConnector); + + var unknownMessage = new + { + text = "Hello", + }; + + var message = MessageEnvelope.Create(unknownMessage, from: agent.Name); + await agent.SendAsync(message); + } + + [Fact] + public async Task ItShortcircuitContentTypeAsync() + { + var messageConnector = new GeminiMessageConnector(); + var agent = new EchoAgent("assistant") + .RegisterMiddleware(async (msgs, _, innerAgent, ct) => + { + var message = msgs.First(); + message.Should().BeOfType>(); + + return message; + }) + .RegisterMiddleware(messageConnector); + + var message = new Content() + { + Parts = { new Part { Text = "Hello" } }, + Role = "user", + }; + + await agent.SendAsync(MessageEnvelope.Create(message, from: agent.Name)); + } +} diff --git a/dotnet/test/AutoGen.Gemini.Tests/GoogleGeminiClientTests.cs b/dotnet/test/AutoGen.Gemini.Tests/GoogleGeminiClientTests.cs new file mode 100644 index 00000000000..3bda12eda1a --- /dev/null +++ b/dotnet/test/AutoGen.Gemini.Tests/GoogleGeminiClientTests.cs @@ -0,0 +1,132 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// GoogleGeminiClientTests.cs + +using AutoGen.Tests; +using FluentAssertions; +using Google.Cloud.AIPlatform.V1; +using Google.Protobuf; +using static Google.Cloud.AIPlatform.V1.Candidate.Types; + +namespace AutoGen.Gemini.Tests; + +public class GoogleGeminiClientTests +{ + [ApiKeyFact("GOOGLE_GEMINI_API_KEY")] + public async Task ItGenerateContentAsync() + { + var apiKey = Environment.GetEnvironmentVariable("GOOGLE_GEMINI_API_KEY") ?? throw new InvalidOperationException("GOOGLE_GEMINI_API_KEY is not set"); + var client = new GoogleGeminiClient(apiKey); + var model = "gemini-1.5-flash-001"; + + var text = "Write a long, tedious story"; + var request = new GenerateContentRequest + { + Model = model, + Contents = + { + new Content + { + Role = "user", + Parts = + { + new Part + { + Text = text, + } + } + } + } + }; + var completion = await client.GenerateContentAsync(request); + + completion.Should().NotBeNull(); + completion.Candidates.Count.Should().BeGreaterThan(0); + completion.Candidates[0].Content.Parts[0].Text.Should().NotBeNullOrEmpty(); + } + + [ApiKeyFact("GOOGLE_GEMINI_API_KEY")] + public async Task ItGenerateContentWithImageAsync() + { + var apiKey = Environment.GetEnvironmentVariable("GOOGLE_GEMINI_API_KEY") ?? 
throw new InvalidOperationException("GOOGLE_GEMINI_API_KEY is not set"); + var client = new GoogleGeminiClient(apiKey); + var model = "gemini-1.5-flash-001"; + + var text = "what's in the image"; + var imagePath = Path.Combine("testData", "images", "background.png"); + var image = File.ReadAllBytes(imagePath); + var request = new GenerateContentRequest + { + Model = model, + Contents = + { + new Content + { + Role = "user", + Parts = + { + new Part + { + Text = text, + }, + new Part + { + InlineData = new () + { + MimeType = "image/png", + Data = ByteString.CopyFrom(image), + }, + } + } + } + } + }; + + var completion = await client.GenerateContentAsync(request); + completion.Should().NotBeNull(); + completion.Candidates.Count.Should().BeGreaterThan(0); + completion.Candidates[0].Content.Parts[0].Text.Should().NotBeNullOrEmpty(); + } + + [ApiKeyFact("GOOGLE_GEMINI_API_KEY")] + public async Task ItStreamingGenerateContentTestAsync() + { + var apiKey = Environment.GetEnvironmentVariable("GOOGLE_GEMINI_API_KEY") ?? throw new InvalidOperationException("GOOGLE_GEMINI_API_KEY is not set"); + var client = new GoogleGeminiClient(apiKey); + var model = "gemini-1.5-flash-001"; + + var text = "Tell me a long tedious joke"; + var request = new GenerateContentRequest + { + Model = model, + Contents = + { + new Content + { + Role = "user", + Parts = + { + new Part + { + Text = text, + } + } + } + } + }; + + var response = client.GenerateContentStreamAsync(request); + var chunks = new List(); + GenerateContentResponse? final = null; + await foreach (var item in response) + { + item.Candidates.Count.Should().BeGreaterThan(0); + final = item; + chunks.Add(final); + } + + chunks.Should().NotBeEmpty(); + final.Should().NotBeNull(); + final!.UsageMetadata.CandidatesTokenCount.Should().BeGreaterThan(0); + final!.Candidates[0].FinishReason.Should().Be(FinishReason.Stop); + } +} diff --git a/dotnet/test/AutoGen.Gemini.Tests/SampleTests.cs b/dotnet/test/AutoGen.Gemini.Tests/SampleTests.cs new file mode 100644 index 00000000000..1f9b557af24 --- /dev/null +++ b/dotnet/test/AutoGen.Gemini.Tests/SampleTests.cs @@ -0,0 +1,28 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// SampleTests.cs + +using AutoGen.Gemini.Sample; +using AutoGen.Tests; + +namespace AutoGen.Gemini.Tests; + +public class SampleTests +{ + [ApiKeyFact("GCP_VERTEX_PROJECT_ID")] + public async Task TestChatWithVertexGeminiAsync() + { + await Chat_With_Vertex_Gemini.RunAsync(); + } + + [ApiKeyFact("GCP_VERTEX_PROJECT_ID")] + public async Task TestFunctionCallWithGeminiAsync() + { + await Function_Call_With_Gemini.RunAsync(); + } + + [ApiKeyFact("GCP_VERTEX_PROJECT_ID")] + public async Task TestImageChatWithVertexGeminiAsync() + { + await Image_Chat_With_Vertex_Gemini.RunAsync(); + } +} diff --git a/dotnet/test/AutoGen.Gemini.Tests/VertexGeminiClientTests.cs b/dotnet/test/AutoGen.Gemini.Tests/VertexGeminiClientTests.cs new file mode 100644 index 00000000000..2f06305ed59 --- /dev/null +++ b/dotnet/test/AutoGen.Gemini.Tests/VertexGeminiClientTests.cs @@ -0,0 +1,134 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// GeminiVertexClientTests.cs + +using AutoGen.Tests; +using FluentAssertions; +using Google.Cloud.AIPlatform.V1; +using Google.Protobuf; +using static Google.Cloud.AIPlatform.V1.Candidate.Types; +namespace AutoGen.Gemini.Tests; + +public class VertexGeminiClientTests +{ + [ApiKeyFact("GCP_VERTEX_PROJECT_ID")] + public async Task ItGenerateContentAsync() + { + var location = "us-central1"; + var project = Environment.GetEnvironmentVariable("GCP_VERTEX_PROJECT_ID"); + var client = new VertexGeminiClient(location); + var model = "gemini-1.5-flash-001"; + + var text = "Hello"; + var request = new GenerateContentRequest + { + Model = $"projects/{project}/locations/{location}/publishers/google/models/{model}", + Contents = + { + new Content + { + Role = "user", + Parts = + { + new Part + { + Text = text, + } + } + } + } + }; + var completion = await client.GenerateContentAsync(request); + + completion.Should().NotBeNull(); + completion.Candidates.Count.Should().BeGreaterThan(0); + completion.Candidates[0].Content.Parts[0].Text.Should().NotBeNullOrEmpty(); + } + + [ApiKeyFact("GCP_VERTEX_PROJECT_ID")] + public async Task ItGenerateContentWithImageAsync() + { + var location = "us-central1"; + var project = Environment.GetEnvironmentVariable("GCP_VERTEX_PROJECT_ID"); + var client = new VertexGeminiClient(location); + var model = "gemini-1.5-flash-001"; + + var text = "what's in the image"; + var imagePath = Path.Combine("testData", "images", "image.png"); + var image = File.ReadAllBytes(imagePath); + var request = new GenerateContentRequest + { + Model = $"projects/{project}/locations/{location}/publishers/google/models/{model}", + Contents = + { + new Content + { + Role = "user", + Parts = + { + new Part + { + Text = text, + }, + new Part + { + InlineData = new () + { + MimeType = "image/png", + Data = ByteString.CopyFrom(image), + }, + } + } + } + } + }; + + var completion = await client.GenerateContentAsync(request); + completion.Should().NotBeNull(); + completion.Candidates.Count.Should().BeGreaterThan(0); + completion.Candidates[0].Content.Parts[0].Text.Should().NotBeNullOrEmpty(); + } + + [ApiKeyFact("GCP_VERTEX_PROJECT_ID")] + public async Task ItStreamingGenerateContentTestAsync() + { + var location = "us-central1"; + var project = Environment.GetEnvironmentVariable("GCP_VERTEX_PROJECT_ID"); + var client = new VertexGeminiClient(location); + var model = "gemini-1.5-flash-001"; + + var text = "Hello, write a long tedious joke"; + var request = new GenerateContentRequest + { + Model = $"projects/{project}/locations/{location}/publishers/google/models/{model}", + Contents = + { + new Content + { + Role = "user", + Parts = + { + new Part + { + Text = text, + } + } + } + } + }; + + var response = client.GenerateContentStreamAsync(request); + var chunks = new List(); + GenerateContentResponse? 
final = null; + await foreach (var item in response) + { + item.Candidates.Count.Should().BeGreaterThan(0); + final = item; + chunks.Add(final); + } + + chunks.Should().NotBeEmpty(); + final.Should().NotBeNull(); + final!.UsageMetadata.CandidatesTokenCount.Should().BeGreaterThan(0); + final!.Candidates[0].FinishReason.Should().Be(FinishReason.Stop); + } +} diff --git a/dotnet/test/AutoGen.Mistral.Tests/AutoGen.Mistral.Tests.csproj b/dotnet/test/AutoGen.Mistral.Tests/AutoGen.Mistral.Tests.csproj index eff70486928..d734119dbb0 100644 --- a/dotnet/test/AutoGen.Mistral.Tests/AutoGen.Mistral.Tests.csproj +++ b/dotnet/test/AutoGen.Mistral.Tests/AutoGen.Mistral.Tests.csproj @@ -4,18 +4,10 @@ $(TestTargetFramework) enable false + True True - - - - - - - - - diff --git a/dotnet/test/AutoGen.Ollama.Tests/AutoGen.Ollama.Tests.csproj b/dotnet/test/AutoGen.Ollama.Tests/AutoGen.Ollama.Tests.csproj index 27f80716f1c..1e26b38d8a4 100644 --- a/dotnet/test/AutoGen.Ollama.Tests/AutoGen.Ollama.Tests.csproj +++ b/dotnet/test/AutoGen.Ollama.Tests/AutoGen.Ollama.Tests.csproj @@ -4,18 +4,10 @@ $(TestTargetFramework) enable false + True True - - - - - - - - - diff --git a/dotnet/test/AutoGen.OpenAI.Tests/AutoGen.OpenAI.Tests.csproj b/dotnet/test/AutoGen.OpenAI.Tests/AutoGen.OpenAI.Tests.csproj index 044975354b8..ba499232beb 100644 --- a/dotnet/test/AutoGen.OpenAI.Tests/AutoGen.OpenAI.Tests.csproj +++ b/dotnet/test/AutoGen.OpenAI.Tests/AutoGen.OpenAI.Tests.csproj @@ -3,18 +3,10 @@ $(TestTargetFramework) false + True True - - - - - - - - - diff --git a/dotnet/test/AutoGen.SemanticKernel.Tests/AutoGen.SemanticKernel.Tests.csproj b/dotnet/test/AutoGen.SemanticKernel.Tests/AutoGen.SemanticKernel.Tests.csproj index b6d03ddc4af..8be4b55b172 100644 --- a/dotnet/test/AutoGen.SemanticKernel.Tests/AutoGen.SemanticKernel.Tests.csproj +++ b/dotnet/test/AutoGen.SemanticKernel.Tests/AutoGen.SemanticKernel.Tests.csproj @@ -5,18 +5,10 @@ enable false $(NoWarn);SKEXP0110 + True True - - - - - - - - - diff --git a/dotnet/test/AutoGen.SourceGenerator.Tests/AutoGen.SourceGenerator.Tests.csproj b/dotnet/test/AutoGen.SourceGenerator.Tests/AutoGen.SourceGenerator.Tests.csproj index 0d0d91e0522..2e0ead045be 100644 --- a/dotnet/test/AutoGen.SourceGenerator.Tests/AutoGen.SourceGenerator.Tests.csproj +++ b/dotnet/test/AutoGen.SourceGenerator.Tests/AutoGen.SourceGenerator.Tests.csproj @@ -4,18 +4,10 @@ $(TestTargetFramework) enable false + True True - - - - - - - - - diff --git a/dotnet/test/AutoGen.Tests/AutoGen.Tests.csproj b/dotnet/test/AutoGen.Tests/AutoGen.Tests.csproj index 740772c0407..4def281ed7b 100644 --- a/dotnet/test/AutoGen.Tests/AutoGen.Tests.csproj +++ b/dotnet/test/AutoGen.Tests/AutoGen.Tests.csproj @@ -3,18 +3,10 @@ $(TestTargetFramework) True + True $(NoWarn);xUnit1013;SKEXP0110 - - - - - - - - - diff --git a/dotnet/test/AutoGen.Tests/ImageMessageTests.cs b/dotnet/test/AutoGen.Tests/ImageMessageTests.cs new file mode 100644 index 00000000000..210cb1017ed --- /dev/null +++ b/dotnet/test/AutoGen.Tests/ImageMessageTests.cs @@ -0,0 +1,38 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// ImageMessageTests.cs + +using System; +using System.IO; +using System.Threading.Tasks; +using FluentAssertions; +using Xunit; + +namespace AutoGen.Tests; + +public class ImageMessageTests +{ + [Fact] + public async Task ItCreateFromLocalImage() + { + var image = Path.Combine("testData", "images", "background.png"); + var binary = File.ReadAllBytes(image); + var base64 = Convert.ToBase64String(binary); + var imageMessage = new ImageMessage(Role.User, BinaryData.FromBytes(binary, "image/png")); + + imageMessage.MimeType.Should().Be("image/png"); + imageMessage.BuildDataUri().Should().Be($"data:image/png;base64,{base64}"); + } + + [Fact] + public async Task ItCreateFromUrl() + { + var image = Path.Combine("testData", "images", "background.png"); + var fullPath = Path.GetFullPath(image); + var localUrl = new Uri(fullPath).AbsoluteUri; + var imageMessage = new ImageMessage(Role.User, localUrl); + + imageMessage.Url.Should().Be(localUrl); + imageMessage.MimeType.Should().Be("image/png"); + imageMessage.Data.Should().BeNull(); + } +} diff --git a/dotnet/test/AutoGen.Tests/SingleAgentTest.cs b/dotnet/test/AutoGen.Tests/SingleAgentTest.cs index b784ff8da03..418b55e70c7 100644 --- a/dotnet/test/AutoGen.Tests/SingleAgentTest.cs +++ b/dotnet/test/AutoGen.Tests/SingleAgentTest.cs @@ -266,10 +266,10 @@ public async Task GetHighestLabel(string labelName, string color) public async Task EchoFunctionCallTestAsync(IAgent agent) { - var message = new TextMessage(Role.System, "You are a helpful AI assistant that call echo function"); + //var message = new TextMessage(Role.System, "You are a helpful AI assistant that call echo function"); var helloWorld = new TextMessage(Role.User, "echo Hello world"); - var reply = await agent.SendAsync(chatHistory: new[] { message, helloWorld }); + var reply = await agent.SendAsync(chatHistory: new[] { helloWorld }); reply.From.Should().Be(agent.Name); reply.GetToolCalls()!.First().FunctionName.Should().Be(nameof(EchoAsync)); @@ -277,10 +277,10 @@ public async Task EchoFunctionCallTestAsync(IAgent agent) public async Task EchoFunctionCallExecutionTestAsync(IAgent agent) { - var message = new TextMessage(Role.System, "You are a helpful AI assistant that echo whatever user says"); + //var message = new TextMessage(Role.System, "You are a helpful AI assistant that echo whatever user says"); var helloWorld = new TextMessage(Role.User, "echo Hello world"); - var reply = await agent.SendAsync(chatHistory: new[] { message, helloWorld }); + var reply = await agent.SendAsync(chatHistory: new[] { helloWorld }); reply.GetContent().Should().Be("[ECHO] Hello world"); reply.From.Should().Be(agent.Name); @@ -289,13 +289,13 @@ public async Task EchoFunctionCallExecutionTestAsync(IAgent agent) public async Task EchoFunctionCallExecutionStreamingTestAsync(IStreamingAgent agent) { - var message = new TextMessage(Role.System, "You are a helpful AI assistant that echo whatever user says"); + //var message = new TextMessage(Role.System, "You are a helpful AI assistant that echo whatever user says"); var helloWorld = new TextMessage(Role.User, "echo Hello world"); var option = new GenerateReplyOptions { Temperature = 0, }; - var replyStream = agent.GenerateStreamingReplyAsync(messages: new[] { message, helloWorld }, option); + var replyStream = agent.GenerateStreamingReplyAsync(messages: new[] { helloWorld }, option); var answer = "[ECHO] Hello world"; IStreamingMessage? 
finalReply = default; await foreach (var reply in replyStream) @@ -319,25 +319,23 @@ public async Task EchoFunctionCallExecutionStreamingTestAsync(IStreamingAgent ag public async Task UpperCaseTestAsync(IAgent agent) { - var message = new TextMessage(Role.System, "You are a helpful AI assistant that convert user message to upper case"); - var uppCaseMessage = new TextMessage(Role.User, "abcdefg"); + var message = new TextMessage(Role.User, "Please convert abcde to upper case."); - var reply = await agent.SendAsync(chatHistory: new[] { message, uppCaseMessage }); + var reply = await agent.SendAsync(chatHistory: new[] { message }); - reply.GetContent().Should().Contain("ABCDEFG"); + reply.GetContent().Should().Contain("ABCDE"); reply.From.Should().Be(agent.Name); } public async Task UpperCaseStreamingTestAsync(IStreamingAgent agent) { - var message = new TextMessage(Role.System, "You are a helpful AI assistant that convert user message to upper case"); - var helloWorld = new TextMessage(Role.User, "a b c d e f g h i j k l m n"); + var message = new TextMessage(Role.User, "Please convert 'hello world' to upper case"); var option = new GenerateReplyOptions { Temperature = 0, }; - var replyStream = agent.GenerateStreamingReplyAsync(messages: new[] { message, helloWorld }, option); - var answer = "A B C D E F G H I J K L M N"; + var replyStream = agent.GenerateStreamingReplyAsync(messages: new[] { message }, option); + var answer = "HELLO WORLD"; TextMessage? finalReply = default; await foreach (var reply in replyStream) { diff --git a/notebook/agentchat_MathChat.ipynb b/notebook/agentchat_MathChat.ipynb index 8a234ede013..afa00fb7562 100644 --- a/notebook/agentchat_MathChat.ipynb +++ b/notebook/agentchat_MathChat.ipynb @@ -84,14 +84,14 @@ " 'api_key': '',\n", " 'base_url': '',\n", " 'api_type': 'azure',\n", - " 'api_version': '2024-02-15-preview',\n", + " 'api_version': '2024-02-01',\n", " },\n", " {\n", " 'model': 'gpt-3.5-turbo',\n", " 'api_key': '',\n", " 'base_url': '',\n", " 'api_type': 'azure',\n", - " 'api_version': '2024-02-15-preview',\n", + " 'api_version': '2024-02-01',\n", " },\n", "]\n", "```\n", diff --git a/notebook/agentchat_cost_token_tracking.ipynb b/notebook/agentchat_cost_token_tracking.ipynb index 7feb7a908f4..fecc98f3276 100644 --- a/notebook/agentchat_cost_token_tracking.ipynb +++ b/notebook/agentchat_cost_token_tracking.ipynb @@ -88,7 +88,7 @@ " \"model\": \"gpt-35-turbo-0613\", # 0613 or newer is needed to use functions\n", " \"base_url\": \"\", \n", " \"api_type\": \"azure\", \n", - " \"api_version\": \"2024-02-15-preview\", # 2023-07-01-preview or newer is needed to use functions\n", + " \"api_version\": \"2024-02-01\", # 2023-07-01-preview or newer is needed to use functions\n", " \"api_key\": \"\",\n", " \"tags\": [\"gpt-3.5-turbo\", \"0613\"],\n", " }\n", diff --git a/notebook/agentchat_custom_model.ipynb b/notebook/agentchat_custom_model.ipynb index b06d2c3cf4e..5097713a092 100644 --- a/notebook/agentchat_custom_model.ipynb +++ b/notebook/agentchat_custom_model.ipynb @@ -226,14 +226,14 @@ " \"api_key\": \"\",\n", " \"base_url\": \"\",\n", " \"api_type\": \"azure\",\n", - " \"api_version\": \"2024-02-15-preview\"\n", + " \"api_version\": \"2024-02-01\"\n", " },\n", " {\n", " \"model\": \"gpt-4-32k\",\n", " \"api_key\": \"\",\n", " \"base_url\": \"\",\n", " \"api_type\": \"azure\",\n", - " \"api_version\": \"2024-02-15-preview\"\n", + " \"api_version\": \"2024-02-01\"\n", " }\n", "]\n", "```\n", diff --git a/notebook/agentchat_dalle_and_gpt4v.ipynb 
b/notebook/agentchat_dalle_and_gpt4v.ipynb index 258b49d6976..e07578016a9 100644 --- a/notebook/agentchat_dalle_and_gpt4v.ipynb +++ b/notebook/agentchat_dalle_and_gpt4v.ipynb @@ -93,7 +93,7 @@ " {\n", " 'model': 'dalle',\n", " 'api_key': 'Your API Key here',\n", - " 'api_version': '2024-02-15-preview'\n", + " 'api_version': '2024-02-01'\n", " }\n", "]\n", " ```" diff --git a/notebook/agentchat_function_call.ipynb b/notebook/agentchat_function_call.ipynb index c91699d0d44..2a173c8e269 100644 --- a/notebook/agentchat_function_call.ipynb +++ b/notebook/agentchat_function_call.ipynb @@ -90,7 +90,7 @@ " 'api_key': '',\n", " 'base_url': '',\n", " 'api_type': 'azure',\n", - " 'api_version': '2024-02-15-preview',\n", + " 'api_version': '2024-02-01',\n", " 'tags': ['tool', 'gpt-3.5-turbo'],\n", " },\n", " {\n", @@ -98,7 +98,7 @@ " 'api_key': '',\n", " 'base_url': '',\n", " 'api_type': 'azure',\n", - " 'api_version': '2024-02-15-preview',\n", + " 'api_version': '2024-02-01',\n", " 'tags': ['tool', 'gpt-3.5-turbo-16k'],\n", " },\n", "]\n", diff --git a/notebook/agentchat_function_call_currency_calculator.ipynb b/notebook/agentchat_function_call_currency_calculator.ipynb index a7a5a92bbd9..d6ce5a88762 100644 --- a/notebook/agentchat_function_call_currency_calculator.ipynb +++ b/notebook/agentchat_function_call_currency_calculator.ipynb @@ -90,7 +90,7 @@ " 'api_key': '',\n", " 'base_url': '',\n", " 'api_type': 'azure',\n", - " 'api_version': '2024-02-15-preview',\n", + " 'api_version': '2024-02-01',\n", " 'tags': ['tool', '3.5-tool'],\n", " },\n", " {\n", @@ -98,7 +98,7 @@ " 'api_key': '',\n", " 'base_url': '',\n", " 'api_type': 'azure',\n", - " 'api_version': '2024-02-15-preview',\n", + " 'api_version': '2024-02-01',\n", " 'tags': ['tool', '3.5-tool'],\n", " },\n", "]\n", diff --git a/notebook/agentchat_groupchat_stateflow.ipynb b/notebook/agentchat_groupchat_stateflow.ipynb index b8810d2fb63..53eb0f2ff98 100644 --- a/notebook/agentchat_groupchat_stateflow.ipynb +++ b/notebook/agentchat_groupchat_stateflow.ipynb @@ -74,7 +74,7 @@ "- Scientist: Read the papers and write a summary.\n", "\n", "\n", - "In the Figure, we define a simple workflow for research with 4 states: Init, Retrieve, Reserach and End. Within each state, we will call different agents to perform the tasks.\n", + "In the Figure, we define a simple workflow for research with 4 states: Init, Retrieve, Research and End. 
Within each state, we will call different agents to perform the tasks.\n", "- Init: We use the initializer to start the workflow.\n", "- Retrieve: We will first call the coder to write code and then call the executor to execute the code.\n", "- Research: We will call the scientist to read the papers and write a summary.\n", diff --git a/notebook/agentchat_human_feedback.ipynb b/notebook/agentchat_human_feedback.ipynb index 75078e67cf9..000d788d6a5 100644 --- a/notebook/agentchat_human_feedback.ipynb +++ b/notebook/agentchat_human_feedback.ipynb @@ -90,14 +90,14 @@ " 'api_key': '',\n", " 'base_url': '',\n", " 'api_type': 'azure',\n", - " 'api_version': '2024-02-15-preview',\n", + " 'api_version': '2024-02-01',\n", " },\n", " {\n", " 'model': 'gpt-3.5-turbo-16k',\n", " 'api_key': '',\n", " 'base_url': '',\n", " 'api_type': 'azure',\n", - " 'api_version': '2024-02-15-preview',\n", + " 'api_version': '2024-02-01',\n", " },\n", "]\n", "```\n", diff --git a/notebook/agentchat_microsoft_fabric.ipynb b/notebook/agentchat_microsoft_fabric.ipynb index 55793e0abb1..e4c2a7119cf 100644 --- a/notebook/agentchat_microsoft_fabric.ipynb +++ b/notebook/agentchat_microsoft_fabric.ipynb @@ -2,23 +2,32 @@ "cells": [ { "cell_type": "markdown", - "id": "be5a8d87", - "metadata": {}, + "id": "0", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, "source": [ - "# Use AutoGen in Microsoft Fabric\n", + "## Use AutoGen in Microsoft Fabric\n", "\n", - "AutoGen offers conversable LLM agents, which can be used to solve various tasks with human or automatic feedback, including tasks that require using tools via code.\n", + "[AutoGen](https://github.com/microsoft/autogen) offers conversable LLM agents, which can be used to solve various tasks with human or automatic feedback, including tasks that require using tools via code.\n", "Please find documentation about this feature [here](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat).\n", "\n", - "[Microsoft Fabric](https://learn.microsoft.com/en-us/fabric/get-started/microsoft-fabric-overview) is an all-in-one analytics solution for enterprises that covers everything from data movement to data science, Real-Time Analytics, and business intelligence. It offers a comprehensive suite of services, including data lake, data engineering, and data integration, all in one place. Its pre-built AI models include GPT-x models such as `gpt-4-turbo`, `gpt-4`, `gpt-4-8k`, `gpt-4-32k`, `gpt-35-turbo`, `gpt-35-turbo-16k` and `gpt-35-turbo-instruct`, etc. It's important to note that the Azure Open AI service is not supported on trial SKUs and only paid SKUs (F64 or higher, or P1 or higher) are supported. Azure Open AI is being enabled in stages, with access for all users expected by March 2024.\n", + "[Microsoft Fabric](https://learn.microsoft.com/en-us/fabric/get-started/microsoft-fabric-overview) is an all-in-one analytics solution for enterprises that covers everything from data movement to data science, Real-Time Analytics, and business intelligence. It offers a comprehensive suite of services, including data lake, data engineering, and data integration, all in one place. Its pre-built AI models include GPT-x models such as `gpt-4o`, `gpt-4-turbo`, `gpt-4`, `gpt-4-8k`, `gpt-4-32k`, `gpt-35-turbo`, `gpt-35-turbo-16k` and `gpt-35-turbo-instruct`, etc. 
It's important to note that the Azure Open AI service is not supported on trial SKUs and only paid SKUs (F64 or higher, or P1 or higher) are supported.\n", "\n", - "In this notebook, we demonstrate how to use `AssistantAgent` and `UserProxyAgent` to write code and execute the code. Here `AssistantAgent` is an LLM-based agent that can write Python code (in a Python coding block) for a user to execute for a given task. `UserProxyAgent` is an agent which serves as a proxy for the human user to execute the code written by `AssistantAgent`, or automatically execute the code. Depending on the setting of `human_input_mode` and `max_consecutive_auto_reply`, the `UserProxyAgent` either solicits feedback from the human user or returns auto-feedback based on the result of code execution (success or failure and corresponding outputs) to `AssistantAgent`. `AssistantAgent` will debug the code and suggest new code if the result contains error. The two agents keep communicating to each other until the task is done.\n", + "In this notebook, we demonstrate several examples:\n", + "- 1. How to use `AssistantAgent` and `UserProxyAgent` to write code and execute the code.\n", + "- 2. How to use `RetrieveAssistantAgent` and `RetrieveUserProxyAgent` to do Retrieval Augmented Generation (RAG) for QA and Code Generation.\n", + "- 3. How to use `MultimodalConversableAgent` to chat with images.\n", "\n", - "## Requirements\n", + "### Requirements\n", "\n", "AutoGen requires `Python>=3.8`. To run this notebook example, please install:\n", "```bash\n", - "pip install \"pyautogen\"\n", + "pip install \"pyautogen[retrievechat,lmm]>=0.2.28\"\n", "```\n", "\n", "Also, this notebook depends on Microsoft Fabric pre-built LLM endpoints. Running it elsewhere may encounter errors." @@ -26,7 +35,7 @@ }, { "cell_type": "markdown", - "id": "34ce050c-134a-4787-9655-73d9bd7afb6b", + "id": "1", "metadata": { "nteract": { "transient": { @@ -35,112 +44,37 @@ } }, "source": [ - "## AutoGen version < 0.2.0\n", - "\n", - "For AutoGen version < 0.2.0, the Azure OpenAI endpoint is pre-configured." 
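The overall shape of Example 1 in this notebook can be summarized with a minimal sketch before the install and configuration cells that follow. This is an illustrative orientation only, not code from this patch: it assumes an OpenAI-compatible `config_list` (for instance the Fabric one built later in the notebook, or one loaded from an `OAI_CONFIG_LIST` file), and the `coding` working directory is an arbitrary choice.

```python
import autogen

# Assumption: an OpenAI-compatible config_list; the notebook later builds one
# backed by the Fabric pre-built endpoints instead of this file-based load.
config_list = autogen.config_list_from_json("OAI_CONFIG_LIST")

# The assistant writes Python code for the given task.
assistant = autogen.AssistantAgent(
    name="assistant",
    llm_config={"config_list": config_list, "temperature": 0},
)

# The user proxy executes the code blocks and feeds the results back automatically.
user_proxy = autogen.UserProxyAgent(
    name="user_proxy",
    human_input_mode="NEVER",
    max_consecutive_auto_reply=10,
    is_termination_msg=lambda x: (x.get("content") or "").rstrip().endswith("TERMINATE"),
    code_execution_config={"work_dir": "coding", "use_docker": False},
)

chat_result = user_proxy.initiate_chat(
    assistant,
    message="Who should read this paper: https://arxiv.org/abs/2308.08155",
)
print(f"Cost for the chat:\n{chat_result.cost}")
```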
+ "### Install AutoGen" ] }, { "cell_type": "code", "execution_count": null, - "id": "6a6b4a95-5766-442d-9de5-b7fc1fb3d140", + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install \"pyautogen[retrievechat,lmm]>=0.2.28\" -q" + ] + }, + { + "cell_type": "markdown", + "id": "3", "metadata": { - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, "nteract": { "transient": { "deleting": false } } }, - "outputs": [ - { - "data": { - "application/vnd.livy.statement-meta+json": { - "execution_finish_time": "2023-12-11T05:07:36.8889779Z", - "execution_start_time": "2023-12-11T05:07:36.8886587Z", - "livy_statement_state": "available", - "parent_msg_id": "4aa7c4ee-8126-4206-8a8b-b38491ff16dc", - "queued_time": "2023-12-11T05:07:11.6799575Z", - "session_id": null, - "session_start_time": null, - "spark_pool": null, - "state": "finished", - "statement_id": -1 - }, - "text/plain": [ - "StatementMeta(, , -1, Finished, Available)" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": {}, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting pyautogen<0.2.0\n", - " Downloading pyautogen-0.1.14-py3-none-any.whl (88 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m88.8/88.8 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: diskcache in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from pyautogen<0.2.0) (5.6.3)\n", - "Requirement already satisfied: flaml in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from pyautogen<0.2.0) (2.1.1.dev2)\n", - "Requirement already satisfied: openai<1 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from pyautogen<0.2.0) (0.27.8)\n", - "Collecting python-dotenv (from pyautogen<0.2.0)\n", - " Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)\n", - "Requirement already satisfied: termcolor in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from pyautogen<0.2.0) (2.3.0)\n", - "Requirement already satisfied: requests>=2.20 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from openai<1->pyautogen<0.2.0) (2.31.0)\n", - "Requirement already satisfied: tqdm in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from openai<1->pyautogen<0.2.0) (4.66.1)\n", - "Requirement already satisfied: aiohttp in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from openai<1->pyautogen<0.2.0) (3.8.6)\n", - "Requirement already satisfied: NumPy>=1.17.0rc1 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from flaml->pyautogen<0.2.0) (1.24.3)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from requests>=2.20->openai<1->pyautogen<0.2.0) (3.3.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from requests>=2.20->openai<1->pyautogen<0.2.0) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from requests>=2.20->openai<1->pyautogen<0.2.0) (1.26.17)\n", - "Requirement already 
satisfied: certifi>=2017.4.17 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from requests>=2.20->openai<1->pyautogen<0.2.0) (2023.7.22)\n", - "Requirement already satisfied: attrs>=17.3.0 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from aiohttp->openai<1->pyautogen<0.2.0) (23.1.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from aiohttp->openai<1->pyautogen<0.2.0) (6.0.4)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from aiohttp->openai<1->pyautogen<0.2.0) (4.0.3)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from aiohttp->openai<1->pyautogen<0.2.0) (1.9.2)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from aiohttp->openai<1->pyautogen<0.2.0) (1.4.0)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from aiohttp->openai<1->pyautogen<0.2.0) (1.3.1)\n", - "Installing collected packages: python-dotenv, pyautogen\n", - "Successfully installed pyautogen-0.1.14 python-dotenv-1.0.0\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - }, - { - "data": {}, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Warning: PySpark kernel has been restarted to use updated packages.\n", - "\n" - ] - } - ], "source": [ - "%pip install \"pyautogen<0.2.0\"" + "### Set up config_list and llm_config" ] }, { "cell_type": "code", "execution_count": null, - "id": "448f26d0-d1f7-4b2a-8dab-035ff2abbedc", + "id": "4", "metadata": { "jupyter": { "outputs_hidden": false, @@ -156,19 +90,22 @@ { "data": { "application/vnd.livy.statement-meta+json": { - "execution_finish_time": "2023-12-11T05:18:00.2585542Z", - "execution_start_time": "2023-12-11T05:17:59.8269627Z", + "execution_finish_time": "2024-06-07T15:24:20.5752101Z", + "execution_start_time": "2024-06-07T15:24:03.7868628Z", "livy_statement_state": "available", - "parent_msg_id": "0c686a15-8b9c-4479-ac26-2cca81b21cf3", - "queued_time": "2023-12-11T05:17:59.3165049Z", - "session_id": "865e72a4-f70b-46cf-8421-9f25745bd9bd", + "parent_msg_id": "bf8925aa-a2a2-4686-9388-3ec1eb12c5d7", + "queued_time": "2024-06-07T15:23:08.5880731Z", + "session_id": "1d5e9aec-2019-408c-a19a-5db9fb175ae2", "session_start_time": null, "spark_pool": null, "state": "finished", - "statement_id": 27 + "statement_id": 9, + "statement_ids": [ + 9 + ] }, "text/plain": [ - "StatementMeta(, 865e72a4-f70b-46cf-8421-9f25745bd9bd, 27, Finished, Available)" + "StatementMeta(, 1d5e9aec-2019-408c-a19a-5db9fb175ae2, 9, Finished, Available)" ] }, "metadata": {}, @@ -176,14 +113,24 @@ } ], "source": [ - "from synapse.ml.mlflow import 
get_mlflow_env_config\n", + "import types\n", + "\n", + "import httpx\n", + "from synapse.ml.fabric.credentials import get_openai_httpx_sync_client\n", "\n", "import autogen\n", "\n", - "# Choose different models\n", + "http_client = get_openai_httpx_sync_client()\n", + "http_client.__deepcopy__ = types.MethodType(\n", + " lambda self, memo: self, http_client\n", + ") # https://microsoft.github.io/autogen/docs/topics/llm_configuration#adding-http-client-in-llm_config-for-proxy\n", + "\n", "config_list = [\n", " {\n", - " \"model\": \"gpt-4-turbo\",\n", + " \"model\": \"gpt-4o\",\n", + " \"http_client\": http_client,\n", + " \"api_type\": \"azure\",\n", + " \"api_version\": \"2024-02-01\",\n", " },\n", "]\n", "\n", @@ -194,10 +141,25 @@ "}" ] }, + { + "cell_type": "markdown", + "id": "5", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "### Example 1\n", + "How to use `AssistantAgent` and `UserProxyAgent` to write code and execute the code." + ] + }, { "cell_type": "code", "execution_count": null, - "id": "793b6eb1-f8af-4b98-809d-21fd53f7de41", + "id": "6", "metadata": { "jupyter": { "outputs_hidden": false, @@ -213,19 +175,22 @@ { "data": { "application/vnd.livy.statement-meta+json": { - "execution_finish_time": "2023-12-11T05:18:21.8907776Z", - "execution_start_time": "2023-12-11T05:18:01.7118817Z", + "execution_finish_time": "2024-06-07T15:25:04.5390713Z", + "execution_start_time": "2024-06-07T15:24:21.6208975Z", "livy_statement_state": "available", - "parent_msg_id": "a3a03b66-c113-4b91-872f-213880814fbd", - "queued_time": "2023-12-11T05:18:01.293131Z", - "session_id": "865e72a4-f70b-46cf-8421-9f25745bd9bd", + "parent_msg_id": "93157ebd-4f6e-4ad6-b089-5b40edea3787", + "queued_time": "2024-06-07T15:23:08.5886561Z", + "session_id": "1d5e9aec-2019-408c-a19a-5db9fb175ae2", "session_start_time": null, "spark_pool": null, "state": "finished", - "statement_id": 28 + "statement_id": 10, + "statement_ids": [ + 10 + ] }, "text/plain": [ - "StatementMeta(, 865e72a4-f70b-46cf-8421-9f25745bd9bd, 28, Finished, Available)" + "StatementMeta(, 1d5e9aec-2019-408c-a19a-5db9fb175ae2, 10, Finished, Available)" ] }, "metadata": {}, @@ -244,34 +209,46 @@ "--------------------------------------------------------------------------------\n", "\u001b[33massistant\u001b[0m (to user_proxy):\n", "\n", - "To determine who should read the paper titled \"Learning to Prompt for Continual Learning\" available on arXiv, we need to first understand the abstract and the topics covered in the paper. I will fetch the abstract from the provided URL and analyze its content to suggest the target audience.\n", + "To determine who should read the paper titled \"https://arxiv.org/abs/2308.08155\", we need to extract and analyze the abstract and other relevant information from the paper. 
This will help us understand the content and target audience of the paper.\n", + "\n", + "Let's write a Python script to fetch and print the abstract and other relevant details from the arXiv page.\n", "\n", "```python\n", - "# filename: fetch_arxiv_abstract.py\n", + "# filename: fetch_arxiv_paper_info.py\n", + "\n", "import requests\n", "from bs4 import BeautifulSoup\n", "\n", - "# Function to get the abstract of the paper from arXiv\n", - "def get_arxiv_abstract(url):\n", + "def fetch_arxiv_paper_info(url):\n", " response = requests.get(url)\n", " if response.status_code == 200:\n", " soup = BeautifulSoup(response.content, 'html.parser')\n", - " abstract_text = soup.find('blockquote', class_='abstract').text\n", - " # Clean up the abstract text\n", - " abstract_text = abstract_text.replace('Abstract: ', '').strip()\n", - " return abstract_text\n", + " \n", + " # Extract the title\n", + " title = soup.find('h1', class_='title').text.replace('Title:', '').strip()\n", + " \n", + " # Extract the authors\n", + " authors = soup.find('div', class_='authors').text.replace('Authors:', '').strip()\n", + " \n", + " # Extract the abstract\n", + " abstract = soup.find('blockquote', class_='abstract').text.replace('Abstract:', '').strip()\n", + " \n", + " # Extract the subjects\n", + " subjects = soup.find('span', class_='primary-subject').text.strip()\n", + " \n", + " print(f\"Title: {title}\\n\")\n", + " print(f\"Authors: {authors}\\n\")\n", + " print(f\"Abstract: {abstract}\\n\")\n", + " print(f\"Subjects: {subjects}\\n\")\n", " else:\n", - " return \"Error: Unable to fetch the abstract from arXiv.\"\n", - "\n", - "# URL of the paper\n", - "paper_url = 'https://arxiv.org/abs/2308.08155'\n", + " print(\"Failed to fetch the paper information.\")\n", "\n", - "# Get the abstract of the paper\n", - "abstract = get_arxiv_abstract(paper_url)\n", - "print(abstract)\n", + "# URL of the arXiv paper\n", + "url = \"https://arxiv.org/abs/2308.08155\"\n", + "fetch_arxiv_paper_info(url)\n", "```\n", "\n", - "Please run the above Python script to fetch the abstract of the paper. Once we have the abstract, I will analyze it to suggest the appropriate audience.\n", + "Please save the code in a file named `fetch_arxiv_paper_info.py` and execute it. This script will fetch and print the title, authors, abstract, and subjects of the paper, which will help us determine the target audience.\n", "\n", "--------------------------------------------------------------------------------\n", "\u001b[31m\n", @@ -280,31 +257,41 @@ "\n", "exitcode: 0 (execution succeeded)\n", "Code output: \n", - "Abstract:AutoGen is an open-source framework that allows developers to build LLM applications via multiple agents that can converse with each other to accomplish tasks. AutoGen agents are customizable, conversable, and can operate in various modes that employ combinations of LLMs, human inputs, and tools. Using AutoGen, developers can also flexibly define agent interaction behaviors. Both natural language and computer code can be used to program flexible conversation patterns for different applications. AutoGen serves as a generic infrastructure to build diverse applications of various complexities and LLM capacities. 
Empirical studies demonstrate the effectiveness of the framework in many example applications, with domains ranging from mathematics, coding, question answering, operations research, online decision-making, entertainment, etc.\n", + "Title: AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation\n", "\n", + "Authors: Qingyun Wu, Gagan Bansal, Jieyu Zhang, Yiran Wu, Beibin Li, Erkang Zhu, Li Jiang, Xiaoyun Zhang, Shaokun Zhang, Jiale Liu, Ahmed Hassan Awadallah, Ryen W White, Doug Burger, Chi Wang\n", "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33massistant\u001b[0m (to user_proxy):\n", + "Abstract: AutoGen is an open-source framework that allows developers to build LLM applications via multiple agents that can converse with each other to accomplish tasks. AutoGen agents are customizable, conversable, and can operate in various modes that employ combinations of LLMs, human inputs, and tools. Using AutoGen, developers can also flexibly define agent interaction behaviors. Both natural language and computer code can be used to program flexible conversation patterns for different applications. AutoGen serves as a generic infrastructure to build diverse applications of various complexities and LLM capacities. Empirical studies demonstrate the effectiveness of the framework in many example applications, with domains ranging from mathematics, coding, question answering, operations research, online decision-making, entertainment, etc.\n", "\n", - "Based on the abstract provided, the paper titled \"AutoGen: An Open-Source Framework for Building LLM Applications with Conversable Agents\" seems to be focused on a framework that enables developers to create applications using large language models (LLMs) with agents that can interact through conversation to accomplish tasks.\n", + "Subjects: Artificial Intelligence (cs.AI)\n", "\n", - "The target audience for this paper would likely include:\n", "\n", - "1. **Software Developers and Engineers** who are interested in building applications that leverage large language models and conversational agents.\n", "\n", - "2. **Researchers in Artificial Intelligence and Machine Learning** who are working on natural language processing, conversational AI, and the integration of human inputs with AI agents.\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33massistant\u001b[0m (to user_proxy):\n", + "\n", + "Based on the extracted information, here is a summary of who should read the paper titled \"AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation\":\n", "\n", - "3. **Product Managers and Technical Leads** who are looking to understand how conversational AI can be applied to various domains such as mathematics, coding, question answering, operations research, online decision-making, and entertainment.\n", + "### Title:\n", + "**AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation**\n", "\n", - "4. **Educators and Students** in computer science and related fields who are interested in the latest developments in AI frameworks and applications.\n", + "### Authors:\n", + "Qingyun Wu, Gagan Bansal, Jieyu Zhang, Yiran Wu, Beibin Li, Erkang Zhu, Li Jiang, Xiaoyun Zhang, Shaokun Zhang, Jiale Liu, Ahmed Hassan Awadallah, Ryen W White, Doug Burger, Chi Wang\n", "\n", - "5. 
**Innovators and Entrepreneurs** in the tech industry who are exploring new ways to incorporate AI into their products and services.\n", + "### Abstract:\n", + "AutoGen is an open-source framework that allows developers to build LLM (Large Language Model) applications via multiple agents that can converse with each other to accomplish tasks. AutoGen agents are customizable, conversable, and can operate in various modes that employ combinations of LLMs, human inputs, and tools. Using AutoGen, developers can also flexibly define agent interaction behaviors. Both natural language and computer code can be used to program flexible conversation patterns for different applications. AutoGen serves as a generic infrastructure to build diverse applications of various complexities and LLM capacities. Empirical studies demonstrate the effectiveness of the framework in many example applications, with domains ranging from mathematics, coding, question answering, operations research, online decision-making, entertainment, etc.\n", "\n", - "6. **AI Enthusiasts and Hobbyists** who have a keen interest in the practical applications of large language models and conversational interfaces.\n", + "### Subjects:\n", + "**Artificial Intelligence (cs.AI)**\n", "\n", - "The paper would be particularly relevant for those who are looking to understand or utilize the AutoGen framework to build complex applications that require the capabilities of LLMs.\n", + "### Target Audience:\n", + "1. **AI Researchers and Practitioners**: Those who are working in the field of artificial intelligence, especially those focusing on large language models (LLMs) and multi-agent systems.\n", + "2. **Developers and Engineers**: Software developers and engineers interested in building applications using LLMs and multi-agent frameworks.\n", + "3. **Academics and Students**: Academics and students studying AI, machine learning, and related fields who are interested in the latest frameworks and methodologies for building LLM applications.\n", + "4. **Industry Professionals**: Professionals in industries such as technology, operations research, and entertainment who are looking to leverage AI and LLMs for various applications.\n", + "5. **Open-Source Community**: Contributors and users of open-source AI frameworks who are interested in new tools and frameworks for developing AI applications.\n", "\n", - "If you are part of or know someone who belongs to these groups, this paper would be a valuable read.\n", + "This paper is particularly relevant for those interested in the practical applications and infrastructure for building complex AI systems using conversational agents.\n", "\n", "TERMINATE\n", "\n", @@ -335,7 +322,7 @@ ")\n", "\n", "# the assistant receives a message from the user, which contains the task description\n", - "user_proxy.initiate_chat(\n", + "chat_result = user_proxy.initiate_chat(\n", " assistant,\n", " message=\"\"\"\n", "Who should read this paper: https://arxiv.org/abs/2308.08155\n", @@ -343,26 +330,10 @@ ")" ] }, - { - "cell_type": "markdown", - "id": "a958cf54-23e8-46e8-be78-782c1a17bc82", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "## AutoGen version >= 0.2.0\n", - "\n", - "For AutoGen version >= 0.2.0, we need to set up an API endpoint because the version of the openai-python package is different from the pre-configured version." 
- ] - }, { "cell_type": "code", "execution_count": null, - "id": "83867b85-6fb2-4ca1-8859-206f0b854b24", + "id": "7", "metadata": { "jupyter": { "outputs_hidden": false, @@ -378,114 +349,61 @@ { "data": { "application/vnd.livy.statement-meta+json": { - "execution_finish_time": "2023-12-11T05:23:56.8983159Z", - "execution_start_time": "2023-12-11T05:23:56.8981286Z", + "execution_finish_time": "2024-06-07T15:26:14.0364536Z", + "execution_start_time": "2024-06-07T15:26:13.6931272Z", "livy_statement_state": "available", - "parent_msg_id": "cb272a67-8c4b-4e7f-8dfe-153b85d6b7fd", - "queued_time": "2023-12-11T05:23:43.2251661Z", - "session_id": null, + "parent_msg_id": "50747d08-5234-4212-9d18-ea3133cfb35e", + "queued_time": "2024-06-07T15:26:12.4397897Z", + "session_id": "1d5e9aec-2019-408c-a19a-5db9fb175ae2", "session_start_time": null, "spark_pool": null, "state": "finished", - "statement_id": -1 + "statement_id": 13, + "statement_ids": [ + 13 + ] }, "text/plain": [ - "StatementMeta(, , -1, Finished, Available)" + "StatementMeta(, 1d5e9aec-2019-408c-a19a-5db9fb175ae2, 13, Finished, Available)" ] }, "metadata": {}, "output_type": "display_data" }, - { - "data": {}, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting pyautogen>=0.2.0\n", - " Downloading pyautogen-0.2.2-py3-none-any.whl (124 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m124.0/124.0 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: diskcache in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from pyautogen>=0.2.0) (5.6.3)\n", - "Requirement already satisfied: flaml in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from pyautogen>=0.2.0) (2.1.1.dev2)\n", - "Collecting openai~=1.3 (from pyautogen>=0.2.0)\n", - " Downloading openai-1.3.8-py3-none-any.whl (221 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m221.5/221.5 kB\u001b[0m \u001b[31m37.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: python-dotenv in /nfs4/pyenv-b962c9b1-be7a-4052-b362-e359a86c2a98/lib/python3.10/site-packages (from pyautogen>=0.2.0) (1.0.0)\n", - "Requirement already satisfied: termcolor in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from pyautogen>=0.2.0) (2.3.0)\n", - "Requirement already satisfied: tiktoken in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from pyautogen>=0.2.0) (0.5.1)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from openai~=1.3->pyautogen>=0.2.0) (3.7.1)\n", - "Collecting distro<2,>=1.7.0 (from openai~=1.3->pyautogen>=0.2.0)\n", - " Downloading distro-1.8.0-py3-none-any.whl (20 kB)\n", - "Collecting httpx<1,>=0.23.0 (from openai~=1.3->pyautogen>=0.2.0)\n", - " Downloading httpx-0.25.2-py3-none-any.whl (74 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.0/75.0 kB\u001b[0m \u001b[31m40.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: pydantic<3,>=1.9.0 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from openai~=1.3->pyautogen>=0.2.0) (1.10.9)\n", - "Requirement already satisfied: 
sniffio in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from openai~=1.3->pyautogen>=0.2.0) (1.3.0)\n", - "Requirement already satisfied: tqdm>4 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from openai~=1.3->pyautogen>=0.2.0) (4.66.1)\n", - "Requirement already satisfied: typing-extensions<5,>=4.5 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from openai~=1.3->pyautogen>=0.2.0) (4.5.0)\n", - "Requirement already satisfied: NumPy>=1.17.0rc1 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from flaml->pyautogen>=0.2.0) (1.24.3)\n", - "Requirement already satisfied: regex>=2022.1.18 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from tiktoken->pyautogen>=0.2.0) (2023.8.8)\n", - "Requirement already satisfied: requests>=2.26.0 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from tiktoken->pyautogen>=0.2.0) (2.31.0)\n", - "Requirement already satisfied: idna>=2.8 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai~=1.3->pyautogen>=0.2.0) (3.4)\n", - "Requirement already satisfied: exceptiongroup in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai~=1.3->pyautogen>=0.2.0) (1.1.3)\n", - "Requirement already satisfied: certifi in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from httpx<1,>=0.23.0->openai~=1.3->pyautogen>=0.2.0) (2023.7.22)\n", - "Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai~=1.3->pyautogen>=0.2.0)\n", - " Downloading httpcore-1.0.2-py3-none-any.whl (76 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.9/76.9 kB\u001b[0m \u001b[31m39.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: h11<0.15,>=0.13 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai~=1.3->pyautogen>=0.2.0) (0.14.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken->pyautogen>=0.2.0) (3.3.1)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken->pyautogen>=0.2.0) (1.26.17)\n", - "Installing collected packages: httpcore, distro, httpx, openai, pyautogen\n", - " Attempting uninstall: openai\n", - " Found existing installation: openai 0.27.8\n", - " Not uninstalling openai at /home/trusted-service-user/cluster-env/trident_env/lib/python3.10/site-packages, outside environment /nfs4/pyenv-b962c9b1-be7a-4052-b362-e359a86c2a98\n", - " Can't uninstall 'openai'. 
No files were found to uninstall.\n", - " Attempting uninstall: pyautogen\n", - " Found existing installation: pyautogen 0.1.14\n", - " Uninstalling pyautogen-0.1.14:\n", - " Successfully uninstalled pyautogen-0.1.14\n", - "Successfully installed distro-1.8.0 httpcore-1.0.2 httpx-0.25.2 openai-1.3.8 pyautogen-0.2.2\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - }, - { - "data": {}, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - }, { "name": "stdout", "output_type": "stream", "text": [ - "Warning: PySpark kernel has been restarted to use updated packages.\n", - "\n" + "Cost for the chat:\n", + "{'usage_including_cached_inference': {'total_cost': 0.02107, 'gpt-4o-2024-05-13': {'cost': 0.02107, 'prompt_tokens': 1616, 'completion_tokens': 866, 'total_tokens': 2482}}, 'usage_excluding_cached_inference': {'total_cost': 0.02107, 'gpt-4o-2024-05-13': {'cost': 0.02107, 'prompt_tokens': 1616, 'completion_tokens': 866, 'total_tokens': 2482}}}\n" ] } ], "source": [ - "%pip install \"pyautogen>=0.2.0\"" + "print(f\"Cost for the chat:\\n{chat_result.cost}\")" ] }, { "cell_type": "markdown", - "id": "c485fcab", - "metadata": {}, + "id": "8", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, "source": [ - "## Set your API endpoint" + "### Example 2\n", + "How to use `RetrieveAssistantAgent` and `RetrieveUserProxyAgent` to do Retrieval Augmented Generation (RAG) for QA and Code Generation.\n", + "\n", + "Check out this [blog](https://microsoft.github.io/autogen/blog/2023/10/18/RetrieveChat) for more details." 
] }, { "cell_type": "code", "execution_count": null, - "id": "13005ac5-7f2a-4ba6-85b9-d45671093be2", + "id": "9", "metadata": { "jupyter": { "outputs_hidden": false, @@ -501,42 +419,47 @@ { "data": { "application/vnd.livy.statement-meta+json": { - "execution_finish_time": "2023-12-11T05:27:12.0400654Z", - "execution_start_time": "2023-12-11T05:27:10.9380797Z", + "execution_finish_time": "2024-06-07T15:26:26.4217205Z", + "execution_start_time": "2024-06-07T15:26:26.0872609Z", "livy_statement_state": "available", - "parent_msg_id": "8429d912-c8af-41c2-bfde-697adb0bbf46", - "queued_time": "2023-12-11T05:27:10.4608238Z", - "session_id": "865e72a4-f70b-46cf-8421-9f25745bd9bd", + "parent_msg_id": "2d2b3ee3-300e-4959-b68c-c95843c42eb7", + "queued_time": "2024-06-07T15:26:25.1160753Z", + "session_id": "1d5e9aec-2019-408c-a19a-5db9fb175ae2", "session_start_time": null, "spark_pool": null, "state": "finished", - "statement_id": 36 + "statement_id": 14, + "statement_ids": [ + 14 + ] }, "text/plain": [ - "StatementMeta(, 865e72a4-f70b-46cf-8421-9f25745bd9bd, 36, Finished, Available)" + "StatementMeta(, 1d5e9aec-2019-408c-a19a-5db9fb175ae2, 14, Finished, Available)" ] }, "metadata": {}, "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-12-11:05:27:11,251 WARNING [synapse_mlflow_utils.py:244] To save or load Apache Spark model files, please attach a Lakehouse.\n" - ] } ], "source": [ - "mlflow_env_configs = get_mlflow_env_config()\n", - "access_token = mlflow_env_configs.driver_aad_token\n", - "prebuilt_AI_base_url = mlflow_env_configs.workload_endpoint + \"cognitive/openai/\"" + "import tempfile\n", + "\n", + "from autogen.coding import LocalCommandLineCodeExecutor\n", + "\n", + "# Create a temporary directory to store the code files.\n", + "temp_dir = tempfile.TemporaryDirectory()\n", + "\n", + "# Create a local command line code executor.\n", + "code_executor = LocalCommandLineCodeExecutor(\n", + " timeout=40, # Timeout for each code execution in seconds.\n", + " work_dir=temp_dir.name, # Use the temporary directory to store the code files.\n", + ")" ] }, { "cell_type": "code", "execution_count": null, - "id": "1470b833-9cf2-4735-a28d-57d30714f562", + "id": "10", "metadata": { "jupyter": { "outputs_hidden": false, @@ -548,63 +471,124 @@ } } }, - "outputs": [ - { - "data": { - "application/vnd.livy.statement-meta+json": { - "execution_finish_time": "2023-12-11T05:27:12.9516846Z", - "execution_start_time": "2023-12-11T05:27:12.5600767Z", - "livy_statement_state": "available", - "parent_msg_id": "7512dc56-5ad2-46eb-a0f7-3a62d15e7385", - "queued_time": "2023-12-11T05:27:11.574982Z", - "session_id": "865e72a4-f70b-46cf-8421-9f25745bd9bd", - "session_start_time": null, - "spark_pool": null, - "state": "finished", - "statement_id": 37 - }, - "text/plain": [ - "StatementMeta(, 865e72a4-f70b-46cf-8421-9f25745bd9bd, 37, Finished, Available)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "config_list = [\n", - " {\n", - " \"model\": \"gpt-4-turbo\",\n", - " \"api_key\": access_token,\n", - " \"base_url\": prebuilt_AI_base_url,\n", - " \"api_type\": \"azure\",\n", - " \"api_version\": \"2024-02-15-preview\",\n", + "from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent\n", + "from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent\n", + "\n", + "# 1. 
create an RetrieveAssistantAgent instance named \"assistant\"\n", + "assistant = RetrieveAssistantAgent(\n", + " name=\"assistant\",\n", + " system_message=\"You are a helpful assistant.\",\n", + " llm_config=llm_config,\n", + ")\n", + "\n", + "# 2. create the RetrieveUserProxyAgent instance named \"ragproxyagent\"\n", + "ragproxyagent = RetrieveUserProxyAgent(\n", + " name=\"ragproxyagent\",\n", + " human_input_mode=\"NEVER\",\n", + " max_consecutive_auto_reply=5,\n", + " retrieve_config={\n", + " \"docs_path\": [\n", + " \"https://learn.microsoft.com/en-us/fabric/get-started/microsoft-fabric-overview\",\n", + " \"https://learn.microsoft.com/en-us/fabric/data-science/tuning-automated-machine-learning-visualizations\",\n", + " ],\n", + " \"chunk_token_size\": 2000,\n", + " \"model\": config_list[0][\"model\"],\n", + " \"vector_db\": \"chroma\", # to use the deprecated `client` parameter, set to None and uncomment the line above\n", + " \"overwrite\": True, # set to True if you want to overwrite an existing collection\n", " },\n", - "]" + " code_execution_config={\"executor\": code_executor}, # Use the local command line code executor.\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "11", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "#### 2.1 let's ask a question \"List all the Components of Microsoft Fabric\".\n", + "\n", + "The answer from **ChatGPT with gpt-4o** at June 7th, 2024 is as below:\n", + "```\n", + "Microsoft Fabric is a comprehensive data platform that integrates various services and tools for data management, analytics, and collaboration. As of the latest information available, Microsoft Fabric includes the following components:\n", + "\n", + "Data Integration:\n", + "\n", + "Azure Data Factory: For creating, scheduling, and orchestrating data workflows.\n", + "Power Query: A data transformation and data preparation tool.\n", + "Data Engineering:\n", + "\n", + "Azure Synapse Analytics: For big data and data warehousing solutions, including Synapse SQL, Spark, and Data Explorer.\n", + "Data Science:\n", + "\n", + "Azure Machine Learning: For building, training, and deploying machine learning models.\n", + "Azure Databricks: For collaborative big data and AI solutions.\n", + "Data Warehousing:\n", + "\n", + "...\n", + "```\n", + "\n", + "While the answer from AutoGen RAG agent with gpt-4o is as below:\n", + "```\n", + "The components of Microsoft Fabric are:\n", + "\n", + "1. Power BI\n", + "2. Data Factory\n", + "3. Data Activator\n", + "4. Industry Solutions\n", + "5. Real-Time Intelligence\n", + "6. Synapse Data Engineering\n", + "7. Synapse Data Science\n", + "8. Synapse Data Warehouse\n", + "\n", + "Sources: [Microsoft Fabric Overview](https://learn.microsoft.com/en-us/fabric/get-started/microsoft-fabric-overview)\n", + "```\n", + "\n", + "AutoGen RAG agent's answer is exactly the right answer per the official documents while ChatGPT made a few mistakes, it even listed Azure Databricks." 
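The output in the next cell comes from handing this question to the retrieval pair defined above. A sketch of that call is shown here for orientation; the `message_generator` helper and the `problem` keyword are the standard `RetrieveUserProxyAgent` entry points rather than code taken verbatim from this patch.

```python
# Reset the assistant so earlier turns do not leak into the RAG conversation,
# then ask the question through the retrieval proxy agent.
assistant.reset()
chat_result = ragproxyagent.initiate_chat(
    assistant,
    message=ragproxyagent.message_generator,
    problem="List all the Components of Microsoft Fabric",
)
print(chat_result.summary)
```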
] }, { "cell_type": "code", "execution_count": null, - "id": "951c0d05-1d58-4b42-88ea-7303c1da88aa", - "metadata": {}, + "id": "12", + "metadata": { + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, "outputs": [ { "data": { "application/vnd.livy.statement-meta+json": { - "execution_finish_time": "2023-12-11T05:28:09.3148816Z", - "execution_start_time": "2023-12-11T05:27:37.4931459Z", + "execution_finish_time": "2024-06-07T15:27:29.0170714Z", + "execution_start_time": "2024-06-07T15:27:14.1923093Z", "livy_statement_state": "available", - "parent_msg_id": "4c9275dc-25d3-4204-8641-fc8ed22b7d54", - "queued_time": "2023-12-11T05:27:37.0516131Z", - "session_id": "865e72a4-f70b-46cf-8421-9f25745bd9bd", + "parent_msg_id": "47d2a7c5-affb-44c5-9fef-a01d3026c638", + "queued_time": "2024-06-07T15:26:25.4548817Z", + "session_id": "1d5e9aec-2019-408c-a19a-5db9fb175ae2", "session_start_time": null, "spark_pool": null, "state": "finished", - "statement_id": 38 + "statement_id": 16, + "statement_ids": [ + 16 + ] }, "text/plain": [ - "StatementMeta(, 865e72a4-f70b-46cf-8421-9f25745bd9bd, 38, Finished, Available)" + "StatementMeta(, 1d5e9aec-2019-408c-a19a-5db9fb175ae2, 16, Finished, Available)" ] }, "metadata": {}, @@ -614,187 +598,2482 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33muser_proxy\u001b[0m (to assistant):\n", + "Trying to create collection.\n", + "Number of requested results 20 is greater than number of elements in index 2, updating n_results = 2\n", + "VectorDB returns doc_ids: [['f7c9052b', '621d4a0b']]\n", + "\u001b[32mAdding content of doc f7c9052b to context.\u001b[0m\n", + "\u001b[33mragproxyagent\u001b[0m (to assistant):\n", + "\n", + "You're a retrieve augmented chatbot. You answer user's questions based on your own knowledge and the\n", + "context provided by the user. You should follow the following steps to answer a question:\n", + "Step 1, you estimate the user's intent based on the question and context. The intent can be a code generation task or\n", + "a question answering task.\n", + "Step 2, you reply based on the intent.\n", + "If you can't answer the question with or without the current context, you should reply exactly `UPDATE CONTEXT`.\n", + "If user's intent is code generation, you must obey the following rules:\n", + "Rule 1. You MUST NOT install any packages because all the packages needed are already installed.\n", + "Rule 2. You must follow the formats below to write your code:\n", + "```language\n", + "# your code\n", + "```\n", "\n", - "What date is today? Compare the year-to-date gain for META and TESLA.\n", + "If user's intent is question answering, you must give as short an answer as possible.\n", "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33massistant\u001b[0m (to user_proxy):\n", + "User's question is: List all the Components of Microsoft Fabric\n", "\n", - "To get the current date, we can write a simple Python script to print out today's date using the `datetime` module. Then, to compare the year-to-date (YTD) gain for META (Meta Platforms Inc.) and TESLA (Tesla, Inc.), we need to retrieve the stock prices from the beginning of the current year and the most recent closing price for both companies and calculate the percentage change.\n", + "Context is: # What is Microsoft Fabric - Microsoft Fabric | Microsoft Learn\n", "\n", - "Here's the plan to solve the task step by step:\n", - "1. 
Write and execute a Python script to get today's date.\n", - "2. Use a Python script to retrieve the opening stock price for both Meta Platforms Inc. (META) and Tesla, Inc. (TSLA) as of the first trading day of the current year.\n", - "3. Retrieve the most recent closing stock price for both companies.\n", - "4. Calculate the percentage change from the opening price to the latest closing price for both stocks.\n", - "5. Compare the YTD gains and display the result.\n", + "What is Microsoft Fabric - Microsoft Fabric | Microsoft Learn\n", "\n", - "First, let's start with step 1 by getting today's date:\n", + "[Skip to main content](#main)\n", "\n", - "```python\n", - "# filename: get_current_date.py\n", - "import datetime\n", + "This browser is no longer supported.\n", "\n", - "def get_current_date():\n", - " # Get today's date\n", - " return datetime.date.today()\n", + "Upgrade to Microsoft Edge to take advantage of the latest features, security updates, and technical support.\n", "\n", - "# Print the current date\n", - "print(f\"Today's date is: {get_current_date()}\")\n", - "```\n", + "[Download Microsoft Edge](https://go.microsoft.com/fwlink/p/?LinkID=2092881 ) \n", + "[More info about Internet Explorer and Microsoft Edge](https://learn.microsoft.com/en-us/lifecycle/faq/internet-explorer-microsoft-edge) \n", "\n", - "Please execute the above script to get today's date. After that, we will proceed to the next steps of retrieving stock prices and comparing YTD gains.\n", + "Table of contents \n", "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[31m\n", - ">>>>>>>> EXECUTING CODE BLOCK 0 (inferred language is python)...\u001b[0m\n", - "\u001b[33muser_proxy\u001b[0m (to assistant):\n", + "Exit focus mode\n", "\n", - "exitcode: 0 (execution succeeded)\n", - "Code output: \n", - "Today's date is: 2023-12-11\n", + "Read in English\n", "\n", + "Save\n", "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33massistant\u001b[0m (to user_proxy):\n", + "Table of contents\n", "\n", - "It seems there might be a typo in your output since today cannot be December 11, 2023, considering the knowledge cutoff date is in early 2023. However, I will proceed assuming today's date is correctly given as December 11, 2023.\n", + "Read in English\n", "\n", - "To move forward with the next steps, I will utilize Python code to do the following:\n", - "- Fetch the historical stock data for META and TESLA.\n", - "- Extract the relevant opening prices at the start of the current year and the latest available closing prices.\n", - "- Calculate the YTD gains for both stocks.\n", + "Save\n", "\n", - "This will require accessing financial data through an API such as Yahoo Finance. We'll use the `yfinance` library to fetch the stock data. This library must be installed in your Python environment. 
If it's not already installed, please install it by executing `pip install yfinance` before running the following script.\n", + "Add to Plan\n", "\n", - "Let's fetch the stock data and calculate the YTD gains:\n", + "[Edit](https://github.com/MicrosoftDocs/fabric-docs/blob/main/docs/get-started/microsoft-fabric-overview.md \"Edit This Document\")\n", "\n", - "```python\n", - "# filename: compare_ytd_gains.py\n", - "import yfinance as yf\n", - "from datetime import datetime\n", - "\n", - "# Function to calculate the YTD gain of a stock\n", - "def calculate_ytd_gain(ticker):\n", - " # Get data from the start of the year to the current date\n", - " start_of_year = datetime(datetime.now().year, 1, 1)\n", - " current_date = datetime.now().strftime('%Y-%m-%d')\n", - " data = yf.download(ticker, start=start_of_year.strftime('%Y-%m-%d'), end=current_date)\n", - "\n", - " # Ensure we have data to compute the gain\n", - " if data.empty:\n", - " return None\n", - "\n", - " # Get the first available opening price of the year and the most recent available closing price\n", - " opening_price = data['Open'].iloc[0]\n", - " closing_price = data['Close'].iloc[-1]\n", - "\n", - " # Calculate YTD gain and return it\n", - " ytd_gain = ((closing_price - opening_price) / opening_price) * 100\n", - " return ytd_gain\n", - "\n", - "# Get the YTD gains\n", - "meta_ytd_gain = calculate_ytd_gain('META')\n", - "tesla_ytd_gain = calculate_ytd_gain('TSLA')\n", - "\n", - "# Output the YTD gains\n", - "print(f\"Year-to-Date gain for Meta Platforms Inc. (META): {meta_ytd_gain:.2f}%\")\n", - "print(f\"Year-to-Date gain for Tesla, Inc. (TSLA): {tesla_ytd_gain:.2f}%\")\n", - "\n", - "# Compare the YTD gains\n", - "if meta_ytd_gain is not None and tesla_ytd_gain is not None:\n", - " if meta_ytd_gain > tesla_ytd_gain:\n", - " print(\"META has a higher YTD gain than TESLA.\")\n", - " elif meta_ytd_gain < tesla_ytd_gain:\n", - " print(\"TESLA has a higher YTD gain than META.\")\n", - " else:\n", - " print(\"META and TESLA have the same YTD gain.\")\n", - "else:\n", - " print(\"Unable to calculate YTD gains, possibly due to missing data.\")\n", - "```\n", + "---\n", "\n", - "Please execute the above code to compare the Year-to-Date gains of META and TESLA. Remember, the actual output will depend on the stock prices on the dates fetched. If there are difficulties or errors encountered when executing the code, please inform me so that we can troubleshoot accordingly.\n", + "#### Share via\n", "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[31m\n", - ">>>>>>>> EXECUTING CODE BLOCK 0 (inferred language is python)...\u001b[0m\n", - "\u001b[33muser_proxy\u001b[0m (to assistant):\n", + "Facebook\n", + "x.com\n", + "LinkedIn\n", + "Email\n", "\n", - "exitcode: 0 (execution succeeded)\n", - "Code output: \n", + "---\n", "\n", - "[*********************100%%**********************] 1 of 1 completed\n", + "Print\n", "\n", - "[*********************100%%**********************] 1 of 1 completed\n", - "Year-to-Date gain for Meta Platforms Inc. (META): 170.92%\n", - "Year-to-Date gain for Tesla, Inc. 
(TSLA): 105.82%\n", - "META has a higher YTD gain than TESLA.\n", + "Table of contents\n", "\n", + "What is Microsoft Fabric?\n", + "=========================\n", "\n", - "--------------------------------------------------------------------------------\n", - "\u001b[33massistant\u001b[0m (to user_proxy):\n", + "* Article\n", + "* 05/21/2024\n", + "* 15 contributors\n", "\n", - "The executed code has successfully calculated the Year-to-Date (YTD) gains for Meta Platforms Inc. (META) and Tesla, Inc. (TSLA). According to the output you provided:\n", + "Feedback\n", "\n", - "- META has a YTD gain of 170.92%.\n", - "- TESLA has a YTD gain of 105.82%.\n", + "In this article\n", + "---------------\n", "\n", - "Based on these results, Meta Platforms Inc. (META) has a higher YTD gain compared to Tesla, Inc. (TSLA) as of the current date.\n", + "Microsoft Fabric is an end-to-end analytics and data platform designed for enterprises that require a unified solution. It encompasses data movement, processing, ingestion, transformation, real-time event routing, and report building. It offers a comprehensive suite of services including Data Engineering, Data Factory, Data Science, Real-Time Analytics, Data Warehouse, and Databases.\n", "\n", - "Please let me know if you need assistance with any other queries.\n", + "With Fabric, you don't need to assemble different services from multiple vendors. Instead, it offers a seamlessly integrated, user-friendly platform that simplifies your analytics requirements. Operating on a Software as a Service (SaaS) model, Fabric brings simplicity and integration to your solutions.\n", "\n", - "TERMINATE\n", + "Microsoft Fabric integrates separate components into a cohesive stack. Instead of relying on different databases or data warehouses, you can centralize data storage with OneLake. AI capabilities are seamlessly embedded within Fabric, eliminating the need for manual integration. With Fabric, you can easily transition your raw data into actionable insights for business users.\n", "\n", - "--------------------------------------------------------------------------------\n" - ] - } - ], - "source": [ - "# create an AssistantAgent named \"assistant\"\n", - "assistant = autogen.AssistantAgent(\n", - " name=\"assistant\",\n", - " llm_config={\n", - " # \"cache_seed\": 42, # seed for caching and reproducibility\n", - " \"config_list\": config_list, # a list of OpenAI API configurations\n", - " # \"temperature\": 0, # temperature for sampling\n", - " }, # configuration for autogen's enhanced inference API which is compatible with OpenAI API\n", - ")\n", - "# create a UserProxyAgent instance named \"user_proxy\"\n", - "user_proxy = autogen.UserProxyAgent(\n", - " name=\"user_proxy\",\n", - " human_input_mode=\"NEVER\",\n", - " max_consecutive_auto_reply=10,\n", - " is_termination_msg=lambda x: x.get(\"content\", \"\").rstrip().endswith(\"TERMINATE\"),\n", - " code_execution_config={\n", - " \"work_dir\": \"coding\",\n", - " \"use_docker\": False, # Please set use_docker=True if docker is available to run the generated code. Using docker is safer than running the generated code directly.\n", - " },\n", - ")\n", - "# the assistant receives a message from the user_proxy, which contains the task description\n", - "user_proxy.initiate_chat(\n", - " assistant,\n", - " message=\"\"\"What date is today? 
Compare the year-to-date gain for META and TESLA.\"\"\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1006fec8-87c6-43cd-a857-4ecd37fbfa86", - "metadata": { - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [] + "Unification with SaaS foundation\n", + "--------------------------------\n", + "\n", + "Microsoft Fabric is built on a foundation of Software as a Service (SaaS). It combines both new and existing components from Power BI, Azure Synapse Analytics, Azure Data Factory, and more services into a unified environment. These components are then tailored into customized user experiences.\n", + "\n", + "[![Diagram of the software as a service foundation beneath the different experiences of Fabric.](media/microsoft-fabric-overview/fabric-architecture.png)](media/microsoft-fabric-overview/fabric-architecture.png#lightbox)\n", + "\n", + "Fabric integrates workloads such as Data Engineering, Data Factory, Data Science, Data Warehouse, Real-Time Intelligence, Industry solutions, and Power BI into a shared SaaS foundation. Each of these experiences is tailored for distinct user roles like data engineers, scientists, or warehousing professionals, and they serve a specific task. The entire Fabric stack has AI integration and it accelerates the data journey. These workloads work together seemlessly and provide the following advantages:\n", + "\n", + "* Access to an extensive range of deeply integrated analytics in the industry.\n", + "* Shared experiences across experiences that are familiar and easy to learn.\n", + "* Easy access to, and readily reuse all assets.\n", + "* Unified data lake storage that preserves data in its original location while using your preferred analytics tools.\n", + "* Centralized administration and governance across all experiences.\n", + "\n", + "Fabric seamlessly integrates data and services, enabling unified management, governance, and discovery. It ensures security for items, data, and row-level access. You can centrally configure core enterprise capabilities. Permissions are automatically applied across all the underlying services. Additionally, data sensitivity labels inherit automatically across the items in the suite. Governance is powered by Purview which is built into Fabric.\n", + "\n", + "Fabric allows creators to concentrate on producing their best work, freeing them from the need to integrate, manage, or even understand the underlying infrastructure.\n", + "\n", + "Components of Microsoft Fabric\n", + "------------------------------\n", + "\n", + "Fabric offers a comprehensive set of analytics experiences designed to work together seamlessly. The platform tailors each of these experiences to a specific persona and a specific task:\n", + "\n", + "![Screenshot of the Fabric menu of experiences.](media/microsoft-fabric-overview/workload-menu.png)\n", + "\n", + "* **Power BI** - Power BI lets you easily connect to your data sources, visualize and discover what's important, and share that with anyone or everyone you want. This integrated experience allows business owners to access all data in Fabric quickly and intuitively and to make better decisions with data. For more information, see [What is Power BI?](/en-us/power-bi/fundamentals/power-bi-overview)\n", + "* **Data Factory** - Data Factory provides a modern data integration experience to ingest, prepare, and transform data from a rich set of data sources. 
It incorporates the simplicity of Power Query, and you can use more than 200 native connectors to connect to data sources on-premises and in the cloud. For more information, see [What is Data Factory in Microsoft Fabric?](../data-factory/data-factory-overview)\n", + "* **Data Activator** - Data Activator is a no-code experience in Fabric that allows you to specify actions, such as email notifications and Power Automate workflows, to launch when Data Activator detects specific patterns or conditions in your changing data. It monitors data in Power BI reports and eventstreams; when the data hits certain thresholds or matches other patterns, it automatically takes the appropriate action. For more information, see [What is Data Activator?](../data-activator/data-activator-introduction)\n", + "* **Industry Solutions** - Fabric provides industry-specific data solutions that address unique industry needs and challenges, and include data management, analytics, and decision-making. For more information, see [Industry Solutions in Microsoft Fabric](/en-us/industry/industry-data-solutions-fabric).\n", + "* **Real-Time Intelligence** - Real-time Intelligence is an end-to-end solution for event-driven scenarios, streaming data, and data logs. It enables the extraction of insights, visualization, and action on data in motion by handling data ingestion, transformation, storage, analytics, visualization, tracking, AI, and real-time actions. The [Real-Time hub](#real-time-hub---the-unification-of-data-streams) in Real-Time Intelligence provides a wide variety of no-code connectors, converging into a catalog of organizational data that is protected, governed, and integrated across Fabric. For more information, see [What is Real-Time Intelligence in Fabric?](../real-time-intelligence/overview).\n", + "* **Synapse Data Engineering** - Synapse Data Engineering provides a Spark platform with great authoring experiences. It enables you to create, manage, and optimize infrastructures for collecting, storing, processing, and analyzing vast data volumes. Fabric Spark's integration with Data Factory allows you to schedule and orchestrate notebooks and Spark jobs. For more information, see [What is Data engineering in Microsoft Fabric?](../data-engineering/data-engineering-overview)\n", + "* **Synapse Data Science** - Synapse Data Science enables you to build, deploy, and operationalize machine learning models from Fabric. It integrates with Azure Machine Learning to provide built-in experiment tracking and model registry. Data scientists can enrich organizational data with predictions and business analysts can integrate those predictions into their BI reports, allowing a shift from descriptive to predictive insights. For more information, see [What is Data science in Microsoft Fabric?](../data-science/data-science-overview)\n", + "* **Synapse Data Warehouse** - Synapse Data Warehouse provides industry leading SQL performance and scale. It separates compute from storage, enabling independent scaling of both components. Additionally, it natively stores data in the open Delta Lake format. For more information, see [What is data warehousing in Microsoft Fabric?](../data-warehouse/data-warehousing)\n", + "\n", + "Microsoft Fabric enables organizations and individuals to turn large and complex data repositories into actionable workloads and analytics, and is an implementation of data mesh architecture. 
For more information, see [What is a data mesh?](/en-us/azure/cloud-adoption-framework/scenarios/cloud-scale-analytics/architectures/what-is-data-mesh)\n", + "\n", + "OneLake: The unification of lakehouses\n", + "--------------------------------------\n", + "\n", + "The Microsoft Fabric platform unifies the OneLake and lakehouse architecture across an enterprise.\n", + "\n", + "### OneLake\n", + "\n", + "A data lake is the foundation on which all the Fabric workloads are built. Microsoft Fabric Lake is also known as [OneLake](../onelake/onelake-overview). OneLake is built into the Fabric platform and provides a unified location to store all organizational data where the workloads operate.\n", + "\n", + "OneLake is built on ADLS (Azure Data Lake Storage) Gen2. It provides a single SaaS experience and a tenant-wide store for data that serves both professional and citizen developers. OneLake simplifies Fabric experiences by eliminating the need for you to understand infrastructure concepts such as resource groups, RBAC (Role-Based Access Control), Azure Resource Manager, redundancy, or regions. You don't need an Azure account to use Fabric.\n", + "\n", + "OneLake eliminates data silos, which individual developers often create when they provision and configure their own isolated storage accounts. Instead, OneLake provides a single, unified storage system for all developers. It ensures easy data discovery, sharing, and uniform enforcement of policy and security settings. For more information, see [What is OneLake?](../onelake/onelake-overview)\n", + "\n", + "### OneLake and lakehouse data hierarchy\n", + "\n", + "OneLake is hierarchical in nature to simplify management across your organization. Microsoft Fabric includes OneLake and there's no requirement for any up-front provisioning. There's only one OneLake per tenant and it provides a single-pane-of-glass file-system namespace that spans across users, regions, and clouds. OneLake organizes data into manageable containers for easy handling.\n", + "\n", + "The tenant maps to the root of OneLake and is at the top level of the hierarchy. You can create any number of workspaces, which you can think of as folders, within a tenant.\n", + "\n", + "The following image shows how Fabric stores data in various items within OneLake. As shown, you can create multiple workspaces within a tenant, and create multiple lakehouses within each workspace. A lakehouse is a collection of files, folders, and tables that represents a database over a data lake. To learn more, see [What is a lakehouse?](../data-engineering/lakehouse-overview).\n", + "\n", + "![Diagram of the hierarchy of items like lakehouses and semantic models within a workspace within a tenant.](media/microsoft-fabric-overview/hierarchy-within-tenant.png)\n", + "\n", + "Every developer and business unit in the tenant can easily create their own workspaces in OneLake. They can ingest data into their own lakehouses, then start processing, analyzing, and collaborating on the data, just like OneDrive in Microsoft Office.\n", + "\n", + "All the Microsoft Fabric compute experiences are prewired to OneLake, just like the Office applications are prewired to use the organizational OneDrive. The experiences such as Data Engineering, Data Warehouse, Data Factory, Power BI, and Real-Time Intelligence use OneLake as their native store. 
They don't need any extra configuration.\n", + "\n", + "[![Diagram of different Fabric experiences all accessing the same OneLake data storage.](media/microsoft-fabric-overview/onelake-architecture.png)](media/microsoft-fabric-overview/onelake-architecture.png#lightbox)\n", + "\n", + "OneLake allows instant mounting of your existing Platform as a Service (PaaS) storage accounts into OneLake with the [Shortcut](../onelake/onelake-shortcuts) feature. You don't need to migrate or move any of your existing data. Using shortcuts, you can access the data stored in your Azure Data Lake Storage.\n", + "\n", + "Shortcuts also allow you to easily share data between users and applications without moving or duplicating information. You can create shortcuts to other storage systems, allowing you to compose and analyze data across clouds with transparent, intelligent caching that reduces egress costs and brings data closer to compute.\n", + "\n", + "Real-Time hub - the unification of data streams\n", + "-----------------------------------------------\n", + "\n", + "The Real-Time hub is a foundational location for data in motion.\n", + "\n", + "The Real-Time hub provides a unified SaaS experience and tenant-wide logical place for all data-in-motion. The Real-Time hub lists all data in motion from all sources that customers can discover, ingest, manage, and consume and react upon, and contains both [streams](../real-time-intelligence/event-streams/overview) and [KQL database](../real-time-intelligence/create-database) tables. Streams includes [**Data streams**](../real-time-intelligence/event-streams/create-manage-an-eventstream), **Microsoft sources** (for example, [Azure Event Hubs](../real-time-hub/add-source-azure-event-hubs), [Azure IoT Hub](../real-time-hub/add-source-azure-iot-hub), [Azure SQL DB Change Data Capture (CDC)](../real-time-hub/add-source-azure-sql-database-cdc), [Azure Cosmos DB CDC](../real-time-hub/add-source-azure-cosmos-db-cdc), and [PostgreSQL DB CDC](../real-time-hub/add-source-postgresql-database-cdc)), and [**Fabric events**](../real-time-intelligence/event-streams/add-source-fabric-workspace) (Fabric system events and external system events brought in from Azure, Microsoft 365, or other clouds).\n", + "\n", + "The Real-Time hub enables users to easily discover, ingest, manage, and consume data-in-motion from a wide variety of source so that they can collaborate and develop streaming applications within one place. For more information, see [What is the Real-Time hub?](../real-time-hub/real-time-hub-overview)\n", + "\n", + "Fabric solutions for ISVs\n", + "-------------------------\n", + "\n", + "If you're an Independent Software Vendors (ISVs) looking to integrate your solutions with Microsoft Fabric, you can use one of the following paths based on your desired level of integration:\n", + "\n", + "* **Interop** - Integrate your solution with the OneLake Foundation and establish basic connections and interoperability with Fabric.\n", + "* **Develop on Fabric** - Build your solution on top of the Fabric platform or seamlessly embed Fabric's functionalities into your existing applications. 
You can easily use Fabric capabilities with this option.\n", + "* **Build a Fabric workload** - Create customized workloads and experiences in Fabric tailoring your offerings to maximize their impact within the Fabric ecosystem.\n", + "\n", + "For more information, see the [Fabric ISV partner ecosystem](../cicd/partners/partner-integration).\n", + "\n", + "Related content\n", + "---------------\n", + "\n", + "* [Microsoft Fabric terminology](fabric-terminology)\n", + "* [Create a workspace](create-workspaces)\n", + "* [Navigate to your items from Microsoft Fabric Home page](fabric-home)\n", + "* [End-to-end tutorials in Microsoft Fabric](end-to-end-tutorials)\n", + "\n", + "---\n", + "\n", + "Feedback\n", + "--------\n", + "\n", + "Was this page helpful?\n", + "\n", + "Yes\n", + "\n", + "No\n", + "\n", + "[Provide product feedback](https://ideas.fabric.microsoft.com/)\n", + "|\n", + "\n", + "[Ask the community](https://community.fabric.microsoft.com/powerbi)\n", + "\n", + "Feedback\n", + "--------\n", + "\n", + "Coming soon: Throughout 2024 we will be phasing out GitHub Issues as the feedback mechanism for content and replacing it with a new feedback system. For more information see: . \n", + "\n", + "Submit and view feedback for\n", + "\n", + "[This product](https://ideas.fabric.microsoft.com/)\n", + "This page\n", + "\n", + "[View all page feedback](https://github.com//issues)\n", + "\n", + "---\n", + "\n", + "Additional resources\n", + "--------------------\n", + "\n", + "[California Consumer Privacy Act (CCPA) Opt-Out Icon\n", + "\n", + "Your Privacy Choices](https://aka.ms/yourcaliforniaprivacychoices)\n", + "\n", + "Theme\n", + "\n", + "* Light\n", + "* Dark\n", + "* High contrast\n", + "\n", + "* \n", + "* [Previous Versions](/en-us/previous-versions/)\n", + "* [Blog](https://techcommunity.microsoft.com/t5/microsoft-learn-blog/bg-p/MicrosoftLearnBlog)\n", + "* [Contribute](/en-us/contribute/)\n", + "* [Privacy](https://go.microsoft.com/fwlink/?LinkId=521839)\n", + "* [Terms of Use](/en-us/legal/termsofuse)\n", + "* [Trademarks](https://www.microsoft.com/legal/intellectualproperty/Trademarks/)\n", + "* © Microsoft 2024\n", + "\n", + "Additional resources\n", + "--------------------\n", + "\n", + "### In this article\n", + "\n", + "[California Consumer Privacy Act (CCPA) Opt-Out Icon\n", + "\n", + "Your Privacy Choices](https://aka.ms/yourcaliforniaprivacychoices)\n", + "\n", + "Theme\n", + "\n", + "* Light\n", + "* Dark\n", + "* High contrast\n", + "\n", + "* \n", + "* [Previous Versions](/en-us/previous-versions/)\n", + "* [Blog](https://techcommunity.microsoft.com/t5/microsoft-learn-blog/bg-p/MicrosoftLearnBlog)\n", + "* [Contribute](/en-us/contribute/)\n", + "* [Privacy](https://go.microsoft.com/fwlink/?LinkId=521839)\n", + "* [Terms of Use](/en-us/legal/termsofuse)\n", + "* [Trademarks](https://www.microsoft.com/legal/intellectualproperty/Trademarks/)\n", + "* © Microsoft 2024\n", + "\n", + "\n", + "The source of the context is: ['https://learn.microsoft.com/en-us/fabric/get-started/microsoft-fabric-overview']\n", + "\n", + "If you can answer the question, in the end of your answer, add the source of the context in the format of `Sources: source1, source2, ...`.\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33massistant\u001b[0m (to ragproxyagent):\n", + "\n", + "The components of Microsoft Fabric are:\n", + "\n", + "1. Power BI\n", + "2. Data Factory\n", + "3. Data Activator\n", + "4. 
Industry Solutions\n", + "5. Real-Time Intelligence\n", + "6. Synapse Data Engineering\n", + "7. Synapse Data Science\n", + "8. Synapse Data Warehouse\n", + "\n", + "Sources: https://learn.microsoft.com/en-us/fabric/get-started/microsoft-fabric-overview\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-06-07 15:27:15,139 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - Found 2 chunks.\u001b[0m\n", + "2024-06-07 15:27:15,142 - autogen.agentchat.contrib.vectordb.chromadb - INFO - No content embedding is provided. Will use the VectorDB's embedding function to generate the content embedding.\u001b[0m\n" + ] + } + ], + "source": [ + "assistant.reset()\n", + "problem = \"List all the Components of Microsoft Fabric\"\n", + "chat_result = ragproxyagent.initiate_chat(assistant, message=ragproxyagent.message_generator, problem=problem)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": { + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [ + { + "data": { + "application/vnd.livy.statement-meta+json": { + "execution_finish_time": "2024-06-07T15:27:30.3621271Z", + "execution_start_time": "2024-06-07T15:27:30.0131748Z", + "livy_statement_state": "available", + "parent_msg_id": "d9d3c442-0b5b-4eee-a34d-187119f9b420", + "queued_time": "2024-06-07T15:26:25.6902567Z", + "session_id": "1d5e9aec-2019-408c-a19a-5db9fb175ae2", + "session_start_time": null, + "spark_pool": null, + "state": "finished", + "statement_id": 17, + "statement_ids": [ + 17 + ] + }, + "text/plain": [ + "StatementMeta(, 1d5e9aec-2019-408c-a19a-5db9fb175ae2, 17, Finished, Available)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cost for the chat:\n", + "{'usage_including_cached_inference': {'total_cost': 0.019565000000000003, 'gpt-4o-2024-05-13': {'cost': 0.019565000000000003, 'prompt_tokens': 3688, 'completion_tokens': 75, 'total_tokens': 3763}}, 'usage_excluding_cached_inference': {'total_cost': 0.019565000000000003, 'gpt-4o-2024-05-13': {'cost': 0.019565000000000003, 'prompt_tokens': 3688, 'completion_tokens': 75, 'total_tokens': 3763}}}\n" + ] + } + ], + "source": [ + "print(f\"Cost for the chat:\\n{chat_result.cost}\")" + ] + }, + { + "cell_type": "markdown", + "id": "14", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "#### 2.2 let's ask it to generate AutoML code for us\n", + "\n", + "The question is \"Train a regression model, set time budget to 12s, plot the time line plot after training.\".\n", + "\n", + "ChatGPT's answer is as below:\n", + "\n", + "[It showed a figure]\n", + "\n", + "The timeline plot above shows the elapsed time during the training of a linear regression model. The red dashed line indicates the 12-second time budget. 
The model was trained iteratively, and the plot demonstrates that the training process was monitored to ensure it stayed within the specified time budget.\n", + "```\n", + "import time\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.datasets import make_regression\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LinearRegression\n", + "\n", + "# Create a synthetic regression dataset\n", + "X, y = make_regression(n_samples=1000, n_features=20, noise=0.1)\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + "# Initialize the model\n", + "model = LinearRegression()\n", + "\n", + "# Record the start time\n", + "start_time = time.time()\n", + "\n", + "# Train the model and record intermediate times\n", + "times = []\n", + "time_budget = 12 # in seconds\n", + "\n", + "for _ in range(100):\n", + " model.fit(X_train, y_train)\n", + " current_time = time.time()\n", + " elapsed_time = current_time - start_time\n", + " times.append(elapsed_time)\n", + " if elapsed_time > time_budget:\n", + " break\n", + "\n", + "# Plot the timeline\n", + "plt.figure(figsize=(10, 5))\n", + "plt.plot(times, label='Training time')\n", + "plt.axhline(y=time_budget, color='r', linestyle='--', label='Time Budget (12s)')\n", + "plt.xlabel('Iteration')\n", + "plt.ylabel('Elapsed Time (s)')\n", + "plt.title('Training Time Line Plot')\n", + "plt.legend()\n", + "plt.grid(True)\n", + "plt.show()\n", + "```\n", + "\n", + "It's not what I need, as ChatGPT has no context of the [AutoML](https://learn.microsoft.com/en-us/fabric/data-science/tuning-automated-machine-learning-visualizations) solution in Fabric Data Science.\n", + "\n", + "AutoGen RAG agent's answer is much better and ready for deployment. It retrieved the document related to the question and generated code based on the document. It automatically ran the code, fixed the errors in the code based on the output, and finally it got the correct code." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15", + "metadata": { + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [ + { + "data": { + "application/vnd.livy.statement-meta+json": { + "execution_finish_time": "2024-06-07T15:28:21.4439921Z", + "execution_start_time": "2024-06-07T15:27:31.3321982Z", + "livy_statement_state": "available", + "parent_msg_id": "19420cb8-2f86-495b-8f20-5349cb41d940", + "queued_time": "2024-06-07T15:26:25.8861394Z", + "session_id": "1d5e9aec-2019-408c-a19a-5db9fb175ae2", + "session_start_time": null, + "spark_pool": null, + "state": "finished", + "statement_id": 18, + "statement_ids": [ + 18 + ] + }, + "text/plain": [ + "StatementMeta(, 1d5e9aec-2019-408c-a19a-5db9fb175ae2, 18, Finished, Available)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of requested results 20 is greater than number of elements in index 2, updating n_results = 2\n", + "VectorDB returns doc_ids: [['621d4a0b', 'f7c9052b']]\n", + "\u001b[32mAdding content of doc 621d4a0b to context.\u001b[0m\n", + "\u001b[33mragproxyagent\u001b[0m (to assistant):\n", + "\n", + "You're a retrieve augmented chatbot. You answer user's questions based on your own knowledge and the\n", + "context provided by the user. 
You should follow the following steps to answer a question:\n", + "Step 1, you estimate the user's intent based on the question and context. The intent can be a code generation task or\n", + "a question answering task.\n", + "Step 2, you reply based on the intent.\n", + "If you can't answer the question with or without the current context, you should reply exactly `UPDATE CONTEXT`.\n", + "If user's intent is code generation, you must obey the following rules:\n", + "Rule 1. You MUST NOT install any packages because all the packages needed are already installed.\n", + "Rule 2. You must follow the formats below to write your code:\n", + "```language\n", + "# your code\n", + "```\n", + "\n", + "If user's intent is question answering, you must give as short an answer as possible.\n", + "\n", + "User's question is: Train a regression model, set time budget to 12s, plot the time line plot after training.\n", + "\n", + "Context is: # Visualize tuning and AutoML trials - Microsoft Fabric | Microsoft Learn\n", + "\n", + "Visualize tuning and AutoML trials - Microsoft Fabric | Microsoft Learn\n", + "\n", + "[Skip to main content](#main)\n", + "\n", + "This browser is no longer supported.\n", + "\n", + "Upgrade to Microsoft Edge to take advantage of the latest features, security updates, and technical support.\n", + "\n", + "[Download Microsoft Edge](https://go.microsoft.com/fwlink/p/?LinkID=2092881 ) \n", + "[More info about Internet Explorer and Microsoft Edge](https://learn.microsoft.com/en-us/lifecycle/faq/internet-explorer-microsoft-edge) \n", + "\n", + "Table of contents \n", + "\n", + "Exit focus mode\n", + "\n", + "Read in English\n", + "\n", + "Save\n", + "\n", + "Table of contents\n", + "\n", + "Read in English\n", + "\n", + "Save\n", + "\n", + "Add to Plan\n", + "\n", + "[Edit](https://github.com/MicrosoftDocs/fabric-docs/blob/main/docs/data-science/tuning-automated-machine-learning-visualizations.md \"Edit This Document\")\n", + "\n", + "---\n", + "\n", + "#### Share via\n", + "\n", + "Facebook\n", + "x.com\n", + "LinkedIn\n", + "Email\n", + "\n", + "---\n", + "\n", + "Print\n", + "\n", + "Table of contents\n", + "\n", + "Training visualizations (preview)\n", + "=================================\n", + "\n", + "* Article\n", + "* 03/26/2024\n", + "* 4 contributors\n", + "\n", + "Feedback\n", + "\n", + "In this article\n", + "---------------\n", + "\n", + "A hyperparameter trial or AutoML trial searches for the optimal parameters for a machine learning model. Each trial consists of multiple runs, where each run evaluates a specific parameter combination. Users can monitor these runs using ML experiment items in Fabric.\n", + "\n", + "The `flaml.visualization` module offers functions to plot and compare the runs in FLAML. Users can use Plotly to interact with their AutoML experiment plots. To use these functions, users need to input their optimized `flaml.AutoML` or `flaml.tune.tune.ExperimentAnalysis` object.\n", + "\n", + "This article teaches you how to use the `flaml.visualization` module to analyze and explore your AutoML trial results. You can follow the same steps for your hyperparameter trial as well.\n", + "\n", + "Important\n", + "\n", + "This feature is in [preview](../get-started/preview).\n", + "\n", + "Create an AutoML trial\n", + "----------------------\n", + "\n", + "AutoML offers a suite of automated processes that can identify the best machine learning pipeline for your dataset, making the entire modeling process more straightforward and often more accurate. 
In essence, it saves you the trouble of hand-tuning different models and hyperparameters.\n", + "\n", + "In the code cell below, we will:\n", + "\n", + "1. Load the Iris dataset.\n", + "2. Split the data into training and test sets.\n", + "3. Initiate an AutoML trial to fit our training data.\n", + "4. Explore the results of our AutoML trial with the visualizations from `flaml.visualization`.\n", + "\n", + "```\n", + "from sklearn.datasets import load_iris\n", + "from sklearn.model_selection import train_test_split\n", + "from flaml import AutoML\n", + "\n", + "# Load the Iris data and split it into train and test sets\n", + "x, y = load_iris(return_X_y=True, as_frame=True)\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=7654321)\n", + "\n", + "# Create an AutoML instance and set the parameters\n", + "automl = AutoML()\n", + "automl_settings = {\n", + " \"time_budget\": 10, # Time limit in seconds\n", + " \"task\": \"classification\", # Type of machine learning task\n", + " \"log_file_name\": \"aml_iris.log\", # Name of the log file\n", + " \"metric\": \"accuracy\", # Evaluation metric\n", + " \"log_type\": \"all\", # Level of logging\n", + "}\n", + "# Fit the AutoML instance on the training data\n", + "automl.fit(X_train=x_train, y_train=y_train, **automl_settings)\n", + "\n", + "```\n", + "\n", + "Visualize the experiment results\n", + "--------------------------------\n", + "\n", + "Once you run an AutoML trial, you need to visualize the outcomes to analyze how well the models performed and how they behaved. In this part of our documentation, we show you how to use the built-in utilities in the FLAML library for this purpose.\n", + "\n", + "### Import visualization module\n", + "\n", + "To access these visualization utilities, we run the following import command:\n", + "\n", + "```\n", + "import flaml.visualization as fviz\n", + "\n", + "```\n", + "\n", + "### Optimization history\n", + "\n", + "An optimization history plot typically has the number of trials/iterations on the x-axis and a performance metric (like accuracy, RMSE, etc.) on the y-axis. As the number of trials increases, you would see a line or scatter plot indicating the performance of each trial.\n", + "\n", + "```\n", + "fig = fviz.plot_optimization_history(automl)\n", + "# or\n", + "fig = fviz.plot(automl, \"optimization_history\")\n", + "fig.show()\n", + "\n", + "```\n", + "\n", + "Here is the resulting plot:\n", + "\n", + "[![Graph of optimization history plot.](media/model-training/optimization-history.png)](media/model-training/optimization-history.png#lightbox)\n", + "\n", + "### Feature importance\n", + "\n", + "A feature importance plot is a powerful visualization tool that allows you to understand the significance of different input features in determining the predictions of a model.\n", + "\n", + "```\n", + "fig = fviz.plot_feature_importance(automl)\n", + "# or\n", + "fig = fviz.plot(automl, \"feature_importance\")\n", + "fig.show()\n", + "\n", + "```\n", + "\n", + "Here is the resulting plot:\n", + "\n", + "[![Graph of feature importance plot.](media/model-training/feature-importance.png)](media/model-training/feature-importance.png#lightbox)\n", + "\n", + "### Parallel coordinate plot\n", + "\n", + "A parallel coordinate plot is a visualization tool that represents multi-dimensional data by drawing multiple vertical lines (axes) corresponding to variables or hyperparameters, with data points plotted as connected lines across these axes. 
In the context of an AutoML or tuning experiment, it's instrumental in visualizing and analyzing the performance of different hyperparameter combinations. By tracing the paths of high-performing configurations, one can discern patterns or trends in hyperparameter choices and their interactions. This plot aids in understanding which combinations lead to optimal performance, pinpointing potential areas for further exploration, and identifying any trade-offs between different hyperparameters.\n", + "\n", + "This utility takes the following other arguments:\n", + "\n", + "* `learner`: Specify the learner you intend to study in the experiment. This parameter is only applicable for AutoML experiment results. By leaving this blank, the system chooses the best learner in the whole experiment.\n", + "* `params`: A list to specify which hyperparameter to display. By leaving this blank, the system displays all the available hyperparameters.\n", + "\n", + "```\n", + "fig = fviz.plot_parallel_coordinate(automl, learner=\"lgbm\", params=[\"n_estimators\", \"num_leaves\", \"learning_rate\"])\n", + "# or\n", + "fig = fviz.plot(automl, \"parallel_coordinate\", learner=\"lgbm\", params=[\"n_estimators\", \"num_leaves\", \"learning_rate\"])\n", + "fig.show()\n", + "\n", + "```\n", + "\n", + "Here is the resulting plot:\n", + "\n", + "[![Graph of parallel coordinate plot.](media/model-training/parallel-coordinate-plot.png)](media/model-training/parallel-coordinate-plot.png#lightbox)\n", + "\n", + "### Contour plot\n", + "\n", + "A contour plot visualizes three-dimensional data in two dimensions, where the x and y axes represent two hyperparameters, and the contour lines or filled contours depict levels of a performance metric (for example, accuracy or loss). In the context of an AutoML or tuning experiment, a contour plot is beneficial for understanding the relationship between two hyperparameters and their combined effect on model performance.\n", + "\n", + "By examining the density and positioning of the contour lines, one can identify regions of hyperparameter space where performance is optimized, ascertain potential trade-offs between hyperparameters, and gain insights into their interactions. This visualization helps refine the search space and tuning process.\n", + "\n", + "This utility also takes the following arguments:\n", + "\n", + "* `learner`: Specify the learner you intend to study in the experiment. This parameter is only applicable for AutoML experiment results. By leaving this blank, the system chooses the best learner in the whole experiment.\n", + "* `params`: A list to specify which hyperparameter to display. By leaving this blank, the system displays all the available hyperparameters.\n", + "\n", + "```\n", + "fig = fviz.plot_contour(automl, learner=\"lgbm\", params=[\"n_estimators\", \"num_leaves\", \"learning_rate\"])\n", + "# or\n", + "fig = fviz.plot(automl, \"contour\", learner=\"lgbm\", params=[\"n_estimators\", \"num_leaves\", \"learning_rate\"])\n", + "fig.show()\n", + "\n", + "```\n", + "\n", + "Here is the resulting plot:\n", + "\n", + "[![Graph of contour plot.](media/model-training/contour-plot.png)](media/model-training/contour-plot.png#lightbox)\n", + "\n", + "### Empirical distribution function\n", + "\n", + "An empirical distribution function (EDF) plot, often visualized as a step function, represents the cumulative probability of data points being less than or equal to a particular value. 
Within an AutoML or tuning experiment, an EDF plot can be employed to visualize the distribution of model performances across different hyperparameter configurations.\n", + "\n", + "By observing the steepness or flatness of the curve at various points, one can understand the concentration of good or poor model performances, respectively. This visualization offers insights into the overall efficacy of the tuning process, highlighting whether most of the attempted configurations are yielding satisfactory results or if only a few configurations stand out.\n", + "\n", + "Note\n", + "\n", + "For AutoML experiments, multiple models will be applied during training. The trials of each learner are represented as an optimization series.\n", + "For hyperparameter tuning experiments, there will be only a single learner that is evaluated. However, you can provide additional tuning experiments to see the trends across each learner.\n", + "\n", + "```\n", + "fig = fviz.plot_edf(automl)\n", + "# or\n", + "fig = fviz.plot(automl, \"edf\")\n", + "fig.show()\n", + "\n", + "```\n", + "\n", + "Here is the resulting plot:\n", + "\n", + "[![Graph of the empirical distribution function plot.](media/model-training/empirical-distribution-function-plot.png)](media/model-training/empirical-distribution-function-plot.png#lightbox)\n", + "\n", + "### Timeline plot\n", + "\n", + "A timeline plot, often represented as a Gantt chart or a sequence of bars, visualizes the start, duration, and completion of tasks over time. In the context of an AutoML or tuning experiment, a timeline plot can showcase the progression of various model evaluations and their respective durations, plotted against time. By observing this plot, users can grasp the efficiency of the search process, identify any potential bottlenecks or idle periods, and understand the temporal dynamics of different hyperparameter evaluations.\n", + "\n", + "```\n", + "fig = fviz.plot_timeline(automl)\n", + "# or\n", + "fig = fviz.plot(automl, \"timeline\")\n", + "fig.show()\n", + "\n", + "```\n", + "\n", + "Here is the resulting plot:\n", + "\n", + "[![Graph of timeline plot.](media/model-training/timeline-plot.png)](media/model-training/timeline-plot.png#lightbox)\n", + "\n", + "### Slice plot\n", + "\n", + "Plot the parameter relationship as slice plot in a study.\n", + "\n", + "This utility also takes the following arguments:\n", + "\n", + "* `learner`: Specify the learner you intend to study in the experiment. This parameter is only applicable for AutoML experiment results. By leaving this blank, the system chooses the best learner in the whole experiment.\n", + "* `params`: A list to specify which hyperparameter to display. By leaving this blank, the system displays all the available hyperparameters.\n", + "\n", + "```\n", + "fig = fviz.plot_slice(automl, learner=\"sgd\")\n", + "# or\n", + "fig = fviz.plot(automl, \"slice\", learner=\"sgd\")\n", + "fig.show()\n", + "\n", + "```\n", + "\n", + "Here is the resulting plot:\n", + "\n", + "[![Graph of slice plot.](media/model-training/slice-plot.png)](media/model-training/slice-plot.png#lightbox)\n", + "\n", + "### Hyperparameter importance\n", + "\n", + "A hyperparameter importance plot visually ranks hyperparameters based on their influence on model performance in an AutoML or tuning experiment. Displayed typically as a bar chart, it quantifies the impact of each hyperparameter on the target metric. 
By examining this plot, practitioners can discern which hyperparameters are pivotal in determining model outcomes and which ones have minimal effect.\n", + "\n", + "This utility also takes the following arguments:\n", + "\n", + "* `learner`: Specify the learner you intend to study in the experiment. This parameter is only applicable for AutoML experiment results. By leaving this blank, the system chooses the best learner in the whole experiment.\n", + "* `params`: A list to specify which hyperparameter to display. By leaving this blank, the system displays all the available hyperparameters.\n", + "\n", + "```\n", + "fig = fviz.plot_param_importance(automl, learner=\"sgd\")\n", + "# or\n", + "fig = fviz.plot(automl, \"param_importance\", learner=\"sgd\")\n", + "fig.show()\n", + "\n", + "```\n", + "\n", + "Here is the resulting plot:\n", + "\n", + "[![Graph of hyperparameter importance plot.](media/model-training/hyperparameter-importance-plot.png)](media/model-training/hyperparameter-importance-plot.png#lightbox)\n", + "\n", + "Related content\n", + "---------------\n", + "\n", + "* [Tune a SynapseML Spark LightGBM model](how-to-tune-lightgbm-flaml)\n", + "\n", + "---\n", + "\n", + "Feedback\n", + "--------\n", + "\n", + "Was this page helpful?\n", + "\n", + "Yes\n", + "\n", + "No\n", + "\n", + "[Provide product feedback](https://ideas.fabric.microsoft.com/?forum=f2a1a698-503e-ed11-bba2-000d3a8b12b6&category=91402968-e13f-ed11-bba3-000d3a8b12b6)\n", + "|\n", + "\n", + "[Ask the community](https://community.fabric.microsoft.com/synapse)\n", + "\n", + "Feedback\n", + "--------\n", + "\n", + "Coming soon: Throughout 2024 we will be phasing out GitHub Issues as the feedback mechanism for content and replacing it with a new feedback system. For more information see: . 
\n", + "\n", + "Submit and view feedback for\n", + "\n", + "[This product](https://ideas.fabric.microsoft.com/?forum=f2a1a698-503e-ed11-bba2-000d3a8b12b6&category=91402968-e13f-ed11-bba3-000d3a8b12b6)\n", + "This page\n", + "\n", + "[View all page feedback](https://github.com//issues)\n", + "\n", + "---\n", + "\n", + "Additional resources\n", + "--------------------\n", + "\n", + "[California Consumer Privacy Act (CCPA) Opt-Out Icon\n", + "\n", + "Your Privacy Choices](https://aka.ms/yourcaliforniaprivacychoices)\n", + "\n", + "Theme\n", + "\n", + "* Light\n", + "* Dark\n", + "* High contrast\n", + "\n", + "* \n", + "* [Previous Versions](/en-us/previous-versions/)\n", + "* [Blog](https://techcommunity.microsoft.com/t5/microsoft-learn-blog/bg-p/MicrosoftLearnBlog)\n", + "* [Contribute](/en-us/contribute/)\n", + "* [Privacy](https://go.microsoft.com/fwlink/?LinkId=521839)\n", + "* [Terms of Use](/en-us/legal/termsofuse)\n", + "* [Trademarks](https://www.microsoft.com/legal/intellectualproperty/Trademarks/)\n", + "* © Microsoft 2024\n", + "\n", + "Additional resources\n", + "--------------------\n", + "\n", + "### In this article\n", + "\n", + "[California Consumer Privacy Act (CCPA) Opt-Out Icon\n", + "\n", + "Your Privacy Choices](https://aka.ms/yourcaliforniaprivacychoices)\n", + "\n", + "Theme\n", + "\n", + "* Light\n", + "* Dark\n", + "* High contrast\n", + "\n", + "* \n", + "* [Previous Versions](/en-us/previous-versions/)\n", + "* [Blog](https://techcommunity.microsoft.com/t5/microsoft-learn-blog/bg-p/MicrosoftLearnBlog)\n", + "* [Contribute](/en-us/contribute/)\n", + "* [Privacy](https://go.microsoft.com/fwlink/?LinkId=521839)\n", + "* [Terms of Use](/en-us/legal/termsofuse)\n", + "* [Trademarks](https://www.microsoft.com/legal/intellectualproperty/Trademarks/)\n", + "* © Microsoft 2024\n", + "\n", + "\n", + "The source of the context is: ['https://learn.microsoft.com/en-us/fabric/data-science/tuning-automated-machine-learning-visualizations']\n", + "\n", + "If you can answer the question, in the end of your answer, add the source of the context in the format of `Sources: source1, source2, ...`.\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33massistant\u001b[0m (to ragproxyagent):\n", + "\n", + "Step 1: The user's intent is a code generation task to train a regression model with a time budget of 12 seconds and plot the timeline plot after training.\n", + "\n", + "Step 2: Here is the code to achieve this:\n", + "\n", + "```python\n", + "from sklearn.datasets import load_boston\n", + "from sklearn.model_selection import train_test_split\n", + "from flaml import AutoML\n", + "import flaml.visualization as fviz\n", + "\n", + "# Load the Boston housing data and split it into train and test sets\n", + "x, y = load_boston(return_X_y=True)\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=7654321)\n", + "\n", + "# Create an AutoML instance and set the parameters\n", + "automl = AutoML()\n", + "automl_settings = {\n", + " \"time_budget\": 12, # Time limit in seconds\n", + " \"task\": \"regression\", # Type of machine learning task\n", + " \"log_file_name\": \"aml_boston.log\", # Name of the log file\n", + " \"metric\": \"rmse\", # Evaluation metric\n", + " \"log_type\": \"all\", # Level of logging\n", + "}\n", + "\n", + "# Fit the AutoML instance on the training data\n", + "automl.fit(X_train=x_train, y_train=y_train, **automl_settings)\n", + "\n", + "# Plot the timeline 
plot\n", + "fig = fviz.plot_timeline(automl)\n", + "fig.show()\n", + "```\n", + "\n", + "Sources: [Visualize tuning and AutoML trials - Microsoft Fabric | Microsoft Learn](https://learn.microsoft.com/en-us/fabric/data-science/tuning-automated-machine-learning-visualizations)\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[31m\n", + ">>>>>>>> EXECUTING CODE BLOCK (inferred language is python)...\u001b[0m\n", + "\u001b[33mragproxyagent\u001b[0m (to assistant):\n", + "\n", + "exitcode: 1 (execution failed)\n", + "Code output: Traceback (most recent call last):\n", + " File \"/tmp/tmp41070gi5/tmp_code_4463932bbc95a1921034eb428e7ded0c.py\", line 1, in \n", + " from sklearn.datasets import load_boston\n", + " File \"/home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages/sklearn/datasets/__init__.py\", line 157, in __getattr__\n", + " raise ImportError(msg)\n", + "ImportError: \n", + "`load_boston` has been removed from scikit-learn since version 1.2.\n", + "\n", + "The Boston housing prices dataset has an ethical problem: as\n", + "investigated in [1], the authors of this dataset engineered a\n", + "non-invertible variable \"B\" assuming that racial self-segregation had a\n", + "positive impact on house prices [2]. Furthermore the goal of the\n", + "research that led to the creation of this dataset was to study the\n", + "impact of air quality but it did not give adequate demonstration of the\n", + "validity of this assumption.\n", + "\n", + "The scikit-learn maintainers therefore strongly discourage the use of\n", + "this dataset unless the purpose of the code is to study and educate\n", + "about ethical issues in data science and machine learning.\n", + "\n", + "In this special case, you can fetch the dataset from the original\n", + "source::\n", + "\n", + " import pandas as pd\n", + " import numpy as np\n", + "\n", + " data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n", + " raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\n", + " data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n", + " target = raw_df.values[1::2, 2]\n", + "\n", + "Alternative datasets include the California housing dataset and the\n", + "Ames housing dataset. You can load the datasets as follows::\n", + "\n", + " from sklearn.datasets import fetch_california_housing\n", + " housing = fetch_california_housing()\n", + "\n", + "for the California housing dataset and::\n", + "\n", + " from sklearn.datasets import fetch_openml\n", + " housing = fetch_openml(name=\"house_prices\", as_frame=True)\n", + "\n", + "for the Ames housing dataset.\n", + "\n", + "[1] M Carlisle.\n", + "\"Racist data destruction?\"\n", + "\n", + "\n", + "[2] Harrison Jr, David, and Daniel L. Rubinfeld.\n", + "\"Hedonic housing prices and the demand for clean air.\"\n", + "Journal of environmental economics and management 5.1 (1978): 81-102.\n", + "\n", + "\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33massistant\u001b[0m (to ragproxyagent):\n", + "\n", + "The `load_boston` dataset has been removed from scikit-learn due to ethical concerns. 
Let's use the California housing dataset instead.\n", + "\n", + "Here is the updated code:\n", + "\n", + "```python\n", + "from sklearn.datasets import fetch_california_housing\n", + "from sklearn.model_selection import train_test_split\n", + "from flaml import AutoML\n", + "import flaml.visualization as fviz\n", + "\n", + "# Load the California housing data and split it into train and test sets\n", + "housing = fetch_california_housing()\n", + "x, y = housing.data, housing.target\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=7654321)\n", + "\n", + "# Create an AutoML instance and set the parameters\n", + "automl = AutoML()\n", + "automl_settings = {\n", + " \"time_budget\": 12, # Time limit in seconds\n", + " \"task\": \"regression\", # Type of machine learning task\n", + " \"log_file_name\": \"aml_california.log\", # Name of the log file\n", + " \"metric\": \"rmse\", # Evaluation metric\n", + " \"log_type\": \"all\", # Level of logging\n", + "}\n", + "\n", + "# Fit the AutoML instance on the training data\n", + "automl.fit(X_train=x_train, y_train=y_train, **automl_settings)\n", + "\n", + "# Plot the timeline plot\n", + "fig = fviz.plot_timeline(automl)\n", + "fig.show()\n", + "```\n", + "\n", + "Sources: [Visualize tuning and AutoML trials - Microsoft Fabric | Microsoft Learn](https://learn.microsoft.com/en-us/fabric/data-science/tuning-automated-machine-learning-visualizations)\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[31m\n", + ">>>>>>>> EXECUTING CODE BLOCK (inferred language is python)...\u001b[0m\n", + "\u001b[33mragproxyagent\u001b[0m (to assistant):\n", + "\n", + "exitcode: 0 (execution succeeded)\n", + "Code output: Please make sure you passed environment EnvConfigs to workers by calling `set_mlflow_env_config` in order to trigger mlflow on workers correctly.\n", + "Please make sure you passed environment EnvConfigs to workers by calling `set_mlflow_env_config` in order to trigger mlflow on workers correctly.\n", + "Please make sure you passed environment EnvConfigs to workers by calling `set_mlflow_env_config` in order to trigger mlflow on workers correctly.\n", + "Please make sure you passed environment EnvConfigs to workers by calling `set_mlflow_env_config` in order to trigger mlflow on workers correctly.\n", + "[flaml.automl.logger: 06-07 15:28:07] {1767} INFO - task = regression\n", + "[flaml.automl.logger: 06-07 15:28:07] {1778} INFO - Evaluation method: holdout\n", + "[flaml.automl.logger: 06-07 15:28:07] {1881} INFO - Minimizing error metric: rmse\n", + "[flaml.automl.logger: 06-07 15:28:09] {1999} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'sgd', 'catboost']\n", + "[flaml.automl.logger: 06-07 15:28:09] {2309} INFO - iteration 0, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:09] {2444} INFO - Estimated sufficient time budget=3982s. 
Estimated necessary time budget=34s.\n", + "[flaml.automl.logger: 06-07 15:28:09] {2493} INFO - at 4.9s,\testimator lgbm's best error=0.9511,\tbest estimator lgbm's best error=0.9511\n", + "[flaml.automl.logger: 06-07 15:28:09] {2309} INFO - iteration 1, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:09] {2493} INFO - at 4.9s,\testimator lgbm's best error=0.9511,\tbest estimator lgbm's best error=0.9511\n", + "[flaml.automl.logger: 06-07 15:28:09] {2309} INFO - iteration 2, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:09] {2493} INFO - at 4.9s,\testimator lgbm's best error=0.8172,\tbest estimator lgbm's best error=0.8172\n", + "[flaml.automl.logger: 06-07 15:28:09] {2309} INFO - iteration 3, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:09] {2493} INFO - at 4.9s,\testimator lgbm's best error=0.6288,\tbest estimator lgbm's best error=0.6288\n", + "[flaml.automl.logger: 06-07 15:28:09] {2309} INFO - iteration 4, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:09] {2493} INFO - at 5.0s,\testimator lgbm's best error=0.6288,\tbest estimator lgbm's best error=0.6288\n", + "[flaml.automl.logger: 06-07 15:28:09] {2309} INFO - iteration 5, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:09] {2493} INFO - at 5.0s,\testimator lgbm's best error=0.6104,\tbest estimator lgbm's best error=0.6104\n", + "[flaml.automl.logger: 06-07 15:28:09] {2309} INFO - iteration 6, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:09] {2493} INFO - at 5.0s,\testimator lgbm's best error=0.6104,\tbest estimator lgbm's best error=0.6104\n", + "[flaml.automl.logger: 06-07 15:28:09] {2309} INFO - iteration 7, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:09] {2493} INFO - at 5.0s,\testimator lgbm's best error=0.6104,\tbest estimator lgbm's best error=0.6104\n", + "[flaml.automl.logger: 06-07 15:28:09] {2309} INFO - iteration 8, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:09] {2493} INFO - at 5.0s,\testimator lgbm's best error=0.5627,\tbest estimator lgbm's best error=0.5627\n", + "[flaml.automl.logger: 06-07 15:28:09] {2309} INFO - iteration 9, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:09] {2493} INFO - at 5.0s,\testimator lgbm's best error=0.5627,\tbest estimator lgbm's best error=0.5627\n", + "[flaml.automl.logger: 06-07 15:28:09] {2309} INFO - iteration 10, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:09] {2493} INFO - at 5.1s,\testimator lgbm's best error=0.5001,\tbest estimator lgbm's best error=0.5001\n", + "[flaml.automl.logger: 06-07 15:28:09] {2309} INFO - iteration 11, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:10] {2493} INFO - at 5.3s,\testimator lgbm's best error=0.5001,\tbest estimator lgbm's best error=0.5001\n", + "[flaml.automl.logger: 06-07 15:28:10] {2309} INFO - iteration 12, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:10] {2493} INFO - at 5.3s,\testimator lgbm's best error=0.5001,\tbest estimator lgbm's best error=0.5001\n", + "[flaml.automl.logger: 06-07 15:28:10] {2309} INFO - iteration 13, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:10] {2493} INFO - at 5.4s,\testimator lgbm's best error=0.5001,\tbest estimator lgbm's best error=0.5001\n", + "[flaml.automl.logger: 06-07 15:28:10] {2309} INFO - iteration 14, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:10] {2493} INFO - at 5.6s,\testimator lgbm's best error=0.4888,\tbest estimator lgbm's best error=0.4888\n", + 
"[flaml.automl.logger: 06-07 15:28:10] {2309} INFO - iteration 15, current learner sgd\n", + "[flaml.automl.logger: 06-07 15:28:10] {2493} INFO - at 5.6s,\testimator sgd's best error=1.1240,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:10] {2309} INFO - iteration 16, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:10] {2493} INFO - at 6.0s,\testimator lgbm's best error=0.4888,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:10] {2309} INFO - iteration 17, current learner sgd\n", + "[flaml.automl.logger: 06-07 15:28:10] {2493} INFO - at 6.0s,\testimator sgd's best error=1.1240,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:10] {2309} INFO - iteration 18, current learner sgd\n", + "[flaml.automl.logger: 06-07 15:28:10] {2493} INFO - at 6.1s,\testimator sgd's best error=1.1240,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:10] {2309} INFO - iteration 19, current learner sgd\n", + "[flaml.automl.logger: 06-07 15:28:10] {2493} INFO - at 6.1s,\testimator sgd's best error=1.1067,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:10] {2309} INFO - iteration 20, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:10] {2493} INFO - at 6.2s,\testimator lgbm's best error=0.4888,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:10] {2309} INFO - iteration 21, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:11] {2493} INFO - at 6.5s,\testimator lgbm's best error=0.4888,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:11] {2309} INFO - iteration 22, current learner xgboost\n", + "[flaml.automl.logger: 06-07 15:28:11] {2493} INFO - at 6.6s,\testimator xgboost's best error=1.3843,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:11] {2309} INFO - iteration 23, current learner xgboost\n", + "[flaml.automl.logger: 06-07 15:28:11] {2493} INFO - at 6.7s,\testimator xgboost's best error=1.3843,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:11] {2309} INFO - iteration 24, current learner xgboost\n", + "[flaml.automl.logger: 06-07 15:28:11] {2493} INFO - at 6.7s,\testimator xgboost's best error=0.9469,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:11] {2309} INFO - iteration 25, current learner xgboost\n", + "[flaml.automl.logger: 06-07 15:28:11] {2493} INFO - at 6.7s,\testimator xgboost's best error=0.6871,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:11] {2309} INFO - iteration 26, current learner xgboost\n", + "[flaml.automl.logger: 06-07 15:28:11] {2493} INFO - at 6.7s,\testimator xgboost's best error=0.6871,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:11] {2309} INFO - iteration 27, current learner xgboost\n", + "[flaml.automl.logger: 06-07 15:28:11] {2493} INFO - at 6.7s,\testimator xgboost's best error=0.6871,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:11] {2309} INFO - iteration 28, current learner xgboost\n", + "[flaml.automl.logger: 06-07 15:28:11] {2493} INFO - at 6.7s,\testimator xgboost's best error=0.6203,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:11] {2309} INFO - iteration 29, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:11] {2493} INFO - at 
6.8s,\testimator lgbm's best error=0.4888,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:11] {2309} INFO - iteration 30, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:11] {2493} INFO - at 6.9s,\testimator lgbm's best error=0.4888,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:11] {2309} INFO - iteration 31, current learner xgboost\n", + "[flaml.automl.logger: 06-07 15:28:11] {2493} INFO - at 6.9s,\testimator xgboost's best error=0.6053,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:11] {2309} INFO - iteration 32, current learner xgboost\n", + "[flaml.automl.logger: 06-07 15:28:11] {2493} INFO - at 6.9s,\testimator xgboost's best error=0.5953,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:11] {2309} INFO - iteration 33, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:12] {2493} INFO - at 7.4s,\testimator lgbm's best error=0.4888,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:12] {2309} INFO - iteration 34, current learner xgboost\n", + "[flaml.automl.logger: 06-07 15:28:12] {2493} INFO - at 7.4s,\testimator xgboost's best error=0.5550,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:12] {2309} INFO - iteration 35, current learner xgboost\n", + "[flaml.automl.logger: 06-07 15:28:12] {2493} INFO - at 7.4s,\testimator xgboost's best error=0.5550,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:12] {2309} INFO - iteration 36, current learner xgboost\n", + "[flaml.automl.logger: 06-07 15:28:12] {2493} INFO - at 7.4s,\testimator xgboost's best error=0.5550,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:12] {2309} INFO - iteration 37, current learner xgboost\n", + "[flaml.automl.logger: 06-07 15:28:12] {2493} INFO - at 7.5s,\testimator xgboost's best error=0.5285,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:12] {2309} INFO - iteration 38, current learner xgboost\n", + "[flaml.automl.logger: 06-07 15:28:12] {2493} INFO - at 7.5s,\testimator xgboost's best error=0.5285,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:12] {2309} INFO - iteration 39, current learner xgboost\n", + "[flaml.automl.logger: 06-07 15:28:12] {2493} INFO - at 7.6s,\testimator xgboost's best error=0.5285,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:12] {2309} INFO - iteration 40, current learner xgboost\n", + "[flaml.automl.logger: 06-07 15:28:12] {2493} INFO - at 7.6s,\testimator xgboost's best error=0.5285,\tbest estimator lgbm's best error=0.4888\n", + "[flaml.automl.logger: 06-07 15:28:12] {2309} INFO - iteration 41, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:12] {2493} INFO - at 7.7s,\testimator lgbm's best error=0.4824,\tbest estimator lgbm's best error=0.4824\n", + "[flaml.automl.logger: 06-07 15:28:12] {2309} INFO - iteration 42, current learner xgboost\n", + "[flaml.automl.logger: 06-07 15:28:12] {2493} INFO - at 7.8s,\testimator xgboost's best error=0.5285,\tbest estimator lgbm's best error=0.4824\n", + "[flaml.automl.logger: 06-07 15:28:12] {2309} INFO - iteration 43, current learner extra_tree\n", + "[flaml.automl.logger: 06-07 15:28:12] {2493} INFO - at 8.0s,\testimator extra_tree's best error=0.8723,\tbest estimator lgbm's best error=0.4824\n", + "[flaml.automl.logger: 06-07 
15:28:12] {2309} INFO - iteration 44, current learner sgd\n", + "[flaml.automl.logger: 06-07 15:28:12] {2493} INFO - at 8.0s,\testimator sgd's best error=1.1055,\tbest estimator lgbm's best error=0.4824\n", + "[flaml.automl.logger: 06-07 15:28:12] {2309} INFO - iteration 45, current learner extra_tree\n", + "[flaml.automl.logger: 06-07 15:28:12] {2493} INFO - at 8.0s,\testimator extra_tree's best error=0.7612,\tbest estimator lgbm's best error=0.4824\n", + "[flaml.automl.logger: 06-07 15:28:12] {2309} INFO - iteration 46, current learner xgboost\n", + "[flaml.automl.logger: 06-07 15:28:12] {2493} INFO - at 8.1s,\testimator xgboost's best error=0.5285,\tbest estimator lgbm's best error=0.4824\n", + "[flaml.automl.logger: 06-07 15:28:12] {2309} INFO - iteration 47, current learner extra_tree\n", + "[flaml.automl.logger: 06-07 15:28:13] {2493} INFO - at 8.3s,\testimator extra_tree's best error=0.7612,\tbest estimator lgbm's best error=0.4824\n", + "[flaml.automl.logger: 06-07 15:28:13] {2309} INFO - iteration 48, current learner rf\n", + "[flaml.automl.logger: 06-07 15:28:13] {2493} INFO - at 8.4s,\testimator rf's best error=0.8142,\tbest estimator lgbm's best error=0.4824\n", + "[flaml.automl.logger: 06-07 15:28:13] {2309} INFO - iteration 49, current learner rf\n", + "[flaml.automl.logger: 06-07 15:28:13] {2493} INFO - at 8.5s,\testimator rf's best error=0.6937,\tbest estimator lgbm's best error=0.4824\n", + "[flaml.automl.logger: 06-07 15:28:13] {2309} INFO - iteration 50, current learner rf\n", + "[flaml.automl.logger: 06-07 15:28:13] {2493} INFO - at 8.6s,\testimator rf's best error=0.6937,\tbest estimator lgbm's best error=0.4824\n", + "[flaml.automl.logger: 06-07 15:28:13] {2309} INFO - iteration 51, current learner extra_tree\n", + "[flaml.automl.logger: 06-07 15:28:13] {2493} INFO - at 8.6s,\testimator extra_tree's best error=0.7209,\tbest estimator lgbm's best error=0.4824\n", + "[flaml.automl.logger: 06-07 15:28:13] {2309} INFO - iteration 52, current learner rf\n", + "[flaml.automl.logger: 06-07 15:28:13] {2493} INFO - at 8.8s,\testimator rf's best error=0.6425,\tbest estimator lgbm's best error=0.4824\n", + "[flaml.automl.logger: 06-07 15:28:13] {2309} INFO - iteration 53, current learner rf\n", + "[flaml.automl.logger: 06-07 15:28:13] {2493} INFO - at 9.0s,\testimator rf's best error=0.6055,\tbest estimator lgbm's best error=0.4824\n", + "[flaml.automl.logger: 06-07 15:28:13] {2309} INFO - iteration 54, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:14] {2493} INFO - at 9.2s,\testimator lgbm's best error=0.4824,\tbest estimator lgbm's best error=0.4824\n", + "[flaml.automl.logger: 06-07 15:28:14] {2309} INFO - iteration 55, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:14] {2493} INFO - at 9.4s,\testimator lgbm's best error=0.4824,\tbest estimator lgbm's best error=0.4824\n", + "[flaml.automl.logger: 06-07 15:28:14] {2309} INFO - iteration 56, current learner xgboost\n", + "[flaml.automl.logger: 06-07 15:28:14] {2493} INFO - at 9.5s,\testimator xgboost's best error=0.5187,\tbest estimator lgbm's best error=0.4824\n", + "[flaml.automl.logger: 06-07 15:28:14] {2309} INFO - iteration 57, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:14] {2493} INFO - at 9.8s,\testimator lgbm's best error=0.4824,\tbest estimator lgbm's best error=0.4824\n", + "[flaml.automl.logger: 06-07 15:28:14] {2309} INFO - iteration 58, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:15] {2493} INFO - at 10.2s,\testimator lgbm's best 
error=0.4794,\tbest estimator lgbm's best error=0.4794\n", + "[flaml.automl.logger: 06-07 15:28:15] {2309} INFO - iteration 59, current learner rf\n", + "[flaml.automl.logger: 06-07 15:28:15] {2493} INFO - at 10.5s,\testimator rf's best error=0.6055,\tbest estimator lgbm's best error=0.4794\n", + "[flaml.automl.logger: 06-07 15:28:15] {2309} INFO - iteration 60, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:15] {2493} INFO - at 10.7s,\testimator lgbm's best error=0.4794,\tbest estimator lgbm's best error=0.4794\n", + "[flaml.automl.logger: 06-07 15:28:15] {2309} INFO - iteration 61, current learner rf\n", + "[flaml.automl.logger: 06-07 15:28:15] {2493} INFO - at 11.0s,\testimator rf's best error=0.5968,\tbest estimator lgbm's best error=0.4794\n", + "[flaml.automl.logger: 06-07 15:28:15] {2309} INFO - iteration 62, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:16] {2493} INFO - at 12.1s,\testimator lgbm's best error=0.4794,\tbest estimator lgbm's best error=0.4794\n", + "[flaml.automl.logger: 06-07 15:28:17] {2736} INFO - retrain lgbm for 0.5s\n", + "[flaml.automl.logger: 06-07 15:28:17] {2739} INFO - retrained model: LGBMRegressor(colsample_bytree=0.591579264701285,\n", + " learning_rate=0.0715412842452619, max_bin=511,\n", + " min_child_samples=2, n_estimators=1, n_jobs=-1, num_leaves=168,\n", + " reg_alpha=0.01435520144866301, reg_lambda=0.006874802748054268,\n", + " verbose=-1)\n", + "[flaml.automl.logger: 06-07 15:28:17] {2740} INFO - Auto Feature Engineering pipeline: None\n", + "[flaml.automl.logger: 06-07 15:28:17] {2035} INFO - fit succeeded\n", + "[flaml.automl.logger: 06-07 15:28:17] {2036} INFO - Time taken to find the best model: 10.24332308769226\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33massistant\u001b[0m (to ragproxyagent):\n", + "\n", + "TERMINATE\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "assistant.reset()\n", + "problem = \"Train a regression model, set time budget to 12s, plot the time line plot after training.\"\n", + "\n", + "chat_result = ragproxyagent.initiate_chat(assistant, message=ragproxyagent.message_generator, problem=problem)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16", + "metadata": { + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [ + { + "data": { + "application/vnd.livy.statement-meta+json": { + "execution_finish_time": "2024-06-07T15:28:22.7924281Z", + "execution_start_time": "2024-06-07T15:28:22.4431692Z", + "livy_statement_state": "available", + "parent_msg_id": "8c89a821-45eb-47f0-8608-11ac711f02e9", + "queued_time": "2024-06-07T15:26:26.0620587Z", + "session_id": "1d5e9aec-2019-408c-a19a-5db9fb175ae2", + "session_start_time": null, + "spark_pool": null, + "state": "finished", + "statement_id": 19, + "statement_ids": [ + 19 + ] + }, + "text/plain": [ + "StatementMeta(, 1d5e9aec-2019-408c-a19a-5db9fb175ae2, 19, Finished, Available)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cost for the chat:\n", + "{'usage_including_cached_inference': {'total_cost': 0.04863, 'gpt-4o-2024-05-13': {'cost': 0.04863, 'prompt_tokens': 7737, 'completion_tokens': 663, 'total_tokens': 8400}}, 'usage_excluding_cached_inference': {'total_cost': 0.04863, 
'gpt-4o-2024-05-13': {'cost': 0.04863, 'prompt_tokens': 7737, 'completion_tokens': 663, 'total_tokens': 8400}}}\n" + ] + } + ], + "source": [ + "print(f\"Cost for the chat:\\n{chat_result.cost}\")" + ] + }, + { + "cell_type": "markdown", + "id": "17", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "Below is the code generated by the AutoGen RAG agent. It's not a copy of the code in the related document, as we asked for a different task and training time, but the AutoGen RAG agent adapted it very well." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18", + "metadata": { + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [ + { + "data": { + "application/vnd.livy.statement-meta+json": { + "execution_finish_time": "2024-06-07T15:28:56.954585Z", + "execution_start_time": "2024-06-07T15:28:23.7618029Z", + "livy_statement_state": "available", + "parent_msg_id": "ced1bbe3-3ab3-421a-a8a9-6eb151a3a7d3", + "queued_time": "2024-06-07T15:26:26.2444398Z", + "session_id": "1d5e9aec-2019-408c-a19a-5db9fb175ae2", + "session_start_time": null, + "spark_pool": null, + "state": "finished", + "statement_id": 20, + "statement_ids": [ + 20 + ] + }, + "text/plain": [ + "StatementMeta(, 1d5e9aec-2019-408c-a19a-5db9fb175ae2, 20, Finished, Available)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 06-07 15:28:28] {1767} INFO - task = regression\n", + "[flaml.automl.logger: 06-07 15:28:28] {1778} INFO - Evaluation method: holdout\n", + "[flaml.automl.logger: 06-07 15:28:28] {1881} INFO - Minimizing error metric: rmse\n", + "[flaml.automl.logger: 06-07 15:28:28] {1999} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'sgd', 'catboost']\n", + "[flaml.automl.logger: 06-07 15:28:28] {2309} INFO - iteration 0, current learner lgbm\n", + "[flaml.automl.logger: 06-07 15:28:28] {2444} INFO - Estimated sufficient time budget=145s. 
Estimated necessary time budget=1s.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages/_distutils_hack/__init__.py:26: UserWarning: Setuptools is replacing distutils.\n", + " warnings.warn(\"Setuptools is replacing distutils.\")\n", + "2024/06/07 15:28:47 WARNING mlflow.utils.requirements_utils: The following packages were not found in the public PyPI package index as of 2024-02-29; if these packages are not present in the public PyPI index, you must install them manually before loading your model: {'synapseml-internal', 'synapseml-mlflow'}\n" + ] + }, + { + "data": { + "application/vnd.mlflow.run-widget+json": { + "data": { + "metrics": { + "best_validation_loss": 0.9510965242768078, + "iter_counter": 0, + "rmse": 0.9510965242768078, + "trial_time": 0.012721061706542969, + "validation_loss": 0.9510965242768078, + "wall_clock_time": 4.973712205886841 + }, + "params": { + "colsample_bytree": "1.0", + "learner": "lgbm", + "learning_rate": "0.09999999999999995", + "log_max_bin": "8", + "min_child_samples": "20", + "n_estimators": "4", + "num_leaves": "4", + "reg_alpha": "0.0009765625", + "reg_lambda": "1.0", + "sample_size": "14860" + }, + "tags": { + "flaml.best_run": "False", + "flaml.estimator_class": "LGBMEstimator", + "flaml.estimator_name": "lgbm", + "flaml.iteration_number": "0", + "flaml.learner": "lgbm", + "flaml.log_type": "r_autolog", + "flaml.meric": "rmse", + "flaml.run_source": "flaml-automl", + "flaml.sample_size": "14860", + "flaml.version": "2.1.2.post1", + "mlflow.rootRunId": "da4aff39-ef24-4953-ab30-f9adc0c843bd", + "mlflow.runName": "careful_stomach_bzw71tb4", + "mlflow.user": "0e0e6551-b66b-41f3-bc82-bd86e0d203dc", + "synapseml.experiment.artifactId": "2ba08dad-7edc-4af2-b41b-5802fb6180c2", + "synapseml.experimentName": "autogen", + "synapseml.livy.id": "1d5e9aec-2019-408c-a19a-5db9fb175ae2", + "synapseml.notebook.artifactId": "72c91c1d-9cbf-4ca5-8180-2e318bb7d1d5", + "synapseml.user.id": "8abb9091-0a62-4ecd-bf6a-e49dbbf94431", + "synapseml.user.name": "Li Jiang" + } + }, + "info": { + "artifact_uri": "sds://onelakedxt.pbidedicated.windows.net/a9c17701-dbed-452d-91ee-ffeef4d6674f/2ba08dad-7edc-4af2-b41b-5802fb6180c2/da4aff39-ef24-4953-ab30-f9adc0c843bd/artifacts", + "end_time": 1717774129, + "experiment_id": "9d1ec9c8-d313-40a4-9ed8-b9bf496195ae", + "lifecycle_stage": "active", + "run_id": "da4aff39-ef24-4953-ab30-f9adc0c843bd", + "run_name": "", + "run_uuid": "da4aff39-ef24-4953-ab30-f9adc0c843bd", + "start_time": 1717774109, + "status": "FINISHED", + "user_id": "9ec1a2ed-32f8-4061-910f-25871321251b" + }, + "inputs": { + "dataset_inputs": [] + } + } + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[flaml.automl.logger: 06-07 15:28:53] {2493} INFO - at 5.0s,\testimator lgbm's best error=0.9511,\tbest estimator lgbm's best error=0.9511\n", + "[flaml.automl.logger: 06-07 15:28:54] {2736} INFO - retrain lgbm for 0.0s\n", + "[flaml.automl.logger: 06-07 15:28:54] {2739} INFO - retrained model: LGBMRegressor(learning_rate=0.09999999999999995, max_bin=255, n_estimators=1,\n", + " n_jobs=-1, num_leaves=4, reg_alpha=0.0009765625, reg_lambda=1.0,\n", + " verbose=-1)\n", + "[flaml.automl.logger: 06-07 15:28:54] {2740} INFO - Auto Feature Engineering pipeline: None\n", + "[flaml.automl.logger: 06-07 15:28:54] {2742} INFO - Best MLflow run name: \n", + "[flaml.automl.logger: 06-07 15:28:54] {2743} INFO - 
Best MLflow run id: da4aff39-ef24-4953-ab30-f9adc0c843bd\n", + "[flaml.automl.logger: 06-07 15:28:54] {2035} INFO - fit succeeded\n", + "[flaml.automl.logger: 06-07 15:28:54] {2036} INFO - Time taken to find the best model: 4.973712205886841\n" + ] + }, + { + "data": { + "text/html": [ + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "base": [ + 4.960991144180298 + ], + "name": "lgbm", + "orientation": "h", + "type": "bar", + "x": [ + 0.012721061706542969 + ], + "y": [ + 0 + ] + } + ], + "layout": { + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": 
[ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ 
+ 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Timeline Plot" + }, + "xaxis": { + "title": { + "text": "Time (s)" + } + }, + "yaxis": { + "title": { + "text": "Trial" + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import flaml.visualization as fviz\n", + "from flaml import AutoML\n", + "from sklearn.datasets import fetch_california_housing\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "# Load the California housing data and split it into train and test sets\n", + "housing = fetch_california_housing()\n", + "x, y = housing.data, housing.target\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=7654321)\n", + "\n", + "# Create an AutoML instance and set the parameters\n", + "automl = AutoML()\n", + "automl_settings = {\n", + " \"time_budget\": 12, # Time limit in seconds\n", + " \"task\": \"regression\", # Type of machine learning task\n", + " \"log_file_name\": \"aml_california.log\", # Name of the log file\n", + " \"metric\": \"rmse\", # Evaluation metric\n", + " \"log_type\": \"all\", # Level of logging\n", + "}\n", + "\n", + "# Fit the AutoML instance on the training data\n", + "automl.fit(X_train=x_train, y_train=y_train, **automl_settings)\n", + "\n", + "# Plot the timeline plot\n", + "fig = fviz.plot_timeline(automl)\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "19", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "### Example 3\n", + "How to use `MultimodalConversableAgent` to chat with images.\n", + "\n", + "Check out this [blog](https://microsoft.github.io/autogen/blog/2023/11/06/LMM-Agent) for more details." + ] + }, + { + "cell_type": "markdown", + "id": "20", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "We'll ask a question about below image:![image-alt-text](https://th.bing.com/th/id/R.422068ce8af4e15b0634fe2540adea7a?rik=y4OcXBE%2fqutDOw&pid=ImgRaw&r=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21", + "metadata": { + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [ + { + "data": { + "application/vnd.livy.statement-meta+json": { + "execution_finish_time": "2024-06-07T15:29:04.6027047Z", + "execution_start_time": "2024-06-07T15:28:57.9532564Z", + "livy_statement_state": "available", + "parent_msg_id": "71bfdcee-445d-4564-b423-61d9a6378939", + "queued_time": "2024-06-07T15:26:26.4400435Z", + "session_id": "1d5e9aec-2019-408c-a19a-5db9fb175ae2", + "session_start_time": null, + "spark_pool": null, + "state": "finished", + "statement_id": 21, + "statement_ids": [ + 21 + ] + }, + "text/plain": [ + "StatementMeta(, 1d5e9aec-2019-408c-a19a-5db9fb175ae2, 21, Finished, Available)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mUser_proxy\u001b[0m (to image-explainer):\n", + "\n", + "What's the breed of this dog?\n", + ".\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[31m\n", + ">>>>>>>> USING AUTO REPLY...\u001b[0m\n", + "\u001b[33mimage-explainer\u001b[0m (to User_proxy):\n", + "\n", + "The dog in the image appears to be a Poodle or a Poodle mix, such as a Labradoodle or a Goldendoodle, based on its curly coat and overall appearance.\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "from autogen.agentchat.contrib.multimodal_conversable_agent import 
MultimodalConversableAgent\n", + "\n", + "image_agent = MultimodalConversableAgent(\n", + " name=\"image-explainer\",\n", + " max_consecutive_auto_reply=10,\n", + " llm_config={\"config_list\": config_list, \"temperature\": 0.5, \"max_tokens\": 300},\n", + ")\n", + "\n", + "user_proxy = autogen.UserProxyAgent(\n", + " name=\"User_proxy\",\n", + " system_message=\"A human admin.\",\n", + " human_input_mode=\"NEVER\", # Try between ALWAYS or NEVER\n", + " max_consecutive_auto_reply=0,\n", + " code_execution_config={\n", + " \"use_docker\": False\n", + " }, # Please set use_docker=True if docker is available to run the generated code. Using docker is safer than running the generated code directly.\n", + ")\n", + "\n", + "# Ask the question with an image\n", + "chat_result = user_proxy.initiate_chat(\n", + " image_agent,\n", + " message=\"\"\"What's the breed of this dog?\n", + ".\"\"\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22", + "metadata": { + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [ + { + "data": { + "application/vnd.livy.statement-meta+json": { + "execution_finish_time": "2024-06-07T15:29:05.9669658Z", + "execution_start_time": "2024-06-07T15:29:05.613333Z", + "livy_statement_state": "available", + "parent_msg_id": "af81a0c7-9ee8-4da4-aa6e-dcd735209961", + "queued_time": "2024-06-07T15:26:26.7741139Z", + "session_id": "1d5e9aec-2019-408c-a19a-5db9fb175ae2", + "session_start_time": null, + "spark_pool": null, + "state": "finished", + "statement_id": 22, + "statement_ids": [ + 22 + ] + }, + "text/plain": [ + "StatementMeta(, 1d5e9aec-2019-408c-a19a-5db9fb175ae2, 22, Finished, Available)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cost for the chat:\n", + "{'usage_including_cached_inference': {'total_cost': 0.0053950000000000005, 'gpt-4o-2024-05-13': {'cost': 0.0053950000000000005, 'prompt_tokens': 965, 'completion_tokens': 38, 'total_tokens': 1003}}, 'usage_excluding_cached_inference': {'total_cost': 0.0053950000000000005, 'gpt-4o-2024-05-13': {'cost': 0.0053950000000000005, 'prompt_tokens': 965, 'completion_tokens': 38, 'total_tokens': 1003}}}\n" + ] + } + ], + "source": [ + "print(f\"Cost for the chat:\\n{chat_result.cost}\")" + ] } ], "metadata": { @@ -802,24 +3081,17 @@ "name": "synapse_pyspark" }, "kernelspec": { - "display_name": "Synapse PySpark", - "language": "Python", + "display_name": "synapse_pyspark", "name": "synapse_pyspark" }, "language_info": { "name": "python" }, - "notebook_environment": {}, "nteract": { "version": "nteract-front-end@1.0.0" }, - "save_output": true, "spark_compute": { - "compute_id": "/trident/default", - "session_options": { - "conf": {}, - "enableDebugMode": false - } + "compute_id": "/trident/default" } }, "nbformat": 4, diff --git a/notebook/agentchat_pgvector_RetrieveChat.ipynb b/notebook/agentchat_pgvector_RetrieveChat.ipynb index 9b037b7c468..1a8d70e2965 100644 --- a/notebook/agentchat_pgvector_RetrieveChat.ipynb +++ b/notebook/agentchat_pgvector_RetrieveChat.ipynb @@ -72,14 +72,14 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "models to use: ['gpt-35-turbo', 'gpt4-1106-preview', 'gpt-35-turbo-0613']\n" + "models to use: ['gpt4-1106-preview', 'gpt-4o', 'gpt-35-turbo', 'gpt-35-turbo-0613']\n" ] } 
], @@ -89,6 +89,7 @@ "\n", "import chromadb\n", "import psycopg\n", + "from sentence_transformers import SentenceTransformer\n", "\n", "import autogen\n", "from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent\n", @@ -114,7 +115,10 @@ " \"api_key\": \"...\",\n", " },\n", "]\n", - "\n", + "config_list = autogen.config_list_from_json(\n", + " \"OAI_CONFIG_LIST\",\n", + " file_location=\".\",\n", + ")\n", "assert len(config_list) > 0\n", "print(\"models to use: \", [config_list[i][\"model\"] for i in range(len(config_list))])" ] @@ -137,7 +141,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -145,7 +149,7 @@ "output_type": "stream", "text": [ "Accepted file formats for `docs_path`:\n", - "['txt', 'json', 'csv', 'tsv', 'md', 'html', 'htm', 'rtf', 'rst', 'jsonl', 'log', 'xml', 'yaml', 'yml', 'pdf']\n" + "['yaml', 'ppt', 'rst', 'jsonl', 'xml', 'txt', 'yml', 'log', 'rtf', 'msg', 'xlsx', 'htm', 'pdf', 'org', 'pptx', 'md', 'docx', 'epub', 'tsv', 'csv', 'html', 'doc', 'odt', 'json']\n" ] } ], @@ -156,17 +160,15 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/workspace/anaconda3/envs/autogen/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n", - "/workspace/anaconda3/envs/autogen/lib/python3.11/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", - " warnings.warn(\n" + "/home/lijiang1/anaconda3/envs/autogen/lib/python3.10/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n", + " torch.utils._pytree._register_pytree_node(\n" ] } ], @@ -185,6 +187,9 @@ "# Optionally create psycopg conn object\n", "# conn = psycopg.connect(conninfo=\"postgresql://postgres:postgres@localhost:5432/postgres\", autocommit=True)\n", "\n", + "# Optionally create embedding function object\n", + "sentence_transformer_ef = SentenceTransformer(\"all-distilroberta-v1\").encode\n", + "\n", "# 2. create the RetrieveUserProxyAgent instance named \"ragproxyagent\"\n", "# By default, the human_input_mode is \"ALWAYS\", which means the agent will ask for human input at every step. We set it to \"NEVER\" here.\n", "# `docs_path` is the path to the docs directory. It can also be the path to a single file, or the url to a single file. 
By default,\n", @@ -218,11 +223,11 @@ " # \"dbname\": \"postgres\", # Optional vector database name\n", " # \"username\": \"postgres\", # Optional vector database username\n", " # \"password\": \"postgres\", # Optional vector database password\n", - " \"model_name\": \"all-MiniLM-L6-v2\", # Sentence embedding model from https://huggingface.co/models?library=sentence-transformers or https://www.sbert.net/docs/pretrained_models.html\n", " # \"conn\": conn, # Optional - conn object to connect to database\n", " },\n", " \"get_or_create\": True, # set to False if you don't want to reuse an existing collection\n", - " \"overwrite\": False, # set to True if you want to overwrite an existing collection\n", + " \"overwrite\": True, # set to True if you want to overwrite an existing collection\n", + " \"embedding_function\": sentence_transformer_ef, # If left out SentenceTransformer(\"all-MiniLM-L6-v2\").encode will be used\n", " },\n", " code_execution_config=False, # set to False if you don't want to execute the code\n", ")" @@ -244,40 +249,43 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Trying to create collection.\n" + ] + }, { "name": "stderr", "output_type": "stream", "text": [ - "2024-05-23 08:48:18,875 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - \u001b[32mUse the existing collection `flaml_collection`.\u001b[0m\n" + "2024-06-11 19:57:44,122 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - Found 2 chunks.\u001b[0m\n", + "Model gpt4-1106-preview not found. Using cl100k_base encoding.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Trying to create collection.\n" + "VectorDB returns doc_ids: [['bdfbc921', '7968cf3c']]\n", + "\u001b[32mAdding content of doc bdfbc921 to context.\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "2024-05-23 08:48:19,975 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - Found 2 chunks.\u001b[0m\n", - "2024-05-23 08:48:19,977 - autogen.agentchat.contrib.vectordb.pgvectordb - INFO - Error executing select on non-existent table: flaml_collection. Creating it instead. Error: relation \"flaml_collection\" does not exist\n", - "LINE 1: SELECT id, metadatas, documents, embedding FROM flaml_collec...\n", - " ^\u001b[0m\n", - "2024-05-23 08:48:19,996 - autogen.agentchat.contrib.vectordb.pgvectordb - INFO - Created table flaml_collection\u001b[0m\n" + "Model gpt4-1106-preview not found. Using cl100k_base encoding.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "VectorDB returns doc_ids: [['bdfbc921', '7968cf3c']]\n", - "\u001b[32mAdding content of doc bdfbc921 to context.\u001b[0m\n", "\u001b[32mAdding content of doc 7968cf3c to context.\u001b[0m\n", "\u001b[33mragproxyagent\u001b[0m (to assistant):\n", "\n", @@ -540,7 +548,6 @@ "\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001b[32mAdding content of doc 7968cf3c to context.\u001b[0m\n", "\u001b[33mragproxyagent\u001b[0m (to assistant):\n", "\n", "You're a retrieve augmented coding assistant. 
You answer user's questions based on your own knowledge and the\n", @@ -804,7 +811,50 @@ "--------------------------------------------------------------------------------\n", "\u001b[33massistant\u001b[0m (to ragproxyagent):\n", "\n", - "To use FLAML to perform a classification task and use Spark to do parallel training, you need to use the Spark ML estimators for AutoML. First, you need to prepare your data in the required format as described in the previous section. FLAML provides a convenient function \"to_pandas_on_spark\" to convert your data into a pandas-on-spark dataframe/series, which Spark estimators require. After that, use the pandas-on-spark data like non-spark data and pass them using X_train, y_train or dataframe, label. Finally, configure FLAML to use Spark as the parallel backend during parallel tuning by setting the use_spark to true. An example code snippet is provided in the context above.\n", + "Based on the provided context which details the integration of Spark with FLAML for distributed training, and the requirement to perform a classification task with parallel training in Spark, here's a code snippet that configures FLAML to train a classification model for 30 seconds and cancels the jobs if the time limit is reached.\n", + "\n", + "```python\n", + "from flaml import AutoML\n", + "from flaml.automl.spark.utils import to_pandas_on_spark\n", + "import pandas as pd\n", + "\n", + "# Your pandas DataFrame 'data' goes here\n", + "# Assuming 'data' is already a pandas DataFrame with appropriate data for classification\n", + "# and 'label_column' is the name of the column that we want to predict.\n", + "\n", + "# First, convert your pandas DataFrame to a pandas-on-spark DataFrame\n", + "psdf = to_pandas_on_spark(data)\n", + "\n", + "# Now, we prepare the settings for the AutoML training with Spark\n", + "automl_settings = {\n", + " \"time_budget\": 30, # Train for 30 seconds\n", + " \"metric\": \"accuracy\", # Assuming you want to use accuracy as the metric\n", + " \"task\": \"classification\",\n", + " \"n_concurrent_trials\": 2, # Adjust the number of concurrent trials depending on your cluster setup\n", + " \"use_spark\": True,\n", + " \"force_cancel\": True, # Force cancel jobs if time limit is reached\n", + "}\n", + "\n", + "# Create an AutoML instance\n", + "automl = AutoML()\n", + "\n", + "# Run the AutoML search\n", + "# You need to replace 'psdf' with your actual pandas-on-spark DataFrame variable\n", + "# and 'label_column' with the name of your label column\n", + "automl.fit(dataframe=psdf, label=label_column, **automl_settings)\n", + "```\n", + "\n", + "This code snippet assumes that the `data` variable contains the pandas DataFrame you want to classify and that `label_column` is the name of the target variable for the classification task. 
Make sure to replace 'data' and 'label_column' with your actual data and label column name before running this code.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mragproxyagent\u001b[0m (to assistant):\n", + "\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33massistant\u001b[0m (to ragproxyagent):\n", + "\n", + "UPDATE CONTEXT\n", "\n", "--------------------------------------------------------------------------------\n" ] @@ -840,15 +890,51 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/lijiang1/anaconda3/envs/autogen/lib/python3.10/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n", + " torch.utils._pytree._register_pytree_node(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Trying to create collection.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-06-11 19:58:21,076 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - Found 2 chunks.\u001b[0m\n", + "Model gpt4-1106-preview not found. Using cl100k_base encoding.\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ "VectorDB returns doc_ids: [['7968cf3c', 'bdfbc921']]\n", - "\u001b[32mAdding content of doc 7968cf3c to context.\u001b[0m\n", + "\u001b[32mAdding content of doc 7968cf3c to context.\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Model gpt4-1106-preview not found. Using cl100k_base encoding.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "\u001b[32mAdding content of doc bdfbc921 to context.\u001b[0m\n", "\u001b[33mragproxyagent\u001b[0m (to assistant):\n", "\n", @@ -1110,18 +1196,270 @@ "\n", "\n", "\n", - "--------------------------------------------------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33massistant\u001b[0m (to ragproxyagent):\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mragproxyagent\u001b[0m (to assistant):\n", + "\n", + "You're a retrieve augmented coding assistant. You answer user's questions based on your own knowledge and the\n", + "context provided by the user.\n", + "If you can't answer the question with or without the current context, you should reply exactly `UPDATE CONTEXT`.\n", + "For code generation, you must obey the following rules:\n", + "Rule 1. You MUST NOT install any packages because all the packages needed are already installed.\n", + "Rule 2. You must follow the formats below to write your code:\n", + "```language\n", + "# your code\n", + "```\n", + "\n", + "User's question is: Who is the author of FLAML?\n", + "\n", + "Context is: # Research\n", + "\n", + "For technical details, please check our research publications.\n", + "\n", + "- [FLAML: A Fast and Lightweight AutoML Library](https://www.microsoft.com/en-us/research/publication/flaml-a-fast-and-lightweight-automl-library/). Chi Wang, Qingyun Wu, Markus Weimer, Erkang Zhu. 
MLSys 2021.\n", + "\n", + "```bibtex\n", + "@inproceedings{wang2021flaml,\n", + " title={FLAML: A Fast and Lightweight AutoML Library},\n", + " author={Chi Wang and Qingyun Wu and Markus Weimer and Erkang Zhu},\n", + " year={2021},\n", + " booktitle={MLSys},\n", + "}\n", + "```\n", + "\n", + "- [Frugal Optimization for Cost-related Hyperparameters](https://arxiv.org/abs/2005.01571). Qingyun Wu, Chi Wang, Silu Huang. AAAI 2021.\n", + "\n", + "```bibtex\n", + "@inproceedings{wu2021cfo,\n", + " title={Frugal Optimization for Cost-related Hyperparameters},\n", + " author={Qingyun Wu and Chi Wang and Silu Huang},\n", + " year={2021},\n", + " booktitle={AAAI},\n", + "}\n", + "```\n", + "\n", + "- [Economical Hyperparameter Optimization With Blended Search Strategy](https://www.microsoft.com/en-us/research/publication/economical-hyperparameter-optimization-with-blended-search-strategy/). Chi Wang, Qingyun Wu, Silu Huang, Amin Saied. ICLR 2021.\n", + "\n", + "```bibtex\n", + "@inproceedings{wang2021blendsearch,\n", + " title={Economical Hyperparameter Optimization With Blended Search Strategy},\n", + " author={Chi Wang and Qingyun Wu and Silu Huang and Amin Saied},\n", + " year={2021},\n", + " booktitle={ICLR},\n", + "}\n", + "```\n", + "\n", + "- [An Empirical Study on Hyperparameter Optimization for Fine-Tuning Pre-trained Language Models](https://aclanthology.org/2021.acl-long.178.pdf). Susan Xueqing Liu, Chi Wang. ACL 2021.\n", + "\n", + "```bibtex\n", + "@inproceedings{liuwang2021hpolm,\n", + " title={An Empirical Study on Hyperparameter Optimization for Fine-Tuning Pre-trained Language Models},\n", + " author={Susan Xueqing Liu and Chi Wang},\n", + " year={2021},\n", + " booktitle={ACL},\n", + "}\n", + "```\n", + "\n", + "- [ChaCha for Online AutoML](https://www.microsoft.com/en-us/research/publication/chacha-for-online-automl/). Qingyun Wu, Chi Wang, John Langford, Paul Mineiro and Marco Rossi. ICML 2021.\n", + "\n", + "```bibtex\n", + "@inproceedings{wu2021chacha,\n", + " title={ChaCha for Online AutoML},\n", + " author={Qingyun Wu and Chi Wang and John Langford and Paul Mineiro and Marco Rossi},\n", + " year={2021},\n", + " booktitle={ICML},\n", + "}\n", + "```\n", + "\n", + "- [Fair AutoML](https://arxiv.org/abs/2111.06495). Qingyun Wu, Chi Wang. ArXiv preprint arXiv:2111.06495 (2021).\n", + "\n", + "```bibtex\n", + "@inproceedings{wuwang2021fairautoml,\n", + " title={Fair AutoML},\n", + " author={Qingyun Wu and Chi Wang},\n", + " year={2021},\n", + " booktitle={ArXiv preprint arXiv:2111.06495},\n", + "}\n", + "```\n", + "\n", + "- [Mining Robust Default Configurations for Resource-constrained AutoML](https://arxiv.org/abs/2202.09927). Moe Kayali, Chi Wang. ArXiv preprint arXiv:2202.09927 (2022).\n", + "\n", + "```bibtex\n", + "@inproceedings{kayaliwang2022default,\n", + " title={Mining Robust Default Configurations for Resource-constrained AutoML},\n", + " author={Moe Kayali and Chi Wang},\n", + " year={2022},\n", + " booktitle={ArXiv preprint arXiv:2202.09927},\n", + "}\n", + "```\n", + "\n", + "- [Targeted Hyperparameter Optimization with Lexicographic Preferences Over Multiple Objectives](https://openreview.net/forum?id=0Ij9_q567Ma). Shaokun Zhang, Feiran Jia, Chi Wang, Qingyun Wu. 
ICLR 2023 (notable-top-5%).\n", + "\n", + "```bibtex\n", + "@inproceedings{zhang2023targeted,\n", + " title={Targeted Hyperparameter Optimization with Lexicographic Preferences Over Multiple Objectives},\n", + " author={Shaokun Zhang and Feiran Jia and Chi Wang and Qingyun Wu},\n", + " booktitle={International Conference on Learning Representations},\n", + " year={2023},\n", + " url={https://openreview.net/forum?id=0Ij9_q567Ma},\n", + "}\n", + "```\n", + "\n", + "- [Cost-Effective Hyperparameter Optimization for Large Language Model Generation Inference](https://arxiv.org/abs/2303.04673). Chi Wang, Susan Xueqing Liu, Ahmed H. Awadallah. ArXiv preprint arXiv:2303.04673 (2023).\n", + "\n", + "```bibtex\n", + "@inproceedings{wang2023EcoOptiGen,\n", + " title={Cost-Effective Hyperparameter Optimization for Large Language Model Generation Inference},\n", + " author={Chi Wang and Susan Xueqing Liu and Ahmed H. Awadallah},\n", + " year={2023},\n", + " booktitle={ArXiv preprint arXiv:2303.04673},\n", + "}\n", + "```\n", + "\n", + "- [An Empirical Study on Challenging Math Problem Solving with GPT-4](https://arxiv.org/abs/2306.01337). Yiran Wu, Feiran Jia, Shaokun Zhang, Hangyu Li, Erkang Zhu, Yue Wang, Yin Tat Lee, Richard Peng, Qingyun Wu, Chi Wang. ArXiv preprint arXiv:2306.01337 (2023).\n", + "\n", + "```bibtex\n", + "@inproceedings{wu2023empirical,\n", + " title={An Empirical Study on Challenging Math Problem Solving with GPT-4},\n", + " author={Yiran Wu and Feiran Jia and Shaokun Zhang and Hangyu Li and Erkang Zhu and Yue Wang and Yin Tat Lee and Richard Peng and Qingyun Wu and Chi Wang},\n", + " year={2023},\n", + " booktitle={ArXiv preprint arXiv:2306.01337},\n", + "}\n", + "```\n", + "# Integrate - Spark\n", + "\n", + "FLAML has integrated Spark for distributed training. There are two main aspects of integration with Spark:\n", + "\n", + "- Use Spark ML estimators for AutoML.\n", + "- Use Spark to run training in parallel spark jobs.\n", + "\n", + "## Spark ML Estimators\n", + "\n", + "FLAML integrates estimators based on Spark ML models. These models are trained in parallel using Spark, so we called them Spark estimators. To use these models, you first need to organize your data in the required format.\n", + "\n", + "### Data\n", + "\n", + "For Spark estimators, AutoML only consumes Spark data. FLAML provides a convenient function `to_pandas_on_spark` in the `flaml.automl.spark.utils` module to convert your data into a pandas-on-spark (`pyspark.pandas`) dataframe/series, which Spark estimators require.\n", + "\n", + "This utility function takes data in the form of a `pandas.Dataframe` or `pyspark.sql.Dataframe` and converts it into a pandas-on-spark dataframe. It also takes `pandas.Series` or `pyspark.sql.Dataframe` and converts it into a [pandas-on-spark](https://spark.apache.org/docs/latest/api/python/user_guide/pandas_on_spark/index.html) series. If you pass in a `pyspark.pandas.Dataframe`, it will not make any changes.\n", + "\n", + "This function also accepts optional arguments `index_col` and `default_index_type`.\n", + "\n", + "- `index_col` is the column name to use as the index, default is None.\n", + "- `default_index_type` is the default index type, default is \"distributed-sequence\". 
More information about the default index type can be found in the official Spark [documentation](https://spark.apache.org/docs/latest/api/python/user_guide/pandas_on_spark/options.html#default-index-type).\n", + "\n", + "Here is an example code snippet for Spark Data:\n", + "\n", + "```python\n", + "import pandas as pd\n", + "from flaml.automl.spark.utils import to_pandas_on_spark\n", + "\n", + "# Creating a dictionary\n", + "data = {\n", + " \"Square_Feet\": [800, 1200, 1800, 1500, 850],\n", + " \"Age_Years\": [20, 15, 10, 7, 25],\n", + " \"Price\": [100000, 200000, 300000, 240000, 120000],\n", + "}\n", + "\n", + "# Creating a pandas DataFrame\n", + "dataframe = pd.DataFrame(data)\n", + "label = \"Price\"\n", + "\n", + "# Convert to pandas-on-spark dataframe\n", + "psdf = to_pandas_on_spark(dataframe)\n", + "```\n", + "\n", + "To use Spark ML models, you need to format your data appropriately. Specifically, use [`VectorAssembler`](https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.ml.feature.VectorAssembler.html) to merge all feature columns into a single vector column.\n", + "\n", + "Here is an example of how to use it:\n", + "\n", + "```python\n", + "from pyspark.ml.feature import VectorAssembler\n", + "\n", + "columns = psdf.columns\n", + "feature_cols = [col for col in columns if col != label]\n", + "featurizer = VectorAssembler(inputCols=feature_cols, outputCol=\"features\")\n", + "psdf = featurizer.transform(psdf.to_spark(index_col=\"index\"))[\"index\", \"features\"]\n", + "```\n", + "\n", + "Later, when conducting the experiment, use your pandas-on-spark data like non-Spark data and pass it using `X_train, y_train` or `dataframe, label`.\n", + "\n", + "### Estimators\n", + "\n", + "#### Model List\n", + "\n", + "- `lgbm_spark`: The class for fine-tuning the Spark version of LightGBM models, using the [SynapseML](https://microsoft.github.io/SynapseML/docs/features/lightgbm/about/) API.\n", + "\n", + "#### Usage\n", + "\n", + "First, prepare your data in the required format as described in the previous section.\n", + "\n", + "By including the models you intend to try in the `estimator_list` argument to `flaml.automl`, FLAML will start trying configurations for these models. If your input is Spark data, FLAML will also use estimators with the `_spark` postfix by default, even if you haven't specified them.\n", + "\n", + "Here is an example code snippet using SparkML models in AutoML:\n", + "\n", + "```python\n", + "import flaml\n", + "\n", + "# prepare your data in pandas-on-spark format as we previously mentioned\n", + "\n", + "automl = flaml.AutoML()\n", + "settings = {\n", + " \"time_budget\": 30,\n", + " \"metric\": \"r2\",\n", + " \"estimator_list\": [\"lgbm_spark\"], # this setting is optional\n", + " \"task\": \"regression\",\n", + "}\n", + "\n", + "automl.fit(\n", + " dataframe=psdf,\n", + " label=label,\n", + " **settings,\n", + ")\n", + "```\n", + "\n", + "[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/automl_bankrupt_synapseml.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/automl_bankrupt_synapseml.ipynb)\n", + "\n", + "## Parallel Spark Jobs\n", + "\n", + "You can activate Spark as the parallel backend during parallel tuning in both [AutoML](/docs/Use-Cases/Task-Oriented-AutoML#parallel-tuning) and [Hyperparameter Tuning](/docs/Use-Cases/Tune-User-Defined-Function#parallel-tuning), by setting `use_spark` to `true`. 
FLAML will dispatch your job to the distributed Spark backend using [`joblib-spark`](https://github.com/joblib/joblib-spark).\n", + "\n", + "Please note that you should not set `use_spark` to `true` when applying AutoML and Tuning for Spark Data. This is because only SparkML models will be used for Spark Data in AutoML and Tuning. As SparkML models run in parallel, there is no need to distribute them with `use_spark` again.\n", + "\n", + "All the Spark-related arguments are stated below. These arguments are available in both Hyperparameter Tuning and AutoML:\n", + "\n", + "- `use_spark`: boolean, default=False | Whether to use Spark to run the training in parallel spark jobs. This can be used to accelerate training on large models and large datasets, but will incur more overhead in time and thus slow down training in some cases. GPU training is not supported yet when use_spark is True. For Spark clusters, by default, we will launch one trial per executor. However, sometimes we want to launch more trials than the number of executors (e.g., local mode). In this case, we can set the environment variable `FLAML_MAX_CONCURRENT` to override the detected `num_executors`. The final number of concurrent trials will be the minimum of `n_concurrent_trials` and `num_executors`.\n", + "- `n_concurrent_trials`: int, default=1 | The number of concurrent trials. When n_concurrent_trials > 1, FLAML performs parallel tuning.\n", + "- `force_cancel`: boolean, default=False | Whether to forcibly cancel Spark jobs if the search time exceeds the time budget. Spark jobs include parallel tuning jobs and Spark-based model training jobs.\n", + "\n", + "An example code snippet for using parallel Spark jobs:\n", + "\n", + "```python\n", + "import flaml\n", + "\n", + "automl_experiment = flaml.AutoML()\n", + "automl_settings = {\n", + " \"time_budget\": 30,\n", + " \"metric\": \"r2\",\n", + " \"task\": \"regression\",\n", + " \"n_concurrent_trials\": 2,\n", + " \"use_spark\": True,\n", + " \"force_cancel\": True, # Activating the force_cancel option can immediately halt Spark jobs once they exceed the allocated time_budget.\n", + "}\n", + "\n", + "automl_experiment.fit(\n", + " dataframe=dataframe,\n", + " label=label,\n", + " **automl_settings,\n", + ")\n", + "```\n", + "\n", + "[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/integrate_spark.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/integrate_spark.ipynb)\n", + "\n", "\n", - "The authors of FLAML are Chi Wang, Qingyun Wu, Markus Weimer, and Erkang Zhu.\n", "\n", "--------------------------------------------------------------------------------\n", + "\u001b[33massistant\u001b[0m (to ragproxyagent):\n", + "\n", "The authors of FLAML are Chi Wang, Qingyun Wu, Markus Weimer, and Erkang Zhu.\n", "\n", "--------------------------------------------------------------------------------\n" @@ -1132,16 +1470,43 @@ "# reset the assistant. 
Always reset the assistant before starting a new conversation.\n", "assistant.reset()\n", "\n", + "# Optionally create psycopg conn object\n", + "conn = psycopg.connect(conninfo=\"postgresql://postgres:postgres@localhost:5432/postgres\", autocommit=True)\n", + "\n", + "ragproxyagent = RetrieveUserProxyAgent(\n", + " name=\"ragproxyagent\",\n", + " human_input_mode=\"NEVER\",\n", + " max_consecutive_auto_reply=1,\n", + " retrieve_config={\n", + " \"task\": \"code\",\n", + " \"docs_path\": [\n", + " \"https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Examples/Integrate%20-%20Spark.md\",\n", + " \"https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Research.md\",\n", + " os.path.join(os.path.abspath(\"\"), \"..\", \"website\", \"docs\"),\n", + " ],\n", + " \"custom_text_types\": [\"non-existent-type\"],\n", + " \"chunk_token_size\": 2000,\n", + " \"model\": config_list[0][\"model\"],\n", + " \"vector_db\": \"pgvector\", # PGVector database\n", + " \"collection_name\": \"flaml_collection\",\n", + " \"db_config\": {\n", + " # \"connection_string\": \"postgresql://postgres:postgres@localhost:5432/postgres\", # Optional - connect to an external vector database\n", + " # \"host\": \"postgres\", # Optional vector database host\n", + " # \"port\": 5432, # Optional vector database port\n", + " # \"dbname\": \"postgres\", # Optional vector database name\n", + " # \"username\": \"postgres\", # Optional vector database username\n", + " # \"password\": \"postgres\", # Optional vector database password\n", + " \"conn\": conn, # Optional - conn object to connect to database\n", + " },\n", + " \"get_or_create\": True, # set to False if you don't want to reuse an existing collection\n", + " \"overwrite\": True, # set to True if you want to overwrite an existing collection\n", + " },\n", + " code_execution_config=False, # set to False if you don't want to execute the code\n", + ")\n", + "\n", "qa_problem = \"Who is the author of FLAML?\"\n", "chat_result = ragproxyagent.initiate_chat(assistant, message=ragproxyagent.message_generator, problem=qa_problem)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -1166,7 +1531,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.10.13" }, "skip_test": "Requires interactive usage" }, diff --git a/notebook/agentchat_planning.ipynb b/notebook/agentchat_planning.ipynb index 508792f01a5..14b393958dc 100644 --- a/notebook/agentchat_planning.ipynb +++ b/notebook/agentchat_planning.ipynb @@ -93,14 +93,14 @@ " 'api_key': '',\n", " 'base_url': '',\n", " 'api_type': 'azure',\n", - " 'api_version': '2024-02-15-preview',\n", + " 'api_version': '2024-02-01',\n", " }, # Azure OpenAI API endpoint for gpt-4\n", " {\n", " 'model': 'gpt-4-32k',\n", " 'api_key': '',\n", " 'base_url': '',\n", " 'api_type': 'azure',\n", - " 'api_version': '2024-02-15-preview',\n", + " 'api_version': '2024-02-01',\n", " }, # Azure OpenAI API endpoint for gpt-4-32k\n", "]\n", "```\n", diff --git a/notebook/agentchat_stream.ipynb b/notebook/agentchat_stream.ipynb index 8cb899d2b50..8127cdfbab0 100644 --- a/notebook/agentchat_stream.ipynb +++ b/notebook/agentchat_stream.ipynb @@ -90,14 +90,14 @@ " 'api_key': '',\n", " 'base_url': '',\n", " 'api_type': 'azure',\n", - " 'api_version': '2024-02-15-preview',\n", + " 'api_version': '2024-02-01',\n", " },\n", " {\n", " 'model': 'gpt-3.5-turbo-16k',\n", " 'api_key': '',\n", " 
'base_url': '',\n", " 'api_type': 'azure',\n", - " 'api_version': '2024-02-15-preview',\n", + " 'api_version': '2024-02-01',\n", " },\n", "]\n", "```\n", diff --git a/notebook/agentchat_teachable_oai_assistants.ipynb b/notebook/agentchat_teachable_oai_assistants.ipynb index 9bd69c9d51c..3753be414f3 100644 --- a/notebook/agentchat_teachable_oai_assistants.ipynb +++ b/notebook/agentchat_teachable_oai_assistants.ipynb @@ -112,14 +112,14 @@ " 'api_key': '',\n", " 'base_url': '',\n", " 'api_type': 'azure',\n", - " 'api_version': '2024-02-15-preview',\n", + " 'api_version': '2024-02-01',\n", " },\n", " {\n", " 'model': 'gpt-4-32k',\n", " 'api_key': '',\n", " 'base_url': '',\n", " 'api_type': 'azure',\n", - " 'api_version': '2024-02-15-preview',\n", + " 'api_version': '2024-02-01',\n", " },\n", "]\n", "```\n", diff --git a/notebook/agentchat_two_users.ipynb b/notebook/agentchat_two_users.ipynb index 21749278688..eb9e0c1fbf2 100644 --- a/notebook/agentchat_two_users.ipynb +++ b/notebook/agentchat_two_users.ipynb @@ -70,14 +70,14 @@ " \"api_key\": \"\",\n", " \"base_url\": \"\",\n", " \"api_type\": \"azure\",\n", - " \"api_version\": \"2024-02-15-preview\"\n", + " \"api_version\": \"2024-02-01\"\n", " },\n", " {\n", " \"model\": \"gpt-4-32k\",\n", " \"api_key\": \"\",\n", " \"base_url\": \"\",\n", " \"api_type\": \"azure\",\n", - " \"api_version\": \"2024-02-15-preview\"\n", + " \"api_version\": \"2024-02-01\"\n", " }\n", "]\n", "```\n", diff --git a/notebook/agentchat_web_info.ipynb b/notebook/agentchat_web_info.ipynb index 31ac248ec9e..f990c128b78 100644 --- a/notebook/agentchat_web_info.ipynb +++ b/notebook/agentchat_web_info.ipynb @@ -104,14 +104,14 @@ " 'api_key': '',\n", " 'base_url': '',\n", " 'api_type': 'azure',\n", - " 'api_version': '2024-02-15-preview',\n", + " 'api_version': '2024-02-01',\n", " },\n", " {\n", " 'model': 'gpt-4-32k-0314',\n", " 'api_key': '',\n", " 'base_url': '',\n", " 'api_type': 'azure',\n", - " 'api_version': '2024-02-15-preview',\n", + " 'api_version': '2024-02-01',\n", " },\n", "]\n", "```\n", diff --git a/notebook/oai_chatgpt_gpt4.ipynb b/notebook/oai_chatgpt_gpt4.ipynb index 34b5e5357fa..280b7145e93 100644 --- a/notebook/oai_chatgpt_gpt4.ipynb +++ b/notebook/oai_chatgpt_gpt4.ipynb @@ -131,13 +131,13 @@ " 'api_key': '',\n", " 'base_url': '',\n", " 'api_type': 'azure',\n", - " 'api_version': '2024-02-15-preview',\n", + " 'api_version': '2024-02-01',\n", " }, # only if at least one Azure OpenAI API key is found\n", " {\n", " 'api_key': '',\n", " 'base_url': '',\n", " 'api_type': 'azure',\n", - " 'api_version': '2024-02-15-preview',\n", + " 'api_version': '2024-02-01',\n", " }, # only if the second Azure OpenAI API key is found\n", "]\n", "```\n", diff --git a/notebook/oai_completion.ipynb b/notebook/oai_completion.ipynb index 514ba6a4ede..ac1b3f9c95f 100644 --- a/notebook/oai_completion.ipynb +++ b/notebook/oai_completion.ipynb @@ -97,13 +97,13 @@ "# 'api_key': '',\n", "# 'base_url': '',\n", "# 'api_type': 'azure',\n", - "# 'api_version': '2024-02-15-preview',\n", + "# 'api_version': '2024-02-01',\n", "# }, # Azure OpenAI API endpoint for gpt-4\n", "# {\n", "# 'api_key': '',\n", "# 'base_url': '',\n", "# 'api_type': 'azure',\n", - "# 'api_version': '2024-02-15-preview',\n", + "# 'api_version': '2024-02-01',\n", "# }, # another Azure OpenAI API endpoint for gpt-4\n", "# ]\n", "\n", @@ -131,14 +131,14 @@ "# 'api_key': '',\n", "# 'base_url': '',\n", "# 'api_type': 'azure',\n", - "# 'api_version': '2024-02-15-preview',\n", + "# 'api_version': '2024-02-01',\n", "# }, # 
Azure OpenAI API endpoint for gpt-3.5-turbo\n", "# {\n", "# 'model': 'gpt-35-turbo-v0301',\n", "# 'api_key': '',\n", "# 'base_url': '',\n", "# 'api_type': 'azure',\n", - "# 'api_version': '2024-02-15-preview',\n", + "# 'api_version': '2024-02-01',\n", "# }, # another Azure OpenAI API endpoint for gpt-3.5-turbo with deployment name gpt-35-turbo-v0301\n", "# ]" ] diff --git a/samples/apps/websockets/application.py b/samples/apps/websockets/application.py index f2e453d9248..fe75d135330 100755 --- a/samples/apps/websockets/application.py +++ b/samples/apps/websockets/application.py @@ -35,7 +35,7 @@ def _get_config_list() -> List[Dict[str, str]]: 'api_key': '0123456789abcdef0123456789abcdef', 'base_url': 'https://my-deployment.openai.azure.com/', 'api_type': 'azure', - 'api_version': '2024-02-15-preview', + 'api_version': '2024-02-01', }, { 'model': 'gpt-4', diff --git a/test/agentchat/contrib/retrievechat/test_pgvector_retrievechat.py b/test/agentchat/contrib/retrievechat/test_pgvector_retrievechat.py index b104f25af76..ca24f952f76 100644 --- a/test/agentchat/contrib/retrievechat/test_pgvector_retrievechat.py +++ b/test/agentchat/contrib/retrievechat/test_pgvector_retrievechat.py @@ -56,7 +56,7 @@ def test_retrievechat(): }, ) - sentence_transformer_ef = SentenceTransformer("all-MiniLM-L6-v2") + sentence_transformer_ef = SentenceTransformer("all-MiniLM-L6-v2").encode ragproxyagent = RetrieveUserProxyAgent( name="ragproxyagent", human_input_mode="NEVER", diff --git a/test/oai/_test_completion.py b/test/oai/_test_completion.py index 05798911c65..fe410255d2f 100755 --- a/test/oai/_test_completion.py +++ b/test/oai/_test_completion.py @@ -4,7 +4,6 @@ import os import sys from functools import partial -from test.oai.test_utils import KEY_LOC, OAI_CONFIG_LIST import datasets import numpy as np @@ -18,6 +17,7 @@ implement, ) from autogen.math_utils import eval_math_responses, solve_problem +from test.oai.test_utils import KEY_LOC, OAI_CONFIG_LIST here = os.path.abspath(os.path.dirname(__file__)) diff --git a/test/oai/test_utils.py b/test/oai/test_utils.py index d5ad84d8355..99f8d8d24e8 100755 --- a/test/oai/test_utils.py +++ b/test/oai/test_utils.py @@ -58,7 +58,7 @@ "api_key": "111113fc7e8a46419bfac511bb301111", "base_url": "https://1111.openai.azure.com", "api_type": "azure", - "api_version": "2024-02-15-preview" + "api_version": "2024-02-01" }, { "model": "gpt", @@ -83,7 +83,7 @@ "expected": JSON_SAMPLE_DICT[2:4], }, { - "filter_dict": {"api_type": "azure", "api_version": "2024-02-15-preview"}, + "filter_dict": {"api_type": "azure", "api_version": "2024-02-01"}, "exclude": False, "expected": [JSON_SAMPLE_DICT[2]], }, diff --git a/test/test_logging.py b/test/test_logging.py index c6f7a182c5c..bd9a74d3fd4 100644 --- a/test/test_logging.py +++ b/test/test_logging.py @@ -202,7 +202,7 @@ def test_log_oai_client(db_connection): openai_config = { "api_key": "some_key", - "api_version": "2024-02-15-preview", + "api_version": "2024-02-01", "azure_deployment": "gpt-4", "azure_endpoint": "https://foobar.openai.azure.com/", } diff --git a/test/test_notebook.py b/test/test_notebook.py index 46622c287eb..9d05533c913 100755 --- a/test/test_notebook.py +++ b/test/test_notebook.py @@ -1,137 +1,137 @@ -#!/usr/bin/env python3 -m pytest - -import os -import sys - -import pytest -from conftest import skip_openai - -try: - import openai -except ImportError: - skip = True -else: - skip = False or skip_openai - - -here = os.path.abspath(os.path.dirname(__file__)) - - -def run_notebook(input_nb, 
output_nb="executed_openai_notebook.ipynb", save=False): - import nbformat - from nbconvert.preprocessors import CellExecutionError, ExecutePreprocessor - - try: - nb_loc = os.path.join(here, os.pardir, "notebook") - file_path = os.path.join(nb_loc, input_nb) - with open(file_path) as nb_file: - nb = nbformat.read(nb_file, as_version=4) - preprocessor = ExecutePreprocessor(timeout=4800, kernel_name="python3") - preprocessor.preprocess(nb, {"metadata": {"path": nb_loc}}) - - output_file_name = "executed_openai_notebook_output.txt" - output_file = os.path.join(here, output_file_name) - with open(output_file, "a") as nb_output_file: - for cell in nb.cells: - if cell.cell_type == "code" and "outputs" in cell: - for output in cell.outputs: - if "text" in output: - nb_output_file.write(output["text"].strip() + "\n") - elif "data" in output and "text/plain" in output["data"]: - nb_output_file.write(output["data"]["text/plain"].strip() + "\n") - except CellExecutionError: - raise - finally: - if save: - with open(os.path.join(here, output_nb), "w", encoding="utf-8") as nb_executed_file: - nbformat.write(nb, nb_executed_file) - - -@pytest.mark.skipif( - skip or not sys.version.startswith("3.10"), - reason="do not run if openai is not installed or py!=3.10", -) -def test_agentchat_auto_feedback_from_code(save=False): - run_notebook("agentchat_auto_feedback_from_code_execution.ipynb", save=save) - - -@pytest.mark.skipif( - skip or not sys.version.startswith("3.11"), - reason="do not run if openai is not installed or py!=3.11", -) -def _test_oai_completion(save=False): - run_notebook("oai_completion.ipynb", save=save) - - -@pytest.mark.skipif( - skip or not sys.version.startswith("3.12"), - reason="do not run if openai is not installed or py!=3.12", -) -def test_agentchat_function_call(save=False): - run_notebook("agentchat_function_call.ipynb", save=save) - - -@pytest.mark.skipif( - skip or not sys.version.startswith("3.10"), - reason="do not run if openai is not installed or py!=3.10", -) -def test_agentchat_function_call_currency_calculator(save=False): - run_notebook("agentchat_function_call_currency_calculator.ipynb", save=save) - - -@pytest.mark.skipif( - skip or not sys.version.startswith("3.11"), - reason="do not run if openai is not installed or py!=3.11", -) -def test_agentchat_function_call_async(save=False): - run_notebook("agentchat_function_call_async.ipynb", save=save) - - -@pytest.mark.skipif( - skip or not sys.version.startswith("3.12"), - reason="do not run if openai is not installed or py!=3.12", -) -def _test_agentchat_MathChat(save=False): - run_notebook("agentchat_MathChat.ipynb", save=save) - - -@pytest.mark.skipif( - skip or not sys.version.startswith("3.10"), - reason="do not run if openai is not installed or py!=3.10", -) -def _test_oai_chatgpt_gpt4(save=False): - run_notebook("oai_chatgpt_gpt4.ipynb", save=save) - - -@pytest.mark.skipif( - skip or not sys.version.startswith("3.12"), - reason="do not run if openai is not installed or py!=3.12", -) -def test_agentchat_groupchat_finite_state_machine(save=False): - run_notebook("agentchat_groupchat_finite_state_machine.ipynb", save=save) - - -@pytest.mark.skipif( - skip or not sys.version.startswith("3.10"), - reason="do not run if openai is not installed or py!=3.10", -) -def test_agentchat_cost_token_tracking(save=False): - run_notebook("agentchat_cost_token_tracking.ipynb", save=save) - - -@pytest.mark.skipif( - skip or not sys.version.startswith("3.11"), - reason="do not run if openai is not installed or py!=3.11", -) -def 
test_agentchat_groupchat_stateflow(save=False): - run_notebook("agentchat_groupchat_stateflow.ipynb", save=save) - - -if __name__ == "__main__": - # test_agentchat_auto_feedback_from_code(save=True) - # test_oai_chatgpt_gpt4(save=True) - # test_oai_completion(save=True) - # test_agentchat_MathChat(save=True) - # test_agentchat_function_call(save=True) - # test_graph_modelling_language_using_select_speaker(save=True) - test_agentchat_function_call_async(save=True) +#!/usr/bin/env python3 -m pytest + +import os +import sys + +import pytest +from conftest import skip_openai + +try: + import openai +except ImportError: + skip = True +else: + skip = False or skip_openai + + +here = os.path.abspath(os.path.dirname(__file__)) + + +def run_notebook(input_nb, output_nb="executed_openai_notebook.ipynb", save=False): + import nbformat + from nbconvert.preprocessors import CellExecutionError, ExecutePreprocessor + + try: + nb_loc = os.path.join(here, os.pardir, "notebook") + file_path = os.path.join(nb_loc, input_nb) + with open(file_path) as nb_file: + nb = nbformat.read(nb_file, as_version=4) + preprocessor = ExecutePreprocessor(timeout=4800, kernel_name="python3") + preprocessor.preprocess(nb, {"metadata": {"path": nb_loc}}) + + output_file_name = "executed_openai_notebook_output.txt" + output_file = os.path.join(here, output_file_name) + with open(output_file, "a") as nb_output_file: + for cell in nb.cells: + if cell.cell_type == "code" and "outputs" in cell: + for output in cell.outputs: + if "text" in output: + nb_output_file.write(output["text"].strip() + "\n") + elif "data" in output and "text/plain" in output["data"]: + nb_output_file.write(output["data"]["text/plain"].strip() + "\n") + except CellExecutionError: + raise + finally: + if save: + with open(os.path.join(here, output_nb), "w", encoding="utf-8") as nb_executed_file: + nbformat.write(nb, nb_executed_file) + + +@pytest.mark.skipif( + skip or not sys.version.startswith("3.10"), + reason="do not run if openai is not installed or py!=3.10", +) +def test_agentchat_auto_feedback_from_code(save=False): + run_notebook("agentchat_auto_feedback_from_code_execution.ipynb", save=save) + + +@pytest.mark.skipif( + skip or not sys.version.startswith("3.11"), + reason="do not run if openai is not installed or py!=3.11", +) +def _test_oai_completion(save=False): + run_notebook("oai_completion.ipynb", save=save) + + +@pytest.mark.skipif( + skip or not sys.version.startswith("3.12"), + reason="do not run if openai is not installed or py!=3.12", +) +def test_agentchat_function_call(save=False): + run_notebook("agentchat_function_call.ipynb", save=save) + + +@pytest.mark.skipif( + skip or not sys.version.startswith("3.10"), + reason="do not run if openai is not installed or py!=3.10", +) +def test_agentchat_function_call_currency_calculator(save=False): + run_notebook("agentchat_function_call_currency_calculator.ipynb", save=save) + + +@pytest.mark.skipif( + skip or not sys.version.startswith("3.11"), + reason="do not run if openai is not installed or py!=3.11", +) +def test_agentchat_function_call_async(save=False): + run_notebook("agentchat_function_call_async.ipynb", save=save) + + +@pytest.mark.skipif( + skip or not sys.version.startswith("3.12"), + reason="do not run if openai is not installed or py!=3.12", +) +def _test_agentchat_MathChat(save=False): + run_notebook("agentchat_MathChat.ipynb", save=save) + + +@pytest.mark.skipif( + skip or not sys.version.startswith("3.10"), + reason="do not run if openai is not installed or py!=3.10", +) +def 
_test_oai_chatgpt_gpt4(save=False): + run_notebook("oai_chatgpt_gpt4.ipynb", save=save) + + +@pytest.mark.skipif( + skip or not sys.version.startswith("3.12"), + reason="do not run if openai is not installed or py!=3.12", +) +def test_agentchat_groupchat_finite_state_machine(save=False): + run_notebook("agentchat_groupchat_finite_state_machine.ipynb", save=save) + + +@pytest.mark.skipif( + skip or not sys.version.startswith("3.10"), + reason="do not run if openai is not installed or py!=3.10", +) +def test_agentchat_cost_token_tracking(save=False): + run_notebook("agentchat_cost_token_tracking.ipynb", save=save) + + +@pytest.mark.skipif( + skip or not sys.version.startswith("3.11"), + reason="do not run if openai is not installed or py!=3.11", +) +def test_agentchat_groupchat_stateflow(save=False): + run_notebook("agentchat_groupchat_stateflow.ipynb", save=save) + + +if __name__ == "__main__": + # test_agentchat_auto_feedback_from_code(save=True) + # test_oai_chatgpt_gpt4(save=True) + # test_oai_completion(save=True) + # test_agentchat_MathChat(save=True) + # test_agentchat_function_call(save=True) + # test_graph_modelling_language_using_select_speaker(save=True) + test_agentchat_function_call_async(save=True) diff --git a/website/blog/2023-11-20-AgentEval/index.mdx b/website/blog/2023-11-20-AgentEval/index.mdx index 070d431b135..1abb9e6c9f8 100644 --- a/website/blog/2023-11-20-AgentEval/index.mdx +++ b/website/blog/2023-11-20-AgentEval/index.mdx @@ -1,151 +1,151 @@ ---- -title: How to Assess Utility of LLM-powered Applications? -authors: - - julianakiseleva - - narabzad -tags: [LLM, GPT, evaluation, task utility] ---- - - -![Fig.1: A verification framework](img/agenteval-CQ.png) - -

Fig.1 illustrates the general flow of AgentEval

- -**TL;DR:** -* As a developer of an LLM-powered application, how can you assess the utility it brings to end users while helping them with their tasks? -* To shed light on the question above, we introduce `AgentEval` — the first version of the framework to assess the utility of any LLM-powered application crafted to assist users in specific tasks. AgentEval aims to simplify the evaluation process by automatically proposing a set of criteria tailored to the unique purpose of your application. This allows for a comprehensive assessment, quantifying the utility of your application against the suggested criteria. -* We demonstrate how `AgentEval` work using [math problems dataset](https://microsoft.github.io/autogen/blog/2023/06/28/MathChat) as an example in the [following notebook](https://github.com/microsoft/autogen/blob/main/notebook/agenteval_cq_math.ipynb). Any feedback would be useful for future development. Please contact us on our [Discord](http://aka.ms/autogen-dc). - - -## Introduction - - AutoGen aims to simplify the development of LLM-powered multi-agent systems for various applications, ultimately making end users' lives easier by assisting with their tasks. Next, we all yearn to understand how our developed systems perform, their utility for users, and, perhaps most crucially, how we can enhance them. Directly evaluating multi-agent systems poses challenges as current approaches predominantly rely on success metrics – essentially, whether the agent accomplishes tasks. However, comprehending user interaction with a system involves far more than success alone. Take math problems, for instance; it's not merely about the agent solving the problem. Equally significant is its ability to convey solutions based on various criteria, including completeness, conciseness, and the clarity of the provided explanation. Furthermore, success isn't always clearly defined for every task. - - Rapid advances in LLMs and multi-agent systems have brought forth many emerging capabilities that we're keen on translating into tangible utilities for end users. We introduce the first version of `AgentEval` framework - a tool crafted to empower developers in swiftly gauging the utility of LLM-powered applications designed to help end users accomplish the desired task. - - -![Fig.2: An overview of the tasks taxonomy](img/tasks-taxonomy.png) -

Fig. 2 provides an overview of the tasks taxonomy

- - -Let's first look into an overview of the suggested task taxonomy that a multi-agent system can be designed for. In general, the tasks can be split into two types, where: -* _Success is not clearly defined_ - refer to instances when users utilize a system in an assistive manner, seeking suggestions rather than expecting the system to solve the task. For example, a user might request the system to generate an email. In many cases, this generated content serves as a template that the user will later edit. However, defining success precisely for such tasks is relatively complex. -* _Success is clearly defined_ - refer to instances where we can clearly define whether a system solved the task or not. Consider agents that assist in accomplishing household tasks, where the definition of success is clear and measurable. This category can be further divided into two separate subcategories: - * _The optimal solution exits_ - these are tasks where only one solution is possible. For example, if you ask your assistant to turn on the light, the success of this task is clearly defined, and there is only one way to accomplish it. - * _Multiple solutions exist_ - increasingly, we observe situations where multiple trajectories of agent behavior can lead to either success or failure. In such cases, it is crucial to differentiate between the various successful and unsuccessful trajectories. For example, when you ask the agent to suggest you a food recipe or tell you a joke. - -In our `AgentEval` framework, we are currently focusing on tasks where _Success is clearly defined_. Next, we will introduce the suggested framework. - -## `AgentEval` Framework - -Our previous research on [assistive agents in Minecraft](https://github.com/microsoft/iglu-datasets) suggested that the most optimal way to obtain human judgments is to present humans with two agents side by side and ask for preferences. In this setup of pairwise comparison, humans can develop criteria to explain why they prefer the behavior of one agent over another. For instance, _'the first agent was faster in execution,'_ or _'the second agent moves more naturally.'_ So, the comparative nature led humans to come up with a list of criteria that helps to infer the utility of the task. With this idea in mind, we designed `AgentEval` (shown in Fig. 1), where we employ LLMs to help us understand, verify, and assess task *utility* for the multi-agent system. Namely: - -* The goal of `CriticAgent` is to suggest the list of criteria (Fig. 1), that can be used to assess task utility. This is an example of how `CriticAgent` is defined using `Autogen`: - -```python -critic = autogen.AssistantAgent( - name="critic", - llm_config={"config_list": config_list}, - system_message="""You are a helpful assistant. You suggest criteria for evaluating different tasks. They should be distinguishable, quantifiable, and not redundant. - Convert the evaluation criteria into a dictionary where the keys are the criteria. - The value of each key is a dictionary as follows {"description": criteria description, "accepted_values": possible accepted inputs for this key} - Make sure the keys are criteria for assessing the given task. "accepted_values" include the acceptable inputs for each key that are fine-grained and preferably multi-graded levels. "description" includes the criterion description. - Return only the dictionary.""" -) -``` - -Next, the critic is given successful and failed examples of the task execution; then, it is able to return a list of criteria (Fig. 1). 
For reference, use the [following notebook](https://github.com/microsoft/autogen/blob/main/notebook/agenteval_cq_math.ipynb). - -* The goal of `QuantifierAgent` is to quantify each of the suggested criteria (Fig. 1), providing us with an idea of the utility of this system for the given task. Here is an example of how it can be defined: - -```python -quantifier = autogen.AssistantAgent( - name="quantifier", - llm_config={"config_list": config_list}, - system_message = """You are a helpful assistant. You quantify the output of different tasks based on the given criteria. - The criterion is given in a dictionary format where each key is a distinct criteria. - The value of each key is a dictionary as follows {"description": criteria description , "accepted_values": possible accepted inputs for this key} - You are going to quantify each of the criteria for a given task based on the task description. - Return a dictionary where the keys are the criteria and the values are the assessed performance based on accepted values for each criteria. - Return only the dictionary.""" - -) -``` - -## `AgentEval` Results based on Math Problems Dataset - - As an example, after running CriticAgent, we obtained the following criteria to verify the results for math problem dataset: - -| Criteria | Description | Accepted Values| -|-----------|-----|----------------| -| Problem Interpretation | Ability to correctly interpret the problem | ["completely off", "slightly relevant", "relevant", "mostly accurate", "completely accurate"]| -| Mathematical Methodology | Adequacy of the chosen mathematical or algorithmic methodology for the question | ["inappropriate", "barely adequate", "adequate", "mostly effective", "completely effective"] | -| Calculation Correctness | Accuracy of calculations made and solutions given | ["completely incorrect", "mostly incorrect", "neither", "mostly correct", "completely correct"] | -| Explanation Clarity | Clarity and comprehensibility of explanations, including language use and structure | ["not at all clear", "slightly clear", "moderately clear", "very clear", "completely clear"] | -| Code Efficiency | Quality of code in terms of efficiency and elegance |["not at all efficient", "slightly efficient", "moderately efficient", "very efficient", "extremely efficient"] | -| Code Correctness | Correctness of the provided code | ["completely incorrect", "mostly incorrect", "partly correct", "mostly correct", "completely correct"] - - -Then, after running QuantifierAgent, we obtained the results presented in Fig. 3, where you can see three models: -* AgentChat -* ReAct -* GPT-4 Vanilla Solver - -Lighter colors represent estimates for failed cases, and brighter colors show how discovered criteria were quantified. - -![Fig.3: Results based on overall math problems dataset `_s` stands for successful cases, `_f` - stands for failed cases](img/math-problems-plot.png) -

Fig.3 presents results based on overall math problems dataset `_s` stands for successful cases, `_f` - stands for failed cases

- -We note that while applying agentEval to math problems, the agent was not exposed to any ground truth information about the problem. As such, this figure illustrates an estimated performance of the three different agents, namely, Autogen (blue), Gpt-4 (red), and ReAct (green). We observe that by comparing the performance of any of the three agents in successful cases (dark bars of any color) versus unsuccessful cases (lighter version of the same bar), we note that AgentEval was able to assign higher quantification to successful cases than that of failed ones. This observation verifies AgentEval's ability for task utility prediction. Additionally, AgentEval allows us to go beyond just a binary definition of success, enabling a more in-depth comparison between successful and failed cases. - -It's important not only to identify what is not working but also to recognize what and why actually went well. - -## Limitations and Future Work -The current implementation of `AgentEval` has a number of limitations which are planning to overcome in the future: -* The list of criteria varies per run (unless you store a seed). We would recommend to run `CriticAgent` at least two times, and pick criteria you think is important for your domain. -* The results of the `QuantifierAgent` can vary with each run, so we recommend conducting multiple runs to observe the extent of result variations. - -To mitigate the limitations mentioned above, we are working on VerifierAgent, whose goal is to stabilize the results and provide additional explanations. - -## Summary -`CriticAgent` and `QuantifierAgent` can be applied to the logs of any type of application, providing you with an in-depth understanding of the utility your solution brings to the user for a given task. - -We would love to hear about how AgentEval works for your application. Any feedback would be useful for future development. Please contact us on our [Discord](http://aka.ms/autogen-dc). - - -## Previous Research - -``` -@InProceedings{pmlr-v176-kiseleva22a, - title = "Interactive Grounded Language Understanding in a Collaborative Environment: IGLU 2021", - author = "Kiseleva, Julia and Li, Ziming and Aliannejadi, Mohammad and Mohanty, Shrestha and ter Hoeve, Maartje and Burtsev, Mikhail and Skrynnik, Alexey and Zholus, Artem and Panov, Aleksandr and Srinet, Kavya and Szlam, Arthur and Sun, Yuxuan and Hofmann, Katja and C{\^o}t{\'e}, Marc-Alexandre and Awadallah, Ahmed and Abdrazakov, Linar and Churin, Igor and Manggala, Putra and Naszadi, Kata and van der Meer, Michiel and Kim, Taewoon", - booktitle = "Proceedings of the NeurIPS 2021 Competitions and Demonstrations Track", - pages = "146--161", - year = 2022, - editor = "Kiela, Douwe and Ciccone, Marco and Caputo, Barbara", - volume = 176, - series = "Proceedings of Machine Learning Research", - month = "06--14 Dec", - publisher = "PMLR", - pdf = {https://proceedings.mlr.press/v176/kiseleva22a/kiseleva22a.pdf}, - url = {https://proceedings.mlr.press/v176/kiseleva22a.html}. 
-} -``` - - -``` -@InProceedings{pmlr-v220-kiseleva22a, - title = "Interactive Grounded Language Understanding in a Collaborative Environment: Retrospective on Iglu 2022 Competition", - author = "Kiseleva, Julia and Skrynnik, Alexey and Zholus, Artem and Mohanty, Shrestha and Arabzadeh, Negar and C\^{o}t\'e, Marc-Alexandre and Aliannejadi, Mohammad and Teruel, Milagro and Li, Ziming and Burtsev, Mikhail and ter Hoeve, Maartje and Volovikova, Zoya and Panov, Aleksandr and Sun, Yuxuan and Srinet, Kavya and Szlam, Arthur and Awadallah, Ahmed and Rho, Seungeun and Kwon, Taehwan and Wontae Nam, Daniel and Bivort Haiek, Felipe and Zhang, Edwin and Abdrazakov, Linar and Qingyam, Guo and Zhang, Jason and Guo, Zhibin", - booktitle = "Proceedings of the NeurIPS 2022 Competitions Track", - pages = "204--216", - year = 2022, - editor = "Ciccone, Marco and Stolovitzky, Gustavo and Albrecht, Jacob", - volume = 220, - series = "Proceedings of Machine Learning Research", - month = "28 Nov--09 Dec", - publisher = "PMLR", - pdf = "https://proceedings.mlr.press/v220/kiseleva22a/kiseleva22a.pdf", - url = "https://proceedings.mlr.press/v220/kiseleva22a.html". -} -``` +--- +title: How to Assess Utility of LLM-powered Applications? +authors: + - julianakiseleva + - narabzad +tags: [LLM, GPT, evaluation, task utility] +--- + + +![Fig.1: A verification framework](img/agenteval-CQ.png) + +

Fig.1 illustrates the general flow of AgentEval

+ +**TL;DR:** +* As a developer of an LLM-powered application, how can you assess the utility it brings to end users while helping them with their tasks? +* To shed light on the question above, we introduce `AgentEval` — the first version of the framework to assess the utility of any LLM-powered application crafted to assist users in specific tasks. AgentEval aims to simplify the evaluation process by automatically proposing a set of criteria tailored to the unique purpose of your application. This allows for a comprehensive assessment, quantifying the utility of your application against the suggested criteria. +* We demonstrate how `AgentEval` works using the [math problems dataset](https://microsoft.github.io/autogen/blog/2023/06/28/MathChat) as an example in the [following notebook](https://github.com/microsoft/autogen/blob/main/notebook/agenteval_cq_math.ipynb). Any feedback would be useful for future development. Please contact us on our [Discord](http://aka.ms/autogen-dc). + + +## Introduction + + AutoGen aims to simplify the development of LLM-powered multi-agent systems for various applications, ultimately making end users' lives easier by assisting with their tasks. Next, we all yearn to understand how our developed systems perform, their utility for users, and, perhaps most crucially, how we can enhance them. Directly evaluating multi-agent systems poses challenges as current approaches predominantly rely on success metrics – essentially, whether the agent accomplishes tasks. However, comprehending user interaction with a system involves far more than success alone. Take math problems, for instance; it's not merely about the agent solving the problem. Equally significant is its ability to convey solutions based on various criteria, including completeness, conciseness, and the clarity of the provided explanation. Furthermore, success isn't always clearly defined for every task. + + Rapid advances in LLMs and multi-agent systems have brought forth many emerging capabilities that we're keen on translating into tangible utilities for end users. We introduce the first version of the `AgentEval` framework - a tool crafted to empower developers in swiftly gauging the utility of LLM-powered applications designed to help end users accomplish the desired task. + + +![Fig.2: An overview of the tasks taxonomy](img/tasks-taxonomy.png) +

Fig. 2 provides an overview of the tasks taxonomy

+ + +Let's first look into an overview of the suggested task taxonomy that a multi-agent system can be designed for. In general, the tasks can be split into two types, where: +* _Success is not clearly defined_ - refers to instances when users utilize a system in an assistive manner, seeking suggestions rather than expecting the system to solve the task. For example, a user might request the system to generate an email. In many cases, this generated content serves as a template that the user will later edit. However, defining success precisely for such tasks is relatively complex. +* _Success is clearly defined_ - refers to instances where we can clearly define whether a system solved the task or not. Consider agents that assist in accomplishing household tasks, where the definition of success is clear and measurable. This category can be further divided into two separate subcategories: + * _The optimal solution exists_ - these are tasks where only one solution is possible. For example, if you ask your assistant to turn on the light, the success of this task is clearly defined, and there is only one way to accomplish it. + * _Multiple solutions exist_ - increasingly, we observe situations where multiple trajectories of agent behavior can lead to either success or failure. In such cases, it is crucial to differentiate between the various successful and unsuccessful trajectories. For example, when you ask the agent to suggest a food recipe or tell you a joke. + +In our `AgentEval` framework, we are currently focusing on tasks where _Success is clearly defined_. Next, we will introduce the suggested framework. + +## `AgentEval` Framework + +Our previous research on [assistive agents in Minecraft](https://github.com/microsoft/iglu-datasets) suggested that the optimal way to obtain human judgments is to present humans with two agents side by side and ask for preferences. In this setup of pairwise comparison, humans can develop criteria to explain why they prefer the behavior of one agent over another. For instance, _'the first agent was faster in execution,'_ or _'the second agent moves more naturally.'_ So, the comparative nature led humans to come up with a list of criteria that helps to infer the utility of the task. With this idea in mind, we designed `AgentEval` (shown in Fig. 1), where we employ LLMs to help us understand, verify, and assess task *utility* for the multi-agent system. Namely: + +* The goal of `CriticAgent` is to suggest the list of criteria (Fig. 1) that can be used to assess task utility. This is an example of how `CriticAgent` is defined using `AutoGen`: + +```python +critic = autogen.AssistantAgent( + name="critic", + llm_config={"config_list": config_list}, + system_message="""You are a helpful assistant. You suggest criteria for evaluating different tasks. They should be distinguishable, quantifiable, and not redundant. + Convert the evaluation criteria into a dictionary where the keys are the criteria. + The value of each key is a dictionary as follows {"description": criteria description, "accepted_values": possible accepted inputs for this key} + Make sure the keys are criteria for assessing the given task. "accepted_values" include the acceptable inputs for each key that are fine-grained and preferably multi-graded levels. "description" includes the criterion description. + Return only the dictionary.""" +) +``` + +Next, the critic is given successful and failed examples of the task execution; then, it is able to return a list of criteria (Fig. 1). 
For reference, use the [following notebook](https://github.com/microsoft/autogen/blob/main/notebook/agenteval_cq_math.ipynb). + +* The goal of `QuantifierAgent` is to quantify each of the suggested criteria (Fig. 1), providing us with an idea of the utility of this system for the given task. Here is an example of how it can be defined: + +```python +quantifier = autogen.AssistantAgent( + name="quantifier", + llm_config={"config_list": config_list}, + system_message = """You are a helpful assistant. You quantify the output of different tasks based on the given criteria. + The criterion is given in a dictionary format where each key is a distinct criteria. + The value of each key is a dictionary as follows {"description": criteria description , "accepted_values": possible accepted inputs for this key} + You are going to quantify each of the criteria for a given task based on the task description. + Return a dictionary where the keys are the criteria and the values are the assessed performance based on accepted values for each criteria. + Return only the dictionary.""" + +) +``` + +## `AgentEval` Results based on Math Problems Dataset + + As an example, after running CriticAgent, we obtained the following criteria to verify the results for math problem dataset: + +| Criteria | Description | Accepted Values| +|-----------|-----|----------------| +| Problem Interpretation | Ability to correctly interpret the problem | ["completely off", "slightly relevant", "relevant", "mostly accurate", "completely accurate"]| +| Mathematical Methodology | Adequacy of the chosen mathematical or algorithmic methodology for the question | ["inappropriate", "barely adequate", "adequate", "mostly effective", "completely effective"] | +| Calculation Correctness | Accuracy of calculations made and solutions given | ["completely incorrect", "mostly incorrect", "neither", "mostly correct", "completely correct"] | +| Explanation Clarity | Clarity and comprehensibility of explanations, including language use and structure | ["not at all clear", "slightly clear", "moderately clear", "very clear", "completely clear"] | +| Code Efficiency | Quality of code in terms of efficiency and elegance |["not at all efficient", "slightly efficient", "moderately efficient", "very efficient", "extremely efficient"] | +| Code Correctness | Correctness of the provided code | ["completely incorrect", "mostly incorrect", "partly correct", "mostly correct", "completely correct"] + + +Then, after running QuantifierAgent, we obtained the results presented in Fig. 3, where you can see three models: +* AgentChat +* ReAct +* GPT-4 Vanilla Solver + +Lighter colors represent estimates for failed cases, and brighter colors show how discovered criteria were quantified. + +![Fig.3: Results based on overall math problems dataset `_s` stands for successful cases, `_f` - stands for failed cases](img/math-problems-plot.png) +

Fig.3 presents results based on overall math problems dataset `_s` stands for successful cases, `_f` - stands for failed cases

+ +We note that while applying AgentEval to math problems, the agent was not exposed to any ground truth information about the problem. As such, this figure illustrates an estimated performance of the three different agents, namely, AutoGen (blue), GPT-4 (red), and ReAct (green). By comparing the performance of each of the three agents in successful cases (dark bars of any color) versus unsuccessful cases (lighter version of the same bar), we observe that AgentEval assigned higher quantification to successful cases than to failed ones. This observation verifies AgentEval's ability to predict task utility. Additionally, AgentEval allows us to go beyond just a binary definition of success, enabling a more in-depth comparison between successful and failed cases. + +It's important not only to identify what is not working but also to recognize what actually went well, and why. + +## Limitations and Future Work +The current implementation of `AgentEval` has a number of limitations which we are planning to overcome in the future: +* The list of criteria varies per run (unless you store a seed). We recommend running `CriticAgent` at least twice and picking the criteria you think are important for your domain. +* The results of the `QuantifierAgent` can vary with each run, so we recommend conducting multiple runs to observe the extent of result variations. + +To mitigate the limitations mentioned above, we are working on VerifierAgent, whose goal is to stabilize the results and provide additional explanations. + +## Summary +`CriticAgent` and `QuantifierAgent` can be applied to the logs of any type of application, providing you with an in-depth understanding of the utility your solution brings to the user for a given task. + +We would love to hear about how AgentEval works for your application. Any feedback would be useful for future development. Please contact us on our [Discord](http://aka.ms/autogen-dc). + + +## Previous Research + +``` +@InProceedings{pmlr-v176-kiseleva22a, + title = "Interactive Grounded Language Understanding in a Collaborative Environment: IGLU 2021", + author = "Kiseleva, Julia and Li, Ziming and Aliannejadi, Mohammad and Mohanty, Shrestha and ter Hoeve, Maartje and Burtsev, Mikhail and Skrynnik, Alexey and Zholus, Artem and Panov, Aleksandr and Srinet, Kavya and Szlam, Arthur and Sun, Yuxuan and Hofmann, Katja and C{\^o}t{\'e}, Marc-Alexandre and Awadallah, Ahmed and Abdrazakov, Linar and Churin, Igor and Manggala, Putra and Naszadi, Kata and van der Meer, Michiel and Kim, Taewoon", + booktitle = "Proceedings of the NeurIPS 2021 Competitions and Demonstrations Track", + pages = "146--161", + year = 2022, + editor = "Kiela, Douwe and Ciccone, Marco and Caputo, Barbara", + volume = 176, + series = "Proceedings of Machine Learning Research", + month = "06--14 Dec", + publisher = "PMLR", + pdf = {https://proceedings.mlr.press/v176/kiseleva22a/kiseleva22a.pdf}, + url = {https://proceedings.mlr.press/v176/kiseleva22a.html}. 
+} +``` + + +``` +@InProceedings{pmlr-v220-kiseleva22a, + title = "Interactive Grounded Language Understanding in a Collaborative Environment: Retrospective on Iglu 2022 Competition", + author = "Kiseleva, Julia and Skrynnik, Alexey and Zholus, Artem and Mohanty, Shrestha and Arabzadeh, Negar and C\^{o}t\'e, Marc-Alexandre and Aliannejadi, Mohammad and Teruel, Milagro and Li, Ziming and Burtsev, Mikhail and ter Hoeve, Maartje and Volovikova, Zoya and Panov, Aleksandr and Sun, Yuxuan and Srinet, Kavya and Szlam, Arthur and Awadallah, Ahmed and Rho, Seungeun and Kwon, Taehwan and Wontae Nam, Daniel and Bivort Haiek, Felipe and Zhang, Edwin and Abdrazakov, Linar and Qingyam, Guo and Zhang, Jason and Guo, Zhibin", + booktitle = "Proceedings of the NeurIPS 2022 Competitions Track", + pages = "204--216", + year = 2022, + editor = "Ciccone, Marco and Stolovitzky, Gustavo and Albrecht, Jacob", + volume = 220, + series = "Proceedings of Machine Learning Research", + month = "28 Nov--09 Dec", + publisher = "PMLR", + pdf = "https://proceedings.mlr.press/v220/kiseleva22a/kiseleva22a.pdf", + url = "https://proceedings.mlr.press/v220/kiseleva22a.html". +} +``` diff --git a/website/blog/2023-12-01-AutoGenStudio/index.mdx b/website/blog/2023-12-01-AutoGenStudio/index.mdx index 49151f7b355..a2558acb01d 100644 --- a/website/blog/2023-12-01-AutoGenStudio/index.mdx +++ b/website/blog/2023-12-01-AutoGenStudio/index.mdx @@ -1,237 +1,237 @@ ---- -title: "AutoGen Studio: Interactively Explore Multi-Agent Workflows" -authors: - - victordibia - - gagb - - samershi -tags: [AutoGen, UI, web, UX] ---- - -![AutoGen Studio Playground View: Solving a task with multiple agents that generate a pdf document with images.](img/autogenstudio_home.png) - -

- - AutoGen Studio: Solving a task with multiple agents that generate a pdf - document with images. - -

- -## TL;DR - -To help you rapidly prototype multi-agent solutions for your tasks, we are introducing AutoGen Studio, an interface powered by [AutoGen](https://github.com/microsoft/autogen/tree/main/autogen). It allows you to: - -- Declaratively define and modify agents and multi-agent workflows through a point and click, drag and drop interface (e.g., you can select the parameters of two agents that will communicate to solve your task). -- Use our UI to create chat sessions with the specified agents and view results (e.g., view chat history, generated files, and time taken). -- Explicitly add skills to your agents and accomplish more tasks. -- Publish your sessions to a local gallery. - - -See the official AutoGen Studio documentation [here](https://microsoft.github.io/autogen/docs/autogen-studio/getting-started) for more details. - -AutoGen Studio is open source [code here](https://github.com/microsoft/autogen/tree/main/samples/apps/autogen-studio), and can be installed via pip. Give it a try! - -```bash -pip install autogenstudio -``` - -## Introduction - -The accelerating pace of technology has ushered us into an era where digital assistants (or agents) are becoming integral to our lives. [AutoGen](https://github.com/microsoft/autogen/tree/main/autogen) has emerged as a leading framework for orchestrating the power of agents. In the spirit of expanding this frontier and democratizing this capability, we are thrilled to introduce a new user-friendly interface: **AutoGen Studio**. - -With AutoGen Studio, users can rapidly create, manage, and interact with agents that can learn, adapt, and collaborate. As we release this interface into the open-source community, our ambition is not only to enhance productivity but to inspire a level of personalized interaction between humans and agents. - -> **Note**: AutoGen Studio is meant to help you rapidly prototype multi-agent workflows and demonstrate an example of end user interfaces built with AutoGen. It is not meant to be a production-ready app. - -## Getting Started with AutoGen Studio - -The following guide will help you get AutoGen Studio up and running on your system. - -### Configuring an LLM Provider - -To get started, you need access to a language model. You can get this set up by following the steps in the AutoGen documentation [here](https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints). Configure your environment with either `OPENAI_API_KEY` or `AZURE_OPENAI_API_KEY`. - -For example, in your terminal, you would set the API key like this: - -```bash -export OPENAI_API_KEY= -``` - -You can also specify the model directly in the agent's configuration as shown below. - -```python -llm_config = LLMConfig( - config_list=[{ - "model": "gpt-4", - "api_key": "", - "base_url": "", - "api_type": "azure", - "api_version": "2024-02-15-preview" - }], - temperature=0, -) -``` - -### Installation - -There are two ways to install AutoGen Studio - from PyPi or from source. We **recommend installing from PyPi** unless you plan to modify the source code. - -1. **Install from PyPi** - - We recommend using a virtual environment (e.g., conda) to avoid conflicts with existing Python packages. With Python 3.10 or newer active in your virtual environment, use pip to install AutoGen Studio: - - ```bash - pip install autogenstudio - ``` - -2. **Install from Source** - - > Note: This approach requires some familiarity with building interfaces in React. 
- - If you prefer to install from source, ensure you have Python 3.10+ and Node.js (version above 14.15.0) installed. Here's how you get started: - - - Clone the AutoGen Studio repository and install its Python dependencies: - - ```bash - pip install -e . - ``` - - - Navigate to the `samples/apps/autogen-studio/frontend` directory, install dependencies, and build the UI: - - ```bash - npm install -g gatsby-cli - npm install --global yarn - yarn install - yarn build - ``` - - For Windows users, to build the frontend, you may need alternative commands provided in the [autogen studio readme](https://github.com/microsoft/autogen/tree/main/samples/apps/autogen-studio). - -### Running the Application - -Once installed, run the web UI by entering the following in your terminal: - -```bash -autogenstudio ui --port 8081 -``` - -This will start the application on the specified port. Open your web browser and go to `http://localhost:8081/` to begin using AutoGen Studio. - -Now that you have AutoGen Studio installed and running, you are ready to explore its capabilities, including defining and modifying agent workflows, interacting with agents and sessions, and expanding agent skills. - -## What Can You Do with AutoGen Studio? - -The AutoGen Studio UI is organized into 3 high level sections - **Build**, **Playground**, and **Gallery**. - -### Build - -![Specify Agents.](img/autogenstudio_config.png) - -This section focuses on defining the properties of agents and agent workflows. It includes the following concepts: - -**Skills**: Skills are functions (e.g., Python functions) that describe how to solve a task. In general, a good skill has a descriptive name (e.g. `generate_images`), extensive docstrings and good defaults (e.g., writing out files to disk for persistence and reuse). You can add new skills to AutoGen Studio via the provided UI. At inference time, these skills are made available to the assistant agent as they address your tasks. - -![View and add skills.](img/autogenstudio_skills.png) - -

- - AutoGen Studio Build View: View, add or edit skills that an agent can - leverage in addressing tasks. - -

- -**Agents**: This provides an interface to declaratively specify properties for an AutoGen agent (mirrors most of the members of a base [AutoGen conversable agent](https://github.com/microsoft/autogen/blob/main/autogen/agentchat/conversable_agent.py) class). - -**Agent Workflows**: An agent workflow is a specification of a set of agents that can work together to accomplish a task. The simplest version of this is a setup with two agents – a user proxy agent (that represents a user i.e. it compiles code and prints result) and an assistant that can address task requests (e.g., generating plans, writing code, evaluating responses, proposing error recovery steps, etc.). A more complex flow could be a group chat where even more agents work towards a solution. - -### Playground - -![AutoGen Studio Playground View: Solving a task with multiple agents that generate a pdf document with images.](img/autogenstudio_home.png) - -

- - AutoGen Studio Playground View: Agents collaborate, use available skills - (ability to generate images) to address a user task (generate pdf's). - -

- -The playground section is focused on interacting with agent workflows defined in the previous build section. It includes the following concepts: - -**Session**: A session refers to a period of continuous interaction or engagement with an agent workflow, typically characterized by a sequence of activities or operations aimed at achieving specific objectives. It includes the agent workflow configuration, the interactions between the user and the agents. A session can be “published” to a “gallery”. - -**Chat View**: A chat is a sequence of interactions between a user and an agent. It is a part of a session. - -### Gallery - -This section is focused on sharing and reusing artifacts (e.g., workflow configurations, sessions, etc.). - -AutoGen Studio comes with 3 example skills: `fetch_profile`, `find_papers`, `generate_images`. Please feel free to review the repo to learn more about how they work. - -## The AutoGen Studio API - -While AutoGen Studio is a web interface, it is powered by an underlying python API that is reusable and modular. Importantly, we have implemented an API where agent workflows can be declaratively specified (in JSON), loaded and run. An example of the current API is shown below. Please consult the [AutoGen Studio repo](https://github.com/microsoft/autogen/tree/main/samples/apps/autogen-studio) for more details. - -```python -import json -from autogenstudio import AutoGenWorkFlowManager, AgentWorkFlowConfig - -# load an agent specification in JSON -agent_spec = json.load(open('agent_spec.json')) - -# Create an AutoGen Workflow Configuration from the agent specification -agent_work_flow_config = FlowConfig(**agent_spec) - -# Create a Workflow from the configuration -agent_work_flow = AutoGenWorkFlowManager(agent_work_flow_config) - -# Run the workflow on a task -task_query = "What is the height of the Eiffel Tower?" -agent_work_flow.run(message=task_query) -``` - -## Road Map and Next Steps - -As we continue to develop and refine AutoGen Studio, the road map below outlines an array of enhancements and new features planned for future releases. Here's what users can look forward to: - -- **Complex Agent Workflows**: We're working on integrating support for more sophisticated agent workflows, such as `GroupChat`, allowing for richer interaction between multiple agents or dynamic topologies. -- **Improved User Experience**: This includes features like streaming intermediate model output for real-time feedback, better summarization of agent responses, information on costs of each interaction. We will also invest in improving the workflow for composing and reusing agents. We will also explore support for more interactive human in the loop feedback to agents. -- **Expansion of Agent Skills**: We will work towards improving the workflow for authoring, composing and reusing agent skills. -- **Community Features**: Facilitation of sharing and collaboration within AutoGen Studio user community is a key goal. We're exploring options for sharing sessions and results more easily among users and contributing to a shared repository of skills, agents, and agent workflows. - -## Contribution Guide - -We welcome contributions to AutoGen Studio. We recommend the following general steps to contribute to the project: - -- Review the overall AutoGen project [contribution guide](https://github.com/microsoft/autogen?tab=readme-ov-file#contributing). 
-- Please review the AutoGen Studio [roadmap](https://github.com/microsoft/autogen/issues/737) to get a sense of the current priorities for the project. Help is appreciated especially with Studio issues tagged with `help-wanted`. -- Please initiate a discussion on the roadmap issue or a new issue to discuss your proposed contribution. -- Please review the autogenstudio dev branch here [dev branch].(https://github.com/microsoft/autogen/tree/autogenstudio) and use as a base for your contribution. This way, your contribution will be aligned with the latest changes in the AutoGen Studio project. -- Submit a pull request with your contribution! -- If you are modifying AutoGen Studio in vscode, it has its own devcontainer to simplify dev work. See instructions in `.devcontainer/README.md` on how to use it. -- Please use the tag `studio` for any issues, questions, and PRs related to Studio. - -### FAQ - -**Q: Where can I adjust the default skills, agent and workflow configurations?** -A: You can modify agent configurations directly from the UI or by editing the `autogentstudio/utils/dbdefaults.json` file which is used to initialize the database. - -**Q: If I want to reset the entire conversation with an agent, how do I go about it?** -A: To reset your conversation history, you can delete the `database.sqlite` file. If you need to clear user-specific data, remove the relevant `autogenstudio/web/files/user/` folder. - -**Q: Is it possible to view the output and messages generated by the agents during interactions?** -A: Yes, you can view the generated messages in the debug console of the web UI, providing insights into the agent interactions. Alternatively, you can inspect the `database.sqlite` file for a comprehensive record of messages. - -**Q: Where can I find documentation and support for AutoGen Studio?** -A: We are constantly working to improve AutoGen Studio. For the latest updates, please refer to the [AutoGen Studio Readme](https://github.com/microsoft/autogen/tree/main/samples/apps/autogen-studio). For additional support, please open an issue on [GitHub](https://github.com/microsoft/autogen) or ask questions on [Discord](https://aka.ms/autogen-dc). - -**Q: Can I use Other Models with AutoGen Studio?** -Yes. AutoGen standardizes on the openai model api format, and you can use any api server that offers an openai compliant endpoint. In the AutoGen Studio UI, each agent has an `llm_config` field where you can input your model endpoint details including `model name`, `api key`, `base url`, `model type` and `api version`. For Azure OpenAI models, you can find these details in the Azure portal. Note that for Azure OpenAI, the `model name` is the deployment id or engine, and the `model type` is "azure". -For other OSS models, we recommend using a server such as vllm to instantiate an openai compliant endpoint. - -**Q: The Server Starts But I Can't Access the UI** -A: If you are running the server on a remote machine (or a local machine that fails to resolve localhost correstly), you may need to specify the host address. By default, the host address is set to `localhost`. You can specify the host address using the `--host ` argument. For example, to start the server on port 8081 and local address such that it is accessible from other machines on the network, you can run the following command: - -```bash -autogenstudio ui --port 8081 --host 0.0.0.0 -``` - -
+--- +title: "AutoGen Studio: Interactively Explore Multi-Agent Workflows" +authors: + - victordibia + - gagb + - samershi +tags: [AutoGen, UI, web, UX] +--- + +![AutoGen Studio Playground View: Solving a task with multiple agents that generate a pdf document with images.](img/autogenstudio_home.png) + +

+
+  AutoGen Studio: Solving a task with multiple agents that generate a PDF
+  document with images.
+

+ +## TL;DR + +To help you rapidly prototype multi-agent solutions for your tasks, we are introducing AutoGen Studio, an interface powered by [AutoGen](https://github.com/microsoft/autogen/tree/main/autogen). It allows you to: + +- Declaratively define and modify agents and multi-agent workflows through a point and click, drag and drop interface (e.g., you can select the parameters of two agents that will communicate to solve your task). +- Use our UI to create chat sessions with the specified agents and view results (e.g., view chat history, generated files, and time taken). +- Explicitly add skills to your agents and accomplish more tasks. +- Publish your sessions to a local gallery. + + +See the official AutoGen Studio documentation [here](https://microsoft.github.io/autogen/docs/autogen-studio/getting-started) for more details. + +AutoGen Studio is open source [code here](https://github.com/microsoft/autogen/tree/main/samples/apps/autogen-studio), and can be installed via pip. Give it a try! + +```bash +pip install autogenstudio +``` + +## Introduction + +The accelerating pace of technology has ushered us into an era where digital assistants (or agents) are becoming integral to our lives. [AutoGen](https://github.com/microsoft/autogen/tree/main/autogen) has emerged as a leading framework for orchestrating the power of agents. In the spirit of expanding this frontier and democratizing this capability, we are thrilled to introduce a new user-friendly interface: **AutoGen Studio**. + +With AutoGen Studio, users can rapidly create, manage, and interact with agents that can learn, adapt, and collaborate. As we release this interface into the open-source community, our ambition is not only to enhance productivity but to inspire a level of personalized interaction between humans and agents. + +> **Note**: AutoGen Studio is meant to help you rapidly prototype multi-agent workflows and demonstrate an example of end user interfaces built with AutoGen. It is not meant to be a production-ready app. + +## Getting Started with AutoGen Studio + +The following guide will help you get AutoGen Studio up and running on your system. + +### Configuring an LLM Provider + +To get started, you need access to a language model. You can get this set up by following the steps in the AutoGen documentation [here](https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints). Configure your environment with either `OPENAI_API_KEY` or `AZURE_OPENAI_API_KEY`. + +For example, in your terminal, you would set the API key like this: + +```bash +export OPENAI_API_KEY= +``` + +You can also specify the model directly in the agent's configuration as shown below. + +```python +llm_config = LLMConfig( + config_list=[{ + "model": "gpt-4", + "api_key": "", + "base_url": "", + "api_type": "azure", + "api_version": "2024-02-01" + }], + temperature=0, +) +``` + +### Installation + +There are two ways to install AutoGen Studio - from PyPi or from source. We **recommend installing from PyPi** unless you plan to modify the source code. + +1. **Install from PyPi** + + We recommend using a virtual environment (e.g., conda) to avoid conflicts with existing Python packages. With Python 3.10 or newer active in your virtual environment, use pip to install AutoGen Studio: + + ```bash + pip install autogenstudio + ``` + +2. **Install from Source** + + > Note: This approach requires some familiarity with building interfaces in React. 
+
+   If you prefer to install from source, ensure you have Python 3.10+ and Node.js (version 14.15.0 or above) installed. Here's how you get started:
+
+   - Clone the AutoGen Studio repository and install its Python dependencies:
+
+     ```bash
+     pip install -e .
+     ```
+
+   - Navigate to the `samples/apps/autogen-studio/frontend` directory, install dependencies, and build the UI:
+
+     ```bash
+     npm install -g gatsby-cli
+     npm install --global yarn
+     yarn install
+     yarn build
+     ```
+
+   Windows users may need the alternative commands provided in the [AutoGen Studio readme](https://github.com/microsoft/autogen/tree/main/samples/apps/autogen-studio) to build the frontend.
+
+### Running the Application
+
+Once installed, run the web UI by entering the following in your terminal:
+
+```bash
+autogenstudio ui --port 8081
+```
+
+This will start the application on the specified port. Open your web browser and go to `http://localhost:8081/` to begin using AutoGen Studio.
+
+Now that you have AutoGen Studio installed and running, you are ready to explore its capabilities, including defining and modifying agent workflows, interacting with agents and sessions, and expanding agent skills.
+
+## What Can You Do with AutoGen Studio?
+
+The AutoGen Studio UI is organized into three high-level sections: **Build**, **Playground**, and **Gallery**.
+
+### Build
+
+![Specify Agents.](img/autogenstudio_config.png)
+
+This section focuses on defining the properties of agents and agent workflows. It includes the following concepts:
+
+**Skills**: Skills are functions (e.g., Python functions) that describe how to solve a task. In general, a good skill has a descriptive name (e.g., `generate_images`), extensive docstrings, and good defaults (e.g., writing out files to disk for persistence and reuse). You can add new skills to AutoGen Studio via the provided UI. At inference time, these skills are made available to the assistant agent as it addresses your tasks. A hypothetical example of such a skill is sketched after the figure below.
+
+![View and add skills.](img/autogenstudio_skills.png)
+
+

+ + AutoGen Studio Build View: View, add or edit skills that an agent can + leverage in addressing tasks. + +

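+
+To make the skill concept concrete, below is a sketch of the kind of function you might register as a skill in the Build view. It is a hypothetical example (the function name, docstring, and default output path are illustrative, and it is not one of the skills that ships with AutoGen Studio), but it follows the pattern described above: a descriptive name, a detailed docstring, and sensible defaults that persist results to disk.
+
+```python
+import urllib.parse
+import urllib.request
+import xml.etree.ElementTree as ET
+from typing import List
+
+
+def fetch_arxiv_titles(query: str, max_results: int = 5, output_file: str = "arxiv_titles.txt") -> List[str]:
+    """
+    Search arXiv for papers matching `query` and return their titles.
+
+    The titles are also written to `output_file` so that later steps in the
+    conversation (or the user) can reuse them without repeating the search.
+    """
+    # Query the public arXiv Atom API.
+    url = (
+        "http://export.arxiv.org/api/query?search_query="
+        + urllib.parse.quote(query)
+        + f"&max_results={max_results}"
+    )
+    with urllib.request.urlopen(url) as response:
+        tree = ET.parse(response)
+
+    # Extract entry titles from the Atom feed.
+    ns = {"atom": "http://www.w3.org/2005/Atom"}
+    titles = [el.text.strip() for el in tree.getroot().findall("atom:entry/atom:title", ns)]
+
+    # Persist the result so it can be reused across turns.
+    with open(output_file, "w") as f:
+        f.write("\n".join(titles))
+    return titles
+```
+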
+
+**Agents**: This provides an interface to declaratively specify properties for an AutoGen agent (it mirrors most of the members of the base [AutoGen conversable agent](https://github.com/microsoft/autogen/blob/main/autogen/agentchat/conversable_agent.py) class).
+
+**Agent Workflows**: An agent workflow is a specification of a set of agents that can work together to accomplish a task. The simplest version of this is a setup with two agents – a user proxy agent (that represents a user, i.e., it executes code and prints the result) and an assistant that can address task requests (e.g., generating plans, writing code, evaluating responses, proposing error recovery steps, etc.). A more complex flow could be a group chat where even more agents work towards a solution.
+
+### Playground
+
+![AutoGen Studio Playground View: Solving a task with multiple agents that generate a PDF document with images.](img/autogenstudio_home.png)
+
+

+
+  AutoGen Studio Playground View: Agents collaborate, use available skills
+  (ability to generate images) to address a user task (generate PDFs).
+

+
+
+The playground section is focused on interacting with agent workflows defined in the previous build section. It includes the following concepts:
+
+**Session**: A session refers to a period of continuous interaction or engagement with an agent workflow, typically characterized by a sequence of activities or operations aimed at achieving specific objectives. It includes the agent workflow configuration and the interactions between the user and the agents. A session can be “published” to a “gallery”.
+
+**Chat View**: A chat is a sequence of interactions between a user and an agent. It is a part of a session.
+
+### Gallery
+
+This section is focused on sharing and reusing artifacts (e.g., workflow configurations, sessions, etc.).
+
+AutoGen Studio comes with three example skills: `fetch_profile`, `find_papers`, `generate_images`. Please feel free to review the repo to learn more about how they work.
+
+## The AutoGen Studio API
+
+While AutoGen Studio is a web interface, it is powered by an underlying Python API that is reusable and modular. Importantly, we have implemented an API where agent workflows can be declaratively specified (in JSON), loaded, and run. An example of the current API is shown below. Please consult the [AutoGen Studio repo](https://github.com/microsoft/autogen/tree/main/samples/apps/autogen-studio) for more details.
+
+```python
+import json
+from autogenstudio import AutoGenWorkFlowManager, AgentWorkFlowConfig
+
+# load an agent specification in JSON
+agent_spec = json.load(open('agent_spec.json'))
+
+# Create an AutoGen Workflow Configuration from the agent specification
+agent_work_flow_config = AgentWorkFlowConfig(**agent_spec)
+
+# Create a Workflow from the configuration
+agent_work_flow = AutoGenWorkFlowManager(agent_work_flow_config)
+
+# Run the workflow on a task
+task_query = "What is the height of the Eiffel Tower?"
+agent_work_flow.run(message=task_query)
+```
+
+## Road Map and Next Steps
+
+As we continue to develop and refine AutoGen Studio, the road map below outlines an array of enhancements and new features planned for future releases. Here's what users can look forward to:
+
+- **Complex Agent Workflows**: We're working on integrating support for more sophisticated agent workflows, such as `GroupChat`, allowing for richer interaction between multiple agents or dynamic topologies.
+- **Improved User Experience**: This includes features like streaming intermediate model output for real-time feedback, better summarization of agent responses, and information on the cost of each interaction. We will also invest in improving the workflow for composing and reusing agents, and explore support for more interactive human-in-the-loop feedback to agents.
+- **Expansion of Agent Skills**: We will work towards improving the workflow for authoring, composing, and reusing agent skills.
+- **Community Features**: Facilitating sharing and collaboration within the AutoGen Studio user community is a key goal. We're exploring options for sharing sessions and results more easily among users and contributing to a shared repository of skills, agents, and agent workflows.
+
+## Contribution Guide
+
+We welcome contributions to AutoGen Studio. We recommend the following general steps to contribute to the project:
+
+- Review the overall AutoGen project [contribution guide](https://github.com/microsoft/autogen?tab=readme-ov-file#contributing).
+- Please review the AutoGen Studio [roadmap](https://github.com/microsoft/autogen/issues/737) to get a sense of the current priorities for the project. Help is appreciated especially with Studio issues tagged with `help-wanted`.
+- Please initiate a discussion on the roadmap issue or a new issue to discuss your proposed contribution.
+- Please review the autogenstudio [dev branch](https://github.com/microsoft/autogen/tree/autogenstudio) and use it as a base for your contribution. This way, your contribution will be aligned with the latest changes in the AutoGen Studio project.
+- Submit a pull request with your contribution!
+- If you are modifying AutoGen Studio in VS Code, it has its own devcontainer to simplify dev work. See instructions in `.devcontainer/README.md` on how to use it.
+- Please use the tag `studio` for any issues, questions, and PRs related to Studio.
+
+### FAQ
+
+**Q: Where can I adjust the default skills, agent, and workflow configurations?**
+A: You can modify agent configurations directly from the UI or by editing the `autogenstudio/utils/dbdefaults.json` file, which is used to initialize the database.
+
+**Q: If I want to reset the entire conversation with an agent, how do I go about it?**
+A: To reset your conversation history, you can delete the `database.sqlite` file. If you need to clear user-specific data, remove the relevant `autogenstudio/web/files/user/` folder.
+
+**Q: Is it possible to view the output and messages generated by the agents during interactions?**
+A: Yes, you can view the generated messages in the debug console of the web UI, providing insights into the agent interactions. Alternatively, you can inspect the `database.sqlite` file for a comprehensive record of messages.
+
+**Q: Where can I find documentation and support for AutoGen Studio?**
+A: We are constantly working to improve AutoGen Studio. For the latest updates, please refer to the [AutoGen Studio Readme](https://github.com/microsoft/autogen/tree/main/samples/apps/autogen-studio). For additional support, please open an issue on [GitHub](https://github.com/microsoft/autogen) or ask questions on [Discord](https://aka.ms/autogen-dc).
+
+**Q: Can I use other models with AutoGen Studio?**
+A: Yes. AutoGen standardizes on the OpenAI model API format, and you can use any API server that offers an OpenAI-compliant endpoint. In the AutoGen Studio UI, each agent has an `llm_config` field where you can input your model endpoint details, including `model name`, `api key`, `base url`, `model type`, and `api version`. For Azure OpenAI models, you can find these details in the Azure portal. Note that for Azure OpenAI, the `model name` is the deployment id or engine, and the `model type` is "azure".
+For other OSS models, we recommend using a server such as vLLM to instantiate an OpenAI-compliant endpoint (an illustrative configuration is sketched at the end of this FAQ).
+
+**Q: The Server Starts But I Can't Access the UI**
+A: If you are running the server on a remote machine (or a local machine that fails to resolve localhost correctly), you may need to specify the host address. By default, the host address is set to `localhost`. You can specify the host address using the `--host` argument. For example, to start the server on port 8081 with a host address that makes it accessible from other machines on the network, you can run the following command:
+
+```bash
+autogenstudio ui --port 8081 --host 0.0.0.0
+```
+
+
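+Following up on the question about other models above, the sketch below shows what an `llm_config` for a locally hosted, OpenAI-compliant endpoint (for example, one served by vLLM) might look like. The model name, key, URL, and port are placeholders rather than values required by AutoGen Studio.
+
+```python
+llm_config = LLMConfig(
+    config_list=[{
+        "model": "mistral-7b-instruct",          # placeholder: the model name your server exposes
+        "api_key": "EMPTY",                      # placeholder: many local servers accept any non-empty key
+        "base_url": "http://localhost:8000/v1",  # placeholder: your OpenAI-compliant endpoint
+        "api_type": "openai",
+    }],
+    temperature=0,
+)
+```
+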
diff --git a/website/blog/2023-12-29-AgentDescriptions/index.mdx b/website/blog/2023-12-29-AgentDescriptions/index.mdx index 0471d545dc4..f1201c6f956 100644 --- a/website/blog/2023-12-29-AgentDescriptions/index.mdx +++ b/website/blog/2023-12-29-AgentDescriptions/index.mdx @@ -1,139 +1,139 @@ ---- -title: "All About Agent Descriptions" -authors: - - afourney -tags: [AutoGen] ---- - - -## TL;DR - -AutoGen 0.2.2 introduces a [description](https://microsoft.github.io/autogen/docs/reference/agentchat/conversable_agent#__init__) field to ConversableAgent (and all subclasses), and changes GroupChat so that it uses agent `description`s rather than `system_message`s when choosing which agents should speak next. - -This is expected to simplify GroupChat’s job, improve orchestration, and make it easier to implement new GroupChat or GroupChat-like alternatives. - -If you are a developer, and things were already working well for you, no action is needed -- backward compatibility is ensured because the `description` field defaults to the `system_message` when no description is provided. - -However, if you were struggling with getting GroupChat to work, you can now try updating the `description` field. - -## Introduction - -As AutoGen matures and developers build increasingly complex combinations of agents, orchestration is becoming an important capability. At present, [GroupChat](https://microsoft.github.io/autogen/docs/reference/agentchat/groupchat#groupchat-objects) and the [GroupChatManager](https://microsoft.github.io/autogen/docs/reference/agentchat/groupchat#groupchatmanager-objects) are the main built-in tools for orchestrating conversations between 3 or more agents. For orchestrators like GroupChat to work well, they need to know something about each agent so that they can decide who should speak and when. Prior to AutoGen 0.2.2, GroupChat relied on each agent's `system_message` and `name` to learn about each participating agent. This is likely fine when the system prompt is short and sweet, but can lead to problems when the instructions are very long (e.g., with the [AssistantAgent](https://microsoft.github.io/autogen/docs/reference/agentchat/assistant_agent)), or non-existent (e.g., with the [UserProxyAgent](https://microsoft.github.io/autogen/docs/reference/agentchat/user_proxy_agent)). - -AutoGen 0.2.2 introduces a [description](https://microsoft.github.io/autogen/docs/reference/agentchat/conversable_agent#__init__) field to all agents, and replaces the use of the `system_message` for orchestration in GroupChat and all future orchestrators. The `description` field defaults to the `system_message` to ensure backwards compatibility, so you may not need to change anything with your code if things are working well for you. However, if you were struggling with GroupChat, give setting the `description` field a try. - -The remainder of this post provides an example of how using the `description` field simplifies GroupChat's job, provides some evidence of its effectiveness, and provides tips for writing good descriptions. - -## Example - -The current GroupChat orchestration system prompt has the following template: - -``` -You are in a role play game. The following roles are available: - -{self._participant_roles(agents)}. - -Read the following conversation. -Then select the next role from {[agent.name for agent in agents]} to play. Only return the role. -``` - -Suppose that you wanted to include 3 agents: A UserProxyAgent, an AssistantAgent, and perhaps a GuardrailsAgent. 
- -Prior to 0.2.2, this template would expand to: - -``` -You are in a role play game. The following roles are available: - -assistant: You are a helpful AI assistant. -Solve tasks using your coding and language skills. -In the following cases, suggest python code (in a python coding block) or shell script (in a sh coding block) for the user to execute. -1. When you need to collect info, use the code to output the info you need, for example, browse or search the web, download/read a file, print the content of a webpage or a file, get the current date/time, check the operating system. After sufficient info is printed and the task is ready to be solved based on your language skill, you can solve the task by yourself. -2. When you need to perform some task with code, use the code to perform the task and output the result. Finish the task smartly. -Solve the task step by step if you need to. If a plan is not provided, explain your plan first. Be clear which step uses code, and which step uses your language skill. -When using code, you must indicate the script type in the code block. The user cannot provide any other feedback or perform any other action beyond executing the code you suggest. The user can't modify your code. So do not suggest incomplete code which requires users to modify. Don't use a code block if it's not intended to be executed by the user. -If you want the user to save the code in a file before executing it, put # filename: inside the code block as the first line. Don't include multiple code blocks in one response. Do not ask users to copy and paste the result. Instead, use 'print' function for the output when relevant. Check the execution result returned by the user. -If the result indicates there is an error, fix the error and output the code again. Suggest the full code instead of partial code or code changes. If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try. -When you find an answer, verify the answer carefully. Include verifiable evidence in your response if possible. -Reply "TERMINATE" in the end when everything is done. -user_proxy: -guardrails_agent: You are a guardrails agent and are tasked with ensuring that all parties adhere to the following responsible AI policies: -- You MUST TERMINATE the conversation if it involves writing or running HARMFUL or DESTRUCTIVE code. -- You MUST TERMINATE the conversation if it involves discussions of anything relating to hacking, computer exploits, or computer security. -- You MUST TERMINATE the conversation if it involves violent or graphic content such as Harm to Others, Self-Harm, Suicide. -- You MUST TERMINATE the conversation if it involves demeaning speech, hate speech, discriminatory remarks, or any form of harassment based on race, gender, sexuality, religion, nationality, disability, or any other protected characteristic. -- You MUST TERMINATE the conversation if it involves seeking or giving advice in highly regulated domains such as medical advice, mental health, legal advice or financial advice -- You MUST TERMINATE the conversation if it involves illegal activities including when encouraging or providing guidance on illegal activities. -- You MUST TERMINATE the conversation if it involves manipulative or deceptive Content including scams, phishing and spread false information. 
-- You MUST TERMINATE the conversation if it involves involve sexually explicit content or discussions. -- You MUST TERMINATE the conversation if it involves sharing or soliciting personal, sensitive, or confidential information from users. This includes financial details, health records, and other private matters. -- You MUST TERMINATE the conversation if it involves deep personal problems such as dealing with serious personal issues, mental health concerns, or crisis situations. -If you decide that the conversation must be terminated, explain your reasoning then output the uppercase word "TERMINATE". If, on the other hand, you decide the conversation is acceptable by the above standards, indicate as much, then ask the other parties to proceed. - -Read the following conversation. -Then select the next role from [assistant, user_proxy, guardrails_agent] to play. Only return the role. - -``` - -As you can see, this description is super confusing: - -- It is hard to make out where each agent's role-description ends -- `You` appears numerous times, and refers to three separate agents (GroupChatManager, AssistantAgent, and GuardrailsAgent) -- It takes a lot of tokens! - -Consequently, it's not hard to see why the GroupChat manager sometimes struggles with this orchestration task. - -With AutoGen 0.2.2 onward, GroupChat instead relies on the description field. With a description field the orchestration prompt becomes: - -``` -You are in a role play game. The following roles are available: - -assistant: A helpful and general-purpose AI assistant that has strong language skills, Python skills, and Linux command line skills. -user_proxy: A user that can run Python code or input command line commands at a Linux terminal and report back the execution results. -guradrails_agent: An agent that ensures the conversation conforms to responsible AI guidelines. - -Read the following conversation. -Then select the next role from [assistant, user_proxy, guardrails_agent] to play. Only return the role. -``` - -This is much easier to parse and understand, and it doesn't use nearly as many tokens. Moreover, the following experiment provides early evidence that it works. - -## An Experiment with Distraction - -To illustrate the impact of the `description` field, we set up a three-agent experiment with a reduced 26-problem subset of the HumanEval benchmark. Here, three agents were added to a GroupChat to solve programming problems. The three agents were: - -- Coder (default Assistant prompt) -- UserProxy (configured to execute code) -- ExecutiveChef (added as a distraction) - -The Coder and UserProxy used the AssistantAgent and UserProxy defaults (provided above), while the ExecutiveChef was given the system prompt: - -``` -You are an executive chef with 28 years of industry experience. You can answer questions about menu planning, meal preparation, and cooking techniques. -``` - -The ExecutiveChef is clearly the distractor here -- given that no HumanEval problems are food-related, the GroupChat should rarely consult with the chef. However, when configured with GPT-3.5-turbo-16k, we can clearly see the GroupChat struggling with orchestration: - -#### With versions prior to 0.2.2, using `system_message`: - -- The Agents solve 3 out of 26 problems on their first turn -- The ExecutiveChef is called upon 54 times! (almost as much as the Coder at 68 times) - -#### With version 0.2.2, using `description`: - -- The Agents solve 7 out of 26 problems on the first turn -- The ExecutiveChef is called upon 27 times! 
(versus 84 times for the Coder) - -Using the `description` field doubles performance on this task and halves the incidence of calling upon the distractor agent. - -## Tips for Writing Good Descriptions -Since `descriptions` serve a different purpose than `system_message`s, it is worth reviewing what makes a good agent description. While descriptions are new, the following tips appear to lead to good results: - -- Avoid using the 1st or 2nd person perspective. Descriptions should not contain "I" or "You", unless perhaps "You" is in reference to the GroupChat / orchestrator -- Include any details that might help the orchestrator know when to call upon the agent -- Keep descriptions short (e.g., "A helpful AI assistant with strong natural language and Python coding skills."). - -The main thing to remember is that **the description is for the benefit of the GroupChatManager, not for the Agent's own use or instruction**. - -## Conclusion - -AutoGen 0.2.2 introduces a `description`, becoming the main way agents describe themselves to orchestrators like GroupChat. Since the `description` defaults to the `system_message`, there's nothing you need to change if you were already satisfied with how your group chats were working. However, we expect this feature to generally improve orchestration, so please consider experimenting with the `description` field if you are struggling with GroupChat or want to boost performance. +--- +title: "All About Agent Descriptions" +authors: + - afourney +tags: [AutoGen] +--- + + +## TL;DR + +AutoGen 0.2.2 introduces a [description](https://microsoft.github.io/autogen/docs/reference/agentchat/conversable_agent#__init__) field to ConversableAgent (and all subclasses), and changes GroupChat so that it uses agent `description`s rather than `system_message`s when choosing which agents should speak next. + +This is expected to simplify GroupChat’s job, improve orchestration, and make it easier to implement new GroupChat or GroupChat-like alternatives. + +If you are a developer, and things were already working well for you, no action is needed -- backward compatibility is ensured because the `description` field defaults to the `system_message` when no description is provided. + +However, if you were struggling with getting GroupChat to work, you can now try updating the `description` field. + +## Introduction + +As AutoGen matures and developers build increasingly complex combinations of agents, orchestration is becoming an important capability. At present, [GroupChat](https://microsoft.github.io/autogen/docs/reference/agentchat/groupchat#groupchat-objects) and the [GroupChatManager](https://microsoft.github.io/autogen/docs/reference/agentchat/groupchat#groupchatmanager-objects) are the main built-in tools for orchestrating conversations between 3 or more agents. For orchestrators like GroupChat to work well, they need to know something about each agent so that they can decide who should speak and when. Prior to AutoGen 0.2.2, GroupChat relied on each agent's `system_message` and `name` to learn about each participating agent. This is likely fine when the system prompt is short and sweet, but can lead to problems when the instructions are very long (e.g., with the [AssistantAgent](https://microsoft.github.io/autogen/docs/reference/agentchat/assistant_agent)), or non-existent (e.g., with the [UserProxyAgent](https://microsoft.github.io/autogen/docs/reference/agentchat/user_proxy_agent)). 
+ +AutoGen 0.2.2 introduces a [description](https://microsoft.github.io/autogen/docs/reference/agentchat/conversable_agent#__init__) field to all agents, and replaces the use of the `system_message` for orchestration in GroupChat and all future orchestrators. The `description` field defaults to the `system_message` to ensure backwards compatibility, so you may not need to change anything with your code if things are working well for you. However, if you were struggling with GroupChat, give setting the `description` field a try. + +The remainder of this post provides an example of how using the `description` field simplifies GroupChat's job, provides some evidence of its effectiveness, and provides tips for writing good descriptions. + +## Example + +The current GroupChat orchestration system prompt has the following template: + +``` +You are in a role play game. The following roles are available: + +{self._participant_roles(agents)}. + +Read the following conversation. +Then select the next role from {[agent.name for agent in agents]} to play. Only return the role. +``` + +Suppose that you wanted to include 3 agents: A UserProxyAgent, an AssistantAgent, and perhaps a GuardrailsAgent. + +Prior to 0.2.2, this template would expand to: + +``` +You are in a role play game. The following roles are available: + +assistant: You are a helpful AI assistant. +Solve tasks using your coding and language skills. +In the following cases, suggest python code (in a python coding block) or shell script (in a sh coding block) for the user to execute. +1. When you need to collect info, use the code to output the info you need, for example, browse or search the web, download/read a file, print the content of a webpage or a file, get the current date/time, check the operating system. After sufficient info is printed and the task is ready to be solved based on your language skill, you can solve the task by yourself. +2. When you need to perform some task with code, use the code to perform the task and output the result. Finish the task smartly. +Solve the task step by step if you need to. If a plan is not provided, explain your plan first. Be clear which step uses code, and which step uses your language skill. +When using code, you must indicate the script type in the code block. The user cannot provide any other feedback or perform any other action beyond executing the code you suggest. The user can't modify your code. So do not suggest incomplete code which requires users to modify. Don't use a code block if it's not intended to be executed by the user. +If you want the user to save the code in a file before executing it, put # filename: inside the code block as the first line. Don't include multiple code blocks in one response. Do not ask users to copy and paste the result. Instead, use 'print' function for the output when relevant. Check the execution result returned by the user. +If the result indicates there is an error, fix the error and output the code again. Suggest the full code instead of partial code or code changes. If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try. +When you find an answer, verify the answer carefully. Include verifiable evidence in your response if possible. +Reply "TERMINATE" in the end when everything is done. 
+user_proxy:
+guardrails_agent: You are a guardrails agent and are tasked with ensuring that all parties adhere to the following responsible AI policies:
+- You MUST TERMINATE the conversation if it involves writing or running HARMFUL or DESTRUCTIVE code.
+- You MUST TERMINATE the conversation if it involves discussions of anything relating to hacking, computer exploits, or computer security.
+- You MUST TERMINATE the conversation if it involves violent or graphic content such as Harm to Others, Self-Harm, Suicide.
+- You MUST TERMINATE the conversation if it involves demeaning speech, hate speech, discriminatory remarks, or any form of harassment based on race, gender, sexuality, religion, nationality, disability, or any other protected characteristic.
+- You MUST TERMINATE the conversation if it involves seeking or giving advice in highly regulated domains such as medical advice, mental health, legal advice or financial advice
+- You MUST TERMINATE the conversation if it involves illegal activities including when encouraging or providing guidance on illegal activities.
+- You MUST TERMINATE the conversation if it involves manipulative or deceptive Content including scams, phishing and spread false information.
+- You MUST TERMINATE the conversation if it involves sexually explicit content or discussions.
+- You MUST TERMINATE the conversation if it involves sharing or soliciting personal, sensitive, or confidential information from users. This includes financial details, health records, and other private matters.
+- You MUST TERMINATE the conversation if it involves deep personal problems such as dealing with serious personal issues, mental health concerns, or crisis situations.
+If you decide that the conversation must be terminated, explain your reasoning then output the uppercase word "TERMINATE". If, on the other hand, you decide the conversation is acceptable by the above standards, indicate as much, then ask the other parties to proceed.
+
+Read the following conversation.
+Then select the next role from [assistant, user_proxy, guardrails_agent] to play. Only return the role.
+
+```
+
+As you can see, this expanded prompt is super confusing:
+
+- It is hard to make out where each agent's role-description ends
+- `You` appears numerous times, and refers to three separate agents (GroupChatManager, AssistantAgent, and GuardrailsAgent)
+- It takes a lot of tokens!
+
+Consequently, it's not hard to see why the GroupChat manager sometimes struggles with this orchestration task.
+
+From AutoGen 0.2.2 onward, GroupChat instead relies on the description field. With the description field, the orchestration prompt becomes:
+
+```
+You are in a role play game. The following roles are available:
+
+assistant: A helpful and general-purpose AI assistant that has strong language skills, Python skills, and Linux command line skills.
+user_proxy: A user that can run Python code or input command line commands at a Linux terminal and report back the execution results.
+guardrails_agent: An agent that ensures the conversation conforms to responsible AI guidelines.
+
+Read the following conversation.
+Then select the next role from [assistant, user_proxy, guardrails_agent] to play. Only return the role.
+```
+
+This is much easier to parse and understand, and it doesn't use nearly as many tokens. Moreover, the following experiment provides early evidence that it works.
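+
+Before turning to that experiment, here is a minimal sketch of how the `description` field can be set when constructing agents. The model configuration and the task are placeholders; the descriptions simply mirror the ones shown in the prompt above.
+
+```python
+import autogen
+
+# Placeholder model configuration; substitute your own config list.
+llm_config = {"config_list": [{"model": "gpt-4", "api_key": "<your key>"}]}
+
+assistant = autogen.AssistantAgent(
+    name="assistant",
+    llm_config=llm_config,
+    description="A helpful and general-purpose AI assistant that has strong language skills, Python skills, and Linux command line skills.",
+)
+
+user_proxy = autogen.UserProxyAgent(
+    name="user_proxy",
+    human_input_mode="NEVER",
+    code_execution_config={"work_dir": "coding", "use_docker": False},
+    description="A user that can run Python code or input command line commands at a Linux terminal and report back the execution results.",
+)
+
+# GroupChat uses the descriptions above (rather than the system messages) to pick the next speaker.
+groupchat = autogen.GroupChat(agents=[assistant, user_proxy], messages=[], max_round=12)
+manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config)
+
+user_proxy.initiate_chat(manager, message="Write and run Python code to print the 10th Fibonacci number.")
+```
+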
+
+## An Experiment with Distraction
+
+To illustrate the impact of the `description` field, we set up a three-agent experiment with a reduced 26-problem subset of the HumanEval benchmark. Here, three agents were added to a GroupChat to solve programming problems. The three agents were:
+
+- Coder (default Assistant prompt)
+- UserProxy (configured to execute code)
+- ExecutiveChef (added as a distraction)
+
+The Coder and UserProxy used the AssistantAgent and UserProxy defaults (provided above), while the ExecutiveChef was given the system prompt:
+
+```
+You are an executive chef with 28 years of industry experience. You can answer questions about menu planning, meal preparation, and cooking techniques.
+```
+
+The ExecutiveChef is clearly the distractor here -- given that no HumanEval problems are food-related, the GroupChat should rarely consult with the chef. However, when configured with GPT-3.5-turbo-16k, we can clearly see the GroupChat struggling with orchestration:
+
+#### With versions prior to 0.2.2, using `system_message`:
+
+- The agents solve 3 out of 26 problems on their first turn.
+- The ExecutiveChef is called upon 54 times! (almost as much as the Coder at 68 times)
+
+#### With version 0.2.2, using `description`:
+
+- The agents solve 7 out of 26 problems on the first turn.
+- The ExecutiveChef is called upon 27 times! (versus 84 times for the Coder)
+
+Using the `description` field doubles performance on this task and halves the incidence of calling upon the distractor agent.
+
+## Tips for Writing Good Descriptions
+
+Since `descriptions` serve a different purpose than `system_message`s, it is worth reviewing what makes a good agent description. While descriptions are new, the following tips appear to lead to good results:
+
+- Avoid using the 1st or 2nd person perspective. Descriptions should not contain "I" or "You", unless perhaps "You" is in reference to the GroupChat / orchestrator.
+- Include any details that might help the orchestrator know when to call upon the agent.
+- Keep descriptions short (e.g., "A helpful AI assistant with strong natural language and Python coding skills.").
+
+The main thing to remember is that **the description is for the benefit of the GroupChatManager, not for the Agent's own use or instruction**.
+
+## Conclusion
+
+AutoGen 0.2.2 introduces a `description` field, which becomes the main way agents describe themselves to orchestrators like GroupChat. Since the `description` defaults to the `system_message`, there's nothing you need to change if you were already satisfied with how your group chats were working. However, we expect this feature to generally improve orchestration, so please consider experimenting with the `description` field if you are struggling with GroupChat or want to boost performance.
diff --git a/website/blog/2024-01-23-Code-execution-in-docker/index.mdx b/website/blog/2024-01-23-Code-execution-in-docker/index.mdx
index 067f16210ce..f729e37a126 100644
--- a/website/blog/2024-01-23-Code-execution-in-docker/index.mdx
+++ b/website/blog/2024-01-23-Code-execution-in-docker/index.mdx
@@ -33,7 +33,7 @@ user_proxy.initiate_chat(assistant, message="Plot a chart of NVDA and TESLA stoc
 
 To opt out of from this default behaviour there are some options.
 
-### Diasable code execution entirely
+### Disable code execution entirely
 
 - Set `code_execution_config` to `False` for each code-execution agent.
E.g.: diff --git a/website/blog/2024-01-25-AutoGenBench/index.mdx b/website/blog/2024-01-25-AutoGenBench/index.mdx index 28cdcd8e6a5..3f4b2d4f216 100644 --- a/website/blog/2024-01-25-AutoGenBench/index.mdx +++ b/website/blog/2024-01-25-AutoGenBench/index.mdx @@ -1,148 +1,148 @@ ---- -title: "AutoGenBench -- A Tool for Measuring and Evaluating AutoGen Agents" -authors: - - afourney - - qingyunwu -tags: [AutoGen] ---- - -![AutoGenBench](img/teaser.jpg) - -

- - AutoGenBench is a standalone tool for evaluating AutoGen agents and - workflows on common benchmarks. - -

- -## TL;DR - -Today we are releasing AutoGenBench - a tool for evaluating AutoGen agents and workflows on established LLM and agentic benchmarks. - -AutoGenBench is a standalone command line tool, installable from PyPI, which handles downloading, configuring, running, and reporting supported benchmarks. AutoGenBench works best when run alongside Docker, since it uses Docker to isolate tests from one another. - -- See the [AutoGenBench README](https://github.com/microsoft/autogen/blob/main/samples/tools/autogenbench/README.md) for information on installation and running benchmarks. -- See the [AutoGenBench CONTRIBUTING guide](https://github.com/microsoft/autogen/blob/main/samples/tools/autogenbench/CONTRIBUTING.md) for information on developing or contributing benchmark datasets. - -### Quick Start - -Get started quickly by running the following commands in a bash terminal. - -_Note:_ You may need to adjust the path to the `OAI_CONFIG_LIST`, as appropriate. - -```sh -export OAI_CONFIG_LIST=$(cat ./OAI_CONFIG_LIST) -pip install autogenbench -autogenbench clone HumanEval -cd HumanEval -cat README.md -autogenbench run --subsample 0.1 --repeat 3 Tasks/human_eval_two_agents.jsonl -autogenbench tabulate Results/human_eval_two_agents -``` - -## Introduction - -Measurement and evaluation are core components of every major AI or ML research project. The same is true for AutoGen. To this end, today we are releasing AutoGenBench, a standalone command line tool that we have been using to guide development of AutoGen. Conveniently, AutoGenBench handles: downloading, configuring, running, and reporting results of agents on various public benchmark datasets. In addition to reporting top-line numbers, each AutoGenBench run produces a comprehensive set of logs and telemetry that can be used for debugging, profiling, computing custom metrics, and as input to [AgentEval](https://microsoft.github.io/autogen/blog/2023/11/20/AgentEval). In the remainder of this blog post, we outline core design principles for AutoGenBench (key to understanding its operation); present a guide to installing and running AutoGenBench; outline a roadmap for evaluation; and conclude with an open call for contributions. - -## Design Principles - -AutoGenBench is designed around three core design principles. Knowing these principles will help you understand the tool, its operation and its output. These three principles are: - -- **Repetition:** LLMs are stochastic, and in many cases, so too is the code they write to solve problems. For example, a Python script might call an external search engine, and the results may vary run-to-run. This can lead to variance in agent performance. Repetition is key to measuring and understanding this variance. To this end, AutoGenBench is built from the ground up with an understanding that tasks may be run multiple times, and that variance is a metric we often want to measure. - -- **Isolation:** Agents interact with their worlds in both subtle and overt ways. For example an agent may install a python library or write a file to disk. This can lead to ordering effects that can impact future measurements. Consider, for example, comparing two agents on a common benchmark. One agent may appear more efficient than the other simply because it ran second, and benefitted from the hard work the first agent did in installing and debugging necessary Python libraries. To address this, AutoGenBench isolates each task in its own Docker container. This ensures that all runs start with the same initial conditions. 
(Docker is also a _much safer way to run agent-produced code_, in general.) - -- **Instrumentation:** While top-line metrics are great for comparing agents or models, we often want much more information about how the agents are performing, where they are getting stuck, and how they can be improved. We may also later think of new research questions that require computing a different set of metrics. To this end, AutoGenBench is designed to log everything, and to compute metrics from those logs. This ensures that one can always go back to the logs to answer questions about what happened, run profiling software, or feed the logs into tools like [AgentEval](https://microsoft.github.io/autogen/blog/2023/11/20/AgentEval). - -## Installing and Running AutoGenBench - -As noted above, isolation is a key design principle, and so AutoGenBench must be run in an environment where Docker is available (desktop or Engine). **It will not run in GitHub codespaces**, unless you opt for native execution (which is strongly discouraged). To install Docker Desktop see [https://www.docker.com/products/docker-desktop/](https://www.docker.com/products/docker-desktop/). -Once Docker is installed, AutoGenBench can then be installed as a standalone tool from PyPI. With `pip`, installation can be achieved as follows: - -```sh -pip install autogenbench -``` - -After installation, you must configure your API keys. As with other AutoGen applications, AutoGenBench will look for the OpenAI keys in the OAI_CONFIG_LIST file in the current working directory, or the OAI_CONFIG_LIST environment variable. This behavior can be overridden using a command-line parameter. - -If you will be running multiple benchmarks, it is often most convenient to leverage the environment variable option. You can load your keys into the environment variable by executing: - -```sh -export OAI_CONFIG_LIST=$(cat ./OAI_CONFIG_LIST) -``` - -## A Typical Session - -Once AutoGenBench and necessary keys are installed, a typical session will look as follows: - -``` -autogenbench clone HumanEval -cd HumanEval -cat README.md -autogenbench run --subsample 0.1 --repeat 3 Tasks/human_eval_two_agents.jsonl -autogenbench tabulate results/human_eval_two_agents -``` - -Where: - -- `autogenbench clone HumanEval` downloads and expands the HumanEval benchmark scenario. -- `cd HumanEval; cat README.md` navigates to the benchmark directory, and prints the README (which you should always read!) -- `autogenbench run --subsample 0.1 --repeat 3 Tasks/human_eval_two_agents.jsonl` - runs a 10% subsample of the tasks defined in `Tasks/human_eval_two_agents.jsonl`. Each task is run 3 times. -- `autogenbench tabulate results/human_eval_two_agents` tabulates the results of the run. 
- -After running the above `tabulate` command, you should see output similar to the following: - -``` - Trial 0 Trial 1 Trial 2 -Task Id Success Success Success -------------- --------- --------- --------- -HumanEval_107 False True True -HumanEval_22 True True True -HumanEval_43 True True True -HumanEval_88 True True True -HumanEval_14 True True True -HumanEval_157 True True True -HumanEval_141 True True True -HumanEval_57 True True True -HumanEval_154 True True True -HumanEval_153 True True True -HumanEval_93 False True False -HumanEval_137 True True True -HumanEval_143 True True True -HumanEval_13 True True True -HumanEval_49 True True True -HumanEval_95 True True True -------------- --------- --------- --------- -Successes 14 16 15 -Failures 2 0 1 -Missing 0 0 0 -Total 16 16 16 - -CAUTION: 'autogenbench tabulate' is in early preview. -Please do not cite these values in academic work without first inspecting and verifying the results in the logs yourself. -``` - -From this output we can see the results of the three separate repetitions of each task, and final summary statistics of each run. In this case, the results were generated via GPT-4 (as defined in the OAI_CONFIG_LIST that was provided), and used the `TwoAgents` template. **It is important to remember that AutoGenBench evaluates _specific_ end-to-end configurations of agents (as opposed to evaluating a model or cognitive framework more generally).** - -Finally, complete execution traces and logs can be found in the `Results` folder. See the [AutoGenBench README](https://github.com/microsoft/autogen/blob/main/samples/tools/autogenbench/README.md) for more details about command-line options and output formats. Each of these commands also offers extensive in-line help via: - -- `autogenbench --help` -- `autogenbench clone --help` -- `autogenbench run --help` -- `autogenbench tabulate --help` - -## Roadmap - -While we are announcing AutoGenBench, we note that it is very much an evolving project in its own right. Over the next few weeks and months we hope to: - -- Onboard many additional benchmarks beyond those shipping today -- Greatly improve logging and telemetry -- Introduce new core metrics including total costs, task completion time, conversation turns, etc. -- Provide tighter integration with AgentEval and AutoGen Studio - -For an up to date tracking of our work items on this project, please see [AutoGenBench Work Items](https://github.com/microsoft/autogen/issues/973) - -## Call for Participation - -Finally, we want to end this blog post with an open call for contributions. AutoGenBench is still nascent, and has much opportunity for improvement. New benchmarks are constantly being published, and will need to be added. Everyone may have their own distinct set of metrics that they care most about optimizing, and these metrics should be onboarded. To this end, we welcome any and all contributions to this corner of the AutoGen project. If contributing is something that interests you, please see the [contributor’s guide](https://github.com/microsoft/autogen/blob/main/samples/tools/autogenbench/CONTRIBUTING.md) and join our [Discord](https://aka.ms/autogen-dc) discussion in the [#autogenbench](https://discord.com/channels/1153072414184452236/1199851779328847902) channel! +--- +title: "AutoGenBench -- A Tool for Measuring and Evaluating AutoGen Agents" +authors: + - afourney + - qingyunwu +tags: [AutoGen] +--- + +![AutoGenBench](img/teaser.jpg) + +

+ + AutoGenBench is a standalone tool for evaluating AutoGen agents and + workflows on common benchmarks. + +

+ +## TL;DR + +Today we are releasing AutoGenBench - a tool for evaluating AutoGen agents and workflows on established LLM and agentic benchmarks. + +AutoGenBench is a standalone command line tool, installable from PyPI, which handles downloading, configuring, running, and reporting supported benchmarks. AutoGenBench works best when run alongside Docker, since it uses Docker to isolate tests from one another. + +- See the [AutoGenBench README](https://github.com/microsoft/autogen/blob/main/samples/tools/autogenbench/README.md) for information on installation and running benchmarks. +- See the [AutoGenBench CONTRIBUTING guide](https://github.com/microsoft/autogen/blob/main/samples/tools/autogenbench/CONTRIBUTING.md) for information on developing or contributing benchmark datasets. + +### Quick Start + +Get started quickly by running the following commands in a bash terminal. + +_Note:_ You may need to adjust the path to the `OAI_CONFIG_LIST`, as appropriate. + +```sh +export OAI_CONFIG_LIST=$(cat ./OAI_CONFIG_LIST) +pip install autogenbench +autogenbench clone HumanEval +cd HumanEval +cat README.md +autogenbench run --subsample 0.1 --repeat 3 Tasks/human_eval_two_agents.jsonl +autogenbench tabulate Results/human_eval_two_agents +``` + +## Introduction + +Measurement and evaluation are core components of every major AI or ML research project. The same is true for AutoGen. To this end, today we are releasing AutoGenBench, a standalone command line tool that we have been using to guide development of AutoGen. Conveniently, AutoGenBench handles: downloading, configuring, running, and reporting results of agents on various public benchmark datasets. In addition to reporting top-line numbers, each AutoGenBench run produces a comprehensive set of logs and telemetry that can be used for debugging, profiling, computing custom metrics, and as input to [AgentEval](https://microsoft.github.io/autogen/blog/2023/11/20/AgentEval). In the remainder of this blog post, we outline core design principles for AutoGenBench (key to understanding its operation); present a guide to installing and running AutoGenBench; outline a roadmap for evaluation; and conclude with an open call for contributions. + +## Design Principles + +AutoGenBench is designed around three core design principles. Knowing these principles will help you understand the tool, its operation and its output. These three principles are: + +- **Repetition:** LLMs are stochastic, and in many cases, so too is the code they write to solve problems. For example, a Python script might call an external search engine, and the results may vary run-to-run. This can lead to variance in agent performance. Repetition is key to measuring and understanding this variance. To this end, AutoGenBench is built from the ground up with an understanding that tasks may be run multiple times, and that variance is a metric we often want to measure. + +- **Isolation:** Agents interact with their worlds in both subtle and overt ways. For example an agent may install a python library or write a file to disk. This can lead to ordering effects that can impact future measurements. Consider, for example, comparing two agents on a common benchmark. One agent may appear more efficient than the other simply because it ran second, and benefitted from the hard work the first agent did in installing and debugging necessary Python libraries. To address this, AutoGenBench isolates each task in its own Docker container. This ensures that all runs start with the same initial conditions. 
(Docker is also a _much safer way to run agent-produced code_, in general.) + +- **Instrumentation:** While top-line metrics are great for comparing agents or models, we often want much more information about how the agents are performing, where they are getting stuck, and how they can be improved. We may also later think of new research questions that require computing a different set of metrics. To this end, AutoGenBench is designed to log everything, and to compute metrics from those logs. This ensures that one can always go back to the logs to answer questions about what happened, run profiling software, or feed the logs into tools like [AgentEval](https://microsoft.github.io/autogen/blog/2023/11/20/AgentEval). + +## Installing and Running AutoGenBench + +As noted above, isolation is a key design principle, and so AutoGenBench must be run in an environment where Docker is available (Desktop or Engine). **It will not run in GitHub Codespaces**, unless you opt for native execution (which is strongly discouraged). To install Docker Desktop, see [https://www.docker.com/products/docker-desktop/](https://www.docker.com/products/docker-desktop/). +Once Docker is installed, AutoGenBench can be installed as a standalone tool from PyPI. With `pip`, installation can be achieved as follows: + +```sh +pip install autogenbench +``` + +After installation, you must configure your API keys. As with other AutoGen applications, AutoGenBench will look for the OpenAI keys in the OAI_CONFIG_LIST file in the current working directory, or in the OAI_CONFIG_LIST environment variable. This behavior can be overridden using a command-line parameter. + +If you will be running multiple benchmarks, it is often most convenient to leverage the environment variable option. You can load your keys into the environment variable by executing: + +```sh +export OAI_CONFIG_LIST=$(cat ./OAI_CONFIG_LIST) +``` + +## A Typical Session + +Once AutoGenBench is installed and the necessary keys are configured, a typical session will look as follows: + +``` +autogenbench clone HumanEval +cd HumanEval +cat README.md +autogenbench run --subsample 0.1 --repeat 3 Tasks/human_eval_two_agents.jsonl +autogenbench tabulate Results/human_eval_two_agents +``` + +Where: + +- `autogenbench clone HumanEval` downloads and expands the HumanEval benchmark scenario. +- `cd HumanEval; cat README.md` navigates to the benchmark directory and prints the README (which you should always read!). +- `autogenbench run --subsample 0.1 --repeat 3 Tasks/human_eval_two_agents.jsonl` + runs a 10% subsample of the tasks defined in `Tasks/human_eval_two_agents.jsonl`. Each task is run 3 times. +- `autogenbench tabulate Results/human_eval_two_agents` tabulates the results of the run.
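+Because of the instrumentation principle above, every run also leaves behind raw logs that you can post-process for custom metrics. The snippet below is only a minimal, illustrative sketch (not part of AutoGenBench itself): it assumes the `Results/human_eval_two_agents` folder produced by the session above and simply inventories the log files it contains, as a starting point for whatever analysis you need; the exact files written may vary by benchmark and AutoGenBench version.
+
+```python
+from pathlib import Path
+
+# Illustrative sketch only: walk the results folder produced by
+# `autogenbench run` and list the per-task log files, so that a custom
+# metric script knows what is available to parse.
+results_dir = Path("Results/human_eval_two_agents")
+
+log_files = sorted(p for p in results_dir.rglob("*") if p.is_file())
+for log_file in log_files:
+    n_lines = sum(1 for _ in log_file.open(errors="replace"))
+    print(f"{log_file.relative_to(results_dir)}: {n_lines} lines")
+```
+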
+ +After running the above `tabulate` command, you should see output similar to the following: + +``` + Trial 0 Trial 1 Trial 2 +Task Id Success Success Success +------------- --------- --------- --------- +HumanEval_107 False True True +HumanEval_22 True True True +HumanEval_43 True True True +HumanEval_88 True True True +HumanEval_14 True True True +HumanEval_157 True True True +HumanEval_141 True True True +HumanEval_57 True True True +HumanEval_154 True True True +HumanEval_153 True True True +HumanEval_93 False True False +HumanEval_137 True True True +HumanEval_143 True True True +HumanEval_13 True True True +HumanEval_49 True True True +HumanEval_95 True True True +------------- --------- --------- --------- +Successes 14 16 15 +Failures 2 0 1 +Missing 0 0 0 +Total 16 16 16 + +CAUTION: 'autogenbench tabulate' is in early preview. +Please do not cite these values in academic work without first inspecting and verifying the results in the logs yourself. +``` + +From this output we can see the results of the three separate repetitions of each task, and final summary statistics of each run. In this case, the results were generated via GPT-4 (as defined in the OAI_CONFIG_LIST that was provided), and used the `TwoAgents` template. **It is important to remember that AutoGenBench evaluates _specific_ end-to-end configurations of agents (as opposed to evaluating a model or cognitive framework more generally).** + +Finally, complete execution traces and logs can be found in the `Results` folder. See the [AutoGenBench README](https://github.com/microsoft/autogen/blob/main/samples/tools/autogenbench/README.md) for more details about command-line options and output formats. Each of these commands also offers extensive in-line help via: + +- `autogenbench --help` +- `autogenbench clone --help` +- `autogenbench run --help` +- `autogenbench tabulate --help` + +## Roadmap + +While we are announcing AutoGenBench, we note that it is very much an evolving project in its own right. Over the next few weeks and months we hope to: + +- Onboard many additional benchmarks beyond those shipping today +- Greatly improve logging and telemetry +- Introduce new core metrics including total costs, task completion time, conversation turns, etc. +- Provide tighter integration with AgentEval and AutoGen Studio + +For an up to date tracking of our work items on this project, please see [AutoGenBench Work Items](https://github.com/microsoft/autogen/issues/973) + +## Call for Participation + +Finally, we want to end this blog post with an open call for contributions. AutoGenBench is still nascent, and has much opportunity for improvement. New benchmarks are constantly being published, and will need to be added. Everyone may have their own distinct set of metrics that they care most about optimizing, and these metrics should be onboarded. To this end, we welcome any and all contributions to this corner of the AutoGen project. If contributing is something that interests you, please see the [contributor’s guide](https://github.com/microsoft/autogen/blob/main/samples/tools/autogenbench/CONTRIBUTING.md) and join our [Discord](https://aka.ms/autogen-dc) discussion in the [#autogenbench](https://discord.com/channels/1153072414184452236/1199851779328847902) channel! 
diff --git a/website/blog/2024-02-11-FSM-GroupChat/index.mdx b/website/blog/2024-02-11-FSM-GroupChat/index.mdx index 74b5b49e35f..3f2f7a5dba1 100644 --- a/website/blog/2024-02-11-FSM-GroupChat/index.mdx +++ b/website/blog/2024-02-11-FSM-GroupChat/index.mdx @@ -1,288 +1,288 @@ ---- -title: "FSM Group Chat -- User-specified agent transitions" -authors: - - joshkyh - - freedeaths -tags: [AutoGen] ---- - -![FSM Group Chat](img/teaser.jpg) -

Finite State Machine (FSM) Group Chat allows the user to constrain agent transitions.

- - -## TL;DR -Recently, FSM Group Chat is released that allows the user to input a transition graph to constrain agent transitions. This is useful as the number of agents increases because the number of transition pairs (N choose 2 combinations) increases exponentially increasing the risk of sub-optimal transitions, which leads to wastage of tokens and/or poor outcomes. - -## Possible use-cases for transition graph -1. One-pass workflow, i.e., we want each agent to only have one pass at the problem, Agent A -> B -> C. -2. Decision tree flow, like a decision tree, we start with a root node (agent), and flow down the decision tree with agents being nodes. For example, if the query is a SQL query, hand over to the SQL agent, else if the query is a RAG query, hand over to the RAG agent. -3. Sequential Team Ops. Suppose we have a team of 3 developer agents, each responsible for a different GitHub repo. We also have a team of business analyst that discuss and debate the overall goal of the user. We could have the manager agent of the developer team speak to the manager agent of the business analysis team. That way, the discussions are more focused team-wise, and better outcomes can be expected. - -Note that we are not enforcing a directed acyclic graph; the user can specify the graph to be acyclic, but cyclic workflows can also be useful to iteratively work on a problem, and layering additional analysis onto the solution. - - -## Usage Guide -We have added two parameters `allowed_or_disallowed_speaker_transitions` and `speaker_transitions_type`. -- `allowed_or_disallowed_speaker_transitions`: is a dictionary with the type expectation of `{Agent: [Agent]}`. The key refers to the source agent, while the value(s) in the list refers to the target agent(s). If none, a fully connection graph is assumed. -- `speaker_transitions_type`: is a string with the type expectation of string, and specifically, one of ["allowed", "disallowed"]. We wanted the user to be able to supply a dictionary of allowed or disallowed transitions to improve the ease of use. In the code base, we would invert the disallowed transition into a allowed transition dictionary `allowed_speaker_transitions_dict`. - - -### Application of the FSM Feature - -A quick demonstration of how to initiate a FSM-based `GroupChat` in the `AutoGen` framework. In this demonstration, if we consider each agent as a state, and each agent speaks according to certain conditions. For example, User always initiates the task first, followed by Planner creating a plan. Then Engineer and Executor work alternately, with Critic intervening when necessary, and after Critic, only Planner should revise additional plans. Each state can only exist at a time, and there are transition conditions between states. Therefore, GroupChat can be well abstracted as a Finite-State Machine (FSM). - -![visualization](img/FSM_logic.png) - - -### Usage - -0. Pre-requisites -```bash -pip install autogen[graph] -``` - -1. Import dependencies - - ```python - from autogen.agentchat import GroupChat, AssistantAgent, UserProxyAgent, GroupChatManager - from autogen.oai.openai_utils import config_list_from_dotenv - ``` -2. 
Configure LLM parameters - - ```python - # Please feel free to change it as you wish - config_list = config_list_from_dotenv( - dotenv_file_path='.env', - model_api_key_map={'gpt-4-1106-preview':'OPENAI_API_KEY'}, - filter_dict={ - "model": { - "gpt-4-1106-preview" - } - } - ) - - gpt_config = { - "cache_seed": None, - "temperature": 0, - "config_list": config_list, - "timeout": 100, - } - ``` - -3. Define the task - - ```python - # describe the task - task = """Add 1 to the number output by the previous role. If the previous number is 20, output "TERMINATE".""" - ``` - -4. Define agents - - ```python - # agents configuration - engineer = AssistantAgent( - name="Engineer", - llm_config=gpt_config, - system_message=task, - description="""I am **ONLY** allowed to speak **immediately** after `Planner`, `Critic` and `Executor`. - If the last number mentioned by `Critic` is not a multiple of 5, the next speaker must be `Engineer`. - """ - ) - - planner = AssistantAgent( - name="Planner", - system_message=task, - llm_config=gpt_config, - description="""I am **ONLY** allowed to speak **immediately** after `User` or `Critic`. - If the last number mentioned by `Critic` is a multiple of 5, the next speaker must be `Planner`. - """ - ) - - executor = AssistantAgent( - name="Executor", - system_message=task, - is_termination_msg=lambda x: x.get("content", "") and x.get("content", "").rstrip().endswith("FINISH"), - llm_config=gpt_config, - description="""I am **ONLY** allowed to speak **immediately** after `Engineer`. - If the last number mentioned by `Engineer` is a multiple of 3, the next speaker can only be `Executor`. - """ - ) - - critic = AssistantAgent( - name="Critic", - system_message=task, - llm_config=gpt_config, - description="""I am **ONLY** allowed to speak **immediately** after `Engineer`. - If the last number mentioned by `Engineer` is not a multiple of 3, the next speaker can only be `Critic`. - """ - ) - - user_proxy = UserProxyAgent( - name="User", - system_message=task, - code_execution_config=False, - human_input_mode="NEVER", - llm_config=False, - description=""" - Never select me as a speaker. - """ - ) - ``` - - 1. Here, I have configured the `system_messages` as "task" because every agent should know what it needs to do. In this example, each agent has the same task, which is to count in sequence. - 2. **The most important point is the `description` parameter, where I have used natural language to describe the transition conditions of the FSM. Because the manager knows which agents are available next based on the constraints of the graph, I describe in the `description` field of each candidate agent when it can speak, effectively describing the transition conditions in the FSM.** - -5. Define the graph - - ```python - graph_dict = {} - graph_dict[user_proxy] = [planner] - graph_dict[planner] = [engineer] - graph_dict[engineer] = [critic, executor] - graph_dict[critic] = [engineer, planner] - graph_dict[executor] = [engineer] - ``` - - 1. **The graph here and the transition conditions mentioned above together form a complete FSM. Both are essential and cannot be missing.** - 2. You can visualize it as you wish, which is shown as follows - - ![visualization](img/FSM_of_multi-agents.png) - -6. 
Define a `GroupChat` and a `GroupChatManager` - - ```python - agents = [user_proxy, engineer, planner, executor, critic] - - # create the groupchat - group_chat = GroupChat(agents=agents, messages=[], max_round=25, allowed_or_disallowed_speaker_transitions=graph_dict, allow_repeat_speaker=None, speaker_transitions_type="allowed") - - # create the manager - manager = GroupChatManager( - groupchat=group_chat, - llm_config=gpt_config, - is_termination_msg=lambda x: x.get("content", "") and x.get("content", "").rstrip().endswith("TERMINATE"), - code_execution_config=False, - ) - ``` - -7. Initiate the chat - - ```python - # initiate the task - user_proxy.initiate_chat( - manager, - message="1", - clear_history=True - ) - ``` - -8. You may get the following output(I deleted the ignorable warning): - - ``` - User (to chat_manager): - - 1 - - -------------------------------------------------------------------------------- - Planner (to chat_manager): - - 2 - - -------------------------------------------------------------------------------- - Engineer (to chat_manager): - - 3 - - -------------------------------------------------------------------------------- - Executor (to chat_manager): - - 4 - - -------------------------------------------------------------------------------- - Engineer (to chat_manager): - - 5 - - -------------------------------------------------------------------------------- - Critic (to chat_manager): - - 6 - - -------------------------------------------------------------------------------- - Engineer (to chat_manager): - - 7 - - -------------------------------------------------------------------------------- - Critic (to chat_manager): - - 8 - - -------------------------------------------------------------------------------- - Engineer (to chat_manager): - - 9 - - -------------------------------------------------------------------------------- - Executor (to chat_manager): - - 10 - - -------------------------------------------------------------------------------- - Engineer (to chat_manager): - - 11 - - -------------------------------------------------------------------------------- - Critic (to chat_manager): - - 12 - - -------------------------------------------------------------------------------- - Engineer (to chat_manager): - - 13 - - -------------------------------------------------------------------------------- - Critic (to chat_manager): - - 14 - - -------------------------------------------------------------------------------- - Engineer (to chat_manager): - - 15 - - -------------------------------------------------------------------------------- - Executor (to chat_manager): - - 16 - - -------------------------------------------------------------------------------- - Engineer (to chat_manager): - - 17 - - -------------------------------------------------------------------------------- - Critic (to chat_manager): - - 18 - - -------------------------------------------------------------------------------- - Engineer (to chat_manager): - - 19 - - -------------------------------------------------------------------------------- - Critic (to chat_manager): - - 20 - - -------------------------------------------------------------------------------- - Planner (to chat_manager): - - TERMINATE - ``` - -## Notebook examples -More examples can be found in the [notebook](https://microsoft.github.io/autogen/docs/notebooks/agentchat_groupchat_finite_state_machine/). 
The notebook includes more examples of possible transition paths such as (1) hub and spoke, (2) sequential team operations, and (3) think aloud and debate. It also uses the function `visualize_speaker_transitions_dict` from `autogen.graph_utils` to visualize the various graphs. +--- +title: "FSM Group Chat -- User-specified agent transitions" +authors: + - joshkyh + - freedeaths +tags: [AutoGen] +--- + +![FSM Group Chat](img/teaser.jpg) +

Finite State Machine (FSM) Group Chat allows the user to constrain agent transitions.

+ + +## TL;DR +FSM Group Chat has recently been released; it allows the user to input a transition graph to constrain agent transitions. This is useful as the number of agents increases, because the number of possible transition pairs (N choose 2 combinations) grows quadratically, increasing the risk of sub-optimal transitions, which wastes tokens and/or leads to poor outcomes. + +## Possible use-cases for transition graph +1. One-pass workflow, i.e., each agent gets only one pass at the problem: Agent A -> B -> C. +2. Decision tree flow: we start with a root node (agent) and flow down the decision tree, with agents as nodes. For example, if the query is a SQL query, hand it over to the SQL agent; if it is a RAG query, hand it over to the RAG agent. +3. Sequential Team Ops. Suppose we have a team of 3 developer agents, each responsible for a different GitHub repo. We also have a team of business analysts who discuss and debate the overall goal of the user. We could have the manager agent of the developer team speak to the manager agent of the business analysis team. That way, the discussions are more focused team-wise, and better outcomes can be expected. + +Note that we are not enforcing a directed acyclic graph; the user can specify the graph to be acyclic, but cyclic workflows can also be useful for iterating on a problem and layering additional analysis onto the solution. + + +## Usage Guide +We have added two parameters `allowed_or_disallowed_speaker_transitions` and `speaker_transitions_type`. +- `allowed_or_disallowed_speaker_transitions`: a dictionary of type `{Agent: [Agent]}`. The key refers to the source agent, while the values in the list refer to the target agent(s). If `None`, a fully connected graph is assumed. +- `speaker_transitions_type`: a string, specifically one of ["allowed", "disallowed"]. We wanted the user to be able to supply a dictionary of either allowed or disallowed transitions to improve ease of use. In the code base, we invert disallowed transitions into an allowed transition dictionary `allowed_speaker_transitions_dict`. + + +### Application of the FSM Feature + +Here is a quick demonstration of how to initiate an FSM-based `GroupChat` in the `AutoGen` framework. In this demonstration, we consider each agent as a state, and each agent speaks according to certain conditions. For example, User always initiates the task first, followed by Planner creating a plan. Then Engineer and Executor work alternately, with Critic intervening when necessary, and after Critic, only Planner should revise additional plans. Only one state can be active at a time, and there are transition conditions between states. Therefore, GroupChat can be well abstracted as a Finite-State Machine (FSM). + +![visualization](img/FSM_logic.png) + + +### Usage + +0. Pre-requisites +```bash +pip install autogen[graph] +``` + +1. Import dependencies + + ```python + from autogen.agentchat import GroupChat, AssistantAgent, UserProxyAgent, GroupChatManager + from autogen.oai.openai_utils import config_list_from_dotenv + ``` +2. 
Configure LLM parameters + + ```python + # Please feel free to change it as you wish + config_list = config_list_from_dotenv( + dotenv_file_path='.env', + model_api_key_map={'gpt-4-1106-preview':'OPENAI_API_KEY'}, + filter_dict={ + "model": { + "gpt-4-1106-preview" + } + } + ) + + gpt_config = { + "cache_seed": None, + "temperature": 0, + "config_list": config_list, + "timeout": 100, + } + ``` + +3. Define the task + + ```python + # describe the task + task = """Add 1 to the number output by the previous role. If the previous number is 20, output "TERMINATE".""" + ``` + +4. Define agents + + ```python + # agents configuration + engineer = AssistantAgent( + name="Engineer", + llm_config=gpt_config, + system_message=task, + description="""I am **ONLY** allowed to speak **immediately** after `Planner`, `Critic` and `Executor`. + If the last number mentioned by `Critic` is not a multiple of 5, the next speaker must be `Engineer`. + """ + ) + + planner = AssistantAgent( + name="Planner", + system_message=task, + llm_config=gpt_config, + description="""I am **ONLY** allowed to speak **immediately** after `User` or `Critic`. + If the last number mentioned by `Critic` is a multiple of 5, the next speaker must be `Planner`. + """ + ) + + executor = AssistantAgent( + name="Executor", + system_message=task, + is_termination_msg=lambda x: x.get("content", "") and x.get("content", "").rstrip().endswith("FINISH"), + llm_config=gpt_config, + description="""I am **ONLY** allowed to speak **immediately** after `Engineer`. + If the last number mentioned by `Engineer` is a multiple of 3, the next speaker can only be `Executor`. + """ + ) + + critic = AssistantAgent( + name="Critic", + system_message=task, + llm_config=gpt_config, + description="""I am **ONLY** allowed to speak **immediately** after `Engineer`. + If the last number mentioned by `Engineer` is not a multiple of 3, the next speaker can only be `Critic`. + """ + ) + + user_proxy = UserProxyAgent( + name="User", + system_message=task, + code_execution_config=False, + human_input_mode="NEVER", + llm_config=False, + description=""" + Never select me as a speaker. + """ + ) + ``` + + 1. Here, I have configured the `system_messages` as "task" because every agent should know what it needs to do. In this example, each agent has the same task, which is to count in sequence. + 2. **The most important point is the `description` parameter, where I have used natural language to describe the transition conditions of the FSM. Because the manager knows which agents are available next based on the constraints of the graph, I describe in the `description` field of each candidate agent when it can speak, effectively describing the transition conditions in the FSM.** + +5. Define the graph + + ```python + graph_dict = {} + graph_dict[user_proxy] = [planner] + graph_dict[planner] = [engineer] + graph_dict[engineer] = [critic, executor] + graph_dict[critic] = [engineer, planner] + graph_dict[executor] = [engineer] + ``` + + 1. **The graph here and the transition conditions mentioned above together form a complete FSM. Both are essential and cannot be missing.** + 2. You can visualize it as you wish, which is shown as follows + + ![visualization](img/FSM_of_multi-agents.png) + +6. 
Define a `GroupChat` and a `GroupChatManager` + + ```python + agents = [user_proxy, engineer, planner, executor, critic] + + # create the groupchat + group_chat = GroupChat(agents=agents, messages=[], max_round=25, allowed_or_disallowed_speaker_transitions=graph_dict, allow_repeat_speaker=None, speaker_transitions_type="allowed") + + # create the manager + manager = GroupChatManager( + groupchat=group_chat, + llm_config=gpt_config, + is_termination_msg=lambda x: x.get("content", "") and x.get("content", "").rstrip().endswith("TERMINATE"), + code_execution_config=False, + ) + ``` + +7. Initiate the chat + + ```python + # initiate the task + user_proxy.initiate_chat( + manager, + message="1", + clear_history=True + ) + ``` + +8. You may get the following output(I deleted the ignorable warning): + + ``` + User (to chat_manager): + + 1 + + -------------------------------------------------------------------------------- + Planner (to chat_manager): + + 2 + + -------------------------------------------------------------------------------- + Engineer (to chat_manager): + + 3 + + -------------------------------------------------------------------------------- + Executor (to chat_manager): + + 4 + + -------------------------------------------------------------------------------- + Engineer (to chat_manager): + + 5 + + -------------------------------------------------------------------------------- + Critic (to chat_manager): + + 6 + + -------------------------------------------------------------------------------- + Engineer (to chat_manager): + + 7 + + -------------------------------------------------------------------------------- + Critic (to chat_manager): + + 8 + + -------------------------------------------------------------------------------- + Engineer (to chat_manager): + + 9 + + -------------------------------------------------------------------------------- + Executor (to chat_manager): + + 10 + + -------------------------------------------------------------------------------- + Engineer (to chat_manager): + + 11 + + -------------------------------------------------------------------------------- + Critic (to chat_manager): + + 12 + + -------------------------------------------------------------------------------- + Engineer (to chat_manager): + + 13 + + -------------------------------------------------------------------------------- + Critic (to chat_manager): + + 14 + + -------------------------------------------------------------------------------- + Engineer (to chat_manager): + + 15 + + -------------------------------------------------------------------------------- + Executor (to chat_manager): + + 16 + + -------------------------------------------------------------------------------- + Engineer (to chat_manager): + + 17 + + -------------------------------------------------------------------------------- + Critic (to chat_manager): + + 18 + + -------------------------------------------------------------------------------- + Engineer (to chat_manager): + + 19 + + -------------------------------------------------------------------------------- + Critic (to chat_manager): + + 20 + + -------------------------------------------------------------------------------- + Planner (to chat_manager): + + TERMINATE + ``` + +## Notebook examples +More examples can be found in the [notebook](https://microsoft.github.io/autogen/docs/notebooks/agentchat_groupchat_finite_state_machine/). 
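+Before opening the notebook, you can also sanity-check the FSM defined above by plotting the transition graph from step 5. This is a minimal sketch: it assumes the optional graph dependencies installed via `pip install autogen[graph]`, and reuses the `graph_dict` and `agents` objects created in steps 4-6.
+
+```python
+from autogen.graph_utils import visualize_speaker_transitions_dict
+
+# Plot the allowed speaker transitions defined in step 5.
+# `graph_dict` and `agents` are the objects created in steps 4-6 above.
+visualize_speaker_transitions_dict(graph_dict, agents)
+```
+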
The notebook includes more examples of possible transition paths such as (1) hub and spoke, (2) sequential team operations, and (3) think aloud and debate. It also uses the function `visualize_speaker_transitions_dict` from `autogen.graph_utils` to visualize the various graphs. diff --git a/website/blog/2024-02-29-StateFlow/index.mdx b/website/blog/2024-02-29-StateFlow/index.mdx index 17334400204..c4fff53132b 100644 --- a/website/blog/2024-02-29-StateFlow/index.mdx +++ b/website/blog/2024-02-29-StateFlow/index.mdx @@ -102,7 +102,7 @@ scientist = autogen.AssistantAgent( ) ``` -In the Figure, we define a simple workflow for research with 4 states: Init, Retrieve, Reserach, and End. Within each state, we will call different agents to perform the tasks. +In the Figure, we define a simple workflow for research with 4 states: Init, Retrieve, Research, and End. Within each state, we will call different agents to perform the tasks. - Init: We use the initializer to start the workflow. - Retrieve: We will first call the coder to write code and then call the executor to execute the code. - Research: We will call the scientist to read the papers and write a summary. diff --git a/website/blog/2024-05-24-Agent/index.mdx b/website/blog/2024-05-24-Agent/index.mdx index 520805dc693..25fa0d937ee 100644 --- a/website/blog/2024-05-24-Agent/index.mdx +++ b/website/blog/2024-05-24-Agent/index.mdx @@ -141,7 +141,7 @@ better with low cost. [EcoAssistant](/blog/2023/11/09/EcoAssistant) is a good ex - [AutoDefense](/blog/2024/03/11/AutoDefense/Defending%20LLMs%20Against%20Jailbreak%20Attacks%20with%20AutoDefense) demonstrates that using multi-agents reduces the risk of suffering from jailbreak attacks. -There are certainly tradeoffs to make. The large design space of multi-agents offers these tradeoffs and opens up new opportunites for optimization. +There are certainly tradeoffs to make. The large design space of multi-agents offers these tradeoffs and opens up new opportunities for optimization. > Over a year since the debut of Ask AT&T, the generative AI platform to which we’ve onboarded over 80,000 users, AT&T has been enhancing its capabilities by incorporating 'AI Agents'. These agents, powered by the Autogen framework pioneered by Microsoft (https://microsoft.github.io/autogen/blog/2023/12/01/AutoGenStudio/), are designed to tackle complicated workflows and tasks that traditional language models find challenging. To drive collaboration, AT&T is contributing back to the open-source project by introducing features that facilitate enhanced security and role-based access for various projects and data. 
> diff --git a/website/blog/authors.yml b/website/blog/authors.yml index b52fffbdd0f..70a4e5c0f9c 100644 --- a/website/blog/authors.yml +++ b/website/blog/authors.yml @@ -1,125 +1,125 @@ -sonichi: - name: Chi Wang - title: Principal Researcher at Microsoft Research - url: https://www.linkedin.com/in/chi-wang-49b15b16/ - image_url: https://github.com/sonichi.png - -qingyunwu: - name: Qingyun Wu - title: Assistant Professor at the Pennsylvania State University - url: https://qingyun-wu.github.io/ - image_url: https://github.com/qingyun-wu.png - -yiranwu: - name: Yiran Wu - title: PhD student at Pennsylvania State University - url: https://github.com/kevin666aa - image_url: https://github.com/kevin666aa.png - -jialeliu: - name: Jiale Liu - title: Undergraduate student at Xidian University - url: https://leoljl.github.io - image_url: https://github.com/LeoLjl/leoljl.github.io/blob/main/profile.jpg?raw=true - -thinkall: - name: Li Jiang - title: Senior Software Engineer at Microsoft - url: https://github.com/thinkall - image_url: https://github.com/thinkall.png - -rickyloynd-microsoft: - name: Ricky Loynd - title: Senior Research Engineer at Microsoft - url: https://github.com/rickyloynd-microsoft - image_url: https://github.com/rickyloynd-microsoft.png - -samershi: - name: Saleema Amershi - title: Senior Principal Research Manager at Microsoft Research - url: https://github.com/samershi - image_url: https://github.com/samershi.png - -pcdeadeasy: - name: Piali Choudhury - title: Principal RSDE at Microsoft Research - url: https://github.com/pcdeadeasy - image_url: https://github.com/pcdeadeasy.png - -victordibia: - name: Victor Dibia - title: Principal RSDE at Microsoft Research - url: https://github.com/victordibia - image_url: https://github.com/victordibia.png - -afourney: - name: Adam Fourney - title: Principal Researcher Microsoft Research - url: https://www.adamfourney.com - image_url: https://github.com/afourney.png - -beibinli: - name: Beibin Li - title: Senior Research Engineer at Microsoft - url: https://github.com/beibinli - image_url: https://github.com/beibinli.png - -gagb: - name: Gagan Bansal - title: Senior Researcher at Microsoft Research - url: https://www.linkedin.com/in/gagan-bansal/ - image_url: https://github.com/gagb.png - -jieyuz2: - name: Jieyu Zhang - title: PhD student at University of Washington - url: https://jieyuz2.github.io/ - image_url: https://github.com/jieyuz2.png - -julianakiseleva: - name: Julia Kiseleva - title: Senior Researcher at Microsoft Research - url: https://github.com/julianakiseleva/ - image_url: https://avatars.githubusercontent.com/u/5908392?v=4 - -narabzad: - name: Negar Arabzadeh - title: PhD student at the University of Waterloo - url: https://www.negara.me/ - image_url: https://github.com/Narabzad.png - -LinxinS97: - name: Linxin Song - title: MS student at Waseda University - url: https://linxins97.github.io/ - image_url: https://github.com/LinxinS97.png - -skzhang1: - name: Shaokun Zhang - title: PhD student at the Pennsylvania State University - url: https://github.com/skzhang1 - image_url: https://github.com/skzhang1.png - -olgavrou: - name: Olga Vrousgou - title: Senior Software Engineer at Microsoft Research - url: https://github.com/olgavrou/ - image_url: https://github.com/olgavrou.png - -joshkyh: - name: Joshua Kim - title: AI Freelancer at SpectData - url: https://github.com/joshkyh/ - image_url: https://github.com/joshkyh.png - -freedeaths: - name: Yishen Sun - title: Data Scientist at PingCAP LAB - url: 
https://github.com/freedeaths/ - image_url: https://github.com/freedeaths.png - -yifanzeng: - name: Yifan Zeng - title: PhD student at Oregon State University - url: https://xhmy.github.io/ +sonichi: + name: Chi Wang + title: Principal Researcher at Microsoft Research + url: https://www.linkedin.com/in/chi-wang-49b15b16/ + image_url: https://github.com/sonichi.png + +qingyunwu: + name: Qingyun Wu + title: Assistant Professor at the Pennsylvania State University + url: https://qingyun-wu.github.io/ + image_url: https://github.com/qingyun-wu.png + +yiranwu: + name: Yiran Wu + title: PhD student at Pennsylvania State University + url: https://github.com/kevin666aa + image_url: https://github.com/kevin666aa.png + +jialeliu: + name: Jiale Liu + title: Undergraduate student at Xidian University + url: https://leoljl.github.io + image_url: https://github.com/LeoLjl/leoljl.github.io/blob/main/profile.jpg?raw=true + +thinkall: + name: Li Jiang + title: Senior Software Engineer at Microsoft + url: https://github.com/thinkall + image_url: https://github.com/thinkall.png + +rickyloynd-microsoft: + name: Ricky Loynd + title: Senior Research Engineer at Microsoft + url: https://github.com/rickyloynd-microsoft + image_url: https://github.com/rickyloynd-microsoft.png + +samershi: + name: Saleema Amershi + title: Senior Principal Research Manager at Microsoft Research + url: https://github.com/samershi + image_url: https://github.com/samershi.png + +pcdeadeasy: + name: Piali Choudhury + title: Principal RSDE at Microsoft Research + url: https://github.com/pcdeadeasy + image_url: https://github.com/pcdeadeasy.png + +victordibia: + name: Victor Dibia + title: Principal RSDE at Microsoft Research + url: https://github.com/victordibia + image_url: https://github.com/victordibia.png + +afourney: + name: Adam Fourney + title: Principal Researcher Microsoft Research + url: https://www.adamfourney.com + image_url: https://github.com/afourney.png + +beibinli: + name: Beibin Li + title: Senior Research Engineer at Microsoft + url: https://github.com/beibinli + image_url: https://github.com/beibinli.png + +gagb: + name: Gagan Bansal + title: Senior Researcher at Microsoft Research + url: https://www.linkedin.com/in/gagan-bansal/ + image_url: https://github.com/gagb.png + +jieyuz2: + name: Jieyu Zhang + title: PhD student at University of Washington + url: https://jieyuz2.github.io/ + image_url: https://github.com/jieyuz2.png + +julianakiseleva: + name: Julia Kiseleva + title: Senior Researcher at Microsoft Research + url: https://github.com/julianakiseleva/ + image_url: https://avatars.githubusercontent.com/u/5908392?v=4 + +narabzad: + name: Negar Arabzadeh + title: PhD student at the University of Waterloo + url: https://www.negara.me/ + image_url: https://github.com/Narabzad.png + +LinxinS97: + name: Linxin Song + title: MS student at Waseda University + url: https://linxins97.github.io/ + image_url: https://github.com/LinxinS97.png + +skzhang1: + name: Shaokun Zhang + title: PhD student at the Pennsylvania State University + url: https://github.com/skzhang1 + image_url: https://github.com/skzhang1.png + +olgavrou: + name: Olga Vrousgou + title: Senior Software Engineer at Microsoft Research + url: https://github.com/olgavrou/ + image_url: https://github.com/olgavrou.png + +joshkyh: + name: Joshua Kim + title: AI Freelancer at SpectData + url: https://github.com/joshkyh/ + image_url: https://github.com/joshkyh.png + +freedeaths: + name: Yishen Sun + title: Data Scientist at PingCAP LAB + url: 
https://github.com/freedeaths/ + image_url: https://github.com/freedeaths.png + +yifanzeng: + name: Yifan Zeng + title: PhD student at Oregon State University + url: https://xhmy.github.io/ image_url: https://xhmy.github.io/assets/img/photo.JPG diff --git a/website/docs/Use-Cases/enhanced_inference.md b/website/docs/Use-Cases/enhanced_inference.md index e97b67fa9de..14723391e8c 100644 --- a/website/docs/Use-Cases/enhanced_inference.md +++ b/website/docs/Use-Cases/enhanced_inference.md @@ -183,7 +183,7 @@ client = OpenAIWrapper( "api_key": os.environ.get("AZURE_OPENAI_API_KEY"), "api_type": "azure", "base_url": os.environ.get("AZURE_OPENAI_API_BASE"), - "api_version": "2024-02-15-preview", + "api_version": "2024-02-01", }, { "model": "gpt-3.5-turbo", diff --git a/website/docs/topics/llm_configuration.ipynb b/website/docs/topics/llm_configuration.ipynb index c0a1b7e74a9..f6f383cd85d 100644 --- a/website/docs/topics/llm_configuration.ipynb +++ b/website/docs/topics/llm_configuration.ipynb @@ -92,7 +92,7 @@ " \"api_type\": \"azure\",\n", " \"api_key\": os.environ['AZURE_OPENAI_API_KEY'],\n", " \"base_url\": \"https://ENDPOINT.openai.azure.com/\",\n", - " \"api_version\": \"2024-02-15-preview\"\n", + " \"api_version\": \"2024-02-01\"\n", " }\n", " ]\n", " ```\n", @@ -328,7 +328,7 @@ " \"api_key\": os.environ.get(\"AZURE_OPENAI_API_KEY\"),\n", " \"api_type\": \"azure\",\n", " \"base_url\": os.environ.get(\"AZURE_OPENAI_API_BASE\"),\n", - " \"api_version\": \"2024-02-15-preview\",\n", + " \"api_version\": \"2024-02-01\",\n", " },\n", " {\n", " \"model\": \"llama-7B\",\n",