diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index b1d65147e..4fcf56a56 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -65,6 +65,12 @@ # ServiceLabel: %tools-AppService # ServiceOwners: @ArthurMa1978 @weidongxu-microsoft +# PRLabel: %tools-AIBestPractices +/tools/Azure.Mcp.Tools.AzureAIBestPractices/ @XiaofuHuang @microsoft/azure-mcp + +# ServiceLabel: %tools-AIBestPractices +# ServiceOwners: @XiaofuHuang + # PRLabel: %tools-BestPractices /tools/Azure.Mcp.Tools.AzureBestPractices/ @g2vinay @conniey @fanyang-mono @microsoft/azure-mcp diff --git a/AzureMcp.sln b/AzureMcp.sln index a9de84e32..cbafdf58b 100644 --- a/AzureMcp.sln +++ b/AzureMcp.sln @@ -549,6 +549,15 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Fabric.Mcp.Tools.PublicApi. EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Azure.Mcp.Tools.Postgres.LiveTests", "tools\Azure.Mcp.Tools.Postgres\tests\Azure.Mcp.Tools.Postgres.LiveTests\Azure.Mcp.Tools.Postgres.LiveTests.csproj", "{BF0354AE-3748-A8DC-F79D-B21FDDEDDFAE}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Azure.Mcp.Tools.AzureAIBestPractices", "Azure.Mcp.Tools.AzureAIBestPractices", "{156D9C17-61FD-98D6-32C0-065B406D0434}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{5D760DD8-DBA3-B865-9021-FDE8FD3497A8}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Azure.Mcp.Tools.AzureAIBestPractices", "tools\Azure.Mcp.Tools.AzureAIBestPractices\src\Azure.Mcp.Tools.AzureAIBestPractices.csproj", "{87C51120-6A0A-4D14-B644-1787DB6C6D6E}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{50124EEC-97B0-320E-80D4-8464D7692B22}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Azure.Mcp.Tools.AzureAIBestPractices.UnitTests", "tools\Azure.Mcp.Tools.AzureAIBestPractices\tests\Azure.Mcp.Tools.AzureAIBestPractices.UnitTests\Azure.Mcp.Tools.AzureAIBestPractices.UnitTests.csproj", 
"{BE8CFF4C-E536-43DB-9D01-001E9A052D37}" Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{319B94CD-694C-16E8-9E3A-9577B99158DD}" EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Azure.Mcp.Server.UnitTests", "servers\Azure.Mcp.Server\tests\Azure.Mcp.Server.UnitTests\Azure.Mcp.Server.UnitTests.csproj", "{ADF14627-FCB5-4BD3-B65F-DDCC3A3F727C}" @@ -2087,6 +2096,30 @@ Global {BF0354AE-3748-A8DC-F79D-B21FDDEDDFAE}.Release|x64.Build.0 = Release|Any CPU {BF0354AE-3748-A8DC-F79D-B21FDDEDDFAE}.Release|x86.ActiveCfg = Release|Any CPU {BF0354AE-3748-A8DC-F79D-B21FDDEDDFAE}.Release|x86.Build.0 = Release|Any CPU + {87C51120-6A0A-4D14-B644-1787DB6C6D6E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {87C51120-6A0A-4D14-B644-1787DB6C6D6E}.Debug|Any CPU.Build.0 = Debug|Any CPU + {87C51120-6A0A-4D14-B644-1787DB6C6D6E}.Debug|x64.ActiveCfg = Debug|Any CPU + {87C51120-6A0A-4D14-B644-1787DB6C6D6E}.Debug|x64.Build.0 = Debug|Any CPU + {87C51120-6A0A-4D14-B644-1787DB6C6D6E}.Debug|x86.ActiveCfg = Debug|Any CPU + {87C51120-6A0A-4D14-B644-1787DB6C6D6E}.Debug|x86.Build.0 = Debug|Any CPU + {87C51120-6A0A-4D14-B644-1787DB6C6D6E}.Release|Any CPU.ActiveCfg = Release|Any CPU + {87C51120-6A0A-4D14-B644-1787DB6C6D6E}.Release|Any CPU.Build.0 = Release|Any CPU + {87C51120-6A0A-4D14-B644-1787DB6C6D6E}.Release|x64.ActiveCfg = Release|Any CPU + {87C51120-6A0A-4D14-B644-1787DB6C6D6E}.Release|x64.Build.0 = Release|Any CPU + {87C51120-6A0A-4D14-B644-1787DB6C6D6E}.Release|x86.ActiveCfg = Release|Any CPU + {87C51120-6A0A-4D14-B644-1787DB6C6D6E}.Release|x86.Build.0 = Release|Any CPU + {BE8CFF4C-E536-43DB-9D01-001E9A052D37}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {BE8CFF4C-E536-43DB-9D01-001E9A052D37}.Debug|Any CPU.Build.0 = Debug|Any CPU + {BE8CFF4C-E536-43DB-9D01-001E9A052D37}.Debug|x64.ActiveCfg = Debug|Any CPU + {BE8CFF4C-E536-43DB-9D01-001E9A052D37}.Debug|x64.Build.0 = Debug|Any CPU + {BE8CFF4C-E536-43DB-9D01-001E9A052D37}.Debug|x86.ActiveCfg = Debug|Any CPU + 
{BE8CFF4C-E536-43DB-9D01-001E9A052D37}.Debug|x86.Build.0 = Debug|Any CPU + {BE8CFF4C-E536-43DB-9D01-001E9A052D37}.Release|Any CPU.ActiveCfg = Release|Any CPU + {BE8CFF4C-E536-43DB-9D01-001E9A052D37}.Release|Any CPU.Build.0 = Release|Any CPU + {BE8CFF4C-E536-43DB-9D01-001E9A052D37}.Release|x64.ActiveCfg = Release|Any CPU + {BE8CFF4C-E536-43DB-9D01-001E9A052D37}.Release|x64.Build.0 = Release|Any CPU + {BE8CFF4C-E536-43DB-9D01-001E9A052D37}.Release|x86.ActiveCfg = Release|Any CPU + {BE8CFF4C-E536-43DB-9D01-001E9A052D37}.Release|x86.Build.0 = Release|Any CPU {ADF14627-FCB5-4BD3-B65F-DDCC3A3F727C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {ADF14627-FCB5-4BD3-B65F-DDCC3A3F727C}.Debug|Any CPU.Build.0 = Debug|Any CPU {ADF14627-FCB5-4BD3-B65F-DDCC3A3F727C}.Debug|x64.ActiveCfg = Debug|Any CPU @@ -2373,6 +2406,11 @@ Global {294AC723-70DA-F50A-2C7A-AC6C0AEA0A62} = {9072C7AF-9EB2-E481-3974-77957587AC76} {D3F46C2D-3AFD-FD9C-9C6A-180B1514DD2F} = {294AC723-70DA-F50A-2C7A-AC6C0AEA0A62} {BF0354AE-3748-A8DC-F79D-B21FDDEDDFAE} = {37B0CE47-14C8-F5BF-BDDD-13EEBE580A88} + {156D9C17-61FD-98D6-32C0-065B406D0434} = {07C2787E-EAC7-C090-1BA3-A61EC2A24D84} + {5D760DD8-DBA3-B865-9021-FDE8FD3497A8} = {156D9C17-61FD-98D6-32C0-065B406D0434} + {87C51120-6A0A-4D14-B644-1787DB6C6D6E} = {5D760DD8-DBA3-B865-9021-FDE8FD3497A8} + {50124EEC-97B0-320E-80D4-8464D7692B22} = {156D9C17-61FD-98D6-32C0-065B406D0434} + {BE8CFF4C-E536-43DB-9D01-001E9A052D37} = {50124EEC-97B0-320E-80D4-8464D7692B22} {319B94CD-694C-16E8-9E3A-9577B99158DD} = {F7E192D1-DE6C-42A2-B52F-02849D482450} {ADF14627-FCB5-4BD3-B65F-DDCC3A3F727C} = {319B94CD-694C-16E8-9E3A-9577B99158DD} EndGlobalSection diff --git a/core/Azure.Mcp.Core/src/Areas/Server/Resources/azure-rules.txt b/core/Azure.Mcp.Core/src/Areas/Server/Resources/azure-rules.txt index 6e339f09c..2ce9212a8 100644 --- a/core/Azure.Mcp.Core/src/Areas/Server/Resources/azure-rules.txt +++ b/core/Azure.Mcp.Core/src/Areas/Server/Resources/azure-rules.txt @@ -12,6 +12,9 @@ sure the user is 
discussing Azure Functions; do not call it otherwise. - Use Azure Functions Deployment Best Practices: When deploying Azure Functions apps to Azure or preparing for the deployment, invoke your `bestpractices` tool if available. Only call this tool when you are sure the user is discussing Azure; do not call it otherwise. +- Use Azure AI Code Generation Best Practices: When generating code for AI applications, AI agents, workflows, +or chatbots in Azure, invoke your `azureaibestpractices` tool if available. Only call this tool when you are sure +the user is discussing Azure and code generation; do not call it for Azure resource operations. - Use Azure SWA Best Practices: When working with static web apps, invoke your `bestpractices` tool if available. Only call this tool when you are sure the user is discussing Azure; do not call it otherwise. - Plan before editing files for Azure web apps: When generating code for Azure Functions and Azure Static Web App, diff --git a/core/Azure.Mcp.Core/src/Areas/Server/Resources/consolidated-tools.json b/core/Azure.Mcp.Core/src/Areas/Server/Resources/consolidated-tools.json index 2979539ed..193a6a46f 100644 --- a/core/Azure.Mcp.Core/src/Areas/Server/Resources/consolidated-tools.json +++ b/core/Azure.Mcp.Core/src/Areas/Server/Resources/consolidated-tools.json @@ -982,7 +982,7 @@ }, { "name": "get_azure_best_practices", - "description": "Retrieve Azure best practices and infrastructure schema for code generation, deployment, and operations. Covers general Azure practices, Azure Functions best practices, Terraform configurations, Bicep template schemas, deployment best practices and AI Foundry sdk code samples.", + "description": "Retrieve Azure best practices and infrastructure schema for code generation, deployment, and operations. 
Covers general Azure practices, Azure Functions best practices, AI app development best practices, Terraform configurations, Bicep template schemas, deployment best practices and AI Foundry sdk code samples.", "toolMetadata": { "destructive": { "value": false, @@ -1010,6 +1010,7 @@ } }, "mappedToolList": [ + "azureaibestpractices_get", "azureterraformbestpractices_get", "bicepschema_get", "get_bestpractices_get", diff --git a/eng/tools/ToolDescriptionEvaluator/prompts.json b/eng/tools/ToolDescriptionEvaluator/prompts.json index 700e0804e..638bac49c 100644 --- a/eng/tools/ToolDescriptionEvaluator/prompts.json +++ b/eng/tools/ToolDescriptionEvaluator/prompts.json @@ -9,9 +9,24 @@ "List all agents in my Azure AI Foundry resource", "Show me the available agents in my Azure AI Foundry resource" ], + "foundry_agents_create": [ + "Create a new Azure AI Foundry agent using instructions in the active editor" + ], "foundry_agents_query-and-evaluate": [ "Query and evaluate an agent in my Azure AI Foundry resource for task_adherence" ], + "foundry_agents_get-sdk-sample": [ + "Create a CLI app that can talk to an Azure AI Foundry Agent using Python SDK" + ], + "foundry_threads_create": [ + "Create an Azure AI Foundry thread to hold the conversation" + ], + "foundry_threads_list": [ + "List my AI Foundry threads" + ], + "foundry_threads_get-messages": [ + "Show me the messages in the AI Foundry thread with id " + ], "foundry_knowledge_index_list": [ "List all knowledge indexes in my AI Foundry project", "Show me the knowledge indexes in my AI Foundry project" @@ -526,6 +541,13 @@ "Search for Microsoft products in the marketplace", "Show me marketplace products from publisher " ], + "azureaibestpractices_get": [ + "Get best practices for building AI applications in Azure", + "Show me the best practices for Azure AI Foundry agents code generation", + "Get guidance for building agents with Azure AI Foundry", + "Create an AI app that helps me to manage travel queries.", + "Create an 
AI app that helps me to manage travel queries in Azure AI Foundry" + ], "get_bestpractices_get": [ "Get the latest Azure code generation best practices", "Get the latest Azure deployment best practices", diff --git a/eng/tools/ToolDescriptionEvaluator/results.md b/eng/tools/ToolDescriptionEvaluator/results.md index af8955b3e..d4c06c36d 100644 --- a/eng/tools/ToolDescriptionEvaluator/results.md +++ b/eng/tools/ToolDescriptionEvaluator/results.md @@ -1,15 +1,15 @@ # Tool Selection Analysis Setup -**Setup completed:** 2025-11-03 14:57:47 -**Tool count:** 173 -**Database setup time:** 1.2016078s +**Setup completed:** 2025-11-06 17:16:26 +**Tool count:** 179 +**Database setup time:** 32.4934401s --- # Tool Selection Analysis Results -**Analysis Date:** 2025-11-03 14:57:47 -**Tool count:** 173 +**Analysis Date:** 2025-11-06 17:16:26 +**Tool count:** 179 ## Table of Contents @@ -17,210 +17,210 @@ - [Test 2: foundry_agents_evaluate](#test-2) - [Test 3: foundry_agents_list](#test-3) - [Test 4: foundry_agents_list](#test-4) -- [Test 5: foundry_agents_query-and-evaluate](#test-5) -- [Test 6: foundry_knowledge_index_list](#test-6) -- [Test 7: foundry_knowledge_index_list](#test-7) -- [Test 8: foundry_knowledge_index_schema](#test-8) -- [Test 9: foundry_knowledge_index_schema](#test-9) -- [Test 10: foundry_models_deploy](#test-10) -- [Test 11: foundry_models_deployments_list](#test-11) -- [Test 12: foundry_models_deployments_list](#test-12) -- [Test 13: foundry_models_list](#test-13) -- [Test 14: foundry_models_list](#test-14) -- [Test 15: foundry_openai_chat-completions-create](#test-15) -- [Test 16: foundry_openai_create-completion](#test-16) -- [Test 17: foundry_openai_embeddings-create](#test-17) -- [Test 18: foundry_openai_embeddings-create](#test-18) -- [Test 19: foundry_openai_models-list](#test-19) -- [Test 20: foundry_openai_models-list](#test-20) -- [Test 21: foundry_resource_get](#test-21) -- [Test 22: foundry_resource_get](#test-22) -- [Test 23: 
foundry_resource_get](#test-23) -- [Test 24: search_knowledge_base_get](#test-24) -- [Test 25: search_knowledge_base_get](#test-25) -- [Test 26: search_knowledge_base_get](#test-26) -- [Test 27: search_knowledge_base_get](#test-27) -- [Test 28: search_knowledge_base_get](#test-28) +- [Test 5: foundry_agents_create](#test-5) +- [Test 6: foundry_agents_query-and-evaluate](#test-6) +- [Test 7: foundry_agents_get-sdk-sample](#test-7) +- [Test 8: foundry_threads_create](#test-8) +- [Test 9: foundry_threads_list](#test-9) +- [Test 10: foundry_threads_get-messages](#test-10) +- [Test 11: foundry_knowledge_index_list](#test-11) +- [Test 12: foundry_knowledge_index_list](#test-12) +- [Test 13: foundry_knowledge_index_schema](#test-13) +- [Test 14: foundry_knowledge_index_schema](#test-14) +- [Test 15: foundry_models_deploy](#test-15) +- [Test 16: foundry_models_deployments_list](#test-16) +- [Test 17: foundry_models_deployments_list](#test-17) +- [Test 18: foundry_models_list](#test-18) +- [Test 19: foundry_models_list](#test-19) +- [Test 20: foundry_openai_chat-completions-create](#test-20) +- [Test 21: foundry_openai_create-completion](#test-21) +- [Test 22: foundry_openai_embeddings-create](#test-22) +- [Test 23: foundry_openai_embeddings-create](#test-23) +- [Test 24: foundry_openai_models-list](#test-24) +- [Test 25: foundry_openai_models-list](#test-25) +- [Test 26: foundry_resource_get](#test-26) +- [Test 27: foundry_resource_get](#test-27) +- [Test 28: foundry_resource_get](#test-28) - [Test 29: search_knowledge_base_get](#test-29) -- [Test 30: search_knowledge_base_retrieve](#test-30) -- [Test 31: search_knowledge_base_retrieve](#test-31) -- [Test 32: search_knowledge_base_retrieve](#test-32) -- [Test 33: search_knowledge_base_retrieve](#test-33) -- [Test 34: search_knowledge_base_retrieve](#test-34) +- [Test 30: search_knowledge_base_get](#test-30) +- [Test 31: search_knowledge_base_get](#test-31) +- [Test 32: search_knowledge_base_get](#test-32) +- [Test 33: 
search_knowledge_base_get](#test-33) +- [Test 34: search_knowledge_base_get](#test-34) - [Test 35: search_knowledge_base_retrieve](#test-35) - [Test 36: search_knowledge_base_retrieve](#test-36) - [Test 37: search_knowledge_base_retrieve](#test-37) -- [Test 38: search_knowledge_source_get](#test-38) -- [Test 39: search_knowledge_source_get](#test-39) -- [Test 40: search_knowledge_source_get](#test-40) -- [Test 41: search_knowledge_source_get](#test-41) -- [Test 42: search_knowledge_source_get](#test-42) +- [Test 38: search_knowledge_base_retrieve](#test-38) +- [Test 39: search_knowledge_base_retrieve](#test-39) +- [Test 40: search_knowledge_base_retrieve](#test-40) +- [Test 41: search_knowledge_base_retrieve](#test-41) +- [Test 42: search_knowledge_base_retrieve](#test-42) - [Test 43: search_knowledge_source_get](#test-43) -- [Test 44: search_index_get](#test-44) -- [Test 45: search_index_get](#test-45) -- [Test 46: search_index_get](#test-46) -- [Test 47: search_index_query](#test-47) -- [Test 48: search_service_list](#test-48) -- [Test 49: search_service_list](#test-49) -- [Test 50: search_service_list](#test-50) -- [Test 51: speech_stt_recognize](#test-51) -- [Test 52: speech_stt_recognize](#test-52) -- [Test 53: speech_stt_recognize](#test-53) -- [Test 54: speech_stt_recognize](#test-54) -- [Test 55: speech_stt_recognize](#test-55) +- [Test 44: search_knowledge_source_get](#test-44) +- [Test 45: search_knowledge_source_get](#test-45) +- [Test 46: search_knowledge_source_get](#test-46) +- [Test 47: search_knowledge_source_get](#test-47) +- [Test 48: search_knowledge_source_get](#test-48) +- [Test 49: search_index_get](#test-49) +- [Test 50: search_index_get](#test-50) +- [Test 51: search_index_get](#test-51) +- [Test 52: search_index_query](#test-52) +- [Test 53: search_service_list](#test-53) +- [Test 54: search_service_list](#test-54) +- [Test 55: search_service_list](#test-55) - [Test 56: speech_stt_recognize](#test-56) - [Test 57: 
speech_stt_recognize](#test-57) - [Test 58: speech_stt_recognize](#test-58) - [Test 59: speech_stt_recognize](#test-59) - [Test 60: speech_stt_recognize](#test-60) -- [Test 61: appconfig_account_list](#test-61) -- [Test 62: appconfig_account_list](#test-62) -- [Test 63: appconfig_account_list](#test-63) -- [Test 64: appconfig_kv_delete](#test-64) -- [Test 65: appconfig_kv_get](#test-65) -- [Test 66: appconfig_kv_get](#test-66) -- [Test 67: appconfig_kv_get](#test-67) -- [Test 68: appconfig_kv_get](#test-68) -- [Test 69: appconfig_kv_lock_set](#test-69) -- [Test 70: appconfig_kv_lock_set](#test-70) -- [Test 71: appconfig_kv_set](#test-71) -- [Test 72: applens_resource_diagnose](#test-72) -- [Test 73: applens_resource_diagnose](#test-73) -- [Test 74: applens_resource_diagnose](#test-74) -- [Test 75: appservice_database_add](#test-75) -- [Test 76: appservice_database_add](#test-76) -- [Test 77: appservice_database_add](#test-77) -- [Test 78: appservice_database_add](#test-78) -- [Test 79: appservice_database_add](#test-79) +- [Test 61: speech_stt_recognize](#test-61) +- [Test 62: speech_stt_recognize](#test-62) +- [Test 63: speech_stt_recognize](#test-63) +- [Test 64: speech_stt_recognize](#test-64) +- [Test 65: speech_stt_recognize](#test-65) +- [Test 66: appconfig_account_list](#test-66) +- [Test 67: appconfig_account_list](#test-67) +- [Test 68: appconfig_account_list](#test-68) +- [Test 69: appconfig_kv_delete](#test-69) +- [Test 70: appconfig_kv_get](#test-70) +- [Test 71: appconfig_kv_get](#test-71) +- [Test 72: appconfig_kv_get](#test-72) +- [Test 73: appconfig_kv_get](#test-73) +- [Test 74: appconfig_kv_lock_set](#test-74) +- [Test 75: appconfig_kv_lock_set](#test-75) +- [Test 76: appconfig_kv_set](#test-76) +- [Test 77: applens_resource_diagnose](#test-77) +- [Test 78: applens_resource_diagnose](#test-78) +- [Test 79: applens_resource_diagnose](#test-79) - [Test 80: appservice_database_add](#test-80) - [Test 81: appservice_database_add](#test-81) - [Test 82: 
appservice_database_add](#test-82) - [Test 83: appservice_database_add](#test-83) - [Test 84: appservice_database_add](#test-84) -- [Test 85: applicationinsights_recommendation_list](#test-85) -- [Test 86: applicationinsights_recommendation_list](#test-86) -- [Test 87: applicationinsights_recommendation_list](#test-87) -- [Test 88: applicationinsights_recommendation_list](#test-88) -- [Test 89: extension_cli_generate](#test-89) -- [Test 90: extension_cli_generate](#test-90) -- [Test 91: extension_cli_generate](#test-91) -- [Test 92: extension_cli_install](#test-92) -- [Test 93: extension_cli_install](#test-93) -- [Test 94: extension_cli_install](#test-94) -- [Test 95: acr_registry_list](#test-95) -- [Test 96: acr_registry_list](#test-96) -- [Test 97: acr_registry_list](#test-97) -- [Test 98: acr_registry_list](#test-98) -- [Test 99: acr_registry_list](#test-99) -- [Test 100: acr_registry_repository_list](#test-100) -- [Test 101: acr_registry_repository_list](#test-101) -- [Test 102: acr_registry_repository_list](#test-102) -- [Test 103: acr_registry_repository_list](#test-103) -- [Test 104: communication_email_send](#test-104) -- [Test 105: communication_email_send](#test-105) -- [Test 106: communication_email_send](#test-106) -- [Test 107: communication_email_send](#test-107) -- [Test 108: communication_email_send](#test-108) +- [Test 85: appservice_database_add](#test-85) +- [Test 86: appservice_database_add](#test-86) +- [Test 87: appservice_database_add](#test-87) +- [Test 88: appservice_database_add](#test-88) +- [Test 89: appservice_database_add](#test-89) +- [Test 90: applicationinsights_recommendation_list](#test-90) +- [Test 91: applicationinsights_recommendation_list](#test-91) +- [Test 92: applicationinsights_recommendation_list](#test-92) +- [Test 93: applicationinsights_recommendation_list](#test-93) +- [Test 94: extension_cli_generate](#test-94) +- [Test 95: extension_cli_generate](#test-95) +- [Test 96: extension_cli_generate](#test-96) +- [Test 97: 
extension_cli_install](#test-97) +- [Test 98: extension_cli_install](#test-98) +- [Test 99: extension_cli_install](#test-99) +- [Test 100: acr_registry_list](#test-100) +- [Test 101: acr_registry_list](#test-101) +- [Test 102: acr_registry_list](#test-102) +- [Test 103: acr_registry_list](#test-103) +- [Test 104: acr_registry_list](#test-104) +- [Test 105: acr_registry_repository_list](#test-105) +- [Test 106: acr_registry_repository_list](#test-106) +- [Test 107: acr_registry_repository_list](#test-107) +- [Test 108: acr_registry_repository_list](#test-108) - [Test 109: communication_email_send](#test-109) - [Test 110: communication_email_send](#test-110) - [Test 111: communication_email_send](#test-111) -- [Test 112: communication_sms_send](#test-112) -- [Test 113: communication_sms_send](#test-113) -- [Test 114: communication_sms_send](#test-114) -- [Test 115: communication_sms_send](#test-115) -- [Test 116: communication_sms_send](#test-116) +- [Test 112: communication_email_send](#test-112) +- [Test 113: communication_email_send](#test-113) +- [Test 114: communication_email_send](#test-114) +- [Test 115: communication_email_send](#test-115) +- [Test 116: communication_email_send](#test-116) - [Test 117: communication_sms_send](#test-117) - [Test 118: communication_sms_send](#test-118) - [Test 119: communication_sms_send](#test-119) -- [Test 120: confidentialledger_entries_append](#test-120) -- [Test 121: confidentialledger_entries_append](#test-121) -- [Test 122: confidentialledger_entries_append](#test-122) -- [Test 123: confidentialledger_entries_append](#test-123) -- [Test 124: confidentialledger_entries_append](#test-124) -- [Test 125: confidentialledger_entries_get](#test-125) -- [Test 126: confidentialledger_entries_get](#test-126) -- [Test 127: cosmos_account_list](#test-127) -- [Test 128: cosmos_account_list](#test-128) -- [Test 129: cosmos_account_list](#test-129) -- [Test 130: cosmos_database_container_item_query](#test-130) -- [Test 131: 
cosmos_database_container_list](#test-131) -- [Test 132: cosmos_database_container_list](#test-132) -- [Test 133: cosmos_database_list](#test-133) -- [Test 134: cosmos_database_list](#test-134) -- [Test 135: kusto_cluster_get](#test-135) -- [Test 136: kusto_cluster_list](#test-136) -- [Test 137: kusto_cluster_list](#test-137) -- [Test 138: kusto_cluster_list](#test-138) -- [Test 139: kusto_database_list](#test-139) -- [Test 140: kusto_database_list](#test-140) -- [Test 141: kusto_query](#test-141) -- [Test 142: kusto_sample](#test-142) -- [Test 143: kusto_table_list](#test-143) -- [Test 144: kusto_table_list](#test-144) -- [Test 145: kusto_table_schema](#test-145) -- [Test 146: mysql_database_list](#test-146) -- [Test 147: mysql_database_list](#test-147) -- [Test 148: mysql_database_query](#test-148) -- [Test 149: mysql_server_config_get](#test-149) -- [Test 150: mysql_server_list](#test-150) -- [Test 151: mysql_server_list](#test-151) -- [Test 152: mysql_server_list](#test-152) -- [Test 153: mysql_server_param_get](#test-153) -- [Test 154: mysql_server_param_set](#test-154) -- [Test 155: mysql_table_list](#test-155) -- [Test 156: mysql_table_list](#test-156) -- [Test 157: mysql_table_schema_get](#test-157) -- [Test 158: postgres_database_list](#test-158) -- [Test 159: postgres_database_list](#test-159) -- [Test 160: postgres_database_query](#test-160) -- [Test 161: postgres_server_config_get](#test-161) -- [Test 162: postgres_server_list](#test-162) -- [Test 163: postgres_server_list](#test-163) -- [Test 164: postgres_server_list](#test-164) -- [Test 165: postgres_server_param_get](#test-165) -- [Test 166: postgres_server_param_set](#test-166) -- [Test 167: postgres_table_list](#test-167) -- [Test 168: postgres_table_list](#test-168) -- [Test 169: postgres_table_schema_get](#test-169) -- [Test 170: deploy_app_logs_get](#test-170) -- [Test 171: deploy_architecture_diagram_generate](#test-171) -- [Test 172: deploy_iac_rules_get](#test-172) -- [Test 173: 
deploy_pipeline_guidance_get](#test-173) -- [Test 174: deploy_plan_get](#test-174) -- [Test 175: eventgrid_events_publish](#test-175) -- [Test 176: eventgrid_events_publish](#test-176) -- [Test 177: eventgrid_events_publish](#test-177) -- [Test 178: eventgrid_topic_list](#test-178) -- [Test 179: eventgrid_topic_list](#test-179) -- [Test 180: eventgrid_topic_list](#test-180) -- [Test 181: eventgrid_topic_list](#test-181) -- [Test 182: eventgrid_subscription_list](#test-182) -- [Test 183: eventgrid_subscription_list](#test-183) -- [Test 184: eventgrid_subscription_list](#test-184) -- [Test 185: eventgrid_subscription_list](#test-185) -- [Test 186: eventgrid_subscription_list](#test-186) +- [Test 120: communication_sms_send](#test-120) +- [Test 121: communication_sms_send](#test-121) +- [Test 122: communication_sms_send](#test-122) +- [Test 123: communication_sms_send](#test-123) +- [Test 124: communication_sms_send](#test-124) +- [Test 125: confidentialledger_entries_append](#test-125) +- [Test 126: confidentialledger_entries_append](#test-126) +- [Test 127: confidentialledger_entries_append](#test-127) +- [Test 128: confidentialledger_entries_append](#test-128) +- [Test 129: confidentialledger_entries_append](#test-129) +- [Test 130: confidentialledger_entries_get](#test-130) +- [Test 131: confidentialledger_entries_get](#test-131) +- [Test 132: cosmos_account_list](#test-132) +- [Test 133: cosmos_account_list](#test-133) +- [Test 134: cosmos_account_list](#test-134) +- [Test 135: cosmos_database_container_item_query](#test-135) +- [Test 136: cosmos_database_container_list](#test-136) +- [Test 137: cosmos_database_container_list](#test-137) +- [Test 138: cosmos_database_list](#test-138) +- [Test 139: cosmos_database_list](#test-139) +- [Test 140: kusto_cluster_get](#test-140) +- [Test 141: kusto_cluster_list](#test-141) +- [Test 142: kusto_cluster_list](#test-142) +- [Test 143: kusto_cluster_list](#test-143) +- [Test 144: kusto_database_list](#test-144) +- [Test 
145: kusto_database_list](#test-145) +- [Test 146: kusto_query](#test-146) +- [Test 147: kusto_sample](#test-147) +- [Test 148: kusto_table_list](#test-148) +- [Test 149: kusto_table_list](#test-149) +- [Test 150: kusto_table_schema](#test-150) +- [Test 151: mysql_database_list](#test-151) +- [Test 152: mysql_database_list](#test-152) +- [Test 153: mysql_database_query](#test-153) +- [Test 154: mysql_server_config_get](#test-154) +- [Test 155: mysql_server_list](#test-155) +- [Test 156: mysql_server_list](#test-156) +- [Test 157: mysql_server_list](#test-157) +- [Test 158: mysql_server_param_get](#test-158) +- [Test 159: mysql_server_param_set](#test-159) +- [Test 160: mysql_table_list](#test-160) +- [Test 161: mysql_table_list](#test-161) +- [Test 162: mysql_table_schema_get](#test-162) +- [Test 163: postgres_database_list](#test-163) +- [Test 164: postgres_database_list](#test-164) +- [Test 165: postgres_database_query](#test-165) +- [Test 166: postgres_server_config_get](#test-166) +- [Test 167: postgres_server_list](#test-167) +- [Test 168: postgres_server_list](#test-168) +- [Test 169: postgres_server_list](#test-169) +- [Test 170: postgres_server_param_get](#test-170) +- [Test 171: postgres_server_param_set](#test-171) +- [Test 172: postgres_table_list](#test-172) +- [Test 173: postgres_table_list](#test-173) +- [Test 174: postgres_table_schema_get](#test-174) +- [Test 175: deploy_app_logs_get](#test-175) +- [Test 176: deploy_architecture_diagram_generate](#test-176) +- [Test 177: deploy_iac_rules_get](#test-177) +- [Test 178: deploy_pipeline_guidance_get](#test-178) +- [Test 179: deploy_plan_get](#test-179) +- [Test 180: eventgrid_events_publish](#test-180) +- [Test 181: eventgrid_events_publish](#test-181) +- [Test 182: eventgrid_events_publish](#test-182) +- [Test 183: eventgrid_topic_list](#test-183) +- [Test 184: eventgrid_topic_list](#test-184) +- [Test 185: eventgrid_topic_list](#test-185) +- [Test 186: eventgrid_topic_list](#test-186) - [Test 187: 
eventgrid_subscription_list](#test-187) - [Test 188: eventgrid_subscription_list](#test-188) -- [Test 189: eventhubs_eventhub_consumergroup_delete](#test-189) -- [Test 190: eventhubs_eventhub_consumergroup_get](#test-190) -- [Test 191: eventhubs_eventhub_consumergroup_get](#test-191) -- [Test 192: eventhubs_eventhub_consumergroup_update](#test-192) -- [Test 193: eventhubs_eventhub_consumergroup_update](#test-193) -- [Test 194: eventhubs_eventhub_delete](#test-194) -- [Test 195: eventhubs_eventhub_get](#test-195) -- [Test 196: eventhubs_eventhub_get](#test-196) -- [Test 197: eventhubs_eventhub_update](#test-197) -- [Test 198: eventhubs_eventhub_update](#test-198) -- [Test 199: eventhubs_namespace_delete](#test-199) -- [Test 200: eventhubs_namespace_get](#test-200) -- [Test 201: eventhubs_namespace_get](#test-201) -- [Test 202: eventhubs_namespace_update](#test-202) -- [Test 203: eventhubs_namespace_update](#test-203) -- [Test 204: functionapp_get](#test-204) -- [Test 205: functionapp_get](#test-205) -- [Test 206: functionapp_get](#test-206) -- [Test 207: functionapp_get](#test-207) -- [Test 208: functionapp_get](#test-208) +- [Test 189: eventgrid_subscription_list](#test-189) +- [Test 190: eventgrid_subscription_list](#test-190) +- [Test 191: eventgrid_subscription_list](#test-191) +- [Test 192: eventgrid_subscription_list](#test-192) +- [Test 193: eventgrid_subscription_list](#test-193) +- [Test 194: eventhubs_eventhub_consumergroup_delete](#test-194) +- [Test 195: eventhubs_eventhub_consumergroup_get](#test-195) +- [Test 196: eventhubs_eventhub_consumergroup_get](#test-196) +- [Test 197: eventhubs_eventhub_consumergroup_update](#test-197) +- [Test 198: eventhubs_eventhub_consumergroup_update](#test-198) +- [Test 199: eventhubs_eventhub_delete](#test-199) +- [Test 200: eventhubs_eventhub_get](#test-200) +- [Test 201: eventhubs_eventhub_get](#test-201) +- [Test 202: eventhubs_eventhub_update](#test-202) +- [Test 203: eventhubs_eventhub_update](#test-203) +- [Test 
204: eventhubs_namespace_delete](#test-204) +- [Test 205: eventhubs_namespace_get](#test-205) +- [Test 206: eventhubs_namespace_get](#test-206) +- [Test 207: eventhubs_namespace_update](#test-207) +- [Test 208: eventhubs_namespace_update](#test-208) - [Test 209: functionapp_get](#test-209) - [Test 210: functionapp_get](#test-210) - [Test 211: functionapp_get](#test-211) @@ -228,245 +228,255 @@ - [Test 213: functionapp_get](#test-213) - [Test 214: functionapp_get](#test-214) - [Test 215: functionapp_get](#test-215) -- [Test 216: keyvault_admin_settings_get](#test-216) -- [Test 217: keyvault_admin_settings_get](#test-217) -- [Test 218: keyvault_admin_settings_get](#test-218) -- [Test 219: keyvault_certificate_create](#test-219) -- [Test 220: keyvault_certificate_create](#test-220) -- [Test 221: keyvault_certificate_create](#test-221) -- [Test 222: keyvault_certificate_create](#test-222) -- [Test 223: keyvault_certificate_create](#test-223) -- [Test 224: keyvault_certificate_get](#test-224) -- [Test 225: keyvault_certificate_get](#test-225) -- [Test 226: keyvault_certificate_get](#test-226) -- [Test 227: keyvault_certificate_get](#test-227) -- [Test 228: keyvault_certificate_get](#test-228) -- [Test 229: keyvault_certificate_import](#test-229) -- [Test 230: keyvault_certificate_import](#test-230) -- [Test 231: keyvault_certificate_import](#test-231) -- [Test 232: keyvault_certificate_import](#test-232) -- [Test 233: keyvault_certificate_import](#test-233) -- [Test 234: keyvault_certificate_list](#test-234) -- [Test 235: keyvault_certificate_list](#test-235) -- [Test 236: keyvault_certificate_list](#test-236) -- [Test 237: keyvault_certificate_list](#test-237) -- [Test 238: keyvault_certificate_list](#test-238) +- [Test 216: functionapp_get](#test-216) +- [Test 217: functionapp_get](#test-217) +- [Test 218: functionapp_get](#test-218) +- [Test 219: functionapp_get](#test-219) +- [Test 220: functionapp_get](#test-220) +- [Test 221: 
keyvault_admin_settings_get](#test-221) +- [Test 222: keyvault_admin_settings_get](#test-222) +- [Test 223: keyvault_admin_settings_get](#test-223) +- [Test 224: keyvault_certificate_create](#test-224) +- [Test 225: keyvault_certificate_create](#test-225) +- [Test 226: keyvault_certificate_create](#test-226) +- [Test 227: keyvault_certificate_create](#test-227) +- [Test 228: keyvault_certificate_create](#test-228) +- [Test 229: keyvault_certificate_get](#test-229) +- [Test 230: keyvault_certificate_get](#test-230) +- [Test 231: keyvault_certificate_get](#test-231) +- [Test 232: keyvault_certificate_get](#test-232) +- [Test 233: keyvault_certificate_get](#test-233) +- [Test 234: keyvault_certificate_import](#test-234) +- [Test 235: keyvault_certificate_import](#test-235) +- [Test 236: keyvault_certificate_import](#test-236) +- [Test 237: keyvault_certificate_import](#test-237) +- [Test 238: keyvault_certificate_import](#test-238) - [Test 239: keyvault_certificate_list](#test-239) -- [Test 240: keyvault_key_create](#test-240) -- [Test 241: keyvault_key_create](#test-241) -- [Test 242: keyvault_key_create](#test-242) -- [Test 243: keyvault_key_create](#test-243) -- [Test 244: keyvault_key_create](#test-244) -- [Test 245: keyvault_key_get](#test-245) -- [Test 246: keyvault_key_get](#test-246) -- [Test 247: keyvault_key_get](#test-247) -- [Test 248: keyvault_key_get](#test-248) -- [Test 249: keyvault_key_get](#test-249) -- [Test 250: keyvault_key_list](#test-250) -- [Test 251: keyvault_key_list](#test-251) -- [Test 252: keyvault_key_list](#test-252) -- [Test 253: keyvault_key_list](#test-253) -- [Test 254: keyvault_key_list](#test-254) +- [Test 240: keyvault_certificate_list](#test-240) +- [Test 241: keyvault_certificate_list](#test-241) +- [Test 242: keyvault_certificate_list](#test-242) +- [Test 243: keyvault_certificate_list](#test-243) +- [Test 244: keyvault_certificate_list](#test-244) +- [Test 245: keyvault_key_create](#test-245) +- [Test 246: 
keyvault_key_create](#test-246) +- [Test 247: keyvault_key_create](#test-247) +- [Test 248: keyvault_key_create](#test-248) +- [Test 249: keyvault_key_create](#test-249) +- [Test 250: keyvault_key_get](#test-250) +- [Test 251: keyvault_key_get](#test-251) +- [Test 252: keyvault_key_get](#test-252) +- [Test 253: keyvault_key_get](#test-253) +- [Test 254: keyvault_key_get](#test-254) - [Test 255: keyvault_key_list](#test-255) -- [Test 256: keyvault_secret_create](#test-256) -- [Test 257: keyvault_secret_create](#test-257) -- [Test 258: keyvault_secret_create](#test-258) -- [Test 259: keyvault_secret_create](#test-259) -- [Test 260: keyvault_secret_create](#test-260) -- [Test 261: keyvault_secret_get](#test-261) -- [Test 262: keyvault_secret_get](#test-262) -- [Test 263: keyvault_secret_get](#test-263) -- [Test 264: keyvault_secret_get](#test-264) -- [Test 265: keyvault_secret_get](#test-265) -- [Test 266: keyvault_secret_list](#test-266) -- [Test 267: keyvault_secret_list](#test-267) -- [Test 268: keyvault_secret_list](#test-268) -- [Test 269: keyvault_secret_list](#test-269) -- [Test 270: keyvault_secret_list](#test-270) +- [Test 256: keyvault_key_list](#test-256) +- [Test 257: keyvault_key_list](#test-257) +- [Test 258: keyvault_key_list](#test-258) +- [Test 259: keyvault_key_list](#test-259) +- [Test 260: keyvault_key_list](#test-260) +- [Test 261: keyvault_secret_create](#test-261) +- [Test 262: keyvault_secret_create](#test-262) +- [Test 263: keyvault_secret_create](#test-263) +- [Test 264: keyvault_secret_create](#test-264) +- [Test 265: keyvault_secret_create](#test-265) +- [Test 266: keyvault_secret_get](#test-266) +- [Test 267: keyvault_secret_get](#test-267) +- [Test 268: keyvault_secret_get](#test-268) +- [Test 269: keyvault_secret_get](#test-269) +- [Test 270: keyvault_secret_get](#test-270) - [Test 271: keyvault_secret_list](#test-271) -- [Test 272: aks_cluster_get](#test-272) -- [Test 273: aks_cluster_get](#test-273) -- [Test 274: 
aks_cluster_get](#test-274) -- [Test 275: aks_cluster_get](#test-275) -- [Test 276: aks_cluster_get](#test-276) +- [Test 272: keyvault_secret_list](#test-272) +- [Test 273: keyvault_secret_list](#test-273) +- [Test 274: keyvault_secret_list](#test-274) +- [Test 275: keyvault_secret_list](#test-275) +- [Test 276: keyvault_secret_list](#test-276) - [Test 277: aks_cluster_get](#test-277) - [Test 278: aks_cluster_get](#test-278) -- [Test 279: aks_nodepool_get](#test-279) -- [Test 280: aks_nodepool_get](#test-280) -- [Test 281: aks_nodepool_get](#test-281) -- [Test 282: aks_nodepool_get](#test-282) -- [Test 283: aks_nodepool_get](#test-283) +- [Test 279: aks_cluster_get](#test-279) +- [Test 280: aks_cluster_get](#test-280) +- [Test 281: aks_cluster_get](#test-281) +- [Test 282: aks_cluster_get](#test-282) +- [Test 283: aks_cluster_get](#test-283) - [Test 284: aks_nodepool_get](#test-284) -- [Test 285: loadtesting_test_create](#test-285) -- [Test 286: loadtesting_test_get](#test-286) -- [Test 287: loadtesting_testresource_create](#test-287) -- [Test 288: loadtesting_testresource_list](#test-288) -- [Test 289: loadtesting_testrun_create](#test-289) -- [Test 290: loadtesting_testrun_get](#test-290) -- [Test 291: loadtesting_testrun_list](#test-291) -- [Test 292: loadtesting_testrun_update](#test-292) -- [Test 293: grafana_list](#test-293) -- [Test 294: managedlustre_fs_create](#test-294) -- [Test 295: managedlustre_fs_list](#test-295) -- [Test 296: managedlustre_fs_list](#test-296) -- [Test 297: managedlustre_fs_sku_get](#test-297) -- [Test 298: managedlustre_fs_subnetsize_ask](#test-298) -- [Test 299: managedlustre_fs_subnetsize_validate](#test-299) -- [Test 300: managedlustre_fs_update](#test-300) -- [Test 301: marketplace_product_get](#test-301) -- [Test 302: marketplace_product_list](#test-302) -- [Test 303: marketplace_product_list](#test-303) -- [Test 304: get_bestpractices_get](#test-304) -- [Test 305: get_bestpractices_get](#test-305) -- [Test 306: 
get_bestpractices_get](#test-306) -- [Test 307: get_bestpractices_get](#test-307) -- [Test 308: get_bestpractices_get](#test-308) -- [Test 309: get_bestpractices_get](#test-309) -- [Test 310: get_bestpractices_get](#test-310) -- [Test 311: get_bestpractices_get](#test-311) -- [Test 312: get_bestpractices_get](#test-312) -- [Test 313: monitor_activitylog_list](#test-313) -- [Test 314: monitor_healthmodels_entity_get](#test-314) -- [Test 315: monitor_metrics_definitions](#test-315) -- [Test 316: monitor_metrics_definitions](#test-316) -- [Test 317: monitor_metrics_definitions](#test-317) -- [Test 318: monitor_metrics_query](#test-318) -- [Test 319: monitor_metrics_query](#test-319) -- [Test 320: monitor_metrics_query](#test-320) -- [Test 321: monitor_metrics_query](#test-321) -- [Test 322: monitor_metrics_query](#test-322) -- [Test 323: monitor_metrics_query](#test-323) -- [Test 324: monitor_resource_log_query](#test-324) -- [Test 325: monitor_table_list](#test-325) -- [Test 326: monitor_table_list](#test-326) -- [Test 327: monitor_table_type_list](#test-327) -- [Test 328: monitor_table_type_list](#test-328) -- [Test 329: monitor_webtests_create](#test-329) -- [Test 330: monitor_webtests_get](#test-330) -- [Test 331: monitor_webtests_list](#test-331) -- [Test 332: monitor_webtests_list](#test-332) -- [Test 333: monitor_webtests_update](#test-333) -- [Test 334: monitor_workspace_list](#test-334) -- [Test 335: monitor_workspace_list](#test-335) -- [Test 336: monitor_workspace_list](#test-336) -- [Test 337: monitor_workspace_log_query](#test-337) -- [Test 338: datadog_monitoredresources_list](#test-338) -- [Test 339: datadog_monitoredresources_list](#test-339) -- [Test 340: extension_azqr](#test-340) -- [Test 341: extension_azqr](#test-341) -- [Test 342: extension_azqr](#test-342) -- [Test 343: quota_region_availability_list](#test-343) -- [Test 344: quota_usage_check](#test-344) -- [Test 345: role_assignment_list](#test-345) -- [Test 346: 
role_assignment_list](#test-346) -- [Test 347: redis_list](#test-347) -- [Test 348: redis_list](#test-348) -- [Test 349: redis_list](#test-349) -- [Test 350: redis_list](#test-350) -- [Test 351: redis_list](#test-351) -- [Test 352: group_list](#test-352) -- [Test 353: group_list](#test-353) -- [Test 354: group_list](#test-354) -- [Test 355: resourcehealth_availability-status_get](#test-355) -- [Test 356: resourcehealth_availability-status_get](#test-356) -- [Test 357: resourcehealth_availability-status_get](#test-357) -- [Test 358: resourcehealth_availability-status_list](#test-358) -- [Test 359: resourcehealth_availability-status_list](#test-359) -- [Test 360: resourcehealth_availability-status_list](#test-360) -- [Test 361: resourcehealth_health-events_list](#test-361) -- [Test 362: resourcehealth_health-events_list](#test-362) -- [Test 363: resourcehealth_health-events_list](#test-363) -- [Test 364: resourcehealth_health-events_list](#test-364) -- [Test 365: resourcehealth_health-events_list](#test-365) -- [Test 366: servicebus_queue_details](#test-366) -- [Test 367: servicebus_topic_details](#test-367) -- [Test 368: servicebus_topic_subscription_details](#test-368) -- [Test 369: signalr_runtime_get](#test-369) -- [Test 370: signalr_runtime_get](#test-370) -- [Test 371: signalr_runtime_get](#test-371) -- [Test 372: signalr_runtime_get](#test-372) -- [Test 373: signalr_runtime_get](#test-373) -- [Test 374: signalr_runtime_get](#test-374) -- [Test 375: sql_db_create](#test-375) -- [Test 376: sql_db_create](#test-376) -- [Test 377: sql_db_create](#test-377) -- [Test 378: sql_db_delete](#test-378) -- [Test 379: sql_db_delete](#test-379) -- [Test 380: sql_db_delete](#test-380) -- [Test 381: sql_db_list](#test-381) -- [Test 382: sql_db_list](#test-382) -- [Test 383: sql_db_rename](#test-383) -- [Test 384: sql_db_rename](#test-384) -- [Test 385: sql_db_show](#test-385) -- [Test 386: sql_db_show](#test-386) -- [Test 387: sql_db_update](#test-387) -- [Test 388: 
sql_db_update](#test-388) -- [Test 389: sql_elastic-pool_list](#test-389) -- [Test 390: sql_elastic-pool_list](#test-390) -- [Test 391: sql_elastic-pool_list](#test-391) -- [Test 392: sql_server_create](#test-392) -- [Test 393: sql_server_create](#test-393) -- [Test 394: sql_server_create](#test-394) -- [Test 395: sql_server_delete](#test-395) -- [Test 396: sql_server_delete](#test-396) -- [Test 397: sql_server_delete](#test-397) -- [Test 398: sql_server_entra-admin_list](#test-398) -- [Test 399: sql_server_entra-admin_list](#test-399) -- [Test 400: sql_server_entra-admin_list](#test-400) -- [Test 401: sql_server_firewall-rule_create](#test-401) -- [Test 402: sql_server_firewall-rule_create](#test-402) -- [Test 403: sql_server_firewall-rule_create](#test-403) -- [Test 404: sql_server_firewall-rule_delete](#test-404) -- [Test 405: sql_server_firewall-rule_delete](#test-405) -- [Test 406: sql_server_firewall-rule_delete](#test-406) -- [Test 407: sql_server_firewall-rule_list](#test-407) -- [Test 408: sql_server_firewall-rule_list](#test-408) -- [Test 409: sql_server_firewall-rule_list](#test-409) -- [Test 410: sql_server_list](#test-410) -- [Test 411: sql_server_list](#test-411) -- [Test 412: sql_server_show](#test-412) -- [Test 413: sql_server_show](#test-413) -- [Test 414: sql_server_show](#test-414) -- [Test 415: storage_account_create](#test-415) -- [Test 416: storage_account_create](#test-416) -- [Test 417: storage_account_create](#test-417) -- [Test 418: storage_account_get](#test-418) -- [Test 419: storage_account_get](#test-419) -- [Test 420: storage_account_get](#test-420) -- [Test 421: storage_account_get](#test-421) -- [Test 422: storage_account_get](#test-422) -- [Test 423: storage_blob_container_create](#test-423) -- [Test 424: storage_blob_container_create](#test-424) -- [Test 425: storage_blob_container_create](#test-425) -- [Test 426: storage_blob_container_get](#test-426) -- [Test 427: storage_blob_container_get](#test-427) -- [Test 428: 
storage_blob_container_get](#test-428) -- [Test 429: storage_blob_get](#test-429) -- [Test 430: storage_blob_get](#test-430) -- [Test 431: storage_blob_get](#test-431) -- [Test 432: storage_blob_get](#test-432) -- [Test 433: storage_blob_upload](#test-433) -- [Test 434: subscription_list](#test-434) -- [Test 435: subscription_list](#test-435) -- [Test 436: subscription_list](#test-436) -- [Test 437: subscription_list](#test-437) -- [Test 438: azureterraformbestpractices_get](#test-438) -- [Test 439: azureterraformbestpractices_get](#test-439) -- [Test 440: virtualdesktop_hostpool_list](#test-440) -- [Test 441: virtualdesktop_hostpool_host_list](#test-441) -- [Test 442: virtualdesktop_hostpool_host_user-list](#test-442) -- [Test 443: workbooks_create](#test-443) -- [Test 444: workbooks_delete](#test-444) -- [Test 445: workbooks_list](#test-445) -- [Test 446: workbooks_list](#test-446) -- [Test 447: workbooks_show](#test-447) -- [Test 448: workbooks_show](#test-448) -- [Test 449: workbooks_update](#test-449) -- [Test 450: bicepschema_get](#test-450) -- [Test 451: cloudarchitect_design](#test-451) -- [Test 452: cloudarchitect_design](#test-452) -- [Test 453: cloudarchitect_design](#test-453) -- [Test 454: cloudarchitect_design](#test-454) +- [Test 285: aks_nodepool_get](#test-285) +- [Test 286: aks_nodepool_get](#test-286) +- [Test 287: aks_nodepool_get](#test-287) +- [Test 288: aks_nodepool_get](#test-288) +- [Test 289: aks_nodepool_get](#test-289) +- [Test 290: loadtesting_test_create](#test-290) +- [Test 291: loadtesting_test_get](#test-291) +- [Test 292: loadtesting_testresource_create](#test-292) +- [Test 293: loadtesting_testresource_list](#test-293) +- [Test 294: loadtesting_testrun_create](#test-294) +- [Test 295: loadtesting_testrun_get](#test-295) +- [Test 296: loadtesting_testrun_list](#test-296) +- [Test 297: loadtesting_testrun_update](#test-297) +- [Test 298: grafana_list](#test-298) +- [Test 299: managedlustre_fs_create](#test-299) +- [Test 300: 
managedlustre_fs_list](#test-300) +- [Test 301: managedlustre_fs_list](#test-301) +- [Test 302: managedlustre_fs_sku_get](#test-302) +- [Test 303: managedlustre_fs_subnetsize_ask](#test-303) +- [Test 304: managedlustre_fs_subnetsize_validate](#test-304) +- [Test 305: managedlustre_fs_update](#test-305) +- [Test 306: marketplace_product_get](#test-306) +- [Test 307: marketplace_product_list](#test-307) +- [Test 308: marketplace_product_list](#test-308) +- [Test 309: azureaibestpractices_get](#test-309) +- [Test 310: azureaibestpractices_get](#test-310) +- [Test 311: azureaibestpractices_get](#test-311) +- [Test 312: azureaibestpractices_get](#test-312) +- [Test 313: azureaibestpractices_get](#test-313) +- [Test 314: get_bestpractices_get](#test-314) +- [Test 315: get_bestpractices_get](#test-315) +- [Test 316: get_bestpractices_get](#test-316) +- [Test 317: get_bestpractices_get](#test-317) +- [Test 318: get_bestpractices_get](#test-318) +- [Test 319: get_bestpractices_get](#test-319) +- [Test 320: get_bestpractices_get](#test-320) +- [Test 321: get_bestpractices_get](#test-321) +- [Test 322: get_bestpractices_get](#test-322) +- [Test 323: monitor_activitylog_list](#test-323) +- [Test 324: monitor_healthmodels_entity_get](#test-324) +- [Test 325: monitor_metrics_definitions](#test-325) +- [Test 326: monitor_metrics_definitions](#test-326) +- [Test 327: monitor_metrics_definitions](#test-327) +- [Test 328: monitor_metrics_query](#test-328) +- [Test 329: monitor_metrics_query](#test-329) +- [Test 330: monitor_metrics_query](#test-330) +- [Test 331: monitor_metrics_query](#test-331) +- [Test 332: monitor_metrics_query](#test-332) +- [Test 333: monitor_metrics_query](#test-333) +- [Test 334: monitor_resource_log_query](#test-334) +- [Test 335: monitor_table_list](#test-335) +- [Test 336: monitor_table_list](#test-336) +- [Test 337: monitor_table_type_list](#test-337) +- [Test 338: monitor_table_type_list](#test-338) +- [Test 339: monitor_webtests_create](#test-339) +- 
[Test 340: monitor_webtests_get](#test-340) +- [Test 341: monitor_webtests_list](#test-341) +- [Test 342: monitor_webtests_list](#test-342) +- [Test 343: monitor_webtests_update](#test-343) +- [Test 344: monitor_workspace_list](#test-344) +- [Test 345: monitor_workspace_list](#test-345) +- [Test 346: monitor_workspace_list](#test-346) +- [Test 347: monitor_workspace_log_query](#test-347) +- [Test 348: datadog_monitoredresources_list](#test-348) +- [Test 349: datadog_monitoredresources_list](#test-349) +- [Test 350: extension_azqr](#test-350) +- [Test 351: extension_azqr](#test-351) +- [Test 352: extension_azqr](#test-352) +- [Test 353: quota_region_availability_list](#test-353) +- [Test 354: quota_usage_check](#test-354) +- [Test 355: role_assignment_list](#test-355) +- [Test 356: role_assignment_list](#test-356) +- [Test 357: redis_list](#test-357) +- [Test 358: redis_list](#test-358) +- [Test 359: redis_list](#test-359) +- [Test 360: redis_list](#test-360) +- [Test 361: redis_list](#test-361) +- [Test 362: group_list](#test-362) +- [Test 363: group_list](#test-363) +- [Test 364: group_list](#test-364) +- [Test 365: resourcehealth_availability-status_get](#test-365) +- [Test 366: resourcehealth_availability-status_get](#test-366) +- [Test 367: resourcehealth_availability-status_get](#test-367) +- [Test 368: resourcehealth_availability-status_list](#test-368) +- [Test 369: resourcehealth_availability-status_list](#test-369) +- [Test 370: resourcehealth_availability-status_list](#test-370) +- [Test 371: resourcehealth_health-events_list](#test-371) +- [Test 372: resourcehealth_health-events_list](#test-372) +- [Test 373: resourcehealth_health-events_list](#test-373) +- [Test 374: resourcehealth_health-events_list](#test-374) +- [Test 375: resourcehealth_health-events_list](#test-375) +- [Test 376: servicebus_queue_details](#test-376) +- [Test 377: servicebus_topic_details](#test-377) +- [Test 378: servicebus_topic_subscription_details](#test-378) +- [Test 379: 
signalr_runtime_get](#test-379) +- [Test 380: signalr_runtime_get](#test-380) +- [Test 381: signalr_runtime_get](#test-381) +- [Test 382: signalr_runtime_get](#test-382) +- [Test 383: signalr_runtime_get](#test-383) +- [Test 384: signalr_runtime_get](#test-384) +- [Test 385: sql_db_create](#test-385) +- [Test 386: sql_db_create](#test-386) +- [Test 387: sql_db_create](#test-387) +- [Test 388: sql_db_delete](#test-388) +- [Test 389: sql_db_delete](#test-389) +- [Test 390: sql_db_delete](#test-390) +- [Test 391: sql_db_list](#test-391) +- [Test 392: sql_db_list](#test-392) +- [Test 393: sql_db_rename](#test-393) +- [Test 394: sql_db_rename](#test-394) +- [Test 395: sql_db_show](#test-395) +- [Test 396: sql_db_show](#test-396) +- [Test 397: sql_db_update](#test-397) +- [Test 398: sql_db_update](#test-398) +- [Test 399: sql_elastic-pool_list](#test-399) +- [Test 400: sql_elastic-pool_list](#test-400) +- [Test 401: sql_elastic-pool_list](#test-401) +- [Test 402: sql_server_create](#test-402) +- [Test 403: sql_server_create](#test-403) +- [Test 404: sql_server_create](#test-404) +- [Test 405: sql_server_delete](#test-405) +- [Test 406: sql_server_delete](#test-406) +- [Test 407: sql_server_delete](#test-407) +- [Test 408: sql_server_entra-admin_list](#test-408) +- [Test 409: sql_server_entra-admin_list](#test-409) +- [Test 410: sql_server_entra-admin_list](#test-410) +- [Test 411: sql_server_firewall-rule_create](#test-411) +- [Test 412: sql_server_firewall-rule_create](#test-412) +- [Test 413: sql_server_firewall-rule_create](#test-413) +- [Test 414: sql_server_firewall-rule_delete](#test-414) +- [Test 415: sql_server_firewall-rule_delete](#test-415) +- [Test 416: sql_server_firewall-rule_delete](#test-416) +- [Test 417: sql_server_firewall-rule_list](#test-417) +- [Test 418: sql_server_firewall-rule_list](#test-418) +- [Test 419: sql_server_firewall-rule_list](#test-419) +- [Test 420: sql_server_list](#test-420) +- [Test 421: sql_server_list](#test-421) +- [Test 422: 
sql_server_show](#test-422) +- [Test 423: sql_server_show](#test-423) +- [Test 424: sql_server_show](#test-424) +- [Test 425: storage_account_create](#test-425) +- [Test 426: storage_account_create](#test-426) +- [Test 427: storage_account_create](#test-427) +- [Test 428: storage_account_get](#test-428) +- [Test 429: storage_account_get](#test-429) +- [Test 430: storage_account_get](#test-430) +- [Test 431: storage_account_get](#test-431) +- [Test 432: storage_account_get](#test-432) +- [Test 433: storage_blob_container_create](#test-433) +- [Test 434: storage_blob_container_create](#test-434) +- [Test 435: storage_blob_container_create](#test-435) +- [Test 436: storage_blob_container_get](#test-436) +- [Test 437: storage_blob_container_get](#test-437) +- [Test 438: storage_blob_container_get](#test-438) +- [Test 439: storage_blob_get](#test-439) +- [Test 440: storage_blob_get](#test-440) +- [Test 441: storage_blob_get](#test-441) +- [Test 442: storage_blob_get](#test-442) +- [Test 443: storage_blob_upload](#test-443) +- [Test 444: subscription_list](#test-444) +- [Test 445: subscription_list](#test-445) +- [Test 446: subscription_list](#test-446) +- [Test 447: subscription_list](#test-447) +- [Test 448: azureterraformbestpractices_get](#test-448) +- [Test 449: azureterraformbestpractices_get](#test-449) +- [Test 450: virtualdesktop_hostpool_list](#test-450) +- [Test 451: virtualdesktop_hostpool_host_list](#test-451) +- [Test 452: virtualdesktop_hostpool_host_user-list](#test-452) +- [Test 453: workbooks_create](#test-453) +- [Test 454: workbooks_delete](#test-454) +- [Test 455: workbooks_list](#test-455) +- [Test 456: workbooks_list](#test-456) +- [Test 457: workbooks_show](#test-457) +- [Test 458: workbooks_show](#test-458) +- [Test 459: workbooks_update](#test-459) +- [Test 460: bicepschema_get](#test-460) +- [Test 461: cloudarchitect_design](#test-461) +- [Test 462: cloudarchitect_design](#test-462) +- [Test 463: cloudarchitect_design](#test-463) +- [Test 464: 
cloudarchitect_design](#test-464) --- @@ -480,10 +490,10 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.705410 | `foundry_agents_connect` | ✅ **EXPECTED** | -| 2 | 0.663568 | `foundry_agents_list` | ❌ | +| 2 | 0.663468 | `foundry_agents_list` | ❌ | | 3 | 0.617213 | `foundry_resource_get` | ❌ | | 4 | 0.548044 | `foundry_openai_models-list` | ❌ | -| 5 | 0.537580 | `foundry_agents_query-and-evaluate` | ❌ | +| 5 | 0.547459 | `foundry_agents_get-sdk-sample` | ❌ | --- @@ -496,11 +506,11 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.544099 | `foundry_agents_query-and-evaluate` | ❌ | -| 2 | 0.469428 | `foundry_agents_evaluate` | ✅ **EXPECTED** | -| 3 | 0.445964 | `foundry_agents_connect` | ❌ | -| 4 | 0.278921 | `foundry_agents_list` | ❌ | -| 5 | 0.250023 | `monitor_workspace_log_query` | ❌ | +| 1 | 0.543045 | `foundry_agents_query-and-evaluate` | ❌ | +| 2 | 0.469272 | `foundry_agents_evaluate` | ✅ **EXPECTED** | +| 3 | 0.445585 | `foundry_agents_connect` | ❌ | +| 4 | 0.298494 | `foundry_threads_list` | ❌ | +| 5 | 0.279058 | `foundry_agents_list` | ❌ | --- @@ -513,11 +523,11 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.797877 | `foundry_agents_list` | ✅ **EXPECTED** | +| 1 | 0.797701 | `foundry_agents_list` | ✅ **EXPECTED** | | 2 | 0.666021 | `foundry_resource_get` | ❌ | | 3 | 0.654206 | `foundry_openai_models-list` | ❌ | -| 4 | 0.575553 | `foundry_models_deployments_list` | ❌ | -| 5 | 0.561946 | `search_service_list` | ❌ | +| 4 | 0.647246 | `foundry_threads_list` | ❌ | +| 5 | 0.575761 | `foundry_models_deployments_list` | ❌ | --- @@ -530,16 +540,33 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.749829 | `foundry_agents_list` | ✅ **EXPECTED** | -| 2 | 0.630288 | `foundry_resource_get` | ❌ | -| 3 | 0.611722 | `foundry_openai_models-list` | ❌ | -| 4 | 0.548511 | `foundry_agents_connect` | ❌ | -| 5 | 0.535020 | `foundry_models_list` | ❌ | +| 1 | 
0.749704 | `foundry_agents_list` | ✅ **EXPECTED** | +| 2 | 0.630323 | `foundry_resource_get` | ❌ | +| 3 | 0.611801 | `foundry_openai_models-list` | ❌ | +| 4 | 0.603708 | `foundry_threads_list` | ❌ | +| 5 | 0.556580 | `foundry_agents_get-sdk-sample` | ❌ | --- ## Test 5 +**Expected Tool:** `foundry_agents_create` +**Prompt:** Create a new Azure AI Foundry agent using instructions in the active editor + +### Results + +| Rank | Score | Tool | Status | +|------|-------|------|--------| +| 1 | 0.587064 | `foundry_agents_create` | ✅ **EXPECTED** | +| 2 | 0.561567 | `foundry_agents_get-sdk-sample` | ❌ | +| 3 | 0.554070 | `foundry_threads_create` | ❌ | +| 4 | 0.525727 | `foundry_models_deploy` | ❌ | +| 5 | 0.525461 | `foundry_agents_list` | ❌ | + +--- + +## Test 6 + **Expected Tool:** `foundry_agents_query-and-evaluate` **Prompt:** Query and evaluate an agent in my Azure AI Foundry resource for task_adherence @@ -548,14 +575,82 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.652200 | `foundry_agents_connect` | ❌ | -| 2 | 0.570788 | `foundry_agents_list` | ❌ | -| 3 | 0.553190 | `foundry_agents_query-and-evaluate` | ✅ **EXPECTED** | -| 4 | 0.493779 | `foundry_agents_evaluate` | ❌ | -| 5 | 0.460662 | `foundry_resource_get` | ❌ | +| 2 | 0.570725 | `foundry_agents_list` | ❌ | +| 3 | 0.553233 | `foundry_agents_query-and-evaluate` | ✅ **EXPECTED** | +| 4 | 0.493778 | `foundry_agents_evaluate` | ❌ | +| 5 | 0.469431 | `foundry_threads_list` | ❌ | --- -## Test 6 +## Test 7 + +**Expected Tool:** `foundry_agents_get-sdk-sample` +**Prompt:** Create a CLI app that can talk to an Azure AI Foundry Agent using Python SDK + +### Results + +| Rank | Score | Tool | Status | +|------|-------|------|--------| +| 1 | 0.595581 | `foundry_agents_get-sdk-sample` | ✅ **EXPECTED** | +| 2 | 0.552197 | `foundry_threads_create` | ❌ | +| 3 | 0.521920 | `foundry_agents_connect` | ❌ | +| 4 | 0.518552 | `foundry_agents_create` | ❌ | +| 5 | 0.509581 | `foundry_agents_list` | ❌ | 
+ +--- + +## Test 8 + +**Expected Tool:** `foundry_threads_create` +**Prompt:** Create an Azure AI Foundry thread to hold the conversation + +### Results + +| Rank | Score | Tool | Status | +|------|-------|------|--------| +| 1 | 0.606811 | `foundry_threads_create` | ✅ **EXPECTED** | +| 2 | 0.528310 | `foundry_openai_chat-completions-create` | ❌ | +| 3 | 0.519709 | `foundry_threads_get-messages` | ❌ | +| 4 | 0.506089 | `foundry_threads_list` | ❌ | +| 5 | 0.490796 | `foundry_models_deploy` | ❌ | + +--- + +## Test 9 + +**Expected Tool:** `foundry_threads_list` +**Prompt:** List my AI Foundry threads + +### Results + +| Rank | Score | Tool | Status | +|------|-------|------|--------| +| 1 | 0.677249 | `foundry_threads_list` | ✅ **EXPECTED** | +| 2 | 0.574068 | `foundry_threads_get-messages` | ❌ | +| 3 | 0.566999 | `foundry_threads_create` | ❌ | +| 4 | 0.471737 | `foundry_agents_get-sdk-sample` | ❌ | +| 5 | 0.448682 | `foundry_agents_list` | ❌ | + +--- + +## Test 10 + +**Expected Tool:** `foundry_threads_get-messages` +**Prompt:** Show me the messages in the AI Foundry thread with id + +### Results + +| Rank | Score | Tool | Status | +|------|-------|------|--------| +| 1 | 0.669937 | `foundry_threads_get-messages` | ✅ **EXPECTED** | +| 2 | 0.584431 | `foundry_threads_create` | ❌ | +| 3 | 0.529381 | `foundry_threads_list` | ❌ | +| 4 | 0.437894 | `foundry_agents_get-sdk-sample` | ❌ | +| 5 | 0.427894 | `foundry_agents_create` | ❌ | + +--- + +## Test 11 **Expected Tool:** `foundry_knowledge_index_list` **Prompt:** List all knowledge indexes in my AI Foundry project @@ -565,14 +660,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.703772 | `foundry_knowledge_index_list` | ✅ **EXPECTED** | -| 2 | 0.537700 | `foundry_agents_list` | ❌ | +| 2 | 0.537540 | `foundry_agents_list` | ❌ | | 3 | 0.526528 | `foundry_knowledge_index_schema` | ❌ | -| 4 | 0.475802 | `foundry_models_deployments_list` | ❌ | -| 5 | 0.471613 | `foundry_openai_models-list` | ❌ | 
+| 4 | 0.500786 | `foundry_threads_list` | ❌ | +| 5 | 0.475746 | `foundry_models_deployments_list` | ❌ | --- -## Test 7 +## Test 12 **Expected Tool:** `foundry_knowledge_index_list` **Prompt:** Show me the knowledge indexes in my AI Foundry project @@ -581,15 +676,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.615468 | `foundry_knowledge_index_list` | ✅ **EXPECTED** | -| 2 | 0.489350 | `foundry_knowledge_index_schema` | ❌ | -| 3 | 0.484482 | `foundry_agents_list` | ❌ | -| 4 | 0.441545 | `foundry_resource_get` | ❌ | -| 5 | 0.431361 | `foundry_models_deployments_list` | ❌ | +| 1 | 0.615458 | `foundry_knowledge_index_list` | ✅ **EXPECTED** | +| 2 | 0.489311 | `foundry_knowledge_index_schema` | ❌ | +| 3 | 0.484329 | `foundry_agents_list` | ❌ | +| 4 | 0.454174 | `foundry_threads_list` | ❌ | +| 5 | 0.441521 | `foundry_resource_get` | ❌ | --- -## Test 8 +## Test 13 **Expected Tool:** `foundry_knowledge_index_schema` **Prompt:** Show me the schema for knowledge index in my Azure AI Foundry resource @@ -606,7 +701,7 @@ --- -## Test 9 +## Test 14 **Expected Tool:** `foundry_knowledge_index_schema` **Prompt:** Get the schema configuration for knowledge index @@ -615,15 +710,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.650269 | `foundry_knowledge_index_schema` | ✅ **EXPECTED** | -| 2 | 0.432759 | `postgres_table_schema_get` | ❌ | -| 3 | 0.417421 | `kusto_table_schema` | ❌ | -| 4 | 0.398186 | `mysql_table_schema_get` | ❌ | -| 5 | 0.396194 | `foundry_knowledge_index_list` | ❌ | +| 1 | 0.650203 | `foundry_knowledge_index_schema` | ✅ **EXPECTED** | +| 2 | 0.432792 | `postgres_table_schema_get` | ❌ | +| 3 | 0.417496 | `kusto_table_schema` | ❌ | +| 4 | 0.398322 | `mysql_table_schema_get` | ❌ | +| 5 | 0.396119 | `foundry_knowledge_index_list` | ❌ | --- -## Test 10 +## Test 15 **Expected Tool:** `foundry_models_deploy` **Prompt:** Deploy a GPT4o instance on my resource @@ -636,11 +731,11 @@ | 2 | 0.299986 | 
`foundry_openai_models-list` | ❌ | | 3 | 0.298490 | `loadtesting_testrun_create` | ❌ | | 4 | 0.293050 | `loadtesting_testresource_create` | ❌ | -| 5 | 0.290381 | `foundry_openai_embeddings-create` | ❌ | +| 5 | 0.290387 | `foundry_openai_embeddings-create` | ❌ | --- -## Test 11 +## Test 16 **Expected Tool:** `foundry_models_deployments_list` **Prompt:** List all AI Foundry model deployments @@ -649,15 +744,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.681385 | `foundry_models_deployments_list` | ✅ **EXPECTED** | +| 1 | 0.681081 | `foundry_models_deployments_list` | ✅ **EXPECTED** | | 2 | 0.674510 | `foundry_openai_models-list` | ❌ | -| 3 | 0.569059 | `foundry_agents_list` | ❌ | -| 4 | 0.566272 | `foundry_resource_get` | ❌ | -| 5 | 0.549636 | `foundry_models_list` | ❌ | +| 3 | 0.572625 | `foundry_threads_list` | ❌ | +| 4 | 0.568871 | `foundry_agents_list` | ❌ | +| 5 | 0.566272 | `foundry_resource_get` | ❌ | --- -## Test 12 +## Test 17 **Expected Tool:** `foundry_models_deployments_list` **Prompt:** Show me all AI Foundry model deployments @@ -666,15 +761,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.620173 | `foundry_models_deployments_list` | ✅ **EXPECTED** | -| 2 | 0.619231 | `foundry_openai_models-list` | ❌ | -| 3 | 0.543352 | `foundry_resource_get` | ❌ | -| 4 | 0.540551 | `foundry_agents_list` | ❌ | -| 5 | 0.521475 | `foundry_models_deploy` | ❌ | +| 1 | 0.619840 | `foundry_models_deployments_list` | ✅ **EXPECTED** | +| 2 | 0.619299 | `foundry_openai_models-list` | ❌ | +| 3 | 0.543385 | `foundry_resource_get` | ❌ | +| 4 | 0.540528 | `foundry_agents_list` | ❌ | +| 5 | 0.527141 | `foundry_threads_list` | ❌ | --- -## Test 13 +## Test 18 **Expected Tool:** `foundry_models_list` **Prompt:** List all AI Foundry models @@ -685,13 +780,13 @@ |------|-------|------|--------| | 1 | 0.603415 | `foundry_openai_models-list` | ❌ | | 2 | 0.560022 | `foundry_models_list` | ✅ **EXPECTED** | -| 3 | 0.537981 | 
`foundry_models_deployments_list` | ❌ | -| 4 | 0.519472 | `foundry_agents_list` | ❌ | -| 5 | 0.514253 | `foundry_resource_get` | ❌ | +| 3 | 0.553634 | `foundry_threads_list` | ❌ | +| 4 | 0.537958 | `foundry_models_deployments_list` | ❌ | +| 5 | 0.519191 | `foundry_agents_list` | ❌ | --- -## Test 14 +## Test 19 **Expected Tool:** `foundry_models_list` **Prompt:** Show me the available AI Foundry models @@ -703,12 +798,12 @@ | 1 | 0.576904 | `foundry_openai_models-list` | ❌ | | 2 | 0.574818 | `foundry_models_list` | ✅ **EXPECTED** | | 3 | 0.525312 | `foundry_resource_get` | ❌ | -| 4 | 0.517980 | `foundry_models_deployments_list` | ❌ | -| 5 | 0.504087 | `foundry_agents_list` | ❌ | +| 4 | 0.522153 | `foundry_agents_get-sdk-sample` | ❌ | +| 5 | 0.517825 | `foundry_models_deployments_list` | ❌ | --- -## Test 15 +## Test 20 **Expected Tool:** `foundry_openai_chat-completions-create` **Prompt:** Create a chat completion with the message "Hello, how are you today?" using my Azure AI Foundry resource @@ -719,13 +814,13 @@ |------|-------|------|--------| | 1 | 0.641293 | `foundry_openai_chat-completions-create` | ✅ **EXPECTED** | | 2 | 0.546736 | `foundry_openai_create-completion` | ❌ | -| 3 | 0.415483 | `foundry_agents_connect` | ❌ | -| 4 | 0.399383 | `foundry_openai_embeddings-create` | ❌ | -| 5 | 0.364105 | `foundry_models_deploy` | ❌ | +| 3 | 0.420018 | `foundry_threads_create` | ❌ | +| 4 | 0.415482 | `foundry_agents_connect` | ❌ | +| 5 | 0.399382 | `foundry_openai_embeddings-create` | ❌ | --- -## Test 16 +## Test 21 **Expected Tool:** `foundry_openai_create-completion` **Prompt:** Create a completion with the prompt "What is Azure?" 
using my Azure AI Foundry resource @@ -736,13 +831,13 @@ |------|-------|------|--------| | 1 | 0.696936 | `foundry_openai_create-completion` | ✅ **EXPECTED** | | 2 | 0.579108 | `foundry_openai_chat-completions-create` | ❌ | -| 3 | 0.463703 | `foundry_models_deploy` | ❌ | -| 4 | 0.459126 | `foundry_resource_get` | ❌ | -| 5 | 0.458622 | `foundry_openai_embeddings-create` | ❌ | +| 3 | 0.465558 | `azureaibestpractices_get` | ❌ | +| 4 | 0.463703 | `foundry_models_deploy` | ❌ | +| 5 | 0.459126 | `foundry_resource_get` | ❌ | --- -## Test 17 +## Test 22 **Expected Tool:** `foundry_openai_embeddings-create` **Prompt:** Generate embeddings for the text "Azure OpenAI Service" using my Azure AI Foundry resource @@ -751,15 +846,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.766338 | `foundry_openai_embeddings-create` | ✅ **EXPECTED** | -| 2 | 0.543338 | `foundry_models_deploy` | ❌ | +| 1 | 0.766496 | `foundry_openai_embeddings-create` | ✅ **EXPECTED** | +| 2 | 0.543339 | `foundry_models_deploy` | ❌ | | 3 | 0.542214 | `foundry_openai_create-completion` | ❌ | | 4 | 0.520746 | `foundry_openai_models-list` | ❌ | | 5 | 0.519335 | `foundry_resource_get` | ❌ | --- -## Test 18 +## Test 23 **Expected Tool:** `foundry_openai_embeddings-create` **Prompt:** Create vector embeddings for my text using my Azure AI Foundry resource @@ -768,15 +863,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.724120 | `foundry_openai_embeddings-create` | ✅ **EXPECTED** | -| 2 | 0.494485 | `foundry_resource_get` | ❌ | -| 3 | 0.480296 | `foundry_models_deploy` | ❌ | -| 4 | 0.480218 | `foundry_openai_create-completion` | ❌ | -| 5 | 0.463797 | `foundry_openai_chat-completions-create` | ❌ | +| 1 | 0.724369 | `foundry_openai_embeddings-create` | ✅ **EXPECTED** | +| 2 | 0.494544 | `foundry_resource_get` | ❌ | +| 3 | 0.480389 | `foundry_models_deploy` | ❌ | +| 4 | 0.480294 | `foundry_openai_create-completion` | ❌ | +| 5 | 0.463885 | 
`foundry_openai_chat-completions-create` | ❌ | --- -## Test 19 +## Test 24 **Expected Tool:** `foundry_openai_models-list` **Prompt:** List all available OpenAI models in my Azure AI Foundry resource @@ -787,13 +882,13 @@ |------|-------|------|--------| | 1 | 0.799059 | `foundry_openai_models-list` | ✅ **EXPECTED** | | 2 | 0.668887 | `foundry_resource_get` | ❌ | -| 3 | 0.667040 | `foundry_models_list` | ❌ | -| 4 | 0.666207 | `foundry_models_deployments_list` | ❌ | -| 5 | 0.657546 | `foundry_agents_list` | ❌ | +| 3 | 0.667041 | `foundry_models_list` | ❌ | +| 4 | 0.666560 | `foundry_models_deployments_list` | ❌ | +| 5 | 0.657393 | `foundry_agents_list` | ❌ | --- -## Test 20 +## Test 25 **Expected Tool:** `foundry_openai_models-list` **Prompt:** Show me the OpenAI model deployments in my Azure AI Foundry resource @@ -803,14 +898,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.741659 | `foundry_openai_models-list` | ✅ **EXPECTED** | -| 2 | 0.660160 | `foundry_models_deployments_list` | ❌ | -| 3 | 0.648219 | `foundry_resource_get` | ❌ | +| 2 | 0.660115 | `foundry_models_deployments_list` | ❌ | +| 3 | 0.648218 | `foundry_resource_get` | ❌ | | 4 | 0.640650 | `foundry_models_deploy` | ❌ | -| 5 | 0.619878 | `foundry_agents_list` | ❌ | +| 5 | 0.619790 | `foundry_agents_list` | ❌ | --- -## Test 21 +## Test 26 **Expected Tool:** `foundry_resource_get` **Prompt:** List all AI Foundry resources in my subscription @@ -821,13 +916,13 @@ |------|-------|------|--------| | 1 | 0.594096 | `foundry_resource_get` | ✅ **EXPECTED** | | 2 | 0.571916 | `foundry_openai_models-list` | ❌ | -| 3 | 0.567019 | `foundry_agents_list` | ❌ | -| 4 | 0.558290 | `search_service_list` | ❌ | -| 5 | 0.524645 | `grafana_list` | ❌ | +| 3 | 0.566762 | `foundry_agents_list` | ❌ | +| 4 | 0.558075 | `foundry_threads_list` | ❌ | +| 5 | 0.556154 | `search_service_list` | ❌ | --- -## Test 22 +## Test 27 **Expected Tool:** `foundry_resource_get` **Prompt:** Show me the AI Foundry 
resources in resource group @@ -838,13 +933,13 @@ |------|-------|------|--------| | 1 | 0.665311 | `foundry_resource_get` | ✅ **EXPECTED** | | 2 | 0.585305 | `foundry_openai_models-list` | ❌ | -| 3 | 0.553993 | `foundry_agents_list` | ❌ | -| 4 | 0.518767 | `foundry_openai_embeddings-create` | ❌ | +| 3 | 0.553808 | `foundry_agents_list` | ❌ | +| 4 | 0.518747 | `foundry_openai_embeddings-create` | ❌ | | 5 | 0.492911 | `foundry_models_deploy` | ❌ | --- -## Test 23 +## Test 28 **Expected Tool:** `foundry_resource_get` **Prompt:** Get details for AI Foundry resource in resource group @@ -855,13 +950,13 @@ |------|-------|------|--------| | 1 | 0.735316 | `foundry_resource_get` | ✅ **EXPECTED** | | 2 | 0.571906 | `foundry_openai_models-list` | ❌ | -| 3 | 0.510197 | `monitor_webtests_get` | ❌ | -| 4 | 0.497090 | `foundry_openai_embeddings-create` | ❌ | -| 5 | 0.475722 | `foundry_agents_list` | ❌ | +| 3 | 0.509484 | `monitor_webtests_get` | ❌ | +| 4 | 0.496980 | `foundry_openai_embeddings-create` | ❌ | +| 5 | 0.475498 | `foundry_agents_list` | ❌ | --- -## Test 24 +## Test 29 **Expected Tool:** `search_knowledge_base_get` **Prompt:** List all knowledge bases in the Azure AI Search service @@ -870,15 +965,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.785556 | `search_knowledge_base_get` | ✅ **EXPECTED** | -| 2 | 0.700785 | `search_knowledge_source_get` | ❌ | -| 3 | 0.693600 | `search_service_list` | ❌ | -| 4 | 0.635477 | `search_knowledge_base_retrieve` | ❌ | -| 5 | 0.586578 | `search_index_get` | ❌ | +| 1 | 0.785967 | `search_knowledge_base_get` | ✅ **EXPECTED** | +| 2 | 0.700824 | `search_knowledge_source_get` | ❌ | +| 3 | 0.692681 | `search_service_list` | ❌ | +| 4 | 0.635863 | `search_knowledge_base_retrieve` | ❌ | +| 5 | 0.586575 | `search_index_get` | ❌ | --- -## Test 25 +## Test 30 **Expected Tool:** `search_knowledge_base_get` **Prompt:** Show me the knowledge bases in the Azure AI Search service @@ -890,12 +985,12 @@ | 1 | 
0.748213 | `search_knowledge_base_get` | ✅ **EXPECTED** | | 2 | 0.668487 | `search_knowledge_source_get` | ❌ | | 3 | 0.628582 | `search_knowledge_base_retrieve` | ❌ | -| 4 | 0.624479 | `search_service_list` | ❌ | +| 4 | 0.623715 | `search_service_list` | ❌ | | 5 | 0.566618 | `search_index_get` | ❌ | --- -## Test 26 +## Test 31 **Expected Tool:** `search_knowledge_base_get` **Prompt:** List all knowledge bases in the search service @@ -907,12 +1002,12 @@ | 1 | 0.702942 | `search_knowledge_base_get` | ✅ **EXPECTED** | | 2 | 0.605964 | `search_knowledge_source_get` | ❌ | | 3 | 0.583234 | `search_knowledge_base_retrieve` | ❌ | -| 4 | 0.513638 | `search_service_list` | ❌ | -| 5 | 0.476816 | `foundry_knowledge_index_list` | ❌ | +| 4 | 0.512825 | `search_service_list` | ❌ | +| 5 | 0.476815 | `foundry_knowledge_index_list` | ❌ | --- -## Test 27 +## Test 32 **Expected Tool:** `search_knowledge_base_get` **Prompt:** Show me the knowledge bases in the search service @@ -921,15 +1016,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.688051 | `search_knowledge_base_get` | ✅ **EXPECTED** | -| 2 | 0.599247 | `search_knowledge_source_get` | ❌ | -| 3 | 0.578499 | `search_knowledge_base_retrieve` | ❌ | -| 4 | 0.457619 | `search_service_list` | ❌ | -| 5 | 0.439529 | `foundry_knowledge_index_list` | ❌ | +| 1 | 0.688155 | `search_knowledge_base_get` | ✅ **EXPECTED** | +| 2 | 0.599348 | `search_knowledge_source_get` | ❌ | +| 3 | 0.578437 | `search_knowledge_base_retrieve` | ❌ | +| 4 | 0.456512 | `search_service_list` | ❌ | +| 5 | 0.439493 | `foundry_knowledge_index_list` | ❌ | --- -## Test 28 +## Test 33 **Expected Tool:** `search_knowledge_base_get` **Prompt:** Get the details of knowledge base in the Azure AI Search service @@ -938,15 +1033,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.769443 | `search_knowledge_base_get` | ✅ **EXPECTED** | -| 2 | 0.685642 | `search_knowledge_source_get` | ❌ | -| 3 | 0.636767 | 
`search_knowledge_base_retrieve` | ❌ | -| 4 | 0.586085 | `search_index_get` | ❌ | -| 5 | 0.533859 | `search_service_list` | ❌ | +| 1 | 0.769383 | `search_knowledge_base_get` | ✅ **EXPECTED** | +| 2 | 0.685640 | `search_knowledge_source_get` | ❌ | +| 3 | 0.636958 | `search_knowledge_base_retrieve` | ❌ | +| 4 | 0.585949 | `search_index_get` | ❌ | +| 5 | 0.533298 | `search_service_list` | ❌ | --- -## Test 29 +## Test 34 **Expected Tool:** `search_knowledge_base_get` **Prompt:** Show me the knowledge base in search service @@ -958,12 +1053,12 @@ | 1 | 0.595585 | `search_knowledge_base_get` | ✅ **EXPECTED** | | 2 | 0.551922 | `search_knowledge_base_retrieve` | ❌ | | 3 | 0.515480 | `search_knowledge_source_get` | ❌ | -| 4 | 0.366893 | `search_service_list` | ❌ | +| 4 | 0.366170 | `search_service_list` | ❌ | | 5 | 0.365633 | `search_index_get` | ❌ | --- -## Test 30 +## Test 35 **Expected Tool:** `search_knowledge_base_retrieve` **Prompt:** Run a retrieval with knowledge base in Azure AI Search service for the query @@ -972,15 +1067,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.724846 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | -| 2 | 0.650590 | `search_knowledge_base_get` | ❌ | -| 3 | 0.575307 | `search_index_query` | ❌ | -| 4 | 0.567361 | `search_knowledge_source_get` | ❌ | -| 5 | 0.520360 | `foundry_agents_connect` | ❌ | +| 1 | 0.724869 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | +| 2 | 0.650606 | `search_knowledge_base_get` | ❌ | +| 3 | 0.575356 | `search_index_query` | ❌ | +| 4 | 0.567386 | `search_knowledge_source_get` | ❌ | +| 5 | 0.520336 | `foundry_agents_connect` | ❌ | --- -## Test 31 +## Test 36 **Expected Tool:** `search_knowledge_base_retrieve` **Prompt:** Ask knowledge base in search service to retrieve information about @@ -989,15 +1084,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.633766 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | -| 2 | 0.589869 | 
`search_knowledge_base_get` | ❌ | -| 3 | 0.502085 | `search_knowledge_source_get` | ❌ | -| 4 | 0.422671 | `foundry_agents_query-and-evaluate` | ❌ | -| 5 | 0.399595 | `search_index_query` | ❌ | +| 1 | 0.633877 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | +| 2 | 0.589927 | `search_knowledge_base_get` | ❌ | +| 3 | 0.502173 | `search_knowledge_source_get` | ❌ | +| 4 | 0.422676 | `foundry_agents_query-and-evaluate` | ❌ | +| 5 | 0.399110 | `search_index_query` | ❌ | --- -## Test 32 +## Test 37 **Expected Tool:** `search_knowledge_base_retrieve` **Prompt:** Run a retrieval with knowledge base in search service for the query @@ -1006,15 +1101,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.657844 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | -| 2 | 0.557115 | `search_knowledge_base_get` | ❌ | -| 3 | 0.463461 | `search_knowledge_source_get` | ❌ | -| 4 | 0.436952 | `foundry_agents_query-and-evaluate` | ❌ | -| 5 | 0.422469 | `foundry_agents_connect` | ❌ | +| 1 | 0.657866 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | +| 2 | 0.557206 | `search_knowledge_base_get` | ❌ | +| 3 | 0.463605 | `search_knowledge_source_get` | ❌ | +| 4 | 0.436719 | `foundry_agents_query-and-evaluate` | ❌ | +| 5 | 0.422173 | `foundry_agents_connect` | ❌ | --- -## Test 33 +## Test 38 **Expected Tool:** `search_knowledge_base_retrieve` **Prompt:** Ask knowledge base in search service to retrieve information about @@ -1026,12 +1121,12 @@ | 1 | 0.633766 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | | 2 | 0.589869 | `search_knowledge_base_get` | ❌ | | 3 | 0.502085 | `search_knowledge_source_get` | ❌ | -| 4 | 0.422671 | `foundry_agents_query-and-evaluate` | ❌ | -| 5 | 0.399595 | `search_index_query` | ❌ | +| 4 | 0.422610 | `foundry_agents_query-and-evaluate` | ❌ | +| 5 | 0.399521 | `search_index_query` | ❌ | --- -## Test 34 +## Test 39 **Expected Tool:** `search_knowledge_base_retrieve` **Prompt:** Query knowledge base in search service about @@ 
-1042,13 +1137,13 @@ |------|-------|------|--------| | 1 | 0.598868 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | | 2 | 0.547862 | `search_knowledge_base_get` | ❌ | -| 3 | 0.467907 | `foundry_agents_query-and-evaluate` | ❌ | +| 3 | 0.467868 | `foundry_agents_query-and-evaluate` | ❌ | | 4 | 0.464904 | `search_knowledge_source_get` | ❌ | | 5 | 0.412481 | `foundry_agents_connect` | ❌ | --- -## Test 35 +## Test 40 **Expected Tool:** `search_knowledge_base_retrieve` **Prompt:** Search knowledge base in Azure AI Search service for @@ -1057,15 +1152,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.649751 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | -| 2 | 0.631420 | `search_knowledge_base_get` | ❌ | -| 3 | 0.581412 | `search_index_query` | ❌ | -| 4 | 0.571126 | `search_knowledge_source_get` | ❌ | -| 5 | 0.544488 | `search_service_list` | ❌ | +| 1 | 0.649767 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | +| 2 | 0.631435 | `search_knowledge_base_get` | ❌ | +| 3 | 0.581359 | `search_index_query` | ❌ | +| 4 | 0.571156 | `search_knowledge_source_get` | ❌ | +| 5 | 0.544545 | `search_service_list` | ❌ | --- -## Test 36 +## Test 41 **Expected Tool:** `search_knowledge_base_retrieve` **Prompt:** What does knowledge base in search service know about @@ -1076,13 +1171,13 @@ |------|-------|------|--------| | 1 | 0.579716 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | | 2 | 0.560688 | `search_knowledge_base_get` | ❌ | -| 3 | 0.477942 | `search_knowledge_source_get` | ❌ | -| 4 | 0.402582 | `foundry_agents_query-and-evaluate` | ❌ | +| 3 | 0.477941 | `search_knowledge_source_get` | ❌ | +| 4 | 0.402530 | `foundry_agents_query-and-evaluate` | ❌ | | 5 | 0.361231 | `foundry_knowledge_index_list` | ❌ | --- -## Test 37 +## Test 42 **Expected Tool:** `search_knowledge_base_retrieve` **Prompt:** Find information about using knowledge base in search service @@ -1091,15 +1186,15 @@ | Rank | Score | Tool | Status | 
|------|-------|------|--------| -| 1 | 0.582660 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | -| 2 | 0.528583 | `search_knowledge_base_get` | ❌ | -| 3 | 0.449290 | `search_knowledge_source_get` | ❌ | -| 4 | 0.447915 | `foundry_agents_query-and-evaluate` | ❌ | -| 5 | 0.397238 | `foundry_agents_connect` | ❌ | +| 1 | 0.582662 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | +| 2 | 0.528610 | `search_knowledge_base_get` | ❌ | +| 3 | 0.449336 | `search_knowledge_source_get` | ❌ | +| 4 | 0.447690 | `foundry_agents_query-and-evaluate` | ❌ | +| 5 | 0.397187 | `foundry_agents_connect` | ❌ | --- -## Test 38 +## Test 43 **Expected Tool:** `search_knowledge_source_get` **Prompt:** List all knowledge sources in the Azure AI Search service @@ -1108,15 +1203,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.760416 | `search_knowledge_source_get` | ✅ **EXPECTED** | -| 2 | 0.691931 | `search_service_list` | ❌ | -| 3 | 0.665923 | `search_knowledge_base_get` | ❌ | -| 4 | 0.573012 | `search_index_get` | ❌ | -| 5 | 0.560779 | `search_knowledge_base_retrieve` | ❌ | +| 1 | 0.760406 | `search_knowledge_source_get` | ✅ **EXPECTED** | +| 2 | 0.690845 | `search_service_list` | ❌ | +| 3 | 0.665905 | `search_knowledge_base_get` | ❌ | +| 4 | 0.573014 | `search_index_get` | ❌ | +| 5 | 0.560755 | `search_knowledge_base_retrieve` | ❌ | --- -## Test 39 +## Test 44 **Expected Tool:** `search_knowledge_source_get` **Prompt:** Show me the knowledge sources in the Azure AI Search service @@ -1126,14 +1221,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.737860 | `search_knowledge_source_get` | ✅ **EXPECTED** | -| 2 | 0.660170 | `search_service_list` | ❌ | +| 2 | 0.659236 | `search_service_list` | ❌ | | 3 | 0.652969 | `search_knowledge_base_get` | ❌ | -| 4 | 0.578835 | `search_index_get` | ❌ | -| 5 | 0.560564 | `search_index_query` | ❌ | +| 4 | 0.578836 | `search_index_get` | ❌ | +| 5 | 0.560519 | `search_index_query` | ❌ | --- 
-## Test 40 +## Test 45 **Expected Tool:** `search_knowledge_source_get` **Prompt:** List all knowledge sources in the search service @@ -1142,15 +1237,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.657935 | `search_knowledge_source_get` | ✅ **EXPECTED** | +| 1 | 0.657936 | `search_knowledge_source_get` | ✅ **EXPECTED** | | 2 | 0.558516 | `search_knowledge_base_get` | ❌ | -| 3 | 0.511469 | `search_service_list` | ❌ | +| 3 | 0.510338 | `search_service_list` | ❌ | | 4 | 0.470560 | `search_knowledge_base_retrieve` | ❌ | | 5 | 0.433657 | `foundry_knowledge_index_list` | ❌ | --- -## Test 41 +## Test 46 **Expected Tool:** `search_knowledge_source_get` **Prompt:** Show me the knowledge sources in the search service @@ -1161,13 +1256,13 @@ |------|-------|------|--------| | 1 | 0.652945 | `search_knowledge_source_get` | ✅ **EXPECTED** | | 2 | 0.563270 | `search_knowledge_base_get` | ❌ | -| 3 | 0.487022 | `search_service_list` | ❌ | +| 3 | 0.485934 | `search_service_list` | ❌ | | 4 | 0.477636 | `search_knowledge_base_retrieve` | ❌ | | 5 | 0.430518 | `search_index_get` | ❌ | --- -## Test 42 +## Test 47 **Expected Tool:** `search_knowledge_source_get` **Prompt:** Get the details of knowledge source in the Azure AI Search service @@ -1177,14 +1272,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.825604 | `search_knowledge_source_get` | ✅ **EXPECTED** | -| 2 | 0.693437 | `search_knowledge_base_get` | ❌ | +| 2 | 0.693438 | `search_knowledge_base_get` | ❌ | | 3 | 0.595643 | `search_index_get` | ❌ | | 4 | 0.540550 | `search_knowledge_base_retrieve` | ❌ | -| 5 | 0.531247 | `search_service_list` | ❌ | +| 5 | 0.531085 | `search_service_list` | ❌ | --- -## Test 43 +## Test 48 **Expected Tool:** `search_knowledge_source_get` **Prompt:** Show me the knowledge source in search service @@ -1197,11 +1292,11 @@ | 2 | 0.523643 | `search_knowledge_base_get` | ❌ | | 3 | 0.459923 | `search_knowledge_base_retrieve` | ❌ | | 4 | 
0.371465 | `search_index_get` | ❌ | -| 5 | 0.370838 | `search_service_list` | ❌ | +| 5 | 0.370585 | `search_service_list` | ❌ | --- -## Test 44 +## Test 49 **Expected Tool:** `search_index_get` **Prompt:** Show me the details of the index in Cognitive Search service @@ -1214,11 +1309,11 @@ | 2 | 0.544557 | `foundry_knowledge_index_schema` | ❌ | | 3 | 0.528153 | `search_knowledge_base_get` | ❌ | | 4 | 0.521765 | `search_knowledge_source_get` | ❌ | -| 5 | 0.490624 | `search_service_list` | ❌ | +| 5 | 0.490553 | `search_service_list` | ❌ | --- -## Test 45 +## Test 50 **Expected Tool:** `search_index_get` **Prompt:** List all indexes in the Cognitive Search service @@ -1228,14 +1323,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.640256 | `search_index_get` | ✅ **EXPECTED** | -| 2 | 0.620140 | `search_service_list` | ❌ | +| 2 | 0.619949 | `search_service_list` | ❌ | | 3 | 0.538885 | `foundry_knowledge_index_list` | ❌ | | 4 | 0.511485 | `search_knowledge_base_get` | ❌ | | 5 | 0.496094 | `search_knowledge_source_get` | ❌ | --- -## Test 46 +## Test 51 **Expected Tool:** `search_index_get` **Prompt:** Show me the indexes in the Cognitive Search service @@ -1245,14 +1340,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.620759 | `search_index_get` | ✅ **EXPECTED** | -| 2 | 0.562775 | `search_service_list` | ❌ | +| 2 | 0.562503 | `search_service_list` | ❌ | | 3 | 0.538471 | `foundry_knowledge_index_list` | ❌ | | 4 | 0.500365 | `search_knowledge_base_get` | ❌ | | 5 | 0.490025 | `search_knowledge_source_get` | ❌ | --- -## Test 47 +## Test 52 **Expected Tool:** `search_index_query` **Prompt:** Search for instances of in the index in Cognitive Search service @@ -1261,15 +1356,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.522953 | `search_index_get` | ❌ | -| 2 | 0.515871 | `search_index_query` | ✅ **EXPECTED** | -| 3 | 0.497392 | `search_service_list` | ❌ | -| 4 | 0.447993 | 
`search_knowledge_base_retrieve` | ❌ | -| 5 | 0.437640 | `postgres_database_query` | ❌ | +| 1 | 0.522598 | `search_index_get` | ❌ | +| 2 | 0.515911 | `search_index_query` | ✅ **EXPECTED** | +| 3 | 0.498264 | `search_service_list` | ❌ | +| 4 | 0.447868 | `search_knowledge_base_retrieve` | ❌ | +| 5 | 0.437608 | `postgres_database_query` | ❌ | --- -## Test 48 +## Test 53 **Expected Tool:** `search_service_list` **Prompt:** List all Cognitive Search services in my subscription @@ -1278,15 +1373,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.793651 | `search_service_list` | ✅ **EXPECTED** | -| 2 | 0.553011 | `kusto_cluster_list` | ❌ | +| 1 | 0.791803 | `search_service_list` | ✅ **EXPECTED** | +| 2 | 0.553012 | `kusto_cluster_list` | ❌ | | 3 | 0.509479 | `subscription_list` | ❌ | | 4 | 0.505971 | `search_index_get` | ❌ | | 5 | 0.504693 | `marketplace_product_list` | ❌ | --- -## Test 49 +## Test 54 **Expected Tool:** `search_service_list` **Prompt:** Show me the Cognitive Search services in my subscription @@ -1295,15 +1390,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.686140 | `search_service_list` | ✅ **EXPECTED** | +| 1 | 0.684837 | `search_service_list` | ✅ **EXPECTED** | | 2 | 0.484092 | `marketplace_product_list` | ❌ | | 3 | 0.479898 | `search_index_get` | ❌ | -| 4 | 0.462336 | `search_knowledge_base_get` | ❌ | +| 4 | 0.462337 | `search_knowledge_base_get` | ❌ | | 5 | 0.461786 | `kusto_cluster_list` | ❌ | --- -## Test 50 +## Test 55 **Expected Tool:** `search_service_list` **Prompt:** Show me my Cognitive Search services @@ -1312,15 +1407,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.553025 | `search_service_list` | ✅ **EXPECTED** | +| 1 | 0.551241 | `search_service_list` | ✅ **EXPECTED** | | 2 | 0.436230 | `search_index_get` | ❌ | | 3 | 0.415277 | `search_knowledge_base_get` | ❌ | | 4 | 0.410461 | `search_knowledge_source_get` | ❌ | -| 5 | 0.404758 | 
`search_index_query` | ❌ | +| 5 | 0.404707 | `search_index_query` | ❌ | --- -## Test 51 +## Test 56 **Expected Tool:** `speech_stt_recognize` **Prompt:** Convert this audio file to text using Azure Speech Services @@ -1330,14 +1425,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.666038 | `speech_stt_recognize` | ✅ **EXPECTED** | -| 2 | 0.377022 | `foundry_openai_embeddings-create` | ❌ | +| 2 | 0.377210 | `foundry_openai_embeddings-create` | ❌ | | 3 | 0.351127 | `deploy_plan_get` | ❌ | -| 4 | 0.338047 | `extension_cli_generate` | ❌ | -| 5 | 0.337685 | `deploy_pipeline_guidance_get` | ❌ | +| 4 | 0.338137 | `extension_cli_generate` | ❌ | +| 5 | 0.337763 | `deploy_pipeline_guidance_get` | ❌ | --- -## Test 52 +## Test 57 **Expected Tool:** `speech_stt_recognize` **Prompt:** Recognize speech from my audio file with language detection @@ -1347,14 +1442,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.511324 | `speech_stt_recognize` | ✅ **EXPECTED** | -| 2 | 0.192450 | `foundry_openai_embeddings-create` | ❌ | -| 3 | 0.170157 | `foundry_openai_create-completion` | ❌ | -| 4 | 0.167159 | `foundry_openai_chat-completions-create` | ❌ | -| 5 | 0.159108 | `foundry_agents_connect` | ❌ | +| 2 | 0.198123 | `foundry_agents_get-sdk-sample` | ❌ | +| 3 | 0.192462 | `foundry_openai_embeddings-create` | ❌ | +| 4 | 0.170157 | `foundry_openai_create-completion` | ❌ | +| 5 | 0.167159 | `foundry_openai_chat-completions-create` | ❌ | --- -## Test 53 +## Test 58 **Expected Tool:** `speech_stt_recognize` **Prompt:** Transcribe speech from audio file with profanity filtering @@ -1364,14 +1459,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.486489 | `speech_stt_recognize` | ✅ **EXPECTED** | -| 2 | 0.160209 | `foundry_agents_connect` | ❌ | -| 3 | 0.156850 | `deploy_pipeline_guidance_get` | ❌ | -| 4 | 0.154737 | `foundry_openai_create-completion` | ❌ | -| 5 | 0.154098 | `foundry_openai_embeddings-create` | 
❌ | +| 2 | 0.162863 | `foundry_threads_create` | ❌ | +| 3 | 0.160209 | `foundry_agents_connect` | ❌ | +| 4 | 0.156936 | `deploy_pipeline_guidance_get` | ❌ | +| 5 | 0.154737 | `foundry_openai_create-completion` | ❌ | --- -## Test 54 +## Test 59 **Expected Tool:** `speech_stt_recognize` **Prompt:** Convert speech to text from audio file using endpoint @@ -1380,15 +1475,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.611992 | `speech_stt_recognize` | ✅ **EXPECTED** | -| 2 | 0.309895 | `foundry_openai_embeddings-create` | ❌ | -| 3 | 0.244218 | `foundry_resource_get` | ❌ | -| 4 | 0.243626 | `foundry_openai_create-completion` | ❌ | -| 5 | 0.242771 | `foundry_openai_chat-completions-create` | ❌ | +| 1 | 0.612032 | `speech_stt_recognize` | ✅ **EXPECTED** | +| 2 | 0.309860 | `foundry_openai_embeddings-create` | ❌ | +| 3 | 0.244223 | `foundry_resource_get` | ❌ | +| 4 | 0.243658 | `foundry_openai_create-completion` | ❌ | +| 5 | 0.242816 | `foundry_openai_chat-completions-create` | ❌ | --- -## Test 55 +## Test 60 **Expected Tool:** `speech_stt_recognize` **Prompt:** Transcribe the audio file in Spanish language @@ -1398,14 +1493,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.410533 | `speech_stt_recognize` | ✅ **EXPECTED** | -| 2 | 0.152391 | `foundry_openai_embeddings-create` | ❌ | +| 2 | 0.152414 | `foundry_openai_embeddings-create` | ❌ | | 3 | 0.152137 | `foundry_models_deploy` | ❌ | -| 4 | 0.151632 | `deploy_pipeline_guidance_get` | ❌ | +| 4 | 0.151799 | `deploy_pipeline_guidance_get` | ❌ | | 5 | 0.140373 | `deploy_plan_get` | ❌ | --- -## Test 56 +## Test 61 **Expected Tool:** `speech_stt_recognize` **Prompt:** Convert speech to text with detailed output format from audio file @@ -1416,13 +1511,13 @@ |------|-------|------|--------| | 1 | 0.546259 | `speech_stt_recognize` | ✅ **EXPECTED** | | 2 | 0.218092 | `foundry_resource_get` | ❌ | -| 3 | 0.202935 | `foundry_openai_embeddings-create` | ❌ | -| 4 | 
0.183927 | `extension_azqr` | ❌ | +| 3 | 0.202860 | `foundry_openai_embeddings-create` | ❌ | +| 4 | 0.183420 | `extension_azqr` | ❌ | | 5 | 0.181020 | `search_index_get` | ❌ | --- -## Test 57 +## Test 62 **Expected Tool:** `speech_stt_recognize` **Prompt:** Recognize speech from with phrase hints for better accuracy @@ -1431,15 +1526,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.540249 | `speech_stt_recognize` | ✅ **EXPECTED** | -| 2 | 0.227953 | `foundry_openai_create-completion` | ❌ | -| 3 | 0.203215 | `foundry_agents_connect` | ❌ | -| 4 | 0.199441 | `foundry_openai_embeddings-create` | ❌ | -| 5 | 0.197199 | `foundry_openai_chat-completions-create` | ❌ | +| 1 | 0.539963 | `speech_stt_recognize` | ✅ **EXPECTED** | +| 2 | 0.228587 | `foundry_openai_create-completion` | ❌ | +| 3 | 0.203413 | `foundry_agents_connect` | ❌ | +| 4 | 0.199517 | `foundry_openai_embeddings-create` | ❌ | +| 5 | 0.197301 | `foundry_openai_chat-completions-create` | ❌ | --- -## Test 58 +## Test 63 **Expected Tool:** `speech_stt_recognize` **Prompt:** Transcribe audio using multiple phrase hints: "Azure", "cognitive services", "machine learning" @@ -1448,15 +1543,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.548967 | `speech_stt_recognize` | ✅ **EXPECTED** | -| 2 | 0.342494 | `extension_cli_generate` | ❌ | -| 3 | 0.337434 | `cloudarchitect_design` | ❌ | -| 4 | 0.335792 | `foundry_openai_create-completion` | ❌ | -| 5 | 0.333130 | `get_bestpractices_get` | ❌ | +| 1 | 0.549151 | `speech_stt_recognize` | ✅ **EXPECTED** | +| 2 | 0.393626 | `azureaibestpractices_get` | ❌ | +| 3 | 0.342537 | `extension_cli_generate` | ❌ | +| 4 | 0.337387 | `cloudarchitect_design` | ❌ | +| 5 | 0.335741 | `foundry_openai_create-completion` | ❌ | --- -## Test 59 +## Test 64 **Expected Tool:** `speech_stt_recognize` **Prompt:** Convert speech to text with comma-separated phrase hints: "Azure, cognitive services, API" @@ -1467,13 +1562,13 @@ 
|------|-------|------|--------| | 1 | 0.532536 | `speech_stt_recognize` | ✅ **EXPECTED** | | 2 | 0.349892 | `foundry_openai_create-completion` | ❌ | -| 3 | 0.340893 | `foundry_openai_chat-completions-create` | ❌ | -| 4 | 0.332669 | `foundry_openai_embeddings-create` | ❌ | -| 5 | 0.326712 | `get_bestpractices_get` | ❌ | +| 3 | 0.348381 | `azureaibestpractices_get` | ❌ | +| 4 | 0.340893 | `foundry_openai_chat-completions-create` | ❌ | +| 5 | 0.332862 | `foundry_openai_embeddings-create` | ❌ | --- -## Test 60 +## Test 65 **Expected Tool:** `speech_stt_recognize` **Prompt:** Transcribe audio with raw profanity output from file @@ -1483,14 +1578,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.453396 | `speech_stt_recognize` | ✅ **EXPECTED** | -| 2 | 0.173205 | `deploy_pipeline_guidance_get` | ❌ | -| 3 | 0.164990 | `foundry_openai_embeddings-create` | ❌ | -| 4 | 0.160523 | `extension_azqr` | ❌ | -| 5 | 0.160483 | `foundry_agents_connect` | ❌ | +| 2 | 0.173280 | `deploy_pipeline_guidance_get` | ❌ | +| 3 | 0.164929 | `foundry_openai_embeddings-create` | ❌ | +| 4 | 0.160483 | `foundry_agents_connect` | ❌ | +| 5 | 0.160185 | `extension_azqr` | ❌ | --- -## Test 61 +## Test 66 **Expected Tool:** `appconfig_account_list` **Prompt:** List all App Configuration stores in my subscription @@ -1499,15 +1594,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.786360 | `appconfig_account_list` | ✅ **EXPECTED** | +| 1 | 0.786298 | `appconfig_account_list` | ✅ **EXPECTED** | | 2 | 0.530613 | `appconfig_kv_get` | ❌ | -| 3 | 0.491358 | `postgres_server_list` | ❌ | +| 3 | 0.491380 | `postgres_server_list` | ❌ | | 4 | 0.481223 | `kusto_cluster_list` | ❌ | | 5 | 0.479997 | `subscription_list` | ❌ | --- -## Test 62 +## Test 67 **Expected Tool:** `appconfig_account_list` **Prompt:** Show me the App Configuration stores in my subscription @@ -1516,15 +1611,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 
0.634978 | `appconfig_account_list` | ✅ **EXPECTED** | -| 2 | 0.464865 | `appconfig_kv_get` | ❌ | -| 3 | 0.398520 | `subscription_list` | ❌ | -| 4 | 0.391291 | `redis_list` | ❌ | -| 5 | 0.372428 | `postgres_server_list` | ❌ | +| 1 | 0.635056 | `appconfig_account_list` | ✅ **EXPECTED** | +| 2 | 0.464826 | `appconfig_kv_get` | ❌ | +| 3 | 0.398562 | `subscription_list` | ❌ | +| 4 | 0.391398 | `redis_list` | ❌ | +| 5 | 0.372579 | `postgres_server_list` | ❌ | --- -## Test 63 +## Test 68 **Expected Tool:** `appconfig_account_list` **Prompt:** Show me my App Configuration stores @@ -1533,7 +1628,7 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.565435 | `appconfig_account_list` | ✅ **EXPECTED** | +| 1 | 0.565365 | `appconfig_account_list` | ✅ **EXPECTED** | | 2 | 0.465344 | `appconfig_kv_get` | ❌ | | 3 | 0.355916 | `postgres_server_config_get` | ❌ | | 4 | 0.348661 | `appconfig_kv_delete` | ❌ | @@ -1541,7 +1636,7 @@ --- -## Test 64 +## Test 69 **Expected Tool:** `appconfig_kv_delete` **Prompt:** Delete the key in App Configuration store @@ -1550,15 +1645,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.618277 | `appconfig_kv_delete` | ✅ **EXPECTED** | +| 1 | 0.618276 | `appconfig_kv_delete` | ✅ **EXPECTED** | | 2 | 0.464358 | `appconfig_kv_get` | ❌ | | 3 | 0.424344 | `appconfig_kv_set` | ❌ | | 4 | 0.422700 | `appconfig_kv_lock_set` | ❌ | -| 5 | 0.392016 | `appconfig_account_list` | ❌ | +| 5 | 0.392260 | `appconfig_account_list` | ❌ | --- -## Test 65 +## Test 70 **Expected Tool:** `appconfig_kv_get` **Prompt:** List all key-value settings in App Configuration store @@ -1567,15 +1662,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.632687 | `appconfig_kv_get` | ✅ **EXPECTED** | -| 2 | 0.557810 | `appconfig_account_list` | ❌ | -| 3 | 0.530884 | `appconfig_kv_set` | ❌ | -| 4 | 0.464635 | `appconfig_kv_delete` | ❌ | -| 5 | 0.439089 | `appconfig_kv_lock_set` | ❌ | +| 1 | 0.632652 | 
`appconfig_kv_get` | ✅ **EXPECTED** | +| 2 | 0.558116 | `appconfig_account_list` | ❌ | +| 3 | 0.531033 | `appconfig_kv_set` | ❌ | +| 4 | 0.464568 | `appconfig_kv_delete` | ❌ | +| 5 | 0.438999 | `appconfig_kv_lock_set` | ❌ | --- -## Test 66 +## Test 71 **Expected Tool:** `appconfig_kv_get` **Prompt:** Show me the key-value settings in App Configuration store @@ -1585,14 +1680,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.612555 | `appconfig_kv_get` | ✅ **EXPECTED** | -| 2 | 0.522426 | `appconfig_account_list` | ❌ | +| 2 | 0.522671 | `appconfig_account_list` | ❌ | | 3 | 0.512945 | `appconfig_kv_set` | ❌ | | 4 | 0.468503 | `appconfig_kv_delete` | ❌ | | 5 | 0.457866 | `appconfig_kv_lock_set` | ❌ | --- -## Test 67 +## Test 72 **Expected Tool:** `appconfig_kv_get` **Prompt:** List all key-value settings with key name starting with 'prod-' in App Configuration store @@ -1601,15 +1696,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.512880 | `appconfig_kv_get` | ✅ **EXPECTED** | -| 2 | 0.449934 | `appconfig_account_list` | ❌ | -| 3 | 0.398698 | `appconfig_kv_set` | ❌ | -| 4 | 0.380636 | `appconfig_kv_delete` | ❌ | -| 5 | 0.346156 | `appconfig_kv_lock_set` | ❌ | +| 1 | 0.512883 | `appconfig_kv_get` | ✅ **EXPECTED** | +| 2 | 0.450109 | `appconfig_account_list` | ❌ | +| 3 | 0.398684 | `appconfig_kv_set` | ❌ | +| 4 | 0.380614 | `appconfig_kv_delete` | ❌ | +| 5 | 0.346166 | `appconfig_kv_lock_set` | ❌ | --- -## Test 68 +## Test 73 **Expected Tool:** `appconfig_kv_get` **Prompt:** Show the content for the key in App Configuration store @@ -1621,12 +1716,12 @@ | 1 | 0.552300 | `appconfig_kv_get` | ✅ **EXPECTED** | | 2 | 0.448912 | `appconfig_kv_set` | ❌ | | 3 | 0.441713 | `appconfig_kv_delete` | ❌ | -| 4 | 0.437432 | `appconfig_account_list` | ❌ | +| 4 | 0.437745 | `appconfig_account_list` | ❌ | | 5 | 0.416264 | `appconfig_kv_lock_set` | ❌ | --- -## Test 69 +## Test 74 **Expected Tool:** `appconfig_kv_lock_set` 
**Prompt:** Lock the key in App Configuration store @@ -1635,15 +1730,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.591237 | `appconfig_kv_lock_set` | ✅ **EXPECTED** | -| 2 | 0.487174 | `appconfig_kv_get` | ❌ | -| 3 | 0.445551 | `appconfig_kv_set` | ❌ | -| 4 | 0.431516 | `appconfig_kv_delete` | ❌ | -| 5 | 0.373656 | `appconfig_account_list` | ❌ | +| 1 | 0.591253 | `appconfig_kv_lock_set` | ✅ **EXPECTED** | +| 2 | 0.487221 | `appconfig_kv_get` | ❌ | +| 3 | 0.445541 | `appconfig_kv_set` | ❌ | +| 4 | 0.431462 | `appconfig_kv_delete` | ❌ | +| 5 | 0.373617 | `appconfig_account_list` | ❌ | --- -## Test 70 +## Test 75 **Expected Tool:** `appconfig_kv_lock_set` **Prompt:** Unlock the key in App Configuration store @@ -1654,13 +1749,13 @@ |------|-------|------|--------| | 1 | 0.555699 | `appconfig_kv_lock_set` | ✅ **EXPECTED** | | 2 | 0.505681 | `appconfig_kv_get` | ❌ | -| 3 | 0.476496 | `appconfig_kv_delete` | ❌ | +| 3 | 0.476497 | `appconfig_kv_delete` | ❌ | | 4 | 0.425488 | `appconfig_kv_set` | ❌ | -| 5 | 0.409406 | `appconfig_account_list` | ❌ | +| 5 | 0.409649 | `appconfig_account_list` | ❌ | --- -## Test 71 +## Test 76 **Expected Tool:** `appconfig_kv_set` **Prompt:** Set the key in App Configuration store to @@ -1673,11 +1768,11 @@ | 2 | 0.536497 | `appconfig_kv_lock_set` | ❌ | | 3 | 0.512707 | `appconfig_kv_get` | ❌ | | 4 | 0.505571 | `appconfig_kv_delete` | ❌ | -| 5 | 0.377919 | `appconfig_account_list` | ❌ | +| 5 | 0.378223 | `appconfig_account_list` | ❌ | --- -## Test 72 +## Test 77 **Expected Tool:** `applens_resource_diagnose` **Prompt:** Please help me diagnose issues with my app using app lens @@ -1687,14 +1782,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.595632 | `applens_resource_diagnose` | ✅ **EXPECTED** | -| 2 | 0.336090 | `deploy_app_logs_get` | ❌ | -| 3 | 0.300617 | `deploy_architecture_diagram_generate` | ❌ | -| 4 | 0.273082 | `cloudarchitect_design` | ❌ | +| 2 | 0.335768 | 
`deploy_app_logs_get` | ❌ | +| 3 | 0.300786 | `deploy_architecture_diagram_generate` | ❌ | +| 4 | 0.273083 | `cloudarchitect_design` | ❌ | | 5 | 0.254473 | `monitor_resource_log_query` | ❌ | --- -## Test 73 +## Test 78 **Expected Tool:** `applens_resource_diagnose` **Prompt:** Use app lens to check why my app is slow? @@ -1704,14 +1799,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.502361 | `applens_resource_diagnose` | ✅ **EXPECTED** | -| 2 | 0.316297 | `deploy_app_logs_get` | ❌ | -| 3 | 0.255757 | `deploy_architecture_diagram_generate` | ❌ | +| 2 | 0.316002 | `deploy_app_logs_get` | ❌ | +| 3 | 0.255570 | `deploy_architecture_diagram_generate` | ❌ | | 4 | 0.249583 | `monitor_resource_log_query` | ❌ | -| 5 | 0.226092 | `quota_usage_check` | ❌ | +| 5 | 0.226030 | `quota_usage_check` | ❌ | --- -## Test 74 +## Test 79 **Expected Tool:** `applens_resource_diagnose` **Prompt:** What does app lens say is wrong with my service? @@ -1721,14 +1816,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.492820 | `applens_resource_diagnose` | ✅ **EXPECTED** | -| 2 | 0.256753 | `deploy_architecture_diagram_generate` | ❌ | +| 2 | 0.256325 | `deploy_architecture_diagram_generate` | ❌ | | 3 | 0.242574 | `cloudarchitect_design` | ❌ | | 4 | 0.225608 | `resourcehealth_health-events_list` | ❌ | -| 5 | 0.211564 | `deploy_app_logs_get` | ❌ | +| 5 | 0.211260 | `deploy_app_logs_get` | ❌ | --- -## Test 75 +## Test 80 **Expected Tool:** `appservice_database_add` **Prompt:** Add database connection to my app service for database in resource group @@ -1737,15 +1832,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.717887 | `appservice_database_add` | ✅ **EXPECTED** | -| 2 | 0.401337 | `sql_db_rename` | ❌ | -| 3 | 0.399997 | `sql_db_create` | ❌ | -| 4 | 0.362889 | `sql_db_show` | ❌ | -| 5 | 0.357708 | `sql_db_list` | ❌ | +| 1 | 0.717878 | `appservice_database_add` | ✅ **EXPECTED** | +| 2 | 0.401376 | 
`sql_db_rename` | ❌ | +| 3 | 0.399941 | `sql_db_create` | ❌ | +| 4 | 0.362997 | `sql_db_show` | ❌ | +| 5 | 0.357919 | `sql_db_list` | ❌ | --- -## Test 76 +## Test 81 **Expected Tool:** `appservice_database_add` **Prompt:** Configure SQL Server database for app service with connection string in resource group @@ -1754,15 +1849,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.688364 | `appservice_database_add` | ✅ **EXPECTED** | -| 2 | 0.498175 | `sql_db_rename` | ❌ | -| 3 | 0.497711 | `sql_db_create` | ❌ | -| 4 | 0.469526 | `sql_db_show` | ❌ | -| 5 | 0.453040 | `sql_db_list` | ❌ | +| 1 | 0.688410 | `appservice_database_add` | ✅ **EXPECTED** | +| 2 | 0.498122 | `sql_db_rename` | ❌ | +| 3 | 0.497502 | `sql_db_create` | ❌ | +| 4 | 0.469326 | `sql_db_show` | ❌ | +| 5 | 0.452937 | `sql_db_list` | ❌ | --- -## Test 77 +## Test 82 **Expected Tool:** `appservice_database_add` **Prompt:** Add MySQL database to app service using connection in resource group @@ -1771,15 +1866,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.675548 | `appservice_database_add` | ✅ **EXPECTED** | -| 2 | 0.465376 | `sql_db_create` | ❌ | -| 3 | 0.452528 | `sql_db_rename` | ❌ | -| 4 | 0.433256 | `mysql_server_list` | ❌ | -| 5 | 0.410221 | `sql_db_show` | ❌ | +| 1 | 0.675970 | `appservice_database_add` | ✅ **EXPECTED** | +| 2 | 0.464756 | `sql_db_create` | ❌ | +| 3 | 0.452407 | `sql_db_rename` | ❌ | +| 4 | 0.432948 | `mysql_server_list` | ❌ | +| 5 | 0.410292 | `sql_db_show` | ❌ | --- -## Test 78 +## Test 83 **Expected Tool:** `appservice_database_add` **Prompt:** Add PostgreSQL database to app service using connection in resource group @@ -1788,15 +1883,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.627847 | `appservice_database_add` | ✅ **EXPECTED** | -| 2 | 0.444822 | `sql_db_create` | ❌ | -| 3 | 0.404711 | `postgres_database_query` | ❌ | -| 4 | 0.401105 | `postgres_database_list` | ❌ | -| 5 | 0.400866 
| `sql_db_rename` | ❌ | +| 1 | 0.628119 | `appservice_database_add` | ✅ **EXPECTED** | +| 2 | 0.444212 | `sql_db_create` | ❌ | +| 3 | 0.405314 | `postgres_database_query` | ❌ | +| 4 | 0.401117 | `postgres_database_list` | ❌ | +| 5 | 0.400767 | `sql_db_rename` | ❌ | --- -## Test 79 +## Test 84 **Expected Tool:** `appservice_database_add` **Prompt:** Connect CosmosDB database using connection string to app service in resource group @@ -1805,15 +1900,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.663498 | `appservice_database_add` | ✅ **EXPECTED** | -| 2 | 0.446339 | `cosmos_database_list` | ❌ | -| 3 | 0.441990 | `cosmos_database_container_item_query` | ❌ | -| 4 | 0.427167 | `cosmos_database_container_list` | ❌ | -| 5 | 0.420405 | `sql_db_rename` | ❌ | +| 1 | 0.663086 | `appservice_database_add` | ✅ **EXPECTED** | +| 2 | 0.446465 | `cosmos_database_list` | ❌ | +| 3 | 0.441966 | `cosmos_database_container_item_query` | ❌ | +| 4 | 0.427284 | `cosmos_database_container_list` | ❌ | +| 5 | 0.420488 | `sql_db_rename` | ❌ | --- -## Test 80 +## Test 85 **Expected Tool:** `appservice_database_add` **Prompt:** Add database connection for database on server to app service in resource group @@ -1822,15 +1917,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.733775 | `appservice_database_add` | ✅ **EXPECTED** | -| 2 | 0.454433 | `sql_db_create` | ❌ | -| 3 | 0.415274 | `sql_db_rename` | ❌ | +| 1 | 0.733852 | `appservice_database_add` | ✅ **EXPECTED** | +| 2 | 0.454554 | `sql_db_create` | ❌ | +| 3 | 0.415271 | `sql_db_rename` | ❌ | | 4 | 0.414045 | `sql_server_create` | ❌ | -| 5 | 0.410100 | `sql_db_list` | ❌ | +| 5 | 0.410260 | `sql_db_list` | ❌ | --- -## Test 81 +## Test 86 **Expected Tool:** `appservice_database_add` **Prompt:** Add database connection string for to app service using connection string in resource group @@ -1839,15 +1934,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 
0.746379 | `appservice_database_add` | ✅ **EXPECTED** | -| 2 | 0.441584 | `sql_db_rename` | ❌ | -| 3 | 0.434079 | `sql_db_create` | ❌ | -| 4 | 0.391000 | `sql_db_list` | ❌ | -| 5 | 0.389995 | `sql_db_show` | ❌ | +| 1 | 0.746766 | `appservice_database_add` | ✅ **EXPECTED** | +| 2 | 0.441682 | `sql_db_rename` | ❌ | +| 3 | 0.434020 | `sql_db_create` | ❌ | +| 4 | 0.391311 | `sql_db_list` | ❌ | +| 5 | 0.390014 | `sql_db_show` | ❌ | --- -## Test 82 +## Test 87 **Expected Tool:** `appservice_database_add` **Prompt:** Connect database to my app service using connection string in resource group @@ -1856,15 +1951,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.680525 | `appservice_database_add` | ✅ **EXPECTED** | -| 2 | 0.429291 | `sql_db_rename` | ❌ | -| 3 | 0.406599 | `sql_db_create` | ❌ | -| 4 | 0.396524 | `sql_db_show` | ❌ | -| 5 | 0.391416 | `sql_db_list` | ❌ | +| 1 | 0.680503 | `appservice_database_add` | ✅ **EXPECTED** | +| 2 | 0.429273 | `sql_db_rename` | ❌ | +| 3 | 0.406267 | `sql_db_create` | ❌ | +| 4 | 0.396537 | `sql_db_show` | ❌ | +| 5 | 0.391409 | `sql_db_list` | ❌ | --- -## Test 83 +## Test 88 **Expected Tool:** `appservice_database_add` **Prompt:** Set up database for app service with connection string under resource group @@ -1873,15 +1968,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.640622 | `appservice_database_add` | ✅ **EXPECTED** | -| 2 | 0.456508 | `sql_db_create` | ❌ | -| 3 | 0.402651 | `sql_db_rename` | ❌ | -| 4 | 0.402081 | `sql_db_show` | ❌ | -| 5 | 0.394177 | `sql_db_list` | ❌ | +| 1 | 0.640738 | `appservice_database_add` | ✅ **EXPECTED** | +| 2 | 0.456785 | `sql_db_create` | ❌ | +| 3 | 0.402668 | `sql_db_rename` | ❌ | +| 4 | 0.401985 | `sql_db_show` | ❌ | +| 5 | 0.394072 | `sql_db_list` | ❌ | --- -## Test 84 +## Test 89 **Expected Tool:** `appservice_database_add` **Prompt:** Configure database for app service with the connection string in resource group @@ -1890,15 +1985,15 @@ 
| Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.688347 | `appservice_database_add` | ✅ **EXPECTED** | -| 2 | 0.449170 | `sql_db_rename` | ❌ | -| 3 | 0.448369 | `sql_db_create` | ❌ | +| 1 | 0.688527 | `appservice_database_add` | ✅ **EXPECTED** | +| 2 | 0.449176 | `sql_db_rename` | ❌ | +| 3 | 0.448382 | `sql_db_create` | ❌ | | 4 | 0.414329 | `sql_db_show` | ❌ | -| 5 | 0.411724 | `sql_db_list` | ❌ | +| 5 | 0.411782 | `sql_db_list` | ❌ | --- -## Test 85 +## Test 90 **Expected Tool:** `applicationinsights_recommendation_list` **Prompt:** List code optimization recommendations across my Application Insights components @@ -1908,14 +2003,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.572473 | `applicationinsights_recommendation_list` | ✅ **EXPECTED** | -| 2 | 0.445157 | `get_bestpractices_get` | ❌ | -| 3 | 0.390549 | `azureterraformbestpractices_get` | ❌ | -| 4 | 0.383948 | `applens_resource_diagnose` | ❌ | -| 5 | 0.375286 | `deploy_iac_rules_get` | ❌ | +| 2 | 0.454559 | `azureaibestpractices_get` | ❌ | +| 3 | 0.445157 | `get_bestpractices_get` | ❌ | +| 4 | 0.390478 | `azureterraformbestpractices_get` | ❌ | +| 5 | 0.383948 | `applens_resource_diagnose` | ❌ | --- -## Test 86 +## Test 91 **Expected Tool:** `applicationinsights_recommendation_list` **Prompt:** Show me code optimization recommendations for all Application Insights resources in my subscription @@ -1925,14 +2020,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.696531 | `applicationinsights_recommendation_list` | ✅ **EXPECTED** | -| 2 | 0.468384 | `get_bestpractices_get` | ❌ | -| 3 | 0.452231 | `applens_resource_diagnose` | ❌ | -| 4 | 0.435281 | `azureterraformbestpractices_get` | ❌ | -| 5 | 0.424622 | `search_service_list` | ❌ | +| 2 | 0.506351 | `azureaibestpractices_get` | ❌ | +| 3 | 0.468384 | `get_bestpractices_get` | ❌ | +| 4 | 0.452231 | `applens_resource_diagnose` | ❌ | +| 5 | 0.435241 | 
`azureterraformbestpractices_get` | ❌ | --- -## Test 87 +## Test 92 **Expected Tool:** `applicationinsights_recommendation_list` **Prompt:** List profiler recommendations for Application Insights in resource group @@ -1943,13 +2038,13 @@ |------|-------|------|--------| | 1 | 0.626722 | `applicationinsights_recommendation_list` | ✅ **EXPECTED** | | 2 | 0.488002 | `loadtesting_testresource_list` | ❌ | -| 3 | 0.479416 | `mysql_server_list` | ❌ | +| 3 | 0.479392 | `mysql_server_list` | ❌ | | 4 | 0.477396 | `applens_resource_diagnose` | ❌ | -| 5 | 0.469150 | `resourcehealth_availability-status_list` | ❌ | +| 5 | 0.468847 | `resourcehealth_availability-status_list` | ❌ | --- -## Test 88 +## Test 93 **Expected Tool:** `applicationinsights_recommendation_list` **Prompt:** Show me performance improvement recommendations from Application Insights @@ -1958,15 +2053,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.509502 | `applicationinsights_recommendation_list` | ✅ **EXPECTED** | -| 2 | 0.419670 | `applens_resource_diagnose` | ❌ | -| 3 | 0.383767 | `get_bestpractices_get` | ❌ | -| 4 | 0.367260 | `deploy_architecture_diagram_generate` | ❌ | -| 5 | 0.343931 | `cloudarchitect_design` | ❌ | +| 1 | 0.509615 | `applicationinsights_recommendation_list` | ✅ **EXPECTED** | +| 2 | 0.433835 | `azureaibestpractices_get` | ❌ | +| 3 | 0.419699 | `applens_resource_diagnose` | ❌ | +| 4 | 0.383861 | `get_bestpractices_get` | ❌ | +| 5 | 0.367317 | `deploy_architecture_diagram_generate` | ❌ | --- -## Test 89 +## Test 94 **Expected Tool:** `extension_cli_generate` **Prompt:** Create a Storage account with name using Azure CLI @@ -1975,15 +2070,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.593242 | `storage_account_create` | ❌ | +| 1 | 0.593241 | `storage_account_create` | ❌ | | 2 | 0.564940 | `storage_blob_container_create` | ❌ | -| 3 | 0.493609 | `storage_account_get` | ❌ | -| 4 | 0.474399 | `storage_blob_container_get` | ❌ | 
-| 5 | 0.454194 | `managedlustre_fs_create` | ❌ | +| 3 | 0.493684 | `storage_account_get` | ❌ | +| 4 | 0.473547 | `storage_blob_container_get` | ❌ | +| 5 | 0.456428 | `managedlustre_fs_create` | ❌ | --- -## Test 90 +## Test 95 **Expected Tool:** `extension_cli_generate` **Prompt:** List all virtual machines in my subscription using Azure CLI @@ -1992,15 +2087,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.593467 | `search_service_list` | ❌ | +| 1 | 0.592102 | `search_service_list` | ❌ | | 2 | 0.575274 | `kusto_cluster_list` | ❌ | -| 3 | 0.549966 | `virtualdesktop_hostpool_list` | ❌ | -| 4 | 0.544412 | `monitor_workspace_list` | ❌ | +| 3 | 0.549918 | `virtualdesktop_hostpool_list` | ❌ | +| 4 | 0.544688 | `monitor_workspace_list` | ❌ | | 5 | 0.536238 | `subscription_list` | ❌ | --- -## Test 91 +## Test 96 **Expected Tool:** `extension_cli_generate` **Prompt:** Show me the details of the storage account with Azure CLI commands @@ -2009,15 +2104,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.710155 | `storage_account_get` | ❌ | -| 2 | 0.602173 | `storage_blob_container_get` | ❌ | +| 1 | 0.710307 | `storage_account_get` | ❌ | +| 2 | 0.601571 | `storage_blob_container_get` | ❌ | | 3 | 0.543268 | `storage_blob_get` | ❌ | | 4 | 0.519788 | `storage_account_create` | ❌ | | 5 | 0.493145 | `cosmos_account_list` | ❌ | --- -## Test 92 +## Test 97 **Expected Tool:** `extension_cli_install` **Prompt:** @@ -2026,15 +2121,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.479590 | `extension_cli_install` | ✅ **EXPECTED** | -| 2 | 0.473266 | `extension_cli_generate` | ❌ | -| 3 | 0.389369 | `azureterraformbestpractices_get` | ❌ | -| 4 | 0.382389 | `deploy_plan_get` | ❌ | -| 5 | 0.366012 | `get_bestpractices_get` | ❌ | +| 1 | 0.479652 | `extension_cli_install` | ✅ **EXPECTED** | +| 2 | 0.473369 | `extension_cli_generate` | ❌ | +| 3 | 0.389405 | `azureterraformbestpractices_get` | ❌ | +| 4 
| 0.382473 | `deploy_plan_get` | ❌ | +| 5 | 0.366067 | `get_bestpractices_get` | ❌ | --- -## Test 93 +## Test 98 **Expected Tool:** `extension_cli_install` **Prompt:** How to install azd @@ -2044,14 +2139,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.460416 | `extension_cli_install` | ✅ **EXPECTED** | -| 2 | 0.429599 | `deploy_app_logs_get` | ❌ | +| 2 | 0.429269 | `deploy_app_logs_get` | ❌ | | 3 | 0.365212 | `deploy_iac_rules_get` | ❌ | | 4 | 0.335279 | `deploy_plan_get` | ❌ | -| 5 | 0.326135 | `deploy_pipeline_guidance_get` | ❌ | +| 5 | 0.326165 | `deploy_pipeline_guidance_get` | ❌ | --- -## Test 94 +## Test 99 **Expected Tool:** `extension_cli_install` **Prompt:** What is Azure Functions Core tools and how to install it @@ -2060,15 +2155,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.622705 | `extension_cli_install` | ✅ **EXPECTED** | -| 2 | 0.439474 | `get_bestpractices_get` | ❌ | -| 3 | 0.432913 | `deploy_pipeline_guidance_get` | ❌ | -| 4 | 0.430483 | `extension_cli_generate` | ❌ | -| 5 | 0.418161 | `deploy_plan_get` | ❌ | +| 1 | 0.622670 | `extension_cli_install` | ✅ **EXPECTED** | +| 2 | 0.439414 | `get_bestpractices_get` | ❌ | +| 3 | 0.432859 | `deploy_pipeline_guidance_get` | ❌ | +| 4 | 0.430682 | `extension_cli_generate` | ❌ | +| 5 | 0.418085 | `deploy_plan_get` | ❌ | --- -## Test 95 +## Test 100 **Expected Tool:** `acr_registry_list` **Prompt:** List all Azure Container Registries in my subscription @@ -2080,12 +2175,12 @@ | 1 | 0.743568 | `acr_registry_list` | ✅ **EXPECTED** | | 2 | 0.711580 | `acr_registry_repository_list` | ❌ | | 3 | 0.585675 | `kusto_cluster_list` | ❌ | -| 4 | 0.541506 | `search_service_list` | ❌ | +| 4 | 0.540241 | `search_service_list` | ❌ | | 5 | 0.514293 | `cosmos_account_list` | ❌ | --- -## Test 96 +## Test 101 **Expected Tool:** `acr_registry_list` **Prompt:** Show me my Azure Container Registries @@ -2096,13 +2191,13 @@ |------|-------|------|--------| | 1 | 
0.586014 | `acr_registry_list` | ✅ **EXPECTED** | | 2 | 0.563636 | `acr_registry_repository_list` | ❌ | -| 3 | 0.460544 | `storage_blob_container_get` | ❌ | +| 3 | 0.460834 | `storage_blob_container_get` | ❌ | | 4 | 0.415552 | `cosmos_database_container_list` | ❌ | | 5 | 0.402247 | `redis_list` | ❌ | --- -## Test 97 +## Test 102 **Expected Tool:** `acr_registry_list` **Prompt:** Show me the container registries in my subscription @@ -2114,12 +2209,12 @@ | 1 | 0.637130 | `acr_registry_list` | ✅ **EXPECTED** | | 2 | 0.563476 | `acr_registry_repository_list` | ❌ | | 3 | 0.516769 | `kusto_cluster_list` | ❌ | -| 4 | 0.515378 | `storage_blob_container_get` | ❌ | +| 4 | 0.515365 | `storage_blob_container_get` | ❌ | | 5 | 0.480352 | `redis_list` | ❌ | --- -## Test 98 +## Test 103 **Expected Tool:** `acr_registry_list` **Prompt:** List container registries in resource group @@ -2130,13 +2225,13 @@ |------|-------|------|--------| | 1 | 0.654318 | `acr_registry_repository_list` | ❌ | | 2 | 0.633938 | `acr_registry_list` | ✅ **EXPECTED** | -| 3 | 0.476043 | `mysql_server_list` | ❌ | +| 3 | 0.476015 | `mysql_server_list` | ❌ | | 4 | 0.454929 | `group_list` | ❌ | | 5 | 0.454003 | `datadog_monitoredresources_list` | ❌ | --- -## Test 99 +## Test 104 **Expected Tool:** `acr_registry_list` **Prompt:** Show me the container registries in resource group @@ -2147,13 +2242,13 @@ |------|-------|------|--------| | 1 | 0.639391 | `acr_registry_list` | ✅ **EXPECTED** | | 2 | 0.637972 | `acr_registry_repository_list` | ❌ | -| 3 | 0.468078 | `mysql_server_list` | ❌ | +| 3 | 0.468028 | `mysql_server_list` | ❌ | | 4 | 0.449649 | `datadog_monitoredresources_list` | ❌ | | 5 | 0.445741 | `group_list` | ❌ | --- -## Test 100 +## Test 105 **Expected Tool:** `acr_registry_repository_list` **Prompt:** List all container registry repositories in my subscription @@ -2165,12 +2260,12 @@ | 1 | 0.626482 | `acr_registry_repository_list` | ✅ **EXPECTED** | | 2 | 0.617504 | `acr_registry_list` | ❌ | | 3 | 
0.544172 | `kusto_cluster_list` | ❌ | -| 4 | 0.508483 | `storage_blob_container_get` | ❌ | -| 5 | 0.495526 | `postgres_server_list` | ❌ | +| 4 | 0.508863 | `storage_blob_container_get` | ❌ | +| 5 | 0.495567 | `postgres_server_list` | ❌ | --- -## Test 101 +## Test 106 **Expected Tool:** `acr_registry_repository_list` **Prompt:** Show me my container registry repositories @@ -2179,15 +2274,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.546333 | `acr_registry_repository_list` | ✅ **EXPECTED** | +| 1 | 0.546334 | `acr_registry_repository_list` | ✅ **EXPECTED** | | 2 | 0.469295 | `acr_registry_list` | ❌ | -| 3 | 0.451083 | `storage_blob_container_get` | ❌ | +| 3 | 0.451973 | `storage_blob_container_get` | ❌ | | 4 | 0.407973 | `cosmos_database_container_list` | ❌ | | 5 | 0.373464 | `storage_blob_get` | ❌ | --- -## Test 102 +## Test 107 **Expected Tool:** `acr_registry_repository_list` **Prompt:** List repositories in the container registry @@ -2198,13 +2293,13 @@ |------|-------|------|--------| | 1 | 0.674296 | `acr_registry_repository_list` | ✅ **EXPECTED** | | 2 | 0.541779 | `acr_registry_list` | ❌ | -| 3 | 0.437509 | `storage_blob_container_get` | ❌ | +| 3 | 0.437756 | `storage_blob_container_get` | ❌ | | 4 | 0.433927 | `cosmos_database_container_list` | ❌ | -| 5 | 0.383201 | `kusto_database_list` | ❌ | +| 5 | 0.383001 | `kusto_database_list` | ❌ | --- -## Test 103 +## Test 108 **Expected Tool:** `acr_registry_repository_list` **Prompt:** Show me the repositories in the container registry @@ -2215,13 +2310,13 @@ |------|-------|------|--------| | 1 | 0.600780 | `acr_registry_repository_list` | ✅ **EXPECTED** | | 2 | 0.501842 | `acr_registry_list` | ❌ | -| 3 | 0.430880 | `storage_blob_container_get` | ❌ | +| 3 | 0.431148 | `storage_blob_container_get` | ❌ | | 4 | 0.418623 | `cosmos_database_container_list` | ❌ | | 5 | 0.378151 | `redis_list` | ❌ | --- -## Test 104 +## Test 109 **Expected Tool:** `communication_email_send` **Prompt:** 
Send an email to with subject @@ -2230,15 +2325,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.498292 | `communication_email_send` | ✅ **EXPECTED** | -| 2 | 0.226847 | `communication_sms_send` | ❌ | +| 1 | 0.498396 | `communication_email_send` | ✅ **EXPECTED** | +| 2 | 0.229071 | `communication_sms_send` | ❌ | | 3 | 0.188975 | `eventgrid_events_publish` | ❌ | -| 4 | 0.145951 | `servicebus_topic_details` | ❌ | -| 5 | 0.142099 | `foundry_openai_chat-completions-create` | ❌ | +| 4 | 0.161257 | `foundry_agents_create` | ❌ | +| 5 | 0.146045 | `servicebus_topic_details` | ❌ | --- -## Test 105 +## Test 110 **Expected Tool:** `communication_email_send` **Prompt:** Send an email from my communication service to @@ -2247,15 +2342,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.498406 | `communication_email_send` | ✅ **EXPECTED** | -| 2 | 0.313058 | `communication_sms_send` | ❌ | -| 3 | 0.235127 | `foundry_openai_chat-completions-create` | ❌ | -| 4 | 0.211154 | `search_knowledge_base_retrieve` | ❌ | -| 5 | 0.194094 | `speech_stt_recognize` | ❌ | +| 1 | 0.498459 | `communication_email_send` | ✅ **EXPECTED** | +| 2 | 0.314408 | `communication_sms_send` | ❌ | +| 3 | 0.235110 | `foundry_openai_chat-completions-create` | ❌ | +| 4 | 0.211067 | `search_knowledge_base_retrieve` | ❌ | +| 5 | 0.210014 | `foundry_agents_create` | ❌ | --- -## Test 106 +## Test 111 **Expected Tool:** `communication_email_send` **Prompt:** Send HTML-formatted email to with subject @@ -2264,15 +2359,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.520967 | `communication_email_send` | ✅ **EXPECTED** | -| 2 | 0.205130 | `communication_sms_send` | ❌ | +| 1 | 0.521087 | `communication_email_send` | ✅ **EXPECTED** | +| 2 | 0.207644 | `communication_sms_send` | ❌ | | 3 | 0.152418 | `eventgrid_events_publish` | ❌ | -| 4 | 0.152013 | `servicebus_topic_details` | ❌ | +| 4 | 0.152056 | `servicebus_topic_details` | ❌ | 
| 5 | 0.143660 | `foundry_agents_evaluate` | ❌ | --- -## Test 107 +## Test 112 **Expected Tool:** `communication_email_send` **Prompt:** Send email with CC to and @@ -2281,15 +2376,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.533447 | `communication_email_send` | ✅ **EXPECTED** | -| 2 | 0.217412 | `communication_sms_send` | ❌ | -| 3 | 0.106026 | `foundry_agents_query-and-evaluate` | ❌ | +| 1 | 0.533532 | `communication_email_send` | ✅ **EXPECTED** | +| 2 | 0.219566 | `communication_sms_send` | ❌ | +| 3 | 0.106042 | `foundry_agents_query-and-evaluate` | ❌ | | 4 | 0.103723 | `foundry_openai_chat-completions-create` | ❌ | | 5 | 0.084905 | `cosmos_account_list` | ❌ | --- -## Test 108 +## Test 113 **Expected Tool:** `communication_email_send` **Prompt:** Send email to multiple recipients: , @@ -2298,15 +2393,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.540792 | `communication_email_send` | ✅ **EXPECTED** | -| 2 | 0.241620 | `communication_sms_send` | ❌ | -| 3 | 0.134975 | `foundry_openai_chat-completions-create` | ❌ | -| 4 | 0.114324 | `foundry_agents_query-and-evaluate` | ❌ | -| 5 | 0.087063 | `postgres_server_param_set` | ❌ | +| 1 | 0.540910 | `communication_email_send` | ✅ **EXPECTED** | +| 2 | 0.244525 | `communication_sms_send` | ❌ | +| 3 | 0.134996 | `foundry_openai_chat-completions-create` | ❌ | +| 4 | 0.114359 | `foundry_agents_query-and-evaluate` | ❌ | +| 5 | 0.087005 | `postgres_server_param_set` | ❌ | --- -## Test 109 +## Test 114 **Expected Tool:** `communication_email_send` **Prompt:** Send email with reply-to address set to @@ -2315,15 +2410,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.512623 | `communication_email_send` | ✅ **EXPECTED** | -| 2 | 0.198552 | `communication_sms_send` | ❌ | -| 3 | 0.164115 | `mysql_server_param_set` | ❌ | +| 1 | 0.512721 | `communication_email_send` | ✅ **EXPECTED** | +| 2 | 0.200189 | `communication_sms_send` | ❌ | +| 
3 | 0.164422 | `mysql_server_param_set` | ❌ | | 4 | 0.158759 | `postgres_server_param_set` | ❌ | | 5 | 0.143574 | `appconfig_kv_set` | ❌ | --- -## Test 110 +## Test 115 **Expected Tool:** `communication_email_send` **Prompt:** Send email with custom sender name @@ -2332,15 +2427,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.473175 | `communication_email_send` | ✅ **EXPECTED** | -| 2 | 0.253449 | `communication_sms_send` | ❌ | +| 1 | 0.473192 | `communication_email_send` | ✅ **EXPECTED** | +| 2 | 0.255124 | `communication_sms_send` | ❌ | | 3 | 0.164811 | `foundry_openai_chat-completions-create` | ❌ | -| 4 | 0.160393 | `foundry_openai_embeddings-create` | ❌ | -| 5 | 0.156871 | `cosmos_database_container_item_query` | ❌ | +| 4 | 0.160285 | `foundry_openai_embeddings-create` | ❌ | +| 5 | 0.156869 | `cosmos_database_container_item_query` | ❌ | --- -## Test 111 +## Test 116 **Expected Tool:** `communication_email_send` **Prompt:** Send an email with BCC recipients @@ -2349,15 +2444,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.528789 | `communication_email_send` | ✅ **EXPECTED** | -| 2 | 0.239846 | `communication_sms_send` | ❌ | -| 3 | 0.137565 | `confidentialledger_entries_append` | ❌ | -| 4 | 0.108725 | `confidentialledger_entries_get` | ❌ | +| 1 | 0.528899 | `communication_email_send` | ✅ **EXPECTED** | +| 2 | 0.241091 | `communication_sms_send` | ❌ | +| 3 | 0.137538 | `confidentialledger_entries_append` | ❌ | +| 4 | 0.108748 | `confidentialledger_entries_get` | ❌ | | 5 | 0.105033 | `storage_blob_upload` | ❌ | --- -## Test 112 +## Test 117 **Expected Tool:** `communication_sms_send` **Prompt:** Send an SMS message to saying "Hello" @@ -2366,15 +2461,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.533763 | `communication_sms_send` | ✅ **EXPECTED** | -| 2 | 0.251429 | `communication_email_send` | ❌ | +| 1 | 0.533822 | `communication_sms_send` | ✅ **EXPECTED** | +| 2 | 
0.251480 | `communication_email_send` | ❌ | | 3 | 0.218656 | `foundry_openai_chat-completions-create` | ❌ | -| 4 | 0.148584 | `foundry_agents_connect` | ❌ | -| 5 | 0.148240 | `foundry_openai_create-completion` | ❌ | +| 4 | 0.175534 | `foundry_agents_create` | ❌ | +| 5 | 0.156040 | `foundry_threads_create` | ❌ | --- -## Test 113 +## Test 118 **Expected Tool:** `communication_sms_send` **Prompt:** Send SMS to from with message "Test message" @@ -2383,15 +2478,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.543875 | `communication_sms_send` | ✅ **EXPECTED** | -| 2 | 0.294603 | `communication_email_send` | ❌ | -| 3 | 0.204487 | `loadtesting_testrun_create` | ❌ | -| 4 | 0.200633 | `foundry_openai_chat-completions-create` | ❌ | -| 5 | 0.136763 | `loadtesting_testrun_update` | ❌ | +| 1 | 0.546006 | `communication_sms_send` | ✅ **EXPECTED** | +| 2 | 0.294912 | `communication_email_send` | ❌ | +| 3 | 0.204585 | `loadtesting_testrun_create` | ❌ | +| 4 | 0.200656 | `foundry_openai_chat-completions-create` | ❌ | +| 5 | 0.141105 | `foundry_agents_create` | ❌ | --- -## Test 114 +## Test 119 **Expected Tool:** `communication_sms_send` **Prompt:** Send SMS to multiple recipients: , @@ -2400,15 +2495,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.543753 | `communication_sms_send` | ✅ **EXPECTED** | -| 2 | 0.421988 | `communication_email_send` | ❌ | +| 1 | 0.545744 | `communication_sms_send` | ✅ **EXPECTED** | +| 2 | 0.422028 | `communication_email_send` | ❌ | | 3 | 0.186088 | `foundry_openai_chat-completions-create` | ❌ | -| 4 | 0.142030 | `foundry_agents_query-and-evaluate` | ❌ | -| 5 | 0.104124 | `search_knowledge_base_retrieve` | ❌ | +| 4 | 0.142054 | `foundry_agents_query-and-evaluate` | ❌ | +| 5 | 0.113722 | `foundry_threads_get-messages` | ❌ | --- -## Test 115 +## Test 120 **Expected Tool:** `communication_sms_send` **Prompt:** Send SMS with delivery reporting enabled @@ -2417,15 +2512,15 @@ | Rank | Score | 
Tool | Status | |------|-------|------|--------| -| 1 | 0.548617 | `communication_sms_send` | ✅ **EXPECTED** | -| 2 | 0.269080 | `communication_email_send` | ❌ | -| 3 | 0.192340 | `extension_azqr` | ❌ | +| 1 | 0.554917 | `communication_sms_send` | ✅ **EXPECTED** | +| 2 | 0.269203 | `communication_email_send` | ❌ | +| 3 | 0.191848 | `extension_azqr` | ❌ | | 4 | 0.185916 | `foundry_openai_chat-completions-create` | ❌ | -| 5 | 0.170726 | `foundry_agents_query-and-evaluate` | ❌ | +| 5 | 0.170749 | `foundry_agents_query-and-evaluate` | ❌ | --- -## Test 116 +## Test 121 **Expected Tool:** `communication_sms_send` **Prompt:** Send SMS message with custom tracking tag "campaign1" @@ -2434,15 +2529,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.534739 | `communication_sms_send` | ✅ **EXPECTED** | -| 2 | 0.269794 | `communication_email_send` | ❌ | +| 1 | 0.538893 | `communication_sms_send` | ✅ **EXPECTED** | +| 2 | 0.269915 | `communication_email_send` | ❌ | | 3 | 0.188153 | `loadtesting_testrun_create` | ❌ | | 4 | 0.185403 | `foundry_openai_chat-completions-create` | ❌ | -| 5 | 0.159177 | `appconfig_kv_set` | ❌ | +| 5 | 0.175135 | `foundry_agents_create` | ❌ | --- -## Test 117 +## Test 122 **Expected Tool:** `communication_sms_send` **Prompt:** Send broadcast SMS to and saying "Urgent notification" @@ -2451,15 +2546,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.471991 | `communication_sms_send` | ✅ **EXPECTED** | -| 2 | 0.286936 | `communication_email_send` | ❌ | -| 3 | 0.164059 | `foundry_agents_query-and-evaluate` | ❌ | -| 4 | 0.146501 | `foundry_openai_chat-completions-create` | ❌ | -| 5 | 0.128592 | `cosmos_account_list` | ❌ | +| 1 | 0.474775 | `communication_sms_send` | ✅ **EXPECTED** | +| 2 | 0.286381 | `communication_email_send` | ❌ | +| 3 | 0.164341 | `foundry_agents_query-and-evaluate` | ❌ | +| 4 | 0.147338 | `foundry_openai_chat-completions-create` | ❌ | +| 5 | 0.128704 | `cosmos_account_list` | 
❌ | --- -## Test 118 +## Test 123 **Expected Tool:** `communication_sms_send` **Prompt:** Send SMS from my communication service to @@ -2468,15 +2563,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.563359 | `communication_sms_send` | ✅ **EXPECTED** | -| 2 | 0.302360 | `communication_email_send` | ❌ | -| 3 | 0.238341 | `foundry_openai_chat-completions-create` | ❌ | -| 4 | 0.183684 | `search_knowledge_base_retrieve` | ❌ | -| 5 | 0.174092 | `foundry_openai_create-completion` | ❌ | +| 1 | 0.564058 | `communication_sms_send` | ✅ **EXPECTED** | +| 2 | 0.302377 | `communication_email_send` | ❌ | +| 3 | 0.238340 | `foundry_openai_chat-completions-create` | ❌ | +| 4 | 0.184240 | `foundry_agents_create` | ❌ | +| 5 | 0.183684 | `search_knowledge_base_retrieve` | ❌ | --- -## Test 119 +## Test 124 **Expected Tool:** `communication_sms_send` **Prompt:** Send an SMS with delivery receipt tracking @@ -2485,15 +2580,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.592519 | `communication_sms_send` | ✅ **EXPECTED** | -| 2 | 0.314134 | `communication_email_send` | ❌ | -| 3 | 0.206916 | `foundry_agents_query-and-evaluate` | ❌ | +| 1 | 0.598236 | `communication_sms_send` | ✅ **EXPECTED** | +| 2 | 0.314267 | `communication_email_send` | ❌ | +| 3 | 0.206931 | `foundry_agents_query-and-evaluate` | ❌ | | 4 | 0.201142 | `foundry_openai_chat-completions-create` | ❌ | -| 5 | 0.187793 | `confidentialledger_entries_append` | ❌ | +| 5 | 0.187824 | `confidentialledger_entries_append` | ❌ | --- -## Test 120 +## Test 125 **Expected Tool:** `confidentialledger_entries_append` **Prompt:** Append an entry to my ledger with data {"key": "value"} @@ -2502,15 +2597,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.510689 | `confidentialledger_entries_append` | ✅ **EXPECTED** | -| 2 | 0.293736 | `confidentialledger_entries_get` | ❌ | -| 3 | 0.292014 | `appconfig_kv_set` | ❌ | -| 4 | 0.258967 | 
`appconfig_kv_lock_set` | ❌ | -| 5 | 0.249908 | `keyvault_certificate_import` | ❌ | +| 1 | 0.511241 | `confidentialledger_entries_append` | ✅ **EXPECTED** | +| 2 | 0.295319 | `confidentialledger_entries_get` | ❌ | +| 3 | 0.291757 | `appconfig_kv_set` | ❌ | +| 4 | 0.258741 | `appconfig_kv_lock_set` | ❌ | +| 5 | 0.250106 | `keyvault_certificate_import` | ❌ | --- -## Test 121 +## Test 126 **Expected Tool:** `confidentialledger_entries_append` **Prompt:** Write a tamper-proof entry to ledger containing {"transaction": "data"} @@ -2519,15 +2614,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.602257 | `confidentialledger_entries_append` | ✅ **EXPECTED** | -| 2 | 0.356510 | `confidentialledger_entries_get` | ❌ | -| 3 | 0.211990 | `appconfig_kv_lock_set` | ❌ | -| 4 | 0.195471 | `keyvault_secret_create` | ❌ | -| 5 | 0.184077 | `keyvault_certificate_import` | ❌ | +| 1 | 0.602321 | `confidentialledger_entries_append` | ✅ **EXPECTED** | +| 2 | 0.357401 | `confidentialledger_entries_get` | ❌ | +| 3 | 0.211998 | `appconfig_kv_lock_set` | ❌ | +| 4 | 0.195461 | `keyvault_secret_create` | ❌ | +| 5 | 0.184070 | `keyvault_certificate_import` | ❌ | --- -## Test 122 +## Test 127 **Expected Tool:** `confidentialledger_entries_append` **Prompt:** Append {"hello": "from mcp"} to my confidential ledger in collection @@ -2536,15 +2631,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.546573 | `confidentialledger_entries_append` | ✅ **EXPECTED** | -| 2 | 0.451031 | `confidentialledger_entries_get` | ❌ | -| 3 | 0.224978 | `appconfig_kv_lock_set` | ❌ | -| 4 | 0.215862 | `appconfig_kv_set` | ❌ | -| 5 | 0.203109 | `keyvault_certificate_import` | ❌ | +| 1 | 0.546786 | `confidentialledger_entries_append` | ✅ **EXPECTED** | +| 2 | 0.452117 | `confidentialledger_entries_get` | ❌ | +| 3 | 0.225013 | `appconfig_kv_lock_set` | ❌ | +| 4 | 0.215828 | `appconfig_kv_set` | ❌ | +| 5 | 0.203162 | `keyvault_certificate_import` | ❌ | --- -## Test 
123 +## Test 128 **Expected Tool:** `confidentialledger_entries_append` **Prompt:** Create an immutable ledger entry in with content {"audit": "log"} @@ -2553,15 +2648,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.496032 | `confidentialledger_entries_append` | ✅ **EXPECTED** | -| 2 | 0.338270 | `confidentialledger_entries_get` | ❌ | -| 3 | 0.218518 | `monitor_activitylog_list` | ❌ | +| 1 | 0.496023 | `confidentialledger_entries_append` | ✅ **EXPECTED** | +| 2 | 0.340187 | `confidentialledger_entries_get` | ❌ | +| 3 | 0.218473 | `monitor_activitylog_list` | ❌ | | 4 | 0.215229 | `storage_blob_container_create` | ❌ | | 5 | 0.204925 | `monitor_resource_log_query` | ❌ | --- -## Test 124 +## Test 129 **Expected Tool:** `confidentialledger_entries_append` **Prompt:** Write an entry to confidential ledger @@ -2570,15 +2665,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.622097 | `confidentialledger_entries_append` | ✅ **EXPECTED** | -| 2 | 0.523205 | `confidentialledger_entries_get` | ❌ | +| 1 | 0.622138 | `confidentialledger_entries_append` | ✅ **EXPECTED** | +| 2 | 0.524777 | `confidentialledger_entries_get` | ❌ | | 3 | 0.252508 | `appconfig_kv_lock_set` | ❌ | | 4 | 0.240252 | `keyvault_secret_create` | ❌ | | 5 | 0.186890 | `appconfig_kv_set` | ❌ | --- -## Test 125 +## Test 130 **Expected Tool:** `confidentialledger_entries_get` **Prompt:** Get entry from Confidential Ledger for transaction on ledger @@ -2587,15 +2682,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.706506 | `confidentialledger_entries_get` | ✅ **EXPECTED** | -| 2 | 0.551901 | `confidentialledger_entries_append` | ❌ | -| 3 | 0.245541 | `keyvault_secret_get` | ❌ | -| 4 | 0.229943 | `keyvault_key_get` | ❌ | -| 5 | 0.212658 | `loadtesting_testrun_get` | ❌ | +| 1 | 0.707252 | `confidentialledger_entries_get` | ✅ **EXPECTED** | +| 2 | 0.551953 | `confidentialledger_entries_append` | ❌ | +| 3 | 0.245549 | 
`keyvault_secret_get` | ❌ | +| 4 | 0.231190 | `keyvault_key_get` | ❌ | +| 5 | 0.211839 | `loadtesting_testrun_get` | ❌ | --- -## Test 126 +## Test 131 **Expected Tool:** `confidentialledger_entries_get` **Prompt:** Get transaction from ledger @@ -2604,15 +2699,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.510283 | `confidentialledger_entries_get` | ✅ **EXPECTED** | -| 2 | 0.416550 | `confidentialledger_entries_append` | ❌ | -| 3 | 0.224523 | `loadtesting_testrun_get` | ❌ | +| 1 | 0.509714 | `confidentialledger_entries_get` | ✅ **EXPECTED** | +| 2 | 0.416580 | `confidentialledger_entries_append` | ❌ | +| 3 | 0.223959 | `loadtesting_testrun_get` | ❌ | | 4 | 0.218412 | `monitor_resource_log_query` | ❌ | | 5 | 0.217671 | `loadtesting_testrun_list` | ❌ | --- -## Test 127 +## Test 132 **Expected Tool:** `cosmos_account_list` **Prompt:** List all cosmosdb accounts in my subscription @@ -2629,7 +2724,7 @@ --- -## Test 128 +## Test 133 **Expected Tool:** `cosmos_account_list` **Prompt:** Show me my cosmosdb accounts @@ -2638,15 +2733,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.665447 | `cosmos_account_list` | ✅ **EXPECTED** | -| 2 | 0.605357 | `cosmos_database_list` | ❌ | -| 3 | 0.571613 | `cosmos_database_container_list` | ❌ | -| 4 | 0.549476 | `cosmos_database_container_item_query` | ❌ | -| 5 | 0.504032 | `storage_account_get` | ❌ | +| 1 | 0.665422 | `cosmos_account_list` | ✅ **EXPECTED** | +| 2 | 0.605325 | `cosmos_database_list` | ❌ | +| 3 | 0.571573 | `cosmos_database_container_list` | ❌ | +| 4 | 0.549420 | `cosmos_database_container_item_query` | ❌ | +| 5 | 0.503865 | `storage_account_get` | ❌ | --- -## Test 129 +## Test 134 **Expected Tool:** `cosmos_account_list` **Prompt:** Show me the cosmosdb accounts in my subscription @@ -2659,11 +2754,11 @@ | 2 | 0.607165 | `subscription_list` | ❌ | | 3 | 0.605125 | `cosmos_database_list` | ❌ | | 4 | 0.566249 | `cosmos_database_container_list` | ❌ | -| 5 | 
0.563959 | `cosmos_database_container_item_query` | ❌ | +| 5 | 0.563922 | `cosmos_database_container_item_query` | ❌ | --- -## Test 130 +## Test 135 **Expected Tool:** `cosmos_database_container_item_query` **Prompt:** Show me the items that contain the word in the container in the database for the cosmosdb account @@ -2672,15 +2767,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.658738 | `cosmos_database_container_item_query` | ✅ **EXPECTED** | +| 1 | 0.658701 | `cosmos_database_container_item_query` | ✅ **EXPECTED** | | 2 | 0.605253 | `cosmos_database_container_list` | ❌ | -| 3 | 0.487612 | `storage_blob_container_get` | ❌ | +| 3 | 0.488353 | `storage_blob_container_get` | ❌ | | 4 | 0.477874 | `cosmos_database_list` | ❌ | | 5 | 0.447757 | `cosmos_account_list` | ❌ | --- -## Test 131 +## Test 136 **Expected Tool:** `cosmos_database_container_list` **Prompt:** List all the containers in the database for the cosmosdb account @@ -2689,15 +2784,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.852832 | `cosmos_database_container_list` | ✅ **EXPECTED** | -| 2 | 0.681044 | `cosmos_database_list` | ❌ | -| 3 | 0.680794 | `cosmos_database_container_item_query` | ❌ | -| 4 | 0.632335 | `storage_blob_container_get` | ❌ | -| 5 | 0.630659 | `cosmos_account_list` | ❌ | +| 1 | 0.852875 | `cosmos_database_container_list` | ✅ **EXPECTED** | +| 2 | 0.680991 | `cosmos_database_list` | ❌ | +| 3 | 0.680758 | `cosmos_database_container_item_query` | ❌ | +| 4 | 0.632634 | `storage_blob_container_get` | ❌ | +| 5 | 0.630588 | `cosmos_account_list` | ❌ | --- -## Test 132 +## Test 137 **Expected Tool:** `cosmos_database_container_list` **Prompt:** Show me the containers in the database for the cosmosdb account @@ -2706,15 +2801,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.789413 | `cosmos_database_container_list` | ✅ **EXPECTED** | -| 2 | 0.648207 | `cosmos_database_container_item_query` | ❌ | 
-| 3 | 0.614278 | `cosmos_database_list` | ❌ | -| 4 | 0.591387 | `storage_blob_container_get` | ❌ | -| 5 | 0.562096 | `cosmos_account_list` | ❌ | +| 1 | 0.789395 | `cosmos_database_container_list` | ✅ **EXPECTED** | +| 2 | 0.648126 | `cosmos_database_container_item_query` | ❌ | +| 3 | 0.614220 | `cosmos_database_list` | ❌ | +| 4 | 0.591350 | `storage_blob_container_get` | ❌ | +| 5 | 0.562062 | `cosmos_account_list` | ❌ | --- -## Test 133 +## Test 138 **Expected Tool:** `cosmos_database_list` **Prompt:** List all the databases in the cosmosdb account @@ -2726,12 +2821,12 @@ | 1 | 0.815683 | `cosmos_database_list` | ✅ **EXPECTED** | | 2 | 0.668515 | `cosmos_account_list` | ❌ | | 3 | 0.665298 | `cosmos_database_container_list` | ❌ | -| 4 | 0.606414 | `cosmos_database_container_item_query` | ❌ | -| 5 | 0.583507 | `kusto_database_list` | ❌ | +| 4 | 0.606433 | `cosmos_database_container_item_query` | ❌ | +| 5 | 0.582804 | `kusto_database_list` | ❌ | --- -## Test 134 +## Test 139 **Expected Tool:** `cosmos_database_list` **Prompt:** Show me the databases in the cosmosdb account @@ -2743,12 +2838,12 @@ | 1 | 0.749370 | `cosmos_database_list` | ✅ **EXPECTED** | | 2 | 0.624759 | `cosmos_database_container_list` | ❌ | | 3 | 0.614572 | `cosmos_account_list` | ❌ | -| 4 | 0.579913 | `cosmos_database_container_item_query` | ❌ | +| 4 | 0.579919 | `cosmos_database_container_item_query` | ❌ | | 5 | 0.538479 | `mysql_database_list` | ❌ | --- -## Test 135 +## Test 140 **Expected Tool:** `kusto_cluster_get` **Prompt:** Show me the details of the Data Explorer cluster @@ -2760,12 +2855,12 @@ | 1 | 0.590264 | `kusto_cluster_get` | ✅ **EXPECTED** | | 2 | 0.463832 | `kusto_cluster_list` | ❌ | | 3 | 0.428159 | `kusto_query` | ❌ | -| 4 | 0.425688 | `kusto_database_list` | ❌ | +| 4 | 0.425909 | `kusto_database_list` | ❌ | | 5 | 0.422577 | `kusto_table_schema` | ❌ | --- -## Test 136 +## Test 141 **Expected Tool:** `kusto_cluster_list` **Prompt:** List all Data Explorer clusters in my 
subscription @@ -2775,14 +2870,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.793744 | `kusto_cluster_list` | ✅ **EXPECTED** | -| 2 | 0.630504 | `kusto_database_list` | ❌ | +| 2 | 0.630451 | `kusto_database_list` | ❌ | | 3 | 0.573395 | `kusto_cluster_get` | ❌ | | 4 | 0.525025 | `aks_cluster_get` | ❌ | -| 5 | 0.509396 | `grafana_list` | ❌ | +| 5 | 0.509397 | `grafana_list` | ❌ | --- -## Test 137 +## Test 142 **Expected Tool:** `kusto_cluster_list` **Prompt:** Show me my Data Explorer clusters @@ -2793,13 +2888,13 @@ |------|-------|------|--------| | 1 | 0.531307 | `kusto_cluster_list` | ✅ **EXPECTED** | | 2 | 0.465277 | `kusto_cluster_get` | ❌ | -| 3 | 0.432320 | `kusto_database_list` | ❌ | +| 3 | 0.432311 | `kusto_database_list` | ❌ | | 4 | 0.369596 | `aks_cluster_get` | ❌ | | 5 | 0.363119 | `kusto_table_schema` | ❌ | --- -## Test 138 +## Test 143 **Expected Tool:** `kusto_cluster_list` **Prompt:** Show me the Data Explorer clusters in my subscription @@ -2810,13 +2905,13 @@ |------|-------|------|--------| | 1 | 0.701484 | `kusto_cluster_list` | ✅ **EXPECTED** | | 2 | 0.571191 | `kusto_cluster_get` | ❌ | -| 3 | 0.548690 | `kusto_database_list` | ❌ | +| 3 | 0.548734 | `kusto_database_list` | ❌ | | 4 | 0.498909 | `aks_cluster_get` | ❌ | | 5 | 0.474201 | `redis_list` | ❌ | --- -## Test 139 +## Test 144 **Expected Tool:** `kusto_database_list` **Prompt:** List all databases in the Data Explorer cluster @@ -2825,15 +2920,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.677042 | `kusto_database_list` | ✅ **EXPECTED** | +| 1 | 0.676656 | `kusto_database_list` | ✅ **EXPECTED** | | 2 | 0.560592 | `kusto_cluster_list` | ❌ | -| 3 | 0.556688 | `kusto_table_list` | ❌ | +| 3 | 0.556795 | `kusto_table_list` | ❌ | | 4 | 0.553218 | `postgres_database_list` | ❌ | | 5 | 0.549673 | `cosmos_database_list` | ❌ | --- -## Test 140 +## Test 145 **Expected Tool:** `kusto_database_list` **Prompt:** Show me the databases in 
the Data Explorer cluster @@ -2842,15 +2937,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.623528 | `kusto_database_list` | ✅ **EXPECTED** | -| 2 | 0.509953 | `kusto_cluster_list` | ❌ | -| 3 | 0.506997 | `kusto_table_list` | ❌ | +| 1 | 0.623242 | `kusto_database_list` | ✅ **EXPECTED** | +| 2 | 0.509952 | `kusto_cluster_list` | ❌ | +| 3 | 0.507073 | `kusto_table_list` | ❌ | | 4 | 0.497144 | `cosmos_database_list` | ❌ | | 5 | 0.491400 | `mysql_database_list` | ❌ | --- -## Test 141 +## Test 146 **Expected Tool:** `kusto_query` **Prompt:** Show me all items that contain the word in the Data Explorer table in cluster @@ -2860,14 +2955,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.423660 | `kusto_query` | ✅ **EXPECTED** | -| 2 | 0.409558 | `postgres_database_query` | ❌ | +| 2 | 0.409485 | `postgres_database_query` | ❌ | | 3 | 0.408178 | `kusto_table_schema` | ❌ | -| 4 | 0.407741 | `kusto_sample` | ❌ | -| 5 | 0.403990 | `kusto_cluster_list` | ❌ | +| 4 | 0.407740 | `kusto_sample` | ❌ | +| 5 | 0.403989 | `kusto_cluster_list` | ❌ | --- -## Test 142 +## Test 147 **Expected Tool:** `kusto_sample` **Prompt:** Show me a data sample from the Data Explorer table in cluster @@ -2878,13 +2973,13 @@ |------|-------|------|--------| | 1 | 0.595554 | `kusto_sample` | ✅ **EXPECTED** | | 2 | 0.510233 | `kusto_table_schema` | ❌ | -| 3 | 0.424221 | `kusto_table_list` | ❌ | +| 3 | 0.424212 | `kusto_table_list` | ❌ | | 4 | 0.400924 | `kusto_cluster_list` | ❌ | | 5 | 0.399525 | `kusto_cluster_get` | ❌ | --- -## Test 143 +## Test 148 **Expected Tool:** `kusto_table_list` **Prompt:** List all tables in the Data Explorer database in cluster @@ -2893,15 +2988,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.679655 | `kusto_table_list` | ✅ **EXPECTED** | +| 1 | 0.679642 | `kusto_table_list` | ✅ **EXPECTED** | | 2 | 0.585237 | `postgres_table_list` | ❌ | -| 3 | 0.581184 | `kusto_database_list` | ❌ | 
-| 4 | 0.556686 | `mysql_table_list` | ❌ | -| 5 | 0.550007 | `monitor_table_list` | ❌ | +| 3 | 0.580964 | `kusto_database_list` | ❌ | +| 4 | 0.556724 | `mysql_table_list` | ❌ | +| 5 | 0.550005 | `monitor_table_list` | ❌ | --- -## Test 144 +## Test 149 **Expected Tool:** `kusto_table_list` **Prompt:** Show me the tables in the Data Explorer database in cluster @@ -2910,15 +3005,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.619269 | `kusto_table_list` | ✅ **EXPECTED** | -| 2 | 0.554333 | `kusto_table_schema` | ❌ | -| 3 | 0.527616 | `kusto_database_list` | ❌ | -| 4 | 0.524607 | `mysql_table_list` | ❌ | +| 1 | 0.619252 | `kusto_table_list` | ✅ **EXPECTED** | +| 2 | 0.554332 | `kusto_table_schema` | ❌ | +| 3 | 0.527431 | `kusto_database_list` | ❌ | +| 4 | 0.524691 | `mysql_table_list` | ❌ | | 5 | 0.523432 | `postgres_table_list` | ❌ | --- -## Test 145 +## Test 150 **Expected Tool:** `kusto_table_schema` **Prompt:** Show me the schema for table in the Data Explorer database in cluster @@ -2927,15 +3022,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.667033 | `kusto_table_schema` | ✅ **EXPECTED** | -| 2 | 0.564282 | `postgres_table_schema_get` | ❌ | -| 3 | 0.527921 | `mysql_table_schema_get` | ❌ | -| 4 | 0.490939 | `kusto_sample` | ❌ | -| 5 | 0.489722 | `kusto_table_list` | ❌ | +| 1 | 0.666980 | `kusto_table_schema` | ✅ **EXPECTED** | +| 2 | 0.564204 | `postgres_table_schema_get` | ❌ | +| 3 | 0.528301 | `mysql_table_schema_get` | ❌ | +| 4 | 0.490892 | `kusto_sample` | ❌ | +| 5 | 0.489745 | `kusto_table_list` | ❌ | --- -## Test 146 +## Test 151 **Expected Tool:** `mysql_database_list` **Prompt:** List all MySQL databases in server @@ -2944,15 +3039,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.633973 | `postgres_database_list` | ❌ | -| 2 | 0.623333 | `mysql_database_list` | ✅ **EXPECTED** | -| 3 | 0.534537 | `mysql_table_list` | ❌ | -| 4 | 0.498854 | `mysql_server_list` | 
❌ | -| 5 | 0.490179 | `sql_db_list` | ❌ | +| 1 | 0.633991 | `postgres_database_list` | ❌ | +| 2 | 0.623359 | `mysql_database_list` | ✅ **EXPECTED** | +| 3 | 0.534434 | `mysql_table_list` | ❌ | +| 4 | 0.498902 | `mysql_server_list` | ❌ | +| 5 | 0.490102 | `sql_db_list` | ❌ | --- -## Test 147 +## Test 152 **Expected Tool:** `mysql_database_list` **Prompt:** Show me the MySQL databases in server @@ -2961,15 +3056,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.588122 | `mysql_database_list` | ✅ **EXPECTED** | +| 1 | 0.588121 | `mysql_database_list` | ✅ **EXPECTED** | | 2 | 0.574089 | `postgres_database_list` | ❌ | -| 3 | 0.483938 | `mysql_table_list` | ❌ | -| 4 | 0.463238 | `mysql_server_list` | ❌ | -| 5 | 0.444622 | `sql_db_list` | ❌ | +| 3 | 0.483855 | `mysql_table_list` | ❌ | +| 4 | 0.463244 | `mysql_server_list` | ❌ | +| 5 | 0.444547 | `sql_db_list` | ❌ | --- -## Test 148 +## Test 153 **Expected Tool:** `mysql_database_query` **Prompt:** Show me all items that contain the word in the MySQL database in server @@ -2978,15 +3073,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.476539 | `mysql_table_list` | ❌ | +| 1 | 0.476423 | `mysql_table_list` | ❌ | | 2 | 0.455770 | `mysql_database_list` | ❌ | -| 3 | 0.433392 | `mysql_database_query` | ✅ **EXPECTED** | -| 4 | 0.419938 | `mysql_server_list` | ❌ | -| 5 | 0.409445 | `mysql_table_schema_get` | ❌ | +| 3 | 0.432703 | `mysql_database_query` | ✅ **EXPECTED** | +| 4 | 0.419859 | `mysql_server_list` | ❌ | +| 5 | 0.409655 | `mysql_table_schema_get` | ❌ | --- -## Test 149 +## Test 154 **Expected Tool:** `mysql_server_config_get` **Prompt:** Show me the configuration of MySQL server @@ -2995,15 +3090,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.531887 | `postgres_server_config_get` | ❌ | -| 2 | 0.516894 | `mysql_server_param_set` | ❌ | -| 3 | 0.489816 | `mysql_server_config_get` | ✅ **EXPECTED** | -| 4 | 0.476863 | 
`mysql_server_param_get` | ❌ | -| 5 | 0.426507 | `mysql_table_schema_get` | ❌ | +| 1 | 0.531964 | `postgres_server_config_get` | ❌ | +| 2 | 0.517385 | `mysql_server_param_set` | ❌ | +| 3 | 0.489870 | `mysql_server_config_get` | ✅ **EXPECTED** | +| 4 | 0.476944 | `mysql_server_param_get` | ❌ | +| 5 | 0.426840 | `mysql_table_schema_get` | ❌ | --- -## Test 150 +## Test 155 **Expected Tool:** `mysql_server_list` **Prompt:** List all MySQL servers in my subscription @@ -3012,15 +3107,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.678536 | `postgres_server_list` | ❌ | +| 1 | 0.678473 | `postgres_server_list` | ❌ | | 2 | 0.558177 | `mysql_database_list` | ❌ | -| 3 | 0.554810 | `mysql_server_list` | ✅ **EXPECTED** | +| 3 | 0.554818 | `mysql_server_list` | ✅ **EXPECTED** | | 4 | 0.513706 | `kusto_cluster_list` | ❌ | -| 5 | 0.501289 | `mysql_table_list` | ❌ | +| 5 | 0.501199 | `mysql_table_list` | ❌ | --- -## Test 151 +## Test 156 **Expected Tool:** `mysql_server_list` **Prompt:** Show me my MySQL servers @@ -3030,14 +3125,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.478518 | `mysql_database_list` | ❌ | -| 2 | 0.474630 | `mysql_server_list` | ✅ **EXPECTED** | -| 3 | 0.435692 | `postgres_server_list` | ❌ | -| 4 | 0.412417 | `mysql_table_list` | ❌ | +| 2 | 0.474586 | `mysql_server_list` | ✅ **EXPECTED** | +| 3 | 0.435642 | `postgres_server_list` | ❌ | +| 4 | 0.412380 | `mysql_table_list` | ❌ | | 5 | 0.389993 | `postgres_database_list` | ❌ | --- -## Test 152 +## Test 157 **Expected Tool:** `mysql_server_list` **Prompt:** Show me the MySQL servers in my subscription @@ -3046,15 +3141,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.636471 | `postgres_server_list` | ❌ | -| 2 | 0.534277 | `mysql_server_list` | ✅ **EXPECTED** | +| 1 | 0.636435 | `postgres_server_list` | ❌ | +| 2 | 0.534266 | `mysql_server_list` | ✅ **EXPECTED** | | 3 | 0.530210 | `mysql_database_list` | ❌ | | 4 | 
0.475052 | `kusto_cluster_list` | ❌ | | 5 | 0.470468 | `redis_list` | ❌ | --- -## Test 153 +## Test 158 **Expected Tool:** `mysql_server_param_get` **Prompt:** Show me the value of connection timeout in seconds in my MySQL server @@ -3064,14 +3159,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.495071 | `mysql_server_param_get` | ✅ **EXPECTED** | -| 2 | 0.438075 | `mysql_server_param_set` | ❌ | -| 3 | 0.333841 | `mysql_database_query` | ❌ | -| 4 | 0.313150 | `mysql_table_schema_get` | ❌ | -| 5 | 0.310834 | `postgres_server_param_get` | ❌ | +| 2 | 0.437857 | `mysql_server_param_set` | ❌ | +| 3 | 0.333041 | `mysql_database_query` | ❌ | +| 4 | 0.313364 | `mysql_table_schema_get` | ❌ | +| 5 | 0.310856 | `postgres_server_param_get` | ❌ | --- -## Test 154 +## Test 159 **Expected Tool:** `mysql_server_param_set` **Prompt:** Set connection timeout to 20 seconds for my MySQL server @@ -3080,15 +3175,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.449419 | `mysql_server_param_set` | ✅ **EXPECTED** | +| 1 | 0.449612 | `mysql_server_param_set` | ✅ **EXPECTED** | | 2 | 0.381144 | `mysql_server_param_get` | ❌ | | 3 | 0.303499 | `postgres_server_param_set` | ❌ | -| 4 | 0.298911 | `mysql_database_query` | ❌ | -| 5 | 0.254206 | `mysql_server_list` | ❌ | +| 4 | 0.298661 | `mysql_database_query` | ❌ | +| 5 | 0.254180 | `mysql_server_list` | ❌ | --- -## Test 155 +## Test 160 **Expected Tool:** `mysql_table_list` **Prompt:** List all tables in the MySQL database in server @@ -3097,15 +3192,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.633547 | `mysql_table_list` | ✅ **EXPECTED** | -| 2 | 0.573844 | `postgres_table_list` | ❌ | -| 3 | 0.550898 | `postgres_database_list` | ❌ | -| 4 | 0.546963 | `mysql_database_list` | ❌ | -| 5 | 0.511906 | `kusto_table_list` | ❌ | +| 1 | 0.633542 | `mysql_table_list` | ✅ **EXPECTED** | +| 2 | 0.573851 | `postgres_table_list` | ❌ | +| 3 | 0.550878 | 
`postgres_database_list` | ❌ | +| 4 | 0.546988 | `mysql_database_list` | ❌ | +| 5 | 0.511879 | `kusto_table_list` | ❌ | --- -## Test 156 +## Test 161 **Expected Tool:** `mysql_table_list` **Prompt:** Show me the tables in the MySQL database in server @@ -3114,15 +3209,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.609111 | `mysql_table_list` | ✅ **EXPECTED** | +| 1 | 0.609131 | `mysql_table_list` | ✅ **EXPECTED** | | 2 | 0.526236 | `postgres_table_list` | ❌ | | 3 | 0.525709 | `mysql_database_list` | ❌ | -| 4 | 0.507258 | `mysql_table_schema_get` | ❌ | +| 4 | 0.507532 | `mysql_table_schema_get` | ❌ | | 5 | 0.498050 | `postgres_database_list` | ❌ | --- -## Test 157 +## Test 162 **Expected Tool:** `mysql_table_schema_get` **Prompt:** Show me the schema of table in the MySQL database in server @@ -3131,15 +3226,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.630623 | `mysql_table_schema_get` | ✅ **EXPECTED** | +| 1 | 0.630824 | `mysql_table_schema_get` | ✅ **EXPECTED** | | 2 | 0.558306 | `postgres_table_schema_get` | ❌ | -| 3 | 0.545162 | `mysql_table_list` | ❌ | +| 3 | 0.545025 | `mysql_table_list` | ❌ | | 4 | 0.517419 | `kusto_table_schema` | ❌ | | 5 | 0.457739 | `mysql_database_list` | ❌ | --- -## Test 158 +## Test 163 **Expected Tool:** `postgres_database_list` **Prompt:** List all PostgreSQL databases in server @@ -3148,15 +3243,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.815693 | `postgres_database_list` | ✅ **EXPECTED** | -| 2 | 0.644105 | `postgres_table_list` | ❌ | -| 3 | 0.622833 | `postgres_server_list` | ❌ | -| 4 | 0.542646 | `postgres_server_config_get` | ❌ | -| 5 | 0.490864 | `postgres_server_param_get` | ❌ | +| 1 | 0.815470 | `postgres_database_list` | ✅ **EXPECTED** | +| 2 | 0.643680 | `postgres_table_list` | ❌ | +| 3 | 0.622824 | `postgres_server_list` | ❌ | +| 4 | 0.542912 | `postgres_server_config_get` | ❌ | +| 5 | 0.490950 | 
`postgres_server_param_get` | ❌ | --- -## Test 159 +## Test 164 **Expected Tool:** `postgres_database_list` **Prompt:** Show me the PostgreSQL databases in server @@ -3166,14 +3261,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.760033 | `postgres_database_list` | ✅ **EXPECTED** | -| 2 | 0.589811 | `postgres_server_list` | ❌ | +| 2 | 0.589784 | `postgres_server_list` | ❌ | | 3 | 0.585891 | `postgres_table_list` | ❌ | | 4 | 0.552660 | `postgres_server_config_get` | ❌ | -| 5 | 0.495629 | `postgres_server_param_get` | ❌ | +| 5 | 0.495685 | `postgres_server_param_get` | ❌ | --- -## Test 160 +## Test 165 **Expected Tool:** `postgres_database_query` **Prompt:** Show me all items that contain the word in the PostgreSQL database in server @@ -3183,14 +3278,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.546211 | `postgres_database_list` | ❌ | -| 2 | 0.523142 | `postgres_database_query` | ✅ **EXPECTED** | +| 2 | 0.523223 | `postgres_database_query` | ✅ **EXPECTED** | | 3 | 0.503267 | `postgres_table_list` | ❌ | -| 4 | 0.466608 | `postgres_server_list` | ❌ | -| 5 | 0.403969 | `postgres_server_param_get` | ❌ | +| 4 | 0.466599 | `postgres_server_list` | ❌ | +| 5 | 0.403963 | `postgres_server_param_get` | ❌ | --- -## Test 161 +## Test 166 **Expected Tool:** `postgres_server_config_get` **Prompt:** Show me the configuration of PostgreSQL server @@ -3201,13 +3296,13 @@ |------|-------|------|--------| | 1 | 0.756593 | `postgres_server_config_get` | ✅ **EXPECTED** | | 2 | 0.615429 | `postgres_server_param_set` | ❌ | -| 3 | 0.599471 | `postgres_server_param_get` | ❌ | -| 4 | 0.535049 | `postgres_database_list` | ❌ | -| 5 | 0.518593 | `postgres_server_list` | ❌ | +| 3 | 0.599487 | `postgres_server_param_get` | ❌ | +| 4 | 0.535050 | `postgres_database_list` | ❌ | +| 5 | 0.518574 | `postgres_server_list` | ❌ | --- -## Test 162 +## Test 167 **Expected Tool:** `postgres_server_list` **Prompt:** List all PostgreSQL servers 
in my subscription @@ -3216,7 +3311,7 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.900052 | `postgres_server_list` | ✅ **EXPECTED** | +| 1 | 0.900023 | `postgres_server_list` | ✅ **EXPECTED** | | 2 | 0.640733 | `postgres_database_list` | ❌ | | 3 | 0.565914 | `postgres_table_list` | ❌ | | 4 | 0.538997 | `postgres_server_config_get` | ❌ | @@ -3224,7 +3319,7 @@ --- -## Test 163 +## Test 168 **Expected Tool:** `postgres_server_list` **Prompt:** Show me my PostgreSQL servers @@ -3233,15 +3328,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.674359 | `postgres_server_list` | ✅ **EXPECTED** | +| 1 | 0.674327 | `postgres_server_list` | ✅ **EXPECTED** | | 2 | 0.607062 | `postgres_database_list` | ❌ | -| 3 | 0.576349 | `postgres_server_config_get` | ❌ | -| 4 | 0.522996 | `postgres_table_list` | ❌ | -| 5 | 0.506171 | `postgres_server_param_get` | ❌ | +| 3 | 0.576348 | `postgres_server_config_get` | ❌ | +| 4 | 0.522995 | `postgres_table_list` | ❌ | +| 5 | 0.506254 | `postgres_server_param_get` | ❌ | --- -## Test 164 +## Test 169 **Expected Tool:** `postgres_server_list` **Prompt:** Show me the PostgreSQL servers in my subscription @@ -3250,15 +3345,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.832164 | `postgres_server_list` | ✅ **EXPECTED** | +| 1 | 0.832155 | `postgres_server_list` | ✅ **EXPECTED** | | 2 | 0.579232 | `postgres_database_list` | ❌ | | 3 | 0.531804 | `postgres_server_config_get` | ❌ | | 4 | 0.514445 | `postgres_table_list` | ❌ | -| 5 | 0.505869 | `postgres_server_param_get` | ❌ | +| 5 | 0.505978 | `postgres_server_param_get` | ❌ | --- -## Test 165 +## Test 170 **Expected Tool:** `postgres_server_param_get` **Prompt:** Show me if the parameter my PostgreSQL server has replication enabled @@ -3267,15 +3362,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.594753 | `postgres_server_param_get` | ✅ **EXPECTED** | +| 1 | 0.594770 | 
`postgres_server_param_get` | ✅ **EXPECTED** | | 2 | 0.552678 | `postgres_server_param_set` | ❌ | | 3 | 0.539671 | `postgres_server_config_get` | ❌ | -| 4 | 0.489701 | `postgres_server_list` | ❌ | +| 4 | 0.489693 | `postgres_server_list` | ❌ | | 5 | 0.451871 | `postgres_database_list` | ❌ | --- -## Test 166 +## Test 171 **Expected Tool:** `postgres_server_param_set` **Prompt:** Enable replication for my PostgreSQL server @@ -3284,15 +3379,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.579873 | `postgres_server_param_set` | ✅ **EXPECTED** | -| 2 | 0.488474 | `postgres_server_config_get` | ❌ | -| 3 | 0.469833 | `postgres_server_list` | ❌ | -| 4 | 0.447011 | `postgres_server_param_get` | ❌ | -| 5 | 0.440760 | `postgres_database_list` | ❌ | +| 1 | 0.579909 | `postgres_server_param_set` | ✅ **EXPECTED** | +| 2 | 0.488496 | `postgres_server_config_get` | ❌ | +| 3 | 0.469810 | `postgres_server_list` | ❌ | +| 4 | 0.447051 | `postgres_server_param_get` | ❌ | +| 5 | 0.440716 | `postgres_database_list` | ❌ | --- -## Test 167 +## Test 172 **Expected Tool:** `postgres_table_list` **Prompt:** List all tables in the PostgreSQL database in server @@ -3301,15 +3396,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.789883 | `postgres_table_list` | ✅ **EXPECTED** | -| 2 | 0.750580 | `postgres_database_list` | ❌ | -| 3 | 0.574946 | `postgres_server_list` | ❌ | -| 4 | 0.519820 | `postgres_table_schema_get` | ❌ | -| 5 | 0.501400 | `postgres_server_config_get` | ❌ | +| 1 | 0.789934 | `postgres_table_list` | ✅ **EXPECTED** | +| 2 | 0.750592 | `postgres_database_list` | ❌ | +| 3 | 0.574975 | `postgres_server_list` | ❌ | +| 4 | 0.519816 | `postgres_table_schema_get` | ❌ | +| 5 | 0.501361 | `postgres_server_config_get` | ❌ | --- -## Test 168 +## Test 173 **Expected Tool:** `postgres_table_list` **Prompt:** Show me the tables in the PostgreSQL database in server @@ -3321,12 +3416,12 @@ | 1 | 0.736083 | `postgres_table_list` | ✅ 
**EXPECTED** | | 2 | 0.690112 | `postgres_database_list` | ❌ | | 3 | 0.558357 | `postgres_table_schema_get` | ❌ | -| 4 | 0.543342 | `postgres_server_list` | ❌ | +| 4 | 0.543331 | `postgres_server_list` | ❌ | | 5 | 0.521570 | `postgres_server_config_get` | ❌ | --- -## Test 169 +## Test 174 **Expected Tool:** `postgres_table_schema_get` **Prompt:** Show me the schema of table
in the PostgreSQL database in server @@ -3335,15 +3430,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.714877 | `postgres_table_schema_get` | ✅ **EXPECTED** | -| 2 | 0.597846 | `postgres_table_list` | ❌ | -| 3 | 0.574230 | `postgres_database_list` | ❌ | -| 4 | 0.508082 | `postgres_server_config_get` | ❌ | -| 5 | 0.502626 | `kusto_table_schema` | ❌ | +| 1 | 0.714916 | `postgres_table_schema_get` | ✅ **EXPECTED** | +| 2 | 0.597892 | `postgres_table_list` | ❌ | +| 3 | 0.574251 | `postgres_database_list` | ❌ | +| 4 | 0.508090 | `postgres_server_config_get` | ❌ | +| 5 | 0.502593 | `kusto_table_schema` | ❌ | --- -## Test 170 +## Test 175 **Expected Tool:** `deploy_app_logs_get` **Prompt:** Show me the log of the application deployed by azd @@ -3352,15 +3447,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.711770 | `deploy_app_logs_get` | ✅ **EXPECTED** | +| 1 | 0.711844 | `deploy_app_logs_get` | ✅ **EXPECTED** | | 2 | 0.471692 | `deploy_plan_get` | ❌ | -| 3 | 0.451653 | `monitor_activitylog_list` | ❌ | -| 4 | 0.404890 | `deploy_pipeline_guidance_get` | ❌ | +| 3 | 0.451639 | `monitor_activitylog_list` | ❌ | +| 4 | 0.404892 | `deploy_pipeline_guidance_get` | ❌ | | 5 | 0.401388 | `monitor_resource_log_query` | ❌ | --- -## Test 171 +## Test 176 **Expected Tool:** `deploy_architecture_diagram_generate` **Prompt:** Generate the azure architecture diagram for this application @@ -3369,15 +3464,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.680557 | `deploy_architecture_diagram_generate` | ✅ **EXPECTED** | -| 2 | 0.562521 | `deploy_plan_get` | ❌ | -| 3 | 0.497193 | `deploy_pipeline_guidance_get` | ❌ | -| 4 | 0.489344 | `cloudarchitect_design` | ❌ | -| 5 | 0.435921 | `deploy_iac_rules_get` | ❌ | +| 1 | 0.680599 | `deploy_architecture_diagram_generate` | ✅ **EXPECTED** | +| 2 | 0.562485 | `deploy_plan_get` | ❌ | +| 3 | 0.497326 | `deploy_pipeline_guidance_get` | ❌ | +| 4 | 0.489325 | 
`cloudarchitect_design` | ❌ | +| 5 | 0.435899 | `deploy_iac_rules_get` | ❌ | --- -## Test 172 +## Test 177 **Expected Tool:** `deploy_iac_rules_get` **Prompt:** Show me the rules to generate bicep scripts @@ -3387,14 +3482,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.529092 | `deploy_iac_rules_get` | ✅ **EXPECTED** | -| 2 | 0.479903 | `bicepschema_get` | ❌ | +| 2 | 0.480324 | `bicepschema_get` | ❌ | | 3 | 0.391965 | `get_bestpractices_get` | ❌ | -| 4 | 0.383214 | `azureterraformbestpractices_get` | ❌ | -| 5 | 0.375558 | `extension_cli_generate` | ❌ | +| 4 | 0.383210 | `azureterraformbestpractices_get` | ❌ | +| 5 | 0.375561 | `extension_cli_generate` | ❌ | --- -## Test 173 +## Test 178 **Expected Tool:** `deploy_pipeline_guidance_get` **Prompt:** How can I create a CI/CD pipeline to deploy this app to Azure? @@ -3403,15 +3498,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.638841 | `deploy_pipeline_guidance_get` | ✅ **EXPECTED** | +| 1 | 0.638588 | `deploy_pipeline_guidance_get` | ✅ **EXPECTED** | | 2 | 0.499242 | `deploy_plan_get` | ❌ | -| 3 | 0.448918 | `deploy_iac_rules_get` | ❌ | -| 4 | 0.385920 | `deploy_app_logs_get` | ❌ | +| 3 | 0.448917 | `deploy_iac_rules_get` | ❌ | +| 4 | 0.385670 | `deploy_app_logs_get` | ❌ | | 5 | 0.382240 | `get_bestpractices_get` | ❌ | --- -## Test 174 +## Test 179 **Expected Tool:** `deploy_plan_get` **Prompt:** Create a plan to deploy this application to azure @@ -3421,14 +3516,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.688055 | `deploy_plan_get` | ✅ **EXPECTED** | -| 2 | 0.587903 | `deploy_pipeline_guidance_get` | ❌ | +| 2 | 0.587963 | `deploy_pipeline_guidance_get` | ❌ | | 3 | 0.499385 | `deploy_iac_rules_get` | ❌ | -| 4 | 0.497869 | `deploy_architecture_diagram_generate` | ❌ | -| 5 | 0.448692 | `loadtesting_test_create` | ❌ | +| 4 | 0.498575 | `deploy_architecture_diagram_generate` | ❌ | +| 5 | 0.448912 | 
`loadtesting_test_create` | ❌ | --- -## Test 175 +## Test 180 **Expected Tool:** `eventgrid_events_publish` **Prompt:** Publish an event to Event Grid topic using with the following data @@ -3437,15 +3532,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.755366 | `eventgrid_events_publish` | ✅ **EXPECTED** | -| 2 | 0.482575 | `eventgrid_subscription_list` | ❌ | -| 3 | 0.465432 | `eventgrid_topic_list` | ❌ | -| 4 | 0.360845 | `eventhubs_eventhub_update` | ❌ | -| 5 | 0.354313 | `servicebus_topic_details` | ❌ | +| 1 | 0.755353 | `eventgrid_events_publish` | ✅ **EXPECTED** | +| 2 | 0.482544 | `eventgrid_subscription_list` | ❌ | +| 3 | 0.465759 | `eventgrid_topic_list` | ❌ | +| 4 | 0.360686 | `eventhubs_eventhub_update` | ❌ | +| 5 | 0.355213 | `servicebus_topic_details` | ❌ | --- -## Test 176 +## Test 181 **Expected Tool:** `eventgrid_events_publish` **Prompt:** Publish event to my Event Grid topic with the following events @@ -3454,15 +3549,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.654647 | `eventgrid_events_publish` | ✅ **EXPECTED** | -| 2 | 0.524503 | `eventgrid_subscription_list` | ❌ | -| 3 | 0.510039 | `eventgrid_topic_list` | ❌ | -| 4 | 0.373718 | `servicebus_topic_details` | ❌ | +| 1 | 0.654648 | `eventgrid_events_publish` | ✅ **EXPECTED** | +| 2 | 0.524134 | `eventgrid_subscription_list` | ❌ | +| 3 | 0.509777 | `eventgrid_topic_list` | ❌ | +| 4 | 0.373438 | `servicebus_topic_details` | ❌ | | 5 | 0.359908 | `eventhubs_eventhub_update` | ❌ | --- -## Test 177 +## Test 182 **Expected Tool:** `eventgrid_events_publish` **Prompt:** Send an event to Event Grid topic in resource group with @@ -3472,14 +3567,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.600274 | `eventgrid_events_publish` | ✅ **EXPECTED** | -| 2 | 0.521240 | `eventgrid_topic_list` | ❌ | -| 3 | 0.504808 | `eventgrid_subscription_list` | ❌ | -| 4 | 0.411390 | `eventhubs_eventhub_consumergroup_update` | ❌ 
| +| 2 | 0.521041 | `eventgrid_topic_list` | ❌ | +| 3 | 0.504642 | `eventgrid_subscription_list` | ❌ | +| 4 | 0.411129 | `eventhubs_eventhub_consumergroup_update` | ❌ | | 5 | 0.389439 | `eventhubs_eventhub_consumergroup_get` | ❌ | --- -## Test 178 +## Test 183 **Expected Tool:** `eventgrid_topic_list` **Prompt:** List all Event Grid topics in my subscription @@ -3488,15 +3583,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.770140 | `eventgrid_topic_list` | ✅ **EXPECTED** | -| 2 | 0.745470 | `eventgrid_subscription_list` | ❌ | +| 1 | 0.769921 | `eventgrid_topic_list` | ✅ **EXPECTED** | +| 2 | 0.745048 | `eventgrid_subscription_list` | ❌ | | 3 | 0.561862 | `kusto_cluster_list` | ❌ | -| 4 | 0.545540 | `search_service_list` | ❌ | +| 4 | 0.543887 | `search_service_list` | ❌ | | 5 | 0.526123 | `subscription_list` | ❌ | --- -## Test 179 +## Test 184 **Expected Tool:** `eventgrid_topic_list` **Prompt:** Show me the Event Grid topics in my subscription @@ -3505,15 +3600,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.738258 | `eventgrid_topic_list` | ✅ **EXPECTED** | -| 2 | 0.737486 | `eventgrid_subscription_list` | ❌ | +| 1 | 0.738040 | `eventgrid_topic_list` | ✅ **EXPECTED** | +| 2 | 0.736919 | `eventgrid_subscription_list` | ❌ | | 3 | 0.492592 | `kusto_cluster_list` | ❌ | | 4 | 0.480252 | `subscription_list` | ❌ | -| 5 | 0.475119 | `search_service_list` | ❌ | +| 5 | 0.473459 | `search_service_list` | ❌ | --- -## Test 180 +## Test 185 **Expected Tool:** `eventgrid_topic_list` **Prompt:** List all Event Grid topics in subscription @@ -3522,15 +3617,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.770140 | `eventgrid_topic_list` | ✅ **EXPECTED** | -| 2 | 0.721362 | `eventgrid_subscription_list` | ❌ | -| 3 | 0.535326 | `kusto_cluster_list` | ❌ | -| 4 | 0.514248 | `search_service_list` | ❌ | -| 5 | 0.495952 | `subscription_list` | ❌ | +| 1 | 0.769840 | `eventgrid_topic_list` | ✅ 
**EXPECTED** | +| 2 | 0.720426 | `eventgrid_subscription_list` | ❌ | +| 3 | 0.535369 | `kusto_cluster_list` | ❌ | +| 4 | 0.513921 | `search_service_list` | ❌ | +| 5 | 0.495939 | `subscription_list` | ❌ | --- -## Test 181 +## Test 186 **Expected Tool:** `eventgrid_topic_list` **Prompt:** List all Event Grid topics in resource group in subscription @@ -3539,15 +3634,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.758816 | `eventgrid_topic_list` | ✅ **EXPECTED** | -| 2 | 0.704462 | `eventgrid_subscription_list` | ❌ | +| 1 | 0.758562 | `eventgrid_topic_list` | ✅ **EXPECTED** | +| 2 | 0.704062 | `eventgrid_subscription_list` | ❌ | | 3 | 0.609175 | `group_list` | ❌ | -| 4 | 0.544896 | `monitor_webtests_list` | ❌ | +| 4 | 0.544809 | `monitor_webtests_list` | ❌ | | 5 | 0.524209 | `eventhubs_namespace_get` | ❌ | --- -## Test 182 +## Test 187 **Expected Tool:** `eventgrid_subscription_list` **Prompt:** Show me all Event Grid subscriptions for topic @@ -3556,15 +3651,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.769097 | `eventgrid_subscription_list` | ✅ **EXPECTED** | -| 2 | 0.720606 | `eventgrid_topic_list` | ❌ | -| 3 | 0.498615 | `servicebus_topic_details` | ❌ | +| 1 | 0.768696 | `eventgrid_subscription_list` | ✅ **EXPECTED** | +| 2 | 0.720373 | `eventgrid_topic_list` | ❌ | +| 3 | 0.498398 | `servicebus_topic_details` | ❌ | | 4 | 0.486216 | `servicebus_topic_subscription_details` | ❌ | | 5 | 0.486162 | `eventgrid_events_publish` | ❌ | --- -## Test 183 +## Test 188 **Expected Tool:** `eventgrid_subscription_list` **Prompt:** List Event Grid subscriptions for topic in subscription @@ -3573,15 +3668,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.718109 | `eventgrid_subscription_list` | ✅ **EXPECTED** | -| 2 | 0.709805 | `eventgrid_topic_list` | ❌ | +| 1 | 0.717676 | `eventgrid_subscription_list` | ✅ **EXPECTED** | +| 2 | 0.709586 | `eventgrid_topic_list` | ❌ | | 3 | 0.539977 
| `servicebus_topic_subscription_details` | ❌ | -| 4 | 0.529286 | `servicebus_topic_details` | ❌ | +| 4 | 0.529084 | `servicebus_topic_details` | ❌ | | 5 | 0.477876 | `eventgrid_events_publish` | ❌ | --- -## Test 184 +## Test 189 **Expected Tool:** `eventgrid_subscription_list` **Prompt:** List Event Grid subscriptions for topic in resource group @@ -3590,15 +3685,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.746815 | `eventgrid_subscription_list` | ✅ **EXPECTED** | -| 2 | 0.746174 | `eventgrid_topic_list` | ❌ | -| 3 | 0.535731 | `monitor_webtests_list` | ❌ | -| 4 | 0.524919 | `group_list` | ❌ | -| 5 | 0.503158 | `servicebus_topic_details` | ❌ | +| 1 | 0.746672 | `eventgrid_subscription_list` | ✅ **EXPECTED** | +| 2 | 0.745851 | `eventgrid_topic_list` | ❌ | +| 3 | 0.535463 | `monitor_webtests_list` | ❌ | +| 4 | 0.524802 | `group_list` | ❌ | +| 5 | 0.502884 | `servicebus_topic_details` | ❌ | --- -## Test 185 +## Test 190 **Expected Tool:** `eventgrid_subscription_list` **Prompt:** Show all Event Grid subscriptions in my subscription @@ -3607,15 +3702,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.736436 | `eventgrid_subscription_list` | ✅ **EXPECTED** | -| 2 | 0.659727 | `eventgrid_topic_list` | ❌ | -| 3 | 0.569256 | `subscription_list` | ❌ | +| 1 | 0.736844 | `eventgrid_subscription_list` | ✅ **EXPECTED** | +| 2 | 0.659612 | `eventgrid_topic_list` | ❌ | +| 3 | 0.569255 | `subscription_list` | ❌ | | 4 | 0.537922 | `kusto_cluster_list` | ❌ | -| 5 | 0.518857 | `search_service_list` | ❌ | +| 5 | 0.517276 | `search_service_list` | ❌ | --- -## Test 186 +## Test 191 **Expected Tool:** `eventgrid_subscription_list` **Prompt:** List all Event Grid subscriptions in subscription @@ -3624,15 +3719,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.684444 | `eventgrid_subscription_list` | ✅ **EXPECTED** | -| 2 | 0.656183 | `eventgrid_topic_list` | ❌ | -| 3 | 0.542320 | 
`subscription_list` | ❌ | -| 4 | 0.521015 | `kusto_cluster_list` | ❌ | -| 5 | 0.510024 | `group_list` | ❌ | +| 1 | 0.684586 | `eventgrid_subscription_list` | ✅ **EXPECTED** | +| 2 | 0.656227 | `eventgrid_topic_list` | ❌ | +| 3 | 0.542362 | `subscription_list` | ❌ | +| 4 | 0.521053 | `kusto_cluster_list` | ❌ | +| 5 | 0.510115 | `group_list` | ❌ | --- -## Test 187 +## Test 192 **Expected Tool:** `eventgrid_subscription_list` **Prompt:** Show Event Grid subscriptions in resource group in subscription @@ -3641,15 +3736,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.696101 | `eventgrid_subscription_list` | ✅ **EXPECTED** | -| 2 | 0.691739 | `eventgrid_topic_list` | ❌ | +| 1 | 0.696332 | `eventgrid_subscription_list` | ✅ **EXPECTED** | +| 2 | 0.691623 | `eventgrid_topic_list` | ❌ | | 3 | 0.557573 | `group_list` | ❌ | -| 4 | 0.510814 | `monitor_webtests_list` | ❌ | -| 5 | 0.505497 | `resourcehealth_availability-status_list` | ❌ | +| 4 | 0.510684 | `monitor_webtests_list` | ❌ | +| 5 | 0.504984 | `resourcehealth_availability-status_list` | ❌ | --- -## Test 188 +## Test 193 **Expected Tool:** `eventgrid_subscription_list` **Prompt:** List Event Grid subscriptions for subscription in location @@ -3658,15 +3753,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.709801 | `eventgrid_subscription_list` | ✅ **EXPECTED** | -| 2 | 0.642095 | `eventgrid_topic_list` | ❌ | +| 1 | 0.710457 | `eventgrid_subscription_list` | ✅ **EXPECTED** | +| 2 | 0.642001 | `eventgrid_topic_list` | ❌ | | 3 | 0.506618 | `subscription_list` | ❌ | -| 4 | 0.476763 | `search_service_list` | ❌ | +| 4 | 0.476396 | `search_service_list` | ❌ | | 5 | 0.475782 | `kusto_cluster_list` | ❌ | --- -## Test 189 +## Test 194 **Expected Tool:** `eventhubs_eventhub_consumergroup_delete` **Prompt:** Delete my consumer group in my event hub , namespace , and resource group @@ -3675,15 +3770,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| 
-| 1 | 0.766896 | `eventhubs_eventhub_consumergroup_delete` | ✅ **EXPECTED** | -| 2 | 0.675127 | `eventhubs_eventhub_consumergroup_update` | ❌ | -| 3 | 0.641111 | `eventhubs_eventhub_consumergroup_get` | ❌ | -| 4 | 0.633848 | `eventhubs_namespace_delete` | ❌ | -| 5 | 0.605802 | `eventhubs_eventhub_delete` | ❌ | +| 1 | 0.766928 | `eventhubs_eventhub_consumergroup_delete` | ✅ **EXPECTED** | +| 2 | 0.675842 | `eventhubs_eventhub_consumergroup_update` | ❌ | +| 3 | 0.641112 | `eventhubs_eventhub_consumergroup_get` | ❌ | +| 4 | 0.633788 | `eventhubs_namespace_delete` | ❌ | +| 5 | 0.605465 | `eventhubs_eventhub_delete` | ❌ | --- -## Test 190 +## Test 195 **Expected Tool:** `eventhubs_eventhub_consumergroup_get` **Prompt:** List all consumer groups in my event hub in namespace @@ -3693,14 +3788,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.738475 | `eventhubs_eventhub_consumergroup_get` | ✅ **EXPECTED** | -| 2 | 0.634345 | `eventhubs_eventhub_consumergroup_update` | ❌ | -| 3 | 0.626485 | `eventhubs_eventhub_consumergroup_delete` | ❌ | +| 2 | 0.634517 | `eventhubs_eventhub_consumergroup_update` | ❌ | +| 3 | 0.626486 | `eventhubs_eventhub_consumergroup_delete` | ❌ | | 4 | 0.606619 | `eventhubs_namespace_get` | ❌ | -| 5 | 0.593274 | `eventhubs_eventhub_get` | ❌ | +| 5 | 0.593098 | `eventhubs_eventhub_get` | ❌ | --- -## Test 191 +## Test 196 **Expected Tool:** `eventhubs_eventhub_consumergroup_get` **Prompt:** Get the details of my consumer group in my event hub , namespace , and resource group @@ -3710,14 +3805,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.712861 | `eventhubs_eventhub_consumergroup_get` | ✅ **EXPECTED** | -| 2 | 0.637418 | `eventhubs_eventhub_consumergroup_update` | ❌ | +| 2 | 0.637170 | `eventhubs_eventhub_consumergroup_update` | ❌ | | 3 | 0.625913 | `eventhubs_eventhub_consumergroup_delete` | ❌ | | 4 | 0.576800 | `eventhubs_namespace_get` | ❌ | -| 5 | 0.530108 | `eventhubs_eventhub_get` | ❌ 
| +| 5 | 0.529940 | `eventhubs_eventhub_get` | ❌ | --- -## Test 192 +## Test 197 **Expected Tool:** `eventhubs_eventhub_consumergroup_update` **Prompt:** Create a new consumer group in my event hub , namespace , and resource group @@ -3726,15 +3821,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.757520 | `eventhubs_eventhub_consumergroup_update` | ✅ **EXPECTED** | -| 2 | 0.688923 | `eventhubs_eventhub_consumergroup_get` | ❌ | -| 3 | 0.670026 | `eventhubs_eventhub_consumergroup_delete` | ❌ | -| 4 | 0.554314 | `eventhubs_eventhub_update` | ❌ | -| 5 | 0.545003 | `eventhubs_namespace_get` | ❌ | +| 1 | 0.756873 | `eventhubs_eventhub_consumergroup_update` | ✅ **EXPECTED** | +| 2 | 0.688248 | `eventhubs_eventhub_consumergroup_get` | ❌ | +| 3 | 0.669384 | `eventhubs_eventhub_consumergroup_delete` | ❌ | +| 4 | 0.553692 | `eventhubs_eventhub_update` | ❌ | +| 5 | 0.544512 | `eventhubs_namespace_get` | ❌ | --- -## Test 193 +## Test 198 **Expected Tool:** `eventhubs_eventhub_consumergroup_update` **Prompt:** Update my consumer group in my event hub , namespace , and resource group @@ -3743,15 +3838,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.739615 | `eventhubs_eventhub_consumergroup_update` | ✅ **EXPECTED** | -| 2 | 0.655951 | `eventhubs_eventhub_consumergroup_delete` | ❌ | -| 3 | 0.642701 | `eventhubs_eventhub_consumergroup_get` | ❌ | -| 4 | 0.552830 | `eventhubs_eventhub_update` | ❌ | -| 5 | 0.524428 | `eventhubs_namespace_delete` | ❌ | +| 1 | 0.739158 | `eventhubs_eventhub_consumergroup_update` | ✅ **EXPECTED** | +| 2 | 0.655927 | `eventhubs_eventhub_consumergroup_delete` | ❌ | +| 3 | 0.642524 | `eventhubs_eventhub_consumergroup_get` | ❌ | +| 4 | 0.552602 | `eventhubs_eventhub_update` | ❌ | +| 5 | 0.524106 | `eventhubs_namespace_delete` | ❌ | --- -## Test 194 +## Test 199 **Expected Tool:** `eventhubs_eventhub_delete` **Prompt:** Delete my event hub in my namespace and resource group @@ -3760,15 +3855,15 
@@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.699621 | `eventhubs_namespace_delete` | ❌ | -| 2 | 0.689171 | `eventhubs_eventhub_delete` | ✅ **EXPECTED** | -| 3 | 0.627887 | `eventhubs_eventhub_consumergroup_delete` | ❌ | -| 4 | 0.579273 | `eventhubs_namespace_get` | ❌ | -| 5 | 0.553715 | `eventhubs_eventhub_get` | ❌ | +| 1 | 0.699266 | `eventhubs_namespace_delete` | ❌ | +| 2 | 0.688646 | `eventhubs_eventhub_delete` | ✅ **EXPECTED** | +| 3 | 0.627721 | `eventhubs_eventhub_consumergroup_delete` | ❌ | +| 4 | 0.578653 | `eventhubs_namespace_get` | ❌ | +| 5 | 0.552963 | `eventhubs_eventhub_get` | ❌ | --- -## Test 195 +## Test 200 **Expected Tool:** `eventhubs_eventhub_get` **Prompt:** List all Event Hubs in my namespace @@ -3777,15 +3872,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.773231 | `eventhubs_eventhub_get` | ✅ **EXPECTED** | -| 2 | 0.687582 | `eventhubs_namespace_get` | ❌ | -| 3 | 0.578689 | `eventhubs_eventhub_update` | ❌ | -| 4 | 0.561545 | `eventhubs_namespace_delete` | ❌ | -| 5 | 0.545475 | `eventhubs_eventhub_consumergroup_get` | ❌ | +| 1 | 0.773277 | `eventhubs_eventhub_get` | ✅ **EXPECTED** | +| 2 | 0.687596 | `eventhubs_namespace_get` | ❌ | +| 3 | 0.578709 | `eventhubs_eventhub_update` | ❌ | +| 4 | 0.561587 | `eventhubs_namespace_delete` | ❌ | +| 5 | 0.545481 | `eventhubs_eventhub_consumergroup_get` | ❌ | --- -## Test 196 +## Test 201 **Expected Tool:** `eventhubs_eventhub_get` **Prompt:** Get the details of my event hub in my namespace and resource group @@ -3794,15 +3889,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.638030 | `eventhubs_namespace_get` | ❌ | -| 2 | 0.627606 | `eventhubs_eventhub_get` | ✅ **EXPECTED** | -| 3 | 0.570898 | `eventhubs_eventhub_consumergroup_get` | ❌ | -| 4 | 0.527564 | `eventhubs_eventhub_update` | ❌ | -| 5 | 0.521837 | `eventhubs_namespace_delete` | ❌ | +| 1 | 0.638112 | `eventhubs_namespace_get` | ❌ | +| 2 | 0.627528 | 
`eventhubs_eventhub_get` | ✅ **EXPECTED** | +| 3 | 0.570964 | `eventhubs_eventhub_consumergroup_get` | ❌ | +| 4 | 0.527503 | `eventhubs_eventhub_update` | ❌ | +| 5 | 0.521930 | `eventhubs_namespace_delete` | ❌ | --- -## Test 197 +## Test 202 **Expected Tool:** `eventhubs_eventhub_update` **Prompt:** Create a new event hub in my namespace and resource group @@ -3811,15 +3906,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.645723 | `eventhubs_eventhub_update` | ✅ **EXPECTED** | -| 2 | 0.605716 | `eventhubs_namespace_get` | ❌ | -| 3 | 0.574303 | `eventhubs_eventhub_get` | ❌ | -| 4 | 0.571748 | `eventhubs_eventhub_consumergroup_update` | ❌ | -| 5 | 0.557530 | `eventhubs_namespace_delete` | ❌ | +| 1 | 0.645976 | `eventhubs_eventhub_update` | ✅ **EXPECTED** | +| 2 | 0.605856 | `eventhubs_namespace_get` | ❌ | +| 3 | 0.574389 | `eventhubs_eventhub_get` | ❌ | +| 4 | 0.571676 | `eventhubs_eventhub_consumergroup_update` | ❌ | +| 5 | 0.557550 | `eventhubs_namespace_delete` | ❌ | --- -## Test 198 +## Test 203 **Expected Tool:** `eventhubs_eventhub_update` **Prompt:** Update my event hub in my namespace and resource group @@ -3828,15 +3923,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.655261 | `eventhubs_eventhub_update` | ✅ **EXPECTED** | -| 2 | 0.571762 | `eventhubs_eventhub_delete` | ❌ | -| 3 | 0.569417 | `eventhubs_eventhub_consumergroup_update` | ❌ | -| 4 | 0.568279 | `eventhubs_namespace_get` | ❌ | -| 5 | 0.565852 | `eventhubs_namespace_delete` | ❌ | +| 1 | 0.655283 | `eventhubs_eventhub_update` | ✅ **EXPECTED** | +| 2 | 0.571661 | `eventhubs_eventhub_delete` | ❌ | +| 3 | 0.568605 | `eventhubs_eventhub_consumergroup_update` | ❌ | +| 4 | 0.568396 | `eventhubs_namespace_get` | ❌ | +| 5 | 0.565977 | `eventhubs_namespace_delete` | ❌ | --- -## Test 199 +## Test 204 **Expected Tool:** `eventhubs_namespace_delete` **Prompt:** Delete my namespace in my resource group @@ -3846,14 +3941,14 @@ | Rank | Score | Tool | 
Status | |------|-------|------|--------| | 1 | 0.623995 | `eventhubs_namespace_delete` | ✅ **EXPECTED** | -| 2 | 0.525446 | `eventhubs_namespace_update` | ❌ | +| 2 | 0.525810 | `eventhubs_namespace_update` | ❌ | | 3 | 0.505082 | `eventhubs_eventhub_consumergroup_delete` | ❌ | | 4 | 0.449841 | `eventhubs_namespace_get` | ❌ | | 5 | 0.435037 | `workbooks_delete` | ❌ | --- -## Test 200 +## Test 205 **Expected Tool:** `eventhubs_namespace_get` **Prompt:** List all Event Hubs namespaces in my subscription @@ -3865,12 +3960,12 @@ | 1 | 0.659838 | `eventhubs_eventhub_get` | ❌ | | 2 | 0.658827 | `eventhubs_namespace_get` | ✅ **EXPECTED** | | 3 | 0.607372 | `kusto_cluster_list` | ❌ | -| 4 | 0.557200 | `eventgrid_topic_list` | ❌ | -| 5 | 0.556126 | `eventgrid_subscription_list` | ❌ | +| 4 | 0.557150 | `eventgrid_topic_list` | ❌ | +| 5 | 0.556016 | `eventgrid_subscription_list` | ❌ | --- -## Test 201 +## Test 206 **Expected Tool:** `eventhubs_namespace_get` **Prompt:** Get the details of my namespace in my resource group @@ -3879,15 +3974,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.510078 | `monitor_webtests_get` | ❌ | -| 2 | 0.509993 | `eventhubs_namespace_get` | ✅ **EXPECTED** | -| 3 | 0.497527 | `servicebus_queue_details` | ❌ | -| 4 | 0.490095 | `eventhubs_namespace_update` | ❌ | -| 5 | 0.470636 | `functionapp_get` | ❌ | +| 1 | 0.509749 | `eventhubs_namespace_get` | ✅ **EXPECTED** | +| 2 | 0.509432 | `monitor_webtests_get` | ❌ | +| 3 | 0.497399 | `servicebus_queue_details` | ❌ | +| 4 | 0.490015 | `eventhubs_namespace_update` | ❌ | +| 5 | 0.470455 | `functionapp_get` | ❌ | --- -## Test 202 +## Test 207 **Expected Tool:** `eventhubs_namespace_update` **Prompt:** Create an new namespace in my resource group @@ -3896,15 +3991,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.610456 | `eventhubs_namespace_update` | ✅ **EXPECTED** | +| 1 | 0.610313 | `eventhubs_namespace_update` | ✅ **EXPECTED** | | 2 | 0.466721 
| `eventhubs_namespace_get` | ❌ | | 3 | 0.458458 | `eventhubs_namespace_delete` | ❌ | | 4 | 0.449724 | `workbooks_create` | ❌ | -| 5 | 0.438886 | `eventhubs_eventhub_consumergroup_update` | ❌ | +| 5 | 0.438492 | `eventhubs_eventhub_consumergroup_update` | ❌ | --- -## Test 203 +## Test 208 **Expected Tool:** `eventhubs_namespace_update` **Prompt:** Update my namespace in my resource group @@ -3913,15 +4008,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.622338 | `eventhubs_namespace_update` | ✅ **EXPECTED** | -| 2 | 0.474099 | `eventhubs_namespace_delete` | ❌ | +| 1 | 0.622219 | `eventhubs_namespace_update` | ✅ **EXPECTED** | +| 2 | 0.474098 | `eventhubs_namespace_delete` | ❌ | | 3 | 0.448723 | `eventhubs_namespace_get` | ❌ | -| 4 | 0.437139 | `eventhubs_eventhub_consumergroup_update` | ❌ | -| 5 | 0.372632 | `sql_db_rename` | ❌ | +| 4 | 0.436549 | `eventhubs_eventhub_consumergroup_update` | ❌ | +| 5 | 0.372490 | `sql_db_rename` | ❌ | --- -## Test 204 +## Test 209 **Expected Tool:** `functionapp_get` **Prompt:** Describe the function app in resource group @@ -3931,14 +4026,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.660116 | `functionapp_get` | ✅ **EXPECTED** | -| 2 | 0.451613 | `deploy_app_logs_get` | ❌ | +| 2 | 0.451226 | `deploy_app_logs_get` | ❌ | | 3 | 0.450457 | `applens_resource_diagnose` | ❌ | -| 4 | 0.390107 | `mysql_server_list` | ❌ | +| 4 | 0.390048 | `mysql_server_list` | ❌ | | 5 | 0.380314 | `get_bestpractices_get` | ❌ | --- -## Test 205 +## Test 210 **Expected Tool:** `functionapp_get` **Prompt:** Get configuration for function app @@ -3949,13 +4044,13 @@ |------|-------|------|--------| | 1 | 0.607276 | `functionapp_get` | ✅ **EXPECTED** | | 2 | 0.447400 | `mysql_server_config_get` | ❌ | -| 3 | 0.424693 | `appconfig_account_list` | ❌ | +| 3 | 0.424765 | `appconfig_account_list` | ❌ | | 4 | 0.411267 | `appconfig_kv_get` | ❌ | -| 5 | 0.400402 | `deploy_app_logs_get` | ❌ | +| 5 | 
0.400002 | `deploy_app_logs_get` | ❌ | --- -## Test 206 +## Test 211 **Expected Tool:** `functionapp_get` **Prompt:** Get function app status for @@ -3965,14 +4060,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.622384 | `functionapp_get` | ✅ **EXPECTED** | -| 2 | 0.413481 | `resourcehealth_availability-status_get` | ❌ | -| 3 | 0.390766 | `resourcehealth_availability-status_list` | ❌ | -| 4 | 0.383533 | `deploy_app_logs_get` | ❌ | -| 5 | 0.360677 | `storage_account_get` | ❌ | +| 2 | 0.413523 | `resourcehealth_availability-status_get` | ❌ | +| 3 | 0.390708 | `resourcehealth_availability-status_list` | ❌ | +| 4 | 0.383293 | `deploy_app_logs_get` | ❌ | +| 5 | 0.360665 | `storage_account_get` | ❌ | --- -## Test 207 +## Test 212 **Expected Tool:** `functionapp_get` **Prompt:** Get information about my function app in @@ -3983,13 +4078,13 @@ |------|-------|------|--------| | 1 | 0.690933 | `functionapp_get` | ✅ **EXPECTED** | | 2 | 0.441937 | `foundry_resource_get` | ❌ | -| 3 | 0.432458 | `resourcehealth_availability-status_list` | ❌ | +| 3 | 0.432317 | `resourcehealth_availability-status_list` | ❌ | | 4 | 0.431821 | `applens_resource_diagnose` | ❌ | -| 5 | 0.428944 | `storage_account_get` | ❌ | +| 5 | 0.429077 | `storage_account_get` | ❌ | --- -## Test 208 +## Test 213 **Expected Tool:** `functionapp_get` **Prompt:** Retrieve host name and status of function app @@ -3999,14 +4094,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.592791 | `functionapp_get` | ✅ **EXPECTED** | -| 2 | 0.417817 | `resourcehealth_availability-status_get` | ❌ | -| 3 | 0.409712 | `deploy_app_logs_get` | ❌ | -| 4 | 0.399896 | `storage_account_get` | ❌ | +| 2 | 0.417779 | `resourcehealth_availability-status_get` | ❌ | +| 3 | 0.409487 | `deploy_app_logs_get` | ❌ | +| 4 | 0.399953 | `storage_account_get` | ❌ | | 5 | 0.392237 | `applens_resource_diagnose` | ❌ | --- -## Test 209 +## Test 214 **Expected Tool:** `functionapp_get` 
**Prompt:** Show function app details for in @@ -4016,14 +4111,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.687356 | `functionapp_get` | ✅ **EXPECTED** | -| 2 | 0.449588 | `deploy_app_logs_get` | ❌ | +| 2 | 0.449033 | `deploy_app_logs_get` | ❌ | | 3 | 0.428689 | `applens_resource_diagnose` | ❌ | | 4 | 0.424686 | `foundry_resource_get` | ❌ | -| 5 | 0.392451 | `monitor_webtests_get` | ❌ | +| 5 | 0.391781 | `monitor_webtests_get` | ❌ | --- -## Test 210 +## Test 215 **Expected Tool:** `functionapp_get` **Prompt:** Show me the details for the function app @@ -4033,14 +4128,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.644882 | `functionapp_get` | ✅ **EXPECTED** | -| 2 | 0.430189 | `deploy_app_logs_get` | ❌ | -| 3 | 0.421127 | `storage_account_get` | ❌ | -| 4 | 0.403311 | `signalr_runtime_get` | ❌ | +| 2 | 0.429692 | `deploy_app_logs_get` | ❌ | +| 3 | 0.421082 | `storage_account_get` | ❌ | +| 4 | 0.403261 | `signalr_runtime_get` | ❌ | | 5 | 0.391615 | `foundry_resource_get` | ❌ | --- -## Test 211 +## Test 216 **Expected Tool:** `functionapp_get` **Prompt:** Show plan and region for function app @@ -4050,14 +4145,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.554980 | `functionapp_get` | ✅ **EXPECTED** | -| 2 | 0.426976 | `quota_usage_check` | ❌ | -| 3 | 0.424610 | `deploy_app_logs_get` | ❌ | +| 2 | 0.426921 | `quota_usage_check` | ❌ | +| 3 | 0.424062 | `deploy_app_logs_get` | ❌ | | 4 | 0.408011 | `deploy_plan_get` | ❌ | -| 5 | 0.381236 | `deploy_architecture_diagram_generate` | ❌ | +| 5 | 0.381629 | `deploy_architecture_diagram_generate` | ❌ | --- -## Test 212 +## Test 217 **Expected Tool:** `functionapp_get` **Prompt:** What is the status of function app ? 
@@ -4067,14 +4162,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.565797 | `functionapp_get` | ✅ **EXPECTED** | -| 2 | 0.403665 | `deploy_app_logs_get` | ❌ | -| 3 | 0.384186 | `resourcehealth_availability-status_list` | ❌ | +| 2 | 0.403246 | `deploy_app_logs_get` | ❌ | +| 3 | 0.384159 | `resourcehealth_availability-status_list` | ❌ | | 4 | 0.369868 | `applens_resource_diagnose` | ❌ | -| 5 | 0.355044 | `resourcehealth_availability-status_get` | ❌ | +| 5 | 0.354912 | `resourcehealth_availability-status_get` | ❌ | --- -## Test 213 +## Test 218 **Expected Tool:** `functionapp_get` **Prompt:** List all function apps in my subscription @@ -4084,14 +4179,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.646561 | `functionapp_get` | ✅ **EXPECTED** | -| 2 | 0.559382 | `search_service_list` | ❌ | -| 3 | 0.534935 | `subscription_list` | ❌ | +| 2 | 0.557549 | `search_service_list` | ❌ | +| 3 | 0.534936 | `subscription_list` | ❌ | | 4 | 0.529031 | `kusto_cluster_list` | ❌ | | 5 | 0.516618 | `cosmos_account_list` | ❌ | --- -## Test 214 +## Test 219 **Expected Tool:** `functionapp_get` **Prompt:** Show me my Azure function apps @@ -4101,14 +4196,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.560249 | `functionapp_get` | ✅ **EXPECTED** | -| 2 | 0.464985 | `deploy_app_logs_get` | ❌ | -| 3 | 0.412646 | `search_service_list` | ❌ | -| 4 | 0.411323 | `get_bestpractices_get` | ❌ | +| 2 | 0.464637 | `deploy_app_logs_get` | ❌ | +| 3 | 0.411323 | `get_bestpractices_get` | ❌ | +| 4 | 0.410461 | `search_service_list` | ❌ | | 5 | 0.398503 | `extension_cli_install` | ❌ | --- -## Test 215 +## Test 220 **Expected Tool:** `functionapp_get` **Prompt:** What function apps do I have? 
@@ -4117,15 +4212,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.433674 | `functionapp_get` | ✅ **EXPECTED** | -| 2 | 0.346619 | `deploy_app_logs_get` | ❌ | +| 1 | 0.433675 | `functionapp_get` | ✅ **EXPECTED** | +| 2 | 0.346031 | `deploy_app_logs_get` | ❌ | | 3 | 0.337966 | `applens_resource_diagnose` | ❌ | | 4 | 0.316594 | `extension_cli_install` | ❌ | | 5 | 0.284362 | `get_bestpractices_get` | ❌ | --- -## Test 216 +## Test 221 **Expected Tool:** `keyvault_admin_settings_get` **Prompt:** Get the account settings for my key vault @@ -4134,15 +4229,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.604797 | `keyvault_admin_settings_get` | ✅ **EXPECTED** | -| 2 | 0.532029 | `storage_account_get` | ❌ | -| 3 | 0.496629 | `keyvault_key_get` | ❌ | -| 4 | 0.452366 | `appconfig_kv_set` | ❌ | -| 5 | 0.448039 | `keyvault_secret_get` | ❌ | +| 1 | 0.604780 | `keyvault_admin_settings_get` | ✅ **EXPECTED** | +| 2 | 0.532196 | `storage_account_get` | ❌ | +| 3 | 0.496042 | `keyvault_key_get` | ❌ | +| 4 | 0.452367 | `appconfig_kv_set` | ❌ | +| 5 | 0.448265 | `keyvault_secret_get` | ❌ | --- -## Test 217 +## Test 222 **Expected Tool:** `keyvault_admin_settings_get` **Prompt:** Show me the account settings for managed HSM keyvault @@ -4151,15 +4246,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.671368 | `keyvault_admin_settings_get` | ✅ **EXPECTED** | -| 2 | 0.455516 | `storage_account_get` | ❌ | -| 3 | 0.441225 | `keyvault_key_get` | ❌ | +| 1 | 0.671370 | `keyvault_admin_settings_get` | ✅ **EXPECTED** | +| 2 | 0.455561 | `storage_account_get` | ❌ | +| 3 | 0.440966 | `keyvault_key_get` | ❌ | | 4 | 0.404666 | `appconfig_kv_set` | ❌ | -| 5 | 0.395274 | `keyvault_secret_get` | ❌ | +| 5 | 0.395449 | `keyvault_secret_get` | ❌ | --- -## Test 218 +## Test 223 **Expected Tool:** `keyvault_admin_settings_get` **Prompt:** What's the value of the setting in my key vault with name @@ -4168,15 
+4263,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.505731 | `keyvault_admin_settings_get` | ✅ **EXPECTED** | -| 2 | 0.496540 | `appconfig_kv_set` | ❌ | -| 3 | 0.420145 | `appconfig_kv_lock_set` | ❌ | -| 4 | 0.419126 | `keyvault_key_get` | ❌ | -| 5 | 0.410215 | `keyvault_secret_get` | ❌ | +| 1 | 0.505709 | `keyvault_admin_settings_get` | ✅ **EXPECTED** | +| 2 | 0.496565 | `appconfig_kv_set` | ❌ | +| 3 | 0.420067 | `appconfig_kv_lock_set` | ❌ | +| 4 | 0.419642 | `keyvault_key_get` | ❌ | +| 5 | 0.410219 | `keyvault_secret_get` | ❌ | --- -## Test 219 +## Test 224 **Expected Tool:** `keyvault_certificate_create` **Prompt:** Create a new certificate called in the key vault @@ -4185,15 +4280,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.627882 | `keyvault_certificate_create` | ✅ **EXPECTED** | -| 2 | 0.570708 | `keyvault_certificate_import` | ❌ | -| 3 | 0.540476 | `keyvault_key_create` | ❌ | -| 4 | 0.519268 | `keyvault_certificate_get` | ❌ | -| 5 | 0.500093 | `keyvault_certificate_list` | ❌ | +| 1 | 0.627727 | `keyvault_certificate_create` | ✅ **EXPECTED** | +| 2 | 0.570319 | `keyvault_certificate_import` | ❌ | +| 3 | 0.540199 | `keyvault_key_create` | ❌ | +| 4 | 0.519218 | `keyvault_certificate_get` | ❌ | +| 5 | 0.500027 | `keyvault_certificate_list` | ❌ | --- -## Test 220 +## Test 225 **Expected Tool:** `keyvault_certificate_create` **Prompt:** Generate a certificate named in key vault @@ -4202,15 +4297,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.599990 | `keyvault_certificate_create` | ✅ **EXPECTED** | -| 2 | 0.561445 | `keyvault_certificate_import` | ❌ | -| 3 | 0.522706 | `keyvault_certificate_get` | ❌ | -| 4 | 0.502128 | `keyvault_key_create` | ❌ | -| 5 | 0.497145 | `keyvault_certificate_list` | ❌ | +| 1 | 0.599548 | `keyvault_certificate_create` | ✅ **EXPECTED** | +| 2 | 0.561717 | `keyvault_certificate_import` | ❌ | +| 3 | 0.521910 | 
`keyvault_certificate_get` | ❌ | +| 4 | 0.501291 | `keyvault_key_create` | ❌ | +| 5 | 0.496516 | `keyvault_certificate_list` | ❌ | --- -## Test 221 +## Test 226 **Expected Tool:** `keyvault_certificate_create` **Prompt:** Request creation of certificate in the key vault @@ -4219,15 +4314,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.574040 | `keyvault_certificate_create` | ✅ **EXPECTED** | -| 2 | 0.527743 | `keyvault_certificate_import` | ❌ | -| 3 | 0.498226 | `keyvault_certificate_get` | ❌ | -| 4 | 0.481666 | `keyvault_key_create` | ❌ | -| 5 | 0.469651 | `keyvault_certificate_list` | ❌ | +| 1 | 0.573998 | `keyvault_certificate_create` | ✅ **EXPECTED** | +| 2 | 0.527759 | `keyvault_certificate_import` | ❌ | +| 3 | 0.498278 | `keyvault_certificate_get` | ❌ | +| 4 | 0.481548 | `keyvault_key_create` | ❌ | +| 5 | 0.469601 | `keyvault_certificate_list` | ❌ | --- -## Test 222 +## Test 227 **Expected Tool:** `keyvault_certificate_create` **Prompt:** Provision a new key vault certificate in vault @@ -4244,7 +4339,7 @@ --- -## Test 223 +## Test 228 **Expected Tool:** `keyvault_certificate_create` **Prompt:** Issue a certificate in key vault @@ -4261,7 +4356,7 @@ --- -## Test 224 +## Test 229 **Expected Tool:** `keyvault_certificate_get` **Prompt:** Show me the certificate in the key vault @@ -4274,11 +4369,11 @@ | 2 | 0.528405 | `keyvault_certificate_list` | ❌ | | 3 | 0.519037 | `keyvault_certificate_import` | ❌ | | 4 | 0.499293 | `keyvault_certificate_create` | ❌ | -| 5 | 0.486609 | `keyvault_key_get` | ❌ | +| 5 | 0.487691 | `keyvault_key_get` | ❌ | --- -## Test 225 +## Test 230 **Expected Tool:** `keyvault_certificate_get` **Prompt:** Show me the details of the certificate in the key vault @@ -4288,14 +4383,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.646098 | `keyvault_certificate_get` | ✅ **EXPECTED** | -| 2 | 0.562988 | `keyvault_key_get` | ❌ | -| 3 | 0.514170 | `keyvault_secret_get` | ❌ | +| 2 | 
0.563263 | `keyvault_key_get` | ❌ | +| 3 | 0.514499 | `keyvault_secret_get` | ❌ | | 4 | 0.509446 | `keyvault_certificate_list` | ❌ | -| 5 | 0.507737 | `keyvault_certificate_import` | ❌ | +| 5 | 0.507738 | `keyvault_certificate_import` | ❌ | --- -## Test 226 +## Test 231 **Expected Tool:** `keyvault_certificate_get` **Prompt:** Get the certificate from vault @@ -4308,11 +4403,11 @@ | 2 | 0.515570 | `keyvault_certificate_list` | ❌ | | 3 | 0.511197 | `keyvault_certificate_create` | ❌ | | 4 | 0.507768 | `keyvault_certificate_import` | ❌ | -| 5 | 0.474394 | `keyvault_key_get` | ❌ | +| 5 | 0.475674 | `keyvault_key_get` | ❌ | --- -## Test 227 +## Test 232 **Expected Tool:** `keyvault_certificate_get` **Prompt:** Display the certificate details for in vault @@ -4322,14 +4417,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.647669 | `keyvault_certificate_get` | ✅ **EXPECTED** | -| 2 | 0.527400 | `keyvault_key_get` | ❌ | +| 2 | 0.528243 | `keyvault_key_get` | ❌ | | 3 | 0.521556 | `keyvault_certificate_list` | ❌ | | 4 | 0.509796 | `keyvault_certificate_import` | ❌ | -| 5 | 0.501988 | `keyvault_secret_get` | ❌ | +| 5 | 0.502403 | `keyvault_secret_get` | ❌ | --- -## Test 228 +## Test 233 **Expected Tool:** `keyvault_certificate_get` **Prompt:** Retrieve certificate metadata for in vault @@ -4338,15 +4433,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.595902 | `keyvault_certificate_get` | ✅ **EXPECTED** | -| 2 | 0.527167 | `keyvault_certificate_list` | ❌ | -| 3 | 0.518836 | `keyvault_certificate_import` | ❌ | -| 4 | 0.500932 | `keyvault_certificate_create` | ❌ | -| 5 | 0.465265 | `keyvault_key_get` | ❌ | +| 1 | 0.595959 | `keyvault_certificate_get` | ✅ **EXPECTED** | +| 2 | 0.527404 | `keyvault_certificate_list` | ❌ | +| 3 | 0.519059 | `keyvault_certificate_import` | ❌ | +| 4 | 0.501138 | `keyvault_certificate_create` | ❌ | +| 5 | 0.465429 | `keyvault_key_get` | ❌ | --- -## Test 229 +## Test 234 **Expected Tool:** 
`keyvault_certificate_import` **Prompt:** Import the certificate in file into the key vault @@ -4355,15 +4450,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.585549 | `keyvault_certificate_import` | ✅ **EXPECTED** | -| 2 | 0.420798 | `keyvault_certificate_get` | ❌ | -| 3 | 0.402853 | `keyvault_certificate_create` | ❌ | -| 4 | 0.399353 | `keyvault_certificate_list` | ❌ | -| 5 | 0.353196 | `keyvault_key_create` | ❌ | +| 1 | 0.585481 | `keyvault_certificate_import` | ✅ **EXPECTED** | +| 2 | 0.420747 | `keyvault_certificate_get` | ❌ | +| 3 | 0.402595 | `keyvault_certificate_create` | ❌ | +| 4 | 0.399342 | `keyvault_certificate_list` | ❌ | +| 5 | 0.352905 | `keyvault_key_create` | ❌ | --- -## Test 230 +## Test 235 **Expected Tool:** `keyvault_certificate_import` **Prompt:** Import a certificate into the key vault using the name @@ -4380,7 +4475,7 @@ --- -## Test 231 +## Test 236 **Expected Tool:** `keyvault_certificate_import` **Prompt:** Upload certificate file to key vault @@ -4397,7 +4492,7 @@ --- -## Test 232 +## Test 237 **Expected Tool:** `keyvault_certificate_import` **Prompt:** Load certificate from file into vault @@ -4414,7 +4509,7 @@ --- -## Test 233 +## Test 238 **Expected Tool:** `keyvault_certificate_import` **Prompt:** Add existing certificate file to the key vault with name @@ -4423,15 +4518,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.595417 | `keyvault_certificate_import` | ✅ **EXPECTED** | -| 2 | 0.452489 | `keyvault_certificate_create` | ❌ | +| 1 | 0.595418 | `keyvault_certificate_import` | ✅ **EXPECTED** | +| 2 | 0.452490 | `keyvault_certificate_create` | ❌ | | 3 | 0.441616 | `keyvault_certificate_get` | ❌ | | 4 | 0.408018 | `keyvault_key_create` | ❌ | | 5 | 0.392244 | `keyvault_secret_create` | ❌ | --- -## Test 234 +## Test 239 **Expected Tool:** `keyvault_certificate_list` **Prompt:** List all certificates in the key vault @@ -4441,14 +4536,14 @@ | Rank | Score | Tool | Status | 
|------|-------|------|--------| | 1 | 0.726124 | `keyvault_certificate_list` | ✅ **EXPECTED** | -| 2 | 0.583138 | `keyvault_key_list` | ❌ | +| 2 | 0.583110 | `keyvault_key_list` | ❌ | | 3 | 0.531988 | `keyvault_secret_list` | ❌ | | 4 | 0.515236 | `keyvault_certificate_get` | ❌ | | 5 | 0.485792 | `keyvault_certificate_create` | ❌ | --- -## Test 235 +## Test 240 **Expected Tool:** `keyvault_certificate_list` **Prompt:** Show me the certificates in the key vault @@ -4459,13 +4554,13 @@ |------|-------|------|--------| | 1 | 0.615541 | `keyvault_certificate_list` | ✅ **EXPECTED** | | 2 | 0.522453 | `keyvault_certificate_get` | ❌ | -| 3 | 0.475197 | `keyvault_key_list` | ❌ | +| 3 | 0.475156 | `keyvault_key_list` | ❌ | | 4 | 0.460973 | `keyvault_certificate_create` | ❌ | -| 5 | 0.448139 | `keyvault_key_get` | ❌ | +| 5 | 0.449381 | `keyvault_key_get` | ❌ | --- -## Test 236 +## Test 241 **Expected Tool:** `keyvault_certificate_list` **Prompt:** What certificates are in the key vault ? @@ -4474,15 +4569,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.624711 | `keyvault_certificate_list` | ✅ **EXPECTED** | +| 1 | 0.624710 | `keyvault_certificate_list` | ✅ **EXPECTED** | | 2 | 0.519739 | `keyvault_certificate_get` | ❌ | | 3 | 0.510048 | `keyvault_certificate_create` | ❌ | | 4 | 0.505534 | `keyvault_certificate_import` | ❌ | -| 5 | 0.497395 | `keyvault_key_list` | ❌ | +| 5 | 0.497356 | `keyvault_key_list` | ❌ | --- -## Test 237 +## Test 242 **Expected Tool:** `keyvault_certificate_list` **Prompt:** List certificate names in vault @@ -4492,14 +4587,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.672622 | `keyvault_certificate_list` | ✅ **EXPECTED** | -| 2 | 0.554016 | `keyvault_key_list` | ❌ | +| 2 | 0.553990 | `keyvault_key_list` | ❌ | | 3 | 0.511905 | `keyvault_secret_list` | ❌ | | 4 | 0.507062 | `keyvault_certificate_get` | ❌ | | 5 | 0.492357 | `keyvault_certificate_create` | ❌ | --- -## Test 238 +## Test 243 
**Expected Tool:** `keyvault_certificate_list` **Prompt:** Enumerate certificates in key vault @@ -4508,15 +4603,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.747407 | `keyvault_certificate_list` | ✅ **EXPECTED** | -| 2 | 0.594268 | `keyvault_key_list` | ❌ | +| 1 | 0.747408 | `keyvault_certificate_list` | ✅ **EXPECTED** | +| 2 | 0.594216 | `keyvault_key_list` | ❌ | | 3 | 0.558771 | `keyvault_secret_list` | ❌ | | 4 | 0.515568 | `keyvault_certificate_get` | ❌ | | 5 | 0.490876 | `keyvault_certificate_create` | ❌ | --- -## Test 239 +## Test 244 **Expected Tool:** `keyvault_certificate_list` **Prompt:** Show certificate names in the key vault @@ -4527,13 +4622,13 @@ |------|-------|------|--------| | 1 | 0.639711 | `keyvault_certificate_list` | ✅ **EXPECTED** | | 2 | 0.512475 | `keyvault_certificate_get` | ❌ | -| 3 | 0.507603 | `keyvault_key_list` | ❌ | +| 3 | 0.507572 | `keyvault_key_list` | ❌ | | 4 | 0.482583 | `keyvault_certificate_create` | ❌ | | 5 | 0.464725 | `keyvault_secret_list` | ❌ | --- -## Test 240 +## Test 245 **Expected Tool:** `keyvault_key_create` **Prompt:** Create a new key called with the RSA type in the key vault @@ -4546,11 +4641,11 @@ | 2 | 0.456580 | `keyvault_secret_create` | ❌ | | 3 | 0.451790 | `keyvault_certificate_create` | ❌ | | 4 | 0.429614 | `keyvault_certificate_import` | ❌ | -| 5 | 0.399326 | `keyvault_key_get` | ❌ | +| 5 | 0.399469 | `keyvault_key_get` | ❌ | --- -## Test 241 +## Test 246 **Expected Tool:** `keyvault_key_create` **Prompt:** Generate a key with type in vault @@ -4559,15 +4654,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.641022 | `keyvault_key_create` | ✅ **EXPECTED** | -| 2 | 0.428461 | `keyvault_key_get` | ❌ | -| 3 | 0.422686 | `keyvault_certificate_create` | ❌ | -| 4 | 0.419964 | `keyvault_secret_create` | ❌ | -| 5 | 0.405612 | `appconfig_kv_set` | ❌ | +| 1 | 0.641070 | `keyvault_key_create` | ✅ **EXPECTED** | +| 2 | 0.428964 | `keyvault_key_get` | ❌ 
| +| 3 | 0.422763 | `keyvault_certificate_create` | ❌ | +| 4 | 0.420045 | `keyvault_secret_create` | ❌ | +| 5 | 0.405644 | `appconfig_kv_set` | ❌ | --- -## Test 242 +## Test 247 **Expected Tool:** `keyvault_key_create` **Prompt:** Create an oct key in the vault @@ -4576,15 +4671,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.548424 | `keyvault_key_create` | ✅ **EXPECTED** | -| 2 | 0.464221 | `keyvault_secret_create` | ❌ | -| 3 | 0.448379 | `keyvault_certificate_create` | ❌ | -| 4 | 0.421467 | `keyvault_key_get` | ❌ | -| 5 | 0.405195 | `keyvault_certificate_import` | ❌ | +| 1 | 0.547493 | `keyvault_key_create` | ✅ **EXPECTED** | +| 2 | 0.463557 | `keyvault_secret_create` | ❌ | +| 3 | 0.447410 | `keyvault_certificate_create` | ❌ | +| 4 | 0.420793 | `keyvault_key_get` | ❌ | +| 5 | 0.404350 | `keyvault_certificate_import` | ❌ | --- -## Test 243 +## Test 248 **Expected Tool:** `keyvault_key_create` **Prompt:** Create an RSA key in the vault with name @@ -4597,11 +4692,11 @@ | 2 | 0.501636 | `keyvault_secret_create` | ❌ | | 3 | 0.491735 | `keyvault_certificate_create` | ❌ | | 4 | 0.464557 | `keyvault_certificate_import` | ❌ | -| 5 | 0.451016 | `keyvault_key_get` | ❌ | +| 5 | 0.451505 | `keyvault_key_get` | ❌ | --- -## Test 244 +## Test 249 **Expected Tool:** `keyvault_key_create` **Prompt:** Create an EC key with name in the vault @@ -4610,15 +4705,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.571718 | `keyvault_key_create` | ✅ **EXPECTED** | -| 2 | 0.443369 | `keyvault_certificate_create` | ❌ | -| 3 | 0.434675 | `keyvault_secret_create` | ❌ | -| 4 | 0.421721 | `keyvault_key_get` | ❌ | -| 5 | 0.400533 | `keyvault_certificate_import` | ❌ | +| 1 | 0.571793 | `keyvault_key_create` | ✅ **EXPECTED** | +| 2 | 0.443085 | `keyvault_certificate_create` | ❌ | +| 3 | 0.434697 | `keyvault_secret_create` | ❌ | +| 4 | 0.421997 | `keyvault_key_get` | ❌ | +| 5 | 0.400514 | `keyvault_certificate_import` | ❌ | --- -## 
Test 245 +## Test 250 **Expected Tool:** `keyvault_key_get` **Prompt:** Show me the key in the key vault @@ -4627,15 +4722,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.549488 | `keyvault_key_get` | ✅ **EXPECTED** | -| 2 | 0.468165 | `keyvault_secret_get` | ❌ | +| 1 | 0.550225 | `keyvault_key_get` | ✅ **EXPECTED** | +| 2 | 0.468243 | `keyvault_secret_get` | ❌ | | 3 | 0.452816 | `keyvault_key_create` | ❌ | -| 4 | 0.440015 | `keyvault_key_list` | ❌ | +| 4 | 0.439969 | `keyvault_key_list` | ❌ | | 5 | 0.426545 | `keyvault_certificate_get` | ❌ | --- -## Test 246 +## Test 251 **Expected Tool:** `keyvault_key_get` **Prompt:** Show me the details of the key in the key vault @@ -4644,15 +4739,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.629552 | `keyvault_key_get` | ✅ **EXPECTED** | -| 2 | 0.532651 | `keyvault_secret_get` | ❌ | -| 3 | 0.512106 | `storage_account_get` | ❌ | +| 1 | 0.629372 | `keyvault_key_get` | ✅ **EXPECTED** | +| 2 | 0.532872 | `keyvault_secret_get` | ❌ | +| 3 | 0.512278 | `storage_account_get` | ❌ | | 4 | 0.495957 | `keyvault_certificate_get` | ❌ | | 5 | 0.456992 | `keyvault_key_create` | ❌ | --- -## Test 247 +## Test 252 **Expected Tool:** `keyvault_key_get` **Prompt:** Get the key from vault @@ -4661,15 +4756,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.484645 | `keyvault_key_get` | ✅ **EXPECTED** | +| 1 | 0.485492 | `keyvault_key_get` | ✅ **EXPECTED** | | 2 | 0.443182 | `keyvault_key_create` | ❌ | -| 3 | 0.409388 | `keyvault_secret_get` | ❌ | -| 4 | 0.395482 | `keyvault_admin_settings_get` | ❌ | +| 3 | 0.409356 | `keyvault_secret_get` | ❌ | +| 4 | 0.395491 | `keyvault_admin_settings_get` | ❌ | | 5 | 0.383519 | `appconfig_kv_lock_set` | ❌ | --- -## Test 248 +## Test 253 **Expected Tool:** `keyvault_key_get` **Prompt:** Display the key details for in vault @@ -4678,15 +4773,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 
| 0.590303 | `keyvault_key_get` | ✅ **EXPECTED** | -| 2 | 0.488213 | `keyvault_secret_get` | ❌ | -| 3 | 0.476278 | `storage_account_get` | ❌ | +| 1 | 0.590297 | `keyvault_key_get` | ✅ **EXPECTED** | +| 2 | 0.488574 | `keyvault_secret_get` | ❌ | +| 3 | 0.476498 | `storage_account_get` | ❌ | | 4 | 0.460796 | `keyvault_certificate_get` | ❌ | -| 5 | 0.436493 | `keyvault_admin_settings_get` | ❌ | +| 5 | 0.436511 | `keyvault_admin_settings_get` | ❌ | --- -## Test 249 +## Test 254 **Expected Tool:** `keyvault_key_get` **Prompt:** Retrieve key metadata for in vault @@ -4695,15 +4790,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.518886 | `keyvault_key_get` | ✅ **EXPECTED** | -| 2 | 0.432731 | `keyvault_admin_settings_get` | ❌ | -| 3 | 0.432677 | `storage_account_get` | ❌ | +| 1 | 0.518346 | `keyvault_key_get` | ✅ **EXPECTED** | +| 2 | 0.432950 | `storage_account_get` | ❌ | +| 3 | 0.432742 | `keyvault_admin_settings_get` | ❌ | | 4 | 0.429131 | `keyvault_key_create` | ❌ | -| 5 | 0.422536 | `keyvault_secret_get` | ❌ | +| 5 | 0.422731 | `keyvault_secret_get` | ❌ | --- -## Test 250 +## Test 255 **Expected Tool:** `keyvault_key_list` **Prompt:** List all keys in the key vault @@ -4712,15 +4807,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.701474 | `keyvault_key_list` | ✅ **EXPECTED** | +| 1 | 0.701448 | `keyvault_key_list` | ✅ **EXPECTED** | | 2 | 0.601513 | `keyvault_certificate_list` | ❌ | | 3 | 0.587427 | `keyvault_secret_list` | ❌ | | 4 | 0.498767 | `cosmos_account_list` | ❌ | -| 5 | 0.480130 | `keyvault_admin_settings_get` | ❌ | +| 5 | 0.480129 | `keyvault_admin_settings_get` | ❌ | --- -## Test 251 +## Test 256 **Expected Tool:** `keyvault_key_list` **Prompt:** Show me the keys in the key vault @@ -4729,15 +4824,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.549498 | `keyvault_key_list` | ✅ **EXPECTED** | -| 2 | 0.506815 | `keyvault_key_get` | ❌ | +| 1 | 0.549453 | 
`keyvault_key_list` | ✅ **EXPECTED** | +| 2 | 0.507865 | `keyvault_key_get` | ❌ | | 3 | 0.475507 | `keyvault_certificate_list` | ❌ | -| 4 | 0.472457 | `keyvault_admin_settings_get` | ❌ | -| 5 | 0.455683 | `keyvault_secret_get` | ❌ | +| 4 | 0.472465 | `keyvault_admin_settings_get` | ❌ | +| 5 | 0.455936 | `keyvault_secret_get` | ❌ | --- -## Test 252 +## Test 257 **Expected Tool:** `keyvault_key_list` **Prompt:** What keys are in the key vault ? @@ -4746,15 +4841,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.582010 | `keyvault_key_list` | ✅ **EXPECTED** | -| 2 | 0.502252 | `keyvault_admin_settings_get` | ❌ | +| 1 | 0.581970 | `keyvault_key_list` | ✅ **EXPECTED** | +| 2 | 0.502245 | `keyvault_admin_settings_get` | ❌ | | 3 | 0.501481 | `keyvault_certificate_list` | ❌ | -| 4 | 0.476470 | `keyvault_key_get` | ❌ | +| 4 | 0.477451 | `keyvault_key_get` | ❌ | | 5 | 0.472414 | `keyvault_secret_list` | ❌ | --- -## Test 253 +## Test 258 **Expected Tool:** `keyvault_key_list` **Prompt:** List key names in vault @@ -4763,15 +4858,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.641339 | `keyvault_key_list` | ✅ **EXPECTED** | +| 1 | 0.641314 | `keyvault_key_list` | ✅ **EXPECTED** | | 2 | 0.559550 | `keyvault_certificate_list` | ❌ | | 3 | 0.553553 | `keyvault_secret_list` | ❌ | -| 4 | 0.486371 | `keyvault_admin_settings_get` | ❌ | +| 4 | 0.486377 | `keyvault_admin_settings_get` | ❌ | | 5 | 0.475992 | `cosmos_account_list` | ❌ | --- -## Test 254 +## Test 259 **Expected Tool:** `keyvault_key_list` **Prompt:** Enumerate keys in key vault @@ -4780,15 +4875,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.723318 | `keyvault_key_list` | ✅ **EXPECTED** | +| 1 | 0.723266 | `keyvault_key_list` | ✅ **EXPECTED** | | 2 | 0.611366 | `keyvault_certificate_list` | ❌ | | 3 | 0.611185 | `keyvault_secret_list` | ❌ | -| 4 | 0.473874 | `keyvault_admin_settings_get` | ❌ | -| 5 | 0.441881 | 
`keyvault_key_get` | ❌ | +| 4 | 0.473886 | `keyvault_admin_settings_get` | ❌ | +| 5 | 0.443322 | `keyvault_key_get` | ❌ | --- -## Test 255 +## Test 260 **Expected Tool:** `keyvault_key_list` **Prompt:** Show key names in the key vault @@ -4797,15 +4892,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.570489 | `keyvault_key_list` | ✅ **EXPECTED** | -| 2 | 0.501073 | `keyvault_key_get` | ❌ | +| 1 | 0.570444 | `keyvault_key_list` | ✅ **EXPECTED** | +| 2 | 0.501953 | `keyvault_key_get` | ❌ | | 3 | 0.500103 | `keyvault_certificate_list` | ❌ | -| 4 | 0.496907 | `storage_account_get` | ❌ | +| 4 | 0.496817 | `storage_account_get` | ❌ | | 5 | 0.490367 | `keyvault_secret_list` | ❌ | --- -## Test 256 +## Test 261 **Expected Tool:** `keyvault_secret_create` **Prompt:** Create a new secret called with value in the key vault @@ -4816,13 +4911,13 @@ |------|-------|------|--------| | 1 | 0.678482 | `keyvault_secret_create` | ✅ **EXPECTED** | | 2 | 0.553018 | `keyvault_key_create` | ❌ | -| 3 | 0.512856 | `keyvault_secret_get` | ❌ | +| 3 | 0.512602 | `keyvault_secret_get` | ❌ | | 4 | 0.475097 | `keyvault_certificate_create` | ❌ | | 5 | 0.461437 | `appconfig_kv_set` | ❌ | --- -## Test 257 +## Test 262 **Expected Tool:** `keyvault_secret_create` **Prompt:** Set a secret named with value in key vault @@ -4832,14 +4927,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.663094 | `keyvault_secret_create` | ✅ **EXPECTED** | -| 2 | 0.519601 | `keyvault_secret_get` | ❌ | +| 2 | 0.519306 | `keyvault_secret_get` | ❌ | | 3 | 0.512233 | `appconfig_kv_set` | ❌ | | 4 | 0.458502 | `keyvault_key_create` | ❌ | | 5 | 0.429785 | `appconfig_kv_lock_set` | ❌ | --- -## Test 258 +## Test 263 **Expected Tool:** `keyvault_secret_create` **Prompt:** Store secret value in the key vault @@ -4849,14 +4944,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.639897 | `keyvault_secret_create` | ✅ **EXPECTED** | -| 2 | 0.509674 
| `keyvault_secret_get` | ❌ | +| 2 | 0.509526 | `keyvault_secret_get` | ❌ | | 3 | 0.485203 | `appconfig_kv_set` | ❌ | | 4 | 0.484680 | `keyvault_key_create` | ❌ | | 5 | 0.448995 | `appconfig_kv_lock_set` | ❌ | --- -## Test 259 +## Test 264 **Expected Tool:** `keyvault_secret_create` **Prompt:** Add a new version of secret with value in vault @@ -4865,15 +4960,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.675147 | `keyvault_secret_create` | ✅ **EXPECTED** | -| 2 | 0.499602 | `keyvault_secret_get` | ❌ | -| 3 | 0.498196 | `keyvault_key_create` | ❌ | -| 4 | 0.479173 | `keyvault_certificate_import` | ❌ | -| 5 | 0.458587 | `appconfig_kv_set` | ❌ | +| 1 | 0.675145 | `keyvault_secret_create` | ✅ **EXPECTED** | +| 2 | 0.499276 | `keyvault_secret_get` | ❌ | +| 3 | 0.498228 | `keyvault_key_create` | ❌ | +| 4 | 0.479174 | `keyvault_certificate_import` | ❌ | +| 5 | 0.458574 | `appconfig_kv_set` | ❌ | --- -## Test 260 +## Test 265 **Expected Tool:** `keyvault_secret_create` **Prompt:** Update secret to value in the key vault @@ -4882,15 +4977,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.571716 | `keyvault_secret_create` | ✅ **EXPECTED** | -| 2 | 0.513963 | `keyvault_secret_get` | ❌ | -| 3 | 0.441281 | `appconfig_kv_set` | ❌ | -| 4 | 0.417998 | `appconfig_kv_lock_set` | ❌ | -| 5 | 0.408505 | `keyvault_key_get` | ❌ | +| 1 | 0.571597 | `keyvault_secret_create` | ✅ **EXPECTED** | +| 2 | 0.513012 | `keyvault_secret_get` | ❌ | +| 3 | 0.441198 | `appconfig_kv_set` | ❌ | +| 4 | 0.417911 | `appconfig_kv_lock_set` | ❌ | +| 5 | 0.408739 | `keyvault_key_get` | ❌ | --- -## Test 261 +## Test 266 **Expected Tool:** `keyvault_secret_get` **Prompt:** Show me the secret in the key vault @@ -4899,15 +4994,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.605040 | `keyvault_secret_get` | ✅ **EXPECTED** | -| 2 | 0.504063 | `keyvault_key_get` | ❌ | -| 3 | 0.502826 | `keyvault_secret_create` | ❌ | -| 
4 | 0.479767 | `keyvault_secret_list` | ❌ | -| 5 | 0.440063 | `keyvault_certificate_get` | ❌ | +| 1 | 0.602686 | `keyvault_secret_get` | ✅ **EXPECTED** | +| 2 | 0.505620 | `keyvault_key_get` | ❌ | +| 3 | 0.501397 | `keyvault_secret_create` | ❌ | +| 4 | 0.478769 | `keyvault_secret_list` | ❌ | +| 5 | 0.439521 | `keyvault_certificate_get` | ❌ | --- -## Test 262 +## Test 267 **Expected Tool:** `keyvault_secret_get` **Prompt:** Show me the details of the secret in the key vault @@ -4916,15 +5011,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.653871 | `keyvault_secret_get` | ✅ **EXPECTED** | -| 2 | 0.566786 | `keyvault_key_get` | ❌ | -| 3 | 0.517355 | `storage_account_get` | ❌ | +| 1 | 0.653920 | `keyvault_secret_get` | ✅ **EXPECTED** | +| 2 | 0.567036 | `keyvault_key_get` | ❌ | +| 3 | 0.517547 | `storage_account_get` | ❌ | | 4 | 0.496050 | `keyvault_certificate_get` | ❌ | | 5 | 0.485249 | `keyvault_secret_list` | ❌ | --- -## Test 263 +## Test 268 **Expected Tool:** `keyvault_secret_get` **Prompt:** Get the secret from vault @@ -4933,15 +5028,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.578479 | `keyvault_secret_get` | ✅ **EXPECTED** | -| 2 | 0.492213 | `keyvault_key_get` | ❌ | +| 1 | 0.578261 | `keyvault_secret_get` | ✅ **EXPECTED** | +| 2 | 0.493543 | `keyvault_key_get` | ❌ | | 3 | 0.488705 | `keyvault_secret_create` | ❌ | | 4 | 0.443676 | `keyvault_secret_list` | ❌ | -| 5 | 0.424164 | `keyvault_admin_settings_get` | ❌ | +| 5 | 0.424167 | `keyvault_admin_settings_get` | ❌ | --- -## Test 264 +## Test 269 **Expected Tool:** `keyvault_secret_get` **Prompt:** Display the secret details for in vault @@ -4950,15 +5045,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.649267 | `keyvault_secret_get` | ✅ **EXPECTED** | -| 2 | 0.546992 | `keyvault_key_get` | ❌ | -| 3 | 0.497258 | `storage_account_get` | ❌ | +| 1 | 0.649423 | `keyvault_secret_get` | ✅ **EXPECTED** | +| 2 | 
0.548102 | `keyvault_key_get` | ❌ | +| 3 | 0.497402 | `storage_account_get` | ❌ | | 4 | 0.492583 | `keyvault_certificate_get` | ❌ | -| 5 | 0.491596 | `keyvault_secret_list` | ❌ | +| 5 | 0.491597 | `keyvault_secret_list` | ❌ | --- -## Test 265 +## Test 270 **Expected Tool:** `keyvault_secret_get` **Prompt:** Retrieve secret metadata for in vault @@ -4967,15 +5062,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.577477 | `keyvault_secret_get` | ✅ **EXPECTED** | -| 2 | 0.475443 | `keyvault_key_get` | ❌ | +| 1 | 0.577338 | `keyvault_secret_get` | ✅ **EXPECTED** | +| 2 | 0.475492 | `keyvault_key_get` | ❌ | | 3 | 0.466890 | `keyvault_secret_create` | ❌ | | 4 | 0.447602 | `keyvault_secret_list` | ❌ | -| 5 | 0.439381 | `storage_account_get` | ❌ | +| 5 | 0.439583 | `storage_account_get` | ❌ | --- -## Test 266 +## Test 271 **Expected Tool:** `keyvault_secret_list` **Prompt:** List all secrets in the key vault @@ -4985,14 +5080,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.701227 | `keyvault_secret_list` | ✅ **EXPECTED** | -| 2 | 0.563760 | `keyvault_key_list` | ❌ | +| 2 | 0.563736 | `keyvault_key_list` | ❌ | | 3 | 0.538337 | `keyvault_certificate_list` | ❌ | -| 4 | 0.499642 | `keyvault_secret_get` | ❌ | +| 4 | 0.499888 | `keyvault_secret_get` | ❌ | | 5 | 0.455500 | `cosmos_account_list` | ❌ | --- -## Test 267 +## Test 272 **Expected Tool:** `keyvault_secret_list` **Prompt:** Show me the secrets in the key vault @@ -5002,14 +5097,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.555681 | `keyvault_secret_list` | ✅ **EXPECTED** | -| 2 | 0.543861 | `keyvault_secret_get` | ❌ | -| 3 | 0.497525 | `keyvault_key_get` | ❌ | -| 4 | 0.464705 | `keyvault_key_list` | ❌ | -| 5 | 0.453107 | `keyvault_admin_settings_get` | ❌ | +| 2 | 0.544015 | `keyvault_secret_get` | ❌ | +| 3 | 0.498713 | `keyvault_key_get` | ❌ | +| 4 | 0.464661 | `keyvault_key_list` | ❌ | +| 5 | 0.453130 | 
`keyvault_admin_settings_get` | ❌ | --- -## Test 268 +## Test 273 **Expected Tool:** `keyvault_secret_list` **Prompt:** What secrets are in the key vault ? @@ -5019,14 +5114,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.572540 | `keyvault_secret_list` | ✅ **EXPECTED** | -| 2 | 0.529258 | `keyvault_secret_get` | ❌ | -| 3 | 0.493797 | `keyvault_key_list` | ❌ | -| 4 | 0.487611 | `keyvault_admin_settings_get` | ❌ | -| 5 | 0.475273 | `keyvault_key_get` | ❌ | +| 2 | 0.529389 | `keyvault_secret_get` | ❌ | +| 3 | 0.493761 | `keyvault_key_list` | ❌ | +| 4 | 0.487620 | `keyvault_admin_settings_get` | ❌ | +| 5 | 0.476109 | `keyvault_key_get` | ❌ | --- -## Test 269 +## Test 274 **Expected Tool:** `keyvault_secret_list` **Prompt:** List secrets names in vault @@ -5036,14 +5131,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.624290 | `keyvault_secret_list` | ✅ **EXPECTED** | -| 2 | 0.559700 | `keyvault_key_list` | ❌ | +| 2 | 0.559681 | `keyvault_key_list` | ❌ | | 3 | 0.517516 | `keyvault_certificate_list` | ❌ | -| 4 | 0.479547 | `keyvault_secret_get` | ❌ | -| 5 | 0.454288 | `storage_blob_container_get` | ❌ | +| 4 | 0.479771 | `keyvault_secret_get` | ❌ | +| 5 | 0.453295 | `storage_blob_container_get` | ❌ | --- -## Test 270 +## Test 275 **Expected Tool:** `keyvault_secret_list` **Prompt:** Enumerate secrets in key vault @@ -5053,14 +5148,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.742358 | `keyvault_secret_list` | ✅ **EXPECTED** | -| 2 | 0.601234 | `keyvault_key_list` | ❌ | +| 2 | 0.601183 | `keyvault_key_list` | ❌ | | 3 | 0.567827 | `keyvault_certificate_list` | ❌ | -| 4 | 0.496127 | `keyvault_secret_get` | ❌ | -| 5 | 0.437534 | `keyvault_admin_settings_get` | ❌ | +| 4 | 0.496363 | `keyvault_secret_get` | ❌ | +| 5 | 0.437560 | `keyvault_admin_settings_get` | ❌ | --- -## Test 271 +## Test 276 **Expected Tool:** `keyvault_secret_list` **Prompt:** Show secrets names in the key vault 
@@ -5070,14 +5165,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.567110 | `keyvault_secret_list` | ✅ **EXPECTED** | -| 2 | 0.522398 | `keyvault_secret_get` | ❌ | -| 3 | 0.476354 | `keyvault_key_list` | ❌ | -| 4 | 0.462676 | `keyvault_secret_create` | ❌ | -| 5 | 0.461326 | `keyvault_key_get` | ❌ | +| 2 | 0.522600 | `keyvault_secret_get` | ❌ | +| 3 | 0.476309 | `keyvault_key_list` | ❌ | +| 4 | 0.462711 | `keyvault_key_get` | ❌ | +| 5 | 0.462677 | `keyvault_secret_create` | ❌ | --- -## Test 272 +## Test 277 **Expected Tool:** `aks_cluster_get` **Prompt:** Get the configuration of AKS cluster @@ -5090,11 +5185,11 @@ | 2 | 0.544302 | `aks_nodepool_get` | ❌ | | 3 | 0.517279 | `kusto_cluster_get` | ❌ | | 4 | 0.481416 | `mysql_server_config_get` | ❌ | -| 5 | 0.430975 | `postgres_server_config_get` | ❌ | +| 5 | 0.430976 | `postgres_server_config_get` | ❌ | --- -## Test 273 +## Test 278 **Expected Tool:** `aks_cluster_get` **Prompt:** Show me the details of AKS cluster in resource group @@ -5103,15 +5198,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.621536 | `aks_cluster_get` | ✅ **EXPECTED** | -| 2 | 0.575434 | `aks_nodepool_get` | ❌ | -| 3 | 0.567416 | `kusto_cluster_get` | ❌ | -| 4 | 0.461358 | `sql_db_show` | ❌ | -| 5 | 0.445310 | `monitor_webtests_get` | ❌ | +| 1 | 0.621759 | `aks_cluster_get` | ✅ **EXPECTED** | +| 2 | 0.575626 | `aks_nodepool_get` | ❌ | +| 3 | 0.567870 | `kusto_cluster_get` | ❌ | +| 4 | 0.461466 | `sql_db_show` | ❌ | +| 5 | 0.444327 | `monitor_webtests_get` | ❌ | --- -## Test 274 +## Test 279 **Expected Tool:** `aks_cluster_get` **Prompt:** Show me the network configuration for AKS cluster @@ -5128,7 +5223,7 @@ --- -## Test 275 +## Test 280 **Expected Tool:** `aks_cluster_get` **Prompt:** What are the details of my AKS cluster in ? 
@@ -5140,12 +5235,12 @@ | 1 | 0.588634 | `aks_cluster_get` | ✅ **EXPECTED** | | 2 | 0.550555 | `aks_nodepool_get` | ❌ | | 3 | 0.527511 | `kusto_cluster_get` | ❌ | -| 4 | 0.445813 | `storage_account_get` | ❌ | +| 4 | 0.445722 | `storage_account_get` | ❌ | | 5 | 0.435597 | `foundry_resource_get` | ❌ | --- -## Test 276 +## Test 281 **Expected Tool:** `aks_cluster_get` **Prompt:** List all AKS clusters in my subscription @@ -5157,12 +5252,12 @@ | 1 | 0.756471 | `aks_cluster_get` | ✅ **EXPECTED** | | 2 | 0.749416 | `kusto_cluster_list` | ❌ | | 3 | 0.590166 | `aks_nodepool_get` | ❌ | -| 4 | 0.568440 | `kusto_database_list` | ❌ | -| 5 | 0.562043 | `search_service_list` | ❌ | +| 4 | 0.568635 | `kusto_database_list` | ❌ | +| 5 | 0.560522 | `search_service_list` | ❌ | --- -## Test 277 +## Test 282 **Expected Tool:** `aks_cluster_get` **Prompt:** Show me my Azure Kubernetes Service clusters @@ -5175,11 +5270,11 @@ | 2 | 0.586661 | `kusto_cluster_list` | ❌ | | 3 | 0.507757 | `aks_nodepool_get` | ❌ | | 4 | 0.489724 | `kusto_cluster_get` | ❌ | -| 5 | 0.462991 | `kusto_database_list` | ❌ | +| 5 | 0.462950 | `kusto_database_list` | ❌ | --- -## Test 278 +## Test 283 **Expected Tool:** `aks_cluster_get` **Prompt:** What AKS clusters do I have? 
@@ -5188,15 +5283,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.628429 | `aks_cluster_get` | ✅ **EXPECTED** | -| 2 | 0.563189 | `aks_nodepool_get` | ❌ | -| 3 | 0.526756 | `kusto_cluster_list` | ❌ | -| 4 | 0.426157 | `kusto_cluster_get` | ❌ | -| 5 | 0.409163 | `kusto_database_list` | ❌ | +| 1 | 0.628470 | `aks_cluster_get` | ✅ **EXPECTED** | +| 2 | 0.563211 | `aks_nodepool_get` | ❌ | +| 3 | 0.526840 | `kusto_cluster_list` | ❌ | +| 4 | 0.426233 | `kusto_cluster_get` | ❌ | +| 5 | 0.409379 | `kusto_database_list` | ❌ | --- -## Test 279 +## Test 284 **Expected Tool:** `aks_nodepool_get` **Prompt:** Get details for nodepool in AKS cluster in @@ -5205,15 +5300,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.729136 | `aks_nodepool_get` | ✅ **EXPECTED** | -| 2 | 0.517116 | `kusto_cluster_get` | ❌ | -| 3 | 0.510014 | `aks_cluster_get` | ❌ | -| 4 | 0.468597 | `virtualdesktop_hostpool_list` | ❌ | -| 5 | 0.463489 | `sql_elastic-pool_list` | ❌ | +| 1 | 0.728569 | `aks_nodepool_get` | ✅ **EXPECTED** | +| 2 | 0.516573 | `kusto_cluster_get` | ❌ | +| 3 | 0.509314 | `aks_cluster_get` | ❌ | +| 4 | 0.468516 | `virtualdesktop_hostpool_list` | ❌ | +| 5 | 0.463185 | `sql_elastic-pool_list` | ❌ | --- -## Test 280 +## Test 285 **Expected Tool:** `aks_nodepool_get` **Prompt:** Show me the configuration for nodepool in AKS cluster in resource group @@ -5225,12 +5320,12 @@ | 1 | 0.654106 | `aks_nodepool_get` | ✅ **EXPECTED** | | 2 | 0.458596 | `sql_elastic-pool_list` | ❌ | | 3 | 0.446035 | `aks_cluster_get` | ❌ | -| 4 | 0.440182 | `virtualdesktop_hostpool_list` | ❌ | +| 4 | 0.440273 | `virtualdesktop_hostpool_list` | ❌ | | 5 | 0.413758 | `kusto_cluster_get` | ❌ | --- -## Test 281 +## Test 286 **Expected Tool:** `aks_nodepool_get` **Prompt:** What is the setup of nodepool for AKS cluster in ? 
@@ -5241,13 +5336,13 @@ |------|-------|------|--------| | 1 | 0.592806 | `aks_nodepool_get` | ✅ **EXPECTED** | | 2 | 0.402556 | `aks_cluster_get` | ❌ | -| 3 | 0.385173 | `virtualdesktop_hostpool_list` | ❌ | +| 3 | 0.385218 | `virtualdesktop_hostpool_list` | ❌ | | 4 | 0.383045 | `sql_elastic-pool_list` | ❌ | | 5 | 0.355090 | `kusto_cluster_get` | ❌ | --- -## Test 282 +## Test 287 **Expected Tool:** `aks_nodepool_get` **Prompt:** List nodepools for AKS cluster in @@ -5256,15 +5351,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.692264 | `aks_nodepool_get` | ✅ **EXPECTED** | -| 2 | 0.519034 | `aks_cluster_get` | ❌ | -| 3 | 0.506649 | `virtualdesktop_hostpool_list` | ❌ | -| 4 | 0.500705 | `kusto_cluster_list` | ❌ | -| 5 | 0.487723 | `sql_elastic-pool_list` | ❌ | +| 1 | 0.692231 | `aks_nodepool_get` | ✅ **EXPECTED** | +| 2 | 0.519037 | `aks_cluster_get` | ❌ | +| 3 | 0.506720 | `virtualdesktop_hostpool_list` | ❌ | +| 4 | 0.500749 | `kusto_cluster_list` | ❌ | +| 5 | 0.487707 | `sql_elastic-pool_list` | ❌ | --- -## Test 283 +## Test 288 **Expected Tool:** `aks_nodepool_get` **Prompt:** Show me the nodepool list for AKS cluster in @@ -5273,15 +5368,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.732131 | `aks_nodepool_get` | ✅ **EXPECTED** | +| 1 | 0.732132 | `aks_nodepool_get` | ✅ **EXPECTED** | | 2 | 0.561829 | `aks_cluster_get` | ❌ | | 3 | 0.510269 | `sql_elastic-pool_list` | ❌ | -| 4 | 0.509732 | `virtualdesktop_hostpool_list` | ❌ | +| 4 | 0.509840 | `virtualdesktop_hostpool_list` | ❌ | | 5 | 0.486700 | `kusto_cluster_list` | ❌ | --- -## Test 284 +## Test 289 **Expected Tool:** `aks_nodepool_get` **Prompt:** What nodepools do I have for AKS cluster in @@ -5290,15 +5385,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.629359 | `aks_nodepool_get` | ✅ **EXPECTED** | +| 1 | 0.629358 | `aks_nodepool_get` | ✅ **EXPECTED** | | 2 | 0.456911 | `aks_cluster_get` | ❌ | -| 3 | 
0.443902 | `virtualdesktop_hostpool_list` | ❌ | +| 3 | 0.443940 | `virtualdesktop_hostpool_list` | ❌ | | 4 | 0.433006 | `kusto_cluster_list` | ❌ | | 5 | 0.425448 | `sql_elastic-pool_list` | ❌ | --- -## Test 285 +## Test 290 **Expected Tool:** `loadtesting_test_create` **Prompt:** Create a basic URL test using the following endpoint URL that runs for 30 minutes with 45 virtual users. The test name is with the test id and the load testing resource is in the resource group in my subscription @@ -5307,15 +5402,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.577811 | `loadtesting_test_create` | ✅ **EXPECTED** | -| 2 | 0.519418 | `loadtesting_testresource_create` | ❌ | -| 3 | 0.512099 | `loadtesting_testrun_create` | ❌ | -| 4 | 0.472753 | `monitor_webtests_create` | ❌ | -| 5 | 0.460717 | `loadtesting_testresource_list` | ❌ | +| 1 | 0.579172 | `loadtesting_test_create` | ✅ **EXPECTED** | +| 2 | 0.520449 | `loadtesting_testresource_create` | ❌ | +| 3 | 0.513419 | `loadtesting_testrun_create` | ❌ | +| 4 | 0.473951 | `monitor_webtests_create` | ❌ | +| 5 | 0.461959 | `loadtesting_testresource_list` | ❌ | --- -## Test 286 +## Test 291 **Expected Tool:** `loadtesting_test_get` **Prompt:** Get the load test with id in the load test resource in resource group @@ -5324,15 +5419,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.626213 | `loadtesting_testresource_list` | ❌ | -| 2 | 0.620147 | `loadtesting_test_get` | ✅ **EXPECTED** | -| 3 | 0.594630 | `loadtesting_testresource_create` | ❌ | -| 4 | 0.591112 | `monitor_webtests_get` | ❌ | -| 5 | 0.535891 | `monitor_webtests_list` | ❌ | +| 1 | 0.626226 | `loadtesting_testresource_list` | ❌ | +| 2 | 0.619944 | `loadtesting_test_get` | ✅ **EXPECTED** | +| 3 | 0.594666 | `loadtesting_testresource_create` | ❌ | +| 4 | 0.590698 | `monitor_webtests_get` | ❌ | +| 5 | 0.536024 | `monitor_webtests_list` | ❌ | --- -## Test 287 +## Test 292 **Expected Tool:** 
`loadtesting_testresource_create` **Prompt:** Create a load test resource in the resource group in my subscription @@ -5341,15 +5436,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.645750 | `loadtesting_testresource_create` | ✅ **EXPECTED** | -| 2 | 0.618984 | `loadtesting_testresource_list` | ❌ | -| 3 | 0.541950 | `loadtesting_test_create` | ❌ | -| 4 | 0.539866 | `loadtesting_testrun_create` | ❌ | -| 5 | 0.526644 | `monitor_webtests_list` | ❌ | +| 1 | 0.645537 | `loadtesting_testresource_create` | ✅ **EXPECTED** | +| 2 | 0.618773 | `loadtesting_testresource_list` | ❌ | +| 3 | 0.541696 | `loadtesting_test_create` | ❌ | +| 4 | 0.539771 | `loadtesting_testrun_create` | ❌ | +| 5 | 0.526684 | `monitor_webtests_list` | ❌ | --- -## Test 288 +## Test 293 **Expected Tool:** `loadtesting_testresource_list` **Prompt:** List all load testing resources in the resource group in my subscription @@ -5359,14 +5454,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.794326 | `loadtesting_testresource_list` | ✅ **EXPECTED** | -| 2 | 0.653137 | `monitor_webtests_list` | ❌ | +| 2 | 0.653165 | `monitor_webtests_list` | ❌ | | 3 | 0.577408 | `group_list` | ❌ | | 4 | 0.575172 | `loadtesting_testresource_create` | ❌ | | 5 | 0.565565 | `datadog_monitoredresources_list` | ❌ | --- -## Test 289 +## Test 294 **Expected Tool:** `loadtesting_testrun_create` **Prompt:** Create a test run using the id for test in the load testing resource in resource group . 
Use the name of test run and description as @@ -5377,13 +5472,13 @@ |------|-------|------|--------| | 1 | 0.688976 | `loadtesting_testrun_create` | ✅ **EXPECTED** | | 2 | 0.594879 | `loadtesting_testrun_update` | ❌ | -| 3 | 0.558636 | `loadtesting_test_create` | ❌ | +| 3 | 0.558566 | `loadtesting_test_create` | ❌ | | 4 | 0.547102 | `loadtesting_testresource_create` | ❌ | | 5 | 0.496224 | `loadtesting_testresource_list` | ❌ | --- -## Test 290 +## Test 295 **Expected Tool:** `loadtesting_testrun_get` **Prompt:** Get the load test run with id in the load test resource in resource group @@ -5395,12 +5490,12 @@ | 1 | 0.619146 | `loadtesting_testresource_list` | ❌ | | 2 | 0.601927 | `loadtesting_test_get` | ❌ | | 3 | 0.597430 | `loadtesting_testresource_create` | ❌ | -| 4 | 0.577924 | `monitor_webtests_get` | ❌ | +| 4 | 0.577532 | `monitor_webtests_get` | ❌ | | 5 | 0.565996 | `loadtesting_testrun_list` | ❌ | --- -## Test 291 +## Test 296 **Expected Tool:** `loadtesting_testrun_list` **Prompt:** Get all the load test runs for the test with id in the load test resource in resource group @@ -5409,15 +5504,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.669180 | `loadtesting_testresource_list` | ❌ | -| 2 | 0.640360 | `loadtesting_testrun_list` | ✅ **EXPECTED** | -| 3 | 0.601075 | `loadtesting_test_get` | ❌ | -| 4 | 0.577460 | `loadtesting_testresource_create` | ❌ | -| 5 | 0.569963 | `monitor_webtests_get` | ❌ | +| 1 | 0.669307 | `loadtesting_testresource_list` | ❌ | +| 2 | 0.640644 | `loadtesting_testrun_list` | ✅ **EXPECTED** | +| 3 | 0.600977 | `loadtesting_test_get` | ❌ | +| 4 | 0.577403 | `loadtesting_testresource_create` | ❌ | +| 5 | 0.569287 | `monitor_webtests_list` | ❌ | --- -## Test 292 +## Test 297 **Expected Tool:** `loadtesting_testrun_update` **Prompt:** Update a test run display name as for the id for test in the load testing resource in resource group . 
@@ -5428,13 +5523,13 @@ |------|-------|------|--------| | 1 | 0.706747 | `loadtesting_testrun_update` | ✅ **EXPECTED** | | 2 | 0.514428 | `loadtesting_testrun_create` | ❌ | -| 3 | 0.486980 | `monitor_webtests_update` | ❌ | +| 3 | 0.486977 | `monitor_webtests_update` | ❌ | | 4 | 0.470337 | `loadtesting_testresource_list` | ❌ | -| 5 | 0.468205 | `monitor_webtests_get` | ❌ | +| 5 | 0.468374 | `monitor_webtests_get` | ❌ | --- -## Test 293 +## Test 298 **Expected Tool:** `grafana_list` **Prompt:** List all Azure Managed Grafana in one subscription @@ -5445,13 +5540,13 @@ |------|-------|------|--------| | 1 | 0.599427 | `kusto_cluster_list` | ❌ | | 2 | 0.578892 | `grafana_list` | ✅ **EXPECTED** | -| 3 | 0.551851 | `search_service_list` | ❌ | -| 4 | 0.550372 | `subscription_list` | ❌ | +| 3 | 0.550372 | `subscription_list` | ❌ | +| 4 | 0.549957 | `search_service_list` | ❌ | | 5 | 0.531259 | `redis_list` | ❌ | --- -## Test 294 +## Test 299 **Expected Tool:** `managedlustre_fs_create` **Prompt:** Create an Azure Managed Lustre filesystem with name , size , SKU , and subnet for availability zone in location . 
Maintenance should occur on at @@ -5460,15 +5555,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.728113 | `managedlustre_fs_create` | ✅ **EXPECTED** | -| 2 | 0.615874 | `managedlustre_fs_list` | ❌ | -| 3 | 0.605775 | `managedlustre_fs_sku_get` | ❌ | -| 4 | 0.598255 | `managedlustre_fs_update` | ❌ | +| 1 | 0.726553 | `managedlustre_fs_create` | ✅ **EXPECTED** | +| 2 | 0.616164 | `managedlustre_fs_list` | ❌ | +| 3 | 0.605701 | `managedlustre_fs_sku_get` | ❌ | +| 4 | 0.598215 | `managedlustre_fs_update` | ❌ | | 5 | 0.557720 | `managedlustre_fs_subnetsize_validate` | ❌ | --- -## Test 295 +## Test 300 **Expected Tool:** `managedlustre_fs_list` **Prompt:** List the Azure Managed Lustre filesystems in my subscription @@ -5477,15 +5572,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.750302 | `managedlustre_fs_list` | ✅ **EXPECTED** | -| 2 | 0.631770 | `managedlustre_fs_sku_get` | ❌ | -| 3 | 0.582660 | `managedlustre_fs_create` | ❌ | +| 1 | 0.750675 | `managedlustre_fs_list` | ✅ **EXPECTED** | +| 2 | 0.631730 | `managedlustre_fs_sku_get` | ❌ | +| 3 | 0.579855 | `managedlustre_fs_create` | ❌ | | 4 | 0.562377 | `kusto_cluster_list` | ❌ | -| 5 | 0.513156 | `search_service_list` | ❌ | +| 5 | 0.512086 | `search_service_list` | ❌ | --- -## Test 296 +## Test 301 **Expected Tool:** `managedlustre_fs_list` **Prompt:** List the Azure Managed Lustre filesystems in my resource group @@ -5494,15 +5589,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.743639 | `managedlustre_fs_list` | ✅ **EXPECTED** | -| 2 | 0.613217 | `managedlustre_fs_sku_get` | ❌ | -| 3 | 0.565856 | `managedlustre_fs_create` | ❌ | +| 1 | 0.743903 | `managedlustre_fs_list` | ✅ **EXPECTED** | +| 2 | 0.613164 | `managedlustre_fs_sku_get` | ❌ | +| 3 | 0.563081 | `managedlustre_fs_create` | ❌ | | 4 | 0.519986 | `datadog_monitoredresources_list` | ❌ | | 5 | 0.515433 | `loadtesting_testresource_list` | ❌ | --- -## Test 297 +## 
Test 302 **Expected Tool:** `managedlustre_fs_sku_get` **Prompt:** List the Azure Managed Lustre SKUs available in location @@ -5511,15 +5606,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.827381 | `managedlustre_fs_sku_get` | ✅ **EXPECTED** | -| 2 | 0.613245 | `managedlustre_fs_list` | ❌ | -| 3 | 0.513242 | `managedlustre_fs_create` | ❌ | +| 1 | 0.827360 | `managedlustre_fs_sku_get` | ✅ **EXPECTED** | +| 2 | 0.613674 | `managedlustre_fs_list` | ❌ | +| 3 | 0.511625 | `managedlustre_fs_create` | ❌ | | 4 | 0.496242 | `managedlustre_fs_subnetsize_validate` | ❌ | | 5 | 0.470241 | `kusto_cluster_list` | ❌ | --- -## Test 298 +## Test 303 **Expected Tool:** `managedlustre_fs_subnetsize_ask` **Prompt:** Tell me how many IP addresses I need for an Azure Managed Lustre filesystem of size using the SKU @@ -5528,15 +5623,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.739679 | `managedlustre_fs_subnetsize_ask` | ✅ **EXPECTED** | -| 2 | 0.651615 | `managedlustre_fs_subnetsize_validate` | ❌ | -| 3 | 0.594695 | `managedlustre_fs_sku_get` | ❌ | -| 4 | 0.559034 | `managedlustre_fs_list` | ❌ | -| 5 | 0.533796 | `managedlustre_fs_create` | ❌ | +| 1 | 0.739766 | `managedlustre_fs_subnetsize_ask` | ✅ **EXPECTED** | +| 2 | 0.651598 | `managedlustre_fs_subnetsize_validate` | ❌ | +| 3 | 0.594536 | `managedlustre_fs_sku_get` | ❌ | +| 4 | 0.559498 | `managedlustre_fs_list` | ❌ | +| 5 | 0.533351 | `managedlustre_fs_create` | ❌ | --- -## Test 299 +## Test 304 **Expected Tool:** `managedlustre_fs_subnetsize_validate` **Prompt:** Validate if the network can host Azure Managed Lustre filesystem of size using the SKU @@ -5545,15 +5640,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.879541 | `managedlustre_fs_subnetsize_validate` | ✅ **EXPECTED** | -| 2 | 0.622603 | `managedlustre_fs_subnetsize_ask` | ❌ | -| 3 | 0.542788 | `managedlustre_fs_sku_get` | ❌ | -| 4 | 0.515947 | `managedlustre_fs_create` 
| ❌ | -| 5 | 0.480673 | `managedlustre_fs_list` | ❌ | +| 1 | 0.879240 | `managedlustre_fs_subnetsize_validate` | ✅ **EXPECTED** | +| 2 | 0.622368 | `managedlustre_fs_subnetsize_ask` | ❌ | +| 3 | 0.542555 | `managedlustre_fs_sku_get` | ❌ | +| 4 | 0.516032 | `managedlustre_fs_create` | ❌ | +| 5 | 0.480796 | `managedlustre_fs_list` | ❌ | --- -## Test 300 +## Test 305 **Expected Tool:** `managedlustre_fs_update` **Prompt:** Update the maintenance window of the Azure Managed Lustre filesystem to at @@ -5562,15 +5657,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.739000 | `managedlustre_fs_update` | ✅ **EXPECTED** | -| 2 | 0.527525 | `managedlustre_fs_create` | ❌ | -| 3 | 0.487003 | `managedlustre_fs_list` | ❌ | -| 4 | 0.385349 | `managedlustre_fs_sku_get` | ❌ | +| 1 | 0.738895 | `managedlustre_fs_update` | ✅ **EXPECTED** | +| 2 | 0.525980 | `managedlustre_fs_create` | ❌ | +| 3 | 0.487193 | `managedlustre_fs_list` | ❌ | +| 4 | 0.385318 | `managedlustre_fs_sku_get` | ❌ | | 5 | 0.344891 | `managedlustre_fs_subnetsize_validate` | ❌ | --- -## Test 301 +## Test 306 **Expected Tool:** `marketplace_product_get` **Prompt:** Get details about marketplace product @@ -5579,15 +5674,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.570028 | `marketplace_product_get` | ✅ **EXPECTED** | -| 2 | 0.499184 | `marketplace_product_list` | ❌ | -| 3 | 0.353256 | `servicebus_topic_subscription_details` | ❌ | -| 4 | 0.333160 | `servicebus_topic_details` | ❌ | -| 5 | 0.330935 | `servicebus_queue_details` | ❌ | +| 1 | 0.570164 | `marketplace_product_get` | ✅ **EXPECTED** | +| 2 | 0.499208 | `marketplace_product_list` | ❌ | +| 3 | 0.353280 | `servicebus_topic_subscription_details` | ❌ | +| 4 | 0.333304 | `servicebus_topic_details` | ❌ | +| 5 | 0.330949 | `servicebus_queue_details` | ❌ | --- -## Test 302 +## Test 307 **Expected Tool:** `marketplace_product_list` **Prompt:** Search for Microsoft products in the marketplace @@ -5596,15 
+5691,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.607916 | `marketplace_product_list` | ✅ **EXPECTED** | -| 2 | 0.443178 | `marketplace_product_get` | ❌ | -| 3 | 0.343549 | `search_service_list` | ❌ | -| 4 | 0.330500 | `foundry_models_list` | ❌ | -| 5 | 0.328676 | `managedlustre_fs_sku_get` | ❌ | +| 1 | 0.607950 | `marketplace_product_list` | ✅ **EXPECTED** | +| 2 | 0.443177 | `marketplace_product_get` | ❌ | +| 3 | 0.341360 | `search_service_list` | ❌ | +| 4 | 0.330544 | `foundry_models_list` | ❌ | +| 5 | 0.328671 | `managedlustre_fs_sku_get` | ❌ | --- -## Test 303 +## Test 308 **Expected Tool:** `marketplace_product_list` **Prompt:** Show me marketplace products from publisher @@ -5614,14 +5709,99 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.537726 | `marketplace_product_list` | ✅ **EXPECTED** | -| 2 | 0.385198 | `marketplace_product_get` | ❌ | +| 2 | 0.385167 | `marketplace_product_get` | ❌ | | 3 | 0.308769 | `foundry_models_list` | ❌ | | 4 | 0.288006 | `redis_list` | ❌ | -| 5 | 0.260387 | `managedlustre_fs_sku_get` | ❌ | +| 5 | 0.260421 | `managedlustre_fs_sku_get` | ❌ | --- -## Test 304 +## Test 309 + +**Expected Tool:** `azureaibestpractices_get` +**Prompt:** Get best practices for building AI applications in Azure + +### Results + +| Rank | Score | Tool | Status | +|------|-------|------|--------| +| 1 | 0.675775 | `azureaibestpractices_get` | ✅ **EXPECTED** | +| 2 | 0.555579 | `get_bestpractices_get` | ❌ | +| 3 | 0.501210 | `azureterraformbestpractices_get` | ❌ | +| 4 | 0.480026 | `deploy_pipeline_guidance_get` | ❌ | +| 5 | 0.477592 | `cloudarchitect_design` | ❌ | + +--- + +## Test 310 + +**Expected Tool:** `azureaibestpractices_get` +**Prompt:** Show me the best practices for Azure AI Foundry agents code generation + +### Results + +| Rank | Score | Tool | Status | +|------|-------|------|--------| +| 1 | 0.699440 | `azureaibestpractices_get` | ✅ **EXPECTED** | +| 2 | 0.603487 | 
`foundry_agents_get-sdk-sample` | ❌ | +| 3 | 0.534202 | `get_bestpractices_get` | ❌ | +| 4 | 0.520202 | `foundry_agents_list` | ❌ | +| 5 | 0.508727 | `azureterraformbestpractices_get` | ❌ | + +--- + +## Test 311 + +**Expected Tool:** `azureaibestpractices_get` +**Prompt:** Get guidance for building agents with Azure AI Foundry + +### Results + +| Rank | Score | Tool | Status | +|------|-------|------|--------| +| 1 | 0.635165 | `azureaibestpractices_get` | ✅ **EXPECTED** | +| 2 | 0.593029 | `foundry_agents_get-sdk-sample` | ❌ | +| 3 | 0.553580 | `foundry_agents_list` | ❌ | +| 4 | 0.534256 | `foundry_agents_create` | ❌ | +| 5 | 0.513217 | `foundry_agents_connect` | ❌ | + +--- + +## Test 312 + +**Expected Tool:** `azureaibestpractices_get` +**Prompt:** Create an AI app that helps me to manage travel queries. + +### Results + +| Rank | Score | Tool | Status | +|------|-------|------|--------| +| 1 | 0.417629 | `azureaibestpractices_get` | ✅ **EXPECTED** | +| 2 | 0.343844 | `foundry_threads_create` | ❌ | +| 3 | 0.327503 | `foundry_openai_chat-completions-create` | ❌ | +| 4 | 0.320532 | `foundry_openai_create-completion` | ❌ | +| 5 | 0.311958 | `foundry_agents_connect` | ❌ | + +--- + +## Test 313 + +**Expected Tool:** `azureaibestpractices_get` +**Prompt:** Create an AI app that helps me to manage travel queries in Azure AI Foundry + +### Results + +| Rank | Score | Tool | Status | +|------|-------|------|--------| +| 1 | 0.517931 | `azureaibestpractices_get` | ✅ **EXPECTED** | +| 2 | 0.478747 | `foundry_openai_embeddings-create` | ❌ | +| 3 | 0.469654 | `foundry_openai_chat-completions-create` | ❌ | +| 4 | 0.466216 | `foundry_openai_create-completion` | ❌ | +| 5 | 0.456719 | `foundry_resource_get` | ❌ | + +--- + +## Test 314 **Expected Tool:** `get_bestpractices_get` **Prompt:** Get the latest Azure code generation best practices @@ -5630,15 +5810,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.646857 | `get_bestpractices_get` | ✅ 
**EXPECTED** | -| 2 | 0.635437 | `azureterraformbestpractices_get` | ❌ | -| 3 | 0.586894 | `deploy_iac_rules_get` | ❌ | -| 4 | 0.531727 | `deploy_pipeline_guidance_get` | ❌ | -| 5 | 0.490235 | `deploy_plan_get` | ❌ | +| 1 | 0.656395 | `azureaibestpractices_get` | ❌ | +| 2 | 0.646844 | `get_bestpractices_get` | ✅ **EXPECTED** | +| 3 | 0.635406 | `azureterraformbestpractices_get` | ❌ | +| 4 | 0.586907 | `deploy_iac_rules_get` | ❌ | +| 5 | 0.531457 | `deploy_pipeline_guidance_get` | ❌ | --- -## Test 305 +## Test 315 **Expected Tool:** `get_bestpractices_get` **Prompt:** Get the latest Azure deployment best practices @@ -5648,14 +5828,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.600903 | `get_bestpractices_get` | ✅ **EXPECTED** | -| 2 | 0.548655 | `azureterraformbestpractices_get` | ❌ | +| 2 | 0.548542 | `azureterraformbestpractices_get` | ❌ | | 3 | 0.541091 | `deploy_iac_rules_get` | ❌ | | 4 | 0.516852 | `deploy_plan_get` | ❌ | -| 5 | 0.516443 | `deploy_pipeline_guidance_get` | ❌ | +| 5 | 0.516203 | `deploy_pipeline_guidance_get` | ❌ | --- -## Test 306 +## Test 316 **Expected Tool:** `get_bestpractices_get` **Prompt:** Get the latest Azure best practices @@ -5665,14 +5845,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.625259 | `get_bestpractices_get` | ✅ **EXPECTED** | -| 2 | 0.594455 | `azureterraformbestpractices_get` | ❌ | -| 3 | 0.518643 | `deploy_iac_rules_get` | ❌ | -| 4 | 0.465572 | `deploy_pipeline_guidance_get` | ❌ | -| 5 | 0.450629 | `cloudarchitect_design` | ❌ | +| 2 | 0.594323 | `azureterraformbestpractices_get` | ❌ | +| 3 | 0.539715 | `azureaibestpractices_get` | ❌ | +| 4 | 0.518643 | `deploy_iac_rules_get` | ❌ | +| 5 | 0.465370 | `deploy_pipeline_guidance_get` | ❌ | --- -## Test 307 +## Test 317 **Expected Tool:** `get_bestpractices_get` **Prompt:** Get the latest Azure Functions code generation best practices @@ -5682,14 +5862,14 @@ | Rank | Score | Tool | Status | 
|------|-------|------|--------| | 1 | 0.624273 | `get_bestpractices_get` | ✅ **EXPECTED** | -| 2 | 0.570547 | `azureterraformbestpractices_get` | ❌ | -| 3 | 0.522998 | `deploy_iac_rules_get` | ❌ | -| 4 | 0.493998 | `deploy_pipeline_guidance_get` | ❌ | -| 5 | 0.467377 | `extension_cli_install` | ❌ | +| 2 | 0.587474 | `azureaibestpractices_get` | ❌ | +| 3 | 0.570488 | `azureterraformbestpractices_get` | ❌ | +| 4 | 0.522998 | `deploy_iac_rules_get` | ❌ | +| 5 | 0.493745 | `deploy_pipeline_guidance_get` | ❌ | --- -## Test 308 +## Test 318 **Expected Tool:** `get_bestpractices_get` **Prompt:** Get the latest Azure Functions deployment best practices @@ -5699,14 +5879,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.581850 | `get_bestpractices_get` | ✅ **EXPECTED** | -| 2 | 0.497350 | `deploy_pipeline_guidance_get` | ❌ | +| 2 | 0.497056 | `deploy_pipeline_guidance_get` | ❌ | | 3 | 0.495659 | `deploy_iac_rules_get` | ❌ | -| 4 | 0.487012 | `azureterraformbestpractices_get` | ❌ | +| 4 | 0.486886 | `azureterraformbestpractices_get` | ❌ | | 5 | 0.474511 | `deploy_plan_get` | ❌ | --- -## Test 309 +## Test 319 **Expected Tool:** `get_bestpractices_get` **Prompt:** Get the latest Azure Functions best practices @@ -5716,14 +5896,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.610986 | `get_bestpractices_get` | ✅ **EXPECTED** | -| 2 | 0.532921 | `azureterraformbestpractices_get` | ❌ | -| 3 | 0.487322 | `deploy_iac_rules_get` | ❌ | -| 4 | 0.458060 | `deploy_pipeline_guidance_get` | ❌ | -| 5 | 0.448034 | `extension_cli_install` | ❌ | +| 2 | 0.532790 | `azureterraformbestpractices_get` | ❌ | +| 3 | 0.518386 | `azureaibestpractices_get` | ❌ | +| 4 | 0.487322 | `deploy_iac_rules_get` | ❌ | +| 5 | 0.457812 | `deploy_pipeline_guidance_get` | ❌ | --- -## Test 310 +## Test 320 **Expected Tool:** `get_bestpractices_get` **Prompt:** Get the latest Azure Static Web Apps best practices @@ -5733,14 +5913,14 @@ | Rank | Score | Tool 
| Status | |------|-------|------|--------| | 1 | 0.557862 | `get_bestpractices_get` | ✅ **EXPECTED** | -| 2 | 0.513385 | `azureterraformbestpractices_get` | ❌ | -| 3 | 0.505123 | `deploy_iac_rules_get` | ❌ | -| 4 | 0.483705 | `deploy_pipeline_guidance_get` | ❌ | -| 5 | 0.421581 | `cloudarchitect_design` | ❌ | +| 2 | 0.513262 | `azureterraformbestpractices_get` | ❌ | +| 3 | 0.510399 | `azureaibestpractices_get` | ❌ | +| 4 | 0.505123 | `deploy_iac_rules_get` | ❌ | +| 5 | 0.483482 | `deploy_pipeline_guidance_get` | ❌ | --- -## Test 311 +## Test 321 **Expected Tool:** `get_bestpractices_get` **Prompt:** What are azure function best practices? @@ -5750,14 +5930,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.582541 | `get_bestpractices_get` | ✅ **EXPECTED** | -| 2 | 0.500479 | `azureterraformbestpractices_get` | ❌ | -| 3 | 0.472112 | `deploy_iac_rules_get` | ❌ | -| 4 | 0.433134 | `deploy_pipeline_guidance_get` | ❌ | -| 5 | 0.432087 | `cloudarchitect_design` | ❌ | +| 2 | 0.500368 | `azureterraformbestpractices_get` | ❌ | +| 3 | 0.475018 | `azureaibestpractices_get` | ❌ | +| 4 | 0.472112 | `deploy_iac_rules_get` | ❌ | +| 5 | 0.432959 | `deploy_pipeline_guidance_get` | ❌ | --- -## Test 312 +## Test 322 **Expected Tool:** `get_bestpractices_get` **Prompt:** configure azure mcp in coding agent for my repo @@ -5767,14 +5947,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.488855 | `deploy_plan_get` | ❌ | -| 2 | 0.460956 | `deploy_pipeline_guidance_get` | ❌ | +| 2 | 0.460745 | `deploy_pipeline_guidance_get` | ❌ | | 3 | 0.390270 | `deploy_iac_rules_get` | ❌ | -| 4 | 0.370382 | `azureterraformbestpractices_get` | ❌ | -| 5 | 0.369169 | `extension_cli_install` | ❌ | +| 4 | 0.370753 | `azureaibestpractices_get` | ❌ | +| 5 | 0.370298 | `azureterraformbestpractices_get` | ❌ | --- -## Test 313 +## Test 323 **Expected Tool:** `monitor_activitylog_list` **Prompt:** List the activity logs of the last month for @@ -5783,15 
+5963,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.537916 | `monitor_activitylog_list` | ✅ **EXPECTED** | +| 1 | 0.537893 | `monitor_activitylog_list` | ✅ **EXPECTED** | | 2 | 0.506212 | `monitor_resource_log_query` | ❌ | -| 3 | 0.371727 | `monitor_workspace_log_query` | ❌ | +| 3 | 0.371728 | `monitor_workspace_log_query` | ❌ | | 4 | 0.363798 | `resourcehealth_health-events_list` | ❌ | | 5 | 0.344629 | `datadog_monitoredresources_list` | ❌ | --- -## Test 314 +## Test 324 **Expected Tool:** `monitor_healthmodels_entity_get` **Prompt:** Show me the health status of entity using the health model @@ -5801,14 +5981,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.660947 | `monitor_healthmodels_entity_get` | ✅ **EXPECTED** | -| 2 | 0.609276 | `resourcehealth_availability-status_get` | ❌ | -| 3 | 0.351518 | `resourcehealth_availability-status_list` | ❌ | +| 2 | 0.608665 | `resourcehealth_availability-status_get` | ❌ | +| 3 | 0.351697 | `resourcehealth_availability-status_list` | ❌ | | 4 | 0.328321 | `resourcehealth_health-events_list` | ❌ | -| 5 | 0.288705 | `foundry_models_deployments_list` | ❌ | +| 5 | 0.288127 | `foundry_models_deployments_list` | ❌ | --- -## Test 315 +## Test 325 **Expected Tool:** `monitor_metrics_definitions` **Prompt:** Get metric definitions for from the namespace @@ -5817,15 +5997,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.592676 | `monitor_metrics_definitions` | ✅ **EXPECTED** | -| 2 | 0.424006 | `monitor_metrics_query` | ❌ | -| 3 | 0.368319 | `bicepschema_get` | ❌ | -| 4 | 0.332356 | `monitor_table_type_list` | ❌ | -| 5 | 0.324986 | `resourcehealth_availability-status_get` | ❌ | +| 1 | 0.592640 | `monitor_metrics_definitions` | ✅ **EXPECTED** | +| 2 | 0.424141 | `monitor_metrics_query` | ❌ | +| 3 | 0.368006 | `bicepschema_get` | ❌ | +| 4 | 0.332369 | `monitor_table_type_list` | ❌ | +| 5 | 0.325634 | `resourcehealth_availability-status_get` | 
❌ | --- -## Test 316 +## Test 326 **Expected Tool:** `monitor_metrics_definitions` **Prompt:** Show me all available metrics and their definitions for storage account @@ -5834,15 +6014,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.607537 | `storage_account_get` | ❌ | -| 2 | 0.587640 | `monitor_metrics_definitions` | ✅ **EXPECTED** | -| 3 | 0.544781 | `storage_blob_container_get` | ❌ | +| 1 | 0.607600 | `storage_account_get` | ❌ | +| 2 | 0.587736 | `monitor_metrics_definitions` | ✅ **EXPECTED** | +| 3 | 0.544043 | `storage_blob_container_get` | ❌ | | 4 | 0.495829 | `storage_blob_get` | ❌ | -| 5 | 0.473306 | `managedlustre_fs_list` | ❌ | +| 5 | 0.473421 | `managedlustre_fs_list` | ❌ | --- -## Test 317 +## Test 327 **Expected Tool:** `monitor_metrics_definitions` **Prompt:** What metric definitions are available for the Application Insights resource @@ -5851,15 +6031,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.633132 | `monitor_metrics_definitions` | ✅ **EXPECTED** | -| 2 | 0.495439 | `monitor_metrics_query` | ❌ | +| 1 | 0.633173 | `monitor_metrics_definitions` | ✅ **EXPECTED** | +| 2 | 0.495513 | `monitor_metrics_query` | ❌ | | 3 | 0.433945 | `monitor_resource_log_query` | ❌ | | 4 | 0.392960 | `loadtesting_testresource_list` | ❌ | -| 5 | 0.388750 | `bicepschema_get` | ❌ | +| 5 | 0.388569 | `bicepschema_get` | ❌ | --- -## Test 318 +## Test 328 **Expected Tool:** `monitor_metrics_query` **Prompt:** Analyze the performance trends and response times for Application Insights resource over the last @@ -5868,7 +6048,7 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.555502 | `monitor_metrics_query` | ✅ **EXPECTED** | +| 1 | 0.555377 | `monitor_metrics_query` | ✅ **EXPECTED** | | 2 | 0.527530 | `monitor_resource_log_query` | ❌ | | 3 | 0.464743 | `applens_resource_diagnose` | ❌ | | 4 | 0.420462 | `resourcehealth_health-events_list` | ❌ | @@ -5876,7 +6056,7 @@ --- -## Test 319 
+## Test 329 **Expected Tool:** `monitor_metrics_query` **Prompt:** Check the availability metrics for my Application Insights resource for the last @@ -5885,15 +6065,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.557831 | `monitor_metrics_query` | ✅ **EXPECTED** | +| 1 | 0.557830 | `monitor_metrics_query` | ✅ **EXPECTED** | | 2 | 0.476671 | `monitor_resource_log_query` | ❌ | -| 3 | 0.460351 | `resourcehealth_availability-status_list` | ❌ | -| 4 | 0.456321 | `quota_usage_check` | ❌ | -| 5 | 0.438171 | `monitor_metrics_definitions` | ❌ | +| 3 | 0.460611 | `resourcehealth_availability-status_list` | ❌ | +| 4 | 0.456360 | `quota_usage_check` | ❌ | +| 5 | 0.438233 | `monitor_metrics_definitions` | ❌ | --- -## Test 320 +## Test 330 **Expected Tool:** `monitor_metrics_query` **Prompt:** Get the metric for over the last with intervals @@ -5902,15 +6082,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.461138 | `monitor_metrics_query` | ✅ **EXPECTED** | -| 2 | 0.389998 | `monitor_metrics_definitions` | ❌ | -| 3 | 0.338392 | `monitor_resource_log_query` | ❌ | -| 4 | 0.334417 | `resourcehealth_availability-status_get` | ❌ | -| 5 | 0.306224 | `resourcehealth_availability-status_list` | ❌ | +| 1 | 0.461249 | `monitor_metrics_query` | ✅ **EXPECTED** | +| 2 | 0.390029 | `monitor_metrics_definitions` | ❌ | +| 3 | 0.338557 | `monitor_resource_log_query` | ❌ | +| 4 | 0.335118 | `resourcehealth_availability-status_get` | ❌ | +| 5 | 0.306338 | `resourcehealth_availability-status_list` | ❌ | --- -## Test 321 +## Test 331 **Expected Tool:** `monitor_metrics_query` **Prompt:** Investigate error rates and failed requests for Application Insights resource for the last @@ -5920,14 +6100,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.496878 | `monitor_resource_log_query` | ❌ | -| 2 | 0.491782 | `monitor_metrics_query` | ✅ **EXPECTED** | +| 2 | 0.492138 | `monitor_metrics_query` | ✅ **EXPECTED** 
| | 3 | 0.448148 | `applens_resource_diagnose` | ❌ | | 4 | 0.412184 | `resourcehealth_health-events_list` | ❌ | -| 5 | 0.397785 | `quota_usage_check` | ❌ | +| 5 | 0.397853 | `quota_usage_check` | ❌ | --- -## Test 322 +## Test 332 **Expected Tool:** `monitor_metrics_query` **Prompt:** Query the metric for for the last @@ -5936,15 +6116,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.525326 | `monitor_metrics_query` | ✅ **EXPECTED** | -| 2 | 0.406185 | `monitor_resource_log_query` | ❌ | -| 3 | 0.384524 | `monitor_metrics_definitions` | ❌ | -| 4 | 0.347723 | `monitor_workspace_log_query` | ❌ | -| 5 | 0.330713 | `resourcehealth_availability-status_get` | ❌ | +| 1 | 0.525890 | `monitor_metrics_query` | ✅ **EXPECTED** | +| 2 | 0.405838 | `monitor_resource_log_query` | ❌ | +| 3 | 0.384811 | `monitor_metrics_definitions` | ❌ | +| 4 | 0.347228 | `monitor_workspace_log_query` | ❌ | +| 5 | 0.330657 | `resourcehealth_availability-status_get` | ❌ | --- -## Test 323 +## Test 333 **Expected Tool:** `monitor_metrics_query` **Prompt:** What's the request per second rate for my Application Insights resource over the last @@ -5953,15 +6133,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.480420 | `monitor_metrics_query` | ✅ **EXPECTED** | +| 1 | 0.480140 | `monitor_metrics_query` | ✅ **EXPECTED** | | 2 | 0.444779 | `monitor_resource_log_query` | ❌ | | 3 | 0.388382 | `applens_resource_diagnose` | ❌ | -| 4 | 0.363640 | `quota_usage_check` | ❌ | +| 4 | 0.363672 | `quota_usage_check` | ❌ | | 5 | 0.350076 | `resourcehealth_health-events_list` | ❌ | --- -## Test 324 +## Test 334 **Expected Tool:** `monitor_resource_log_query` **Prompt:** Show me the logs for the past hour for the resource in the Log Analytics workspace @@ -5972,13 +6152,13 @@ |------|-------|------|--------| | 1 | 0.687852 | `monitor_resource_log_query` | ✅ **EXPECTED** | | 2 | 0.621919 | `monitor_workspace_log_query` | ❌ | -| 3 | 0.598436 | 
`monitor_activitylog_list` | ❌ | -| 4 | 0.485633 | `deploy_app_logs_get` | ❌ | -| 5 | 0.470119 | `monitor_metrics_query` | ❌ | +| 3 | 0.598393 | `monitor_activitylog_list` | ❌ | +| 4 | 0.485528 | `deploy_app_logs_get` | ❌ | +| 5 | 0.469703 | `monitor_metrics_query` | ❌ | --- -## Test 325 +## Test 335 **Expected Tool:** `monitor_table_list` **Prompt:** List all tables in the Log Analytics workspace @@ -5988,14 +6168,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.851075 | `monitor_table_list` | ✅ **EXPECTED** | -| 2 | 0.725738 | `monitor_table_type_list` | ❌ | -| 3 | 0.620445 | `monitor_workspace_list` | ❌ | -| 4 | 0.541959 | `kusto_table_list` | ❌ | +| 2 | 0.725693 | `monitor_table_type_list` | ❌ | +| 3 | 0.620451 | `monitor_workspace_list` | ❌ | +| 4 | 0.541928 | `kusto_table_list` | ❌ | | 5 | 0.539481 | `monitor_workspace_log_query` | ❌ | --- -## Test 326 +## Test 336 **Expected Tool:** `monitor_table_list` **Prompt:** Show me the tables in the Log Analytics workspace @@ -6004,15 +6184,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.798460 | `monitor_table_list` | ✅ **EXPECTED** | -| 2 | 0.701122 | `monitor_table_type_list` | ❌ | -| 3 | 0.599917 | `monitor_workspace_list` | ❌ | -| 4 | 0.542821 | `monitor_workspace_log_query` | ❌ | +| 1 | 0.798459 | `monitor_table_list` | ✅ **EXPECTED** | +| 2 | 0.701092 | `monitor_table_type_list` | ❌ | +| 3 | 0.600003 | `monitor_workspace_list` | ❌ | +| 4 | 0.542820 | `monitor_workspace_log_query` | ❌ | | 5 | 0.502882 | `monitor_resource_log_query` | ❌ | --- -## Test 327 +## Test 337 **Expected Tool:** `monitor_table_type_list` **Prompt:** List all available table types in the Log Analytics workspace @@ -6021,15 +6201,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.881524 | `monitor_table_type_list` | ✅ **EXPECTED** | -| 2 | 0.765702 | `monitor_table_list` | ❌ | -| 3 | 0.569921 | `monitor_workspace_list` | ❌ | -| 4 | 0.504789 | 
`mysql_table_list` | ❌ | +| 1 | 0.881468 | `monitor_table_type_list` | ✅ **EXPECTED** | +| 2 | 0.765694 | `monitor_table_list` | ❌ | +| 3 | 0.570092 | `monitor_workspace_list` | ❌ | +| 4 | 0.504683 | `mysql_table_list` | ❌ | | 5 | 0.497622 | `monitor_workspace_log_query` | ❌ | --- -## Test 328 +## Test 338 **Expected Tool:** `monitor_table_type_list` **Prompt:** Show me the available table types in the Log Analytics workspace @@ -6038,15 +6218,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.843138 | `monitor_table_type_list` | ✅ **EXPECTED** | -| 2 | 0.736837 | `monitor_table_list` | ❌ | -| 3 | 0.576731 | `monitor_workspace_list` | ❌ | +| 1 | 0.843110 | `monitor_table_type_list` | ✅ **EXPECTED** | +| 2 | 0.736831 | `monitor_table_list` | ❌ | +| 3 | 0.576934 | `monitor_workspace_list` | ❌ | | 4 | 0.509598 | `monitor_workspace_log_query` | ❌ | -| 5 | 0.481229 | `mysql_table_list` | ❌ | +| 5 | 0.481189 | `mysql_table_list` | ❌ | --- -## Test 329 +## Test 339 **Expected Tool:** `monitor_webtests_create` **Prompt:** Create a new Standard Web Test with name in my subscription in in a given @@ -6055,15 +6235,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.650749 | `monitor_webtests_create` | ✅ **EXPECTED** | -| 2 | 0.569999 | `monitor_webtests_list` | ❌ | -| 3 | 0.550088 | `monitor_webtests_update` | ❌ | -| 4 | 0.533466 | `monitor_webtests_get` | ❌ | -| 5 | 0.482122 | `loadtesting_testresource_create` | ❌ | +| 1 | 0.651084 | `monitor_webtests_create` | ✅ **EXPECTED** | +| 2 | 0.570105 | `monitor_webtests_list` | ❌ | +| 3 | 0.550426 | `monitor_webtests_update` | ❌ | +| 4 | 0.533477 | `monitor_webtests_get` | ❌ | +| 5 | 0.482251 | `loadtesting_testresource_create` | ❌ | --- -## Test 330 +## Test 340 **Expected Tool:** `monitor_webtests_get` **Prompt:** Get Web Test details for in my subscription in @@ -6072,15 +6252,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.759380 | 
`monitor_webtests_get` | ✅ **EXPECTED** | -| 2 | 0.725337 | `monitor_webtests_list` | ❌ | -| 3 | 0.583816 | `loadtesting_testresource_list` | ❌ | -| 4 | 0.562797 | `monitor_webtests_update` | ❌ | -| 5 | 0.530557 | `monitor_webtests_create` | ❌ | +| 1 | 0.758910 | `monitor_webtests_get` | ✅ **EXPECTED** | +| 2 | 0.725360 | `monitor_webtests_list` | ❌ | +| 3 | 0.583663 | `loadtesting_testresource_list` | ❌ | +| 4 | 0.562785 | `monitor_webtests_update` | ❌ | +| 5 | 0.530432 | `monitor_webtests_create` | ❌ | --- -## Test 331 +## Test 341 **Expected Tool:** `monitor_webtests_list` **Prompt:** List all Web Test resources in my subscription @@ -6089,15 +6269,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.730568 | `monitor_webtests_list` | ✅ **EXPECTED** | +| 1 | 0.730616 | `monitor_webtests_list` | ✅ **EXPECTED** | | 2 | 0.610160 | `loadtesting_testresource_list` | ❌ | | 3 | 0.547708 | `grafana_list` | ❌ | -| 4 | 0.520829 | `redis_list` | ❌ | -| 5 | 0.496381 | `monitor_webtests_get` | ❌ | +| 4 | 0.520828 | `redis_list` | ❌ | +| 5 | 0.496166 | `monitor_webtests_get` | ❌ | --- -## Test 332 +## Test 342 **Expected Tool:** `monitor_webtests_list` **Prompt:** List all Web Test resources in my subscription in @@ -6106,15 +6286,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.793702 | `monitor_webtests_list` | ✅ **EXPECTED** | +| 1 | 0.793807 | `monitor_webtests_list` | ✅ **EXPECTED** | | 2 | 0.675965 | `loadtesting_testresource_list` | ❌ | -| 3 | 0.584942 | `monitor_webtests_get` | ❌ | +| 3 | 0.584429 | `monitor_webtests_get` | ❌ | | 4 | 0.573602 | `group_list` | ❌ | -| 5 | 0.546327 | `resourcehealth_availability-status_list` | ❌ | +| 5 | 0.546088 | `resourcehealth_availability-status_list` | ❌ | --- -## Test 333 +## Test 343 **Expected Tool:** `monitor_webtests_update` **Prompt:** Update an existing Standard Web Test with name in my subscription in in a given @@ -6123,15 +6303,15 @@ | Rank | Score | Tool | 
Status | |------|-------|------|--------| -| 1 | 0.686449 | `monitor_webtests_update` | ✅ **EXPECTED** | -| 2 | 0.559199 | `monitor_webtests_get` | ❌ | -| 3 | 0.558234 | `monitor_webtests_create` | ❌ | -| 4 | 0.553545 | `monitor_webtests_list` | ❌ | -| 5 | 0.508736 | `loadtesting_testrun_update` | ❌ | +| 1 | 0.686427 | `monitor_webtests_update` | ✅ **EXPECTED** | +| 2 | 0.558816 | `monitor_webtests_get` | ❌ | +| 3 | 0.557828 | `monitor_webtests_create` | ❌ | +| 4 | 0.553372 | `monitor_webtests_list` | ❌ | +| 5 | 0.509192 | `loadtesting_testrun_update` | ❌ | --- -## Test 334 +## Test 344 **Expected Tool:** `monitor_workspace_list` **Prompt:** List all Log Analytics workspaces in my subscription @@ -6140,15 +6320,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.813902 | `monitor_workspace_list` | ✅ **EXPECTED** | +| 1 | 0.813871 | `monitor_workspace_list` | ✅ **EXPECTED** | | 2 | 0.680201 | `grafana_list` | ❌ | -| 3 | 0.660135 | `monitor_table_list` | ❌ | +| 3 | 0.660127 | `monitor_table_list` | ❌ | | 4 | 0.610623 | `kusto_cluster_list` | ❌ | -| 5 | 0.600802 | `search_service_list` | ❌ | +| 5 | 0.599636 | `search_service_list` | ❌ | --- -## Test 335 +## Test 345 **Expected Tool:** `monitor_workspace_list` **Prompt:** Show me my Log Analytics workspaces @@ -6157,15 +6337,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.656194 | `monitor_workspace_list` | ✅ **EXPECTED** | -| 2 | 0.585436 | `monitor_table_list` | ❌ | -| 3 | 0.531083 | `monitor_table_type_list` | ❌ | -| 4 | 0.518254 | `grafana_list` | ❌ | -| 5 | 0.506772 | `monitor_workspace_log_query` | ❌ | +| 1 | 0.656159 | `monitor_workspace_list` | ✅ **EXPECTED** | +| 2 | 0.585355 | `monitor_table_list` | ❌ | +| 3 | 0.531036 | `monitor_table_type_list` | ❌ | +| 4 | 0.518275 | `grafana_list` | ❌ | +| 5 | 0.506663 | `monitor_workspace_log_query` | ❌ | --- -## Test 336 +## Test 346 **Expected Tool:** `monitor_workspace_list` **Prompt:** Show me the Log 
Analytics workspaces in my subscription @@ -6174,15 +6354,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.732962 | `monitor_workspace_list` | ✅ **EXPECTED** | +| 1 | 0.732964 | `monitor_workspace_list` | ✅ **EXPECTED** | | 2 | 0.601481 | `grafana_list` | ❌ | -| 3 | 0.580261 | `monitor_table_list` | ❌ | +| 3 | 0.580244 | `monitor_table_list` | ❌ | | 4 | 0.523782 | `monitor_workspace_log_query` | ❌ | | 5 | 0.522749 | `kusto_cluster_list` | ❌ | --- -## Test 337 +## Test 347 **Expected Tool:** `monitor_workspace_log_query` **Prompt:** Show me the logs for the past hour in the Log Analytics workspace @@ -6193,13 +6373,13 @@ |------|-------|------|--------| | 1 | 0.610115 | `monitor_workspace_log_query` | ✅ **EXPECTED** | | 2 | 0.587614 | `monitor_resource_log_query` | ❌ | -| 3 | 0.527756 | `monitor_activitylog_list` | ❌ | -| 4 | 0.498269 | `deploy_app_logs_get` | ❌ | -| 5 | 0.485984 | `monitor_table_list` | ❌ | +| 3 | 0.527733 | `monitor_activitylog_list` | ❌ | +| 4 | 0.498148 | `deploy_app_logs_get` | ❌ | +| 5 | 0.485982 | `monitor_table_list` | ❌ | --- -## Test 338 +## Test 348 **Expected Tool:** `datadog_monitoredresources_list` **Prompt:** List all monitored resources in the Datadog resource @@ -6208,15 +6388,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.668827 | `datadog_monitoredresources_list` | ✅ **EXPECTED** | +| 1 | 0.668828 | `datadog_monitoredresources_list` | ✅ **EXPECTED** | | 2 | 0.454270 | `redis_list` | ❌ | | 3 | 0.413661 | `loadtesting_testresource_list` | ❌ | -| 4 | 0.413208 | `monitor_metrics_query` | ❌ | +| 4 | 0.413173 | `monitor_metrics_query` | ❌ | | 5 | 0.401731 | `grafana_list` | ❌ | --- -## Test 339 +## Test 349 **Expected Tool:** `datadog_monitoredresources_list` **Prompt:** Show me the monitored resources in the Datadog resource @@ -6226,14 +6406,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.624066 | `datadog_monitoredresources_list` | ✅ 
**EXPECTED** | -| 2 | 0.443813 | `monitor_metrics_query` | ❌ | +| 2 | 0.443481 | `monitor_metrics_query` | ❌ | | 3 | 0.440052 | `redis_list` | ❌ | | 4 | 0.424391 | `monitor_resource_log_query` | ❌ | | 5 | 0.385122 | `loadtesting_testresource_list` | ❌ | --- -## Test 340 +## Test 350 **Expected Tool:** `extension_azqr` **Prompt:** Check my Azure subscription for any compliance issues or recommendations @@ -6242,15 +6422,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.533406 | `quota_usage_check` | ❌ | -| 2 | 0.481236 | `azureterraformbestpractices_get` | ❌ | -| 3 | 0.476761 | `extension_azqr` | ✅ **EXPECTED** | +| 1 | 0.533403 | `quota_usage_check` | ❌ | +| 2 | 0.481143 | `azureterraformbestpractices_get` | ❌ | +| 3 | 0.476826 | `extension_azqr` | ✅ **EXPECTED** | | 4 | 0.471547 | `subscription_list` | ❌ | | 5 | 0.468404 | `applens_resource_diagnose` | ❌ | --- -## Test 341 +## Test 351 **Expected Tool:** `extension_azqr` **Prompt:** Provide compliance recommendations for my current Azure subscription @@ -6259,15 +6439,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.532869 | `azureterraformbestpractices_get` | ❌ | +| 1 | 0.532792 | `azureterraformbestpractices_get` | ❌ | | 2 | 0.492863 | `get_bestpractices_get` | ❌ | | 3 | 0.476164 | `applicationinsights_recommendation_list` | ❌ | | 4 | 0.473365 | `deploy_iac_rules_get` | ❌ | -| 5 | 0.464604 | `cloudarchitect_design` | ❌ | +| 5 | 0.468491 | `azureaibestpractices_get` | ❌ | --- -## Test 342 +## Test 352 **Expected Tool:** `extension_azqr` **Prompt:** Scan my Azure subscription for compliance recommendations @@ -6276,15 +6456,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.536984 | `azureterraformbestpractices_get` | ❌ | -| 2 | 0.516810 | `extension_azqr` | ✅ **EXPECTED** | -| 3 | 0.514978 | `applicationinsights_recommendation_list` | ❌ | -| 4 | 0.504929 | `quota_usage_check` | ❌ | -| 5 | 0.494872 | `deploy_plan_get` | ❌ 
| +| 1 | 0.536917 | `azureterraformbestpractices_get` | ❌ | +| 2 | 0.516910 | `extension_azqr` | ✅ **EXPECTED** | +| 3 | 0.514947 | `applicationinsights_recommendation_list` | ❌ | +| 4 | 0.504918 | `quota_usage_check` | ❌ | +| 5 | 0.494808 | `deploy_plan_get` | ❌ | --- -## Test 343 +## Test 353 **Expected Tool:** `quota_region_availability_list` **Prompt:** Show me the available regions for these resource types @@ -6294,14 +6474,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.590878 | `quota_region_availability_list` | ✅ **EXPECTED** | -| 2 | 0.413577 | `quota_usage_check` | ❌ | +| 2 | 0.413662 | `quota_usage_check` | ❌ | | 3 | 0.391332 | `redis_list` | ❌ | -| 4 | 0.373069 | `resourcehealth_availability-status_list` | ❌ | -| 5 | 0.369855 | `managedlustre_fs_sku_get` | ❌ | +| 4 | 0.372940 | `resourcehealth_availability-status_list` | ❌ | +| 5 | 0.369915 | `managedlustre_fs_sku_get` | ❌ | --- -## Test 344 +## Test 354 **Expected Tool:** `quota_usage_check` **Prompt:** Check usage information for in region @@ -6310,15 +6490,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.609607 | `quota_usage_check` | ✅ **EXPECTED** | +| 1 | 0.609711 | `quota_usage_check` | ✅ **EXPECTED** | | 2 | 0.491058 | `quota_region_availability_list` | ❌ | -| 3 | 0.384500 | `resourcehealth_availability-status_list` | ❌ | -| 4 | 0.376368 | `resourcehealth_availability-status_get` | ❌ | +| 3 | 0.384350 | `resourcehealth_availability-status_list` | ❌ | +| 4 | 0.376819 | `resourcehealth_availability-status_get` | ❌ | | 5 | 0.371407 | `redis_list` | ❌ | --- -## Test 345 +## Test 355 **Expected Tool:** `role_assignment_list` **Prompt:** List all available role assignments in my subscription @@ -6327,7 +6507,7 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.645259 | `role_assignment_list` | ✅ **EXPECTED** | +| 1 | 0.645258 | `role_assignment_list` | ✅ **EXPECTED** | | 2 | 0.539757 | `subscription_list` | ❌ | 
| 3 | 0.483988 | `group_list` | ❌ | | 4 | 0.478700 | `grafana_list` | ❌ | @@ -6335,7 +6515,7 @@ --- -## Test 346 +## Test 356 **Expected Tool:** `role_assignment_list` **Prompt:** Show me the available role assignments in my subscription @@ -6344,15 +6524,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.609705 | `role_assignment_list` | ✅ **EXPECTED** | +| 1 | 0.609704 | `role_assignment_list` | ✅ **EXPECTED** | | 2 | 0.514697 | `subscription_list` | ❌ | | 3 | 0.456956 | `grafana_list` | ❌ | -| 4 | 0.449210 | `eventgrid_subscription_list` | ❌ | +| 4 | 0.449753 | `eventgrid_subscription_list` | ❌ | | 5 | 0.445149 | `redis_list` | ❌ | --- -## Test 347 +## Test 357 **Expected Tool:** `redis_list` **Prompt:** List all Redis resources in my subscription @@ -6361,15 +6541,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.810487 | `redis_list` | ✅ **EXPECTED** | -| 2 | 0.587872 | `grafana_list` | ❌ | -| 3 | 0.512995 | `kusto_cluster_list` | ❌ | -| 4 | 0.508555 | `datadog_monitoredresources_list` | ❌ | -| 5 | 0.501183 | `postgres_server_list` | ❌ | +| 1 | 0.810504 | `redis_list` | ✅ **EXPECTED** | +| 2 | 0.587836 | `grafana_list` | ❌ | +| 3 | 0.512954 | `kusto_cluster_list` | ❌ | +| 4 | 0.508532 | `datadog_monitoredresources_list` | ❌ | +| 5 | 0.501218 | `postgres_server_list` | ❌ | --- -## Test 348 +## Test 358 **Expected Tool:** `redis_list` **Prompt:** Show me my Redis resources @@ -6379,14 +6559,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.685128 | `redis_list` | ✅ **EXPECTED** | -| 2 | 0.374328 | `grafana_list` | ❌ | +| 2 | 0.374327 | `grafana_list` | ❌ | | 3 | 0.364197 | `datadog_monitoredresources_list` | ❌ | -| 4 | 0.359709 | `mysql_server_list` | ❌ | +| 4 | 0.359659 | `mysql_server_list` | ❌ | | 5 | 0.331502 | `mysql_database_list` | ❌ | --- -## Test 349 +## Test 359 **Expected Tool:** `redis_list` **Prompt:** Show me the Redis resources in my subscription @@ -6398,12 
+6578,12 @@ | 1 | 0.781228 | `redis_list` | ✅ **EXPECTED** | | 2 | 0.539177 | `grafana_list` | ❌ | | 3 | 0.449276 | `datadog_monitoredresources_list` | ❌ | -| 4 | 0.448989 | `postgres_server_list` | ❌ | +| 4 | 0.449014 | `postgres_server_list` | ❌ | | 5 | 0.442854 | `kusto_cluster_list` | ❌ | --- -## Test 350 +## Test 360 **Expected Tool:** `redis_list` **Prompt:** Show me my Redis caches @@ -6415,12 +6595,12 @@ | 1 | 0.572767 | `redis_list` | ✅ **EXPECTED** | | 2 | 0.316630 | `mysql_database_list` | ❌ | | 3 | 0.301786 | `postgres_database_list` | ❌ | -| 4 | 0.286570 | `mysql_server_list` | ❌ | +| 4 | 0.286513 | `mysql_server_list` | ❌ | | 5 | 0.273014 | `kusto_cluster_list` | ❌ | --- -## Test 351 +## Test 361 **Expected Tool:** `redis_list` **Prompt:** Get Redis clusters @@ -6429,15 +6609,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.478109 | `redis_list` | ✅ **EXPECTED** | -| 2 | 0.456382 | `kusto_cluster_list` | ❌ | -| 3 | 0.384637 | `kusto_cluster_get` | ❌ | -| 4 | 0.359466 | `kusto_database_list` | ❌ | -| 5 | 0.343367 | `aks_cluster_get` | ❌ | +| 1 | 0.478070 | `redis_list` | ✅ **EXPECTED** | +| 2 | 0.456308 | `kusto_cluster_list` | ❌ | +| 3 | 0.384630 | `kusto_cluster_get` | ❌ | +| 4 | 0.359935 | `kusto_database_list` | ❌ | +| 5 | 0.343305 | `aks_cluster_get` | ❌ | --- -## Test 352 +## Test 362 **Expected Tool:** `group_list` **Prompt:** List all resource groups in my subscription @@ -6447,14 +6627,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.755935 | `group_list` | ✅ **EXPECTED** | -| 2 | 0.566497 | `workbooks_list` | ❌ | +| 2 | 0.566552 | `workbooks_list` | ❌ | | 3 | 0.564566 | `loadtesting_testresource_list` | ❌ | | 4 | 0.552633 | `datadog_monitoredresources_list` | ❌ | -| 5 | 0.549500 | `monitor_webtests_list` | ❌ | +| 5 | 0.549477 | `monitor_webtests_list` | ❌ | --- -## Test 353 +## Test 363 **Expected Tool:** `group_list` **Prompt:** Show me my resource groups @@ -6466,12 +6646,12 @@ 
| 1 | 0.529504 | `group_list` | ✅ **EXPECTED** | | 2 | 0.464690 | `redis_list` | ❌ | | 3 | 0.463685 | `datadog_monitoredresources_list` | ❌ | -| 4 | 0.462388 | `mysql_server_list` | ❌ | +| 4 | 0.462391 | `mysql_server_list` | ❌ | | 5 | 0.460280 | `loadtesting_testresource_list` | ❌ | --- -## Test 354 +## Test 364 **Expected Tool:** `group_list` **Prompt:** Show me the resource groups in my subscription @@ -6480,15 +6660,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.665771 | `group_list` | ✅ **EXPECTED** | +| 1 | 0.665772 | `group_list` | ✅ **EXPECTED** | | 2 | 0.532656 | `datadog_monitoredresources_list` | ❌ | | 3 | 0.532505 | `redis_list` | ❌ | -| 4 | 0.532369 | `resourcehealth_availability-status_list` | ❌ | -| 5 | 0.532054 | `eventgrid_topic_list` | ❌ | +| 4 | 0.532015 | `eventgrid_topic_list` | ❌ | +| 5 | 0.531920 | `resourcehealth_availability-status_list` | ❌ | --- -## Test 355 +## Test 365 **Expected Tool:** `resourcehealth_availability-status_get` **Prompt:** Get the availability status for resource @@ -6497,15 +6677,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.556629 | `resourcehealth_availability-status_get` | ✅ **EXPECTED** | -| 2 | 0.538277 | `resourcehealth_availability-status_list` | ❌ | -| 3 | 0.377966 | `quota_usage_check` | ❌ | +| 1 | 0.556926 | `resourcehealth_availability-status_get` | ✅ **EXPECTED** | +| 2 | 0.538273 | `resourcehealth_availability-status_list` | ❌ | +| 3 | 0.378030 | `quota_usage_check` | ❌ | | 4 | 0.373112 | `monitor_healthmodels_entity_get` | ❌ | -| 5 | 0.349980 | `datadog_monitoredresources_list` | ❌ | +| 5 | 0.349981 | `datadog_monitoredresources_list` | ❌ | --- -## Test 356 +## Test 366 **Expected Tool:** `resourcehealth_availability-status_get` **Prompt:** Show me the health status of the storage account @@ -6514,15 +6694,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.576617 | `storage_account_get` | ❌ | -| 2 | 0.564128 
| `resourcehealth_availability-status_get` | ✅ **EXPECTED** | -| 3 | 0.556167 | `storage_blob_container_get` | ❌ | +| 1 | 0.576591 | `storage_account_get` | ❌ | +| 2 | 0.564706 | `resourcehealth_availability-status_get` | ✅ **EXPECTED** | +| 3 | 0.555636 | `storage_blob_container_get` | ❌ | | 4 | 0.487207 | `storage_blob_get` | ❌ | -| 5 | 0.466950 | `resourcehealth_availability-status_list` | ❌ | +| 5 | 0.466885 | `resourcehealth_availability-status_list` | ❌ | --- -## Test 357 +## Test 367 **Expected Tool:** `resourcehealth_availability-status_get` **Prompt:** What is the availability status of virtual machine in resource group ? @@ -6531,15 +6711,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.577529 | `resourcehealth_availability-status_list` | ❌ | -| 2 | 0.501568 | `resourcehealth_availability-status_get` | ✅ **EXPECTED** | -| 3 | 0.424957 | `mysql_server_list` | ❌ | +| 1 | 0.577398 | `resourcehealth_availability-status_list` | ❌ | +| 2 | 0.502794 | `resourcehealth_availability-status_get` | ✅ **EXPECTED** | +| 3 | 0.424939 | `mysql_server_list` | ❌ | | 4 | 0.412025 | `loadtesting_testresource_list` | ❌ | -| 5 | 0.393471 | `managedlustre_fs_list` | ❌ | +| 5 | 0.393479 | `managedlustre_fs_list` | ❌ | --- -## Test 358 +## Test 368 **Expected Tool:** `resourcehealth_availability-status_list` **Prompt:** List availability status for all resources in my subscription @@ -6548,7 +6728,7 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.737550 | `resourcehealth_availability-status_list` | ✅ **EXPECTED** | +| 1 | 0.737219 | `resourcehealth_availability-status_list` | ✅ **EXPECTED** | | 2 | 0.585501 | `redis_list` | ❌ | | 3 | 0.549914 | `loadtesting_testresource_list` | ❌ | | 4 | 0.548549 | `grafana_list` | ❌ | @@ -6556,7 +6736,7 @@ --- -## Test 359 +## Test 369 **Expected Tool:** `resourcehealth_availability-status_list` **Prompt:** Show me the health status of all my Azure resources @@ -6565,15 +6745,15 @@ | 
Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.644908 | `resourcehealth_availability-status_list` | ✅ **EXPECTED** | -| 2 | 0.545208 | `resourcehealth_availability-status_get` | ❌ | +| 1 | 0.644982 | `resourcehealth_availability-status_list` | ✅ **EXPECTED** | +| 2 | 0.544917 | `resourcehealth_availability-status_get` | ❌ | | 3 | 0.509740 | `resourcehealth_health-events_list` | ❌ | -| 4 | 0.508703 | `quota_usage_check` | ❌ | +| 4 | 0.508766 | `quota_usage_check` | ❌ | | 5 | 0.505776 | `redis_list` | ❌ | --- -## Test 360 +## Test 370 **Expected Tool:** `resourcehealth_availability-status_list` **Prompt:** What resources in resource group have health issues? @@ -6582,15 +6762,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.596817 | `resourcehealth_availability-status_list` | ✅ **EXPECTED** | -| 2 | 0.549900 | `resourcehealth_availability-status_get` | ❌ | +| 1 | 0.596890 | `resourcehealth_availability-status_list` | ✅ **EXPECTED** | +| 2 | 0.550812 | `resourcehealth_availability-status_get` | ❌ | | 3 | 0.496640 | `resourcehealth_health-events_list` | ❌ | | 4 | 0.441921 | `applens_resource_diagnose` | ❌ | | 5 | 0.433614 | `loadtesting_testresource_list` | ❌ | --- -## Test 361 +## Test 371 **Expected Tool:** `resourcehealth_health-events_list` **Prompt:** List all service health events in my subscription @@ -6599,15 +6779,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.690719 | `resourcehealth_health-events_list` | ✅ **EXPECTED** | -| 2 | 0.554895 | `search_service_list` | ❌ | -| 3 | 0.534250 | `eventgrid_topic_list` | ❌ | -| 4 | 0.529761 | `eventgrid_subscription_list` | ❌ | -| 5 | 0.518595 | `resourcehealth_availability-status_list` | ❌ | +| 1 | 0.690720 | `resourcehealth_health-events_list` | ✅ **EXPECTED** | +| 2 | 0.553485 | `search_service_list` | ❌ | +| 3 | 0.534169 | `eventgrid_topic_list` | ❌ | +| 4 | 0.529200 | `eventgrid_subscription_list` | ❌ | +| 5 | 0.518372 | 
`resourcehealth_availability-status_list` | ❌ | --- -## Test 362 +## Test 372 **Expected Tool:** `resourcehealth_health-events_list` **Prompt:** Show me Azure service health events for subscription @@ -6617,14 +6797,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.686448 | `resourcehealth_health-events_list` | ✅ **EXPECTED** | -| 2 | 0.534556 | `eventgrid_subscription_list` | ❌ | -| 3 | 0.513815 | `search_service_list` | ❌ | -| 4 | 0.513259 | `eventgrid_topic_list` | ❌ | +| 2 | 0.534707 | `eventgrid_subscription_list` | ❌ | +| 3 | 0.513302 | `search_service_list` | ❌ | +| 4 | 0.513237 | `eventgrid_topic_list` | ❌ | | 5 | 0.501121 | `subscription_list` | ❌ | --- -## Test 363 +## Test 373 **Expected Tool:** `resourcehealth_health-events_list` **Prompt:** What service issues have occurred in the last 30 days? @@ -6633,15 +6813,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.450909 | `resourcehealth_health-events_list` | ✅ **EXPECTED** | -| 2 | 0.267752 | `applens_resource_diagnose` | ❌ | -| 3 | 0.245709 | `cloudarchitect_design` | ❌ | -| 4 | 0.217130 | `resourcehealth_availability-status_list` | ❌ | -| 5 | 0.211900 | `search_service_list` | ❌ | +| 1 | 0.450841 | `resourcehealth_health-events_list` | ✅ **EXPECTED** | +| 2 | 0.267663 | `applens_resource_diagnose` | ❌ | +| 3 | 0.245720 | `cloudarchitect_design` | ❌ | +| 4 | 0.216847 | `resourcehealth_availability-status_list` | ❌ | +| 5 | 0.211043 | `search_service_list` | ❌ | --- -## Test 364 +## Test 374 **Expected Tool:** `resourcehealth_health-events_list` **Prompt:** List active service health events in my subscription @@ -6651,14 +6831,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.685391 | `resourcehealth_health-events_list` | ✅ **EXPECTED** | -| 2 | 0.527905 | `eventgrid_subscription_list` | ❌ | -| 3 | 0.524063 | `eventgrid_topic_list` | ❌ | -| 4 | 0.520197 | `search_service_list` | ❌ | -| 5 | 0.502345 | 
`resourcehealth_availability-status_list` | ❌ | +| 2 | 0.527255 | `eventgrid_subscription_list` | ❌ | +| 3 | 0.523975 | `eventgrid_topic_list` | ❌ | +| 4 | 0.518668 | `search_service_list` | ❌ | +| 5 | 0.502064 | `resourcehealth_availability-status_list` | ❌ | --- -## Test 365 +## Test 375 **Expected Tool:** `resourcehealth_health-events_list` **Prompt:** Show me planned maintenance events for my Azure services @@ -6668,14 +6848,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.565851 | `resourcehealth_health-events_list` | ✅ **EXPECTED** | -| 2 | 0.437868 | `search_service_list` | ❌ | -| 3 | 0.403665 | `eventgrid_subscription_list` | ❌ | -| 4 | 0.402532 | `resourcehealth_availability-status_list` | ❌ | -| 5 | 0.398084 | `quota_usage_check` | ❌ | +| 2 | 0.436322 | `search_service_list` | ❌ | +| 3 | 0.404191 | `eventgrid_subscription_list` | ❌ | +| 4 | 0.402493 | `resourcehealth_availability-status_list` | ❌ | +| 5 | 0.398050 | `quota_usage_check` | ❌ | --- -## Test 366 +## Test 376 **Expected Tool:** `servicebus_queue_details` **Prompt:** Show me the details of service bus queue @@ -6686,13 +6866,13 @@ |------|-------|------|--------| | 1 | 0.642876 | `servicebus_queue_details` | ✅ **EXPECTED** | | 2 | 0.460932 | `servicebus_topic_subscription_details` | ❌ | -| 3 | 0.436980 | `servicebus_topic_details` | ❌ | +| 3 | 0.437000 | `servicebus_topic_details` | ❌ | | 4 | 0.385812 | `search_knowledge_base_get` | ❌ | -| 5 | 0.384133 | `storage_account_get` | ❌ | +| 5 | 0.384139 | `storage_account_get` | ❌ | --- -## Test 367 +## Test 377 **Expected Tool:** `servicebus_topic_details` **Prompt:** Show me the details of service bus topic @@ -6701,15 +6881,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.642952 | `servicebus_topic_details` | ✅ **EXPECTED** | -| 2 | 0.571861 | `servicebus_topic_subscription_details` | ❌ | +| 1 | 0.642762 | `servicebus_topic_details` | ✅ **EXPECTED** | +| 2 | 0.571860 | 
`servicebus_topic_subscription_details` | ❌ | | 3 | 0.483976 | `servicebus_queue_details` | ❌ | -| 4 | 0.482958 | `eventgrid_topic_list` | ❌ | -| 5 | 0.458711 | `eventgrid_subscription_list` | ❌ | +| 4 | 0.482735 | `eventgrid_topic_list` | ❌ | +| 5 | 0.457603 | `eventgrid_subscription_list` | ❌ | --- -## Test 368 +## Test 378 **Expected Tool:** `servicebus_topic_subscription_details` **Prompt:** Show me the details of service bus subscription @@ -6719,14 +6899,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.633187 | `servicebus_topic_subscription_details` | ✅ **EXPECTED** | -| 2 | 0.517623 | `servicebus_topic_details` | ❌ | +| 2 | 0.517516 | `servicebus_topic_details` | ❌ | | 3 | 0.494515 | `servicebus_queue_details` | ❌ | -| 4 | 0.493853 | `eventgrid_topic_list` | ❌ | -| 5 | 0.472128 | `eventgrid_subscription_list` | ❌ | +| 4 | 0.493776 | `eventgrid_topic_list` | ❌ | +| 5 | 0.471876 | `eventgrid_subscription_list` | ❌ | --- -## Test 369 +## Test 379 **Expected Tool:** `signalr_runtime_get` **Prompt:** Show me the details of SignalR @@ -6735,7 +6915,7 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.532544 | `signalr_runtime_get` | ✅ **EXPECTED** | +| 1 | 0.532742 | `signalr_runtime_get` | ✅ **EXPECTED** | | 2 | 0.355028 | `redis_list` | ❌ | | 3 | 0.329804 | `foundry_resource_get` | ❌ | | 4 | 0.319981 | `sql_server_show` | ❌ | @@ -6743,7 +6923,7 @@ --- -## Test 370 +## Test 380 **Expected Tool:** `signalr_runtime_get` **Prompt:** Show me the network information of SignalR runtime @@ -6752,15 +6932,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.573446 | `signalr_runtime_get` | ✅ **EXPECTED** | +| 1 | 0.573540 | `signalr_runtime_get` | ✅ **EXPECTED** | | 2 | 0.337342 | `sql_server_show` | ❌ | | 3 | 0.306559 | `foundry_resource_get` | ❌ | | 4 | 0.305021 | `redis_list` | ❌ | -| 5 | 0.300956 | `servicebus_topic_details` | ❌ | +| 5 | 0.301114 | `servicebus_topic_details` | ❌ | 
--- -## Test 371 +## Test 381 **Expected Tool:** `signalr_runtime_get` **Prompt:** Describe the SignalR runtime in resource group @@ -6769,15 +6949,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.710353 | `signalr_runtime_get` | ✅ **EXPECTED** | +| 1 | 0.710281 | `signalr_runtime_get` | ✅ **EXPECTED** | | 2 | 0.411396 | `loadtesting_testresource_list` | ❌ | | 3 | 0.410606 | `foundry_resource_get` | ❌ | -| 4 | 0.399745 | `resourcehealth_availability-status_list` | ❌ | -| 5 | 0.382472 | `sql_server_list` | ❌ | +| 4 | 0.399412 | `resourcehealth_availability-status_list` | ❌ | +| 5 | 0.382028 | `sql_server_list` | ❌ | --- -## Test 372 +## Test 382 **Expected Tool:** `signalr_runtime_get` **Prompt:** Get information about my SignalR runtime in @@ -6786,15 +6966,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.715913 | `signalr_runtime_get` | ✅ **EXPECTED** | -| 2 | 0.459979 | `foundry_resource_get` | ❌ | -| 3 | 0.431800 | `resourcehealth_availability-status_list` | ❌ | -| 4 | 0.431393 | `loadtesting_testresource_list` | ❌ | -| 5 | 0.417497 | `functionapp_get` | ❌ | +| 1 | 0.715701 | `signalr_runtime_get` | ✅ **EXPECTED** | +| 2 | 0.458894 | `foundry_resource_get` | ❌ | +| 3 | 0.431212 | `resourcehealth_availability-status_list` | ❌ | +| 4 | 0.430721 | `loadtesting_testresource_list` | ❌ | +| 5 | 0.417313 | `functionapp_get` | ❌ | --- -## Test 373 +## Test 383 **Expected Tool:** `signalr_runtime_get` **Prompt:** Show all the SignalRs information in @@ -6803,15 +6983,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.564072 | `signalr_runtime_get` | ✅ **EXPECTED** | +| 1 | 0.563883 | `signalr_runtime_get` | ✅ **EXPECTED** | | 2 | 0.501077 | `redis_list` | ❌ | -| 3 | 0.494808 | `resourcehealth_availability-status_list` | ❌ | +| 3 | 0.494478 | `resourcehealth_availability-status_list` | ❌ | | 4 | 0.481428 | `loadtesting_testresource_list` | ❌ | | 5 | 0.462090 | `mysql_server_list` 
| ❌ | --- -## Test 374 +## Test 384 **Expected Tool:** `signalr_runtime_get` **Prompt:** List all SignalRs in my subscription @@ -6820,15 +7000,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.530646 | `signalr_runtime_get` | ✅ **EXPECTED** | -| 2 | 0.507608 | `postgres_server_list` | ❌ | +| 1 | 0.530514 | `signalr_runtime_get` | ✅ **EXPECTED** | +| 2 | 0.507654 | `postgres_server_list` | ❌ | | 3 | 0.495157 | `redis_list` | ❌ | | 4 | 0.494498 | `kusto_cluster_list` | ❌ | | 5 | 0.487906 | `subscription_list` | ❌ | --- -## Test 375 +## Test 385 **Expected Tool:** `sql_db_create` **Prompt:** Create a new SQL database named in server @@ -6837,15 +7017,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.515502 | `sql_db_create` | ✅ **EXPECTED** | +| 1 | 0.516780 | `sql_db_create` | ✅ **EXPECTED** | | 2 | 0.470892 | `sql_server_create` | ❌ | -| 3 | 0.420504 | `sql_db_rename` | ❌ | +| 3 | 0.420389 | `sql_db_rename` | ❌ | | 4 | 0.408515 | `sql_db_delete` | ❌ | | 5 | 0.404860 | `sql_server_delete` | ❌ | --- -## Test 376 +## Test 386 **Expected Tool:** `sql_db_create` **Prompt:** Create a SQL database with Basic tier in server @@ -6854,15 +7034,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.570985 | `sql_db_create` | ✅ **EXPECTED** | +| 1 | 0.571760 | `sql_db_create` | ✅ **EXPECTED** | | 2 | 0.459672 | `sql_server_create` | ❌ | -| 3 | 0.437526 | `sql_server_delete` | ❌ | +| 3 | 0.437525 | `sql_server_delete` | ❌ | | 4 | 0.420843 | `sql_db_show` | ❌ | -| 5 | 0.417662 | `sql_db_delete` | ❌ | +| 5 | 0.417661 | `sql_db_delete` | ❌ | --- -## Test 377 +## Test 387 **Expected Tool:** `sql_db_create` **Prompt:** Create a new database called on SQL server in resource group @@ -6871,15 +7051,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.603797 | `sql_db_create` | ✅ **EXPECTED** | +| 1 | 0.604472 | `sql_db_create` | ✅ **EXPECTED** | | 2 | 0.545906 | 
`sql_server_create` | ❌ | -| 3 | 0.504013 | `sql_db_rename` | ❌ | +| 3 | 0.503938 | `sql_db_rename` | ❌ | | 4 | 0.494377 | `sql_db_show` | ❌ | -| 5 | 0.473859 | `sql_db_list` | ❌ | +| 5 | 0.473975 | `sql_db_list` | ❌ | --- -## Test 378 +## Test 388 **Expected Tool:** `sql_db_delete` **Prompt:** Delete the SQL database from server @@ -6890,13 +7070,13 @@ |------|-------|------|--------| | 1 | 0.568196 | `sql_db_delete` | ✅ **EXPECTED** | | 2 | 0.567412 | `sql_server_delete` | ❌ | -| 3 | 0.391509 | `sql_db_rename` | ❌ | -| 4 | 0.386564 | `sql_server_firewall-rule_delete` | ❌ | +| 3 | 0.391436 | `sql_db_rename` | ❌ | +| 4 | 0.386721 | `sql_server_firewall-rule_delete` | ❌ | | 5 | 0.364776 | `sql_db_show` | ❌ | --- -## Test 379 +## Test 389 **Expected Tool:** `sql_db_delete` **Prompt:** Remove database from SQL server in resource group @@ -6905,15 +7085,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.567481 | `sql_server_delete` | ❌ | -| 2 | 0.543378 | `sql_db_delete` | ✅ **EXPECTED** | -| 3 | 0.500746 | `sql_db_show` | ❌ | -| 4 | 0.480981 | `sql_db_rename` | ❌ | -| 5 | 0.478583 | `sql_db_list` | ❌ | +| 1 | 0.567513 | `sql_server_delete` | ❌ | +| 2 | 0.543440 | `sql_db_delete` | ✅ **EXPECTED** | +| 3 | 0.500756 | `sql_db_show` | ❌ | +| 4 | 0.481023 | `sql_db_rename` | ❌ | +| 5 | 0.478729 | `sql_db_list` | ❌ | --- -## Test 380 +## Test 390 **Expected Tool:** `sql_db_delete` **Prompt:** Delete the database called on server @@ -6923,14 +7103,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.509916 | `sql_db_delete` | ✅ **EXPECTED** | -| 2 | 0.490892 | `sql_server_delete` | ❌ | +| 2 | 0.490893 | `sql_server_delete` | ❌ | | 3 | 0.364494 | `postgres_database_list` | ❌ | | 4 | 0.355416 | `mysql_database_list` | ❌ | -| 5 | 0.347837 | `sql_db_rename` | ❌ | +| 5 | 0.347703 | `sql_db_rename` | ❌ | --- -## Test 381 +## Test 391 **Expected Tool:** `sql_db_list` **Prompt:** List all databases in the Azure SQL server @@ 
-6939,15 +7119,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.643202 | `sql_db_list` | ✅ **EXPECTED** | -| 2 | 0.639694 | `mysql_database_list` | ❌ | -| 3 | 0.609178 | `postgres_database_list` | ❌ | -| 4 | 0.602890 | `cosmos_database_list` | ❌ | -| 5 | 0.570103 | `kusto_database_list` | ❌ | +| 1 | 0.643138 | `sql_db_list` | ✅ **EXPECTED** | +| 2 | 0.639644 | `mysql_database_list` | ❌ | +| 3 | 0.609116 | `postgres_database_list` | ❌ | +| 4 | 0.602872 | `cosmos_database_list` | ❌ | +| 5 | 0.569464 | `kusto_database_list` | ❌ | --- -## Test 382 +## Test 392 **Expected Tool:** `sql_db_list` **Prompt:** Show me all the databases configuration details in the Azure SQL server @@ -6957,14 +7137,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.617746 | `sql_server_show` | ❌ | -| 2 | 0.609291 | `sql_db_list` | ✅ **EXPECTED** | +| 2 | 0.609322 | `sql_db_list` | ✅ **EXPECTED** | | 3 | 0.557353 | `mysql_database_list` | ❌ | | 4 | 0.553488 | `mysql_server_config_get` | ❌ | | 5 | 0.524274 | `sql_db_show` | ❌ | --- -## Test 383 +## Test 393 **Expected Tool:** `sql_db_rename` **Prompt:** Rename the SQL database on server to @@ -6973,15 +7153,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.593308 | `sql_db_rename` | ✅ **EXPECTED** | -| 2 | 0.425296 | `sql_server_delete` | ❌ | -| 3 | 0.416187 | `sql_db_delete` | ❌ | -| 4 | 0.396109 | `sql_db_create` | ❌ | -| 5 | 0.345991 | `sql_db_show` | ❌ | +| 1 | 0.593251 | `sql_db_rename` | ✅ **EXPECTED** | +| 2 | 0.425282 | `sql_server_delete` | ❌ | +| 3 | 0.416207 | `sql_db_delete` | ❌ | +| 4 | 0.396947 | `sql_db_create` | ❌ | +| 5 | 0.346018 | `sql_db_show` | ❌ | --- -## Test 384 +## Test 394 **Expected Tool:** `sql_db_rename` **Prompt:** Rename my Azure SQL database to on server @@ -6990,15 +7170,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.710788 | `sql_db_rename` | ✅ **EXPECTED** | -| 2 | 0.516432 | 
`sql_server_delete` | ❌ | -| 3 | 0.506388 | `sql_db_delete` | ❌ | -| 4 | 0.500926 | `sql_db_create` | ❌ | -| 5 | 0.434133 | `sql_server_show` | ❌ | +| 1 | 0.711257 | `sql_db_rename` | ✅ **EXPECTED** | +| 2 | 0.516770 | `sql_server_delete` | ❌ | +| 3 | 0.506834 | `sql_db_delete` | ❌ | +| 4 | 0.501963 | `sql_db_create` | ❌ | +| 5 | 0.434094 | `sql_server_show` | ❌ | --- -## Test 385 +## Test 395 **Expected Tool:** `sql_db_show` **Prompt:** Get the configuration details for the SQL database on server @@ -7007,15 +7187,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.611215 | `sql_server_show` | ❌ | -| 2 | 0.593200 | `postgres_server_config_get` | ❌ | -| 3 | 0.530520 | `mysql_server_config_get` | ❌ | -| 4 | 0.528378 | `sql_db_show` | ✅ **EXPECTED** | -| 5 | 0.465779 | `sql_db_list` | ❌ | +| 1 | 0.610991 | `sql_server_show` | ❌ | +| 2 | 0.593150 | `postgres_server_config_get` | ❌ | +| 3 | 0.530422 | `mysql_server_config_get` | ❌ | +| 4 | 0.528136 | `sql_db_show` | ✅ **EXPECTED** | +| 5 | 0.465693 | `sql_db_list` | ❌ | --- -## Test 386 +## Test 396 **Expected Tool:** `sql_db_show` **Prompt:** Show me the details of SQL database in server @@ -7024,15 +7204,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.530071 | `sql_db_show` | ✅ **EXPECTED** | -| 2 | 0.503602 | `sql_server_show` | ❌ | -| 3 | 0.439895 | `sql_db_list` | ❌ | -| 4 | 0.438615 | `mysql_table_schema_get` | ❌ | -| 5 | 0.432907 | `mysql_database_list` | ❌ | +| 1 | 0.530095 | `sql_db_show` | ✅ **EXPECTED** | +| 2 | 0.503681 | `sql_server_show` | ❌ | +| 3 | 0.440073 | `sql_db_list` | ❌ | +| 4 | 0.439076 | `mysql_table_schema_get` | ❌ | +| 5 | 0.432919 | `mysql_database_list` | ❌ | --- -## Test 387 +## Test 397 **Expected Tool:** `sql_db_update` **Prompt:** Update the performance tier of SQL database on server @@ -7041,15 +7221,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.603537 | `sql_db_update` | ✅ **EXPECTED** | 
-| 2 | 0.467332 | `sql_db_create` | ❌ | -| 3 | 0.440688 | `sql_db_rename` | ❌ | -| 4 | 0.427542 | `sql_db_show` | ❌ | -| 5 | 0.414267 | `sql_server_delete` | ❌ | +| 1 | 0.603271 | `sql_db_update` | ✅ **EXPECTED** | +| 2 | 0.467571 | `sql_db_create` | ❌ | +| 3 | 0.440442 | `sql_db_rename` | ❌ | +| 4 | 0.427621 | `sql_db_show` | ❌ | +| 5 | 0.413941 | `sql_server_delete` | ❌ | --- -## Test 388 +## Test 398 **Expected Tool:** `sql_db_update` **Prompt:** Scale SQL database on server to use SKU @@ -7058,15 +7238,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.550501 | `sql_db_update` | ✅ **EXPECTED** | -| 2 | 0.418334 | `sql_server_delete` | ❌ | -| 3 | 0.401717 | `sql_db_list` | ❌ | -| 4 | 0.395462 | `sql_db_rename` | ❌ | -| 5 | 0.394705 | `sql_db_show` | ❌ | +| 1 | 0.550449 | `sql_db_update` | ✅ **EXPECTED** | +| 2 | 0.418358 | `sql_server_delete` | ❌ | +| 3 | 0.401817 | `sql_db_list` | ❌ | +| 4 | 0.395508 | `sql_db_rename` | ❌ | +| 5 | 0.394770 | `sql_db_show` | ❌ | --- -## Test 389 +## Test 399 **Expected Tool:** `sql_elastic-pool_list` **Prompt:** List all elastic pools in SQL server @@ -7076,14 +7256,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.678124 | `sql_elastic-pool_list` | ✅ **EXPECTED** | -| 2 | 0.502382 | `sql_db_list` | ❌ | +| 2 | 0.502376 | `sql_db_list` | ❌ | | 3 | 0.498367 | `mysql_database_list` | ❌ | | 4 | 0.485249 | `aks_nodepool_get` | ❌ | | 5 | 0.479044 | `sql_server_show` | ❌ | --- -## Test 390 +## Test 400 **Expected Tool:** `sql_elastic-pool_list` **Prompt:** Show me the elastic pools configured for SQL server @@ -7092,15 +7272,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.606478 | `sql_elastic-pool_list` | ✅ **EXPECTED** | -| 2 | 0.502977 | `sql_server_show` | ❌ | -| 3 | 0.457262 | `sql_db_list` | ❌ | -| 4 | 0.450790 | `aks_nodepool_get` | ❌ | -| 5 | 0.432867 | `mysql_database_list` | ❌ | +| 1 | 0.606425 | `sql_elastic-pool_list` | ✅ 
**EXPECTED** | +| 2 | 0.502877 | `sql_server_show` | ❌ | +| 3 | 0.457164 | `sql_db_list` | ❌ | +| 4 | 0.450743 | `aks_nodepool_get` | ❌ | +| 5 | 0.432816 | `mysql_database_list` | ❌ | --- -## Test 391 +## Test 401 **Expected Tool:** `sql_elastic-pool_list` **Prompt:** What elastic pools are available in my SQL server ? @@ -7112,12 +7292,12 @@ | 1 | 0.592709 | `sql_elastic-pool_list` | ✅ **EXPECTED** | | 2 | 0.420325 | `mysql_database_list` | ❌ | | 3 | 0.407169 | `aks_nodepool_get` | ❌ | -| 4 | 0.402602 | `mysql_server_list` | ❌ | -| 5 | 0.397708 | `sql_db_list` | ❌ | +| 4 | 0.402616 | `mysql_server_list` | ❌ | +| 5 | 0.397670 | `sql_db_list` | ❌ | --- -## Test 392 +## Test 402 **Expected Tool:** `sql_server_create` **Prompt:** Create a new Azure SQL server named in resource group @@ -7126,15 +7306,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.682198 | `sql_server_create` | ✅ **EXPECTED** | -| 2 | 0.563307 | `sql_db_create` | ❌ | -| 3 | 0.529314 | `sql_server_list` | ❌ | -| 4 | 0.481645 | `storage_account_create` | ❌ | -| 5 | 0.473844 | `sql_db_rename` | ❌ | +| 1 | 0.682605 | `sql_server_create` | ✅ **EXPECTED** | +| 2 | 0.563707 | `sql_db_create` | ❌ | +| 3 | 0.529198 | `sql_server_list` | ❌ | +| 4 | 0.482102 | `storage_account_create` | ❌ | +| 5 | 0.474180 | `sql_db_rename` | ❌ | --- -## Test 393 +## Test 403 **Expected Tool:** `sql_server_create` **Prompt:** Create an Azure SQL server with name in location with admin user @@ -7143,15 +7323,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.618244 | `sql_server_create` | ✅ **EXPECTED** | -| 2 | 0.510507 | `sql_db_create` | ❌ | +| 1 | 0.618354 | `sql_server_create` | ✅ **EXPECTED** | +| 2 | 0.510222 | `sql_db_create` | ❌ | | 3 | 0.472462 | `sql_server_show` | ❌ | -| 4 | 0.441228 | `sql_server_delete` | ❌ | -| 5 | 0.401085 | `sql_db_rename` | ❌ | +| 4 | 0.441267 | `sql_server_delete` | ❌ | +| 5 | 0.400941 | `sql_db_rename` | ❌ | --- -## Test 394 +## 
Test 404 **Expected Tool:** `sql_server_create` **Prompt:** Set up a new SQL server called in my resource group @@ -7161,14 +7341,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.589818 | `sql_server_create` | ✅ **EXPECTED** | -| 2 | 0.500874 | `sql_db_create` | ❌ | -| 3 | 0.498255 | `sql_server_list` | ❌ | -| 4 | 0.461181 | `sql_db_rename` | ❌ | -| 5 | 0.442984 | `mysql_server_list` | ❌ | +| 2 | 0.501403 | `sql_db_create` | ❌ | +| 3 | 0.497890 | `sql_server_list` | ❌ | +| 4 | 0.461147 | `sql_db_rename` | ❌ | +| 5 | 0.442934 | `mysql_server_list` | ❌ | --- -## Test 395 +## Test 405 **Expected Tool:** `sql_server_delete` **Prompt:** Delete the Azure SQL server from resource group @@ -7179,13 +7359,13 @@ |------|-------|------|--------| | 1 | 0.656593 | `sql_server_delete` | ✅ **EXPECTED** | | 2 | 0.548064 | `sql_db_delete` | ❌ | -| 3 | 0.518306 | `sql_server_list` | ❌ | +| 3 | 0.518037 | `sql_server_list` | ❌ | | 4 | 0.495550 | `sql_server_create` | ❌ | | 5 | 0.483132 | `workbooks_delete` | ❌ | --- -## Test 396 +## Test 406 **Expected Tool:** `sql_server_delete` **Prompt:** Remove the SQL server from my subscription @@ -7195,14 +7375,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.615073 | `sql_server_delete` | ✅ **EXPECTED** | -| 2 | 0.393923 | `postgres_server_list` | ❌ | +| 2 | 0.393885 | `postgres_server_list` | ❌ | | 3 | 0.379760 | `sql_db_delete` | ❌ | | 4 | 0.376660 | `sql_server_show` | ❌ | -| 5 | 0.350384 | `sql_server_list` | ❌ | +| 5 | 0.350103 | `sql_server_list` | ❌ | --- -## Test 397 +## Test 407 **Expected Tool:** `sql_server_delete` **Prompt:** Delete SQL server permanently @@ -7213,13 +7393,13 @@ |------|-------|------|--------| | 1 | 0.624310 | `sql_server_delete` | ✅ **EXPECTED** | | 2 | 0.454892 | `sql_db_delete` | ❌ | -| 3 | 0.362389 | `sql_server_firewall-rule_delete` | ❌ | +| 3 | 0.362561 | `sql_server_firewall-rule_delete` | ❌ | | 4 | 0.341503 | `sql_server_show` | ❌ | -| 5 | 
0.319013 | `eventhubs_eventhub_delete` | ❌ | +| 5 | 0.318758 | `eventhubs_eventhub_delete` | ❌ | --- -## Test 398 +## Test 408 **Expected Tool:** `sql_server_entra-admin_list` **Prompt:** List Microsoft Entra ID administrators for SQL server @@ -7230,13 +7410,13 @@ |------|-------|------|--------| | 1 | 0.783479 | `sql_server_entra-admin_list` | ✅ **EXPECTED** | | 2 | 0.456051 | `sql_server_show` | ❌ | -| 3 | 0.434565 | `sql_server_list` | ❌ | -| 4 | 0.401908 | `sql_server_firewall-rule_list` | ❌ | -| 5 | 0.375977 | `sql_db_list` | ❌ | +| 3 | 0.434868 | `sql_server_list` | ❌ | +| 4 | 0.401854 | `sql_server_firewall-rule_list` | ❌ | +| 5 | 0.376055 | `sql_db_list` | ❌ | --- -## Test 399 +## Test 409 **Expected Tool:** `sql_server_entra-admin_list` **Prompt:** Show me the Entra ID administrators configured for SQL server @@ -7247,13 +7427,13 @@ |------|-------|------|--------| | 1 | 0.713306 | `sql_server_entra-admin_list` | ✅ **EXPECTED** | | 2 | 0.413144 | `sql_server_show` | ❌ | -| 3 | 0.367692 | `sql_server_list` | ❌ | -| 4 | 0.315939 | `sql_db_list` | ❌ | -| 5 | 0.311071 | `postgres_server_list` | ❌ | +| 3 | 0.368082 | `sql_server_list` | ❌ | +| 4 | 0.315966 | `sql_db_list` | ❌ | +| 5 | 0.311085 | `postgres_server_list` | ❌ | --- -## Test 400 +## Test 410 **Expected Tool:** `sql_server_entra-admin_list` **Prompt:** What Microsoft Entra ID administrators are set up for my SQL server ? 
@@ -7264,13 +7444,13 @@ |------|-------|------|--------| | 1 | 0.646419 | `sql_server_entra-admin_list` | ✅ **EXPECTED** | | 2 | 0.356025 | `sql_server_show` | ❌ | -| 3 | 0.322084 | `sql_server_list` | ❌ | +| 3 | 0.322155 | `sql_server_list` | ❌ | | 4 | 0.307823 | `sql_server_create` | ❌ | | 5 | 0.269788 | `sql_server_delete` | ❌ | --- -## Test 401 +## Test 411 **Expected Tool:** `sql_server_firewall-rule_create` **Prompt:** Create a firewall rule for my Azure SQL server @@ -7279,15 +7459,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.635466 | `sql_server_firewall-rule_create` | ✅ **EXPECTED** | -| 2 | 0.532712 | `sql_server_firewall-rule_list` | ❌ | -| 3 | 0.522184 | `sql_server_firewall-rule_delete` | ❌ | +| 1 | 0.635467 | `sql_server_firewall-rule_create` | ✅ **EXPECTED** | +| 2 | 0.532658 | `sql_server_firewall-rule_list` | ❌ | +| 3 | 0.522133 | `sql_server_firewall-rule_delete` | ❌ | | 4 | 0.448822 | `sql_server_create` | ❌ | | 5 | 0.440845 | `sql_server_delete` | ❌ | --- -## Test 402 +## Test 412 **Expected Tool:** `sql_server_firewall-rule_create` **Prompt:** Add a firewall rule to allow access from IP range to for SQL server @@ -7296,15 +7476,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.670233 | `sql_server_firewall-rule_create` | ✅ **EXPECTED** | -| 2 | 0.533669 | `sql_server_firewall-rule_list` | ❌ | -| 3 | 0.503500 | `sql_server_firewall-rule_delete` | ❌ | -| 4 | 0.316954 | `sql_server_list` | ❌ | -| 5 | 0.302510 | `sql_server_delete` | ❌ | +| 1 | 0.670392 | `sql_server_firewall-rule_create` | ✅ **EXPECTED** | +| 2 | 0.533587 | `sql_server_firewall-rule_list` | ❌ | +| 3 | 0.503740 | `sql_server_firewall-rule_delete` | ❌ | +| 4 | 0.316700 | `sql_server_list` | ❌ | +| 5 | 0.302273 | `sql_server_delete` | ❌ | --- -## Test 403 +## Test 413 **Expected Tool:** `sql_server_firewall-rule_create` **Prompt:** Create a new firewall rule named for SQL server @@ -7313,15 +7493,15 @@ | Rank | Score 
| Tool | Status | |------|-------|------|--------| -| 1 | 0.685107 | `sql_server_firewall-rule_create` | ✅ **EXPECTED** | -| 2 | 0.574336 | `sql_server_firewall-rule_list` | ❌ | -| 3 | 0.539577 | `sql_server_firewall-rule_delete` | ❌ | -| 4 | 0.428919 | `sql_server_create` | ❌ | -| 5 | 0.394446 | `sql_db_create` | ❌ | +| 1 | 0.685125 | `sql_server_firewall-rule_create` | ✅ **EXPECTED** | +| 2 | 0.574393 | `sql_server_firewall-rule_list` | ❌ | +| 3 | 0.539643 | `sql_server_firewall-rule_delete` | ❌ | +| 4 | 0.428987 | `sql_server_create` | ❌ | +| 5 | 0.395244 | `sql_db_create` | ❌ | --- -## Test 404 +## Test 414 **Expected Tool:** `sql_server_firewall-rule_delete` **Prompt:** Delete a firewall rule from my Azure SQL server @@ -7330,15 +7510,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.691421 | `sql_server_firewall-rule_delete` | ✅ **EXPECTED** | +| 1 | 0.691498 | `sql_server_firewall-rule_delete` | ✅ **EXPECTED** | | 2 | 0.584379 | `sql_server_delete` | ❌ | -| 3 | 0.543857 | `sql_server_firewall-rule_list` | ❌ | +| 3 | 0.543780 | `sql_server_firewall-rule_list` | ❌ | | 4 | 0.540333 | `sql_server_firewall-rule_create` | ❌ | | 5 | 0.498444 | `sql_db_delete` | ❌ | --- -## Test 405 +## Test 415 **Expected Tool:** `sql_server_firewall-rule_delete` **Prompt:** Remove the firewall rule from SQL server @@ -7347,15 +7527,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.670179 | `sql_server_firewall-rule_delete` | ✅ **EXPECTED** | -| 2 | 0.574340 | `sql_server_firewall-rule_list` | ❌ | +| 1 | 0.670233 | `sql_server_firewall-rule_delete` | ✅ **EXPECTED** | +| 2 | 0.574296 | `sql_server_firewall-rule_list` | ❌ | | 3 | 0.530419 | `sql_server_firewall-rule_create` | ❌ | | 4 | 0.488418 | `sql_server_delete` | ❌ | | 5 | 0.360381 | `sql_db_delete` | ❌ | --- -## Test 406 +## Test 416 **Expected Tool:** `sql_server_firewall-rule_delete` **Prompt:** Delete firewall rule for SQL server @@ -7364,15 +7544,15 @@ | Rank | 
Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.671212 | `sql_server_firewall-rule_delete` | ✅ **EXPECTED** | -| 2 | 0.601230 | `sql_server_firewall-rule_list` | ❌ | +| 1 | 0.671298 | `sql_server_firewall-rule_delete` | ✅ **EXPECTED** | +| 2 | 0.601174 | `sql_server_firewall-rule_list` | ❌ | | 3 | 0.577330 | `sql_server_firewall-rule_create` | ❌ | | 4 | 0.499272 | `sql_server_delete` | ❌ | -| 5 | 0.378585 | `sql_db_delete` | ❌ | +| 5 | 0.378586 | `sql_db_delete` | ❌ | --- -## Test 407 +## Test 417 **Expected Tool:** `sql_server_firewall-rule_list` **Prompt:** List all firewall rules for SQL server @@ -7381,15 +7561,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.729372 | `sql_server_firewall-rule_list` | ✅ **EXPECTED** | +| 1 | 0.729336 | `sql_server_firewall-rule_list` | ✅ **EXPECTED** | | 2 | 0.549667 | `sql_server_firewall-rule_create` | ❌ | -| 3 | 0.513114 | `sql_server_firewall-rule_delete` | ❌ | +| 3 | 0.513187 | `sql_server_firewall-rule_delete` | ❌ | | 4 | 0.468812 | `sql_server_show` | ❌ | -| 5 | 0.418869 | `sql_server_list` | ❌ | +| 5 | 0.418817 | `sql_server_list` | ❌ | --- -## Test 408 +## Test 418 **Expected Tool:** `sql_server_firewall-rule_list` **Prompt:** Show me the firewall rules for SQL server @@ -7398,15 +7578,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.630731 | `sql_server_firewall-rule_list` | ✅ **EXPECTED** | +| 1 | 0.630671 | `sql_server_firewall-rule_list` | ✅ **EXPECTED** | | 2 | 0.524126 | `sql_server_firewall-rule_create` | ❌ | -| 3 | 0.476757 | `sql_server_firewall-rule_delete` | ❌ | +| 3 | 0.476792 | `sql_server_firewall-rule_delete` | ❌ | | 4 | 0.410680 | `sql_server_show` | ❌ | -| 5 | 0.348249 | `sql_server_list` | ❌ | +| 5 | 0.348100 | `sql_server_list` | ❌ | --- -## Test 409 +## Test 419 **Expected Tool:** `sql_server_firewall-rule_list` **Prompt:** What firewall rules are configured for my SQL server ? 
@@ -7415,15 +7595,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.630546 | `sql_server_firewall-rule_list` | ✅ **EXPECTED** | +| 1 | 0.630460 | `sql_server_firewall-rule_list` | ✅ **EXPECTED** | | 2 | 0.532454 | `sql_server_firewall-rule_create` | ❌ | -| 3 | 0.473501 | `sql_server_firewall-rule_delete` | ❌ | +| 3 | 0.473596 | `sql_server_firewall-rule_delete` | ❌ | | 4 | 0.412957 | `sql_server_show` | ❌ | -| 5 | 0.350545 | `sql_server_list` | ❌ | +| 5 | 0.350513 | `sql_server_list` | ❌ | --- -## Test 410 +## Test 420 **Expected Tool:** `sql_server_list` **Prompt:** List all Azure SQL servers in resource group @@ -7432,15 +7612,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.694268 | `sql_server_list` | ✅ **EXPECTED** | -| 2 | 0.596720 | `mysql_server_list` | ❌ | -| 3 | 0.578135 | `sql_db_list` | ❌ | +| 1 | 0.694404 | `sql_server_list` | ✅ **EXPECTED** | +| 2 | 0.596686 | `mysql_server_list` | ❌ | +| 3 | 0.578238 | `sql_db_list` | ❌ | | 4 | 0.515851 | `sql_elastic-pool_list` | ❌ | | 5 | 0.509789 | `sql_db_show` | ❌ | --- -## Test 411 +## Test 421 **Expected Tool:** `sql_server_list` **Prompt:** Show me every SQL server available in resource group @@ -7449,15 +7629,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.618206 | `sql_server_list` | ✅ **EXPECTED** | -| 2 | 0.593874 | `mysql_server_list` | ❌ | -| 3 | 0.542307 | `sql_db_list` | ❌ | -| 4 | 0.507683 | `resourcehealth_availability-status_list` | ❌ | +| 1 | 0.618218 | `sql_server_list` | ✅ **EXPECTED** | +| 2 | 0.593837 | `mysql_server_list` | ❌ | +| 3 | 0.542398 | `sql_db_list` | ❌ | +| 4 | 0.507404 | `resourcehealth_availability-status_list` | ❌ | | 5 | 0.496200 | `group_list` | ❌ | --- -## Test 412 +## Test 422 **Expected Tool:** `sql_server_show` **Prompt:** Show me the details of Azure SQL server in resource group @@ -7468,13 +7648,13 @@ |------|-------|------|--------| | 1 | 0.629672 | `sql_db_show` | ❌ | | 2 | 
0.595184 | `sql_server_show` | ✅ **EXPECTED** | -| 3 | 0.587826 | `sql_server_list` | ❌ | -| 4 | 0.559936 | `mysql_server_list` | ❌ | -| 5 | 0.540037 | `sql_db_list` | ❌ | +| 3 | 0.587728 | `sql_server_list` | ❌ | +| 4 | 0.559893 | `mysql_server_list` | ❌ | +| 5 | 0.540218 | `sql_db_list` | ❌ | --- -## Test 413 +## Test 423 **Expected Tool:** `sql_server_show` **Prompt:** Get the configuration details for SQL server @@ -7487,11 +7667,11 @@ | 2 | 0.610507 | `postgres_server_config_get` | ❌ | | 3 | 0.538034 | `mysql_server_config_get` | ❌ | | 4 | 0.471541 | `sql_db_show` | ❌ | -| 5 | 0.445430 | `postgres_server_param_get` | ❌ | +| 5 | 0.445432 | `postgres_server_param_get` | ❌ | --- -## Test 414 +## Test 424 **Expected Tool:** `sql_server_show` **Prompt:** Display the properties of SQL server @@ -7502,13 +7682,13 @@ |------|-------|------|--------| | 1 | 0.563143 | `sql_server_show` | ✅ **EXPECTED** | | 2 | 0.392532 | `postgres_server_config_get` | ❌ | -| 3 | 0.380021 | `postgres_server_param_get` | ❌ | -| 4 | 0.372194 | `sql_server_firewall-rule_list` | ❌ | +| 3 | 0.380035 | `postgres_server_param_get` | ❌ | +| 4 | 0.372102 | `sql_server_firewall-rule_list` | ❌ | | 5 | 0.370539 | `sql_db_show` | ❌ | --- -## Test 415 +## Test 425 **Expected Tool:** `storage_account_create` **Prompt:** Create a new storage account called testaccount123 in East US region @@ -7519,13 +7699,13 @@ |------|-------|------|--------| | 1 | 0.533552 | `storage_account_create` | ✅ **EXPECTED** | | 2 | 0.438046 | `storage_blob_container_create` | ❌ | -| 3 | 0.418002 | `storage_account_get` | ❌ | -| 4 | 0.414518 | `storage_blob_container_get` | ❌ | -| 5 | 0.370957 | `managedlustre_fs_create` | ❌ | +| 3 | 0.418191 | `storage_account_get` | ❌ | +| 4 | 0.413950 | `storage_blob_container_get` | ❌ | +| 5 | 0.373651 | `managedlustre_fs_create` | ❌ | --- -## Test 416 +## Test 426 **Expected Tool:** `storage_account_create` **Prompt:** Create a storage account with premium performance and LRS replication 
@@ -7535,14 +7715,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.500638 | `storage_account_create` | ✅ **EXPECTED** | -| 2 | 0.483202 | `managedlustre_fs_create` | ❌ | -| 3 | 0.407182 | `storage_account_get` | ❌ | +| 2 | 0.484584 | `managedlustre_fs_create` | ❌ | +| 3 | 0.407222 | `storage_account_get` | ❌ | | 4 | 0.406804 | `storage_blob_container_create` | ❌ | -| 5 | 0.400151 | `managedlustre_fs_sku_get` | ❌ | +| 5 | 0.400134 | `managedlustre_fs_sku_get` | ❌ | --- -## Test 417 +## Test 427 **Expected Tool:** `storage_account_create` **Prompt:** Create a new storage account with Data Lake Storage Gen2 enabled @@ -7551,15 +7731,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.589003 | `storage_account_create` | ✅ **EXPECTED** | -| 2 | 0.535501 | `managedlustre_fs_create` | ❌ | +| 1 | 0.589002 | `storage_account_create` | ✅ **EXPECTED** | +| 2 | 0.538023 | `managedlustre_fs_create` | ❌ | | 3 | 0.509731 | `storage_blob_container_create` | ❌ | -| 4 | 0.462494 | `storage_account_get` | ❌ | -| 5 | 0.447560 | `sql_db_create` | ❌ | +| 4 | 0.462519 | `storage_account_get` | ❌ | +| 5 | 0.447156 | `sql_db_create` | ❌ | --- -## Test 418 +## Test 428 **Expected Tool:** `storage_account_get` **Prompt:** Show me the details for my storage account @@ -7568,15 +7748,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.673569 | `storage_account_get` | ✅ **EXPECTED** | -| 2 | 0.608073 | `storage_blob_container_get` | ❌ | -| 3 | 0.556407 | `storage_blob_get` | ❌ | -| 4 | 0.483573 | `storage_account_create` | ❌ | -| 5 | 0.439109 | `cosmos_account_list` | ❌ | +| 1 | 0.673750 | `storage_account_get` | ✅ **EXPECTED** | +| 2 | 0.607762 | `storage_blob_container_get` | ❌ | +| 3 | 0.556457 | `storage_blob_get` | ❌ | +| 4 | 0.483435 | `storage_account_create` | ❌ | +| 5 | 0.439236 | `cosmos_account_list` | ❌ | --- -## Test 419 +## Test 429 **Expected Tool:** `storage_account_get` **Prompt:** Get 
details about the storage account @@ -7585,15 +7765,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.692473 | `storage_account_get` | ✅ **EXPECTED** | -| 2 | 0.577547 | `storage_blob_container_get` | ❌ | +| 1 | 0.692687 | `storage_account_get` | ✅ **EXPECTED** | +| 2 | 0.577173 | `storage_blob_container_get` | ❌ | | 3 | 0.529205 | `storage_blob_get` | ❌ | | 4 | 0.518215 | `storage_account_create` | ❌ | -| 5 | 0.448507 | `storage_blob_container_create` | ❌ | +| 5 | 0.448506 | `storage_blob_container_create` | ❌ | --- -## Test 420 +## Test 430 **Expected Tool:** `storage_account_get` **Prompt:** List all storage accounts in my subscription including their location and SKU @@ -7602,15 +7782,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.649393 | `storage_account_get` | ✅ **EXPECTED** | -| 2 | 0.557016 | `managedlustre_fs_sku_get` | ❌ | -| 3 | 0.550148 | `storage_blob_container_get` | ❌ | +| 1 | 0.649215 | `storage_account_get` | ✅ **EXPECTED** | +| 2 | 0.557093 | `managedlustre_fs_sku_get` | ❌ | +| 3 | 0.549448 | `storage_blob_container_get` | ❌ | | 4 | 0.547577 | `subscription_list` | ❌ | | 5 | 0.536909 | `cosmos_account_list` | ❌ | --- -## Test 421 +## Test 431 **Expected Tool:** `storage_account_get` **Prompt:** Show me my storage accounts with whether hierarchical namespace (HNS) is enabled @@ -7619,15 +7799,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.557064 | `storage_account_get` | ✅ **EXPECTED** | -| 2 | 0.482418 | `storage_blob_container_get` | ❌ | -| 3 | 0.461308 | `managedlustre_fs_list` | ❌ | +| 1 | 0.556860 | `storage_account_get` | ✅ **EXPECTED** | +| 2 | 0.481664 | `storage_blob_container_get` | ❌ | +| 3 | 0.461284 | `managedlustre_fs_list` | ❌ | | 4 | 0.421642 | `cosmos_account_list` | ❌ | | 5 | 0.410587 | `storage_blob_get` | ❌ | --- -## Test 422 +## Test 432 **Expected Tool:** `storage_account_get` **Prompt:** Show me the storage accounts in my 
subscription and include HTTPS-only and public blob access settings @@ -7636,15 +7816,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.619639 | `storage_account_get` | ✅ **EXPECTED** | -| 2 | 0.556436 | `storage_blob_container_get` | ❌ | +| 1 | 0.619462 | `storage_account_get` | ✅ **EXPECTED** | +| 2 | 0.555677 | `storage_blob_container_get` | ❌ | | 3 | 0.518229 | `storage_blob_get` | ❌ | | 4 | 0.473598 | `cosmos_account_list` | ❌ | | 5 | 0.465527 | `subscription_list` | ❌ | --- -## Test 423 +## Test 433 **Expected Tool:** `storage_blob_container_create` **Prompt:** Create the storage container mycontainer in storage account @@ -7654,14 +7834,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.649793 | `storage_blob_container_create` | ✅ **EXPECTED** | -| 2 | 0.583896 | `storage_blob_container_get` | ❌ | +| 2 | 0.585556 | `storage_blob_container_get` | ❌ | | 3 | 0.524779 | `storage_account_create` | ❌ | | 4 | 0.496679 | `storage_blob_get` | ❌ | | 5 | 0.447784 | `cosmos_database_container_list` | ❌ | --- -## Test 424 +## Test 434 **Expected Tool:** `storage_blob_container_create` **Prompt:** Create the container using blob public access in storage account @@ -7671,14 +7851,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.682161 | `storage_blob_container_create` | ✅ **EXPECTED** | -| 2 | 0.590160 | `storage_blob_container_get` | ❌ | -| 3 | 0.559263 | `storage_blob_get` | ❌ | -| 4 | 0.500624 | `storage_account_create` | ❌ | -| 5 | 0.420434 | `storage_account_get` | ❌ | +| 2 | 0.590826 | `storage_blob_container_get` | ❌ | +| 3 | 0.559264 | `storage_blob_get` | ❌ | +| 4 | 0.500625 | `storage_account_create` | ❌ | +| 5 | 0.420514 | `storage_account_get` | ❌ | --- -## Test 425 +## Test 435 **Expected Tool:** `storage_blob_container_create` **Prompt:** Create a new blob container named documents with container public access in storage account @@ -7688,14 +7868,14 @@ | Rank | Score | 
Tool | Status | |------|-------|------|--------| | 1 | 0.625397 | `storage_blob_container_create` | ✅ **EXPECTED** | -| 2 | 0.543503 | `storage_blob_container_get` | ❌ | +| 2 | 0.544024 | `storage_blob_container_get` | ❌ | | 3 | 0.497804 | `storage_blob_get` | ❌ | | 4 | 0.463198 | `storage_account_create` | ❌ | | 5 | 0.435099 | `cosmos_database_container_list` | ❌ | --- -## Test 426 +## Test 436 **Expected Tool:** `storage_blob_container_get` **Prompt:** Show me the properties of the storage container in the storage account @@ -7704,15 +7884,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.701642 | `storage_blob_container_get` | ✅ **EXPECTED** | +| 1 | 0.703348 | `storage_blob_container_get` | ✅ **EXPECTED** | | 2 | 0.623681 | `storage_blob_get` | ❌ | -| 3 | 0.577740 | `storage_account_get` | ❌ | -| 4 | 0.549803 | `storage_blob_container_create` | ❌ | -| 5 | 0.523288 | `cosmos_database_container_list` | ❌ | +| 3 | 0.577921 | `storage_account_get` | ❌ | +| 4 | 0.549804 | `storage_blob_container_create` | ❌ | +| 5 | 0.523289 | `cosmos_database_container_list` | ❌ | --- -## Test 427 +## Test 437 **Expected Tool:** `storage_blob_container_get` **Prompt:** List all blob containers in the storage account @@ -7721,15 +7901,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.712037 | `storage_blob_container_get` | ✅ **EXPECTED** | +| 1 | 0.712012 | `storage_blob_container_get` | ✅ **EXPECTED** | | 2 | 0.680802 | `storage_blob_get` | ❌ | | 3 | 0.613933 | `cosmos_database_container_list` | ❌ | | 4 | 0.556319 | `storage_blob_container_create` | ❌ | -| 5 | 0.518401 | `storage_account_get` | ❌ | +| 5 | 0.518266 | `storage_account_get` | ❌ | --- -## Test 428 +## Test 438 **Expected Tool:** `storage_blob_container_get` **Prompt:** Show me the containers in the storage account @@ -7738,15 +7918,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.713527 | `storage_blob_container_get` | ✅ 
**EXPECTED** | +| 1 | 0.713080 | `storage_blob_container_get` | ✅ **EXPECTED** | | 2 | 0.592373 | `cosmos_database_container_list` | ❌ | | 3 | 0.586169 | `storage_blob_get` | ❌ | -| 4 | 0.523353 | `storage_account_get` | ❌ | -| 5 | 0.487521 | `storage_blob_container_create` | ❌ | +| 4 | 0.523322 | `storage_account_get` | ❌ | +| 5 | 0.487520 | `storage_blob_container_create` | ❌ | --- -## Test 429 +## Test 439 **Expected Tool:** `storage_blob_get` **Prompt:** Show me the properties for blob in container in storage account @@ -7755,15 +7935,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.700969 | `storage_blob_get` | ✅ **EXPECTED** | -| 2 | 0.647029 | `storage_blob_container_get` | ❌ | -| 3 | 0.541060 | `storage_blob_container_create` | ❌ | -| 4 | 0.527327 | `storage_account_get` | ❌ | -| 5 | 0.477993 | `cosmos_database_container_list` | ❌ | +| 1 | 0.700963 | `storage_blob_get` | ✅ **EXPECTED** | +| 2 | 0.648279 | `storage_blob_container_get` | ❌ | +| 3 | 0.540987 | `storage_blob_container_create` | ❌ | +| 4 | 0.527363 | `storage_account_get` | ❌ | +| 5 | 0.477959 | `cosmos_database_container_list` | ❌ | --- -## Test 430 +## Test 440 **Expected Tool:** `storage_blob_get` **Prompt:** Get the details about blob in the container in storage account @@ -7773,14 +7953,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.694997 | `storage_blob_get` | ✅ **EXPECTED** | -| 2 | 0.631161 | `storage_blob_container_get` | ❌ | -| 3 | 0.589152 | `storage_blob_container_create` | ❌ | -| 4 | 0.579989 | `storage_account_get` | ❌ | +| 2 | 0.633397 | `storage_blob_container_get` | ❌ | +| 3 | 0.589151 | `storage_blob_container_create` | ❌ | +| 4 | 0.580226 | `storage_account_get` | ❌ | | 5 | 0.457038 | `storage_account_create` | ❌ | --- -## Test 431 +## Test 441 **Expected Tool:** `storage_blob_get` **Prompt:** List all blobs in the blob container in the storage account @@ -7790,14 +7970,14 @@ | Rank | Score | Tool | Status | 
|------|-------|------|--------| | 1 | 0.733586 | `storage_blob_get` | ✅ **EXPECTED** | -| 2 | 0.700891 | `storage_blob_container_get` | ❌ | +| 2 | 0.702342 | `storage_blob_container_get` | ❌ | | 3 | 0.605993 | `storage_blob_container_create` | ❌ | | 4 | 0.579070 | `cosmos_database_container_list` | ❌ | -| 5 | 0.506792 | `cosmos_database_container_item_query` | ❌ | +| 5 | 0.506639 | `cosmos_database_container_item_query` | ❌ | --- -## Test 432 +## Test 442 **Expected Tool:** `storage_blob_get` **Prompt:** Show me the blobs in the blob container in the storage account @@ -7807,14 +7987,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.704426 | `storage_blob_get` | ✅ **EXPECTED** | -| 2 | 0.664940 | `storage_blob_container_get` | ❌ | +| 2 | 0.666342 | `storage_blob_container_get` | ❌ | | 3 | 0.561557 | `storage_blob_container_create` | ❌ | | 4 | 0.533515 | `cosmos_database_container_list` | ❌ | -| 5 | 0.484052 | `cosmos_database_container_item_query` | ❌ | +| 5 | 0.484018 | `storage_account_get` | ❌ | --- -## Test 433 +## Test 443 **Expected Tool:** `storage_blob_upload` **Prompt:** Upload file to storage blob in container in storage account @@ -7823,15 +8003,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.566280 | `storage_blob_upload` | ✅ **EXPECTED** | -| 2 | 0.525689 | `storage_blob_container_create` | ❌ | -| 3 | 0.517628 | `storage_blob_get` | ❌ | -| 4 | 0.473667 | `storage_blob_container_get` | ❌ | -| 5 | 0.382148 | `storage_account_create` | ❌ | +| 1 | 0.566278 | `storage_blob_upload` | ✅ **EXPECTED** | +| 2 | 0.525685 | `storage_blob_container_create` | ❌ | +| 3 | 0.517524 | `storage_blob_get` | ❌ | +| 4 | 0.474395 | `storage_blob_container_get` | ❌ | +| 5 | 0.382007 | `storage_account_create` | ❌ | --- -## Test 434 +## Test 444 **Expected Tool:** `subscription_list` **Prompt:** List all subscriptions for my account @@ -7842,13 +8022,13 @@ |------|-------|------|--------| | 1 | 0.654048 | 
`subscription_list` | ✅ **EXPECTED** | | 2 | 0.512964 | `cosmos_account_list` | ❌ | -| 3 | 0.471615 | `postgres_server_list` | ❌ | +| 3 | 0.471653 | `postgres_server_list` | ❌ | | 4 | 0.469023 | `kusto_cluster_list` | ❌ | | 5 | 0.461078 | `redis_list` | ❌ | --- -## Test 435 +## Test 445 **Expected Tool:** `subscription_list` **Prompt:** Show me my subscriptions @@ -7858,14 +8038,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.458834 | `subscription_list` | ✅ **EXPECTED** | -| 2 | 0.407471 | `eventgrid_subscription_list` | ❌ | -| 3 | 0.393695 | `eventgrid_topic_list` | ❌ | +| 2 | 0.407101 | `eventgrid_subscription_list` | ❌ | +| 3 | 0.393662 | `eventgrid_topic_list` | ❌ | | 4 | 0.391555 | `redis_list` | ❌ | -| 5 | 0.381219 | `postgres_server_list` | ❌ | +| 5 | 0.381238 | `postgres_server_list` | ❌ | --- -## Test 436 +## Test 446 **Expected Tool:** `subscription_list` **Prompt:** What is my current subscription? @@ -7876,13 +8056,13 @@ |------|-------|------|--------| | 1 | 0.433242 | `subscription_list` | ✅ **EXPECTED** | | 2 | 0.319579 | `marketplace_product_list` | ❌ | -| 3 | 0.315354 | `marketplace_product_get` | ❌ | -| 4 | 0.293772 | `eventgrid_subscription_list` | ❌ | -| 5 | 0.289334 | `eventgrid_topic_list` | ❌ | +| 3 | 0.315547 | `marketplace_product_get` | ❌ | +| 4 | 0.293009 | `eventgrid_subscription_list` | ❌ | +| 5 | 0.289280 | `eventgrid_topic_list` | ❌ | --- -## Test 437 +## Test 447 **Expected Tool:** `subscription_list` **Prompt:** What subscriptions do I have? 
@@ -7892,14 +8072,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.477657 | `subscription_list` | ✅ **EXPECTED** | -| 2 | 0.357625 | `eventgrid_subscription_list` | ❌ | +| 2 | 0.356775 | `eventgrid_subscription_list` | ❌ | | 3 | 0.354286 | `marketplace_product_list` | ❌ | | 4 | 0.344549 | `redis_list` | ❌ | -| 5 | 0.340837 | `eventgrid_topic_list` | ❌ | +| 5 | 0.340764 | `eventgrid_topic_list` | ❌ | --- -## Test 438 +## Test 448 **Expected Tool:** `azureterraformbestpractices_get` **Prompt:** Fetch the Azure Terraform best practices @@ -7908,15 +8088,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.686949 | `azureterraformbestpractices_get` | ✅ **EXPECTED** | +| 1 | 0.686886 | `azureterraformbestpractices_get` | ✅ **EXPECTED** | | 2 | 0.625270 | `deploy_iac_rules_get` | ❌ | -| 3 | 0.605047 | `get_bestpractices_get` | ❌ | -| 4 | 0.482936 | `deploy_pipeline_guidance_get` | ❌ | -| 5 | 0.466199 | `deploy_plan_get` | ❌ | +| 3 | 0.605048 | `get_bestpractices_get` | ❌ | +| 4 | 0.482745 | `deploy_pipeline_guidance_get` | ❌ | +| 5 | 0.468390 | `azureaibestpractices_get` | ❌ | --- -## Test 439 +## Test 449 **Expected Tool:** `azureterraformbestpractices_get` **Prompt:** Show me the Azure Terraform best practices and generate code sample to get a secret from Azure Key Vault @@ -7925,15 +8105,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.581332 | `azureterraformbestpractices_get` | ✅ **EXPECTED** | +| 1 | 0.581316 | `azureterraformbestpractices_get` | ✅ **EXPECTED** | | 2 | 0.512141 | `get_bestpractices_get` | ❌ | -| 3 | 0.510004 | `deploy_iac_rules_get` | ❌ | -| 4 | 0.473596 | `keyvault_secret_get` | ❌ | -| 5 | 0.444297 | `deploy_pipeline_guidance_get` | ❌ | +| 3 | 0.510005 | `deploy_iac_rules_get` | ❌ | +| 4 | 0.473943 | `keyvault_secret_get` | ❌ | +| 5 | 0.451726 | `azureaibestpractices_get` | ❌ | --- -## Test 440 +## Test 450 **Expected Tool:** `virtualdesktop_hostpool_list` 
**Prompt:** List all host pools in my subscription @@ -7942,15 +8122,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.711969 | `virtualdesktop_hostpool_list` | ✅ **EXPECTED** | +| 1 | 0.711905 | `virtualdesktop_hostpool_list` | ✅ **EXPECTED** | | 2 | 0.659763 | `virtualdesktop_hostpool_host_list` | ❌ | -| 3 | 0.620666 | `kusto_cluster_list` | ❌ | -| 4 | 0.548888 | `search_service_list` | ❌ | -| 5 | 0.535777 | `virtualdesktop_hostpool_host_user-list` | ❌ | +| 3 | 0.620665 | `kusto_cluster_list` | ❌ | +| 4 | 0.546744 | `search_service_list` | ❌ | +| 5 | 0.536423 | `virtualdesktop_hostpool_host_user-list` | ❌ | --- -## Test 441 +## Test 451 **Expected Tool:** `virtualdesktop_hostpool_host_list` **Prompt:** List all session hosts in host pool @@ -7960,14 +8140,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.727054 | `virtualdesktop_hostpool_host_list` | ✅ **EXPECTED** | -| 2 | 0.714553 | `virtualdesktop_hostpool_host_user-list` | ❌ | -| 3 | 0.573352 | `virtualdesktop_hostpool_list` | ❌ | +| 2 | 0.715572 | `virtualdesktop_hostpool_host_user-list` | ❌ | +| 3 | 0.573350 | `virtualdesktop_hostpool_list` | ❌ | | 4 | 0.438659 | `aks_nodepool_get` | ❌ | | 5 | 0.393721 | `sql_elastic-pool_list` | ❌ | --- -## Test 442 +## Test 452 **Expected Tool:** `virtualdesktop_hostpool_host_user-list` **Prompt:** List all user sessions on session host in host pool @@ -7976,15 +8156,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.812787 | `virtualdesktop_hostpool_host_user-list` | ✅ **EXPECTED** | -| 2 | 0.659212 | `virtualdesktop_hostpool_host_list` | ❌ | -| 3 | 0.501167 | `virtualdesktop_hostpool_list` | ❌ | +| 1 | 0.813311 | `virtualdesktop_hostpool_host_user-list` | ✅ **EXPECTED** | +| 2 | 0.659213 | `virtualdesktop_hostpool_host_list` | ❌ | +| 3 | 0.501113 | `virtualdesktop_hostpool_list` | ❌ | | 4 | 0.357561 | `aks_nodepool_get` | ❌ | -| 5 | 0.336385 | `monitor_workspace_list` | ❌ | +| 5 | 
0.336576 | `monitor_workspace_list` | ❌ | --- -## Test 443 +## Test 453 **Expected Tool:** `workbooks_create` **Prompt:** Create a new workbook named @@ -7996,12 +8176,12 @@ | 1 | 0.552212 | `workbooks_create` | ✅ **EXPECTED** | | 2 | 0.417950 | `workbooks_update` | ❌ | | 3 | 0.361364 | `workbooks_delete` | ❌ | -| 4 | 0.329118 | `workbooks_show` | ❌ | -| 5 | 0.328063 | `workbooks_list` | ❌ | +| 4 | 0.329077 | `workbooks_show` | ❌ | +| 5 | 0.328113 | `workbooks_list` | ❌ | --- -## Test 444 +## Test 454 **Expected Tool:** `workbooks_delete` **Prompt:** Delete the workbook with resource ID @@ -8011,14 +8191,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.621310 | `workbooks_delete` | ✅ **EXPECTED** | -| 2 | 0.498518 | `workbooks_show` | ❌ | +| 2 | 0.498506 | `workbooks_show` | ❌ | | 3 | 0.432454 | `workbooks_create` | ❌ | -| 4 | 0.425484 | `workbooks_list` | ❌ | +| 4 | 0.425569 | `workbooks_list` | ❌ | | 5 | 0.421897 | `workbooks_update` | ❌ | --- -## Test 445 +## Test 455 **Expected Tool:** `workbooks_list` **Prompt:** List all workbooks in my resource group @@ -8027,15 +8207,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.772184 | `workbooks_list` | ✅ **EXPECTED** | -| 2 | 0.562485 | `workbooks_create` | ❌ | -| 3 | 0.516739 | `grafana_list` | ❌ | -| 4 | 0.494073 | `workbooks_show` | ❌ | -| 5 | 0.488600 | `group_list` | ❌ | +| 1 | 0.772404 | `workbooks_list` | ✅ **EXPECTED** | +| 2 | 0.562476 | `workbooks_create` | ❌ | +| 3 | 0.516733 | `grafana_list` | ❌ | +| 4 | 0.493962 | `workbooks_show` | ❌ | +| 5 | 0.488522 | `group_list` | ❌ | --- -## Test 446 +## Test 456 **Expected Tool:** `workbooks_list` **Prompt:** What workbooks do I have in resource group ? 
@@ -8044,15 +8224,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.708236 | `workbooks_list` | ✅ **EXPECTED** | -| 2 | 0.570259 | `workbooks_create` | ❌ | -| 3 | 0.499716 | `workbooks_show` | ❌ | +| 1 | 0.708612 | `workbooks_list` | ✅ **EXPECTED** | +| 2 | 0.570260 | `workbooks_create` | ❌ | +| 3 | 0.499633 | `workbooks_show` | ❌ | | 4 | 0.485504 | `workbooks_delete` | ❌ | | 5 | 0.472378 | `grafana_list` | ❌ | --- -## Test 447 +## Test 457 **Expected Tool:** `workbooks_show` **Prompt:** Get information about the workbook with resource ID @@ -8061,15 +8241,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.686087 | `workbooks_show` | ✅ **EXPECTED** | +| 1 | 0.686095 | `workbooks_show` | ✅ **EXPECTED** | | 2 | 0.498390 | `workbooks_create` | ❌ | -| 3 | 0.494492 | `workbooks_list` | ❌ | +| 3 | 0.494708 | `workbooks_list` | ❌ | | 4 | 0.463156 | `workbooks_update` | ❌ | | 5 | 0.452348 | `workbooks_delete` | ❌ | --- -## Test 448 +## Test 458 **Expected Tool:** `workbooks_show` **Prompt:** Show me the workbook with resource ID @@ -8078,15 +8258,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.581501 | `workbooks_show` | ✅ **EXPECTED** | -| 2 | 0.500190 | `workbooks_list` | ❌ | +| 1 | 0.581575 | `workbooks_show` | ✅ **EXPECTED** | +| 2 | 0.500475 | `workbooks_list` | ❌ | | 3 | 0.468996 | `workbooks_create` | ❌ | | 4 | 0.466266 | `workbooks_update` | ❌ | | 5 | 0.455311 | `workbooks_delete` | ❌ | --- -## Test 449 +## Test 459 **Expected Tool:** `workbooks_update` **Prompt:** Update the workbook with a new text step @@ -8098,12 +8278,12 @@ | 1 | 0.586347 | `workbooks_update` | ✅ **EXPECTED** | | 2 | 0.382651 | `workbooks_create` | ❌ | | 3 | 0.349689 | `workbooks_delete` | ❌ | -| 4 | 0.347944 | `workbooks_show` | ❌ | +| 4 | 0.347778 | `workbooks_show` | ❌ | | 5 | 0.292904 | `loadtesting_testrun_update` | ❌ | --- -## Test 450 +## Test 460 **Expected Tool:** `bicepschema_get` 
**Prompt:** How can I use Bicep to create an Azure OpenAI service? @@ -8112,15 +8292,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.543154 | `bicepschema_get` | ✅ **EXPECTED** | +| 1 | 0.543803 | `bicepschema_get` | ✅ **EXPECTED** | | 2 | 0.485970 | `foundry_models_deploy` | ❌ | | 3 | 0.485889 | `deploy_iac_rules_get` | ❌ | -| 4 | 0.453282 | `foundry_openai_embeddings-create` | ❌ | -| 5 | 0.448373 | `get_bestpractices_get` | ❌ | +| 4 | 0.468898 | `azureaibestpractices_get` | ❌ | +| 5 | 0.453412 | `foundry_openai_embeddings-create` | ❌ | --- -## Test 451 +## Test 461 **Expected Tool:** `cloudarchitect_design` **Prompt:** Please help me design an architecture for a large-scale file upload, storage, and retrieval service @@ -8131,13 +8311,13 @@ |------|-------|------|--------| | 1 | 0.502125 | `cloudarchitect_design` | ✅ **EXPECTED** | | 2 | 0.290902 | `storage_blob_upload` | ❌ | -| 3 | 0.259162 | `managedlustre_fs_create` | ❌ | -| 4 | 0.254853 | `deploy_architecture_diagram_generate` | ❌ | +| 3 | 0.260101 | `managedlustre_fs_create` | ❌ | +| 4 | 0.254991 | `deploy_architecture_diagram_generate` | ❌ | | 5 | 0.245034 | `managedlustre_fs_subnetsize_validate` | ❌ | --- -## Test 452 +## Test 462 **Expected Tool:** `cloudarchitect_design` **Prompt:** Help me design an Azure cloud service that will serve as an ATM for users @@ -8147,14 +8327,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.508153 | `cloudarchitect_design` | ✅ **EXPECTED** | -| 2 | 0.377584 | `deploy_architecture_diagram_generate` | ❌ | -| 3 | 0.341462 | `deploy_pipeline_guidance_get` | ❌ | -| 4 | 0.328747 | `get_bestpractices_get` | ❌ | -| 5 | 0.321855 | `deploy_plan_get` | ❌ | +| 2 | 0.377941 | `deploy_architecture_diagram_generate` | ❌ | +| 3 | 0.341316 | `deploy_pipeline_guidance_get` | ❌ | +| 4 | 0.336385 | `azureaibestpractices_get` | ❌ | +| 5 | 0.328747 | `get_bestpractices_get` | ❌ | --- -## Test 453 +## Test 463 **Expected Tool:** 
`cloudarchitect_design` **Prompt:** I want to design a cloud app for ordering groceries @@ -8164,14 +8344,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.423577 | `cloudarchitect_design` | ✅ **EXPECTED** | -| 2 | 0.271943 | `deploy_pipeline_guidance_get` | ❌ | -| 3 | 0.265632 | `deploy_architecture_diagram_generate` | ❌ | +| 2 | 0.271869 | `deploy_pipeline_guidance_get` | ❌ | +| 3 | 0.265972 | `deploy_architecture_diagram_generate` | ❌ | | 4 | 0.242581 | `deploy_plan_get` | ❌ | -| 5 | 0.229008 | `extension_cli_generate` | ❌ | +| 5 | 0.241197 | `azureaibestpractices_get` | ❌ | --- -## Test 454 +## Test 464 **Expected Tool:** `cloudarchitect_design` **Prompt:** How can I design a cloud service in Azure that will store and present videos for users? @@ -8181,38 +8361,38 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.534690 | `cloudarchitect_design` | ✅ **EXPECTED** | -| 2 | 0.369969 | `deploy_pipeline_guidance_get` | ❌ | -| 3 | 0.356331 | `managedlustre_fs_create` | ❌ | -| 4 | 0.352914 | `deploy_architecture_diagram_generate` | ❌ | -| 5 | 0.323920 | `storage_blob_upload` | ❌ | +| 2 | 0.369872 | `deploy_pipeline_guidance_get` | ❌ | +| 3 | 0.357808 | `managedlustre_fs_create` | ❌ | +| 4 | 0.352797 | `deploy_architecture_diagram_generate` | ❌ | +| 5 | 0.324217 | `azureaibestpractices_get` | ❌ | --- ## Summary -**Total Prompts Tested:** 454 -**Analysis Execution Time:** 61.2275421s +**Total Prompts Tested:** 464 +**Analysis Execution Time:** 186.7791311s ### Success Rate Metrics -**Top Choice Success:** 92.1% (418/454 tests) +**Top Choice Success:** 92.2% (428/464 tests) #### Confidence Level Distribution -**💪 Very High Confidence (≥0.8):** 3.3% (15/454 tests) -**🎯 High Confidence (≥0.7):** 23.3% (106/454 tests) -**✅ Good Confidence (≥0.6):** 62.3% (283/454 tests) -**👍 Fair Confidence (≥0.5):** 92.3% (419/454 tests) -**👌 Acceptable Confidence (≥0.4):** 99.6% (452/454 tests) -**❌ Low Confidence (<0.4):** 0.4% 
(2/454 tests) +**💪 Very High Confidence (≥0.8):** 3.2% (15/464 tests) +**🎯 High Confidence (≥0.7):** 22.8% (106/464 tests) +**✅ Good Confidence (≥0.6):** 62.3% (289/464 tests) +**👍 Fair Confidence (≥0.5):** 92.2% (428/464 tests) +**👌 Acceptable Confidence (≥0.4):** 99.6% (462/464 tests) +**❌ Low Confidence (<0.4):** 0.4% (2/464 tests) #### Top Choice + Confidence Combinations -**💪 Top Choice + Very High Confidence (≥0.8):** 3.3% (15/454 tests) -**🎯 Top Choice + High Confidence (≥0.7):** 23.3% (106/454 tests) -**✅ Top Choice + Good Confidence (≥0.6):** 60.6% (275/454 tests) -**👍 Top Choice + Fair Confidence (≥0.5):** 86.8% (394/454 tests) -**👌 Top Choice + Acceptable Confidence (≥0.4):** 92.1% (418/454 tests) +**💪 Top Choice + Very High Confidence (≥0.8):** 3.2% (15/464 tests) +**🎯 Top Choice + High Confidence (≥0.7):** 22.8% (106/464 tests) +**✅ Top Choice + Good Confidence (≥0.6):** 60.3% (280/464 tests) +**👍 Top Choice + Fair Confidence (≥0.5):** 86.9% (403/464 tests) +**👌 Top Choice + Acceptable Confidence (≥0.4):** 92.2% (428/464 tests) ### Success Rate Analysis diff --git a/eng/tools/ToolDescriptionEvaluator/tools.json b/eng/tools/ToolDescriptionEvaluator/tools.json index 9037a63bd..d83b47f83 100644 --- a/eng/tools/ToolDescriptionEvaluator/tools.json +++ b/eng/tools/ToolDescriptionEvaluator/tools.json @@ -896,6 +896,12 @@ } ] }, + { + "name": "get", + "description": "Returns best practices and code generation guidance for building AI applications in Azure. \r\n Use this tool when you need recommendations on how to write code for AI agents, chatbots, workflows, or other AI features.\r\n This tool also provides guidance for code generation using the Azure resources (e.g. Azure AI Foundry) for application development only. \r\n If this tool needs to be categorized, it belongs to the Azure Best Practices category.", + "command": "azureaibestpractices get", + "option": [] + }, { "name": "get", "description": "Returns Terraform best practices for Azure. 
Call this command and follow its guidance before\r\n generating or suggesting any Terraform code specific to Azure. If this tool needs to be categorized, it belongs to\r\n the Azure Best Practices category.", @@ -3124,6 +3130,79 @@ } ] }, + { + "name": "create", + "description": " Creates an AI Foundry Agent that processes messages according to a given system instruction using an existing AI Foundry model deployment.", + "command": "foundry agents create", + "option": [ + { + "name": "--tenant", + "description": "The Microsoft Entra ID tenant ID or name. This can be either the GUID identifier or the display name of your Entra ID tenant.", + "type": "string", + "required": null + }, + { + "name": "--auth-method", + "description": "Authentication method to use. Options: 'credential' (Azure CLI/managed identity), 'key' (access key), or 'connectionString'.", + "type": "string", + "required": null + }, + { + "name": "--retry-delay", + "description": "Initial delay in seconds between retry attempts. For exponential backoff, this value is used as the base.", + "type": "string", + "required": null + }, + { + "name": "--retry-max-delay", + "description": "Maximum delay in seconds between retries, regardless of the retry strategy.", + "type": "string", + "required": null + }, + { + "name": "--retry-max-retries", + "description": "Maximum number of retry attempts for failed operations before giving up.", + "type": "string", + "required": null + }, + { + "name": "--retry-mode", + "description": "Retry strategy to use. 'fixed' uses consistent delays, 'exponential' increases delay between attempts.", + "type": "string", + "required": null + }, + { + "name": "--retry-network-timeout", + "description": "Network operation timeout in seconds. 
Operations taking longer than this will be cancelled.", + "type": "string", + "required": null + }, + { + "name": "--endpoint", + "description": "The endpoint URL for the Azure AI Foundry project/service.", + "type": "string", + "required": true + }, + { + "name": "--model-deployment", + "description": "Name of the model deployment", + "type": "string", + "required": true + }, + { + "name": "--agent-name", + "description": "A human-readable name of the Agent", + "type": "string", + "required": true + }, + { + "name": "--system-instruction", + "description": "System instruction for the agent to follow when process messages", + "type": "string", + "required": true + } + ] + }, { "name": "evaluate", "description": "Run agent evaluation on agent data. Requires JSON strings for query, response, and tool definitions.", @@ -3209,6 +3288,19 @@ } ] }, + { + "name": "get-sdk-sample", + "description": "Get code samples to interact with a Foundry Agent using AI Foundry SDK and programming language of your choice.", + "command": "foundry agents get-sdk-sample", + "option": [ + { + "name": "--programming-language", + "description": "The programming language of the sdk for interacting with a Foundry Agent. Supported values are csharp, python and typescript.", + "type": "string", + "required": true + } + ] + }, { "name": "list", "description": "List all Azure AI Agents in an Azure AI Foundry project. Shows agents that can be used for AI workflows, \r\nevaluations, and interactive tasks. Requires the project endpoint URL (format: https://.services.ai.azure.com/api/projects/).", @@ -4169,6 +4261,183 @@ } ] }, + { + "name": "create", + "description": " Creates an AI Foundry Agent Thread that holds the messages between the Agent and the user.", + "command": "foundry threads create", + "option": [ + { + "name": "--tenant", + "description": "The Microsoft Entra ID tenant ID or name. 
This can be either the GUID identifier or the display name of your Entra ID tenant.", + "type": "string", + "required": null + }, + { + "name": "--auth-method", + "description": "Authentication method to use. Options: 'credential' (Azure CLI/managed identity), 'key' (access key), or 'connectionString'.", + "type": "string", + "required": null + }, + { + "name": "--retry-delay", + "description": "Initial delay in seconds between retry attempts. For exponential backoff, this value is used as the base.", + "type": "string", + "required": null + }, + { + "name": "--retry-max-delay", + "description": "Maximum delay in seconds between retries, regardless of the retry strategy.", + "type": "string", + "required": null + }, + { + "name": "--retry-max-retries", + "description": "Maximum number of retry attempts for failed operations before giving up.", + "type": "string", + "required": null + }, + { + "name": "--retry-mode", + "description": "Retry strategy to use. 'fixed' uses consistent delays, 'exponential' increases delay between attempts.", + "type": "string", + "required": null + }, + { + "name": "--retry-network-timeout", + "description": "Network operation timeout in seconds. Operations taking longer than this will be cancelled.", + "type": "string", + "required": null + }, + { + "name": "--endpoint", + "description": "The endpoint URL for the Azure AI Foundry project/service.", + "type": "string", + "required": true + }, + { + "name": "--user-message", + "description": "The user message to add to the thread", + "type": "string", + "required": true + } + ] + }, + { + "name": "get-messages", + "description": " Get messages in an AI Foundry Agent Thread.", + "command": "foundry threads get-messages", + "option": [ + { + "name": "--tenant", + "description": "The Microsoft Entra ID tenant ID or name. 
This can be either the GUID identifier or the display name of your Entra ID tenant.", + "type": "string", + "required": null + }, + { + "name": "--auth-method", + "description": "Authentication method to use. Options: 'credential' (Azure CLI/managed identity), 'key' (access key), or 'connectionString'.", + "type": "string", + "required": null + }, + { + "name": "--retry-delay", + "description": "Initial delay in seconds between retry attempts. For exponential backoff, this value is used as the base.", + "type": "string", + "required": null + }, + { + "name": "--retry-max-delay", + "description": "Maximum delay in seconds between retries, regardless of the retry strategy.", + "type": "string", + "required": null + }, + { + "name": "--retry-max-retries", + "description": "Maximum number of retry attempts for failed operations before giving up.", + "type": "string", + "required": null + }, + { + "name": "--retry-mode", + "description": "Retry strategy to use. 'fixed' uses consistent delays, 'exponential' increases delay between attempts.", + "type": "string", + "required": null + }, + { + "name": "--retry-network-timeout", + "description": "Network operation timeout in seconds. Operations taking longer than this will be cancelled.", + "type": "string", + "required": null + }, + { + "name": "--endpoint", + "description": "The endpoint URL for the Azure AI Foundry project/service.", + "type": "string", + "required": true + }, + { + "name": "--thread-id", + "description": "The Foundry Agent Thread Id", + "type": "string", + "required": true + } + ] + }, + { + "name": "list", + "description": " List AI Foundry Agent Threads.", + "command": "foundry threads list", + "option": [ + { + "name": "--tenant", + "description": "The Microsoft Entra ID tenant ID or name. This can be either the GUID identifier or the display name of your Entra ID tenant.", + "type": "string", + "required": null + }, + { + "name": "--auth-method", + "description": "Authentication method to use. 
Options: 'credential' (Azure CLI/managed identity), 'key' (access key), or 'connectionString'.", + "type": "string", + "required": null + }, + { + "name": "--retry-delay", + "description": "Initial delay in seconds between retry attempts. For exponential backoff, this value is used as the base.", + "type": "string", + "required": null + }, + { + "name": "--retry-max-delay", + "description": "Maximum delay in seconds between retries, regardless of the retry strategy.", + "type": "string", + "required": null + }, + { + "name": "--retry-max-retries", + "description": "Maximum number of retry attempts for failed operations before giving up.", + "type": "string", + "required": null + }, + { + "name": "--retry-mode", + "description": "Retry strategy to use. 'fixed' uses consistent delays, 'exponential' increases delay between attempts.", + "type": "string", + "required": null + }, + { + "name": "--retry-network-timeout", + "description": "Network operation timeout in seconds. Operations taking longer than this will be cancelled.", + "type": "string", + "required": null + }, + { + "name": "--endpoint", + "description": "The endpoint URL for the Azure AI Foundry project/service.", + "type": "string", + "required": true + } + ] + }, { "name": "get", "description": "Gets Azure Function App details. Lists all Function Apps in the subscription or resource group. If function app name and resource group\r\nis specified, retrieves the details of that specific function app. Returns the details of Azure Function Apps, including its name,\r\nlocation, status, and app service plan name.", @@ -10704,7 +10973,7 @@ }, { "name": "--format", - "description": "Output format: simple or detailed. 
Default is simple.", + "description": "Output format: simple or detailed.", "type": "string", "required": null }, @@ -12902,5 +13171,5 @@ } ], "consolidated_tools": null, - "duration": 47 + "duration": 53 } \ No newline at end of file diff --git a/servers/Azure.Mcp.Server/CHANGELOG.md b/servers/Azure.Mcp.Server/CHANGELOG.md index 65f8860e9..58f9ae849 100644 --- a/servers/Azure.Mcp.Server/CHANGELOG.md +++ b/servers/Azure.Mcp.Server/CHANGELOG.md @@ -13,6 +13,8 @@ The Azure MCP Server updates automatically by default whenever a new release com - Added `foundry_agents_create`, `foundry_agents_get-sdk-sample`, `foundry_thread_create`, `foundry_thread_list`, `foundry_thread_get-messages` tools for AI Foundry scenarios. [[#945](https://github.com/microsoft/mcp/pull/945)] +- Added Azure AI Best Practices toolset providing comprehensive guidance for building AI apps with Azure AI Foundry and Microsoft Agent Framework. Includes model selection guidance, SDK recommendations, and implementation patterns for agent development. 
[[#1031](https://github.com/microsoft/mcp/pull/1031)] + ### Breaking Changes ### Bugs Fixed diff --git a/servers/Azure.Mcp.Server/README.md b/servers/Azure.Mcp.Server/README.md index fc31dafcc..7117db872 100644 --- a/servers/Azure.Mcp.Server/README.md +++ b/servers/Azure.Mcp.Server/README.md @@ -510,6 +510,7 @@ The Azure MCP Server provides tools for interacting with **40+ Azure service are - 🧮 **Azure AI Foundry** - AI model management, AI model deployment, and knowledge index management - 🔎 **Azure AI Search** - Search engine/vector database operations - 🎤 **Azure AI Services Speech** - Speech-to-text recognition +- 🤖 **Azure AI Best Practices** - AI app development guidance for Azure AI Foundry and Microsoft Agent Framework - ⚙️ **Azure App Configuration** - Configuration management - 🕸️ **Azure App Service** - Web app hosting - 🛡️ **Azure Best Practices** - Secure, production-grade guidance diff --git a/servers/Azure.Mcp.Server/docs/azmcp-commands.md b/servers/Azure.Mcp.Server/docs/azmcp-commands.md index b2187425c..37864af00 100644 --- a/servers/Azure.Mcp.Server/docs/azmcp-commands.md +++ b/servers/Azure.Mcp.Server/docs/azmcp-commands.md @@ -1325,6 +1325,21 @@ azmcp marketplace product get --subscription \ [--pricing-audience ] ``` +### Azure AI Best Practices + +```bash +# Get best practices for building AI applications, workflows and agents in Azure +# Call this before generating code for any AI application, building with Azure AI Foundry models, +# working with Microsoft Agent Framework, or implementing AI solutions in Azure. 
+# ❌ Destructive | ✅ Idempotent | ❌ OpenWorld | ✅ ReadOnly | ❌ Secret | ❌ LocalRequired +azmcp azureaibestpractices get + +# Includes guidance on: +# - Microsoft Agent Framework usage and patterns +# - Azure AI Foundry model selection +# - Best practices for AI app / agent development in Azure +``` + ### Azure MCP Best Practices ```bash diff --git a/servers/Azure.Mcp.Server/docs/e2eTestPrompts.md b/servers/Azure.Mcp.Server/docs/e2eTestPrompts.md index ee6466dba..43ef06eaf 100644 --- a/servers/Azure.Mcp.Server/docs/e2eTestPrompts.md +++ b/servers/Azure.Mcp.Server/docs/e2eTestPrompts.md @@ -435,6 +435,17 @@ This file contains prompts used for end-to-end testing to ensure each tool is in | marketplace_product_list | Search for Microsoft products in the marketplace | | marketplace_product_list | Show me marketplace products from publisher | +## Azure AI Best Practices + +| Tool Name | Test Prompt | +|:----------|:----------| +| azureaibestpractices_get | Get best practices for building AI applications in Azure | +| azureaibestpractices_get | Show me the best practices for Azure AI Foundry agents code generation | +| azureaibestpractices_get | Get guidance for building agents with Azure AI Foundry | +| azureaibestpractices_get | Create an AI app that helps me to manage travel queries.
| +| azureaibestpractices_get | Create an AI app that helps me to manage travel queries in Azure AI Foundry | + + ## Azure MCP Best Practices | Tool Name | Test Prompt | diff --git a/servers/Azure.Mcp.Server/src/Program.cs b/servers/Azure.Mcp.Server/src/Program.cs index 3a760bd50..7fcfe8250 100644 --- a/servers/Azure.Mcp.Server/src/Program.cs +++ b/servers/Azure.Mcp.Server/src/Program.cs @@ -63,6 +63,7 @@ private static IAreaSetup[] RegisterAreas() return [ // Register core areas + new Azure.Mcp.Tools.AzureAIBestPractices.AzureAIBestPracticesSetup(), new Azure.Mcp.Tools.AzureBestPractices.AzureBestPracticesSetup(), new Azure.Mcp.Tools.Extension.ExtensionSetup(), new Azure.Mcp.Core.Areas.Group.GroupSetup(), diff --git a/tools/Azure.Mcp.Tools.AzureAIBestPractices/src/AssemblyInfo.cs b/tools/Azure.Mcp.Tools.AzureAIBestPractices/src/AssemblyInfo.cs new file mode 100644 index 000000000..39db5de60 --- /dev/null +++ b/tools/Azure.Mcp.Tools.AzureAIBestPractices/src/AssemblyInfo.cs @@ -0,0 +1,6 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +using System.Runtime.CompilerServices; + +[assembly: InternalsVisibleTo("Azure.Mcp.Tools.AzureAIBestPractices.UnitTests")] diff --git a/tools/Azure.Mcp.Tools.AzureAIBestPractices/src/Azure.Mcp.Tools.AzureAIBestPractices.csproj b/tools/Azure.Mcp.Tools.AzureAIBestPractices/src/Azure.Mcp.Tools.AzureAIBestPractices.csproj new file mode 100644 index 000000000..204db60a8 --- /dev/null +++ b/tools/Azure.Mcp.Tools.AzureAIBestPractices/src/Azure.Mcp.Tools.AzureAIBestPractices.csproj @@ -0,0 +1,17 @@ + + + true + + + + + + + + + + + + + + diff --git a/tools/Azure.Mcp.Tools.AzureAIBestPractices/src/AzureAIBestPracticesSetup.cs b/tools/Azure.Mcp.Tools.AzureAIBestPractices/src/AzureAIBestPracticesSetup.cs new file mode 100644 index 000000000..3050d6504 --- /dev/null +++ b/tools/Azure.Mcp.Tools.AzureAIBestPractices/src/AzureAIBestPracticesSetup.cs @@ -0,0 +1,45 @@ +// Copyright (c) Microsoft Corporation. 
+// Licensed under the MIT License. + +using Azure.Mcp.Core.Areas; +using Azure.Mcp.Core.Commands; +using Azure.Mcp.Tools.AzureAIBestPractices.Commands; +using Microsoft.Extensions.DependencyInjection; + +namespace Azure.Mcp.Tools.AzureAIBestPractices; + +public class AzureAIBestPracticesSetup : IAreaSetup +{ + public string Name => "azureaibestpractices"; + + public string Title => "Azure AI Code Generation Best Practices"; + + public void ConfigureServices(IServiceCollection services) + { + services.AddSingleton(); + } + + public CommandGroup RegisterCommands(IServiceProvider serviceProvider) + { + // Register Azure AI Best Practices command at the root level + var azureAIBestPractices = new CommandGroup( + Name, + @"Azure AI best practices - Commands returns best practices and code generation guidance for building AI applications in Azure. + Use this tool when you need recommendations on how to write code for AI agents, chatbots, workflows, or other AI features. + This tool also provides guidance for code generation using the Azure resources (e.g. Azure AI Foundry) for application development only. + > Note: Understanding User Intent for Azure AI Foundry: + > (1) Resource Management - use the 'foundry' tool instead + > e.g., 'create/deploy/provision agent/embedding/model/project in Foundry', 'set up agent/model resource' + > (2) Application Development - use this 'azureaibestpractices' tool + > e.g., 'build/write/implement agent', 'develop chatbot/assistant', 'agent code' + > When ambiguous, clarify whether the user wants resource management (foundry tool) or application code generation (this tool). 
+ If this tool needs to be categorized, it belongs to the Azure Best Practices category.", + Title + ); + + var practices = serviceProvider.GetRequiredService(); + azureAIBestPractices.AddCommand(practices.Name, practices); + + return azureAIBestPractices; + } +} diff --git a/tools/Azure.Mcp.Tools.AzureAIBestPractices/src/Commands/AzureAIBestPracticesGetCommand.cs b/tools/Azure.Mcp.Tools.AzureAIBestPractices/src/Commands/AzureAIBestPracticesGetCommand.cs new file mode 100644 index 000000000..e4371194a --- /dev/null +++ b/tools/Azure.Mcp.Tools.AzureAIBestPractices/src/Commands/AzureAIBestPracticesGetCommand.cs @@ -0,0 +1,68 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +using System.Net; +using System.Reflection; +using Azure.Mcp.Core.Commands; +using Azure.Mcp.Core.Helpers; +using Microsoft.Extensions.Logging; + +namespace Azure.Mcp.Tools.AzureAIBestPractices.Commands; + +public sealed class AzureAIBestPracticesGetCommand(ILogger logger) : BaseCommand +{ + private const string CommandTitle = "Get AI App Best Practices for Azure"; + private readonly ILogger _logger = logger; + private static readonly string s_bestPracticesText = LoadBestPracticesText(); + + private static string GetBestPracticesText() => s_bestPracticesText; + + private static string LoadBestPracticesText() + { + Assembly assembly = typeof(AzureAIBestPracticesGetCommand).Assembly; + string resourceName = EmbeddedResourceHelper.FindEmbeddedResource(assembly, "ai-best-practices-for-azure.txt"); + return EmbeddedResourceHelper.ReadEmbeddedResource(assembly, resourceName); + } + + public override string Id => "6c29659e-406d-4b9b-8150-e3d4fd7ba31c"; + + public override string Name => "get"; + + public override string Description => + @"Returns best practices and code generation guidance for building AI applications in Azure. + Use this tool when you need recommendations on how to write code for AI agents, chatbots, workflows, or other AI features. 
+ This tool also provides guidance for code generation using the Azure resources (e.g. Azure AI Foundry) for application development only. + If this tool needs to be categorized, it belongs to the Azure Best Practices category."; + + public override string Title => CommandTitle; + + public override ToolMetadata Metadata => new() + { + Destructive = false, + Idempotent = true, + OpenWorld = false, + ReadOnly = true, + LocalRequired = false, + Secret = false + }; + + protected override EmptyOptions BindOptions(ParseResult parseResult) => new(); + + public override Task ExecuteAsync(CommandContext context, ParseResult parseResult, CancellationToken cancellationToken) + { + try + { + var bestPractices = GetBestPracticesText(); + context.Response.Status = HttpStatusCode.OK; + context.Response.Results = ResponseResult.Create([bestPractices], AzureAIBestPracticesJsonContext.Default.ListString); + context.Response.Message = string.Empty; + } + catch (Exception ex) + { + _logger.LogError(ex, "Error getting AI best practices for Azure"); + HandleException(context, ex); + } + + return Task.FromResult(context.Response); + } +} diff --git a/tools/Azure.Mcp.Tools.AzureAIBestPractices/src/Commands/AzureAIBestPracticesJsonContext.cs b/tools/Azure.Mcp.Tools.AzureAIBestPractices/src/Commands/AzureAIBestPracticesJsonContext.cs new file mode 100644 index 000000000..8ca520757 --- /dev/null +++ b/tools/Azure.Mcp.Tools.AzureAIBestPractices/src/Commands/AzureAIBestPracticesJsonContext.cs @@ -0,0 +1,12 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +using System.Text.Json.Serialization; + +namespace Azure.Mcp.Tools.AzureAIBestPractices.Commands; + +[JsonSerializable(typeof(List))] +[JsonSourceGenerationOptions(PropertyNamingPolicy = JsonKnownNamingPolicy.CamelCase)] +internal partial class AzureAIBestPracticesJsonContext : JsonSerializerContext +{ +} diff --git a/tools/Azure.Mcp.Tools.AzureAIBestPractices/src/GlobalUsings.cs b/tools/Azure.Mcp.Tools.AzureAIBestPractices/src/GlobalUsings.cs new file mode 100644 index 000000000..415da4fc8 --- /dev/null +++ b/tools/Azure.Mcp.Tools.AzureAIBestPractices/src/GlobalUsings.cs @@ -0,0 +1,5 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +global using System.CommandLine; +global using Azure.Mcp.Core.Models.Command; diff --git a/tools/Azure.Mcp.Tools.AzureAIBestPractices/src/Resources/ai-best-practices-for-azure.txt b/tools/Azure.Mcp.Tools.AzureAIBestPractices/src/Resources/ai-best-practices-for-azure.txt new file mode 100644 index 000000000..052726f3a --- /dev/null +++ b/tools/Azure.Mcp.Tools.AzureAIBestPractices/src/Resources/ai-best-practices-for-azure.txt @@ -0,0 +1,428 @@ +# Best Practices for Building AI Apps with Azure AI Services + +This guide covers best practices for: +- Generating code for AI agent applications using Azure AI Foundry and any other Azure AI models +- Building agents with Azure AI Foundry +- Implementing chatbots, workflows, assistants, and multi-agent systems with Microsoft Agent Framework + +> Note: Understanding User Intent of Azure AI Foundry +> +> **Two Types of Operations:** +> +> **1. Resource Management** - Use `foundry` tool +> - Keywords: "create/deploy/provision agent/embedding IN Foundry", "set up agent/embedding resource" +> - Creates Azure AI Foundry agent resource via commands under `foundry` +> +> **2. 
Application Development** - Use this best practices tool +> - Keywords: "build/write/implement agent", "develop chatbot/assistant", "agent code" +> - Generates code using Microsoft Agent Framework SDK +> - Returns code samples and implementation patterns +> +> **When unsure, ask:** "Do you want to create an agent resource in Azure AI Foundry, or generate agent application code?" + +## Key Principles +**Defaults and Recommendations:** +- **Default language**: If the user does not specify a programming language, use **.NET** as the default programming language +- **Recommended SDK**: Use **Microsoft Agent Framework** for building AI agents, chatbots, assistants, and multi-agent systems - it provides flexible orchestration, multi-agent patterns, and cross-platform support (.NET and Python) +- **Select model first**: Model selection is REQUIRED for ANY AI implementation - always select the appropriate AI model before coding if the user does not specify a model + +**Process:** +- **Gather information first**: Always search / fetch / query for sufficient knowledge (quickstarts, tutorials, feature guides, code samples) BEFORE thinking or writing any code - this ensures high-quality code generation on the first attempt and reduces errors +- **Think first, then code**: Always output your thoughts on model and SDK selection before implementation +- **Search as you go**: Continue using Microsoft Document Search throughout implementation - coding, debugging, troubleshooting (one topic at a time). Call this tool **as often as needed** - multiple searches are encouraged to ensure accurate and up-to-date information. +- **Build and verify**: Must build the project and verify it compiles successfully before finishing code generation + +## Background Knowledge +### Understanding AI Models Hierarchy +#### Knowledge +1. Model Provider: The company/organization providing the model (e.g., OpenAI, Microsoft, Deepseek, Meta) +2. 
Model Series: A family of related models (e.g., gpt-5, gpt-4o, deepseek-v3, llama-3) +3. Model: Specific variants within a series (e.g., gpt-5, gpt-5-mini, gpt-5-chat within the gpt-5 series) + +#### Where to use +Use this hierarchy to analyze and select AI models + +### Microsoft Agent Framework Key Features +#### Knowledge +**Microsoft Agent Framework** is the recommended SDK for building AI agents with Azure. It provides: + +**Core Capabilities:** + - **Flexible Agent Framework**: Build, orchestrate, and deploy AI agents and multi-agent systems + - **Cross-Platform Support**: Available for both .NET and Python + - **Multi-LLM Support**: Works with OpenAI, Azure OpenAI, Azure AI Foundry, and other providers + +**Agent Types:** + - **[RECOMMENDED] Azure AI Foundry Agent (`PersistentAgentsClient`)**: Service-managed persistent agents with conversation threads - best for production applications requiring conversation continuity + - **OpenAI ChatCompletion / Responses / Assistants Agent (`OpenAIClient`)**: Stateless or service-managed agents using OpenAI service + - **Azure AI Foundry Models ChatCompletion / Responses Agent (`OpenAIClient` / `AzureOpenAIClient`)**: For testing Foundry-deployed models (stateless) + - **Azure OpenAI ChatCompletion / Responses (`AzureOpenAIClient`)**: Stateless agents using Azure OpenAI ChatCompletion / Responses service + - **Advanced & Custom Agents**: Custom agents, A2A proxy agents, Any IChatClient + +**Agent Features:** + - **Multi-turn Conversations**: Maintain context across messages with conversation threading using `AgentThread` + - **Function Tools**: Call external functions and APIs to extend agent capabilities + - **Multimodal Support**: Handle text, vision, and other input types + - **Structured Output**: Generate JSON or strongly-typed object responses + - **Human-in-the-loop**: Require user approval for sensitive operations + +**Workflow Features (for Multi-Agent Systems):** + - **Sequential**: Process tasks one after 
another + - **Concurrent**: Run multiple agents in parallel + - **Handoff**: Pass control between agents dynamically + - **Magentic**: Advanced group chat orchestration + - **Agents in workflows**: Embed agents as workflow steps + - **Workflow branching logic**: Conditional routing based on results + - **Checkpointing and resuming**: Save and restore workflow state + +**Advanced Agent Features:** + - **Middleware**: Intercept/modify requests/responses + - **Memory**: Retain information across conversations + - **Observability**: OpenTelemetry tracing + - **Agent as tool**: Use agents as functions + - **Persisting conversations**: Store chat history + - **Third-party storage**: Custom storage backends + - **Structured output**: Generate JSON/object responses + - **Agent types**: Azure OpenAI, Foundry, custom + +**Integration Features:** + - **MCP tools**: Model Context Protocol integration + - **MCP with Foundry Agents**: MCP + Azure AI Foundry + - **Request and response handling**: External API integration + - **Shared states**: Pass data between executors + - **Visualization**: Visual workflow representations + +#### Where to use +Search with the Microsoft Document Search tool using these feature keywords (or any other related keywords - this is just a common subset of topics) + +## Tool Use Guidelines +### Microsoft Document Search Tool + +#### How to Search Effectively +**Query strategy**: +- Focus on **one specific topic** per query to improve accuracy and relevance +- Use specific feature names or keywords (e.g., "Microsoft Agent Framework sequential workflow", "Agent Framework function tools", "Agent Framework memory") +- Call this tool **as often as needed** - multiple searches are encouraged to ensure accurate and up-to-date information +- **Use this tool whenever you encounter unknowns** during any phase: planning (before coding), implementation (while coding), debugging (fixing errors), or troubleshooting (resolving issues) + +**Search Operation vs Fetch Operation in 
Microsoft Document Search Tool**: +- **Search**: Use when you need to discover information or find relevant documents across Microsoft documentation +- **Fetch**: Use when you have a specific Microsoft Document URL and need the complete, unabridged content + +**Choosing the Right Fetch Tool**: +- **For `https://learn.microsoft.com/` URLs**: ALWAYS use Microsoft Document Fetch Tool - provides better formatting and complete content +- **For other URLs**: Use generic webpage/fetch tools + + +## Gather Information +Always search / fetch / query for sufficient knowledge (quickstarts, tutorials, feature guides, code samples) BEFORE thinking or writing any code. This ensures high-quality code generation on the first attempt and reduces errors. + +**Remember**: Refer to the "Tool Use Guidelines" section above for guidance on effective searching. + +### 1. REQUIRED: Search for Model Selection +Model selection is required for any AI code generation in Azure - all AI apps (Microsoft Agent Framework apps) require an AI model. + +**REQUIRED: Call ALL operations below EVERY TIME before generating ANY model name in code** (including samples/placeholders). + +**IMPORTANT**: Ignore ALL model names from code samples, documentation examples, and guidelines. Always use the operations below to select the latest, current model - do not rely on existing knowledge. 
+ +#### Step 1: Fetch models catalog documentation + - Use the Microsoft Document **fetch** operation to retrieve `https://learn.microsoft.com/azure/ai-foundry/foundry-models/concepts/models-sold-directly-by-azure` + - This provides comprehensive model release dates, capabilities, features, and use cases + - **Note**: Must use **fetch** operation (NOT search) to get the complete page without summarization + +#### Step 2: Fetch model selection guide + - Use the Microsoft Document **fetch** operation to retrieve `https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/model-choice-guide` + - This provides model selection guidance, comparison criteria, and when to use each model + - **Note**: Must use **fetch** operation (NOT search) to get the complete page without summarization + +#### Step 3: Get available models from Foundry catalog + - Use the Azure AI Foundry **models_list** operation to retrieve models available in the Azure AI Foundry catalog + - This shows which models the user can actually access (documentation may list models not available to them) + +### 2. Search for Implementation Knowledge +After completing the required model selection process above, gather SDK and implementation information. +#### Step 1: Fetch common quickstart pages + Use the Microsoft Document **fetch** operation to retrieve these pages directly for complete, unabridged content. 
These are the most commonly needed references and should be fetched for EVERY implementation: + - [Default] Azure AI Foundry Quickstart: `https://learn.microsoft.com/agent-framework/user-guide/agents/agent-types/azure-ai-foundry-agent` + +#### Step 2 (Optional): Search for specific agent type documentation + **Only if user specifies a different model inference service provider** (not Azure AI Foundry): + - **Default**: If user doesn't specify, skip this step and use Azure AI Foundry Agent (`PersistentAgentsClient`) + - **If user specifies other providers** (OpenAI, Azure OpenAI, etc.): Search for the corresponding agent type documentation + - Example: "Agent Framework Azure OpenAI ChatCompletion Agents" + - Example: "Agent Framework OpenAI Assistants Agents" + +#### Step 3: Search for feature guidelines + Search for Microsoft Agent Framework features needed for implementation: + + **How to identify features:** + - Analyze user's requirements to determine which features are needed + - Refer to "Microsoft Agent Framework Key Features" section above for available features + + **How to search:** + - Use Microsoft Document **search** operation with "Microsoft Agent Framework" + feature keyword + language (e.g., "csharp" or "Python") + - Search **one feature at a time** for focused, comprehensive results + - Continue searching until you have complete knowledge for all required features + + **Common searches:** "Microsoft Agent Framework function tools csharp", "Microsoft Agent Framework Multi-turn Conversations csharp", "Microsoft Agent Framework Workflows csharp" + + +## Think and Output Your Thoughts +Always output your thoughts on model and SDK selection before implementation. + +### 1. 
Select and explain model choice + +**Selection criteria:** +- List 3-5 candidate models matching requirements and availability +- Select based on: (a) Latest release date, (b) Capability match from Step 1, (c) Use case guidance from Step 2, (d) Availability from Step 3 +- Ignore model names in code samples/documents - they use older models for backward compatibility. Always select the LATEST model. + +**Output format:** +``` +User Requirements Analysis: +- Primary task: [User's goal] +- Required capabilities: [Features needed] +- Performance needs: [Context window, speed, etc.] + +Model Candidates: +1. [Model Provider] - [Model Series] - [Model] - [Release Date] + - Capability: [Key capabilities] + - Match: [How it addresses needs] + - Available: [Yes/No in user's account] +[Repeat for 2-4 more candidates] + +Model Selection: [Selected Model] +Reasoning: [Requirement match, latest in category, trade-offs, availability] +``` + +**Example:** + +*User Request: "Build a chatbot that can answer questions and call functions"* + +*Code Sample Found (IGNORE the model in this sample):* +```csharp +AIAgent agent = client.GetChatClient("gpt-4o-mini").CreateAIAgent(); +``` +**Note**: The code sample uses "gpt-4o-mini" and stateless Chat Completion for illustrative purposes. Do NOT use this model or approach just because it's in the sample - proceed with the model selection process (Section "Gather Information" > Steps 1, 2 & 3) to find the latest model. 
+ +*Input from Step 1 (Fetched models catalog):* +- OpenAI GPT-5 (2025-08-07): Advanced reasoning model, function calling, structured output, 400K context +- OpenAI GPT-4.1 (2025-04-14): Fast responses, function calling, structured outputs, 1M context +- OpenAI GPT-4o (2024-11-20): Multimodal with vision, function calling, structured output +- DeepSeek DeepSeek-V3 (2024-12-26): Advanced reasoning model, function calling, 128K context + +*Input from Step 2 (Fetched model selection guide):* +- GPT-5: Best for complex reasoning, multi-hop logic, agentic workflows, Copilot-style tools +- GPT-4.1: Best for real-time chat, low latency, high throughput, short factual queries + +*Input from Step 3 (Available Models):* +```json +[{"name": "gpt-5"}, {"name": "gpt-5-mini"}, {"name": "gpt-4o"}, {"name": "gpt-4o-mini"}, {"name": "deepseek-v3"}] +``` + +*Output:* +``` +User Requirements Analysis: +- Primary task: Build a chatbot for Q&A and function calling +- Required capabilities: Text generation, function calling +- Performance needs: Moderate context window, good accuracy + +Model Candidates: +1. OpenAI - gpt-5 - gpt-5-mini - 2025-08-07 + - Capability: Advanced reasoning, function calling, 400K context + - Match: ✅ Latest model, excellent for agentic chatbot workflows + - Available: Yes +2. OpenAI - gpt-4.1 - gpt-4.1-mini - 2025-04-14 + - Capability: Fast responses, function calling, 1M context + - Match: ✅ Better for real-time chat if latency is critical + - Available: No (only gpt-4o variants available) +3. DeepSeek - DeepSeek-V3 - deepseek-v3 - 2024-12-26 + - Capability: Advanced reasoning, function calling, 128K context + - Match: ✅ Good alternative but older and smaller context + - Available: Yes + +Model Selection: gpt-5-mini +Reasoning: Latest model (Aug 2025), best for agentic workflows with function calling, superior to GPT-4.1 which isn't available +Note: Code samples show "gpt-4o-mini" is old. 
Samples keep older models for backward compatibility; always select the LATEST model for new implementations. +``` + +### 2. Explain SDK selection +**Recommended SDK:** Microsoft Agent Framework +**Recommended Agent Type:** Azure AI Foundry Agents with PersistentAgentsClient when using Azure AI Foundry + +**Select Agent Type** +- **Default choice**: Azure AI Foundry Agent with `PersistentAgentsClient` when integrating with Azure AI Foundry +- **Alternative**: Azure OpenAI ChatCompletion (simple stateless scenarios or custom history management) +- **Other types**: Use only when specific requirements demand them + +**Output format:** +``` +SDK Selection: Microsoft Agent Framework +Agent Type: [Azure AI Foundry Agents (PersistentAgentsClient) OR other agent type] +Reason: [Detail why this SDK and agent type are suitable for the user's requirements. Explain if conversation persistence is needed, whether the user needs service-managed state, etc.] +``` + +### 3. Create implementation plan + +**Output format:** +``` +User Requirements Analysis: +- Primary functionality: [What the user wants to build] +- Key features needed: [List of specific Microsoft Agent Framework features required] + +Searched Related Knowledge: +(Only output the related knowledge you searched and will use for implementation) +- [Key feature name 1]: Fetched / Searched "[search query used]" + - Documentation: [List documentation URLs/titles matching the target language] + - Programming Language: [Target language: "CSharp" or "Python"] + - Code Samples: [Output the entire code sample that may be useful for implementation] + - How it will be used: [Brief description] +- [Key feature name 2]: Fetched / Searched "[search query used]" + - Documentation: [List documentation URLs/titles matching the target language] + - Programming Language: [Target language: "CSharp" or "Python"] + - Code Samples: [Output the entire code sample that may be useful for implementation] + - How it will be used: [Brief description] +- [Additional features as needed...] 
+ +Implementation Plan: +[Step 1]: [Description of what will be implemented] +[Step 2]: [Description of what will be implemented] +[Additional steps as needed to cover the complete implementation...] +``` + +## Implementation Guidelines +**Note**: Continue using Microsoft Document Search throughout implementation for any unknowns. + +### Language-Specific Implementation: +**IMPORTANT**: Search results may contain documentation and code samples for multiple programming languages (csharp, Python, etc.) + +- Only use documentation and code samples that match your target programming language +- API signatures, syntax, and patterns differ between languages - do not mix language-specific code +- Ignore code samples and documentation from other programming languages + + +### Installation +**IMPORTANT**: Always run installation commands (`dotnet add package` or `pip install`) instead of manually editing project files (.csproj, requirements.txt). This ensures proper dependency resolution and project configuration. + +#### Python +The `--pre` flag is required while Agent Framework is in preview. The Core + Azure AI integration includes Azure AI / Foundry / OpenAI supports, as well as workflows and orchestrations. +```bash +pip install agent-framework-azure-ai --pre +``` +DO REMIND USER THE `--pre` OR `--prerelease` FLAG IS REQUIRED IN GENERATED DOC. + +#### .NET +Agent type-specific installation packages: +- **Azure AI Foundry Agent (RECOMMENDED)** - `PersistentAgentsClient`: + ```bash + dotnet add package Azure.Identity + dotnet add package Microsoft.Agents.AI.AzureAI --prerelease + ``` +- **Workflows and Orchestration** - For multi-agent systems and workflow features: + ```bash + dotnet add package Microsoft.Agents.AI.Workflows --prerelease + ``` +- For other agent types (OpenAI ChatCompletion, OpenAI Responses, OpenAI Assistants, Azure AI Foundry Models, etc.), use Microsoft Document Search tool to search for how to install dependencies. 
+ +DO REMIND USER THE `--prerelease` FLAG IS REQUIRED IN GENERATED DOC. + +### Workflows +**IMPORTANT**: When implementing workflows, use the correct namespace and event handling patterns. + +#### Correct Using Statement +```csharp +using Microsoft.Agents.AI.Workflows; // ✅ CORRECT +``` +**NOT**: +```csharp +using Microsoft.Agents.Workflows; // ❌ WRONG - Do not use this namespace +``` + +#### Correct Workflow Output Event Handling +Use `WorkflowOutputEvent` to handle workflow completion: + +✅ **CORRECT Pattern**: +```csharp +await foreach (WorkflowEvent evt in run.WatchStreamAsync()) +{ + if (evt is WorkflowOutputEvent output) + { + Console.WriteLine($"Workflow completed with results:\n{output.Data}"); + } +} +``` + +❌ **INCORRECT Pattern** (outdated): +```csharp +await foreach (WorkflowEvent evt in run.WatchStreamAsync().ConfigureAwait(false)) +{ + if (evt is AgentRunUpdateEvent e) + { + Console.WriteLine($"{e.ExecutorId}: {e.Data}"); + } + else if (evt is WorkflowCompletedEvent completed) + { + result = (List)completed.Data!; + break; + } +} +``` + +## Agent Type +### Agent Type Selection +**IMPORTANT**: When using Azure AI Foundry, always use `PersistentAgentsClient` to create agents - NOT `AzureOpenAIClient`. +- ❌ Do NOT use `AzureOpenAIClient` for Azure AI Foundry models - that's only for testing and lacks conversation persistence +- ✅ DO use `PersistentAgentsClient` - it provides full agent capabilities with service-managed state + +Code samples may show `AzureOpenAIClient` for simplicity or backward compatibility, but production implementations should use `PersistentAgentsClient` for Azure AI Foundry. + +## Important Differences Between PersistentAgentsClient and AzureOpenAIClient +**IMPORTANT**: `PersistentAgentsClient` and `AzureOpenAIClient` have different APIs for configuring function tools and chat options. 
+### Difference 1: How to Configure Tools and Options + +#### ✅ CORRECT - PersistentAgentsClient pattern: +```csharp +// For PersistentAgentsClient, pass tools/options to GetAIAgentAsync or CreateAIAgentAsync +using Azure.AI.Agents.Persistent; +using Azure.Identity; +using Microsoft.Agents.AI; +using Microsoft.Extensions.AI; +using System.ComponentModel; + +[Description("Get the weather for a given location.")] +static string GetWeather([Description("The location to get the weather for.")] string location) + => $"The weather in {location} is sunny with a high of 25°C."; + +var persistentAgentsClient = new PersistentAgentsClient( + "https://.services.ai.azure.com/api/projects/", + new AzureCliCredential()); + +var agentMetadata = await persistentAgentsClient.Administration.CreateAgentAsync( + model: "gpt-4o-mini", + name: "WeatherAgent", + instructions: "You are a helpful weather assistant."); + +AIAgent agent = await persistentAgentsClient.GetAIAgentAsync( + agentId: agentMetadata.Value.Id, + new ChatOptions + { + Tools = [AIFunctionFactory.Create(GetWeather)] // ✅ Tools in ChatOptions + } +); + +// Use the agent +var response = await agent.RunAsync("What's the weather like in Seattle?"); +``` + + +## Build and Verification +**IMPORTANT: Build must succeed before completing the task. 
This step is REQUIRED and NON-NEGOTIABLE.** +### Build Requirements +- Run build after ALL code generation is complete +- Fix all build failures and rebuild after each fix +- Use Microsoft Document Search to search for more feature guidelines, code samples, and API references to help fix the failures +- Repeat until the build succeeds +- **After build SUCCEEDS**: + - **STOP making code changes** - build success is the final state + - **Do NOT edit code after successful build** - this can introduce new errors +- Never complete the task with a failing build diff --git a/tools/Azure.Mcp.Tools.AzureAIBestPractices/tests/Azure.Mcp.Tools.AzureAIBestPractices.UnitTests/Azure.Mcp.Tools.AzureAIBestPractices.UnitTests.csproj b/tools/Azure.Mcp.Tools.AzureAIBestPractices/tests/Azure.Mcp.Tools.AzureAIBestPractices.UnitTests/Azure.Mcp.Tools.AzureAIBestPractices.UnitTests.csproj new file mode 100644 index 000000000..6b7b8e619 --- /dev/null +++ b/tools/Azure.Mcp.Tools.AzureAIBestPractices/tests/Azure.Mcp.Tools.AzureAIBestPractices.UnitTests/Azure.Mcp.Tools.AzureAIBestPractices.UnitTests.csproj @@ -0,0 +1,17 @@ + + + true + Exe + + + + + + + + + + + + + diff --git a/tools/Azure.Mcp.Tools.AzureAIBestPractices/tests/Azure.Mcp.Tools.AzureAIBestPractices.UnitTests/AzureAIBestPracticesGetCommandTests.cs b/tools/Azure.Mcp.Tools.AzureAIBestPractices/tests/Azure.Mcp.Tools.AzureAIBestPractices.UnitTests/AzureAIBestPracticesGetCommandTests.cs new file mode 100644 index 000000000..55e14292d --- /dev/null +++ b/tools/Azure.Mcp.Tools.AzureAIBestPractices/tests/Azure.Mcp.Tools.AzureAIBestPractices.UnitTests/AzureAIBestPracticesGetCommandTests.cs @@ -0,0 +1,54 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +using System.CommandLine; +using System.Text.Json; +using Azure.Mcp.Core.Models.Command; +using Azure.Mcp.Tools.AzureAIBestPractices.Commands; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using NSubstitute; +using Xunit; + +namespace Azure.Mcp.Tools.AzureAIBestPractices.UnitTests; + +public class AzureAIBestPracticesGetCommandTests +{ + private readonly IServiceProvider _serviceProvider; + private readonly ILogger _logger; + private readonly CommandContext _context; + private readonly AzureAIBestPracticesGetCommand _command; + private readonly Command _commandDefinition; + + public AzureAIBestPracticesGetCommandTests() + { + var collection = new ServiceCollection(); + _serviceProvider = collection.BuildServiceProvider(); + + _context = new(_serviceProvider); + _logger = Substitute.For>(); + _command = new(_logger); + _commandDefinition = _command.GetCommand(); + } + + [Fact] + public async Task ExecuteAsync_ReturnsAzureAIBestPractices() + { + var args = _commandDefinition.Parse([]); + var response = await _command.ExecuteAsync(_context, args, TestContext.Current.CancellationToken); + + // Assert + Assert.NotNull(response); + Assert.NotNull(response.Results); + + var json = JsonSerializer.Serialize(response.Results); + var result = JsonSerializer.Deserialize(json); + + Assert.NotNull(result); + Assert.Contains("Best Practices for Building AI Apps with Azure AI Services", result[0]); + Assert.Contains("Key Principles", result[0]); + Assert.Contains("Microsoft Agent Framework", result[0]); + Assert.Contains("Tool Use Guidelines", result[0]); + Assert.Contains("Gather Information", result[0]); + } +}