diff --git a/eng/tools/ToolDescriptionEvaluator/prompts.json b/eng/tools/ToolDescriptionEvaluator/prompts.json index 638bac49c..79de5ee70 100644 --- a/eng/tools/ToolDescriptionEvaluator/prompts.json +++ b/eng/tools/ToolDescriptionEvaluator/prompts.json @@ -2,31 +2,22 @@ "foundry_agents_connect": [ "Query an agent in my Azure AI foundry resource" ], + "foundry_agents_create": [ + "Create a new Azure AI Foundry agent using instructions in the active editor" + ], "foundry_agents_evaluate": [ "Evaluate the full query and response I got from my agent for task_adherence" ], + "foundry_agents_get-sdk-sample": [ + "Create a CLI app that can talk to an Azure AI Foundry Agent using Python SDK" + ], "foundry_agents_list": [ "List all agents in my Azure AI Foundry resource", "Show me the available agents in my Azure AI Foundry resource" ], - "foundry_agents_create": [ - "Create a new Azure AI Foundry agent using instructions in the active editor" - ], "foundry_agents_query-and-evaluate": [ "Query and evaluate an agent in my Azure AI Foundry resource for task_adherence" ], - "foundry_agents_get-sdk-sample": [ - "Create a CLI app that can talk to an Azure AI Foundry Agent using Python SDK" - ], - "foundry_threads_create": [ - "Create an Azure AI Foundry thread to hold the conversation" - ], - "foundry_threads_list": [ - "List my AI Foundry threads" - ], - "foundry_threads_get-messages": [ - "Show me the messages in the AI Foundry thread with id " - ], "foundry_knowledge_index_list": [ "List all knowledge indexes in my AI Foundry project", "Show me the knowledge indexes in my AI Foundry project" @@ -65,6 +56,15 @@ "Show me the AI Foundry resources in resource group ", "Get details for AI Foundry resource in resource group " ], + "foundry_threads_create": [ + "Create an Azure AI Foundry thread to hold the conversation" + ], + "foundry_threads_get-messages": [ + "Show me the messages in the AI Foundry thread with id " + ], + "foundry_threads_list": [ + "List my AI Foundry threads" 
+ ], "search_knowledge_base_get": [ "List all knowledge bases in the Azure AI Search service ", "Show me the knowledge bases in the Azure AI Search service ", @@ -116,6 +116,18 @@ "Convert speech to text with comma-separated phrase hints: \"Azure, cognitive services, API\"", "Transcribe audio with raw profanity output from file " ], + "speech_tts_synthesize": [ + "Convert text to speech and save to output.wav", + "Synthesize speech from \"Hello, welcome to Azure\" and save to welcome.wav", + "Generate speech audio from text \"Hello world\" using Azure Speech Services", + "Convert text to speech with Spanish language and save to spanish-audio.wav", + "Synthesize speech with voice en-US-JennyNeural from text \"Azure AI Services\"", + "Create MP3 audio file from text \"Welcome to Azure\" with high quality format", + "Generate speech with custom voice model using endpoint ID ", + "Convert text to OGG/Opus format audio file", + "Synthesize long text content to audio file with streaming", + "Create audio file from text in French language with appropriate voice" + ], "appconfig_account_list": [ "List all App Configuration stores in my subscription", "Show me the App Configuration stores in my subscription", diff --git a/eng/tools/ToolDescriptionEvaluator/results.md b/eng/tools/ToolDescriptionEvaluator/results.md index d4c06c36d..b41f6850c 100644 --- a/eng/tools/ToolDescriptionEvaluator/results.md +++ b/eng/tools/ToolDescriptionEvaluator/results.md @@ -1,46 +1,46 @@ # Tool Selection Analysis Setup -**Setup completed:** 2025-11-06 17:16:26 +**Setup completed:** 2025-11-10 11:23:50 **Tool count:** 179 -**Database setup time:** 32.4934401s +**Database setup time:** 2.2959325s --- # Tool Selection Analysis Results -**Analysis Date:** 2025-11-06 17:16:26 +**Analysis Date:** 2025-11-10 11:23:50 **Tool count:** 179 ## Table of Contents - [Test 1: foundry_agents_connect](#test-1) -- [Test 2: foundry_agents_evaluate](#test-2) -- [Test 3: foundry_agents_list](#test-3) -- [Test 4: 
foundry_agents_list](#test-4) -- [Test 5: foundry_agents_create](#test-5) -- [Test 6: foundry_agents_query-and-evaluate](#test-6) -- [Test 7: foundry_agents_get-sdk-sample](#test-7) -- [Test 8: foundry_threads_create](#test-8) -- [Test 9: foundry_threads_list](#test-9) -- [Test 10: foundry_threads_get-messages](#test-10) -- [Test 11: foundry_knowledge_index_list](#test-11) -- [Test 12: foundry_knowledge_index_list](#test-12) -- [Test 13: foundry_knowledge_index_schema](#test-13) -- [Test 14: foundry_knowledge_index_schema](#test-14) -- [Test 15: foundry_models_deploy](#test-15) -- [Test 16: foundry_models_deployments_list](#test-16) -- [Test 17: foundry_models_deployments_list](#test-17) -- [Test 18: foundry_models_list](#test-18) -- [Test 19: foundry_models_list](#test-19) -- [Test 20: foundry_openai_chat-completions-create](#test-20) -- [Test 21: foundry_openai_create-completion](#test-21) -- [Test 22: foundry_openai_embeddings-create](#test-22) -- [Test 23: foundry_openai_embeddings-create](#test-23) -- [Test 24: foundry_openai_models-list](#test-24) -- [Test 25: foundry_openai_models-list](#test-25) -- [Test 26: foundry_resource_get](#test-26) -- [Test 27: foundry_resource_get](#test-27) -- [Test 28: foundry_resource_get](#test-28) +- [Test 2: foundry_agents_create](#test-2) +- [Test 3: foundry_agents_evaluate](#test-3) +- [Test 4: foundry_agents_get-sdk-sample](#test-4) +- [Test 5: foundry_agents_list](#test-5) +- [Test 6: foundry_agents_list](#test-6) +- [Test 7: foundry_agents_query-and-evaluate](#test-7) +- [Test 8: foundry_knowledge_index_list](#test-8) +- [Test 9: foundry_knowledge_index_list](#test-9) +- [Test 10: foundry_knowledge_index_schema](#test-10) +- [Test 11: foundry_knowledge_index_schema](#test-11) +- [Test 12: foundry_models_deploy](#test-12) +- [Test 13: foundry_models_deployments_list](#test-13) +- [Test 14: foundry_models_deployments_list](#test-14) +- [Test 15: foundry_models_list](#test-15) +- [Test 16: foundry_models_list](#test-16) +- 
[Test 17: foundry_openai_chat-completions-create](#test-17) +- [Test 18: foundry_openai_create-completion](#test-18) +- [Test 19: foundry_openai_embeddings-create](#test-19) +- [Test 20: foundry_openai_embeddings-create](#test-20) +- [Test 21: foundry_openai_models-list](#test-21) +- [Test 22: foundry_openai_models-list](#test-22) +- [Test 23: foundry_resource_get](#test-23) +- [Test 24: foundry_resource_get](#test-24) +- [Test 25: foundry_resource_get](#test-25) +- [Test 26: foundry_threads_create](#test-26) +- [Test 27: foundry_threads_get-messages](#test-27) +- [Test 28: foundry_threads_list](#test-28) - [Test 29: search_knowledge_base_get](#test-29) - [Test 30: search_knowledge_base_get](#test-30) - [Test 31: search_knowledge_base_get](#test-31) @@ -78,405 +78,415 @@ - [Test 63: speech_stt_recognize](#test-63) - [Test 64: speech_stt_recognize](#test-64) - [Test 65: speech_stt_recognize](#test-65) -- [Test 66: appconfig_account_list](#test-66) -- [Test 67: appconfig_account_list](#test-67) -- [Test 68: appconfig_account_list](#test-68) -- [Test 69: appconfig_kv_delete](#test-69) -- [Test 70: appconfig_kv_get](#test-70) -- [Test 71: appconfig_kv_get](#test-71) -- [Test 72: appconfig_kv_get](#test-72) -- [Test 73: appconfig_kv_get](#test-73) -- [Test 74: appconfig_kv_lock_set](#test-74) -- [Test 75: appconfig_kv_lock_set](#test-75) -- [Test 76: appconfig_kv_set](#test-76) -- [Test 77: applens_resource_diagnose](#test-77) -- [Test 78: applens_resource_diagnose](#test-78) -- [Test 79: applens_resource_diagnose](#test-79) -- [Test 80: appservice_database_add](#test-80) -- [Test 81: appservice_database_add](#test-81) -- [Test 82: appservice_database_add](#test-82) -- [Test 83: appservice_database_add](#test-83) -- [Test 84: appservice_database_add](#test-84) -- [Test 85: appservice_database_add](#test-85) -- [Test 86: appservice_database_add](#test-86) -- [Test 87: appservice_database_add](#test-87) -- [Test 88: appservice_database_add](#test-88) -- [Test 89: 
appservice_database_add](#test-89) -- [Test 90: applicationinsights_recommendation_list](#test-90) -- [Test 91: applicationinsights_recommendation_list](#test-91) -- [Test 92: applicationinsights_recommendation_list](#test-92) -- [Test 93: applicationinsights_recommendation_list](#test-93) -- [Test 94: extension_cli_generate](#test-94) -- [Test 95: extension_cli_generate](#test-95) -- [Test 96: extension_cli_generate](#test-96) -- [Test 97: extension_cli_install](#test-97) -- [Test 98: extension_cli_install](#test-98) -- [Test 99: extension_cli_install](#test-99) -- [Test 100: acr_registry_list](#test-100) -- [Test 101: acr_registry_list](#test-101) -- [Test 102: acr_registry_list](#test-102) -- [Test 103: acr_registry_list](#test-103) -- [Test 104: acr_registry_list](#test-104) -- [Test 105: acr_registry_repository_list](#test-105) -- [Test 106: acr_registry_repository_list](#test-106) -- [Test 107: acr_registry_repository_list](#test-107) -- [Test 108: acr_registry_repository_list](#test-108) -- [Test 109: communication_email_send](#test-109) -- [Test 110: communication_email_send](#test-110) -- [Test 111: communication_email_send](#test-111) -- [Test 112: communication_email_send](#test-112) -- [Test 113: communication_email_send](#test-113) -- [Test 114: communication_email_send](#test-114) -- [Test 115: communication_email_send](#test-115) -- [Test 116: communication_email_send](#test-116) -- [Test 117: communication_sms_send](#test-117) -- [Test 118: communication_sms_send](#test-118) -- [Test 119: communication_sms_send](#test-119) -- [Test 120: communication_sms_send](#test-120) -- [Test 121: communication_sms_send](#test-121) -- [Test 122: communication_sms_send](#test-122) -- [Test 123: communication_sms_send](#test-123) -- [Test 124: communication_sms_send](#test-124) -- [Test 125: confidentialledger_entries_append](#test-125) -- [Test 126: confidentialledger_entries_append](#test-126) -- [Test 127: confidentialledger_entries_append](#test-127) -- [Test 
128: confidentialledger_entries_append](#test-128) -- [Test 129: confidentialledger_entries_append](#test-129) -- [Test 130: confidentialledger_entries_get](#test-130) -- [Test 131: confidentialledger_entries_get](#test-131) -- [Test 132: cosmos_account_list](#test-132) -- [Test 133: cosmos_account_list](#test-133) -- [Test 134: cosmos_account_list](#test-134) -- [Test 135: cosmos_database_container_item_query](#test-135) -- [Test 136: cosmos_database_container_list](#test-136) -- [Test 137: cosmos_database_container_list](#test-137) -- [Test 138: cosmos_database_list](#test-138) -- [Test 139: cosmos_database_list](#test-139) -- [Test 140: kusto_cluster_get](#test-140) -- [Test 141: kusto_cluster_list](#test-141) -- [Test 142: kusto_cluster_list](#test-142) -- [Test 143: kusto_cluster_list](#test-143) -- [Test 144: kusto_database_list](#test-144) -- [Test 145: kusto_database_list](#test-145) -- [Test 146: kusto_query](#test-146) -- [Test 147: kusto_sample](#test-147) -- [Test 148: kusto_table_list](#test-148) -- [Test 149: kusto_table_list](#test-149) -- [Test 150: kusto_table_schema](#test-150) -- [Test 151: mysql_database_list](#test-151) -- [Test 152: mysql_database_list](#test-152) -- [Test 153: mysql_database_query](#test-153) -- [Test 154: mysql_server_config_get](#test-154) -- [Test 155: mysql_server_list](#test-155) -- [Test 156: mysql_server_list](#test-156) -- [Test 157: mysql_server_list](#test-157) -- [Test 158: mysql_server_param_get](#test-158) -- [Test 159: mysql_server_param_set](#test-159) -- [Test 160: mysql_table_list](#test-160) -- [Test 161: mysql_table_list](#test-161) -- [Test 162: mysql_table_schema_get](#test-162) -- [Test 163: postgres_database_list](#test-163) -- [Test 164: postgres_database_list](#test-164) -- [Test 165: postgres_database_query](#test-165) -- [Test 166: postgres_server_config_get](#test-166) -- [Test 167: postgres_server_list](#test-167) -- [Test 168: postgres_server_list](#test-168) -- [Test 169: 
postgres_server_list](#test-169) -- [Test 170: postgres_server_param_get](#test-170) -- [Test 171: postgres_server_param_set](#test-171) -- [Test 172: postgres_table_list](#test-172) -- [Test 173: postgres_table_list](#test-173) -- [Test 174: postgres_table_schema_get](#test-174) -- [Test 175: deploy_app_logs_get](#test-175) -- [Test 176: deploy_architecture_diagram_generate](#test-176) -- [Test 177: deploy_iac_rules_get](#test-177) -- [Test 178: deploy_pipeline_guidance_get](#test-178) -- [Test 179: deploy_plan_get](#test-179) -- [Test 180: eventgrid_events_publish](#test-180) -- [Test 181: eventgrid_events_publish](#test-181) -- [Test 182: eventgrid_events_publish](#test-182) -- [Test 183: eventgrid_topic_list](#test-183) -- [Test 184: eventgrid_topic_list](#test-184) -- [Test 185: eventgrid_topic_list](#test-185) -- [Test 186: eventgrid_topic_list](#test-186) -- [Test 187: eventgrid_subscription_list](#test-187) -- [Test 188: eventgrid_subscription_list](#test-188) -- [Test 189: eventgrid_subscription_list](#test-189) -- [Test 190: eventgrid_subscription_list](#test-190) -- [Test 191: eventgrid_subscription_list](#test-191) -- [Test 192: eventgrid_subscription_list](#test-192) -- [Test 193: eventgrid_subscription_list](#test-193) -- [Test 194: eventhubs_eventhub_consumergroup_delete](#test-194) -- [Test 195: eventhubs_eventhub_consumergroup_get](#test-195) -- [Test 196: eventhubs_eventhub_consumergroup_get](#test-196) -- [Test 197: eventhubs_eventhub_consumergroup_update](#test-197) -- [Test 198: eventhubs_eventhub_consumergroup_update](#test-198) -- [Test 199: eventhubs_eventhub_delete](#test-199) -- [Test 200: eventhubs_eventhub_get](#test-200) -- [Test 201: eventhubs_eventhub_get](#test-201) -- [Test 202: eventhubs_eventhub_update](#test-202) -- [Test 203: eventhubs_eventhub_update](#test-203) -- [Test 204: eventhubs_namespace_delete](#test-204) -- [Test 205: eventhubs_namespace_get](#test-205) -- [Test 206: eventhubs_namespace_get](#test-206) -- [Test 207: 
eventhubs_namespace_update](#test-207) -- [Test 208: eventhubs_namespace_update](#test-208) -- [Test 209: functionapp_get](#test-209) -- [Test 210: functionapp_get](#test-210) -- [Test 211: functionapp_get](#test-211) -- [Test 212: functionapp_get](#test-212) -- [Test 213: functionapp_get](#test-213) -- [Test 214: functionapp_get](#test-214) -- [Test 215: functionapp_get](#test-215) -- [Test 216: functionapp_get](#test-216) -- [Test 217: functionapp_get](#test-217) -- [Test 218: functionapp_get](#test-218) +- [Test 66: speech_tts_synthesize](#test-66) +- [Test 67: speech_tts_synthesize](#test-67) +- [Test 68: speech_tts_synthesize](#test-68) +- [Test 69: speech_tts_synthesize](#test-69) +- [Test 70: speech_tts_synthesize](#test-70) +- [Test 71: speech_tts_synthesize](#test-71) +- [Test 72: speech_tts_synthesize](#test-72) +- [Test 73: speech_tts_synthesize](#test-73) +- [Test 74: speech_tts_synthesize](#test-74) +- [Test 75: speech_tts_synthesize](#test-75) +- [Test 76: appconfig_account_list](#test-76) +- [Test 77: appconfig_account_list](#test-77) +- [Test 78: appconfig_account_list](#test-78) +- [Test 79: appconfig_kv_delete](#test-79) +- [Test 80: appconfig_kv_get](#test-80) +- [Test 81: appconfig_kv_get](#test-81) +- [Test 82: appconfig_kv_get](#test-82) +- [Test 83: appconfig_kv_get](#test-83) +- [Test 84: appconfig_kv_lock_set](#test-84) +- [Test 85: appconfig_kv_lock_set](#test-85) +- [Test 86: appconfig_kv_set](#test-86) +- [Test 87: applens_resource_diagnose](#test-87) +- [Test 88: applens_resource_diagnose](#test-88) +- [Test 89: applens_resource_diagnose](#test-89) +- [Test 90: appservice_database_add](#test-90) +- [Test 91: appservice_database_add](#test-91) +- [Test 92: appservice_database_add](#test-92) +- [Test 93: appservice_database_add](#test-93) +- [Test 94: appservice_database_add](#test-94) +- [Test 95: appservice_database_add](#test-95) +- [Test 96: appservice_database_add](#test-96) +- [Test 97: appservice_database_add](#test-97) +- [Test 
98: appservice_database_add](#test-98) +- [Test 99: appservice_database_add](#test-99) +- [Test 100: applicationinsights_recommendation_list](#test-100) +- [Test 101: applicationinsights_recommendation_list](#test-101) +- [Test 102: applicationinsights_recommendation_list](#test-102) +- [Test 103: applicationinsights_recommendation_list](#test-103) +- [Test 104: extension_cli_generate](#test-104) +- [Test 105: extension_cli_generate](#test-105) +- [Test 106: extension_cli_generate](#test-106) +- [Test 107: extension_cli_install](#test-107) +- [Test 108: extension_cli_install](#test-108) +- [Test 109: extension_cli_install](#test-109) +- [Test 110: acr_registry_list](#test-110) +- [Test 111: acr_registry_list](#test-111) +- [Test 112: acr_registry_list](#test-112) +- [Test 113: acr_registry_list](#test-113) +- [Test 114: acr_registry_list](#test-114) +- [Test 115: acr_registry_repository_list](#test-115) +- [Test 116: acr_registry_repository_list](#test-116) +- [Test 117: acr_registry_repository_list](#test-117) +- [Test 118: acr_registry_repository_list](#test-118) +- [Test 119: communication_email_send](#test-119) +- [Test 120: communication_email_send](#test-120) +- [Test 121: communication_email_send](#test-121) +- [Test 122: communication_email_send](#test-122) +- [Test 123: communication_email_send](#test-123) +- [Test 124: communication_email_send](#test-124) +- [Test 125: communication_email_send](#test-125) +- [Test 126: communication_email_send](#test-126) +- [Test 127: communication_sms_send](#test-127) +- [Test 128: communication_sms_send](#test-128) +- [Test 129: communication_sms_send](#test-129) +- [Test 130: communication_sms_send](#test-130) +- [Test 131: communication_sms_send](#test-131) +- [Test 132: communication_sms_send](#test-132) +- [Test 133: communication_sms_send](#test-133) +- [Test 134: communication_sms_send](#test-134) +- [Test 135: confidentialledger_entries_append](#test-135) +- [Test 136: 
confidentialledger_entries_append](#test-136) +- [Test 137: confidentialledger_entries_append](#test-137) +- [Test 138: confidentialledger_entries_append](#test-138) +- [Test 139: confidentialledger_entries_append](#test-139) +- [Test 140: confidentialledger_entries_get](#test-140) +- [Test 141: confidentialledger_entries_get](#test-141) +- [Test 142: cosmos_account_list](#test-142) +- [Test 143: cosmos_account_list](#test-143) +- [Test 144: cosmos_account_list](#test-144) +- [Test 145: cosmos_database_container_item_query](#test-145) +- [Test 146: cosmos_database_container_list](#test-146) +- [Test 147: cosmos_database_container_list](#test-147) +- [Test 148: cosmos_database_list](#test-148) +- [Test 149: cosmos_database_list](#test-149) +- [Test 150: kusto_cluster_get](#test-150) +- [Test 151: kusto_cluster_list](#test-151) +- [Test 152: kusto_cluster_list](#test-152) +- [Test 153: kusto_cluster_list](#test-153) +- [Test 154: kusto_database_list](#test-154) +- [Test 155: kusto_database_list](#test-155) +- [Test 156: kusto_query](#test-156) +- [Test 157: kusto_sample](#test-157) +- [Test 158: kusto_table_list](#test-158) +- [Test 159: kusto_table_list](#test-159) +- [Test 160: kusto_table_schema](#test-160) +- [Test 161: mysql_database_list](#test-161) +- [Test 162: mysql_database_list](#test-162) +- [Test 163: mysql_database_query](#test-163) +- [Test 164: mysql_server_config_get](#test-164) +- [Test 165: mysql_server_list](#test-165) +- [Test 166: mysql_server_list](#test-166) +- [Test 167: mysql_server_list](#test-167) +- [Test 168: mysql_server_param_get](#test-168) +- [Test 169: mysql_server_param_set](#test-169) +- [Test 170: mysql_table_list](#test-170) +- [Test 171: mysql_table_list](#test-171) +- [Test 172: mysql_table_schema_get](#test-172) +- [Test 173: postgres_database_list](#test-173) +- [Test 174: postgres_database_list](#test-174) +- [Test 175: postgres_database_query](#test-175) +- [Test 176: postgres_server_config_get](#test-176) +- [Test 177: 
postgres_server_list](#test-177) +- [Test 178: postgres_server_list](#test-178) +- [Test 179: postgres_server_list](#test-179) +- [Test 180: postgres_server_param_get](#test-180) +- [Test 181: postgres_server_param_set](#test-181) +- [Test 182: postgres_table_list](#test-182) +- [Test 183: postgres_table_list](#test-183) +- [Test 184: postgres_table_schema_get](#test-184) +- [Test 185: deploy_app_logs_get](#test-185) +- [Test 186: deploy_architecture_diagram_generate](#test-186) +- [Test 187: deploy_iac_rules_get](#test-187) +- [Test 188: deploy_pipeline_guidance_get](#test-188) +- [Test 189: deploy_plan_get](#test-189) +- [Test 190: eventgrid_events_publish](#test-190) +- [Test 191: eventgrid_events_publish](#test-191) +- [Test 192: eventgrid_events_publish](#test-192) +- [Test 193: eventgrid_topic_list](#test-193) +- [Test 194: eventgrid_topic_list](#test-194) +- [Test 195: eventgrid_topic_list](#test-195) +- [Test 196: eventgrid_topic_list](#test-196) +- [Test 197: eventgrid_subscription_list](#test-197) +- [Test 198: eventgrid_subscription_list](#test-198) +- [Test 199: eventgrid_subscription_list](#test-199) +- [Test 200: eventgrid_subscription_list](#test-200) +- [Test 201: eventgrid_subscription_list](#test-201) +- [Test 202: eventgrid_subscription_list](#test-202) +- [Test 203: eventgrid_subscription_list](#test-203) +- [Test 204: eventhubs_eventhub_consumergroup_delete](#test-204) +- [Test 205: eventhubs_eventhub_consumergroup_get](#test-205) +- [Test 206: eventhubs_eventhub_consumergroup_get](#test-206) +- [Test 207: eventhubs_eventhub_consumergroup_update](#test-207) +- [Test 208: eventhubs_eventhub_consumergroup_update](#test-208) +- [Test 209: eventhubs_eventhub_delete](#test-209) +- [Test 210: eventhubs_eventhub_get](#test-210) +- [Test 211: eventhubs_eventhub_get](#test-211) +- [Test 212: eventhubs_eventhub_update](#test-212) +- [Test 213: eventhubs_eventhub_update](#test-213) +- [Test 214: eventhubs_namespace_delete](#test-214) +- [Test 215: 
eventhubs_namespace_get](#test-215) +- [Test 216: eventhubs_namespace_get](#test-216) +- [Test 217: eventhubs_namespace_update](#test-217) +- [Test 218: eventhubs_namespace_update](#test-218) - [Test 219: functionapp_get](#test-219) - [Test 220: functionapp_get](#test-220) -- [Test 221: keyvault_admin_settings_get](#test-221) -- [Test 222: keyvault_admin_settings_get](#test-222) -- [Test 223: keyvault_admin_settings_get](#test-223) -- [Test 224: keyvault_certificate_create](#test-224) -- [Test 225: keyvault_certificate_create](#test-225) -- [Test 226: keyvault_certificate_create](#test-226) -- [Test 227: keyvault_certificate_create](#test-227) -- [Test 228: keyvault_certificate_create](#test-228) -- [Test 229: keyvault_certificate_get](#test-229) -- [Test 230: keyvault_certificate_get](#test-230) -- [Test 231: keyvault_certificate_get](#test-231) -- [Test 232: keyvault_certificate_get](#test-232) -- [Test 233: keyvault_certificate_get](#test-233) -- [Test 234: keyvault_certificate_import](#test-234) -- [Test 235: keyvault_certificate_import](#test-235) -- [Test 236: keyvault_certificate_import](#test-236) -- [Test 237: keyvault_certificate_import](#test-237) -- [Test 238: keyvault_certificate_import](#test-238) -- [Test 239: keyvault_certificate_list](#test-239) -- [Test 240: keyvault_certificate_list](#test-240) -- [Test 241: keyvault_certificate_list](#test-241) -- [Test 242: keyvault_certificate_list](#test-242) -- [Test 243: keyvault_certificate_list](#test-243) -- [Test 244: keyvault_certificate_list](#test-244) -- [Test 245: keyvault_key_create](#test-245) -- [Test 246: keyvault_key_create](#test-246) -- [Test 247: keyvault_key_create](#test-247) -- [Test 248: keyvault_key_create](#test-248) -- [Test 249: keyvault_key_create](#test-249) -- [Test 250: keyvault_key_get](#test-250) -- [Test 251: keyvault_key_get](#test-251) -- [Test 252: keyvault_key_get](#test-252) -- [Test 253: keyvault_key_get](#test-253) -- [Test 254: keyvault_key_get](#test-254) -- [Test 
255: keyvault_key_list](#test-255) -- [Test 256: keyvault_key_list](#test-256) -- [Test 257: keyvault_key_list](#test-257) -- [Test 258: keyvault_key_list](#test-258) -- [Test 259: keyvault_key_list](#test-259) -- [Test 260: keyvault_key_list](#test-260) -- [Test 261: keyvault_secret_create](#test-261) -- [Test 262: keyvault_secret_create](#test-262) -- [Test 263: keyvault_secret_create](#test-263) -- [Test 264: keyvault_secret_create](#test-264) -- [Test 265: keyvault_secret_create](#test-265) -- [Test 266: keyvault_secret_get](#test-266) -- [Test 267: keyvault_secret_get](#test-267) -- [Test 268: keyvault_secret_get](#test-268) -- [Test 269: keyvault_secret_get](#test-269) -- [Test 270: keyvault_secret_get](#test-270) -- [Test 271: keyvault_secret_list](#test-271) -- [Test 272: keyvault_secret_list](#test-272) -- [Test 273: keyvault_secret_list](#test-273) -- [Test 274: keyvault_secret_list](#test-274) -- [Test 275: keyvault_secret_list](#test-275) -- [Test 276: keyvault_secret_list](#test-276) -- [Test 277: aks_cluster_get](#test-277) -- [Test 278: aks_cluster_get](#test-278) -- [Test 279: aks_cluster_get](#test-279) -- [Test 280: aks_cluster_get](#test-280) -- [Test 281: aks_cluster_get](#test-281) -- [Test 282: aks_cluster_get](#test-282) -- [Test 283: aks_cluster_get](#test-283) -- [Test 284: aks_nodepool_get](#test-284) -- [Test 285: aks_nodepool_get](#test-285) -- [Test 286: aks_nodepool_get](#test-286) -- [Test 287: aks_nodepool_get](#test-287) -- [Test 288: aks_nodepool_get](#test-288) -- [Test 289: aks_nodepool_get](#test-289) -- [Test 290: loadtesting_test_create](#test-290) -- [Test 291: loadtesting_test_get](#test-291) -- [Test 292: loadtesting_testresource_create](#test-292) -- [Test 293: loadtesting_testresource_list](#test-293) -- [Test 294: loadtesting_testrun_create](#test-294) -- [Test 295: loadtesting_testrun_get](#test-295) -- [Test 296: loadtesting_testrun_list](#test-296) -- [Test 297: loadtesting_testrun_update](#test-297) -- [Test 298: 
grafana_list](#test-298) -- [Test 299: managedlustre_fs_create](#test-299) -- [Test 300: managedlustre_fs_list](#test-300) -- [Test 301: managedlustre_fs_list](#test-301) -- [Test 302: managedlustre_fs_sku_get](#test-302) -- [Test 303: managedlustre_fs_subnetsize_ask](#test-303) -- [Test 304: managedlustre_fs_subnetsize_validate](#test-304) -- [Test 305: managedlustre_fs_update](#test-305) -- [Test 306: marketplace_product_get](#test-306) -- [Test 307: marketplace_product_list](#test-307) -- [Test 308: marketplace_product_list](#test-308) -- [Test 309: azureaibestpractices_get](#test-309) -- [Test 310: azureaibestpractices_get](#test-310) -- [Test 311: azureaibestpractices_get](#test-311) -- [Test 312: azureaibestpractices_get](#test-312) -- [Test 313: azureaibestpractices_get](#test-313) -- [Test 314: get_bestpractices_get](#test-314) -- [Test 315: get_bestpractices_get](#test-315) -- [Test 316: get_bestpractices_get](#test-316) -- [Test 317: get_bestpractices_get](#test-317) -- [Test 318: get_bestpractices_get](#test-318) -- [Test 319: get_bestpractices_get](#test-319) -- [Test 320: get_bestpractices_get](#test-320) -- [Test 321: get_bestpractices_get](#test-321) -- [Test 322: get_bestpractices_get](#test-322) -- [Test 323: monitor_activitylog_list](#test-323) -- [Test 324: monitor_healthmodels_entity_get](#test-324) -- [Test 325: monitor_metrics_definitions](#test-325) -- [Test 326: monitor_metrics_definitions](#test-326) -- [Test 327: monitor_metrics_definitions](#test-327) -- [Test 328: monitor_metrics_query](#test-328) -- [Test 329: monitor_metrics_query](#test-329) -- [Test 330: monitor_metrics_query](#test-330) -- [Test 331: monitor_metrics_query](#test-331) -- [Test 332: monitor_metrics_query](#test-332) -- [Test 333: monitor_metrics_query](#test-333) -- [Test 334: monitor_resource_log_query](#test-334) -- [Test 335: monitor_table_list](#test-335) -- [Test 336: monitor_table_list](#test-336) -- [Test 337: monitor_table_type_list](#test-337) -- [Test 338: 
monitor_table_type_list](#test-338) -- [Test 339: monitor_webtests_create](#test-339) -- [Test 340: monitor_webtests_get](#test-340) -- [Test 341: monitor_webtests_list](#test-341) -- [Test 342: monitor_webtests_list](#test-342) -- [Test 343: monitor_webtests_update](#test-343) -- [Test 344: monitor_workspace_list](#test-344) -- [Test 345: monitor_workspace_list](#test-345) -- [Test 346: monitor_workspace_list](#test-346) -- [Test 347: monitor_workspace_log_query](#test-347) -- [Test 348: datadog_monitoredresources_list](#test-348) -- [Test 349: datadog_monitoredresources_list](#test-349) -- [Test 350: extension_azqr](#test-350) -- [Test 351: extension_azqr](#test-351) -- [Test 352: extension_azqr](#test-352) -- [Test 353: quota_region_availability_list](#test-353) -- [Test 354: quota_usage_check](#test-354) -- [Test 355: role_assignment_list](#test-355) -- [Test 356: role_assignment_list](#test-356) -- [Test 357: redis_list](#test-357) -- [Test 358: redis_list](#test-358) -- [Test 359: redis_list](#test-359) -- [Test 360: redis_list](#test-360) -- [Test 361: redis_list](#test-361) -- [Test 362: group_list](#test-362) -- [Test 363: group_list](#test-363) -- [Test 364: group_list](#test-364) -- [Test 365: resourcehealth_availability-status_get](#test-365) -- [Test 366: resourcehealth_availability-status_get](#test-366) -- [Test 367: resourcehealth_availability-status_get](#test-367) -- [Test 368: resourcehealth_availability-status_list](#test-368) -- [Test 369: resourcehealth_availability-status_list](#test-369) -- [Test 370: resourcehealth_availability-status_list](#test-370) -- [Test 371: resourcehealth_health-events_list](#test-371) -- [Test 372: resourcehealth_health-events_list](#test-372) -- [Test 373: resourcehealth_health-events_list](#test-373) -- [Test 374: resourcehealth_health-events_list](#test-374) -- [Test 375: resourcehealth_health-events_list](#test-375) -- [Test 376: servicebus_queue_details](#test-376) -- [Test 377: 
servicebus_topic_details](#test-377) -- [Test 378: servicebus_topic_subscription_details](#test-378) -- [Test 379: signalr_runtime_get](#test-379) -- [Test 380: signalr_runtime_get](#test-380) -- [Test 381: signalr_runtime_get](#test-381) -- [Test 382: signalr_runtime_get](#test-382) -- [Test 383: signalr_runtime_get](#test-383) -- [Test 384: signalr_runtime_get](#test-384) -- [Test 385: sql_db_create](#test-385) -- [Test 386: sql_db_create](#test-386) -- [Test 387: sql_db_create](#test-387) -- [Test 388: sql_db_delete](#test-388) -- [Test 389: sql_db_delete](#test-389) -- [Test 390: sql_db_delete](#test-390) -- [Test 391: sql_db_list](#test-391) -- [Test 392: sql_db_list](#test-392) -- [Test 393: sql_db_rename](#test-393) -- [Test 394: sql_db_rename](#test-394) -- [Test 395: sql_db_show](#test-395) -- [Test 396: sql_db_show](#test-396) -- [Test 397: sql_db_update](#test-397) -- [Test 398: sql_db_update](#test-398) -- [Test 399: sql_elastic-pool_list](#test-399) -- [Test 400: sql_elastic-pool_list](#test-400) -- [Test 401: sql_elastic-pool_list](#test-401) -- [Test 402: sql_server_create](#test-402) -- [Test 403: sql_server_create](#test-403) -- [Test 404: sql_server_create](#test-404) -- [Test 405: sql_server_delete](#test-405) -- [Test 406: sql_server_delete](#test-406) -- [Test 407: sql_server_delete](#test-407) -- [Test 408: sql_server_entra-admin_list](#test-408) -- [Test 409: sql_server_entra-admin_list](#test-409) -- [Test 410: sql_server_entra-admin_list](#test-410) -- [Test 411: sql_server_firewall-rule_create](#test-411) -- [Test 412: sql_server_firewall-rule_create](#test-412) -- [Test 413: sql_server_firewall-rule_create](#test-413) -- [Test 414: sql_server_firewall-rule_delete](#test-414) -- [Test 415: sql_server_firewall-rule_delete](#test-415) -- [Test 416: sql_server_firewall-rule_delete](#test-416) -- [Test 417: sql_server_firewall-rule_list](#test-417) -- [Test 418: sql_server_firewall-rule_list](#test-418) -- [Test 419: 
sql_server_firewall-rule_list](#test-419) -- [Test 420: sql_server_list](#test-420) -- [Test 421: sql_server_list](#test-421) -- [Test 422: sql_server_show](#test-422) -- [Test 423: sql_server_show](#test-423) -- [Test 424: sql_server_show](#test-424) -- [Test 425: storage_account_create](#test-425) -- [Test 426: storage_account_create](#test-426) -- [Test 427: storage_account_create](#test-427) -- [Test 428: storage_account_get](#test-428) -- [Test 429: storage_account_get](#test-429) -- [Test 430: storage_account_get](#test-430) -- [Test 431: storage_account_get](#test-431) -- [Test 432: storage_account_get](#test-432) -- [Test 433: storage_blob_container_create](#test-433) -- [Test 434: storage_blob_container_create](#test-434) -- [Test 435: storage_blob_container_create](#test-435) -- [Test 436: storage_blob_container_get](#test-436) -- [Test 437: storage_blob_container_get](#test-437) -- [Test 438: storage_blob_container_get](#test-438) -- [Test 439: storage_blob_get](#test-439) -- [Test 440: storage_blob_get](#test-440) -- [Test 441: storage_blob_get](#test-441) -- [Test 442: storage_blob_get](#test-442) -- [Test 443: storage_blob_upload](#test-443) -- [Test 444: subscription_list](#test-444) -- [Test 445: subscription_list](#test-445) -- [Test 446: subscription_list](#test-446) -- [Test 447: subscription_list](#test-447) -- [Test 448: azureterraformbestpractices_get](#test-448) -- [Test 449: azureterraformbestpractices_get](#test-449) -- [Test 450: virtualdesktop_hostpool_list](#test-450) -- [Test 451: virtualdesktop_hostpool_host_list](#test-451) -- [Test 452: virtualdesktop_hostpool_host_user-list](#test-452) -- [Test 453: workbooks_create](#test-453) -- [Test 454: workbooks_delete](#test-454) -- [Test 455: workbooks_list](#test-455) -- [Test 456: workbooks_list](#test-456) -- [Test 457: workbooks_show](#test-457) -- [Test 458: workbooks_show](#test-458) -- [Test 459: workbooks_update](#test-459) -- [Test 460: bicepschema_get](#test-460) -- [Test 461: 
cloudarchitect_design](#test-461) -- [Test 462: cloudarchitect_design](#test-462) -- [Test 463: cloudarchitect_design](#test-463) -- [Test 464: cloudarchitect_design](#test-464) +- [Test 221: functionapp_get](#test-221) +- [Test 222: functionapp_get](#test-222) +- [Test 223: functionapp_get](#test-223) +- [Test 224: functionapp_get](#test-224) +- [Test 225: functionapp_get](#test-225) +- [Test 226: functionapp_get](#test-226) +- [Test 227: functionapp_get](#test-227) +- [Test 228: functionapp_get](#test-228) +- [Test 229: functionapp_get](#test-229) +- [Test 230: functionapp_get](#test-230) +- [Test 231: keyvault_admin_settings_get](#test-231) +- [Test 232: keyvault_admin_settings_get](#test-232) +- [Test 233: keyvault_admin_settings_get](#test-233) +- [Test 234: keyvault_certificate_create](#test-234) +- [Test 235: keyvault_certificate_create](#test-235) +- [Test 236: keyvault_certificate_create](#test-236) +- [Test 237: keyvault_certificate_create](#test-237) +- [Test 238: keyvault_certificate_create](#test-238) +- [Test 239: keyvault_certificate_get](#test-239) +- [Test 240: keyvault_certificate_get](#test-240) +- [Test 241: keyvault_certificate_get](#test-241) +- [Test 242: keyvault_certificate_get](#test-242) +- [Test 243: keyvault_certificate_get](#test-243) +- [Test 244: keyvault_certificate_import](#test-244) +- [Test 245: keyvault_certificate_import](#test-245) +- [Test 246: keyvault_certificate_import](#test-246) +- [Test 247: keyvault_certificate_import](#test-247) +- [Test 248: keyvault_certificate_import](#test-248) +- [Test 249: keyvault_certificate_list](#test-249) +- [Test 250: keyvault_certificate_list](#test-250) +- [Test 251: keyvault_certificate_list](#test-251) +- [Test 252: keyvault_certificate_list](#test-252) +- [Test 253: keyvault_certificate_list](#test-253) +- [Test 254: keyvault_certificate_list](#test-254) +- [Test 255: keyvault_key_create](#test-255) +- [Test 256: keyvault_key_create](#test-256) +- [Test 257: 
keyvault_key_create](#test-257) +- [Test 258: keyvault_key_create](#test-258) +- [Test 259: keyvault_key_create](#test-259) +- [Test 260: keyvault_key_get](#test-260) +- [Test 261: keyvault_key_get](#test-261) +- [Test 262: keyvault_key_get](#test-262) +- [Test 263: keyvault_key_get](#test-263) +- [Test 264: keyvault_key_get](#test-264) +- [Test 265: keyvault_key_list](#test-265) +- [Test 266: keyvault_key_list](#test-266) +- [Test 267: keyvault_key_list](#test-267) +- [Test 268: keyvault_key_list](#test-268) +- [Test 269: keyvault_key_list](#test-269) +- [Test 270: keyvault_key_list](#test-270) +- [Test 271: keyvault_secret_create](#test-271) +- [Test 272: keyvault_secret_create](#test-272) +- [Test 273: keyvault_secret_create](#test-273) +- [Test 274: keyvault_secret_create](#test-274) +- [Test 275: keyvault_secret_create](#test-275) +- [Test 276: keyvault_secret_get](#test-276) +- [Test 277: keyvault_secret_get](#test-277) +- [Test 278: keyvault_secret_get](#test-278) +- [Test 279: keyvault_secret_get](#test-279) +- [Test 280: keyvault_secret_get](#test-280) +- [Test 281: keyvault_secret_list](#test-281) +- [Test 282: keyvault_secret_list](#test-282) +- [Test 283: keyvault_secret_list](#test-283) +- [Test 284: keyvault_secret_list](#test-284) +- [Test 285: keyvault_secret_list](#test-285) +- [Test 286: keyvault_secret_list](#test-286) +- [Test 287: aks_cluster_get](#test-287) +- [Test 288: aks_cluster_get](#test-288) +- [Test 289: aks_cluster_get](#test-289) +- [Test 290: aks_cluster_get](#test-290) +- [Test 291: aks_cluster_get](#test-291) +- [Test 292: aks_cluster_get](#test-292) +- [Test 293: aks_cluster_get](#test-293) +- [Test 294: aks_nodepool_get](#test-294) +- [Test 295: aks_nodepool_get](#test-295) +- [Test 296: aks_nodepool_get](#test-296) +- [Test 297: aks_nodepool_get](#test-297) +- [Test 298: aks_nodepool_get](#test-298) +- [Test 299: aks_nodepool_get](#test-299) +- [Test 300: loadtesting_test_create](#test-300) +- [Test 301: 
loadtesting_test_get](#test-301) +- [Test 302: loadtesting_testresource_create](#test-302) +- [Test 303: loadtesting_testresource_list](#test-303) +- [Test 304: loadtesting_testrun_create](#test-304) +- [Test 305: loadtesting_testrun_get](#test-305) +- [Test 306: loadtesting_testrun_list](#test-306) +- [Test 307: loadtesting_testrun_update](#test-307) +- [Test 308: grafana_list](#test-308) +- [Test 309: managedlustre_fs_create](#test-309) +- [Test 310: managedlustre_fs_list](#test-310) +- [Test 311: managedlustre_fs_list](#test-311) +- [Test 312: managedlustre_fs_sku_get](#test-312) +- [Test 313: managedlustre_fs_subnetsize_ask](#test-313) +- [Test 314: managedlustre_fs_subnetsize_validate](#test-314) +- [Test 315: managedlustre_fs_update](#test-315) +- [Test 316: marketplace_product_get](#test-316) +- [Test 317: marketplace_product_list](#test-317) +- [Test 318: marketplace_product_list](#test-318) +- [Test 319: azureaibestpractices_get](#test-319) +- [Test 320: azureaibestpractices_get](#test-320) +- [Test 321: azureaibestpractices_get](#test-321) +- [Test 322: azureaibestpractices_get](#test-322) +- [Test 323: azureaibestpractices_get](#test-323) +- [Test 324: get_bestpractices_get](#test-324) +- [Test 325: get_bestpractices_get](#test-325) +- [Test 326: get_bestpractices_get](#test-326) +- [Test 327: get_bestpractices_get](#test-327) +- [Test 328: get_bestpractices_get](#test-328) +- [Test 329: get_bestpractices_get](#test-329) +- [Test 330: get_bestpractices_get](#test-330) +- [Test 331: get_bestpractices_get](#test-331) +- [Test 332: get_bestpractices_get](#test-332) +- [Test 333: monitor_activitylog_list](#test-333) +- [Test 334: monitor_healthmodels_entity_get](#test-334) +- [Test 335: monitor_metrics_definitions](#test-335) +- [Test 336: monitor_metrics_definitions](#test-336) +- [Test 337: monitor_metrics_definitions](#test-337) +- [Test 338: monitor_metrics_query](#test-338) +- [Test 339: monitor_metrics_query](#test-339) +- [Test 340: 
monitor_metrics_query](#test-340) +- [Test 341: monitor_metrics_query](#test-341) +- [Test 342: monitor_metrics_query](#test-342) +- [Test 343: monitor_metrics_query](#test-343) +- [Test 344: monitor_resource_log_query](#test-344) +- [Test 345: monitor_table_list](#test-345) +- [Test 346: monitor_table_list](#test-346) +- [Test 347: monitor_table_type_list](#test-347) +- [Test 348: monitor_table_type_list](#test-348) +- [Test 349: monitor_webtests_create](#test-349) +- [Test 350: monitor_webtests_get](#test-350) +- [Test 351: monitor_webtests_list](#test-351) +- [Test 352: monitor_webtests_list](#test-352) +- [Test 353: monitor_webtests_update](#test-353) +- [Test 354: monitor_workspace_list](#test-354) +- [Test 355: monitor_workspace_list](#test-355) +- [Test 356: monitor_workspace_list](#test-356) +- [Test 357: monitor_workspace_log_query](#test-357) +- [Test 358: datadog_monitoredresources_list](#test-358) +- [Test 359: datadog_monitoredresources_list](#test-359) +- [Test 360: extension_azqr](#test-360) +- [Test 361: extension_azqr](#test-361) +- [Test 362: extension_azqr](#test-362) +- [Test 363: quota_region_availability_list](#test-363) +- [Test 364: quota_usage_check](#test-364) +- [Test 365: role_assignment_list](#test-365) +- [Test 366: role_assignment_list](#test-366) +- [Test 367: redis_list](#test-367) +- [Test 368: redis_list](#test-368) +- [Test 369: redis_list](#test-369) +- [Test 370: redis_list](#test-370) +- [Test 371: redis_list](#test-371) +- [Test 372: group_list](#test-372) +- [Test 373: group_list](#test-373) +- [Test 374: group_list](#test-374) +- [Test 375: resourcehealth_availability-status_get](#test-375) +- [Test 376: resourcehealth_availability-status_get](#test-376) +- [Test 377: resourcehealth_availability-status_get](#test-377) +- [Test 378: resourcehealth_availability-status_list](#test-378) +- [Test 379: resourcehealth_availability-status_list](#test-379) +- [Test 380: resourcehealth_availability-status_list](#test-380) +- [Test 
381: resourcehealth_health-events_list](#test-381) +- [Test 382: resourcehealth_health-events_list](#test-382) +- [Test 383: resourcehealth_health-events_list](#test-383) +- [Test 384: resourcehealth_health-events_list](#test-384) +- [Test 385: resourcehealth_health-events_list](#test-385) +- [Test 386: servicebus_queue_details](#test-386) +- [Test 387: servicebus_topic_details](#test-387) +- [Test 388: servicebus_topic_subscription_details](#test-388) +- [Test 389: signalr_runtime_get](#test-389) +- [Test 390: signalr_runtime_get](#test-390) +- [Test 391: signalr_runtime_get](#test-391) +- [Test 392: signalr_runtime_get](#test-392) +- [Test 393: signalr_runtime_get](#test-393) +- [Test 394: signalr_runtime_get](#test-394) +- [Test 395: sql_db_create](#test-395) +- [Test 396: sql_db_create](#test-396) +- [Test 397: sql_db_create](#test-397) +- [Test 398: sql_db_delete](#test-398) +- [Test 399: sql_db_delete](#test-399) +- [Test 400: sql_db_delete](#test-400) +- [Test 401: sql_db_list](#test-401) +- [Test 402: sql_db_list](#test-402) +- [Test 403: sql_db_rename](#test-403) +- [Test 404: sql_db_rename](#test-404) +- [Test 405: sql_db_show](#test-405) +- [Test 406: sql_db_show](#test-406) +- [Test 407: sql_db_update](#test-407) +- [Test 408: sql_db_update](#test-408) +- [Test 409: sql_elastic-pool_list](#test-409) +- [Test 410: sql_elastic-pool_list](#test-410) +- [Test 411: sql_elastic-pool_list](#test-411) +- [Test 412: sql_server_create](#test-412) +- [Test 413: sql_server_create](#test-413) +- [Test 414: sql_server_create](#test-414) +- [Test 415: sql_server_delete](#test-415) +- [Test 416: sql_server_delete](#test-416) +- [Test 417: sql_server_delete](#test-417) +- [Test 418: sql_server_entra-admin_list](#test-418) +- [Test 419: sql_server_entra-admin_list](#test-419) +- [Test 420: sql_server_entra-admin_list](#test-420) +- [Test 421: sql_server_firewall-rule_create](#test-421) +- [Test 422: sql_server_firewall-rule_create](#test-422) +- [Test 423: 
sql_server_firewall-rule_create](#test-423) +- [Test 424: sql_server_firewall-rule_delete](#test-424) +- [Test 425: sql_server_firewall-rule_delete](#test-425) +- [Test 426: sql_server_firewall-rule_delete](#test-426) +- [Test 427: sql_server_firewall-rule_list](#test-427) +- [Test 428: sql_server_firewall-rule_list](#test-428) +- [Test 429: sql_server_firewall-rule_list](#test-429) +- [Test 430: sql_server_list](#test-430) +- [Test 431: sql_server_list](#test-431) +- [Test 432: sql_server_show](#test-432) +- [Test 433: sql_server_show](#test-433) +- [Test 434: sql_server_show](#test-434) +- [Test 435: storage_account_create](#test-435) +- [Test 436: storage_account_create](#test-436) +- [Test 437: storage_account_create](#test-437) +- [Test 438: storage_account_get](#test-438) +- [Test 439: storage_account_get](#test-439) +- [Test 440: storage_account_get](#test-440) +- [Test 441: storage_account_get](#test-441) +- [Test 442: storage_account_get](#test-442) +- [Test 443: storage_blob_container_create](#test-443) +- [Test 444: storage_blob_container_create](#test-444) +- [Test 445: storage_blob_container_create](#test-445) +- [Test 446: storage_blob_container_get](#test-446) +- [Test 447: storage_blob_container_get](#test-447) +- [Test 448: storage_blob_container_get](#test-448) +- [Test 449: storage_blob_get](#test-449) +- [Test 450: storage_blob_get](#test-450) +- [Test 451: storage_blob_get](#test-451) +- [Test 452: storage_blob_get](#test-452) +- [Test 453: storage_blob_upload](#test-453) +- [Test 454: subscription_list](#test-454) +- [Test 455: subscription_list](#test-455) +- [Test 456: subscription_list](#test-456) +- [Test 457: subscription_list](#test-457) +- [Test 458: azureterraformbestpractices_get](#test-458) +- [Test 459: azureterraformbestpractices_get](#test-459) +- [Test 460: virtualdesktop_hostpool_list](#test-460) +- [Test 461: virtualdesktop_hostpool_host_list](#test-461) +- [Test 462: virtualdesktop_hostpool_host_user-list](#test-462) +- [Test 
463: workbooks_create](#test-463) +- [Test 464: workbooks_delete](#test-464) +- [Test 465: workbooks_list](#test-465) +- [Test 466: workbooks_list](#test-466) +- [Test 467: workbooks_show](#test-467) +- [Test 468: workbooks_show](#test-468) +- [Test 469: workbooks_update](#test-469) +- [Test 470: bicepschema_get](#test-470) +- [Test 471: cloudarchitect_design](#test-471) +- [Test 472: cloudarchitect_design](#test-472) +- [Test 473: cloudarchitect_design](#test-473) +- [Test 474: cloudarchitect_design](#test-474) --- @@ -490,66 +500,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.705410 | `foundry_agents_connect` | ✅ **EXPECTED** | -| 2 | 0.663468 | `foundry_agents_list` | ❌ | +| 2 | 0.663568 | `foundry_agents_list` | ❌ | | 3 | 0.617213 | `foundry_resource_get` | ❌ | -| 4 | 0.548044 | `foundry_openai_models-list` | ❌ | -| 5 | 0.547459 | `foundry_agents_get-sdk-sample` | ❌ | +| 4 | 0.548108 | `foundry_agents_get-sdk-sample` | ❌ | +| 5 | 0.548044 | `foundry_openai_models-list` | ❌ | --- ## Test 2 -**Expected Tool:** `foundry_agents_evaluate` -**Prompt:** Evaluate the full query and response I got from my agent for task_adherence - -### Results - -| Rank | Score | Tool | Status | -|------|-------|------|--------| -| 1 | 0.543045 | `foundry_agents_query-and-evaluate` | ❌ | -| 2 | 0.469272 | `foundry_agents_evaluate` | ✅ **EXPECTED** | -| 3 | 0.445585 | `foundry_agents_connect` | ❌ | -| 4 | 0.298494 | `foundry_threads_list` | ❌ | -| 5 | 0.279058 | `foundry_agents_list` | ❌ | - ---- - -## Test 3 - -**Expected Tool:** `foundry_agents_list` -**Prompt:** List all agents in my Azure AI Foundry resource - -### Results - -| Rank | Score | Tool | Status | -|------|-------|------|--------| -| 1 | 0.797701 | `foundry_agents_list` | ✅ **EXPECTED** | -| 2 | 0.666021 | `foundry_resource_get` | ❌ | -| 3 | 0.654206 | `foundry_openai_models-list` | ❌ | -| 4 | 0.647246 | `foundry_threads_list` | ❌ | -| 5 | 0.575761 | `foundry_models_deployments_list` | ❌ | 
- ---- - -## Test 4 - -**Expected Tool:** `foundry_agents_list` -**Prompt:** Show me the available agents in my Azure AI Foundry resource - -### Results - -| Rank | Score | Tool | Status | -|------|-------|------|--------| -| 1 | 0.749704 | `foundry_agents_list` | ✅ **EXPECTED** | -| 2 | 0.630323 | `foundry_resource_get` | ❌ | -| 3 | 0.611801 | `foundry_openai_models-list` | ❌ | -| 4 | 0.603708 | `foundry_threads_list` | ❌ | -| 5 | 0.556580 | `foundry_agents_get-sdk-sample` | ❌ | - ---- - -## Test 5 - **Expected Tool:** `foundry_agents_create` **Prompt:** Create a new Azure AI Foundry agent using instructions in the active editor @@ -557,32 +516,32 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.587064 | `foundry_agents_create` | ✅ **EXPECTED** | -| 2 | 0.561567 | `foundry_agents_get-sdk-sample` | ❌ | -| 3 | 0.554070 | `foundry_threads_create` | ❌ | +| 1 | 0.586996 | `foundry_agents_create` | ✅ **EXPECTED** | +| 2 | 0.562087 | `foundry_agents_get-sdk-sample` | ❌ | +| 3 | 0.554009 | `foundry_threads_create` | ❌ | | 4 | 0.525727 | `foundry_models_deploy` | ❌ | -| 5 | 0.525461 | `foundry_agents_list` | ❌ | +| 5 | 0.525615 | `foundry_agents_list` | ❌ | --- -## Test 6 +## Test 3 -**Expected Tool:** `foundry_agents_query-and-evaluate` -**Prompt:** Query and evaluate an agent in my Azure AI Foundry resource for task_adherence +**Expected Tool:** `foundry_agents_evaluate` +**Prompt:** Evaluate the full query and response I got from my agent for task_adherence ### Results | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.652200 | `foundry_agents_connect` | ❌ | -| 2 | 0.570725 | `foundry_agents_list` | ❌ | -| 3 | 0.553233 | `foundry_agents_query-and-evaluate` | ✅ **EXPECTED** | -| 4 | 0.493778 | `foundry_agents_evaluate` | ❌ | -| 5 | 0.469431 | `foundry_threads_list` | ❌ | +| 1 | 0.544099 | `foundry_agents_query-and-evaluate` | ❌ | +| 2 | 0.469428 | `foundry_agents_evaluate` | ✅ **EXPECTED** | +| 3 | 0.445964 | 
`foundry_agents_connect` | ❌ | +| 4 | 0.297986 | `foundry_threads_list` | ❌ | +| 5 | 0.278921 | `foundry_agents_list` | ❌ | --- -## Test 7 +## Test 4 **Expected Tool:** `foundry_agents_get-sdk-sample` **Prompt:** Create a CLI app that can talk to an Azure AI Foundry Agent using Python SDK @@ -591,66 +550,66 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.595581 | `foundry_agents_get-sdk-sample` | ✅ **EXPECTED** | -| 2 | 0.552197 | `foundry_threads_create` | ❌ | +| 1 | 0.595766 | `foundry_agents_get-sdk-sample` | ✅ **EXPECTED** | +| 2 | 0.552180 | `foundry_threads_create` | ❌ | | 3 | 0.521920 | `foundry_agents_connect` | ❌ | -| 4 | 0.518552 | `foundry_agents_create` | ❌ | -| 5 | 0.509581 | `foundry_agents_list` | ❌ | +| 4 | 0.518652 | `foundry_agents_create` | ❌ | +| 5 | 0.509764 | `foundry_agents_list` | ❌ | --- -## Test 8 +## Test 5 -**Expected Tool:** `foundry_threads_create` -**Prompt:** Create an Azure AI Foundry thread to hold the conversation +**Expected Tool:** `foundry_agents_list` +**Prompt:** List all agents in my Azure AI Foundry resource ### Results | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.606811 | `foundry_threads_create` | ✅ **EXPECTED** | -| 2 | 0.528310 | `foundry_openai_chat-completions-create` | ❌ | -| 3 | 0.519709 | `foundry_threads_get-messages` | ❌ | -| 4 | 0.506089 | `foundry_threads_list` | ❌ | -| 5 | 0.490796 | `foundry_models_deploy` | ❌ | +| 1 | 0.797877 | `foundry_agents_list` | ✅ **EXPECTED** | +| 2 | 0.666021 | `foundry_resource_get` | ❌ | +| 3 | 0.654206 | `foundry_openai_models-list` | ❌ | +| 4 | 0.647246 | `foundry_threads_list` | ❌ | +| 5 | 0.575553 | `foundry_models_deployments_list` | ❌ | --- -## Test 9 +## Test 6 -**Expected Tool:** `foundry_threads_list` -**Prompt:** List my AI Foundry threads +**Expected Tool:** `foundry_agents_list` +**Prompt:** Show me the available agents in my Azure AI Foundry resource ### Results | Rank | Score | Tool | Status | 
|------|-------|------|--------| -| 1 | 0.677249 | `foundry_threads_list` | ✅ **EXPECTED** | -| 2 | 0.574068 | `foundry_threads_get-messages` | ❌ | -| 3 | 0.566999 | `foundry_threads_create` | ❌ | -| 4 | 0.471737 | `foundry_agents_get-sdk-sample` | ❌ | -| 5 | 0.448682 | `foundry_agents_list` | ❌ | +| 1 | 0.749829 | `foundry_agents_list` | ✅ **EXPECTED** | +| 2 | 0.630288 | `foundry_resource_get` | ❌ | +| 3 | 0.611722 | `foundry_openai_models-list` | ❌ | +| 4 | 0.603689 | `foundry_threads_list` | ❌ | +| 5 | 0.556990 | `foundry_agents_get-sdk-sample` | ❌ | --- -## Test 10 +## Test 7 -**Expected Tool:** `foundry_threads_get-messages` -**Prompt:** Show me the messages in the AI Foundry thread with id +**Expected Tool:** `foundry_agents_query-and-evaluate` +**Prompt:** Query and evaluate an agent in my Azure AI Foundry resource for task_adherence ### Results | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.669937 | `foundry_threads_get-messages` | ✅ **EXPECTED** | -| 2 | 0.584431 | `foundry_threads_create` | ❌ | -| 3 | 0.529381 | `foundry_threads_list` | ❌ | -| 4 | 0.437894 | `foundry_agents_get-sdk-sample` | ❌ | -| 5 | 0.427894 | `foundry_agents_create` | ❌ | +| 1 | 0.652200 | `foundry_agents_connect` | ❌ | +| 2 | 0.570788 | `foundry_agents_list` | ❌ | +| 3 | 0.553190 | `foundry_agents_query-and-evaluate` | ✅ **EXPECTED** | +| 4 | 0.493779 | `foundry_agents_evaluate` | ❌ | +| 5 | 0.469431 | `foundry_threads_list` | ❌ | --- -## Test 11 +## Test 8 **Expected Tool:** `foundry_knowledge_index_list` **Prompt:** List all knowledge indexes in my AI Foundry project @@ -660,14 +619,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.703772 | `foundry_knowledge_index_list` | ✅ **EXPECTED** | -| 2 | 0.537540 | `foundry_agents_list` | ❌ | +| 2 | 0.537700 | `foundry_agents_list` | ❌ | | 3 | 0.526528 | `foundry_knowledge_index_schema` | ❌ | | 4 | 0.500786 | `foundry_threads_list` | ❌ | -| 5 | 0.475746 | 
`foundry_models_deployments_list` | ❌ | +| 5 | 0.475802 | `foundry_models_deployments_list` | ❌ | --- -## Test 12 +## Test 9 **Expected Tool:** `foundry_knowledge_index_list` **Prompt:** Show me the knowledge indexes in my AI Foundry project @@ -678,13 +637,13 @@ |------|-------|------|--------| | 1 | 0.615458 | `foundry_knowledge_index_list` | ✅ **EXPECTED** | | 2 | 0.489311 | `foundry_knowledge_index_schema` | ❌ | -| 3 | 0.484329 | `foundry_agents_list` | ❌ | +| 3 | 0.484466 | `foundry_agents_list` | ❌ | | 4 | 0.454174 | `foundry_threads_list` | ❌ | | 5 | 0.441521 | `foundry_resource_get` | ❌ | --- -## Test 13 +## Test 10 **Expected Tool:** `foundry_knowledge_index_schema` **Prompt:** Show me the schema for knowledge index in my Azure AI Foundry resource @@ -701,7 +660,7 @@ --- -## Test 14 +## Test 11 **Expected Tool:** `foundry_knowledge_index_schema` **Prompt:** Get the schema configuration for knowledge index @@ -710,15 +669,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.650203 | `foundry_knowledge_index_schema` | ✅ **EXPECTED** | -| 2 | 0.432792 | `postgres_table_schema_get` | ❌ | -| 3 | 0.417496 | `kusto_table_schema` | ❌ | -| 4 | 0.398322 | `mysql_table_schema_get` | ❌ | -| 5 | 0.396119 | `foundry_knowledge_index_list` | ❌ | +| 1 | 0.650269 | `foundry_knowledge_index_schema` | ✅ **EXPECTED** | +| 2 | 0.432759 | `postgres_table_schema_get` | ❌ | +| 3 | 0.417421 | `kusto_table_schema` | ❌ | +| 4 | 0.398186 | `mysql_table_schema_get` | ❌ | +| 5 | 0.396194 | `foundry_knowledge_index_list` | ❌ | --- -## Test 15 +## Test 12 **Expected Tool:** `foundry_models_deploy` **Prompt:** Deploy a GPT4o instance on my resource @@ -731,11 +690,11 @@ | 2 | 0.299986 | `foundry_openai_models-list` | ❌ | | 3 | 0.298490 | `loadtesting_testrun_create` | ❌ | | 4 | 0.293050 | `loadtesting_testresource_create` | ❌ | -| 5 | 0.290387 | `foundry_openai_embeddings-create` | ❌ | +| 5 | 0.290381 | `foundry_openai_embeddings-create` | ❌ | --- -## Test 16 
+## Test 13 **Expected Tool:** `foundry_models_deployments_list` **Prompt:** List all AI Foundry model deployments @@ -744,15 +703,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.681081 | `foundry_models_deployments_list` | ✅ **EXPECTED** | +| 1 | 0.681385 | `foundry_models_deployments_list` | ✅ **EXPECTED** | | 2 | 0.674510 | `foundry_openai_models-list` | ❌ | | 3 | 0.572625 | `foundry_threads_list` | ❌ | -| 4 | 0.568871 | `foundry_agents_list` | ❌ | +| 4 | 0.569059 | `foundry_agents_list` | ❌ | | 5 | 0.566272 | `foundry_resource_get` | ❌ | --- -## Test 17 +## Test 14 **Expected Tool:** `foundry_models_deployments_list` **Prompt:** Show me all AI Foundry model deployments @@ -761,15 +720,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.619840 | `foundry_models_deployments_list` | ✅ **EXPECTED** | -| 2 | 0.619299 | `foundry_openai_models-list` | ❌ | -| 3 | 0.543385 | `foundry_resource_get` | ❌ | -| 4 | 0.540528 | `foundry_agents_list` | ❌ | -| 5 | 0.527141 | `foundry_threads_list` | ❌ | +| 1 | 0.620173 | `foundry_models_deployments_list` | ✅ **EXPECTED** | +| 2 | 0.619231 | `foundry_openai_models-list` | ❌ | +| 3 | 0.543352 | `foundry_resource_get` | ❌ | +| 4 | 0.540551 | `foundry_agents_list` | ❌ | +| 5 | 0.527121 | `foundry_threads_list` | ❌ | --- -## Test 18 +## Test 15 **Expected Tool:** `foundry_models_list` **Prompt:** List all AI Foundry models @@ -781,12 +740,12 @@ | 1 | 0.603415 | `foundry_openai_models-list` | ❌ | | 2 | 0.560022 | `foundry_models_list` | ✅ **EXPECTED** | | 3 | 0.553634 | `foundry_threads_list` | ❌ | -| 4 | 0.537958 | `foundry_models_deployments_list` | ❌ | -| 5 | 0.519191 | `foundry_agents_list` | ❌ | +| 4 | 0.537981 | `foundry_models_deployments_list` | ❌ | +| 5 | 0.519472 | `foundry_agents_list` | ❌ | --- -## Test 19 +## Test 16 **Expected Tool:** `foundry_models_list` **Prompt:** Show me the available AI Foundry models @@ -798,12 +757,12 @@ | 1 | 0.576904 | 
`foundry_openai_models-list` | ❌ | | 2 | 0.574818 | `foundry_models_list` | ✅ **EXPECTED** | | 3 | 0.525312 | `foundry_resource_get` | ❌ | -| 4 | 0.522153 | `foundry_agents_get-sdk-sample` | ❌ | -| 5 | 0.517825 | `foundry_models_deployments_list` | ❌ | +| 4 | 0.521474 | `foundry_agents_get-sdk-sample` | ❌ | +| 5 | 0.517980 | `foundry_models_deployments_list` | ❌ | --- -## Test 20 +## Test 17 **Expected Tool:** `foundry_openai_chat-completions-create` **Prompt:** Create a chat completion with the message "Hello, how are you today?" using my Azure AI Foundry resource @@ -814,13 +773,13 @@ |------|-------|------|--------| | 1 | 0.641293 | `foundry_openai_chat-completions-create` | ✅ **EXPECTED** | | 2 | 0.546736 | `foundry_openai_create-completion` | ❌ | -| 3 | 0.420018 | `foundry_threads_create` | ❌ | -| 4 | 0.415482 | `foundry_agents_connect` | ❌ | -| 5 | 0.399382 | `foundry_openai_embeddings-create` | ❌ | +| 3 | 0.419991 | `foundry_threads_create` | ❌ | +| 4 | 0.415483 | `foundry_agents_connect` | ❌ | +| 5 | 0.399383 | `foundry_openai_embeddings-create` | ❌ | --- -## Test 21 +## Test 18 **Expected Tool:** `foundry_openai_create-completion` **Prompt:** Create a completion with the prompt "What is Azure?" 
using my Azure AI Foundry resource @@ -831,13 +790,13 @@ |------|-------|------|--------| | 1 | 0.696936 | `foundry_openai_create-completion` | ✅ **EXPECTED** | | 2 | 0.579108 | `foundry_openai_chat-completions-create` | ❌ | -| 3 | 0.465558 | `azureaibestpractices_get` | ❌ | -| 4 | 0.463703 | `foundry_models_deploy` | ❌ | -| 5 | 0.459126 | `foundry_resource_get` | ❌ | +| 3 | 0.463703 | `foundry_models_deploy` | ❌ | +| 4 | 0.459126 | `foundry_resource_get` | ❌ | +| 5 | 0.458622 | `foundry_openai_embeddings-create` | ❌ | --- -## Test 22 +## Test 19 **Expected Tool:** `foundry_openai_embeddings-create` **Prompt:** Generate embeddings for the text "Azure OpenAI Service" using my Azure AI Foundry resource @@ -846,15 +805,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.766496 | `foundry_openai_embeddings-create` | ✅ **EXPECTED** | -| 2 | 0.543339 | `foundry_models_deploy` | ❌ | -| 3 | 0.542214 | `foundry_openai_create-completion` | ❌ | -| 4 | 0.520746 | `foundry_openai_models-list` | ❌ | -| 5 | 0.519335 | `foundry_resource_get` | ❌ | +| 1 | 0.766123 | `foundry_openai_embeddings-create` | ✅ **EXPECTED** | +| 2 | 0.542818 | `foundry_models_deploy` | ❌ | +| 3 | 0.542113 | `foundry_openai_create-completion` | ❌ | +| 4 | 0.520274 | `foundry_openai_models-list` | ❌ | +| 5 | 0.518834 | `foundry_resource_get` | ❌ | --- -## Test 23 +## Test 20 **Expected Tool:** `foundry_openai_embeddings-create` **Prompt:** Create vector embeddings for my text using my Azure AI Foundry resource @@ -863,15 +822,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.724369 | `foundry_openai_embeddings-create` | ✅ **EXPECTED** | -| 2 | 0.494544 | `foundry_resource_get` | ❌ | -| 3 | 0.480389 | `foundry_models_deploy` | ❌ | -| 4 | 0.480294 | `foundry_openai_create-completion` | ❌ | -| 5 | 0.463885 | `foundry_openai_chat-completions-create` | ❌ | +| 1 | 0.724120 | `foundry_openai_embeddings-create` | ✅ **EXPECTED** | +| 2 | 0.494485 | 
`foundry_resource_get` | ❌ | +| 3 | 0.480296 | `foundry_models_deploy` | ❌ | +| 4 | 0.480218 | `foundry_openai_create-completion` | ❌ | +| 5 | 0.463797 | `foundry_openai_chat-completions-create` | ❌ | --- -## Test 24 +## Test 21 **Expected Tool:** `foundry_openai_models-list` **Prompt:** List all available OpenAI models in my Azure AI Foundry resource @@ -882,13 +841,13 @@ |------|-------|------|--------| | 1 | 0.799059 | `foundry_openai_models-list` | ✅ **EXPECTED** | | 2 | 0.668887 | `foundry_resource_get` | ❌ | -| 3 | 0.667041 | `foundry_models_list` | ❌ | -| 4 | 0.666560 | `foundry_models_deployments_list` | ❌ | -| 5 | 0.657393 | `foundry_agents_list` | ❌ | +| 3 | 0.667040 | `foundry_models_list` | ❌ | +| 4 | 0.666207 | `foundry_models_deployments_list` | ❌ | +| 5 | 0.657546 | `foundry_agents_list` | ❌ | --- -## Test 25 +## Test 22 **Expected Tool:** `foundry_openai_models-list` **Prompt:** Show me the OpenAI model deployments in my Azure AI Foundry resource @@ -898,14 +857,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.741659 | `foundry_openai_models-list` | ✅ **EXPECTED** | -| 2 | 0.660115 | `foundry_models_deployments_list` | ❌ | -| 3 | 0.648218 | `foundry_resource_get` | ❌ | +| 2 | 0.660160 | `foundry_models_deployments_list` | ❌ | +| 3 | 0.648219 | `foundry_resource_get` | ❌ | | 4 | 0.640650 | `foundry_models_deploy` | ❌ | -| 5 | 0.619790 | `foundry_agents_list` | ❌ | +| 5 | 0.619878 | `foundry_agents_list` | ❌ | --- -## Test 26 +## Test 23 **Expected Tool:** `foundry_resource_get` **Prompt:** List all AI Foundry resources in my subscription @@ -916,13 +875,13 @@ |------|-------|------|--------| | 1 | 0.594096 | `foundry_resource_get` | ✅ **EXPECTED** | | 2 | 0.571916 | `foundry_openai_models-list` | ❌ | -| 3 | 0.566762 | `foundry_agents_list` | ❌ | -| 4 | 0.558075 | `foundry_threads_list` | ❌ | -| 5 | 0.556154 | `search_service_list` | ❌ | +| 3 | 0.567019 | `foundry_agents_list` | ❌ | +| 4 | 0.558290 | 
`search_service_list` | ❌ | +| 5 | 0.558076 | `foundry_threads_list` | ❌ | --- -## Test 27 +## Test 24 **Expected Tool:** `foundry_resource_get` **Prompt:** Show me the AI Foundry resources in resource group @@ -933,13 +892,13 @@ |------|-------|------|--------| | 1 | 0.665311 | `foundry_resource_get` | ✅ **EXPECTED** | | 2 | 0.585305 | `foundry_openai_models-list` | ❌ | -| 3 | 0.553808 | `foundry_agents_list` | ❌ | -| 4 | 0.518747 | `foundry_openai_embeddings-create` | ❌ | +| 3 | 0.553993 | `foundry_agents_list` | ❌ | +| 4 | 0.518767 | `foundry_openai_embeddings-create` | ❌ | | 5 | 0.492911 | `foundry_models_deploy` | ❌ | --- -## Test 28 +## Test 25 **Expected Tool:** `foundry_resource_get` **Prompt:** Get details for AI Foundry resource in resource group @@ -951,8 +910,59 @@ | 1 | 0.735316 | `foundry_resource_get` | ✅ **EXPECTED** | | 2 | 0.571906 | `foundry_openai_models-list` | ❌ | | 3 | 0.509484 | `monitor_webtests_get` | ❌ | -| 4 | 0.496980 | `foundry_openai_embeddings-create` | ❌ | -| 5 | 0.475498 | `foundry_agents_list` | ❌ | +| 4 | 0.497090 | `foundry_openai_embeddings-create` | ❌ | +| 5 | 0.475722 | `foundry_agents_list` | ❌ | + +--- + +## Test 26 + +**Expected Tool:** `foundry_threads_create` +**Prompt:** Create an Azure AI Foundry thread to hold the conversation + +### Results + +| Rank | Score | Tool | Status | +|------|-------|------|--------| +| 1 | 0.606760 | `foundry_threads_create` | ✅ **EXPECTED** | +| 2 | 0.528310 | `foundry_openai_chat-completions-create` | ❌ | +| 3 | 0.519794 | `foundry_threads_get-messages` | ❌ | +| 4 | 0.506089 | `foundry_threads_list` | ❌ | +| 5 | 0.490796 | `foundry_models_deploy` | ❌ | + +--- + +## Test 27 + +**Expected Tool:** `foundry_threads_get-messages` +**Prompt:** Show me the messages in the AI Foundry thread with id + +### Results + +| Rank | Score | Tool | Status | +|------|-------|------|--------| +| 1 | 0.669946 | `foundry_threads_get-messages` | ✅ **EXPECTED** | +| 2 | 0.584411 | `foundry_threads_create` | ❌ | 
+| 3 | 0.529381 | `foundry_threads_list` | ❌ | +| 4 | 0.437480 | `foundry_agents_get-sdk-sample` | ❌ | +| 5 | 0.427594 | `foundry_agents_create` | ❌ | + +--- + +## Test 28 + +**Expected Tool:** `foundry_threads_list` +**Prompt:** List my AI Foundry threads + +### Results + +| Rank | Score | Tool | Status | +|------|-------|------|--------| +| 1 | 0.677248 | `foundry_threads_list` | ✅ **EXPECTED** | +| 2 | 0.574065 | `foundry_threads_get-messages` | ❌ | +| 3 | 0.566910 | `foundry_threads_create` | ❌ | +| 4 | 0.471544 | `foundry_agents_get-sdk-sample` | ❌ | +| 5 | 0.448963 | `foundry_agents_list` | ❌ | --- @@ -967,9 +977,9 @@ |------|-------|------|--------| | 1 | 0.785967 | `search_knowledge_base_get` | ✅ **EXPECTED** | | 2 | 0.700824 | `search_knowledge_source_get` | ❌ | -| 3 | 0.692681 | `search_service_list` | ❌ | +| 3 | 0.693471 | `search_service_list` | ❌ | | 4 | 0.635863 | `search_knowledge_base_retrieve` | ❌ | -| 5 | 0.586575 | `search_index_get` | ❌ | +| 5 | 0.586574 | `search_index_get` | ❌ | --- @@ -985,7 +995,7 @@ | 1 | 0.748213 | `search_knowledge_base_get` | ✅ **EXPECTED** | | 2 | 0.668487 | `search_knowledge_source_get` | ❌ | | 3 | 0.628582 | `search_knowledge_base_retrieve` | ❌ | -| 4 | 0.623715 | `search_service_list` | ❌ | +| 4 | 0.624479 | `search_service_list` | ❌ | | 5 | 0.566618 | `search_index_get` | ❌ | --- @@ -1002,8 +1012,8 @@ | 1 | 0.702942 | `search_knowledge_base_get` | ✅ **EXPECTED** | | 2 | 0.605964 | `search_knowledge_source_get` | ❌ | | 3 | 0.583234 | `search_knowledge_base_retrieve` | ❌ | -| 4 | 0.512825 | `search_service_list` | ❌ | -| 5 | 0.476815 | `foundry_knowledge_index_list` | ❌ | +| 4 | 0.513638 | `search_service_list` | ❌ | +| 5 | 0.476816 | `foundry_knowledge_index_list` | ❌ | --- @@ -1016,11 +1026,11 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.688155 | `search_knowledge_base_get` | ✅ **EXPECTED** | -| 2 | 0.599348 | `search_knowledge_source_get` | ❌ | -| 3 | 0.578437 | 
`search_knowledge_base_retrieve` | ❌ | -| 4 | 0.456512 | `search_service_list` | ❌ | -| 5 | 0.439493 | `foundry_knowledge_index_list` | ❌ | +| 1 | 0.688051 | `search_knowledge_base_get` | ✅ **EXPECTED** | +| 2 | 0.599247 | `search_knowledge_source_get` | ❌ | +| 3 | 0.578499 | `search_knowledge_base_retrieve` | ❌ | +| 4 | 0.457619 | `search_service_list` | ❌ | +| 5 | 0.439529 | `foundry_knowledge_index_list` | ❌ | --- @@ -1033,11 +1043,11 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.769383 | `search_knowledge_base_get` | ✅ **EXPECTED** | +| 1 | 0.769384 | `search_knowledge_base_get` | ✅ **EXPECTED** | | 2 | 0.685640 | `search_knowledge_source_get` | ❌ | | 3 | 0.636958 | `search_knowledge_base_retrieve` | ❌ | | 4 | 0.585949 | `search_index_get` | ❌ | -| 5 | 0.533298 | `search_service_list` | ❌ | +| 5 | 0.533700 | `search_service_list` | ❌ | --- @@ -1053,7 +1063,7 @@ | 1 | 0.595585 | `search_knowledge_base_get` | ✅ **EXPECTED** | | 2 | 0.551922 | `search_knowledge_base_retrieve` | ❌ | | 3 | 0.515480 | `search_knowledge_source_get` | ❌ | -| 4 | 0.366170 | `search_service_list` | ❌ | +| 4 | 0.366893 | `search_service_list` | ❌ | | 5 | 0.365633 | `search_index_get` | ❌ | --- @@ -1067,11 +1077,11 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.724869 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | -| 2 | 0.650606 | `search_knowledge_base_get` | ❌ | -| 3 | 0.575356 | `search_index_query` | ❌ | -| 4 | 0.567386 | `search_knowledge_source_get` | ❌ | -| 5 | 0.520336 | `foundry_agents_connect` | ❌ | +| 1 | 0.724846 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | +| 2 | 0.650590 | `search_knowledge_base_get` | ❌ | +| 3 | 0.575307 | `search_index_query` | ❌ | +| 4 | 0.567361 | `search_knowledge_source_get` | ❌ | +| 5 | 0.520360 | `foundry_agents_connect` | ❌ | --- @@ -1084,11 +1094,11 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.633877 | `search_knowledge_base_retrieve` | ✅ 
**EXPECTED** | -| 2 | 0.589927 | `search_knowledge_base_get` | ❌ | -| 3 | 0.502173 | `search_knowledge_source_get` | ❌ | -| 4 | 0.422676 | `foundry_agents_query-and-evaluate` | ❌ | -| 5 | 0.399110 | `search_index_query` | ❌ | +| 1 | 0.633766 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | +| 2 | 0.589869 | `search_knowledge_base_get` | ❌ | +| 3 | 0.502085 | `search_knowledge_source_get` | ❌ | +| 4 | 0.422671 | `foundry_agents_query-and-evaluate` | ❌ | +| 5 | 0.399595 | `search_index_query` | ❌ | --- @@ -1101,10 +1111,10 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.657866 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | +| 1 | 0.657865 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | | 2 | 0.557206 | `search_knowledge_base_get` | ❌ | | 3 | 0.463605 | `search_knowledge_source_get` | ❌ | -| 4 | 0.436719 | `foundry_agents_query-and-evaluate` | ❌ | +| 4 | 0.436739 | `foundry_agents_query-and-evaluate` | ❌ | | 5 | 0.422173 | `foundry_agents_connect` | ❌ | --- @@ -1118,11 +1128,11 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.633766 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | -| 2 | 0.589869 | `search_knowledge_base_get` | ❌ | -| 3 | 0.502085 | `search_knowledge_source_get` | ❌ | -| 4 | 0.422610 | `foundry_agents_query-and-evaluate` | ❌ | -| 5 | 0.399521 | `search_index_query` | ❌ | +| 1 | 0.633677 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | +| 2 | 0.589633 | `search_knowledge_base_get` | ❌ | +| 3 | 0.501914 | `search_knowledge_source_get` | ❌ | +| 4 | 0.422494 | `foundry_agents_query-and-evaluate` | ❌ | +| 5 | 0.399138 | `search_index_query` | ❌ | --- @@ -1137,7 +1147,7 @@ |------|-------|------|--------| | 1 | 0.598868 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | | 2 | 0.547862 | `search_knowledge_base_get` | ❌ | -| 3 | 0.467868 | `foundry_agents_query-and-evaluate` | ❌ | +| 3 | 0.467907 | `foundry_agents_query-and-evaluate` | ❌ | | 4 | 0.464904 | 
`search_knowledge_source_get` | ❌ | | 5 | 0.412481 | `foundry_agents_connect` | ❌ | @@ -1152,11 +1162,11 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.649767 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | -| 2 | 0.631435 | `search_knowledge_base_get` | ❌ | -| 3 | 0.581359 | `search_index_query` | ❌ | -| 4 | 0.571156 | `search_knowledge_source_get` | ❌ | -| 5 | 0.544545 | `search_service_list` | ❌ | +| 1 | 0.649090 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | +| 2 | 0.630905 | `search_knowledge_base_get` | ❌ | +| 3 | 0.581015 | `search_index_query` | ❌ | +| 4 | 0.570684 | `search_knowledge_source_get` | ❌ | +| 5 | 0.544428 | `search_service_list` | ❌ | --- @@ -1171,8 +1181,8 @@ |------|-------|------|--------| | 1 | 0.579716 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | | 2 | 0.560688 | `search_knowledge_base_get` | ❌ | -| 3 | 0.477941 | `search_knowledge_source_get` | ❌ | -| 4 | 0.402530 | `foundry_agents_query-and-evaluate` | ❌ | +| 3 | 0.477942 | `search_knowledge_source_get` | ❌ | +| 4 | 0.402582 | `foundry_agents_query-and-evaluate` | ❌ | | 5 | 0.361231 | `foundry_knowledge_index_list` | ❌ | --- @@ -1189,7 +1199,7 @@ | 1 | 0.582662 | `search_knowledge_base_retrieve` | ✅ **EXPECTED** | | 2 | 0.528610 | `search_knowledge_base_get` | ❌ | | 3 | 0.449336 | `search_knowledge_source_get` | ❌ | -| 4 | 0.447690 | `foundry_agents_query-and-evaluate` | ❌ | +| 4 | 0.447780 | `foundry_agents_query-and-evaluate` | ❌ | | 5 | 0.397187 | `foundry_agents_connect` | ❌ | --- @@ -1203,11 +1213,11 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.760406 | `search_knowledge_source_get` | ✅ **EXPECTED** | -| 2 | 0.690845 | `search_service_list` | ❌ | -| 3 | 0.665905 | `search_knowledge_base_get` | ❌ | -| 4 | 0.573014 | `search_index_get` | ❌ | -| 5 | 0.560755 | `search_knowledge_base_retrieve` | ❌ | +| 1 | 0.760416 | `search_knowledge_source_get` | ✅ **EXPECTED** | +| 2 | 0.691931 | 
`search_service_list` | ❌ | +| 3 | 0.665923 | `search_knowledge_base_get` | ❌ | +| 4 | 0.573012 | `search_index_get` | ❌ | +| 5 | 0.560779 | `search_knowledge_base_retrieve` | ❌ | --- @@ -1221,10 +1231,10 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.737860 | `search_knowledge_source_get` | ✅ **EXPECTED** | -| 2 | 0.659236 | `search_service_list` | ❌ | +| 2 | 0.660170 | `search_service_list` | ❌ | | 3 | 0.652969 | `search_knowledge_base_get` | ❌ | -| 4 | 0.578836 | `search_index_get` | ❌ | -| 5 | 0.560519 | `search_index_query` | ❌ | +| 4 | 0.578835 | `search_index_get` | ❌ | +| 5 | 0.560564 | `search_index_query` | ❌ | --- @@ -1237,9 +1247,9 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.657936 | `search_knowledge_source_get` | ✅ **EXPECTED** | +| 1 | 0.657935 | `search_knowledge_source_get` | ✅ **EXPECTED** | | 2 | 0.558516 | `search_knowledge_base_get` | ❌ | -| 3 | 0.510338 | `search_service_list` | ❌ | +| 3 | 0.511469 | `search_service_list` | ❌ | | 4 | 0.470560 | `search_knowledge_base_retrieve` | ❌ | | 5 | 0.433657 | `foundry_knowledge_index_list` | ❌ | @@ -1256,7 +1266,7 @@ |------|-------|------|--------| | 1 | 0.652945 | `search_knowledge_source_get` | ✅ **EXPECTED** | | 2 | 0.563270 | `search_knowledge_base_get` | ❌ | -| 3 | 0.485934 | `search_service_list` | ❌ | +| 3 | 0.487022 | `search_service_list` | ❌ | | 4 | 0.477636 | `search_knowledge_base_retrieve` | ❌ | | 5 | 0.430518 | `search_index_get` | ❌ | @@ -1272,10 +1282,10 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.825604 | `search_knowledge_source_get` | ✅ **EXPECTED** | -| 2 | 0.693438 | `search_knowledge_base_get` | ❌ | +| 2 | 0.693437 | `search_knowledge_base_get` | ❌ | | 3 | 0.595643 | `search_index_get` | ❌ | | 4 | 0.540550 | `search_knowledge_base_retrieve` | ❌ | -| 5 | 0.531085 | `search_service_list` | ❌ | +| 5 | 0.531247 | `search_service_list` | ❌ | --- @@ -1292,7 +1302,7 @@ | 2 | 0.523643 | 
`search_knowledge_base_get` | ❌ | | 3 | 0.459923 | `search_knowledge_base_retrieve` | ❌ | | 4 | 0.371465 | `search_index_get` | ❌ | -| 5 | 0.370585 | `search_service_list` | ❌ | +| 5 | 0.370838 | `search_service_list` | ❌ | --- @@ -1309,7 +1319,7 @@ | 2 | 0.544557 | `foundry_knowledge_index_schema` | ❌ | | 3 | 0.528153 | `search_knowledge_base_get` | ❌ | | 4 | 0.521765 | `search_knowledge_source_get` | ❌ | -| 5 | 0.490553 | `search_service_list` | ❌ | +| 5 | 0.490624 | `search_service_list` | ❌ | --- @@ -1323,7 +1333,7 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.640256 | `search_index_get` | ✅ **EXPECTED** | -| 2 | 0.619949 | `search_service_list` | ❌ | +| 2 | 0.620140 | `search_service_list` | ❌ | | 3 | 0.538885 | `foundry_knowledge_index_list` | ❌ | | 4 | 0.511485 | `search_knowledge_base_get` | ❌ | | 5 | 0.496094 | `search_knowledge_source_get` | ❌ | @@ -1340,7 +1350,7 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.620759 | `search_index_get` | ✅ **EXPECTED** | -| 2 | 0.562503 | `search_service_list` | ❌ | +| 2 | 0.562775 | `search_service_list` | ❌ | | 3 | 0.538471 | `foundry_knowledge_index_list` | ❌ | | 4 | 0.500365 | `search_knowledge_base_get` | ❌ | | 5 | 0.490025 | `search_knowledge_source_get` | ❌ | @@ -1356,11 +1366,11 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.522598 | `search_index_get` | ❌ | -| 2 | 0.515911 | `search_index_query` | ✅ **EXPECTED** | -| 3 | 0.498264 | `search_service_list` | ❌ | -| 4 | 0.447868 | `search_knowledge_base_retrieve` | ❌ | -| 5 | 0.437608 | `postgres_database_query` | ❌ | +| 1 | 0.522826 | `search_index_get` | ❌ | +| 2 | 0.515870 | `search_index_query` | ✅ **EXPECTED** | +| 3 | 0.497467 | `search_service_list` | ❌ | +| 4 | 0.447977 | `search_knowledge_base_retrieve` | ❌ | +| 5 | 0.437665 | `postgres_database_query` | ❌ | --- @@ -1373,9 +1383,9 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.791803 | 
`search_service_list` | ✅ **EXPECTED** | -| 2 | 0.553012 | `kusto_cluster_list` | ❌ | -| 3 | 0.509479 | `subscription_list` | ❌ | +| 1 | 0.793651 | `search_service_list` | ✅ **EXPECTED** | +| 2 | 0.553011 | `kusto_cluster_list` | ❌ | +| 3 | 0.509461 | `subscription_list` | ❌ | | 4 | 0.505971 | `search_index_get` | ❌ | | 5 | 0.504693 | `marketplace_product_list` | ❌ | @@ -1390,10 +1400,10 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.684837 | `search_service_list` | ✅ **EXPECTED** | +| 1 | 0.686140 | `search_service_list` | ✅ **EXPECTED** | | 2 | 0.484092 | `marketplace_product_list` | ❌ | | 3 | 0.479898 | `search_index_get` | ❌ | -| 4 | 0.462337 | `search_knowledge_base_get` | ❌ | +| 4 | 0.462336 | `search_knowledge_base_get` | ❌ | | 5 | 0.461786 | `kusto_cluster_list` | ❌ | --- @@ -1407,11 +1417,11 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.551241 | `search_service_list` | ✅ **EXPECTED** | +| 1 | 0.553025 | `search_service_list` | ✅ **EXPECTED** | | 2 | 0.436230 | `search_index_get` | ❌ | | 3 | 0.415277 | `search_knowledge_base_get` | ❌ | | 4 | 0.410461 | `search_knowledge_source_get` | ❌ | -| 5 | 0.404707 | `search_index_query` | ❌ | +| 5 | 0.404758 | `search_index_query` | ❌ | --- @@ -1424,11 +1434,11 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.666038 | `speech_stt_recognize` | ✅ **EXPECTED** | -| 2 | 0.377210 | `foundry_openai_embeddings-create` | ❌ | -| 3 | 0.351127 | `deploy_plan_get` | ❌ | -| 4 | 0.338137 | `extension_cli_generate` | ❌ | -| 5 | 0.337763 | `deploy_pipeline_guidance_get` | ❌ | +| 1 | 0.682065 | `speech_tts_synthesize` | ❌ | +| 2 | 0.666038 | `speech_stt_recognize` | ✅ **EXPECTED** | +| 3 | 0.377022 | `foundry_openai_embeddings-create` | ❌ | +| 4 | 0.351127 | `deploy_plan_get` | ❌ | +| 5 | 0.338137 | `extension_cli_generate` | ❌ | --- @@ -1442,10 +1452,10 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.511324 | 
`speech_stt_recognize` | ✅ **EXPECTED** | -| 2 | 0.198123 | `foundry_agents_get-sdk-sample` | ❌ | -| 3 | 0.192462 | `foundry_openai_embeddings-create` | ❌ | -| 4 | 0.170157 | `foundry_openai_create-completion` | ❌ | -| 5 | 0.167159 | `foundry_openai_chat-completions-create` | ❌ | +| 2 | 0.344404 | `speech_tts_synthesize` | ❌ | +| 3 | 0.197854 | `foundry_agents_get-sdk-sample` | ❌ | +| 4 | 0.192450 | `foundry_openai_embeddings-create` | ❌ | +| 5 | 0.170157 | `foundry_openai_create-completion` | ❌ | --- @@ -1459,10 +1469,10 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.486489 | `speech_stt_recognize` | ✅ **EXPECTED** | -| 2 | 0.162863 | `foundry_threads_create` | ❌ | -| 3 | 0.160209 | `foundry_agents_connect` | ❌ | -| 4 | 0.156936 | `deploy_pipeline_guidance_get` | ❌ | -| 5 | 0.154737 | `foundry_openai_create-completion` | ❌ | +| 2 | 0.335115 | `speech_tts_synthesize` | ❌ | +| 3 | 0.162870 | `foundry_threads_create` | ❌ | +| 4 | 0.160209 | `foundry_agents_connect` | ❌ | +| 5 | 0.156850 | `deploy_pipeline_guidance_get` | ❌ | --- @@ -1475,11 +1485,11 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.612032 | `speech_stt_recognize` | ✅ **EXPECTED** | -| 2 | 0.309860 | `foundry_openai_embeddings-create` | ❌ | -| 3 | 0.244223 | `foundry_resource_get` | ❌ | -| 4 | 0.243658 | `foundry_openai_create-completion` | ❌ | -| 5 | 0.242816 | `foundry_openai_chat-completions-create` | ❌ | +| 1 | 0.611992 | `speech_stt_recognize` | ✅ **EXPECTED** | +| 2 | 0.573185 | `speech_tts_synthesize` | ❌ | +| 3 | 0.309895 | `foundry_openai_embeddings-create` | ❌ | +| 4 | 0.244218 | `foundry_resource_get` | ❌ | +| 5 | 0.243626 | `foundry_openai_create-completion` | ❌ | --- @@ -1493,10 +1503,10 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.410533 | `speech_stt_recognize` | ✅ **EXPECTED** | -| 2 | 0.152414 | `foundry_openai_embeddings-create` | ❌ | -| 3 | 0.152137 | `foundry_models_deploy` | ❌ | -| 4 | 
0.151799 | `deploy_pipeline_guidance_get` | ❌ | -| 5 | 0.140373 | `deploy_plan_get` | ❌ | +| 2 | 0.353783 | `speech_tts_synthesize` | ❌ | +| 3 | 0.152391 | `foundry_openai_embeddings-create` | ❌ | +| 4 | 0.152137 | `foundry_models_deploy` | ❌ | +| 5 | 0.151632 | `deploy_pipeline_guidance_get` | ❌ | --- @@ -1510,10 +1520,10 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.546259 | `speech_stt_recognize` | ✅ **EXPECTED** | -| 2 | 0.218092 | `foundry_resource_get` | ❌ | -| 3 | 0.202860 | `foundry_openai_embeddings-create` | ❌ | -| 4 | 0.183420 | `extension_azqr` | ❌ | -| 5 | 0.181020 | `search_index_get` | ❌ | +| 2 | 0.480203 | `speech_tts_synthesize` | ❌ | +| 3 | 0.218092 | `foundry_resource_get` | ❌ | +| 4 | 0.202935 | `foundry_openai_embeddings-create` | ❌ | +| 5 | 0.183420 | `extension_azqr` | ❌ | --- @@ -1527,10 +1537,10 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.539963 | `speech_stt_recognize` | ✅ **EXPECTED** | -| 2 | 0.228587 | `foundry_openai_create-completion` | ❌ | -| 3 | 0.203413 | `foundry_agents_connect` | ❌ | -| 4 | 0.199517 | `foundry_openai_embeddings-create` | ❌ | -| 5 | 0.197301 | `foundry_openai_chat-completions-create` | ❌ | +| 2 | 0.367401 | `speech_tts_synthesize` | ❌ | +| 3 | 0.228587 | `foundry_openai_create-completion` | ❌ | +| 4 | 0.203413 | `foundry_agents_connect` | ❌ | +| 5 | 0.199585 | `foundry_openai_embeddings-create` | ❌ | --- @@ -1544,9 +1554,9 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.549151 | `speech_stt_recognize` | ✅ **EXPECTED** | -| 2 | 0.393626 | `azureaibestpractices_get` | ❌ | +| 2 | 0.468161 | `speech_tts_synthesize` | ❌ | | 3 | 0.342537 | `extension_cli_generate` | ❌ | -| 4 | 0.337387 | `cloudarchitect_design` | ❌ | +| 4 | 0.338302 | `cloudarchitect_design` | ❌ | | 5 | 0.335741 | `foundry_openai_create-completion` | ❌ | --- @@ -1561,10 +1571,10 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.532536 
| `speech_stt_recognize` | ✅ **EXPECTED** | -| 2 | 0.349892 | `foundry_openai_create-completion` | ❌ | -| 3 | 0.348381 | `azureaibestpractices_get` | ❌ | +| 2 | 0.515532 | `speech_tts_synthesize` | ❌ | +| 3 | 0.349892 | `foundry_openai_create-completion` | ❌ | | 4 | 0.340893 | `foundry_openai_chat-completions-create` | ❌ | -| 5 | 0.332862 | `foundry_openai_embeddings-create` | ❌ | +| 5 | 0.332669 | `foundry_openai_embeddings-create` | ❌ | --- @@ -1578,15 +1588,185 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.453396 | `speech_stt_recognize` | ✅ **EXPECTED** | -| 2 | 0.173280 | `deploy_pipeline_guidance_get` | ❌ | -| 3 | 0.164929 | `foundry_openai_embeddings-create` | ❌ | -| 4 | 0.160483 | `foundry_agents_connect` | ❌ | -| 5 | 0.160185 | `extension_azqr` | ❌ | +| 2 | 0.322710 | `speech_tts_synthesize` | ❌ | +| 3 | 0.173205 | `deploy_pipeline_guidance_get` | ❌ | +| 4 | 0.164990 | `foundry_openai_embeddings-create` | ❌ | +| 5 | 0.160483 | `foundry_agents_connect` | ❌ | --- ## Test 66 +**Expected Tool:** `speech_tts_synthesize` +**Prompt:** Convert text to speech and save to output.wav + +### Results + +| Rank | Score | Tool | Status | +|------|-------|------|--------| +| 1 | 0.521797 | `speech_tts_synthesize` | ✅ **EXPECTED** | +| 2 | 0.422457 | `speech_stt_recognize` | ❌ | +| 3 | 0.196049 | `foundry_openai_embeddings-create` | ❌ | +| 4 | 0.189438 | `foundry_openai_create-completion` | ❌ | +| 5 | 0.174955 | `foundry_openai_chat-completions-create` | ❌ | + +--- + +## Test 67 + +**Expected Tool:** `speech_tts_synthesize` +**Prompt:** Synthesize speech from "Hello, welcome to Azure" and save to welcome.wav + +### Results + +| Rank | Score | Tool | Status | +|------|-------|------|--------| +| 1 | 0.516973 | `speech_tts_synthesize` | ✅ **EXPECTED** | +| 2 | 0.486019 | `speech_stt_recognize` | ❌ | +| 3 | 0.329765 | `deploy_pipeline_guidance_get` | ❌ | +| 4 | 0.323728 | `extension_cli_generate` | ❌ | +| 5 | 0.317525 | 
`azureterraformbestpractices_get` | ❌ | + +--- + +## Test 68 + +**Expected Tool:** `speech_tts_synthesize` +**Prompt:** Generate speech audio from text "Hello world" using Azure Speech Services + +### Results + +| Rank | Score | Tool | Status | +|------|-------|------|--------| +| 1 | 0.592156 | `speech_tts_synthesize` | ✅ **EXPECTED** | +| 2 | 0.534002 | `speech_stt_recognize` | ❌ | +| 3 | 0.339580 | `foundry_openai_embeddings-create` | ❌ | +| 4 | 0.327397 | `foundry_openai_create-completion` | ❌ | +| 5 | 0.315764 | `foundry_openai_chat-completions-create` | ❌ | + +--- + +## Test 69 + +**Expected Tool:** `speech_tts_synthesize` +**Prompt:** Convert text to speech with Spanish language and save to spanish-audio.wav + +### Results + +| Rank | Score | Tool | Status | +|------|-------|------|--------| +| 1 | 0.501096 | `speech_tts_synthesize` | ✅ **EXPECTED** | +| 2 | 0.452648 | `speech_stt_recognize` | ❌ | +| 3 | 0.210841 | `foundry_openai_embeddings-create` | ❌ | +| 4 | 0.196766 | `foundry_models_deploy` | ❌ | +| 5 | 0.191812 | `foundry_openai_chat-completions-create` | ❌ | + +--- + +## Test 70 + +**Expected Tool:** `speech_tts_synthesize` +**Prompt:** Synthesize speech with voice en-US-JennyNeural from text "Azure AI Services" + +### Results + +| Rank | Score | Tool | Status | +|------|-------|------|--------| +| 1 | 0.604878 | `speech_tts_synthesize` | ✅ **EXPECTED** | +| 2 | 0.496715 | `speech_stt_recognize` | ❌ | +| 3 | 0.417045 | `foundry_openai_embeddings-create` | ❌ | +| 4 | 0.379840 | `foundry_openai_create-completion` | ❌ | +| 5 | 0.354130 | `foundry_openai_chat-completions-create` | ❌ | + +--- + +## Test 71 + +**Expected Tool:** `speech_tts_synthesize` +**Prompt:** Create MP3 audio file from text "Welcome to Azure" with high quality format + +### Results + +| Rank | Score | Tool | Status | +|------|-------|------|--------| +| 1 | 0.561285 | `speech_tts_synthesize` | ✅ **EXPECTED** | +| 2 | 0.510908 | `speech_stt_recognize` | ❌ | +| 3 | 0.348757 | 
`foundry_openai_embeddings-create` | ❌ | +| 4 | 0.347597 | `deploy_pipeline_guidance_get` | ❌ | +| 5 | 0.345073 | `deploy_iac_rules_get` | ❌ | + +--- + +## Test 72 + +**Expected Tool:** `speech_tts_synthesize` +**Prompt:** Generate speech with custom voice model using endpoint ID + +### Results + +| Rank | Score | Tool | Status | +|------|-------|------|--------| +| 1 | 0.527400 | `speech_tts_synthesize` | ✅ **EXPECTED** | +| 2 | 0.455811 | `speech_stt_recognize` | ❌ | +| 3 | 0.353132 | `foundry_resource_get` | ❌ | +| 4 | 0.343330 | `foundry_models_deploy` | ❌ | +| 5 | 0.337912 | `foundry_openai_embeddings-create` | ❌ | + +--- + +## Test 73 + +**Expected Tool:** `speech_tts_synthesize` +**Prompt:** Convert text to OGG/Opus format audio file + +### Results + +| Rank | Score | Tool | Status | +|------|-------|------|--------| +| 1 | 0.432836 | `speech_tts_synthesize` | ✅ **EXPECTED** | +| 2 | 0.410086 | `speech_stt_recognize` | ❌ | +| 3 | 0.234237 | `foundry_openai_embeddings-create` | ❌ | +| 4 | 0.196153 | `extension_cli_generate` | ❌ | +| 5 | 0.175963 | `foundry_openai_create-completion` | ❌ | + +--- + +## Test 74 + +**Expected Tool:** `speech_tts_synthesize` +**Prompt:** Synthesize long text content to audio file with streaming + +### Results + +| Rank | Score | Tool | Status | +|------|-------|------|--------| +| 1 | 0.428079 | `speech_tts_synthesize` | ✅ **EXPECTED** | +| 2 | 0.369045 | `speech_stt_recognize` | ❌ | +| 3 | 0.230725 | `foundry_openai_embeddings-create` | ❌ | +| 4 | 0.220793 | `foundry_openai_create-completion` | ❌ | +| 5 | 0.216475 | `foundry_openai_chat-completions-create` | ❌ | + +--- + +## Test 75 + +**Expected Tool:** `speech_tts_synthesize` +**Prompt:** Create audio file from text in French language with appropriate voice + +### Results + +| Rank | Score | Tool | Status | +|------|-------|------|--------| +| 1 | 0.444444 | `speech_tts_synthesize` | ✅ **EXPECTED** | +| 2 | 0.385267 | `speech_stt_recognize` | ❌ | +| 3 | 0.229890 | 
`foundry_openai_create-completion` | ❌ | +| 4 | 0.228317 | `foundry_openai_embeddings-create` | ❌ | +| 5 | 0.213222 | `foundry_openai_chat-completions-create` | ❌ | + +--- + +## Test 76 + **Expected Tool:** `appconfig_account_list` **Prompt:** List all App Configuration stores in my subscription @@ -1594,15 +1774,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.786298 | `appconfig_account_list` | ✅ **EXPECTED** | +| 1 | 0.786360 | `appconfig_account_list` | ✅ **EXPECTED** | | 2 | 0.530613 | `appconfig_kv_get` | ❌ | | 3 | 0.491380 | `postgres_server_list` | ❌ | | 4 | 0.481223 | `kusto_cluster_list` | ❌ | -| 5 | 0.479997 | `subscription_list` | ❌ | +| 5 | 0.479988 | `subscription_list` | ❌ | --- -## Test 67 +## Test 77 **Expected Tool:** `appconfig_account_list` **Prompt:** Show me the App Configuration stores in my subscription @@ -1611,15 +1791,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.635056 | `appconfig_account_list` | ✅ **EXPECTED** | -| 2 | 0.464826 | `appconfig_kv_get` | ❌ | -| 3 | 0.398562 | `subscription_list` | ❌ | -| 4 | 0.391398 | `redis_list` | ❌ | -| 5 | 0.372579 | `postgres_server_list` | ❌ | +| 1 | 0.634978 | `appconfig_account_list` | ✅ **EXPECTED** | +| 2 | 0.464865 | `appconfig_kv_get` | ❌ | +| 3 | 0.398495 | `subscription_list` | ❌ | +| 4 | 0.391291 | `redis_list` | ❌ | +| 5 | 0.372456 | `postgres_server_list` | ❌ | --- -## Test 68 +## Test 78 **Expected Tool:** `appconfig_account_list` **Prompt:** Show me my App Configuration stores @@ -1628,7 +1808,7 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.565365 | `appconfig_account_list` | ✅ **EXPECTED** | +| 1 | 0.565435 | `appconfig_account_list` | ✅ **EXPECTED** | | 2 | 0.465344 | `appconfig_kv_get` | ❌ | | 3 | 0.355916 | `postgres_server_config_get` | ❌ | | 4 | 0.348661 | `appconfig_kv_delete` | ❌ | @@ -1636,7 +1816,7 @@ --- -## Test 69 +## Test 79 **Expected Tool:** `appconfig_kv_delete` 
**Prompt:** Delete the key in App Configuration store @@ -1645,15 +1825,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.618276 | `appconfig_kv_delete` | ✅ **EXPECTED** | +| 1 | 0.618277 | `appconfig_kv_delete` | ✅ **EXPECTED** | | 2 | 0.464358 | `appconfig_kv_get` | ❌ | | 3 | 0.424344 | `appconfig_kv_set` | ❌ | | 4 | 0.422700 | `appconfig_kv_lock_set` | ❌ | -| 5 | 0.392260 | `appconfig_account_list` | ❌ | +| 5 | 0.392016 | `appconfig_account_list` | ❌ | --- -## Test 70 +## Test 80 **Expected Tool:** `appconfig_kv_get` **Prompt:** List all key-value settings in App Configuration store @@ -1662,15 +1842,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.632652 | `appconfig_kv_get` | ✅ **EXPECTED** | -| 2 | 0.558116 | `appconfig_account_list` | ❌ | -| 3 | 0.531033 | `appconfig_kv_set` | ❌ | -| 4 | 0.464568 | `appconfig_kv_delete` | ❌ | -| 5 | 0.438999 | `appconfig_kv_lock_set` | ❌ | +| 1 | 0.632687 | `appconfig_kv_get` | ✅ **EXPECTED** | +| 2 | 0.557810 | `appconfig_account_list` | ❌ | +| 3 | 0.530884 | `appconfig_kv_set` | ❌ | +| 4 | 0.464635 | `appconfig_kv_delete` | ❌ | +| 5 | 0.439089 | `appconfig_kv_lock_set` | ❌ | --- -## Test 71 +## Test 81 **Expected Tool:** `appconfig_kv_get` **Prompt:** Show me the key-value settings in App Configuration store @@ -1680,14 +1860,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.612555 | `appconfig_kv_get` | ✅ **EXPECTED** | -| 2 | 0.522671 | `appconfig_account_list` | ❌ | +| 2 | 0.522426 | `appconfig_account_list` | ❌ | | 3 | 0.512945 | `appconfig_kv_set` | ❌ | | 4 | 0.468503 | `appconfig_kv_delete` | ❌ | | 5 | 0.457866 | `appconfig_kv_lock_set` | ❌ | --- -## Test 72 +## Test 82 **Expected Tool:** `appconfig_kv_get` **Prompt:** List all key-value settings with key name starting with 'prod-' in App Configuration store @@ -1696,15 +1876,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.512883 | 
`appconfig_kv_get` | ✅ **EXPECTED** | -| 2 | 0.450109 | `appconfig_account_list` | ❌ | -| 3 | 0.398684 | `appconfig_kv_set` | ❌ | -| 4 | 0.380614 | `appconfig_kv_delete` | ❌ | -| 5 | 0.346166 | `appconfig_kv_lock_set` | ❌ | +| 1 | 0.513021 | `appconfig_kv_get` | ✅ **EXPECTED** | +| 2 | 0.450004 | `appconfig_account_list` | ❌ | +| 3 | 0.398800 | `appconfig_kv_set` | ❌ | +| 4 | 0.380746 | `appconfig_kv_delete` | ❌ | +| 5 | 0.346260 | `appconfig_kv_lock_set` | ❌ | --- -## Test 73 +## Test 83 **Expected Tool:** `appconfig_kv_get` **Prompt:** Show the content for the key in App Configuration store @@ -1716,12 +1896,12 @@ | 1 | 0.552300 | `appconfig_kv_get` | ✅ **EXPECTED** | | 2 | 0.448912 | `appconfig_kv_set` | ❌ | | 3 | 0.441713 | `appconfig_kv_delete` | ❌ | -| 4 | 0.437745 | `appconfig_account_list` | ❌ | +| 4 | 0.437432 | `appconfig_account_list` | ❌ | | 5 | 0.416264 | `appconfig_kv_lock_set` | ❌ | --- -## Test 74 +## Test 84 **Expected Tool:** `appconfig_kv_lock_set` **Prompt:** Lock the key in App Configuration store @@ -1730,15 +1910,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.591253 | `appconfig_kv_lock_set` | ✅ **EXPECTED** | -| 2 | 0.487221 | `appconfig_kv_get` | ❌ | -| 3 | 0.445541 | `appconfig_kv_set` | ❌ | -| 4 | 0.431462 | `appconfig_kv_delete` | ❌ | -| 5 | 0.373617 | `appconfig_account_list` | ❌ | +| 1 | 0.591237 | `appconfig_kv_lock_set` | ✅ **EXPECTED** | +| 2 | 0.487174 | `appconfig_kv_get` | ❌ | +| 3 | 0.445551 | `appconfig_kv_set` | ❌ | +| 4 | 0.431516 | `appconfig_kv_delete` | ❌ | +| 5 | 0.373656 | `appconfig_account_list` | ❌ | --- -## Test 75 +## Test 85 **Expected Tool:** `appconfig_kv_lock_set` **Prompt:** Unlock the key in App Configuration store @@ -1749,13 +1929,13 @@ |------|-------|------|--------| | 1 | 0.555699 | `appconfig_kv_lock_set` | ✅ **EXPECTED** | | 2 | 0.505681 | `appconfig_kv_get` | ❌ | -| 3 | 0.476497 | `appconfig_kv_delete` | ❌ | +| 3 | 0.476496 | `appconfig_kv_delete` | ❌ | | 4 | 0.425488 
| `appconfig_kv_set` | ❌ | -| 5 | 0.409649 | `appconfig_account_list` | ❌ | +| 5 | 0.409406 | `appconfig_account_list` | ❌ | --- -## Test 76 +## Test 86 **Expected Tool:** `appconfig_kv_set` **Prompt:** Set the key in App Configuration store to @@ -1768,11 +1948,11 @@ | 2 | 0.536497 | `appconfig_kv_lock_set` | ❌ | | 3 | 0.512707 | `appconfig_kv_get` | ❌ | | 4 | 0.505571 | `appconfig_kv_delete` | ❌ | -| 5 | 0.378223 | `appconfig_account_list` | ❌ | +| 5 | 0.377919 | `appconfig_account_list` | ❌ | --- -## Test 77 +## Test 87 **Expected Tool:** `applens_resource_diagnose` **Prompt:** Please help me diagnose issues with my app using app lens @@ -1781,15 +1961,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.595632 | `applens_resource_diagnose` | ✅ **EXPECTED** | -| 2 | 0.335768 | `deploy_app_logs_get` | ❌ | -| 3 | 0.300786 | `deploy_architecture_diagram_generate` | ❌ | -| 4 | 0.273083 | `cloudarchitect_design` | ❌ | -| 5 | 0.254473 | `monitor_resource_log_query` | ❌ | +| 1 | 0.595737 | `applens_resource_diagnose` | ✅ **EXPECTED** | +| 2 | 0.336142 | `deploy_app_logs_get` | ❌ | +| 3 | 0.300835 | `deploy_architecture_diagram_generate` | ❌ | +| 4 | 0.272689 | `cloudarchitect_design` | ❌ | +| 5 | 0.254527 | `monitor_resource_log_query` | ❌ | --- -## Test 78 +## Test 88 **Expected Tool:** `applens_resource_diagnose` **Prompt:** Use app lens to check why my app is slow? @@ -1799,14 +1979,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.502361 | `applens_resource_diagnose` | ✅ **EXPECTED** | -| 2 | 0.316002 | `deploy_app_logs_get` | ❌ | +| 2 | 0.316297 | `deploy_app_logs_get` | ❌ | | 3 | 0.255570 | `deploy_architecture_diagram_generate` | ❌ | | 4 | 0.249583 | `monitor_resource_log_query` | ❌ | -| 5 | 0.226030 | `quota_usage_check` | ❌ | +| 5 | 0.225972 | `quota_usage_check` | ❌ | --- -## Test 79 +## Test 89 **Expected Tool:** `applens_resource_diagnose` **Prompt:** What does app lens say is wrong with my service? 
@@ -1817,13 +1997,13 @@ |------|-------|------|--------| | 1 | 0.492820 | `applens_resource_diagnose` | ✅ **EXPECTED** | | 2 | 0.256325 | `deploy_architecture_diagram_generate` | ❌ | -| 3 | 0.242574 | `cloudarchitect_design` | ❌ | +| 3 | 0.242800 | `cloudarchitect_design` | ❌ | | 4 | 0.225608 | `resourcehealth_health-events_list` | ❌ | -| 5 | 0.211260 | `deploy_app_logs_get` | ❌ | +| 5 | 0.211564 | `deploy_app_logs_get` | ❌ | --- -## Test 80 +## Test 90 **Expected Tool:** `appservice_database_add` **Prompt:** Add database connection to my app service for database in resource group @@ -1832,15 +2012,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.717878 | `appservice_database_add` | ✅ **EXPECTED** | -| 2 | 0.401376 | `sql_db_rename` | ❌ | -| 3 | 0.399941 | `sql_db_create` | ❌ | -| 4 | 0.362997 | `sql_db_show` | ❌ | -| 5 | 0.357919 | `sql_db_list` | ❌ | +| 1 | 0.718027 | `appservice_database_add` | ✅ **EXPECTED** | +| 2 | 0.402135 | `sql_db_rename` | ❌ | +| 3 | 0.400714 | `sql_db_create` | ❌ | +| 4 | 0.363505 | `sql_db_show` | ❌ | +| 5 | 0.358956 | `sql_db_list` | ❌ | --- -## Test 81 +## Test 91 **Expected Tool:** `appservice_database_add` **Prompt:** Configure SQL Server database for app service with connection string in resource group @@ -1849,15 +2029,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.688410 | `appservice_database_add` | ✅ **EXPECTED** | -| 2 | 0.498122 | `sql_db_rename` | ❌ | -| 3 | 0.497502 | `sql_db_create` | ❌ | -| 4 | 0.469326 | `sql_db_show` | ❌ | -| 5 | 0.452937 | `sql_db_list` | ❌ | +| 1 | 0.688364 | `appservice_database_add` | ✅ **EXPECTED** | +| 2 | 0.498175 | `sql_db_rename` | ❌ | +| 3 | 0.497522 | `sql_db_create` | ❌ | +| 4 | 0.469526 | `sql_db_show` | ❌ | +| 5 | 0.453088 | `sql_db_list` | ❌ | --- -## Test 82 +## Test 92 **Expected Tool:** `appservice_database_add` **Prompt:** Add MySQL database to app service using connection in resource group @@ -1866,15 +2046,15 @@ | 
Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.675970 | `appservice_database_add` | ✅ **EXPECTED** | -| 2 | 0.464756 | `sql_db_create` | ❌ | -| 3 | 0.452407 | `sql_db_rename` | ❌ | -| 4 | 0.432948 | `mysql_server_list` | ❌ | -| 5 | 0.410292 | `sql_db_show` | ❌ | +| 1 | 0.675443 | `appservice_database_add` | ✅ **EXPECTED** | +| 2 | 0.465187 | `sql_db_create` | ❌ | +| 3 | 0.452537 | `sql_db_rename` | ❌ | +| 4 | 0.432946 | `mysql_server_list` | ❌ | +| 5 | 0.410266 | `sql_db_show` | ❌ | --- -## Test 83 +## Test 93 **Expected Tool:** `appservice_database_add` **Prompt:** Add PostgreSQL database to app service using connection in resource group @@ -1883,15 +2063,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.628119 | `appservice_database_add` | ✅ **EXPECTED** | -| 2 | 0.444212 | `sql_db_create` | ❌ | -| 3 | 0.405314 | `postgres_database_query` | ❌ | -| 4 | 0.401117 | `postgres_database_list` | ❌ | -| 5 | 0.400767 | `sql_db_rename` | ❌ | +| 1 | 0.627784 | `appservice_database_add` | ✅ **EXPECTED** | +| 2 | 0.444152 | `sql_db_create` | ❌ | +| 3 | 0.404874 | `postgres_database_query` | ❌ | +| 4 | 0.401137 | `postgres_database_list` | ❌ | +| 5 | 0.400754 | `sql_db_rename` | ❌ | --- -## Test 84 +## Test 94 **Expected Tool:** `appservice_database_add` **Prompt:** Connect CosmosDB database using connection string to app service in resource group @@ -1900,15 +2080,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.663086 | `appservice_database_add` | ✅ **EXPECTED** | -| 2 | 0.446465 | `cosmos_database_list` | ❌ | -| 3 | 0.441966 | `cosmos_database_container_item_query` | ❌ | -| 4 | 0.427284 | `cosmos_database_container_list` | ❌ | -| 5 | 0.420488 | `sql_db_rename` | ❌ | +| 1 | 0.663057 | `appservice_database_add` | ✅ **EXPECTED** | +| 2 | 0.446328 | `cosmos_database_list` | ❌ | +| 3 | 0.441849 | `cosmos_database_container_item_query` | ❌ | +| 4 | 0.427159 | `cosmos_database_container_list` 
| ❌ | +| 5 | 0.420379 | `sql_db_rename` | ❌ | --- -## Test 85 +## Test 95 **Expected Tool:** `appservice_database_add` **Prompt:** Add database connection for database on server to app service in resource group @@ -1917,15 +2097,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.733852 | `appservice_database_add` | ✅ **EXPECTED** | +| 1 | 0.733775 | `appservice_database_add` | ✅ **EXPECTED** | | 2 | 0.454554 | `sql_db_create` | ❌ | -| 3 | 0.415271 | `sql_db_rename` | ❌ | -| 4 | 0.414045 | `sql_server_create` | ❌ | +| 3 | 0.415274 | `sql_db_rename` | ❌ | +| 4 | 0.414101 | `sql_server_create` | ❌ | | 5 | 0.410260 | `sql_db_list` | ❌ | --- -## Test 86 +## Test 96 **Expected Tool:** `appservice_database_add` **Prompt:** Add database connection string for to app service using connection string in resource group @@ -1934,15 +2114,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.746766 | `appservice_database_add` | ✅ **EXPECTED** | -| 2 | 0.441682 | `sql_db_rename` | ❌ | -| 3 | 0.434020 | `sql_db_create` | ❌ | -| 4 | 0.391311 | `sql_db_list` | ❌ | -| 5 | 0.390014 | `sql_db_show` | ❌ | +| 1 | 0.746518 | `appservice_database_add` | ✅ **EXPECTED** | +| 2 | 0.441688 | `sql_db_rename` | ❌ | +| 3 | 0.433979 | `sql_db_create` | ❌ | +| 4 | 0.391370 | `sql_db_list` | ❌ | +| 5 | 0.390219 | `sql_db_show` | ❌ | --- -## Test 87 +## Test 97 **Expected Tool:** `appservice_database_add` **Prompt:** Connect database to my app service using connection string in resource group @@ -1951,15 +2131,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.680503 | `appservice_database_add` | ✅ **EXPECTED** | -| 2 | 0.429273 | `sql_db_rename` | ❌ | -| 3 | 0.406267 | `sql_db_create` | ❌ | -| 4 | 0.396537 | `sql_db_show` | ❌ | -| 5 | 0.391409 | `sql_db_list` | ❌ | +| 1 | 0.680411 | `appservice_database_add` | ✅ **EXPECTED** | +| 2 | 0.429294 | `sql_db_rename` | ❌ | +| 3 | 0.406245 | `sql_db_create` | ❌ | +| 4 | 
0.396491 | `sql_db_show` | ❌ | +| 5 | 0.391479 | `sql_db_list` | ❌ | --- -## Test 88 +## Test 98 **Expected Tool:** `appservice_database_add` **Prompt:** Set up database for app service with connection string under resource group @@ -1968,15 +2148,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.640738 | `appservice_database_add` | ✅ **EXPECTED** | -| 2 | 0.456785 | `sql_db_create` | ❌ | -| 3 | 0.402668 | `sql_db_rename` | ❌ | -| 4 | 0.401985 | `sql_db_show` | ❌ | -| 5 | 0.394072 | `sql_db_list` | ❌ | +| 1 | 0.640585 | `appservice_database_add` | ✅ **EXPECTED** | +| 2 | 0.456892 | `sql_db_create` | ❌ | +| 3 | 0.402757 | `sql_db_rename` | ❌ | +| 4 | 0.402144 | `sql_db_show` | ❌ | +| 5 | 0.394206 | `sql_db_list` | ❌ | --- -## Test 89 +## Test 99 **Expected Tool:** `appservice_database_add` **Prompt:** Configure database for app service with the connection string in resource group @@ -1985,15 +2165,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.688527 | `appservice_database_add` | ✅ **EXPECTED** | -| 2 | 0.449176 | `sql_db_rename` | ❌ | -| 3 | 0.448382 | `sql_db_create` | ❌ | -| 4 | 0.414329 | `sql_db_show` | ❌ | -| 5 | 0.411782 | `sql_db_list` | ❌ | +| 1 | 0.688394 | `appservice_database_add` | ✅ **EXPECTED** | +| 2 | 0.449133 | `sql_db_rename` | ❌ | +| 3 | 0.448418 | `sql_db_create` | ❌ | +| 4 | 0.414400 | `sql_db_show` | ❌ | +| 5 | 0.411818 | `sql_db_list` | ❌ | --- -## Test 90 +## Test 100 **Expected Tool:** `applicationinsights_recommendation_list` **Prompt:** List code optimization recommendations across my Application Insights components @@ -2003,14 +2183,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.572473 | `applicationinsights_recommendation_list` | ✅ **EXPECTED** | -| 2 | 0.454559 | `azureaibestpractices_get` | ❌ | -| 3 | 0.445157 | `get_bestpractices_get` | ❌ | -| 4 | 0.390478 | `azureterraformbestpractices_get` | ❌ | -| 5 | 0.383948 | 
`applens_resource_diagnose` | ❌ | +| 2 | 0.445157 | `get_bestpractices_get` | ❌ | +| 3 | 0.390478 | `azureterraformbestpractices_get` | ❌ | +| 4 | 0.383948 | `applens_resource_diagnose` | ❌ | +| 5 | 0.375286 | `deploy_iac_rules_get` | ❌ | --- -## Test 91 +## Test 101 **Expected Tool:** `applicationinsights_recommendation_list` **Prompt:** Show me code optimization recommendations for all Application Insights resources in my subscription @@ -2020,14 +2200,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.696531 | `applicationinsights_recommendation_list` | ✅ **EXPECTED** | -| 2 | 0.506351 | `azureaibestpractices_get` | ❌ | -| 3 | 0.468384 | `get_bestpractices_get` | ❌ | -| 4 | 0.452231 | `applens_resource_diagnose` | ❌ | -| 5 | 0.435241 | `azureterraformbestpractices_get` | ❌ | +| 2 | 0.468384 | `get_bestpractices_get` | ❌ | +| 3 | 0.452231 | `applens_resource_diagnose` | ❌ | +| 4 | 0.435241 | `azureterraformbestpractices_get` | ❌ | +| 5 | 0.424622 | `search_service_list` | ❌ | --- -## Test 92 +## Test 102 **Expected Tool:** `applicationinsights_recommendation_list` **Prompt:** List profiler recommendations for Application Insights in resource group @@ -2038,13 +2218,13 @@ |------|-------|------|--------| | 1 | 0.626722 | `applicationinsights_recommendation_list` | ✅ **EXPECTED** | | 2 | 0.488002 | `loadtesting_testresource_list` | ❌ | -| 3 | 0.479392 | `mysql_server_list` | ❌ | +| 3 | 0.479425 | `mysql_server_list` | ❌ | | 4 | 0.477396 | `applens_resource_diagnose` | ❌ | | 5 | 0.468847 | `resourcehealth_availability-status_list` | ❌ | --- -## Test 93 +## Test 103 **Expected Tool:** `applicationinsights_recommendation_list` **Prompt:** Show me performance improvement recommendations from Application Insights @@ -2053,15 +2233,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.509615 | `applicationinsights_recommendation_list` | ✅ **EXPECTED** | -| 2 | 0.433835 | `azureaibestpractices_get` | ❌ | -| 3 | 
0.419699 | `applens_resource_diagnose` | ❌ | -| 4 | 0.383861 | `get_bestpractices_get` | ❌ | -| 5 | 0.367317 | `deploy_architecture_diagram_generate` | ❌ | +| 1 | 0.509502 | `applicationinsights_recommendation_list` | ✅ **EXPECTED** | +| 2 | 0.419670 | `applens_resource_diagnose` | ❌ | +| 3 | 0.383767 | `get_bestpractices_get` | ❌ | +| 4 | 0.367278 | `deploy_architecture_diagram_generate` | ❌ | +| 5 | 0.343879 | `cloudarchitect_design` | ❌ | --- -## Test 94 +## Test 104 **Expected Tool:** `extension_cli_generate` **Prompt:** Create a Storage account with name using Azure CLI @@ -2070,15 +2250,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.593241 | `storage_account_create` | ❌ | +| 1 | 0.593242 | `storage_account_create` | ❌ | | 2 | 0.564940 | `storage_blob_container_create` | ❌ | | 3 | 0.493684 | `storage_account_get` | ❌ | -| 4 | 0.473547 | `storage_blob_container_get` | ❌ | -| 5 | 0.456428 | `managedlustre_fs_create` | ❌ | +| 4 | 0.474399 | `storage_blob_container_get` | ❌ | +| 5 | 0.454194 | `managedlustre_fs_create` | ❌ | --- -## Test 95 +## Test 105 **Expected Tool:** `extension_cli_generate` **Prompt:** List all virtual machines in my subscription using Azure CLI @@ -2087,15 +2267,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.592102 | `search_service_list` | ❌ | +| 1 | 0.593467 | `search_service_list` | ❌ | | 2 | 0.575274 | `kusto_cluster_list` | ❌ | -| 3 | 0.549918 | `virtualdesktop_hostpool_list` | ❌ | -| 4 | 0.544688 | `monitor_workspace_list` | ❌ | -| 5 | 0.536238 | `subscription_list` | ❌ | +| 3 | 0.549966 | `virtualdesktop_hostpool_list` | ❌ | +| 4 | 0.544412 | `monitor_workspace_list` | ❌ | +| 5 | 0.536252 | `subscription_list` | ❌ | --- -## Test 96 +## Test 106 **Expected Tool:** `extension_cli_generate` **Prompt:** Show me the details of the storage account with Azure CLI commands @@ -2105,14 +2285,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 
0.710307 | `storage_account_get` | ❌ | -| 2 | 0.601571 | `storage_blob_container_get` | ❌ | +| 2 | 0.602173 | `storage_blob_container_get` | ❌ | | 3 | 0.543268 | `storage_blob_get` | ❌ | | 4 | 0.519788 | `storage_account_create` | ❌ | | 5 | 0.493145 | `cosmos_account_list` | ❌ | --- -## Test 97 +## Test 107 **Expected Tool:** `extension_cli_install` **Prompt:** @@ -2121,15 +2301,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.479652 | `extension_cli_install` | ✅ **EXPECTED** | -| 2 | 0.473369 | `extension_cli_generate` | ❌ | -| 3 | 0.389405 | `azureterraformbestpractices_get` | ❌ | -| 4 | 0.382473 | `deploy_plan_get` | ❌ | -| 5 | 0.366067 | `get_bestpractices_get` | ❌ | +| 1 | 0.479590 | `extension_cli_install` | ✅ **EXPECTED** | +| 2 | 0.473250 | `extension_cli_generate` | ❌ | +| 3 | 0.389354 | `azureterraformbestpractices_get` | ❌ | +| 4 | 0.382389 | `deploy_plan_get` | ❌ | +| 5 | 0.366012 | `get_bestpractices_get` | ❌ | --- -## Test 98 +## Test 108 **Expected Tool:** `extension_cli_install` **Prompt:** How to install azd @@ -2139,14 +2319,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.460416 | `extension_cli_install` | ✅ **EXPECTED** | -| 2 | 0.429269 | `deploy_app_logs_get` | ❌ | +| 2 | 0.429599 | `deploy_app_logs_get` | ❌ | | 3 | 0.365212 | `deploy_iac_rules_get` | ❌ | | 4 | 0.335279 | `deploy_plan_get` | ❌ | -| 5 | 0.326165 | `deploy_pipeline_guidance_get` | ❌ | +| 5 | 0.326135 | `deploy_pipeline_guidance_get` | ❌ | --- -## Test 99 +## Test 109 **Expected Tool:** `extension_cli_install` **Prompt:** What is Azure Functions Core tools and how to install it @@ -2155,15 +2335,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.622670 | `extension_cli_install` | ✅ **EXPECTED** | -| 2 | 0.439414 | `get_bestpractices_get` | ❌ | -| 3 | 0.432859 | `deploy_pipeline_guidance_get` | ❌ | -| 4 | 0.430682 | `extension_cli_generate` | ❌ | -| 5 | 0.418085 | `deploy_plan_get` | ❌ | 
+| 1 | 0.622705 | `extension_cli_install` | ✅ **EXPECTED** | +| 2 | 0.439474 | `get_bestpractices_get` | ❌ | +| 3 | 0.432913 | `deploy_pipeline_guidance_get` | ❌ | +| 4 | 0.430723 | `extension_cli_generate` | ❌ | +| 5 | 0.418161 | `deploy_plan_get` | ❌ | --- -## Test 100 +## Test 110 **Expected Tool:** `acr_registry_list` **Prompt:** List all Azure Container Registries in my subscription @@ -2175,12 +2355,12 @@ | 1 | 0.743568 | `acr_registry_list` | ✅ **EXPECTED** | | 2 | 0.711580 | `acr_registry_repository_list` | ❌ | | 3 | 0.585675 | `kusto_cluster_list` | ❌ | -| 4 | 0.540241 | `search_service_list` | ❌ | +| 4 | 0.541506 | `search_service_list` | ❌ | | 5 | 0.514293 | `cosmos_account_list` | ❌ | --- -## Test 101 +## Test 111 **Expected Tool:** `acr_registry_list` **Prompt:** Show me my Azure Container Registries @@ -2191,13 +2371,13 @@ |------|-------|------|--------| | 1 | 0.586014 | `acr_registry_list` | ✅ **EXPECTED** | | 2 | 0.563636 | `acr_registry_repository_list` | ❌ | -| 3 | 0.460834 | `storage_blob_container_get` | ❌ | +| 3 | 0.460544 | `storage_blob_container_get` | ❌ | | 4 | 0.415552 | `cosmos_database_container_list` | ❌ | | 5 | 0.402247 | `redis_list` | ❌ | --- -## Test 102 +## Test 112 **Expected Tool:** `acr_registry_list` **Prompt:** Show me the container registries in my subscription @@ -2209,12 +2389,12 @@ | 1 | 0.637130 | `acr_registry_list` | ✅ **EXPECTED** | | 2 | 0.563476 | `acr_registry_repository_list` | ❌ | | 3 | 0.516769 | `kusto_cluster_list` | ❌ | -| 4 | 0.515365 | `storage_blob_container_get` | ❌ | +| 4 | 0.515378 | `storage_blob_container_get` | ❌ | | 5 | 0.480352 | `redis_list` | ❌ | --- -## Test 103 +## Test 113 **Expected Tool:** `acr_registry_list` **Prompt:** List container registries in resource group @@ -2225,13 +2405,13 @@ |------|-------|------|--------| | 1 | 0.654318 | `acr_registry_repository_list` | ❌ | | 2 | 0.633938 | `acr_registry_list` | ✅ **EXPECTED** | -| 3 | 0.476015 | `mysql_server_list` | ❌ | -| 4 | 0.454929 | 
`group_list` | ❌ | +| 3 | 0.476294 | `mysql_server_list` | ❌ | +| 4 | 0.454887 | `group_list` | ❌ | | 5 | 0.454003 | `datadog_monitoredresources_list` | ❌ | --- -## Test 104 +## Test 114 **Expected Tool:** `acr_registry_list` **Prompt:** Show me the container registries in resource group @@ -2242,13 +2422,13 @@ |------|-------|------|--------| | 1 | 0.639391 | `acr_registry_list` | ✅ **EXPECTED** | | 2 | 0.637972 | `acr_registry_repository_list` | ❌ | -| 3 | 0.468028 | `mysql_server_list` | ❌ | +| 3 | 0.468371 | `mysql_server_list` | ❌ | | 4 | 0.449649 | `datadog_monitoredresources_list` | ❌ | -| 5 | 0.445741 | `group_list` | ❌ | +| 5 | 0.445729 | `group_list` | ❌ | --- -## Test 105 +## Test 115 **Expected Tool:** `acr_registry_repository_list` **Prompt:** List all container registry repositories in my subscription @@ -2260,12 +2440,12 @@ | 1 | 0.626482 | `acr_registry_repository_list` | ✅ **EXPECTED** | | 2 | 0.617504 | `acr_registry_list` | ❌ | | 3 | 0.544172 | `kusto_cluster_list` | ❌ | -| 4 | 0.508863 | `storage_blob_container_get` | ❌ | +| 4 | 0.508483 | `storage_blob_container_get` | ❌ | | 5 | 0.495567 | `postgres_server_list` | ❌ | --- -## Test 106 +## Test 116 **Expected Tool:** `acr_registry_repository_list` **Prompt:** Show me my container registry repositories @@ -2274,15 +2454,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.546334 | `acr_registry_repository_list` | ✅ **EXPECTED** | +| 1 | 0.546333 | `acr_registry_repository_list` | ✅ **EXPECTED** | | 2 | 0.469295 | `acr_registry_list` | ❌ | -| 3 | 0.451973 | `storage_blob_container_get` | ❌ | +| 3 | 0.451083 | `storage_blob_container_get` | ❌ | | 4 | 0.407973 | `cosmos_database_container_list` | ❌ | | 5 | 0.373464 | `storage_blob_get` | ❌ | --- -## Test 107 +## Test 117 **Expected Tool:** `acr_registry_repository_list` **Prompt:** List repositories in the container registry @@ -2293,13 +2473,13 @@ |------|-------|------|--------| | 1 | 0.674296 | 
`acr_registry_repository_list` | ✅ **EXPECTED** | | 2 | 0.541779 | `acr_registry_list` | ❌ | -| 3 | 0.437756 | `storage_blob_container_get` | ❌ | +| 3 | 0.437509 | `storage_blob_container_get` | ❌ | | 4 | 0.433927 | `cosmos_database_container_list` | ❌ | -| 5 | 0.383001 | `kusto_database_list` | ❌ | +| 5 | 0.383387 | `kusto_database_list` | ❌ | --- -## Test 108 +## Test 118 **Expected Tool:** `acr_registry_repository_list` **Prompt:** Show me the repositories in the container registry @@ -2310,13 +2490,13 @@ |------|-------|------|--------| | 1 | 0.600780 | `acr_registry_repository_list` | ✅ **EXPECTED** | | 2 | 0.501842 | `acr_registry_list` | ❌ | -| 3 | 0.431148 | `storage_blob_container_get` | ❌ | +| 3 | 0.430880 | `storage_blob_container_get` | ❌ | | 4 | 0.418623 | `cosmos_database_container_list` | ❌ | | 5 | 0.378151 | `redis_list` | ❌ | --- -## Test 109 +## Test 119 **Expected Tool:** `communication_email_send` **Prompt:** Send an email to with subject @@ -2325,15 +2505,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.498396 | `communication_email_send` | ✅ **EXPECTED** | -| 2 | 0.229071 | `communication_sms_send` | ❌ | +| 1 | 0.498292 | `communication_email_send` | ✅ **EXPECTED** | +| 2 | 0.229081 | `communication_sms_send` | ❌ | | 3 | 0.188975 | `eventgrid_events_publish` | ❌ | -| 4 | 0.161257 | `foundry_agents_create` | ❌ | -| 5 | 0.146045 | `servicebus_topic_details` | ❌ | +| 4 | 0.161150 | `foundry_agents_create` | ❌ | +| 5 | 0.145951 | `servicebus_topic_details` | ❌ | --- -## Test 110 +## Test 120 **Expected Tool:** `communication_email_send` **Prompt:** Send an email from my communication service to @@ -2342,15 +2522,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.498459 | `communication_email_send` | ✅ **EXPECTED** | -| 2 | 0.314408 | `communication_sms_send` | ❌ | -| 3 | 0.235110 | `foundry_openai_chat-completions-create` | ❌ | -| 4 | 0.211067 | `search_knowledge_base_retrieve` | ❌ | -| 
5 | 0.210014 | `foundry_agents_create` | ❌ | +| 1 | 0.498406 | `communication_email_send` | ✅ **EXPECTED** | +| 2 | 0.314462 | `communication_sms_send` | ❌ | +| 3 | 0.235127 | `foundry_openai_chat-completions-create` | ❌ | +| 4 | 0.215392 | `speech_tts_synthesize` | ❌ | +| 5 | 0.211154 | `search_knowledge_base_retrieve` | ❌ | --- -## Test 111 +## Test 121 **Expected Tool:** `communication_email_send` **Prompt:** Send HTML-formatted email to with subject @@ -2359,15 +2539,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.521087 | `communication_email_send` | ✅ **EXPECTED** | -| 2 | 0.207644 | `communication_sms_send` | ❌ | +| 1 | 0.520967 | `communication_email_send` | ✅ **EXPECTED** | +| 2 | 0.207658 | `communication_sms_send` | ❌ | | 3 | 0.152418 | `eventgrid_events_publish` | ❌ | -| 4 | 0.152056 | `servicebus_topic_details` | ❌ | +| 4 | 0.152013 | `servicebus_topic_details` | ❌ | | 5 | 0.143660 | `foundry_agents_evaluate` | ❌ | --- -## Test 112 +## Test 122 **Expected Tool:** `communication_email_send` **Prompt:** Send email with CC to and @@ -2376,15 +2556,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.533532 | `communication_email_send` | ✅ **EXPECTED** | -| 2 | 0.219566 | `communication_sms_send` | ❌ | -| 3 | 0.106042 | `foundry_agents_query-and-evaluate` | ❌ | +| 1 | 0.533447 | `communication_email_send` | ✅ **EXPECTED** | +| 2 | 0.219584 | `communication_sms_send` | ❌ | +| 3 | 0.106026 | `foundry_agents_query-and-evaluate` | ❌ | | 4 | 0.103723 | `foundry_openai_chat-completions-create` | ❌ | | 5 | 0.084905 | `cosmos_account_list` | ❌ | --- -## Test 113 +## Test 123 **Expected Tool:** `communication_email_send` **Prompt:** Send email to multiple recipients: , @@ -2393,15 +2573,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.540910 | `communication_email_send` | ✅ **EXPECTED** | -| 2 | 0.244525 | `communication_sms_send` | ❌ | -| 3 | 0.134996 | 
`foundry_openai_chat-completions-create` | ❌ | -| 4 | 0.114359 | `foundry_agents_query-and-evaluate` | ❌ | -| 5 | 0.087005 | `postgres_server_param_set` | ❌ | +| 1 | 0.540792 | `communication_email_send` | ✅ **EXPECTED** | +| 2 | 0.244521 | `communication_sms_send` | ❌ | +| 3 | 0.134975 | `foundry_openai_chat-completions-create` | ❌ | +| 4 | 0.114324 | `foundry_agents_query-and-evaluate` | ❌ | +| 5 | 0.087063 | `postgres_server_param_set` | ❌ | --- -## Test 114 +## Test 124 **Expected Tool:** `communication_email_send` **Prompt:** Send email with reply-to address set to @@ -2410,15 +2590,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.512721 | `communication_email_send` | ✅ **EXPECTED** | -| 2 | 0.200189 | `communication_sms_send` | ❌ | -| 3 | 0.164422 | `mysql_server_param_set` | ❌ | +| 1 | 0.512623 | `communication_email_send` | ✅ **EXPECTED** | +| 2 | 0.200177 | `communication_sms_send` | ❌ | +| 3 | 0.164115 | `mysql_server_param_set` | ❌ | | 4 | 0.158759 | `postgres_server_param_set` | ❌ | | 5 | 0.143574 | `appconfig_kv_set` | ❌ | --- -## Test 115 +## Test 125 **Expected Tool:** `communication_email_send` **Prompt:** Send email with custom sender name @@ -2427,15 +2607,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.473192 | `communication_email_send` | ✅ **EXPECTED** | -| 2 | 0.255124 | `communication_sms_send` | ❌ | +| 1 | 0.473175 | `communication_email_send` | ✅ **EXPECTED** | +| 2 | 0.255169 | `communication_sms_send` | ❌ | | 3 | 0.164811 | `foundry_openai_chat-completions-create` | ❌ | -| 4 | 0.160285 | `foundry_openai_embeddings-create` | ❌ | +| 4 | 0.160393 | `foundry_openai_embeddings-create` | ❌ | | 5 | 0.156869 | `cosmos_database_container_item_query` | ❌ | --- -## Test 116 +## Test 126 **Expected Tool:** `communication_email_send` **Prompt:** Send an email with BCC recipients @@ -2444,15 +2624,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.528899 | 
`communication_email_send` | ✅ **EXPECTED** | -| 2 | 0.241091 | `communication_sms_send` | ❌ | -| 3 | 0.137538 | `confidentialledger_entries_append` | ❌ | -| 4 | 0.108748 | `confidentialledger_entries_get` | ❌ | -| 5 | 0.105033 | `storage_blob_upload` | ❌ | +| 1 | 0.528759 | `communication_email_send` | ✅ **EXPECTED** | +| 2 | 0.241047 | `communication_sms_send` | ❌ | +| 3 | 0.137545 | `confidentialledger_entries_append` | ❌ | +| 4 | 0.108728 | `confidentialledger_entries_get` | ❌ | +| 5 | 0.105048 | `storage_blob_upload` | ❌ | --- -## Test 117 +## Test 127 **Expected Tool:** `communication_sms_send` **Prompt:** Send an SMS message to saying "Hello" @@ -2461,15 +2641,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.533822 | `communication_sms_send` | ✅ **EXPECTED** | -| 2 | 0.251480 | `communication_email_send` | ❌ | +| 1 | 0.533868 | `communication_sms_send` | ✅ **EXPECTED** | +| 2 | 0.251429 | `communication_email_send` | ❌ | | 3 | 0.218656 | `foundry_openai_chat-completions-create` | ❌ | -| 4 | 0.175534 | `foundry_agents_create` | ❌ | -| 5 | 0.156040 | `foundry_threads_create` | ❌ | +| 4 | 0.175341 | `foundry_agents_create` | ❌ | +| 5 | 0.166041 | `speech_tts_synthesize` | ❌ | --- -## Test 118 +## Test 128 **Expected Tool:** `communication_sms_send` **Prompt:** Send SMS to from with message "Test message" @@ -2478,15 +2658,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.546006 | `communication_sms_send` | ✅ **EXPECTED** | -| 2 | 0.294912 | `communication_email_send` | ❌ | -| 3 | 0.204585 | `loadtesting_testrun_create` | ❌ | -| 4 | 0.200656 | `foundry_openai_chat-completions-create` | ❌ | -| 5 | 0.141105 | `foundry_agents_create` | ❌ | +| 1 | 0.545976 | `communication_sms_send` | ✅ **EXPECTED** | +| 2 | 0.294793 | `communication_email_send` | ❌ | +| 3 | 0.204688 | `loadtesting_testrun_create` | ❌ | +| 4 | 0.200676 | `foundry_openai_chat-completions-create` | ❌ | +| 5 | 0.141118 | 
`foundry_agents_create` | ❌ | --- -## Test 119 +## Test 129 **Expected Tool:** `communication_sms_send` **Prompt:** Send SMS to multiple recipients: , @@ -2495,15 +2675,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.545744 | `communication_sms_send` | ✅ **EXPECTED** | -| 2 | 0.422028 | `communication_email_send` | ❌ | +| 1 | 0.545755 | `communication_sms_send` | ✅ **EXPECTED** | +| 2 | 0.421988 | `communication_email_send` | ❌ | | 3 | 0.186088 | `foundry_openai_chat-completions-create` | ❌ | -| 4 | 0.142054 | `foundry_agents_query-and-evaluate` | ❌ | -| 5 | 0.113722 | `foundry_threads_get-messages` | ❌ | +| 4 | 0.142030 | `foundry_agents_query-and-evaluate` | ❌ | +| 5 | 0.113656 | `foundry_threads_get-messages` | ❌ | --- -## Test 120 +## Test 130 **Expected Tool:** `communication_sms_send` **Prompt:** Send SMS with delivery reporting enabled @@ -2512,15 +2692,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.554917 | `communication_sms_send` | ✅ **EXPECTED** | -| 2 | 0.269203 | `communication_email_send` | ❌ | +| 1 | 0.554908 | `communication_sms_send` | ✅ **EXPECTED** | +| 2 | 0.269080 | `communication_email_send` | ❌ | | 3 | 0.191848 | `extension_azqr` | ❌ | | 4 | 0.185916 | `foundry_openai_chat-completions-create` | ❌ | -| 5 | 0.170749 | `foundry_agents_query-and-evaluate` | ❌ | +| 5 | 0.170726 | `foundry_agents_query-and-evaluate` | ❌ | --- -## Test 121 +## Test 131 **Expected Tool:** `communication_sms_send` **Prompt:** Send SMS message with custom tracking tag "campaign1" @@ -2529,15 +2709,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.538893 | `communication_sms_send` | ✅ **EXPECTED** | -| 2 | 0.269915 | `communication_email_send` | ❌ | +| 1 | 0.538827 | `communication_sms_send` | ✅ **EXPECTED** | +| 2 | 0.269794 | `communication_email_send` | ❌ | | 3 | 0.188153 | `loadtesting_testrun_create` | ❌ | | 4 | 0.185403 | `foundry_openai_chat-completions-create` | ❌ | 
-| 5 | 0.175135 | `foundry_agents_create` | ❌ | +| 5 | 0.174747 | `foundry_agents_create` | ❌ | --- -## Test 122 +## Test 132 **Expected Tool:** `communication_sms_send` **Prompt:** Send broadcast SMS to and saying "Urgent notification" @@ -2546,15 +2726,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.474775 | `communication_sms_send` | ✅ **EXPECTED** | -| 2 | 0.286381 | `communication_email_send` | ❌ | -| 3 | 0.164341 | `foundry_agents_query-and-evaluate` | ❌ | +| 1 | 0.474786 | `communication_sms_send` | ✅ **EXPECTED** | +| 2 | 0.286338 | `communication_email_send` | ❌ | +| 3 | 0.164289 | `foundry_agents_query-and-evaluate` | ❌ | | 4 | 0.147338 | `foundry_openai_chat-completions-create` | ❌ | | 5 | 0.128704 | `cosmos_account_list` | ❌ | --- -## Test 123 +## Test 133 **Expected Tool:** `communication_sms_send` **Prompt:** Send SMS from my communication service to @@ -2563,15 +2743,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.564058 | `communication_sms_send` | ✅ **EXPECTED** | -| 2 | 0.302377 | `communication_email_send` | ❌ | -| 3 | 0.238340 | `foundry_openai_chat-completions-create` | ❌ | -| 4 | 0.184240 | `foundry_agents_create` | ❌ | -| 5 | 0.183684 | `search_knowledge_base_retrieve` | ❌ | +| 1 | 0.564114 | `communication_sms_send` | ✅ **EXPECTED** | +| 2 | 0.302363 | `communication_email_send` | ❌ | +| 3 | 0.238296 | `foundry_openai_chat-completions-create` | ❌ | +| 4 | 0.184482 | `foundry_agents_create` | ❌ | +| 5 | 0.183651 | `search_knowledge_base_retrieve` | ❌ | --- -## Test 124 +## Test 134 **Expected Tool:** `communication_sms_send` **Prompt:** Send an SMS with delivery receipt tracking @@ -2580,15 +2760,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.598236 | `communication_sms_send` | ✅ **EXPECTED** | -| 2 | 0.314267 | `communication_email_send` | ❌ | -| 3 | 0.206931 | `foundry_agents_query-and-evaluate` | ❌ | +| 1 | 0.598211 | 
`communication_sms_send` | ✅ **EXPECTED** | +| 2 | 0.314134 | `communication_email_send` | ❌ | +| 3 | 0.206916 | `foundry_agents_query-and-evaluate` | ❌ | | 4 | 0.201142 | `foundry_openai_chat-completions-create` | ❌ | -| 5 | 0.187824 | `confidentialledger_entries_append` | ❌ | +| 5 | 0.187855 | `confidentialledger_entries_append` | ❌ | --- -## Test 125 +## Test 135 **Expected Tool:** `confidentialledger_entries_append` **Prompt:** Append an entry to my ledger with data {"key": "value"} @@ -2597,15 +2777,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.511241 | `confidentialledger_entries_append` | ✅ **EXPECTED** | -| 2 | 0.295319 | `confidentialledger_entries_get` | ❌ | -| 3 | 0.291757 | `appconfig_kv_set` | ❌ | -| 4 | 0.258741 | `appconfig_kv_lock_set` | ❌ | -| 5 | 0.250106 | `keyvault_certificate_import` | ❌ | +| 1 | 0.510554 | `confidentialledger_entries_append` | ✅ **EXPECTED** | +| 2 | 0.294885 | `confidentialledger_entries_get` | ❌ | +| 3 | 0.292014 | `appconfig_kv_set` | ❌ | +| 4 | 0.258967 | `appconfig_kv_lock_set` | ❌ | +| 5 | 0.249908 | `keyvault_certificate_import` | ❌ | --- -## Test 126 +## Test 136 **Expected Tool:** `confidentialledger_entries_append` **Prompt:** Write a tamper-proof entry to ledger containing {"transaction": "data"} @@ -2614,15 +2794,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.602321 | `confidentialledger_entries_append` | ✅ **EXPECTED** | -| 2 | 0.357401 | `confidentialledger_entries_get` | ❌ | -| 3 | 0.211998 | `appconfig_kv_lock_set` | ❌ | -| 4 | 0.195461 | `keyvault_secret_create` | ❌ | -| 5 | 0.184070 | `keyvault_certificate_import` | ❌ | +| 1 | 0.602324 | `confidentialledger_entries_append` | ✅ **EXPECTED** | +| 2 | 0.357780 | `confidentialledger_entries_get` | ❌ | +| 3 | 0.211969 | `appconfig_kv_lock_set` | ❌ | +| 4 | 0.195485 | `keyvault_secret_create` | ❌ | +| 5 | 0.184066 | `keyvault_certificate_import` | ❌ | --- -## Test 127 +## Test 137 **Expected 
Tool:** `confidentialledger_entries_append` **Prompt:** Append {"hello": "from mcp"} to my confidential ledger in collection @@ -2631,15 +2811,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.546786 | `confidentialledger_entries_append` | ✅ **EXPECTED** | -| 2 | 0.452117 | `confidentialledger_entries_get` | ❌ | -| 3 | 0.225013 | `appconfig_kv_lock_set` | ❌ | -| 4 | 0.215828 | `appconfig_kv_set` | ❌ | -| 5 | 0.203162 | `keyvault_certificate_import` | ❌ | +| 1 | 0.546394 | `confidentialledger_entries_append` | ✅ **EXPECTED** | +| 2 | 0.451837 | `confidentialledger_entries_get` | ❌ | +| 3 | 0.225163 | `appconfig_kv_lock_set` | ❌ | +| 4 | 0.216036 | `appconfig_kv_set` | ❌ | +| 5 | 0.203220 | `keyvault_certificate_import` | ❌ | --- -## Test 128 +## Test 138 **Expected Tool:** `confidentialledger_entries_append` **Prompt:** Create an immutable ledger entry in with content {"audit": "log"} @@ -2648,7 +2828,7 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.496023 | `confidentialledger_entries_append` | ✅ **EXPECTED** | +| 1 | 0.496008 | `confidentialledger_entries_append` | ✅ **EXPECTED** | | 2 | 0.340187 | `confidentialledger_entries_get` | ❌ | | 3 | 0.218473 | `monitor_activitylog_list` | ❌ | | 4 | 0.215229 | `storage_blob_container_create` | ❌ | @@ -2656,7 +2836,7 @@ --- -## Test 129 +## Test 139 **Expected Tool:** `confidentialledger_entries_append` **Prompt:** Write an entry to confidential ledger @@ -2665,15 +2845,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.622138 | `confidentialledger_entries_append` | ✅ **EXPECTED** | +| 1 | 0.622014 | `confidentialledger_entries_append` | ✅ **EXPECTED** | | 2 | 0.524777 | `confidentialledger_entries_get` | ❌ | | 3 | 0.252508 | `appconfig_kv_lock_set` | ❌ | -| 4 | 0.240252 | `keyvault_secret_create` | ❌ | +| 4 | 0.240315 | `keyvault_secret_create` | ❌ | | 5 | 0.186890 | `appconfig_kv_set` | ❌ | --- -## Test 130 +## Test 140 **Expected 
Tool:** `confidentialledger_entries_get` **Prompt:** Get entry from Confidential Ledger for transaction on ledger @@ -2683,14 +2863,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.707252 | `confidentialledger_entries_get` | ✅ **EXPECTED** | -| 2 | 0.551953 | `confidentialledger_entries_append` | ❌ | -| 3 | 0.245549 | `keyvault_secret_get` | ❌ | -| 4 | 0.231190 | `keyvault_key_get` | ❌ | +| 2 | 0.551952 | `confidentialledger_entries_append` | ❌ | +| 3 | 0.245541 | `keyvault_secret_get` | ❌ | +| 4 | 0.229943 | `keyvault_key_get` | ❌ | | 5 | 0.211839 | `loadtesting_testrun_get` | ❌ | --- -## Test 131 +## Test 141 **Expected Tool:** `confidentialledger_entries_get` **Prompt:** Get transaction from ledger @@ -2700,14 +2880,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.509714 | `confidentialledger_entries_get` | ✅ **EXPECTED** | -| 2 | 0.416580 | `confidentialledger_entries_append` | ❌ | +| 2 | 0.416730 | `confidentialledger_entries_append` | ❌ | | 3 | 0.223959 | `loadtesting_testrun_get` | ❌ | | 4 | 0.218412 | `monitor_resource_log_query` | ❌ | | 5 | 0.217671 | `loadtesting_testrun_list` | ❌ | --- -## Test 132 +## Test 142 **Expected Tool:** `cosmos_account_list` **Prompt:** List all cosmosdb accounts in my subscription @@ -2718,13 +2898,13 @@ |------|-------|------|--------| | 1 | 0.818357 | `cosmos_account_list` | ✅ **EXPECTED** | | 2 | 0.668480 | `cosmos_database_list` | ❌ | -| 3 | 0.636009 | `subscription_list` | ❌ | +| 3 | 0.636036 | `subscription_list` | ❌ | | 4 | 0.615268 | `cosmos_database_container_list` | ❌ | | 5 | 0.601467 | `kusto_cluster_list` | ❌ | --- -## Test 133 +## Test 143 **Expected Tool:** `cosmos_account_list` **Prompt:** Show me my cosmosdb accounts @@ -2733,15 +2913,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.665422 | `cosmos_account_list` | ✅ **EXPECTED** | -| 2 | 0.605325 | `cosmos_database_list` | ❌ | -| 3 | 0.571573 | 
`cosmos_database_container_list` | ❌ | -| 4 | 0.549420 | `cosmos_database_container_item_query` | ❌ | -| 5 | 0.503865 | `storage_account_get` | ❌ | +| 1 | 0.665477 | `cosmos_account_list` | ✅ **EXPECTED** | +| 2 | 0.605386 | `cosmos_database_list` | ❌ | +| 3 | 0.571628 | `cosmos_database_container_list` | ❌ | +| 4 | 0.549485 | `cosmos_database_container_item_query` | ❌ | +| 5 | 0.503849 | `storage_account_get` | ❌ | --- -## Test 134 +## Test 144 **Expected Tool:** `cosmos_account_list` **Prompt:** Show me the cosmosdb accounts in my subscription @@ -2750,15 +2930,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.752494 | `cosmos_account_list` | ✅ **EXPECTED** | -| 2 | 0.607165 | `subscription_list` | ❌ | -| 3 | 0.605125 | `cosmos_database_list` | ❌ | -| 4 | 0.566249 | `cosmos_database_container_list` | ❌ | -| 5 | 0.563922 | `cosmos_database_container_item_query` | ❌ | +| 1 | 0.752413 | `cosmos_account_list` | ✅ **EXPECTED** | +| 2 | 0.606937 | `subscription_list` | ❌ | +| 3 | 0.605196 | `cosmos_database_list` | ❌ | +| 4 | 0.566379 | `cosmos_database_container_list` | ❌ | +| 5 | 0.564082 | `cosmos_database_container_item_query` | ❌ | --- -## Test 135 +## Test 145 **Expected Tool:** `cosmos_database_container_item_query` **Prompt:** Show me the items that contain the word in the container in the database for the cosmosdb account @@ -2769,13 +2949,13 @@ |------|-------|------|--------| | 1 | 0.658701 | `cosmos_database_container_item_query` | ✅ **EXPECTED** | | 2 | 0.605253 | `cosmos_database_container_list` | ❌ | -| 3 | 0.488353 | `storage_blob_container_get` | ❌ | +| 3 | 0.487612 | `storage_blob_container_get` | ❌ | | 4 | 0.477874 | `cosmos_database_list` | ❌ | | 5 | 0.447757 | `cosmos_account_list` | ❌ | --- -## Test 136 +## Test 146 **Expected Tool:** `cosmos_database_container_list` **Prompt:** List all the containers in the database for the cosmosdb account @@ -2784,15 +2964,15 @@ | Rank | Score | Tool | Status | 
|------|-------|------|--------| -| 1 | 0.852875 | `cosmos_database_container_list` | ✅ **EXPECTED** | -| 2 | 0.680991 | `cosmos_database_list` | ❌ | -| 3 | 0.680758 | `cosmos_database_container_item_query` | ❌ | -| 4 | 0.632634 | `storage_blob_container_get` | ❌ | -| 5 | 0.630588 | `cosmos_account_list` | ❌ | +| 1 | 0.852826 | `cosmos_database_container_list` | ✅ **EXPECTED** | +| 2 | 0.681006 | `cosmos_database_list` | ❌ | +| 3 | 0.680795 | `cosmos_database_container_item_query` | ❌ | +| 4 | 0.632368 | `storage_blob_container_get` | ❌ | +| 5 | 0.630666 | `cosmos_account_list` | ❌ | --- -## Test 137 +## Test 147 **Expected Tool:** `cosmos_database_container_list` **Prompt:** Show me the containers in the database for the cosmosdb account @@ -2804,12 +2984,12 @@ | 1 | 0.789395 | `cosmos_database_container_list` | ✅ **EXPECTED** | | 2 | 0.648126 | `cosmos_database_container_item_query` | ❌ | | 3 | 0.614220 | `cosmos_database_list` | ❌ | -| 4 | 0.591350 | `storage_blob_container_get` | ❌ | +| 4 | 0.591361 | `storage_blob_container_get` | ❌ | | 5 | 0.562062 | `cosmos_account_list` | ❌ | --- -## Test 138 +## Test 148 **Expected Tool:** `cosmos_database_list` **Prompt:** List all the databases in the cosmosdb account @@ -2822,11 +3002,11 @@ | 2 | 0.668515 | `cosmos_account_list` | ❌ | | 3 | 0.665298 | `cosmos_database_container_list` | ❌ | | 4 | 0.606433 | `cosmos_database_container_item_query` | ❌ | -| 5 | 0.582804 | `kusto_database_list` | ❌ | +| 5 | 0.583402 | `kusto_database_list` | ❌ | --- -## Test 139 +## Test 149 **Expected Tool:** `cosmos_database_list` **Prompt:** Show me the databases in the cosmosdb account @@ -2839,11 +3019,11 @@ | 2 | 0.624759 | `cosmos_database_container_list` | ❌ | | 3 | 0.614572 | `cosmos_account_list` | ❌ | | 4 | 0.579919 | `cosmos_database_container_item_query` | ❌ | -| 5 | 0.538479 | `mysql_database_list` | ❌ | +| 5 | 0.538045 | `mysql_database_list` | ❌ | --- -## Test 140 +## Test 150 **Expected Tool:** `kusto_cluster_get` 
**Prompt:** Show me the details of the Data Explorer cluster @@ -2855,12 +3035,12 @@ | 1 | 0.590264 | `kusto_cluster_get` | ✅ **EXPECTED** | | 2 | 0.463832 | `kusto_cluster_list` | ❌ | | 3 | 0.428159 | `kusto_query` | ❌ | -| 4 | 0.425909 | `kusto_database_list` | ❌ | +| 4 | 0.425508 | `kusto_database_list` | ❌ | | 5 | 0.422577 | `kusto_table_schema` | ❌ | --- -## Test 141 +## Test 151 **Expected Tool:** `kusto_cluster_list` **Prompt:** List all Data Explorer clusters in my subscription @@ -2870,14 +3050,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.793744 | `kusto_cluster_list` | ✅ **EXPECTED** | -| 2 | 0.630451 | `kusto_database_list` | ❌ | +| 2 | 0.630260 | `kusto_database_list` | ❌ | | 3 | 0.573395 | `kusto_cluster_get` | ❌ | | 4 | 0.525025 | `aks_cluster_get` | ❌ | -| 5 | 0.509397 | `grafana_list` | ❌ | +| 5 | 0.509396 | `grafana_list` | ❌ | --- -## Test 142 +## Test 152 **Expected Tool:** `kusto_cluster_list` **Prompt:** Show me my Data Explorer clusters @@ -2888,13 +3068,13 @@ |------|-------|------|--------| | 1 | 0.531307 | `kusto_cluster_list` | ✅ **EXPECTED** | | 2 | 0.465277 | `kusto_cluster_get` | ❌ | -| 3 | 0.432311 | `kusto_database_list` | ❌ | +| 3 | 0.432028 | `kusto_database_list` | ❌ | | 4 | 0.369596 | `aks_cluster_get` | ❌ | | 5 | 0.363119 | `kusto_table_schema` | ❌ | --- -## Test 143 +## Test 153 **Expected Tool:** `kusto_cluster_list` **Prompt:** Show me the Data Explorer clusters in my subscription @@ -2905,13 +3085,13 @@ |------|-------|------|--------| | 1 | 0.701484 | `kusto_cluster_list` | ✅ **EXPECTED** | | 2 | 0.571191 | `kusto_cluster_get` | ❌ | -| 3 | 0.548734 | `kusto_database_list` | ❌ | +| 3 | 0.548216 | `kusto_database_list` | ❌ | | 4 | 0.498909 | `aks_cluster_get` | ❌ | | 5 | 0.474201 | `redis_list` | ❌ | --- -## Test 144 +## Test 154 **Expected Tool:** `kusto_database_list` **Prompt:** List all databases in the Data Explorer cluster @@ -2920,15 +3100,15 @@ | Rank | Score | Tool | Status | 
|------|-------|------|--------| -| 1 | 0.676656 | `kusto_database_list` | ✅ **EXPECTED** | -| 2 | 0.560592 | `kusto_cluster_list` | ❌ | -| 3 | 0.556795 | `kusto_table_list` | ❌ | -| 4 | 0.553218 | `postgres_database_list` | ❌ | -| 5 | 0.549673 | `cosmos_database_list` | ❌ | +| 1 | 0.677160 | `kusto_database_list` | ✅ **EXPECTED** | +| 2 | 0.560715 | `kusto_cluster_list` | ❌ | +| 3 | 0.556662 | `kusto_table_list` | ❌ | +| 4 | 0.553239 | `postgres_database_list` | ❌ | +| 5 | 0.549605 | `cosmos_database_list` | ❌ | --- -## Test 145 +## Test 155 **Expected Tool:** `kusto_database_list` **Prompt:** Show me the databases in the Data Explorer cluster @@ -2937,15 +3117,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.623242 | `kusto_database_list` | ✅ **EXPECTED** | -| 2 | 0.509952 | `kusto_cluster_list` | ❌ | +| 1 | 0.623592 | `kusto_database_list` | ✅ **EXPECTED** | +| 2 | 0.509953 | `kusto_cluster_list` | ❌ | | 3 | 0.507073 | `kusto_table_list` | ❌ | | 4 | 0.497144 | `cosmos_database_list` | ❌ | -| 5 | 0.491400 | `mysql_database_list` | ❌ | +| 5 | 0.491166 | `mysql_database_list` | ❌ | --- -## Test 146 +## Test 156 **Expected Tool:** `kusto_query` **Prompt:** Show me all items that contain the word in the Data Explorer table in cluster @@ -2955,14 +3135,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.423660 | `kusto_query` | ✅ **EXPECTED** | -| 2 | 0.409485 | `postgres_database_query` | ❌ | +| 2 | 0.409526 | `postgres_database_query` | ❌ | | 3 | 0.408178 | `kusto_table_schema` | ❌ | -| 4 | 0.407740 | `kusto_sample` | ❌ | -| 5 | 0.403989 | `kusto_cluster_list` | ❌ | +| 4 | 0.407741 | `kusto_sample` | ❌ | +| 5 | 0.403990 | `kusto_cluster_list` | ❌ | --- -## Test 147 +## Test 157 **Expected Tool:** `kusto_sample` **Prompt:** Show me a data sample from the Data Explorer table in cluster @@ -2979,7 +3159,7 @@ --- -## Test 148 +## Test 158 **Expected Tool:** `kusto_table_list` **Prompt:** List all tables in the 
Data Explorer database in cluster @@ -2990,13 +3170,13 @@ |------|-------|------|--------| | 1 | 0.679642 | `kusto_table_list` | ✅ **EXPECTED** | | 2 | 0.585237 | `postgres_table_list` | ❌ | -| 3 | 0.580964 | `kusto_database_list` | ❌ | +| 3 | 0.580885 | `kusto_database_list` | ❌ | | 4 | 0.556724 | `mysql_table_list` | ❌ | -| 5 | 0.550005 | `monitor_table_list` | ❌ | +| 5 | 0.549940 | `monitor_table_list` | ❌ | --- -## Test 149 +## Test 159 **Expected Tool:** `kusto_table_list` **Prompt:** Show me the tables in the Data Explorer database in cluster @@ -3006,14 +3186,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.619252 | `kusto_table_list` | ✅ **EXPECTED** | -| 2 | 0.554332 | `kusto_table_schema` | ❌ | -| 3 | 0.527431 | `kusto_database_list` | ❌ | +| 2 | 0.554333 | `kusto_table_schema` | ❌ | +| 3 | 0.527314 | `kusto_database_list` | ❌ | | 4 | 0.524691 | `mysql_table_list` | ❌ | | 5 | 0.523432 | `postgres_table_list` | ❌ | --- -## Test 150 +## Test 160 **Expected Tool:** `kusto_table_schema` **Prompt:** Show me the schema for table in the Data Explorer database in cluster @@ -3022,15 +3202,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.666980 | `kusto_table_schema` | ✅ **EXPECTED** | -| 2 | 0.564204 | `postgres_table_schema_get` | ❌ | -| 3 | 0.528301 | `mysql_table_schema_get` | ❌ | -| 4 | 0.490892 | `kusto_sample` | ❌ | -| 5 | 0.489745 | `kusto_table_list` | ❌ | +| 1 | 0.666757 | `kusto_table_schema` | ✅ **EXPECTED** | +| 2 | 0.564118 | `postgres_table_schema_get` | ❌ | +| 3 | 0.527778 | `mysql_table_schema_get` | ❌ | +| 4 | 0.490798 | `kusto_sample` | ❌ | +| 5 | 0.489446 | `kusto_table_list` | ❌ | --- -## Test 151 +## Test 161 **Expected Tool:** `mysql_database_list` **Prompt:** List all MySQL databases in server @@ -3039,15 +3219,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.633991 | `postgres_database_list` | ❌ | -| 2 | 0.623359 | `mysql_database_list` | ✅ 
**EXPECTED** | -| 3 | 0.534434 | `mysql_table_list` | ❌ | -| 4 | 0.498902 | `mysql_server_list` | ❌ | -| 5 | 0.490102 | `sql_db_list` | ❌ | +| 1 | 0.634056 | `postgres_database_list` | ❌ | +| 2 | 0.623043 | `mysql_database_list` | ✅ **EXPECTED** | +| 3 | 0.534457 | `mysql_table_list` | ❌ | +| 4 | 0.499107 | `mysql_server_list` | ❌ | +| 5 | 0.490148 | `sql_db_list` | ❌ | --- -## Test 152 +## Test 162 **Expected Tool:** `mysql_database_list` **Prompt:** Show me the MySQL databases in server @@ -3056,15 +3236,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.588121 | `mysql_database_list` | ✅ **EXPECTED** | +| 1 | 0.587863 | `mysql_database_list` | ✅ **EXPECTED** | | 2 | 0.574089 | `postgres_database_list` | ❌ | | 3 | 0.483855 | `mysql_table_list` | ❌ | -| 4 | 0.463244 | `mysql_server_list` | ❌ | +| 4 | 0.463405 | `mysql_server_list` | ❌ | | 5 | 0.444547 | `sql_db_list` | ❌ | --- -## Test 153 +## Test 163 **Expected Tool:** `mysql_database_query` **Prompt:** Show me all items that contain the word in the MySQL database in server @@ -3073,15 +3253,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.476423 | `mysql_table_list` | ❌ | -| 2 | 0.455770 | `mysql_database_list` | ❌ | -| 3 | 0.432703 | `mysql_database_query` | ✅ **EXPECTED** | -| 4 | 0.419859 | `mysql_server_list` | ❌ | -| 5 | 0.409655 | `mysql_table_schema_get` | ❌ | +| 1 | 0.476256 | `mysql_table_list` | ❌ | +| 2 | 0.456053 | `mysql_database_list` | ❌ | +| 3 | 0.433203 | `mysql_database_query` | ✅ **EXPECTED** | +| 4 | 0.419831 | `mysql_server_list` | ❌ | +| 5 | 0.409321 | `mysql_table_schema_get` | ❌ | --- -## Test 154 +## Test 164 **Expected Tool:** `mysql_server_config_get` **Prompt:** Show me the configuration of MySQL server @@ -3090,15 +3270,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.531964 | `postgres_server_config_get` | ❌ | -| 2 | 0.517385 | `mysql_server_param_set` | ❌ | -| 3 | 0.489870 | 
`mysql_server_config_get` | ✅ **EXPECTED** | -| 4 | 0.476944 | `mysql_server_param_get` | ❌ | -| 5 | 0.426840 | `mysql_table_schema_get` | ❌ | +| 1 | 0.531887 | `postgres_server_config_get` | ❌ | +| 2 | 0.516894 | `mysql_server_param_set` | ❌ | +| 3 | 0.489816 | `mysql_server_config_get` | ✅ **EXPECTED** | +| 4 | 0.476863 | `mysql_server_param_get` | ❌ | +| 5 | 0.426507 | `mysql_table_schema_get` | ❌ | --- -## Test 155 +## Test 165 **Expected Tool:** `mysql_server_list` **Prompt:** List all MySQL servers in my subscription @@ -3107,15 +3287,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.678473 | `postgres_server_list` | ❌ | -| 2 | 0.558177 | `mysql_database_list` | ❌ | -| 3 | 0.554818 | `mysql_server_list` | ✅ **EXPECTED** | +| 1 | 0.678472 | `postgres_server_list` | ❌ | +| 2 | 0.558115 | `mysql_database_list` | ❌ | +| 3 | 0.554998 | `mysql_server_list` | ✅ **EXPECTED** | | 4 | 0.513706 | `kusto_cluster_list` | ❌ | | 5 | 0.501199 | `mysql_table_list` | ❌ | --- -## Test 156 +## Test 166 **Expected Tool:** `mysql_server_list` **Prompt:** Show me my MySQL servers @@ -3124,15 +3304,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.478518 | `mysql_database_list` | ❌ | -| 2 | 0.474586 | `mysql_server_list` | ✅ **EXPECTED** | +| 1 | 0.478503 | `mysql_database_list` | ❌ | +| 2 | 0.474891 | `mysql_server_list` | ✅ **EXPECTED** | | 3 | 0.435642 | `postgres_server_list` | ❌ | | 4 | 0.412380 | `mysql_table_list` | ❌ | | 5 | 0.389993 | `postgres_database_list` | ❌ | --- -## Test 157 +## Test 167 **Expected Tool:** `mysql_server_list` **Prompt:** Show me the MySQL servers in my subscription @@ -3142,14 +3322,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.636435 | `postgres_server_list` | ❌ | -| 2 | 0.534266 | `mysql_server_list` | ✅ **EXPECTED** | -| 3 | 0.530210 | `mysql_database_list` | ❌ | +| 2 | 0.534464 | `mysql_server_list` | ✅ **EXPECTED** | +| 3 | 0.530312 | 
`mysql_database_list` | ❌ | | 4 | 0.475052 | `kusto_cluster_list` | ❌ | | 5 | 0.470468 | `redis_list` | ❌ | --- -## Test 158 +## Test 168 **Expected Tool:** `mysql_server_param_get` **Prompt:** Show me the value of connection timeout in seconds in my MySQL server @@ -3159,14 +3339,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.495071 | `mysql_server_param_get` | ✅ **EXPECTED** | -| 2 | 0.437857 | `mysql_server_param_set` | ❌ | -| 3 | 0.333041 | `mysql_database_query` | ❌ | -| 4 | 0.313364 | `mysql_table_schema_get` | ❌ | -| 5 | 0.310856 | `postgres_server_param_get` | ❌ | +| 2 | 0.438075 | `mysql_server_param_set` | ❌ | +| 3 | 0.333841 | `mysql_database_query` | ❌ | +| 4 | 0.313150 | `mysql_table_schema_get` | ❌ | +| 5 | 0.310834 | `postgres_server_param_get` | ❌ | --- -## Test 159 +## Test 169 **Expected Tool:** `mysql_server_param_set` **Prompt:** Set connection timeout to 20 seconds for my MySQL server @@ -3175,15 +3355,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.449612 | `mysql_server_param_set` | ✅ **EXPECTED** | +| 1 | 0.449419 | `mysql_server_param_set` | ✅ **EXPECTED** | | 2 | 0.381144 | `mysql_server_param_get` | ❌ | | 3 | 0.303499 | `postgres_server_param_set` | ❌ | -| 4 | 0.298661 | `mysql_database_query` | ❌ | -| 5 | 0.254180 | `mysql_server_list` | ❌ | +| 4 | 0.298911 | `mysql_database_query` | ❌ | +| 5 | 0.254159 | `mysql_server_list` | ❌ | --- -## Test 160 +## Test 170 **Expected Tool:** `mysql_table_list` **Prompt:** List all tables in the MySQL database in server @@ -3192,15 +3372,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.633542 | `mysql_table_list` | ✅ **EXPECTED** | -| 2 | 0.573851 | `postgres_table_list` | ❌ | -| 3 | 0.550878 | `postgres_database_list` | ❌ | -| 4 | 0.546988 | `mysql_database_list` | ❌ | -| 5 | 0.511879 | `kusto_table_list` | ❌ | +| 1 | 0.633448 | `mysql_table_list` | ✅ **EXPECTED** | +| 2 | 0.573844 | 
`postgres_table_list` | ❌ | +| 3 | 0.550898 | `postgres_database_list` | ❌ | +| 4 | 0.546779 | `mysql_database_list` | ❌ | +| 5 | 0.511847 | `kusto_table_list` | ❌ | --- -## Test 161 +## Test 171 **Expected Tool:** `mysql_table_list` **Prompt:** Show me the tables in the MySQL database in server @@ -3211,13 +3391,13 @@ |------|-------|------|--------| | 1 | 0.609131 | `mysql_table_list` | ✅ **EXPECTED** | | 2 | 0.526236 | `postgres_table_list` | ❌ | -| 3 | 0.525709 | `mysql_database_list` | ❌ | -| 4 | 0.507532 | `mysql_table_schema_get` | ❌ | +| 3 | 0.525627 | `mysql_database_list` | ❌ | +| 4 | 0.507258 | `mysql_table_schema_get` | ❌ | | 5 | 0.498050 | `postgres_database_list` | ❌ | --- -## Test 162 +## Test 172 **Expected Tool:** `mysql_table_schema_get` **Prompt:** Show me the schema of table in the MySQL database in server @@ -3226,15 +3406,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.630824 | `mysql_table_schema_get` | ✅ **EXPECTED** | +| 1 | 0.630623 | `mysql_table_schema_get` | ✅ **EXPECTED** | | 2 | 0.558306 | `postgres_table_schema_get` | ❌ | | 3 | 0.545025 | `mysql_table_list` | ❌ | | 4 | 0.517419 | `kusto_table_schema` | ❌ | -| 5 | 0.457739 | `mysql_database_list` | ❌ | +| 5 | 0.457648 | `mysql_database_list` | ❌ | --- -## Test 163 +## Test 173 **Expected Tool:** `postgres_database_list` **Prompt:** List all PostgreSQL databases in server @@ -3243,15 +3423,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.815470 | `postgres_database_list` | ✅ **EXPECTED** | -| 2 | 0.643680 | `postgres_table_list` | ❌ | -| 3 | 0.622824 | `postgres_server_list` | ❌ | -| 4 | 0.542912 | `postgres_server_config_get` | ❌ | -| 5 | 0.490950 | `postgres_server_param_get` | ❌ | +| 1 | 0.815617 | `postgres_database_list` | ✅ **EXPECTED** | +| 2 | 0.644014 | `postgres_table_list` | ❌ | +| 3 | 0.622790 | `postgres_server_list` | ❌ | +| 4 | 0.542685 | `postgres_server_config_get` | ❌ | +| 5 | 0.490904 | 
`postgres_server_param_get` | ❌ | --- -## Test 164 +## Test 174 **Expected Tool:** `postgres_database_list` **Prompt:** Show me the PostgreSQL databases in server @@ -3261,14 +3441,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.760033 | `postgres_database_list` | ✅ **EXPECTED** | -| 2 | 0.589784 | `postgres_server_list` | ❌ | +| 2 | 0.589783 | `postgres_server_list` | ❌ | | 3 | 0.585891 | `postgres_table_list` | ❌ | | 4 | 0.552660 | `postgres_server_config_get` | ❌ | -| 5 | 0.495685 | `postgres_server_param_get` | ❌ | +| 5 | 0.495629 | `postgres_server_param_get` | ❌ | --- -## Test 165 +## Test 175 **Expected Tool:** `postgres_database_query` **Prompt:** Show me all items that contain the word in the PostgreSQL database in server @@ -3278,14 +3458,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.546211 | `postgres_database_list` | ❌ | -| 2 | 0.523223 | `postgres_database_query` | ✅ **EXPECTED** | +| 2 | 0.523127 | `postgres_database_query` | ✅ **EXPECTED** | | 3 | 0.503267 | `postgres_table_list` | ❌ | | 4 | 0.466599 | `postgres_server_list` | ❌ | -| 5 | 0.403963 | `postgres_server_param_get` | ❌ | +| 5 | 0.403969 | `postgres_server_param_get` | ❌ | --- -## Test 166 +## Test 176 **Expected Tool:** `postgres_server_config_get` **Prompt:** Show me the configuration of PostgreSQL server @@ -3296,13 +3476,13 @@ |------|-------|------|--------| | 1 | 0.756593 | `postgres_server_config_get` | ✅ **EXPECTED** | | 2 | 0.615429 | `postgres_server_param_set` | ❌ | -| 3 | 0.599487 | `postgres_server_param_get` | ❌ | -| 4 | 0.535050 | `postgres_database_list` | ❌ | +| 3 | 0.599471 | `postgres_server_param_get` | ❌ | +| 4 | 0.535049 | `postgres_database_list` | ❌ | | 5 | 0.518574 | `postgres_server_list` | ❌ | --- -## Test 167 +## Test 177 **Expected Tool:** `postgres_server_list` **Prompt:** List all PostgreSQL servers in my subscription @@ -3319,7 +3499,7 @@ --- -## Test 168 +## Test 178 **Expected Tool:** 
`postgres_server_list` **Prompt:** Show me my PostgreSQL servers @@ -3330,13 +3510,13 @@ |------|-------|------|--------| | 1 | 0.674327 | `postgres_server_list` | ✅ **EXPECTED** | | 2 | 0.607062 | `postgres_database_list` | ❌ | -| 3 | 0.576348 | `postgres_server_config_get` | ❌ | -| 4 | 0.522995 | `postgres_table_list` | ❌ | -| 5 | 0.506254 | `postgres_server_param_get` | ❌ | +| 3 | 0.576349 | `postgres_server_config_get` | ❌ | +| 4 | 0.522996 | `postgres_table_list` | ❌ | +| 5 | 0.506171 | `postgres_server_param_get` | ❌ | --- -## Test 169 +## Test 179 **Expected Tool:** `postgres_server_list` **Prompt:** Show me the PostgreSQL servers in my subscription @@ -3349,11 +3529,11 @@ | 2 | 0.579232 | `postgres_database_list` | ❌ | | 3 | 0.531804 | `postgres_server_config_get` | ❌ | | 4 | 0.514445 | `postgres_table_list` | ❌ | -| 5 | 0.505978 | `postgres_server_param_get` | ❌ | +| 5 | 0.505869 | `postgres_server_param_get` | ❌ | --- -## Test 170 +## Test 180 **Expected Tool:** `postgres_server_param_get` **Prompt:** Show me if the parameter my PostgreSQL server has replication enabled @@ -3362,7 +3542,7 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.594770 | `postgres_server_param_get` | ✅ **EXPECTED** | +| 1 | 0.594753 | `postgres_server_param_get` | ✅ **EXPECTED** | | 2 | 0.552678 | `postgres_server_param_set` | ❌ | | 3 | 0.539671 | `postgres_server_config_get` | ❌ | | 4 | 0.489693 | `postgres_server_list` | ❌ | @@ -3370,7 +3550,7 @@ --- -## Test 171 +## Test 181 **Expected Tool:** `postgres_server_param_set` **Prompt:** Enable replication for my PostgreSQL server @@ -3379,15 +3559,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.579909 | `postgres_server_param_set` | ✅ **EXPECTED** | -| 2 | 0.488496 | `postgres_server_config_get` | ❌ | -| 3 | 0.469810 | `postgres_server_list` | ❌ | -| 4 | 0.447051 | `postgres_server_param_get` | ❌ | -| 5 | 0.440716 | `postgres_database_list` | ❌ | +| 1 | 0.579873 | 
`postgres_server_param_set` | ✅ **EXPECTED** | +| 2 | 0.488474 | `postgres_server_config_get` | ❌ | +| 3 | 0.469794 | `postgres_server_list` | ❌ | +| 4 | 0.447011 | `postgres_server_param_get` | ❌ | +| 5 | 0.440760 | `postgres_database_list` | ❌ | --- -## Test 172 +## Test 182 **Expected Tool:** `postgres_table_list` **Prompt:** List all tables in the PostgreSQL database in server @@ -3396,15 +3576,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.789934 | `postgres_table_list` | ✅ **EXPECTED** | -| 2 | 0.750592 | `postgres_database_list` | ❌ | -| 3 | 0.574975 | `postgres_server_list` | ❌ | -| 4 | 0.519816 | `postgres_table_schema_get` | ❌ | -| 5 | 0.501361 | `postgres_server_config_get` | ❌ | +| 1 | 0.789883 | `postgres_table_list` | ✅ **EXPECTED** | +| 2 | 0.750580 | `postgres_database_list` | ❌ | +| 3 | 0.574930 | `postgres_server_list` | ❌ | +| 4 | 0.519820 | `postgres_table_schema_get` | ❌ | +| 5 | 0.501400 | `postgres_server_config_get` | ❌ | --- -## Test 173 +## Test 183 **Expected Tool:** `postgres_table_list` **Prompt:** Show me the tables in the PostgreSQL database in server @@ -3421,7 +3601,7 @@ --- -## Test 174 +## Test 184 **Expected Tool:** `postgres_table_schema_get` **Prompt:** Show me the schema of table
in the PostgreSQL database in server @@ -3430,15 +3610,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.714916 | `postgres_table_schema_get` | ✅ **EXPECTED** | -| 2 | 0.597892 | `postgres_table_list` | ❌ | -| 3 | 0.574251 | `postgres_database_list` | ❌ | -| 4 | 0.508090 | `postgres_server_config_get` | ❌ | -| 5 | 0.502593 | `kusto_table_schema` | ❌ | +| 1 | 0.714596 | `postgres_table_schema_get` | ✅ **EXPECTED** | +| 2 | 0.597838 | `postgres_table_list` | ❌ | +| 3 | 0.574339 | `postgres_database_list` | ❌ | +| 4 | 0.507791 | `postgres_server_config_get` | ❌ | +| 5 | 0.502705 | `kusto_table_schema` | ❌ | --- -## Test 175 +## Test 185 **Expected Tool:** `deploy_app_logs_get` **Prompt:** Show me the log of the application deployed by azd @@ -3447,15 +3627,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.711844 | `deploy_app_logs_get` | ✅ **EXPECTED** | +| 1 | 0.711770 | `deploy_app_logs_get` | ✅ **EXPECTED** | | 2 | 0.471692 | `deploy_plan_get` | ❌ | -| 3 | 0.451639 | `monitor_activitylog_list` | ❌ | -| 4 | 0.404892 | `deploy_pipeline_guidance_get` | ❌ | +| 3 | 0.451638 | `monitor_activitylog_list` | ❌ | +| 4 | 0.404890 | `deploy_pipeline_guidance_get` | ❌ | | 5 | 0.401388 | `monitor_resource_log_query` | ❌ | --- -## Test 176 +## Test 186 **Expected Tool:** `deploy_architecture_diagram_generate` **Prompt:** Generate the azure architecture diagram for this application @@ -3464,15 +3644,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.680599 | `deploy_architecture_diagram_generate` | ✅ **EXPECTED** | -| 2 | 0.562485 | `deploy_plan_get` | ❌ | -| 3 | 0.497326 | `deploy_pipeline_guidance_get` | ❌ | -| 4 | 0.489325 | `cloudarchitect_design` | ❌ | -| 5 | 0.435899 | `deploy_iac_rules_get` | ❌ | +| 1 | 0.680640 | `deploy_architecture_diagram_generate` | ✅ **EXPECTED** | +| 2 | 0.562521 | `deploy_plan_get` | ❌ | +| 3 | 0.497193 | `deploy_pipeline_guidance_get` | ❌ | +| 4 | 0.490040 | 
`cloudarchitect_design` | ❌ | +| 5 | 0.435921 | `deploy_iac_rules_get` | ❌ | --- -## Test 177 +## Test 187 **Expected Tool:** `deploy_iac_rules_get` **Prompt:** Show me the rules to generate bicep scripts @@ -3482,14 +3662,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.529092 | `deploy_iac_rules_get` | ✅ **EXPECTED** | -| 2 | 0.480324 | `bicepschema_get` | ❌ | +| 2 | 0.479903 | `bicepschema_get` | ❌ | | 3 | 0.391965 | `get_bestpractices_get` | ❌ | | 4 | 0.383210 | `azureterraformbestpractices_get` | ❌ | | 5 | 0.375561 | `extension_cli_generate` | ❌ | --- -## Test 178 +## Test 188 **Expected Tool:** `deploy_pipeline_guidance_get` **Prompt:** How can I create a CI/CD pipeline to deploy this app to Azure? @@ -3498,15 +3678,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.638588 | `deploy_pipeline_guidance_get` | ✅ **EXPECTED** | +| 1 | 0.638841 | `deploy_pipeline_guidance_get` | ✅ **EXPECTED** | | 2 | 0.499242 | `deploy_plan_get` | ❌ | -| 3 | 0.448917 | `deploy_iac_rules_get` | ❌ | -| 4 | 0.385670 | `deploy_app_logs_get` | ❌ | +| 3 | 0.448918 | `deploy_iac_rules_get` | ❌ | +| 4 | 0.385920 | `deploy_app_logs_get` | ❌ | | 5 | 0.382240 | `get_bestpractices_get` | ❌ | --- -## Test 179 +## Test 189 **Expected Tool:** `deploy_plan_get` **Prompt:** Create a plan to deploy this application to azure @@ -3516,14 +3696,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.688055 | `deploy_plan_get` | ✅ **EXPECTED** | -| 2 | 0.587963 | `deploy_pipeline_guidance_get` | ❌ | +| 2 | 0.587903 | `deploy_pipeline_guidance_get` | ❌ | | 3 | 0.499385 | `deploy_iac_rules_get` | ❌ | | 4 | 0.498575 | `deploy_architecture_diagram_generate` | ❌ | -| 5 | 0.448912 | `loadtesting_test_create` | ❌ | +| 5 | 0.448692 | `loadtesting_test_create` | ❌ | --- -## Test 180 +## Test 190 **Expected Tool:** `eventgrid_events_publish` **Prompt:** Publish an event to Event Grid topic using with the following data @@ 
-3532,15 +3712,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.755353 | `eventgrid_events_publish` | ✅ **EXPECTED** | -| 2 | 0.482544 | `eventgrid_subscription_list` | ❌ | -| 3 | 0.465759 | `eventgrid_topic_list` | ❌ | -| 4 | 0.360686 | `eventhubs_eventhub_update` | ❌ | -| 5 | 0.355213 | `servicebus_topic_details` | ❌ | +| 1 | 0.755140 | `eventgrid_events_publish` | ✅ **EXPECTED** | +| 2 | 0.482731 | `eventgrid_subscription_list` | ❌ | +| 3 | 0.465891 | `eventgrid_topic_list` | ❌ | +| 4 | 0.360374 | `eventhubs_eventhub_update` | ❌ | +| 5 | 0.355481 | `servicebus_topic_details` | ❌ | --- -## Test 181 +## Test 191 **Expected Tool:** `eventgrid_events_publish` **Prompt:** Publish event to my Event Grid topic with the following events @@ -3549,15 +3729,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.654648 | `eventgrid_events_publish` | ✅ **EXPECTED** | -| 2 | 0.524134 | `eventgrid_subscription_list` | ❌ | -| 3 | 0.509777 | `eventgrid_topic_list` | ❌ | -| 4 | 0.373438 | `servicebus_topic_details` | ❌ | +| 1 | 0.654647 | `eventgrid_events_publish` | ✅ **EXPECTED** | +| 2 | 0.524503 | `eventgrid_subscription_list` | ❌ | +| 3 | 0.510039 | `eventgrid_topic_list` | ❌ | +| 4 | 0.373718 | `servicebus_topic_details` | ❌ | | 5 | 0.359908 | `eventhubs_eventhub_update` | ❌ | --- -## Test 182 +## Test 192 **Expected Tool:** `eventgrid_events_publish` **Prompt:** Send an event to Event Grid topic in resource group with @@ -3566,15 +3746,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.600274 | `eventgrid_events_publish` | ✅ **EXPECTED** | -| 2 | 0.521041 | `eventgrid_topic_list` | ❌ | -| 3 | 0.504642 | `eventgrid_subscription_list` | ❌ | -| 4 | 0.411129 | `eventhubs_eventhub_consumergroup_update` | ❌ | -| 5 | 0.389439 | `eventhubs_eventhub_consumergroup_get` | ❌ | +| 1 | 0.600271 | `eventgrid_events_publish` | ✅ **EXPECTED** | +| 2 | 0.521247 | `eventgrid_topic_list` | ❌ | +| 3 | 
0.504794 | `eventgrid_subscription_list` | ❌ | +| 4 | 0.411140 | `eventhubs_eventhub_consumergroup_update` | ❌ | +| 5 | 0.389466 | `eventhubs_eventhub_consumergroup_get` | ❌ | --- -## Test 183 +## Test 193 **Expected Tool:** `eventgrid_topic_list` **Prompt:** List all Event Grid topics in my subscription @@ -3583,15 +3763,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.769921 | `eventgrid_topic_list` | ✅ **EXPECTED** | -| 2 | 0.745048 | `eventgrid_subscription_list` | ❌ | +| 1 | 0.770140 | `eventgrid_topic_list` | ✅ **EXPECTED** | +| 2 | 0.745470 | `eventgrid_subscription_list` | ❌ | | 3 | 0.561862 | `kusto_cluster_list` | ❌ | -| 4 | 0.543887 | `search_service_list` | ❌ | -| 5 | 0.526123 | `subscription_list` | ❌ | +| 4 | 0.545540 | `search_service_list` | ❌ | +| 5 | 0.526138 | `subscription_list` | ❌ | --- -## Test 184 +## Test 194 **Expected Tool:** `eventgrid_topic_list` **Prompt:** Show me the Event Grid topics in my subscription @@ -3600,15 +3780,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.738040 | `eventgrid_topic_list` | ✅ **EXPECTED** | -| 2 | 0.736919 | `eventgrid_subscription_list` | ❌ | +| 1 | 0.738258 | `eventgrid_topic_list` | ✅ **EXPECTED** | +| 2 | 0.737486 | `eventgrid_subscription_list` | ❌ | | 3 | 0.492592 | `kusto_cluster_list` | ❌ | -| 4 | 0.480252 | `subscription_list` | ❌ | -| 5 | 0.473459 | `search_service_list` | ❌ | +| 4 | 0.480287 | `subscription_list` | ❌ | +| 5 | 0.475119 | `search_service_list` | ❌ | --- -## Test 185 +## Test 195 **Expected Tool:** `eventgrid_topic_list` **Prompt:** List all Event Grid topics in subscription @@ -3617,15 +3797,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.769840 | `eventgrid_topic_list` | ✅ **EXPECTED** | -| 2 | 0.720426 | `eventgrid_subscription_list` | ❌ | -| 3 | 0.535369 | `kusto_cluster_list` | ❌ | -| 4 | 0.513921 | `search_service_list` | ❌ | -| 5 | 0.495939 | `subscription_list` | ❌ | +| 1 | 
0.770140 | `eventgrid_topic_list` | ✅ **EXPECTED** | +| 2 | 0.721362 | `eventgrid_subscription_list` | ❌ | +| 3 | 0.535326 | `kusto_cluster_list` | ❌ | +| 4 | 0.514248 | `search_service_list` | ❌ | +| 5 | 0.495987 | `subscription_list` | ❌ | --- -## Test 186 +## Test 196 **Expected Tool:** `eventgrid_topic_list` **Prompt:** List all Event Grid topics in resource group in subscription @@ -3634,15 +3814,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.758562 | `eventgrid_topic_list` | ✅ **EXPECTED** | -| 2 | 0.704062 | `eventgrid_subscription_list` | ❌ | -| 3 | 0.609175 | `group_list` | ❌ | -| 4 | 0.544809 | `monitor_webtests_list` | ❌ | -| 5 | 0.524209 | `eventhubs_namespace_get` | ❌ | +| 1 | 0.758595 | `eventgrid_topic_list` | ✅ **EXPECTED** | +| 2 | 0.704232 | `eventgrid_subscription_list` | ❌ | +| 3 | 0.609085 | `group_list` | ❌ | +| 4 | 0.544465 | `monitor_webtests_list` | ❌ | +| 5 | 0.524019 | `eventhubs_namespace_get` | ❌ | --- -## Test 187 +## Test 197 **Expected Tool:** `eventgrid_subscription_list` **Prompt:** Show me all Event Grid subscriptions for topic @@ -3651,15 +3831,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.768696 | `eventgrid_subscription_list` | ✅ **EXPECTED** | -| 2 | 0.720373 | `eventgrid_topic_list` | ❌ | -| 3 | 0.498398 | `servicebus_topic_details` | ❌ | +| 1 | 0.769097 | `eventgrid_subscription_list` | ✅ **EXPECTED** | +| 2 | 0.720606 | `eventgrid_topic_list` | ❌ | +| 3 | 0.498615 | `servicebus_topic_details` | ❌ | | 4 | 0.486216 | `servicebus_topic_subscription_details` | ❌ | | 5 | 0.486162 | `eventgrid_events_publish` | ❌ | --- -## Test 188 +## Test 198 **Expected Tool:** `eventgrid_subscription_list` **Prompt:** List Event Grid subscriptions for topic in subscription @@ -3668,15 +3848,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.717676 | `eventgrid_subscription_list` | ✅ **EXPECTED** | -| 2 | 0.709586 | `eventgrid_topic_list` | ❌ | +| 
1 | 0.718109 | `eventgrid_subscription_list` | ✅ **EXPECTED** | +| 2 | 0.709805 | `eventgrid_topic_list` | ❌ | | 3 | 0.539977 | `servicebus_topic_subscription_details` | ❌ | -| 4 | 0.529084 | `servicebus_topic_details` | ❌ | +| 4 | 0.529286 | `servicebus_topic_details` | ❌ | | 5 | 0.477876 | `eventgrid_events_publish` | ❌ | --- -## Test 189 +## Test 199 **Expected Tool:** `eventgrid_subscription_list` **Prompt:** List Event Grid subscriptions for topic in resource group @@ -3685,15 +3865,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.746672 | `eventgrid_subscription_list` | ✅ **EXPECTED** | -| 2 | 0.745851 | `eventgrid_topic_list` | ❌ | -| 3 | 0.535463 | `monitor_webtests_list` | ❌ | -| 4 | 0.524802 | `group_list` | ❌ | -| 5 | 0.502884 | `servicebus_topic_details` | ❌ | +| 1 | 0.746815 | `eventgrid_subscription_list` | ✅ **EXPECTED** | +| 2 | 0.746174 | `eventgrid_topic_list` | ❌ | +| 3 | 0.535484 | `monitor_webtests_list` | ❌ | +| 4 | 0.524923 | `group_list` | ❌ | +| 5 | 0.503158 | `servicebus_topic_details` | ❌ | --- -## Test 190 +## Test 200 **Expected Tool:** `eventgrid_subscription_list` **Prompt:** Show all Event Grid subscriptions in my subscription @@ -3702,15 +3882,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.736844 | `eventgrid_subscription_list` | ✅ **EXPECTED** | -| 2 | 0.659612 | `eventgrid_topic_list` | ❌ | -| 3 | 0.569255 | `subscription_list` | ❌ | +| 1 | 0.736436 | `eventgrid_subscription_list` | ✅ **EXPECTED** | +| 2 | 0.659727 | `eventgrid_topic_list` | ❌ | +| 3 | 0.569254 | `subscription_list` | ❌ | | 4 | 0.537922 | `kusto_cluster_list` | ❌ | -| 5 | 0.517276 | `search_service_list` | ❌ | +| 5 | 0.518857 | `search_service_list` | ❌ | --- -## Test 191 +## Test 201 **Expected Tool:** `eventgrid_subscription_list` **Prompt:** List all Event Grid subscriptions in subscription @@ -3719,15 +3899,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.684586 | 
`eventgrid_subscription_list` | ✅ **EXPECTED** | -| 2 | 0.656227 | `eventgrid_topic_list` | ❌ | -| 3 | 0.542362 | `subscription_list` | ❌ | -| 4 | 0.521053 | `kusto_cluster_list` | ❌ | -| 5 | 0.510115 | `group_list` | ❌ | +| 1 | 0.684522 | `eventgrid_subscription_list` | ✅ **EXPECTED** | +| 2 | 0.656271 | `eventgrid_topic_list` | ❌ | +| 3 | 0.542366 | `subscription_list` | ❌ | +| 4 | 0.521031 | `kusto_cluster_list` | ❌ | +| 5 | 0.510078 | `group_list` | ❌ | --- -## Test 192 +## Test 202 **Expected Tool:** `eventgrid_subscription_list` **Prompt:** Show Event Grid subscriptions in resource group in subscription @@ -3736,15 +3916,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.696332 | `eventgrid_subscription_list` | ✅ **EXPECTED** | -| 2 | 0.691623 | `eventgrid_topic_list` | ❌ | -| 3 | 0.557573 | `group_list` | ❌ | -| 4 | 0.510684 | `monitor_webtests_list` | ❌ | +| 1 | 0.696101 | `eventgrid_subscription_list` | ✅ **EXPECTED** | +| 2 | 0.691739 | `eventgrid_topic_list` | ❌ | +| 3 | 0.557598 | `group_list` | ❌ | +| 4 | 0.510586 | `monitor_webtests_list` | ❌ | | 5 | 0.504984 | `resourcehealth_availability-status_list` | ❌ | --- -## Test 193 +## Test 203 **Expected Tool:** `eventgrid_subscription_list` **Prompt:** List Event Grid subscriptions for subscription in location @@ -3753,15 +3933,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.710457 | `eventgrid_subscription_list` | ✅ **EXPECTED** | -| 2 | 0.642001 | `eventgrid_topic_list` | ❌ | -| 3 | 0.506618 | `subscription_list` | ❌ | -| 4 | 0.476396 | `search_service_list` | ❌ | +| 1 | 0.709801 | `eventgrid_subscription_list` | ✅ **EXPECTED** | +| 2 | 0.642095 | `eventgrid_topic_list` | ❌ | +| 3 | 0.506697 | `subscription_list` | ❌ | +| 4 | 0.476763 | `search_service_list` | ❌ | | 5 | 0.475782 | `kusto_cluster_list` | ❌ | --- -## Test 194 +## Test 204 **Expected Tool:** `eventhubs_eventhub_consumergroup_delete` **Prompt:** Delete my consumer group in my 
event hub , namespace , and resource group @@ -3770,15 +3950,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.766928 | `eventhubs_eventhub_consumergroup_delete` | ✅ **EXPECTED** | -| 2 | 0.675842 | `eventhubs_eventhub_consumergroup_update` | ❌ | -| 3 | 0.641112 | `eventhubs_eventhub_consumergroup_get` | ❌ | -| 4 | 0.633788 | `eventhubs_namespace_delete` | ❌ | -| 5 | 0.605465 | `eventhubs_eventhub_delete` | ❌ | +| 1 | 0.766923 | `eventhubs_eventhub_consumergroup_delete` | ✅ **EXPECTED** | +| 2 | 0.675846 | `eventhubs_eventhub_consumergroup_update` | ❌ | +| 3 | 0.641111 | `eventhubs_eventhub_consumergroup_get` | ❌ | +| 4 | 0.633787 | `eventhubs_namespace_delete` | ❌ | +| 5 | 0.605477 | `eventhubs_eventhub_delete` | ❌ | --- -## Test 195 +## Test 205 **Expected Tool:** `eventhubs_eventhub_consumergroup_get` **Prompt:** List all consumer groups in my event hub in namespace @@ -3789,13 +3969,13 @@ |------|-------|------|--------| | 1 | 0.738475 | `eventhubs_eventhub_consumergroup_get` | ✅ **EXPECTED** | | 2 | 0.634517 | `eventhubs_eventhub_consumergroup_update` | ❌ | -| 3 | 0.626486 | `eventhubs_eventhub_consumergroup_delete` | ❌ | +| 3 | 0.626485 | `eventhubs_eventhub_consumergroup_delete` | ❌ | | 4 | 0.606619 | `eventhubs_namespace_get` | ❌ | | 5 | 0.593098 | `eventhubs_eventhub_get` | ❌ | --- -## Test 196 +## Test 206 **Expected Tool:** `eventhubs_eventhub_consumergroup_get` **Prompt:** Get the details of my consumer group in my event hub , namespace , and resource group @@ -3812,7 +3992,7 @@ --- -## Test 197 +## Test 207 **Expected Tool:** `eventhubs_eventhub_consumergroup_update` **Prompt:** Create a new consumer group in my event hub , namespace , and resource group @@ -3821,15 +4001,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.756873 | `eventhubs_eventhub_consumergroup_update` | ✅ **EXPECTED** | -| 2 | 0.688248 | `eventhubs_eventhub_consumergroup_get` | ❌ | -| 3 | 0.669384 | 
`eventhubs_eventhub_consumergroup_delete` | ❌ | -| 4 | 0.553692 | `eventhubs_eventhub_update` | ❌ | -| 5 | 0.544512 | `eventhubs_namespace_get` | ❌ | +| 1 | 0.757614 | `eventhubs_eventhub_consumergroup_update` | ✅ **EXPECTED** | +| 2 | 0.688923 | `eventhubs_eventhub_consumergroup_get` | ❌ | +| 3 | 0.670026 | `eventhubs_eventhub_consumergroup_delete` | ❌ | +| 4 | 0.554314 | `eventhubs_eventhub_update` | ❌ | +| 5 | 0.545003 | `eventhubs_namespace_get` | ❌ | --- -## Test 198 +## Test 208 **Expected Tool:** `eventhubs_eventhub_consumergroup_update` **Prompt:** Update my consumer group in my event hub , namespace , and resource group @@ -3838,15 +4018,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.739158 | `eventhubs_eventhub_consumergroup_update` | ✅ **EXPECTED** | -| 2 | 0.655927 | `eventhubs_eventhub_consumergroup_delete` | ❌ | -| 3 | 0.642524 | `eventhubs_eventhub_consumergroup_get` | ❌ | -| 4 | 0.552602 | `eventhubs_eventhub_update` | ❌ | -| 5 | 0.524106 | `eventhubs_namespace_delete` | ❌ | +| 1 | 0.738818 | `eventhubs_eventhub_consumergroup_update` | ✅ **EXPECTED** | +| 2 | 0.655614 | `eventhubs_eventhub_consumergroup_delete` | ❌ | +| 3 | 0.642219 | `eventhubs_eventhub_consumergroup_get` | ❌ | +| 4 | 0.552234 | `eventhubs_eventhub_update` | ❌ | +| 5 | 0.524019 | `eventhubs_namespace_delete` | ❌ | --- -## Test 199 +## Test 209 **Expected Tool:** `eventhubs_eventhub_delete` **Prompt:** Delete my event hub in my namespace and resource group @@ -3855,15 +4035,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.699266 | `eventhubs_namespace_delete` | ❌ | -| 2 | 0.688646 | `eventhubs_eventhub_delete` | ✅ **EXPECTED** | -| 3 | 0.627721 | `eventhubs_eventhub_consumergroup_delete` | ❌ | -| 4 | 0.578653 | `eventhubs_namespace_get` | ❌ | -| 5 | 0.552963 | `eventhubs_eventhub_get` | ❌ | +| 1 | 0.699271 | `eventhubs_namespace_delete` | ❌ | +| 2 | 0.688649 | `eventhubs_eventhub_delete` | ✅ **EXPECTED** | +| 3 | 
0.627530 | `eventhubs_eventhub_consumergroup_delete` | ❌ | +| 4 | 0.578627 | `eventhubs_namespace_get` | ❌ | +| 5 | 0.553129 | `eventhubs_eventhub_get` | ❌ | --- -## Test 200 +## Test 210 **Expected Tool:** `eventhubs_eventhub_get` **Prompt:** List all Event Hubs in my namespace @@ -3872,15 +4052,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.773277 | `eventhubs_eventhub_get` | ✅ **EXPECTED** | -| 2 | 0.687596 | `eventhubs_namespace_get` | ❌ | -| 3 | 0.578709 | `eventhubs_eventhub_update` | ❌ | -| 4 | 0.561587 | `eventhubs_namespace_delete` | ❌ | -| 5 | 0.545481 | `eventhubs_eventhub_consumergroup_get` | ❌ | +| 1 | 0.773242 | `eventhubs_eventhub_get` | ✅ **EXPECTED** | +| 2 | 0.687582 | `eventhubs_namespace_get` | ❌ | +| 3 | 0.578689 | `eventhubs_eventhub_update` | ❌ | +| 4 | 0.561545 | `eventhubs_namespace_delete` | ❌ | +| 5 | 0.545475 | `eventhubs_eventhub_consumergroup_get` | ❌ | --- -## Test 201 +## Test 211 **Expected Tool:** `eventhubs_eventhub_get` **Prompt:** Get the details of my event hub in my namespace and resource group @@ -3889,15 +4069,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.638112 | `eventhubs_namespace_get` | ❌ | -| 2 | 0.627528 | `eventhubs_eventhub_get` | ✅ **EXPECTED** | -| 3 | 0.570964 | `eventhubs_eventhub_consumergroup_get` | ❌ | -| 4 | 0.527503 | `eventhubs_eventhub_update` | ❌ | -| 5 | 0.521930 | `eventhubs_namespace_delete` | ❌ | +| 1 | 0.638083 | `eventhubs_namespace_get` | ❌ | +| 2 | 0.627638 | `eventhubs_eventhub_get` | ✅ **EXPECTED** | +| 3 | 0.570904 | `eventhubs_eventhub_consumergroup_get` | ❌ | +| 4 | 0.527646 | `eventhubs_eventhub_update` | ❌ | +| 5 | 0.521920 | `eventhubs_namespace_delete` | ❌ | --- -## Test 202 +## Test 212 **Expected Tool:** `eventhubs_eventhub_update` **Prompt:** Create a new event hub in my namespace and resource group @@ -3906,15 +4086,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.645976 | 
`eventhubs_eventhub_update` | ✅ **EXPECTED** | -| 2 | 0.605856 | `eventhubs_namespace_get` | ❌ | -| 3 | 0.574389 | `eventhubs_eventhub_get` | ❌ | -| 4 | 0.571676 | `eventhubs_eventhub_consumergroup_update` | ❌ | -| 5 | 0.557550 | `eventhubs_namespace_delete` | ❌ | +| 1 | 0.646034 | `eventhubs_eventhub_update` | ✅ **EXPECTED** | +| 2 | 0.605911 | `eventhubs_namespace_get` | ❌ | +| 3 | 0.574464 | `eventhubs_eventhub_get` | ❌ | +| 4 | 0.571638 | `eventhubs_eventhub_consumergroup_update` | ❌ | +| 5 | 0.557592 | `eventhubs_namespace_delete` | ❌ | --- -## Test 203 +## Test 213 **Expected Tool:** `eventhubs_eventhub_update` **Prompt:** Update my event hub in my namespace and resource group @@ -3925,13 +4105,13 @@ |------|-------|------|--------| | 1 | 0.655283 | `eventhubs_eventhub_update` | ✅ **EXPECTED** | | 2 | 0.571661 | `eventhubs_eventhub_delete` | ❌ | -| 3 | 0.568605 | `eventhubs_eventhub_consumergroup_update` | ❌ | +| 3 | 0.568606 | `eventhubs_eventhub_consumergroup_update` | ❌ | | 4 | 0.568396 | `eventhubs_namespace_get` | ❌ | | 5 | 0.565977 | `eventhubs_namespace_delete` | ❌ | --- -## Test 204 +## Test 214 **Expected Tool:** `eventhubs_namespace_delete` **Prompt:** Delete my namespace in my resource group @@ -3941,14 +4121,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.623995 | `eventhubs_namespace_delete` | ✅ **EXPECTED** | -| 2 | 0.525810 | `eventhubs_namespace_update` | ❌ | +| 2 | 0.525823 | `eventhubs_namespace_update` | ❌ | | 3 | 0.505082 | `eventhubs_eventhub_consumergroup_delete` | ❌ | | 4 | 0.449841 | `eventhubs_namespace_get` | ❌ | | 5 | 0.435037 | `workbooks_delete` | ❌ | --- -## Test 205 +## Test 215 **Expected Tool:** `eventhubs_namespace_get` **Prompt:** List all Event Hubs namespaces in my subscription @@ -3960,12 +4140,12 @@ | 1 | 0.659838 | `eventhubs_eventhub_get` | ❌ | | 2 | 0.658827 | `eventhubs_namespace_get` | ✅ **EXPECTED** | | 3 | 0.607372 | `kusto_cluster_list` | ❌ | -| 4 | 0.557150 | 
`eventgrid_topic_list` | ❌ | -| 5 | 0.556016 | `eventgrid_subscription_list` | ❌ | +| 4 | 0.557200 | `eventgrid_topic_list` | ❌ | +| 5 | 0.556126 | `eventgrid_subscription_list` | ❌ | --- -## Test 206 +## Test 216 **Expected Tool:** `eventhubs_namespace_get` **Prompt:** Get the details of my namespace in my resource group @@ -3975,14 +4155,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.509749 | `eventhubs_namespace_get` | ✅ **EXPECTED** | -| 2 | 0.509432 | `monitor_webtests_get` | ❌ | +| 2 | 0.509431 | `monitor_webtests_get` | ❌ | | 3 | 0.497399 | `servicebus_queue_details` | ❌ | -| 4 | 0.490015 | `eventhubs_namespace_update` | ❌ | +| 4 | 0.489992 | `eventhubs_namespace_update` | ❌ | | 5 | 0.470455 | `functionapp_get` | ❌ | --- -## Test 207 +## Test 217 **Expected Tool:** `eventhubs_namespace_update` **Prompt:** Create an new namespace in my resource group @@ -3991,7 +4171,7 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.610313 | `eventhubs_namespace_update` | ✅ **EXPECTED** | +| 1 | 0.610205 | `eventhubs_namespace_update` | ✅ **EXPECTED** | | 2 | 0.466721 | `eventhubs_namespace_get` | ❌ | | 3 | 0.458458 | `eventhubs_namespace_delete` | ❌ | | 4 | 0.449724 | `workbooks_create` | ❌ | @@ -3999,7 +4179,7 @@ --- -## Test 208 +## Test 218 **Expected Tool:** `eventhubs_namespace_update` **Prompt:** Update my namespace in my resource group @@ -4008,15 +4188,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.622219 | `eventhubs_namespace_update` | ✅ **EXPECTED** | -| 2 | 0.474098 | `eventhubs_namespace_delete` | ❌ | +| 1 | 0.622140 | `eventhubs_namespace_update` | ✅ **EXPECTED** | +| 2 | 0.474099 | `eventhubs_namespace_delete` | ❌ | | 3 | 0.448723 | `eventhubs_namespace_get` | ❌ | | 4 | 0.436549 | `eventhubs_eventhub_consumergroup_update` | ❌ | -| 5 | 0.372490 | `sql_db_rename` | ❌ | +| 5 | 0.372632 | `sql_db_rename` | ❌ | --- -## Test 209 +## Test 219 **Expected Tool:** 
`functionapp_get` **Prompt:** Describe the function app in resource group @@ -4026,14 +4206,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.660116 | `functionapp_get` | ✅ **EXPECTED** | -| 2 | 0.451226 | `deploy_app_logs_get` | ❌ | +| 2 | 0.451613 | `deploy_app_logs_get` | ❌ | | 3 | 0.450457 | `applens_resource_diagnose` | ❌ | -| 4 | 0.390048 | `mysql_server_list` | ❌ | +| 4 | 0.390229 | `mysql_server_list` | ❌ | | 5 | 0.380314 | `get_bestpractices_get` | ❌ | --- -## Test 210 +## Test 220 **Expected Tool:** `functionapp_get` **Prompt:** Get configuration for function app @@ -4044,13 +4224,13 @@ |------|-------|------|--------| | 1 | 0.607276 | `functionapp_get` | ✅ **EXPECTED** | | 2 | 0.447400 | `mysql_server_config_get` | ❌ | -| 3 | 0.424765 | `appconfig_account_list` | ❌ | +| 3 | 0.424693 | `appconfig_account_list` | ❌ | | 4 | 0.411267 | `appconfig_kv_get` | ❌ | -| 5 | 0.400002 | `deploy_app_logs_get` | ❌ | +| 5 | 0.400402 | `deploy_app_logs_get` | ❌ | --- -## Test 211 +## Test 221 **Expected Tool:** `functionapp_get` **Prompt:** Get function app status for @@ -4060,14 +4240,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.622384 | `functionapp_get` | ✅ **EXPECTED** | -| 2 | 0.413523 | `resourcehealth_availability-status_get` | ❌ | +| 2 | 0.413481 | `resourcehealth_availability-status_get` | ❌ | | 3 | 0.390708 | `resourcehealth_availability-status_list` | ❌ | -| 4 | 0.383293 | `deploy_app_logs_get` | ❌ | +| 4 | 0.383533 | `deploy_app_logs_get` | ❌ | | 5 | 0.360665 | `storage_account_get` | ❌ | --- -## Test 212 +## Test 222 **Expected Tool:** `functionapp_get` **Prompt:** Get information about my function app in @@ -4084,7 +4264,7 @@ --- -## Test 213 +## Test 223 **Expected Tool:** `functionapp_get` **Prompt:** Retrieve host name and status of function app @@ -4094,14 +4274,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.592791 | `functionapp_get` | ✅ **EXPECTED** | -| 
2 | 0.417779 | `resourcehealth_availability-status_get` | ❌ | -| 3 | 0.409487 | `deploy_app_logs_get` | ❌ | +| 2 | 0.417817 | `resourcehealth_availability-status_get` | ❌ | +| 3 | 0.409712 | `deploy_app_logs_get` | ❌ | | 4 | 0.399953 | `storage_account_get` | ❌ | | 5 | 0.392237 | `applens_resource_diagnose` | ❌ | --- -## Test 214 +## Test 224 **Expected Tool:** `functionapp_get` **Prompt:** Show function app details for in @@ -4111,14 +4291,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.687356 | `functionapp_get` | ✅ **EXPECTED** | -| 2 | 0.449033 | `deploy_app_logs_get` | ❌ | +| 2 | 0.449588 | `deploy_app_logs_get` | ❌ | | 3 | 0.428689 | `applens_resource_diagnose` | ❌ | | 4 | 0.424686 | `foundry_resource_get` | ❌ | | 5 | 0.391781 | `monitor_webtests_get` | ❌ | --- -## Test 215 +## Test 225 **Expected Tool:** `functionapp_get` **Prompt:** Show me the details for the function app @@ -4128,14 +4308,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.644882 | `functionapp_get` | ✅ **EXPECTED** | -| 2 | 0.429692 | `deploy_app_logs_get` | ❌ | +| 2 | 0.430189 | `deploy_app_logs_get` | ❌ | | 3 | 0.421082 | `storage_account_get` | ❌ | -| 4 | 0.403261 | `signalr_runtime_get` | ❌ | +| 4 | 0.403311 | `signalr_runtime_get` | ❌ | | 5 | 0.391615 | `foundry_resource_get` | ❌ | --- -## Test 216 +## Test 226 **Expected Tool:** `functionapp_get` **Prompt:** Show plan and region for function app @@ -4145,14 +4325,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.554980 | `functionapp_get` | ✅ **EXPECTED** | -| 2 | 0.426921 | `quota_usage_check` | ❌ | -| 3 | 0.424062 | `deploy_app_logs_get` | ❌ | +| 2 | 0.426703 | `quota_usage_check` | ❌ | +| 3 | 0.424610 | `deploy_app_logs_get` | ❌ | | 4 | 0.408011 | `deploy_plan_get` | ❌ | | 5 | 0.381629 | `deploy_architecture_diagram_generate` | ❌ | --- -## Test 217 +## Test 227 **Expected Tool:** `functionapp_get` **Prompt:** What is the status of function 
app ? @@ -4162,14 +4342,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.565797 | `functionapp_get` | ✅ **EXPECTED** | -| 2 | 0.403246 | `deploy_app_logs_get` | ❌ | +| 2 | 0.403665 | `deploy_app_logs_get` | ❌ | | 3 | 0.384159 | `resourcehealth_availability-status_list` | ❌ | | 4 | 0.369868 | `applens_resource_diagnose` | ❌ | -| 5 | 0.354912 | `resourcehealth_availability-status_get` | ❌ | +| 5 | 0.355044 | `resourcehealth_availability-status_get` | ❌ | --- -## Test 218 +## Test 228 **Expected Tool:** `functionapp_get` **Prompt:** List all function apps in my subscription @@ -4179,14 +4359,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.646561 | `functionapp_get` | ✅ **EXPECTED** | -| 2 | 0.557549 | `search_service_list` | ❌ | -| 3 | 0.534936 | `subscription_list` | ❌ | +| 2 | 0.559382 | `search_service_list` | ❌ | +| 3 | 0.534930 | `subscription_list` | ❌ | | 4 | 0.529031 | `kusto_cluster_list` | ❌ | | 5 | 0.516618 | `cosmos_account_list` | ❌ | --- -## Test 219 +## Test 229 **Expected Tool:** `functionapp_get` **Prompt:** Show me my Azure function apps @@ -4196,14 +4376,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.560249 | `functionapp_get` | ✅ **EXPECTED** | -| 2 | 0.464637 | `deploy_app_logs_get` | ❌ | -| 3 | 0.411323 | `get_bestpractices_get` | ❌ | -| 4 | 0.410461 | `search_service_list` | ❌ | +| 2 | 0.464985 | `deploy_app_logs_get` | ❌ | +| 3 | 0.412646 | `search_service_list` | ❌ | +| 4 | 0.411323 | `get_bestpractices_get` | ❌ | | 5 | 0.398503 | `extension_cli_install` | ❌ | --- -## Test 220 +## Test 230 **Expected Tool:** `functionapp_get` **Prompt:** What function apps do I have? 
@@ -4212,15 +4392,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.433675 | `functionapp_get` | ✅ **EXPECTED** | -| 2 | 0.346031 | `deploy_app_logs_get` | ❌ | +| 1 | 0.433674 | `functionapp_get` | ✅ **EXPECTED** | +| 2 | 0.346619 | `deploy_app_logs_get` | ❌ | | 3 | 0.337966 | `applens_resource_diagnose` | ❌ | | 4 | 0.316594 | `extension_cli_install` | ❌ | | 5 | 0.284362 | `get_bestpractices_get` | ❌ | --- -## Test 221 +## Test 231 **Expected Tool:** `keyvault_admin_settings_get` **Prompt:** Get the account settings for my key vault @@ -4231,13 +4411,13 @@ |------|-------|------|--------| | 1 | 0.604780 | `keyvault_admin_settings_get` | ✅ **EXPECTED** | | 2 | 0.532196 | `storage_account_get` | ❌ | -| 3 | 0.496042 | `keyvault_key_get` | ❌ | -| 4 | 0.452367 | `appconfig_kv_set` | ❌ | -| 5 | 0.448265 | `keyvault_secret_get` | ❌ | +| 3 | 0.496629 | `keyvault_key_get` | ❌ | +| 4 | 0.452366 | `appconfig_kv_set` | ❌ | +| 5 | 0.448039 | `keyvault_secret_get` | ❌ | --- -## Test 222 +## Test 232 **Expected Tool:** `keyvault_admin_settings_get` **Prompt:** Show me the account settings for managed HSM keyvault @@ -4248,13 +4428,13 @@ |------|-------|------|--------| | 1 | 0.671370 | `keyvault_admin_settings_get` | ✅ **EXPECTED** | | 2 | 0.455561 | `storage_account_get` | ❌ | -| 3 | 0.440966 | `keyvault_key_get` | ❌ | +| 3 | 0.441225 | `keyvault_key_get` | ❌ | | 4 | 0.404666 | `appconfig_kv_set` | ❌ | -| 5 | 0.395449 | `keyvault_secret_get` | ❌ | +| 5 | 0.395274 | `keyvault_secret_get` | ❌ | --- -## Test 223 +## Test 233 **Expected Tool:** `keyvault_admin_settings_get` **Prompt:** What's the value of the setting in my key vault with name @@ -4263,15 +4443,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.505709 | `keyvault_admin_settings_get` | ✅ **EXPECTED** | -| 2 | 0.496565 | `appconfig_kv_set` | ❌ | -| 3 | 0.420067 | `appconfig_kv_lock_set` | ❌ | -| 4 | 0.419642 | `keyvault_key_get` | ❌ | -| 5 | 0.410219 | 
`keyvault_secret_get` | ❌ | +| 1 | 0.505750 | `keyvault_admin_settings_get` | ✅ **EXPECTED** | +| 2 | 0.496540 | `appconfig_kv_set` | ❌ | +| 3 | 0.420145 | `appconfig_kv_lock_set` | ❌ | +| 4 | 0.419126 | `keyvault_key_get` | ❌ | +| 5 | 0.410215 | `keyvault_secret_get` | ❌ | --- -## Test 224 +## Test 234 **Expected Tool:** `keyvault_certificate_create` **Prompt:** Create a new certificate called in the key vault @@ -4280,15 +4460,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.627727 | `keyvault_certificate_create` | ✅ **EXPECTED** | -| 2 | 0.570319 | `keyvault_certificate_import` | ❌ | -| 3 | 0.540199 | `keyvault_key_create` | ❌ | -| 4 | 0.519218 | `keyvault_certificate_get` | ❌ | -| 5 | 0.500027 | `keyvault_certificate_list` | ❌ | +| 1 | 0.627713 | `keyvault_certificate_create` | ✅ **EXPECTED** | +| 2 | 0.570324 | `keyvault_certificate_import` | ❌ | +| 3 | 0.540181 | `keyvault_key_create` | ❌ | +| 4 | 0.515939 | `keyvault_certificate_get` | ❌ | +| 5 | 0.500018 | `keyvault_certificate_list` | ❌ | --- -## Test 225 +## Test 235 **Expected Tool:** `keyvault_certificate_create` **Prompt:** Generate a certificate named in key vault @@ -4297,15 +4477,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.599548 | `keyvault_certificate_create` | ✅ **EXPECTED** | -| 2 | 0.561717 | `keyvault_certificate_import` | ❌ | -| 3 | 0.521910 | `keyvault_certificate_get` | ❌ | -| 4 | 0.501291 | `keyvault_key_create` | ❌ | -| 5 | 0.496516 | `keyvault_certificate_list` | ❌ | +| 1 | 0.600005 | `keyvault_certificate_create` | ✅ **EXPECTED** | +| 2 | 0.561459 | `keyvault_certificate_import` | ❌ | +| 3 | 0.519600 | `keyvault_certificate_get` | ❌ | +| 4 | 0.502052 | `keyvault_key_create` | ❌ | +| 5 | 0.497159 | `keyvault_certificate_list` | ❌ | --- -## Test 226 +## Test 236 **Expected Tool:** `keyvault_certificate_create` **Prompt:** Request creation of certificate in the key vault @@ -4314,15 +4494,15 @@ | Rank | Score | Tool | 
Status | |------|-------|------|--------| -| 1 | 0.573998 | `keyvault_certificate_create` | ✅ **EXPECTED** | -| 2 | 0.527759 | `keyvault_certificate_import` | ❌ | -| 3 | 0.498278 | `keyvault_certificate_get` | ❌ | -| 4 | 0.481548 | `keyvault_key_create` | ❌ | -| 5 | 0.469601 | `keyvault_certificate_list` | ❌ | +| 1 | 0.573920 | `keyvault_certificate_create` | ✅ **EXPECTED** | +| 2 | 0.527631 | `keyvault_certificate_import` | ❌ | +| 3 | 0.495169 | `keyvault_certificate_get` | ❌ | +| 4 | 0.481471 | `keyvault_key_create` | ❌ | +| 5 | 0.469500 | `keyvault_certificate_list` | ❌ | --- -## Test 227 +## Test 237 **Expected Tool:** `keyvault_certificate_create` **Prompt:** Provision a new key vault certificate in vault @@ -4333,13 +4513,13 @@ |------|-------|------|--------| | 1 | 0.591697 | `keyvault_certificate_create` | ✅ **EXPECTED** | | 2 | 0.562265 | `keyvault_certificate_import` | ❌ | -| 3 | 0.522147 | `keyvault_certificate_get` | ❌ | +| 3 | 0.518739 | `keyvault_certificate_get` | ❌ | | 4 | 0.502529 | `keyvault_key_create` | ❌ | | 5 | 0.479992 | `keyvault_certificate_list` | ❌ | --- -## Test 228 +## Test 238 **Expected Tool:** `keyvault_certificate_create` **Prompt:** Issue a certificate in key vault @@ -4350,13 +4530,13 @@ |------|-------|------|--------| | 1 | 0.622788 | `keyvault_certificate_create` | ✅ **EXPECTED** | | 2 | 0.558532 | `keyvault_certificate_import` | ❌ | -| 3 | 0.534503 | `keyvault_certificate_get` | ❌ | +| 3 | 0.531287 | `keyvault_certificate_get` | ❌ | | 4 | 0.521316 | `keyvault_certificate_list` | ❌ | | 5 | 0.465056 | `keyvault_key_create` | ❌ | --- -## Test 229 +## Test 239 **Expected Tool:** `keyvault_certificate_get` **Prompt:** Show me the certificate in the key vault @@ -4365,15 +4545,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.600625 | `keyvault_certificate_get` | ✅ **EXPECTED** | +| 1 | 0.603235 | `keyvault_certificate_get` | ✅ **EXPECTED** | | 2 | 0.528405 | `keyvault_certificate_list` | ❌ | | 3 | 
0.519037 | `keyvault_certificate_import` | ❌ | | 4 | 0.499293 | `keyvault_certificate_create` | ❌ | -| 5 | 0.487691 | `keyvault_key_get` | ❌ | +| 5 | 0.486609 | `keyvault_key_get` | ❌ | --- -## Test 230 +## Test 240 **Expected Tool:** `keyvault_certificate_get` **Prompt:** Show me the details of the certificate in the key vault @@ -4382,15 +4562,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.646098 | `keyvault_certificate_get` | ✅ **EXPECTED** | -| 2 | 0.563263 | `keyvault_key_get` | ❌ | -| 3 | 0.514499 | `keyvault_secret_get` | ❌ | -| 4 | 0.509446 | `keyvault_certificate_list` | ❌ | -| 5 | 0.507738 | `keyvault_certificate_import` | ❌ | +| 1 | 0.649214 | `keyvault_certificate_get` | ✅ **EXPECTED** | +| 2 | 0.563103 | `keyvault_key_get` | ❌ | +| 3 | 0.514222 | `keyvault_secret_get` | ❌ | +| 4 | 0.509503 | `keyvault_certificate_list` | ❌ | +| 5 | 0.507757 | `keyvault_certificate_import` | ❌ | --- -## Test 231 +## Test 241 **Expected Tool:** `keyvault_certificate_get` **Prompt:** Get the certificate from vault @@ -4399,15 +4579,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.609523 | `keyvault_certificate_get` | ✅ **EXPECTED** | +| 1 | 0.606958 | `keyvault_certificate_get` | ✅ **EXPECTED** | | 2 | 0.515570 | `keyvault_certificate_list` | ❌ | | 3 | 0.511197 | `keyvault_certificate_create` | ❌ | | 4 | 0.507768 | `keyvault_certificate_import` | ❌ | -| 5 | 0.475674 | `keyvault_key_get` | ❌ | +| 5 | 0.474394 | `keyvault_key_get` | ❌ | --- -## Test 232 +## Test 242 **Expected Tool:** `keyvault_certificate_get` **Prompt:** Display the certificate details for in vault @@ -4416,15 +4596,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.647669 | `keyvault_certificate_get` | ✅ **EXPECTED** | -| 2 | 0.528243 | `keyvault_key_get` | ❌ | +| 1 | 0.649758 | `keyvault_certificate_get` | ✅ **EXPECTED** | +| 2 | 0.527400 | `keyvault_key_get` | ❌ | | 3 | 0.521556 | `keyvault_certificate_list` 
| ❌ | | 4 | 0.509796 | `keyvault_certificate_import` | ❌ | -| 5 | 0.502403 | `keyvault_secret_get` | ❌ | +| 5 | 0.501988 | `keyvault_secret_get` | ❌ | --- -## Test 233 +## Test 243 **Expected Tool:** `keyvault_certificate_get` **Prompt:** Retrieve certificate metadata for in vault @@ -4433,15 +4613,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.595959 | `keyvault_certificate_get` | ✅ **EXPECTED** | +| 1 | 0.594012 | `keyvault_certificate_get` | ✅ **EXPECTED** | | 2 | 0.527404 | `keyvault_certificate_list` | ❌ | | 3 | 0.519059 | `keyvault_certificate_import` | ❌ | | 4 | 0.501138 | `keyvault_certificate_create` | ❌ | -| 5 | 0.465429 | `keyvault_key_get` | ❌ | +| 5 | 0.465174 | `keyvault_key_get` | ❌ | --- -## Test 234 +## Test 244 **Expected Tool:** `keyvault_certificate_import` **Prompt:** Import the certificate in file into the key vault @@ -4451,14 +4631,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.585481 | `keyvault_certificate_import` | ✅ **EXPECTED** | -| 2 | 0.420747 | `keyvault_certificate_get` | ❌ | +| 2 | 0.420009 | `keyvault_certificate_get` | ❌ | | 3 | 0.402595 | `keyvault_certificate_create` | ❌ | | 4 | 0.399342 | `keyvault_certificate_list` | ❌ | | 5 | 0.352905 | `keyvault_key_create` | ❌ | --- -## Test 235 +## Test 245 **Expected Tool:** `keyvault_certificate_import` **Prompt:** Import a certificate into the key vault using the name @@ -4468,14 +4648,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.622125 | `keyvault_certificate_import` | ✅ **EXPECTED** | -| 2 | 0.504314 | `keyvault_certificate_get` | ❌ | +| 2 | 0.501864 | `keyvault_certificate_get` | ❌ | | 3 | 0.498847 | `keyvault_certificate_create` | ❌ | | 4 | 0.448105 | `keyvault_certificate_list` | ❌ | | 5 | 0.419811 | `keyvault_key_create` | ❌ | --- -## Test 236 +## Test 246 **Expected Tool:** `keyvault_certificate_import` **Prompt:** Upload certificate file to key vault @@ -4486,13 +4666,13 @@ 
|------|-------|------|--------| | 1 | 0.595707 | `keyvault_certificate_import` | ✅ **EXPECTED** | | 2 | 0.453929 | `keyvault_certificate_create` | ❌ | -| 3 | 0.452551 | `keyvault_certificate_get` | ❌ | +| 3 | 0.451713 | `keyvault_certificate_get` | ❌ | | 4 | 0.418203 | `keyvault_certificate_list` | ❌ | | 5 | 0.413377 | `keyvault_key_create` | ❌ | --- -## Test 237 +## Test 247 **Expected Tool:** `keyvault_certificate_import` **Prompt:** Load certificate from file into vault @@ -4502,14 +4682,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.619480 | `keyvault_certificate_import` | ✅ **EXPECTED** | -| 2 | 0.517804 | `keyvault_certificate_get` | ❌ | +| 2 | 0.515610 | `keyvault_certificate_get` | ❌ | | 3 | 0.480815 | `keyvault_certificate_create` | ❌ | | 4 | 0.444386 | `keyvault_certificate_list` | ❌ | | 5 | 0.381873 | `keyvault_key_create` | ❌ | --- -## Test 238 +## Test 248 **Expected Tool:** `keyvault_certificate_import` **Prompt:** Add existing certificate file to the key vault with name @@ -4518,15 +4698,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.595418 | `keyvault_certificate_import` | ✅ **EXPECTED** | -| 2 | 0.452490 | `keyvault_certificate_create` | ❌ | -| 3 | 0.441616 | `keyvault_certificate_get` | ❌ | +| 1 | 0.595417 | `keyvault_certificate_import` | ✅ **EXPECTED** | +| 2 | 0.452489 | `keyvault_certificate_create` | ❌ | +| 3 | 0.440366 | `keyvault_certificate_get` | ❌ | | 4 | 0.408018 | `keyvault_key_create` | ❌ | -| 5 | 0.392244 | `keyvault_secret_create` | ❌ | +| 5 | 0.392284 | `keyvault_secret_create` | ❌ | --- -## Test 239 +## Test 249 **Expected Tool:** `keyvault_certificate_list` **Prompt:** List all certificates in the key vault @@ -4537,13 +4717,13 @@ |------|-------|------|--------| | 1 | 0.726124 | `keyvault_certificate_list` | ✅ **EXPECTED** | | 2 | 0.583110 | `keyvault_key_list` | ❌ | -| 3 | 0.531988 | `keyvault_secret_list` | ❌ | -| 4 | 0.515236 | `keyvault_certificate_get` | ❌ 
| +| 3 | 0.531838 | `keyvault_secret_list` | ❌ | +| 4 | 0.514152 | `keyvault_certificate_get` | ❌ | | 5 | 0.485792 | `keyvault_certificate_create` | ❌ | --- -## Test 240 +## Test 250 **Expected Tool:** `keyvault_certificate_list` **Prompt:** Show me the certificates in the key vault @@ -4553,14 +4733,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.615541 | `keyvault_certificate_list` | ✅ **EXPECTED** | -| 2 | 0.522453 | `keyvault_certificate_get` | ❌ | +| 2 | 0.525122 | `keyvault_certificate_get` | ❌ | | 3 | 0.475156 | `keyvault_key_list` | ❌ | | 4 | 0.460973 | `keyvault_certificate_create` | ❌ | -| 5 | 0.449381 | `keyvault_key_get` | ❌ | +| 5 | 0.448139 | `keyvault_key_get` | ❌ | --- -## Test 241 +## Test 251 **Expected Tool:** `keyvault_certificate_list` **Prompt:** What certificates are in the key vault ? @@ -4569,15 +4749,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.624710 | `keyvault_certificate_list` | ✅ **EXPECTED** | -| 2 | 0.519739 | `keyvault_certificate_get` | ❌ | +| 1 | 0.624711 | `keyvault_certificate_list` | ✅ **EXPECTED** | +| 2 | 0.518577 | `keyvault_certificate_get` | ❌ | | 3 | 0.510048 | `keyvault_certificate_create` | ❌ | | 4 | 0.505534 | `keyvault_certificate_import` | ❌ | | 5 | 0.497356 | `keyvault_key_list` | ❌ | --- -## Test 242 +## Test 252 **Expected Tool:** `keyvault_certificate_list` **Prompt:** List certificate names in vault @@ -4588,13 +4768,13 @@ |------|-------|------|--------| | 1 | 0.672622 | `keyvault_certificate_list` | ✅ **EXPECTED** | | 2 | 0.553990 | `keyvault_key_list` | ❌ | -| 3 | 0.511905 | `keyvault_secret_list` | ❌ | -| 4 | 0.507062 | `keyvault_certificate_get` | ❌ | +| 3 | 0.511668 | `keyvault_secret_list` | ❌ | +| 4 | 0.505198 | `keyvault_certificate_get` | ❌ | | 5 | 0.492357 | `keyvault_certificate_create` | ❌ | --- -## Test 243 +## Test 253 **Expected Tool:** `keyvault_certificate_list` **Prompt:** Enumerate certificates in key vault @@ -4603,15 
+4783,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.747408 | `keyvault_certificate_list` | ✅ **EXPECTED** | +| 1 | 0.747407 | `keyvault_certificate_list` | ✅ **EXPECTED** | | 2 | 0.594216 | `keyvault_key_list` | ❌ | -| 3 | 0.558771 | `keyvault_secret_list` | ❌ | -| 4 | 0.515568 | `keyvault_certificate_get` | ❌ | +| 3 | 0.558644 | `keyvault_secret_list` | ❌ | +| 4 | 0.513381 | `keyvault_certificate_get` | ❌ | | 5 | 0.490876 | `keyvault_certificate_create` | ❌ | --- -## Test 244 +## Test 254 **Expected Tool:** `keyvault_certificate_list` **Prompt:** Show certificate names in the key vault @@ -4621,14 +4801,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.639711 | `keyvault_certificate_list` | ✅ **EXPECTED** | -| 2 | 0.512475 | `keyvault_certificate_get` | ❌ | +| 2 | 0.512269 | `keyvault_certificate_get` | ❌ | | 3 | 0.507572 | `keyvault_key_list` | ❌ | | 4 | 0.482583 | `keyvault_certificate_create` | ❌ | -| 5 | 0.464725 | `keyvault_secret_list` | ❌ | +| 5 | 0.464535 | `keyvault_secret_list` | ❌ | --- -## Test 245 +## Test 255 **Expected Tool:** `keyvault_key_create` **Prompt:** Create a new key called with the RSA type in the key vault @@ -4638,14 +4818,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.661466 | `keyvault_key_create` | ✅ **EXPECTED** | -| 2 | 0.456580 | `keyvault_secret_create` | ❌ | +| 2 | 0.456633 | `keyvault_secret_create` | ❌ | | 3 | 0.451790 | `keyvault_certificate_create` | ❌ | | 4 | 0.429614 | `keyvault_certificate_import` | ❌ | -| 5 | 0.399469 | `keyvault_key_get` | ❌ | +| 5 | 0.399326 | `keyvault_key_get` | ❌ | --- -## Test 246 +## Test 256 **Expected Tool:** `keyvault_key_create` **Prompt:** Generate a key with type in vault @@ -4655,14 +4835,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.641070 | `keyvault_key_create` | ✅ **EXPECTED** | -| 2 | 0.428964 | `keyvault_key_get` | ❌ | +| 2 | 0.428502 | `keyvault_key_get` 
| ❌ | | 3 | 0.422763 | `keyvault_certificate_create` | ❌ | -| 4 | 0.420045 | `keyvault_secret_create` | ❌ | +| 4 | 0.420135 | `keyvault_secret_create` | ❌ | | 5 | 0.405644 | `appconfig_kv_set` | ❌ | --- -## Test 247 +## Test 257 **Expected Tool:** `keyvault_key_create` **Prompt:** Create an oct key in the vault @@ -4672,14 +4852,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.547493 | `keyvault_key_create` | ✅ **EXPECTED** | -| 2 | 0.463557 | `keyvault_secret_create` | ❌ | +| 2 | 0.463630 | `keyvault_secret_create` | ❌ | | 3 | 0.447410 | `keyvault_certificate_create` | ❌ | -| 4 | 0.420793 | `keyvault_key_get` | ❌ | +| 4 | 0.420366 | `keyvault_key_get` | ❌ | | 5 | 0.404350 | `keyvault_certificate_import` | ❌ | --- -## Test 248 +## Test 258 **Expected Tool:** `keyvault_key_create` **Prompt:** Create an RSA key in the vault with name @@ -4689,14 +4869,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.641369 | `keyvault_key_create` | ✅ **EXPECTED** | -| 2 | 0.501636 | `keyvault_secret_create` | ❌ | +| 2 | 0.501664 | `keyvault_secret_create` | ❌ | | 3 | 0.491735 | `keyvault_certificate_create` | ❌ | | 4 | 0.464557 | `keyvault_certificate_import` | ❌ | -| 5 | 0.451505 | `keyvault_key_get` | ❌ | +| 5 | 0.451016 | `keyvault_key_get` | ❌ | --- -## Test 249 +## Test 259 **Expected Tool:** `keyvault_key_create` **Prompt:** Create an EC key with name in the vault @@ -4705,15 +4885,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.571793 | `keyvault_key_create` | ✅ **EXPECTED** | -| 2 | 0.443085 | `keyvault_certificate_create` | ❌ | -| 3 | 0.434697 | `keyvault_secret_create` | ❌ | -| 4 | 0.421997 | `keyvault_key_get` | ❌ | -| 5 | 0.400514 | `keyvault_certificate_import` | ❌ | +| 1 | 0.571718 | `keyvault_key_create` | ✅ **EXPECTED** | +| 2 | 0.443369 | `keyvault_certificate_create` | ❌ | +| 3 | 0.434701 | `keyvault_secret_create` | ❌ | +| 4 | 0.421721 | `keyvault_key_get` | ❌ | +| 5 | 
0.400533 | `keyvault_certificate_import` | ❌ | --- -## Test 250 +## Test 260 **Expected Tool:** `keyvault_key_get` **Prompt:** Show me the key in the key vault @@ -4722,15 +4902,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.550225 | `keyvault_key_get` | ✅ **EXPECTED** | -| 2 | 0.468243 | `keyvault_secret_get` | ❌ | +| 1 | 0.549488 | `keyvault_key_get` | ✅ **EXPECTED** | +| 2 | 0.468165 | `keyvault_secret_get` | ❌ | | 3 | 0.452816 | `keyvault_key_create` | ❌ | | 4 | 0.439969 | `keyvault_key_list` | ❌ | -| 5 | 0.426545 | `keyvault_certificate_get` | ❌ | +| 5 | 0.430038 | `keyvault_certificate_get` | ❌ | --- -## Test 251 +## Test 261 **Expected Tool:** `keyvault_key_get` **Prompt:** Show me the details of the key in the key vault @@ -4739,15 +4919,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.629372 | `keyvault_key_get` | ✅ **EXPECTED** | -| 2 | 0.532872 | `keyvault_secret_get` | ❌ | +| 1 | 0.629552 | `keyvault_key_get` | ✅ **EXPECTED** | +| 2 | 0.532651 | `keyvault_secret_get` | ❌ | | 3 | 0.512278 | `storage_account_get` | ❌ | -| 4 | 0.495957 | `keyvault_certificate_get` | ❌ | +| 4 | 0.499757 | `keyvault_certificate_get` | ❌ | | 5 | 0.456992 | `keyvault_key_create` | ❌ | --- -## Test 252 +## Test 262 **Expected Tool:** `keyvault_key_get` **Prompt:** Get the key from vault @@ -4756,15 +4936,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.485492 | `keyvault_key_get` | ✅ **EXPECTED** | +| 1 | 0.484645 | `keyvault_key_get` | ✅ **EXPECTED** | | 2 | 0.443182 | `keyvault_key_create` | ❌ | -| 3 | 0.409356 | `keyvault_secret_get` | ❌ | +| 3 | 0.409388 | `keyvault_secret_get` | ❌ | | 4 | 0.395491 | `keyvault_admin_settings_get` | ❌ | | 5 | 0.383519 | `appconfig_kv_lock_set` | ❌ | --- -## Test 253 +## Test 263 **Expected Tool:** `keyvault_key_get` **Prompt:** Display the key details for in vault @@ -4773,15 +4953,15 @@ | Rank | Score | Tool | Status | 
|------|-------|------|--------| -| 1 | 0.590297 | `keyvault_key_get` | ✅ **EXPECTED** | -| 2 | 0.488574 | `keyvault_secret_get` | ❌ | +| 1 | 0.590303 | `keyvault_key_get` | ✅ **EXPECTED** | +| 2 | 0.488213 | `keyvault_secret_get` | ❌ | | 3 | 0.476498 | `storage_account_get` | ❌ | -| 4 | 0.460796 | `keyvault_certificate_get` | ❌ | +| 4 | 0.464283 | `keyvault_certificate_get` | ❌ | | 5 | 0.436511 | `keyvault_admin_settings_get` | ❌ | --- -## Test 254 +## Test 264 **Expected Tool:** `keyvault_key_get` **Prompt:** Retrieve key metadata for in vault @@ -4790,15 +4970,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.518346 | `keyvault_key_get` | ✅ **EXPECTED** | -| 2 | 0.432950 | `storage_account_get` | ❌ | -| 3 | 0.432742 | `keyvault_admin_settings_get` | ❌ | -| 4 | 0.429131 | `keyvault_key_create` | ❌ | -| 5 | 0.422731 | `keyvault_secret_get` | ❌ | +| 1 | 0.518839 | `keyvault_key_get` | ✅ **EXPECTED** | +| 2 | 0.432982 | `storage_account_get` | ❌ | +| 3 | 0.432733 | `keyvault_admin_settings_get` | ❌ | +| 4 | 0.429089 | `keyvault_key_create` | ❌ | +| 5 | 0.422498 | `keyvault_secret_get` | ❌ | --- -## Test 255 +## Test 265 **Expected Tool:** `keyvault_key_list` **Prompt:** List all keys in the key vault @@ -4809,13 +4989,13 @@ |------|-------|------|--------| | 1 | 0.701448 | `keyvault_key_list` | ✅ **EXPECTED** | | 2 | 0.601513 | `keyvault_certificate_list` | ❌ | -| 3 | 0.587427 | `keyvault_secret_list` | ❌ | +| 3 | 0.587218 | `keyvault_secret_list` | ❌ | | 4 | 0.498767 | `cosmos_account_list` | ❌ | | 5 | 0.480129 | `keyvault_admin_settings_get` | ❌ | --- -## Test 256 +## Test 266 **Expected Tool:** `keyvault_key_list` **Prompt:** Show me the keys in the key vault @@ -4825,14 +5005,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.549453 | `keyvault_key_list` | ✅ **EXPECTED** | -| 2 | 0.507865 | `keyvault_key_get` | ❌ | +| 2 | 0.506815 | `keyvault_key_get` | ❌ | | 3 | 0.475507 | `keyvault_certificate_list` | 
❌ | | 4 | 0.472465 | `keyvault_admin_settings_get` | ❌ | -| 5 | 0.455936 | `keyvault_secret_get` | ❌ | +| 5 | 0.455683 | `keyvault_secret_get` | ❌ | --- -## Test 257 +## Test 267 **Expected Tool:** `keyvault_key_list` **Prompt:** What keys are in the key vault ? @@ -4844,12 +5024,12 @@ | 1 | 0.581970 | `keyvault_key_list` | ✅ **EXPECTED** | | 2 | 0.502245 | `keyvault_admin_settings_get` | ❌ | | 3 | 0.501481 | `keyvault_certificate_list` | ❌ | -| 4 | 0.477451 | `keyvault_key_get` | ❌ | -| 5 | 0.472414 | `keyvault_secret_list` | ❌ | +| 4 | 0.476470 | `keyvault_key_get` | ❌ | +| 5 | 0.472124 | `keyvault_secret_list` | ❌ | --- -## Test 258 +## Test 268 **Expected Tool:** `keyvault_key_list` **Prompt:** List key names in vault @@ -4860,13 +5040,13 @@ |------|-------|------|--------| | 1 | 0.641314 | `keyvault_key_list` | ✅ **EXPECTED** | | 2 | 0.559550 | `keyvault_certificate_list` | ❌ | -| 3 | 0.553553 | `keyvault_secret_list` | ❌ | +| 3 | 0.553257 | `keyvault_secret_list` | ❌ | | 4 | 0.486377 | `keyvault_admin_settings_get` | ❌ | | 5 | 0.475992 | `cosmos_account_list` | ❌ | --- -## Test 259 +## Test 269 **Expected Tool:** `keyvault_key_list` **Prompt:** Enumerate keys in key vault @@ -4877,13 +5057,13 @@ |------|-------|------|--------| | 1 | 0.723266 | `keyvault_key_list` | ✅ **EXPECTED** | | 2 | 0.611366 | `keyvault_certificate_list` | ❌ | -| 3 | 0.611185 | `keyvault_secret_list` | ❌ | +| 3 | 0.611042 | `keyvault_secret_list` | ❌ | | 4 | 0.473886 | `keyvault_admin_settings_get` | ❌ | -| 5 | 0.443322 | `keyvault_key_get` | ❌ | +| 5 | 0.441881 | `keyvault_key_get` | ❌ | --- -## Test 260 +## Test 270 **Expected Tool:** `keyvault_key_list` **Prompt:** Show key names in the key vault @@ -4893,14 +5073,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.570444 | `keyvault_key_list` | ✅ **EXPECTED** | -| 2 | 0.501953 | `keyvault_key_get` | ❌ | +| 2 | 0.501073 | `keyvault_key_get` | ❌ | | 3 | 0.500103 | `keyvault_certificate_list` | ❌ | | 4 | 
0.496817 | `storage_account_get` | ❌ | -| 5 | 0.490367 | `keyvault_secret_list` | ❌ | +| 5 | 0.490100 | `keyvault_secret_list` | ❌ | --- -## Test 261 +## Test 271 **Expected Tool:** `keyvault_secret_create` **Prompt:** Create a new secret called with value in the key vault @@ -4909,15 +5089,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.678482 | `keyvault_secret_create` | ✅ **EXPECTED** | -| 2 | 0.553018 | `keyvault_key_create` | ❌ | -| 3 | 0.512602 | `keyvault_secret_get` | ❌ | -| 4 | 0.475097 | `keyvault_certificate_create` | ❌ | -| 5 | 0.461437 | `appconfig_kv_set` | ❌ | +| 1 | 0.678470 | `keyvault_secret_create` | ✅ **EXPECTED** | +| 2 | 0.552708 | `keyvault_key_create` | ❌ | +| 3 | 0.512617 | `keyvault_secret_get` | ❌ | +| 4 | 0.474664 | `keyvault_certificate_create` | ❌ | +| 5 | 0.462012 | `appconfig_kv_set` | ❌ | --- -## Test 262 +## Test 272 **Expected Tool:** `keyvault_secret_create` **Prompt:** Set a secret named with value in key vault @@ -4926,15 +5106,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.663094 | `keyvault_secret_create` | ✅ **EXPECTED** | -| 2 | 0.519306 | `keyvault_secret_get` | ❌ | +| 1 | 0.663147 | `keyvault_secret_create` | ✅ **EXPECTED** | +| 2 | 0.519601 | `keyvault_secret_get` | ❌ | | 3 | 0.512233 | `appconfig_kv_set` | ❌ | | 4 | 0.458502 | `keyvault_key_create` | ❌ | | 5 | 0.429785 | `appconfig_kv_lock_set` | ❌ | --- -## Test 263 +## Test 273 **Expected Tool:** `keyvault_secret_create` **Prompt:** Store secret value in the key vault @@ -4943,15 +5123,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.639897 | `keyvault_secret_create` | ✅ **EXPECTED** | -| 2 | 0.509526 | `keyvault_secret_get` | ❌ | +| 1 | 0.639917 | `keyvault_secret_create` | ✅ **EXPECTED** | +| 2 | 0.509674 | `keyvault_secret_get` | ❌ | | 3 | 0.485203 | `appconfig_kv_set` | ❌ | | 4 | 0.484680 | `keyvault_key_create` | ❌ | | 5 | 0.448995 | `appconfig_kv_lock_set` | ❌ | 
--- -## Test 264 +## Test 274 **Expected Tool:** `keyvault_secret_create` **Prompt:** Add a new version of secret with value in vault @@ -4960,15 +5140,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.675145 | `keyvault_secret_create` | ✅ **EXPECTED** | -| 2 | 0.499276 | `keyvault_secret_get` | ❌ | +| 1 | 0.675185 | `keyvault_secret_create` | ✅ **EXPECTED** | +| 2 | 0.499612 | `keyvault_secret_get` | ❌ | | 3 | 0.498228 | `keyvault_key_create` | ❌ | | 4 | 0.479174 | `keyvault_certificate_import` | ❌ | | 5 | 0.458574 | `appconfig_kv_set` | ❌ | --- -## Test 265 +## Test 275 **Expected Tool:** `keyvault_secret_create` **Prompt:** Update secret to value in the key vault @@ -4977,15 +5157,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.571597 | `keyvault_secret_create` | ✅ **EXPECTED** | -| 2 | 0.513012 | `keyvault_secret_get` | ❌ | -| 3 | 0.441198 | `appconfig_kv_set` | ❌ | -| 4 | 0.417911 | `appconfig_kv_lock_set` | ❌ | -| 5 | 0.408739 | `keyvault_key_get` | ❌ | +| 1 | 0.571490 | `keyvault_secret_create` | ✅ **EXPECTED** | +| 2 | 0.513686 | `keyvault_secret_get` | ❌ | +| 3 | 0.440666 | `appconfig_kv_set` | ❌ | +| 4 | 0.417799 | `appconfig_kv_lock_set` | ❌ | +| 5 | 0.407937 | `keyvault_key_get` | ❌ | --- -## Test 266 +## Test 276 **Expected Tool:** `keyvault_secret_get` **Prompt:** Show me the secret in the key vault @@ -4994,15 +5174,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.602686 | `keyvault_secret_get` | ✅ **EXPECTED** | -| 2 | 0.505620 | `keyvault_key_get` | ❌ | -| 3 | 0.501397 | `keyvault_secret_create` | ❌ | -| 4 | 0.478769 | `keyvault_secret_list` | ❌ | -| 5 | 0.439521 | `keyvault_certificate_get` | ❌ | +| 1 | 0.602769 | `keyvault_secret_get` | ✅ **EXPECTED** | +| 2 | 0.504212 | `keyvault_key_get` | ❌ | +| 3 | 0.501412 | `keyvault_secret_create` | ❌ | +| 4 | 0.478600 | `keyvault_secret_list` | ❌ | +| 5 | 0.442183 | `keyvault_certificate_get` | ❌ | --- -## Test 
267 +## Test 277 **Expected Tool:** `keyvault_secret_get` **Prompt:** Show me the details of the secret in the key vault @@ -5011,15 +5191,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.653920 | `keyvault_secret_get` | ✅ **EXPECTED** | -| 2 | 0.567036 | `keyvault_key_get` | ❌ | +| 1 | 0.653871 | `keyvault_secret_get` | ✅ **EXPECTED** | +| 2 | 0.566786 | `keyvault_key_get` | ❌ | | 3 | 0.517547 | `storage_account_get` | ❌ | -| 4 | 0.496050 | `keyvault_certificate_get` | ❌ | -| 5 | 0.485249 | `keyvault_secret_list` | ❌ | +| 4 | 0.499014 | `keyvault_certificate_get` | ❌ | +| 5 | 0.485117 | `keyvault_secret_list` | ❌ | --- -## Test 268 +## Test 278 **Expected Tool:** `keyvault_secret_get` **Prompt:** Get the secret from vault @@ -5028,15 +5208,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.578261 | `keyvault_secret_get` | ✅ **EXPECTED** | -| 2 | 0.493543 | `keyvault_key_get` | ❌ | -| 3 | 0.488705 | `keyvault_secret_create` | ❌ | -| 4 | 0.443676 | `keyvault_secret_list` | ❌ | +| 1 | 0.578479 | `keyvault_secret_get` | ✅ **EXPECTED** | +| 2 | 0.492213 | `keyvault_key_get` | ❌ | +| 3 | 0.488680 | `keyvault_secret_create` | ❌ | +| 4 | 0.443595 | `keyvault_secret_list` | ❌ | | 5 | 0.424167 | `keyvault_admin_settings_get` | ❌ | --- -## Test 269 +## Test 279 **Expected Tool:** `keyvault_secret_get` **Prompt:** Display the secret details for in vault @@ -5045,15 +5225,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.649423 | `keyvault_secret_get` | ✅ **EXPECTED** | -| 2 | 0.548102 | `keyvault_key_get` | ❌ | +| 1 | 0.649267 | `keyvault_secret_get` | ✅ **EXPECTED** | +| 2 | 0.546992 | `keyvault_key_get` | ❌ | | 3 | 0.497402 | `storage_account_get` | ❌ | -| 4 | 0.492583 | `keyvault_certificate_get` | ❌ | -| 5 | 0.491597 | `keyvault_secret_list` | ❌ | +| 4 | 0.494759 | `keyvault_certificate_get` | ❌ | +| 5 | 0.491412 | `keyvault_secret_list` | ❌ | --- -## Test 270 +## Test 280 
**Expected Tool:** `keyvault_secret_get` **Prompt:** Retrieve secret metadata for in vault @@ -5062,15 +5242,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.577338 | `keyvault_secret_get` | ✅ **EXPECTED** | -| 2 | 0.475492 | `keyvault_key_get` | ❌ | -| 3 | 0.466890 | `keyvault_secret_create` | ❌ | -| 4 | 0.447602 | `keyvault_secret_list` | ❌ | +| 1 | 0.577477 | `keyvault_secret_get` | ✅ **EXPECTED** | +| 2 | 0.475443 | `keyvault_key_get` | ❌ | +| 3 | 0.466873 | `keyvault_secret_create` | ❌ | +| 4 | 0.447533 | `keyvault_secret_list` | ❌ | | 5 | 0.439583 | `storage_account_get` | ❌ | --- -## Test 271 +## Test 281 **Expected Tool:** `keyvault_secret_list` **Prompt:** List all secrets in the key vault @@ -5079,15 +5259,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.701227 | `keyvault_secret_list` | ✅ **EXPECTED** | +| 1 | 0.701015 | `keyvault_secret_list` | ✅ **EXPECTED** | | 2 | 0.563736 | `keyvault_key_list` | ❌ | | 3 | 0.538337 | `keyvault_certificate_list` | ❌ | -| 4 | 0.499888 | `keyvault_secret_get` | ❌ | +| 4 | 0.499642 | `keyvault_secret_get` | ❌ | | 5 | 0.455500 | `cosmos_account_list` | ❌ | --- -## Test 272 +## Test 282 **Expected Tool:** `keyvault_secret_list` **Prompt:** Show me the secrets in the key vault @@ -5096,15 +5276,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.555681 | `keyvault_secret_list` | ✅ **EXPECTED** | -| 2 | 0.544015 | `keyvault_secret_get` | ❌ | -| 3 | 0.498713 | `keyvault_key_get` | ❌ | +| 1 | 0.555367 | `keyvault_secret_list` | ✅ **EXPECTED** | +| 2 | 0.543861 | `keyvault_secret_get` | ❌ | +| 3 | 0.497525 | `keyvault_key_get` | ❌ | | 4 | 0.464661 | `keyvault_key_list` | ❌ | | 5 | 0.453130 | `keyvault_admin_settings_get` | ❌ | --- -## Test 273 +## Test 283 **Expected Tool:** `keyvault_secret_list` **Prompt:** What secrets are in the key vault ? 
@@ -5113,15 +5293,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.572540 | `keyvault_secret_list` | ✅ **EXPECTED** | -| 2 | 0.529389 | `keyvault_secret_get` | ❌ | +| 1 | 0.572149 | `keyvault_secret_list` | ✅ **EXPECTED** | +| 2 | 0.529258 | `keyvault_secret_get` | ❌ | | 3 | 0.493761 | `keyvault_key_list` | ❌ | | 4 | 0.487620 | `keyvault_admin_settings_get` | ❌ | -| 5 | 0.476109 | `keyvault_key_get` | ❌ | +| 5 | 0.475273 | `keyvault_key_get` | ❌ | --- -## Test 274 +## Test 284 **Expected Tool:** `keyvault_secret_list` **Prompt:** List secrets names in vault @@ -5130,15 +5310,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.624290 | `keyvault_secret_list` | ✅ **EXPECTED** | +| 1 | 0.624070 | `keyvault_secret_list` | ✅ **EXPECTED** | | 2 | 0.559681 | `keyvault_key_list` | ❌ | | 3 | 0.517516 | `keyvault_certificate_list` | ❌ | -| 4 | 0.479771 | `keyvault_secret_get` | ❌ | -| 5 | 0.453295 | `storage_blob_container_get` | ❌ | +| 4 | 0.479547 | `keyvault_secret_get` | ❌ | +| 5 | 0.454288 | `storage_blob_container_get` | ❌ | --- -## Test 275 +## Test 285 **Expected Tool:** `keyvault_secret_list` **Prompt:** Enumerate secrets in key vault @@ -5147,15 +5327,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.742358 | `keyvault_secret_list` | ✅ **EXPECTED** | +| 1 | 0.742121 | `keyvault_secret_list` | ✅ **EXPECTED** | | 2 | 0.601183 | `keyvault_key_list` | ❌ | | 3 | 0.567827 | `keyvault_certificate_list` | ❌ | -| 4 | 0.496363 | `keyvault_secret_get` | ❌ | +| 4 | 0.496127 | `keyvault_secret_get` | ❌ | | 5 | 0.437560 | `keyvault_admin_settings_get` | ❌ | --- -## Test 276 +## Test 286 **Expected Tool:** `keyvault_secret_list` **Prompt:** Show secrets names in the key vault @@ -5164,15 +5344,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.567110 | `keyvault_secret_list` | ✅ **EXPECTED** | -| 2 | 0.522600 | `keyvault_secret_get` | ❌ | +| 1 | 0.566824 | 
`keyvault_secret_list` | ✅ **EXPECTED** | +| 2 | 0.522398 | `keyvault_secret_get` | ❌ | | 3 | 0.476309 | `keyvault_key_list` | ❌ | -| 4 | 0.462711 | `keyvault_key_get` | ❌ | -| 5 | 0.462677 | `keyvault_secret_create` | ❌ | +| 4 | 0.462720 | `keyvault_secret_create` | ❌ | +| 5 | 0.461326 | `keyvault_key_get` | ❌ | --- -## Test 277 +## Test 287 **Expected Tool:** `aks_cluster_get` **Prompt:** Get the configuration of AKS cluster @@ -5182,14 +5362,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.588300 | `aks_cluster_get` | ✅ **EXPECTED** | -| 2 | 0.544302 | `aks_nodepool_get` | ❌ | +| 2 | 0.544309 | `aks_nodepool_get` | ❌ | | 3 | 0.517279 | `kusto_cluster_get` | ❌ | | 4 | 0.481416 | `mysql_server_config_get` | ❌ | -| 5 | 0.430976 | `postgres_server_config_get` | ❌ | +| 5 | 0.430975 | `postgres_server_config_get` | ❌ | --- -## Test 278 +## Test 288 **Expected Tool:** `aks_cluster_get` **Prompt:** Show me the details of AKS cluster in resource group @@ -5199,14 +5379,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.621759 | `aks_cluster_get` | ✅ **EXPECTED** | -| 2 | 0.575626 | `aks_nodepool_get` | ❌ | +| 2 | 0.575634 | `aks_nodepool_get` | ❌ | | 3 | 0.567870 | `kusto_cluster_get` | ❌ | | 4 | 0.461466 | `sql_db_show` | ❌ | | 5 | 0.444327 | `monitor_webtests_get` | ❌ | --- -## Test 279 +## Test 289 **Expected Tool:** `aks_cluster_get` **Prompt:** Show me the network configuration for AKS cluster @@ -5216,14 +5396,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.522525 | `aks_cluster_get` | ✅ **EXPECTED** | -| 2 | 0.483220 | `aks_nodepool_get` | ❌ | +| 2 | 0.483225 | `aks_nodepool_get` | ❌ | | 3 | 0.434684 | `kusto_cluster_get` | ❌ | | 4 | 0.380301 | `mysql_server_config_get` | ❌ | | 5 | 0.366689 | `kusto_cluster_list` | ❌ | --- -## Test 280 +## Test 290 **Expected Tool:** `aks_cluster_get` **Prompt:** What are the details of my AKS cluster in ? 
@@ -5233,14 +5413,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.588634 | `aks_cluster_get` | ✅ **EXPECTED** | -| 2 | 0.550555 | `aks_nodepool_get` | ❌ | +| 2 | 0.550582 | `aks_nodepool_get` | ❌ | | 3 | 0.527511 | `kusto_cluster_get` | ❌ | | 4 | 0.445722 | `storage_account_get` | ❌ | | 5 | 0.435597 | `foundry_resource_get` | ❌ | --- -## Test 281 +## Test 291 **Expected Tool:** `aks_cluster_get` **Prompt:** List all AKS clusters in my subscription @@ -5251,13 +5431,13 @@ |------|-------|------|--------| | 1 | 0.756471 | `aks_cluster_get` | ✅ **EXPECTED** | | 2 | 0.749416 | `kusto_cluster_list` | ❌ | -| 3 | 0.590166 | `aks_nodepool_get` | ❌ | -| 4 | 0.568635 | `kusto_database_list` | ❌ | -| 5 | 0.560522 | `search_service_list` | ❌ | +| 3 | 0.590161 | `aks_nodepool_get` | ❌ | +| 4 | 0.568502 | `kusto_database_list` | ❌ | +| 5 | 0.562043 | `search_service_list` | ❌ | --- -## Test 282 +## Test 292 **Expected Tool:** `aks_cluster_get` **Prompt:** Show me my Azure Kubernetes Service clusters @@ -5268,13 +5448,13 @@ |------|-------|------|--------| | 1 | 0.612123 | `aks_cluster_get` | ✅ **EXPECTED** | | 2 | 0.586661 | `kusto_cluster_list` | ❌ | -| 3 | 0.507757 | `aks_nodepool_get` | ❌ | +| 3 | 0.507701 | `aks_nodepool_get` | ❌ | | 4 | 0.489724 | `kusto_cluster_get` | ❌ | -| 5 | 0.462950 | `kusto_database_list` | ❌ | +| 5 | 0.462957 | `kusto_database_list` | ❌ | --- -## Test 283 +## Test 293 **Expected Tool:** `aks_cluster_get` **Prompt:** What AKS clusters do I have? 
@@ -5283,15 +5463,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.628470 | `aks_cluster_get` | ✅ **EXPECTED** | -| 2 | 0.563211 | `aks_nodepool_get` | ❌ | -| 3 | 0.526840 | `kusto_cluster_list` | ❌ | -| 4 | 0.426233 | `kusto_cluster_get` | ❌ | -| 5 | 0.409379 | `kusto_database_list` | ❌ | +| 1 | 0.628429 | `aks_cluster_get` | ✅ **EXPECTED** | +| 2 | 0.563208 | `aks_nodepool_get` | ❌ | +| 3 | 0.526756 | `kusto_cluster_list` | ❌ | +| 4 | 0.426157 | `kusto_cluster_get` | ❌ | +| 5 | 0.409308 | `kusto_database_list` | ❌ | --- -## Test 284 +## Test 294 **Expected Tool:** `aks_nodepool_get` **Prompt:** Get details for nodepool in AKS cluster in @@ -5300,15 +5480,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.728569 | `aks_nodepool_get` | ✅ **EXPECTED** | -| 2 | 0.516573 | `kusto_cluster_get` | ❌ | -| 3 | 0.509314 | `aks_cluster_get` | ❌ | -| 4 | 0.468516 | `virtualdesktop_hostpool_list` | ❌ | -| 5 | 0.463185 | `sql_elastic-pool_list` | ❌ | +| 1 | 0.728958 | `aks_nodepool_get` | ✅ **EXPECTED** | +| 2 | 0.517021 | `kusto_cluster_get` | ❌ | +| 3 | 0.509820 | `aks_cluster_get` | ❌ | +| 4 | 0.468392 | `virtualdesktop_hostpool_list` | ❌ | +| 5 | 0.463192 | `sql_elastic-pool_list` | ❌ | --- -## Test 285 +## Test 295 **Expected Tool:** `aks_nodepool_get` **Prompt:** Show me the configuration for nodepool in AKS cluster in resource group @@ -5317,15 +5497,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.654106 | `aks_nodepool_get` | ✅ **EXPECTED** | -| 2 | 0.458596 | `sql_elastic-pool_list` | ❌ | -| 3 | 0.446035 | `aks_cluster_get` | ❌ | -| 4 | 0.440273 | `virtualdesktop_hostpool_list` | ❌ | -| 5 | 0.413758 | `kusto_cluster_get` | ❌ | +| 1 | 0.654192 | `aks_nodepool_get` | ✅ **EXPECTED** | +| 2 | 0.458497 | `sql_elastic-pool_list` | ❌ | +| 3 | 0.446296 | `aks_cluster_get` | ❌ | +| 4 | 0.440322 | `virtualdesktop_hostpool_list` | ❌ | +| 5 | 0.414154 | `kusto_cluster_get` | ❌ | --- -## 
Test 286 +## Test 296 **Expected Tool:** `aks_nodepool_get` **Prompt:** What is the setup of nodepool for AKS cluster in ? @@ -5334,15 +5514,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.592806 | `aks_nodepool_get` | ✅ **EXPECTED** | -| 2 | 0.402556 | `aks_cluster_get` | ❌ | -| 3 | 0.385218 | `virtualdesktop_hostpool_list` | ❌ | -| 4 | 0.383045 | `sql_elastic-pool_list` | ❌ | -| 5 | 0.355090 | `kusto_cluster_get` | ❌ | +| 1 | 0.592931 | `aks_nodepool_get` | ✅ **EXPECTED** | +| 2 | 0.402605 | `aks_cluster_get` | ❌ | +| 3 | 0.385275 | `virtualdesktop_hostpool_list` | ❌ | +| 4 | 0.383112 | `sql_elastic-pool_list` | ❌ | +| 5 | 0.355131 | `kusto_cluster_get` | ❌ | --- -## Test 287 +## Test 297 **Expected Tool:** `aks_nodepool_get` **Prompt:** List nodepools for AKS cluster in @@ -5351,15 +5531,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.692231 | `aks_nodepool_get` | ✅ **EXPECTED** | +| 1 | 0.692235 | `aks_nodepool_get` | ✅ **EXPECTED** | | 2 | 0.519037 | `aks_cluster_get` | ❌ | -| 3 | 0.506720 | `virtualdesktop_hostpool_list` | ❌ | +| 3 | 0.506624 | `virtualdesktop_hostpool_list` | ❌ | | 4 | 0.500749 | `kusto_cluster_list` | ❌ | | 5 | 0.487707 | `sql_elastic-pool_list` | ❌ | --- -## Test 288 +## Test 298 **Expected Tool:** `aks_nodepool_get` **Prompt:** Show me the nodepool list for AKS cluster in @@ -5368,15 +5548,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.732132 | `aks_nodepool_get` | ✅ **EXPECTED** | +| 1 | 0.732102 | `aks_nodepool_get` | ✅ **EXPECTED** | | 2 | 0.561829 | `aks_cluster_get` | ❌ | | 3 | 0.510269 | `sql_elastic-pool_list` | ❌ | -| 4 | 0.509840 | `virtualdesktop_hostpool_list` | ❌ | +| 4 | 0.509732 | `virtualdesktop_hostpool_list` | ❌ | | 5 | 0.486700 | `kusto_cluster_list` | ❌ | --- -## Test 289 +## Test 299 **Expected Tool:** `aks_nodepool_get` **Prompt:** What nodepools do I have for AKS cluster in @@ -5385,15 +5565,15 @@ | Rank | Score | Tool 
| Status | |------|-------|------|--------| -| 1 | 0.629358 | `aks_nodepool_get` | ✅ **EXPECTED** | -| 2 | 0.456911 | `aks_cluster_get` | ❌ | -| 3 | 0.443940 | `virtualdesktop_hostpool_list` | ❌ | -| 4 | 0.433006 | `kusto_cluster_list` | ❌ | -| 5 | 0.425448 | `sql_elastic-pool_list` | ❌ | +| 1 | 0.629316 | `aks_nodepool_get` | ✅ **EXPECTED** | +| 2 | 0.456894 | `aks_cluster_get` | ❌ | +| 3 | 0.443957 | `virtualdesktop_hostpool_list` | ❌ | +| 4 | 0.432997 | `kusto_cluster_list` | ❌ | +| 5 | 0.425570 | `sql_elastic-pool_list` | ❌ | --- -## Test 290 +## Test 300 **Expected Tool:** `loadtesting_test_create` **Prompt:** Create a basic URL test using the following endpoint URL that runs for 30 minutes with 45 virtual users. The test name is with the test id and the load testing resource is in the resource group in my subscription @@ -5402,15 +5582,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.579172 | `loadtesting_test_create` | ✅ **EXPECTED** | -| 2 | 0.520449 | `loadtesting_testresource_create` | ❌ | -| 3 | 0.513419 | `loadtesting_testrun_create` | ❌ | -| 4 | 0.473951 | `monitor_webtests_create` | ❌ | -| 5 | 0.461959 | `loadtesting_testresource_list` | ❌ | +| 1 | 0.577811 | `loadtesting_test_create` | ✅ **EXPECTED** | +| 2 | 0.519418 | `loadtesting_testresource_create` | ❌ | +| 3 | 0.512099 | `loadtesting_testrun_create` | ❌ | +| 4 | 0.472777 | `monitor_webtests_create` | ❌ | +| 5 | 0.460717 | `loadtesting_testresource_list` | ❌ | --- -## Test 291 +## Test 301 **Expected Tool:** `loadtesting_test_get` **Prompt:** Get the load test with id in the load test resource in resource group @@ -5422,12 +5602,12 @@ | 1 | 0.626226 | `loadtesting_testresource_list` | ❌ | | 2 | 0.619944 | `loadtesting_test_get` | ✅ **EXPECTED** | | 3 | 0.594666 | `loadtesting_testresource_create` | ❌ | -| 4 | 0.590698 | `monitor_webtests_get` | ❌ | -| 5 | 0.536024 | `monitor_webtests_list` | ❌ | +| 4 | 0.590697 | `monitor_webtests_get` | ❌ | +| 5 | 0.535650 | 
`monitor_webtests_list` | ❌ | --- -## Test 292 +## Test 302 **Expected Tool:** `loadtesting_testresource_create` **Prompt:** Create a load test resource in the resource group in my subscription @@ -5438,13 +5618,13 @@ |------|-------|------|--------| | 1 | 0.645537 | `loadtesting_testresource_create` | ✅ **EXPECTED** | | 2 | 0.618773 | `loadtesting_testresource_list` | ❌ | -| 3 | 0.541696 | `loadtesting_test_create` | ❌ | +| 3 | 0.541746 | `loadtesting_test_create` | ❌ | | 4 | 0.539771 | `loadtesting_testrun_create` | ❌ | -| 5 | 0.526684 | `monitor_webtests_list` | ❌ | +| 5 | 0.526226 | `monitor_webtests_list` | ❌ | --- -## Test 293 +## Test 303 **Expected Tool:** `loadtesting_testresource_list` **Prompt:** List all load testing resources in the resource group in my subscription @@ -5454,14 +5634,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.794326 | `loadtesting_testresource_list` | ✅ **EXPECTED** | -| 2 | 0.653165 | `monitor_webtests_list` | ❌ | -| 3 | 0.577408 | `group_list` | ❌ | +| 2 | 0.652990 | `monitor_webtests_list` | ❌ | +| 3 | 0.577427 | `group_list` | ❌ | | 4 | 0.575172 | `loadtesting_testresource_create` | ❌ | | 5 | 0.565565 | `datadog_monitoredresources_list` | ❌ | --- -## Test 294 +## Test 304 **Expected Tool:** `loadtesting_testrun_create` **Prompt:** Create a test run using the id for test in the load testing resource in resource group . 
Use the name of test run and description as @@ -5472,13 +5652,13 @@ |------|-------|------|--------| | 1 | 0.688976 | `loadtesting_testrun_create` | ✅ **EXPECTED** | | 2 | 0.594879 | `loadtesting_testrun_update` | ❌ | -| 3 | 0.558566 | `loadtesting_test_create` | ❌ | +| 3 | 0.558636 | `loadtesting_test_create` | ❌ | | 4 | 0.547102 | `loadtesting_testresource_create` | ❌ | | 5 | 0.496224 | `loadtesting_testresource_list` | ❌ | --- -## Test 295 +## Test 305 **Expected Tool:** `loadtesting_testrun_get` **Prompt:** Get the load test run with id in the load test resource in resource group @@ -5487,15 +5667,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.619146 | `loadtesting_testresource_list` | ❌ | -| 2 | 0.601927 | `loadtesting_test_get` | ❌ | -| 3 | 0.597430 | `loadtesting_testresource_create` | ❌ | -| 4 | 0.577532 | `monitor_webtests_get` | ❌ | -| 5 | 0.565996 | `loadtesting_testrun_list` | ❌ | +| 1 | 0.618909 | `loadtesting_testresource_list` | ❌ | +| 2 | 0.601963 | `loadtesting_test_get` | ❌ | +| 3 | 0.597266 | `loadtesting_testresource_create` | ❌ | +| 4 | 0.577220 | `monitor_webtests_get` | ❌ | +| 5 | 0.566095 | `loadtesting_testrun_list` | ❌ | --- -## Test 296 +## Test 306 **Expected Tool:** `loadtesting_testrun_list` **Prompt:** Get all the load test runs for the test with id in the load test resource in resource group @@ -5504,15 +5684,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.669307 | `loadtesting_testresource_list` | ❌ | -| 2 | 0.640644 | `loadtesting_testrun_list` | ✅ **EXPECTED** | -| 3 | 0.600977 | `loadtesting_test_get` | ❌ | -| 4 | 0.577403 | `loadtesting_testresource_create` | ❌ | -| 5 | 0.569287 | `monitor_webtests_list` | ❌ | +| 1 | 0.669180 | `loadtesting_testresource_list` | ❌ | +| 2 | 0.640360 | `loadtesting_testrun_list` | ✅ **EXPECTED** | +| 3 | 0.601075 | `loadtesting_test_get` | ❌ | +| 4 | 0.577460 | `loadtesting_testresource_create` | ❌ | +| 5 | 0.569424 | 
`monitor_webtests_get` | ❌ | --- -## Test 297 +## Test 307 **Expected Tool:** `loadtesting_testrun_update` **Prompt:** Update a test run display name as for the id for test in the load testing resource in resource group . @@ -5523,13 +5703,13 @@ |------|-------|------|--------| | 1 | 0.706747 | `loadtesting_testrun_update` | ✅ **EXPECTED** | | 2 | 0.514428 | `loadtesting_testrun_create` | ❌ | -| 3 | 0.486977 | `monitor_webtests_update` | ❌ | +| 3 | 0.486980 | `monitor_webtests_update` | ❌ | | 4 | 0.470337 | `loadtesting_testresource_list` | ❌ | | 5 | 0.468374 | `monitor_webtests_get` | ❌ | --- -## Test 298 +## Test 308 **Expected Tool:** `grafana_list` **Prompt:** List all Azure Managed Grafana in one subscription @@ -5540,13 +5720,13 @@ |------|-------|------|--------| | 1 | 0.599427 | `kusto_cluster_list` | ❌ | | 2 | 0.578892 | `grafana_list` | ✅ **EXPECTED** | -| 3 | 0.550372 | `subscription_list` | ❌ | -| 4 | 0.549957 | `search_service_list` | ❌ | +| 3 | 0.551851 | `search_service_list` | ❌ | +| 4 | 0.550372 | `subscription_list` | ❌ | | 5 | 0.531259 | `redis_list` | ❌ | --- -## Test 299 +## Test 309 **Expected Tool:** `managedlustre_fs_create` **Prompt:** Create an Azure Managed Lustre filesystem with name , size , SKU , and subnet for availability zone in location . 
Maintenance should occur on at @@ -5555,15 +5735,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.726553 | `managedlustre_fs_create` | ✅ **EXPECTED** | +| 1 | 0.728113 | `managedlustre_fs_create` | ✅ **EXPECTED** | | 2 | 0.616164 | `managedlustre_fs_list` | ❌ | -| 3 | 0.605701 | `managedlustre_fs_sku_get` | ❌ | -| 4 | 0.598215 | `managedlustre_fs_update` | ❌ | +| 3 | 0.605775 | `managedlustre_fs_sku_get` | ❌ | +| 4 | 0.598255 | `managedlustre_fs_update` | ❌ | | 5 | 0.557720 | `managedlustre_fs_subnetsize_validate` | ❌ | --- -## Test 300 +## Test 310 **Expected Tool:** `managedlustre_fs_list` **Prompt:** List the Azure Managed Lustre filesystems in my subscription @@ -5573,14 +5753,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.750675 | `managedlustre_fs_list` | ✅ **EXPECTED** | -| 2 | 0.631730 | `managedlustre_fs_sku_get` | ❌ | -| 3 | 0.579855 | `managedlustre_fs_create` | ❌ | +| 2 | 0.631770 | `managedlustre_fs_sku_get` | ❌ | +| 3 | 0.582660 | `managedlustre_fs_create` | ❌ | | 4 | 0.562377 | `kusto_cluster_list` | ❌ | -| 5 | 0.512086 | `search_service_list` | ❌ | +| 5 | 0.513156 | `search_service_list` | ❌ | --- -## Test 301 +## Test 311 **Expected Tool:** `managedlustre_fs_list` **Prompt:** List the Azure Managed Lustre filesystems in my resource group @@ -5589,15 +5769,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.743903 | `managedlustre_fs_list` | ✅ **EXPECTED** | -| 2 | 0.613164 | `managedlustre_fs_sku_get` | ❌ | -| 3 | 0.563081 | `managedlustre_fs_create` | ❌ | -| 4 | 0.519986 | `datadog_monitoredresources_list` | ❌ | -| 5 | 0.515433 | `loadtesting_testresource_list` | ❌ | +| 1 | 0.743881 | `managedlustre_fs_list` | ✅ **EXPECTED** | +| 2 | 0.613165 | `managedlustre_fs_sku_get` | ❌ | +| 3 | 0.565820 | `managedlustre_fs_create` | ❌ | +| 4 | 0.520005 | `datadog_monitoredresources_list` | ❌ | +| 5 | 0.515449 | `loadtesting_testresource_list` | ❌ | --- -## Test 302 
+## Test 312 **Expected Tool:** `managedlustre_fs_sku_get` **Prompt:** List the Azure Managed Lustre SKUs available in location @@ -5606,15 +5786,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.827360 | `managedlustre_fs_sku_get` | ✅ **EXPECTED** | +| 1 | 0.827381 | `managedlustre_fs_sku_get` | ✅ **EXPECTED** | | 2 | 0.613674 | `managedlustre_fs_list` | ❌ | -| 3 | 0.511625 | `managedlustre_fs_create` | ❌ | +| 3 | 0.513242 | `managedlustre_fs_create` | ❌ | | 4 | 0.496242 | `managedlustre_fs_subnetsize_validate` | ❌ | | 5 | 0.470241 | `kusto_cluster_list` | ❌ | --- -## Test 303 +## Test 313 **Expected Tool:** `managedlustre_fs_subnetsize_ask` **Prompt:** Tell me how many IP addresses I need for an Azure Managed Lustre filesystem of size using the SKU @@ -5625,13 +5805,13 @@ |------|-------|------|--------| | 1 | 0.739766 | `managedlustre_fs_subnetsize_ask` | ✅ **EXPECTED** | | 2 | 0.651598 | `managedlustre_fs_subnetsize_validate` | ❌ | -| 3 | 0.594536 | `managedlustre_fs_sku_get` | ❌ | +| 3 | 0.594585 | `managedlustre_fs_sku_get` | ❌ | | 4 | 0.559498 | `managedlustre_fs_list` | ❌ | -| 5 | 0.533351 | `managedlustre_fs_create` | ❌ | +| 5 | 0.533684 | `managedlustre_fs_create` | ❌ | --- -## Test 304 +## Test 314 **Expected Tool:** `managedlustre_fs_subnetsize_validate` **Prompt:** Validate if the network can host Azure Managed Lustre filesystem of size using the SKU @@ -5640,15 +5820,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.879240 | `managedlustre_fs_subnetsize_validate` | ✅ **EXPECTED** | -| 2 | 0.622368 | `managedlustre_fs_subnetsize_ask` | ❌ | -| 3 | 0.542555 | `managedlustre_fs_sku_get` | ❌ | -| 4 | 0.516032 | `managedlustre_fs_create` | ❌ | -| 5 | 0.480796 | `managedlustre_fs_list` | ❌ | +| 1 | 0.879389 | `managedlustre_fs_subnetsize_validate` | ✅ **EXPECTED** | +| 2 | 0.622463 | `managedlustre_fs_subnetsize_ask` | ❌ | +| 3 | 0.542808 | `managedlustre_fs_sku_get` | ❌ | +| 4 | 0.515936 | 
`managedlustre_fs_create` | ❌ | +| 5 | 0.480855 | `managedlustre_fs_list` | ❌ | --- -## Test 305 +## Test 315 **Expected Tool:** `managedlustre_fs_update` **Prompt:** Update the maintenance window of the Azure Managed Lustre filesystem to at @@ -5657,15 +5837,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.738895 | `managedlustre_fs_update` | ✅ **EXPECTED** | -| 2 | 0.525980 | `managedlustre_fs_create` | ❌ | +| 1 | 0.739000 | `managedlustre_fs_update` | ✅ **EXPECTED** | +| 2 | 0.527525 | `managedlustre_fs_create` | ❌ | | 3 | 0.487193 | `managedlustre_fs_list` | ❌ | -| 4 | 0.385318 | `managedlustre_fs_sku_get` | ❌ | +| 4 | 0.385349 | `managedlustre_fs_sku_get` | ❌ | | 5 | 0.344891 | `managedlustre_fs_subnetsize_validate` | ❌ | --- -## Test 306 +## Test 316 **Expected Tool:** `marketplace_product_get` **Prompt:** Get details about marketplace product @@ -5674,15 +5854,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.570164 | `marketplace_product_get` | ✅ **EXPECTED** | -| 2 | 0.499208 | `marketplace_product_list` | ❌ | -| 3 | 0.353280 | `servicebus_topic_subscription_details` | ❌ | -| 4 | 0.333304 | `servicebus_topic_details` | ❌ | -| 5 | 0.330949 | `servicebus_queue_details` | ❌ | +| 1 | 0.570189 | `marketplace_product_get` | ✅ **EXPECTED** | +| 2 | 0.499184 | `marketplace_product_list` | ❌ | +| 3 | 0.353256 | `servicebus_topic_subscription_details` | ❌ | +| 4 | 0.333160 | `servicebus_topic_details` | ❌ | +| 5 | 0.330935 | `servicebus_queue_details` | ❌ | --- -## Test 307 +## Test 317 **Expected Tool:** `marketplace_product_list` **Prompt:** Search for Microsoft products in the marketplace @@ -5691,15 +5871,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.607950 | `marketplace_product_list` | ✅ **EXPECTED** | -| 2 | 0.443177 | `marketplace_product_get` | ❌ | -| 3 | 0.341360 | `search_service_list` | ❌ | -| 4 | 0.330544 | `foundry_models_list` | ❌ | -| 5 | 0.328671 | 
`managedlustre_fs_sku_get` | ❌ | +| 1 | 0.607916 | `marketplace_product_list` | ✅ **EXPECTED** | +| 2 | 0.443000 | `marketplace_product_get` | ❌ | +| 3 | 0.343549 | `search_service_list` | ❌ | +| 4 | 0.330500 | `foundry_models_list` | ❌ | +| 5 | 0.328676 | `managedlustre_fs_sku_get` | ❌ | --- -## Test 308 +## Test 318 **Expected Tool:** `marketplace_product_list` **Prompt:** Show me marketplace products from publisher @@ -5709,14 +5889,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.537726 | `marketplace_product_list` | ✅ **EXPECTED** | -| 2 | 0.385167 | `marketplace_product_get` | ❌ | +| 2 | 0.385151 | `marketplace_product_get` | ❌ | | 3 | 0.308769 | `foundry_models_list` | ❌ | | 4 | 0.288006 | `redis_list` | ❌ | -| 5 | 0.260421 | `managedlustre_fs_sku_get` | ❌ | +| 5 | 0.260387 | `managedlustre_fs_sku_get` | ❌ | --- -## Test 309 +## Test 319 **Expected Tool:** `azureaibestpractices_get` **Prompt:** Get best practices for building AI applications in Azure @@ -5725,15 +5905,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.675775 | `azureaibestpractices_get` | ✅ **EXPECTED** | -| 2 | 0.555579 | `get_bestpractices_get` | ❌ | -| 3 | 0.501210 | `azureterraformbestpractices_get` | ❌ | -| 4 | 0.480026 | `deploy_pipeline_guidance_get` | ❌ | -| 5 | 0.477592 | `cloudarchitect_design` | ❌ | +| 1 | 0.555579 | `get_bestpractices_get` | ❌ | +| 2 | 0.501211 | `azureterraformbestpractices_get` | ❌ | +| 3 | 0.480235 | `deploy_pipeline_guidance_get` | ❌ | +| 4 | 0.478355 | `cloudarchitect_design` | ❌ | +| 5 | 0.476579 | `deploy_iac_rules_get` | ❌ | --- -## Test 310 +## Test 320 **Expected Tool:** `azureaibestpractices_get` **Prompt:** Show me the best practices for Azure AI Foundry agents code generation @@ -5742,15 +5922,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.699440 | `azureaibestpractices_get` | ✅ **EXPECTED** | -| 2 | 0.603487 | `foundry_agents_get-sdk-sample` | ❌ | -| 3 | 
0.534202 | `get_bestpractices_get` | ❌ | -| 4 | 0.520202 | `foundry_agents_list` | ❌ | -| 5 | 0.508727 | `azureterraformbestpractices_get` | ❌ | +| 1 | 0.603773 | `foundry_agents_get-sdk-sample` | ❌ | +| 2 | 0.534202 | `get_bestpractices_get` | ❌ | +| 3 | 0.520223 | `foundry_agents_list` | ❌ | +| 4 | 0.508727 | `azureterraformbestpractices_get` | ❌ | +| 5 | 0.480034 | `deploy_plan_get` | ❌ | --- -## Test 311 +## Test 321 **Expected Tool:** `azureaibestpractices_get` **Prompt:** Get guidance for building agents with Azure AI Foundry @@ -5759,15 +5939,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.635165 | `azureaibestpractices_get` | ✅ **EXPECTED** | -| 2 | 0.593029 | `foundry_agents_get-sdk-sample` | ❌ | -| 3 | 0.553580 | `foundry_agents_list` | ❌ | -| 4 | 0.534256 | `foundry_agents_create` | ❌ | -| 5 | 0.513217 | `foundry_agents_connect` | ❌ | +| 1 | 0.593216 | `foundry_agents_get-sdk-sample` | ❌ | +| 2 | 0.553662 | `foundry_agents_list` | ❌ | +| 3 | 0.534160 | `foundry_agents_create` | ❌ | +| 4 | 0.513217 | `foundry_agents_connect` | ❌ | +| 5 | 0.505706 | `deploy_pipeline_guidance_get` | ❌ | --- -## Test 312 +## Test 322 **Expected Tool:** `azureaibestpractices_get` **Prompt:** Create an AI app that helps me to manage travel queries. 
@@ -5776,15 +5956,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.417629 | `azureaibestpractices_get` | ✅ **EXPECTED** | -| 2 | 0.343844 | `foundry_threads_create` | ❌ | -| 3 | 0.327503 | `foundry_openai_chat-completions-create` | ❌ | -| 4 | 0.320532 | `foundry_openai_create-completion` | ❌ | -| 5 | 0.311958 | `foundry_agents_connect` | ❌ | +| 1 | 0.343793 | `foundry_threads_create` | ❌ | +| 2 | 0.327503 | `foundry_openai_chat-completions-create` | ❌ | +| 3 | 0.320532 | `foundry_openai_create-completion` | ❌ | +| 4 | 0.311958 | `foundry_agents_connect` | ❌ | +| 5 | 0.305073 | `foundry_agents_get-sdk-sample` | ❌ | --- -## Test 313 +## Test 323 **Expected Tool:** `azureaibestpractices_get` **Prompt:** Create an AI app that helps me to manage travel queries in Azure AI Foundry @@ -5793,15 +5973,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.517931 | `azureaibestpractices_get` | ✅ **EXPECTED** | -| 2 | 0.478747 | `foundry_openai_embeddings-create` | ❌ | -| 3 | 0.469654 | `foundry_openai_chat-completions-create` | ❌ | -| 4 | 0.466216 | `foundry_openai_create-completion` | ❌ | -| 5 | 0.456719 | `foundry_resource_get` | ❌ | +| 1 | 0.478745 | `foundry_openai_embeddings-create` | ❌ | +| 2 | 0.469654 | `foundry_openai_chat-completions-create` | ❌ | +| 3 | 0.466216 | `foundry_openai_create-completion` | ❌ | +| 4 | 0.456719 | `foundry_resource_get` | ❌ | +| 5 | 0.448502 | `foundry_agents_list` | ❌ | --- -## Test 314 +## Test 324 **Expected Tool:** `get_bestpractices_get` **Prompt:** Get the latest Azure code generation best practices @@ -5810,15 +5990,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.656395 | `azureaibestpractices_get` | ❌ | -| 2 | 0.646844 | `get_bestpractices_get` | ✅ **EXPECTED** | -| 3 | 0.635406 | `azureterraformbestpractices_get` | ❌ | -| 4 | 0.586907 | `deploy_iac_rules_get` | ❌ | -| 5 | 0.531457 | `deploy_pipeline_guidance_get` | ❌ | +| 1 | 0.646844 | 
`get_bestpractices_get` | ✅ **EXPECTED** | +| 2 | 0.635406 | `azureterraformbestpractices_get` | ❌ | +| 3 | 0.586907 | `deploy_iac_rules_get` | ❌ | +| 4 | 0.531727 | `deploy_pipeline_guidance_get` | ❌ | +| 5 | 0.490235 | `deploy_plan_get` | ❌ | --- -## Test 315 +## Test 325 **Expected Tool:** `get_bestpractices_get` **Prompt:** Get the latest Azure deployment best practices @@ -5831,11 +6011,11 @@ | 2 | 0.548542 | `azureterraformbestpractices_get` | ❌ | | 3 | 0.541091 | `deploy_iac_rules_get` | ❌ | | 4 | 0.516852 | `deploy_plan_get` | ❌ | -| 5 | 0.516203 | `deploy_pipeline_guidance_get` | ❌ | +| 5 | 0.516443 | `deploy_pipeline_guidance_get` | ❌ | --- -## Test 316 +## Test 326 **Expected Tool:** `get_bestpractices_get` **Prompt:** Get the latest Azure best practices @@ -5846,13 +6026,13 @@ |------|-------|------|--------| | 1 | 0.625259 | `get_bestpractices_get` | ✅ **EXPECTED** | | 2 | 0.594323 | `azureterraformbestpractices_get` | ❌ | -| 3 | 0.539715 | `azureaibestpractices_get` | ❌ | -| 4 | 0.518643 | `deploy_iac_rules_get` | ❌ | -| 5 | 0.465370 | `deploy_pipeline_guidance_get` | ❌ | +| 3 | 0.518643 | `deploy_iac_rules_get` | ❌ | +| 4 | 0.465572 | `deploy_pipeline_guidance_get` | ❌ | +| 5 | 0.451502 | `cloudarchitect_design` | ❌ | --- -## Test 317 +## Test 327 **Expected Tool:** `get_bestpractices_get` **Prompt:** Get the latest Azure Functions code generation best practices @@ -5862,14 +6042,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.624273 | `get_bestpractices_get` | ✅ **EXPECTED** | -| 2 | 0.587474 | `azureaibestpractices_get` | ❌ | -| 3 | 0.570488 | `azureterraformbestpractices_get` | ❌ | -| 4 | 0.522998 | `deploy_iac_rules_get` | ❌ | -| 5 | 0.493745 | `deploy_pipeline_guidance_get` | ❌ | +| 2 | 0.570488 | `azureterraformbestpractices_get` | ❌ | +| 3 | 0.522998 | `deploy_iac_rules_get` | ❌ | +| 4 | 0.493998 | `deploy_pipeline_guidance_get` | ❌ | +| 5 | 0.467377 | `extension_cli_install` | ❌ | --- -## Test 318 +## Test 328 
**Expected Tool:** `get_bestpractices_get` **Prompt:** Get the latest Azure Functions deployment best practices @@ -5878,15 +6058,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.581850 | `get_bestpractices_get` | ✅ **EXPECTED** | -| 2 | 0.497056 | `deploy_pipeline_guidance_get` | ❌ | -| 3 | 0.495659 | `deploy_iac_rules_get` | ❌ | -| 4 | 0.486886 | `azureterraformbestpractices_get` | ❌ | -| 5 | 0.474511 | `deploy_plan_get` | ❌ | +| 1 | 0.581868 | `get_bestpractices_get` | ✅ **EXPECTED** | +| 2 | 0.497378 | `deploy_pipeline_guidance_get` | ❌ | +| 3 | 0.495688 | `deploy_iac_rules_get` | ❌ | +| 4 | 0.486928 | `azureterraformbestpractices_get` | ❌ | +| 5 | 0.474572 | `deploy_plan_get` | ❌ | --- -## Test 319 +## Test 329 **Expected Tool:** `get_bestpractices_get` **Prompt:** Get the latest Azure Functions best practices @@ -5897,13 +6077,13 @@ |------|-------|------|--------| | 1 | 0.610986 | `get_bestpractices_get` | ✅ **EXPECTED** | | 2 | 0.532790 | `azureterraformbestpractices_get` | ❌ | -| 3 | 0.518386 | `azureaibestpractices_get` | ❌ | -| 4 | 0.487322 | `deploy_iac_rules_get` | ❌ | -| 5 | 0.457812 | `deploy_pipeline_guidance_get` | ❌ | +| 3 | 0.487322 | `deploy_iac_rules_get` | ❌ | +| 4 | 0.458060 | `deploy_pipeline_guidance_get` | ❌ | +| 5 | 0.448034 | `extension_cli_install` | ❌ | --- -## Test 320 +## Test 330 **Expected Tool:** `get_bestpractices_get` **Prompt:** Get the latest Azure Static Web Apps best practices @@ -5914,13 +6094,13 @@ |------|-------|------|--------| | 1 | 0.557862 | `get_bestpractices_get` | ✅ **EXPECTED** | | 2 | 0.513262 | `azureterraformbestpractices_get` | ❌ | -| 3 | 0.510399 | `azureaibestpractices_get` | ❌ | -| 4 | 0.505123 | `deploy_iac_rules_get` | ❌ | -| 5 | 0.483482 | `deploy_pipeline_guidance_get` | ❌ | +| 3 | 0.505123 | `deploy_iac_rules_get` | ❌ | +| 4 | 0.483705 | `deploy_pipeline_guidance_get` | ❌ | +| 5 | 0.422144 | `cloudarchitect_design` | ❌ | --- -## Test 321 +## Test 331 **Expected Tool:** 
`get_bestpractices_get` **Prompt:** What are azure function best practices? @@ -5931,13 +6111,13 @@ |------|-------|------|--------| | 1 | 0.582541 | `get_bestpractices_get` | ✅ **EXPECTED** | | 2 | 0.500368 | `azureterraformbestpractices_get` | ❌ | -| 3 | 0.475018 | `azureaibestpractices_get` | ❌ | -| 4 | 0.472112 | `deploy_iac_rules_get` | ❌ | -| 5 | 0.432959 | `deploy_pipeline_guidance_get` | ❌ | +| 3 | 0.472112 | `deploy_iac_rules_get` | ❌ | +| 4 | 0.433134 | `deploy_pipeline_guidance_get` | ❌ | +| 5 | 0.432810 | `cloudarchitect_design` | ❌ | --- -## Test 322 +## Test 332 **Expected Tool:** `get_bestpractices_get` **Prompt:** configure azure mcp in coding agent for my repo @@ -5947,14 +6127,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.488855 | `deploy_plan_get` | ❌ | -| 2 | 0.460745 | `deploy_pipeline_guidance_get` | ❌ | +| 2 | 0.460956 | `deploy_pipeline_guidance_get` | ❌ | | 3 | 0.390270 | `deploy_iac_rules_get` | ❌ | -| 4 | 0.370753 | `azureaibestpractices_get` | ❌ | -| 5 | 0.370298 | `azureterraformbestpractices_get` | ❌ | +| 4 | 0.370298 | `azureterraformbestpractices_get` | ❌ | +| 5 | 0.369169 | `extension_cli_install` | ❌ | --- -## Test 323 +## Test 333 **Expected Tool:** `monitor_activitylog_list` **Prompt:** List the activity logs of the last month for @@ -5965,13 +6145,13 @@ |------|-------|------|--------| | 1 | 0.537893 | `monitor_activitylog_list` | ✅ **EXPECTED** | | 2 | 0.506212 | `monitor_resource_log_query` | ❌ | -| 3 | 0.371728 | `monitor_workspace_log_query` | ❌ | +| 3 | 0.371727 | `monitor_workspace_log_query` | ❌ | | 4 | 0.363798 | `resourcehealth_health-events_list` | ❌ | | 5 | 0.344629 | `datadog_monitoredresources_list` | ❌ | --- -## Test 324 +## Test 334 **Expected Tool:** `monitor_healthmodels_entity_get` **Prompt:** Show me the health status of entity using the health model @@ -5981,14 +6161,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.660947 | 
`monitor_healthmodels_entity_get` | ✅ **EXPECTED** | -| 2 | 0.608665 | `resourcehealth_availability-status_get` | ❌ | +| 2 | 0.609276 | `resourcehealth_availability-status_get` | ❌ | | 3 | 0.351697 | `resourcehealth_availability-status_list` | ❌ | | 4 | 0.328321 | `resourcehealth_health-events_list` | ❌ | -| 5 | 0.288127 | `foundry_models_deployments_list` | ❌ | +| 5 | 0.288705 | `foundry_models_deployments_list` | ❌ | --- -## Test 325 +## Test 335 **Expected Tool:** `monitor_metrics_definitions` **Prompt:** Get metric definitions for from the namespace @@ -5999,13 +6179,13 @@ |------|-------|------|--------| | 1 | 0.592640 | `monitor_metrics_definitions` | ✅ **EXPECTED** | | 2 | 0.424141 | `monitor_metrics_query` | ❌ | -| 3 | 0.368006 | `bicepschema_get` | ❌ | -| 4 | 0.332369 | `monitor_table_type_list` | ❌ | -| 5 | 0.325634 | `resourcehealth_availability-status_get` | ❌ | +| 3 | 0.368319 | `bicepschema_get` | ❌ | +| 4 | 0.332356 | `monitor_table_type_list` | ❌ | +| 5 | 0.324986 | `resourcehealth_availability-status_get` | ❌ | --- -## Test 326 +## Test 336 **Expected Tool:** `monitor_metrics_definitions` **Prompt:** Show me all available metrics and their definitions for storage account @@ -6016,13 +6196,13 @@ |------|-------|------|--------| | 1 | 0.607600 | `storage_account_get` | ❌ | | 2 | 0.587736 | `monitor_metrics_definitions` | ✅ **EXPECTED** | -| 3 | 0.544043 | `storage_blob_container_get` | ❌ | +| 3 | 0.544781 | `storage_blob_container_get` | ❌ | | 4 | 0.495829 | `storage_blob_get` | ❌ | | 5 | 0.473421 | `managedlustre_fs_list` | ❌ | --- -## Test 327 +## Test 337 **Expected Tool:** `monitor_metrics_definitions` **Prompt:** What metric definitions are available for the Application Insights resource @@ -6031,15 +6211,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.633173 | `monitor_metrics_definitions` | ✅ **EXPECTED** | -| 2 | 0.495513 | `monitor_metrics_query` | ❌ | -| 3 | 0.433945 | `monitor_resource_log_query` | ❌ | -| 4 
| 0.392960 | `loadtesting_testresource_list` | ❌ | -| 5 | 0.388569 | `bicepschema_get` | ❌ | +| 1 | 0.633136 | `monitor_metrics_definitions` | ✅ **EXPECTED** | +| 2 | 0.495555 | `monitor_metrics_query` | ❌ | +| 3 | 0.434042 | `monitor_resource_log_query` | ❌ | +| 4 | 0.392971 | `loadtesting_testresource_list` | ❌ | +| 5 | 0.388754 | `bicepschema_get` | ❌ | --- -## Test 328 +## Test 338 **Expected Tool:** `monitor_metrics_query` **Prompt:** Analyze the performance trends and response times for Application Insights resource over the last @@ -6056,7 +6236,7 @@ --- -## Test 329 +## Test 339 **Expected Tool:** `monitor_metrics_query` **Prompt:** Check the availability metrics for my Application Insights resource for the last @@ -6068,12 +6248,12 @@ | 1 | 0.557830 | `monitor_metrics_query` | ✅ **EXPECTED** | | 2 | 0.476671 | `monitor_resource_log_query` | ❌ | | 3 | 0.460611 | `resourcehealth_availability-status_list` | ❌ | -| 4 | 0.456360 | `quota_usage_check` | ❌ | +| 4 | 0.455904 | `quota_usage_check` | ❌ | | 5 | 0.438233 | `monitor_metrics_definitions` | ❌ | --- -## Test 330 +## Test 340 **Expected Tool:** `monitor_metrics_query` **Prompt:** Get the metric for over the last with intervals @@ -6085,12 +6265,12 @@ | 1 | 0.461249 | `monitor_metrics_query` | ✅ **EXPECTED** | | 2 | 0.390029 | `monitor_metrics_definitions` | ❌ | | 3 | 0.338557 | `monitor_resource_log_query` | ❌ | -| 4 | 0.335118 | `resourcehealth_availability-status_get` | ❌ | +| 4 | 0.334519 | `resourcehealth_availability-status_get` | ❌ | | 5 | 0.306338 | `resourcehealth_availability-status_list` | ❌ | --- -## Test 331 +## Test 341 **Expected Tool:** `monitor_metrics_query` **Prompt:** Investigate error rates and failed requests for Application Insights resource for the last @@ -6103,11 +6283,11 @@ | 2 | 0.492138 | `monitor_metrics_query` | ✅ **EXPECTED** | | 3 | 0.448148 | `applens_resource_diagnose` | ❌ | | 4 | 0.412184 | `resourcehealth_health-events_list` | ❌ | -| 5 | 0.397853 | `quota_usage_check` | 
❌ | +| 5 | 0.397335 | `quota_usage_check` | ❌ | --- -## Test 332 +## Test 342 **Expected Tool:** `monitor_metrics_query` **Prompt:** Query the metric for for the last @@ -6116,15 +6296,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.525890 | `monitor_metrics_query` | ✅ **EXPECTED** | -| 2 | 0.405838 | `monitor_resource_log_query` | ❌ | -| 3 | 0.384811 | `monitor_metrics_definitions` | ❌ | -| 4 | 0.347228 | `monitor_workspace_log_query` | ❌ | -| 5 | 0.330657 | `resourcehealth_availability-status_get` | ❌ | +| 1 | 0.525585 | `monitor_metrics_query` | ✅ **EXPECTED** | +| 2 | 0.406185 | `monitor_resource_log_query` | ❌ | +| 3 | 0.384482 | `monitor_metrics_definitions` | ❌ | +| 4 | 0.347723 | `monitor_workspace_log_query` | ❌ | +| 5 | 0.330713 | `resourcehealth_availability-status_get` | ❌ | --- -## Test 333 +## Test 343 **Expected Tool:** `monitor_metrics_query` **Prompt:** What's the request per second rate for my Application Insights resource over the last @@ -6136,12 +6316,12 @@ | 1 | 0.480140 | `monitor_metrics_query` | ✅ **EXPECTED** | | 2 | 0.444779 | `monitor_resource_log_query` | ❌ | | 3 | 0.388382 | `applens_resource_diagnose` | ❌ | -| 4 | 0.363672 | `quota_usage_check` | ❌ | +| 4 | 0.363412 | `quota_usage_check` | ❌ | | 5 | 0.350076 | `resourcehealth_health-events_list` | ❌ | --- -## Test 334 +## Test 344 **Expected Tool:** `monitor_resource_log_query` **Prompt:** Show me the logs for the past hour for the resource in the Log Analytics workspace @@ -6153,12 +6333,12 @@ | 1 | 0.687852 | `monitor_resource_log_query` | ✅ **EXPECTED** | | 2 | 0.621919 | `monitor_workspace_log_query` | ❌ | | 3 | 0.598393 | `monitor_activitylog_list` | ❌ | -| 4 | 0.485528 | `deploy_app_logs_get` | ❌ | +| 4 | 0.485633 | `deploy_app_logs_get` | ❌ | | 5 | 0.469703 | `monitor_metrics_query` | ❌ | --- -## Test 335 +## Test 345 **Expected Tool:** `monitor_table_list` **Prompt:** List all tables in the Log Analytics workspace @@ -6167,15 +6347,15 @@ | Rank 
| Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.851075 | `monitor_table_list` | ✅ **EXPECTED** | -| 2 | 0.725693 | `monitor_table_type_list` | ❌ | -| 3 | 0.620451 | `monitor_workspace_list` | ❌ | +| 1 | 0.850711 | `monitor_table_list` | ✅ **EXPECTED** | +| 2 | 0.725738 | `monitor_table_type_list` | ❌ | +| 3 | 0.620445 | `monitor_workspace_list` | ❌ | | 4 | 0.541928 | `kusto_table_list` | ❌ | | 5 | 0.539481 | `monitor_workspace_log_query` | ❌ | --- -## Test 336 +## Test 346 **Expected Tool:** `monitor_table_list` **Prompt:** Show me the tables in the Log Analytics workspace @@ -6184,15 +6364,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.798459 | `monitor_table_list` | ✅ **EXPECTED** | -| 2 | 0.701092 | `monitor_table_type_list` | ❌ | -| 3 | 0.600003 | `monitor_workspace_list` | ❌ | -| 4 | 0.542820 | `monitor_workspace_log_query` | ❌ | +| 1 | 0.798147 | `monitor_table_list` | ✅ **EXPECTED** | +| 2 | 0.701122 | `monitor_table_type_list` | ❌ | +| 3 | 0.599917 | `monitor_workspace_list` | ❌ | +| 4 | 0.542821 | `monitor_workspace_log_query` | ❌ | | 5 | 0.502882 | `monitor_resource_log_query` | ❌ | --- -## Test 337 +## Test 347 **Expected Tool:** `monitor_table_type_list` **Prompt:** List all available table types in the Log Analytics workspace @@ -6201,15 +6381,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.881468 | `monitor_table_type_list` | ✅ **EXPECTED** | -| 2 | 0.765694 | `monitor_table_list` | ❌ | -| 3 | 0.570092 | `monitor_workspace_list` | ❌ | +| 1 | 0.881524 | `monitor_table_type_list` | ✅ **EXPECTED** | +| 2 | 0.765557 | `monitor_table_list` | ❌ | +| 3 | 0.569921 | `monitor_workspace_list` | ❌ | | 4 | 0.504683 | `mysql_table_list` | ❌ | | 5 | 0.497622 | `monitor_workspace_log_query` | ❌ | --- -## Test 338 +## Test 348 **Expected Tool:** `monitor_table_type_list` **Prompt:** Show me the available table types in the Log Analytics workspace @@ -6218,15 +6398,15 @@ | Rank | 
Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.843110 | `monitor_table_type_list` | ✅ **EXPECTED** | -| 2 | 0.736831 | `monitor_table_list` | ❌ | -| 3 | 0.576934 | `monitor_workspace_list` | ❌ | +| 1 | 0.843138 | `monitor_table_type_list` | ✅ **EXPECTED** | +| 2 | 0.736728 | `monitor_table_list` | ❌ | +| 3 | 0.576731 | `monitor_workspace_list` | ❌ | | 4 | 0.509598 | `monitor_workspace_log_query` | ❌ | | 5 | 0.481189 | `mysql_table_list` | ❌ | --- -## Test 339 +## Test 349 **Expected Tool:** `monitor_webtests_create` **Prompt:** Create a new Standard Web Test with name in my subscription in in a given @@ -6235,15 +6415,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.651084 | `monitor_webtests_create` | ✅ **EXPECTED** | -| 2 | 0.570105 | `monitor_webtests_list` | ❌ | -| 3 | 0.550426 | `monitor_webtests_update` | ❌ | -| 4 | 0.533477 | `monitor_webtests_get` | ❌ | -| 5 | 0.482251 | `loadtesting_testresource_create` | ❌ | +| 1 | 0.650766 | `monitor_webtests_create` | ✅ **EXPECTED** | +| 2 | 0.569868 | `monitor_webtests_list` | ❌ | +| 3 | 0.550072 | `monitor_webtests_update` | ❌ | +| 4 | 0.533352 | `monitor_webtests_get` | ❌ | +| 5 | 0.482145 | `loadtesting_testresource_create` | ❌ | --- -## Test 340 +## Test 350 **Expected Tool:** `monitor_webtests_get` **Prompt:** Get Web Test details for in my subscription in @@ -6252,15 +6432,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.758910 | `monitor_webtests_get` | ✅ **EXPECTED** | -| 2 | 0.725360 | `monitor_webtests_list` | ❌ | -| 3 | 0.583663 | `loadtesting_testresource_list` | ❌ | -| 4 | 0.562785 | `monitor_webtests_update` | ❌ | -| 5 | 0.530432 | `monitor_webtests_create` | ❌ | +| 1 | 0.759015 | `monitor_webtests_get` | ✅ **EXPECTED** | +| 2 | 0.725302 | `monitor_webtests_list` | ❌ | +| 3 | 0.583816 | `loadtesting_testresource_list` | ❌ | +| 4 | 0.562797 | `monitor_webtests_update` | ❌ | +| 5 | 0.530581 | `monitor_webtests_create` | ❌ | 
--- -## Test 341 +## Test 351 **Expected Tool:** `monitor_webtests_list` **Prompt:** List all Web Test resources in my subscription @@ -6269,15 +6449,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.730616 | `monitor_webtests_list` | ✅ **EXPECTED** | +| 1 | 0.730837 | `monitor_webtests_list` | ✅ **EXPECTED** | | 2 | 0.610160 | `loadtesting_testresource_list` | ❌ | | 3 | 0.547708 | `grafana_list` | ❌ | -| 4 | 0.520828 | `redis_list` | ❌ | +| 4 | 0.520829 | `redis_list` | ❌ | | 5 | 0.496166 | `monitor_webtests_get` | ❌ | --- -## Test 342 +## Test 352 **Expected Tool:** `monitor_webtests_list` **Prompt:** List all Web Test resources in my subscription in @@ -6286,15 +6466,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.793807 | `monitor_webtests_list` | ✅ **EXPECTED** | +| 1 | 0.793880 | `monitor_webtests_list` | ✅ **EXPECTED** | | 2 | 0.675965 | `loadtesting_testresource_list` | ❌ | | 3 | 0.584429 | `monitor_webtests_get` | ❌ | -| 4 | 0.573602 | `group_list` | ❌ | +| 4 | 0.573620 | `group_list` | ❌ | | 5 | 0.546088 | `resourcehealth_availability-status_list` | ❌ | --- -## Test 343 +## Test 353 **Expected Tool:** `monitor_webtests_update` **Prompt:** Update an existing Standard Web Test with name in my subscription in in a given @@ -6303,15 +6483,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.686427 | `monitor_webtests_update` | ✅ **EXPECTED** | -| 2 | 0.558816 | `monitor_webtests_get` | ❌ | -| 3 | 0.557828 | `monitor_webtests_create` | ❌ | -| 4 | 0.553372 | `monitor_webtests_list` | ❌ | -| 5 | 0.509192 | `loadtesting_testrun_update` | ❌ | +| 1 | 0.686449 | `monitor_webtests_update` | ✅ **EXPECTED** | +| 2 | 0.559296 | `monitor_webtests_get` | ❌ | +| 3 | 0.558239 | `monitor_webtests_create` | ❌ | +| 4 | 0.553466 | `monitor_webtests_list` | ❌ | +| 5 | 0.508736 | `loadtesting_testrun_update` | ❌ | --- -## Test 344 +## Test 354 **Expected Tool:** `monitor_workspace_list` 
**Prompt:** List all Log Analytics workspaces in my subscription @@ -6320,15 +6500,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.813871 | `monitor_workspace_list` | ✅ **EXPECTED** | +| 1 | 0.813902 | `monitor_workspace_list` | ✅ **EXPECTED** | | 2 | 0.680201 | `grafana_list` | ❌ | -| 3 | 0.660127 | `monitor_table_list` | ❌ | +| 3 | 0.659497 | `monitor_table_list` | ❌ | | 4 | 0.610623 | `kusto_cluster_list` | ❌ | -| 5 | 0.599636 | `search_service_list` | ❌ | +| 5 | 0.600802 | `search_service_list` | ❌ | --- -## Test 345 +## Test 355 **Expected Tool:** `monitor_workspace_list` **Prompt:** Show me my Log Analytics workspaces @@ -6337,15 +6517,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.656159 | `monitor_workspace_list` | ✅ **EXPECTED** | -| 2 | 0.585355 | `monitor_table_list` | ❌ | -| 3 | 0.531036 | `monitor_table_type_list` | ❌ | -| 4 | 0.518275 | `grafana_list` | ❌ | -| 5 | 0.506663 | `monitor_workspace_log_query` | ❌ | +| 1 | 0.656194 | `monitor_workspace_list` | ✅ **EXPECTED** | +| 2 | 0.584758 | `monitor_table_list` | ❌ | +| 3 | 0.531083 | `monitor_table_type_list` | ❌ | +| 4 | 0.518254 | `grafana_list` | ❌ | +| 5 | 0.506772 | `monitor_workspace_log_query` | ❌ | --- -## Test 346 +## Test 356 **Expected Tool:** `monitor_workspace_list` **Prompt:** Show me the Log Analytics workspaces in my subscription @@ -6354,15 +6534,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.732964 | `monitor_workspace_list` | ✅ **EXPECTED** | +| 1 | 0.732962 | `monitor_workspace_list` | ✅ **EXPECTED** | | 2 | 0.601481 | `grafana_list` | ❌ | -| 3 | 0.580244 | `monitor_table_list` | ❌ | +| 3 | 0.579669 | `monitor_table_list` | ❌ | | 4 | 0.523782 | `monitor_workspace_log_query` | ❌ | | 5 | 0.522749 | `kusto_cluster_list` | ❌ | --- -## Test 347 +## Test 357 **Expected Tool:** `monitor_workspace_log_query` **Prompt:** Show me the logs for the past hour in the Log Analytics workspace @@ 
-6374,12 +6554,12 @@ | 1 | 0.610115 | `monitor_workspace_log_query` | ✅ **EXPECTED** | | 2 | 0.587614 | `monitor_resource_log_query` | ❌ | | 3 | 0.527733 | `monitor_activitylog_list` | ❌ | -| 4 | 0.498148 | `deploy_app_logs_get` | ❌ | -| 5 | 0.485982 | `monitor_table_list` | ❌ | +| 4 | 0.498269 | `deploy_app_logs_get` | ❌ | +| 5 | 0.485470 | `monitor_table_list` | ❌ | --- -## Test 348 +## Test 358 **Expected Tool:** `datadog_monitoredresources_list` **Prompt:** List all monitored resources in the Datadog resource @@ -6388,7 +6568,7 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.668828 | `datadog_monitoredresources_list` | ✅ **EXPECTED** | +| 1 | 0.668827 | `datadog_monitoredresources_list` | ✅ **EXPECTED** | | 2 | 0.454270 | `redis_list` | ❌ | | 3 | 0.413661 | `loadtesting_testresource_list` | ❌ | | 4 | 0.413173 | `monitor_metrics_query` | ❌ | @@ -6396,7 +6576,7 @@ --- -## Test 349 +## Test 359 **Expected Tool:** `datadog_monitoredresources_list` **Prompt:** Show me the monitored resources in the Datadog resource @@ -6413,7 +6593,7 @@ --- -## Test 350 +## Test 360 **Expected Tool:** `extension_azqr` **Prompt:** Check my Azure subscription for any compliance issues or recommendations @@ -6422,15 +6602,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.533403 | `quota_usage_check` | ❌ | +| 1 | 0.533164 | `quota_usage_check` | ❌ | | 2 | 0.481143 | `azureterraformbestpractices_get` | ❌ | | 3 | 0.476826 | `extension_azqr` | ✅ **EXPECTED** | -| 4 | 0.471547 | `subscription_list` | ❌ | +| 4 | 0.471499 | `subscription_list` | ❌ | | 5 | 0.468404 | `applens_resource_diagnose` | ❌ | --- -## Test 351 +## Test 361 **Expected Tool:** `extension_azqr` **Prompt:** Provide compliance recommendations for my current Azure subscription @@ -6443,11 +6623,11 @@ | 2 | 0.492863 | `get_bestpractices_get` | ❌ | | 3 | 0.476164 | `applicationinsights_recommendation_list` | ❌ | | 4 | 0.473365 | `deploy_iac_rules_get` | ❌ | -| 5 | 
0.468491 | `azureaibestpractices_get` | ❌ | +| 5 | 0.464954 | `cloudarchitect_design` | ❌ | --- -## Test 352 +## Test 362 **Expected Tool:** `extension_azqr` **Prompt:** Scan my Azure subscription for compliance recommendations @@ -6456,15 +6636,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.536917 | `azureterraformbestpractices_get` | ❌ | -| 2 | 0.516910 | `extension_azqr` | ✅ **EXPECTED** | -| 3 | 0.514947 | `applicationinsights_recommendation_list` | ❌ | -| 4 | 0.504918 | `quota_usage_check` | ❌ | -| 5 | 0.494808 | `deploy_plan_get` | ❌ | +| 1 | 0.536934 | `azureterraformbestpractices_get` | ❌ | +| 2 | 0.516925 | `extension_azqr` | ✅ **EXPECTED** | +| 3 | 0.514978 | `applicationinsights_recommendation_list` | ❌ | +| 4 | 0.504673 | `quota_usage_check` | ❌ | +| 5 | 0.494872 | `deploy_plan_get` | ❌ | --- -## Test 353 +## Test 363 **Expected Tool:** `quota_region_availability_list` **Prompt:** Show me the available regions for these resource types @@ -6473,15 +6653,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.590878 | `quota_region_availability_list` | ✅ **EXPECTED** | -| 2 | 0.413662 | `quota_usage_check` | ❌ | +| 1 | 0.590950 | `quota_region_availability_list` | ✅ **EXPECTED** | +| 2 | 0.413274 | `quota_usage_check` | ❌ | | 3 | 0.391332 | `redis_list` | ❌ | | 4 | 0.372940 | `resourcehealth_availability-status_list` | ❌ | -| 5 | 0.369915 | `managedlustre_fs_sku_get` | ❌ | +| 5 | 0.369855 | `managedlustre_fs_sku_get` | ❌ | --- -## Test 354 +## Test 364 **Expected Tool:** `quota_usage_check` **Prompt:** Check usage information for in region @@ -6490,15 +6670,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.609711 | `quota_usage_check` | ✅ **EXPECTED** | -| 2 | 0.491058 | `quota_region_availability_list` | ❌ | -| 3 | 0.384350 | `resourcehealth_availability-status_list` | ❌ | -| 4 | 0.376819 | `resourcehealth_availability-status_get` | ❌ | -| 5 | 0.371407 | 
`redis_list` | ❌ | +| 1 | 0.609378 | `quota_usage_check` | ✅ **EXPECTED** | +| 2 | 0.491300 | `quota_region_availability_list` | ❌ | +| 3 | 0.384035 | `resourcehealth_availability-status_list` | ❌ | +| 4 | 0.376096 | `resourcehealth_availability-status_get` | ❌ | +| 5 | 0.371392 | `redis_list` | ❌ | --- -## Test 355 +## Test 365 **Expected Tool:** `role_assignment_list` **Prompt:** List all available role assignments in my subscription @@ -6507,15 +6687,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.645258 | `role_assignment_list` | ✅ **EXPECTED** | -| 2 | 0.539757 | `subscription_list` | ❌ | -| 3 | 0.483988 | `group_list` | ❌ | +| 1 | 0.645259 | `role_assignment_list` | ✅ **EXPECTED** | +| 2 | 0.539761 | `subscription_list` | ❌ | +| 3 | 0.484047 | `group_list` | ❌ | | 4 | 0.478700 | `grafana_list` | ❌ | | 5 | 0.471364 | `cosmos_account_list` | ❌ | --- -## Test 356 +## Test 366 **Expected Tool:** `role_assignment_list` **Prompt:** Show me the available role assignments in my subscription @@ -6524,15 +6704,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.609704 | `role_assignment_list` | ✅ **EXPECTED** | -| 2 | 0.514697 | `subscription_list` | ❌ | +| 1 | 0.609705 | `role_assignment_list` | ✅ **EXPECTED** | +| 2 | 0.514696 | `subscription_list` | ❌ | | 3 | 0.456956 | `grafana_list` | ❌ | -| 4 | 0.449753 | `eventgrid_subscription_list` | ❌ | +| 4 | 0.449210 | `eventgrid_subscription_list` | ❌ | | 5 | 0.445149 | `redis_list` | ❌ | --- -## Test 357 +## Test 367 **Expected Tool:** `redis_list` **Prompt:** List all Redis resources in my subscription @@ -6544,12 +6724,12 @@ | 1 | 0.810504 | `redis_list` | ✅ **EXPECTED** | | 2 | 0.587836 | `grafana_list` | ❌ | | 3 | 0.512954 | `kusto_cluster_list` | ❌ | -| 4 | 0.508532 | `datadog_monitoredresources_list` | ❌ | +| 4 | 0.508531 | `datadog_monitoredresources_list` | ❌ | | 5 | 0.501218 | `postgres_server_list` | ❌ | --- -## Test 358 +## Test 368 **Expected 
Tool:** `redis_list` **Prompt:** Show me my Redis resources @@ -6559,14 +6739,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.685128 | `redis_list` | ✅ **EXPECTED** | -| 2 | 0.374327 | `grafana_list` | ❌ | +| 2 | 0.374328 | `grafana_list` | ❌ | | 3 | 0.364197 | `datadog_monitoredresources_list` | ❌ | -| 4 | 0.359659 | `mysql_server_list` | ❌ | -| 5 | 0.331502 | `mysql_database_list` | ❌ | +| 4 | 0.359774 | `mysql_server_list` | ❌ | +| 5 | 0.331841 | `mysql_database_list` | ❌ | --- -## Test 359 +## Test 369 **Expected Tool:** `redis_list` **Prompt:** Show me the Redis resources in my subscription @@ -6583,7 +6763,7 @@ --- -## Test 360 +## Test 370 **Expected Tool:** `redis_list` **Prompt:** Show me my Redis caches @@ -6593,14 +6773,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.572767 | `redis_list` | ✅ **EXPECTED** | -| 2 | 0.316630 | `mysql_database_list` | ❌ | +| 2 | 0.316869 | `mysql_database_list` | ❌ | | 3 | 0.301786 | `postgres_database_list` | ❌ | -| 4 | 0.286513 | `mysql_server_list` | ❌ | +| 4 | 0.286679 | `mysql_server_list` | ❌ | | 5 | 0.273014 | `kusto_cluster_list` | ❌ | --- -## Test 361 +## Test 371 **Expected Tool:** `redis_list` **Prompt:** Get Redis clusters @@ -6610,14 +6790,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.478070 | `redis_list` | ✅ **EXPECTED** | -| 2 | 0.456308 | `kusto_cluster_list` | ❌ | +| 2 | 0.456309 | `kusto_cluster_list` | ❌ | | 3 | 0.384630 | `kusto_cluster_get` | ❌ | -| 4 | 0.359935 | `kusto_database_list` | ❌ | +| 4 | 0.359573 | `kusto_database_list` | ❌ | | 5 | 0.343305 | `aks_cluster_get` | ❌ | --- -## Test 362 +## Test 372 **Expected Tool:** `group_list` **Prompt:** List all resource groups in my subscription @@ -6626,15 +6806,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.755935 | `group_list` | ✅ **EXPECTED** | +| 1 | 0.755933 | `group_list` | ✅ **EXPECTED** | | 2 | 0.566552 | 
`workbooks_list` | ❌ | | 3 | 0.564566 | `loadtesting_testresource_list` | ❌ | | 4 | 0.552633 | `datadog_monitoredresources_list` | ❌ | -| 5 | 0.549477 | `monitor_webtests_list` | ❌ | +| 5 | 0.549537 | `monitor_webtests_list` | ❌ | --- -## Test 363 +## Test 373 **Expected Tool:** `group_list` **Prompt:** Show me my resource groups @@ -6643,15 +6823,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.529504 | `group_list` | ✅ **EXPECTED** | +| 1 | 0.529503 | `group_list` | ✅ **EXPECTED** | | 2 | 0.464690 | `redis_list` | ❌ | | 3 | 0.463685 | `datadog_monitoredresources_list` | ❌ | -| 4 | 0.462391 | `mysql_server_list` | ❌ | +| 4 | 0.462699 | `mysql_server_list` | ❌ | | 5 | 0.460280 | `loadtesting_testresource_list` | ❌ | --- -## Test 364 +## Test 374 **Expected Tool:** `group_list` **Prompt:** Show me the resource groups in my subscription @@ -6660,15 +6840,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.665772 | `group_list` | ✅ **EXPECTED** | +| 1 | 0.665819 | `group_list` | ✅ **EXPECTED** | | 2 | 0.532656 | `datadog_monitoredresources_list` | ❌ | | 3 | 0.532505 | `redis_list` | ❌ | -| 4 | 0.532015 | `eventgrid_topic_list` | ❌ | +| 4 | 0.532054 | `eventgrid_topic_list` | ❌ | | 5 | 0.531920 | `resourcehealth_availability-status_list` | ❌ | --- -## Test 365 +## Test 375 **Expected Tool:** `resourcehealth_availability-status_get` **Prompt:** Get the availability status for resource @@ -6677,15 +6857,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.556926 | `resourcehealth_availability-status_get` | ✅ **EXPECTED** | +| 1 | 0.556629 | `resourcehealth_availability-status_get` | ✅ **EXPECTED** | | 2 | 0.538273 | `resourcehealth_availability-status_list` | ❌ | -| 3 | 0.378030 | `quota_usage_check` | ❌ | +| 3 | 0.377586 | `quota_usage_check` | ❌ | | 4 | 0.373112 | `monitor_healthmodels_entity_get` | ❌ | -| 5 | 0.349981 | `datadog_monitoredresources_list` | ❌ | +| 5 | 0.349980 | 
`datadog_monitoredresources_list` | ❌ | --- -## Test 366 +## Test 376 **Expected Tool:** `resourcehealth_availability-status_get` **Prompt:** Show me the health status of the storage account @@ -6695,14 +6875,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.576591 | `storage_account_get` | ❌ | -| 2 | 0.564706 | `resourcehealth_availability-status_get` | ✅ **EXPECTED** | -| 3 | 0.555636 | `storage_blob_container_get` | ❌ | +| 2 | 0.564128 | `resourcehealth_availability-status_get` | ✅ **EXPECTED** | +| 3 | 0.556167 | `storage_blob_container_get` | ❌ | | 4 | 0.487207 | `storage_blob_get` | ❌ | | 5 | 0.466885 | `resourcehealth_availability-status_list` | ❌ | --- -## Test 367 +## Test 377 **Expected Tool:** `resourcehealth_availability-status_get` **Prompt:** What is the availability status of virtual machine in resource group ? @@ -6712,14 +6892,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.577398 | `resourcehealth_availability-status_list` | ❌ | -| 2 | 0.502794 | `resourcehealth_availability-status_get` | ✅ **EXPECTED** | -| 3 | 0.424939 | `mysql_server_list` | ❌ | +| 2 | 0.501568 | `resourcehealth_availability-status_get` | ✅ **EXPECTED** | +| 3 | 0.425180 | `mysql_server_list` | ❌ | | 4 | 0.412025 | `loadtesting_testresource_list` | ❌ | | 5 | 0.393479 | `managedlustre_fs_list` | ❌ | --- -## Test 368 +## Test 378 **Expected Tool:** `resourcehealth_availability-status_list` **Prompt:** List availability status for all resources in my subscription @@ -6732,11 +6912,11 @@ | 2 | 0.585501 | `redis_list` | ❌ | | 3 | 0.549914 | `loadtesting_testresource_list` | ❌ | | 4 | 0.548549 | `grafana_list` | ❌ | -| 5 | 0.544514 | `subscription_list` | ❌ | +| 5 | 0.544505 | `subscription_list` | ❌ | --- -## Test 369 +## Test 379 **Expected Tool:** `resourcehealth_availability-status_list` **Prompt:** Show me the health status of all my Azure resources @@ -6746,14 +6926,14 @@ | Rank | Score | Tool | Status | 
|------|-------|------|--------| | 1 | 0.644982 | `resourcehealth_availability-status_list` | ✅ **EXPECTED** | -| 2 | 0.544917 | `resourcehealth_availability-status_get` | ❌ | +| 2 | 0.545208 | `resourcehealth_availability-status_get` | ❌ | | 3 | 0.509740 | `resourcehealth_health-events_list` | ❌ | -| 4 | 0.508766 | `quota_usage_check` | ❌ | +| 4 | 0.508252 | `quota_usage_check` | ❌ | | 5 | 0.505776 | `redis_list` | ❌ | --- -## Test 370 +## Test 380 **Expected Tool:** `resourcehealth_availability-status_list` **Prompt:** What resources in resource group have health issues? @@ -6763,14 +6943,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.596890 | `resourcehealth_availability-status_list` | ✅ **EXPECTED** | -| 2 | 0.550812 | `resourcehealth_availability-status_get` | ❌ | +| 2 | 0.549900 | `resourcehealth_availability-status_get` | ❌ | | 3 | 0.496640 | `resourcehealth_health-events_list` | ❌ | | 4 | 0.441921 | `applens_resource_diagnose` | ❌ | | 5 | 0.433614 | `loadtesting_testresource_list` | ❌ | --- -## Test 371 +## Test 381 **Expected Tool:** `resourcehealth_health-events_list` **Prompt:** List all service health events in my subscription @@ -6779,15 +6959,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.690720 | `resourcehealth_health-events_list` | ✅ **EXPECTED** | -| 2 | 0.553485 | `search_service_list` | ❌ | -| 3 | 0.534169 | `eventgrid_topic_list` | ❌ | -| 4 | 0.529200 | `eventgrid_subscription_list` | ❌ | +| 1 | 0.690719 | `resourcehealth_health-events_list` | ✅ **EXPECTED** | +| 2 | 0.554895 | `search_service_list` | ❌ | +| 3 | 0.534250 | `eventgrid_topic_list` | ❌ | +| 4 | 0.529761 | `eventgrid_subscription_list` | ❌ | | 5 | 0.518372 | `resourcehealth_availability-status_list` | ❌ | --- -## Test 372 +## Test 382 **Expected Tool:** `resourcehealth_health-events_list` **Prompt:** Show me Azure service health events for subscription @@ -6797,14 +6977,14 @@ | Rank | Score | Tool | Status | 
|------|-------|------|--------| | 1 | 0.686448 | `resourcehealth_health-events_list` | ✅ **EXPECTED** | -| 2 | 0.534707 | `eventgrid_subscription_list` | ❌ | -| 3 | 0.513302 | `search_service_list` | ❌ | -| 4 | 0.513237 | `eventgrid_topic_list` | ❌ | -| 5 | 0.501121 | `subscription_list` | ❌ | +| 2 | 0.534556 | `eventgrid_subscription_list` | ❌ | +| 3 | 0.513815 | `search_service_list` | ❌ | +| 4 | 0.513259 | `eventgrid_topic_list` | ❌ | +| 5 | 0.501135 | `subscription_list` | ❌ | --- -## Test 373 +## Test 383 **Expected Tool:** `resourcehealth_health-events_list` **Prompt:** What service issues have occurred in the last 30 days? @@ -6815,13 +6995,13 @@ |------|-------|------|--------| | 1 | 0.450841 | `resourcehealth_health-events_list` | ✅ **EXPECTED** | | 2 | 0.267663 | `applens_resource_diagnose` | ❌ | -| 3 | 0.245720 | `cloudarchitect_design` | ❌ | +| 3 | 0.245709 | `cloudarchitect_design` | ❌ | | 4 | 0.216847 | `resourcehealth_availability-status_list` | ❌ | -| 5 | 0.211043 | `search_service_list` | ❌ | +| 5 | 0.211842 | `search_service_list` | ❌ | --- -## Test 374 +## Test 384 **Expected Tool:** `resourcehealth_health-events_list` **Prompt:** List active service health events in my subscription @@ -6831,14 +7011,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.685391 | `resourcehealth_health-events_list` | ✅ **EXPECTED** | -| 2 | 0.527255 | `eventgrid_subscription_list` | ❌ | -| 3 | 0.523975 | `eventgrid_topic_list` | ❌ | -| 4 | 0.518668 | `search_service_list` | ❌ | +| 2 | 0.527905 | `eventgrid_subscription_list` | ❌ | +| 3 | 0.524063 | `eventgrid_topic_list` | ❌ | +| 4 | 0.520197 | `search_service_list` | ❌ | | 5 | 0.502064 | `resourcehealth_availability-status_list` | ❌ | --- -## Test 375 +## Test 385 **Expected Tool:** `resourcehealth_health-events_list` **Prompt:** Show me planned maintenance events for my Azure services @@ -6848,14 +7028,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 
0.565851 | `resourcehealth_health-events_list` | ✅ **EXPECTED** | -| 2 | 0.436322 | `search_service_list` | ❌ | -| 3 | 0.404191 | `eventgrid_subscription_list` | ❌ | +| 2 | 0.437868 | `search_service_list` | ❌ | +| 3 | 0.403665 | `eventgrid_subscription_list` | ❌ | | 4 | 0.402493 | `resourcehealth_availability-status_list` | ❌ | -| 5 | 0.398050 | `quota_usage_check` | ❌ | +| 5 | 0.397735 | `quota_usage_check` | ❌ | --- -## Test 376 +## Test 386 **Expected Tool:** `servicebus_queue_details` **Prompt:** Show me the details of service bus queue @@ -6864,15 +7044,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.642876 | `servicebus_queue_details` | ✅ **EXPECTED** | -| 2 | 0.460932 | `servicebus_topic_subscription_details` | ❌ | -| 3 | 0.437000 | `servicebus_topic_details` | ❌ | -| 4 | 0.385812 | `search_knowledge_base_get` | ❌ | -| 5 | 0.384139 | `storage_account_get` | ❌ | +| 1 | 0.642896 | `servicebus_queue_details` | ✅ **EXPECTED** | +| 2 | 0.460952 | `servicebus_topic_subscription_details` | ❌ | +| 3 | 0.436934 | `servicebus_topic_details` | ❌ | +| 4 | 0.385791 | `search_knowledge_base_get` | ❌ | +| 5 | 0.384199 | `storage_account_get` | ❌ | --- -## Test 377 +## Test 387 **Expected Tool:** `servicebus_topic_details` **Prompt:** Show me the details of service bus topic @@ -6881,15 +7061,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.642762 | `servicebus_topic_details` | ✅ **EXPECTED** | -| 2 | 0.571860 | `servicebus_topic_subscription_details` | ❌ | +| 1 | 0.642952 | `servicebus_topic_details` | ✅ **EXPECTED** | +| 2 | 0.571861 | `servicebus_topic_subscription_details` | ❌ | | 3 | 0.483976 | `servicebus_queue_details` | ❌ | -| 4 | 0.482735 | `eventgrid_topic_list` | ❌ | -| 5 | 0.457603 | `eventgrid_subscription_list` | ❌ | +| 4 | 0.482958 | `eventgrid_topic_list` | ❌ | +| 5 | 0.458711 | `eventgrid_subscription_list` | ❌ | --- -## Test 378 +## Test 388 **Expected Tool:** 
`servicebus_topic_subscription_details` **Prompt:** Show me the details of service bus subscription @@ -6899,14 +7079,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.633187 | `servicebus_topic_subscription_details` | ✅ **EXPECTED** | -| 2 | 0.517516 | `servicebus_topic_details` | ❌ | +| 2 | 0.517623 | `servicebus_topic_details` | ❌ | | 3 | 0.494515 | `servicebus_queue_details` | ❌ | -| 4 | 0.493776 | `eventgrid_topic_list` | ❌ | -| 5 | 0.471876 | `eventgrid_subscription_list` | ❌ | +| 4 | 0.493853 | `eventgrid_topic_list` | ❌ | +| 5 | 0.472128 | `eventgrid_subscription_list` | ❌ | --- -## Test 379 +## Test 389 **Expected Tool:** `signalr_runtime_get` **Prompt:** Show me the details of SignalR @@ -6915,7 +7095,7 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.532742 | `signalr_runtime_get` | ✅ **EXPECTED** | +| 1 | 0.532544 | `signalr_runtime_get` | ✅ **EXPECTED** | | 2 | 0.355028 | `redis_list` | ❌ | | 3 | 0.329804 | `foundry_resource_get` | ❌ | | 4 | 0.319981 | `sql_server_show` | ❌ | @@ -6923,7 +7103,7 @@ --- -## Test 380 +## Test 390 **Expected Tool:** `signalr_runtime_get` **Prompt:** Show me the network information of SignalR runtime @@ -6932,15 +7112,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.573540 | `signalr_runtime_get` | ✅ **EXPECTED** | +| 1 | 0.573446 | `signalr_runtime_get` | ✅ **EXPECTED** | | 2 | 0.337342 | `sql_server_show` | ❌ | | 3 | 0.306559 | `foundry_resource_get` | ❌ | | 4 | 0.305021 | `redis_list` | ❌ | -| 5 | 0.301114 | `servicebus_topic_details` | ❌ | +| 5 | 0.300956 | `servicebus_topic_details` | ❌ | --- -## Test 381 +## Test 391 **Expected Tool:** `signalr_runtime_get` **Prompt:** Describe the SignalR runtime in resource group @@ -6949,15 +7129,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.710281 | `signalr_runtime_get` | ✅ **EXPECTED** | +| 1 | 0.710353 | `signalr_runtime_get` | ✅ **EXPECTED** | | 2 | 
0.411396 | `loadtesting_testresource_list` | ❌ | | 3 | 0.410606 | `foundry_resource_get` | ❌ | | 4 | 0.399412 | `resourcehealth_availability-status_list` | ❌ | -| 5 | 0.382028 | `sql_server_list` | ❌ | +| 5 | 0.382099 | `sql_server_list` | ❌ | --- -## Test 382 +## Test 392 **Expected Tool:** `signalr_runtime_get` **Prompt:** Get information about my SignalR runtime in @@ -6966,15 +7146,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.715701 | `signalr_runtime_get` | ✅ **EXPECTED** | -| 2 | 0.458894 | `foundry_resource_get` | ❌ | -| 3 | 0.431212 | `resourcehealth_availability-status_list` | ❌ | -| 4 | 0.430721 | `loadtesting_testresource_list` | ❌ | -| 5 | 0.417313 | `functionapp_get` | ❌ | +| 1 | 0.715974 | `signalr_runtime_get` | ✅ **EXPECTED** | +| 2 | 0.459045 | `foundry_resource_get` | ❌ | +| 3 | 0.430829 | `resourcehealth_availability-status_list` | ❌ | +| 4 | 0.430765 | `loadtesting_testresource_list` | ❌ | +| 5 | 0.417032 | `functionapp_get` | ❌ | --- -## Test 383 +## Test 393 **Expected Tool:** `signalr_runtime_get` **Prompt:** Show all the SignalRs information in @@ -6983,15 +7163,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.563883 | `signalr_runtime_get` | ✅ **EXPECTED** | +| 1 | 0.564072 | `signalr_runtime_get` | ✅ **EXPECTED** | | 2 | 0.501077 | `redis_list` | ❌ | | 3 | 0.494478 | `resourcehealth_availability-status_list` | ❌ | | 4 | 0.481428 | `loadtesting_testresource_list` | ❌ | -| 5 | 0.462090 | `mysql_server_list` | ❌ | +| 5 | 0.462417 | `mysql_server_list` | ❌ | --- -## Test 384 +## Test 394 **Expected Tool:** `signalr_runtime_get` **Prompt:** List all SignalRs in my subscription @@ -7000,15 +7180,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.530514 | `signalr_runtime_get` | ✅ **EXPECTED** | -| 2 | 0.507654 | `postgres_server_list` | ❌ | +| 1 | 0.530646 | `signalr_runtime_get` | ✅ **EXPECTED** | +| 2 | 0.507653 | `postgres_server_list` | ❌ | | 3 
| 0.495157 | `redis_list` | ❌ | | 4 | 0.494498 | `kusto_cluster_list` | ❌ | -| 5 | 0.487906 | `subscription_list` | ❌ | +| 5 | 0.487856 | `subscription_list` | ❌ | --- -## Test 385 +## Test 395 **Expected Tool:** `sql_db_create` **Prompt:** Create a new SQL database named in server @@ -7018,14 +7198,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.516780 | `sql_db_create` | ✅ **EXPECTED** | -| 2 | 0.470892 | `sql_server_create` | ❌ | -| 3 | 0.420389 | `sql_db_rename` | ❌ | +| 2 | 0.470913 | `sql_server_create` | ❌ | +| 3 | 0.420504 | `sql_db_rename` | ❌ | | 4 | 0.408515 | `sql_db_delete` | ❌ | | 5 | 0.404860 | `sql_server_delete` | ❌ | --- -## Test 386 +## Test 396 **Expected Tool:** `sql_db_create` **Prompt:** Create a SQL database with Basic tier in server @@ -7035,14 +7215,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.571760 | `sql_db_create` | ✅ **EXPECTED** | -| 2 | 0.459672 | `sql_server_create` | ❌ | -| 3 | 0.437525 | `sql_server_delete` | ❌ | +| 2 | 0.459683 | `sql_server_create` | ❌ | +| 3 | 0.437526 | `sql_server_delete` | ❌ | | 4 | 0.420843 | `sql_db_show` | ❌ | -| 5 | 0.417661 | `sql_db_delete` | ❌ | +| 5 | 0.417662 | `sql_db_delete` | ❌ | --- -## Test 387 +## Test 397 **Expected Tool:** `sql_db_create` **Prompt:** Create a new database called on SQL server in resource group @@ -7052,14 +7232,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.604472 | `sql_db_create` | ✅ **EXPECTED** | -| 2 | 0.545906 | `sql_server_create` | ❌ | -| 3 | 0.503938 | `sql_db_rename` | ❌ | +| 2 | 0.545986 | `sql_server_create` | ❌ | +| 3 | 0.504013 | `sql_db_rename` | ❌ | | 4 | 0.494377 | `sql_db_show` | ❌ | | 5 | 0.473975 | `sql_db_list` | ❌ | --- -## Test 388 +## Test 398 **Expected Tool:** `sql_db_delete` **Prompt:** Delete the SQL database from server @@ -7070,13 +7250,13 @@ |------|-------|------|--------| | 1 | 0.568196 | `sql_db_delete` | ✅ **EXPECTED** | | 2 | 0.567412 | 
`sql_server_delete` | ❌ | -| 3 | 0.391436 | `sql_db_rename` | ❌ | -| 4 | 0.386721 | `sql_server_firewall-rule_delete` | ❌ | +| 3 | 0.391509 | `sql_db_rename` | ❌ | +| 4 | 0.386564 | `sql_server_firewall-rule_delete` | ❌ | | 5 | 0.364776 | `sql_db_show` | ❌ | --- -## Test 389 +## Test 399 **Expected Tool:** `sql_db_delete` **Prompt:** Remove database from SQL server in resource group @@ -7088,12 +7268,12 @@ | 1 | 0.567513 | `sql_server_delete` | ❌ | | 2 | 0.543440 | `sql_db_delete` | ✅ **EXPECTED** | | 3 | 0.500756 | `sql_db_show` | ❌ | -| 4 | 0.481023 | `sql_db_rename` | ❌ | +| 4 | 0.481083 | `sql_db_rename` | ❌ | | 5 | 0.478729 | `sql_db_list` | ❌ | --- -## Test 390 +## Test 400 **Expected Tool:** `sql_db_delete` **Prompt:** Delete the database called on server @@ -7103,14 +7283,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.509916 | `sql_db_delete` | ✅ **EXPECTED** | -| 2 | 0.490893 | `sql_server_delete` | ❌ | +| 2 | 0.490892 | `sql_server_delete` | ❌ | | 3 | 0.364494 | `postgres_database_list` | ❌ | -| 4 | 0.355416 | `mysql_database_list` | ❌ | -| 5 | 0.347703 | `sql_db_rename` | ❌ | +| 4 | 0.354710 | `mysql_database_list` | ❌ | +| 5 | 0.347837 | `sql_db_rename` | ❌ | --- -## Test 391 +## Test 401 **Expected Tool:** `sql_db_list` **Prompt:** List all databases in the Azure SQL server @@ -7119,15 +7299,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.643138 | `sql_db_list` | ✅ **EXPECTED** | -| 2 | 0.639644 | `mysql_database_list` | ❌ | -| 3 | 0.609116 | `postgres_database_list` | ❌ | -| 4 | 0.602872 | `cosmos_database_list` | ❌ | -| 5 | 0.569464 | `kusto_database_list` | ❌ | +| 1 | 0.643186 | `sql_db_list` | ✅ **EXPECTED** | +| 2 | 0.639398 | `mysql_database_list` | ❌ | +| 3 | 0.609178 | `postgres_database_list` | ❌ | +| 4 | 0.602890 | `cosmos_database_list` | ❌ | +| 5 | 0.570278 | `kusto_database_list` | ❌ | --- -## Test 392 +## Test 402 **Expected Tool:** `sql_db_list` **Prompt:** Show me all the 
databases configuration details in the Azure SQL server @@ -7138,13 +7318,13 @@ |------|-------|------|--------| | 1 | 0.617746 | `sql_server_show` | ❌ | | 2 | 0.609322 | `sql_db_list` | ✅ **EXPECTED** | -| 3 | 0.557353 | `mysql_database_list` | ❌ | +| 3 | 0.557245 | `mysql_database_list` | ❌ | | 4 | 0.553488 | `mysql_server_config_get` | ❌ | | 5 | 0.524274 | `sql_db_show` | ❌ | --- -## Test 393 +## Test 403 **Expected Tool:** `sql_db_rename` **Prompt:** Rename the SQL database on server to @@ -7153,15 +7333,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.593251 | `sql_db_rename` | ✅ **EXPECTED** | -| 2 | 0.425282 | `sql_server_delete` | ❌ | -| 3 | 0.416207 | `sql_db_delete` | ❌ | -| 4 | 0.396947 | `sql_db_create` | ❌ | -| 5 | 0.346018 | `sql_db_show` | ❌ | +| 1 | 0.593278 | `sql_db_rename` | ✅ **EXPECTED** | +| 2 | 0.425161 | `sql_server_delete` | ❌ | +| 3 | 0.416057 | `sql_db_delete` | ❌ | +| 4 | 0.396824 | `sql_db_create` | ❌ | +| 5 | 0.345805 | `sql_db_show` | ❌ | --- -## Test 394 +## Test 404 **Expected Tool:** `sql_db_rename` **Prompt:** Rename my Azure SQL database to on server @@ -7170,15 +7350,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.711257 | `sql_db_rename` | ✅ **EXPECTED** | -| 2 | 0.516770 | `sql_server_delete` | ❌ | -| 3 | 0.506834 | `sql_db_delete` | ❌ | -| 4 | 0.501963 | `sql_db_create` | ❌ | -| 5 | 0.434094 | `sql_server_show` | ❌ | +| 1 | 0.711063 | `sql_db_rename` | ✅ **EXPECTED** | +| 2 | 0.516485 | `sql_server_delete` | ❌ | +| 3 | 0.506499 | `sql_db_delete` | ❌ | +| 4 | 0.501476 | `sql_db_create` | ❌ | +| 5 | 0.433897 | `sql_server_show` | ❌ | --- -## Test 395 +## Test 405 **Expected Tool:** `sql_db_show` **Prompt:** Get the configuration details for the SQL database on server @@ -7187,15 +7367,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.610991 | `sql_server_show` | ❌ | -| 2 | 0.593150 | `postgres_server_config_get` | ❌ | -| 3 | 0.530422 
| `mysql_server_config_get` | ❌ | -| 4 | 0.528136 | `sql_db_show` | ✅ **EXPECTED** | -| 5 | 0.465693 | `sql_db_list` | ❌ | +| 1 | 0.610788 | `sql_server_show` | ❌ | +| 2 | 0.593239 | `postgres_server_config_get` | ❌ | +| 3 | 0.530655 | `mysql_server_config_get` | ❌ | +| 4 | 0.528543 | `sql_db_show` | ✅ **EXPECTED** | +| 5 | 0.465617 | `sql_db_list` | ❌ | --- -## Test 396 +## Test 406 **Expected Tool:** `sql_db_show` **Prompt:** Show me the details of SQL database in server @@ -7207,12 +7387,12 @@ | 1 | 0.530095 | `sql_db_show` | ✅ **EXPECTED** | | 2 | 0.503681 | `sql_server_show` | ❌ | | 3 | 0.440073 | `sql_db_list` | ❌ | -| 4 | 0.439076 | `mysql_table_schema_get` | ❌ | -| 5 | 0.432919 | `mysql_database_list` | ❌ | +| 4 | 0.438622 | `mysql_table_schema_get` | ❌ | +| 5 | 0.432990 | `mysql_database_list` | ❌ | --- -## Test 397 +## Test 407 **Expected Tool:** `sql_db_update` **Prompt:** Update the performance tier of SQL database on server @@ -7221,15 +7401,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.603271 | `sql_db_update` | ✅ **EXPECTED** | +| 1 | 0.603366 | `sql_db_update` | ✅ **EXPECTED** | | 2 | 0.467571 | `sql_db_create` | ❌ | -| 3 | 0.440442 | `sql_db_rename` | ❌ | +| 3 | 0.440493 | `sql_db_rename` | ❌ | | 4 | 0.427621 | `sql_db_show` | ❌ | | 5 | 0.413941 | `sql_server_delete` | ❌ | --- -## Test 398 +## Test 408 **Expected Tool:** `sql_db_update` **Prompt:** Scale SQL database on server to use SKU @@ -7238,15 +7418,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.550449 | `sql_db_update` | ✅ **EXPECTED** | +| 1 | 0.550556 | `sql_db_update` | ✅ **EXPECTED** | | 2 | 0.418358 | `sql_server_delete` | ❌ | | 3 | 0.401817 | `sql_db_list` | ❌ | -| 4 | 0.395508 | `sql_db_rename` | ❌ | +| 4 | 0.395518 | `sql_db_rename` | ❌ | | 5 | 0.394770 | `sql_db_show` | ❌ | --- -## Test 399 +## Test 409 **Expected Tool:** `sql_elastic-pool_list` **Prompt:** List all elastic pools in SQL server @@ -7257,13 +7437,13 
@@ |------|-------|------|--------| | 1 | 0.678124 | `sql_elastic-pool_list` | ✅ **EXPECTED** | | 2 | 0.502376 | `sql_db_list` | ❌ | -| 3 | 0.498367 | `mysql_database_list` | ❌ | -| 4 | 0.485249 | `aks_nodepool_get` | ❌ | +| 3 | 0.498208 | `mysql_database_list` | ❌ | +| 4 | 0.485167 | `aks_nodepool_get` | ❌ | | 5 | 0.479044 | `sql_server_show` | ❌ | --- -## Test 400 +## Test 410 **Expected Tool:** `sql_elastic-pool_list` **Prompt:** Show me the elastic pools configured for SQL server @@ -7274,13 +7454,13 @@ |------|-------|------|--------| | 1 | 0.606425 | `sql_elastic-pool_list` | ✅ **EXPECTED** | | 2 | 0.502877 | `sql_server_show` | ❌ | -| 3 | 0.457164 | `sql_db_list` | ❌ | -| 4 | 0.450743 | `aks_nodepool_get` | ❌ | -| 5 | 0.432816 | `mysql_database_list` | ❌ | +| 3 | 0.457163 | `sql_db_list` | ❌ | +| 4 | 0.450655 | `aks_nodepool_get` | ❌ | +| 5 | 0.432815 | `mysql_database_list` | ❌ | --- -## Test 401 +## Test 411 **Expected Tool:** `sql_elastic-pool_list` **Prompt:** What elastic pools are available in my SQL server ? 
@@ -7290,14 +7470,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.592709 | `sql_elastic-pool_list` | ✅ **EXPECTED** | -| 2 | 0.420325 | `mysql_database_list` | ❌ | -| 3 | 0.407169 | `aks_nodepool_get` | ❌ | -| 4 | 0.402616 | `mysql_server_list` | ❌ | +| 2 | 0.420481 | `mysql_database_list` | ❌ | +| 3 | 0.407084 | `aks_nodepool_get` | ❌ | +| 4 | 0.402474 | `mysql_server_list` | ❌ | | 5 | 0.397670 | `sql_db_list` | ❌ | --- -## Test 402 +## Test 412 **Expected Tool:** `sql_server_create` **Prompt:** Create a new Azure SQL server named in resource group @@ -7306,15 +7486,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.682605 | `sql_server_create` | ✅ **EXPECTED** | -| 2 | 0.563707 | `sql_db_create` | ❌ | -| 3 | 0.529198 | `sql_server_list` | ❌ | +| 1 | 0.682736 | `sql_server_create` | ✅ **EXPECTED** | +| 2 | 0.563708 | `sql_db_create` | ❌ | +| 3 | 0.529372 | `sql_server_list` | ❌ | | 4 | 0.482102 | `storage_account_create` | ❌ | -| 5 | 0.474180 | `sql_db_rename` | ❌ | +| 5 | 0.474207 | `sql_db_rename` | ❌ | --- -## Test 403 +## Test 413 **Expected Tool:** `sql_server_create` **Prompt:** Create an Azure SQL server with name in location with admin user @@ -7323,15 +7503,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.618354 | `sql_server_create` | ✅ **EXPECTED** | -| 2 | 0.510222 | `sql_db_create` | ❌ | -| 3 | 0.472462 | `sql_server_show` | ❌ | -| 4 | 0.441267 | `sql_server_delete` | ❌ | -| 5 | 0.400941 | `sql_db_rename` | ❌ | +| 1 | 0.618390 | `sql_server_create` | ✅ **EXPECTED** | +| 2 | 0.510169 | `sql_db_create` | ❌ | +| 3 | 0.472463 | `sql_server_show` | ❌ | +| 4 | 0.441174 | `sql_server_delete` | ❌ | +| 5 | 0.400939 | `sql_db_rename` | ❌ | --- -## Test 404 +## Test 414 **Expected Tool:** `sql_server_create` **Prompt:** Set up a new SQL server called in my resource group @@ -7340,15 +7520,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 
0.589818 | `sql_server_create` | ✅ **EXPECTED** | +| 1 | 0.589934 | `sql_server_create` | ✅ **EXPECTED** | | 2 | 0.501403 | `sql_db_create` | ❌ | -| 3 | 0.497890 | `sql_server_list` | ❌ | -| 4 | 0.461147 | `sql_db_rename` | ❌ | -| 5 | 0.442934 | `mysql_server_list` | ❌ | +| 3 | 0.498302 | `sql_server_list` | ❌ | +| 4 | 0.461181 | `sql_db_rename` | ❌ | +| 5 | 0.442943 | `mysql_server_list` | ❌ | --- -## Test 405 +## Test 415 **Expected Tool:** `sql_server_delete` **Prompt:** Delete the Azure SQL server from resource group @@ -7359,13 +7539,13 @@ |------|-------|------|--------| | 1 | 0.656593 | `sql_server_delete` | ✅ **EXPECTED** | | 2 | 0.548064 | `sql_db_delete` | ❌ | -| 3 | 0.518037 | `sql_server_list` | ❌ | -| 4 | 0.495550 | `sql_server_create` | ❌ | +| 3 | 0.518178 | `sql_server_list` | ❌ | +| 4 | 0.495640 | `sql_server_create` | ❌ | | 5 | 0.483132 | `workbooks_delete` | ❌ | --- -## Test 406 +## Test 416 **Expected Tool:** `sql_server_delete` **Prompt:** Remove the SQL server from my subscription @@ -7378,11 +7558,11 @@ | 2 | 0.393885 | `postgres_server_list` | ❌ | | 3 | 0.379760 | `sql_db_delete` | ❌ | | 4 | 0.376660 | `sql_server_show` | ❌ | -| 5 | 0.350103 | `sql_server_list` | ❌ | +| 5 | 0.350228 | `sql_server_list` | ❌ | --- -## Test 407 +## Test 417 **Expected Tool:** `sql_server_delete` **Prompt:** Delete SQL server permanently @@ -7393,13 +7573,13 @@ |------|-------|------|--------| | 1 | 0.624310 | `sql_server_delete` | ✅ **EXPECTED** | | 2 | 0.454892 | `sql_db_delete` | ❌ | -| 3 | 0.362561 | `sql_server_firewall-rule_delete` | ❌ | +| 3 | 0.362389 | `sql_server_firewall-rule_delete` | ❌ | | 4 | 0.341503 | `sql_server_show` | ❌ | | 5 | 0.318758 | `eventhubs_eventhub_delete` | ❌ | --- -## Test 408 +## Test 418 **Expected Tool:** `sql_server_entra-admin_list` **Prompt:** List Microsoft Entra ID administrators for SQL server @@ -7410,13 +7590,13 @@ |------|-------|------|--------| | 1 | 0.783479 | `sql_server_entra-admin_list` | ✅ **EXPECTED** | | 2 | 
0.456051 | `sql_server_show` | ❌ | -| 3 | 0.434868 | `sql_server_list` | ❌ | -| 4 | 0.401854 | `sql_server_firewall-rule_list` | ❌ | +| 3 | 0.434815 | `sql_server_list` | ❌ | +| 4 | 0.401908 | `sql_server_firewall-rule_list` | ❌ | | 5 | 0.376055 | `sql_db_list` | ❌ | --- -## Test 409 +## Test 419 **Expected Tool:** `sql_server_entra-admin_list` **Prompt:** Show me the Entra ID administrators configured for SQL server @@ -7427,13 +7607,13 @@ |------|-------|------|--------| | 1 | 0.713306 | `sql_server_entra-admin_list` | ✅ **EXPECTED** | | 2 | 0.413144 | `sql_server_show` | ❌ | -| 3 | 0.368082 | `sql_server_list` | ❌ | +| 3 | 0.368036 | `sql_server_list` | ❌ | | 4 | 0.315966 | `sql_db_list` | ❌ | | 5 | 0.311085 | `postgres_server_list` | ❌ | --- -## Test 410 +## Test 420 **Expected Tool:** `sql_server_entra-admin_list` **Prompt:** What Microsoft Entra ID administrators are set up for my SQL server ? @@ -7444,13 +7624,13 @@ |------|-------|------|--------| | 1 | 0.646419 | `sql_server_entra-admin_list` | ✅ **EXPECTED** | | 2 | 0.356025 | `sql_server_show` | ❌ | -| 3 | 0.322155 | `sql_server_list` | ❌ | -| 4 | 0.307823 | `sql_server_create` | ❌ | +| 3 | 0.322358 | `sql_server_list` | ❌ | +| 4 | 0.307885 | `sql_server_create` | ❌ | | 5 | 0.269788 | `sql_server_delete` | ❌ | --- -## Test 411 +## Test 421 **Expected Tool:** `sql_server_firewall-rule_create` **Prompt:** Create a firewall rule for my Azure SQL server @@ -7459,15 +7639,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.635467 | `sql_server_firewall-rule_create` | ✅ **EXPECTED** | -| 2 | 0.532658 | `sql_server_firewall-rule_list` | ❌ | -| 3 | 0.522133 | `sql_server_firewall-rule_delete` | ❌ | -| 4 | 0.448822 | `sql_server_create` | ❌ | +| 1 | 0.635466 | `sql_server_firewall-rule_create` | ✅ **EXPECTED** | +| 2 | 0.532712 | `sql_server_firewall-rule_list` | ❌ | +| 3 | 0.522184 | `sql_server_firewall-rule_delete` | ❌ | +| 4 | 0.448938 | `sql_server_create` | ❌ | | 5 | 0.440845 | 
`sql_server_delete` | ❌ | --- -## Test 412 +## Test 422 **Expected Tool:** `sql_server_firewall-rule_create` **Prompt:** Add a firewall rule to allow access from IP range to for SQL server @@ -7476,15 +7656,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.670392 | `sql_server_firewall-rule_create` | ✅ **EXPECTED** | -| 2 | 0.533587 | `sql_server_firewall-rule_list` | ❌ | -| 3 | 0.503740 | `sql_server_firewall-rule_delete` | ❌ | -| 4 | 0.316700 | `sql_server_list` | ❌ | -| 5 | 0.302273 | `sql_server_delete` | ❌ | +| 1 | 0.670189 | `sql_server_firewall-rule_create` | ✅ **EXPECTED** | +| 2 | 0.533562 | `sql_server_firewall-rule_list` | ❌ | +| 3 | 0.503648 | `sql_server_firewall-rule_delete` | ❌ | +| 4 | 0.316596 | `sql_server_list` | ❌ | +| 5 | 0.302362 | `sql_server_delete` | ❌ | --- -## Test 413 +## Test 423 **Expected Tool:** `sql_server_firewall-rule_create` **Prompt:** Create a new firewall rule named for SQL server @@ -7493,15 +7673,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.685125 | `sql_server_firewall-rule_create` | ✅ **EXPECTED** | -| 2 | 0.574393 | `sql_server_firewall-rule_list` | ❌ | -| 3 | 0.539643 | `sql_server_firewall-rule_delete` | ❌ | -| 4 | 0.428987 | `sql_server_create` | ❌ | -| 5 | 0.395244 | `sql_db_create` | ❌ | +| 1 | 0.685107 | `sql_server_firewall-rule_create` | ✅ **EXPECTED** | +| 2 | 0.574336 | `sql_server_firewall-rule_list` | ❌ | +| 3 | 0.539577 | `sql_server_firewall-rule_delete` | ❌ | +| 4 | 0.428986 | `sql_server_create` | ❌ | +| 5 | 0.395165 | `sql_db_create` | ❌ | --- -## Test 414 +## Test 424 **Expected Tool:** `sql_server_firewall-rule_delete` **Prompt:** Delete a firewall rule from my Azure SQL server @@ -7510,15 +7690,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.691498 | `sql_server_firewall-rule_delete` | ✅ **EXPECTED** | +| 1 | 0.691421 | `sql_server_firewall-rule_delete` | ✅ **EXPECTED** | | 2 | 0.584379 | 
`sql_server_delete` | ❌ | -| 3 | 0.543780 | `sql_server_firewall-rule_list` | ❌ | +| 3 | 0.543857 | `sql_server_firewall-rule_list` | ❌ | | 4 | 0.540333 | `sql_server_firewall-rule_create` | ❌ | | 5 | 0.498444 | `sql_db_delete` | ❌ | --- -## Test 415 +## Test 425 **Expected Tool:** `sql_server_firewall-rule_delete` **Prompt:** Remove the firewall rule from SQL server @@ -7527,15 +7707,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.670233 | `sql_server_firewall-rule_delete` | ✅ **EXPECTED** | -| 2 | 0.574296 | `sql_server_firewall-rule_list` | ❌ | -| 3 | 0.530419 | `sql_server_firewall-rule_create` | ❌ | -| 4 | 0.488418 | `sql_server_delete` | ❌ | -| 5 | 0.360381 | `sql_db_delete` | ❌ | +| 1 | 0.670091 | `sql_server_firewall-rule_delete` | ✅ **EXPECTED** | +| 2 | 0.574319 | `sql_server_firewall-rule_list` | ❌ | +| 3 | 0.530412 | `sql_server_firewall-rule_create` | ❌ | +| 4 | 0.488400 | `sql_server_delete` | ❌ | +| 5 | 0.360385 | `sql_db_delete` | ❌ | --- -## Test 416 +## Test 426 **Expected Tool:** `sql_server_firewall-rule_delete` **Prompt:** Delete firewall rule for SQL server @@ -7544,15 +7724,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.671298 | `sql_server_firewall-rule_delete` | ✅ **EXPECTED** | -| 2 | 0.601174 | `sql_server_firewall-rule_list` | ❌ | +| 1 | 0.671212 | `sql_server_firewall-rule_delete` | ✅ **EXPECTED** | +| 2 | 0.601230 | `sql_server_firewall-rule_list` | ❌ | | 3 | 0.577330 | `sql_server_firewall-rule_create` | ❌ | | 4 | 0.499272 | `sql_server_delete` | ❌ | -| 5 | 0.378586 | `sql_db_delete` | ❌ | +| 5 | 0.378585 | `sql_db_delete` | ❌ | --- -## Test 417 +## Test 427 **Expected Tool:** `sql_server_firewall-rule_list` **Prompt:** List all firewall rules for SQL server @@ -7561,15 +7741,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.729336 | `sql_server_firewall-rule_list` | ✅ **EXPECTED** | +| 1 | 0.729372 | `sql_server_firewall-rule_list` | ✅ 
**EXPECTED** | | 2 | 0.549667 | `sql_server_firewall-rule_create` | ❌ | -| 3 | 0.513187 | `sql_server_firewall-rule_delete` | ❌ | +| 3 | 0.513114 | `sql_server_firewall-rule_delete` | ❌ | | 4 | 0.468812 | `sql_server_show` | ❌ | -| 5 | 0.418817 | `sql_server_list` | ❌ | +| 5 | 0.418681 | `sql_server_list` | ❌ | --- -## Test 418 +## Test 428 **Expected Tool:** `sql_server_firewall-rule_list` **Prompt:** Show me the firewall rules for SQL server @@ -7578,15 +7758,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.630671 | `sql_server_firewall-rule_list` | ✅ **EXPECTED** | +| 1 | 0.630731 | `sql_server_firewall-rule_list` | ✅ **EXPECTED** | | 2 | 0.524126 | `sql_server_firewall-rule_create` | ❌ | -| 3 | 0.476792 | `sql_server_firewall-rule_delete` | ❌ | +| 3 | 0.476757 | `sql_server_firewall-rule_delete` | ❌ | | 4 | 0.410680 | `sql_server_show` | ❌ | -| 5 | 0.348100 | `sql_server_list` | ❌ | +| 5 | 0.348049 | `sql_server_list` | ❌ | --- -## Test 419 +## Test 429 **Expected Tool:** `sql_server_firewall-rule_list` **Prompt:** What firewall rules are configured for my SQL server ? 
@@ -7595,15 +7775,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.630460 | `sql_server_firewall-rule_list` | ✅ **EXPECTED** | -| 2 | 0.532454 | `sql_server_firewall-rule_create` | ❌ | -| 3 | 0.473596 | `sql_server_firewall-rule_delete` | ❌ | -| 4 | 0.412957 | `sql_server_show` | ❌ | -| 5 | 0.350513 | `sql_server_list` | ❌ | +| 1 | 0.630555 | `sql_server_firewall-rule_list` | ✅ **EXPECTED** | +| 2 | 0.532484 | `sql_server_firewall-rule_create` | ❌ | +| 3 | 0.473499 | `sql_server_firewall-rule_delete` | ❌ | +| 4 | 0.412906 | `sql_server_show` | ❌ | +| 5 | 0.350385 | `sql_server_list` | ❌ | --- -## Test 420 +## Test 430 **Expected Tool:** `sql_server_list` **Prompt:** List all Azure SQL servers in resource group @@ -7612,15 +7792,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.694404 | `sql_server_list` | ✅ **EXPECTED** | -| 2 | 0.596686 | `mysql_server_list` | ❌ | -| 3 | 0.578238 | `sql_db_list` | ❌ | -| 4 | 0.515851 | `sql_elastic-pool_list` | ❌ | -| 5 | 0.509789 | `sql_db_show` | ❌ | +| 1 | 0.694271 | `sql_server_list` | ✅ **EXPECTED** | +| 2 | 0.596669 | `mysql_server_list` | ❌ | +| 3 | 0.578222 | `sql_db_list` | ❌ | +| 4 | 0.515894 | `sql_elastic-pool_list` | ❌ | +| 5 | 0.509835 | `sql_db_show` | ❌ | --- -## Test 421 +## Test 431 **Expected Tool:** `sql_server_list` **Prompt:** Show me every SQL server available in resource group @@ -7629,15 +7809,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.618218 | `sql_server_list` | ✅ **EXPECTED** | -| 2 | 0.593837 | `mysql_server_list` | ❌ | +| 1 | 0.618187 | `sql_server_list` | ✅ **EXPECTED** | +| 2 | 0.594043 | `mysql_server_list` | ❌ | | 3 | 0.542398 | `sql_db_list` | ❌ | | 4 | 0.507404 | `resourcehealth_availability-status_list` | ❌ | -| 5 | 0.496200 | `group_list` | ❌ | +| 5 | 0.496257 | `group_list` | ❌ | --- -## Test 422 +## Test 432 **Expected Tool:** `sql_server_show` **Prompt:** Show me the details of Azure SQL 
server in resource group @@ -7648,13 +7828,13 @@ |------|-------|------|--------| | 1 | 0.629672 | `sql_db_show` | ❌ | | 2 | 0.595184 | `sql_server_show` | ✅ **EXPECTED** | -| 3 | 0.587728 | `sql_server_list` | ❌ | -| 4 | 0.559893 | `mysql_server_list` | ❌ | +| 3 | 0.587768 | `sql_server_list` | ❌ | +| 4 | 0.560004 | `mysql_server_list` | ❌ | | 5 | 0.540218 | `sql_db_list` | ❌ | --- -## Test 423 +## Test 433 **Expected Tool:** `sql_server_show` **Prompt:** Get the configuration details for SQL server @@ -7667,11 +7847,11 @@ | 2 | 0.610507 | `postgres_server_config_get` | ❌ | | 3 | 0.538034 | `mysql_server_config_get` | ❌ | | 4 | 0.471541 | `sql_db_show` | ❌ | -| 5 | 0.445432 | `postgres_server_param_get` | ❌ | +| 5 | 0.445430 | `postgres_server_param_get` | ❌ | --- -## Test 424 +## Test 434 **Expected Tool:** `sql_server_show` **Prompt:** Display the properties of SQL server @@ -7682,13 +7862,13 @@ |------|-------|------|--------| | 1 | 0.563143 | `sql_server_show` | ✅ **EXPECTED** | | 2 | 0.392532 | `postgres_server_config_get` | ❌ | -| 3 | 0.380035 | `postgres_server_param_get` | ❌ | -| 4 | 0.372102 | `sql_server_firewall-rule_list` | ❌ | +| 3 | 0.380021 | `postgres_server_param_get` | ❌ | +| 4 | 0.372194 | `sql_server_firewall-rule_list` | ❌ | | 5 | 0.370539 | `sql_db_show` | ❌ | --- -## Test 425 +## Test 435 **Expected Tool:** `storage_account_create` **Prompt:** Create a new storage account called testaccount123 in East US region @@ -7700,12 +7880,12 @@ | 1 | 0.533552 | `storage_account_create` | ✅ **EXPECTED** | | 2 | 0.438046 | `storage_blob_container_create` | ❌ | | 3 | 0.418191 | `storage_account_get` | ❌ | -| 4 | 0.413950 | `storage_blob_container_get` | ❌ | -| 5 | 0.373651 | `managedlustre_fs_create` | ❌ | +| 4 | 0.414518 | `storage_blob_container_get` | ❌ | +| 5 | 0.370957 | `managedlustre_fs_create` | ❌ | --- -## Test 426 +## Test 436 **Expected Tool:** `storage_account_create` **Prompt:** Create a storage account with premium performance and LRS 
replication @@ -7715,14 +7895,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.500638 | `storage_account_create` | ✅ **EXPECTED** | -| 2 | 0.484584 | `managedlustre_fs_create` | ❌ | +| 2 | 0.483202 | `managedlustre_fs_create` | ❌ | | 3 | 0.407222 | `storage_account_get` | ❌ | | 4 | 0.406804 | `storage_blob_container_create` | ❌ | -| 5 | 0.400134 | `managedlustre_fs_sku_get` | ❌ | +| 5 | 0.400151 | `managedlustre_fs_sku_get` | ❌ | --- -## Test 427 +## Test 437 **Expected Tool:** `storage_account_create` **Prompt:** Create a new storage account with Data Lake Storage Gen2 enabled @@ -7731,15 +7911,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.589002 | `storage_account_create` | ✅ **EXPECTED** | -| 2 | 0.538023 | `managedlustre_fs_create` | ❌ | +| 1 | 0.589003 | `storage_account_create` | ✅ **EXPECTED** | +| 2 | 0.535501 | `managedlustre_fs_create` | ❌ | | 3 | 0.509731 | `storage_blob_container_create` | ❌ | | 4 | 0.462519 | `storage_account_get` | ❌ | | 5 | 0.447156 | `sql_db_create` | ❌ | --- -## Test 428 +## Test 438 **Expected Tool:** `storage_account_get` **Prompt:** Show me the details for my storage account @@ -7748,15 +7928,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.673750 | `storage_account_get` | ✅ **EXPECTED** | -| 2 | 0.607762 | `storage_blob_container_get` | ❌ | +| 1 | 0.673749 | `storage_account_get` | ✅ **EXPECTED** | +| 2 | 0.608256 | `storage_blob_container_get` | ❌ | | 3 | 0.556457 | `storage_blob_get` | ❌ | | 4 | 0.483435 | `storage_account_create` | ❌ | | 5 | 0.439236 | `cosmos_account_list` | ❌ | --- -## Test 429 +## Test 439 **Expected Tool:** `storage_account_get` **Prompt:** Get details about the storage account @@ -7766,14 +7946,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.692687 | `storage_account_get` | ✅ **EXPECTED** | -| 2 | 0.577173 | `storage_blob_container_get` | ❌ | +| 2 | 0.577547 | 
`storage_blob_container_get` | ❌ | | 3 | 0.529205 | `storage_blob_get` | ❌ | | 4 | 0.518215 | `storage_account_create` | ❌ | -| 5 | 0.448506 | `storage_blob_container_create` | ❌ | +| 5 | 0.448507 | `storage_blob_container_create` | ❌ | --- -## Test 430 +## Test 440 **Expected Tool:** `storage_account_get` **Prompt:** List all storage accounts in my subscription including their location and SKU @@ -7783,14 +7963,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.649215 | `storage_account_get` | ✅ **EXPECTED** | -| 2 | 0.557093 | `managedlustre_fs_sku_get` | ❌ | -| 3 | 0.549448 | `storage_blob_container_get` | ❌ | -| 4 | 0.547577 | `subscription_list` | ❌ | +| 2 | 0.557016 | `managedlustre_fs_sku_get` | ❌ | +| 3 | 0.550148 | `storage_blob_container_get` | ❌ | +| 4 | 0.547647 | `subscription_list` | ❌ | | 5 | 0.536909 | `cosmos_account_list` | ❌ | --- -## Test 431 +## Test 441 **Expected Tool:** `storage_account_get` **Prompt:** Show me my storage accounts with whether hierarchical namespace (HNS) is enabled @@ -7800,14 +7980,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.556860 | `storage_account_get` | ✅ **EXPECTED** | -| 2 | 0.481664 | `storage_blob_container_get` | ❌ | +| 2 | 0.482418 | `storage_blob_container_get` | ❌ | | 3 | 0.461284 | `managedlustre_fs_list` | ❌ | | 4 | 0.421642 | `cosmos_account_list` | ❌ | | 5 | 0.410587 | `storage_blob_get` | ❌ | --- -## Test 432 +## Test 442 **Expected Tool:** `storage_account_get` **Prompt:** Show me the storage accounts in my subscription and include HTTPS-only and public blob access settings @@ -7817,14 +7997,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.619462 | `storage_account_get` | ✅ **EXPECTED** | -| 2 | 0.555677 | `storage_blob_container_get` | ❌ | +| 2 | 0.556436 | `storage_blob_container_get` | ❌ | | 3 | 0.518229 | `storage_blob_get` | ❌ | | 4 | 0.473598 | `cosmos_account_list` | ❌ | -| 5 | 0.465527 | 
`subscription_list` | ❌ | +| 5 | 0.465571 | `subscription_list` | ❌ | --- -## Test 433 +## Test 443 **Expected Tool:** `storage_blob_container_create` **Prompt:** Create the storage container mycontainer in storage account @@ -7834,14 +8014,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.649793 | `storage_blob_container_create` | ✅ **EXPECTED** | -| 2 | 0.585556 | `storage_blob_container_get` | ❌ | +| 2 | 0.583896 | `storage_blob_container_get` | ❌ | | 3 | 0.524779 | `storage_account_create` | ❌ | | 4 | 0.496679 | `storage_blob_get` | ❌ | | 5 | 0.447784 | `cosmos_database_container_list` | ❌ | --- -## Test 434 +## Test 444 **Expected Tool:** `storage_blob_container_create` **Prompt:** Create the container using blob public access in storage account @@ -7851,14 +8031,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.682161 | `storage_blob_container_create` | ✅ **EXPECTED** | -| 2 | 0.590826 | `storage_blob_container_get` | ❌ | -| 3 | 0.559264 | `storage_blob_get` | ❌ | -| 4 | 0.500625 | `storage_account_create` | ❌ | +| 2 | 0.590160 | `storage_blob_container_get` | ❌ | +| 3 | 0.559263 | `storage_blob_get` | ❌ | +| 4 | 0.500624 | `storage_account_create` | ❌ | | 5 | 0.420514 | `storage_account_get` | ❌ | --- -## Test 435 +## Test 445 **Expected Tool:** `storage_blob_container_create` **Prompt:** Create a new blob container named documents with container public access in storage account @@ -7867,15 +8047,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.625397 | `storage_blob_container_create` | ✅ **EXPECTED** | -| 2 | 0.544024 | `storage_blob_container_get` | ❌ | -| 3 | 0.497804 | `storage_blob_get` | ❌ | +| 1 | 0.625490 | `storage_blob_container_create` | ✅ **EXPECTED** | +| 2 | 0.543562 | `storage_blob_container_get` | ❌ | +| 3 | 0.497792 | `storage_blob_get` | ❌ | | 4 | 0.463198 | `storage_account_create` | ❌ | -| 5 | 0.435099 | `cosmos_database_container_list` | ❌ | +| 5 
| 0.435103 | `cosmos_database_container_list` | ❌ | --- -## Test 436 +## Test 446 **Expected Tool:** `storage_blob_container_get` **Prompt:** Show me the properties of the storage container in the storage account @@ -7884,15 +8064,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.703348 | `storage_blob_container_get` | ✅ **EXPECTED** | +| 1 | 0.701642 | `storage_blob_container_get` | ✅ **EXPECTED** | | 2 | 0.623681 | `storage_blob_get` | ❌ | | 3 | 0.577921 | `storage_account_get` | ❌ | -| 4 | 0.549804 | `storage_blob_container_create` | ❌ | -| 5 | 0.523289 | `cosmos_database_container_list` | ❌ | +| 4 | 0.549803 | `storage_blob_container_create` | ❌ | +| 5 | 0.523288 | `cosmos_database_container_list` | ❌ | --- -## Test 437 +## Test 447 **Expected Tool:** `storage_blob_container_get` **Prompt:** List all blob containers in the storage account @@ -7901,7 +8081,7 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.712012 | `storage_blob_container_get` | ✅ **EXPECTED** | +| 1 | 0.712037 | `storage_blob_container_get` | ✅ **EXPECTED** | | 2 | 0.680802 | `storage_blob_get` | ❌ | | 3 | 0.613933 | `cosmos_database_container_list` | ❌ | | 4 | 0.556319 | `storage_blob_container_create` | ❌ | @@ -7909,7 +8089,7 @@ --- -## Test 438 +## Test 448 **Expected Tool:** `storage_blob_container_get` **Prompt:** Show me the containers in the storage account @@ -7918,15 +8098,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.713080 | `storage_blob_container_get` | ✅ **EXPECTED** | +| 1 | 0.713527 | `storage_blob_container_get` | ✅ **EXPECTED** | | 2 | 0.592373 | `cosmos_database_container_list` | ❌ | | 3 | 0.586169 | `storage_blob_get` | ❌ | | 4 | 0.523322 | `storage_account_get` | ❌ | -| 5 | 0.487520 | `storage_blob_container_create` | ❌ | +| 5 | 0.487521 | `storage_blob_container_create` | ❌ | --- -## Test 439 +## Test 449 **Expected Tool:** `storage_blob_get` **Prompt:** Show me the properties 
for blob in container in storage account @@ -7935,15 +8115,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.700963 | `storage_blob_get` | ✅ **EXPECTED** | -| 2 | 0.648279 | `storage_blob_container_get` | ❌ | -| 3 | 0.540987 | `storage_blob_container_create` | ❌ | -| 4 | 0.527363 | `storage_account_get` | ❌ | -| 5 | 0.477959 | `cosmos_database_container_list` | ❌ | +| 1 | 0.700973 | `storage_blob_get` | ✅ **EXPECTED** | +| 2 | 0.646973 | `storage_blob_container_get` | ❌ | +| 3 | 0.541019 | `storage_blob_container_create` | ❌ | +| 4 | 0.527427 | `storage_account_get` | ❌ | +| 5 | 0.477946 | `cosmos_database_container_list` | ❌ | --- -## Test 440 +## Test 450 **Expected Tool:** `storage_blob_get` **Prompt:** Get the details about blob in the container in storage account @@ -7953,14 +8133,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.694997 | `storage_blob_get` | ✅ **EXPECTED** | -| 2 | 0.633397 | `storage_blob_container_get` | ❌ | -| 3 | 0.589151 | `storage_blob_container_create` | ❌ | +| 2 | 0.631161 | `storage_blob_container_get` | ❌ | +| 3 | 0.589152 | `storage_blob_container_create` | ❌ | | 4 | 0.580226 | `storage_account_get` | ❌ | | 5 | 0.457038 | `storage_account_create` | ❌ | --- -## Test 441 +## Test 451 **Expected Tool:** `storage_blob_get` **Prompt:** List all blobs in the blob container in the storage account @@ -7970,14 +8150,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.733586 | `storage_blob_get` | ✅ **EXPECTED** | -| 2 | 0.702342 | `storage_blob_container_get` | ❌ | +| 2 | 0.700891 | `storage_blob_container_get` | ❌ | | 3 | 0.605993 | `storage_blob_container_create` | ❌ | | 4 | 0.579070 | `cosmos_database_container_list` | ❌ | | 5 | 0.506639 | `cosmos_database_container_item_query` | ❌ | --- -## Test 442 +## Test 452 **Expected Tool:** `storage_blob_get` **Prompt:** Show me the blobs in the blob container in the storage account @@ -7987,14 +8167,14 @@ | 
Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.704426 | `storage_blob_get` | ✅ **EXPECTED** | -| 2 | 0.666342 | `storage_blob_container_get` | ❌ | +| 2 | 0.664940 | `storage_blob_container_get` | ❌ | | 3 | 0.561557 | `storage_blob_container_create` | ❌ | | 4 | 0.533515 | `cosmos_database_container_list` | ❌ | | 5 | 0.484018 | `storage_account_get` | ❌ | --- -## Test 443 +## Test 453 **Expected Tool:** `storage_blob_upload` **Prompt:** Upload file to storage blob in container in storage account @@ -8003,15 +8183,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.566278 | `storage_blob_upload` | ✅ **EXPECTED** | -| 2 | 0.525685 | `storage_blob_container_create` | ❌ | -| 3 | 0.517524 | `storage_blob_get` | ❌ | -| 4 | 0.474395 | `storage_blob_container_get` | ❌ | -| 5 | 0.382007 | `storage_account_create` | ❌ | +| 1 | 0.566319 | `storage_blob_upload` | ✅ **EXPECTED** | +| 2 | 0.525579 | `storage_blob_container_create` | ❌ | +| 3 | 0.517548 | `storage_blob_get` | ❌ | +| 4 | 0.473592 | `storage_blob_container_get` | ❌ | +| 5 | 0.381886 | `storage_account_create` | ❌ | --- -## Test 444 +## Test 454 **Expected Tool:** `subscription_list` **Prompt:** List all subscriptions for my account @@ -8020,7 +8200,7 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.654048 | `subscription_list` | ✅ **EXPECTED** | +| 1 | 0.654071 | `subscription_list` | ✅ **EXPECTED** | | 2 | 0.512964 | `cosmos_account_list` | ❌ | | 3 | 0.471653 | `postgres_server_list` | ❌ | | 4 | 0.469023 | `kusto_cluster_list` | ❌ | @@ -8028,7 +8208,7 @@ --- -## Test 445 +## Test 455 **Expected Tool:** `subscription_list` **Prompt:** Show me my subscriptions @@ -8037,15 +8217,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.458834 | `subscription_list` | ✅ **EXPECTED** | -| 2 | 0.407101 | `eventgrid_subscription_list` | ❌ | -| 3 | 0.393662 | `eventgrid_topic_list` | ❌ | +| 1 | 0.458821 | 
`subscription_list` | ✅ **EXPECTED** | +| 2 | 0.407471 | `eventgrid_subscription_list` | ❌ | +| 3 | 0.393695 | `eventgrid_topic_list` | ❌ | | 4 | 0.391555 | `redis_list` | ❌ | | 5 | 0.381238 | `postgres_server_list` | ❌ | --- -## Test 446 +## Test 456 **Expected Tool:** `subscription_list` **Prompt:** What is my current subscription? @@ -8054,15 +8234,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.433242 | `subscription_list` | ✅ **EXPECTED** | +| 1 | 0.433196 | `subscription_list` | ✅ **EXPECTED** | | 2 | 0.319579 | `marketplace_product_list` | ❌ | -| 3 | 0.315547 | `marketplace_product_get` | ❌ | -| 4 | 0.293009 | `eventgrid_subscription_list` | ❌ | -| 5 | 0.289280 | `eventgrid_topic_list` | ❌ | +| 3 | 0.315615 | `marketplace_product_get` | ❌ | +| 4 | 0.293772 | `eventgrid_subscription_list` | ❌ | +| 5 | 0.289334 | `eventgrid_topic_list` | ❌ | --- -## Test 447 +## Test 457 **Expected Tool:** `subscription_list` **Prompt:** What subscriptions do I have? 
@@ -8071,15 +8251,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.477657 | `subscription_list` | ✅ **EXPECTED** | -| 2 | 0.356775 | `eventgrid_subscription_list` | ❌ | +| 1 | 0.477592 | `subscription_list` | ✅ **EXPECTED** | +| 2 | 0.357625 | `eventgrid_subscription_list` | ❌ | | 3 | 0.354286 | `marketplace_product_list` | ❌ | | 4 | 0.344549 | `redis_list` | ❌ | -| 5 | 0.340764 | `eventgrid_topic_list` | ❌ | +| 5 | 0.340837 | `eventgrid_topic_list` | ❌ | --- -## Test 448 +## Test 458 **Expected Tool:** `azureterraformbestpractices_get` **Prompt:** Fetch the Azure Terraform best practices @@ -8088,15 +8268,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.686886 | `azureterraformbestpractices_get` | ✅ **EXPECTED** | -| 2 | 0.625270 | `deploy_iac_rules_get` | ❌ | -| 3 | 0.605048 | `get_bestpractices_get` | ❌ | -| 4 | 0.482745 | `deploy_pipeline_guidance_get` | ❌ | -| 5 | 0.468390 | `azureaibestpractices_get` | ❌ | +| 1 | 0.686971 | `azureterraformbestpractices_get` | ✅ **EXPECTED** | +| 2 | 0.625325 | `deploy_iac_rules_get` | ❌ | +| 3 | 0.605182 | `get_bestpractices_get` | ❌ | +| 4 | 0.483016 | `deploy_pipeline_guidance_get` | ❌ | +| 5 | 0.466241 | `deploy_plan_get` | ❌ | --- -## Test 449 +## Test 459 **Expected Tool:** `azureterraformbestpractices_get` **Prompt:** Show me the Azure Terraform best practices and generate code sample to get a secret from Azure Key Vault @@ -8107,13 +8287,13 @@ |------|-------|------|--------| | 1 | 0.581316 | `azureterraformbestpractices_get` | ✅ **EXPECTED** | | 2 | 0.512141 | `get_bestpractices_get` | ❌ | -| 3 | 0.510005 | `deploy_iac_rules_get` | ❌ | -| 4 | 0.473943 | `keyvault_secret_get` | ❌ | -| 5 | 0.451726 | `azureaibestpractices_get` | ❌ | +| 3 | 0.510004 | `deploy_iac_rules_get` | ❌ | +| 4 | 0.473596 | `keyvault_secret_get` | ❌ | +| 5 | 0.444297 | `deploy_pipeline_guidance_get` | ❌ | --- -## Test 450 +## Test 460 **Expected Tool:** `virtualdesktop_hostpool_list` 
**Prompt:** List all host pools in my subscription @@ -8122,15 +8302,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.711905 | `virtualdesktop_hostpool_list` | ✅ **EXPECTED** | +| 1 | 0.711969 | `virtualdesktop_hostpool_list` | ✅ **EXPECTED** | | 2 | 0.659763 | `virtualdesktop_hostpool_host_list` | ❌ | -| 3 | 0.620665 | `kusto_cluster_list` | ❌ | -| 4 | 0.546744 | `search_service_list` | ❌ | -| 5 | 0.536423 | `virtualdesktop_hostpool_host_user-list` | ❌ | +| 3 | 0.620666 | `kusto_cluster_list` | ❌ | +| 4 | 0.548888 | `search_service_list` | ❌ | +| 5 | 0.535739 | `virtualdesktop_hostpool_host_user-list` | ❌ | --- -## Test 451 +## Test 461 **Expected Tool:** `virtualdesktop_hostpool_host_list` **Prompt:** List all session hosts in host pool @@ -8140,14 +8320,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.727054 | `virtualdesktop_hostpool_host_list` | ✅ **EXPECTED** | -| 2 | 0.715572 | `virtualdesktop_hostpool_host_user-list` | ❌ | -| 3 | 0.573350 | `virtualdesktop_hostpool_list` | ❌ | -| 4 | 0.438659 | `aks_nodepool_get` | ❌ | +| 2 | 0.714469 | `virtualdesktop_hostpool_host_user-list` | ❌ | +| 3 | 0.573352 | `virtualdesktop_hostpool_list` | ❌ | +| 4 | 0.438621 | `aks_nodepool_get` | ❌ | | 5 | 0.393721 | `sql_elastic-pool_list` | ❌ | --- -## Test 452 +## Test 462 **Expected Tool:** `virtualdesktop_hostpool_host_user-list` **Prompt:** List all user sessions on session host in host pool @@ -8156,15 +8336,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.813311 | `virtualdesktop_hostpool_host_user-list` | ✅ **EXPECTED** | -| 2 | 0.659213 | `virtualdesktop_hostpool_host_list` | ❌ | -| 3 | 0.501113 | `virtualdesktop_hostpool_list` | ❌ | -| 4 | 0.357561 | `aks_nodepool_get` | ❌ | -| 5 | 0.336576 | `monitor_workspace_list` | ❌ | +| 1 | 0.812659 | `virtualdesktop_hostpool_host_user-list` | ✅ **EXPECTED** | +| 2 | 0.659212 | `virtualdesktop_hostpool_host_list` | ❌ | +| 3 | 0.501167 
| `virtualdesktop_hostpool_list` | ❌ | +| 4 | 0.357540 | `aks_nodepool_get` | ❌ | +| 5 | 0.336385 | `monitor_workspace_list` | ❌ | --- -## Test 453 +## Test 463 **Expected Tool:** `workbooks_create` **Prompt:** Create a new workbook named @@ -8181,7 +8361,7 @@ --- -## Test 454 +## Test 464 **Expected Tool:** `workbooks_delete` **Prompt:** Delete the workbook with resource ID @@ -8198,7 +8378,7 @@ --- -## Test 455 +## Test 465 **Expected Tool:** `workbooks_list` **Prompt:** List all workbooks in my resource group @@ -8207,15 +8387,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.772404 | `workbooks_list` | ✅ **EXPECTED** | -| 2 | 0.562476 | `workbooks_create` | ❌ | -| 3 | 0.516733 | `grafana_list` | ❌ | -| 4 | 0.493962 | `workbooks_show` | ❌ | -| 5 | 0.488522 | `group_list` | ❌ | +| 1 | 0.772431 | `workbooks_list` | ✅ **EXPECTED** | +| 2 | 0.562485 | `workbooks_create` | ❌ | +| 3 | 0.516739 | `grafana_list` | ❌ | +| 4 | 0.493975 | `workbooks_show` | ❌ | +| 5 | 0.488609 | `group_list` | ❌ | --- -## Test 456 +## Test 466 **Expected Tool:** `workbooks_list` **Prompt:** What workbooks do I have in resource group ? 
@@ -8225,14 +8405,14 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| | 1 | 0.708612 | `workbooks_list` | ✅ **EXPECTED** | -| 2 | 0.570260 | `workbooks_create` | ❌ | +| 2 | 0.570259 | `workbooks_create` | ❌ | | 3 | 0.499633 | `workbooks_show` | ❌ | | 4 | 0.485504 | `workbooks_delete` | ❌ | | 5 | 0.472378 | `grafana_list` | ❌ | --- -## Test 457 +## Test 467 **Expected Tool:** `workbooks_show` **Prompt:** Get information about the workbook with resource ID @@ -8249,7 +8429,7 @@ --- -## Test 458 +## Test 468 **Expected Tool:** `workbooks_show` **Prompt:** Show me the workbook with resource ID @@ -8266,7 +8446,7 @@ --- -## Test 459 +## Test 469 **Expected Tool:** `workbooks_update` **Prompt:** Update the workbook with a new text step @@ -8283,7 +8463,7 @@ --- -## Test 460 +## Test 470 **Expected Tool:** `bicepschema_get` **Prompt:** How can I use Bicep to create an Azure OpenAI service? @@ -8292,15 +8472,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.543803 | `bicepschema_get` | ✅ **EXPECTED** | +| 1 | 0.543154 | `bicepschema_get` | ✅ **EXPECTED** | | 2 | 0.485970 | `foundry_models_deploy` | ❌ | | 3 | 0.485889 | `deploy_iac_rules_get` | ❌ | -| 4 | 0.468898 | `azureaibestpractices_get` | ❌ | -| 5 | 0.453412 | `foundry_openai_embeddings-create` | ❌ | +| 4 | 0.453282 | `foundry_openai_embeddings-create` | ❌ | +| 5 | 0.448373 | `get_bestpractices_get` | ❌ | --- -## Test 461 +## Test 471 **Expected Tool:** `cloudarchitect_design` **Prompt:** Please help me design an architecture for a large-scale file upload, storage, and retrieval service @@ -8309,15 +8489,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.502125 | `cloudarchitect_design` | ✅ **EXPECTED** | +| 1 | 0.502110 | `cloudarchitect_design` | ✅ **EXPECTED** | | 2 | 0.290902 | `storage_blob_upload` | ❌ | -| 3 | 0.260101 | `managedlustre_fs_create` | ❌ | +| 3 | 0.259162 | `managedlustre_fs_create` | ❌ | | 4 | 0.254991 | 
`deploy_architecture_diagram_generate` | ❌ | | 5 | 0.245034 | `managedlustre_fs_subnetsize_validate` | ❌ | --- -## Test 462 +## Test 472 **Expected Tool:** `cloudarchitect_design` **Prompt:** Help me design an Azure cloud service that will serve as an ATM for users @@ -8326,15 +8506,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.508153 | `cloudarchitect_design` | ✅ **EXPECTED** | +| 1 | 0.508504 | `cloudarchitect_design` | ✅ **EXPECTED** | | 2 | 0.377941 | `deploy_architecture_diagram_generate` | ❌ | -| 3 | 0.341316 | `deploy_pipeline_guidance_get` | ❌ | -| 4 | 0.336385 | `azureaibestpractices_get` | ❌ | -| 5 | 0.328747 | `get_bestpractices_get` | ❌ | +| 3 | 0.341462 | `deploy_pipeline_guidance_get` | ❌ | +| 4 | 0.328747 | `get_bestpractices_get` | ❌ | +| 5 | 0.321855 | `deploy_plan_get` | ❌ | --- -## Test 463 +## Test 473 **Expected Tool:** `cloudarchitect_design` **Prompt:** I want to design a cloud app for ordering groceries @@ -8343,15 +8523,15 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.423577 | `cloudarchitect_design` | ✅ **EXPECTED** | -| 2 | 0.271869 | `deploy_pipeline_guidance_get` | ❌ | +| 1 | 0.423059 | `cloudarchitect_design` | ✅ **EXPECTED** | +| 2 | 0.271943 | `deploy_pipeline_guidance_get` | ❌ | | 3 | 0.265972 | `deploy_architecture_diagram_generate` | ❌ | | 4 | 0.242581 | `deploy_plan_get` | ❌ | -| 5 | 0.241197 | `azureaibestpractices_get` | ❌ | +| 5 | 0.229074 | `extension_cli_generate` | ❌ | --- -## Test 464 +## Test 474 **Expected Tool:** `cloudarchitect_design` **Prompt:** How can I design a cloud service in Azure that will store and present videos for users? 
@@ -8360,39 +8540,39 @@ | Rank | Score | Tool | Status | |------|-------|------|--------| -| 1 | 0.534690 | `cloudarchitect_design` | ✅ **EXPECTED** | -| 2 | 0.369872 | `deploy_pipeline_guidance_get` | ❌ | -| 3 | 0.357808 | `managedlustre_fs_create` | ❌ | +| 1 | 0.535224 | `cloudarchitect_design` | ✅ **EXPECTED** | +| 2 | 0.369969 | `deploy_pipeline_guidance_get` | ❌ | +| 3 | 0.356331 | `managedlustre_fs_create` | ❌ | | 4 | 0.352797 | `deploy_architecture_diagram_generate` | ❌ | -| 5 | 0.324217 | `azureaibestpractices_get` | ❌ | +| 5 | 0.323920 | `storage_blob_upload` | ❌ | --- ## Summary -**Total Prompts Tested:** 464 -**Analysis Execution Time:** 186.7791311s +**Total Prompts Tested:** 474 +**Analysis Execution Time:** 105.8478990s ### Success Rate Metrics -**Top Choice Success:** 92.2% (428/464 tests) +**Top Choice Success:** 91.4% (433/474 tests) #### Confidence Level Distribution -**💪 Very High Confidence (≥0.8):** 3.2% (15/464 tests) -**🎯 High Confidence (≥0.7):** 22.8% (106/464 tests) -**✅ Good Confidence (≥0.6):** 62.3% (289/464 tests) -**👍 Fair Confidence (≥0.5):** 92.2% (428/464 tests) -**👌 Acceptable Confidence (≥0.4):** 99.6% (462/464 tests) -**❌ Low Confidence (<0.4):** 0.4% (2/464 tests) +**💪 Very High Confidence (≥0.8):** 3.2% (15/474 tests) +**🎯 High Confidence (≥0.7):** 22.4% (106/474 tests) +**✅ Good Confidence (≥0.6):** 60.5% (287/474 tests) +**👍 Fair Confidence (≥0.5):** 90.9% (431/474 tests) +**👌 Acceptable Confidence (≥0.4):** 98.5% (467/474 tests) +**❌ Low Confidence (<0.4):** 1.5% (7/474 tests) #### Top Choice + Confidence Combinations -**💪 Top Choice + Very High Confidence (≥0.8):** 3.2% (15/464 tests) -**🎯 Top Choice + High Confidence (≥0.7):** 22.8% (106/464 tests) -**✅ Top Choice + Good Confidence (≥0.6):** 60.3% (280/464 tests) -**👍 Top Choice + Fair Confidence (≥0.5):** 86.9% (403/464 tests) -**👌 Top Choice + Acceptable Confidence (≥0.4):** 92.2% (428/464 tests) +**💪 Top Choice + Very High Confidence (≥0.8):** 3.2% (15/474 tests) +**🎯 
Top Choice + High Confidence (≥0.7):** 22.4% (106/474 tests) +**✅ Top Choice + Good Confidence (≥0.6):** 58.6% (278/474 tests) +**👍 Top Choice + Fair Confidence (≥0.5):** 85.7% (406/474 tests) +**👌 Top Choice + Acceptable Confidence (≥0.4):** 91.4% (433/474 tests) ### Success Rate Analysis diff --git a/eng/tools/ToolDescriptionEvaluator/tools.json b/eng/tools/ToolDescriptionEvaluator/tools.json index d83b47f83..8cf5aa60d 100644 --- a/eng/tools/ToolDescriptionEvaluator/tools.json +++ b/eng/tools/ToolDescriptionEvaluator/tools.json @@ -896,12 +896,6 @@ } ] }, - { - "name": "get", - "description": "Returns best practices and code generation guidance for building AI applications in Azure. \r\n Use this tool when you need recommendations on how to write code for AI agents, chatbots, workflows, or other AI features.\r\n This tool also provides guidance for code generation using the Azure resources (e.g. Azure AI Foundry) for application development only. \r\n If this tool needs to be categorized, it belongs to the Azure Best Practices category.", - "command": "azureaibestpractices get", - "option": [] - }, { "name": "get", "description": "Returns Terraform best practices for Azure. Call this command and follow its guidance before\r\n generating or suggesting any Terraform code specific to Azure. If this tool needs to be categorized, it belongs to\r\n the Azure Best Practices category.", @@ -10985,6 +10979,103 @@ } ] }, + { + "name": "synthesize", + "description": "Convert text to speech using Azure AI Services Speech. 
This command takes text input and generates an audio file using advanced neural text-to-speech capabilities.\r\nYou must provide an Azure AI Services endpoint (e.g., https://your-service.cognitiveservices.azure.com/), the text to convert, and an output file path.\r\nOptional parameters include language specification (default: en-US), voice selection, audio output format (default: Riff24Khz16BitMonoPcm), and custom voice endpoint ID.\r\nThe command supports a wide variety of output formats and neural voices for natural-sounding speech synthesis.", + "command": "speech tts synthesize", + "option": [ + { + "name": "--tenant", + "description": "The Microsoft Entra ID tenant ID or name. This can be either the GUID identifier or the display name of your Entra ID tenant.", + "type": "string", + "required": null + }, + { + "name": "--auth-method", + "description": "Authentication method to use. Options: 'credential' (Azure CLI/managed identity), 'key' (access key), or 'connectionString'.", + "type": "string", + "required": null + }, + { + "name": "--retry-delay", + "description": "Initial delay in seconds between retry attempts. For exponential backoff, this value is used as the base.", + "type": "string", + "required": null + }, + { + "name": "--retry-max-delay", + "description": "Maximum delay in seconds between retries, regardless of the retry strategy.", + "type": "string", + "required": null + }, + { + "name": "--retry-max-retries", + "description": "Maximum number of retry attempts for failed operations before giving up.", + "type": "string", + "required": null + }, + { + "name": "--retry-mode", + "description": "Retry strategy to use. 'fixed' uses consistent delays, 'exponential' increases delay between attempts.", + "type": "string", + "required": null + }, + { + "name": "--retry-network-timeout", + "description": "Network operation timeout in seconds. 
Operations taking longer than this will be cancelled.", + "type": "string", + "required": null + }, + { + "name": "--subscription", + "description": "Specifies the Azure subscription to use. Accepts either a subscription ID (GUID) or display name. If not specified, the AZURE_SUBSCRIPTION_ID environment variable will be used instead.", + "type": "string", + "required": null + }, + { + "name": "--endpoint", + "description": "The Azure AI Services endpoint URL (e.g., https://your-service.cognitiveservices.azure.com/).", + "type": "string", + "required": true + }, + { + "name": "--text", + "description": "The text to convert to speech.", + "type": "string", + "required": true + }, + { + "name": "--outputAudio", + "description": "Path where the synthesized audio file will be saved.", + "type": "string", + "required": true + }, + { + "name": "--language", + "description": "The language for speech recognition (e.g., en-US, es-ES). Default is en-US.", + "type": "string", + "required": null + }, + { + "name": "--voice", + "description": "The voice to use for speech synthesis (e.g., en-US-JennyNeural). If not specified, the default voice for the language will be used.", + "type": "string", + "required": null + }, + { + "name": "--format", + "description": "Output format: simple or detailed.", + "type": "string", + "required": null + }, + { + "name": "--endpointId", + "description": "The endpoint ID of a custom voice model for speech synthesis.", + "type": "string", + "required": null + } + ] + }, { "name": "create", "description": "Create a new Azure SQL Database on an existing SQL Server. This command creates a database with configurable\r\nperformance tiers, size limits, and other settings. 
Equivalent to 'az sql db create'.\r\nReturns the newly created database information including configuration details.", @@ -13171,5 +13262,5 @@ } ], "consolidated_tools": null, - "duration": 53 + "duration": 56 } \ No newline at end of file diff --git a/servers/Azure.Mcp.Server/README.md b/servers/Azure.Mcp.Server/README.md index 7117db872..c6938d2b0 100644 --- a/servers/Azure.Mcp.Server/README.md +++ b/servers/Azure.Mcp.Server/README.md @@ -368,6 +368,9 @@ To use Azure Entra ID, review the [troubleshooting guide](https://github.com/mic * "Recognize speech from my audio file with language detection" * "Transcribe speech from audio with profanity filtering" * "Transcribe audio with phrase hints for better accuracy" +* "Convert text to speech and save to output.wav" +* "Synthesize speech from 'Hello, welcome to Azure' with Spanish voice" +* "Generate MP3 audio from text with high quality format" ### ⚙️ Azure App Configuration @@ -509,7 +512,7 @@ The Azure MCP Server provides tools for interacting with **40+ Azure service are - 🧮 **Azure AI Foundry** - AI model management, AI model deployment, and knowledge index management - 🔎 **Azure AI Search** - Search engine/vector database operations -- 🎤 **Azure AI Services Speech** - Speech-to-text recognition +- 🎤 **Azure AI Services Speech** - Speech-to-text recognition and text-to-speech synthesis - 🤖 **Azure AI Best Practices** - AI app development guidance for Azure AI Foundry and Microsoft Agent Framework - ⚙️ **Azure App Configuration** - Configuration management - 🕸️ **Azure App Service** - Web app hosting diff --git a/servers/Azure.Mcp.Server/docs/azmcp-commands.md b/servers/Azure.Mcp.Server/docs/azmcp-commands.md index a38694ba2..cd0cd9d89 100644 --- a/servers/Azure.Mcp.Server/docs/azmcp-commands.md +++ b/servers/Azure.Mcp.Server/docs/azmcp-commands.md @@ -418,6 +418,71 @@ azmcp speech stt recognize --endpoint --file audio.wav \ Use phrase hints when you expect specific terminology, technical terms, or domain-specific 
vocabulary in your audio content. This significantly improves recognition accuracy for specialized content. +```bash +# Synthesize speech from text and save to an audio file using Azure AI Services Speech +# ❌ Destructive | ✅ Idempotent | ❌ OpenWorld | ❌ ReadOnly | ❌ Secret | ✅ LocalRequired +azmcp speech tts synthesize --endpoint <endpoint> \ + --text <text> \ + --outputAudio <output-audio> \ + [--language <language>] \ + [--voice <voice>] \ + [--format <format>] \ + [--endpointId <endpoint-id>] +``` + +#### Text-to-Speech Parameters + +| Parameter | Required | Description | +|-----------|----------|-------------| +| `--endpoint` | Yes | Azure AI Services endpoint URL (e.g., https://your-service.cognitiveservices.azure.com/) | +| `--text` | Yes | The text to convert to speech | +| `--outputAudio` | Yes | Path where the synthesized audio file will be saved (e.g., output.wav, speech.mp3) | +| `--language` | No | Speech synthesis language (default: en-US). Examples: es-ES, fr-FR, de-DE | +| `--voice` | No | Neural voice to use (e.g., en-US-JennyNeural, es-ES-ElviraNeural). If not specified, default voice for the language is used | +| `--format` | No | Output audio format (default: Riff24Khz16BitMonoPcm). 
Supported formats: Riff24Khz16BitMonoPcm, Audio16Khz32KBitRateMonoMp3, Audio24Khz96KBitRateMonoMp3, Ogg16Khz16BitMonoOpus, Raw16Khz16BitMonoPcm | +| `--endpointId` | No | Endpoint ID of a custom voice model for personalized speech synthesis | + +#### Supported Audio Formats + +The `--format` parameter accepts the following values: + +- **WAV formats**: `Riff24Khz16BitMonoPcm` (default), `Riff16Khz16BitMonoPcm`, `Raw16Khz16BitMonoPcm` +- **MP3 formats**: `Audio16Khz32KBitRateMonoMp3`, `Audio24Khz96KBitRateMonoMp3`, `Audio48Khz192KBitRateMonoMp3` +- **OGG/Opus formats**: `Ogg16Khz16BitMonoOpus`, `Ogg24Khz16BitMonoOpus` + +**Examples:** + +```bash +# Basic text-to-speech synthesis +# ❌ Destructive | ✅ Idempotent | ❌ OpenWorld | ❌ ReadOnly | ❌ Secret | ✅ LocalRequired +azmcp speech tts synthesize --endpoint https://myservice.cognitiveservices.azure.com/ \ + --text "Hello, welcome to Azure AI Services Speech" \ + --outputAudio welcome.wav + +# Synthesize with specific language and voice +# ❌ Destructive | ✅ Idempotent | ❌ OpenWorld | ❌ ReadOnly | ❌ Secret | ✅ LocalRequired +azmcp speech tts synthesize --endpoint https://myservice.cognitiveservices.azure.com/ \ + --text "Hola, bienvenido a los servicios de voz de Azure" \ + --outputAudio spanish-greeting.wav \ + --language es-ES \ + --voice es-ES-ElviraNeural + +# Generate MP3 output with high quality +# ❌ Destructive | ✅ Idempotent | ❌ OpenWorld | ❌ ReadOnly | ❌ Secret | ✅ LocalRequired +azmcp speech tts synthesize --endpoint https://myservice.cognitiveservices.azure.com/ \ + --text "This is a high quality audio output" \ + --outputAudio output.mp3 \ + --format Audio48Khz192KBitRateMonoMp3 + +# Use custom voice model +# ❌ Destructive | ✅ Idempotent | ❌ OpenWorld | ❌ ReadOnly | ❌ Secret | ✅ LocalRequired +azmcp speech tts synthesize --endpoint https://myservice.cognitiveservices.azure.com/ \ + --text "This uses my custom trained voice" \ + --outputAudio custom-voice.wav \ + --voice my-custom-voice-model \ + --endpointId 
my-custom-voice-endpoint-id +``` + ### Azure App Configuration Operations ```bash diff --git a/servers/Azure.Mcp.Server/docs/e2eTestPrompts.md b/servers/Azure.Mcp.Server/docs/e2eTestPrompts.md index 33746b90e..b5b8a104b 100644 --- a/servers/Azure.Mcp.Server/docs/e2eTestPrompts.md +++ b/servers/Azure.Mcp.Server/docs/e2eTestPrompts.md @@ -81,6 +81,16 @@ This file contains prompts used for end-to-end testing to ensure each tool is in | speech_stt_recognize | Transcribe audio using multiple phrase hints: "Azure", "cognitive services", "machine learning" | | speech_stt_recognize | Convert speech to text with comma-separated phrase hints: "Azure, cognitive services, API" | | speech_stt_recognize | Transcribe audio with raw profanity output from file | +| speech_tts_synthesize | Convert text to speech and save to output.wav | +| speech_tts_synthesize | Synthesize speech from "Hello, welcome to Azure" and save to welcome.wav | +| speech_tts_synthesize | Generate speech audio from text "Hello world" using Azure Speech Services | +| speech_tts_synthesize | Convert text to speech with Spanish language and save to spanish-audio.wav | +| speech_tts_synthesize | Synthesize speech with voice en-US-JennyNeural from text "Azure AI Services" | +| speech_tts_synthesize | Create MP3 audio file from text "Welcome to Azure" with high quality format | +| speech_tts_synthesize | Generate speech with custom voice model using endpoint ID | +| speech_tts_synthesize | Convert text to OGG/Opus format audio file | +| speech_tts_synthesize | Synthesize long text content to audio file with streaming | +| speech_tts_synthesize | Create audio file from text in French language with appropriate voice | ## Azure App Configuration diff --git a/tools/Azure.Mcp.Tools.Speech/src/Azure.Mcp.Tools.Speech.csproj b/tools/Azure.Mcp.Tools.Speech/src/Azure.Mcp.Tools.Speech.csproj index c320a34e2..7b1158487 100644 --- a/tools/Azure.Mcp.Tools.Speech/src/Azure.Mcp.Tools.Speech.csproj +++ 
b/tools/Azure.Mcp.Tools.Speech/src/Azure.Mcp.Tools.Speech.csproj @@ -1,4 +1,4 @@ - + true diff --git a/tools/Azure.Mcp.Tools.Speech/src/Commands/SpeechJsonContext.cs b/tools/Azure.Mcp.Tools.Speech/src/Commands/SpeechJsonContext.cs index dffbc8dff..9a36c66e0 100644 --- a/tools/Azure.Mcp.Tools.Speech/src/Commands/SpeechJsonContext.cs +++ b/tools/Azure.Mcp.Tools.Speech/src/Commands/SpeechJsonContext.cs @@ -3,6 +3,7 @@ using System.Text.Json.Serialization; using Azure.Mcp.Tools.Speech.Commands.Stt; +using Azure.Mcp.Tools.Speech.Commands.Tts; using Azure.Mcp.Tools.Speech.Models; using Azure.Mcp.Tools.Speech.Models.FastTranscription; using Azure.Mcp.Tools.Speech.Models.Realtime; @@ -24,6 +25,8 @@ [JsonSerializable(typeof(SpeechRecognitionResult))] [JsonSerializable(typeof(SttRecognizeCommand.SttRecognizeCommandResult))] +[JsonSerializable(typeof(SynthesisResult))] +[JsonSerializable(typeof(TtsSynthesizeCommand.TtsSynthesizeCommandResult))] [JsonSourceGenerationOptions( PropertyNamingPolicy = JsonKnownNamingPolicy.CamelCase, WriteIndented = true, diff --git a/tools/Azure.Mcp.Tools.Speech/src/Commands/Tts/TtsSynthesizeCommand.cs b/tools/Azure.Mcp.Tools.Speech/src/Commands/Tts/TtsSynthesizeCommand.cs new file mode 100644 index 000000000..56fa08f26 --- /dev/null +++ b/tools/Azure.Mcp.Tools.Speech/src/Commands/Tts/TtsSynthesizeCommand.cs @@ -0,0 +1,182 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +using System.Net; +using Azure.Mcp.Core.Commands; +using Azure.Mcp.Core.Extensions; +using Azure.Mcp.Tools.Speech.Models; +using Azure.Mcp.Tools.Speech.Options; +using Azure.Mcp.Tools.Speech.Options.Tts; +using Azure.Mcp.Tools.Speech.Services; +using Microsoft.Extensions.Logging; + +namespace Azure.Mcp.Tools.Speech.Commands.Tts; + +public sealed class TtsSynthesizeCommand(ILogger logger) : BaseSpeechCommand() +{ + internal record TtsSynthesizeCommandResult(SynthesisResult Result); + + private const string CommandTitle = "Synthesize Speech from Text"; + private static readonly HashSet SupportedExtensions = [".wav", ".mp3", ".ogg", ".raw"]; + private readonly ILogger _logger = logger; + + public override string Name => "synthesize"; + + public override string Id => "d6f6687f-feee-4e15-9b98-71aea4076e04"; + + public override string Description => + """ + Convert text to speech using Azure AI Services Speech. This command takes text input and generates an audio file using advanced neural text-to-speech capabilities. + You must provide an Azure AI Services endpoint (e.g., https://your-service.cognitiveservices.azure.com/), the text to convert, and an output file path. + Optional parameters include language specification (default: en-US), voice selection, audio output format (default: Riff24Khz16BitMonoPcm), and custom voice endpoint ID. + The command supports a wide variety of output formats and neural voices for natural-sounding speech synthesis. 
+ """; + + public override string Title => CommandTitle; + + public override ToolMetadata Metadata => new() + { + Destructive = false, + Idempotent = true, + OpenWorld = false, + ReadOnly = false, + LocalRequired = true, // Requires local file output + Secret = false + }; + + protected override void RegisterOptions(Command command) + { + base.RegisterOptions(command); + + command.Options.Add(SpeechOptionDefinitions.Text); + command.Options.Add(SpeechOptionDefinitions.OutputAudio); + command.Options.Add(SpeechOptionDefinitions.Language); + command.Options.Add(SpeechOptionDefinitions.Voice); + command.Options.Add(SpeechOptionDefinitions.Format); + command.Options.Add(SpeechOptionDefinitions.EndpointId); + + // Command-level validation + command.Validators.Add(commandResult => + { + var textValue = commandResult.GetValueOrDefault(SpeechOptionDefinitions.Text); + + // Validate text is not empty + if (string.IsNullOrWhiteSpace(textValue)) + { + commandResult.AddError("Text cannot be empty or whitespace."); + } + + var fileValue = commandResult.GetValueOrDefault(SpeechOptionDefinitions.OutputAudio); + + // Validate output file path + if (string.IsNullOrWhiteSpace(fileValue)) + { + commandResult.AddError("Output file path cannot be empty."); + } + else + { + // Check if file already exists (don't allow overwriting) + if (File.Exists(fileValue)) + { + commandResult.AddError($"Output file already exists: {fileValue}. Please specify a different file path or delete the existing file."); + } + + // Validate file extension + var extension = Path.GetExtension(fileValue).ToLowerInvariant(); + + if (!SupportedExtensions.Contains(extension)) + { + commandResult.AddError($"Unsupported output file format: {extension}. 
Only {string.Join(", ", SupportedExtensions)} are supported."); + } + } + + // Validate language format if provided + var languageValue = commandResult.GetValueOrDefault(SpeechOptionDefinitions.Language); + if (!string.IsNullOrEmpty(languageValue)) + { + // Basic validation: language should be in format like "en-US", "es-ES" + if (!System.Text.RegularExpressions.Regex.IsMatch(languageValue, @"^[a-z]{2}-[A-Z]{2}$")) + { + commandResult.AddError($"Language must be in format 'xx-XX' (e.g., 'en-US', 'es-ES'). Got: {languageValue}"); + } + } + }); + } + + protected override TtsSynthesizeOptions BindOptions(ParseResult parseResult) + { + var options = base.BindOptions(parseResult); + options.Text = parseResult.GetValueOrDefault(SpeechOptionDefinitions.Text.Name); + options.OutputAudio = parseResult.GetValueOrDefault(SpeechOptionDefinitions.OutputAudio.Name); + options.Language = parseResult.GetValueOrDefault(SpeechOptionDefinitions.Language.Name); + options.Voice = parseResult.GetValueOrDefault(SpeechOptionDefinitions.Voice.Name); + options.Format = parseResult.GetValueOrDefault(SpeechOptionDefinitions.Format.Name); + options.EndpointId = parseResult.GetValueOrDefault(SpeechOptionDefinitions.EndpointId.Name); + + return options; + } + + public override async Task ExecuteAsync(CommandContext context, ParseResult parseResult, CancellationToken cancellationToken) + { + if (!Validate(parseResult.CommandResult, context.Response).IsValid) + { + return context.Response; + } + + var options = BindOptions(parseResult); + + try + { + var speechService = context.GetService(); + var result = await speechService.SynthesizeSpeechToFile( + options.Endpoint!, + options.Text!, + options.OutputAudio!, + options.Language, + options.Voice, + options.Format, + options.EndpointId, + options.RetryPolicy); + + _logger.LogInformation( + "Successfully synthesized speech to file: {File}. 
Audio size: {Size} bytes, Voice: {Voice}", + result.FilePath, + result.AudioSize, + result.Voice); + + context.Response.Status = HttpStatusCode.OK; + context.Response.Message = "Speech synthesis completed successfully."; + context.Response.Results = ResponseResult.Create( + new(result), + SpeechJsonContext.Default.TtsSynthesizeCommandResult); + } + catch (Exception ex) + { + _logger.LogError(ex, "Error synthesizing speech to file: {File}", options.OutputAudio); + HandleException(context, ex); + } + + return context.Response; + } + + protected override string GetErrorMessage(Exception ex) => ex switch + { + ArgumentException argEx => $"Invalid parameter: {argEx.Message}", + UnauthorizedAccessException => "Access denied. Check Azure AI Services credentials and permissions.", + DirectoryNotFoundException => "Output directory not found. Ensure the directory exists before synthesizing.", + IOException ioEx => $"File operation failed: {ioEx.Message}", + _ => base.GetErrorMessage(ex) + }; + + protected override HttpStatusCode GetStatusCode(Exception ex) => ex switch + { + ArgumentException => HttpStatusCode.BadRequest, + UnauthorizedAccessException => HttpStatusCode.Unauthorized, + DirectoryNotFoundException => HttpStatusCode.NotFound, + IOException => HttpStatusCode.InternalServerError, + HttpRequestException => HttpStatusCode.ServiceUnavailable, + TimeoutException => HttpStatusCode.GatewayTimeout, + InvalidOperationException => HttpStatusCode.InternalServerError, + _ => base.GetStatusCode(ex) + }; +} diff --git a/tools/Azure.Mcp.Tools.Speech/src/Models/SynthesisResult.cs b/tools/Azure.Mcp.Tools.Speech/src/Models/SynthesisResult.cs new file mode 100644 index 000000000..fa3e68717 --- /dev/null +++ b/tools/Azure.Mcp.Tools.Speech/src/Models/SynthesisResult.cs @@ -0,0 +1,24 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +using System.Text.Json.Serialization; + +namespace Azure.Mcp.Tools.Speech.Models; + +public class SynthesisResult +{ + [JsonPropertyName("filePath")] + public string? FilePath { get; set; } + + [JsonPropertyName("audioSize")] + public long AudioSize { get; set; } + + [JsonPropertyName("format")] + public string? Format { get; set; } + + [JsonPropertyName("voice")] + public string? Voice { get; set; } + + [JsonPropertyName("language")] + public string? Language { get; set; } +} diff --git a/tools/Azure.Mcp.Tools.Speech/src/Options/SpeechOptionDefinitions.cs b/tools/Azure.Mcp.Tools.Speech/src/Options/SpeechOptionDefinitions.cs index 6c57b71c2..e8031b698 100644 --- a/tools/Azure.Mcp.Tools.Speech/src/Options/SpeechOptionDefinitions.cs +++ b/tools/Azure.Mcp.Tools.Speech/src/Options/SpeechOptionDefinitions.cs @@ -7,10 +7,14 @@ public static class SpeechOptionDefinitions { public const string EndpointName = "endpoint"; public const string FileName = "file"; + public const string OutputAudioName = "outputAudio"; public const string LanguageName = "language"; public const string PhrasesName = "phrases"; public const string FormatName = "format"; public const string ProfanityName = "profanity"; + public const string TextName = "text"; + public const string VoiceName = "voice"; + public const string EndpointIdName = "endpointId"; public static readonly Option Endpoint = new( $"--{EndpointName}") @@ -50,4 +54,30 @@ public static class SpeechOptionDefinitions { Description = "Profanity filter: masked, removed, or raw. Default is masked." 
}; + + public static readonly Option Text = new( + $"--{TextName}") + { + Description = "The text to convert to speech.", + Required = true + }; + + public static readonly Option OutputAudio = new( + $"--{OutputAudioName}") + { + Description = "Path where the synthesized audio file will be saved.", + Required = true + }; + + public static readonly Option Voice = new( + $"--{VoiceName}") + { + Description = "The voice to use for speech synthesis (e.g., en-US-JennyNeural). If not specified, the default voice for the language will be used." + }; + + public static readonly Option EndpointId = new( + $"--{EndpointIdName}") + { + Description = "The endpoint ID of a custom voice model for speech synthesis." + }; } diff --git a/tools/Azure.Mcp.Tools.Speech/src/Options/Tts/TtsSynthesizeOptions.cs b/tools/Azure.Mcp.Tools.Speech/src/Options/Tts/TtsSynthesizeOptions.cs new file mode 100644 index 000000000..f0d843bdf --- /dev/null +++ b/tools/Azure.Mcp.Tools.Speech/src/Options/Tts/TtsSynthesizeOptions.cs @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +using System.Text.Json.Serialization; + +namespace Azure.Mcp.Tools.Speech.Options.Tts; + +public class TtsSynthesizeOptions : BaseSpeechOptions +{ + [JsonPropertyName(SpeechOptionDefinitions.TextName)] + public string? Text { get; set; } + + [JsonPropertyName(SpeechOptionDefinitions.OutputAudioName)] + public string? OutputAudio { get; set; } + + [JsonPropertyName(SpeechOptionDefinitions.LanguageName)] + public string? Language { get; set; } + + [JsonPropertyName(SpeechOptionDefinitions.VoiceName)] + public string? Voice { get; set; } + + [JsonPropertyName(SpeechOptionDefinitions.FormatName)] + public string? Format { get; set; } + + [JsonPropertyName(SpeechOptionDefinitions.EndpointIdName)] + public string? 
EndpointId { get; set; } +} diff --git a/tools/Azure.Mcp.Tools.Speech/src/Services/ISpeechService.cs b/tools/Azure.Mcp.Tools.Speech/src/Services/ISpeechService.cs index dcc6fb0aa..213ba8c12 100644 --- a/tools/Azure.Mcp.Tools.Speech/src/Services/ISpeechService.cs +++ b/tools/Azure.Mcp.Tools.Speech/src/Services/ISpeechService.cs @@ -16,4 +16,14 @@ Task RecognizeSpeechFromFile( string? format = null, string? profanity = null, RetryPolicyOptions? retryPolicy = null); + + Task SynthesizeSpeechToFile( + string endpoint, + string text, + string outputFilePath, + string? language = null, + string? voice = null, + string? format = null, + string? endpointId = null, + RetryPolicyOptions? retryPolicy = null); } diff --git a/tools/Azure.Mcp.Tools.Speech/src/Services/SpeechService.cs b/tools/Azure.Mcp.Tools.Speech/src/Services/SpeechService.cs index ff49d3142..e25306c12 100644 --- a/tools/Azure.Mcp.Tools.Speech/src/Services/SpeechService.cs +++ b/tools/Azure.Mcp.Tools.Speech/src/Services/SpeechService.cs @@ -6,15 +6,23 @@ using Azure.Mcp.Core.Services.Azure.Tenant; using Azure.Mcp.Tools.Speech.Models; using Azure.Mcp.Tools.Speech.Services.Recognizers; +using Azure.Mcp.Tools.Speech.Services.Synthesizers; using Microsoft.Extensions.Logging; namespace Azure.Mcp.Tools.Speech.Services; -public class SpeechService(ITenantService tenantService, ILogger logger, IFastTranscriptionRecognizer fastTranscriptionRecognizer, IRealtimeTranscriptionRecognizer realtimeTranscriptionRecognizer) : BaseAzureService(tenantService), ISpeechService +public class SpeechService( + ITenantService tenantService, + ILogger logger, + IFastTranscriptionRecognizer fastTranscriptionRecognizer, + IRealtimeTranscriptionRecognizer realtimeTranscriptionRecognizer, + IRealtimeTtsSynthesizer speechSynthesizer) + : BaseAzureService(tenantService), ISpeechService { private readonly ILogger _logger = logger; private readonly IFastTranscriptionRecognizer _fastTranscriptionRecognizer = fastTranscriptionRecognizer; private 
readonly IRealtimeTranscriptionRecognizer _realtimeTranscriptionRecognizer = realtimeTranscriptionRecognizer; + private readonly IRealtimeTtsSynthesizer _speechSynthesizer = speechSynthesizer; /// /// Recognizes speech from an audio file using either Fast Transcription or Realtime Transcription. /// Fast Transcription is preferred when the language is supported. @@ -81,4 +89,38 @@ public async Task RecognizeSpeechFromFile( throw; } } + + /// + /// Synthesizes speech from text and saves it to an audio file using Azure AI Services Speech. + /// Delegates to the speech synthesizer for actual synthesis implementation. + /// + /// Azure AI Services endpoint (e.g., https://your-service.cognitiveservices.azure.com/) + /// The text to convert to speech + /// Path where the audio file will be saved + /// Language for synthesis (default: en-US) + /// Voice name to use (e.g., en-US-JennyNeural). If not specified, default voice for language is used + /// Output audio format (default: Riff24Khz16BitMonoPcm) + /// Optional endpoint ID for custom voice model + /// Optional retry policy for resilience + /// Synthesis result with file information + public async Task SynthesizeSpeechToFile( + string endpoint, + string text, + string outputFilePath, + string? language = null, + string? voice = null, + string? format = null, + string? endpointId = null, + RetryPolicyOptions? retryPolicy = null) + { + return await _speechSynthesizer.SynthesizeToFileAsync( + endpoint, + text, + outputFilePath, + language, + voice, + format, + endpointId, + retryPolicy); + } } diff --git a/tools/Azure.Mcp.Tools.Speech/src/Services/Synthesizers/IRealtimeTtsSynthesizer.cs b/tools/Azure.Mcp.Tools.Speech/src/Services/Synthesizers/IRealtimeTtsSynthesizer.cs new file mode 100644 index 000000000..ea04476b6 --- /dev/null +++ b/tools/Azure.Mcp.Tools.Speech/src/Services/Synthesizers/IRealtimeTtsSynthesizer.cs @@ -0,0 +1,35 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +using Azure.Mcp.Core.Options; +using Azure.Mcp.Tools.Speech.Models; + +namespace Azure.Mcp.Tools.Speech.Services.Synthesizers; + +/// +/// Interface for speech synthesis services. +/// +public interface IRealtimeTtsSynthesizer +{ + /// + /// Synthesizes speech from text and saves it to an audio file. + /// + /// Azure AI Services endpoint + /// The text to convert to speech + /// Path where the audio file will be saved + /// Language for synthesis (default: en-US) + /// Voice name to use (e.g., en-US-JennyNeural) + /// Output audio format (default: Riff24Khz16BitMonoPcm) + /// Optional endpoint ID for custom voice model + /// Optional retry policy for resilience + /// Synthesis result with file information + Task SynthesizeToFileAsync( + string endpoint, + string text, + string outputFilePath, + string? language = null, + string? voice = null, + string? format = null, + string? endpointId = null, + RetryPolicyOptions? retryPolicy = null); +} diff --git a/tools/Azure.Mcp.Tools.Speech/src/Services/Synthesizers/RealtimeTtsSynthesizer.cs b/tools/Azure.Mcp.Tools.Speech/src/Services/Synthesizers/RealtimeTtsSynthesizer.cs new file mode 100644 index 000000000..7ead0cc24 --- /dev/null +++ b/tools/Azure.Mcp.Tools.Speech/src/Services/Synthesizers/RealtimeTtsSynthesizer.cs @@ -0,0 +1,274 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +using Azure.Core; +using Azure.Mcp.Core.Options; +using Azure.Mcp.Core.Services.Azure; +using Azure.Mcp.Core.Services.Azure.Tenant; +using Azure.Mcp.Tools.Speech.Models; +using Microsoft.CognitiveServices.Speech; +using Microsoft.CognitiveServices.Speech.Audio; +using Microsoft.Extensions.Logging; + +namespace Azure.Mcp.Tools.Speech.Services.Synthesizers; + +/// +/// Neural speech synthesizer using Azure AI Services Speech SDK. +/// Implements streaming synthesis for efficient memory management with large texts. 
+/// +public class RealtimeTtsSynthesizer(ITenantService tenantService, ILogger logger) + : BaseAzureService(tenantService), IRealtimeTtsSynthesizer +{ + private readonly ILogger _logger = logger; + + /// + /// Synthesizes speech from text and saves it to an audio file using Azure AI Services Speech. + /// Uses streaming synthesis to handle large texts efficiently and avoid memory issues. + /// + public async Task SynthesizeToFileAsync( + string endpoint, + string text, + string outputFilePath, + string? language = null, + string? voice = null, + string? format = null, + string? endpointId = null, + RetryPolicyOptions? retryPolicy = null) + { + ValidateRequiredParameters((nameof(endpoint), endpoint), (nameof(text), text), (nameof(outputFilePath), outputFilePath)); + + if (string.IsNullOrWhiteSpace(text)) + { + throw new ArgumentException("Text cannot be empty or whitespace.", nameof(text)); + } + + try + { + // Use the reusable streaming synthesis method + var (audioData, actualVoice) = await SynthesizeSpeechToStreamAsync( + endpoint, text, language, voice, format, endpointId); + + // Write the complete audio data to file + await File.WriteAllBytesAsync(outputFilePath, audioData); + + _logger.LogInformation( + "Speech synthesized and saved to file: {OutputFile}, Audio size: {AudioSize} bytes", + outputFilePath, + audioData.Length); + + return new SynthesisResult + { + FilePath = outputFilePath, + AudioSize = audioData.Length, + Format = format ?? "Riff24Khz16BitMonoPcm", + Voice = actualVoice, + Language = language ?? 
"en-US" + }; + } + catch (Exception ex) + { + _logger.LogError(ex, "Error during speech synthesis."); + + // Clean up partial file on error + if (File.Exists(outputFilePath)) + { + try + { + File.Delete(outputFilePath); + _logger.LogInformation("Cleaned up partial output file after error: {OutputFile}", outputFilePath); + } + catch (Exception cleanupEx) + { + _logger.LogWarning(cleanupEx, "Failed to clean up partial output file: {OutputFile}", outputFilePath); + } + } + + throw; + } + } + + /// + /// Synthesizes speech from text and returns the audio data as a byte array. + /// This method uses push stream to collect audio data during synthesis for efficient memory management. + /// + private async Task<(byte[] AudioData, string Voice)> SynthesizeSpeechToStreamAsync( + string endpoint, + string text, + string? language = null, + string? voice = null, + string? format = null, + string? endpointId = null) + { + // Get Azure AD credential and token + var credential = await GetCredential(); + + // Get access token for Cognitive Services with proper scope + var tokenRequestContext = new TokenRequestContext(["https://cognitiveservices.azure.com/.default"]); + var accessToken = await credential.GetTokenAsync(tokenRequestContext, CancellationToken.None); + + // Configure Speech SDK with endpoint + var config = SpeechConfig.FromEndpoint(new Uri(endpoint)); + + // Set the authorization token + config.AuthorizationToken = accessToken.Token; + + // Set language (default to en-US) + var synthesisLanguage = language ?? "en-US"; + config.SpeechSynthesisLanguage = synthesisLanguage; + + // Set voice if provided + string? 
actualVoice = voice; + if (!string.IsNullOrEmpty(voice)) + { + config.SpeechSynthesisVoiceName = voice; + } + + // Set output format (default to Riff24Khz16BitMonoPcm) + var outputFormat = ParseOutputFormat(format); + config.SetSpeechSynthesisOutputFormat(outputFormat); + + // Set custom endpoint ID if provided + if (!string.IsNullOrEmpty(endpointId)) + { + config.EndpointId = endpointId; + } + + // Create a memory stream to collect audio data via push stream + var audioStream = new MemoryStream(); + using var pushStream = AudioOutputStream.CreatePushStream(new PushAudioStreamCallback(audioStream, _logger)); + using var audioConfig = AudioConfig.FromStreamOutput(pushStream); + using var synthesizer = new SpeechSynthesizer(config, audioConfig); + + // Track synthesis progress + var taskCompletionSource = new TaskCompletionSource(); + SpeechSynthesisCancellationDetails? cancellationDetails = null; + + // Subscribe to synthesis events + synthesizer.SynthesisStarted += (s, e) => + { + _logger.LogInformation("Speech synthesis started for text length: {Length} characters", text.Length); + }; + + synthesizer.Synthesizing += (s, e) => + { + if (e.Result.AudioData.Length > 0) + { + _logger.LogDebug("Received audio chunk: {ChunkSize} bytes", e.Result.AudioData.Length); + } + }; + + synthesizer.SynthesisCompleted += (s, e) => + { + _logger.LogInformation("Speech synthesis completed"); + taskCompletionSource.TrySetResult(true); + }; + + synthesizer.SynthesisCanceled += (s, e) => + { + var details = SpeechSynthesisCancellationDetails.FromResult(e.Result); + _logger.LogError("Speech synthesis canceled: Reason={Reason}, ErrorCode={ErrorCode}, ErrorDetails={ErrorDetails}", + details.Reason, details.ErrorCode, details.ErrorDetails); + cancellationDetails = details; + taskCompletionSource.TrySetResult(false); + }; + + // Start synthesis + await synthesizer.SpeakTextAsync(text); + + // Wait for synthesis to complete + var success = await taskCompletionSource.Task; + + // Check if 
synthesis was successful + if (!success && cancellationDetails != null) + { + if (IsSynthesisInvalidEndpointError(cancellationDetails)) + { + throw new InvalidOperationException( + $"Invalid endpoint or connectivity issue. Reason: {cancellationDetails.Reason}, ErrorCode: {cancellationDetails.ErrorCode}, Details: {cancellationDetails.ErrorDetails}"); + } + + throw new InvalidOperationException( + $"Speech synthesis failed: {cancellationDetails.Reason} - {cancellationDetails.ErrorDetails}"); + } + + if (!success) + { + throw new InvalidOperationException("Speech synthesis failed for unknown reason"); + } + + // Get the collected audio data from the stream + var audioData = audioStream.ToArray(); + + _logger.LogInformation( + "Speech synthesized successfully. Total audio length: {AudioLength} bytes", + audioData.Length); + + // Get actual voice used (either specified or default) + if (string.IsNullOrEmpty(actualVoice)) + { + actualVoice = voice ?? "default"; + } + + return (audioData, actualVoice); + } + + /// + /// Push stream callback that writes audio data to a memory stream as it arrives. + /// This allows for efficient collection of audio data during synthesis without blocking. + /// + private sealed class PushAudioStreamCallback(MemoryStream targetStream, ILogger logger) : PushAudioOutputStreamCallback + { + private readonly MemoryStream _targetStream = targetStream; + private readonly ILogger _logger = logger; + + public override uint Write(byte[] dataBuffer) + { + if (dataBuffer != null && dataBuffer.Length > 0) + { + _targetStream.Write(dataBuffer, 0, dataBuffer.Length); + _logger.LogDebug("Wrote {BytesWritten} bytes to audio stream", dataBuffer.Length); + return (uint)dataBuffer.Length; + } + return 0; + } + + public override void Close() + { + _logger.LogDebug("Push stream closed, total bytes collected: {TotalBytes}", _targetStream.Length); + } + } + + /// + /// Determines if the cancellation details indicate an invalid endpoint error for synthesis. 
+ /// + private static bool IsSynthesisInvalidEndpointError(SpeechSynthesisCancellationDetails cancellationDetails) + { + return cancellationDetails.Reason == CancellationReason.Error && + (cancellationDetails.ErrorCode == CancellationErrorCode.ConnectionFailure || + cancellationDetails.ErrorCode == CancellationErrorCode.AuthenticationFailure || + cancellationDetails.ErrorCode == CancellationErrorCode.Forbidden || + cancellationDetails.ErrorDetails?.Contains("endpoint", StringComparison.OrdinalIgnoreCase) == true || + cancellationDetails.ErrorDetails?.Contains("connection", StringComparison.OrdinalIgnoreCase) == true || + cancellationDetails.ErrorDetails?.Contains("network", StringComparison.OrdinalIgnoreCase) == true); + } + + /// + /// Parses the output format string to SpeechSynthesisOutputFormat enum. + /// + private static SpeechSynthesisOutputFormat ParseOutputFormat(string? format) + { + if (string.IsNullOrEmpty(format)) + { + return SpeechSynthesisOutputFormat.Riff24Khz16BitMonoPcm; + } + + // Try to parse the format string directly to enum + if (Enum.TryParse(format, true, out var parsedFormat)) + { + return parsedFormat; + } + + // If parsing fails, default to Riff24Khz16BitMonoPcm + return SpeechSynthesisOutputFormat.Riff24Khz16BitMonoPcm; + } +} diff --git a/tools/Azure.Mcp.Tools.Speech/src/SpeechSetup.cs b/tools/Azure.Mcp.Tools.Speech/src/SpeechSetup.cs index c1f3737a5..754d49eb7 100644 --- a/tools/Azure.Mcp.Tools.Speech/src/SpeechSetup.cs +++ b/tools/Azure.Mcp.Tools.Speech/src/SpeechSetup.cs @@ -4,8 +4,10 @@ using Azure.Mcp.Core.Areas; using Azure.Mcp.Core.Commands; using Azure.Mcp.Tools.Speech.Commands.Stt; +using Azure.Mcp.Tools.Speech.Commands.Tts; using Azure.Mcp.Tools.Speech.Services; using Azure.Mcp.Tools.Speech.Services.Recognizers; +using Azure.Mcp.Tools.Speech.Services.Synthesizers; using Microsoft.Extensions.DependencyInjection; namespace Azure.Mcp.Tools.Speech; @@ -18,11 +20,19 @@ public class SpeechSetup : IAreaSetup public void 
ConfigureServices(IServiceCollection services) { - // New recognizer-based architecture + // New recognizer-based architecture for STT services.AddSingleton(); services.AddSingleton(); + + // New synthesizer-based architecture for TTS + services.AddSingleton(); + + // Orchestration service services.AddSingleton(); + + // Commands services.AddSingleton(); + services.AddSingleton(); } public CommandGroup RegisterCommands(IServiceProvider serviceProvider) @@ -48,6 +58,16 @@ Services Speech endpoints and will only access speech resources accessible to th stt.AddCommand(sttRecognize.Name, sttRecognize); speech.AddSubGroup(stt); + + var tts = new CommandGroup( + name: "tts", + description: "Text-to-speech operations - Commands for converting text to spoken audio using Azure AI Services Speech synthesis."); + + var ttsSynthesize = serviceProvider.GetRequiredService(); + tts.AddCommand(ttsSynthesize.Name, ttsSynthesize); + + speech.AddSubGroup(tts); + return speech; } } diff --git a/tools/Azure.Mcp.Tools.Speech/tests/Azure.Mcp.Tools.Speech.LiveTests/SpeechCommandTests.cs b/tools/Azure.Mcp.Tools.Speech/tests/Azure.Mcp.Tools.Speech.LiveTests/SpeechCommandTests.cs index faf4984b8..51855aa3a 100644 --- a/tools/Azure.Mcp.Tools.Speech/tests/Azure.Mcp.Tools.Speech.LiveTests/SpeechCommandTests.cs +++ b/tools/Azure.Mcp.Tools.Speech/tests/Azure.Mcp.Tools.Speech.LiveTests/SpeechCommandTests.cs @@ -2,6 +2,7 @@ // Licensed under the MIT License. using System.Text.Json; +using Azure.Mcp.Tests; using Azure.Mcp.Tests.Client; using Azure.Mcp.Tools.Speech.Models; using Azure.Mcp.Tools.Speech.Models.Realtime; @@ -413,12 +414,12 @@ public async Task SpeechToText_WithBrokenFile_ShouldHandleGracefully() var resultObject = jsonResult.RootElement; // Validate Error message for corrupted file - Assert.True(resultObject.TryGetProperty("message", out var messageProperty)); + var messageProperty = resultObject.AssertProperty("message"); var message = messageProperty.GetString() ?? 
""; Assert.True(message.Contains("The audio file appears to be empty or corrupted. Please provide a valid audio file.", StringComparison.OrdinalIgnoreCase)); // Validate exception type - Assert.True(resultObject.TryGetProperty("type", out var exceptionTypeProperty)); + var exceptionTypeProperty = resultObject.AssertProperty("type"); var exceptionType = exceptionTypeProperty.GetString() ?? ""; Assert.True(exceptionType.Contains("InvalidOperationException", StringComparison.OrdinalIgnoreCase)); } @@ -506,19 +507,289 @@ public async Task SpeechToText_RecognizeCompressedAudioWithRealtimeTranscription var resultObject = jsonResult.RootElement; // Validate Error message for corrupted file - Assert.True(resultObject.TryGetProperty("message", out var messageProperty)); + var messageProperty = resultObject.AssertProperty("message"); var message = messageProperty.GetString() ?? ""; Assert.True(message.Contains("Cannot process compressed audio file", StringComparison.OrdinalIgnoreCase)); Assert.True(message.Contains("because GStreamer is not properly installed or configured.", StringComparison.OrdinalIgnoreCase)); // Validate exception type - Assert.True(resultObject.TryGetProperty("type", out var exceptionTypeProperty)); + var exceptionTypeProperty = resultObject.AssertProperty("type"); var exceptionType = exceptionTypeProperty.GetString() ?? 
""; Assert.True(exceptionType.Contains("InvalidOperationException", StringComparison.OrdinalIgnoreCase)); } #endregion + #region TTS Synthesize Tests + + [Fact] + public async Task Should_synthesize_speech_to_file_with_text() + { + // Test basic TTS synthesis with text input + var aiServicesEndpoint = $"https://{Settings.ResourceBaseName}.cognitiveservices.azure.com/"; + var outputFile = Path.Combine(Path.GetTempPath(), $"tts-test-{Guid.NewGuid()}.wav"); + + try + { + var result = await CallToolAsync( + "speech_tts_synthesize", + new() + { + { "subscription", Settings.SubscriptionId }, + { "endpoint", aiServicesEndpoint }, + { "text", "Hello, this is a test of text to speech synthesis." }, + { "outputAudio", outputFile }, + { "language", "en-US" } + }); + + // Verify successful response + Assert.NotNull(result); + var resultText = result.ToString(); + Assert.NotNull(resultText); + + // Parse and validate the JSON result + var jsonResult = JsonDocument.Parse(resultText); + var resultObject = jsonResult.RootElement; + var resultProperty = resultObject.AssertProperty("result"); + + // Verify file path + var filePathProperty = resultProperty.AssertProperty("filePath"); + Assert.Equal(outputFile, filePathProperty.GetString()); + + var audioLengthProperty = resultProperty.AssertProperty("audioSize"); + Assert.True(audioLengthProperty.GetInt64() > 0); + + // Verify the output file was created and has content + Assert.True(File.Exists(outputFile), $"Output file not created at: {outputFile}"); + var fileInfo = new FileInfo(outputFile); + Assert.True(fileInfo.Length > 0, "Output file should not be empty"); + } + finally + { + // Clean up + if (File.Exists(outputFile)) + { + File.Delete(outputFile); + } + } + } + + [Theory] + [InlineData("en-US", "en-US-JennyNeural")] + [InlineData("zh-CN", "zh-CN-XiaoxiaoNeural")] + [InlineData("ja-JP", "ja-JP-NanamiNeural")] + public async Task Should_synthesize_speech_with_different_voices(string language, string voice) + { + // Test TTS 
synthesis with different language/voice combinations + var aiServicesEndpoint = $"https://{Settings.ResourceBaseName}.cognitiveservices.azure.com/"; + var outputFile = Path.Combine(Path.GetTempPath(), $"tts-test-{language}-{Guid.NewGuid()}.wav"); + + try + { + var result = await CallToolAsync( + "speech_tts_synthesize", + new() + { + { "subscription", Settings.SubscriptionId }, + { "endpoint", aiServicesEndpoint }, + { "text", "Hello world" }, + { "outputAudio", outputFile }, + { "language", language }, + { "voice", voice } + }); + + Assert.NotNull(result); + var resultText = result.ToString(); + Assert.NotNull(resultText); + + var jsonResult = JsonDocument.Parse(resultText); + var resultObject = jsonResult.RootElement; + var resultProperty = resultObject.AssertProperty("result"); + + // Verify voice was used + var voiceProperty = resultProperty.AssertProperty("voice"); + Assert.Equal(voice, voiceProperty.GetString()); + + // Verify language + var languageProperty = resultProperty.AssertProperty("language"); + Assert.Equal(language, languageProperty.GetString()); + + // Verify file exists + Assert.True(File.Exists(outputFile)); + } + finally + { + if (File.Exists(outputFile)) + { + File.Delete(outputFile); + } + } + } + + [Theory] + [InlineData("Riff8Khz16BitMonoPcm")] + [InlineData("Riff24Khz16BitMonoPcm")] + [InlineData("Audio16Khz32KBitRateMonoMp3")] + public async Task Should_synthesize_speech_with_different_formats(string format) + { + // Test TTS synthesis with different audio formats + var aiServicesEndpoint = $"https://{Settings.ResourceBaseName}.cognitiveservices.azure.com/"; + var extension = format.Contains("Mp3") ? 
".mp3" : ".wav"; + var outputFile = Path.Combine(Path.GetTempPath(), $"tts-test-{format}-{Guid.NewGuid()}{extension}"); + + try + { + var result = await CallToolAsync( + "speech_tts_synthesize", + new() + { + { "subscription", Settings.SubscriptionId }, + { "endpoint", aiServicesEndpoint }, + { "text", "Testing different audio formats" }, + { "outputAudio", outputFile }, + { "language", "en-US" }, + { "format", format } + }); + + Assert.NotNull(result); + var resultText = result.ToString(); + Assert.NotNull(resultText); + + var jsonResult = JsonDocument.Parse(resultText); + var resultObject = jsonResult.RootElement; + var resultProperty = resultObject.AssertProperty("result"); + + // Verify format + var formatProperty = resultProperty.AssertProperty("format"); + Assert.Equal(format, formatProperty.GetString()); + + // Verify file exists and has content + Assert.True(File.Exists(outputFile)); + var fileInfo = new FileInfo(outputFile); + Assert.True(fileInfo.Length > 0); + } + finally + { + if (File.Exists(outputFile)) + { + File.Delete(outputFile); + } + } + } + + [Fact] + public async Task Should_handle_invalid_text_input() + { + // Test error handling for empty text + var aiServicesEndpoint = $"https://{Settings.ResourceBaseName}.cognitiveservices.azure.com/"; + var outputFile = Path.Combine(Path.GetTempPath(), $"tts-test-invalid-{Guid.NewGuid()}.wav"); + + try + { + var result = await CallToolAsync( + "speech_tts_synthesize", + new() + { + { "subscription", Settings.SubscriptionId }, + { "endpoint", aiServicesEndpoint }, + { "text", "" }, // Empty text should fail validation + { "outputAudio", outputFile }, + { "language", "en-US" } + }); + + // Should return error response + Assert.Null(result); + } + finally + { + if (File.Exists(outputFile)) + { + File.Delete(outputFile); + } + } + } + + [Fact] + public async Task Should_handle_invalid_language_format() + { + // Test error handling for invalid language format + var aiServicesEndpoint = 
$"https://{Settings.ResourceBaseName}.cognitiveservices.azure.com/"; + var outputFile = Path.Combine(Path.GetTempPath(), $"tts-test-invalid-lang-{Guid.NewGuid()}.wav"); + + try + { + var result = await CallToolAsync( + "speech_tts_synthesize", + new() + { + { "subscription", Settings.SubscriptionId }, + { "endpoint", aiServicesEndpoint }, + { "text", "Hello world" }, + { "outputAudio", outputFile }, + { "language", "invalid-format" } // Invalid language format + }); + + // Should return error response + Assert.Null(result); + } + finally + { + if (File.Exists(outputFile)) + { + File.Delete(outputFile); + } + } + } + + [Fact] + public async Task Should_handle_large_text_input() + { + // Test TTS with larger text to verify streaming works correctly + var aiServicesEndpoint = $"https://{Settings.ResourceBaseName}.cognitiveservices.azure.com/"; + var outputFile = Path.Combine(Path.GetTempPath(), $"tts-test-large-{Guid.NewGuid()}.wav"); + + // Create a longer text (around 1000 words) + var largeText = string.Join(" ", Enumerable.Repeat( + "This is a test of text to speech synthesis with a longer input to verify that streaming works correctly.", + 50)); + + try + { + var result = await CallToolAsync( + "speech_tts_synthesize", + new() + { + { "subscription", Settings.SubscriptionId }, + { "endpoint", aiServicesEndpoint }, + { "text", largeText }, + { "outputAudio", outputFile }, + { "language", "en-US" } + }); + + Assert.NotNull(result); + var resultText = result.ToString(); + Assert.NotNull(resultText); + + var jsonResult = JsonDocument.Parse(resultText); + var resultObject = jsonResult.RootElement; + var resultProperty = resultObject.AssertProperty("result"); + + // Verify file exists and is significantly larger than a short phrase + Assert.True(File.Exists(outputFile)); + var fileInfo = new FileInfo(outputFile); + Assert.True(fileInfo.Length > 50000, "Large text should produce a substantial audio file"); + } + finally + { + if (File.Exists(outputFile)) + { + 
File.Delete(outputFile); + } + } + } + + #endregion + /// /// Create a WAV file with given duration (seconds). /// If durationSeconds = 0, generates an empty WAV file with header only. diff --git a/tools/Azure.Mcp.Tools.Speech/tests/Azure.Mcp.Tools.Speech.UnitTests/Services/SpeechServiceTests.cs b/tools/Azure.Mcp.Tools.Speech/tests/Azure.Mcp.Tools.Speech.UnitTests/Services/SpeechServiceTests.cs index 7386c90ba..1c9b41363 100644 --- a/tools/Azure.Mcp.Tools.Speech/tests/Azure.Mcp.Tools.Speech.UnitTests/Services/SpeechServiceTests.cs +++ b/tools/Azure.Mcp.Tools.Speech/tests/Azure.Mcp.Tools.Speech.UnitTests/Services/SpeechServiceTests.cs @@ -4,6 +4,7 @@ using Azure.Mcp.Core.Services.Azure.Tenant; using Azure.Mcp.Tools.Speech.Services; using Azure.Mcp.Tools.Speech.Services.Recognizers; +using Azure.Mcp.Tools.Speech.Services.Synthesizers; using Microsoft.Extensions.Logging; using NSubstitute; using Xunit; @@ -16,6 +17,7 @@ public class SpeechServiceTests private readonly ILogger _logger; private readonly IFastTranscriptionRecognizer _fastTranscriptionRecognizer; private readonly IRealtimeTranscriptionRecognizer _realtimeTranscriptionRecognizer; + private readonly IRealtimeTtsSynthesizer _realtimeTtsSynthesizer; private readonly SpeechService _speechService; public SpeechServiceTests() @@ -24,15 +26,16 @@ public SpeechServiceTests() _logger = Substitute.For>(); _fastTranscriptionRecognizer = Substitute.For(); _realtimeTranscriptionRecognizer = Substitute.For(); + _realtimeTtsSynthesizer = Substitute.For(); - _speechService = new SpeechService(_tenantService, _logger, _fastTranscriptionRecognizer, _realtimeTranscriptionRecognizer); + _speechService = new SpeechService(_tenantService, _logger, _fastTranscriptionRecognizer, _realtimeTranscriptionRecognizer, _realtimeTtsSynthesizer); } [Fact] public void Constructor_WithValidParameters_ShouldCreateInstance() { // Arrange & Act - var service = new SpeechService(_tenantService, _logger, _fastTranscriptionRecognizer, 
_realtimeTranscriptionRecognizer); + var service = new SpeechService(_tenantService, _logger, _fastTranscriptionRecognizer, _realtimeTranscriptionRecognizer, _realtimeTtsSynthesizer); // Assert Assert.NotNull(service); diff --git a/tools/Azure.Mcp.Tools.Speech/tests/Azure.Mcp.Tools.Speech.UnitTests/Stt/SttRecognizeCommandTests.cs b/tools/Azure.Mcp.Tools.Speech/tests/Azure.Mcp.Tools.Speech.UnitTests/Stt/SttRecognizeCommandTests.cs index 662beabc9..ff56773ce 100644 --- a/tools/Azure.Mcp.Tools.Speech/tests/Azure.Mcp.Tools.Speech.UnitTests/Stt/SttRecognizeCommandTests.cs +++ b/tools/Azure.Mcp.Tools.Speech/tests/Azure.Mcp.Tools.Speech.UnitTests/Stt/SttRecognizeCommandTests.cs @@ -13,6 +13,7 @@ using Azure.Mcp.Tools.Speech.Models.Realtime; using Azure.Mcp.Tools.Speech.Services; using Azure.Mcp.Tools.Speech.Services.Recognizers; +using Azure.Mcp.Tools.Speech.Services.Synthesizers; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; using NSubstitute; @@ -27,6 +28,7 @@ public class SttRecognizeCommandTests : IDisposable private readonly ISpeechService _speechService; private readonly IFastTranscriptionRecognizer _fastTranscriptionRecognizer; private readonly IRealtimeTranscriptionRecognizer _realtimeTranscriptionRecognizer; + private readonly IRealtimeTtsSynthesizer _realtimeTtsSynthesizer; private readonly ITenantService _tenantService; private readonly ILogger _logger; private readonly ILogger _speechServiceLogger; @@ -42,12 +44,13 @@ public SttRecognizeCommandTests() // Mock the recognizers and their dependencies _fastTranscriptionRecognizer = Substitute.For(); _realtimeTranscriptionRecognizer = Substitute.For(); + _realtimeTtsSynthesizer = Substitute.For(); _tenantService = Substitute.For(); _logger = Substitute.For>(); _speechServiceLogger = Substitute.For>(); // Create real SpeechService with mocked dependencies - _speechService = new SpeechService(_tenantService, _speechServiceLogger, _fastTranscriptionRecognizer, 
_realtimeTranscriptionRecognizer); + _speechService = new SpeechService(_tenantService, _speechServiceLogger, _fastTranscriptionRecognizer, _realtimeTranscriptionRecognizer, _realtimeTtsSynthesizer); var collection = new ServiceCollection().AddSingleton(_speechService); diff --git a/tools/Azure.Mcp.Tools.Speech/tests/Azure.Mcp.Tools.Speech.UnitTests/Tts/TtsSynthesizeCommandTests.cs b/tools/Azure.Mcp.Tools.Speech/tests/Azure.Mcp.Tools.Speech.UnitTests/Tts/TtsSynthesizeCommandTests.cs new file mode 100644 index 000000000..a74810645 --- /dev/null +++ b/tools/Azure.Mcp.Tools.Speech/tests/Azure.Mcp.Tools.Speech.UnitTests/Tts/TtsSynthesizeCommandTests.cs @@ -0,0 +1,259 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +using System.CommandLine; +using System.Net; +using System.Text.Json; +using Azure.Mcp.Core.Models.Command; +using Azure.Mcp.Core.Options; +using Azure.Mcp.Tools.Speech.Commands.Tts; +using Azure.Mcp.Tools.Speech.Models; +using Azure.Mcp.Tools.Speech.Services; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using NSubstitute; +using NSubstitute.ExceptionExtensions; +using Xunit; + +namespace Azure.Mcp.Tools.Speech.UnitTests.Tts; + +public class TtsSynthesizeCommandTests +{ + private readonly IServiceProvider _serviceProvider; + private readonly ISpeechService _speechService; + private readonly ILogger _logger; + private readonly TtsSynthesizeCommand _command; + private readonly CommandContext _context; + private readonly Command _commandDefinition; + private readonly string _knownEndpoint = "https://eastus.cognitiveservices.azure.com/"; + private readonly string _knownSubscription = "sub123"; + + public TtsSynthesizeCommandTests() + { + _speechService = Substitute.For(); + _logger = Substitute.For>(); + + var collection = new ServiceCollection().AddSingleton(_speechService); + + _serviceProvider = collection.BuildServiceProvider(); + _command = new(_logger); + _context = 
new(_serviceProvider); + _commandDefinition = _command.GetCommand(); + } + + [Theory] + [InlineData("", false, "Missing Required options: --endpoint, --text, --outputAudio")] + [InlineData("--subscription sub123", false, "Missing Required options: --endpoint, --text, --outputAudio")] + [InlineData("--subscription sub123 --endpoint https://test.cognitiveservices.azure.com/", false, "Missing Required options: --text, --outputAudio")] + [InlineData("--subscription sub123 --endpoint https://test.cognitiveservices.azure.com/ --text Hello", false, "Missing Required options: --outputAudio")] + [InlineData("--subscription sub123 --endpoint https://test.cognitiveservices.azure.com/ --text Hello --outputAudio output.txt", false, "Unsupported output file format")] + [InlineData("--subscription sub123 --endpoint https://test.cognitiveservices.azure.com/ --text Hello --outputAudio output.wav --language invalid", false, "Language must be in format 'xx-XX'")] + public async Task ExecuteAsync_ValidatesInput(string args, bool shouldSucceed, string expectedError) + { + var parseResult = _commandDefinition.Parse(args.Split(' ', StringSplitOptions.RemoveEmptyEntries)); + var response = await _command.ExecuteAsync(_context, parseResult, TestContext.Current.CancellationToken); + + if (shouldSucceed) + { + Assert.Equal(HttpStatusCode.OK, response.Status); + } + else + { + Assert.NotEqual(HttpStatusCode.OK, response.Status); + Assert.Contains(expectedError, response.Message, StringComparison.OrdinalIgnoreCase); + } + } + + [Fact] + public async Task ExecuteAsync_WithValidParameters_ShouldSucceed() + { + // Arrange + var text = "HelloWorld"; + var outputFile = "test-output.wav"; + + var expectedResult = new SynthesisResult + { + FilePath = outputFile, + AudioSize = 48000, + Format = "Riff24Khz16BitMonoPcm", + Voice = "en-US-JennyNeural", + Language = "en-US" + }; + + _speechService.SynthesizeSpeechToFile( + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + 
Arg.Any(), + Arg.Any()) + .Returns(expectedResult); + + try + { + // Act + var args = $"--subscription {_knownSubscription} --endpoint {_knownEndpoint} --text {text} --outputAudio {outputFile}"; + var parseResult = _commandDefinition.Parse(args.Split(' ', StringSplitOptions.RemoveEmptyEntries)); + var response = await _command.ExecuteAsync(_context, parseResult, TestContext.Current.CancellationToken); + + // Assert + Assert.Equal(HttpStatusCode.OK, response.Status); + Assert.NotNull(response.Results); + + var result = JsonSerializer.Deserialize( + JsonSerializer.Serialize(response.Results), SpeechJsonContext.Default.TtsSynthesizeCommandResult); + Assert.NotNull(result); + Assert.Equal(outputFile, result.Result.FilePath); + Assert.Equal(48000, result.Result.AudioSize); + } + finally + { + // Clean up + if (File.Exists(outputFile)) + { + File.Delete(outputFile); + } + } + } + + [Fact] + public async Task ExecuteAsync_WithAllOptionalParameters_ShouldPassThemCorrectly() + { + // Arrange + var text = "HolaMundo"; + var outputFile = "test-output-spanish.wav"; + var language = "es-ES"; + var voice = "es-ES-ElviraNeural"; + var format = "Audio16Khz32KBitRateMonoMp3"; + var endpointId = "custom-endpoint-id"; + + var expectedResult = new SynthesisResult + { + FilePath = outputFile, + AudioSize = 32000, + Format = format, + Voice = voice, + Language = language + }; + + _speechService.SynthesizeSpeechToFile( + Arg.Is(_knownEndpoint), + Arg.Is(text), + Arg.Is(outputFile), + Arg.Is(language), + Arg.Is(voice), + Arg.Is(format), + Arg.Is(endpointId), + Arg.Any()) + .Returns(expectedResult); + + try + { + // Act + var args = $"--subscription {_knownSubscription} --endpoint {_knownEndpoint} --text {text} --outputAudio {outputFile} --language {language} --voice {voice} --format {format} --endpointId {endpointId}"; + var parseResult = _commandDefinition.Parse(args.Split(' ', StringSplitOptions.RemoveEmptyEntries)); + var response = await _command.ExecuteAsync(_context, parseResult, 
TestContext.Current.CancellationToken); + + // Assert + Assert.Equal(HttpStatusCode.OK, response.Status); + + await _speechService.Received(1).SynthesizeSpeechToFile( + _knownEndpoint, + text, + outputFile, + language, + voice, + format, + endpointId, + Arg.Any()); + } + finally + { + // Clean up + if (File.Exists(outputFile)) + { + File.Delete(outputFile); + } + } + } + + [Fact] + public async Task ExecuteAsync_ServiceThrowsException_ShouldHandleGracefully() + { + // Arrange + var text = "HelloWorld"; + var outputFile = "test-output-error.wav"; + + _speechService.SynthesizeSpeechToFile( + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .ThrowsAsync(new InvalidOperationException("Synthesis failed")); + + try + { + // Act + var args = $"--subscription {_knownSubscription} --endpoint {_knownEndpoint} --text {text} --outputAudio {outputFile}"; + var parseResult = _commandDefinition.Parse(args.Split(' ', StringSplitOptions.RemoveEmptyEntries)); + var response = await _command.ExecuteAsync(_context, parseResult, TestContext.Current.CancellationToken); + + // Assert + Assert.Equal(HttpStatusCode.InternalServerError, response.Status); + Assert.Contains("synthesis failed", response.Message.ToLower()); + } + finally + { + // Clean up + if (File.Exists(outputFile)) + { + File.Delete(outputFile); + } + } + } + + [Fact] + public async Task ExecuteAsync_UnauthorizedException_ShouldReturnUnauthorizedStatus() + { + // Arrange + var text = "HelloWorld"; + var outputFile = "test-output-unauth.wav"; + + _speechService.SynthesizeSpeechToFile( + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .ThrowsAsync(new UnauthorizedAccessException("Access denied")); + + try + { + // Act + var args = $"--subscription {_knownSubscription} --endpoint {_knownEndpoint} --text {text} --outputAudio {outputFile}"; + var parseResult = _commandDefinition.Parse(args.Split(' ', 
StringSplitOptions.RemoveEmptyEntries)); + var response = await _command.ExecuteAsync(_context, parseResult, TestContext.Current.CancellationToken); + + // Assert + Assert.Equal(HttpStatusCode.Unauthorized, response.Status); + } + finally + { + // Clean up + if (File.Exists(outputFile)) + { + File.Delete(outputFile); + } + } + } +} +