From d5d337c93a4422b2a42490e1476049e2eac587c7 Mon Sep 17 00:00:00 2001
From: Shyam Namboodiripad <gnamboo@microsoft.com>
Date: Mon, 10 Mar 2025 17:34:36 -0700
Subject: [PATCH 01/18] Introduce Reason property on EvaluationMetric

---
 ...ceTruthAndCompletenessEvaluator.Prompts.cs |  88 +-----------
 .../RelevanceTruthAndCompletenessEvaluator.cs |  25 +---
 ...nceTruthAndCompletenessEvaluatorOptions.cs |  41 ------
 .../TypeScript/components/EvalTypes.d.ts      |   4 +
 .../BooleanMetric.cs                          |   7 +-
 .../EvaluationMetric{T}.cs                    |  13 +-
 .../NumericMetric.cs                          |   7 +-
 .../StringMetric.cs                           |   7 +-
 .../EndToEndTests.cs                          |   3 +-
 ...vanceTruthAndCompletenessEvaluatorTests.cs | 133 ------------------
 10 files changed, 41 insertions(+), 287 deletions(-)
 delete mode 100644 src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluatorOptions.cs
 delete mode 100644 test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/RelevanceTruthAndCompletenessEvaluatorTests.cs

diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluator.Prompts.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluator.Prompts.cs
index 91d89d65531..d1dbc93a35b 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluator.Prompts.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluator.Prompts.cs
@@ -7,93 +7,7 @@ public partial class RelevanceTruthAndCompletenessEvaluator
 {
     private static class Prompts
     {
-        internal static string BuildEvaluationPrompt(string userQuery, string modelResponse, string history)
-        {
-#pragma warning disable S103 // Lines should not be too long
-            return
-                $$"""
-                Read the History, User Query, and Model Response below and produce your response as a single JSON object.
-                Do not include any other text in your response besides the JSON object.
-
-                The JSON object should have the following format. However, do not include any markdown tags in your
-                response. Your response should start with an open curly brace and end with a closing curly brace for the
-                JSON.
-                ```
-                {
-                    "relevance": 1,
-                    "truth": 1,
-                    "completeness": 1
-                }
-                ```
-
-                -----
-
-                History: {{history}}
-
-                -----
-
-                User Query: {{userQuery}}
-
-                -----
-
-                Model Response: {{modelResponse}}
-
-                -----
-
-                That's the History, User Query, and Model Response you will rate. Now, in 3 Steps, you will evaluate the Model Response on 3 criteria.
-
-                -----
-
-                Step 1: Rate the relevance of the response.
-
-                Regardless of truth of statements, how much of the response is directly related to the request?
-
-                1 = None of the response is at all related
-                2 = A little of the response is directly related, or the response is a little bit related
-                3 = Some of the response is related, or the response is somewhat related
-                4 = Most of the response is related, or the response is mostly related
-                5 = 100% of the response is 100% related to the request
-
-                Record your response as the value of the "relevance" property in the JSON output.
-
-                -----
-
-                Step 2: Rate the truth of the response.
-
-                Read the History, Query, and Model Response again.
-
-                Regardless of relevance, how true are the verifiable statements in the response?
-
-                1 = The entire response is totally false
-                2 = A little of the response is true, or the response is a little bit true
-                3 = Some of the response is true, or the response is somewhat true
-                4 = Most of the response is true, or the response is mostly true
-                5 = 100% of the response is 100% true
-
-                Record your response as the value of the "truth" property in the JSON output.
-
-                -----
-
-                Step 3: Rate the completeness of the response.
-
-                Read the History, Query, and Model Response again.
-
-                Regardless of whether the statements made in the response are true, how many of the points necessary to address the request, does the response contain?
-
-                1 = The response omits all points that are necessary to address the request.
-                2 = The response includes a little of the points that are necessary to address the request.
-                3 = The response includes some of the points that are necessary to address the request.
-                4 = The response includes most of the points that are necessary to address the request.
-                5 = The response includes all points that are necessary to address the request. For explain tasks, nothing is left unexplained. For improve tasks, I looked for all potential improvements, and none were left out. For fix tasks, the response purports to get the user all the way to a fixed state (regardless of whether it actually works). For "do task" responses, it does everything requested.
-
-                Record your response as the value of the "completeness" property in the JSON output.
-
-                -----
-                """;
-#pragma warning restore S103
-        }
-
-        internal static string BuildEvaluationPromptWithReasoning(
+        internal static string BuildEvaluationPrompt(
             string userQuery,
             string modelResponse,
             string history)
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluator.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluator.cs
index 419feb45743..b1bf9e797a5 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluator.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluator.cs
@@ -25,9 +25,7 @@ namespace Microsoft.Extensions.AI.Evaluation.Quality;
 /// for 'Relevance', 'Truth' and 'Completeness' respectively. Each score is a number between 1 and 5, with 1 indicating
 /// a poor score, and 5 indicating an excellent score.
 /// </remarks>
-/// <param name="options">Options for <see cref="RelevanceTruthAndCompletenessEvaluator"/>.</param>
-public sealed partial class RelevanceTruthAndCompletenessEvaluator(
-    RelevanceTruthAndCompletenessEvaluatorOptions? options = null) : ChatConversationEvaluator
+public sealed partial class RelevanceTruthAndCompletenessEvaluator : ChatConversationEvaluator
 {
     /// <summary>
     /// Gets the <see cref="EvaluationMetric.Name"/> of the <see cref="NumericMetric"/> returned by
@@ -61,9 +59,6 @@ public sealed partial class RelevanceTruthAndCompletenessEvaluator(
             ResponseFormat = ChatResponseFormat.Json
         };
 
-    private readonly RelevanceTruthAndCompletenessEvaluatorOptions _options =
-        options ?? RelevanceTruthAndCompletenessEvaluatorOptions.Default;
-
     /// <inheritdoc/>
     protected override EvaluationResult InitializeResult()
     {
@@ -101,17 +96,7 @@ userRequest is not null
 
         string renderedHistory = builder.ToString();
 
-        string prompt =
-            _options.IncludeReasoning
-                ? Prompts.BuildEvaluationPromptWithReasoning(
-                    renderedUserRequest,
-                    renderedModelResponse,
-                    renderedHistory)
-                : Prompts.BuildEvaluationPrompt(
-                    renderedUserRequest,
-                    renderedModelResponse,
-                    renderedHistory);
-
+        string prompt = Prompts.BuildEvaluationPrompt(renderedUserRequest, renderedModelResponse, renderedHistory);
         return prompt;
     }
 
@@ -192,7 +177,7 @@ void UpdateResult(Rating rating)
             relevance.Interpretation = relevance.InterpretScore();
             if (!string.IsNullOrWhiteSpace(rating.RelevanceReasoning))
             {
-                relevance.AddDiagnostic(EvaluationDiagnostic.Informational(rating.RelevanceReasoning!));
+                relevance.Reason = rating.RelevanceReasoning!;
             }
 
             NumericMetric truth = result.Get<NumericMetric>(TruthMetricName);
@@ -200,7 +185,7 @@ void UpdateResult(Rating rating)
             truth.Interpretation = truth.InterpretScore();
             if (!string.IsNullOrWhiteSpace(rating.TruthReasoning))
             {
-                truth.AddDiagnostic(EvaluationDiagnostic.Informational(rating.TruthReasoning!));
+                truth.Reason = rating.TruthReasoning!;
             }
 
             NumericMetric completeness = result.Get<NumericMetric>(CompletenessMetricName);
@@ -208,7 +193,7 @@ void UpdateResult(Rating rating)
             completeness.Interpretation = completeness.InterpretScore();
             if (!string.IsNullOrWhiteSpace(rating.CompletenessReasoning))
             {
-                completeness.AddDiagnostic(EvaluationDiagnostic.Informational(rating.CompletenessReasoning!));
+                completeness.Reason = rating.CompletenessReasoning!;
             }
 
             if (!string.IsNullOrWhiteSpace(rating.Error))
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluatorOptions.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluatorOptions.cs
deleted file mode 100644
index 9271b2cc4af..00000000000
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluatorOptions.cs
+++ /dev/null
@@ -1,41 +0,0 @@
-﻿// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-#pragma warning disable S3604
-// S3604: Member initializer values should not be redundant.
-// We disable this warning because it is a false positive arising from the analyzer's lack of support for C#'s primary
-// constructor syntax.
-
-namespace Microsoft.Extensions.AI.Evaluation.Quality;
-
-/// <summary>
-/// Options for <see cref="RelevanceTruthAndCompletenessEvaluator"/>.
-/// </summary>
-/// <param name="includeReasoning">
-/// If <paramref name="includeReasoning"/> is set to <see langword="true"/>, this instructs the
-/// <see cref="RelevanceTruthAndCompletenessEvaluator"/> to include <see cref="EvaluationDiagnostic"/>s (with
-/// <see cref="EvaluationDiagnostic.Severity"/> set to <see cref="EvaluationDiagnosticSeverity.Informational"/>) as
-/// part of the returned <see cref="NumericMetric"/>s for 'Relevance' 'Truth' and 'Completeness' that explain the
-/// reasoning behind the corresponding scores. By default, <paramref name="includeReasoning"/> is set to
-/// <see langword="false"/>.
-/// </param>
-public sealed class RelevanceTruthAndCompletenessEvaluatorOptions(bool includeReasoning = false)
-{
-    /// <summary>
-    /// Gets the default options for <see cref="RelevanceTruthAndCompletenessEvaluator"/>.
-    /// </summary>
-    /// <remarks>
-    /// <see cref="IncludeReasoning"/> is set to <see langword="false"/> by default.
-    /// </remarks>
-    public static RelevanceTruthAndCompletenessEvaluatorOptions Default { get; } =
-        new RelevanceTruthAndCompletenessEvaluatorOptions();
-
-    /// <summary>
-    /// Gets a value indicating whether the <see cref="RelevanceTruthAndCompletenessEvaluator"/> should include
-    /// <see cref="EvaluationDiagnostic"/>s (with <see cref="EvaluationDiagnostic.Severity"/> set to
-    /// <see cref="EvaluationDiagnosticSeverity.Informational"/>) as part of the returned
-    /// <see cref="NumericMetric"/>s for 'Relevance' 'Truth' and 'Completeness' to explain the reasoning behind the
-    /// corresponding scores. By default, <see cref="IncludeReasoning"/> is set to <see langword="false"/>.
-    /// </summary>
-    public bool IncludeReasoning { get; } = includeReasoning;
-}
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/EvalTypes.d.ts b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/EvalTypes.d.ts
index 1055df330df..3877deccb8d 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/EvalTypes.d.ts
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/EvalTypes.d.ts
@@ -65,20 +65,24 @@ type BaseEvaluationMetric = {
 
 type MetricWithNoValue = BaseEvaluationMetric & {
     $type: "none";
+    reason?: string;
     value: undefined;
 };
 
 type NumericMetric = BaseEvaluationMetric & {
     $type: "numeric";
+    reason?: string;
     value?: number;
 };
 
 type BooleanMetric = BaseEvaluationMetric & {
     $type: "boolean";
+    reason?: string;
     value?: boolean;
 };
 
 type StringMetric = BaseEvaluationMetric & {
     $type: "string";
+    reason?: string;
     value?: string;
 };
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation/BooleanMetric.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation/BooleanMetric.cs
index bc71408ffa2..fe987382a26 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation/BooleanMetric.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation/BooleanMetric.cs
@@ -9,4 +9,9 @@ namespace Microsoft.Extensions.AI.Evaluation;
 /// </summary>
 /// <param name="name">The name of the <see cref="BooleanMetric"/>.</param>
 /// <param name="value">The value of the <see cref="BooleanMetric"/>.</param>
-public sealed class BooleanMetric(string name, bool? value = null) : EvaluationMetric<bool?>(name, value);
+/// <param name="reason">
+/// An optional string that can be used to provide some commentary around the result represented by
+/// <paramref name="value"/>.
+/// </param>
+public sealed class BooleanMetric(string name, bool? value = null, string? reason = null)
+: EvaluationMetric<bool?>(name, value, reason);
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationMetric{T}.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationMetric{T}.cs
index f0d6eea9d10..fb57d2571fb 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationMetric{T}.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationMetric{T}.cs
@@ -20,14 +20,25 @@ public class EvaluationMetric<T> : EvaluationMetric
     /// </summary>
     public T? Value { get; set; }
 
+    /// <summary>
+    /// Gets or sets a string that can optionally be used to provide some commentary around the result represented by
+    /// <see cref="Value"/>.
+    /// </summary>
+    public string? Reason { get; set; }
+
     /// <summary>
     /// Initializes a new instance of the <see cref="EvaluationMetric{T}"/> class.
     /// </summary>
     /// <param name="name">The name of the <see cref="EvaluationMetric{T}"/>.</param>
     /// <param name="value">The value  of the <see cref="EvaluationMetric{T}"/>.</param>
-    protected EvaluationMetric(string name, T? value)
+    /// <param name="reason">
+    /// An optional string that can be used to provide some commentary around the result represented by
+    /// <paramref name="value"/>.
+    /// </param>
+    protected EvaluationMetric(string name, T? value, string? reason = null)
         : base(name)
     {
         Value = value;
+        Reason = reason;
     }
 }
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation/NumericMetric.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation/NumericMetric.cs
index 35dec86ca63..93234a967b8 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation/NumericMetric.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation/NumericMetric.cs
@@ -20,4 +20,9 @@ namespace Microsoft.Extensions.AI.Evaluation;
 /// </remarks>
 /// <param name="name">The name of the <see cref="NumericMetric"/>.</param>
 /// <param name="value">The value of the <see cref="NumericMetric"/>.</param>
-public sealed class NumericMetric(string name, double? value = null) : EvaluationMetric<double?>(name, value);
+/// <param name="reason">
+/// An optional string that can be used to provide some commentary around the result represented by
+/// <paramref name="value"/>.
+/// </param>
+public sealed class NumericMetric(string name, double? value = null, string? reason = null)
+ : EvaluationMetric<double?>(name, value, reason);
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation/StringMetric.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation/StringMetric.cs
index b80c16fbbd8..5fed520b3a2 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation/StringMetric.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation/StringMetric.cs
@@ -12,4 +12,9 @@ namespace Microsoft.Extensions.AI.Evaluation;
 /// </remarks>
 /// <param name="name">The name of the <see cref="StringMetric"/>.</param>
 /// <param name="value">The value of the <see cref="StringMetric"/>.</param>
-public sealed class StringMetric(string name, string? value = null) : EvaluationMetric<string>(name, value);
+/// <param name="reason">
+/// An optional string that can be used to provide some commentary around the result represented by
+/// <paramref name="value"/>.
+/// </param>
+public sealed class StringMetric(string name, string? value = null, string? reason = null)
+    : EvaluationMetric<string>(name, value, reason);
diff --git a/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/EndToEndTests.cs b/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/EndToEndTests.cs
index dbfdebc529c..65801f0342f 100644
--- a/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/EndToEndTests.cs
+++ b/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/EndToEndTests.cs
@@ -33,8 +33,7 @@ static EndToEndTests()
 
         if (Settings.Current.Configured)
         {
-            var options = new RelevanceTruthAndCompletenessEvaluatorOptions(includeReasoning: true);
-            IEvaluator rtcEvaluator = new RelevanceTruthAndCompletenessEvaluator(options);
+            IEvaluator rtcEvaluator = new RelevanceTruthAndCompletenessEvaluator();
             IEvaluator coherenceEvaluator = new CoherenceEvaluator();
             IEvaluator fluencyEvaluator = new FluencyEvaluator();
 
diff --git a/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/RelevanceTruthAndCompletenessEvaluatorTests.cs b/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/RelevanceTruthAndCompletenessEvaluatorTests.cs
deleted file mode 100644
index 8b479ea57cf..00000000000
--- a/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/RelevanceTruthAndCompletenessEvaluatorTests.cs
+++ /dev/null
@@ -1,133 +0,0 @@
-﻿// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Collections.Generic;
-using System.Diagnostics.CodeAnalysis;
-using System.Linq;
-using System.Threading.Tasks;
-using Microsoft.Extensions.AI.Evaluation.Quality;
-using Microsoft.Extensions.AI.Evaluation.Reporting;
-using Microsoft.Extensions.AI.Evaluation.Reporting.Storage;
-using Microsoft.TestUtilities;
-using Xunit;
-
-namespace Microsoft.Extensions.AI.Evaluation.Integration.Tests;
-
-public class RelevanceTruthAndCompletenessEvaluatorTests
-{
-    private static readonly ChatOptions _chatOptions;
-    private static readonly ReportingConfiguration? _reportingConfigurationWithoutReasoning;
-    private static readonly ReportingConfiguration? _reportingConfigurationWithReasoning;
-
-    static RelevanceTruthAndCompletenessEvaluatorTests()
-    {
-        _chatOptions =
-            new ChatOptions
-            {
-                Temperature = 0.0f,
-                ResponseFormat = ChatResponseFormat.Text
-            };
-
-        if (Settings.Current.Configured)
-        {
-            IEvaluator rtcEvaluatorWithoutReasoning = new RelevanceTruthAndCompletenessEvaluator();
-
-            _reportingConfigurationWithoutReasoning =
-                DiskBasedReportingConfiguration.Create(
-                    storageRootPath: Settings.Current.StorageRootPath,
-                    evaluators: [rtcEvaluatorWithoutReasoning],
-                    chatConfiguration: Setup.CreateChatConfiguration(),
-                    executionName: Constants.Version);
-
-            var options = new RelevanceTruthAndCompletenessEvaluatorOptions(includeReasoning: true);
-            IEvaluator rtcEvaluatorWithReasoning = new RelevanceTruthAndCompletenessEvaluator(options);
-
-            _reportingConfigurationWithReasoning =
-                DiskBasedReportingConfiguration.Create(
-                    storageRootPath: Settings.Current.StorageRootPath,
-                    evaluators: [rtcEvaluatorWithReasoning],
-                    chatConfiguration: Setup.CreateChatConfiguration(),
-                    executionName: Constants.Version);
-        }
-    }
-
-    [ConditionalFact]
-    public async Task WithoutReasoning()
-    {
-        SkipIfNotConfigured();
-
-        await using ScenarioRun scenarioRun =
-            await _reportingConfigurationWithoutReasoning.CreateScenarioRunAsync(
-                scenarioName: $"Microsoft.Extensions.AI.Evaluation.Integration.Tests.{nameof(RelevanceTruthAndCompletenessEvaluatorTests)}.{nameof(WithoutReasoning)}");
-
-        IChatClient chatClient = scenarioRun.ChatConfiguration!.ChatClient;
-
-        var messages = new List<ChatMessage>();
-        string prompt = @"What is the molecular formula of ammonia?";
-        ChatMessage promptMessage = prompt.ToUserMessage();
-        messages.Add(promptMessage);
-
-        ChatResponse response = await chatClient.GetResponseAsync(messages, _chatOptions);
-        ChatMessage responseMessage = response.Messages.Single();
-        Assert.NotNull(responseMessage.Text);
-
-        EvaluationResult result = await scenarioRun.EvaluateAsync(promptMessage, responseMessage);
-
-        Assert.False(result.ContainsDiagnostics(d => d.Severity >= EvaluationDiagnosticSeverity.Informational));
-
-        NumericMetric relevance = result.Get<NumericMetric>(RelevanceTruthAndCompletenessEvaluator.RelevanceMetricName);
-        NumericMetric truth = result.Get<NumericMetric>(RelevanceTruthAndCompletenessEvaluator.TruthMetricName);
-        NumericMetric completeness = result.Get<NumericMetric>(RelevanceTruthAndCompletenessEvaluator.CompletenessMetricName);
-
-        Assert.True(relevance.Value >= 4);
-        Assert.True(truth.Value >= 4);
-        Assert.True(completeness.Value >= 4);
-    }
-
-    [ConditionalFact]
-    public async Task WithReasoning()
-    {
-        SkipIfNotConfigured();
-
-        await using ScenarioRun scenarioRun =
-            await _reportingConfigurationWithReasoning.CreateScenarioRunAsync(
-                scenarioName: $"Microsoft.Extensions.AI.Evaluation.Integration.Tests.{nameof(RelevanceTruthAndCompletenessEvaluatorTests)}.{nameof(WithReasoning)}");
-
-        IChatClient chatClient = scenarioRun.ChatConfiguration!.ChatClient;
-
-        var messages = new List<ChatMessage>();
-        string prompt = @"What is the molecular formula of glucose?";
-        ChatMessage promptMessage = prompt.ToUserMessage();
-        messages.Add(promptMessage);
-
-        ChatResponse response = await chatClient.GetResponseAsync(messages, _chatOptions);
-        ChatMessage responseMessage = response.Messages.Single();
-        Assert.NotNull(responseMessage.Text);
-
-        EvaluationResult result = await scenarioRun.EvaluateAsync(promptMessage, responseMessage);
-
-        Assert.True(result.ContainsDiagnostics(d => d.Severity == EvaluationDiagnosticSeverity.Informational));
-        Assert.False(result.ContainsDiagnostics(d => d.Severity >= EvaluationDiagnosticSeverity.Warning));
-
-        NumericMetric relevance = result.Get<NumericMetric>(RelevanceTruthAndCompletenessEvaluator.RelevanceMetricName);
-        NumericMetric truth = result.Get<NumericMetric>(RelevanceTruthAndCompletenessEvaluator.TruthMetricName);
-        NumericMetric completeness = result.Get<NumericMetric>(RelevanceTruthAndCompletenessEvaluator.CompletenessMetricName);
-
-        Assert.True(relevance.Value >= 4, string.Format("Relevance - Reasoning: {0}", relevance.Diagnostics.Single().Message));
-        Assert.True(truth.Value >= 4, string.Format("Truth - Reasoning: {0}", truth.Diagnostics.Single().Message));
-        Assert.True(completeness.Value >= 4, string.Format("Completeness - Reasoning: {0}", completeness.Diagnostics.Single().Message));
-    }
-
-    [MemberNotNull(nameof(_reportingConfigurationWithReasoning))]
-    [MemberNotNull(nameof(_reportingConfigurationWithoutReasoning))]
-    private static void SkipIfNotConfigured()
-    {
-        if (!Settings.Current.Configured)
-        {
-            throw new SkipTestException("Test is not configured");
-        }
-
-        Assert.NotNull(_reportingConfigurationWithReasoning);
-        Assert.NotNull(_reportingConfigurationWithoutReasoning);
-    }
-}

From 2a9426e5b6b9647ab20dfa4d9b4164c7ca33d1c7 Mon Sep 17 00:00:00 2001
From: Shyam Namboodiripad <gnamboo@microsoft.com>
Date: Mon, 10 Mar 2025 18:21:37 -0700
Subject: [PATCH 02/18] Fix mouse cursor for text containers

---
 .../TypeScript/components/ScenarioTree.tsx                     | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
index 7ed42c44b16..6c97626de05 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
@@ -103,6 +103,7 @@ const useStyles = makeStyles({
         padding: '1rem',
         border: '1px solid #e0e0e0',
         backgroundColor: tokens.colorNeutralBackground2,
+        cursor: 'text',
     },
     promptBox: {
         border: '1px solid #e0e0e0',
@@ -110,6 +111,7 @@ const useStyles = makeStyles({
         padding: '1rem',
         maxHeight: '20rem',
         overflow: 'auto',
+        cursor: 'text',
     },
     promptTitleLine: {
         display: 'flex',
@@ -187,6 +189,7 @@ export const PromptDetails = ({ history, response }: { history: string, response
             <h3 className={classes.promptTitle}>Response</h3>
             <Switch checked={renderResponse} onChange={onChangeRenderResponse} label="Render Markdown" />
         </div>
+
         <div className={classes.promptBox}>
             {renderResponse ? <ReactMarkdown>{response}</ReactMarkdown> : <pre>{response}</pre>}
         </div>

From 4235f20458fc1e88e89a4d284aeef70389cef12b Mon Sep 17 00:00:00 2001
From: Shyam Namboodiripad <gnamboo@microsoft.com>
Date: Mon, 10 Mar 2025 19:45:33 -0700
Subject: [PATCH 03/18] Move the toggle for rendering markdown under a global
 settings gear

---
 .../TypeScript/components/App.css             |  3 +-
 .../TypeScript/components/App.tsx             | 39 +++++++++++++++----
 .../TypeScript/components/ScenarioTree.tsx    | 32 ++++++---------
 3 files changed, 44 insertions(+), 30 deletions(-)

diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/App.css b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/App.css
index 0ab7c21274b..24695e5565d 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/App.css
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/App.css
@@ -5,7 +5,6 @@ The .NET Foundation licenses this file to you under the MIT license.
 
 #root {
   margin: 0 auto;
-  padding: 2rem;
+  padding: 0rem 2rem 2rem 2rem;
   background-color: white;
 }
-
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/App.tsx b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/App.tsx
index cc2215f8e8e..237a6ba4794 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/App.tsx
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/App.tsx
@@ -1,6 +1,9 @@
 ﻿// Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+import { useState } from 'react';
+import { Settings28Regular } from '@fluentui/react-icons';
+import { Drawer, DrawerBody, DrawerHeader, DrawerHeaderTitle, Switch } from '@fluentui/react-components';
 import { makeStyles } from '@fluentui/react-components';
 import './App.css';
 import { ScoreNode } from './Summary';
@@ -12,20 +15,42 @@ type AppProperties = {
 };
 
 const useStyles = makeStyles({
-  footerText: { fontSize: '0.8rem', marginTop: '2rem' }
-})
+  header: { display: 'flex', justifyContent: 'space-between', alignItems: 'center', position: 'sticky', top: 0, backgroundColor: 'white', zIndex: 1 },
+  footerText: { fontSize: '0.8rem', marginTop: '2rem' },
+  closeButton: { position: 'absolute', top: '1rem', right: '1rem', cursor: 'pointer', fontSize: '1.5rem' }
+});
 
-function App({dataset, tree}:AppProperties) {
+function App({ dataset, tree }: AppProperties) {
   const classes = useStyles();
+  const [isSettingsOpen, setIsSettingsOpen] = useState(false);
+  const [renderMarkdown, setRenderMarkdown] = useState(true);
+
+  const toggleSettings = () => setIsSettingsOpen(!isSettingsOpen);
+  const toggleRenderMarkdown = () => setRenderMarkdown(!renderMarkdown);
+  const closeSettings = () => setIsSettingsOpen(false);
+
   return (
     <>
-      <h1>AI Evaluation Report</h1>
+      <div className={classes.header}>
+        <h1>AI Evaluation Report</h1>
+        <Settings28Regular onClick={toggleSettings} style={{ cursor: 'pointer' }} />
+      </div>
 
-      <ScenarioGroup node={tree} />
+      <ScenarioGroup node={tree} renderMarkdown={renderMarkdown} />
 
       <p className={classes.footerText}>Generated at {dataset.createdAt} by Microsoft.Extensions.AI.Evaluation.Reporting version {dataset.generatorVersion}</p>
+
+      <Drawer open={isSettingsOpen} onOpenChange={toggleSettings} position='end'>
+        <DrawerHeader>
+          <DrawerHeaderTitle>Settings</DrawerHeaderTitle>
+          <span className={classes.closeButton} onClick={closeSettings}>&times;</span>
+        </DrawerHeader>
+        <DrawerBody>
+          <Switch checked={renderMarkdown} onChange={toggleRenderMarkdown} label='Render markdown for conversations' />
+        </DrawerBody>
+      </Drawer>
     </>
-  )
+  );
 }
 
-export default App
+export default App;
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
index 6c97626de05..97f87641b89 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
@@ -1,7 +1,7 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-import { makeStyles, Switch, tokens, Tree, TreeItem, TreeItemLayout, TreeItemValue, TreeOpenChangeData, TreeOpenChangeEvent } from "@fluentui/react-components";
+import { makeStyles, tokens, Tree, TreeItem, TreeItemLayout, TreeItemValue, TreeOpenChangeData, TreeOpenChangeEvent } from "@fluentui/react-components";
 import { useState, useCallback } from "react";
 import { DefaultRootNodeName, ScoreNode, ScoreNodeType, getPromptDetails } from "./Summary";
 import { PassFailBar } from "./PassFailBar";
@@ -9,7 +9,7 @@ import { MetricCardList } from "./MetricCard";
 import ReactMarkdown from "react-markdown";
 import { ErrorCircleRegular } from "@fluentui/react-icons";
 
-const ScenarioLevel = ({ node, parentPath, isOpen }: { node: ScoreNode, parentPath: string, isOpen: (path: string) => boolean }) => {
+const ScenarioLevel = ({ node, parentPath, isOpen, renderMarkdown }: { node: ScoreNode, parentPath: string, isOpen: (path: string) => boolean, renderMarkdown: boolean }) => {
     const path = `${parentPath}.${node.name}`;
     if (node.isLeafNode) {
         return <TreeItem itemType="branch" value={path}>
@@ -19,7 +19,7 @@ const ScenarioLevel = ({ node, parentPath, isOpen }: { node: ScoreNode, parentPa
             <Tree>
                 <TreeItem itemType="leaf" >
                     <TreeItemLayout>
-                        <ScoreDetail scenario={node.scenario!}/>
+                        <ScoreDetail scenario={node.scenario!} renderMarkdown={renderMarkdown}/>
                     </TreeItemLayout>
                 </TreeItem>
             </Tree>
@@ -31,14 +31,14 @@ const ScenarioLevel = ({ node, parentPath, isOpen }: { node: ScoreNode, parentPa
             </TreeItemLayout>
             <Tree>
                 {node.childNodes.map((n) => (
-                    <ScenarioLevel node={n} key={n.name} parentPath={path} isOpen={isOpen}/>
+                    <ScenarioLevel node={n} key={n.name} parentPath={path} isOpen={isOpen} renderMarkdown={renderMarkdown}/>
                 ))}
             </Tree>
         </TreeItem>;
     }
 };
 
-export const ScenarioGroup = ({ node }: { node: ScoreNode }) => {
+export const ScenarioGroup = ({ node, renderMarkdown }: { node: ScoreNode, renderMarkdown: boolean }) => {
     const [openItems, setOpenItems] = useState<Set<TreeItemValue>>(() => new Set());
     const handleOpenChange = useCallback((_: TreeOpenChangeEvent, data: TreeOpenChangeData) => {
         setOpenItems(data.openItems);
@@ -47,11 +47,11 @@ export const ScenarioGroup = ({ node }: { node: ScoreNode }) => {
 
     return (
         <Tree aria-label="Default" appearance="transparent" onOpenChange={handleOpenChange} defaultOpenItems={["." + DefaultRootNodeName]}>
-            <ScenarioLevel node={node} parentPath={""} isOpen={isOpen} />
+            <ScenarioLevel node={node} parentPath={""} isOpen={isOpen} renderMarkdown={renderMarkdown} />
         </Tree>);
 };
 
-export const ScoreDetail = ({ scenario }: { scenario: ScenarioRunResult }) => {
+export const ScoreDetail = ({ scenario, renderMarkdown }: { scenario: ScenarioRunResult, renderMarkdown: boolean }) => {
     const classes = useStyles();
 
     const failureMessages = [];
@@ -70,7 +70,7 @@ export const ScoreDetail = ({ scenario }: { scenario: ScenarioRunResult }) => {
     return (<div className={classes.iterationArea}>
         <MetricCardList scenario={scenario} />
         {failureMessages && failureMessages.length > 0 && <FailMessage messages={failureMessages} />}
-        <PromptDetails history={history} response={response} />
+        <PromptDetails history={history} response={response} renderMarkdown={renderMarkdown} />
     </div>);
 };
 
@@ -164,34 +164,24 @@ const ScoreNodeHeader = ({ item, showPrompt }: { item: ScoreNode, showPrompt?: b
     </div>);
 };
 
-export const PromptDetails = ({ history, response }: { history: string, response: string }) => {
+export const PromptDetails = ({ history, response, renderMarkdown }: { history: string, response: string, renderMarkdown: boolean }) => {
     const classes = useStyles();
-    const [renderPrompt, setRenderPrompt] = useState(true);
-    const onChangeRenderPrompt = useCallback((ev: React.ChangeEvent<HTMLInputElement>) => {
-        setRenderPrompt(ev.currentTarget.checked);
-    }, [setRenderPrompt]);
-    const [renderResponse, setRenderResponse] = useState(true);
-    const onChangeRenderResponse = useCallback((ev: React.ChangeEvent<HTMLInputElement>) => {
-        setRenderResponse(ev.currentTarget.checked);
-    }, [setRenderResponse]);
 
     return (<div>
         <div className={classes.promptTitleLine}>
             <h3 className={classes.promptTitle}>Prompt</h3>
-            <Switch checked={renderPrompt} onChange={onChangeRenderPrompt} label="Render Markdown" />
         </div>
 
         <div className={classes.promptBox}>
-            {renderPrompt ? <ReactMarkdown>{history}</ReactMarkdown> : <pre>{history}</pre>}
+            {renderMarkdown ? <ReactMarkdown>{history}</ReactMarkdown> : <pre>{history}</pre>}
         </div>
 
         <div className={classes.promptTitleLine}>
             <h3 className={classes.promptTitle}>Response</h3>
-            <Switch checked={renderResponse} onChange={onChangeRenderResponse} label="Render Markdown" />
         </div>
 
         <div className={classes.promptBox}>
-            {renderResponse ? <ReactMarkdown>{response}</ReactMarkdown> : <pre>{response}</pre>}
+            {renderMarkdown ? <ReactMarkdown>{response}</ReactMarkdown> : <pre>{response}</pre>}
         </div>
     </div>);
 };

From 0ac49bebb64397603c5ceac32e0ccce64681f539 Mon Sep 17 00:00:00 2001
From: Shyam Namboodiripad <gnamboo@microsoft.com>
Date: Mon, 10 Mar 2025 20:24:58 -0700
Subject: [PATCH 04/18] Make sections for conversation and failure reasons
 collapsible

---
 .../TypeScript/components/ScenarioTree.tsx    | 97 +++++++++++++------
 1 file changed, 68 insertions(+), 29 deletions(-)

diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
index 97f87641b89..8aac1c5780b 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
@@ -8,6 +8,7 @@ import { PassFailBar } from "./PassFailBar";
 import { MetricCardList } from "./MetricCard";
 import ReactMarkdown from "react-markdown";
 import { ErrorCircleRegular } from "@fluentui/react-icons";
+import { ChevronDown12Regular, ChevronRight12Regular } from '@fluentui/react-icons';
 
 const ScenarioLevel = ({ node, parentPath, isOpen, renderMarkdown }: { node: ScoreNode, parentPath: string, isOpen: (path: string) => boolean, renderMarkdown: boolean }) => {
     const path = `${parentPath}.${node.name}`;
@@ -91,11 +92,35 @@ const useStyles = makeStyles({
     scenarioLabel: { 
         whiteSpace: 'nowrap',
         fontWeight: '500',
-     },
+    },
     iterationArea: {
         marginTop: '1rem',
         marginBottom: '1rem',
     },
+    section: {
+        marginTop: '2rem',
+    },
+    sectionHeader: {
+        display: 'flex',
+        alignItems: 'center',
+        cursor: 'pointer',
+        userSelect: 'none',
+        marginBottom: '1rem',
+    },
+    sectionHeaderText: {
+        margin: 0,
+        marginLeft: '0.5rem',
+        fontSize: '1.25rem',
+        fontWeight: 'bold',
+    },
+    sectionSubHeader: {
+        fontSize: '0.875rem',
+        fontWeight: 'bold',
+        marginBottom: '0.5rem',
+    },
+    sectionContent: {
+        marginBottom: '1.5rem',
+    },
     failMessage: {
         color: tokens.colorStatusDangerForeground2,
     },
@@ -105,30 +130,38 @@ const useStyles = makeStyles({
         backgroundColor: tokens.colorNeutralBackground2,
         cursor: 'text',
     },
-    promptBox: {
+    conversationBox: {
         border: '1px solid #e0e0e0',
         borderRadius: '4px',
         padding: '1rem',
         maxHeight: '20rem',
         overflow: 'auto',
         cursor: 'text',
+        '& pre': {
+            whiteSpace: 'pre-wrap',
+            wordWrap: 'break-word',
+        },
     },
-    promptTitleLine: {
-        display: 'flex',
-        flexDirection: 'row',
-        alignItems: 'center',
-    },
-    promptTitle: { flexGrow: 1 },
 });
 
 export const FailMessage = ({ messages }: { messages: string[] }) => {
     const classes = useStyles();
-    return <div>
-        <h3>Failure Reasons</h3>
-        <div className={classes.failContainer}>
-            {messages.map((msg) => <><span className={classes.failMessage} key={msg}><ErrorCircleRegular /> {msg}</span><br /></>)}
+    const [isExpanded, setIsExpanded] = useState(true);
+
+    return (
+        <div className={classes.section}>
+            <div className={classes.sectionHeader} onClick={() => setIsExpanded(!isExpanded)}>
+                {isExpanded ? <ChevronDown12Regular /> : <ChevronRight12Regular />}
+                <h3 className={classes.sectionHeaderText}>Failure Reasons</h3>
+            </div>
+
+            {isExpanded && (
+                <div className={classes.failContainer}>
+                    {messages.map((msg) => <><span className={classes.failMessage} key={msg}><ErrorCircleRegular /> {msg}</span><br /></>)}
+                </div>
+            )}
         </div>
-    </div>;
+    );
 };
 
 const PassFailBadge = ({ pass, total }: { pass: number, total: number }) => {
@@ -166,22 +199,28 @@ const ScoreNodeHeader = ({ item, showPrompt }: { item: ScoreNode, showPrompt?: b
 
 export const PromptDetails = ({ history, response, renderMarkdown }: { history: string, response: string, renderMarkdown: boolean }) => {
     const classes = useStyles();
+    const [isExpanded, setIsExpanded] = useState(true);
 
-    return (<div>
-        <div className={classes.promptTitleLine}>
-            <h3 className={classes.promptTitle}>Prompt</h3>
-        </div>
-
-        <div className={classes.promptBox}>
-            {renderMarkdown ? <ReactMarkdown>{history}</ReactMarkdown> : <pre>{history}</pre>}
-        </div>
-
-        <div className={classes.promptTitleLine}>
-            <h3 className={classes.promptTitle}>Response</h3>
-        </div>
-
-        <div className={classes.promptBox}>
-            {renderMarkdown ? <ReactMarkdown>{response}</ReactMarkdown> : <pre>{response}</pre>}
+    return (
+        <div className={classes.section}>
+            <div className={classes.sectionHeader} onClick={() => setIsExpanded(!isExpanded)}>
+                {isExpanded ? <ChevronDown12Regular /> : <ChevronRight12Regular />}
+                <h3 className={classes.sectionHeaderText}>Conversation</h3>
+            </div>
+
+            {isExpanded && (
+                <div className={classes.conversationBox}>
+                    <div className={classes.sectionContent}>
+                        <div className={classes.sectionSubHeader}>Prompt</div>
+                        {renderMarkdown ? <ReactMarkdown>{history}</ReactMarkdown> : <pre>{history}</pre>}
+                    </div>
+                    
+                    <div>
+                        <div className={classes.sectionSubHeader}>Response</div>
+                        {renderMarkdown ? <ReactMarkdown>{response}</ReactMarkdown> : <pre>{response}</pre>}
+                    </div>
+                </div>
+            )}
         </div>
-    </div>);
+    );
 };

From c654def65883cf533a431b35f4965e2c4df9513d Mon Sep 17 00:00:00 2001
From: Shyam Namboodiripad <gnamboo@microsoft.com>
Date: Mon, 10 Mar 2025 20:37:35 -0700
Subject: [PATCH 05/18] Collapse single child nodes into parent to avoid too
 much spelunking down the tree

---
 .../TypeScript/components/ScenarioTree.tsx      |  1 +
 .../TypeScript/components/Summary.ts            | 17 ++++++++++++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
index 8aac1c5780b..1aa6aac9de5 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
@@ -11,6 +11,7 @@ import { ErrorCircleRegular } from "@fluentui/react-icons";
 import { ChevronDown12Regular, ChevronRight12Regular } from '@fluentui/react-icons';
 
 const ScenarioLevel = ({ node, parentPath, isOpen, renderMarkdown }: { node: ScoreNode, parentPath: string, isOpen: (path: string) => boolean, renderMarkdown: boolean }) => {
+    node.collapseSingleChildNodes();
     const path = `${parentPath}.${node.name}`;
     if (node.isLeafNode) {
         return <TreeItem itemType="branch" value={path}>
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/Summary.ts b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/Summary.ts
index 8cef12ce4f1..7d72b1a7e63 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/Summary.ts
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/Summary.ts
@@ -104,7 +104,22 @@ export class ScoreNode {
         }
     }
 
-    
+    collapseSingleChildNodes() {
+        if (this.isLeafNode) {
+            return;
+        }
+
+        while (this.childNodes.length === 1) {
+            const onlyChild = this.childNodes[0];
+            this.name += ` > ${onlyChild.name}`;
+            this.children = onlyChild.children;
+            this.scenario = onlyChild.scenario;
+        }
+
+        for (const child of this.childNodes) {
+            child.collapseSingleChildNodes();
+        }
+    }
 };
 
 export const DefaultRootNodeName = "All Evaluations";

From 3df9090c9f0a4c24822103711177c93b9b315180 Mon Sep 17 00:00:00 2001
From: Shyam Namboodiripad <gnamboo@microsoft.com>
Date: Mon, 10 Mar 2025 20:57:57 -0700
Subject: [PATCH 06/18] Improve spacing

---
 .../TypeScript/components/App.tsx                         | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/App.tsx b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/App.tsx
index 237a6ba4794..b745dc2fe2d 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/App.tsx
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/App.tsx
@@ -17,7 +17,9 @@ type AppProperties = {
 const useStyles = makeStyles({
   header: { display: 'flex', justifyContent: 'space-between', alignItems: 'center', position: 'sticky', top: 0, backgroundColor: 'white', zIndex: 1 },
   footerText: { fontSize: '0.8rem', marginTop: '2rem' },
-  closeButton: { position: 'absolute', top: '1rem', right: '1rem', cursor: 'pointer', fontSize: '1.5rem' }
+  closeButton: { position: 'absolute', top: '1.5rem', right: '1rem', cursor: 'pointer', fontSize: '2rem' },
+  switchLabel: { fontSize: '1rem', paddingTop: '1rem' },
+  drawerBody: { paddingTop: '1rem' },
 });
 
 function App({ dataset, tree }: AppProperties) {
@@ -45,8 +47,8 @@ function App({ dataset, tree }: AppProperties) {
           <DrawerHeaderTitle>Settings</DrawerHeaderTitle>
           <span className={classes.closeButton} onClick={closeSettings}>&times;</span>
         </DrawerHeader>
-        <DrawerBody>
-          <Switch checked={renderMarkdown} onChange={toggleRenderMarkdown} label='Render markdown for conversations' />
+        <DrawerBody className={classes.drawerBody}>
+          <Switch checked={renderMarkdown} onChange={toggleRenderMarkdown} label={<span className={classes.switchLabel}>Render markdown for conversations</span>} />
         </DrawerBody>
       </Drawer>
     </>

From 67076a82932b27ec9e084f9bbceb2d7f8b1ca6db Mon Sep 17 00:00:00 2001
From: Shyam Namboodiripad <gnamboo@microsoft.com>
Date: Mon, 10 Mar 2025 23:33:58 -0700
Subject: [PATCH 07/18] Show metric details in a collapsible section on
 selection instead of displaying only on hover

---
 .../TypeScript/components/MetricCard.tsx      | 112 +++++++++--------
 .../TypeScript/components/ScenarioTree.tsx    | 116 +++++++++++++++++-
 2 files changed, 166 insertions(+), 62 deletions(-)

diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/MetricCard.tsx b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/MetricCard.tsx
index 504674bcab4..462bb61da8b 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/MetricCard.tsx
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/MetricCard.tsx
@@ -1,19 +1,28 @@
 ﻿// Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-import { makeStyles, mergeClasses, tokens, Tooltip } from "@fluentui/react-components";
-import { DismissCircle16Regular, ErrorCircleRegular, Info16Regular, InfoRegular, Warning16Regular, WarningRegular } from "@fluentui/react-icons";
+import { makeStyles, mergeClasses, tokens } from "@fluentui/react-components";
+import { DismissCircle16Regular, Info16Regular, Warning16Regular } from "@fluentui/react-icons";
 
 const useCardListStyles = makeStyles({
     metricCardList: { display: 'flex', gap: '1rem', flexWrap: 'wrap' },
 });
 
-export const MetricCardList = ({ scenario }: { scenario: ScenarioRunResult }) => {
+export const MetricCardList = ({ scenario, onMetricSelect, selectedMetric }: { 
+  scenario: ScenarioRunResult, 
+  onMetricSelect: (metric: MetricType | null) => void,
+  selectedMetric: MetricType | null
+}) => {
     const classes = useCardListStyles();
     return (
         <div className={classes.metricCardList}>
             {Object.values(scenario.evaluationResult.metrics).map((metric, index) => (
-                <MetricCard metric={metric} key={index} />
+                <MetricCard 
+                  metric={metric} 
+                  key={index} 
+                  onClick={() => onMetricSelect(selectedMetric === metric ? null : metric)}
+                  isSelected={selectedMetric === metric}
+                />
             ))}
         </div>
     );
@@ -23,7 +32,21 @@ const useCardStyles = makeStyles({
     card: {
         display: 'flex', flexDirection: 'column', alignItems: 'center', gap: '0.5rem',
         padding: '.75rem', border: '1px solid #e0e0e0', borderRadius: '4px',
-        minWidth: '8rem'
+        minWidth: '8rem',
+        cursor: 'pointer',
+        transition: 'box-shadow 0.2s ease-in-out, outline 0.2s ease-in-out',
+        position: 'relative',
+        '&:hover': {
+            opacity: 0.9,
+            boxShadow: '0 2px 4px rgba(0, 0, 0, 0.1)'
+        }
+    },
+    selectedCard: {
+        zIndex: 1,
+        boxShadow: '0 4px 8px rgba(0, 0, 0, 0.15)',
+        outline: `2px solid ${tokens.colorNeutralForeground3}`,
+        outlineOffset: '0px',
+        border: 'none'
     },
     metricText: { fontSize: '1rem', fontWeight: 'normal' },
     valueText: { fontSize: '1.5rem', fontWeight: 'bold' },
@@ -81,8 +104,15 @@ const useCardColors = (interpretation?: EvaluationMetricInterpretation) => {
 
 type MetricType = StringMetric | NumericMetric | BooleanMetric | MetricWithNoValue;
 
-export const MetricCard = ({ metric }: { metric: MetricType }) => {
-
+export const MetricCard = ({ 
+    metric, 
+    onClick,
+    isSelected
+}: { 
+    metric: MetricType, 
+    onClick: () => void,
+    isSelected: boolean
+}) => {
     let renderValue: (metric: MetricType) => React.ReactNode;
     switch (metric.$type) {
         case "string":
@@ -106,27 +136,28 @@ export const MetricCard = ({ metric }: { metric: MetricType }) => {
 
     const classes = useCardStyles();
     const { fg, bg } = useCardColors(metric.interpretation);
-    const hasReason = metric.interpretation?.reason != null;
+    
+    const hasReasons = metric.reason != null || metric.interpretation?.reason != null;
     const hasInformationalMessages = metric.diagnostics.some((d: EvaluationDiagnostic) => d.severity == "informational");
     const hasWarningMessages = metric.diagnostics.some((d: EvaluationDiagnostic) => d.severity == "warning");
     const hasErrorMessages = metric.diagnostics.some((d: EvaluationDiagnostic) => d.severity == "error");
-    const supportsHover = hasReason || hasInformationalMessages || hasWarningMessages || hasErrorMessages;
-    const card =
-        (<div className={mergeClasses(bg, classes.card)}>
-            <div className={classes.metricText}>{metric.name} { (hasErrorMessages && <DismissCircle16Regular />) || 
-                (hasWarningMessages && <Warning16Regular />) || 
-                ((hasInformationalMessages || hasReason) && <Info16Regular />)}</div>
+    
+    const cardClass = mergeClasses(
+        bg, 
+        classes.card, 
+        isSelected ? classes.selectedCard : undefined
+    );
+    
+    return (
+        <div className={cardClass} onClick={onClick}>
+            <div className={classes.metricText}>{metric.name} {
+                    (hasErrorMessages && <DismissCircle16Regular />) || 
+                    (hasWarningMessages && <Warning16Regular />) || 
+                    ((hasInformationalMessages || hasReasons) && <Info16Regular />)}
+            </div>
             <div className={mergeClasses(fg, classes.valueText)}>{renderValue(metric)}</div>
-        </div>);
-    if (supportsHover) {
-        return (<Tooltip
-            content={{ children: <MetricDetails metric={metric} /> }}
-            relationship="description">
-            {card}
-        </Tooltip>);
-    } else {
-        return card;
-    }
+        </div>
+    );
 };
 
 const useDetailStyles = makeStyles({
@@ -134,36 +165,3 @@ const useDetailStyles = makeStyles({
     diagWarn: { fontStyle: tokens.fontFamilyMonospace, color: tokens.colorStatusWarningForeground2 },
     diagInfo: { fontStyle: tokens.fontFamilyMonospace },
 });
-
-export const MetricDetails = ({ metric }: { metric: MetricWithNoValue | NumericMetric | BooleanMetric | StringMetric }) => {
-    const classes = useDetailStyles();
-    const reason = metric.interpretation?.reason;
-    const failed = metric.interpretation?.failed ?? false;
-    const informationalMessages = metric.diagnostics.filter((d: EvaluationDiagnostic) => d.severity == "informational").map((d: EvaluationDiagnostic) => d.message);
-    const hasInformationalMessages = informationalMessages.length > 0;
-    const warningMessages = metric.diagnostics.filter((d: EvaluationDiagnostic) => d.severity == "warning").map((d: EvaluationDiagnostic) => d.message);
-    const hasWarningMessages = warningMessages.length > 0;
-    const errorMessages = metric.diagnostics.filter((d: EvaluationDiagnostic) => d.severity == "error").map((d: EvaluationDiagnostic) => d.message);
-    const hasErrorMessages = errorMessages.length > 0;
-    return (
-        <div>
-            {reason && <div>
-                {failed ? 
-                    <p className={classes.diagError}><ErrorCircleRegular /> {reason}</p> :
-                    <p className={classes.diagInfo}><InfoRegular /> {reason}</p>
-                }
-            </div>}
-            {hasErrorMessages && <div>
-                {errorMessages.map((message: string, index: number) =>
-                    <p key={index} className={classes.diagError}><ErrorCircleRegular /> {message}</p>)}
-            </div>}
-            {hasWarningMessages && <div>
-                {warningMessages.map((message: string, index: number) =>
-                    <p key={index} className={classes.diagWarn}><WarningRegular /> {message}</p>)}
-            </div>}
-            {hasInformationalMessages && <div>
-                {informationalMessages.map((message: string, index: number) =>
-                    <p key={index} className={classes.diagInfo}><InfoRegular /> {message}</p>)}
-            </div>}
-        </div>);
-};
\ No newline at end of file
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
index 1aa6aac9de5..a55cc34c386 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
@@ -7,10 +7,15 @@ import { DefaultRootNodeName, ScoreNode, ScoreNodeType, getPromptDetails } from
 import { PassFailBar } from "./PassFailBar";
 import { MetricCardList } from "./MetricCard";
 import ReactMarkdown from "react-markdown";
-import { ErrorCircleRegular } from "@fluentui/react-icons";
+import { DismissCircle16Regular, Info16Regular, Warning16Regular } from "@fluentui/react-icons";
 import { ChevronDown12Regular, ChevronRight12Regular } from '@fluentui/react-icons';
 
-const ScenarioLevel = ({ node, parentPath, isOpen, renderMarkdown }: { node: ScoreNode, parentPath: string, isOpen: (path: string) => boolean, renderMarkdown: boolean }) => {
+const ScenarioLevel = ({ node, parentPath, isOpen, renderMarkdown }: { 
+  node: ScoreNode, 
+  parentPath: string, 
+  isOpen: (path: string) => boolean, 
+  renderMarkdown: boolean,
+}) => {
     node.collapseSingleChildNodes();
     const path = `${parentPath}.${node.name}`;
     if (node.isLeafNode) {
@@ -50,11 +55,12 @@ export const ScenarioGroup = ({ node, renderMarkdown }: { node: ScoreNode, rende
     return (
         <Tree aria-label="Default" appearance="transparent" onOpenChange={handleOpenChange} defaultOpenItems={["." + DefaultRootNodeName]}>
             <ScenarioLevel node={node} parentPath={""} isOpen={isOpen} renderMarkdown={renderMarkdown} />
-        </Tree>);
+        </Tree>);        
 };
 
 export const ScoreDetail = ({ scenario, renderMarkdown }: { scenario: ScenarioRunResult, renderMarkdown: boolean }) => {
     const classes = useStyles();
+    const [selectedMetric, setSelectedMetric] = useState<MetricType | null>(null);
 
     const failureMessages = [];
     for (const e of Object.values(scenario.evaluationResult.metrics)) {
@@ -70,12 +76,103 @@ export const ScoreDetail = ({ scenario, renderMarkdown }: { scenario: ScenarioRu
     const {history, response} = getPromptDetails(scenario.messages, scenario.modelResponse);
 
     return (<div className={classes.iterationArea}>
-        <MetricCardList scenario={scenario} />
+        <MetricCardList
+          scenario={scenario}
+          onMetricSelect={setSelectedMetric}
+          selectedMetric={selectedMetric}
+        />
+        {selectedMetric && <MetricDetailsSection metric={selectedMetric} />}
         {failureMessages && failureMessages.length > 0 && <FailMessage messages={failureMessages} />}
         <PromptDetails history={history} response={response} renderMarkdown={renderMarkdown} />
     </div>);
 };
 
+export const MetricDetailsSection = ({ metric }: { metric: MetricType }) => {
+    const classes = useStyles();
+    const [isExpanded, setIsExpanded] = useState(true);
+    
+    const reason = metric.reason;
+    const hasReason = reason != null;
+    const interpretationReason = metric.interpretation?.reason;
+    const hasInterpretationReason = interpretationReason != null;
+    const diagnostics = metric.diagnostics || [];
+    const hasDiagnostics = diagnostics.length > 0;
+    
+    if (!hasReason && !hasInterpretationReason && !hasDiagnostics) return null;
+
+    return (
+        <div className={classes.section}>
+            <div className={classes.sectionHeader} onClick={() => setIsExpanded(!isExpanded)}>
+                {isExpanded ? <ChevronDown12Regular /> : <ChevronRight12Regular />}
+                <h3 className={classes.sectionHeaderText}>Metric Details: {metric.name}</h3>
+            </div>
+
+            {isExpanded && (
+                <div className={classes.conversationBox}>
+                    {hasReason && (
+                        <div className={classes.sectionContent}>
+                            <div className={classes.sectionSubHeader}>Evaluation Reason</div>
+                            <div>
+                                <span>{reason}</span>
+                            </div>
+                        </div>
+                    )} 
+
+                    {hasInterpretationReason && (
+                        <div className={classes.sectionContent}>
+                            {metric.interpretation?.failed ?
+                                <div className={classes.sectionSubHeader}>Failure Reason</div> :
+                                <div className={classes.sectionSubHeader}>Interpretation Reason</div>
+                            }
+                            <div>
+                                {metric.interpretation?.failed ? 
+                                    <span className={classes.failMessage}><DismissCircle16Regular /> {interpretationReason}</span> : 
+                                    <span>{interpretationReason}</span>
+                                }
+                            </div>
+                        </div>
+                    )} 
+                    
+                    {hasDiagnostics && (
+                        <div>
+                            <div className={classes.sectionSubHeader}>Diagnostics</div>
+                            <DiagnosticsContent diagnostics={diagnostics} />
+                        </div>
+                    )}
+                </div>
+            )}
+        </div>
+    );
+};
+
+const DiagnosticsContent = ({ diagnostics }: { diagnostics: EvaluationDiagnostic[] }) => {
+    const classes = useStyles();
+    
+    const errorDiagnostics = diagnostics.filter(d => d.severity === "error");
+    const warningDiagnostics = diagnostics.filter(d => d.severity === "warning");
+    const infoDiagnostics = diagnostics.filter(d => d.severity === "informational");
+    
+    return (
+        <>
+            {errorDiagnostics.map((diag, index) => (
+                <div key={`error-${index}`} className={classes.failMessage}>
+                    <DismissCircle16Regular /> {diag.message}
+                </div>
+            ))}
+            {warningDiagnostics.map((diag, index) => (
+                <div key={`warning-${index}`} className={classes.warningMessage}>
+                    <Warning16Regular /> {diag.message}
+                </div>
+            ))}
+            {infoDiagnostics.map((diag, index) => (
+                <div key={`info-${index}`} className={classes.infoMessage}>
+                    <Info16Regular /> {diag.message}
+                </div>
+            ))}
+        </>
+    );
+};
+
 const useStyles = makeStyles({
     headerContainer: { display: 'flex', alignItems: 'center', flexDirection: 'row', gap: '0.5rem' },
     promptHint: { fontFamily: tokens.fontFamilyMonospace, opacity: 0.6, fontSize: '0.7rem', paddingLeft: '1rem', whiteSpace: 'nowrap' },
@@ -124,6 +221,15 @@ const useStyles = makeStyles({
     },
     failMessage: {
         color: tokens.colorStatusDangerForeground2,
+        marginBottom: '0.25rem',
+    },
+    warningMessage: {
+        color: tokens.colorStatusWarningForeground2,
+        marginBottom: '0.25rem',
+    },
+    infoMessage: {
+        color: tokens.colorNeutralForeground1,
+        marginBottom: '0.25rem',
     },
     failContainer: {
         padding: '1rem',
@@ -158,7 +264,7 @@ export const FailMessage = ({ messages }: { messages: string[] }) => {
 
             {isExpanded && (
                 <div className={classes.failContainer}>
-                    {messages.map((msg) => <><span className={classes.failMessage} key={msg}><ErrorCircleRegular /> {msg}</span><br /></>)}
+                    {messages.map((msg) => <><span className={classes.failMessage} key={msg}><DismissCircle16Regular /> {msg}</span><br /></>)}
                 </div>
             )}
         </div>

From 12253f9e56e656de2987780a8562004079a4529f Mon Sep 17 00:00:00 2001
From: Shyam Namboodiripad <gnamboo@microsoft.com>
Date: Tue, 11 Mar 2025 00:31:34 -0700
Subject: [PATCH 08/18] Adjust sizing

---
 .../TypeScript/components/MetricCard.tsx        | 10 ++--------
 .../TypeScript/components/ScenarioTree.tsx      | 17 ++++++++---------
 2 files changed, 10 insertions(+), 17 deletions(-)

diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/MetricCard.tsx b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/MetricCard.tsx
index 462bb61da8b..fdb826f784e 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/MetricCard.tsx
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/MetricCard.tsx
@@ -49,7 +49,7 @@ const useCardStyles = makeStyles({
         border: 'none'
     },
     metricText: { fontSize: '1rem', fontWeight: 'normal' },
-    valueText: { fontSize: '1.5rem', fontWeight: 'bold' },
+    valueText: { fontSize: '1rem', fontWeight: 'bold' },
     scoreFgDefault: { color: tokens.colorNeutralStrokeAccessible },
     scoreFg0: { color: tokens.colorStatusDangerForeground1 },
     scoreFg1: { color: tokens.colorStatusDangerForeground2 },
@@ -102,7 +102,7 @@ const useCardColors = (interpretation?: EvaluationMetricInterpretation) => {
     return { fg, bg };
 };
 
-type MetricType = StringMetric | NumericMetric | BooleanMetric | MetricWithNoValue;
+export type MetricType = StringMetric | NumericMetric | BooleanMetric | MetricWithNoValue;
 
 export const MetricCard = ({ 
     metric, 
@@ -159,9 +159,3 @@ export const MetricCard = ({
         </div>
     );
 };
-
-const useDetailStyles = makeStyles({
-    diagError: { fontStyle: tokens.fontFamilyMonospace, color: tokens.colorStatusDangerForeground2 },
-    diagWarn: { fontStyle: tokens.fontFamilyMonospace, color: tokens.colorStatusWarningForeground2 },
-    diagInfo: { fontStyle: tokens.fontFamilyMonospace },
-});
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
index a55cc34c386..8a34412cea9 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
@@ -5,7 +5,7 @@ import { makeStyles, tokens, Tree, TreeItem, TreeItemLayout, TreeItemValue, Tree
 import { useState, useCallback } from "react";
 import { DefaultRootNodeName, ScoreNode, ScoreNodeType, getPromptDetails } from "./Summary";
 import { PassFailBar } from "./PassFailBar";
-import { MetricCardList } from "./MetricCard";
+import { MetricCardList, type MetricType } from "./MetricCard";
 import ReactMarkdown from "react-markdown";
 import { DismissCircle16Regular, Info16Regular, Warning16Regular } from "@fluentui/react-icons";
 import { ChevronDown12Regular, ChevronRight12Regular } from '@fluentui/react-icons';
@@ -176,9 +176,7 @@ const DiagnosticsContent = ({ diagnostics }: { diagnostics: EvaluationDiagnostic
 const useStyles = makeStyles({
     headerContainer: { display: 'flex', alignItems: 'center', flexDirection: 'row', gap: '0.5rem' },
     promptHint: { fontFamily: tokens.fontFamilyMonospace, opacity: 0.6, fontSize: '0.7rem', paddingLeft: '1rem', whiteSpace: 'nowrap' },
-    score: { 
-        fontSize: tokens.fontSizeBase100,
-    },
+    score: { fontSize: tokens.fontSizeBase200 },
     passFailBadge: {
         display: 'flex',
         flexDirection: 'row',
@@ -190,13 +188,14 @@ const useStyles = makeStyles({
     scenarioLabel: { 
         whiteSpace: 'nowrap',
         fontWeight: '500',
+        fontSize: tokens.fontSizeBase300,
     },
     iterationArea: {
         marginTop: '1rem',
         marginBottom: '1rem',
     },
     section: {
-        marginTop: '2rem',
+        marginTop: '1rem',
     },
     sectionHeader: {
         display: 'flex',
@@ -208,12 +207,12 @@ const useStyles = makeStyles({
     sectionHeaderText: {
         margin: 0,
         marginLeft: '0.5rem',
-        fontSize: '1.25rem',
-        fontWeight: 'bold',
+        fontSize: tokens.fontSizeBase300,
+        fontWeight: '500',
     },
     sectionSubHeader: {
-        fontSize: '0.875rem',
-        fontWeight: 'bold',
+        fontSize: tokens.fontSizeBase300,
+        fontWeight: '500',
         marginBottom: '0.5rem',
     },
     sectionContent: {

From ada0cc47301432e81ddfe8d2e3ec602bed5723bf Mon Sep 17 00:00:00 2001
From: Shyam Namboodiripad <gnamboo@microsoft.com>
Date: Tue, 11 Mar 2025 13:13:14 -0700
Subject: [PATCH 09/18] Update evaluator remarks to mention metric Reason

---
 .../RelevanceTruthAndCompletenessEvaluator.cs                  | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluator.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluator.cs
index b1bf9e797a5..85ff8bae12d 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluator.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluator.cs
@@ -23,7 +23,8 @@ namespace Microsoft.Extensions.AI.Evaluation.Quality;
 /// <remarks>
 /// <see cref="RelevanceTruthAndCompletenessEvaluator"/> returns three <see cref="NumericMetric"/>s that contain scores
 /// for 'Relevance', 'Truth' and 'Completeness' respectively. Each score is a number between 1 and 5, with 1 indicating
-/// a poor score, and 5 indicating an excellent score.
+/// a poor score, and 5 indicating an excellent score. Each returned score is also accompanied by a
+/// <see cref="EvaluationMetric{T}.Reason"/> that provides an explanation for the score.
 /// </remarks>
 public sealed partial class RelevanceTruthAndCompletenessEvaluator : ChatConversationEvaluator
 {

From 8d305c0375b21041ec75d38418b34f10e86cdf9a Mon Sep 17 00:00:00 2001
From: Shyam Namboodiripad <gnamboo@microsoft.com>
Date: Tue, 11 Mar 2025 17:47:59 -0700
Subject: [PATCH 10/18] Remove Failure Reasons section

---
 .../TypeScript/components/ScenarioTree.tsx    | 33 -------------------
 1 file changed, 33 deletions(-)

diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
index 8a34412cea9..ae50faa03a8 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
@@ -61,18 +61,6 @@ export const ScenarioGroup = ({ node, renderMarkdown }: { node: ScoreNode, rende
 export const ScoreDetail = ({ scenario, renderMarkdown }: { scenario: ScenarioRunResult, renderMarkdown: boolean }) => {
     const classes = useStyles();
     const [selectedMetric, setSelectedMetric] = useState<MetricType | null>(null);
-
-    const failureMessages = [];
-    for (const e of Object.values(scenario.evaluationResult.metrics)) {
-        if (e.interpretation && e.interpretation.failed) {
-            failureMessages.push(e.interpretation.reason || "Metric failed.");
-        }
-        for (const d of e.diagnostics) {
-            if (d.severity === "error") {
-                failureMessages.push(d.message);
-            }
-        }
-    }
     const {history, response} = getPromptDetails(scenario.messages, scenario.modelResponse);
 
     return (<div className={classes.iterationArea}>
@@ -82,7 +70,6 @@ export const ScoreDetail = ({ scenario, renderMarkdown }: { scenario: ScenarioRu
           selectedMetric={selectedMetric}
         />
         {selectedMetric && <MetricDetailsSection metric={selectedMetric} />}
-        {failureMessages && failureMessages.length > 0 && <FailMessage messages={failureMessages} />}
         <PromptDetails history={history} response={response} renderMarkdown={renderMarkdown} />
     </div>);
 };
@@ -250,26 +237,6 @@ const useStyles = makeStyles({
     },
 });
 
-export const FailMessage = ({ messages }: { messages: string[] }) => {
-    const classes = useStyles();
-    const [isExpanded, setIsExpanded] = useState(true);
-
-    return (
-        <div className={classes.section}>
-            <div className={classes.sectionHeader} onClick={() => setIsExpanded(!isExpanded)}>
-                {isExpanded ? <ChevronDown12Regular /> : <ChevronRight12Regular />}
-                <h3 className={classes.sectionHeaderText}>Failure Reasons</h3>
-            </div>
-
-            {isExpanded && (
-                <div className={classes.failContainer}>
-                    {messages.map((msg) => <><span className={classes.failMessage} key={msg}><DismissCircle16Regular /> {msg}</span><br /></>)}
-                </div>
-            )}
-        </div>
-    );
-};
-
 const PassFailBadge = ({ pass, total }: { pass: number, total: number }) => {
     const classes = useStyles();
     return (<div className={classes.passFailBadge}>

From 052cd4680f9c130b91d95cb91e3f41354da98164 Mon Sep 17 00:00:00 2001
From: Shyam Namboodiripad <gnamboo@microsoft.com>
Date: Tue, 11 Mar 2025 18:02:58 -0700
Subject: [PATCH 11/18] Use / in place of > for level separators

---
 .../TypeScript/components/ScenarioTree.tsx    | 20 ++++++++++++++++++-
 .../TypeScript/components/Summary.ts          |  2 +-
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
index ae50faa03a8..f4208b76687 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
@@ -176,6 +176,15 @@ const useStyles = makeStyles({
         whiteSpace: 'nowrap',
         fontWeight: '500',
         fontSize: tokens.fontSizeBase300,
+        display: 'flex',
+        gap: '0.5rem',
+        alignItems: 'center',
+    },
+    separator: {
+        color: tokens.colorNeutralForeground4,
+        fontSize: tokens.fontSizeBase200,
+        fontWeight: '300',
+        padding: '0 0.125rem',
     },
     iterationArea: {
         marginTop: '1rem',
@@ -262,9 +271,18 @@ const ScoreNodeHeader = ({ item, showPrompt }: { item: ScoreNode, showPrompt?: b
             break;
     }
 
+    const parts = item.name.split(' / ');
+
     return (<div className={classes.headerContainer}>
         <PassFailBar pass={ctPass} total={ctPass + ctFail} width="24px" height="12px"/>
-        <div className={classes.scenarioLabel}>{item.name}</div>
+        <div className={classes.scenarioLabel}>
+            {parts.map((part, index) => (
+                <>
+                    {part}
+                    {index < parts.length - 1 && <span className={classes.separator}>/</span>}
+                </>
+            ))}
+        </div>
         <PassFailBadge pass={ctPass} total={ctPass + ctFail} />
         {showPrompt && item.shortenedPrompt && <div className={classes.promptHint}>{item.shortenedPrompt}</div>}
     </div>);
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/Summary.ts b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/Summary.ts
index 7d72b1a7e63..2ea86a4e8f7 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/Summary.ts
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/Summary.ts
@@ -111,7 +111,7 @@ export class ScoreNode {
 
         while (this.childNodes.length === 1) {
             const onlyChild = this.childNodes[0];
-            this.name += ` > ${onlyChild.name}`;
+            this.name += ` / ${onlyChild.name}`;
             this.children = onlyChild.children;
             this.scenario = onlyChild.scenario;
         }

From 0085309a52727260fbd8b97a11a4a7ab38658d90 Mon Sep 17 00:00:00 2001
From: Shyam Namboodiripad <gnamboo@microsoft.com>
Date: Wed, 12 Mar 2025 02:33:18 -0700
Subject: [PATCH 12/18] Introduce chat bubbles for conversations

Also fix numerous sizing and layout issues.
---
 .../TypeScript/components/MetricCard.tsx      | 131 +++++++++++++-----
 .../TypeScript/components/ScenarioTree.tsx    | 117 ++++++++++++----
 .../TypeScript/components/Summary.ts          |  58 +++++---
 3 files changed, 231 insertions(+), 75 deletions(-)

diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/MetricCard.tsx b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/MetricCard.tsx
index fdb826f784e..3ec8defb84a 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/MetricCard.tsx
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/MetricCard.tsx
@@ -1,7 +1,7 @@
 ﻿// Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-import { makeStyles, mergeClasses, tokens } from "@fluentui/react-components";
+import { makeStyles, mergeClasses, tokens, Tooltip } from "@fluentui/react-components";
 import { DismissCircle16Regular, Info16Regular, Warning16Regular } from "@fluentui/react-icons";
 
 const useCardListStyles = makeStyles({
@@ -30,9 +30,14 @@ export const MetricCardList = ({ scenario, onMetricSelect, selectedMetric }: {
 
 const useCardStyles = makeStyles({
     card: {
-        display: 'flex', flexDirection: 'column', alignItems: 'center', gap: '0.5rem',
-        padding: '.75rem', border: '1px solid #e0e0e0', borderRadius: '4px',
-        minWidth: '8rem',
+        display: 'flex', 
+        flexDirection: 'column', 
+        alignItems: 'center', 
+        gap: '0.5rem',
+        padding: '.75rem', 
+        border: '1px solid #e0e0e0', 
+        borderRadius: '4px',
+        width: '10rem',
         cursor: 'pointer',
         transition: 'box-shadow 0.2s ease-in-out, outline 0.2s ease-in-out',
         position: 'relative',
@@ -48,8 +53,41 @@ const useCardStyles = makeStyles({
         outlineOffset: '0px',
         border: 'none'
     },
-    metricText: { fontSize: '1rem', fontWeight: 'normal' },
-    valueText: { fontSize: '1rem', fontWeight: 'bold' },
+    metricNameText: { 
+        fontSize: '1rem', 
+        fontWeight: 'normal',
+        width: '80%',
+        textAlign: 'center',
+        overflow: 'hidden',
+        textOverflow: 'ellipsis',
+        lineHeight: '1.2',
+        maxHeight: '2.4em',
+        display: '-webkit-box',
+        WebkitLineClamp: 2,
+        WebkitBoxOrient: 'vertical',
+        marginTop: '-0.5rem',
+    },
+    iconPlaceholder: {
+        height: '4px',
+        width: '100%',
+        position: 'relative',
+        marginBottom: '0',
+    },
+    metricIcon: {
+        position: 'absolute',
+        top: '-0.25rem',
+        right: '-0.25rem',
+    },
+    metricValueText: { 
+        fontSize: '1rem', 
+        fontWeight: 'bold',
+        width: '80%',
+        textAlign: 'center',
+        overflow: 'hidden',
+        textOverflow: 'ellipsis',
+        whiteSpace: 'nowrap',
+        maxHeight: '1.2em',
+    },
     scoreFgDefault: { color: tokens.colorNeutralStrokeAccessible },
     scoreFg0: { color: tokens.colorStatusDangerForeground1 },
     scoreFg1: { color: tokens.colorStatusDangerForeground2 },
@@ -113,27 +151,24 @@ export const MetricCard = ({
     onClick: () => void,
     isSelected: boolean
 }) => {
-    let renderValue: (metric: MetricType) => React.ReactNode;
-    switch (metric.$type) {
-        case "string":
-            renderValue = (metric: MetricType) => <>{metric?.value ?? "??"}</>;
-            break;
-        case "boolean":
-            renderValue = (metric: MetricType) => <>{
-                !metric || metric.value === undefined || metric.value === null ? 
-                '??' :
-                metric.value ? 'Pass' : 'Fail'}</>;
-            break;
-        case "numeric":
-            renderValue = (metric: MetricType) => <>{metric?.value ?? "??"}</>;
-            break;
-        case "none":
-            renderValue = () => <>None</>;
-            break;
-        default:
-            throw new Error(`Unknown metric type: ${metric["$type"]}`);
-    }
+    const getValue = (metric: MetricType): string => {
+        switch (metric.$type) {
+            case "string":
+                return metric?.value ?? "??";
+            case "boolean":
+                return !metric || metric.value === undefined || metric.value === null ? 
+                    '??' :
+                    metric.value ? 'Pass' : 'Fail';
+            case "numeric":
+                return metric?.value?.toString() ?? "??";
+            case "none":
+                return "None";
+            default:
+                throw new Error(`Unknown metric type: ${metric["$type"]}`);
+        }
+    };
 
+    const metricValue = getValue(metric);
     const classes = useCardStyles();
     const { fg, bg } = useCardColors(metric.interpretation);
     
@@ -148,14 +183,44 @@ export const MetricCard = ({
         isSelected ? classes.selectedCard : undefined
     );
     
+    let statusIcon = null;
+    let statusTooltip = '';
+    
+    if (hasErrorMessages) {
+        statusIcon = <DismissCircle16Regular className={classes.metricIcon} />;
+        statusTooltip = 'This metric has errors. Click the card to view more details.';
+    } else if (hasWarningMessages) {
+        statusIcon = <Warning16Regular className={classes.metricIcon} />;
+        statusTooltip = 'This metric has warnings. Click the card to view more details.';
+    } else if (hasInformationalMessages || hasReasons) {
+        statusIcon = <Info16Regular className={classes.metricIcon} />;
+        statusTooltip = 'This metric has additional information. Click the card to view more details.';
+    }
+    
+    const tooltipContent = (
+        <div>
+            <div>Name: {metric.name}</div>
+            <div>Value: {metricValue}</div>
+        </div>
+    );
+    
     return (
-        <div className={cardClass} onClick={onClick}>
-            <div className={classes.metricText}>{metric.name} {
-                    (hasErrorMessages && <DismissCircle16Regular />) || 
-                    (hasWarningMessages && <Warning16Regular />) || 
-                    ((hasInformationalMessages || hasReasons) && <Info16Regular />)}
+        <Tooltip content={tooltipContent} relationship="label">
+            <div className={cardClass} onClick={onClick}>
+                <div className={classes.iconPlaceholder}>
+                    {statusIcon && (
+                        <Tooltip content={statusTooltip} relationship="description">
+                            <span>{statusIcon}</span>
+                        </Tooltip>
+                    )}
+                </div>
+                <div className={classes.metricNameText}>
+                    {metric.name}
+                </div>
+                <div className={mergeClasses(fg, classes.metricValueText)}>
+                    {metricValue}
+                </div>
             </div>
-            <div className={mergeClasses(fg, classes.valueText)}>{renderValue(metric)}</div>
-        </div>
+        </Tooltip>
     );
 };
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
index f4208b76687..db1a3c518ba 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
@@ -1,9 +1,9 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-import { makeStyles, tokens, Tree, TreeItem, TreeItemLayout, TreeItemValue, TreeOpenChangeData, TreeOpenChangeEvent } from "@fluentui/react-components";
-import { useState, useCallback } from "react";
-import { DefaultRootNodeName, ScoreNode, ScoreNodeType, getPromptDetails } from "./Summary";
+import React, { useState, useCallback } from "react";
+import { makeStyles, tokens, Tree, TreeItem, TreeItemLayout, TreeItemValue, TreeOpenChangeData, TreeOpenChangeEvent, mergeClasses } from "@fluentui/react-components";
+import { DefaultRootNodeName, ScoreNode, ScoreNodeType, getPromptDetails, ChatMessageDisplay } from "./Summary";
 import { PassFailBar } from "./PassFailBar";
 import { MetricCardList, type MetricType } from "./MetricCard";
 import ReactMarkdown from "react-markdown";
@@ -61,7 +61,7 @@ export const ScenarioGroup = ({ node, renderMarkdown }: { node: ScoreNode, rende
 export const ScoreDetail = ({ scenario, renderMarkdown }: { scenario: ScenarioRunResult, renderMarkdown: boolean }) => {
     const classes = useStyles();
     const [selectedMetric, setSelectedMetric] = useState<MetricType | null>(null);
-    const {history, response} = getPromptDetails(scenario.messages, scenario.modelResponse);
+    const { messages } = getPromptDetails(scenario.messages, scenario.modelResponse);
 
     return (<div className={classes.iterationArea}>
         <MetricCardList
@@ -70,7 +70,7 @@ export const ScoreDetail = ({ scenario, renderMarkdown }: { scenario: ScenarioRu
           selectedMetric={selectedMetric}
         />
         {selectedMetric && <MetricDetailsSection metric={selectedMetric} />}
-        <PromptDetails history={history} response={response} renderMarkdown={renderMarkdown} />
+        <PromptDetails messages={messages} renderMarkdown={renderMarkdown} />
     </div>);
 };
 
@@ -191,14 +191,14 @@ const useStyles = makeStyles({
         marginBottom: '1rem',
     },
     section: {
-        marginTop: '1rem',
+        marginTop: '0.75rem',
     },
     sectionHeader: {
         display: 'flex',
         alignItems: 'center',
         cursor: 'pointer',
         userSelect: 'none',
-        marginBottom: '1rem',
+        marginBottom: '0.5rem',
     },
     sectionHeaderText: {
         margin: 0,
@@ -209,10 +209,10 @@ const useStyles = makeStyles({
     sectionSubHeader: {
         fontSize: tokens.fontSizeBase300,
         fontWeight: '500',
-        marginBottom: '0.5rem',
+        marginBottom: '0.25rem',
     },
     sectionContent: {
-        marginBottom: '1.5rem',
+        marginBottom: '0.75rem',
     },
     failMessage: {
         color: tokens.colorStatusDangerForeground2,
@@ -233,9 +233,7 @@ const useStyles = makeStyles({
         cursor: 'text',
     },
     conversationBox: {
-        border: '1px solid #e0e0e0',
-        borderRadius: '4px',
-        padding: '1rem',
+        padding: '0.75rem',
         maxHeight: '20rem',
         overflow: 'auto',
         cursor: 'text',
@@ -244,6 +242,51 @@ const useStyles = makeStyles({
             wordWrap: 'break-word',
         },
     },
+    chatContainer: {
+        display: 'flex',
+        flexDirection: 'column',
+        gap: '0.75rem',
+        padding: '0.75rem 0',
+        position: 'relative',
+    },
+    messageRow: {
+        display: 'flex',
+        flexDirection: 'column',
+        width: '900px',
+        position: 'relative',
+    },
+    userMessageRow: {
+        marginLeft: '0',
+    },
+    assistantMessageRow: {
+        marginLeft: '100px',
+    },
+    messageParticipantName: {
+        fontSize: tokens.fontSizeBase200,
+        marginBottom: '0.25rem',
+        color: tokens.colorNeutralForeground3,
+        paddingLeft: '0.5rem',
+    },
+    messageBubble: {
+        padding: '0.75rem 1rem',
+        borderRadius: '12px',
+        overflow: 'hidden',
+        wordBreak: 'break-word',
+        width: '100%',
+    },
+    userBubble: {
+        backgroundColor: tokens.colorNeutralBackground3,
+        borderTopLeftRadius: '4px',
+    },
+    systemBubble: {
+        backgroundColor: tokens.colorBrandBackground,
+        color: tokens.colorNeutralForegroundInverted,
+        borderTopLeftRadius: '4px',
+    },
+    assistantBubble: {
+        backgroundColor: tokens.colorNeutralBackground4,
+        borderTopRightRadius: '4px',
+    },
 });
 
 const PassFailBadge = ({ pass, total }: { pass: number, total: number }) => {
@@ -277,10 +320,10 @@ const ScoreNodeHeader = ({ item, showPrompt }: { item: ScoreNode, showPrompt?: b
         <PassFailBar pass={ctPass} total={ctPass + ctFail} width="24px" height="12px"/>
         <div className={classes.scenarioLabel}>
             {parts.map((part, index) => (
-                <>
+                <React.Fragment key={`${part}-${index}`}>
                     {part}
                     {index < parts.length - 1 && <span className={classes.separator}>/</span>}
-                </>
+                </React.Fragment>
             ))}
         </div>
         <PassFailBadge pass={ctPass} total={ctPass + ctFail} />
@@ -288,10 +331,15 @@ const ScoreNodeHeader = ({ item, showPrompt }: { item: ScoreNode, showPrompt?: b
     </div>);
 };
 
-export const PromptDetails = ({ history, response, renderMarkdown }: { history: string, response: string, renderMarkdown: boolean }) => {
+export const PromptDetails = ({ messages, renderMarkdown }: { 
+    messages: ChatMessageDisplay[], 
+    renderMarkdown: boolean 
+}) => {
     const classes = useStyles();
     const [isExpanded, setIsExpanded] = useState(true);
 
+    const isUserSide = (role: string) => role.toLowerCase() === 'user' || role.toLowerCase() === 'system';
+
     return (
         <div className={classes.section}>
             <div className={classes.sectionHeader} onClick={() => setIsExpanded(!isExpanded)}>
@@ -300,16 +348,35 @@ export const PromptDetails = ({ history, response, renderMarkdown }: { history:
             </div>
 
             {isExpanded && (
-                <div className={classes.conversationBox}>
-                    <div className={classes.sectionContent}>
-                        <div className={classes.sectionSubHeader}>Prompt</div>
-                        {renderMarkdown ? <ReactMarkdown>{history}</ReactMarkdown> : <pre>{history}</pre>}
-                    </div>
-                    
-                    <div>
-                        <div className={classes.sectionSubHeader}>Response</div>
-                        {renderMarkdown ? <ReactMarkdown>{response}</ReactMarkdown> : <pre>{response}</pre>}
-                    </div>
+                <div className={classes.chatContainer}>
+                    {messages.map((message, index) => {
+                        const isFromUserSide = isUserSide(message.role);
+                        const messageRowClass = mergeClasses(
+                            classes.messageRow,
+                            isFromUserSide ? classes.userMessageRow : classes.assistantMessageRow
+                        );
+                        
+                        let messageBubble;
+                        if (message.role.toLowerCase() === 'system') {
+                            messageBubble = mergeClasses(classes.messageBubble, classes.systemBubble);
+                        } else if (isFromUserSide) {
+                            messageBubble = mergeClasses(classes.messageBubble, classes.userBubble);
+                        } else {
+                            messageBubble = mergeClasses(classes.messageBubble, classes.assistantBubble);
+                        }
+
+                        return (
+                            <div key={index} className={messageRowClass}>
+                                <div className={classes.messageParticipantName}>{message.participantName}</div>
+                                <div className={messageBubble}>
+                                    {renderMarkdown ? 
+                                        <ReactMarkdown>{message.content}</ReactMarkdown> : 
+                                        <pre style={{ whiteSpace: 'pre-wrap' }}>{message.content}</pre>
+                                    }
+                                </div>
+                            </div>
+                        );
+                    })}
                 </div>
             )}
         </div>
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/Summary.ts b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/Summary.ts
index 2ea86a4e8f7..f49db7a2a60 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/Summary.ts
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/Summary.ts
@@ -65,7 +65,6 @@ export class ScoreNode {
     }
 
     aggregate() {
-        // Reset node to defaults before recalculating
         this.failed = false;
         this.numPassingIterations = 0;
         this.numFailingIterations = 0;
@@ -84,7 +83,15 @@ export class ScoreNode {
             this.numPassingIterations = this.failed ? 0 : 1;
             this.numFailingIterations = this.failed ? 1 : 0;
             const lastMessage = this.scenario?.messages[this.scenario?.messages.length - 1];
-            const {history} = getPromptDetails(lastMessage ? [lastMessage] : [], this.scenario?.modelResponse);
+            
+            const { messages } = getPromptDetails(lastMessage ? [lastMessage] : [], this.scenario?.modelResponse);
+            let history = "";
+            if (messages.length === 1) {
+                history = messages[0].content;
+            } else if (messages.length > 1) {
+                history = messages.map(m => `[${m.participantName}] ${m.content}`).join("\n\n");
+            }
+            
             this.shortenedPrompt = shortenPrompt(history);
         } else {
             for (const child of this.childNodes) {
@@ -160,25 +167,42 @@ const isTextContent = (content: AIContent): content is TextContent => {
     return (content as TextContent).text !== undefined;
 };
 
-export const getPromptDetails = (messages: ChatMessage[], modelResponse?: ChatResponse): {history:string, response: string}=> {
-    let history: string = "";
-    if (messages.length === 1) {
-        history = messages[0].contents.map(c => (c as TextContent).text).join("\n");
-    } else if (messages.length > 1) {
-        const historyItems: string[] = [];
-        for (const m of messages) {
+export type ChatMessageDisplay = {
+    role: string;
+    participantName: string;
+    content: string;
+};
+
+export const getPromptDetails = (messages: ChatMessage[], modelResponse?: ChatResponse): { messages: ChatMessageDisplay[] } => {
+    const chatMessages: ChatMessageDisplay[] = [];
+    
+    for (const m of messages) {
+        for (const c of m.contents) {
+            if (isTextContent(c)) {
+                const participantName = m.authorName ? `${m.authorName} (${m.role})` : m.role;
+                chatMessages.push({
+                    role: m.role,
+                    participantName: participantName,
+                    content: c.text
+                });
+            }
+        }
+    }
+
+    if (modelResponse?.messages) {
+        for (const m of modelResponse.messages) {
             for (const c of m.contents) {
                 if (isTextContent(c)) {
-                    const historyItem = m.authorName
-                        ? `[${m.authorName} (${m.role})] ${c.text}` : `[${m.role}] ${c.text}`;
-                    historyItems.push(historyItem);
+                    const participantName = m.authorName ? `${m.authorName} (${m.role})` : m.role || 'Assistant';
+                    chatMessages.push({
+                        role: m.role,
+                        participantName: participantName,
+                        content: c.text
+                    });
                 }
             }
         }
-        history = historyItems.join("\n\n");
     }
 
-    const response: string = modelResponse?.messages.map(m => m.contents.map(c => (c as TextContent).text).join("\n") ?? "").join("\n") ?? "";
-
-    return { history, response };
-};
\ No newline at end of file
+    return { messages: chatMessages };
+};

From 4554caf43da3644bdf83fa862906087274bda5d0 Mon Sep 17 00:00:00 2001
From: Shyam Namboodiripad <gnamboo@microsoft.com>
Date: Wed, 12 Mar 2025 03:04:30 -0700
Subject: [PATCH 13/18] Add some more tests for reason

---
 .../EndToEndTests.cs                          | 12 ++--
 .../ResultsTests.cs                           | 64 ++++++++++++-------
 2 files changed, 46 insertions(+), 30 deletions(-)

diff --git a/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/EndToEndTests.cs b/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/EndToEndTests.cs
index 65801f0342f..8307dc38591 100644
--- a/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/EndToEndTests.cs
+++ b/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/EndToEndTests.cs
@@ -81,9 +81,9 @@ await _reportingConfiguration.CreateScenarioRunAsync(
             NumericMetric truth = result.Get<NumericMetric>(RelevanceTruthAndCompletenessEvaluator.TruthMetricName);
             NumericMetric completeness = result.Get<NumericMetric>(RelevanceTruthAndCompletenessEvaluator.CompletenessMetricName);
 
-            Assert.True(relevance.Value >= 4, string.Format("Relevance - Reasoning: {0}", relevance.Diagnostics.Single().Message));
-            Assert.True(truth.Value >= 4, string.Format("Truth - Reasoning: {0}", truth.Diagnostics.Single().Message));
-            Assert.True(completeness.Value >= 4, string.Format("Completeness - Reasoning: {0}", completeness.Diagnostics.Single().Message));
+            Assert.True(relevance.Value >= 4, string.Format("Relevance - Reasoning: {0}", relevance.Reason));
+            Assert.True(truth.Value >= 4, string.Format("Truth - Reasoning: {0}", truth.Reason));
+            Assert.True(completeness.Value >= 4, string.Format("Completeness - Reasoning: {0}", completeness.Reason));
 
             NumericMetric coherence = result.Get<NumericMetric>(CoherenceEvaluator.CoherenceMetricName);
             Assert.True(coherence.Value >= 4);
@@ -132,9 +132,9 @@ await _reportingConfiguration.CreateScenarioRunAsync(
             NumericMetric truth = result.Get<NumericMetric>(RelevanceTruthAndCompletenessEvaluator.TruthMetricName);
             NumericMetric completeness = result.Get<NumericMetric>(RelevanceTruthAndCompletenessEvaluator.CompletenessMetricName);
 
-            Assert.True(relevance.Value >= 4, string.Format("Relevance - Reasoning: {0}", relevance.Diagnostics.Single().Message));
-            Assert.True(truth.Value >= 4, string.Format("Truth - Reasoning: {0}", truth.Diagnostics.Single().Message));
-            Assert.True(completeness.Value >= 4, string.Format("Completeness - Reasoning: {0}", completeness.Diagnostics.Single().Message));
+            Assert.True(relevance.Value >= 4, string.Format("Relevance - Reasoning: {0}", relevance.Reason));
+            Assert.True(truth.Value >= 4, string.Format("Truth - Reasoning: {0}", truth.Reason));
+            Assert.True(completeness.Value >= 4, string.Format("Completeness - Reasoning: {0}", completeness.Reason));
 
             NumericMetric coherence = result.Get<NumericMetric>(CoherenceEvaluator.CoherenceMetricName);
             Assert.True(coherence.Value >= 4);
diff --git a/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/ResultsTests.cs b/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/ResultsTests.cs
index 338532e5a3d..e7ed381ad93 100644
--- a/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/ResultsTests.cs
+++ b/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/ResultsTests.cs
@@ -151,7 +151,8 @@ public async Task ResultWithBooleanMetric()
         var metricA = new BooleanMetric("Metric with value false", false);
         var metricB = new BooleanMetric("Metric with value true", true);
         var metricC = new BooleanMetric("Metric without value");
-        evaluator.TestMetrics = [metricA, metricB, metricC];
+        var metricD = new BooleanMetric("Metric with reason", false, reason: "The reason");
+        evaluator.TestMetrics = [metricA, metricB, metricC, metricD];
 
         await using ScenarioRun scenarioRun =
             await reportingConfiguration.CreateScenarioRunAsync(
@@ -163,6 +164,7 @@ await reportingConfiguration.CreateScenarioRunAsync(
         Assert.Null(metricA.Interpretation);
         Assert.Null(metricB.Interpretation);
         Assert.Null(metricC.Interpretation);
+        Assert.Null(metricD.Interpretation);
 
         Assert.False(result.ContainsDiagnostics());
     }
@@ -176,7 +178,8 @@ public async Task ResultWithBooleanMetricAndInterpretation()
         var metricA = new BooleanMetric("Metric with value false", false);
         var metricB = new BooleanMetric("Metric with value true", true);
         var metricC = new BooleanMetric("Metric without value");
-        evaluator.TestMetrics = [metricA, metricB, metricC];
+        var metricD = new BooleanMetric("Metric with reason", false, reason: "The reason");
+        evaluator.TestMetrics = [metricA, metricB, metricC, metricD];
 
         await using ScenarioRun scenarioRun =
             await reportingConfiguration.CreateScenarioRunAsync(
@@ -221,9 +224,9 @@ public async Task ResultWithStringMetric()
         var metricF = new StringMetric("Measurement System: Nautical", "Nautical");
         var metricG = new StringMetric("Measurement System: Astronomical", "Astronomical");
         var metricH = new StringMetric("Measurement System: Multiple", "Multiple");
-        var metricI = new StringMetric("Measurement System: Blah", "Blah");
-        var metricJ = new StringMetric("Measurement System: Empty", "");
-        var metricK = new StringMetric("Measurement System: Null");
+        var metricI = new StringMetric("Measurement System: Blah", "Blah", reason: "Value was unexpected");
+        var metricJ = new StringMetric("Measurement System: Empty", "", reason: "Value was empty");
+        var metricK = new StringMetric("Measurement System: Null", reason: "Value was null");
 
         evaluator.TestMetrics =
             [metricA, metricB, metricC, metricD, metricE, metricF, metricG, metricH, metricI, metricJ, metricK];
@@ -276,9 +279,9 @@ public async Task ResultWithStringMetricAndInterpretation()
         var metricF = new StringMetric("Measurement System: Nautical", "Nautical");
         var metricG = new StringMetric("Measurement System: Astronomical", "Astronomical");
         var metricH = new StringMetric("Measurement System: Multiple", "Multiple");
-        var metricI = new StringMetric("Measurement System: Blah", "Blah");
-        var metricJ = new StringMetric("Measurement System: Empty", "");
-        var metricK = new StringMetric("Measurement System: Null");
+        var metricI = new StringMetric("Measurement System: Blah", "Blah", reason: "Value was unexpected");
+        var metricJ = new StringMetric("Measurement System: Empty", "", reason: "Value was empty");
+        var metricK = new StringMetric("Measurement System: Null", reason: "Value was null");
 
         evaluator.TestMetrics =
             [metricA, metricB, metricC, metricD, metricE, metricF, metricG, metricH, metricI, metricJ, metricK];
@@ -322,14 +325,14 @@ public async Task ResultWithNumericMetrics()
         var evaluator = new TestEvaluator();
         ReportingConfiguration reportingConfiguration = CreateReportingConfiguration(evaluator);
 
-        var metricA = new NumericMetric("Metric with value 0", 0);
-        var metricB = new NumericMetric("Metric with value 1", 1);
-        var metricC = new NumericMetric("Metric with value 2", 2);
-        var metricD = new NumericMetric("Metric with value 3", 3);
-        var metricE = new NumericMetric("Metric with value 4", 4);
-        var metricF = new NumericMetric("Metric with value 5", 5);
-        var metricG = new NumericMetric("Metric with value 6", 6);
-        var metricH = new NumericMetric("Metric with no value");
+        var metricA = new NumericMetric("Metric with value 0", 0, reason: "Because of reason A");
+        var metricB = new NumericMetric("Metric with value 1", 1, reason: "Because of reason B");
+        var metricC = new NumericMetric("Metric with value 2", 2, reason: "Because of reason C");
+        var metricD = new NumericMetric("Metric with value 3", 3, reason: "Because of reason D");
+        var metricE = new NumericMetric("Metric with value 4", 4, reason: "Because of reason E");
+        var metricF = new NumericMetric("Metric with value 5", 5, reason: "Because of reason F");
+        var metricG = new NumericMetric("Metric with value 6", 6, reason: "Because of reason G");
+        var metricH = new NumericMetric("Metric with no value", reason: "Because of reason H");
         evaluator.TestMetrics = [metricA, metricB, metricC, metricD, metricE, metricF, metricG, metricH];
 
         await using ScenarioRun scenarioRun =
@@ -357,14 +360,14 @@ public async Task ResultWithNumericMetricsAndInterpretation()
         var evaluator = new TestEvaluator();
         ReportingConfiguration reportingConfiguration = CreateReportingConfiguration(evaluator);
 
-        var metricA = new NumericMetric("Metric with value 0", 0);
-        var metricB = new NumericMetric("Metric with value 1", 1);
-        var metricC = new NumericMetric("Metric with value 2", 2);
-        var metricD = new NumericMetric("Metric with value 3", 3);
-        var metricE = new NumericMetric("Metric with value 4", 4);
-        var metricF = new NumericMetric("Metric with value 5", 5);
-        var metricG = new NumericMetric("Metric with value 6", 6);
-        var metricH = new NumericMetric("Metric with no value");
+        var metricA = new NumericMetric("Metric with value 0", 0, reason: "Because of reason A");
+        var metricB = new NumericMetric("Metric with value 1", 1, reason: "Because of reason B");
+        var metricC = new NumericMetric("Metric with value 2", 2, reason: "Because of reason C");
+        var metricD = new NumericMetric("Metric with value 3", 3, reason: "Because of reason D");
+        var metricE = new NumericMetric("Metric with value 4", 4, reason: "Because of reason E");
+        var metricF = new NumericMetric("Metric with value 5", 5, reason: "Because of reason F");
+        var metricG = new NumericMetric("Metric with value 6", 6, reason: "Because of reason G");
+        var metricH = new NumericMetric("Metric with no value", reason: "Because of reason H");
         evaluator.TestMetrics = [metricA, metricB, metricC, metricD, metricE, metricF, metricG, metricH];
 
         await using ScenarioRun scenarioRun =
@@ -405,11 +408,13 @@ public async Task ResultWithDiagnosticsOnUninterpretedMetrics()
         metric1.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 1"));
         metric1.AddDiagnostic(EvaluationDiagnostic.Informational("Informational 1"));
         metric1.AddDiagnostic(EvaluationDiagnostic.Informational("Informational 2"));
+        metric1.Reason = "Reason for metric 1";
 
         var metric2 = new BooleanMetric("Metric with warning and informational diagnostics");
         metric2.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 1"));
         metric2.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 2"));
         metric2.AddDiagnostic(EvaluationDiagnostic.Informational("Informational 2"));
+        metric2.Reason = "Reason for metric 2";
 
         var metric3 = new EvaluationMetric("Metric with error diagnostics only");
         metric3.AddDiagnostic(EvaluationDiagnostic.Error("Error 1"));
@@ -419,9 +424,11 @@ public async Task ResultWithDiagnosticsOnUninterpretedMetrics()
         var metric4 = new StringMetric("Metric with warning diagnostics only");
         metric4.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 1"));
         metric4.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 2"));
+        metric4.Reason = "Reason for metric 4";
 
         var metric5 = new NumericMetric("Metric with informational diagnostics only");
         metric5.AddDiagnostic(EvaluationDiagnostic.Informational("Informational 1"));
+        metric5.Reason = "Reason for metric 5";
 
         evaluator.TestMetrics = [metric1, metric2, metric3, metric4, metric5];
 
@@ -452,11 +459,13 @@ public async Task ResultWithDiagnosticsOnFailingMetrics()
         metric1.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 1"));
         metric1.AddDiagnostic(EvaluationDiagnostic.Informational("Informational 1"));
         metric1.AddDiagnostic(EvaluationDiagnostic.Informational("Informational 2"));
+        metric1.Reason = "Reason for metric 1";
 
         var metric2 = new BooleanMetric("Metric with warning and informational diagnostics");
         metric2.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 1"));
         metric2.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 2"));
         metric2.AddDiagnostic(EvaluationDiagnostic.Informational("Informational 2"));
+        metric2.Reason = "Reason for metric 2";
 
         var metric3 = new EvaluationMetric("Metric with error diagnostics only");
         metric3.AddDiagnostic(EvaluationDiagnostic.Error("Error 1"));
@@ -466,9 +475,11 @@ public async Task ResultWithDiagnosticsOnFailingMetrics()
         var metric4 = new StringMetric("Metric with warning diagnostics only");
         metric4.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 1"));
         metric4.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 2"));
+        metric4.Reason = "Reason for metric 4";
 
         var metric5 = new NumericMetric("Metric with informational diagnostics only");
         metric5.AddDiagnostic(EvaluationDiagnostic.Informational("Informational 1"));
+        metric5.Reason = "Reason for metric 5";
 
         evaluator.TestMetrics = [metric1, metric2, metric3, metric4, metric5];
 
@@ -505,23 +516,28 @@ public async Task ResultWithDiagnosticsOnPassingMetrics()
         metric1.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 1"));
         metric1.AddDiagnostic(EvaluationDiagnostic.Informational("Informational 1"));
         metric1.AddDiagnostic(EvaluationDiagnostic.Informational("Informational 2"));
+        metric1.Reason = "Reason for metric 1";
 
         var metric2 = new BooleanMetric("Metric with warning and informational diagnostics", value: true);
         metric2.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 1"));
         metric2.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 2"));
         metric2.AddDiagnostic(EvaluationDiagnostic.Informational("Informational 2"));
+        metric2.Reason = "Reason for metric 2";
 
         var metric3 = new NumericMetric("Metric with error diagnostics only", value: 5);
         metric3.AddDiagnostic(EvaluationDiagnostic.Error("Error 1"));
         metric3.AddDiagnostic(EvaluationDiagnostic.Error("Error 2"));
+        metric3.Reason = "Reason for metric 3";
 
         HashSet<string> allowedValues = ["A", "B", "C"];
         var metric4 = new StringMetric("Metric with warning diagnostics only", value: "A");
         metric4.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 1"));
         metric4.AddDiagnostic(EvaluationDiagnostic.Warning("Warning 2"));
+        metric4.Reason = "Reason for metric 4";
 
         var metric5 = new NumericMetric("Metric with informational diagnostics only", value: 4);
         metric5.AddDiagnostic(EvaluationDiagnostic.Informational("Informational 1"));
+        metric5.Reason = "Reason for metric 5";
 
         evaluator.TestMetrics = [metric1, metric2, metric3, metric4, metric5];
 

From 12f1979fabd4131782e8bdee3fb3fe82b5f9e9c1 Mon Sep 17 00:00:00 2001
From: Shyam Namboodiripad <gnamboo@microsoft.com>
Date: Wed, 12 Mar 2025 03:33:43 -0700
Subject: [PATCH 14/18] Fix indentation

---
 .../Microsoft.Extensions.AI.Evaluation/BooleanMetric.cs         | 2 +-
 .../Microsoft.Extensions.AI.Evaluation/NumericMetric.cs         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation/BooleanMetric.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation/BooleanMetric.cs
index fe987382a26..746ddcf02fe 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation/BooleanMetric.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation/BooleanMetric.cs
@@ -14,4 +14,4 @@ namespace Microsoft.Extensions.AI.Evaluation;
 /// <paramred name="value"/>.
 /// </param>
 public sealed class BooleanMetric(string name, bool? value = null, string? reason = null)
-: EvaluationMetric<bool?>(name, value, reason);
+    : EvaluationMetric<bool?>(name, value, reason);
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation/NumericMetric.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation/NumericMetric.cs
index 93234a967b8..6b34beafde8 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation/NumericMetric.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation/NumericMetric.cs
@@ -25,4 +25,4 @@ namespace Microsoft.Extensions.AI.Evaluation;
 /// <paramred name="value"/>.
 /// </param>
 public sealed class NumericMetric(string name, double? value = null, string? reason = null)
- : EvaluationMetric<double?>(name, value, reason);
+    : EvaluationMetric<double?>(name, value, reason);

From 51b7fb3756b915a72403a92182bcc64a2379c12f Mon Sep 17 00:00:00 2001
From: Shyam Namboodiripad <gnamboo@microsoft.com>
Date: Wed, 12 Mar 2025 03:58:08 -0700
Subject: [PATCH 15/18] Fix up some styles

---
 .../TypeScript/components/ScenarioTree.tsx    | 21 +++++++------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
index db1a3c518ba..37c376bcf88 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
@@ -95,7 +95,7 @@ export const MetricDetailsSection = ({ metric }: { metric: MetricType }) => {
             </div>
 
             {isExpanded && (
-                <div className={classes.conversationBox}>
+                <div className={classes.sectionContainer}>
                     {hasReason && (
                         <div className={classes.sectionContent}>
                             <div className={classes.sectionSubHeader}>Evaluation Reason</div>
@@ -232,22 +232,17 @@ const useStyles = makeStyles({
         backgroundColor: tokens.colorNeutralBackground2,
         cursor: 'text',
     },
-    conversationBox: {
-        padding: '0.75rem',
-        maxHeight: '20rem',
-        overflow: 'auto',
-        cursor: 'text',
-        '& pre': {
-            whiteSpace: 'pre-wrap',
-            wordWrap: 'break-word',
-        },
-    },
-    chatContainer: {
+    sectionContainer: {
         display: 'flex',
         flexDirection: 'column',
         gap: '0.75rem',
         padding: '0.75rem 0',
+        cursor: 'text',
         position: 'relative',
+        '& pre': {
+            whiteSpace: 'pre-wrap',
+            wordWrap: 'break-word',
+        },
     },
     messageRow: {
         display: 'flex',
@@ -348,7 +343,7 @@ export const PromptDetails = ({ messages, renderMarkdown }: {
             </div>
 
             {isExpanded && (
-                <div className={classes.chatContainer}>
+                <div className={classes.sectionContainer}>
                     {messages.map((message, index) => {
                         const isFromUserSide = isUserSide(message.role);
                         const messageRowClass = mergeClasses(

From cf857935a910a91fdff0fa4de125a670bc7c0a86 Mon Sep 17 00:00:00 2001
From: Shyam Namboodiripad <gnamboo@microsoft.com>
Date: Wed, 12 Mar 2025 17:23:57 -0700
Subject: [PATCH 16/18] Move Reason to base class alongside Interpretation

---
 .../RelevanceTruthAndCompletenessEvaluator.cs       |  2 +-
 .../BooleanMetric.cs                                |  4 ++--
 .../EvaluationMetric.cs                             | 12 +++++++++++-
 .../EvaluationMetric{T}.cs                          | 13 +++----------
 .../NumericMetric.cs                                |  4 ++--
 .../StringMetric.cs                                 |  4 ++--
 .../ResultsTests.cs                                 |  2 ++
 7 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluator.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluator.cs
index 85ff8bae12d..73a62970914 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluator.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluator.cs
@@ -24,7 +24,7 @@ namespace Microsoft.Extensions.AI.Evaluation.Quality;
 /// <see cref="RelevanceTruthAndCompletenessEvaluator"/> returns three <see cref="NumericMetric"/>s that contain scores
 /// for 'Relevance', 'Truth' and 'Completeness' respectively. Each score is a number between 1 and 5, with 1 indicating
 /// a poor score, and 5 indicating an excellent score. Each returned score is also accompanied by a
-/// <see cref="EvaluationMetric{T}.Reason"/> that provides an explanation for the score.
+/// <see cref="EvaluationMetric.Reason"/> that provides an explanation for the score.
 /// </remarks>
 public sealed partial class RelevanceTruthAndCompletenessEvaluator : ChatConversationEvaluator
 {
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation/BooleanMetric.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation/BooleanMetric.cs
index 746ddcf02fe..0edb9f8b0b4 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation/BooleanMetric.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation/BooleanMetric.cs
@@ -10,8 +10,8 @@ namespace Microsoft.Extensions.AI.Evaluation;
 /// <param name="name">The name of the <see cref="BooleanMetric"/>.</param>
 /// <param name="value">The value of the <see cref="BooleanMetric"/>.</param>
 /// <param name="reason">
-/// An optional string that can be used to provide some commentary around the result represented by
-/// <paramred name="value"/>.
+/// An optional string that can be used to provide some commentary around the result represented by this
+/// <see cref="BooleanMetric"/>.
 /// </param>
 public sealed class BooleanMetric(string name, bool? value = null, string? reason = null)
     : EvaluationMetric<bool?>(name, value, reason);
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationMetric.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationMetric.cs
index 78bb6831486..038599963af 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationMetric.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationMetric.cs
@@ -15,17 +15,27 @@ namespace Microsoft.Extensions.AI.Evaluation;
 /// A base class that represents the result of an evaluation.
 /// </summary>
 /// <param name="name">The name of the <see cref="EvaluationMetric"/>.</param>
+/// <param name="reason">
+/// An optional string that can be used to provide some commentary around the result represented by this
+/// <see cref="EvaluationMetric"/>.
+/// </param>
 [JsonDerivedType(typeof(NumericMetric), "numeric")]
 [JsonDerivedType(typeof(BooleanMetric), "boolean")]
 [JsonDerivedType(typeof(StringMetric), "string")]
 [JsonDerivedType(typeof(EvaluationMetric), "none")]
-public class EvaluationMetric(string name)
+public class EvaluationMetric(string name, string? reason = null)
 {
     /// <summary>
     /// Gets or sets the name of the <see cref="EvaluationMetric"/>.
     /// </summary>
     public string Name { get; set; } = name;
 
+    /// <summary>
+    /// Gets or sets a string that can optionally be used to provide some commentary around the result represented by
+    /// this <see cref="EvaluationMetric"/>.
+    /// </summary>
+    public string? Reason { get; set; } = reason;
+
     /// <summary>
     /// Gets or sets an <see cref="EvaluationMetricInterpretation"/> that identifies whether the result of the
     /// evaluation represented by the current <see cref="EvaluationMetric"/> is considered good or bad, passed or
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationMetric{T}.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationMetric{T}.cs
index fb57d2571fb..d2745069bc5 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationMetric{T}.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation/EvaluationMetric{T}.cs
@@ -20,25 +20,18 @@ public class EvaluationMetric<T> : EvaluationMetric
     /// </summary>
     public T? Value { get; set; }
 
-    /// <summary>
-    /// Gets or sets a string that can optionally be used to provide some commentary around the result represented by
-    /// <see cref="Value"/>.
-    /// </summary>
-    public string? Reason { get; set; }
-
     /// <summary>
     /// Initializes a new instance of the <see cref="EvaluationMetric{T}"/> class.
     /// </summary>
     /// <param name="name">The name of the <see cref="EvaluationMetric{T}"/>.</param>
     /// <param name="value">The value  of the <see cref="EvaluationMetric{T}"/>.</param>
     /// <param name="reason">
-    /// An optional string that can be used to provide some commentary around the result represented by
-    /// <paramred name="value"/>.
+    /// An optional string that can be used to provide some commentary around the result represented by this
+    /// <see cref="EvaluationMetric{T}"/>.
     /// </param>
     protected EvaluationMetric(string name, T? value, string? reason = null)
-        : base(name)
+        : base(name, reason)
     {
         Value = value;
-        Reason = reason;
     }
 }
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation/NumericMetric.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation/NumericMetric.cs
index 6b34beafde8..2a0a07c2193 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation/NumericMetric.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation/NumericMetric.cs
@@ -21,8 +21,8 @@ namespace Microsoft.Extensions.AI.Evaluation;
 /// <param name="name">The name of the <see cref="NumericMetric"/>.</param>
 /// <param name="value">The value of the <see cref="NumericMetric"/>.</param>
 /// <param name="reason">
-/// An optional string that can be used to provide some commentary around the result represented by
-/// <paramred name="value"/>.
+/// An optional string that can be used to provide some commentary around the result represented by this
+/// <see cref="NumericMetric"/>.
 /// </param>
 public sealed class NumericMetric(string name, double? value = null, string? reason = null)
     : EvaluationMetric<double?>(name, value, reason);
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation/StringMetric.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation/StringMetric.cs
index 5fed520b3a2..97fd10921bc 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation/StringMetric.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation/StringMetric.cs
@@ -13,8 +13,8 @@ namespace Microsoft.Extensions.AI.Evaluation;
 /// <param name="name">The name of the <see cref="StringMetric"/>.</param>
 /// <param name="value">The value of the <see cref="StringMetric"/>.</param>
 /// <param name="reason">
-/// An optional string that can be used to provide some commentary around the result represented by
-/// <paramred name="value"/>.
+/// An optional string that can be used to provide some commentary around the result represented by this
+/// <see cref="StringMetric"/>.
 /// </param>
 public sealed class StringMetric(string name, string? value = null, string? reason = null)
     : EvaluationMetric<string>(name, value, reason);
diff --git a/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/ResultsTests.cs b/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/ResultsTests.cs
index e7ed381ad93..b4336ec802f 100644
--- a/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/ResultsTests.cs
+++ b/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/ResultsTests.cs
@@ -419,6 +419,7 @@ public async Task ResultWithDiagnosticsOnUninterpretedMetrics()
         var metric3 = new EvaluationMetric("Metric with error diagnostics only");
         metric3.AddDiagnostic(EvaluationDiagnostic.Error("Error 1"));
         metric3.AddDiagnostic(EvaluationDiagnostic.Error("Error 2"));
+        metric3.Reason = "Reason for metric 3";
 
         HashSet<string> allowedValues = ["A", "B", "C"];
         var metric4 = new StringMetric("Metric with warning diagnostics only");
@@ -470,6 +471,7 @@ public async Task ResultWithDiagnosticsOnFailingMetrics()
         var metric3 = new EvaluationMetric("Metric with error diagnostics only");
         metric3.AddDiagnostic(EvaluationDiagnostic.Error("Error 1"));
         metric3.AddDiagnostic(EvaluationDiagnostic.Error("Error 2"));
+        metric3.Reason = "Reason for metric 3";
 
         HashSet<string> allowedValues = ["A", "B", "C"];
         var metric4 = new StringMetric("Metric with warning diagnostics only");

From 2116fd43e7e4c87794ed66b4ca68f0f5db4f5569 Mon Sep 17 00:00:00 2001
From: Shyam Namboodiripad <gnamboo@microsoft.com>
Date: Wed, 12 Mar 2025 18:38:00 -0700
Subject: [PATCH 17/18] Update colors and spacing

---
 .../TypeScript/components/MetricCard.tsx      |  9 +++----
 .../TypeScript/components/ScenarioTree.tsx    | 27 +++----------------
 2 files changed, 7 insertions(+), 29 deletions(-)

diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/MetricCard.tsx b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/MetricCard.tsx
index 3ec8defb84a..738bd51bcba 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/MetricCard.tsx
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/MetricCard.tsx
@@ -37,7 +37,7 @@ const useCardStyles = makeStyles({
         padding: '.75rem', 
         border: '1px solid #e0e0e0', 
         borderRadius: '4px',
-        width: '10rem',
+        width: '12rem',
         cursor: 'pointer',
         transition: 'box-shadow 0.2s ease-in-out, outline 0.2s ease-in-out',
         position: 'relative',
@@ -61,10 +61,9 @@ const useCardStyles = makeStyles({
         overflow: 'hidden',
         textOverflow: 'ellipsis',
         lineHeight: '1.2',
-        maxHeight: '2.4em',
-        display: '-webkit-box',
-        WebkitLineClamp: 2,
-        WebkitBoxOrient: 'vertical',
+        height: '1.2em',
+        display: "block",
+        whiteSpace: 'nowrap',
         marginTop: '-0.5rem',
     },
     iconPlaceholder: {
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
index 37c376bcf88..154bbcd42b2 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx
@@ -247,14 +247,14 @@ const useStyles = makeStyles({
     messageRow: {
         display: 'flex',
         flexDirection: 'column',
-        width: '900px',
+        width: '60rem',
         position: 'relative',
     },
     userMessageRow: {
         marginLeft: '0',
     },
     assistantMessageRow: {
-        marginLeft: '100px',
+        marginLeft: '10rem',
     },
     messageParticipantName: {
         fontSize: tokens.fontSizeBase200,
@@ -268,19 +268,7 @@ const useStyles = makeStyles({
         overflow: 'hidden',
         wordBreak: 'break-word',
         width: '100%',
-    },
-    userBubble: {
         backgroundColor: tokens.colorNeutralBackground3,
-        borderTopLeftRadius: '4px',
-    },
-    systemBubble: {
-        backgroundColor: tokens.colorBrandBackground,
-        color: tokens.colorNeutralForegroundInverted,
-        borderTopLeftRadius: '4px',
-    },
-    assistantBubble: {
-        backgroundColor: tokens.colorNeutralBackground4,
-        borderTopRightRadius: '4px',
     },
 });
 
@@ -350,20 +338,11 @@ export const PromptDetails = ({ messages, renderMarkdown }: {
                             classes.messageRow,
                             isFromUserSide ? classes.userMessageRow : classes.assistantMessageRow
                         );
-                        
-                        let messageBubble;
-                        if (message.role.toLowerCase() === 'system') {
-                            messageBubble = mergeClasses(classes.messageBubble, classes.systemBubble);
-                        } else if (isFromUserSide) {
-                            messageBubble = mergeClasses(classes.messageBubble, classes.userBubble);
-                        } else {
-                            messageBubble = mergeClasses(classes.messageBubble, classes.assistantBubble);
-                        }
 
                         return (
                             <div key={index} className={messageRowClass}>
                                 <div className={classes.messageParticipantName}>{message.participantName}</div>
-                                <div className={messageBubble}>
+                                <div className={classes.messageBubble}>
                                     {renderMarkdown ? 
                                         <ReactMarkdown>{message.content}</ReactMarkdown> : 
                                         <pre style={{ whiteSpace: 'pre-wrap' }}>{message.content}</pre>

From 882ead1df956f06ce4afa338ffe5b099187866c3 Mon Sep 17 00:00:00 2001
From: Shyam Namboodiripad <gnamboo@microsoft.com>
Date: Wed, 12 Mar 2025 20:28:24 -0700
Subject: [PATCH 18/18] Update test

---
 .../ResultsTests.cs                                             | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/ResultsTests.cs b/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/ResultsTests.cs
index b4336ec802f..01241b5760b 100644
--- a/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/ResultsTests.cs
+++ b/test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/ResultsTests.cs
@@ -193,6 +193,8 @@ await reportingConfiguration.CreateScenarioRunAsync(
         Assert.NotNull(metricB.Interpretation);
         Assert.True(metricB.Interpretation!.Failed);
         Assert.Null(metricC.Interpretation);
+        Assert.NotNull(metricD.Interpretation);
+        Assert.False(metricD.Interpretation!.Failed);
 
         Assert.False(result.ContainsDiagnostics());
     }