Skip to content

Commit a3baf04

Browse files
authored
Add fuzzy filtering to the evaluation reports (#6262)
* Add filtering to the evaluation reports.
* Convert to uFuzzy library.
* Clean up unneeded import.
1 parent 9b50722 commit a3baf04

File tree

6 files changed

+820
-679
lines changed

6 files changed

+820
-679
lines changed

src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/App.tsx

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
import { useState } from 'react';
55
import { Settings28Regular, FilterDismissRegular, DismissRegular, ArrowDownloadRegular } from '@fluentui/react-icons';
6-
import { Button, Drawer, DrawerBody, DrawerHeader, DrawerHeaderTitle, Switch, Tooltip } from '@fluentui/react-components';
6+
import { Button, Drawer, DrawerBody, DrawerHeader, DrawerHeaderTitle, SearchBox, Switch, Tooltip } from '@fluentui/react-components';
77
import { makeStyles } from '@fluentui/react-components';
88
import './App.css';
99
import { ScenarioGroup } from './ScenarioTree';
@@ -47,7 +47,7 @@ const useStyles = makeStyles({
4747

4848
function App() {
4949
const classes = useStyles();
50-
const { dataset, scoreSummary, selectedTags, clearFilters } = useReportContext();
50+
const { dataset, scoreSummary, selectedTags, clearFilters, searchValue, setSearchValue } = useReportContext();
5151
const [isSettingsOpen, setIsSettingsOpen] = useState(false);
5252
const { renderMarkdown, setRenderMarkdown } = useReportContext();
5353
const { globalTags, filterableTags } = categorizeAndSortTags(dataset, scoreSummary.primaryResult.executionName);
@@ -77,11 +77,14 @@ function App() {
7777
<div className={classes.headerTop}>
7878
<h1>AI Evaluation Report</h1>
7979
<div className={classes.headerActions}>
80-
{selectedTags.length > 0 && (
80+
{(selectedTags.length > 0 || !!searchValue) && (
8181
<Tooltip content="Clear Filters" relationship="description">
8282
<Button icon={<FilterDismissRegular />} appearance="subtle" onClick={clearFilters} />
8383
</Tooltip>
8484
)}
85+
<SearchBox placeholder="Search / Filter " value={searchValue} type="text"
86+
style={{width: "16rem"}}
87+
onChange={(_ev, data) => setSearchValue(data.value)} />
8588
<Tooltip content="Download Data as JSON" relationship="description">
8689
<Button icon={<ArrowDownloadRegular />} appearance="subtle" onClick={downloadDataset} />
8790
</Tooltip>

src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ReportContext.tsx

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { useContext, createContext, useState } from "react";
2-
import { ScoreNode, ScoreNodeType, ScoreSummary } from "./Summary";
2+
import { ReverseTextIndex, ScoreNode, ScoreNodeType, ScoreSummary } from "./Summary";
33

44
export type ReportContextType = {
55
dataset: Dataset,
@@ -8,9 +8,12 @@ export type ReportContextType = {
88
selectScenarioLevel: (key: string) => void,
99
renderMarkdown: boolean,
1010
setRenderMarkdown: (renderMarkdown: boolean) => void,
11+
searchValue: string,
12+
setSearchValue: (searchValue: string) => void,
1113
selectedTags: string[],
1214
handleTagClick: (tag: string) => void,
1315
clearFilters: () => void,
16+
filterTree: (node: ScoreNode) => ScoreNode | null,
1417
};
1518

1619
// Create the default context, which will be used to provide the context value
@@ -22,6 +25,7 @@ const defaultReportContext = createContext<ReportContextType>({
2225
includesReportHistory: false,
2326
executionHistory: new Map<string, ScoreNode>(),
2427
nodesByKey: new Map<string, Map<string, ScoreNode>>(),
28+
reverseTextIndex: new ReverseTextIndex(),
2529
},
2630
selectedScenarioLevel: undefined,
2731
selectScenarioLevel: (_selectedScenarioLevel: string) => {
@@ -31,9 +35,12 @@ const defaultReportContext = createContext<ReportContextType>({
3135
setRenderMarkdown: (_renderMarkdown: boolean) => {
3236
throw new Error("setRenderMarkdown function not implemented");
3337
},
38+
searchValue: '',
39+
setSearchValue: (_searchValue: string | undefined) => { throw new Error("setSearchValue function not implemented"); },
3440
selectedTags: [],
3541
handleTagClick: (_tag: string) => { throw new Error("handleTagClick function not implemented"); },
3642
clearFilters: () => { throw new Error("clearFilters function not implemented"); },
43+
filterTree: (_node: ScoreNode) => { throw new Error("filterTree function not implemented"); },
3744
});
3845

3946
export const ReportContextProvider = ({ dataset, scoreSummary, children }:
@@ -56,6 +63,7 @@ const useProvideReportContext = (dataset: Dataset, scoreSummary: ScoreSummary):
5663
const [selectedScenarioLevel, setSelectedScenarioLevel] = useState<string | undefined>(undefined);
5764
const [renderMarkdown, setRenderMarkdown] = useState<boolean>(true);
5865
const [selectedTags, setSelectedTags] = useState<string[]>([]);
66+
const [searchValue, setSearchValue] = useState<string>("");
5967

6068
const selectScenarioLevel = (key: string) => {
6169
if (key === selectedScenarioLevel) {
@@ -74,17 +82,52 @@ const useProvideReportContext = (dataset: Dataset, scoreSummary: ScoreSummary):
7482

7583
const clearFilters = () => {
7684
setSelectedTags([]);
85+
setSearchValue("");
7786
};
7887

88+
const filterTree = (node: ScoreNode): ScoreNode | null => {
89+
if (selectedTags.length === 0 && searchValue === "") {
90+
return node;
91+
}
92+
93+
const searchedNodes = scoreSummary.reverseTextIndex.search(searchValue);
94+
95+
const srch = (node: ScoreNode) : ScoreNode | null => {
96+
if (node.isLeafNode) {
97+
const tagMatches = selectedTags.length > 0 && node.scenario?.tags?.some(tag => selectedTags.includes(tag));
98+
const searchMatches = searchValue !== "" && searchedNodes.has(node.nodeKey);
99+
return tagMatches || searchMatches ? node : null;
100+
}
101+
102+
const filteredChildren = node.childNodes
103+
.map(srch)
104+
.filter((child): child is ScoreNode => child !== null);
105+
106+
if (filteredChildren.length > 0) {
107+
const newNode = new ScoreNode(node.name, node.nodeType, node.nodeKey, node.executionName);
108+
newNode.setChildren(new Map(filteredChildren.map(child => [child.name, child])));
109+
newNode.aggregate();
110+
return newNode;
111+
}
112+
113+
return null;
114+
};
115+
116+
return srch(node);
117+
}
118+
79119
return {
80120
dataset,
81121
scoreSummary,
82122
selectedScenarioLevel,
83123
selectScenarioLevel,
84124
renderMarkdown,
85125
setRenderMarkdown,
126+
searchValue,
127+
setSearchValue,
86128
selectedTags,
87129
handleTagClick,
88-
clearFilters
130+
clearFilters,
131+
filterTree,
89132
};
90133
};

src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/ScenarioTree.tsx

Lines changed: 1 addition & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -46,36 +46,13 @@ export const ScenarioGroup = ({ node, scoreSummary }: {
4646
node: ScoreNode,
4747
scoreSummary: ScoreSummary,
4848
}) => {
49-
const { selectedTags } = useReportContext();
49+
const { filterTree } = useReportContext();
5050
const [openItems, setOpenItems] = useState<Set<TreeItemValue>>(() => new Set());
5151
const handleOpenChange = useCallback((_: TreeOpenChangeEvent, data: TreeOpenChangeData) => {
5252
setOpenItems(data.openItems);
5353
}, []);
5454
const isOpen = (name: string) => openItems.has(name);
5555

56-
const filterTree = (node: ScoreNode): ScoreNode | null => {
57-
if (selectedTags.length === 0) {
58-
return node;
59-
}
60-
61-
if (node.isLeafNode) {
62-
return node.scenario?.tags?.some(tag => selectedTags.includes(tag)) ? node : null;
63-
}
64-
65-
const filteredChildren = node.childNodes
66-
.map(filterTree)
67-
.filter((child): child is ScoreNode => child !== null);
68-
69-
if (filteredChildren.length > 0) {
70-
const newNode = new ScoreNode(node.name, node.nodeType, node.nodeKey, node.executionName);
71-
newNode.setChildren(new Map(filteredChildren.map(child => [child.name, child])));
72-
newNode.aggregate(selectedTags);
73-
return newNode;
74-
}
75-
76-
return null;
77-
};
78-
7956
const filteredNode = filterTree(node);
8057

8158
if (!filteredNode) {

src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/Summary.ts

Lines changed: 61 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33

4+
import uFuzzy from "@leeoniya/ufuzzy";
5+
46
export enum ScoreNodeType {
57
Group,
68
Scenario,
@@ -12,6 +14,7 @@ export type ScoreSummary = {
1214
includesReportHistory: boolean;
1315
executionHistory: Map<string, ScoreNode>;
1416
nodesByKey: Map<string, Map<string, ScoreNode>>;
17+
reverseTextIndex: ReverseTextIndex
1518
};
1619

1720
export class ScoreNode {
@@ -80,17 +83,14 @@ export class ScoreNode {
8083
return [...flattener(this)];
8184
}
8285

83-
aggregate(filteredTags: string[] = []) {
86+
aggregate() {
8487
this.failed = false;
8588
this.numPassingIterations = 0;
8689
this.numFailingIterations = 0;
8790
this.numPassingScenarios = 0;
8891
this.numFailingScenarios = 0;
8992

9093
if (this.isLeafNode) {
91-
if (filteredTags.length > 0 && !this.scenario?.tags?.some(tag => filteredTags.includes(tag))) {
92-
return;
93-
}
9494

9595
this.failed = false;
9696
for (const metric of Object.values(this.scenario?.evaluationResult.metrics ?? [])) {
@@ -116,8 +116,8 @@ export class ScoreNode {
116116
this.shortenedPrompt = shortenPrompt(history);
117117
} else {
118118
for (const child of this.childNodes) {
119-
child.aggregate(filteredTags);
120-
if (filteredTags.length === 0 || child.numPassingIterations + child.numFailingIterations > 0) {
119+
child.aggregate();
120+
if (child.numPassingIterations + child.numFailingIterations > 0) {
121121
this.failed = this.failed || child.failed;
122122
this.numPassingIterations += child.numPassingIterations;
123123
this.numFailingIterations += child.numFailingIterations;
@@ -152,6 +152,38 @@ export class ScoreNode {
152152
}
153153
};
154154

155+
export class ReverseTextIndex {
156+
157+
private stringsToSearch: string[] = [];
158+
private keys: string[] = [];
159+
160+
addText(key: string, text?: string) {
161+
if (!text) {
162+
return;
163+
}
164+
this.stringsToSearch.push(text);
165+
this.keys.push(key);
166+
}
167+
168+
search(searchValue: string): Set<string> {
169+
const opts = {
170+
intraMode: 0,
171+
unicode: true,
172+
} as uFuzzy.Options;
173+
const fz = new uFuzzy(opts);
174+
const terms = fz.split(searchValue);
175+
const keys = new Set<string>();
176+
for (const term of terms) {
177+
const searchResult = fz.search(this.stringsToSearch, term) as uFuzzy.FilteredResult;
178+
const matches = searchResult[0];
179+
for (const match of matches) {
180+
keys.add(this.keys[match]);
181+
}
182+
}
183+
return keys;
184+
}
185+
}
186+
155187
export const createScoreSummary = (dataset: Dataset): ScoreSummary => {
156188

157189
const executionHistory = new Map<string, ScoreNode>();
@@ -183,11 +215,34 @@ export const createScoreSummary = (dataset: Dataset): ScoreSummary => {
183215
const [primaryResult] = executionHistory.values();
184216
primaryResult.collapseSingleChildNodes();
185217

218+
const reverseTextIndex = new ReverseTextIndex();
219+
220+
// build the reverse text index from searchable strings in the data
221+
for (const node of primaryResult.flattenedNodes) {
222+
reverseTextIndex.addText(node.nodeKey, node.scenario?.scenarioName);
223+
reverseTextIndex.addText(node.nodeKey, node.scenario?.iterationName);
224+
for (const message of node.scenario?.messages ?? []) {
225+
for (const content of message.contents) {
226+
if (isTextContent(content)) {
227+
reverseTextIndex.addText(node.nodeKey, content.text);
228+
}
229+
}
230+
}
231+
for (const message of node.scenario?.modelResponse?.messages ?? []) {
232+
for (const content of message.contents) {
233+
if (isTextContent(content)) {
234+
reverseTextIndex.addText(node.nodeKey, content.text);
235+
}
236+
}
237+
}
238+
}
239+
186240
return {
187241
primaryResult,
188242
includesReportHistory: executionHistory.size > 1,
189243
executionHistory,
190244
nodesByKey,
245+
reverseTextIndex,
191246
} as ScoreSummary;
192247
};
193248

0 commit comments

Comments
 (0)