Skip to content

De-duplicate prediction results with the history results #3543

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 52 additions & 12 deletions PSReadLine/Prediction.Views.cs
Original file line number Diff line number Diff line change
Expand Up @@ -144,12 +144,9 @@ protected List<SuggestionEntry> GetHistorySuggestions(string input, int count)
continue;
}

if (results == null)
{
results = new List<SuggestionEntry>(capacity: count);
}

_cacheHistorySet.Add(line);
results ??= new List<SuggestionEntry>(capacity: count);

if (matchIndex == 0)
{
results.Add(new SuggestionEntry(line, matchIndex));
Expand Down Expand Up @@ -224,8 +221,11 @@ private class PredictionListView : PredictionViewBase
private bool _updatePending;

// Caches re-used when aggregating the suggestion results from predictors and history.
// Those caches help us avoid allocation on tons of short-lived collections.
private List<int> _cacheList1;
private List<int> _cacheList2;
private HashSet<string> _cachedHistorySet;
private StringComparer _cachedComparer;

/// <summary>
/// Gets whether the current window size meets the minimum requirement for the List view to work.
Expand Down Expand Up @@ -376,7 +376,7 @@ private void AggregateSuggestions()

// Assign the results of each plugin to the average slots.
// Note that it's possible a plugin may return less results than the average slots,
// and in that case, the unused slots will be come remaining slots that are to be
// and in that case, the unused slots will become remaining slots which are to be
// distributed again.
for (int i = 0; i < pCount; i++)
{
Expand Down Expand Up @@ -419,6 +419,18 @@ private void AggregateSuggestions()
if (hCount > 0)
{
_listItems.RemoveRange(hCount, _listItems.Count - hCount);

if (_cachedComparer != _singleton._options.HistoryStringComparer)
{
// Create the cached history set if not yet, or re-create the set if case-sensitivity was changed by the user.
_cachedComparer = _singleton._options.HistoryStringComparer;
_cachedHistorySet = new HashSet<string>(_cachedComparer);
}

foreach (SuggestionEntry entry in _listItems)
{
_cachedHistorySet.Add(entry.SuggestionText);
}
}

int index = -1;
Expand All @@ -435,19 +447,46 @@ private void AggregateSuggestions()
break;
}

int skipCount = 0;
int num = _cacheList2[index];
for (int i = 0; i < num; i++)
foreach (PredictiveSuggestion suggestion in item.Suggestions)
{
string sugText = item.Suggestions[i].SuggestionText ?? string.Empty;
string sugText = suggestion.SuggestionText ?? string.Empty;
if (_cachedHistorySet?.Contains(sugText) == true)
{
// Skip the prediction result that is exactly the same as one of the history results.
skipCount++;
continue;
}

int matchIndex = sugText.IndexOf(_inputText, comparison);
_listItems.Add(new SuggestionEntry(item.Name, item.Id, item.Session, sugText, matchIndex));

if (--num == 0)
{
// Break after we've added the desired number of prediction results.
break;
}
}

if (item.Session.HasValue)
// Get the number of prediction results that were actually put in the list after filtering out the duplicate ones.
int count = _cacheList2[index] - num;
if (item.Session.HasValue && count > 0)
{
// Send feedback only if the mini-session id is specified.
// When it's not specified, we consider the predictor doesn't accept feedback.
_singleton._mockableMethods.OnSuggestionDisplayed(item.Id, item.Session.Value, num);
// Send feedback only if the mini-session id is specified and we truely have its results in the list to be rendered.
// When the mini-session id is not specified, we consider the predictor doesn't accept feedback.
//
// NOTE: when any duplicate results were skipped, the 'count' passed in here won't be accurate as it still includes
// those skipped ones. This is due to the limitation of the 'OnSuggestionDisplayed' interface method, which didn't
// assume any prediction results from a predictor could be filtered out at the initial design time. We will have to
// change the predictor interface to pass in accurate information, such as:
// void OnSuggestionDisplayed(Guid predictorId, uint session, int countOrIndex, int[] skippedIndices)
//
// However, an interface change has huge impacts. At least, a newer version of PSReadLine will stop working on the
// existing PowerShell 7+ versions. For this particular issue, the chance that it could happen is low and the impact
// of the inaccurate feedback is also low, so we should delay this interface change until another highly-demanded
// change to the interface is required in future (e.g. changes related to supporting OpenAI models).
Comment on lines +479 to +488
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the detailed note 👍

_singleton._mockableMethods.OnSuggestionDisplayed(item.Id, item.Session.Value, count + skipCount);
}
}
}
Expand All @@ -456,6 +495,7 @@ private void AggregateSuggestions()
{
_cacheList1.Clear();
_cacheList2.Clear();
_cachedHistorySet?.Clear();
}
}

Expand Down
135 changes: 135 additions & 0 deletions test/ListPredictionTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,16 @@ private Disposable SetPrediction(PredictionSource source, PredictionViewStyle vi
new SetPSReadLineOption { PredictionSource = oldSource, PredictionViewStyle = oldView }));
}

private Disposable SetHistorySearchCaseSensitive(bool caseSensitive)
{
var options = PSConsoleReadLine.GetOptions();
var oldValue = options.HistorySearchCaseSensitive;

PSConsoleReadLine.SetOptions(new SetPSReadLineOption { HistorySearchCaseSensitive = caseSensitive });
return new Disposable(() => PSConsoleReadLine.SetOptions(
new SetPSReadLineOption { HistorySearchCaseSensitive = oldValue }));
}

private void AssertDisplayedSuggestions(int count, Guid predictorId, uint session, int countOrIndex)
{
Assert.Equal(count, _mockedMethods.displayedSuggestions.Count);
Expand Down Expand Up @@ -1711,6 +1721,131 @@ public void List_HistoryAndPluginSource_Acceptance()
Assert.Equal("SOME NEW TEX SOME TEXT AFTER", _mockedMethods.commandHistory[3]);
}

[SkippableFact]
public void List_HistoryAndPluginSource_Deduplication()
{
TestSetup(KeyMode.Cmd);
int listWidth = CheckWindowSize();
var emphasisColors = Tuple.Create(PSConsoleReadLineOptions.DefaultEmphasisColor, _console.BackgroundColor);

// Using the 'HistoryAndPlugin' source will make PSReadLine get prediction from both history and plugin.
using var disp1 = SetPrediction(PredictionSource.HistoryAndPlugin, PredictionViewStyle.ListView);
_mockedMethods.ClearPredictionFields();

// The 1st result from 'predictorId_1' is the same as the 1st entry in history with case-insensitive comparison,
// which is the default comparison. So, that result will be filtered out due to the de-duplication logic.
SetHistory("some TEXT BEFORE de-dup", "de-dup -of");
Test("de-dup", Keys(
"de-dup", CheckThat(() => AssertScreenIs(6,
TokenClassification.Command, "de-dup",
NextLine,
TokenClassification.ListPrediction, '>',
TokenClassification.None, ' ',
emphasisColors, "de-dup",
TokenClassification.None, " -of",
TokenClassification.None, new string(' ', listWidth - 21), // 21 is the length of '> de-dup -of' plus '[History]'.
TokenClassification.None, '[',
TokenClassification.ListPrediction, "History",
TokenClassification.None, ']',
NextLine,
TokenClassification.ListPrediction, '>',
TokenClassification.None, " some TEXT BEFORE ",
emphasisColors, "de-dup",
TokenClassification.None, new string(' ', listWidth - 34), // 34 is the length of '> SOME TEXT BEFORE de-dup' plus '[History]'.
TokenClassification.None, '[',
TokenClassification.ListPrediction, "History",
TokenClassification.None, ']',
NextLine,
TokenClassification.ListPrediction, '>',
TokenClassification.None, ' ',
emphasisColors, "de-dup",
TokenClassification.None, " SOME TEXT AFTER",
TokenClassification.None, new string(' ', listWidth - 39), // 35 is the length of '> de-dup SOME TEXT AFTER' plus '[TestPredictor]'.
TokenClassification.None, '[',
TokenClassification.ListPrediction, "TestPredictor",
TokenClassification.None, ']',
NextLine,
TokenClassification.ListPrediction, '>',
TokenClassification.None, " SOME NEW TEXT",
TokenClassification.None, new string(' ', listWidth - 32), // 32 is the length of '> SOME NEW TEXT' plus '[LongNamePred...]'
TokenClassification.None, '[',
TokenClassification.ListPrediction, "LongNamePred...",
TokenClassification.None, ']',
// List view is done, no more list item following.
NextLine,
NextLine
)),
// `OnSuggestionDisplayed` should be fired for both predictors.
// For 'predictorId_1', the reported 'countOrIndex' from feedback is still 2 even though its 1st result was filtered out due to duplication.
CheckThat(() => AssertDisplayedSuggestions(count: 2, predictorId_1, MiniSessionId, 2)),
CheckThat(() => AssertDisplayedSuggestions(count: 2, predictorId_2, MiniSessionId, 1)),
CheckThat(() => _mockedMethods.ClearPredictionFields()),
// Once accepted, the list should be cleared.
_.Enter, CheckThat(() => AssertScreenIs(2,
TokenClassification.Command, "de-dup",
NextLine,
NextLine))
));

// Change the setting to be case sensitive, and check the list view content.
using var disp2 = SetHistorySearchCaseSensitive(caseSensitive: true);
_mockedMethods.ClearPredictionFields();

// The 1st result from 'predictorId_1' is not the same as the 2nd entry in history with the case-sensitive comparison.
// But the 2nd result from 'predictorId_1' is the same as teh 1st entry in history with the case-sensitive comparison,
// so, that result will be filtered out due to the de-duplication logic.
SetHistory("de-dup SOME TEXT AFTER", "some TEXT BEFORE de-dup");
Test("de-dup", Keys(
"de-dup", CheckThat(() => AssertScreenIs(6,
TokenClassification.Command, "de-dup",
NextLine,
TokenClassification.ListPrediction, '>',
TokenClassification.None, ' ',
emphasisColors, "de-dup",
TokenClassification.None, " SOME TEXT AFTER",
TokenClassification.None, new string(' ', listWidth - 33), // 33 is the length of '> de-dup SOME TEXT AFTER' plus '[History]'.
TokenClassification.None, '[',
TokenClassification.ListPrediction, "History",
TokenClassification.None, ']',
NextLine,
TokenClassification.ListPrediction, '>',
TokenClassification.None, " some TEXT BEFORE ",
emphasisColors, "de-dup",
TokenClassification.None, new string(' ', listWidth - 34), // 34 is the length of '> some TEXT BEFORE de-dup' plus '[History]'.
TokenClassification.None, '[',
TokenClassification.ListPrediction, "History",
TokenClassification.None, ']',
NextLine,
TokenClassification.ListPrediction, '>',
TokenClassification.None, " SOME TEXT BEFORE ",
emphasisColors, "de-dup",
TokenClassification.None, new string(' ', listWidth - 40), // 40 is the length of '> SOME TEXT BEFORE de-dup' plus '[TestPredictor]'.
TokenClassification.None, '[',
TokenClassification.ListPrediction, "TestPredictor",
TokenClassification.None, ']',
NextLine,
TokenClassification.ListPrediction, '>',
TokenClassification.None, " SOME NEW TEXT",
TokenClassification.None, new string(' ', listWidth - 32), // 32 is the length of '> SOME NEW TEXT' plus '[LongNamePred...]'
TokenClassification.None, '[',
TokenClassification.ListPrediction, "LongNamePred...",
TokenClassification.None, ']',
// List view is done, no more list item following.
NextLine,
NextLine
)),
// `OnSuggestionDisplayed` should be fired for both predictors.
// For 'predictorId_1', the reported 'countOrIndex' from feedback is still 2 even though its 2nd result was filtered out due to duplication.
CheckThat(() => AssertDisplayedSuggestions(count: 2, predictorId_1, MiniSessionId, 2)),
CheckThat(() => AssertDisplayedSuggestions(count: 2, predictorId_2, MiniSessionId, 1)),
// Once accepted, the list should be cleared.
_.Enter, CheckThat(() => AssertScreenIs(2,
TokenClassification.Command, "de-dup",
NextLine,
NextLine))
));
}

[SkippableFact]
public void List_NoneSource_ExecutionStatus()
{
Expand Down