Skip to content

Commit

Permalink
Merge branch 'main' into onnxembeddinggeneration
Browse files Browse the repository at this point in the history
  • Loading branch information
stephentoub committed Mar 20, 2024
2 parents 0d9c8aa + 51ee30f commit 5198d23
Show file tree
Hide file tree
Showing 31 changed files with 853 additions and 324 deletions.
2 changes: 1 addition & 1 deletion docs/decisions/0015-completion-service-selection.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
# These are optional elements. Feel free to remove any of them.
status: accepted
status: superseded by [ADR-0038](0038-completion-service-selection.md)
contact: SergeyMenshykh
date: 2023-10-25
deciders: markwallace-microsoft, matthewbolanos
Expand Down
28 changes: 28 additions & 0 deletions docs/decisions/0038-completion-service-selection.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
---
# These are optional elements. Feel free to remove any of them.
status: accepted
contact: markwallace-microsoft
date: 2024-03-14
deciders: sergeymenshykh, markwallace, rbarreto, dmytrostruk
consulted:
informed:
---

# Completion Service Selection Strategy

## Context and Problem Statement

Today, SK uses the current `IAIServiceSelector` implementation to determine which type of service is used when running a text prompt.
The `IAIServiceSelector` implementation will return either a chat completion service, a text generation service, or a service that implements both.
The prompt will be run using chat completion by default and will fall back to text generation as the alternative option.

This behavior supersedes the description in [ADR-0015](0015-completion-service-selection.md).

## Decision Drivers

- Chat completion services are becoming dominant in the industry, e.g. OpenAI has deprecated most of its text generation services.
- Chat completion generally provides better responses and the ability to use advanced features e.g. tool calling.

## Decision Outcome

Chosen option: Keep the current behavior as described above.
4 changes: 2 additions & 2 deletions dotnet/nuget/nuget-package.props
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project>
<PropertyGroup>
<!-- Central version prefix - applies to all nuget packages. -->
<VersionPrefix>1.6.2</VersionPrefix>
<VersionPrefix>1.6.3</VersionPrefix>

<PackageVersion Condition="'$(VersionSuffix)' != ''">$(VersionPrefix)-$(VersionSuffix)</PackageVersion>
<PackageVersion Condition="'$(VersionSuffix)' == ''">$(VersionPrefix)</PackageVersion>
Expand All @@ -10,7 +10,7 @@
<IsPackable>true</IsPackable>

<!-- Package validation. Baseline Version should be lower than current version. -->
<PackageValidationBaselineVersion>1.6.2</PackageValidationBaselineVersion>
<PackageValidationBaselineVersion>1.6.3</PackageValidationBaselineVersion>
<!-- Validate assembly attributes only for Publish builds -->
<NoWarn Condition="'$(Configuration)' != 'Publish'">$(NoWarn);CP0003</NoWarn>
<!-- Do not validate reference assemblies -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ private static Dictionary<string, string> SampleData()
= "Jupyter notebook describing how to get started with the Semantic Kernel",
["https://github.com/microsoft/semantic-kernel/tree/main/samples/plugins/ChatPlugin/ChatGPT"]
= "Sample demonstrating how to create a chat plugin interfacing with ChatGPT",
["https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/SemanticKernel/Memory/VolatileMemoryStore.cs"]
["https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/Plugins/Plugins.Memory/VolatileMemoryStore.cs"]
= "C# class that defines a volatile embedding store",
};
}
Expand Down
104 changes: 66 additions & 38 deletions dotnet/samples/KernelSyntaxExamples/Example55_TextChunker.cs
Original file line number Diff line number Diff line change
Expand Up @@ -84,75 +84,103 @@ public enum TokenCounterType
/// Custom token counter implementation using SharpToken.
/// Note: SharpToken is used for demonstration purposes only, it's possible to use any available or custom tokenization logic.
/// </summary>
private static TokenCounter SharpTokenTokenCounter => (string input) =>
public class SharpTokenTokenCounter
{
// Initialize encoding by encoding name
var encoding = GptEncoding.GetEncoding("cl100k_base");
private readonly GptEncoding _encoding;

// Initialize encoding by model name
// var encoding = GptEncoding.GetEncodingForModel("gpt-4");
public SharpTokenTokenCounter()
{
this._encoding = GptEncoding.GetEncoding("cl100k_base");
// Initialize encoding by model name
// this._encoding = GptEncoding.GetEncodingForModel("gpt-4");
}

var tokens = encoding.Encode(input);
public int Count(string input)
{
var tokens = this._encoding.Encode(input);

return tokens.Count;
};
return tokens.Count;
}
}

/// <summary>
/// MicrosoftML token counter implementation.
/// </summary>
private static TokenCounter MicrosoftMLTokenCounter => (string input) =>
public class MicrosoftMLTokenCounter
{
Tokenizer tokenizer = new(new Bpe());
var tokens = tokenizer.Encode(input).Tokens;
private readonly Tokenizer _tokenizer;

return tokens.Count;
};
public MicrosoftMLTokenCounter()
{
this._tokenizer = new(new Bpe());
}

public int Count(string input)
{
var tokens = this._tokenizer.Encode(input).Tokens;

return tokens.Count;
}
}

/// <summary>
/// MicrosoftML token counter implementation using Roberta and local vocab
/// </summary>
private static TokenCounter MicrosoftMLRobertaTokenCounter => (string input) =>
public class MicrosoftMLRobertaTokenCounter
{
var encoder = EmbeddedResource.ReadStream("EnglishRoberta.encoder.json");
var vocab = EmbeddedResource.ReadStream("EnglishRoberta.vocab.bpe");
var dict = EmbeddedResource.ReadStream("EnglishRoberta.dict.txt");
private readonly Tokenizer _tokenizer;

if (encoder is null || vocab is null || dict is null)
public MicrosoftMLRobertaTokenCounter()
{
throw new FileNotFoundException("Missing required resources");
}
var encoder = EmbeddedResource.ReadStream("EnglishRoberta.encoder.json");
var vocab = EmbeddedResource.ReadStream("EnglishRoberta.vocab.bpe");
var dict = EmbeddedResource.ReadStream("EnglishRoberta.dict.txt");

EnglishRoberta model = new(encoder, vocab, dict);
if (encoder is null || vocab is null || dict is null)
{
throw new FileNotFoundException("Missing required resources");
}

model.AddMaskSymbol(); // Not sure what this does, but it's in the example
Tokenizer tokenizer = new(model, new RobertaPreTokenizer());
var tokens = tokenizer.Encode(input).Tokens;
EnglishRoberta model = new(encoder, vocab, dict);

return tokens.Count;
};
model.AddMaskSymbol(); // Not sure what this does, but it's in the example
this._tokenizer = new(model, new RobertaPreTokenizer());
}

public int Count(string input)
{
var tokens = this._tokenizer.Encode(input).Tokens;

return tokens.Count;
}
}

/// <summary>
/// DeepDev token counter implementation.
/// </summary>
private static TokenCounter DeepDevTokenCounter => (string input) =>
public class DeepDevTokenCounter
{
// Initialize encoding by encoding name
var tokenizer = TokenizerBuilder.CreateByEncoderNameAsync("cl100k_base").GetAwaiter().GetResult();
private readonly ITokenizer _tokenizer;

// Initialize encoding by model name
// var tokenizer = TokenizerBuilder.CreateByModelNameAsync("gpt-4").GetAwaiter().GetResult();
public DeepDevTokenCounter()
{
this._tokenizer = TokenizerBuilder.CreateByEncoderNameAsync("cl100k_base").GetAwaiter().GetResult();
}

var tokens = tokenizer.Encode(input, new HashSet<string>());
return tokens.Count;
};
public int Count(string input)
{
var tokens = this._tokenizer.Encode(input, new HashSet<string>());
return tokens.Count;
}
}

private static readonly Func<TokenCounterType, TokenCounter> s_tokenCounterFactory = (TokenCounterType counterType) =>
counterType switch
{
TokenCounterType.SharpToken => (string input) => SharpTokenTokenCounter(input),
TokenCounterType.MicrosoftML => (string input) => MicrosoftMLTokenCounter(input),
TokenCounterType.DeepDev => (string input) => DeepDevTokenCounter(input),
TokenCounterType.MicrosoftMLRoberta => (string input) => MicrosoftMLRobertaTokenCounter(input),
TokenCounterType.SharpToken => new SharpTokenTokenCounter().Count,
TokenCounterType.MicrosoftML => new MicrosoftMLTokenCounter().Count,
TokenCounterType.DeepDev => new DeepDevTokenCounter().Count,
TokenCounterType.MicrosoftMLRoberta => new MicrosoftMLRobertaTokenCounter().Count,
_ => throw new ArgumentOutOfRangeException(nameof(counterType), counterType, null),
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -749,6 +749,7 @@ internal static OpenAIClientOptions GetOpenAIClientOptions(HttpClient? httpClien
{
options.Transport = new HttpClientTransport(httpClient);
options.RetryPolicy = new RetryPolicy(maxRetries: 0); // Disable Azure SDK retry policy if and only if a custom HttpClient is provided.
options.Retry.NetworkTimeout = Timeout.InfiniteTimeSpan; // Disable Azure SDK default timeout
}

return options;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public class AzureOpenAIChatCompletionWithDataConfig
public string CompletionApiKey { get; set; } = string.Empty;

/// <summary>
/// Azure OpenAI Completion API version (e.g. 2023-06-01-preview)
/// Azure OpenAI Completion API version (e.g. 2024-02-01)
/// </summary>
public string CompletionApiVersion { get; set; } = string.Empty;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ public async IAsyncEnumerable<StreamingTextContent> GetStreamingTextContentsAsyn

#region private ================================================================================

private const string DefaultApiVersion = "2023-06-01-preview";
private const string DefaultApiVersion = "2024-02-01";

private readonly AzureOpenAIChatCompletionWithDataConfig _config;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ private static OpenAIClientOptions GetClientOptions(HttpClient? httpClient, stri
{
OpenAIClientOptions.ServiceVersion version = apiVersion switch
{
// DALL-E 3 is only supported post 2023-12-01-preview
// DALL-E 3 is supported in the latest API releases
_ => OpenAIClientOptions.ServiceVersion.V2024_02_15_Preview
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ public async Task DefaultApiVersionShouldBeUsedAsync()
// Assert
var actualUri = this._messageHandlerStub.RequestUri?.AbsoluteUri;

Assert.Contains("2023-06-01-preview", actualUri, StringComparison.OrdinalIgnoreCase);
Assert.Contains("2024-02-01", actualUri, StringComparison.OrdinalIgnoreCase);
}

[Fact]
Expand Down
18 changes: 18 additions & 0 deletions dotnet/src/SemanticKernel.Core/CompatibilitySuppressions.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- https://learn.microsoft.com/en-us/dotnet/fundamentals/package-validation/diagnostic-ids -->
<Suppressions xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<Suppression>
<DiagnosticId>CP0002</DiagnosticId>
<Target>M:Microsoft.SemanticKernel.Text.TextChunker.SplitMarkdownParagraphs(System.Collections.Generic.List{System.String},System.Int32,System.Int32,System.String,Microsoft.SemanticKernel.Text.TextChunker.TokenCounter)</Target>
<Left>lib/netstandard2.0/Microsoft.SemanticKernel.Core.dll</Left>
<Right>lib/netstandard2.0/Microsoft.SemanticKernel.Core.dll</Right>
<IsBaselineSuppression>true</IsBaselineSuppression>
</Suppression>
<Suppression>
<DiagnosticId>CP0002</DiagnosticId>
<Target>M:Microsoft.SemanticKernel.Text.TextChunker.SplitPlainTextParagraphs(System.Collections.Generic.List{System.String},System.Int32,System.Int32,System.String,Microsoft.SemanticKernel.Text.TextChunker.TokenCounter)</Target>
<Left>lib/netstandard2.0/Microsoft.SemanticKernel.Core.dll</Left>
<Right>lib/netstandard2.0/Microsoft.SemanticKernel.Core.dll</Right>
<IsBaselineSuppression>true</IsBaselineSuppression>
</Suppression>
</Suppressions>
19 changes: 5 additions & 14 deletions dotnet/src/SemanticKernel.Core/Text/TextChunker.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ public static class TextChunker
/// <param name="maxTokensPerLine">Maximum number of tokens per line.</param>
/// <param name="tokenCounter">Function to count tokens in a string. If not supplied, the default counter will be used.</param>
/// <returns>List of lines.</returns>
[Experimental("SKEXP0050")]
public static List<string> SplitPlainTextLines(string text, int maxTokensPerLine, TokenCounter? tokenCounter = null) =>
InternalSplitLines(text, maxTokensPerLine, trim: true, s_plaintextSplitOptions, tokenCounter);

Expand All @@ -46,7 +45,6 @@ public static List<string> SplitPlainTextLines(string text, int maxTokensPerLine
/// <param name="maxTokensPerLine">Maximum number of tokens per line.</param>
/// <param name="tokenCounter">Function to count tokens in a string. If not supplied, the default counter will be used.</param>
/// <returns>List of lines.</returns>
[Experimental("SKEXP0050")]
public static List<string> SplitMarkDownLines(string text, int maxTokensPerLine, TokenCounter? tokenCounter = null) =>
InternalSplitLines(text, maxTokensPerLine, trim: true, s_markdownSplitOptions, tokenCounter);

Expand All @@ -59,8 +57,7 @@ public static List<string> SplitMarkDownLines(string text, int maxTokensPerLine,
/// <param name="chunkHeader">Text to be prepended to each individual chunk.</param>
/// <param name="tokenCounter">Function to count tokens in a string. If not supplied, the default counter will be used.</param>
/// <returns>List of paragraphs.</returns>
[Experimental("SKEXP0050")]
public static List<string> SplitPlainTextParagraphs(List<string> lines, int maxTokensPerParagraph, int overlapTokens = 0, string? chunkHeader = null, TokenCounter? tokenCounter = null) =>
public static List<string> SplitPlainTextParagraphs(IEnumerable<string> lines, int maxTokensPerParagraph, int overlapTokens = 0, string? chunkHeader = null, TokenCounter? tokenCounter = null) =>
InternalSplitTextParagraphs(lines, maxTokensPerParagraph, overlapTokens, chunkHeader, static (text, maxTokens, tokenCounter) => InternalSplitLines(text, maxTokens, trim: false, s_plaintextSplitOptions, tokenCounter), tokenCounter);

/// <summary>
Expand All @@ -72,12 +69,10 @@ public static List<string> SplitPlainTextParagraphs(List<string> lines, int maxT
/// <param name="chunkHeader">Text to be prepended to each individual chunk.</param>
/// <param name="tokenCounter">Function to count tokens in a string. If not supplied, the default counter will be used.</param>
/// <returns>List of paragraphs.</returns>
[Experimental("SKEXP0050")]
public static List<string> SplitMarkdownParagraphs(List<string> lines, int maxTokensPerParagraph, int overlapTokens = 0, string? chunkHeader = null, TokenCounter? tokenCounter = null) =>
public static List<string> SplitMarkdownParagraphs(IEnumerable<string> lines, int maxTokensPerParagraph, int overlapTokens = 0, string? chunkHeader = null, TokenCounter? tokenCounter = null) =>
InternalSplitTextParagraphs(lines, maxTokensPerParagraph, overlapTokens, chunkHeader, static (text, maxTokens, tokenCounter) => InternalSplitLines(text, maxTokens, trim: false, s_markdownSplitOptions, tokenCounter), tokenCounter);

[Experimental("SKEXP0050")]
private static List<string> InternalSplitTextParagraphs(List<string> lines, int maxTokensPerParagraph, int overlapTokens, string? chunkHeader, Func<string, int, TokenCounter?, List<string>> longLinesSplitter, TokenCounter? tokenCounter)
private static List<string> InternalSplitTextParagraphs(IEnumerable<string> lines, int maxTokensPerParagraph, int overlapTokens, string? chunkHeader, Func<string, int, TokenCounter?, List<string>> longLinesSplitter, TokenCounter? tokenCounter)
{
if (maxTokensPerParagraph <= 0)
{
Expand All @@ -89,7 +84,8 @@ private static List<string> InternalSplitTextParagraphs(List<string> lines, int
throw new ArgumentException("overlapTokens cannot be larger than maxTokensPerParagraph", nameof(maxTokensPerParagraph));
}

if (lines.Count == 0)
// Optimize empty inputs if we can efficiently determine they're empty
if (lines is ICollection<string> c && c.Count == 0)
{
return new List<string>();
}
Expand All @@ -106,7 +102,6 @@ private static List<string> InternalSplitTextParagraphs(List<string> lines, int
return processedParagraphs;
}

[Experimental("SKEXP0050")]
private static List<string> BuildParagraph(IEnumerable<string> truncatedLines, int maxTokensPerParagraph, TokenCounter? tokenCounter)
{
StringBuilder paragraphBuilder = new();
Expand Down Expand Up @@ -147,7 +142,6 @@ private static List<string> BuildParagraph(IEnumerable<string> truncatedLines, i
return paragraphs;
}

[Experimental("SKEXP0050")]
private static List<string> ProcessParagraphs(List<string> paragraphs, int adjustedMaxTokensPerParagraph, int overlapTokens, string? chunkHeader, Func<string, int, TokenCounter?, List<string>> longLinesSplitter, TokenCounter? tokenCounter)
{
// distribute text more evenly in the last paragraphs when the last paragraph is too short.
Expand Down Expand Up @@ -212,7 +206,6 @@ private static List<string> ProcessParagraphs(List<string> paragraphs, int adjus
return processedParagraphs;
}

[Experimental("SKEXP0050")]
private static List<string> InternalSplitLines(string text, int maxTokensPerLine, bool trim, string?[] splitOptions, TokenCounter? tokenCounter)
{
var result = new List<string>();
Expand All @@ -233,7 +226,6 @@ private static List<string> InternalSplitLines(string text, int maxTokensPerLine
return result;
}

[Experimental("SKEXP0050")]
private static (List<string>, bool) Split(List<string> input, int maxTokens, ReadOnlySpan<char> separators, bool trim, TokenCounter? tokenCounter)
{
bool inputWasSplit = false;
Expand All @@ -248,7 +240,6 @@ private static (List<string>, bool) Split(List<string> input, int maxTokens, Rea
return (result, inputWasSplit);
}

[Experimental("SKEXP0050")]
private static (List<string>, bool) Split(ReadOnlySpan<char> input, string? inputString, int maxTokens, ReadOnlySpan<char> separators, bool trim, TokenCounter? tokenCounter)
{
Debug.Assert(inputString is null || input.SequenceEqual(inputString.AsSpan()));
Expand Down
20 changes: 19 additions & 1 deletion python/.conf/.pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
files: ^python/
fail_fast: true
repos:
- repo: https://github.com/floatingpurr/sync_with_poetry
rev: 1.1.0
Expand All @@ -24,4 +26,20 @@ repos:
rev: v0.3.2
hooks:
- id: ruff
args: [ --fix, --exit-non-zero-on-fix ]
args: [ --fix, --exit-non-zero-on-fix ]
- repo: local
hooks:
- id: mypy
name: mypy
entry: poetry -C python/ run python -m mypy --no-namespace-packages --config-file=python/mypy.ini
language: system
types: [python]
pass_filenames: true
- repo: local
hooks:
- id: tests
name: tests
entry: poetry -C python/ run coverage run -m pytest python/tests/unit
language: system
types: [python]
pass_filenames: true
Loading

0 comments on commit 5198d23

Please sign in to comment.